doko 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -12,8 +12,13 @@ p addrs #=> ["東京都港区芝浦3-41"]
12
12
  # from url
13
13
  addrs = Doko.parse("http://r.tabelog.com/tokyo/A....")
14
14
  p addrs #=> ["神奈川県横浜市中区.."]
15
+
16
+ # from a web site
17
+ addrs = Doko.deep("http://foo-bar.com/")
18
+ p addrs #=> ["東京都.."] # for example, this address comes from foo-bar.com/access/index.htm
15
19
  ```
16
20
 
21
+
17
22
  "doko?" means "where?" in japanese.
18
23
 
19
24
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.2.1
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "doko"
8
- s.version = "0.2.0"
8
+ s.version = "0.2.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Masaki Sawamura"]
12
- s.date = "2012-06-11"
12
+ s.date = "2012-07-05"
13
13
  s.description = "retrieve japanese address line from a web page or a string"
14
14
  s.email = "masaki.sw@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -9,20 +9,25 @@ class Doko
9
9
  self.new(str).parse
10
10
  end
11
11
 
12
- def self.deep(url)
13
- addrs = parse(url)
12
+ def self.deep(str,base_uri=nil)
13
+ addrs = parse(str)
14
14
  if addrs.empty?
15
- addrs = links(url).map{ |u|
15
+ addrs = links(str,base_uri).map{ |u|
16
16
  parse(u)
17
17
  }.flatten
18
18
  end
19
19
  addrs
20
20
  end
21
21
 
22
- def self.links(url)
23
- uri = URI.parse(url)
22
+ def self.links(str,base_uri=nil)
24
23
  out = []
25
- doc = Nokogiri::HTML(open(url).read)
24
+ if str.match( /^#{URI.regexp}$/ )
25
+ uri = URI.parse(str)
26
+ doc = Nokogiri::HTML(open(uri).read)
27
+ elsif str.kind_of? String
28
+ uri = URI.parse(base_uri)
29
+ doc = Nokogiri::HTML(str)
30
+ end
26
31
  doc.search("a").each do |a|
27
32
  if a[:href] && a[:href].match(/access/) && !a[:href].match(/http/)
28
33
  out << uri + a[:href]
@@ -81,6 +86,7 @@ class Doko
81
86
  t.tr("ー","-")
82
87
  end
83
88
  line.sub!(/\s$/,"")
89
+ line.gsub!(/\s{3,}+.+$/,"")
84
90
  line
85
91
  end
86
92
  end
@@ -1,7 +1,7 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- doko (0.1.9)
4
+ doko (0.2.0)
5
5
  nokogiri
6
6
  nokogiri (1.5.3)
7
7
  rack (1.3.6)
@@ -50,6 +50,7 @@
50
50
  </p>
51
51
  <% end %>
52
52
 
53
+ <!-- new version -->
53
54
  </body>
54
55
  </html>
55
56
 
@@ -6,6 +6,7 @@ require 'open-uri'
6
6
  describe "Doko" do
7
7
 
8
8
  describe "parse" do
9
+
9
10
  it "should retrieve address from string" do
10
11
  Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
11
12
  end
@@ -71,6 +72,12 @@ describe "Doko" do
71
72
  Doko.deep("http://www.risonare-atami.com/").first.should == "静岡県熱海市水口町2-13-1"
72
73
  end
73
74
 
75
+ it "can pase from page html" do
76
+ url = "http://www.risonare-atami.com/"
77
+ page = open(url).read
78
+ Doko.deep(page,url).first.should == "静岡県熱海市水口町2-13-1"
79
+ end
80
+
74
81
  it do
75
82
  Doko.deep("http://www.yokohama-akarenga.jp/index.html").should be_empty
76
83
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doko
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-11 00:00:00.000000000 Z
12
+ date: 2012-07-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
146
146
  version: '0'
147
147
  segments:
148
148
  - 0
149
- hash: -4468521619932611074
149
+ hash: 3043947709731025530
150
150
  required_rubygems_version: !ruby/object:Gem::Requirement
151
151
  none: false
152
152
  requirements: