doko 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -12,8 +12,13 @@ p addrs #=> ["東京都港区芝浦3-41"]
12
12
  # from url
13
13
  addrs = Doko.parse("http://r.tabelog.com/tokyo/A....")
14
14
  p addrs #=> ["神奈川県横浜市中区.."]
15
+
16
+ # from a web site
17
+ addrs = Doko.deep("http://foo-bar.com/")
18
+ p addrs #=> ["東京都.."] # this is from foo-bar.com/access/index.htm, for example
15
19
  ```
16
20
 
21
+
17
22
  "doko?" means "where?" in japanese.
18
23
 
19
24
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.0
1
+ 0.2.1
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "doko"
8
- s.version = "0.2.0"
8
+ s.version = "0.2.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Masaki Sawamura"]
12
- s.date = "2012-06-11"
12
+ s.date = "2012-07-05"
13
13
  s.description = "retrieve japanese address line from a web page or a string"
14
14
  s.email = "masaki.sw@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -9,20 +9,25 @@ class Doko
9
9
  self.new(str).parse
10
10
  end
11
11
 
12
- def self.deep(url)
13
- addrs = parse(url)
12
+ def self.deep(str,base_uri=nil)
13
+ addrs = parse(str)
14
14
  if addrs.empty?
15
- addrs = links(url).map{ |u|
15
+ addrs = links(str,base_uri).map{ |u|
16
16
  parse(u)
17
17
  }.flatten
18
18
  end
19
19
  addrs
20
20
  end
21
21
 
22
- def self.links(url)
23
- uri = URI.parse(url)
22
+ def self.links(str,base_uri=nil)
24
23
  out = []
25
- doc = Nokogiri::HTML(open(url).read)
24
+ if str.match( /^#{URI.regexp}$/ )
25
+ uri = URI.parse(str)
26
+ doc = Nokogiri::HTML(open(uri).read)
27
+ elsif str.kind_of? String
28
+ uri = URI.parse(base_uri)
29
+ doc = Nokogiri::HTML(str)
30
+ end
26
31
  doc.search("a").each do |a|
27
32
  if a[:href] && a[:href].match(/access/) && !a[:href].match(/http/)
28
33
  out << uri + a[:href]
@@ -81,6 +86,7 @@ class Doko
81
86
  t.tr("ー","-")
82
87
  end
83
88
  line.sub!(/\s$/,"")
89
+ line.gsub!(/\s{3,}+.+$/,"")
84
90
  line
85
91
  end
86
92
  end
@@ -1,7 +1,7 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- doko (0.1.9)
4
+ doko (0.2.0)
5
5
  nokogiri
6
6
  nokogiri (1.5.3)
7
7
  rack (1.3.6)
@@ -50,6 +50,7 @@
50
50
  </p>
51
51
  <% end %>
52
52
 
53
+ <!-- new version -->
53
54
  </body>
54
55
  </html>
55
56
 
@@ -6,6 +6,7 @@ require 'open-uri'
6
6
  describe "Doko" do
7
7
 
8
8
  describe "parse" do
9
+
9
10
  it "should retrieve address from string" do
10
11
  Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
11
12
  end
@@ -71,6 +72,12 @@ describe "Doko" do
71
72
  Doko.deep("http://www.risonare-atami.com/").first.should == "静岡県熱海市水口町2-13-1"
72
73
  end
73
74
 
75
+ it "can pase from page html" do
76
+ url = "http://www.risonare-atami.com/"
77
+ page = open(url).read
78
+ Doko.deep(page,url).first.should == "静岡県熱海市水口町2-13-1"
79
+ end
80
+
74
81
  it do
75
82
  Doko.deep("http://www.yokohama-akarenga.jp/index.html").should be_empty
76
83
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doko
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-11 00:00:00.000000000 Z
12
+ date: 2012-07-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
146
146
  version: '0'
147
147
  segments:
148
148
  - 0
149
- hash: -4468521619932611074
149
+ hash: 3043947709731025530
150
150
  required_rubygems_version: !ruby/object:Gem::Requirement
151
151
  none: false
152
152
  requirements: