doko 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/doko.gemspec +1 -1
- data/lib/doko.rb +25 -1
- data/sample/Gemfile.lock +7 -3
- data/sample/app.rb +1 -1
- data/spec/doko_spec.rb +58 -50
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.8
|
1
|
+
0.1.9
|
data/doko.gemspec
CHANGED
data/lib/doko.rb
CHANGED
@@ -10,8 +10,32 @@ class Doko
|
|
10
10
|
self.new(str).parse
|
11
11
|
end
|
12
12
|
|
13
|
+
def self.deep(url)
|
14
|
+
addrs = parse(url)
|
15
|
+
if addrs.empty?
|
16
|
+
addrs = links(url).map{ |u|
|
17
|
+
parse(u)
|
18
|
+
}.flatten
|
19
|
+
end
|
20
|
+
addrs
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.links(url)
|
24
|
+
uri = URI.parse(url)
|
25
|
+
out = []
|
26
|
+
doc = Nokogiri::HTML(open(url).read)
|
27
|
+
doc.search("a").each do |a|
|
28
|
+
if a[:href].match(/access/) && !a[:href].match(/http/)
|
29
|
+
out << uri + a[:href]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
out.uniq
|
33
|
+
end
|
34
|
+
|
13
35
|
def initialize(str)
|
14
|
-
if str.
|
36
|
+
if str.kind_of? URI
|
37
|
+
str = open(str.to_s).read
|
38
|
+
elsif str.match( /^#{URI.regexp}$/ )
|
15
39
|
str = open(str).read
|
16
40
|
end
|
17
41
|
if str.match(/<html/i)
|
data/sample/Gemfile.lock
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
PATH
|
2
|
+
remote: ..
|
3
|
+
specs:
|
4
|
+
doko (0.1.8)
|
5
|
+
nokogiri
|
6
|
+
|
1
7
|
GEM
|
2
8
|
remote: http://rubygems.org/
|
3
9
|
specs:
|
4
|
-
doko (0.1.7)
|
5
|
-
nokogiri
|
6
10
|
nokogiri (1.5.3)
|
7
11
|
rack (1.3.6)
|
8
12
|
rack-protection (1.2.0)
|
@@ -17,5 +21,5 @@ PLATFORMS
|
|
17
21
|
ruby
|
18
22
|
|
19
23
|
DEPENDENCIES
|
20
|
-
doko
|
24
|
+
doko!
|
21
25
|
sinatra
|
data/sample/app.rb
CHANGED
data/spec/doko_spec.rb
CHANGED
@@ -5,59 +5,67 @@ require 'open-uri'
|
|
5
5
|
|
6
6
|
describe "Doko" do
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
8
|
+
describe "parse" do
|
9
|
+
|
10
|
+
it "should retrieve address from string" do
|
11
|
+
Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should retrieve address from html" do
|
15
|
+
addrs = Doko.parse(open("http://r.tabelog.com/tokyo/A1304/A130401/13130066/").read)
|
16
|
+
addrs.first.should == "東京都新宿区新宿3-38-1 ルミネエスト7F"
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should return addr from tabelog url" do
|
18
20
|
addrs = Doko.parse("http://r.tabelog.com/kanagawa/A1401/A140104/14001924/")
|
19
21
|
addrs.first.should == "神奈川県横浜市中区海岸通1-1"
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should return from 30min" do
|
25
|
+
addrs = Doko.parse("http://30min.jp/place/23481")
|
26
|
+
addrs.first.should == "東京都墨田区業平1-21-4 第2刀川ビル1F"
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should return addr from site 1" do
|
30
|
+
Doko.parse("http://thanikitchen.com/")[0] == "東京都品川区南大井6-11-10"
|
31
|
+
Doko.parse("http://thanikitchen.com/")[1] == "東京都品川区大井7-29-8"
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should return addr in kyoto" do
|
35
|
+
addrs = Doko.parse("http://www.tripadvisor.jp/Hotel_Review-g298564-d2317992-Reviews-Royal_Park_Hotel_The_Kyoto-Kyoto_Kyoto_Prefecture_Kinki.html")
|
36
|
+
addrs.first.should == "京都府京都市中京区三条通河原町東入ル"
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should return addr in kumamoto" do
|
40
|
+
addrs = Doko.parse("http://travel.rakuten.co.jp/HOTEL/68236/68236_std.html")
|
41
|
+
addrs.first.should == "熊本県阿蘇郡南阿蘇村河陽4673-18"
|
42
|
+
end
|
43
|
+
|
44
|
+
it do
|
45
|
+
Doko.parse("http://www.ynu.ac.jp/index.html").first.should == "神奈川県横浜市保土ケ谷区常盤台79-1"
|
46
|
+
end
|
47
|
+
|
48
|
+
it do
|
49
|
+
Doko.parse("http://www.nissan-stadium.jp/").first.should == "横浜市港北区小机町3300"
|
50
|
+
end
|
51
|
+
|
52
|
+
it do
|
53
|
+
Doko.parse("http://atnd.org/events/28384").first.should == "東京都千代田区神田駿河台2-3 DH2001Bldg."
|
54
|
+
end
|
55
|
+
|
56
|
+
it do
|
57
|
+
page = open("http://www.nttr.co.jp/corporate_profile/index.htm").read.encode("utf-8")
|
58
|
+
Doko.parse(page).first.should == "東京都港区芝浦3-4-1 グランパークタワー"
|
59
|
+
end
|
60
|
+
|
61
|
+
it do
|
62
|
+
Doko.parse("http://www.stadium2002.com/stadium/index.php").first.should == "さいたま市緑区中野田500"
|
63
|
+
end
|
48
64
|
end
|
49
65
|
|
50
|
-
|
51
|
-
|
66
|
+
describe "deep" do
|
67
|
+
it do
|
68
|
+
Doko.deep("http://www.risonare-atami.com/").first.should == "静岡県熱海市水口町2-13-1"
|
69
|
+
end
|
52
70
|
end
|
53
|
-
|
54
|
-
it do
|
55
|
-
page = open("http://www.nttr.co.jp/corporate_profile/index.htm").read.encode("utf-8")
|
56
|
-
Doko.parse(page).first.should == "東京都港区芝浦3-4-1 グランパークタワー"
|
57
|
-
end
|
58
|
-
|
59
|
-
it do
|
60
|
-
Doko.parse("http://www.stadium2002.com/stadium/index.php").first.should == "さいたま市緑区中野田500"
|
61
|
-
end
|
62
|
-
|
63
71
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doko
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
146
|
version: '0'
|
147
147
|
segments:
|
148
148
|
- 0
|
149
|
-
hash:
|
149
|
+
hash: 2942783918463996752
|
150
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
151
|
none: false
|
152
152
|
requirements:
|