doko 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/doko.gemspec +1 -1
- data/lib/doko.rb +25 -1
- data/sample/Gemfile.lock +7 -3
- data/sample/app.rb +1 -1
- data/spec/doko_spec.rb +58 -50
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.8
|
1
|
+
0.1.9
|
data/doko.gemspec
CHANGED
data/lib/doko.rb
CHANGED
@@ -10,8 +10,32 @@ class Doko
|
|
10
10
|
self.new(str).parse
|
11
11
|
end
|
12
12
|
|
13
|
+
def self.deep(url)
|
14
|
+
addrs = parse(url)
|
15
|
+
if addrs.empty?
|
16
|
+
addrs = links(url).map{ |u|
|
17
|
+
parse(u)
|
18
|
+
}.flatten
|
19
|
+
end
|
20
|
+
addrs
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.links(url)
|
24
|
+
uri = URI.parse(url)
|
25
|
+
out = []
|
26
|
+
doc = Nokogiri::HTML(open(url).read)
|
27
|
+
doc.search("a").each do |a|
|
28
|
+
if a[:href].match(/access/) && !a[:href].match(/http/)
|
29
|
+
out << uri + a[:href]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
out.uniq
|
33
|
+
end
|
34
|
+
|
13
35
|
def initialize(str)
|
14
|
-
if str.match( /^#{URI.regexp}$/ )
|
36
|
+
if str.kind_of? URI
|
37
|
+
str = open(str.to_s).read
|
38
|
+
elsif str.match( /^#{URI.regexp}$/ )
|
15
39
|
str = open(str).read
|
16
40
|
end
|
17
41
|
if str.match(/<html/i)
|
data/sample/Gemfile.lock
CHANGED
@@ -1,8 +1,12 @@
|
|
1
|
+
PATH
|
2
|
+
remote: ..
|
3
|
+
specs:
|
4
|
+
doko (0.1.8)
|
5
|
+
nokogiri
|
6
|
+
|
1
7
|
GEM
|
2
8
|
remote: http://rubygems.org/
|
3
9
|
specs:
|
4
|
-
doko (0.1.7)
|
5
|
-
nokogiri
|
6
10
|
nokogiri (1.5.3)
|
7
11
|
rack (1.3.6)
|
8
12
|
rack-protection (1.2.0)
|
@@ -17,5 +21,5 @@ PLATFORMS
|
|
17
21
|
ruby
|
18
22
|
|
19
23
|
DEPENDENCIES
|
20
|
-
doko
|
24
|
+
doko!
|
21
25
|
sinatra
|
data/sample/app.rb
CHANGED
data/spec/doko_spec.rb
CHANGED
@@ -5,59 +5,67 @@ require 'open-uri'
|
|
5
5
|
|
6
6
|
describe "Doko" do
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
8
|
+
describe "parse" do
|
9
|
+
|
10
|
+
it "should retrieve address from string" do
|
11
|
+
Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should retrieve address from html" do
|
15
|
+
addrs = Doko.parse(open("http://r.tabelog.com/tokyo/A1304/A130401/13130066/").read)
|
16
|
+
addrs.first.should == "東京都新宿区新宿3-38-1 ルミネエスト7F"
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should return addr from tabelog url" do
|
18
20
|
addrs = Doko.parse("http://r.tabelog.com/kanagawa/A1401/A140104/14001924/")
|
19
21
|
addrs.first.should == "神奈川県横浜市中区海岸通1-1"
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should return from 30min" do
|
25
|
+
addrs = Doko.parse("http://30min.jp/place/23481")
|
26
|
+
addrs.first.should == "東京都墨田区業平1-21-4 第2刀川ビル1F"
|
27
|
+
end
|
28
|
+
|
29
|
+
it "should return addr from site 1" do
|
30
|
+
Doko.parse("http://thanikitchen.com/")[0] == "東京都品川区南大井6-11-10"
|
31
|
+
Doko.parse("http://thanikitchen.com/")[1] == "東京都品川区大井7-29-8"
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should return addr in kyoto" do
|
35
|
+
addrs = Doko.parse("http://www.tripadvisor.jp/Hotel_Review-g298564-d2317992-Reviews-Royal_Park_Hotel_The_Kyoto-Kyoto_Kyoto_Prefecture_Kinki.html")
|
36
|
+
addrs.first.should == "京都府京都市中京区三条通河原町東入ル"
|
37
|
+
end
|
38
|
+
|
39
|
+
it "should return addr in kumamoto" do
|
40
|
+
addrs = Doko.parse("http://travel.rakuten.co.jp/HOTEL/68236/68236_std.html")
|
41
|
+
addrs.first.should == "熊本県阿蘇郡南阿蘇村河陽4673-18"
|
42
|
+
end
|
43
|
+
|
44
|
+
it do
|
45
|
+
Doko.parse("http://www.ynu.ac.jp/index.html").first.should == "神奈川県横浜市保土ケ谷区常盤台79-1"
|
46
|
+
end
|
47
|
+
|
48
|
+
it do
|
49
|
+
Doko.parse("http://www.nissan-stadium.jp/").first.should == "横浜市港北区小机町3300"
|
50
|
+
end
|
51
|
+
|
52
|
+
it do
|
53
|
+
Doko.parse("http://atnd.org/events/28384").first.should == "東京都千代田区神田駿河台2-3 DH2001Bldg."
|
54
|
+
end
|
55
|
+
|
56
|
+
it do
|
57
|
+
page = open("http://www.nttr.co.jp/corporate_profile/index.htm").read.encode("utf-8")
|
58
|
+
Doko.parse(page).first.should == "東京都港区芝浦3-4-1 グランパークタワー"
|
59
|
+
end
|
60
|
+
|
61
|
+
it do
|
62
|
+
Doko.parse("http://www.stadium2002.com/stadium/index.php").first.should == "さいたま市緑区中野田500"
|
63
|
+
end
|
48
64
|
end
|
49
65
|
|
50
|
-
|
51
|
-
|
66
|
+
describe "deep" do
|
67
|
+
it do
|
68
|
+
Doko.deep("http://www.risonare-atami.com/").first.should == "静岡県熱海市水口町2-13-1"
|
69
|
+
end
|
52
70
|
end
|
53
|
-
|
54
|
-
it do
|
55
|
-
page = open("http://www.nttr.co.jp/corporate_profile/index.htm").read.encode("utf-8")
|
56
|
-
Doko.parse(page).first.should == "東京都港区芝浦3-4-1 グランパークタワー"
|
57
|
-
end
|
58
|
-
|
59
|
-
it do
|
60
|
-
Doko.parse("http://www.stadium2002.com/stadium/index.php").first.should == "さいたま市緑区中野田500"
|
61
|
-
end
|
62
|
-
|
63
71
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doko
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
146
|
version: '0'
|
147
147
|
segments:
|
148
148
|
- 0
|
149
|
-
hash:
|
149
|
+
hash: 2942783918463996752
|
150
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
151
|
none: false
|
152
152
|
requirements:
|