doko 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/doko.gemspec +1 -1
- data/lib/doko.rb +20 -16
- data/spec/doko_spec.rb +5 -1
- metadata +2 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.6
|
1
|
+
0.1.7
|
data/doko.gemspec
CHANGED
data/lib/doko.rb
CHANGED
@@ -14,7 +14,7 @@ class Doko
|
|
14
14
|
if str.match( /^#{URI.regexp}$/ )
|
15
15
|
str = open(str).read
|
16
16
|
end
|
17
|
-
if str.match(/<html/)
|
17
|
+
if str.match(/<html/i)
|
18
18
|
@text = (Nokogiri::HTML(str)/"body").text
|
19
19
|
else
|
20
20
|
@text = str
|
@@ -24,36 +24,40 @@ class Doko
|
|
24
24
|
def parse
|
25
25
|
body = @text
|
26
26
|
body.tr!("0-9","0-9")
|
27
|
-
body.tr!("ー","-")
|
28
27
|
body.tr!("()","()")
|
29
28
|
body.tr!("、",",")
|
30
29
|
body.tr!(" "," ")
|
31
30
|
body.tr!(".",".")
|
32
|
-
|
33
31
|
blackchars = ",()\n"
|
34
|
-
|
32
|
+
|
35
33
|
addrs = body.scan(/\b([^\s,()]{2,3}(都|道|府|県)[^\s,()]{1,8}(市|区|町|村)[^#{blackchars}]+)/).map{ |m|
|
36
|
-
|
37
|
-
line.gsub!(/住所(\s|\n)?/,"")
|
38
|
-
line.gsub!(/〒\d{3}-\d{4} ?/,"")
|
39
|
-
line.gsub!(/\s+$/,"")
|
40
|
-
line.gsub!(/\s?電話:.+$/,"")
|
41
|
-
line
|
34
|
+
clean(m[0])
|
42
35
|
}
|
43
36
|
if addrs.empty?
|
44
37
|
addrs = body.scan(/([^\s]{1,6}(市|区).{2,8}(区|町|村)[^\s,()]{2,10}\d)/).map{ |m|
|
45
|
-
|
46
|
-
line.gsub!(/住所(\s|\n)?/,"")
|
47
|
-
line.gsub!(/〒\d{3}-\d{4} ?/,"")
|
48
|
-
line.gsub!("[MAP]","")
|
49
|
-
line.gsub!(/(TEL|FAX):\d{2,4}-\d{2,4}-\d{2,4}/,"")
|
50
|
-
line
|
38
|
+
clean(m[0])
|
51
39
|
}
|
52
40
|
end
|
53
41
|
addrs.select{ |a|
|
54
42
|
!a.match(/を/)
|
55
43
|
}
|
56
44
|
end
|
45
|
+
|
46
|
+
private
|
47
|
+
|
48
|
+
|
49
|
+
def clean(line)
|
50
|
+
line.gsub!(/住所(\s|\n)?/,"")
|
51
|
+
line.gsub!(/〒\d{3}-\d{4} ?/,"")
|
52
|
+
line.gsub!(/\s+$/,"")
|
53
|
+
line.gsub!(/\s?電話:.+$/,"")
|
54
|
+
line.gsub!("[MAP]","")
|
55
|
+
line.gsub!(/(TEL|FAX):\d{2,4}-\d{2,4}-\d{2,4}/,"")
|
56
|
+
line.gsub!(/(\dー)*\d/) do |t|
|
57
|
+
t.tr("ー","-")
|
58
|
+
end
|
59
|
+
line
|
60
|
+
end
|
57
61
|
end
|
58
62
|
|
59
63
|
|
data/spec/doko_spec.rb
CHANGED
@@ -4,7 +4,6 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
4
4
|
require 'open-uri'
|
5
5
|
|
6
6
|
describe "Doko" do
|
7
|
-
|
8
7
|
it "should retrieve address from string" do
|
9
8
|
Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
|
10
9
|
end
|
@@ -50,4 +49,9 @@ describe "Doko" do
|
|
50
49
|
it do
|
51
50
|
Doko.parse("http://atnd.org/events/28384").first.should == "東京都千代田区神田駿河台2-3 DH2001Bldg."
|
52
51
|
end
|
52
|
+
|
53
|
+
it do
|
54
|
+
page = open("http://www.nttr.co.jp/corporate_profile/index.htm").read.encode("utf-8")
|
55
|
+
Doko.parse(page).first.should == "東京都港区芝浦3-4-1 グランパークタワー"
|
56
|
+
end
|
53
57
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doko
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
146
|
version: '0'
|
147
147
|
segments:
|
148
148
|
- 0
|
149
|
-
hash:
|
149
|
+
hash: 4235072000421726462
|
150
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
151
|
none: false
|
152
152
|
requirements:
|