doko 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/VERSION +1 -1
  2. data/doko.gemspec +1 -1
  3. data/lib/doko.rb +20 -16
  4. data/spec/doko_spec.rb +5 -1
  5. metadata +2 -2
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 0.1.7
data/doko.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "doko"
8
- s.version = "0.1.6"
8
+ s.version = "0.1.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Masaki Sawamura"]
data/lib/doko.rb CHANGED
@@ -14,7 +14,7 @@ class Doko
14
14
  if str.match( /^#{URI.regexp}$/ )
15
15
  str = open(str).read
16
16
  end
17
- if str.match(/<html/)
17
+ if str.match(/<html/i)
18
18
  @text = (Nokogiri::HTML(str)/"body").text
19
19
  else
20
20
  @text = str
@@ -24,36 +24,40 @@ class Doko
24
24
  def parse
25
25
  body = @text
26
26
  body.tr!("０-９","0-9")
27
- body.tr!("ー","-")
28
27
  body.tr!("（）","()")
29
28
  body.tr!("、",",")
30
29
  body.tr!("　"," ")
31
30
  body.tr!("．",".")
32
-
33
31
  blackchars = ",()\n"
34
-
32
+
35
33
  addrs = body.scan(/\b([^\s,()]{2,3}(都|道|府|県)[^\s,()]{1,8}(市|区|町|村)[^#{blackchars}]+)/).map{ |m|
36
- line = m[0]
37
- line.gsub!(/住所(\s|\n)?/,"")
38
- line.gsub!(/〒\d{3}-\d{4} ?/,"")
39
- line.gsub!(/\s+$/,"")
40
- line.gsub!(/\s?電話:.+$/,"")
41
- line
34
+ clean(m[0])
42
35
  }
43
36
  if addrs.empty?
44
37
  addrs = body.scan(/([^\s]{1,6}(市|区).{2,8}(区|町|村)[^\s,()]{2,10}\d)/).map{ |m|
45
- line = m[0]
46
- line.gsub!(/住所(\s|\n)?/,"")
47
- line.gsub!(/〒\d{3}-\d{4} ?/,"")
48
- line.gsub!("[MAP]","")
49
- line.gsub!(/(TEL|FAX):\d{2,4}-\d{2,4}-\d{2,4}/,"")
50
- line
38
+ clean(m[0])
51
39
  }
52
40
  end
53
41
  addrs.select{ |a|
54
42
  !a.match(/を/)
55
43
  }
56
44
  end
45
+
46
+ private
47
+
48
+
49
+ def clean(line)
50
+ line.gsub!(/住所(\s|\n)?/,"")
51
+ line.gsub!(/〒\d{3}-\d{4} ?/,"")
52
+ line.gsub!(/\s+$/,"")
53
+ line.gsub!(/\s?電話:.+$/,"")
54
+ line.gsub!("[MAP]","")
55
+ line.gsub!(/(TEL|FAX):\d{2,4}-\d{2,4}-\d{2,4}/,"")
56
+ line.gsub!(/(\dー)*\d/) do |t|
57
+ t.tr("ー","-")
58
+ end
59
+ line
60
+ end
57
61
  end
58
62
 
59
63
 
data/spec/doko_spec.rb CHANGED
@@ -4,7 +4,6 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
4
4
  require 'open-uri'
5
5
 
6
6
  describe "Doko" do
7
-
8
7
  it "should retrieve address from string" do
9
8
  Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
10
9
  end
@@ -50,4 +49,9 @@ describe "Doko" do
50
49
  it do
51
50
  Doko.parse("http://atnd.org/events/28384").first.should == "東京都千代田区神田駿河台2-3 DH2001Bldg."
52
51
  end
52
+
53
+ it do
54
+ page = open("http://www.nttr.co.jp/corporate_profile/index.htm").read.encode("utf-8")
55
+ Doko.parse(page).first.should == "東京都港区芝浦3-4-1 グランパークタワー"
56
+ end
53
57
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doko
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
146
146
  version: '0'
147
147
  segments:
148
148
  - 0
149
- hash: 3862698864711078623
149
+ hash: 4235072000421726462
150
150
  required_rubygems_version: !ruby/object:Gem::Requirement
151
151
  none: false
152
152
  requirements: