doko 0.1.6 → 0.1.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (5)
  1. data/VERSION +1 -1
  2. data/doko.gemspec +1 -1
  3. data/lib/doko.rb +20 -16
  4. data/spec/doko_spec.rb +5 -1
  5. metadata +2 -2
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.6
1
+ 0.1.7
data/doko.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "doko"
8
- s.version = "0.1.6"
8
+ s.version = "0.1.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Masaki Sawamura"]
data/lib/doko.rb CHANGED
@@ -14,7 +14,7 @@ class Doko
14
14
  if str.match( /^#{URI.regexp}$/ )
15
15
  str = open(str).read
16
16
  end
17
- if str.match(/<html/)
17
+ if str.match(/<html/i)
18
18
  @text = (Nokogiri::HTML(str)/"body").text
19
19
  else
20
20
  @text = str
@@ -24,36 +24,40 @@ class Doko
24
24
  def parse
25
25
  body = @text
26
26
  body.tr!("0-9","0-9")
27
- body.tr!("ー","-")
28
27
  body.tr!("()","()")
29
28
  body.tr!("、",",")
30
29
  body.tr!(" "," ")
31
30
  body.tr!(".",".")
32
-
33
31
  blackchars = ",()\n"
34
-
32
+
35
33
  addrs = body.scan(/\b([^\s,()]{2,3}(都|道|府|県)[^\s,()]{1,8}(市|区|町|村)[^#{blackchars}]+)/).map{ |m|
36
- line = m[0]
37
- line.gsub!(/住所(\s|\n)?/,"")
38
- line.gsub!(/〒\d{3}-\d{4} ?/,"")
39
- line.gsub!(/\s+$/,"")
40
- line.gsub!(/\s?電話:.+$/,"")
41
- line
34
+ clean(m[0])
42
35
  }
43
36
  if addrs.empty?
44
37
  addrs = body.scan(/([^\s]{1,6}(市|区).{2,8}(区|町|村)[^\s,()]{2,10}\d)/).map{ |m|
45
- line = m[0]
46
- line.gsub!(/住所(\s|\n)?/,"")
47
- line.gsub!(/〒\d{3}-\d{4} ?/,"")
48
- line.gsub!("[MAP]","")
49
- line.gsub!(/(TEL|FAX):\d{2,4}-\d{2,4}-\d{2,4}/,"")
50
- line
38
+ clean(m[0])
51
39
  }
52
40
  end
53
41
  addrs.select{ |a|
54
42
  !a.match(/を/)
55
43
  }
56
44
  end
45
+
46
+ private
47
+
48
+
49
+ def clean(line)
50
+ line.gsub!(/住所(\s|\n)?/,"")
51
+ line.gsub!(/〒\d{3}-\d{4} ?/,"")
52
+ line.gsub!(/\s+$/,"")
53
+ line.gsub!(/\s?電話:.+$/,"")
54
+ line.gsub!("[MAP]","")
55
+ line.gsub!(/(TEL|FAX):\d{2,4}-\d{2,4}-\d{2,4}/,"")
56
+ line.gsub!(/(\dー)*\d/) do |t|
57
+ t.tr("ー","-")
58
+ end
59
+ line
60
+ end
57
61
  end
58
62
 
59
63
 
data/spec/doko_spec.rb CHANGED
@@ -4,7 +4,6 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
4
4
  require 'open-uri'
5
5
 
6
6
  describe "Doko" do
7
-
8
7
  it "should retrieve address from string" do
9
8
  Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
10
9
  end
@@ -50,4 +49,9 @@ describe "Doko" do
50
49
  it do
51
50
  Doko.parse("http://atnd.org/events/28384").first.should == "東京都千代田区神田駿河台2-3 DH2001Bldg."
52
51
  end
52
+
53
+ it do
54
+ page = open("http://www.nttr.co.jp/corporate_profile/index.htm").read.encode("utf-8")
55
+ Doko.parse(page).first.should == "東京都港区芝浦3-4-1 グランパークタワー"
56
+ end
53
57
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doko
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
146
146
  version: '0'
147
147
  segments:
148
148
  - 0
149
- hash: 3862698864711078623
149
+ hash: 4235072000421726462
150
150
  required_rubygems_version: !ruby/object:Gem::Requirement
151
151
  none: false
152
152
  requirements: