doko 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -0
- data/VERSION +1 -1
- data/doko.gemspec +2 -2
- data/lib/doko.rb +12 -6
- data/sample/Gemfile.lock +1 -1
- data/sample/views/index.erb +1 -0
- data/spec/doko_spec.rb +7 -0
- metadata +3 -3
data/README.md
CHANGED
@@ -12,8 +12,13 @@ p addrs #=> ["東京都港区芝浦3-41"]
|
|
12
12
|
# from url
|
13
13
|
addrs = Doko.parse("http://r.tabelog.com/tokyo/A....")
|
14
14
|
p addrs #=> ["神奈川県横浜市中区.."]
|
15
|
+
|
16
|
+
# from a web site
|
17
|
+
addrs = Doko.deep("http://foo-bar.com/")
|
18
|
+
p addrs #=> ["東京都.."] # this is from foo-bar.com/access/index.htm ,for example
|
15
19
|
```
|
16
20
|
|
21
|
+
|
17
22
|
"doko?" means "where?" in japanese.
|
18
23
|
|
19
24
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.2.1
|
data/doko.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "doko"
|
8
|
-
s.version = "0.2.0"
|
8
|
+
s.version = "0.2.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Masaki Sawamura"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-07-05"
|
13
13
|
s.description = "retrieve japanese address line from a web page or a string"
|
14
14
|
s.email = "masaki.sw@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/doko.rb
CHANGED
@@ -9,20 +9,25 @@ class Doko
|
|
9
9
|
self.new(str).parse
|
10
10
|
end
|
11
11
|
|
12
|
-
def self.deep(
|
13
|
-
addrs = parse(
|
12
|
+
def self.deep(str,base_uri=nil)
|
13
|
+
addrs = parse(str)
|
14
14
|
if addrs.empty?
|
15
|
-
addrs = links(
|
15
|
+
addrs = links(str,base_uri).map{ |u|
|
16
16
|
parse(u)
|
17
17
|
}.flatten
|
18
18
|
end
|
19
19
|
addrs
|
20
20
|
end
|
21
21
|
|
22
|
-
def self.links(
|
23
|
-
uri = URI.parse(url)
|
22
|
+
def self.links(str,base_uri=nil)
|
24
23
|
out = []
|
25
|
-
|
24
|
+
if str.match( /^#{URI.regexp}$/ )
|
25
|
+
uri = URI.parse(str)
|
26
|
+
doc = Nokogiri::HTML(open(uri).read)
|
27
|
+
elsif str.kind_of? String
|
28
|
+
uri = URI.parse(base_uri)
|
29
|
+
doc = Nokogiri::HTML(str)
|
30
|
+
end
|
26
31
|
doc.search("a").each do |a|
|
27
32
|
if a[:href] && a[:href].match(/access/) && !a[:href].match(/http/)
|
28
33
|
out << uri + a[:href]
|
@@ -81,6 +86,7 @@ class Doko
|
|
81
86
|
t.tr("ー","-")
|
82
87
|
end
|
83
88
|
line.sub!(/\s$/,"")
|
89
|
+
line.gsub!(/\s{3,}+.+$/,"")
|
84
90
|
line
|
85
91
|
end
|
86
92
|
end
|
data/sample/Gemfile.lock
CHANGED
data/sample/views/index.erb
CHANGED
data/spec/doko_spec.rb
CHANGED
@@ -6,6 +6,7 @@ require 'open-uri'
|
|
6
6
|
describe "Doko" do
|
7
7
|
|
8
8
|
describe "parse" do
|
9
|
+
|
9
10
|
it "should retrieve address from string" do
|
10
11
|
Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
|
11
12
|
end
|
@@ -71,6 +72,12 @@ describe "Doko" do
|
|
71
72
|
Doko.deep("http://www.risonare-atami.com/").first.should == "静岡県熱海市水口町2-13-1"
|
72
73
|
end
|
73
74
|
|
75
|
+
it "can pase from page html" do
|
76
|
+
url = "http://www.risonare-atami.com/"
|
77
|
+
page = open(url).read
|
78
|
+
Doko.deep(page,url).first.should == "静岡県熱海市水口町2-13-1"
|
79
|
+
end
|
80
|
+
|
74
81
|
it do
|
75
82
|
Doko.deep("http://www.yokohama-akarenga.jp/index.html").should be_empty
|
76
83
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doko
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
146
|
version: '0'
|
147
147
|
segments:
|
148
148
|
- 0
|
149
|
-
hash:
|
149
|
+
hash: 3043947709731025530
|
150
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
151
|
none: false
|
152
152
|
requirements:
|