doko 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -0
- data/VERSION +1 -1
- data/doko.gemspec +2 -2
- data/lib/doko.rb +12 -6
- data/sample/Gemfile.lock +1 -1
- data/sample/views/index.erb +1 -0
- data/spec/doko_spec.rb +7 -0
- metadata +3 -3
data/README.md
CHANGED
@@ -12,8 +12,13 @@ p addrs #=> ["東京都港区芝浦3-41"]
|
|
12
12
|
# from url
|
13
13
|
addrs = Doko.parse("http://r.tabelog.com/tokyo/A....")
|
14
14
|
p addrs #=> ["神奈川県横浜市中区.."]
|
15
|
+
|
16
|
+
# from a web site
|
17
|
+
addrs = Doko.deep("http://foo-bar.com/")
|
18
|
+
p addrs #=> ["東京都.."] # found, for example, on foo-bar.com/access/index.htm
|
15
19
|
```
|
16
20
|
|
21
|
+
|
17
22
|
"doko?" means "where?" in Japanese.
|
18
23
|
|
19
24
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.0
|
1
|
+
0.2.1
|
data/doko.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "doko"
|
8
|
-
s.version = "0.2.0"
|
8
|
+
s.version = "0.2.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Masaki Sawamura"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-07-05"
|
13
13
|
s.description = "retrieve japanese address line from a web page or a string"
|
14
14
|
s.email = "masaki.sw@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/doko.rb
CHANGED
@@ -9,20 +9,25 @@ class Doko
|
|
9
9
|
self.new(str).parse
|
10
10
|
end
|
11
11
|
|
12
|
-
def self.deep(
|
13
|
-
addrs = parse(
|
12
|
+
def self.deep(str,base_uri=nil)
|
13
|
+
addrs = parse(str)
|
14
14
|
if addrs.empty?
|
15
|
-
addrs = links(
|
15
|
+
addrs = links(str,base_uri).map{ |u|
|
16
16
|
parse(u)
|
17
17
|
}.flatten
|
18
18
|
end
|
19
19
|
addrs
|
20
20
|
end
|
21
21
|
|
22
|
-
def self.links(
|
23
|
-
uri = URI.parse(url)
|
22
|
+
def self.links(str,base_uri=nil)
|
24
23
|
out = []
|
25
|
-
|
24
|
+
if str.match( /^#{URI.regexp}$/ )
|
25
|
+
uri = URI.parse(str)
|
26
|
+
doc = Nokogiri::HTML(open(uri).read)
|
27
|
+
elsif str.kind_of? String
|
28
|
+
uri = URI.parse(base_uri)
|
29
|
+
doc = Nokogiri::HTML(str)
|
30
|
+
end
|
26
31
|
doc.search("a").each do |a|
|
27
32
|
if a[:href] && a[:href].match(/access/) && !a[:href].match(/http/)
|
28
33
|
out << uri + a[:href]
|
@@ -81,6 +86,7 @@ class Doko
|
|
81
86
|
t.tr("ー","-")
|
82
87
|
end
|
83
88
|
line.sub!(/\s$/,"")
|
89
|
+
line.gsub!(/\s{3,}+.+$/,"")
|
84
90
|
line
|
85
91
|
end
|
86
92
|
end
|
data/sample/Gemfile.lock
CHANGED
data/sample/views/index.erb
CHANGED
data/spec/doko_spec.rb
CHANGED
@@ -6,6 +6,7 @@ require 'open-uri'
|
|
6
6
|
describe "Doko" do
|
7
7
|
|
8
8
|
describe "parse" do
|
9
|
+
|
9
10
|
it "should retrieve address from string" do
|
10
11
|
Doko.parse("ここは\n東京都港区芝浦3-4-1\nです").first.should == "東京都港区芝浦3-4-1"
|
11
12
|
end
|
@@ -71,6 +72,12 @@ describe "Doko" do
|
|
71
72
|
Doko.deep("http://www.risonare-atami.com/").first.should == "静岡県熱海市水口町2-13-1"
|
72
73
|
end
|
73
74
|
|
75
|
+
it "can pase from page html" do
|
76
|
+
url = "http://www.risonare-atami.com/"
|
77
|
+
page = open(url).read
|
78
|
+
Doko.deep(page,url).first.should == "静岡県熱海市水口町2-13-1"
|
79
|
+
end
|
80
|
+
|
74
81
|
it do
|
75
82
|
Doko.deep("http://www.yokohama-akarenga.jp/index.html").should be_empty
|
76
83
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doko
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-07-05 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -146,7 +146,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
146
|
version: '0'
|
147
147
|
segments:
|
148
148
|
- 0
|
149
|
-
hash:
|
149
|
+
hash: 3043947709731025530
|
150
150
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
151
|
none: false
|
152
152
|
requirements:
|