doko 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
# Fetch gems over TLS; plain http:// sources can be tampered with in transit.
source "https://rubygems.org"

# Dependencies required to use the gem at runtime.
# Example:
#   gem "activesupport", ">= 2.3.5"
gem "nokogiri"

# Dependencies needed only to develop the gem.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.8.0"
  gem "rdoc", "~> 3.12"
  gem "bundler"
  gem "jeweler", "~> 1.8.3"
  gem "simplecov"
end
data/Gemfile.lock ADDED
@@ -0,0 +1,39 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.8.3)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ rdoc
11
+ json (1.7.3)
12
+ multi_json (1.3.6)
13
+ nokogiri (1.5.2)
14
+ rake (0.9.2.2)
15
+ rdoc (3.12)
16
+ json (~> 1.4)
17
+ rspec (2.8.0)
18
+ rspec-core (~> 2.8.0)
19
+ rspec-expectations (~> 2.8.0)
20
+ rspec-mocks (~> 2.8.0)
21
+ rspec-core (2.8.0)
22
+ rspec-expectations (2.8.0)
23
+ diff-lcs (~> 1.1.2)
24
+ rspec-mocks (2.8.0)
25
+ simplecov (0.6.4)
26
+ multi_json (~> 1.0)
27
+ simplecov-html (~> 0.5.3)
28
+ simplecov-html (0.5.3)
29
+
30
+ PLATFORMS
31
+ ruby
32
+
33
+ DEPENDENCIES
34
+ bundler
35
+ jeweler (~> 1.8.3)
36
+ nokogiri
37
+ rdoc (~> 3.12)
38
+ rspec (~> 2.8.0)
39
+ simplecov
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Masaki Sawamura
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # doko
2
+
3
+ Japanese address retriever.
4
+
5
+ ## Usage
6
+
7
+ ```ruby
8
+ # from string
9
+ addrs = Doko.parse("..\n住所\n 東京都港区芝浦3-41 \n..\n...")
10
+ p addrs #=> ["東京都港区芝浦3-41"]
11
+
12
+ # from url
13
+ addrs = Doko.parse("http://r.tabelog.com/tokyo/A....")
14
+ p addrs #=> ["神奈川県横浜市中区.."]
15
+ ```
16
+
17
+ "doko?" means "where?" in japanese.
18
+
19
+
20
+ ## Install
21
+
22
+ ```
23
+ gem install doko
24
+ ```
25
+
26
+
27
+ ## Contributing to doko
28
+
29
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
30
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
31
+ * Fork the project.
32
+ * Start a feature/bugfix branch.
33
+ * Commit and push until you are happy with your contribution.
34
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
35
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
36
+
37
+ ## Copyright
38
+
39
+ Copyright (c) 2012 Masaki Sawamura. See LICENSE.txt for
40
+ further details.
41
+
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
# Gem metadata. Jeweler generates doko.gemspec from this block
# (run `rake gemspec`), so edit the values here rather than in the gemspec.
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "doko"
  gem.homepage = "http://github.com/sawamur/doko"
  gem.license = "MIT"
  # Summary and description are shown to users; "retierver" was a typo.
  gem.summary = %Q{Japanese address retriever}
  gem.description = %Q{Japanese address retriever}
  gem.email = "masaki.sw@gmail.com"
  gem.authors = ["Masaki Sawamura"]
  # dependencies defined in Gemfile
end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.rcov = true
37
+ end
38
+
39
+ task :default => :spec
40
+
41
+ require 'rdoc/task'
42
+ Rake::RDocTask.new do |rdoc|
43
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "doko #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/doko.gemspec ADDED
@@ -0,0 +1,66 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "doko"
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Masaki Sawamura"]
12
+ s.date = "2012-06-07"
13
+ s.description = "Japanese address retierver"
14
+ s.email = "masaki.sw@gmail.com"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.md"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rspec",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
25
+ "README.md",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "doko.gemspec",
29
+ "lib/doko.rb",
30
+ "spec/doko_spec.rb",
31
+ "spec/spec_helper.rb"
32
+ ]
33
+ s.homepage = "http://github.com/sawamur/doko"
34
+ s.licenses = ["MIT"]
35
+ s.require_paths = ["lib"]
36
+ s.rubygems_version = "1.8.24"
37
+ s.summary = "Japanese address retierver"
38
+
39
+ if s.respond_to? :specification_version then
40
+ s.specification_version = 3
41
+
42
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
43
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
44
+ s.add_development_dependency(%q<rspec>, ["~> 2.8.0"])
45
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
46
+ s.add_development_dependency(%q<bundler>, [">= 0"])
47
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
48
+ s.add_development_dependency(%q<simplecov>, [">= 0"])
49
+ else
50
+ s.add_dependency(%q<nokogiri>, [">= 0"])
51
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
52
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
53
+ s.add_dependency(%q<bundler>, [">= 0"])
54
+ s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
55
+ s.add_dependency(%q<simplecov>, [">= 0"])
56
+ end
57
+ else
58
+ s.add_dependency(%q<nokogiri>, [">= 0"])
59
+ s.add_dependency(%q<rspec>, ["~> 2.8.0"])
60
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
61
+ s.add_dependency(%q<bundler>, [">= 0"])
62
+ s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
63
+ s.add_dependency(%q<simplecov>, [">= 0"])
64
+ end
65
+ end
66
+
data/lib/doko.rb ADDED
@@ -0,0 +1,51 @@
1
+ # -*- coding: utf-8 -*-
2
+ # -*- code:utf-8 -*-
3
+
4
+ require 'nokogiri'
5
+ require 'open-uri'
6
+ require 'uri'
7
+
8
# Extracts Japanese postal addresses from plain text, HTML, or a remote page.
#
#   Doko.parse("..\n住所\n 東京都港区芝浦3-41 \n..")  #=> ["東京都港区芝浦3-41"]
#   Doko.parse("http://example.com/shop.html")       # fetches the page first
class Doko
  # The whole input must be a single http/https/ftp URL to be fetched.
  # \A/\z (not ^/$) are used on purpose: with line anchors a multi-line
  # document containing one URL-looking line would be treated as a URL.
  REMOTE_URL = %r{\A(?:https?|ftp)://\S+\z}i

  # "<prefecture><municipality><rest of line>", e.g. 東京都港区芝浦3-41.
  PREFECTURE_ADDRESS = /([^\s,()]{2,8}(都|道|府|県)[^\s,()]{1,8}(市|区|町|村).+)/

  # Fallback for addresses written without a prefecture; must end in a digit.
  CITY_ADDRESS = /([^\s]+(市|区).{2,8}(町|村).{2,10}\d)/

  # Convenience wrapper around Doko.new(str).parse.
  #
  # @param str [String] text/HTML to scan, or a single http/https/ftp URL
  # @return [Array<String>] addresses found, in document order
  def self.parse(str)
    new(str).parse
  end

  # @param str [String] text/HTML to scan, or a single http/https/ftp URL
  #   whose content is downloaded and scanned instead
  def initialize(str)
    uri = remote_uri(str)
    # Fetch through the URI object rather than Kernel#open: Kernel#open
    # treats a leading "|" as a command to run and stopped handling URLs
    # in Ruby 3.0.
    str = uri.open { |io| io.read } if uri
    @doc = Nokogiri::HTML(str)
  end

  # Scans the document body for addresses.
  #
  # Prefecture-style addresses are tried first; only when none are found is
  # the looser city-level pattern used.
  #
  # @return [Array<String>] cleaned-up address strings (empty when none found)
  def parse
    body = normalize(@doc.search("body").text)

    addrs = body.scan(PREFECTURE_ADDRESS).map do |m|
      strip_labels(m[0]).gsub(/\s+$/, "").gsub(/\s?電話:.+$/, "")
    end
    return addrs unless addrs.empty?

    body.scan(CITY_ADDRESS).map do |m|
      strip_labels(m[0])
        .gsub("[MAP]", "")
        .gsub(/(TEL|FAX):\d{2,4}-\d{2,4}-\d{2,4}/, "")
    end
  end

  private

  # Returns a fetchable URI when +str+ is exactly one remote URL, else nil.
  def remote_uri(str)
    return nil unless str =~ REMOTE_URL

    uri = URI.parse(str)
    # Only URI classes extended by open-uri expose a public #open.
    uri if uri.respond_to?(:open)
  rescue URI::InvalidURIError
    nil # not a usable URL after all; the caller treats the input as text
  end

  # Folds full-width characters to ASCII so the address patterns, which are
  # written in terms of ASCII digits, parentheses and commas, can match.
  # Code points are spelled as escapes so they survive re-encoding of this
  # file: U+FF10..U+FF19 are full-width digits, U+FF08/U+FF09 parentheses.
  def normalize(text)
    text.tr("\uFF10-\uFF19", "0-9")
        .tr("ー", "-")
        .tr("\uFF08\uFF09", "()")
        .tr("、", ",")
  end

  # Removes the "住所" (address) label and a leading postal code.
  def strip_labels(line)
    line.gsub(/住所(\s|\n)?/, "").gsub(/〒\d{3}-\d{4} ?/, "")
  end
end
48
+
49
+
50
+
51
+
data/spec/doko_spec.rb ADDED
@@ -0,0 +1,39 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
4
+ require 'open-uri'
5
+
6
# Integration specs for Doko.parse. Each example downloads a live page over
# HTTP, so running them requires network access.
describe "Doko" do

  # Exercises the "from string" path: the HTML is downloaded here and passed
  # to Doko as text. URI#read (from open-uri) is used instead of Kernel#open,
  # which no longer fetches URLs on Ruby 3.0+.
  it "should return address" do
    html = URI.parse("http://r.tabelog.com/tokyo/A1304/A130401/13130066/").read
    addrs = Doko.parse(html)
    addrs.first.should == "東京都新宿区新宿3-38-1 ルミネエスト7F"
  end


  it "should return addr from tabelog url" do
    addrs = Doko.parse("http://r.tabelog.com/kanagawa/A1401/A140104/14001924/")
    addrs.first.should == "神奈川県横浜市中区海岸通1-1"
  end


  it "should return from 30min" do
    addrs = Doko.parse("http://30min.jp/place/23481")
    addrs.first.should == "東京都墨田区業平1-21-4 第2刀川ビル1F"
  end

  # These comparisons previously lacked `.should`, so the example could never
  # fail. The page is also fetched once instead of once per expectation.
  it "should return addr from site 1" do
    addrs = Doko.parse("http://thanikitchen.com/")
    addrs[0].should == "東京都品川区南大井6-11-10"
    addrs[1].should == "東京都品川区大井7-29-8"
  end

  it "should return addr in kyoto" do
    addrs = Doko.parse("http://www.tripadvisor.jp/Hotel_Review-g298564-d2317992-Reviews-Royal_Park_Hotel_The_Kyoto-Kyoto_Kyoto_Prefecture_Kinki.html")
    addrs.first.should == "京都府京都市中京区三条通河原町東入ル"
  end

  it "should return addr in kumamoto" do
    addrs = Doko.parse("http://travel.rakuten.co.jp/HOTEL/68236/68236_std.html")
    addrs.first.should == "熊本県阿蘇郡南阿蘇村河陽4673-18"
  end
end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'doko'
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+
12
+ end
metadata ADDED
@@ -0,0 +1,158 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: doko
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Masaki Sawamura
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 2.8.0
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.8.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: rdoc
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '3.12'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '3.12'
62
+ - !ruby/object:Gem::Dependency
63
+ name: bundler
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: jeweler
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 1.8.3
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 1.8.3
94
+ - !ruby/object:Gem::Dependency
95
+ name: simplecov
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ description: Japanese address retierver
111
+ email: masaki.sw@gmail.com
112
+ executables: []
113
+ extensions: []
114
+ extra_rdoc_files:
115
+ - LICENSE.txt
116
+ - README.md
117
+ files:
118
+ - .document
119
+ - .rspec
120
+ - Gemfile
121
+ - Gemfile.lock
122
+ - LICENSE.txt
123
+ - README.md
124
+ - Rakefile
125
+ - VERSION
126
+ - doko.gemspec
127
+ - lib/doko.rb
128
+ - spec/doko_spec.rb
129
+ - spec/spec_helper.rb
130
+ homepage: http://github.com/sawamur/doko
131
+ licenses:
132
+ - MIT
133
+ post_install_message:
134
+ rdoc_options: []
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ none: false
139
+ requirements:
140
+ - - ! '>='
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ segments:
144
+ - 0
145
+ hash: -4421121066746144535
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ none: false
148
+ requirements:
149
+ - - ! '>='
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ requirements: []
153
+ rubyforge_project:
154
+ rubygems_version: 1.8.24
155
+ signing_key:
156
+ specification_version: 3
157
+ summary: Japanese address retierver
158
+ test_files: []