swissmatch-street 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,159 @@
1
+ # encoding: utf-8
2
+
3
+
4
+
5
+ require 'swissmatch/street/version'
6
+
7
+
8
+
9
+ # From SwissMatch::Street
10
+ # Parse and handle street names and numbers.
11
+ #
12
+ # @note
13
+ # All strings passed to SwissMatch are expected to be utf-8. All strings
14
+ # returned by SwissMatch are also in utf-8.
15
+ #
16
module SwissMatch

  # Street
  # Parse and handle street names and house numbers.
  #
  # A Street is either built directly with Street.new, or derived from a
  # free-form address line with Street.parse, which splits the line into the
  # street name and the house number and remembers on which side the number
  # was found.
  #
  # @example
  #   street = SwissMatch::Street.parse('6A ch. des Cornillons', true)
  #   street.name            # => "Chemin des Cornillons"
  #   street.number          # => "6a"
  #   street.number_position # => :begin
  #   street.full            # => "6a, Chemin des Cornillons"
  class Street
    # House number formats:
    #   '12'
    #   '12b'
    #   '12bis', '12BIS', '12Bis', '12 bis', … - see development/Noteworthy_things.txt
    #   '12 B'
    #   '12/14'
    #   '105-107'
    #   '16/2/22'
    #   '8-10-12'
    #   '16-2/22'
    #   '16/2-22'
    #
    # The number is either digits with an optional suffix ('bis' in any case,
    # or up to two word characters), or digits followed by any number of
    # '/<digits>' and '-<digits>' parts. A dash part may carry up to two
    # trailing word characters, but does not have to ('261-4').
    # A letter directly after a slash ('2/a') or a slash after a letter
    # suffix ('12c/9') is not recognized by this pattern.
    HouseNumber      = /\d+(?:\x20?(?:(?i:bis)|\w{1,2})|(?:\/\d+|-\d+\w{0,2})*)/

    # Like HouseNumber, but for numbers written in front of the street name.
    # The negative lookahead keeps the abbreviations 'ch', 'bd' and 'av' from
    # being swallowed as a number suffix ('24 bd Foo' is number '24', not
    # number '24 bd').
    FrontHouseNumber = /\d+(?:\x20?(?!ch|bd|av)(?:bis|\w{1,2})|(?:\/\d+|-\d+\w{0,2})*)/i

    # "<name> <number>" or "<name>, <number>"
    MatchStreetAndStreetNumberGerman = /\A(.*?)(?:(?: |, ?)(#{HouseNumber}))\z/

    # "<number>, <name>"
    MatchStreetAndStreetNumberFrench = /\A(?:(#{FrontHouseNumber}), )(.*?)\z/

    # "<name><number>" without any separator, e.g. 'Burgstrasse37'
    MatchBrokenStreetAndNumberGerman = /\A(.*?)(?:(#{HouseNumber}))\z/

    # "<number>.<name>", "<number>. <name>" or "<number> <name>".
    # The separator (including the blank that normalize_street inserts after
    # a dot or comma) is consumed here, so it does not end up in the name.
    MatchBrokenStreetAndNumberFrench = /\A(#{FrontHouseNumber})(?:[.,]\x20?|\x20)(.*?)\z/

    # Abbreviations that can be expanded
    Replacements = {
      'ch'   => 'Chemin',
      'chem' => 'Chemin',
      'rte'  => 'Route',
      'bd'   => 'Boulevard',
      'av'   => 'Avenue',
      'ave'  => 'Avenue',
      'str'  => 'strasse',
      'mte'  => 'Monte',
      's'    => 'san',
    }

    # Detect the abbreviations to expand. 'str' is only expanded as the tail
    # of a longer word, a lone 's' is not expanded at the end of the string.
    ReplacementsMatch = /\b(?:ch|chem|rte|bd|ave?|mte)(?:\.|\b)|\bs(?:\.|\b)(?!$)|\Bstr(?:\.|\b)/i

    # Words that stay lowercase when a street name is normalized
    # (unless they are the first word of the name).
    NoCapitalize = {
      'auf'     => 'auf',
      'uf'      => 'uf', # hurray for swiss german street names :D
      'em'      => 'em',
      'der'     => 'der',
      'die'     => 'die',
      'das'     => 'das',
      'von'     => 'von',
      'nach'    => 'nach',
      'im'      => 'im',
      'in'      => 'in',
      'zum'     => 'zum',
      'zur'     => 'zur',
      'unteren' => 'unteren',
      'oberen'  => 'oberen',

      'd'       => 'd',
      'de'      => 'de',
      'des'     => 'des',
      'du'      => 'du',
      'l'       => 'l',
      'le'      => 'le',
      'la'      => 'la',
      'les'     => 'les',
      'vers'    => 'vers',

      'il'      => 'il',
      'dei'     => 'dei',
      'di'      => 'di',
      'delle'   => 'delle',
      'della'   => 'della',
      'al'      => 'al',
      'alla'    => 'alla',
      'alle'    => 'alle',
      'ai'      => 'ai',
    }

    # A word of at least two characters.
    # [\p{Letter}\p{Mark}\p{Connector_Punctuation}] is \p{Word} without digits
    NameWord = /\b[\p{Letter}\p{Mark}\p{Connector_Punctuation}]{2,}\b/

    # Cleans up a raw address line: trims and collapses whitespace, tightens
    # dashes, separates a leading house number with ", ", puts a blank after
    # dots and commas, expands known abbreviations and drops "n°".
    #
    # @param street [String, nil] the raw address line
    # @return [String] the cleaned up line, '' if street is nil
    def self.normalize_street(street)
      return '' unless street

      street.strip.
        squeeze(' ').
        gsub(/\s*-\s*/, '-').                         # 'Bel- Air' => 'Bel-Air'
        gsub(/\A(#{FrontHouseNumber}) /, '\1, ').     # '24 Rue Baulacre' => '24, Rue Baulacre' - but not '24 bd blabla' -> '24 Boulevard, Blabla'
        gsub(/\s*([.,])(?=\S)/, '\1 ').               # '283,Rte.de Meyrin' => '283, Rte. de Meyrin; '283 ,Foo' => '283, Foo'
        gsub(ReplacementsMatch) { |m|                 # ch., chem., str. etc. => chemin, strasse etc.
          # fall back to the matched text so a missing table entry can never
          # delete part of the street name
          Replacements.fetch(m.downcase.chomp('.'), m)
        }.
        gsub(/\s*n°\s*/, ' ')
    end

    # Capitalizes every word of two or more characters, except the words
    # listed in NoCapitalize. The first such word is always capitalized.
    #
    # @param name [String, nil] the street name
    # @return [String] the name with normalized capitalization, '' if name is nil
    def self.normalize_name(name)
      return '' unless name

      name.
        gsub(NameWord) { |word| NoCapitalize.fetch(word.downcase) { word.capitalize } }.
        sub(NameWord) { |word| word.capitalize }
    end

    # Lowercases the house number and removes every character that is not a
    # digit, an ASCII letter, '/', '(', ')' or '-'.
    #
    # @param number [String, nil] the house number
    # @return [String, nil] the normalized number, nil if nothing is left
    def self.normalize_number(number)
      return unless number

      normalized = number.downcase.delete('^0-9a-z/()-')
      normalized.empty? ? nil : normalized
    end

    # Splits an address line into street name and house number.
    #
    # The line is always passed through normalize_street first. The patterns
    # are tried from strict to lenient; if none applies, the whole normalized
    # line becomes the name and number and number_position are nil.
    #
    # @param street [String, nil] the raw address line, kept as #original
    # @param normalize [Boolean] whether name and number are additionally
    #   passed through normalize_name and normalize_number
    # @return [SwissMatch::Street]
    def self.parse(street, normalize=false)
      normalized = normalize_street(street)
      name, number, pos = case normalized
        when MatchStreetAndStreetNumberGerman then [$1, $2, :end]
        when MatchStreetAndStreetNumberFrench then [$2, $1, :begin]
        when MatchBrokenStreetAndNumberGerman then [$1, $2, :end]
        when MatchBrokenStreetAndNumberFrench then [$2, $1, :begin]
        else [normalized, nil, nil]
      end
      name   = normalize_name(name) if normalize
      number = normalize_number(number) if normalize

      new(name, number, pos, street)
    end

    attr_reader :original, :name, :number, :number_position, :full

    # @param name [String] the street name
    # @param number [String, nil] the house number
    # @param number_position [:end, :begin, nil] where the number is placed
    #   in #full; with nil, #full is just a copy of the name
    # @param original [String, nil] the unparsed input, if any
    # @raise [ArgumentError] if number_position is none of :end, :begin, nil
    def initialize(name, number=nil, number_position=:end, original=nil)
      @name            = name
      @number          = number
      @number_position = number_position
      @original        = original
      @full            = case number_position
        when :end   then [name, number].compact.join(" ")
        when :begin then [number, name].compact.join(", ")
        when nil    then name.dup
        else raise ArgumentError, "Invalid value for number_position: #{number_position.inspect}"
      end
    end

    # @return [String] the original input if one was given, #full otherwise
    def original_or_full
      @original || @full
    end

    alias to_s full

    def inspect
      "#<Street #{self}>"
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+
3
+ begin
4
+ require 'rubygems/version' # newer rubygems use this
5
+ rescue LoadError
6
+ require 'gem/version' # older rubygems use this
7
+ end
8
+
9
module SwissMatch
  class Street

    # Version of the swissmatch-street gem, as a Gem::Version so that it can
    # be compared with other versions.
    Version = Gem::Version.new('0.0.1')
  end
end
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+
3
Gem::Specification.new do |spec|
  # Description and summary share the same one-line text.
  blurb = 'Parse, tokenize, analyze, repair and handle swiss street names and numbers.'

  spec.name        = 'swissmatch-street'
  spec.version     = '0.0.1'
  spec.authors     = 'Stefan Rusterholz'
  spec.email       = 'stefan.rusterholz@gmail.com'
  spec.homepage    = 'http://github.com/apeiros/swissmatch-street'
  spec.description = blurb
  spec.summary     = blurb

  # Everything below the standard directories, every gemspec in the project
  # root and the three top-level project files.
  packaged_dirs = %w[bin data lib rake test]
  spec.files    =
    packaged_dirs.flat_map { |dir| Dir["#{dir}/**/*"] } +
    Dir['*.gemspec'] +
    %w[LICENSE.txt Rakefile README.markdown]

  # Only files in bin/ that carry the executable bit are declared.
  if File.directory?('bin')
    runnable = Dir.chdir('bin') { Dir.glob('**/*').select { |file| File.executable?(file) } }
    spec.executables = runnable unless runnable.empty?
  end

  spec.required_rubygems_version = Gem::Requirement.new('> 1.3.1')
  spec.rubygems_version          = '1.3.1'
  spec.specification_version     = 3
end
@@ -0,0 +1,31 @@
1
+ require 'stringio'
2
+
3
# Extended onto generated test classes to give them a settable name.
module TestSuite
  attr_reader :name
  attr_writer :name
end
6
+
7
module Kernel
  # Creates an anonymous Test::Unit::TestCase subclass whose body is the
  # given block, and names it "Suite <name>".
  #
  # @param name [#to_s] the label of the suite
  # @return [Class] the new test case class
  def suite(name, &block)
    Class.new(Test::Unit::TestCase, &block).tap do |test_class|
      test_class.extend(TestSuite)
      test_class.name = "Suite #{name}"
    end
  end
  module_function :suite
end
17
+
18
class Test::Unit::TestCase
  # Defines a test method named "test <desc>" whose body is the given block.
  #
  # @param desc [#to_s] human readable description of the test
  def self.test(desc, &impl)
    define_method("test #{desc}", &impl)
  end

  # Runs the block with $stdout redirected into a buffer.
  #
  # Afterwards $stdout is set back to whatever it was before the call (not
  # unconditionally to STDOUT), so calls can be nested and an output
  # redirection set up by the caller survives. This also holds when the
  # block raises.
  #
  # @yield the code whose output is captured
  # @return [String] everything the block wrote to $stdout
  def capture_stdout
    previous = $stdout
    captured = StringIO.new
    $stdout  = captured
    yield
    captured.string
  ensure
    $stdout = previous
  end
end
@@ -0,0 +1,20 @@
1
+ # run with `ruby test/runner.rb`
2
+ # if you only want to run a single test-file: `ruby test/runner.rb testfile.rb`
3
+
4
# Make the gem's lib directory and the shared test helpers requirable.
project_root = File.expand_path('../..', __FILE__)
$LOAD_PATH.push(File.join(project_root, 'lib'), File.join(project_root, 'test', 'lib'))
TEST_DIR = File.join(project_root, 'test')

require 'test/unit'
require 'helper'

# Coverage is only measured on request: COVERAGE=1 ruby test/runner.rb
if ENV['COVERAGE']
  require 'simplecov'
  SimpleCov.start
end

# Files given on the command line win, otherwise every unit test is loaded.
unit_files = ARGV
unit_files = Dir.glob(File.join(TEST_DIR, 'unit', '**', '*.rb')) if unit_files.empty?

unit_files.each { |unit_file| load unit_file }
@@ -0,0 +1,102 @@
1
+ # encoding: utf-8
2
+
3
+ require 'swissmatch/street'
4
+ include SwissMatch
5
+
6
# Unit tests for SwissMatch::Street: construction, the house number pattern,
# street normalization and parsing of complete address lines.
suite "Street" do
  test "Street.new with original" do
    original        = ' Beispielstrasse 15 '
    street_name     = 'Beispielstrasse'
    street_number   = '15'
    number_position = :end

    street = Street.new(street_name, street_number, number_position, original)

    assert_equal street_name, street.name
    assert_equal street_number, street.number
    assert_equal number_position, street.number_position
    assert_equal original, street.original
    assert_equal 'Beispielstrasse 15', street.full
    assert_equal original, street.original_or_full
  end

  test "Street.new without original" do
    street_name     = 'Beispielstrasse'
    street_number   = '15'
    number_position = :end

    street = Street.new(street_name, street_number, number_position)

    assert_equal street_name, street.name
    assert_equal street_number, street.number
    assert_equal number_position, street.number_position
    assert_nil street.original
    assert_equal 'Beispielstrasse 15', street.full
    assert_equal 'Beispielstrasse 15', street.original_or_full
  end

  # NOTE: the pattern is not anchored here, so this only verifies that a
  # house number is found somewhere in the string, not that the whole string
  # is recognized as one.
  [
    '12',
    '12b',
    '12 B',
    '12/14',
    '16/2/22',
    '105-107',
    '12/A',
  ].each do |number|
    test "House number #{number.inspect}" do
      assert Street::HouseNumber =~ number
    end
  end

  # raw input => expected result of Street.normalize_street
  {
    %{12,Rue Quelquechose}  => %{12, Rue Quelquechose},
    %{12/345 Foo}           => %{12/345, Foo},
    %{1A ch. des Choses}    => %{1A, Chemin des Choses},
    %{ Beispielstr.   15  } => %{Beispielstrasse 15},
    %{12b,Rte. d'Anywhere}  => %{12b, Route d'Anywhere},
    %{1, Av. de Blé}        => %{1, Avenue de Blé},
  }.each do |original, expected|
    test "Street.normalize_street #{original.inspect}, false" do
      actual = Street.normalize_street(original)

      assert_equal expected, actual
    end
  end

  # raw input => [name, number, number_position, full] of Street.parse(input, true)
  # NOTE(review): 'Riehenring 189/A' expects the slash to be dropped ('189a')
  # while 'Via Filagni, 2/a' expects it to be kept ('2/a') - confirm which
  # rule is intended.
  {
    %{Beispielstrasse 15}         => ['Beispielstrasse', '15', :end, 'Beispielstrasse 15'],
    %{Beispielstrasse 45/4}       => ['Beispielstrasse', '45/4', :end, 'Beispielstrasse 45/4'],
    %{24,Rue Example}             => [%{Rue Example}, '24', :begin, %{24, Rue Example}],
    %{70/141 Example}             => [%{Example}, '70/141', :begin, %{70/141, Example}],
    %{6A ch. des Cornillons}      => [%{Chemin des Cornillons}, '6a', :begin, %{6a, Chemin des Cornillons}],
    %{ Beispielstr.   15  }       => [%{Beispielstrasse}, '15', :end, %{Beispielstrasse 15}],
    %{25b,Rte. d'Yverdon}         => [%{Route d'Yverdon}, '25b', :begin, %{25b, Route d'Yverdon}],
    %{6, Av. de Budé}             => [%{Avenue de Budé}, '6', :begin, %{6, Avenue de Budé}],
    %{Rue Ferdinand Hodler,19}    => [%{Rue Ferdinand Hodler}, '19', :end, %{Rue Ferdinand Hodler 19}],
    %{Burgstrasse37}              => [%{Burgstrasse}, '37', :end, %{Burgstrasse 37}],
    %{Ave. de Casino 8-10-12}     => [%{Avenue de Casino}, '8-10-12', :end, %{Avenue de Casino 8-10-12}],
    %{Ave Bel- Air 49 B}          => [%{Avenue Bel-Air}, '49b', :end, %{Avenue Bel-Air 49b}],
    %{39 rue Louis Faure}         => [%{Rue Louis Faure}, '39', :begin, %{39, Rue Louis Faure}],
    %{Rte d'Alle 13}              => [%{Route d'Alle}, '13', :end, %{Route d'Alle 13}],
    %{Via Filagni, 2/a}           => [%{Via Filagni}, '2/a', :end, %{Via Filagni 2/a}],
    %{Riehenring 189/A}           => [%{Riehenring}, '189a', :end, %{Riehenring 189a}],
    %{Lorraine 12c/9}             => [%{Lorraine}, '12c/9', :end, %{Lorraine 12c/9}],
    %{Lwaldmannstrasse 67 / J2}   => [%{Lwaldmannstrasse}, '67/j2', :end, %{Lwaldmannstrasse 67/j2}],
    %{Kaysersbergerstrasse 56/3.} => [%{Kaysersbergerstrasse}, '56/3', :end, %{Kaysersbergerstrasse 56/3}],
    %{Rue Montfalcon 2bis}        => [%{Rue Montfalcon}, '2bis', :end, %{Rue Montfalcon 2bis}],
    %{Rue Montfalcon 2 bis}       => [%{Rue Montfalcon}, '2bis', :end, %{Rue Montfalcon 2bis}],
    %{Elsässerstrasse 261-4}      => [%{Elsässerstrasse}, '261-4', :end, %{Elsässerstrasse 261-4}],
  }.each do |original, (street_name, street_number, number_position, full)|
    test "Street.parse #{original.inspect}" do
      street = Street.parse(original, true)

      assert_equal street_name, street.name
      assert_equal street_number, street.number
      assert_equal number_position, street.number_position
      assert_equal original, street.original
      assert_equal full, street.full
      assert_equal original, street.original_or_full
    end
  end
end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: swissmatch-street
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Stefan Rusterholz
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-08 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Parse, tokenize, analyze, repair and handle swiss street names and numbers.
15
+ email: stefan.rusterholz@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - data/sty21072012.txt
21
+ - lib/swissmatch/street/version.rb
22
+ - lib/swissmatch/street.rb
23
+ - test/lib/helper.rb
24
+ - test/runner.rb
25
+ - test/unit/lib/swissmatch/street.rb
26
+ - swissmatch-street.gemspec
27
+ - LICENSE.txt
28
+ - Rakefile
29
+ - README.markdown
30
+ homepage: http://github.com/apeiros/swissmatch-street
31
+ licenses: []
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>'
46
+ - !ruby/object:Gem::Version
47
+ version: 1.3.1
48
+ requirements: []
49
+ rubyforge_project:
50
+ rubygems_version: 1.8.24
51
+ signing_key:
52
+ specification_version: 3
53
+ summary: Parse, tokenize, analyze, repair and handle swiss street names and numbers.
54
+ test_files: []
55
+ has_rdoc: