swissmatch-street 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,159 @@
1
+ # encoding: utf-8
2
+
3
+
4
+
5
+ require 'swissmatch/street/version'
6
+
7
+
8
+
9
+ # From SwissMatch::Street
10
+ # Parse and handle street names and numbers.
11
+ #
12
+ # @note
13
+ # All strings passed to SwissMatch are expected to be utf-8. All strings
14
+ # returned by SwissMatch are also in utf-8.
15
+ #
16
module SwissMatch

  # Street
  # Parse and handle street names and numbers.
  #
  # A Street keeps the street name and the house number apart, remembers
  # whether the number is written in front of the name (:begin) or after it
  # (:end), and builds a canonical full representation from those parts.
  #
  # @example
  #   street = SwissMatch::Street.parse('39 rue Louis Faure', true)
  #   street.name   # => "Rue Louis Faure"
  #   street.number # => "39"
  #   street.full   # => "39, Rue Louis Faure"
  class Street
    # House number formats:
    #   '12'
    #   '12b'
    #   '12bis', '12BIS', '12Bis', '12 bis', … - see development/Noteworthy_things.txt
    #   '12 B'
    #   '12/14'
    #   '105-107'
    #   '16/2/22'
    #   '8-10-12'
    #   '16-2/22'
    #   '16/2-22'
    #
    # A part introduced by '-' may carry up to two trailing word characters,
    # but does not require any, so single digit parts ('16-2/22', '261-4')
    # are matched as well.
    # NOTE(review): a letter suffix is limited to two characters, so of
    # '12bis' only '12bi' is matched.
    HouseNumber                      = /\d+(?:\x20?\w{1,2}|(?:\/\d+|-\d+\w{0,2})*)/

    # Same as HouseNumber, but for numbers written in front of the street
    # name: a suffix starting with 'ch', 'bd' or 'av' is rejected, so that
    # '24 bd blabla' is not read as house number '24 bd'.
    FrontHouseNumber                 = /\d+(?:\x20?(?!ch|bd|av)\w{1,2}|(?:\/\d+|-\d+\w{0,2})*)/i

    # '<name> <number>' or '<name>, <number>'
    MatchStreetAndStreetNumberGerman = /\A(.*?)(?:(?: |, ?)(#{HouseNumber}))\z/
    # '<number>, <name>'
    MatchStreetAndStreetNumberFrench = /\A(?:(#{FrontHouseNumber}), )(.*?)\z/
    # '<name><number>' without any separator
    MatchBrokenStreetAndNumberGerman = /\A(.*?)(?:(#{HouseNumber}))\z/
    # '<number>.<name>' / '<number>. <name>'; the optional space after the
    # separator is consumed here so it does not end up in the street name.
    MatchBrokenStreetAndNumberFrench = /\A(?:(#{FrontHouseNumber})[.,]\x20?| )(.*?)\z/

    # Abbreviations that can be expanded
    Replacements = {
      'ch'   => 'Chemin',
      'chem' => 'Chemin',
      'rte'  => 'Route',
      'bd'   => 'Boulevard',
      'av'   => 'Avenue',
      'ave'  => 'Avenue',
      'str'  => 'strasse',
      'mte'  => 'Monte',
      's'    => 'san',
    }.freeze

    # Detect the abbreviations to expand
    ReplacementsMatch = /\b(?:ch|chem|rte|bd|ave?|mte)(?:\.|\b)|\bs(?:\.|\b)(?!$)|\Bstr(?:\.|\b)/i

    # Words that are kept lowercase by normalize_name (unless they are the
    # first word of the name).
    NoCapitalize = {
      'auf'     => 'auf',
      'uf'      => 'uf', # hurray for swiss german street names :D
      'em'      => 'em',
      'der'     => 'der',
      'die'     => 'die',
      'das'     => 'das',
      'von'     => 'von',
      'nach'    => 'nach',
      'im'      => 'im',
      'in'      => 'in',
      'zum'     => 'zum',
      'zur'     => 'zur',
      'unteren' => 'unteren',
      'oberen'  => 'oberen',

      'd'       => 'd',
      'de'      => 'de',
      'des'     => 'des',
      'du'      => 'du',
      'l'       => 'l',
      'le'      => 'le',
      'la'      => 'la',
      'les'     => 'les',
      'vers'    => 'vers',

      'il'      => 'il',
      'dei'     => 'dei',
      'di'      => 'di',
      'delle'   => 'delle',
      'della'   => 'della',
      'al'      => 'al',
      'alla'    => 'alla',
      'alle'    => 'alle',
      'ai'      => 'ai',
    }.freeze

    # Cleans up whitespace and punctuation of a raw street line and expands
    # known abbreviations.
    #
    # @param street [String, nil] the raw street line
    # @return [String] the cleaned up street line, '' if street is nil
    def self.normalize_street(street)
      return '' unless street

      normalized = street.strip.squeeze(' ')
      # 'Bel- Air' => 'Bel-Air'
      normalized = normalized.gsub(/\s*-\s*/, '-')
      # '24 Rue Baulacre' => '24, Rue Baulacre' - but not '24 bd blabla' => '24 bd, blabla'
      normalized = normalized.gsub(/\A(#{FrontHouseNumber}) /, '\1, ')
      # '283,Rte.de Meyrin' => '283, Rte. de Meyrin'; '283 ,Foo' => '283, Foo'
      normalized = normalized.gsub(/\s*([.,])(?=\S)/, '\1 ')
      # ch., chem., str. etc. => Chemin, strasse etc.
      normalized = normalized.gsub(ReplacementsMatch) { |abbreviation|
        Replacements[abbreviation.downcase.chomp('.')]
      }
      # drop the 'n°' marker
      normalized.gsub(/\s*n°\s*/, ' ')
    end

    # Capitalizes all words of a street name with two or more characters,
    # except those listed in NoCapitalize. The first such word of the name is
    # always capitalized.
    #
    # @param name [String, nil] the street name
    # @return [String] the capitalized name, '' if name is nil
    def self.normalize_name(name)
      return '' unless name

      # [\p{Letter}\p{Mark}\p{Connector_Punctuation}] is \p{Word} without digits
      word = /\b[\p{Letter}\p{Mark}\p{Connector_Punctuation}]{2,}\b/

      name.
        gsub(word) { |match| NoCapitalize.fetch(match.downcase) { match.capitalize } }.
        sub(word) { |match| match.capitalize }
    end

    # Downcases a house number and removes every character other than
    # 0-9, a-z, '/', '(', ')' and '-'.
    #
    # @param number [String, nil] the house number
    # @return [String, nil] the normalized number, nil if number is nil or
    #   nothing is left after the clean-up
    def self.normalize_number(number)
      return unless number
      normalized = number.downcase.delete('^0-9a-z/()-')

      normalized.empty? ? nil : normalized
    end

    # Splits a street line into street name and house number.
    #
    # @param street [String, nil] the raw street line
    # @param normalize [Boolean] whether name and number are additionally
    #   passed through normalize_name and normalize_number
    # @return [SwissMatch::Street] the parsed street; its number and
    #   number_position are nil if no house number was recognized
    def self.parse(street, normalize=false)
      normalized = normalize_street(street)

      # The order of the patterns matters: the well-formed variants are tried
      # before the ones for lines with missing or wrong separators.
      name, number, pos =
        case normalized
        when MatchStreetAndStreetNumberGerman then [Regexp.last_match(1), Regexp.last_match(2), :end]
        when MatchStreetAndStreetNumberFrench then [Regexp.last_match(2), Regexp.last_match(1), :begin]
        when MatchBrokenStreetAndNumberGerman then [Regexp.last_match(1), Regexp.last_match(2), :end]
        when MatchBrokenStreetAndNumberFrench then [Regexp.last_match(2), Regexp.last_match(1), :begin]
        else [normalized, nil, nil]
        end

      if normalize
        name   = normalize_name(name)
        number = normalize_number(number)
      end

      new(name, number, pos, street)
    end

    attr_reader :original, :name, :number, :number_position, :full

    # @param name [String] the street name
    # @param number [String, nil] the house number
    # @param number_position [:begin, :end, nil] where the number is placed
    #   relative to the name; nil means the street has no number
    # @param original [String, nil] the unparsed street line, if any
    # @raise [ArgumentError] if number_position is none of :begin, :end, nil
    def initialize(name, number=nil, number_position=:end, original=nil)
      @name            = name
      @number          = number
      @number_position = number_position
      @original        = original
      @full            = case number_position
        when :end   then [name, number].compact.join(" ")
        when :begin then [number, name].compact.join(", ")
        when nil    then name.dup
        else raise ArgumentError, "Invalid value for number_position: #{number_position.inspect}"
      end
    end

    # @return [String] the original street line if one was given, the full
    #   representation otherwise
    def original_or_full
      @original || @full
    end

    alias to_s full

    def inspect
      "#<Street #{self}>"
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+
3
+ begin
4
+ require 'rubygems/version' # newer rubygems use this
5
+ rescue LoadError
6
+ require 'gem/version' # older rubygems use this
7
+ end
8
+
9
module SwissMatch
  # Reopened here only to attach the version constant to the class.
  class Street
    # The version of the swissmatch-street gem, as a Gem::Version object.
    Version = Gem::Version.new('0.0.1')
  end
end
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+
3
# Gem specification of swissmatch-street.
Gem::Specification.new do |spec|
  # Description and summary share the same single-line text.
  tagline = "Parse, tokenize, analyze, repair and handle swiss street names and numbers."

  spec.name        = "swissmatch-street"
  spec.version     = "0.0.1"
  spec.authors     = "Stefan Rusterholz"
  spec.email       = "stefan.rusterholz@gmail.com"
  spec.homepage    = "http://github.com/apeiros/swissmatch-street"

  spec.description = tagline.dup
  spec.summary     = tagline.dup

  # Everything below the listed directories, the gemspec itself and the
  # top-level project files.
  packaged_dirs   = %w[bin data lib rake test]
  top_level_files = %w[LICENSE.txt Rakefile README.markdown]
  spec.files      = packaged_dirs.flat_map { |dir| Dir["#{dir}/**/*"] } +
                    Dir['*.gemspec'] +
                    top_level_files

  # Register every executable file below bin/, if there is a bin/ directory.
  if File.directory?('bin')
    bin_files = Dir.chdir('bin') do
      Dir.glob('**/*').select { |path| File.executable?(path) }
    end
    spec.executables = bin_files unless bin_files.empty?
  end

  spec.required_rubygems_version = Gem::Requirement.new("> 1.3.1")
  spec.rubygems_version          = "1.3.1"
  spec.specification_version     = 3
end
@@ -0,0 +1,31 @@
1
+ require 'stringio'
2
+
3
# Mixed into generated test classes (via extend) so that they can carry a
# readable name.
module TestSuite
  attr_reader :name
  attr_writer :name
end
6
+
7
module Kernel
  # Creates an anonymous Test::Unit::TestCase subclass from the given block
  # and names it "Suite <name>".
  #
  # @param name [#to_s] the name of the suite
  # @return [Class] the generated test case class
  def suite(name, &block)
    Class.new(Test::Unit::TestCase, &block).tap do |test_class|
      test_class.extend TestSuite
      test_class.name = "Suite #{name}"
    end
  end
  module_function :suite
end
17
+
18
class Test::Unit::TestCase
  # Defines a test method named "test <desc>" whose body is the given block.
  #
  # @param desc [#to_s] the description of the test
  def self.test(desc, &impl)
    define_method("test #{desc}", &impl)
  end

  # Runs the given block with $stdout redirected into a buffer.
  #
  # Whatever $stdout pointed to before the call is put back afterwards, also
  # if the block raises, so that nested or already active redirections are
  # not clobbered.
  #
  # @return [String] everything the block wrote to $stdout
  def capture_stdout
    previous = $stdout
    captured = StringIO.new
    $stdout  = captured
    yield
    captured.string
  ensure
    $stdout = previous
  end
end
@@ -0,0 +1,20 @@
1
+ # run with `ruby test/runner.rb`
2
+ # if you only want to run a single test-file: `ruby test/runner.rb testfile.rb`
3
+
4
# Make the library and the test helpers requirable.
%w[lib test/lib].each do |dir|
  $LOAD_PATH << File.expand_path("../../#{dir}", __FILE__)
end
TEST_DIR = File.expand_path('../../test', __FILE__)

require 'test/unit'
require 'helper'

# Coverage is only collected on request, via the COVERAGE environment variable.
if ENV['COVERAGE']
  require 'simplecov'
  SimpleCov.start
end

# Test files given on the command line take precedence over the full unit suite.
units = ARGV.empty? ? Dir["#{TEST_DIR}/unit/**/*.rb"] : ARGV

units.each { |unit| load unit }
@@ -0,0 +1,102 @@
1
+ # encoding: utf-8
2
+
3
+ require 'swissmatch/street'
4
+ include SwissMatch
5
+
6
# Unit tests for SwissMatch::Street: construction, the house number pattern,
# street line normalization and parsing.
suite "Street" do
  test "Street.new with original" do
    original        = '  Beispielstrasse   15  '
    street_name     = 'Beispielstrasse'
    street_number   = '15'
    number_position = :end

    street = Street.new(street_name, street_number, number_position, original)

    assert_equal street_name, street.name
    assert_equal street_number, street.number
    assert_equal number_position, street.number_position
    assert_equal original, street.original
    assert_equal 'Beispielstrasse 15', street.full
    assert_equal original, street.original_or_full
  end

  test "Street.new without original" do
    street_name     = 'Beispielstrasse'
    street_number   = '15'
    number_position = :end

    street = Street.new(street_name, street_number, number_position)

    assert_equal street_name, street.name
    assert_equal street_number, street.number
    assert_equal number_position, street.number_position
    assert_nil street.original
    assert_equal 'Beispielstrasse 15', street.full
    assert_equal 'Beispielstrasse 15', street.original_or_full
  end

  # Only checks that the pattern matches somewhere in the string, not that it
  # covers the whole number.
  [
    '12',
    '12b',
    '12 B',
    '12/14',
    '16/2/22',
    '105-107',
    '12/A',
  ].each do |number|
    test "House number #{number.inspect}" do
      assert Street::HouseNumber =~ number
    end
  end

  # raw street line => expected result of Street.normalize_street
  # (the house number in the expectation has to be the one from the input)
  {
    %{12,Rue Quelquechose}      => %{12, Rue Quelquechose},
    %{12/345 Foo}               => %{12/345, Foo},
    %{1A ch. des Choses}        => %{1A, Chemin des Choses},
    %{  Beispielstr.   15  }    => %{Beispielstrasse 15},
    %{12b,Rte. d'Anywhere}      => %{12b, Route d'Anywhere},
    %{1, Av. de Blé}            => %{1, Avenue de Blé},
  }.each do |original, expected|
    test "Street.normalize_street #{original.inspect}, false" do
      actual = Street.normalize_street(original)

      assert_equal expected, actual
    end
  end

  # raw street line => [name, number, number_position, full]
  {
    %{Beispielstrasse 15}          => ['Beispielstrasse', '15', :end, 'Beispielstrasse 15'],
    %{Beispielstrasse 45/4}        => ['Beispielstrasse', '45/4', :end, 'Beispielstrasse 45/4'],
    %{24,Rue Example}              => [%{Rue Example}, '24', :begin, %{24, Rue Example}],
    %{70/141 Example}              => [%{Example}, '70/141', :begin, %{70/141, Example}],
    %{6A ch. des Cornillons}       => [%{Chemin des Cornillons}, '6a', :begin, %{6a, Chemin des Cornillons}],
    %{  Beispielstr.   15  }       => [%{Beispielstrasse}, '15', :end, %{Beispielstrasse 15}],
    %{25b,Rte. d'Yverdon}          => [%{Route d'Yverdon}, '25b', :begin, %{25b, Route d'Yverdon}],
    %{6, Av. de Budé}              => [%{Avenue de Budé}, '6', :begin, %{6, Avenue de Budé}],
    %{Rue Ferdinand Hodler,19}     => [%{Rue Ferdinand Hodler}, '19', :end, %{Rue Ferdinand Hodler 19}],
    %{Burgstrasse37}               => [%{Burgstrasse}, '37', :end, %{Burgstrasse 37}],
    %{Ave. de Casino 8-10-12}      => [%{Avenue de Casino}, '8-10-12', :end, %{Avenue de Casino 8-10-12}],
    %{Ave Bel- Air 49 B}           => [%{Avenue Bel-Air}, '49b', :end, %{Avenue Bel-Air 49b}],
    %{39 rue Louis Faure}          => [%{Rue Louis Faure}, '39', :begin, %{39, Rue Louis Faure}],
    %{Rte d'Alle 13}               => [%{Route d'Alle}, '13', :end, %{Route d'Alle 13}],
    %{Via Filagni, 2/a}            => [%{Via Filagni}, '2/a', :end, %{Via Filagni 2/a}],
    %{Riehenring 189/A}            => [%{Riehenring}, '189a', :end, %{Riehenring 189a}],
    %{Lorraine 12c/9}              => [%{Lorraine}, '12c/9', :end, %{Lorraine 12c/9}],
    %{Lwaldmannstrasse 67 / J2}    => [%{Lwaldmannstrasse}, '67/j2', :end, %{Lwaldmannstrasse 67/j2}],
    %{Kaysersbergerstrasse 56/3.}  => [%{Kaysersbergerstrasse}, '56/3', :end, %{Kaysersbergerstrasse 56/3}],
    %{Rue Montfalcon 2bis}         => [%{Rue Montfalcon}, '2bis', :end, %{Rue Montfalcon 2bis}],
    %{Rue Montfalcon 2 bis}        => [%{Rue Montfalcon}, '2bis', :end, %{Rue Montfalcon 2bis}],
    %{Elsässerstrasse 261-4}       => [%{Elsässerstrasse}, '261-4', :end, %{Elsässerstrasse 261-4}],
  }.each do |original, (street_name, street_number, number_position, full)|
    test "Street.parse #{original.inspect}" do
      street = Street.parse(original, true)

      assert_equal street_name, street.name
      assert_equal street_number, street.number
      assert_equal number_position, street.number_position
      assert_equal original, street.original
      assert_equal full, street.full
      assert_equal original, street.original_or_full
    end
  end
end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: swissmatch-street
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Stefan Rusterholz
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-08 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Parse, tokenize, analyze, repair and handle swiss street names and numbers.
15
+ email: stefan.rusterholz@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - data/sty21072012.txt
21
+ - lib/swissmatch/street/version.rb
22
+ - lib/swissmatch/street.rb
23
+ - test/lib/helper.rb
24
+ - test/runner.rb
25
+ - test/unit/lib/swissmatch/street.rb
26
+ - swissmatch-street.gemspec
27
+ - LICENSE.txt
28
+ - Rakefile
29
+ - README.markdown
30
+ homepage: http://github.com/apeiros/swissmatch-street
31
+ licenses: []
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ none: false
38
+ requirements:
39
+ - - ! '>='
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>'
46
+ - !ruby/object:Gem::Version
47
+ version: 1.3.1
48
+ requirements: []
49
+ rubyforge_project:
50
+ rubygems_version: 1.8.24
51
+ signing_key:
52
+ specification_version: 3
53
+ summary: Parse, tokenize, analyze, repair and handle swiss street names and numbers.
54
+ test_files: []
55
+ has_rdoc: