swissmatch-street 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.txt +8 -0
- data/README.markdown +50 -0
- data/Rakefile +10 -0
- data/data/sty21072012.txt +18001 -0
- data/lib/swissmatch/street.rb +159 -0
- data/lib/swissmatch/street/version.rb +15 -0
- data/swissmatch-street.gemspec +38 -0
- data/test/lib/helper.rb +31 -0
- data/test/runner.rb +20 -0
- data/test/unit/lib/swissmatch/street.rb +102 -0
- metadata +55 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
require 'swissmatch/street/version'
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
# From SwissMatch::Street
# Parse and handle street names and numbers.
#
# @note
#   All strings passed to SwissMatch are expected to be utf-8. All strings
#   returned by SwissMatch are also in utf-8.
#
module SwissMatch

  # Street
  # Parse and handle street names and numbers.
  #
  # A Street is a value object built from a street name, an optional house
  # number and the position of that number (:end as in 'Beispielstrasse 15',
  # :begin as in '24, Rue Example'). Use {Street.parse} to build one from a
  # free-form string.
  class Street
    # House number formats:
    #   '12'
    #   '12b'
    #   '12bis', '12BIS', '12Bis', '12 bis', … - see development/Noteworthy_things.txt
    #   '12 B'
    #   '12/14'
    #   '105-107'
    #   '16/2/22'
    #   '8-10-12'
    #   '16-2/22'
    #   '16/2-22'
    HouseNumber                      = /\d+(?:\x20?\w{1,2}|(?:\/\d+|-\d+\w{1,2})*)/

    # Like HouseNumber, but refuses to treat a following 'ch', 'bd' or 'av'
    # (case-insensitively) as the letter suffix of the number.
    FrontHouseNumber                 = /\d+(?:\x20?(?!ch|bd|av)\w{1,2}|(?:\/\d+|-\d+\w{1,2})*)/i

    # '<name> <number>' or '<name>, <number>'
    MatchStreetAndStreetNumberGerman = /\A(.*?)(?:(?: |, ?)(#{HouseNumber}))\z/

    # '<number>, <name>'
    MatchStreetAndStreetNumberFrench = /\A(?:(#{FrontHouseNumber}), )(.*?)\z/

    # '<name><number>' without any separator
    MatchBrokenStreetAndNumberGerman = /\A(.*?)(?:(#{HouseNumber}))\z/

    # '<number>.<name>' / '<number>,<name>'
    # NOTE(review): the alternation binds as "number followed by [.,]" OR
    # "a single space"; presumably `(number)(?:[.,]| )` was intended - confirm
    # before changing, the pattern is kept as-is here.
    MatchBrokenStreetAndNumberFrench = /\A(?:(#{FrontHouseNumber})[.,]| )(.*?)\z/

    # Abbreviations that can be expanded
    Replacements = {
      'ch'   => 'Chemin',
      'chem' => 'Chemin',
      'rte'  => 'Route',
      'bd'   => 'Boulevard',
      'av'   => 'Avenue',
      'ave'  => 'Avenue',
      'str'  => 'strasse',
      'mte'  => 'Monte',
      's'    => 'san',
    }
    # Detect the abbreviations to expand
    ReplacementsMatch = /\b(?:ch|chem|rte|bd|ave?|mte)(?:\.|\b)|\bs(?:\.|\b)(?!$)|\Bstr(?:\.|\b)/i

    # Words that normalize_name leaves in lower case (unless they are the
    # first word of the name).
    NoCapitalize = {
      'auf'     => 'auf',
      'uf'      => 'uf', # hurray for swiss german street names :D
      'em'      => 'em',
      'der'     => 'der',
      'die'     => 'die',
      'das'     => 'das',
      'von'     => 'von',
      'nach'    => 'nach',
      'im'      => 'im',
      'in'      => 'in',
      'zum'     => 'zum',
      'zur'     => 'zur',
      'unteren' => 'unteren',
      'oberen'  => 'oberen',

      'd'       => 'd',
      'de'      => 'de',
      'des'     => 'des',
      'du'      => 'du',
      'l'       => 'l',
      'le'      => 'le',
      'la'      => 'la',
      'les'     => 'les',
      'vers'    => 'vers',

      'il'      => 'il',
      'dei'     => 'dei',
      'di'      => 'di',
      'delle'   => 'delle',
      'della'   => 'della',
      'al'      => 'al',
      'alla'    => 'alla',
      'alle'    => 'alle',
      'ai'      => 'ai',
    }

    # Clean up a raw street string: trims and squeezes spaces, tightens
    # hyphens, separates a leading house number with ', ', puts a space after
    # '.'/',' and expands the abbreviations listed in Replacements.
    #
    # @param [String, nil] street
    #   The raw street string.
    #
    # @return [String]
    #   The cleaned up string, '' if street is nil.
    def self.normalize_street(street)
      return '' unless street

      street.strip.
        squeeze(' ').
        gsub(/\s*-\s*/, '-').
        gsub(/\A(#{FrontHouseNumber}) /, '\1, '). # '24 Rue Baulacre' => '24, Rue Baulacre' - but not '24 bd blabla' -> '24 Boulevard, Blabla'
        gsub(/\s*([.,])(?=\S)/, '\1 ').           # '283,Rte.de Meyrin' => '283, Rte. de Meyrin; '283 ,Foo' => '283, Foo'
        gsub(ReplacementsMatch) { |m|             # ch., chem., str. etc. => chemin, strasse etc.
          Replacements[m.downcase.chomp('.')]
        }.
        gsub(/\s*n°\s*/, ' ')
    end

    # Capitalize every word of two or more letters, except those listed in
    # NoCapitalize; the first such word is always capitalized.
    #
    # @param [String] name
    #   The street name.
    #
    # @return [String]
    #   The name with normalized capitalization.
    def self.normalize_name(name)
      name.
        gsub(/\b[\p{Letter}\p{Mark}\p{Connector_Punctuation}]{2,}\b/) { |word| NoCapitalize.fetch(word.downcase) { word.capitalize } }.
        sub(/\b[\p{Letter}\p{Mark}\p{Connector_Punctuation}]{2,}\b/) { |word| word.capitalize } # [\p{Letter}\p{Mark}\p{Connector_Punctuation}] is \p{Word} without digits
    end

    # Downcase a house number and drop every character that is not a digit,
    # an ASCII letter, '/', '(', ')' or '-'.
    #
    # @param [String, nil] number
    #   The raw house number.
    #
    # @return [String, nil]
    #   The normalized number, nil if number is nil or nothing remains.
    def self.normalize_number(number)
      return unless number
      normalized = number.downcase.delete('^0-9a-z/()-')

      normalized.empty? ? nil : normalized
    end

    # Split a free-form street string into name and house number.
    #
    # @param [String, nil] street
    #   The raw street string; it is stored unchanged as #original.
    # @param [Boolean] normalize
    #   Whether to additionally run normalize_name on the name and
    #   normalize_number on the number.
    #
    # @return [SwissMatch::Street]
    def self.parse(street, normalize=false)
      normalized        = normalize_street(street)
      name, number, pos = case normalized
        when MatchStreetAndStreetNumberGerman then [$1, $2, :end]
        when MatchStreetAndStreetNumberFrench then [$2, $1, :begin]
        when MatchBrokenStreetAndNumberGerman then [$1, $2, :end]
        # normalize_street always puts a space after '.' and ',', and the
        # pattern only consumes the punctuation - drop that space so that
        # '24.Rue Example' yields 'Rue Example' and not ' Rue Example'.
        when MatchBrokenStreetAndNumberFrench then [$2.lstrip, $1, :begin]
        else [normalized, nil, nil]
      end
      name   = normalize_name(name) if normalize
      number = normalize_number(number) if normalize

      new(name, number, pos, street)
    end

    attr_reader :original, :name, :number, :number_position, :full

    # @param [String] name
    #   The street name.
    # @param [String, nil] number
    #   The house number.
    # @param [Symbol, nil] number_position
    #   :end (joined as 'name number'), :begin (joined as 'number, name') or
    #   nil (the full form is just a copy of the name).
    # @param [String, nil] original
    #   The unparsed string this street was built from.
    #
    # @raise [ArgumentError]
    #   If number_position is neither :end, :begin nor nil.
    def initialize(name, number=nil, number_position=:end, original=nil)
      @name             = name
      @number           = number
      @number_position  = number_position
      @original         = original
      @full             = case number_position
        when :end   then [name, number].compact.join(" ")
        when :begin then [number, name].compact.join(", ")
        when nil    then name.dup
        else raise ArgumentError, "Invalid value for number_position: #{number_position.inspect}"
      end
    end

    # @return [String]
    #   The original string if one was given, the composed full form otherwise.
    def original_or_full
      @original || @full
    end

    alias to_s full

    # @return [String]
    #   A short representation containing the full form.
    def inspect
      "#<Street #{self}>"
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'rubygems/version' # newer rubygems use this
|
5
|
+
rescue LoadError
|
6
|
+
require 'gem/version' # older rubygems use this
|
7
|
+
end
|
8
|
+
|
9
|
+
module SwissMatch
  class Street

    # Release number of the swissmatch-street gem, wrapped in a Gem::Version
    # so that it can be compared against other versions.
    Version = Gem::Version.new('0.0.1')
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Gem specification for swissmatch-street.
Gem::Specification.new do |s|
  s.name     = "swissmatch-street"
  s.version  = "0.0.1"
  s.authors  = "Stefan Rusterholz"
  s.email    = "stefan.rusterholz@gmail.com"
  s.homepage = "http://github.com/apeiros/swissmatch-street"

  # The heredocs are indented for readability; the gsub strips that indent
  # and the chomp removes the trailing newline.
  s.description = <<-DESCRIPTION.gsub(/^    /, '').chomp
    Parse, tokenize, analyze, repair and handle swiss street names and numbers.
  DESCRIPTION
  s.summary = <<-SUMMARY.gsub(/^    /, '').chomp
    Parse, tokenize, analyze, repair and handle swiss street names and numbers.
  SUMMARY

  # Everything below the listed directories, any gemspec and the top level
  # project files - in this order.
  globs   = %w[bin/**/* data/**/* lib/**/* rake/**/* test/**/* *.gemspec]
  s.files = globs.flat_map { |pattern| Dir[pattern] } + %w[
    LICENSE.txt
    Rakefile
    README.markdown
  ]

  # Only declare executables when bin/ exists and contains executable files.
  if File.directory?('bin')
    bin_files = Dir.chdir('bin') do
      Dir.glob('**/*').select { |path| File.executable?(path) }
    end
    s.executables = bin_files unless bin_files.empty?
  end

  s.required_rubygems_version = Gem::Requirement.new("> 1.3.1")
  s.rubygems_version          = "1.3.1"
  s.specification_version     = 3
end
|
data/test/lib/helper.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# Mixin that gives an object a readable and writable #name.
# Kernel#suite extends the generated test classes with it.
module TestSuite
  attr_reader :name
  attr_writer :name
end
|
6
|
+
|
7
|
+
module Kernel
  # Define an anonymous Test::Unit::TestCase subclass from the given block
  # and label it "Suite <name>".
  #
  # @param [#to_s] name
  #   The label of the suite.
  #
  # @return [Class]
  #   The newly created test case class.
  def suite(name, &block)
    Class.new(Test::Unit::TestCase, &block).tap do |suite_class|
      suite_class.extend(TestSuite)
      suite_class.name = "Suite #{name}"
    end
  end
  module_function :suite
end
|
17
|
+
|
18
|
+
class Test::Unit::TestCase
  # Declare a test with a free-form description instead of a method name.
  #
  # @param [#to_s] desc
  #   The description; the test method is named "test <desc>".
  def self.test(desc, &impl)
    define_method("test #{desc}", &impl)
  end

  # Run the given block with $stdout redirected into a buffer.
  #
  # The previous value of $stdout is restored afterwards (also when the
  # block raises), so nested captures and an already redirected $stdout
  # keep working.
  #
  # @return [String]
  #   Everything the block wrote to $stdout.
  def capture_stdout
    previous = $stdout
    captured = StringIO.new
    $stdout  = captured
    yield
    captured.string
  ensure
    $stdout = previous
  end
end
|
data/test/runner.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# run with `ruby test/runner.rb`
|
2
|
+
# if you only want to run a single test-file: `ruby test/runner.rb testfile.rb`
|
3
|
+
|
4
|
+
# Make the library and the test helpers requirable.
$LOAD_PATH.push(File.expand_path('../../lib', __FILE__))
$LOAD_PATH.push(File.expand_path('../../test/lib', __FILE__))
TEST_DIR = File.expand_path('../../test', __FILE__)

require 'test/unit'
require 'helper'

# Coverage reporting is opt-in via the COVERAGE environment variable.
if ENV['COVERAGE']
  require 'simplecov'
  SimpleCov.start
end

# Without arguments every file below test/unit is loaded, otherwise only
# the files named on the command line.
requested = ARGV.empty? ? Dir["#{TEST_DIR}/unit/**/*.rb"] : ARGV
requested.each { |unit| load unit }
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'swissmatch/street'
|
4
|
+
include SwissMatch
|
5
|
+
|
6
|
+
# Unit tests for SwissMatch::Street: construction, the house number pattern,
# Street.normalize_street and Street.parse.
suite "Street" do
  test "Street.new with original" do
    original        = ' Beispielstrasse 15 '
    street_name     = 'Beispielstrasse'
    street_number   = '15'
    number_position = :end

    street = Street.new(street_name, street_number, number_position, original)

    assert_equal street_name, street.name
    assert_equal street_number, street.number
    assert_equal number_position, street.number_position
    assert_equal original, street.original
    assert_equal 'Beispielstrasse 15', street.full
    assert_equal original, street.original_or_full
  end

  test "Street.new without original" do
    street_name     = 'Beispielstrasse'
    street_number   = '15'
    number_position = :end

    street = Street.new(street_name, street_number, number_position)

    assert_equal street_name, street.name
    assert_equal street_number, street.number
    assert_equal number_position, street.number_position
    assert_equal nil, street.original
    assert_equal 'Beispielstrasse 15', street.full
    assert_equal 'Beispielstrasse 15', street.original_or_full
  end

  # NOTE(review): HouseNumber is unanchored, so this only checks that the
  # pattern matches somewhere in the string, not that it covers all of it.
  [
    '12',
    '12b',
    '12 B',
    '12/14',
    '16/2/22',
    '105-107',
    '12/A',
  ].each do |number|
    test "House number #{number.inspect}" do
      assert Street::HouseNumber =~ number
    end
  end

  # raw input => expected result of Street.normalize_street
  # (the expected house numbers equal the ones in the input - normalize_street
  # never changes digits)
  {
    %{12,Rue Quelquechose} => %{12, Rue Quelquechose},
    %{12/345 Foo}          => %{12/345, Foo},
    %{1A ch. des Choses}   => %{1A, Chemin des Choses},
    %{ Beispielstr. 15 }   => %{Beispielstrasse 15},
    %{12b,Rte. d'Anywhere} => %{12b, Route d'Anywhere},
    %{1, Av. de Blé}       => %{1, Avenue de Blé},
  }.each do |original, expected|
    test "Street.normalize_street #{original.inspect}, false" do
      actual = Street.normalize_street(original)

      assert_equal expected, actual
    end
  end

  # raw input => [name, number, number_position, full] as expected from
  # Street.parse(input, true)
  {
    %{Beispielstrasse 15}         => ['Beispielstrasse', '15', :end, 'Beispielstrasse 15'],
    %{Beispielstrasse 45/4}       => ['Beispielstrasse', '45/4', :end, 'Beispielstrasse 45/4'],
    %{24,Rue Example}             => [%{Rue Example}, '24', :begin, %{24, Rue Example}],
    %{70/141 Example}             => [%{Example}, '70/141', :begin, %{70/141, Example}],
    %{6A ch. des Cornillons}      => [%{Chemin des Cornillons}, '6a', :begin, %{6a, Chemin des Cornillons}],
    %{ Beispielstr. 15 }          => [%{Beispielstrasse}, '15', :end, %{Beispielstrasse 15}],
    %{25b,Rte. d'Yverdon}         => [%{Route d'Yverdon}, '25b', :begin, %{25b, Route d'Yverdon}],
    %{6, Av. de Budé}             => [%{Avenue de Budé}, '6', :begin, %{6, Avenue de Budé}],
    %{Rue Ferdinand Hodler,19}    => [%{Rue Ferdinand Hodler}, '19', :end, %{Rue Ferdinand Hodler 19}],
    %{Burgstrasse37}              => [%{Burgstrasse}, '37', :end, %{Burgstrasse 37}],
    %{Ave. de Casino 8-10-12}     => [%{Avenue de Casino}, '8-10-12', :end, %{Avenue de Casino 8-10-12}],
    %{Ave Bel- Air 49 B}          => [%{Avenue Bel-Air}, '49b', :end, %{Avenue Bel-Air 49b}],
    %{39 rue Louis Faure}         => [%{Rue Louis Faure}, '39', :begin, %{39, Rue Louis Faure}],
    %{Rte d'Alle 13}              => [%{Route d'Alle}, '13', :end, %{Route d'Alle 13}],
    %{Via Filagni, 2/a}           => [%{Via Filagni}, '2/a', :end, %{Via Filagni 2/a}],
    %{Riehenring 189/A}           => [%{Riehenring}, '189a', :end, %{Riehenring 189a}],
    %{Lorraine 12c/9}             => [%{Lorraine}, '12c/9', :end, %{Lorraine 12c/9}],
    %{Lwaldmannstrasse 67 / J2}   => [%{Lwaldmannstrasse}, '67/j2', :end, %{Lwaldmannstrasse 67/j2}],
    %{Kaysersbergerstrasse 56/3.} => [%{Kaysersbergerstrasse}, '56/3', :end, %{Kaysersbergerstrasse 56/3}],
    %{Rue Montfalcon 2bis}        => [%{Rue Montfalcon}, '2bis', :end, %{Rue Montfalcon 2bis}],
    %{Rue Montfalcon 2 bis}       => [%{Rue Montfalcon}, '2bis', :end, %{Rue Montfalcon 2bis}],
    %{Elsässerstrasse 261-4}      => [%{Elsässerstrasse}, '261-4', :end, %{Elsässerstrasse 261-4}],
  }.each do |original, (street_name, street_number, number_position, full)|
    test "Street.parse #{original.inspect}" do
      street = Street.parse(original, true)

      assert_equal street_name, street.name
      assert_equal street_number, street.number
      assert_equal number_position, street.number_position
      assert_equal original, street.original
      assert_equal full, street.full
      assert_equal original, street.original_or_full
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: swissmatch-street
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Stefan Rusterholz
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-08 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Parse, tokenize, analyze, repair and handle swiss street names and numbers.
|
15
|
+
email: stefan.rusterholz@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- data/sty21072012.txt
|
21
|
+
- lib/swissmatch/street/version.rb
|
22
|
+
- lib/swissmatch/street.rb
|
23
|
+
- test/lib/helper.rb
|
24
|
+
- test/runner.rb
|
25
|
+
- test/unit/lib/swissmatch/street.rb
|
26
|
+
- swissmatch-street.gemspec
|
27
|
+
- LICENSE.txt
|
28
|
+
- Rakefile
|
29
|
+
- README.markdown
|
30
|
+
homepage: http://github.com/apeiros/swissmatch-street
|
31
|
+
licenses: []
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
require_paths:
|
35
|
+
- lib
|
36
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>'
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.3.1
|
48
|
+
requirements: []
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 1.8.24
|
51
|
+
signing_key:
|
52
|
+
specification_version: 3
|
53
|
+
summary: Parse, tokenize, analyze, repair and handle swiss street names and numbers.
|
54
|
+
test_files: []
|
55
|
+
has_rdoc:
|