swissmatch-street 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.txt +8 -0
- data/README.markdown +50 -0
- data/Rakefile +10 -0
- data/data/sty21072012.txt +18001 -0
- data/lib/swissmatch/street.rb +159 -0
- data/lib/swissmatch/street/version.rb +15 -0
- data/swissmatch-street.gemspec +38 -0
- data/test/lib/helper.rb +31 -0
- data/test/runner.rb +20 -0
- data/test/unit/lib/swissmatch/street.rb +102 -0
- metadata +55 -0
@@ -0,0 +1,159 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
require 'swissmatch/street/version'
|
6
|
+
|
7
|
+
|
8
|
+
|
9
|
+
# From SwissMatch::Street
|
10
|
+
# Parse and handle street names and numbers.
|
11
|
+
#
|
12
|
+
# @note
|
13
|
+
# All strings passed to SwissMatch are expected to be utf-8. All strings
|
14
|
+
# returned by SwissMatch are also in utf-8.
|
15
|
+
#
|
16
|
+
module SwissMatch

  # Street
  # Parse and handle street names and numbers.
  #
  # A Street is built either directly via Street.new or from a free-form
  # address line via Street.parse. The house number may follow the name
  # ("Beispielstrasse 15", number_position :end) or precede it
  # ("24, Rue Example", number_position :begin).
  class Street
    # House number formats:
    # '12'
    # '12b'
    # '12bis', '12BIS', '12Bis', '12 bis', … - see development/Noteworthy_things.txt
    # '12 B'
    # '12/14'
    # '105-107'
    # '16/2/22'
    # '8-10-12'
    # '16-2/22'
    # '16/2-22'
    #
    # The suffix alternative accepts 'bis' (any case) in addition to one or two
    # word characters, and a dash part may be a bare number ('-2', '-4'), so that
    # every format listed above is matched in full.
    HouseNumber      = /\d+(?:\x20?(?:(?i:bis)|\w{1,2})|(?:\/\d+|-\d+\w{0,2})*)/

    # Same as HouseNumber, but for a number in front of the street name. The
    # negative lookahead keeps the abbreviations 'ch', 'bd' and 'av' from being
    # swallowed as a number suffix ('24 bd blabla').
    FrontHouseNumber = /\d+(?:\x20?(?!ch|bd|av)(?:bis|\w{1,2})|(?:\/\d+|-\d+\w{0,2})*)/i

    # "<name> <number>" or "<name>, <number>"
    MatchStreetAndStreetNumberGerman = /\A(.*?)(?:(?: |, ?)(#{HouseNumber}))\z/
    # "<number>, <name>"
    MatchStreetAndStreetNumberFrench = /\A(?:(#{FrontHouseNumber}), )(.*?)\z/
    # "<name><number>" without a separator, e.g. 'Burgstrasse37'
    MatchBrokenStreetAndNumberGerman = /\A(.*?)(?:(#{HouseNumber}))\z/
    # "<number>.<name>" / "<number>,<name>"
    # NOTE(review): the '| ' alternative sits inside the outer group, so it
    # matches a single leading space with no number captured - confirm whether
    # '(?:[.,]| )' after the number was intended. Pattern left unchanged.
    MatchBrokenStreetAndNumberFrench = /\A(?:(#{FrontHouseNumber})[.,]| )(.*?)\z/

    # Abbreviations that can be expanded
    Replacements = {
      'ch'   => 'Chemin',
      'chem' => 'Chemin',
      'rte'  => 'Route',
      'bd'   => 'Boulevard',
      'av'   => 'Avenue',
      'ave'  => 'Avenue',
      'str'  => 'strasse',
      'mte'  => 'Monte',
      's'    => 'san',
    }
    # Detect the abbreviations to expand
    ReplacementsMatch = /\b(?:ch|chem|rte|bd|ave?|mte)(?:\.|\b)|\bs(?:\.|\b)(?!$)|\Bstr(?:\.|\b)/i

    # Words which normalize_name keeps in lower case (unless they are the first
    # word of the name or directly follow an apostrophe).
    NoCapitalize = {
      'auf'     => 'auf',
      'uf'      => 'uf', # hurray for swiss german street names :D
      'em'      => 'em',
      'der'     => 'der',
      'die'     => 'die',
      'das'     => 'das',
      'von'     => 'von',
      'nach'    => 'nach',
      'im'      => 'im',
      'in'      => 'in',
      'zum'     => 'zum',
      'zur'     => 'zur',
      'unteren' => 'unteren',
      'oberen'  => 'oberen',

      'd'       => 'd',
      'de'      => 'de',
      'des'     => 'des',
      'du'      => 'du',
      'l'       => 'l',
      'le'      => 'le',
      'la'      => 'la',
      'les'     => 'les',
      'vers'    => 'vers',

      'il'      => 'il',
      'dei'     => 'dei',
      'di'      => 'di',
      'delle'   => 'delle',
      'della'   => 'della',
      'al'      => 'al',
      'alla'    => 'alla',
      'alle'    => 'alle',
      'ai'      => 'ai',
    }

    # Clean up a raw address line: trims and squeezes whitespace, normalizes
    # separators and expands known abbreviations.
    #
    # @param street [String, nil] the raw address line
    # @return [String] the cleaned up line; '' if street is nil
    def self.normalize_street(street)
      return '' unless street

      normalized = street.strip.squeeze(' ')
      normalized = normalized.gsub(/\s*-\s*/, '-')
      # '24 Rue Baulacre' => '24, Rue Baulacre' - but not '24 bd blabla' -> '24 Boulevard, Blabla'
      normalized = normalized.gsub(/\A(#{FrontHouseNumber}) /, '\1, ')
      # '283,Rte.de Meyrin' => '283, Rte. de Meyrin; '283 ,Foo' => '283, Foo'
      normalized = normalized.gsub(/\s*([.,])(?=\S)/, '\1 ')
      # ch., chem., str. etc. => chemin, strasse etc.
      normalized = normalized.gsub(ReplacementsMatch) { |m| Replacements[m.downcase.chomp('.')] }

      normalized.gsub(/\s*n°\s*/, ' ')
    end

    # Normalize the capitalization of a street name: every word of two or more
    # letters is capitalized, except the words listed in NoCapitalize, which are
    # lower-cased. The first such word of the name is always capitalized, and so
    # is a word directly following an apostrophe ("d'Alle", not "d'alle").
    #
    # @param name [String, nil] the street name without house number
    # @return [String] the normalized name; '' if name is nil
    def self.normalize_name(name)
      return '' unless name

      # [\p{Letter}\p{Mark}\p{Connector_Punctuation}] is \p{Word} without digits
      word = /\b[\p{Letter}\p{Mark}\p{Connector_Punctuation}]{2,}\b/

      cased = name.gsub(word) { |w|
        if Regexp.last_match.pre_match.end_with?("'", "’")
          w.capitalize # the word after an elision is never a particle
        else
          NoCapitalize.fetch(w.downcase) { w.capitalize }
        end
      }

      cased.sub(word) { |w| w.capitalize }
    end

    # Normalize a house number: lower-cases it and removes every character
    # other than digits, a-z, '/', '(', ')' and '-'.
    #
    # @param number [String, nil] the raw house number
    # @return [String, nil] the normalized number; nil if number is nil or
    #   nothing remains after the cleanup
    def self.normalize_number(number)
      return unless number

      normalized = number.downcase.delete('^0-9a-z/()-')

      normalized.empty? ? nil : normalized
    end

    # Split an address line into street name and house number.
    #
    # @param street [String, nil] the raw address line
    # @param normalize [Boolean] whether name and number should additionally be
    #   passed through normalize_name and normalize_number
    # @return [SwissMatch::Street] the parsed street; its #original is the
    #   unmodified street argument
    def self.parse(street, normalize=false)
      normalized = normalize_street(street)
      # The patterns are tried from the most to the least specific one; $1/$2
      # refer to the match of the `when` branch that succeeded.
      name, number, pos = case normalized
        when MatchStreetAndStreetNumberGerman then [$1, $2, :end]
        when MatchStreetAndStreetNumberFrench then [$2, $1, :begin]
        when MatchBrokenStreetAndNumberGerman then [$1, $2, :end]
        when MatchBrokenStreetAndNumberFrench then [$2, $1, :begin]
        else [normalized, nil, nil]
      end
      name   = normalize_name(name) if normalize
      number = normalize_number(number) if normalize

      new(name, number, pos, street)
    end

    attr_reader :original, :name, :number, :number_position, :full

    # @param name [String] the street name
    # @param number [String, nil] the house number
    # @param number_position [Symbol, nil] :end ("Name 12"), :begin ("12, Name")
    #   or nil (the number is not used for #full)
    # @param original [String, nil] the unprocessed address line
    # @raise [ArgumentError] if number_position is none of :end, :begin, nil
    def initialize(name, number=nil, number_position=:end, original=nil)
      @name            = name
      @number          = number
      @number_position = number_position
      @original        = original
      @full            = case number_position
        when :end   then [name, number].compact.join(" ")
        when :begin then [number, name].compact.join(", ")
        when nil    then name.dup
        else raise ArgumentError, "Invalid value for number_position: #{number_position.inspect}"
      end
    end

    # @return [String] the original address line if one was given, #full otherwise
    def original_or_full
      @original || @full
    end

    alias to_s full

    def inspect
      "#<Street #{self}>"
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'rubygems/version' # newer rubygems use this
|
5
|
+
rescue LoadError
|
6
|
+
require 'gem/version' # older rubygems use this
|
7
|
+
end
|
8
|
+
|
9
|
+
module SwissMatch
  class Street

    # The version of the swissmatch-street gem.
    Version = Gem::Version.new("0.0.1")
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Gem specification of swissmatch-street.
Gem::Specification.new do |s|
  s.name     = "swissmatch-street"
  s.version  = "0.0.1"
  s.authors  = ["Stefan Rusterholz"]
  s.email    = "stefan.rusterholz@gmail.com"
  s.homepage = "http://github.com/apeiros/swissmatch-street"

  # Plain strings instead of indentation-stripping heredocs: the text is a
  # single line, so there is nothing to unindent.
  s.description = "Parse, tokenize, analyze, repair and handle swiss street names and numbers."
  s.summary     = "Parse, tokenize, analyze, repair and handle swiss street names and numbers."

  s.files =
    Dir['bin/**/*'] +
    Dir['data/**/*'] +
    Dir['lib/**/*'] +
    Dir['rake/**/*'] +
    Dir['test/**/*'] +
    Dir['*.gemspec'] +
    %w[
      LICENSE.txt
      Rakefile
      README.markdown
    ]

  # Only declare executables if the gem ships a bin directory with
  # executable files in it.
  if File.directory?('bin') then
    executables = Dir.chdir('bin') { Dir.glob('**/*').select { |f| File.executable?(f) } }
    s.executables = executables unless executables.empty?
  end

  s.required_rubygems_version = Gem::Requirement.new("> 1.3.1")
  s.rubygems_version          = "1.3.1"
  s.specification_version     = 3
end
|
data/test/lib/helper.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# Mixin which gives an (anonymous) test case class a settable name.
# It is meant to be used with `extend`, so that `name`/`name=` become
# singleton methods of the extended class.
module TestSuite
  attr_accessor :name
end
|
6
|
+
|
7
|
+
module Kernel
  # Define an anonymous Test::Unit::TestCase subclass.
  #
  # @param name [#to_s] used to build the name of the suite ("Suite <name>")
  # @param block the class body of the test case, usually containing `test` calls
  # @return [Class] the newly created test case class
  def suite(name, &block)
    klass = Class.new(Test::Unit::TestCase, &block)
    # An anonymous class has no name of its own, TestSuite provides a writer for it
    klass.extend TestSuite
    klass.name = "Suite #{name}"

    klass
  end
  module_function :suite
end
|
17
|
+
|
18
|
+
class Test::Unit::TestCase
  # Define a test method from a description and a block.
  #
  # @param desc [#to_s] description of the test, the method is named "test <desc>"
  # @param impl the body of the test
  def self.test(desc, &impl)
    define_method("test #{desc}", &impl)
  end

  # Run the given block with $stdout redirected into a buffer.
  #
  # @yield the code whose output should be captured
  # @return [String] everything the block wrote to $stdout
  def capture_stdout
    previous = $stdout # restore whatever was active, not blindly STDOUT, so nested redirections survive
    captured = StringIO.new
    $stdout  = captured
    yield
    captured.string
  ensure
    $stdout = previous
  end
end
|
data/test/runner.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# run with `ruby test/runner.rb`
|
2
|
+
# if you only want to run a single test-file: `ruby test/runner.rb testfile.rb`
|
3
|
+
|
4
|
+
# Make the library and the test helpers requirable
$LOAD_PATH << File.expand_path('../../lib', __FILE__)
$LOAD_PATH << File.expand_path('../../test/lib', __FILE__)
TEST_DIR = File.expand_path('../../test', __FILE__)

require 'test/unit'
require 'helper'

# Coverage is opt-in and is started before any test file is loaded
if ENV['COVERAGE']
  require 'simplecov'
  SimpleCov.start
end

# Either the files given on the command line, or all unit tests.
# The glob result is sorted, since its order is not guaranteed.
units = ARGV.empty? ? Dir["#{TEST_DIR}/unit/**/*.rb"].sort : ARGV

units.each do |unit|
  load unit
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'swissmatch/street'
|
4
|
+
include SwissMatch
|
5
|
+
|
6
|
+
suite "Street" do
  test "Street.new with original" do
    original        = ' Beispielstrasse 15 '
    street_name     = 'Beispielstrasse'
    street_number   = '15'
    number_position = :end

    street = Street.new(street_name, street_number, number_position, original)

    assert_equal street_name, street.name
    assert_equal street_number, street.number
    assert_equal number_position, street.number_position
    assert_equal original, street.original
    assert_equal 'Beispielstrasse 15', street.full
    assert_equal original, street.original_or_full
  end

  test "Street.new without original" do
    street_name     = 'Beispielstrasse'
    street_number   = '15'
    number_position = :end

    street = Street.new(street_name, street_number, number_position)

    assert_equal street_name, street.name
    assert_equal street_number, street.number
    assert_equal number_position, street.number_position
    assert_nil street.original
    assert_equal 'Beispielstrasse 15', street.full
    assert_equal 'Beispielstrasse 15', street.original_or_full
  end

  # Only checks that the pattern matches somewhere in the number
  [
    '12',
    '12b',
    '12 B',
    '12/14',
    '16/2/22',
    '105-107',
    '12/A',
  ].each do |number|
    test "House number #{number.inspect}" do
      assert Street::HouseNumber =~ number
    end
  end

  # raw address line => expected result of Street.normalize_street
  {
    %{12,Rue Quelquechose}  => %{12, Rue Quelquechose},
    %{12/345 Foo}           => %{12/345, Foo},
    %{1A ch. des Choses}    => %{1A, Chemin des Choses},
    %{ Beispielstr. 15 }    => %{Beispielstrasse 15},
    %{12b,Rte. d'Anywhere}  => %{12b, Route d'Anywhere},
    %{1, Av. de Blé}        => %{1, Avenue de Blé},
  }.each do |original, expected|
    test "Street.normalize_street #{original.inspect}, false" do
      actual = Street.normalize_street(original)

      assert_equal expected, actual
    end
  end

  # raw address line => [name, number, number_position, full]
  {
    %{Beispielstrasse 15}          => ['Beispielstrasse', '15', :end, 'Beispielstrasse 15'],
    %{Beispielstrasse 45/4}        => ['Beispielstrasse', '45/4', :end, 'Beispielstrasse 45/4'],
    %{24,Rue Example}              => [%{Rue Example}, '24', :begin, %{24, Rue Example}],
    %{70/141 Example}              => [%{Example}, '70/141', :begin, %{70/141, Example}],
    %{6A ch. des Cornillons}       => [%{Chemin des Cornillons}, '6a', :begin, %{6a, Chemin des Cornillons}],
    %{ Beispielstr. 15 }           => [%{Beispielstrasse}, '15', :end, %{Beispielstrasse 15}],
    %{25b,Rte. d'Yverdon}          => [%{Route d'Yverdon}, '25b', :begin, %{25b, Route d'Yverdon}],
    %{6, Av. de Budé}              => [%{Avenue de Budé}, '6', :begin, %{6, Avenue de Budé}],
    %{Rue Ferdinand Hodler,19}     => [%{Rue Ferdinand Hodler}, '19', :end, %{Rue Ferdinand Hodler 19}],
    %{Burgstrasse37}               => [%{Burgstrasse}, '37', :end, %{Burgstrasse 37}],
    %{Ave. de Casino 8-10-12}      => [%{Avenue de Casino}, '8-10-12', :end, %{Avenue de Casino 8-10-12}],
    %{Ave Bel- Air 49 B}           => [%{Avenue Bel-Air}, '49b', :end, %{Avenue Bel-Air 49b}],
    %{39 rue Louis Faure}          => [%{Rue Louis Faure}, '39', :begin, %{39, Rue Louis Faure}],
    %{Rte d'Alle 13}               => [%{Route d'Alle}, '13', :end, %{Route d'Alle 13}],
    %{Via Filagni, 2/a}            => [%{Via Filagni}, '2/a', :end, %{Via Filagni 2/a}],
    %{Riehenring 189/A}            => [%{Riehenring}, '189a', :end, %{Riehenring 189a}],
    %{Lorraine 12c/9}              => [%{Lorraine}, '12c/9', :end, %{Lorraine 12c/9}],
    %{Lwaldmannstrasse 67 / J2}    => [%{Lwaldmannstrasse}, '67/j2', :end, %{Lwaldmannstrasse 67/j2}],
    %{Kaysersbergerstrasse 56/3.}  => [%{Kaysersbergerstrasse}, '56/3', :end, %{Kaysersbergerstrasse 56/3}],
    %{Rue Montfalcon 2bis}         => [%{Rue Montfalcon}, '2bis', :end, %{Rue Montfalcon 2bis}],
    %{Rue Montfalcon 2 bis}        => [%{Rue Montfalcon}, '2bis', :end, %{Rue Montfalcon 2bis}],
    %{Elsässerstrasse 261-4}       => [%{Elsässerstrasse}, '261-4', :end, %{Elsässerstrasse 261-4}],
  }.each do |original, (street_name, street_number, number_position, full)|
    test "Street.parse #{original.inspect}" do
      street = Street.parse(original, true)

      assert_equal street_name, street.name
      assert_equal street_number, street.number
      assert_equal number_position, street.number_position
      assert_equal original, street.original
      assert_equal full, street.full
      assert_equal original, street.original_or_full
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: swissmatch-street
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Stefan Rusterholz
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-08-08 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: Parse, tokenize, analyze, repair and handle swiss street names and numbers.
|
15
|
+
email: stefan.rusterholz@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- data/sty21072012.txt
|
21
|
+
- lib/swissmatch/street/version.rb
|
22
|
+
- lib/swissmatch/street.rb
|
23
|
+
- test/lib/helper.rb
|
24
|
+
- test/runner.rb
|
25
|
+
- test/unit/lib/swissmatch/street.rb
|
26
|
+
- swissmatch-street.gemspec
|
27
|
+
- LICENSE.txt
|
28
|
+
- Rakefile
|
29
|
+
- README.markdown
|
30
|
+
homepage: http://github.com/apeiros/swissmatch-street
|
31
|
+
licenses: []
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
require_paths:
|
35
|
+
- lib
|
36
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ! '>='
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ! '>'
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.3.1
|
48
|
+
requirements: []
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 1.8.24
|
51
|
+
signing_key:
|
52
|
+
specification_version: 3
|
53
|
+
summary: Parse, tokenize, analyze, repair and handle swiss street names and numbers.
|
54
|
+
test_files: []
|
55
|
+
has_rdoc:
|