geo_coder 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119) hide show
  1. data/Gemfile +12 -0
  2. data/Gemfile.lock +32 -0
  3. data/History.txt +6 -0
  4. data/Makefile +13 -0
  5. data/Manifest.txt +18 -0
  6. data/README.rdoc +197 -0
  7. data/Rakefile +53 -0
  8. data/TODO.txt +8 -0
  9. data/VERSION +1 -0
  10. data/bin/build_indexes +8 -0
  11. data/bin/rebuild_cluster +22 -0
  12. data/bin/rebuild_metaphones +23 -0
  13. data/bin/tiger_import +59 -0
  14. data/demos/demo/app/ext/geocodewrap.rb +84 -0
  15. data/demos/demo/app/views/index.builder +13 -0
  16. data/demos/demo/app/views/index.erb +71 -0
  17. data/demos/demo/config.ru +12 -0
  18. data/demos/demo/config/bootstraps.rb +130 -0
  19. data/demos/demo/config/geoenvironment.rb +25 -0
  20. data/demos/demo/geocoder_helper.rb +12 -0
  21. data/demos/demo/geocom_geocode.rb +10 -0
  22. data/demos/demo/main.rb +3 -0
  23. data/demos/demo/rakefile.rb +17 -0
  24. data/demos/demo/tmp/restart.txt +0 -0
  25. data/demos/simpledemo/views/index.builder +13 -0
  26. data/demos/simpledemo/views/index.erb +69 -0
  27. data/demos/simpledemo/ws.rb +83 -0
  28. data/doc/Makefile +7 -0
  29. data/doc/html4css1.css +279 -0
  30. data/doc/lookup.rst +193 -0
  31. data/doc/parsing.rst +125 -0
  32. data/doc/voidspace.css +147 -0
  33. data/geo_coder.gemspec +172 -0
  34. data/lib/geocoder/us.rb +21 -0
  35. data/lib/geocoder/us/address.rb +290 -0
  36. data/lib/geocoder/us/constants.rb +670 -0
  37. data/lib/geocoder/us/database.rb +745 -0
  38. data/lib/geocoder/us/import.rb +181 -0
  39. data/lib/geocoder/us/import/tiger.rb +13 -0
  40. data/lib/geocoder/us/numbers.rb +58 -0
  41. data/navteq/README +4 -0
  42. data/navteq/convert.sql +37 -0
  43. data/navteq/navteq_import +39 -0
  44. data/navteq/prepare.sql +92 -0
  45. data/sql/cluster.sql +16 -0
  46. data/sql/convert.sql +80 -0
  47. data/sql/create.sql +37 -0
  48. data/sql/index.sql +12 -0
  49. data/sql/place.csv +104944 -0
  50. data/sql/place.sql +104948 -0
  51. data/sql/setup.sql +78 -0
  52. data/src/Makefile +13 -0
  53. data/src/README +14 -0
  54. data/src/liblwgeom/Makefile +75 -0
  55. data/src/liblwgeom/box2d.c +54 -0
  56. data/src/liblwgeom/lex.yy.c +4799 -0
  57. data/src/liblwgeom/liblwgeom.h +1405 -0
  58. data/src/liblwgeom/lwalgorithm.c +946 -0
  59. data/src/liblwgeom/lwalgorithm.h +52 -0
  60. data/src/liblwgeom/lwcircstring.c +759 -0
  61. data/src/liblwgeom/lwcollection.c +541 -0
  62. data/src/liblwgeom/lwcompound.c +118 -0
  63. data/src/liblwgeom/lwcurvepoly.c +86 -0
  64. data/src/liblwgeom/lwgeom.c +886 -0
  65. data/src/liblwgeom/lwgeom_api.c +2201 -0
  66. data/src/liblwgeom/lwgparse.c +1219 -0
  67. data/src/liblwgeom/lwgunparse.c +1054 -0
  68. data/src/liblwgeom/lwline.c +525 -0
  69. data/src/liblwgeom/lwmcurve.c +125 -0
  70. data/src/liblwgeom/lwmline.c +137 -0
  71. data/src/liblwgeom/lwmpoint.c +138 -0
  72. data/src/liblwgeom/lwmpoly.c +141 -0
  73. data/src/liblwgeom/lwmsurface.c +129 -0
  74. data/src/liblwgeom/lwpoint.c +439 -0
  75. data/src/liblwgeom/lwpoly.c +579 -0
  76. data/src/liblwgeom/lwsegmentize.c +1047 -0
  77. data/src/liblwgeom/lwutil.c +369 -0
  78. data/src/liblwgeom/measures.c +861 -0
  79. data/src/liblwgeom/postgis_config.h +93 -0
  80. data/src/liblwgeom/ptarray.c +847 -0
  81. data/src/liblwgeom/vsprintf.c +179 -0
  82. data/src/liblwgeom/wktparse.h +126 -0
  83. data/src/liblwgeom/wktparse.lex +74 -0
  84. data/src/liblwgeom/wktparse.tab.c +2353 -0
  85. data/src/liblwgeom/wktparse.tab.h +145 -0
  86. data/src/liblwgeom/wktparse.y +385 -0
  87. data/src/libsqlite3_geocoder/Makefile +22 -0
  88. data/src/libsqlite3_geocoder/Makefile.nix +15 -0
  89. data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
  90. data/src/libsqlite3_geocoder/extension.c +121 -0
  91. data/src/libsqlite3_geocoder/extension.h +13 -0
  92. data/src/libsqlite3_geocoder/levenshtein.c +42 -0
  93. data/src/libsqlite3_geocoder/metaphon.c +278 -0
  94. data/src/libsqlite3_geocoder/util.c +37 -0
  95. data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
  96. data/src/metaphone/Makefile +7 -0
  97. data/src/metaphone/README +49 -0
  98. data/src/metaphone/extension.c +37 -0
  99. data/src/metaphone/metaphon.c +251 -0
  100. data/src/shp2sqlite/Makefile +37 -0
  101. data/src/shp2sqlite/Makefile.nix +36 -0
  102. data/src/shp2sqlite/Makefile.redhat +35 -0
  103. data/src/shp2sqlite/dbfopen.c +1595 -0
  104. data/src/shp2sqlite/getopt.c +695 -0
  105. data/src/shp2sqlite/getopt.h +127 -0
  106. data/src/shp2sqlite/shapefil.h +500 -0
  107. data/src/shp2sqlite/shp2sqlite.c +1974 -0
  108. data/src/shp2sqlite/shpopen.c +1894 -0
  109. data/tests/address.rb +236 -0
  110. data/tests/benchmark.rb +20 -0
  111. data/tests/constants.rb +57 -0
  112. data/tests/data/address-sample.csv +52 -0
  113. data/tests/data/db-test.csv +57 -0
  114. data/tests/data/locations.csv +4 -0
  115. data/tests/database.rb +137 -0
  116. data/tests/generate.rb +34 -0
  117. data/tests/numbers.rb +46 -0
  118. data/tests/run.rb +11 -0
  119. metadata +237 -0
@@ -0,0 +1,181 @@
1
+ require 'tmpdir'
2
+ require 'geocoder/us/database'
3
+
4
+ require 'rubygems'
5
+ require 'geo_ruby'
6
+ require 'zip/zip'
7
+
8
# Builds and populates a geocoder SQLite database from source data files:
# shapefiles, DBF tables, delimited text files, and zip archives of these.
#
# Subclasses declare which files feed which tables by assigning a
# { table_name => filename_glob } hash to the class-level @tables variable,
# and may override #post_create to load extra data into a freshly created
# database.
class Geocoder::US::Import < Geocoder::US::Database
  # Characters cycled through by #spin.
  SPINNER = "|/-\\"

  @tables = {}

  # Returns the { table => glob } mapping declared on this class. It is a
  # class-level instance variable, so each subclass must assign its own.
  def self.tables
    @tables
  end

  # Instance-level shortcut for the class's table mapping.
  def tables
    self.class.tables
  end

  # Opens (creating if necessary) the database at +filename+ and makes sure
  # the schema exists.
  #
  # filename:: path of the SQLite database, passed on to the superclass.
  # options::  option hash passed on to the superclass with :create forced to
  #            true; options[:sql] is the directory holding the SQL scripts
  #            that #execute_script resolves relative names against.
  def initialize(filename, options)
    # Work on a copy so the caller's hash is not modified behind its back.
    options = options.merge(:create => true)
    super(filename, options)
    @sqlpath = options[:sql]
    create_tables
  end

  # Writes progress output to standard error without a trailing newline.
  def log(*args)
    $stderr.print(*args)
  end

  # Advances a four-frame text spinner, drawing a new frame on every 100th
  # call followed by a backspace so the next frame overwrites it.
  def spin
    @spin ||= 0
    log SPINNER[@spin / 100, 1] + "\010" if @spin % 100 == 0
    @spin += 1
    @spin %= 400
  end

  # Runs one or more SQL statements against the underlying database handle.
  def execute_batch(*args)
    @db.execute_batch(*args)
  end

  # Executes the SQL script +file+. Anything that is not already an absolute
  # (fully expanded) path is looked up in the configured SQL directory.
  def execute_script(file)
    file = File.join(@sqlpath, file) unless File.expand_path(file) == file
    # File.read closes the handle; File.open(file).read left it open.
    execute_batch File.read(file)
  end

  # Iterates over the records of the shapefile +file+, yielding
  # (attributes, coords) for each one. +attributes+ holds the record's field
  # values in field order. +coords+ is a binary string of the geometry's
  # x,y pairs, each value multiplied by 1,000,000, truncated to an integer
  # and packed as a 32-bit little-endian word.
  def load_features(file)
    dataset = GeoRuby::Shp4r::ShpFile.open(file)
    begin
      fields = dataset.fields.map { |f| f.name }
      dataset.each do |record|
        attrs = fields.map { |f| record.data[f] }
        geom = record.geometry
        # Only the first member of a geometry collection is used.
        geom = geom.geometries[0] \
          if geom.kind_of? GeoRuby::SimpleFeatures::GeometryCollection
        points = geom.points.map { |pt| [pt.x, pt.y].map { |i| (i * 1000000).to_i } }
        coords = points.flatten.pack("V*")
        yield attrs, coords
      end
    ensure
      dataset.close
    end
  end

  # Inserts one row of +attrs+ into +table+ using the prepared statement
  # +st+. When +st+ is nil a statement is prepared for this single call.
  # Returns whatever the statement's #execute returns.
  def insert_data(st, table, attrs)
    st ||= prepare_insert(table, attrs)
    st.execute(attrs)
  end

  # Loads every record of the shapefile +file+ into +table+, appending the
  # packed geometry as a blob in the last column.
  def insert_shapefile(file, table)
    bulk_insert(table) do |insert|
      load_features(file) do |attrs, geom|
        attrs << SQLite3::Blob.new(geom) if geom
        insert.call(attrs)
      end
    end
  end

  # Loads every row of the DBF file +file+ into +table+.
  def insert_dbf(file, table)
    bulk_insert(table) do |insert|
      GeoRuby::Shp4r::Dbf::Reader.open(file) do |dbf|
        fields = dbf.fields.map { |f| f.name }
        dbf.rows.each do |record|
          insert.call(fields.map { |f| record[f] })
        end
      end
    end
  end

  # Loads every line of the delimited text file +file+ into +table+,
  # splitting each line on +delimiter+.
  def insert_csv(file, table, delimiter="|")
    bulk_insert(table) do |insert|
      # File.foreach streams the file and closes it when done.
      File.foreach(file) do |line|
        insert.call(line.chomp.split(delimiter))
      end
    end
  end

  # Creates a per-process scratch directory under the system temp dir.
  # With a block, yields the path and afterwards removes the directory
  # (unless +cleanup+ is false); without a block, returns the path.
  def make_temp_dir(cleanup=true)
    path = File.join(Dir.tmpdir, "geocoder-#{$$}")
    FileUtils.mkdir_p path
    return path unless block_given?
    begin
      yield path
    ensure
      FileUtils.rm_r(path) if cleanup
    end
  end

  # Extracts every entry of the zip archive +file+ into the directory +path+.
  def unpack_zip(file, path)
    # The block form closes the archive when extraction finishes.
    # NOTE(review): entry names are trusted as-is; an archive from an
    # untrusted source could contain "../" paths -- confirm inputs are trusted.
    Zip::ZipFile.open(file) do |archive|
      archive.each do |entry|
        entry.extract File.join(path, entry.name)
      end
    end
  end

  # Unpacks +zipfile+ into a scratch directory and loads the .shp (preferred)
  # or .dbf file that shares the archive's base name into +table+.
  def import_zip(zipfile, table)
    make_temp_dir do |tmpdir|
      unpack_zip zipfile, tmpdir
      # Drop the last four characters, i.e. the ".zip" extension.
      basename = File.join(tmpdir, File.basename(zipfile))[0..-5]
      shpfile = basename + ".shp"
      dbffile = basename + ".dbf"
      # File.exist? replaces File.exists?, which newer Rubies no longer have.
      if File.exist? shpfile
        log "#{table} "
        insert_shapefile shpfile, table
      elsif File.exist? dbffile
        log "#{table} "
        insert_dbf dbffile, table
      else
        log "\nNOT FOUND: #{shpfile}\n"
      end
    end
  end

  # Imports one directory of source files: runs setup.sql, loads the first
  # file matching each table's glob inside a single transaction, then runs
  # convert.sql.
  def import_path(path)
    log "\n#{path}: "
    execute_script "setup.sql"
    @db.transaction do
      tables.each do |table, glob|
        file = Dir[File.join(path, glob)][0]
        next unless file
        if file =~ /\.zip$/i
          import_zip file, table
        else
          # Was a call to import_shapefile, which this class never defines.
          insert_shapefile file, table
        end
      end
    end
    execute_script "convert.sql"
  end

  # Imports +root+ if it directly contains a file matching the first table
  # glob; otherwise descends into its subdirectories in sorted order.
  def import_tree(root)
    if !Dir[File.join(root, tables.values[0])].empty?
      import_path root
    else
      Dir[File.join(root, "*")].sort.each do |file|
        import_tree file if File.directory? file
      end
    end
  end

  # Runs create.sql and the #post_create hook when the database does not yet
  # have a "place" table.
  def create_tables
    return if schema_present?
    log "creating tables\n"
    execute_script "create.sql"
    post_create
  end

  # Hook invoked once after the schema has been created; does nothing here.
  def post_create
  end

  private

  # Prepares an INSERT for +table+ with one placeholder per value in +attrs+.
  # placeholders_for is not defined in this class -- presumably inherited
  # from Geocoder::US::Database.
  def prepare_insert(table, attrs)
    values = placeholders_for attrs
    @db.prepare("INSERT INTO #{table} VALUES (#{values});")
  end

  # Yields a callable that inserts one row into +table+. Prepared statements
  # are reused across rows (one per distinct row width, so a row is always
  # bound against a statement with exactly as many placeholders as it has
  # values) and are all closed when the block finishes.
  def bulk_insert(table)
    statements = {}
    inserter = lambda do |attrs|
      st = (statements[attrs.size] ||= prepare_insert(table, attrs))
      insert_data st, table, attrs
    end
    yield inserter
  ensure
    statements.each_value { |st| st.close } if statements
  end

  # True when the "place" table can be queried. LIMIT 1 keeps the probe from
  # reading the whole table on an already populated database.
  def schema_present?
    @db.execute("SELECT 0 FROM place LIMIT 1")
    true
  rescue SQLite3::SQLException
    false
  end
end
@@ -0,0 +1,13 @@
1
+ require 'geocoder/us/import'
2
+
3
# Importer configured for TIGER source archives: each destination table is
# fed from the zip file in a directory whose name matches the given glob.
class Geocoder::US::Import::TIGER < Geocoder::US::Import
  @tables = {
    :tiger_edges     => "*_edges.zip",
    :tiger_featnames => "*_featnames.zip",
    :tiger_addr      => "*_addr.zip"
  }

  # Called once after the schema is created. The place.csv load is currently
  # commented out, so this only logs a message and runs an empty transaction.
  def post_create
    log "importing places"
    @db.transaction {
      # insert_csv File.join(@sqlpath, "place.csv"), "place"
    }
  end
end
@@ -0,0 +1,58 @@
1
module Geocoder
end

module Geocoder::US
  # The NumberMap class provides a means for mapping ordinal
  # and cardinal number words to digits and back.
  #
  # Every word appended with #<< is stored twice: under its normalised form
  # (lower case, non-word characters removed) pointing at its position, and
  # under its position pointing back at the word as given. Lookups through
  # #[] normalise string keys the same way, so "Twenty-One", "twenty one"
  # and "twentyone" all find the same entry.
  class NumberMap < Hash
    # Regexp matching any key that was present when #build_match last ran.
    # NOTE(review): #<< does not refresh it, so words appended after
    # construction (the tens added below) are not matched -- confirm whether
    # callers rely on that before changing it.
    attr_accessor :regexp

    # Builds a map from +array+, a list of number words in ascending order
    # starting at zero, and compiles its matching regexp.
    def self.[](array)
      nmap = new(array)
      nmap.build_match
      nmap
    end

    # Creates a map pre-loaded with the words in +array+ (none by default).
    # The argument used to be required and then ignored, which left
    # NumberMap.new(words) silently empty.
    def initialize(array = [])
      super()
      @count = 0
      array.each { |item| self << item }
    end

    # Compiles #regexp as a case-insensitive, word-bounded alternation of all
    # current keys (both the words and their numeric positions).
    def build_match
      @regexp = Regexp.new(
        '\b(' + keys.flatten.join("|") + ')\b',
        Regexp::IGNORECASE)
    end

    # Normalises a string key to lower case with non-word characters
    # stripped; any other kind of key is returned untouched.
    def clean(key)
      key.is_a?(String) ? key.downcase.gsub(/\W/, "") : key
    end

    # Appends +item+ as the word for the next number in sequence.
    def <<(item)
      store clean(item), @count
      store @count, item
      @count += 1
    end

    # Looks up a number by word, or a word by number.
    def [](key)
      super(clean(key))
    end
  end

  # The Cardinals constant maps digits to cardinal number words and back.
  Cardinals = NumberMap[%w[
    zero one two three four five six seven eight nine ten
    eleven twelve thirteen fourteen fifteen sixteen seventeen
    eighteen nineteen
  ]]
  Cardinal_Tens = %w[ twenty thirty forty fifty sixty seventy eighty ninety ]
  # Extend the map through ninety-nine: each multiple of ten followed by its
  # nine hyphenated compounds.
  Cardinal_Tens.each do |tens|
    Cardinals << tens
    (1..9).each { |n| Cardinals << tens + "-" + Cardinals[n] }
  end

  # The Ordinals constant maps digits to ordinal number words and back.
  Ordinals = NumberMap[%w[
    zeroth first second third fourth fifth sixth seventh eighth ninth
    tenth eleventh twelfth thirteenth fourteenth fifteenth sixteenth
    seventeenth eighteenth nineteenth
  ]]
  Cardinal_Tens.each do |tens|
    # "twenty" -> "twentieth"; the compounds keep the cardinal tens word.
    Ordinals << tens.gsub("y", "ieth")
    (1..9).each { |n| Ordinals << tens + "-" + Ordinals[n] }
  end
end
data/navteq/README ADDED
@@ -0,0 +1,4 @@
1
+ The navteq_import script in this directory is designed to be used with Navteq's
2
+ local_streets layer. It works much like tiger_import, except that the list of
3
+ .zip files containing the local_streets.* files is given either as command-line
4
+ arguments (after the database filename) or, one per line, on standard input.
@@ -0,0 +1,37 @@
1
-- Converts the rows loaded into local_streets into the geocoder's feature,
-- edge and range tables. metaphone(), compress_wkb_line(), digit_suffix()
-- and nondigit_prefix() are not SQLite built-ins; presumably they come from
-- the helper extension loaded before this script runs.
-- NOTE(review): local_streets is never emptied here, so running this once
-- per input file against the same database may re-convert earlier rows --
-- confirm.
BEGIN;

CREATE INDEX navteq_link_id ON local_streets (link_id);

-- Every distinct (link, postcode) pair, taking the postcode from either
-- side of an addressed, named street segment.
CREATE TEMPORARY TABLE linezip AS
    SELECT DISTINCT tlid, zip
      FROM (
            SELECT link_id AS tlid, r_postcode AS zip
              FROM local_streets
             WHERE addr_type IS NOT NULL
               AND st_name IS NOT NULL
               AND r_postcode IS NOT NULL
            UNION
            SELECT link_id AS tlid, l_postcode AS zip
              FROM local_streets
             WHERE addr_type IS NOT NULL
               AND st_name IS NOT NULL
               AND l_postcode IS NOT NULL
           ) AS side_zips;

-- One feature row per (link, postcode) pair.
INSERT INTO feature
    SELECT l.tlid, f.st_nm_base, metaphone(f.st_nm_base, 5), f.st_nm_pref,
           f.st_typ_bef, NULL, f.st_nm_suff, f.st_typ_aft, NULL, 'P', l.zip
      FROM linezip l
      JOIN local_streets f ON l.tlid = f.link_id
     WHERE f.st_name IS NOT NULL;

-- One edge row per link; rows that collide with an existing edge are skipped.
INSERT OR IGNORE INTO edge
    SELECT l.tlid, compress_wkb_line(f.the_geom)
      FROM (SELECT DISTINCT tlid FROM linezip) AS l
      JOIN local_streets f ON l.tlid = f.link_id
     WHERE f.st_name IS NOT NULL;

-- Address ranges for the left ('L') and right ('R') side of each link.
INSERT INTO range
    SELECT f.link_id, digit_suffix(f.l_refaddr), digit_suffix(f.l_nrefaddr),
           nondigit_prefix(f.l_refaddr), f.l_postcode, 'L'
      FROM linezip l
      JOIN local_streets f ON l.tlid = f.link_id
     WHERE f.l_refaddr IS NOT NULL
    UNION
    SELECT f.link_id, digit_suffix(f.r_refaddr), digit_suffix(f.r_nrefaddr),
           nondigit_prefix(f.r_refaddr), f.r_postcode, 'R'
      FROM linezip l
      JOIN local_streets f ON l.tlid = f.link_id
     WHERE f.r_refaddr IS NOT NULL;

END;
@@ -0,0 +1,39 @@
1
#!/bin/bash
#
# Imports Navteq local_streets data into a geocoder SQLite database.
#
# Usage: navteq_import DATABASE [FILE ...]
#
# Each FILE names a .zip archive (the ".zip" suffix may be omitted) or one
# of a set of loose local_streets.* files. When no FILE is given, names are
# read from standard input, one per line. Output is copied to
# import-<pid>.log in the current directory.

TMP="/tmp/navteq-import.$$"
SHPS="local_streets"
DBFS=""
BASE=$(dirname "$0")
PATH=$PATH:$BASE/../bin
SQL="$BASE/../sql"
HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so"

if [ -z "$1" ]; then
    echo "usage: $(basename "$0") DATABASE [FILE ...]" >&2
    exit 1
fi
DATABASE=$1
shift

mkdir -p "$TMP" || exit 1

# Create the schema and load the place table the first time the database is used.
[ ! -r "$DATABASE" ] && cat "${SQL}/create.sql" "${SQL}/place.sql" | sqlite3 "$DATABASE"

if [ -z "$1" ]; then
    cat
else
    ls "$@"
fi | while IFS= read -r county; do
    echo "--- $county"
    # Accept the archive name with or without its .zip suffix. The name that
    # is tested is also the name that is unpacked (previously "$county.zip"
    # was unpacked, which fails when $county already ends in .zip).
    zipfile="${county%.zip}.zip"
    if [ -r "$zipfile" ]; then
        unzip -q "$zipfile" -d "$TMP"
    else
        # Not an archive: copy the sibling files sharing the same base name.
        cp "${county%.*}".* "$TMP"
    fi
    # Feed one SQL stream to sqlite3: load the helper extension, create the
    # staging table, append the shapefile/DBF contents, then convert them.
    (echo ".load $HELPER_LIB" && \
     cat "${BASE}/prepare.sql" && \
     for file in $SHPS; do
         shp2sqlite -aS "${TMP}/${file}.shp" "${file}"
     done && \
     for file in $DBFS; do
         shp2sqlite -an "${TMP}/${file}.dbf" "${file}"
     done && \
     cat "${BASE}/convert.sql") | sqlite3 "$DATABASE"
    rm -f "$TMP"/*
done 2>&1 | tee "import-$$.log"
rm -rf "$TMP"
@@ -0,0 +1,92 @@
1
-- Session settings for the bulk load, followed by the staging table that
-- receives the local_streets shapefile attributes. Column names, order and
-- declared types are kept exactly as given.
PRAGMA temp_store = MEMORY;
PRAGMA journal_mode = MEMORY;
PRAGMA synchronous = OFF;
PRAGMA cache_size = 250000;
PRAGMA count_changes = 0;

BEGIN;

CREATE TABLE "local_streets" (
    gid integer PRIMARY KEY,
    "the_geom"   blob,
    "link_id"    integer,
    "st_name"    varchar(80),
    "feat_id"    integer,
    "st_langcd"  varchar(3),
    "num_stnmes" integer,
    "st_nm_pref" varchar(2),
    "st_typ_bef" varchar(30),
    "st_nm_base" varchar(35),
    "st_nm_suff" varchar(2),
    "st_typ_aft" varchar(30),
    "st_typ_att" varchar(1),
    "addr_type"  varchar(1),
    "l_refaddr"  varchar(10),
    "l_nrefaddr" varchar(10),
    "l_addrsch"  varchar(1),
    "l_addrform" varchar(1),
    "r_refaddr"  varchar(10),
    "r_nrefaddr" varchar(10),
    "r_addrsch"  varchar(1),
    "r_addrform" varchar(1),
    "ref_in_id"  integer,
    "nref_in_id" integer,
    "n_shapepnt" integer,
    "func_class" varchar(1),
    "speed_cat"  varchar(1),
    "fr_spd_lim" integer,
    "to_spd_lim" integer,
    "to_lanes"   integer,
    "from_lanes" integer,
    "enh_geom"   varchar(1),
    "lane_cat"   varchar(1),
    "divider"    varchar(1),
    "dir_travel" varchar(1),
    "l_area_id"  integer,
    "r_area_id"  integer,
    "l_postcode" varchar(11),
    "r_postcode" varchar(11),
    "l_numzones" integer,
    "r_numzones" integer,
    "num_ad_rng" integer,
    "ar_auto"    varchar(1),
    "ar_bus"     varchar(1),
    "ar_taxis"   varchar(1),
    "ar_carpool" varchar(1),
    "ar_pedest"  varchar(1),
    "ar_trucks"  varchar(1),
    "ar_traff"   varchar(1),
    "ar_deliv"   varchar(1),
    "ar_emerveh" varchar(1),
    "paved"      varchar(1),
    "private"    varchar(1),
    "frontage"   varchar(1),
    "bridge"     varchar(1),
    "tunnel"     varchar(1),
    "ramp"       varchar(1),
    "tollway"    varchar(1),
    "poiaccess"  varchar(1),
    "contracc"   varchar(1),
    "roundabout" varchar(1),
    "interinter" varchar(1),
    "undeftraff" varchar(1),
    "ferry_type" varchar(1),
    "multidigit" varchar(1),
    "maxattr"    varchar(1),
    "spectrfig"  varchar(1),
    "indescrib"  varchar(1),
    "manoeuvre"  varchar(1),
    "dividerleg" varchar(1),
    "inprocdata" varchar(1),
    "full_geom"  varchar(1),
    "urban"      varchar(1),
    "route_type" varchar(1),
    "dironsign"  varchar(1),
    "explicatbl" varchar(1),
    "nameonrdsn" varchar(1),
    "postalname" varchar(1),
    "stalename"  varchar(1),
    "vanityname" varchar(1),
    "junctionnm" varchar(1),
    "exitname"   varchar(1),
    "scenic_rt"  varchar(1),
    "scenic_nm"  varchar(1)
);

--SELECT AddGeometryColumn('','local_streets','the_geom','-1','MULTILINESTRING',2);

END;
data/sql/cluster.sql ADDED
@@ -0,0 +1,16 @@
1
.echo on

-- Trade durability for speed during the bulk copy: keep temporary storage
-- in memory, disable the rollback journal and synchronous writes, and
-- enlarge the page cache.
PRAGMA temp_store = MEMORY;
PRAGMA journal_mode = OFF;
PRAGMA synchronous = OFF;
PRAGMA cache_size = 500000;
PRAGMA count_changes = 0;

BEGIN;

-- Copy each table from the database known as "old" (presumably attached by
-- the caller before this script runs), ordering rows by their index columns
-- to reduce the number of disk pages that need to be read on each query.
INSERT INTO place
    SELECT * FROM old.place ORDER BY zip, priority;
INSERT INTO edge
    SELECT * FROM old.edge ORDER BY tlid;
INSERT INTO feature
    SELECT * FROM old.feature ORDER BY street_phone, zip;
INSERT INTO feature_edge
    SELECT * FROM old.feature_edge ORDER BY fid;
INSERT INTO range
    SELECT * FROM old.range ORDER BY tlid;

COMMIT;