geo_coder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. data/Gemfile +12 -0
  2. data/Gemfile.lock +32 -0
  3. data/History.txt +6 -0
  4. data/Makefile +13 -0
  5. data/Manifest.txt +18 -0
  6. data/README.rdoc +197 -0
  7. data/Rakefile +53 -0
  8. data/TODO.txt +8 -0
  9. data/VERSION +1 -0
  10. data/bin/build_indexes +8 -0
  11. data/bin/rebuild_cluster +22 -0
  12. data/bin/rebuild_metaphones +23 -0
  13. data/bin/tiger_import +59 -0
  14. data/demos/demo/app/ext/geocodewrap.rb +84 -0
  15. data/demos/demo/app/views/index.builder +13 -0
  16. data/demos/demo/app/views/index.erb +71 -0
  17. data/demos/demo/config.ru +12 -0
  18. data/demos/demo/config/bootstraps.rb +130 -0
  19. data/demos/demo/config/geoenvironment.rb +25 -0
  20. data/demos/demo/geocoder_helper.rb +12 -0
  21. data/demos/demo/geocom_geocode.rb +10 -0
  22. data/demos/demo/main.rb +3 -0
  23. data/demos/demo/rakefile.rb +17 -0
  24. data/demos/demo/tmp/restart.txt +0 -0
  25. data/demos/simpledemo/views/index.builder +13 -0
  26. data/demos/simpledemo/views/index.erb +69 -0
  27. data/demos/simpledemo/ws.rb +83 -0
  28. data/doc/Makefile +7 -0
  29. data/doc/html4css1.css +279 -0
  30. data/doc/lookup.rst +193 -0
  31. data/doc/parsing.rst +125 -0
  32. data/doc/voidspace.css +147 -0
  33. data/geo_coder.gemspec +172 -0
  34. data/lib/geocoder/us.rb +21 -0
  35. data/lib/geocoder/us/address.rb +290 -0
  36. data/lib/geocoder/us/constants.rb +670 -0
  37. data/lib/geocoder/us/database.rb +745 -0
  38. data/lib/geocoder/us/import.rb +181 -0
  39. data/lib/geocoder/us/import/tiger.rb +13 -0
  40. data/lib/geocoder/us/numbers.rb +58 -0
  41. data/navteq/README +4 -0
  42. data/navteq/convert.sql +37 -0
  43. data/navteq/navteq_import +39 -0
  44. data/navteq/prepare.sql +92 -0
  45. data/sql/cluster.sql +16 -0
  46. data/sql/convert.sql +80 -0
  47. data/sql/create.sql +37 -0
  48. data/sql/index.sql +12 -0
  49. data/sql/place.csv +104944 -0
  50. data/sql/place.sql +104948 -0
  51. data/sql/setup.sql +78 -0
  52. data/src/Makefile +13 -0
  53. data/src/README +14 -0
  54. data/src/liblwgeom/Makefile +75 -0
  55. data/src/liblwgeom/box2d.c +54 -0
  56. data/src/liblwgeom/lex.yy.c +4799 -0
  57. data/src/liblwgeom/liblwgeom.h +1405 -0
  58. data/src/liblwgeom/lwalgorithm.c +946 -0
  59. data/src/liblwgeom/lwalgorithm.h +52 -0
  60. data/src/liblwgeom/lwcircstring.c +759 -0
  61. data/src/liblwgeom/lwcollection.c +541 -0
  62. data/src/liblwgeom/lwcompound.c +118 -0
  63. data/src/liblwgeom/lwcurvepoly.c +86 -0
  64. data/src/liblwgeom/lwgeom.c +886 -0
  65. data/src/liblwgeom/lwgeom_api.c +2201 -0
  66. data/src/liblwgeom/lwgparse.c +1219 -0
  67. data/src/liblwgeom/lwgunparse.c +1054 -0
  68. data/src/liblwgeom/lwline.c +525 -0
  69. data/src/liblwgeom/lwmcurve.c +125 -0
  70. data/src/liblwgeom/lwmline.c +137 -0
  71. data/src/liblwgeom/lwmpoint.c +138 -0
  72. data/src/liblwgeom/lwmpoly.c +141 -0
  73. data/src/liblwgeom/lwmsurface.c +129 -0
  74. data/src/liblwgeom/lwpoint.c +439 -0
  75. data/src/liblwgeom/lwpoly.c +579 -0
  76. data/src/liblwgeom/lwsegmentize.c +1047 -0
  77. data/src/liblwgeom/lwutil.c +369 -0
  78. data/src/liblwgeom/measures.c +861 -0
  79. data/src/liblwgeom/postgis_config.h +93 -0
  80. data/src/liblwgeom/ptarray.c +847 -0
  81. data/src/liblwgeom/vsprintf.c +179 -0
  82. data/src/liblwgeom/wktparse.h +126 -0
  83. data/src/liblwgeom/wktparse.lex +74 -0
  84. data/src/liblwgeom/wktparse.tab.c +2353 -0
  85. data/src/liblwgeom/wktparse.tab.h +145 -0
  86. data/src/liblwgeom/wktparse.y +385 -0
  87. data/src/libsqlite3_geocoder/Makefile +22 -0
  88. data/src/libsqlite3_geocoder/Makefile.nix +15 -0
  89. data/src/libsqlite3_geocoder/Makefile.redhat +15 -0
  90. data/src/libsqlite3_geocoder/extension.c +121 -0
  91. data/src/libsqlite3_geocoder/extension.h +13 -0
  92. data/src/libsqlite3_geocoder/levenshtein.c +42 -0
  93. data/src/libsqlite3_geocoder/metaphon.c +278 -0
  94. data/src/libsqlite3_geocoder/util.c +37 -0
  95. data/src/libsqlite3_geocoder/wkb_compress.c +54 -0
  96. data/src/metaphone/Makefile +7 -0
  97. data/src/metaphone/README +49 -0
  98. data/src/metaphone/extension.c +37 -0
  99. data/src/metaphone/metaphon.c +251 -0
  100. data/src/shp2sqlite/Makefile +37 -0
  101. data/src/shp2sqlite/Makefile.nix +36 -0
  102. data/src/shp2sqlite/Makefile.redhat +35 -0
  103. data/src/shp2sqlite/dbfopen.c +1595 -0
  104. data/src/shp2sqlite/getopt.c +695 -0
  105. data/src/shp2sqlite/getopt.h +127 -0
  106. data/src/shp2sqlite/shapefil.h +500 -0
  107. data/src/shp2sqlite/shp2sqlite.c +1974 -0
  108. data/src/shp2sqlite/shpopen.c +1894 -0
  109. data/tests/address.rb +236 -0
  110. data/tests/benchmark.rb +20 -0
  111. data/tests/constants.rb +57 -0
  112. data/tests/data/address-sample.csv +52 -0
  113. data/tests/data/db-test.csv +57 -0
  114. data/tests/data/locations.csv +4 -0
  115. data/tests/database.rb +137 -0
  116. data/tests/generate.rb +34 -0
  117. data/tests/numbers.rb +46 -0
  118. data/tests/run.rb +11 -0
  119. metadata +237 -0
@@ -0,0 +1,181 @@
1
+ require 'tmpdir'
2
+ require 'geocoder/us/database'
3
+
4
+ require 'rubygems'
5
+ require 'geo_ruby'
6
+ require 'zip/zip'
7
+
8
# Bulk loader that fills a geocoder SQLite database from shapefiles, DBF
# tables, CSV files, or zip archives containing them.
#
# Subclasses declare their inputs by assigning the class-level @tables hash
# (table name => file glob) and may override #post_create to seed a freshly
# created database.
class Geocoder::US::Import < Geocoder::US::Database
  @tables = {}

  # Returns the table name => file glob mapping declared on this class.
  def self.tables
    @tables
  end

  # Instance-level shortcut for the mapping declared on the object's class.
  def tables
    self.class.tables
  end

  # Opens the database through the superclass with :create forced on, then
  # makes sure the schema exists.
  #
  # filename:: path of the SQLite database file.
  # options::  option hash handed to the superclass; options[:sql] names the
  #            directory that relative script names are resolved against.
  def initialize(filename, options = {})
    # Work on a copy so the caller's hash is left untouched.
    options = options.merge(:create => true)
    super(filename, options)
    @sqlpath = options[:sql]
    create_tables
  end

  # Writes the given values to standard error without adding a newline.
  def log(*args)
    $stderr.print(*args)
  end

  # Advances a four-frame text spinner; a frame followed by a backspace is
  # logged once every 100 calls.
  def spin
    @spin ||= 0
    log("|/-\\"[@spin / 100, 1] + "\010") if @spin % 100 == 0
    @spin = (@spin + 1) % 400
  end

  # Forwards to execute_batch on the underlying database handle.
  def execute_batch(*args)
    @db.execute_batch(*args)
  end

  # Runs the SQL script +file+. A name that is not already an absolute,
  # normalised path is looked up inside the :sql directory.
  def execute_script(file)
    file = File.join(@sqlpath, file) if File.expand_path(file) != file
    # File.read closes the handle; File.open(file).read left it open.
    execute_batch File.read(file)
  end

  # Yields [attribute values, packed coordinates] for every record of the
  # shapefile +file+. Coordinates are scaled by 1,000,000, truncated to
  # integers and packed as little-endian 32-bit values, x before y.
  def load_features(file)
    dataset = GeoRuby::Shp4r::ShpFile.open(file)
    begin
      fields = dataset.fields.map { |field| field.name }
      dataset.each do |record|
        attrs = fields.map { |name| record.data[name] }
        geom = record.geometry
        # Only the first member of a geometry collection is used.
        if geom.kind_of? GeoRuby::SimpleFeatures::GeometryCollection
          geom = geom.geometries[0]
        end
        points = geom.points.map do |pt|
          [pt.x, pt.y].map { |coord| (coord * 1000000).to_i }
        end
        yield attrs, points.flatten.pack("V*")
      end
    ensure
      # Release the underlying file handles even if the block raises.
      dataset.close
    end
  end

  # Inserts one row into +table+. When +st+ is nil a statement is prepared
  # for this call only, so bulk loaders in this class go through
  # #each_insert instead to reuse statements.
  def insert_data(st, table, attrs)
    unless st
      values = placeholders_for attrs
      st = @db.prepare("INSERT INTO #{table} VALUES (#{values});")
    end
    st.execute(attrs)
  end

  # Loads every record of the shapefile +file+ into +table+, appending the
  # packed geometry as a trailing blob column.
  def insert_shapefile(file, table)
    each_insert(table) do |insert|
      load_features(file) do |attrs, geom|
        attrs << SQLite3::Blob.new(geom) if geom
        insert.call attrs
      end
    end
  end

  # Loads every row of the DBF file +file+ into +table+.
  def insert_dbf(file, table)
    each_insert(table) do |insert|
      GeoRuby::Shp4r::Dbf::Reader.open(file) do |dbf|
        fields = dbf.fields.map { |field| field.name }
        dbf.rows.each do |record|
          insert.call fields.map { |name| record[name] }
        end
      end
    end
  end

  # Loads every line of the delimited text file +file+ into +table+.
  def insert_csv(file, table, delimiter="|")
    each_insert(table) do |insert|
      # File.foreach streams the file and closes it when done.
      File.foreach(file) do |line|
        insert.call line.chomp.split(delimiter)
      end
    end
  end

  # Creates the per-process scratch directory "geocoder-<pid>" under the
  # system temp dir. With a block the path is yielded and the directory is
  # removed afterwards unless +cleanup+ is false; without a block the path
  # is returned and removal is left to the caller.
  def make_temp_dir(cleanup=true)
    path = File.join(Dir.tmpdir, "geocoder-#{$$}")
    FileUtils.mkdir_p path
    return path unless block_given?
    begin
      yield path
    ensure
      FileUtils.rm_r(path) if cleanup
    end
  end

  # Extracts every entry of the zip archive +file+ below the directory
  # +path+. Raises ArgumentError for an entry whose name would resolve to a
  # location outside +path+ (for example one containing "../").
  def unpack_zip(file, path)
    root = File.expand_path(path) + File::SEPARATOR
    # The block form closes the archive when extraction finishes or fails.
    Zip::ZipFile.open(file) do |archive|
      archive.each do |entry|
        target = File.join(path, entry.name)
        unless File.expand_path(target)[0, root.length] == root
          raise ArgumentError, "unsafe entry name in #{file}: #{entry.name}"
        end
        entry.extract target
      end
    end
  end

  # Unpacks +zipfile+ into a scratch directory and loads the member named
  # after the archive into +table+: the .shp if present, otherwise the .dbf.
  def import_zip(zipfile, table)
    make_temp_dir do |tmpdir|
      unpack_zip zipfile, tmpdir
      stem = File.basename(zipfile, File.extname(zipfile))
      basename = File.join(tmpdir, stem)
      shpfile = basename + ".shp"
      dbffile = basename + ".dbf"
      # File.exist? is used because File.exists? is gone in current Ruby.
      if File.exist? shpfile
        log "#{table} "
        insert_shapefile shpfile, table
      elsif File.exist? dbffile
        log "#{table} "
        insert_dbf dbffile, table
      else
        log "\nNOT FOUND: #{shpfile}\n"
      end
    end
  end

  # Imports one directory: runs setup.sql, loads the first file matching
  # each declared glob inside a single transaction, then runs convert.sql.
  def import_path(path)
    log "\n#{path}: "
    execute_script "setup.sql"
    @db.transaction do
      tables.each do |table, glob|
        file = Dir[File.join(path, glob)][0]
        next unless file
        if file =~ /\.zip\z/i
          import_zip file, table
        else
          # Anything that is not a zip archive is loaded as a shapefile.
          insert_shapefile file, table
        end
      end
    end
    execute_script "convert.sql"
  end

  # Imports +root+ if it contains a file matching the first declared glob;
  # otherwise descends into its subdirectories in sorted order.
  def import_tree(root)
    probe = tables.values.first
    # With no tables declared there is nothing to probe for, so just recurse.
    if probe && !Dir[File.join(root, probe)].empty?
      import_path root
    else
      Dir[File.join(root, "*")].sort.each do |file|
        import_tree file if File.directory? file
      end
    end
  end

  # Runs create.sql and #post_create when the place table cannot be queried.
  def create_tables
    begin
      # LIMIT 1: only existence matters, so avoid reading the whole table.
      @db.execute("SELECT 0 FROM place LIMIT 1")
      return
    rescue SQLite3::SQLException
      # Query failed: treat the database as uninitialised and fall through.
    end
    log "creating tables\n"
    execute_script "create.sql"
    post_create
  end

  # Hook called after the schema has been created; a no-op here.
  def post_create
  end

  private

  # Yields a callable that inserts one row (an array of values) into
  # +table+. Statements are prepared once per distinct row length and closed
  # when the block returns or raises.
  # NOTE(review): keying by length assumes placeholders_for depends only on
  # the number of values - confirm against Geocoder::US::Database.
  def each_insert(table)
    statements = {}
    insert = lambda do |attrs|
      st = (statements[attrs.length] ||=
              @db.prepare("INSERT INTO #{table} VALUES (#{placeholders_for attrs});"))
      st.execute(attrs)
    end
    yield insert
  ensure
    statements.values.each { |st| st.close }
  end
end
@@ -0,0 +1,13 @@
1
+ require 'geocoder/us/import'
2
+
3
# Importer whose table map points at TIGER edges, featnames and addr zip
# archives.
class Geocoder::US::Import::TIGER < Geocoder::US::Import
  # Staging table => glob of the archive that feeds it.
  @tables = {
    :tiger_edges     => "*_edges.zip",
    :tiger_featnames => "*_featnames.zip",
    :tiger_addr      => "*_addr.zip"
  }

  # Logs a progress message and opens a transaction. The place.csv load
  # inside it is currently commented out, so nothing is inserted.
  def post_create
    log("importing places")
    @db.transaction do
      # insert_csv File.join(@sqlpath, "place.csv"), "place"
    end
  end
end
@@ -0,0 +1,58 @@
1
module Geocoder
end

module Geocoder::US
  # The NumberMap class provides a means for mapping ordinal
  # and cardinal number words to digits and back.
  #
  # Words are stored under a normalised key (lower case, non-word characters
  # removed) pointing at their integer, and each integer points back at the
  # word as it was originally given. Integers are assigned in insertion
  # order, starting at zero.
  class NumberMap < Hash
    attr_writer :regexp

    # Builds a map from an array of words; the word at index i maps to i.
    def self.[](array)
      nmap = new
      array.each { |item| nmap << item }
      nmap.build_match
      nmap
    end

    # The argument is accepted for backward compatibility and ignored.
    def initialize(array = nil)
      super()
      @count = 0
      @regexp = nil
    end

    # Case-insensitive pattern matching any word or number in the map as a
    # whole word. It is rebuilt on demand after the map has grown, so it
    # always covers every entry.
    def regexp
      @regexp || build_match
    end

    # Compiles and stores the pattern returned by #regexp.
    def build_match
      # Offer both the normalised keys and the original spellings (which may
      # contain hyphens), plus the integers as digit strings.
      tokens = (keys + values).map { |token| token.to_s }.uniq
      # Longest first, so "twenty-one" is tried before "twenty"; otherwise
      # the word boundary at the hyphen would let the shorter word win.
      tokens = tokens.sort_by { |token| -token.length }
      pattern = tokens.map { |token| Regexp.escape(token) }.join("|")
      @regexp = Regexp.new('\b(' + pattern + ')\b', Regexp::IGNORECASE)
    end

    # Normalises a string key; any other key is returned unchanged.
    def clean(key)
      key.is_a?(String) ? key.downcase.gsub(/\W/, "") : key
    end

    # Appends +item+ as the word for the next integer and returns the number
    # of words stored so far.
    def <<(item)
      store clean(item), @count
      store @count, item
      # The cached pattern no longer covers every entry.
      @regexp = nil
      @count += 1
    end

    # Looks up a word (any case or punctuation) or an integer.
    def [](key)
      super(clean(key))
    end
  end

  # The Cardinals constant maps digits to cardinal number words and back.
  Cardinals = NumberMap[%w[
    zero one two three four five six seven eight nine ten
    eleven twelve thirteen fourteen fifteen sixteen seventeen
    eighteen nineteen
  ]]
  Cardinal_Tens = %w[ twenty thirty forty fifty sixty seventy eighty ninety ]
  Cardinal_Tens.each do |tens|
    Cardinals << tens
    (1..9).each { |n| Cardinals << tens + "-" + Cardinals[n] }
  end

  # The Ordinals constant maps digits to ordinal number words and back.
  Ordinals = NumberMap[%w[
    zeroth first second third fourth fifth sixth seventh eighth ninth
    tenth eleventh twelfth thirteenth fourteenth fifteenth sixteenth
    seventeenth eighteenth nineteenth
  ]]
  Cardinal_Tens.each do |tens|
    Ordinals << tens.gsub("y", "ieth")
    (1..9).each { |n| Ordinals << tens + "-" + Ordinals[n] }
  end
end
data/navteq/README ADDED
@@ -0,0 +1,4 @@
1
+ The navteq_import script in this directory is designed to be used with Navteq's
2
+ local_streets layer. It works basically like tiger_import, except that you
3
+ provide a list of .zip files containing the local_streets.* files, either on
4
+ the command line or via standard input.
@@ -0,0 +1,37 @@
1
-- Populates the feature, edge and range tables from the rows staged in
-- local_streets. Everything runs inside one transaction.
BEGIN;

CREATE INDEX navteq_link_id ON local_streets (link_id);

-- Distinct (link, postcode) pairs taken from the right and the left side of
-- every link that has an address type, a street name and a postcode.
CREATE TEMPORARY TABLE linezip AS
    SELECT DISTINCT tlid, zip
      FROM (
            SELECT link_id AS tlid, r_postcode AS zip
              FROM local_streets
             WHERE addr_type  IS NOT NULL
               AND st_name    IS NOT NULL
               AND r_postcode IS NOT NULL
            UNION
            SELECT link_id AS tlid, l_postcode AS zip
              FROM local_streets
             WHERE addr_type  IS NOT NULL
               AND st_name    IS NOT NULL
               AND l_postcode IS NOT NULL
           ) AS both_sides;

-- One feature row per (link, postcode) pair, carrying the street name parts
-- and the metaphone code of the base name.
INSERT INTO feature
    SELECT z.tlid, s.st_nm_base, metaphone(s.st_nm_base, 5), s.st_nm_pref,
           s.st_typ_bef, NULL, s.st_nm_suff, s.st_typ_aft, NULL, 'P', z.zip
      FROM linezip AS z
      JOIN local_streets AS s ON s.link_id = z.tlid
     WHERE s.st_name IS NOT NULL;

-- One compressed geometry per link; rows that conflict with existing ones
-- are skipped.
INSERT OR IGNORE INTO edge
    SELECT z.tlid, compress_wkb_line(s.the_geom)
      FROM (SELECT DISTINCT tlid FROM linezip) AS z
      JOIN local_streets AS s ON s.link_id = z.tlid
     WHERE s.st_name IS NOT NULL;

-- Address ranges: left-hand sides tagged 'L', right-hand sides tagged 'R'.
INSERT INTO range
    SELECT s.link_id, digit_suffix(s.l_refaddr), digit_suffix(s.l_nrefaddr),
           nondigit_prefix(s.l_refaddr), s.l_postcode, 'L'
      FROM linezip AS z
      JOIN local_streets AS s ON s.link_id = z.tlid
     WHERE s.l_refaddr IS NOT NULL
    UNION
    SELECT s.link_id, digit_suffix(s.r_refaddr), digit_suffix(s.r_nrefaddr),
           nondigit_prefix(s.r_refaddr), s.r_postcode, 'R'
      FROM linezip AS z
      JOIN local_streets AS s ON s.link_id = z.tlid
     WHERE s.r_refaddr IS NOT NULL;

COMMIT;
@@ -0,0 +1,39 @@
1
#!/bin/bash
#
# navteq_import DATABASE [ARCHIVE ...]
#
# Loads Navteq local_streets layers into the geocoder SQLite DATABASE.
# Inputs are named on the command line or, when none are given, read one per
# line from standard input. Each name is either a .zip archive (with or
# without the .zip suffix) or one file of an already unpacked shapefile set.
# All output is also written to import-<pid>.log in the current directory.

SHPS="local_streets"   # layers loaded from .shp files (space-separated list)
DBFS=""                # layers loaded from .dbf files (space-separated list)
BASE=$(dirname "$0")
PATH="$PATH:$BASE/../bin"
SQL="$BASE/../sql"
HELPER_LIB="$BASE/../lib/geocoder/us/sqlite3.so"

if [ $# -lt 1 ]; then
    echo "usage: $(basename "$0") DATABASE [ARCHIVE ...]" >&2
    exit 1
fi
DATABASE=$1
shift

# Private scratch directory with an unpredictable name, removed on exit
# however the script ends.
TMP=$(mktemp -d "${TMPDIR:-/tmp}/navteq-import.XXXXXX") || exit 1
trap 'rm -rf "$TMP"' EXIT

# A database that cannot be read yet is created from the schema and place data.
[ ! -r "$DATABASE" ] && cat "$SQL/create.sql" "$SQL/place.sql" | sqlite3 "$DATABASE"

if [ -z "$1" ]; then
    cat
else
    # One name per line, without passing the names through ls.
    printf '%s\n' "$@"
fi | while IFS= read -r county; do
    echo "--- $county"
    # Test and unpack the same path, whether or not the name ends in .zip.
    archive="${county%.zip}.zip"
    if [ -r "$archive" ]; then
        unzip -q "$archive" -d "$TMP"
    else
        # Not an archive: copy every file sharing the name's stem.
        cp "${county%.*}".* "$TMP"
    fi
    # $SHPS and $DBFS are deliberately unquoted: they are word lists.
    (echo ".load $HELPER_LIB" && \
     cat "$BASE/prepare.sql" && \
     for file in $SHPS; do
        shp2sqlite -aS "$TMP/$file.shp" "$file"
     done && \
     for file in $DBFS; do
        shp2sqlite -an "$TMP/$file.dbf" "$file"
     done && \
     cat "$BASE/convert.sql") | sqlite3 "$DATABASE"
    rm -f "$TMP"/*
done 2>&1 | tee "import-$$.log"
@@ -0,0 +1,92 @@
1
-- Session settings for the bulk load, followed by creation of the
-- local_streets table that the Navteq shapefile is loaded into.

PRAGMA temp_store   = MEMORY;   -- temporary tables and indices live in RAM
PRAGMA journal_mode = MEMORY;   -- rollback journal is kept in RAM
PRAGMA synchronous  = OFF;      -- do not wait for data to reach the disk
PRAGMA cache_size   = 250000;   -- page cache size, in pages
PRAGMA count_changes = 0;       -- do not report per-statement change counts

BEGIN TRANSACTION;

CREATE TABLE "local_streets" (
    gid          integer PRIMARY KEY,
    "the_geom"   blob,
    "link_id"    integer,
    "st_name"    varchar(80),
    "feat_id"    integer,
    "st_langcd"  varchar(3),
    "num_stnmes" integer,
    "st_nm_pref" varchar(2),
    "st_typ_bef" varchar(30),
    "st_nm_base" varchar(35),
    "st_nm_suff" varchar(2),
    "st_typ_aft" varchar(30),
    "st_typ_att" varchar(1),
    "addr_type"  varchar(1),
    "l_refaddr"  varchar(10),
    "l_nrefaddr" varchar(10),
    "l_addrsch"  varchar(1),
    "l_addrform" varchar(1),
    "r_refaddr"  varchar(10),
    "r_nrefaddr" varchar(10),
    "r_addrsch"  varchar(1),
    "r_addrform" varchar(1),
    "ref_in_id"  integer,
    "nref_in_id" integer,
    "n_shapepnt" integer,
    "func_class" varchar(1),
    "speed_cat"  varchar(1),
    "fr_spd_lim" integer,
    "to_spd_lim" integer,
    "to_lanes"   integer,
    "from_lanes" integer,
    "enh_geom"   varchar(1),
    "lane_cat"   varchar(1),
    "divider"    varchar(1),
    "dir_travel" varchar(1),
    "l_area_id"  integer,
    "r_area_id"  integer,
    "l_postcode" varchar(11),
    "r_postcode" varchar(11),
    "l_numzones" integer,
    "r_numzones" integer,
    "num_ad_rng" integer,
    "ar_auto"    varchar(1),
    "ar_bus"     varchar(1),
    "ar_taxis"   varchar(1),
    "ar_carpool" varchar(1),
    "ar_pedest"  varchar(1),
    "ar_trucks"  varchar(1),
    "ar_traff"   varchar(1),
    "ar_deliv"   varchar(1),
    "ar_emerveh" varchar(1),
    "paved"      varchar(1),
    "private"    varchar(1),
    "frontage"   varchar(1),
    "bridge"     varchar(1),
    "tunnel"     varchar(1),
    "ramp"       varchar(1),
    "tollway"    varchar(1),
    "poiaccess"  varchar(1),
    "contracc"   varchar(1),
    "roundabout" varchar(1),
    "interinter" varchar(1),
    "undeftraff" varchar(1),
    "ferry_type" varchar(1),
    "multidigit" varchar(1),
    "maxattr"    varchar(1),
    "spectrfig"  varchar(1),
    "indescrib"  varchar(1),
    "manoeuvre"  varchar(1),
    "dividerleg" varchar(1),
    "inprocdata" varchar(1),
    "full_geom"  varchar(1),
    "urban"      varchar(1),
    "route_type" varchar(1),
    "dironsign"  varchar(1),
    "explicatbl" varchar(1),
    "nameonrdsn" varchar(1),
    "postalname" varchar(1),
    "stalename"  varchar(1),
    "vanityname" varchar(1),
    "junctionnm" varchar(1),
    "exitname"   varchar(1),
    "scenic_rt"  varchar(1),
    "scenic_nm"  varchar(1)
);

-- Disabled: the geometry is stored in the plain "the_geom" blob column above.
--SELECT AddGeometryColumn('','local_streets','the_geom','-1','MULTILINESTRING',2);

COMMIT;
data/sql/cluster.sql ADDED
@@ -0,0 +1,16 @@
1
+ .echo on
2
+ -- Trade durability for speed during the copy: temp data in memory, no rollback journal, no waiting for disk syncs, larger page cache.
3
+ PRAGMA temp_store=MEMORY;
4
+ PRAGMA journal_mode=OFF;
5
+ PRAGMA synchronous=OFF;
6
+ PRAGMA cache_size=500000;
7
+ PRAGMA count_changes=0;
8
+ BEGIN TRANSACTION;
9
+ -- Copy each table from the "old" schema (presumably a database attached by the caller - confirm), ordering the rows by their indexes
10
+ -- so that related rows sit together and fewer disk pages need to be read on each query.
11
+ INSERT INTO place SELECT * FROM old.place ORDER BY zip, priority;
12
+ INSERT INTO edge SELECT * FROM old.edge ORDER BY tlid;
13
+ INSERT INTO feature SELECT * FROM old.feature ORDER BY street_phone, zip;
14
+ INSERT INTO feature_edge SELECT * FROM old.feature_edge ORDER BY fid;
15
+ INSERT INTO range SELECT * FROM old.range ORDER BY tlid;
16
+ COMMIT;