geonames_local 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/README.rdoc +76 -3
- data/Rakefile +57 -0
- data/VERSION +1 -0
- data/bin/geonames +9 -0
- data/lib/config/codes.yml +24 -0
- data/lib/config/geonames.yml +14 -0
- data/lib/data/geoquery.rb +31 -0
- data/lib/data/postgres.rb +69 -0
- data/lib/data/tokyo.rb +84 -0
- data/lib/features/city.rb +9 -0
- data/lib/features/country.rb +61 -0
- data/lib/features/province.rb +18 -0
- data/lib/features/road.rb +2 -0
- data/lib/features/spot.rb +88 -0
- data/lib/geonames_local.rb +17 -0
- data/lib/work/cli.rb +144 -0
- data/lib/work/dump.rb +76 -0
- data/lib/work/export.rb +21 -0
- data/spec/data/tokyo_spec.rb +29 -0
- data/spec/geonames_local_spec.rb +37 -1
- data/task/benchmark.rb +27 -0
- data/task/benchmark_cabinet.rb +24 -0
- metadata +34 -5
- data/bin/geoname +0 -0
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -1,9 +1,82 @@
|
|
1
|
-
=
|
1
|
+
= Geonames Local
|
2
|
+
|
3
|
+
Download and store in tokyo or postgresql Geonames.org data.
|
4
|
+
Making every Geoname API operation possible on your servers.
|
5
|
+
No hit limit, fast as possible.
|
6
|
+
|
7
|
+
== Features/Problems
|
8
|
+
|
9
|
+
- Local relational mapping of geonames data
|
10
|
+
- Map geonames fields to your db schema (not done yet)
|
11
|
+
- Countries are a static yml file :/
|
12
|
+
|
13
|
+
== Usage
|
14
|
+
|
15
|
+
geonames conf
|
16
|
+
|
17
|
+
Will generate a "geonames.yml" file in the current folder.
|
18
|
+
The file is self explanatory.
|
19
|
+
|
20
|
+
geonames -c geonames.yml
|
21
|
+
|
22
|
+
Work. Use -v for verbose.
|
23
|
+
|
24
|
+
If you are not sure of your country code, use:
|
25
|
+
|
26
|
+
geonames list <search>
|
27
|
+
|
28
|
+
|
29
|
+
== Relational Mapping
|
30
|
+
|
31
|
+
When using PG, this gem will (try to) relationally map Geonames
|
32
|
+
data onto your schema. PostgreSQL is done; Tokyo still needs heavy work.
|
33
|
+
|
34
|
+
|
35
|
+
== PostgreSQL
|
36
|
+
|
37
|
+
So, supposing ActiveRecord, something like this is possible:
|
38
|
+
|
39
|
+
City.first.province.country.abbr
|
40
|
+
=> "BR"
|
41
|
+
|
42
|
+
=== Migration
|
43
|
+
|
44
|
+
Default PG migration:
|
45
|
+
|
46
|
+
create_table :cities do |t|
|
47
|
+
t.references :country, :null => false
|
48
|
+
t.references :province
|
49
|
+
t.string :name, :null => false
|
50
|
+
t.point :geom, :srid => 4326
|
51
|
+
t.integer :gid, :zip
|
52
|
+
end
|
53
|
+
|
54
|
+
create_table :provinces do |t|
|
55
|
+
t.references :country, :null => false
|
56
|
+
t.string :name, :null => false
|
57
|
+
t.string :abbr, :limit => 2, :null => false
|
58
|
+
t.integer :gid
|
59
|
+
end
|
60
|
+
|
61
|
+
create_table :countries do |t|
|
62
|
+
t.string :name, :limit => 30, :null => false
|
63
|
+
t.string :abbr, :limit => 2, :null => false
|
64
|
+
end
|
65
|
+
|
66
|
+
add_index :cities, :name
|
67
|
+
add_index :cities, :gid
|
68
|
+
add_index :cities, :zip
|
69
|
+
add_index :cities, :country_id
|
70
|
+
add_index :cities, :province_id
|
71
|
+
add_index :cities, :geom, :spatial => true
|
72
|
+
add_index :provinces, :name
|
73
|
+
add_index :provinces, :abbr
|
74
|
+
add_index :provinces, :gid
|
75
|
+
add_index :provinces, :country_id
|
2
76
|
|
3
|
-
Description goes here.
|
4
77
|
|
5
78
|
== Note on Patches/Pull Requests
|
6
|
-
|
79
|
+
|
7
80
|
* Fork the project.
|
8
81
|
* Make your feature addition or bug fix.
|
9
82
|
* Add tests for it. This is important so I don't break it in a
|
data/Rakefile
CHANGED
@@ -11,6 +11,7 @@ begin
|
|
11
11
|
gem.homepage = "http://github.com/nofxx/geonames_local"
|
12
12
|
gem.authors = ["Marcos Piccinini"]
|
13
13
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
14
|
+
gem.add_dependency "tokyotyrant", ">= 1.10"
|
14
15
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
16
|
end
|
16
17
|
Jeweler::GemcutterTasks.new
|
@@ -43,3 +44,59 @@ Rake::RDocTask.new do |rdoc|
|
|
43
44
|
rdoc.rdoc_files.include('README*')
|
44
45
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
45
46
|
end
|
47
|
+
|
48
|
+
#
|
49
|
+
# Tokyo Tyrant rake tasks
|
50
|
+
#
|
51
|
+
namespace :tyrant do
|
52
|
+
TYRANT_DB_FILE = File.join("tyrant.tct")
|
53
|
+
TYRANT_PID_FILE = File.join("tyrant.pid")
|
54
|
+
TYRANT_LOG_FILE = File.join("tyrant.log")
|
55
|
+
|
56
|
+
desc "Start Tyrant server"
|
57
|
+
task :start do
|
58
|
+
raise RuntimeError, "Tyrant is already running." if tyrant_running?
|
59
|
+
system "ttserver -pid #{TYRANT_PID_FILE} -log #{TYRANT_LOG_FILE} #{TYRANT_DB_FILE}&"
|
60
|
+
sleep(2)
|
61
|
+
if tyrant_running?
|
62
|
+
puts "Tyrant started successfully (pid #{tyrant_pid})."
|
63
|
+
else
|
64
|
+
puts "Failed to start tyrant push server. Check logs."
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
desc "Stop Tyrant server"
|
69
|
+
task :stop do
|
70
|
+
raise RuntimeError, "Tyrant isn't running." unless tyrant_running?
|
71
|
+
system "kill #{tyrant_pid}"
|
72
|
+
sleep(2)
|
73
|
+
if tyrant_running?
|
74
|
+
puts "Tyrant didn't stopped. Check the logs."
|
75
|
+
else
|
76
|
+
puts "Tyrant stopped."
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
desc "Restart Tyrant server"
|
81
|
+
task :restart => [:stop, :start]
|
82
|
+
|
83
|
+
desc "Get Tyrant Server Status"
|
84
|
+
task :status do
|
85
|
+
puts tyrant_running? ? "Tyrant running. (#{tyrant_pid})" : "Tyrant not running."
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
#
# Pid recorded in TYRANT_PID_FILE.
#
# Returns the leading integer found in the file, or 0 when the file is
# missing, unreadable or does not start with a number.
def tyrant_pid
  File.read(TYRANT_PID_FILE).to_i
rescue SystemCallError
  0
end

#
# Is the process named in TYRANT_PID_FILE alive?
#
# Returns the pid (truthy) when a live process owns the pidfile and false
# otherwise. A pidfile that points at no live process is erased.
#
# The previous version returned `tyrant_pid` after erasing the pidfile;
# that value is 0, which is truthy in Ruby, so a dead server was reported
# as running.
def tyrant_running?
  return false unless File.exist?(TYRANT_PID_FILE)

  pid = tyrant_pid
  return pid if pid > 0 && tyrant_process_alive?(pid)

  puts "Erasing pidfile..."
  begin
    File.delete(TYRANT_PID_FILE)
  rescue Errno::ENOENT
    # Somebody else removed it in the meantime; nothing left to do.
  end
  false
end

#
# Probe a pid with signal 0, which checks for existence without
# delivering a signal.
def tyrant_process_alive?(pid)
  Process.kill(0, pid)
  true
rescue Errno::ESRCH
  false
rescue Errno::EPERM
  # The process exists but belongs to another user.
  true
end
|
102
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.3
|
data/bin/geonames
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
:br:
|
2
|
+
:pt_br: Brasil
|
3
|
+
:en_us: Brazil
|
4
|
+
:us:
|
5
|
+
:pt_br: Estados Unidos
|
6
|
+
:en_us: United States
|
7
|
+
:cl:
|
8
|
+
:pt_br: Chile
|
9
|
+
:en_us: Chile
|
10
|
+
:ar:
|
11
|
+
:pt_br: Argentina
|
12
|
+
:en_us: Argentina
|
13
|
+
:ug:
|
14
|
+
:pt_br: Uganda
|
15
|
+
:en_us: Uganda
|
16
|
+
:td:
|
17
|
+
:pt_br: Chad
|
18
|
+
:en_us: Chad
|
19
|
+
:ru:
|
20
|
+
:pt_br: Rússia
|
21
|
+
:en_us: Russia
|
22
|
+
:it:
|
23
|
+
:pt_br: Itália
|
24
|
+
:en_us: Italy
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Geonames
|
2
|
+
#
# Bounding-box queries against a Tokyo table database.
#
# Both queries filter on the numeric "x" and "y" columns and order the
# result by "x" ascending.
class Geoquery
  # Default half-width of the box searched around a point by .point.
  # NOTE(review): presumably in the same unit as the stored x/y values
  # (degrees?) - confirm against the writer side.
  R = 1

  # Maximum number of records returned by .point.
  POINT_LIMIT = 80

  #
  # Records inside the square of half-width +radius+ centred on (x, y).
  #
  # The previous body referenced undefined minx/maxx/miny/maxy locals and
  # never ran the query; the box is now derived from the point and the
  # search result is returned.
  #
  # tdb    - table database handle given to TDBQRY.new
  # x, y   - numeric centre of the box
  # radius - numeric half-width of the box (defaults to R)
  #
  # Returns whatever TDBQRY#search returns, limited to POINT_LIMIT.
  def self.point(tdb, x, y, radius = R)
    qry = box_query(tdb, x - radius, x + radius, y - radius, y + radius)
    qry.setlimit(POINT_LIMIT)
    qry.search
  end

  #
  # Records with minx <= x <= maxx and miny <= y <= maxy.
  #
  # Logs the number of hits through `info` and returns the search result.
  def self.area(tdb, minx, maxx, miny, maxy)
    res = box_query(tdb, minx, maxx, miny, maxy).search
    info res.length # number of results found
    res
  end

  #
  # Build (but do not run) the query shared by .point and .area.
  def self.box_query(tdb, minx, maxx, miny, maxy)
    qry = TDBQRY.new(tdb)
    qry.addcond("x", TDBQRY::QCNUMGE, minx.to_s)
    qry.addcond("x", TDBQRY::QCNUMLE, maxx.to_s)
    qry.addcond("y", TDBQRY::QCNUMGE, miny.to_s)
    qry.addcond("y", TDBQRY::QCNUMLE, maxy.to_s)
    qry.setorder("x", TDBQRY::QONUMASC)
    qry
  end
  private_class_method :box_query
end
|
31
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require "pg"
|
2
|
+
|
3
|
+
module Geonames
|
4
|
+
#
# Minimal PostgreSQL store: looks up and inserts countries, provinces and
# cities through hand-built SQL on a PGconn connection.
class Postgres
  # Process-wide caches of database ids, keyed by country code and by
  # province code respectively.
  Countries = {}
  Provinces = {}

  def initialize(opts) #table, addr = "localhost", port = 5432)
    @conn = PGconn.new(opts)
  end

  #
  # Get Country and Province ID from the DB
  #
  # The country row is created when it does not exist yet. The province id
  # is nil when the province is not in Cache[:provinces] or not stored yet
  # (this used to raise NoMethodError on nil).
  #
  # Returns [country_id, province_id].
  def get_some_ids(some)
    c = Countries[some.country] ||= find_country(some.country)
    c ||= Countries[some.country] =
      write("countries", {:name => Codes[some.country.downcase.to_sym][:pt_br], :abbr => some.country })

    p = Provinces[some.province] ||= find_province(some.province)
    [c, p]
  end

  #
  # Insert a record
  #
  # Cities get country, province, geometry, gid and zip; everything else
  # is written as a province.
  def insert(some)
    country_id, province_id = get_some_ids(some)
    if some.kind == :cities
      write("cities", {:name => some.name, :country_id => country_id,
              :geom => some.geom.as_hex_ewkb, :gid => some.gid,
              :zip => some.zip, :province_id => province_id})
    else
      write("provinces", { :name => some.name, :abbr => some.abbr,
              :country_id => country_id, :gid => some.gid })
    end
  end

  #
  # Find a record's ID
  #
  # kind - table name (internal value, interpolated as an identifier)
  # id   - geonames id; anything that is not an integer yields nil instead
  #        of being pasted into the SQL text
  #
  # Returns the row id, or nil when nothing matches or the query fails.
  def find(kind, id)
    select_id("SELECT #{kind}.id FROM #{kind} WHERE #{kind}.gid = #{Integer(id)}")
  rescue ArgumentError, TypeError
    nil
  end

  #
  # F'oo -> F''oo (for pg)
  #
  # Backslashes are doubled as well: values are emitted inside E'...'
  # literals, where a backslash starts an escape sequence, so a lone
  # backslash before a quote would otherwise terminate the literal early.
  def escape_name(name)
    name.gsub("\\") { "\\\\" }.gsub("'", "''")
  end

  #
  # Render an array of Ruby values as a comma separated SQL value list.
  # Strings become escaped E'...' literals, nil becomes NULL and anything
  # else is interpolated as is.
  def pg_values(arr)
    arr.map do |v|
      case v
      when String   then "E'#{escape_name(v)}'"
      when NilClass then 'NULL'
      else v
      end
    end.join(",")
  end

  #
  # Naive PG insert ORM =D
  #
  # Returns the id of the inserted row.
  def write(table, hsh)
    for_pg = pg_values(hsh.values)
    @conn.exec("INSERT INTO #{table} (#{hsh.keys.join(",")}) VALUES(#{for_pg}) RETURNING id")[0]["id"]
  end

  private

  # Run a single-column id lookup; nil when there is no row or the query
  # fails (best-effort, as before).
  def select_id(sql)
    @conn.exec(sql)[0]["id"]
  rescue StandardError
    nil
  end

  # Case-insensitive lookup of a country id by its two letter code.
  def find_country(code)
    select_id("SELECT countries.id FROM countries " \
              "WHERE UPPER(countries.abbr) = UPPER(E'#{escape_name(code.to_s)}')")
  end

  # Resolve a province code to its database id via the parsed provinces
  # kept in Cache[:provinces].
  def find_province(code)
    spot = (Cache[:provinces] || []).find { |prov| prov.province == code }
    spot && find("provinces", spot.gid)
  end
end
|
69
|
+
end
|
data/lib/data/tokyo.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
|
2
|
+
module Geonames
|
3
|
+
class Tokyo
|
4
|
+
|
5
|
+
def initialize(conn=nil, resource=nil, extra=nil)
|
6
|
+
if conn
|
7
|
+
require 'tokyotyrant'
|
8
|
+
resource ||= 'localhost'
|
9
|
+
extra ||= 1978
|
10
|
+
@tbl = TokyoTyrant::RDBTBL
|
11
|
+
@qry = TokyoTyrant::RDBQRY
|
12
|
+
else
|
13
|
+
require 'tokyocabinet'
|
14
|
+
resource ||= 'geonames.tct'
|
15
|
+
extra ||= (TokyoCabinet::TDB::OWRITER | TokyoCabinet::TDB::OCREAT)
|
16
|
+
@tbl = TokyoCabinet::TDB
|
17
|
+
@qry = TokyoCabinet::TDBQRY
|
18
|
+
end
|
19
|
+
@rdb = @tbl.new
|
20
|
+
@rdb.open(resource, extra)
|
21
|
+
set_indexes
|
22
|
+
end
|
23
|
+
|
24
|
+
def all(params)
|
25
|
+
qry = @qry.new(@rdb)
|
26
|
+
params.each do |k,v|
|
27
|
+
#qry.addcond(k.to_s, Q::QCNUMEQ, v.to_s)
|
28
|
+
qry.addcond(k.to_s, @qry::QCSTREQ, v.to_s)
|
29
|
+
end
|
30
|
+
qry.setorder("name", @qry::QOSTRASC)
|
31
|
+
qry.search.map { |id| @rdb.get(id) }
|
32
|
+
end
|
33
|
+
|
34
|
+
def find(id)
|
35
|
+
#qry = Q.new(@rdb)
|
36
|
+
#qry.addcond("gid", Q::QCNUMEQ, id.to_s)
|
37
|
+
#qry.setlimit(10)
|
38
|
+
#id = qry.search.pop
|
39
|
+
@rdb.get(id)
|
40
|
+
end
|
41
|
+
|
42
|
+
# def to_obj(hsh)
|
43
|
+
# hsh["kind"] == "country" ? Country.new(hsh) : Spot.new(hsh)
|
44
|
+
# end
|
45
|
+
|
46
|
+
def write(o)
|
47
|
+
# pkey = @rdb.genuid
|
48
|
+
if @rdb.put(o.gid, o.to_hash)
|
49
|
+
# info "ok"
|
50
|
+
else
|
51
|
+
info "err #{@rdb.errmsg(@rdb.ecode)}"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def count
|
56
|
+
@qry.new(@rdb).search.length
|
57
|
+
end
|
58
|
+
|
59
|
+
def close
|
60
|
+
# close the database
|
61
|
+
if !@rdb.close
|
62
|
+
STDERR.printf("close error: %s\n", @rdb.errmsg(@rdb.ecode))
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
def set_indexes
|
67
|
+
#for index in indexes
|
68
|
+
# @rdb.setindex("gid", @tbl::ITOPT)
|
69
|
+
@rdb.setindex("kind", @tbl::ITLEXICAL)
|
70
|
+
@rdb.setindex("name", @tbl::ITQGRAM)
|
71
|
+
@rdb.setindex("country", @tbl::ITLEXICAL)
|
72
|
+
|
73
|
+
#end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
def flush
|
78
|
+
@rdb.vanish
|
79
|
+
end
|
80
|
+
|
81
|
+
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Geonames
|
2
|
+
#
# A country, built either from a tab separated countryInfo row or from
# the hash stored in the Tokyo table (see #to_hash).
class Country
  attr_accessor :code, :name, :gid, :iso, :capital, :pop

  # Position of the geoname id in a countryInfo row (see column list on
  # .parse). The previous destructuring read it two columns too early.
  GID_COLUMN = 16

  #
  # Every record of kind "country" found in the Tokyo store.
  def self.all
    Tokyo.new.all({ :kind => "country" }).map { |c| new(c) }
  end

  # [0] iso alpha2
  # [1] iso alpha3
  # [2] iso numeric
  # [3] fips code
  # [4] name
  # [5] capital
  # [6] areaInSqKm
  # [7] population
  # [8] continent
  # [9] top level domain
  # [10] Currency code
  # [11] Currency name
  # [12] Phone
  # [13] Postal Code Format
  # [14] Postal Code Regex
  # [15] Languages
  # [16] Geoname id
  # [17] Neighbours
  # [18] Equivalent Fips Code
  #
  def self.parse(row)
    new(row)
  end

  #
  # params - a tab separated row (String), or a hash with the string keys
  #          written by #to_hash ("gid", "name", "code").
  #
  # The previous version always raised: it called parse(params) while
  # parse accepted no argument and read undefined locals.
  def initialize(params)
    if params.respond_to?(:split)
      parse(params)
    else
      load_hash(params)
    end
  end

  #
  # Fill the attributes from a countryInfo row, using the column layout
  # documented on .parse. The gid is stored as an Integer.
  def parse(row)
    cols = row.chomp.split(/\t/)
    @iso, @iso3, @ison, @fips, @name, @capital, @area, @pop = cols
    @gid = cols[GID_COLUMN].to_i if cols[GID_COLUMN]
    @code = @iso
  end

  def cities
    # qry.addcond("country", TBDQRY::QSTREQ, @code)
  end

  # For tokyo
  def to_hash
    { "gid" => @gid.to_s, "name" => @name, "kind" => "country", "code" => @code}
  end

  # Values of one export row, in the order given by #export_header.
  def export
    [@gid, @code, @name]
  end

  def export_header
    ["gid", "code", "name"]
  end

  private

  # Fill the attributes from a stored hash (the shape #to_hash writes).
  def load_hash(params)
    @code = @iso = params["code"]
    @name = params["name"]
    @gid = params["gid"] && params["gid"].to_i
  end
end
|
61
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Geonames
|
2
|
+
class Province
|
3
|
+
attr_accessor :code, :name, :gid
|
4
|
+
|
5
|
+
def self.all
|
6
|
+
Tokyo.new.all({ :kind => "province" }).map do |c|
|
7
|
+
new(c)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize(params)
|
12
|
+
@code = params["code"]
|
13
|
+
@name = params["name"]
|
14
|
+
@gid = params["gid"]
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Geonames
|
2
|
+
#
# = Geonames Spot
#
# Every geoname type will be parsed as a spot
class Spot
  attr_accessor :gid, :name, :ascii, :lat, :lon, :country, :kind,
                :code, :pop, :tz, :geom, :province, :zip, :abbr
  alias :x :lon
  alias :y :lat
  alias :geoname_id :gid

  #
  # params - one tab separated row; anything that is not a String leaves
  #          the spot empty
  # k      - :zip to parse a postal code row, anything else (or nothing)
  #          to parse a dump row. The argument is optional because the
  #          specs build spots with the row only.
  def initialize(params, k = nil)
    return unless params.instance_of? String
    k == :zip ? parse_zip(params) : parse(params)
    if @kind == :provinces
      @name.gsub!(/Estado d\w\s/, "") if @name
      @abbr = get_abbr
    end
  end

  # Geonames doesn't have province/state abbr..#fail!
  # This works 75% of the time in brazil heh
  #
  # Several words: initials of the first and last word. One word: its
  # first two letters. Returns nil when there is no name.
  def get_abbr
    s = @name.to_s.split(" ")
    return nil if s.empty?

    if s.length > 1
      [s.first[0, 1], s.last[0, 1]].map(&:upcase).join
    else
      s.first[0..1].upcase
    end
  end

  #
  # Parse Geonames Dump Export
  def parse(row)
    gid, @name, @ascii, @alternates, lat, lon, _feat, kind,
      @country, _cc2, adm1, adm2, _adm3, _adm4, _pop, _ele,
      _gtop, @tz, @up = row.split(/\t/)

    parse_geom(lat, lon)
    @gid = gid.to_i
    @kind = human_code(kind)
    @province = adm1
    @code = adm2
  end

  #
  # Parse Geonames Zip Export
  #
  # Only the part of the postal code before the first "-" is kept.
  def parse_zip(row)
    _country, zip, @name, _province, _cc, _dunno, adm1, _adm2, lat, lon = row.split(/\t/)
    parse_geom(lat, lon)
    @code = adm1
    @kind = :cities
    @zip = zip.to_s.split("-")[0]
  end

  #
  # Parse Geom to float or GeoRuby Point
  #
  # The point is only built when GeoRuby has been loaded. The check used
  # to be defined?("GeoRuby"), which is truthy for any string literal and
  # therefore raised NameError whenever GeoRuby was absent.
  def parse_geom(lat, lon)
    @lat, @lon = lat.to_f, lon.to_f

    if defined?(GeoRuby)
      @geom = GeoRuby::SimpleFeatures::Point.from_x_y(@lon, @lat)
    end
  end

  #
  # Parse Time
  #
  # Returns the modification date of the row as a UTC Time, or nil when
  # the row carried none (e.g. zip rows).
  def updated_at
    return nil if @up.nil?

    Time.utc(*@up.strip.split("-").map(&:to_i))
  end

  # For tokyo
  def to_hash
    { "gid" => @gid.to_s, "kind" => @kind.to_s, "name" => @name, "ascii" => @ascii,
      "lat" => @lat.to_s, "lon" => @lon.to_s, "tz" => @tz, "country" => @country }
  end

  # Map a geonames feature code to the kind used by the stores.
  def human_code(code)
    case code
    when 'ADM1' then :provinces
    when 'ADM2' then :cities
    else :other
    end
  end
end
|
88
|
+
end
|
data/lib/geonames_local.rb
CHANGED
@@ -1,6 +1,23 @@
|
|
1
1
|
require 'features/spot'
|
2
2
|
require 'features/country'
|
3
3
|
require 'features/city'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'logger'
|
6
|
+
require 'data/tokyo'
|
7
|
+
require 'data/postgres'
|
8
|
+
require 'work/cli'
|
9
|
+
require 'work/dump'
|
10
|
+
require 'work/export'
|
4
11
|
|
5
12
|
module Geonames
|
13
|
+
Opt = {}
|
14
|
+
Cache = {:dump => [], :zip => []}
|
15
|
+
Codes = YAML.load(File.read(File.join(File.dirname(__FILE__),'config', 'codes.yml')))
|
16
|
+
VERSION = File.read(File.join(File.dirname(__FILE__), '..', 'VERSION'))
|
17
|
+
|
18
|
+
def info(txt)
|
19
|
+
if Opt[:verbose]
|
20
|
+
puts(txt)
|
21
|
+
end
|
22
|
+
end
|
6
23
|
end
|
data/lib/work/cli.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
#
|
2
|
+
# Geonames Local
|
3
|
+
#
|
4
|
+
require 'optparse'
|
5
|
+
module Geonames
|
6
|
+
class CLI
|
7
|
+
|
8
|
+
def self.parse_options(argv)
|
9
|
+
options = {}
|
10
|
+
|
11
|
+
argv.options do |opts|
|
12
|
+
opts.banner = <<BANNER
|
13
|
+
Geonames Command Line Usage:
|
14
|
+
|
15
|
+
geonames <country code(s)> <opts>
|
16
|
+
|
17
|
+
BANNER
|
18
|
+
opts.on("-l", "--level LEVEL", String, "The level of logging to report" ) { |level| options[:level] = level }
|
19
|
+
opts.on("-d", "--dump", "Dump DB before all" ) { options[:dump] = true }
|
20
|
+
opts.separator ""
|
21
|
+
opts.separator "Config file:"
|
22
|
+
opts.on("-c", "--config CONFIG", String, "Geonames Config file path" ) { |file| options[:config] = file }
|
23
|
+
opts.separator ""
|
24
|
+
opts.separator "Tyrant Options:"
|
25
|
+
opts.on("-t", "--tyrant", "Use tyrant" ) { options[:tyrant] = true }
|
26
|
+
opts.on("-s", "--server SERVER", String, "Tyrant Server URL" ) { |url| options[:server] = url }
|
27
|
+
opts.on("-p", "--port PORT", Integer, "Tyrant Server Port") { |val| options[:port] = val.to_i }
|
28
|
+
opts.separator ""
|
29
|
+
opts.separator "Common Options:"
|
30
|
+
opts.on("-h", "--help", "Show this message" ) { puts opts; exit }
|
31
|
+
opts.on("-v", "--verbose", "Turn on logging to STDOUT" ) { |bool| options[:verbose] = bool }
|
32
|
+
opts.on("-V", "--version", "Show version") { puts Geonames::VERSION; exit }
|
33
|
+
opts.separator ""
|
34
|
+
begin
|
35
|
+
opts.parse!
|
36
|
+
if argv.empty? && !options[:config]
|
37
|
+
puts opts
|
38
|
+
exit
|
39
|
+
end
|
40
|
+
rescue
|
41
|
+
puts opts
|
42
|
+
exit
|
43
|
+
end
|
44
|
+
end
|
45
|
+
options
|
46
|
+
end
|
47
|
+
private_class_method :parse_options
|
48
|
+
|
49
|
+
def self.work(argv)
|
50
|
+
trap(:INT) { stop! }
|
51
|
+
trap(:TERM) { stop! }
|
52
|
+
Opt.merge! parse_options(argv)
|
53
|
+
|
54
|
+
if Opt[:config]
|
55
|
+
Opt.merge! YAML.load(File.read(Opt[:config]))
|
56
|
+
end
|
57
|
+
|
58
|
+
if argv[0] =~ /list|codes/
|
59
|
+
Codes.each do |key,val|
|
60
|
+
str = [val.values, key.to_s].join(" ").downcase
|
61
|
+
if s = argv[1]
|
62
|
+
next unless str =~ /#{s.downcase}/
|
63
|
+
end
|
64
|
+
puts "#{val[:en_us]}: #{key}"
|
65
|
+
end
|
66
|
+
exit
|
67
|
+
end
|
68
|
+
|
69
|
+
if argv[0] =~ /scaff|conf/
|
70
|
+
fname = (argv[1] || "geonames") + ".yml"
|
71
|
+
if File.exist?(fname)
|
72
|
+
puts "File exists."
|
73
|
+
else
|
74
|
+
puts "Writing to #{fname}"
|
75
|
+
`cp #{File.join(File.dirname(__FILE__), '..', 'config', 'geonames.yml')} #{fname}`
|
76
|
+
end
|
77
|
+
exit
|
78
|
+
end
|
79
|
+
require "geo_ruby" if Opt[:mapping] && Opt[:mapping][:geom]
|
80
|
+
|
81
|
+
if argv[0] =~ /csv|json/
|
82
|
+
Geonames::Export.new(Country.all).to_csv
|
83
|
+
else
|
84
|
+
Geonames::Dump.work(Opt[:codes], :zip) #rescue puts "Command not found: #{comm} #{@usage}"
|
85
|
+
Geonames::Dump.work(Opt[:codes], :dump) #rescue puts "Command not found: #{comm} #{@usage}"
|
86
|
+
info "\n---\nTotal #{Cache[:dump].length} parsed. #{Cache[:zip].length} zips."
|
87
|
+
info "Join dump << zip"
|
88
|
+
unify!
|
89
|
+
write_to_store!
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def self.write_to_store!
|
94
|
+
db = case Opt[:store].to_sym
|
95
|
+
when :tyrant then Geonames::Tokyo.new(Opt[:tyrant])
|
96
|
+
when :pg then Geonames::Postgres.new(Opt[:db])
|
97
|
+
else
|
98
|
+
info "No store defined!"
|
99
|
+
exit
|
100
|
+
end
|
101
|
+
|
102
|
+
groups = Cache[:dump].group_by(&:kind)
|
103
|
+
Cache[:provinces] = groups[:provinces]
|
104
|
+
# ensure this order....
|
105
|
+
do_write(db, groups[:provinces])
|
106
|
+
do_write(db, groups[:cities])
|
107
|
+
end
|
108
|
+
|
109
|
+
def self.do_write(db, val)
|
110
|
+
key = val[0].kind
|
111
|
+
start = Time.now
|
112
|
+
writt = 0
|
113
|
+
info "\nWriting #{key}..."
|
114
|
+
val.each do |v|
|
115
|
+
unless db.find v.kind, v.gid
|
116
|
+
db.insert v
|
117
|
+
writt += 1
|
118
|
+
end
|
119
|
+
end
|
120
|
+
total = Time.now - start
|
121
|
+
info "#{writt} #{key} written in #{total} sec (#{(writt/total).to_i}/s)"
|
122
|
+
end
|
123
|
+
|
124
|
+
def self.unify!
|
125
|
+
start = Time.now
|
126
|
+
Cache[:dump].map! do |spot|
|
127
|
+
if other = Cache[:zip].find { |d| d.code == spot.code }
|
128
|
+
spot.zip = other.zip
|
129
|
+
spot
|
130
|
+
else
|
131
|
+
spot
|
132
|
+
end
|
133
|
+
end
|
134
|
+
info "Done. #{(Time.now-start).to_i}s"
|
135
|
+
end
|
136
|
+
|
137
|
+
def self.stop!
|
138
|
+
puts "Closing Geonames..."
|
139
|
+
exit
|
140
|
+
end
|
141
|
+
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
data/lib/work/dump.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
module Geonames
|
2
|
+
#
# Downloads, unpacks and parses the geonames.org export files for a list
# of country codes, appending every parsed record to Cache[kind].
class Dump
  URL = "http://download.geonames.org/export/"
  TMP = "/tmp/geonames/"

  #
  # codes - collection of country codes (or "country" for the country
  #         list); anything that does not respond to #each is ignored
  # kind  - :dump or :zip, also the name of the remote folder and of the
  #         local working folder below TMP
  #
  # All the work (download, unzip, parse) happens right here.
  def initialize(codes, kind)
    @codes = codes
    @kind = kind
    return unless codes.respond_to? :each

    codes.each do |code|
      info "\nWorking on #{kind} for #{code}"
      file = get_file(code)
      download file
      uncompress file unless code == "country"
      parse file
    end
  end

  # Remote file name for a country code.
  def get_file(code)
    code == "country" ? "countryInfo.txt" : "#{code.upcase}.zip"
  end

  #
  # Fetch +file+ into the working folder unless it is already there.
  #
  # File.exists? is gone since Ruby 3.2, hence File.exist?. curl is run
  # without a shell, and with -f so that an HTTP error does not leave an
  # error page behind under the archive name.
  def download(file)
    [TMP, kind_dir].each { |dir| Dir.mkdir(dir) unless File.exist?(dir) }
    fname = File.join(kind_dir, file)
    return if File.exist?(fname)

    system("curl", "-f", "#{URL}#{@kind}/#{file}", "-o", fname)
  end

  # Unpack +file+ next to itself, overwriting older extractions.
  def uncompress(file)
    info "Uncompressing #{file}"
    system("unzip", "-quo", File.join(kind_dir, file), "-d", kind_dir)
  end

  #
  # Turn one line of an export file into a Spot.
  #
  # Returns nil for comment/header lines and, unless Opt[:level] is "all",
  # for dump rows that do not mention an ADMn feature code.
  def parse_line(l)
    return if l =~ /^#|^iso/i
    if @kind == :dump
      if l =~ /^\D/
        # NOTE(review): the parsed Country is discarded and the row still
        # becomes a Spot below - kept as is, confirm the intent.
        Country.parse(l)
      elsif Opt[:level] != "all"
        return unless l =~ /ADM\d/ # ADM2 => cities
      end
    end
    Spot.new(l, @kind)
  end

  #
  # Parse the text file extracted from +file+ and push every record onto
  # Cache[@kind]. A missing file is logged and skipped.
  def parse(file)
    red = 0
    start = Time.now
    File.open(File.join(kind_dir, file.sub(/\.zip\z/, ".txt"))) do |f|
      while line = f.gets
        if record = parse_line(line)
          Cache[@kind] << record
          red += 1
        end
      end
      total = Time.now - start
      # A zero elapsed time would make the division yield Infinity/NaN,
      # whose to_i raises FloatDomainError.
      rate = total > 0 ? (red / total).to_i : red
      info "#{red} #{@kind} entries parsed in #{total} sec (#{rate}/s)"
    end
  rescue Errno::ENOENT
    info "Failed to download #{file}, skipping."
  end

  def self.work(codes=:all, kind=:dump)
    new(codes, kind)
  end

  private

  # Working folder of the current kind, below TMP.
  def kind_dir
    File.join(TMP, @kind.to_s)
  end
end
|
76
|
+
end
|
data/lib/work/export.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module Geonames
|
4
|
+
#
# Writes a list of exportable objects to a CSV file.
#
# Every object must answer #export (array of values); the first one also
# provides the header row through #export_header.
class Export

  def initialize(data)
    info "Starting export.."
    @data = data
  end

  #
  # Write the header followed by one row per object.
  #
  # file - path of the CSV file to (over)write, "export.csv" by default
  #
  # An empty data set produces an empty file; it used to crash asking nil
  # for its header.
  def to_csv(file = "export.csv")
    info "Writing #{file} (#{@data.length} objects)"
    CSV.open(file, 'w') do |csv|
      csv << @data[0].export_header unless @data.empty?
      @data.each { |o| csv << o.export }
    end
    info "Export done."
  end
end
|
21
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
|
3
|
+
describe "Tokyo" do
|
4
|
+
|
5
|
+
it "should write down a spot" do
|
6
|
+
t = Geonames::Tokyo.new('localhost', 1978)
|
7
|
+
m = mock(Geonames::Spot, { :gid => 888, :to_hash => { "gid" => 888, "kind" => "city", "lat" => 5.5 }})
|
8
|
+
t.write(m)
|
9
|
+
end
|
10
|
+
|
11
|
+
it "should read it up" do
|
12
|
+
t = Geonames::Tokyo.new('localhost', 1978)
|
13
|
+
record = t.find(888)
|
14
|
+
record.should be_instance_of Geonames::Spot
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should not duplicate" do
|
18
|
+
t = Geonames::Tokyo.new('localhost', 1978)
|
19
|
+
t.all({ :gid => 888}).length.should eql(1)
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should return all countries" do
|
23
|
+
all = Geonames::Country.all
|
24
|
+
all.should be_instance_of Array
|
25
|
+
all[0].should be_instance_of Geonames::Country
|
26
|
+
all[0].gid.should eql(1)
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
data/spec/geonames_local_spec.rb
CHANGED
@@ -5,7 +5,7 @@ describe "GeonamesLocal" do
|
|
5
5
|
|
6
6
|
describe "Parsing" do
|
7
7
|
before do
|
8
|
-
@spot = Geonames::Spot.new("6319037\tMaxaranguape\tMaxaranguape\t-5.46874226086957\t-35.3565714695652\tA\tADM2\tBR\t22\t2407500\t6593\t12\tAmerica/Recife\t2006-12-17")
|
8
|
+
@spot = Geonames::Spot.new("6319037\tMaxaranguape\tMaxaranguape\t\t-5.46874226086957\t-35.3565714695652\tA\tADM2\tBR\t22\t2407500\t6593\t\t12\t\t\t\tAmerica/Recife\t2006-12-17")
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should parse geoid integer" do
|
@@ -41,6 +41,42 @@ describe "GeonamesLocal" do
|
|
41
41
|
end
|
42
42
|
end
|
43
43
|
|
44
|
+
describe "More Parseing" do
|
45
|
+
before do
|
46
|
+
@spot = Geonames::Spot.new("3384862\tRiacho Zuza\tRiacho Zuza\t\t-9.4333333\t-37.6666667\tH\tSTMI\tBR\t\t02\t\t\t\t0\t\t241\tAmerica/Maceio\t1993-12-17\n")
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should parse geoid integer" do
|
50
|
+
@spot.geoname_id.should eql(3384862)
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should parse name" do
|
54
|
+
@spot.name.should eql("Riacho Zuza")
|
55
|
+
@spot.ascii.should eql("Riacho Zuza")
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should parse geostuff" do
|
59
|
+
@spot.lat.should be_close(-9.4333333, 0.001)
|
60
|
+
@spot.lon.should be_close(-37.6666667, 0.001)
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should parse spot kind" do
|
64
|
+
@spot.kind.should eql(:other)
|
65
|
+
end
|
66
|
+
|
67
|
+
it "should parse spot country" do
|
68
|
+
@spot.country.should eql("BR")
|
69
|
+
end
|
70
|
+
|
71
|
+
it "shuold parse timezone" do
|
72
|
+
@spot.tz.should eql("America/Maceio")
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should parse updated_at" do
|
76
|
+
@spot.updated_at.should be_instance_of(Time)
|
77
|
+
@spot.updated_at.day.should eql(17)
|
78
|
+
end
|
79
|
+
end
|
44
80
|
end
|
45
81
|
|
46
82
|
# 6319037 Maxaranguape Maxaranguape -5.46874226086957 -35.3565714695652 A ADM2 BR 22 2407500 6593 12 America/Recife 2006-12-17
|
data/task/benchmark.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# DB => br, cl ~ 6k objects
|
4
|
+
#
|
5
|
+
require 'benchmark'
|
6
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
7
|
+
require 'geonames_local'
|
8
|
+
|
9
|
+
# @db.flush
|
10
|
+
|
11
|
+
def b1(db)
|
12
|
+
puts "#{db.count} Objects"
|
13
|
+
Benchmark.bmbm do |b|
|
14
|
+
b.report("All Country") { db.all({ :kind => "country" })}
|
15
|
+
b.report("Find by GID") { db.find(888) }
|
16
|
+
b.report("Find by name") { db.all({ :name => "Maxaranguape"}) }
|
17
|
+
b.report("Find on country") { db.all({ :country => "CL"}) }
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
print "Tyrant => "
|
22
|
+
b1(Geonames::Tokyo.new(:tyrant))
|
23
|
+
|
24
|
+
|
25
|
+
print "Cabinet => "
|
26
|
+
b1(Geonames::Tokyo.new)
|
27
|
+
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# DB => br, cl ~ 6k objects
|
4
|
+
#
|
5
|
+
require 'benchmark'
|
6
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
7
|
+
require 'geonames_local'
|
8
|
+
include Geonames
|
9
|
+
# @db.flush
|
10
|
+
|
11
|
+
|
12
|
+
def b1(db)
|
13
|
+
puts "#{db.count} Objects"
|
14
|
+
Benchmark.bmbm do |b|
|
15
|
+
b.report("All Country") { Country.all }
|
16
|
+
b.report("Find by GID") { db.find(888) }
|
17
|
+
b.report("Find by name") { db.all({ :name => "Maxaranguape"}) }
|
18
|
+
b.report("Find on country") { db.all({ :country => "CL"}) }
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
print "Cabinet => "
|
23
|
+
b1(Tokyo.new)
|
24
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: geonames_local
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marcos Piccinini
|
@@ -9,8 +9,8 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
13
|
-
default_executable:
|
12
|
+
date: 2010-01-28 00:00:00 -02:00
|
13
|
+
default_executable: geonames
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|
@@ -22,10 +22,20 @@ dependencies:
|
|
22
22
|
- !ruby/object:Gem::Version
|
23
23
|
version: 1.2.9
|
24
24
|
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: tokyotyrant
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "1.10"
|
34
|
+
version:
|
25
35
|
description: Dump and feed a tokyo cabinet for local geonames search
|
26
36
|
email: x@nofxx.com
|
27
37
|
executables:
|
28
|
-
-
|
38
|
+
- geonames
|
29
39
|
extensions: []
|
30
40
|
|
31
41
|
extra_rdoc_files:
|
@@ -37,10 +47,28 @@ files:
|
|
37
47
|
- LICENSE
|
38
48
|
- README.rdoc
|
39
49
|
- Rakefile
|
50
|
+
- VERSION
|
51
|
+
- bin/geonames
|
52
|
+
- lib/config/codes.yml
|
53
|
+
- lib/config/geonames.yml
|
54
|
+
- lib/data/geoquery.rb
|
55
|
+
- lib/data/postgres.rb
|
56
|
+
- lib/data/tokyo.rb
|
57
|
+
- lib/features/city.rb
|
58
|
+
- lib/features/country.rb
|
59
|
+
- lib/features/province.rb
|
60
|
+
- lib/features/road.rb
|
61
|
+
- lib/features/spot.rb
|
40
62
|
- lib/geonames_local.rb
|
63
|
+
- lib/work/cli.rb
|
64
|
+
- lib/work/dump.rb
|
65
|
+
- lib/work/export.rb
|
66
|
+
- spec/data/tokyo_spec.rb
|
41
67
|
- spec/geonames_local_spec.rb
|
42
68
|
- spec/spec.opts
|
43
69
|
- spec/spec_helper.rb
|
70
|
+
- task/benchmark.rb
|
71
|
+
- task/benchmark_cabinet.rb
|
44
72
|
has_rdoc: true
|
45
73
|
homepage: http://github.com/nofxx/geonames_local
|
46
74
|
licenses: []
|
@@ -70,5 +98,6 @@ signing_key:
|
|
70
98
|
specification_version: 3
|
71
99
|
summary: Dump and feed a tokyo local geonames db
|
72
100
|
test_files:
|
73
|
-
- spec/
|
101
|
+
- spec/data/tokyo_spec.rb
|
74
102
|
- spec/geonames_local_spec.rb
|
103
|
+
- spec/spec_helper.rb
|
data/bin/geoname
DELETED
File without changes
|