geonames_local 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -9,6 +9,7 @@ tmtags
9
9
  *~
10
10
  \#*
11
11
  .\#*
12
+ *flymake.rb
12
13
 
13
14
  ## VIM
14
15
  *.swp
@@ -19,3 +20,8 @@ rdoc
19
20
  pkg
20
21
 
21
22
  ## PROJECT::SPECIFIC
23
+ *.tct
24
+ *.pid
25
+ *.log
26
+ *.qgr
27
+ *.lex
data/README.rdoc CHANGED
@@ -1,9 +1,82 @@
1
- = geonames_local
1
+ = Geonames Local
2
+
3
+ Download Geonames.org data and store it in Tokyo (Cabinet/Tyrant) or PostgreSQL.
4
+ Making every Geoname API operation possible on your servers.
5
+ No hit limit, as fast as possible.
6
+
7
+ == Features/Problems
8
+
9
+ - Local relational mapping of geonames data
10
+ - Map geonames fields to your db schema (not done yet)
11
+ - Countries are a static yml file :/
12
+
13
+ == Usage
14
+
15
+ geonames conf
16
+
17
+ Will generate a "geonames.yml" file in your current folder.
18
+ The file is self explanatory.
19
+
20
+ geonames -c geonames.yml
21
+
22
+ Work. Use -v for verbose.
23
+
24
+ If you are not sure of your country code, use:
25
+
26
+ geonames list <search>
27
+
28
+
29
+ == Relational Mapping
30
+
31
+ When using PG, this gem will (try to) relationally map Geonames
32
+ data onto your schema. PostgreSQL is done; Tokyo still needs heavy work.
33
+
34
+
35
+ == PostgreSQL
36
+
37
+ So, supposing ActiveRecord, something like this is possible:
38
+
39
+ City.first.province.country.abbr
40
+ => "BR"
41
+
42
+ === Migration
43
+
44
+ Default PG migration:
45
+
46
+ create_table :cities do |t|
47
+ t.references :country, :null => false
48
+ t.references :province
49
+ t.string :name, :null => false
50
+ t.point :geom, :srid => 4326
51
+ t.integer :gid, :zip
52
+ end
53
+
54
+ create_table :provinces do |t|
55
+ t.references :country, :null => false
56
+ t.string :name, :null => false
57
+ t.string :abbr, :limit => 2, :null => false
58
+ t.integer :gid
59
+ end
60
+
61
+ create_table :countries do |t|
62
+ t.string :name, :limit => 30, :null => false
63
+ t.string :abbr, :limit => 2, :null => false
64
+ end
65
+
66
+ add_index :cities, :name
67
+ add_index :cities, :gid
68
+ add_index :cities, :zip
69
+ add_index :cities, :country_id
70
+ add_index :cities, :province_id
71
+ add_index :cities, :geom, :spatial => true
72
+ add_index :provinces, :name
73
+ add_index :provinces, :abbr
74
+ add_index :provinces, :gid
75
+ add_index :provinces, :country_id
2
76
 
3
- Description goes here.
4
77
 
5
78
  == Note on Patches/Pull Requests
6
-
79
+
7
80
  * Fork the project.
8
81
  * Make your feature addition or bug fix.
9
82
  * Add tests for it. This is important so I don't break it in a
data/Rakefile CHANGED
@@ -11,6 +11,7 @@ begin
11
11
  gem.homepage = "http://github.com/nofxx/geonames_local"
12
12
  gem.authors = ["Marcos Piccinini"]
13
13
  gem.add_development_dependency "rspec", ">= 1.2.9"
14
+ gem.add_dependency "tokyotyrant", ">= 1.10"
14
15
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
16
  end
16
17
  Jeweler::GemcutterTasks.new
@@ -43,3 +44,59 @@ Rake::RDocTask.new do |rdoc|
43
44
  rdoc.rdoc_files.include('README*')
44
45
  rdoc.rdoc_files.include('lib/**/*.rb')
45
46
  end
47
+
48
+ #
49
+ # Tokyo Tyrant rake tasks
50
+ #
51
+ namespace :tyrant do
52
+ TYRANT_DB_FILE = File.join("tyrant.tct")
53
+ TYRANT_PID_FILE = File.join("tyrant.pid")
54
+ TYRANT_LOG_FILE = File.join("tyrant.log")
55
+
56
+ desc "Start Tyrant server"
57
+ task :start do
58
+ raise RuntimeError, "Tyrant is already running." if tyrant_running?
59
+ system "ttserver -pid #{TYRANT_PID_FILE} -log #{TYRANT_LOG_FILE} #{TYRANT_DB_FILE}&"
60
+ sleep(2)
61
+ if tyrant_running?
62
+ puts "Tyrant started successfully (pid #{tyrant_pid})."
63
+ else
64
+ puts "Failed to start tyrant push server. Check logs."
65
+ end
66
+ end
67
+
68
+ desc "Stop Tyrant server"
69
+ task :stop do
70
+ raise RuntimeError, "Tyrant isn't running." unless tyrant_running?
71
+ system "kill #{tyrant_pid}"
72
+ sleep(2)
73
+ if tyrant_running?
74
+ puts "Tyrant didn't stopped. Check the logs."
75
+ else
76
+ puts "Tyrant stopped."
77
+ end
78
+ end
79
+
80
+ desc "Restart Tyrant server"
81
+ task :restart => [:stop, :start]
82
+
83
+ desc "Get Tyrant Server Status"
84
+ task :status do
85
+ puts tyrant_running? ? "Tyrant running. (#{tyrant_pid})" : "Tyrant not running."
86
+ end
87
+ end
88
+
89
+ def tyrant_pid
90
+ `cat #{TYRANT_PID_FILE}`.to_i
91
+ end
92
+
93
+ def tyrant_running?
94
+ return false unless File.exist?(TYRANT_PID_FILE)
95
+ process_check = `ps -p #{tyrant_pid} | wc -l`
96
+ if process_check.to_i < 2
97
+ puts "Erasing pidfile..."
98
+ `rm #{TYRANT_PID_FILE}`
99
+ end
100
+ tyrant_pid
101
+ end
102
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.3
data/bin/geonames ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+ $VERBOSE = true # -w
3
+ $KCODE = "u" # -Ku
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+
6
+ require "geonames_local"
7
+ include Geonames
8
+
9
+ Geonames::CLI.work(ARGV)
@@ -0,0 +1,24 @@
1
+ :br:
2
+ :pt_br: Brasil
3
+ :en_us: Brazil
4
+ :us:
5
+ :pt_br: Estados Unidos
6
+ :en_us: United States
7
+ :cl:
8
+ :pt_br: Chile
9
+ :en_us: Chile
10
+ :ar:
11
+ :pt_br: Argentina
12
+ :en_us: Argentina
13
+ :ug:
14
+ :pt_br: Uganda
15
+ :en_us: Uganda
16
+ :td:
17
+ :pt_br: Chad
18
+ :en_us: Chad
19
+ :ru:
20
+ :pt_br: Rússia
21
+ :en_us: Russia
22
+ :it:
23
+ :pt_br: Itália
24
+ :en_us: Italy
@@ -0,0 +1,14 @@
1
+ #
2
+ # Geonames Local Config Example
3
+ #
4
+ :store: pg
5
+ :codes: [br, cl]
6
+ :level: city
7
+ :mapping:
8
+ :name: name
9
+ :geom: true
10
+ :db:
11
+ :host: localhost
12
+ :dbname: db
13
+ :user: postgres
14
+ :password:
@@ -0,0 +1,31 @@
1
+ module Geonames
2
+ class Geoquery
3
+ R = 1
4
+
5
+ def self.point(tdb, x, y)
6
+ qry = TDBQRY::new(tdb)
7
+ qry.addcond("x", TDBQRY::QCNUMGE, minx.to_s())
8
+ qry.addcond("x", TDBQRY::QCNUMLE, maxx.to_s())
9
+ qry.addcond("y", TDBQRY::QCNUMGE, miny.to_s())
10
+ qry.addcond("y", TDBQRY::QCNUMLE, maxy.to_s())
11
+ qry.setorder("x", TDBQRY::QONUMASC)
12
+ qry.setlimit(80)
13
+ end
14
+
15
+
16
+ def self.area(tdb, minx, maxx, miny, maxy)
17
+ qry = TDBQRY::new(tdb)
18
+ qry.addcond("x", TDBQRY::QCNUMGE, minx.to_s())
19
+ qry.addcond("x", TDBQRY::QCNUMLE, maxx.to_s())
20
+ qry.addcond("y", TDBQRY::QCNUMGE, miny.to_s())
21
+ qry.addcond("y", TDBQRY::QCNUMLE, maxy.to_s())
22
+ qry.setorder("x", TDBQRY::QONUMASC)
23
+
24
+ res = qry.search
25
+ info res.length # number of results found
26
+ return res
27
+ end
28
+
29
+
30
+ end
31
+ end
@@ -0,0 +1,69 @@
1
+ require "pg"
2
+
3
+ module Geonames
4
+ class Postgres
5
+ Countries = {}
6
+ Provinces = {}
7
+
8
+ def initialize(opts) #table, addr = "localhost", port = 5432)
9
+ @conn = PGconn.new(opts)
10
+ end
11
+
12
+ #
13
+ # Get Country and Province ID from the DB
14
+ def get_some_ids(some)
15
+ c = Countries[some.country] ||=
16
+ @conn.exec("SELECT countries.id FROM countries WHERE UPPER(countries.abbr) = UPPER('#{some.country}')")[0]["id"] rescue nil
17
+ c ||= write("countries", {:name => Codes[some.country.downcase.to_sym][:pt_br], :abbr => some.country })
18
+
19
+ p = Provinces[some.province] ||= find("provinces", Cache[:provinces].
20
+ find{ |p| p.province == some.province}.gid)
21
+ [c, p]
22
+ end
23
+
24
+ #
25
+ # Insert a record
26
+ def insert(some)
27
+ country_id, province_id = get_some_ids(some)
28
+ if some.kind == :cities
29
+ write("cities", {:name => some.name, :country_id => country_id,
30
+ :geom => some.geom.as_hex_ewkb, :gid => some.gid,
31
+ :zip => some.zip, :province_id => province_id})
32
+ else
33
+ write("provinces", { :name => some.name, :abbr => some.abbr,
34
+ :country_id => country_id, :gid => some.gid })
35
+ end
36
+ end
37
+
38
+ #
39
+ # Find a record`s ID
40
+ def find(kind, id)
41
+ @conn.exec("SELECT #{kind}.id FROM #{kind} WHERE #{kind}.gid = #{id}")[0]["id"] rescue nil
42
+ end
43
+
44
+ #
45
+ # F'oo -> F''oo (for pg)
46
+ def escape_name(name)
47
+ name.gsub("'", "''")
48
+ end
49
+
50
+ #
51
+ # Sanitize values for pg.. here until my laziness opens the pg rdoc...
52
+ def pg_values(arr)
53
+ arr.map do |v|
54
+ case v
55
+ when String then "E'#{escape_name(v)}'"
56
+ when NilClass then 'NULL'
57
+ else v
58
+ end
59
+ end.join(",")
60
+ end
61
+
62
+ #
63
+ # Naive PG insert ORM =D
64
+ def write(table, hsh)
65
+ for_pg = pg_values(hsh.values)
66
+ @conn.exec("INSERT INTO #{table} (#{hsh.keys.join(",")}) VALUES(#{for_pg}) RETURNING id")[0]["id"]
67
+ end
68
+ end
69
+ end
data/lib/data/tokyo.rb ADDED
@@ -0,0 +1,84 @@
1
+
2
+ module Geonames
3
+ class Tokyo
4
+
5
+ def initialize(conn=nil, resource=nil, extra=nil)
6
+ if conn
7
+ require 'tokyotyrant'
8
+ resource ||= 'localhost'
9
+ extra ||= 1978
10
+ @tbl = TokyoTyrant::RDBTBL
11
+ @qry = TokyoTyrant::RDBQRY
12
+ else
13
+ require 'tokyocabinet'
14
+ resource ||= 'geonames.tct'
15
+ extra ||= (TokyoCabinet::TDB::OWRITER | TokyoCabinet::TDB::OCREAT)
16
+ @tbl = TokyoCabinet::TDB
17
+ @qry = TokyoCabinet::TDBQRY
18
+ end
19
+ @rdb = @tbl.new
20
+ @rdb.open(resource, extra)
21
+ set_indexes
22
+ end
23
+
24
+ def all(params)
25
+ qry = @qry.new(@rdb)
26
+ params.each do |k,v|
27
+ #qry.addcond(k.to_s, Q::QCNUMEQ, v.to_s)
28
+ qry.addcond(k.to_s, @qry::QCSTREQ, v.to_s)
29
+ end
30
+ qry.setorder("name", @qry::QOSTRASC)
31
+ qry.search.map { |id| @rdb.get(id) }
32
+ end
33
+
34
+ def find(id)
35
+ #qry = Q.new(@rdb)
36
+ #qry.addcond("gid", Q::QCNUMEQ, id.to_s)
37
+ #qry.setlimit(10)
38
+ #id = qry.search.pop
39
+ @rdb.get(id)
40
+ end
41
+
42
+ # def to_obj(hsh)
43
+ # hsh["kind"] == "country" ? Country.new(hsh) : Spot.new(hsh)
44
+ # end
45
+
46
+ def write(o)
47
+ # pkey = @rdb.genuid
48
+ if @rdb.put(o.gid, o.to_hash)
49
+ # info "ok"
50
+ else
51
+ info "err #{@rdb.errmsg(@rdb.ecode)}"
52
+ end
53
+ end
54
+
55
+ def count
56
+ @qry.new(@rdb).search.length
57
+ end
58
+
59
+ def close
60
+ # close the database
61
+ if !@rdb.close
62
+ STDERR.printf("close error: %s\n", @rdb.errmsg(@rdb.ecode))
63
+ end
64
+ end
65
+
66
+ def set_indexes
67
+ #for index in indexes
68
+ # @rdb.setindex("gid", @tbl::ITOPT)
69
+ @rdb.setindex("kind", @tbl::ITLEXICAL)
70
+ @rdb.setindex("name", @tbl::ITQGRAM)
71
+ @rdb.setindex("country", @tbl::ITLEXICAL)
72
+
73
+ #end
74
+
75
+ end
76
+
77
+ def flush
78
+ @rdb.vanish
79
+ end
80
+
81
+
82
+ end
83
+
84
+ end
@@ -0,0 +1,9 @@
1
+ class City
2
+ attr_accessor :country, :province, :name
3
+
4
+ def self.all
5
+ qry.addcond(QCSTREQ, 'city')
6
+ end
7
+
8
+
9
+ end
@@ -0,0 +1,61 @@
1
+ module Geonames
2
+ class Country
3
+ attr_accessor :code, :name, :gid, :iso, :capital, :pop
4
+
5
+ def self.all
6
+ Tokyo.new.all({ :kind => "country" }).map do |c|
7
+ new(c)
8
+ end
9
+ end
10
+
11
+ # [0] iso alpha2
12
+ # [1] iso alpha3
13
+ # [2] iso numeric
14
+ # [3] fips code
15
+ # [4] name
16
+ # [5] capital
17
+ # [6] areaInSqKm
18
+ # [7] population
19
+ # [8] continent
20
+ # [9] top level domain
21
+ # [10] Currency code
22
+ # [11] Currency name
23
+ # [12] Phone
24
+ # [13] Postal Code Format
25
+ # [14] Postal Code Regex
26
+ # [15] Languages
27
+ # [16] Geoname id
28
+ # [17] Neighbours
29
+ # [18] Equivalent Fips Code
30
+ #
31
+ def self.parse(row)
32
+ new(row)
33
+ end
34
+
35
+ def initialize(params)
36
+ parse(params)
37
+ end
38
+
39
+ def parse
40
+ @iso, @iso3, @ison, @fips, @name, @capital, @area, @pop, continent, tld,
41
+ currency, phone, postal, langs, gid, neighbours = row.split(/\t/)
42
+ @code = iso
43
+ end
44
+
45
+ def cities
46
+ # qry.addcond("country", TBDQRY::QSTREQ, @code)
47
+ end
48
+
49
+ def to_hash
50
+ { "gid" => @gid.to_s, "name" => @name, "kind" => "country", "code" => @code}
51
+ end
52
+
53
+ def export
54
+ [@gid, @code, @name]
55
+ end
56
+
57
+ def export_header
58
+ ["gid", "code", "name"]
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,18 @@
1
+ module Geonames
2
+ class Province
3
+ attr_accessor :code, :name, :gid
4
+
5
+ def self.all
6
+ Tokyo.new.all({ :kind => "province" }).map do |c|
7
+ new(c)
8
+ end
9
+ end
10
+
11
+ def initialize(params)
12
+ @code = params["code"]
13
+ @name = params["name"]
14
+ @gid = params["gid"]
15
+ end
16
+
17
+ end
18
+ end
@@ -0,0 +1,2 @@
1
+ class Road
2
+ end
@@ -0,0 +1,88 @@
1
+ module Geonames
2
+ class Spot
3
+ attr_accessor :gid, :name, :ascii, :lat, :lon, :country, :kind,
4
+ :code, :pop, :tz, :geom, :province, :zip, :abbr
5
+ alias :x :lon
6
+ alias :y :lat
7
+ alias :geoname_id :gid
8
+
9
+ #
10
+ # = Geonames Spot
11
+ #
12
+ # Every geoname type will be parsed as a spot
13
+ def initialize(params, k)
14
+ return unless params.instance_of? String
15
+ k == :zip ? parse_zip(params) : parse(params)
16
+ if @kind == :provinces
17
+ @name.gsub!(/Estado d\w\s/, "")
18
+ @abbr = get_abbr
19
+ end
20
+ end
21
+
22
+ # Geonames doesn't have province/state abbr..#fail!
23
+ # This works 75% of the time in brazil heh
24
+ def get_abbr
25
+ s = @name.split(" ")
26
+ if s.length > 1
27
+ [s[0][0].chr,s[-1][0].chr].map(&:upcase).join
28
+ else
29
+ s[0][0..1].upcase
30
+ end
31
+ end
32
+
33
+ #
34
+ # Parse Geonames Dump Export
35
+ def parse(row)
36
+ gid, @name, @ascii, @alternates, lat, lon, feat, kind,
37
+ @country, cc2, adm1, adm2, adm3, adm4, pop, ele,
38
+ gtop, @tz, @up = row.split(/\t/)
39
+
40
+ parse_geom(lat, lon)
41
+ @gid = @geoname_id = gid.to_i
42
+ @kind = human_code(kind)
43
+ @province = adm1
44
+ @code = adm2
45
+ end
46
+
47
+ #
48
+ # Parse Geonames Zip Export
49
+ def parse_zip(row)
50
+ country, zip, @name, province, cc, dunno, adm1, adm2, lat, lon = row.split(/\t/)
51
+ parse_geom(lat, lon)
52
+ @code = adm1
53
+ @kind = :cities
54
+ @zip = zip.split("-")[0]
55
+ end
56
+
57
+ #
58
+ # Parse Geom to float or GeoRuby Point
59
+ def parse_geom(lat, lon)
60
+ @lat, @lon = lat.to_f, lon.to_f
61
+
62
+ if defined?("GeoRuby")
63
+ @geom = GeoRuby::SimpleFeatures::Point.from_x_y(@lon, @lat)
64
+ end
65
+ end
66
+
67
+ #
68
+ # Parse Time
69
+ def updated_at
70
+ Time.utc(*@up.split("-"))
71
+ end
72
+
73
+ # For tokyo
74
+ def to_hash
75
+ { "gid" => @geoname_id.to_s, "kind" => @kind.to_s, "name" => @name, "ascii" => @ascii,
76
+ "lat" => @lat.to_s, "lon" => @lon.to_s, "tz" => @tz, "country" => @country }
77
+ end
78
+
79
+
80
+ def human_code(code)
81
+ case code
82
+ when 'ADM1' then :provinces
83
+ when 'ADM2' then :cities
84
+ else :other
85
+ end
86
+ end
87
+ end
88
+ end
@@ -1,6 +1,23 @@
1
1
  require 'features/spot'
2
2
  require 'features/country'
3
3
  require 'features/city'
4
+ require 'rubygems'
5
+ require 'logger'
6
+ require 'data/tokyo'
7
+ require 'data/postgres'
8
+ require 'work/cli'
9
+ require 'work/dump'
10
+ require 'work/export'
4
11
 
5
12
  module Geonames
13
+ Opt = {}
14
+ Cache = {:dump => [], :zip => []}
15
+ Codes = YAML.load(File.read(File.join(File.dirname(__FILE__),'config', 'codes.yml')))
16
+ VERSION = File.read(File.join(File.dirname(__FILE__), '..', 'VERSION'))
17
+
18
+ def info(txt)
19
+ if Opt[:verbose]
20
+ puts(txt)
21
+ end
22
+ end
6
23
  end
data/lib/work/cli.rb ADDED
@@ -0,0 +1,144 @@
1
+ #
2
+ # Geonames Local
3
+ #
4
+ require 'optparse'
5
+ module Geonames
6
+ class CLI
7
+
8
+ def self.parse_options(argv)
9
+ options = {}
10
+
11
+ argv.options do |opts|
12
+ opts.banner = <<BANNER
13
+ Geonames Command Line Usage:
14
+
15
+ geonames <country code(s)> <opts>
16
+
17
+ BANNER
18
+ opts.on("-l", "--level LEVEL", String, "The level of logging to report" ) { |level| options[:level] = level }
19
+ opts.on("-d", "--dump", "Dump DB before all" ) { options[:dump] = true }
20
+ opts.separator ""
21
+ opts.separator "Config file:"
22
+ opts.on("-c", "--config CONFIG", String, "Geonames Config file path" ) { |file| options[:config] = file }
23
+ opts.separator ""
24
+ opts.separator "Tyrant Options:"
25
+ opts.on("-t", "--tyrant", "Use tyrant" ) { options[:tyrant] = true }
26
+ opts.on("-s", "--server SERVER", String, "Tyrant Server URL" ) { |url| options[:server] = url }
27
+ opts.on("-p", "--port PORT", Integer, "Tyrant Server Port") { |val| options[:port] = val.to_i }
28
+ opts.separator ""
29
+ opts.separator "Common Options:"
30
+ opts.on("-h", "--help", "Show this message" ) { puts opts; exit }
31
+ opts.on("-v", "--verbose", "Turn on logging to STDOUT" ) { |bool| options[:verbose] = bool }
32
+ opts.on("-V", "--version", "Show version") { puts Geonames::VERSION; exit }
33
+ opts.separator ""
34
+ begin
35
+ opts.parse!
36
+ if argv.empty? && !options[:config]
37
+ puts opts
38
+ exit
39
+ end
40
+ rescue
41
+ puts opts
42
+ exit
43
+ end
44
+ end
45
+ options
46
+ end
47
+ private_class_method :parse_options
48
+
49
+ def self.work(argv)
50
+ trap(:INT) { stop! }
51
+ trap(:TERM) { stop! }
52
+ Opt.merge! parse_options(argv)
53
+
54
+ if Opt[:config]
55
+ Opt.merge! YAML.load(File.read(Opt[:config]))
56
+ end
57
+
58
+ if argv[0] =~ /list|codes/
59
+ Codes.each do |key,val|
60
+ str = [val.values, key.to_s].join(" ").downcase
61
+ if s = argv[1]
62
+ next unless str =~ /#{s.downcase}/
63
+ end
64
+ puts "#{val[:en_us]}: #{key}"
65
+ end
66
+ exit
67
+ end
68
+
69
+ if argv[0] =~ /scaff|conf/
70
+ fname = (argv[1] || "geonames") + ".yml"
71
+ if File.exist?(fname)
72
+ puts "File exists."
73
+ else
74
+ puts "Writing to #{fname}"
75
+ `cp #{File.join(File.dirname(__FILE__), '..', 'config', 'geonames.yml')} #{fname}`
76
+ end
77
+ exit
78
+ end
79
+ require "geo_ruby" if Opt[:mapping] && Opt[:mapping][:geom]
80
+
81
+ if argv[0] =~ /csv|json/
82
+ Geonames::Export.new(Country.all).to_csv
83
+ else
84
+ Geonames::Dump.work(Opt[:codes], :zip) #rescue puts "Command not found: #{comm} #{@usage}"
85
+ Geonames::Dump.work(Opt[:codes], :dump) #rescue puts "Command not found: #{comm} #{@usage}"
86
+ info "\n---\nTotal #{Cache[:dump].length} parsed. #{Cache[:zip].length} zips."
87
+ info "Join dump << zip"
88
+ unify!
89
+ write_to_store!
90
+ end
91
+ end
92
+
93
+ def self.write_to_store!
94
+ db = case Opt[:store].to_sym
95
+ when :tyrant then Geonames::Tokyo.new(Opt[:tyrant])
96
+ when :pg then Geonames::Postgres.new(Opt[:db])
97
+ else
98
+ info "No store defined!"
99
+ exit
100
+ end
101
+
102
+ groups = Cache[:dump].group_by(&:kind)
103
+ Cache[:provinces] = groups[:provinces]
104
+ # ensure this order....
105
+ do_write(db, groups[:provinces])
106
+ do_write(db, groups[:cities])
107
+ end
108
+
109
+ def self.do_write(db, val)
110
+ key = val[0].kind
111
+ start = Time.now
112
+ writt = 0
113
+ info "\nWriting #{key}..."
114
+ val.each do |v|
115
+ unless db.find v.kind, v.gid
116
+ db.insert v
117
+ writt += 1
118
+ end
119
+ end
120
+ total = Time.now - start
121
+ info "#{writt} #{key} written in #{total} sec (#{(writt/total).to_i}/s)"
122
+ end
123
+
124
+ def self.unify!
125
+ start = Time.now
126
+ Cache[:dump].map! do |spot|
127
+ if other = Cache[:zip].find { |d| d.code == spot.code }
128
+ spot.zip = other.zip
129
+ spot
130
+ else
131
+ spot
132
+ end
133
+ end
134
+ info "Done. #{(Time.now-start).to_i}s"
135
+ end
136
+
137
+ def self.stop!
138
+ puts "Closing Geonames..."
139
+ exit
140
+ end
141
+
142
+ end
143
+
144
+ end
data/lib/work/dump.rb ADDED
@@ -0,0 +1,76 @@
1
+ module Geonames
2
+ class Dump
3
+ URL = "http://download.geonames.org/export/"
4
+ TMP = "/tmp/geonames/"
5
+
6
+ def initialize(codes, kind)
7
+ @codes = codes
8
+ @kind = kind
9
+ if codes.respond_to? :each
10
+ for code in codes
11
+ info "\nWorking on #{kind} for #{code}"
12
+ file = get_file(code)
13
+ download file
14
+ uncompress file unless code == "country"
15
+ parse file
16
+ end
17
+ end
18
+
19
+ end
20
+
21
+ def get_file(code)
22
+ code == "country" ? "countryInfo.txt" : "#{code.upcase}.zip"
23
+ end
24
+
25
+ def download(file)
26
+ Dir.mkdir(TMP) unless File.exists?(TMP)
27
+ Dir.mkdir(TMP + @kind.to_s) unless File.exists?(TMP + @kind.to_s)
28
+ fname = TMP + "#{@kind}/#{file}"
29
+ return if File.exists?(fname)
30
+ `curl #{URL}/#{@kind}/#{file} -o #{fname}`
31
+ end
32
+
33
+ def uncompress(file)
34
+ info "Uncompressing #{file}"
35
+ `unzip -quo /tmp/geonames/#{@kind}/#{file} -d /tmp/geonames/#{@kind}`
36
+ end
37
+
38
+ def parse_line(l)
39
+ return if l =~ /^#|^iso/i
40
+ if @kind == :dump
41
+ if l =~ /^\D/
42
+ Country.parse(l)
43
+ else
44
+ if Opt[:level] != "all"
45
+ return unless l =~ /ADM\d/ # ADM2 => cities
46
+ end
47
+ end
48
+ end
49
+ Spot.new(l, @kind)
50
+ end
51
+
52
+ def parse(file)
53
+ red = 0
54
+ start = Time.now
55
+ File.open("/tmp/geonames/#{@kind}/#{file.gsub("zip", "txt")}") do |f|
56
+ while line = f.gets
57
+ if record = parse_line(line)
58
+ Cache[@kind] << record
59
+ red += 1
60
+ end
61
+ end
62
+ total = Time.now - start
63
+ info "#{red} #{@kind} entries parsed in #{total} sec (#{(red/total).to_i}/s)"
64
+ end
65
+ rescue Errno::ENOENT => e
66
+ info "Failed to download #{file}, skipping."
67
+ end
68
+
69
+
70
+ def self.work(codes=:all, kind=:dump)
71
+ new(codes, kind)
72
+ end
73
+
74
+
75
+ end
76
+ end
@@ -0,0 +1,21 @@
1
+ require 'csv'
2
+
3
+ module Geonames
4
+ class Export
5
+
6
+ def initialize(data)
7
+ info "Starting export.."
8
+ @data = data
9
+ end
10
+
11
+ def to_csv
12
+ file = "export.csv"
13
+ info "Writing #{file} (#{@data.length} objects)"
14
+ CSV.open("export.csv", 'w') do |csv|
15
+ csv << @data[0].export_header
16
+ @data.each { |o| csv << o.export }
17
+ end
18
+ info "Export done."
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,29 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "Tokyo" do
4
+
5
+ it "should write down a spot" do
6
+ t = Geonames::Tokyo.new('localhost', 1978)
7
+ m = mock(Geonames::Spot, { :gid => 888, :to_hash => { "gid" => 888, "kind" => "city", "lat" => 5.5 }})
8
+ t.write(m)
9
+ end
10
+
11
+ it "should read it up" do
12
+ t = Geonames::Tokyo.new('localhost', 1978)
13
+ record = t.find(888)
14
+ record.should be_instance_of Geonames::Spot
15
+ end
16
+
17
+ it "should not duplicate" do
18
+ t = Geonames::Tokyo.new('localhost', 1978)
19
+ t.all({ :gid => 888}).length.should eql(1)
20
+ end
21
+
22
+ it "should return all countries" do
23
+ all = Geonames::Country.all
24
+ all.should be_instance_of Array
25
+ all[0].should be_instance_of Geonames::Country
26
+ all[0].gid.should eql(1)
27
+ end
28
+
29
+ end
@@ -5,7 +5,7 @@ describe "GeonamesLocal" do
5
5
 
6
6
  describe "Parsing" do
7
7
  before do
8
- @spot = Geonames::Spot.new("6319037\tMaxaranguape\tMaxaranguape\t-5.46874226086957\t-35.3565714695652\tA\tADM2\tBR\t22\t2407500\t6593\t12\tAmerica/Recife\t2006-12-17")
8
+ @spot = Geonames::Spot.new("6319037\tMaxaranguape\tMaxaranguape\t\t-5.46874226086957\t-35.3565714695652\tA\tADM2\tBR\t22\t2407500\t6593\t\t12\t\t\t\tAmerica/Recife\t2006-12-17")
9
9
  end
10
10
 
11
11
  it "should parse geoid integer" do
@@ -41,6 +41,42 @@ describe "GeonamesLocal" do
41
41
  end
42
42
  end
43
43
 
44
+ describe "More Parseing" do
45
+ before do
46
+ @spot = Geonames::Spot.new("3384862\tRiacho Zuza\tRiacho Zuza\t\t-9.4333333\t-37.6666667\tH\tSTMI\tBR\t\t02\t\t\t\t0\t\t241\tAmerica/Maceio\t1993-12-17\n")
47
+ end
48
+
49
+ it "should parse geoid integer" do
50
+ @spot.geoname_id.should eql(3384862)
51
+ end
52
+
53
+ it "should parse name" do
54
+ @spot.name.should eql("Riacho Zuza")
55
+ @spot.ascii.should eql("Riacho Zuza")
56
+ end
57
+
58
+ it "should parse geostuff" do
59
+ @spot.lat.should be_close(-9.4333333, 0.001)
60
+ @spot.lon.should be_close(-37.6666667, 0.001)
61
+ end
62
+
63
+ it "should parse spot kind" do
64
+ @spot.kind.should eql(:other)
65
+ end
66
+
67
+ it "should parse spot country" do
68
+ @spot.country.should eql("BR")
69
+ end
70
+
71
+ it "shuold parse timezone" do
72
+ @spot.tz.should eql("America/Maceio")
73
+ end
74
+
75
+ it "should parse updated_at" do
76
+ @spot.updated_at.should be_instance_of(Time)
77
+ @spot.updated_at.day.should eql(17)
78
+ end
79
+ end
44
80
  end
45
81
 
46
82
  # 6319037 Maxaranguape Maxaranguape -5.46874226086957 -35.3565714695652 A ADM2 BR 22 2407500 6593 12 America/Recife 2006-12-17
data/task/benchmark.rb ADDED
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # DB => br, cl ~ 6k objects
4
+ #
5
+ require 'benchmark'
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+ require 'geonames_local'
8
+
9
+ # @db.flush
10
+
11
+ def b1(db)
12
+ puts "#{db.count} Objects"
13
+ Benchmark.bmbm do |b|
14
+ b.report("All Country") { db.all({ :kind => "country" })}
15
+ b.report("Find by GID") { db.find(888) }
16
+ b.report("Find by name") { db.all({ :name => "Maxaranguape"}) }
17
+ b.report("Find on country") { db.all({ :country => "CL"}) }
18
+ end
19
+ end
20
+
21
+ print "Tyrant => "
22
+ b1(Geonames::Tokyo.new(:tyrant))
23
+
24
+
25
+ print "Cabinet => "
26
+ b1(Geonames::Tokyo.new)
27
+
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # DB => br, cl ~ 6k objects
4
+ #
5
+ require 'benchmark'
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+ require 'geonames_local'
8
+ include Geonames
9
+ # @db.flush
10
+
11
+
12
+ def b1(db)
13
+ puts "#{db.count} Objects"
14
+ Benchmark.bmbm do |b|
15
+ b.report("All Country") { Country.all }
16
+ b.report("Find by GID") { db.find(888) }
17
+ b.report("Find by name") { db.all({ :name => "Maxaranguape"}) }
18
+ b.report("Find on country") { db.all({ :country => "CL"}) }
19
+ end
20
+ end
21
+
22
+ print "Cabinet => "
23
+ b1(Tokyo.new)
24
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: geonames_local
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos Piccinini
@@ -9,8 +9,8 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-13 00:00:00 -02:00
13
- default_executable: geoname
12
+ date: 2010-01-28 00:00:00 -02:00
13
+ default_executable: geonames
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rspec
@@ -22,10 +22,20 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: 1.2.9
24
24
  version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: tokyotyrant
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "1.10"
34
+ version:
25
35
  description: Dump and feed a tokyo cabinet for local geonames search
26
36
  email: x@nofxx.com
27
37
  executables:
28
- - geoname
38
+ - geonames
29
39
  extensions: []
30
40
 
31
41
  extra_rdoc_files:
@@ -37,10 +47,28 @@ files:
37
47
  - LICENSE
38
48
  - README.rdoc
39
49
  - Rakefile
50
+ - VERSION
51
+ - bin/geonames
52
+ - lib/config/codes.yml
53
+ - lib/config/geonames.yml
54
+ - lib/data/geoquery.rb
55
+ - lib/data/postgres.rb
56
+ - lib/data/tokyo.rb
57
+ - lib/features/city.rb
58
+ - lib/features/country.rb
59
+ - lib/features/province.rb
60
+ - lib/features/road.rb
61
+ - lib/features/spot.rb
40
62
  - lib/geonames_local.rb
63
+ - lib/work/cli.rb
64
+ - lib/work/dump.rb
65
+ - lib/work/export.rb
66
+ - spec/data/tokyo_spec.rb
41
67
  - spec/geonames_local_spec.rb
42
68
  - spec/spec.opts
43
69
  - spec/spec_helper.rb
70
+ - task/benchmark.rb
71
+ - task/benchmark_cabinet.rb
44
72
  has_rdoc: true
45
73
  homepage: http://github.com/nofxx/geonames_local
46
74
  licenses: []
@@ -70,5 +98,6 @@ signing_key:
70
98
  specification_version: 3
71
99
  summary: Dump and feed a tokyo local geonames db
72
100
  test_files:
73
- - spec/spec_helper.rb
101
+ - spec/data/tokyo_spec.rb
74
102
  - spec/geonames_local_spec.rb
103
+ - spec/spec_helper.rb
data/bin/geoname DELETED
File without changes