geonames_local 0.0.1 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -9,6 +9,7 @@ tmtags
9
9
  *~
10
10
  \#*
11
11
  .\#*
12
+ *flymake.rb
12
13
 
13
14
  ## VIM
14
15
  *.swp
@@ -19,3 +20,8 @@ rdoc
19
20
  pkg
20
21
 
21
22
  ## PROJECT::SPECIFIC
23
+ *.tct
24
+ *.pid
25
+ *.log
26
+ *.qgr
27
+ *.lex
data/README.rdoc CHANGED
@@ -1,9 +1,82 @@
1
- = geonames_local
1
+ = Geonames Local
2
+
3
+ Download Geonames.org data and store it in Tokyo Cabinet/Tyrant or PostgreSQL.
4
+ This makes every Geonames API operation possible on your own servers.
5
+ No hit limit, fast as possible.
6
+
7
+ == Features/Problems
8
+
9
+ - Local relational mapping of geonames data
10
+ - Map geonames fields to your db schema (not done yet)
11
+ - Countries are a static yml file :/
12
+
13
+ == Usage
14
+
15
+ geonames conf
16
+
17
+ This will generate a "geonames.yml" file in the current folder.
18
+ The file is self explanatory.
19
+
20
+ geonames -c geonames.yml
21
+
22
+ This does the actual work. Use -v for verbose output.
23
+
24
+ If you are not sure of your country code, use:
25
+
26
+ geonames list <search>
27
+
28
+
29
+ == Relational Mapping
30
+
31
+ When using PG, this gem will try to map Geonames data relationally
32
+ onto your schema. PostgreSQL is done; Tokyo still needs heavy work.
33
+
34
+
35
+ == PostgreSQL
36
+
37
+ So, supposing ActiveRecord, something like this is possible:
38
+
39
+ City.first.province.country.abbr
40
+ => "BR"
41
+
42
+ === Migration
43
+
44
+ Default PG migration:
45
+
46
+ create_table :cities do |t|
47
+ t.references :country, :null => false
48
+ t.references :province
49
+ t.string :name, :null => false
50
+ t.point :geom, :srid => 4326
51
+ t.integer :gid, :zip
52
+ end
53
+
54
+ create_table :provinces do |t|
55
+ t.references :country, :null => false
56
+ t.string :name, :null => false
57
+ t.string :abbr, :limit => 2, :null => false
58
+ t.integer :gid
59
+ end
60
+
61
+ create_table :countries do |t|
62
+ t.string :name, :limit => 30, :null => false
63
+ t.string :abbr, :limit => 2, :null => false
64
+ end
65
+
66
+ add_index :cities, :name
67
+ add_index :cities, :gid
68
+ add_index :cities, :zip
69
+ add_index :cities, :country_id
70
+ add_index :cities, :province_id
71
+ add_index :cities, :geom, :spatial => true
72
+ add_index :provinces, :name
73
+ add_index :provinces, :abbr
74
+ add_index :provinces, :gid
75
+ add_index :provinces, :country_id
2
76
 
3
- Description goes here.
4
77
 
5
78
  == Note on Patches/Pull Requests
6
-
79
+
7
80
  * Fork the project.
8
81
  * Make your feature addition or bug fix.
9
82
  * Add tests for it. This is important so I don't break it in a
data/Rakefile CHANGED
@@ -11,6 +11,7 @@ begin
11
11
  gem.homepage = "http://github.com/nofxx/geonames_local"
12
12
  gem.authors = ["Marcos Piccinini"]
13
13
  gem.add_development_dependency "rspec", ">= 1.2.9"
14
+ gem.add_dependency "tokyotyrant", ">= 1.10"
14
15
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
16
  end
16
17
  Jeweler::GemcutterTasks.new
@@ -43,3 +44,59 @@ Rake::RDocTask.new do |rdoc|
43
44
  rdoc.rdoc_files.include('README*')
44
45
  rdoc.rdoc_files.include('lib/**/*.rb')
45
46
  end
47
+
48
+ #
49
+ # Tokyo Tyrant rake tasks
50
+ #
51
namespace :tyrant do
  # Files used by the local Tokyo Tyrant server, relative to the directory
  # rake is run from.
  TYRANT_DB_FILE  = "tyrant.tct"
  TYRANT_PID_FILE = "tyrant.pid"
  TYRANT_LOG_FILE = "tyrant.log"

  desc "Start Tyrant server"
  task :start do
    raise RuntimeError, "Tyrant is already running." if tyrant_running?
    # The trailing "&" backgrounds ttserver, so this must go through the shell.
    system "ttserver -pid #{TYRANT_PID_FILE} -log #{TYRANT_LOG_FILE} #{TYRANT_DB_FILE}&"
    sleep(2) # give the server time to write its pidfile
    if tyrant_running?
      puts "Tyrant started successfully (pid #{tyrant_pid})."
    else
      puts "Failed to start Tyrant server. Check the logs."
    end
  end

  desc "Stop Tyrant server"
  task :stop do
    raise RuntimeError, "Tyrant isn't running." unless tyrant_running?
    system "kill #{tyrant_pid}"
    sleep(2) # give the server time to shut down
    if tyrant_running?
      puts "Tyrant didn't stop. Check the logs."
    else
      puts "Tyrant stopped."
    end
  end

  desc "Restart Tyrant server"
  task :restart do
    # Only stop when something is running; the :stop task raises otherwise,
    # which would abort the restart before :start gets a chance to run.
    Rake::Task["tyrant:stop"].invoke if tyrant_running?
    Rake::Task["tyrant:start"].invoke
  end

  desc "Get Tyrant Server Status"
  task :status do
    puts tyrant_running? ? "Tyrant running. (#{tyrant_pid})" : "Tyrant not running."
  end
end
88
+
89
# Reads the pid recorded in TYRANT_PID_FILE.
#
# Returns the pid as an Integer, or 0 when the pidfile is missing or does not
# start with a number.
def tyrant_pid
  return 0 unless File.exist?(TYRANT_PID_FILE)
  File.read(TYRANT_PID_FILE).to_i
end

# Tells whether the process recorded in TYRANT_PID_FILE is alive.
#
# Returns the pid (truthy) when it is, false otherwise. A pidfile that points
# at a dead process is removed as a side effect. The method must return false
# in that case: returning the (now unreadable) pid would yield 0, and 0 is
# truthy in Ruby.
def tyrant_running?
  return false unless File.exist?(TYRANT_PID_FILE)
  pid = tyrant_pid
  return pid if pid > 0 && tyrant_process_alive?(pid)
  puts "Erasing pidfile..."
  File.delete(TYRANT_PID_FILE) if File.exist?(TYRANT_PID_FILE)
  false
end

# Probes a pid with signal 0, which checks for existence without delivering
# a signal. Callers must pass a positive pid: pid 0 would address the whole
# process group.
def tyrant_process_alive?(pid)
  Process.kill(0, pid)
  true
rescue Errno::ESRCH
  false
rescue Errno::EPERM
  # The process exists but belongs to another user.
  true
end
102
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.3
data/bin/geonames ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+ $VERBOSE = true # -w
3
+ $KCODE = "u" # -Ku
4
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
5
+
6
+ require "geonames_local"
7
+ include Geonames
8
+
9
+ Geonames::CLI.work(ARGV)
@@ -0,0 +1,24 @@
1
+ :br:
2
+ :pt_br: Brasil
3
+ :en_us: Brazil
4
+ :us:
5
+ :pt_br: Estados Unidos
6
+ :en_us: United States
7
+ :cl:
8
+ :pt_br: Chile
9
+ :en_us: Chile
10
+ :ar:
11
+ :pt_br: Argentina
12
+ :en_us: Argentina
13
+ :ug:
14
+ :pt_br: Uganda
15
+ :en_us: Uganda
16
+ :td:
17
+ :pt_br: Chad
18
+ :en_us: Chad
19
+ :ru:
20
+ :pt_br: Rússia
21
+ :en_us: Russia
22
+ :it:
23
+ :pt_br: Itália
24
+ :en_us: Italy
@@ -0,0 +1,14 @@
1
+ #
2
+ # Geonames Local Config Example
3
+ #
4
+ :store: pg
5
+ :codes: [br, cl]
6
+ :level: city
7
+ :mapping:
8
+ :name: name
9
+ :geom: true
10
+ :db:
11
+ :host: localhost
12
+ :dbname: db
13
+ :user: postgres
14
+ :password:
@@ -0,0 +1,31 @@
1
module Geonames
  # Bounding-box queries on the numeric "x" and "y" columns of a Tokyo table
  # database, built with the TDBQRY query class.
  #
  # NOTE(review): TDBQRY is referenced unqualified, so it presumably relies on
  # TokyoCabinet being included at top level - confirm with the caller.
  class Geoquery
    # Half-width of the box searched around a point by Geoquery.point.
    R = 1

    # Searches the box of half-width R centred on (x, y).
    #
    # tdb  - table database handed to TDBQRY.new
    # x, y - numeric centre of the box
    #
    # Returns at most 80 results of the search, ordered by ascending "x".
    def self.point(tdb, x, y)
      qry = box_query(tdb, x - R, x + R, y - R, y + R)
      qry.setlimit(80)
      qry.search
    end

    # Searches the box minx..maxx / miny..maxy (bounds inclusive).
    #
    # Returns the search results ordered by ascending "x"; the number of
    # results is reported through #info.
    def self.area(tdb, minx, maxx, miny, maxy)
      res = box_query(tdb, minx, maxx, miny, maxy).search
      info res.length # number of results found
      res
    end

    # Builds the query shared by point and area, without running it.
    def self.box_query(tdb, minx, maxx, miny, maxy)
      qry = TDBQRY::new(tdb)
      qry.addcond("x", TDBQRY::QCNUMGE, minx.to_s)
      qry.addcond("x", TDBQRY::QCNUMLE, maxx.to_s)
      qry.addcond("y", TDBQRY::QCNUMGE, miny.to_s)
      qry.addcond("y", TDBQRY::QCNUMLE, maxy.to_s)
      qry.setorder("x", TDBQRY::QONUMASC)
      qry
    end
    private_class_method :box_query
  end
end
@@ -0,0 +1,69 @@
1
+ require "pg"
2
+
3
module Geonames
  # PostgreSQL store: writes spots into the countries / provinces / cities
  # tables using hand-built SQL on a PGconn connection.
  class Postgres
    # Process-wide caches of database ids, keyed by country code and by
    # province code respectively.
    Countries = {}
    Provinces = {}

    # opts - connection options handed straight to PGconn.new.
    def initialize(opts) #table, addr = "localhost", port = 5432)
      @conn = PGconn.new(opts)
    end

    #
    # Get Country and Province ID from the DB
    #
    # The country row is created when it does not exist yet. The province id
    # is nil when the province is unknown or not stored yet.
    #
    # Returns [country_id, province_id].
    def get_some_ids(some)
      [lookup_country_id(some.country), lookup_province_id(some.province)]
    end

    #
    # Insert a record
    #
    # Spots of kind :cities go to "cities"; anything else is written as a
    # province. Returns the id of the new row.
    def insert(some)
      country_id, province_id = get_some_ids(some)
      if some.kind == :cities
        # geom is nil when no geometry was built for the spot; store NULL then.
        geom = some.geom && some.geom.as_hex_ewkb
        write("cities", {:name => some.name, :country_id => country_id,
                :geom => geom, :gid => some.gid,
                :zip => some.zip, :province_id => province_id})
      else
        write("provinces", { :name => some.name, :abbr => some.abbr,
                :country_id => country_id, :gid => some.gid })
      end
    end

    #
    # Find a record's ID
    #
    # kind - table name, interpolated as-is (callers pass fixed names)
    # id   - geonames id; coerced to an Integer before it reaches the SQL
    #
    # Returns the id, or nil when nothing matches or the query fails.
    def find(kind, id)
      select_id("SELECT #{kind}.id FROM #{kind} WHERE #{kind}.gid = #{id.to_i}")
    end

    #
    # F'oo -> F''oo (for pg)
    def escape_name(name)
      name.gsub("'", "''")
    end

    #
    # Turn a list of values into the body of a SQL VALUES(...) clause.
    #
    # Strings become E'...' literals. That syntax treats backslashes as escape
    # characters, so they are doubled in addition to the quote doubling done
    # by escape_name. nil becomes NULL; anything else is emitted via to_s.
    def pg_values(arr)
      arr.map do |v|
        case v
        when String   then "E'#{escape_name(v.gsub("\\") { "\\\\" })}'"
        when NilClass then 'NULL'
        else v
        end
      end.join(",")
    end

    #
    # Naive PG insert ORM =D
    #
    # table - table name
    # hsh   - column => value
    #
    # Returns the id of the inserted row.
    def write(table, hsh)
      for_pg = pg_values(hsh.values)
      @conn.exec("INSERT INTO #{table} (#{hsh.keys.join(",")}) VALUES(#{for_pg}) RETURNING id")[0]["id"]
    end

    private

    # Cached id of the country with this code; selects it, or inserts the
    # country when the select finds nothing. The id is cached either way.
    def lookup_country_id(code)
      Countries[code] ||=
        select_id("SELECT countries.id FROM countries WHERE UPPER(countries.abbr) = UPPER('#{escape_name(code.to_s)}')") ||
        write("countries", {:name => country_name(code), :abbr => code })
    end

    # Name from the Codes table, falling back to the code itself for
    # countries that are not listed there.
    def country_name(code)
      entry = Codes[code.to_s.downcase.to_sym]
      (entry && entry[:pt_br]) || code
    end

    # Cached id of the province with this code, resolved through the parsed
    # provinces in Cache. nil when the province is not in the cache or not
    # in the database.
    def lookup_province_id(code)
      return Provinces[code] if Provinces[code]
      spot = (Cache[:provinces] || []).find { |prov| prov.province == code }
      Provinces[code] = spot && find("provinces", spot.gid)
    end

    # Runs a single-column id query. Best effort on purpose: a failing query
    # or an empty result both yield nil.
    def select_id(sql)
      row = @conn.exec(sql)[0]
      row && row["id"]
    rescue StandardError
      nil
    end
  end
end
data/lib/data/tokyo.rb ADDED
@@ -0,0 +1,84 @@
1
+
2
module Geonames
  # Wrapper around a Tokyo table database, reached either through Tokyo
  # Tyrant (when +conn+ is given) or through a local Tokyo Cabinet file.
  class Tokyo

    # conn     - truthy to use Tokyo Tyrant, nil/false for Tokyo Cabinet
    # resource - first argument for #open; defaults to 'localhost' (Tyrant)
    #            or 'geonames.tct' (Cabinet)
    # extra    - second argument for #open; defaults to 1978 (Tyrant) or
    #            OWRITER | OCREAT (Cabinet)
    def initialize(conn=nil, resource=nil, extra=nil)
      if conn
        require 'tokyotyrant'
        @tbl, @qry = TokyoTyrant::RDBTBL, TokyoTyrant::RDBQRY
        resource ||= 'localhost'
        extra ||= 1978
      else
        require 'tokyocabinet'
        @tbl, @qry = TokyoCabinet::TDB, TokyoCabinet::TDBQRY
        resource ||= 'geonames.tct'
        extra ||= (TokyoCabinet::TDB::OWRITER | TokyoCabinet::TDB::OCREAT)
      end
      @rdb = @tbl.new
      @rdb.open(resource, extra)
      set_indexes
    end

    # Fetches every record whose columns equal (as strings) the given
    # column => value pairs, with the query ordered by "name".
    def all(params)
      query = @qry.new(@rdb)
      params.each_pair do |column, value|
        query.addcond(column.to_s, @qry::QCSTREQ, value.to_s)
      end
      query.setorder("name", @qry::QOSTRASC)
      query.search.map { |key| @rdb.get(key) }
    end

    # Fetches the record stored under the primary key +id+.
    def find(id)
      @rdb.get(id)
    end

    # Stores +o+ under its gid, using o.to_hash as the columns. A failed put
    # is reported through #info.
    def write(o)
      info "err #{@rdb.errmsg(@rdb.ecode)}" unless @rdb.put(o.gid, o.to_hash)
    end

    # Number of records, obtained by running an unconditioned query.
    def count
      everything = @qry.new(@rdb).search
      everything.length
    end

    # Closes the database, reporting a failure on STDERR.
    def close
      return if @rdb.close
      STDERR.printf("close error: %s\n", @rdb.errmsg(@rdb.ecode))
    end

    # Declares the indexes on the "kind", "name" and "country" columns.
    def set_indexes
      @rdb.setindex("kind", @tbl::ITLEXICAL)
      @rdb.setindex("name", @tbl::ITQGRAM)
      @rdb.setindex("country", @tbl::ITLEXICAL)
    end

    # Removes every record from the database.
    def flush
      @rdb.vanish
    end

  end

end
@@ -0,0 +1,9 @@
1
+ class City
2
+ attr_accessor :country, :province, :name
3
+
4
+ def self.all
5
+ qry.addcond(QCSTREQ, 'city')
6
+ end
7
+
8
+
9
+ end
@@ -0,0 +1,61 @@
1
module Geonames
  # A country, built either from a line of the geonames countryInfo file or
  # from a hash shaped like the one #to_hash produces.
  class Country
    attr_accessor :code, :name, :gid, :iso, :capital, :pop

    # Position of the geoname id in a countryInfo row (see the column list
    # above .parse).
    GID_COLUMN = 16

    # Every country found in the default Tokyo store.
    def self.all
      Tokyo.new.all({ :kind => "country" }).map do |c|
        new(c)
      end
    end

    # [0] iso alpha2
    # [1] iso alpha3
    # [2] iso numeric
    # [3] fips code
    # [4] name
    # [5] capital
    # [6] areaInSqKm
    # [7] population
    # [8] continent
    # [9] top level domain
    # [10] Currency code
    # [11] Currency name
    # [12] Phone
    # [13] Postal Code Format
    # [14] Postal Code Regex
    # [15] Languages
    # [16] Geoname id
    # [17] Neighbours
    # [18] Equivalent Fips Code
    #
    # Builds a Country from one tab-separated row laid out as above.
    def self.parse(row)
      new(row)
    end

    # params - a tab-separated countryInfo row (String), or a Hash with the
    #          "gid", "name" and "code" keys as written by #to_hash.
    #          Anything else leaves the country empty.
    def initialize(params)
      case params
      when Hash   then load_hash(params)
      when String then parse(params)
      end
    end

    # Fills the attributes from a tab-separated countryInfo row.
    def parse(row)
      fields = row.chomp.split(/\t/)
      @iso, @iso3, @ison, @fips, @name, @capital, @area, @pop = fields[0, 8]
      @gid  = fields[GID_COLUMN] && fields[GID_COLUMN].to_i
      @code = @iso
    end

    def cities
      # qry.addcond("country", TBDQRY::QSTREQ, @code)
    end

    # Columns stored in Tokyo for this country.
    def to_hash
      { "gid" => @gid.to_s, "name" => @name, "kind" => "country", "code" => @code}
    end

    # Row written by the CSV export; matches #export_header.
    def export
      [@gid, @code, @name]
    end

    def export_header
      ["gid", "code", "name"]
    end

    private

    # Fills the attributes from a stored record (string keys).
    def load_hash(hsh)
      @gid  = hsh["gid"] && hsh["gid"].to_i
      @name = hsh["name"]
      @code = @iso = hsh["code"]
    end
  end
end
@@ -0,0 +1,18 @@
1
module Geonames
  # A province as read back from the Tokyo store.
  class Province
    attr_accessor :code, :name, :gid

    # Every record of kind "province" in the default Tokyo store, wrapped
    # in Province objects.
    def self.all
      records = Tokyo.new.all({ :kind => "province" })
      records.map { |record| new(record) }
    end

    # params - record with the "code", "name" and "gid" keys.
    def initialize(params)
      @code, @name, @gid = params["code"], params["name"], params["gid"]
    end

  end
end
@@ -0,0 +1,2 @@
1
+ class Road
2
+ end
@@ -0,0 +1,88 @@
1
module Geonames
  class Spot
    attr_accessor :gid, :name, :ascii, :lat, :lon, :country, :kind,
                  :code, :pop, :tz, :geom, :province, :zip, :abbr
    alias :x :lon
    alias :y :lat
    alias :geoname_id :gid

    #
    # = Geonames Spot
    #
    # Every geoname type will be parsed as a spot
    #
    # params - one tab-separated line of a geonames export; anything that is
    #          not a String leaves the spot empty
    # k      - :zip to read the line as a zip export row, anything else
    #          (default) to read it as a dump row
    def initialize(params, k = nil)
      return unless params.instance_of? String
      k == :zip ? parse_zip(params) : parse(params)
      if @kind == :provinces && @name
        @name.gsub!(/Estado d\w\s/, "")
        @abbr = get_abbr
      end
    end

    # Geonames doesn't have province/state abbreviations, so guess one from
    # the name: the initials of the first and last word, or the first two
    # letters of a single-word name. Returns nil for an empty name.
    # This works 75% of the time in brazil heh
    def get_abbr
      words = @name.to_s.split(" ")
      return nil if words.empty?
      if words.length > 1
        # [0, 1] takes the first character on every Ruby version.
        [words[0][0, 1], words[-1][0, 1]].map(&:upcase).join
      else
        words[0][0..1].upcase
      end
    end

    #
    # Parse Geonames Dump Export
    def parse(row)
      # chomp so the last column does not carry the line terminator.
      gid, @name, @ascii, @alternates, lat, lon, feat, kind,
        @country, cc2, adm1, adm2, adm3, adm4, pop, ele,
        gtop, @tz, @up = row.chomp.split(/\t/)

      parse_geom(lat, lon)
      @gid = gid.to_i
      @kind = human_code(kind)
      @province = adm1
      @code = adm2
    end

    #
    # Parse Geonames Zip Export
    def parse_zip(row)
      country, zip, @name, province, cc, dunno, adm1, adm2, lat, lon = row.chomp.split(/\t/)
      parse_geom(lat, lon)
      @code = adm1
      @kind = :cities
      # Keep only the part before the dash, e.g. "59000-000" -> "59000".
      @zip = zip.to_s.split("-")[0]
    end

    #
    # Parse Geom to float or GeoRuby Point
    #
    # @geom is only built when GeoRuby has been loaded; it stays nil otherwise.
    def parse_geom(lat, lon)
      @lat, @lon = lat.to_f, lon.to_f

      if defined?(GeoRuby)
        @geom = GeoRuby::SimpleFeatures::Point.from_x_y(@lon, @lat)
      end
    end

    #
    # Parse Time
    #
    # Returns the "YYYY-MM-DD" modification date of the row as a UTC Time,
    # or nil when the row had none.
    def updated_at
      stamp = @up.to_s.strip
      return nil if stamp.empty?
      Time.utc(*stamp.split("-").map { |part| part.to_i })
    end

    # For tokyo
    def to_hash
      { "gid" => @gid.to_s, "kind" => @kind.to_s, "name" => @name, "ascii" => @ascii,
        "lat" => @lat.to_s, "lon" => @lon.to_s, "tz" => @tz, "country" => @country }
    end


    # Maps a geonames feature code to the kind used by the stores.
    def human_code(code)
      case code
      when 'ADM1' then :provinces
      when 'ADM2' then :cities
      else :other
      end
    end
  end
end
@@ -1,6 +1,23 @@
1
1
require 'rubygems'
require 'logger'
require 'yaml'
require 'features/spot'
require 'features/country'
require 'features/city'
require 'data/tokyo'
require 'data/postgres'
require 'work/cli'
require 'work/dump'
require 'work/export'
4
11
 
5
12
module Geonames
  # Runtime options; the CLI merges command line flags and the YAML config
  # file into this hash.
  Opt = {}
  # Records parsed so far, per export kind.
  Cache = {:dump => [], :zip => []}
  # Country code => localized names, loaded from config/codes.yml.
  Codes = YAML.load(File.read(File.join(File.dirname(__FILE__),'config', 'codes.yml')))
  # Gem version; stripped so the newline ending the VERSION file is not part
  # of the constant.
  VERSION = File.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).strip

  # Prints +txt+ only when the :verbose option is set.
  def info(txt)
    puts(txt) if Opt[:verbose]
  end
end
data/lib/work/cli.rb ADDED
@@ -0,0 +1,144 @@
1
+ #
2
+ # Geonames Local
3
+ #
4
+ require 'optparse'
5
module Geonames
  # Command line entry point: parses the options, then lists country codes,
  # writes a sample config, exports, or downloads/parses/stores the geonames
  # data.
  class CLI

    # Parses the command line switches out of +argv+ (consumed in place, the
    # positional arguments are left behind).
    #
    # Prints the usage and exits on invalid switches, or when there is
    # neither a positional argument nor a config file.
    #
    # Returns the options as a Hash keyed by symbol.
    def self.parse_options(argv)
      options = {}

      # A standalone parser works for any array, not only for ARGV itself.
      parser = OptionParser.new do |opts|
        opts.banner = <<-BANNER
Geonames Command Line Usage:

  geonames <country code(s)> <opts>

        BANNER
        opts.on("-l", "--level LEVEL", String, "The level of logging to report" ) { |level| options[:level] = level }
        opts.on("-d", "--dump", "Dump DB before all" ) { options[:dump] = true }
        opts.separator ""
        opts.separator "Config file:"
        opts.on("-c", "--config CONFIG", String, "Geonames Config file path" ) { |file| options[:config] = file }
        opts.separator ""
        opts.separator "Tyrant Options:"
        opts.on("-t", "--tyrant", "Use tyrant" ) { options[:tyrant] = true }
        opts.on("-s", "--server SERVER", String, "Tyrant Server URL" ) { |url| options[:server] = url }
        opts.on("-p", "--port PORT", Integer, "Tyrant Server Port") { |val| options[:port] = val.to_i }
        opts.separator ""
        opts.separator "Common Options:"
        opts.on("-h", "--help", "Show this message" ) { puts opts; exit }
        opts.on("-v", "--verbose", "Turn on logging to STDOUT" ) { |bool| options[:verbose] = bool }
        opts.on("-V", "--version", "Show version") { puts Geonames::VERSION; exit }
        opts.separator ""
      end

      begin
        parser.parse!(argv)
        if argv.empty? && !options[:config]
          puts parser
          exit
        end
      rescue OptionParser::ParseError
        puts parser
        exit
      end
      options
    end
    private_class_method :parse_options

    # Runs the command given in +argv+:
    #
    #   list|codes [search]  - print the known country codes
    #   scaff|conf [name]    - write a sample config to <name>.yml
    #   csv|json             - export the stored countries to CSV
    #   (anything else)      - download, parse and store Opt[:codes]
    def self.work(argv)
      trap(:INT) { stop! }
      trap(:TERM) { stop! }
      Opt.merge! parse_options(argv)
      Opt.merge! YAML.load(File.read(Opt[:config])) if Opt[:config]

      if argv[0] =~ /list|codes/
        list_codes(argv[1])
        exit
      end

      if argv[0] =~ /scaff|conf/
        write_sample_config((argv[1] || "geonames") + ".yml")
        exit
      end
      # Loaded lazily: GeoRuby is only needed when geometries are mapped.
      require "geo_ruby" if Opt[:mapping] && Opt[:mapping][:geom]

      if argv[0] =~ /csv|json/
        Geonames::Export.new(Country.all).to_csv
      else
        Geonames::Dump.work(Opt[:codes], :zip) #rescue puts "Command not found: #{comm} #{@usage}"
        Geonames::Dump.work(Opt[:codes], :dump) #rescue puts "Command not found: #{comm} #{@usage}"
        info "\n---\nTotal #{Cache[:dump].length} parsed. #{Cache[:zip].length} zips."
        info "Join dump << zip"
        unify!
        write_to_store!
      end
    end

    # Prints "<english name>: <code>" for every known country, or only for
    # those matching +search+ (used as a case-insensitive pattern against the
    # names and the code).
    def self.list_codes(search)
      Codes.each do |key, val|
        str = [val.values, key.to_s].join(" ").downcase
        next if search && str !~ /#{search.downcase}/
        puts "#{val[:en_us]}: #{key}"
      end
    end

    # Copies the bundled sample config to +fname+ unless that file exists.
    def self.write_sample_config(fname)
      if File.exist?(fname)
        puts "File exists."
      else
        puts "Writing to #{fname}"
        sample = File.join(File.dirname(__FILE__), '..', 'config', 'geonames.yml')
        # Plain read/write: no shell is involved, so fname needs no escaping.
        File.open(fname, 'w') { |f| f.write(File.read(sample)) }
      end
    end

    # Writes the parsed provinces, then the parsed cities, to the store
    # selected by Opt[:store]. Aborts when no usable store is configured.
    def self.write_to_store!
      store = Opt[:store] && Opt[:store].to_sym
      db = case store
           when :tyrant then Geonames::Tokyo.new(Opt[:tyrant])
           when :pg     then Geonames::Postgres.new(Opt[:db])
           else abort "No store defined!"
           end

      groups = Cache[:dump].group_by(&:kind)
      Cache[:provinces] = groups[:provinces] || []
      # ensure this order....
      do_write(db, groups[:provinces])
      do_write(db, groups[:cities])
    end

    # Inserts every record of +val+ that +db+ does not know yet, and reports
    # the count and timing through #info. A nil or empty +val+ is a no-op.
    #
    # NOTE(review): this uses db.find(kind, gid) and db.insert(record), which
    # Geonames::Postgres provides; confirm the Tokyo store before using it here.
    def self.do_write(db, val)
      return if val.nil? || val.empty?
      key = val[0].kind
      start = Time.now
      writt = 0
      info "\nWriting #{key}..."
      val.each do |v|
        unless db.find v.kind, v.gid
          db.insert v
          writt += 1
        end
      end
      total = Time.now - start
      # Guard the rate: dividing by a zero duration cannot be turned into an Integer.
      rate = total > 0 ? (writt / total).to_i : writt
      info "#{writt} #{key} written in #{total} sec (#{rate}/s)"
    end

    # Copies the zip code of the first zip record with the same code onto
    # every dump record.
    def self.unify!
      start = Time.now
      # Index the zip records once instead of scanning them for every spot;
      # keeping the first record per code matches a linear first-match search.
      zips = {}
      Cache[:zip].each { |z| zips[z.code] = z unless zips.key?(z.code) }
      Cache[:dump].each do |spot|
        other = zips[spot.code]
        spot.zip = other.zip if other
      end
      info "Done. #{(Time.now-start).to_i}s"
    end

    # Signal handler: says goodbye and exits.
    def self.stop!
      puts "Closing Geonames..."
      exit
    end

  end

end
data/lib/work/dump.rb ADDED
@@ -0,0 +1,76 @@
1
module Geonames
  # Downloads the geonames export files for a list of country codes and
  # parses them into Cache[kind].
  class Dump
    URL = "http://download.geonames.org/export/"
    TMP = "/tmp/geonames/"

    # codes - list of country codes ("country" selects countryInfo.txt);
    #         anything that does not respond to #each is ignored
    # kind  - export to work on (:dump or :zip); also the URL and temp
    #         sub-directory name
    def initialize(codes, kind)
      @codes = codes
      @kind = kind
      return unless codes.respond_to? :each
      codes.each do |code|
        info "\nWorking on #{kind} for #{code}"
        file = get_file(code)
        download file
        uncompress file unless code == "country"
        parse file
      end
    end

    # Name of the remote file for a country code.
    def get_file(code)
      code == "country" ? "countryInfo.txt" : "#{code.upcase}.zip"
    end

    # Fetches +file+ into the temp directory of this kind with curl, unless
    # it is already there.
    def download(file)
      Dir.mkdir(TMP) unless File.exist?(TMP)
      Dir.mkdir(work_dir) unless File.exist?(work_dir)
      fname = File.join(work_dir, file)
      return if File.exist?(fname)
      # Argument list instead of a command string: nothing is shell-expanded.
      system("curl", "#{URL}#{@kind}/#{file}", "-o", fname)
    end

    # Unzips +file+ in place, quietly overwriting older extracted files.
    def uncompress(file)
      info "Uncompressing #{file}"
      system("unzip", "-quo", File.join(work_dir, file), "-d", work_dir)
    end

    # Turns one line of an export into a Spot.
    #
    # Returns nil for comment/header lines and, for the :dump kind, for
    # lines without an ADM feature code unless Opt[:level] is "all".
    def parse_line(l)
      return if l =~ /^#|^iso/i
      if @kind == :dump
        if l =~ /^\D/
          # NOTE(review): the parsed country is discarded and the line still
          # becomes a Spot below - confirm the intent.
          Country.parse(l)
        else
          if Opt[:level] != "all"
            return unless l =~ /ADM\d/ # ADM2 => cities
          end
        end
      end
      Spot.new(l, @kind)
    end

    # Parses the extracted text file belonging to +file+ and appends every
    # record to Cache[kind]. A missing file is reported and skipped.
    def parse(file)
      red = 0
      start = Time.now
      # Only the extension changes: "BR.zip" -> "BR.txt".
      File.open(File.join(work_dir, file.sub(/\.zip\z/, ".txt"))) do |f|
        while line = f.gets
          if record = parse_line(line)
            Cache[@kind] << record
            red += 1
          end
        end
        total = Time.now - start
        # Guard the rate: dividing by a zero duration cannot be turned into an Integer.
        rate = total > 0 ? (red / total).to_i : red
        info "#{red} #{@kind} entries parsed in #{total} sec (#{rate}/s)"
      end
    rescue Errno::ENOENT
      info "Failed to download #{file}, skipping."
    end


    # Convenience entry point; all the work happens in the constructor.
    def self.work(codes=:all, kind=:dump)
      new(codes, kind)
    end

    private

    # Temp directory holding the files of this kind.
    def work_dir
      File.join(TMP, @kind.to_s)
    end

  end
end
@@ -0,0 +1,21 @@
1
+ require 'csv'
2
+
3
module Geonames
  # Writes a list of exportable objects (responding to #export and
  # #export_header) to a CSV file.
  class Export

    # data - list of objects to export.
    def initialize(data)
      info "Starting export.."
      @data = data
    end

    # Writes the header of the first object followed by one row per object.
    #
    # file - path of the CSV to write (default "export.csv")
    #
    # With no data the file is created empty, since there is no object to
    # take the header from.
    def to_csv(file = "export.csv")
      info "Writing #{file} (#{@data.length} objects)"
      CSV.open(file, 'w') do |csv|
        unless @data.empty?
          csv << @data[0].export_header
          @data.each { |o| csv << o.export }
        end
      end
      info "Export done."
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ describe "Tokyo" do
4
+
5
+ it "should write down a spot" do
6
+ t = Geonames::Tokyo.new('localhost', 1978)
7
+ m = mock(Geonames::Spot, { :gid => 888, :to_hash => { "gid" => 888, "kind" => "city", "lat" => 5.5 }})
8
+ t.write(m)
9
+ end
10
+
11
+ it "should read it up" do
12
+ t = Geonames::Tokyo.new('localhost', 1978)
13
+ record = t.find(888)
14
+ record.should be_instance_of Geonames::Spot
15
+ end
16
+
17
+ it "should not duplicate" do
18
+ t = Geonames::Tokyo.new('localhost', 1978)
19
+ t.all({ :gid => 888}).length.should eql(1)
20
+ end
21
+
22
+ it "should return all countries" do
23
+ all = Geonames::Country.all
24
+ all.should be_instance_of Array
25
+ all[0].should be_instance_of Geonames::Country
26
+ all[0].gid.should eql(1)
27
+ end
28
+
29
+ end
@@ -5,7 +5,7 @@ describe "GeonamesLocal" do
5
5
 
6
6
  describe "Parsing" do
7
7
  before do
8
- @spot = Geonames::Spot.new("6319037\tMaxaranguape\tMaxaranguape\t-5.46874226086957\t-35.3565714695652\tA\tADM2\tBR\t22\t2407500\t6593\t12\tAmerica/Recife\t2006-12-17")
8
+ @spot = Geonames::Spot.new("6319037\tMaxaranguape\tMaxaranguape\t\t-5.46874226086957\t-35.3565714695652\tA\tADM2\tBR\t22\t2407500\t6593\t\t12\t\t\t\tAmerica/Recife\t2006-12-17")
9
9
  end
10
10
 
11
11
  it "should parse geoid integer" do
@@ -41,6 +41,42 @@ describe "GeonamesLocal" do
41
41
  end
42
42
  end
43
43
 
44
+ describe "More Parseing" do
45
+ before do
46
+ @spot = Geonames::Spot.new("3384862\tRiacho Zuza\tRiacho Zuza\t\t-9.4333333\t-37.6666667\tH\tSTMI\tBR\t\t02\t\t\t\t0\t\t241\tAmerica/Maceio\t1993-12-17\n")
47
+ end
48
+
49
+ it "should parse geoid integer" do
50
+ @spot.geoname_id.should eql(3384862)
51
+ end
52
+
53
+ it "should parse name" do
54
+ @spot.name.should eql("Riacho Zuza")
55
+ @spot.ascii.should eql("Riacho Zuza")
56
+ end
57
+
58
+ it "should parse geostuff" do
59
+ @spot.lat.should be_close(-9.4333333, 0.001)
60
+ @spot.lon.should be_close(-37.6666667, 0.001)
61
+ end
62
+
63
+ it "should parse spot kind" do
64
+ @spot.kind.should eql(:other)
65
+ end
66
+
67
+ it "should parse spot country" do
68
+ @spot.country.should eql("BR")
69
+ end
70
+
71
+ it "shuold parse timezone" do
72
+ @spot.tz.should eql("America/Maceio")
73
+ end
74
+
75
+ it "should parse updated_at" do
76
+ @spot.updated_at.should be_instance_of(Time)
77
+ @spot.updated_at.day.should eql(17)
78
+ end
79
+ end
44
80
  end
45
81
 
46
82
  # 6319037 Maxaranguape Maxaranguape -5.46874226086957 -35.3565714695652 A ADM2 BR 22 2407500 6593 12 America/Recife 2006-12-17
data/task/benchmark.rb ADDED
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # DB => br, cl ~ 6k objects
4
+ #
5
+ require 'benchmark'
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+ require 'geonames_local'
8
+
9
+ # @db.flush
10
+
11
+ def b1(db)
12
+ puts "#{db.count} Objects"
13
+ Benchmark.bmbm do |b|
14
+ b.report("All Country") { db.all({ :kind => "country" })}
15
+ b.report("Find by GID") { db.find(888) }
16
+ b.report("Find by name") { db.all({ :name => "Maxaranguape"}) }
17
+ b.report("Find on country") { db.all({ :country => "CL"}) }
18
+ end
19
+ end
20
+
21
+ print "Tyrant => "
22
+ b1(Geonames::Tokyo.new(:tyrant))
23
+
24
+
25
+ print "Cabinet => "
26
+ b1(Geonames::Tokyo.new)
27
+
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # DB => br, cl ~ 6k objects
4
+ #
5
+ require 'benchmark'
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+ require 'geonames_local'
8
+ include Geonames
9
+ # @db.flush
10
+
11
+
12
+ def b1(db)
13
+ puts "#{db.count} Objects"
14
+ Benchmark.bmbm do |b|
15
+ b.report("All Country") { Country.all }
16
+ b.report("Find by GID") { db.find(888) }
17
+ b.report("Find by name") { db.all({ :name => "Maxaranguape"}) }
18
+ b.report("Find on country") { db.all({ :country => "CL"}) }
19
+ end
20
+ end
21
+
22
+ print "Cabinet => "
23
+ b1(Tokyo.new)
24
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: geonames_local
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos Piccinini
@@ -9,8 +9,8 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-13 00:00:00 -02:00
13
- default_executable: geoname
12
+ date: 2010-01-28 00:00:00 -02:00
13
+ default_executable: geonames
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rspec
@@ -22,10 +22,20 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: 1.2.9
24
24
  version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: tokyotyrant
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "1.10"
34
+ version:
25
35
  description: Dump and feed a tokyo cabinet for local geonames search
26
36
  email: x@nofxx.com
27
37
  executables:
28
- - geoname
38
+ - geonames
29
39
  extensions: []
30
40
 
31
41
  extra_rdoc_files:
@@ -37,10 +47,28 @@ files:
37
47
  - LICENSE
38
48
  - README.rdoc
39
49
  - Rakefile
50
+ - VERSION
51
+ - bin/geonames
52
+ - lib/config/codes.yml
53
+ - lib/config/geonames.yml
54
+ - lib/data/geoquery.rb
55
+ - lib/data/postgres.rb
56
+ - lib/data/tokyo.rb
57
+ - lib/features/city.rb
58
+ - lib/features/country.rb
59
+ - lib/features/province.rb
60
+ - lib/features/road.rb
61
+ - lib/features/spot.rb
40
62
  - lib/geonames_local.rb
63
+ - lib/work/cli.rb
64
+ - lib/work/dump.rb
65
+ - lib/work/export.rb
66
+ - spec/data/tokyo_spec.rb
41
67
  - spec/geonames_local_spec.rb
42
68
  - spec/spec.opts
43
69
  - spec/spec_helper.rb
70
+ - task/benchmark.rb
71
+ - task/benchmark_cabinet.rb
44
72
  has_rdoc: true
45
73
  homepage: http://github.com/nofxx/geonames_local
46
74
  licenses: []
@@ -70,5 +98,6 @@ signing_key:
70
98
  specification_version: 3
71
99
  summary: Dump and feed a tokyo local geonames db
72
100
  test_files:
73
- - spec/spec_helper.rb
101
+ - spec/data/tokyo_spec.rb
74
102
  - spec/geonames_local_spec.rb
103
+ - spec/spec_helper.rb
data/bin/geoname DELETED
File without changes