worlddb 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -27,7 +27,8 @@ lib/worlddb/models/tagdb/tagging.rb
27
27
  lib/worlddb/models/usage.rb
28
28
  lib/worlddb/patterns.rb
29
29
  lib/worlddb/reader.rb
30
- lib/worlddb/readers/base.rb
30
+ lib/worlddb/reader_file.rb
31
+ lib/worlddb/reader_zip.rb
31
32
  lib/worlddb/readers/city.rb
32
33
  lib/worlddb/readers/country.rb
33
34
  lib/worlddb/readers/lang.rb
data/README.md CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  world.db Command Line Tool in Ruby
4
4
 
5
- * home :: [github.com/geraldb/world.db.ruby](https://github.com/geraldb/world.db.ruby)
6
- * bugs :: [github.com/geraldb/world.db.ruby/issues](https://github.com/geraldb/world.db.ruby/issues)
5
+ * home :: [github.com/worlddb/world.db.ruby](https://github.com/worlddb/world.db.ruby)
6
+ * bugs :: [github.com/worlddb/world.db.ruby/issues](https://github.com/worlddb/world.db.ruby/issues)
7
7
  * gem :: [rubygems.org/gems/worlddb](https://rubygems.org/gems/worlddb)
8
8
  * rdoc :: [rubydoc.info/gems/worlddb](http://rubydoc.info/gems/worlddb)
9
9
  * forum :: [groups.google.com/group/openmundi](https://groups.google.com/group/openmundi)
data/Rakefile CHANGED
@@ -5,23 +5,24 @@ require './lib/worlddb/version.rb'
5
5
  Hoe.spec 'worlddb' do
6
6
 
7
7
  self.version = WorldDb::VERSION
8
-
8
+
9
9
  self.summary = "worlddb - world.db command line tool"
10
10
  self.description = summary
11
11
 
12
- self.urls = ['https://github.com/geraldb/world.db.ruby']
13
-
12
+ self.urls = ['https://github.com/worlddb/world.db.ruby']
13
+
14
14
  self.author = 'Gerald Bauer'
15
15
  self.email = 'openmundi@googlegroups.com'
16
16
 
17
17
  self.extra_deps = [
18
18
  ['props'], # settings / prop(ertie)s / env / INI
19
19
  ['logutils'], # logging
20
- ['textutils', '>= 0.9.4'], # e.g. >= 0.6 && <= 1.0 ## will include logutils, props
20
+ ['textutils', '>= 0.9.9'], # e.g. >= 0.6 && <= 1.0 ## will include logutils, props
21
21
  ['tagutils'], # tags n categories for activerecord
22
22
 
23
23
  ## 3rd party
24
24
  ['gli', '>= 2.9'],
25
+ ['rubyzip'], ## todo: pull in via textutils ??
25
26
  ['activerecord'] # NB: will include activesupport,etc.
26
27
  ### ['sqlite3', '~> 1.3'] # NB: install on your own; remove dependency
27
28
  ]
data/lib/worlddb.rb CHANGED
@@ -5,15 +5,20 @@
5
5
 
6
6
  # core and stlibs
7
7
 
8
- require 'yaml'
9
8
  require 'pp'
10
9
  require 'logger'
11
10
  require 'optparse'
12
11
  require 'fileutils'
12
+ require 'uri'
13
13
  require 'erb'
14
+ require 'json'
15
+ require 'yaml'
16
+
14
17
 
15
18
  # 3rd party gems / libs
16
19
 
20
+ require 'zip'
21
+
17
22
  require 'active_record' ## todo: add sqlite3? etc.
18
23
 
19
24
  require 'logutils'
@@ -52,7 +57,6 @@ require 'worlddb/models/tagdb/tagging'
52
57
  require 'worlddb/schema' # NB: requires worlddb/models (include WorldDB::Models)
53
58
  require 'worlddb/matcher'
54
59
 
55
- require 'worlddb/readers/base'
56
60
  require 'worlddb/readers/lang'
57
61
  require 'worlddb/readers/usage'
58
62
  require 'worlddb/readers/country'
@@ -60,6 +64,8 @@ require 'worlddb/readers/region'
60
64
  require 'worlddb/readers/city'
61
65
 
62
66
  require 'worlddb/reader'
67
+ require 'worlddb/reader_file'
68
+ require 'worlddb/reader_zip'
63
69
  require 'worlddb/deleter'
64
70
  require 'worlddb/stats'
65
71
  require 'worlddb/stats_comp'
@@ -67,14 +73,6 @@ require 'worlddb/stats_comp'
67
73
 
68
74
  module WorldDb
69
75
 
70
- def self.banner
71
- "worlddb/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
72
- end
73
-
74
- def self.root
75
- "#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
76
- end
77
-
78
76
  def self.main
79
77
  require 'worlddb/cli/main'
80
78
  ## Runner.new.run(ARGV) - old code
@@ -99,6 +97,12 @@ module WorldDb
99
97
  reader.load_setup( setup )
100
98
  end
101
99
 
100
+ def self.read_setup_from_zip( zip_name, setup, include_path, opts={} ) ## todo/check - use a better (shorter) name ??
101
+ reader = ZipReader.new( zip_name, include_path, opts )
102
+ reader.load_setup( setup )
103
+ reader.close
104
+ end
105
+
102
106
  def self.read_all( include_path, opts={} ) # load all builtins (using plain text reader); helper for convenience
103
107
  read_setup( 'setups/all', include_path, opts )
104
108
  end # method read_all
@@ -129,14 +133,9 @@ module WorldDb
129
133
 
130
134
  end # module WorldDb
131
135
 
132
- ###########################################
133
- # fix: remove old alias for WorldDb ??
134
- WorldDB = WorldDb
135
-
136
136
 
137
137
  if __FILE__ == $0
138
138
  WorldDb.main
139
139
  else
140
- # say hello
141
- puts WorldDb.banner
142
- end
140
+ puts WorldDb.banner # say hello
141
+ end
@@ -97,20 +97,31 @@ module Matcher
97
97
  end
98
98
 
99
99
  def match_regions_for_country( name, &blk )
100
- ## todo: check if there's a better (more ruby way) to pass along code block ??
101
- match_xxx_for_country( name, 'regions', &blk )
100
+ ## also try synonyms e.g. old regions (if not match for states)
101
+ found = match_xxx_for_country( name, 'states', &blk )
102
+ found = match_xxx_for_country( name, 'regions', &blk ) unless found
103
+ found
102
104
  end
103
105
 
104
- def match_regions_abbr_for_country( name, &blk )
105
- match_xxx_for_country( name, 'regions\.abbr', &blk ) # NB: . gets escaped for regex, that is, \.
106
+ def match_regions_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
107
+ ## also try synonyms e.g. old regions (if not match for states)
108
+ found = match_xxx_for_country( name, 'states\.abbr', &blk )
109
+ found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
110
+ found
106
111
  end
107
112
 
108
113
  def match_regions_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
109
- match_xxx_for_country( name, 'regions\.iso', &blk )
114
+ ## also try synonyms e.g. old regions (if not match for states)
115
+ found = match_xxx_for_country( name, 'states\.iso', &blk )
116
+ found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
117
+ found
110
118
  end
111
119
 
112
120
  def match_regions_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
113
- match_xxx_for_country( name, 'regions\.nuts', &blk )
121
+ ## also try synonyms e.g. old regions (if not match for states)
122
+ found = match_xxx_for_country( name, 'states\.nuts', &blk )
123
+ found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
124
+ found
114
125
  end
115
126
 
116
127
 
@@ -3,7 +3,7 @@
3
3
  module WorldDb
4
4
 
5
5
 
6
- class Reader
6
+ class ReaderBase
7
7
 
8
8
  include LogUtils::Logging
9
9
 
@@ -14,19 +14,13 @@ class Reader
14
14
 
15
15
  ## value helpers e.g. is_year?, is_taglist? etc.
16
16
  include TextUtils::ValueHelper
17
-
18
17
 
19
18
 
20
- attr_reader :include_path
21
-
22
19
  def skip_tags?() @skip_tags == true; end
23
20
  def strict?() @strict == true; end
24
21
 
25
22
 
26
- def initialize( include_path, opts = {} )
27
-
28
- @include_path = include_path
29
-
23
+ def initialize( opts={} )
30
24
  ## option: do NOT generate/add any tags for countries/regions/cities
31
25
  @skip_tags = opts[:skip_tags].present? ? true : false
32
26
  ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
@@ -35,11 +29,7 @@ class Reader
35
29
 
36
30
 
37
31
  def load_setup( name )
38
- path = "#{include_path}/#{name}.txt"
39
-
40
- logger.info "parsing data '#{name}' (#{path})..."
41
-
42
- reader = FixtureReader.new( path )
32
+ reader = create_fixture_reader( name )
43
33
 
44
34
  reader.each do |fixture|
45
35
  load( fixture )
@@ -56,11 +46,13 @@ class Reader
56
46
  elsif name =~ /^lang/
57
47
  ## todo: pass along opts too
58
48
  ## use match_usage( name ) - why? why not?? ???
59
- LangReader.new( include_path ).read( name )
49
+ r = create_lang_reader( name )
50
+ r.read()
60
51
  elsif name =~ /\/lang/
61
52
  ## todo: pass along opts too
62
53
  ## use match_usage( name ) - why? why not?? ???
63
- UsageReader.new( include_path ).read( name )
54
+ r = create_usage_reader( name )
55
+ r.read()
64
56
  elsif name =~ /\/fifa/
65
57
  load_xxx( 'fifa', name )
66
58
  elsif name =~ /\/iso3/
@@ -72,22 +64,27 @@ class Reader
72
64
  elsif name =~ /^tag.*\.\d$/
73
65
  ## todo: pass along opts too
74
66
  ## use match_tags( name ) - why? why not?? ???
75
- TagDb::TagReader.new( include_path ).read( name )
67
+
68
+ ######## FIX: add back again
69
+ ### fix: use read() only, that is, w/o name
70
+ ## r = create_tag_reader( name )
71
+ ## r.read()
76
72
  elsif match_countries_for_continent( name ) do |continent| # # e.g. africa/countries or america/countries
77
73
  ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
78
74
  ## auto-add continent (from folder structure) as tag
79
75
  ## fix: allow dash/hyphen/minus in tag
80
76
 
81
- r = CountryReader.new( include_path )
82
- r.read( name, tags: continent.tr('-', '_') )
77
+ ### todo/fix: add opts - how??
78
+ r = create_country_reader( name, tags: continent.tr('-', '_') )
79
+ r.read()
83
80
  end
84
81
  elsif match_cities_for_country( name ) do |country_key| # name =~ /\/([a-z]{2})\/cities/
85
82
  ## auto-add required country code (from folder structure)
86
83
  country = Country.find_by_key!( country_key )
87
84
  logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"
88
85
 
89
- r = CityReader.new( include_path )
90
- r.read( name, country_id: country.id )
86
+ r = create_city_reader( name, country_id: country.id )
87
+ r.read()
91
88
  end
92
89
  elsif match_regions_abbr_for_country( name ) do |country_key| # name =~ /\/([a-z]{2})\/regions\.abbr/
93
90
  load_regions_xxx( country_key, 'abbr', name )
@@ -103,8 +100,8 @@ class Reader
103
100
  country = Country.find_by_key!( country_key )
104
101
  logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"
105
102
 
106
- r = RegionReader.new( include_path )
107
- r.read( name, country_id: country.id )
103
+ r = create_region_reader( name, country_id: country.id )
104
+ r.read()
108
105
  end
109
106
  else
110
107
  logger.error "unknown world.db fixture type >#{name}<"
@@ -118,7 +115,7 @@ class Reader
118
115
  country = Country.find_by_key!( country_key )
119
116
  logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"
120
117
 
121
- reader = HashReaderV2.new( name, include_path )
118
+ reader = create_hash_reader( name )
122
119
 
123
120
  reader.each do |key, value|
124
121
  region = Region.find_by_country_id_and_key!( country.id, key )
@@ -130,7 +127,7 @@ class Reader
130
127
 
131
128
  ### use ContinentRefReader
132
129
  def load_continent_refs( name )
133
- reader = HashReaderV2.new( name, include_path )
130
+ reader = create_hash_reader( name )
134
131
 
135
132
  reader.each do |key, value|
136
133
  country = Country.find_by_key!( key )
@@ -142,7 +139,7 @@ class Reader
142
139
 
143
140
  ### use ContinentDef Reader
144
141
  def load_continent_defs( name, more_attribs={} )
145
- reader = ValuesReaderV2.new( name, include_path, more_attribs )
142
+ reader = create_values_reader( name, more_attribs )
146
143
 
147
144
  reader.each_line do |attribs, values|
148
145
 
@@ -168,7 +165,7 @@ class Reader
168
165
 
169
166
  ### use CountryAttr Reader
170
167
  def load_xxx( xxx, name )
171
- reader = HashReaderV2.new( name, include_path )
168
+ reader = create_hash_reader( name )
172
169
 
173
170
  reader.each do |key, value|
174
171
  country = Country.find_by_key!( key )
@@ -177,5 +174,5 @@ class Reader
177
174
  end
178
175
  end
179
176
 
180
- end # class Reader
177
+ end # class ReaderBase
181
178
  end # module WorldDb
@@ -0,0 +1,80 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+ class Reader < ReaderBase
6
+
7
+ def initialize( include_path, opts={} )
8
+ super( opts )
9
+
10
+ @include_path = include_path
11
+ end
12
+
13
+
14
+ def create_fixture_reader( name )
15
+ path = "#{@include_path}/#{name}.txt"
16
+ logger.info "parsing data (setup) '#{name}' (#{path})..."
17
+
18
+ FixtureReader.from_file( path )
19
+ end
20
+
21
+ def create_lang_reader( name )
22
+ path = "#{@include_path}/#{name}.yml" ## hash reader - use .yml??
23
+ logger.info "parsing data (lang) '#{name}' (#{path})..."
24
+
25
+ LangReader.from_file( path )
26
+ end
27
+
28
+ def create_usage_reader( name )
29
+ path = "#{@include_path}/#{name}.yml" ## hash reader - use .yml??
30
+ logger.info "parsing data (usage) '#{name}' (#{path})..."
31
+
32
+ UsageReader.from_file( path )
33
+ end
34
+
35
+
36
+ def create_country_reader( name, more_attribs={} )
37
+ path = "#{@include_path}/#{name}.txt"
38
+ logger.info "parsing data (country) '#{name}' (#{path})..."
39
+
40
+ CountryReader.from_file( path, more_attribs )
41
+ end
42
+
43
+ def create_region_reader( name, more_attribs={} )
44
+ path = "#{@include_path}/#{name}.txt"
45
+ logger.info "parsing data (region) '#{name}' (#{path})..."
46
+
47
+ RegionReader.from_file( path, more_attribs )
48
+ end
49
+
50
+ def create_city_reader( name, more_attribs={} )
51
+ path = "#{@include_path}/#{name}.txt"
52
+ logger.info "parsing data (city) '#{name}' (#{path})..."
53
+
54
+ CityReader.from_file( path, more_attribs )
55
+ end
56
+
57
+
58
+ def create_hash_reader( name )
59
+ path = "#{@include_path}/#{name}.yml"
60
+ logger.info "parsing data (hash) '#{name}' (#{path})..."
61
+
62
+ HashReader.from_file( path )
63
+ end
64
+
65
+ def create_values_reader( name, more_attribs={} )
66
+ path = "#{@include_path}/#{name}.txt"
67
+ logger.info "parsing data (values) '#{name}' (#{path})..."
68
+
69
+ ValuesReader.from_file( path, more_attribs )
70
+ end
71
+
72
+
73
+ # def create_tag_reader( name )
74
+ # ## fix: change to new from_file() style
75
+ # TagDb::TagReader.new( @include_path )
76
+ # end
77
+
78
+
79
+ end # class Reader
80
+ end # module WorldDb
@@ -0,0 +1,154 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+ class ZipReader < ReaderBase
6
+
7
+
8
+ def initialize( name, include_path, opts = {} )
9
+ super( opts )
10
+
11
+ ## todo/fix: make include_path an opts (included in opts?) - why? why not??
12
+ path = "#{include_path}/#{name}.zip"
13
+
14
+ ## todo: check if zip exists
15
+ @zip_file = Zip::File.open( path ) ## NOTE: do NOT create if file is missing; let it crash
16
+
17
+ ### allow prefix (path) in name
18
+ ### e.g. assume all files relative to setup manifest
19
+ ## e.g. at-austria-master/setups/all.txt or
20
+ ## be-belgium-master/setups/all.txt
21
+ ## for
22
+ ## setups/all.txt
23
+ ###
24
+ ## will get (re)set w/ fixture/setup reader
25
+ ##
26
+ ## todo/fix: change/rename to @relative_path ?? - why? why not?
27
+ @zip_prefix = ''
28
+ end
29
+
30
+ def close
31
+ ## todo/check: add a close method - why? why not ???
32
+ @zip_file.close
33
+ end
34
+
35
+
36
+
37
+ def create_fixture_reader( name )
38
+ ## e.g. pass in => setups/all or setups/test etc. e.g. w/o .txt extension
39
+ query = "**/#{name}.txt"
40
+
41
+ ## note: returns an array of Zip::Entry
42
+ candidates = @zip_file.glob( query )
43
+ pp candidates
44
+
45
+ ## use first candidates entry as match
46
+ ## todo/fix: issue warning if more than one entries/matches!!
47
+
48
+ ## get fullpath e.g. at-austria-master/setups/all.txt
49
+ path = candidates[0].name
50
+ logger.debug " zip entry path >>#{path}<<"
51
+
52
+ ## cut-off at-austria-master/ NOTE: includes trailing slash (if present)
53
+ ## logger.debug " path.size #{path.size} >>#{path}<<"
54
+ ## logger.debug " name.size #{name.size+4} >>#{name}<<"
55
+
56
+ ## note: add +4 for extension (.txt)
57
+ @zip_prefix = path[ 0...(path.size-(name.size+4)) ]
58
+ logger.debug " zip entry prefix >>#{@zip_prefix}<<"
59
+
60
+ logger.info "parsing data (setup) in zip '#{name}' (#{path})..."
61
+
62
+ FixtureReader.from_zip( @zip_file, path )
63
+ end
64
+
65
+
66
+ def create_lang_reader( name )
67
+ path = name_to_zip_entry_path( name, '.yml' ) ## hash reader - use .yml??
68
+ logger.info "parsing data (lang) in zip '#{name}' (#{path})..."
69
+
70
+ LangReader.from_zip( @zip_file, path )
71
+ end
72
+
73
+ def create_usage_reader( name )
74
+ path = name_to_zip_entry_path( name, '.yml' ) ## hash reader - use .yml??
75
+ logger.info "parsing data (usage) in zip '#{name}' (#{path})..."
76
+
77
+ UsageReader.from_zip( @zip_file, path )
78
+ end
79
+
80
+
81
+ def create_country_reader( name, more_attribs={} )
82
+ path = name_to_zip_entry_path( name )
83
+ logger.info "parsing data (country) in zip '#{name}' (#{path})..."
84
+
85
+ CountryReader.from_zip( @zip_file, path, more_attribs )
86
+ end
87
+
88
+ def create_region_reader( name, more_attribs={} )
89
+ path = name_to_zip_entry_path( name )
90
+ logger.info "parsing data (region) in zip '#{name}' (#{path})..."
91
+
92
+ RegionReader.from_zip( @zip_file, path, more_attribs )
93
+ end
94
+
95
+ def create_city_reader( name, more_attribs={} )
96
+ path = name_to_zip_entry_path( name )
97
+ logger.info "parsing data (city) in zip '#{name}' (#{path})..."
98
+
99
+ CityReader.from_zip( @zip_file, path, more_attribs )
100
+ end
101
+
102
+
103
+ def create_hash_reader( name )
104
+ path = name_to_zip_entry_path( name, '.yml' ) ## hash reader - use .yml??
105
+ logger.info "parsing data (hash) in zip '#{name}' (#{path})..."
106
+
107
+ HashReader.from_zip( @zip_file, path )
108
+ end
109
+
110
+ def create_values_reader( name, more_attribs={} )
111
+ path = name_to_zip_entry_path( name )
112
+ logger.info "parsing data (values) in zip '#{name}' (#{path})..."
113
+
114
+ ValuesReader.from_zip( @zip_file, path, more_attribs )
115
+ end
116
+
117
+
118
+ # def create_tag_reader( name )
119
+ # ## fix: change to new from_file() style
120
+ # TagDb::TagReader.new( @include_path )
121
+ # end
122
+
123
+ private
124
+
125
+ def path_to_real_path( path )
126
+ # map name to name_real_path
127
+ # name might include !/ for virtual path (gets cut off)
128
+ # e.g. at-austria!/w-wien/beers becomse w-wien/beers
129
+ pos = path.index( '!/')
130
+ if pos.nil?
131
+ path # not found; real path is the same as name
132
+ else
133
+ # cut off everything until !/ e.g.
134
+ # at-austria!/w-wien/beers becomes
135
+ # w-wien/beers
136
+ path[ (pos+2)..-1 ]
137
+ end
138
+ end
139
+
140
+ def name_to_zip_entry_path( name, extension='.txt' )
141
+ path = "#{name}#{extension}"
142
+
143
+ real_path = path_to_real_path( path )
144
+
145
+ # NOTE: add possible zip entry prefix path
146
+ # (if present includes trailing slash e.g. /)
147
+ entry_path = "#{@zip_prefix}#{real_path}"
148
+ entry_path
149
+ end
150
+
151
+
152
+
153
+ end # class ZipReader
154
+ end # module WorldDb
@@ -2,10 +2,73 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class CityReader < BaseReader
5
+ class CityReader
6
6
 
7
- def read( name, more_attribs={} )
8
- reader = ValuesReaderV2.new( name, include_path, more_attribs )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+
17
+ def self.from_zip( zip_file, entry_path, more_attribs={} )
18
+ ## get text content from zip
19
+
20
+ entry = zip_file.find_entry( entry_path )
21
+
22
+ ## todo/fix: add force encoding to utf-8 ??
23
+ ## check!!!
24
+ ## clean/prepprocess lines
25
+ ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
+ text = entry.get_input_stream().read()
27
+
28
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
+ logger = LogUtils::Logger.root
30
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
+ #####
32
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
+ ## NB:
34
+ # for now "hardcoded" to utf8 - what else can we do?
35
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
+ text = text.force_encoding( Encoding::UTF_8 )
37
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
+
39
+ ## todo:
40
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
+
43
+ self.from_string( text, more_attribs )
44
+ end
45
+
46
+
47
+ def self.from_file( path, more_attribs={} )
48
+ ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
49
+ ## - see textutils/utils.rb
50
+ text = File.read_utf8( path )
51
+ self.from_string( text, more_attribs )
52
+ end
53
+
54
+ def self.from_string( text, more_attribs={} )
55
+ CityReader.new( text, more_attribs )
56
+ end
57
+
58
+
59
+ def skip_tags?() @skip_tags == true; end
60
+ def strict?() @strict == true; end
61
+
62
+ def initialize( text, more_attribs={} )
63
+ ## todo/fix: how to add opts={} ???
64
+
65
+ @text = text
66
+ @more_attribs = more_attribs
67
+ end
68
+
69
+
70
+ def read()
71
+ reader = ValuesReader.from_string( @text, @more_attribs )
9
72
 
10
73
  reader.each_line do |attribs, values|
11
74
  opts = { skip_tags: skip_tags? }
@@ -2,10 +2,71 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class CountryReader < BaseReader
5
+ class CountryReader
6
6
 
7
- def read( name, more_attribs={} )
8
- reader = ValuesReaderV2.new( name, include_path, more_attribs )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+
17
+ def self.from_zip( zip_file, entry_path, more_attribs={} )
18
+ ## get text content from zip
19
+
20
+ entry = zip_file.find_entry( entry_path )
21
+
22
+ ## todo/fix: add force encoding to utf-8 ??
23
+ ## check!!!
24
+ ## clean/prepprocess lines
25
+ ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
+ text = entry.get_input_stream().read()
27
+
28
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
+ logger = LogUtils::Logger.root
30
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
+ #####
32
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
+ ## NB:
34
+ # for now "hardcoded" to utf8 - what else can we do?
35
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
+ text = text.force_encoding( Encoding::UTF_8 )
37
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
+
39
+ ## todo:
40
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
+
43
+ self.from_string( text, more_attribs )
44
+ end
45
+
46
+ def self.from_file( path, more_attribs={} )
47
+ ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
48
+ ## - see textutils/utils.rb
49
+ text = File.read_utf8( path )
50
+ self.from_string( text, more_attribs )
51
+ end
52
+
53
+ def self.from_string( text, more_attribs={} )
54
+ CountryReader.new( text, more_attribs )
55
+ end
56
+
57
+
58
+ def skip_tags?() @skip_tags == true; end
59
+ def strict?() @strict == true; end
60
+
61
+ def initialize( text, more_attribs={} )
62
+ ## todo/fix: how to add opts={} ???
63
+
64
+ @text = text
65
+ @more_attribs = more_attribs
66
+ end
67
+
68
+ def read()
69
+ reader = ValuesReader.from_string( @text, @more_attribs )
9
70
 
10
71
  reader.each_line do |attribs, values|
11
72
  opts = { skip_tags: skip_tags? }
@@ -2,10 +2,74 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class LangReader < BaseReader
5
+ class LangReader
6
6
 
7
- def read( name )
8
- reader = HashReaderV2.new( name, include_path )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+
17
+ ## todo: add opts={} etc.
18
+ def self.from_zip( zip_file, entry_path )
19
+ ## get text content from zip
20
+
21
+ entry = zip_file.find_entry( entry_path )
22
+
23
+ ## todo/fix: add force encoding to utf-8 ??
24
+ ## check!!!
25
+ ## clean/prepprocess lines
26
+ ## e.g. CR/LF (/r/n) to LF (e.g. /n)
27
+ text = entry.get_input_stream().read()
28
+
29
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
30
+ logger = LogUtils::Logger.root
31
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
32
+ #####
33
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
34
+ ## NB:
35
+ # for now "hardcoded" to utf8 - what else can we do?
36
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
37
+ text = text.force_encoding( Encoding::UTF_8 )
38
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
39
+
40
+ ## todo:
41
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
42
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
43
+
44
+ self.from_string( text )
45
+ end
46
+
47
+ def self.from_file( path, opts={} )
48
+ ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
49
+ ## - see textutils/utils.rb
50
+ text = File.read_utf8( path )
51
+ self.from_string( text, opts )
52
+ end
53
+
54
+ def self.from_string( text, opts={} )
55
+ LangReader.new( text, opts )
56
+ end
57
+
58
+
59
+ def skip_tags?() @skip_tags == true; end
60
+ def strict?() @strict == true; end
61
+
62
+ def initialize( text, opts={} )
63
+ @text = text
64
+
65
+ ## option: do NOT generate/add any tags for countries/regions/cities
66
+ @skip_tags = opts[:skip_tags].present? ? true : false
67
+ ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
68
+ @strict = opts[:strict].present? ? true : false
69
+ end
70
+
71
+ def read()
72
+ reader = HashReader.from_string( @text )
9
73
 
10
74
  reader.each do |key, value|
11
75
 
@@ -2,10 +2,72 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class RegionReader < BaseReader
5
+ class RegionReader
6
6
 
7
- def read( name, more_attribs={} )
8
- reader = ValuesReaderV2.new( name, include_path, more_attribs )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+
17
+ def self.from_zip( zip_file, entry_path, more_attribs={} )
18
+ ## get text content from zip
19
+
20
+ entry = zip_file.find_entry( entry_path )
21
+
22
+ ## todo/fix: add force encoding to utf-8 ??
23
+ ## check!!!
24
+ ## clean/prepprocess lines
25
+ ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
+ text = entry.get_input_stream().read()
27
+
28
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
+ logger = LogUtils::Logger.root
30
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
+ #####
32
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
+ ## NB:
34
+ # for now "hardcoded" to utf8 - what else can we do?
35
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
+ text = text.force_encoding( Encoding::UTF_8 )
37
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
+
39
+ ## todo:
40
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
+
43
+ self.from_string( text, more_attribs )
44
+ end
45
+
46
+ def self.from_file( path, more_attribs={} )
47
+ ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
48
+ ## - see textutils/utils.rb
49
+ text = File.read_utf8( path )
50
+ self.from_string( text, more_attribs )
51
+ end
52
+
53
+ def self.from_string( text, more_attribs={} )
54
+ RegionReader.new( text, more_attribs )
55
+ end
56
+
57
+
58
+ def skip_tags?() @skip_tags == true; end
59
+ def strict?() @strict == true; end
60
+
61
+ def initialize( text, more_attribs={} )
62
+ ## todo/fix: how to add opts={} ???
63
+
64
+ @text = text
65
+ @more_attribs = more_attribs
66
+ end
67
+
68
+
69
+ def read()
70
+ reader = ValuesReader.from_string( @text, @more_attribs )
9
71
 
10
72
  reader.each_line do |attribs, values|
11
73
  opts = { skip_tags: skip_tags? }
@@ -2,10 +2,73 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class UsageReader < BaseReader
5
+ class UsageReader
6
6
 
7
- def read( name )
8
- reader = HashReaderV2.new( name, include_path )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+ ## todo: add opts
17
+ def self.from_zip( zip_file, entry_path )
18
+ ## get text content from zip
19
+
20
+ entry = zip_file.find_entry( entry_path )
21
+
22
+ ## todo/fix: add force encoding to utf-8 ??
23
+ ## check!!!
24
+ ## clean/prepprocess lines
25
+ ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
+ text = entry.get_input_stream().read()
27
+
28
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
+ logger = LogUtils::Logger.root
30
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
+ #####
32
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
+ ## NB:
34
+ # for now "hardcoded" to utf8 - what else can we do?
35
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
+ text = text.force_encoding( Encoding::UTF_8 )
37
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
+
39
+ ## todo:
40
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
+
43
+ self.from_string( text )
44
+ end
45
+
46
+ def self.from_file( path, opts={} )
47
+ ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
48
+ ## - see textutils/utils.rb
49
+ text = File.read_utf8( path )
50
+ self.from_string( text, opts )
51
+ end
52
+
53
+ def self.from_string( text, opts={} )
54
+ UsageReader.new( text, opts )
55
+ end
56
+
57
+
58
+ def skip_tags?() @skip_tags == true; end
59
+ def strict?() @strict == true; end
60
+
61
+ def initialize( text, opts={} )
62
+ @text = text
63
+
64
+ ## option: do NOT generate/add any tags for countries/regions/cities
65
+ @skip_tags = opts[:skip_tags].present? ? true : false
66
+ ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
67
+ @strict = opts[:strict].present? ? true : false
68
+ end
69
+
70
+ def read()
71
+ reader = HashReader.from_string( @text )
9
72
 
10
73
  reader.each do |key, value|
11
74
 
@@ -1,6 +1,23 @@
1
1
 
2
2
  module WorldDb
3
- VERSION = '2.0.4' # sync version w/ sport.db - why? why not?
4
- end
5
3
 
4
+ # sync version w/ sport.db n friends - why? why not?
5
+ MAJOR = 2 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 0
7
+ PATCH = 5
8
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
9
+
10
+ def self.version
11
+ VERSION
12
+ end
13
+
14
+ def self.banner
15
+ "worlddb/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16
+ end
17
+
18
+ def self.root
19
+ "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20
+ end
21
+
22
+ end
6
23
 
data/test/helper.rb CHANGED
@@ -6,18 +6,6 @@
6
6
  # require 'minitest/unit'
7
7
  require 'minitest/autorun'
8
8
 
9
- # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
10
-
11
-
12
- # ruby stdlibs
13
-
14
- require 'json'
15
- require 'uri'
16
- require 'pp'
17
-
18
- # ruby gems
19
-
20
- require 'active_record'
21
9
 
22
10
  # our own code
23
11
 
@@ -3,7 +3,7 @@
3
3
  require 'helper'
4
4
 
5
5
 
6
- class TestFixtureMatchers < MiniTest::Unit::TestCase
6
+ class TestFixtureMatchers < MiniTest::Test
7
7
 
8
8
  include WorldDb::Matcher
9
9
 
@@ -3,7 +3,7 @@
3
3
 
4
4
  require 'helper'
5
5
 
6
- class TestModelCity < MiniTest::Unit::TestCase
6
+ class TestModelCity < MiniTest::Test
7
7
 
8
8
  def setup
9
9
  # delete all countries, regions, cities in in-memory only db
@@ -3,7 +3,7 @@
3
3
 
4
4
  require 'helper'
5
5
 
6
- class TestModelComp < MiniTest::Unit::TestCase
6
+ class TestModelComp < MiniTest::Test
7
7
 
8
8
  def setup
9
9
  # delete all countries, regions, cities in in-memory only db
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'helper'
4
4
 
5
- class TestModelCountry < MiniTest::Unit::TestCase
5
+ class TestModelCountry < MiniTest::Test
6
6
 
7
7
  def setup
8
8
  # delete all countries, regions, cities in in-memory only db
@@ -3,7 +3,7 @@
3
3
 
4
4
  require 'helper'
5
5
 
6
- class TestModelRegion < MiniTest::Unit::TestCase
6
+ class TestModelRegion < MiniTest::Test
7
7
 
8
8
  def setup
9
9
  # delete all countries, regions, cities in in-memory only db
data/test/test_models.rb CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  require 'helper'
5
5
 
6
- class TestModels < MiniTest::Unit::TestCase
6
+ class TestModels < MiniTest::Test
7
7
 
8
8
  def setup
9
9
  # delete all countries, regions, cities in in-memory only db
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: worlddb
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-04-15 00:00:00.000000000 Z
12
+ date: 2014-11-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: props
16
- requirement: &85578640 !ruby/object:Gem::Requirement
16
+ requirement: &75136710 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *85578640
24
+ version_requirements: *75136710
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: logutils
27
- requirement: &85576130 !ruby/object:Gem::Requirement
27
+ requirement: &75136360 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,21 +32,21 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *85576130
35
+ version_requirements: *75136360
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: textutils
38
- requirement: &85575800 !ruby/object:Gem::Requirement
38
+ requirement: &75135990 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
42
42
  - !ruby/object:Gem::Version
43
- version: 0.9.4
43
+ version: 0.9.9
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *85575800
46
+ version_requirements: *75135990
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: tagutils
49
- requirement: &85575530 !ruby/object:Gem::Requirement
49
+ requirement: &75135720 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *85575530
57
+ version_requirements: *75135720
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: gli
60
- requirement: &85574960 !ruby/object:Gem::Requirement
60
+ requirement: &75135460 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,21 @@ dependencies:
65
65
  version: '2.9'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *85574960
68
+ version_requirements: *75135460
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubyzip
71
+ requirement: &75135230 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *75135230
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: activerecord
71
- requirement: &85574780 !ruby/object:Gem::Requirement
82
+ requirement: &75134990 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ! '>='
@@ -76,10 +87,10 @@ dependencies:
76
87
  version: '0'
77
88
  type: :runtime
78
89
  prerelease: false
79
- version_requirements: *85574780
90
+ version_requirements: *75134990
80
91
  - !ruby/object:Gem::Dependency
81
92
  name: rdoc
82
- requirement: &85574380 !ruby/object:Gem::Requirement
93
+ requirement: &75132620 !ruby/object:Gem::Requirement
83
94
  none: false
84
95
  requirements:
85
96
  - - ~>
@@ -87,18 +98,18 @@ dependencies:
87
98
  version: '4.0'
88
99
  type: :development
89
100
  prerelease: false
90
- version_requirements: *85574380
101
+ version_requirements: *75132620
91
102
  - !ruby/object:Gem::Dependency
92
103
  name: hoe
93
- requirement: &85573790 !ruby/object:Gem::Requirement
104
+ requirement: &75132200 !ruby/object:Gem::Requirement
94
105
  none: false
95
106
  requirements:
96
107
  - - ~>
97
108
  - !ruby/object:Gem::Version
98
- version: '3.11'
109
+ version: '3.13'
99
110
  type: :development
100
111
  prerelease: false
101
- version_requirements: *85573790
112
+ version_requirements: *75132200
102
113
  description: worlddb - world.db command line tool
103
114
  email: openmundi@googlegroups.com
104
115
  executables:
@@ -138,7 +149,8 @@ files:
138
149
  - lib/worlddb/models/usage.rb
139
150
  - lib/worlddb/patterns.rb
140
151
  - lib/worlddb/reader.rb
141
- - lib/worlddb/readers/base.rb
152
+ - lib/worlddb/reader_file.rb
153
+ - lib/worlddb/reader_zip.rb
142
154
  - lib/worlddb/readers/city.rb
143
155
  - lib/worlddb/readers/country.rb
144
156
  - lib/worlddb/readers/lang.rb
@@ -156,7 +168,7 @@ files:
156
168
  - test/test_model_region.rb
157
169
  - test/test_models.rb
158
170
  - .gemtest
159
- homepage: https://github.com/geraldb/world.db.ruby
171
+ homepage: https://github.com/worlddb/world.db.ruby
160
172
  licenses:
161
173
  - Public Domain
162
174
  post_install_message:
@@ -1,41 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- module WorldDb
4
-
5
-
6
- class BaseReader
7
-
8
- include LogUtils::Logging
9
-
10
- ## make models available by default with namespace
11
- # e.g. lets you use Usage instead of Model::Usage
12
- include Models
13
-
14
- ## value helpers e.g. is_year?, is_taglist? etc.
15
- include TextUtils::ValueHelper
16
-
17
-
18
- attr_reader :include_path
19
-
20
- def skip_tags?() @skip_tags == true; end
21
- def strict?() @strict == true; end
22
-
23
-
24
- def initialize( include_path, opts = {} )
25
-
26
- @include_path = include_path
27
-
28
- ## option: do NOT generate/add any tags for countries/regions/cities
29
- @skip_tags = opts[:skip_tags].present? ? true : false
30
- ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
31
- @strict = opts[:strict].present? ? true : false
32
- end
33
-
34
-
35
- def read( name, more_attribs={} )
36
- puts "error: overwrite in concrete reader class!!!" ### overwrite!!!!
37
- end
38
-
39
-
40
- end # class BaseReader
41
- end # module WorldDb