worlddb 2.0.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -27,7 +27,8 @@ lib/worlddb/models/tagdb/tagging.rb
27
27
  lib/worlddb/models/usage.rb
28
28
  lib/worlddb/patterns.rb
29
29
  lib/worlddb/reader.rb
30
- lib/worlddb/readers/base.rb
30
+ lib/worlddb/reader_file.rb
31
+ lib/worlddb/reader_zip.rb
31
32
  lib/worlddb/readers/city.rb
32
33
  lib/worlddb/readers/country.rb
33
34
  lib/worlddb/readers/lang.rb
data/README.md CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  world.db Command Line Tool in Ruby
4
4
 
5
- * home :: [github.com/geraldb/world.db.ruby](https://github.com/geraldb/world.db.ruby)
6
- * bugs :: [github.com/geraldb/world.db.ruby/issues](https://github.com/geraldb/world.db.ruby/issues)
5
+ * home :: [github.com/worlddb/world.db.ruby](https://github.com/worlddb/world.db.ruby)
6
+ * bugs :: [github.com/worlddb/world.db.ruby/issues](https://github.com/worlddb/world.db.ruby/issues)
7
7
  * gem :: [rubygems.org/gems/worlddb](https://rubygems.org/gems/worlddb)
8
8
  * rdoc :: [rubydoc.info/gems/worlddb](http://rubydoc.info/gems/worlddb)
9
9
  * forum :: [groups.google.com/group/openmundi](https://groups.google.com/group/openmundi)
data/Rakefile CHANGED
@@ -5,23 +5,24 @@ require './lib/worlddb/version.rb'
5
5
  Hoe.spec 'worlddb' do
6
6
 
7
7
  self.version = WorldDb::VERSION
8
-
8
+
9
9
  self.summary = "worlddb - world.db command line tool"
10
10
  self.description = summary
11
11
 
12
- self.urls = ['https://github.com/geraldb/world.db.ruby']
13
-
12
+ self.urls = ['https://github.com/worlddb/world.db.ruby']
13
+
14
14
  self.author = 'Gerald Bauer'
15
15
  self.email = 'openmundi@googlegroups.com'
16
16
 
17
17
  self.extra_deps = [
18
18
  ['props'], # settings / prop(ertie)s / env / INI
19
19
  ['logutils'], # logging
20
- ['textutils', '>= 0.9.4'], # e.g. >= 0.6 && <= 1.0 ## will include logutils, props
20
+ ['textutils', '>= 0.9.9'], # e.g. >= 0.6 && <= 1.0 ## will include logutils, props
21
21
  ['tagutils'], # tags n categories for activerecord
22
22
 
23
23
  ## 3rd party
24
24
  ['gli', '>= 2.9'],
25
+ ['rubyzip'], ## todo: pull in via textutils ??
25
26
  ['activerecord'] # NB: will include activesupport,etc.
26
27
  ### ['sqlite3', '~> 1.3'] # NB: install on your own; remove dependency
27
28
  ]
data/lib/worlddb.rb CHANGED
@@ -5,15 +5,20 @@
5
5
 
6
6
  # core and stdlibs
7
7
 
8
- require 'yaml'
9
8
  require 'pp'
10
9
  require 'logger'
11
10
  require 'optparse'
12
11
  require 'fileutils'
12
+ require 'uri'
13
13
  require 'erb'
14
+ require 'json'
15
+ require 'yaml'
16
+
14
17
 
15
18
  # 3rd party gems / libs
16
19
 
20
+ require 'zip'
21
+
17
22
  require 'active_record' ## todo: add sqlite3? etc.
18
23
 
19
24
  require 'logutils'
@@ -52,7 +57,6 @@ require 'worlddb/models/tagdb/tagging'
52
57
  require 'worlddb/schema' # NB: requires worlddb/models (include WorldDB::Models)
53
58
  require 'worlddb/matcher'
54
59
 
55
- require 'worlddb/readers/base'
56
60
  require 'worlddb/readers/lang'
57
61
  require 'worlddb/readers/usage'
58
62
  require 'worlddb/readers/country'
@@ -60,6 +64,8 @@ require 'worlddb/readers/region'
60
64
  require 'worlddb/readers/city'
61
65
 
62
66
  require 'worlddb/reader'
67
+ require 'worlddb/reader_file'
68
+ require 'worlddb/reader_zip'
63
69
  require 'worlddb/deleter'
64
70
  require 'worlddb/stats'
65
71
  require 'worlddb/stats_comp'
@@ -67,14 +73,6 @@ require 'worlddb/stats_comp'
67
73
 
68
74
  module WorldDb
69
75
 
70
- def self.banner
71
- "worlddb/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
72
- end
73
-
74
- def self.root
75
- "#{File.expand_path( File.dirname(File.dirname(__FILE__)) )}"
76
- end
77
-
78
76
  def self.main
79
77
  require 'worlddb/cli/main'
80
78
  ## Runner.new.run(ARGV) - old code
@@ -99,6 +97,12 @@ module WorldDb
99
97
  reader.load_setup( setup )
100
98
  end
101
99
 
100
+ def self.read_setup_from_zip( zip_name, setup, include_path, opts={} ) ## todo/check - use a better (shorter) name ??
101
+ reader = ZipReader.new( zip_name, include_path, opts )
102
+ reader.load_setup( setup )
103
+ reader.close
104
+ end
105
+
102
106
  def self.read_all( include_path, opts={} ) # load all builtins (using plain text reader); helper for convenience
103
107
  read_setup( 'setups/all', include_path, opts )
104
108
  end # method read_all
@@ -129,14 +133,9 @@ module WorldDb
129
133
 
130
134
  end # module WorldDb
131
135
 
132
- ###########################################
133
- # fix: remove old alias for WorldDb ??
134
- WorldDB = WorldDb
135
-
136
136
 
137
137
  if __FILE__ == $0
138
138
  WorldDb.main
139
139
  else
140
- # say hello
141
- puts WorldDb.banner
142
- end
140
+ puts WorldDb.banner # say hello
141
+ end
@@ -97,20 +97,31 @@ module Matcher
97
97
  end
98
98
 
99
99
  def match_regions_for_country( name, &blk )
100
- ## todo: check if there's a better (more ruby way) to pass along code block ??
101
- match_xxx_for_country( name, 'regions', &blk )
100
+ ## also try synonyms e.g. old regions (if not match for states)
101
+ found = match_xxx_for_country( name, 'states', &blk )
102
+ found = match_xxx_for_country( name, 'regions', &blk ) unless found
103
+ found
102
104
  end
103
105
 
104
- def match_regions_abbr_for_country( name, &blk )
105
- match_xxx_for_country( name, 'regions\.abbr', &blk ) # NB: . gets escaped for regex, that is, \.
106
+ def match_regions_abbr_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
107
+ ## also try synonyms e.g. old regions (if not match for states)
108
+ found = match_xxx_for_country( name, 'states\.abbr', &blk )
109
+ found = match_xxx_for_country( name, 'regions\.abbr', &blk ) unless found
110
+ found
106
111
  end
107
112
 
108
113
  def match_regions_iso_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
109
- match_xxx_for_country( name, 'regions\.iso', &blk )
114
+ ## also try synonyms e.g. old regions (if not match for states)
115
+ found = match_xxx_for_country( name, 'states\.iso', &blk )
116
+ found = match_xxx_for_country( name, 'regions\.iso', &blk ) unless found
117
+ found
110
118
  end
111
119
 
112
120
  def match_regions_nuts_for_country( name, &blk ) # NB: . gets escaped for regex, that is, \.
113
- match_xxx_for_country( name, 'regions\.nuts', &blk )
121
+ ## also try synonyms e.g. old regions (if not match for states)
122
+ found = match_xxx_for_country( name, 'states\.nuts', &blk )
123
+ found = match_xxx_for_country( name, 'regions\.nuts', &blk ) unless found
124
+ found
114
125
  end
115
126
 
116
127
 
@@ -3,7 +3,7 @@
3
3
  module WorldDb
4
4
 
5
5
 
6
- class Reader
6
+ class ReaderBase
7
7
 
8
8
  include LogUtils::Logging
9
9
 
@@ -14,19 +14,13 @@ class Reader
14
14
 
15
15
  ## value helpers e.g. is_year?, is_taglist? etc.
16
16
  include TextUtils::ValueHelper
17
-
18
17
 
19
18
 
20
- attr_reader :include_path
21
-
22
19
  def skip_tags?() @skip_tags == true; end
23
20
  def strict?() @strict == true; end
24
21
 
25
22
 
26
- def initialize( include_path, opts = {} )
27
-
28
- @include_path = include_path
29
-
23
+ def initialize( opts={} )
30
24
  ## option: do NOT generate/add any tags for countries/regions/cities
31
25
  @skip_tags = opts[:skip_tags].present? ? true : false
32
26
  ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
@@ -35,11 +29,7 @@ class Reader
35
29
 
36
30
 
37
31
  def load_setup( name )
38
- path = "#{include_path}/#{name}.txt"
39
-
40
- logger.info "parsing data '#{name}' (#{path})..."
41
-
42
- reader = FixtureReader.new( path )
32
+ reader = create_fixture_reader( name )
43
33
 
44
34
  reader.each do |fixture|
45
35
  load( fixture )
@@ -56,11 +46,13 @@ class Reader
56
46
  elsif name =~ /^lang/
57
47
  ## todo: pass along opts too
58
48
  ## use match_usage( name ) - why? why not?? ???
59
- LangReader.new( include_path ).read( name )
49
+ r = create_lang_reader( name )
50
+ r.read()
60
51
  elsif name =~ /\/lang/
61
52
  ## todo: pass along opts too
62
53
  ## use match_usage( name ) - why? why not?? ???
63
- UsageReader.new( include_path ).read( name )
54
+ r = create_usage_reader( name )
55
+ r.read()
64
56
  elsif name =~ /\/fifa/
65
57
  load_xxx( 'fifa', name )
66
58
  elsif name =~ /\/iso3/
@@ -72,22 +64,27 @@ class Reader
72
64
  elsif name =~ /^tag.*\.\d$/
73
65
  ## todo: pass along opts too
74
66
  ## use match_tags( name ) - why? why not?? ???
75
- TagDb::TagReader.new( include_path ).read( name )
67
+
68
+ ######## FIX: add back again
69
+ ### fix: use read() only, that is, w/o name
70
+ ## r = create_tag_reader( name )
71
+ ## r.read()
76
72
  elsif match_countries_for_continent( name ) do |continent| # # e.g. africa/countries or america/countries
77
73
  ### NB: continent changed to regions (e.g. middle-east, caribbean, north-america, etc.)
78
74
  ## auto-add continent (from folder structure) as tag
79
75
  ## fix: allow dash/hyphen/minus in tag
80
76
 
81
- r = CountryReader.new( include_path )
82
- r.read( name, tags: continent.tr('-', '_') )
77
+ ### todo/fix: add opts - how??
78
+ r = create_country_reader( name, tags: continent.tr('-', '_') )
79
+ r.read()
83
80
  end
84
81
  elsif match_cities_for_country( name ) do |country_key| # name =~ /\/([a-z]{2})\/cities/
85
82
  ## auto-add required country code (from folder structure)
86
83
  country = Country.find_by_key!( country_key )
87
84
  logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"
88
85
 
89
- r = CityReader.new( include_path )
90
- r.read( name, country_id: country.id )
86
+ r = create_city_reader( name, country_id: country.id )
87
+ r.read()
91
88
  end
92
89
  elsif match_regions_abbr_for_country( name ) do |country_key| # name =~ /\/([a-z]{2})\/regions\.abbr/
93
90
  load_regions_xxx( country_key, 'abbr', name )
@@ -103,8 +100,8 @@ class Reader
103
100
  country = Country.find_by_key!( country_key )
104
101
  logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"
105
102
 
106
- r = RegionReader.new( include_path )
107
- r.read( name, country_id: country.id )
103
+ r = create_region_reader( name, country_id: country.id )
104
+ r.read()
108
105
  end
109
106
  else
110
107
  logger.error "unknown world.db fixture type >#{name}<"
@@ -118,7 +115,7 @@ class Reader
118
115
  country = Country.find_by_key!( country_key )
119
116
  logger.debug "Country #{country.key} >#{country.title} (#{country.code})<"
120
117
 
121
- reader = HashReaderV2.new( name, include_path )
118
+ reader = create_hash_reader( name )
122
119
 
123
120
  reader.each do |key, value|
124
121
  region = Region.find_by_country_id_and_key!( country.id, key )
@@ -130,7 +127,7 @@ class Reader
130
127
 
131
128
  ### use ContinentRefReader
132
129
  def load_continent_refs( name )
133
- reader = HashReaderV2.new( name, include_path )
130
+ reader = create_hash_reader( name )
134
131
 
135
132
  reader.each do |key, value|
136
133
  country = Country.find_by_key!( key )
@@ -142,7 +139,7 @@ class Reader
142
139
 
143
140
  ### use ContinentDef Reader
144
141
  def load_continent_defs( name, more_attribs={} )
145
- reader = ValuesReaderV2.new( name, include_path, more_attribs )
142
+ reader = create_values_reader( name, more_attribs )
146
143
 
147
144
  reader.each_line do |attribs, values|
148
145
 
@@ -168,7 +165,7 @@ class Reader
168
165
 
169
166
  ### use CountryAttr Reader
170
167
  def load_xxx( xxx, name )
171
- reader = HashReaderV2.new( name, include_path )
168
+ reader = create_hash_reader( name )
172
169
 
173
170
  reader.each do |key, value|
174
171
  country = Country.find_by_key!( key )
@@ -177,5 +174,5 @@ class Reader
177
174
  end
178
175
  end
179
176
 
180
- end # class Reader
177
+ end # class ReaderBase
181
178
  end # module WorldDb
@@ -0,0 +1,80 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+ class Reader < ReaderBase
6
+
7
+ def initialize( include_path, opts={} )
8
+ super( opts )
9
+
10
+ @include_path = include_path
11
+ end
12
+
13
+
14
+ def create_fixture_reader( name )
15
+ path = "#{@include_path}/#{name}.txt"
16
+ logger.info "parsing data (setup) '#{name}' (#{path})..."
17
+
18
+ FixtureReader.from_file( path )
19
+ end
20
+
21
+ def create_lang_reader( name )
22
+ path = "#{@include_path}/#{name}.yml" ## hash reader - use .yml??
23
+ logger.info "parsing data (lang) '#{name}' (#{path})..."
24
+
25
+ LangReader.from_file( path )
26
+ end
27
+
28
+ def create_usage_reader( name )
29
+ path = "#{@include_path}/#{name}.yml" ## hash reader - use .yml??
30
+ logger.info "parsing data (usage) '#{name}' (#{path})..."
31
+
32
+ UsageReader.from_file( path )
33
+ end
34
+
35
+
36
+ def create_country_reader( name, more_attribs={} )
37
+ path = "#{@include_path}/#{name}.txt"
38
+ logger.info "parsing data (country) '#{name}' (#{path})..."
39
+
40
+ CountryReader.from_file( path, more_attribs )
41
+ end
42
+
43
+ def create_region_reader( name, more_attribs={} )
44
+ path = "#{@include_path}/#{name}.txt"
45
+ logger.info "parsing data (region) '#{name}' (#{path})..."
46
+
47
+ RegionReader.from_file( path, more_attribs )
48
+ end
49
+
50
+ def create_city_reader( name, more_attribs={} )
51
+ path = "#{@include_path}/#{name}.txt"
52
+ logger.info "parsing data (city) '#{name}' (#{path})..."
53
+
54
+ CityReader.from_file( path, more_attribs )
55
+ end
56
+
57
+
58
+ def create_hash_reader( name )
59
+ path = "#{@include_path}/#{name}.yml"
60
+ logger.info "parsing data (hash) '#{name}' (#{path})..."
61
+
62
+ HashReader.from_file( path )
63
+ end
64
+
65
+ def create_values_reader( name, more_attribs={} )
66
+ path = "#{@include_path}/#{name}.txt"
67
+ logger.info "parsing data (values) '#{name}' (#{path})..."
68
+
69
+ ValuesReader.from_file( path, more_attribs )
70
+ end
71
+
72
+
73
+ # def create_tag_reader( name )
74
+ # ## fix: change to new from_file() style
75
+ # TagDb::TagReader.new( @include_path )
76
+ # end
77
+
78
+
79
+ end # class Reader
80
+ end # module WorldDb
@@ -0,0 +1,154 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+ class ZipReader < ReaderBase
6
+
7
+
8
+ def initialize( name, include_path, opts = {} )
9
+ super( opts )
10
+
11
+ ## todo/fix: make include_path an opts (included in opts?) - why? why not??
12
+ path = "#{include_path}/#{name}.zip"
13
+
14
+ ## todo: check if zip exists
15
+ @zip_file = Zip::File.open( path ) ## NOTE: do NOT create if file is missing; let it crash
16
+
17
+ ### allow prefix (path) in name
18
+ ### e.g. assume all files relative to setup manifest
19
+ ## e.g. at-austria-master/setups/all.txt or
20
+ ## be-belgium-master/setups/all.txt
21
+ ## for
22
+ ## setups/all.txt
23
+ ###
24
+ ## will get (re)set w/ fixture/setup reader
25
+ ##
26
+ ## todo/fix: change/rename to @relative_path ?? - why? why not?
27
+ @zip_prefix = ''
28
+ end
29
+
30
+ def close
31
+ ## todo/check: add a close method - why? why not ???
32
+ @zip_file.close
33
+ end
34
+
35
+
36
+
37
+ def create_fixture_reader( name )
38
+ ## e.g. pass in => setups/all or setups/test etc. e.g. w/o .txt extension
39
+ query = "**/#{name}.txt"
40
+
41
+ ## note: returns an array of Zip::Entry
42
+ candidates = @zip_file.glob( query )
43
+ pp candidates
44
+
45
+ ## use first candidates entry as match
46
+ ## todo/fix: issue warning if more than one entries/matches!!
47
+
48
+ ## get fullpath e.g. at-austria-master/setups/all.txt
49
+ path = candidates[0].name
50
+ logger.debug " zip entry path >>#{path}<<"
51
+
52
+ ## cut-off at-austria-master/ NOTE: includes trailing slash (if present)
53
+ ## logger.debug " path.size #{path.size} >>#{path}<<"
54
+ ## logger.debug " name.size #{name.size+4} >>#{name}<<"
55
+
56
+ ## note: add +4 for extension (.txt)
57
+ @zip_prefix = path[ 0...(path.size-(name.size+4)) ]
58
+ logger.debug " zip entry prefix >>#{@zip_prefix}<<"
59
+
60
+ logger.info "parsing data (setup) in zip '#{name}' (#{path})..."
61
+
62
+ FixtureReader.from_zip( @zip_file, path )
63
+ end
64
+
65
+
66
+ def create_lang_reader( name )
67
+ path = name_to_zip_entry_path( name, '.yml' ) ## hash reader - use .yml??
68
+ logger.info "parsing data (lang) in zip '#{name}' (#{path})..."
69
+
70
+ LangReader.from_zip( @zip_file, path )
71
+ end
72
+
73
+ def create_usage_reader( name )
74
+ path = name_to_zip_entry_path( name, '.yml' ) ## hash reader - use .yml??
75
+ logger.info "parsing data (usage) in zip '#{name}' (#{path})..."
76
+
77
+ UsageReader.from_zip( @zip_file, path )
78
+ end
79
+
80
+
81
+ def create_country_reader( name, more_attribs={} )
82
+ path = name_to_zip_entry_path( name )
83
+ logger.info "parsing data (country) in zip '#{name}' (#{path})..."
84
+
85
+ CountryReader.from_zip( @zip_file, path, more_attribs )
86
+ end
87
+
88
+ def create_region_reader( name, more_attribs={} )
89
+ path = name_to_zip_entry_path( name )
90
+ logger.info "parsing data (region) in zip '#{name}' (#{path})..."
91
+
92
+ RegionReader.from_zip( @zip_file, path, more_attribs )
93
+ end
94
+
95
+ def create_city_reader( name, more_attribs={} )
96
+ path = name_to_zip_entry_path( name )
97
+ logger.info "parsing data (city) in zip '#{name}' (#{path})..."
98
+
99
+ CityReader.from_zip( @zip_file, path, more_attribs )
100
+ end
101
+
102
+
103
+ def create_hash_reader( name )
104
+ path = name_to_zip_entry_path( name, '.yml' ) ## hash reader - use .yml??
105
+ logger.info "parsing data (hash) in zip '#{name}' (#{path})..."
106
+
107
+ HashReader.from_zip( @zip_file, path )
108
+ end
109
+
110
+ def create_values_reader( name, more_attribs={} )
111
+ path = name_to_zip_entry_path( name )
112
+ logger.info "parsing data (values) in zip '#{name}' (#{path})..."
113
+
114
+ ValuesReader.from_zip( @zip_file, path, more_attribs )
115
+ end
116
+
117
+
118
+ # def create_tag_reader( name )
119
+ # ## fix: change to new from_file() style
120
+ # TagDb::TagReader.new( @include_path )
121
+ # end
122
+
123
+ private
124
+
125
+ def path_to_real_path( path )
126
+ # map name to name_real_path
127
+ # name might include !/ for virtual path (gets cut off)
128
+ # e.g. at-austria!/w-wien/beers becomes w-wien/beers
129
+ pos = path.index( '!/')
130
+ if pos.nil?
131
+ path # not found; real path is the same as name
132
+ else
133
+ # cut off everything until !/ e.g.
134
+ # at-austria!/w-wien/beers becomes
135
+ # w-wien/beers
136
+ path[ (pos+2)..-1 ]
137
+ end
138
+ end
139
+
140
+ def name_to_zip_entry_path( name, extension='.txt' )
141
+ path = "#{name}#{extension}"
142
+
143
+ real_path = path_to_real_path( path )
144
+
145
+ # NOTE: add possible zip entry prefix path
146
+ # (if present includes trailing slash e.g. /)
147
+ entry_path = "#{@zip_prefix}#{real_path}"
148
+ entry_path
149
+ end
150
+
151
+
152
+
153
+ end # class ZipReader
154
+ end # module WorldDb
@@ -2,10 +2,73 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class CityReader < BaseReader
5
+ class CityReader
6
6
 
7
- def read( name, more_attribs={} )
8
- reader = ValuesReaderV2.new( name, include_path, more_attribs )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+
17
+ def self.from_zip( zip_file, entry_path, more_attribs={} )
18
+ ## get text content from zip
19
+
20
+ entry = zip_file.find_entry( entry_path )
21
+
22
+ ## todo/fix: add force encoding to utf-8 ??
23
+ ## check!!!
24
+ ## clean/preprocess lines
25
+ ## e.g. CR/LF (\r\n) to LF (e.g. \n)
26
+ text = entry.get_input_stream().read()
27
+
28
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
+ logger = LogUtils::Logger.root
30
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
+ #####
32
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
+ ## NB:
34
+ # for now "hardcoded" to utf8 - what else can we do?
35
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
+ text = text.force_encoding( Encoding::UTF_8 )
37
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
+
39
+ ## todo:
40
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
+
43
+ self.from_string( text, more_attribs )
44
+ end
45
+
46
+
47
+ def self.from_file( path, more_attribs={} )
48
+ ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
49
+ ## - see textutils/utils.rb
50
+ text = File.read_utf8( path )
51
+ self.from_string( text, more_attribs )
52
+ end
53
+
54
+ def self.from_string( text, more_attribs={} )
55
+ CityReader.new( text, more_attribs )
56
+ end
57
+
58
+
59
+ def skip_tags?() @skip_tags == true; end
60
+ def strict?() @strict == true; end
61
+
62
+ def initialize( text, more_attribs={} )
63
+ ## todo/fix: how to add opts={} ???
64
+
65
+ @text = text
66
+ @more_attribs = more_attribs
67
+ end
68
+
69
+
70
+ def read()
71
+ reader = ValuesReader.from_string( @text, @more_attribs )
9
72
 
10
73
  reader.each_line do |attribs, values|
11
74
  opts = { skip_tags: skip_tags? }
@@ -2,10 +2,71 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class CountryReader < BaseReader
5
+ class CountryReader
6
6
 
7
- def read( name, more_attribs={} )
8
- reader = ValuesReaderV2.new( name, include_path, more_attribs )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+
17
+ def self.from_zip( zip_file, entry_path, more_attribs={} )
18
+ ## get text content from zip
19
+
20
+ entry = zip_file.find_entry( entry_path )
21
+
22
+ ## todo/fix: add force encoding to utf-8 ??
23
+ ## check!!!
24
+ ## clean/preprocess lines
25
+ ## e.g. CR/LF (\r\n) to LF (e.g. \n)
26
+ text = entry.get_input_stream().read()
27
+
28
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
+ logger = LogUtils::Logger.root
30
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
+ #####
32
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
+ ## NB:
34
+ # for now "hardcoded" to utf8 - what else can we do?
35
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
+ text = text.force_encoding( Encoding::UTF_8 )
37
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
+
39
+ ## todo:
40
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
+
43
+ self.from_string( text, more_attribs )
44
+ end
45
+
46
+ def self.from_file( path, more_attribs={} )
47
+ ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
48
+ ## - see textutils/utils.rb
49
+ text = File.read_utf8( path )
50
+ self.from_string( text, more_attribs )
51
+ end
52
+
53
+ def self.from_string( text, more_attribs={} )
54
+ CountryReader.new( text, more_attribs )
55
+ end
56
+
57
+
58
+ def skip_tags?() @skip_tags == true; end
59
+ def strict?() @strict == true; end
60
+
61
+ def initialize( text, more_attribs={} )
62
+ ## todo/fix: how to add opts={} ???
63
+
64
+ @text = text
65
+ @more_attribs = more_attribs
66
+ end
67
+
68
+ def read()
69
+ reader = ValuesReader.from_string( @text, @more_attribs )
9
70
 
10
71
  reader.each_line do |attribs, values|
11
72
  opts = { skip_tags: skip_tags? }
@@ -2,10 +2,74 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class LangReader < BaseReader
5
+ class LangReader
6
6
 
7
- def read( name )
8
- reader = HashReaderV2.new( name, include_path )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+
17
+ ## todo: add opts={} etc.
18
+ def self.from_zip( zip_file, entry_path )
19
+ ## get text content from zip
20
+
21
+ entry = zip_file.find_entry( entry_path )
22
+
23
+ ## todo/fix: add force encoding to utf-8 ??
24
+ ## check!!!
25
+ ## clean/preprocess lines
26
+ ## e.g. CR/LF (\r\n) to LF (e.g. \n)
27
+ text = entry.get_input_stream().read()
28
+
29
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
30
+ logger = LogUtils::Logger.root
31
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
32
+ #####
33
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
34
+ ## NB:
35
+ # for now "hardcoded" to utf8 - what else can we do?
36
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
37
+ text = text.force_encoding( Encoding::UTF_8 )
38
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
39
+
40
+ ## todo:
41
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
42
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
43
+
44
+ self.from_string( text )
45
+ end
46
+
47
+ def self.from_file( path, opts={} )
48
+ ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
49
+ ## - see textutils/utils.rb
50
+ text = File.read_utf8( path )
51
+ self.from_string( text, opts )
52
+ end
53
+
54
+ def self.from_string( text, opts={} )
55
+ LangReader.new( text, opts )
56
+ end
57
+
58
+
59
+ def skip_tags?() @skip_tags == true; end
60
+ def strict?() @strict == true; end
61
+
62
+ def initialize( text, opts={} )
63
+ @text = text
64
+
65
+ ## option: do NOT generate/add any tags for countries/regions/cities
66
+ @skip_tags = opts[:skip_tags].present? ? true : false
67
+ ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
68
+ @strict = opts[:strict].present? ? true : false
69
+ end
70
+
71
+ def read()
72
+ reader = HashReader.from_string( @text )
9
73
 
10
74
  reader.each do |key, value|
11
75
 
@@ -2,10 +2,72 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class RegionReader < BaseReader
5
+ class RegionReader
6
6
 
7
- def read( name, more_attribs={} )
8
- reader = ValuesReaderV2.new( name, include_path, more_attribs )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+
17
+ def self.from_zip( zip_file, entry_path, more_attribs={} )
18
+ ## get text content from zip
19
+
20
+ entry = zip_file.find_entry( entry_path )
21
+
22
+ ## todo/fix: add force encoding to utf-8 ??
23
+ ## check!!!
24
+ ## clean/preprocess lines
25
+ ## e.g. CR/LF (\r\n) to LF (e.g. \n)
26
+ text = entry.get_input_stream().read()
27
+
28
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
+ logger = LogUtils::Logger.root
30
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
+ #####
32
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
+ ## NB:
34
+ # for now "hardcoded" to utf8 - what else can we do?
35
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
+ text = text.force_encoding( Encoding::UTF_8 )
37
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
+
39
+ ## todo:
40
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
+
43
+ self.from_string( text, more_attribs )
44
+ end
45
+
46
+ def self.from_file( path, more_attribs={} )
47
+ ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
48
+ ## - see textutils/utils.rb
49
+ text = File.read_utf8( path )
50
+ self.from_string( text, more_attribs )
51
+ end
52
+
53
+ def self.from_string( text, more_attribs={} )
54
+ RegionReader.new( text, more_attribs )
55
+ end
56
+
57
+
58
+ def skip_tags?() @skip_tags == true; end
59
+ def strict?() @strict == true; end
60
+
61
+ def initialize( text, more_attribs={} )
62
+ ## todo/fix: how to add opts={} ???
63
+
64
+ @text = text
65
+ @more_attribs = more_attribs
66
+ end
67
+
68
+
69
+ def read()
70
+ reader = ValuesReader.from_string( @text, @more_attribs )
9
71
 
10
72
  reader.each_line do |attribs, values|
11
73
  opts = { skip_tags: skip_tags? }
@@ -2,10 +2,73 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class UsageReader < BaseReader
5
+ class UsageReader
6
6
 
7
- def read( name )
8
- reader = HashReaderV2.new( name, include_path )
7
+ include LogUtils::Logging
8
+
9
+ ## make models available by default with namespace
10
+ # e.g. lets you use Usage instead of Model::Usage
11
+ include Models
12
+
13
+ ## value helpers e.g. is_year?, is_taglist? etc.
14
+ include TextUtils::ValueHelper
15
+
16
+ ## todo: add opts
17
+ def self.from_zip( zip_file, entry_path )
18
+ ## get text content from zip
19
+
20
+ entry = zip_file.find_entry( entry_path )
21
+
22
+ ## todo/fix: add force encoding to utf-8 ??
23
+ ## check!!!
24
+ ## clean/preprocess lines
25
+ ## e.g. CR/LF (\r\n) to LF (e.g. \n)
26
+ text = entry.get_input_stream().read()
27
+
28
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
+ logger = LogUtils::Logger.root
30
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
+ #####
32
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
+ ## NB:
34
+ # for now "hardcoded" to utf8 - what else can we do?
35
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
+ text = text.force_encoding( Encoding::UTF_8 )
37
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
+
39
+ ## todo:
40
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
+
43
+ self.from_string( text )
44
+ end
45
+
46
+ def self.from_file( path, opts={} )
47
+ ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
48
+ ## - see textutils/utils.rb
49
+ text = File.read_utf8( path )
50
+ self.from_string( text, opts )
51
+ end
52
+
53
+ def self.from_string( text, opts={} )
54
+ UsageReader.new( text, opts )
55
+ end
56
+
57
+
58
+ def skip_tags?() @skip_tags == true; end
59
+ def strict?() @strict == true; end
60
+
61
+ def initialize( text, opts={} )
62
+ @text = text
63
+
64
+ ## option: do NOT generate/add any tags for countries/regions/cities
65
+ @skip_tags = opts[:skip_tags].present? ? true : false
66
+ ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
67
+ @strict = opts[:strict].present? ? true : false
68
+ end
69
+
70
+ def read()
71
+ reader = HashReader.from_string( @text )
9
72
 
10
73
  reader.each do |key, value|
11
74
 
@@ -1,6 +1,23 @@
1
1
 
2
2
  module WorldDb
3
- VERSION = '2.0.4' # sync version w/ sport.db - why? why not?
4
- end
5
3
 
4
+ # sync version w/ sport.db n friends - why? why not?
5
+ MAJOR = 2 ## todo: namespace inside version or something - why? why not??
6
+ MINOR = 0
7
+ PATCH = 5
8
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
9
+
10
+ def self.version
11
+ VERSION
12
+ end
13
+
14
+ def self.banner
15
+ "worlddb/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
16
+ end
17
+
18
+ def self.root
19
+ "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
20
+ end
21
+
22
+ end
6
23
 
data/test/helper.rb CHANGED
@@ -6,18 +6,6 @@
6
6
  # require 'minitest/unit'
7
7
  require 'minitest/autorun'
8
8
 
9
- # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
10
-
11
-
12
- # ruby stdlibs
13
-
14
- require 'json'
15
- require 'uri'
16
- require 'pp'
17
-
18
- # ruby gems
19
-
20
- require 'active_record'
21
9
 
22
10
  # our own code
23
11
 
@@ -3,7 +3,7 @@
3
3
  require 'helper'
4
4
 
5
5
 
6
- class TestFixtureMatchers < MiniTest::Unit::TestCase
6
+ class TestFixtureMatchers < MiniTest::Test
7
7
 
8
8
  include WorldDb::Matcher
9
9
 
@@ -3,7 +3,7 @@
3
3
 
4
4
  require 'helper'
5
5
 
6
- class TestModelCity < MiniTest::Unit::TestCase
6
+ class TestModelCity < MiniTest::Test
7
7
 
8
8
  def setup
9
9
  # delete all countries, regions, cities in in-memory only db
@@ -3,7 +3,7 @@
3
3
 
4
4
  require 'helper'
5
5
 
6
- class TestModelComp < MiniTest::Unit::TestCase
6
+ class TestModelComp < MiniTest::Test
7
7
 
8
8
  def setup
9
9
  # delete all countries, regions, cities in in-memory only db
@@ -2,7 +2,7 @@
2
2
 
3
3
  require 'helper'
4
4
 
5
- class TestModelCountry < MiniTest::Unit::TestCase
5
+ class TestModelCountry < MiniTest::Test
6
6
 
7
7
  def setup
8
8
  # delete all countries, regions, cities in in-memory only db
@@ -3,7 +3,7 @@
3
3
 
4
4
  require 'helper'
5
5
 
6
- class TestModelRegion < MiniTest::Unit::TestCase
6
+ class TestModelRegion < MiniTest::Test
7
7
 
8
8
  def setup
9
9
  # delete all countries, regions, cities in in-memory only db
data/test/test_models.rb CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  require 'helper'
5
5
 
6
- class TestModels < MiniTest::Unit::TestCase
6
+ class TestModels < MiniTest::Test
7
7
 
8
8
  def setup
9
9
  # delete all countries, regions, cities in in-memory only db
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: worlddb
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-04-15 00:00:00.000000000 Z
12
+ date: 2014-11-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: props
16
- requirement: &85578640 !ruby/object:Gem::Requirement
16
+ requirement: &75136710 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *85578640
24
+ version_requirements: *75136710
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: logutils
27
- requirement: &85576130 !ruby/object:Gem::Requirement
27
+ requirement: &75136360 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,21 +32,21 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *85576130
35
+ version_requirements: *75136360
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: textutils
38
- requirement: &85575800 !ruby/object:Gem::Requirement
38
+ requirement: &75135990 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
42
42
  - !ruby/object:Gem::Version
43
- version: 0.9.4
43
+ version: 0.9.9
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *85575800
46
+ version_requirements: *75135990
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: tagutils
49
- requirement: &85575530 !ruby/object:Gem::Requirement
49
+ requirement: &75135720 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *85575530
57
+ version_requirements: *75135720
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: gli
60
- requirement: &85574960 !ruby/object:Gem::Requirement
60
+ requirement: &75135460 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,21 @@ dependencies:
65
65
  version: '2.9'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *85574960
68
+ version_requirements: *75135460
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubyzip
71
+ requirement: &75135230 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ type: :runtime
78
+ prerelease: false
79
+ version_requirements: *75135230
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: activerecord
71
- requirement: &85574780 !ruby/object:Gem::Requirement
82
+ requirement: &75134990 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ! '>='
@@ -76,10 +87,10 @@ dependencies:
76
87
  version: '0'
77
88
  type: :runtime
78
89
  prerelease: false
79
- version_requirements: *85574780
90
+ version_requirements: *75134990
80
91
  - !ruby/object:Gem::Dependency
81
92
  name: rdoc
82
- requirement: &85574380 !ruby/object:Gem::Requirement
93
+ requirement: &75132620 !ruby/object:Gem::Requirement
83
94
  none: false
84
95
  requirements:
85
96
  - - ~>
@@ -87,18 +98,18 @@ dependencies:
87
98
  version: '4.0'
88
99
  type: :development
89
100
  prerelease: false
90
- version_requirements: *85574380
101
+ version_requirements: *75132620
91
102
  - !ruby/object:Gem::Dependency
92
103
  name: hoe
93
- requirement: &85573790 !ruby/object:Gem::Requirement
104
+ requirement: &75132200 !ruby/object:Gem::Requirement
94
105
  none: false
95
106
  requirements:
96
107
  - - ~>
97
108
  - !ruby/object:Gem::Version
98
- version: '3.11'
109
+ version: '3.13'
99
110
  type: :development
100
111
  prerelease: false
101
- version_requirements: *85573790
112
+ version_requirements: *75132200
102
113
  description: worlddb - world.db command line tool
103
114
  email: openmundi@googlegroups.com
104
115
  executables:
@@ -138,7 +149,8 @@ files:
138
149
  - lib/worlddb/models/usage.rb
139
150
  - lib/worlddb/patterns.rb
140
151
  - lib/worlddb/reader.rb
141
- - lib/worlddb/readers/base.rb
152
+ - lib/worlddb/reader_file.rb
153
+ - lib/worlddb/reader_zip.rb
142
154
  - lib/worlddb/readers/city.rb
143
155
  - lib/worlddb/readers/country.rb
144
156
  - lib/worlddb/readers/lang.rb
@@ -156,7 +168,7 @@ files:
156
168
  - test/test_model_region.rb
157
169
  - test/test_models.rb
158
170
  - .gemtest
159
- homepage: https://github.com/geraldb/world.db.ruby
171
+ homepage: https://github.com/worlddb/world.db.ruby
160
172
  licenses:
161
173
  - Public Domain
162
174
  post_install_message:
@@ -1,41 +0,0 @@
1
- # encoding: UTF-8
2
-
3
- module WorldDb
4
-
5
-
6
- class BaseReader
7
-
8
- include LogUtils::Logging
9
-
10
- ## make models available by default with namespace
11
- # e.g. lets you use Usage instead of Model::Usage
12
- include Models
13
-
14
- ## value helpers e.g. is_year?, is_taglist? etc.
15
- include TextUtils::ValueHelper
16
-
17
-
18
- attr_reader :include_path
19
-
20
- def skip_tags?() @skip_tags == true; end
21
- def strict?() @strict == true; end
22
-
23
-
24
- def initialize( include_path, opts = {} )
25
-
26
- @include_path = include_path
27
-
28
- ## option: do NOT generate/add any tags for countries/regions/cities
29
- @skip_tags = opts[:skip_tags].present? ? true : false
30
- ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
31
- @strict = opts[:strict].present? ? true : false
32
- end
33
-
34
-
35
- def read( name, more_attribs={} )
36
- puts "error: overwrite in concrete reader class!!!" ### overwrite!!!!
37
- end
38
-
39
-
40
- end # class BaseReader
41
- end # module WorldDb