worlddb-models 2.2.2 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +31 -13
  3. data/README.md +7 -7
  4. data/Rakefile +1 -1
  5. data/lib/worlddb/deleter.rb +6 -1
  6. data/lib/worlddb/helpers/value_helper.rb +117 -0
  7. data/lib/worlddb/matcher.rb +99 -135
  8. data/lib/worlddb/matcher_adm.rb +82 -0
  9. data/lib/worlddb/models/city.rb +30 -208
  10. data/lib/worlddb/models/city_base.rb +220 -0
  11. data/lib/worlddb/models/continent.rb +9 -0
  12. data/lib/worlddb/models/country.rb +21 -4
  13. data/lib/worlddb/models/forward.rb +25 -9
  14. data/lib/worlddb/models/lang.rb +6 -0
  15. data/lib/worlddb/models/place.rb +1 -1
  16. data/lib/worlddb/models/state.rb +83 -0
  17. data/lib/worlddb/models/{region.rb → state_base.rb} +52 -36
  18. data/lib/worlddb/models/tagdb/tag.rb +1 -1
  19. data/lib/worlddb/models.rb +11 -8
  20. data/lib/worlddb/patterns.rb +4 -4
  21. data/lib/worlddb/reader.rb +68 -39
  22. data/lib/worlddb/reader_file.rb +36 -3
  23. data/lib/worlddb/reader_zip.rb +33 -3
  24. data/lib/worlddb/readers/base.rb +149 -0
  25. data/lib/worlddb/readers/city.rb +2 -65
  26. data/lib/worlddb/readers/country.rb +2 -63
  27. data/lib/worlddb/readers/lang.rb +3 -68
  28. data/lib/worlddb/readers/state.rb +61 -0
  29. data/lib/worlddb/readers/state_tree.rb +118 -0
  30. data/lib/worlddb/readers/usage.rb +2 -65
  31. data/lib/worlddb/schema.rb +142 -43
  32. data/lib/worlddb/stats.rb +7 -4
  33. data/lib/worlddb/tree_reader.rb +97 -0
  34. data/lib/worlddb/version.rb +2 -2
  35. data/test/adm/test_fixture_matcher_adm2.rb +73 -0
  36. data/test/{test_fixture_matcher_adm3.rb → adm/test_fixture_matcher_adm3.rb} +6 -6
  37. data/test/adm/test_fixture_matcher_tree.rb +52 -0
  38. data/test/{test_read_adm.rb → adm/test_read_adm.rb} +13 -20
  39. data/test/adm/test_read_tree.rb +63 -0
  40. data/test/data/at-austria/2--n-niederoesterreich/counties.txt +6 -4
  41. data/test/data/at-austria/orte.txt +23 -0
  42. data/test/data/at-austria/setups/tree.txt +9 -0
  43. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt +14 -13
  44. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +104 -0
  45. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
  46. data/test/data/de-deutschland/3--by-bayern/{districts.txt → parts.txt} +1 -1
  47. data/test/data/de-deutschland/orte.txt +12 -0
  48. data/test/data/de-deutschland/setups/adm.txt +1 -1
  49. data/test/data/de-deutschland/setups/tree.txt +9 -0
  50. data/test/helper.rb +8 -1
  51. data/test/test_fixture_matchers.rb +9 -10
  52. data/test/test_fixture_matchers_ii.rb +20 -19
  53. data/test/test_model_city.rb +26 -9
  54. data/test/{test_model_comp.rb → test_model_compat.rb} +15 -13
  55. data/test/test_model_country.rb +1 -1
  56. data/test/test_model_state.rb +54 -0
  57. data/test/test_model_states_at.rb +111 -0
  58. data/test/test_model_states_de.rb +147 -0
  59. data/test/test_models.rb +10 -3
  60. data/test/test_parse_city.rb +70 -0
  61. data/test/test_parse_country.rb +56 -0
  62. data/test/test_parse_state.rb +46 -0
  63. data/test/test_state_tree_reader_at.rb +54 -0
  64. data/test/test_state_tree_reader_de.rb +71 -0
  65. data/test/test_tree_reader.rb +39 -0
  66. metadata +50 -22
  67. data/lib/worlddb/models/city_compat.rb +0 -27
  68. data/lib/worlddb/models/continent_compat.rb +0 -24
  69. data/lib/worlddb/models/country_compat.rb +0 -35
  70. data/lib/worlddb/models/lang_compat.rb +0 -23
  71. data/lib/worlddb/models/region_compat.rb +0 -26
  72. data/lib/worlddb/readers/region.rb +0 -79
  73. data/test/test_fixture_matcher_adm2.rb +0 -62
  74. data/test/test_model_region.rb +0 -50
@@ -0,0 +1,149 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+
6
+ ###
7
+ # todo/fix:
8
+ # try to merge ReaderBaseWithMoreAttribs and ReaderBaseWithOpts into one base - why? why not?
9
+
10
+
11
##
# Base class for readers that are built from a text plus a hash of
# extra attributes (more_attribs). Derived classes supply the read method;
# the from_zip/from_file/from_string factories all end up in self.new, so
# they return an instance of whatever derived class they were called on.
#
# NOTE(review): @skip_tags and @strict are never assigned in this class, so
# skip_tags? and strict? answer false unless a derived class sets them.
class ReaderBaseWithMoreAttribs

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Usage instead of Model::Usage
  include Models

  ## value helpers e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  ## Builds a reader from the text content of a single zip entry.
  #
  #  zip_file     - opened zip archive responding to find_entry
  #  entry_path   - path of the entry inside the archive
  #  more_attribs - extra attributes handed on to the reader
  #
  #  Raises ArgumentError if the archive has no entry for entry_path
  #  (previously this surfaced as a NoMethodError on nil).
  def self.from_zip( zip_file, entry_path, more_attribs={} )
    entry = zip_file.find_entry( entry_path )
    raise ArgumentError, "zip entry not found: #{entry_path}"  if entry.nil?

    ## todo/fix: add force encoding to utf-8 ??
    ##  check!!!
    ##  clean/preprocess lines
    ##  e.g. CR/LF (\r\n) to LF (e.g. \n)

    ## note: block form lets the zip library close the input stream after reading
    text = entry.get_input_stream { |input| input.read }

    ## NOTE: needs logger ref; only available in instance methods; use global logger for now
    logger = LogUtils::Logger.root
    logger.debug "text.encoding.name (before): #{text.encoding.name}"
    #####
    # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
    ## NB:
    # for now "hardcoded" to utf8 - what else can we do?
    # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    text = text.force_encoding( Encoding::UTF_8 )
    logger.debug "text.encoding.name (after): #{text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    self.from_string( text, more_attribs )
  end

  ## Builds a reader from the file at path.
  def self.from_file( path, more_attribs={} )
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    self.from_string( text, more_attribs )
  end

  ## Builds a reader from an in-memory string.
  def self.from_string( text, more_attribs={} )
    puts "[debug] ReaderBase.from_string calling #{self.name}.new" # note: assume self is derived class (object)
    self.new( text, more_attribs )
  end


  def skip_tags?()   @skip_tags == true;  end
  def strict?()      @strict == true;     end

  ## Stores the text to read and the extra attributes for later use.
  def initialize( text, more_attribs={} )
    ## todo/fix: how to add opts={} ???

    @text         = text
    @more_attribs = more_attribs
  end

end # class ReaderBaseWithMoreAttribs
76
+
77
+
78
+
79
##
# Base class for readers that are built from a text plus an options hash.
# Recognized options (see initialize): :skip_tags and :strict.
# Derived classes supply the read method; the from_zip/from_file/from_string
# factories all end up in self.new, so they return an instance of whatever
# derived class they were called on.
class ReaderBaseWithOpts

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Usage instead of Model::Usage
  include Models

  ## value helpers e.g. is_year?, is_taglist? etc.
  include TextUtils::ValueHelper


  ## Builds a reader from the text content of a single zip entry.
  #
  #  zip_file   - opened zip archive responding to find_entry
  #  entry_path - path of the entry inside the archive
  #  opts       - reader options (optional; forwarded to from_string, same as from_file)
  #
  #  Raises ArgumentError if the archive has no entry for entry_path
  #  (previously this surfaced as a NoMethodError on nil).
  def self.from_zip( zip_file, entry_path, opts={} )
    entry = zip_file.find_entry( entry_path )
    raise ArgumentError, "zip entry not found: #{entry_path}"  if entry.nil?

    ## todo/fix: add force encoding to utf-8 ??
    ##  check!!!
    ##  clean/preprocess lines
    ##  e.g. CR/LF (\r\n) to LF (e.g. \n)

    ## note: block form lets the zip library close the input stream after reading
    text = entry.get_input_stream { |input| input.read }

    ## NOTE: needs logger ref; only available in instance methods; use global logger for now
    logger = LogUtils::Logger.root
    logger.debug "text.encoding.name (before): #{text.encoding.name}"
    #####
    # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
    ## NB:
    # for now "hardcoded" to utf8 - what else can we do?
    # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
    text = text.force_encoding( Encoding::UTF_8 )
    logger.debug "text.encoding.name (after): #{text.encoding.name}"

    ## todo:
    # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
    ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )

    self.from_string( text, opts )
  end

  ## Builds a reader from the file at path.
  def self.from_file( path, opts={} )
    ## note: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ## - see textutils/utils.rb
    text = File.read_utf8( path )
    self.from_string( text, opts )
  end

  ## Builds a reader from an in-memory string.
  def self.from_string( text, opts={} )
    puts "[debug] ReaderBase.from_string calling #{self.name}.new" # note: assume self is derived class (object)
    self.new( text, opts )
  end


  def skip_tags?()   @skip_tags == true;  end
  def strict?()      @strict == true;     end

  ## Stores the text and normalizes the two supported options to true/false.
  def initialize( text, opts={} )
    @text = text

    ## option: do NOT generate/add any tags for countries/regions/cities
    @skip_tags = opts[:skip_tags].present? ? true : false
    ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
    @strict    = opts[:strict].present? ? true : false
  end

end # class ReaderBaseWithOpts
147
+
148
+
149
+ end # module WorldDb
@@ -2,72 +2,9 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class CityReader
5
+ class CityReader < ReaderBaseWithMoreAttribs
6
6
 
7
- include LogUtils::Logging
8
-
9
- ## make models available by default with namespace
10
- # e.g. lets you use Usage instead of Model::Usage
11
- include Models
12
-
13
- ## value helpers e.g. is_year?, is_taglist? etc.
14
- include TextUtils::ValueHelper
15
-
16
-
17
- def self.from_zip( zip_file, entry_path, more_attribs={} )
18
- ## get text content from zip
19
-
20
- entry = zip_file.find_entry( entry_path )
21
-
22
- ## todo/fix: add force encoding to utf-8 ??
23
- ## check!!!
24
- ## clean/prepprocess lines
25
- ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
- text = entry.get_input_stream().read()
27
-
28
- ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
- logger = LogUtils::Logger.root
30
- logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
- #####
32
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
- ## NB:
34
- # for now "hardcoded" to utf8 - what else can we do?
35
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
- text = text.force_encoding( Encoding::UTF_8 )
37
- logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
-
39
- ## todo:
40
- # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
- ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
-
43
- self.from_string( text, more_attribs )
44
- end
45
-
46
-
47
- def self.from_file( path, more_attribs={} )
48
- ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
49
- ## - see textutils/utils.rb
50
- text = File.read_utf8( path )
51
- self.from_string( text, more_attribs )
52
- end
53
-
54
- def self.from_string( text, more_attribs={} )
55
- CityReader.new( text, more_attribs )
56
- end
57
-
58
-
59
- def skip_tags?() @skip_tags == true; end
60
- def strict?() @strict == true; end
61
-
62
- def initialize( text, more_attribs={} )
63
- ## todo/fix: how to add opts={} ???
64
-
65
- @text = text
66
- @more_attribs = more_attribs
67
- end
68
-
69
-
70
- def read()
7
+ def read
71
8
  reader = ValuesReader.from_string( @text, @more_attribs )
72
9
 
73
10
  reader.each_line do |attribs, values|
@@ -2,70 +2,9 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class CountryReader
5
+ class CountryReader < ReaderBaseWithMoreAttribs
6
6
 
7
- include LogUtils::Logging
8
-
9
- ## make models available by default with namespace
10
- # e.g. lets you use Usage instead of Model::Usage
11
- include Models
12
-
13
- ## value helpers e.g. is_year?, is_taglist? etc.
14
- include TextUtils::ValueHelper
15
-
16
-
17
- def self.from_zip( zip_file, entry_path, more_attribs={} )
18
- ## get text content from zip
19
-
20
- entry = zip_file.find_entry( entry_path )
21
-
22
- ## todo/fix: add force encoding to utf-8 ??
23
- ## check!!!
24
- ## clean/prepprocess lines
25
- ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
- text = entry.get_input_stream().read()
27
-
28
- ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
- logger = LogUtils::Logger.root
30
- logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
- #####
32
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
- ## NB:
34
- # for now "hardcoded" to utf8 - what else can we do?
35
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
- text = text.force_encoding( Encoding::UTF_8 )
37
- logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
-
39
- ## todo:
40
- # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
- ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
-
43
- self.from_string( text, more_attribs )
44
- end
45
-
46
- def self.from_file( path, more_attribs={} )
47
- ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
48
- ## - see textutils/utils.rb
49
- text = File.read_utf8( path )
50
- self.from_string( text, more_attribs )
51
- end
52
-
53
- def self.from_string( text, more_attribs={} )
54
- CountryReader.new( text, more_attribs )
55
- end
56
-
57
-
58
- def skip_tags?() @skip_tags == true; end
59
- def strict?() @strict == true; end
60
-
61
- def initialize( text, more_attribs={} )
62
- ## todo/fix: how to add opts={} ???
63
-
64
- @text = text
65
- @more_attribs = more_attribs
66
- end
67
-
68
- def read()
7
+ def read
69
8
  reader = ValuesReader.from_string( @text, @more_attribs )
70
9
 
71
10
  reader.each_line do |attribs, values|
@@ -2,73 +2,9 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class LangReader
5
+ class LangReader < ReaderBaseWithOpts
6
6
 
7
- include LogUtils::Logging
8
-
9
- ## make models available by default with namespace
10
- # e.g. lets you use Usage instead of Model::Usage
11
- include Models
12
-
13
- ## value helpers e.g. is_year?, is_taglist? etc.
14
- include TextUtils::ValueHelper
15
-
16
-
17
- ## todo: add opts={} etc.
18
- def self.from_zip( zip_file, entry_path )
19
- ## get text content from zip
20
-
21
- entry = zip_file.find_entry( entry_path )
22
-
23
- ## todo/fix: add force encoding to utf-8 ??
24
- ## check!!!
25
- ## clean/prepprocess lines
26
- ## e.g. CR/LF (/r/n) to LF (e.g. /n)
27
- text = entry.get_input_stream().read()
28
-
29
- ## NOTE: needs logger ref; only available in instance methods; use global logger for now
30
- logger = LogUtils::Logger.root
31
- logger.debug "text.encoding.name (before): #{text.encoding.name}"
32
- #####
33
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
34
- ## NB:
35
- # for now "hardcoded" to utf8 - what else can we do?
36
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
37
- text = text.force_encoding( Encoding::UTF_8 )
38
- logger.debug "text.encoding.name (after): #{text.encoding.name}"
39
-
40
- ## todo:
41
- # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
42
- ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
43
-
44
- self.from_string( text )
45
- end
46
-
47
- def self.from_file( path, opts={} )
48
- ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
49
- ## - see textutils/utils.rb
50
- text = File.read_utf8( path )
51
- self.from_string( text, opts )
52
- end
53
-
54
- def self.from_string( text, opts={} )
55
- LangReader.new( text, opts )
56
- end
57
-
58
-
59
- def skip_tags?() @skip_tags == true; end
60
- def strict?() @strict == true; end
61
-
62
- def initialize( text, opts={} )
63
- @text = text
64
-
65
- ## option: do NOT generate/add any tags for countries/regions/cities
66
- @skip_tags = opts[:skip_tags].present? ? true : false
67
- ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
68
- @strict = opts[:strict].present? ? true : false
69
- end
70
-
71
- def read()
7
+ def read
72
8
  reader = HashReader.from_string( @text )
73
9
 
74
10
  reader.each do |key, value|
@@ -100,8 +36,7 @@ class LangReader
100
36
  lang.update_attributes!( lang_attribs )
101
37
  end # each key,value
102
38
 
103
- end # method load_langs
104
-
39
+ end # method read
105
40
 
106
41
  end # class LangReader
107
42
  end # module WorldDb
@@ -0,0 +1,61 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
class StateReader < ReaderBaseWithMoreAttribs

  ## Parses @text (with @more_attribs) via ValuesReader and hands every
  #  attribs/values pair to State.create_or_update_from_attribs.
  def read
    lines = ValuesReader.from_string( @text, @more_attribs )

    lines.each_line do |attrs, vals|
      ## note: a fresh options hash is built for every line
      State.create_or_update_from_attribs( attrs, vals, { skip_tags: skip_tags? } )
    end
  end

end # class StateReader
17
+
18
+
19
class PartReader < ReaderBaseWithMoreAttribs

  ## Parses @text (with @more_attribs) via ValuesReader and hands every
  #  attribs/values pair to Part.create_or_update_from_attribs.
  def read
    lines = ValuesReader.from_string( @text, @more_attribs )

    lines.each_line do |attrs, vals|
      ## note: a fresh options hash is built for every line
      Part.create_or_update_from_attribs( attrs, vals, { skip_tags: skip_tags? } )
    end
  end

end # class PartReader
31
+
32
+
33
class CountyReader < ReaderBaseWithMoreAttribs

  ## Parses @text (with @more_attribs) via ValuesReader and hands every
  #  attribs/values pair to County.create_or_update_from_attribs.
  def read
    lines = ValuesReader.from_string( @text, @more_attribs )

    lines.each_line do |attrs, vals|
      ## note: a fresh options hash is built for every line
      County.create_or_update_from_attribs( attrs, vals, { skip_tags: skip_tags? } )
    end
  end

end # class CountyReader
45
+
46
+
47
class MuniReader < ReaderBaseWithMoreAttribs

  ## Parses @text (with @more_attribs) via ValuesReader and hands every
  #  attribs/values pair to Muni.create_or_update_from_attribs.
  def read
    lines = ValuesReader.from_string( @text, @more_attribs )

    lines.each_line do |attrs, vals|
      ## note: a fresh options hash is built for every line
      Muni.create_or_update_from_attribs( attrs, vals, { skip_tags: skip_tags? } )
    end
  end

end # class MuniReader
59
+
60
+
61
+ end # module WorldDb
@@ -0,0 +1,118 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
##
# Reads a tree text (via TreeReader) and links every node to a record:
# State, Part, County, Muni or City, depending on the node's level.
# Existing records are looked up by name prefix; missing munis and cities
# get created on the fly. Requires @more_attribs[:country_id].
class StateTreeReader < ReaderBaseWithMoreAttribs

  ## Walks all tree lines; keeps a stack of the records for the current
  #  path (stack[0] is the state) that mirrors the hierarchy of the text.
  def read
    ## for now requires country_id !!
    country = Country.find( @more_attribs[ :country_id ] )
    puts "[StateTreeReader] country key: #{country.key}, name: #{country.name}"

    reader = TreeReader.from_string( @text )

    stack  = []    # note: last_level => stack.size; starts w/ 0
    levels = levels_for( country )

    reader.each_line do |nodes|
      names = nodes.map { |item| "(#{item.level}) #{item.value}" }
      node  = nodes.last

      puts " #{names.join( ' › ' )}:"
      puts " key: >#{node.key}<, level: >#{node.level}<, value: >#{node.value}<"

      rec = find_record( node, levels, stack, country )

      if rec.present?
        puts "ok - record match found: #{rec.inspect}"
      else
        ## note: for now only auto-adds munis n cities
        rec = add_record( node, levels, stack, country )
      end

      level_diff = node.level - stack.size

      if level_diff > 0
        logger.debug "[StateTreeReader] up +#{level_diff}"
        ## FIX!!! todo/check/verify/assert: always must be +1
      elsif level_diff < 0
        logger.debug "[StateTreeReader] down #{level_diff}"
        ## drop the deeper levels plus the previous record on the node's own level
        stack.pop( level_diff.abs + 1 )
      else
        ## same level
        stack.pop
      end
      stack.push( rec )  ## hierarchy of records (mirrors hierarchy of read-in text)

    end # each_line
  end # method read


  private

  ## Maps record kinds to tree levels for the given country.
  def levels_for( country )
    if country.key == 'de'
      ## use state (1) > part (2) > county (3) > muni (4) > city (5)
      { state:  1,    # Land
        part:   2,    # Regierungsbezirk
        county: 3,    # Landkreis, Kreisfreie Stadt
        muni:   4,    # Gemeinde
        city:   5 }   # Stadt, Ort, etc.
    else
      ## use state (1) > county (2) > muni (3) > city (4)
      { state:  1,
        part:   -1,   ## note: not in use (-1)
        county: 2,
        muni:   3,
        city:   4 }
    end
  end

  ## Looks up the first record whose name starts with the node's value;
  #  returns nil if nothing matches or the level is unknown.
  #
  #  note: values get passed as bind parameters (NOT interpolated into the
  #    SQL string), so names with quotes (e.g. ') cannot break the query.
  #  NOTE(review): % and _ inside node.value still act as LIKE wildcards.
  #  NOTE(review): the state lookup is not scoped by country - confirm if intended.
  def find_record( node, levels, stack, country )
    pattern = "#{node.value}%"

    case node.level
    when levels[:state]   # 1
      State.where( 'name like ?', pattern ).first
    when levels[:part]    # 2
      Part.where( 'name like ? AND state_id = ?', pattern, stack[0].id ).first
    when levels[:county]  # 2 or 3
      County.where( 'name like ? AND state_id = ?', pattern, stack[0].id ).first
    when levels[:muni]    # 3 or 4
      Muni.where( 'name like ? AND state_id = ?', pattern, stack[0].id ).first
    when levels[:city]    # 4 or 5
      ## note: city requires country scope for lookup
      ## todo/fix: how to deal with cities with the same name
      ##   in the same country (and same state and same county etc.) ??? - add some examples here
      City.where( 'name like ? AND country_id = ?', pattern, country.id ).first
    else
      puts "*** (fatal) error: unknown level for tree node: #{node.inspect}"
      ## todo/fix: exit here
      nil
    end
  end

  ## Creates the missing muni or city record for the node; for any other
  #  level only reports the miss and returns nil.
  def add_record( node, levels, stack, country )
    case node.level
    when levels[:muni]    # 3 or 4
      key    = TextUtils.title_to_key( node.value )
      state  = stack[0]
      county = stack[ levels[:county]-1 ]   # note: stack is zero-based (thus, -1)
      puts "*** adding muni record:"
      Muni.create!( key:       key,
                    name:      node.value,
                    level:     node.level,
                    state_id:  state.id,
                    county_id: county.id )
    when levels[:city]    # 4 or 5
      key   = TextUtils.title_to_key( node.value )
      state = stack[0]
      muni  = stack[ levels[:muni]-1 ]      # note: stack is zero-based (thus, -1)
      puts "*** adding city record:"
      City.create!( key:        key,
                    name:       node.value,
                    state_id:   state.id,
                    ## add county_id too ???
                    muni_id:    muni.id,
                    country_id: country.id )
    else
      puts "*** (fatal) error: record not found for tree node: #{node.inspect}"
      ## todo/fix: exit here
      nil
    end
  end

end # class StateTreeReader
117
+ end # module WorldDb
118
+
@@ -2,72 +2,9 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class UsageReader
5
+ class UsageReader < ReaderBaseWithOpts
6
6
 
7
- include LogUtils::Logging
8
-
9
- ## make models available by default with namespace
10
- # e.g. lets you use Usage instead of Model::Usage
11
- include Models
12
-
13
- ## value helpers e.g. is_year?, is_taglist? etc.
14
- include TextUtils::ValueHelper
15
-
16
- ## todo: add opts
17
- def self.from_zip( zip_file, entry_path )
18
- ## get text content from zip
19
-
20
- entry = zip_file.find_entry( entry_path )
21
-
22
- ## todo/fix: add force encoding to utf-8 ??
23
- ## check!!!
24
- ## clean/prepprocess lines
25
- ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
- text = entry.get_input_stream().read()
27
-
28
- ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
- logger = LogUtils::Logger.root
30
- logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
- #####
32
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
- ## NB:
34
- # for now "hardcoded" to utf8 - what else can we do?
35
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
- text = text.force_encoding( Encoding::UTF_8 )
37
- logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
-
39
- ## todo:
40
- # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
- ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
-
43
- self.from_string( text )
44
- end
45
-
46
- def self.from_file( path, opts={} )
47
- ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
48
- ## - see textutils/utils.rb
49
- text = File.read_utf8( path )
50
- self.from_string( text, opts )
51
- end
52
-
53
- def self.from_string( text, opts={} )
54
- UsageReader.new( text, opts )
55
- end
56
-
57
-
58
- def skip_tags?() @skip_tags == true; end
59
- def strict?() @strict == true; end
60
-
61
- def initialize( text, opts={} )
62
- @text = text
63
-
64
- ## option: do NOT generate/add any tags for countries/regions/cities
65
- @skip_tags = opts[:skip_tags].present? ? true : false
66
- ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
67
- @strict = opts[:strict].present? ? true : false
68
- end
69
-
70
- def read()
7
+ def read
71
8
  reader = HashReader.from_string( @text )
72
9
 
73
10
  reader.each do |key, value|