worlddb-models 2.2.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +31 -13
  3. data/README.md +7 -7
  4. data/Rakefile +1 -1
  5. data/lib/worlddb/deleter.rb +6 -1
  6. data/lib/worlddb/helpers/value_helper.rb +117 -0
  7. data/lib/worlddb/matcher.rb +99 -135
  8. data/lib/worlddb/matcher_adm.rb +82 -0
  9. data/lib/worlddb/models/city.rb +30 -208
  10. data/lib/worlddb/models/city_base.rb +220 -0
  11. data/lib/worlddb/models/continent.rb +9 -0
  12. data/lib/worlddb/models/country.rb +21 -4
  13. data/lib/worlddb/models/forward.rb +25 -9
  14. data/lib/worlddb/models/lang.rb +6 -0
  15. data/lib/worlddb/models/place.rb +1 -1
  16. data/lib/worlddb/models/state.rb +83 -0
  17. data/lib/worlddb/models/{region.rb → state_base.rb} +52 -36
  18. data/lib/worlddb/models/tagdb/tag.rb +1 -1
  19. data/lib/worlddb/models.rb +11 -8
  20. data/lib/worlddb/patterns.rb +4 -4
  21. data/lib/worlddb/reader.rb +68 -39
  22. data/lib/worlddb/reader_file.rb +36 -3
  23. data/lib/worlddb/reader_zip.rb +33 -3
  24. data/lib/worlddb/readers/base.rb +149 -0
  25. data/lib/worlddb/readers/city.rb +2 -65
  26. data/lib/worlddb/readers/country.rb +2 -63
  27. data/lib/worlddb/readers/lang.rb +3 -68
  28. data/lib/worlddb/readers/state.rb +61 -0
  29. data/lib/worlddb/readers/state_tree.rb +118 -0
  30. data/lib/worlddb/readers/usage.rb +2 -65
  31. data/lib/worlddb/schema.rb +142 -43
  32. data/lib/worlddb/stats.rb +7 -4
  33. data/lib/worlddb/tree_reader.rb +97 -0
  34. data/lib/worlddb/version.rb +2 -2
  35. data/test/adm/test_fixture_matcher_adm2.rb +73 -0
  36. data/test/{test_fixture_matcher_adm3.rb → adm/test_fixture_matcher_adm3.rb} +6 -6
  37. data/test/adm/test_fixture_matcher_tree.rb +52 -0
  38. data/test/{test_read_adm.rb → adm/test_read_adm.rb} +13 -20
  39. data/test/adm/test_read_tree.rb +63 -0
  40. data/test/data/at-austria/2--n-niederoesterreich/counties.txt +6 -4
  41. data/test/data/at-austria/orte.txt +23 -0
  42. data/test/data/at-austria/setups/tree.txt +9 -0
  43. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/counties.txt +14 -13
  44. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte.txt +104 -0
  45. data/test/data/de-deutschland/3--by-bayern/4--oberfranken/orte_ii.txt +17 -0
  46. data/test/data/de-deutschland/3--by-bayern/{districts.txt → parts.txt} +1 -1
  47. data/test/data/de-deutschland/orte.txt +12 -0
  48. data/test/data/de-deutschland/setups/adm.txt +1 -1
  49. data/test/data/de-deutschland/setups/tree.txt +9 -0
  50. data/test/helper.rb +8 -1
  51. data/test/test_fixture_matchers.rb +9 -10
  52. data/test/test_fixture_matchers_ii.rb +20 -19
  53. data/test/test_model_city.rb +26 -9
  54. data/test/{test_model_comp.rb → test_model_compat.rb} +15 -13
  55. data/test/test_model_country.rb +1 -1
  56. data/test/test_model_state.rb +54 -0
  57. data/test/test_model_states_at.rb +111 -0
  58. data/test/test_model_states_de.rb +147 -0
  59. data/test/test_models.rb +10 -3
  60. data/test/test_parse_city.rb +70 -0
  61. data/test/test_parse_country.rb +56 -0
  62. data/test/test_parse_state.rb +46 -0
  63. data/test/test_state_tree_reader_at.rb +54 -0
  64. data/test/test_state_tree_reader_de.rb +71 -0
  65. data/test/test_tree_reader.rb +39 -0
  66. metadata +50 -22
  67. data/lib/worlddb/models/city_compat.rb +0 -27
  68. data/lib/worlddb/models/continent_compat.rb +0 -24
  69. data/lib/worlddb/models/country_compat.rb +0 -35
  70. data/lib/worlddb/models/lang_compat.rb +0 -23
  71. data/lib/worlddb/models/region_compat.rb +0 -26
  72. data/lib/worlddb/readers/region.rb +0 -79
  73. data/test/test_fixture_matcher_adm2.rb +0 -62
  74. data/test/test_model_region.rb +0 -50
@@ -0,0 +1,149 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+
6
+ ###
7
+ # todo/fix:
8
+ # try to merge ReaderBaseWithMoreAttribs and ReaderBaseWithOpts into one base - why? why not?
9
+
10
+
11
+ class ReaderBaseWithMoreAttribs
12
+
13
+ include LogUtils::Logging
14
+
15
+ ## make models available by default with namespace
16
+ # e.g. lets you use Usage instead of Model::Usage
17
+ include Models
18
+
19
+ ## value helpers e.g. is_year?, is_taglist? etc.
20
+ include TextUtils::ValueHelper
21
+
22
+
23
+ def self.from_zip( zip_file, entry_path, more_attribs={} )
24
+ ## get text content from zip
25
+
26
+ entry = zip_file.find_entry( entry_path )
27
+
28
+ ## todo/fix: add force encoding to utf-8 ??
29
+ ## check!!!
30
+ ## clean/prepprocess lines
31
+ ## e.g. CR/LF (/r/n) to LF (e.g. /n)
32
+ text = entry.get_input_stream().read()
33
+
34
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
35
+ logger = LogUtils::Logger.root
36
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
37
+ #####
38
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
39
+ ## NB:
40
+ # for now "hardcoded" to utf8 - what else can we do?
41
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
42
+ text = text.force_encoding( Encoding::UTF_8 )
43
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
44
+
45
+ ## todo:
46
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
47
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
48
+
49
+ self.from_string( text, more_attribs )
50
+ end
51
+
52
+ def self.from_file( path, more_attribs={} )
53
+ ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
54
+ ## - see textutils/utils.rb
55
+ text = File.read_utf8( path )
56
+ self.from_string( text, more_attribs )
57
+ end
58
+
59
+ def self.from_string( text, more_attribs={} )
60
+ puts "[debug] ReaderBase.from_string calling #{self.name}.new" # note: assume self is derived class (object)
61
+ self.new( text, more_attribs )
62
+ end
63
+
64
+
65
+ def skip_tags?() @skip_tags == true; end
66
+ def strict?() @strict == true; end
67
+
68
+ def initialize( text, more_attribs={} )
69
+ ## todo/fix: how to add opts={} ???
70
+
71
+ @text = text
72
+ @more_attribs = more_attribs
73
+ end
74
+
75
+ end # class ReaderBaseWithMoreAttribs
76
+
77
+
78
+
79
+ class ReaderBaseWithOpts
80
+
81
+ include LogUtils::Logging
82
+
83
+ ## make models available by default with namespace
84
+ # e.g. lets you use Usage instead of Model::Usage
85
+ include Models
86
+
87
+ ## value helpers e.g. is_year?, is_taglist? etc.
88
+ include TextUtils::ValueHelper
89
+
90
+
91
+ ## todo: add opts={} etc.
92
+ def self.from_zip( zip_file, entry_path )
93
+ ## get text content from zip
94
+
95
+ entry = zip_file.find_entry( entry_path )
96
+
97
+ ## todo/fix: add force encoding to utf-8 ??
98
+ ## check!!!
99
+ ## clean/prepprocess lines
100
+ ## e.g. CR/LF (/r/n) to LF (e.g. /n)
101
+ text = entry.get_input_stream().read()
102
+
103
+ ## NOTE: needs logger ref; only available in instance methods; use global logger for now
104
+ logger = LogUtils::Logger.root
105
+ logger.debug "text.encoding.name (before): #{text.encoding.name}"
106
+ #####
107
+ # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
108
+ ## NB:
109
+ # for now "hardcoded" to utf8 - what else can we do?
110
+ # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
111
+ text = text.force_encoding( Encoding::UTF_8 )
112
+ logger.debug "text.encoding.name (after): #{text.encoding.name}"
113
+
114
+ ## todo:
115
+ # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
116
+ ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
117
+
118
+ self.from_string( text )
119
+ end
120
+
121
+ def self.from_file( path, opts={} )
122
+ ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
123
+ ## - see textutils/utils.rb
124
+ text = File.read_utf8( path )
125
+ self.from_string( text, opts )
126
+ end
127
+
128
+ def self.from_string( text, opts={} )
129
+ puts "[debug] ReaderBase.from_string calling #{self.name}.new" # note: assume self is derived class (object)
130
+ self.new( text, opts )
131
+ end
132
+
133
+
134
+ def skip_tags?() @skip_tags == true; end
135
+ def strict?() @strict == true; end
136
+
137
+ def initialize( text, opts={} )
138
+ @text = text
139
+
140
+ ## option: do NOT generate/add any tags for countries/regions/cities
141
+ @skip_tags = opts[:skip_tags].present? ? true : false
142
+ ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
143
+ @strict = opts[:strict].present? ? true : false
144
+ end
145
+
146
+ end # class ReaderBaseWithOpts
147
+
148
+
149
+ end # module WorldDb
@@ -2,72 +2,9 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class CityReader
5
+ class CityReader < ReaderBaseWithMoreAttribs
6
6
 
7
- include LogUtils::Logging
8
-
9
- ## make models available by default with namespace
10
- # e.g. lets you use Usage instead of Model::Usage
11
- include Models
12
-
13
- ## value helpers e.g. is_year?, is_taglist? etc.
14
- include TextUtils::ValueHelper
15
-
16
-
17
- def self.from_zip( zip_file, entry_path, more_attribs={} )
18
- ## get text content from zip
19
-
20
- entry = zip_file.find_entry( entry_path )
21
-
22
- ## todo/fix: add force encoding to utf-8 ??
23
- ## check!!!
24
- ## clean/prepprocess lines
25
- ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
- text = entry.get_input_stream().read()
27
-
28
- ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
- logger = LogUtils::Logger.root
30
- logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
- #####
32
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
- ## NB:
34
- # for now "hardcoded" to utf8 - what else can we do?
35
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
- text = text.force_encoding( Encoding::UTF_8 )
37
- logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
-
39
- ## todo:
40
- # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
- ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
-
43
- self.from_string( text, more_attribs )
44
- end
45
-
46
-
47
- def self.from_file( path, more_attribs={} )
48
- ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
49
- ## - see textutils/utils.rb
50
- text = File.read_utf8( path )
51
- self.from_string( text, more_attribs )
52
- end
53
-
54
- def self.from_string( text, more_attribs={} )
55
- CityReader.new( text, more_attribs )
56
- end
57
-
58
-
59
- def skip_tags?() @skip_tags == true; end
60
- def strict?() @strict == true; end
61
-
62
- def initialize( text, more_attribs={} )
63
- ## todo/fix: how to add opts={} ???
64
-
65
- @text = text
66
- @more_attribs = more_attribs
67
- end
68
-
69
-
70
- def read()
7
+ def read
71
8
  reader = ValuesReader.from_string( @text, @more_attribs )
72
9
 
73
10
  reader.each_line do |attribs, values|
@@ -2,70 +2,9 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class CountryReader
5
+ class CountryReader < ReaderBaseWithMoreAttribs
6
6
 
7
- include LogUtils::Logging
8
-
9
- ## make models available by default with namespace
10
- # e.g. lets you use Usage instead of Model::Usage
11
- include Models
12
-
13
- ## value helpers e.g. is_year?, is_taglist? etc.
14
- include TextUtils::ValueHelper
15
-
16
-
17
- def self.from_zip( zip_file, entry_path, more_attribs={} )
18
- ## get text content from zip
19
-
20
- entry = zip_file.find_entry( entry_path )
21
-
22
- ## todo/fix: add force encoding to utf-8 ??
23
- ## check!!!
24
- ## clean/prepprocess lines
25
- ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
- text = entry.get_input_stream().read()
27
-
28
- ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
- logger = LogUtils::Logger.root
30
- logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
- #####
32
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
- ## NB:
34
- # for now "hardcoded" to utf8 - what else can we do?
35
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
- text = text.force_encoding( Encoding::UTF_8 )
37
- logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
-
39
- ## todo:
40
- # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
- ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
-
43
- self.from_string( text, more_attribs )
44
- end
45
-
46
- def self.from_file( path, more_attribs={} )
47
- ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
48
- ## - see textutils/utils.rb
49
- text = File.read_utf8( path )
50
- self.from_string( text, more_attribs )
51
- end
52
-
53
- def self.from_string( text, more_attribs={} )
54
- CountryReader.new( text, more_attribs )
55
- end
56
-
57
-
58
- def skip_tags?() @skip_tags == true; end
59
- def strict?() @strict == true; end
60
-
61
- def initialize( text, more_attribs={} )
62
- ## todo/fix: how to add opts={} ???
63
-
64
- @text = text
65
- @more_attribs = more_attribs
66
- end
67
-
68
- def read()
7
+ def read
69
8
  reader = ValuesReader.from_string( @text, @more_attribs )
70
9
 
71
10
  reader.each_line do |attribs, values|
@@ -2,73 +2,9 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class LangReader
5
+ class LangReader < ReaderBaseWithOpts
6
6
 
7
- include LogUtils::Logging
8
-
9
- ## make models available by default with namespace
10
- # e.g. lets you use Usage instead of Model::Usage
11
- include Models
12
-
13
- ## value helpers e.g. is_year?, is_taglist? etc.
14
- include TextUtils::ValueHelper
15
-
16
-
17
- ## todo: add opts={} etc.
18
- def self.from_zip( zip_file, entry_path )
19
- ## get text content from zip
20
-
21
- entry = zip_file.find_entry( entry_path )
22
-
23
- ## todo/fix: add force encoding to utf-8 ??
24
- ## check!!!
25
- ## clean/prepprocess lines
26
- ## e.g. CR/LF (/r/n) to LF (e.g. /n)
27
- text = entry.get_input_stream().read()
28
-
29
- ## NOTE: needs logger ref; only available in instance methods; use global logger for now
30
- logger = LogUtils::Logger.root
31
- logger.debug "text.encoding.name (before): #{text.encoding.name}"
32
- #####
33
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
34
- ## NB:
35
- # for now "hardcoded" to utf8 - what else can we do?
36
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
37
- text = text.force_encoding( Encoding::UTF_8 )
38
- logger.debug "text.encoding.name (after): #{text.encoding.name}"
39
-
40
- ## todo:
41
- # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
42
- ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
43
-
44
- self.from_string( text )
45
- end
46
-
47
- def self.from_file( path, opts={} )
48
- ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
49
- ## - see textutils/utils.rb
50
- text = File.read_utf8( path )
51
- self.from_string( text, opts )
52
- end
53
-
54
- def self.from_string( text, opts={} )
55
- LangReader.new( text, opts )
56
- end
57
-
58
-
59
- def skip_tags?() @skip_tags == true; end
60
- def strict?() @strict == true; end
61
-
62
- def initialize( text, opts={} )
63
- @text = text
64
-
65
- ## option: do NOT generate/add any tags for countries/regions/cities
66
- @skip_tags = opts[:skip_tags].present? ? true : false
67
- ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
68
- @strict = opts[:strict].present? ? true : false
69
- end
70
-
71
- def read()
7
+ def read
72
8
  reader = HashReader.from_string( @text )
73
9
 
74
10
  reader.each do |key, value|
@@ -100,8 +36,7 @@ class LangReader
100
36
  lang.update_attributes!( lang_attribs )
101
37
  end # each key,value
102
38
 
103
- end # method load_langs
104
-
39
+ end # method read
105
40
 
106
41
  end # class LangReader
107
42
  end # module WorldDb
@@ -0,0 +1,61 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+ class StateReader < ReaderBaseWithMoreAttribs
6
+ ## reads @text via ValuesReader; calls State.create_or_update_from_attribs once per line yielded by each_line
7
+ def read
8
+ reader = ValuesReader.from_string( @text, @more_attribs )
9
+ ## note: skip_tags? comes from the base class - NOTE(review): looks always false for this base; confirm
10
+ reader.each_line do |attribs, values|
11
+ opts = { skip_tags: skip_tags? }
12
+ State.create_or_update_from_attribs( attribs, values, opts )
13
+ end
14
+ end
15
+
16
+ end # class StateReader
17
+
18
+
19
+ class PartReader < ReaderBaseWithMoreAttribs
20
+ ## reads @text via ValuesReader; calls Part.create_or_update_from_attribs once per line yielded by each_line
21
+ def read
22
+ reader = ValuesReader.from_string( @text, @more_attribs )
23
+ ## note: skip_tags? comes from the base class - NOTE(review): looks always false for this base; confirm
24
+ reader.each_line do |attribs, values|
25
+ opts = { skip_tags: skip_tags? }
26
+ Part.create_or_update_from_attribs( attribs, values, opts )
27
+ end
28
+ end
29
+
30
+ end # class PartReader
31
+
32
+
33
+ class CountyReader < ReaderBaseWithMoreAttribs
34
+ ## reads @text via ValuesReader; calls County.create_or_update_from_attribs once per line yielded by each_line
35
+ def read
36
+ reader = ValuesReader.from_string( @text, @more_attribs )
37
+ ## note: skip_tags? comes from the base class - NOTE(review): looks always false for this base; confirm
38
+ reader.each_line do |attribs, values|
39
+ opts = { skip_tags: skip_tags? }
40
+ County.create_or_update_from_attribs( attribs, values, opts )
41
+ end
42
+ end
43
+
44
+ end # class CountyReader
45
+
46
+
47
+ class MuniReader < ReaderBaseWithMoreAttribs
48
+ ## reads @text via ValuesReader; calls Muni.create_or_update_from_attribs once per line yielded by each_line
49
+ def read
50
+ reader = ValuesReader.from_string( @text, @more_attribs )
51
+ ## note: skip_tags? comes from the base class - NOTE(review): looks always false for this base; confirm
52
+ reader.each_line do |attribs, values|
53
+ opts = { skip_tags: skip_tags? }
54
+ Muni.create_or_update_from_attribs( attribs, values, opts )
55
+ end
56
+ end
57
+
58
+ end # class MuniReader
59
+
60
+
61
+ end # module WorldDb
@@ -0,0 +1,118 @@
1
+ # encoding: UTF-8
2
+
3
+ module WorldDb
4
+
5
+ class StateTreeReader < ReaderBaseWithMoreAttribs
6
+ ## walks a tree text (via TreeReader) and matches every node to a State/Part/County/Muni/City record; missing munis and cities get added
7
+ def read
8
+ ## for now requires country_id !! (passed in via more_attribs)
9
+ country = Country.find( @more_attribs[ :country_id ] )
10
+ puts "[StateTreeReader] country key: #{country.key}, name: #{country.name}"
11
+
12
+ reader = TreeReader.from_string( @text )
13
+
14
+ stack = [] # note: last_level => stack.size; starts w/ 0
15
+ ## map tree levels to record types; the level numbers depend on the country
16
+ if country.key == 'de'
17
+ ## use state (1) > part (2) > county (3) > muni (4) > city (5)
18
+ state_level = 1 # Land (state)
19
+ part_level = 2 # Regierungsbezirk (government district)
20
+ county_level = 3 # Landkreis, Kreisfreie Stadt (county, county-free city)
21
+ muni_level = 4 # Gemeinde (municipality)
22
+ city_level = 5 # Stadt, Ort, etc. (city, town/place, etc.)
23
+ else
24
+ ## use state (1) > county (2) > muni (3) > city (4)
25
+ state_level = 1
26
+ part_level = -1 ## note: not in use (-1)
27
+ county_level = 2
28
+ muni_level = 3
29
+ city_level = 4
30
+ end
31
+
32
+ reader.each_line do |nodes|
33
+ names = nodes.map { |item| "(#{item.level}) #{item.value}" }
34
+ node = nodes.last
35
+
36
+ puts " #{names.join( ' › ' )}:"
37
+ puts " key: >#{node.key}<, level: >#{node.level}<, value: >#{node.value}<"
38
+ ## note: lookups use bind parameters (?) so that names with quotes (e.g. an apostrophe) cannot break (or inject into) the sql
39
+ if node.level == state_level # 1
40
+ rec = State.where( "name like ?", "#{node.value}%" ).first
41
+ elsif node.level == part_level # 2
42
+ state = stack[0]
43
+ rec = Part.where( "name like ? AND state_id = ?", "#{node.value}%", state.id ).first
44
+ elsif node.level == county_level # 2 or 3
45
+ state = stack[0]
46
+ rec = County.where( "name like ? AND state_id = ?", "#{node.value}%", state.id ).first
47
+ elsif node.level == muni_level # 3 or 4
48
+ state = stack[0]
49
+ rec = Muni.where( "name like ? AND state_id = ?", "#{node.value}%", state.id ).first
50
+ elsif node.level == city_level # 4 or 5
51
+ ## note: city requires country scope for lookup
52
+ ## todo/fix: how to deal with cities with the same name
53
+ ## in the same country (and same state and same county etc.) ??? - add some examples here
54
+ rec = City.where( "name like ? AND country_id = ?", "#{node.value}%", country.id ).first
55
+ else
56
+ puts "*** (fatal) error: unknown level for tree node: #{node.inspect}"
57
+ ## todo/fix: exit here (note: rec stays nil and gets pushed onto the stack below)
58
+ end
59
+
60
+ ## no match found? auto-add the record (munis and cities only)
61
+ if rec.present?
62
+ puts "ok - record match found: #{rec.inspect}"
63
+ else
64
+ ## note: for now only auto-adds munis n cities
65
+ if node.level == muni_level # 3 or 4
66
+ ## add muni
67
+ key = TextUtils.title_to_key( node.value )
68
+ name = node.value
69
+ level = node.level
70
+ state = stack[0]
71
+ county = stack[county_level-1] # note: stack is zero-based (thus, -1)
72
+ puts "*** adding muni record:"
73
+ rec = Muni.create!( key: key,
74
+ name: name,
75
+ level: level,
76
+ state_id: state.id,
77
+ county_id: county.id )
78
+ elsif node.level == city_level # 4 or 5
79
+ ## add city
80
+ key = TextUtils.title_to_key( node.value )
81
+ name = node.value
82
+ state = stack[0]
83
+ county = stack[county_level-1] # note: stack is zero-based (thus, -1)
84
+ muni = stack[muni_level-1] # note: stack is zero-based (thus, -1)
85
+ puts "*** adding city record:"
86
+ rec = City.create!( key: key,
87
+ name: name,
88
+ state_id: state.id,
89
+ ## add county_id too ???
90
+ muni_id: muni.id,
91
+ country_id: country.id )
92
+ else
93
+ puts "*** (fatal) error: record not found for tree node: #{node.inspect}"
94
+ ## todo/fix: exit here (note: rec stays nil and gets pushed onto the stack below)
95
+ end
96
+ end
97
+ ## keep the stack in sync with the node's level: pop back to the parent, then push the current record
98
+ level_diff = node.level - stack.size
99
+
100
+ if level_diff > 0
101
+ logger.debug "[StateTreeReader] up +#{level_diff}"
102
+ ## FIX!!! todo/check/verify/assert: always must be +1
103
+ elsif level_diff < 0
104
+ logger.debug "[StateTreeReader] down #{level_diff}"
105
+ level_diff.abs.times { stack.pop }
106
+ stack.pop
107
+ else
108
+ ## same level
109
+ stack.pop
110
+ end
111
+ stack.push( rec ) ## hierarchy of records (mirrors hierarchy of read-in text)
112
+
113
+ end # each_line
114
+ end # method read
115
+
116
+ end # class StateTreeReader
117
+ end # module WorldDb
118
+
@@ -2,72 +2,9 @@
2
2
 
3
3
  module WorldDb
4
4
 
5
- class UsageReader
5
+ class UsageReader < ReaderBaseWithOpts
6
6
 
7
- include LogUtils::Logging
8
-
9
- ## make models available by default with namespace
10
- # e.g. lets you use Usage instead of Model::Usage
11
- include Models
12
-
13
- ## value helpers e.g. is_year?, is_taglist? etc.
14
- include TextUtils::ValueHelper
15
-
16
- ## todo: add opts
17
- def self.from_zip( zip_file, entry_path )
18
- ## get text content from zip
19
-
20
- entry = zip_file.find_entry( entry_path )
21
-
22
- ## todo/fix: add force encoding to utf-8 ??
23
- ## check!!!
24
- ## clean/prepprocess lines
25
- ## e.g. CR/LF (/r/n) to LF (e.g. /n)
26
- text = entry.get_input_stream().read()
27
-
28
- ## NOTE: needs logger ref; only available in instance methods; use global logger for now
29
- logger = LogUtils::Logger.root
30
- logger.debug "text.encoding.name (before): #{text.encoding.name}"
31
- #####
32
- # NB: ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
33
- ## NB:
34
- # for now "hardcoded" to utf8 - what else can we do?
35
- # - note: force_encoding will NOT change the chars only change the assumed encoding w/o translation
36
- text = text.force_encoding( Encoding::UTF_8 )
37
- logger.debug "text.encoding.name (after): #{text.encoding.name}"
38
-
39
- ## todo:
40
- # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
41
- ## text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
42
-
43
- self.from_string( text )
44
- end
45
-
46
- def self.from_file( path, opts={} )
47
- ## note: assume/enfore utf-8 encoding (with or without BOM - byte order mark)
48
- ## - see textutils/utils.rb
49
- text = File.read_utf8( path )
50
- self.from_string( text, opts )
51
- end
52
-
53
- def self.from_string( text, opts={} )
54
- UsageReader.new( text, opts )
55
- end
56
-
57
-
58
- def skip_tags?() @skip_tags == true; end
59
- def strict?() @strict == true; end
60
-
61
- def initialize( text, opts={} )
62
- @text = text
63
-
64
- ## option: do NOT generate/add any tags for countries/regions/cities
65
- @skip_tags = opts[:skip_tags].present? ? true : false
66
- ## option: for now issue warning on update, that is, if key/record (country,region,city) already exists
67
- @strict = opts[:strict].present? ? true : false
68
- end
69
-
70
- def read()
7
+ def read
71
8
  reader = HashReader.from_string( @text )
72
9
 
73
10
  reader.each do |key, value|