worlddb 0.8.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. data/Manifest.txt +4 -98
  2. data/Rakefile +1 -6
  3. data/lib/worlddb/cli/main.rb +170 -0
  4. data/lib/worlddb/cli/opts.rb +24 -50
  5. data/lib/worlddb/data/fixtures.rb +168 -0
  6. data/lib/worlddb/deleter.rb +26 -0
  7. data/lib/worlddb/reader.rb +8 -61
  8. data/lib/worlddb/stats.rb +30 -0
  9. data/lib/worlddb/utils.rb +0 -60
  10. data/lib/worlddb/version.rb +1 -2
  11. data/lib/worlddb.rb +24 -184
  12. metadata +30 -108
  13. data/data/africa/1_codes/fifa.yml +0 -59
  14. data/data/africa/1_codes/internet.yml +0 -64
  15. data/data/africa/1_codes/iso3.yml +0 -57
  16. data/data/africa/2_names/de.yml +0 -10
  17. data/data/africa/3_more/en.wikipedia.yml +0 -30
  18. data/data/africa/3_more/lang.yml +0 -63
  19. data/data/africa/countries.txt +0 -92
  20. data/data/america/1_codes/fifa.yml +0 -41
  21. data/data/america/1_codes/internet.yml +0 -39
  22. data/data/america/1_codes/iso3.yml +0 -39
  23. data/data/america/1_codes/motor.yml +0 -26
  24. data/data/america/2_names/de.yml +0 -31
  25. data/data/america/2_names/es.yml +0 -30
  26. data/data/america/3_more/en.wikipedia.yml +0 -9
  27. data/data/america/3_more/lang.yml +0 -60
  28. data/data/america/br/regions.txt +0 -27
  29. data/data/america/ca/cities.txt +0 -48
  30. data/data/america/ca/regions.txt +0 -28
  31. data/data/america/countries.txt +0 -70
  32. data/data/america/mx/cities.txt +0 -32
  33. data/data/america/mx/regions.txt +0 -54
  34. data/data/america/us/cities.txt +0 -46
  35. data/data/america/us/regions.txt +0 -64
  36. data/data/america/ve/cities.txt +0 -358
  37. data/data/america/ve/regions.txt +0 -46
  38. data/data/asia/1_codes/fifa.yml +0 -53
  39. data/data/asia/1_codes/internet.yml +0 -51
  40. data/data/asia/1_codes/iso3.yml +0 -51
  41. data/data/asia/2_names/de.yml +0 -9
  42. data/data/asia/3_more/en.wikipedia.yml +0 -26
  43. data/data/asia/3_more/lang.yml +0 -55
  44. data/data/asia/countries.txt +0 -126
  45. data/data/asia/jp/cities.txt +0 -3
  46. data/data/europe/1_codes/fifa.yml +0 -60
  47. data/data/europe/1_codes/internet.yml +0 -59
  48. data/data/europe/1_codes/iso3.yml +0 -59
  49. data/data/europe/1_codes/motor.yml +0 -60
  50. data/data/europe/2_names/de.yml +0 -42
  51. data/data/europe/2_names/es.yml +0 -43
  52. data/data/europe/3_more/en.wikipedia.yml +0 -12
  53. data/data/europe/3_more/lang.yml +0 -47
  54. data/data/europe/at/cities.txt +0 -45
  55. data/data/europe/at/regions.txt +0 -26
  56. data/data/europe/be/cities.txt +0 -19
  57. data/data/europe/be/regions.txt +0 -23
  58. data/data/europe/bg/cities.txt +0 -4
  59. data/data/europe/by/cities.txt +0 -4
  60. data/data/europe/ch/cities.txt +0 -4
  61. data/data/europe/countries.txt +0 -123
  62. data/data/europe/cy/cities.txt +0 -1
  63. data/data/europe/cz/cities.txt +0 -26
  64. data/data/europe/cz/regions.txt +0 -32
  65. data/data/europe/de/cities.txt +0 -44
  66. data/data/europe/de/regions.txt +0 -16
  67. data/data/europe/dk/cities.txt +0 -6
  68. data/data/europe/ee/cities.txt +0 -2
  69. data/data/europe/en/cities.txt +0 -32
  70. data/data/europe/en/regions.txt +0 -21
  71. data/data/europe/es/cities.txt +0 -17
  72. data/data/europe/es/regions.txt +0 -26
  73. data/data/europe/fi/cities.txt +0 -2
  74. data/data/europe/fr/cities.txt +0 -15
  75. data/data/europe/fr/regions.txt +0 -32
  76. data/data/europe/gr/cities.txt +0 -6
  77. data/data/europe/hr/cities.txt +0 -1
  78. data/data/europe/hu/cities.txt +0 -2
  79. data/data/europe/ie/cities.txt +0 -3
  80. data/data/europe/it/cities.txt +0 -17
  81. data/data/europe/lt/cities.txt +0 -3
  82. data/data/europe/lv/cities.txt +0 -4
  83. data/data/europe/nl/cities.txt +0 -11
  84. data/data/europe/no/cities.txt +0 -3
  85. data/data/europe/pl/cities.txt +0 -12
  86. data/data/europe/pt/cities.txt +0 -6
  87. data/data/europe/ro/cities.txt +0 -4
  88. data/data/europe/rs/cities.txt +0 -3
  89. data/data/europe/ru/cities.txt +0 -14
  90. data/data/europe/sc/cities.txt +0 -3
  91. data/data/europe/se/cities.txt +0 -3
  92. data/data/europe/tr/cities.txt +0 -3
  93. data/data/europe/ua/cities.txt +0 -9
  94. data/data/europe/wa/cities.txt +0 -3
  95. data/data/langs.yml +0 -210
  96. data/data/oceania/1_codes/fifa.yml +0 -21
  97. data/data/oceania/1_codes/internet.yml +0 -17
  98. data/data/oceania/1_codes/iso3.yml +0 -17
  99. data/data/oceania/2_names/de.yml +0 -7
  100. data/data/oceania/3_more/en.wikipedia.yml +0 -11
  101. data/data/oceania/3_more/lang.yml +0 -17
  102. data/data/oceania/au/cities.txt +0 -2
  103. data/data/oceania/countries.txt +0 -40
  104. data/data/tags.1.yml +0 -40
  105. data/data/tags.3.yml +0 -23
  106. data/lib/worlddb/cli/runner.rb +0 -113
  107. data/lib/worlddb/readers/code_reader.rb +0 -34
  108. data/lib/worlddb/readers/hash_reader.rb +0 -81
  109. data/lib/worlddb/readers/line_reader.rb +0 -45
  110. data/lib/worlddb/readers/values_reader.rb +0 -171
@@ -1,171 +0,0 @@
1
- # encoding: utf-8
2
-
3
##
# Reads a comma-separated "values" fixture text and yields one record per
# data line.
#
# Line format (as handled by #each_line):
#
#   [key,] title[|synonym|synonym...] [, more, columns...]   [# eol comment]
#
# - lines starting with `#` and blank lines are skipped
# - a trailing end-of-line comment (whitespace followed by `#`) is removed
# - columns starting with `#` are treated as comment columns and removed
# - if the first column looks like a key (two or more lower-case a-z chars)
#   and a title column follows, it is used as the key; otherwise the key is
#   generated from the first title using #title_to_key
class ValuesReader

  ## Accented char => ascii look-alike, used by #title_to_key.
  ##
  ## NB: #title_to_key downcases the title first. On rubies with
  ##  unicode-aware String#downcase the upper-case entries never match,
  ##  thus, the lower-case forms (e.g. č, ł) are listed too; the upper-case
  ##  entries are kept for rubies where downcase is ascii-only.
  ##
  ## todo: add some more
  ##  see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
  KEY_ALTERNATIVES = {
    'ß' => 'ss',
    'æ' => 'ae',
    'ä' => 'ae',
    'á' => 'a',   # e.g. Bogotá, Králové
    'ã' => 'a',   # e.g. São Paulo
    'ă' => 'a',   # e.g. Chișinău
    'č' => 'c',   # e.g. České (after downcase)
    'é' => 'e',   # e.g. Vélez, Králové
    'è' => 'e',   # e.g. Rivières
    'ê' => 'e',   # e.g. Grêmio
    'ě' => 'e',   # e.g. Budějovice
    'ì' => 'i',   # e.g. Potosì
    'í' => 'i',   # e.g. Ústí
    'ł' => 'l',   # e.g. Łódź (after downcase)
    'ñ' => 'n',   # e.g. Porteño
    'ň' => 'n',   # e.g. Plzeň, Třeboň
    'ö' => 'oe',
    'ó' => 'o',   # e.g. Colón, Łódź, Kraków
    'ř' => 'r',   # e.g. Třeboň
    'ș' => 's',   # e.g. Chișinău
    'ü' => 'ue',
    'ú' => 'u',   # e.g. Fútbol
    'ź' => 'z',   # e.g. Łódź
    'Č' => 'c',   # e.g. České
    'Ł' => 'l',   # e.g. Łódź
    'Ú' => 'u'    # e.g. Ústí
  }.freeze

  ## Matches any single char listed in KEY_ALTERNATIVES.
  KEY_ALTERNATIVES_REGEX = Regexp.union( KEY_ALTERNATIVES.keys ).freeze

  ##
  # logger      - logger to use; if nil, a new STDOUT logger (level INFO)
  #               is created
  # path        - path of the fixture file; read completely on construction
  # more_values - hash merged into every yielded attribs hash
  #               (e.g. country_id and other defaults)
  def initialize( logger, path, more_values={} )
    ## todo: check - can we make logger=nil a default arg too?
    if logger.nil?
      @logger = Logger.new(STDOUT)
      @logger.level = Logger::INFO
    else
      @logger = logger
    end

    @path = path

    @more_values = more_values

    ## NOTE(review): File.read_utf8 is not a core ruby method; presumably
    ##  added by a helper library loaded elsewhere - confirm
    @data = File.read_utf8( @path )
  end

  attr_reader :logger

  ##
  # Yields (attribs, more_cols) for every data line.
  #
  # attribs   - hash with :title, :key, optional :synonyms (joined with '|')
  #             plus everything from more_values (more_values win on clash)
  # more_cols - array with the remaining columns (may be empty)
  #
  # Lines that do not contain a usable title are skipped with a warning
  # (instead of raising NoMethodError on nil).
  #
  # Returns an enumerator if no block is given.
  def each_line
    return enum_for( :each_line ) unless block_given?

    @data.each_line do |line|

      if line =~ /^\s*#/
        # skip comments and do NOT copy to result (keep comments secret!)
        logger.debug 'skipping comment line'
        next
      end

      if line =~ /^\s*$/
        # skip blank lines
        logger.debug 'skipping blank line'
        next
      end

      values = parse_values( line )

      if values.empty?
        logger.warn "skipping line without values >>#{line.strip}<<"
        next
      end

      ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
      ##  either use keys or do NOT use keys; do NOT mix in a single fixture file

      ### support autogenerate key from first title value
      ##  NB: a key-like value counts as a key only if a title column follows;
      ##   a single value is always the title
      if values.size > 1 && values[0] =~ /\A[a-z]{2,}\z/
        key_col   = values[0]
        title_col = values[1]
        more_cols = values[2..-1]
      else
        key_col   = nil    # autogenerate from title below
        title_col = values[0]
        more_cols = values[1..-1]
      end

      ## title (split of optional synonyms)
      # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
      titles = title_col.split('|')

      if key_col.nil?
        if titles.empty?
          ## empty title column - no way to generate a key
          logger.warn "skipping line without title >>#{line.strip}<<"
          next
        end

        key_col = title_to_key( titles[0] )
        logger.debug "   autogen key >#{key_col}< from title >#{titles[0]}<"
      end

      attribs = {}
      attribs[ :title ] = titles[0]

      ## add optional synonyms if present
      attribs[ :synonyms ] = titles[1..-1].join('|')  if titles.size > 1

      attribs[ :key ] = key_col

      attribs = attribs.merge( @more_values )  # e.g. merge country_id and other defaults if present

      yield( attribs, more_cols )

    end # each lines

  end # method each_line


  ##
  # Builds an ascii key from a title:
  # downcases, drops optional parts in [] and (), removes whitespace and
  # punctuation and replaces accented chars listed in KEY_ALTERNATIVES.
  #
  #   title_to_key( 'Wien [Vienna]' )   # => 'wien'
  #   title_to_key( 'České Budějovice' ) # => 'ceskebudejovice'
  def title_to_key( title )

    ## NB: downcase might NOT work for accented chars on older rubies
    ##  (thus, upper-case chars are included in alternatives too)
    key = title.downcase

    ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
    ##   NB: non-greedy - keep text between two separate bracket groups
    key = key.gsub( /\[.+?\]/, '' )

    ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
    key = key.gsub( /\(.+?\)/, '' )

    ## remove all whitespace and punctuation
    key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )

    ## turn accented char into ascii look alike if possible
    key.gsub( KEY_ALTERNATIVES_REGEX, KEY_ALTERNATIVES )
  end # method title_to_key


private

  ## Splits a raw data line into stripped column values
  ##  (w/o eol comment and w/o comment columns).
  def parse_values( line )
    # pass 1) remove possible trailing eol comment
    ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
    ## becomes -> nyc, New York
    ##  NB: .* (not .+) also removes a bare trailing #
    line = line.sub( /\s+#.*$/, '' )

    # pass 2) remove leading and trailing whitespace
    line = line.strip

    logger.debug "line: >>#{line}<<"

    # pass 3) split and remove leading and trailing whitespace for values
    values = line.split(',').map { |value| value.strip }

    ##### todo remove support of comment column? (NB: must NOT include commas)
    # pass 4) remove comment columns (values starting with #)
    values = values.reject do |value|
      is_comment = value.start_with?( '#' )
      logger.debug "   removing column with value >>#{value}<<"  if is_comment
      is_comment
    end

    logger.debug "  values: >>#{values.join('<< >>')}<<"

    values
  end

end # class ValuesReader