worlddb 0.8.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (110) hide show
  1. data/Manifest.txt +4 -98
  2. data/Rakefile +1 -6
  3. data/lib/worlddb/cli/main.rb +170 -0
  4. data/lib/worlddb/cli/opts.rb +24 -50
  5. data/lib/worlddb/data/fixtures.rb +168 -0
  6. data/lib/worlddb/deleter.rb +26 -0
  7. data/lib/worlddb/reader.rb +8 -61
  8. data/lib/worlddb/stats.rb +30 -0
  9. data/lib/worlddb/utils.rb +0 -60
  10. data/lib/worlddb/version.rb +1 -2
  11. data/lib/worlddb.rb +24 -184
  12. metadata +30 -108
  13. data/data/africa/1_codes/fifa.yml +0 -59
  14. data/data/africa/1_codes/internet.yml +0 -64
  15. data/data/africa/1_codes/iso3.yml +0 -57
  16. data/data/africa/2_names/de.yml +0 -10
  17. data/data/africa/3_more/en.wikipedia.yml +0 -30
  18. data/data/africa/3_more/lang.yml +0 -63
  19. data/data/africa/countries.txt +0 -92
  20. data/data/america/1_codes/fifa.yml +0 -41
  21. data/data/america/1_codes/internet.yml +0 -39
  22. data/data/america/1_codes/iso3.yml +0 -39
  23. data/data/america/1_codes/motor.yml +0 -26
  24. data/data/america/2_names/de.yml +0 -31
  25. data/data/america/2_names/es.yml +0 -30
  26. data/data/america/3_more/en.wikipedia.yml +0 -9
  27. data/data/america/3_more/lang.yml +0 -60
  28. data/data/america/br/regions.txt +0 -27
  29. data/data/america/ca/cities.txt +0 -48
  30. data/data/america/ca/regions.txt +0 -28
  31. data/data/america/countries.txt +0 -70
  32. data/data/america/mx/cities.txt +0 -32
  33. data/data/america/mx/regions.txt +0 -54
  34. data/data/america/us/cities.txt +0 -46
  35. data/data/america/us/regions.txt +0 -64
  36. data/data/america/ve/cities.txt +0 -358
  37. data/data/america/ve/regions.txt +0 -46
  38. data/data/asia/1_codes/fifa.yml +0 -53
  39. data/data/asia/1_codes/internet.yml +0 -51
  40. data/data/asia/1_codes/iso3.yml +0 -51
  41. data/data/asia/2_names/de.yml +0 -9
  42. data/data/asia/3_more/en.wikipedia.yml +0 -26
  43. data/data/asia/3_more/lang.yml +0 -55
  44. data/data/asia/countries.txt +0 -126
  45. data/data/asia/jp/cities.txt +0 -3
  46. data/data/europe/1_codes/fifa.yml +0 -60
  47. data/data/europe/1_codes/internet.yml +0 -59
  48. data/data/europe/1_codes/iso3.yml +0 -59
  49. data/data/europe/1_codes/motor.yml +0 -60
  50. data/data/europe/2_names/de.yml +0 -42
  51. data/data/europe/2_names/es.yml +0 -43
  52. data/data/europe/3_more/en.wikipedia.yml +0 -12
  53. data/data/europe/3_more/lang.yml +0 -47
  54. data/data/europe/at/cities.txt +0 -45
  55. data/data/europe/at/regions.txt +0 -26
  56. data/data/europe/be/cities.txt +0 -19
  57. data/data/europe/be/regions.txt +0 -23
  58. data/data/europe/bg/cities.txt +0 -4
  59. data/data/europe/by/cities.txt +0 -4
  60. data/data/europe/ch/cities.txt +0 -4
  61. data/data/europe/countries.txt +0 -123
  62. data/data/europe/cy/cities.txt +0 -1
  63. data/data/europe/cz/cities.txt +0 -26
  64. data/data/europe/cz/regions.txt +0 -32
  65. data/data/europe/de/cities.txt +0 -44
  66. data/data/europe/de/regions.txt +0 -16
  67. data/data/europe/dk/cities.txt +0 -6
  68. data/data/europe/ee/cities.txt +0 -2
  69. data/data/europe/en/cities.txt +0 -32
  70. data/data/europe/en/regions.txt +0 -21
  71. data/data/europe/es/cities.txt +0 -17
  72. data/data/europe/es/regions.txt +0 -26
  73. data/data/europe/fi/cities.txt +0 -2
  74. data/data/europe/fr/cities.txt +0 -15
  75. data/data/europe/fr/regions.txt +0 -32
  76. data/data/europe/gr/cities.txt +0 -6
  77. data/data/europe/hr/cities.txt +0 -1
  78. data/data/europe/hu/cities.txt +0 -2
  79. data/data/europe/ie/cities.txt +0 -3
  80. data/data/europe/it/cities.txt +0 -17
  81. data/data/europe/lt/cities.txt +0 -3
  82. data/data/europe/lv/cities.txt +0 -4
  83. data/data/europe/nl/cities.txt +0 -11
  84. data/data/europe/no/cities.txt +0 -3
  85. data/data/europe/pl/cities.txt +0 -12
  86. data/data/europe/pt/cities.txt +0 -6
  87. data/data/europe/ro/cities.txt +0 -4
  88. data/data/europe/rs/cities.txt +0 -3
  89. data/data/europe/ru/cities.txt +0 -14
  90. data/data/europe/sc/cities.txt +0 -3
  91. data/data/europe/se/cities.txt +0 -3
  92. data/data/europe/tr/cities.txt +0 -3
  93. data/data/europe/ua/cities.txt +0 -9
  94. data/data/europe/wa/cities.txt +0 -3
  95. data/data/langs.yml +0 -210
  96. data/data/oceania/1_codes/fifa.yml +0 -21
  97. data/data/oceania/1_codes/internet.yml +0 -17
  98. data/data/oceania/1_codes/iso3.yml +0 -17
  99. data/data/oceania/2_names/de.yml +0 -7
  100. data/data/oceania/3_more/en.wikipedia.yml +0 -11
  101. data/data/oceania/3_more/lang.yml +0 -17
  102. data/data/oceania/au/cities.txt +0 -2
  103. data/data/oceania/countries.txt +0 -40
  104. data/data/tags.1.yml +0 -40
  105. data/data/tags.3.yml +0 -23
  106. data/lib/worlddb/cli/runner.rb +0 -113
  107. data/lib/worlddb/readers/code_reader.rb +0 -34
  108. data/lib/worlddb/readers/hash_reader.rb +0 -81
  109. data/lib/worlddb/readers/line_reader.rb +0 -45
  110. data/lib/worlddb/readers/values_reader.rb +0 -171
@@ -1,171 +0,0 @@
1
- # encoding: utf-8
2
-
3
- class ValuesReader
4
-
5
- def initialize( logger, path, more_values={} )
6
- ## todo: check - can we make logger=nil a default arg too?
7
- if logger.nil?
8
- @logger = Logger.new(STDOUT)
9
- @logger.level = Logger::INFO
10
- else
11
- @logger = logger
12
- end
13
-
14
- @path = path
15
-
16
- @more_values = more_values
17
-
18
- @data = File.read_utf8( @path )
19
- end
20
-
21
- attr_reader :logger
22
-
23
- def each_line
24
-
25
- @data.each_line do |line|
26
-
27
- if line =~ /^\s*#/
28
- # skip komments and do NOT copy to result (keep comments secret!)
29
- logger.debug 'skipping comment line'
30
- next
31
- end
32
-
33
- if line =~ /^\s*$/
34
- # kommentar oder leerzeile überspringen
35
- logger.debug 'skipping blank line'
36
- next
37
- end
38
-
39
-
40
- # pass 1) remove possible trailing eol comment
41
- ## e.g -> nyc, New York # Sample EOL Comment Here (with or without commas,,,,)
42
- ## becomes -> nyc, New York
43
-
44
- line = line.sub( /\s+#.+$/, '' )
45
-
46
- # pass 2) remove leading and trailing whitespace
47
-
48
- line = line.strip
49
-
50
- puts "line: >>#{line}<<"
51
-
52
- values = line.split(',')
53
-
54
- # pass 1) remove leading and trailing whitespace for values
55
-
56
- values = values.map { |value| value.strip }
57
-
58
- ##### todo remove support of comment column? (NB: must NOT include commas)
59
- # pass 2) remove comment columns
60
-
61
- values = values.select do |value|
62
- if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
63
- puts " removing column with value >>#{value}<<"
64
- false
65
- else
66
- true
67
- end
68
- end
69
-
70
- puts " values: >>#{values.join('<< >>')}<<"
71
-
72
-
73
- ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
74
- ## either use keys or do NOT use keys; do NOT mix in a single fixture file
75
-
76
-
77
- ### support autogenerate key from first title value
78
- if values[0] =~ /^[a-z]{2,}$/ # if it looks like a key (only a-z lower case allowed); assume it's a key
79
- key_col = values[0]
80
- title_col = values[1]
81
- more_cols = values[2..-1]
82
- else
83
- key_col = '<auto>'
84
- title_col = values[0]
85
- more_cols = values[1..-1]
86
- end
87
-
88
- attribs = {}
89
-
90
- ## title (split of optional synonyms)
91
- # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
92
- titles = title_col.split('|')
93
-
94
- attribs[ :title ] = titles[0]
95
-
96
- ## add optional synonyms if present
97
- attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
98
-
99
- if key_col == '<auto>'
100
- ## autogenerate key from first title
101
- key_col = title_to_key( titles[0] )
102
- puts " autogen key >#{key_col}< from title >#{titles[0]}<"
103
- end
104
-
105
- attribs[ :key ] = key_col
106
-
107
- attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
108
-
109
- yield( attribs, more_cols )
110
-
111
- end # each lines
112
-
113
- end # method each_line
114
-
115
-
116
-
117
- def title_to_key( title )
118
-
119
- ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
120
- key = title.downcase
121
-
122
- ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
123
- key = key.gsub( /\[.+\]/, '' )
124
-
125
- ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
126
- key = key.gsub( /\(.+\)/, '' )
127
-
128
- ## remove all whitespace and punctuation
129
- key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
130
-
131
- ## turn accented char into ascii look alike if possible
132
- ##
133
- ## todo: add some more
134
- ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
135
-
136
- alternatives = [
137
- ['ß', 'ss'],
138
- ['æ', 'ae'],
139
- ['ä', 'ae'],
140
- ['á', 'a' ], # e.g. Bogotá, Králové
141
- ['ã', 'a' ], # e.g São Paulo
142
- ['ă', 'a' ], # e.g. Chișinău
143
- ['é', 'e' ], # e.g. Vélez, Králové
144
- ['è', 'e' ], # e.g. Rivières
145
- ['ê', 'e' ], # e.g. Grêmio
146
- ['ě', 'e' ], # e.g. Budějovice
147
- ['ì', 'i' ], # e.g. Potosì
148
- ['í', 'i' ], # e.g. Ústí
149
- ['ñ', 'n' ], # e.g. Porteño
150
- ['ň', 'n' ], # e.g. Plzeň, Třeboň
151
- ['ö', 'oe'],
152
- ['ó', 'o' ], # e.g. Colón, Łódź, Kraków
153
- ['ř', 'r' ], # e.g. Třeboň
154
- ['ș', 's' ], # e.g. Chișinău
155
- ['ü', 'ue'],
156
- ['ú', 'u' ], # e.g. Fútbol
157
- ['ź', 'z' ], # e.g. Łódź
158
- ['Č', 'c' ], # e.g. České
159
- ['Ł', 'l' ], # e.g. Łódź
160
- ['Ú', 'u' ], # e.g. Ústí
161
- ]
162
-
163
- alternatives.each do |alt|
164
- key = key.gsub( alt[0], alt[1] )
165
- end
166
-
167
- key
168
- end # method title_to_key
169
-
170
-
171
- end # class ValuesReader