worlddb 0.8.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +4 -98
- data/Rakefile +1 -6
- data/lib/worlddb/cli/main.rb +170 -0
- data/lib/worlddb/cli/opts.rb +24 -50
- data/lib/worlddb/data/fixtures.rb +168 -0
- data/lib/worlddb/deleter.rb +26 -0
- data/lib/worlddb/reader.rb +8 -61
- data/lib/worlddb/stats.rb +30 -0
- data/lib/worlddb/utils.rb +0 -60
- data/lib/worlddb/version.rb +1 -2
- data/lib/worlddb.rb +24 -184
- metadata +30 -108
- data/data/africa/1_codes/fifa.yml +0 -59
- data/data/africa/1_codes/internet.yml +0 -64
- data/data/africa/1_codes/iso3.yml +0 -57
- data/data/africa/2_names/de.yml +0 -10
- data/data/africa/3_more/en.wikipedia.yml +0 -30
- data/data/africa/3_more/lang.yml +0 -63
- data/data/africa/countries.txt +0 -92
- data/data/america/1_codes/fifa.yml +0 -41
- data/data/america/1_codes/internet.yml +0 -39
- data/data/america/1_codes/iso3.yml +0 -39
- data/data/america/1_codes/motor.yml +0 -26
- data/data/america/2_names/de.yml +0 -31
- data/data/america/2_names/es.yml +0 -30
- data/data/america/3_more/en.wikipedia.yml +0 -9
- data/data/america/3_more/lang.yml +0 -60
- data/data/america/br/regions.txt +0 -27
- data/data/america/ca/cities.txt +0 -48
- data/data/america/ca/regions.txt +0 -28
- data/data/america/countries.txt +0 -70
- data/data/america/mx/cities.txt +0 -32
- data/data/america/mx/regions.txt +0 -54
- data/data/america/us/cities.txt +0 -46
- data/data/america/us/regions.txt +0 -64
- data/data/america/ve/cities.txt +0 -358
- data/data/america/ve/regions.txt +0 -46
- data/data/asia/1_codes/fifa.yml +0 -53
- data/data/asia/1_codes/internet.yml +0 -51
- data/data/asia/1_codes/iso3.yml +0 -51
- data/data/asia/2_names/de.yml +0 -9
- data/data/asia/3_more/en.wikipedia.yml +0 -26
- data/data/asia/3_more/lang.yml +0 -55
- data/data/asia/countries.txt +0 -126
- data/data/asia/jp/cities.txt +0 -3
- data/data/europe/1_codes/fifa.yml +0 -60
- data/data/europe/1_codes/internet.yml +0 -59
- data/data/europe/1_codes/iso3.yml +0 -59
- data/data/europe/1_codes/motor.yml +0 -60
- data/data/europe/2_names/de.yml +0 -42
- data/data/europe/2_names/es.yml +0 -43
- data/data/europe/3_more/en.wikipedia.yml +0 -12
- data/data/europe/3_more/lang.yml +0 -47
- data/data/europe/at/cities.txt +0 -45
- data/data/europe/at/regions.txt +0 -26
- data/data/europe/be/cities.txt +0 -19
- data/data/europe/be/regions.txt +0 -23
- data/data/europe/bg/cities.txt +0 -4
- data/data/europe/by/cities.txt +0 -4
- data/data/europe/ch/cities.txt +0 -4
- data/data/europe/countries.txt +0 -123
- data/data/europe/cy/cities.txt +0 -1
- data/data/europe/cz/cities.txt +0 -26
- data/data/europe/cz/regions.txt +0 -32
- data/data/europe/de/cities.txt +0 -44
- data/data/europe/de/regions.txt +0 -16
- data/data/europe/dk/cities.txt +0 -6
- data/data/europe/ee/cities.txt +0 -2
- data/data/europe/en/cities.txt +0 -32
- data/data/europe/en/regions.txt +0 -21
- data/data/europe/es/cities.txt +0 -17
- data/data/europe/es/regions.txt +0 -26
- data/data/europe/fi/cities.txt +0 -2
- data/data/europe/fr/cities.txt +0 -15
- data/data/europe/fr/regions.txt +0 -32
- data/data/europe/gr/cities.txt +0 -6
- data/data/europe/hr/cities.txt +0 -1
- data/data/europe/hu/cities.txt +0 -2
- data/data/europe/ie/cities.txt +0 -3
- data/data/europe/it/cities.txt +0 -17
- data/data/europe/lt/cities.txt +0 -3
- data/data/europe/lv/cities.txt +0 -4
- data/data/europe/nl/cities.txt +0 -11
- data/data/europe/no/cities.txt +0 -3
- data/data/europe/pl/cities.txt +0 -12
- data/data/europe/pt/cities.txt +0 -6
- data/data/europe/ro/cities.txt +0 -4
- data/data/europe/rs/cities.txt +0 -3
- data/data/europe/ru/cities.txt +0 -14
- data/data/europe/sc/cities.txt +0 -3
- data/data/europe/se/cities.txt +0 -3
- data/data/europe/tr/cities.txt +0 -3
- data/data/europe/ua/cities.txt +0 -9
- data/data/europe/wa/cities.txt +0 -3
- data/data/langs.yml +0 -210
- data/data/oceania/1_codes/fifa.yml +0 -21
- data/data/oceania/1_codes/internet.yml +0 -17
- data/data/oceania/1_codes/iso3.yml +0 -17
- data/data/oceania/2_names/de.yml +0 -7
- data/data/oceania/3_more/en.wikipedia.yml +0 -11
- data/data/oceania/3_more/lang.yml +0 -17
- data/data/oceania/au/cities.txt +0 -2
- data/data/oceania/countries.txt +0 -40
- data/data/tags.1.yml +0 -40
- data/data/tags.3.yml +0 -23
- data/lib/worlddb/cli/runner.rb +0 -113
- data/lib/worlddb/readers/code_reader.rb +0 -34
- data/lib/worlddb/readers/hash_reader.rb +0 -81
- data/lib/worlddb/readers/line_reader.rb +0 -45
- data/lib/worlddb/readers/values_reader.rb +0 -171
|
@@ -1,171 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
|
|
3
|
-
class ValuesReader
|
|
4
|
-
|
|
5
|
-
def initialize( logger, path, more_values={} )
|
|
6
|
-
## todo: check - can we make logger=nil a default arg too?
|
|
7
|
-
if logger.nil?
|
|
8
|
-
@logger = Logger.new(STDOUT)
|
|
9
|
-
@logger.level = Logger::INFO
|
|
10
|
-
else
|
|
11
|
-
@logger = logger
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
@path = path
|
|
15
|
-
|
|
16
|
-
@more_values = more_values
|
|
17
|
-
|
|
18
|
-
@data = File.read_utf8( @path )
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
attr_reader :logger
|
|
22
|
-
|
|
23
|
-
def each_line
|
|
24
|
-
|
|
25
|
-
@data.each_line do |line|
|
|
26
|
-
|
|
27
|
-
if line =~ /^\s*#/
|
|
28
|
-
# skip komments and do NOT copy to result (keep comments secret!)
|
|
29
|
-
logger.debug 'skipping comment line'
|
|
30
|
-
next
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
if line =~ /^\s*$/
|
|
34
|
-
# kommentar oder leerzeile überspringen
|
|
35
|
-
logger.debug 'skipping blank line'
|
|
36
|
-
next
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
# pass 1) remove possible trailing eol comment
|
|
41
|
-
## e.g -> nyc, New York # Sample EOL Comment Here (with or without commas,,,,)
|
|
42
|
-
## becomes -> nyc, New York
|
|
43
|
-
|
|
44
|
-
line = line.sub( /\s+#.+$/, '' )
|
|
45
|
-
|
|
46
|
-
# pass 2) remove leading and trailing whitespace
|
|
47
|
-
|
|
48
|
-
line = line.strip
|
|
49
|
-
|
|
50
|
-
puts "line: >>#{line}<<"
|
|
51
|
-
|
|
52
|
-
values = line.split(',')
|
|
53
|
-
|
|
54
|
-
# pass 1) remove leading and trailing whitespace for values
|
|
55
|
-
|
|
56
|
-
values = values.map { |value| value.strip }
|
|
57
|
-
|
|
58
|
-
##### todo remove support of comment column? (NB: must NOT include commas)
|
|
59
|
-
# pass 2) remove comment columns
|
|
60
|
-
|
|
61
|
-
values = values.select do |value|
|
|
62
|
-
if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
|
|
63
|
-
puts " removing column with value >>#{value}<<"
|
|
64
|
-
false
|
|
65
|
-
else
|
|
66
|
-
true
|
|
67
|
-
end
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
puts " values: >>#{values.join('<< >>')}<<"
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
|
|
74
|
-
## either use keys or do NOT use keys; do NOT mix in a single fixture file
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
### support autogenerate key from first title value
|
|
78
|
-
if values[0] =~ /^[a-z]{2,}$/ # if it looks like a key (only a-z lower case allowed); assume it's a key
|
|
79
|
-
key_col = values[0]
|
|
80
|
-
title_col = values[1]
|
|
81
|
-
more_cols = values[2..-1]
|
|
82
|
-
else
|
|
83
|
-
key_col = '<auto>'
|
|
84
|
-
title_col = values[0]
|
|
85
|
-
more_cols = values[1..-1]
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
attribs = {}
|
|
89
|
-
|
|
90
|
-
## title (split of optional synonyms)
|
|
91
|
-
# e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
|
|
92
|
-
titles = title_col.split('|')
|
|
93
|
-
|
|
94
|
-
attribs[ :title ] = titles[0]
|
|
95
|
-
|
|
96
|
-
## add optional synonyms if present
|
|
97
|
-
attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
|
|
98
|
-
|
|
99
|
-
if key_col == '<auto>'
|
|
100
|
-
## autogenerate key from first title
|
|
101
|
-
key_col = title_to_key( titles[0] )
|
|
102
|
-
puts " autogen key >#{key_col}< from title >#{titles[0]}<"
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
attribs[ :key ] = key_col
|
|
106
|
-
|
|
107
|
-
attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
|
|
108
|
-
|
|
109
|
-
yield( attribs, more_cols )
|
|
110
|
-
|
|
111
|
-
end # each lines
|
|
112
|
-
|
|
113
|
-
end # method each_line
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def title_to_key( title )
|
|
118
|
-
|
|
119
|
-
## NB: downcase does NOT work for accented chars (thus, include in alternatives)
|
|
120
|
-
key = title.downcase
|
|
121
|
-
|
|
122
|
-
### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
|
|
123
|
-
key = key.gsub( /\[.+\]/, '' )
|
|
124
|
-
|
|
125
|
-
## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
|
|
126
|
-
key = key.gsub( /\(.+\)/, '' )
|
|
127
|
-
|
|
128
|
-
## remove all whitespace and punctuation
|
|
129
|
-
key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
|
|
130
|
-
|
|
131
|
-
## turn accented char into ascii look alike if possible
|
|
132
|
-
##
|
|
133
|
-
## todo: add some more
|
|
134
|
-
## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
|
|
135
|
-
|
|
136
|
-
alternatives = [
|
|
137
|
-
['ß', 'ss'],
|
|
138
|
-
['æ', 'ae'],
|
|
139
|
-
['ä', 'ae'],
|
|
140
|
-
['á', 'a' ], # e.g. Bogotá, Králové
|
|
141
|
-
['ã', 'a' ], # e.g São Paulo
|
|
142
|
-
['ă', 'a' ], # e.g. Chișinău
|
|
143
|
-
['é', 'e' ], # e.g. Vélez, Králové
|
|
144
|
-
['è', 'e' ], # e.g. Rivières
|
|
145
|
-
['ê', 'e' ], # e.g. Grêmio
|
|
146
|
-
['ě', 'e' ], # e.g. Budějovice
|
|
147
|
-
['ì', 'i' ], # e.g. Potosì
|
|
148
|
-
['í', 'i' ], # e.g. Ústí
|
|
149
|
-
['ñ', 'n' ], # e.g. Porteño
|
|
150
|
-
['ň', 'n' ], # e.g. Plzeň, Třeboň
|
|
151
|
-
['ö', 'oe'],
|
|
152
|
-
['ó', 'o' ], # e.g. Colón, Łódź, Kraków
|
|
153
|
-
['ř', 'r' ], # e.g. Třeboň
|
|
154
|
-
['ș', 's' ], # e.g. Chișinău
|
|
155
|
-
['ü', 'ue'],
|
|
156
|
-
['ú', 'u' ], # e.g. Fútbol
|
|
157
|
-
['ź', 'z' ], # e.g. Łódź
|
|
158
|
-
['Č', 'c' ], # e.g. České
|
|
159
|
-
['Ł', 'l' ], # e.g. Łódź
|
|
160
|
-
['Ú', 'u' ], # e.g. Ústí
|
|
161
|
-
]
|
|
162
|
-
|
|
163
|
-
alternatives.each do |alt|
|
|
164
|
-
key = key.gsub( alt[0], alt[1] )
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
key
|
|
168
|
-
end # method title_to_key
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
end # class ValuesReader
|