worlddb 0.8.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +4 -98
- data/Rakefile +1 -6
- data/lib/worlddb/cli/main.rb +170 -0
- data/lib/worlddb/cli/opts.rb +24 -50
- data/lib/worlddb/data/fixtures.rb +168 -0
- data/lib/worlddb/deleter.rb +26 -0
- data/lib/worlddb/reader.rb +8 -61
- data/lib/worlddb/stats.rb +30 -0
- data/lib/worlddb/utils.rb +0 -60
- data/lib/worlddb/version.rb +1 -2
- data/lib/worlddb.rb +24 -184
- metadata +30 -108
- data/data/africa/1_codes/fifa.yml +0 -59
- data/data/africa/1_codes/internet.yml +0 -64
- data/data/africa/1_codes/iso3.yml +0 -57
- data/data/africa/2_names/de.yml +0 -10
- data/data/africa/3_more/en.wikipedia.yml +0 -30
- data/data/africa/3_more/lang.yml +0 -63
- data/data/africa/countries.txt +0 -92
- data/data/america/1_codes/fifa.yml +0 -41
- data/data/america/1_codes/internet.yml +0 -39
- data/data/america/1_codes/iso3.yml +0 -39
- data/data/america/1_codes/motor.yml +0 -26
- data/data/america/2_names/de.yml +0 -31
- data/data/america/2_names/es.yml +0 -30
- data/data/america/3_more/en.wikipedia.yml +0 -9
- data/data/america/3_more/lang.yml +0 -60
- data/data/america/br/regions.txt +0 -27
- data/data/america/ca/cities.txt +0 -48
- data/data/america/ca/regions.txt +0 -28
- data/data/america/countries.txt +0 -70
- data/data/america/mx/cities.txt +0 -32
- data/data/america/mx/regions.txt +0 -54
- data/data/america/us/cities.txt +0 -46
- data/data/america/us/regions.txt +0 -64
- data/data/america/ve/cities.txt +0 -358
- data/data/america/ve/regions.txt +0 -46
- data/data/asia/1_codes/fifa.yml +0 -53
- data/data/asia/1_codes/internet.yml +0 -51
- data/data/asia/1_codes/iso3.yml +0 -51
- data/data/asia/2_names/de.yml +0 -9
- data/data/asia/3_more/en.wikipedia.yml +0 -26
- data/data/asia/3_more/lang.yml +0 -55
- data/data/asia/countries.txt +0 -126
- data/data/asia/jp/cities.txt +0 -3
- data/data/europe/1_codes/fifa.yml +0 -60
- data/data/europe/1_codes/internet.yml +0 -59
- data/data/europe/1_codes/iso3.yml +0 -59
- data/data/europe/1_codes/motor.yml +0 -60
- data/data/europe/2_names/de.yml +0 -42
- data/data/europe/2_names/es.yml +0 -43
- data/data/europe/3_more/en.wikipedia.yml +0 -12
- data/data/europe/3_more/lang.yml +0 -47
- data/data/europe/at/cities.txt +0 -45
- data/data/europe/at/regions.txt +0 -26
- data/data/europe/be/cities.txt +0 -19
- data/data/europe/be/regions.txt +0 -23
- data/data/europe/bg/cities.txt +0 -4
- data/data/europe/by/cities.txt +0 -4
- data/data/europe/ch/cities.txt +0 -4
- data/data/europe/countries.txt +0 -123
- data/data/europe/cy/cities.txt +0 -1
- data/data/europe/cz/cities.txt +0 -26
- data/data/europe/cz/regions.txt +0 -32
- data/data/europe/de/cities.txt +0 -44
- data/data/europe/de/regions.txt +0 -16
- data/data/europe/dk/cities.txt +0 -6
- data/data/europe/ee/cities.txt +0 -2
- data/data/europe/en/cities.txt +0 -32
- data/data/europe/en/regions.txt +0 -21
- data/data/europe/es/cities.txt +0 -17
- data/data/europe/es/regions.txt +0 -26
- data/data/europe/fi/cities.txt +0 -2
- data/data/europe/fr/cities.txt +0 -15
- data/data/europe/fr/regions.txt +0 -32
- data/data/europe/gr/cities.txt +0 -6
- data/data/europe/hr/cities.txt +0 -1
- data/data/europe/hu/cities.txt +0 -2
- data/data/europe/ie/cities.txt +0 -3
- data/data/europe/it/cities.txt +0 -17
- data/data/europe/lt/cities.txt +0 -3
- data/data/europe/lv/cities.txt +0 -4
- data/data/europe/nl/cities.txt +0 -11
- data/data/europe/no/cities.txt +0 -3
- data/data/europe/pl/cities.txt +0 -12
- data/data/europe/pt/cities.txt +0 -6
- data/data/europe/ro/cities.txt +0 -4
- data/data/europe/rs/cities.txt +0 -3
- data/data/europe/ru/cities.txt +0 -14
- data/data/europe/sc/cities.txt +0 -3
- data/data/europe/se/cities.txt +0 -3
- data/data/europe/tr/cities.txt +0 -3
- data/data/europe/ua/cities.txt +0 -9
- data/data/europe/wa/cities.txt +0 -3
- data/data/langs.yml +0 -210
- data/data/oceania/1_codes/fifa.yml +0 -21
- data/data/oceania/1_codes/internet.yml +0 -17
- data/data/oceania/1_codes/iso3.yml +0 -17
- data/data/oceania/2_names/de.yml +0 -7
- data/data/oceania/3_more/en.wikipedia.yml +0 -11
- data/data/oceania/3_more/lang.yml +0 -17
- data/data/oceania/au/cities.txt +0 -2
- data/data/oceania/countries.txt +0 -40
- data/data/tags.1.yml +0 -40
- data/data/tags.3.yml +0 -23
- data/lib/worlddb/cli/runner.rb +0 -113
- data/lib/worlddb/readers/code_reader.rb +0 -34
- data/lib/worlddb/readers/hash_reader.rb +0 -81
- data/lib/worlddb/readers/line_reader.rb +0 -45
- data/lib/worlddb/readers/values_reader.rb +0 -171
@@ -1,171 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
# Reads a plain-text fixture of comma-separated records (one record per line)
# and yields one attribute hash per record.
#
# Line format:
#
#   [key,] title[|synonym|...] [, more values ...]   [# end-of-line comment]
#
# - lines starting with `#` and blank lines are skipped
# - an end-of-line comment (whitespace followed by `#`) is stripped
# - a value starting with `#` is treated as a comment column and dropped
# - if the first value looks like a key (two or more lowercase a-z letters)
#   and a title follows, it is used as the key; otherwise the key is
#   generated from the title (see #title_to_key)
class ValuesReader

  # Single-character transliterations used by #title_to_key.
  #
  # Both lower and upper case forms are listed where known: String#downcase
  # maps non-ASCII letters on Ruby 2.4+, but leaves them untouched on older
  # rubies, so either form may reach the lookup.
  #
  # todo: add some more
  # see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
  ALTERNATIVES = {
    'ß' => 'ss',
    'æ' => 'ae',
    'ä' => 'ae',
    'á' => 'a',   # e.g. Bogotá, Králové
    'ã' => 'a',   # e.g. São Paulo
    'ă' => 'a',   # e.g. Chișinău
    'é' => 'e',   # e.g. Vélez, Králové
    'è' => 'e',   # e.g. Rivières
    'ê' => 'e',   # e.g. Grêmio
    'ě' => 'e',   # e.g. Budějovice
    'ì' => 'i',   # e.g. Potosì
    'í' => 'i',   # e.g. Ústí
    'ñ' => 'n',   # e.g. Porteño
    'ň' => 'n',   # e.g. Plzeň, Třeboň
    'ö' => 'oe',
    'ó' => 'o',   # e.g. Colón, Łódź, Kraków
    'ř' => 'r',   # e.g. Třeboň
    'ș' => 's',   # e.g. Chișinău
    'ü' => 'ue',
    'ú' => 'u',   # e.g. Fútbol
    'ź' => 'z',   # e.g. Łódź
    'č' => 'c',   # e.g. České (after a unicode-aware downcase)
    'Č' => 'c',   # e.g. České
    'ł' => 'l',   # e.g. Łódź (after a unicode-aware downcase)
    'Ł' => 'l',   # e.g. Łódź
    'Ú' => 'u'    # e.g. Ústí
  }.freeze

  # Matches any single character that has an entry in ALTERNATIVES.
  ALTERNATIVES_REGEX = Regexp.union( ALTERNATIVES.keys ).freeze

  # A value "looks like a key" if it consists of two or more lowercase a-z letters only.
  KEY_REGEX = /\A[a-z]{2,}\z/

  # logger      - logger to use; if nil, a STDOUT logger with level INFO is created
  # path        - path of the fixture file; read completely (as utf-8) on construction
  # more_values - attributes merged into every record (e.g. country_id and other defaults)
  def initialize( logger, path, more_values={} )
    ## todo: check - can we make logger=nil a default arg too?
    if logger.nil?
      @logger = Logger.new(STDOUT)
      @logger.level = Logger::INFO
    else
      @logger = logger
    end

    @path        = path
    @more_values = more_values
    @data        = File.read_utf8( @path )
  end

  attr_reader :logger

  # Parses the fixture line by line.
  #
  # Yields for every record:
  #   attribs   - hash with :title, :key, optional :synonyms (joined with '|'),
  #               merged with the more_values passed to the constructor
  #               (more_values win on duplicate keys)
  #   more_cols - array of the remaining (stripped) values; may be empty
  #
  # Lines that contain no values or no usable title are skipped with a warning.
  def each_line
    @data.each_line do |raw_line|

      if raw_line =~ /^\s*#/
        # skip comments and do NOT copy to result (keep comments secret!)
        logger.debug 'skipping comment line'
        next
      end

      if raw_line =~ /^\s*$/
        # skip blank line
        logger.debug 'skipping blank line'
        next
      end

      # pass 1) remove possible trailing eol comment
      ##  e.g    -> nyc, New York   # Sample EOL Comment Here (with or without commas,,,,)
      ## becomes -> nyc, New York
      ## NB: .* (not .+) so that a bare trailing # gets removed too
      # pass 2) remove leading and trailing whitespace
      line = raw_line.sub( /\s+#.*$/, '' ).strip
      logger.debug "line: >>#{line}<<"

      values = split_values( line )
      logger.debug "  values: >>#{values.join('<< >>')}<<"

      if values.empty?
        logger.warn "skipping line without values: >>#{line}<<"
        next
      end

      ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
      ##   either use keys or do NOT use keys; do NOT mix in a single fixture file

      ### support autogenerate key from first title value
      ## NB: a single value is always the title (a key without a title makes no sense)
      if values.size > 1 && values[0] =~ KEY_REGEX
        key_col   = values[0]
        title_col = values[1]
        more_cols = values[2..-1]
      else
        key_col   = nil     # nil => autogenerate below
        title_col = values[0]
        more_cols = values[1..-1]
      end

      ## title (split of optional synonyms)
      # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
      titles = title_col.split('|')

      if key_col.nil?
        if titles.empty?
          # no key and nothing to generate a key from
          logger.warn "skipping line without title: >>#{line}<<"
          next
        end
        ## autogenerate key from first title
        key_col = title_to_key( titles[0] )
        logger.debug "   autogen key >#{key_col}< from title >#{titles[0]}<"
      end

      attribs = { :title => titles[0] }
      ## add optional synonyms if present
      attribs[ :synonyms ] = titles[1..-1].join('|')  if titles.size > 1
      attribs[ :key ]      = key_col

      attribs = attribs.merge( @more_values )  # e.g. merge country_id and other defaults if present

      yield( attribs, more_cols )

    end # each lines
  end # method each_line


  # Generates an ascii lookup key from a title,
  #  e.g. Wien [Vienna] => wien, São Paulo => saopaulo, München => muenchen
  #
  # Returns the key as a new string; characters without an entry in
  # ALTERNATIVES are kept as is.
  def title_to_key( title )
    key = title.downcase

    ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
    ## NB: non-greedy - do NOT swallow text between two bracket groups
    key = key.gsub( /\[[^\]]*\]/, '' )

    ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
    key = key.gsub( /\([^)]*\)/, '' )

    ## remove all whitespace and punctuation
    key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )

    ## turn accented char into ascii look alike if possible
    key.gsub( ALTERNATIVES_REGEX, ALTERNATIVES )
  end # method title_to_key


  private

  # Splits a cleaned-up line into stripped values and drops comment columns.
  def split_values( line )
    # pass 1) remove leading and trailing whitespace for values
    values = line.split(',').map { |value| value.strip }

    ##### todo remove support of comment column? (NB: must NOT include commas)
    # pass 2) remove comment columns
    values.reject do |value|
      comment = value.start_with?( '#' )   ## start with # treat it as a comment column; e.g. remove it
      logger.debug "   removing column with value >>#{value}<<"  if comment
      comment
    end
  end

end # class ValuesReader
|