textutils 0.5.2 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/textutils/reader/values_reader.rb +37 -6
- data/lib/textutils/version.rb +1 -1
- metadata +8 -8
@@ -16,13 +16,21 @@ class ValuesReader
|
|
16
16
|
|
17
17
|
@data.each_line do |line|
|
18
18
|
|
19
|
-
|
19
|
+
## allow alternative comment lines
|
20
|
+
## e.g. -- comment or
|
21
|
+
## % comment
|
22
|
+
## why? # might get used by markdown for marking headers, for example
|
23
|
+
|
24
|
+
## NB: for now alternative comment lines not allowed as end of line style e.g
|
25
|
+
## some data, more data -- comment here
|
26
|
+
|
27
|
+
if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
|
20
28
|
# skip komments and do NOT copy to result (keep comments secret!)
|
21
29
|
logger.debug 'skipping comment line'
|
22
30
|
next
|
23
31
|
end
|
24
|
-
|
25
|
-
if line =~ /^\s*$/
|
32
|
+
|
33
|
+
if line =~ /^\s*$/
|
26
34
|
# kommentar oder leerzeile überspringen
|
27
35
|
logger.debug 'skipping blank line'
|
28
36
|
next
|
@@ -39,9 +47,19 @@ class ValuesReader
|
|
39
47
|
|
40
48
|
line = line.strip
|
41
49
|
|
50
|
+
### guard escaped commas (e.g. \,)
|
51
|
+
line = line.gsub( '\,', '@commma@' )
|
52
|
+
|
53
|
+
## use generic separator (allow us to configure separator)
|
54
|
+
line = line.gsub( ',', '@sep@')
|
55
|
+
|
56
|
+
## restore escaped commas (before split)
|
57
|
+
line = line.gsub( '@commma@', ',' )
|
58
|
+
|
59
|
+
|
42
60
|
logger.debug "line: >>#{line}<<"
|
43
61
|
|
44
|
-
values = line.split(',')
|
62
|
+
values = line.split( '@sep@' )
|
45
63
|
|
46
64
|
# pass 1) remove leading and trailing whitespace for values
|
47
65
|
|
@@ -71,8 +89,8 @@ class ValuesReader
|
|
71
89
|
# if it looks like a key (only a-z lower case allowed); assume it's a key
|
72
90
|
# - also allow . in keys e.g. world.quali.america, at.cup, etc.
|
73
91
|
# - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
|
74
|
-
|
75
|
-
if values[0] =~ /^[a-z][a-z0-9.]*[a-z0-9]$/ # NB:
|
92
|
+
|
93
|
+
if values[0] =~ /^[a-z][a-z0-9.]*[a-z0-9]|[a-z]$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
|
76
94
|
key_col = values[0]
|
77
95
|
title_col = values[1]
|
78
96
|
more_cols = values[2..-1]
|
@@ -121,9 +139,16 @@ class ValuesReader
|
|
121
139
|
|
122
140
|
## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
|
123
141
|
key = key.gsub( /\(.+\)/, '' )
|
142
|
+
|
143
|
+
## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
|
144
|
+
## todo: use for autotags? e.g. {Bio} => bio
|
145
|
+
key = key.gsub( /\{.+\}/, '' )
|
124
146
|
|
125
147
|
## remove all whitespace and punctuation
|
126
148
|
key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
|
149
|
+
|
150
|
+
## remove special chars (e.g. %˚)
|
151
|
+
key = key.gsub( /[%˚]/, '' )
|
127
152
|
|
128
153
|
## turn accented char into ascii look alike if possible
|
129
154
|
##
|
@@ -145,10 +170,12 @@ class ValuesReader
|
|
145
170
|
['ą', 'a' ], # e.g. Śląsk
|
146
171
|
['ç', 'c' ], # e.g. São Gonçalo, Iguaçu, Neftçi
|
147
172
|
['ć', 'c' ], # e.g. Budućnost
|
173
|
+
['č', 'c' ], # e.g. Tradiční, Výčepní
|
148
174
|
['é', 'e' ], # e.g. Vélez, Králové
|
149
175
|
['è', 'e' ], # e.g. Rivières
|
150
176
|
['ê', 'e' ], # e.g. Grêmio
|
151
177
|
['ě', 'e' ], # e.g. Budějovice
|
178
|
+
['ĕ', 'e' ], # e.g. Svĕtlý
|
152
179
|
['ė', 'e' ], # e.g. Vėtra
|
153
180
|
['ë', 'e' ], # e.g. Skënderbeu
|
154
181
|
['ğ', 'g' ], # e.g. Qarabağ
|
@@ -166,10 +193,13 @@ class ValuesReader
|
|
166
193
|
['ș', 's' ], # e.g. Chișinău, București
|
167
194
|
['ş', 's' ], # e.g. Beşiktaş
|
168
195
|
['š', 's' ], # e.g. Košice
|
196
|
+
['ť', 't' ], # e.g. Měšťan
|
169
197
|
['ü', 'ue'],
|
170
198
|
['ú', 'u' ], # e.g. Fútbol
|
171
199
|
['ū', 'u' ], # e.g. Sūduva
|
200
|
+
['ů', 'u' ], # e.g. Sládkův
|
172
201
|
['ı', 'u' ], # e.g. Bakı # use u?? (Baku) why-why not?
|
202
|
+
['ý', 'y' ], # e.g. Nefitrovaný
|
173
203
|
['ź', 'z' ], # e.g. Łódź
|
174
204
|
['ž', 'z' ], # e.g. Domžale, Petržalka
|
175
205
|
|
@@ -178,6 +208,7 @@ class ValuesReader
|
|
178
208
|
['Í', 'i' ], # e.g. ÍBV
|
179
209
|
['Ł', 'l' ], # e.g. Łódź
|
180
210
|
['Ö', 'oe' ], # e.g. Örebro
|
211
|
+
['Ř', 'r' ], # e.g. Řezák
|
181
212
|
['Ś', 's' ], # e.g. Śląsk
|
182
213
|
['Š', 's' ], # e.g. MŠK
|
183
214
|
['Ş', 's' ], # e.g. Şüvälan
|
data/lib/textutils/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.5.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &81154230 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *81154230
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &81170390 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *81170390
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &81170170 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *81170170
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables: []
|