textutils 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/textutils/reader/values_reader.rb +37 -6
- data/lib/textutils/version.rb +1 -1
- metadata +8 -8
@@ -16,13 +16,21 @@ class ValuesReader
|
|
16
16
|
|
17
17
|
@data.each_line do |line|
|
18
18
|
|
19
|
-
|
19
|
+
## allow alternative comment lines
|
20
|
+
## e.g. -- comment or
|
21
|
+
## % comment
|
22
|
+
## why? # might get used by markdown for marking headers, for example
|
23
|
+
|
24
|
+
## NB: for now alternative comment lines not allowed as end of line style e.g
|
25
|
+
## some data, more data -- comment here
|
26
|
+
|
27
|
+
if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
|
20
28
|
# skip komments and do NOT copy to result (keep comments secret!)
|
21
29
|
logger.debug 'skipping comment line'
|
22
30
|
next
|
23
31
|
end
|
24
|
-
|
25
|
-
if line =~ /^\s*$/
|
32
|
+
|
33
|
+
if line =~ /^\s*$/
|
26
34
|
# kommentar oder leerzeile überspringen
|
27
35
|
logger.debug 'skipping blank line'
|
28
36
|
next
|
@@ -39,9 +47,19 @@ class ValuesReader
|
|
39
47
|
|
40
48
|
line = line.strip
|
41
49
|
|
50
|
+
### guard escaped commas (e.g. \,)
|
51
|
+
line = line.gsub( '\,', '@commma@' )
|
52
|
+
|
53
|
+
## use generic separator (allow us to configure separator)
|
54
|
+
line = line.gsub( ',', '@sep@')
|
55
|
+
|
56
|
+
## restore escaped commas (before split)
|
57
|
+
line = line.gsub( '@commma@', ',' )
|
58
|
+
|
59
|
+
|
42
60
|
logger.debug "line: >>#{line}<<"
|
43
61
|
|
44
|
-
values = line.split(',')
|
62
|
+
values = line.split( '@sep@' )
|
45
63
|
|
46
64
|
# pass 1) remove leading and trailing whitespace for values
|
47
65
|
|
@@ -71,8 +89,8 @@ class ValuesReader
|
|
71
89
|
# if it looks like a key (only a-z lower case allowed); assume it's a key
|
72
90
|
# - also allow . in keys e.g. world.quali.america, at.cup, etc.
|
73
91
|
# - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
|
74
|
-
|
75
|
-
if values[0] =~ /^[a-z][a-z0-9.]*[a-z0-9]$/ # NB:
|
92
|
+
|
93
|
+
if values[0] =~ /^[a-z][a-z0-9.]*[a-z0-9]|[a-z]$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
|
76
94
|
key_col = values[0]
|
77
95
|
title_col = values[1]
|
78
96
|
more_cols = values[2..-1]
|
@@ -121,9 +139,16 @@ class ValuesReader
|
|
121
139
|
|
122
140
|
## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
|
123
141
|
key = key.gsub( /\(.+\)/, '' )
|
142
|
+
|
143
|
+
## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
|
144
|
+
## todo: use for autotags? e.g. {Bio} => bio
|
145
|
+
key = key.gsub( /\{.+\}/, '' )
|
124
146
|
|
125
147
|
## remove all whitespace and punctuation
|
126
148
|
key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
|
149
|
+
|
150
|
+
## remove special chars (e.g. %˚)
|
151
|
+
key = key.gsub( /[%˚]/, '' )
|
127
152
|
|
128
153
|
## turn accented char into ascii look alike if possible
|
129
154
|
##
|
@@ -145,10 +170,12 @@ class ValuesReader
|
|
145
170
|
['ą', 'a' ], # e.g. Śląsk
|
146
171
|
['ç', 'c' ], # e.g. São Gonçalo, Iguaçu, Neftçi
|
147
172
|
['ć', 'c' ], # e.g. Budućnost
|
173
|
+
['č', 'c' ], # e.g. Tradiční, Výčepní
|
148
174
|
['é', 'e' ], # e.g. Vélez, Králové
|
149
175
|
['è', 'e' ], # e.g. Rivières
|
150
176
|
['ê', 'e' ], # e.g. Grêmio
|
151
177
|
['ě', 'e' ], # e.g. Budějovice
|
178
|
+
['ĕ', 'e' ], # e.g. Svĕtlý
|
152
179
|
['ė', 'e' ], # e.g. Vėtra
|
153
180
|
['ë', 'e' ], # e.g. Skënderbeu
|
154
181
|
['ğ', 'g' ], # e.g. Qarabağ
|
@@ -166,10 +193,13 @@ class ValuesReader
|
|
166
193
|
['ș', 's' ], # e.g. Chișinău, București
|
167
194
|
['ş', 's' ], # e.g. Beşiktaş
|
168
195
|
['š', 's' ], # e.g. Košice
|
196
|
+
['ť', 't' ], # e.g. Měšťan
|
169
197
|
['ü', 'ue'],
|
170
198
|
['ú', 'u' ], # e.g. Fútbol
|
171
199
|
['ū', 'u' ], # e.g. Sūduva
|
200
|
+
['ů', 'u' ], # e.g. Sládkův
|
172
201
|
['ı', 'u' ], # e.g. Bakı # use u?? (Baku) why-why not?
|
202
|
+
['ý', 'y' ], # e.g. Nefitrovaný
|
173
203
|
['ź', 'z' ], # e.g. Łódź
|
174
204
|
['ž', 'z' ], # e.g. Domžale, Petržalka
|
175
205
|
|
@@ -178,6 +208,7 @@ class ValuesReader
|
|
178
208
|
['Í', 'i' ], # e.g. ÍBV
|
179
209
|
['Ł', 'l' ], # e.g. Łódź
|
180
210
|
['Ö', 'oe' ], # e.g. Örebro
|
211
|
+
['Ř', 'r' ], # e.g. Řezák
|
181
212
|
['Ś', 's' ], # e.g. Śląsk
|
182
213
|
['Š', 's' ], # e.g. MŠK
|
183
214
|
['Ş', 's' ], # e.g. Şüvälan
|
data/lib/textutils/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.2
|
4
|
+
version: 0.5.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-23 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &81154230 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *81154230
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &81170390 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *81170390
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &81170170 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *81170170
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables: []
|