textutils 0.5.2 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,13 +16,21 @@ class ValuesReader
16
16
 
17
17
  @data.each_line do |line|
18
18
 
19
- if line =~ /^\s*#/
19
+ ## allow alternative comment lines
20
+ ## e.g. -- comment or
21
+ ## % comment
22
+ ## why? # might get used by markdown for marking headers, for example
23
+
24
+ ## NB: for now, alternative comment markers are NOT recognized in end-of-line style, e.g.
25
+ ## some data, more data -- comment here
26
+
27
+ if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
20
28
  # skip comments and do NOT copy to result (keep comments secret!)
21
29
  logger.debug 'skipping comment line'
22
30
  next
23
31
  end
24
-
25
- if line =~ /^\s*$/
32
+
33
+ if line =~ /^\s*$/
26
34
  # skip comment or blank line
27
35
  logger.debug 'skipping blank line'
28
36
  next
@@ -39,9 +47,19 @@ class ValuesReader
39
47
 
40
48
  line = line.strip
41
49
 
50
+ ### guard escaped commas (e.g. \,)
51
+ line = line.gsub( '\,', '@commma@' )
52
+
53
+ ## use generic separator (allow us to configure separator)
54
+ line = line.gsub( ',', '@sep@')
55
+
56
+ ## restore escaped commas (before split)
57
+ line = line.gsub( '@commma@', ',' )
58
+
59
+
42
60
  logger.debug "line: >>#{line}<<"
43
61
 
44
- values = line.split(',')
62
+ values = line.split( '@sep@' )
45
63
 
46
64
  # pass 1) remove leading and trailing whitespace for values
47
65
 
@@ -71,8 +89,8 @@ class ValuesReader
71
89
  # if it looks like a key (only a-z lower case allowed); assume it's a key
72
90
  # - also allow . in keys e.g. world.quali.america, at.cup, etc.
73
91
  # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
74
-
75
- if values[0] =~ /^[a-z][a-z0-9.]*[a-z0-9]$/ # NB: minimum two a-z letters required
92
+
93
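+ if values[0] =~ /^[a-z][a-z0-9.]*[a-z0-9]|[a-z]$/ # NB: key must start w/ a-z letter (minimum one letter). NOTE(review): `|` binds looser than ^ and $, so this matches "^key-like prefix" OR "ends in a-z" (e.g. 'Wien' matches); intended form is probably /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ - confirm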
76
94
  key_col = values[0]
77
95
  title_col = values[1]
78
96
  more_cols = values[2..-1]
@@ -121,9 +139,16 @@ class ValuesReader
121
139
 
122
140
  ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
123
141
  key = key.gsub( /\(.+\)/, '' )
142
+
143
+ ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
144
+ ## todo: use for autotags? e.g. {Bio} => bio
145
+ key = key.gsub( /\{.+\}/, '' )
124
146
 
125
147
  ## remove all whitespace and punctuation
126
148
  key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
149
+
150
+ ## remove special chars (e.g. %˚)
151
+ key = key.gsub( /[%˚]/, '' )
127
152
 
128
153
  ## turn accented char into ascii look alike if possible
129
154
  ##
@@ -145,10 +170,12 @@ class ValuesReader
145
170
  ['ą', 'a' ], # e.g. Śląsk
146
171
  ['ç', 'c' ], # e.g. São Gonçalo, Iguaçu, Neftçi
147
172
  ['ć', 'c' ], # e.g. Budućnost
173
+ ['č', 'c' ], # e.g. Tradiční, Výčepní
148
174
  ['é', 'e' ], # e.g. Vélez, Králové
149
175
  ['è', 'e' ], # e.g. Rivières
150
176
  ['ê', 'e' ], # e.g. Grêmio
151
177
  ['ě', 'e' ], # e.g. Budějovice
178
+ ['ĕ', 'e' ], # e.g. Svĕtlý
152
179
  ['ė', 'e' ], # e.g. Vėtra
153
180
  ['ë', 'e' ], # e.g. Skënderbeu
154
181
  ['ğ', 'g' ], # e.g. Qarabağ
@@ -166,10 +193,13 @@ class ValuesReader
166
193
  ['ș', 's' ], # e.g. Chișinău, București
167
194
  ['ş', 's' ], # e.g. Beşiktaş
168
195
  ['š', 's' ], # e.g. Košice
196
+ ['ť', 't' ], # e.g. Měšťan
169
197
  ['ü', 'ue'],
170
198
  ['ú', 'u' ], # e.g. Fútbol
171
199
  ['ū', 'u' ], # e.g. Sūduva
200
+ ['ů', 'u' ], # e.g. Sládkův
172
201
  ['ı', 'u' ], # e.g. Bakı # use u?? (Baku) why-why not?
202
+ ['ý', 'y' ], # e.g. Nefiltrovaný
173
203
  ['ź', 'z' ], # e.g. Łódź
174
204
  ['ž', 'z' ], # e.g. Domžale, Petržalka
175
205
 
@@ -178,6 +208,7 @@ class ValuesReader
178
208
  ['Í', 'i' ], # e.g. ÍBV
179
209
  ['Ł', 'l' ], # e.g. Łódź
180
210
  ['Ö', 'oe' ], # e.g. Örebro
211
+ ['Ř', 'r' ], # e.g. Řezák
181
212
  ['Ś', 's' ], # e.g. Śląsk
182
213
  ['Š', 's' ], # e.g. MŠK
183
214
  ['Ş', 's' ], # e.g. Şüvälan
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.2'
4
+ VERSION = '0.5.3'
5
5
 
6
6
  end # module TextUtils
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-16 00:00:00.000000000 Z
12
+ date: 2013-04-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &84567730 !ruby/object:Gem::Requirement
16
+ requirement: &81154230 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *84567730
24
+ version_requirements: *81154230
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &84567510 !ruby/object:Gem::Requirement
27
+ requirement: &81170390 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *84567510
35
+ version_requirements: *81170390
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &84567260 !ruby/object:Gem::Requirement
38
+ requirement: &81170170 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *84567260
46
+ version_requirements: *81170170
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []