textutils 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,13 +16,21 @@ class ValuesReader
16
16
 
17
17
  @data.each_line do |line|
18
18
 
19
- if line =~ /^\s*#/
19
+ ## allow alternative comment lines
20
+ ## e.g. -- comment or
21
+ ## % comment
22
+ ## why? # might get used by markdown for marking headers, for example
23
+
24
+ ## NB: for now, alternative comment markers are not allowed in end-of-line style, e.g.
25
+ ## some data, more data -- comment here
26
+
27
+ if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
20
28
  # skip comments and do NOT copy to result (keep comments secret!)
21
29
  logger.debug 'skipping comment line'
22
30
  next
23
31
  end
24
-
25
- if line =~ /^\s*$/
32
+
33
+ if line =~ /^\s*$/
26
34
  # skip comment or blank line
27
35
  logger.debug 'skipping blank line'
28
36
  next
@@ -39,9 +47,19 @@ class ValuesReader
39
47
 
40
48
  line = line.strip
41
49
 
50
+ ### guard escaped commas (e.g. \,)
51
+ line = line.gsub( '\,', '@commma@' )
52
+
53
+ ## use generic separator (allow us to configure separator)
54
+ line = line.gsub( ',', '@sep@')
55
+
56
+ ## restore escaped commas (before split)
57
+ line = line.gsub( '@commma@', ',' )
58
+
59
+
42
60
  logger.debug "line: >>#{line}<<"
43
61
 
44
- values = line.split(',')
62
+ values = line.split( '@sep@' )
45
63
 
46
64
  # pass 1) remove leading and trailing whitespace for values
47
65
 
@@ -71,8 +89,8 @@ class ValuesReader
71
89
  # if it looks like a key (only a-z lower case allowed); assume it's a key
72
90
  # - also allow . in keys e.g. world.quali.america, at.cup, etc.
73
91
  # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
74
-
75
- if values[0] =~ /^[a-z][a-z0-9.]*[a-z0-9]$/ # NB: minimum two a-z letters required
92
+
93
+ if values[0] =~ /^[a-z][a-z0-9.]*[a-z0-9]|[a-z]$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
76
94
  key_col = values[0]
77
95
  title_col = values[1]
78
96
  more_cols = values[2..-1]
@@ -121,9 +139,16 @@ class ValuesReader
121
139
 
122
140
  ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
123
141
  key = key.gsub( /\(.+\)/, '' )
142
+
143
+ ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
144
+ ## todo: use for autotags? e.g. {Bio} => bio
145
+ key = key.gsub( /\{.+\}/, '' )
124
146
 
125
147
  ## remove all whitespace and punctuation
126
148
  key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
149
+
150
+ ## remove special chars (e.g. %˚)
151
+ key = key.gsub( /[%˚]/, '' )
127
152
 
128
153
  ## turn accented char into ascii look alike if possible
129
154
  ##
@@ -145,10 +170,12 @@ class ValuesReader
145
170
  ['ą', 'a' ], # e.g. Śląsk
146
171
  ['ç', 'c' ], # e.g. São Gonçalo, Iguaçu, Neftçi
147
172
  ['ć', 'c' ], # e.g. Budućnost
173
+ ['č', 'c' ], # e.g. Tradiční, Výčepní
148
174
  ['é', 'e' ], # e.g. Vélez, Králové
149
175
  ['è', 'e' ], # e.g. Rivières
150
176
  ['ê', 'e' ], # e.g. Grêmio
151
177
  ['ě', 'e' ], # e.g. Budějovice
178
+ ['ĕ', 'e' ], # e.g. Svĕtlý
152
179
  ['ė', 'e' ], # e.g. Vėtra
153
180
  ['ë', 'e' ], # e.g. Skënderbeu
154
181
  ['ğ', 'g' ], # e.g. Qarabağ
@@ -166,10 +193,13 @@ class ValuesReader
166
193
  ['ș', 's' ], # e.g. Chișinău, București
167
194
  ['ş', 's' ], # e.g. Beşiktaş
168
195
  ['š', 's' ], # e.g. Košice
196
+ ['ť', 't' ], # e.g. Měšťan
169
197
  ['ü', 'ue'],
170
198
  ['ú', 'u' ], # e.g. Fútbol
171
199
  ['ū', 'u' ], # e.g. Sūduva
200
+ ['ů', 'u' ], # e.g. Sládkův
172
201
  ['ı', 'u' ], # e.g. Bakı # use u?? (Baku) why-why not?
202
+ ['ý', 'y' ], # e.g. Nefitrovaný
173
203
  ['ź', 'z' ], # e.g. Łódź
174
204
  ['ž', 'z' ], # e.g. Domžale, Petržalka
175
205
 
@@ -178,6 +208,7 @@ class ValuesReader
178
208
  ['Í', 'i' ], # e.g. ÍBV
179
209
  ['Ł', 'l' ], # e.g. Łódź
180
210
  ['Ö', 'oe' ], # e.g. Örebro
211
+ ['Ř', 'r' ], # e.g. Řezák
181
212
  ['Ś', 's' ], # e.g. Śląsk
182
213
  ['Š', 's' ], # e.g. MŠK
183
214
  ['Ş', 's' ], # e.g. Şüvälan
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.2'
4
+ VERSION = '0.5.3'
5
5
 
6
6
  end # module TextUtils
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-16 00:00:00.000000000 Z
12
+ date: 2013-04-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &84567730 !ruby/object:Gem::Requirement
16
+ requirement: &81154230 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *84567730
24
+ version_requirements: *81154230
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &84567510 !ruby/object:Gem::Requirement
27
+ requirement: &81170390 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *84567510
35
+ version_requirements: *81170390
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &84567260 !ruby/object:Gem::Requirement
38
+ requirement: &81170170 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *84567260
46
+ version_requirements: *81170170
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []