textutils 0.5.10 → 0.5.11
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +2 -0
- data/lib/textutils/helper/address_helper.rb +29 -0
- data/lib/textutils/helper/title_helper.rb +42 -14
- data/lib/textutils/helper/value_helper.rb +38 -0
- data/lib/textutils/reader/values_reader.rb +0 -120
- data/lib/textutils/utils.rb +1 -0
- data/lib/textutils/version.rb +1 -1
- data/lib/textutils.rb +2 -0
- metadata +10 -8
data/Manifest.txt
CHANGED
@@ -7,8 +7,10 @@ lib/textutils/filter/code_filter.rb
|
|
7
7
|
lib/textutils/filter/comment_filter.rb
|
8
8
|
lib/textutils/filter/erb_django_filter.rb
|
9
9
|
lib/textutils/filter/erb_filter.rb
|
10
|
+
lib/textutils/helper/address_helper.rb
|
10
11
|
lib/textutils/helper/title_helper.rb
|
11
12
|
lib/textutils/helper/unicode_helper.rb
|
13
|
+
lib/textutils/helper/value_helper.rb
|
12
14
|
lib/textutils/reader/code_reader.rb
|
13
15
|
lib/textutils/reader/fixture_reader.rb
|
14
16
|
lib/textutils/reader/hash_reader.rb
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module TextUtils
|
5
|
+
module AddressHelper
|
6
|
+
|
7
|
+
def normalize_address( old_address_line )
|
8
|
+
# for now only checks german 5-digit zip code
|
9
|
+
#
|
10
|
+
# e.g. Alte Plauener Straße 24 // 95028 Hof becomes
|
11
|
+
# 95028 Hof // Alte Plauener Straße 24
|
12
|
+
|
13
|
+
new_address_line = old_address_line # default - do nothing - just pass through
|
14
|
+
|
15
|
+
lines = old_address_line.split( '//' )
|
16
|
+
|
17
|
+
if lines.size == 2 # two lines / check for switching lines
|
18
|
+
line1 = lines[0].strip
|
19
|
+
line2 = lines[1].strip
|
20
|
+
if line2 =~ /^[0-9]{5}\s/
|
21
|
+
new_address_line = "#{line2} // #{line1}" # swap - let line w/ 5-digit zip code go first
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
new_address_line
|
26
|
+
end
|
27
|
+
|
28
|
+
end # module AddressHelper
|
29
|
+
end # module TextUtils
|
@@ -4,29 +4,57 @@
|
|
4
4
|
module TextUtils
|
5
5
|
module TitleHelper
|
6
6
|
|
7
|
-
def title_to_key( title )
|
7
|
+
def strip_translations( title )
|
8
|
+
# remove optional english translation in square brackets ([])
|
9
|
+
# e.g. Wien [Vienna] => Wien
|
10
|
+
|
11
|
+
title.gsub( /\[.+\]/, '' )
|
12
|
+
end
|
13
|
+
|
14
|
+
def strip_subtitles( title )
|
15
|
+
# remove optional longer title part in ()
|
16
|
+
# e.g. Las Palmas (de Gran Canaria) => Las Palmas
|
17
|
+
# Palma (de Mallorca) => Palma
|
18
|
+
|
19
|
+
title.gsub( /\(.+\)/, '' )
|
20
|
+
end
|
21
|
+
|
22
|
+
def strip_tags( title ) # todo: use an alias or rename for better name ??
|
23
|
+
# remove optional longer title part in {}
|
24
|
+
# e.g. Ottakringer {Bio} => Ottakringer
|
25
|
+
# Ottakringer {Alkoholfrei} => Ottakringer
|
26
|
+
#
|
27
|
+
# todo: use for autotags? e.g. {Bio} => bio
|
28
|
+
|
29
|
+
title.gsub( /\{.+\}/, '' )
|
30
|
+
end
|
31
|
+
|
32
|
+
def strip_whitespaces( title )
|
33
|
+
# remove all whitespace and punctuation
|
34
|
+
title.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
|
35
|
+
end
|
36
|
+
|
37
|
+
def strip_special_chars( title )
|
38
|
+
# remove special chars (e.g. %°&)
|
39
|
+
title.gsub( /[%&°]/, '' )
|
40
|
+
end
|
8
41
|
|
9
|
-
|
42
|
+
def title_to_key( title )
|
10
43
|
|
44
|
+
## NB: used in/moved from readers/values_reader.rb
|
11
45
|
|
12
46
|
## NB: downcase does NOT work for accented chars (thus, include in alternatives)
|
13
47
|
key = title.downcase
|
14
48
|
|
15
|
-
|
16
|
-
key = key.gsub( /\[.+\]/, '' )
|
49
|
+
key = strip_translations( key )
|
17
50
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
|
22
|
-
## todo: use for autotags? e.g. {Bio} => bio
|
23
|
-
key = key.gsub( /\{.+\}/, '' )
|
51
|
+
key = strip_subtitles( key )
|
52
|
+
|
53
|
+
key = strip_tags( key )
|
24
54
|
|
25
|
-
|
26
|
-
key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
|
55
|
+
key = strip_whitespaces( key )
|
27
56
|
|
28
|
-
|
29
|
-
key = key.gsub( /[%&°]/, '' )
|
57
|
+
key = strip_special_chars( key )
|
30
58
|
|
31
59
|
## turn accented char into ascii look alike if possible
|
32
60
|
##
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module TextUtils
|
5
|
+
module ValueHelper
|
6
|
+
|
7
|
+
|
8
|
+
def is_region?( value )
|
9
|
+
# assume region code e.g. TX or N
|
10
|
+
value =~ /^[A-Z]{1,2}$/
|
11
|
+
end
|
12
|
+
|
13
|
+
def is_year?( value )
|
14
|
+
# founded/established year e.g. 1776
|
15
|
+
value =~ /^[0-9]{4}$/
|
16
|
+
end
|
17
|
+
|
18
|
+
def is_website?( value )
|
19
|
+
# check for url/internet address e.g. www.ottakringer.at
|
20
|
+
# - must start w/ www. or
|
21
|
+
# - must end w/ .com
|
22
|
+
#
|
23
|
+
# fix: support more url format (e.g. w/o www. - look for .com .country code etc.)
|
24
|
+
value =~ /^www\.|\.com$/
|
25
|
+
end
|
26
|
+
|
27
|
+
def is_address?( value )
|
28
|
+
# if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
|
29
|
+
value =~ /\/{2}/
|
30
|
+
end
|
31
|
+
|
32
|
+
def is_taglist?( value )
|
33
|
+
value =~ /^[a-z0-9\|_ ]+$/
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
end # module ValueHelper
|
38
|
+
end # module TextUtils
|
@@ -171,124 +171,4 @@ class ValuesReader
|
|
171
171
|
end # method each_line
|
172
172
|
|
173
173
|
|
174
|
-
|
175
|
-
def each_line_old_single_line_records_only
|
176
|
-
|
177
|
-
@data.each_line do |line|
|
178
|
-
|
179
|
-
## allow alternative comment lines
|
180
|
-
## e.g. -- comment or
|
181
|
-
## % comment
|
182
|
-
## why? # might get used by markdown for marking headers, for example
|
183
|
-
|
184
|
-
## NB: for now alternative comment lines not allowed as end of line style e.g
|
185
|
-
## some data, more data -- comment here
|
186
|
-
|
187
|
-
if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
|
188
|
-
# skip comments and do NOT copy to result (keep comments secret!)
|
189
|
-
logger.debug 'skipping comment line'
|
190
|
-
next
|
191
|
-
end
|
192
|
-
|
193
|
-
if line =~ /^\s*$/
|
194
|
-
# skip comment or blank line
|
195
|
-
logger.debug 'skipping blank line'
|
196
|
-
next
|
197
|
-
end
|
198
|
-
|
199
|
-
|
200
|
-
# pass 1) remove possible trailing eol comment
|
201
|
-
## e.g -> nyc, New York # Sample EOL Comment Here (with or without commas,,,,)
|
202
|
-
## becomes -> nyc, New York
|
203
|
-
|
204
|
-
line = line.sub( /\s+#.+$/, '' )
|
205
|
-
|
206
|
-
# pass 2) remove leading and trailing whitespace
|
207
|
-
|
208
|
-
line = line.strip
|
209
|
-
|
210
|
-
### guard escaped commas (e.g. \,)
|
211
|
-
line = line.gsub( '\,', '@commma@' )
|
212
|
-
|
213
|
-
## use generic separator (allow us to configure separator)
|
214
|
-
line = line.gsub( ',', '@sep@')
|
215
|
-
|
216
|
-
## restore escaped commas (before split)
|
217
|
-
line = line.gsub( '@commma@', ',' )
|
218
|
-
|
219
|
-
|
220
|
-
logger.debug "line: >>#{line}<<"
|
221
|
-
|
222
|
-
values = line.split( '@sep@' )
|
223
|
-
|
224
|
-
# pass 1) remove leading and trailing whitespace for values
|
225
|
-
|
226
|
-
values = values.map { |value| value.strip }
|
227
|
-
|
228
|
-
##### todo remove support of comment column? (NB: must NOT include commas)
|
229
|
-
# pass 2) remove comment columns
|
230
|
-
|
231
|
-
values = values.select do |value|
|
232
|
-
if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
|
233
|
-
logger.debug " removing column with value >>#{value}<<"
|
234
|
-
false
|
235
|
-
else
|
236
|
-
true
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
logger.debug " values: >>#{values.join('<< >>')}<<"
|
241
|
-
|
242
|
-
|
243
|
-
### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
|
244
|
-
## either use keys or do NOT use keys; do NOT mix in a single fixture file
|
245
|
-
|
246
|
-
|
247
|
-
### support autogenerate key from first title value
|
248
|
-
|
249
|
-
# if it looks like a key (only a-z lower case allowed); assume it's a key
|
250
|
-
# - also allow . in keys e.g. world.quali.america, at.cup, etc.
|
251
|
-
# - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
|
252
|
-
|
253
|
-
# fix/todo: add support for leading underscore _
|
254
|
-
# or allow keys starting w/ digits?
|
255
|
-
if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
|
256
|
-
key_col = values[0]
|
257
|
-
title_col = values[1]
|
258
|
-
more_cols = values[2..-1]
|
259
|
-
else
|
260
|
-
key_col = '<auto>'
|
261
|
-
title_col = values[0]
|
262
|
-
more_cols = values[1..-1]
|
263
|
-
end
|
264
|
-
|
265
|
-
attribs = {}
|
266
|
-
|
267
|
-
## title (split of optional synonyms)
|
268
|
-
# e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
|
269
|
-
titles = title_col.split('|')
|
270
|
-
|
271
|
-
attribs[ :title ] = titles[0]
|
272
|
-
|
273
|
-
## add optional synonyms if present
|
274
|
-
attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
|
275
|
-
|
276
|
-
if key_col == '<auto>'
|
277
|
-
## autogenerate key from first title
|
278
|
-
key_col = TextUtils.title_to_key( titles[0] )
|
279
|
-
logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
|
280
|
-
end
|
281
|
-
|
282
|
-
attribs[ :key ] = key_col
|
283
|
-
|
284
|
-
attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
|
285
|
-
|
286
|
-
yield( attribs, more_cols )
|
287
|
-
|
288
|
-
end # each lines
|
289
|
-
|
290
|
-
end # method each_line
|
291
|
-
|
292
|
-
|
293
|
-
|
294
174
|
end # class ValuesReader
|
data/lib/textutils/utils.rb
CHANGED
data/lib/textutils/version.rb
CHANGED
data/lib/textutils.rb
CHANGED
@@ -23,6 +23,8 @@ require 'textutils/filter/erb_filter'
|
|
23
23
|
|
24
24
|
require 'textutils/helper/unicode_helper'
|
25
25
|
require 'textutils/helper/title_helper'
|
26
|
+
require 'textutils/helper/address_helper'
|
27
|
+
require 'textutils/helper/value_helper'
|
26
28
|
|
27
29
|
require 'textutils/utils'
|
28
30
|
require 'textutils/reader/code_reader'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textutils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.10
|
4
|
+
version: 0.5.11
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: logutils
|
16
|
-
requirement: &
|
16
|
+
requirement: &75139420 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0.5'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *75139420
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rdoc
|
27
|
-
requirement: &
|
27
|
+
requirement: &75139200 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: '3.10'
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *75139200
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: hoe
|
38
|
-
requirement: &
|
38
|
+
requirement: &75138980 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,7 +43,7 @@ dependencies:
|
|
43
43
|
version: '3.3'
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *75138980
|
47
47
|
description: textutils - Text Filters, Helpers, Readers and More
|
48
48
|
email: webslideshow@googlegroups.com
|
49
49
|
executables: []
|
@@ -60,8 +60,10 @@ files:
|
|
60
60
|
- lib/textutils/filter/comment_filter.rb
|
61
61
|
- lib/textutils/filter/erb_django_filter.rb
|
62
62
|
- lib/textutils/filter/erb_filter.rb
|
63
|
+
- lib/textutils/helper/address_helper.rb
|
63
64
|
- lib/textutils/helper/title_helper.rb
|
64
65
|
- lib/textutils/helper/unicode_helper.rb
|
66
|
+
- lib/textutils/helper/value_helper.rb
|
65
67
|
- lib/textutils/reader/code_reader.rb
|
66
68
|
- lib/textutils/reader/fixture_reader.rb
|
67
69
|
- lib/textutils/reader/hash_reader.rb
|