textutils 0.5.10 → 0.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -7,8 +7,10 @@ lib/textutils/filter/code_filter.rb
7
7
  lib/textutils/filter/comment_filter.rb
8
8
  lib/textutils/filter/erb_django_filter.rb
9
9
  lib/textutils/filter/erb_filter.rb
10
+ lib/textutils/helper/address_helper.rb
10
11
  lib/textutils/helper/title_helper.rb
11
12
  lib/textutils/helper/unicode_helper.rb
13
+ lib/textutils/helper/value_helper.rb
12
14
  lib/textutils/reader/code_reader.rb
13
15
  lib/textutils/reader/fixture_reader.rb
14
16
  lib/textutils/reader/hash_reader.rb
@@ -0,0 +1,29 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module TextUtils
5
+ module AddressHelper
6
+
7
+ def normalize_address( old_address_line )
8
+ # for now only checks for a German 5-digit zip code
9
+ #
10
+ # e.g. Alte Plauener Straße 24 // 95028 Hof becomes
11
+ # 95028 Hof // Alte Plauener Straße 24
12
+
13
+ new_address_line = old_address_line # default - do nothing - just pass through
14
+
15
+ lines = old_address_line.split( '//' )
16
+
17
+ if lines.size == 2 # two lines / check for switching lines
18
+ line1 = lines[0].strip
19
+ line2 = lines[1].strip
20
+ if line2 =~ /^[0-9]{5}\s/
21
+ new_address_line = "#{line2} // #{line1}" # swap - let line w/ 5-digit zip code go first
22
+ end
23
+ end
24
+
25
+ new_address_line
26
+ end
27
+
28
+ end # module AddressHelper
29
+ end # module TextUtils
@@ -4,29 +4,57 @@
4
4
  module TextUtils
5
5
  module TitleHelper
6
6
 
7
- def title_to_key( title )
7
+ def strip_translations( title )
8
+ # remove optional english translation in square brackets ([])
9
+ # e.g. Wien [Vienna] => Wien
10
+
11
+ title.gsub( /\[.+\]/, '' )
12
+ end
13
+
14
+ def strip_subtitles( title )
15
+ # remove optional longer title part in ()
16
+ # e.g. Las Palmas (de Gran Canaria) => Las Palmas
17
+ # Palma (de Mallorca) => Palma
18
+
19
+ title.gsub( /\(.+\)/, '' )
20
+ end
21
+
22
+ def strip_tags( title ) # todo: use an alias or rename for better name ??
23
+ # remove optional longer title part in {}
24
+ # e.g. Ottakringer {Bio} => Ottakringer
25
+ # Ottakringer {Alkoholfrei} => Ottakringer
26
+ #
27
+ # todo: use for autotags? e.g. {Bio} => bio
28
+
29
+ title.gsub( /\{.+\}/, '' )
30
+ end
31
+
32
+ def strip_whitespaces( title )
33
+ # remove spaces, tabs and common punctuation (_ - . ( ) [ ] ' " /)
34
+ title.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
35
+ end
36
+
37
+ def strip_special_chars( title )
38
+ # remove special chars (e.g. %°&)
39
+ title.gsub( /[%&°]/, '' )
40
+ end
8
41
 
9
- ## NB: used in/moved from readers/values_reader.rb
42
+ def title_to_key( title )
10
43
 
44
+ ## NB: used in/moved from readers/values_reader.rb
11
45
 
12
46
  ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
13
47
  key = title.downcase
14
48
 
15
- ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
16
- key = key.gsub( /\[.+\]/, '' )
49
+ key = strip_translations( key )
17
50
 
18
- ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
19
- key = key.gsub( /\(.+\)/, '' )
20
-
21
- ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
22
- ## todo: use for autotags? e.g. {Bio} => bio
23
- key = key.gsub( /\{.+\}/, '' )
51
+ key = strip_subtitles( key )
52
+
53
+ key = strip_tags( key )
24
54
 
25
- ## remove all whitespace and punctuation
26
- key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
55
+ key = strip_whitespaces( key )
27
56
 
28
- ## remove special chars (e.g. %°&)
29
- key = key.gsub( /[%&°]/, '' )
57
+ key = strip_special_chars( key )
30
58
 
31
59
  ## turn accented char into ascii look alike if possible
32
60
  ##
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module TextUtils
5
+ module ValueHelper
6
+
7
+
8
+ def is_region?( value )
9
+ # assume region code e.g. TX or N
10
+ value =~ /^[A-Z]{1,2}$/
11
+ end
12
+
13
+ def is_year?( value )
14
+ # founded/established year e.g. 1776
15
+ value =~ /^[0-9]{4}$/
16
+ end
17
+
18
+ def is_website?( value )
19
+ # check for url/internet address e.g. www.ottakringer.at
20
+ # - must start w/ www. or
21
+ # - must end w/ .com
22
+ #
23
+ # fix: support more URL formats (e.g. w/o www. - look for .com, country-code domains, etc.)
24
+ value =~ /^www\.|\.com$/
25
+ end
26
+
27
+ def is_address?( value )
28
+ # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
29
+ value =~ /\/{2}/
30
+ end
31
+
32
+ def is_taglist?( value )
33
+ value =~ /^[a-z0-9\|_ ]+$/
34
+ end
35
+
36
+
37
+ end # module ValueHelper
38
+ end # module TextUtils
@@ -171,124 +171,4 @@ class ValuesReader
171
171
  end # method each_line
172
172
 
173
173
 
174
-
175
- def each_line_old_single_line_records_only
176
-
177
- @data.each_line do |line|
178
-
179
- ## allow alternative comment lines
180
- ## e.g. -- comment or
181
- ## % comment
182
- ## why? # might get used by markdown for marking headers, for example
183
-
184
- ## NB: for now alternative comment lines not allowed as end of line style e.g
185
- ## some data, more data -- comment here
186
-
187
- if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
188
- # skip komments and do NOT copy to result (keep comments secret!)
189
- logger.debug 'skipping comment line'
190
- next
191
- end
192
-
193
- if line =~ /^\s*$/
194
- # kommentar oder leerzeile überspringen
195
- logger.debug 'skipping blank line'
196
- next
197
- end
198
-
199
-
200
- # pass 1) remove possible trailing eol comment
201
- ## e.g -> nyc, New York # Sample EOL Comment Here (with or without commas,,,,)
202
- ## becomes -> nyc, New York
203
-
204
- line = line.sub( /\s+#.+$/, '' )
205
-
206
- # pass 2) remove leading and trailing whitespace
207
-
208
- line = line.strip
209
-
210
- ### guard escaped commas (e.g. \,)
211
- line = line.gsub( '\,', '@commma@' )
212
-
213
- ## use generic separator (allow us to configure separator)
214
- line = line.gsub( ',', '@sep@')
215
-
216
- ## restore escaped commas (before split)
217
- line = line.gsub( '@commma@', ',' )
218
-
219
-
220
- logger.debug "line: >>#{line}<<"
221
-
222
- values = line.split( '@sep@' )
223
-
224
- # pass 1) remove leading and trailing whitespace for values
225
-
226
- values = values.map { |value| value.strip }
227
-
228
- ##### todo remove support of comment column? (NB: must NOT include commas)
229
- # pass 2) remove comment columns
230
-
231
- values = values.select do |value|
232
- if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
233
- logger.debug " removing column with value >>#{value}<<"
234
- false
235
- else
236
- true
237
- end
238
- end
239
-
240
- logger.debug " values: >>#{values.join('<< >>')}<<"
241
-
242
-
243
- ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
244
- ## either use keys or do NOT use keys; do NOT mix in a single fixture file
245
-
246
-
247
- ### support autogenerate key from first title value
248
-
249
- # if it looks like a key (only a-z lower case allowed); assume it's a key
250
- # - also allow . in keys e.g. world.quali.america, at.cup, etc.
251
- # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
252
-
253
- # fix/todo: add support for leading underscore _
254
- # or allow keys starting w/ digits?
255
- if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
256
- key_col = values[0]
257
- title_col = values[1]
258
- more_cols = values[2..-1]
259
- else
260
- key_col = '<auto>'
261
- title_col = values[0]
262
- more_cols = values[1..-1]
263
- end
264
-
265
- attribs = {}
266
-
267
- ## title (split of optional synonyms)
268
- # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
269
- titles = title_col.split('|')
270
-
271
- attribs[ :title ] = titles[0]
272
-
273
- ## add optional synonyms if present
274
- attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
275
-
276
- if key_col == '<auto>'
277
- ## autogenerate key from first title
278
- key_col = TextUtils.title_to_key( titles[0] )
279
- logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
280
- end
281
-
282
- attribs[ :key ] = key_col
283
-
284
- attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
285
-
286
- yield( attribs, more_cols )
287
-
288
- end # each lines
289
-
290
- end # method each_line
291
-
292
-
293
-
294
174
  end # class ValuesReader
@@ -6,6 +6,7 @@ module TextUtils
6
6
  # make helpers available as class methods e.g. TextUtils.convert_unicode_dashes_to_plain_ascii
7
7
  extend UnicodeHelper
8
8
  extend TitleHelper
9
+ extend AddressHelper
9
10
  end
10
11
 
11
12
 
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.10'
4
+ VERSION = '0.5.11'
5
5
 
6
6
  end # module TextUtils
data/lib/textutils.rb CHANGED
@@ -23,6 +23,8 @@ require 'textutils/filter/erb_filter'
23
23
 
24
24
  require 'textutils/helper/unicode_helper'
25
25
  require 'textutils/helper/title_helper'
26
+ require 'textutils/helper/address_helper'
27
+ require 'textutils/helper/value_helper'
26
28
 
27
29
  require 'textutils/utils'
28
30
  require 'textutils/reader/code_reader'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.10
4
+ version: 0.5.11
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-08 00:00:00.000000000 Z
12
+ date: 2013-05-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &72786300 !ruby/object:Gem::Requirement
16
+ requirement: &75139420 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *72786300
24
+ version_requirements: *75139420
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &72786080 !ruby/object:Gem::Requirement
27
+ requirement: &75139200 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *72786080
35
+ version_requirements: *75139200
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &72785860 !ruby/object:Gem::Requirement
38
+ requirement: &75138980 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *72785860
46
+ version_requirements: *75138980
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -60,8 +60,10 @@ files:
60
60
  - lib/textutils/filter/comment_filter.rb
61
61
  - lib/textutils/filter/erb_django_filter.rb
62
62
  - lib/textutils/filter/erb_filter.rb
63
+ - lib/textutils/helper/address_helper.rb
63
64
  - lib/textutils/helper/title_helper.rb
64
65
  - lib/textutils/helper/unicode_helper.rb
66
+ - lib/textutils/helper/value_helper.rb
65
67
  - lib/textutils/reader/code_reader.rb
66
68
  - lib/textutils/reader/fixture_reader.rb
67
69
  - lib/textutils/reader/hash_reader.rb