textutils 0.5.10 → 0.5.11

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -7,8 +7,10 @@ lib/textutils/filter/code_filter.rb
7
7
  lib/textutils/filter/comment_filter.rb
8
8
  lib/textutils/filter/erb_django_filter.rb
9
9
  lib/textutils/filter/erb_filter.rb
10
+ lib/textutils/helper/address_helper.rb
10
11
  lib/textutils/helper/title_helper.rb
11
12
  lib/textutils/helper/unicode_helper.rb
13
+ lib/textutils/helper/value_helper.rb
12
14
  lib/textutils/reader/code_reader.rb
13
15
  lib/textutils/reader/fixture_reader.rb
14
16
  lib/textutils/reader/hash_reader.rb
@@ -0,0 +1,29 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module TextUtils
5
+ module AddressHelper
6
+
7
+ def normalize_address( old_address_line )
8
+ # for now only checks german 5-digit zip code
9
+ #
10
+ # e.g. Alte Plauener Straße 24 // 95028 Hof becomes
11
+ # 95028 Hof // Alte Plauener Straße 24
12
+
13
+ new_address_line = old_address_line # default - do nothing - just pass through
14
+
15
+ lines = old_address_line.split( '//' )
16
+
17
+ if lines.size == 2 # two lines / check for switching lines
18
+ line1 = lines[0].strip
19
+ line2 = lines[1].strip
20
+ if line2 =~ /^[0-9]{5}\s/
21
+ new_address_line = "#{line2} // #{line1}" # swap - let line w/ 5-digit zip code go first
22
+ end
23
+ end
24
+
25
+ new_address_line
26
+ end
27
+
28
+ end # module AddressHelper
29
+ end # module TextUtils
@@ -4,29 +4,57 @@
4
4
  module TextUtils
5
5
  module TitleHelper
6
6
 
7
- def title_to_key( title )
7
+ def strip_translations( title )
8
+ # remove optional english translation in square brackets ([])
9
+ # e.g. Wien [Vienna] => Wien
10
+
11
+ title.gsub( /\[.+\]/, '' )
12
+ end
13
+
14
+ def strip_subtitles( title )
15
+ # remove optional longer title part in ()
16
+ # e.g. Las Palmas (de Gran Canaria) => Las Palmas
17
+ # Palma (de Mallorca) => Palma
18
+
19
+ title.gsub( /\(.+\)/, '' )
20
+ end
21
+
22
+ def strip_tags( title ) # todo: use an alias or rename for better name ??
23
+ # remove optional longer title part in {}
24
+ # e.g. Ottakringer {Bio} => Ottakringer
25
+ # Ottakringer {Alkoholfrei} => Ottakringer
26
+ #
27
+ # todo: use for autotags? e.g. {Bio} => bio
28
+
29
+ title.gsub( /\{.+\}/, '' )
30
+ end
31
+
32
+ def strip_whitespaces( title )
33
+ # remove all whitespace and punctuation
34
+ title.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
35
+ end
36
+
37
+ def strip_special_chars( title )
38
+ # remove special chars (e.g. %°&)
39
+ title.gsub( /[%&°]/, '' )
40
+ end
8
41
 
9
- ## NB: used in/moved from readers/values_reader.rb
42
+ def title_to_key( title )
10
43
 
44
+ ## NB: used in/moved from readers/values_reader.rb
11
45
 
12
46
  ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
13
47
  key = title.downcase
14
48
 
15
- ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
16
- key = key.gsub( /\[.+\]/, '' )
49
+ key = strip_translations( key )
17
50
 
18
- ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
19
- key = key.gsub( /\(.+\)/, '' )
20
-
21
- ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
22
- ## todo: use for autotags? e.g. {Bio} => bio
23
- key = key.gsub( /\{.+\}/, '' )
51
+ key = strip_subtitles( key )
52
+
53
+ key = strip_tags( key )
24
54
 
25
- ## remove all whitespace and punctuation
26
- key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
55
+ key = strip_whitespaces( key )
27
56
 
28
- ## remove special chars (e.g. %°&)
29
- key = key.gsub( /[%&°]/, '' )
57
+ key = strip_special_chars( key )
30
58
 
31
59
  ## turn accented char into ascii look alike if possible
32
60
  ##
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module TextUtils
5
+ module ValueHelper
6
+
7
+
8
+ def is_region?( value )
9
+ # assume region code e.g. TX or N
10
+ value =~ /^[A-Z]{1,2}$/
11
+ end
12
+
13
+ def is_year?( value )
14
+ # founded/established year e.g. 1776
15
+ value =~ /^[0-9]{4}$/
16
+ end
17
+
18
+ def is_website?( value )
19
+ # check for url/internet address e.g. www.ottakringer.at
20
+ # - must start w/ www. or
21
+ # - must end w/ .com
22
+ #
23
+ # fix: support more url formats (e.g. w/o www. - look for .com .country code etc.)
24
+ value =~ /^www\.|\.com$/
25
+ end
26
+
27
+ def is_address?( value )
28
+ # if value includes // assume address e.g. 3970 Weitra // Sparkasseplatz 160
29
+ value =~ /\/{2}/
30
+ end
31
+
32
+ def is_taglist?( value )
33
+ value =~ /^[a-z0-9\|_ ]+$/
34
+ end
35
+
36
+
37
+ end # module ValueHelper
38
+ end # module TextUtils
@@ -171,124 +171,4 @@ class ValuesReader
171
171
  end # method each_line
172
172
 
173
173
 
174
-
175
- def each_line_old_single_line_records_only
176
-
177
- @data.each_line do |line|
178
-
179
- ## allow alternative comment lines
180
- ## e.g. -- comment or
181
- ## % comment
182
- ## why? # might get used by markdown for marking headers, for example
183
-
184
- ## NB: for now alternative comment lines not allowed as end of line style e.g
185
- ## some data, more data -- comment here
186
-
187
- if line =~ /^\s*#/ || line =~ /^\s*--/ || line =~ /^\s*%/
188
- # skip comments and do NOT copy to result (keep comments secret!)
189
- logger.debug 'skipping comment line'
190
- next
191
- end
192
-
193
- if line =~ /^\s*$/
194
- # kommentar oder leerzeile überspringen
195
- logger.debug 'skipping blank line'
196
- next
197
- end
198
-
199
-
200
- # pass 1) remove possible trailing eol comment
201
- ## e.g -> nyc, New York # Sample EOL Comment Here (with or without commas,,,,)
202
- ## becomes -> nyc, New York
203
-
204
- line = line.sub( /\s+#.+$/, '' )
205
-
206
- # pass 2) remove leading and trailing whitespace
207
-
208
- line = line.strip
209
-
210
- ### guard escaped commas (e.g. \,)
211
- line = line.gsub( '\,', '@commma@' )
212
-
213
- ## use generic separator (allow us to configure separator)
214
- line = line.gsub( ',', '@sep@')
215
-
216
- ## restore escaped commas (before split)
217
- line = line.gsub( '@commma@', ',' )
218
-
219
-
220
- logger.debug "line: >>#{line}<<"
221
-
222
- values = line.split( '@sep@' )
223
-
224
- # pass 1) remove leading and trailing whitespace for values
225
-
226
- values = values.map { |value| value.strip }
227
-
228
- ##### todo remove support of comment column? (NB: must NOT include commas)
229
- # pass 2) remove comment columns
230
-
231
- values = values.select do |value|
232
- if value =~ /^#/ ## start with # treat it as a comment column; e.g. remove it
233
- logger.debug " removing column with value >>#{value}<<"
234
- false
235
- else
236
- true
237
- end
238
- end
239
-
240
- logger.debug " values: >>#{values.join('<< >>')}<<"
241
-
242
-
243
- ### todo/fix: allow check - do NOT allow mixed use of with key and w/o key
244
- ## either use keys or do NOT use keys; do NOT mix in a single fixture file
245
-
246
-
247
- ### support autogenerate key from first title value
248
-
249
- # if it looks like a key (only a-z lower case allowed); assume it's a key
250
- # - also allow . in keys e.g. world.quali.america, at.cup, etc.
251
- # - also allow 0-9 in keys e.g. at.2, at.3.1, etc.
252
-
253
- # fix/todo: add support for leading underscore _
254
- # or allow keys starting w/ digits?
255
- if values[0] =~ /^([a-z][a-z0-9.]*[a-z0-9]|[a-z])$/ # NB: key must start w/ a-z letter (NB: minimum one letter possible)
256
- key_col = values[0]
257
- title_col = values[1]
258
- more_cols = values[2..-1]
259
- else
260
- key_col = '<auto>'
261
- title_col = values[0]
262
- more_cols = values[1..-1]
263
- end
264
-
265
- attribs = {}
266
-
267
- ## title (split of optional synonyms)
268
- # e.g. FC Bayern Muenchen|Bayern Muenchen|Bayern
269
- titles = title_col.split('|')
270
-
271
- attribs[ :title ] = titles[0]
272
-
273
- ## add optional synonyms if present
274
- attribs[ :synonyms ] = titles[1..-1].join('|') if titles.size > 1
275
-
276
- if key_col == '<auto>'
277
- ## autogenerate key from first title
278
- key_col = TextUtils.title_to_key( titles[0] )
279
- logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
280
- end
281
-
282
- attribs[ :key ] = key_col
283
-
284
- attribs = attribs.merge( @more_values ) # e.g. merge country_id and other defaults if present
285
-
286
- yield( attribs, more_cols )
287
-
288
- end # each lines
289
-
290
- end # method each_line
291
-
292
-
293
-
294
174
  end # class ValuesReader
@@ -6,6 +6,7 @@ module TextUtils
6
6
  # make helpers available as class methods e.g. TextUtils.convert_unicode_dashes_to_plain_ascii
7
7
  extend UnicodeHelper
8
8
  extend TitleHelper
9
+ extend AddressHelper
9
10
  end
10
11
 
11
12
 
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.10'
4
+ VERSION = '0.5.11'
5
5
 
6
6
  end # module TextUtils
data/lib/textutils.rb CHANGED
@@ -23,6 +23,8 @@ require 'textutils/filter/erb_filter'
23
23
 
24
24
  require 'textutils/helper/unicode_helper'
25
25
  require 'textutils/helper/title_helper'
26
+ require 'textutils/helper/address_helper'
27
+ require 'textutils/helper/value_helper'
26
28
 
27
29
  require 'textutils/utils'
28
30
  require 'textutils/reader/code_reader'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.10
4
+ version: 0.5.11
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-08 00:00:00.000000000 Z
12
+ date: 2013-05-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &72786300 !ruby/object:Gem::Requirement
16
+ requirement: &75139420 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *72786300
24
+ version_requirements: *75139420
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &72786080 !ruby/object:Gem::Requirement
27
+ requirement: &75139200 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *72786080
35
+ version_requirements: *75139200
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &72785860 !ruby/object:Gem::Requirement
38
+ requirement: &75138980 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *72785860
46
+ version_requirements: *75138980
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -60,8 +60,10 @@ files:
60
60
  - lib/textutils/filter/comment_filter.rb
61
61
  - lib/textutils/filter/erb_django_filter.rb
62
62
  - lib/textutils/filter/erb_filter.rb
63
+ - lib/textutils/helper/address_helper.rb
63
64
  - lib/textutils/helper/title_helper.rb
64
65
  - lib/textutils/helper/unicode_helper.rb
66
+ - lib/textutils/helper/value_helper.rb
65
67
  - lib/textutils/reader/code_reader.rb
66
68
  - lib/textutils/reader/fixture_reader.rb
67
69
  - lib/textutils/reader/hash_reader.rb