textutils 0.5.9 → 0.5.10

Sign up to get free protection for your applications and to get access to all the features.
data/.gemtest ADDED
File without changes
data/Manifest.txt CHANGED
@@ -7,9 +7,14 @@ lib/textutils/filter/code_filter.rb
7
7
  lib/textutils/filter/comment_filter.rb
8
8
  lib/textutils/filter/erb_django_filter.rb
9
9
  lib/textutils/filter/erb_filter.rb
10
+ lib/textutils/helper/title_helper.rb
11
+ lib/textutils/helper/unicode_helper.rb
10
12
  lib/textutils/reader/code_reader.rb
13
+ lib/textutils/reader/fixture_reader.rb
11
14
  lib/textutils/reader/hash_reader.rb
12
15
  lib/textutils/reader/line_reader.rb
13
16
  lib/textutils/reader/values_reader.rb
14
17
  lib/textutils/utils.rb
15
18
  lib/textutils/version.rb
19
+ test/helper.rb
20
+ test/test_helper.rb
@@ -2,7 +2,7 @@
2
2
  module TextUtils
3
3
  module Filter
4
4
 
5
- def comments_percent_style( content, options={} )
5
+ def comments_percent_style( content, options={} )
6
6
 
7
7
  # remove comments
8
8
  # % comments
@@ -0,0 +1,155 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module TextUtils
5
+ module TitleHelper
6
+
7
+ def title_to_key( title )
8
+
9
+ ## NB: used in/moved from reader/values_reader.rb
10
+
11
+
12
+ ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
13
+ key = title.downcase
14
+
15
+ ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
16
+ key = key.gsub( /\[.+\]/, '' )
17
+
18
+ ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
19
+ key = key.gsub( /\(.+\)/, '' )
20
+
21
+ ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
22
+ ## todo: use for autotags? e.g. {Bio} => bio
23
+ key = key.gsub( /\{.+\}/, '' )
24
+
25
+ ## remove all whitespace and punctuation
26
+ key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
27
+
28
+ ## remove special chars (e.g. %°&)
29
+ key = key.gsub( /[%&°]/, '' )
30
+
31
+ ## turn accented char into ascii look alike if possible
32
+ ##
33
+ ## todo: add some more
34
+ ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
35
+
36
+ ## todo: add unicode codepoint name
37
+
38
+ alternatives = [
39
+ ['ß', 'ss'],
40
+ ['æ', 'ae'],
41
+ ['ä', 'ae'],
42
+ ['ā', 'a' ], # e.g. Liepājas
43
+ ['á', 'a' ], # e.g. Bogotá, Králové
44
+ ['ã', 'a' ], # e.g São Paulo
45
+ ['ă', 'a' ], # e.g. Chișinău
46
+ ['â', 'a' ], # e.g Goiânia
47
+ ['å', 'a' ], # e.g. Vålerenga
48
+ ['ą', 'a' ], # e.g. Śląsk
49
+ ['ç', 'c' ], # e.g. São Gonçalo, Iguaçu, Neftçi
50
+ ['ć', 'c' ], # e.g. Budućnost
51
+ ['č', 'c' ], # e.g. Tradiční, Výčepní
52
+ ['é', 'e' ], # e.g. Vélez, Králové
53
+ ['è', 'e' ], # e.g. Rivières
54
+ ['ê', 'e' ], # e.g. Grêmio
55
+ ['ě', 'e' ], # e.g. Budějovice
56
+ ['ĕ', 'e' ], # e.g. Svĕtlý
57
+ ['ė', 'e' ], # e.g. Vėtra
58
+ ['ë', 'e' ], # e.g. Skënderbeu
59
+ ['ğ', 'g' ], # e.g. Qarabağ
60
+ ['ì', 'i' ], # e.g. Potosì
61
+ ['í', 'i' ], # e.g. Ústí
62
+ ['ł', 'l' ], # e.g. Wisła, Wrocław
63
+ ['ñ', 'n' ], # e.g. Porteño
64
+ ['ň', 'n' ], # e.g. Plzeň, Třeboň
65
+ ['ö', 'oe'],
66
+ ['ő', 'o' ], # e.g. Győri
67
+ ['ó', 'o' ], # e.g. Colón, Łódź, Kraków
68
+ ['õ', 'o' ], # e.g. Nõmme
69
+ ['ø', 'o' ], # e.g. Fuglafjørdur, København
70
+ ['ř', 'r' ], # e.g. Třeboň
71
+ ['ș', 's' ], # e.g. Chișinău, București
72
+ ['ş', 's' ], # e.g. Beşiktaş
73
+ ['š', 's' ], # e.g. Košice
74
+ ['ť', 't' ], # e.g. Měšťan
75
+ ['ü', 'ue'],
76
+ ['ú', 'u' ], # e.g. Fútbol
77
+ ['ū', 'u' ], # e.g. Sūduva
78
+ ['ů', 'u' ], # e.g. Sládkův
79
+ ['ı', 'u' ], # e.g. Bakı # use u?? (Baku) why-why not?
80
+ ['ý', 'y' ], # e.g. Nefitrovaný
81
+ ['ź', 'z' ], # e.g. Łódź
82
+ ['ž', 'z' ], # e.g. Domžale, Petržalka
83
+
84
+ ['Č', 'c' ], # e.g. České
85
+ ['İ', 'i' ], # e.g. İnter
86
+ ['Í', 'i' ], # e.g. ÍBV
87
+ ['Ł', 'l' ], # e.g. Łódź
88
+ ['Ö', 'oe' ], # e.g. Örebro
89
+ ['Ř', 'r' ], # e.g. Řezák
90
+ ['Ś', 's' ], # e.g. Śląsk
91
+ ['Š', 's' ], # e.g. MŠK
92
+ ['Ş', 's' ], # e.g. Şüvälan
93
+ ['Ú', 'u' ], # e.g. Ústí, Újpest
94
+ ['Ž', 'z' ] # e.g. Žilina
95
+ ]
96
+
97
+ alternatives.each do |alt|
98
+ key = key.gsub( alt[0], alt[1] )
99
+ end
100
+
101
+ key
102
+ end # method title_to_key
103
+
104
+
105
+ def title_esc_regex( title_unescaped )
106
+
107
+ ## escape regex special chars e.g. . to \. and ( to \( etc.
108
+ # e.g. Benfica Lis.
109
+ # e.g. Club Atlético Colón (Santa Fe)
110
+
111
+ ## NB: cannot use Regexp.escape! will escape space ' ' to '\ '
112
+ ## title = Regexp.escape( title_unescaped )
113
+ title = title_unescaped.gsub( '.', '\.' )
114
+ title = title.gsub( '(', '\(' )
115
+ title = title.gsub( ')', '\)' )
116
+
117
+ ## match accented char with or without accents
118
+ ## add (ü|ue) etc.
119
+ ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
120
+
121
+ ## todo: add some more
122
+ ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
123
+ ##
124
+ ## reuse for all readers!
125
+
126
+ alternatives = [
127
+ ['-', '(-| )'], ## e.g. Blau-Weiß Linz
128
+ ['æ', '(æ|ae)'], ## e.g.
129
+ ['á', '(á|a)'], ## e.g. Bogotá, Sársfield
130
+ ['ã', '(ã|a)'], ## e.g São Paulo
131
+ ['ä', '(ä|ae)'], ## e.g.
132
+ ['ç', '(ç|c)'], ## e.g. Fenerbahçe
133
+ ['é', '(é|e)'], ## e.g. Vélez
134
+ ['ê', '(ê|e)'], ## e.g. Grêmio
135
+ ['ñ', '(ñ|n)'], ## e.g. Porteño
136
+ ['ň', '(ň|n)'], ## e.g. Plzeň
137
+ ['Ö', '(Ö|Oe)'], ## e.g. Österreich
138
+ ['ö', '(ö|oe)'], ## e.g. Mönchengladbach
139
+ ['ó', '(ó|o)'], ## e.g. Colón
140
+ ['ș', '(ș|s)'], ## e.g. Bucarești
141
+ ['ß', '(ß|ss)'], ## e.g. Blau-Weiß Linz
142
+ ['ü', '(ü|ue)'], ## e.g.
143
+ ['ú', '(ú|u)'] ## e.g. Fútbol
144
+ ]
145
+
146
+ alternatives.each do |alt|
147
+ title = title.gsub( alt[0], alt[1] )
148
+ end
149
+
150
+ title
151
+ end
152
+
153
+
154
+ end # module TitleHelper
155
+ end # module TextUtils
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module TextUtils
5
+ module UnicodeHelper
6
+
7
+ # NB:
8
+ # U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. -
9
+ #
10
+ # see en.wikipedia.org/wiki/Dash
11
+
12
+ U_HYPHEN = "\u2010" # unambiguous hyphen
13
+ U_NON_BREAKING_HYPHEN = "\u2011" # unambiguous non-breaking hyphen
14
+ U_MINUS = "\u2212" # unambiguous minus sign (html => &minus;)
15
+ U_NDASH = "\u2013" # ndash (html => &ndash; ascii => --)
16
+ U_MDASH = "\u2014" # mdash (html => &mdash; ascii => ---)
17
+
18
+ def convert_unicode_dashes_to_plain_ascii( text, opts = {} )
19
+
20
+ text = text.gsub( /(#{U_HYPHEN}|#{U_NON_BREAKING_HYPHEN}|#{U_MINUS}|#{U_NDASH}|#{U_MDASH})/ ) do |_|
21
+
22
+ # puts "found U+#{'%04X' % $1.ord} (#{$1})"
23
+
24
+ msg = ''
25
+
26
+ if $1 == U_HYPHEN
27
+ msg << "found hyhpen U+2010 (#{$1})"
28
+ elsif $1 == U_NON_BREAKING_HYPHEN
29
+ msg << "found non_breaking_hyhpen U+2011 (#{$1})"
30
+ elsif $1 == U_MINUS
31
+ msg << "found minus U+2212 (#{$1})"
32
+ elsif $1 == U_NDASH
33
+ msg << "found ndash U+2013 (#{$1})"
34
+ elsif $1 == U_MDASH
35
+ msg << "found mdash U+2014 (#{$1})"
36
+ else
37
+ msg << "found unknown unicode dash U+#{'%04X' % $1.ord} (#{$1})"
38
+ end
39
+
40
+ msg << " in file >#{opts[:path]}<" if opts[:path]
41
+ msg << "; converting to plain ascii hyphen_minus (-)"
42
+
43
+ puts "*** warning: #{msg}"
44
+
45
+ '-'
46
+ end
47
+
48
+ text
49
+ end # method convert_unicode_dashes_to_plain_ascii
50
+
51
+
52
+ end # module UnicodeHelper
53
+ end # module TextUtils
@@ -1,5 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # fix: move into TextUtils namespace/module!!
4
+
3
5
  class CodeReader
4
6
 
5
7
  include LogUtils::Logging
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
+ ### read in a list of fixtures (that is, fixture names/files)
4
+
5
+ # fix: move into TextUtils namespace/module!!
6
+
7
+ class FixtureReader
8
+
9
+ include LogUtils::Logging
10
+
11
+ def initialize( path )
12
+ @path = path
13
+
14
+ ## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
15
+ ## - see textutils/utils.rb
16
+ text = File.read_utf8( @path )
17
+
18
+ hash = YAML.load( text )
19
+
20
+ ### build up array for fixtures from hash
21
+
22
+ @ary = []
23
+
24
+ hash.each do |key_wild, value_wild|
25
+ key = key_wild.to_s.strip
26
+
27
+ logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<"
28
+
29
+ if value_wild.kind_of?( String ) # assume single fixture name
30
+ @ary << value_wild
31
+ elsif value_wild.kind_of?( Array ) # assume array of fixture names as strings
32
+ @ary = ary + value_wild
33
+ else
34
+ logger.error "unknow fixture type in setup (yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<); skipping"
35
+ end
36
+ end
37
+
38
+ logger.debug "fixture setup:"
39
+ logger.debug @ary.to_json
40
+ end
41
+
42
+ def each
43
+ @ary.each do |fixture|
44
+ yield( fixture )
45
+ end
46
+ end # method each
47
+
48
+ end # class FixtureReader
@@ -1,6 +1,8 @@
1
1
  # encoding: utf-8
2
2
 
3
3
 
4
+ # fix: move into TextUtils namespace/module!!
5
+
4
6
  class HashReader
5
7
 
6
8
  include LogUtils::Logging
@@ -4,6 +4,9 @@
4
4
  ## fix/todo: move to/merge into LineReader itself
5
5
  # e.g. use fromString c'tor ??? or similar??
6
6
 
7
+ # fix: move into TextUtils namespace/module!!
8
+
9
+
7
10
  class StringLineReader
8
11
 
9
12
  include LogUtils::Logging
@@ -1,5 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # fix: move into TextUtils namespace/module!!
4
+
3
5
  class ValuesReader
4
6
 
5
7
  include LogUtils::Logging
@@ -150,7 +152,7 @@ class ValuesReader
150
152
 
151
153
  if key_col == '<auto>'
152
154
  ## autogenerate key from first title
153
- key_col = title_to_key( titles[0] )
155
+ key_col = TextUtils.title_to_key( titles[0] )
154
156
  logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
155
157
  end
156
158
 
@@ -273,7 +275,7 @@ class ValuesReader
273
275
 
274
276
  if key_col == '<auto>'
275
277
  ## autogenerate key from first title
276
- key_col = title_to_key( titles[0] )
278
+ key_col = TextUtils.title_to_key( titles[0] )
277
279
  logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
278
280
  end
279
281
 
@@ -286,101 +288,7 @@ class ValuesReader
286
288
  end # each lines
287
289
 
288
290
  end # method each_line
289
-
290
-
291
291
 
292
- def title_to_key( title )
293
-
294
- ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
295
- key = title.downcase
296
-
297
- ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
298
- key = key.gsub( /\[.+\]/, '' )
299
-
300
- ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
301
- key = key.gsub( /\(.+\)/, '' )
302
-
303
- ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
304
- ## todo: use for autotags? e.g. {Bio} => bio
305
- key = key.gsub( /\{.+\}/, '' )
306
-
307
- ## remove all whitespace and punctuation
308
- key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
309
-
310
- ## remove special chars (e.g. %°&)
311
- key = key.gsub( /[%&°]/, '' )
312
-
313
- ## turn accented char into ascii look alike if possible
314
- ##
315
- ## todo: add some more
316
- ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
317
-
318
- ## todo: add unicode codepoint name
319
-
320
- alternatives = [
321
- ['ß', 'ss'],
322
- ['æ', 'ae'],
323
- ['ä', 'ae'],
324
- ['ā', 'a' ], # e.g. Liepājas
325
- ['á', 'a' ], # e.g. Bogotá, Králové
326
- ['ã', 'a' ], # e.g São Paulo
327
- ['ă', 'a' ], # e.g. Chișinău
328
- ['â', 'a' ], # e.g Goiânia
329
- ['å', 'a' ], # e.g. Vålerenga
330
- ['ą', 'a' ], # e.g. Śląsk
331
- ['ç', 'c' ], # e.g. São Gonçalo, Iguaçu, Neftçi
332
- ['ć', 'c' ], # e.g. Budućnost
333
- ['č', 'c' ], # e.g. Tradiční, Výčepní
334
- ['é', 'e' ], # e.g. Vélez, Králové
335
- ['è', 'e' ], # e.g. Rivières
336
- ['ê', 'e' ], # e.g. Grêmio
337
- ['ě', 'e' ], # e.g. Budějovice
338
- ['ĕ', 'e' ], # e.g. Svĕtlý
339
- ['ė', 'e' ], # e.g. Vėtra
340
- ['ë', 'e' ], # e.g. Skënderbeu
341
- ['ğ', 'g' ], # e.g. Qarabağ
342
- ['ì', 'i' ], # e.g. Potosì
343
- ['í', 'i' ], # e.g. Ústí
344
- ['ł', 'l' ], # e.g. Wisła, Wrocław
345
- ['ñ', 'n' ], # e.g. Porteño
346
- ['ň', 'n' ], # e.g. Plzeň, Třeboň
347
- ['ö', 'oe'],
348
- ['ő', 'o' ], # e.g. Győri
349
- ['ó', 'o' ], # e.g. Colón, Łódź, Kraków
350
- ['õ', 'o' ], # e.g. Nõmme
351
- ['ø', 'o' ], # e.g. Fuglafjørdur, København
352
- ['ř', 'r' ], # e.g. Třeboň
353
- ['ș', 's' ], # e.g. Chișinău, București
354
- ['ş', 's' ], # e.g. Beşiktaş
355
- ['š', 's' ], # e.g. Košice
356
- ['ť', 't' ], # e.g. Měšťan
357
- ['ü', 'ue'],
358
- ['ú', 'u' ], # e.g. Fútbol
359
- ['ū', 'u' ], # e.g. Sūduva
360
- ['ů', 'u' ], # e.g. Sládkův
361
- ['ı', 'u' ], # e.g. Bakı # use u?? (Baku) why-why not?
362
- ['ý', 'y' ], # e.g. Nefitrovaný
363
- ['ź', 'z' ], # e.g. Łódź
364
- ['ž', 'z' ], # e.g. Domžale, Petržalka
365
-
366
- ['Č', 'c' ], # e.g. České
367
- ['İ', 'i' ], # e.g. İnter
368
- ['Í', 'i' ], # e.g. ÍBV
369
- ['Ł', 'l' ], # e.g. Łódź
370
- ['Ö', 'oe' ], # e.g. Örebro
371
- ['Ř', 'r' ], # e.g. Řezák
372
- ['Ś', 's' ], # e.g. Śląsk
373
- ['Š', 's' ], # e.g. MŠK
374
- ['Ş', 's' ], # e.g. Şüvälan
375
- ['Ú', 'u' ], # e.g. Ústí, Újpest
376
- ['Ž', 'z' ] # e.g. Žilina
377
- ]
378
-
379
- alternatives.each do |alt|
380
- key = key.gsub( alt[0], alt[1] )
381
- end
382
292
 
383
- key
384
- end # method title_to_key
385
293
 
386
294
  end # class ValuesReader
@@ -1,6 +1,15 @@
1
1
  # encoding: utf-8
2
2
 
3
3
 
4
+
5
+ module TextUtils
6
+ # make helpers available as class methods e.g. TextUtils.convert_unicode_dashes_to_plain_ascii
7
+ extend UnicodeHelper
8
+ extend TitleHelper
9
+ end
10
+
11
+
12
+
4
13
  class File
5
14
  def self.read_utf8( path )
6
15
  text = open( path, 'r:bom|utf-8' ) do |file|
@@ -8,107 +17,16 @@ class File
8
17
  end
9
18
 
10
19
  # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
11
- text = convert_unicode_dashes_to_plain_ascii( text, path: path )
20
+ text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
12
21
 
13
22
  text
14
23
  end
15
24
  end # class File
16
25
 
17
26
 
18
- # NB:
19
- # U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. -
20
- #
21
- # see en.wikipedia.org/wiki/Dash
22
-
23
- U_HYPHEN = "\u2010" # unambigous hyphen
24
- U_NON_BREAKING_HYPHEN = "\u2011" # unambigous non-breaking hyphen
25
- U_MINUS = "\u2212" # unambigous minus sign (html => &minus;)
26
- U_NDASH = "\u2013" # ndash (html => &ndash; ascii => --)
27
- U_MDASH = "\u2014" # mdash (html => &mdash; ascii => ---)
28
-
29
-
30
- def convert_unicode_dashes_to_plain_ascii( text, opts = {} )
31
-
32
- text = text.gsub( /(#{U_HYPHEN}|#{U_NON_BREAKING_HYPHEN}|#{U_MINUS}|#{U_NDASH}|#{U_MDASH})/ ) do |_|
33
-
34
- # puts "found U+#{'%04X' % $1.ord} (#{$1})"
35
-
36
- msg = ''
37
-
38
- if $1 == U_HYPHEN
39
- msg << "found hyhpen U+2010 (#{$1})"
40
- elsif $1 == U_NON_BREAKING_HYPHEN
41
- msg << "found non_breaking_hyhpen U+2011 (#{$1})"
42
- elsif $1 == U_MINUS
43
- msg << "found minus U+2212 (#{$1})"
44
- elsif $1 == U_NDASH
45
- msg << "found ndash U+2013 (#{$1})"
46
- elsif $1 == U_MDASH
47
- msg << "found mdash U+2014 (#{$1})"
48
- else
49
- msg << "found unknown unicode dash U+#{'%04X' % $1.ord} (#{$1})"
50
- end
51
27
 
52
- msg << " in file >#{opts[:path]}<" if opts[:path]
53
- msg << "; converting to plain ascii hyphen_minus (-)"
54
-
55
- puts "*** warning: #{msg}"
28
+ def title_esc_regex( title_unescaped )
29
+ puts "*** warn: depreceated fn call: use TextUtils.title_esc_regex() or include TextUtils::TitleHelpers"
30
+ TextUtils.title_esc_regex( title_unescaped )
31
+ end
56
32
 
57
- '-'
58
- end
59
-
60
- text
61
- end # method convert_unicode_dashes_to_plain_ascii
62
-
63
-
64
- ############
65
- ### fix/todo: share helper for all text readers/parsers- where to put it?
66
- ###
67
-
68
- def title_esc_regex( title_unescaped )
69
-
70
- ## escape regex special chars e.g. . to \. and ( to \( etc.
71
- # e.g. Benfica Lis.
72
- # e.g. Club Atlético Colón (Santa Fe)
73
-
74
- ## NB: cannot use Regexp.escape! will escape space '' to '\ '
75
- ## title = Regexp.escape( title_unescaped )
76
- title = title_unescaped.gsub( '.', '\.' )
77
- title = title.gsub( '(', '\(' )
78
- title = title.gsub( ')', '\)' )
79
-
80
- ## match accented char with or without accents
81
- ## add (ü|ue) etc.
82
- ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
83
-
84
- ## todo: add some more
85
- ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
86
- ##
87
- ## reuse for all readers!
88
-
89
- alternatives = [
90
- ['-', '(-| )'], ## e.g. Blau-Weiß Linz
91
- ['æ', '(æ|ae)'], ## e.g.
92
- ['á', '(á|a)'], ## e.g. Bogotá, Sársfield
93
- ['ã', '(ã|a)'], ## e.g São Paulo
94
- ['ä', '(ä|ae)'], ## e.g.
95
- ['ç', '(ç|c)'], ## e.g. Fenerbahçe
96
- ['é', '(é|e)'], ## e.g. Vélez
97
- ['ê', '(ê|e)'], ## e.g. Grêmio
98
- ['ñ', '(ñ|n)'], ## e.g. Porteño
99
- ['ň', '(ň|n)'], ## e.g. Plzeň
100
- ['Ö', '(Ö|Oe)'], ## e.g. Österreich
101
- ['ö', '(ö|oe)'], ## e.g. Mönchengladbach
102
- ['ó', '(ó|o)'], ## e.g. Colón
103
- ['ș', '(ș|s)'], ## e.g. Bucarești
104
- ['ß', '(ß|ss)'], ## e.g. Blau-Weiß Linz
105
- ['ü', '(ü|ue)'], ## e.g.
106
- ['ú', '(ú|u)'] ## e.g. Fútbol
107
- ]
108
-
109
- alternatives.each do |alt|
110
- title = title.gsub( alt[0], alt[1] )
111
- end
112
-
113
- title
114
- end
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.9'
4
+ VERSION = '0.5.10'
5
5
 
6
6
  end # module TextUtils
data/lib/textutils.rb CHANGED
@@ -21,10 +21,13 @@ require 'textutils/filter/comment_filter'
21
21
  require 'textutils/filter/erb_django_filter'
22
22
  require 'textutils/filter/erb_filter'
23
23
 
24
+ require 'textutils/helper/unicode_helper'
25
+ require 'textutils/helper/title_helper'
26
+
24
27
  require 'textutils/utils'
25
28
  require 'textutils/reader/code_reader'
26
29
  require 'textutils/reader/hash_reader'
27
30
  require 'textutils/reader/line_reader'
28
31
  require 'textutils/reader/values_reader'
29
-
32
+ require 'textutils/reader/fixture_reader'
30
33
 
data/test/helper.rb ADDED
@@ -0,0 +1,14 @@
1
+
2
+ ## $:.unshift(File.dirname(__FILE__))
3
+
4
+ ## minitest setup
5
+
6
+ # require 'minitest/unit'
7
+ require 'minitest/autorun'
8
+
9
+ # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
10
+
11
+
12
+ ## our own code
13
+
14
+ require 'textutils'
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_helper.rb
6
+ # or better
7
+ # rake test
8
+
9
+ require 'helper'
10
+
11
+ class TestHelper < MiniTest::Unit::TestCase
12
+
13
+ def test_convert_unicode_dashes
14
+
15
+ txt_in = "\u2010 \u2011 \u2212 \u2013 \u2014" # NB: unicode chars require double quoted strings
16
+ txt_out = '- - - - -'
17
+
18
+ assert( txt_out == TextUtils.convert_unicode_dashes_to_plain_ascii( txt_in ) )
19
+ end
20
+
21
+
22
+ def test_title_to_key
23
+
24
+ txt_io = [
25
+ [ 'São Paulo', 'saopaulo' ],
26
+ [ 'São Gonçalo', 'saogoncalo' ],
27
+ [ 'Výčepní', 'vycepni' ]
28
+ ]
29
+
30
+ txt_io.each do |txt|
31
+ assert( txt[1] == TextUtils.title_to_key( txt[0] ) )
32
+ end
33
+ end
34
+
35
+
36
+ end # class TestHelper
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.9
4
+ version: 0.5.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-05 00:00:00.000000000 Z
12
+ date: 2013-05-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &84970780 !ruby/object:Gem::Requirement
16
+ requirement: &72786300 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *84970780
24
+ version_requirements: *72786300
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &84970560 !ruby/object:Gem::Requirement
27
+ requirement: &72786080 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *84970560
35
+ version_requirements: *72786080
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &84970340 !ruby/object:Gem::Requirement
38
+ requirement: &72785860 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *84970340
46
+ version_requirements: *72785860
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -60,12 +60,18 @@ files:
60
60
  - lib/textutils/filter/comment_filter.rb
61
61
  - lib/textutils/filter/erb_django_filter.rb
62
62
  - lib/textutils/filter/erb_filter.rb
63
+ - lib/textutils/helper/title_helper.rb
64
+ - lib/textutils/helper/unicode_helper.rb
63
65
  - lib/textutils/reader/code_reader.rb
66
+ - lib/textutils/reader/fixture_reader.rb
64
67
  - lib/textutils/reader/hash_reader.rb
65
68
  - lib/textutils/reader/line_reader.rb
66
69
  - lib/textutils/reader/values_reader.rb
67
70
  - lib/textutils/utils.rb
68
71
  - lib/textutils/version.rb
72
+ - test/helper.rb
73
+ - test/test_helper.rb
74
+ - .gemtest
69
75
  homepage: http://geraldb.github.com/textutils
70
76
  licenses:
71
77
  - Public Domain
@@ -93,4 +99,5 @@ rubygems_version: 1.8.17
93
99
  signing_key:
94
100
  specification_version: 3
95
101
  summary: textutils - Text Filters, Helpers, Readers and More
96
- test_files: []
102
+ test_files:
103
+ - test/test_helper.rb