textutils 0.5.9 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gemtest ADDED
File without changes
data/Manifest.txt CHANGED
@@ -7,9 +7,14 @@ lib/textutils/filter/code_filter.rb
7
7
  lib/textutils/filter/comment_filter.rb
8
8
  lib/textutils/filter/erb_django_filter.rb
9
9
  lib/textutils/filter/erb_filter.rb
10
+ lib/textutils/helper/title_helper.rb
11
+ lib/textutils/helper/unicode_helper.rb
10
12
  lib/textutils/reader/code_reader.rb
13
+ lib/textutils/reader/fixture_reader.rb
11
14
  lib/textutils/reader/hash_reader.rb
12
15
  lib/textutils/reader/line_reader.rb
13
16
  lib/textutils/reader/values_reader.rb
14
17
  lib/textutils/utils.rb
15
18
  lib/textutils/version.rb
19
+ test/helper.rb
20
+ test/test_helper.rb
@@ -2,7 +2,7 @@
2
2
  module TextUtils
3
3
  module Filter
4
4
 
5
- def comments_percent_style( content, options={} )
5
+ def comments_percent_style( content, options={} )
6
6
 
7
7
  # remove comments
8
8
  # % comments
@@ -0,0 +1,155 @@
1
+ # encoding: utf-8
2
+
3
+
4
module TextUtils
  ## helpers for deriving lookup keys and regex patterns from (human-readable) titles
  module TitleHelper

    ## [accented char, ascii look-alike] pairs applied (in order) by title_to_key
    ##
    ## NB: uppercase accented chars are listed too because downcase might NOT
    ##     lowercase accented chars
    ##
    ## todo: add some more
    ##   see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
    ## todo: add unicode codepoint name
    TITLE_KEY_ALTERNATIVES = [
      ['ß', 'ss'],
      ['æ', 'ae'],
      ['ä', 'ae'],
      ['ā', 'a' ],  # e.g. Liepājas
      ['á', 'a' ],  # e.g. Bogotá, Králové
      ['ã', 'a' ],  # e.g São Paulo
      ['ă', 'a' ],  # e.g. Chișinău
      ['â', 'a' ],  # e.g Goiânia
      ['å', 'a' ],  # e.g. Vålerenga
      ['ą', 'a' ],  # e.g. Śląsk
      ['ç', 'c' ],  # e.g. São Gonçalo, Iguaçu, Neftçi
      ['ć', 'c' ],  # e.g. Budućnost
      ['č', 'c' ],  # e.g. Tradiční, Výčepní
      ['é', 'e' ],  # e.g. Vélez, Králové
      ['è', 'e' ],  # e.g. Rivières
      ['ê', 'e' ],  # e.g. Grêmio
      ['ě', 'e' ],  # e.g. Budějovice
      ['ĕ', 'e' ],  # e.g. Svĕtlý
      ['ė', 'e' ],  # e.g. Vėtra
      ['ë', 'e' ],  # e.g. Skënderbeu
      ['ğ', 'g' ],  # e.g. Qarabağ
      ['ì', 'i' ],  # e.g. Potosì
      ['í', 'i' ],  # e.g. Ústí
      ['ł', 'l' ],  # e.g. Wisła, Wrocław
      ['ñ', 'n' ],  # e.g. Porteño
      ['ň', 'n' ],  # e.g. Plzeň, Třeboň
      ['ö', 'oe'],
      ['ő', 'o' ],  # e.g. Győri
      ['ó', 'o' ],  # e.g. Colón, Łódź, Kraków
      ['õ', 'o' ],  # e.g. Nõmme
      ['ø', 'o' ],  # e.g. Fuglafjørdur, København
      ['ř', 'r' ],  # e.g. Třeboň
      ['ș', 's' ],  # e.g. Chișinău, București
      ['ş', 's' ],  # e.g. Beşiktaş
      ['š', 's' ],  # e.g. Košice
      ['ť', 't' ],  # e.g. Měšťan
      ['ü', 'ue'],
      ['ú', 'u' ],  # e.g. Fútbol
      ['ū', 'u' ],  # e.g. Sūduva
      ['ů', 'u' ],  # e.g. Sládkův
      ['ı', 'u' ],  # e.g. Bakı  # use u?? (Baku) why-why not?
      ['ý', 'y' ],  # e.g. Nefitrovaný
      ['ź', 'z' ],  # e.g. Łódź
      ['ž', 'z' ],  # e.g. Domžale, Petržalka

      ['Č', 'c' ],  # e.g. České
      ['İ', 'i' ],  # e.g. İnter
      ['Í', 'i' ],  # e.g. ÍBV
      ['Ł', 'l' ],  # e.g. Łódź
      ['Ö', 'oe' ], # e.g. Örebro
      ['Ř', 'r' ],  # e.g. Řezák
      ['Ś', 's' ],  # e.g. Śląsk
      ['Š', 's' ],  # e.g. MŠK
      ['Ş', 's' ],  # e.g. Şüvälan
      ['Ú', 'u' ],  # e.g. Ústí, Újpest
      ['Ž', 'z' ]   # e.g. Žilina
    ].freeze

    ## regex special chars that title_esc_regex escapes with a backslash
    ##  NB: space is NOT escaped (on purpose) and - is handled via TITLE_REGEX_ALTERNATIVES
    TITLE_REGEX_SPECIAL_CHARS = /[.()\[\]{}+*?|^$\\]/

    ## [char, regex alternation] pairs applied (in order) by title_esc_regex
    ##   - match accented char with or without accents e.g. (ü|ue)
    ##   - make - optional e.g. Blau-Weiss == Blau Weiss
    ##
    ## todo: add some more
    ##   see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
    TITLE_REGEX_ALTERNATIVES = [
      ['-', '(-| )'],   ## e.g. Blau-Weiß Linz
      ['æ', '(æ|ae)'],  ## e.g.
      ['á', '(á|a)'],   ## e.g. Bogotá, Sársfield
      ['ã', '(ã|a)'],   ## e.g São Paulo
      ['ä', '(ä|ae)'],  ## e.g.
      ['ç', '(ç|c)'],   ## e.g. Fenerbahçe
      ['é', '(é|e)'],   ## e.g. Vélez
      ['ê', '(ê|e)'],   ## e.g. Grêmio
      ['ñ', '(ñ|n)'],   ## e.g. Porteño
      ['ň', '(ň|n)'],   ## e.g. Plzeň
      ['Ö', '(Ö|Oe)'],  ## e.g. Österreich
      ['ö', '(ö|oe)'],  ## e.g. Mönchengladbach
      ['ó', '(ó|o)'],   ## e.g. Colón
      ['ș', '(ș|s)'],   ## e.g. Bucarești
      ['ß', '(ß|ss)'],  ## e.g. Blau-Weiß Linz
      ['ü', '(ü|ue)'],  ## e.g.
      ['ú', '(ú|u)']    ## e.g. Fútbol
    ].freeze


    ## Build a plain lookup key from a title e.g. São Paulo => saopaulo
    ##
    ##  title - title string e.g. 'Wien [Vienna]' or 'Las Palmas (de Gran Canaria)'
    ##
    ## Returns a new string (downcased, optional parts in [], () and {} removed,
    ##  whitespace/punctuation removed, accented chars replaced by ascii look-alikes).
    ##
    ## NB: used in/moved from readers/values_reader.rb
    def title_to_key( title )

      ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
      key = title.downcase

      ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
      key = key.gsub( /\[.+\]/, '' )

      ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
      key = key.gsub( /\(.+\)/, '' )

      ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
      ## todo: use for autotags? e.g. {Bio} => bio
      key = key.gsub( /\{.+\}/, '' )

      ## remove all whitespace and punctuation
      key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )

      ## remove special chars (e.g. %°&)
      key = key.gsub( /[%&°]/, '' )

      ## turn accented char into ascii look alike if possible
      TITLE_KEY_ALTERNATIVES.each do |accented, plain|
        key = key.gsub( accented, plain )
      end

      key
    end # method title_to_key


    ## Turn a title into a regex source string that matches the title literally
    ##  (with or without accents, with - or space).
    ##
    ##  title_unescaped - title string e.g. 'Benfica Lis.' or 'Club Atlético Colón (Santa Fe)'
    ##
    ## Returns a new string suitable for use in a regex e.g. 'Benfica Lis\.'
    def title_esc_regex( title_unescaped )

      ## escape regex special chars e.g. . to \. and ( to \( etc.
      ##
      ## NB: cannot use Regexp.escape! will escape space '' to '\ '
      ##  thus, escape the special chars by hand; includes + * ? [ ] { } | ^ $ and \
      ##  (otherwise titles such as A+B or Team [X] result in wrong or invalid patterns)
      title = title_unescaped.gsub( TITLE_REGEX_SPECIAL_CHARS ) { |char| "\\#{char}" }

      ## match accented char with or without accents; make - optional
      ##  NB: runs after escaping - the ( | ) added here must stay unescaped
      TITLE_REGEX_ALTERNATIVES.each do |char, pattern|
        title = title.gsub( char, pattern )
      end

      title
    end


  end # module TitleHelper
end # module TextUtils
@@ -0,0 +1,53 @@
1
+ # encoding: utf-8
2
+
3
+
4
module TextUtils
  # helpers for normalizing fancy unicode chars
  module UnicodeHelper

    # NB:
    #  U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. -
    #
    #  see en.wikipedia.org/wiki/Dash

    U_HYPHEN              = "\u2010"  # unambiguous hyphen
    U_NON_BREAKING_HYPHEN = "\u2011"  # unambiguous non-breaking hyphen
    U_MINUS               = "\u2212"  # unambiguous minus sign (html => &minus;)
    U_NDASH               = "\u2013"  # ndash  (html => &ndash;  ascii => --)
    U_MDASH               = "\u2014"  # mdash  (html => &mdash;  ascii => ---)

    # dash char => label (name plus codepoint) used in the warning message
    U_DASH_LABELS = {
      U_HYPHEN              => 'hyphen U+2010',
      U_NON_BREAKING_HYPHEN => 'non_breaking_hyphen U+2011',
      U_MINUS               => 'minus U+2212',
      U_NDASH               => 'ndash U+2013',
      U_MDASH               => 'mdash U+2014'
    }.freeze

    # matches any one of the dash chars listed in U_DASH_LABELS
    U_DASH_REGEX = Regexp.union( U_DASH_LABELS.keys )


    # Replace fancy unicode dashes/hyphens with the plain ascii hyphen-minus (-).
    #
    #  text - string to convert (not modified; a new string gets returned)
    #  opts - optional hash; opts[:path] (if present) gets included in the warning
    #
    # Prints one warning line (using puts) for every dash replaced.
    #
    # Returns the converted string.
    def convert_unicode_dashes_to_plain_ascii( text, opts = {} )

      text.gsub( U_DASH_REGEX ) do |dash|

        # NB: fallback kept for safety; the regex only matches chars that have a label
        label = U_DASH_LABELS[ dash ] || "unknown unicode dash U+#{'%04X' % dash.ord}"

        location = opts[:path] ? " in file >#{opts[:path]}<" : ''

        puts "*** warning: found #{label} (#{dash})#{location}; converting to plain ascii hyphen_minus (-)"

        '-'
      end
    end # method convert_unicode_dashes_to_plain_ascii


  end # module UnicodeHelper
end # module TextUtils
@@ -1,5 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # fix: move into TextUtils namespace/module!!
4
+
3
5
  class CodeReader
4
6
 
5
7
  include LogUtils::Logging
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+
3
+ ### read in a list of fixtures (that is, fixture names/files)
4
+
5
+ # fix: move into TextUtils namespace/module!!
6
+
7
### Reads a list of fixtures (that is, fixture names/files) from a yaml file
###  and yields them one by one via each.
class FixtureReader

  include LogUtils::Logging

  ## Load the yaml file at path and collect all fixture names.
  ##
  ##  path - path to yaml file; every value is expected to be a fixture name (string)
  ##          or an array of fixture names; values of any other type get
  ##          logged as an error and skipped
  def initialize( path )
    @path = path

    ## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
    ##   - see textutils/utils.rb
    text = File.read_utf8( @path )

    ## NOTE(review): YAML.load is NOT safe for untrusted input;
    ##   consider YAML.safe_load if fixture files can come from outside - confirm w/ callers
    hash = YAML.load( text )

    ### build up array for fixtures from hash

    @ary = []

    hash.each do |key_wild, value_wild|
      key = key_wild.to_s.strip

      logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<"

      if value_wild.kind_of?( String )   # assume single fixture name
        @ary << value_wild
      elsif value_wild.kind_of?( Array )  # assume array of fixture names as strings
        ## NB: must use @ary (was: ary - an undefined local; raised NameError for every array value)
        @ary += value_wild
      else
        logger.error "unknown fixture type in setup (yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<); skipping"
      end
    end

    logger.debug "fixture setup:"
    ## NOTE(review): to_json presumably requires the json library to be loaded elsewhere - confirm
    logger.debug @ary.to_json
  end

  ## Yield every fixture name (in the order collected).
  def each
    @ary.each do |fixture|
      yield( fixture )
    end
  end # method each

end # class FixtureReader
@@ -1,6 +1,8 @@
1
1
  # encoding: utf-8
2
2
 
3
3
 
4
+ # fix: move into TextUtils namespace/module!!
5
+
4
6
  class HashReader
5
7
 
6
8
  include LogUtils::Logging
@@ -4,6 +4,9 @@
4
4
  ## fix/todo: move to/merge into LineReader itself
5
5
  # e.g. use fromString c'tor ??? or similar??
6
6
 
7
+ # fix: move into TextUtils namespace/module!!
8
+
9
+
7
10
  class StringLineReader
8
11
 
9
12
  include LogUtils::Logging
@@ -1,5 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
+ # fix: move into TextUtils namespace/module!!
4
+
3
5
  class ValuesReader
4
6
 
5
7
  include LogUtils::Logging
@@ -150,7 +152,7 @@ class ValuesReader
150
152
 
151
153
  if key_col == '<auto>'
152
154
  ## autogenerate key from first title
153
- key_col = title_to_key( titles[0] )
155
+ key_col = TextUtils.title_to_key( titles[0] )
154
156
  logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
155
157
  end
156
158
 
@@ -273,7 +275,7 @@ class ValuesReader
273
275
 
274
276
  if key_col == '<auto>'
275
277
  ## autogenerate key from first title
276
- key_col = title_to_key( titles[0] )
278
+ key_col = TextUtils.title_to_key( titles[0] )
277
279
  logger.debug " autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
278
280
  end
279
281
 
@@ -286,101 +288,7 @@ class ValuesReader
286
288
  end # each lines
287
289
 
288
290
  end # method each_line
289
-
290
-
291
291
 
292
- def title_to_key( title )
293
-
294
- ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
295
- key = title.downcase
296
-
297
- ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
298
- key = key.gsub( /\[.+\]/, '' )
299
-
300
- ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
301
- key = key.gsub( /\(.+\)/, '' )
302
-
303
- ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
304
- ## todo: use for autotags? e.g. {Bio} => bio
305
- key = key.gsub( /\{.+\}/, '' )
306
-
307
- ## remove all whitespace and punctuation
308
- key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
309
-
310
- ## remove special chars (e.g. %°&)
311
- key = key.gsub( /[%&°]/, '' )
312
-
313
- ## turn accented char into ascii look alike if possible
314
- ##
315
- ## todo: add some more
316
- ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
317
-
318
- ## todo: add unicode codepoint name
319
-
320
- alternatives = [
321
- ['ß', 'ss'],
322
- ['æ', 'ae'],
323
- ['ä', 'ae'],
324
- ['ā', 'a' ], # e.g. Liepājas
325
- ['á', 'a' ], # e.g. Bogotá, Králové
326
- ['ã', 'a' ], # e.g São Paulo
327
- ['ă', 'a' ], # e.g. Chișinău
328
- ['â', 'a' ], # e.g Goiânia
329
- ['å', 'a' ], # e.g. Vålerenga
330
- ['ą', 'a' ], # e.g. Śląsk
331
- ['ç', 'c' ], # e.g. São Gonçalo, Iguaçu, Neftçi
332
- ['ć', 'c' ], # e.g. Budućnost
333
- ['č', 'c' ], # e.g. Tradiční, Výčepní
334
- ['é', 'e' ], # e.g. Vélez, Králové
335
- ['è', 'e' ], # e.g. Rivières
336
- ['ê', 'e' ], # e.g. Grêmio
337
- ['ě', 'e' ], # e.g. Budějovice
338
- ['ĕ', 'e' ], # e.g. Svĕtlý
339
- ['ė', 'e' ], # e.g. Vėtra
340
- ['ë', 'e' ], # e.g. Skënderbeu
341
- ['ğ', 'g' ], # e.g. Qarabağ
342
- ['ì', 'i' ], # e.g. Potosì
343
- ['í', 'i' ], # e.g. Ústí
344
- ['ł', 'l' ], # e.g. Wisła, Wrocław
345
- ['ñ', 'n' ], # e.g. Porteño
346
- ['ň', 'n' ], # e.g. Plzeň, Třeboň
347
- ['ö', 'oe'],
348
- ['ő', 'o' ], # e.g. Győri
349
- ['ó', 'o' ], # e.g. Colón, Łódź, Kraków
350
- ['õ', 'o' ], # e.g. Nõmme
351
- ['ø', 'o' ], # e.g. Fuglafjørdur, København
352
- ['ř', 'r' ], # e.g. Třeboň
353
- ['ș', 's' ], # e.g. Chișinău, București
354
- ['ş', 's' ], # e.g. Beşiktaş
355
- ['š', 's' ], # e.g. Košice
356
- ['ť', 't' ], # e.g. Měšťan
357
- ['ü', 'ue'],
358
- ['ú', 'u' ], # e.g. Fútbol
359
- ['ū', 'u' ], # e.g. Sūduva
360
- ['ů', 'u' ], # e.g. Sládkův
361
- ['ı', 'u' ], # e.g. Bakı # use u?? (Baku) why-why not?
362
- ['ý', 'y' ], # e.g. Nefitrovaný
363
- ['ź', 'z' ], # e.g. Łódź
364
- ['ž', 'z' ], # e.g. Domžale, Petržalka
365
-
366
- ['Č', 'c' ], # e.g. České
367
- ['İ', 'i' ], # e.g. İnter
368
- ['Í', 'i' ], # e.g. ÍBV
369
- ['Ł', 'l' ], # e.g. Łódź
370
- ['Ö', 'oe' ], # e.g. Örebro
371
- ['Ř', 'r' ], # e.g. Řezák
372
- ['Ś', 's' ], # e.g. Śląsk
373
- ['Š', 's' ], # e.g. MŠK
374
- ['Ş', 's' ], # e.g. Şüvälan
375
- ['Ú', 'u' ], # e.g. Ústí, Újpest
376
- ['Ž', 'z' ] # e.g. Žilina
377
- ]
378
-
379
- alternatives.each do |alt|
380
- key = key.gsub( alt[0], alt[1] )
381
- end
382
292
 
383
- key
384
- end # method title_to_key
385
293
 
386
294
  end # class ValuesReader
@@ -1,6 +1,15 @@
1
1
  # encoding: utf-8
2
2
 
3
3
 
4
+
5
+ module TextUtils
6
+ # make the UnicodeHelper and TitleHelper methods callable on the module itself e.g. TextUtils.convert_unicode_dashes_to_plain_ascii( text ) or TextUtils.title_to_key( title )
7
+ extend UnicodeHelper
8
+ extend TitleHelper
9
+ end
10
+
11
+
12
+
4
13
  class File
5
14
  def self.read_utf8( path )
6
15
  text = open( path, 'r:bom|utf-8' ) do |file|
@@ -8,107 +17,16 @@ class File
8
17
  end
9
18
 
10
19
  # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
11
- text = convert_unicode_dashes_to_plain_ascii( text, path: path )
20
+ text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
12
21
 
13
22
  text
14
23
  end
15
24
  end # class File
16
25
 
17
26
 
18
- # NB:
19
- # U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. -
20
- #
21
- # see en.wikipedia.org/wiki/Dash
22
-
23
- U_HYPHEN = "\u2010" # unambigous hyphen
24
- U_NON_BREAKING_HYPHEN = "\u2011" # unambigous non-breaking hyphen
25
- U_MINUS = "\u2212" # unambigous minus sign (html => &minus;)
26
- U_NDASH = "\u2013" # ndash (html => &ndash; ascii => --)
27
- U_MDASH = "\u2014" # mdash (html => &mdash; ascii => ---)
28
-
29
-
30
- def convert_unicode_dashes_to_plain_ascii( text, opts = {} )
31
-
32
- text = text.gsub( /(#{U_HYPHEN}|#{U_NON_BREAKING_HYPHEN}|#{U_MINUS}|#{U_NDASH}|#{U_MDASH})/ ) do |_|
33
-
34
- # puts "found U+#{'%04X' % $1.ord} (#{$1})"
35
-
36
- msg = ''
37
-
38
- if $1 == U_HYPHEN
39
- msg << "found hyhpen U+2010 (#{$1})"
40
- elsif $1 == U_NON_BREAKING_HYPHEN
41
- msg << "found non_breaking_hyhpen U+2011 (#{$1})"
42
- elsif $1 == U_MINUS
43
- msg << "found minus U+2212 (#{$1})"
44
- elsif $1 == U_NDASH
45
- msg << "found ndash U+2013 (#{$1})"
46
- elsif $1 == U_MDASH
47
- msg << "found mdash U+2014 (#{$1})"
48
- else
49
- msg << "found unknown unicode dash U+#{'%04X' % $1.ord} (#{$1})"
50
- end
51
27
 
52
- msg << " in file >#{opts[:path]}<" if opts[:path]
53
- msg << "; converting to plain ascii hyphen_minus (-)"
54
-
55
- puts "*** warning: #{msg}"
28
## Deprecated top-level version kept for backward compatibility;
##  prints a deprecation warning (using puts) and forwards to TextUtils.title_esc_regex.
##
##  title_unescaped - title string passed on as is
##
## Returns whatever TextUtils.title_esc_regex returns.
def title_esc_regex( title_unescaped )
  ## NB: module name is TextUtils::TitleHelper (singular)
  puts "*** warn: deprecated fn call: use TextUtils.title_esc_regex() or include TextUtils::TitleHelper"
  TextUtils.title_esc_regex( title_unescaped )
end
56
32
 
57
- '-'
58
- end
59
-
60
- text
61
- end # method convert_unicode_dashes_to_plain_ascii
62
-
63
-
64
- ############
65
- ### fix/todo: share helper for all text readers/parsers- where to put it?
66
- ###
67
-
68
- def title_esc_regex( title_unescaped )
69
-
70
- ## escape regex special chars e.g. . to \. and ( to \( etc.
71
- # e.g. Benfica Lis.
72
- # e.g. Club Atlético Colón (Santa Fe)
73
-
74
- ## NB: cannot use Regexp.escape! will escape space '' to '\ '
75
- ## title = Regexp.escape( title_unescaped )
76
- title = title_unescaped.gsub( '.', '\.' )
77
- title = title.gsub( '(', '\(' )
78
- title = title.gsub( ')', '\)' )
79
-
80
- ## match accented char with or without accents
81
- ## add (ü|ue) etc.
82
- ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
83
-
84
- ## todo: add some more
85
- ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
86
- ##
87
- ## reuse for all readers!
88
-
89
- alternatives = [
90
- ['-', '(-| )'], ## e.g. Blau-Weiß Linz
91
- ['æ', '(æ|ae)'], ## e.g.
92
- ['á', '(á|a)'], ## e.g. Bogotá, Sársfield
93
- ['ã', '(ã|a)'], ## e.g São Paulo
94
- ['ä', '(ä|ae)'], ## e.g.
95
- ['ç', '(ç|c)'], ## e.g. Fenerbahçe
96
- ['é', '(é|e)'], ## e.g. Vélez
97
- ['ê', '(ê|e)'], ## e.g. Grêmio
98
- ['ñ', '(ñ|n)'], ## e.g. Porteño
99
- ['ň', '(ň|n)'], ## e.g. Plzeň
100
- ['Ö', '(Ö|Oe)'], ## e.g. Österreich
101
- ['ö', '(ö|oe)'], ## e.g. Mönchengladbach
102
- ['ó', '(ó|o)'], ## e.g. Colón
103
- ['ș', '(ș|s)'], ## e.g. Bucarești
104
- ['ß', '(ß|ss)'], ## e.g. Blau-Weiß Linz
105
- ['ü', '(ü|ue)'], ## e.g.
106
- ['ú', '(ú|u)'] ## e.g. Fútbol
107
- ]
108
-
109
- alternatives.each do |alt|
110
- title = title.gsub( alt[0], alt[1] )
111
- end
112
-
113
- title
114
- end
@@ -1,6 +1,6 @@
1
1
 
2
2
  module TextUtils
3
3
 
4
- VERSION = '0.5.9'
4
+ VERSION = '0.5.10'
5
5
 
6
6
  end # module TextUtils
data/lib/textutils.rb CHANGED
@@ -21,10 +21,13 @@ require 'textutils/filter/comment_filter'
21
21
  require 'textutils/filter/erb_django_filter'
22
22
  require 'textutils/filter/erb_filter'
23
23
 
24
+ require 'textutils/helper/unicode_helper'
25
+ require 'textutils/helper/title_helper'
26
+
24
27
  require 'textutils/utils'
25
28
  require 'textutils/reader/code_reader'
26
29
  require 'textutils/reader/hash_reader'
27
30
  require 'textutils/reader/line_reader'
28
31
  require 'textutils/reader/values_reader'
29
-
32
+ require 'textutils/reader/fixture_reader'
30
33
 
data/test/helper.rb ADDED
@@ -0,0 +1,14 @@
1
+
2
+ ## $:.unshift(File.dirname(__FILE__))
3
+
4
+ ## minitest setup
5
+
6
+ # require 'minitest/unit'
7
+ require 'minitest/autorun'
8
+
9
+ # include MiniTest::Unit # lets us use TestCase instead of MiniTest::Unit::TestCase
10
+
11
+
12
+ ## our own code
13
+
14
+ require 'textutils'
@@ -0,0 +1,36 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_helper.rb
6
+ # or better
7
+ # rake test
8
+
9
+ require 'helper'
10
+
11
## tests for the helpers made available as TextUtils module methods
class TestHelper < MiniTest::Unit::TestCase

  ## all five fancy unicode dashes/hyphens must end up as plain ascii hyphen-minus
  def test_convert_unicode_dashes

    txt_in  = "\u2010 \u2011 \u2212 \u2013 \u2014"  # NB: unicode chars require double quoted strings
    txt_out = '- - - - -'

    ## NB: use assert_equal (expected, actual) - reports both values on failure
    assert_equal( txt_out, TextUtils.convert_unicode_dashes_to_plain_ascii( txt_in ) )
  end


  ## accented titles must end up as plain ascii keys
  def test_title_to_key

    ## pairs of [title, expected key]
    txt_io = [
      [ 'São Paulo',   'saopaulo' ],
      [ 'São Gonçalo', 'saogoncalo' ],
      [ 'Výčepní',     'vycepni' ]
    ]

    txt_io.each do |title, expected_key|
      assert_equal( expected_key, TextUtils.title_to_key( title ) )
    end
  end


end # class TestHelper
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textutils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.9
4
+ version: 0.5.10
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-05-05 00:00:00.000000000 Z
12
+ date: 2013-05-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: logutils
16
- requirement: &84970780 !ruby/object:Gem::Requirement
16
+ requirement: &72786300 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0.5'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *84970780
24
+ version_requirements: *72786300
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rdoc
27
- requirement: &84970560 !ruby/object:Gem::Requirement
27
+ requirement: &72786080 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '3.10'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *84970560
35
+ version_requirements: *72786080
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: hoe
38
- requirement: &84970340 !ruby/object:Gem::Requirement
38
+ requirement: &72785860 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,7 +43,7 @@ dependencies:
43
43
  version: '3.3'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *84970340
46
+ version_requirements: *72785860
47
47
  description: textutils - Text Filters, Helpers, Readers and More
48
48
  email: webslideshow@googlegroups.com
49
49
  executables: []
@@ -60,12 +60,18 @@ files:
60
60
  - lib/textutils/filter/comment_filter.rb
61
61
  - lib/textutils/filter/erb_django_filter.rb
62
62
  - lib/textutils/filter/erb_filter.rb
63
+ - lib/textutils/helper/title_helper.rb
64
+ - lib/textutils/helper/unicode_helper.rb
63
65
  - lib/textutils/reader/code_reader.rb
66
+ - lib/textutils/reader/fixture_reader.rb
64
67
  - lib/textutils/reader/hash_reader.rb
65
68
  - lib/textutils/reader/line_reader.rb
66
69
  - lib/textutils/reader/values_reader.rb
67
70
  - lib/textutils/utils.rb
68
71
  - lib/textutils/version.rb
72
+ - test/helper.rb
73
+ - test/test_helper.rb
74
+ - .gemtest
69
75
  homepage: http://geraldb.github.com/textutils
70
76
  licenses:
71
77
  - Public Domain
@@ -93,4 +99,5 @@ rubygems_version: 1.8.17
93
99
  signing_key:
94
100
  specification_version: 3
95
101
  summary: textutils - Text Filters, Helpers, Readers and More
96
- test_files: []
102
+ test_files:
103
+ - test/test_helper.rb