RubyGems - textutils - Versions diffs - 0.5.9 → 0.5.10 - Mend

textutils 0.5.9 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

data/.gemtest +0 -0
data/Manifest.txt +5 -0
data/lib/textutils/filter/comment_filter.rb +1 -1
data/lib/textutils/helper/title_helper.rb +155 -0
data/lib/textutils/helper/unicode_helper.rb +53 -0
data/lib/textutils/reader/code_reader.rb +2 -0
data/lib/textutils/reader/fixture_reader.rb +48 -0
data/lib/textutils/reader/hash_reader.rb +2 -0
data/lib/textutils/reader/line_reader.rb +3 -0
data/lib/textutils/reader/values_reader.rb +4 -96
data/lib/textutils/utils.rb +14 -96
data/lib/textutils/version.rb +1 -1
data/lib/textutils.rb +4 -1
data/test/helper.rb +14 -0
data/test/test_helper.rb +36 -0
metadata +16 -9

data/.gemtest ADDED Viewed

File without changes

data/Manifest.txt CHANGED Viewed

@@ -7,9 +7,14 @@ lib/textutils/filter/code_filter.rb
 lib/textutils/filter/comment_filter.rb
 lib/textutils/filter/erb_django_filter.rb
 lib/textutils/filter/erb_filter.rb
+lib/textutils/helper/title_helper.rb
+lib/textutils/helper/unicode_helper.rb
 lib/textutils/reader/code_reader.rb
+lib/textutils/reader/fixture_reader.rb
 lib/textutils/reader/hash_reader.rb
 lib/textutils/reader/line_reader.rb
 lib/textutils/reader/values_reader.rb
 lib/textutils/utils.rb
 lib/textutils/version.rb
+test/helper.rb
+test/test_helper.rb

data/lib/textutils/filter/comment_filter.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 module TextUtils
   module Filter
-def comments_percent_style( content, options={} )
+  def comments_percent_style( content, options={} )
     # remove comments
     # % comments

data/lib/textutils/helper/title_helper.rb ADDED Viewed

@@ -0,0 +1,155 @@
+# encoding: utf-8
+module TextUtils
+  # Helpers for turning human-readable titles (e.g. city, club or beer names)
+  # into lookup keys and into regex source strings.
+  # The methods are plain instance methods; include or extend the module to use them.
+  module TitleHelper
+    # Derive a compact key from a title.
+    #
+    # Steps, in order: downcase; drop a bracketed [..], (..) and {..} part
+    # (each match is greedy - it runs from the first opening to the last closing
+    # char); drop whitespace/punctuation; drop % & °; finally replace the
+    # accented chars listed below with an ascii look-alike.
+    #
+    # title - title string e.g. 'São Paulo' or 'Wien [Vienna]'
+    #
+    # Returns a new string e.g. 'saopaulo' or 'wien'.
+    def title_to_key( title )
+      ## NB: used in/moved from readers/values_reader.rb
+      ## NB: per the original author downcase does NOT work for accented chars,
+      ##     thus, uppercase accented chars are listed in the table too
+      ## todo: add some more
+      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
+      ## todo: add unicode codepoint name
+      ## todo: use {..} part for autotags? e.g. {Bio} => bio
+      ascii_lookalikes = [
+        ['ß', 'ss'],
+        ['æ', 'ae'],
+        ['ä', 'ae'],
+        ['ā', 'a' ],  # e.g. Liepājas
+        ['á', 'a' ],  # e.g. Bogotá, Králové
+        ['ã', 'a' ],  # e.g  São Paulo
+        ['ă', 'a' ],  # e.g. Chișinău
+        ['â', 'a' ],  # e.g  Goiânia
+        ['å', 'a' ],  # e.g. Vålerenga
+        ['ą', 'a' ],  # e.g. Śląsk
+        ['ç', 'c' ],  # e.g. São Gonçalo, Iguaçu, Neftçi
+        ['ć', 'c' ],  # e.g. Budućnost
+        ['č', 'c' ],  # e.g. Tradiční, Výčepní
+        ['é', 'e' ],  # e.g. Vélez, Králové
+        ['è', 'e' ],  # e.g. Rivières
+        ['ê', 'e' ],  # e.g. Grêmio
+        ['ě', 'e' ],  # e.g. Budějovice
+        ['ĕ', 'e' ],  # e.g. Svĕtlý
+        ['ė', 'e' ],  # e.g. Vėtra
+        ['ë', 'e' ],  # e.g. Skënderbeu
+        ['ğ', 'g' ],  # e.g. Qarabağ
+        ['ì', 'i' ],  # e.g. Potosì
+        ['í', 'i' ],  # e.g. Ústí
+        ['ł', 'l' ],  # e.g. Wisła, Wrocław
+        ['ñ', 'n' ],  # e.g. Porteño
+        ['ň', 'n' ],  # e.g. Plzeň, Třeboň
+        ['ö', 'oe'],
+        ['ő', 'o' ],  # e.g. Győri
+        ['ó', 'o' ],  # e.g. Colón, Łódź, Kraków
+        ['õ', 'o' ],  # e.g. Nõmme
+        ['ø', 'o' ],  # e.g. Fuglafjørdur, København
+        ['ř', 'r' ],  # e.g. Třeboň
+        ['ș', 's' ],  # e.g. Chișinău, București
+        ['ş', 's' ],  # e.g. Beşiktaş
+        ['š', 's' ],  # e.g. Košice
+        ['ť', 't' ],  # e.g. Měšťan
+        ['ü', 'ue'],
+        ['ú', 'u' ],  # e.g. Fútbol
+        ['ū', 'u' ],  # e.g. Sūduva
+        ['ů', 'u' ],  # e.g. Sládkův
+        ['ı', 'u' ],  # e.g. Bakı   # use u?? (Baku) why-why not?
+        ['ý', 'y' ],  # e.g. Nefitrovaný
+        ['ź', 'z' ],  # e.g. Łódź
+        ['ž', 'z' ],  # e.g. Domžale, Petržalka
+        ['Č', 'c' ],  # e.g. České
+        ['İ', 'i' ],  # e.g. İnter
+        ['Í', 'i' ],  # e.g. ÍBV
+        ['Ł', 'l' ],  # e.g. Łódź
+        ['Ö', 'oe' ], # e.g. Örebro
+        ['Ř', 'r' ],  # e.g. Řezák
+        ['Ś', 's' ],  # e.g. Śląsk
+        ['Š', 's' ],  # e.g. MŠK
+        ['Ş', 's' ],  # e.g. Şüvälan
+        ['Ú', 'u' ],  # e.g. Ústí, Újpest
+        ['Ž', 'z' ]   # e.g. Žilina
+      ]
+      ## gsub order: [..] e.g. Wien [Vienna]; (..) e.g. Palma (de Mallorca);
+      ##   {..} e.g. Ottakringer {Bio}; whitespace and punctuation; special chars (e.g. %°&)
+      stripped = title.downcase.
+                   gsub( /\[.+\]/, '' ).
+                   gsub( /\(.+\)/, '' ).
+                   gsub( /\{.+\}/, '' ).
+                   gsub( /[ \t_\-\.()\[\]'"\/]/, '' ).
+                   gsub( /[%&°]/, '' )
+      ## turn accented char into ascii look alike if possible
+      ascii_lookalikes.inject( stripped ) do |key, (accented, plain)|
+        key.gsub( accented, plain )
+      end
+    end # method title_to_key
+    # Turn a title into a regex source string.
+    #
+    # Only '.', '(' and ')' get backslash-escaped; a '-' becomes '(-| )' and the
+    # accented chars listed below become an alternation that also matches the
+    # plain ascii spelling e.g. ü => (ü|ue). Everything else passes through as is.
+    #
+    # title_unescaped - title string e.g. 'Club Atlético Colón (Santa Fe)' or 'Benfica Lis.'
+    #
+    # Returns a new string e.g. 'Benfica Lis\.'
+    def title_esc_regex( title_unescaped )
+      ## NB: cannot use Regexp.escape! will escape space ' ' to '\ '
+      ## todo: add some more
+      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
+      ##
+      ##  reuse for all readers!
+      accent_patterns = [
+        ['-', '(-| )'],  ## e.g. Blau-Weiß Linz
+        ['æ', '(æ|ae)'],  ## e.g.
+        ['á', '(á|a)'],  ## e.g. Bogotá, Sársfield
+        ['ã', '(ã|a)'],  ## e.g  São Paulo
+        ['ä', '(ä|ae)'],  ## e.g.
+        ['ç', '(ç|c)'],  ## e.g. Fenerbahçe
+        ['é', '(é|e)'],  ## e.g. Vélez
+        ['ê', '(ê|e)'],  ## e.g. Grêmio
+        ['ñ', '(ñ|n)'],  ## e.g. Porteño
+        ['ň', '(ň|n)'],  ## e.g. Plzeň
+        ['Ö', '(Ö|Oe)'], ## e.g. Österreich
+        ['ö', '(ö|oe)'],  ## e.g. Mönchengladbach
+        ['ó', '(ó|o)'],   ## e.g. Colón
+        ['ș', '(ș|s)'],   ## e.g. Bucarești
+        ['ß', '(ß|ss)'],  ## e.g. Blau-Weiß Linz
+        ['ü', '(ü|ue)'],  ## e.g.
+        ['ú', '(ú|u)']  ## e.g. Fútbol
+      ]
+      ## escape first, so the parens added by the alternations below stay unescaped
+      escaped = title_unescaped.gsub( '.', '\.' ).gsub( '(', '\(' ).gsub( ')', '\)' )
+      accent_patterns.inject( escaped ) do |pattern, (char, alternation)|
+        pattern.gsub( char, alternation )
+      end
+    end # method title_esc_regex
+  end # module TitleHelper
+end # module TextUtils

data/lib/textutils/helper/unicode_helper.rb ADDED Viewed

@@ -0,0 +1,53 @@
+# encoding: utf-8
+module TextUtils
+  # Helpers for normalizing "fancy" unicode dashes/hyphens to the plain ascii hyphen-minus.
+  module UnicodeHelper
+    # NB:
+    #  U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. -
+    #
+    #  see en.wikipedia.org/wiki/Dash
+    U_HYPHEN              = "\u2010"  # unambiguous hyphen
+    U_NON_BREAKING_HYPHEN = "\u2011"  # unambiguous non-breaking hyphen
+    U_MINUS               = "\u2212"  # unambiguous minus sign (html => &minus;)
+    U_NDASH               = "\u2013"  # ndash (html => &ndash; ascii => --)
+    U_MDASH               = "\u2014"  # mdash (html => &mdash; ascii => ---)
+    # dash char => name used in the warning message
+    UNICODE_DASH_NAMES = {
+      U_HYPHEN              => 'hyphen',
+      U_NON_BREAKING_HYPHEN => 'non_breaking_hyphen',
+      U_MINUS               => 'minus',
+      U_NDASH               => 'ndash',
+      U_MDASH               => 'mdash'
+    }.freeze
+    # matches any one of the dashes listed above
+    UNICODE_DASH_REGEX = Regexp.union( UNICODE_DASH_NAMES.keys )
+    # Replace every unicode hyphen/minus/ndash/mdash in text with a plain
+    # ascii hyphen-minus (-) and print one warning line to stdout per replacement.
+    #
+    # text - string to convert (not modified; a new string is returned)
+    # opts - options hash; :path (optional) is only used to mention the
+    #        source file in the warning
+    #
+    # Returns the converted string.
+    def convert_unicode_dashes_to_plain_ascii( text, opts = {} )
+      text.gsub( UNICODE_DASH_REGEX ) do |dash|
+        msg = "found #{UNICODE_DASH_NAMES[dash]} U+#{'%04X' % dash.ord} (#{dash})"
+        msg << " in file >#{opts[:path]}<"   if opts[:path]
+        msg << "; converting to plain ascii hyphen_minus (-)"
+        puts "*** warning: #{msg}"
+        '-'
+      end
+    end # method convert_unicode_dashes_to_plain_ascii
+  end # module UnicodeHelper
+end # module TextUtils

data/lib/textutils/reader/code_reader.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # encoding: utf-8
+# fix: move into TextUtils namespace/module!!
 class CodeReader
   include LogUtils::Logging

data/lib/textutils/reader/fixture_reader.rb ADDED Viewed

@@ -0,0 +1,48 @@
+# encoding: utf-8
+### read in a list of fixtures (that is, fixture names/files)
+# fix: move into TextUtils namespace/module!!
+class FixtureReader
+  include LogUtils::Logging
+  # Read the yaml file at path and collect all fixture names into one flat array.
+  #
+  # Every top-level value is expected to be either a single fixture name
+  # (String) or a list of fixture names (Array); anything else is logged
+  # as an error and skipped. The keys are only used for logging.
+  #
+  # path - path of the yaml file listing the fixtures
+  def initialize( path )
+    @path = path
+    ## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
+    ## - see textutils/utils.rb
+    text = File.read_utf8( @path )
+    ## NOTE(review): YAML.load may instantiate arbitrary objects; consider
+    ##   YAML.safe_load if fixture files can come from untrusted sources
+    ## nb: an empty document does not load as a hash; treat it as "no fixtures"
+    hash = YAML.load( text ) || {}
+    ### build up array for fixtures from hash
+    @ary = []
+    hash.each do |key_wild, value_wild|
+      key = key_wild.to_s.strip
+      logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<"
+      case value_wild
+      when String   # assume single fixture name
+        @ary << value_wild
+      when Array    # assume array of fixture names as strings
+        ## nb: was @ary = ary + value_wild; ary is undefined (NameError)
+        @ary.concat( value_wild )
+      else
+        logger.error "unknown fixture type in setup (yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<); skipping"
+      end
+    end
+    logger.debug "fixture setup:"
+    logger.debug @ary.to_json
+  end
+  # Yield every collected fixture name, in the order read.
+  def each
+    @ary.each do |fixture|
+      yield( fixture )
+    end
+  end # method each
+end # class FixtureReader

data/lib/textutils/reader/hash_reader.rb CHANGED Viewed

@@ -1,6 +1,8 @@
 # encoding: utf-8
+# fix: move into TextUtils namespace/module!!
 class HashReader
   include LogUtils::Logging

data/lib/textutils/reader/line_reader.rb CHANGED Viewed

@@ -4,6 +4,9 @@
 ## fix/todo: move to/merge into LineReader itself
 #   e.g. use  fromString c'tor ??? or similar??
+# fix: move into TextUtils namespace/module!!
 class StringLineReader
   include LogUtils::Logging

data/lib/textutils/reader/values_reader.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # encoding: utf-8
+# fix: move into TextUtils namespace/module!!
 class ValuesReader
   include LogUtils::Logging
@@ -150,7 +152,7 @@ class ValuesReader
       if key_col == '<auto>'
         ## autogenerate key from first title
-        key_col = title_to_key( titles[0] )
+        key_col = TextUtils.title_to_key( titles[0] )
         logger.debug "   autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
       end
@@ -273,7 +275,7 @@ class ValuesReader
       if key_col == '<auto>'
         ## autogenerate key from first title
-        key_col = title_to_key( titles[0] )
+        key_col = TextUtils.title_to_key( titles[0] )
         logger.debug "   autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
       end
@@ -286,101 +288,7 @@ class ValuesReader
     end # each lines
   end # method each_line
-  def title_to_key( title )
-      ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
-      key = title.downcase
-      ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
-      key = key.gsub( /\[.+\]/, '' )
-      ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
-      key = key.gsub( /\(.+\)/, '' )
-      ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
-      ## todo: use for autotags? e.g. {Bio} => bio
-      key = key.gsub( /\{.+\}/, '' )
-      ## remove all whitespace and punctuation
-      key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
-      ## remove special chars (e.g. %°&)
-      key = key.gsub( /[%&°]/, '' )
-      ##  turn accented char into ascii look alike if possible
-      ##
-      ## todo: add some more
-      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
-      ## todo: add unicode codepoint name
-      alternatives = [
-        ['ß', 'ss'],
-        ['æ', 'ae'],
-        ['ä', 'ae'],
-        ['ā', 'a' ],  # e.g. Liepājas
-        ['á', 'a' ],  # e.g. Bogotá, Králové
-        ['ã', 'a' ],  # e.g  São Paulo
-        ['ă', 'a' ],  # e.g. Chișinău
-        ['â', 'a' ],  # e.g  Goiânia
-        ['å', 'a' ],  # e.g. Vålerenga
-        ['ą', 'a' ],  # e.g. Śląsk
-        ['ç', 'c' ],  # e.g. São Gonçalo, Iguaçu, Neftçi
-        ['ć', 'c' ],  # e.g. Budućnost
-        ['č', 'c' ],  # e.g. Tradiční, Výčepní
-        ['é', 'e' ],  # e.g. Vélez, Králové
-        ['è', 'e' ],  # e.g. Rivières
-        ['ê', 'e' ],  # e.g. Grêmio
-        ['ě', 'e' ],  # e.g. Budějovice
-        ['ĕ', 'e' ],  # e.g. Svĕtlý
-        ['ė', 'e' ],  # e.g. Vėtra
-        ['ë', 'e' ],  # e.g. Skënderbeu
-        ['ğ', 'g' ],  # e.g. Qarabağ
-        ['ì', 'i' ],  # e.g. Potosì
-        ['í', 'i' ],  # e.g. Ústí
-        ['ł', 'l' ],  # e.g. Wisła, Wrocław
-        ['ñ', 'n' ],  # e.g. Porteño
-        ['ň', 'n' ],  # e.g. Plzeň, Třeboň
-        ['ö', 'oe'],
-        ['ő', 'o' ],  # e.g. Győri
-        ['ó', 'o' ],  # e.g. Colón, Łódź, Kraków
-        ['õ', 'o' ],  # e.g. Nõmme
-        ['ø', 'o' ],  # e.g. Fuglafjørdur, København
-        ['ř', 'r' ],  # e.g. Třeboň
-        ['ș', 's' ],  # e.g. Chișinău, București
-        ['ş', 's' ],  # e.g. Beşiktaş
-        ['š', 's' ],  # e.g. Košice
-        ['ť', 't' ],  # e.g. Měšťan
-        ['ü', 'ue'],
-        ['ú', 'u' ],  # e.g. Fútbol
-        ['ū', 'u' ],  # e.g. Sūduva
-        ['ů', 'u' ],  # e.g. Sládkův
-        ['ı', 'u' ],  # e.g. Bakı   # use u?? (Baku) why-why not?
-        ['ý', 'y' ],  # e.g. Nefitrovaný
-        ['ź', 'z' ],  # e.g. Łódź
-        ['ž', 'z' ],  # e.g. Domžale, Petržalka
-        ['Č', 'c' ],  # e.g. České
-        ['İ', 'i' ],  # e.g. İnter
-        ['Í', 'i' ],  # e.g. ÍBV
-        ['Ł', 'l' ],  # e.g. Łódź
-        ['Ö', 'oe' ], # e.g. Örebro
-        ['Ř', 'r' ],  # e.g. Řezák
-        ['Ś', 's' ],  # e.g. Śląsk
-        ['Š', 's' ],  # e.g. MŠK
-        ['Ş', 's' ],  # e.g. Şüvälan
-        ['Ú', 'u' ],  # e.g. Ústí, Újpest
-        ['Ž', 'z' ]   # e.g. Žilina
-      ]
-      alternatives.each do |alt|
-        key = key.gsub( alt[0], alt[1] )
-      end
-      key
-  end # method title_to_key
 end # class ValuesReader

data/lib/textutils/utils.rb CHANGED Viewed

@@ -1,6 +1,15 @@
 # encoding: utf-8
+module TextUtils
+  # make helpers available as class methods e.g. TextUtils.convert_unicode_dashes_to_plain_ascii
+  #  or TextUtils.title_to_key
+  # nb: UnicodeHelper and TitleHelper get resolved right here, thus,
+  #  the helper files must be required before this file
+  extend UnicodeHelper
+  extend TitleHelper
+end
 class File
   def self.read_utf8( path )
     text = open( path, 'r:bom|utf-8' ) do |file|
@@ -8,107 +17,16 @@ class File
     end
     # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
-    text = convert_unicode_dashes_to_plain_ascii( text, path: path )
+    text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
     text
   end
 end # class File
-# NB:
-#  U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. -
-#
-#  see en.wikipedia.org/wiki/Dash
-U_HYPHEN              = "\u2010"  # unambigous hyphen
-U_NON_BREAKING_HYPHEN = "\u2011"  # unambigous non-breaking hyphen
-U_MINUS               = "\u2212"  # unambigous minus sign (html => &minus;)
-U_NDASH               = "\u2013"  # ndash (html => &ndash; ascii => --)
-U_MDASH               = "\u2014"  # mdash (html => &mdash; ascii => ---)
-  def convert_unicode_dashes_to_plain_ascii( text, opts = {} )
-    text = text.gsub( /(#{U_HYPHEN}|#{U_NON_BREAKING_HYPHEN}|#{U_MINUS}|#{U_NDASH}|#{U_MDASH})/ ) do |_|
-      # puts "found U+#{'%04X' % $1.ord} (#{$1})"
-      msg = ''
-      if $1 == U_HYPHEN
-        msg << "found hyhpen U+2010 (#{$1})"
-      elsif $1 == U_NON_BREAKING_HYPHEN
-        msg << "found non_breaking_hyhpen U+2011 (#{$1})"
-      elsif $1 == U_MINUS
-        msg << "found minus U+2212 (#{$1})"
-      elsif $1 == U_NDASH
-        msg << "found ndash U+2013 (#{$1})"
-      elsif $1 == U_MDASH
-        msg << "found mdash U+2014 (#{$1})"
-      else
-        msg << "found unknown unicode dash U+#{'%04X' % $1.ord} (#{$1})"
-      end
-      msg << " in file >#{opts[:path]}<"   if opts[:path]
-      msg << "; converting to plain ascii hyphen_minus (-)"
-      puts "*** warning: #{msg}"
+# Deprecated top-level shim kept for callers still using the bare function:
+# prints a deprecation warning to stdout, then delegates to TextUtils.title_esc_regex.
+def title_esc_regex( title_unescaped )
+  puts "*** warn: deprecated fn call: use TextUtils.title_esc_regex() or include TextUtils::TitleHelper"
+  TextUtils.title_esc_regex( title_unescaped )
+end
-      '-'
-    end
-    text
-  end # method convert_unicode_dashes_to_plain_ascii
-  ############
-  ### fix/todo: share helper for all text readers/parsers- where to put it?
-  ###
-  def title_esc_regex( title_unescaped )
-      ##  escape regex special chars e.g. . to \. and ( to \( etc.
-      # e.g. Benfica Lis.
-      # e.g. Club Atlético Colón (Santa Fe)
-      ## NB: cannot use Regexp.escape! will escape space '' to '\ '
-      ## title = Regexp.escape( title_unescaped )
-      title = title_unescaped.gsub( '.', '\.' )
-      title = title.gsub( '(', '\(' )
-      title = title.gsub( ')', '\)' )
-      ##  match accented char with or without accents
-      ##  add (ü|ue) etc.
-      ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
-      ## todo: add some more
-      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
-      ##
-      ##  reuse for all readers!
-      alternatives = [
-        ['-', '(-| )'],  ## e.g. Blau-Weiß Linz
-        ['æ', '(æ|ae)'],  ## e.g.
-        ['á', '(á|a)'],  ## e.g. Bogotá, Sársfield
-        ['ã', '(ã|a)'],  ## e.g  São Paulo
-        ['ä', '(ä|ae)'],  ## e.g.
-        ['ç', '(ç|c)'],  ## e.g. Fenerbahçe
-        ['é', '(é|e)'],  ## e.g. Vélez
-        ['ê', '(ê|e)'],  ## e.g. Grêmio
-        ['ñ', '(ñ|n)'],  ## e.g. Porteño
-        ['ň', '(ň|n)'],  ## e.g. Plzeň
-        ['Ö', '(Ö|Oe)'], ## e.g. Österreich
-        ['ö', '(ö|oe)'],  ## e.g. Mönchengladbach
-        ['ó', '(ó|o)'],   ## e.g. Colón
-        ['ș', '(ș|s)'],   ## e.g. Bucarești
-        ['ß', '(ß|ss)'],  ## e.g. Blau-Weiß Linz
-        ['ü', '(ü|ue)'],  ## e.g.
-        ['ú', '(ú|u)']  ## e.g. Fútbol
-      ]
-      alternatives.each do |alt|
-        title = title.gsub( alt[0], alt[1] )
-      end
-      title
-  end

data/lib/textutils/version.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module TextUtils
-  VERSION = '0.5.9'
+  VERSION = '0.5.10'
 end   # module TextUtils

data/lib/textutils.rb CHANGED Viewed

@@ -21,10 +21,13 @@ require 'textutils/filter/comment_filter'
 require 'textutils/filter/erb_django_filter'
 require 'textutils/filter/erb_filter'
+require 'textutils/helper/unicode_helper'
+require 'textutils/helper/title_helper'
 require 'textutils/utils'
 require 'textutils/reader/code_reader'
 require 'textutils/reader/hash_reader'
 require 'textutils/reader/line_reader'
 require 'textutils/reader/values_reader'
+require 'textutils/reader/fixture_reader'

data/test/helper.rb ADDED Viewed

@@ -0,0 +1,14 @@
+## $:.unshift(File.dirname(__FILE__))
+## minitest setup
+# require 'minitest/unit'
+require 'minitest/autorun'
+# include MiniTest::Unit  # lets us use TestCase instead of MiniTest::Unit::TestCase
+## our own code
+require 'textutils'

data/test/test_helper.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_helper.rb
+#  or better
+#     rake test
+require 'helper'
+class TestHelper < MiniTest::Unit::TestCase
+  # all five unicode dashes get converted to a plain ascii hyphen-minus
+  def test_convert_unicode_dashes
+    txt_in  = "\u2010 \u2011 \u2212 \u2013 \u2014"  # NB: unicode chars require double quoted strings
+    txt_out = '- - - - -'
+    # nb: assert_equal (expected, actual) reports both values on failure
+    assert_equal( txt_out, TextUtils.convert_unicode_dashes_to_plain_ascii( txt_in ) )
+  end
+  # accented chars get mapped to ascii look-alikes; whitespace gets dropped
+  def test_title_to_key
+    txt_io = [
+      [ 'São Paulo',   'saopaulo' ],
+      [ 'São Gonçalo', 'saogoncalo' ],
+      [ 'Výčepní',     'vycepni' ]
+    ]
+    txt_io.each do |title, key|
+      assert_equal( key, TextUtils.title_to_key( title ) )
+    end
+  end
+end # class TestHelper

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: textutils
 version: !ruby/object:Gem::Version
-  version: 0.5.9
+  version: 0.5.10
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-05 00:00:00.000000000 Z
+date: 2013-05-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: logutils
-  requirement: &84970780 !ruby/object:Gem::Requirement
+  requirement: &72786300 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -21,10 +21,10 @@ dependencies:
         version: '0.5'
   type: :runtime
   prerelease: false
-  version_requirements: *84970780
+  version_requirements: *72786300
 - !ruby/object:Gem::Dependency
   name: rdoc
-  requirement: &84970560 !ruby/object:Gem::Requirement
+  requirement: &72786080 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -32,10 +32,10 @@ dependencies:
         version: '3.10'
   type: :development
   prerelease: false
-  version_requirements: *84970560
+  version_requirements: *72786080
 - !ruby/object:Gem::Dependency
   name: hoe
-  requirement: &84970340 !ruby/object:Gem::Requirement
+  requirement: &72785860 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -43,7 +43,7 @@ dependencies:
         version: '3.3'
   type: :development
   prerelease: false
-  version_requirements: *84970340
+  version_requirements: *72785860
 description: textutils - Text Filters, Helpers, Readers and More
 email: webslideshow@googlegroups.com
 executables: []
@@ -60,12 +60,18 @@ files:
 - lib/textutils/filter/comment_filter.rb
 - lib/textutils/filter/erb_django_filter.rb
 - lib/textutils/filter/erb_filter.rb
+- lib/textutils/helper/title_helper.rb
+- lib/textutils/helper/unicode_helper.rb
 - lib/textutils/reader/code_reader.rb
+- lib/textutils/reader/fixture_reader.rb
 - lib/textutils/reader/hash_reader.rb
 - lib/textutils/reader/line_reader.rb
 - lib/textutils/reader/values_reader.rb
 - lib/textutils/utils.rb
 - lib/textutils/version.rb
+- test/helper.rb
+- test/test_helper.rb
+- .gemtest
 homepage: http://geraldb.github.com/textutils
 licenses:
 - Public Domain
@@ -93,4 +99,5 @@ rubygems_version: 1.8.17
 signing_key:
 specification_version: 3
 summary: textutils - Text Filters, Helpers, Readers and More
-test_files: []
+test_files:
+- test/test_helper.rb