RubyGems - textutils - Versions diffs - 0.5.9 → 0.5.10 - Mend

textutils 0.5.9 → 0.5.10

Files changed (16) hide show

data/.gemtest +0 -0
data/Manifest.txt +5 -0
data/lib/textutils/filter/comment_filter.rb +1 -1
data/lib/textutils/helper/title_helper.rb +155 -0
data/lib/textutils/helper/unicode_helper.rb +53 -0
data/lib/textutils/reader/code_reader.rb +2 -0
data/lib/textutils/reader/fixture_reader.rb +48 -0
data/lib/textutils/reader/hash_reader.rb +2 -0
data/lib/textutils/reader/line_reader.rb +3 -0
data/lib/textutils/reader/values_reader.rb +4 -96
data/lib/textutils/utils.rb +14 -96
data/lib/textutils/version.rb +1 -1
data/lib/textutils.rb +4 -1
data/test/helper.rb +14 -0
data/test/test_helper.rb +36 -0
metadata +16 -9

data/.gemtest ADDED Viewed

File without changes

data/Manifest.txt CHANGED Viewed

@@ -7,9 +7,14 @@ lib/textutils/filter/code_filter.rb
 lib/textutils/filter/comment_filter.rb
 lib/textutils/filter/erb_django_filter.rb
 lib/textutils/filter/erb_filter.rb
+lib/textutils/helper/title_helper.rb
+lib/textutils/helper/unicode_helper.rb
 lib/textutils/reader/code_reader.rb
+lib/textutils/reader/fixture_reader.rb
 lib/textutils/reader/hash_reader.rb
 lib/textutils/reader/line_reader.rb
 lib/textutils/reader/values_reader.rb
 lib/textutils/utils.rb
 lib/textutils/version.rb
+test/helper.rb
+test/test_helper.rb

data/lib/textutils/filter/comment_filter.rb CHANGED Viewed

@@ -2,7 +2,7 @@
 module TextUtils
   module Filter
-def comments_percent_style( content, options={} )
+  def comments_percent_style( content, options={} )
     # remove comments
     # % comments

data/lib/textutils/helper/title_helper.rb ADDED Viewed

@@ -0,0 +1,155 @@
+# encoding: utf-8
+module TextUtils
+  module TitleHelper  # helpers that turn titles into lookup keys and into regex source strings
+  def title_to_key( title )  # returns a compact key: downcased, bracketed parts dropped, separators removed, listed accented chars replaced
+   ## NB: used in/moved from readers/values_reader.rb
+      ## NB: downcase does NOT work for accented chars (thus, include uppercase accented chars in alternatives too)
+      key = title.downcase
+      ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna] (NB: .+ is greedy - everything from the first [ to the last ] is dropped; same for () and {} below)
+      key = key.gsub( /\[.+\]/, '' )
+      ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
+      key = key.gsub( /\(.+\)/, '' )
+      ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
+      ## todo: use for autotags? e.g. {Bio} => bio
+      key = key.gsub( /\{.+\}/, '' )
+      ## remove all whitespace and punctuation (space, tab, _ - . ( ) [ ] ' " /)
+      key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
+      ## remove special chars (e.g. %°&)
+      key = key.gsub( /[%&°]/, '' )
+      ##  turn accented char into ascii look alike if possible
+      ##
+      ## todo: add some more
+      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
+      ## todo: add unicode codepoint name
+      alternatives = [
+        ['ß', 'ss'],
+        ['æ', 'ae'],
+        ['ä', 'ae'],
+        ['ā', 'a' ],  # e.g. Liepājas
+        ['á', 'a' ],  # e.g. Bogotá, Králové
+        ['ã', 'a' ],  # e.g. São Paulo
+        ['ă', 'a' ],  # e.g. Chișinău
+        ['â', 'a' ],  # e.g. Goiânia
+        ['å', 'a' ],  # e.g. Vålerenga
+        ['ą', 'a' ],  # e.g. Śląsk
+        ['ç', 'c' ],  # e.g. São Gonçalo, Iguaçu, Neftçi
+        ['ć', 'c' ],  # e.g. Budućnost
+        ['č', 'c' ],  # e.g. Tradiční, Výčepní
+        ['é', 'e' ],  # e.g. Vélez, Králové
+        ['è', 'e' ],  # e.g. Rivières
+        ['ê', 'e' ],  # e.g. Grêmio
+        ['ě', 'e' ],  # e.g. Budějovice
+        ['ĕ', 'e' ],  # e.g. Svĕtlý
+        ['ė', 'e' ],  # e.g. Vėtra
+        ['ë', 'e' ],  # e.g. Skënderbeu
+        ['ğ', 'g' ],  # e.g. Qarabağ
+        ['ì', 'i' ],  # e.g. Potosì
+        ['í', 'i' ],  # e.g. Ústí
+        ['ł', 'l' ],  # e.g. Wisła, Wrocław
+        ['ñ', 'n' ],  # e.g. Porteño
+        ['ň', 'n' ],  # e.g. Plzeň, Třeboň
+        ['ö', 'oe'],
+        ['ő', 'o' ],  # e.g. Győri
+        ['ó', 'o' ],  # e.g. Colón, Łódź, Kraków
+        ['õ', 'o' ],  # e.g. Nõmme
+        ['ø', 'o' ],  # e.g. Fuglafjørdur, København
+        ['ř', 'r' ],  # e.g. Třeboň
+        ['ș', 's' ],  # e.g. Chișinău, București
+        ['ş', 's' ],  # e.g. Beşiktaş
+        ['š', 's' ],  # e.g. Košice
+        ['ť', 't' ],  # e.g. Měšťan
+        ['ü', 'ue'],
+        ['ú', 'u' ],  # e.g. Fútbol
+        ['ū', 'u' ],  # e.g. Sūduva
+        ['ů', 'u' ],  # e.g. Sládkův
+        ['ı', 'u' ],  # e.g. Bakı   # use u?? (Baku) why-why not?
+        ['ý', 'y' ],  # e.g. Nefitrovaný
+        ['ź', 'z' ],  # e.g. Łódź
+        ['ž', 'z' ],  # e.g. Domžale, Petržalka
+        ['Č', 'c' ],  # e.g. České
+        ['İ', 'i' ],  # e.g. İnter
+        ['Í', 'i' ],  # e.g. ÍBV
+        ['Ł', 'l' ],  # e.g. Łódź
+        ['Ö', 'oe' ], # e.g. Örebro
+        ['Ř', 'r' ],  # e.g. Řezák
+        ['Ś', 's' ],  # e.g. Śląsk
+        ['Š', 's' ],  # e.g. MŠK
+        ['Ş', 's' ],  # e.g. Şüvälan
+        ['Ú', 'u' ],  # e.g. Ústí, Újpest
+        ['Ž', 'z' ]   # e.g. Žilina
+      ]
+      alternatives.each do |alt|  # apply every [accented, replacement] pair, in table order
+        key = key.gsub( alt[0], alt[1] )
+      end
+      key
+  end # method title_to_key
+  def title_esc_regex( title_unescaped )  # returns the title as a regex source string that also matches the listed alternative spellings
+      ##  escape regex special chars - NB: only . ( and ) get escaped here; other regex special chars pass through unchanged
+      # e.g. Benfica Lis.
+      # e.g. Club Atlético Colón (Santa Fe)
+      ## NB: cannot use Regexp.escape! will escape space ' ' to '\ '
+      ## title = Regexp.escape( title_unescaped )
+      title = title_unescaped.gsub( '.', '\.' )
+      title = title.gsub( '(', '\(' )
+      title = title.gsub( ')', '\)' )
+      ##  match accented char with or without accents
+      ##  add (ü|ue) etc.
+      ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
+      ## todo: add some more
+      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
+      ##
+      ##  reuse for all readers!
+      alternatives = [
+        ['-', '(-| )'],  ## e.g. Blau-Weiß Linz
+        ['æ', '(æ|ae)'],  ## e.g.
+        ['á', '(á|a)'],  ## e.g. Bogotá, Sársfield
+        ['ã', '(ã|a)'],  ## e.g. São Paulo
+        ['ä', '(ä|ae)'],  ## e.g.
+        ['ç', '(ç|c)'],  ## e.g. Fenerbahçe
+        ['é', '(é|e)'],  ## e.g. Vélez
+        ['ê', '(ê|e)'],  ## e.g. Grêmio
+        ['ñ', '(ñ|n)'],  ## e.g. Porteño
+        ['ň', '(ň|n)'],  ## e.g. Plzeň
+        ['Ö', '(Ö|Oe)'], ## e.g. Österreich
+        ['ö', '(ö|oe)'],  ## e.g. Mönchengladbach
+        ['ó', '(ó|o)'],   ## e.g. Colón
+        ['ș', '(ș|s)'],   ## e.g. Bucarești
+        ['ß', '(ß|ss)'],  ## e.g. Blau-Weiß Linz
+        ['ü', '(ü|ue)'],  ## e.g.
+        ['ú', '(ú|u)']  ## e.g. Fútbol
+      ]
+      alternatives.each do |alt|  # swap each listed char for its (char|plain) alternation group
+        title = title.gsub( alt[0], alt[1] )
+      end
+      title
+  end
+  end # module TitleHelper
+end # module TextUtils

data/lib/textutils/helper/unicode_helper.rb ADDED Viewed

@@ -0,0 +1,53 @@
+# encoding: utf-8
+module TextUtils
+  module UnicodeHelper  # unicode dash constants plus a converter to the plain ascii hyphen-minus
+  # NB:
+  #  U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. - (name only; no such constant is defined in this module)
+  #
+  #  see en.wikipedia.org/wiki/Dash
+  U_HYPHEN              = "\u2010"  # unambiguous hyphen
+  U_NON_BREAKING_HYPHEN = "\u2011"  # unambiguous non-breaking hyphen
+  U_MINUS               = "\u2212"  # unambiguous minus sign (html => &minus;)
+  U_NDASH               = "\u2013"  # ndash (html => &ndash; ascii => --)
+  U_MDASH               = "\u2014"  # mdash (html => &mdash; ascii => ---)
+  def convert_unicode_dashes_to_plain_ascii( text, opts = {} )  # returns a copy of text with every dash listed above replaced by '-'; prints one warning per replacement
+    text = text.gsub( /(#{U_HYPHEN}|#{U_NON_BREAKING_HYPHEN}|#{U_MINUS}|#{U_NDASH}|#{U_MDASH})/ ) do |_|
+      # debug aid: puts "found U+#{'%04X' % $1.ord} (#{$1})"
+      msg = ''
+      if $1 == U_HYPHEN
+        msg << "found hyhpen U+2010 (#{$1})"
+      elsif $1 == U_NON_BREAKING_HYPHEN
+        msg << "found non_breaking_hyhpen U+2011 (#{$1})"
+      elsif $1 == U_MINUS
+        msg << "found minus U+2212 (#{$1})"
+      elsif $1 == U_NDASH
+        msg << "found ndash U+2013 (#{$1})"
+      elsif $1 == U_MDASH
+        msg << "found mdash U+2014 (#{$1})"
+      else  # not reachable with the pattern above (it only matches the five constants); kept as a fallback
+        msg << "found unknown unicode dash U+#{'%04X' % $1.ord} (#{$1})"
+      end
+      msg << " in file >#{opts[:path]}<"   if opts[:path]  # opts[:path] is optional; only used to name the file in the warning
+      msg << "; converting to plain ascii hyphen_minus (-)"
+      puts "*** warning: #{msg}"
+      '-'  # block value becomes the replacement text
+    end
+    text
+  end # method convert_unicode_dashes_to_plain_ascii
+  end # module UnicodeHelper
+end # module TextUtils

data/lib/textutils/reader/code_reader.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # encoding: utf-8
+# fix: move into TextUtils namespace/module!!
 class CodeReader
   include LogUtils::Logging

data/lib/textutils/reader/fixture_reader.rb ADDED Viewed

@@ -0,0 +1,48 @@
+# encoding: utf-8
+### read in a list of fixtures (that is, fixture names/files)
+# fix: move into TextUtils namespace/module!!
+class FixtureReader
+  include LogUtils::Logging
+  ## Reads the YAML file at path and collects its values into a flat list of fixture names.
+  ##
+  ## path - location of the YAML setup file (read via File.read_utf8)
+  ##
+  ## Every top-level value may be a String (a single fixture name) or an Array
+  ## of names; any other value is logged as an error and skipped.
+  def initialize( path )
+    @path = path
+    ## nb: assume/enforce utf-8 encoding (with or without BOM - byte order mark)
+    ## - see textutils/utils.rb
+    text = File.read_utf8( @path )
+    ## NOTE(review): YAML.load may instantiate arbitrary objects - only use on trusted setup files
+    ## nb: an empty document loads as a falsy value; treat it as "no fixtures" instead of crashing on each
+    hash = YAML.load( text ) || {}
+    ### build up array for fixtures from hash
+    @ary = []
+    hash.each do |key_wild, value_wild|
+      key   = key_wild.to_s.strip
+      logger.debug "yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<"
+      if value_wild.kind_of?( String ) # assume single fixture name
+        @ary << value_wild
+      elsif value_wild.kind_of?( Array ) # assume array of fixture names as strings
+        ## fix: was `ary + value_wild` - bare `ary` is not defined here and raised a NameError
+        @ary += value_wild
+      else
+        logger.error "unknow fixture type in setup (yaml key:#{key_wild.class.name} >>#{key}<<, value:#{value_wild.class.name} >>#{value_wild}<<); skipping"
+      end
+    end
+    logger.debug "fixture setup:"
+    logger.debug @ary.to_json
+  end
+  ## Yields every collected fixture name, in the order it was read.
+  def each
+    @ary.each do |fixture|
+      yield( fixture )
+    end
+  end # method each
+end # class FixtureReader

data/lib/textutils/reader/hash_reader.rb CHANGED Viewed

@@ -1,6 +1,8 @@
 # encoding: utf-8
+# fix: move into TextUtils namespace/module!!
 class HashReader
   include LogUtils::Logging

data/lib/textutils/reader/line_reader.rb CHANGED Viewed

@@ -4,6 +4,9 @@
 ## fix/todo: move to/merge into LineReader itself
 #   e.g. use  fromString c'tor ??? or similar??
+# fix: move into TextUtils namespace/module!!
 class StringLineReader
   include LogUtils::Logging

data/lib/textutils/reader/values_reader.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # encoding: utf-8
+# fix: move into TextUtils namespace/module!!
 class ValuesReader
   include LogUtils::Logging
@@ -150,7 +152,7 @@ class ValuesReader
       if key_col == '<auto>'
         ## autogenerate key from first title
-        key_col = title_to_key( titles[0] )
+        key_col = TextUtils.title_to_key( titles[0] )
         logger.debug "   autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
       end
@@ -273,7 +275,7 @@ class ValuesReader
       if key_col == '<auto>'
         ## autogenerate key from first title
-        key_col = title_to_key( titles[0] )
+        key_col = TextUtils.title_to_key( titles[0] )
         logger.debug "   autogen key >#{key_col}< from title >#{titles[0]}<, textutils version #{TextUtils::VERSION}"
       end
@@ -286,101 +288,7 @@ class ValuesReader
     end # each lines
   end # method each_line
-  def title_to_key( title )
-      ## NB: downcase does NOT work for accented chars (thus, include in alternatives)
-      key = title.downcase
-      ### remove optional english translation in square brackets ([]) e.g. Wien [Vienna]
-      key = key.gsub( /\[.+\]/, '' )
-      ## remove optional longer title part in () e.g. Las Palmas (de Gran Canaria), Palma (de Mallorca)
-      key = key.gsub( /\(.+\)/, '' )
-      ## remove optional longer title part in {} e.g. Ottakringer {Bio} or {Alkoholfrei}
-      ## todo: use for autotags? e.g. {Bio} => bio
-      key = key.gsub( /\{.+\}/, '' )
-      ## remove all whitespace and punctuation
-      key = key.gsub( /[ \t_\-\.()\[\]'"\/]/, '' )
-      ## remove special chars (e.g. %°&)
-      key = key.gsub( /[%&°]/, '' )
-      ##  turn accented char into ascii look alike if possible
-      ##
-      ## todo: add some more
-      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
-      ## todo: add unicode codepoint name
-      alternatives = [
-        ['ß', 'ss'],
-        ['æ', 'ae'],
-        ['ä', 'ae'],
-        ['ā', 'a' ],  # e.g. Liepājas
-        ['á', 'a' ],  # e.g. Bogotá, Králové
-        ['ã', 'a' ],  # e.g  São Paulo
-        ['ă', 'a' ],  # e.g. Chișinău
-        ['â', 'a' ],  # e.g  Goiânia
-        ['å', 'a' ],  # e.g. Vålerenga
-        ['ą', 'a' ],  # e.g. Śląsk
-        ['ç', 'c' ],  # e.g. São Gonçalo, Iguaçu, Neftçi
-        ['ć', 'c' ],  # e.g. Budućnost
-        ['č', 'c' ],  # e.g. Tradiční, Výčepní
-        ['é', 'e' ],  # e.g. Vélez, Králové
-        ['è', 'e' ],  # e.g. Rivières
-        ['ê', 'e' ],  # e.g. Grêmio
-        ['ě', 'e' ],  # e.g. Budějovice
-        ['ĕ', 'e' ],  # e.g. Svĕtlý
-        ['ė', 'e' ],  # e.g. Vėtra
-        ['ë', 'e' ],  # e.g. Skënderbeu
-        ['ğ', 'g' ],  # e.g. Qarabağ
-        ['ì', 'i' ],  # e.g. Potosì
-        ['í', 'i' ],  # e.g. Ústí
-        ['ł', 'l' ],  # e.g. Wisła, Wrocław
-        ['ñ', 'n' ],  # e.g. Porteño
-        ['ň', 'n' ],  # e.g. Plzeň, Třeboň
-        ['ö', 'oe'],
-        ['ő', 'o' ],  # e.g. Győri
-        ['ó', 'o' ],  # e.g. Colón, Łódź, Kraków
-        ['õ', 'o' ],  # e.g. Nõmme
-        ['ø', 'o' ],  # e.g. Fuglafjørdur, København
-        ['ř', 'r' ],  # e.g. Třeboň
-        ['ș', 's' ],  # e.g. Chișinău, București
-        ['ş', 's' ],  # e.g. Beşiktaş
-        ['š', 's' ],  # e.g. Košice
-        ['ť', 't' ],  # e.g. Měšťan
-        ['ü', 'ue'],
-        ['ú', 'u' ],  # e.g. Fútbol
-        ['ū', 'u' ],  # e.g. Sūduva
-        ['ů', 'u' ],  # e.g. Sládkův
-        ['ı', 'u' ],  # e.g. Bakı   # use u?? (Baku) why-why not?
-        ['ý', 'y' ],  # e.g. Nefitrovaný
-        ['ź', 'z' ],  # e.g. Łódź
-        ['ž', 'z' ],  # e.g. Domžale, Petržalka
-        ['Č', 'c' ],  # e.g. České
-        ['İ', 'i' ],  # e.g. İnter
-        ['Í', 'i' ],  # e.g. ÍBV
-        ['Ł', 'l' ],  # e.g. Łódź
-        ['Ö', 'oe' ], # e.g. Örebro
-        ['Ř', 'r' ],  # e.g. Řezák
-        ['Ś', 's' ],  # e.g. Śląsk
-        ['Š', 's' ],  # e.g. MŠK
-        ['Ş', 's' ],  # e.g. Şüvälan
-        ['Ú', 'u' ],  # e.g. Ústí, Újpest
-        ['Ž', 'z' ]   # e.g. Žilina
-      ]
-      alternatives.each do |alt|
-        key = key.gsub( alt[0], alt[1] )
-      end
-      key
-  end # method title_to_key
 end # class ValuesReader

data/lib/textutils/utils.rb CHANGED Viewed

@@ -1,6 +1,15 @@
 # encoding: utf-8
+module TextUtils
+  # make the helper methods callable on the module itself e.g. TextUtils.convert_unicode_dashes_to_plain_ascii or TextUtils.title_to_key
+  extend UnicodeHelper  # adds convert_unicode_dashes_to_plain_ascii
+  extend TitleHelper    # adds title_to_key and title_esc_regex
+end
 class File
   def self.read_utf8( path )
     text = open( path, 'r:bom|utf-8' ) do |file|
@@ -8,107 +17,16 @@ class File
     end
     # NB: for convenience: convert fancy unicode dashes/hyphens to plain ascii hyphen-minus
-    text = convert_unicode_dashes_to_plain_ascii( text, path: path )
+    text = TextUtils.convert_unicode_dashes_to_plain_ascii( text, path: path )
     text
   end
 end # class File
-# NB:
-#  U_HYPHEN_MINUS is standard ascii hyphen/minus e.g. -
-#
-#  see en.wikipedia.org/wiki/Dash
-U_HYPHEN              = "\u2010"  # unambigous hyphen
-U_NON_BREAKING_HYPHEN = "\u2011"  # unambigous non-breaking hyphen
-U_MINUS               = "\u2212"  # unambigous minus sign (html => &minus;)
-U_NDASH               = "\u2013"  # ndash (html => &ndash; ascii => --)
-U_MDASH               = "\u2014"  # mdash (html => &mdash; ascii => ---)
-  def convert_unicode_dashes_to_plain_ascii( text, opts = {} )
-    text = text.gsub( /(#{U_HYPHEN}|#{U_NON_BREAKING_HYPHEN}|#{U_MINUS}|#{U_NDASH}|#{U_MDASH})/ ) do |_|
-      # puts "found U+#{'%04X' % $1.ord} (#{$1})"
-      msg = ''
-      if $1 == U_HYPHEN
-        msg << "found hyhpen U+2010 (#{$1})"
-      elsif $1 == U_NON_BREAKING_HYPHEN
-        msg << "found non_breaking_hyhpen U+2011 (#{$1})"
-      elsif $1 == U_MINUS
-        msg << "found minus U+2212 (#{$1})"
-      elsif $1 == U_NDASH
-        msg << "found ndash U+2013 (#{$1})"
-      elsif $1 == U_MDASH
-        msg << "found mdash U+2014 (#{$1})"
-      else
-        msg << "found unknown unicode dash U+#{'%04X' % $1.ord} (#{$1})"
-      end
-      msg << " in file >#{opts[:path]}<"   if opts[:path]
-      msg << "; converting to plain ascii hyphen_minus (-)"
-      puts "*** warning: #{msg}"
+## Deprecated top-level shim: prints a deprecation warning, then delegates to
+## TextUtils.title_esc_regex and returns its result.
+def title_esc_regex( title_unescaped )
+  ## fix: message named the non-existent TextUtils::TitleHelpers; the module is TextUtils::TitleHelper
+  puts "*** warn: deprecated fn call: use TextUtils.title_esc_regex() or include TextUtils::TitleHelper"
+  TextUtils.title_esc_regex( title_unescaped )
+end
-      '-'
-    end
-    text
-  end # method convert_unicode_dashes_to_plain_ascii
-  ############
-  ### fix/todo: share helper for all text readers/parsers- where to put it?
-  ###
-  def title_esc_regex( title_unescaped )
-      ##  escape regex special chars e.g. . to \. and ( to \( etc.
-      # e.g. Benfica Lis.
-      # e.g. Club Atlético Colón (Santa Fe)
-      ## NB: cannot use Regexp.escape! will escape space '' to '\ '
-      ## title = Regexp.escape( title_unescaped )
-      title = title_unescaped.gsub( '.', '\.' )
-      title = title.gsub( '(', '\(' )
-      title = title.gsub( ')', '\)' )
-      ##  match accented char with or without accents
-      ##  add (ü|ue) etc.
-      ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
-      ## todo: add some more
-      ## see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references  for more
-      ##
-      ##  reuse for all readers!
-      alternatives = [
-        ['-', '(-| )'],  ## e.g. Blau-Weiß Linz
-        ['æ', '(æ|ae)'],  ## e.g.
-        ['á', '(á|a)'],  ## e.g. Bogotá, Sársfield
-        ['ã', '(ã|a)'],  ## e.g  São Paulo
-        ['ä', '(ä|ae)'],  ## e.g.
-        ['ç', '(ç|c)'],  ## e.g. Fenerbahçe
-        ['é', '(é|e)'],  ## e.g. Vélez
-        ['ê', '(ê|e)'],  ## e.g. Grêmio
-        ['ñ', '(ñ|n)'],  ## e.g. Porteño
-        ['ň', '(ň|n)'],  ## e.g. Plzeň
-        ['Ö', '(Ö|Oe)'], ## e.g. Österreich
-        ['ö', '(ö|oe)'],  ## e.g. Mönchengladbach
-        ['ó', '(ó|o)'],   ## e.g. Colón
-        ['ș', '(ș|s)'],   ## e.g. Bucarești
-        ['ß', '(ß|ss)'],  ## e.g. Blau-Weiß Linz
-        ['ü', '(ü|ue)'],  ## e.g.
-        ['ú', '(ú|u)']  ## e.g. Fútbol
-      ]
-      alternatives.each do |alt|
-        title = title.gsub( alt[0], alt[1] )
-      end
-      title
-  end

data/lib/textutils/version.rb CHANGED Viewed

@@ -1,6 +1,6 @@
 module TextUtils
-  VERSION = '0.5.9'
+  VERSION = '0.5.10'
 end   # module TextUtils

data/lib/textutils.rb CHANGED Viewed

@@ -21,10 +21,13 @@ require 'textutils/filter/comment_filter'
 require 'textutils/filter/erb_django_filter'
 require 'textutils/filter/erb_filter'
+require 'textutils/helper/unicode_helper'
+require 'textutils/helper/title_helper'
 require 'textutils/utils'
 require 'textutils/reader/code_reader'
 require 'textutils/reader/hash_reader'
 require 'textutils/reader/line_reader'
 require 'textutils/reader/values_reader'
+require 'textutils/reader/fixture_reader'

data/test/helper.rb ADDED Viewed

@@ -0,0 +1,14 @@
+## $:.unshift(File.dirname(__FILE__))
+## minitest setup
+# require 'minitest/unit'
+require 'minitest/autorun'
+# include MiniTest::Unit  # lets us use TestCase instead of MiniTest::Unit::TestCase
+## our own code
+require 'textutils'

data/test/test_helper.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_helper.rb
+#  or better
+#     rake test
+require 'helper'
+class TestHelper < MiniTest::Unit::TestCase
+  ## All five fancy unicode hyphens/dashes/minus signs must come back as plain ascii '-'.
+  def test_convert_unicode_dashes
+    txt_in  = "\u2010 \u2011 \u2212 \u2013 \u2014"  # NB: unicode chars require double quoted strings
+    txt_out = '- - - - -'
+    ## assert_equal (instead of assert on ==) reports expected vs. actual on failure
+    assert_equal( txt_out, TextUtils.convert_unicode_dashes_to_plain_ascii( txt_in ) )
+  end
+  ## Accented titles must map to their plain ascii keys.
+  def test_title_to_key
+    txt_io = [
+      [ 'São Paulo',   'saopaulo' ],
+      [ 'São Gonçalo', 'saogoncalo' ],
+      [ 'Výčepní',     'vycepni' ]
+    ]
+    txt_io.each do |title, key|
+      assert_equal( key, TextUtils.title_to_key( title ) )
+    end
+  end
+end # class TestHelper

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: textutils
 version: !ruby/object:Gem::Version
-  version: 0.5.9
+  version: 0.5.10
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-05 00:00:00.000000000 Z
+date: 2013-05-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: logutils
-  requirement: &84970780 !ruby/object:Gem::Requirement
+  requirement: &72786300 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -21,10 +21,10 @@ dependencies:
         version: '0.5'
   type: :runtime
   prerelease: false
-  version_requirements: *84970780
+  version_requirements: *72786300
 - !ruby/object:Gem::Dependency
   name: rdoc
-  requirement: &84970560 !ruby/object:Gem::Requirement
+  requirement: &72786080 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -32,10 +32,10 @@ dependencies:
         version: '3.10'
   type: :development
   prerelease: false
-  version_requirements: *84970560
+  version_requirements: *72786080
 - !ruby/object:Gem::Dependency
   name: hoe
-  requirement: &84970340 !ruby/object:Gem::Requirement
+  requirement: &72785860 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -43,7 +43,7 @@ dependencies:
         version: '3.3'
   type: :development
   prerelease: false
-  version_requirements: *84970340
+  version_requirements: *72785860
 description: textutils - Text Filters, Helpers, Readers and More
 email: webslideshow@googlegroups.com
 executables: []
@@ -60,12 +60,18 @@ files:
 - lib/textutils/filter/comment_filter.rb
 - lib/textutils/filter/erb_django_filter.rb
 - lib/textutils/filter/erb_filter.rb
+- lib/textutils/helper/title_helper.rb
+- lib/textutils/helper/unicode_helper.rb
 - lib/textutils/reader/code_reader.rb
+- lib/textutils/reader/fixture_reader.rb
 - lib/textutils/reader/hash_reader.rb
 - lib/textutils/reader/line_reader.rb
 - lib/textutils/reader/values_reader.rb
 - lib/textutils/utils.rb
 - lib/textutils/version.rb
+- test/helper.rb
+- test/test_helper.rb
+- .gemtest
 homepage: http://geraldb.github.com/textutils
 licenses:
 - Public Domain
@@ -93,4 +99,5 @@ rubygems_version: 1.8.17
 signing_key:
 specification_version: 3
 summary: textutils - Text Filters, Helpers, Readers and More
-test_files: []
+test_files:
+- test/test_helper.rb