RubyGems - text-hyphen - Versions diffs - 1.4.1 → 1.5.0 - Mend

text-hyphen 1.4.1 → 1.5.0

Files changed (79) hide show

checksums.yaml +7 -0
data/.standard.yml +5 -0
data/Code-of-Conduct.md +73 -0
data/Contributing.md +68 -0
data/History.md +139 -0
data/Licence.md +159 -0
data/Manifest.txt +12 -5
data/README.md +81 -0
data/Rakefile +68 -19
data/bin/ruby-hyphen +0 -0
data/lib/text/hyphen/language/1.8/de1.rb +1307 -571
data/lib/text/hyphen/language/1.8/en_us.rb +412 -453
data/lib/text/hyphen/language/1.8/fr.rb +128 -334
data/lib/text/hyphen/language/1.8/la.rb +1 -0
data/lib/text/hyphen/language/1.8/lt.rb +115 -0
data/lib/text/hyphen/language/1.8/pt.rb +2 -1
data/lib/text/hyphen/language/1.8/sk.rb +277 -0
data/lib/text/hyphen/language/1.9/ca.rb +2 -1
data/lib/text/hyphen/language/1.9/cs.rb +2 -1
data/lib/text/hyphen/language/1.9/da.rb +2 -1
data/lib/text/hyphen/language/1.9/de1.rb +1382 -646
data/lib/text/hyphen/language/1.9/de2.rb +110 -109
data/lib/text/hyphen/language/1.9/en_uk.rb +2 -1
data/lib/text/hyphen/language/1.9/en_us.rb +412 -454
data/lib/text/hyphen/language/1.9/es.rb +2 -1
data/lib/text/hyphen/language/1.9/et.rb +6 -5
data/lib/text/hyphen/language/1.9/eu.rb +4 -3
data/lib/text/hyphen/language/1.9/fi.rb +3 -2
data/lib/text/hyphen/language/1.9/fr.rb +136 -343
data/lib/text/hyphen/language/1.9/ga.rb +27 -26
data/lib/text/hyphen/language/1.9/hr.rb +6 -5
data/lib/text/hyphen/language/1.9/hsb.rb +3 -2
data/lib/text/hyphen/language/1.9/hu1.rb +3 -2
data/lib/text/hyphen/language/1.9/hu2.rb +5 -4
data/lib/text/hyphen/language/1.9/ia.rb +2 -1
data/lib/text/hyphen/language/1.9/id.rb +8 -7
data/lib/text/hyphen/language/1.9/is.rb +2 -1
data/lib/text/hyphen/language/1.9/it.rb +74 -74
data/lib/text/hyphen/language/1.9/la.rb +54 -53
data/lib/text/hyphen/language/1.9/lt.rb +116 -0
data/lib/text/hyphen/language/1.9/mn.rb +7 -6
data/lib/text/hyphen/language/1.9/nl.rb +2 -1
data/lib/text/hyphen/language/1.9/no1.rb +3 -2
data/lib/text/hyphen/language/1.9/no2.rb +3 -2
data/lib/text/hyphen/language/1.9/pl.rb +2 -1
data/lib/text/hyphen/language/1.9/pt.rb +3 -2
data/lib/text/hyphen/language/1.9/ru.rb +2 -1
data/lib/text/hyphen/language/1.9/sk.rb +280 -0
data/lib/text/hyphen/language/1.9/sv.rb +4 -3
data/lib/text/hyphen/language/cs.rb +1 -1
data/lib/text/hyphen/language/de.rb +2 -1
data/lib/text/hyphen/language/de1.rb +1 -1
data/lib/text/hyphen/language/de2.rb +1 -1
data/lib/text/hyphen/language/en_us.rb +1 -1
data/lib/text/hyphen/language/eu.rb +1 -1
data/lib/text/hyphen/language/fr.rb +1 -1
data/lib/text/hyphen/language/hu.rb +1 -1
data/lib/text/hyphen/language/hu1.rb +1 -1
data/lib/text/hyphen/language/hu2.rb +1 -1
data/lib/text/hyphen/language/is.rb +1 -1
data/lib/text/hyphen/language/lt.rb +4 -0
data/lib/text/hyphen/language/ms.rb +3 -3
data/lib/text/hyphen/language/nl.rb +1 -1
data/lib/text/hyphen/language/no.rb +1 -1
data/lib/text/hyphen/language/sk.rb +4 -0
data/lib/text/hyphen/language.rb +45 -45
data/lib/text/hyphen.rb +139 -97
data/lib/text-hyphen.rb +1 -1
data/test/data/bug_9807_latin1.rb +2 -2
data/test/data/bug_9807_utf-8.rb +1 -1
data/test/test_bugs.rb +14 -13
data/test/test_text_hyphen.rb +31 -21
metadata +146 -96
data/.autotest +0 -23
data/.gemtest +0 -0
data/History.rdoc +0 -99
data/License.rdoc +0 -159
data/README.rdoc +0 -95
data/text-hyphen.gemspec +0 -51

data/lib/text/hyphen/language/hu2.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 # -*- encoding: utf-8 -*-
 Text::Hyphen.require_real_hyphenation_file(__FILE__)
-Text::Hyphen::Language.aliases_for "HU2" => %W(HUN HU)
+Text::Hyphen::Language.aliases_for "HU2" => %W[HUN HU]

data/lib/text/hyphen/language/is.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 # -*- encoding: utf-8 -*-
 Text::Hyphen.require_real_hyphenation_file(__FILE__)
-Text::Hyphen::Language.aliases_for "IS" => %W(ICE ISL)
+Text::Hyphen::Language.aliases_for "IS" => %W[ICE ISL]

data/lib/text/hyphen/language/lt.rb ADDED Viewed

@@ -0,0 +1,4 @@
+# -*- encoding: utf-8 -*-
+Text::Hyphen.require_real_hyphenation_file(__FILE__)
+Text::Hyphen::Language.aliases_for "LT" => "LTU"

data/lib/text/hyphen/language/ms.rb CHANGED Viewed

@@ -1,9 +1,9 @@
-require 'text/hyphen/language/id'
+require "text/hyphen/language/id"
 unless defined? Text::Hyphen::Language::MS
   Text::Hyphen::Language::MS = Text::Hyphen::Language.new(Text::Hyphen::Language::ID) do |malay|
-    malay.isocode = 'ms'
+    malay.isocode = "ms"
   end
-  Text::Hyphen::Language.aliases_for "MS" => %W(MAY MSA)
+  Text::Hyphen::Language.aliases_for "MS" => %W[MAY MSA]
 end

data/lib/text/hyphen/language/nl.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 # -*- encoding: utf-8 -*-
 Text::Hyphen.require_real_hyphenation_file(__FILE__)
-Text::Hyphen::Language.aliases_for "NL" => %W(DUT NLD)
+Text::Hyphen::Language.aliases_for "NL" => %W[DUT NLD]

data/lib/text/hyphen/language/no.rb CHANGED Viewed

@@ -1 +1 @@
-require 'text/hyphen/language/no1'
+require "text/hyphen/language/no1"

data/lib/text/hyphen/language/sk.rb ADDED Viewed

@@ -0,0 +1,4 @@
+# -*- encoding: utf-8 -*-
+Text::Hyphen.require_real_hyphenation_file(__FILE__)
+Text::Hyphen::Language.aliases_for "SK" => "SVK"

data/lib/text/hyphen/language.rb CHANGED Viewed

@@ -4,32 +4,32 @@
 # patterns are defined as instances of this class—and only this class. This
 # is a deliberate "breaking" of Ruby's concept of duck-typing and is
 # intended to provide an indication that the patterns have been converted
-# from TeX encodings to other encodings (e.g., latin1 or UTF-8) that are
+# from TeX encodings to other encodings (e.g., iso-8859-1 or UTF-8) that are
 # more suitable to general text manipulations.
 class Text::Hyphen::Language
-  WORD_START_RE         = %r{^\.} #:nodoc:
-  WORD_END_RE           = %r{\.$} #:nodoc:
-  DIGIT_RE              = %r{\d} #:nodoc:
-  NONDIGIT_RE           = %r{\D} #:nodoc:
-  DASH_RE               = %r{-} #:nodoc:
-  EXCEPTION_DASH0_RE    = %r{[^-](?=[^-])} #:nodoc:
-  EXCEPTION_DASH1_RE    = %r{[^-]-} #:nodoc:
-  EXCEPTION_NONUM_RE    = %r{[^01]} #:nodoc:
-  ZERO_INSERT_RE        = %r{(\D)(?=\D)} #:nodoc:
-  ZERO_START_RE         = %r{^(?=\D)} #:nodoc:
-  DEFAULT_ENCODING      = if RUBY_VERSION < "1.9.1" #:nodoc:
-                            "latin1"
-                          else
-                            "utf-8"
-                          end
+  WORD_START_RE = %r{^\.} # :nodoc:
+  WORD_END_RE = %r{\.$} # :nodoc:
+  DIGIT_RE = %r{\d} # :nodoc:
+  NONDIGIT_RE = %r{\D} # :nodoc:
+  DASH_RE = %r{-} # :nodoc:
+  EXCEPTION_DASH0_RE = %r{[^-](?=[^-])} # :nodoc:
+  EXCEPTION_DASH1_RE = %r{[^-]-} # :nodoc:
+  EXCEPTION_NONUM_RE = %r{[^01]} # :nodoc:
+  ZERO_INSERT_RE = %r{(\D)(?=\D)} # :nodoc:
+  ZERO_START_RE = %r{^(?=\D)} # :nodoc:
+  DEFAULT_ENCODING = if RUBY_VERSION < "1.9.1" # :nodoc:
+    "iso-8859-1"
+  else
+    "utf-8"
+  end
   # The character scan regular expression to use.
-  def scan_re #:nodoc:
-    if RUBY_VERSION < '1.9.1'
+  def scan_re # :nodoc:
+    if RUBY_VERSION < "1.9.1"
       return %r{.}u if @encoding =~ /utf-?8/i
     end
-    return %r{.}
+    %r{.}
   end
   # The encoding of the hyphenation definitions. The text to be compared
@@ -66,21 +66,21 @@ class Text::Hyphen::Language
     @pattern_text = pats.dup
     @patterns = {
-      :both   => {},
-      :start  => {},
-      :stop   => {},
+      :both => {},
+      :start => {},
+      :stop => {},
       :hyphen => {}
     }
-    plist = @pattern_text.split($/).map { |ln| ln.gsub(%r{%.*$}, '') }
+    plist = @pattern_text.split($/).map { |ln| ln.gsub(%r{%.*$}, "") }
     plist.each do |line|
       line.split.each do |word|
         next if word.empty?
         start = stop = false
-        start = true if word.sub!(WORD_START_RE, '')
-        stop  = true if word.sub!(WORD_END_RE, '')
+        start = true if word.sub!(WORD_START_RE, "")
+        stop = true if word.sub!(WORD_END_RE, "")
         # Insert zeroes and start with some digit
         word.gsub!(ZERO_INSERT_RE) { "#{$1}0" }
@@ -88,17 +88,17 @@ class Text::Hyphen::Language
         # This assumes that the pattern lists are already in lowercase
         # form only.
-        tag   = word.gsub(DIGIT_RE, '')
-        value = word.gsub(NONDIGIT_RE, '')
+        tag = word.gsub(DIGIT_RE, "")
+        value = word.gsub(NONDIGIT_RE, "")
-        if start and stop
-          set = :both
+        set = if start && stop
+          :both
         elsif start
-          set = :start
+          :start
         elsif stop
-          set = :stop
+          :stop
         else
-          set = :hyphen
+          :hyphen
         end
         @patterns[set][tag] = value
@@ -116,10 +116,10 @@ class Text::Hyphen::Language
     @exceptions = {}
     @exception_text.split.each do |word|
-      tag   = word.gsub(DASH_RE,'')
-      value = "0" + word.gsub(EXCEPTION_DASH0_RE, '0').gsub(EXCEPTION_DASH1_RE, '1')
-      value.gsub!(EXCEPTION_NONUM_RE, '0')
-      @exceptions[tag] = value.scan(self.scan_re).map { |c| c.to_i }
+      tag = word.gsub(DASH_RE, "")
+      value = "0" + word.gsub(EXCEPTION_DASH0_RE, "0").gsub(EXCEPTION_DASH1_RE, "1")
+      value.gsub!(EXCEPTION_NONUM_RE, "0")
+      @exceptions[tag] = value.scan(scan_re).map { |c| c.to_i }
     end
     true
@@ -142,16 +142,16 @@ class Text::Hyphen::Language
   # instance of Text::Hyphen::Language.
   def initialize(language = nil)
     if language.nil?
-      self.encoding DEFAULT_ENCODING
-      self.patterns ""
-      self.exceptions ""
+      encoding DEFAULT_ENCODING
+      patterns ""
+      exceptions ""
       self.left = 2
       self.right = 2
       self.isocode = nil
-    elsif language.kind_of? Text::Hyphen::Language
-      self.encoding language.encoding
-      self.patterns language.instance_variable_get(:@pattern_text)
-      self.exceptions language.instance_variable_get(:@exception_text)
+    elsif language.is_a? Text::Hyphen::Language
+      encoding language.encoding
+      patterns language.instance_variable_get(:@pattern_text)
+      exceptions language.instance_variable_get(:@exception_text)
       self.left = language.left
       self.right = language.right
       self.isocode = language.isocode
@@ -171,7 +171,7 @@ class Text::Hyphen::Language
       end
       language = const_get(language)
-      [ alias_names ].flatten.each do |alias_name|
+      [alias_names].flatten.each do |alias_name|
         next if const_defined? alias_name
         const_set(alias_name, language)
       end

data/lib/text/hyphen.rb CHANGED Viewed

@@ -7,10 +7,21 @@ end
 # hyphenation algorithm with pattern files. Each object is constructed with
 # a specific language's hyphenation patterns.
 class Text::Hyphen
-  DEBUG   = false
-  VERSION = '1.4.1'
+  # Resolves a file for cleaner loading from a hyphenation loader file.
+  def self.require_real_hyphenation_file(loader) # :nodoc:
+    p = File.dirname(loader)
+    f = File.basename(loader)
+    v = if RUBY_VERSION < "1.9.1"
+      "1.8"
+    else
+      "1.9"
+    end
+    require File.join(p, v, f)
+  end
-  DEFAULT_MIN_LEFT  = 2
+  VERSION = "1.5.0"
+  DEFAULT_MIN_LEFT = 2
   DEFAULT_MIN_RIGHT = 2
   # No fewer than this number of letters will show up to the left of the
@@ -26,31 +37,31 @@ class Text::Hyphen
   # two or three character ISO 639 code, with the two character form being
   # the canonical resource name. This will load the language hyphenation
   # definitions from text/hyphen/language/&lt;code&gt; as a Ruby class. The
-  # resource 'text/hyphen/language/en_us' defines the language class
+  # resource "text/hyphen/language/en_us" defines the language class
   # Text::Hyphen::Language::EN_US. It also defines the secondary forms
   # Text::Hyphen::Language::EN and Text::Hyphen::Language::ENG_US.
   #
   # Minimal transformations will be performed on the language code provided,
-  # such that any dashes are converted to underscores (e.g., 'en-us' becomes
-  # 'en_us') and all characters are regularised. Resource names will be
-  # downcased and class names will be converted to uppercase (e.g., 'Pt' for
-  # the Portuguese language becomes 'pt' and 'PT', respectively).
+  # such that any dashes are converted to underscores (e.g., "en-us" becomes
+  # "en_us") and all characters are regularised. Resource names will be
+  # downcased and class names will be converted to uppercase (e.g., "Pt" for
+  # the Portuguese language becomes "pt" and "PT", respectively).
   #
   # The language may also be specified as an instance of
   # Text::Hyphen::Language.
-  attr_accessor :language
+  #
+  # :attr_accessor: language
+  attr_reader :language
-  undef :language=
-  def language=(lang) #:nodoc:
-    require 'text/hyphen/language' unless defined?(Text::Hyphen::Language)
-    if lang.kind_of? Text::Hyphen::Language
+  def language=(lang) # :nodoc:
+    require "text/hyphen/language" unless defined?(Text::Hyphen::Language)
+    if lang.is_a? Text::Hyphen::Language
       @iso_language = lang.to_s.split(%r{::}o)[-1].downcase
-      @language     = lang
+      @language = lang
     else
       @iso_language = lang.downcase
       load_language
     end
-    @iso_language
   end
   # Returns the language's ISO 639 ID, e.g., "en_us" or "pt".
@@ -70,23 +81,22 @@ class Text::Hyphen
   # methods in an initialization block. The following initializations are
   # all equivalent:
   #
-  #   hyp = Text::Hyphenate.new(:language => 'en_us')
-  #   hyp = Text::Hyphenate.new(language: 'en_us') # under Ruby 1.9
-  #   hyp = Text::Hyphenate.new { |h| h.language = 'en_us' }
+  #   hyp = Text::Hyphen.new(language: "en_us")
+  #   hyp = Text::Hyphen.new { |h| h.language = "en_us" }
   def initialize(options = {}) # :yields self:
     @iso_language = options[:language]
-    @left         = options[:left]
-    @right        = options[:right]
-    @language     = nil
+    @left = options[:left]
+    @right = options[:right]
+    @language = nil
-    @cache        = {}
-    @vcache       = {}
+    @cache = {}
+    @vcache = {}
-    @hyphen       = {}
+    @hyphen = {}
     @begin_hyphen = {}
-    @end_hyphen   = {}
-    @both_hyphen  = {}
-    @exception    = {}
+    @end_hyphen = {}
+    @both_hyphen = {}
+    @exception = {}
     @first_load = true
     yield self if block_given?
@@ -94,57 +104,87 @@ class Text::Hyphen
     load_language
-    @left  ||= DEFAULT_MIN_LEFT
+    @left ||= DEFAULT_MIN_LEFT
     @right ||= DEFAULT_MIN_RIGHT
   end
   # Returns an array of character positions where a word can be hyphenated.
   #
-  #   hyp.hyphenate('representation') #=> [3, 5, 8 10]
+  #   hyp.hyphenate("representation") #=> [3, 5, 8, 10]
   #
   # Because hyphenation can be expensive, if the word has been hyphenated
   # previously, it will be returned from a per-instance cache.
+  #
+  # #hyphenate supports phrase hyphenation:
+  #
+  #   hyp.hyphenate("This useful library supports phrases and sentences.")
+  #   #=> [8, 14, 23, 27, 34, 44]
+  #
+  # When phrases are hyphenated, each word is processed individually and the
+  # result is returned as a single continuous list of hyphenation points.
   def hyphenate(word)
-    word = word.downcase
-    $stderr.puts "Hyphenating #{word}" if DEBUG
-    return @cache[word] if @cache.has_key?(word)
-    res = @language.exceptions[word]
-    return @cache[word] = make_result_list(res) if res
-    letters = word.scan(@language.scan_re)
-    $stderr.puts letters.inspect if DEBUG
-    word_size = letters.size
-    result = [0] * (word_size + 1)
-    right_stop = word_size - @right
-    updater = Proc.new do |hash, str, pos|
-      if hash.has_key?(str)
-        $stderr.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
-        hash[str].scan(@language.scan_re).each_with_index do |cc, ii|
-          cc = cc.to_i
-          result[ii + pos] = cc if cc > result[ii + pos]
+    words = if phrase?(word)
+      word.downcase.split(/[[:space:]]/)
+    else
+      [word.downcase]
+    end
+    points = words.map do |word|
+      next @cache[word] if @cache.has_key?(word)
+      if (exception = @language.exceptions[word])
+        next @cache[word] = make_result_list(exception)
+      end
+      letters = word.scan(@language.scan_re)
+      word_size = letters.size
+      result = [0] * (word_size + 1)
+      right_stop = word_size - @right
+      updater = proc do |hash, str, pos|
+        if hash.has_key?(str)
+          hash[str].scan(@language.scan_re).each_with_index do |cc, ii|
+            cc = cc.to_i
+            result[ii + pos] = cc if cc > result[ii + pos]
+          end
         end
-        $stderr.print ": #{result.inspect}\n" if DEBUG
       end
-    end
       # Walk the word
-    (0..right_stop).each do |pos|
-      rest_length = word_size - pos
-      (1..rest_length).each do |length|
-        substr = letters[pos, length].join('')
-        updater[@language.hyphen, substr, pos]
-        updater[@language.start, substr, pos] if pos.zero?
-        updater[@language.stop, substr, pos] if (length == rest_length)
+      (0..right_stop).each do |pos|
+        rest_length = word_size - pos
+        (1..rest_length).each do |length|
+          substr = letters[pos, length].join("")
+          updater[@language.hyphen, substr, pos]
+          updater[@language.start, substr, pos] if pos.zero?
+          updater[@language.stop, substr, pos] if length == rest_length
+        end
       end
+      updater[@language.both, word, 0] if @language.both[word]
+      (0..@left).each { |i| result[i] = 0 }
+      ((-1 - @right)..-1).each { |i| result[i] = 0 }
+      @cache[word] = make_result_list(result)
     end
-    updater[@language.both, word, 0] if @language.both[word]
+    if points.length > 1
+      offset = 0
+      result = []
-    (0..@left).each { |i| result[i] = 0 }
-    ((-1 - @right)..(-1)).each { |i| result[i] = 0 }
-    @cache[word] = make_result_list(result)
+      points.each_with_index do |word, i|
+        word.each do |pos|
+          result << pos + offset
+        end
+        offset += words[i].length + 1
+      end
+      result
+    else
+      points.flatten
+    end
   end
   # Returns a visualization of the hyphenation points.
@@ -157,8 +197,15 @@ class Text::Hyphen
   #
   # Because hyphenation can be expensive, if the word has been visualised
   # previously, it will be returned from a per-instance cache.
-  def visualise(word, hyphen = '-')
+  #
+  # #visualise supports phrase hyphenation:
+  #
+  #   hyp.visualise("This useful library supports phrases and sentences.")
+  #   #=> This use-ful li-brary sup-port-s phras-es and sen-tences.
+  def visualise(word, hyphen = "-")
+    return visualise_phrase(word, hyphen) if phrase?(word)
     return @vcache[word] if @vcache.has_key?(word)
     w = word.dup
     s = hyphen.size
     hyphenate(w).each_with_index do |pos, n|
@@ -168,7 +215,7 @@ class Text::Hyphen
     end
     @vcache[word] = w
   end
-  alias visualize visualise
+  alias_method :visualize, :visualise
   # Clears the per-instance hyphenation and visualization caches.
   def clear_cache!
@@ -177,29 +224,33 @@ class Text::Hyphen
   end
   # This function will hyphenate a word so that the first point is at most
+  # +size+ characters.
   #
   # NOTE: if hyphen is set to a string, it will still be counted as one
   # character (since it represents a hyphen)
   #
-  # +size+ characters.
-  def hyphenate_to(word, size, hyphen = '-')
+  # #hyphenate_to does not support phrase hyphenation and will throw an
+  # exception if there are spaces.
+  def hyphenate_to(word, size, hyphen = "-")
+    raise ArgumentError, "#hyphenate_to does not support phrases" if phrase?(word)
     point = hyphenate(word).delete_if { |e| e >= size }.max
     if point.nil?
       [nil, word]
     else
-      [word[0 ... point] + hyphen, word[point .. -1]]
+      [word[0...point] + hyphen, word[point..-1]]
     end
   end
   # Returns a string describing the structure of the patterns for the
   # language of this hyphenation object.
   def stats
-    _b = @language.both.size
-    _s = @language.start.size
-    _e = @language.stop.size
-    _h = @language.hyphen.size
-    _x = @language.exceptions.size
-    _T = _b + _s + _e + _h + _x
+    stats_both = @language.both.size
+    stats_start = @language.start.size
+    stats_end = @language.stop.size
+    stats_hyphens = @language.hyphen.size
+    stats_exceptions = @language.exceptions.size
+    stats_total = stats_both + stats_start + stats_end + stats_hyphens + stats_exceptions
     s = <<-EOS
@@ -210,25 +261,13 @@ The language '%s' contains %d total hyphenation patterns.
     % 6d patterns are normal patterns.
     % 6d patterns are exceptions.
-EOS
-    s % [ @iso_language, _T, _s, _e, _b, _h, _x ]
+    EOS
+    s % [@iso_language, stats_total, stats_start, stats_end, stats_both, stats_hyphens, stats_exceptions]
   end
-  def updateresult(hash, str, pos)
-    if hash.has_key?(str)
-      STDERR.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
-      hash[str].scan(@language.scan_re).each_with_index do |c, i|
-        c = c.to_i
-        @result[i + pos] = c if c > @result[i + pos]
-      end
-      STDERR.puts ": #{@result}" if DEBUG
-    end
-  end
-  private :updateresult
   def make_result_list(res)
     r = []
-    res.each_with_index { |c, i| r <<  i * (c.to_i % 2) }
+    res.each_with_index { |c, i| r << i * (c.to_i % 2) }
     r.reject { |i| i.to_i == 0 }
   end
   private :make_result_list
@@ -251,17 +290,20 @@ EOS
   end
   private :load_language
-  # Resolves a file for cleaner loading from a hyphenation loader file.
-  def self.require_real_hyphenation_file(loader) # :nodoc:
-    p = File.dirname(loader)
-    f = File.basename(loader)
-    v = if RUBY_VERSION < "1.9.1"
-          "1.8"
-        else
-          "1.9"
-        end
-    require File.join(p, v, f)
+  def split_phrase(phrase)
+    phrase.split(/[[:space:]]+/)
+  end
+  private :split_phrase
+  def visualise_phrase(phrase, hyphen)
+    split_phrase(phrase).map { |word| visualise(word, hyphen) }.join(" ")
+  end
+  private :visualise_phrase
+  def phrase?(input)
+    /[^[:space:]][[:space:]][^[:space:]]/.match?(input)
   end
+  private :phrase?
 end
 # vim: syntax=ruby

data/lib/text-hyphen.rb CHANGED Viewed

@@ -1,2 +1,2 @@
 # -*- ruby encoding: utf-8 -*-
-require 'text/hyphen'
+require "text/hyphen"

data/test/data/bug_9807_latin1.rb CHANGED Viewed

@@ -1,10 +1,10 @@
-# -*- encoding: latin1 -*-
+# -*- encoding: iso-8859-1 -*-
 module TestTextHyphenData
   def self.bug_9807_data
     txt = "Dampfschifffahrtskapitänsmützenhalterhersteller"
     pts = [5, 11, 17, 19, 21, 25, 28, 31, 34, 37, 40, 44]
     viz = "Dampf-schiff-fahrts-ka-pi-täns-müt-zen-hal-ter-her-stel-ler"
-    [ txt, pts, viz ]
+    [txt, pts, viz]
   end
 end

data/test/data/bug_9807_utf-8.rb CHANGED Viewed

@@ -5,6 +5,6 @@ module TestTextHyphenData
     txt = "Dampfschifffahrtskapitänsmützenhalterhersteller"
     pts = [5, 11, 17, 19, 21, 25, 28, 31, 34, 37, 40, 44]
     viz = "Dampf-schiff-fahrts-ka-pi-täns-müt-zen-hal-ter-her-stel-ler"
-    [ txt, pts, viz ]
+    [txt, pts, viz]
   end
 end

data/test/test_bugs.rb CHANGED Viewed

@@ -1,16 +1,17 @@
 # -*- encoding: utf-8 -*-
-require 'test/unit'
-require 'text-hyphen'
+require "test/unit"
+require "text-hyphen"
 # The behaviour of Text::Hyphen differs based on the version and the
-# encoding. Ruby 1.8 fails if the input is not latin1 and the hyphenation
-# patterns are latin1. Ruby 1.9 always expects UTF-8 patterns.
-data_version = if RUBY_VERSION < '1.9.1'
-                 'latin1'
-               else
-                 'utf-8'
-               end
-data_path = File.join(File.dirname(__FILE__), 'data')
+# encoding. Ruby 1.8 fails if the input is not iso-8859-1 and the hyphenation
+# patterns are iso-8859-1. Ruby 1.9 always expects UTF-8 patterns.
+data_version = if RUBY_VERSION < "1.9.1"
+  "iso-8859-1"
+else
+  "utf-8"
+end
+data_path = File.join(File.dirname(__FILE__), "data")
 load File.join(data_path, "bug_9807_#{data_version}.rb")
 class TestTextHyphenBugs < Test::Unit::TestCase
@@ -19,17 +20,17 @@ class TestTextHyphenBugs < Test::Unit::TestCase
     # http://rubyforge.org/tracker/index.php?func=detail&aid=28498&group_id=294&atid=1195
     txt, pts, viz = TestTextHyphenData.bug_9807_data
-    de1 = Text::Hyphen.new(:language => 'de')
+    de1 = Text::Hyphen.new(:language => "de")
     assert_equal pts, de1.hyphenate(txt)
     assert_equal viz, de1.visualize(txt)
-    de2 = Text::Hyphen.new(:language => 'de2')
+    de2 = Text::Hyphen.new(:language => "de2")
     assert_equal pts, de2.hyphenate(txt)
     assert_equal viz, de2.visualize(txt)
   end
   def test_rubyforge_28128
-    en_us = Text::Hyphen.new(:language => 'en_us')
+    en_us = Text::Hyphen.new(:language => "en_us")
     assert_equal [], en_us.hyphenate("to")
     assert_equal "to", en_us.visualize("to")
   end