RubyGems - Text - Versions diffs - 1.1.2 → 1.1.3 - Mend

Text 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

metadata +51 -67
data/README.rdoc +0 -28
data/lib/text.rb +0 -6
data/lib/text/double_metaphone.rb +0 -356
data/lib/text/figlet.rb +0 -17
data/lib/text/figlet/font.rb +0 -117
data/lib/text/figlet/smusher.rb +0 -64
data/lib/text/figlet/typesetter.rb +0 -68
data/lib/text/levenshtein.rb +0 -65
data/lib/text/metaphone.rb +0 -97
data/lib/text/porter_stemming.rb +0 -171
data/lib/text/soundex.rb +0 -61
data/rakefile.rb +0 -44
data/test/data/big.flf +0 -2204
data/test/data/big.txt +0 -8
data/test/data/chunky.flf +0 -512
data/test/data/chunky.txt +0 -5
data/test/data/double_metaphone.csv +0 -1218
data/test/data/metaphone.txt +0 -51
data/test/data/metaphone_buggy.txt +0 -52
data/test/data/porter_stemming_input.txt +0 -23531
data/test/data/porter_stemming_output.txt +0 -23531
data/test/preamble.rb +0 -10
data/test/test_double_metaphone.rb +0 -23
data/test/test_figlet.rb +0 -17
data/test/test_levenshtein.rb +0 -80
data/test/test_metaphone.rb +0 -39
data/test/test_porter_stemming.rb +0 -16
data/test/test_soundex.rb +0 -27

data/lib/text/figlet.rb DELETED

@@ -1,17 +0,0 @@
-#
-# Ruby implementation of the Figlet program (http://www.figlet.org/).
-#
-# Author: Tim Fletcher (twoggle@gmail.com)
-#
-# Usage:
-#
-#   big_font = Text::Figlet::Font.new('big.flf')
-#
-#   figlet = Text::Figlet::Typesetter.new(big_font)
-#
-#   puts figlet['hello world']
-#
-#
-require 'text/figlet/font'
-require 'text/figlet/smusher'
-require 'text/figlet/typesetter'

data/lib/text/figlet/font.rb DELETED

@@ -1,117 +0,0 @@
-module Text
-module Figlet
-  class UnknownFontFormat < StandardError
-  end
-  class Font
-    def initialize(filename, load_german = true)
-      file = File.open(filename, 'rb')
-      header = file.gets.strip.split(/ /)
-      raise UnknownFontFormat if 'flf2a' != header[0][0, 5]
-      @hard_blank = header.shift[-1, 1]
-      @height = header.shift.to_i
-      @baseline = header.shift
-      @max_length = header.shift
-      @old_layout = header.shift.to_i
-      @comment_count = header.shift.to_i
-      @right_to_left = header.shift
-      @right_to_left = !@right_to_left.nil? && @right_to_left.to_i == 1
-      @load_german, @characters = load_german, {}
-      load_comments file
-      load_ascii_characters file
-      load_german_characters file
-      load_extended_characters file
-      file.close
-    end
-    def [](char)
-      @characters[char]
-    end
-    def has_char?(char)
-      @characters.has_key? char
-    end
-    attr_reader :height, :hard_blank, :old_layout
-    def right_to_left?
-      @right_to_left
-    end
-    private
-    def load_comments(file)
-      @comment_count.times { file.gets.strip }
-    end
-    def load_ascii_characters(file)
-      (32..126).each { |i| @characters[i] = load_char(file) }
-    end
-    def load_german_characters(file)
-      [91, 92, 93, 123, 124, 125, 126].each do |i|
-        if @load_german
-          unless char = load_char(file)
-            return
-          end
-          @characters[i] = char
-        else
-          skip_char file
-        end
-      end
-    end
-    def load_extended_characters(file)
-      until file.eof?
-        i = file.gets.strip.split(/ /).first
-        if i.empty?
-          next
-        elsif /^\-0x/i =~ i # comment
-          skip_char file
-        else
-          if /^0x/i =~ i
-            i = i[2, 1].hex
-          elsif '0' == i[0] && '0' != i || '-0' == i[0, 2]
-            i = i.oct
-          end
-          unless char = load_char(file)
-            return
-          end
-          @characters[i] = char
-        end
-      end
-    end
-    def load_char(file)
-      char = []
-      @height.times do
-        return false if file.eof?
-        line = file.gets.rstrip
-        if match = /(.){1,2}$/.match(line)
-          line.gsub! match[1], ''
-        end
-        line << "\x00"
-        char << line
-      end
-      return char
-    end
-    def skip_char(file)
-      @height.times do
-        return if file.eof?
-        return if file.gets.strip.nil?
-      end
-    end
-  end
-end # module Figlet
-end # module Text

data/lib/text/figlet/smusher.rb DELETED

@@ -1,64 +0,0 @@
-module Text
-module Figlet
-  class Smusher
-    def initialize(font)
-      @font = font
-    end
-    def [](result)
-      todo = false
-      @font.height.times do |j|
-        result[j] = result[j].sub(pattern) { todo, x = callback(todo, $1, $2); x }
-      end
-      @font.height.times do |j|
-        result[j] = if todo
-          result[j].sub(/\s\x00(?!$)|\x00\s/, '').sub(/\x00(?!$)/, '')
-        else
-          result[j].sub(/\x00(?!$)/, '')
-        end
-      end
-    end
-    def pattern
-      @pattern ||= /([^#{@font.hard_blank}\x00\s])\x00([^#{@font.hard_blank}\x00\s])/
-    end
-    def symbols
-      @@symbols ||= {
-        24 => '|/\\[]{}()<>',
-        8 => {'[' => ']', ']' => '[', '{' => '}', '}' => '{', '(' => ')', ')' => '('},
-        16 => {"/\\" => '|', "\\/" => 'Y', '><' => 'X'}
-      }
-    end
-    def old_layout?(n)
-      @font.old_layout & n > 0
-    end
-    def callback(s, a, b)
-      combined = a + b
-      if old_layout?(1) && a == b
-        return true, a
-      elsif old_layout?(2) && ('_' == a && symbols[24].include?(b) || '_' == b && symbols[24].include?(a))
-        return true, a
-      elsif old_layout?(4) && ((left = symbols[24].index(a)) && (right = symbols[24].index(b)))
-        return true, (right > left ? b : a)
-      elsif old_layout?(8) && (symbols[8].has_key?(b) && symbols[8][b] == a)
-        return true, '|'
-      elsif old_layout?(16) && symbols[16].has_key?(combined)
-        return true, symbols[16][combined]
-      elsif old_layout?(32) && (a == b && @font.hard_blank == a)
-        return true, @font.hard_blank
-      else
-        return s, "#{a}\00#{b}"
-      end
-    end
-  end
-end # module Figlet
-end # module Text

data/lib/text/figlet/typesetter.rb DELETED

@@ -1,68 +0,0 @@
-module Text
-module Figlet
-  class Typesetter
-    def initialize(font, options = nil)
-      @font = font
-      @options = options || {}
-      @smush = @options.has_key?(:smush) ? @options[:smush] : true
-    end
-    def [](str)
-      result = []
-      str.length.times do |i|
-        char = str[i]
-        unless @font.has_char?(char)
-          if @font.has_char?(0)
-            char = 0
-          else
-            next
-          end
-        end
-        @font.height.times do |j|
-          line = @font[char][j]
-          if result[j].nil?
-            result[j] = line
-          else
-            result[j] = @font.right_to_left?? (line + result[j]) : (result[j] + line)
-          end
-        end
-        if @font.old_layout > -1 && i > 0
-          diff = -1
-          @font.height.times do |j|
-            if match = /\S(\s*\x00\s*)\S/.match(result[j])
-              len = match[1].length
-              diff = (diff == -1 ? len : min(diff, len))
-            end
-          end
-          diff -= 1
-          if diff > 0
-            @font.height.times do |j|
-              if match = /\x00(\s{0,#{diff}})/.match(result[j])
-                b = diff - match[1].length
-                result[j] = result[j].sub(/\s{0,#{b}}\x00\s{#{match[1].length}}/, "\0")
-              end
-            end
-          end
-          smush[result] if @smush
-        end
-      end
-      return result.join("\n").gsub(/\0/, '').gsub(@font.hard_blank, ' ')
-    end
-    private
-    def min(a, b)
-      a > b ? b : a
-    end
-    def smush
-      @smusher ||= Smusher.new(@font)
-    end
-  end
-end # module Figlet
-end # module Text

data/lib/text/levenshtein.rb DELETED

@@ -1,65 +0,0 @@
-#
-# Levenshtein distance algorithm implementation for Ruby, with UTF-8 support.
-#
-# The Levenshtein distance is a measure of how similar two strings s and t are,
-# calculated as the number of deletions/insertions/substitutions needed to
-# transform s into t. The greater the distance, the more the strings differ.
-#
-# The Levenshtein distance is also sometimes referred to as the
-# easier-to-pronounce-and-spell 'edit distance'.
-#
-# Author: Paul Battley (pbattley@gmail.com)
-#
-module Text # :nodoc:
-module Levenshtein
-  # Calculate the Levenshtein distance between two strings +str1+ and +str2+.
-  # +str1+ and +str2+ should be ASCII, UTF-8, or a one-byte-per character encoding such
-  # as ISO-8859-*.
-  #
-  # The strings will be treated as UTF-8 if $KCODE is set appropriately (i.e. 'u').
-  # Otherwise, the comparison will be performed byte-by-byte. There is no specific support
-  # for Shift-JIS or EUC strings.
-  #
-  # When using Unicode text, be aware that this algorithm does not perform normalisation.
-  # If there is a possibility of different normalised forms being used, normalisation
-  # should be performed beforehand.
-  #
-  def distance(str1, str2)
-    if $KCODE =~ /^U/i
-      unpack_rule = 'U*'
-    else
-      unpack_rule = 'C*'
-    end
-    s = str1.unpack(unpack_rule)
-    t = str2.unpack(unpack_rule)
-    n = s.length
-    m = t.length
-    return m if (0 == n)
-    return n if (0 == m)
-    d = (0..m).to_a
-    x = nil
-    (0...n).each do |i|
-      e = i+1
-      (0...m).each do |j|
-        cost = (s[i] == t[j]) ? 0 : 1
-        x = [
-          d[j+1] + 1, # insertion
-          e + 1,      # deletion
-          d[j] + cost # substitution
-        ].min
-        d[j] = e
-        e = x
-      end
-      d[m] = x
-    end
-    return x
-  end
-  extend self
-end
-end

data/lib/text/metaphone.rb DELETED

@@ -1,97 +0,0 @@
-#
-# An implementation of the Metaphone phonetic coding system in Ruby.
-#
-# Metaphone encodes names into a phonetic form such that similar-sounding names
-# have the same or similar Metaphone encodings.
-#
-# The original system was described by Lawrence Philips in Computer Language
-# Vol. 7 No. 12, December 1990, pp 39-43.
-#
-# As there are multiple implementations of Metaphone, each with their own
-# quirks, I have based this on my interpretation of the algorithm specification.
-# Even LP's original BASIC implementation appears to contain bugs (specifically
-# with the handling of CC and MB), when compared to his explanation of the
-# algorithm.
-#
-# I have also compared this implementation with that found in PHP's standard
-# library, which appears to mimic the behaviour of LP's original BASIC
-# implementation. For compatibility, these rules can also be used by passing
-# :buggy=>true to the methods.
-#
-# Author: Paul Battley (pbattley@gmail.com)
-#
-module Text # :nodoc:
-module Metaphone
-  module Rules # :nodoc:all
-    # Metaphone rules.  These are simply applied in order.
-    #
-    STANDARD = [
-      # Regexp, replacement
-      [ /([bcdfhjklmnpqrstvwxyz])\1+/,
-                         '\1' ],  # Remove doubled consonants except g.
-                                  # [PHP] remove c from regexp.
-      [ /^ae/,            'E' ],
-      [ /^[gkp]n/,        'N' ],
-      [ /^wr/,            'R' ],
-      [ /^x/,             'S' ],
-      [ /^wh/,            'W' ],
-      [ /mb$/,            'M' ],  # [PHP] remove $ from regexp.
-      [ /(?!^)sch/,      'SK' ],
-      [ /th/,             '0' ],
-      [ /t?ch|sh/,        'X' ],
-      [ /c(?=ia)/,        'X' ],
-      [ /[st](?=i[ao])/,  'X' ],
-      [ /s?c(?=[iey])/,   'S' ],
-      [ /[cq]/,           'K' ],
-      [ /dg(?=[iey])/,    'J' ],
-      [ /d/,              'T' ],
-      [ /g(?=h[^aeiou])/, ''  ],
-      [ /gn(ed)?/,        'N' ],
-      [ /([^g]|^)g(?=[iey])/,
-                        '\1J' ],
-      [ /g+/,             'K' ],
-      [ /ph/,             'F' ],
-      [ /([aeiou])h(?=\b|[^aeiou])/,
-                         '\1' ],
-      [ /[wy](?![aeiou])/, '' ],
-      [ /z/,              'S' ],
-      [ /v/,              'F' ],
-      [ /(?!^)[aeiou]+/,  ''  ],
-    ]
-    # The rules for the 'buggy' alternate implementation used by PHP etc.
-    #
-    BUGGY = STANDARD.dup
-    BUGGY[0] = [ /([bdfhjklmnpqrstvwxyz])\1+/, '\1' ]
-    BUGGY[6] = [ /mb/, 'M' ]
-  end
-  # Returns the Metaphone representation of a string. If the string contains
-  # multiple words, each word in turn is converted into its Metaphone
-  # representation. Note that only the letters A-Z are supported, so any
-  # language-specific processing should be done beforehand.
-  #
-  # If the :buggy option is set, alternate 'buggy' rules are used.
-  #
-  def metaphone(str, options={})
-    return str.strip.split(/\s+/).map { |w| metaphone_word(w, options) }.join(' ')
-  end
-private
-  def metaphone_word(w, options={})
-    # Normalise case and remove non-ASCII
-    s = w.downcase.gsub(/[^a-z]/, '')
-    # Apply the Metaphone rules
-    rules = options[:buggy] ? Rules::BUGGY : Rules::STANDARD
-    rules.each { |rx, rep| s.gsub!(rx, rep) }
-    return s.upcase
-  end
-  extend self
-end
-end

data/lib/text/porter_stemming.rb DELETED

@@ -1,171 +0,0 @@
-#
-# This is the Porter Stemming algorithm, ported to Ruby from the
-# version coded up in Perl.  It's easy to follow against the rules
-# in the original paper in:
-#
-#   Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
-#   no. 3, pp 130-137,
-#
-# Taken from http://www.tartarus.org/~martin/PorterStemmer (Public Domain)
-#
-module Text # :nodoc:
-module PorterStemming
-  STEP_2_LIST = {
-    'ational' => 'ate', 'tional' => 'tion', 'enci' => 'ence', 'anci' => 'ance',
-    'izer' => 'ize', 'bli' => 'ble',
-    'alli' => 'al', 'entli' => 'ent', 'eli' => 'e', 'ousli' => 'ous',
-    'ization' => 'ize', 'ation' => 'ate',
-    'ator' => 'ate', 'alism' => 'al', 'iveness' => 'ive', 'fulness' => 'ful',
-    'ousness' => 'ous', 'aliti' => 'al',
-    'iviti' => 'ive', 'biliti' => 'ble', 'logi' => 'log'
-  }
-  STEP_3_LIST = {
-    'icate' => 'ic', 'ative' => '', 'alize' => 'al', 'iciti' => 'ic',
-    'ical' => 'ic', 'ful' => '', 'ness' => ''
-  }
-  SUFFIX_1_REGEXP = /(
-                    ational  |
-                    tional   |
-                    enci     |
-                    anci     |
-                    izer     |
-                    bli      |
-                    alli     |
-                    entli    |
-                    eli      |
-                    ousli    |
-                    ization  |
-                    ation    |
-                    ator     |
-                    alism    |
-                    iveness  |
-                    fulness  |
-                    ousness  |
-                    aliti    |
-                    iviti    |
-                    biliti   |
-                    logi)$/x
-  SUFFIX_2_REGEXP = /(
-                      al       |
-                      ance     |
-                      ence     |
-                      er       |
-                      ic       |
-                      able     |
-                      ible     |
-                      ant      |
-                      ement    |
-                      ment     |
-                      ent      |
-                      ou       |
-                      ism      |
-                      ate      |
-                      iti      |
-                      ous      |
-                      ive      |
-                      ize)$/x
-  C = "[^aeiou]"             # consonant
-  V = "[aeiouy]"             # vowel
-  CC = "#{C}(?>[^aeiouy]*)"  # consonant sequence
-  VV = "#{V}(?>[aeiou]*)"    # vowel sequence
-  MGR0 = /^(#{CC})?#{VV}#{CC}/o                # [cc]vvcc... is m>0
-  MEQ1 = /^(#{CC})?#{VV}#{CC}(#{VV})?$/o       # [cc]vvcc[vv] is m=1
-  MGR1 = /^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o      # [cc]vvccvvcc... is m>1
-  VOWEL_IN_STEM   = /^(#{CC})?#{V}/o           # vowel in stem
-  def self.stem(word)
-    # make a copy of the given object and convert it to a string.
-    word = word.dup.to_str
-    return word if word.length < 3
-    # now map initial y to Y so that the patterns never treat it as vowel
-    word[0] = 'Y' if word[0] == ?y
-    # Step 1a
-    if word =~ /(ss|i)es$/
-      word = $` + $1
-    elsif word =~ /([^s])s$/
-      word = $` + $1
-    end
-    # Step 1b
-    if word =~ /eed$/
-      word.chop! if $` =~ MGR0
-    elsif word =~ /(ed|ing)$/
-      stem = $`
-      if stem =~ VOWEL_IN_STEM
-        word = stem
-        case word
-          when /(at|bl|iz)$/             then word << "e"
-          when /([^aeiouylsz])\1$/       then word.chop!
-          when /^#{CC}#{V}[^aeiouwxy]$/o then word << "e"
-        end
-      end
-    end
-    if word =~ /y$/
-      stem = $`
-      word = stem + "i" if stem =~ VOWEL_IN_STEM
-    end
-    # Step 2
-    if word =~ SUFFIX_1_REGEXP
-      stem = $`
-      suffix = $1
-      # print "stem= " + stem + "\n" + "suffix=" + suffix + "\n"
-      if stem =~ MGR0
-        word = stem + STEP_2_LIST[suffix]
-      end
-    end
-    # Step 3
-    if word =~ /(icate|ative|alize|iciti|ical|ful|ness)$/
-      stem = $`
-      suffix = $1
-      if stem =~ MGR0
-        word = stem + STEP_3_LIST[suffix]
-      end
-    end
-    # Step 4
-    if word =~ SUFFIX_2_REGEXP
-      stem = $`
-      if stem =~ MGR1
-        word = stem
-      end
-    elsif word =~ /(s|t)(ion)$/
-      stem = $` + $1
-      if stem =~ MGR1
-        word = stem
-      end
-    end
-    #  Step 5
-    if word =~ /e$/
-      stem = $`
-      if (stem =~ MGR1) ||
-          (stem =~ MEQ1 && stem !~ /^#{CC}#{V}[^aeiouwxy]$/o)
-        word = stem
-      end
-    end
-    if word =~ /ll$/ && word =~ MGR1
-      word.chop!
-    end
-    # and turn initial Y back to y
-    word[0] = 'y' if word[0] == ?Y
-    word
-  end
-end
-end