string_cleaner 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/Rakefile +14 -34
- data/lib/string_cleaner.rb +144 -30
- data/spec/spec_helper.rb +2 -3
- data/spec/string_cleaner_spec.rb +67 -0
- data/string_cleaner.gemspec +4 -3
- metadata +30 -9
- data/VERSION +0 -1
    
        data/.gitignore
    CHANGED
    
    
    
        data/Rakefile
    CHANGED
    
    | @@ -1,46 +1,26 @@ | |
| 1 | 
            -
            require  | 
| 2 | 
            -
            require "rake"
         | 
| 3 | 
            -
             | 
| 1 | 
            +
            require 'rubygems'
         | 
| 4 2 | 
             
            begin
         | 
| 5 | 
            -
              require  | 
| 6 | 
            -
              Jeweler::Tasks.new do |gem|
         | 
| 7 | 
            -
                gem.name = "string_cleaner"
         | 
| 8 | 
            -
                gem.summary = %Q{TODO}
         | 
| 9 | 
            -
                gem.email = "joseph@openhood.com"
         | 
| 10 | 
            -
                gem.homepage = "http://github.com/JosephHalter/string_cleaner"
         | 
| 11 | 
            -
                gem.authors = ["Joseph Halter"]
         | 
| 12 | 
            -
             | 
| 13 | 
            -
                # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
         | 
| 14 | 
            -
              end
         | 
| 3 | 
            +
              require 'bundler/setup'
         | 
| 15 4 | 
             
            rescue LoadError
         | 
| 16 | 
            -
              puts  | 
| 5 | 
            +
              puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
         | 
| 17 6 | 
             
            end
         | 
| 18 7 |  | 
| 19 | 
            -
            require  | 
| 20 | 
            -
             | 
| 21 | 
            -
              spec. | 
| 22 | 
            -
              spec.spec_files = FileList["spec/**/*_spec.rb"]
         | 
| 8 | 
            +
            require 'rspec/core/rake_task'
         | 
| 9 | 
            +
            RSpec::Core::RakeTask.new(:spec) do |spec|
         | 
| 10 | 
            +
              spec.pattern = 'spec/**/*_spec.rb'
         | 
| 23 11 | 
             
            end
         | 
| 24 12 |  | 
| 25 | 
            -
             | 
| 26 | 
            -
              spec. | 
| 27 | 
            -
              spec.pattern = "spec/**/*_spec.rb"
         | 
| 13 | 
            +
            RSpec::Core::RakeTask.new(:rcov) do |spec|
         | 
| 14 | 
            +
              spec.pattern = 'spec/**/*_spec.rb'
         | 
| 28 15 | 
             
              spec.rcov = true
         | 
| 29 16 | 
             
            end
         | 
| 30 17 |  | 
| 31 18 | 
             
            task :default => :spec
         | 
| 32 19 |  | 
| 33 | 
            -
            require  | 
| 20 | 
            +
            require 'rake/rdoctask'
         | 
| 34 21 | 
             
            Rake::RDocTask.new do |rdoc|
         | 
| 35 | 
            -
               | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
               | 
| 39 | 
            -
             | 
| 40 | 
            -
              end
         | 
| 41 | 
            -
             | 
| 42 | 
            -
              rdoc.rdoc_dir = "rdoc"
         | 
| 43 | 
            -
              rdoc.title = "test #{version}"
         | 
| 44 | 
            -
              rdoc.rdoc_files.include("README*")
         | 
| 45 | 
            -
              rdoc.rdoc_files.include("lib/**/*.rb")
         | 
| 46 | 
            -
            end
         | 
| 22 | 
            +
              rdoc.rdoc_dir = 'rdoc'
         | 
| 23 | 
            +
              rdoc.title = "string_cleaner"
         | 
| 24 | 
            +
              rdoc.rdoc_files.include('README*')
         | 
| 25 | 
            +
              rdoc.rdoc_files.include('lib/**/*.rb')
         | 
| 26 | 
            +
            end
         | 
    
        data/lib/string_cleaner.rb
    CHANGED
    
    | @@ -1,56 +1,170 @@ | |
| 1 1 | 
             
            # encoding: UTF-8
         | 
| 2 | 
            +
            require "unidecoder"
         | 
| 3 | 
            +
             | 
| 2 4 | 
             
            module String::Cleaner
         | 
| 3 5 |  | 
| 4 | 
            -
              # convert invalid UTF-8 strings from ISO-8859-15 to UTF-8 to fix them
         | 
| 5 | 
            -
              # recognize euro char from both ISO 8859-15 and Windows-1252
         | 
| 6 | 
            -
              # replace \r\n and \r with \n normalizing end of lines
         | 
| 7 | 
            -
              # replace control characters and other invisible chars by spaces
         | 
| 8 6 | 
             
              def clean
         | 
| 9 | 
            -
                 | 
| 10 | 
            -
             | 
| 7 | 
            +
                fix_encoding.fix_endlines.fix_invisible_chars
         | 
| 8 | 
            +
              end
         | 
| 11 9 |  | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 10 | 
            +
              def fix_encoding
         | 
| 11 | 
            +
                utf8 = dup
         | 
| 12 | 
            +
                if utf8.respond_to?(:force_encoding) 
         | 
| 13 | 
            +
                  utf8.force_encoding("UTF-8") # for Ruby 1.9+
         | 
| 14 14 | 
             
                  unless utf8.valid_encoding? # if invalid UTF-8
         | 
| 15 15 | 
             
                    utf8 = utf8.force_encoding("ISO8859-15")
         | 
| 16 16 | 
             
                    utf8.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
         | 
| 17 17 | 
             
                    utf8.gsub!("\xC2\x80", "€") # special case for euro sign from Windows-1252
         | 
| 18 18 | 
             
                    utf8.force_encoding("UTF-8")
         | 
| 19 19 | 
             
                  end
         | 
| 20 | 
            -
                  
         | 
| 21 | 
            -
                  # normalize end of lines
         | 
| 22 | 
            -
                  utf8.gsub!(/\r\n/u, "\n")
         | 
| 23 | 
            -
                  utf8.gsub!(/\r/u, "\n")
         | 
| 24 | 
            -
                  
         | 
| 25 | 
            -
                  # normalize invisible chars
         | 
| 26 | 
            -
                  utf8 = (utf8 << " ").split(/\n/u).each{|line|
         | 
| 27 | 
            -
                    line.gsub!(/[\s\p{C}]/u, " ")
         | 
| 28 | 
            -
                  }.join("\n").chop!
         | 
| 29 | 
            -
                  utf8.force_encoding("UTF-8")
         | 
| 20 | 
            +
                  utf8
         | 
| 30 21 | 
             
                else
         | 
| 31 | 
            -
             | 
| 32 | 
            -
                  # for Ruby 1.8.6, use iconv
         | 
| 33 22 | 
             
                  require "iconv"
         | 
| 34 23 | 
             
                  utf8 << " "
         | 
| 35 | 
            -
                   | 
| 24 | 
            +
                  begin
         | 
| 36 25 | 
             
                    Iconv.new("UTF-8", "UTF-8").iconv(utf8)
         | 
| 37 26 | 
             
                  rescue
         | 
| 38 27 | 
             
                    utf8.gsub!(/\x80/n, "\xA4")
         | 
| 39 | 
            -
                     | 
| 28 | 
            +
                    Iconv.new("UTF-8//IGNORE", "ISO8859-15").iconv(utf8)
         | 
| 40 29 | 
             
                  end
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
              end
         | 
| 41 32 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 33 | 
            +
              def fix_endlines
         | 
| 34 | 
            +
                gsub(/(?:\r\n|\r)/u, "\n")
         | 
| 35 | 
            +
              end
         | 
| 36 | 
            +
              
         | 
| 37 | 
            +
              SPECIAL_SPACES = [
         | 
| 38 | 
            +
                        0x00A0,                # White_Space # Zs       NO-BREAK SPACE
         | 
| 39 | 
            +
                        0x1680,                # White_Space # Zs       OGHAM SPACE MARK
         | 
| 40 | 
            +
                        0x180E,                # White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
         | 
| 41 | 
            +
                        (0x2000..0x200A).to_a, # White_Space # Zs  [11] EN QUAD..HAIR SPACE
         | 
| 42 | 
            +
                        0x2028,                # White_Space # Zl       LINE SEPARATOR
         | 
| 43 | 
            +
                        0x2029,                # White_Space # Zp       PARAGRAPH SEPARATOR
         | 
| 44 | 
            +
                        0x202F,                # White_Space # Zs       NARROW NO-BREAK SPACE
         | 
| 45 | 
            +
                        0x205F,                # White_Space # Zs       MEDIUM MATHEMATICAL SPACE
         | 
| 46 | 
            +
                        0x3000,                # White_Space # Zs       IDEOGRAPHIC SPACE
         | 
| 47 | 
            +
                      ].flatten.collect{|e| [e].pack 'U*'}
         | 
| 48 | 
            +
             | 
| 49 | 
            +
              def fix_invisible_chars
         | 
| 50 | 
            +
                utf8 = self.dup
         | 
| 51 | 
            +
                if utf8.respond_to?(:force_encoding)
         | 
| 52 | 
            +
                  utf8 = (utf8 << " ").split(/\n/u).each{|line|
         | 
| 53 | 
            +
                    line.gsub!(/[\s\p{C}]/u, " ")
         | 
| 54 | 
            +
                  }.join("\n").chop!
         | 
| 55 | 
            +
                  utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|")), " ")
         | 
| 56 | 
            +
                  utf8.force_encoding("UTF-8")
         | 
| 57 | 
            +
                else
         | 
| 47 58 | 
             
                  require "oniguruma"
         | 
| 48 | 
            -
                  utf8 | 
| 59 | 
            +
                  utf8.split(/\n/n).collect{|line|
         | 
| 49 60 | 
             
                    Oniguruma::ORegexp.new("[\\s\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8}).gsub(line, " ")
         | 
| 50 61 | 
             
                  }.join("\n").chop!
         | 
| 51 62 | 
             
                end
         | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 63 | 
            +
              end
         | 
| 64 | 
            +
             | 
| 65 | 
            +
              def trim(chars = "")
         | 
| 66 | 
            +
                chars.size>0 ? gsub(/\A[#{chars}]+|[#{chars}]+\z/, "") : strip
         | 
| 67 | 
            +
              end
         | 
| 68 | 
            +
             | 
| 69 | 
            +
              def to_permalink(separator="-")
         | 
| 70 | 
            +
                fix_endlines.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
         | 
| 71 | 
            +
              end
         | 
| 72 | 
            +
             | 
| 73 | 
            +
              def nl2br
         | 
| 74 | 
            +
                gsub("\n", "<br/>\n")
         | 
| 75 | 
            +
              end
         | 
| 76 | 
            +
              
         | 
| 77 | 
            +
              def to_nicer_sym
         | 
| 78 | 
            +
                to_permalink("_").to_sym
         | 
| 79 | 
            +
              end
         | 
| 80 | 
            +
             | 
| 81 | 
            +
              def chartable(options = {})
         | 
| 82 | 
            +
                options = {
         | 
| 83 | 
            +
                  :clean_binary => true,
         | 
| 84 | 
            +
                  :translit_symbols => true,
         | 
| 85 | 
            +
                }.merge(options)
         | 
| 86 | 
            +
                char = "%c"
         | 
| 87 | 
            +
                table = {
         | 
| 88 | 
            +
                  "`" => "'",  # dec = 96
         | 
| 89 | 
            +
                  "¦" => "|",  # dec = 166, broken vertical bar
         | 
| 90 | 
            +
                  "¨" => "",   # dec = 168, spacing diaeresis - umlaut
         | 
| 91 | 
            +
                  "ª" => "",   # dec = 170, feminine ordinal indicator
         | 
| 92 | 
            +
                  "«" => "\"", # dec = 171, left double angle quotes
         | 
| 93 | 
            +
                  "¬" => "!",  # dec = 172, not sign
         | 
| 94 | 
            +
                  "" => "-",  # dec = 173, soft hyphen
         | 
| 95 | 
            +
                  "¯" => "-",  # dec = 175, spacing macron - overline
         | 
| 96 | 
            +
                  "²" => "2",  # dec = 178, superscript two - squared
         | 
| 97 | 
            +
                  "³" => "3",  # dec = 179, superscript three - cubed
         | 
| 98 | 
            +
                  "´" => "'",  # dec = 180, acute accent - spacing acute
         | 
| 99 | 
            +
                  "·" => "",   # dec = 183, middle dot - Georgian comma
         | 
| 100 | 
            +
                  "¸" => "",   # dec = 184, spacing cedilla
         | 
| 101 | 
            +
                  "¹" => "1",  # dec = 185, superscript one
         | 
| 102 | 
            +
                  "º" => "0",  # dec = 186, masculine ordinal indicator
         | 
| 103 | 
            +
                  "»" => "\"", # dec = 187, right double angle quotes
         | 
| 104 | 
            +
                  "¿" => "",   # dec = 191, inverted question mark
         | 
| 105 | 
            +
                  "Ý" => "Y",  # dec = 221
         | 
| 106 | 
            +
                  "–" => "-",  # hex = 2013, en dash
         | 
| 107 | 
            +
                  "—" => "-",  # hex = 2014, em dash
         | 
| 108 | 
            +
                  "‚" => "'",  # hex = 201A, single low-9 quotation mark
         | 
| 109 | 
            +
                  "„" => "\"", # hex = 201E, double low-9 quotation mark
         | 
| 110 | 
            +
                }
         | 
| 111 | 
            +
                if options[:clean_binary]
         | 
| 112 | 
            +
                  table[char %   0] = ""  # null
         | 
| 113 | 
            +
                  table[char %   1] = ""  # start of heading
         | 
| 114 | 
            +
                  table[char %   2] = ""  # start of text
         | 
| 115 | 
            +
                  table[char %   3] = ""  # end of text
         | 
| 116 | 
            +
                  table[char %   4] = ""  # end of transmission
         | 
| 117 | 
            +
                  table[char %   5] = ""  # enquiry
         | 
| 118 | 
            +
                  table[char %   6] = ""  # acknowledge
         | 
| 119 | 
            +
                  table[char %   7] = ""  # bell
         | 
| 120 | 
            +
                  table[char %   8] = ""  # backspace
         | 
| 121 | 
            +
                  table[char %   9] = " " # tab
         | 
| 122 | 
            +
                  table[char %  11] = ""  # vertical tab
         | 
| 123 | 
            +
                  table[char %  12] = ""  # form feed
         | 
| 124 | 
            +
                  table[char %  14] = ""  # shift out
         | 
| 125 | 
            +
                  table[char %  15] = ""  # shift in
         | 
| 126 | 
            +
                  table[char %  16] = ""  # data link escape
         | 
| 127 | 
            +
                  table[char %  17] = ""  # device control 1
         | 
| 128 | 
            +
                  table[char %  18] = ""  # device control 2
         | 
| 129 | 
            +
                  table[char %  19] = ""  # device control 3
         | 
| 130 | 
            +
                  table[char %  20] = ""  # device control 4
         | 
| 131 | 
            +
                  table[char %  21] = ""  # negative acknowledgement
         | 
| 132 | 
            +
                  table[char %  22] = ""  # synchronous idle
         | 
| 133 | 
            +
                  table[char %  23] = ""  # end of transmission block
         | 
| 134 | 
            +
                  table[char %  24] = ""  # cancel
         | 
| 135 | 
            +
                  table[char %  25] = ""  # end of medium
         | 
| 136 | 
            +
                  table[char %  26] = ""  # substitute
         | 
| 137 | 
            +
                  table[char %  27] = ""  # escape
         | 
| 138 | 
            +
                  table[char %  28] = ""  # file separator
         | 
| 139 | 
            +
                  table[char %  29] = ""  # group separator
         | 
| 140 | 
            +
                  table[char %  30] = ""  # record separator
         | 
| 141 | 
            +
                  table[char %  31] = ""  # unit separator
         | 
| 142 | 
            +
                  table[char % 127] = ""  # delete
         | 
| 143 | 
            +
                end
         | 
| 144 | 
            +
                if options[:translit_symbols]
         | 
| 145 | 
            +
                  table["$"]        = " dollars "              # dec = 36, dollar sign
         | 
| 146 | 
            +
                  table["%"]        = " percent "              # dec = 37, percent sign
         | 
| 147 | 
            +
                  table["&"]        = " and "                  # dec = 38, ampersand
         | 
| 148 | 
            +
                  table["@"]        = " at "                   # dec = 64, at symbol
         | 
| 149 | 
            +
                  table[char % 128] = " euros "                # windows euro
         | 
| 150 | 
            +
                  table["¢"]        = " cents "                # dec = 162, cent sign
         | 
| 151 | 
            +
                  table["£"]        = " pounds "               # dec = 163, pound sign
         | 
| 152 | 
            +
                  table["¤"]        = " euros "                # dec = 164, currency sign
         | 
| 153 | 
            +
                  table["¥"]        = " yens "                 # dec = 165, yen sign
         | 
| 154 | 
            +
                  table["§"]        = " section "              # dec = 167, section sign
         | 
| 155 | 
            +
                  table["©"]        = " copyright "            # dec = 169, copyright sign
         | 
| 156 | 
            +
                  table["®"]        = " registered trademark " # dec = 174, registered trade mark sign
         | 
| 157 | 
            +
                  table["°"]        = " degrees "              # dec = 176, degree sign
         | 
| 158 | 
            +
                  table["±"]        = " approx "               # dec = 177, plus-or-minus sign
         | 
| 159 | 
            +
                  table["µ"]        = " micro "                # dec = 181, micro sign
         | 
| 160 | 
            +
                  table["¶"]        = " paragraph "            # dec = 182, pilcrow sign - paragraph sign
         | 
| 161 | 
            +
                  table["¼"]        = " 1/4 "                  # dec = 188, fraction one quarter
         | 
| 162 | 
            +
                  table["½"]        = " 1/2 "                  # dec = 189, fraction one half
         | 
| 163 | 
            +
                  table["¾"]        = " 3/4 "                  # dec = 190, fraction three quarters
         | 
| 164 | 
            +
                  table["€"]        = " euros "                # hex = 20AC, unicode euro
         | 
| 165 | 
            +
                  table["™"]        = " trademark "            # hex = 2122, trade mark
         | 
| 166 | 
            +
                end
         | 
| 167 | 
            +
                table
         | 
| 54 168 | 
             
              end
         | 
| 55 169 |  | 
| 56 170 | 
             
            end
         | 
    
        data/spec/spec_helper.rb
    CHANGED
    
    | @@ -1,3 +1,2 @@ | |
| 1 | 
            -
            require  | 
| 2 | 
            -
            require  | 
| 3 | 
            -
            require File.join(File.dirname(__FILE__), "..", "lib", "string_cleaner")
         | 
| 1 | 
            +
            require 'bundler/setup'
         | 
| 2 | 
            +
            require 'string_cleaner'
         | 
    
        data/spec/string_cleaner_spec.rb
    CHANGED
    
    | @@ -19,6 +19,23 @@ describe String::Cleaner do | |
| 19 19 | 
             
                    @output.should == "          \n  \n                   !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ €                                ¡¢£€¥Š§š©ª«¬ ®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
         | 
| 20 20 | 
             
                  end
         | 
| 21 21 | 
             
                end
         | 
| 22 | 
            +
                it "should convert all type of spaces to normal spaces" do
         | 
| 23 | 
            +
                  input = [
         | 
| 24 | 
            +
                            (0x0009..0x000D).to_a, # White_Space # Cc   [5] <control-0009>..<control-000D>
         | 
| 25 | 
            +
                            0x0020,                # White_Space # Zs       SPACE
         | 
| 26 | 
            +
                            0x0085,                # White_Space # Cc       <control-0085>
         | 
| 27 | 
            +
                            0x00A0,                # White_Space # Zs       NO-BREAK SPACE
         | 
| 28 | 
            +
                            0x1680,                # White_Space # Zs       OGHAM SPACE MARK
         | 
| 29 | 
            +
                            0x180E,                # White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
         | 
| 30 | 
            +
                            (0x2000..0x200A).to_a, # White_Space # Zs  [11] EN QUAD..HAIR SPACE
         | 
| 31 | 
            +
                            0x2028,                # White_Space # Zl       LINE SEPARATOR
         | 
| 32 | 
            +
                            0x2029,                # White_Space # Zp       PARAGRAPH SEPARATOR
         | 
| 33 | 
            +
                            0x202F,                # White_Space # Zs       NARROW NO-BREAK SPACE
         | 
| 34 | 
            +
                            0x205F,                # White_Space # Zs       MEDIUM MATHEMATICAL SPACE
         | 
| 35 | 
            +
                            0x3000,                # White_Space # Zs       IDEOGRAPHIC SPACE
         | 
| 36 | 
            +
                          ].flatten.collect{ |e| [e].pack 'U*' }
         | 
| 37 | 
            +
                  input.join.clean.should == " \n  \n                     "
         | 
| 38 | 
            +
                end
         | 
| 22 39 | 
             
                describe "with invalid UTF-8 sequence" do
         | 
| 23 40 | 
             
                  before :all do
         | 
| 24 41 | 
             
                    @input = "\210\004"
         | 
| @@ -201,4 +218,54 @@ describe String::Cleaner do | |
| 201 218 | 
             
                  end
         | 
| 202 219 | 
             
                end
         | 
| 203 220 | 
             
              end
         | 
| 221 | 
            +
              describe "#trim(chars = \"\")" do
         | 
| 222 | 
            +
                it "should use #strip when used without params" do
         | 
| 223 | 
            +
                  string, expected = "", mock
         | 
| 224 | 
            +
                  string.stub(:strip).and_return expected
         | 
| 225 | 
            +
                  string.trim.should be expected
         | 
| 226 | 
            +
                end
         | 
| 227 | 
            +
                it "should remove multiple characters at once from beginning and end" do
         | 
| 228 | 
            +
                  prefix, suffix = " rhuif dww f f", "dqz  qafdédsj iowe fcms. qpo asttt t dtt"
         | 
| 229 | 
            +
                  to_remove = "acdeéfhijmopqrstuwz "
         | 
| 230 | 
            +
                  "#{prefix}d#{suffix}".trim(to_remove).should eql "."
         | 
| 231 | 
            +
                  "#{prefix}D#{suffix}".trim(to_remove).should eql "Ddqz  qafdédsj iowe fcms."
         | 
| 232 | 
            +
                end
         | 
| 233 | 
            +
              end
         | 
| 234 | 
            +
              describe "#fix_endlines" do
         | 
| 235 | 
            +
                it "should convert windows endlines" do
         | 
| 236 | 
            +
                  "this is a\r\ntest\r\n".fix_endlines.should eql "this is a\ntest\n"
         | 
| 237 | 
            +
                end
         | 
| 238 | 
            +
                it "should convert old mac endlines" do
         | 
| 239 | 
            +
                  "this is a\rtest\r".fix_endlines.should eql "this is a\ntest\n"
         | 
| 240 | 
            +
                end
         | 
| 241 | 
            +
                it "should not modify proper linux endlines" do
         | 
| 242 | 
            +
                  "this is a\ntest\n".fix_endlines.should eql "this is a\ntest\n"
         | 
| 243 | 
            +
                end
         | 
| 244 | 
            +
                it "should convert mixed endlines" do
         | 
| 245 | 
            +
                  "this is a\n\rtest\r\n".fix_endlines.should eql "this is a\n\ntest\n"
         | 
| 246 | 
            +
                end
         | 
| 247 | 
            +
              end
         | 
| 248 | 
            +
              describe "#to_permalink(separator=\"-\")" do
         | 
| 249 | 
            +
                it "should create nice permalink for string with many accents" do
         | 
| 250 | 
            +
                  crazy = "  ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý - Hello world, I'm a crazy string!! "
         | 
| 251 | 
            +
                  crazy.to_permalink.should == "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
         | 
| 252 | 
            +
                end
         | 
| 253 | 
            +
                it "should create nice permalink even for evil string" do
         | 
| 254 | 
            +
                  evil = (128..255).inject(""){ |acc, b| acc += ("%c" % b) }
         | 
| 255 | 
            +
                  evil.to_permalink.should == "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
         | 
| 256 | 
            +
                end
         | 
| 257 | 
            +
                it "should remove endlines too" do
         | 
| 258 | 
            +
                  "this\nis\ta\ntest".to_permalink("_").should eql "this_is_a_test"
         | 
| 259 | 
            +
                end
         | 
| 260 | 
            +
              end
         | 
| 261 | 
            +
              describe "#nl2br" do
         | 
| 262 | 
            +
                it "should convert \n to <br/>\n" do
         | 
| 263 | 
            +
                  "this\nis\ta\ntest\r".nl2br.should eql "this<br/>\nis\ta<br/>\ntest\r"
         | 
| 264 | 
            +
                end
         | 
| 265 | 
            +
              end
         | 
| 266 | 
            +
              describe "#to_nicer_sym" do
         | 
| 267 | 
            +
                it "should convert \"Select or Other\" to :select_or_other" do
         | 
| 268 | 
            +
                  "Select or Other".to_nicer_sym.should be :select_or_other
         | 
| 269 | 
            +
                end
         | 
| 270 | 
            +
              end
         | 
| 204 271 | 
             
            end
         | 
    
        data/string_cleaner.gemspec
    CHANGED
    
    | @@ -2,11 +2,11 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            Gem::Specification.new do |s|
         | 
| 4 4 | 
             
              s.name = %q{string_cleaner}
         | 
| 5 | 
            -
              s.version = "0. | 
| 5 | 
            +
              s.version = "0.2.0"
         | 
| 6 6 |  | 
| 7 7 | 
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
         | 
| 8 8 | 
             
              s.authors = ["Joseph Halter"]
         | 
| 9 | 
            -
              s.date = %q{ | 
| 9 | 
            +
              s.date = %q{2010-10-18}
         | 
| 10 10 | 
             
              s.email = %q{joseph@openhood.com}
         | 
| 11 11 | 
             
              s.extra_rdoc_files = [
         | 
| 12 12 | 
             
                "LICENSE",
         | 
| @@ -17,7 +17,6 @@ Gem::Specification.new do |s| | |
| 17 17 | 
             
                 "LICENSE",
         | 
| 18 18 | 
             
                 "README.rdoc",
         | 
| 19 19 | 
             
                 "Rakefile",
         | 
| 20 | 
            -
                 "VERSION",
         | 
| 21 20 | 
             
                 "lib/string_cleaner.rb",
         | 
| 22 21 | 
             
                 "spec/spec_helper.rb",
         | 
| 23 22 | 
             
                 "spec/string_cleaner_spec.rb",
         | 
| @@ -33,6 +32,8 @@ Gem::Specification.new do |s| | |
| 33 32 | 
             
                "spec/spec_helper.rb",
         | 
| 34 33 | 
             
                 "spec/string_cleaner_spec.rb"
         | 
| 35 34 | 
             
              ]
         | 
| 35 | 
            +
              s.add_runtime_dependency "unidecoder"
         | 
| 36 | 
            +
              s.add_development_dependency "rspec"
         | 
| 36 37 |  | 
| 37 38 | 
             
              if s.respond_to? :specification_version then
         | 
| 38 39 | 
             
                current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
         | 
    
        metadata
    CHANGED
    
    | @@ -1,13 +1,12 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: string_cleaner
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            -
              hash: 27
         | 
| 5 4 | 
             
              prerelease: false
         | 
| 6 5 | 
             
              segments: 
         | 
| 7 6 | 
             
              - 0
         | 
| 8 | 
            -
              -  | 
| 7 | 
            +
              - 2
         | 
| 9 8 | 
             
              - 0
         | 
| 10 | 
            -
              version: 0. | 
| 9 | 
            +
              version: 0.2.0
         | 
| 11 10 | 
             
            platform: ruby
         | 
| 12 11 | 
             
            authors: 
         | 
| 13 12 | 
             
            - Joseph Halter
         | 
| @@ -15,10 +14,35 @@ autorequire: | |
| 15 14 | 
             
            bindir: bin
         | 
| 16 15 | 
             
            cert_chain: []
         | 
| 17 16 |  | 
| 18 | 
            -
            date:  | 
| 17 | 
            +
            date: 2010-10-18 00:00:00 +02:00
         | 
| 19 18 | 
             
            default_executable: 
         | 
| 20 | 
            -
            dependencies:  | 
| 21 | 
            -
             | 
| 19 | 
            +
            dependencies: 
         | 
| 20 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 21 | 
            +
              name: unidecoder
         | 
| 22 | 
            +
              prerelease: false
         | 
| 23 | 
            +
              requirement: &id001 !ruby/object:Gem::Requirement 
         | 
| 24 | 
            +
                none: false
         | 
| 25 | 
            +
                requirements: 
         | 
| 26 | 
            +
                - - ">="
         | 
| 27 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 28 | 
            +
                    segments: 
         | 
| 29 | 
            +
                    - 0
         | 
| 30 | 
            +
                    version: "0"
         | 
| 31 | 
            +
              type: :runtime
         | 
| 32 | 
            +
              version_requirements: *id001
         | 
| 33 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 34 | 
            +
              name: rspec
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              requirement: &id002 !ruby/object:Gem::Requirement 
         | 
| 37 | 
            +
                none: false
         | 
| 38 | 
            +
                requirements: 
         | 
| 39 | 
            +
                - - ">="
         | 
| 40 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 41 | 
            +
                    segments: 
         | 
| 42 | 
            +
                    - 0
         | 
| 43 | 
            +
                    version: "0"
         | 
| 44 | 
            +
              type: :development
         | 
| 45 | 
            +
              version_requirements: *id002
         | 
| 22 46 | 
             
            description: 
         | 
| 23 47 | 
             
            email: joseph@openhood.com
         | 
| 24 48 | 
             
            executables: []
         | 
| @@ -33,7 +57,6 @@ files: | |
| 33 57 | 
             
            - LICENSE
         | 
| 34 58 | 
             
            - README.rdoc
         | 
| 35 59 | 
             
            - Rakefile
         | 
| 36 | 
            -
            - VERSION
         | 
| 37 60 | 
             
            - lib/string_cleaner.rb
         | 
| 38 61 | 
             
            - spec/spec_helper.rb
         | 
| 39 62 | 
             
            - spec/string_cleaner_spec.rb
         | 
| @@ -52,7 +75,6 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 52 75 | 
             
              requirements: 
         | 
| 53 76 | 
             
              - - ">="
         | 
| 54 77 | 
             
                - !ruby/object:Gem::Version 
         | 
| 55 | 
            -
                  hash: 3
         | 
| 56 78 | 
             
                  segments: 
         | 
| 57 79 | 
             
                  - 0
         | 
| 58 80 | 
             
                  version: "0"
         | 
| @@ -61,7 +83,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 61 83 | 
             
              requirements: 
         | 
| 62 84 | 
             
              - - ">="
         | 
| 63 85 | 
             
                - !ruby/object:Gem::Version 
         | 
| 64 | 
            -
                  hash: 3
         | 
| 65 86 | 
             
                  segments: 
         | 
| 66 87 | 
             
                  - 0
         | 
| 67 88 | 
             
                  version: "0"
         | 
    
        data/VERSION
    DELETED
    
    | @@ -1 +0,0 @@ | |
| 1 | 
            -
            0.1.0
         |