string_cleaner 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/README.rdoc +7 -1
- data/Rakefile +2 -0
- data/lib/string_cleaner.rb +3 -4
- data/spec/string_cleaner_spec.rb +1 -1
- data/string_cleaner.gemspec +1 -1
- metadata +2 -2
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -18,9 +18,15 @@ Ruby 1.9+ has native support for unicode and specs are 100% passing.
|
|
18
18
|
|
19
19
|
Because Ruby 1.8.x has no native support for Unicode, you must install oniguruma and the jasherai-oniguruma gem.
|
20
20
|
|
21
|
+
For example, using homebrew you would do:
|
22
|
+
|
23
|
+
brew install oniguruma
|
24
|
+
bundle config build.jasherai-oniguruma --with-onig-dir=`brew --prefix oniguruma`
|
25
|
+
bundle install
|
26
|
+
|
21
27
|
== Example usage
|
22
28
|
|
23
|
-
|
29
|
+
"\210\004".clean # => " "
|
24
30
|
|
25
31
|
== Copyright
|
26
32
|
|
data/Rakefile
CHANGED
data/lib/string_cleaner.rb
CHANGED
@@ -14,9 +14,8 @@ module String::Cleaner
|
|
14
14
|
unless utf8.valid_encoding? # if invalid UTF-8
|
15
15
|
utf8 = utf8.force_encoding("ISO8859-15")
|
16
16
|
utf8.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
|
17
|
-
utf8.gsub!("\xC2\x80", "€") # special case for euro sign from Windows-1252
|
18
|
-
utf8.force_encoding("UTF-8")
|
19
17
|
end
|
18
|
+
utf8.gsub!("\u0080", "€") # special case for euro sign from Windows-1252
|
20
19
|
utf8
|
21
20
|
else
|
22
21
|
require "iconv"
|
@@ -25,7 +24,7 @@ module String::Cleaner
|
|
25
24
|
Iconv.new("UTF-8", "UTF-8").iconv(utf8)
|
26
25
|
rescue
|
27
26
|
utf8.gsub!(/\x80/n, "\xA4")
|
28
|
-
Iconv.new("UTF-8//IGNORE", "ISO8859-15").iconv(utf8)
|
27
|
+
Iconv.new("UTF-8//IGNORE", "ISO8859-1").iconv(utf8).gsub("¤", "€")
|
29
28
|
end
|
30
29
|
end
|
31
30
|
end
|
@@ -67,7 +66,7 @@ module String::Cleaner
|
|
67
66
|
end
|
68
67
|
|
69
68
|
def to_permalink(separator="-")
|
70
|
-
|
69
|
+
clean.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
|
71
70
|
end
|
72
71
|
|
73
72
|
def nl2br
|
data/spec/string_cleaner_spec.rb
CHANGED
@@ -141,7 +141,7 @@ describe String::Cleaner do
|
|
141
141
|
@output = @input.clean
|
142
142
|
end
|
143
143
|
it "should wipe out the control characters" do
|
144
|
-
@output.should == " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ €
|
144
|
+
@output.should == " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
|
145
145
|
end
|
146
146
|
end
|
147
147
|
describe "with invalid UTF-8 sequence" do
|
data/string_cleaner.gemspec
CHANGED