string_cleaner 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/README.rdoc +7 -1
- data/Rakefile +2 -0
- data/lib/string_cleaner.rb +3 -4
- data/spec/string_cleaner_spec.rb +1 -1
- data/string_cleaner.gemspec +1 -1
- metadata +2 -2
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
|
@@ -18,9 +18,15 @@ Ruby 1.9+ has native support for unicode and specs are 100% passing.
|
|
|
18
18
|
|
|
19
19
|
Because Ruby 1.8.x has no native support for Unicode, you must install oniguruma and the jasherai-oniguruma gem.
|
|
20
20
|
|
|
21
|
+
For example, using homebrew you would do:
|
|
22
|
+
|
|
23
|
+
brew install oniguruma
|
|
24
|
+
bundle config build.jasherai-oniguruma --with-onig-dir=`brew --prefix oniguruma`
|
|
25
|
+
bundle install
|
|
26
|
+
|
|
21
27
|
== Example usage
|
|
22
28
|
|
|
23
|
-
|
|
29
|
+
"\210\004".clean # => " "
|
|
24
30
|
|
|
25
31
|
== Copyright
|
|
26
32
|
|
data/Rakefile
CHANGED
data/lib/string_cleaner.rb
CHANGED
|
@@ -14,9 +14,8 @@ module String::Cleaner
|
|
|
14
14
|
unless utf8.valid_encoding? # if invalid UTF-8
|
|
15
15
|
utf8 = utf8.force_encoding("ISO8859-15")
|
|
16
16
|
utf8.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
|
|
17
|
-
utf8.gsub!("\xC2\x80", "€") # special case for euro sign from Windows-1252
|
|
18
|
-
utf8.force_encoding("UTF-8")
|
|
19
17
|
end
|
|
18
|
+
utf8.gsub!("\u0080", "€") # special case for euro sign from Windows-1252
|
|
20
19
|
utf8
|
|
21
20
|
else
|
|
22
21
|
require "iconv"
|
|
@@ -25,7 +24,7 @@ module String::Cleaner
|
|
|
25
24
|
Iconv.new("UTF-8", "UTF-8").iconv(utf8)
|
|
26
25
|
rescue
|
|
27
26
|
utf8.gsub!(/\x80/n, "\xA4")
|
|
28
|
-
Iconv.new("UTF-8//IGNORE", "ISO8859-
|
|
27
|
+
Iconv.new("UTF-8//IGNORE", "ISO8859-1").iconv(utf8).gsub("¤", "€")
|
|
29
28
|
end
|
|
30
29
|
end
|
|
31
30
|
end
|
|
@@ -67,7 +66,7 @@ module String::Cleaner
|
|
|
67
66
|
end
|
|
68
67
|
|
|
69
68
|
def to_permalink(separator="-")
|
|
70
|
-
|
|
69
|
+
clean.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
|
|
71
70
|
end
|
|
72
71
|
|
|
73
72
|
def nl2br
|
data/spec/string_cleaner_spec.rb
CHANGED
|
@@ -141,7 +141,7 @@ describe String::Cleaner do
|
|
|
141
141
|
@output = @input.clean
|
|
142
142
|
end
|
|
143
143
|
it "should wipe out the control characters" do
|
|
144
|
-
@output.should == " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ €
|
|
144
|
+
@output.should == " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
|
|
145
145
|
end
|
|
146
146
|
end
|
|
147
147
|
describe "with invalid UTF-8 sequence" do
|
data/string_cleaner.gemspec
CHANGED