ensure_valid_encoding 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +6 -7
- data/lib/ensure_valid_encoding.rb +8 -7
- data/lib/ensure_valid_encoding/version.rb +1 -1
- data/test/ensure_valid_encoding_test.rb +6 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -63,8 +63,8 @@ fixed = ensure_valid_encoding(bad_str)
|
|
63
63
|
## Rationale
|
64
64
|
|
65
65
|
You are taking textual input from some external source. Could be user input,
|
66
|
-
could be a user-uploaded file of some kind, could be
|
67
|
-
be anything.
|
66
|
+
could be a user-uploaded file of some kind, could be ancient Usenet archives,
|
67
|
+
could be a third-party API response or web scrape, could be anything.
|
68
68
|
|
69
69
|
You know what character encoding the textual data _claims_ to be, what it
|
70
70
|
_should_ be, and what it _usually_ is. But occasionally it may have bad bytes
|
@@ -99,11 +99,10 @@ surprisingly tricky to do with the ruby 1.9.3 stdlib, or even with 'iconv'.
|
|
99
99
|
So there you go, now you can do it with this gem. Maybe not as performant
|
100
100
|
as if it were implemented in C like stdlib char encoding routines, but, hey.
|
101
101
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
ruby core team also thinks it's an unusually odd thing to do. shrug.
|
102
|
+
|
103
|
+
**Note:** I have [filed a feature request](https://bugs.ruby-lang.org/issues/6321) for ruby stdlib. Developer from
|
104
|
+
ruby core team also thinks it's an unusually odd thing to do. Most of rubydom seems to agree. C'est la vie. Me and
|
105
|
+
many of my colleagues need to do this all the time, but if you don't, don't do it.
|
107
106
|
|
108
107
|
## Developing/Contributing
|
109
108
|
|
@@ -29,7 +29,7 @@ module EnsureValidEncoding
|
|
29
29
|
# ensure_valid_encoding( some_string, :invalid => :replace)
|
30
30
|
# ensure_valid_encoding( some_string, :invalid => :replace, :replace => '')
|
31
31
|
# ensure_valid_encoding( some_string, :invalid => :replace, :replace => "*")
|
32
|
-
|
32
|
+
def self.ensure_valid_encoding(str, options = {})
|
33
33
|
# Can do nothing in ruby 1.8.x
|
34
34
|
return str unless str.respond_to?(:encoding)
|
35
35
|
|
@@ -55,12 +55,13 @@ module EnsureValidEncoding
|
|
55
55
|
c
|
56
56
|
else
|
57
57
|
options[:replace] || (
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
58
|
+
# UTF-8 for unicode replacement char, encode in
|
59
|
+
# encoding of input string, using '?' as a fallback where
|
60
|
+
# it can't be (which should be non-unicode encodings)
|
61
|
+
"\uFFFD".force_encoding("UTF-8").encode( str.encoding,
|
62
|
+
:undef => :replace,
|
63
|
+
:replace => '?' )
|
64
|
+
)
|
64
65
|
end
|
65
66
|
end.join
|
66
67
|
end
|
@@ -6,6 +6,7 @@ require 'ensure_valid_encoding'
|
|
6
6
|
describe EnsureValidEncoding do
|
7
7
|
before do
|
8
8
|
@bad_bytes_utf8 = "M\xE9xico".force_encoding("UTF-8")
|
9
|
+
@bad_bytes_utf16 = "M\x00\xDFxico".force_encoding( Encoding::UTF_16LE )
|
9
10
|
@bad_bytes_ascii = "M\xA1xico".force_encoding("ASCII")
|
10
11
|
end
|
11
12
|
|
@@ -20,6 +21,11 @@ describe EnsureValidEncoding do
|
|
20
21
|
:invalid => :replace).
|
21
22
|
must_equal("M\uFFFDxico")
|
22
23
|
end
|
24
|
+
|
25
|
+
it "replaces with unicode relacement string for UTF16LE" do
|
26
|
+
assert EnsureValidEncoding.ensure_valid_encoding(@bad_bytes_utf16,
|
27
|
+
:invalid => :replace).valid_encoding?
|
28
|
+
end
|
23
29
|
|
24
30
|
it "replaces with chosen replacement string" do
|
25
31
|
EnsureValidEncoding.ensure_valid_encoding(@bad_bytes_utf8,
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ensure_valid_encoding
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-13 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description:
|
15
15
|
email:
|