ensure_valid_encoding 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -7
- data/lib/ensure_valid_encoding.rb +8 -7
- data/lib/ensure_valid_encoding/version.rb +1 -1
- data/test/ensure_valid_encoding_test.rb +6 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -63,8 +63,8 @@ fixed = ensure_valid_encoding(bad_str)
|
|
63
63
|
## Rationale
|
64
64
|
|
65
65
|
You are taking textual input from some external source. Could be user input,
|
66
|
-
could be a user-uploaded file of some kind, could be
|
67
|
-
be anything.
|
66
|
+
could be a user-uploaded file of some kind, could be ancient Usenet archives,
|
67
|
+
could be a third-party API response or web scrape, could be anything.
|
68
68
|
|
69
69
|
You know what character encoding the textual data _claims_ to be, what it
|
70
70
|
_should_ be, and what it _usually_ is. But occasionally it may have bad bytes
|
@@ -99,11 +99,10 @@ surprisingly tricky to do with the ruby 1.9.3 stdlib, or even with 'iconv'.
|
|
99
99
|
So there you go, now you can do it with this gem. Maybe not as performant
|
100
100
|
as if it were implemented in C like stdlib char encoding routines, but, hey.
|
101
101
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
ruby core team also thinks it's an unusually odd thing to do. shrug.
|
102
|
+
|
103
|
+
**Note:** I have [filed a feature request](https://bugs.ruby-lang.org/issues/6321) for ruby stdlib. Developer from
|
104
|
+
ruby core team also thinks it's an unusually odd thing to do. Most of rubydom seems to agree. C'est la vie. Me and
|
105
|
+
many of my colleagues need to do this all the time, but if you don't, don't do it.
|
107
106
|
|
108
107
|
## Developing/Contributing
|
109
108
|
|
@@ -29,7 +29,7 @@ module EnsureValidEncoding
|
|
29
29
|
# ensure_valid_encoding( some_string, :invalid => :replace)
|
30
30
|
# ensure_valid_encoding( some_string, :invalid => :replace, :replace => '')
|
31
31
|
# ensure_valid_encoding( some_string, :invalid => :replace, :replace => "*")
|
32
|
-
|
32
|
+
def self.ensure_valid_encoding(str, options = {})
|
33
33
|
# Can do nothing in ruby 1.8.x
|
34
34
|
return str unless str.respond_to?(:encoding)
|
35
35
|
|
@@ -55,12 +55,13 @@ module EnsureValidEncoding
|
|
55
55
|
c
|
56
56
|
else
|
57
57
|
options[:replace] || (
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
58
|
+
# UTF-8 for unicode replacement char, encode in
|
59
|
+
# encoding of input string, using '?' as a fallback where
|
60
|
+
# it can't be (which should be non-unicode encodings)
|
61
|
+
"\uFFFD".force_encoding("UTF-8").encode( str.encoding,
|
62
|
+
:undef => :replace,
|
63
|
+
:replace => '?' )
|
64
|
+
)
|
64
65
|
end
|
65
66
|
end.join
|
66
67
|
end
|
@@ -6,6 +6,7 @@ require 'ensure_valid_encoding'
|
|
6
6
|
describe EnsureValidEncoding do
|
7
7
|
before do
|
8
8
|
@bad_bytes_utf8 = "M\xE9xico".force_encoding("UTF-8")
|
9
|
+
@bad_bytes_utf16 = "M\x00\xDFxico".force_encoding( Encoding::UTF_16LE )
|
9
10
|
@bad_bytes_ascii = "M\xA1xico".force_encoding("ASCII")
|
10
11
|
end
|
11
12
|
|
@@ -20,6 +21,11 @@ describe EnsureValidEncoding do
|
|
20
21
|
:invalid => :replace).
|
21
22
|
must_equal("M\uFFFDxico")
|
22
23
|
end
|
24
|
+
|
25
|
+
it "replaces with unicode relacement string for UTF16LE" do
|
26
|
+
assert EnsureValidEncoding.ensure_valid_encoding(@bad_bytes_utf16,
|
27
|
+
:invalid => :replace).valid_encoding?
|
28
|
+
end
|
23
29
|
|
24
30
|
it "replaces with chosen replacement string" do
|
25
31
|
EnsureValidEncoding.ensure_valid_encoding(@bad_bytes_utf8,
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ensure_valid_encoding
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-07-
|
12
|
+
date: 2012-07-13 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description:
|
15
15
|
email:
|