corefines 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.adoc +6 -0
- data/README.adoc +4 -2
- data/lib/corefines/string.rb +51 -0
- data/lib/corefines/version.rb +1 -1
- data/spec/string/force_utf8_spec.rb +68 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 85b1a470338e303705ff7caf0c4cfb7dd36c4cb6
|
4
|
+
data.tar.gz: 5fbaac4aa91cd5b8d36ed2889bb1296deb5d2b6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3f2dc602df43dcfdcdadc87d9bcd551f59a9e9836fe0ded9609dfeb1ed1c33fbba0afeb09dea1259675769e959e23fc80f8f65200736a3f3a470a8b8b47832d5
|
7
|
+
data.tar.gz: ba274790cd0239df3761c3d231d92e9acf0d6158e96889d4b7016349e0c4ac242194c10532391f4ff11999bf5d7d23d3f42087bee4a6cc10e56d3e1ad621e7ea
|
data/CHANGELOG.adoc
CHANGED
@@ -3,6 +3,12 @@
|
|
3
3
|
:doc-base-url: http://www.rubydoc.info/github/jirutka/corefines/Corefines
|
4
4
|
:issue-uri: {repo-uri}/issues
|
5
5
|
|
6
|
+
|
7
|
+
== 1.4.0 (2015-05-03)
|
8
|
+
|
9
|
+
* Add new refinement {doc-base-url}/String/ForceUTF8[String#force_utf8].
|
10
|
+
|
11
|
+
|
6
12
|
== 1.3.0 (2015-04-29)
|
7
13
|
|
8
14
|
* Add new refinement {doc-base-url}/String/ToRegexp[String#to_regexp].
|
data/README.adoc
CHANGED
@@ -41,12 +41,12 @@ TODO
|
|
41
41
|
Add this line to your application’s Gemfile:
|
42
42
|
|
43
43
|
[source]
|
44
|
-
gem 'corefines', '~> 1.3'
|
44
|
+
gem 'corefines', '~> 1.4'
|
45
45
|
|
46
46
|
or to your gemspec:
|
47
47
|
|
48
48
|
[source]
|
49
|
-
s.add_runtime_dependency 'corefines', '~> 1.3'
|
49
|
+
s.add_runtime_dependency 'corefines', '~> 1.4'
|
50
50
|
|
51
51
|
and then execute:
|
52
52
|
|
@@ -165,6 +165,8 @@ Not ideal indeed, but probably the best of what we can achieve.
|
|
165
165
|
** {doc-base-url}/String/Color[#color]
|
166
166
|
** {doc-base-url}/String/Concat[#concat!]
|
167
167
|
** {doc-base-url}/String/Decolor[#decolor]
|
168
|
+
** {doc-base-url}/String/ForceUTF8[#force_utf8]
|
169
|
+
** {doc-base-url}/String/ForceUTF8[#force_utf8!]
|
168
170
|
** {doc-base-url}/String/Indent[#indent]
|
169
171
|
** {doc-base-url}/String/RelativePathFrom[#relative_path_from]
|
170
172
|
** {doc-base-url}/String/Remove[#remove]
|
data/lib/corefines/string.rb
CHANGED
@@ -127,6 +127,56 @@ module Corefines
|
|
127
127
|
end
|
128
128
|
end
|
129
129
|
|
130
|
+
##
|
131
|
+
# @!method force_utf8
|
132
|
+
# Returns a copy of _str_ with encoding changed to UTF-8 and all invalid
|
133
|
+
# byte sequences replaced with the Unicode Replacement Character (U+FFFD).
|
134
|
+
#
|
135
|
+
# If _str_ responds to +#scrub!+ (Ruby >=2.1), then it's used for
|
136
|
+
# replacing invalid bytes. Otherwise a simple custom implementation is
|
137
|
+
# used (may not return the same result as +#scrub!+).
|
138
|
+
#
|
139
|
+
# @return [String] a valid UTF-8 string.
|
140
|
+
#
|
141
|
+
# @!method force_utf8!
|
142
|
+
# Changes the encoding to UTF-8, replaces all invalid byte sequences with
|
143
|
+
# the Unicode Replacement Character (U+FFFD) and returns self.
|
144
|
+
# This is the same as {#force_utf8}, except it modifies the receiver in-place.
|
145
|
+
#
|
146
|
+
# @return (see #force_utf8)
|
147
|
+
#
|
148
|
+
module ForceUTF8
|
149
|
+
refine ::String do
|
150
|
+
def force_utf8
|
151
|
+
dup.force_utf8!
|
152
|
+
end
|
153
|
+
|
154
|
+
def force_utf8!
|
155
|
+
str = force_encoding(Encoding::UTF_8)
|
156
|
+
|
157
|
+
if str.respond_to? :scrub!
|
158
|
+
str.scrub!
|
159
|
+
|
160
|
+
else
|
161
|
+
result = ''.force_encoding('BINARY')
|
162
|
+
invalid = false
|
163
|
+
|
164
|
+
str.chars.each do |c|
|
165
|
+
if c.valid_encoding?
|
166
|
+
result << c
|
167
|
+
invalid = false
|
168
|
+
elsif !invalid
|
169
|
+
result << "\uFFFD"
|
170
|
+
invalid = true
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
replace result.force_encoding(Encoding::UTF_8)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
130
180
|
##
|
131
181
|
# @!method indent(amount, indent_str = nil, indent_empty_lines = false)
|
132
182
|
# Returns an indented copy of this string.
|
@@ -365,6 +415,7 @@ module Corefines
|
|
365
415
|
|
366
416
|
class << self
|
367
417
|
alias_method :concat!, :concat
|
418
|
+
alias_method :force_utf8!, :force_utf8
|
368
419
|
alias_method :indent!, :indent
|
369
420
|
alias_method :remove!, :remove
|
370
421
|
end
|
data/lib/corefines/version.rb
CHANGED
data/spec/string/force_utf8_spec.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
describe String do
|
4
|
+
using Corefines::String::force_utf8
|
5
|
+
|
6
|
+
describe '#force_utf8!' do
|
7
|
+
|
8
|
+
context "string encoded in ISO-8859-1" do
|
9
|
+
subject(:str) { 'foo'.encode(Encoding::ISO_8859_1) }
|
10
|
+
|
11
|
+
it "sets encoding to UTF-8" do
|
12
|
+
expect( str.force_utf8!.encoding ).to eql Encoding::UTF_8
|
13
|
+
expect( str.encoding ).to eql Encoding::UTF_8
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
context "string with bad bytes" do
|
18
|
+
{
|
19
|
+
"\xE3llons-y!" => "�llons-y!",
|
20
|
+
"Al\xE9ons-y!" => "Al�ons-y!",
|
21
|
+
"Al\xE9ons-y\x80" => "Al�ons-y�"
|
22
|
+
}
|
23
|
+
.each do |input, expected|
|
24
|
+
context input.inspect do
|
25
|
+
|
26
|
+
subject(:str) { input.dup } # defreeze!
|
27
|
+
|
28
|
+
it "replaces bad bytes with the replacement char" do
|
29
|
+
expect( str.force_utf8! ).to eq expected
|
30
|
+
expect( str ).to eq expected
|
31
|
+
end
|
32
|
+
|
33
|
+
it "produces a valid UTF-8 string" do
|
34
|
+
str.force_utf8!
|
35
|
+
expect( str.valid_encoding? ).to be true
|
36
|
+
expect( str.encoding ).to eql Encoding::UTF_8
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it "collapses multiple consecutive bad bytes into one replacement" do
|
42
|
+
str = "abc\u3042\xE3\x80"
|
43
|
+
expect( str.force_utf8! ).to eq "abc\u3042�"
|
44
|
+
expect( str ).to eq "abc\u3042�"
|
45
|
+
expect( str.valid_encoding? ).to be true
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
|
51
|
+
# force_utf8 just duplicates the string and calls force_utf8!, so there's no
|
52
|
+
# need to test it thoroughly.
|
53
|
+
describe '#force_utf8' do
|
54
|
+
|
55
|
+
it "replaces bad bytes with the replacement char" do
|
56
|
+
expect( "Al\xE9ons-y\x80".force_utf8 ).to eq "Al�ons-y�"
|
57
|
+
end
|
58
|
+
|
59
|
+
it "returns a copy of the string and keeps original unaffected" do
|
60
|
+
str = '\xE3llons-y!'.encode(Encoding::ISO_8859_1)
|
61
|
+
copy = str.dup
|
62
|
+
str.force_utf8
|
63
|
+
|
64
|
+
expect( str ).to eql copy
|
65
|
+
expect( str.encoding ).to eql Encoding::ISO_8859_1
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: corefines
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jakub Jirutka
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|
@@ -144,6 +144,7 @@ files:
|
|
144
144
|
- spec/string/color_spec.rb
|
145
145
|
- spec/string/concat_spec.rb
|
146
146
|
- spec/string/decolor_spec.rb
|
147
|
+
- spec/string/force_utf8_spec.rb
|
147
148
|
- spec/string/indent_spec.rb
|
148
149
|
- spec/string/relative_path_from_spec.rb
|
149
150
|
- spec/string/remove_spec.rb
|