corefines 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.adoc +6 -0
- data/README.adoc +4 -2
- data/lib/corefines/string.rb +51 -0
- data/lib/corefines/version.rb +1 -1
- data/spec/string/force_utf8_spec.rb +68 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 85b1a470338e303705ff7caf0c4cfb7dd36c4cb6
|
4
|
+
data.tar.gz: 5fbaac4aa91cd5b8d36ed2889bb1296deb5d2b6f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3f2dc602df43dcfdcdadc87d9bcd551f59a9e9836fe0ded9609dfeb1ed1c33fbba0afeb09dea1259675769e959e23fc80f8f65200736a3f3a470a8b8b47832d5
|
7
|
+
data.tar.gz: ba274790cd0239df3761c3d231d92e9acf0d6158e96889d4b7016349e0c4ac242194c10532391f4ff11999bf5d7d23d3f42087bee4a6cc10e56d3e1ad621e7ea
|
data/CHANGELOG.adoc
CHANGED
@@ -3,6 +3,12 @@
|
|
3
3
|
:doc-base-url: http://www.rubydoc.info/github/jirutka/corefines/Corefines
|
4
4
|
:issue-uri: {repo-uri}/issues
|
5
5
|
|
6
|
+
|
7
|
+
== 1.4.0 (2015-05-03)
|
8
|
+
|
9
|
+
* Add new refinement {doc-base-url}/String/ForceUTF8[String#force_utf8].
|
10
|
+
|
11
|
+
|
6
12
|
== 1.3.0 (2015-04-29)
|
7
13
|
|
8
14
|
* Add new refinement {doc-base-url}/String/ToRegexp[String#to_regexp].
|
data/README.adoc
CHANGED
@@ -41,12 +41,12 @@ TODO
|
|
41
41
|
Add this line to your application’s Gemfile:
|
42
42
|
|
43
43
|
[source]
|
44
|
-
gem 'corefines', '~> 1.3'
|
44
|
+
gem 'corefines', '~> 1.4'
|
45
45
|
|
46
46
|
or to your gemspec:
|
47
47
|
|
48
48
|
[source]
|
49
|
-
s.add_runtime_dependency 'corefines', '~> 1.3'
|
49
|
+
s.add_runtime_dependency 'corefines', '~> 1.4'
|
50
50
|
|
51
51
|
and then execute:
|
52
52
|
|
@@ -165,6 +165,8 @@ Not ideal indeed, but probably the best of what we can achieve.
|
|
165
165
|
** {doc-base-url}/String/Color[#color]
|
166
166
|
** {doc-base-url}/String/Concat[#concat!]
|
167
167
|
** {doc-base-url}/String/Decolor[#decolor]
|
168
|
+
** {doc-base-url}/String/ForceUTF8[#force_utf8]
|
169
|
+
** {doc-base-url}/String/ForceUTF8[#force_utf8!]
|
168
170
|
** {doc-base-url}/String/Indent[#indent]
|
169
171
|
** {doc-base-url}/String/RelativePathFrom[#relative_path_from]
|
170
172
|
** {doc-base-url}/String/Remove[#remove]
|
data/lib/corefines/string.rb
CHANGED
@@ -127,6 +127,56 @@ module Corefines
|
|
127
127
|
end
|
128
128
|
end
|
129
129
|
|
130
|
+
##
|
131
|
+
# @!method force_utf8
|
132
|
+
# Returns a copy of _str_ with encoding changed to UTF-8 and all invalid
|
133
|
+
# byte sequences replaced with the Unicode Replacement Character (U+FFFD).
|
134
|
+
#
|
135
|
+
# If _str_ responds to +#scrub!+ (Ruby >=2.1), then it's used for
|
136
|
+
# replacing invalid bytes. Otherwise a simple custom implementation is
|
137
|
+
# used (may not return the same result as +#scrub!+).
|
138
|
+
#
|
139
|
+
# @return [String] a valid UTF-8 string.
|
140
|
+
#
|
141
|
+
# @!method force_utf8!
|
142
|
+
# Changes the encoding to UTF-8, replaces all invalid byte sequences with
|
143
|
+
# the Unicode Replacement Character (U+FFFD) and returns self.
|
144
|
+
# This is the same as {#force_utf8}, except it modifies the receiver in-place.
|
145
|
+
#
|
146
|
+
# @return (see #force_utf8)
|
147
|
+
#
|
148
|
+
module ForceUTF8
|
149
|
+
refine ::String do
|
150
|
+
def force_utf8
|
151
|
+
dup.force_utf8!
|
152
|
+
end
|
153
|
+
|
154
|
+
def force_utf8!
|
155
|
+
str = force_encoding(Encoding::UTF_8)
|
156
|
+
|
157
|
+
if str.respond_to? :scrub!
|
158
|
+
str.scrub!
|
159
|
+
|
160
|
+
else
|
161
|
+
result = ''.force_encoding('BINARY')
|
162
|
+
invalid = false
|
163
|
+
|
164
|
+
str.chars.each do |c|
|
165
|
+
if c.valid_encoding?
|
166
|
+
result << c
|
167
|
+
invalid = false
|
168
|
+
elsif !invalid
|
169
|
+
result << "\uFFFD"
|
170
|
+
invalid = true
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
replace result.force_encoding(Encoding::UTF_8)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
130
180
|
##
|
131
181
|
# @!method indent(amount, indent_str = nil, indent_empty_lines = false)
|
132
182
|
# Returns an indented copy of this string.
|
@@ -365,6 +415,7 @@ module Corefines
|
|
365
415
|
|
366
416
|
class << self
|
367
417
|
alias_method :concat!, :concat
|
418
|
+
alias_method :force_utf8!, :force_utf8
|
368
419
|
alias_method :indent!, :indent
|
369
420
|
alias_method :remove!, :remove
|
370
421
|
end
|
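data/lib/corefines/version.rb
CHANGED
[hunk not captured in this extract; the file summary lists +1 -1 for this file, presumably the version constant bump]
data/spec/string/force_utf8_spec.rb
ADDED
@@ -0,0 +1,68 @@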
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
describe String do
|
4
|
+
using Corefines::String::force_utf8
|
5
|
+
|
6
|
+
describe '#force_utf8!' do
|
7
|
+
|
8
|
+
context "string encoded in ISO-8859-1" do
|
9
|
+
subject(:str) { 'foo'.encode(Encoding::ISO_8859_1) }
|
10
|
+
|
11
|
+
it "sets encoding to UTF-8" do
|
12
|
+
expect( str.force_utf8!.encoding ).to eql Encoding::UTF_8
|
13
|
+
expect( str.encoding ).to eql Encoding::UTF_8
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
context "string with bad bytes" do
|
18
|
+
{
|
19
|
+
"\xE3llons-y!" => "�llons-y!",
|
20
|
+
"Al\xE9ons-y!" => "Al�ons-y!",
|
21
|
+
"Al\xE9ons-y\x80" => "Al�ons-y�"
|
22
|
+
}
|
23
|
+
.each do |input, expected|
|
24
|
+
context input.inspect do
|
25
|
+
|
26
|
+
subject(:str) { input.dup } # defreeze!
|
27
|
+
|
28
|
+
it "replaces bad bytes with the replacement char" do
|
29
|
+
expect( str.force_utf8! ).to eq expected
|
30
|
+
expect( str ).to eq expected
|
31
|
+
end
|
32
|
+
|
33
|
+
it "produces a valid UTF-8 string" do
|
34
|
+
str.force_utf8!
|
35
|
+
expect( str.valid_encoding? ).to be true
|
36
|
+
expect( str.encoding ).to eql Encoding::UTF_8
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it "collapses multiple consecutive bad bytes into one replacement" do
|
42
|
+
str = "abc\u3042\xE3\x80"
|
43
|
+
expect( str.force_utf8! ).to eq "abc\u3042�"
|
44
|
+
expect( str ).to eq "abc\u3042�"
|
45
|
+
expect( str.valid_encoding? ).to be true
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
|
51
|
+
# force_utf8 just duplicates the string and calls force_utf8!, so there's no
|
52
|
+
# need to test it thoroughly.
|
53
|
+
describe '#force_utf8' do
|
54
|
+
|
55
|
+
it "replaces bad bytes with the replacement char" do
|
56
|
+
expect( "Al\xE9ons-y\x80".force_utf8 ).to eq "Al�ons-y�"
|
57
|
+
end
|
58
|
+
|
59
|
+
it "returns a copy of the string and keeps original unaffected" do
|
60
|
+
str = '\xE3llons-y!'.encode(Encoding::ISO_8859_1)
|
61
|
+
copy = str.dup
|
62
|
+
str.force_utf8
|
63
|
+
|
64
|
+
expect( str ).to eql copy
|
65
|
+
expect( str.encoding ).to eql Encoding::ISO_8859_1
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: corefines
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jakub Jirutka
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|
@@ -144,6 +144,7 @@ files:
|
|
144
144
|
- spec/string/color_spec.rb
|
145
145
|
- spec/string/concat_spec.rb
|
146
146
|
- spec/string/decolor_spec.rb
|
147
|
+
- spec/string/force_utf8_spec.rb
|
147
148
|
- spec/string/indent_spec.rb
|
148
149
|
- spec/string/relative_path_from_spec.rb
|
149
150
|
- spec/string/remove_spec.rb
|