string_cleaner 0.2.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.rdoc +3 -15
- data/spec/string_cleaner_spec.rb +41 -42
- data/string_cleaner.gemspec +5 -18
- metadata +35 -29
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8451c9bc58c373d7b4a768cb3ed1ff355e5116f24b02c6a5bfeb69aa0549c589
|
4
|
+
data.tar.gz: b226d948238bd18cdacb01402c0576203da724b895c6745d3b713fae5d21f9ee
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3a83693387d088db6f0b89ea73ecffbdf0b034885a0b2418d222c1197c48794b05cc8fb28f477bcfc4d345ee561d0ea3e1a543c34309ddb77ecc79a6adffe4fb
|
7
|
+
data.tar.gz: 3a35bab3513612149b345bf96d536d741ed3d4dfa757a353d3bc955782173139e8dac4c92b0bef025534706ca13e0ba17b14a30da0b10029d2653eeded5224f2
|
data/README.rdoc
CHANGED
@@ -6,23 +6,11 @@ Just add a method .clean to String which does:
|
|
6
6
|
* replace \r\n and \r with \n normalizing end of lines
|
7
7
|
* replace control characters and other invisible chars by spaces
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
sudo gem install JosephHalter-string_cleaner
|
12
|
-
|
13
|
-
== Ruby 1.9+
|
9
|
+
Supports only Ruby 2.6+
|
14
10
|
|
15
|
-
|
16
|
-
|
17
|
-
== Ruby 1.8.x
|
18
|
-
|
19
|
-
Because Ruby 1.8.x has no native support for Unicode, you must install oniguruma and the jasherai-oniguruma gem.
|
11
|
+
== Install
|
20
12
|
|
21
|
-
|
22
|
-
|
23
|
-
brew install oniguruma
|
24
|
-
bundle config build.jasherai-oniguruma --with-onig-dir=`brew --prefix oniguruma`
|
25
|
-
bundle install
|
13
|
+
sudo gem install string_cleaner
|
26
14
|
|
27
15
|
== Example usage
|
28
16
|
|
data/spec/string_cleaner_spec.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
|
-
|
2
|
-
require File.dirname(__FILE__) + "/spec_helper"
|
1
|
+
require "spec_helper"
|
3
2
|
|
4
|
-
describe String::Cleaner do
|
3
|
+
RSpec.describe String::Cleaner do
|
5
4
|
describe "#clean" do
|
6
5
|
describe "with all 8-bit characters" do
|
7
6
|
before :all do
|
@@ -13,12 +12,12 @@ describe String::Cleaner do
|
|
13
12
|
end
|
14
13
|
if RUBY_VERSION.to_f>1.9
|
15
14
|
it "should output a valid UTF-8 string" do
|
16
|
-
@output.encoding.name.
|
17
|
-
@output.
|
15
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
16
|
+
expect(@output).to be_valid_encoding
|
18
17
|
end
|
19
18
|
end
|
20
19
|
it "should wipe out the control characters" do
|
21
|
-
@output.
|
20
|
+
expect(@output).to eq " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
|
22
21
|
end
|
23
22
|
end
|
24
23
|
describe "with various type of spaces" do
|
@@ -41,12 +40,12 @@ describe String::Cleaner do
|
|
41
40
|
end
|
42
41
|
if RUBY_VERSION.to_f>1.9
|
43
42
|
it "should output a valid UTF-8 string" do
|
44
|
-
@output.encoding.name.
|
45
|
-
@output.
|
43
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
44
|
+
expect(@output).to be_valid_encoding
|
46
45
|
end
|
47
46
|
end
|
48
47
|
it "should replace all spaces to normal spaces" do
|
49
|
-
@output.clean.
|
48
|
+
expect(@output.clean).to eq " \n \n "
|
50
49
|
end
|
51
50
|
end
|
52
51
|
describe "with various no-width characters" do
|
@@ -62,12 +61,12 @@ describe String::Cleaner do
|
|
62
61
|
end
|
63
62
|
if RUBY_VERSION.to_f>1.9
|
64
63
|
it "should output a valid UTF-8 string" do
|
65
|
-
@output.encoding.name.
|
66
|
-
@output.
|
64
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
65
|
+
expect(@output).to be_valid_encoding
|
67
66
|
end
|
68
67
|
end
|
69
68
|
it "should remove no-width characters" do
|
70
|
-
@output.
|
69
|
+
expect(@output).to eq ""
|
71
70
|
end
|
72
71
|
end
|
73
72
|
describe "with invalid UTF-8 sequence" do
|
@@ -77,12 +76,12 @@ describe String::Cleaner do
|
|
77
76
|
end
|
78
77
|
if RUBY_VERSION.to_f>1.9
|
79
78
|
it "should output a valid UTF-8 string" do
|
80
|
-
@output.encoding.name.
|
81
|
-
@output.
|
79
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
80
|
+
expect(@output).to be_valid_encoding
|
82
81
|
end
|
83
82
|
end
|
84
83
|
it "should replace invisible chars by space" do
|
85
|
-
@output.
|
84
|
+
expect(@output).to eq " "
|
86
85
|
end
|
87
86
|
end
|
88
87
|
describe "with mixed valid and invalid characters" do
|
@@ -92,12 +91,12 @@ describe String::Cleaner do
|
|
92
91
|
end
|
93
92
|
if RUBY_VERSION.to_f>1.9
|
94
93
|
it "should output a valid UTF-8 string" do
|
95
|
-
@output.encoding.name.
|
96
|
-
@output.
|
94
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
95
|
+
expect(@output).to be_valid_encoding
|
97
96
|
end
|
98
97
|
end
|
99
98
|
it "should keep the valid characters" do
|
100
|
-
@output.
|
99
|
+
expect(@output).to eq "a?^?Ýf"
|
101
100
|
end
|
102
101
|
end
|
103
102
|
describe "with already valid characters" do
|
@@ -107,12 +106,12 @@ describe String::Cleaner do
|
|
107
106
|
end
|
108
107
|
if RUBY_VERSION.to_f>1.9
|
109
108
|
it "should output a valid UTF-8 string" do
|
110
|
-
@output.encoding.name.
|
111
|
-
@output.
|
109
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
110
|
+
expect(@output).to be_valid_encoding
|
112
111
|
end
|
113
112
|
end
|
114
113
|
it "should replace invisible chars by space" do
|
115
|
-
@output.
|
114
|
+
expect(@output).to eq "\n \n\n \n"
|
116
115
|
end
|
117
116
|
end
|
118
117
|
describe "with watermarked text" do
|
@@ -122,12 +121,12 @@ describe String::Cleaner do
|
|
122
121
|
end
|
123
122
|
if RUBY_VERSION.to_f>1.9
|
124
123
|
it "should output a valid UTF-8 string" do
|
125
|
-
@output.encoding.name.
|
126
|
-
@output.
|
124
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
125
|
+
expect(@output).to be_valid_encoding
|
127
126
|
end
|
128
127
|
end
|
129
128
|
it "should replace invisible chars by space" do
|
130
|
-
@output.
|
129
|
+
expect(@output).to eq "Here is a block of text inside of which a number will be hidden!"
|
131
130
|
end
|
132
131
|
end
|
133
132
|
describe "with euro sign from both ISO 8859-15 or Windows-1252" do
|
@@ -137,63 +136,63 @@ describe String::Cleaner do
|
|
137
136
|
end
|
138
137
|
if RUBY_VERSION.to_f>1.9
|
139
138
|
it "should output a valid UTF-8 string" do
|
140
|
-
@output.encoding.name.
|
141
|
-
@output.
|
139
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
140
|
+
expect(@output).to be_valid_encoding
|
142
141
|
end
|
143
142
|
end
|
144
143
|
it "should replace invisible chars by space" do
|
145
|
-
@output.
|
144
|
+
expect(@output).to eq "€€"
|
146
145
|
end
|
147
146
|
end
|
148
147
|
end
|
149
148
|
describe "#trim(chars = \"\")" do
|
150
149
|
it "should use #strip when used without params" do
|
151
|
-
string, expected = "",
|
152
|
-
string.
|
153
|
-
string.trim.
|
150
|
+
string, expected = "", double
|
151
|
+
expect(string).to receive(:strip).and_return expected
|
152
|
+
expect(string.trim).to be expected
|
154
153
|
end
|
155
154
|
it "should remove multiple characters at once from beginning and end" do
|
156
155
|
prefix, suffix = " rhuif dww f f", "dqz qafdédsj iowe fcms. qpo asttt t dtt"
|
157
156
|
to_remove = "acdeéfhijmopqrstuwz "
|
158
|
-
"#{prefix}d#{suffix}".trim(to_remove).
|
159
|
-
"#{prefix}D#{suffix}".trim(to_remove).
|
157
|
+
expect("#{prefix}d#{suffix}".trim(to_remove)).to eq "."
|
158
|
+
expect("#{prefix}D#{suffix}".trim(to_remove)).to eq "Ddqz qafdédsj iowe fcms."
|
160
159
|
end
|
161
160
|
end
|
162
161
|
describe "#fix_endlines" do
|
163
162
|
it "should convert windows endlines" do
|
164
|
-
"this is a\r\ntest\r\n".fix_endlines.
|
163
|
+
expect("this is a\r\ntest\r\n".fix_endlines).to eql "this is a\ntest\n"
|
165
164
|
end
|
166
165
|
it "should convert old mac endlines" do
|
167
|
-
"this is a\rtest\r".fix_endlines.
|
166
|
+
expect("this is a\rtest\r".fix_endlines).to eql "this is a\ntest\n"
|
168
167
|
end
|
169
168
|
it "should not modify proper linux endlines" do
|
170
|
-
"this is a\ntest\n".fix_endlines.
|
169
|
+
expect("this is a\ntest\n".fix_endlines).to eql "this is a\ntest\n"
|
171
170
|
end
|
172
171
|
it "should convert mixed endlines" do
|
173
|
-
"this is a\n\rtest\r\n".fix_endlines.
|
172
|
+
expect("this is a\n\rtest\r\n".fix_endlines).to eql "this is a\n\ntest\n"
|
174
173
|
end
|
175
174
|
end
|
176
175
|
describe "#to_permalink(separator=\"-\")" do
|
177
176
|
it "should create nice permalink for string with many accents" do
|
178
177
|
crazy = " ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý - Hello world, I'm a crazy string!! "
|
179
|
-
crazy.to_permalink.
|
178
|
+
expect(crazy.to_permalink).to eq "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
|
180
179
|
end
|
181
180
|
it "should create nice permalink even for evil string" do
|
182
181
|
evil = (128..255).inject(""){ |acc, b| acc += ("%c" % b) }
|
183
|
-
evil.to_permalink.
|
182
|
+
expect(evil.to_permalink).to eq "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
|
184
183
|
end
|
185
184
|
it "should remove endlines too" do
|
186
|
-
"this\nis\ta\ntest".to_permalink("_").
|
185
|
+
expect("this\nis\ta\ntest".to_permalink("_")).to eq "this_is_a_test"
|
187
186
|
end
|
188
187
|
end
|
189
188
|
describe "#nl2br" do
|
190
189
|
it "should convert \n to <br/>\n" do
|
191
|
-
"this\nis\ta\ntest\r".nl2br.
|
190
|
+
expect("this\nis\ta\ntest\r".nl2br).to eq "this<br/>\nis\ta<br/>\ntest\r"
|
192
191
|
end
|
193
192
|
end
|
194
193
|
describe "#to_nicer_sym" do
|
195
194
|
it "should convert \"Select or Other\" to :select_or_other" do
|
196
|
-
"Select or Other".to_nicer_sym.
|
195
|
+
expect("Select or Other".to_nicer_sym).to be :select_or_other
|
197
196
|
end
|
198
197
|
end
|
199
|
-
end
|
198
|
+
end
|
data/string_cleaner.gemspec
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{string_cleaner}
|
5
|
-
s.version = "0.
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
5
|
+
s.version = "1.0.0"
|
8
6
|
s.authors = ["Joseph Halter"]
|
9
7
|
s.date = %q{2010-10-18}
|
10
8
|
s.email = %q{joseph@openhood.com}
|
9
|
+
s.required_ruby_version = ">= 2.6"
|
10
|
+
s.license = "MIT"
|
11
11
|
s.extra_rdoc_files = [
|
12
12
|
"LICENSE",
|
13
13
|
"README.rdoc"
|
@@ -22,27 +22,14 @@ Gem::Specification.new do |s|
|
|
22
22
|
"spec/string_cleaner_spec.rb",
|
23
23
|
"string_cleaner.gemspec"
|
24
24
|
]
|
25
|
-
s.has_rdoc = true
|
26
25
|
s.homepage = %q{http://github.com/JosephHalter/string_cleaner}
|
27
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
28
26
|
s.require_paths = ["lib"]
|
29
|
-
s.
|
30
|
-
s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, fully compatible with Ruby 1.8 & 1.9 with extensive specs}
|
27
|
+
s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with extensive specs}
|
31
28
|
s.test_files = [
|
32
29
|
"spec/spec_helper.rb",
|
33
30
|
"spec/string_cleaner_spec.rb"
|
34
31
|
]
|
35
|
-
s.add_runtime_dependency "talentbox-unidecoder", "
|
32
|
+
s.add_runtime_dependency "talentbox-unidecoder", "2.0.0"
|
36
33
|
s.add_development_dependency "rake"
|
37
34
|
s.add_development_dependency "rspec"
|
38
|
-
|
39
|
-
if s.respond_to? :specification_version then
|
40
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
41
|
-
s.specification_version = 2
|
42
|
-
|
43
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
44
|
-
else
|
45
|
-
end
|
46
|
-
else
|
47
|
-
end
|
48
35
|
end
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_cleaner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Joseph Halter
|
@@ -13,37 +12,46 @@ date: 2010-10-18 00:00:00.000000000 Z
|
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: talentbox-unidecoder
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- - =
|
17
|
+
- - '='
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
19
|
+
version: 2.0.0
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.0
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rake
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - ">="
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: rspec
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - ">="
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
description:
|
48
56
|
email: joseph@openhood.com
|
49
57
|
executables: []
|
@@ -52,7 +60,7 @@ extra_rdoc_files:
|
|
52
60
|
- LICENSE
|
53
61
|
- README.rdoc
|
54
62
|
files:
|
55
|
-
- .gitignore
|
63
|
+
- ".gitignore"
|
56
64
|
- LICENSE
|
57
65
|
- README.rdoc
|
58
66
|
- Rakefile
|
@@ -61,31 +69,29 @@ files:
|
|
61
69
|
- spec/string_cleaner_spec.rb
|
62
70
|
- string_cleaner.gemspec
|
63
71
|
homepage: http://github.com/JosephHalter/string_cleaner
|
64
|
-
licenses:
|
72
|
+
licenses:
|
73
|
+
- MIT
|
74
|
+
metadata: {}
|
65
75
|
post_install_message:
|
66
|
-
rdoc_options:
|
67
|
-
- --charset=UTF-8
|
76
|
+
rdoc_options: []
|
68
77
|
require_paths:
|
69
78
|
- lib
|
70
79
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
-
none: false
|
72
80
|
requirements:
|
73
|
-
- -
|
81
|
+
- - ">="
|
74
82
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
83
|
+
version: '2.6'
|
76
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
-
none: false
|
78
85
|
requirements:
|
79
|
-
- -
|
86
|
+
- - ">="
|
80
87
|
- !ruby/object:Gem::Version
|
81
88
|
version: '0'
|
82
89
|
requirements: []
|
83
|
-
|
84
|
-
rubygems_version: 1.8.10
|
90
|
+
rubygems_version: 3.1.6
|
85
91
|
signing_key:
|
86
|
-
specification_version:
|
87
|
-
summary: Fix invalid UTF-8 and wipe invisible chars,
|
88
|
-
|
92
|
+
specification_version: 4
|
93
|
+
summary: Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with
|
94
|
+
extensive specs
|
89
95
|
test_files:
|
90
96
|
- spec/spec_helper.rb
|
91
97
|
- spec/string_cleaner_spec.rb
|