string_cleaner 0.2.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.rdoc +3 -15
- data/spec/string_cleaner_spec.rb +41 -42
- data/string_cleaner.gemspec +5 -18
- metadata +35 -29
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8451c9bc58c373d7b4a768cb3ed1ff355e5116f24b02c6a5bfeb69aa0549c589
|
4
|
+
data.tar.gz: b226d948238bd18cdacb01402c0576203da724b895c6745d3b713fae5d21f9ee
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3a83693387d088db6f0b89ea73ecffbdf0b034885a0b2418d222c1197c48794b05cc8fb28f477bcfc4d345ee561d0ea3e1a543c34309ddb77ecc79a6adffe4fb
|
7
|
+
data.tar.gz: 3a35bab3513612149b345bf96d536d741ed3d4dfa757a353d3bc955782173139e8dac4c92b0bef025534706ca13e0ba17b14a30da0b10029d2653eeded5224f2
|
data/README.rdoc
CHANGED
@@ -6,23 +6,11 @@ Just add a method .clean to String which does:
|
|
6
6
|
* replace \r\n and \r with \n normalizing end of lines
|
7
7
|
* replace control characters and other invisible chars by spaces
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
sudo gem install JosephHalter-string_cleaner
|
12
|
-
|
13
|
-
== Ruby 1.9+
|
9
|
+
Supports only Ruby 2.6+
|
14
10
|
|
15
|
-
|
16
|
-
|
17
|
-
== Ruby 1.8.x
|
18
|
-
|
19
|
-
Because Ruby 1.8.x has no native support for Unicode, you must install oniguruma and the jasherai-oniguruma gem.
|
11
|
+
== Install
|
20
12
|
|
21
|
-
|
22
|
-
|
23
|
-
brew install oniguruma
|
24
|
-
bundle config build.jasherai-oniguruma --with-onig-dir=`brew --prefix oniguruma`
|
25
|
-
bundle install
|
13
|
+
sudo gem install string_cleaner
|
26
14
|
|
27
15
|
== Example usage
|
28
16
|
|
data/spec/string_cleaner_spec.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
|
-
|
2
|
-
require File.dirname(__FILE__) + "/spec_helper"
|
1
|
+
require "spec_helper"
|
3
2
|
|
4
|
-
describe String::Cleaner do
|
3
|
+
RSpec.describe String::Cleaner do
|
5
4
|
describe "#clean" do
|
6
5
|
describe "with all 8-bit characters" do
|
7
6
|
before :all do
|
@@ -13,12 +12,12 @@ describe String::Cleaner do
|
|
13
12
|
end
|
14
13
|
if RUBY_VERSION.to_f>1.9
|
15
14
|
it "should output a valid UTF-8 string" do
|
16
|
-
@output.encoding.name.
|
17
|
-
@output.
|
15
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
16
|
+
expect(@output).to be_valid_encoding
|
18
17
|
end
|
19
18
|
end
|
20
19
|
it "should wipe out the control characters" do
|
21
|
-
@output.
|
20
|
+
expect(@output).to eq " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
|
22
21
|
end
|
23
22
|
end
|
24
23
|
describe "with various type of spaces" do
|
@@ -41,12 +40,12 @@ describe String::Cleaner do
|
|
41
40
|
end
|
42
41
|
if RUBY_VERSION.to_f>1.9
|
43
42
|
it "should output a valid UTF-8 string" do
|
44
|
-
@output.encoding.name.
|
45
|
-
@output.
|
43
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
44
|
+
expect(@output).to be_valid_encoding
|
46
45
|
end
|
47
46
|
end
|
48
47
|
it "should replace all spaces to normal spaces" do
|
49
|
-
@output.clean.
|
48
|
+
expect(@output.clean).to eq " \n \n "
|
50
49
|
end
|
51
50
|
end
|
52
51
|
describe "with various no-width characters" do
|
@@ -62,12 +61,12 @@ describe String::Cleaner do
|
|
62
61
|
end
|
63
62
|
if RUBY_VERSION.to_f>1.9
|
64
63
|
it "should output a valid UTF-8 string" do
|
65
|
-
@output.encoding.name.
|
66
|
-
@output.
|
64
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
65
|
+
expect(@output).to be_valid_encoding
|
67
66
|
end
|
68
67
|
end
|
69
68
|
it "should remove no-width characters" do
|
70
|
-
@output.
|
69
|
+
expect(@output).to eq ""
|
71
70
|
end
|
72
71
|
end
|
73
72
|
describe "with invalid UTF-8 sequence" do
|
@@ -77,12 +76,12 @@ describe String::Cleaner do
|
|
77
76
|
end
|
78
77
|
if RUBY_VERSION.to_f>1.9
|
79
78
|
it "should output a valid UTF-8 string" do
|
80
|
-
@output.encoding.name.
|
81
|
-
@output.
|
79
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
80
|
+
expect(@output).to be_valid_encoding
|
82
81
|
end
|
83
82
|
end
|
84
83
|
it "should replace invisible chars by space" do
|
85
|
-
@output.
|
84
|
+
expect(@output).to eq " "
|
86
85
|
end
|
87
86
|
end
|
88
87
|
describe "with mixed valid and invalid characters" do
|
@@ -92,12 +91,12 @@ describe String::Cleaner do
|
|
92
91
|
end
|
93
92
|
if RUBY_VERSION.to_f>1.9
|
94
93
|
it "should output a valid UTF-8 string" do
|
95
|
-
@output.encoding.name.
|
96
|
-
@output.
|
94
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
95
|
+
expect(@output).to be_valid_encoding
|
97
96
|
end
|
98
97
|
end
|
99
98
|
it "should keep the valid characters" do
|
100
|
-
@output.
|
99
|
+
expect(@output).to eq "a?^?Ýf"
|
101
100
|
end
|
102
101
|
end
|
103
102
|
describe "with already valid characters" do
|
@@ -107,12 +106,12 @@ describe String::Cleaner do
|
|
107
106
|
end
|
108
107
|
if RUBY_VERSION.to_f>1.9
|
109
108
|
it "should output a valid UTF-8 string" do
|
110
|
-
@output.encoding.name.
|
111
|
-
@output.
|
109
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
110
|
+
expect(@output).to be_valid_encoding
|
112
111
|
end
|
113
112
|
end
|
114
113
|
it "should replace invisible chars by space" do
|
115
|
-
@output.
|
114
|
+
expect(@output).to eq "\n \n\n \n"
|
116
115
|
end
|
117
116
|
end
|
118
117
|
describe "with watermarked text" do
|
@@ -122,12 +121,12 @@ describe String::Cleaner do
|
|
122
121
|
end
|
123
122
|
if RUBY_VERSION.to_f>1.9
|
124
123
|
it "should output a valid UTF-8 string" do
|
125
|
-
@output.encoding.name.
|
126
|
-
@output.
|
124
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
125
|
+
expect(@output).to be_valid_encoding
|
127
126
|
end
|
128
127
|
end
|
129
128
|
it "should replace invisible chars by space" do
|
130
|
-
@output.
|
129
|
+
expect(@output).to eq "Here is a block of text inside of which a number will be hidden!"
|
131
130
|
end
|
132
131
|
end
|
133
132
|
describe "with euro sign from both ISO 8859-15 or Windows-1252" do
|
@@ -137,63 +136,63 @@ describe String::Cleaner do
|
|
137
136
|
end
|
138
137
|
if RUBY_VERSION.to_f>1.9
|
139
138
|
it "should output a valid UTF-8 string" do
|
140
|
-
@output.encoding.name.
|
141
|
-
@output.
|
139
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
140
|
+
expect(@output).to be_valid_encoding
|
142
141
|
end
|
143
142
|
end
|
144
143
|
it "should replace invisible chars by space" do
|
145
|
-
@output.
|
144
|
+
expect(@output).to eq "€€"
|
146
145
|
end
|
147
146
|
end
|
148
147
|
end
|
149
148
|
describe "#trim(chars = \"\")" do
|
150
149
|
it "should use #strip when used without params" do
|
151
|
-
string, expected = "",
|
152
|
-
string.
|
153
|
-
string.trim.
|
150
|
+
string, expected = "", double
|
151
|
+
expect(string).to receive(:strip).and_return expected
|
152
|
+
expect(string.trim).to be expected
|
154
153
|
end
|
155
154
|
it "should remove multiple characters at once from beginning and end" do
|
156
155
|
prefix, suffix = " rhuif dww f f", "dqz qafdédsj iowe fcms. qpo asttt t dtt"
|
157
156
|
to_remove = "acdeéfhijmopqrstuwz "
|
158
|
-
"#{prefix}d#{suffix}".trim(to_remove).
|
159
|
-
"#{prefix}D#{suffix}".trim(to_remove).
|
157
|
+
expect("#{prefix}d#{suffix}".trim(to_remove)).to eq "."
|
158
|
+
expect("#{prefix}D#{suffix}".trim(to_remove)).to eq "Ddqz qafdédsj iowe fcms."
|
160
159
|
end
|
161
160
|
end
|
162
161
|
describe "#fix_endlines" do
|
163
162
|
it "should convert windows endlines" do
|
164
|
-
"this is a\r\ntest\r\n".fix_endlines.
|
163
|
+
expect("this is a\r\ntest\r\n".fix_endlines).to eql "this is a\ntest\n"
|
165
164
|
end
|
166
165
|
it "should convert old mac endlines" do
|
167
|
-
"this is a\rtest\r".fix_endlines.
|
166
|
+
expect("this is a\rtest\r".fix_endlines).to eql "this is a\ntest\n"
|
168
167
|
end
|
169
168
|
it "should not modify proper linux endlines" do
|
170
|
-
"this is a\ntest\n".fix_endlines.
|
169
|
+
expect("this is a\ntest\n".fix_endlines).to eql "this is a\ntest\n"
|
171
170
|
end
|
172
171
|
it "should convert mixed endlines" do
|
173
|
-
"this is a\n\rtest\r\n".fix_endlines.
|
172
|
+
expect("this is a\n\rtest\r\n".fix_endlines).to eql "this is a\n\ntest\n"
|
174
173
|
end
|
175
174
|
end
|
176
175
|
describe "#to_permalink(separator=\"-\")" do
|
177
176
|
it "should create nice permalink for string with many accents" do
|
178
177
|
crazy = " ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý - Hello world, I'm a crazy string!! "
|
179
|
-
crazy.to_permalink.
|
178
|
+
expect(crazy.to_permalink).to eq "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
|
180
179
|
end
|
181
180
|
it "should create nice permalink even for evil string" do
|
182
181
|
evil = (128..255).inject(""){ |acc, b| acc += ("%c" % b) }
|
183
|
-
evil.to_permalink.
|
182
|
+
expect(evil.to_permalink).to eq "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
|
184
183
|
end
|
185
184
|
it "should remove endlines too" do
|
186
|
-
"this\nis\ta\ntest".to_permalink("_").
|
185
|
+
expect("this\nis\ta\ntest".to_permalink("_")).to eq "this_is_a_test"
|
187
186
|
end
|
188
187
|
end
|
189
188
|
describe "#nl2br" do
|
190
189
|
it "should convert \n to <br/>\n" do
|
191
|
-
"this\nis\ta\ntest\r".nl2br.
|
190
|
+
expect("this\nis\ta\ntest\r".nl2br).to eq "this<br/>\nis\ta<br/>\ntest\r"
|
192
191
|
end
|
193
192
|
end
|
194
193
|
describe "#to_nicer_sym" do
|
195
194
|
it "should convert \"Select or Other\" to :select_or_other" do
|
196
|
-
"Select or Other".to_nicer_sym.
|
195
|
+
expect("Select or Other".to_nicer_sym).to be :select_or_other
|
197
196
|
end
|
198
197
|
end
|
199
|
-
end
|
198
|
+
end
|
data/string_cleaner.gemspec
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{string_cleaner}
|
5
|
-
s.version = "0.
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
5
|
+
s.version = "1.0.0"
|
8
6
|
s.authors = ["Joseph Halter"]
|
9
7
|
s.date = %q{2010-10-18}
|
10
8
|
s.email = %q{joseph@openhood.com}
|
9
|
+
s.required_ruby_version = ">= 2.6"
|
10
|
+
s.license = "MIT"
|
11
11
|
s.extra_rdoc_files = [
|
12
12
|
"LICENSE",
|
13
13
|
"README.rdoc"
|
@@ -22,27 +22,14 @@ Gem::Specification.new do |s|
|
|
22
22
|
"spec/string_cleaner_spec.rb",
|
23
23
|
"string_cleaner.gemspec"
|
24
24
|
]
|
25
|
-
s.has_rdoc = true
|
26
25
|
s.homepage = %q{http://github.com/JosephHalter/string_cleaner}
|
27
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
28
26
|
s.require_paths = ["lib"]
|
29
|
-
s.
|
30
|
-
s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, fully compatible with Ruby 1.8 & 1.9 with extensive specs}
|
27
|
+
s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with extensive specs}
|
31
28
|
s.test_files = [
|
32
29
|
"spec/spec_helper.rb",
|
33
30
|
"spec/string_cleaner_spec.rb"
|
34
31
|
]
|
35
|
-
s.add_runtime_dependency "talentbox-unidecoder", "
|
32
|
+
s.add_runtime_dependency "talentbox-unidecoder", "2.0.0"
|
36
33
|
s.add_development_dependency "rake"
|
37
34
|
s.add_development_dependency "rspec"
|
38
|
-
|
39
|
-
if s.respond_to? :specification_version then
|
40
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
41
|
-
s.specification_version = 2
|
42
|
-
|
43
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
44
|
-
else
|
45
|
-
end
|
46
|
-
else
|
47
|
-
end
|
48
35
|
end
|
metadata
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_cleaner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 1.0.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Joseph Halter
|
@@ -13,37 +12,46 @@ date: 2010-10-18 00:00:00.000000000 Z
|
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: talentbox-unidecoder
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
|
-
- - =
|
17
|
+
- - '='
|
20
18
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
19
|
+
version: 2.0.0
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.0
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rake
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
|
-
- -
|
31
|
+
- - ">="
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: rspec
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
|
-
- -
|
45
|
+
- - ">="
|
42
46
|
- !ruby/object:Gem::Version
|
43
47
|
version: '0'
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
47
55
|
description:
|
48
56
|
email: joseph@openhood.com
|
49
57
|
executables: []
|
@@ -52,7 +60,7 @@ extra_rdoc_files:
|
|
52
60
|
- LICENSE
|
53
61
|
- README.rdoc
|
54
62
|
files:
|
55
|
-
- .gitignore
|
63
|
+
- ".gitignore"
|
56
64
|
- LICENSE
|
57
65
|
- README.rdoc
|
58
66
|
- Rakefile
|
@@ -61,31 +69,29 @@ files:
|
|
61
69
|
- spec/string_cleaner_spec.rb
|
62
70
|
- string_cleaner.gemspec
|
63
71
|
homepage: http://github.com/JosephHalter/string_cleaner
|
64
|
-
licenses:
|
72
|
+
licenses:
|
73
|
+
- MIT
|
74
|
+
metadata: {}
|
65
75
|
post_install_message:
|
66
|
-
rdoc_options:
|
67
|
-
- --charset=UTF-8
|
76
|
+
rdoc_options: []
|
68
77
|
require_paths:
|
69
78
|
- lib
|
70
79
|
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
-
none: false
|
72
80
|
requirements:
|
73
|
-
- -
|
81
|
+
- - ">="
|
74
82
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
83
|
+
version: '2.6'
|
76
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
-
none: false
|
78
85
|
requirements:
|
79
|
-
- -
|
86
|
+
- - ">="
|
80
87
|
- !ruby/object:Gem::Version
|
81
88
|
version: '0'
|
82
89
|
requirements: []
|
83
|
-
|
84
|
-
rubygems_version: 1.8.10
|
90
|
+
rubygems_version: 3.1.6
|
85
91
|
signing_key:
|
86
|
-
specification_version:
|
87
|
-
summary: Fix invalid UTF-8 and wipe invisible chars,
|
88
|
-
|
92
|
+
specification_version: 4
|
93
|
+
summary: Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with
|
94
|
+
extensive specs
|
89
95
|
test_files:
|
90
96
|
- spec/spec_helper.rb
|
91
97
|
- spec/string_cleaner_spec.rb
|