string_cleaner 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/README.rdoc +4 -10
- data/Rakefile +8 -2
- data/lib/string_cleaner.rb +28 -20
- data/spec/string_cleaner_spec.rb +106 -179
- data/string_cleaner.gemspec +6 -18
- metadata +64 -65
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8451c9bc58c373d7b4a768cb3ed1ff355e5116f24b02c6a5bfeb69aa0549c589
|
4
|
+
data.tar.gz: b226d948238bd18cdacb01402c0576203da724b895c6745d3b713fae5d21f9ee
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3a83693387d088db6f0b89ea73ecffbdf0b034885a0b2418d222c1197c48794b05cc8fb28f477bcfc4d345ee561d0ea3e1a543c34309ddb77ecc79a6adffe4fb
|
7
|
+
data.tar.gz: 3a35bab3513612149b345bf96d536d741ed3d4dfa757a353d3bc955782173139e8dac4c92b0bef025534706ca13e0ba17b14a30da0b10029d2653eeded5224f2
|
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -6,21 +6,15 @@ Just add a method .clean to String which does:
|
|
6
6
|
* replace \r\n and \r with \n normalizing end of lines
|
7
7
|
* replace control characters and other invisible chars by spaces
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
sudo gem install JosephHalter-string_cleaner
|
12
|
-
|
13
|
-
== Ruby 1.9+
|
9
|
+
Supports only Ruby 2.6+
|
14
10
|
|
15
|
-
|
16
|
-
|
17
|
-
== Ruby 1.8.x
|
11
|
+
== Install
|
18
12
|
|
19
|
-
|
13
|
+
sudo gem install string_cleaner
|
20
14
|
|
21
15
|
== Example usage
|
22
16
|
|
23
|
-
|
17
|
+
"\210\004".clean # => " "
|
24
18
|
|
25
19
|
== Copyright
|
26
20
|
|
data/Rakefile
CHANGED
@@ -1,4 +1,11 @@
|
|
1
|
-
|
1
|
+
if RUBY_VERSION.to_f<1.9
|
2
|
+
require 'rake/tasklib'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rubygems'
|
5
|
+
else
|
6
|
+
require 'rdoc/task'
|
7
|
+
end
|
8
|
+
|
2
9
|
begin
|
3
10
|
require 'bundler/setup'
|
4
11
|
rescue LoadError
|
@@ -17,7 +24,6 @@ end
|
|
17
24
|
|
18
25
|
task :default => :spec
|
19
26
|
|
20
|
-
require 'rake/rdoctask'
|
21
27
|
Rake::RDocTask.new do |rdoc|
|
22
28
|
rdoc.rdoc_dir = 'rdoc'
|
23
29
|
rdoc.title = "string_cleaner"
|
data/lib/string_cleaner.rb
CHANGED
@@ -9,14 +9,13 @@ module String::Cleaner
|
|
9
9
|
|
10
10
|
def fix_encoding
|
11
11
|
utf8 = dup
|
12
|
-
if utf8.respond_to?(:force_encoding)
|
12
|
+
if utf8.respond_to?(:force_encoding)
|
13
13
|
utf8.force_encoding("UTF-8") # for Ruby 1.9+
|
14
14
|
unless utf8.valid_encoding? # if invalid UTF-8
|
15
|
-
utf8 = utf8.force_encoding("ISO8859-
|
15
|
+
utf8 = utf8.force_encoding("ISO8859-1")
|
16
16
|
utf8.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
|
17
|
-
utf8.gsub!("\xC2\x80", "€") # special case for euro sign from Windows-1252
|
18
|
-
utf8.force_encoding("UTF-8")
|
19
17
|
end
|
18
|
+
utf8.gsub!(/\u0080|¤/, "€") # special case for euro sign from Windows-1252
|
20
19
|
utf8
|
21
20
|
else
|
22
21
|
require "iconv"
|
@@ -25,7 +24,7 @@ module String::Cleaner
|
|
25
24
|
Iconv.new("UTF-8", "UTF-8").iconv(utf8)
|
26
25
|
rescue
|
27
26
|
utf8.gsub!(/\x80/n, "\xA4")
|
28
|
-
Iconv.new("UTF-8//IGNORE", "ISO8859-
|
27
|
+
Iconv.new("UTF-8//IGNORE", "ISO8859-1").iconv(utf8).gsub("¤", "€")
|
29
28
|
end
|
30
29
|
end
|
31
30
|
end
|
@@ -35,31 +34,40 @@ module String::Cleaner
|
|
35
34
|
end
|
36
35
|
|
37
36
|
SPECIAL_SPACES = [
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
37
|
+
0x00A0, # NO-BREAK SPACE
|
38
|
+
0x1680, # OGHAM SPACE MARK
|
39
|
+
0x180E, # MONGOLIAN VOWEL SEPARATOR
|
40
|
+
(0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
|
41
|
+
0x2028, # LINE SEPARATOR
|
42
|
+
0x2029, # PARAGRAPH SEPARATOR
|
43
|
+
0x202F, # NARROW NO-BREAK SPACE
|
44
|
+
0x205F, # MEDIUM MATHEMATICAL SPACE
|
45
|
+
0x3000, # IDEOGRAPHIC SPACE
|
46
|
+
].flatten.collect{|e| [e].pack 'U*'}
|
47
|
+
|
48
|
+
ZERO_WIDTH = [
|
49
|
+
0x200B, # ZERO WIDTH SPACE
|
50
|
+
0x200C, # ZERO WIDTH NON-JOINER
|
51
|
+
0x200D, # ZERO WIDTH JOINER
|
52
|
+
0x2060, # WORD JOINER
|
53
|
+
0xFEFF, # ZERO WIDTH NO-BREAK SPACE
|
54
|
+
].flatten.collect{|e| [e].pack 'U*'}
|
48
55
|
|
49
56
|
def fix_invisible_chars
|
50
57
|
utf8 = self.dup
|
51
|
-
|
58
|
+
utf8.gsub!(Regexp.new(ZERO_WIDTH.join("|")), "")
|
59
|
+
utf8 = if utf8.respond_to?(:force_encoding)
|
52
60
|
utf8 = (utf8 << " ").split(/\n/u).each{|line|
|
53
61
|
line.gsub!(/[\s\p{C}]/u, " ")
|
54
62
|
}.join("\n").chop!
|
55
|
-
utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|")), " ")
|
56
|
-
utf8.force_encoding("UTF-8")
|
57
63
|
else
|
58
64
|
require "oniguruma"
|
59
65
|
utf8.split(/\n/n).collect{|line|
|
60
|
-
Oniguruma::ORegexp.new("[\\
|
66
|
+
Oniguruma::ORegexp.new("[\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8}).gsub(line, " ")
|
61
67
|
}.join("\n").chop!
|
62
68
|
end
|
69
|
+
utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|") + "|\s"), " ")
|
70
|
+
utf8
|
63
71
|
end
|
64
72
|
|
65
73
|
def trim(chars = "")
|
@@ -67,7 +75,7 @@ module String::Cleaner
|
|
67
75
|
end
|
68
76
|
|
69
77
|
def to_permalink(separator="-")
|
70
|
-
|
78
|
+
clean.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
|
71
79
|
end
|
72
80
|
|
73
81
|
def nl2br
|
data/spec/string_cleaner_spec.rb
CHANGED
@@ -1,147 +1,72 @@
|
|
1
|
-
|
2
|
-
require File.dirname(__FILE__) + "/spec_helper"
|
1
|
+
require "spec_helper"
|
3
2
|
|
4
|
-
describe String::Cleaner do
|
5
|
-
|
6
|
-
# specs for Ruby 1.9+
|
3
|
+
RSpec.describe String::Cleaner do
|
4
|
+
describe "#clean" do
|
7
5
|
describe "with all 8-bit characters" do
|
8
6
|
before :all do
|
9
|
-
@input = ""
|
7
|
+
@input = ""
|
8
|
+
@input.force_encoding("ISO8859-15") if @input.respond_to?(:force_encoding)
|
10
9
|
(0..255).each{|i| @input << i.chr}
|
11
|
-
@input.force_encoding("UTF-8")
|
10
|
+
@input.force_encoding("UTF-8") if @input.respond_to?(:force_encoding)
|
12
11
|
@output = @input.clean
|
13
12
|
end
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
if RUBY_VERSION.to_f>1.9
|
14
|
+
it "should output a valid UTF-8 string" do
|
15
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
16
|
+
expect(@output).to be_valid_encoding
|
17
|
+
end
|
17
18
|
end
|
18
19
|
it "should wipe out the control characters" do
|
19
|
-
@output.
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
end
|
39
|
-
describe "with invalid UTF-8 sequence" do
|
40
|
-
before :all do
|
41
|
-
@input = "\210\004"
|
42
|
-
@output = @input.clean
|
43
|
-
end
|
44
|
-
it "should output a valid UTF-8 string" do
|
45
|
-
@output.encoding.name.should == "UTF-8"
|
46
|
-
@output.should be_valid_encoding
|
47
|
-
end
|
48
|
-
it "should replace invisible chars by space" do
|
49
|
-
@output.should == " "
|
50
|
-
end
|
51
|
-
end
|
52
|
-
describe "with mixed valid and invalid characters" do
|
53
|
-
before :all do
|
54
|
-
@input = "a?^?\xddf"
|
55
|
-
@output = @input.clean
|
56
|
-
end
|
57
|
-
it "should output a valid UTF-8 string" do
|
58
|
-
@output.encoding.name.should == "UTF-8"
|
59
|
-
@output.should be_valid_encoding
|
60
|
-
end
|
61
|
-
it "should keep the valid characters" do
|
62
|
-
@output.should == "a?^?Ýf"
|
63
|
-
end
|
64
|
-
end
|
65
|
-
describe "with already valid characters" do
|
66
|
-
before :all do
|
67
|
-
@input = "\n\t\r\r\n\v\n"
|
68
|
-
@output = @input.clean
|
69
|
-
end
|
70
|
-
it "should output a valid UTF-8 string" do
|
71
|
-
@output.encoding.name.should == "UTF-8"
|
72
|
-
@output.should be_valid_encoding
|
73
|
-
end
|
74
|
-
it "should replace invisible chars by space" do
|
75
|
-
@output.should == "\n \n\n \n"
|
76
|
-
end
|
77
|
-
end
|
78
|
-
describe "with watermarked text" do
|
79
|
-
before :all do
|
80
|
-
@input = "Here is \357\273\277a block \357\273\277of text \357\273\277inside of which a number will be hidden!"
|
81
|
-
@output = @input.clean
|
82
|
-
end
|
83
|
-
it "should output a valid UTF-8 string" do
|
84
|
-
@output.encoding.name.should == "UTF-8"
|
85
|
-
@output.should be_valid_encoding
|
86
|
-
end
|
87
|
-
it "should replace invisible chars by space" do
|
88
|
-
@output.should == "Here is a block of text inside of which a number will be hidden!"
|
89
|
-
end
|
90
|
-
end
|
91
|
-
describe "with unusual valid spaces" do
|
92
|
-
before :all do
|
93
|
-
@input = []
|
94
|
-
@input << "\u0020" # SPACE
|
95
|
-
@input << "\u00A0" # NO-BREAK SPACE
|
96
|
-
@input << "\u2000" # EN QUAD
|
97
|
-
@input << "\u2001" # EM QUAD
|
98
|
-
@input << "\u2002" # EN SPACE
|
99
|
-
@input << "\u2003" # EM SPACE
|
100
|
-
@input << "\u2004" # THREE-PER-EM SPACE
|
101
|
-
@input << "\u2005" # FOUR-PER-EM SPACE
|
102
|
-
@input << "\u2006" # SIX-PER-EM SPACE
|
103
|
-
@input << "\u2007" # FIGURE SPACE
|
104
|
-
@input << "\u2008" # PUNCTUATION SPACE
|
105
|
-
@input << "\u2009" # THIN SPACE
|
106
|
-
@input << "\u200A" # HAIR SPACE
|
107
|
-
@input << "\u200B" # ZERO WIDTH SPACE
|
108
|
-
@input << "\u202F" # NARROW NO-BREAK SPACE
|
109
|
-
@input << "\u205F" # MEDIUM MATHEMATICAL SPACE
|
110
|
-
@input << "\u3000" # IDEOGRAPHIC SPACE
|
111
|
-
@input << "\uFEFF" # ZERO WIDTH NO-BREAK SPACE
|
20
|
+
expect(@output).to eq " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
describe "with various type of spaces" do
|
24
|
+
before do
|
25
|
+
@input = [
|
26
|
+
(0x0009..0x000D).to_a, # <control-0009>..<control-000D>
|
27
|
+
0x0020, # SPACE
|
28
|
+
0x0085, # <control-0085>
|
29
|
+
0x00A0, # NO-BREAK SPACE
|
30
|
+
0x1680, # OGHAM SPACE MARK
|
31
|
+
0x180E, # MONGOLIAN VOWEL SEPARATOR
|
32
|
+
(0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
|
33
|
+
0x2028, # LINE SEPARATOR
|
34
|
+
0x2029, # PARAGRAPH SEPARATOR
|
35
|
+
0x202F, # NARROW NO-BREAK SPACE
|
36
|
+
0x205F, # MEDIUM MATHEMATICAL SPACE
|
37
|
+
0x3000, # IDEOGRAPHIC SPACE
|
38
|
+
].flatten.collect{ |e| [e].pack 'U*' }
|
112
39
|
@output = @input.join.clean
|
113
40
|
end
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
41
|
+
if RUBY_VERSION.to_f>1.9
|
42
|
+
it "should output a valid UTF-8 string" do
|
43
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
44
|
+
expect(@output).to be_valid_encoding
|
45
|
+
end
|
46
|
+
end
|
47
|
+
it "should replace all spaces to normal spaces" do
|
48
|
+
expect(@output.clean).to eq " \n \n "
|
49
|
+
end
|
50
|
+
end
|
51
|
+
describe "with various no-width characters" do
|
52
|
+
before do
|
53
|
+
@input = [
|
54
|
+
0x200B, # ZERO WIDTH SPACE
|
55
|
+
0x200C, # ZERO WIDTH NON-JOINER
|
56
|
+
0x200D, # ZERO WIDTH JOINER
|
57
|
+
0x2060, # WORD JOINER
|
58
|
+
0xFEFF, # ZERO WIDTH NO-BREAK SPACE
|
59
|
+
].flatten.collect{ |e| [e].pack 'U*' }
|
60
|
+
@output = @input.join.clean
|
133
61
|
end
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
@input = ""
|
140
|
-
(0..255).each{|i| @input << i.chr}
|
141
|
-
@output = @input.clean
|
62
|
+
if RUBY_VERSION.to_f>1.9
|
63
|
+
it "should output a valid UTF-8 string" do
|
64
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
65
|
+
expect(@output).to be_valid_encoding
|
66
|
+
end
|
142
67
|
end
|
143
|
-
it "should
|
144
|
-
@output.
|
68
|
+
it "should remove no-width characters" do
|
69
|
+
expect(@output).to eq ""
|
145
70
|
end
|
146
71
|
end
|
147
72
|
describe "with invalid UTF-8 sequence" do
|
@@ -149,8 +74,14 @@ describe String::Cleaner do
|
|
149
74
|
@input = "\210\004"
|
150
75
|
@output = @input.clean
|
151
76
|
end
|
77
|
+
if RUBY_VERSION.to_f>1.9
|
78
|
+
it "should output a valid UTF-8 string" do
|
79
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
80
|
+
expect(@output).to be_valid_encoding
|
81
|
+
end
|
82
|
+
end
|
152
83
|
it "should replace invisible chars by space" do
|
153
|
-
@output.
|
84
|
+
expect(@output).to eq " "
|
154
85
|
end
|
155
86
|
end
|
156
87
|
describe "with mixed valid and invalid characters" do
|
@@ -158,8 +89,14 @@ describe String::Cleaner do
|
|
158
89
|
@input = "a?^?\xddf"
|
159
90
|
@output = @input.clean
|
160
91
|
end
|
92
|
+
if RUBY_VERSION.to_f>1.9
|
93
|
+
it "should output a valid UTF-8 string" do
|
94
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
95
|
+
expect(@output).to be_valid_encoding
|
96
|
+
end
|
97
|
+
end
|
161
98
|
it "should keep the valid characters" do
|
162
|
-
@output.
|
99
|
+
expect(@output).to eq "a?^?Ýf"
|
163
100
|
end
|
164
101
|
end
|
165
102
|
describe "with already valid characters" do
|
@@ -167,8 +104,14 @@ describe String::Cleaner do
|
|
167
104
|
@input = "\n\t\r\r\n\v\n"
|
168
105
|
@output = @input.clean
|
169
106
|
end
|
107
|
+
if RUBY_VERSION.to_f>1.9
|
108
|
+
it "should output a valid UTF-8 string" do
|
109
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
110
|
+
expect(@output).to be_valid_encoding
|
111
|
+
end
|
112
|
+
end
|
170
113
|
it "should replace invisible chars by space" do
|
171
|
-
@output.
|
114
|
+
expect(@output).to eq "\n \n\n \n"
|
172
115
|
end
|
173
116
|
end
|
174
117
|
describe "with watermarked text" do
|
@@ -176,36 +119,14 @@ describe String::Cleaner do
|
|
176
119
|
@input = "Here is \357\273\277a block \357\273\277of text \357\273\277inside of which a number will be hidden!"
|
177
120
|
@output = @input.clean
|
178
121
|
end
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
before :all do
|
185
|
-
@input = []
|
186
|
-
# "\uXXXX" doesn't exists yet on Ruby 1.8.6
|
187
|
-
@input << " " # SPACE
|
188
|
-
@input << "\xC2\xA0" # NO-BREAK SPACE
|
189
|
-
@input << "\xE2\x80\x80" # EN QUAD
|
190
|
-
@input << "\xE2\x80\x81" # EM QUAD
|
191
|
-
@input << "\xE2\x80\x82" # EN SPACE
|
192
|
-
@input << "\xE2\x80\x83" # EM SPACE
|
193
|
-
@input << "\xE2\x80\x84" # THREE-PER-EM SPACE
|
194
|
-
@input << "\xE2\x80\x85" # FOUR-PER-EM SPACE
|
195
|
-
@input << "\xE2\x80\x86" # SIX-PER-EM SPACE
|
196
|
-
@input << "\xE2\x80\x87" # FIGURE SPACE
|
197
|
-
@input << "\xE2\x80\x88" # PUNCTUATION SPACE
|
198
|
-
@input << "\xE2\x80\x89" # THIN SPACE
|
199
|
-
@input << "\xE2\x80\x8A" # HAIR SPACE
|
200
|
-
@input << "\xE2\x80\x8B" # ZERO WIDTH SPACE
|
201
|
-
@input << "\xE2\x80\xAF" # NARROW NO-BREAK SPACE
|
202
|
-
@input << "\xE2\x81\x9F" # MEDIUM MATHEMATICAL SPACE
|
203
|
-
@input << "\xE3\x80\x80" # IDEOGRAPHIC SPACE
|
204
|
-
@input << "\xEF\xBB\xBF" # ZERO WIDTH NO-BREAK SPACE
|
205
|
-
@output = @input.join.clean
|
122
|
+
if RUBY_VERSION.to_f>1.9
|
123
|
+
it "should output a valid UTF-8 string" do
|
124
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
125
|
+
expect(@output).to be_valid_encoding
|
126
|
+
end
|
206
127
|
end
|
207
128
|
it "should replace invisible chars by space" do
|
208
|
-
@output.
|
129
|
+
expect(@output).to eq "Here is a block of text inside of which a number will be hidden!"
|
209
130
|
end
|
210
131
|
end
|
211
132
|
describe "with euro sign from both ISO 8859-15 or Windows-1252" do
|
@@ -213,59 +134,65 @@ describe String::Cleaner do
|
|
213
134
|
@input = "\x80\xA4"
|
214
135
|
@output = @input.clean
|
215
136
|
end
|
137
|
+
if RUBY_VERSION.to_f>1.9
|
138
|
+
it "should output a valid UTF-8 string" do
|
139
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
140
|
+
expect(@output).to be_valid_encoding
|
141
|
+
end
|
142
|
+
end
|
216
143
|
it "should replace invisible chars by space" do
|
217
|
-
@output.
|
144
|
+
expect(@output).to eq "€€"
|
218
145
|
end
|
219
146
|
end
|
220
147
|
end
|
221
148
|
describe "#trim(chars = \"\")" do
|
222
149
|
it "should use #strip when used without params" do
|
223
|
-
string, expected = "",
|
224
|
-
string.
|
225
|
-
string.trim.
|
150
|
+
string, expected = "", double
|
151
|
+
expect(string).to receive(:strip).and_return expected
|
152
|
+
expect(string.trim).to be expected
|
226
153
|
end
|
227
154
|
it "should remove multiple characters at once from beginning and end" do
|
228
155
|
prefix, suffix = " rhuif dww f f", "dqz qafdédsj iowe fcms. qpo asttt t dtt"
|
229
156
|
to_remove = "acdeéfhijmopqrstuwz "
|
230
|
-
"#{prefix}d#{suffix}".trim(to_remove).
|
231
|
-
"#{prefix}D#{suffix}".trim(to_remove).
|
157
|
+
expect("#{prefix}d#{suffix}".trim(to_remove)).to eq "."
|
158
|
+
expect("#{prefix}D#{suffix}".trim(to_remove)).to eq "Ddqz qafdédsj iowe fcms."
|
232
159
|
end
|
233
160
|
end
|
234
161
|
describe "#fix_endlines" do
|
235
162
|
it "should convert windows endlines" do
|
236
|
-
"this is a\r\ntest\r\n".fix_endlines.
|
163
|
+
expect("this is a\r\ntest\r\n".fix_endlines).to eql "this is a\ntest\n"
|
237
164
|
end
|
238
165
|
it "should convert old mac endlines" do
|
239
|
-
"this is a\rtest\r".fix_endlines.
|
166
|
+
expect("this is a\rtest\r".fix_endlines).to eql "this is a\ntest\n"
|
240
167
|
end
|
241
168
|
it "should not modify proper linux endlines" do
|
242
|
-
"this is a\ntest\n".fix_endlines.
|
169
|
+
expect("this is a\ntest\n".fix_endlines).to eql "this is a\ntest\n"
|
243
170
|
end
|
244
171
|
it "should convert mixed endlines" do
|
245
|
-
"this is a\n\rtest\r\n".fix_endlines.
|
172
|
+
expect("this is a\n\rtest\r\n".fix_endlines).to eql "this is a\n\ntest\n"
|
246
173
|
end
|
247
174
|
end
|
248
175
|
describe "#to_permalink(separator=\"-\")" do
|
249
176
|
it "should create nice permalink for string with many accents" do
|
250
177
|
crazy = " ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý - Hello world, I'm a crazy string!! "
|
251
|
-
crazy.to_permalink.
|
178
|
+
expect(crazy.to_permalink).to eq "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
|
252
179
|
end
|
253
180
|
it "should create nice permalink even for evil string" do
|
254
181
|
evil = (128..255).inject(""){ |acc, b| acc += ("%c" % b) }
|
255
|
-
evil.to_permalink.
|
182
|
+
expect(evil.to_permalink).to eq "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
|
256
183
|
end
|
257
184
|
it "should remove endlines too" do
|
258
|
-
"this\nis\ta\ntest".to_permalink("_").
|
185
|
+
expect("this\nis\ta\ntest".to_permalink("_")).to eq "this_is_a_test"
|
259
186
|
end
|
260
187
|
end
|
261
188
|
describe "#nl2br" do
|
262
189
|
it "should convert \n to <br/>\n" do
|
263
|
-
"this\nis\ta\ntest\r".nl2br.
|
190
|
+
expect("this\nis\ta\ntest\r".nl2br).to eq "this<br/>\nis\ta<br/>\ntest\r"
|
264
191
|
end
|
265
192
|
end
|
266
193
|
describe "#to_nicer_sym" do
|
267
194
|
it "should convert \"Select or Other\" to :select_or_other" do
|
268
|
-
"Select or Other".to_nicer_sym.
|
195
|
+
expect("Select or Other".to_nicer_sym).to be :select_or_other
|
269
196
|
end
|
270
197
|
end
|
271
|
-
end
|
198
|
+
end
|
data/string_cleaner.gemspec
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{string_cleaner}
|
5
|
-
s.version = "0.
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
5
|
+
s.version = "1.0.0"
|
8
6
|
s.authors = ["Joseph Halter"]
|
9
7
|
s.date = %q{2010-10-18}
|
10
8
|
s.email = %q{joseph@openhood.com}
|
9
|
+
s.required_ruby_version = ">= 2.6"
|
10
|
+
s.license = "MIT"
|
11
11
|
s.extra_rdoc_files = [
|
12
12
|
"LICENSE",
|
13
13
|
"README.rdoc"
|
@@ -22,26 +22,14 @@ Gem::Specification.new do |s|
|
|
22
22
|
"spec/string_cleaner_spec.rb",
|
23
23
|
"string_cleaner.gemspec"
|
24
24
|
]
|
25
|
-
s.has_rdoc = true
|
26
25
|
s.homepage = %q{http://github.com/JosephHalter/string_cleaner}
|
27
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
28
26
|
s.require_paths = ["lib"]
|
29
|
-
s.
|
30
|
-
s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, fully compatible with Ruby 1.8 & 1.9 with extensive specs}
|
27
|
+
s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with extensive specs}
|
31
28
|
s.test_files = [
|
32
29
|
"spec/spec_helper.rb",
|
33
30
|
"spec/string_cleaner_spec.rb"
|
34
31
|
]
|
35
|
-
s.add_runtime_dependency "unidecoder"
|
32
|
+
s.add_runtime_dependency "talentbox-unidecoder", "2.0.0"
|
33
|
+
s.add_development_dependency "rake"
|
36
34
|
s.add_development_dependency "rspec"
|
37
|
-
|
38
|
-
if s.respond_to? :specification_version then
|
39
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
40
|
-
s.specification_version = 2
|
41
|
-
|
42
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
43
|
-
else
|
44
|
-
end
|
45
|
-
else
|
46
|
-
end
|
47
35
|
end
|
metadata
CHANGED
@@ -1,59 +1,66 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_cleaner
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
segments:
|
6
|
-
- 0
|
7
|
-
- 2
|
8
|
-
- 0
|
9
|
-
version: 0.2.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
10
5
|
platform: ruby
|
11
|
-
authors:
|
6
|
+
authors:
|
12
7
|
- Joseph Halter
|
13
8
|
autorequire:
|
14
9
|
bindir: bin
|
15
10
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
11
|
+
date: 2010-10-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: talentbox-unidecoder
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.0
|
20
|
+
type: :runtime
|
22
21
|
prerelease: false
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
26
31
|
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
|
30
|
-
version: "0"
|
31
|
-
type: :runtime
|
32
|
-
version_requirements: *id001
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: rspec
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
35
|
prerelease: false
|
36
|
-
|
37
|
-
|
38
|
-
requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
39
38
|
- - ">="
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
44
48
|
type: :development
|
45
|
-
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
46
55
|
description:
|
47
56
|
email: joseph@openhood.com
|
48
57
|
executables: []
|
49
|
-
|
50
58
|
extensions: []
|
51
|
-
|
52
|
-
extra_rdoc_files:
|
59
|
+
extra_rdoc_files:
|
53
60
|
- LICENSE
|
54
61
|
- README.rdoc
|
55
|
-
files:
|
56
|
-
- .gitignore
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
57
64
|
- LICENSE
|
58
65
|
- README.rdoc
|
59
66
|
- Rakefile
|
@@ -61,38 +68,30 @@ files:
|
|
61
68
|
- spec/spec_helper.rb
|
62
69
|
- spec/string_cleaner_spec.rb
|
63
70
|
- string_cleaner.gemspec
|
64
|
-
has_rdoc: true
|
65
71
|
homepage: http://github.com/JosephHalter/string_cleaner
|
66
|
-
licenses:
|
67
|
-
|
72
|
+
licenses:
|
73
|
+
- MIT
|
74
|
+
metadata: {}
|
68
75
|
post_install_message:
|
69
|
-
rdoc_options:
|
70
|
-
|
71
|
-
require_paths:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
72
78
|
- lib
|
73
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
-
|
75
|
-
requirements:
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
76
81
|
- - ">="
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
|
-
requirements:
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '2.6'
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
84
86
|
- - ">="
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
|
87
|
-
- 0
|
88
|
-
version: "0"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
89
|
requirements: []
|
90
|
-
|
91
|
-
rubyforge_project:
|
92
|
-
rubygems_version: 1.3.7
|
90
|
+
rubygems_version: 3.1.6
|
93
91
|
signing_key:
|
94
|
-
specification_version:
|
95
|
-
summary: Fix invalid UTF-8 and wipe invisible chars,
|
96
|
-
|
92
|
+
specification_version: 4
|
93
|
+
summary: Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with
|
94
|
+
extensive specs
|
95
|
+
test_files:
|
97
96
|
- spec/spec_helper.rb
|
98
97
|
- spec/string_cleaner_spec.rb
|