string_cleaner 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -1
- data/README.rdoc +4 -10
- data/Rakefile +8 -2
- data/lib/string_cleaner.rb +28 -20
- data/spec/string_cleaner_spec.rb +106 -179
- data/string_cleaner.gemspec +6 -18
- metadata +64 -65
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8451c9bc58c373d7b4a768cb3ed1ff355e5116f24b02c6a5bfeb69aa0549c589
|
4
|
+
data.tar.gz: b226d948238bd18cdacb01402c0576203da724b895c6745d3b713fae5d21f9ee
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 3a83693387d088db6f0b89ea73ecffbdf0b034885a0b2418d222c1197c48794b05cc8fb28f477bcfc4d345ee561d0ea3e1a543c34309ddb77ecc79a6adffe4fb
|
7
|
+
data.tar.gz: 3a35bab3513612149b345bf96d536d741ed3d4dfa757a353d3bc955782173139e8dac4c92b0bef025534706ca13e0ba17b14a30da0b10029d2653eeded5224f2
|
data/.gitignore
CHANGED
data/README.rdoc
CHANGED
@@ -6,21 +6,15 @@ Just add a method .clean to String which does:
|
|
6
6
|
* replace \r\n and \r with \n normalizing end of lines
|
7
7
|
* replace control characters and other invisible chars by spaces
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
sudo gem install JosephHalter-string_cleaner
|
12
|
-
|
13
|
-
== Ruby 1.9+
|
9
|
+
Supports only Ruby 2.6+
|
14
10
|
|
15
|
-
|
16
|
-
|
17
|
-
== Ruby 1.8.x
|
11
|
+
== Install
|
18
12
|
|
19
|
-
|
13
|
+
sudo gem install string_cleaner
|
20
14
|
|
21
15
|
== Example usage
|
22
16
|
|
23
|
-
|
17
|
+
"\210\004".clean # => " "
|
24
18
|
|
25
19
|
== Copyright
|
26
20
|
|
data/Rakefile
CHANGED
@@ -1,4 +1,11 @@
|
|
1
|
-
|
1
|
+
if RUBY_VERSION.to_f<1.9
|
2
|
+
require 'rake/tasklib'
|
3
|
+
require 'rake/rdoctask'
|
4
|
+
require 'rubygems'
|
5
|
+
else
|
6
|
+
require 'rdoc/task'
|
7
|
+
end
|
8
|
+
|
2
9
|
begin
|
3
10
|
require 'bundler/setup'
|
4
11
|
rescue LoadError
|
@@ -17,7 +24,6 @@ end
|
|
17
24
|
|
18
25
|
task :default => :spec
|
19
26
|
|
20
|
-
require 'rake/rdoctask'
|
21
27
|
Rake::RDocTask.new do |rdoc|
|
22
28
|
rdoc.rdoc_dir = 'rdoc'
|
23
29
|
rdoc.title = "string_cleaner"
|
data/lib/string_cleaner.rb
CHANGED
@@ -9,14 +9,13 @@ module String::Cleaner
|
|
9
9
|
|
10
10
|
def fix_encoding
|
11
11
|
utf8 = dup
|
12
|
-
if utf8.respond_to?(:force_encoding)
|
12
|
+
if utf8.respond_to?(:force_encoding)
|
13
13
|
utf8.force_encoding("UTF-8") # for Ruby 1.9+
|
14
14
|
unless utf8.valid_encoding? # if invalid UTF-8
|
15
|
-
utf8 = utf8.force_encoding("ISO8859-
|
15
|
+
utf8 = utf8.force_encoding("ISO8859-1")
|
16
16
|
utf8.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
|
17
|
-
utf8.gsub!("\xC2\x80", "€") # special case for euro sign from Windows-1252
|
18
|
-
utf8.force_encoding("UTF-8")
|
19
17
|
end
|
18
|
+
utf8.gsub!(/\u0080|¤/, "€") # special case for euro sign from Windows-1252
|
20
19
|
utf8
|
21
20
|
else
|
22
21
|
require "iconv"
|
@@ -25,7 +24,7 @@ module String::Cleaner
|
|
25
24
|
Iconv.new("UTF-8", "UTF-8").iconv(utf8)
|
26
25
|
rescue
|
27
26
|
utf8.gsub!(/\x80/n, "\xA4")
|
28
|
-
Iconv.new("UTF-8//IGNORE", "ISO8859-
|
27
|
+
Iconv.new("UTF-8//IGNORE", "ISO8859-1").iconv(utf8).gsub("¤", "€")
|
29
28
|
end
|
30
29
|
end
|
31
30
|
end
|
@@ -35,31 +34,40 @@ module String::Cleaner
|
|
35
34
|
end
|
36
35
|
|
37
36
|
SPECIAL_SPACES = [
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
37
|
+
0x00A0, # NO-BREAK SPACE
|
38
|
+
0x1680, # OGHAM SPACE MARK
|
39
|
+
0x180E, # MONGOLIAN VOWEL SEPARATOR
|
40
|
+
(0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
|
41
|
+
0x2028, # LINE SEPARATOR
|
42
|
+
0x2029, # PARAGRAPH SEPARATOR
|
43
|
+
0x202F, # NARROW NO-BREAK SPACE
|
44
|
+
0x205F, # MEDIUM MATHEMATICAL SPACE
|
45
|
+
0x3000, # IDEOGRAPHIC SPACE
|
46
|
+
].flatten.collect{|e| [e].pack 'U*'}
|
47
|
+
|
48
|
+
ZERO_WIDTH = [
|
49
|
+
0x200B, # ZERO WIDTH SPACE
|
50
|
+
0x200C, # ZERO WIDTH NON-JOINER
|
51
|
+
0x200D, # ZERO WIDTH JOINER
|
52
|
+
0x2060, # WORD JOINER
|
53
|
+
0xFEFF, # ZERO WIDTH NO-BREAK SPACE
|
54
|
+
].flatten.collect{|e| [e].pack 'U*'}
|
48
55
|
|
49
56
|
def fix_invisible_chars
|
50
57
|
utf8 = self.dup
|
51
|
-
|
58
|
+
utf8.gsub!(Regexp.new(ZERO_WIDTH.join("|")), "")
|
59
|
+
utf8 = if utf8.respond_to?(:force_encoding)
|
52
60
|
utf8 = (utf8 << " ").split(/\n/u).each{|line|
|
53
61
|
line.gsub!(/[\s\p{C}]/u, " ")
|
54
62
|
}.join("\n").chop!
|
55
|
-
utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|")), " ")
|
56
|
-
utf8.force_encoding("UTF-8")
|
57
63
|
else
|
58
64
|
require "oniguruma"
|
59
65
|
utf8.split(/\n/n).collect{|line|
|
60
|
-
Oniguruma::ORegexp.new("[\\
|
66
|
+
Oniguruma::ORegexp.new("[\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8}).gsub(line, " ")
|
61
67
|
}.join("\n").chop!
|
62
68
|
end
|
69
|
+
utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|") + "|\s"), " ")
|
70
|
+
utf8
|
63
71
|
end
|
64
72
|
|
65
73
|
def trim(chars = "")
|
@@ -67,7 +75,7 @@ module String::Cleaner
|
|
67
75
|
end
|
68
76
|
|
69
77
|
def to_permalink(separator="-")
|
70
|
-
|
78
|
+
clean.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
|
71
79
|
end
|
72
80
|
|
73
81
|
def nl2br
|
data/spec/string_cleaner_spec.rb
CHANGED
@@ -1,147 +1,72 @@
|
|
1
|
-
|
2
|
-
require File.dirname(__FILE__) + "/spec_helper"
|
1
|
+
require "spec_helper"
|
3
2
|
|
4
|
-
describe String::Cleaner do
|
5
|
-
|
6
|
-
# specs for Ruby 1.9+
|
3
|
+
RSpec.describe String::Cleaner do
|
4
|
+
describe "#clean" do
|
7
5
|
describe "with all 8-bit characters" do
|
8
6
|
before :all do
|
9
|
-
@input = ""
|
7
|
+
@input = ""
|
8
|
+
@input.force_encoding("ISO8859-15") if @input.respond_to?(:force_encoding)
|
10
9
|
(0..255).each{|i| @input << i.chr}
|
11
|
-
@input.force_encoding("UTF-8")
|
10
|
+
@input.force_encoding("UTF-8") if @input.respond_to?(:force_encoding)
|
12
11
|
@output = @input.clean
|
13
12
|
end
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
if RUBY_VERSION.to_f>1.9
|
14
|
+
it "should output a valid UTF-8 string" do
|
15
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
16
|
+
expect(@output).to be_valid_encoding
|
17
|
+
end
|
17
18
|
end
|
18
19
|
it "should wipe out the control characters" do
|
19
|
-
@output.
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
end
|
39
|
-
describe "with invalid UTF-8 sequence" do
|
40
|
-
before :all do
|
41
|
-
@input = "\210\004"
|
42
|
-
@output = @input.clean
|
43
|
-
end
|
44
|
-
it "should output a valid UTF-8 string" do
|
45
|
-
@output.encoding.name.should == "UTF-8"
|
46
|
-
@output.should be_valid_encoding
|
47
|
-
end
|
48
|
-
it "should replace invisible chars by space" do
|
49
|
-
@output.should == " "
|
50
|
-
end
|
51
|
-
end
|
52
|
-
describe "with mixed valid and invalid characters" do
|
53
|
-
before :all do
|
54
|
-
@input = "a?^?\xddf"
|
55
|
-
@output = @input.clean
|
56
|
-
end
|
57
|
-
it "should output a valid UTF-8 string" do
|
58
|
-
@output.encoding.name.should == "UTF-8"
|
59
|
-
@output.should be_valid_encoding
|
60
|
-
end
|
61
|
-
it "should keep the valid characters" do
|
62
|
-
@output.should == "a?^?Ýf"
|
63
|
-
end
|
64
|
-
end
|
65
|
-
describe "with already valid characters" do
|
66
|
-
before :all do
|
67
|
-
@input = "\n\t\r\r\n\v\n"
|
68
|
-
@output = @input.clean
|
69
|
-
end
|
70
|
-
it "should output a valid UTF-8 string" do
|
71
|
-
@output.encoding.name.should == "UTF-8"
|
72
|
-
@output.should be_valid_encoding
|
73
|
-
end
|
74
|
-
it "should replace invisible chars by space" do
|
75
|
-
@output.should == "\n \n\n \n"
|
76
|
-
end
|
77
|
-
end
|
78
|
-
describe "with watermarked text" do
|
79
|
-
before :all do
|
80
|
-
@input = "Here is \357\273\277a block \357\273\277of text \357\273\277inside of which a number will be hidden!"
|
81
|
-
@output = @input.clean
|
82
|
-
end
|
83
|
-
it "should output a valid UTF-8 string" do
|
84
|
-
@output.encoding.name.should == "UTF-8"
|
85
|
-
@output.should be_valid_encoding
|
86
|
-
end
|
87
|
-
it "should replace invisible chars by space" do
|
88
|
-
@output.should == "Here is a block of text inside of which a number will be hidden!"
|
89
|
-
end
|
90
|
-
end
|
91
|
-
describe "with unusual valid spaces" do
|
92
|
-
before :all do
|
93
|
-
@input = []
|
94
|
-
@input << "\u0020" # SPACE
|
95
|
-
@input << "\u00A0" # NO-BREAK SPACE
|
96
|
-
@input << "\u2000" # EN QUAD
|
97
|
-
@input << "\u2001" # EM QUAD
|
98
|
-
@input << "\u2002" # EN SPACE
|
99
|
-
@input << "\u2003" # EM SPACE
|
100
|
-
@input << "\u2004" # THREE-PER-EM SPACE
|
101
|
-
@input << "\u2005" # FOUR-PER-EM SPACE
|
102
|
-
@input << "\u2006" # SIX-PER-EM SPACE
|
103
|
-
@input << "\u2007" # FIGURE SPACE
|
104
|
-
@input << "\u2008" # PUNCTUATION SPACE
|
105
|
-
@input << "\u2009" # THIN SPACE
|
106
|
-
@input << "\u200A" # HAIR SPACE
|
107
|
-
@input << "\u200B" # ZERO WIDTH SPACE
|
108
|
-
@input << "\u202F" # NARROW NO-BREAK SPACE
|
109
|
-
@input << "\u205F" # MEDIUM MATHEMATICAL SPACE
|
110
|
-
@input << "\u3000" # IDEOGRAPHIC SPACE
|
111
|
-
@input << "\uFEFF" # ZERO WIDTH NO-BREAK SPACE
|
20
|
+
expect(@output).to eq " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
describe "with various type of spaces" do
|
24
|
+
before do
|
25
|
+
@input = [
|
26
|
+
(0x0009..0x000D).to_a, # <control-0009>..<control-000D>
|
27
|
+
0x0020, # SPACE
|
28
|
+
0x0085, # <control-0085>
|
29
|
+
0x00A0, # NO-BREAK SPACE
|
30
|
+
0x1680, # OGHAM SPACE MARK
|
31
|
+
0x180E, # MONGOLIAN VOWEL SEPARATOR
|
32
|
+
(0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
|
33
|
+
0x2028, # LINE SEPARATOR
|
34
|
+
0x2029, # PARAGRAPH SEPARATOR
|
35
|
+
0x202F, # NARROW NO-BREAK SPACE
|
36
|
+
0x205F, # MEDIUM MATHEMATICAL SPACE
|
37
|
+
0x3000, # IDEOGRAPHIC SPACE
|
38
|
+
].flatten.collect{ |e| [e].pack 'U*' }
|
112
39
|
@output = @input.join.clean
|
113
40
|
end
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
41
|
+
if RUBY_VERSION.to_f>1.9
|
42
|
+
it "should output a valid UTF-8 string" do
|
43
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
44
|
+
expect(@output).to be_valid_encoding
|
45
|
+
end
|
46
|
+
end
|
47
|
+
it "should replace all spaces to normal spaces" do
|
48
|
+
expect(@output.clean).to eq " \n \n "
|
49
|
+
end
|
50
|
+
end
|
51
|
+
describe "with various no-width characters" do
|
52
|
+
before do
|
53
|
+
@input = [
|
54
|
+
0x200B, # ZERO WIDTH SPACE
|
55
|
+
0x200C, # ZERO WIDTH NON-JOINER
|
56
|
+
0x200D, # ZERO WIDTH JOINER
|
57
|
+
0x2060, # WORD JOINER
|
58
|
+
0xFEFF, # ZERO WIDTH NO-BREAK SPACE
|
59
|
+
].flatten.collect{ |e| [e].pack 'U*' }
|
60
|
+
@output = @input.join.clean
|
133
61
|
end
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
@input = ""
|
140
|
-
(0..255).each{|i| @input << i.chr}
|
141
|
-
@output = @input.clean
|
62
|
+
if RUBY_VERSION.to_f>1.9
|
63
|
+
it "should output a valid UTF-8 string" do
|
64
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
65
|
+
expect(@output).to be_valid_encoding
|
66
|
+
end
|
142
67
|
end
|
143
|
-
it "should
|
144
|
-
@output.
|
68
|
+
it "should remove no-width characters" do
|
69
|
+
expect(@output).to eq ""
|
145
70
|
end
|
146
71
|
end
|
147
72
|
describe "with invalid UTF-8 sequence" do
|
@@ -149,8 +74,14 @@ describe String::Cleaner do
|
|
149
74
|
@input = "\210\004"
|
150
75
|
@output = @input.clean
|
151
76
|
end
|
77
|
+
if RUBY_VERSION.to_f>1.9
|
78
|
+
it "should output a valid UTF-8 string" do
|
79
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
80
|
+
expect(@output).to be_valid_encoding
|
81
|
+
end
|
82
|
+
end
|
152
83
|
it "should replace invisible chars by space" do
|
153
|
-
@output.
|
84
|
+
expect(@output).to eq " "
|
154
85
|
end
|
155
86
|
end
|
156
87
|
describe "with mixed valid and invalid characters" do
|
@@ -158,8 +89,14 @@ describe String::Cleaner do
|
|
158
89
|
@input = "a?^?\xddf"
|
159
90
|
@output = @input.clean
|
160
91
|
end
|
92
|
+
if RUBY_VERSION.to_f>1.9
|
93
|
+
it "should output a valid UTF-8 string" do
|
94
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
95
|
+
expect(@output).to be_valid_encoding
|
96
|
+
end
|
97
|
+
end
|
161
98
|
it "should keep the valid characters" do
|
162
|
-
@output.
|
99
|
+
expect(@output).to eq "a?^?Ýf"
|
163
100
|
end
|
164
101
|
end
|
165
102
|
describe "with already valid characters" do
|
@@ -167,8 +104,14 @@ describe String::Cleaner do
|
|
167
104
|
@input = "\n\t\r\r\n\v\n"
|
168
105
|
@output = @input.clean
|
169
106
|
end
|
107
|
+
if RUBY_VERSION.to_f>1.9
|
108
|
+
it "should output a valid UTF-8 string" do
|
109
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
110
|
+
expect(@output).to be_valid_encoding
|
111
|
+
end
|
112
|
+
end
|
170
113
|
it "should replace invisible chars by space" do
|
171
|
-
@output.
|
114
|
+
expect(@output).to eq "\n \n\n \n"
|
172
115
|
end
|
173
116
|
end
|
174
117
|
describe "with watermarked text" do
|
@@ -176,36 +119,14 @@ describe String::Cleaner do
|
|
176
119
|
@input = "Here is \357\273\277a block \357\273\277of text \357\273\277inside of which a number will be hidden!"
|
177
120
|
@output = @input.clean
|
178
121
|
end
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
before :all do
|
185
|
-
@input = []
|
186
|
-
# "\uXXXX" doesn't exists yet on Ruby 1.8.6
|
187
|
-
@input << " " # SPACE
|
188
|
-
@input << "\xC2\xA0" # NO-BREAK SPACE
|
189
|
-
@input << "\xE2\x80\x80" # EN QUAD
|
190
|
-
@input << "\xE2\x80\x81" # EM QUAD
|
191
|
-
@input << "\xE2\x80\x82" # EN SPACE
|
192
|
-
@input << "\xE2\x80\x83" # EM SPACE
|
193
|
-
@input << "\xE2\x80\x84" # THREE-PER-EM SPACE
|
194
|
-
@input << "\xE2\x80\x85" # FOUR-PER-EM SPACE
|
195
|
-
@input << "\xE2\x80\x86" # SIX-PER-EM SPACE
|
196
|
-
@input << "\xE2\x80\x87" # FIGURE SPACE
|
197
|
-
@input << "\xE2\x80\x88" # PUNCTUATION SPACE
|
198
|
-
@input << "\xE2\x80\x89" # THIN SPACE
|
199
|
-
@input << "\xE2\x80\x8A" # HAIR SPACE
|
200
|
-
@input << "\xE2\x80\x8B" # ZERO WIDTH SPACE
|
201
|
-
@input << "\xE2\x80\xAF" # NARROW NO-BREAK SPACE
|
202
|
-
@input << "\xE2\x81\x9F" # MEDIUM MATHEMATICAL SPACE
|
203
|
-
@input << "\xE3\x80\x80" # IDEOGRAPHIC SPACE
|
204
|
-
@input << "\xEF\xBB\xBF" # ZERO WIDTH NO-BREAK SPACE
|
205
|
-
@output = @input.join.clean
|
122
|
+
if RUBY_VERSION.to_f>1.9
|
123
|
+
it "should output a valid UTF-8 string" do
|
124
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
125
|
+
expect(@output).to be_valid_encoding
|
126
|
+
end
|
206
127
|
end
|
207
128
|
it "should replace invisible chars by space" do
|
208
|
-
@output.
|
129
|
+
expect(@output).to eq "Here is a block of text inside of which a number will be hidden!"
|
209
130
|
end
|
210
131
|
end
|
211
132
|
describe "with euro sign from both ISO 8859-15 or Windows-1252" do
|
@@ -213,59 +134,65 @@ describe String::Cleaner do
|
|
213
134
|
@input = "\x80\xA4"
|
214
135
|
@output = @input.clean
|
215
136
|
end
|
137
|
+
if RUBY_VERSION.to_f>1.9
|
138
|
+
it "should output a valid UTF-8 string" do
|
139
|
+
expect(@output.encoding.name).to eq "UTF-8"
|
140
|
+
expect(@output).to be_valid_encoding
|
141
|
+
end
|
142
|
+
end
|
216
143
|
it "should replace invisible chars by space" do
|
217
|
-
@output.
|
144
|
+
expect(@output).to eq "€€"
|
218
145
|
end
|
219
146
|
end
|
220
147
|
end
|
221
148
|
describe "#trim(chars = \"\")" do
|
222
149
|
it "should use #strip when used without params" do
|
223
|
-
string, expected = "",
|
224
|
-
string.
|
225
|
-
string.trim.
|
150
|
+
string, expected = "", double
|
151
|
+
expect(string).to receive(:strip).and_return expected
|
152
|
+
expect(string.trim).to be expected
|
226
153
|
end
|
227
154
|
it "should remove multiple characters at once from beginning and end" do
|
228
155
|
prefix, suffix = " rhuif dww f f", "dqz qafdédsj iowe fcms. qpo asttt t dtt"
|
229
156
|
to_remove = "acdeéfhijmopqrstuwz "
|
230
|
-
"#{prefix}d#{suffix}".trim(to_remove).
|
231
|
-
"#{prefix}D#{suffix}".trim(to_remove).
|
157
|
+
expect("#{prefix}d#{suffix}".trim(to_remove)).to eq "."
|
158
|
+
expect("#{prefix}D#{suffix}".trim(to_remove)).to eq "Ddqz qafdédsj iowe fcms."
|
232
159
|
end
|
233
160
|
end
|
234
161
|
describe "#fix_endlines" do
|
235
162
|
it "should convert windows endlines" do
|
236
|
-
"this is a\r\ntest\r\n".fix_endlines.
|
163
|
+
expect("this is a\r\ntest\r\n".fix_endlines).to eql "this is a\ntest\n"
|
237
164
|
end
|
238
165
|
it "should convert old mac endlines" do
|
239
|
-
"this is a\rtest\r".fix_endlines.
|
166
|
+
expect("this is a\rtest\r".fix_endlines).to eql "this is a\ntest\n"
|
240
167
|
end
|
241
168
|
it "should not modify proper linux endlines" do
|
242
|
-
"this is a\ntest\n".fix_endlines.
|
169
|
+
expect("this is a\ntest\n".fix_endlines).to eql "this is a\ntest\n"
|
243
170
|
end
|
244
171
|
it "should convert mixed endlines" do
|
245
|
-
"this is a\n\rtest\r\n".fix_endlines.
|
172
|
+
expect("this is a\n\rtest\r\n".fix_endlines).to eql "this is a\n\ntest\n"
|
246
173
|
end
|
247
174
|
end
|
248
175
|
describe "#to_permalink(separator=\"-\")" do
|
249
176
|
it "should create nice permalink for string with many accents" do
|
250
177
|
crazy = " ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý - Hello world, I'm a crazy string!! "
|
251
|
-
crazy.to_permalink.
|
178
|
+
expect(crazy.to_permalink).to eq "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
|
252
179
|
end
|
253
180
|
it "should create nice permalink even for evil string" do
|
254
181
|
evil = (128..255).inject(""){ |acc, b| acc += ("%c" % b) }
|
255
|
-
evil.to_permalink.
|
182
|
+
expect(evil.to_permalink).to eq "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
|
256
183
|
end
|
257
184
|
it "should remove endlines too" do
|
258
|
-
"this\nis\ta\ntest".to_permalink("_").
|
185
|
+
expect("this\nis\ta\ntest".to_permalink("_")).to eq "this_is_a_test"
|
259
186
|
end
|
260
187
|
end
|
261
188
|
describe "#nl2br" do
|
262
189
|
it "should convert \n to <br/>\n" do
|
263
|
-
"this\nis\ta\ntest\r".nl2br.
|
190
|
+
expect("this\nis\ta\ntest\r".nl2br).to eq "this<br/>\nis\ta<br/>\ntest\r"
|
264
191
|
end
|
265
192
|
end
|
266
193
|
describe "#to_nicer_sym" do
|
267
194
|
it "should convert \"Select or Other\" to :select_or_other" do
|
268
|
-
"Select or Other".to_nicer_sym.
|
195
|
+
expect("Select or Other".to_nicer_sym).to be :select_or_other
|
269
196
|
end
|
270
197
|
end
|
271
|
-
end
|
198
|
+
end
|
data/string_cleaner.gemspec
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{string_cleaner}
|
5
|
-
s.version = "0.
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
5
|
+
s.version = "1.0.0"
|
8
6
|
s.authors = ["Joseph Halter"]
|
9
7
|
s.date = %q{2010-10-18}
|
10
8
|
s.email = %q{joseph@openhood.com}
|
9
|
+
s.required_ruby_version = ">= 2.6"
|
10
|
+
s.license = "MIT"
|
11
11
|
s.extra_rdoc_files = [
|
12
12
|
"LICENSE",
|
13
13
|
"README.rdoc"
|
@@ -22,26 +22,14 @@ Gem::Specification.new do |s|
|
|
22
22
|
"spec/string_cleaner_spec.rb",
|
23
23
|
"string_cleaner.gemspec"
|
24
24
|
]
|
25
|
-
s.has_rdoc = true
|
26
25
|
s.homepage = %q{http://github.com/JosephHalter/string_cleaner}
|
27
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
28
26
|
s.require_paths = ["lib"]
|
29
|
-
s.
|
30
|
-
s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, fully compatible with Ruby 1.8 & 1.9 with extensive specs}
|
27
|
+
s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with extensive specs}
|
31
28
|
s.test_files = [
|
32
29
|
"spec/spec_helper.rb",
|
33
30
|
"spec/string_cleaner_spec.rb"
|
34
31
|
]
|
35
|
-
s.add_runtime_dependency "unidecoder"
|
32
|
+
s.add_runtime_dependency "talentbox-unidecoder", "2.0.0"
|
33
|
+
s.add_development_dependency "rake"
|
36
34
|
s.add_development_dependency "rspec"
|
37
|
-
|
38
|
-
if s.respond_to? :specification_version then
|
39
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
40
|
-
s.specification_version = 2
|
41
|
-
|
42
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
43
|
-
else
|
44
|
-
end
|
45
|
-
else
|
46
|
-
end
|
47
35
|
end
|
metadata
CHANGED
@@ -1,59 +1,66 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_cleaner
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
segments:
|
6
|
-
- 0
|
7
|
-
- 2
|
8
|
-
- 0
|
9
|
-
version: 0.2.0
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
10
5
|
platform: ruby
|
11
|
-
authors:
|
6
|
+
authors:
|
12
7
|
- Joseph Halter
|
13
8
|
autorequire:
|
14
9
|
bindir: bin
|
15
10
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
11
|
+
date: 2010-10-18 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: talentbox-unidecoder
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.0.0
|
20
|
+
type: :runtime
|
22
21
|
prerelease: false
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
26
31
|
- - ">="
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
|
30
|
-
version: "0"
|
31
|
-
type: :runtime
|
32
|
-
version_requirements: *id001
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: rspec
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
35
|
prerelease: false
|
36
|
-
|
37
|
-
|
38
|
-
requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
39
38
|
- - ">="
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
44
48
|
type: :development
|
45
|
-
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
46
55
|
description:
|
47
56
|
email: joseph@openhood.com
|
48
57
|
executables: []
|
49
|
-
|
50
58
|
extensions: []
|
51
|
-
|
52
|
-
extra_rdoc_files:
|
59
|
+
extra_rdoc_files:
|
53
60
|
- LICENSE
|
54
61
|
- README.rdoc
|
55
|
-
files:
|
56
|
-
- .gitignore
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
57
64
|
- LICENSE
|
58
65
|
- README.rdoc
|
59
66
|
- Rakefile
|
@@ -61,38 +68,30 @@ files:
|
|
61
68
|
- spec/spec_helper.rb
|
62
69
|
- spec/string_cleaner_spec.rb
|
63
70
|
- string_cleaner.gemspec
|
64
|
-
has_rdoc: true
|
65
71
|
homepage: http://github.com/JosephHalter/string_cleaner
|
66
|
-
licenses:
|
67
|
-
|
72
|
+
licenses:
|
73
|
+
- MIT
|
74
|
+
metadata: {}
|
68
75
|
post_install_message:
|
69
|
-
rdoc_options:
|
70
|
-
|
71
|
-
require_paths:
|
76
|
+
rdoc_options: []
|
77
|
+
require_paths:
|
72
78
|
- lib
|
73
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
-
|
75
|
-
requirements:
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
76
81
|
- - ">="
|
77
|
-
- !ruby/object:Gem::Version
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
-
none: false
|
83
|
-
requirements:
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '2.6'
|
84
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
84
86
|
- - ">="
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
|
87
|
-
- 0
|
88
|
-
version: "0"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
89
|
requirements: []
|
90
|
-
|
91
|
-
rubyforge_project:
|
92
|
-
rubygems_version: 1.3.7
|
90
|
+
rubygems_version: 3.1.6
|
93
91
|
signing_key:
|
94
|
-
specification_version:
|
95
|
-
summary: Fix invalid UTF-8 and wipe invisible chars,
|
96
|
-
|
92
|
+
specification_version: 4
|
93
|
+
summary: Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with
|
94
|
+
extensive specs
|
95
|
+
test_files:
|
97
96
|
- spec/spec_helper.rb
|
98
97
|
- spec/string_cleaner_spec.rb
|