string_cleaner 0.2.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8451c9bc58c373d7b4a768cb3ed1ff355e5116f24b02c6a5bfeb69aa0549c589
4
+ data.tar.gz: b226d948238bd18cdacb01402c0576203da724b895c6745d3b713fae5d21f9ee
5
+ SHA512:
6
+ metadata.gz: 3a83693387d088db6f0b89ea73ecffbdf0b034885a0b2418d222c1197c48794b05cc8fb28f477bcfc4d345ee561d0ea3e1a543c34309ddb77ecc79a6adffe4fb
7
+ data.tar.gz: 3a35bab3513612149b345bf96d536d741ed3d4dfa757a353d3bc955782173139e8dac4c92b0bef025534706ca13e0ba17b14a30da0b10029d2653eeded5224f2
data/.gitignore CHANGED
@@ -3,4 +3,5 @@
3
3
  coverage
4
4
  rdoc
5
5
  pkg
6
- *.gem
6
+ *.gem
7
+ Gemfile.lock
data/README.rdoc CHANGED
@@ -6,21 +6,15 @@ Just add a method .clean to String which does:
6
6
  * replace \r\n and \r with \n normalizing end of lines
7
7
  * replace control characters and other invisible chars by spaces
8
8
 
9
- == Install
10
-
11
- sudo gem install JosephHalter-string_cleaner
12
-
13
- == Ruby 1.9+
9
+ Supports only Ruby 2.6+
14
10
 
15
- Ruby 1.9+ has native support for unicode and specs are 100% passing.
16
-
17
- == Ruby 1.8.x
11
+ == Install
18
12
 
19
- Because Ruby 1.8.x has no native support for Unicode, you must install oniguruma and the jasherai-oniguruma gem.
13
+ sudo gem install string_cleaner
20
14
 
21
15
  == Example usage
22
16
 
23
- "\210\004".clean # => " "
17
+ "\210\004".clean # => " "
24
18
 
25
19
  == Copyright
26
20
 
data/Rakefile CHANGED
@@ -1,4 +1,11 @@
1
- require 'rubygems'
1
+ if RUBY_VERSION.to_f<1.9
2
+ require 'rake/tasklib'
3
+ require 'rake/rdoctask'
4
+ require 'rubygems'
5
+ else
6
+ require 'rdoc/task'
7
+ end
8
+
2
9
  begin
3
10
  require 'bundler/setup'
4
11
  rescue LoadError
@@ -17,7 +24,6 @@ end
17
24
 
18
25
  task :default => :spec
19
26
 
20
- require 'rake/rdoctask'
21
27
  Rake::RDocTask.new do |rdoc|
22
28
  rdoc.rdoc_dir = 'rdoc'
23
29
  rdoc.title = "string_cleaner"
@@ -9,14 +9,13 @@ module String::Cleaner
9
9
 
10
10
  def fix_encoding
11
11
  utf8 = dup
12
- if utf8.respond_to?(:force_encoding)
12
+ if utf8.respond_to?(:force_encoding)
13
13
  utf8.force_encoding("UTF-8") # for Ruby 1.9+
14
14
  unless utf8.valid_encoding? # if invalid UTF-8
15
- utf8 = utf8.force_encoding("ISO8859-15")
15
+ utf8 = utf8.force_encoding("ISO8859-1")
16
16
  utf8.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
17
- utf8.gsub!("\xC2\x80", "€") # special case for euro sign from Windows-1252
18
- utf8.force_encoding("UTF-8")
19
17
  end
18
+ utf8.gsub!(/\u0080|¤/, "€") # special case for euro sign from Windows-1252
20
19
  utf8
21
20
  else
22
21
  require "iconv"
@@ -25,7 +24,7 @@ module String::Cleaner
25
24
  Iconv.new("UTF-8", "UTF-8").iconv(utf8)
26
25
  rescue
27
26
  utf8.gsub!(/\x80/n, "\xA4")
28
- Iconv.new("UTF-8//IGNORE", "ISO8859-15").iconv(utf8)
27
+ Iconv.new("UTF-8//IGNORE", "ISO8859-1").iconv(utf8).gsub("¤", "€")
29
28
  end
30
29
  end
31
30
  end
@@ -35,31 +34,40 @@ module String::Cleaner
35
34
  end
36
35
 
37
36
  SPECIAL_SPACES = [
38
- 0x00A0, # White_Space # Zs NO-BREAK SPACE
39
- 0x1680, # White_Space # Zs OGHAM SPACE MARK
40
- 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
41
- (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
42
- 0x2028, # White_Space # Zl LINE SEPARATOR
43
- 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
44
- 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
45
- 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
46
- 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
47
- ].flatten.collect{|e| [e].pack 'U*'}
37
+ 0x00A0, # NO-BREAK SPACE
38
+ 0x1680, # OGHAM SPACE MARK
39
+ 0x180E, # MONGOLIAN VOWEL SEPARATOR
40
+ (0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
41
+ 0x2028, # LINE SEPARATOR
42
+ 0x2029, # PARAGRAPH SEPARATOR
43
+ 0x202F, # NARROW NO-BREAK SPACE
44
+ 0x205F, # MEDIUM MATHEMATICAL SPACE
45
+ 0x3000, # IDEOGRAPHIC SPACE
46
+ ].flatten.collect{|e| [e].pack 'U*'}
47
+
48
+ ZERO_WIDTH = [
49
+ 0x200B, # ZERO WIDTH SPACE
50
+ 0x200C, # ZERO WIDTH NON-JOINER
51
+ 0x200D, # ZERO WIDTH JOINER
52
+ 0x2060, # WORD JOINER
53
+ 0xFEFF, # ZERO WIDTH NO-BREAK SPACE
54
+ ].flatten.collect{|e| [e].pack 'U*'}
48
55
 
49
56
  def fix_invisible_chars
50
57
  utf8 = self.dup
51
- if utf8.respond_to?(:force_encoding)
58
+ utf8.gsub!(Regexp.new(ZERO_WIDTH.join("|")), "")
59
+ utf8 = if utf8.respond_to?(:force_encoding)
52
60
  utf8 = (utf8 << " ").split(/\n/u).each{|line|
53
61
  line.gsub!(/[\s\p{C}]/u, " ")
54
62
  }.join("\n").chop!
55
- utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|")), " ")
56
- utf8.force_encoding("UTF-8")
57
63
  else
58
64
  require "oniguruma"
59
65
  utf8.split(/\n/n).collect{|line|
60
- Oniguruma::ORegexp.new("[\\s\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8}).gsub(line, " ")
66
+ Oniguruma::ORegexp.new("[\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8}).gsub(line, " ")
61
67
  }.join("\n").chop!
62
68
  end
69
+ utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|") + "|\s"), " ")
70
+ utf8
63
71
  end
64
72
 
65
73
  def trim(chars = "")
@@ -67,7 +75,7 @@ module String::Cleaner
67
75
  end
68
76
 
69
77
  def to_permalink(separator="-")
70
- fix_endlines.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
78
+ clean.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
71
79
  end
72
80
 
73
81
  def nl2br
@@ -1,147 +1,72 @@
1
- # encoding: UTF-8
2
- require File.dirname(__FILE__) + "/spec_helper"
1
+ require "spec_helper"
3
2
 
4
- describe String::Cleaner do
5
- if "".respond_to?(:force_encoding)
6
- # specs for Ruby 1.9+
3
+ RSpec.describe String::Cleaner do
4
+ describe "#clean" do
7
5
  describe "with all 8-bit characters" do
8
6
  before :all do
9
- @input = "".force_encoding("ISO8859-15")
7
+ @input = ""
8
+ @input.force_encoding("ISO8859-15") if @input.respond_to?(:force_encoding)
10
9
  (0..255).each{|i| @input << i.chr}
11
- @input.force_encoding("UTF-8")
10
+ @input.force_encoding("UTF-8") if @input.respond_to?(:force_encoding)
12
11
  @output = @input.clean
13
12
  end
14
- it "should output a valid UTF-8 string" do
15
- @output.encoding.name.should == "UTF-8"
16
- @output.should be_valid_encoding
13
+ if RUBY_VERSION.to_f>1.9
14
+ it "should output a valid UTF-8 string" do
15
+ expect(@output.encoding.name).to eq "UTF-8"
16
+ expect(@output).to be_valid_encoding
17
+ end
17
18
  end
18
19
  it "should wipe out the control characters" do
19
- @output.should == " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥Š§š©ª«¬ ®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
20
- end
21
- end
22
- it "should convert all type of spaces to normal spaces" do
23
- input = [
24
- (0x0009..0x000D).to_a, # White_Space # Cc [5] <control-0009>..<control-000D>
25
- 0x0020, # White_Space # Zs SPACE
26
- 0x0085, # White_Space # Cc <control-0085>
27
- 0x00A0, # White_Space # Zs NO-BREAK SPACE
28
- 0x1680, # White_Space # Zs OGHAM SPACE MARK
29
- 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
30
- (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
31
- 0x2028, # White_Space # Zl LINE SEPARATOR
32
- 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
33
- 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
34
- 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
35
- 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
36
- ].flatten.collect{ |e| [e].pack 'U*' }
37
- input.join.clean.should == " \n \n "
38
- end
39
- describe "with invalid UTF-8 sequence" do
40
- before :all do
41
- @input = "\210\004"
42
- @output = @input.clean
43
- end
44
- it "should output a valid UTF-8 string" do
45
- @output.encoding.name.should == "UTF-8"
46
- @output.should be_valid_encoding
47
- end
48
- it "should replace invisible chars by space" do
49
- @output.should == " "
50
- end
51
- end
52
- describe "with mixed valid and invalid characters" do
53
- before :all do
54
- @input = "a?^?\xddf"
55
- @output = @input.clean
56
- end
57
- it "should output a valid UTF-8 string" do
58
- @output.encoding.name.should == "UTF-8"
59
- @output.should be_valid_encoding
60
- end
61
- it "should keep the valid characters" do
62
- @output.should == "a?^?Ýf"
63
- end
64
- end
65
- describe "with already valid characters" do
66
- before :all do
67
- @input = "\n\t\r\r\n\v\n"
68
- @output = @input.clean
69
- end
70
- it "should output a valid UTF-8 string" do
71
- @output.encoding.name.should == "UTF-8"
72
- @output.should be_valid_encoding
73
- end
74
- it "should replace invisible chars by space" do
75
- @output.should == "\n \n\n \n"
76
- end
77
- end
78
- describe "with watermarked text" do
79
- before :all do
80
- @input = "Here is \357\273\277a block \357\273\277of text \357\273\277inside of which a number will be hidden!"
81
- @output = @input.clean
82
- end
83
- it "should output a valid UTF-8 string" do
84
- @output.encoding.name.should == "UTF-8"
85
- @output.should be_valid_encoding
86
- end
87
- it "should replace invisible chars by space" do
88
- @output.should == "Here is a block of text inside of which a number will be hidden!"
89
- end
90
- end
91
- describe "with unusual valid spaces" do
92
- before :all do
93
- @input = []
94
- @input << "\u0020" # SPACE
95
- @input << "\u00A0" # NO-BREAK SPACE
96
- @input << "\u2000" # EN QUAD
97
- @input << "\u2001" # EM QUAD
98
- @input << "\u2002" # EN SPACE
99
- @input << "\u2003" # EM SPACE
100
- @input << "\u2004" # THREE-PER-EM SPACE
101
- @input << "\u2005" # FOUR-PER-EM SPACE
102
- @input << "\u2006" # SIX-PER-EM SPACE
103
- @input << "\u2007" # FIGURE SPACE
104
- @input << "\u2008" # PUNCTUATION SPACE
105
- @input << "\u2009" # THIN SPACE
106
- @input << "\u200A" # HAIR SPACE
107
- @input << "\u200B" # ZERO WIDTH SPACE
108
- @input << "\u202F" # NARROW NO-BREAK SPACE
109
- @input << "\u205F" # MEDIUM MATHEMATICAL SPACE
110
- @input << "\u3000" # IDEOGRAPHIC SPACE
111
- @input << "\uFEFF" # ZERO WIDTH NO-BREAK SPACE
20
+ expect(@output).to eq " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
21
+ end
22
+ end
23
+ describe "with various type of spaces" do
24
+ before do
25
+ @input = [
26
+ (0x0009..0x000D).to_a, # <control-0009>..<control-000D>
27
+ 0x0020, # SPACE
28
+ 0x0085, # <control-0085>
29
+ 0x00A0, # NO-BREAK SPACE
30
+ 0x1680, # OGHAM SPACE MARK
31
+ 0x180E, # MONGOLIAN VOWEL SEPARATOR
32
+ (0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
33
+ 0x2028, # LINE SEPARATOR
34
+ 0x2029, # PARAGRAPH SEPARATOR
35
+ 0x202F, # NARROW NO-BREAK SPACE
36
+ 0x205F, # MEDIUM MATHEMATICAL SPACE
37
+ 0x3000, # IDEOGRAPHIC SPACE
38
+ ].flatten.collect{ |e| [e].pack 'U*' }
112
39
  @output = @input.join.clean
113
40
  end
114
- it "should output a valid UTF-8 string" do
115
- @output.encoding.name.should == "UTF-8"
116
- @output.should be_valid_encoding
117
- end
118
- it "should replace invisible chars by space" do
119
- @output.should == " "*@input.size
120
- end
121
- end
122
- describe "with euro sign from both ISO 8859-15 or Windows-1252" do
123
- before :all do
124
- @input = "\x80\xA4"
125
- @output = @input.clean
126
- end
127
- it "should output a valid UTF-8 string" do
128
- @output.encoding.name.should == "UTF-8"
129
- @output.should be_valid_encoding
130
- end
131
- it "should replace invisible chars by space" do
132
- @output.should == "€€"
41
+ if RUBY_VERSION.to_f>1.9
42
+ it "should output a valid UTF-8 string" do
43
+ expect(@output.encoding.name).to eq "UTF-8"
44
+ expect(@output).to be_valid_encoding
45
+ end
46
+ end
47
+ it "should replace all spaces to normal spaces" do
48
+ expect(@output.clean).to eq " \n \n "
49
+ end
50
+ end
51
+ describe "with various no-width characters" do
52
+ before do
53
+ @input = [
54
+ 0x200B, # ZERO WIDTH SPACE
55
+ 0x200C, # ZERO WIDTH NON-JOINER
56
+ 0x200D, # ZERO WIDTH JOINER
57
+ 0x2060, # WORD JOINER
58
+ 0xFEFF, # ZERO WIDTH NO-BREAK SPACE
59
+ ].flatten.collect{ |e| [e].pack 'U*' }
60
+ @output = @input.join.clean
133
61
  end
134
- end
135
- else
136
- # specs for Ruby 1.8.6
137
- describe "with all 8-bit characters" do
138
- before :all do
139
- @input = ""
140
- (0..255).each{|i| @input << i.chr}
141
- @output = @input.clean
62
+ if RUBY_VERSION.to_f>1.9
63
+ it "should output a valid UTF-8 string" do
64
+ expect(@output.encoding.name).to eq "UTF-8"
65
+ expect(@output).to be_valid_encoding
66
+ end
142
67
  end
143
- it "should wipe out the control characters" do
144
- @output.should == " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥Š§š©ª«¬ ®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
68
+ it "should remove no-width characters" do
69
+ expect(@output).to eq ""
145
70
  end
146
71
  end
147
72
  describe "with invalid UTF-8 sequence" do
@@ -149,8 +74,14 @@ describe String::Cleaner do
149
74
  @input = "\210\004"
150
75
  @output = @input.clean
151
76
  end
77
+ if RUBY_VERSION.to_f>1.9
78
+ it "should output a valid UTF-8 string" do
79
+ expect(@output.encoding.name).to eq "UTF-8"
80
+ expect(@output).to be_valid_encoding
81
+ end
82
+ end
152
83
  it "should replace invisible chars by space" do
153
- @output.should == " "
84
+ expect(@output).to eq " "
154
85
  end
155
86
  end
156
87
  describe "with mixed valid and invalid characters" do
@@ -158,8 +89,14 @@ describe String::Cleaner do
158
89
  @input = "a?^?\xddf"
159
90
  @output = @input.clean
160
91
  end
92
+ if RUBY_VERSION.to_f>1.9
93
+ it "should output a valid UTF-8 string" do
94
+ expect(@output.encoding.name).to eq "UTF-8"
95
+ expect(@output).to be_valid_encoding
96
+ end
97
+ end
161
98
  it "should keep the valid characters" do
162
- @output.should == "a?^?Ýf"
99
+ expect(@output).to eq "a?^?Ýf"
163
100
  end
164
101
  end
165
102
  describe "with already valid characters" do
@@ -167,8 +104,14 @@ describe String::Cleaner do
167
104
  @input = "\n\t\r\r\n\v\n"
168
105
  @output = @input.clean
169
106
  end
107
+ if RUBY_VERSION.to_f>1.9
108
+ it "should output a valid UTF-8 string" do
109
+ expect(@output.encoding.name).to eq "UTF-8"
110
+ expect(@output).to be_valid_encoding
111
+ end
112
+ end
170
113
  it "should replace invisible chars by space" do
171
- @output.should == "\n \n\n \n"
114
+ expect(@output).to eq "\n \n\n \n"
172
115
  end
173
116
  end
174
117
  describe "with watermarked text" do
@@ -176,36 +119,14 @@ describe String::Cleaner do
176
119
  @input = "Here is \357\273\277a block \357\273\277of text \357\273\277inside of which a number will be hidden!"
177
120
  @output = @input.clean
178
121
  end
179
- it "should replace invisible chars by space" do
180
- @output.should == "Here is a block of text inside of which a number will be hidden!"
181
- end
182
- end
183
- describe "with unusual valid spaces" do
184
- before :all do
185
- @input = []
186
- # "\uXXXX" doesn't exists yet on Ruby 1.8.6
187
- @input << " " # SPACE
188
- @input << "\xC2\xA0" # NO-BREAK SPACE
189
- @input << "\xE2\x80\x80" # EN QUAD
190
- @input << "\xE2\x80\x81" # EM QUAD
191
- @input << "\xE2\x80\x82" # EN SPACE
192
- @input << "\xE2\x80\x83" # EM SPACE
193
- @input << "\xE2\x80\x84" # THREE-PER-EM SPACE
194
- @input << "\xE2\x80\x85" # FOUR-PER-EM SPACE
195
- @input << "\xE2\x80\x86" # SIX-PER-EM SPACE
196
- @input << "\xE2\x80\x87" # FIGURE SPACE
197
- @input << "\xE2\x80\x88" # PUNCTUATION SPACE
198
- @input << "\xE2\x80\x89" # THIN SPACE
199
- @input << "\xE2\x80\x8A" # HAIR SPACE
200
- @input << "\xE2\x80\x8B" # ZERO WIDTH SPACE
201
- @input << "\xE2\x80\xAF" # NARROW NO-BREAK SPACE
202
- @input << "\xE2\x81\x9F" # MEDIUM MATHEMATICAL SPACE
203
- @input << "\xE3\x80\x80" # IDEOGRAPHIC SPACE
204
- @input << "\xEF\xBB\xBF" # ZERO WIDTH NO-BREAK SPACE
205
- @output = @input.join.clean
122
+ if RUBY_VERSION.to_f>1.9
123
+ it "should output a valid UTF-8 string" do
124
+ expect(@output.encoding.name).to eq "UTF-8"
125
+ expect(@output).to be_valid_encoding
126
+ end
206
127
  end
207
128
  it "should replace invisible chars by space" do
208
- @output.should == " "*@input.size
129
+ expect(@output).to eq "Here is a block of text inside of which a number will be hidden!"
209
130
  end
210
131
  end
211
132
  describe "with euro sign from both ISO 8859-15 or Windows-1252" do
@@ -213,59 +134,65 @@ describe String::Cleaner do
213
134
  @input = "\x80\xA4"
214
135
  @output = @input.clean
215
136
  end
137
+ if RUBY_VERSION.to_f>1.9
138
+ it "should output a valid UTF-8 string" do
139
+ expect(@output.encoding.name).to eq "UTF-8"
140
+ expect(@output).to be_valid_encoding
141
+ end
142
+ end
216
143
  it "should replace invisible chars by space" do
217
- @output.should == "€€"
144
+ expect(@output).to eq "€€"
218
145
  end
219
146
  end
220
147
  end
221
148
  describe "#trim(chars = \"\")" do
222
149
  it "should use #strip when used without params" do
223
- string, expected = "", mock
224
- string.stub(:strip).and_return expected
225
- string.trim.should be expected
150
+ string, expected = "", double
151
+ expect(string).to receive(:strip).and_return expected
152
+ expect(string.trim).to be expected
226
153
  end
227
154
  it "should remove multiple characters at once from beginning and end" do
228
155
  prefix, suffix = " rhuif dww f f", "dqz qafdédsj iowe fcms. qpo asttt t dtt"
229
156
  to_remove = "acdeéfhijmopqrstuwz "
230
- "#{prefix}d#{suffix}".trim(to_remove).should eql "."
231
- "#{prefix}D#{suffix}".trim(to_remove).should eql "Ddqz qafdédsj iowe fcms."
157
+ expect("#{prefix}d#{suffix}".trim(to_remove)).to eq "."
158
+ expect("#{prefix}D#{suffix}".trim(to_remove)).to eq "Ddqz qafdédsj iowe fcms."
232
159
  end
233
160
  end
234
161
  describe "#fix_endlines" do
235
162
  it "should convert windows endlines" do
236
- "this is a\r\ntest\r\n".fix_endlines.should eql "this is a\ntest\n"
163
+ expect("this is a\r\ntest\r\n".fix_endlines).to eql "this is a\ntest\n"
237
164
  end
238
165
  it "should convert old mac endlines" do
239
- "this is a\rtest\r".fix_endlines.should eql "this is a\ntest\n"
166
+ expect("this is a\rtest\r".fix_endlines).to eql "this is a\ntest\n"
240
167
  end
241
168
  it "should not modify proper linux endlines" do
242
- "this is a\ntest\n".fix_endlines.should eql "this is a\ntest\n"
169
+ expect("this is a\ntest\n".fix_endlines).to eql "this is a\ntest\n"
243
170
  end
244
171
  it "should convert mixed endlines" do
245
- "this is a\n\rtest\r\n".fix_endlines.should eql "this is a\n\ntest\n"
172
+ expect("this is a\n\rtest\r\n".fix_endlines).to eql "this is a\n\ntest\n"
246
173
  end
247
174
  end
248
175
  describe "#to_permalink(separator=\"-\")" do
249
176
  it "should create nice permalink for string with many accents" do
250
177
  crazy = " ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý - Hello world, I'm a crazy string!! "
251
- crazy.to_permalink.should == "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
178
+ expect(crazy.to_permalink).to eq "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
252
179
  end
253
180
  it "should create nice permalink even for evil string" do
254
181
  evil = (128..255).inject(""){ |acc, b| acc += ("%c" % b) }
255
- evil.to_permalink.should == "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
182
+ expect(evil.to_permalink).to eq "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
256
183
  end
257
184
  it "should remove endlines too" do
258
- "this\nis\ta\ntest".to_permalink("_").should eql "this_is_a_test"
185
+ expect("this\nis\ta\ntest".to_permalink("_")).to eq "this_is_a_test"
259
186
  end
260
187
  end
261
188
  describe "#nl2br" do
262
189
  it "should convert \n to <br/>\n" do
263
- "this\nis\ta\ntest\r".nl2br.should eql "this<br/>\nis\ta<br/>\ntest\r"
190
+ expect("this\nis\ta\ntest\r".nl2br).to eq "this<br/>\nis\ta<br/>\ntest\r"
264
191
  end
265
192
  end
266
193
  describe "#to_nicer_sym" do
267
194
  it "should convert \"Select or Other\" to :select_or_other" do
268
- "Select or Other".to_nicer_sym.should be :select_or_other
195
+ expect("Select or Other".to_nicer_sym).to be :select_or_other
269
196
  end
270
197
  end
271
- end
198
+ end
@@ -2,12 +2,12 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{string_cleaner}
5
- s.version = "0.2.0"
6
-
7
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
5
+ s.version = "1.0.0"
8
6
  s.authors = ["Joseph Halter"]
9
7
  s.date = %q{2010-10-18}
10
8
  s.email = %q{joseph@openhood.com}
9
+ s.required_ruby_version = ">= 2.6"
10
+ s.license = "MIT"
11
11
  s.extra_rdoc_files = [
12
12
  "LICENSE",
13
13
  "README.rdoc"
@@ -22,26 +22,14 @@ Gem::Specification.new do |s|
22
22
  "spec/string_cleaner_spec.rb",
23
23
  "string_cleaner.gemspec"
24
24
  ]
25
- s.has_rdoc = true
26
25
  s.homepage = %q{http://github.com/JosephHalter/string_cleaner}
27
- s.rdoc_options = ["--charset=UTF-8"]
28
26
  s.require_paths = ["lib"]
29
- s.rubygems_version = %q{1.3.1}
30
- s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, fully compatible with Ruby 1.8 & 1.9 with extensive specs}
27
+ s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with extensive specs}
31
28
  s.test_files = [
32
29
  "spec/spec_helper.rb",
33
30
  "spec/string_cleaner_spec.rb"
34
31
  ]
35
- s.add_runtime_dependency "unidecoder"
32
+ s.add_runtime_dependency "talentbox-unidecoder", "2.0.0"
33
+ s.add_development_dependency "rake"
36
34
  s.add_development_dependency "rspec"
37
-
38
- if s.respond_to? :specification_version then
39
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
40
- s.specification_version = 2
41
-
42
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
43
- else
44
- end
45
- else
46
- end
47
35
  end
metadata CHANGED
@@ -1,59 +1,66 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: string_cleaner
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 2
8
- - 0
9
- version: 0.2.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
10
5
  platform: ruby
11
- authors:
6
+ authors:
12
7
  - Joseph Halter
13
8
  autorequire:
14
9
  bindir: bin
15
10
  cert_chain: []
16
-
17
- date: 2010-10-18 00:00:00 +02:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: unidecoder
11
+ date: 2010-10-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: talentbox-unidecoder
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.0
20
+ type: :runtime
22
21
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
26
31
  - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- version: "0"
31
- type: :runtime
32
- version_requirements: *id001
33
- - !ruby/object:Gem::Dependency
34
- name: rspec
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
35
  prerelease: false
36
- requirement: &id002 !ruby/object:Gem::Requirement
37
- none: false
38
- requirements:
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
39
38
  - - ">="
40
- - !ruby/object:Gem::Version
41
- segments:
42
- - 0
43
- version: "0"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
44
48
  type: :development
45
- version_requirements: *id002
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
46
55
  description:
47
56
  email: joseph@openhood.com
48
57
  executables: []
49
-
50
58
  extensions: []
51
-
52
- extra_rdoc_files:
59
+ extra_rdoc_files:
53
60
  - LICENSE
54
61
  - README.rdoc
55
- files:
56
- - .gitignore
62
+ files:
63
+ - ".gitignore"
57
64
  - LICENSE
58
65
  - README.rdoc
59
66
  - Rakefile
@@ -61,38 +68,30 @@ files:
61
68
  - spec/spec_helper.rb
62
69
  - spec/string_cleaner_spec.rb
63
70
  - string_cleaner.gemspec
64
- has_rdoc: true
65
71
  homepage: http://github.com/JosephHalter/string_cleaner
66
- licenses: []
67
-
72
+ licenses:
73
+ - MIT
74
+ metadata: {}
68
75
  post_install_message:
69
- rdoc_options:
70
- - --charset=UTF-8
71
- require_paths:
76
+ rdoc_options: []
77
+ require_paths:
72
78
  - lib
73
- required_ruby_version: !ruby/object:Gem::Requirement
74
- none: false
75
- requirements:
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
76
81
  - - ">="
77
- - !ruby/object:Gem::Version
78
- segments:
79
- - 0
80
- version: "0"
81
- required_rubygems_version: !ruby/object:Gem::Requirement
82
- none: false
83
- requirements:
82
+ - !ruby/object:Gem::Version
83
+ version: '2.6'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
84
86
  - - ">="
85
- - !ruby/object:Gem::Version
86
- segments:
87
- - 0
88
- version: "0"
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
89
  requirements: []
90
-
91
- rubyforge_project:
92
- rubygems_version: 1.3.7
90
+ rubygems_version: 3.1.6
93
91
  signing_key:
94
- specification_version: 2
95
- summary: Fix invalid UTF-8 and wipe invisible chars, fully compatible with Ruby 1.8 & 1.9 with extensive specs
96
- test_files:
92
+ specification_version: 4
93
+ summary: Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with
94
+ extensive specs
95
+ test_files:
97
96
  - spec/spec_helper.rb
98
97
  - spec/string_cleaner_spec.rb