string_cleaner 0.2.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8451c9bc58c373d7b4a768cb3ed1ff355e5116f24b02c6a5bfeb69aa0549c589
4
+ data.tar.gz: b226d948238bd18cdacb01402c0576203da724b895c6745d3b713fae5d21f9ee
5
+ SHA512:
6
+ metadata.gz: 3a83693387d088db6f0b89ea73ecffbdf0b034885a0b2418d222c1197c48794b05cc8fb28f477bcfc4d345ee561d0ea3e1a543c34309ddb77ecc79a6adffe4fb
7
+ data.tar.gz: 3a35bab3513612149b345bf96d536d741ed3d4dfa757a353d3bc955782173139e8dac4c92b0bef025534706ca13e0ba17b14a30da0b10029d2653eeded5224f2
data/.gitignore CHANGED
@@ -3,4 +3,5 @@
3
3
  coverage
4
4
  rdoc
5
5
  pkg
6
- *.gem
6
+ *.gem
7
+ Gemfile.lock
data/README.rdoc CHANGED
@@ -6,21 +6,15 @@ Just add a method .clean to String which does:
6
6
  * replace \r\n and \r with \n normalizing end of lines
7
7
  * replace control characters and other invisible chars by spaces
8
8
 
9
- == Install
10
-
11
- sudo gem install JosephHalter-string_cleaner
12
-
13
- == Ruby 1.9+
9
+ Supports only Ruby 2.6+
14
10
 
15
- Ruby 1.9+ has native support for unicode and specs are 100% passing.
16
-
17
- == Ruby 1.8.x
11
+ == Install
18
12
 
19
- Because Ruby 1.8.x has no native support for Unicode, you must install oniguruma and the jasherai-oniguruma gem.
13
+ sudo gem install string_cleaner
20
14
 
21
15
  == Example usage
22
16
 
23
- "\210\004".clean # => " "
17
+ "\210\004".clean # => " "
24
18
 
25
19
  == Copyright
26
20
 
data/Rakefile CHANGED
@@ -1,4 +1,11 @@
1
- require 'rubygems'
1
+ if RUBY_VERSION.to_f<1.9
2
+ require 'rake/tasklib'
3
+ require 'rake/rdoctask'
4
+ require 'rubygems'
5
+ else
6
+ require 'rdoc/task'
7
+ end
8
+
2
9
  begin
3
10
  require 'bundler/setup'
4
11
  rescue LoadError
@@ -17,7 +24,6 @@ end
17
24
 
18
25
  task :default => :spec
19
26
 
20
- require 'rake/rdoctask'
21
27
  Rake::RDocTask.new do |rdoc|
22
28
  rdoc.rdoc_dir = 'rdoc'
23
29
  rdoc.title = "string_cleaner"
@@ -9,14 +9,13 @@ module String::Cleaner
9
9
 
10
10
  def fix_encoding
11
11
  utf8 = dup
12
- if utf8.respond_to?(:force_encoding)
12
+ if utf8.respond_to?(:force_encoding)
13
13
  utf8.force_encoding("UTF-8") # for Ruby 1.9+
14
14
  unless utf8.valid_encoding? # if invalid UTF-8
15
- utf8 = utf8.force_encoding("ISO8859-15")
15
+ utf8 = utf8.force_encoding("ISO8859-1")
16
16
  utf8.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "")
17
- utf8.gsub!("\xC2\x80", "€") # special case for euro sign from Windows-1252
18
- utf8.force_encoding("UTF-8")
19
17
  end
18
+ utf8.gsub!(/\u0080|¤/, "€") # special case for euro sign from Windows-1252
20
19
  utf8
21
20
  else
22
21
  require "iconv"
@@ -25,7 +24,7 @@ module String::Cleaner
25
24
  Iconv.new("UTF-8", "UTF-8").iconv(utf8)
26
25
  rescue
27
26
  utf8.gsub!(/\x80/n, "\xA4")
28
- Iconv.new("UTF-8//IGNORE", "ISO8859-15").iconv(utf8)
27
+ Iconv.new("UTF-8//IGNORE", "ISO8859-1").iconv(utf8).gsub("¤", "€")
29
28
  end
30
29
  end
31
30
  end
@@ -35,31 +34,40 @@ module String::Cleaner
35
34
  end
36
35
 
37
36
  SPECIAL_SPACES = [
38
- 0x00A0, # White_Space # Zs NO-BREAK SPACE
39
- 0x1680, # White_Space # Zs OGHAM SPACE MARK
40
- 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
41
- (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
42
- 0x2028, # White_Space # Zl LINE SEPARATOR
43
- 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
44
- 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
45
- 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
46
- 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
47
- ].flatten.collect{|e| [e].pack 'U*'}
37
+ 0x00A0, # NO-BREAK SPACE
38
+ 0x1680, # OGHAM SPACE MARK
39
+ 0x180E, # MONGOLIAN VOWEL SEPARATOR
40
+ (0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
41
+ 0x2028, # LINE SEPARATOR
42
+ 0x2029, # PARAGRAPH SEPARATOR
43
+ 0x202F, # NARROW NO-BREAK SPACE
44
+ 0x205F, # MEDIUM MATHEMATICAL SPACE
45
+ 0x3000, # IDEOGRAPHIC SPACE
46
+ ].flatten.collect{|e| [e].pack 'U*'}
47
+
48
+ ZERO_WIDTH = [
49
+ 0x200B, # ZERO WIDTH SPACE
50
+ 0x200C, # ZERO WIDTH NON-JOINER
51
+ 0x200D, # ZERO WIDTH JOINER
52
+ 0x2060, # WORD JOINER
53
+ 0xFEFF, # ZERO WIDTH NO-BREAK SPACE
54
+ ].flatten.collect{|e| [e].pack 'U*'}
48
55
 
49
56
  def fix_invisible_chars
50
57
  utf8 = self.dup
51
- if utf8.respond_to?(:force_encoding)
58
+ utf8.gsub!(Regexp.new(ZERO_WIDTH.join("|")), "")
59
+ utf8 = if utf8.respond_to?(:force_encoding)
52
60
  utf8 = (utf8 << " ").split(/\n/u).each{|line|
53
61
  line.gsub!(/[\s\p{C}]/u, " ")
54
62
  }.join("\n").chop!
55
- utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|")), " ")
56
- utf8.force_encoding("UTF-8")
57
63
  else
58
64
  require "oniguruma"
59
65
  utf8.split(/\n/n).collect{|line|
60
- Oniguruma::ORegexp.new("[\\s\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8}).gsub(line, " ")
66
+ Oniguruma::ORegexp.new("[\\p{C}]", {:encoding => Oniguruma::ENCODING_UTF8}).gsub(line, " ")
61
67
  }.join("\n").chop!
62
68
  end
69
+ utf8.gsub!(Regexp.new(SPECIAL_SPACES.join("|") + "|\s"), " ")
70
+ utf8
63
71
  end
64
72
 
65
73
  def trim(chars = "")
@@ -67,7 +75,7 @@ module String::Cleaner
67
75
  end
68
76
 
69
77
  def to_permalink(separator="-")
70
- fix_endlines.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
78
+ clean.to_ascii(chartable).downcase.gsub(/[^a-z0-9]+/, separator).trim(separator)
71
79
  end
72
80
 
73
81
  def nl2br
@@ -1,147 +1,72 @@
1
- # encoding: UTF-8
2
- require File.dirname(__FILE__) + "/spec_helper"
1
+ require "spec_helper"
3
2
 
4
- describe String::Cleaner do
5
- if "".respond_to?(:force_encoding)
6
- # specs for Ruby 1.9+
3
+ RSpec.describe String::Cleaner do
4
+ describe "#clean" do
7
5
  describe "with all 8-bit characters" do
8
6
  before :all do
9
- @input = "".force_encoding("ISO8859-15")
7
+ @input = ""
8
+ @input.force_encoding("ISO8859-15") if @input.respond_to?(:force_encoding)
10
9
  (0..255).each{|i| @input << i.chr}
11
- @input.force_encoding("UTF-8")
10
+ @input.force_encoding("UTF-8") if @input.respond_to?(:force_encoding)
12
11
  @output = @input.clean
13
12
  end
14
- it "should output a valid UTF-8 string" do
15
- @output.encoding.name.should == "UTF-8"
16
- @output.should be_valid_encoding
13
+ if RUBY_VERSION.to_f>1.9
14
+ it "should output a valid UTF-8 string" do
15
+ expect(@output.encoding.name).to eq "UTF-8"
16
+ expect(@output).to be_valid_encoding
17
+ end
17
18
  end
18
19
  it "should wipe out the control characters" do
19
- @output.should == " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥Š§š©ª«¬ ®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
20
- end
21
- end
22
- it "should convert all type of spaces to normal spaces" do
23
- input = [
24
- (0x0009..0x000D).to_a, # White_Space # Cc [5] <control-0009>..<control-000D>
25
- 0x0020, # White_Space # Zs SPACE
26
- 0x0085, # White_Space # Cc <control-0085>
27
- 0x00A0, # White_Space # Zs NO-BREAK SPACE
28
- 0x1680, # White_Space # Zs OGHAM SPACE MARK
29
- 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR
30
- (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE
31
- 0x2028, # White_Space # Zl LINE SEPARATOR
32
- 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR
33
- 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE
34
- 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE
35
- 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE
36
- ].flatten.collect{ |e| [e].pack 'U*' }
37
- input.join.clean.should == " \n \n "
38
- end
39
- describe "with invalid UTF-8 sequence" do
40
- before :all do
41
- @input = "\210\004"
42
- @output = @input.clean
43
- end
44
- it "should output a valid UTF-8 string" do
45
- @output.encoding.name.should == "UTF-8"
46
- @output.should be_valid_encoding
47
- end
48
- it "should replace invisible chars by space" do
49
- @output.should == " "
50
- end
51
- end
52
- describe "with mixed valid and invalid characters" do
53
- before :all do
54
- @input = "a?^?\xddf"
55
- @output = @input.clean
56
- end
57
- it "should output a valid UTF-8 string" do
58
- @output.encoding.name.should == "UTF-8"
59
- @output.should be_valid_encoding
60
- end
61
- it "should keep the valid characters" do
62
- @output.should == "a?^?Ýf"
63
- end
64
- end
65
- describe "with already valid characters" do
66
- before :all do
67
- @input = "\n\t\r\r\n\v\n"
68
- @output = @input.clean
69
- end
70
- it "should output a valid UTF-8 string" do
71
- @output.encoding.name.should == "UTF-8"
72
- @output.should be_valid_encoding
73
- end
74
- it "should replace invisible chars by space" do
75
- @output.should == "\n \n\n \n"
76
- end
77
- end
78
- describe "with watermarked text" do
79
- before :all do
80
- @input = "Here is \357\273\277a block \357\273\277of text \357\273\277inside of which a number will be hidden!"
81
- @output = @input.clean
82
- end
83
- it "should output a valid UTF-8 string" do
84
- @output.encoding.name.should == "UTF-8"
85
- @output.should be_valid_encoding
86
- end
87
- it "should replace invisible chars by space" do
88
- @output.should == "Here is a block of text inside of which a number will be hidden!"
89
- end
90
- end
91
- describe "with unusual valid spaces" do
92
- before :all do
93
- @input = []
94
- @input << "\u0020" # SPACE
95
- @input << "\u00A0" # NO-BREAK SPACE
96
- @input << "\u2000" # EN QUAD
97
- @input << "\u2001" # EM QUAD
98
- @input << "\u2002" # EN SPACE
99
- @input << "\u2003" # EM SPACE
100
- @input << "\u2004" # THREE-PER-EM SPACE
101
- @input << "\u2005" # FOUR-PER-EM SPACE
102
- @input << "\u2006" # SIX-PER-EM SPACE
103
- @input << "\u2007" # FIGURE SPACE
104
- @input << "\u2008" # PUNCTUATION SPACE
105
- @input << "\u2009" # THIN SPACE
106
- @input << "\u200A" # HAIR SPACE
107
- @input << "\u200B" # ZERO WIDTH SPACE
108
- @input << "\u202F" # NARROW NO-BREAK SPACE
109
- @input << "\u205F" # MEDIUM MATHEMATICAL SPACE
110
- @input << "\u3000" # IDEOGRAPHIC SPACE
111
- @input << "\uFEFF" # ZERO WIDTH NO-BREAK SPACE
20
+ expect(@output).to eq " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥¦§¨©ª«¬ ®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
21
+ end
22
+ end
23
+ describe "with various type of spaces" do
24
+ before do
25
+ @input = [
26
+ (0x0009..0x000D).to_a, # <control-0009>..<control-000D>
27
+ 0x0020, # SPACE
28
+ 0x0085, # <control-0085>
29
+ 0x00A0, # NO-BREAK SPACE
30
+ 0x1680, # OGHAM SPACE MARK
31
+ 0x180E, # MONGOLIAN VOWEL SEPARATOR
32
+ (0x2000..0x200A).to_a, # EN QUAD..HAIR SPACE
33
+ 0x2028, # LINE SEPARATOR
34
+ 0x2029, # PARAGRAPH SEPARATOR
35
+ 0x202F, # NARROW NO-BREAK SPACE
36
+ 0x205F, # MEDIUM MATHEMATICAL SPACE
37
+ 0x3000, # IDEOGRAPHIC SPACE
38
+ ].flatten.collect{ |e| [e].pack 'U*' }
112
39
  @output = @input.join.clean
113
40
  end
114
- it "should output a valid UTF-8 string" do
115
- @output.encoding.name.should == "UTF-8"
116
- @output.should be_valid_encoding
117
- end
118
- it "should replace invisible chars by space" do
119
- @output.should == " "*@input.size
120
- end
121
- end
122
- describe "with euro sign from both ISO 8859-15 or Windows-1252" do
123
- before :all do
124
- @input = "\x80\xA4"
125
- @output = @input.clean
126
- end
127
- it "should output a valid UTF-8 string" do
128
- @output.encoding.name.should == "UTF-8"
129
- @output.should be_valid_encoding
130
- end
131
- it "should replace invisible chars by space" do
132
- @output.should == "€€"
41
+ if RUBY_VERSION.to_f>1.9
42
+ it "should output a valid UTF-8 string" do
43
+ expect(@output.encoding.name).to eq "UTF-8"
44
+ expect(@output).to be_valid_encoding
45
+ end
46
+ end
47
+ it "should replace all spaces to normal spaces" do
48
+ expect(@output.clean).to eq " \n \n "
49
+ end
50
+ end
51
+ describe "with various no-width characters" do
52
+ before do
53
+ @input = [
54
+ 0x200B, # ZERO WIDTH SPACE
55
+ 0x200C, # ZERO WIDTH NON-JOINER
56
+ 0x200D, # ZERO WIDTH JOINER
57
+ 0x2060, # WORD JOINER
58
+ 0xFEFF, # ZERO WIDTH NO-BREAK SPACE
59
+ ].flatten.collect{ |e| [e].pack 'U*' }
60
+ @output = @input.join.clean
133
61
  end
134
- end
135
- else
136
- # specs for Ruby 1.8.6
137
- describe "with all 8-bit characters" do
138
- before :all do
139
- @input = ""
140
- (0..255).each{|i| @input << i.chr}
141
- @output = @input.clean
62
+ if RUBY_VERSION.to_f>1.9
63
+ it "should output a valid UTF-8 string" do
64
+ expect(@output.encoding.name).to eq "UTF-8"
65
+ expect(@output).to be_valid_encoding
66
+ end
142
67
  end
143
- it "should wipe out the control characters" do
144
- @output.should == " \n \n !\"\#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ € ¡¢£€¥Š§š©ª«¬ ®¯°±²³Žµ¶·ž¹º»ŒœŸ¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
68
+ it "should remove no-width characters" do
69
+ expect(@output).to eq ""
145
70
  end
146
71
  end
147
72
  describe "with invalid UTF-8 sequence" do
@@ -149,8 +74,14 @@ describe String::Cleaner do
149
74
  @input = "\210\004"
150
75
  @output = @input.clean
151
76
  end
77
+ if RUBY_VERSION.to_f>1.9
78
+ it "should output a valid UTF-8 string" do
79
+ expect(@output.encoding.name).to eq "UTF-8"
80
+ expect(@output).to be_valid_encoding
81
+ end
82
+ end
152
83
  it "should replace invisible chars by space" do
153
- @output.should == " "
84
+ expect(@output).to eq " "
154
85
  end
155
86
  end
156
87
  describe "with mixed valid and invalid characters" do
@@ -158,8 +89,14 @@ describe String::Cleaner do
158
89
  @input = "a?^?\xddf"
159
90
  @output = @input.clean
160
91
  end
92
+ if RUBY_VERSION.to_f>1.9
93
+ it "should output a valid UTF-8 string" do
94
+ expect(@output.encoding.name).to eq "UTF-8"
95
+ expect(@output).to be_valid_encoding
96
+ end
97
+ end
161
98
  it "should keep the valid characters" do
162
- @output.should == "a?^?Ýf"
99
+ expect(@output).to eq "a?^?Ýf"
163
100
  end
164
101
  end
165
102
  describe "with already valid characters" do
@@ -167,8 +104,14 @@ describe String::Cleaner do
167
104
  @input = "\n\t\r\r\n\v\n"
168
105
  @output = @input.clean
169
106
  end
107
+ if RUBY_VERSION.to_f>1.9
108
+ it "should output a valid UTF-8 string" do
109
+ expect(@output.encoding.name).to eq "UTF-8"
110
+ expect(@output).to be_valid_encoding
111
+ end
112
+ end
170
113
  it "should replace invisible chars by space" do
171
- @output.should == "\n \n\n \n"
114
+ expect(@output).to eq "\n \n\n \n"
172
115
  end
173
116
  end
174
117
  describe "with watermarked text" do
@@ -176,36 +119,14 @@ describe String::Cleaner do
176
119
  @input = "Here is \357\273\277a block \357\273\277of text \357\273\277inside of which a number will be hidden!"
177
120
  @output = @input.clean
178
121
  end
179
- it "should replace invisible chars by space" do
180
- @output.should == "Here is a block of text inside of which a number will be hidden!"
181
- end
182
- end
183
- describe "with unusual valid spaces" do
184
- before :all do
185
- @input = []
186
- # "\uXXXX" doesn't exists yet on Ruby 1.8.6
187
- @input << " " # SPACE
188
- @input << "\xC2\xA0" # NO-BREAK SPACE
189
- @input << "\xE2\x80\x80" # EN QUAD
190
- @input << "\xE2\x80\x81" # EM QUAD
191
- @input << "\xE2\x80\x82" # EN SPACE
192
- @input << "\xE2\x80\x83" # EM SPACE
193
- @input << "\xE2\x80\x84" # THREE-PER-EM SPACE
194
- @input << "\xE2\x80\x85" # FOUR-PER-EM SPACE
195
- @input << "\xE2\x80\x86" # SIX-PER-EM SPACE
196
- @input << "\xE2\x80\x87" # FIGURE SPACE
197
- @input << "\xE2\x80\x88" # PUNCTUATION SPACE
198
- @input << "\xE2\x80\x89" # THIN SPACE
199
- @input << "\xE2\x80\x8A" # HAIR SPACE
200
- @input << "\xE2\x80\x8B" # ZERO WIDTH SPACE
201
- @input << "\xE2\x80\xAF" # NARROW NO-BREAK SPACE
202
- @input << "\xE2\x81\x9F" # MEDIUM MATHEMATICAL SPACE
203
- @input << "\xE3\x80\x80" # IDEOGRAPHIC SPACE
204
- @input << "\xEF\xBB\xBF" # ZERO WIDTH NO-BREAK SPACE
205
- @output = @input.join.clean
122
+ if RUBY_VERSION.to_f>1.9
123
+ it "should output a valid UTF-8 string" do
124
+ expect(@output.encoding.name).to eq "UTF-8"
125
+ expect(@output).to be_valid_encoding
126
+ end
206
127
  end
207
128
  it "should replace invisible chars by space" do
208
- @output.should == " "*@input.size
129
+ expect(@output).to eq "Here is a block of text inside of which a number will be hidden!"
209
130
  end
210
131
  end
211
132
  describe "with euro sign from both ISO 8859-15 or Windows-1252" do
@@ -213,59 +134,65 @@ describe String::Cleaner do
213
134
  @input = "\x80\xA4"
214
135
  @output = @input.clean
215
136
  end
137
+ if RUBY_VERSION.to_f>1.9
138
+ it "should output a valid UTF-8 string" do
139
+ expect(@output.encoding.name).to eq "UTF-8"
140
+ expect(@output).to be_valid_encoding
141
+ end
142
+ end
216
143
  it "should replace invisible chars by space" do
217
- @output.should == "€€"
144
+ expect(@output).to eq "€€"
218
145
  end
219
146
  end
220
147
  end
221
148
  describe "#trim(chars = \"\")" do
222
149
  it "should use #strip when used without params" do
223
- string, expected = "", mock
224
- string.stub(:strip).and_return expected
225
- string.trim.should be expected
150
+ string, expected = "", double
151
+ expect(string).to receive(:strip).and_return expected
152
+ expect(string.trim).to be expected
226
153
  end
227
154
  it "should remove multiple characters at once from beginning and end" do
228
155
  prefix, suffix = " rhuif dww f f", "dqz qafdédsj iowe fcms. qpo asttt t dtt"
229
156
  to_remove = "acdeéfhijmopqrstuwz "
230
- "#{prefix}d#{suffix}".trim(to_remove).should eql "."
231
- "#{prefix}D#{suffix}".trim(to_remove).should eql "Ddqz qafdédsj iowe fcms."
157
+ expect("#{prefix}d#{suffix}".trim(to_remove)).to eq "."
158
+ expect("#{prefix}D#{suffix}".trim(to_remove)).to eq "Ddqz qafdédsj iowe fcms."
232
159
  end
233
160
  end
234
161
  describe "#fix_endlines" do
235
162
  it "should convert windows endlines" do
236
- "this is a\r\ntest\r\n".fix_endlines.should eql "this is a\ntest\n"
163
+ expect("this is a\r\ntest\r\n".fix_endlines).to eql "this is a\ntest\n"
237
164
  end
238
165
  it "should convert old mac endlines" do
239
- "this is a\rtest\r".fix_endlines.should eql "this is a\ntest\n"
166
+ expect("this is a\rtest\r".fix_endlines).to eql "this is a\ntest\n"
240
167
  end
241
168
  it "should not modify proper linux endlines" do
242
- "this is a\ntest\n".fix_endlines.should eql "this is a\ntest\n"
169
+ expect("this is a\ntest\n".fix_endlines).to eql "this is a\ntest\n"
243
170
  end
244
171
  it "should convert mixed endlines" do
245
- "this is a\n\rtest\r\n".fix_endlines.should eql "this is a\n\ntest\n"
172
+ expect("this is a\n\rtest\r\n".fix_endlines).to eql "this is a\n\ntest\n"
246
173
  end
247
174
  end
248
175
  describe "#to_permalink(separator=\"-\")" do
249
176
  it "should create nice permalink for string with many accents" do
250
177
  crazy = " ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüý - Hello world, I'm a crazy string!! "
251
- crazy.to_permalink.should == "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
178
+ expect(crazy.to_permalink).to eq "aaaaaaceeeeiiiidnoooooxouuuuyaaaaaaceeeeiiiinoooooouuuuy-hello-world-i-m-a-crazy-string"
252
179
  end
253
180
  it "should create nice permalink even for evil string" do
254
181
  evil = (128..255).inject(""){ |acc, b| acc += ("%c" % b) }
255
- evil.to_permalink.should == "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
182
+ expect(evil.to_permalink).to eq "euros-cents-pounds-euros-yens-section-copyright-registered-trademark-degrees-approx-23-micro-paragraph-10-1-4-1-2-3-4-aaaaaaaeceeeeiiiidnoooooxouuuuythssaaaaaaaeceeeeiiiidnooooo-ouuuuythy"
256
183
  end
257
184
  it "should remove endlines too" do
258
- "this\nis\ta\ntest".to_permalink("_").should eql "this_is_a_test"
185
+ expect("this\nis\ta\ntest".to_permalink("_")).to eq "this_is_a_test"
259
186
  end
260
187
  end
261
188
  describe "#nl2br" do
262
189
  it "should convert \n to <br/>\n" do
263
- "this\nis\ta\ntest\r".nl2br.should eql "this<br/>\nis\ta<br/>\ntest\r"
190
+ expect("this\nis\ta\ntest\r".nl2br).to eq "this<br/>\nis\ta<br/>\ntest\r"
264
191
  end
265
192
  end
266
193
  describe "#to_nicer_sym" do
267
194
  it "should convert \"Select or Other\" to :select_or_other" do
268
- "Select or Other".to_nicer_sym.should be :select_or_other
195
+ expect("Select or Other".to_nicer_sym).to be :select_or_other
269
196
  end
270
197
  end
271
- end
198
+ end
@@ -2,12 +2,12 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{string_cleaner}
5
- s.version = "0.2.0"
6
-
7
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
5
+ s.version = "1.0.0"
8
6
  s.authors = ["Joseph Halter"]
9
7
  s.date = %q{2010-10-18}
10
8
  s.email = %q{joseph@openhood.com}
9
+ s.required_ruby_version = ">= 2.6"
10
+ s.license = "MIT"
11
11
  s.extra_rdoc_files = [
12
12
  "LICENSE",
13
13
  "README.rdoc"
@@ -22,26 +22,14 @@ Gem::Specification.new do |s|
22
22
  "spec/string_cleaner_spec.rb",
23
23
  "string_cleaner.gemspec"
24
24
  ]
25
- s.has_rdoc = true
26
25
  s.homepage = %q{http://github.com/JosephHalter/string_cleaner}
27
- s.rdoc_options = ["--charset=UTF-8"]
28
26
  s.require_paths = ["lib"]
29
- s.rubygems_version = %q{1.3.1}
30
- s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, fully compatible with Ruby 1.8 & 1.9 with extensive specs}
27
+ s.summary = %q{Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with extensive specs}
31
28
  s.test_files = [
32
29
  "spec/spec_helper.rb",
33
30
  "spec/string_cleaner_spec.rb"
34
31
  ]
35
- s.add_runtime_dependency "unidecoder"
32
+ s.add_runtime_dependency "talentbox-unidecoder", "2.0.0"
33
+ s.add_development_dependency "rake"
36
34
  s.add_development_dependency "rspec"
37
-
38
- if s.respond_to? :specification_version then
39
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
40
- s.specification_version = 2
41
-
42
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
43
- else
44
- end
45
- else
46
- end
47
35
  end
metadata CHANGED
@@ -1,59 +1,66 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: string_cleaner
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 2
8
- - 0
9
- version: 0.2.0
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
10
5
  platform: ruby
11
- authors:
6
+ authors:
12
7
  - Joseph Halter
13
8
  autorequire:
14
9
  bindir: bin
15
10
  cert_chain: []
16
-
17
- date: 2010-10-18 00:00:00 +02:00
18
- default_executable:
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: unidecoder
11
+ date: 2010-10-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: talentbox-unidecoder
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.0
20
+ type: :runtime
22
21
  prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
26
31
  - - ">="
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 0
30
- version: "0"
31
- type: :runtime
32
- version_requirements: *id001
33
- - !ruby/object:Gem::Dependency
34
- name: rspec
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
35
  prerelease: false
36
- requirement: &id002 !ruby/object:Gem::Requirement
37
- none: false
38
- requirements:
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
39
38
  - - ">="
40
- - !ruby/object:Gem::Version
41
- segments:
42
- - 0
43
- version: "0"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
44
48
  type: :development
45
- version_requirements: *id002
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
46
55
  description:
47
56
  email: joseph@openhood.com
48
57
  executables: []
49
-
50
58
  extensions: []
51
-
52
- extra_rdoc_files:
59
+ extra_rdoc_files:
53
60
  - LICENSE
54
61
  - README.rdoc
55
- files:
56
- - .gitignore
62
+ files:
63
+ - ".gitignore"
57
64
  - LICENSE
58
65
  - README.rdoc
59
66
  - Rakefile
@@ -61,38 +68,30 @@ files:
61
68
  - spec/spec_helper.rb
62
69
  - spec/string_cleaner_spec.rb
63
70
  - string_cleaner.gemspec
64
- has_rdoc: true
65
71
  homepage: http://github.com/JosephHalter/string_cleaner
66
- licenses: []
67
-
72
+ licenses:
73
+ - MIT
74
+ metadata: {}
68
75
  post_install_message:
69
- rdoc_options:
70
- - --charset=UTF-8
71
- require_paths:
76
+ rdoc_options: []
77
+ require_paths:
72
78
  - lib
73
- required_ruby_version: !ruby/object:Gem::Requirement
74
- none: false
75
- requirements:
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ requirements:
76
81
  - - ">="
77
- - !ruby/object:Gem::Version
78
- segments:
79
- - 0
80
- version: "0"
81
- required_rubygems_version: !ruby/object:Gem::Requirement
82
- none: false
83
- requirements:
82
+ - !ruby/object:Gem::Version
83
+ version: '2.6'
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
84
86
  - - ">="
85
- - !ruby/object:Gem::Version
86
- segments:
87
- - 0
88
- version: "0"
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
89
  requirements: []
90
-
91
- rubyforge_project:
92
- rubygems_version: 1.3.7
90
+ rubygems_version: 3.1.6
93
91
  signing_key:
94
- specification_version: 2
95
- summary: Fix invalid UTF-8 and wipe invisible chars, fully compatible with Ruby 1.8 & 1.9 with extensive specs
96
- test_files:
92
+ specification_version: 4
93
+ summary: Fix invalid UTF-8 and wipe invisible chars, compatible with Ruby 2.6+ with
94
+ extensive specs
95
+ test_files:
97
96
  - spec/spec_helper.rb
98
97
  - spec/string_cleaner_spec.rb