utf8 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/utf8.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'utf8/utf8'
2
+ require 'utf8/string'
3
+
4
+ # explicitly require this in your app if you want to use it
5
+ # require 'utf8/string_scanner'
@@ -0,0 +1,19 @@
1
class String
  # Returns this string wrapped in a UTF8-aware String::UTF8 instance.
  def as_utf8
    String::UTF8.new(self)
  end

  class UTF8
    VERSION = "0.1.0"

    # Returns the raw, non-UTF8-aware version of this string as a plain
    # ::String built from this object.
    def as_raw
      ::String.new(self)
    end

    # Alternate names for methods this class already provides.
    alias_method :size, :length
    alias_method :chars, :each_char
    alias_method :slice, :[]
  end
end
@@ -0,0 +1,21 @@
1
+ require 'utf8'
2
+
3
class StringScanner
  # Builds a UTF8-aware StringScanner::UTF8 around this scanner's string.
  #
  # NOTE: only the string is carried over. Any state held by this
  # StringScanner instance (like the current scan position) is lost.
  def as_utf8
    utf8_string = string.as_utf8
    StringScanner::UTF8.new(utf8_string)
  end

  class UTF8
    # Builds a non-UTF8-aware StringScanner around this scanner's string.
    #
    # NOTE: only the string is carried over. Any state held by this
    # StringScanner::UTF8 instance (like the current scan position) is lost.
    def as_raw
      StringScanner.new(string)
    end
  end
end
@@ -0,0 +1,5 @@
1
# Prepend the lib directory that sits next to this file's directory to the
# load path, then load the library and the opt-in StringScanner extension.
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
require 'utf8'
require 'utf8/string_scanner'

# RSpec's top-level constant is spelled `RSpec`; only load it when it has not
# been loaded already.
require 'rspec' unless defined? RSpec
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+ require File.expand_path('../spec_helper', __FILE__)
3
+
4
describe StringScanner::UTF8 do
  # Build fresh scanners for every example: #getch advances the scan position,
  # so a scanner shared between examples would carry one example's reads into
  # the next.
  before(:each) do
    @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
    @scanner = StringScanner.new(@char_array.join)
    @utf8_scanner = @scanner.as_utf8
  end

  it "should extend StringScanner, adding an as_utf8 method that returns a StringScanner::UTF8 instance" do
    @scanner.should respond_to(:as_utf8)
    @scanner.as_utf8.class.should eql(StringScanner::UTF8)
  end

  it "should allow access to a regular (non-utf8-aware) StringScanner based on it's string" do
    raw = @utf8_scanner.as_raw
    raw.class.should eql(StringScanner)
    raw.string.should eql(@utf8_scanner.string)
  end

  it "#getch should be utf8-aware" do
    i = 0
    while char = @utf8_scanner.getch
      char.should eql(@char_array[i])
      i += 1
    end

    # Without this check the example would pass even if #getch returned nil
    # on the very first call.
    i.should eql(@char_array.size)
  end

  it "should be able to be reset" do
    # Read the first five characters, then stop part-way through the string.
    consumed = 0
    while char = @utf8_scanner.getch
      char.should eql(@char_array[consumed])
      consumed += 1
      break if consumed == 5
    end
    consumed.should eql(5)

    @utf8_scanner.reset

    # After the reset, scanning must start over from the first character and
    # run through the whole string.
    i = 0
    while char = @utf8_scanner.getch
      char.should eql(@char_array[i])
      i += 1
    end
    i.should eql(@char_array.size)
  end
end
@@ -0,0 +1,151 @@
1
+ # encoding: utf-8
2
+ require File.expand_path('../spec_helper', __FILE__)
3
+
4
describe String::UTF8 do
  # Shared fixtures: the same ten characters as an Array, a plain String and a
  # String::UTF8, plus the expected character count.
  before(:all) do
    @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
    @str = @char_array.join
    @utf8 = @str.as_utf8
    @utf8_len = @char_array.size
  end

  it "should extend String, adding an as_utf8 method that returns a String::UTF8 instance" do
    "".should respond_to(:as_utf8)
    "".as_utf8.class.should eql(String::UTF8)
  end

  it "should allow access to the underlying raw string" do
    raw = @utf8.as_raw
    raw.class.should eql(String)
    # The expected raw length differs depending on whether Encoding is
    # defined: the character count when it is, the plain String's size when
    # it is not.
    if defined? Encoding
      raw.length.should eql(@utf8_len)
    else
      raw.length.should eql(@str.size)
    end
  end

  it "should wrap all returned strings to be utf8-aware" do
    @utf8[0].class.should eql(String::UTF8)
    @utf8.chars.to_a[0].class.should eql(String::UTF8)
  end

  context "#length and #size" do
    it "should be utf8-aware" do
      @utf8.length.should eql(@utf8_len)
      @utf8.size.should eql(@utf8_len)
    end
  end

  context "#chars and #each_char" do
    it "should be utf8-aware" do
      # The enumerator class is looked up under a different name depending on
      # whether Encoding is defined.
      klass = if defined? Encoding
        Enumerator
      else
        Enumerable::Enumerator
      end

      @utf8.chars.class.should eql(klass)

      yielded = 0
      @utf8.chars do |char|
        char.should_not be_nil
        yielded += 1
      end
      # Make sure the block really ran once per character; otherwise the nil
      # check above could pass without ever being executed.
      yielded.should eql(@utf8_len)

      joined = @utf8.chars.to_a.join
      @utf8.should eql(joined)
      @utf8.chars.to_a.size.should eql(@utf8_len)
      @utf8.chars.to_a.should eql(@char_array)
    end
  end

  context "[offset] syntax" do
    it "should be utf8-aware" do
      @char_array.each_with_index do |char, i|
        utf8_char = @utf8[i]
        utf8_char.should eql(char)
      end
    end

    it "should support negative indices" do
      utf8_char = @utf8[-5]
      utf8_char.should eql(@char_array[-5])
    end

    it "should return nil for out of range indices" do
      @utf8[100].should be_nil
      @utf8[-100].should be_nil
    end
  end

  context "[offset, length] syntax" do
    it "should be utf8-aware" do
      utf8_char = @utf8[1, 4]
      utf8_char.should eql(@char_array[1, 4].join)

      utf8_char = @utf8[0, 6]
      utf8_char.should eql(@char_array[0, 6].join)

      # this will fail due to a bug in 1.9, so it only runs when Encoding is
      # not defined
      unless defined? Encoding
        utf8_char = @utf8[6, 100]
        utf8_char.should eql(@char_array[6, 100].join)
      end

      utf8_char = @utf8[-1, 2]
      utf8_char.should eql(@char_array[-1, 2].join)

      utf8_char = @utf8[-1, 100]
      utf8_char.should eql(@char_array[-1, 100].join)

      utf8_char = @utf8[0, 0]
      utf8_char.should eql(@char_array[0, 0].join)
    end

    it "should return nil for an out of range offset or length" do
      @utf8[100, 100].should be_nil
      @utf8[-100, 100].should be_nil
      @utf8[0, -100].should be_nil
    end
  end

  context "[Range] syntax" do
    it "should be utf8-aware" do
      utf8_char = @utf8[1..4]
      utf8_char.should eql(@char_array[1..4].join)

      utf8_char = @utf8[0..6]
      utf8_char.should eql(@char_array[0..6].join)

      # this will fail due to a bug in 1.9, so it only runs when Encoding is
      # not defined
      unless defined? Encoding
        utf8_char = @utf8[6..100]
        utf8_char.should eql(@char_array[6..100].join)
      end

      utf8_char = @utf8[-1..2]
      utf8_char.should eql(@char_array[-1..2].join)

      utf8_char = @utf8[-1..100]
      utf8_char.should eql(@char_array[-1..100].join)
    end

    it "should return nil for an out of range offset or length" do
      @utf8[100..100].should be_nil
      @utf8[-100..100].should be_nil
      # A range that starts in bounds but ends before it starts is expected
      # to give an empty string rather than nil.
      @utf8[0..-100].should eql("")
    end
  end

  it "[Regexp] syntax shouldn't be supported yet" do
    lambda {
      @utf8[/a/]
    }.should raise_error(ArgumentError)
  end

  it "[Regexp, match_index] syntax shouldn't be supported yet" do
    lambda {
      @utf8[/(a)/, 1]
    }.should raise_error(ArgumentError)
  end
end
data/utf8.gemspec ADDED
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
+
3
Gem::Specification.new do |spec|
  # Identity
  spec.name = "utf8"
  spec.version = "0.1.0"

  if spec.respond_to? :required_rubygems_version=
    spec.required_rubygems_version = Gem::Requirement.new(">= 0")
  end
  spec.authors = ["Brian Lopez"]
  spec.date = "2011-01-12"
  spec.email = "seniorlopez@gmail.com"

  # Native extension and packaged files
  spec.extensions = ["ext/utf8/extconf.rb"]
  spec.extra_rdoc_files = ["README.rdoc"]
  spec.files = %w[
    .gitignore
    MIT-LICENSE
    README.rdoc
    Rakefile
    benchmark/active_support.rb
    benchmark/test.txt
    ext/utf8/ext.c
    ext/utf8/ext.h
    ext/utf8/extconf.rb
    ext/utf8/string_scanner_utf8.c
    ext/utf8/string_scanner_utf8.h
    ext/utf8/string_utf8.c
    ext/utf8/string_utf8.h
    ext/utf8/utf8.c
    ext/utf8/utf8.h
    lib/utf8.rb
    lib/utf8/string.rb
    lib/utf8/string_scanner.rb
    spec/spec_helper.rb
    spec/string_scanner_spec.rb
    spec/string_spec.rb
    utf8.gemspec
  ]
  spec.homepage = "http://github.com/brianmario/utf8"
  spec.require_paths = %w[lib ext]
  spec.rubygems_version = "1.4.2"
  spec.summary = "A lightweight UTF8-aware String class meant for use with Ruby 1.8"
  spec.test_files = %w[
    spec/spec_helper.rb
    spec/string_scanner_spec.rb
    spec/string_spec.rb
  ]

  has_spec_version = spec.respond_to?(:specification_version)
  spec.specification_version = 3 if has_spec_version

  # The two gems below are declared as development dependencies only when the
  # spec has a specification_version and the running RubyGems is at least
  # 1.2.0; in every other case they are declared as regular dependencies.
  dev_deps_available = has_spec_version &&
    Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  declare = dev_deps_available ? :add_development_dependency : :add_dependency

  [["rake-compiler", ">= 0.7.5"], ["rspec", ">= 0"]].each do |gem_name, requirement|
    spec.send(declare, gem_name, [requirement])
  end
end
37
+
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: utf8
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Brian Lopez
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-01-12 00:00:00 -08:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rake-compiler
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 9
30
+ segments:
31
+ - 0
32
+ - 7
33
+ - 5
34
+ version: 0.7.5
35
+ type: :development
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: rspec
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ type: :development
50
+ version_requirements: *id002
51
+ description:
52
+ email: seniorlopez@gmail.com
53
+ executables: []
54
+
55
+ extensions:
56
+ - ext/utf8/extconf.rb
57
+ extra_rdoc_files:
58
+ - README.rdoc
59
+ files:
60
+ - .gitignore
61
+ - MIT-LICENSE
62
+ - README.rdoc
63
+ - Rakefile
64
+ - benchmark/active_support.rb
65
+ - benchmark/test.txt
66
+ - ext/utf8/ext.c
67
+ - ext/utf8/ext.h
68
+ - ext/utf8/extconf.rb
69
+ - ext/utf8/string_scanner_utf8.c
70
+ - ext/utf8/string_scanner_utf8.h
71
+ - ext/utf8/string_utf8.c
72
+ - ext/utf8/string_utf8.h
73
+ - ext/utf8/utf8.c
74
+ - ext/utf8/utf8.h
75
+ - lib/utf8.rb
76
+ - lib/utf8/string.rb
77
+ - lib/utf8/string_scanner.rb
78
+ - spec/spec_helper.rb
79
+ - spec/string_scanner_spec.rb
80
+ - spec/string_spec.rb
81
+ - utf8.gemspec
82
+ has_rdoc: true
83
+ homepage: http://github.com/brianmario/utf8
84
+ licenses: []
85
+
86
+ post_install_message:
87
+ rdoc_options: []
88
+
89
+ require_paths:
90
+ - lib
91
+ - ext
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ hash: 3
98
+ segments:
99
+ - 0
100
+ version: "0"
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ hash: 3
107
+ segments:
108
+ - 0
109
+ version: "0"
110
+ requirements: []
111
+
112
+ rubyforge_project:
113
+ rubygems_version: 1.4.2
114
+ signing_key:
115
+ specification_version: 3
116
+ summary: A lightweight UTF8-aware String class meant for use with Ruby 1.8
117
+ test_files:
118
+ - spec/spec_helper.rb
119
+ - spec/string_scanner_spec.rb
120
+ - spec/string_spec.rb