utf8 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/utf8.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'utf8/utf8'
2
+ require 'utf8/string'
3
+
4
+ # explicitly require this in your app if you want to use it
5
+ # require 'utf8/string_scanner'
@@ -0,0 +1,19 @@
1
+ class String
2
+   # Wraps your string in a UTF8-aware version of String
3
+ def as_utf8
4
+ String::UTF8.new(self)
5
+ end
6
+
7
+ class UTF8
8
+ VERSION = "0.1.0"
9
+
10
+ # Gives you access to the raw non-UTF8-aware version of the string
11
+ def as_raw
12
+ ::String.new(self)
13
+ end
14
+
15
+ alias :size :length
16
+ alias :chars :each_char
17
+ alias :slice :[]
18
+ end
19
+ end
@@ -0,0 +1,21 @@
1
+ require 'utf8'
2
+
3
+ class StringScanner
4
+   # Returns a UTF8-aware version of StringScanner wrapping your original string
5
+ #
6
+ # NOTE: this will lose all state associated with the current StringScanner instance
7
+ # (like the current scan position)
8
+ def as_utf8
9
+ StringScanner::UTF8.new(self.string.as_utf8)
10
+ end
11
+
12
+ class UTF8
13
+ # Returns a non-UTF8-aware version of StringScanner wrapping your original string
14
+ #
15
+ # NOTE: this will lose all state associated with the current StringScanner::UTF8 instance
16
+ # (like the current scan position)
17
+ def as_raw
18
+ StringScanner.new(self.string)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,5 @@
1
+ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
2
+ require 'utf8'
3
+ require 'utf8/string_scanner'
4
+
5
+ require 'rspec' unless defined? RSpec # the constant is RSpec; skip the require when it is already loaded
@@ -0,0 +1,48 @@
1
+ # encoding: utf-8
2
+ require File.expand_path('../spec_helper', __FILE__)
3
+
4
+ describe StringScanner::UTF8 do
5
+ before(:all) do
6
+ @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
7
+ @scanner = StringScanner.new(@char_array.join)
8
+ @utf8_scanner = @scanner.as_utf8
9
+ end
10
+
11
+ it "should extend StringScanner, adding an as_utf8 method that returns a StringScanner::UTF8 instance" do
12
+ @scanner.should respond_to(:as_utf8)
13
+ @scanner.as_utf8.class.should eql(StringScanner::UTF8)
14
+ end
15
+
16
+ it "should allow access to a regular (non-utf8-aware) StringScanner based on it's string" do
17
+ raw = @utf8_scanner.as_raw
18
+ raw.class.should eql(StringScanner)
19
+ raw.string.should eql(@utf8_scanner.string)
20
+ end
21
+
22
+ it "#getch should be utf8-aware" do
23
+ i=0
24
+ while char = @utf8_scanner.getch
25
+ char.should eql(@char_array[i])
26
+ i+=1
27
+ end
28
+ end
29
+
30
+ it "should be able to be reset" do
31
+ i=0
32
+ while char = @utf8_scanner.getch
33
+ char.should eql(@char_array[i])
34
+ if i == 4
35
+ break
36
+ end
37
+ i+=1
38
+ end
39
+
40
+ @utf8_scanner.reset
41
+
42
+ i=0
43
+ while char = @utf8_scanner.getch
44
+ char.should eql(@char_array[i])
45
+ i+=1
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,151 @@
1
+ # encoding: utf-8
2
+ require File.expand_path('../spec_helper', __FILE__)
3
+
4
+ describe String::UTF8 do
5
+ before(:all) do
6
+ @char_array = ["怎", "麼", "也", "沒", "人", "寫", "了", "這", "個", "嗎"]
7
+ @str = @char_array.join
8
+ @utf8 = @str.as_utf8
9
+ @utf8_len = @char_array.size
10
+ end
11
+
12
+ it "should extend String, adding an as_utf8 method that returns a String::UTF8 instance" do
13
+ "".should respond_to(:as_utf8)
14
+ "".as_utf8.class.should eql(String::UTF8)
15
+ end
16
+
17
+ it "should allow access to the underlying raw string" do
18
+ raw = @utf8.as_raw
19
+ raw.class.should eql(String)
20
+ if defined? Encoding
21
+ raw.length.should eql(@utf8_len)
22
+ else
23
+ raw.length.should eql(@str.size)
24
+ end
25
+ end
26
+
27
+ it "should wrap all returned strings to be utf8-aware" do
28
+ @utf8[0].class.should eql(String::UTF8)
29
+ @utf8.chars.to_a[0].class.should eql(String::UTF8)
30
+ end
31
+
32
+ context "#length and #size" do
33
+ it "should be utf8-aware" do
34
+ @utf8.length.should eql(@utf8_len)
35
+ @utf8.size.should eql(@utf8_len)
36
+ end
37
+ end
38
+
39
+ context "#chars and #each_char" do
40
+ it "should be utf8-aware" do
41
+ klass = begin
42
+ if defined? Encoding
43
+ Enumerator
44
+ else
45
+ Enumerable::Enumerator
46
+ end
47
+ end
48
+
49
+ @utf8.chars.class.should eql(klass)
50
+ i=0
51
+ @utf8.chars do |char|
52
+ char.should_not be_nil
53
+ i+=1
54
+ end
55
+ joined = @utf8.chars.to_a.join
56
+ @utf8.should eql(joined)
57
+ @utf8.chars.to_a.size.should eql(@utf8_len)
58
+ @utf8.chars.to_a.should eql(@char_array)
59
+ end
60
+ end
61
+
62
+ context "[offset] syntax" do
63
+ it "should be utf8-aware" do
64
+ @char_array.each_with_index do |char, i|
65
+ utf8_char = @utf8[i]
66
+ utf8_char.should eql(char)
67
+ end
68
+ end
69
+
70
+ it "should support negative indices" do
71
+ utf8_char = @utf8[-5]
72
+ utf8_char.should eql(@char_array[-5])
73
+ end
74
+
75
+ it "should return nil for out of range indices" do
76
+ @utf8[100].should be_nil
77
+ @utf8[-100].should be_nil
78
+ end
79
+ end
80
+
81
+ context "[offset, length] syntax" do
82
+ it "should be utf8-aware" do
83
+ utf8_char = @utf8[1, 4]
84
+ utf8_char.should eql(@char_array[1, 4].join)
85
+
86
+ utf8_char = @utf8[0, 6]
87
+ utf8_char.should eql(@char_array[0, 6].join)
88
+
89
+ # this will fail due to a bug in 1.9
90
+ unless defined? Encoding
91
+ utf8_char = @utf8[6, 100]
92
+ utf8_char.should eql(@char_array[6, 100].join)
93
+ end
94
+
95
+ utf8_char = @utf8[-1, 2]
96
+ utf8_char.should eql(@char_array[-1, 2].join)
97
+
98
+ utf8_char = @utf8[-1, 100]
99
+ utf8_char.should eql(@char_array[-1, 100].join)
100
+
101
+ utf8_char = @utf8[0, 0]
102
+ utf8_char.should eql(@char_array[0, 0].join)
103
+ end
104
+
105
+ it "should return nil for an out of range offset or length" do
106
+ @utf8[100, 100].should be_nil
107
+ @utf8[-100, 100].should be_nil
108
+ @utf8[0, -100].should be_nil
109
+ end
110
+ end
111
+
112
+ context "[Range] syntax" do
113
+ it "should be utf8-aware" do
114
+ utf8_char = @utf8[1..4]
115
+ utf8_char.should eql(@char_array[1..4].join)
116
+
117
+ utf8_char = @utf8[0..6]
118
+ utf8_char.should eql(@char_array[0..6].join)
119
+
120
+ # this will fail due to a bug in 1.9
121
+ unless defined? Encoding
122
+ utf8_char = @utf8[6..100]
123
+ utf8_char.should eql(@char_array[6..100].join)
124
+ end
125
+
126
+ utf8_char = @utf8[-1..2]
127
+ utf8_char.should eql(@char_array[-1..2].join)
128
+
129
+ utf8_char = @utf8[-1..100]
130
+ utf8_char.should eql(@char_array[-1..100].join)
131
+ end
132
+
133
+ it "should return nil for an out of range offset or length" do
134
+ @utf8[100..100].should be_nil
135
+ @utf8[-100..100].should be_nil
136
+ @utf8[0..-100].should eql("")
137
+ end
138
+ end
139
+
140
+ it "[Regexp] syntax shouldn't be supported yet" do
141
+ lambda {
142
+ @utf8[/a/]
143
+ }.should raise_error(ArgumentError)
144
+ end
145
+
146
+ it "[Regexp, match_index] syntax shouldn't be supported yet" do
147
+ lambda {
148
+ @utf8[/(a)/, 1]
149
+ }.should raise_error(ArgumentError)
150
+ end
151
+ end
data/utf8.gemspec ADDED
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{utf8}
5
+ s.version = "0.1.0"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["Brian Lopez"]
9
+ s.date = %q{2011-01-12}
10
+ s.email = %q{seniorlopez@gmail.com}
11
+ s.extensions = ["ext/utf8/extconf.rb"]
12
+ s.extra_rdoc_files = [
13
+ "README.rdoc"
14
+ ]
15
+ s.files = [".gitignore", "MIT-LICENSE", "README.rdoc", "Rakefile", "benchmark/active_support.rb", "benchmark/test.txt", "ext/utf8/ext.c", "ext/utf8/ext.h", "ext/utf8/extconf.rb", "ext/utf8/string_scanner_utf8.c", "ext/utf8/string_scanner_utf8.h", "ext/utf8/string_utf8.c", "ext/utf8/string_utf8.h", "ext/utf8/utf8.c", "ext/utf8/utf8.h", "lib/utf8.rb", "lib/utf8/string.rb", "lib/utf8/string_scanner.rb", "spec/spec_helper.rb", "spec/string_scanner_spec.rb", "spec/string_spec.rb", "utf8.gemspec"]
16
+ s.homepage = %q{http://github.com/brianmario/utf8}
17
+ s.require_paths = ["lib", "ext"]
18
+ s.rubygems_version = %q{1.4.2}
19
+ s.summary = %q{A lightweight UTF8-aware String class meant for use with Ruby 1.8}
20
+ s.test_files = ["spec/spec_helper.rb", "spec/string_scanner_spec.rb", "spec/string_spec.rb"]
21
+
22
+ if s.respond_to? :specification_version then
23
+ s.specification_version = 3
24
+
25
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
26
+ s.add_development_dependency(%q<rake-compiler>, [">= 0.7.5"])
27
+ s.add_development_dependency(%q<rspec>, [">= 0"])
28
+ else
29
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.5"])
30
+ s.add_dependency(%q<rspec>, [">= 0"])
31
+ end
32
+ else
33
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.5"])
34
+ s.add_dependency(%q<rspec>, [">= 0"])
35
+ end
36
+ end
37
+
metadata ADDED
@@ -0,0 +1,120 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: utf8
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Brian Lopez
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-01-12 00:00:00 -08:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rake-compiler
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 9
30
+ segments:
31
+ - 0
32
+ - 7
33
+ - 5
34
+ version: 0.7.5
35
+ type: :development
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: rspec
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 3
46
+ segments:
47
+ - 0
48
+ version: "0"
49
+ type: :development
50
+ version_requirements: *id002
51
+ description:
52
+ email: seniorlopez@gmail.com
53
+ executables: []
54
+
55
+ extensions:
56
+ - ext/utf8/extconf.rb
57
+ extra_rdoc_files:
58
+ - README.rdoc
59
+ files:
60
+ - .gitignore
61
+ - MIT-LICENSE
62
+ - README.rdoc
63
+ - Rakefile
64
+ - benchmark/active_support.rb
65
+ - benchmark/test.txt
66
+ - ext/utf8/ext.c
67
+ - ext/utf8/ext.h
68
+ - ext/utf8/extconf.rb
69
+ - ext/utf8/string_scanner_utf8.c
70
+ - ext/utf8/string_scanner_utf8.h
71
+ - ext/utf8/string_utf8.c
72
+ - ext/utf8/string_utf8.h
73
+ - ext/utf8/utf8.c
74
+ - ext/utf8/utf8.h
75
+ - lib/utf8.rb
76
+ - lib/utf8/string.rb
77
+ - lib/utf8/string_scanner.rb
78
+ - spec/spec_helper.rb
79
+ - spec/string_scanner_spec.rb
80
+ - spec/string_spec.rb
81
+ - utf8.gemspec
82
+ has_rdoc: true
83
+ homepage: http://github.com/brianmario/utf8
84
+ licenses: []
85
+
86
+ post_install_message:
87
+ rdoc_options: []
88
+
89
+ require_paths:
90
+ - lib
91
+ - ext
92
+ required_ruby_version: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ hash: 3
98
+ segments:
99
+ - 0
100
+ version: "0"
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ none: false
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ hash: 3
107
+ segments:
108
+ - 0
109
+ version: "0"
110
+ requirements: []
111
+
112
+ rubyforge_project:
113
+ rubygems_version: 1.4.2
114
+ signing_key:
115
+ specification_version: 3
116
+ summary: A lightweight UTF8-aware String class meant for use with Ruby 1.8
117
+ test_files:
118
+ - spec/spec_helper.rb
119
+ - spec/string_scanner_spec.rb
120
+ - spec/string_spec.rb