unf_ext 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
File without changes
@@ -5,14 +5,6 @@ namespace UNF {
5
5
  namespace Trie {
6
6
  class Node {
7
7
  public:
8
- Node() : data(0xFFFFFFFF) {}
9
-
10
- void set_base_index(unsigned base_index) { data = (data&0xFF000000)+(base_index&0x00FFFFFF); }
11
- void set_value(unsigned value) { set_base_index(value); }
12
- void set_check_char(unsigned char ch) { data = (ch << 24) + base(); }
13
-
14
- bool is_unused() const { return data==0xFFFFFFFF; }
15
-
16
8
  unsigned jump(unsigned char ch) const { return base() + ch; }
17
9
  unsigned value() const { return base(); }
18
10
  unsigned check_char() const { return data>>24; }
@@ -9,17 +9,18 @@ namespace UNF {
9
9
  namespace Trie {
10
10
  class Searcher {
11
11
  public:
12
- Searcher(const Node* nodes, const char* value=NULL)
13
- : nodes(nodes), value(value) {}
12
+ Searcher(const Node* nodes, unsigned root, const char* value=NULL)
13
+ : nodes(nodes), root(root), value(value) {}
14
14
 
15
15
  unsigned find_value(const char* key, int default_value) const {
16
- unsigned node_index=0;
16
+ unsigned node_index=root;
17
17
  for(CharStream in(key);; in.read()) {
18
18
  node_index = nodes[node_index].jump(in.peek());
19
19
  if(nodes[node_index].check_char()==in.peek()) {
20
- unsigned terminal_index = nodes[node_index].jump('\0');
21
- if(nodes[terminal_index].check_char()=='\0')
20
+ unsigned terminal_index = nodes[node_index].jump('\0');
21
+ if(nodes[terminal_index].check_char()=='\0') {
22
22
  return nodes[terminal_index].value();
23
+ }
23
24
  } else
24
25
  return default_value;
25
26
  }
@@ -27,13 +28,14 @@ namespace UNF {
27
28
 
28
29
  protected:
29
30
  const Node* nodes;
31
+ const unsigned root;
30
32
  const char* value;
31
33
  };
32
34
 
33
35
  class CanonicalCombiningClass : private Searcher {
34
36
  public:
35
- CanonicalCombiningClass(const unsigned* node_uints)
36
- : Searcher(Node::from_uint_array(node_uints)) {}
37
+ CanonicalCombiningClass(const unsigned* node_uints, unsigned root)
38
+ : Searcher(Node::from_uint_array(node_uints), root) {}
37
39
 
38
40
  unsigned get_class(const char* str) const { return find_value(str,0); }
39
41
 
@@ -46,7 +48,7 @@ namespace UNF {
46
48
  loop_head:
47
49
  unsigned beg = in.cur()-str;
48
50
 
49
- for(unsigned node_index=0;;){
51
+ for(unsigned node_index=root;;){
50
52
  node_index = nodes[node_index].jump(in.read());
51
53
 
52
54
  if(nodes[node_index].check_char()==in.prev()) {
@@ -93,8 +95,8 @@ namespace UNF {
93
95
 
94
96
  class NormalizationForm : private Searcher {
95
97
  public:
96
- NormalizationForm(const unsigned* node_uints, const char* value=NULL)
97
- : Searcher(Node::from_uint_array(node_uints), value) {}
98
+ NormalizationForm(const unsigned* node_uints, unsigned root, const char* value=NULL)
99
+ : Searcher(Node::from_uint_array(node_uints), root, value) {}
98
100
 
99
101
  bool quick_check(const char* key) const { return find_value(key,0xFFFFFFFF)==0xFFFFFFFF; }
100
102
 
@@ -102,12 +104,12 @@ namespace UNF {
102
104
  loop_head:
103
105
  const char* beg = in.cur();
104
106
 
105
- for(unsigned node_index=0;;) {
107
+ for(unsigned node_index=root;;) {
106
108
  node_index = nodes[node_index].jump(in.read());
107
109
  if(nodes[node_index].check_char()==in.prev()) {
108
110
  unsigned terminal_index = nodes[node_index].jump('\0');
109
111
  if(nodes[terminal_index].check_char()=='\0') {
110
- buffer.append(value+nodes[terminal_index].value());
112
+ word_append(buffer, value, nodes[terminal_index].value());
111
113
  beg = in.cur();
112
114
  break;
113
115
  }
@@ -127,15 +129,15 @@ namespace UNF {
127
129
 
128
130
  const char* const beg = in.cur();
129
131
  const char* current_char_head = in.cur();
130
- const char* composed_char = NULL;
132
+ unsigned composed_char_info = 0;
131
133
 
132
- unsigned node_index = 0;
133
- unsigned retry_root_node = 0;
134
+ unsigned node_index = root;
135
+ unsigned retry_root_node = root;
134
136
  unsigned char retry_root_class = 0;
135
137
 
136
138
  for(bool first=true;;) {
137
139
  if(Util::is_utf8_char_start_byte(in.peek())) {
138
- if(node_index != 0)
140
+ if(node_index != root)
139
141
  first=false;
140
142
  current_char_head = in.cur();
141
143
 
@@ -144,13 +146,14 @@ namespace UNF {
144
146
  }
145
147
 
146
148
  retry:
147
- unsigned next_index = nodes[node_index].jump(in.read());
148
- if(nodes[next_index].check_char()==in.prev()) {
149
+ unsigned next_index = nodes[node_index].jump(in.peek());
150
+ if(nodes[next_index].check_char()==in.read()) {
149
151
  // succeeded
150
152
  node_index = next_index;
151
153
  unsigned terminal_index = nodes[node_index].jump('\0');
152
154
  if(nodes[terminal_index].check_char()=='\0') {
153
- composed_char = value+nodes[terminal_index].value();
155
+ composed_char_info = nodes[terminal_index].value();
156
+
154
157
  in.mark_as_last_valid_point();
155
158
  if(in.eos() || retry_root_class > in.get_canonical_class())
156
159
  break;
@@ -168,9 +171,9 @@ namespace UNF {
168
171
  }
169
172
  }
170
173
 
171
- if(composed_char) {
174
+ if(composed_char_info != 0) {
172
175
  // append composed unicode-character and skipped combining-characters
173
- buf.append(composed_char);
176
+ word_append(buf, value, composed_char_info);
174
177
  in.append_skipped_chars_to_str(buf);
175
178
  in.reset_at_marked_point();
176
179
  } else {
@@ -179,6 +182,11 @@ namespace UNF {
179
182
  in.append_read_char_to_str(buf, beg);
180
183
  }
181
184
  }
185
+
186
+ private:
187
+ static void word_append(std::string& buffer, const char* base, unsigned pos_info) {
188
+ buffer.append(base+(pos_info&0x3FFFF), pos_info>>18);
189
+ }
182
190
  };
183
191
  }
184
192
  }
File without changes
@@ -1,10 +1,10 @@
1
+ #include "unf/normalizer.hh"
2
+
1
3
  #include <ruby.h>
2
4
  #if defined(HAVE_RUBY_ENCODING_H)
3
5
  #include <ruby/encoding.h>
4
6
  #endif
5
7
 
6
- #include "unf/normalizer.hh"
7
-
8
8
  extern "C" {
9
9
  VALUE unf_allocate(VALUE klass);
10
10
  VALUE unf_initialize(VALUE self);
data/lib/unf_ext.rb ADDED
@@ -0,0 +1,5 @@
1
+ begin
2
+ require "#{RUBY_VERSION[/\A[0-9]+\.[0-9]+/]}/unf_ext.so"
3
+ rescue LoadError
4
+ require "unf_ext.so"
5
+ end
data/unf_ext.gemspec CHANGED
@@ -5,14 +5,14 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{unf_ext}
8
- s.version = "0.0.3"
8
+ s.version = "0.0.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = [%q{Takeru Ohta}, %q{Akinori MUSHA}]
12
- s.date = %q{2011-10-24}
12
+ s.date = %q{2011-12-08}
13
13
  s.description = %q{Unicode Normalization Form support library for CRuby}
14
14
  s.email = %q{knu@idaemons.org}
15
- s.extensions = [%q{extconf.rb}]
15
+ s.extensions = [%q{ext/unf_ext/extconf.rb}]
16
16
  s.extra_rdoc_files = [
17
17
  "LICENSE.txt",
18
18
  "README.md"
@@ -24,17 +24,18 @@ Gem::Specification.new do |s|
24
24
  "README.md",
25
25
  "Rakefile",
26
26
  "VERSION",
27
- "extconf.rb",
27
+ "ext/unf_ext/extconf.rb",
28
+ "ext/unf_ext/unf.cc",
29
+ "ext/unf_ext/unf/normalizer.hh",
30
+ "ext/unf_ext/unf/table.hh",
31
+ "ext/unf_ext/unf/trie/char_stream.hh",
32
+ "ext/unf_ext/unf/trie/node.hh",
33
+ "ext/unf_ext/unf/trie/searcher.hh",
34
+ "ext/unf_ext/unf/util.hh",
35
+ "lib/unf_ext.rb",
28
36
  "test/helper.rb",
29
37
  "test/normalization-test.txt",
30
38
  "test/test_unf_ext.rb",
31
- "unf.cc",
32
- "unf/normalizer.hh",
33
- "unf/table.hh",
34
- "unf/trie/char_stream.hh",
35
- "unf/trie/node.hh",
36
- "unf/trie/searcher.hh",
37
- "unf/util.hh",
38
39
  "unf_ext.gemspec"
39
40
  ]
40
41
  s.homepage = %q{http://github.com/knu/ruby-unf_ext}
@@ -51,17 +52,20 @@ Gem::Specification.new do |s|
51
52
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
52
53
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
53
54
  s.add_development_dependency(%q<rcov>, [">= 0"])
55
+ s.add_development_dependency(%q<rake-compiler>, [">= 0.7.9"])
54
56
  else
55
57
  s.add_dependency(%q<shoulda>, [">= 0"])
56
58
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
57
59
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
58
60
  s.add_dependency(%q<rcov>, [">= 0"])
61
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.9"])
59
62
  end
60
63
  else
61
64
  s.add_dependency(%q<shoulda>, [">= 0"])
62
65
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
63
66
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
64
67
  s.add_dependency(%q<rcov>, [">= 0"])
68
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.9"])
65
69
  end
66
70
  end
67
71
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unf_ext
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Takeru Ohta
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-10-24 00:00:00 Z
19
+ date: 2011-12-08 00:00:00 Z
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: shoulda
@@ -78,12 +78,28 @@ dependencies:
78
78
  type: :development
79
79
  requirement: *id004
80
80
  prerelease: false
81
+ - !ruby/object:Gem::Dependency
82
+ name: rake-compiler
83
+ version_requirements: &id005 !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ hash: 17
89
+ segments:
90
+ - 0
91
+ - 7
92
+ - 9
93
+ version: 0.7.9
94
+ type: :development
95
+ requirement: *id005
96
+ prerelease: false
81
97
  description: Unicode Normalization Form support library for CRuby
82
98
  email: knu@idaemons.org
83
99
  executables: []
84
100
 
85
101
  extensions:
86
- - extconf.rb
102
+ - ext/unf_ext/extconf.rb
87
103
  extra_rdoc_files:
88
104
  - LICENSE.txt
89
105
  - README.md
@@ -94,17 +110,18 @@ files:
94
110
  - README.md
95
111
  - Rakefile
96
112
  - VERSION
97
- - extconf.rb
113
+ - ext/unf_ext/extconf.rb
114
+ - ext/unf_ext/unf.cc
115
+ - ext/unf_ext/unf/normalizer.hh
116
+ - ext/unf_ext/unf/table.hh
117
+ - ext/unf_ext/unf/trie/char_stream.hh
118
+ - ext/unf_ext/unf/trie/node.hh
119
+ - ext/unf_ext/unf/trie/searcher.hh
120
+ - ext/unf_ext/unf/util.hh
121
+ - lib/unf_ext.rb
98
122
  - test/helper.rb
99
123
  - test/normalization-test.txt
100
124
  - test/test_unf_ext.rb
101
- - unf.cc
102
- - unf/normalizer.hh
103
- - unf/table.hh
104
- - unf/trie/char_stream.hh
105
- - unf/trie/node.hh
106
- - unf/trie/searcher.hh
107
- - unf/util.hh
108
125
  - unf_ext.gemspec
109
126
  homepage: http://github.com/knu/ruby-unf_ext
110
127
  licenses:
data/extconf.rb DELETED
@@ -1,4 +0,0 @@
1
- require 'mkmf'
2
- have_library('stdc++')
3
- have_header('ruby/encoding.h')
4
- create_makefile 'unf_ext'