unf_ext 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -5,14 +5,6 @@ namespace UNF {
5
5
  namespace Trie {
6
6
  class Node {
7
7
  public:
8
- Node() : data(0xFFFFFFFF) {}
9
-
10
- void set_base_index(unsigned base_index) { data = (data&0xFF000000)+(base_index&0x00FFFFFF); }
11
- void set_value(unsigned value) { set_base_index(value); }
12
- void set_check_char(unsigned char ch) { data = (ch << 24) + base(); }
13
-
14
- bool is_unused() const { return data==0xFFFFFFFF; }
15
-
16
8
  unsigned jump(unsigned char ch) const { return base() + ch; }
17
9
  unsigned value() const { return base(); }
18
10
  unsigned check_char() const { return data>>24; }
@@ -9,17 +9,18 @@ namespace UNF {
9
9
  namespace Trie {
10
10
  class Searcher {
11
11
  public:
12
- Searcher(const Node* nodes, const char* value=NULL)
13
- : nodes(nodes), value(value) {}
12
+ Searcher(const Node* nodes, unsigned root, const char* value=NULL)
13
+ : nodes(nodes), root(root), value(value) {}
14
14
 
15
15
  unsigned find_value(const char* key, int default_value) const {
16
- unsigned node_index=0;
16
+ unsigned node_index=root;
17
17
  for(CharStream in(key);; in.read()) {
18
18
  node_index = nodes[node_index].jump(in.peek());
19
19
  if(nodes[node_index].check_char()==in.peek()) {
20
- unsigned terminal_index = nodes[node_index].jump('\0');
21
- if(nodes[terminal_index].check_char()=='\0')
20
+ unsigned terminal_index = nodes[node_index].jump('\0');
21
+ if(nodes[terminal_index].check_char()=='\0') {
22
22
  return nodes[terminal_index].value();
23
+ }
23
24
  } else
24
25
  return default_value;
25
26
  }
@@ -27,13 +28,14 @@ namespace UNF {
27
28
 
28
29
  protected:
29
30
  const Node* nodes;
31
+ const unsigned root;
30
32
  const char* value;
31
33
  };
32
34
 
33
35
  class CanonicalCombiningClass : private Searcher {
34
36
  public:
35
- CanonicalCombiningClass(const unsigned* node_uints)
36
- : Searcher(Node::from_uint_array(node_uints)) {}
37
+ CanonicalCombiningClass(const unsigned* node_uints, unsigned root)
38
+ : Searcher(Node::from_uint_array(node_uints), root) {}
37
39
 
38
40
  unsigned get_class(const char* str) const { return find_value(str,0); }
39
41
 
@@ -46,7 +48,7 @@ namespace UNF {
46
48
  loop_head:
47
49
  unsigned beg = in.cur()-str;
48
50
 
49
- for(unsigned node_index=0;;){
51
+ for(unsigned node_index=root;;){
50
52
  node_index = nodes[node_index].jump(in.read());
51
53
 
52
54
  if(nodes[node_index].check_char()==in.prev()) {
@@ -93,8 +95,8 @@ namespace UNF {
93
95
 
94
96
  class NormalizationForm : private Searcher {
95
97
  public:
96
- NormalizationForm(const unsigned* node_uints, const char* value=NULL)
97
- : Searcher(Node::from_uint_array(node_uints), value) {}
98
+ NormalizationForm(const unsigned* node_uints, unsigned root, const char* value=NULL)
99
+ : Searcher(Node::from_uint_array(node_uints), root, value) {}
98
100
 
99
101
  bool quick_check(const char* key) const { return find_value(key,0xFFFFFFFF)==0xFFFFFFFF; }
100
102
 
@@ -102,12 +104,12 @@ namespace UNF {
102
104
  loop_head:
103
105
  const char* beg = in.cur();
104
106
 
105
- for(unsigned node_index=0;;) {
107
+ for(unsigned node_index=root;;) {
106
108
  node_index = nodes[node_index].jump(in.read());
107
109
  if(nodes[node_index].check_char()==in.prev()) {
108
110
  unsigned terminal_index = nodes[node_index].jump('\0');
109
111
  if(nodes[terminal_index].check_char()=='\0') {
110
- buffer.append(value+nodes[terminal_index].value());
112
+ word_append(buffer, value, nodes[terminal_index].value());
111
113
  beg = in.cur();
112
114
  break;
113
115
  }
@@ -127,15 +129,15 @@ namespace UNF {
127
129
 
128
130
  const char* const beg = in.cur();
129
131
  const char* current_char_head = in.cur();
130
- const char* composed_char = NULL;
132
+ unsigned composed_char_info = 0;
131
133
 
132
- unsigned node_index = 0;
133
- unsigned retry_root_node = 0;
134
+ unsigned node_index = root;
135
+ unsigned retry_root_node = root;
134
136
  unsigned char retry_root_class = 0;
135
137
 
136
138
  for(bool first=true;;) {
137
139
  if(Util::is_utf8_char_start_byte(in.peek())) {
138
- if(node_index != 0)
140
+ if(node_index != root)
139
141
  first=false;
140
142
  current_char_head = in.cur();
141
143
 
@@ -144,13 +146,14 @@ namespace UNF {
144
146
  }
145
147
 
146
148
  retry:
147
- unsigned next_index = nodes[node_index].jump(in.read());
148
- if(nodes[next_index].check_char()==in.prev()) {
149
+ unsigned next_index = nodes[node_index].jump(in.peek());
150
+ if(nodes[next_index].check_char()==in.read()) {
149
151
  // succeeded
150
152
  node_index = next_index;
151
153
  unsigned terminal_index = nodes[node_index].jump('\0');
152
154
  if(nodes[terminal_index].check_char()=='\0') {
153
- composed_char = value+nodes[terminal_index].value();
155
+ composed_char_info = nodes[terminal_index].value();
156
+
154
157
  in.mark_as_last_valid_point();
155
158
  if(in.eos() || retry_root_class > in.get_canonical_class())
156
159
  break;
@@ -168,9 +171,9 @@ namespace UNF {
168
171
  }
169
172
  }
170
173
 
171
- if(composed_char) {
174
+ if(composed_char_info != 0) {
172
175
  // append composed unicode-character and skipped combining-characters
173
- buf.append(composed_char);
176
+ word_append(buf, value, composed_char_info);
174
177
  in.append_skipped_chars_to_str(buf);
175
178
  in.reset_at_marked_point();
176
179
  } else {
@@ -179,6 +182,11 @@ namespace UNF {
179
182
  in.append_read_char_to_str(buf, beg);
180
183
  }
181
184
  }
185
+
186
+ private:
187
+ static void word_append(std::string& buffer, const char* base, unsigned pos_info) {
188
+ buffer.append(base+(pos_info&0x3FFFF), pos_info>>18);
189
+ }
182
190
  };
183
191
  }
184
192
  }
File without changes
@@ -1,10 +1,10 @@
1
+ #include "unf/normalizer.hh"
2
+
1
3
  #include <ruby.h>
2
4
  #if defined(HAVE_RUBY_ENCODING_H)
3
5
  #include <ruby/encoding.h>
4
6
  #endif
5
7
 
6
- #include "unf/normalizer.hh"
7
-
8
8
  extern "C" {
9
9
  VALUE unf_allocate(VALUE klass);
10
10
  VALUE unf_initialize(VALUE self);
data/lib/unf_ext.rb ADDED
@@ -0,0 +1,5 @@
1
+ begin
2
+ require "#{RUBY_VERSION[/\A[0-9]+\.[0-9]+/]}/unf_ext.so"
3
+ rescue LoadError
4
+ require "unf_ext.so"
5
+ end
data/unf_ext.gemspec CHANGED
@@ -5,14 +5,14 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{unf_ext}
8
- s.version = "0.0.3"
8
+ s.version = "0.0.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = [%q{Takeru Ohta}, %q{Akinori MUSHA}]
12
- s.date = %q{2011-10-24}
12
+ s.date = %q{2011-12-08}
13
13
  s.description = %q{Unicode Normalization Form support library for CRuby}
14
14
  s.email = %q{knu@idaemons.org}
15
- s.extensions = [%q{extconf.rb}]
15
+ s.extensions = [%q{ext/unf_ext/extconf.rb}]
16
16
  s.extra_rdoc_files = [
17
17
  "LICENSE.txt",
18
18
  "README.md"
@@ -24,17 +24,18 @@ Gem::Specification.new do |s|
24
24
  "README.md",
25
25
  "Rakefile",
26
26
  "VERSION",
27
- "extconf.rb",
27
+ "ext/unf_ext/extconf.rb",
28
+ "ext/unf_ext/unf.cc",
29
+ "ext/unf_ext/unf/normalizer.hh",
30
+ "ext/unf_ext/unf/table.hh",
31
+ "ext/unf_ext/unf/trie/char_stream.hh",
32
+ "ext/unf_ext/unf/trie/node.hh",
33
+ "ext/unf_ext/unf/trie/searcher.hh",
34
+ "ext/unf_ext/unf/util.hh",
35
+ "lib/unf_ext.rb",
28
36
  "test/helper.rb",
29
37
  "test/normalization-test.txt",
30
38
  "test/test_unf_ext.rb",
31
- "unf.cc",
32
- "unf/normalizer.hh",
33
- "unf/table.hh",
34
- "unf/trie/char_stream.hh",
35
- "unf/trie/node.hh",
36
- "unf/trie/searcher.hh",
37
- "unf/util.hh",
38
39
  "unf_ext.gemspec"
39
40
  ]
40
41
  s.homepage = %q{http://github.com/knu/ruby-unf_ext}
@@ -51,17 +52,20 @@ Gem::Specification.new do |s|
51
52
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
52
53
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
53
54
  s.add_development_dependency(%q<rcov>, [">= 0"])
55
+ s.add_development_dependency(%q<rake-compiler>, [">= 0.7.9"])
54
56
  else
55
57
  s.add_dependency(%q<shoulda>, [">= 0"])
56
58
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
57
59
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
58
60
  s.add_dependency(%q<rcov>, [">= 0"])
61
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.9"])
59
62
  end
60
63
  else
61
64
  s.add_dependency(%q<shoulda>, [">= 0"])
62
65
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
63
66
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
64
67
  s.add_dependency(%q<rcov>, [">= 0"])
68
+ s.add_dependency(%q<rake-compiler>, [">= 0.7.9"])
65
69
  end
66
70
  end
67
71
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unf_ext
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Takeru Ohta
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2011-10-24 00:00:00 Z
19
+ date: 2011-12-08 00:00:00 Z
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: shoulda
@@ -78,12 +78,28 @@ dependencies:
78
78
  type: :development
79
79
  requirement: *id004
80
80
  prerelease: false
81
+ - !ruby/object:Gem::Dependency
82
+ name: rake-compiler
83
+ version_requirements: &id005 !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ hash: 17
89
+ segments:
90
+ - 0
91
+ - 7
92
+ - 9
93
+ version: 0.7.9
94
+ type: :development
95
+ requirement: *id005
96
+ prerelease: false
81
97
  description: Unicode Normalization Form support library for CRuby
82
98
  email: knu@idaemons.org
83
99
  executables: []
84
100
 
85
101
  extensions:
86
- - extconf.rb
102
+ - ext/unf_ext/extconf.rb
87
103
  extra_rdoc_files:
88
104
  - LICENSE.txt
89
105
  - README.md
@@ -94,17 +110,18 @@ files:
94
110
  - README.md
95
111
  - Rakefile
96
112
  - VERSION
97
- - extconf.rb
113
+ - ext/unf_ext/extconf.rb
114
+ - ext/unf_ext/unf.cc
115
+ - ext/unf_ext/unf/normalizer.hh
116
+ - ext/unf_ext/unf/table.hh
117
+ - ext/unf_ext/unf/trie/char_stream.hh
118
+ - ext/unf_ext/unf/trie/node.hh
119
+ - ext/unf_ext/unf/trie/searcher.hh
120
+ - ext/unf_ext/unf/util.hh
121
+ - lib/unf_ext.rb
98
122
  - test/helper.rb
99
123
  - test/normalization-test.txt
100
124
  - test/test_unf_ext.rb
101
- - unf.cc
102
- - unf/normalizer.hh
103
- - unf/table.hh
104
- - unf/trie/char_stream.hh
105
- - unf/trie/node.hh
106
- - unf/trie/searcher.hh
107
- - unf/util.hh
108
125
  - unf_ext.gemspec
109
126
  homepage: http://github.com/knu/ruby-unf_ext
110
127
  licenses:
data/extconf.rb DELETED
@@ -1,4 +0,0 @@
1
- require 'mkmf'
2
- have_library('stdc++')
3
- have_header('ruby/encoding.h')
4
- create_makefile 'unf_ext'