ruby_da 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,228 @@
1
+ #ifndef _DOUBLE_ARRAY_H_
2
+ #define _DOUBLE_ARRAY_H_
3
+ #include <vector>
4
+ #include <iostream>
5
+ #include <stack>
6
+ #include <string>
7
+ #include <utility>
8
+ #include <algorithm>
9
+ #include <stdexcept>
10
+ #include <cassert>
11
+
12
+
13
/**
 * Static double-array trie that maps byte strings to integer ids.
 *
 * Layout, as established by build():
 *  - slot 0 is the root;
 *  - the child of state `s` for byte `c` lives at `array[s].base + c` and is
 *    recognised by `check == s`;
 *  - a word ending at state `s` is recorded as the child for byte 0, whose
 *    `base` field holds the negated id;
 *  - unused slots form a singly linked free list: `check` is negative and
 *    `-check` is the index of the next free slot (`empty_head` is the first).
 */
class DoubleArray {
  typedef unsigned char uchar;

  // One slot of the double array.
  struct node {
    int base;
    int check;
  };

  // Pending unit of work for build(): the words in [low, up) share their
  // first `depth` bytes and hang below the trie slot `base`.
  struct StackNode {
    size_t depth;
    size_t low;
    size_t up;
    int base;
    StackNode(size_t d, size_t l, size_t u, int b) :
      depth(d), low(l), up(u), base(b) {}
  };

  // Unlinks the free slot `pos` from the free list and marks it as a child
  // of `base`. Throws std::logic_error if `pos` is not on the free list
  // instead of indexing past the end of the array.
  void set_check(int pos, int base){
    if(pos == empty_head){
      empty_head = -array[pos].check;
    }else{
      // The list is singly linked, so the predecessor has to be searched.
      size_t prev = static_cast<size_t>(empty_head);
      while(prev < array.size()){
        const int next = -array[prev].check;
        if(next == pos) break;
        // Free indices only ever increase along the list; anything else
        // means the list is damaged and the walk would never finish.
        if(next <= static_cast<int>(prev))
          throw std::logic_error("corrupted free list");
        prev = static_cast<size_t>(next);
      }
      if(prev >= array.size())
        throw std::logic_error("corrupted free list");
      array[prev].check = array[pos].check;
    }
    array[pos].check = base;
  }

  // Grows the array (by doubling) until index `pos` is valid and chains the
  // new slots onto the free list.
  void expand(size_t pos){
    while(pos >= array.size()){
      const size_t old_size = array.size();
      // Doubling a zero-sized array would never make progress.
      array.resize(old_size == 0 ? 1 : old_size * 2);
      for(size_t i = old_size; i < array.size(); i++){
        array[i].check = -static_cast<int>(i + 1);
      }
    }
  }

  // Builds the trie from (word, id) pairs. `words` is sorted in place in
  // descending order. Throws std::logic_error("overlapped") when the same
  // word occurs more than once.
  void build(std::vector<std::pair<std::string,int> > &words){
    std::sort(words.rbegin(), words.rend());

    array.assign(512, node());
    array[0].check = 0;
    for(size_t i = 1; i < array.size(); i++)
      array[i].check = -static_cast<int>(i + 1);
    empty_head = 1;

    // Nothing to insert: keep the bare root so searches simply find nothing
    // (the loop below would otherwise read words[0] of an empty vector).
    if(words.empty()) return;

    std::stack<StackNode> st;
    st.push(StackNode(0, 0, words.size(), 0));

    while(!st.empty()){
      StackNode stn = st.top(); st.pop();

      // A single word that has been consumed past its end: this slot is the
      // terminal child, store the negated id in it.
      if(stn.up - stn.low == 1 && words[stn.low].first.size() < stn.depth){
        array[stn.base].base = -words[stn.low].second;
        continue;
      }

      // Split [low, up) into runs that share the byte at `depth`; code 0
      // stands for "word ends here".
      std::vector<uchar> codes;
      std::vector<std::pair<size_t, size_t> > ranges;
      uchar c_before = 0;
      if(words[stn.low].first.size() > stn.depth)
        c_before = static_cast<uchar>((words[stn.low].first)[stn.depth]);

      size_t low_before = stn.low;
      for(size_t i = stn.low + 1; i < stn.up; i++){
        uchar c = 0;
        if(stn.depth < words[i].first.size())
          c = static_cast<uchar>((words[i].first)[stn.depth]);

        if(c != c_before){
          codes.push_back(c_before);
          ranges.push_back(std::make_pair(low_before, i));
          c_before = c;
          low_before = i;
        }
      }
      codes.push_back(c_before);
      ranges.push_back(std::make_pair(low_before, stn.up));

      // Several words that all end at this depth are duplicates.
      if(codes.size() == 1 && codes[0] == 0 && stn.up - stn.low > 1)
        throw std::logic_error("overlapped");
      assert(codes.size() != 0);

      // Walk the free list for a base such that every child slot is free.
      // The first code is placed on the free slot itself.
      const int fcode = codes[0];
      int base_cand = 0;
      int empty_index = empty_head;
      while(true){
        expand(empty_index);
        if(empty_index >= fcode){
          base_cand = empty_index - fcode;
          bool found = true;
          for(size_t i = 1; i < codes.size(); i++){
            expand(base_cand + codes[i]);
            if(array[base_cand + codes[i]].check >= 0){
              found = false;
              break;
            }
          }
          if(found) break;
        }
        empty_index = -array[empty_index].check;
      }

      // Claim the child slots and queue each run for the next depth.
      array[stn.base].base = base_cand;
      for(size_t i = 0; i < codes.size(); i++){
        const int pos = base_cand + codes[i];
        set_check(pos, stn.base);
        st.push(StackNode(stn.depth + 1, ranges[i].first,
                          ranges[i].second, pos));
      }
    }
  }
public:

  /**
   * Finds every stored word that is a prefix of the NUL-terminated `str`.
   *
   * For each match, in order of increasing length, the matched length is
   * appended to `res_len` and the word's id to `res_id`. Existing contents
   * of both vectors are kept. A null `str` yields no matches. Out-of-range
   * base values (possible after loading damaged data) end the search
   * instead of being used as indices.
   */
  void commonPrefixSearch(const char *str,
                          std::vector<int> &res_len,
                          std::vector<int> &res_id) const {
    if(!str || array.empty()) return;
    const int limit = static_cast<int>(array.size());
    const char *p = str;
    int state = 0;
    while(true){
      const int t = array[state].base;
      if(t < 0 || t >= limit) return;
      // Offset 0 below a non-root state is the terminal child.
      if(state != 0 && array[t].check == state){
        res_len.push_back(static_cast<int>(p - str));
        res_id.push_back(-array[t].base);
      }
      const uchar c = static_cast<uchar>(*p);
      const int next = t + c;
      if(c == 0 || next >= limit || array[next].check != state) return;
      state = next;
      ++p;
    }
  }

  /** Builds the trie from (word, id) pairs; `v` is sorted in place. */
  DoubleArray(std::vector<std::pair<std::string,int> > &v) : empty_head(1) {
    build(v);
  }

  /** Creates an empty trie (root only) that matches nothing. */
  DoubleArray() : array(1), empty_head(1) {}

  /**
   * Writes the trie to `os` as raw native-endian ints: the index of the
   * last used slot, followed by a (base, check) pair for every slot up to
   * and including that index. Returns false if the stream reports failure.
   */
  bool save(std::ostream &os) const {
    // Trailing free slots carry no information and are not written.
    int last = static_cast<int>(array.size()) - 1;
    while(last >= 0 && array[last].check < 0) --last;

    std::vector<int> v(static_cast<size_t>(last + 1) * 2 + 1);
    v[0] = last;
    for(int j = 0; j <= last; j++){
      v[j*2+1] = array[j].base;
      v[j*2+2] = array[j].check;
    }
    os.write(reinterpret_cast<const char *>(&v[0]),
             static_cast<std::streamsize>(v.size() * sizeof(int)));
    return !os.fail();
  }

  /**
   * Reads a trie previously written by save(). Returns false, leaving the
   * current contents untouched, when the header is missing or negative or
   * when fewer bytes than announced are available.
   */
  bool load(std::istream &is){
    int last = 0;
    is.read(reinterpret_cast<char *>(&last), sizeof(int));
    if(is.fail() || last < 0) return false;

    // The header is the last index, so there are last + 1 slots.
    const size_t count = static_cast<size_t>(last) + 1;
    std::vector<int> v(count * 2);
    const std::streamsize bytes =
      static_cast<std::streamsize>(v.size() * sizeof(int));
    is.read(reinterpret_cast<char *>(&v[0]), bytes);
    if(is.gcount() != bytes) return false;

    std::vector<node> loaded(count);
    for(size_t j = 0; j < count; j++){
      loaded[j].base = v[j*2];
      loaded[j].check = v[j*2+1];
    }
    array.swap(loaded);
    return true;
  }

private:
  std::vector<node> array;
  int empty_head;
};
204
+
205
+ #include <fstream>
206
+ using namespace std;
207
+ int
208
+ main(int argc, char *argv[]){
209
+ string line;
210
+ vector<pair<string,int> > lines;
211
+ int n = 1;
212
+ while(getline(cin,line)){
213
+ lines.push_back(make_pair(line,n++));
214
+ }
215
+ ofstream ofs("test.dat");
216
+ DoubleArray da(lines);
217
+ vector<int> v1;
218
+ vector<int> v2;
219
+ if(argc < 1 || argv[1] == NULL){
220
+ da.commonPrefixSearch("bisons",v1,v2);
221
+ for(size_t i = 0; i < v1.size(); i++)
222
+ cout << v1[i] << " " << v2[i] << endl;
223
+ }else{
224
+ da.commonPrefixSearch(argv[1],v1,v2);
225
+ }
226
+ return 0;
227
+ }
228
+ #endif /* _DOUBLE_ARRAY_H_ */
@@ -0,0 +1,38 @@
1
+ /*
2
+ * WRITTEN BY Masahiko Higashiyama in 2010.
3
+ *
4
+ * THIS CODE IS IN PUBLIC DOMAIN.
5
+ * THIS SOFTWARE IS COMPLETELY FREE TO COPY, MODIFY AND/OR RE-DISTRIBUTE.
6
+ *
7
+ * THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
8
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
9
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
10
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
11
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
12
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
13
+ * SOFTWARE.
14
+ */
15
+ #ifndef _PROFILE_H_
16
+ #define _PROFILE_H_
17
+
18
+ #include <sys/resource.h>
19
+ #include <limits.h>
20
+ #include <stdlib.h>
21
+ #include <errno.h>
22
+ #include <cstdio>
23
+
24
/**
 * Returns the user CPU time consumed so far by this process, in seconds,
 * as reported by getrusage(RUSAGE_SELF). Returns 0.0 if getrusage() fails.
 *
 * Declared inline because it is defined in a header; without it every
 * translation unit including this file would emit its own definition.
 */
inline double GetusageSec()
{
  struct rusage usage;
  if (getrusage(RUSAGE_SELF, &usage) != 0) {
    // On failure `usage` is not filled in; do not read it.
    return 0.0;
  }
  return static_cast<double>(usage.ru_utime.tv_sec) +
         static_cast<double>(usage.ru_utime.tv_usec) * 1e-6;
}
32
+
33
/**
 * Prints the elapsed time `after - before` to stdout as " time = <value>"
 * followed by a newline.
 *
 * Declared inline because it is defined in a header; without it every
 * translation unit including this file would emit its own definition.
 */
inline void PrintTime(double before, double after)
{
  // <cstdio> is what this header includes, so use the std-qualified name.
  std::printf(" time = %10.30f\n", after-before);
}
37
+
38
+ #endif /* _PROFILE_H_ */
data/ruby_da.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'ruby_da/version'
5
+
6
# Gem specification for ruby_da. The file list comes from `git ls-files`,
# extended per platform: JRuby builds ship the jars and the javada tree,
# every other platform builds the native extension and ships the libda tree.
Gem::Specification.new do |spec|
  spec.name          = "ruby_da"
  spec.version       = RubyDa::VERSION
  spec.authors       = ["Masahiko Higashiyama"]
  spec.email         = ["masahiko.higashiyama@gmail.com"]

  spec.summary       = %q{Double Array Implementation}
  spec.description   = %q{Double Array Implementation}
  spec.homepage      = "https://github.com/shnya/ruby_da"
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
  if RUBY_PLATFORM =~ /java/
    spec.platform = "java"
    spec.files << "lib/java-da.jar"
    spec.files << "lib/ruby_da.jar"
    # The dots are escaped so that only literal ".git" / ".idea" path parts
    # are rejected; an unescaped "." also matches names such as "digit".
    spec.files.concat(`find ./javada -print0`.split("\x0").reject{ |f| f.match(%r{(\.git|\.idea|test|target)}) })
  else
    spec.platform = Gem::Platform::RUBY
    spec.extensions = ["ext/ruby_da/extconf.rb"]
    # NOTE(review): `-print0` precedes `-name "*.hpp"`, so find prints every
    # path and the name filter has no effect. Left unchanged because moving
    # it would change which files are packaged -- confirm the intent.
    spec.files.concat(`find ./libda -print0 -name "*.hpp"`.split("\x0").reject{ |f| f.match(%r{(\.git|test)}) })
  end

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rake-compiler"
  spec.add_development_dependency "minitest"
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby_da
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Masahiko Higashiyama
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Double Array Implementation
70
+ email:
71
+ - masahiko.higashiyama@gmail.com
72
+ executables: []
73
+ extensions:
74
+ - ext/ruby_da/extconf.rb
75
+ extra_rdoc_files: []
76
+ files:
77
+ - "./libda/README.md"
78
+ - "./libda/include/double_array.hpp"
79
+ - "./libda/include/double_array_internal.hpp"
80
+ - "./libda/include/utf8.hpp"
81
+ - "./libda/src/double_array.cc"
82
+ - "./libda/src/double_array_static.cc"
83
+ - "./libda/src/profile.hpp"
84
+ - ".gitignore"
85
+ - ".gitmodules"
86
+ - CODE_OF_CONDUCT.md
87
+ - Gemfile
88
+ - LICENSE.txt
89
+ - README.md
90
+ - Rakefile
91
+ - bin/console
92
+ - bin/setup
93
+ - ext/ruby_da/RubyDaService.java
94
+ - ext/ruby_da/extconf.rb
95
+ - ext/ruby_da/ruby_da.cpp
96
+ - ext/ruby_da/ruby_da.h
97
+ - lib/ruby_da.rb
98
+ - lib/ruby_da/version.rb
99
+ - ruby_da.gemspec
100
+ homepage: https://github.com/shnya/ruby_da
101
+ licenses:
102
+ - MIT
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 2.2.2
121
+ signing_key:
122
+ specification_version: 4
123
+ summary: Double Array Implementation
124
+ test_files: []