ruby_da 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,228 @@
1
+ #ifndef _DOUBLE_ARRAY_H_
2
+ #define _DOUBLE_ARRAY_H_
3
+ #include <vector>
4
+ #include <iostream>
5
+ #include <stack>
6
+ #include <string>
7
+ #include <utility>
8
+ #include <algorithm>
9
+ #include <stdexcept>
10
+ #include <cassert>
11
+
12
+
13
// Static double-array trie that maps byte strings to integer ids.
//
// Each slot holds a `base` and a `check` value:
//   * for a used slot, `check` is the index of its parent slot and the child
//     reached from slot `s` on byte code `c` lives at `array[s].base + c`;
//   * code 0 marks the end of a word; the slot reached through it stores the
//     negated id in `base`;
//   * for a free slot, `check` is negative and `-check` is the index of the
//     next free slot (a singly linked free list starting at `empty_head`).
//
// The trie is built once by the constructor and can then be queried, saved
// and loaded. save()/load() use the host's raw `int` representation.
class DoubleArray {
  typedef unsigned char uchar;

  // One slot of the double array.
  struct node {
    int base;
    int check;
  };

  // Pending unit of work for the iterative build: the words in [low, up)
  // share their first `depth` bytes and hang below slot `base`.
  struct StackNode {
    size_t depth;
    size_t low;
    size_t up;
    int base;
    StackNode(size_t d, size_t l, size_t u, int b) :
      depth(d), low(l), up(u), base(b) {}
  };

  // Takes the free slot `pos` off the free list and records `base` as its
  // parent. Throws std::logic_error if `pos` is not on the free list instead
  // of writing past the end of the array.
  void set_check(int pos, int base){
    if(pos == empty_head){
      empty_head = -array[pos].check;
    }else{
      // Walk the free list to find the slot that links to `pos`.
      size_t prev = empty_head;
      while(prev < array.size() && -array[prev].check != pos){
        prev = -array[prev].check;
      }
      if(prev >= array.size())
        throw std::logic_error("slot is not free");
      array[prev].check = array[pos].check;
    }
    array[pos].check = base;
  }

  // Doubles the array until index `pos` is valid. Every new slot is linked to
  // its successor, which extends the free list because the last free slot
  // always points at the old end of the array.
  void expand(size_t pos){
    while(pos >= array.size()){
      const size_t old_size = array.size();
      array.resize(old_size * 2);
      for(size_t i = old_size; i < array.size(); i++){
        array[i].check = -static_cast<int>(i + 1);
      }
    }
  }

  // Builds the trie from (word, id) pairs. `words` is sorted in place in
  // descending order. Throws std::logic_error("overlapped") when several
  // entries end at the same position, i.e. the same word occurs twice.
  void build(std::vector<std::pair<std::string,int> > &words){
    std::sort(words.rbegin(), words.rend());

    array.clear();
    array.resize(512);
    array[0].check = 0;  // slot 0 is the root and is never free
    for(size_t i = 1; i < array.size(); i++)
      array[i].check = -static_cast<int>(i + 1);
    empty_head = 1;

    // Nothing to insert: keep the empty trie rather than indexing words[0].
    if(words.empty()) return;

    std::stack<StackNode> st;
    st.push(StackNode(0, 0, words.size(), 0));

    while(!st.empty()){
      const StackNode stn = st.top(); st.pop();

      // A single word whose terminator has already been consumed: leaf.
      if(stn.up - stn.low == 1 && words[stn.low].first.size() < stn.depth){
        array[stn.base].base = -words[stn.low].second;
        continue;
      }

      // Group the words of this range by their byte at `depth`
      // (0 stands for "word ends here").
      std::vector<uchar> codes;
      std::vector<std::pair<size_t, size_t> > ranges;
      uchar c_before = 0;
      if(words[stn.low].first.size() > stn.depth)
        c_before = static_cast<uchar>((words[stn.low].first)[stn.depth]);

      size_t low_before = stn.low;
      for(size_t i = stn.low+1; i < stn.up; i++){
        uchar c = 0;
        if(stn.depth < words[i].first.size())
          c = static_cast<uchar>((words[i].first)[stn.depth]);

        if(c != c_before){
          codes.push_back(c_before);
          ranges.push_back(std::make_pair(low_before,i));
          c_before = c;
          low_before = i;
        }
      }
      codes.push_back(c_before);
      ranges.push_back(std::make_pair(low_before, stn.up));

      // Reject invalid input: more than one word ending at this position.
      if(codes.size() == 1 && codes[0] == 0 && stn.up - stn.low > 1)
        throw std::logic_error("overlapped");
      assert(codes.size() != 0);

      // Find a base such that base + code is a free slot for every code.
      // Candidates are generated by placing the first code on each free slot.
      const int fcode = codes[0];
      int base_cand = empty_head - fcode;
      int empty_index = empty_head;
      while(1){
        expand(empty_index);
        if(empty_index - fcode < 0){
          empty_index = -array[empty_index].check;
          continue;
        }
        bool found = true;
        base_cand = empty_index - fcode;
        for(size_t i = 1; i < codes.size(); i++){
          expand(base_cand + codes[i]);
          if(array[base_cand + codes[i]].check >= 0){
            found = false;
            break;
          }
        }
        if(found) break;
        empty_index = -array[empty_index].check;
      }

      // Commit the base, claim the child slots and schedule the sub-ranges.
      array[stn.base].base = base_cand;
      for(size_t i = 0; i < codes.size(); i++){
        const int pos = base_cand + codes[i];
        set_check(pos, stn.base);
        st.push(StackNode(stn.depth+1, ranges[i].first,
                          ranges[i].second, pos));
      }
    }
  }
public:

  // Appends to `res_len`/`res_id` the length and id of every stored word that
  // is a prefix of the NUL-terminated string `str`, shortest first.
  // The empty word is never reported (the root state is skipped).
  void commonPrefixSearch(const char *str,
                          std::vector<int> &res_len,
                          std::vector<int> &res_id) const {
    const int size = static_cast<int>(array.size());
    const char *p = str;
    int state = 0;
    while(1){
      const int t = array[state].base;
      // Inner states built by this class have a non-negative base; bail out
      // on anything else (e.g. a corrupt file) instead of indexing before
      // the array.
      if(t < 0) return;
      if(state != 0 && t < size && array[t].check == state){
        res_len.push_back(static_cast<int>(p - str));
        res_id.push_back(-array[t].base);
      }
      const int c = static_cast<uchar>(*p);
      const int next = t + c;
      if(c == 0 || next >= size || array[next].check != state) return;
      state = next;
      ++p;
    }
  }

  // Builds the trie from `v`. Note that `v` is sorted in place.
  DoubleArray(std::vector<std::pair<std::string,int> > &v) : empty_head(1) {
    build(v);
  }

  // Creates an empty trie, typically to be filled by load().
  DoubleArray() : array(1), empty_head(0) {}

  // Writes the trie to `os`: one int holding the index of the last used
  // slot, followed by the (base, check) pairs of slots 0..last.
  // Returns false if the stream reports a failure.
  bool save(std::ostream &os) const {
    int last = static_cast<int>(array.size()) - 1;
    while(last >= 0 && array[last].check < 0) --last;  // drop trailing free slots

    std::vector<int> v((last + 1) * 2 + 1);
    v[0] = last;
    for(int j = 0; j <= last; j++){
      v[j*2+1] = array[j].base;
      v[j*2+2] = array[j].check;
    }
    os.write(reinterpret_cast<const char *>(&v[0]),
             static_cast<std::streamsize>(v.size() * sizeof(int)));
    return !os.fail();
  }

  // Reads a trie previously written by save(). Returns false, leaving the
  // current contents untouched, when the header is negative or the stream
  // ends before all slots have been read.
  bool load(std::istream &is){
    int last = 0;
    is.read(reinterpret_cast<char *>(&last), sizeof(int));
    if(is.fail() || last < 0) return false;

    // The header is the last index, so there are last + 1 slots.
    const size_t count = static_cast<size_t>(last) + 1;
    std::vector<int> v(count * 2);
    is.read(reinterpret_cast<char *>(&v[0]),
            static_cast<std::streamsize>(v.size() * sizeof(int)));
    if(is.fail()) return false;  // a short read sets failbit as well

    std::vector<node> loaded(count);
    for(size_t j = 0; j < count; j++){
      loaded[j].base = v[j*2];
      loaded[j].check = v[j*2+1];
    }
    array.swap(loaded);
    return true;
  }

private:
  std::vector<node> array;  // the double array itself
  int empty_head;           // index of the first free slot (used while building)
};
204
+
205
+ #include <fstream>
206
+ using namespace std;
207
+ int
208
+ main(int argc, char *argv[]){
209
+ string line;
210
+ vector<pair<string,int> > lines;
211
+ int n = 1;
212
+ while(getline(cin,line)){
213
+ lines.push_back(make_pair(line,n++));
214
+ }
215
+ ofstream ofs("test.dat");
216
+ DoubleArray da(lines);
217
+ vector<int> v1;
218
+ vector<int> v2;
219
+ if(argc < 1 || argv[1] == NULL){
220
+ da.commonPrefixSearch("bisons",v1,v2);
221
+ for(size_t i = 0; i < v1.size(); i++)
222
+ cout << v1[i] << " " << v2[i] << endl;
223
+ }else{
224
+ da.commonPrefixSearch(argv[1],v1,v2);
225
+ }
226
+ return 0;
227
+ }
228
+ #endif /* _DOUBLE_ARRAY_H_ */
@@ -0,0 +1,38 @@
1
+ /*
2
+ * WRITTEN BY Masahiko Higashiyama in 2010.
3
+ *
4
+ * THIS CODE IS IN PUBLIC DOMAIN.
5
+ * THIS SOFTWARE IS COMPLETELY FREE TO COPY, MODIFY AND/OR RE-DISTRIBUTE.
6
+ *
7
+ * THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
8
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
9
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
10
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
11
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
12
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
13
+ * SOFTWARE.
14
+ */
15
+ #ifndef _PROFILE_H_
16
+ #define _PROFILE_H_
17
+
18
+ #include <sys/resource.h>
19
+ #include <limits.h>
20
+ #include <stdlib.h>
21
+ #include <errno.h>
22
+ #include <cstdio>
23
+
24
// Returns the user-mode CPU time consumed so far by the calling process,
// in seconds, as reported by getrusage(RUSAGE_SELF).
// Declared inline because it is defined in a header; without it, including
// this file from more than one translation unit causes duplicate definitions.
inline double GetusageSec()
{
  // Zero-initialised so that a failing getrusage() yields 0.0 rather than
  // indeterminate values.
  struct rusage usage = {};
  getrusage(RUSAGE_SELF, &usage);
  const struct timeval &tv = usage.ru_utime;
  return tv.tv_sec + static_cast<double>(tv.tv_usec) * 1e-6;
}
32
+
33
// Prints the elapsed time `after - before` (both in seconds, e.g. two values
// returned by GetusageSec()) to standard output.
// Declared inline because it is defined in a header; without it, including
// this file from more than one translation unit causes duplicate definitions.
inline void PrintTime(double before, double after)
{
  const double elapsed = after - before;
  printf(" time = %10.30f\n", elapsed);
}
37
+
38
+ #endif /* _PROFILE_H_ */
data/ruby_da.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'ruby_da/version'
5
+
6
# Gem specification for ruby_da. On JRuby the gem ships prebuilt jars plus the
# javada sources; on every other platform it ships a native extension together
# with the libda sources.
Gem::Specification.new do |spec|
  spec.name          = "ruby_da"
  spec.version       = RubyDa::VERSION
  spec.authors       = ["Masahiko Higashiyama"]
  spec.email         = ["masahiko.higashiyama@gmail.com"]

  spec.summary       = %q{Double Array Implementation}
  spec.description   = %q{Double Array Implementation}
  spec.homepage      = "https://github.com/shnya/ruby_da"
  spec.license       = "MIT"

  # Everything tracked by git except the test directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
  if RUBY_PLATFORM =~ /java/
    spec.platform = "java"
    spec.files << "lib/java-da.jar"
    spec.files << "lib/ruby_da.jar"
    # The dots are escaped so that only literal ".git" / ".idea" path
    # components are excluded, not any character followed by "git" / "idea".
    spec.files.concat(`find ./javada -print0`.split("\x0").reject{ |f| f.match(%r{(\.git|\.idea|test|target)}) })
  else
    spec.platform = Gem::Platform::RUBY
    spec.extensions = ["ext/ruby_da/extconf.rb"]
    # NOTE(review): -print0 precedes -name, so find emits every path under
    # ./libda and the "*.hpp" test does not filter anything. The command is
    # left as is because the packaged file list relies on the non-header
    # files (README.md, *.cc) being included - confirm before reordering.
    spec.files.concat(`find ./libda -print0 -name "*.hpp"`.split("\x0").reject{ |f| f.match(%r{(\.git|test)}) })
  end

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rake-compiler"
  spec.add_development_dependency "minitest"
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby_da
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Masahiko Higashiyama
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-05-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake-compiler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Double Array Implementation
70
+ email:
71
+ - masahiko.higashiyama@gmail.com
72
+ executables: []
73
+ extensions:
74
+ - ext/ruby_da/extconf.rb
75
+ extra_rdoc_files: []
76
+ files:
77
+ - "./libda/README.md"
78
+ - "./libda/include/double_array.hpp"
79
+ - "./libda/include/double_array_internal.hpp"
80
+ - "./libda/include/utf8.hpp"
81
+ - "./libda/src/double_array.cc"
82
+ - "./libda/src/double_array_static.cc"
83
+ - "./libda/src/profile.hpp"
84
+ - ".gitignore"
85
+ - ".gitmodules"
86
+ - CODE_OF_CONDUCT.md
87
+ - Gemfile
88
+ - LICENSE.txt
89
+ - README.md
90
+ - Rakefile
91
+ - bin/console
92
+ - bin/setup
93
+ - ext/ruby_da/RubyDaService.java
94
+ - ext/ruby_da/extconf.rb
95
+ - ext/ruby_da/ruby_da.cpp
96
+ - ext/ruby_da/ruby_da.h
97
+ - lib/ruby_da.rb
98
+ - lib/ruby_da/version.rb
99
+ - ruby_da.gemspec
100
+ homepage: https://github.com/shnya/ruby_da
101
+ licenses:
102
+ - MIT
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 2.2.2
121
+ signing_key:
122
+ specification_version: 4
123
+ summary: Double Array Implementation
124
+ test_files: []