ruby_da 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.gitmodules +6 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +38 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/ext/ruby_da/RubyDaService.java +153 -0
- data/ext/ruby_da/extconf.rb +9 -0
- data/ext/ruby_da/ruby_da.cpp +171 -0
- data/ext/ruby_da/ruby_da.h +6 -0
- data/lib/ruby_da/version.rb +3 -0
- data/lib/ruby_da.rb +12 -0
- data/libda/README.md +1 -0
- data/libda/include/double_array.hpp +138 -0
- data/libda/include/double_array_internal.hpp +358 -0
- data/libda/include/utf8.hpp +117 -0
- data/libda/src/double_array.cc +114 -0
- data/libda/src/double_array_static.cc +228 -0
- data/libda/src/profile.hpp +38 -0
- data/ruby_da.gemspec +36 -0
- metadata +124 -0
@@ -0,0 +1,228 @@
|
|
1
|
+
#ifndef _DOUBLE_ARRAY_H_
|
2
|
+
#define _DOUBLE_ARRAY_H_
|
3
|
+
#include <vector>
|
4
|
+
#include <iostream>
|
5
|
+
#include <stack>
|
6
|
+
#include <string>
|
7
|
+
#include <utility>
|
8
|
+
#include <algorithm>
|
9
|
+
#include <stdexcept>
|
10
|
+
#include <cassert>
|
11
|
+
|
12
|
+
|
13
|
+
/**
 * Static double-array trie mapping byte strings to integer ids.
 *
 * The trie is built once from a list of (word, id) pairs and can then be
 * queried with commonPrefixSearch(), or serialised with save() / load().
 *
 * Layout of a cell (see struct node):
 *  - The root lives at index 0.
 *  - For an internal node at index s, the child reached by byte c lives at
 *    array[s].base + c and has check == s.  Byte 0 is used as the
 *    end-of-word terminator.
 *  - A terminator child is a leaf; its base holds the negated word id.
 *  - An unused cell has a negative check: -(index of the next unused cell).
 *    These cells form a singly linked free list starting at empty_head.
 */
class DoubleArray {
  typedef unsigned char uchar;

  /** One cell of the double array. */
  struct node {
    int base;
    int check;
  };

  /**
   * Pending work item of the iterative build: the words in [low, up) share
   * their first `depth` bytes and hang below the trie node at index `base`.
   */
  struct StackNode {
    size_t depth;
    size_t low;
    size_t up;
    int base;
    StackNode(size_t d, size_t l, size_t u, int b) :
      depth(d), low(l), up(u), base(b) {}
  };

  /** Byte of `word` at `depth`, or 0 (the terminator) past its end. */
  static uchar code_at(const std::string &word, size_t depth){
    if(depth < word.size())
      return static_cast<uchar>(word[depth]);
    return 0;
  }

  /**
   * Claims the free cell `pos` for a child of node `base`: unlinks it from
   * the free list and stores the parent index in its check field.
   *
   * @throws std::logic_error if `pos` is not on the free list (previously
   *         this wrote past the end of the array).
   */
  void set_check(int pos, int base){
    if(pos == empty_head){
      empty_head = -array[pos].check;
    }else{
      // Walk the free list to find the predecessor of pos.
      size_t prev = static_cast<size_t>(empty_head);
      while(prev < array.size() && -array[prev].check != pos){
        prev = static_cast<size_t>(-array[prev].check);
      }
      if(prev >= array.size())
        throw std::logic_error("free list corrupted");
      array[prev].check = array[pos].check;
    }
    array[pos].check = base;
  }

  /**
   * Doubles the array until index `pos` exists.  New cells are appended to
   * the free list: the last old cell already points at the first new one.
   */
  void expand(size_t pos){
    while(pos >= array.size()){
      const size_t old_size = array.size();
      array.resize(old_size * 2);
      for(size_t i = old_size; i < array.size(); i++){
        array[i].check = -static_cast<int>(i + 1);
      }
    }
  }

  /**
   * Builds the trie from `words`.  The vector is sorted in place (descending)
   * so that words sharing a prefix are adjacent.
   *
   * @throws std::logic_error("overlapped") if the same word occurs twice.
   */
  void build(std::vector<std::pair<std::string,int> > &words){
    std::sort(words.rbegin(), words.rend());

    // Root at 0; every other cell starts on the free list.
    array.assign(512, node());
    array[0].check = 0;
    for(size_t i = 1; i < array.size(); i++)
      array[i].check = -static_cast<int>(i + 1);
    empty_head = 1;

    // Nothing to insert: keep only the root so that searches find no match
    // (the loop below would otherwise read words[0] of an empty vector).
    if(words.empty()) return;

    std::stack<StackNode> st;
    st.push(StackNode(0, 0, words.size(), 0));

    while(!st.empty()){
      StackNode stn = st.top(); st.pop();

      // A single word whose terminator has already been consumed: this node
      // is the leaf, store the negated id.
      if(stn.up - stn.low == 1 && words[stn.low].first.size() < stn.depth){
        array[stn.base].base = -words[stn.low].second;
        continue;
      }

      // Group the words of this range by their byte at the current depth.
      std::vector<uchar> codes;
      std::vector<std::pair<size_t, size_t> > ranges;
      uchar c_before = code_at(words[stn.low].first, stn.depth);
      size_t low_before = stn.low;
      for(size_t i = stn.low + 1; i < stn.up; i++){
        const uchar c = code_at(words[i].first, stn.depth);
        if(c != c_before){
          codes.push_back(c_before);
          ranges.push_back(std::make_pair(low_before, i));
          c_before = c;
          low_before = i;
        }
      }
      codes.push_back(c_before);
      ranges.push_back(std::make_pair(low_before, stn.up));

      // Several words ending at the same place means duplicate keys.
      if(codes.size() == 1 && codes[0] == 0 && stn.up - stn.low > 1)
        throw std::logic_error("overlapped");
      assert(codes.size() != 0);

      // Find a base such that base + code is a free cell for every code.
      // Candidates are generated by placing the first code on each free cell.
      const int fcode = codes[0];
      int base_cand = empty_head - fcode;
      int empty_index = empty_head;
      while(1){
        expand(empty_index);
        if(empty_index - fcode >= 0){
          base_cand = empty_index - fcode;
          bool found = true;
          for(size_t i = 1; i < codes.size(); i++){
            expand(base_cand + codes[i]);
            if(array[base_cand + codes[i]].check >= 0){
              found = false;
              break;
            }
          }
          if(found) break;
        }
        empty_index = -array[empty_index].check;
      }

      // Commit the base, claim the child cells and queue the sub-ranges.
      array[stn.base].base = base_cand;
      for(size_t i = 0; i < codes.size(); i++){
        const int pos = base_cand + codes[i];
        set_check(pos, stn.base);
        st.push(StackNode(stn.depth + 1, ranges[i].first,
                          ranges[i].second, pos));
      }
    }
  }
public:

  /**
   * Finds every stored word that is a prefix of `str`.
   *
   * For each match, the prefix length is appended to `res_len` and the word
   * id to `res_id` (shortest match first).  The output vectors are appended
   * to, not cleared.  A stored empty word is never reported.
   *
   * @param str NUL-terminated query string.
   */
  void commonPrefixSearch(const char *str,
                          std::vector<int> &res_len,
                          std::vector<int> &res_id) const {
    if(array.empty()) return;
    const int size = static_cast<int>(array.size());
    const char *p = str;
    int state = 0;
    while(1){
      const int t = array[state].base;
      // A negative base belongs to a leaf (or to corrupt loaded data) and a
      // base past the end has no cells at all: nothing further to follow.
      if(t < 0 || t >= size) return;
      // The terminator child (byte 0) sits exactly at t.
      if(state != 0 && array[t].check == state){
        res_len.push_back(static_cast<int>(p - str));
        res_id.push_back(-array[t].base);
      }
      const uchar c = static_cast<uchar>(*p);
      if(c == 0) return;                           // end of the query
      if(static_cast<int>(c) >= size - t) return;  // t + c is out of range
      if(array[t + c].check != state) return;      // no such transition
      state = t + c;
      ++p;
    }
  }

  /** Builds the trie from `v`; see build() for side effects and errors. */
  DoubleArray(std::vector<std::pair<std::string,int> > &v) : empty_head(1) {
    build(v);
  }

  /** Creates an empty trie consisting of the root only. */
  DoubleArray() : array(1), empty_head(1) {}

  /**
   * Writes the trie to `os` as raw native-endian ints: the index of the last
   * used cell, followed by the (base, check) pair of every cell up to and
   * including that index.
   *
   * @return false if the stream reports a failure.
   */
  bool save(std::ostream &os) const {
    // Trailing free cells carry no information; drop them.
    int last = static_cast<int>(array.size()) - 1;
    while(last >= 0 && array[last].check < 0) --last;

    std::vector<int> v(static_cast<size_t>(last + 1) * 2 + 1);
    v[0] = last;
    for(int j = 0; j <= last; j++){
      v[j*2+1] = array[j].base;
      v[j*2+2] = array[j].check;
    }
    os.write(reinterpret_cast<const char *>(&v[0]), v.size()*sizeof(int));
    return !os.fail();
  }

  /**
   * Reads a trie previously written by save().
   *
   * The current contents are replaced only on success.
   *
   * @return false if the header is missing or negative, or if the stream
   *         ends before all cells have been read.
   */
  bool load(std::istream &is){
    int last = 0;
    is.read(reinterpret_cast<char *>(&last), sizeof(int));
    if(is.fail() || last < 0) return false;

    // The header is the last used index, so there are last + 1 cells.
    const size_t count = static_cast<size_t>(last) + 1;
    std::vector<int> v(count * 2);
    is.read(reinterpret_cast<char *>(&v[0]), v.size()*sizeof(int));
    if(is.fail()) return false;

    std::vector<node> loaded(count);
    for(size_t j = 0; j < count; j++){
      loaded[j].base = v[j*2];
      loaded[j].check = v[j*2+1];
    }
    array.swap(loaded);
    return true;
  }

private:
  std::vector<node> array;  // the double array itself; index 0 is the root
  int empty_head;           // first cell of the free list (build time only)
};
|
204
|
+
|
205
|
+
#include <fstream>
|
206
|
+
using namespace std;
|
207
|
+
int
|
208
|
+
main(int argc, char *argv[]){
|
209
|
+
string line;
|
210
|
+
vector<pair<string,int> > lines;
|
211
|
+
int n = 1;
|
212
|
+
while(getline(cin,line)){
|
213
|
+
lines.push_back(make_pair(line,n++));
|
214
|
+
}
|
215
|
+
ofstream ofs("test.dat");
|
216
|
+
DoubleArray da(lines);
|
217
|
+
vector<int> v1;
|
218
|
+
vector<int> v2;
|
219
|
+
if(argc < 1 || argv[1] == NULL){
|
220
|
+
da.commonPrefixSearch("bisons",v1,v2);
|
221
|
+
for(size_t i = 0; i < v1.size(); i++)
|
222
|
+
cout << v1[i] << " " << v2[i] << endl;
|
223
|
+
}else{
|
224
|
+
da.commonPrefixSearch(argv[1],v1,v2);
|
225
|
+
}
|
226
|
+
return 0;
|
227
|
+
}
|
228
|
+
#endif /* _DOUBLE_ARRAY_H_ */
|
@@ -0,0 +1,38 @@
|
|
1
|
+
/*
|
2
|
+
* WRITTEN BY Masahiko Higashiyama in 2010.
|
3
|
+
*
|
4
|
+
* THIS CODE IS IN PUBLIC DOMAIN.
|
5
|
+
* THIS SOFTWARE IS COMPLETELY FREE TO COPY, MODIFY AND/OR RE-DISTRIBUTE.
|
6
|
+
*
|
7
|
+
* THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
8
|
+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
9
|
+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
10
|
+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
11
|
+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
12
|
+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
13
|
+
* SOFTWARE.
|
14
|
+
*/
|
15
|
+
#ifndef _PROFILE_H_
|
16
|
+
#define _PROFILE_H_
|
17
|
+
|
18
|
+
#include <sys/resource.h>
|
19
|
+
#include <limits.h>
|
20
|
+
#include <stdlib.h>
|
21
|
+
#include <errno.h>
|
22
|
+
#include <cstdio>
|
23
|
+
|
24
|
+
/**
 * Returns the user CPU time consumed so far by the calling process, in
 * seconds (getrusage(RUSAGE_SELF), ru_utime).
 *
 * Declared inline because this function is defined in a header; without it,
 * including the header from two translation units is a multiple-definition
 * link error.
 *
 * @return seconds of user CPU time, or 0.0 if getrusage() fails.
 */
inline double GetusageSec()
{
  struct rusage usage = {};
  if(getrusage(RUSAGE_SELF, &usage) != 0)
    return 0.0;
  return usage.ru_utime.tv_sec +
         static_cast<double>(usage.ru_utime.tv_usec) * 1e-6;
}
|
32
|
+
|
33
|
+
/**
 * Prints the elapsed time `after - before` to standard output in the form
 * " time = <seconds>".
 *
 * Declared inline because this function is defined in a header; without it,
 * including the header from two translation units is a multiple-definition
 * link error.
 *
 * @param before start timestamp in seconds (e.g. from GetusageSec()).
 * @param after  end timestamp in seconds.
 */
inline void PrintTime(double before, double after)
{
  const double elapsed = after - before;
  printf(" time = %10.30f\n", elapsed);
}
|
37
|
+
|
38
|
+
#endif /* _PROFILE_H_ */
|
data/ruby_da.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'ruby_da/version'
|
5
|
+
|
6
|
+
# Gem specification for ruby_da.  On JRuby the gem ships prebuilt jars plus
# the javada sources; on every other platform it ships the libda sources and
# builds a native extension through ext/ruby_da/extconf.rb.
Gem::Specification.new do |spec|
  spec.name          = "ruby_da"
  spec.version       = RubyDa::VERSION
  spec.authors       = ["Masahiko Higashiyama"]
  spec.email         = ["masahiko.higashiyama@gmail.com"]

  spec.summary       = %q{Double Array Implementation}
  spec.description   = %q{Double Array Implementation}
  spec.homepage      = "https://github.com/shnya/ruby_da"
  spec.license       = "MIT"

  # Everything tracked by git except test directories.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
  if RUBY_PLATFORM =~ /java/
    spec.platform = "java"
    spec.files << "lib/java-da.jar"
    spec.files << "lib/ruby_da.jar"
    # Dots are escaped so that only literal ".git" / ".idea" path components
    # are excluded (an unescaped "." matches any character).
    spec.files.concat(`find ./javada -print0`.split("\x0").reject{ |f| f.match(%r{(\.git|\.idea|test|target)}) })
  else
    spec.platform = Gem::Platform::RUBY
    spec.extensions = ["ext/ruby_da/extconf.rb"]
    # NOTE(review): "-print0" precedes "-name", so find prints every path
    # under ./libda and the "*.hpp" filter has no effect on the output.
    # Left as is because reordering would change the packaged file list.
    spec.files.concat(`find ./libda -print0 -name "*.hpp"`.split("\x0").reject{ |f| f.match(%r{(\.git|test)}) })
  end

  spec.add_development_dependency "bundler", "~> 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rake-compiler"
  spec.add_development_dependency "minitest"
end
|
metadata
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ruby_da
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Masahiko Higashiyama
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-05-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.9'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.9'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rake-compiler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Double Array Implementation
|
70
|
+
email:
|
71
|
+
- masahiko.higashiyama@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions:
|
74
|
+
- ext/ruby_da/extconf.rb
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- "./libda/README.md"
|
78
|
+
- "./libda/include/double_array.hpp"
|
79
|
+
- "./libda/include/double_array_internal.hpp"
|
80
|
+
- "./libda/include/utf8.hpp"
|
81
|
+
- "./libda/src/double_array.cc"
|
82
|
+
- "./libda/src/double_array_static.cc"
|
83
|
+
- "./libda/src/profile.hpp"
|
84
|
+
- ".gitignore"
|
85
|
+
- ".gitmodules"
|
86
|
+
- CODE_OF_CONDUCT.md
|
87
|
+
- Gemfile
|
88
|
+
- LICENSE.txt
|
89
|
+
- README.md
|
90
|
+
- Rakefile
|
91
|
+
- bin/console
|
92
|
+
- bin/setup
|
93
|
+
- ext/ruby_da/RubyDaService.java
|
94
|
+
- ext/ruby_da/extconf.rb
|
95
|
+
- ext/ruby_da/ruby_da.cpp
|
96
|
+
- ext/ruby_da/ruby_da.h
|
97
|
+
- lib/ruby_da.rb
|
98
|
+
- lib/ruby_da/version.rb
|
99
|
+
- ruby_da.gemspec
|
100
|
+
homepage: https://github.com/shnya/ruby_da
|
101
|
+
licenses:
|
102
|
+
- MIT
|
103
|
+
metadata: {}
|
104
|
+
post_install_message:
|
105
|
+
rdoc_options: []
|
106
|
+
require_paths:
|
107
|
+
- lib
|
108
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
109
|
+
requirements:
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: '0'
|
113
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
requirements: []
|
119
|
+
rubyforge_project:
|
120
|
+
rubygems_version: 2.2.2
|
121
|
+
signing_key:
|
122
|
+
specification_version: 4
|
123
|
+
summary: Double Array Implementation
|
124
|
+
test_files: []
|