mem_db 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +18 -0
- data/.gitignore +12 -0
- data/.rspec +3 -0
- data/.rubocop.yml +43 -0
- data/Gemfile +11 -0
- data/LICENSE.txt +21 -0
- data/README.md +105 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/mem_db.rb +102 -0
- data/lib/mem_db/bucket.rb +17 -0
- data/lib/mem_db/field.rb +34 -0
- data/lib/mem_db/field/enum.rb +56 -0
- data/lib/mem_db/field/matching.rb +11 -0
- data/lib/mem_db/field/may_missing.rb +44 -0
- data/lib/mem_db/field/negative.rb +40 -0
- data/lib/mem_db/field/pattern.rb +125 -0
- data/lib/mem_db/field/regexp.rb +68 -0
- data/lib/mem_db/fields.rb +25 -0
- data/lib/mem_db/idx.rb +41 -0
- data/lib/mem_db/idx/bytes.rb +25 -0
- data/lib/mem_db/idx/chars.rb +85 -0
- data/lib/mem_db/idx/default.rb +33 -0
- data/lib/mem_db/idx/itself.rb +25 -0
- data/lib/mem_db/idx/pattern.rb +71 -0
- data/lib/mem_db/idx/reverse.rb +27 -0
- data/lib/mem_db/idx/uniq.rb +36 -0
- data/lib/mem_db/index.rb +32 -0
- data/lib/mem_db/index/any.rb +60 -0
- data/lib/mem_db/index/bucket.rb +20 -0
- data/lib/mem_db/index/enum.rb +54 -0
- data/lib/mem_db/index/pattern_match.rb +104 -0
- data/lib/mem_db/index/prefix_tree.rb +110 -0
- data/lib/mem_db/index/sequence_match.rb +146 -0
- data/lib/mem_db/indexation.rb +17 -0
- data/lib/mem_db/indexing_object.rb +58 -0
- data/lib/mem_db/out.rb +25 -0
- data/lib/mem_db/query.rb +49 -0
- data/lib/mem_db/version.rb +5 -0
- data/mem_db.gemspec +25 -0
- metadata +86 -0
@@ -0,0 +1,110 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "mem_db/out"
|
4
|
+
require "mem_db/index"
|
5
|
+
require "mem_db/index/bucket"
|
6
|
+
require "mem_db/bucket"
|
7
|
+
|
8
|
+
class MemDB
|
9
|
+
module Index
|
10
|
+
class PrefixTree
|
11
|
+
include MemDB::Index
|
12
|
+
|
13
|
+
# Factory object: remembers an idx and a bucket, and builds a fresh
# PrefixTree from them every time `new` is called.
# NOTE(review): presumably passed wherever a bucket class is expected, since
# tree nodes only ever call `new` on their bucket -- confirm against callers.
class Bucket
  include MemDB::Index::Bucket

  # idx:    forwarded unchanged to every PrefixTree built by #new.
  # bucket: forwarded unchanged as well; defaults to MemDB::Bucket.
  def initialize(idx:, bucket: MemDB::Bucket)
    @bucket = bucket
    @idx = idx
  end

  # Returns a new, empty PrefixTree configured with the stored idx/bucket.
  def new
    tree_class = MemDB::Index::PrefixTree
    tree_class.new(idx: @idx, bucket: @bucket)
  end
end
|
25
|
+
|
26
|
+
# Entry point of the prefix tree. Wraps the top-level Item and remembers the
# length of the shortest prefix added so far, so lookups can skip contents
# that are too short to contain any stored prefix.
class Root
  # Sentinel meaning "no prefix stored yet": larger than any real length.
  # This must be exponentiation; `2 ^ 64` is bitwise XOR in Ruby (== 66).
  MAX_LENGTH_DEFAULT = 2**64

  # bucket: handed to the top-level Item, which uses it to create value buckets.
  def initialize(bucket:)
    @item = Item.new(bucket: bucket)
    @min_length = MAX_LENGTH_DEFAULT
  end

  # Walks the tree once for each entry of +contents+ that is at least as
  # long as the shortest stored prefix, letting matching nodes write into
  # +result+. Returns +result+.
  def get(contents, query:, result:)
    contents.each do |content|
      # Shorter than every stored prefix: nothing can match.
      next if @min_length > content.length

      @item.select_values(content, 0, query: query, out: result)
    end

    result
  end

  # Stores +value+ for +obj+ under every entry of +prefixes+ and lowers the
  # tracked minimum length when a shorter prefix arrives.
  def add(prefixes, obj, value)
    prefixes.each do |prefix|
      @min_length = prefix.length if @min_length > prefix.length
      @item.add(prefix, 0, obj, value)
    end
  end
end
|
51
|
+
|
52
|
+
# A single node of the prefix tree. It may carry a value bucket (created
# lazily by #set_value) and child nodes keyed by one element of a prefix.
class Item
  attr_reader :value

  # bucket: object whose `new` produces the per-node value container.
  def initialize(bucket:)
    @bucket = bucket
  end

  # Queries this node's bucket (when present), then follows content[i] to
  # the matching child and repeats from position i + 1. Stops at the end of
  # +content+ or when there is no child for the next element.
  def select_values(content, i, query:, out:)
    @value&.query(query, out: out)
    return if content.length == i || @children.nil?

    child = @children[content[i]]
    child&.select_values(content, i + 1, query: query, out: out)
  end

  # Descends along prefix[i..], creating nodes as needed, and stores the
  # (obj, value) pair in the node reached once the prefix is consumed.
  def add(prefix, i, obj, value)
    return set_value(obj, value) if prefix.length == i

    fetch_children(prefix[i]).add(prefix, i + 1, obj, value)
  end

  # Adds (obj, value) to this node's bucket, creating the bucket on first use.
  def set_value(obj, value)
    (@value ||= @bucket.new).add(obj, value)
  end

  # Returns the child stored under +idx+, creating the children table and
  # the child node on first use.
  def fetch_children(idx)
    children = (@children ||= {})
    children[idx] ||= Item.new(bucket: @bucket)
  end
end
|
90
|
+
|
91
|
+
attr_reader :idx, :bucket
|
92
|
+
|
93
|
+
# idx:    key passed to `idx_value` on indexed objects and queries.
# bucket: handed to Root, which passes it on to the tree nodes;
#         defaults to MemDB::Bucket.
def initialize(idx:, bucket: MemDB::Bucket)
  @bucket = bucket
  @idx = idx
  @root = Root.new(bucket: bucket)
end
|
98
|
+
|
99
|
+
# Indexes +value+ under every prefix that +obj+ reports for this tree's idx.
def add(obj, value)
  prefixes = obj.idx_value(@idx)
  @root.add(prefixes, obj, value)
end
|
102
|
+
|
103
|
+
# Runs +query+ against the tree: the query's values for this tree's idx are
# looked up from the root and matches are written into +out+, which is
# returned.
def query(query, out: MemDB::Out.new)
  contents = query.idx_value(@idx)
  @root.get(contents, query: query, result: out)

  out
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "mem_db/index"
|
4
|
+
require "mem_db/index/bucket"
|
5
|
+
require "mem_db/out"
|
6
|
+
|
7
|
+
class MemDB
|
8
|
+
module Index
|
9
|
+
class SequenceMatch
|
10
|
+
include MemDB::Index
|
11
|
+
|
12
|
+
# Factory object: remembers an idx and a bucket, and builds a fresh
# SequenceMatch index from them every time `new` is called.
# NOTE(review): presumably passed wherever a bucket class is expected --
# confirm against callers.
class Bucket
  include MemDB::Index::Bucket

  # idx:    forwarded unchanged to every SequenceMatch built by #new.
  # bucket: forwarded unchanged as well; defaults to MemDB::Bucket.
  def initialize(idx:, bucket: MemDB::Bucket)
    @bucket = bucket
    @idx = idx
  end

  # Returns a new, empty SequenceMatch configured with the stored idx/bucket.
  def new
    index_class = MemDB::Index::SequenceMatch
    index_class.new(idx: @idx, bucket: @bucket)
  end
end
|
24
|
+
|
25
|
+
# https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
|
26
|
+
# Boyer-Moore matcher for one fixed pattern. The skip tables are built once
# in the constructor; #index then searches any sequence that supports
# `length` and integer `[]`.
class SequenceIndex
  def initialize(pattern)
    @pattern = pattern
    @pattern_length = pattern.length
    @bad_char_skip = {}
    @good_suffix_skip = Array.new(@pattern_length, 0)

    init_tables
  end

  # Returns the start position of a match of the pattern inside +seq+
  # (scanning left to right), or -1 when the scan runs past the end.
  def index(seq)
    tail = @pattern_length - 1
    cursor = tail

    while cursor < seq.length
      j = tail

      # Compare backwards from the pattern's end until a mismatch.
      while j >= 0 && seq[cursor] == @pattern[j]
        cursor -= 1
        j -= 1
      end

      return cursor + 1 if j.negative?

      # Elements absent from the table allow a jump of a full pattern length.
      bad_skip = @bad_char_skip.fetch(seq[cursor], @pattern_length)
      cursor += [bad_skip, @good_suffix_skip[j]].max
    end

    -1
  end

  # Fills the bad-character and good-suffix skip tables for the pattern.
  def init_tables # rubocop:disable Metrics/AbcSize
    last = @pattern_length - 1

    # Bad-character table; the final element is deliberately left out so it
    # never gets a zero distance to itself.
    (0...last).each { |pos| @bad_char_skip[@pattern[pos]] = last - pos }

    # Good-suffix table, first pass: shift to the next position at which the
    # remainder of the pattern is also a prefix of the pattern.
    last_prefix = last
    last.downto(0) do |pos|
      last_prefix = pos + 1 if pattern_suffix?(pos + 1)
      @good_suffix_skip[pos] = last_prefix + last - pos
    end

    # Second pass: account for repeats of the pattern's suffix found earlier
    # in the pattern.
    (0...last).each do |pos|
      len_suffix = longest_pattern_suffix(pos)
      anchor = last - len_suffix

      @good_suffix_skip[anchor] = len_suffix + last - pos if @pattern[pos - len_suffix] != @pattern[anchor]
    end

    nil
  end

  # True when the part of the pattern starting at +pos+ equals the beginning
  # of the pattern (trivially true once +pos+ reaches the pattern's length).
  def pattern_suffix?(pos)
    (0...(@pattern.length - pos)).none? { |offset| @pattern[offset] != @pattern[offset + pos] }
  end

  # Counts how many elements, compared backwards, the pattern's end shares
  # with the stretch of the pattern ending at +pos+; at most +pos+ elements
  # are compared.
  def longest_pattern_suffix(pos)
    tail = @pattern.length - 1
    matched = 0

    until matched >= @pattern.length || matched >= pos || @pattern[tail - matched] != @pattern[pos - matched]
      matched += 1
    end

    matched
  end
end
|
110
|
+
|
111
|
+
# idx:    key passed to `idx_value` on indexed objects and queries.
# bucket: object whose `new` creates the per-pattern value container;
#         defaults to MemDB::Bucket.
# Starts with no patterns and no matchers; both tables are keyed by pattern.
def initialize(idx:, bucket: MemDB::Bucket)
  @idx, @bucket = idx, bucket
  @patterns = {}
  @matchers = {}
end
|
117
|
+
|
118
|
+
# Registers +value+ for +obj+ under each pattern that +obj+ reports for this
# index's idx. The value bucket and the matcher for a pattern are created
# the first time that pattern is seen.
def add(obj, value)
  obj.idx_value(@idx).each do |pattern|
    (@patterns[pattern] ||= @bucket.new).add(obj, value)

    @matchers[pattern] ||= SequenceIndex.new(pattern)
  end
end
|
126
|
+
|
127
|
+
# Checks every sequence the query reports for this index's idx against the
# stored patterns, collecting matches into +out+, which is returned.
def query(query, out: MemDB::Out.new)
  sequences = query.idx_value(@idx)
  sequences.each { |seq| select_one(query, seq, out) }

  out
end
|
134
|
+
|
135
|
+
private
|
136
|
+
|
137
|
+
# Tries every stored pattern against +seq+. Patterns longer than +seq+ are
# skipped without running the matcher; for each pattern found inside +seq+
# the corresponding bucket is queried with +out+ as the sink.
def select_one(query, seq, out)
  @matchers.each do |pattern, matcher|
    next if seq.length < pattern.length
    next unless matcher.index(seq) > -1

    @patterns[pattern].query(query, out: out)
  end
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "mem_db/indexing_object"
|
4
|
+
|
5
|
+
class MemDB
  # Feeds raw records into an index. A single IndexingObject is created up
  # front and re-assigned for every record instead of allocating a new one.
  class Indexation
    # index: object receiving `add(indexing_object, value)` calls.
    def initialize(index)
      @index = index
      @obj = MemDB::IndexingObject.new
    end

    # Points the shared IndexingObject at +raw+ and adds it to the index
    # together with +value+. Returns whatever the index's `add` returns.
    def add(raw, value)
      wrapper = @obj
      wrapper.assign!(raw)
      @index.add(wrapper, value)
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "mem_db/idx"
|
4
|
+
|
5
|
+
class MemDB
  # Reusable wrapper around a raw params hash. Normalized attribute values
  # and per-index values are memoized until the next #assign!.
  class IndexingObject
    def initialize
      @params = {}
      @attrs = {}
      @idx_value = {}
    end

    # Switches the wrapper to a new params hash and drops both caches.
    # Returns self.
    def assign!(params)
      @params = params
      @attrs.clear
      @idx_value.clear

      self
    end

    # Normalized value of +attr+ (see #prepare_attr). The result is cached
    # by key presence, so nil results are cached too.
    def [](attr)
      @attrs.fetch(attr) { @attrs[attr] = prepare_attr(attr) }
    end

    # Writes +value+ into the params hash. The memoized attribute is dropped
    # first (as #delete does) so a later #[] sees the new value instead of a
    # stale one.
    # NOTE(review): like #delete, this leaves @idx_value untouched -- confirm
    # whether index values derived from this param should be invalidated too.
    def []=(param, value)
      @attrs.delete(param)
      @params[param] = value
    end

    # Removes +param+ from the params hash and from the attribute cache.
    def delete(param)
      @params.delete(param)
      @attrs.delete(param)
    end

    # Value computed by +idx+ for this object (`idx.value(self)`), cached per
    # idx by key presence.
    def idx_value(idx)
      @idx_value.fetch(idx) { @idx_value[idx] = idx.value(self) }
    end

    # Reads the raw param and normalizes it: MemDB::Idx::ANY, nil and arrays
    # are returned as-is; any other value is wrapped in a one-element array.
    def prepare_attr(attr)
      v = @params[attr]
      return v if v == MemDB::Idx::ANY || v.nil? || v.is_a?(Array)

      [v]
    end
  end
end
|
data/lib/mem_db/out.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class MemDB
  # Result sink for queries. Collects whole result collections via #add and
  # enumerates their individual elements, in insertion order.
  class Out
    include Enumerable

    def initialize
      @arr = []
    end

    # Appends one result collection (anything responding to `each`).
    # Always returns true.
    def add(res)
      @arr << res

      true
    end

    # Yields every element of every added collection; returns an Enumerator
    # when called without a block.
    def each(&block)
      return to_enum unless block

      @arr.each { |values| values.each(&block) }
    end
  end
end
|
data/lib/mem_db/query.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class MemDB
  # Wrapper around a params hash used for lookups. Normalized attribute
  # values and per-index prepared values are memoized.
  class Query
    def initialize(params)
      @params = params
      @attrs = {}
      @idx_value = {}
    end

    # Runs this query against +index+ and returns what `index.query` returns.
    def using(index)
      index.query(self)
    end

    # Normalized value of +attr+ (see #prepare_attr), cached by key presence.
    def [](attr)
      @attrs.fetch(attr) { @attrs[attr] = prepare_attr(attr) }
    end

    # Writes +value+ into the params hash. The memoized attribute is dropped
    # first (as #delete does) so a later #[] sees the new value instead of a
    # stale one.
    # NOTE(review): like #delete, this leaves @idx_value untouched -- confirm
    # whether prepared index values derived from this param should be
    # invalidated too.
    def []=(param, value)
      @attrs.delete(param)
      @params[param] = value
    end

    # Removes +param+ from the params hash and from the attribute cache.
    def delete(param)
      @params.delete(param)
      @attrs.delete(param)
    end

    # Value prepared by +idx+ for this query (`idx.prepare_query(self)`),
    # cached per idx by key presence.
    def idx_value(idx)
      @idx_value.fetch(idx) { @idx_value[idx] = idx.prepare_query(self) }
    end

    # Reads the raw param; arrays are returned as-is, anything else
    # (including nil) is wrapped in a one-element array.
    def prepare_attr(attr)
      v = @params[attr]
      v.is_a?(Array) ? v : [v]
    end
  end
end
|
data/mem_db.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "lib/mem_db/version"
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
  repository_url = "https://github.com/DmitryBochkarev/mem_db"
  blurb = "MemDB is embedded database"

  spec.name = "mem_db"
  spec.version = MemDB::VERSION
  spec.authors = ["Dmitry Bochkarev"]
  spec.email = ["dimabochkarev@gmail.com"]

  spec.summary = blurb
  spec.description = blurb
  spec.homepage = repository_url
  spec.license = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = repository_url

  # Package every git-tracked file except those under test/, spec/ or features/.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.grep_v(%r{\A(?:test|spec|features)/})
  end

  spec.bindir = "exe"
  # Executables are the base names of the packaged files under exe/.
  spec.executables = spec.files.grep(%r{\Aexe/}).map { |f| File.basename(f) }
  spec.require_paths = ["lib"]
end
|