factbase 0.19.8 → 0.19.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +91 -67
- data/Rakefile +7 -0
- data/lib/factbase/indexed/indexed_and.rb +3 -2
- data/lib/factbase/indexed/indexed_unique.rb +1 -37
- data/lib/factbase/lazy_taped.rb +13 -14
- data/lib/factbase/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c7c832590816ff7438ea62c6692d2567775088ab27567d407b2294b8caafced7
|
|
4
|
+
data.tar.gz: 3c4b7313117776731de9b91cca81c472cd6dfe9dd64e7390381e54194b912b77
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b7569a94be51517bb2860cf368849aaa22f89f96b04a16a29fafba294c6ad72c8b03e359f35024a482a82f657edfb36710ec7f60504ff4422663a96cc0a3ebc8
|
|
7
|
+
data.tar.gz: 7991d24eae8705444c781b35e00fe74b3f3087e015b3ac2cc1d59d250e0afddd70b7c23006dabf85d4d14dbc2c920a78f1e7e6068edc235c0b45dbdb4af5c17b
|
data/README.md
CHANGED
|
@@ -95,6 +95,30 @@ fb.query('(eq foo 43)').each do |f|
|
|
|
95
95
|
end
|
|
96
96
|
```
|
|
97
97
|
|
|
98
|
+
Deleting while iterating is unsafe and may cause elements to be skipped:
|
|
99
|
+
|
|
100
|
+
```ruby
|
|
101
|
+
fb = Factbase.new
|
|
102
|
+
fb.insert.id = 1
|
|
103
|
+
fb.insert.id = 2
|
|
104
|
+
fb.query('(always)').each do |f|
|
|
105
|
+
fb.query("(eq id #{f.id})").delete!
|
|
106
|
+
end
|
|
107
|
+
assert(1 == fb.size)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
To safely delete, use a snapshot:
|
|
111
|
+
|
|
112
|
+
```ruby
|
|
113
|
+
fb = Factbase.new
|
|
114
|
+
fb.insert.id = 1
|
|
115
|
+
fb.insert.id = 2
|
|
116
|
+
fb.query('(always)').to_a.each do |f|
|
|
117
|
+
fb.query("(eq id #{f.id})").delete!
|
|
118
|
+
end
|
|
119
|
+
assert(0 == fb.size)
|
|
120
|
+
```
|
|
121
|
+
|
|
98
122
|
## Terms
|
|
99
123
|
|
|
100
124
|
There are some boolean terms available in a query
|
|
@@ -227,89 +251,89 @@ This is the result of the benchmark:
|
|
|
227
251
|
```text
|
|
228
252
|
|
|
229
253
|
query all facts from an empty factbase 0.00
|
|
230
|
-
insert 20000 facts 0.
|
|
254
|
+
insert 20000 facts 0.66
|
|
231
255
|
export 20000 facts 0.02
|
|
232
|
-
import
|
|
233
|
-
insert 10 facts 0.
|
|
234
|
-
query 10 times w/txn 2.
|
|
235
|
-
query 10 times w/o txn 0.
|
|
236
|
-
modify 10 attrs w/txn 1.
|
|
237
|
-
delete 10 facts w/txn
|
|
256
|
+
import 410996 bytes (20000 facts) 0.02
|
|
257
|
+
insert 10 facts 0.00
|
|
258
|
+
query 10 times w/txn 2.13
|
|
259
|
+
query 10 times w/o txn 0.12
|
|
260
|
+
modify 10 attrs w/txn 1.62
|
|
261
|
+
delete 10 facts w/txn 10.22
|
|
238
262
|
build index on 5000 facts 0.03
|
|
239
263
|
export 5000 facts with index 0.04
|
|
240
264
|
import 5000 facts with persisted index 0.03
|
|
241
|
-
query 5000 facts using persisted index 0.
|
|
242
|
-
export 5000 facts without index 0.
|
|
265
|
+
query 5000 facts using persisted index 0.08
|
|
266
|
+
export 5000 facts without index 0.02
|
|
243
267
|
import 5000 facts without index 0.01
|
|
244
268
|
query 5000 facts building index on-the-fly 0.07
|
|
245
|
-
query 15k facts sel: 20% card: 10 absent plain 0.
|
|
246
|
-
query 15k facts sel: 20% card: 10 absent indexed(cold) 0.
|
|
247
|
-
query 15k facts sel: 20% card: 10 absent indexed(warm) 0.
|
|
248
|
-
query 15k facts sel: 20% card: 10 exists plain 0.
|
|
249
|
-
query 15k facts sel: 20% card: 10 exists indexed(cold) 0.
|
|
269
|
+
query 15k facts sel: 20% card: 10 absent plain 0.60
|
|
270
|
+
query 15k facts sel: 20% card: 10 absent indexed(cold) 0.17
|
|
271
|
+
query 15k facts sel: 20% card: 10 absent indexed(warm) 0.16
|
|
272
|
+
query 15k facts sel: 20% card: 10 exists plain 0.57
|
|
273
|
+
query 15k facts sel: 20% card: 10 exists indexed(cold) 0.17
|
|
250
274
|
query 15k facts sel: 20% card: 10 exists indexed(warm) 0.13
|
|
251
|
-
query 15k facts sel: 20% card: 10 eq plain 0.
|
|
252
|
-
query 15k facts sel: 20% card: 10 eq indexed(cold) 0.
|
|
253
|
-
query 15k facts sel: 20% card: 10 eq indexed(warm) 0.
|
|
254
|
-
query 15k facts sel: 20% card: 10 not plain 1.
|
|
255
|
-
query 15k facts sel: 20% card: 10 not indexed(cold) 0.
|
|
256
|
-
query 15k facts sel: 20% card: 10 not indexed(warm) 0.
|
|
257
|
-
query 15k facts sel: 20% card: 10 gt plain 0.
|
|
258
|
-
query 15k facts sel: 20% card: 10 gt indexed(cold) 0.
|
|
259
|
-
query 15k facts sel: 20% card: 10 gt indexed(warm) 0.
|
|
260
|
-
query 15k facts sel: 20% card: 10 lt plain 0.
|
|
261
|
-
query 15k facts sel: 20% card: 10 lt indexed(cold) 0.
|
|
275
|
+
query 15k facts sel: 20% card: 10 eq plain 0.86
|
|
276
|
+
query 15k facts sel: 20% card: 10 eq indexed(cold) 0.26
|
|
277
|
+
query 15k facts sel: 20% card: 10 eq indexed(warm) 0.19
|
|
278
|
+
query 15k facts sel: 20% card: 10 not plain 1.16
|
|
279
|
+
query 15k facts sel: 20% card: 10 not indexed(cold) 0.53
|
|
280
|
+
query 15k facts sel: 20% card: 10 not indexed(warm) 0.51
|
|
281
|
+
query 15k facts sel: 20% card: 10 gt plain 0.88
|
|
282
|
+
query 15k facts sel: 20% card: 10 gt indexed(cold) 0.29
|
|
283
|
+
query 15k facts sel: 20% card: 10 gt indexed(warm) 0.24
|
|
284
|
+
query 15k facts sel: 20% card: 10 lt plain 0.87
|
|
285
|
+
query 15k facts sel: 20% card: 10 lt indexed(cold) 0.29
|
|
262
286
|
query 15k facts sel: 20% card: 10 lt indexed(warm) 0.20
|
|
263
|
-
query 15k facts sel: 20% card: 10 and eq plain 1.
|
|
264
|
-
query 15k facts sel: 20% card: 10 and eq indexed(cold) 0.
|
|
265
|
-
query 15k facts sel: 20% card: 10 and eq indexed(warm) 0.
|
|
266
|
-
query 15k facts sel: 20% card: 10 and complex plain 1.
|
|
267
|
-
query 15k facts sel: 20% card: 10 and complex indexed(cold) 0.
|
|
268
|
-
query 15k facts sel: 20% card: 10 and complex indexed(warm) 0.
|
|
269
|
-
query 15k facts sel: 20% card: 10 one plain 0.
|
|
270
|
-
query 15k facts sel: 20% card: 10 one indexed(cold) 0.
|
|
271
|
-
query 15k facts sel: 20% card: 10 one indexed(warm) 0.
|
|
287
|
+
query 15k facts sel: 20% card: 10 and eq plain 1.43
|
|
288
|
+
query 15k facts sel: 20% card: 10 and eq indexed(cold) 0.91
|
|
289
|
+
query 15k facts sel: 20% card: 10 and eq indexed(warm) 0.50
|
|
290
|
+
query 15k facts sel: 20% card: 10 and complex plain 1.38
|
|
291
|
+
query 15k facts sel: 20% card: 10 and complex indexed(cold) 0.51
|
|
292
|
+
query 15k facts sel: 20% card: 10 and complex indexed(warm) 0.45
|
|
293
|
+
query 15k facts sel: 20% card: 10 one plain 0.75
|
|
294
|
+
query 15k facts sel: 20% card: 10 one indexed(cold) 0.21
|
|
295
|
+
query 15k facts sel: 20% card: 10 one indexed(warm) 0.16
|
|
272
296
|
query 15k facts sel: 20% card: 10 or plain 2.02
|
|
273
|
-
query 15k facts sel: 20% card: 10 or indexed(cold) 0.
|
|
274
|
-
query 15k facts sel: 20% card: 10 or indexed(warm) 0.
|
|
275
|
-
query 15k facts sel: 20% card: 10 unique plain 1.
|
|
276
|
-
query 15k facts sel: 20% card: 10 unique indexed(cold) 0.
|
|
277
|
-
query 15k facts sel: 20% card: 10 unique indexed(warm) 0.
|
|
278
|
-
(and (eq what 'issue-was-closed') (exists... -> 200 1.
|
|
279
|
-
(and (eq what 'issue-was-closed') (exists... -> 200/txn 1.
|
|
280
|
-
(and (eq what 'issue-was-closed') (exists... -> zero 1.
|
|
281
|
-
(and (eq what 'issue-was-closed') (exists... -> zero/txn 1.
|
|
282
|
-
transaction rollback on factbase with 100000 facts 0.
|
|
283
|
-
(gt time '2024-03-23T03:21:43Z') 0.
|
|
284
|
-
(gt cost 50) 0.
|
|
285
|
-
(eq title 'Object Thinking 5000') 0.
|
|
286
|
-
(and (eq foo 42.998) (or (gt bar 200) (absent z... 0.
|
|
287
|
-
(and (exists foo) (not (exists blue))) 1.
|
|
288
|
-
(eq id (agg (always) (max id))) 2.
|
|
289
|
-
(join "c<=cost,b<=bar" (eq id (agg (always) (ma... 4.
|
|
290
|
-
(and (eq what "foo") (join "w<=what" (and (eq i... 7.
|
|
291
|
-
delete! 0.
|
|
292
|
-
(and (eq issue *) (eq repository *) (eq what '*') (eq where '*')) 0.
|
|
297
|
+
query 15k facts sel: 20% card: 10 or indexed(cold) 0.46
|
|
298
|
+
query 15k facts sel: 20% card: 10 or indexed(warm) 0.32
|
|
299
|
+
query 15k facts sel: 20% card: 10 unique plain 1.87
|
|
300
|
+
query 15k facts sel: 20% card: 10 unique indexed(cold) 0.67
|
|
301
|
+
query 15k facts sel: 20% card: 10 unique indexed(warm) 0.42
|
|
302
|
+
(and (eq what 'issue-was-closed') (exists... -> 200 1.08
|
|
303
|
+
(and (eq what 'issue-was-closed') (exists... -> 200/txn 1.24
|
|
304
|
+
(and (eq what 'issue-was-closed') (exists... -> zero 1.08
|
|
305
|
+
(and (eq what 'issue-was-closed') (exists... -> zero/txn 1.28
|
|
306
|
+
transaction rollback on factbase with 100000 facts 0.01
|
|
307
|
+
(gt time '2024-03-23T03:21:43Z') 0.31
|
|
308
|
+
(gt cost 50) 0.14
|
|
309
|
+
(eq title 'Object Thinking 5000') 0.02
|
|
310
|
+
(and (eq foo 42.998) (or (gt bar 200) (absent z... 0.03
|
|
311
|
+
(and (exists foo) (not (exists blue))) 1.23
|
|
312
|
+
(eq id (agg (always) (max id))) 2.80
|
|
313
|
+
(join "c<=cost,b<=bar" (eq id (agg (always) (ma... 4.44
|
|
314
|
+
(and (eq what "foo") (join "w<=what" (and (eq i... 7.39
|
|
315
|
+
delete! 0.44
|
|
316
|
+
(and (eq issue *) (eq repository *) (eq what '*') (eq where '*')) 0.41
|
|
293
317
|
Taped.append() x50000 0.02
|
|
294
|
-
Taped.each() x125 1.
|
|
318
|
+
Taped.each() x125 1.10
|
|
295
319
|
Taped.delete_if() x375 0.86
|
|
296
|
-
50000 facts: plain read (no txn) 4.
|
|
297
|
-
50000 facts: read-only txn (no copy)
|
|
320
|
+
50000 facts: plain read (no txn) 4.10
|
|
321
|
+
50000 facts: read-only txn (no copy) 5.33
|
|
298
322
|
50000 facts: plain insert (no txn) 0.00
|
|
299
|
-
50000 facts: insert in txn (copy triggered)
|
|
300
|
-
50000 facts: plain modify (no txn) 28.
|
|
301
|
-
50000 facts: modify in txn (copy triggered)
|
|
302
|
-
100000 facts: plain read (no txn) 8.
|
|
303
|
-
100000 facts: read-only txn (no copy)
|
|
323
|
+
50000 facts: insert in txn (no copy triggered) 0.00
|
|
324
|
+
50000 facts: plain modify (no txn) 28.59
|
|
325
|
+
50000 facts: modify in txn (copy triggered) 37.78
|
|
326
|
+
100000 facts: plain read (no txn) 8.33
|
|
327
|
+
100000 facts: read-only txn (no copy) 12.80
|
|
304
328
|
100000 facts: plain insert (no txn) 0.00
|
|
305
|
-
100000 facts: insert in txn (copy triggered)
|
|
306
|
-
100000 facts: plain modify (no txn)
|
|
307
|
-
100000 facts: modify in txn (copy triggered)
|
|
329
|
+
100000 facts: insert in txn (no copy triggered) 0.00
|
|
330
|
+
100000 facts: plain modify (no txn) 57.26
|
|
331
|
+
100000 facts: modify in txn (copy triggered) 75.84
|
|
308
332
|
```
|
|
309
333
|
|
|
310
334
|
The results were calculated in [this GHA job][benchmark-gha]
|
|
311
|
-
on 2026-02-
|
|
335
|
+
on 2026-02-26 at 06:08,
|
|
312
336
|
on Linux with 4 CPUs.
|
|
313
337
|
<!-- benchmark_end -->
|
|
314
338
|
|
|
315
|
-
[benchmark-gha]: https://github.com/yegor256/factbase/actions/runs/
|
|
339
|
+
[benchmark-gha]: https://github.com/yegor256/factbase/actions/runs/22430010182
|
data/Rakefile
CHANGED
|
@@ -20,6 +20,12 @@ end
|
|
|
20
20
|
|
|
21
21
|
task default: %i[clean test picks rubocop yard]
|
|
22
22
|
|
|
23
|
+
def tail(args = ARGV)
|
|
24
|
+
i = args.index('--')
|
|
25
|
+
return '' if i.nil? || args[i + 1].nil?
|
|
26
|
+
args[i..].join(' ')
|
|
27
|
+
end
|
|
28
|
+
|
|
23
29
|
require 'rake/testtask'
|
|
24
30
|
desc 'Run all unit tests'
|
|
25
31
|
Rake::TestTask.new(:test) do |test|
|
|
@@ -28,6 +34,7 @@ Rake::TestTask.new(:test) do |test|
|
|
|
28
34
|
test.pattern = 'test/**/test_*.rb'
|
|
29
35
|
test.warning = true
|
|
30
36
|
test.verbose = false
|
|
37
|
+
test.options = tail
|
|
31
38
|
end
|
|
32
39
|
|
|
33
40
|
desc 'Run them via Ruby, one by one'
|
|
@@ -51,8 +51,9 @@ class Factbase::IndexedAnd
|
|
|
51
51
|
if r.nil?
|
|
52
52
|
r = n
|
|
53
53
|
elsif n.size < r.size * 8 # to skip some obvious matchings
|
|
54
|
-
|
|
55
|
-
|
|
54
|
+
small, large = n.size < r.size ? [n.to_a, r.to_a] : [r.to_a, n.to_a]
|
|
55
|
+
ids = Set.new(small.map(&:object_id))
|
|
56
|
+
r = large.select { |f| ids.include?(f.object_id) }
|
|
56
57
|
end
|
|
57
58
|
break if r.size < maps.size / 32 # it's already small enough
|
|
58
59
|
break if r.size < 128 # it's obviously already small enough
|
|
@@ -4,23 +4,6 @@
|
|
|
4
4
|
# SPDX-License-Identifier: MIT
|
|
5
5
|
|
|
6
6
|
# Indexed term 'unique'.
|
|
7
|
-
# The @idx[ikey] structure:
|
|
8
|
-
# {
|
|
9
|
-
# count: Integer (number of facts already processed),
|
|
10
|
-
# buckets: {
|
|
11
|
-
# key => {
|
|
12
|
-
# facts: Array (unique facts found),
|
|
13
|
-
# seen: Set (composite values already indexed to skip duplicates)
|
|
14
|
-
# }
|
|
15
|
-
# }
|
|
16
|
-
# }
|
|
17
|
-
# Example 1: (unique "fruit")
|
|
18
|
-
# - Apple, Apple, Banana
|
|
19
|
-
# - count: 3, facts: [Apple, Banana], seen: { [Apple], [Banana] }
|
|
20
|
-
#
|
|
21
|
-
# Example 2: (unique "fruit" "color")
|
|
22
|
-
# - [Apple, Red], [Apple, Green], [Apple, Red]
|
|
23
|
-
# - count: 3, facts: [[Apple, Red], [Apple, Green]], seen: { [Apple, Red], [Apple, Green] }
|
|
24
7
|
class Factbase::IndexedUnique
|
|
25
8
|
def initialize(term, idx)
|
|
26
9
|
@term = term
|
|
@@ -28,25 +11,6 @@ class Factbase::IndexedUnique
|
|
|
28
11
|
end
|
|
29
12
|
|
|
30
13
|
def predict(maps, _fb, _params)
|
|
31
|
-
|
|
32
|
-
bucket_key = operands.join('|')
|
|
33
|
-
idx_key = [maps.object_id, @term.op.to_s, bucket_key]
|
|
34
|
-
entry = (@idx[idx_key] ||= { buckets: {}, count: 0 })
|
|
35
|
-
feed(maps.to_a, entry, operands, bucket_key)
|
|
36
|
-
matches = entry[:buckets][bucket_key][:facts]
|
|
37
|
-
maps.respond_to?(:repack) ? maps.repack(matches) : matches
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
private
|
|
41
|
-
|
|
42
|
-
def feed(facts, entry, operands, bucket_key)
|
|
43
|
-
entry[:buckets][bucket_key] ||= { facts: [], seen: Set.new }
|
|
44
|
-
bucket = entry[:buckets][bucket_key]
|
|
45
|
-
(facts[entry[:count]..] || []).each do |fact|
|
|
46
|
-
composite_val = operands.map { |o| fact[o] }
|
|
47
|
-
next if composite_val.any?(&:nil?)
|
|
48
|
-
bucket[:facts] << fact if bucket[:seen].add?(composite_val)
|
|
49
|
-
end
|
|
50
|
-
entry[:count] = facts.size
|
|
14
|
+
maps
|
|
51
15
|
end
|
|
52
16
|
end
|
data/lib/factbase/lazy_taped.rb
CHANGED
|
@@ -72,17 +72,18 @@ class Factbase::LazyTaped
|
|
|
72
72
|
|
|
73
73
|
def each
|
|
74
74
|
return to_enum(__method__) unless block_given?
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
unless
|
|
78
|
-
|
|
79
|
-
m
|
|
80
|
-
|
|
75
|
+
yielded_size = 0
|
|
76
|
+
is_copied = copied?
|
|
77
|
+
unless is_copied
|
|
78
|
+
@origin.each do |m|
|
|
79
|
+
yield _tape(m)
|
|
80
|
+
yielded_size += 1
|
|
81
81
|
end
|
|
82
82
|
end
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
83
|
+
staged = is_copied == copied? ? @staged : @staged[yielded_size..]
|
|
84
|
+
staged&.each do |f|
|
|
85
|
+
next if f.nil?
|
|
86
|
+
yield _tape(f)
|
|
86
87
|
end
|
|
87
88
|
end
|
|
88
89
|
|
|
@@ -119,11 +120,9 @@ class Factbase::LazyTaped
|
|
|
119
120
|
|
|
120
121
|
def ensure_copied!
|
|
121
122
|
return if copied?
|
|
122
|
-
@origin.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
@staged << c
|
|
126
|
-
end
|
|
123
|
+
@staged = @origin.map do |o|
|
|
124
|
+
o.transform_values(&:dup).tap { |c| _track(c, o) }
|
|
125
|
+
end.concat(@staged)
|
|
127
126
|
@copied = true
|
|
128
127
|
end
|
|
129
128
|
|
data/lib/factbase/version.rb
CHANGED