factbase 0.19.8 → 0.19.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -6
- data/README.md +40 -82
- data/Rakefile +23 -13
- data/lib/factbase/indexed/indexed_and.rb +3 -2
- data/lib/factbase/indexed/indexed_factbase.rb +3 -2
- data/lib/factbase/indexed/indexed_unique.rb +1 -37
- data/lib/factbase/lazy_taped.rb +13 -14
- data/lib/factbase/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4b573b12e8a04a0b69a2e4c33c6ccd17c21c1c14a52a644c87564738388de546
|
|
4
|
+
data.tar.gz: b4c888226e04aee8054ff26d056cc04f34fd3a1fb29455ff6c77ce4dd2103d45
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a96ff08cc40deb0d17e25cc7c30f8e67d5c09b0179e623d7e941ade194ac7c52c2135730fb4a813faf7370097a1d151db74e1559de81a619bf572becb1eec408
|
|
7
|
+
data.tar.gz: 1f57d718a82c2c8d107c033c3d8b59440170822f5810fadfc3b99837cf4072493e96ec8a39bff41c73a913cc3dba75837a9f4ecc0804976da37ebe02c09b1777
|
data/Gemfile.lock
CHANGED
|
@@ -16,7 +16,7 @@ PATH
|
|
|
16
16
|
GEM
|
|
17
17
|
remote: https://rubygems.org/
|
|
18
18
|
specs:
|
|
19
|
-
ansi (1.
|
|
19
|
+
ansi (1.6.0)
|
|
20
20
|
ast (2.4.3)
|
|
21
21
|
backtrace (0.4.1)
|
|
22
22
|
benchmark (0.5.0)
|
|
@@ -38,13 +38,13 @@ GEM
|
|
|
38
38
|
loog (0.8.0)
|
|
39
39
|
ellipsized
|
|
40
40
|
logger (~> 1.0)
|
|
41
|
-
minitest (6.0.
|
|
41
|
+
minitest (6.0.5)
|
|
42
42
|
drb (~> 2.0)
|
|
43
43
|
prism (~> 1.5)
|
|
44
|
-
minitest-reporters (1.
|
|
44
|
+
minitest-reporters (1.8.0)
|
|
45
45
|
ansi
|
|
46
46
|
builder
|
|
47
|
-
minitest (>= 5.0)
|
|
47
|
+
minitest (>= 5.0, < 7)
|
|
48
48
|
ruby-progressbar
|
|
49
49
|
nokogiri (1.19.0-arm64-darwin)
|
|
50
50
|
racc (~> 1.4)
|
|
@@ -64,14 +64,14 @@ GEM
|
|
|
64
64
|
psych (5.3.1)
|
|
65
65
|
date
|
|
66
66
|
stringio
|
|
67
|
-
qbash (0.8.
|
|
67
|
+
qbash (0.8.3)
|
|
68
68
|
backtrace (> 0)
|
|
69
69
|
elapsed (> 0)
|
|
70
70
|
loog (> 0)
|
|
71
71
|
tago (> 0)
|
|
72
72
|
racc (1.8.1)
|
|
73
73
|
rainbow (3.1.1)
|
|
74
|
-
rake (13.
|
|
74
|
+
rake (13.4.2)
|
|
75
75
|
rdoc (7.1.0)
|
|
76
76
|
erb
|
|
77
77
|
psych (>= 4.0.0)
|
data/README.md
CHANGED
|
@@ -95,6 +95,30 @@ fb.query('(eq foo 43)').each do |f|
|
|
|
95
95
|
end
|
|
96
96
|
```
|
|
97
97
|
|
|
98
|
+
Deleting while iterating is unsafe and may cause elements to be skipped:
|
|
99
|
+
|
|
100
|
+
```ruby
|
|
101
|
+
fb = Factbase.new
|
|
102
|
+
fb.insert.id = 1
|
|
103
|
+
fb.insert.id = 2
|
|
104
|
+
fb.query('(always)').each do |f|
|
|
105
|
+
fb.query("(eq id #{f.id})").delete!
|
|
106
|
+
end
|
|
107
|
+
assert(1 == fb.size)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
To safely delete, use a snapshot:
|
|
111
|
+
|
|
112
|
+
```ruby
|
|
113
|
+
fb = Factbase.new
|
|
114
|
+
fb.insert.id = 1
|
|
115
|
+
fb.insert.id = 2
|
|
116
|
+
fb.query('(always)').to_a.each do |f|
|
|
117
|
+
fb.query("(eq id #{f.id})").delete!
|
|
118
|
+
end
|
|
119
|
+
assert(0 == fb.size)
|
|
120
|
+
```
|
|
121
|
+
|
|
98
122
|
## Terms
|
|
99
123
|
|
|
100
124
|
There are some boolean terms available in a query
|
|
@@ -225,91 +249,25 @@ This is the result of the benchmark:
|
|
|
225
249
|
|
|
226
250
|
<!-- benchmark_begin -->
|
|
227
251
|
```text
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
query
|
|
242
|
-
export 5000 facts without index 0.01
|
|
243
|
-
import 5000 facts without index 0.01
|
|
244
|
-
query 5000 facts building index on-the-fly 0.07
|
|
245
|
-
query 15k facts sel: 20% card: 10 absent plain 0.61
|
|
246
|
-
query 15k facts sel: 20% card: 10 absent indexed(cold) 0.16
|
|
247
|
-
query 15k facts sel: 20% card: 10 absent indexed(warm) 0.13
|
|
248
|
-
query 15k facts sel: 20% card: 10 exists plain 0.56
|
|
249
|
-
query 15k facts sel: 20% card: 10 exists indexed(cold) 0.14
|
|
250
|
-
query 15k facts sel: 20% card: 10 exists indexed(warm) 0.13
|
|
251
|
-
query 15k facts sel: 20% card: 10 eq plain 0.83
|
|
252
|
-
query 15k facts sel: 20% card: 10 eq indexed(cold) 0.22
|
|
253
|
-
query 15k facts sel: 20% card: 10 eq indexed(warm) 0.24
|
|
254
|
-
query 15k facts sel: 20% card: 10 not plain 1.11
|
|
255
|
-
query 15k facts sel: 20% card: 10 not indexed(cold) 0.50
|
|
256
|
-
query 15k facts sel: 20% card: 10 not indexed(warm) 0.43
|
|
257
|
-
query 15k facts sel: 20% card: 10 gt plain 0.84
|
|
258
|
-
query 15k facts sel: 20% card: 10 gt indexed(cold) 0.26
|
|
259
|
-
query 15k facts sel: 20% card: 10 gt indexed(warm) 0.19
|
|
260
|
-
query 15k facts sel: 20% card: 10 lt plain 0.84
|
|
261
|
-
query 15k facts sel: 20% card: 10 lt indexed(cold) 0.27
|
|
262
|
-
query 15k facts sel: 20% card: 10 lt indexed(warm) 0.20
|
|
263
|
-
query 15k facts sel: 20% card: 10 and eq plain 1.39
|
|
264
|
-
query 15k facts sel: 20% card: 10 and eq indexed(cold) 0.85
|
|
265
|
-
query 15k facts sel: 20% card: 10 and eq indexed(warm) 0.46
|
|
266
|
-
query 15k facts sel: 20% card: 10 and complex plain 1.32
|
|
267
|
-
query 15k facts sel: 20% card: 10 and complex indexed(cold) 0.48
|
|
268
|
-
query 15k facts sel: 20% card: 10 and complex indexed(warm) 0.43
|
|
269
|
-
query 15k facts sel: 20% card: 10 one plain 0.72
|
|
270
|
-
query 15k facts sel: 20% card: 10 one indexed(cold) 0.19
|
|
271
|
-
query 15k facts sel: 20% card: 10 one indexed(warm) 0.15
|
|
272
|
-
query 15k facts sel: 20% card: 10 or plain 2.02
|
|
273
|
-
query 15k facts sel: 20% card: 10 or indexed(cold) 0.42
|
|
274
|
-
query 15k facts sel: 20% card: 10 or indexed(warm) 0.36
|
|
275
|
-
query 15k facts sel: 20% card: 10 unique plain 1.86
|
|
276
|
-
query 15k facts sel: 20% card: 10 unique indexed(cold) 0.61
|
|
277
|
-
query 15k facts sel: 20% card: 10 unique indexed(warm) 0.37
|
|
278
|
-
(and (eq what 'issue-was-closed') (exists... -> 200 1.04
|
|
279
|
-
(and (eq what 'issue-was-closed') (exists... -> 200/txn 1.23
|
|
280
|
-
(and (eq what 'issue-was-closed') (exists... -> zero 1.06
|
|
281
|
-
(and (eq what 'issue-was-closed') (exists... -> zero/txn 1.23
|
|
282
|
-
transaction rollback on factbase with 100000 facts 0.26
|
|
283
|
-
(gt time '2024-03-23T03:21:43Z') 0.22
|
|
284
|
-
(gt cost 50) 0.10
|
|
285
|
-
(eq title 'Object Thinking 5000') 0.03
|
|
286
|
-
(and (eq foo 42.998) (or (gt bar 200) (absent z... 0.02
|
|
287
|
-
(and (exists foo) (not (exists blue))) 1.12
|
|
288
|
-
(eq id (agg (always) (max id))) 2.76
|
|
289
|
-
(join "c<=cost,b<=bar" (eq id (agg (always) (ma... 4.58
|
|
290
|
-
(and (eq what "foo") (join "w<=what" (and (eq i... 7.26
|
|
291
|
-
delete! 0.42
|
|
292
|
-
(and (eq issue *) (eq repository *) (eq what '*') (eq where '*')) 0.38
|
|
293
|
-
Taped.append() x50000 0.02
|
|
294
|
-
Taped.each() x125 1.08
|
|
295
|
-
Taped.delete_if() x375 0.86
|
|
296
|
-
50000 facts: plain read (no txn) 4.06
|
|
297
|
-
50000 facts: read-only txn (no copy) 4.85
|
|
298
|
-
50000 facts: plain insert (no txn) 0.00
|
|
299
|
-
50000 facts: insert in txn (copy triggered) 3.31
|
|
300
|
-
50000 facts: plain modify (no txn) 28.21
|
|
301
|
-
50000 facts: modify in txn (copy triggered) 35.14
|
|
302
|
-
100000 facts: plain read (no txn) 8.06
|
|
303
|
-
100000 facts: read-only txn (no copy) 9.94
|
|
304
|
-
100000 facts: plain insert (no txn) 0.00
|
|
305
|
-
100000 facts: insert in txn (copy triggered) 6.68
|
|
306
|
-
100000 facts: plain modify (no txn) 56.21
|
|
307
|
-
100000 facts: modify in txn (copy triggered) 70.09
|
|
252
|
+
user
|
|
253
|
+
void scan 0.001102
|
|
254
|
+
20k facts: export: 2991KB 0.854774
|
|
255
|
+
20k facts: import: 2991KB 1.035671
|
|
256
|
+
50k facts: read 0.000138
|
|
257
|
+
50k facts: read in txn 0.002754
|
|
258
|
+
50k facts: insert 0.000090
|
|
259
|
+
50k facts: insert in txn 0.000243
|
|
260
|
+
50k facts: modify 1.085214
|
|
261
|
+
50k facts: modify in txn 2.480409
|
|
262
|
+
12k facts: large query: match 3k 14.187596
|
|
263
|
+
12k facts: large query: match 3k in txn 19.396334
|
|
264
|
+
12k facts: large query: match zero 15.139695
|
|
265
|
+
12k facts: large query: match zero in txn 21.046074
|
|
308
266
|
```
|
|
309
267
|
|
|
310
268
|
The results were calculated in [this GHA job][benchmark-gha]
|
|
311
|
-
on 2026-
|
|
269
|
+
on 2026-04-16 at 17:08,
|
|
312
270
|
on Linux with 4 CPUs.
|
|
313
271
|
<!-- benchmark_end -->
|
|
314
272
|
|
|
315
|
-
[benchmark-gha]: https://github.com/yegor256/factbase/actions/runs/
|
|
273
|
+
[benchmark-gha]: https://github.com/yegor256/factbase/actions/runs/24523476155
|
data/Rakefile
CHANGED
|
@@ -20,6 +20,12 @@ end
|
|
|
20
20
|
|
|
21
21
|
task default: %i[clean test picks rubocop yard]
|
|
22
22
|
|
|
23
|
+
def tail(args = ARGV)
|
|
24
|
+
i = args.index('--')
|
|
25
|
+
return '' if i.nil? || args[i + 1].nil?
|
|
26
|
+
args[i..].join(' ')
|
|
27
|
+
end
|
|
28
|
+
|
|
23
29
|
require 'rake/testtask'
|
|
24
30
|
desc 'Run all unit tests'
|
|
25
31
|
Rake::TestTask.new(:test) do |test|
|
|
@@ -28,6 +34,7 @@ Rake::TestTask.new(:test) do |test|
|
|
|
28
34
|
test.pattern = 'test/**/test_*.rb'
|
|
29
35
|
test.warning = true
|
|
30
36
|
test.verbose = false
|
|
37
|
+
test.options = tail
|
|
31
38
|
end
|
|
32
39
|
|
|
33
40
|
desc 'Run them via Ruby, one by one'
|
|
@@ -54,27 +61,30 @@ RuboCop::RakeTask.new(:rubocop) do |task|
|
|
|
54
61
|
end
|
|
55
62
|
|
|
56
63
|
desc 'Benchmark them all'
|
|
57
|
-
task :benchmark, [:name] do |_t, args|
|
|
58
|
-
bname = args[:name] || '
|
|
64
|
+
task :benchmark, [:name, :cycles] do |_t, args|
|
|
65
|
+
bname = args[:name] || 'essential'
|
|
66
|
+
cycles = (args[:cycles] || 5).to_i
|
|
59
67
|
require_relative 'lib/factbase'
|
|
60
68
|
require_relative 'lib/factbase/cached/cached_factbase'
|
|
61
69
|
require_relative 'lib/factbase/indexed/indexed_factbase'
|
|
62
70
|
require_relative 'lib/factbase/sync/sync_factbase'
|
|
63
71
|
require 'benchmark'
|
|
64
72
|
Benchmark.bm(60) do |b|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
+
files =
|
|
74
|
+
case bname
|
|
75
|
+
when 'all'
|
|
76
|
+
Dir['benchmark/bench_*.rb']
|
|
77
|
+
when 'essential'
|
|
78
|
+
%w[empty serialization txns large_query].map { |f| "benchmark/bench_#{f}.rb" }
|
|
79
|
+
else
|
|
80
|
+
["benchmark/#{bname}.rb"]
|
|
73
81
|
end
|
|
74
|
-
|
|
75
|
-
f = "benchmark/#{bname}.rb"
|
|
82
|
+
files.each do |f|
|
|
76
83
|
require_relative f
|
|
77
|
-
|
|
84
|
+
fb = Factbase.new
|
|
85
|
+
fb = Factbase::IndexedFactbase.new(fb)
|
|
86
|
+
fb = Factbase::CachedFactbase.new(fb)
|
|
87
|
+
Kernel.send(File.basename(f).gsub(/\.rb$/, '').to_sym, b, fb, cycles)
|
|
78
88
|
end
|
|
79
89
|
end
|
|
80
90
|
end
|
|
data/lib/factbase/indexed/indexed_and.rb
CHANGED
|
@@ -51,8 +51,9 @@ class Factbase::IndexedAnd
|
|
|
51
51
|
if r.nil?
|
|
52
52
|
r = n
|
|
53
53
|
elsif n.size < r.size * 8 # to skip some obvious matchings
|
|
54
|
-
|
|
55
|
-
|
|
54
|
+
small, large = n.size < r.size ? [n.to_a, r.to_a] : [r.to_a, n.to_a]
|
|
55
|
+
ids = Set.new(small.map(&:object_id))
|
|
56
|
+
r = large.select { |f| ids.include?(f.object_id) }
|
|
56
57
|
end
|
|
57
58
|
break if r.size < maps.size / 32 # it's already small enough
|
|
58
59
|
break if r.size < 128 # it's obviously already small enough
|
|
data/lib/factbase/indexed/indexed_factbase.rb
CHANGED
|
@@ -61,11 +61,12 @@ class Factbase::IndexedFactbase
|
|
|
61
61
|
# Run an ACID transaction.
|
|
62
62
|
# @return [Factbase::Churn] How many facts have been changed (zero if rolled back)
|
|
63
63
|
def txn
|
|
64
|
+
inner_idx = {}
|
|
64
65
|
result =
|
|
65
66
|
@origin.txn do |fbt|
|
|
66
|
-
yield Factbase::IndexedFactbase.new(fbt,
|
|
67
|
+
yield Factbase::IndexedFactbase.new(fbt, inner_idx, @fresh)
|
|
67
68
|
end
|
|
68
|
-
@idx.clear
|
|
69
|
+
@idx.clear if result.deleted.positive? || result.added.positive?
|
|
69
70
|
@fresh.clear
|
|
70
71
|
result
|
|
71
72
|
end
|
|
data/lib/factbase/indexed/indexed_unique.rb
CHANGED
|
@@ -4,23 +4,6 @@
|
|
|
4
4
|
# SPDX-License-Identifier: MIT
|
|
5
5
|
|
|
6
6
|
# Indexed term 'unique'.
|
|
7
|
-
# The @idx[ikey] structure:
|
|
8
|
-
# {
|
|
9
|
-
# count: Integer (number of facts already processed),
|
|
10
|
-
# buckets: {
|
|
11
|
-
# key => {
|
|
12
|
-
# facts: Array (unique facts found),
|
|
13
|
-
# seen: Set (composite values already indexed to skip duplicates)
|
|
14
|
-
# }
|
|
15
|
-
# }
|
|
16
|
-
# }
|
|
17
|
-
# Example 1: (unique "fruit")
|
|
18
|
-
# - Apple, Apple, Banana
|
|
19
|
-
# - count: 3, facts: [Apple, Banana], seen: { [Apple], [Banana] }
|
|
20
|
-
#
|
|
21
|
-
# Example 2: (unique "fruit" "color")
|
|
22
|
-
# - [Apple, Red], [Apple, Green], [Apple, Red]
|
|
23
|
-
# - count: 3, facts: [[Apple, Red], [Apple, Green]], seen: { [Apple, Red], [Apple, Green] }
|
|
24
7
|
class Factbase::IndexedUnique
|
|
25
8
|
def initialize(term, idx)
|
|
26
9
|
@term = term
|
|
@@ -28,25 +11,6 @@ class Factbase::IndexedUnique
|
|
|
28
11
|
end
|
|
29
12
|
|
|
30
13
|
def predict(maps, _fb, _params)
|
|
31
|
-
|
|
32
|
-
bucket_key = operands.join('|')
|
|
33
|
-
idx_key = [maps.object_id, @term.op.to_s, bucket_key]
|
|
34
|
-
entry = (@idx[idx_key] ||= { buckets: {}, count: 0 })
|
|
35
|
-
feed(maps.to_a, entry, operands, bucket_key)
|
|
36
|
-
matches = entry[:buckets][bucket_key][:facts]
|
|
37
|
-
maps.respond_to?(:repack) ? maps.repack(matches) : matches
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
private
|
|
41
|
-
|
|
42
|
-
def feed(facts, entry, operands, bucket_key)
|
|
43
|
-
entry[:buckets][bucket_key] ||= { facts: [], seen: Set.new }
|
|
44
|
-
bucket = entry[:buckets][bucket_key]
|
|
45
|
-
(facts[entry[:count]..] || []).each do |fact|
|
|
46
|
-
composite_val = operands.map { |o| fact[o] }
|
|
47
|
-
next if composite_val.any?(&:nil?)
|
|
48
|
-
bucket[:facts] << fact if bucket[:seen].add?(composite_val)
|
|
49
|
-
end
|
|
50
|
-
entry[:count] = facts.size
|
|
14
|
+
maps
|
|
51
15
|
end
|
|
52
16
|
end
|
data/lib/factbase/lazy_taped.rb
CHANGED
|
@@ -72,17 +72,18 @@ class Factbase::LazyTaped
|
|
|
72
72
|
|
|
73
73
|
def each
|
|
74
74
|
return to_enum(__method__) unless block_given?
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
unless
|
|
78
|
-
|
|
79
|
-
m
|
|
80
|
-
|
|
75
|
+
yielded_size = 0
|
|
76
|
+
is_copied = copied?
|
|
77
|
+
unless is_copied
|
|
78
|
+
@origin.each do |m|
|
|
79
|
+
yield _tape(m)
|
|
80
|
+
yielded_size += 1
|
|
81
81
|
end
|
|
82
82
|
end
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
83
|
+
staged = is_copied == copied? ? @staged : @staged[yielded_size..]
|
|
84
|
+
staged&.each do |f|
|
|
85
|
+
next if f.nil?
|
|
86
|
+
yield _tape(f)
|
|
86
87
|
end
|
|
87
88
|
end
|
|
88
89
|
|
|
@@ -119,11 +120,9 @@ class Factbase::LazyTaped
|
|
|
119
120
|
|
|
120
121
|
def ensure_copied!
|
|
121
122
|
return if copied?
|
|
122
|
-
@origin.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
@staged << c
|
|
126
|
-
end
|
|
123
|
+
@staged = @origin.map do |o|
|
|
124
|
+
o.transform_values(&:dup).tap { |c| _track(c, o) }
|
|
125
|
+
end.concat(@staged)
|
|
127
126
|
@copied = true
|
|
128
127
|
end
|
|
129
128
|
|
data/lib/factbase/version.rb
CHANGED