factbase 0.19.8 → 0.19.9

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 03d4be418b18984640fa333733a6b688aa2879b2e7def6d0ad901bce47490cd2
4
- data.tar.gz: c6f24e2c444eb03eb739489d4cd09f927ece90a8c44f480c3b04fef1876b574d
3
+ metadata.gz: c7c832590816ff7438ea62c6692d2567775088ab27567d407b2294b8caafced7
4
+ data.tar.gz: 3c4b7313117776731de9b91cca81c472cd6dfe9dd64e7390381e54194b912b77
5
5
  SHA512:
6
- metadata.gz: f73778d27a87c5a436769565032c5b064b24354ce62fad61822fd29941664e96c6d70f8481bf146bc72ef32669fc4969bae050eb70ef2f5c1fa0e19ee9395000
7
- data.tar.gz: 299d0afc68fa48166fd4e01366e82ce1d22f8aa8c2f1dc2cfc9bd397763080f8db1762b20caa7738bafd0a5d606e9fa1725c13f400e13b40193ff13528b05b59
6
+ metadata.gz: b7569a94be51517bb2860cf368849aaa22f89f96b04a16a29fafba294c6ad72c8b03e359f35024a482a82f657edfb36710ec7f60504ff4422663a96cc0a3ebc8
7
+ data.tar.gz: 7991d24eae8705444c781b35e00fe74b3f3087e015b3ac2cc1d59d250e0afddd70b7c23006dabf85d4d14dbc2c920a78f1e7e6068edc235c0b45dbdb4af5c17b
data/README.md CHANGED
@@ -95,6 +95,30 @@ fb.query('(eq foo 43)').each do |f|
95
95
  end
96
96
  ```
97
97
 
98
+ Deleting while iterating is unsafe and may cause elements to be skipped:
99
+
100
+ ```ruby
101
+ fb = Factbase.new
102
+ fb.insert.id = 1
103
+ fb.insert.id = 2
104
+ fb.query('(always)').each do |f|
105
+ fb.query("(eq id #{f.id})").delete!
106
+ end
107
+ assert(1 == fb.size)
108
+ ```
109
+
110
+ To safely delete, use a snapshot:
111
+
112
+ ```ruby
113
+ fb = Factbase.new
114
+ fb.insert.id = 1
115
+ fb.insert.id = 2
116
+ fb.query('(always)').to_a.each do |f|
117
+ fb.query("(eq id #{f.id})").delete!
118
+ end
119
+ assert(0 == fb.size)
120
+ ```
121
+
98
122
  ## Terms
99
123
 
100
124
  There are some boolean terms available in a query
@@ -227,89 +251,89 @@ This is the result of the benchmark:
227
251
  ```text
228
252
 
229
253
  query all facts from an empty factbase 0.00
230
- insert 20000 facts 0.63
254
+ insert 20000 facts 0.66
231
255
  export 20000 facts 0.02
232
- import 411003 bytes (20000 facts) 0.01
233
- insert 10 facts 0.03
234
- query 10 times w/txn 2.03
235
- query 10 times w/o txn 0.13
236
- modify 10 attrs w/txn 1.51
237
- delete 10 facts w/txn 2.97
256
+ import 410996 bytes (20000 facts) 0.02
257
+ insert 10 facts 0.00
258
+ query 10 times w/txn 2.13
259
+ query 10 times w/o txn 0.12
260
+ modify 10 attrs w/txn 1.62
261
+ delete 10 facts w/txn 10.22
238
262
  build index on 5000 facts 0.03
239
263
  export 5000 facts with index 0.04
240
264
  import 5000 facts with persisted index 0.03
241
- query 5000 facts using persisted index 0.07
242
- export 5000 facts without index 0.01
265
+ query 5000 facts using persisted index 0.08
266
+ export 5000 facts without index 0.02
243
267
  import 5000 facts without index 0.01
244
268
  query 5000 facts building index on-the-fly 0.07
245
- query 15k facts sel: 20% card: 10 absent plain 0.61
246
- query 15k facts sel: 20% card: 10 absent indexed(cold) 0.16
247
- query 15k facts sel: 20% card: 10 absent indexed(warm) 0.13
248
- query 15k facts sel: 20% card: 10 exists plain 0.56
249
- query 15k facts sel: 20% card: 10 exists indexed(cold) 0.14
269
+ query 15k facts sel: 20% card: 10 absent plain 0.60
270
+ query 15k facts sel: 20% card: 10 absent indexed(cold) 0.17
271
+ query 15k facts sel: 20% card: 10 absent indexed(warm) 0.16
272
+ query 15k facts sel: 20% card: 10 exists plain 0.57
273
+ query 15k facts sel: 20% card: 10 exists indexed(cold) 0.17
250
274
  query 15k facts sel: 20% card: 10 exists indexed(warm) 0.13
251
- query 15k facts sel: 20% card: 10 eq plain 0.83
252
- query 15k facts sel: 20% card: 10 eq indexed(cold) 0.22
253
- query 15k facts sel: 20% card: 10 eq indexed(warm) 0.24
254
- query 15k facts sel: 20% card: 10 not plain 1.11
255
- query 15k facts sel: 20% card: 10 not indexed(cold) 0.50
256
- query 15k facts sel: 20% card: 10 not indexed(warm) 0.43
257
- query 15k facts sel: 20% card: 10 gt plain 0.84
258
- query 15k facts sel: 20% card: 10 gt indexed(cold) 0.26
259
- query 15k facts sel: 20% card: 10 gt indexed(warm) 0.19
260
- query 15k facts sel: 20% card: 10 lt plain 0.84
261
- query 15k facts sel: 20% card: 10 lt indexed(cold) 0.27
275
+ query 15k facts sel: 20% card: 10 eq plain 0.86
276
+ query 15k facts sel: 20% card: 10 eq indexed(cold) 0.26
277
+ query 15k facts sel: 20% card: 10 eq indexed(warm) 0.19
278
+ query 15k facts sel: 20% card: 10 not plain 1.16
279
+ query 15k facts sel: 20% card: 10 not indexed(cold) 0.53
280
+ query 15k facts sel: 20% card: 10 not indexed(warm) 0.51
281
+ query 15k facts sel: 20% card: 10 gt plain 0.88
282
+ query 15k facts sel: 20% card: 10 gt indexed(cold) 0.29
283
+ query 15k facts sel: 20% card: 10 gt indexed(warm) 0.24
284
+ query 15k facts sel: 20% card: 10 lt plain 0.87
285
+ query 15k facts sel: 20% card: 10 lt indexed(cold) 0.29
262
286
  query 15k facts sel: 20% card: 10 lt indexed(warm) 0.20
263
- query 15k facts sel: 20% card: 10 and eq plain 1.39
264
- query 15k facts sel: 20% card: 10 and eq indexed(cold) 0.85
265
- query 15k facts sel: 20% card: 10 and eq indexed(warm) 0.46
266
- query 15k facts sel: 20% card: 10 and complex plain 1.32
267
- query 15k facts sel: 20% card: 10 and complex indexed(cold) 0.48
268
- query 15k facts sel: 20% card: 10 and complex indexed(warm) 0.43
269
- query 15k facts sel: 20% card: 10 one plain 0.72
270
- query 15k facts sel: 20% card: 10 one indexed(cold) 0.19
271
- query 15k facts sel: 20% card: 10 one indexed(warm) 0.15
287
+ query 15k facts sel: 20% card: 10 and eq plain 1.43
288
+ query 15k facts sel: 20% card: 10 and eq indexed(cold) 0.91
289
+ query 15k facts sel: 20% card: 10 and eq indexed(warm) 0.50
290
+ query 15k facts sel: 20% card: 10 and complex plain 1.38
291
+ query 15k facts sel: 20% card: 10 and complex indexed(cold) 0.51
292
+ query 15k facts sel: 20% card: 10 and complex indexed(warm) 0.45
293
+ query 15k facts sel: 20% card: 10 one plain 0.75
294
+ query 15k facts sel: 20% card: 10 one indexed(cold) 0.21
295
+ query 15k facts sel: 20% card: 10 one indexed(warm) 0.16
272
296
  query 15k facts sel: 20% card: 10 or plain 2.02
273
- query 15k facts sel: 20% card: 10 or indexed(cold) 0.42
274
- query 15k facts sel: 20% card: 10 or indexed(warm) 0.36
275
- query 15k facts sel: 20% card: 10 unique plain 1.86
276
- query 15k facts sel: 20% card: 10 unique indexed(cold) 0.61
277
- query 15k facts sel: 20% card: 10 unique indexed(warm) 0.37
278
- (and (eq what 'issue-was-closed') (exists... -> 200 1.04
279
- (and (eq what 'issue-was-closed') (exists... -> 200/txn 1.23
280
- (and (eq what 'issue-was-closed') (exists... -> zero 1.06
281
- (and (eq what 'issue-was-closed') (exists... -> zero/txn 1.23
282
- transaction rollback on factbase with 100000 facts 0.26
283
- (gt time '2024-03-23T03:21:43Z') 0.22
284
- (gt cost 50) 0.10
285
- (eq title 'Object Thinking 5000') 0.03
286
- (and (eq foo 42.998) (or (gt bar 200) (absent z... 0.02
287
- (and (exists foo) (not (exists blue))) 1.12
288
- (eq id (agg (always) (max id))) 2.76
289
- (join "c<=cost,b<=bar" (eq id (agg (always) (ma... 4.58
290
- (and (eq what "foo") (join "w<=what" (and (eq i... 7.26
291
- delete! 0.42
292
- (and (eq issue *) (eq repository *) (eq what '*') (eq where '*')) 0.38
297
+ query 15k facts sel: 20% card: 10 or indexed(cold) 0.46
298
+ query 15k facts sel: 20% card: 10 or indexed(warm) 0.32
299
+ query 15k facts sel: 20% card: 10 unique plain 1.87
300
+ query 15k facts sel: 20% card: 10 unique indexed(cold) 0.67
301
+ query 15k facts sel: 20% card: 10 unique indexed(warm) 0.42
302
+ (and (eq what 'issue-was-closed') (exists... -> 200 1.08
303
+ (and (eq what 'issue-was-closed') (exists... -> 200/txn 1.24
304
+ (and (eq what 'issue-was-closed') (exists... -> zero 1.08
305
+ (and (eq what 'issue-was-closed') (exists... -> zero/txn 1.28
306
+ transaction rollback on factbase with 100000 facts 0.01
307
+ (gt time '2024-03-23T03:21:43Z') 0.31
308
+ (gt cost 50) 0.14
309
+ (eq title 'Object Thinking 5000') 0.02
310
+ (and (eq foo 42.998) (or (gt bar 200) (absent z... 0.03
311
+ (and (exists foo) (not (exists blue))) 1.23
312
+ (eq id (agg (always) (max id))) 2.80
313
+ (join "c<=cost,b<=bar" (eq id (agg (always) (ma... 4.44
314
+ (and (eq what "foo") (join "w<=what" (and (eq i... 7.39
315
+ delete! 0.44
316
+ (and (eq issue *) (eq repository *) (eq what '*') (eq where '*')) 0.41
293
317
  Taped.append() x50000 0.02
294
- Taped.each() x125 1.08
318
+ Taped.each() x125 1.10
295
319
  Taped.delete_if() x375 0.86
296
- 50000 facts: plain read (no txn) 4.06
297
- 50000 facts: read-only txn (no copy) 4.85
320
+ 50000 facts: plain read (no txn) 4.10
321
+ 50000 facts: read-only txn (no copy) 5.33
298
322
  50000 facts: plain insert (no txn) 0.00
299
- 50000 facts: insert in txn (copy triggered) 3.31
300
- 50000 facts: plain modify (no txn) 28.21
301
- 50000 facts: modify in txn (copy triggered) 35.14
302
- 100000 facts: plain read (no txn) 8.06
303
- 100000 facts: read-only txn (no copy) 9.94
323
+ 50000 facts: insert in txn (no copy triggered) 0.00
324
+ 50000 facts: plain modify (no txn) 28.59
325
+ 50000 facts: modify in txn (copy triggered) 37.78
326
+ 100000 facts: plain read (no txn) 8.33
327
+ 100000 facts: read-only txn (no copy) 12.80
304
328
  100000 facts: plain insert (no txn) 0.00
305
- 100000 facts: insert in txn (copy triggered) 6.68
306
- 100000 facts: plain modify (no txn) 56.21
307
- 100000 facts: modify in txn (copy triggered) 70.09
329
+ 100000 facts: insert in txn (no copy triggered) 0.00
330
+ 100000 facts: plain modify (no txn) 57.26
331
+ 100000 facts: modify in txn (copy triggered) 75.84
308
332
  ```
309
333
 
310
334
  The results were calculated in [this GHA job][benchmark-gha]
311
- on 2026-02-17 at 18:48,
335
+ on 2026-02-26 at 06:08,
312
336
  on Linux with 4 CPUs.
313
337
  <!-- benchmark_end -->
314
338
 
315
- [benchmark-gha]: https://github.com/yegor256/factbase/actions/runs/22111136309
339
+ [benchmark-gha]: https://github.com/yegor256/factbase/actions/runs/22430010182
data/Rakefile CHANGED
@@ -20,6 +20,12 @@ end
20
20
 
21
21
  task default: %i[clean test picks rubocop yard]
22
22
 
23
+ def tail(args = ARGV)
24
+ i = args.index('--')
25
+ return '' if i.nil? || args[i + 1].nil?
26
+ args[i..].join(' ')
27
+ end
28
+
23
29
  require 'rake/testtask'
24
30
  desc 'Run all unit tests'
25
31
  Rake::TestTask.new(:test) do |test|
@@ -28,6 +34,7 @@ Rake::TestTask.new(:test) do |test|
28
34
  test.pattern = 'test/**/test_*.rb'
29
35
  test.warning = true
30
36
  test.verbose = false
37
+ test.options = tail
31
38
  end
32
39
 
33
40
  desc 'Run them via Ruby, one by one'
@@ -51,8 +51,9 @@ class Factbase::IndexedAnd
51
51
  if r.nil?
52
52
  r = n
53
53
  elsif n.size < r.size * 8 # to skip some obvious matchings
54
- ids = n.to_set(&:object_id)
55
- r = r.select { |f| ids.include?(f.object_id) }
54
+ small, large = n.size < r.size ? [n.to_a, r.to_a] : [r.to_a, n.to_a]
55
+ ids = Set.new(small.map(&:object_id))
56
+ r = large.select { |f| ids.include?(f.object_id) }
56
57
  end
57
58
  break if r.size < maps.size / 32 # it's already small enough
58
59
  break if r.size < 128 # it's obviously already small enough
@@ -4,23 +4,6 @@
4
4
  # SPDX-License-Identifier: MIT
5
5
 
6
6
  # Indexed term 'unique'.
7
- # The @idx[ikey] structure:
8
- # {
9
- # count: Integer (number of facts already processed),
10
- # buckets: {
11
- # key => {
12
- # facts: Array (unique facts found),
13
- # seen: Set (composite values already indexed to skip duplicates)
14
- # }
15
- # }
16
- # }
17
- # Example 1: (unique "fruit")
18
- # - Apple, Apple, Banana
19
- # - count: 3, facts: [Apple, Banana], seen: { [Apple], [Banana] }
20
- #
21
- # Example 2: (unique "fruit" "color")
22
- # - [Apple, Red], [Apple, Green], [Apple, Red]
23
- # - count: 3, facts: [[Apple, Red], [Apple, Green]], seen: { [Apple, Red], [Apple, Green] }
24
7
  class Factbase::IndexedUnique
25
8
  def initialize(term, idx)
26
9
  @term = term
@@ -28,25 +11,6 @@ class Factbase::IndexedUnique
28
11
  end
29
12
 
30
13
  def predict(maps, _fb, _params)
31
- operands = @term.operands.map(&:to_s)
32
- bucket_key = operands.join('|')
33
- idx_key = [maps.object_id, @term.op.to_s, bucket_key]
34
- entry = (@idx[idx_key] ||= { buckets: {}, count: 0 })
35
- feed(maps.to_a, entry, operands, bucket_key)
36
- matches = entry[:buckets][bucket_key][:facts]
37
- maps.respond_to?(:repack) ? maps.repack(matches) : matches
38
- end
39
-
40
- private
41
-
42
- def feed(facts, entry, operands, bucket_key)
43
- entry[:buckets][bucket_key] ||= { facts: [], seen: Set.new }
44
- bucket = entry[:buckets][bucket_key]
45
- (facts[entry[:count]..] || []).each do |fact|
46
- composite_val = operands.map { |o| fact[o] }
47
- next if composite_val.any?(&:nil?)
48
- bucket[:facts] << fact if bucket[:seen].add?(composite_val)
49
- end
50
- entry[:count] = facts.size
14
+ maps
51
15
  end
52
16
  end
@@ -72,17 +72,18 @@ class Factbase::LazyTaped
72
72
 
73
73
  def each
74
74
  return to_enum(__method__) unless block_given?
75
- st_size = @staged.size
76
- orig_size = @origin.size
77
- unless copied?
78
- orig_size.times do |i|
79
- m = @origin[i]
80
- yield _tape(m) unless m.nil?
75
+ yielded_size = 0
76
+ is_copied = copied?
77
+ unless is_copied
78
+ @origin.each do |m|
79
+ yield _tape(m)
80
+ yielded_size += 1
81
81
  end
82
82
  end
83
- st_size.times do |i|
84
- m = @staged[i]
85
- yield _tape(m) unless m.nil?
83
+ staged = is_copied == copied? ? @staged : @staged[yielded_size..]
84
+ staged&.each do |f|
85
+ next if f.nil?
86
+ yield _tape(f)
86
87
  end
87
88
  end
88
89
 
@@ -119,11 +120,9 @@ class Factbase::LazyTaped
119
120
 
120
121
  def ensure_copied!
121
122
  return if copied?
122
- @origin.each do |o|
123
- c = o.transform_values(&:dup)
124
- _track(c, o)
125
- @staged << c
126
- end
123
+ @staged = @origin.map do |o|
124
+ o.transform_values(&:dup).tap { |c| _track(c, o) }
125
+ end.concat(@staged)
127
126
  @copied = true
128
127
  end
129
128
 
@@ -9,5 +9,5 @@
9
9
  # License:: MIT
10
10
  class Factbase
11
11
  # Current version of the gem (changed by .rultor.yml on every release)
12
- VERSION = '0.19.8' unless const_defined?(:VERSION)
12
+ VERSION = '0.19.9' unless const_defined?(:VERSION)
13
13
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: factbase
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.8
4
+ version: 0.19.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yegor Bugayenko