factbase 0.19.4 → 0.19.6

This diff shows the differences in content between two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bc6cb6bd378d266fcaa401091228e6dc4084474f5f16e89c9fb29dd56d577c77
4
- data.tar.gz: 8c1e86c86b01371f159d29c0187a263598d7d5b6c2949200b7813dff22fd6fca
3
+ metadata.gz: 99e8fdb60e5fb1e8083b76db845cfad8a7b6be35700c543819ce631f8ac3ca19
4
+ data.tar.gz: 1fd106429c8a3aa39e56683060b5e61395e705abe336290da63fb1a35e748ec5
5
5
  SHA512:
6
- metadata.gz: afa7d0dd3cace0292ce9de22c102dfed04772f568f9ad725e5d937024918b6c2c103a87221bbee3e3902167f0c3ec886567cb791593e8693e2723e66fa2d62f7
7
- data.tar.gz: 55891ef31903c5d07a6d153bb23bff4264e3d2c414ed4655e2388702d51d1bcdcb75a490f40b8e62007a32dbe571ee0146ab190b890bb5935ad9393d215e0e54
6
+ metadata.gz: e03d29fb285a301e48723690f3bdad751a0bc229bd559d901e22fdb196a4e263d5b75bbc6fef2da5480f07f1a4d22d510e630f7f7a7af8988d2b981d6a4b0b10
7
+ data.tar.gz: c41be62abd514f4ea0fde5e47fba206f27a8bd89a82a17e03c55a3908e5d9c98e909eca6bb571e183f8c2f11421617f0a20b31db132c1107a91eea0fb12cb0c1
data/README.md CHANGED
@@ -80,6 +80,21 @@ churn = fb.churn
80
80
  assert churn.inserted == 1
81
81
  ```
82
82
 
83
+ Properties are accumulative.
84
+ Setting a property again adds a value instead of overwriting:
85
+
86
+ ```ruby
87
+ f = fb.insert
88
+ f.foo = 42
89
+ f.foo = 43
90
+ assert(f.foo == 42)
91
+ assert(f['foo'] == [42, 43])
92
+ fb.query('(eq foo 43)').each do |f|
93
+ assert(f.foo == 42)
94
+ assert(f['foo'].include?(43))
95
+ end
96
+ ```
97
+
83
98
  ## Terms
84
99
 
85
100
  There are some boolean terms available in a query
@@ -26,6 +26,10 @@ class Factbase::IndexedAbsent
26
26
  end
27
27
  entry[:indexed_count] = maps_array.size
28
28
  end
29
- (maps & []) | entry[:facts]
29
+ if maps.respond_to?(:ensure_copied!)
30
+ maps & entry[:facts]
31
+ else
32
+ (maps & []) | entry[:facts]
33
+ end
30
34
  end
31
35
  end
@@ -45,7 +45,7 @@ class Factbase::IndexedAnd
45
45
  j = tuples.flat_map { |t| entry[:index][t] || [] }.uniq(&:object_id)
46
46
  r =
47
47
  if maps.respond_to?(:inserted)
48
- Factbase::Taped.new(j, inserted: maps.inserted, deleted: maps.deleted, added: maps.added)
48
+ maps & j
49
49
  else
50
50
  j
51
51
  end
@@ -38,7 +38,7 @@ class Factbase::IndexedEq
38
38
  end
39
39
  j = vv.flat_map { |v| entry[:index][v] || [] }.uniq(&:object_id)
40
40
  if maps.respond_to?(:inserted)
41
- Factbase::Taped.new(j, inserted: maps.inserted, deleted: maps.deleted, added: maps.added)
41
+ maps & j
42
42
  else
43
43
  j
44
44
  end
@@ -4,6 +4,11 @@
4
4
  # SPDX-License-Identifier: MIT
5
5
 
6
6
  # Indexed term 'exists'.
7
+ # The @idx[key] structure:
8
+ # {
9
+ # count: Integer (number of facts already processed),
10
+ # facts: Array (facts found),
11
+ # }
7
12
  class Factbase::IndexedExists
8
13
  def initialize(term, idx)
9
14
  @term = term
@@ -11,21 +16,21 @@ class Factbase::IndexedExists
11
16
  end
12
17
 
13
18
  def predict(maps, _fb, _params)
14
- return nil if @idx.nil?
15
- key = [maps.object_id, @term.operands.first, @term.op]
19
+ operand = @term.operands.first.to_s
20
+ key = [maps.object_id, operand, @term.op]
21
+ @idx[key] = { facts: [], count: 0 } if @idx[key].nil?
16
22
  entry = @idx[key]
17
- maps_array = maps.to_a
18
- if entry.nil?
19
- entry = { facts: [], indexed_count: 0 }
20
- @idx[key] = entry
21
- end
22
- if entry[:indexed_count] < maps_array.size
23
- prop = @term.operands.first.to_s
24
- maps_array[entry[:indexed_count]..].each do |m|
25
- entry[:facts] << m unless m[prop].nil?
26
- end
27
- entry[:indexed_count] = maps_array.size
23
+ feed(maps.to_a, entry, operand)
24
+ return maps.repack(entry[:facts]) if maps.respond_to?(:repack)
25
+ entry[:facts]
26
+ end
27
+
28
+ private
29
+
30
+ def feed(facts, entry, operand)
31
+ facts[entry[:count]..].each do |m|
32
+ entry[:facts] << m unless m[operand].nil?
28
33
  end
29
- (maps & []) | entry[:facts]
34
+ entry[:count] = facts.size
30
35
  end
31
36
  end
@@ -36,7 +36,11 @@ class Factbase::IndexedGt
36
36
  return nil if threshold.nil?
37
37
  i = entry[:sorted].bsearch_index { |pair| pair[0] > threshold } || entry[:sorted].size
38
38
  result = entry[:sorted][i..].map { |pair| pair[1] }.uniq
39
- (maps & []) | result
39
+ if maps.respond_to?(:ensure_copied!)
40
+ maps & result
41
+ else
42
+ (maps & []) | result
43
+ end
40
44
  end
41
45
 
42
46
  private
@@ -41,7 +41,11 @@ class Factbase::IndexedLt
41
41
  return nil if threshold.nil?
42
42
  i = entry[:sorted].bsearch_index { |pair| pair[0] >= threshold } || entry[:sorted].size
43
43
  result = entry[:sorted][0...i].map { |pair| pair[1] }.uniq
44
- (maps & []) | result
44
+ if maps.respond_to?(:ensure_copied!)
45
+ maps & result
46
+ else
47
+ (maps & []) | result
48
+ end
45
49
  end
46
50
 
47
51
  private
@@ -34,6 +34,8 @@ class Factbase::IndexedNot
34
34
  r = entry[:facts]
35
35
  if r.nil?
36
36
  nil
37
+ elsif maps.respond_to?(:ensure_copied!)
38
+ maps & r
37
39
  else
38
40
  (maps & []) | r
39
41
  end
@@ -26,6 +26,10 @@ class Factbase::IndexedOne
26
26
  end
27
27
  entry[:indexed_count] = maps_array.size
28
28
  end
29
- (maps & []) | entry[:facts]
29
+ if maps.respond_to?(:ensure_copied!)
30
+ maps & entry[:facts]
31
+ else
32
+ (maps & []) | entry[:facts]
33
+ end
30
34
  end
31
35
  end
@@ -4,9 +4,23 @@
4
4
  # SPDX-License-Identifier: MIT
5
5
 
6
6
  # Indexed term 'unique'.
7
- # @todo #249:30min Improve prediction for 'unique' term. Current prediction is quite naive and
8
- # returns many false positives because it just filters facts which have exactly the same set
9
- # of keys regardless the values. We should introduce more smart prediction.
7
+ # The @idx[ikey] structure:
8
+ # {
9
+ # count: Integer (number of facts already processed),
10
+ # buckets: {
11
+ # key => {
12
+ # facts: Array (unique facts found),
13
+ # seen: Set (composite values already indexed to skip duplicates)
14
+ # }
15
+ # }
16
+ # }
17
+ # Example 1: (unique "fruit")
18
+ # - Apple, Apple, Banana
19
+ # - count: 3, facts: [Apple, Banana], seen: { [Apple], [Banana] }
20
+ #
21
+ # Example 2: (unique "fruit" "color")
22
+ # - [Apple, Red], [Apple, Green], [Apple, Red]
23
+ # - count: 3, facts: [[Apple, Red], [Apple, Green]], seen: { [Apple, Red], [Apple, Green] }
10
24
  class Factbase::IndexedUnique
11
25
  def initialize(term, idx)
12
26
  @term = term
@@ -14,21 +28,29 @@ class Factbase::IndexedUnique
14
28
  end
15
29
 
16
30
  def predict(maps, _fb, _params)
17
- return nil if @idx.nil?
18
- key = [maps.object_id, @term.operands.first, @term.op]
19
- entry = @idx[key]
20
- maps_array = maps.to_a
21
- if entry.nil?
22
- entry = { facts: [], indexed_count: 0 }
23
- @idx[key] = entry
31
+ operands = @term.operands.map(&:to_s)
32
+ bucket_key = operands.join('|')
33
+ idx_key = [maps.object_id, @term.op.to_s, bucket_key]
34
+ entry = (@idx[idx_key] ||= { buckets: {}, count: 0 })
35
+ feed(maps.to_a, entry, operands, bucket_key)
36
+ bucket = entry[:buckets][bucket_key]
37
+ if maps.respond_to?(:ensure_copied!)
38
+ maps & (bucket[:facts] || [])
39
+ else
40
+ (maps & []) | (bucket[:facts] || [])
24
41
  end
25
- if entry[:indexed_count] < maps_array.size
26
- props = @term.operands.map(&:to_s)
27
- maps_array[entry[:indexed_count]..].each do |m|
28
- entry[:facts] << m if props.all? { |p| !m[p].nil? }
29
- end
30
- entry[:indexed_count] = maps_array.size
42
+ end
43
+
44
+ private
45
+
46
+ def feed(facts, entry, operands, bucket_key)
47
+ entry[:buckets][bucket_key] ||= { facts: [], seen: Set.new }
48
+ bucket = entry[:buckets][bucket_key]
49
+ (facts[entry[:count]..] || []).each do |fact|
50
+ composite_val = operands.map { |o| fact[o] }
51
+ next if composite_val.any?(&:nil?)
52
+ bucket[:facts] << fact if bucket[:seen].add?(composite_val)
31
53
  end
32
- (maps & []) | entry[:facts]
54
+ entry[:count] = facts.size
33
55
  end
34
56
  end
@@ -14,6 +14,7 @@ class Factbase::LazyTaped
14
14
  @copied = false
15
15
  @maps = nil
16
16
  @pairs = nil
17
+ @inverted_pairs = nil
17
18
  @inserted = []
18
19
  @deleted = []
19
20
  @added = []
@@ -62,7 +63,7 @@ class Factbase::LazyTaped
62
63
  end
63
64
 
64
65
  def <<(map)
65
- ensure_copied
66
+ ensure_copied!
66
67
  @maps << map
67
68
  @inserted.append(map.object_id)
68
69
  end
@@ -81,7 +82,7 @@ class Factbase::LazyTaped
81
82
  end
82
83
 
83
84
  def delete_if
84
- ensure_copied
85
+ ensure_copied!
85
86
  @maps.delete_if do |m|
86
87
  r = yield m
87
88
  @deleted.append(@pairs[m].object_id) if r
@@ -93,6 +94,12 @@ class Factbase::LazyTaped
93
94
  (@maps || @origin).to_a
94
95
  end
95
96
 
97
+ def repack(other)
98
+ ensure_copied!
99
+ copied = other.map { |o| @inverted_pairs[o] || o }
100
+ Factbase::Taped.new(copied, inserted: @inserted, deleted: @deleted, added: @added)
101
+ end
102
+
96
103
  def &(other)
97
104
  if other == [] || (@maps || @origin).empty?
98
105
  return Factbase::Taped.new([], inserted: @inserted, deleted: @deleted, added: @added)
@@ -108,26 +115,29 @@ class Factbase::LazyTaped
108
115
  join(other, &:|)
109
116
  end
110
117
 
111
- def ensure_copied
118
+ def ensure_copied!
112
119
  return if @copied
113
120
  @pairs = {}.compare_by_identity
121
+ @inverted_pairs = {}.compare_by_identity
114
122
  @maps =
115
123
  @origin.map do |m|
116
124
  n = m.transform_values(&:dup)
117
125
  @pairs[n] = m
126
+ @inverted_pairs[m] = n
118
127
  n
119
128
  end
120
129
  @copied = true
121
130
  end
122
131
 
123
132
  def get_copied_map(original_map)
124
- ensure_copied
133
+ ensure_copied!
125
134
  @maps.find { |m| @pairs[m].equal?(original_map) }
126
135
  end
127
136
 
128
137
  private
129
138
 
130
139
  def join(other)
140
+ ensure_copied!
131
141
  n = yield (@maps || @origin).to_a, other.to_a
132
142
  raise 'Cannot join with another Taped' if other.respond_to?(:inserted)
133
143
  raise 'Can only join with array' unless other.is_a?(Array)
@@ -68,6 +68,10 @@ class Factbase::Taped
68
68
  @origin.to_a
69
69
  end
70
70
 
71
+ def repack(other)
72
+ Factbase::Taped.new(other, inserted: @inserted, deleted: @deleted, added: @added)
73
+ end
74
+
71
75
  def &(other)
72
76
  if other == [] || @origin.empty?
73
77
  return Factbase::Taped.new([], inserted: @inserted, deleted: @deleted, added: @added)
data/lib/factbase/term.rb CHANGED
@@ -176,7 +176,7 @@ class Factbase::Term < Factbase::TermBase
176
176
  # Try to predict which facts from the provided list
177
177
  # should be evaluated. If no prediction can be made,
178
178
  # the same list is returned.
179
- # @param [Array<Hash>] maps Records to iterate, maybe
179
+ # @param [Array<Hash>, Factbase::Taped, Factbase::LazyTaped] maps Records to iterate, maybe
180
180
  # @param [Hash] params Params to use (keys must be strings, not symbols, with values as arrays)
181
181
  # @return [Array<Hash>] Records to iterate
182
182
  def predict(maps, fb, params)
@@ -9,5 +9,5 @@
9
9
  # License:: MIT
10
10
  class Factbase
11
11
  # Current version of the gem (changed by .rultor.yml on every release)
12
- VERSION = '0.19.4' unless const_defined?(:VERSION)
12
+ VERSION = '0.19.6' unless const_defined?(:VERSION)
13
13
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: factbase
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.4
4
+ version: 0.19.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yegor Bugayenko