bmg 0.18.11 → 0.18.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eda140880e29f5e083ad334176579ed2dfad9090
4
- data.tar.gz: 444a8cc5f96b80ba3bbf6d807be1438c2d26abd7
3
+ metadata.gz: 14260453d9c3f5d24b5ef6975072e452af309485
4
+ data.tar.gz: f6de19178d2b4404c9fbe248d42f9c596cee410e
5
5
  SHA512:
6
- metadata.gz: 96ecc393e90d3ae413655d70c4b1676fa2f9831e519ef61cbd25139b873efcd75a825745cce0597e083fc59bc331b08d30b5717b1e5b42adacb5297147a044bd
7
- data.tar.gz: dc687db5b22f9f3fc53b2ccf0916722065ca7d25b8ad04b0a042c6d79fd66f40c22f2d05461da8019db8b70f54c4c104658f3dce3862f9a37b2af44c6ee25d61
6
+ metadata.gz: e53da5df4abb7095036dc275948a70e4af9f1945e2fd446244e765d7fead1258a9e564d9076baa3efc4ee29566810a5176c76d2c3c953de906a7ff36a3a86dff
7
+ data.tar.gz: a6f0cb03795e5b954563f774bc65c7879a78049928d06da935017a8dfcdb2bdd563c4ab08922cb95f774d68714ab108bb480d0722249ec5e6cc67fed5ed0064a
data/lib/bmg/algebra.rb CHANGED
@@ -12,6 +12,7 @@ module Bmg
12
12
  protected :_allbut
13
13
 
14
14
  def autowrap(options = {})
15
+ return self if self.type.identity_autowrap?(options)
15
16
  _autowrap self.type.autowrap(options), options
16
17
  end
17
18
 
@@ -20,12 +21,12 @@ module Bmg
20
21
  end
21
22
  protected :_autowrap
22
23
 
23
- def autosummarize(by = [], summarization = {})
24
- _autosummarize type = self.type.autosummarize(by, summarization), by, summarization
24
+ def autosummarize(by = [], summarization = {}, options = {})
25
+ _autosummarize self.type.autosummarize(by, summarization, options), by, summarization, options
25
26
  end
26
27
 
27
- def _autosummarize(type, by, summarization)
28
- Operator::Autosummarize.new(type, self, by, summarization)
28
+ def _autosummarize(type, by, summarization, options)
29
+ Operator::Autosummarize.new(type, self, by, summarization, options)
29
30
  end
30
31
  protected :_autosummarize
31
32
 
@@ -11,16 +11,22 @@ module Bmg
11
11
  class Autosummarize
12
12
  include Operator::Unary
13
13
 
14
- def initialize(type, operand, by, sums)
14
+ DEFAULT_OPTIONS = {
15
+ default: :same
16
+ }
17
+
18
+ def initialize(type, operand, by, sums, options = {})
15
19
  @type = type
16
20
  @operand = operand
17
21
  @by = by
18
22
  @sums = sums.each_with_object({}){|(k,v),h| h[k] = to_summarizer(v) }
23
+ @options = DEFAULT_OPTIONS.merge(options)
24
+ @algo = build_algo
19
25
  end
20
26
 
21
27
  protected
22
28
 
23
- attr_reader :by, :sums
29
+ attr_reader :by, :sums, :options
24
30
 
25
31
  public
26
32
 
@@ -45,17 +51,17 @@ module Bmg
45
51
  h = {}
46
52
  @operand.each do |tuple|
47
53
  key = key(tuple)
48
- h[key] ||= init(key, tuple)
49
- h[key] = sum(h[key], tuple)
54
+ h[key] ||= @algo.init(tuple)
55
+ h[key] = @algo.sum(h[key], tuple)
50
56
  end
51
57
  h.each_pair do |k,v|
52
- h[k] = term(v)
58
+ h[k] = @algo.term(v)
53
59
  end
54
60
  h.values.each(&bl)
55
61
  end
56
62
 
57
63
  def to_ast
58
- [:autosummarize, operand.to_ast, by.dup, sums.dup]
64
+ [:autosummarize, operand.to_ast, by.dup, sums.dup, options.dup]
59
65
  end
60
66
 
61
67
  public ### for internal reasons
@@ -73,7 +79,7 @@ module Bmg
73
79
  else
74
80
  op = operand
75
81
  op = op.restrict(bottom)
76
- op = op.autosummarize(by, sums)
82
+ op = op.autosummarize(by, sums, options)
77
83
  op = op.restrict(top)
78
84
  op
79
85
  end
@@ -87,46 +93,86 @@ module Bmg
87
93
 
88
94
  private
89
95
 
96
+ def build_algo
97
+ case default = @options[:default]
98
+ when :same then Check.new(sums)
99
+ when :first then Trust.new(sums)
100
+ else
101
+ raise ArgumentError, "Unknown default summarizer: `#{default}`"
102
+ end
103
+ end
104
+
90
105
  # Returns the tuple determinant.
91
106
  def key(tuple)
92
107
  @by.map{|by| tuple[by] }
93
108
  end
94
109
 
95
- # Returns the initial tuple to use for a given determinant.
96
- def init(key, tuple)
97
- tuple.each_with_object({}){|(k,v),h|
98
- h.merge!(k => summarizer(k).init(v))
99
- }
100
- end
101
-
102
- # Returns the summarizer to use for a given key.
103
- def summarizer(k)
104
- @sums[k] || Same.new
105
- end
106
-
107
- # Sums `tuple` on `memo`, returning the new tuple to use as memo.
108
- def sum(memo, tuple)
109
- tuple.each_with_object(memo.dup){|(k,v),h|
110
- h.merge!(k => summarizer(k).sum(h[k], v))
111
- }
112
- end
113
-
114
- # Terminates the summarization of a given tuple.
115
- def term(tuple)
116
- tuple.each_with_object({}){|(k,v),h|
117
- h.merge!(k => summarizer(k).term(v))
118
- }
119
- end
120
-
121
110
  def to_summarizer(x)
122
111
  case x
123
- when :same then Same.new
124
- when :group then DistinctList.new
112
+ when :same then Same::INSTANCE
113
+ when :group then DistinctList::INSTANCE
125
114
  else
126
115
  x
127
116
  end
128
117
  end
129
118
 
119
+ class Check
120
+ def initialize(sums)
121
+ @sums = sums
122
+ end
123
+ attr_reader :sums
124
+
125
+ def summarizer(k)
126
+ @sums[k] ||= Same::INSTANCE
127
+ end
128
+
129
+ def init(tuple)
130
+ tuple.each_with_object({}){|(k,v),h|
131
+ h.merge!(k => summarizer(k).init(v))
132
+ }
133
+ end
134
+
135
+ def sum(memo, tuple)
136
+ tuple.each_with_object(memo.dup){|(k,v),h|
137
+ h.merge!(k => summarizer(k).sum(h[k], v))
138
+ }
139
+ end
140
+
141
+ def term(tuple)
142
+ tuple.each_with_object({}){|(k,v),h|
143
+ h.merge!(k => summarizer(k).term(v))
144
+ }
145
+ end
146
+ end # class Check
147
+
148
+ class Trust
149
+ def initialize(sums)
150
+ @sums = sums
151
+ end
152
+ attr_reader :sums
153
+
154
+ # Returns the initial tuple to use for a given determinant.
155
+ def init(tuple)
156
+ sums.each_with_object(tuple.dup){|(attribute,summarizer),new_tuple|
157
+ new_tuple[attribute] = summarizer.init(tuple[attribute])
158
+ }
159
+ end
160
+
161
+ # Sums `tuple` on `memo`, returning the new tuple to use as memo.
162
+ def sum(memo, tuple)
163
+ sums.each_with_object(memo.dup){|(attribute,summarizer),new_tuple|
164
+ new_tuple[attribute] = summarizer.sum(memo[attribute], tuple[attribute])
165
+ }
166
+ end
167
+
168
+ # Terminates the summarization of a given tuple.
169
+ def term(tuple)
170
+ sums.each_with_object(tuple.dup){|(attribute,summarizer),new_tuple|
171
+ new_tuple[attribute] = summarizer.term(tuple[attribute])
172
+ }
173
+ end
174
+ end # class Trust
175
+
130
176
  #
131
177
  # Summarizes by enforcing that the same dependent is observed for a given
132
178
  # determinant, returning the dependent as summarization.
@@ -138,7 +184,7 @@ module Bmg
138
184
  end
139
185
 
140
186
  def sum(v1, v2)
141
- raise "Same values expected, got `#{v1}` vs. `#{v2}`" unless v1 == v2
187
+ raise TypeError, "Same values expected, got `#{v1}` vs. `#{v2}`" unless v1 == v2
142
188
  v1
143
189
  end
144
190
 
@@ -151,6 +197,7 @@ module Bmg
151
197
  end
152
198
  alias :to_s :inspect
153
199
 
200
+ INSTANCE = new
154
201
  end # class Same
155
202
 
156
203
  #
@@ -183,6 +230,7 @@ module Bmg
183
230
  end
184
231
  alias :to_s :inspect
185
232
 
233
+ INSTANCE = new
186
234
  end # class DistinctList
187
235
 
188
236
  #
@@ -26,6 +26,12 @@ module Bmg
26
26
  :split => "_"
27
27
  }
28
28
 
29
+ class << self
30
+ def separator(options)
31
+ options[:split] || DEFAULT_OPTIONS[:split]
32
+ end
33
+ end
34
+
29
35
  def initialize(type, operand, options = {})
30
36
  @type = type
31
37
  @operand = operand
@@ -32,7 +32,8 @@ module Bmg
32
32
  def each(&bl)
33
33
  return to_enum unless block_given?
34
34
  page_size = options[:page_size]
35
- @operand.to_a
35
+ @operand
36
+ .to_a
36
37
  .sort(&comparator)
37
38
  .drop(page_size * (page_index-1))
38
39
  .take(page_size)
@@ -46,7 +47,7 @@ module Bmg
46
47
  protected ### inspect
47
48
 
48
49
  def comparator
49
- Ordering.new(@ordering).comparator
50
+ @comparator ||= Ordering.new(@ordering).comparator
50
51
  end
51
52
 
52
53
  def args
@@ -10,20 +10,18 @@ module Bmg
10
10
  }
11
11
 
12
12
  def initialize(type, path_or_io, options = {})
13
- @type = type
13
+ require 'csv'
14
+
14
15
  @path_or_io = path_or_io
15
- @options = DEFAULT_OPTIONS.merge(options)
16
- if @options[:smart] && !@path_or_io.is_a?(IO)
17
- @options[:col_sep] ||= infer_col_sep
18
- @options[:quote_char] ||= infer_quote_char
19
- end
16
+ @options = handle_options(options)
17
+ @type = handle_type(type)
20
18
  end
21
19
 
22
20
  def each
23
21
  return to_enum unless block_given?
24
- require 'csv'
25
- with_io do |io|
26
- ::CSV.new(io, **csv_options).each do |row|
22
+
23
+ with_csv do |csv|
24
+ csv.each do |row|
27
25
  yield tuple(row)
28
26
  end
29
27
  end
@@ -44,33 +42,70 @@ module Bmg
44
42
  row.to_hash.each_with_object({}){|(k,v),h| h[k.to_sym] = v }
45
43
  end
46
44
 
45
+ def handle_type(type)
46
+ return type if type.knows_attrlist?
47
+
48
+ type.with_attrlist(infer_attrlist)
49
+ end
50
+
51
+ def infer_attrlist
52
+ with_csv do |csv|
53
+ csv.each do |row|
54
+ return tuple(row).keys
55
+ end
56
+ end
57
+ end
58
+
59
+ def handle_options(options)
60
+ options = DEFAULT_OPTIONS.merge(options)
61
+ if options[:smart] && !@path_or_io.is_a?(IO)
62
+ options[:col_sep] ||= infer_col_sep
63
+ options[:quote_char] ||= infer_quote_char(options[:col_sep])
64
+ end
65
+ options
66
+ end
67
+
47
68
  def infer_col_sep
48
69
  sniff(text_portion, [",","\t",";"], ",")
49
70
  end
50
71
 
51
- def infer_quote_char
52
- sniff(text_portion, ["'","\""], "\"")
72
+ def infer_quote_char(col_sep)
73
+ sniff(text_portion, ['"',"'"], '"'){|quote|
74
+ /#{quote}#{col_sep}#{quote}|^#{quote}|#{quote}$/
75
+ }
53
76
  end
54
77
 
55
78
  def text_portion
56
- @text_portion ||= with_io{|io| io.readlines(10).join("\n") }
79
+ @text_portion ||= with_io{|io| io.readlines(50).join("\n") }
57
80
  end
58
81
 
59
82
  def with_io(&bl)
60
83
  case @path_or_io
61
84
  when IO, StringIO
85
+ @path_or_io.rewind if @path_or_io.respond_to?(:rewind)
62
86
  bl.call(@path_or_io)
63
87
  else
64
88
  File.open(@path_or_io, "r", &bl)
65
89
  end
66
90
  end
67
91
 
92
+ def with_csv(&bl)
93
+ with_io do |io|
94
+ yield ::CSV.new(io, **csv_options)
95
+ end
96
+ end
97
+
68
98
  # Finds the best candidate among `candidates` for a separator
69
99
  # found in `str`. If none is found, returns `default`.
70
- def sniff(str, candidates, default)
100
+ def sniff(str, candidates, default, &bl)
71
101
  snif = {}
72
102
  candidates.each {|delim|
73
- snif[delim] = str.count(delim)
103
+ counter = bl ? bl.call(delim) : delim
104
+ snif[delim] = if counter.is_a?(Regexp)
105
+ str.scan(counter).length
106
+ else
107
+ str.count(counter)
108
+ end
74
109
  }
75
110
  snif = snif.sort {|a,b| b[1] <=> a[1] }
76
111
  snif.size > 0 ? snif[0][0] : default
@@ -10,32 +10,27 @@ module Bmg
10
10
  }
11
11
 
12
12
  def initialize(type, path, options = {})
13
- @type = type
13
+ require 'roo'
14
14
  @path = path
15
15
  @options = DEFAULT_OPTIONS.merge(options)
16
+ @type = type.knows_attrlist? ? type : type.with_attrlist(infer_attrlist)
16
17
  end
17
18
 
18
19
  def each
19
20
  return to_enum unless block_given?
20
- require 'roo'
21
- xlsx = Roo::Spreadsheet.open(@path, @options)
22
- headers = nil
23
- xlsx.sheet(@options[:sheet])
24
- .each
25
- .drop(@options[:skip])
26
- .each_with_index
27
- .each do |row, i|
28
- if i==0
29
- headers = row.map{|c| c.to_s.strip.to_sym }
30
- else
31
- init = init_tuple(i)
32
- tuple = (0...headers.size)
33
- .each_with_object(init){|i,t|
34
- t[headers[i]] = row[i]
35
- }
36
- yield(tuple)
37
- end
38
- end
21
+
22
+ headers = type.attrlist
23
+ headers = headers[1..-1] if generate_row_num?
24
+ start_at = @options[:skip] + 2
25
+ end_at = spreadsheet.last_row
26
+ (start_at..end_at).each do |i|
27
+ row = spreadsheet.row(i)
28
+ init = init_tuple(i - start_at + 1)
29
+ tuple = (0...headers.size).each_with_object(init){|i,t|
30
+ t[headers[i]] = row[i]
31
+ }
32
+ yield(tuple)
33
+ end
39
34
  end
40
35
 
41
36
  def to_ast
@@ -49,17 +44,37 @@ module Bmg
49
44
 
50
45
  private
51
46
 
52
- def init_tuple(i)
47
+ def spreadsheet
48
+ @spreadsheet ||= Roo::Spreadsheet
49
+ .open(@path, @options)
50
+ .sheet(@options[:sheet])
51
+ end
52
+
53
+ def infer_attrlist
54
+ row = spreadsheet.row(1+@options[:skip])
55
+ attrlist = row.map{|c| c.to_s.strip.to_sym }
56
+ attrlist.unshift(row_num_name) if generate_row_num?
57
+ attrlist
58
+ end
59
+
60
+ def generate_row_num?
61
+ !!@options[:row_num]
62
+ end
63
+
64
+ def row_num_name
53
65
  case as = @options[:row_num]
54
- when TrueClass
55
- { :row_num => i }
56
- when FalseClass
57
- {}
58
- when Symbol
59
- { :"#{as}" => i }
66
+ when TrueClass then :row_num
67
+ when Symbol then as
68
+ else nil
60
69
  end
61
70
  end
62
71
 
72
+ def init_tuple(i)
73
+ return {} unless generate_row_num?
74
+
75
+ { row_num_name => i }
76
+ end
77
+
63
78
  end # class Excel
64
79
  end # module Reader
65
- end # module Bmg
80
+ end # module Bmg
@@ -5,6 +5,7 @@ module Bmg
5
5
 
6
6
  def initialize(operand)
7
7
  @operand = operand
8
+ @materialized = nil
8
9
  end
9
10
 
10
11
  def type
@@ -19,14 +20,14 @@ module Bmg
19
20
  public
20
21
 
21
22
  def _count
22
- operand._count
23
+ _materialize._count
23
24
  end
24
25
 
25
26
  public
26
27
 
27
28
  def each(&bl)
28
- @operand = Relation::InMemory.new(operand.type, operand.to_a) unless @operand.is_a?(Relation::InMemory)
29
- @operand.each(&bl)
29
+ return to_enum unless block_given?
30
+ _materialize.each(&bl)
30
31
  end
31
32
 
32
33
  def to_ast
@@ -37,6 +38,14 @@ module Bmg
37
38
  []
38
39
  end
39
40
 
41
+ private
42
+
43
+ def _materialize
44
+ return @materialized if @materialized
45
+
46
+ @materialized = Relation::InMemory.new(operand.type, operand.to_a)
47
+ end
48
+
40
49
  end # class Materialized
41
50
  end # module Relation
42
51
  end # module Bmg
@@ -24,13 +24,27 @@ module Bmg
24
24
  protected :type=
25
25
 
26
26
  def each(&bl)
27
- spy.call(self) if bl
28
- operand.each(&bl)
27
+ return enum_for(:each) unless bl
28
+
29
+ if spy.respond_to?(:measure)
30
+ spy.measure(self) do
31
+ operand.each(&bl)
32
+ end
33
+ else
34
+ spy.call(self)
35
+ operand.each(&bl)
36
+ end
29
37
  end
30
38
 
31
39
  def count
32
- spy.call(self) if bl
33
- operand.count
40
+ if spy.respond_to?(:measure)
41
+ spy.measure(self) do
42
+ operand.count
43
+ end
44
+ else
45
+ spy.call(self)
46
+ operand.count
47
+ end
34
48
  end
35
49
 
36
50
  def to_ast
@@ -11,13 +11,26 @@ module Bmg
11
11
  end
12
12
 
13
13
  def comparator
14
- @comparator ||= ->(t1, t2) {
15
- attrs.each do |(attr,direction)|
16
- c = t1[attr] <=> t2[attr]
17
- return (direction == :desc ? -c : c) unless c==0
14
+ @comparator ||= ->(t1, t2) { compare_attrs(t1, t2) }
15
+ end
16
+
17
+ def compare_attrs(t1, t2)
18
+ attrs.each do |(attr,direction)|
19
+ a1, a2 = t1[attr], t2[attr]
20
+ if a1.nil? && a2.nil?
21
+ 0
22
+ elsif a1.nil?
23
+ return direction == :desc ? -1 : 1
24
+ elsif a2.nil?
25
+ return direction == :desc ? 1 : -1
26
+ elsif a1.respond_to?(:<=>)
27
+ c = a1 <=> a2
28
+ unless c.nil? || c==0
29
+ return direction == :desc ? -c : c
30
+ end
18
31
  end
19
- 0
20
- }
32
+ end
33
+ 0
21
34
  end
22
35
 
23
36
  end # class Ordering
data/lib/bmg/type.rb CHANGED
@@ -103,8 +103,15 @@ module Bmg
103
103
  }
104
104
  end
105
105
 
106
+ def identity_autowrap?(options)
107
+ return false unless knows_attrlist?
108
+
109
+ sep = Operator::Autowrap.separator(options)
110
+ self.attrlist.all?{|a| a.to_s.index(sep).nil? }
111
+ end
112
+
106
113
  def autowrap(options)
107
- sep = options[:split] || Operator::Autowrap::DEFAULT_OPTIONS[:split]
114
+ sep = Operator::Autowrap.separator(options)
108
115
  splitter = ->(a){ a.to_s.split(sep).first }
109
116
  is_split = ->(a){ a.to_s.split(sep).size > 1 }
110
117
  dup.tap{|x|
@@ -115,7 +122,7 @@ module Bmg
115
122
  }
116
123
  end
117
124
 
118
- def autosummarize(by, summarization)
125
+ def autosummarize(by, summarization, options)
119
126
  known_attributes!(by + summarization.keys) if typechecked? && knows_attrlist?
120
127
  dup.tap{|x|
121
128
  x.attrlist = nil
data/lib/bmg/version.rb CHANGED
@@ -2,7 +2,7 @@ module Bmg
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 18
5
- TINY = 11
5
+ TINY = 15
6
6
  end
7
7
  VERSION = "#{Version::MAJOR}.#{Version::MINOR}.#{Version::TINY}"
8
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bmg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.11
4
+ version: 0.18.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bernard Lambeau
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-11 00:00:00.000000000 Z
11
+ date: 2022-02-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: predicate