bmg 0.17.8 → 0.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -3
  3. data/README.md +236 -57
  4. data/lib/bmg.rb +6 -0
  5. data/lib/bmg/algebra.rb +1 -0
  6. data/lib/bmg/algebra/shortcuts.rb +14 -0
  7. data/lib/bmg/operator/allbut.rb +27 -0
  8. data/lib/bmg/operator/autosummarize.rb +27 -4
  9. data/lib/bmg/operator/autowrap.rb +27 -0
  10. data/lib/bmg/operator/constants.rb +7 -0
  11. data/lib/bmg/operator/extend.rb +7 -0
  12. data/lib/bmg/operator/group.rb +1 -0
  13. data/lib/bmg/operator/image.rb +41 -2
  14. data/lib/bmg/operator/join.rb +1 -0
  15. data/lib/bmg/operator/matching.rb +1 -0
  16. data/lib/bmg/operator/not_matching.rb +1 -0
  17. data/lib/bmg/operator/page.rb +2 -7
  18. data/lib/bmg/operator/project.rb +3 -2
  19. data/lib/bmg/operator/rename.rb +12 -5
  20. data/lib/bmg/operator/restrict.rb +1 -0
  21. data/lib/bmg/operator/rxmatch.rb +1 -0
  22. data/lib/bmg/operator/summarize.rb +2 -17
  23. data/lib/bmg/operator/transform.rb +39 -1
  24. data/lib/bmg/operator/union.rb +1 -0
  25. data/lib/bmg/reader.rb +1 -0
  26. data/lib/bmg/reader/csv.rb +29 -10
  27. data/lib/bmg/reader/excel.rb +23 -4
  28. data/lib/bmg/reader/text_file.rb +56 -0
  29. data/lib/bmg/relation.rb +28 -0
  30. data/lib/bmg/relation/empty.rb +4 -0
  31. data/lib/bmg/relation/in_memory.rb +10 -1
  32. data/lib/bmg/relation/materialized.rb +6 -0
  33. data/lib/bmg/relation/spied.rb +6 -1
  34. data/lib/bmg/sequel/relation.rb +5 -0
  35. data/lib/bmg/sql/relation.rb +2 -3
  36. data/lib/bmg/summarizer.rb +29 -1
  37. data/lib/bmg/summarizer/avg.rb +3 -3
  38. data/lib/bmg/summarizer/by_proc.rb +41 -0
  39. data/lib/bmg/summarizer/distinct.rb +36 -0
  40. data/lib/bmg/summarizer/multiple.rb +46 -0
  41. data/lib/bmg/summarizer/percentile.rb +79 -0
  42. data/lib/bmg/support.rb +1 -0
  43. data/lib/bmg/support/ordering.rb +20 -0
  44. data/lib/bmg/support/tuple_algebra.rb +6 -0
  45. data/lib/bmg/support/tuple_transformer.rb +14 -6
  46. data/lib/bmg/version.rb +2 -2
  47. data/lib/bmg/writer.rb +16 -0
  48. data/lib/bmg/writer/csv.rb +0 -11
  49. data/lib/bmg/writer/xlsx.rb +68 -0
  50. data/tasks/test.rake +9 -2
  51. metadata +36 -15
@@ -0,0 +1,36 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Collect the distinct values as an array.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.distinct(:qty).summarize(...)
10
+ #
11
+ class Distinct < Summarizer
12
+
13
+ # Returns {} (an empty Hash) as least value.
14
+ def least()
15
+ {}
16
+ end
17
+
18
+ # Records val as a key of the memo hash
19
+ def _happens(memo, val)
20
+ memo[val] = true
21
+ memo
22
+ end
23
+
24
+ def finalize(memo)
25
+ memo.keys
26
+ end
27
+
28
+ end # class Distinct
29
+
30
+ # Factors a distinct summarizer
31
+ def self.distinct(*args, &bl)
32
+ Distinct.new(*args, &bl)
33
+ end
34
+
35
+ end # class Summarizer
36
+ end # module Bmg
@@ -0,0 +1,46 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # A summarizer that collects multiple summarization as a wrapped
5
+ # tuple.
6
+ #
7
+ # Example:
8
+ #
9
+ # # direct ruby usage
10
+ # Bmg::Summarizer.multiple(x: ..., y: ...).summarize(...)
11
+ #
12
+ class Multiple < Summarizer
13
+
14
+ def initialize(defs)
15
+ @summarization = Summarizer.summarization(defs)
16
+ end
17
+
18
+ # Returns a Hash mapping each key to its summarizer's least value.
19
+ def least()
20
+ @summarization.each_pair.each_with_object({}){|(k,v),memo|
21
+ memo[k] = v.least
22
+ }
23
+ end
24
+
25
+ # Passes val to each summarizer and returns a new memo hash
26
+ def happens(memo, val)
27
+ @summarization.each_pair.each_with_object({}){|(k,v),memo2|
28
+ memo2[k] = v.happens(memo[k], val)
29
+ }
30
+ end
31
+
32
+ def finalize(memo)
33
+ @summarization.each_pair.each_with_object({}){|(k,v),memo2|
34
+ memo2[k] = v.finalize(memo[k])
35
+ }
36
+ end
37
+
38
+ end # class Multiple
39
+
40
+ # Factors a multiple summarizer
41
+ def self.multiple(defs)
42
+ Multiple.new(defs)
43
+ end
44
+
45
+ end # class Summarizer
46
+ end # module Bmg
@@ -0,0 +1,79 @@
1
+ module Bmg
2
+ class Summarizer
3
+ #
4
+ # Percentile summarizer.
5
+ #
6
+ # Example:
7
+ #
8
+ # # direct ruby usage
9
+ # Bmg::Summarizer.percentile(:qty, 50).summarize(...)
10
+ #
11
+ class Percentile < Summarizer
12
+
13
+ DEFAULT_OPTIONS = {
14
+ :variant => :continuous
15
+ }
16
+
17
+ def initialize(*args, &bl)
18
+ @nth = args.find{|a| a.is_a?(Integer) } || 50
19
+ functor = args.find{|a| a.is_a?(Symbol) } || bl
20
+ options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
21
+ memo.merge(opts)
22
+ }.dup
23
+ super(functor, options)
24
+ end
25
+
26
+ # Returns [] as least value.
27
+ def least()
28
+ []
29
+ end
30
+
31
+ # Collects the value
32
+ def _happens(memo, val)
33
+ memo << val
34
+ end
35
+
36
+ # Finalizes the computation.
37
+ def finalize(memo)
38
+ return nil if memo.empty?
39
+ index = memo.size.to_f * (@nth.to_f / 100.0)
40
+ floor, ceil = index.floor, index.ceil
41
+ ceil +=1 if floor == ceil
42
+ below = [floor - 1, 0].max
43
+ above = [[ceil - 1, memo.size - 1].min, 0].max
44
+ sorted = memo.sort
45
+ if options[:variant] == :continuous
46
+ (sorted[above] + sorted[below]) / 2.0
47
+ else
48
+ sorted[below]
49
+ end
50
+ end
51
+
52
+ end # class Percentile
53
+
54
+ def self.percentile(*args, &bl)
55
+ Percentile.new(*args, &bl)
56
+ end
57
+
58
+ def self.percentile_cont(*args, &bl)
59
+ Percentile.new(*(args + [{:variant => :continuous}]), &bl)
60
+ end
61
+
62
+ def self.percentile_disc(*args, &bl)
63
+ Percentile.new(*(args + [{:variant => :discrete}]), &bl)
64
+ end
65
+
66
+ def self.median(*args, &bl)
67
+ Percentile.new(*(args + [50]), &bl)
68
+ end
69
+
70
+ def self.median_cont(*args, &bl)
71
+ Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
72
+ end
73
+
74
+ def self.median_disc(*args, &bl)
75
+ Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
76
+ end
77
+
78
+ end # class Summarizer
79
+ end # module Bmg
data/lib/bmg/support.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require_relative 'support/tuple_algebra'
2
2
  require_relative 'support/tuple_transformer'
3
3
  require_relative 'support/keys'
4
+ require_relative 'support/ordering'
4
5
  require_relative 'support/output_preferences'
@@ -0,0 +1,20 @@
1
+ module Bmg
2
+ class Ordering
3
+
4
+ def initialize(attrs)
5
+ @attrs = attrs
6
+ end
7
+ attr_reader :attrs
8
+
9
+ def comparator
10
+ ->(t1, t2) {
11
+ attrs.each do |(attr,direction)|
12
+ c = t1[attr] <=> t2[attr]
13
+ return (direction == :desc ? -c : c) unless c==0
14
+ end
15
+ 0
16
+ }
17
+ end
18
+
19
+ end # class Ordering
20
+ end # module Bmg
@@ -19,5 +19,11 @@ module Bmg
19
19
  end
20
20
  module_function :rename
21
21
 
22
+ def symbolize_keys(h)
23
+ return h if h.empty?
24
+ h.each_with_object({}){|(k,v),h| h[k.to_sym] = v }
25
+ end
26
+ module_function :symbolize_keys
27
+
22
28
  end # module TupleAlgebra
23
29
  end # module Bmg
@@ -26,17 +26,22 @@ module Bmg
26
26
 
27
27
  def transform_tuple(tuple, with)
28
28
  case with
29
- when Symbol
30
- tuple.each_with_object({}){|(k,v),dup|
31
- dup[k] = transform_attr(v, with)
32
- }
33
- when Proc
29
+ when Symbol, Proc, Regexp
34
30
  tuple.each_with_object({}){|(k,v),dup|
35
31
  dup[k] = transform_attr(v, with)
36
32
  }
37
33
  when Hash
38
34
  with.each_with_object(tuple.dup){|(k,v),dup|
39
- dup[k] = transform_attr(dup[k], v)
35
+ case k
36
+ when Symbol
37
+ dup[k] = transform_attr(dup[k], v)
38
+ when Class
39
+ dup.keys.each do |attrname|
40
+ dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
41
+ end
42
+ else
43
+ raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
44
+ end
40
45
  }
41
46
  when Array
42
47
  with.inject(tuple){|dup,on|
@@ -51,6 +56,9 @@ module Bmg
51
56
  case with
52
57
  when Symbol
53
58
  value.send(with)
59
+ when Regexp
60
+ m = with.match(value.to_s)
61
+ m.nil? ? m : m.to_s
54
62
  when Proc
55
63
  with.call(value)
56
64
  when Hash
data/lib/bmg/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Bmg
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 17
5
- TINY = 8
4
+ MINOR = 18
5
+ TINY = 4
6
6
  end
7
7
  VERSION = "#{Version::MAJOR}.#{Version::MINOR}.#{Version::TINY}"
8
8
  end
data/lib/bmg/writer.rb CHANGED
@@ -1 +1,17 @@
1
+ module Bmg
2
+ module Writer
3
+
4
+ protected
5
+
6
+ def infer_headers(from)
7
+ attrlist = if from.is_a?(Type) && from.knows_attrlist?
8
+ from.to_attrlist
9
+ elsif from.is_a?(Hash)
10
+ from.keys
11
+ end
12
+ attrlist ? output_preferences.order_attrlist(attrlist) : nil
13
+ end
14
+
15
+ end # module Writer
16
+ end # module Bmg
1
17
  require_relative 'writer/csv'
@@ -26,17 +26,6 @@ module Bmg
26
26
  to_s ? string_or_io.string : string_or_io
27
27
  end
28
28
 
29
- private
30
-
31
- def infer_headers(from)
32
- attrlist = if from.is_a?(Type) && from.knows_attrlist?
33
- from.to_attrlist
34
- elsif from.is_a?(Hash)
35
- from.keys
36
- end
37
- attrlist ? output_preferences.order_attrlist(attrlist) : nil
38
- end
39
-
40
29
  end # class Csv
41
30
  end # module Writer
42
31
  end # module Bmg
@@ -0,0 +1,68 @@
1
+ module Bmg
2
+ module Writer
3
+ class Xlsx
4
+ include Writer
5
+
6
+ DEFAULT_OPTIONS = {
7
+ }
8
+
9
+ def initialize(csv_options, output_preferences = nil)
10
+ @csv_options = DEFAULT_OPTIONS.merge(csv_options)
11
+ @output_preferences = OutputPreferences.dress(output_preferences)
12
+ end
13
+ attr_reader :csv_options, :output_preferences
14
+
15
+ def call(relation, path)
16
+ require 'write_xlsx'
17
+ dup._call(relation, path)
18
+ end
19
+
20
+ protected
21
+ attr_reader :workbook, :worksheet
22
+
23
+ def _call(relation, path)
24
+ @workbook = WriteXLSX.new(path)
25
+ @worksheet = workbook.add_worksheet
26
+
27
+ headers = infer_headers(relation.type)
28
+ relation.each_with_index do |tuple,i|
29
+ headers = infer_headers(tuple) if headers.nil?
30
+ headers.each_with_index do |h,i|
31
+ worksheet.write_string(0, i, h)
32
+ end if i == 0
33
+ headers.each_with_index do |h,j|
34
+ meth, *args = write_pair(tuple[h])
35
+ worksheet.send(meth, 1+i, j, *args)
36
+ end
37
+ end
38
+
39
+ workbook.close
40
+ path
41
+ end
42
+
43
+ def write_pair(value)
44
+ case value
45
+ when Numeric
46
+ [:write_number, value]
47
+ when Date
48
+ [:write_date_time, value, date_format]
49
+ else
50
+ [:write_string, value.to_s]
51
+ end
52
+ end
53
+
54
+ def date_format
55
+ @date_format ||= workbook.add_format(:num_format => 'yyyy-mm-dd')
56
+ end
57
+
58
+ end # class Xlsx
59
+ end # module Writer
60
+ module Relation
61
+
62
+ def to_xlsx(options = {}, path = nil, preferences = nil)
63
+ options, path = {}, options unless options.is_a?(Hash)
64
+ Writer::Xlsx.new(options, preferences).call(self, path)
65
+ end
66
+
67
+ end # module Relation
68
+ end # module Bmg
data/tasks/test.rake CHANGED
@@ -6,17 +6,24 @@ namespace :test do
6
6
  desc "Runs unit tests"
7
7
  RSpec::Core::RakeTask.new(:unit) do |t|
8
8
  t.pattern = "spec/unit/**/test_*.rb"
9
- t.rspec_opts = ["-Ilib", "-Ispec/unit", "--fail-fast", "--color", "--backtrace", "--format=progress"]
9
+ t.rspec_opts = ["-Ilib", "-Ispec/unit", "--color", "--backtrace", "--format=progress"]
10
10
  end
11
11
  tests << :unit
12
12
 
13
13
  desc "Runs integration tests"
14
14
  RSpec::Core::RakeTask.new(:integration) do |t|
15
15
  t.pattern = "spec/integration/**/test_*.rb"
16
- t.rspec_opts = ["-Ilib", "-Ispec/integration", "--fail-fast", "--color", "--backtrace", "--format=progress"]
16
+ t.rspec_opts = ["-Ilib", "-Ispec/integration", "--color", "--backtrace", "--format=progress"]
17
17
  end
18
18
  tests << :integration
19
19
 
20
+ desc "Runs github regression tests"
21
+ RSpec::Core::RakeTask.new(:regression) do |t|
22
+ t.pattern = "spec/regression/**/test_*.rb"
23
+ t.rspec_opts = ["-Ilib", "-Ispec/regression", "--color", "--backtrace", "--format=progress"]
24
+ end
25
+ tests << :regression
26
+
20
27
  task :all => tests
21
28
  end
22
29
 
metadata CHANGED
@@ -1,49 +1,49 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bmg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.8
4
+ version: 0.18.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bernard Lambeau
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-10 00:00:00.000000000 Z
11
+ date: 2021-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: predicate
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2.4'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
- version: 2.4.0
19
+ version: 2.5.0
20
+ - - "~>"
21
+ - !ruby/object:Gem::Version
22
+ version: '2.5'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '2.4'
30
27
  - - ">="
31
28
  - !ruby/object:Gem::Version
32
- version: 2.4.0
29
+ version: 2.5.0
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.5'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: path
35
35
  requirement: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
- version: '1.3'
39
+ version: '2.0'
40
40
  type: :runtime
41
41
  prerelease: false
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: '1.3'
46
+ version: '2.0'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
49
  requirement: !ruby/object:Gem::Requirement
@@ -78,14 +78,28 @@ dependencies:
78
78
  requirements:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: '2.7'
81
+ version: '2.8'
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - ">="
87
87
  - !ruby/object:Gem::Version
88
- version: '2.7'
88
+ version: '2.8'
89
+ - !ruby/object:Gem::Dependency
90
+ name: write_xlsx
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '1.0'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '1.0'
89
103
  - !ruby/object:Gem::Dependency
90
104
  name: sequel
91
105
  requirement: !ruby/object:Gem::Requirement
@@ -154,6 +168,7 @@ files:
154
168
  - lib/bmg/reader.rb
155
169
  - lib/bmg/reader/csv.rb
156
170
  - lib/bmg/reader/excel.rb
171
+ - lib/bmg/reader/text_file.rb
157
172
  - lib/bmg/relation.rb
158
173
  - lib/bmg/relation/empty.rb
159
174
  - lib/bmg/relation/in_memory.rb
@@ -250,16 +265,21 @@ files:
250
265
  - lib/bmg/sql/version.rb
251
266
  - lib/bmg/summarizer.rb
252
267
  - lib/bmg/summarizer/avg.rb
268
+ - lib/bmg/summarizer/by_proc.rb
253
269
  - lib/bmg/summarizer/collect.rb
254
270
  - lib/bmg/summarizer/concat.rb
255
271
  - lib/bmg/summarizer/count.rb
272
+ - lib/bmg/summarizer/distinct.rb
256
273
  - lib/bmg/summarizer/max.rb
257
274
  - lib/bmg/summarizer/min.rb
275
+ - lib/bmg/summarizer/multiple.rb
276
+ - lib/bmg/summarizer/percentile.rb
258
277
  - lib/bmg/summarizer/stddev.rb
259
278
  - lib/bmg/summarizer/sum.rb
260
279
  - lib/bmg/summarizer/variance.rb
261
280
  - lib/bmg/support.rb
262
281
  - lib/bmg/support/keys.rb
282
+ - lib/bmg/support/ordering.rb
263
283
  - lib/bmg/support/output_preferences.rb
264
284
  - lib/bmg/support/tuple_algebra.rb
265
285
  - lib/bmg/support/tuple_transformer.rb
@@ -267,6 +287,7 @@ files:
267
287
  - lib/bmg/version.rb
268
288
  - lib/bmg/writer.rb
269
289
  - lib/bmg/writer/csv.rb
290
+ - lib/bmg/writer/xlsx.rb
270
291
  - tasks/gem.rake
271
292
  - tasks/test.rake
272
293
  homepage: http://github.com/enspirit/bmg
@@ -288,7 +309,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
288
309
  - !ruby/object:Gem::Version
289
310
  version: '0'
290
311
  requirements: []
291
- rubygems_version: 3.1.2
312
+ rubygems_version: 3.0.8
292
313
  signing_key:
293
314
  specification_version: 4
294
315
  summary: Bmg is Alf's successor.