bmg 0.17.8 → 0.18.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +0 -3
  3. data/README.md +236 -57
  4. data/lib/bmg.rb +6 -0
  5. data/lib/bmg/algebra.rb +1 -0
  6. data/lib/bmg/algebra/shortcuts.rb +14 -0
  7. data/lib/bmg/operator/allbut.rb +27 -0
  8. data/lib/bmg/operator/autosummarize.rb +27 -4
  9. data/lib/bmg/operator/autowrap.rb +27 -0
  10. data/lib/bmg/operator/constants.rb +7 -0
  11. data/lib/bmg/operator/extend.rb +7 -0
  12. data/lib/bmg/operator/group.rb +1 -0
  13. data/lib/bmg/operator/image.rb +41 -2
  14. data/lib/bmg/operator/join.rb +1 -0
  15. data/lib/bmg/operator/matching.rb +1 -0
  16. data/lib/bmg/operator/not_matching.rb +1 -0
  17. data/lib/bmg/operator/page.rb +2 -7
  18. data/lib/bmg/operator/project.rb +3 -2
  19. data/lib/bmg/operator/rename.rb +12 -5
  20. data/lib/bmg/operator/restrict.rb +1 -0
  21. data/lib/bmg/operator/rxmatch.rb +1 -0
  22. data/lib/bmg/operator/summarize.rb +2 -17
  23. data/lib/bmg/operator/transform.rb +39 -1
  24. data/lib/bmg/operator/union.rb +1 -0
  25. data/lib/bmg/reader.rb +1 -0
  26. data/lib/bmg/reader/csv.rb +29 -10
  27. data/lib/bmg/reader/excel.rb +23 -4
  28. data/lib/bmg/reader/text_file.rb +56 -0
  29. data/lib/bmg/relation.rb +28 -0
  30. data/lib/bmg/relation/empty.rb +4 -0
  31. data/lib/bmg/relation/in_memory.rb +10 -1
  32. data/lib/bmg/relation/materialized.rb +6 -0
  33. data/lib/bmg/relation/spied.rb +6 -1
  34. data/lib/bmg/sequel/relation.rb +5 -0
  35. data/lib/bmg/sql/relation.rb +2 -3
  36. data/lib/bmg/summarizer.rb +29 -1
  37. data/lib/bmg/summarizer/avg.rb +3 -3
  38. data/lib/bmg/summarizer/by_proc.rb +41 -0
  39. data/lib/bmg/summarizer/distinct.rb +36 -0
  40. data/lib/bmg/summarizer/multiple.rb +46 -0
  41. data/lib/bmg/summarizer/percentile.rb +79 -0
  42. data/lib/bmg/support.rb +1 -0
  43. data/lib/bmg/support/ordering.rb +20 -0
  44. data/lib/bmg/support/tuple_algebra.rb +6 -0
  45. data/lib/bmg/support/tuple_transformer.rb +14 -6
  46. data/lib/bmg/version.rb +2 -2
  47. data/lib/bmg/writer.rb +16 -0
  48. data/lib/bmg/writer/csv.rb +0 -11
  49. data/lib/bmg/writer/xlsx.rb +68 -0
  50. data/tasks/test.rake +9 -2
  51. metadata +36 -15
@@ -0,0 +1,36 @@
1
module Bmg
  class Summarizer
    #
    # Collects the distinct values as an array.
    #
    # Internally the values seen so far are kept as the keys of a Hash,
    # which is only converted to an Array at finalization time.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.distinct(:qty).summarize(...)
    #
    class Distinct < Summarizer

      # Returns a fresh, empty Hash as least value. Its keys are the
      # distinct values collected so far.
      def least
        {}
      end

      # Registers `val` as a key of the memo Hash and returns that same
      # Hash. A value already present is simply overwritten, hence kept
      # only once.
      def _happens(memo, val)
        memo[val] = true
        memo
      end

      # Returns the distinct values as an Array, in order of first
      # occurrence (Ruby hashes preserve insertion order).
      def finalize(memo)
        memo.keys
      end

    end # class Distinct

    # Factors a distinct summarizer; all arguments and the block are
    # forwarded to Distinct.new
    def self.distinct(*args, &bl)
      Distinct.new(*args, &bl)
    end

  end # class Summarizer
end # module Bmg
@@ -0,0 +1,46 @@
1
module Bmg
  class Summarizer
    #
    # A summarizer that collects multiple summarizations as a wrapped
    # tuple, i.e. a Hash with one entry per named sub-summarizer.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.multiple(x: ..., y: ...).summarize(...)
    #
    class Multiple < Summarizer

      # `defs` is handed over to Summarizer.summarization, whose result is
      # iterated below as name => summarizer pairs.
      def initialize(defs)
        @summarization = Summarizer.summarization(defs)
      end

      # Returns a Hash mapping each name to the least value of its
      # summarizer.
      def least
        collect_by_name{|_name, summarizer| summarizer.least }
      end

      # Lets every sub-summarizer see `val`, each one working on its own
      # entry of `memo`. Returns a new Hash; `memo` itself is not modified
      # here.
      def happens(memo, val)
        collect_by_name{|name, summarizer| summarizer.happens(memo[name], val) }
      end

      # Returns a Hash mapping each name to the finalized value of its
      # summarizer.
      def finalize(memo)
        collect_by_name{|name, summarizer| summarizer.finalize(memo[name]) }
      end

    private

      # Builds a Hash with the same names as the summarization, each value
      # being what the block returns for (name, summarizer).
      def collect_by_name
        @summarization.each_with_object({}){|(name, summarizer), result|
          result[name] = yield(name, summarizer)
        }
      end

    end # class Multiple

    # Factors a multiple summarizer
    def self.multiple(defs)
      Multiple.new(defs)
    end

  end # class Summarizer
end # module Bmg
@@ -0,0 +1,79 @@
1
module Bmg
  class Summarizer
    #
    # Percentile summarizer.
    #
    # Collects all values, then returns the one found at the requested
    # percentile of the sorted collection. Two variants exist, selected
    # through the `:variant` option:
    #
    # * `:continuous` (default) always returns a Float. When the percentile
    #   falls exactly on the boundary between two collected values, their
    #   mean is returned. Values must support `+` and `/`.
    # * `:discrete` always returns one of the collected values.
    #
    # Example:
    #
    #   # direct ruby usage
    #   Bmg::Summarizer.percentile(:qty, 50).summarize(...)
    #
    class Percentile < Summarizer

      DEFAULT_OPTIONS = {
        :variant => :continuous
      }.freeze

      # Arguments are recognized by their class, in any order:
      #
      # * the first Numeric is the percentile, between 0 and 100
      #   (defaults to 50);
      # * the first Symbol is the functor passed to Summarizer (the block
      #   is used when no Symbol is given);
      # * every Hash is merged, in order, over DEFAULT_OPTIONS.
      def initialize(*args, &bl)
        @nth = args.find{|a| a.is_a?(Numeric) } || 50
        functor = args.find{|a| a.is_a?(Symbol) } || bl
        options = args.select{|a| a.is_a?(Hash) }.inject(DEFAULT_OPTIONS){|memo,opts|
          memo.merge(opts)
        }.dup
        super(functor, options)
      end

      # Returns [] as least value.
      def least
        []
      end

      # Collects the value
      def _happens(memo, val)
        memo << val
      end

      # Finalizes the computation.
      #
      # Returns nil when no value was collected. Otherwise, with
      # `rank = size * nth / 100`:
      #
      # * when `rank` is a whole number, the percentile sits between the
      #   rank-th and (rank+1)-th sorted values (1-based): the continuous
      #   variant returns their mean, the discrete one the rank-th value;
      # * otherwise the percentile is the ceil(rank)-th sorted value, for
      #   both variants (as a Float for the continuous one).
      def finalize(memo)
        return nil if memo.empty?
        sorted = memo.sort
        last = sorted.size - 1
        # Multiply before dividing, so that a whole rank is not turned into
        # a fractional one by floating point rounding (e.g. 100 * 0.07).
        rank = (sorted.size * @nth) / 100.0
        floor, ceil = rank.floor, rank.ceil
        if floor == ceil
          below = clamp_index(floor - 1, last)
          above = clamp_index(floor, last)
        else
          below = above = clamp_index(ceil - 1, last)
        end
        if options[:variant] == :continuous
          (sorted[above] + sorted[below]) / 2.0
        else
          sorted[below]
        end
      end

    private

      # Forces `index` into the 0..last range
      def clamp_index(index, last)
        [[index, 0].max, last].min
      end

    end # class Percentile

    # Factors a percentile summarizer, see Percentile#initialize
    def self.percentile(*args, &bl)
      Percentile.new(*args, &bl)
    end

    # Same as `percentile`, forcing the continuous variant
    def self.percentile_cont(*args, &bl)
      Percentile.new(*(args + [{:variant => :continuous}]), &bl)
    end

    # Same as `percentile`, forcing the discrete variant
    def self.percentile_disc(*args, &bl)
      Percentile.new(*(args + [{:variant => :discrete}]), &bl)
    end

    # Factors a 50th percentile summarizer (unless `args` already contains
    # a percentile, which then takes precedence)
    def self.median(*args, &bl)
      Percentile.new(*(args + [50]), &bl)
    end

    # Same as `median`, forcing the continuous variant
    def self.median_cont(*args, &bl)
      Percentile.new(*(args + [50, {:variant => :continuous}]), &bl)
    end

    # Same as `median`, forcing the discrete variant
    def self.median_disc(*args, &bl)
      Percentile.new(*(args + [50, {:variant => :discrete}]), &bl)
    end

  end # class Summarizer
end # module Bmg
data/lib/bmg/support.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require_relative 'support/tuple_algebra'
2
2
  require_relative 'support/tuple_transformer'
3
3
  require_relative 'support/keys'
4
+ require_relative 'support/ordering'
4
5
  require_relative 'support/output_preferences'
@@ -0,0 +1,20 @@
1
module Bmg
  #
  # Describes how tuples must be ordered, as a list of
  # `[attribute, direction]` pairs taken in priority order. Any direction
  # other than `:desc` means ascending.
  #
  class Ordering

    def initialize(attrs)
      @attrs = attrs
    end
    attr_reader :attrs

    # Returns a lambda taking two tuples and returning -1, 0 or 1, as
    # expected by a sort block.
    #
    # Attributes are compared one after the other with `<=>`; the first
    # one that differs decides. A nil value is placed after any non-nil
    # value in ascending direction (hence before in descending direction).
    #
    # Raises an ArgumentError when two non-nil values cannot be compared.
    def comparator
      ->(t1, t2) { compare(t1, t2) }
    end

  private

    # Compares two tuples attribute by attribute, honoring directions
    def compare(t1, t2)
      attrs.each do |(attr, direction)|
        c = compare_values(t1[attr], t2[attr])
        next if c == 0
        return direction == :desc ? -c : c
      end
      0
    end

    # Ascending comparison of two attribute values, with nil last. Never
    # returns nil, so the result can always be negated by the caller.
    def compare_values(v1, v2)
      return 0 if v1.nil? && v2.nil?
      return 1 if v1.nil?
      return -1 if v2.nil?
      c = v1 <=> v2
      if c.nil?
        raise ArgumentError, "comparison of #{v1.class} with #{v2.class} failed"
      end
      c
    end

  end # class Ordering
end # module Bmg
@@ -19,5 +19,11 @@ module Bmg
19
19
  end
20
20
  module_function :rename
21
21
 
22
# Returns a new Hash with the same values as `h`, every key being
# converted through `to_sym`. `h` itself is never modified; when empty
# it is returned as is, without being copied.
def symbolize_keys(h)
  return h if h.empty?
  # The accumulator has its own name, so that it does not shadow `h`
  h.each_with_object({}){|(k,v),symbolized| symbolized[k.to_sym] = v }
end
26
+ module_function :symbolize_keys
27
+
22
28
  end # module TupleAlgebra
23
29
  end # module Bmg
@@ -26,17 +26,22 @@ module Bmg
26
26
 
27
27
  def transform_tuple(tuple, with)
28
28
  case with
29
- when Symbol
30
- tuple.each_with_object({}){|(k,v),dup|
31
- dup[k] = transform_attr(v, with)
32
- }
33
- when Proc
29
+ when Symbol, Proc, Regexp
34
30
  tuple.each_with_object({}){|(k,v),dup|
35
31
  dup[k] = transform_attr(v, with)
36
32
  }
37
33
  when Hash
38
34
  with.each_with_object(tuple.dup){|(k,v),dup|
39
- dup[k] = transform_attr(dup[k], v)
35
+ case k
36
+ when Symbol
37
+ dup[k] = transform_attr(dup[k], v)
38
+ when Class
39
+ dup.keys.each do |attrname|
40
+ dup[attrname] = transform_attr(dup[attrname], v) if dup[attrname].is_a?(k)
41
+ end
42
+ else
43
+ raise ArgumentError, "Unexpected transformation `#{with.inspect}`"
44
+ end
40
45
  }
41
46
  when Array
42
47
  with.inject(tuple){|dup,on|
@@ -51,6 +56,9 @@ module Bmg
51
56
  case with
52
57
  when Symbol
53
58
  value.send(with)
59
+ when Regexp
60
+ m = with.match(value.to_s)
61
+ m.nil? ? m : m.to_s
54
62
  when Proc
55
63
  with.call(value)
56
64
  when Hash
data/lib/bmg/version.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  module Bmg
2
2
  module Version
3
3
  MAJOR = 0
4
- MINOR = 17
5
- TINY = 8
4
+ MINOR = 18
5
+ TINY = 4
6
6
  end
7
7
  VERSION = "#{Version::MAJOR}.#{Version::MINOR}.#{Version::TINY}"
8
8
  end
data/lib/bmg/writer.rb CHANGED
@@ -1 +1,17 @@
1
module Bmg
  #
  # Behavior shared by the writers including this module. The including
  # class is expected to provide `output_preferences`.
  #
  module Writer

  protected

    # Infers the list of headers to output from `from`.
    #
    # * a Type that knows its attribute list provides that list;
    # * a Hash (e.g. a tuple) provides its keys;
    # * anything else yields nil.
    #
    # When a list is found, it is returned as ordered by the output
    # preferences.
    def infer_headers(from)
      candidates = nil
      if from.is_a?(Type) && from.knows_attrlist?
        candidates = from.to_attrlist
      elsif from.is_a?(Hash)
        candidates = from.keys
      end
      return nil unless candidates

      output_preferences.order_attrlist(candidates)
    end

  end # module Writer
end # module Bmg
1
17
  require_relative 'writer/csv'
@@ -26,17 +26,6 @@ module Bmg
26
26
  to_s ? string_or_io.string : string_or_io
27
27
  end
28
28
 
29
- private
30
-
31
- def infer_headers(from)
32
- attrlist = if from.is_a?(Type) && from.knows_attrlist?
33
- from.to_attrlist
34
- elsif from.is_a?(Hash)
35
- from.keys
36
- end
37
- attrlist ? output_preferences.order_attrlist(attrlist) : nil
38
- end
39
-
40
29
  end # class Csv
41
30
  end # module Writer
42
31
  end # module Bmg
@@ -0,0 +1,68 @@
1
# `Date` is matched in Xlsx#write_pair, make sure the constant exists
require 'date'
module Bmg
  module Writer
    #
    # Writes a relation as the first worksheet of an .xlsx file, through
    # the `write_xlsx` gem (only required when a file is actually written).
    #
    class Xlsx
      include Writer

      DEFAULT_OPTIONS = {
      }.freeze

      # `csv_options` are merged over DEFAULT_OPTIONS; the writing code
      # below does not read them. `output_preferences` are dressed through
      # OutputPreferences.dress.
      def initialize(csv_options, output_preferences = nil)
        @csv_options = DEFAULT_OPTIONS.merge(csv_options)
        @output_preferences = OutputPreferences.dress(output_preferences)
      end
      attr_reader :csv_options, :output_preferences

      # Writes `relation` to the file at `path` and returns `path`.
      #
      # The work is done on a copy of the writer, so that the workbook
      # state never ends up on the receiver.
      def call(relation, path)
        require 'write_xlsx'
        dup._call(relation, path)
      end

    protected
      attr_reader :workbook, :worksheet

      # Writes a header row (row 0), then one row per tuple.
      #
      # Headers are inferred from the relation type or, failing that, from
      # the first tuple. As a consequence nothing at all, not even the
      # header row, is written for an empty relation.
      def _call(relation, path)
        @workbook = WriteXLSX.new(path)
        @worksheet = workbook.add_worksheet

        headers = infer_headers(relation.type)
        relation.each_with_index do |tuple, row|
          headers = infer_headers(tuple) if headers.nil?
          write_headers(headers) if row == 0
          headers.each_with_index do |h, col|
            meth, *args = write_pair(tuple[h])
            worksheet.send(meth, 1 + row, col, *args)
          end
        end

        workbook.close
        path
      end

      # Writes the header row. Attribute names are converted to strings,
      # since they are not necessarily String instances.
      def write_headers(headers)
        headers.each_with_index do |h, col|
          worksheet.write_string(0, col, h.to_s)
        end
      end

      # Returns the worksheet method to use for `value`, followed by the
      # arguments to pass after the row and column.
      def write_pair(value)
        case value
        when Numeric
          [:write_number, value]
        when Date
          [:write_date_time, value, date_format]
        else
          [:write_string, value.to_s]
        end
      end

      # The cell format used for dates, created once per workbook
      def date_format
        @date_format ||= workbook.add_format(:num_format => 'yyyy-mm-dd')
      end

    end # class Xlsx
  end # module Writer
  module Relation

    # Writes the relation as an .xlsx file and returns the path.
    #
    # The options may be omitted, in which case the first argument is the
    # path: `to_xlsx(path)` and `to_xlsx(options, path)` are both valid.
    def to_xlsx(options = {}, path = nil, preferences = nil)
      options, path = {}, options unless options.is_a?(Hash)
      Writer::Xlsx.new(options, preferences).call(self, path)
    end

  end # module Relation
end # module Bmg
data/tasks/test.rake CHANGED
@@ -6,17 +6,24 @@ namespace :test do
6
6
  desc "Runs unit tests"
7
7
  RSpec::Core::RakeTask.new(:unit) do |t|
8
8
  t.pattern = "spec/unit/**/test_*.rb"
9
- t.rspec_opts = ["-Ilib", "-Ispec/unit", "--fail-fast", "--color", "--backtrace", "--format=progress"]
9
+ t.rspec_opts = ["-Ilib", "-Ispec/unit", "--color", "--backtrace", "--format=progress"]
10
10
  end
11
11
  tests << :unit
12
12
 
13
13
  desc "Runs integration tests"
14
14
  RSpec::Core::RakeTask.new(:integration) do |t|
15
15
  t.pattern = "spec/integration/**/test_*.rb"
16
- t.rspec_opts = ["-Ilib", "-Ispec/integration", "--fail-fast", "--color", "--backtrace", "--format=progress"]
16
+ t.rspec_opts = ["-Ilib", "-Ispec/integration", "--color", "--backtrace", "--format=progress"]
17
17
  end
18
18
  tests << :integration
19
19
 
20
+ desc "Runs github regression tests"
21
+ RSpec::Core::RakeTask.new(:regression) do |t|
22
+ t.pattern = "spec/regression/**/test_*.rb"
23
+ t.rspec_opts = ["-Ilib", "-Ispec/regression", "--color", "--backtrace", "--format=progress"]
24
+ end
25
+ tests << :regression
26
+
20
27
  task :all => tests
21
28
  end
22
29
 
metadata CHANGED
@@ -1,49 +1,49 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bmg
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.8
4
+ version: 0.18.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bernard Lambeau
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-10 00:00:00.000000000 Z
11
+ date: 2021-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: predicate
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2.4'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
- version: 2.4.0
19
+ version: 2.5.0
20
+ - - "~>"
21
+ - !ruby/object:Gem::Version
22
+ version: '2.5'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '2.4'
30
27
  - - ">="
31
28
  - !ruby/object:Gem::Version
32
- version: 2.4.0
29
+ version: 2.5.0
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.5'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: path
35
35
  requirement: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
- version: '1.3'
39
+ version: '2.0'
40
40
  type: :runtime
41
41
  prerelease: false
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: '1.3'
46
+ version: '2.0'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
49
  requirement: !ruby/object:Gem::Requirement
@@ -78,14 +78,28 @@ dependencies:
78
78
  requirements:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
- version: '2.7'
81
+ version: '2.8'
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - ">="
87
87
  - !ruby/object:Gem::Version
88
- version: '2.7'
88
+ version: '2.8'
89
+ - !ruby/object:Gem::Dependency
90
+ name: write_xlsx
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '1.0'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '1.0'
89
103
  - !ruby/object:Gem::Dependency
90
104
  name: sequel
91
105
  requirement: !ruby/object:Gem::Requirement
@@ -154,6 +168,7 @@ files:
154
168
  - lib/bmg/reader.rb
155
169
  - lib/bmg/reader/csv.rb
156
170
  - lib/bmg/reader/excel.rb
171
+ - lib/bmg/reader/text_file.rb
157
172
  - lib/bmg/relation.rb
158
173
  - lib/bmg/relation/empty.rb
159
174
  - lib/bmg/relation/in_memory.rb
@@ -250,16 +265,21 @@ files:
250
265
  - lib/bmg/sql/version.rb
251
266
  - lib/bmg/summarizer.rb
252
267
  - lib/bmg/summarizer/avg.rb
268
+ - lib/bmg/summarizer/by_proc.rb
253
269
  - lib/bmg/summarizer/collect.rb
254
270
  - lib/bmg/summarizer/concat.rb
255
271
  - lib/bmg/summarizer/count.rb
272
+ - lib/bmg/summarizer/distinct.rb
256
273
  - lib/bmg/summarizer/max.rb
257
274
  - lib/bmg/summarizer/min.rb
275
+ - lib/bmg/summarizer/multiple.rb
276
+ - lib/bmg/summarizer/percentile.rb
258
277
  - lib/bmg/summarizer/stddev.rb
259
278
  - lib/bmg/summarizer/sum.rb
260
279
  - lib/bmg/summarizer/variance.rb
261
280
  - lib/bmg/support.rb
262
281
  - lib/bmg/support/keys.rb
282
+ - lib/bmg/support/ordering.rb
263
283
  - lib/bmg/support/output_preferences.rb
264
284
  - lib/bmg/support/tuple_algebra.rb
265
285
  - lib/bmg/support/tuple_transformer.rb
@@ -267,6 +287,7 @@ files:
267
287
  - lib/bmg/version.rb
268
288
  - lib/bmg/writer.rb
269
289
  - lib/bmg/writer/csv.rb
290
+ - lib/bmg/writer/xlsx.rb
270
291
  - tasks/gem.rake
271
292
  - tasks/test.rake
272
293
  homepage: http://github.com/enspirit/bmg
@@ -288,7 +309,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
288
309
  - !ruby/object:Gem::Version
289
310
  version: '0'
290
311
  requirements: []
291
- rubygems_version: 3.1.2
312
+ rubygems_version: 3.0.8
292
313
  signing_key:
293
314
  specification_version: 4
294
315
  summary: Bmg is Alf's successor.