active_data_frame 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 5c1042e5d6a9e65c386a0dc5353e6fcc3e065a84
-   data.tar.gz: 13ee5f0520a97c563dc5bfedb4408010464b2e5f
+   metadata.gz: 5c368f1ed1f3fc78c0e9f81b0d2bd7cc9f50141a
+   data.tar.gz: 80fa0cfdeed12b5b41d7556ec9c019670827e934
  SHA512:
-   metadata.gz: 7e9f1118a5c18a0aed0bc933ec2e9bfc7d443412e762da9cee9707bdf4084922e9d2904f3668088ff2d3866bcebf9a90b0b8c9e21c90e023c9fb2ca19d1c57c3
-   data.tar.gz: f957ad5532cfcd4a5d635d278a0a5ee6de2c4b6179e5e82f785f6aa9961ae5ea64846d3c138816c1d2e5b2443dc766bcfe7576e2403c7d21fcf77b8e37315b6a
+   metadata.gz: b2cc97b56fe384be682c9631a06c108b2524434230df5f4ac4949300339fadea0dcbca0f1efb9822bd04c3a43a7ae2374a3dbad02706793cfc5f8fa42600920b
+   data.tar.gz: 7deccde31e9d8a99b31831d2af96227cdf9d087297321b531c79b4327b9bb63f38e0fa026869a94ffb4d5ea3cb5e9e61c5805328cf7bb8248b26e54f95f7fc40
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
  require "bundler/gem_tasks"
- task :default => :spec
+ require "rake/testtask"
+
+ Rake::TestTask.new(:test) do |t|
+   t.libs << "test"
+   t.libs << "lib"
+   t.test_files = FileList['test/**/*_test.rb']
+ end
+
+ task :default => :test
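
After this change the gem's default Rake task runs the Minitest suite rather than RSpec: `bundle exec rake` now invokes the new :test task, and `bundle exec rake test` runs it explicitly (assuming a checkout of the gem with its test/ directory).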
data/active_data_frame.gemspec CHANGED
@@ -23,6 +23,10 @@ Gem::Specification.new do |spec|
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "pry-byebug", "~> 3.4.0", '>= 3.4.0'
  spec.add_development_dependency 'pry', '~> 0.10.2', '>= 0.10.0'
- spec.add_runtime_dependency 'activerecord', '~> 5.0.0'
+ spec.add_development_dependency 'pg'
+ spec.add_development_dependency 'minitest', '~> 5.11'
+ spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
+ spec.add_development_dependency 'minitest-around', '0.4.1'
+ spec.add_runtime_dependency 'activerecord', '~> 5.0'
  spec.add_runtime_dependency 'rmatrix', '~> 0.1.10', '>=0.1.10'
  end
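
Note the loosened runtime constraint on ActiveRecord. Under RubyGems' pessimistic operator the two forms admit different version ranges:

    # '~> 5.0.0'  means  '>= 5.0.0', '< 5.1'   (patch releases of 5.0 only)
    # '~> 5.0'    means  '>= 5.0',   '< 6.0'   (any ActiveRecord 5.x)

so 0.1.2 can be bundled alongside ActiveRecord 5.1 and 5.2 as well.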
@@ -1,37 +1,9 @@
- Refactor:
-   Is Engine necessary? @done (17-03-31 08:20)
-   Add Typecode expectations @done (17-04-06 08:16)
-   Add enum capabilities
-   Better errors when using bad indices in RMatrix
-   Better printing in RMatrix
-   ☐ Refactor + Tidy
-   ☐ Tests
-   ☐ Experiment with MonetDB speed
-   ☐ Check support for different numeric/string/bool etc. types
-   ✔ Experiment with single precision @done (17-03-31 08:18)
- ActiveRecordMonetDBAdapter:
-   ☐ Work on support for MonetDB
-
- ActiveDataFrame:
-   ✔ Refactor grouping/summing code @done (17-03-31 08:20)
-   ✔ Allow includes to combine frames @done (17-03-27 10:36)
-   ✔ Performance test on ICP data @done (17-03-27 08:41)
-   ✔ Alternate RDBMS support (SQLite, MySQL) @done (17-03-27 09:58)
-
-
- Utilities:
-   ☐ KMeans clustering and DBScan built in to multi-d array
-
- Later:
-   ☐ Build generic Merge/Cache structure which will either cache infinite columns or rows
-     - class Unit
-     -   df_cache :all_loads, ::loads, direction: :row
-     - end
-
- Ruby dataframe library inspiration:
-   - Integration with Nyaplot
-   - Integration with Statsample
-
+ Priorities:
+   Ensure delete/clear works
+   rmatrix tests
+   Update README.md
+   Use MMAP of status/enums
+   Support SQLite + MySQL, MonetDB
  ✔ Generator creates a migration and data_frame and block classes. Block/DataFrame classes have a type, a period unit and a period length @done (17-01-12 10:29)
  ✔ Type is: @done (17-01-12 10:29)
  ✔ Bit @done (17-01-12 10:29)
@@ -79,5 +51,10 @@ Ruby dataframe library inspiration:
  ✔ Finish RMatrix @done (17-03-02 09:01)

  RMatrix:
-   ✔ Ensure assignment works @done (17-03-21 09:56)
-   Raw is simply a copy of self without mappings @cancelled (17-03-21 09:56)
+   ✔ Ensure assignment works @done (18-04-03 18:58)
+   Raw is simply a copy of self without mappings @done (18-04-03 18:58)
+ ActiveDataFrame:
+   ✔ dimensions_minimum @done (18-04-03 18:58)
+   ✔ dimensions_maximum @done (18-04-03 18:58)
+   ✔ dimensions_sum @done (18-04-03 18:58)
+   ✔ dimensions_average @done (18-04-03 18:58)
data/examples.rb ADDED
@@ -0,0 +1,46 @@
+
+ # Get times of day where there was a price spike in Wellington
+ Icp.where(region: :wellington).loads.idx_where_sum_gte(Time.now..1.day.from_now, 12_000)
+
+ # Get current load for all ICPs, grouped by :region, :customer_class, :tariff
+ Icp.include_loads(Time.now).with_groups(:region, :customer_class, :tariff).sum("\"#{Time.now}\"")
+
+ # Get the next day's aggregate usage for Auckland residential customers
+ Icp.where(region: :auckland, customer_class: :residential).loads.sum(Time.now..1.day.from_now)
+
+ # Get a year's worth of load for a single ICP
+ Icp.first.load[Time.now..1.year.from_now]
+
+ # Get a day's worth of load for many ICPs
+ Icp.where(tariff: :un).loads[Time.now..1.day.from_now]
+
+ # Get the average load over a day for many ICPs
+ Icp.where(tariff: :un).loads.avg(Time.now..1.day.from_now)
+
+ # Count ICPs which have more than 5.5kW of load at this point in time
+ Icp.include_loads(Time.now).where("\"%s\" > ?" % Time.now, 5.5).count
+
+
+ # See the largest sepal length seen for each species
+ Iris.with_groups(:species).max(:sepal_length)
+
+ # Get an individual iris's sepal_length
+ Iris.first.dimension.sepal_length
+
+ # Get multiple dimensions for an individual iris
+ Iris.first.dimension[:sepal_length, :petal_width]
+
+ # Get a range of dimensions for an individual iris
+ Iris.first.dimension[:sepal_length..:petal_width]
+
+ # Get a range of dimensions for all Iris versicolors
+ dimensions = Iris.where(species: :versicolor).dimensions[:sepal_length..:petal_width]
+
+ # Chop data as needed
+ sepal_lengths = dimensions.sepal_length
+ sepal_lengths_petal_widths = dimensions[[:sepal_length, :petal_width]]
+
+ selected_iris = dimensions[Iris.where(species: :versicolor).first(5)]
+
+ # See the RMatrix API for matrix functionality
+ #
data/lib/active_data_frame.rb CHANGED
@@ -1,5 +1,7 @@
  require 'active_data_frame/data_frame_proxy'
+ require 'active_data_frame/group_proxy'
  require 'active_data_frame/table'
  require 'active_data_frame/row'
  require 'active_data_frame/has_data_frame'
+ require 'active_data_frame/database'
  require 'rmatrix'
data/lib/active_data_frame/bounds.rb ADDED
@@ -0,0 +1,4 @@
+ module ActiveDataFrame
+   class Bounds < Struct.new(:from, :to, :length, :index)
+   end
+ end
@@ -1,31 +1,58 @@
  module ActiveDataFrame
+
+   require_relative 'point'
+   require_relative 'bounds'
+
    class DataFrameProxy
-     attr_accessor :block_type, :data_frame_type, :block_type_name
-     def initialize(block_type, data_frame_type)
-       self.block_type = block_type
-       self.data_frame_type = data_frame_type
-       self.block_type_name = block_type.table_name.gsub(/_blocks$/,'').gsub(/^blocks_/,'')
+     attr_accessor :block_type, :data_frame_type, :block_type_name, :value_map, :singular_df_name, :plural_df_name
+
+     def initialize(block_type, data_frame_type, value_map: nil, singular_df_name: '', plural_df_name: '')
+       self.block_type = block_type
+       self.data_frame_type = data_frame_type
+       self.block_type_name = block_type.table_name.gsub(/_blocks$/,'').gsub(/^blocks_/,'')
+       self.value_map = value_map
+       self.singular_df_name = singular_df_name
+       self.plural_df_name = plural_df_name
+     end
+
+     def reverse_value_map
+       @reverse_value_map ||= value_map.invert
      end

      def [](*ranges)
-       get(extract_ranges(ranges))
+       result = get(extract_ranges(ranges))
+       if @value_map
+         # TODO Multi-dimensions #map would be nice
+         result.to_a.map{|row| row.kind_of?(Array) ? row.map(&reverse_value_map.method(:[])) : reverse_value_map[row]}
+       else
+         result
+       end
      end

      def []=(from, values)
+       values = Array(values).flatten.map(&@value_map.method(:[])) if @value_map
        from = column_map[from] if column_map && column_map[from]
        set(from, M[values, typecode: block_type::TYPECODE].to_a.flatten)
      end

+     def clear(*ranges)
+       clear(ex)
+     end
+
      def column_map
-       data_frame_type.column_map(self.block_type_name)
+       data_frame_type.column_map(self.singular_df_name)
      end

      def column_name_map
-       data_frame_type.column_name_map(self.block_type_name)
+       data_frame_type.column_name_map(self.singular_df_name)
      end

      def reverse_column_map
-       data_frame_type.reverse_column_map(self.block_type_name)
+       data_frame_type.reverse_column_map(self.singular_df_name)
+     end
+
+     def database
+       @database ||= Database.for_types(block: block_type, df: data_frame_type)
      end

      def method_missing(name, *args, &block)
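
The new value_map option lets a frame store compact codes while callers read and write symbolic values: []= maps values through value_map on write, and [] maps the stored codes back through reverse_value_map on read. A hypothetical round trip (the status frame and the map below are illustrative, not part of the gem):

    value_map = { unreviewed: 0, approved: 1, rejected: 2 }
    foo.status[0] = :approved   # stored as 1 via value_map
    foo.status[0]               # => :approved, recovered via reverse_value_map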
@@ -42,7 +69,7 @@ module ActiveDataFrame
          case range
          when Range then range
          when Fixnum then range..range
-         else raise "Unexpected index #{range}"
+         else raise "Unexpected index for data frame proxy #{range}, expecting either a Range or an Integer"
          end
        end
      end
@@ -51,9 +78,6 @@ module ActiveDataFrame
        0
      end

-     def flatten_ranges(ranges)
-     end
-
      def unmap_ranges(ranges, map)
        ranges.map do |range|
          case range
@@ -71,15 +95,17 @@ module ActiveDataFrame
        from_block_offset = from % block_type::BLOCK_SIZE
        to_block_index = to / block_type::BLOCK_SIZE
        to_block_offset = to % block_type::BLOCK_SIZE
-       return Struct.new(:from, :to, :length, :index).new(
-         Struct.new(:index, :offset, :position).new(from_block_index, from_block_offset, from),
-         Struct.new(:index, :offset, :position).new(to_block_index, to_block_offset, to),
+       return Bounds.new(
+         Point.new(from_block_index, from_block_offset, from),
+         Point.new(to_block_index, to_block_offset, to),
          (to - from) + 1,
          index
        )
      end

      def self.suppress_logs
+       # TODO: Make optional
+       return yield
        ActiveRecord::Base.logger, old_logger = nil, ActiveRecord::Base.logger
        yield.tap do
          ActiveRecord::Base.logger = old_logger
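
Bounds and Point (the latter lives in the sibling point.rb, which this diff does not show) replace the anonymous Struct.new calls above. As a worked example, assuming a BLOCK_SIZE of 512, get_bounds(5, 514, 0) would return:

    Bounds.new(
      Point.new(0, 5, 5),    # from: block 0, offset 5, absolute position 5
      Point.new(1, 2, 514),  # to:   block 1, offset 2, absolute position 514
      510,                   # length: (514 - 5) + 1
      0                      # index of the originating range
    )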
@@ -101,11 +127,15 @@ module ActiveDataFrame
        end
      end

+     def match_range(from, to)
+       from == to ? from : from..to
+     end
+
      def blocks_between(bounds, block_scope: scope)
        bounds[1..-1].reduce(
-         block_scope.where( block_type.table_name => { period_index: (bounds[0].from.index..bounds[0].to.index)})
+         block_scope.where( block_type.table_name => { period_index: match_range(bounds[0].from.index, bounds[0].to.index)})
        ) do | or_chain, bound|
-         or_chain.or(block_scope.where( block_type.table_name => { period_index: (bound.from.index..bound.to.index)}))
+         or_chain.or(block_scope.where( block_type.table_name => { period_index: match_range(bound.from.index, bound.to.index)}))
        end
      end
    end
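
match_range collapses degenerate ranges so that blocks_between generates an equality predicate rather than a one-element BETWEEN:

    match_range(5, 5)   # => 5     ... WHERE period_index = 5
    match_range(5, 9)   # => 5..9  ... WHERE period_index BETWEEN 5 AND 9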
data/lib/active_data_frame/database.rb ADDED
@@ -0,0 +1,115 @@
+ module ActiveDataFrame
+   class Database
+
+     def self.batching
+       !!Thread.current[:active_data_frame_batching]
+     end
+
+     def self.batching=(value)
+       Thread.current[:active_data_frame_batching] = !!value
+     end
+
+     # Not thread safe!
+     def self.execute(sql)
+       if ActiveDataFrame::Database.batching
+         Thread.current[:batch] << sql << ?;
+       else
+         ActiveRecord::Base.transaction do
+           ActiveRecord::Base.connection.execute sql
+         end
+       end
+     end
+
+     def self.flush!
+       execute(Thread.current[:batch])
+       Thread.current[:batch] = ''
+     end
+
+     def self.for_types(block:, df:)
+       (@@configs ||= {})[[block, df]] ||= Database.new(block, df)
+     end
+
+     attr_reader :block_type, :data_frame_type
+
+     def initialize(block_type, data_frame_type)
+       @block_type = block_type
+       @data_frame_type = data_frame_type
+     end
+
+     def self.batch
+       self.batching, prev_batch = true, self.batching
+       Thread.current[:batch] ||= ''
+       ActiveRecord::Base.transaction do
+         yield
+       end
+     ensure
+       self.batching = prev_batch
+       flush! unless self.batching
+     end
+     ##
+     # Update block data for all blocks in a single call
+     ##
+     def bulk_update(existing)
+       case ActiveRecord::Base.connection_config[:adapter]
+       when 'postgresql'.freeze
+         # Fast bulk update
+         updates = ''
+         existing.each do |period_index, (values, df_id)|
+           updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
+         end
+         perform_update(updates)
+       else
+         ids = existing.map {|_, (_, id)| id}
+         updates = block_type::COLUMNS.map.with_index do |column, column_idx|
+           [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
+         end.to_h
+         update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
+         Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
+           #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
+           AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
+           AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
+           "
+         )
+       end
+     end
+
+     ##
+     # Insert block data for all blocks in a single call
+     ##
+     def bulk_insert(new_blocks, instance)
+       inserts = ''
+       new_blocks.each do |period_index, (values)|
+         inserts << \
+           case ActiveRecord::Base.connection_config[:adapter]
+           when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
+           else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
+           end
+       end
+       perform_insert(inserts)
+     end
+
+     def bulk_delete(blocks)
+       binding.pry
+     end
+
+     def perform_update(updates)
+       Database.execute(
+         <<-SQL
+           UPDATE #{block_type.table_name}
+           SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
+           FROM(
+           VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
+           WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
+           AND #{block_type.table_name}.period_index = t.period_index
+           AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
+         SQL
+       )
+       true
+     end
+
+     def perform_insert(inserts)
+       sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
+       Database.execute sql
+     end
+   end
+ end
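
The batching hooks above buffer generated SQL on the current thread and flush it in a single execute call when the outermost batch block exits. A sketch of the intended use (the foo model and bars frame are illustrative):

    ActiveDataFrame::Database.batch do
      foo.bars[0..9]   = first_chunk    # SQL is appended to Thread.current[:batch]...
      foo.bars[10..19] = second_chunk
    end                                 # ...and flushed here inside one transaction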
data/lib/active_data_frame/group_proxy.rb ADDED
@@ -0,0 +1,40 @@
+ module ActiveDataFrame
+   class GroupProxy
+     attr_accessor :groups
+     def initialize(groups)
+       self.groups = groups
+     end
+
+     def min(column_name)
+       aggregate('minimum', column_name)
+     end
+
+     def max(column_name)
+       aggregate('maximum', column_name)
+     end
+
+     def sum(column_name)
+       aggregate('sum', column_name)
+     end
+
+     def average(column_name)
+       aggregate('average', column_name)
+     end
+
+     def count
+       aggregate('count')
+     end
+
+     private
+     def aggregate(*agg)
+       counts = self.groups.send(*agg)
+       grouped = {}
+       counts.each do |keys, value|
+         keys = Array(keys)
+         child = keys[0..-2].reduce(grouped){|parent, key| parent[key] ||= {}}
+         child[keys[-1]] = value
+       end
+       grouped
+     end
+   end
+ end
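
GroupProxy reshapes ActiveRecord's flat grouped results into nested hashes, one level per grouping column. For example (model and columns illustrative):

    Icp.with_groups(:region, :tariff).sum(:load)
    # ActiveRecord's group(...).sum(...) returns
    #   { ["auckland", "un"] => 120.5, ["auckland", "ctrl"] => 60.2, ... }
    # which GroupProxy#aggregate reshapes into
    #   { "auckland" => { "un" => 120.5, "ctrl" => 60.2 }, ... }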
data/lib/active_data_frame/has_data_frame.rb CHANGED
@@ -1,116 +1,170 @@
- require 'active_support/concern'
-
-
  module ActiveDataFrame
-   class GroupProxy
-     attr_accessor :groups
-     def initialize(groups)
-       self.groups = groups
-     end
-
-     def min(column_name)
-       aggregate('minimum', column_name)
-     end
+   #
+   # Modules can include HasDataFrame('frame_name', FrameBlockType) to gain data frame capabilities.
+   # This method will expose class-level and row (Active Record instance) level accessors to the underlying data frame.
+   #
+   # E.g.
+   #
+   #   module HasBar
+   #     include HasDataFrame('bars', BarBlock)
+   #   end
+   #
+   #   class Foo
+   #     include HasBar
+   #   end
+   #
+   #   # Select all bars from index 0 to 40, for all foos
+   #   Foo.bars[0..40]
+   #
+   #   # Select all bars from index 0 to 40, for foo with id: 1
+   #   Foo.find(1).bars[0..40]
+   #
+   #   # Find the average bar size for Foo 1 from index 5 to 30
+   #   Foo.find(1).bars[5..30].avg
+   #
+   #   # Find the average bar size for the first 10 foos from index 13..43
+   #   Foo.limit(10).bars.avg[13..43]
+   #
+   #   # Find the summed size for all foos where baz == boo from index 13..43
+   #   Foo.where(baz: :boo).bars.sum[13..43]
+   #
+   def self.HasDataFrame(singular_table_name, block_type, table_name: singular_table_name, value_map: nil, &block)
+     Module.new do
+       define_singleton_method(:included) do |base|
+         # If somebody includes our dataframe-enabled module we execute the following
+         base.define_singleton_method(:included) do |decorated|
+           block[decorated] if block
+           decorated.extend(base::ClassMethods) if defined?(base::ClassMethods)

-     def max(column_name)
-       aggregate('maximum', column_name)
-     end
+           # Add our class-level methods
+           decorated.extend(
+             ActiveDataFrame.build_module_class_methods(singular_table_name, block_type, table_name: table_name, value_map: value_map)
+           )

-     def sum(column_name)
-       aggregate('sum', column_name)
-     end
+           # Add our instance-level methods
+           decorated.class_eval do

-     def average(column_name)
-       aggregate('average', column_name)
-     end
+             if value_map
+               decorated.const_set(singular_table_name.underscore.camelize, ActiveDataFrame.build_dot_accessible_hash(value_map))
+             end

-     def count
-       aggregate('count')
-     end
+             # Provide a memoised reference to the DF row
+             define_method singular_table_name do
+               (@data_frame_proxies ||= {})[singular_table_name] ||= Row.new(
+                 block_type,
+                 self.class,
+                 self,
+                 value_map: value_map,
+                 singular_df_name: singular_table_name,
+                 plural_df_name: table_name
+               )
+             end

-     private
-     def aggregate *agg
-       counts = self.groups.send(*agg)
-       grouped = {}
-       counts.each do |keys, value|
-         keys = Array(keys)
-         child = keys[0..-2].reduce(grouped){|parent, key| parent[key] ||= {}}
-         child[keys[-1]] = value
+             # We provide our own inspect implementation which will include in the output
+             # selected dataframe attributes that do not reside on the parent table
+             define_method :inspect do
+               inspection = "not initialized"
+               if defined?(@attributes) && @attributes
+                 inspection = @attributes.keys.collect { |name|
+                   if has_attribute?(name)
+                     "#{name}: #{attribute_for_inspect(name)}"
+                   end
+                 }.compact.join(", ")
+               end
+               "<#{self.class} #{inspection}>"
+             end
+           end
          end
-       grouped
        end
+     end
    end

- def self.HasDataFrame(singular_table_name, table_name, block_type)
-   to_inject = Module.new
-   to_inject.extend ActiveSupport::Concern
-   to_inject.included do
-     define_method(singular_table_name){
-       @data_frame_proxies ||= {}
-       @data_frame_proxies[singular_table_name] ||= Row.new(block_type, self.class, self)
-     }
-
-     define_method(:inspect){
-       inspection = "not initialized"
-       if defined?(@attributes) && @attributes
-         inspection = @attributes.keys.collect { |name|
-           if has_attribute?(name)
-             "#{name}: #{attribute_for_inspect(name)}"
-           end
-         }.compact.join(", ")
+   #
+   # Define methods on our hash to easily access any values that are indexed by a symbol key
+   # and that do not clash with existing methods on the Hash
+   #
+   def self.build_dot_accessible_hash(hash)
+     hash.dup.tap do |map|
+       map.each do |key, value|
+         if key.kind_of?(Symbol) && !hash.respond_to?(key)
+           map.define_singleton_method(key){ value }
          end
-         "<#{self.class} #{inspection}>"
-       }
+       end
      end
+   end

-   to_inject.class_methods do
-     define_method(:df_column_names){
-       @@column_names ||= {}
-     }
+   #
+   # The class methods that are defined on any class that includes our dataframe-enabled module
+   #
+   def self.build_module_class_methods(singular_table_name, block_type, table_name: singular_table_name, value_map: nil)
+     Module.new do

-     define_method(:df_column_maps){
-       @@column_maps ||= {}
-     }
+       # The key ADF functionality is exposed here.
+       # This defines a new `table_name` accessor on the class which gives you access to a dataframe proxy by the name of `table_name`.
+       #
+       # E.g.
+       #
+       #   class Foo
+       #     include HasBar
+       #   end
+       #
+       #   # Select all bars from index 0 to 40, for all foos
+       #   Foo.bars[0..40]
+       #
+       #   # Select all bars from index 0 to 40, for foo with id: 1
+       #   Foo.find(1).bars[0..40]
+       #
+       #   # Find the average bar size for Foo 1 from index 5 to 30
+       #   Foo.find(1).bars[5..30].avg
+       #
+       #   # Find the average bar size for the first 10 foos from index 13..43
+       #   Foo.limit(10).bars.avg[13..43]
+       #
+       #   # Find the summed size for all foos where baz == boo from index 13..43
+       #   Foo.where(baz: :boo).bars.sum[13..43]
+       #
+       define_method(table_name) do
+         Table.new(
+           block_type,
+           all,
+           value_map: value_map,
+           singular_df_name: singular_table_name,
+           plural_df_name: table_name
+         )
+       end

-     define_method(:df_reverse_column_maps){
-       @@reverse_column_maps ||= {}
-     }

-     define_method(:with_groups){|*groups|
-       GroupProxy.new(group(*groups))
-     }
+       #
+       # A class-level hash containing optionally defined column names for a data frame.
+       # Instead of numeric or dynamic column names, you may explicitly define names for columns using the
+       # "#{singular_table_name}_column_names" method.
+       #
+       # E.g.
+       #
+       #   class Foo
+       #     include HasStatus
+       #     status_column_names %i(review_status export_status)
+       #   end
+       #
+       # This names
+       #   column 0 as 'review_status' and
+       #   column 1 as 'export_status'.
+       # Now you can make queries like:
+       #   * Foo.status.review_status
+       #   * Foo.first.status.export_status
+       #   * Foo.status[:review_status..:export_status]
+       #   * Foo.status[43] # You can still use numeric column indices
+       #
+       define_method :df_column_names do
+         @@column_names ||= {}
+       end

-     define_method(table_name){
-       Table.new(block_type, all)
-     }
-
-     define_method("include_#{table_name}"){|*dimensions, unmap: true|
-       scope = self.all
-       blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
-       included_blocks = blocks_for_tables[singular_table_name] ||= {}
-       dimensions.flatten.each do |key|
-         if unmap && column_map(singular_table_name)
-           idx = column_map(singular_table_name)[key]
-         else
-           idx = key
-           key = "t#{key}"
-         end
-         block_index = idx / block_type::BLOCK_SIZE
-         block_offset = (idx % block_type::BLOCK_SIZE).succ
-         included_blocks[block_index] ||= []
-         included_blocks[block_index] << {name: key, idx: block_offset}
-       end
-       query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (table_name, included_blocks)|
-         aggregate +
-         included_blocks.reduce('') do |aggregate, (block_idx, blocks)|
-           blocks_table_name = "#{table_name}_blocks"
-           aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{table_name}_blocks "+
-           " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{table_name}#{block_idx} ON b#{table_name}#{block_idx}.data_frame_type = '#{self.name}' AND b#{table_name}#{block_idx}.data_frame_id = #{self.table_name}.id"
-         end
-       end + ") as #{self.table_name}"
-       scope.from(query)
+       # The class-level accessor
+       define_method(:column_name_map){|for_table|
+         df_column_names[for_table][self] if defined? df_column_names[for_table] rescue nil
        }

+       # The attribute writer
        define_method("#{singular_table_name}_column_names") do |names|
          df_column_names[singular_table_name] ||= {}
          df_column_maps[singular_table_name] ||= {}
@@ -118,35 +172,172 @@ module ActiveDataFrame
          df_column_maps[singular_table_name][self] = names.map.with_index.to_h
        end

+
+       #
+       # A class-level hash containing optionally defined column maps (these are usually simply a hash that responds to #[](column_name) and returns
+       # a positive integer representing the corresponding column index).
+       # These are defined using the
+       # "#{singular_table_name}_column_maps" method.
+       #
+       #   class Foo
+       #     include HasCpuTemp
+       #     cpu_temp_column_map Hash.new{ |columns, time|
+       #       columns[time] = time.to_i # We store cpu temperatures at a 1-second granularity
+       #     }
+       #   end
+       #
+       define_method :df_column_maps do
+         @@column_maps ||= {}
+       end
+
+       # The attribute writer
        define_method("#{singular_table_name}_column_map") do |column_map|
          df_column_names[singular_table_name] = nil
          df_column_maps[singular_table_name] ||= {}
          df_column_maps[singular_table_name][self] = column_map
        end

+       # The class-level accessor
+       define_method(:column_map){|for_table|
+         df_column_maps[for_table][self] if defined? df_column_maps[for_table] rescue nil
+       }
+
+       #
+       # A class-level hash containing optionally defined reverse column mappings (from a positive integer to a mapped column index/key).
+       # This is only used for functions where we query indices based on values.
+       # E.g.
+       #
+       #   class Foo
+       #     include HasPrice
+       #     column_map Hash.new{|columns, date|
+       #       columns[date] = (date - Date.new(1970)).to_i
+       #     }
+       #     reverse_column_map{|columns, index|
+       #       columns[index] = Date.new(1970) + index.month
+       #     }
+       #   end
+       #
+       #   # Show all dates between 2000 and 2010 where the total of all prices is > $500
+       #   Foo.prices.idx_where_sum_gte(Date.new(2000)...Date.new(2010), 500)
+       #
+       define_method :df_reverse_column_maps do
+         @@reverse_column_maps ||= {}
+       end
+
+       # The attribute writer
        define_method("#{singular_table_name}_reverse_column_map"){|reverse_column_map|
          df_reverse_column_maps[singular_table_name] ||= {}
          df_reverse_column_maps[singular_table_name][self] = reverse_column_map
        }

-       define_method(:include_data_blocks){|table_name, *args|
-         send("include_#{table_name}", *args)
+       # The class-level accessor
+       define_method(:reverse_column_map){|for_table|
+         df_reverse_column_maps[for_table] ||= {}
+         df_reverse_column_maps[for_table][self] ||= column_map(for_table).invert if column_map(for_table)
        }

-       define_method(:column_map){|table_name|
-         df_column_maps[table_name][self] if defined? df_column_maps[table_name] rescue nil
-       }
+       #
+       # See group_proxy.rb.
+       # This makes a number of grouping/bucketing queries easier to express
+       # for analytics across an entire table.
+       #
+       define_method(:with_groups) do |*groups|
+         GroupProxy.new(group(*groups))
+       end

-       define_method(:column_name_map){|table_name|
-         df_column_names[table_name][self] if defined? df_column_names[table_name]
-       }
+       #
+       # If you use the include_#{table_name} function before executing any queries, you can
+       # join the child AR rows with any number of columns and treat them as if they were all part of the same table.
+       # These joined columns can be used to further refine your queries, perform groupings, counts etc.
+       #
+       # E.g.
+       #
+       #   class Iris
+       #     include HasDimension
+       #     dimension_column_names %i(sepal_length sepal_width petal_length petal_width)
+       #   end
+       #
+       #   Iris.where('sepal_length > ?', 4) # Error! (There is no column called sepal_length on the iris table)
+       #   Iris.include_dimensions(:sepal_length).where('sepal_length > ?', 4) # Works fine
+       #   Iris.include_dimension(:sepal_length, :petal_width).where('sepal_length > 3').select(:petal_width)
+       #   Iris.include_dimension(:sepal_length, :petal_width).with_groups('ROUND(sepal_length)').average('petal_width')
+       #   {
+       #     "4.0":"0.2",
+       #     "5.0":"0.397872340425532",
+       #     "6.0":"1.49705882352941",
+       #     "7.0":"1.89583333333333",
+       #     "8.0":"2.15",
+       #   }
+       #
+       # In cases where column names are not predefined or use a mapper, you can provide a hash to give alternate column names for the query.
+       #
+       #   class BuildingType < ApplicationRecord
+       #     include HasBuildingConsent
+       #     consents_column_map Hash.new{|hash, time, as_date = time.to_date|
+       #       (as_date.year - 1970) * 12 + as_date.month
+       #     }
+       #   end
+       #
+       # In this example BuildingType.consents accepts dynamic column indices (anything that responds to to_date).
+       # We can give these columns explicit names so we can refer to them in queries.
+       # E.g.
+       #
+       #   BuildingType.include_consents({'1994-04-01' => april_94, '1994-05-01' => may_94}).where('april_94 + may_94 < 300')
+       #   => [
+       #     <BuildingType id: 2, name: "Hostels_boarding", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 2, april_94: 11, may_94: 5>,
+       #     <BuildingType id: 3, name: "Hotels", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 3, april_94: 33, may_94: 34>,
+       #     <BuildingType id: 4, name: "Hospitals", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 4, april_94: 32, may_94: 37>,
+       #     <BuildingType id: 5, name: "Education", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 5, april_94: 88, may_94: 145>,
+       #     <BuildingType id: 6, name: "Social_cultural_religious", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 6, april_94: 82, may_94: 102>,
+       #     <BuildingType id: 9, name: "Storage", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 9, april_94: 29, may_94: 52>,
+       #     <BuildingType id: 12, name: "Misc", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 12, april_94: 33, may_94: 39>
+       #   ]
+       #
+       #
+       define_method("include_#{table_name}"){|*dimensions, unmap: true, scope: self.all, as: false|
+         dim1 = dimensions[0]
+         case dim1
+         when Hash
+           dimension_map, dimensions = dim1, dim1.keys
+         when Range
+           exclude_end = dim1.exclude_end?
+
+           from, to = if unmap && column_map(singular_table_name)
+             unmap = false
+             [column_map(singular_table_name)[dim1.begin], column_map(singular_table_name)[dim1.end]]
+           else
+             [dim1.begin, dim1.end]
+           end
+           dimensions = (exclude_end ? (from...to) : (from..to)).to_a
+         end
+
+         blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
+         included_blocks = blocks_for_tables[block_type.table_name] ||= {}

-       define_method(:reverse_column_map){|table_name|
-         df_reverse_column_maps[table_name] ||= {}
-         df_reverse_column_maps[table_name][self] ||= column_map(table_name).invert if column_map(table_name)
+         dimensions.flatten.each.with_index(1) do |key, i|
+           if unmap && column_map(singular_table_name)
+             idx = column_map(singular_table_name)[key]
+             key = dimension_map[key] if dimension_map
+           else
+             idx = key
+             key = "t#{key}"
+           end
+           key = "#{as}#{i}" if as
+           block_index = idx / block_type::BLOCK_SIZE
+           block_offset = (idx % block_type::BLOCK_SIZE).succ
+           included_blocks[block_index] ||= []
+           included_blocks[block_index] << {name: key, idx: block_offset}
+         end
+         query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (for_table, blocks_for_table)|
+           aggregate +
+           blocks_for_table.reduce('') do |blocks_aggregate, (block_idx, blocks)|
+             blocks_table_name = for_table
+             blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
+             " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.data_frame_id = #{self.table_name}.id"
+           end
+         end + ") as #{self.table_name}"
+         scope.from(query)
        }
      end
-
-     return to_inject
    end
  end
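
Taken together, the new factory is wired up in two steps: ActiveDataFrame.HasDataFrame builds a module, and including that module in a model installs both the class-level Table accessor and the instance-level Row accessor. A minimal sketch, assuming a generated BarBlock block model (with BLOCK_SIZE, TYPECODE and COLUMNS constants) and an illustrative Foo model:

    module HasBar
      include ActiveDataFrame.HasDataFrame('bar', BarBlock, table_name: 'bars')
    end

    class Foo < ApplicationRecord
      include HasBar
    end

    Foo.bars[0..40]          # class-level Table proxy across all foos
    Foo.find(1).bar[0..40]   # instance-level Row proxy for a single foo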