active_data_frame 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5c1042e5d6a9e65c386a0dc5353e6fcc3e065a84
4
- data.tar.gz: 13ee5f0520a97c563dc5bfedb4408010464b2e5f
3
+ metadata.gz: 5c368f1ed1f3fc78c0e9f81b0d2bd7cc9f50141a
4
+ data.tar.gz: 80fa0cfdeed12b5b41d7556ec9c019670827e934
5
5
  SHA512:
6
- metadata.gz: 7e9f1118a5c18a0aed0bc933ec2e9bfc7d443412e762da9cee9707bdf4084922e9d2904f3668088ff2d3866bcebf9a90b0b8c9e21c90e023c9fb2ca19d1c57c3
7
- data.tar.gz: f957ad5532cfcd4a5d635d278a0a5ee6de2c4b6179e5e82f785f6aa9961ae5ea64846d3c138816c1d2e5b2443dc766bcfe7576e2403c7d21fcf77b8e37315b6a
6
+ metadata.gz: b2cc97b56fe384be682c9631a06c108b2524434230df5f4ac4949300339fadea0dcbca0f1efb9822bd04c3a43a7ae2374a3dbad02706793cfc5f8fa42600920b
7
+ data.tar.gz: 7deccde31e9d8a99b31831d2af96227cdf9d087297321b531c79b4327b9bb63f38e0fa026869a94ffb4d5ea3cb5e9e61c5805328cf7bb8248b26e54f95f7fc40
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
1
1
  require "bundler/gem_tasks"
2
- task :default => :spec
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << "test"
6
+ t.libs << "lib"
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ end
9
+
10
+ task :default => :test
Binary file
@@ -23,6 +23,10 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency "rake", "~> 10.0"
24
24
  spec.add_development_dependency "pry-byebug", "~> 3.4.0", '>= 3.4.0'
25
25
  spec.add_development_dependency 'pry', '~> 0.10.2', '>= 0.10.0'
26
- spec.add_runtime_dependency 'activerecord', '~> 5.0.0'
26
+ spec.add_development_dependency 'pg'
27
+ spec.add_development_dependency 'minitest', '~>5.11'
28
+ spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
29
+ spec.add_development_dependency 'minitest-around', '0.4.1'
30
+ spec.add_runtime_dependency 'activerecord', '~> 5.0'
27
31
  spec.add_runtime_dependency 'rmatrix', '~> 0.1.10', '>=0.1.10'
28
32
  end
@@ -1,37 +1,9 @@
1
- Refactor:
2
- Is Engine neccessary? @done (17-03-31 08:20)
3
- Add Typecode expectations @done (17-04-06 08:16)
4
- Add enum capabilities
5
- Better errors when using bad indices in RMatrix
6
- Better printing in RMatrix
7
- ☐ Refactor + Tidy
8
- ☐ Tests
9
- ☐ Experiment with MonetDB speed
10
- ☐ Check support for different numeric/string/bool.etc types
11
- ✔ Experiment with single precision @done (17-03-31 08:18)
12
- ActiveRecordMonetDBAdapter:
13
- ☐ Work on support for MonetDB
14
-
15
- ActiveDataFrame:
16
- ✔ Refactor grouping/summing code @done (17-03-31 08:20)
17
- ✔ Allow includes to combine frames @done (17-03-27 10:36)
18
- ✔ Performance test on ICP data @done (17-03-27 08:41)
19
- ✔ Alternate RDBMS support (SQLLite, MySQL) @done (17-03-27 09:58)
20
-
21
-
22
- Utilities:
23
- ☐ KMeans clustering and DBScan built in to multi-d array
24
-
25
- Later:
26
- ☐ Build generic Merge/Cache structure which will either cache infinite columns or rows
27
- - class Unit
28
- - df_cache :all_loads, ::loads, direction: :row
29
- - end
30
-
31
- Ruby dataframe library inspiration:
32
- - Integration with Nyaplot
33
- - Integration with Statsample
34
-
1
+ Priorities:
2
+ Ensure delete/clear works
3
+ rmatrix tests
4
+ Update README.md
5
+ Use MMAP of status/enums
6
+ Support SQLite + MySQL, MonetDB
35
7
  ✔ Generator creates A migration and data_frame and block classes. Block/DataFrame classes have a type, a period unit and a period length @done (17-01-12 10:29)
36
8
  ✔ Type is: @done (17-01-12 10:29)
37
9
  ✔ Bit @done (17-01-12 10:29)
@@ -79,5 +51,10 @@ Ruby dataframe library inspiration:
79
51
  ✔ Finish RMatrix @done (17-03-02 09:01)
80
52
 
81
53
  RMatrix:
82
- ✔ Ensure assignment works @done (17-03-21 09:56)
83
- Raw is simply a copy of self without mappings @cancelled (17-03-21 09:56)
54
+ ✔ Ensure assignment works @done (18-04-03 18:58)
55
+ Raw is simply a copy of self without mappings @done (18-04-03 18:58)
56
+ ActiveDataFrame:
57
+ ✔ dimensions_minimum @done (18-04-03 18:58)
58
+ ✔ dimensions_maximum @done (18-04-03 18:58)
59
+ ✔ dimensions_sum @done (18-04-03 18:58)
60
+ ✔ dimensions_average @done (18-04-03 18:58)
data/examples.rb ADDED
@@ -0,0 +1,46 @@
1
+
2
+ # Get times of day where there was a price spike in wellington
3
+ Icp.where(region: :wellington).loads.idx_where_sum_gte(Time.now..1.day.from_now, 12_000)
4
+
5
+ # Get current load for all Icps, grouped by :region, :customer_class, :tariff
6
+ Icp.include_loads(Time.now).with_groups(:region, :customer_class, :tariff).sum("\"#{Time.now}\"")
7
+
8
+ # Get the next day's aggregate usage for Auckland residential customers
9
+ Icp.where(region: :auckland, customer_class: :residential).loads.sum(Time.now..1.day.from_now)
10
+
11
+ # Get a years worth of load for a single ICP
12
+ Icp.first.load[Time.now..1.year.from_now]
13
+
14
+ # Get a days worth of load for many ICPs
15
+ Icp.where(tariff: :un).loads[Time.now..1.day.from_now]
16
+
17
+ # Get the average load over a day for many ICPs
18
+ Icp.where(tariff: :un).loads.avg(Time.now..1.day.from_now)
19
+
20
+ # Count icps which have more than 5.5kw of load at this point in time
21
+ Icp.include_loads(Time.now).where("\"%s\" > ?" % Time.now, 5.5).count
22
+
23
+
24
+ # See the largest sepal length seen for each species
25
+ Iris.with_groups(:species).max(:sepal_length)
26
+
27
+ # Get individual iris sepal_length
28
+ Iris.first.dimension.sepal_length
29
+
30
+ # Get multiple dimensions for individual iris
31
+ Iris.first.dimension[:sepal_length, :petal_width]
32
+
33
+ # Get range of dimensions for individual iris
34
+ Iris.first.dimension[:sepal_length..:petal_width]
35
+
36
+ # Get range of dimensions for all iris versicolors
37
+ dimensions = Iris.where(species: :versicolor).dimensions[:sepal_length..:petal_width]
38
+
39
+ # Chop data as needed
40
+ sepal_lengths = dimensions.sepal_length
41
+ sepal_lengths_petal_widths = dimensions[[:sepal_length, :petal_width]]
42
+
43
+ selected_iris = dimensions[Iris.where(species: :versicolor).first(5)]
44
+
45
+ # Look at RMatrix API for matrix functionality
46
+ #
@@ -1,5 +1,7 @@
1
1
  require 'active_data_frame/data_frame_proxy'
2
+ require 'active_data_frame/group_proxy'
2
3
  require 'active_data_frame/table'
3
4
  require 'active_data_frame/row'
4
5
  require 'active_data_frame/has_data_frame'
6
+ require 'active_data_frame/database'
5
7
  require 'rmatrix'
@@ -0,0 +1,4 @@
1
+ module ActiveDataFrame
2
+ class Bounds < Struct.new(:from, :to, :length, :index)
3
+ end
4
+ end
@@ -1,31 +1,58 @@
1
1
  module ActiveDataFrame
2
+
3
+ require_relative 'point'
4
+ require_relative 'bounds'
5
+
2
6
  class DataFrameProxy
3
- attr_accessor :block_type, :data_frame_type, :block_type_name
4
- def initialize(block_type, data_frame_type)
5
- self.block_type = block_type
6
- self.data_frame_type = data_frame_type
7
- self.block_type_name = block_type.table_name.gsub(/_blocks$/,'').gsub(/^blocks_/,'')
7
+ attr_accessor :block_type, :data_frame_type, :block_type_name, :value_map, :singular_df_name, :plural_df_name
8
+
9
+ def initialize(block_type, data_frame_type, value_map: nil, singular_df_name: '', plural_df_name: '')
10
+ self.block_type = block_type
11
+ self.data_frame_type = data_frame_type
12
+ self.block_type_name = block_type.table_name.gsub(/_blocks$/,'').gsub(/^blocks_/,'')
13
+ self.value_map = value_map
14
+ self.singular_df_name = singular_df_name
15
+ self.plural_df_name = plural_df_name
16
+ end
17
+
18
+ def reverse_value_map
19
+ @reverse_value_map ||= value_map.invert
8
20
  end
9
21
 
10
22
  def [](*ranges)
11
- get(extract_ranges(ranges))
23
+ result = get(extract_ranges(ranges))
24
+ if @value_map
25
+ # TODO Multi-dimensions #map would be nice
26
+ result.to_a.map{|row| row.kind_of?(Array) ? row.map(&reverse_value_map.method(:[])) : reverse_value_map[row]}
27
+ else
28
+ result
29
+ end
12
30
  end
13
31
 
14
32
  def []=(from, values)
33
+ values = Array(values).flatten.map(&@value_map.method(:[])) if @value_map
15
34
  from = column_map[from] if column_map && column_map[from]
16
35
  set(from, M[values, typecode: block_type::TYPECODE].to_a.flatten)
17
36
  end
18
37
 
38
+ def clear(*ranges)
39
+ clear(ex)
40
+ end
41
+
19
42
  def column_map
20
- data_frame_type.column_map(self.block_type_name)
43
+ data_frame_type.column_map(self.singular_df_name)
21
44
  end
22
45
 
23
46
  def column_name_map
24
- data_frame_type.column_name_map(self.block_type_name)
47
+ data_frame_type.column_name_map(self.singular_df_name)
25
48
  end
26
49
 
27
50
  def reverse_column_map
28
- data_frame_type.reverse_column_map(self.block_type_name)
51
+ data_frame_type.reverse_column_map(self.singular_df_name)
52
+ end
53
+
54
+ def database
55
+ @database ||= Database.for_types(block: block_type, df: data_frame_type)
29
56
  end
30
57
 
31
58
  def method_missing(name, *args, &block)
@@ -42,7 +69,7 @@ module ActiveDataFrame
42
69
  case range
43
70
  when Range then range
44
71
  when Fixnum then range..range
45
- else raise "Unexpected index #{range}"
72
+ else raise "Unexpected index for data frame proxy #{range}, expecting either a Range or an Integer"
46
73
  end
47
74
  end
48
75
  end
@@ -51,9 +78,6 @@ module ActiveDataFrame
51
78
  0
52
79
  end
53
80
 
54
- def flatten_ranges(ranges)
55
- end
56
-
57
81
  def unmap_ranges(ranges, map)
58
82
  ranges.map do |range|
59
83
  case range
@@ -71,15 +95,17 @@ module ActiveDataFrame
71
95
  from_block_offset = from % block_type::BLOCK_SIZE
72
96
  to_block_index = to / block_type::BLOCK_SIZE
73
97
  to_block_offset = to % block_type::BLOCK_SIZE
74
- return Struct.new(:from, :to, :length, :index).new(
75
- Struct.new(:index, :offset, :position).new(from_block_index, from_block_offset, from),
76
- Struct.new(:index, :offset, :position).new(to_block_index, to_block_offset, to),
98
+ return Bounds.new(
99
+ Point.new(from_block_index, from_block_offset, from),
100
+ Point.new(to_block_index, to_block_offset, to),
77
101
  (to - from) + 1,
78
102
  index
79
103
  )
80
104
  end
81
105
 
82
106
  def self.suppress_logs
107
+ #TODO Make optional
108
+ return yield
83
109
  ActiveRecord::Base.logger, old_logger = nil, ActiveRecord::Base.logger
84
110
  yield.tap do
85
111
  ActiveRecord::Base.logger = old_logger
@@ -101,11 +127,15 @@ module ActiveDataFrame
101
127
  end
102
128
  end
103
129
 
130
+ def match_range(from, to)
131
+ from == to ? from : from..to
132
+ end
133
+
104
134
  def blocks_between(bounds, block_scope: scope)
105
135
  bounds[1..-1].reduce(
106
- block_scope.where( block_type.table_name => { period_index: (bounds[0].from.index..bounds[0].to.index)})
136
+ block_scope.where( block_type.table_name => { period_index: match_range(bounds[0].from.index,bounds[0].to.index)})
107
137
  ) do | or_chain, bound|
108
- or_chain.or(block_scope.where( block_type.table_name => { period_index: (bound.from.index..bound.to.index)}))
138
+ or_chain.or(block_scope.where( block_type.table_name => { period_index: match_range(bound.from.index,bound.to.index)}))
109
139
  end
110
140
  end
111
141
  end
@@ -0,0 +1,115 @@
1
+ module ActiveDataFrame
2
+ class Database
3
+
4
+ def self.batching
5
+ !!Thread.current[:active_data_frame_batching]
6
+ end
7
+
8
+ def self.batching=(value)
9
+ Thread.current[:active_data_frame_batching] = !!value
10
+ end
11
+
12
+ # Not thread safe!
13
+ def self.execute(sql)
14
+ if ActiveDataFrame::Database.batching
15
+ Thread.current[:batch] << sql << ?;
16
+ else
17
+ ActiveRecord::Base.transaction do
18
+ ActiveRecord::Base.connection.execute sql
19
+ end
20
+ end
21
+ end
22
+
23
+ def self.flush!
24
+ execute(Thread.current[:batch])
25
+ Thread.current[:batch] = ''
26
+ end
27
+
28
+ def self.for_types(block:, df:)
29
+ (@@configs ||= {})[[block, df]] ||= Database.new(block, df)
30
+ end
31
+
32
+ attr_reader :block_type, :data_frame_type
33
+
34
+ def initialize(block_type, data_frame_type)
35
+ @block_type = block_type
36
+ @data_frame_type = data_frame_type
37
+ end
38
+
39
+ def self.batch
40
+ self.batching, prev_batch = true, self.batching
41
+ Thread.current[:batch] ||= ''
42
+ ActiveRecord::Base.transaction do
43
+ yield
44
+ end
45
+ ensure
46
+ self.batching = prev_batch
47
+ flush! unless self.batching
48
+ end
49
+ ##
50
+ # Update block data for all blocks in a single call
51
+ ##
52
+ def bulk_update(existing)
53
+ case ActiveRecord::Base.connection_config[:adapter]
54
+ when 'postgresql'.freeze
55
+ # Fast bulk update
56
+ updates = ''
57
+ existing.each do |period_index, (values, df_id)|
58
+ updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
59
+ end
60
+ perform_update(updates)
61
+ else
62
+ ids = existing.map {|_, (_, id)| id}
63
+ updates = block_type::COLUMNS.map.with_index do |column, column_idx|
64
+ [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
65
+ end.to_h
66
+ update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
67
+ Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
68
+ #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
69
+ AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
70
+ AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
71
+ "
72
+ )
73
+ end
74
+ end
75
+
76
+ ##
77
+ # Insert block data for all blocks in a single call
78
+ ##
79
+ def bulk_insert(new_blocks, instance)
80
+ inserts = ''
81
+ new_blocks.each do |period_index, (values)|
82
+ inserts << \
83
+ case ActiveRecord::Base.connection_config[:adapter]
84
+ when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
85
+ else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
86
+ end
87
+ end
88
+ perform_insert(inserts)
89
+ end
90
+
91
+ def bulk_delete(blocks)
92
+ binding.pry
93
+ end
94
+
95
+ def perform_update(updates)
96
+ Database.execute(
97
+ <<-SQL
98
+ UPDATE #{block_type.table_name}
99
+ SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
100
+ FROM(
101
+ VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
102
+ WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
103
+ AND #{block_type.table_name}.period_index = t.period_index
104
+ AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
105
+ SQL
106
+ )
107
+ true
108
+ end
109
+
110
+ def perform_insert(inserts)
111
+ sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
112
+ Database.execute sql
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,40 @@
1
+ module ActiveDataFrame
2
+ class GroupProxy
3
+ attr_accessor :groups
4
+ def initialize(groups)
5
+ self.groups = groups
6
+ end
7
+
8
+ def min(column_name)
9
+ aggregate('minimum', column_name)
10
+ end
11
+
12
+ def max(column_name)
13
+ aggregate('maximum', column_name)
14
+ end
15
+
16
+ def sum(column_name)
17
+ aggregate('sum', column_name)
18
+ end
19
+
20
+ def average(column_name)
21
+ aggregate('average', column_name)
22
+ end
23
+
24
+ def count
25
+ aggregate('count')
26
+ end
27
+
28
+ private
29
+ def aggregate *agg
30
+ counts = self.groups.send(*agg)
31
+ grouped = {}
32
+ counts.each do |keys, value|
33
+ keys = Array(keys)
34
+ child = keys[0..-2].reduce(grouped){|parent, key| parent[key] ||= {}}
35
+ child[keys[-1]] = value
36
+ end
37
+ grouped
38
+ end
39
+ end
40
+ end
@@ -1,116 +1,170 @@
1
- require 'active_support/concern'
2
-
3
-
4
1
  module ActiveDataFrame
5
- class GroupProxy
6
- attr_accessor :groups
7
- def initialize(groups)
8
- self.groups = groups
9
- end
10
-
11
- def min(column_name)
12
- aggregate('minimum', column_name)
13
- end
2
+ #
3
+ # Modules can include HasDataFrame('frame_name', FrameBlockType) to gain data frame capabilities
4
+ # This method will expose class level and row (Active Record instance) level accessors to the underlying data frame.
5
+ #
6
+ # E.g.
7
+ #
8
+ # module HasBar
9
+ # include HasDataFrame('bars', BarBlock)
10
+ # end
11
+ #
12
+ # class Foo
13
+ # include HasBar
14
+ # end
15
+ #
16
+ # # Select all bars from index 0 to 40, for all foos
17
+ # Foo.bars[0..40]
18
+ #
19
+ # Select all bars from index 0 to 40, for foo with id: 1
20
+ # Foo.find(1).bars[0..40]
21
+ #
22
+ # # Find the average bar size for Foo 1 from index 5 to 30
23
+ # Foo.find(1).bars[5..30].avg
24
+ #
25
+ # Find the average bar size for the first 10 foos from index 13..43
26
+ # Foo.limit(10).bars.avg[13..43]
27
+ #
28
+ # Find the sum size for all foos where baz == boo from index 13..43
29
+ # Foo.where(baz: :boo).bars.sum[13..43]
30
+ #
31
+ def self.HasDataFrame(singular_table_name, block_type, table_name: singular_table_name, value_map: nil, &block)
32
+ Module.new do
33
+ define_singleton_method(:included) do |base|
34
+ # If somebody includes our dataframe enabled module we execute the following
35
+ base.define_singleton_method(:included) do |decorated|
36
+ block[decorated] if block
37
+ decorated.extend(base::ClassMethods) if defined?(base::ClassMethods)
14
38
 
15
- def max(column_name)
16
- aggregate('maximum', column_name)
17
- end
39
+ # add our class level methods
40
+ decorated.extend(
41
+ ActiveDataFrame.build_module_class_methods(singular_table_name, block_type, table_name: table_name, value_map: value_map)
42
+ )
18
43
 
19
- def sum(column_name)
20
- aggregate('sum', column_name)
21
- end
44
+ # Add our instance level methods
45
+ decorated.class_eval do
22
46
 
23
- def average(column_name)
24
- aggregate('average', column_name)
25
- end
47
+ if value_map
48
+ decorated.const_set(singular_table_name.underscore.camelize, ActiveDataFrame.build_dot_accessible_hash(value_map))
49
+ end
26
50
 
27
- def count
28
- aggregate('count')
29
- end
51
+ # Provide memoised reference to DF row
52
+ define_method singular_table_name do
53
+ (@data_frame_proxies ||= {})[singular_table_name] ||= Row.new(
54
+ block_type,
55
+ self.class,
56
+ self,
57
+ value_map: value_map,
58
+ singular_df_name: singular_table_name,
59
+ plural_df_name: table_name
60
+ )
61
+ end
30
62
 
31
- private
32
- def aggregate *agg
33
- counts = self.groups.send(*agg)
34
- grouped = {}
35
- counts.each do |keys, value|
36
- keys = Array(keys)
37
- child = keys[0..-2].reduce(grouped){|parent, key| parent[key] ||= {}}
38
- child[keys[-1]] = value
63
+ # We provide our own inspect implementation which will include in the output
64
+ # selected dataframe attributes that do not reside on the parent table
65
+ define_method :inspect do
66
+ inspection = "not initialized"
67
+ if defined?(@attributes) && @attributes
68
+ inspection = @attributes.keys.collect { |name|
69
+ if has_attribute?(name)
70
+ "#{name}: #{attribute_for_inspect(name)}"
71
+ end
72
+ }.compact.join(", ")
73
+ end
74
+ "<#{self.class} #{inspection}>"
75
+ end
76
+ end
39
77
  end
40
- grouped
41
78
  end
79
+ end
42
80
  end
43
81
 
44
- def self.HasDataFrame(singular_table_name, table_name, block_type)
45
- to_inject = Module.new
46
- to_inject.extend ActiveSupport::Concern
47
- to_inject.included do
48
- define_method(singular_table_name){
49
- @data_frame_proxies ||= {}
50
- @data_frame_proxies[singular_table_name] ||= Row.new(block_type, self.class, self)
51
- }
52
-
53
- define_method(:inspect){
54
- inspection = "not initialized"
55
- if defined?(@attributes) && @attributes
56
- inspection = @attributes.keys.collect { |name|
57
- if has_attribute?(name)
58
- "#{name}: #{attribute_for_inspect(name)}"
59
- end
60
- }.compact.join(", ")
82
+ #
83
+ # Define methods on our hash to easily access any values that are indexed by a symbol key
84
+ # and that do not clash with existing methods on the Hash
85
+ #
86
+ def self.build_dot_accessible_hash(hash)
87
+ hash.dup.tap do |map|
88
+ map.each do |key, value|
89
+ if(key.kind_of?(Symbol) && !hash.respond_to?(key))
90
+ map.define_singleton_method(key){value}
61
91
  end
62
- "<#{self.class} #{inspection}>"
63
- }
92
+ end
64
93
  end
94
+ end
65
95
 
66
- to_inject.class_methods do
67
- define_method(:df_column_names){
68
- @@column_names ||= {}
69
- }
96
+ #
97
+ # The class methods that are defined on any class that includes our dataframe enabled module
98
+ #
99
+ def self.build_module_class_methods(singular_table_name, block_type, table_name: singular_table_name, value_map: nil)
100
+ Module.new do
70
101
 
71
- define_method(:df_column_maps){
72
- @@column_maps ||= {}
73
- }
102
+ # The key ADF functionality is exposed here.
103
+ # This defines a new `table_name` accessor on the class which gives you access to a dataframe proxy by the name of `table_name`
104
+ #
105
+ # E.g.
106
+ #
107
+ # class Foo
108
+ # include HasBar
109
+ # end
110
+ #
111
+ # # Select all bars from index 0 to 40, for all foos
112
+ # Foo.bars[0..40]
113
+ #
114
+ # Select all bars from index 0 to 40, for foo with id: 1
115
+ # Foo.find(1).bars[0..40]
116
+ #
117
+ # # Find the average bar size for Foo 1 from index 5 to 30
118
+ # Foo.find(1).bars[5..30].avg
119
+ #
120
+ # Find the average bar size for the first 10 foos from index 13..43
121
+ # Foo.limit(10).bars.avg[13..43]
122
+ #
123
+ # Find the sum size for all foos where baz == boo from index 13..43
124
+ # Foo.where(baz: :boo).bars.sum[13..43]
125
+ #
126
+ define_method(table_name) do
127
+ Table.new(
128
+ block_type,
129
+ all,
130
+ value_map: value_map,
131
+ singular_df_name: singular_table_name,
132
+ plural_df_name: table_name
133
+ )
134
+ end
74
135
 
75
- define_method(:df_reverse_column_maps){
76
- @@reverse_column_maps ||= {}
77
- }
78
136
 
79
- define_method(:with_groups){|*groups|
80
- GroupProxy.new(group(*groups))
81
- }
137
+ #
138
+ # A class level hash containing optionally defined column names for a data frame.
139
+ # Instead of numeric or dynamic column names, you may explicitly define names for columns using the
140
+ # "#{singular_table_name}_column_names" method.
141
+ #
142
+ # E.g.
143
+ #
144
+ # class Foo
145
+ # include HasStatus
146
+ # status_column_names %i(review_status export_status)
147
+ # end
148
+ #
149
+ # This names
150
+ # column 0 as 'review_status' and
151
+ # column 1 as 'export_status'.
152
+ # Now you can make queries like:
153
+ # * Foo.status.review_status
154
+ # * Foo.first.status.export_status
155
+ # * Foo.status[:review_status..:export_status]
156
+ # * Foo.status[43] # You can still use numeric column indices
157
+ #
158
+ define_method :df_column_names do
159
+ @@column_names ||= {}
160
+ end
82
161
 
83
- define_method(table_name){
84
- Table.new(block_type, all)
85
- }
86
-
87
- define_method("include_#{table_name}"){|*dimensions, unmap: true|
88
- scope = self.all
89
- blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
90
- included_blocks = blocks_for_tables[singular_table_name] ||= {}
91
- dimensions.flatten.each do |key|
92
- if unmap && column_map(singular_table_name)
93
- idx = column_map(singular_table_name)[key]
94
- else
95
- idx = key
96
- key = "t#{key}"
97
- end
98
- block_index = idx / block_type::BLOCK_SIZE
99
- block_offset = (idx % block_type::BLOCK_SIZE).succ
100
- included_blocks[block_index] ||= []
101
- included_blocks[block_index] << {name: key, idx: block_offset}
102
- end
103
- query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (table_name, included_blocks)|
104
- aggregate +
105
- included_blocks.reduce('') do |aggregate, (block_idx, blocks)|
106
- blocks_table_name = "#{table_name}_blocks"
107
- aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{table_name}_blocks "+
108
- " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{table_name}#{block_idx} ON b#{table_name}#{block_idx}.data_frame_type = '#{self.name}' AND b#{table_name}#{block_idx}.data_frame_id = #{self.table_name}.id"
109
- end
110
- end + ") as #{self.table_name}"
111
- scope.from(query)
162
+ # The class level accessor
163
+ define_method(:column_name_map){|for_table|
164
+ df_column_names[for_table][self] if defined? df_column_names[for_table] rescue nil
112
165
  }
113
166
 
167
+ # The attribute writer
114
168
  define_method("#{singular_table_name}_column_names") do |names|
115
169
  df_column_names[singular_table_name] ||= {}
116
170
  df_column_maps[singular_table_name] ||= {}
@@ -118,35 +172,172 @@ module ActiveDataFrame
118
172
  df_column_maps[singular_table_name][self] = names.map.with_index.to_h
119
173
  end
120
174
 
175
+
176
+ #
177
+ # A class level hash containing optionally defined column maps (these are usually simply a hash that responds to #[](column_name) and returns
178
+ # a positive integer representing the corresponding column index).
179
+ # These are defined using the
180
+ # "#{singular_table_name}_column_map" method.
181
+ #
182
+ # class Foo
183
+ # include HasCpuTemp
184
+ # cpu_temp_column_map Hash.new{ |columns, time|
185
+ # columns[time] = time.to_i # We store cpu temperatures at a 1 second granularity
186
+ # }
187
+ # end
188
+ #
189
+ define_method :df_column_maps do
190
+ @@column_maps ||= {}
191
+ end
192
+
193
+ # The attribute writer
121
194
  define_method("#{singular_table_name}_column_map") do |column_map|
122
195
  df_column_names[singular_table_name] = nil
123
196
  df_column_maps[singular_table_name] ||= {}
124
197
  df_column_maps[singular_table_name][self] = column_map
125
198
  end
126
199
 
200
+ # The class level accessor
201
+ define_method(:column_map){|for_table|
202
+ df_column_maps[for_table][self] if defined? df_column_maps[for_table] rescue nil
203
+ }
204
+
205
+ #
206
+ # A class level hash containing optionally defined reverse column mappings (from a positive integer to a mapped column index/key)
207
+ # This is only used for functions where we query indices based on values.
208
+ # E.g
209
+ #
210
+ # class Foo
211
+ # include HasPrice
212
+ # column_map Hash.new{|columns, date|
213
+ # columns[date] = (date - Date.new(1970)).to_i
214
+ # }
215
+ # reverse_column_map{|columns, index|
216
+ # columns[index] = Date.new(1970) + index.month
217
+ # }
218
+ # end
219
+ #
220
+ # # Show all dates between 2000 and 2010 where the total of all prices is > $500
221
+ # Foo.prices.idx_where_sum_gte(Date.new(2000)...Date.new(2010), 500)
222
+ #
223
+ define_method :df_reverse_column_maps do
224
+ @@reverse_column_maps ||= {}
225
+ end
226
+
227
+ # The attribute writer
127
228
  define_method("#{singular_table_name}_reverse_column_map"){|reverse_column_map|
128
229
  df_reverse_column_maps[singular_table_name] ||= {}
129
230
  df_reverse_column_maps[singular_table_name][self] = reverse_column_map
130
231
  }
131
232
 
132
- define_method(:include_data_blocks){|table_name, *args|
133
- send("include_#{table_name}", *args)
233
+ # The class level accessor
234
+ define_method(:reverse_column_map){|for_table|
235
+ df_reverse_column_maps[for_table] ||= {}
236
+ df_reverse_column_maps[for_table][self] ||= column_map(for_table).invert if column_map(for_table)
134
237
  }
135
238
 
136
- define_method(:column_map){|table_name|
137
- df_column_maps[table_name][self] if defined? df_column_maps[table_name] rescue nil
138
- }
239
+ #
240
+ # See group_proxy.rb.
241
+ # This makes a number of grouping/bucketing queries easier to express
242
+ # for analytics across an entire table
243
+ #
244
+ define_method(:with_groups) do |*groups|
245
+ GroupProxy.new(group(*groups))
246
+ end
139
247
 
140
- define_method(:column_name_map){|table_name|
141
- df_column_names[table_name][self] if defined? df_column_names[table_name]
142
- }
248
+ #
249
+ # If you use the include_#{table_name} function before executing any queries, you can
250
+ # join the child AR rows with any number of columns and treat them as if they were all part of the same table.
251
+ # These joined columns can be used to further refine your queries, perform groupings, counts, etc.
252
+ #
253
+ # E.g.
254
+ #
255
+ # class Iris
256
+ # include HasDimension
257
+ # dimension_column_names %i(sepal_length sepal_width petal_length petal_width)
258
+ # end
259
+ #
260
+ # Iris.where('sepal_length > ?', 4) # Error! (There is no column called sepal_length on the iris table)
261
+ # Iris.include_dimensions(:sepal_length).where('sepal_length > ?', 4) # Works fine
262
+ # Iris.include_dimension(:sepal_length, :petal_width).where('sepal_length > 3').select(:petal_width)
263
+ # Iris.include_dimension(:sepal_length, :petal_width).with_groups('ROUND(sepal_length)').average('petal_width')
264
+ # {
265
+ # "4.0":"0.2"
266
+ # "5.0":"0.397872340425532",
267
+ # "6.0":"1.49705882352941",
268
+ # "7.0":"1.89583333333333",
269
+ # "8.0":"2.15",
270
+ # }
271
+ #
272
+ # In cases where column names are not predefined or use a mapper you can provide a hash to give alternate column names for the query
273
+ #
274
+ # class BuildingType < ApplicationRecord
275
+ # include HasBuildingConsent
276
+ # consents_column_map Hash.new{|hash, time, as_date = time.to_date|
277
+ # (as_date.year - 1970) * 12 + as_date.month
278
+ # }
279
+ # end
280
+ #
281
+ # # In this example BuildingType.consents accepts dynamic column indices (anything that responds to to_date)
282
+ # # We can give these columns explicit names so we can refer to them in queries.
283
+ # E.g
284
+ #
285
+ # BuildingType.include_consents({'1994-04-01' => :april_94, '1994-05-01' => :may_94}).where('april_94 + may_94 < 300')
286
+ # => [
287
+ # <BuildingType id: 2, name: "Hostels_boarding", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 2, april_94: 11, may_94: 5>,
288
+ # <BuildingType id: 3, name: "Hotels", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 3, april_94: 33, may_94: 34>,
289
+ # <BuildingType id: 4, name: "Hospitals", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 4, april_94: 32, may_94: 37>,
290
+ # <BuildingType id: 5, name: "Education", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 5, april_94: 88, may_94: 145>,
291
+ # <BuildingType id: 6, name: "Social_cultural_religious", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 6, april_94: 82, may_94: 102>,
292
+ # <BuildingType id: 9, name: "Storage", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 9, april_94: 29, may_94: 52>,
293
+ # <BuildingType id: 12, name: "Misc", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 12, april_94: 33, may_94: 39>]
294
+ # ]
295
+ #
296
+ #
297
+ define_method("include_#{table_name}"){|*dimensions, unmap: true, scope: self.all, as: false|
298
+ dim1 = dimensions[0]
299
+ case dim1
300
+ when Hash
301
+ dimension_map, dimensions = dim1, dim1.keys
302
+ when Range
303
+ exclude_end = dim1.exclude_end?
304
+
305
+ from, to = if unmap && column_map(singular_table_name)
306
+ unmap = false
307
+ [column_map(singular_table_name)[dim1.begin],column_map(singular_table_name)[dim1.end]]
308
+ else
309
+ [dim1.begin, dim1.end]
310
+ end
311
+ dimensions = (exclude_end ? (from...to) : (from..to)).to_a
312
+ end
313
+
314
+ blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
315
+ included_blocks = blocks_for_tables[block_type.table_name] ||= {}
143
316
 
144
- define_method(:reverse_column_map){|table_name|
145
- df_reverse_column_maps[table_name] ||= {}
146
- df_reverse_column_maps[table_name][self] ||= column_map(table_name).invert if column_map(table_name)
317
+ dimensions.flatten.each.with_index(1) do |key, i|
318
+ if unmap && column_map(singular_table_name)
319
+ idx = column_map(singular_table_name)[key]
320
+ key = dimension_map[key] if dimension_map
321
+ else
322
+ idx = key
323
+ key = "t#{key}"
324
+ end
325
+ key = "#{as}#{i}" if as
326
+ block_index = idx / block_type::BLOCK_SIZE
327
+ block_offset = (idx % block_type::BLOCK_SIZE).succ
328
+ included_blocks[block_index] ||= []
329
+ included_blocks[block_index] << {name: key, idx: block_offset}
330
+ end
331
+ query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (for_table, blocks_for_table)|
332
+ aggregate +
333
+ blocks_for_table.reduce('') do |blocks_aggregate, (block_idx, blocks)|
334
+ blocks_table_name = for_table
335
+ blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
336
+ " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.data_frame_id = #{self.table_name}.id"
337
+ end
338
+ end + ") as #{self.table_name}"
339
+ scope.from(query)
147
340
  }
148
341
  end
149
-
150
- return to_inject
151
342
  end
152
343
  end