active_data_frame 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 5c1042e5d6a9e65c386a0dc5353e6fcc3e065a84
-   data.tar.gz: 13ee5f0520a97c563dc5bfedb4408010464b2e5f
+   metadata.gz: 5c368f1ed1f3fc78c0e9f81b0d2bd7cc9f50141a
+   data.tar.gz: 80fa0cfdeed12b5b41d7556ec9c019670827e934
  SHA512:
-   metadata.gz: 7e9f1118a5c18a0aed0bc933ec2e9bfc7d443412e762da9cee9707bdf4084922e9d2904f3668088ff2d3866bcebf9a90b0b8c9e21c90e023c9fb2ca19d1c57c3
-   data.tar.gz: f957ad5532cfcd4a5d635d278a0a5ee6de2c4b6179e5e82f785f6aa9961ae5ea64846d3c138816c1d2e5b2443dc766bcfe7576e2403c7d21fcf77b8e37315b6a
+   metadata.gz: b2cc97b56fe384be682c9631a06c108b2524434230df5f4ac4949300339fadea0dcbca0f1efb9822bd04c3a43a7ae2374a3dbad02706793cfc5f8fa42600920b
+   data.tar.gz: 7deccde31e9d8a99b31831d2af96227cdf9d087297321b531c79b4327b9bb63f38e0fa026869a94ffb4d5ea3cb5e9e61c5805328cf7bb8248b26e54f95f7fc40
data/Rakefile CHANGED
@@ -1,2 +1,10 @@
  require "bundler/gem_tasks"
- task :default => :spec
+ require "rake/testtask"
+
+ Rake::TestTask.new(:test) do |t|
+   t.libs << "test"
+   t.libs << "lib"
+   t.test_files = FileList['test/**/*_test.rb']
+ end
+
+ task :default => :test
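
After this change the gem's default Rake task runs the Minitest suite rather than RSpec: `bundle exec rake` now invokes the new :test task, and `bundle exec rake test` runs it explicitly (assuming a checkout of the gem with its test/ directory).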
data/active_data_frame.gemspec CHANGED
@@ -23,6 +23,10 @@ Gem::Specification.new do |spec|
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "pry-byebug", "~> 3.4.0", '>= 3.4.0'
  spec.add_development_dependency 'pry', '~> 0.10.2', '>= 0.10.0'
- spec.add_runtime_dependency 'activerecord', '~> 5.0.0'
+ spec.add_development_dependency 'pg'
+ spec.add_development_dependency 'minitest', '~> 5.11'
+ spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
+ spec.add_development_dependency 'minitest-around', '0.4.1'
+ spec.add_runtime_dependency 'activerecord', '~> 5.0'
  spec.add_runtime_dependency 'rmatrix', '~> 0.1.10', '>=0.1.10'
  end
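
Note the loosened runtime constraint on ActiveRecord. Under RubyGems' pessimistic operator the two forms admit different version ranges:

    # '~> 5.0.0'  means  '>= 5.0.0', '< 5.1'   (patch releases of 5.0 only)
    # '~> 5.0'    means  '>= 5.0',   '< 6.0'   (any ActiveRecord 5.x)

so 0.1.2 can be bundled alongside ActiveRecord 5.1 and 5.2 as well.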
@@ -1,37 +1,9 @@
- Refactor:
-   Is Engine necessary? @done (17-03-31 08:20)
-   Add Typecode expectations @done (17-04-06 08:16)
-   Add enum capabilities
-   Better errors when using bad indices in RMatrix
-   Better printing in RMatrix
-   ☐ Refactor + Tidy
-   ☐ Tests
-   ☐ Experiment with MonetDB speed
-   ☐ Check support for different numeric/string/bool etc. types
-   ✔ Experiment with single precision @done (17-03-31 08:18)
- ActiveRecordMonetDBAdapter:
-   ☐ Work on support for MonetDB
-
- ActiveDataFrame:
-   ✔ Refactor grouping/summing code @done (17-03-31 08:20)
-   ✔ Allow includes to combine frames @done (17-03-27 10:36)
-   ✔ Performance test on ICP data @done (17-03-27 08:41)
-   ✔ Alternate RDBMS support (SQLite, MySQL) @done (17-03-27 09:58)
-
-
- Utilities:
-   ☐ KMeans clustering and DBScan built in to multi-d array
-
- Later:
-   ☐ Build generic Merge/Cache structure which will either cache infinite columns or rows
-     - class Unit
-     -   df_cache :all_loads, ::loads, direction: :row
-     - end
-
- Ruby dataframe library inspiration:
-   - Integration with Nyaplot
-   - Integration with Statsample
-
+ Priorities:
+   Ensure delete/clear works
+   rmatrix tests
+   Update README.md
+   Use MMAP of status/enums
+   Support SQLite + MySQL, MonetDB
  ✔ Generator creates a migration and data_frame and block classes. Block/DataFrame classes have a type, a period unit and a period length @done (17-01-12 10:29)
  ✔ Type is: @done (17-01-12 10:29)
  ✔ Bit @done (17-01-12 10:29)
@@ -79,5 +51,10 @@ Ruby dataframe library inspiration:
  ✔ Finish RMatrix @done (17-03-02 09:01)

  RMatrix:
-   ✔ Ensure assignment works @done (17-03-21 09:56)
-   Raw is simply a copy of self without mappings @cancelled (17-03-21 09:56)
+   ✔ Ensure assignment works @done (18-04-03 18:58)
+   Raw is simply a copy of self without mappings @done (18-04-03 18:58)
+ ActiveDataFrame:
+   ✔ dimensions_minimum @done (18-04-03 18:58)
+   ✔ dimensions_maximum @done (18-04-03 18:58)
+   ✔ dimensions_sum @done (18-04-03 18:58)
+   ✔ dimensions_average @done (18-04-03 18:58)
data/examples.rb ADDED
@@ -0,0 +1,46 @@
+
+ # Get times of day where there was a price spike in Wellington
+ Icp.where(region: :wellington).loads.idx_where_sum_gte(Time.now..1.day.from_now, 12_000)
+
+ # Get current load for all ICPs, grouped by :region, :customer_class, :tariff
+ Icp.include_loads(Time.now).with_groups(:region, :customer_class, :tariff).sum("\"#{Time.now}\"")
+
+ # Get the next day's aggregate usage for Auckland residential customers
+ Icp.where(region: :auckland, customer_class: :residential).loads.sum(Time.now..1.day.from_now)
+
+ # Get a year's worth of load for a single ICP
+ Icp.first.load[Time.now..1.year.from_now]
+
+ # Get a day's worth of load for many ICPs
+ Icp.where(tariff: :un).loads[Time.now..1.day.from_now]
+
+ # Get the average load over a day for many ICPs
+ Icp.where(tariff: :un).loads.avg(Time.now..1.day.from_now)
+
+ # Count ICPs which have more than 5.5kW of load at this point in time
+ Icp.include_loads(Time.now).where("\"%s\" > ?" % Time.now, 5.5).count
+
+
+ # See the largest sepal length seen for each species
+ Iris.with_groups(:species).max(:sepal_length)
+
+ # Get an individual iris's sepal_length
+ Iris.first.dimension.sepal_length
+
+ # Get multiple dimensions for an individual iris
+ Iris.first.dimension[:sepal_length, :petal_width]
+
+ # Get a range of dimensions for an individual iris
+ Iris.first.dimension[:sepal_length..:petal_width]
+
+ # Get a range of dimensions for all Iris versicolors
+ dimensions = Iris.where(species: :versicolor).dimensions[:sepal_length..:petal_width]
+
+ # Chop data as needed
+ sepal_lengths = dimensions.sepal_length
+ sepal_lengths_petal_widths = dimensions[[:sepal_length, :petal_width]]
+
+ selected_iris = dimensions[Iris.where(species: :versicolor).first(5)]
+
+ # See the RMatrix API for matrix functionality
+ #
data/lib/active_data_frame.rb CHANGED
@@ -1,5 +1,7 @@
  require 'active_data_frame/data_frame_proxy'
+ require 'active_data_frame/group_proxy'
  require 'active_data_frame/table'
  require 'active_data_frame/row'
  require 'active_data_frame/has_data_frame'
+ require 'active_data_frame/database'
  require 'rmatrix'
data/lib/active_data_frame/bounds.rb ADDED
@@ -0,0 +1,4 @@
+ module ActiveDataFrame
+   class Bounds < Struct.new(:from, :to, :length, :index)
+   end
+ end
@@ -1,31 +1,58 @@
  module ActiveDataFrame
+
+   require_relative 'point'
+   require_relative 'bounds'
+
    class DataFrameProxy
-     attr_accessor :block_type, :data_frame_type, :block_type_name
-     def initialize(block_type, data_frame_type)
-       self.block_type = block_type
-       self.data_frame_type = data_frame_type
-       self.block_type_name = block_type.table_name.gsub(/_blocks$/,'').gsub(/^blocks_/,'')
+     attr_accessor :block_type, :data_frame_type, :block_type_name, :value_map, :singular_df_name, :plural_df_name
+
+     def initialize(block_type, data_frame_type, value_map: nil, singular_df_name: '', plural_df_name: '')
+       self.block_type = block_type
+       self.data_frame_type = data_frame_type
+       self.block_type_name = block_type.table_name.gsub(/_blocks$/,'').gsub(/^blocks_/,'')
+       self.value_map = value_map
+       self.singular_df_name = singular_df_name
+       self.plural_df_name = plural_df_name
+     end
+
+     def reverse_value_map
+       @reverse_value_map ||= value_map.invert
      end

      def [](*ranges)
-       get(extract_ranges(ranges))
+       result = get(extract_ranges(ranges))
+       if @value_map
+         # TODO Multi-dimensions #map would be nice
+         result.to_a.map{|row| row.kind_of?(Array) ? row.map(&reverse_value_map.method(:[])) : reverse_value_map[row]}
+       else
+         result
+       end
      end

      def []=(from, values)
+       values = Array(values).flatten.map(&@value_map.method(:[])) if @value_map
        from = column_map[from] if column_map && column_map[from]
        set(from, M[values, typecode: block_type::TYPECODE].to_a.flatten)
      end

+     def clear(*ranges)
+       clear(ex)
+     end
+
      def column_map
-       data_frame_type.column_map(self.block_type_name)
+       data_frame_type.column_map(self.singular_df_name)
      end

      def column_name_map
-       data_frame_type.column_name_map(self.block_type_name)
+       data_frame_type.column_name_map(self.singular_df_name)
      end

      def reverse_column_map
-       data_frame_type.reverse_column_map(self.block_type_name)
+       data_frame_type.reverse_column_map(self.singular_df_name)
+     end
+
+     def database
+       @database ||= Database.for_types(block: block_type, df: data_frame_type)
      end

      def method_missing(name, *args, &block)
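
The new value_map option lets a frame store compact codes while callers read and write symbolic values: []= maps values through value_map on write, and [] maps the stored codes back through reverse_value_map on read. A hypothetical round trip (the status frame and the map below are illustrative, not part of the gem):

    value_map = { unreviewed: 0, approved: 1, rejected: 2 }
    foo.status[0] = :approved   # stored as 1 via value_map
    foo.status[0]               # => :approved, recovered via reverse_value_map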
@@ -42,7 +69,7 @@ module ActiveDataFrame
          case range
          when Range then range
          when Fixnum then range..range
-         else raise "Unexpected index #{range}"
+         else raise "Unexpected index for data frame proxy #{range}, expecting either a Range or an Integer"
          end
        end
      end
@@ -51,9 +78,6 @@ module ActiveDataFrame
        0
      end

-     def flatten_ranges(ranges)
-     end
-
      def unmap_ranges(ranges, map)
        ranges.map do |range|
          case range
@@ -71,15 +95,17 @@ module ActiveDataFrame
        from_block_offset = from % block_type::BLOCK_SIZE
        to_block_index = to / block_type::BLOCK_SIZE
        to_block_offset = to % block_type::BLOCK_SIZE
-       return Struct.new(:from, :to, :length, :index).new(
-         Struct.new(:index, :offset, :position).new(from_block_index, from_block_offset, from),
-         Struct.new(:index, :offset, :position).new(to_block_index, to_block_offset, to),
+       return Bounds.new(
+         Point.new(from_block_index, from_block_offset, from),
+         Point.new(to_block_index, to_block_offset, to),
          (to - from) + 1,
          index
        )
      end

      def self.suppress_logs
+       # TODO: Make optional
+       return yield
        ActiveRecord::Base.logger, old_logger = nil, ActiveRecord::Base.logger
        yield.tap do
          ActiveRecord::Base.logger = old_logger
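
Bounds and Point (the latter lives in the sibling point.rb, which this diff does not show) replace the anonymous Struct.new calls above. As a worked example, assuming a BLOCK_SIZE of 512, get_bounds(5, 514, 0) would return:

    Bounds.new(
      Point.new(0, 5, 5),    # from: block 0, offset 5, absolute position 5
      Point.new(1, 2, 514),  # to:   block 1, offset 2, absolute position 514
      510,                   # length: (514 - 5) + 1
      0                      # index of the originating range
    )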
@@ -101,11 +127,15 @@ module ActiveDataFrame
        end
      end

+     def match_range(from, to)
+       from == to ? from : from..to
+     end
+
      def blocks_between(bounds, block_scope: scope)
        bounds[1..-1].reduce(
-         block_scope.where( block_type.table_name => { period_index: (bounds[0].from.index..bounds[0].to.index)})
+         block_scope.where( block_type.table_name => { period_index: match_range(bounds[0].from.index, bounds[0].to.index)})
        ) do | or_chain, bound|
-         or_chain.or(block_scope.where( block_type.table_name => { period_index: (bound.from.index..bound.to.index)}))
+         or_chain.or(block_scope.where( block_type.table_name => { period_index: match_range(bound.from.index, bound.to.index)}))
        end
      end
    end
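
match_range collapses degenerate ranges so that blocks_between generates an equality predicate rather than a one-element BETWEEN:

    match_range(5, 5)   # => 5     ... WHERE period_index = 5
    match_range(5, 9)   # => 5..9  ... WHERE period_index BETWEEN 5 AND 9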
data/lib/active_data_frame/database.rb ADDED
@@ -0,0 +1,115 @@
+ module ActiveDataFrame
+   class Database
+
+     def self.batching
+       !!Thread.current[:active_data_frame_batching]
+     end
+
+     def self.batching=(value)
+       Thread.current[:active_data_frame_batching] = !!value
+     end
+
+     # Not thread safe!
+     def self.execute(sql)
+       if ActiveDataFrame::Database.batching
+         Thread.current[:batch] << sql << ?;
+       else
+         ActiveRecord::Base.transaction do
+           ActiveRecord::Base.connection.execute sql
+         end
+       end
+     end
+
+     def self.flush!
+       execute(Thread.current[:batch])
+       Thread.current[:batch] = ''
+     end
+
+     def self.for_types(block:, df:)
+       (@@configs ||= {})[[block, df]] ||= Database.new(block, df)
+     end
+
+     attr_reader :block_type, :data_frame_type
+
+     def initialize(block_type, data_frame_type)
+       @block_type = block_type
+       @data_frame_type = data_frame_type
+     end
+
+     def self.batch
+       self.batching, prev_batch = true, self.batching
+       Thread.current[:batch] ||= ''
+       ActiveRecord::Base.transaction do
+         yield
+       end
+     ensure
+       self.batching = prev_batch
+       flush! unless self.batching
+     end
+     ##
+     # Update block data for all blocks in a single call
+     ##
+     def bulk_update(existing)
+       case ActiveRecord::Base.connection_config[:adapter]
+       when 'postgresql'.freeze
+         # Fast bulk update
+         updates = ''
+         existing.each do |period_index, (values, df_id)|
+           updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
+         end
+         perform_update(updates)
+       else
+         ids = existing.map {|_, (_, id)| id}
+         updates = block_type::COLUMNS.map.with_index do |column, column_idx|
+           [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
+         end.to_h
+         update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
+         Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
+           #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
+           AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
+           AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
+           "
+         )
+       end
+     end
+
+     ##
+     # Insert block data for all blocks in a single call
+     ##
+     def bulk_insert(new_blocks, instance)
+       inserts = ''
+       new_blocks.each do |period_index, (values)|
+         inserts << \
+           case ActiveRecord::Base.connection_config[:adapter]
+           when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
+           else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
+           end
+       end
+       perform_insert(inserts)
+     end
+
+     def bulk_delete(blocks)
+       binding.pry
+     end
+
+     def perform_update(updates)
+       Database.execute(
+         <<-SQL
+           UPDATE #{block_type.table_name}
+           SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
+           FROM(
+           VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
+           WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
+           AND #{block_type.table_name}.period_index = t.period_index
+           AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
+         SQL
+       )
+       true
+     end
+
+     def perform_insert(inserts)
+       sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
+       Database.execute sql
+     end
+   end
+ end
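
The batching hooks above buffer generated SQL on the current thread and flush it in a single execute call when the outermost batch block exits. A sketch of the intended use (the foo model and bars frame are illustrative):

    ActiveDataFrame::Database.batch do
      foo.bars[0..9]   = first_chunk    # SQL is appended to Thread.current[:batch]...
      foo.bars[10..19] = second_chunk
    end                                 # ...and flushed here inside one transaction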
data/lib/active_data_frame/group_proxy.rb ADDED
@@ -0,0 +1,40 @@
+ module ActiveDataFrame
+   class GroupProxy
+     attr_accessor :groups
+     def initialize(groups)
+       self.groups = groups
+     end
+
+     def min(column_name)
+       aggregate('minimum', column_name)
+     end
+
+     def max(column_name)
+       aggregate('maximum', column_name)
+     end
+
+     def sum(column_name)
+       aggregate('sum', column_name)
+     end
+
+     def average(column_name)
+       aggregate('average', column_name)
+     end
+
+     def count
+       aggregate('count')
+     end
+
+     private
+     def aggregate(*agg)
+       counts = self.groups.send(*agg)
+       grouped = {}
+       counts.each do |keys, value|
+         keys = Array(keys)
+         child = keys[0..-2].reduce(grouped){|parent, key| parent[key] ||= {}}
+         child[keys[-1]] = value
+       end
+       grouped
+     end
+   end
+ end
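
GroupProxy reshapes ActiveRecord's flat grouped results into nested hashes, one level per grouping column. For example (model and columns illustrative):

    Icp.with_groups(:region, :tariff).sum(:load)
    # ActiveRecord's group(...).sum(...) returns
    #   { ["auckland", "un"] => 120.5, ["auckland", "ctrl"] => 60.2, ... }
    # which GroupProxy#aggregate reshapes into
    #   { "auckland" => { "un" => 120.5, "ctrl" => 60.2 }, ... }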
data/lib/active_data_frame/has_data_frame.rb CHANGED
@@ -1,116 +1,170 @@
- require 'active_support/concern'
-
-
  module ActiveDataFrame
-   class GroupProxy
-     attr_accessor :groups
-     def initialize(groups)
-       self.groups = groups
-     end
-
-     def min(column_name)
-       aggregate('minimum', column_name)
-     end
+   #
+   # Modules can include HasDataFrame('frame_name', FrameBlockType) to gain data frame capabilities.
+   # This method will expose class-level and row (Active Record instance) level accessors to the underlying data frame.
+   #
+   # E.g.
+   #
+   #   module HasBar
+   #     include HasDataFrame('bars', BarBlock)
+   #   end
+   #
+   #   class Foo
+   #     include HasBar
+   #   end
+   #
+   #   # Select all bars from index 0 to 40, for all foos
+   #   Foo.bars[0..40]
+   #
+   #   # Select all bars from index 0 to 40, for foo with id: 1
+   #   Foo.find(1).bars[0..40]
+   #
+   #   # Find the average bar size for Foo 1 from index 5 to 30
+   #   Foo.find(1).bars[5..30].avg
+   #
+   #   # Find the average bar size for the first 10 foos from index 13..43
+   #   Foo.limit(10).bars.avg[13..43]
+   #
+   #   # Find the summed size for all foos where baz == boo from index 13..43
+   #   Foo.where(baz: :boo).bars.sum[13..43]
+   #
+   def self.HasDataFrame(singular_table_name, block_type, table_name: singular_table_name, value_map: nil, &block)
+     Module.new do
+       define_singleton_method(:included) do |base|
+         # If somebody includes our dataframe-enabled module we execute the following
+         base.define_singleton_method(:included) do |decorated|
+           block[decorated] if block
+           decorated.extend(base::ClassMethods) if defined?(base::ClassMethods)

-     def max(column_name)
-       aggregate('maximum', column_name)
-     end
+           # Add our class-level methods
+           decorated.extend(
+             ActiveDataFrame.build_module_class_methods(singular_table_name, block_type, table_name: table_name, value_map: value_map)
+           )

-     def sum(column_name)
-       aggregate('sum', column_name)
-     end
+           # Add our instance-level methods
+           decorated.class_eval do

-     def average(column_name)
-       aggregate('average', column_name)
-     end
+             if value_map
+               decorated.const_set(singular_table_name.underscore.camelize, ActiveDataFrame.build_dot_accessible_hash(value_map))
+             end

-     def count
-       aggregate('count')
-     end
+             # Provide a memoised reference to the DF row
+             define_method singular_table_name do
+               (@data_frame_proxies ||= {})[singular_table_name] ||= Row.new(
+                 block_type,
+                 self.class,
+                 self,
+                 value_map: value_map,
+                 singular_df_name: singular_table_name,
+                 plural_df_name: table_name
+               )
+             end

-     private
-     def aggregate *agg
-       counts = self.groups.send(*agg)
-       grouped = {}
-       counts.each do |keys, value|
-         keys = Array(keys)
-         child = keys[0..-2].reduce(grouped){|parent, key| parent[key] ||= {}}
-         child[keys[-1]] = value
+             # We provide our own inspect implementation which will include in the output
+             # selected dataframe attributes that do not reside on the parent table
+             define_method :inspect do
+               inspection = "not initialized"
+               if defined?(@attributes) && @attributes
+                 inspection = @attributes.keys.collect { |name|
+                   if has_attribute?(name)
+                     "#{name}: #{attribute_for_inspect(name)}"
+                   end
+                 }.compact.join(", ")
+               end
+               "<#{self.class} #{inspection}>"
+             end
+           end
          end
-       grouped
        end
+     end
    end

- def self.HasDataFrame(singular_table_name, table_name, block_type)
-   to_inject = Module.new
-   to_inject.extend ActiveSupport::Concern
-   to_inject.included do
-     define_method(singular_table_name){
-       @data_frame_proxies ||= {}
-       @data_frame_proxies[singular_table_name] ||= Row.new(block_type, self.class, self)
-     }
-
-     define_method(:inspect){
-       inspection = "not initialized"
-       if defined?(@attributes) && @attributes
-         inspection = @attributes.keys.collect { |name|
-           if has_attribute?(name)
-             "#{name}: #{attribute_for_inspect(name)}"
-           end
-         }.compact.join(", ")
+   #
+   # Define methods on our hash to easily access any values that are indexed by a symbol key
+   # and that do not clash with existing methods on the Hash
+   #
+   def self.build_dot_accessible_hash(hash)
+     hash.dup.tap do |map|
+       map.each do |key, value|
+         if key.kind_of?(Symbol) && !hash.respond_to?(key)
+           map.define_singleton_method(key){ value }
          end
-         "<#{self.class} #{inspection}>"
-       }
+       end
      end
+   end

-   to_inject.class_methods do
-     define_method(:df_column_names){
-       @@column_names ||= {}
-     }
+   #
+   # The class methods that are defined on any class that includes our dataframe-enabled module
+   #
+   def self.build_module_class_methods(singular_table_name, block_type, table_name: singular_table_name, value_map: nil)
+     Module.new do

-     define_method(:df_column_maps){
-       @@column_maps ||= {}
-     }
+       # The key ADF functionality is exposed here.
+       # This defines a new `table_name` accessor on the class which gives you access to a dataframe proxy by the name of `table_name`.
+       #
+       # E.g.
+       #
+       #   class Foo
+       #     include HasBar
+       #   end
+       #
+       #   # Select all bars from index 0 to 40, for all foos
+       #   Foo.bars[0..40]
+       #
+       #   # Select all bars from index 0 to 40, for foo with id: 1
+       #   Foo.find(1).bars[0..40]
+       #
+       #   # Find the average bar size for Foo 1 from index 5 to 30
+       #   Foo.find(1).bars[5..30].avg
+       #
+       #   # Find the average bar size for the first 10 foos from index 13..43
+       #   Foo.limit(10).bars.avg[13..43]
+       #
+       #   # Find the summed size for all foos where baz == boo from index 13..43
+       #   Foo.where(baz: :boo).bars.sum[13..43]
+       #
+       define_method(table_name) do
+         Table.new(
+           block_type,
+           all,
+           value_map: value_map,
+           singular_df_name: singular_table_name,
+           plural_df_name: table_name
+         )
+       end

-     define_method(:df_reverse_column_maps){
-       @@reverse_column_maps ||= {}
-     }

-     define_method(:with_groups){|*groups|
-       GroupProxy.new(group(*groups))
-     }
+       #
+       # A class-level hash containing optionally defined column names for a data frame.
+       # Instead of numeric or dynamic column names, you may explicitly define names for columns using the
+       # "#{singular_table_name}_column_names" method.
+       #
+       # E.g.
+       #
+       #   class Foo
+       #     include HasStatus
+       #     status_column_names %i(review_status export_status)
+       #   end
+       #
+       # This names
+       #   column 0 as 'review_status' and
+       #   column 1 as 'export_status'.
+       # Now you can make queries like:
+       #   * Foo.status.review_status
+       #   * Foo.first.status.export_status
+       #   * Foo.status[:review_status..:export_status]
+       #   * Foo.status[43] # You can still use numeric column indices
+       #
+       define_method :df_column_names do
+         @@column_names ||= {}
+       end

-     define_method(table_name){
-       Table.new(block_type, all)
-     }
-
-     define_method("include_#{table_name}"){|*dimensions, unmap: true|
-       scope = self.all
-       blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
-       included_blocks = blocks_for_tables[singular_table_name] ||= {}
-       dimensions.flatten.each do |key|
-         if unmap && column_map(singular_table_name)
-           idx = column_map(singular_table_name)[key]
-         else
-           idx = key
-           key = "t#{key}"
-         end
-         block_index = idx / block_type::BLOCK_SIZE
-         block_offset = (idx % block_type::BLOCK_SIZE).succ
-         included_blocks[block_index] ||= []
-         included_blocks[block_index] << {name: key, idx: block_offset}
-       end
-       query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (table_name, included_blocks)|
-         aggregate +
-         included_blocks.reduce('') do |aggregate, (block_idx, blocks)|
-           blocks_table_name = "#{table_name}_blocks"
-           aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{table_name}_blocks "+
-           " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{table_name}#{block_idx} ON b#{table_name}#{block_idx}.data_frame_type = '#{self.name}' AND b#{table_name}#{block_idx}.data_frame_id = #{self.table_name}.id"
-         end
-       end + ") as #{self.table_name}"
-       scope.from(query)
+       # The class-level accessor
+       define_method(:column_name_map){|for_table|
+         df_column_names[for_table][self] if defined? df_column_names[for_table] rescue nil
        }

+       # The attribute writer
        define_method("#{singular_table_name}_column_names") do |names|
          df_column_names[singular_table_name] ||= {}
          df_column_maps[singular_table_name] ||= {}
@@ -118,35 +172,172 @@ module ActiveDataFrame
          df_column_maps[singular_table_name][self] = names.map.with_index.to_h
        end

+
+       #
+       # A class-level hash containing optionally defined column maps (these are usually simply a hash that responds to #[](column_name) and returns
+       # a positive integer representing the corresponding column index).
+       # These are defined using the
+       # "#{singular_table_name}_column_maps" method.
+       #
+       #   class Foo
+       #     include HasCpuTemp
+       #     cpu_temp_column_map Hash.new{ |columns, time|
+       #       columns[time] = time.to_i # We store cpu temperatures at a 1-second granularity
+       #     }
+       #   end
+       #
+       define_method :df_column_maps do
+         @@column_maps ||= {}
+       end
+
+       # The attribute writer
        define_method("#{singular_table_name}_column_map") do |column_map|
          df_column_names[singular_table_name] = nil
          df_column_maps[singular_table_name] ||= {}
          df_column_maps[singular_table_name][self] = column_map
        end

+       # The class-level accessor
+       define_method(:column_map){|for_table|
+         df_column_maps[for_table][self] if defined? df_column_maps[for_table] rescue nil
+       }
+
+       #
+       # A class-level hash containing optionally defined reverse column mappings (from a positive integer to a mapped column index/key).
+       # This is only used for functions where we query indices based on values.
+       # E.g.
+       #
+       #   class Foo
+       #     include HasPrice
+       #     column_map Hash.new{|columns, date|
+       #       columns[date] = (date - Date.new(1970)).to_i
+       #     }
+       #     reverse_column_map{|columns, index|
+       #       columns[index] = Date.new(1970) + index.month
+       #     }
+       #   end
+       #
+       #   # Show all dates between 2000 and 2010 where the total of all prices is > $500
+       #   Foo.prices.idx_where_sum_gte(Date.new(2000)...Date.new(2010), 500)
+       #
+       define_method :df_reverse_column_maps do
+         @@reverse_column_maps ||= {}
+       end
+
+       # The attribute writer
        define_method("#{singular_table_name}_reverse_column_map"){|reverse_column_map|
          df_reverse_column_maps[singular_table_name] ||= {}
          df_reverse_column_maps[singular_table_name][self] = reverse_column_map
        }

-       define_method(:include_data_blocks){|table_name, *args|
-         send("include_#{table_name}", *args)
+       # The class-level accessor
+       define_method(:reverse_column_map){|for_table|
+         df_reverse_column_maps[for_table] ||= {}
+         df_reverse_column_maps[for_table][self] ||= column_map(for_table).invert if column_map(for_table)
        }

-       define_method(:column_map){|table_name|
-         df_column_maps[table_name][self] if defined? df_column_maps[table_name] rescue nil
-       }
+       #
+       # See group_proxy.rb.
+       # This makes a number of grouping/bucketing queries easier to express
+       # for analytics across an entire table.
+       #
+       define_method(:with_groups) do |*groups|
+         GroupProxy.new(group(*groups))
+       end

-       define_method(:column_name_map){|table_name|
-         df_column_names[table_name][self] if defined? df_column_names[table_name]
-       }
+       #
+       # If you use the include_#{table_name} function before executing any queries, you can
+       # join the child AR rows with any number of columns and treat them as if they were all part of the same table.
+       # These joined columns can be used to further refine your queries, perform groupings, counts etc.
+       #
+       # E.g.
+       #
+       #   class Iris
+       #     include HasDimension
+       #     dimension_column_names %i(sepal_length sepal_width petal_length petal_width)
+       #   end
+       #
+       #   Iris.where('sepal_length > ?', 4) # Error! (There is no column called sepal_length on the iris table)
+       #   Iris.include_dimensions(:sepal_length).where('sepal_length > ?', 4) # Works fine
+       #   Iris.include_dimension(:sepal_length, :petal_width).where('sepal_length > 3').select(:petal_width)
+       #   Iris.include_dimension(:sepal_length, :petal_width).with_groups('ROUND(sepal_length)').average('petal_width')
+       #   {
+       #     "4.0":"0.2",
+       #     "5.0":"0.397872340425532",
+       #     "6.0":"1.49705882352941",
+       #     "7.0":"1.89583333333333",
+       #     "8.0":"2.15",
+       #   }
+       #
+       # In cases where column names are not predefined or use a mapper, you can provide a hash to give alternate column names for the query.
+       #
+       #   class BuildingType < ApplicationRecord
+       #     include HasBuildingConsent
+       #     consents_column_map Hash.new{|hash, time, as_date = time.to_date|
+       #       (as_date.year - 1970) * 12 + as_date.month
+       #     }
+       #   end
+       #
+       # In this example BuildingType.consents accepts dynamic column indices (anything that responds to to_date).
+       # We can give these columns explicit names so we can refer to them in queries.
+       # E.g.
+       #
+       #   BuildingType.include_consents({'1994-04-01' => april_94, '1994-05-01' => may_94}).where('april_94 + may_94 < 300')
+       #   => [
+       #     <BuildingType id: 2, name: "Hostels_boarding", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 2, april_94: 11, may_94: 5>,
+       #     <BuildingType id: 3, name: "Hotels", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 3, april_94: 33, may_94: 34>,
+       #     <BuildingType id: 4, name: "Hospitals", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 4, april_94: 32, may_94: 37>,
+       #     <BuildingType id: 5, name: "Education", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 5, april_94: 88, may_94: 145>,
+       #     <BuildingType id: 6, name: "Social_cultural_religious", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 6, april_94: 82, may_94: 102>,
+       #     <BuildingType id: 9, name: "Storage", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 9, april_94: 29, may_94: 52>,
+       #     <BuildingType id: 12, name: "Misc", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 12, april_94: 33, may_94: 39>
+       #   ]
+       #
+       #
+       define_method("include_#{table_name}"){|*dimensions, unmap: true, scope: self.all, as: false|
+         dim1 = dimensions[0]
+         case dim1
+         when Hash
+           dimension_map, dimensions = dim1, dim1.keys
+         when Range
+           exclude_end = dim1.exclude_end?
+
+           from, to = if unmap && column_map(singular_table_name)
+             unmap = false
+             [column_map(singular_table_name)[dim1.begin], column_map(singular_table_name)[dim1.end]]
+           else
+             [dim1.begin, dim1.end]
+           end
+           dimensions = (exclude_end ? (from...to) : (from..to)).to_a
+         end
+
+         blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
+         included_blocks = blocks_for_tables[block_type.table_name] ||= {}

-       define_method(:reverse_column_map){|table_name|
-         df_reverse_column_maps[table_name] ||= {}
-         df_reverse_column_maps[table_name][self] ||= column_map(table_name).invert if column_map(table_name)
+         dimensions.flatten.each.with_index(1) do |key, i|
+           if unmap && column_map(singular_table_name)
+             idx = column_map(singular_table_name)[key]
+             key = dimension_map[key] if dimension_map
+           else
+             idx = key
+             key = "t#{key}"
+           end
+           key = "#{as}#{i}" if as
+           block_index = idx / block_type::BLOCK_SIZE
+           block_offset = (idx % block_type::BLOCK_SIZE).succ
+           included_blocks[block_index] ||= []
+           included_blocks[block_index] << {name: key, idx: block_offset}
+         end
+         query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (for_table, blocks_for_table)|
+           aggregate +
+           blocks_for_table.reduce('') do |blocks_aggregate, (block_idx, blocks)|
+             blocks_table_name = for_table
+             blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
+             " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.data_frame_id = #{self.table_name}.id"
+           end
+         end + ") as #{self.table_name}"
+         scope.from(query)
        }
      end
-
-     return to_inject
    end
  end
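
Taken together, the new factory is wired up in two steps: ActiveDataFrame.HasDataFrame builds a module, and including that module in a model installs both the class-level Table accessor and the instance-level Row accessor. A minimal sketch, assuming a generated BarBlock block model (with BLOCK_SIZE, TYPECODE and COLUMNS constants) and an illustrative Foo model:

    module HasBar
      include ActiveDataFrame.HasDataFrame('bar', BarBlock, table_name: 'bars')
    end

    class Foo < ApplicationRecord
      include HasBar
    end

    Foo.bars[0..40]          # class-level Table proxy across all foos
    Foo.find(1).bar[0..40]   # instance-level Row proxy for a single foo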