RubyGems - active_data_frame - Versions diffs - 0.1.2 → 0.1.3 - Mend

active_data_frame 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/README.md +6 -2
data/active_data_frame.gemspec +4 -1
data/active_data_frame.todo +2 -1
data/lib/active_data_frame.rb +16 -1
data/lib/active_data_frame/data_frame_proxy.rb +5 -5
data/lib/active_data_frame/database.rb +67 -47
data/lib/active_data_frame/has_data_frame.rb +13 -12
data/lib/active_data_frame/row.rb +7 -5
data/lib/active_data_frame/table.rb +10 -6
data/lib/active_data_frame/version.rb +1 -1
metadata +35 -7

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5c368f1ed1f3fc78c0e9f81b0d2bd7cc9f50141a
-  data.tar.gz: 80fa0cfdeed12b5b41d7556ec9c019670827e934
+  metadata.gz: 9e1350ed7595307e7875b6430c24bab9a2fd90a2
+  data.tar.gz: 1d2f0c6eae0ddfb9ed4fa52d07526e7f9e47ec20
 SHA512:
-  metadata.gz: b2cc97b56fe384be682c9631a06c108b2524434230df5f4ac4949300339fadea0dcbca0f1efb9822bd04c3a43a7ae2374a3dbad02706793cfc5f8fa42600920b
-  data.tar.gz: 7deccde31e9d8a99b31831d2af96227cdf9d087297321b531c79b4327b9bb63f38e0fa026869a94ffb4d5ea3cb5e9e61c5805328cf7bb8248b26e54f95f7fc40
+  metadata.gz: db812db474e0980059520b193b9c4fb67d36dbafaf865c28019ea247ab75b269ca26ccf4c6f146e1aec34897003c9f9a70be88550e002f2c514b4a3437ebce84
+  data.tar.gz: 2a2585b6f966cf5691f7d4d5155f8ea977f1f2f0213476ebdc88d3648d6511c71c743bc036ba2c4ba9b55e6610a817469e978b5c398dd6d642739fedfa2c8912

data/README.md CHANGED

@@ -1,8 +1,12 @@
 # ActiveDataFrame
-Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/active_data_frame`. To experiment with that code, run `bin/console` for an interactive prompt.
+ActiveDataFrame allows efficient writing, reading, and analytical queries on large tables of numerical data. You can think of it as a persistent NumPy or NArray with good support for slicing
+and aggregates without needing to load the entire dataset into memory.
-TODO: Delete this and the text above, and describe your gem
+The library depends on ActiveRecord and currently supports the following relational databases:
+* PostgreSQL
+* MySQL
+* SQLite
 ## Installation

data/active_data_frame.gemspec CHANGED

@@ -11,6 +11,7 @@ Gem::Specification.new do |spec|
   spec.summary       = 'An active data frame helper'
   spec.description   = 'An active data frame helper'
+  spec.homepage      = "https://github.com/wouterken/active_data_frame"
   spec.files         = `git ls-files -z`.split("\x0").reject do |f|
     f.match(%r{^(test|spec|features)/})
@@ -24,9 +25,11 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "pry-byebug", "~> 3.4.0", '>= 3.4.0'
   spec.add_development_dependency 'pry', '~> 0.10.2', '>= 0.10.0'
   spec.add_development_dependency 'pg'
+  spec.add_development_dependency 'sqlite3'
+  spec.add_development_dependency 'mysql2'
   spec.add_development_dependency 'minitest', '~>5.11'
   spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
   spec.add_development_dependency 'minitest-around', '0.4.1'
   spec.add_runtime_dependency     'activerecord', '~> 5.0'
-  spec.add_runtime_dependency     'rmatrix', '~> 0.1.10', '>=0.1.10'
+  spec.add_runtime_dependency     'rmatrix', '~> 0.1.15', '>=0.1.15'
 end

data/active_data_frame.todo CHANGED

@@ -2,8 +2,9 @@ Priorities:
   ☐ Ensure delete/clear works
   ☐ rmatrix tests
   ☐ Update README.md
-  ☐ Use MMAP of status/enums
   ☐ Support SQLite + MySQL, MonetDB
+  ✔ Use MMAP of status/enums @done (18-04-03 19:21)
  ✔ Generator creates A migration and data_frame and block classes. Block/DataFrame classes have a type, a period unit and a period length @done (17-01-12 10:29)
  ✔ Type is: @done (17-01-12 10:29)
    ✔ Bit @done (17-01-12 10:29)

data/lib/active_data_frame.rb CHANGED

@@ -4,4 +4,19 @@ require 'active_data_frame/table'
 require 'active_data_frame/row'
 require 'active_data_frame/has_data_frame'
 require 'active_data_frame/database'
-require 'rmatrix'
+require 'rmatrix'
+module ActiveDataFrame
+  CONFIG = OpenStruct.new({
+    suppress_logs: true
+  })
+  module_function
+    def config
+      yield CONFIG
+    end
+    def suppress_logs
+      CONFIG.suppress_logs
+    end
+end

data/lib/active_data_frame/data_frame_proxy.rb CHANGED

@@ -22,8 +22,7 @@ module ActiveDataFrame
     def [](*ranges)
       result = get(extract_ranges(ranges))
       if @value_map
-        # TODO Multi-dimensions #map would be nice
-        result.to_a.map{|row| row.kind_of?(Array) ? row.map(&reverse_value_map.method(:[])) : reverse_value_map[row]}
+        result.map{|row| reverse_value_map[row]}
       else
         result
       end
@@ -36,7 +35,9 @@ module ActiveDataFrame
     end
     def clear(*ranges)
-      clear(ex)
+      extract_ranges(ranges).each do |r|
+        set(r.first, M.blank(columns: r.last - r.first, typecode: block_type::TYPECODE))
+      end
     end
     def column_map
@@ -104,8 +105,7 @@ module ActiveDataFrame
     end
     def self.suppress_logs
-      #TODO Make optional
-      return yield
+      return yield unless ActiveDataFrame.suppress_logs
       ActiveRecord::Base.logger, old_logger = nil,  ActiveRecord::Base.logger
       yield.tap do
         ActiveRecord::Base.logger = old_logger

data/lib/active_data_frame/database.rb CHANGED

@@ -9,13 +9,23 @@ module ActiveDataFrame
       Thread.current[:active_data_frame_batching] = !!value
     end
-    # Not thread safe!
     def self.execute(sql)
       if ActiveDataFrame::Database.batching
         Thread.current[:batch] << sql << ?;
       else
-        ActiveRecord::Base.transaction do
-          ActiveRecord::Base.connection.execute sql
+        unless sql.empty?
+          ActiveRecord::Base.transaction do
+            case ActiveRecord::Base.connection_config[:adapter]
+            when 'sqlite3'.freeze
+              ActiveRecord::Base.connection.raw_connection.execute_batch sql
+            when 'mysql2'
+              sql.split(';').reject{|x| x.strip.empty?}.each do |stmt|
+                ActiveRecord::Base.connection.execute(stmt)
+              end
+            else
+              ActiveRecord::Base.connection.execute(sql)
+            end
+          end
         end
       end
     end
@@ -50,26 +60,34 @@ module ActiveDataFrame
     # Update block data for all blocks in a single call
     ##
     def bulk_update(existing)
-      case ActiveRecord::Base.connection_config[:adapter]
-      when 'postgresql'.freeze
-        # Fast bulk update
-        updates = ''
-        existing.each do |period_index, (values, df_id)|
-          updates <<  "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
+      ActiveDataFrame::DataFrameProxy.suppress_logs do
+        case ActiveRecord::Base.connection_config[:adapter]
+        when 'postgresql'.freeze
+          # Fast bulk update
+          updates = ''
+          existing.each do |period_index, (values, df_id)|
+            updates <<  "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
+          end
+          perform_update(updates)
+        else
+          ids = existing.map {|_, (_, id)| id}
+          updates = block_type::COLUMNS.map.with_index do |column, column_idx|
+            [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
+          end.to_h
+          update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
+          Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
+            #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
+            AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
+            AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
+            "
+          )
         end
-        perform_update(updates)
-      else
-        ids = existing.map {|_, (_, id)| id}
-        updates = block_type::COLUMNS.map.with_index do |column, column_idx|
-          [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
-        end.to_h
-        update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
-        Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
-          #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
-          AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
-          AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
-          "
-        )
+      end
+    end
+    def bulk_delete(id, indices)
+      ActiveDataFrame::DataFrameProxy.suppress_logs do
+        block_type.where(data_frame_id: id, period_index: indices).delete_all
       end
     end
@@ -77,39 +95,41 @@ module ActiveDataFrame
     # Insert block data for all blocks in a single call
     ##
     def bulk_insert(new_blocks, instance)
-      inserts = ''
-      new_blocks.each do |period_index, (values)|
-        inserts << \
-        case ActiveRecord::Base.connection_config[:adapter]
-        when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
-        else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
+      ActiveDataFrame::DataFrameProxy.suppress_logs do
+        inserts = ''
+        new_blocks.each do |period_index, (values)|
+          inserts << \
+          case ActiveRecord::Base.connection_config[:adapter]
+          when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
+          else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
+          end
         end
+        perform_insert(inserts)
       end
-      perform_insert(inserts)
-    end
-    def bulk_delete(blocks)
-      binding.pry
     end
     def perform_update(updates)
-      Database.execute(
-        <<-SQL
-        UPDATE #{block_type.table_name}
-          SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
-          FROM(
-          VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
-          WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
-          AND #{block_type.table_name}.period_index = t.period_index
-          AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
-        SQL
-      )
-      true
+      ActiveDataFrame::DataFrameProxy.suppress_logs do
+        Database.execute(
+          <<-SQL
+          UPDATE #{block_type.table_name}
+            SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
+            FROM(
+            VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
+            WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
+            AND #{block_type.table_name}.period_index = t.period_index
+            AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
+          SQL
+        )
+        true
+      end
     end
     def perform_insert(inserts)
-      sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
-      Database.execute sql
+      ActiveDataFrame::DataFrameProxy.suppress_logs do
+        sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
+        Database.execute sql
+      end
     end
   end
 end

data/lib/active_data_frame/has_data_frame.rb CHANGED

@@ -62,17 +62,18 @@ module ActiveDataFrame
             # We provide our own inspect implementation which will include in the output
             # selected dataframe attributes that do not reside on the parent table
-            define_method :inspect do
-              inspection = "not initialized"
-              if defined?(@attributes) && @attributes
-                 inspection = @attributes.keys.collect { |name|
-                   if has_attribute?(name)
-                     "#{name}: #{attribute_for_inspect(name)}"
-                   end
-                 }.compact.join(", ")
-              end
-              "<#{self.class} #{inspection}>"
+          end
+          def inspect
+            inspection = "not initialized"
+            if defined?(@attributes) && @attributes
+               inspection = @attributes.keys.collect { |name|
+                 if has_attribute?(name)
+                   "#{name}: #{attribute_for_inspect(name)}"
+                 end
+               }.compact.join(", ")
             end
+            "<#{self.class} #{inspection}>"
           end
         end
       end
@@ -332,8 +333,8 @@ module ActiveDataFrame
           aggregate +
             blocks_for_table.reduce('') do |blocks_aggregate, (block_idx, blocks)|
               blocks_table_name = for_table
-              blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
-              " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.data_frame_id = #{self.table_name}.id"
+              blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type as b#{for_table}#{block_idx}_data_frame_type, #{blocks_table_name}.data_frame_id b#{for_table}#{block_idx}_data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
+              " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.b#{for_table}#{block_idx}_data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.b#{for_table}#{block_idx}_data_frame_id = #{self.table_name}.id"
             end
         end + ") as #{self.table_name}"
         scope.from(query)

data/lib/active_data_frame/row.rb CHANGED

@@ -28,15 +28,17 @@ module ActiveDataFrame
       iterate_bounds([bounds]) do |index, left, right, cursor, size|
         chunk = values[cursor...cursor + size]
-        if size == block_type::BLOCK_SIZE && chunk.all?(&:zero?)
-          deleted_indices << index
-        else
-          block = existing[index] || new_blocks[index]
+        if existing[index]
+          block = existing[index]
           block.first[left..right] = chunk.to_a
+          deleted_indices << index if block.first.all?(&:zero?)
+        elsif chunk.any?(&:nonzero?)
+          new_blocks[index].first[left..right] = chunk.to_a
         end
       end
-      database.bulk_delete(self.id, deleted_indices) unless deleted_indices.size.zero?
+      database.bulk_delete(self.instance.id, deleted_indices) unless deleted_indices.size.zero?
       database.bulk_update(existing)                 unless existing.size.zero?
       database.bulk_insert(new_blocks, instance)     unless new_blocks.size.zero?
       values

data/lib/active_data_frame/table.rb CHANGED

@@ -55,7 +55,7 @@ module ActiveDataFrame
         if aggregation_function
           case col_cases.length
-          when 0 then "NULL::float as #{col}"
+          when 0 then "0.0 as #{col}"
           else
             case_str = col_cases.map do |match|
               case
@@ -78,7 +78,7 @@ module ActiveDataFrame
             "CASE WHEN #{case_str} THEN #{col} ELSE NULL END"
           end
         end
-      end
+      end.map(&Arel.method(:sql))
     end
     def get(ranges)
@@ -98,7 +98,6 @@ module ActiveDataFrame
           all_bounds,
           block_scope: data_frame_type.unscoped
                                     .joins("LEFT JOIN #{block_type.table_name} ON #{data_frame_type.table_name}.id = #{block_type.table_name}.data_frame_id")
-                                    .joins("RIGHT JOIN (#{data_frame_type.select(:id).to_sql}) as ref ON ref.id = #{block_type.table_name}.data_frame_id")
         ).where(
           block_type.table_name => {data_frame_type: data_frame_type.name }
@@ -108,8 +107,13 @@ module ActiveDataFrame
         ActiveRecord::Base.connection.execute(as_sql)
       end
-      res.each_row do |pi, data_frame_id, *values|
-        existing_blocks[pi][data_frame_id] = values
+      case ActiveRecord::Base.connection_config[:adapter]
+      when 'postgresql'.freeze
+        res.each_row {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
+      when 'mysql2'.freeze
+        res.each {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
+      when 'sqlite3'.freeze
+        res.map(&:values).each {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
       end
       result = M.blank(typecode: block_type::TYPECODE, columns: all_bounds.map(&:length).sum, rows: index_map.size)
@@ -210,7 +214,7 @@ module ActiveDataFrame
           .pluck(
             :period_index,
             *block_type::COLUMNS.map do |cl|
-              "#{agg}(#{cl}) as #{cl}"
+              Arel.sql("#{agg}(#{cl}) as #{cl}")
             end
           )
           .map{|pi, *values| [pi, values]}.to_h

data/lib/active_data_frame/version.rb CHANGED

@@ -1,3 +1,3 @@
 module ActiveDataFrame
-  VERSION = "0.1.2"
+  VERSION = "0.1.3"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: active_data_frame
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - Wouter Coppieters
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-04-03 00:00:00.000000000 Z
+date: 2018-04-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -92,6 +92,34 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: sqlite3
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: mysql2
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: minitest
   requirement: !ruby/object:Gem::Requirement
@@ -160,20 +188,20 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.10
+        version: 0.1.15
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.1.10
+        version: 0.1.15
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.10
+        version: 0.1.15
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.1.10
+        version: 0.1.15
 description: An active data frame helper
 email:
 - wc@pico.net.nz
@@ -205,7 +233,7 @@ files:
 - lib/generators/active_data_frame/install_generator.rb
 - lib/generators/active_data_frame/templates/has_concern.rb
 - lib/generators/active_data_frame/templates/migration.rb
-homepage:
+homepage: https://github.com/wouterken/active_data_frame
 licenses: []
 metadata: {}
 post_install_message: