active_data_frame 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5c368f1ed1f3fc78c0e9f81b0d2bd7cc9f50141a
4
- data.tar.gz: 80fa0cfdeed12b5b41d7556ec9c019670827e934
3
+ metadata.gz: 9e1350ed7595307e7875b6430c24bab9a2fd90a2
4
+ data.tar.gz: 1d2f0c6eae0ddfb9ed4fa52d07526e7f9e47ec20
5
5
  SHA512:
6
- metadata.gz: b2cc97b56fe384be682c9631a06c108b2524434230df5f4ac4949300339fadea0dcbca0f1efb9822bd04c3a43a7ae2374a3dbad02706793cfc5f8fa42600920b
7
- data.tar.gz: 7deccde31e9d8a99b31831d2af96227cdf9d087297321b531c79b4327b9bb63f38e0fa026869a94ffb4d5ea3cb5e9e61c5805328cf7bb8248b26e54f95f7fc40
6
+ metadata.gz: db812db474e0980059520b193b9c4fb67d36dbafaf865c28019ea247ab75b269ca26ccf4c6f146e1aec34897003c9f9a70be88550e002f2c514b4a3437ebce84
7
+ data.tar.gz: 2a2585b6f966cf5691f7d4d5155f8ea977f1f2f0213476ebdc88d3648d6511c71c743bc036ba2c4ba9b55e6610a817469e978b5c398dd6d642739fedfa2c8912
data/README.md CHANGED
@@ -1,8 +1,12 @@
1
1
  # ActiveDataFrame
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/active_data_frame`. To experiment with that code, run `bin/console` for an interactive prompt.
3
+ ActiveDataFrame allows efficient writing, reading, and analytical queries on large tables of numerical data. You can think of it as a persistent NumPy or NArray with good support for slicing
4
+ and aggregates without needing to load the entire dataset into memory.
4
5
 
5
- TODO: Delete this and the text above, and describe your gem
6
+ The library depends on ActiveRecord and currently supports the following relational databases:
7
+ * PostgreSQL
8
+ * MySQL
9
+ * SQLite
6
10
 
7
11
  ## Installation
8
12
 
@@ -11,6 +11,7 @@ Gem::Specification.new do |spec|
11
11
 
12
12
  spec.summary = 'An active data frame helper'
13
13
  spec.description = 'An active data frame helper'
14
+ spec.homepage = "https://github.com/wouterken/active_data_frame"
14
15
 
15
16
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
16
17
  f.match(%r{^(test|spec|features)/})
@@ -24,9 +25,11 @@ Gem::Specification.new do |spec|
24
25
  spec.add_development_dependency "pry-byebug", "~> 3.4.0", '>= 3.4.0'
25
26
  spec.add_development_dependency 'pry', '~> 0.10.2', '>= 0.10.0'
26
27
  spec.add_development_dependency 'pg'
28
+ spec.add_development_dependency 'sqlite3'
29
+ spec.add_development_dependency 'mysql2'
27
30
  spec.add_development_dependency 'minitest', '~>5.11'
28
31
  spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
29
32
  spec.add_development_dependency 'minitest-around', '0.4.1'
30
33
  spec.add_runtime_dependency 'activerecord', '~> 5.0'
31
- spec.add_runtime_dependency 'rmatrix', '~> 0.1.10', '>=0.1.10'
34
+ spec.add_runtime_dependency 'rmatrix', '~> 0.1.15', '>=0.1.15'
32
35
  end
@@ -2,8 +2,9 @@ Priorities:
2
2
  ☐ Ensure delete/clear works
3
3
  ☐ rmatrix tests
4
4
  ☐ Update README.md
5
- ☐ Use MMAP of status/enums
6
5
  ☐ Support SQLite + MySQL, MonetDB
6
+
7
+ ✔ Use MMAP of status/enums @done (18-04-03 19:21)
7
8
  ✔ Generator creates A migration and data_frame and block classes. Block/DataFrame classes have a type, a period unit and a period length @done (17-01-12 10:29)
8
9
  ✔ Type is: @done (17-01-12 10:29)
9
10
  ✔ Bit @done (17-01-12 10:29)
@@ -4,4 +4,19 @@ require 'active_data_frame/table'
4
4
  require 'active_data_frame/row'
5
5
  require 'active_data_frame/has_data_frame'
6
6
  require 'active_data_frame/database'
7
- require 'rmatrix'
7
+ require 'rmatrix'
8
+
9
+ module ActiveDataFrame
10
+ CONFIG = OpenStruct.new({
11
+ suppress_logs: true
12
+ })
13
+
14
+ module_function
15
+ def config
16
+ yield CONFIG
17
+ end
18
+
19
+ def suppress_logs
20
+ CONFIG.suppress_logs
21
+ end
22
+ end
@@ -22,8 +22,7 @@ module ActiveDataFrame
22
22
  def [](*ranges)
23
23
  result = get(extract_ranges(ranges))
24
24
  if @value_map
25
- # TODO Multi-dimensions #map would be nice
26
- result.to_a.map{|row| row.kind_of?(Array) ? row.map(&reverse_value_map.method(:[])) : reverse_value_map[row]}
25
+ result.map{|row| reverse_value_map[row]}
27
26
  else
28
27
  result
29
28
  end
@@ -36,7 +35,9 @@ module ActiveDataFrame
36
35
  end
37
36
 
38
37
  def clear(*ranges)
39
- clear(ex)
38
+ extract_ranges(ranges).each do |r|
39
+ set(r.first, M.blank(columns: r.last - r.first, typecode: block_type::TYPECODE))
40
+ end
40
41
  end
41
42
 
42
43
  def column_map
@@ -104,8 +105,7 @@ module ActiveDataFrame
104
105
  end
105
106
 
106
107
  def self.suppress_logs
107
- #TODO Make optional
108
- return yield
108
+ return yield unless ActiveDataFrame.suppress_logs
109
109
  ActiveRecord::Base.logger, old_logger = nil, ActiveRecord::Base.logger
110
110
  yield.tap do
111
111
  ActiveRecord::Base.logger = old_logger
@@ -9,13 +9,23 @@ module ActiveDataFrame
9
9
  Thread.current[:active_data_frame_batching] = !!value
10
10
  end
11
11
 
12
- # Not thread safe!
13
12
  def self.execute(sql)
14
13
  if ActiveDataFrame::Database.batching
15
14
  Thread.current[:batch] << sql << ?;
16
15
  else
17
- ActiveRecord::Base.transaction do
18
- ActiveRecord::Base.connection.execute sql
16
+ unless sql.empty?
17
+ ActiveRecord::Base.transaction do
18
+ case ActiveRecord::Base.connection_config[:adapter]
19
+ when 'sqlite3'.freeze
20
+ ActiveRecord::Base.connection.raw_connection.execute_batch sql
21
+ when 'mysql2'
22
+ sql.split(';').reject{|x| x.strip.empty?}.each do |stmt|
23
+ ActiveRecord::Base.connection.execute(stmt)
24
+ end
25
+ else
26
+ ActiveRecord::Base.connection.execute(sql)
27
+ end
28
+ end
19
29
  end
20
30
  end
21
31
  end
@@ -50,26 +60,34 @@ module ActiveDataFrame
50
60
  # Update block data for all blocks in a single call
51
61
  ##
52
62
  def bulk_update(existing)
53
- case ActiveRecord::Base.connection_config[:adapter]
54
- when 'postgresql'.freeze
55
- # Fast bulk update
56
- updates = ''
57
- existing.each do |period_index, (values, df_id)|
58
- updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
63
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
64
+ case ActiveRecord::Base.connection_config[:adapter]
65
+ when 'postgresql'.freeze
66
+ # Fast bulk update
67
+ updates = ''
68
+ existing.each do |period_index, (values, df_id)|
69
+ updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
70
+ end
71
+ perform_update(updates)
72
+ else
73
+ ids = existing.map {|_, (_, id)| id}
74
+ updates = block_type::COLUMNS.map.with_index do |column, column_idx|
75
+ [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
76
+ end.to_h
77
+ update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
78
+ Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
79
+ #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
80
+ AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
81
+ AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
82
+ "
83
+ )
59
84
  end
60
- perform_update(updates)
61
- else
62
- ids = existing.map {|_, (_, id)| id}
63
- updates = block_type::COLUMNS.map.with_index do |column, column_idx|
64
- [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
65
- end.to_h
66
- update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
67
- Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
68
- #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
69
- AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
70
- AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
71
- "
72
- )
85
+ end
86
+ end
87
+
88
+ def bulk_delete(id, indices)
89
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
90
+ block_type.where(data_frame_id: id, period_index: indices).delete_all
73
91
  end
74
92
  end
75
93
 
@@ -77,39 +95,41 @@ module ActiveDataFrame
77
95
  # Insert block data for all blocks in a single call
78
96
  ##
79
97
  def bulk_insert(new_blocks, instance)
80
- inserts = ''
81
- new_blocks.each do |period_index, (values)|
82
- inserts << \
83
- case ActiveRecord::Base.connection_config[:adapter]
84
- when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
85
- else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
98
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
99
+ inserts = ''
100
+ new_blocks.each do |period_index, (values)|
101
+ inserts << \
102
+ case ActiveRecord::Base.connection_config[:adapter]
103
+ when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
104
+ else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
105
+ end
86
106
  end
107
+ perform_insert(inserts)
87
108
  end
88
- perform_insert(inserts)
89
- end
90
-
91
- def bulk_delete(blocks)
92
- binding.pry
93
109
  end
94
110
 
95
111
  def perform_update(updates)
96
- Database.execute(
97
- <<-SQL
98
- UPDATE #{block_type.table_name}
99
- SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
100
- FROM(
101
- VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
102
- WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
103
- AND #{block_type.table_name}.period_index = t.period_index
104
- AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
105
- SQL
106
- )
107
- true
112
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
113
+ Database.execute(
114
+ <<-SQL
115
+ UPDATE #{block_type.table_name}
116
+ SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
117
+ FROM(
118
+ VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
119
+ WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
120
+ AND #{block_type.table_name}.period_index = t.period_index
121
+ AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
122
+ SQL
123
+ )
124
+ true
125
+ end
108
126
  end
109
127
 
110
128
  def perform_insert(inserts)
111
- sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
112
- Database.execute sql
129
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
130
+ sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
131
+ Database.execute sql
132
+ end
113
133
  end
114
134
  end
115
135
  end
@@ -62,17 +62,18 @@ module ActiveDataFrame
62
62
 
63
63
  # We provide our own inspect implementation which will include in the output
64
64
  # selected dataframe attributes that do not reside on the parent table
65
- define_method :inspect do
66
- inspection = "not initialized"
67
- if defined?(@attributes) && @attributes
68
- inspection = @attributes.keys.collect { |name|
69
- if has_attribute?(name)
70
- "#{name}: #{attribute_for_inspect(name)}"
71
- end
72
- }.compact.join(", ")
73
- end
74
- "<#{self.class} #{inspection}>"
65
+ end
66
+
67
+ def inspect
68
+ inspection = "not initialized"
69
+ if defined?(@attributes) && @attributes
70
+ inspection = @attributes.keys.collect { |name|
71
+ if has_attribute?(name)
72
+ "#{name}: #{attribute_for_inspect(name)}"
73
+ end
74
+ }.compact.join(", ")
75
75
  end
76
+ "<#{self.class} #{inspection}>"
76
77
  end
77
78
  end
78
79
  end
@@ -332,8 +333,8 @@ module ActiveDataFrame
332
333
  aggregate +
333
334
  blocks_for_table.reduce('') do |blocks_aggregate, (block_idx, blocks)|
334
335
  blocks_table_name = for_table
335
- blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
336
- " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.data_frame_id = #{self.table_name}.id"
336
+ blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type as b#{for_table}#{block_idx}_data_frame_type, #{blocks_table_name}.data_frame_id b#{for_table}#{block_idx}_data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
337
+ " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.b#{for_table}#{block_idx}_data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.b#{for_table}#{block_idx}_data_frame_id = #{self.table_name}.id"
337
338
  end
338
339
  end + ") as #{self.table_name}"
339
340
  scope.from(query)
@@ -28,15 +28,17 @@ module ActiveDataFrame
28
28
 
29
29
  iterate_bounds([bounds]) do |index, left, right, cursor, size|
30
30
  chunk = values[cursor...cursor + size]
31
- if size == block_type::BLOCK_SIZE && chunk.all?(&:zero?)
32
- deleted_indices << index
33
- else
34
- block = existing[index] || new_blocks[index]
31
+ if existing[index]
32
+ block = existing[index]
35
33
  block.first[left..right] = chunk.to_a
34
+ deleted_indices << index if block.first.all?(&:zero?)
35
+ elsif chunk.any?(&:nonzero?)
36
+ new_blocks[index].first[left..right] = chunk.to_a
36
37
  end
37
38
  end
38
39
 
39
- database.bulk_delete(self.id, deleted_indices) unless deleted_indices.size.zero?
40
+
41
+ database.bulk_delete(self.instance.id, deleted_indices) unless deleted_indices.size.zero?
40
42
  database.bulk_update(existing) unless existing.size.zero?
41
43
  database.bulk_insert(new_blocks, instance) unless new_blocks.size.zero?
42
44
  values
@@ -55,7 +55,7 @@ module ActiveDataFrame
55
55
 
56
56
  if aggregation_function
57
57
  case col_cases.length
58
- when 0 then "NULL::float as #{col}"
58
+ when 0 then "0.0 as #{col}"
59
59
  else
60
60
  case_str = col_cases.map do |match|
61
61
  case
@@ -78,7 +78,7 @@ module ActiveDataFrame
78
78
  "CASE WHEN #{case_str} THEN #{col} ELSE NULL END"
79
79
  end
80
80
  end
81
- end
81
+ end.map(&Arel.method(:sql))
82
82
  end
83
83
 
84
84
  def get(ranges)
@@ -98,7 +98,6 @@ module ActiveDataFrame
98
98
  all_bounds,
99
99
  block_scope: data_frame_type.unscoped
100
100
  .joins("LEFT JOIN #{block_type.table_name} ON #{data_frame_type.table_name}.id = #{block_type.table_name}.data_frame_id")
101
- .joins("RIGHT JOIN (#{data_frame_type.select(:id).to_sql}) as ref ON ref.id = #{block_type.table_name}.data_frame_id")
102
101
 
103
102
  ).where(
104
103
  block_type.table_name => {data_frame_type: data_frame_type.name }
@@ -108,8 +107,13 @@ module ActiveDataFrame
108
107
  ActiveRecord::Base.connection.execute(as_sql)
109
108
  end
110
109
 
111
- res.each_row do |pi, data_frame_id, *values|
112
- existing_blocks[pi][data_frame_id] = values
110
+ case ActiveRecord::Base.connection_config[:adapter]
111
+ when 'postgresql'.freeze
112
+ res.each_row {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
113
+ when 'mysql2'.freeze
114
+ res.each {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
115
+ when 'sqlite3'.freeze
116
+ res.map(&:values).each {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
113
117
  end
114
118
 
115
119
  result = M.blank(typecode: block_type::TYPECODE, columns: all_bounds.map(&:length).sum, rows: index_map.size)
@@ -210,7 +214,7 @@ module ActiveDataFrame
210
214
  .pluck(
211
215
  :period_index,
212
216
  *block_type::COLUMNS.map do |cl|
213
- "#{agg}(#{cl}) as #{cl}"
217
+ Arel.sql("#{agg}(#{cl}) as #{cl}")
214
218
  end
215
219
  )
216
220
  .map{|pi, *values| [pi, values]}.to_h
@@ -1,3 +1,3 @@
1
1
  module ActiveDataFrame
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wouter Coppieters
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-04-03 00:00:00.000000000 Z
11
+ date: 2018-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -92,6 +92,34 @@ dependencies:
92
92
  - - ">="
93
93
  - !ruby/object:Gem::Version
94
94
  version: '0'
95
+ - !ruby/object:Gem::Dependency
96
+ name: sqlite3
97
+ requirement: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ - !ruby/object:Gem::Dependency
110
+ name: mysql2
111
+ requirement: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ type: :development
117
+ prerelease: false
118
+ version_requirements: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
95
123
  - !ruby/object:Gem::Dependency
96
124
  name: minitest
97
125
  requirement: !ruby/object:Gem::Requirement
@@ -160,20 +188,20 @@ dependencies:
160
188
  requirements:
161
189
  - - "~>"
162
190
  - !ruby/object:Gem::Version
163
- version: 0.1.10
191
+ version: 0.1.15
164
192
  - - ">="
165
193
  - !ruby/object:Gem::Version
166
- version: 0.1.10
194
+ version: 0.1.15
167
195
  type: :runtime
168
196
  prerelease: false
169
197
  version_requirements: !ruby/object:Gem::Requirement
170
198
  requirements:
171
199
  - - "~>"
172
200
  - !ruby/object:Gem::Version
173
- version: 0.1.10
201
+ version: 0.1.15
174
202
  - - ">="
175
203
  - !ruby/object:Gem::Version
176
- version: 0.1.10
204
+ version: 0.1.15
177
205
  description: An active data frame helper
178
206
  email:
179
207
  - wc@pico.net.nz
@@ -205,7 +233,7 @@ files:
205
233
  - lib/generators/active_data_frame/install_generator.rb
206
234
  - lib/generators/active_data_frame/templates/has_concern.rb
207
235
  - lib/generators/active_data_frame/templates/migration.rb
208
- homepage:
236
+ homepage: https://github.com/wouterken/active_data_frame
209
237
  licenses: []
210
238
  metadata: {}
211
239
  post_install_message: