active_data_frame 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5c368f1ed1f3fc78c0e9f81b0d2bd7cc9f50141a
4
- data.tar.gz: 80fa0cfdeed12b5b41d7556ec9c019670827e934
3
+ metadata.gz: 9e1350ed7595307e7875b6430c24bab9a2fd90a2
4
+ data.tar.gz: 1d2f0c6eae0ddfb9ed4fa52d07526e7f9e47ec20
5
5
  SHA512:
6
- metadata.gz: b2cc97b56fe384be682c9631a06c108b2524434230df5f4ac4949300339fadea0dcbca0f1efb9822bd04c3a43a7ae2374a3dbad02706793cfc5f8fa42600920b
7
- data.tar.gz: 7deccde31e9d8a99b31831d2af96227cdf9d087297321b531c79b4327b9bb63f38e0fa026869a94ffb4d5ea3cb5e9e61c5805328cf7bb8248b26e54f95f7fc40
6
+ metadata.gz: db812db474e0980059520b193b9c4fb67d36dbafaf865c28019ea247ab75b269ca26ccf4c6f146e1aec34897003c9f9a70be88550e002f2c514b4a3437ebce84
7
+ data.tar.gz: 2a2585b6f966cf5691f7d4d5155f8ea977f1f2f0213476ebdc88d3648d6511c71c743bc036ba2c4ba9b55e6610a817469e978b5c398dd6d642739fedfa2c8912
data/README.md CHANGED
@@ -1,8 +1,12 @@
1
1
  # ActiveDataFrame
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/active_data_frame`. To experiment with that code, run `bin/console` for an interactive prompt.
3
+ ActiveDataFrame allows efficient writing, reading, and analytical queries on large tables of numerical data. You can think of it as a persistent NumPy or NArray with good support for slicing
4
+ and aggregates without needing to load the entire dataset into memory.
4
5
 
5
- TODO: Delete this and the text above, and describe your gem
6
+ The library depends on ActiveRecord and currently supports the following relational databases:
7
+ * PostgreSQL
8
+ * MySQL
9
+ * SQLite
6
10
 
7
11
  ## Installation
8
12
 
@@ -11,6 +11,7 @@ Gem::Specification.new do |spec|
11
11
 
12
12
  spec.summary = 'An active data frame helper'
13
13
  spec.description = 'An active data frame helper'
14
+ spec.homepage = "https://github.com/wouterken/active_data_frame"
14
15
 
15
16
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
16
17
  f.match(%r{^(test|spec|features)/})
@@ -24,9 +25,11 @@ Gem::Specification.new do |spec|
24
25
  spec.add_development_dependency "pry-byebug", "~> 3.4.0", '>= 3.4.0'
25
26
  spec.add_development_dependency 'pry', '~> 0.10.2', '>= 0.10.0'
26
27
  spec.add_development_dependency 'pg'
28
+ spec.add_development_dependency 'sqlite3'
29
+ spec.add_development_dependency 'mysql2'
27
30
  spec.add_development_dependency 'minitest', '~>5.11'
28
31
  spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
29
32
  spec.add_development_dependency 'minitest-around', '0.4.1'
30
33
  spec.add_runtime_dependency 'activerecord', '~> 5.0'
31
- spec.add_runtime_dependency 'rmatrix', '~> 0.1.10', '>=0.1.10'
34
+ spec.add_runtime_dependency 'rmatrix', '~> 0.1.15', '>=0.1.15'
32
35
  end
@@ -2,8 +2,9 @@ Priorities:
2
2
  ☐ Ensure delete/clear works
3
3
  ☐ rmatrix tests
4
4
  ☐ Update README.md
5
- ☐ Use MMAP of status/enums
6
5
  ☐ Support SQLite + MySQL, MonetDB
6
+
7
+ ✔ Use MMAP of status/enums @done (18-04-03 19:21)
7
8
  ✔ Generator creates A migration and data_frame and block classes. Block/DataFrame classes have a type, a period unit and a period length @done (17-01-12 10:29)
8
9
  ✔ Type is: @done (17-01-12 10:29)
9
10
  ✔ Bit @done (17-01-12 10:29)
@@ -4,4 +4,19 @@ require 'active_data_frame/table'
4
4
  require 'active_data_frame/row'
5
5
  require 'active_data_frame/has_data_frame'
6
6
  require 'active_data_frame/database'
7
- require 'rmatrix'
7
+ require 'rmatrix'
8
+
9
+ module ActiveDataFrame
10
+ CONFIG = OpenStruct.new({
11
+ suppress_logs: true
12
+ })
13
+
14
+ module_function
15
+ def config
16
+ yield CONFIG
17
+ end
18
+
19
+ def suppress_logs
20
+ CONFIG.suppress_logs
21
+ end
22
+ end
@@ -22,8 +22,7 @@ module ActiveDataFrame
22
22
  def [](*ranges)
23
23
  result = get(extract_ranges(ranges))
24
24
  if @value_map
25
- # TODO Multi-dimensions #map would be nice
26
- result.to_a.map{|row| row.kind_of?(Array) ? row.map(&reverse_value_map.method(:[])) : reverse_value_map[row]}
25
+ result.map{|row| reverse_value_map[row]}
27
26
  else
28
27
  result
29
28
  end
@@ -36,7 +35,9 @@ module ActiveDataFrame
36
35
  end
37
36
 
38
37
  def clear(*ranges)
39
- clear(ex)
38
+ extract_ranges(ranges).each do |r|
39
+ set(r.first, M.blank(columns: r.last - r.first, typecode: block_type::TYPECODE))
40
+ end
40
41
  end
41
42
 
42
43
  def column_map
@@ -104,8 +105,7 @@ module ActiveDataFrame
104
105
  end
105
106
 
106
107
  def self.suppress_logs
107
- #TODO Make optional
108
- return yield
108
+ return yield unless ActiveDataFrame.suppress_logs
109
109
  ActiveRecord::Base.logger, old_logger = nil, ActiveRecord::Base.logger
110
110
  yield.tap do
111
111
  ActiveRecord::Base.logger = old_logger
@@ -9,13 +9,23 @@ module ActiveDataFrame
9
9
  Thread.current[:active_data_frame_batching] = !!value
10
10
  end
11
11
 
12
- # Not thread safe!
13
12
  def self.execute(sql)
14
13
  if ActiveDataFrame::Database.batching
15
14
  Thread.current[:batch] << sql << ?;
16
15
  else
17
- ActiveRecord::Base.transaction do
18
- ActiveRecord::Base.connection.execute sql
16
+ unless sql.empty?
17
+ ActiveRecord::Base.transaction do
18
+ case ActiveRecord::Base.connection_config[:adapter]
19
+ when 'sqlite3'.freeze
20
+ ActiveRecord::Base.connection.raw_connection.execute_batch sql
21
+ when 'mysql2'
22
+ sql.split(';').reject{|x| x.strip.empty?}.each do |stmt|
23
+ ActiveRecord::Base.connection.execute(stmt)
24
+ end
25
+ else
26
+ ActiveRecord::Base.connection.execute(sql)
27
+ end
28
+ end
19
29
  end
20
30
  end
21
31
  end
@@ -50,26 +60,34 @@ module ActiveDataFrame
50
60
  # Update block data for all blocks in a single call
51
61
  ##
52
62
  def bulk_update(existing)
53
- case ActiveRecord::Base.connection_config[:adapter]
54
- when 'postgresql'.freeze
55
- # Fast bulk update
56
- updates = ''
57
- existing.each do |period_index, (values, df_id)|
58
- updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
63
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
64
+ case ActiveRecord::Base.connection_config[:adapter]
65
+ when 'postgresql'.freeze
66
+ # Fast bulk update
67
+ updates = ''
68
+ existing.each do |period_index, (values, df_id)|
69
+ updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
70
+ end
71
+ perform_update(updates)
72
+ else
73
+ ids = existing.map {|_, (_, id)| id}
74
+ updates = block_type::COLUMNS.map.with_index do |column, column_idx|
75
+ [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
76
+ end.to_h
77
+ update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
78
+ Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
79
+ #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
80
+ AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
81
+ AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
82
+ "
83
+ )
59
84
  end
60
- perform_update(updates)
61
- else
62
- ids = existing.map {|_, (_, id)| id}
63
- updates = block_type::COLUMNS.map.with_index do |column, column_idx|
64
- [column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
65
- end.to_h
66
- update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
67
- Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
68
- #{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
69
- AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
70
- AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
71
- "
72
- )
85
+ end
86
+ end
87
+
88
+ def bulk_delete(id, indices)
89
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
90
+ block_type.where(data_frame_id: id, period_index: indices).delete_all
73
91
  end
74
92
  end
75
93
 
@@ -77,39 +95,41 @@ module ActiveDataFrame
77
95
  # Insert block data for all blocks in a single call
78
96
  ##
79
97
  def bulk_insert(new_blocks, instance)
80
- inserts = ''
81
- new_blocks.each do |period_index, (values)|
82
- inserts << \
83
- case ActiveRecord::Base.connection_config[:adapter]
84
- when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
85
- else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
98
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
99
+ inserts = ''
100
+ new_blocks.each do |period_index, (values)|
101
+ inserts << \
102
+ case ActiveRecord::Base.connection_config[:adapter]
103
+ when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
104
+ else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
105
+ end
86
106
  end
107
+ perform_insert(inserts)
87
108
  end
88
- perform_insert(inserts)
89
- end
90
-
91
- def bulk_delete(blocks)
92
- binding.pry
93
109
  end
94
110
 
95
111
  def perform_update(updates)
96
- Database.execute(
97
- <<-SQL
98
- UPDATE #{block_type.table_name}
99
- SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
100
- FROM(
101
- VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
102
- WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
103
- AND #{block_type.table_name}.period_index = t.period_index
104
- AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
105
- SQL
106
- )
107
- true
112
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
113
+ Database.execute(
114
+ <<-SQL
115
+ UPDATE #{block_type.table_name}
116
+ SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
117
+ FROM(
118
+ VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
119
+ WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
120
+ AND #{block_type.table_name}.period_index = t.period_index
121
+ AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
122
+ SQL
123
+ )
124
+ true
125
+ end
108
126
  end
109
127
 
110
128
  def perform_insert(inserts)
111
- sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
112
- Database.execute sql
129
+ ActiveDataFrame::DataFrameProxy.suppress_logs do
130
+ sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
131
+ Database.execute sql
132
+ end
113
133
  end
114
134
  end
115
135
  end
@@ -62,17 +62,18 @@ module ActiveDataFrame
62
62
 
63
63
  # We provide our own inspect implementation which will include in the output
64
64
  # selected dataframe attributes that do not reside on the parent table
65
- define_method :inspect do
66
- inspection = "not initialized"
67
- if defined?(@attributes) && @attributes
68
- inspection = @attributes.keys.collect { |name|
69
- if has_attribute?(name)
70
- "#{name}: #{attribute_for_inspect(name)}"
71
- end
72
- }.compact.join(", ")
73
- end
74
- "<#{self.class} #{inspection}>"
65
+ end
66
+
67
+ def inspect
68
+ inspection = "not initialized"
69
+ if defined?(@attributes) && @attributes
70
+ inspection = @attributes.keys.collect { |name|
71
+ if has_attribute?(name)
72
+ "#{name}: #{attribute_for_inspect(name)}"
73
+ end
74
+ }.compact.join(", ")
75
75
  end
76
+ "<#{self.class} #{inspection}>"
76
77
  end
77
78
  end
78
79
  end
@@ -332,8 +333,8 @@ module ActiveDataFrame
332
333
  aggregate +
333
334
  blocks_for_table.reduce('') do |blocks_aggregate, (block_idx, blocks)|
334
335
  blocks_table_name = for_table
335
- blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
336
- " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.data_frame_id = #{self.table_name}.id"
336
+ blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type as b#{for_table}#{block_idx}_data_frame_type, #{blocks_table_name}.data_frame_id b#{for_table}#{block_idx}_data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
337
+ " WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.b#{for_table}#{block_idx}_data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.b#{for_table}#{block_idx}_data_frame_id = #{self.table_name}.id"
337
338
  end
338
339
  end + ") as #{self.table_name}"
339
340
  scope.from(query)
@@ -28,15 +28,17 @@ module ActiveDataFrame
28
28
 
29
29
  iterate_bounds([bounds]) do |index, left, right, cursor, size|
30
30
  chunk = values[cursor...cursor + size]
31
- if size == block_type::BLOCK_SIZE && chunk.all?(&:zero?)
32
- deleted_indices << index
33
- else
34
- block = existing[index] || new_blocks[index]
31
+ if existing[index]
32
+ block = existing[index]
35
33
  block.first[left..right] = chunk.to_a
34
+ deleted_indices << index if block.first.all?(&:zero?)
35
+ elsif chunk.any?(&:nonzero?)
36
+ new_blocks[index].first[left..right] = chunk.to_a
36
37
  end
37
38
  end
38
39
 
39
- database.bulk_delete(self.id, deleted_indices) unless deleted_indices.size.zero?
40
+
41
+ database.bulk_delete(self.instance.id, deleted_indices) unless deleted_indices.size.zero?
40
42
  database.bulk_update(existing) unless existing.size.zero?
41
43
  database.bulk_insert(new_blocks, instance) unless new_blocks.size.zero?
42
44
  values
@@ -55,7 +55,7 @@ module ActiveDataFrame
55
55
 
56
56
  if aggregation_function
57
57
  case col_cases.length
58
- when 0 then "NULL::float as #{col}"
58
+ when 0 then "0.0 as #{col}"
59
59
  else
60
60
  case_str = col_cases.map do |match|
61
61
  case
@@ -78,7 +78,7 @@ module ActiveDataFrame
78
78
  "CASE WHEN #{case_str} THEN #{col} ELSE NULL END"
79
79
  end
80
80
  end
81
- end
81
+ end.map(&Arel.method(:sql))
82
82
  end
83
83
 
84
84
  def get(ranges)
@@ -98,7 +98,6 @@ module ActiveDataFrame
98
98
  all_bounds,
99
99
  block_scope: data_frame_type.unscoped
100
100
  .joins("LEFT JOIN #{block_type.table_name} ON #{data_frame_type.table_name}.id = #{block_type.table_name}.data_frame_id")
101
- .joins("RIGHT JOIN (#{data_frame_type.select(:id).to_sql}) as ref ON ref.id = #{block_type.table_name}.data_frame_id")
102
101
 
103
102
  ).where(
104
103
  block_type.table_name => {data_frame_type: data_frame_type.name }
@@ -108,8 +107,13 @@ module ActiveDataFrame
108
107
  ActiveRecord::Base.connection.execute(as_sql)
109
108
  end
110
109
 
111
- res.each_row do |pi, data_frame_id, *values|
112
- existing_blocks[pi][data_frame_id] = values
110
+ case ActiveRecord::Base.connection_config[:adapter]
111
+ when 'postgresql'.freeze
112
+ res.each_row {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
113
+ when 'mysql2'.freeze
114
+ res.each {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
115
+ when 'sqlite3'.freeze
116
+ res.map(&:values).each {|pi, data_frame_id, *values| existing_blocks[pi][data_frame_id] = values }
113
117
  end
114
118
 
115
119
  result = M.blank(typecode: block_type::TYPECODE, columns: all_bounds.map(&:length).sum, rows: index_map.size)
@@ -210,7 +214,7 @@ module ActiveDataFrame
210
214
  .pluck(
211
215
  :period_index,
212
216
  *block_type::COLUMNS.map do |cl|
213
- "#{agg}(#{cl}) as #{cl}"
217
+ Arel.sql("#{agg}(#{cl}) as #{cl}")
214
218
  end
215
219
  )
216
220
  .map{|pi, *values| [pi, values]}.to_h
@@ -1,3 +1,3 @@
1
1
  module ActiveDataFrame
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wouter Coppieters
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-04-03 00:00:00.000000000 Z
11
+ date: 2018-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -92,6 +92,34 @@ dependencies:
92
92
  - - ">="
93
93
  - !ruby/object:Gem::Version
94
94
  version: '0'
95
+ - !ruby/object:Gem::Dependency
96
+ name: sqlite3
97
+ requirement: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - ">="
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ - !ruby/object:Gem::Dependency
110
+ name: mysql2
111
+ requirement: !ruby/object:Gem::Requirement
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: '0'
116
+ type: :development
117
+ prerelease: false
118
+ version_requirements: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
95
123
  - !ruby/object:Gem::Dependency
96
124
  name: minitest
97
125
  requirement: !ruby/object:Gem::Requirement
@@ -160,20 +188,20 @@ dependencies:
160
188
  requirements:
161
189
  - - "~>"
162
190
  - !ruby/object:Gem::Version
163
- version: 0.1.10
191
+ version: 0.1.15
164
192
  - - ">="
165
193
  - !ruby/object:Gem::Version
166
- version: 0.1.10
194
+ version: 0.1.15
167
195
  type: :runtime
168
196
  prerelease: false
169
197
  version_requirements: !ruby/object:Gem::Requirement
170
198
  requirements:
171
199
  - - "~>"
172
200
  - !ruby/object:Gem::Version
173
- version: 0.1.10
201
+ version: 0.1.15
174
202
  - - ">="
175
203
  - !ruby/object:Gem::Version
176
- version: 0.1.10
204
+ version: 0.1.15
177
205
  description: An active data frame helper
178
206
  email:
179
207
  - wc@pico.net.nz
@@ -205,7 +233,7 @@ files:
205
233
  - lib/generators/active_data_frame/install_generator.rb
206
234
  - lib/generators/active_data_frame/templates/has_concern.rb
207
235
  - lib/generators/active_data_frame/templates/migration.rb
208
- homepage:
236
+ homepage: https://github.com/wouterken/active_data_frame
209
237
  licenses: []
210
238
  metadata: {}
211
239
  post_install_message: