active_data_frame 0.1.3 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +218 -3
- data/active_data_frame.gemspec +1 -1
- data/lib/active_data_frame/data_frame_proxy.rb +5 -1
- data/lib/active_data_frame/database.rb +77 -62
- data/lib/active_data_frame/row.rb +7 -3
- data/lib/active_data_frame/table.rb +3 -4
- data/lib/active_data_frame/version.rb +1 -1
- data/lib/generators/active_data_frame/USAGE +20 -0
- data/lib/generators/active_data_frame/install_generator.rb +3 -5
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e6c248d13e0f7f10933eca32158e4fb33a080e3
|
4
|
+
data.tar.gz: 28d52390deef35b8e582942612989f99e2026ed3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9fd95c152778f43ea9d3d3e09160a22ed355989b5fdc5e2cbbfb1f10b2290aab4db07d04344acb595f30f28a98eaf1282b4a461611ba6c5999b5a060fc60ae77
|
7
|
+
data.tar.gz: c0b32d8827258e8e8cf38e051758d8de8a1784bb2a4b19cdaada75af5b9b1541b3c45d15e6d2f033ca07ad1ec9422fc290c11b44e41fd2df5ef08b68bf805c47
|
data/README.md
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
# ActiveDataFrame
|
2
2
|
|
3
|
-
ActiveDataFrame allows efficient writing, reading, and analytical queries on large tables of numerical data. You can think of it as a persistent NumPy or NArray with good support for slicing
|
4
|
-
and aggregates without needing to load the entire dataset into memory.
|
3
|
+
ActiveDataFrame allows efficient writing, reading, and analytical queries on large tables of numerical data. You can think of it as a persistent NumPy or NArray with good support for slicing and aggregates without needing the entire dataset in memory.
|
5
4
|
|
6
5
|
The library depends on ActiveRecord and currently supports the following relational databases:
|
6
|
+
|
7
7
|
* PostgreSQL
|
8
8
|
* MySQL
|
9
9
|
* SQLite
|
@@ -24,16 +24,231 @@ Or install it yourself as:
|
|
24
24
|
|
25
25
|
$ gem install active_data_frame
|
26
26
|
|
27
|
+
## Examples
|
28
|
+
|
29
|
+
### Using the generator
|
30
|
+
|
31
|
+
# Generate a new data frame named Statistic, with a datapoint type of double, and a block size of 100
|
32
|
+
$ rails generate active_data_frame:install Statistic double 100
|
33
|
+
|
34
|
+
# Then run migrations to create the underlying table
|
35
|
+
$ rake db:migrate
|
36
|
+
|
27
37
|
## Usage
|
38
|
+
### Generator
|
39
|
+
The easiest way to get started is to use the in-built generator to generate a new
|
40
|
+
`ActiveDataFrame`. This will generate the required migrations for the data frame
|
41
|
+
and generate a new module that you can include inside an `ActiveRecord` model to give it access to the frame.
|
42
|
+
|
43
|
+
```
|
44
|
+
# Generate a new MeterReading data frame type, with a block type of
|
45
|
+
# double and a block size of 48 data points
|
46
|
+
|
47
|
+
$ rails generate active_data_frame:install MeterReading double 48
|
48
|
+
|
49
|
+
# Generate a new Dimension data frame type, with a block type of
|
50
|
+
# float and a block size of 10 data points.
|
51
|
+
# Inject the data-type for use into the Iris model
|
52
|
+
|
53
|
+
$ rails generate active_data_frame:install Dimension float 10 Iris
|
54
|
+
|
55
|
+
#
|
56
|
+
# Generate a new status data frame type with an integer block type
|
57
|
+
#
|
58
|
+
$ rails generate active_data_frame:install Status integer
|
59
|
+
```
|
60
|
+
|
61
|
+
### Writing to a data frame
|
62
|
+
When you include a data frame in an ActiveRecord model, each instance of the model corresponds to a single row in the data frame. The columns are a series of points that stretch towards infinity in each direction.
|
63
|
+
|
64
|
+
By default columns are indexed by integers, but you can set a static or dynamic column map so that you can easily have columns indexed by time, enum columns or use any other data type that serves as a useful index.
|
65
|
+
|
66
|
+
You can write any number of data points to a row in the dataframe using #[]=
|
67
|
+
|
68
|
+
#E.g.
|
69
|
+
# Write to the row called readings from index 0. Here Sensor is the ActiveRecord model, readings is the name of the row
|
70
|
+
Sensor.first.readings[0] = 1,2,3
|
71
|
+
|
72
|
+
# Write to the row called readings from an offset at 1_000_000
|
73
|
+
Sensor.first.readings[1_000_000] = -10, -9, -8
|
74
|
+
|
75
|
+
# Writing to a row which has a column mapping applied, mapping times onto integer indexes
|
76
|
+
MeterChannel.first.readings['2001-01-01'] = [1.3, 3.4]
|
77
|
+
|
78
|
+
#If you have enum columns you can use the #[enum_name]= setter instead.
|
79
|
+
Iris.first.dimensions.sepal_length = 5.3
|
80
|
+
Iris.first.dimensions.petal_width = 4.3
|
81
|
+
|
82
|
+
# You can set data for multiple rows at once, by using the frame accessor on the model's class instead of an instance.
|
83
|
+
|
84
|
+
E.g.
|
85
|
+
# This sets the reading at index 1 to 5 for ALL sensors
|
86
|
+
Sensor.readings[1] = 5
|
87
|
+
|
88
|
+
# You can use AR queries to refine which set of rows you are updating at once.
|
89
|
+
# E.g.
|
90
|
+
MeterChannel.where("created_at < ?", "2001-01-01").readings['2001-01-01'] = [5,6,7]
|
91
|
+
|
92
|
+
ActiveDataFrame supports very quick writing of thousands of values for a single row at a time. Don't be afraid to write large arrays of data like this.
|
93
|
+
|
94
|
+
### Reading from a data frame
|
95
|
+
Reading from a data frame is similar to writing and uses the #[] method.
|
96
|
+
You can read individual values, a range of values, and sparse selections of columns.
|
97
|
+
|
98
|
+
#E.g.
|
99
|
+
# Read a single value
|
100
|
+
Sensor.first.readings[0] # => Matrix(1x1)[...]
|
101
|
+
|
102
|
+
# Read a range of 3 values
|
103
|
+
Sensor.first.readings[0...3] # => Matrix(1x3)[...]
|
104
|
+
|
105
|
+
# Read some non contiguous values and ranges
|
106
|
+
Sensor.first.readings[5, 10, 4..7, 9..10] # => Matrix(1x8)[...]
|
107
|
+
|
108
|
+
#Reading from a row which has a column mapping that uses times
|
109
|
+
MeterChannel.first.readings['2001-01-01'...'2002-01-01'] # => Matrix(1xM)[....]
|
110
|
+
|
111
|
+
#If you have enum columns you can use the #[enum_name] getter for single columns
|
112
|
+
Iris.first.dimensions.sepal_length
|
113
|
+
Iris.first.dimensions.petal_width
|
114
|
+
|
115
|
+
# And use symbols as column indices (this assumes a specific ordering of enum columns)
|
116
|
+
Iris.first.dimensions[:sepal_length...:petal_width]
|
117
|
+
|
118
|
+
Similar to when writing data, you can also read data from multiple rows at once.
|
119
|
+
Just use the active data frame accessor on the model class instead of a model instance. E.g.
|
120
|
+
|
121
|
+
Sensor.readings[0..5] # => Matrix(Nx5)
|
122
|
+
|
123
|
+
|
124
|
+
### Deleting
|
125
|
+
You can use #clear(range_or_indices) to delete data.
|
126
|
+
|
127
|
+
Deleting data is equivalent to setting all data points to zero.
|
128
|
+
So the operation row[index] = [0, 0, 0, 0.....0] is equivalent
|
129
|
+
to the operation row.clear(index...end_index). ActiveDataFrame
|
130
|
+
will automatically trim empty blocks.
|
28
131
|
|
29
|
-
|
132
|
+
### Batching
|
133
|
+
If performing many small reads and writes from a data frame in a single atomic operation
|
134
|
+
it makes sense to do this in a single transaction. Active Data Frame provides the `ActiveDataFrame::Database.batch do ... end` method. This method will not only ensure your operations occur in a single transaction, but also that they are sent to the underlying database adapter as a single command.
|
30
135
|
|
136
|
+
### Analytical Queries
|
137
|
+
Any read of a dataframe returns an RMatrix instance. An RMatrix supports a large number of
|
138
|
+
statistical methods and list methods. (See the RMatrix readme for more details).
|
139
|
+
E.g.
|
140
|
+
|
141
|
+
cpu_loads = CPU.first.loads['2001-01-01'..'2005-01-01']
|
142
|
+
puts cpu_loads.avg
|
143
|
+
puts cpu_loads.stddev
|
144
|
+
puts cpu_loads.max
|
145
|
+
# ... and many more
|
146
|
+
|
147
|
+
However, in some cases you are dealing with so much data that it is not possible, or is too slow, to retrieve all the data at once and manipulate it in memory. ActiveDataFrame supports performing a number of aggregate methods directly in the database. These are #avg, #min, #max and #sum. The syntax for this is almost identical to an ordinary read.
|
148
|
+
|
149
|
+
CPU.loads.avg['2001-01-01'...'2005-01-01'] # The average CPU load per period over all CPUS
|
150
|
+
|
151
|
+
CPU.where(manufacturer: :intel).loads.min['2001-01-01'...'2005-01-01'] # The minimum CPU load per period over all intel CPUS
|
152
|
+
|
153
|
+
### Categorical data
|
154
|
+
ActiveDataFrame provides a very basic abstraction for storing categorical data. This is done by storing categories as an integer data frame, and providing a map from integers to categories. The library will then allow you to use the category names in place of the raw underlying integers.
|
155
|
+
E.g.
|
156
|
+
|
157
|
+
module HasStatus
|
158
|
+
include ActiveDataFrame::HasDataFrame('status', Blocks::StatusBlock, value_map: {
|
159
|
+
actual: 2,
|
160
|
+
estimated: 1,
|
161
|
+
unknown: 0
|
162
|
+
})
|
163
|
+
end
|
164
|
+
|
165
|
+
class CPU < ApplicationRecord
|
166
|
+
include HasStatus
|
167
|
+
end
|
168
|
+
|
169
|
+
The CPU model above includes a dataframe with a status mapping. We can now do things like
|
170
|
+
|
171
|
+
CPU.first.status[0] # => :unknown
|
172
|
+
CPU.first.status[0..5] # => [:unknown,:unknown,:unknown,:unknown,:unknown]
|
173
|
+
|
174
|
+
CPU.first.status[0] = :actual, :estimated
|
175
|
+
CPU.first.status[0..5] # => [:actual,:estimated,:unknown,:unknown,:unknown]
|
176
|
+
|
177
|
+
### Time-series data
|
178
|
+
We can use any datatype we like to index into a dataframe, so long as we can map it to an integer index. This makes active dataframes very well suited to storing large streams of interval data over time.
|
179
|
+
|
180
|
+
For example, we might define a mapping such that every half-hour period in time corresponds to a column in our dataframe. In the example below we might be counting the number of arrivals at an airport every half-hour.
|
181
|
+
|
182
|
+
|
183
|
+
module HasArrivals
|
184
|
+
include ActiveDataFrame::HasDataFrame('arrivals', Blocks::ArrivalBlock)
|
185
|
+
module ColumnMaps
|
186
|
+
def self.included(base)
|
187
|
+
base.arrivals_column_map Hash.new{|hash, time| ((time.to_time - Time.at(0)) / 1.hour).to_i rescue time.to_i }
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
class Airport < ApplicationRecord
|
193
|
+
include HasArrivals::ColumnMaps, HasArrivals
|
194
|
+
end
|
195
|
+
|
196
|
+
Now we can use any value that implements #to_time to index into our dataframe. This supports both single indexes and ranges (...).
|
197
|
+
E.g.
|
198
|
+
|
199
|
+
Airport.first.arrivals['2001-01-01'...'2002-01-01'] # => Matrix(1xM)[....]
|
200
|
+
|
201
|
+
|
202
|
+
### Column Mappings
|
203
|
+
We can use any datatype we like to index into a dataframe, so long as we can map it to an integer index. See the section on Time-series data for one example of this. Columns can also be aliases to categories. An example of this is using ActiveDataFrame to model the classic Iris dataset.
|
204
|
+
|
205
|
+
class Iris < ApplicationRecord
|
206
|
+
include HasDimensions
|
207
|
+
dimension_column_names %i(sepal_length sepal_width petal_length petal_width)
|
208
|
+
end
|
209
|
+
|
210
|
+
Here we have mapped the first four columns of our data frame to sepal_length, sepal_width, petal_length and petal_width.
|
211
|
+
|
212
|
+
When using symbols as column names ActiveDataFrame provides some syntactic sugar for easily slicing and dicing frames.
|
213
|
+
|
214
|
+
We can do things like:
|
215
|
+
|
216
|
+
* Extract a slice of data:
|
217
|
+
|
218
|
+
`iris_results = Iris.where(species: :setosa).dimension[:sepal_width..:petal_length]`
|
219
|
+
* Extract an entire column from a data-set using the column name:
|
220
|
+
|
221
|
+
`iris_results.sepal_width => V[[...]]`
|
222
|
+
* Extract an entire column from a data-set using the column name:
|
223
|
+
|
224
|
+
`iris_results.sepal_width => V[[...]]`
|
225
|
+
* Extract a single value from an instance:
|
226
|
+
|
227
|
+
`Iris.first.dimension.sepal_width.to_f`
|
228
|
+
|
229
|
+
* Set one or more values for an instance or row at once:
|
230
|
+
|
231
|
+
`Iris.first.dimension.sepal_width = 13`
|
232
|
+
`Iris.all.dimension.petal_length = 5.2,6.3,5.4,1.1`
|
233
|
+
|
234
|
+
### Configuration
|
235
|
+
ActiveDataFrame supports project-wide configuration using
|
236
|
+
|
237
|
+
ActiveDataFrame.config do |config|
|
238
|
+
config.[config_option_name] = [config_value]
|
239
|
+
end
|
240
|
+
|
241
|
+
Currently the following configuration options are supported:
|
242
|
+
|
243
|
+
* `suppress_logs` The queries generated by ActiveDataFrame are quite verbose. If you would like to suppress ActiveRecord logging for these queries, set this option to `true`
|
31
244
|
## Development
|
32
245
|
|
33
246
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
34
247
|
|
35
248
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
36
249
|
|
250
|
+
## Testing
|
251
|
+
|
37
252
|
## Contributing
|
38
253
|
|
39
254
|
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/active_data_frame. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
data/active_data_frame.gemspec
CHANGED
@@ -31,5 +31,5 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
|
32
32
|
spec.add_development_dependency 'minitest-around', '0.4.1'
|
33
33
|
spec.add_runtime_dependency 'activerecord', '~> 5.0'
|
34
|
-
spec.add_runtime_dependency 'rmatrix', '~> 0.1.
|
34
|
+
spec.add_runtime_dependency 'rmatrix', '~> 0.1.17', '>=0.1.17'
|
35
35
|
end
|
@@ -57,8 +57,12 @@ module ActiveDataFrame
|
|
57
57
|
end
|
58
58
|
|
59
59
|
def method_missing(name, *args, &block)
|
60
|
+
if name.to_s.ends_with?(?=)
|
61
|
+
is_assignment = true
|
62
|
+
name = name.to_s.gsub(/=$/,'').to_sym
|
63
|
+
end
|
60
64
|
if column_name_map && column_map[name]
|
61
|
-
self[name]
|
65
|
+
is_assignment ? self.[]=(name, *args) : self[name]
|
62
66
|
else
|
63
67
|
super
|
64
68
|
end
|
@@ -15,15 +15,17 @@ module ActiveDataFrame
|
|
15
15
|
else
|
16
16
|
unless sql.empty?
|
17
17
|
ActiveRecord::Base.transaction do
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
ActiveDataFrame::DataFrameProxy.suppress_logs do
|
19
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
20
|
+
when 'sqlite3'.freeze
|
21
|
+
ActiveRecord::Base.connection.raw_connection.execute_batch sql
|
22
|
+
when 'mysql2'
|
23
|
+
sql.split(';').reject{|x| x.strip.empty?}.each do |stmt|
|
24
|
+
ActiveRecord::Base.connection.execute(stmt)
|
25
|
+
end
|
26
|
+
else
|
27
|
+
ActiveRecord::Base.connection.execute(sql)
|
24
28
|
end
|
25
|
-
else
|
26
|
-
ActiveRecord::Base.connection.execute(sql)
|
27
29
|
end
|
28
30
|
end
|
29
31
|
end
|
@@ -60,56 +62,16 @@ module ActiveDataFrame
|
|
60
62
|
# Update block data for all blocks in a single call
|
61
63
|
##
|
62
64
|
def bulk_update(existing)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
else
|
73
|
-
ids = existing.map {|_, (_, id)| id}
|
74
|
-
updates = block_type::COLUMNS.map.with_index do |column, column_idx|
|
75
|
-
[column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
|
76
|
-
end.to_h
|
77
|
-
update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
|
78
|
-
Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
|
79
|
-
#{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
|
80
|
-
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
81
|
-
AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
|
82
|
-
"
|
83
|
-
)
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def bulk_delete(id, indices)
|
89
|
-
ActiveDataFrame::DataFrameProxy.suppress_logs do
|
90
|
-
block_type.where(data_frame_id: id, period_index: indices).delete_all
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
##
|
95
|
-
# Insert block data for all blocks in a single call
|
96
|
-
##
|
97
|
-
def bulk_insert(new_blocks, instance)
|
98
|
-
ActiveDataFrame::DataFrameProxy.suppress_logs do
|
99
|
-
inserts = ''
|
100
|
-
new_blocks.each do |period_index, (values)|
|
101
|
-
inserts << \
|
102
|
-
case ActiveRecord::Base.connection_config[:adapter]
|
103
|
-
when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
104
|
-
else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
105
|
-
end
|
65
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
66
|
+
when 'postgresql'.freeze
|
67
|
+
#
|
68
|
+
# PostgreSQL Supports the fast setting of multiple update values that differ
|
69
|
+
# per row from a temporary table.
|
70
|
+
#
|
71
|
+
updates = ''
|
72
|
+
existing.each do |period_index, (values, df_id)|
|
73
|
+
updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
|
106
74
|
end
|
107
|
-
perform_insert(inserts)
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def perform_update(updates)
|
112
|
-
ActiveDataFrame::DataFrameProxy.suppress_logs do
|
113
75
|
Database.execute(
|
114
76
|
<<-SQL
|
115
77
|
UPDATE #{block_type.table_name}
|
@@ -121,15 +83,68 @@ module ActiveDataFrame
|
|
121
83
|
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
122
84
|
SQL
|
123
85
|
)
|
124
|
-
|
86
|
+
#
|
87
|
+
# For MySQL we use the ON DUPLICATE KEY UPDATE functionality.
|
88
|
+
# This relies on there being a unique index dataframe and period index
|
89
|
+
# on the blocks table.
|
90
|
+
# This tends to be faster than the general CASE based solution below
|
91
|
+
# but slower than the PostgreSQL solution above
|
92
|
+
#
|
93
|
+
when 'mysql2'.freeze
|
94
|
+
# Fast bulk update
|
95
|
+
updates, on_duplicate = "", ""
|
96
|
+
existing.each do |period_index, (values, df_id)|
|
97
|
+
updates << "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
|
98
|
+
end
|
99
|
+
on_duplicate = block_type::COLUMNS.map do |cname|
|
100
|
+
"#{cname}=VALUES(#{cname})"
|
101
|
+
end.join(", ")
|
102
|
+
stmt = <<-SQL
|
103
|
+
INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')},data_frame_id,period_index,data_frame_type)
|
104
|
+
VALUES #{updates[0..-2]}
|
105
|
+
ON DUPLICATE KEY UPDATE #{on_duplicate}
|
106
|
+
SQL
|
107
|
+
Database.execute(stmt)
|
108
|
+
else
|
109
|
+
#
|
110
|
+
# General CASE based solution for multiple differing updates
|
111
|
+
# set per row.
|
112
|
+
# We use a CASE statement per column which determines the column
|
113
|
+
# to set based on the period index
|
114
|
+
#
|
115
|
+
ids = existing.map {|_, (_, id)| id}
|
116
|
+
updates = block_type::COLUMNS.map.with_index do |column, column_idx|
|
117
|
+
[column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
|
118
|
+
end.to_h
|
119
|
+
update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
|
120
|
+
Database.execute(<<-SQL
|
121
|
+
UPDATE #{block_type.table_name} SET #{update_statement} WHERE
|
122
|
+
#{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
|
123
|
+
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
124
|
+
AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
|
125
|
+
SQL
|
126
|
+
)
|
125
127
|
end
|
126
128
|
end
|
127
129
|
|
128
|
-
def
|
129
|
-
|
130
|
-
|
131
|
-
|
130
|
+
def bulk_delete(id, indices)
|
131
|
+
block_type.where(data_frame_id: id, period_index: indices).delete_all
|
132
|
+
end
|
133
|
+
|
134
|
+
##
|
135
|
+
# Insert block data for all blocks in a single call
|
136
|
+
##
|
137
|
+
def bulk_insert(new_blocks, instance)
|
138
|
+
inserts = ''
|
139
|
+
new_blocks.each do |period_index, (values)|
|
140
|
+
inserts << \
|
141
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
142
|
+
when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
143
|
+
else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
144
|
+
end
|
132
145
|
end
|
146
|
+
sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
|
147
|
+
Database.execute sql
|
133
148
|
end
|
134
149
|
end
|
135
150
|
end
|
@@ -21,7 +21,6 @@ module ActiveDataFrame
|
|
21
21
|
end
|
22
22
|
|
23
23
|
deleted_indices = []
|
24
|
-
|
25
24
|
existing = blocks_between([bounds]).pluck(:data_frame_id, :period_index, *block_type::COLUMNS).map do |id, period_index, *block_values|
|
26
25
|
[period_index, [block_values, id]]
|
27
26
|
end.to_h
|
@@ -31,7 +30,10 @@ module ActiveDataFrame
|
|
31
30
|
if existing[index]
|
32
31
|
block = existing[index]
|
33
32
|
block.first[left..right] = chunk.to_a
|
34
|
-
|
33
|
+
if block.first.all?(&:zero?)
|
34
|
+
deleted_indices << index
|
35
|
+
existing.delete(index)
|
36
|
+
end
|
35
37
|
elsif chunk.any?(&:nonzero?)
|
36
38
|
new_blocks[index].first[left..right] = chunk.to_a
|
37
39
|
end
|
@@ -49,7 +51,9 @@ module ActiveDataFrame
|
|
49
51
|
get_bounds(range.first, range.exclude_end? ? range.end - 1 : range.end, index)
|
50
52
|
end
|
51
53
|
|
52
|
-
existing =
|
54
|
+
existing = self.class.suppress_logs{
|
55
|
+
blocks_between(all_bounds).pluck(:period_index, *block_type::COLUMNS).map{|pi, *values| [pi, values]}.to_h
|
56
|
+
}
|
53
57
|
result = M.blank(typecode: block_type::TYPECODE, columns: all_bounds.map(&:length).sum)
|
54
58
|
|
55
59
|
iterate_bounds(all_bounds) do |index, left, right, cursor, size|
|
@@ -42,7 +42,6 @@ module ActiveDataFrame
|
|
42
42
|
col_cases = cases[col].sort_by(&:begin).reduce([]) do |agg, col_case|
|
43
43
|
if agg.empty?
|
44
44
|
agg << col_case
|
45
|
-
agg
|
46
45
|
else
|
47
46
|
if agg[-1].end.succ == col_case.begin
|
48
47
|
agg[-1] = (agg[-1].begin..col_case.end)
|
@@ -96,9 +95,9 @@ module ActiveDataFrame
|
|
96
95
|
ids = data_frame_type.pluck(:id)
|
97
96
|
as_sql = blocks_between(
|
98
97
|
all_bounds,
|
99
|
-
block_scope: data_frame_type.unscoped
|
100
|
-
|
101
|
-
|
98
|
+
block_scope: data_frame_type.unscoped.where(
|
99
|
+
"#{data_frame_type.table_name}.id IN (SELECT id FROM (#{data_frame_type.select(:id).to_sql}) airport_ids)"
|
100
|
+
).joins("LEFT JOIN #{block_type.table_name} ON #{data_frame_type.table_name}.id = #{block_type.table_name}.data_frame_id")
|
102
101
|
).where(
|
103
102
|
block_type.table_name => {data_frame_type: data_frame_type.name }
|
104
103
|
).select(:period_index, :data_frame_id, *column_cases(case_map)).to_sql
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Description:
|
2
|
+
Generate a new data frame type, and optionally inject it into models that have such a data frame
|
3
|
+
|
4
|
+
Example:
|
5
|
+
|
6
|
+
# Generate a new MeterReading data frame type, with a block type of
|
7
|
+
# double and a block size of 48 data points
|
8
|
+
|
9
|
+
rails generate active_data_frame:install MeterReading double 48
|
10
|
+
|
11
|
+
# Generate a new Dimension data frame type, with a block type of
|
12
|
+
# float and a block size of 10 data points.
|
13
|
+
# Inject the data-type for use into the Iris model
|
14
|
+
|
15
|
+
rails generate active_data_frame:install Dimension float 10 Iris
|
16
|
+
|
17
|
+
#
|
18
|
+
# Generate a new status data frame type with an integer block type
|
19
|
+
#
|
20
|
+
rails generate active_data_frame:install Status integer
|
@@ -2,13 +2,11 @@ require 'rails/generators/active_record'
|
|
2
2
|
|
3
3
|
module ActiveDataFrame
|
4
4
|
class InstallGenerator < ActiveRecord::Generators::Base
|
5
|
-
desc "Generates a new data_frame type"
|
6
|
-
|
7
5
|
STREAM_TYPES = %w(bit byte integer long float double)
|
8
6
|
# Commandline options can be defined here using Thor-like options:
|
9
|
-
argument :type,
|
10
|
-
argument :columns,
|
11
|
-
argument :inject,
|
7
|
+
argument :type, type: :string, default: 'float', desc: "DataFrame type. One of(#{STREAM_TYPES*" ,"})"
|
8
|
+
argument :columns, type: :numeric, default: 512, desc: "Number of columns"
|
9
|
+
argument :inject, type: :array, default: []
|
12
10
|
|
13
11
|
def self.source_root
|
14
12
|
@source_root ||= File.join(File.dirname(__FILE__), 'templates')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wouter Coppieters
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -188,20 +188,20 @@ dependencies:
|
|
188
188
|
requirements:
|
189
189
|
- - "~>"
|
190
190
|
- !ruby/object:Gem::Version
|
191
|
-
version: 0.1.
|
191
|
+
version: 0.1.17
|
192
192
|
- - ">="
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 0.1.
|
194
|
+
version: 0.1.17
|
195
195
|
type: :runtime
|
196
196
|
prerelease: false
|
197
197
|
version_requirements: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: 0.1.
|
201
|
+
version: 0.1.17
|
202
202
|
- - ">="
|
203
203
|
- !ruby/object:Gem::Version
|
204
|
-
version: 0.1.
|
204
|
+
version: 0.1.17
|
205
205
|
description: An active data frame helper
|
206
206
|
email:
|
207
207
|
- wc@pico.net.nz
|
@@ -230,6 +230,7 @@ files:
|
|
230
230
|
- lib/active_data_frame/row.rb
|
231
231
|
- lib/active_data_frame/table.rb
|
232
232
|
- lib/active_data_frame/version.rb
|
233
|
+
- lib/generators/active_data_frame/USAGE
|
233
234
|
- lib/generators/active_data_frame/install_generator.rb
|
234
235
|
- lib/generators/active_data_frame/templates/has_concern.rb
|
235
236
|
- lib/generators/active_data_frame/templates/migration.rb
|