active_data_frame 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +218 -3
- data/active_data_frame.gemspec +1 -1
- data/lib/active_data_frame/data_frame_proxy.rb +5 -1
- data/lib/active_data_frame/database.rb +77 -62
- data/lib/active_data_frame/row.rb +7 -3
- data/lib/active_data_frame/table.rb +3 -4
- data/lib/active_data_frame/version.rb +1 -1
- data/lib/generators/active_data_frame/USAGE +20 -0
- data/lib/generators/active_data_frame/install_generator.rb +3 -5
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e6c248d13e0f7f10933eca32158e4fb33a080e3
|
4
|
+
data.tar.gz: 28d52390deef35b8e582942612989f99e2026ed3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9fd95c152778f43ea9d3d3e09160a22ed355989b5fdc5e2cbbfb1f10b2290aab4db07d04344acb595f30f28a98eaf1282b4a461611ba6c5999b5a060fc60ae77
|
7
|
+
data.tar.gz: c0b32d8827258e8e8cf38e051758d8de8a1784bb2a4b19cdaada75af5b9b1541b3c45d15e6d2f033ca07ad1ec9422fc290c11b44e41fd2df5ef08b68bf805c47
|
data/README.md
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
# ActiveDataFrame
|
2
2
|
|
3
|
-
ActiveDataFrame allows efficient writing, reading, and analytical queries on large tables of numerical data. You can think of it as a persistent NumPy or NArray with good support for slicing
|
4
|
-
and aggregates without needing to load the entire dataset into memory.
|
3
|
+
ActiveDataFrame allows efficient writing, reading, and analytical queries on large tables of numerical data. You can think of it as a persistent NumPy or NArray with good support for slicing and aggregates without needing the entire dataset in memory.
|
5
4
|
|
6
5
|
The library depends on ActiveRecord and currently supports the following relational databases:
|
6
|
+
|
7
7
|
* PostgreSQL
|
8
8
|
* MySQL
|
9
9
|
* SQLite
|
@@ -24,16 +24,231 @@ Or install it yourself as:
|
|
24
24
|
|
25
25
|
$ gem install active_data_frame
|
26
26
|
|
27
|
+
## Examples
|
28
|
+
|
29
|
+
### Using the generator
|
30
|
+
|
31
|
+
# Generate a new data frame named Statistic, with a datapoint type of double, and a block size of 100
|
32
|
+
$ rails generate active_data_frame:install Statistic double 100
|
33
|
+
|
34
|
+
# Then run migrations to create the underlying table
|
35
|
+
$ rake db:migrate
|
36
|
+
|
27
37
|
## Usage
|
38
|
+
### Generator
|
39
|
+
The easiest way to get started is to use the in-built generator to generate a new
|
40
|
+
`ActiveDataFrame`. This will generate the required migrations for the data frame
|
41
|
+
and generate a new module that you can include inside an `ActiveRecord` model to give it access to the frame.
|
42
|
+
|
43
|
+
```
|
44
|
+
# Generate a new MeterReading data frame type, with a block type of
|
45
|
+
# double and a block size of 48 data points
|
46
|
+
|
47
|
+
$ rails generate active_data_frame:install MeterReading double 48
|
48
|
+
|
49
|
+
# Generate a new Dimension data frame type, with a block type of
|
50
|
+
# float and a block size of 10 data points.
|
51
|
+
# Inject the data-type for use into the Iris model
|
52
|
+
|
53
|
+
$ rails generate active_data_frame:install Dimension float 10 Iris
|
54
|
+
|
55
|
+
#
|
56
|
+
# Generate a new status data frame type with an integer block type
|
57
|
+
#
|
58
|
+
$ rails generate active_data_frame:install Status integer
|
59
|
+
```
|
60
|
+
|
61
|
+
### Writing to a data frame
|
62
|
+
When you include a data frame in an ActiveRecord model, each instance of the model corresponds to a single row in the data frame. The columns are a series of points that stretch towards infinity in each direction.
|
63
|
+
|
64
|
+
By default columns are indexed by integers, but you can set a static or dynamic column map so that you can easily have columns indexed by time, enum columns or use any other data type that serves as a useful index.
|
65
|
+
|
66
|
+
You can write any number of data points to a row in the dataframe using #[]=
|
67
|
+
|
68
|
+
#E.g.
|
69
|
+
# Write to the row called readings from index 0. Here Sensor is the ActiveRecord model, readings is the name of the row
|
70
|
+
Sensor.first.readings[0] = 1,2,3
|
71
|
+
|
72
|
+
# Write to the row called readings from an offset at 1_000_000
|
73
|
+
Sensor.first.readings[1_000_000] = -10, -9, -8
|
74
|
+
|
75
|
+
# Writing to a row which has a column mapping applied, mapping times onto integer indexes
|
76
|
+
MeterChannel.first.readings['2001-01-01'] = [1.3, 3.4]
|
77
|
+
|
78
|
+
#If you have enum columns you can use the #[enum_name]= setter instead.
|
79
|
+
Iris.first.dimensions.sepal_length = 5.3
|
80
|
+
Iris.first.dimensions.petal_width = 4.3
|
81
|
+
|
82
|
+
# You can set data for multiple rows at once, by using the frame accessor on the model's class instead of an instance.
|
83
|
+
|
84
|
+
E.g.
|
85
|
+
# This sets the reading at index 1 to 5 for ALL sensors
|
86
|
+
Sensor.readings[1] = 5
|
87
|
+
|
88
|
+
# You can use AR queries to refine which set of rows you are updating at once.
|
89
|
+
# E.g.
|
90
|
+
MeterChannel.where("created_at < ?", "2001-01-01").readings['2001-01-01'] = [5,6,7]
|
91
|
+
|
92
|
+
ActiveDataFrame supports very quick writing of 1000's of values for a single row at a time. Don't be afraid to write large arrays of data like this.
|
93
|
+
|
94
|
+
### Reading from a data frame
|
95
|
+
Reading from a data frame is similar to writing and uses the #[] method.
|
96
|
+
You can read individual values, a range of values, and sparse selections of columns.
|
97
|
+
|
98
|
+
#E.g.
|
99
|
+
# Read a single value
|
100
|
+
Sensor.first.readings[0] # => Matrix(1x1)[...]
|
101
|
+
|
102
|
+
# Read a range of 3 values
|
103
|
+
Sensor.first.readings[0...3] # => Matrix(1x3)[...]
|
104
|
+
|
105
|
+
# Read some non contiguous values and ranges
|
106
|
+
Sensor.first.readings[5, 10, 4..7, 9..10] # => Matrix(1x8)[...]
|
107
|
+
|
108
|
+
#Reading from a row which has a column mapping that uses times
|
109
|
+
MeterChannel.first.readings['2001-01-01'...'2002-01-01'] # => Matrix(1xM)[....]
|
110
|
+
|
111
|
+
#If you have enum columns you can use the #[enum_name] getter for single columns
|
112
|
+
Iris.first.dimensions.sepal_length
|
113
|
+
Iris.first.dimensions.petal_width
|
114
|
+
|
115
|
+
# And use symbols as column indices (this assumes a specific ordering of enum columns)
|
116
|
+
Iris.first.dimensions[:sepal_length...:petal_width]
|
117
|
+
|
118
|
+
Similar to when writing data, you can also read data from multiple rows at once.
|
119
|
+
Just use the active data frame accessor on the model class instead of a model instance. E.g.
|
120
|
+
|
121
|
+
Sensor.readings[0...5] # => Matrix(Nx5)
|
122
|
+
|
123
|
+
|
124
|
+
### Deleting
|
125
|
+
You can use #clear(range_or_indices) to delete data.
|
126
|
+
|
127
|
+
Deleting data is equivalent to setting all data points to zero.
|
128
|
+
So the operation row[index] = [0, 0, 0, 0.....0] is equivalent
|
129
|
+
to the operation row.clear(index...end_index). ActiveDataFrame
|
130
|
+
will automatically trim empty blocks.
|
28
131
|
|
29
|
-
|
132
|
+
### Batching
|
133
|
+
If performing many small reads and writes from a data frame in a single atomic operation
|
134
|
+
it makes sense to do this in a single transaction. Active Data Frame provides the `ActiveDataFrame::Database.batch do ... end` method. This method will not only ensure your operations occur in a single transaction, but also that they are sent to the underlying database adapter as a single command.
|
30
135
|
|
136
|
+
### Analytical Queries
|
137
|
+
Any read of a dataframe returns an RMatrix instance. An RMatrix supports a large number of
|
138
|
+
statistical methods and list methods. (See the RMatrix readme for more details).
|
139
|
+
E.g.
|
140
|
+
|
141
|
+
cpu_loads = CPU.first.loads['2001-01-01'..'2005-01-01']
|
142
|
+
puts cpu_loads.avg
|
143
|
+
puts cpu_loads.stddev
|
144
|
+
puts cpu_loads.max
|
145
|
+
# ... and many more
|
146
|
+
|
147
|
+
However, in some cases you are dealing with so much data that it is not possible, or is too slow, to retrieve all the data at once and manipulate it in memory. ActiveDataFrame supports performing a number of aggregate methods directly in the database. These are #avg, #min, #max and #sum. The syntax for this is almost identical to an ordinary read.
|
148
|
+
|
149
|
+
CPU.loads.avg['2001-01-01'...'2005-01-01'] # The average CPU load per period over all CPUS
|
150
|
+
|
151
|
+
CPU.where(manufacturer: :intel).loads.min['2001-01-01'...'2005-01-01'] # The minimum CPU load per period over all intel CPUS
|
152
|
+
|
153
|
+
### Categorical data
|
154
|
+
ActiveDataFrame provides a very basic abstraction for storing categorical data. This is done by storing categories as an integer data frame, and providing a map from integers to categories. The library will then allow you to use the category names in place of the raw underlying integers.
|
155
|
+
E.g.
|
156
|
+
|
157
|
+
module HasStatus
|
158
|
+
include ActiveDataFrame::HasDataFrame('status', Blocks::StatusBlock, value_map: {
|
159
|
+
actual: 2,
|
160
|
+
estimated: 1,
|
161
|
+
unknown: 0
|
162
|
+
})
|
163
|
+
end
|
164
|
+
|
165
|
+
class CPU < ApplicationRecord
|
166
|
+
include HasStatus
|
167
|
+
end
|
168
|
+
|
169
|
+
The CPU model above includes a dataframe with a status mapping. We can now do things like
|
170
|
+
|
171
|
+
CPU.first.status[0] # => :unknown
|
172
|
+
CPU.first.status[0...5] # => [:unknown,:unknown,:unknown,:unknown,:unknown]
|
173
|
+
|
174
|
+
CPU.first.status[0] = :actual, :estimated
|
175
|
+
CPU.first.status[0...5] # => [:actual,:estimated,:unknown,:unknown,:unknown]
|
176
|
+
|
177
|
+
### Time-series data
|
178
|
+
We can use any datatype we like to index into a dataframe, so long as we can map it to an integer index. This makes active dataframes very well suited to storing large streams of interval data over time.
|
179
|
+
|
180
|
+
For example, we might define a mapping such that every half-hour period in time corresponds to a column in our dataframe. In the below example we might be counting the number of arrivals at an airport every half-hour.
|
181
|
+
|
182
|
+
|
183
|
+
module HasArrivals
|
184
|
+
include ActiveDataFrame::HasDataFrame('arrivals', Blocks::ArrivalBlock)
|
185
|
+
module ColumnMaps
|
186
|
+
def self.included(base)
|
187
|
+
base.arrivals_column_map Hash.new{|hash, time| ((time.to_time - Time.at(0)) / 1.hour).to_i rescue time.to_i }
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
class Airport < ApplicationRecord
|
193
|
+
include HasArrivals::ColumnMaps, HasArrivals
|
194
|
+
end
|
195
|
+
|
196
|
+
Now we can use any value that implements #to_time to index into our dataframe. This supports both single indexes and ranges (...).
|
197
|
+
E.g.
|
198
|
+
|
199
|
+
Airport.first.arrivals['2001-01-01'...'2002-01-01'] # => Matrix(1xM)[....]
|
200
|
+
|
201
|
+
|
202
|
+
### Column Mappings
|
203
|
+
We can use any datatype we like to index into a dataframe, so long as we can map it to an integer index. See the section on Time-series data for one example of this. Columns can also be aliases to categories. An example of this is using ActiveDataFrame to model the classic Iris dataset.
|
204
|
+
|
205
|
+
class Iris < ApplicationRecord
|
206
|
+
include HasDimensions
|
207
|
+
dimension_column_names %i(sepal_length sepal_width petal_length petal_width)
|
208
|
+
end
|
209
|
+
|
210
|
+
Here we have mapped the first four columns of our data frame to sepal_length, sepal_width, petal_length and petal_width.
|
211
|
+
|
212
|
+
When using symbols as column names ActiveDataFrame provides some syntactic sugar for easily slicing and dicing frames.
|
213
|
+
|
214
|
+
We can do things like:
|
215
|
+
|
216
|
+
* Extract a slice of data:
|
217
|
+
|
218
|
+
`iris_results = Iris.where(species: :setosa).dimension[:sepal_width..:petal_length]`
|
219
|
+
* Extract an entire column from a data-set using the column name:
|
220
|
+
|
221
|
+
`iris_results.sepal_width => V[[...]]`
|
222
|
+
* Extract an entire column from a data-set using the column name:
|
223
|
+
|
224
|
+
`iris_results.sepal_width => V[[...]]`
|
225
|
+
* Extract a single value from an instance:
|
226
|
+
|
227
|
+
`Iris.first.dimension.sepal_width.to_f`
|
228
|
+
|
229
|
+
* Set one or more values for an instance or row at once:
|
230
|
+
|
231
|
+
`Iris.first.dimension.sepal_width = 13`
|
232
|
+
`Iris.all.dimension.petal_length = 5.2,6.3,5.4,1.1`
|
233
|
+
|
234
|
+
### Configuration
|
235
|
+
ActiveDataFrame supports project-wide configuration using
|
236
|
+
|
237
|
+
ActiveDataFrame.config do |config|
|
238
|
+
config.[config_option_name] = [config_value]
|
239
|
+
end
|
240
|
+
|
241
|
+
Currently the following configuration options are supported:
|
242
|
+
|
243
|
+
* `suppress_logs` The queries generated by ActiveDataFrame are quite verbose. If you would like to suppress ActiveRecord logging for these queries, set this option to `true`
|
31
244
|
## Development
|
32
245
|
|
33
246
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
34
247
|
|
35
248
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
36
249
|
|
250
|
+
## Testing
|
251
|
+
|
37
252
|
## Contributing
|
38
253
|
|
39
254
|
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/active_data_frame. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
data/active_data_frame.gemspec
CHANGED
@@ -31,5 +31,5 @@ Gem::Specification.new do |spec|
|
|
31
31
|
spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
|
32
32
|
spec.add_development_dependency 'minitest-around', '0.4.1'
|
33
33
|
spec.add_runtime_dependency 'activerecord', '~> 5.0'
|
34
|
-
spec.add_runtime_dependency 'rmatrix', '~> 0.1.
|
34
|
+
spec.add_runtime_dependency 'rmatrix', '~> 0.1.17', '>=0.1.17'
|
35
35
|
end
|
@@ -57,8 +57,12 @@ module ActiveDataFrame
|
|
57
57
|
end
|
58
58
|
|
59
59
|
def method_missing(name, *args, &block)
|
60
|
+
if name.to_s.ends_with?(?=)
|
61
|
+
is_assignment = true
|
62
|
+
name = name.to_s.gsub(/=$/,'').to_sym
|
63
|
+
end
|
60
64
|
if column_name_map && column_map[name]
|
61
|
-
self[name]
|
65
|
+
is_assignment ? self.[]=(name, *args) : self[name]
|
62
66
|
else
|
63
67
|
super
|
64
68
|
end
|
@@ -15,15 +15,17 @@ module ActiveDataFrame
|
|
15
15
|
else
|
16
16
|
unless sql.empty?
|
17
17
|
ActiveRecord::Base.transaction do
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
ActiveDataFrame::DataFrameProxy.suppress_logs do
|
19
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
20
|
+
when 'sqlite3'.freeze
|
21
|
+
ActiveRecord::Base.connection.raw_connection.execute_batch sql
|
22
|
+
when 'mysql2'
|
23
|
+
sql.split(';').reject{|x| x.strip.empty?}.each do |stmt|
|
24
|
+
ActiveRecord::Base.connection.execute(stmt)
|
25
|
+
end
|
26
|
+
else
|
27
|
+
ActiveRecord::Base.connection.execute(sql)
|
24
28
|
end
|
25
|
-
else
|
26
|
-
ActiveRecord::Base.connection.execute(sql)
|
27
29
|
end
|
28
30
|
end
|
29
31
|
end
|
@@ -60,56 +62,16 @@ module ActiveDataFrame
|
|
60
62
|
# Update block data for all blocks in a single call
|
61
63
|
##
|
62
64
|
def bulk_update(existing)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
else
|
73
|
-
ids = existing.map {|_, (_, id)| id}
|
74
|
-
updates = block_type::COLUMNS.map.with_index do |column, column_idx|
|
75
|
-
[column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
|
76
|
-
end.to_h
|
77
|
-
update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
|
78
|
-
Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
|
79
|
-
#{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
|
80
|
-
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
81
|
-
AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
|
82
|
-
"
|
83
|
-
)
|
84
|
-
end
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def bulk_delete(id, indices)
|
89
|
-
ActiveDataFrame::DataFrameProxy.suppress_logs do
|
90
|
-
block_type.where(data_frame_id: id, period_index: indices).delete_all
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
##
|
95
|
-
# Insert block data for all blocks in a single call
|
96
|
-
##
|
97
|
-
def bulk_insert(new_blocks, instance)
|
98
|
-
ActiveDataFrame::DataFrameProxy.suppress_logs do
|
99
|
-
inserts = ''
|
100
|
-
new_blocks.each do |period_index, (values)|
|
101
|
-
inserts << \
|
102
|
-
case ActiveRecord::Base.connection_config[:adapter]
|
103
|
-
when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
104
|
-
else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
105
|
-
end
|
65
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
66
|
+
when 'postgresql'.freeze
|
67
|
+
#
|
68
|
+
# PostgreSQL Supports the fast setting of multiple update values that differ
|
69
|
+
# per row from a temporary table.
|
70
|
+
#
|
71
|
+
updates = ''
|
72
|
+
existing.each do |period_index, (values, df_id)|
|
73
|
+
updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
|
106
74
|
end
|
107
|
-
perform_insert(inserts)
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
def perform_update(updates)
|
112
|
-
ActiveDataFrame::DataFrameProxy.suppress_logs do
|
113
75
|
Database.execute(
|
114
76
|
<<-SQL
|
115
77
|
UPDATE #{block_type.table_name}
|
@@ -121,15 +83,68 @@ module ActiveDataFrame
|
|
121
83
|
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
122
84
|
SQL
|
123
85
|
)
|
124
|
-
|
86
|
+
#
|
87
|
+
# For MySQL we use the ON DUPLICATE KEY UPDATE functionality.
|
88
|
+
# This relies on there being a unique index dataframe and period index
|
89
|
+
# on the blocks table.
|
90
|
+
# This tends to be faster than the general CASE based solution below
|
91
|
+
# but slower than the PostgreSQL solution above
|
92
|
+
#
|
93
|
+
when 'mysql2'.freeze
|
94
|
+
# Fast bulk update
|
95
|
+
updates, on_duplicate = "", ""
|
96
|
+
existing.each do |period_index, (values, df_id)|
|
97
|
+
updates << "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
|
98
|
+
end
|
99
|
+
on_duplicate = block_type::COLUMNS.map do |cname|
|
100
|
+
"#{cname}=VALUES(#{cname})"
|
101
|
+
end.join(", ")
|
102
|
+
stmt = <<-SQL
|
103
|
+
INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')},data_frame_id,period_index,data_frame_type)
|
104
|
+
VALUES #{updates[0..-2]}
|
105
|
+
ON DUPLICATE KEY UPDATE #{on_duplicate}
|
106
|
+
SQL
|
107
|
+
Database.execute(stmt)
|
108
|
+
else
|
109
|
+
#
|
110
|
+
# General CASE based solution for multiple differing updates
|
111
|
+
# set per row.
|
112
|
+
# We use a CASE statement per column which determines the column
|
113
|
+
# to set based on the period index
|
114
|
+
#
|
115
|
+
ids = existing.map {|_, (_, id)| id}
|
116
|
+
updates = block_type::COLUMNS.map.with_index do |column, column_idx|
|
117
|
+
[column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
|
118
|
+
end.to_h
|
119
|
+
update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
|
120
|
+
Database.execute(<<-SQL
|
121
|
+
UPDATE #{block_type.table_name} SET #{update_statement} WHERE
|
122
|
+
#{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
|
123
|
+
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
124
|
+
AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
|
125
|
+
SQL
|
126
|
+
)
|
125
127
|
end
|
126
128
|
end
|
127
129
|
|
128
|
-
def
|
129
|
-
|
130
|
-
|
131
|
-
|
130
|
+
def bulk_delete(id, indices)
|
131
|
+
block_type.where(data_frame_id: id, period_index: indices).delete_all
|
132
|
+
end
|
133
|
+
|
134
|
+
##
|
135
|
+
# Insert block data for all blocks in a single call
|
136
|
+
##
|
137
|
+
def bulk_insert(new_blocks, instance)
|
138
|
+
inserts = ''
|
139
|
+
new_blocks.each do |period_index, (values)|
|
140
|
+
inserts << \
|
141
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
142
|
+
when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
143
|
+
else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
144
|
+
end
|
132
145
|
end
|
146
|
+
sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
|
147
|
+
Database.execute sql
|
133
148
|
end
|
134
149
|
end
|
135
150
|
end
|
@@ -21,7 +21,6 @@ module ActiveDataFrame
|
|
21
21
|
end
|
22
22
|
|
23
23
|
deleted_indices = []
|
24
|
-
|
25
24
|
existing = blocks_between([bounds]).pluck(:data_frame_id, :period_index, *block_type::COLUMNS).map do |id, period_index, *block_values|
|
26
25
|
[period_index, [block_values, id]]
|
27
26
|
end.to_h
|
@@ -31,7 +30,10 @@ module ActiveDataFrame
|
|
31
30
|
if existing[index]
|
32
31
|
block = existing[index]
|
33
32
|
block.first[left..right] = chunk.to_a
|
34
|
-
|
33
|
+
if block.first.all?(&:zero?)
|
34
|
+
deleted_indices << index
|
35
|
+
existing.delete(index)
|
36
|
+
end
|
35
37
|
elsif chunk.any?(&:nonzero?)
|
36
38
|
new_blocks[index].first[left..right] = chunk.to_a
|
37
39
|
end
|
@@ -49,7 +51,9 @@ module ActiveDataFrame
|
|
49
51
|
get_bounds(range.first, range.exclude_end? ? range.end - 1 : range.end, index)
|
50
52
|
end
|
51
53
|
|
52
|
-
existing =
|
54
|
+
existing = self.class.suppress_logs{
|
55
|
+
blocks_between(all_bounds).pluck(:period_index, *block_type::COLUMNS).map{|pi, *values| [pi, values]}.to_h
|
56
|
+
}
|
53
57
|
result = M.blank(typecode: block_type::TYPECODE, columns: all_bounds.map(&:length).sum)
|
54
58
|
|
55
59
|
iterate_bounds(all_bounds) do |index, left, right, cursor, size|
|
@@ -42,7 +42,6 @@ module ActiveDataFrame
|
|
42
42
|
col_cases = cases[col].sort_by(&:begin).reduce([]) do |agg, col_case|
|
43
43
|
if agg.empty?
|
44
44
|
agg << col_case
|
45
|
-
agg
|
46
45
|
else
|
47
46
|
if agg[-1].end.succ == col_case.begin
|
48
47
|
agg[-1] = (agg[-1].begin..col_case.end)
|
@@ -96,9 +95,9 @@ module ActiveDataFrame
|
|
96
95
|
ids = data_frame_type.pluck(:id)
|
97
96
|
as_sql = blocks_between(
|
98
97
|
all_bounds,
|
99
|
-
block_scope: data_frame_type.unscoped
|
100
|
-
|
101
|
-
|
98
|
+
block_scope: data_frame_type.unscoped.where(
|
99
|
+
"#{data_frame_type.table_name}.id IN (SELECT id FROM (#{data_frame_type.select(:id).to_sql}) airport_ids)"
|
100
|
+
).joins("LEFT JOIN #{block_type.table_name} ON #{data_frame_type.table_name}.id = #{block_type.table_name}.data_frame_id")
|
102
101
|
).where(
|
103
102
|
block_type.table_name => {data_frame_type: data_frame_type.name }
|
104
103
|
).select(:period_index, :data_frame_id, *column_cases(case_map)).to_sql
|
@@ -0,0 +1,20 @@
|
|
1
|
+
Description:
|
2
|
+
Generate a new data frame type, and optionally inject it into models that have such a data frame
|
3
|
+
|
4
|
+
Example:
|
5
|
+
|
6
|
+
# Generate a new MeterReading data frame type, with a block type of
|
7
|
+
# double and a block size of 48 data points
|
8
|
+
|
9
|
+
rails generate active_data_frame:install MeterReading double 48
|
10
|
+
|
11
|
+
# Generate a new Dimension data frame type, with a block type of
|
12
|
+
# float and a block size of 10 data points.
|
13
|
+
# Inject the data-type for use into the Iris model
|
14
|
+
|
15
|
+
rails generate active_data_frame:install Dimension float 10 Iris
|
16
|
+
|
17
|
+
#
|
18
|
+
# Generate a new status data frame type with an integer block type
|
19
|
+
#
|
20
|
+
rails generate active_data_frame:install Status integer
|
@@ -2,13 +2,11 @@ require 'rails/generators/active_record'
|
|
2
2
|
|
3
3
|
module ActiveDataFrame
|
4
4
|
class InstallGenerator < ActiveRecord::Generators::Base
|
5
|
-
desc "Generates a new data_frame type"
|
6
|
-
|
7
5
|
STREAM_TYPES = %w(bit byte integer long float double)
|
8
6
|
# Commandline options can be defined here using Thor-like options:
|
9
|
-
argument :type,
|
10
|
-
argument :columns,
|
11
|
-
argument :inject,
|
7
|
+
argument :type, type: :string, default: 'float', desc: "DataFrame type. One of(#{STREAM_TYPES*" ,"})"
|
8
|
+
argument :columns, type: :numeric, default: 512, desc: "Number of columns"
|
9
|
+
argument :inject, type: :array, default: []
|
12
10
|
|
13
11
|
def self.source_root
|
14
12
|
@source_root ||= File.join(File.dirname(__FILE__), 'templates')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wouter Coppieters
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -188,20 +188,20 @@ dependencies:
|
|
188
188
|
requirements:
|
189
189
|
- - "~>"
|
190
190
|
- !ruby/object:Gem::Version
|
191
|
-
version: 0.1.
|
191
|
+
version: 0.1.17
|
192
192
|
- - ">="
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 0.1.
|
194
|
+
version: 0.1.17
|
195
195
|
type: :runtime
|
196
196
|
prerelease: false
|
197
197
|
version_requirements: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: 0.1.
|
201
|
+
version: 0.1.17
|
202
202
|
- - ">="
|
203
203
|
- !ruby/object:Gem::Version
|
204
|
-
version: 0.1.
|
204
|
+
version: 0.1.17
|
205
205
|
description: An active data frame helper
|
206
206
|
email:
|
207
207
|
- wc@pico.net.nz
|
@@ -230,6 +230,7 @@ files:
|
|
230
230
|
- lib/active_data_frame/row.rb
|
231
231
|
- lib/active_data_frame/table.rb
|
232
232
|
- lib/active_data_frame/version.rb
|
233
|
+
- lib/generators/active_data_frame/USAGE
|
233
234
|
- lib/generators/active_data_frame/install_generator.rb
|
234
235
|
- lib/generators/active_data_frame/templates/has_concern.rb
|
235
236
|
- lib/generators/active_data_frame/templates/migration.rb
|