active_data_frame 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +9 -1
- data/active_data_frame-0.1.1.gem +0 -0
- data/active_data_frame.gemspec +5 -1
- data/active_data_frame.todo +13 -36
- data/examples.rb +46 -0
- data/lib/active_data_frame.rb +2 -0
- data/lib/active_data_frame/bounds.rb +4 -0
- data/lib/active_data_frame/data_frame_proxy.rb +48 -18
- data/lib/active_data_frame/database.rb +115 -0
- data/lib/active_data_frame/group_proxy.rb +40 -0
- data/lib/active_data_frame/has_data_frame.rb +298 -107
- data/lib/active_data_frame/point.rb +4 -0
- data/lib/active_data_frame/row.rb +22 -68
- data/lib/active_data_frame/table.rb +13 -14
- data/lib/active_data_frame/version.rb +1 -1
- data/lib/generators/active_data_frame/install_generator.rb +13 -5
- data/lib/generators/active_data_frame/templates/has_concern.rb +1 -4
- data/lib/generators/active_data_frame/templates/migration.rb +1 -3
- metadata +72 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c368f1ed1f3fc78c0e9f81b0d2bd7cc9f50141a
|
4
|
+
data.tar.gz: 80fa0cfdeed12b5b41d7556ec9c019670827e934
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2cc97b56fe384be682c9631a06c108b2524434230df5f4ac4949300339fadea0dcbca0f1efb9822bd04c3a43a7ae2374a3dbad02706793cfc5f8fa42600920b
|
7
|
+
data.tar.gz: 7deccde31e9d8a99b31831d2af96227cdf9d087297321b531c79b4327b9bb63f38e0fa026869a94ffb4d5ea3cb5e9e61c5805328cf7bb8248b26e54f95f7fc40
|
data/Rakefile
CHANGED
Binary file
|
data/active_data_frame.gemspec
CHANGED
@@ -23,6 +23,10 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
24
|
spec.add_development_dependency "pry-byebug", "~> 3.4.0", '>= 3.4.0'
|
25
25
|
spec.add_development_dependency 'pry', '~> 0.10.2', '>= 0.10.0'
|
26
|
-
spec.
|
26
|
+
spec.add_development_dependency 'pg'
|
27
|
+
spec.add_development_dependency 'minitest', '~>5.11'
|
28
|
+
spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
|
29
|
+
spec.add_development_dependency 'minitest-around', '0.4.1'
|
30
|
+
spec.add_runtime_dependency 'activerecord', '~> 5.0'
|
27
31
|
spec.add_runtime_dependency 'rmatrix', '~> 0.1.10', '>=0.1.10'
|
28
32
|
end
|
data/active_data_frame.todo
CHANGED
@@ -1,37 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
☐ Refactor + Tidy
|
8
|
-
☐ Tests
|
9
|
-
☐ Experiment with MonetDB speed
|
10
|
-
☐ Check support for different numeric/string/bool.etc types
|
11
|
-
✔ Experiment with single precision @done (17-03-31 08:18)
|
12
|
-
ActiveRecordMonetDBAdapter:
|
13
|
-
☐ Work on support for MonetDB
|
14
|
-
|
15
|
-
ActiveDataFrame:
|
16
|
-
✔ Refactor grouping/summing code @done (17-03-31 08:20)
|
17
|
-
✔ Allow includes to combine frames @done (17-03-27 10:36)
|
18
|
-
✔ Performance test on ICP data @done (17-03-27 08:41)
|
19
|
-
✔ Alternate RDBMS support (SQLLite, MySQL) @done (17-03-27 09:58)
|
20
|
-
|
21
|
-
|
22
|
-
Utilities:
|
23
|
-
☐ KMeans clustering and DBScan built in to multi-d array
|
24
|
-
|
25
|
-
Later:
|
26
|
-
☐ Build generic Merge/Cache structure which will either cache infinite columns or rows
|
27
|
-
- class Unit
|
28
|
-
- df_cache :all_loads, ::loads, direction: :row
|
29
|
-
- end
|
30
|
-
|
31
|
-
Ruby dataframe library inspiration:
|
32
|
-
- Integration with Nyaplot
|
33
|
-
- Integration with Statsample
|
34
|
-
|
1
|
+
Priorities:
|
2
|
+
☐ Ensure delete/clear works
|
3
|
+
☐ rmatrix tests
|
4
|
+
☐ Update README.md
|
5
|
+
☐ Use MMAP of status/enums
|
6
|
+
☐ Support SQLite + MySQL, MonetDB
|
35
7
|
✔ Generator creates A migration and data_frame and block classes. Block/DataFrame classes have a type, a period unit and a period length @done (17-01-12 10:29)
|
36
8
|
✔ Type is: @done (17-01-12 10:29)
|
37
9
|
✔ Bit @done (17-01-12 10:29)
|
@@ -79,5 +51,10 @@ Ruby dataframe library inspiration:
|
|
79
51
|
✔ Finish RMatrix @done (17-03-02 09:01)
|
80
52
|
|
81
53
|
RMatrix:
|
82
|
-
✔ Ensure assignment works @done (
|
83
|
-
|
54
|
+
✔ Ensure assignment works @done (18-04-03 18:58)
|
55
|
+
✔ Raw is simply a copy of self without mappings @done (18-04-03 18:58)
|
56
|
+
ActiveDataFrame:
|
57
|
+
✔ dimensions_minimum @done (18-04-03 18:58)
|
58
|
+
✔ dimensions_maximum @done (18-04-03 18:58)
|
59
|
+
✔ dimensions_sum @done (18-04-03 18:58)
|
60
|
+
✔ dimensions_average @done (18-04-03 18:58)
|
data/examples.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
|
2
|
+
# Get times of day where there was a price spike in wellington
|
3
|
+
Icp.where(region: :wellington).loads.idx_where_sum_gte(Time.now..1.day.from_now, 12_000)
|
4
|
+
|
5
|
+
# Get current load for all Icps, grouped by :region, :customer_class, :tariff
|
6
|
+
Icp.include_loads(Time.now).with_groups(:region, :customer_class, :tariff).sum("\"#{Time.now}\"")
|
7
|
+
|
8
|
+
# Get the next day's aggregate usage for Auckland residential customers
|
9
|
+
Icp.where(region: :auckland, customer_class: :residential).loads.sum(Time.now..1.day.from_now)
|
10
|
+
|
11
|
+
# Get a year's worth of load for a single ICP
|
12
|
+
Icp.first.load[Time.now..1.year.from_now]
|
13
|
+
|
14
|
+
# Get a day's worth of load for many ICPs
|
15
|
+
Icp.where(tariff: :un).loads[Time.now..1.day.from_now]
|
16
|
+
|
17
|
+
# Get the average load over a day for many ICPs
|
18
|
+
Icp.where(tariff: :un).loads.avg(Time.now..1.day.from_now)
|
19
|
+
|
20
|
+
# Count icps which have more than 5.5kw of load at this point in time
|
21
|
+
Icp.include_loads(Time.now).where("\"%s\" > ?" % Time.now, 5.5).count
|
22
|
+
|
23
|
+
|
24
|
+
# See the largest sepal length seen for each species
|
25
|
+
Iris.with_groups(:species).max(:sepal_length)
|
26
|
+
|
27
|
+
# Get individual iris sepal_length
|
28
|
+
Iris.first.dimension.sepal_length
|
29
|
+
|
30
|
+
# Get multiple dimensions for individual iris
|
31
|
+
Iris.first.dimension[:sepal_length, :petal_width]
|
32
|
+
|
33
|
+
# Get range of dimensions for individual iris
|
34
|
+
Iris.first.dimension[:sepal_length..:petal_width]
|
35
|
+
|
36
|
+
# Get range of dimensions for all iris versicolors
|
37
|
+
dimensions = Iris.where(species: :versicolor).dimensions[:sepal_length..:petal_width]
|
38
|
+
|
39
|
+
# Chop data as needed
|
40
|
+
sepal_lengths = dimensions.sepal_length
|
41
|
+
sepal_lengths_petal_widths = dimensions[[:sepal_length, :petal_width]]
|
42
|
+
|
43
|
+
selected_iris = dimensions[Iris.where(species: :versicolor).first(5)]
|
44
|
+
|
45
|
+
# Look at RMatrix API for matrix functionality
|
46
|
+
#
|
data/lib/active_data_frame.rb
CHANGED
@@ -1,31 +1,58 @@
|
|
1
1
|
module ActiveDataFrame
|
2
|
+
|
3
|
+
require_relative 'point'
|
4
|
+
require_relative 'bounds'
|
5
|
+
|
2
6
|
class DataFrameProxy
|
3
|
-
attr_accessor :block_type, :data_frame_type, :block_type_name
|
4
|
-
|
5
|
-
|
6
|
-
self.
|
7
|
-
self.
|
7
|
+
attr_accessor :block_type, :data_frame_type, :block_type_name, :value_map, :singular_df_name, :plural_df_name
|
8
|
+
|
9
|
+
def initialize(block_type, data_frame_type, value_map: nil, singular_df_name: '', plural_df_name: '')
|
10
|
+
self.block_type = block_type
|
11
|
+
self.data_frame_type = data_frame_type
|
12
|
+
self.block_type_name = block_type.table_name.gsub(/_blocks$/,'').gsub(/^blocks_/,'')
|
13
|
+
self.value_map = value_map
|
14
|
+
self.singular_df_name = singular_df_name
|
15
|
+
self.plural_df_name = plural_df_name
|
16
|
+
end
|
17
|
+
|
18
|
+
def reverse_value_map
|
19
|
+
@reverse_value_map ||= value_map.invert
|
8
20
|
end
|
9
21
|
|
10
22
|
def [](*ranges)
|
11
|
-
get(extract_ranges(ranges))
|
23
|
+
result = get(extract_ranges(ranges))
|
24
|
+
if @value_map
|
25
|
+
# TODO Multi-dimensions #map would be nice
|
26
|
+
result.to_a.map{|row| row.kind_of?(Array) ? row.map(&reverse_value_map.method(:[])) : reverse_value_map[row]}
|
27
|
+
else
|
28
|
+
result
|
29
|
+
end
|
12
30
|
end
|
13
31
|
|
14
32
|
def []=(from, values)
|
33
|
+
values = Array(values).flatten.map(&@value_map.method(:[])) if @value_map
|
15
34
|
from = column_map[from] if column_map && column_map[from]
|
16
35
|
set(from, M[values, typecode: block_type::TYPECODE].to_a.flatten)
|
17
36
|
end
|
18
37
|
|
38
|
+
def clear(*ranges)
|
39
|
+
clear(ex)
|
40
|
+
end
|
41
|
+
|
19
42
|
def column_map
|
20
|
-
data_frame_type.column_map(self.
|
43
|
+
data_frame_type.column_map(self.singular_df_name)
|
21
44
|
end
|
22
45
|
|
23
46
|
def column_name_map
|
24
|
-
data_frame_type.column_name_map(self.
|
47
|
+
data_frame_type.column_name_map(self.singular_df_name)
|
25
48
|
end
|
26
49
|
|
27
50
|
def reverse_column_map
|
28
|
-
data_frame_type.reverse_column_map(self.
|
51
|
+
data_frame_type.reverse_column_map(self.singular_df_name)
|
52
|
+
end
|
53
|
+
|
54
|
+
def database
|
55
|
+
@database ||= Database.for_types(block: block_type, df: data_frame_type)
|
29
56
|
end
|
30
57
|
|
31
58
|
def method_missing(name, *args, &block)
|
@@ -42,7 +69,7 @@ module ActiveDataFrame
|
|
42
69
|
case range
|
43
70
|
when Range then range
|
44
71
|
when Fixnum then range..range
|
45
|
-
else raise "Unexpected index #{range}"
|
72
|
+
else raise "Unexpected index for data frame proxy #{range}, expecting either a Range or an Integer"
|
46
73
|
end
|
47
74
|
end
|
48
75
|
end
|
@@ -51,9 +78,6 @@ module ActiveDataFrame
|
|
51
78
|
0
|
52
79
|
end
|
53
80
|
|
54
|
-
def flatten_ranges(ranges)
|
55
|
-
end
|
56
|
-
|
57
81
|
def unmap_ranges(ranges, map)
|
58
82
|
ranges.map do |range|
|
59
83
|
case range
|
@@ -71,15 +95,17 @@ module ActiveDataFrame
|
|
71
95
|
from_block_offset = from % block_type::BLOCK_SIZE
|
72
96
|
to_block_index = to / block_type::BLOCK_SIZE
|
73
97
|
to_block_offset = to % block_type::BLOCK_SIZE
|
74
|
-
return
|
75
|
-
|
76
|
-
|
98
|
+
return Bounds.new(
|
99
|
+
Point.new(from_block_index, from_block_offset, from),
|
100
|
+
Point.new(to_block_index, to_block_offset, to),
|
77
101
|
(to - from) + 1,
|
78
102
|
index
|
79
103
|
)
|
80
104
|
end
|
81
105
|
|
82
106
|
def self.suppress_logs
|
107
|
+
#TODO Make optional
|
108
|
+
return yield
|
83
109
|
ActiveRecord::Base.logger, old_logger = nil, ActiveRecord::Base.logger
|
84
110
|
yield.tap do
|
85
111
|
ActiveRecord::Base.logger = old_logger
|
@@ -101,11 +127,15 @@ module ActiveDataFrame
|
|
101
127
|
end
|
102
128
|
end
|
103
129
|
|
130
|
+
def match_range(from, to)
|
131
|
+
from == to ? from : from..to
|
132
|
+
end
|
133
|
+
|
104
134
|
def blocks_between(bounds, block_scope: scope)
|
105
135
|
bounds[1..-1].reduce(
|
106
|
-
block_scope.where( block_type.table_name => { period_index: (bounds[0].from.index
|
136
|
+
block_scope.where( block_type.table_name => { period_index: match_range(bounds[0].from.index,bounds[0].to.index)})
|
107
137
|
) do | or_chain, bound|
|
108
|
-
or_chain.or(block_scope.where( block_type.table_name => { period_index: (bound.from.index
|
138
|
+
or_chain.or(block_scope.where( block_type.table_name => { period_index: match_range(bound.from.index,bound.to.index)}))
|
109
139
|
end
|
110
140
|
end
|
111
141
|
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
module ActiveDataFrame
|
2
|
+
class Database
|
3
|
+
|
4
|
+
def self.batching
|
5
|
+
!!Thread.current[:active_data_frame_batching]
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.batching=(value)
|
9
|
+
Thread.current[:active_data_frame_batching] = !!value
|
10
|
+
end
|
11
|
+
|
12
|
+
# Not thread safe!
|
13
|
+
def self.execute(sql)
|
14
|
+
if ActiveDataFrame::Database.batching
|
15
|
+
Thread.current[:batch] << sql << ?;
|
16
|
+
else
|
17
|
+
ActiveRecord::Base.transaction do
|
18
|
+
ActiveRecord::Base.connection.execute sql
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.flush!
|
24
|
+
execute(Thread.current[:batch])
|
25
|
+
Thread.current[:batch] = ''
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.for_types(block:, df:)
|
29
|
+
(@@configs ||= {})[[block, df]] ||= Database.new(block, df)
|
30
|
+
end
|
31
|
+
|
32
|
+
attr_reader :block_type, :data_frame_type
|
33
|
+
|
34
|
+
def initialize(block_type, data_frame_type)
|
35
|
+
@block_type = block_type
|
36
|
+
@data_frame_type = data_frame_type
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.batch
|
40
|
+
self.batching, prev_batch = true, self.batching
|
41
|
+
Thread.current[:batch] ||= ''
|
42
|
+
ActiveRecord::Base.transaction do
|
43
|
+
yield
|
44
|
+
end
|
45
|
+
ensure
|
46
|
+
self.batching = prev_batch
|
47
|
+
flush! unless self.batching
|
48
|
+
end
|
49
|
+
##
|
50
|
+
# Update block data for all blocks in a single call
|
51
|
+
##
|
52
|
+
def bulk_update(existing)
|
53
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
54
|
+
when 'postgresql'.freeze
|
55
|
+
# Fast bulk update
|
56
|
+
updates = ''
|
57
|
+
existing.each do |period_index, (values, df_id)|
|
58
|
+
updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
|
59
|
+
end
|
60
|
+
perform_update(updates)
|
61
|
+
else
|
62
|
+
ids = existing.map {|_, (_, id)| id}
|
63
|
+
updates = block_type::COLUMNS.map.with_index do |column, column_idx|
|
64
|
+
[column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
|
65
|
+
end.to_h
|
66
|
+
update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
|
67
|
+
Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
|
68
|
+
#{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
|
69
|
+
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
70
|
+
AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
|
71
|
+
"
|
72
|
+
)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
##
|
77
|
+
# Insert block data for all blocks in a single call
|
78
|
+
##
|
79
|
+
def bulk_insert(new_blocks, instance)
|
80
|
+
inserts = ''
|
81
|
+
new_blocks.each do |period_index, (values)|
|
82
|
+
inserts << \
|
83
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
84
|
+
when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
85
|
+
else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
86
|
+
end
|
87
|
+
end
|
88
|
+
perform_insert(inserts)
|
89
|
+
end
|
90
|
+
|
91
|
+
def bulk_delete(blocks)
|
92
|
+
binding.pry
|
93
|
+
end
|
94
|
+
|
95
|
+
def perform_update(updates)
|
96
|
+
Database.execute(
|
97
|
+
<<-SQL
|
98
|
+
UPDATE #{block_type.table_name}
|
99
|
+
SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
|
100
|
+
FROM(
|
101
|
+
VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
|
102
|
+
WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
|
103
|
+
AND #{block_type.table_name}.period_index = t.period_index
|
104
|
+
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
105
|
+
SQL
|
106
|
+
)
|
107
|
+
true
|
108
|
+
end
|
109
|
+
|
110
|
+
def perform_insert(inserts)
|
111
|
+
sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
|
112
|
+
Database.execute sql
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module ActiveDataFrame
|
2
|
+
class GroupProxy
|
3
|
+
attr_accessor :groups
|
4
|
+
def initialize(groups)
|
5
|
+
self.groups = groups
|
6
|
+
end
|
7
|
+
|
8
|
+
def min(column_name)
|
9
|
+
aggregate('minimum', column_name)
|
10
|
+
end
|
11
|
+
|
12
|
+
def max(column_name)
|
13
|
+
aggregate('maximum', column_name)
|
14
|
+
end
|
15
|
+
|
16
|
+
def sum(column_name)
|
17
|
+
aggregate('sum', column_name)
|
18
|
+
end
|
19
|
+
|
20
|
+
def average(column_name)
|
21
|
+
aggregate('average', column_name)
|
22
|
+
end
|
23
|
+
|
24
|
+
def count
|
25
|
+
aggregate('count')
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
def aggregate *agg
|
30
|
+
counts = self.groups.send(*agg)
|
31
|
+
grouped = {}
|
32
|
+
counts.each do |keys, value|
|
33
|
+
keys = Array(keys)
|
34
|
+
child = keys[0..-2].reduce(grouped){|parent, key| parent[key] ||= {}}
|
35
|
+
child[keys[-1]] = value
|
36
|
+
end
|
37
|
+
grouped
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -1,116 +1,170 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
|
-
|
4
1
|
module ActiveDataFrame
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
2
|
+
#
|
3
|
+
# Modules can include HasDataFrame('frame_name', FrameBlockType) to gain data frame capabilities
|
4
|
+
# This method will expose class level and row (Active Record instance) level accessors to the underlying data frame.
|
5
|
+
#
|
6
|
+
# E.g.
|
7
|
+
#
|
8
|
+
# module HasBar
|
9
|
+
# include HasDataFrame('bars', BarBlock)
|
10
|
+
# end
|
11
|
+
#
|
12
|
+
# class Foo
|
13
|
+
# include HasBar
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# # Select all bars from index 0 to 40, for all foos
|
17
|
+
# Foo.bars[0..40]
|
18
|
+
#
|
19
|
+
# Select all bars from index 0 to 40, for foo with id: 1
|
20
|
+
# Foo.find(1).bars[0..40]
|
21
|
+
#
|
22
|
+
# # Find the average bar size for Foo 1 from index 5 to 30
|
23
|
+
# Foo.find(1).bars[5..30].avg
|
24
|
+
#
|
25
|
+
# Find the average bar size for the first 10 foos from index 13..43
|
26
|
+
# Foo.limit(10).bars.avg[13..43]
|
27
|
+
#
|
28
|
+
# Find the sum size for all foos where baz == boo from index 13..43
|
29
|
+
# Foo.where(baz: :boo).bars.sum[13..43]
|
30
|
+
#
|
31
|
+
def self.HasDataFrame(singular_table_name, block_type, table_name: singular_table_name, value_map: nil, &block)
|
32
|
+
Module.new do
|
33
|
+
define_singleton_method(:included) do |base|
|
34
|
+
# If somebody includes our dataframe enabled module we execute the following
|
35
|
+
base.define_singleton_method(:included) do |decorated|
|
36
|
+
block[decorated] if block
|
37
|
+
decorated.extend(base::ClassMethods) if defined?(base::ClassMethods)
|
14
38
|
|
15
|
-
|
16
|
-
|
17
|
-
|
39
|
+
# add our class level methods
|
40
|
+
decorated.extend(
|
41
|
+
ActiveDataFrame.build_module_class_methods(singular_table_name, block_type, table_name: table_name, value_map: value_map)
|
42
|
+
)
|
18
43
|
|
19
|
-
|
20
|
-
|
21
|
-
end
|
44
|
+
# Add our instance level methods
|
45
|
+
decorated.class_eval do
|
22
46
|
|
23
|
-
|
24
|
-
|
25
|
-
|
47
|
+
if value_map
|
48
|
+
decorated.const_set(singular_table_name.underscore.camelize, ActiveDataFrame.build_dot_accessible_hash(value_map))
|
49
|
+
end
|
26
50
|
|
27
|
-
|
28
|
-
|
29
|
-
|
51
|
+
# Provide memoised reference to DF row
|
52
|
+
define_method singular_table_name do
|
53
|
+
(@data_frame_proxies ||= {})[singular_table_name] ||= Row.new(
|
54
|
+
block_type,
|
55
|
+
self.class,
|
56
|
+
self,
|
57
|
+
value_map: value_map,
|
58
|
+
singular_df_name: singular_table_name,
|
59
|
+
plural_df_name: table_name
|
60
|
+
)
|
61
|
+
end
|
30
62
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
63
|
+
# We provide our own inspect implementation which will include in the output
|
64
|
+
# selected dataframe attributes that do not reside on the parent table
|
65
|
+
define_method :inspect do
|
66
|
+
inspection = "not initialized"
|
67
|
+
if defined?(@attributes) && @attributes
|
68
|
+
inspection = @attributes.keys.collect { |name|
|
69
|
+
if has_attribute?(name)
|
70
|
+
"#{name}: #{attribute_for_inspect(name)}"
|
71
|
+
end
|
72
|
+
}.compact.join(", ")
|
73
|
+
end
|
74
|
+
"<#{self.class} #{inspection}>"
|
75
|
+
end
|
76
|
+
end
|
39
77
|
end
|
40
|
-
grouped
|
41
78
|
end
|
79
|
+
end
|
42
80
|
end
|
43
81
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
define_method(:inspect){
|
54
|
-
inspection = "not initialized"
|
55
|
-
if defined?(@attributes) && @attributes
|
56
|
-
inspection = @attributes.keys.collect { |name|
|
57
|
-
if has_attribute?(name)
|
58
|
-
"#{name}: #{attribute_for_inspect(name)}"
|
59
|
-
end
|
60
|
-
}.compact.join(", ")
|
82
|
+
#
|
83
|
+
# Define methods on our hash to easily access any values that are indexed by a symbol key
|
84
|
+
# and that do not clash with existing methods on the Hash
|
85
|
+
#
|
86
|
+
def self.build_dot_accessible_hash(hash)
|
87
|
+
hash.dup.tap do |map|
|
88
|
+
map.each do |key, value|
|
89
|
+
if(key.kind_of?(Symbol) && !hash.respond_to?(key))
|
90
|
+
map.define_singleton_method(key){value}
|
61
91
|
end
|
62
|
-
|
63
|
-
}
|
92
|
+
end
|
64
93
|
end
|
94
|
+
end
|
65
95
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
96
|
+
#
|
97
|
+
# The class methods that are defined on any class that includes our dataframe enabled module
|
98
|
+
#
|
99
|
+
def self.build_module_class_methods(singular_table_name, block_type, table_name: singular_table_name, value_map: nil)
|
100
|
+
Module.new do
|
70
101
|
|
71
|
-
|
72
|
-
|
73
|
-
|
102
|
+
# The key ADF functionality is exposed here.
|
103
|
+
# This defines a new `table_name` accessor on the class which gives you access to a dataframe proxy by the name of `table_name`
|
104
|
+
#
|
105
|
+
# E.g.
|
106
|
+
#
|
107
|
+
# class Foo
|
108
|
+
# include HasBar
|
109
|
+
# end
|
110
|
+
#
|
111
|
+
# # Select all bars from index 0 to 40, for all foos
|
112
|
+
# Foo.bars[0..40]
|
113
|
+
#
|
114
|
+
# Select all bars from index 0 to 40, for foo with id: 1
|
115
|
+
# Foo.find(1).bars[0..40]
|
116
|
+
#
|
117
|
+
# # Find the average bar size for Foo 1 from index 5 to 30
|
118
|
+
# Foo.find(1).bars[5..30].avg
|
119
|
+
#
|
120
|
+
# Find the average bar size for the first 10 foos from index 13..43
|
121
|
+
# Foo.limit(10).bars.avg[13..43]
|
122
|
+
#
|
123
|
+
# Find the sum size for all foos where baz == boo from index 13..43
|
124
|
+
# Foo.where(baz: :boo).bars.sum[13..43]
|
125
|
+
#
|
126
|
+
define_method(table_name) do
|
127
|
+
Table.new(
|
128
|
+
block_type,
|
129
|
+
all,
|
130
|
+
value_map: value_map,
|
131
|
+
singular_df_name: singular_table_name,
|
132
|
+
plural_df_name: table_name
|
133
|
+
)
|
134
|
+
end
|
74
135
|
|
75
|
-
define_method(:df_reverse_column_maps){
|
76
|
-
@@reverse_column_maps ||= {}
|
77
|
-
}
|
78
136
|
|
79
|
-
|
80
|
-
|
81
|
-
|
137
|
+
#
|
138
|
+
# A class level hash containing optionally defined column names for a data frame.
|
139
|
+
# Instead of numeric or dynamic column names, you may explicitly define names for columns using the
|
140
|
+
# "#{singular_table_name}_column_names" method.
|
141
|
+
#
|
142
|
+
# E.g.
|
143
|
+
#
|
144
|
+
# class Foo
|
145
|
+
# include HasStatus
|
146
|
+
# status_column_names %i(review_status export_status)
|
147
|
+
# end
|
148
|
+
#
|
149
|
+
# This names
|
150
|
+
# column 0 as 'review_status' and
|
151
|
+
# column 1 as 'export_status'.
|
152
|
+
# Now you can make queries like:
|
153
|
+
# * Foo.status.review_status
|
154
|
+
# * Foo.first.status.export_status
|
155
|
+
# * Foo.status[:review_status..:export_status]
|
156
|
+
# * Foo.status[43] # You can still use numeric column indices
|
157
|
+
#
|
158
|
+
define_method :df_column_names do
|
159
|
+
@@column_names ||= {}
|
160
|
+
end
|
82
161
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
define_method("include_#{table_name}"){|*dimensions, unmap: true|
|
88
|
-
scope = self.all
|
89
|
-
blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
|
90
|
-
included_blocks = blocks_for_tables[singular_table_name] ||= {}
|
91
|
-
dimensions.flatten.each do |key|
|
92
|
-
if unmap && column_map(singular_table_name)
|
93
|
-
idx = column_map(singular_table_name)[key]
|
94
|
-
else
|
95
|
-
idx = key
|
96
|
-
key = "t#{key}"
|
97
|
-
end
|
98
|
-
block_index = idx / block_type::BLOCK_SIZE
|
99
|
-
block_offset = (idx % block_type::BLOCK_SIZE).succ
|
100
|
-
included_blocks[block_index] ||= []
|
101
|
-
included_blocks[block_index] << {name: key, idx: block_offset}
|
102
|
-
end
|
103
|
-
query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (table_name, included_blocks)|
|
104
|
-
aggregate +
|
105
|
-
included_blocks.reduce('') do |aggregate, (block_idx, blocks)|
|
106
|
-
blocks_table_name = "#{table_name}_blocks"
|
107
|
-
aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{table_name}_blocks "+
|
108
|
-
" WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{table_name}#{block_idx} ON b#{table_name}#{block_idx}.data_frame_type = '#{self.name}' AND b#{table_name}#{block_idx}.data_frame_id = #{self.table_name}.id"
|
109
|
-
end
|
110
|
-
end + ") as #{self.table_name}"
|
111
|
-
scope.from(query)
|
162
|
+
# The class level accessor
|
163
|
+
define_method(:column_name_map){|for_table|
|
164
|
+
df_column_names[for_table][self] if defined? df_column_names[for_table] rescue nil
|
112
165
|
}
|
113
166
|
|
167
|
+
# The attribute writer
|
114
168
|
define_method("#{singular_table_name}_column_names") do |names|
|
115
169
|
df_column_names[singular_table_name] ||= {}
|
116
170
|
df_column_maps[singular_table_name] ||= {}
|
@@ -118,35 +172,172 @@ module ActiveDataFrame
|
|
118
172
|
df_column_maps[singular_table_name][self] = names.map.with_index.to_h
|
119
173
|
end
|
120
174
|
|
175
|
+
|
176
|
+
#
|
177
|
+
# A class level hash containing optionally defined column maps (these are usually simply a hash that responds to #[](column_name) and returns
|
178
|
+
# a positive integer representing the corresponding column index).
|
179
|
+
# These are defined using the
|
180
|
+
# "#{singular_table_name}_column_map" method.
|
181
|
+
#
|
182
|
+
# class Foo
|
183
|
+
# include HasCpuTemp
|
184
|
+
# cpu_temp_column_map Hash.new{ |columns, time|
|
185
|
+
# columns[time] = time.to_i # We store cpu temperatures at a 1 second granularity
|
186
|
+
# }
|
187
|
+
# end
|
188
|
+
#
|
189
|
+
define_method :df_column_maps do
|
190
|
+
@@column_maps ||= {}
|
191
|
+
end
|
192
|
+
|
193
|
+
# The attribute writer
|
121
194
|
define_method("#{singular_table_name}_column_map") do |column_map|
|
122
195
|
df_column_names[singular_table_name] = nil
|
123
196
|
df_column_maps[singular_table_name] ||= {}
|
124
197
|
df_column_maps[singular_table_name][self] = column_map
|
125
198
|
end
|
126
199
|
|
200
|
+
# The class level accessor
|
201
|
+
define_method(:column_map){|for_table|
|
202
|
+
df_column_maps[for_table][self] if defined? df_column_maps[for_table] rescue nil
|
203
|
+
}
|
204
|
+
|
205
|
+
#
|
206
|
+
# A class level hash containing optionally defined reverse column mappings (from a positive integer to a mapped column index/key)
|
207
|
+
# This is only used for functions where we query indices based on values.
|
208
|
+
# E.g
|
209
|
+
#
|
210
|
+
# class Foo
|
211
|
+
# include HasPrice
|
212
|
+
# column_map Hash.new{|columns, date|
|
213
|
+
# columns[date] = (date - Date.new(1970)).to_i
|
214
|
+
# }
|
215
|
+
# reverse_column_map{|columns, index|
|
216
|
+
# columns[index] = Date.new(1970) + index.month
|
217
|
+
# }
|
218
|
+
# end
|
219
|
+
#
|
220
|
+
# # Show all dates between 2000 and 2010 where the total of all prices is > $500
|
221
|
+
# Foo.prices.idx_where_sum_gte(Date.new(2000)...Date.new(2010), 500)
|
222
|
+
#
|
223
|
+
define_method :df_reverse_column_maps do
|
224
|
+
@@reverse_column_maps ||= {}
|
225
|
+
end
|
226
|
+
|
227
|
+
# The attribute writer
|
127
228
|
define_method("#{singular_table_name}_reverse_column_map"){|reverse_column_map|
|
128
229
|
df_reverse_column_maps[singular_table_name] ||= {}
|
129
230
|
df_reverse_column_maps[singular_table_name][self] = reverse_column_map
|
130
231
|
}
|
131
232
|
|
132
|
-
|
133
|
-
|
233
|
+
# The class level accessor
|
234
|
+
define_method(:reverse_column_map){|for_table|
|
235
|
+
df_reverse_column_maps[for_table] ||= {}
|
236
|
+
df_reverse_column_maps[for_table][self] ||= column_map(for_table).invert if column_map(for_table)
|
134
237
|
}
|
135
238
|
|
136
|
-
|
137
|
-
|
138
|
-
|
239
|
+
#
|
240
|
+
# See group_proxy.rb.
|
241
|
+
# This makes a number of grouping/bucketing queries easier to express
|
242
|
+
# for analytics across an entire table
|
243
|
+
#
|
244
|
+
define_method(:with_groups) do |*groups|
|
245
|
+
GroupProxy.new(group(*groups))
|
246
|
+
end
|
139
247
|
|
140
|
-
|
141
|
-
|
142
|
-
|
248
|
+
#
|
249
|
+
# If you use the include_#{table_name} function before executing any queries, you can
|
250
|
+
# join the child AR rows with any number of columns and treat them as if they were all part of the same table.
|
251
|
+
# These joined columns can be used to further refine your queries, perform groupings, counts, etc.
|
252
|
+
#
|
253
|
+
# E.g.
|
254
|
+
#
|
255
|
+
# class Iris
|
256
|
+
# include HasDimension
|
257
|
+
# dimension_column_names %i(sepal_length sepal_width petal_length petal_width)
|
258
|
+
# end
|
259
|
+
#
|
260
|
+
# Iris.where('sepal_length > ?', 4) # Error! (There is no column called sepal_length on the iris table)
|
261
|
+
# Iris.include_dimensions(:sepal_length).where('sepal_length > ?', 4) # Works fine
|
262
|
+
# Iris.include_dimension(:sepal_length, :petal_width).where('sepal_length > 3').select(:petal_width)
|
263
|
+
# Iris.include_dimension(:sepal_length, :petal_width).with_groups('ROUND(sepal_length)').average('petal_width')
|
264
|
+
# {
|
265
|
+
# "4.0":"0.2"
|
266
|
+
# "5.0":"0.397872340425532",
|
267
|
+
# "6.0":"1.49705882352941",
|
268
|
+
# "7.0":"1.89583333333333",
|
269
|
+
# "8.0":"2.15",
|
270
|
+
# }
|
271
|
+
#
|
272
|
+
# In cases where column names are not predefined or use a mapper you can provide a hash to give alternate column names for the query
|
273
|
+
#
|
274
|
+
# class BuildingType < ApplicationRecord
|
275
|
+
# include HasBuildingConsent
|
276
|
+
# consents_column_map Hash.new{|hash, time, as_date = time.to_date|
|
277
|
+
# (as_date.year - 1970) * 12 + as_date.month
|
278
|
+
# }
|
279
|
+
# end
|
280
|
+
#
|
281
|
+
# # In this example BuildingType.consents accepts dynamic column indices (anything that responds to to_date)
|
282
|
+
# # We can give these columns explicit names so we can refer to them in queries.
|
283
|
+
# E.g
|
284
|
+
#
|
285
|
+
# BuildingType.include_consents({'1994-04-01' => :april_94, '1994-05-01' => :may_94}).where('april_94 + may_94 < 300')
|
286
|
+
# => [
|
287
|
+
# <BuildingType id: 2, name: "Hostels_boarding", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 2, april_94: 11, may_94: 5>,
|
288
|
+
# <BuildingType id: 3, name: "Hotels", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 3, april_94: 33, may_94: 34>,
|
289
|
+
# <BuildingType id: 4, name: "Hospitals", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 4, april_94: 32, may_94: 37>,
|
290
|
+
# <BuildingType id: 5, name: "Education", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 5, april_94: 88, may_94: 145>,
|
291
|
+
# <BuildingType id: 6, name: "Social_cultural_religious", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 6, april_94: 82, may_94: 102>,
|
292
|
+
# <BuildingType id: 9, name: "Storage", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 9, april_94: 29, may_94: 52>,
|
293
|
+
# <BuildingType id: 12, name: "Misc", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 12, april_94: 33, may_94: 39>]
|
294
|
+
# ]
|
295
|
+
#
|
296
|
+
#
|
297
|
+
define_method("include_#{table_name}"){|*dimensions, unmap: true, scope: self.all, as: false|
|
298
|
+
dim1 = dimensions[0]
|
299
|
+
case dim1
|
300
|
+
when Hash
|
301
|
+
dimension_map, dimensions = dim1, dim1.keys
|
302
|
+
when Range
|
303
|
+
exclude_end = dim1.exclude_end?
|
304
|
+
|
305
|
+
from, to = if unmap && column_map(singular_table_name)
|
306
|
+
unmap = false
|
307
|
+
[column_map(singular_table_name)[dim1.begin],column_map(singular_table_name)[dim1.end]]
|
308
|
+
else
|
309
|
+
[dim1.begin, dim1.end]
|
310
|
+
end
|
311
|
+
dimensions = (exclude_end ? (from...to) : (from..to)).to_a
|
312
|
+
end
|
313
|
+
|
314
|
+
blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
|
315
|
+
included_blocks = blocks_for_tables[block_type.table_name] ||= {}
|
143
316
|
|
144
|
-
|
145
|
-
|
146
|
-
|
317
|
+
dimensions.flatten.each.with_index(1) do |key, i|
|
318
|
+
if unmap && column_map(singular_table_name)
|
319
|
+
idx = column_map(singular_table_name)[key]
|
320
|
+
key = dimension_map[key] if dimension_map
|
321
|
+
else
|
322
|
+
idx = key
|
323
|
+
key = "t#{key}"
|
324
|
+
end
|
325
|
+
key = "#{as}#{i}" if as
|
326
|
+
block_index = idx / block_type::BLOCK_SIZE
|
327
|
+
block_offset = (idx % block_type::BLOCK_SIZE).succ
|
328
|
+
included_blocks[block_index] ||= []
|
329
|
+
included_blocks[block_index] << {name: key, idx: block_offset}
|
330
|
+
end
|
331
|
+
query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (for_table, blocks_for_table)|
|
332
|
+
aggregate +
|
333
|
+
blocks_for_table.reduce('') do |blocks_aggregate, (block_idx, blocks)|
|
334
|
+
blocks_table_name = for_table
|
335
|
+
blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
|
336
|
+
" WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.data_frame_id = #{self.table_name}.id"
|
337
|
+
end
|
338
|
+
end + ") as #{self.table_name}"
|
339
|
+
scope.from(query)
|
147
340
|
}
|
148
341
|
end
|
149
|
-
|
150
|
-
return to_inject
|
151
342
|
end
|
152
343
|
end
|