active_data_frame 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +9 -1
- data/active_data_frame-0.1.1.gem +0 -0
- data/active_data_frame.gemspec +5 -1
- data/active_data_frame.todo +13 -36
- data/examples.rb +46 -0
- data/lib/active_data_frame.rb +2 -0
- data/lib/active_data_frame/bounds.rb +4 -0
- data/lib/active_data_frame/data_frame_proxy.rb +48 -18
- data/lib/active_data_frame/database.rb +115 -0
- data/lib/active_data_frame/group_proxy.rb +40 -0
- data/lib/active_data_frame/has_data_frame.rb +298 -107
- data/lib/active_data_frame/point.rb +4 -0
- data/lib/active_data_frame/row.rb +22 -68
- data/lib/active_data_frame/table.rb +13 -14
- data/lib/active_data_frame/version.rb +1 -1
- data/lib/generators/active_data_frame/install_generator.rb +13 -5
- data/lib/generators/active_data_frame/templates/has_concern.rb +1 -4
- data/lib/generators/active_data_frame/templates/migration.rb +1 -3
- metadata +72 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c368f1ed1f3fc78c0e9f81b0d2bd7cc9f50141a
|
4
|
+
data.tar.gz: 80fa0cfdeed12b5b41d7556ec9c019670827e934
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2cc97b56fe384be682c9631a06c108b2524434230df5f4ac4949300339fadea0dcbca0f1efb9822bd04c3a43a7ae2374a3dbad02706793cfc5f8fa42600920b
|
7
|
+
data.tar.gz: 7deccde31e9d8a99b31831d2af96227cdf9d087297321b531c79b4327b9bb63f38e0fa026869a94ffb4d5ea3cb5e9e61c5805328cf7bb8248b26e54f95f7fc40
|
data/Rakefile
CHANGED
Binary file
|
data/active_data_frame.gemspec
CHANGED
@@ -23,6 +23,10 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
24
|
spec.add_development_dependency "pry-byebug", "~> 3.4.0", '>= 3.4.0'
|
25
25
|
spec.add_development_dependency 'pry', '~> 0.10.2', '>= 0.10.0'
|
26
|
-
spec.
|
26
|
+
spec.add_development_dependency 'pg'
|
27
|
+
spec.add_development_dependency 'minitest', '~>5.11'
|
28
|
+
spec.add_development_dependency 'minitest-reporters', '~> 1.1', '>= 1.1.0'
|
29
|
+
spec.add_development_dependency 'minitest-around', '0.4.1'
|
30
|
+
spec.add_runtime_dependency 'activerecord', '~> 5.0'
|
27
31
|
spec.add_runtime_dependency 'rmatrix', '~> 0.1.10', '>=0.1.10'
|
28
32
|
end
|
data/active_data_frame.todo
CHANGED
@@ -1,37 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
☐ Refactor + Tidy
|
8
|
-
☐ Tests
|
9
|
-
☐ Experiment with MonetDB speed
|
10
|
-
☐ Check support for different numeric/string/bool.etc types
|
11
|
-
✔ Experiment with single precision @done (17-03-31 08:18)
|
12
|
-
ActiveRecordMonetDBAdapter:
|
13
|
-
☐ Work on support for MonetDB
|
14
|
-
|
15
|
-
ActiveDataFrame:
|
16
|
-
✔ Refactor grouping/summing code @done (17-03-31 08:20)
|
17
|
-
✔ Allow includes to combine frames @done (17-03-27 10:36)
|
18
|
-
✔ Performance test on ICP data @done (17-03-27 08:41)
|
19
|
-
✔ Alternate RDBMS support (SQLLite, MySQL) @done (17-03-27 09:58)
|
20
|
-
|
21
|
-
|
22
|
-
Utilities:
|
23
|
-
☐ KMeans clustering and DBScan built in to multi-d array
|
24
|
-
|
25
|
-
Later:
|
26
|
-
☐ Build generic Merge/Cache structure which will either cache infinite columns or rows
|
27
|
-
- class Unit
|
28
|
-
- df_cache :all_loads, ::loads, direction: :row
|
29
|
-
- end
|
30
|
-
|
31
|
-
Ruby dataframe library inspiration:
|
32
|
-
- Integration with Nyaplot
|
33
|
-
- Integration with Statsample
|
34
|
-
|
1
|
+
Priorities:
|
2
|
+
☐ Ensure delete/clear works
|
3
|
+
☐ rmatrix tests
|
4
|
+
☐ Update README.md
|
5
|
+
☐ Use MMAP of status/enums
|
6
|
+
☐ Support SQLite + MySQL, MonetDB
|
35
7
|
✔ Generator creates A migration and data_frame and block classes. Block/DataFrame classes have a type, a period unit and a period length @done (17-01-12 10:29)
|
36
8
|
✔ Type is: @done (17-01-12 10:29)
|
37
9
|
✔ Bit @done (17-01-12 10:29)
|
@@ -79,5 +51,10 @@ Ruby dataframe library inspiration:
|
|
79
51
|
✔ Finish RMatrix @done (17-03-02 09:01)
|
80
52
|
|
81
53
|
RMatrix:
|
82
|
-
✔ Ensure assignment works @done (
|
83
|
-
|
54
|
+
✔ Ensure assignment works @done (18-04-03 18:58)
|
55
|
+
✔ Raw is simply a copy of self without mappings @done (18-04-03 18:58)
|
56
|
+
ActiveDataFrame:
|
57
|
+
✔ dimensions_minimum @done (18-04-03 18:58)
|
58
|
+
✔ dimensions_maximum @done (18-04-03 18:58)
|
59
|
+
✔ dimensions_sum @done (18-04-03 18:58)
|
60
|
+
✔ dimensions_average @done (18-04-03 18:58)
|
data/examples.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
|
2
|
+
# Get times of day where there was a price spike in wellington
|
3
|
+
Icp.where(region: :wellington).loads.idx_where_sum_gte(Time.now..1.day.from_now, 12_000)
|
4
|
+
|
5
|
+
# Get current load for all Icps, grouped by :region, :customer_class, :tariff
|
6
|
+
Icp.include_loads(Time.now).with_groups(:region, :customer_class, :tariff).sum("\"#{Time.now}\"")
|
7
|
+
|
8
|
+
# Get the next day's aggregate usage for Auckland residential customers
|
9
|
+
Icp.where(region: :auckland, customer_class: :residential).loads.sum(Time.now..1.day.from_now)
|
10
|
+
|
11
|
+
# Get a year's worth of load for a single ICP
|
12
|
+
Icp.first.load[Time.now..1.year.from_now]
|
13
|
+
|
14
|
+
# Get a day's worth of load for many ICPs
|
15
|
+
Icp.where(tariff: :un).loads[Time.now..1.day.from_now]
|
16
|
+
|
17
|
+
# Get the average load over a day for many ICPs
|
18
|
+
Icp.where(tariff: :un).loads.avg(Time.now..1.day.from_now)
|
19
|
+
|
20
|
+
# Count ICPs which have more than 5.5 kW of load at this point in time
|
21
|
+
Icp.include_loads(Time.now).where("\"%s\" > ?" % Time.now, 5.5).count
|
22
|
+
|
23
|
+
|
24
|
+
# See the largest sepal length seen for each species
|
25
|
+
Iris.with_groups(:species).max(:sepal_length)
|
26
|
+
|
27
|
+
# Get individual iris sepal_length
|
28
|
+
Iris.first.dimension.sepal_length
|
29
|
+
|
30
|
+
# Get multiple dimensions for individual iris
|
31
|
+
Iris.first.dimension[:sepal_length, :petal_width]
|
32
|
+
|
33
|
+
# Get range of dimensions for individual iris
|
34
|
+
Iris.first.dimension[:sepal_length..:petal_width]
|
35
|
+
|
36
|
+
# Get range of dimensions for all iris versicolors
|
37
|
+
dimensions = Iris.where(species: :versicolor).dimensions[:sepal_length..:petal_width]
|
38
|
+
|
39
|
+
# Chop data as needed
|
40
|
+
sepal_lengths = dimensions.sepal_length
|
41
|
+
sepal_lengths_petal_widths = dimensions[[:sepal_length, :petal_width]]
|
42
|
+
|
43
|
+
selected_iris = dimensions[Iris.where(species: :versicolor).first(5)]
|
44
|
+
|
45
|
+
# Look at RMatrix API for matrix functionality
|
46
|
+
#
|
data/lib/active_data_frame.rb
CHANGED
@@ -1,31 +1,58 @@
|
|
1
1
|
module ActiveDataFrame
|
2
|
+
|
3
|
+
require_relative 'point'
|
4
|
+
require_relative 'bounds'
|
5
|
+
|
2
6
|
class DataFrameProxy
|
3
|
-
attr_accessor :block_type, :data_frame_type, :block_type_name
|
4
|
-
|
5
|
-
|
6
|
-
self.
|
7
|
-
self.
|
7
|
+
attr_accessor :block_type, :data_frame_type, :block_type_name, :value_map, :singular_df_name, :plural_df_name
|
8
|
+
|
9
|
+
def initialize(block_type, data_frame_type, value_map: nil, singular_df_name: '', plural_df_name: '')
|
10
|
+
self.block_type = block_type
|
11
|
+
self.data_frame_type = data_frame_type
|
12
|
+
self.block_type_name = block_type.table_name.gsub(/_blocks$/,'').gsub(/^blocks_/,'')
|
13
|
+
self.value_map = value_map
|
14
|
+
self.singular_df_name = singular_df_name
|
15
|
+
self.plural_df_name = plural_df_name
|
16
|
+
end
|
17
|
+
|
18
|
+
def reverse_value_map
|
19
|
+
@reverse_value_map ||= value_map.invert
|
8
20
|
end
|
9
21
|
|
10
22
|
def [](*ranges)
|
11
|
-
get(extract_ranges(ranges))
|
23
|
+
result = get(extract_ranges(ranges))
|
24
|
+
if @value_map
|
25
|
+
# TODO Multi-dimensions #map would be nice
|
26
|
+
result.to_a.map{|row| row.kind_of?(Array) ? row.map(&reverse_value_map.method(:[])) : reverse_value_map[row]}
|
27
|
+
else
|
28
|
+
result
|
29
|
+
end
|
12
30
|
end
|
13
31
|
|
14
32
|
def []=(from, values)
|
33
|
+
values = Array(values).flatten.map(&@value_map.method(:[])) if @value_map
|
15
34
|
from = column_map[from] if column_map && column_map[from]
|
16
35
|
set(from, M[values, typecode: block_type::TYPECODE].to_a.flatten)
|
17
36
|
end
|
18
37
|
|
38
|
+
def clear(*ranges)
|
39
|
+
clear(ex)
|
40
|
+
end
|
41
|
+
|
19
42
|
def column_map
|
20
|
-
data_frame_type.column_map(self.
|
43
|
+
data_frame_type.column_map(self.singular_df_name)
|
21
44
|
end
|
22
45
|
|
23
46
|
def column_name_map
|
24
|
-
data_frame_type.column_name_map(self.
|
47
|
+
data_frame_type.column_name_map(self.singular_df_name)
|
25
48
|
end
|
26
49
|
|
27
50
|
def reverse_column_map
|
28
|
-
data_frame_type.reverse_column_map(self.
|
51
|
+
data_frame_type.reverse_column_map(self.singular_df_name)
|
52
|
+
end
|
53
|
+
|
54
|
+
def database
|
55
|
+
@database ||= Database.for_types(block: block_type, df: data_frame_type)
|
29
56
|
end
|
30
57
|
|
31
58
|
def method_missing(name, *args, &block)
|
@@ -42,7 +69,7 @@ module ActiveDataFrame
|
|
42
69
|
case range
|
43
70
|
when Range then range
|
44
71
|
when Fixnum then range..range
|
45
|
-
else raise "Unexpected index #{range}"
|
72
|
+
else raise "Unexpected index for data frame proxy #{range}, expecting either a Range or an Integer"
|
46
73
|
end
|
47
74
|
end
|
48
75
|
end
|
@@ -51,9 +78,6 @@ module ActiveDataFrame
|
|
51
78
|
0
|
52
79
|
end
|
53
80
|
|
54
|
-
def flatten_ranges(ranges)
|
55
|
-
end
|
56
|
-
|
57
81
|
def unmap_ranges(ranges, map)
|
58
82
|
ranges.map do |range|
|
59
83
|
case range
|
@@ -71,15 +95,17 @@ module ActiveDataFrame
|
|
71
95
|
from_block_offset = from % block_type::BLOCK_SIZE
|
72
96
|
to_block_index = to / block_type::BLOCK_SIZE
|
73
97
|
to_block_offset = to % block_type::BLOCK_SIZE
|
74
|
-
return
|
75
|
-
|
76
|
-
|
98
|
+
return Bounds.new(
|
99
|
+
Point.new(from_block_index, from_block_offset, from),
|
100
|
+
Point.new(to_block_index, to_block_offset, to),
|
77
101
|
(to - from) + 1,
|
78
102
|
index
|
79
103
|
)
|
80
104
|
end
|
81
105
|
|
82
106
|
def self.suppress_logs
|
107
|
+
#TODO Make optional
|
108
|
+
return yield
|
83
109
|
ActiveRecord::Base.logger, old_logger = nil, ActiveRecord::Base.logger
|
84
110
|
yield.tap do
|
85
111
|
ActiveRecord::Base.logger = old_logger
|
@@ -101,11 +127,15 @@ module ActiveDataFrame
|
|
101
127
|
end
|
102
128
|
end
|
103
129
|
|
130
|
+
def match_range(from, to)
|
131
|
+
from == to ? from : from..to
|
132
|
+
end
|
133
|
+
|
104
134
|
def blocks_between(bounds, block_scope: scope)
|
105
135
|
bounds[1..-1].reduce(
|
106
|
-
block_scope.where( block_type.table_name => { period_index: (bounds[0].from.index
|
136
|
+
block_scope.where( block_type.table_name => { period_index: match_range(bounds[0].from.index,bounds[0].to.index)})
|
107
137
|
) do | or_chain, bound|
|
108
|
-
or_chain.or(block_scope.where( block_type.table_name => { period_index: (bound.from.index
|
138
|
+
or_chain.or(block_scope.where( block_type.table_name => { period_index: match_range(bound.from.index,bound.to.index)}))
|
109
139
|
end
|
110
140
|
end
|
111
141
|
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
module ActiveDataFrame
|
2
|
+
class Database
|
3
|
+
|
4
|
+
def self.batching
|
5
|
+
!!Thread.current[:active_data_frame_batching]
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.batching=(value)
|
9
|
+
Thread.current[:active_data_frame_batching] = !!value
|
10
|
+
end
|
11
|
+
|
12
|
+
# Not thread safe!
|
13
|
+
def self.execute(sql)
|
14
|
+
if ActiveDataFrame::Database.batching
|
15
|
+
Thread.current[:batch] << sql << ?;
|
16
|
+
else
|
17
|
+
ActiveRecord::Base.transaction do
|
18
|
+
ActiveRecord::Base.connection.execute sql
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.flush!
|
24
|
+
execute(Thread.current[:batch])
|
25
|
+
Thread.current[:batch] = ''
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.for_types(block:, df:)
|
29
|
+
(@@configs ||= {})[[block, df]] ||= Database.new(block, df)
|
30
|
+
end
|
31
|
+
|
32
|
+
attr_reader :block_type, :data_frame_type
|
33
|
+
|
34
|
+
def initialize(block_type, data_frame_type)
|
35
|
+
@block_type = block_type
|
36
|
+
@data_frame_type = data_frame_type
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.batch
|
40
|
+
self.batching, prev_batch = true, self.batching
|
41
|
+
Thread.current[:batch] ||= ''
|
42
|
+
ActiveRecord::Base.transaction do
|
43
|
+
yield
|
44
|
+
end
|
45
|
+
ensure
|
46
|
+
self.batching = prev_batch
|
47
|
+
flush! unless self.batching
|
48
|
+
end
|
49
|
+
##
|
50
|
+
# Update block data for all blocks in a single call
|
51
|
+
##
|
52
|
+
def bulk_update(existing)
|
53
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
54
|
+
when 'postgresql'.freeze
|
55
|
+
# Fast bulk update
|
56
|
+
updates = ''
|
57
|
+
existing.each do |period_index, (values, df_id)|
|
58
|
+
updates << "(#{df_id}, #{period_index}, #{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}),"
|
59
|
+
end
|
60
|
+
perform_update(updates)
|
61
|
+
else
|
62
|
+
ids = existing.map {|_, (_, id)| id}
|
63
|
+
updates = block_type::COLUMNS.map.with_index do |column, column_idx|
|
64
|
+
[column, "CASE period_index\n#{existing.map{|period_index, (values, _)| "WHEN #{period_index} then #{values[column_idx]}"}.join("\n")} \nEND\n"]
|
65
|
+
end.to_h
|
66
|
+
update_statement = updates.map{|cl, up| "#{cl} = #{up}" }.join(', ')
|
67
|
+
Database.execute("UPDATE #{block_type.table_name} SET #{update_statement} WHERE
|
68
|
+
#{block_type.table_name}.data_frame_id IN (#{ids.join(',')})
|
69
|
+
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
70
|
+
AND #{block_type.table_name}.period_index IN (#{existing.keys.join(', ')});
|
71
|
+
"
|
72
|
+
)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
##
|
77
|
+
# Insert block data for all blocks in a single call
|
78
|
+
##
|
79
|
+
def bulk_insert(new_blocks, instance)
|
80
|
+
inserts = ''
|
81
|
+
new_blocks.each do |period_index, (values)|
|
82
|
+
inserts << \
|
83
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
84
|
+
when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
85
|
+
else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{instance.id}, #{period_index}, '#{data_frame_type.name}'),"
|
86
|
+
end
|
87
|
+
end
|
88
|
+
perform_insert(inserts)
|
89
|
+
end
|
90
|
+
|
91
|
+
def bulk_delete(blocks)
|
92
|
+
binding.pry
|
93
|
+
end
|
94
|
+
|
95
|
+
def perform_update(updates)
|
96
|
+
Database.execute(
|
97
|
+
<<-SQL
|
98
|
+
UPDATE #{block_type.table_name}
|
99
|
+
SET #{block_type::COLUMNS.map{|col| "#{col} = t.#{col}" }.join(", ")}
|
100
|
+
FROM(
|
101
|
+
VALUES #{updates[0..-2]}) as t(data_frame_id, period_index, #{block_type::COLUMNS.join(',')})
|
102
|
+
WHERE #{block_type.table_name}.data_frame_id = t.data_frame_id
|
103
|
+
AND #{block_type.table_name}.period_index = t.period_index
|
104
|
+
AND #{block_type.table_name}.data_frame_type = '#{data_frame_type.name}'
|
105
|
+
SQL
|
106
|
+
)
|
107
|
+
true
|
108
|
+
end
|
109
|
+
|
110
|
+
def perform_insert(inserts)
|
111
|
+
sql = "INSERT INTO #{block_type.table_name} (#{block_type::COLUMNS.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
|
112
|
+
Database.execute sql
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module ActiveDataFrame
|
2
|
+
class GroupProxy
|
3
|
+
attr_accessor :groups
|
4
|
+
def initialize(groups)
|
5
|
+
self.groups = groups
|
6
|
+
end
|
7
|
+
|
8
|
+
def min(column_name)
|
9
|
+
aggregate('minimum', column_name)
|
10
|
+
end
|
11
|
+
|
12
|
+
def max(column_name)
|
13
|
+
aggregate('maximum', column_name)
|
14
|
+
end
|
15
|
+
|
16
|
+
def sum(column_name)
|
17
|
+
aggregate('sum', column_name)
|
18
|
+
end
|
19
|
+
|
20
|
+
def average(column_name)
|
21
|
+
aggregate('average', column_name)
|
22
|
+
end
|
23
|
+
|
24
|
+
def count
|
25
|
+
aggregate('count')
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
def aggregate *agg
|
30
|
+
counts = self.groups.send(*agg)
|
31
|
+
grouped = {}
|
32
|
+
counts.each do |keys, value|
|
33
|
+
keys = Array(keys)
|
34
|
+
child = keys[0..-2].reduce(grouped){|parent, key| parent[key] ||= {}}
|
35
|
+
child[keys[-1]] = value
|
36
|
+
end
|
37
|
+
grouped
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -1,116 +1,170 @@
|
|
1
|
-
require 'active_support/concern'
|
2
|
-
|
3
|
-
|
4
1
|
module ActiveDataFrame
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
2
|
+
#
|
3
|
+
# Modules can include HasDataFrame('frame_name', FrameBlockType) to gain data frame capabilities
|
4
|
+
# This method will expose class level and row (Active Record instance) level accessors to the underlying data frame.
|
5
|
+
#
|
6
|
+
# E.g.
|
7
|
+
#
|
8
|
+
# module HasBar
|
9
|
+
# include HasDataFrame('bars', BarBlock)
|
10
|
+
# end
|
11
|
+
#
|
12
|
+
# class Foo
|
13
|
+
# include HasBar
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# # Select all bars from index 0 to 40, for all foos
|
17
|
+
# Foo.bars[0..40]
|
18
|
+
#
|
19
|
+
# Select all bars from index 0 to 40, for foo with id: 1
|
20
|
+
# Foo.find(1).bars[0..40]
|
21
|
+
#
|
22
|
+
# # Find the average bar size for Foo 1 from index 5 to 30
|
23
|
+
# Foo.find(1).bars[5..30].avg
|
24
|
+
#
|
25
|
+
# Find the average bar size for the first 10 foos from index 13..43
|
26
|
+
# Foo.limit(10).bars.avg[13..43]
|
27
|
+
#
|
28
|
+
# Find the sum size for all foos where baz == boo from index 13..43
|
29
|
+
# Foo.where(baz: :boo).bars.sum[13..43]
|
30
|
+
#
|
31
|
+
def self.HasDataFrame(singular_table_name, block_type, table_name: singular_table_name, value_map: nil, &block)
|
32
|
+
Module.new do
|
33
|
+
define_singleton_method(:included) do |base|
|
34
|
+
# If somebody includes our dataframe enabled module we execute the following
|
35
|
+
base.define_singleton_method(:included) do |decorated|
|
36
|
+
block[decorated] if block
|
37
|
+
decorated.extend(base::ClassMethods) if defined?(base::ClassMethods)
|
14
38
|
|
15
|
-
|
16
|
-
|
17
|
-
|
39
|
+
# add our class level methods
|
40
|
+
decorated.extend(
|
41
|
+
ActiveDataFrame.build_module_class_methods(singular_table_name, block_type, table_name: table_name, value_map: value_map)
|
42
|
+
)
|
18
43
|
|
19
|
-
|
20
|
-
|
21
|
-
end
|
44
|
+
# Add our instance level methods
|
45
|
+
decorated.class_eval do
|
22
46
|
|
23
|
-
|
24
|
-
|
25
|
-
|
47
|
+
if value_map
|
48
|
+
decorated.const_set(singular_table_name.underscore.camelize, ActiveDataFrame.build_dot_accessible_hash(value_map))
|
49
|
+
end
|
26
50
|
|
27
|
-
|
28
|
-
|
29
|
-
|
51
|
+
# Provide memoised reference to DF row
|
52
|
+
define_method singular_table_name do
|
53
|
+
(@data_frame_proxies ||= {})[singular_table_name] ||= Row.new(
|
54
|
+
block_type,
|
55
|
+
self.class,
|
56
|
+
self,
|
57
|
+
value_map: value_map,
|
58
|
+
singular_df_name: singular_table_name,
|
59
|
+
plural_df_name: table_name
|
60
|
+
)
|
61
|
+
end
|
30
62
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
63
|
+
# We provide our own inspect implementation which will include in the output
|
64
|
+
# selected dataframe attributes that do not reside on the parent table
|
65
|
+
define_method :inspect do
|
66
|
+
inspection = "not initialized"
|
67
|
+
if defined?(@attributes) && @attributes
|
68
|
+
inspection = @attributes.keys.collect { |name|
|
69
|
+
if has_attribute?(name)
|
70
|
+
"#{name}: #{attribute_for_inspect(name)}"
|
71
|
+
end
|
72
|
+
}.compact.join(", ")
|
73
|
+
end
|
74
|
+
"<#{self.class} #{inspection}>"
|
75
|
+
end
|
76
|
+
end
|
39
77
|
end
|
40
|
-
grouped
|
41
78
|
end
|
79
|
+
end
|
42
80
|
end
|
43
81
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
define_method(:inspect){
|
54
|
-
inspection = "not initialized"
|
55
|
-
if defined?(@attributes) && @attributes
|
56
|
-
inspection = @attributes.keys.collect { |name|
|
57
|
-
if has_attribute?(name)
|
58
|
-
"#{name}: #{attribute_for_inspect(name)}"
|
59
|
-
end
|
60
|
-
}.compact.join(", ")
|
82
|
+
#
|
83
|
+
# Define methods on our hash to easily access any values that are indexed by a symbol key
|
84
|
+
# and that do not clash with existing methods on the Hash
|
85
|
+
#
|
86
|
+
def self.build_dot_accessible_hash(hash)
|
87
|
+
hash.dup.tap do |map|
|
88
|
+
map.each do |key, value|
|
89
|
+
if(key.kind_of?(Symbol) && !hash.respond_to?(key))
|
90
|
+
map.define_singleton_method(key){value}
|
61
91
|
end
|
62
|
-
|
63
|
-
}
|
92
|
+
end
|
64
93
|
end
|
94
|
+
end
|
65
95
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
96
|
+
#
|
97
|
+
# The class methods that are defined on any class that includes our dataframe-enabled module
|
98
|
+
#
|
99
|
+
def self.build_module_class_methods(singular_table_name, block_type, table_name: singular_table_name, value_map: nil)
|
100
|
+
Module.new do
|
70
101
|
|
71
|
-
|
72
|
-
|
73
|
-
|
102
|
+
# The key ADF functionality is exposed here.
|
103
|
+
# This defines a new `table_name` accessor on the class which gives you access to a dataframe proxy by the name of `table_name`
|
104
|
+
#
|
105
|
+
# E.g.
|
106
|
+
#
|
107
|
+
# class Foo
|
108
|
+
# include HasBar
|
109
|
+
# end
|
110
|
+
#
|
111
|
+
# # Select all bars from index 0 to 40, for all foos
|
112
|
+
# Foo.bars[0..40]
|
113
|
+
#
|
114
|
+
# Select all bars from index 0 to 40, for foo with id: 1
|
115
|
+
# Foo.find(1).bars[0..40]
|
116
|
+
#
|
117
|
+
# # Find the average bar size for Foo 1 from index 5 to 30
|
118
|
+
# Foo.find(1).bars[5..30].avg
|
119
|
+
#
|
120
|
+
# Find the average bar size for the first 10 foos from index 13..43
|
121
|
+
# Foo.limit(10).bars.avg[13..43]
|
122
|
+
#
|
123
|
+
# Find the sum size for all foos where baz == boo from index 13..43
|
124
|
+
# Foo.where(baz: :boo).bars.sum[13..43]
|
125
|
+
#
|
126
|
+
define_method(table_name) do
|
127
|
+
Table.new(
|
128
|
+
block_type,
|
129
|
+
all,
|
130
|
+
value_map: value_map,
|
131
|
+
singular_df_name: singular_table_name,
|
132
|
+
plural_df_name: table_name
|
133
|
+
)
|
134
|
+
end
|
74
135
|
|
75
|
-
define_method(:df_reverse_column_maps){
|
76
|
-
@@reverse_column_maps ||= {}
|
77
|
-
}
|
78
136
|
|
79
|
-
|
80
|
-
|
81
|
-
|
137
|
+
#
|
138
|
+
# A class level hash containing optionally defined column names for a data frame.
|
139
|
+
# Instead of numeric or dynamic column names, you may explicitly define names for columns using the
|
140
|
+
# "#{singular_table_name}_column_names" method.
|
141
|
+
#
|
142
|
+
# E.g.
|
143
|
+
#
|
144
|
+
# class Foo
|
145
|
+
# include HasStatus
|
146
|
+
# status_column_names %i(review_status export_status)
|
147
|
+
# end
|
148
|
+
#
|
149
|
+
# This names
|
150
|
+
# column 0 as 'review_status' and
|
151
|
+
# column 1 as 'export_status'.
|
152
|
+
# Now you can make queries like:
|
153
|
+
# * Foo.status.review_status
|
154
|
+
# * Foo.first.status.export_status
|
155
|
+
# * Foo.status[:review_status..:export_status]
|
156
|
+
# * Foo.status[43] # You can still use numeric column indices
|
157
|
+
#
|
158
|
+
define_method :df_column_names do
|
159
|
+
@@column_names ||= {}
|
160
|
+
end
|
82
161
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
define_method("include_#{table_name}"){|*dimensions, unmap: true|
|
88
|
-
scope = self.all
|
89
|
-
blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
|
90
|
-
included_blocks = blocks_for_tables[singular_table_name] ||= {}
|
91
|
-
dimensions.flatten.each do |key|
|
92
|
-
if unmap && column_map(singular_table_name)
|
93
|
-
idx = column_map(singular_table_name)[key]
|
94
|
-
else
|
95
|
-
idx = key
|
96
|
-
key = "t#{key}"
|
97
|
-
end
|
98
|
-
block_index = idx / block_type::BLOCK_SIZE
|
99
|
-
block_offset = (idx % block_type::BLOCK_SIZE).succ
|
100
|
-
included_blocks[block_index] ||= []
|
101
|
-
included_blocks[block_index] << {name: key, idx: block_offset}
|
102
|
-
end
|
103
|
-
query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (table_name, included_blocks)|
|
104
|
-
aggregate +
|
105
|
-
included_blocks.reduce('') do |aggregate, (block_idx, blocks)|
|
106
|
-
blocks_table_name = "#{table_name}_blocks"
|
107
|
-
aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{table_name}_blocks "+
|
108
|
-
" WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{table_name}#{block_idx} ON b#{table_name}#{block_idx}.data_frame_type = '#{self.name}' AND b#{table_name}#{block_idx}.data_frame_id = #{self.table_name}.id"
|
109
|
-
end
|
110
|
-
end + ") as #{self.table_name}"
|
111
|
-
scope.from(query)
|
162
|
+
# The class level accessor
|
163
|
+
define_method(:column_name_map){|for_table|
|
164
|
+
df_column_names[for_table][self] if defined? df_column_names[for_table] rescue nil
|
112
165
|
}
|
113
166
|
|
167
|
+
# The attribute writer
|
114
168
|
define_method("#{singular_table_name}_column_names") do |names|
|
115
169
|
df_column_names[singular_table_name] ||= {}
|
116
170
|
df_column_maps[singular_table_name] ||= {}
|
@@ -118,35 +172,172 @@ module ActiveDataFrame
|
|
118
172
|
df_column_maps[singular_table_name][self] = names.map.with_index.to_h
|
119
173
|
end
|
120
174
|
|
175
|
+
|
176
|
+
#
|
177
|
+
# A class level hash containing optionally defined column maps (these are usually simply a hash that responds to #[](column_name) and returns
|
178
|
+
# a positive integer representing the corresponding column index).
|
179
|
+
# These are defined using the
|
180
|
+
# "#{singular_table_name}_column_map" method.
|
181
|
+
#
|
182
|
+
# class Foo
|
183
|
+
# include HasCpuTemp
|
184
|
+
# cpu_temp_column_map Hash.new{ |columns, time|
|
185
|
+
# columns[time] = time.to_i # We store cpu temperatures at a 1 second granularity
|
186
|
+
# }
|
187
|
+
# end
|
188
|
+
#
|
189
|
+
define_method :df_column_maps do
|
190
|
+
@@column_maps ||= {}
|
191
|
+
end
|
192
|
+
|
193
|
+
# The attribute writer
|
121
194
|
define_method("#{singular_table_name}_column_map") do |column_map|
|
122
195
|
df_column_names[singular_table_name] = nil
|
123
196
|
df_column_maps[singular_table_name] ||= {}
|
124
197
|
df_column_maps[singular_table_name][self] = column_map
|
125
198
|
end
|
126
199
|
|
200
|
+
# The class level accessor
|
201
|
+
define_method(:column_map){|for_table|
|
202
|
+
df_column_maps[for_table][self] if defined? df_column_maps[for_table] rescue nil
|
203
|
+
}
|
204
|
+
|
205
|
+
#
|
206
|
+
# A class level hash containing optionally defined reverse column mappings (from a positive integer to a mapped column index/key)
|
207
|
+
# This is only used for functions where we query indices based on values.
|
208
|
+
# E.g
|
209
|
+
#
|
210
|
+
# class Foo
|
211
|
+
# include HasPrice
|
212
|
+
# column_map Hash.new{|columns, date|
|
213
|
+
# columns[date] = (date - Date.new(1970)).to_i
|
214
|
+
# }
|
215
|
+
# reverse_column_map{|columns, index|
|
216
|
+
# columns[index] = Date.new(1970) + index.month
|
217
|
+
# }
|
218
|
+
# end
|
219
|
+
#
|
220
|
+
# # Show all dates between 2000 and 2010 where the total of all prices is >= $500
|
221
|
+
# Foo.prices.idx_where_sum_gte(Date.new(2000)...Date.new(2010), 500)
|
222
|
+
#
|
223
|
+
define_method :df_reverse_column_maps do
|
224
|
+
@@reverse_column_maps ||= {}
|
225
|
+
end
|
226
|
+
|
227
|
+
# The attribute writer
|
127
228
|
define_method("#{singular_table_name}_reverse_column_map"){|reverse_column_map|
|
128
229
|
df_reverse_column_maps[singular_table_name] ||= {}
|
129
230
|
df_reverse_column_maps[singular_table_name][self] = reverse_column_map
|
130
231
|
}
|
131
232
|
|
132
|
-
|
133
|
-
|
233
|
+
# The class level accessor
|
234
|
+
define_method(:reverse_column_map){|for_table|
|
235
|
+
df_reverse_column_maps[for_table] ||= {}
|
236
|
+
df_reverse_column_maps[for_table][self] ||= column_map(for_table).invert if column_map(for_table)
|
134
237
|
}
|
135
238
|
|
136
|
-
|
137
|
-
|
138
|
-
|
239
|
+
#
|
240
|
+
# See group_proxy.rb.
|
241
|
+
# This makes a number of grouping/bucketing queries easier to express
|
242
|
+
# for analytics across an entire table
|
243
|
+
#
|
244
|
+
define_method(:with_groups) do |*groups|
|
245
|
+
GroupProxy.new(group(*groups))
|
246
|
+
end
|
139
247
|
|
140
|
-
|
141
|
-
|
142
|
-
|
248
|
+
#
|
249
|
+
# If you use the include_#{table_name} function before executing any queries, you can
|
250
|
+
# join the child AR rows with any number of columns and treat them as if they were all part of the same table.
|
251
|
+
# These joined columns can be used to further refine your queries, perform groupings, counts, etc.
|
252
|
+
#
|
253
|
+
# E.g.
|
254
|
+
#
|
255
|
+
# class Iris
|
256
|
+
# include HasDimension
|
257
|
+
# dimension_column_names %i(sepal_length sepal_width petal_length petal_width)
|
258
|
+
# end
|
259
|
+
#
|
260
|
+
# Iris.where('sepal_length > ?', 4) # Error! (There is no column called sepal_length on the iris table)
|
261
|
+
# Iris.include_dimensions(:sepal_length).where('sepal_length > ?', 4) # Works fine
|
262
|
+
# Iris.include_dimension(:sepal_length, :petal_width).where('sepal_length > 3').select(:petal_width)
|
263
|
+
# Iris.include_dimension(:sepal_length, :petal_width).with_groups('ROUND(sepal_length)').average('petal_width')
|
264
|
+
# {
|
265
|
+
# "4.0":"0.2"
|
266
|
+
# "5.0":"0.397872340425532",
|
267
|
+
# "6.0":"1.49705882352941",
|
268
|
+
# "7.0":"1.89583333333333",
|
269
|
+
# "8.0":"2.15",
|
270
|
+
# }
|
271
|
+
#
|
272
|
+
# In cases where column names are not predefined or use a mapper you can provide a hash to give alternate column names for the query
|
273
|
+
#
|
274
|
+
# class BuildingType < ApplicationRecord
|
275
|
+
# include HasBuildingConsent
|
276
|
+
# consents_column_map Hash.new{|hash, time, as_date = time.to_date|
|
277
|
+
# (as_date.year - 1970) * 12 + as_date.month
|
278
|
+
# }
|
279
|
+
# end
|
280
|
+
#
|
281
|
+
# # In this example BuildingType.consents accepts dynamic column indices (anything that responds to to_date)
|
282
|
+
# # We can give these columns explicit names so we can refer to them in queries.
|
283
|
+
# E.g
|
284
|
+
#
|
285
|
+
# BuildingType.include_consents({'1994-04-01' => april_94, '1994-05-01' => may_94}).where('april_94 + may_94 < 300')
|
286
|
+
# => [
|
287
|
+
# <BuildingType id: 2, name: "Hostels_boarding", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 2, april_94: 11, may_94: 5>,
|
288
|
+
# <BuildingType id: 3, name: "Hotels", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 3, april_94: 33, may_94: 34>,
|
289
|
+
# <BuildingType id: 4, name: "Hospitals", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 4, april_94: 32, may_94: 37>,
|
290
|
+
# <BuildingType id: 5, name: "Education", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 5, april_94: 88, may_94: 145>,
|
291
|
+
# <BuildingType id: 6, name: "Social_cultural_religious", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 6, april_94: 82, may_94: 102>,
|
292
|
+
# <BuildingType id: 9, name: "Storage", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 9, april_94: 29, may_94: 52>,
|
293
|
+
# <BuildingType id: 12, name: "Misc", created_at: "2018-01-25 03:28:41", updated_at: "2018-01-25 03:28:41", data_frame_type: "BuildingType", data_frame_id: 12, april_94: 33, may_94: 39>]
|
294
|
+
# ]
|
295
|
+
#
|
296
|
+
#
|
297
|
+
define_method("include_#{table_name}"){|*dimensions, unmap: true, scope: self.all, as: false|
|
298
|
+
dim1 = dimensions[0]
|
299
|
+
case dim1
|
300
|
+
when Hash
|
301
|
+
dimension_map, dimensions = dim1, dim1.keys
|
302
|
+
when Range
|
303
|
+
exclude_end = dim1.exclude_end?
|
304
|
+
|
305
|
+
from, to = if unmap && column_map(singular_table_name)
|
306
|
+
unmap = false
|
307
|
+
[column_map(singular_table_name)[dim1.begin],column_map(singular_table_name)[dim1.end]]
|
308
|
+
else
|
309
|
+
[dim1.begin, dim1.end]
|
310
|
+
end
|
311
|
+
dimensions = (exclude_end ? (from...to) : (from..to)).to_a
|
312
|
+
end
|
313
|
+
|
314
|
+
blocks_for_tables = scope.instance_eval{ @blocks_for_tables ||= {} }
|
315
|
+
included_blocks = blocks_for_tables[block_type.table_name] ||= {}
|
143
316
|
|
144
|
-
|
145
|
-
|
146
|
-
|
317
|
+
dimensions.flatten.each.with_index(1) do |key, i|
|
318
|
+
if unmap && column_map(singular_table_name)
|
319
|
+
idx = column_map(singular_table_name)[key]
|
320
|
+
key = dimension_map[key] if dimension_map
|
321
|
+
else
|
322
|
+
idx = key
|
323
|
+
key = "t#{key}"
|
324
|
+
end
|
325
|
+
key = "#{as}#{i}" if as
|
326
|
+
block_index = idx / block_type::BLOCK_SIZE
|
327
|
+
block_offset = (idx % block_type::BLOCK_SIZE).succ
|
328
|
+
included_blocks[block_index] ||= []
|
329
|
+
included_blocks[block_index] << {name: key, idx: block_offset}
|
330
|
+
end
|
331
|
+
query = "(SELECT * FROM #{self.table_name} " + blocks_for_tables.reduce('') do |aggregate, (for_table, blocks_for_table)|
|
332
|
+
aggregate +
|
333
|
+
blocks_for_table.reduce('') do |blocks_aggregate, (block_idx, blocks)|
|
334
|
+
blocks_table_name = for_table
|
335
|
+
blocks_aggregate + " LEFT JOIN(SELECT #{blocks_table_name}.data_frame_type, #{blocks_table_name}.data_frame_id, " + blocks.map{|block| "#{blocks_table_name}.t#{block[:idx]} as \"#{block[:name]}\""}.join(', ') + " FROM #{blocks_table_name} "+
|
336
|
+
" WHERE #{blocks_table_name}.period_index = #{block_idx}"+") b#{for_table}#{block_idx} ON b#{for_table}#{block_idx}.data_frame_type = '#{self.name}' AND b#{for_table}#{block_idx}.data_frame_id = #{self.table_name}.id"
|
337
|
+
end
|
338
|
+
end + ") as #{self.table_name}"
|
339
|
+
scope.from(query)
|
147
340
|
}
|
148
341
|
end
|
149
|
-
|
150
|
-
return to_inject
|
151
342
|
end
|
152
343
|
end
|