quandl_cassandra 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/Gemfile +2 -0
- data/LICENSE +7 -0
- data/README.md +7 -0
- data/Rakefile +11 -0
- data/UPGRADE.md +3 -0
- data/lib/quandl/cassandra/base/attributes.rb +103 -0
- data/lib/quandl/cassandra/base/callbacks.rb +15 -0
- data/lib/quandl/cassandra/base/connection.rb +49 -0
- data/lib/quandl/cassandra/base/logging.rb +40 -0
- data/lib/quandl/cassandra/base/naming.rb +19 -0
- data/lib/quandl/cassandra/base/persistence.rb +67 -0
- data/lib/quandl/cassandra/base/sanitization.rb +38 -0
- data/lib/quandl/cassandra/base/schema.rb +79 -0
- data/lib/quandl/cassandra/base/scoping.rb +122 -0
- data/lib/quandl/cassandra/base.rb +51 -0
- data/lib/quandl/cassandra/configuration.rb +34 -0
- data/lib/quandl/cassandra/error.rb +10 -0
- data/lib/quandl/cassandra/types/abstract_type.rb +33 -0
- data/lib/quandl/cassandra/types/boolean_type.rb +10 -0
- data/lib/quandl/cassandra/types/decimal_type.rb +9 -0
- data/lib/quandl/cassandra/types/double_type.rb +9 -0
- data/lib/quandl/cassandra/types/float_type.rb +9 -0
- data/lib/quandl/cassandra/types/integer_type.rb +9 -0
- data/lib/quandl/cassandra/types/long_type.rb +9 -0
- data/lib/quandl/cassandra/types/timestamp_type.rb +15 -0
- data/lib/quandl/cassandra/types/utf8_type.rb +13 -0
- data/lib/quandl/cassandra/types/uuid_type.rb +21 -0
- data/lib/quandl/cassandra/types.rb +42 -0
- data/lib/quandl/cassandra/version.rb +5 -0
- data/lib/quandl/cassandra.rb +30 -0
- data/lib/quandl/cassandra_models/column/read/collapse.rb +64 -0
- data/lib/quandl/cassandra_models/column/read/column.rb +18 -0
- data/lib/quandl/cassandra_models/column/read/data_table.rb +57 -0
- data/lib/quandl/cassandra_models/column/read/offset.rb +114 -0
- data/lib/quandl/cassandra_models/column/read/query.rb +55 -0
- data/lib/quandl/cassandra_models/column/read/row.rb +20 -0
- data/lib/quandl/cassandra_models/column/read/transform.rb +53 -0
- data/lib/quandl/cassandra_models/column/read/type.rb +25 -0
- data/lib/quandl/cassandra_models/column/read.rb +28 -0
- data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +42 -0
- data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +24 -0
- data/lib/quandl/cassandra_models/column/write/insert_columns.rb +22 -0
- data/lib/quandl/cassandra_models/column/write/insert_data.rb +39 -0
- data/lib/quandl/cassandra_models/column/write.rb +22 -0
- data/lib/quandl/cassandra_models/column.rb +20 -0
- data/lib/quandl/cassandra_models/column_attribute.rb +11 -0
- data/lib/quandl/cassandra_models/data.rb +52 -0
- data/lib/quandl/cassandra_models/dataset.rb +83 -0
- data/lib/quandl/cassandra_models/dataset_attribute.rb +6 -0
- data/lib/quandl/cassandra_models/multiset.rb +50 -0
- data/lib/quandl/strategy.rb +59 -0
- data/quandl_cassandra.gemspec +35 -0
- data/spec/expectations/string.rb +5 -0
- data/spec/expectations/time.rb +5 -0
- data/spec/factories/dataset.rb +8 -0
- data/spec/lib/quandl/cassandra/base/scoping_spec.rb +40 -0
- data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +28 -0
- data/spec/lib/quandl/cassandra_models/column/write_spec.rb +15 -0
- data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +16 -0
- data/spec/lib/quandl/cassandra_models/column_spec.rb +17 -0
- data/spec/lib/quandl/cassandra_models/data_spec.rb +34 -0
- data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +41 -0
- data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +25 -0
- data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +24 -0
- data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +26 -0
- data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +16 -0
- data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +74 -0
- data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +37 -0
- data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +18 -0
- data/spec/lib/quandl/cassandra_models/dataset_spec.rb +63 -0
- data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +122 -0
- data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +57 -0
- data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +25 -0
- data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +68 -0
- data/spec/lib/quandl/cassandra_spec.rb +12 -0
- data/spec/spec_helper.rb +37 -0
- metadata +339 -0
@@ -0,0 +1,42 @@
|
|
1
|
+
# Write-strategy step that regroups row-oriented data into per-column series.
#
# INPUT  (frequency_data attribute):
#   { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }
#
# OUTPUT (frequency_column_data attribute):
#   { source: { UUID: [[1,2], [2,4]], UUID: [[1,3],[2,8]] }}
class Quandl::Cassandra::Column::Write::GroupDataByColumn < Quandl::Cassandra::Column::Write

  # Rebuilds frequency_column_data from scratch: one grouped hash per
  # frequency found in frequency_data.
  def perform
    self.frequency_column_data = {}
    frequency_data.each do |frequency, rows|
      self.frequency_column_data[frequency] = group_data_by_column(rows)
    end
  end

  # Converts [ [date, val, val], ... ] into { column_id => [ [date, val], ... ] }.
  # Every column position seen gets an array, even when all of its values are
  # blank; blank values themselves are not stored.
  def group_data_by_column(data)
    grouped = {}
    data.each do |row|
      # the first cell is the date, the remaining cells are one value per column
      date = row[0]
      row[1..-1].each_with_index do |value, index|
        series = (grouped[column_id(index)] ||= [])
        series << [date, value] unless value.blank?
      end
    end
    grouped
  end

  # Returns the column id stored at +index+. The id list is loaded through
  # Dataset.find_column_ids_by_id on first use, and a position without an id
  # is assigned a freshly generated UUID.
  def column_id(index)
    self.column_ids ||= Quandl::Cassandra::Dataset.find_column_ids_by_id(id)
    self.column_ids[index] ||= SecureRandom.uuid
  end

end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Write-strategy step that produces one data array per frequency.
#
# INPUT  (data attribute, exposing data_array):
#   [ [1,2,3], [2,4,8], ... ]
#
# OUTPUT (frequency_data attribute):
#   { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }
class Quandl::Cassandra::Column::Write::GroupDataByFrequency < Quandl::Cassandra::Column::Write

  # Records the data's own frequency, stores a copy of the data array under
  # :source, then stores one collapsed copy for every frequency returned by
  # Collapse.collapses_greater_than.
  def perform
    self.frequency = data.frequency
    self.frequency_data = {}

    # swap in a clone before collapsing, so the object passed in is not the one collapsed
    self.data = data.clone
    self.frequency_data[:source] = data.data_array.clone

    collapsed_frequencies = Quandl::Operation::Collapse.collapses_greater_than(data.frequency)
    collapsed_frequencies.each do |target_frequency|
      # NOTE(review): collapse's return value is ignored and data_array is
      # re-read afterwards, so it presumably collapses in place - confirm.
      data.collapse(target_frequency)
      self.frequency_data[target_frequency] = data.data_array.clone
    end
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Write-strategy step that registers each column id of the dataset.
#
# Reads the id, frequency and column_ids attributes; for every column id it
# inserts one row into `datasets` (with the column's position) and one row
# into `column_attributes` (with the frequency).
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write

  # Executes both insert statements for every column id, in position order.
  # Does nothing when column_ids is blank.
  def perform
    return if column_ids.blank?

    column_ids.each_with_index do |column_id, position|
      [
        datasets_statement( column_id, position ),
        column_attributes_statement( column_id )
      ].each do |cql|
        Quandl::Cassandra::Base.execute( cql )
      end
    end
  end

  # CQL linking +column_id+ to this dataset at +position+.
  def datasets_statement( column_id, position )
    "INSERT INTO datasets (id, column_id, position) VALUES (#{id}, #{column_id}, #{position})"
  end

  # CQL recording the frequency attribute for +column_id+.
  def column_attributes_statement( column_id )
    "INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{frequency}' )"
  end

end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Write-strategy step that inserts every [time, value] pair found in
# frequency_column_data into the `columns` table, using asynchronous
# unlogged batches.
class Quandl::Cassandra::Column::Write::InsertData < Quandl::Cassandra::Column::Write

  # Sends all batches, then calls `value` on each returned future and
  # returns the collected results.
  # NOTE(review): `value` presumably blocks until the batch has completed - confirm.
  def perform
    insert_data_in_batches.collect(&:value)
  end

  # Walks frequency => column_id => rows, building one INSERT per row and
  # flushing a batch whenever the configured batch size is reached.
  #
  # Returns the array of objects returned by execute_async_batch, one per batch sent.
  def insert_data_in_batches
    futures = []
    statements = []
    frequency_column_data.each do |frequency, column_data|
      column_data.each do |column_id, rows|
        rows.each do |time_value|
          # each row is a [time, value] pair
          statements << statement( column_id, frequency, time_value[0], time_value[1] )
          # flush once Quandl::Cassandra.configuration.batch_size statements are pending
          if statements.count >= Quandl::Cassandra.configuration.batch_size
            futures << execute_async_batch(statements)
            # start a new array; the batch text has already been built from the old one
            statements = []
          end
        end
      end
    end
    # send whatever is left over after the last full batch
    futures << execute_async_batch(statements) if statements.count > 0
    futures
  end

  # Wraps +statements+ in a single UNLOGGED BATCH and submits it through
  # Base.execute_async, returning whatever that call returns.
  def execute_async_batch(statements)
    batch = %Q{BEGIN UNLOGGED BATCH\n#{statements.join("\n")}\nAPPLY BATCH;}
    Quandl::Cassandra::Base.execute_async( batch )
  end

  # CQL inserting one value into `columns`. +id+ here is the column id and
  # shadows the strategy's own id attribute inside this method.
  def statement( id, type, time, value )
    "INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Base class and entry point of the column write pipeline. The individual
# steps subclass it, so they all share the attribute accessors declared here.
class Quandl::Cassandra::Column::Write < Quandl::Strategy::Strategize

  # attributes passed from step to step
  define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data

  # the steps subclass this class, so they are loaded from inside its body
  require_relative 'write/insert_data'
  require_relative 'write/insert_columns'
  require_relative 'write/group_data_by_column'
  require_relative 'write/group_data_by_frequency'

  # Runs the write steps in order against +attributes+ and returns whatever
  # Quandl::Strategy#perform returns.
  def self.perform(attributes)
    steps = [
      Quandl::Cassandra::Column::Write::GroupDataByFrequency,
      Quandl::Cassandra::Column::Write::GroupDataByColumn,
      Quandl::Cassandra::Column::Write::InsertData,
      Quandl::Cassandra::Column::Write::InsertColumns
    ]
    pipeline = Quandl::Strategy.new( attributes ) do |chain|
      steps.each { |step| chain.use step }
    end
    pipeline.perform
  end

end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Model for the `columns` table. Reading and writing are delegated to the
# Read and Write strategies loaded below.
class Quandl::Cassandra::Column < Quandl::Cassandra::Base

  table_name :columns

  require_relative 'column/read'
  require_relative 'column/write'

  # Runs the Read strategy with +args+ and returns the :data entry of its result.
  def self.read(*args)
    result = Quandl::Cassandra::Column::Read.perform(*args)
    result[:data]
  end

  # Runs the Write strategy with +args+ and returns its result unchanged.
  def self.write(*args)
    Quandl::Cassandra::Column::Write.perform(*args)
  end

end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# Composable query object for column data. Scopes accumulate attributes;
# calling `all` on a scope (directly or through a delegated method) performs
# the read via Quandl::Cassandra::Column.read.
class Quandl::Cassandra::Data

  include ScopeComposer::Model

  has_scope_composer

  delegate :where, to: :scope

  # scope by a dataset-like object exposing id and column_ids
  scope :dataset, ->(record){ id(record.id).column_ids(record.column_ids) }

  # plain scopes
  scope :row, :id, :limit, :offset, :column, :accuracy, :frequency

  # list scopes: accept either several arguments or a single array
  scope :column_frequencies, ->(*frequencies){ where( column_frequencies: Array(frequencies).flatten ) }
  scope :column_ids, ->(*column_ids){ where( column_ids: Array(column_ids).flatten ) }

  # collapse / transform are applied only when the operation is recognised;
  # otherwise the lambda evaluates to nil
  scope :collapse, ->(value){
    where( collapse: value.to_sym ) if Quandl::Operation::Collapse.valid_collapse?(value)
  }
  scope :transform, ->(value){
    where( transform: value.to_sym ) if Quandl::Operation::Transform.valid_transformation?(value)
  }

  # anything other than :asc is treated as :desc
  scope :order, ->(value){
    direction = value.to_sym == :asc ? :asc : :desc
    where( order: direction )
  }

  # trim bounds are stored as Julian day numbers.
  # NOTE(review): parse_date can return nil or an unparsed String, in which
  # case the .jd call below raises NoMethodError.
  scope :trim_start, ->(date){ where( trim_start: parse_date(date).jd ) }
  scope :trim_end, ->(date){ where( trim_end: parse_date(date).jd ) }

  scope_helper :find, ->(record_id){ id(record_id).to_table }
  scope_helper :to_table, ->{ all }

  # Coerces +date+ to a Date when it is an Integer, a numeric String (both
  # read as Julian day numbers) or a YYYY-MM-DD String. Any other input is
  # returned unchanged; nil is returned if the conversion raises.
  scope_helper :parse_date, ->( date ){
    begin
      case date
      when Integer
        Date.jd(date)
      when String
        if date.numeric?
          Date.jd(date.to_i)
        elsif date =~ /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/
          Date.parse(date)
        else
          date
        end
      else
        date
      end
    rescue
      nil
    end
  }

  scope.class_eval do

    delegate :inspect, :==, to: :all, allow_nil: true

    # forward array methods to the loaded result.
    # NOTE(review): respond_to? is asked of the scope class itself here, not
    # of its instances - confirm that is the intended check.
    Array.forwardable_methods.each do |method_name|
      next if self.respond_to?(method_name)
      delegate(method_name, to: :all, allow_nil: true )
    end

    # Performs the read once and memoizes the result.
    def all
      @all ||= Quandl::Cassandra::Column.read( attributes.merge(scope_attributes) )
    end

  end

end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# Model for the `datasets` table: a dataset id mapped to an ordered list of
# column ids, with its data read and written through the column strategies.
class Quandl::Cassandra::Dataset < Quandl::Cassandra::Base

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data, :column_ids

  before_save :save_columns, :save_data, :save_dataset_attribute
  after_save :clear_attributes

  delegate :type, :updated_at, :created_at, :frequency, to: :dataset_attribute, allow_nil: true

  # Returns the column ids stored for dataset +id+, ordered by position.
  #
  # The class is referenced by its fully qualified name: this file opens the
  # class as `class Quandl::Cassandra::Dataset`, so the Quandl::Cassandra
  # namespace is not in lexical scope and a bare `Dataset` would resolve to a
  # top-level constant of that name (or fail) instead of this class.
  def self.find_column_ids_by_id(id)
    rows = Quandl::Cassandra::Dataset.where( id: id ).pluck(:column_id, :position)
    rows.sort_by{|r| r[1] }.collect{|r| r[0] }
  end

  # Assigns one attribute hash per column, by position, creating a column
  # object where none exists yet.
  # NOTE(review): new entries are Quandl::Cassandra::Column instances while
  # #columns loads Quandl::Cassandra::ColumnAttribute objects - confirm which
  # class is intended here.
  def column_attributes=(column_attrs)
    column_attrs.each_with_index do |attrs, index|
      self.columns[index] ||= Quandl::Cassandra::Column.new
      self.columns[index].assign_attributes(attrs)
    end
  end

  # ColumnAttribute record for each column id, memoized.
  def columns
    @columns ||= column_ids.collect{|cid| Quandl::Cassandra::ColumnAttribute.find(cid) }
  end

  # Column ids of this dataset ordered by position, memoized.
  def column_ids
    @column_ids ||= self.class.find_column_ids_by_id(id)
  end

  # First cell of the first row when reading one row in ascending order;
  # nil when nothing is returned. Memoized.
  def trim_start
    @trim_start ||= data_scope.limit(1).order(:asc).try(:[], 0).try(:[], 0)
  end

  # First cell of the first row when reading one row in descending order;
  # nil when nothing is returned. Memoized.
  def trim_end
    @trim_end ||= data_scope.limit(1).order(:desc).try(:[], 0).try(:[], 0)
  end

  # Returns the assigned data when data? is true; otherwise falls back to
  # (and caches) the data scope for this dataset.
  def data
    return read_attribute(:data) if data?
    @attributes[:data] ||= data_scope
  end

  # Stores +rows+ as a Quandl::Data, wrapping them if they are not one already.
  def data=(rows)
    rows = Quandl::Data.new(rows) unless rows.is_a?(Quandl::Data)
    write_attribute(:data, rows)
  end

  # Data scope restricted to this dataset's id and column ids.
  def data_scope
    Quandl::Cassandra::Data.dataset(self)
  end

  # DatasetAttribute found or built for this dataset's id, memoized.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::DatasetAttribute.find_or_build(id)
  end

  protected

  # before_save: copies the data's frequency onto the dataset attribute and saves it.
  def save_dataset_attribute
    dataset_attribute.frequency = data.frequency.to_s
    dataset_attribute.save
  end

  # before_save: saves every column object.
  def save_columns
    columns.each(&:save)
  end

  # before_save: writes the data through the Write strategy, only when it changed.
  def save_data
    Quandl::Cassandra::Column.write( id: id, data: data ) if data_changed?
  end

  # after_save: drops memoized values so they are re-read on next access.
  def clear_attributes
    @trim_start = nil
    @trim_end = nil
    @columns = nil
    @column_ids = nil
    @attributes[:data] = nil
  end

end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# A Dataset assembled from columns of other datasets. The selection is given
# as a comma-separated string of "dataset_id.column" references.
class Quandl::Cassandra::Multiset < Quandl::Cassandra::Dataset

  table_name :datasets

  define_attributes :datasets_columns

  # Builds a multiset for the given datasets_columns string.
  def self.with_columns(datasets_columns)
    new( datasets_columns: datasets_columns )
  end

  # Source-frequency data scope over the selected column ids and their frequencies.
  def data_scope
    Quandl::Cassandra::Data.collapse(:source).column_ids( column_ids ).column_frequencies( column_frequencies )
  end

  # Frequency of each selected column, memoized.
  def column_frequencies
    @column_frequencies ||= columns.collect(&:frequency)
  end

  # Selected column ids, resolved from datasets_columns and memoized.
  def column_ids
    @column_ids ||= column_ids_from_datasets_columns
  end

  # Resolves every "dataset_id.column" reference to a column id. Each
  # referenced dataset's column ids are looked up once and cached in #datasets.
  # NOTE(review): the column number is converted with to_i and reduced by one,
  # so a missing or 0 column yields index -1, i.e. the dataset's last column -
  # confirm that is intended.
  def column_ids_from_datasets_columns
    datasets_columns.split(',').map do |reference|
      dataset_id, column = reference.split('.')
      datasets[dataset_id] ||= Quandl::Cassandra::Dataset.find(dataset_id).column_ids
      datasets[dataset_id][ column.to_i - 1 ]
    end
  end

  # Cache of dataset id => column ids.
  def datasets
    @datasets ||= {}
  end

  # Frequency reported by the data itself rather than by a dataset attribute.
  def frequency
    data.frequency
  end

  # A new, memoized DatasetAttribute; unlike Dataset, nothing is looked up by id.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::DatasetAttribute.new
  end

end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Runs an ordered list of step classes over a shared attributes object: each
# step is instantiated with the current attributes, and the step's resulting
# attributes become the input of the next step.
class Quandl::Strategy

  attr_accessor :attributes

  # Forwards all arguments and the block to #setup.
  def initialize(*args, &block)
    setup(*args, &block)
  end

  # Stores the initial attributes and, when a block is given, yields the
  # strategy itself so steps can be registered with #use.
  def setup(attributes, &block)
    self.attributes = attributes
    yield self if block_given?
  end

  # Instantiates every registered step in order, carrying the attributes
  # forward after each one, and returns the final attributes.
  def perform
    classes.each do |step|
      self.attributes = step.new(attributes).attributes
    end
    attributes
  end

  # Registers +klass+ as a step; a class that is already registered is ignored.
  def use(klass)
    return if classes.include?(klass)
    classes << klass
  end

  # Registered step classes, in registration order.
  def classes
    @classes ||= []
  end

  # Base class for steps: holds its own copy of the attributes and runs
  # #perform on construction when the subclass defines it.
  class Strategize

    attr_accessor :attributes

    # Builds the step with +attrs+ and returns its resulting attributes.
    def self.call(attrs)
      new(attrs).attributes
    end

    # Clones +attrs+ when it can be cloned, stores it, and runs #perform if
    # the instance responds to it.
    def initialize(attrs)
      self.attributes = attrs.respond_to?(:clone) ? attrs.clone : attrs
      perform if respond_to?(:perform)
    end

    # Defines a reader and a writer for each name; both go through the
    # attributes object using the symbolized name as key.
    def self.define_attributes(*names)
      names.each do |name|
        key = name.to_sym
        define_method(name) { attributes[key] }
        define_method("#{name}=") { |value| attributes[key] = value }
      end
    end

  end

end
|