quandl_cassandra_models 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/LICENSE +7 -0
- data/README.md +7 -0
- data/Rakefile +11 -0
- data/UPGRADE.md +34 -0
- data/config/cassandra.yml +41 -0
- data/lib/quandl/cassandra/models.rb +16 -0
- data/lib/quandl/cassandra/models/column.rb +42 -0
- data/lib/quandl/cassandra/models/column/read.rb +49 -0
- data/lib/quandl/cassandra/models/column/read/collapse.rb +41 -0
- data/lib/quandl/cassandra/models/column/read/column.rb +19 -0
- data/lib/quandl/cassandra/models/column/read/data.rb +59 -0
- data/lib/quandl/cassandra/models/column/read/offset.rb +104 -0
- data/lib/quandl/cassandra/models/column/read/row.rb +20 -0
- data/lib/quandl/cassandra/models/column/read/select_columns.rb +63 -0
- data/lib/quandl/cassandra/models/column/read/transform.rb +53 -0
- data/lib/quandl/cassandra/models/column/read/trim.rb +14 -0
- data/lib/quandl/cassandra/models/column/read/type.rb +25 -0
- data/lib/quandl/cassandra/models/column/write.rb +25 -0
- data/lib/quandl/cassandra/models/column/write/group_data_by_column.rb +36 -0
- data/lib/quandl/cassandra/models/column/write/group_data_by_frequency.rb +24 -0
- data/lib/quandl/cassandra/models/column/write/insert_column_attributes.rb +22 -0
- data/lib/quandl/cassandra/models/column/write/insert_columns.rb +9 -0
- data/lib/quandl/cassandra/models/column_attribute.rb +11 -0
- data/lib/quandl/cassandra/models/data.rb +18 -0
- data/lib/quandl/cassandra/models/data/search.rb +105 -0
- data/lib/quandl/cassandra/models/dataset.rb +87 -0
- data/lib/quandl/cassandra/models/dataset/columns.rb +63 -0
- data/lib/quandl/cassandra/models/dataset_attribute.rb +6 -0
- data/lib/quandl/cassandra/models/multiset.rb +55 -0
- data/lib/quandl/cassandra/models/version.rb +7 -0
- data/migrations/20131105204200_create_datasets.rb +18 -0
- data/migrations/20131105204201_create_columns.rb +18 -0
- data/migrations/20131105204202_create_dataset_attributes.rb +17 -0
- data/migrations/20131105204203_create_column_attributes.rb +17 -0
- data/quandl_cassandra_models.gemspec +28 -0
- data/spec/expectations/string.rb +5 -0
- data/spec/expectations/time.rb +5 -0
- data/spec/factories/dataset.rb +8 -0
- data/spec/lib/quandl/cassandra/models/column/read_spec.rb +27 -0
- data/spec/lib/quandl/cassandra/models/column/write/group_data_by_frequency_spec.rb +28 -0
- data/spec/lib/quandl/cassandra/models/column/write_spec.rb +23 -0
- data/spec/lib/quandl/cassandra/models/column_attribute_spec.rb +16 -0
- data/spec/lib/quandl/cassandra/models/column_spec.rb +17 -0
- data/spec/lib/quandl/cassandra/models/data_spec.rb +105 -0
- data/spec/lib/quandl/cassandra/models/dataset/collapse_spec.rb +44 -0
- data/spec/lib/quandl/cassandra/models/dataset/column_spec.rb +24 -0
- data/spec/lib/quandl/cassandra/models/dataset/persistence_spec.rb +25 -0
- data/spec/lib/quandl/cassandra/models/dataset/row_spec.rb +26 -0
- data/spec/lib/quandl/cassandra/models/dataset/transform_spec.rb +16 -0
- data/spec/lib/quandl/cassandra/models/dataset/trim_spec.rb +74 -0
- data/spec/lib/quandl/cassandra/models/dataset/update_spec.rb +37 -0
- data/spec/lib/quandl/cassandra/models/dataset_attribute_spec.rb +18 -0
- data/spec/lib/quandl/cassandra/models/dataset_spec.rb +117 -0
- data/spec/lib/quandl/cassandra/models/multiset/collapse_spec.rb +122 -0
- data/spec/lib/quandl/cassandra/models/multiset/columns_spec.rb +57 -0
- data/spec/lib/quandl/cassandra/models/multiset/data_spec.rb +25 -0
- data/spec/lib/quandl/cassandra/models/multiset/transform_spec.rb +69 -0
- data/spec/spec_helper.rb +40 -0
- data/tasks/migrations.rake +14 -0
- metadata +212 -0
@@ -0,0 +1,87 @@
|
|
1
|
+
# Record stored in the :datasets table. Row data is read through a
# Quandl::Cassandra::Models::Data scope and written with
# Quandl::Cassandra::Models::Column.write; type, frequency and timestamps
# are delegated to the associated DatasetAttribute.
class Quandl::Cassandra::Models::Dataset < Quandl::Cassandra::Base

  require_relative 'dataset/columns'

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data

  after_initialize :repair_frequency

  # data and the dataset attribute are written before the record is saved
  before_save :save_data, :save_dataset_attribute

  # memoized state is dropped once a save has gone through
  after_save :clear_attributes!

  delegate :type, :updated_at, :created_at, :frequency, to: :dataset_attribute, allow_nil: true

  include Quandl::Cassandra::Models::Dataset::Columns

  # Date.jd of the leading value of the single row returned when the data
  # scope is ordered :asc. Returns nil when any StandardError is raised
  # while looking it up.
  def trim_start
    return @trim_start if @trim_start
    first_row = data.scoped.limit(1).order(:asc)[0]
    @trim_start = Date.jd( first_row[0] )
  rescue StandardError
    nil
  end

  # Counterpart of #trim_start using the scope ordered :desc.
  def trim_end
    return @trim_end if @trim_end
    last_row = data.scoped.limit(1).order(:desc)[0]
    @trim_end = Date.jd( last_row[0] )
  rescue StandardError
    nil
  end

  # The data attribute when data? reports one, otherwise the data scope
  # (stored in @attributes on first access).
  def data
    if data?
      read_attribute(:data)
    else
      @attributes[:data] ||= data_scope
    end
  end

  # Assigns new rows. Anything that is not already a Quandl::Data is wrapped
  # in one, the result is converted with #to_jd, and the attribute is marked
  # as changed before being stored.
  def data=(rows)
    table = rows.is_a?(Quandl::Data) ? rows : Quandl::Data.new(rows)
    table = table.to_jd
    data_will_change!
    @attributes[:data] = table
  end

  # Data scope bound to this dataset.
  def data_scope
    Quandl::Cassandra::Models::Data.dataset(self)
  end

  # Memoized DatasetAttribute obtained with find_or_build for this id.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::Models::DatasetAttribute.find_or_build(id)
  end

  # Discards memoized state so it is looked up again on next access.
  def reload
    clear_attributes!
  end

  protected

  # before_save hook: saves the dataset attribute only when it has changes.
  def save_dataset_attribute
    return unless dataset_attribute.changed?
    dataset_attribute.save
  end

  # before_save hook: writes the data only when it has been changed.
  def save_data
    return unless data_changed?
    Quandl::Cassandra::Models::Column.write( id: id, data: data )
  end

  # Resets every memoized value and reduces @attributes to the id alone.
  def clear_attributes!
    # ancestors (e.g. the Columns concern) get to clear their own state first
    super if defined?(super)
    @dataset_attribute = nil
    @trim_end = nil
    @trim_start = nil
    @attributes = { id: id }
  end

  # after_initialize hook: for persisted records with a blank frequency,
  # derives one from a table of up to 100 rows and saves it on the dataset
  # attribute. Failures are logged and not re-raised.
  def repair_frequency
    return if new_record?
    return unless frequency.blank?
    # repair dataset attribute
    dataset_attribute.frequency = data_scope.limit(100).to_table.frequency.to_s
    dataset_attribute.save
  rescue => error
    Quandl::Logger.error("#{self.class.name}.id #{id} #{error}")
  end

end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Column handling mixed into datasets: resolves the column ids recorded for a
# dataset and exposes the matching ColumnAttribute records, saving them
# alongside the dataset.
module Quandl::Cassandra::Models::Dataset::Columns

  extend ActiveSupport::Concern

  included do
    # define_attributes :column_ids
    before_save :save_columns
  end

  module ClassMethods

    # Column ids stored for dataset +id+, ordered by their position value.
    def find_column_ids_by_id(id)
      Quandl::Cassandra::Models::Dataset.where( id: id ).pluck(:column_id, :position).sort_by{|r| r[1] }.collect{|r| r[0] }
    end

  end

  # Applies one attribute hash per column, by index, building a column where
  # none exists yet at that index.
  #
  # NOTE(review): missing entries are built as Models::Column while #columns
  # otherwise yields Models::ColumnAttribute - confirm this is intended.
  def column_attributes=(column_attrs)
    # #columns hands back a fresh, unmemoized [] when there are no column ids,
    # so pin the array first; otherwise objects built below would be thrown
    # away and assign_attributes would be called on nil.
    @columns ||= columns
    column_attrs.each_with_index do |attrs, index|
      @columns[index] ||= Quandl::Cassandra::Models::Column.new
      @columns[index].assign_attributes(attrs)
    end
    # memoized names/units may no longer match the columns
    @column_names = nil
    @column_units = nil
  end

  # Memoized list of each column's unit.
  def column_units
    @column_units ||= columns.collect(&:unit)
  end

  # Memoized list of each column's name.
  def column_names
    @column_names ||= columns.collect(&:name)
  end

  # ColumnAttribute records in column_ids order. Ids with no stored record
  # get a new ColumnAttribute carrying just that id. Returns an empty array
  # (not memoized) when there are no column ids.
  def columns
    return @columns if @columns
    # nothing to do without column ids
    return [] if column_ids.compact.blank?
    # find columns
    found = Quandl::Cassandra::Models::ColumnAttribute.where( id: column_ids ).all
    # build column where column_id was not found
    @columns = column_ids.collect{|cid| found.detect{|c| c.id.to_s == cid.to_s } || Quandl::Cassandra::Models::ColumnAttribute.new( id: cid ) }
  end

  # Memoized column ids for this record; empty when the id is blank.
  def column_ids
    @column_ids ||= id.blank? ? [] : self.class.find_column_ids_by_id(id)
  end


  protected

  # Drops every column-related memo, including the derived name/unit lists
  # so they cannot outlive the columns they were computed from.
  def clear_attributes!
    @column_units = nil
    @column_names = nil
    @columns = nil
    @column_ids = nil
  end


  private

  # before_save hook: saves each column that is new or has changes.
  def save_columns
    columns.each{|c| c.save if c.changed? || c.new_record? }
  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Dataset assembled from columns of other datasets, described by a
# comma-separated list of "dataset_id.column" entries in datasets_columns.
class Quandl::Cassandra::Models::Multiset < Quandl::Cassandra::Models::Dataset

  table_name :datasets

  define_attributes :datasets_columns

  # Builds a new multiset for the given datasets_columns specification.
  def self.with_columns(datasets_columns)
    new( datasets_columns: datasets_columns )
  end

  # Data scope collapsed to :source and restricted to this multiset's column
  # ids together with their frequencies.
  def data_scope
    scope = Quandl::Cassandra::Models::Data.collapse(:source)
    scope.column_ids( column_ids ).column_frequencies( column_frequencies )
  end

  # Memoized frequency of every column.
  def column_frequencies
    @column_frequencies ||= columns.collect(&:frequency)
  end

  # Memoized column ids resolved from datasets_columns.
  def column_ids
    @column_ids ||= column_ids_from_datasets_columns
  end

  # Resolves each "dataset_id.column" entry to a column id. The column part
  # is 1-based ("1" maps to index 0). Entries whose dataset yields no
  # column-id array are skipped.
  #
  # NOTE(review): a missing or "0" column part gives index -1, i.e. the
  # dataset's last column id - confirm that is acceptable.
  def column_ids_from_datasets_columns
    datasets_columns.to_s.split(',').each_with_object([]) do |entry, ids|
      dataset_id, column = entry.split('.')
      # each dataset's column ids are looked up once and cached in #datasets
      datasets[dataset_id] ||= Quandl::Cassandra::Models::Dataset.find(dataset_id).try(:column_ids)
      known_ids = datasets[dataset_id]
      ids << known_ids[ column.to_i - 1 ] if known_ids.is_a?(Array)
    end
  end

  # Per-instance cache of column ids keyed by dataset id.
  def datasets
    @datasets ||= {}
  end

  # Memoized frequency for the multiset: walks the valid collapses in order
  # and keeps the last one that occurs among the column frequencies; nil
  # when none of them occurs.
  def frequency
    return @frequency if @frequency
    present = column_frequencies.compact.uniq
    Quandl::Operation::Collapse.valid_collapses.each do |collapse|
      label = collapse.to_s
      @frequency = label if present.include?(label)
    end
    @frequency
  end

  # Memoized DatasetAttribute built with no attributes.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::Models::DatasetAttribute.new
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Migration for the datasets table, keyed by (id, column_id) and carrying
# each column's position.
class CreateDatasets < CassandraMigrations::Migration

  # Creates the table with a raw CQL statement.
  def up
    statement = "CREATE TABLE datasets (
      id double,
      column_id uuid,
      position int,
      PRIMARY KEY (id, column_id)
      )
      WITH COMPACT STORAGE;"
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table :datasets
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Migration for the columns table: one double value per (id, type, time).
class CreateColumns < CassandraMigrations::Migration

  # Creates the table with a raw CQL statement.
  def up
    statement = "CREATE TABLE columns (
      id uuid,
      type text,
      time int,
      value double,
      PRIMARY KEY (id, type, time)
      ) WITH COMPACT STORAGE;"
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table :columns
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Migration for the dataset_attributes table (timestamps, frequency and type
# per dataset id), created with caching='ALL'.
class CreateDatasetAttributes < CassandraMigrations::Migration

  # Creates the table with a raw CQL statement.
  def up
    statement = "CREATE TABLE dataset_attributes (
      id double PRIMARY KEY,
      created_at timestamp,
      frequency text,
      type text,
      updated_at timestamp) WITH caching='ALL';"
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table :dataset_attributes
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Migration for the column_attributes table (frequency, name and units per
# column id), created with caching='ALL'.
class CreateColumnAttributes < CassandraMigrations::Migration

  # Creates the table with a raw CQL statement.
  def up
    statement = "CREATE TABLE column_attributes (
      id uuid PRIMARY KEY,
      frequency text,
      name text,
      units text
      ) WITH caching='ALL';"
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table :column_attributes
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "quandl/cassandra/models/version"
|
4
|
+
|
5
|
+
# Gem specification for quandl_cassandra_models. File lists come from git,
# so the gem has to be built from a git checkout.
Gem::Specification.new do |spec|
  # identity
  spec.name        = "quandl_cassandra_models"
  spec.version     = Quandl::Cassandra::Models::VERSION
  spec.authors     = ["Blake Hilscher"]
  spec.email       = ["blake@hilscher.ca"]
  spec.homepage    = "http://blake.hilscher.ca/"
  spec.license     = "MIT"
  spec.summary     = "Quandl cassandra interface."
  spec.description = "Quandl cassandra interface. CQL."

  # packaged files
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # development-only tooling
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 2.13"
  spec.add_development_dependency "factory_girl_rails"
  spec.add_development_dependency "fivemat", "~> 1.2"
  spec.add_development_dependency "pry"

  # needed at runtime
  spec.add_runtime_dependency "quandl_cassandra", "~> 1.1"

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Writes a single-column annual dataset before each example, then reads it
# back with a growing set of read options.
describe Quandl::Cassandra::Models::Column::Write do

  let(:id){ rand(10000*10000) + 10000*10000 }
  let(:data){ Quandl::Fabricate::Data.rand( columns: 1, rows: 15, nils: false, frequency: :annual ) }
  before(:each){ Quandl::Cassandra::Models::Column.write( id: id, data: data ) }

  # Options accumulate: each example reads with every option declared up to
  # and including its own.
  attrs = {}
  {
    transform:  :rdiff,
    collapse:   :annual,
    row:        0,
    column:     1,
  }.each do |key, value|
    attrs[key] = value
    # Snapshot the options for this example. Example bodies run after the
    # whole loop has finished, so reading the shared hash from inside the
    # example would make every example use the final, complete option set.
    params = attrs.dup
    it "should read #{params.to_query}" do
      # NOTE(review): other specs in this package reference
      # Quandl::Cassandra::Models::Dataset - confirm this constant resolves.
      read_attrs = params.merge( id: id, column_ids: Quandl::Cassandra::Dataset.find(id).column_ids )
      Quandl::Cassandra::Models::Column.read(read_attrs).should be_present
    end

  end


end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Groups 12 weekly rows of fabricated data and checks the row count held
# under each frequency key.
describe Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency do

  let(:data) do
    Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly )
  end
  let(:strategy) do
    Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency.new({ data: data })
  end

  describe "#frequency_data" do
    subject { strategy.frequency_data }

    # one entry per frequency checked below
    its(:length) { should eq 4 }

    it "should have source" do
      subject[:source].count.should eq 12
    end

    it "should have monthly" do
      subject[:monthly].count.should <= 4
    end

    it "should have quarterly" do
      subject[:quarterly].count.should <= 2
    end

    it "should have annual" do
      subject[:annual].count.should eq 1
    end

  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Checks that Column.write leaves its input untouched and that written data
# can be read back.
describe Quandl::Cassandra::Models::Column::Write do

  let(:id) { rand(10000*10000) + 10000*10000 }
  let(:data) do
    Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly )
  end

  it "should not alter the original data" do
    # copy each row up front so changes made by the write would show up
    source_data = data.to_a.map { |row| row.clone }
    Quandl::Cassandra::Models::Column.write( id: id, data: data )
    data.should eq source_data
  end

  context "given tiny data array" do
    let(:data) { Quandl::Fabricate::Data.rand( columns: 1, rows: 2, nils: false ) }

    before(:each) do
      Quandl::Cassandra::Models::Column.write( id: id, data: data )
    end

    it "should have written the data" do
      Quandl::Cassandra::Models::Column.read( id: id ).should eq data
    end
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Creates a two-column dataset and inspects the ColumnAttribute found for
# its first column id.
describe Quandl::Cassandra::Models::ColumnAttribute do

  let(:id) { rand(10000*10000) + 10000*10000 }
  let(:data) { Quandl::Fabricate::Data.rand( rows: 10, columns: 2 ) }
  let(:dataset) do
    Quandl::Cassandra::Models::Dataset.create( id: id, data: data )
  end

  let(:column) do
    Quandl::Cassandra::Models::ColumnAttribute.find(dataset.column_ids.first)
  end

  subject { column }

  its(:id) { should eq dataset.column_ids.first }
  its(:frequency) { should eq 'daily' }

end
|