quandl_cassandra_models 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +7 -0
- data/Gemfile +4 -0
- data/LICENSE +7 -0
- data/README.md +7 -0
- data/Rakefile +11 -0
- data/UPGRADE.md +34 -0
- data/config/cassandra.yml +41 -0
- data/lib/quandl/cassandra/models.rb +16 -0
- data/lib/quandl/cassandra/models/column.rb +42 -0
- data/lib/quandl/cassandra/models/column/read.rb +49 -0
- data/lib/quandl/cassandra/models/column/read/collapse.rb +41 -0
- data/lib/quandl/cassandra/models/column/read/column.rb +19 -0
- data/lib/quandl/cassandra/models/column/read/data.rb +59 -0
- data/lib/quandl/cassandra/models/column/read/offset.rb +104 -0
- data/lib/quandl/cassandra/models/column/read/row.rb +20 -0
- data/lib/quandl/cassandra/models/column/read/select_columns.rb +63 -0
- data/lib/quandl/cassandra/models/column/read/transform.rb +53 -0
- data/lib/quandl/cassandra/models/column/read/trim.rb +14 -0
- data/lib/quandl/cassandra/models/column/read/type.rb +25 -0
- data/lib/quandl/cassandra/models/column/write.rb +25 -0
- data/lib/quandl/cassandra/models/column/write/group_data_by_column.rb +36 -0
- data/lib/quandl/cassandra/models/column/write/group_data_by_frequency.rb +24 -0
- data/lib/quandl/cassandra/models/column/write/insert_column_attributes.rb +22 -0
- data/lib/quandl/cassandra/models/column/write/insert_columns.rb +9 -0
- data/lib/quandl/cassandra/models/column_attribute.rb +11 -0
- data/lib/quandl/cassandra/models/data.rb +18 -0
- data/lib/quandl/cassandra/models/data/search.rb +105 -0
- data/lib/quandl/cassandra/models/dataset.rb +87 -0
- data/lib/quandl/cassandra/models/dataset/columns.rb +63 -0
- data/lib/quandl/cassandra/models/dataset_attribute.rb +6 -0
- data/lib/quandl/cassandra/models/multiset.rb +55 -0
- data/lib/quandl/cassandra/models/version.rb +7 -0
- data/migrations/20131105204200_create_datasets.rb +18 -0
- data/migrations/20131105204201_create_columns.rb +18 -0
- data/migrations/20131105204202_create_dataset_attributes.rb +17 -0
- data/migrations/20131105204203_create_column_attributes.rb +17 -0
- data/quandl_cassandra_models.gemspec +28 -0
- data/spec/expectations/string.rb +5 -0
- data/spec/expectations/time.rb +5 -0
- data/spec/factories/dataset.rb +8 -0
- data/spec/lib/quandl/cassandra/models/column/read_spec.rb +27 -0
- data/spec/lib/quandl/cassandra/models/column/write/group_data_by_frequency_spec.rb +28 -0
- data/spec/lib/quandl/cassandra/models/column/write_spec.rb +23 -0
- data/spec/lib/quandl/cassandra/models/column_attribute_spec.rb +16 -0
- data/spec/lib/quandl/cassandra/models/column_spec.rb +17 -0
- data/spec/lib/quandl/cassandra/models/data_spec.rb +105 -0
- data/spec/lib/quandl/cassandra/models/dataset/collapse_spec.rb +44 -0
- data/spec/lib/quandl/cassandra/models/dataset/column_spec.rb +24 -0
- data/spec/lib/quandl/cassandra/models/dataset/persistence_spec.rb +25 -0
- data/spec/lib/quandl/cassandra/models/dataset/row_spec.rb +26 -0
- data/spec/lib/quandl/cassandra/models/dataset/transform_spec.rb +16 -0
- data/spec/lib/quandl/cassandra/models/dataset/trim_spec.rb +74 -0
- data/spec/lib/quandl/cassandra/models/dataset/update_spec.rb +37 -0
- data/spec/lib/quandl/cassandra/models/dataset_attribute_spec.rb +18 -0
- data/spec/lib/quandl/cassandra/models/dataset_spec.rb +117 -0
- data/spec/lib/quandl/cassandra/models/multiset/collapse_spec.rb +122 -0
- data/spec/lib/quandl/cassandra/models/multiset/columns_spec.rb +57 -0
- data/spec/lib/quandl/cassandra/models/multiset/data_spec.rb +25 -0
- data/spec/lib/quandl/cassandra/models/multiset/transform_spec.rb +69 -0
- data/spec/spec_helper.rb +40 -0
- data/tasks/migrations.rake +14 -0
- metadata +212 -0
@@ -0,0 +1,87 @@
|
|
1
|
+
# Dataset record backed by the +datasets+ table.
#
# Row data is reachable through #data / #data=, while type, timestamps and
# frequency are delegated to the associated DatasetAttribute (nil-safe).
# Column handling comes from the Dataset::Columns concern.
class Quandl::Cassandra::Models::Dataset < Quandl::Cassandra::Base

  require_relative 'dataset/columns'

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data

  # Callback order matters: data and attribute records are written before the
  # save, and every memoized value is dropped once the save has finished.
  after_initialize :repair_frequency
  before_save :save_data, :save_dataset_attribute
  after_save :clear_attributes!

  delegate :type, :updated_at, :created_at, :frequency,
    to: :dataset_attribute, allow_nil: true

  include Quandl::Cassandra::Models::Dataset::Columns

  # Date built (via Date.jd) from the first value of the first row when the
  # data is ordered ascending. Memoized. Returns nil if the lookup raises.
  def trim_start
    @trim_start ||= begin
      first_row = data.scoped.limit(1).order(:asc)[0]
      Date.jd( first_row[0] )
    end
  rescue
    nil
  end

  # Same as #trim_start but for descending order, i.e. the other end of the
  # data. Memoized. Returns nil if the lookup raises.
  def trim_end
    @trim_end ||= begin
      last_row = data.scoped.limit(1).order(:desc)[0]
      Date.jd( last_row[0] )
    end
  rescue
    nil
  end

  # Returns the assigned data attribute when one is present; otherwise falls
  # back to (and caches) the query scope returned by #data_scope.
  def data
    if data?
      read_attribute(:data)
    else
      @attributes[:data] ||= data_scope
    end
  end

  # Assigns new rows. Anything that is not already a Quandl::Data is wrapped
  # in one, converted with +to_jd+, and the attribute is flagged as changed.
  def data=(rows)
    wrapped = rows.is_a?(Quandl::Data) ? rows : Quandl::Data.new(rows)
    julian = wrapped.to_jd
    data_will_change!
    @attributes[:data] = julian
  end

  # Query scope over the Data model for this dataset.
  def data_scope
    Quandl::Cassandra::Models::Data.dataset(self)
  end

  # DatasetAttribute for this dataset's id, found or built once and memoized.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::Models::DatasetAttribute.find_or_build(id)
  end

  # Drops all cached state so that it is read again on next access.
  def reload
    clear_attributes!
  end

  protected

  # before_save: persists the attribute record only when it has changes.
  def save_dataset_attribute
    attribute = dataset_attribute
    attribute.save if attribute.changed?
  end

  # before_save: writes the rows through Column.write only when data changed.
  def save_data
    return unless data_changed?
    Quandl::Cassandra::Models::Column.write( id: id, data: data )
  end

  # Resets memoized values and shrinks the attribute hash down to the id.
  # A parent implementation, when one exists, runs first.
  def clear_attributes!
    super if defined?(super)
    @trim_start = @trim_end = @dataset_attribute = nil
    @attributes = { id: id }
  end

  # after_initialize: for persisted records with a blank frequency, derives
  # one from the first 100 rows of the data scope and saves it on the
  # attribute record. Errors are logged rather than raised.
  def repair_frequency
    return if new_record?
    return unless frequency.blank?
    dataset_attribute.frequency = data_scope.limit(100).to_table.frequency.to_s
    dataset_attribute.save
  rescue => e
    Quandl::Logger.error("#{self.class.name}.id #{id} #{e}")
  end

end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# Column-related behaviour mixed into dataset models: lookup of column ids,
# lazy loading of the matching ColumnAttribute records, and saving them
# together with the dataset.
module Quandl::Cassandra::Models::Dataset::Columns

  extend ActiveSupport::Concern

  included do
    # define_attributes :column_ids
    before_save :save_columns

  end

  module ClassMethods

    # Column ids stored for the dataset +id+, sorted by their position value.
    def find_column_ids_by_id(id)
      Quandl::Cassandra::Models::Dataset.where( id: id ).pluck(:column_id, :position).sort_by{|r| r[1] }.collect{|r| r[0] }
    end

  end

  # Assigns attribute hashes to the columns by index, creating a column for
  # every index that has none yet.
  #
  # NOTE(review): missing entries are built as Models::Column while #columns
  # itself holds Models::ColumnAttribute instances - confirm which class is
  # intended here.
  def column_attributes=(column_attrs)
    column_attrs.each_with_index do |attrs, index|
      self.columns[index] ||= Quandl::Cassandra::Models::Column.new
      self.columns[index].assign_attributes(attrs)
    end
  end

  # Memoized +unit+ of every column.
  #
  # NOTE(review): the column_attributes migration names this field +units+ -
  # verify that ColumnAttribute really responds to +unit+.
  def column_units
    @column_units ||= columns.collect(&:unit)
  end

  # Memoized +name+ of every column.
  def column_names
    @column_names ||= columns.collect(&:name)
  end

  # ColumnAttribute records for #column_ids, in the same order. Ids without a
  # stored record get a fresh ColumnAttribute carrying just that id.
  def columns
    return @columns if @columns
    # Nothing to load without column ids. The empty list is memoized too, so
    # that elements added by #column_attributes= are kept instead of being
    # written into a throwaway array.
    return @columns = [] if column_ids.compact.blank?
    # find columns
    found = Quandl::Cassandra::Models::ColumnAttribute.where( id: column_ids ).all
    # build column where column_id was not found
    @columns = column_ids.collect{|cid| found.detect{|c| c.id.to_s == cid.to_s } || Quandl::Cassandra::Models::ColumnAttribute.new( id: cid ) }
  end

  # Memoized column ids of this record; empty while the record has no id.
  def column_ids
    @column_ids ||= id.blank? ? [] : self.class.find_column_ids_by_id(id)
  end


  protected

  # Drops every memoized column value. The derived name and unit lists are
  # reset as well, otherwise they would keep describing the old columns.
  def clear_attributes!
    @columns = nil
    @column_ids = nil
    @column_names = nil
    @column_units = nil
  end


  private

  # before_save: persists each column that is new or has pending changes.
  def save_columns
    columns.each{|c| c.save if c.changed? || c.new_record? }
  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# Dataset variant whose columns are picked from other datasets, described by
# a +datasets_columns+ string of comma separated "dataset_id.column" entries.
class Quandl::Cassandra::Models::Multiset < Quandl::Cassandra::Models::Dataset

  table_name :datasets

  define_attributes :datasets_columns

  # Builds a multiset for the given +datasets_columns+ description.
  def self.with_columns(datasets_columns)
    new( datasets_columns: datasets_columns )
  end

  # Query scope over the Data model restricted to the selected column ids
  # and their frequencies, collapsed to :source.
  def data_scope
    Quandl::Cassandra::Models::Data
      .collapse(:source)
      .column_ids( column_ids )
      .column_frequencies( column_frequencies )
  end

  # Memoized +frequency+ of every column.
  def column_frequencies
    @column_frequencies ||= columns.collect(&:frequency)
  end

  # Memoized column ids resolved from +datasets_columns+.
  def column_ids
    @column_ids ||= column_ids_from_datasets_columns
  end

  # Resolves each "dataset_id.column" entry to a column id. The column ids of
  # a dataset are looked up once and cached in #datasets. Entries whose
  # dataset yields no Array of ids are skipped.
  #
  # NOTE(review): the column part is used as a 1-based index via
  # +to_i - 1+, so a missing or non-numeric column becomes index -1 and
  # selects the last column id - confirm that this is intended.
  def column_ids_from_datasets_columns
    datasets_columns.to_s.split(',').each_with_object([]) do |reference, ids|
      dataset_id, column = reference.split('.')
      datasets[dataset_id] ||= Quandl::Cassandra::Models::Dataset.find(dataset_id).try(:column_ids)
      known_ids = datasets[dataset_id]
      ids << known_ids[ column.to_i - 1 ] if known_ids.is_a?(Array)
    end
  end

  # Cache of column id lists keyed by dataset id.
  def datasets
    @datasets ||= {}
  end

  # Memoized frequency of the multiset: walks Collapse.valid_collapses in
  # order and keeps the last entry that occurs among the column frequencies.
  # Stays nil when none matches.
  def frequency
    return @frequency if @frequency
    present = column_frequencies.compact.uniq
    Quandl::Operation::Collapse.valid_collapses.each do |collapse|
      label = collapse.to_s
      @frequency = label if present.include?(label)
    end
    @frequency
  end

  # Memoized DatasetAttribute created with +new+ and no arguments; unlike the
  # parent class nothing is looked up by id here.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::Models::DatasetAttribute.new
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Migration for the +datasets+ table, which maps a dataset id to its column
# ids and their positions.
class CreateDatasets < CassandraMigrations::Migration

  # Creates the table with compact storage; +id+ is the partition key and
  # +column_id+ the clustering column.
  def up
    statement = [
      "CREATE TABLE datasets (",
      "id double,",
      "column_id uuid,",
      "position int,",
      "PRIMARY KEY (id, column_id)",
      ")",
      "WITH COMPACT STORAGE;"
    ].join("\n")
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table(:datasets)
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Migration for the +columns+ table, which stores one double value per
# column id, type and time.
class CreateColumns < CassandraMigrations::Migration

  # Creates the table with compact storage; +id+ is the partition key,
  # +type+ and +time+ are the clustering columns.
  def up
    statement = [
      "CREATE TABLE columns (",
      "id uuid,",
      "type text,",
      "time int,",
      "value double,",
      "PRIMARY KEY (id, type, time)",
      ") WITH COMPACT STORAGE;"
    ].join("\n")
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table(:columns)
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Migration for the +dataset_attributes+ table, which holds the timestamps,
# frequency and type of a dataset keyed by its id.
class CreateDatasetAttributes < CassandraMigrations::Migration

  # Creates the table with caching set to 'ALL'.
  def up
    statement = [
      "CREATE TABLE dataset_attributes (",
      "id double PRIMARY KEY,",
      "created_at timestamp,",
      "frequency text,",
      "type text,",
      "updated_at timestamp) WITH caching='ALL';"
    ].join("\n")
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table(:dataset_attributes)
  end

end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Migration for the +column_attributes+ table, which holds the frequency,
# name and units of a column keyed by its uuid.
class CreateColumnAttributes < CassandraMigrations::Migration

  # Creates the table with caching set to 'ALL'.
  def up
    statement = [
      "CREATE TABLE column_attributes (",
      "id uuid PRIMARY KEY,",
      "frequency text,",
      "name text,",
      "units text",
      ") WITH caching='ALL';"
    ].join("\n")
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table(:column_attributes)
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
# Make lib/ loadable so the version constant can be required below.
$LOAD_PATH.push(File.expand_path("../lib", __FILE__))
require "quandl/cassandra/models/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name        = "quandl_cassandra_models"
  spec.version     = Quandl::Cassandra::Models::VERSION
  spec.authors     = ["Blake Hilscher"]
  spec.email       = ["blake@hilscher.ca"]
  spec.homepage    = "http://blake.hilscher.ca/"
  spec.license     = "MIT"
  spec.summary     = "Quandl cassandra interface."
  spec.description = "Quandl cassandra interface. CQL."

  # Packaged files are whatever git tracks at build time.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map{ |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Development-only dependencies; entries without a second element are
  # left unconstrained.
  [
    ["rake", "~> 10.0"],
    ["rspec", "~> 2.13"],
    ["factory_girl_rails"],
    ["fivemat", "~> 1.2"],
    ["pry"]
  ].each do |dependency|
    spec.add_development_dependency(*dependency)
  end

  spec.add_runtime_dependency "quandl_cassandra", "~> 1.1"

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'

# Reads back freshly written column data with a growing set of read options.
#
# NOTE(review): the examples exercise Column.read while the described
# constant is Column::Write - confirm whether this should describe the read
# side instead.
describe Quandl::Cassandra::Models::Column::Write do

  let(:id){ rand(10000*10000) + 10000*10000 }
  let(:data){ Quandl::Fabricate::Data.rand( columns: 1, rows: 15, nils: false, frequency: :annual ) }
  before(:each){ Quandl::Cassandra::Models::Column.write( id: id, data: data ) }

  attrs = {}
  {
    transform: :rdiff,
    collapse: :annual,
    row: 0,
    column: 1,
  }.each do |key, value|
    attrs[key] = value
    # Snapshot the options accumulated so far. Examples run only after this
    # loop has finished, so closing over the shared hash would make every
    # example use the complete option set regardless of its title.
    example_attrs = attrs.dup
    it "should read #{example_attrs.to_query}" do
      # Merge into a new hash so per-example values never leak into the
      # options of other examples.
      params = example_attrs.merge(
        id: id,
        column_ids: Quandl::Cassandra::Models::Dataset.find(id).column_ids
      )
      Quandl::Cassandra::Models::Column.read(params).should be_present
    end

  end


end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'

# Checks how many rows the GroupDataByFrequency strategy produces for each
# frequency when given 12 rows of weekly data.
describe Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency do

  let(:data) do
    Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly )
  end
  let(:strategy) do
    Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency.new({ data: data })
  end

  describe "#frequency_data" do
    subject { strategy.frequency_data }

    its(:length) { should eq 4 }

    it "should have source" do
      subject[:source].count.should eq 12
    end

    # Upper bounds only: the exact count depends on how the generated rows
    # fall across period boundaries.
    { monthly: 4, quarterly: 2 }.each do |frequency, upper_bound|
      it "should have #{frequency}" do
        subject[frequency].count.should <= upper_bound
      end
    end

    it "should have annual" do
      subject[:annual].count.should eq 1
    end

  end

end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'

# Covers Column.write: the caller's data must stay untouched and written
# data must be readable again.
describe Quandl::Cassandra::Models::Column::Write do

  let(:id) { rand(10000*10000) + 10000*10000 }
  let(:data) do
    Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly )
  end

  it "should not alter the original data" do
    # Copy every row before writing so the comparison sees the prior state.
    snapshot = data.to_a.map(&:clone)
    Quandl::Cassandra::Models::Column.write( id: id, data: data )
    data.should eq snapshot
  end

  context "given tiny data array" do
    let(:data) { Quandl::Fabricate::Data.rand( columns: 1, rows: 2, nils: false ) }

    before(:each) do
      Quandl::Cassandra::Models::Column.write( id: id, data: data )
    end

    it "should have written the data" do
      Quandl::Cassandra::Models::Column.read( id: id ).should eq data
    end
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'spec_helper'

# Looks up the ColumnAttribute of the first column of a newly created
# dataset and checks its id and frequency.
describe Quandl::Cassandra::Models::ColumnAttribute do

  let(:id) { rand(10000*10000) + 10000*10000 }
  let(:data) { Quandl::Fabricate::Data.rand( rows: 10, columns: 2 ) }
  let(:dataset) do
    Quandl::Cassandra::Models::Dataset.create( id: id, data: data )
  end

  let(:column) do
    Quandl::Cassandra::Models::ColumnAttribute.find(dataset.column_ids.first)
  end

  subject { column }

  its(:id) { should eq dataset.column_ids.first }
  # NOTE(review): no frequency is passed to the data generator above, so
  # 'daily' is presumably its default - confirm.
  its(:frequency) { should eq 'daily' }

end
|