quandl_cassandra_models 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +7 -0
  5. data/README.md +7 -0
  6. data/Rakefile +11 -0
  7. data/UPGRADE.md +34 -0
  8. data/config/cassandra.yml +41 -0
  9. data/lib/quandl/cassandra/models.rb +16 -0
  10. data/lib/quandl/cassandra/models/column.rb +42 -0
  11. data/lib/quandl/cassandra/models/column/read.rb +49 -0
  12. data/lib/quandl/cassandra/models/column/read/collapse.rb +41 -0
  13. data/lib/quandl/cassandra/models/column/read/column.rb +19 -0
  14. data/lib/quandl/cassandra/models/column/read/data.rb +59 -0
  15. data/lib/quandl/cassandra/models/column/read/offset.rb +104 -0
  16. data/lib/quandl/cassandra/models/column/read/row.rb +20 -0
  17. data/lib/quandl/cassandra/models/column/read/select_columns.rb +63 -0
  18. data/lib/quandl/cassandra/models/column/read/transform.rb +53 -0
  19. data/lib/quandl/cassandra/models/column/read/trim.rb +14 -0
  20. data/lib/quandl/cassandra/models/column/read/type.rb +25 -0
  21. data/lib/quandl/cassandra/models/column/write.rb +25 -0
  22. data/lib/quandl/cassandra/models/column/write/group_data_by_column.rb +36 -0
  23. data/lib/quandl/cassandra/models/column/write/group_data_by_frequency.rb +24 -0
  24. data/lib/quandl/cassandra/models/column/write/insert_column_attributes.rb +22 -0
  25. data/lib/quandl/cassandra/models/column/write/insert_columns.rb +9 -0
  26. data/lib/quandl/cassandra/models/column_attribute.rb +11 -0
  27. data/lib/quandl/cassandra/models/data.rb +18 -0
  28. data/lib/quandl/cassandra/models/data/search.rb +105 -0
  29. data/lib/quandl/cassandra/models/dataset.rb +87 -0
  30. data/lib/quandl/cassandra/models/dataset/columns.rb +63 -0
  31. data/lib/quandl/cassandra/models/dataset_attribute.rb +6 -0
  32. data/lib/quandl/cassandra/models/multiset.rb +55 -0
  33. data/lib/quandl/cassandra/models/version.rb +7 -0
  34. data/migrations/20131105204200_create_datasets.rb +18 -0
  35. data/migrations/20131105204201_create_columns.rb +18 -0
  36. data/migrations/20131105204202_create_dataset_attributes.rb +17 -0
  37. data/migrations/20131105204203_create_column_attributes.rb +17 -0
  38. data/quandl_cassandra_models.gemspec +28 -0
  39. data/spec/expectations/string.rb +5 -0
  40. data/spec/expectations/time.rb +5 -0
  41. data/spec/factories/dataset.rb +8 -0
  42. data/spec/lib/quandl/cassandra/models/column/read_spec.rb +27 -0
  43. data/spec/lib/quandl/cassandra/models/column/write/group_data_by_frequency_spec.rb +28 -0
  44. data/spec/lib/quandl/cassandra/models/column/write_spec.rb +23 -0
  45. data/spec/lib/quandl/cassandra/models/column_attribute_spec.rb +16 -0
  46. data/spec/lib/quandl/cassandra/models/column_spec.rb +17 -0
  47. data/spec/lib/quandl/cassandra/models/data_spec.rb +105 -0
  48. data/spec/lib/quandl/cassandra/models/dataset/collapse_spec.rb +44 -0
  49. data/spec/lib/quandl/cassandra/models/dataset/column_spec.rb +24 -0
  50. data/spec/lib/quandl/cassandra/models/dataset/persistence_spec.rb +25 -0
  51. data/spec/lib/quandl/cassandra/models/dataset/row_spec.rb +26 -0
  52. data/spec/lib/quandl/cassandra/models/dataset/transform_spec.rb +16 -0
  53. data/spec/lib/quandl/cassandra/models/dataset/trim_spec.rb +74 -0
  54. data/spec/lib/quandl/cassandra/models/dataset/update_spec.rb +37 -0
  55. data/spec/lib/quandl/cassandra/models/dataset_attribute_spec.rb +18 -0
  56. data/spec/lib/quandl/cassandra/models/dataset_spec.rb +117 -0
  57. data/spec/lib/quandl/cassandra/models/multiset/collapse_spec.rb +122 -0
  58. data/spec/lib/quandl/cassandra/models/multiset/columns_spec.rb +57 -0
  59. data/spec/lib/quandl/cassandra/models/multiset/data_spec.rb +25 -0
  60. data/spec/lib/quandl/cassandra/models/multiset/transform_spec.rb +69 -0
  61. data/spec/spec_helper.rb +40 -0
  62. data/tasks/migrations.rake +14 -0
  63. metadata +212 -0
@@ -0,0 +1,87 @@
1
# A dataset record stored in the :datasets table.
#
# The dataset exposes an id and its data. Descriptive attributes (type,
# timestamps, frequency) are delegated to a companion DatasetAttribute, and
# column handling comes from the Columns concern.
class Quandl::Cassandra::Models::Dataset < Quandl::Cassandra::Base

  require_relative 'dataset/columns'

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data

  after_initialize :repair_frequency

  before_save :save_data, :save_dataset_attribute

  after_save :clear_attributes!

  delegate :type, :updated_at, :created_at, :frequency, to: :dataset_attribute, allow_nil: true

  include Quandl::Cassandra::Models::Dataset::Columns

  # First date of the data, built with Date.jd from the first cell of the
  # first row in ascending order. Any error while reading yields nil.
  def trim_start
    @trim_start ||= begin
      earliest = data.scoped.limit(1).order(:asc)
      Date.jd( earliest[0][0] )
    end
  rescue
    nil
  end

  # Last date of the data, built with Date.jd from the first cell of the
  # first row in descending order. Any error while reading yields nil.
  def trim_end
    @trim_end ||= begin
      latest = data.scoped.limit(1).order(:desc)
      Date.jd( latest[0][0] )
    end
  rescue
    nil
  end

  # Returns the data assigned to this record when there is any; otherwise
  # falls back to (and remembers) the scope produced by #data_scope.
  def data
    if data?
      read_attribute(:data)
    else
      @attributes[:data] ||= data_scope
    end
  end

  # Assigns new data. Anything that is not already a Quandl::Data is wrapped
  # in one, converted with #to_jd, and the attribute is flagged as changed.
  def data=(rows)
    table = rows.is_a?(Quandl::Data) ? rows : Quandl::Data.new(rows)
    julian = table.to_jd
    data_will_change!
    @attributes[:data] = julian
  end

  # Scope used to read this dataset's stored data.
  def data_scope
    Quandl::Cassandra::Models::Data.dataset(self)
  end

  # Memoized DatasetAttribute looked up (or built) for this dataset's id.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::Models::DatasetAttribute.find_or_build(id)
  end

  # Drops every cached value so the next read starts from scratch.
  def reload
    clear_attributes!
  end

  protected

  # before_save hook: persists the companion attribute record when it changed.
  def save_dataset_attribute
    return unless dataset_attribute.changed?
    dataset_attribute.save
  end

  # before_save hook: writes the data through Column.write when it changed.
  def save_data
    return unless data_changed?
    Quandl::Cassandra::Models::Column.write( id: id, data: data )
  end

  # Resets memoized values and reduces the attribute hash to the id alone.
  def clear_attributes!
    super if defined?(super)
    @trim_start = @trim_end = @dataset_attribute = nil
    @attributes = { id: id }
  end

  # after_initialize hook: for an existing record with a blank frequency,
  # derives one from up to 100 rows of stored data and saves it. Failures are
  # logged and never raised.
  def repair_frequency
    return if new_record?
    return unless frequency.blank?
    # repair dataset attribute
    inferred = data_scope.limit(100).to_table.frequency.to_s
    dataset_attribute.frequency = inferred
    dataset_attribute.save
  rescue => e
    Quandl::Logger.error("#{self.class.name}.id #{id} #{e}")
  end

end
@@ -0,0 +1,63 @@
1
# Column handling for datasets: resolves the ordered column ids of a dataset,
# loads the matching ColumnAttribute records and saves them with the dataset.
module Quandl::Cassandra::Models::Dataset::Columns

  extend ActiveSupport::Concern

  included do
    # define_attributes :column_ids
    before_save :save_columns

  end

  module ClassMethods

    # Returns the column ids stored for the dataset +id+, ordered by position.
    def find_column_ids_by_id(id)
      Quandl::Cassandra::Models::Dataset.where( id: id ).pluck(:column_id, :position).sort_by{|r| r[1] }.collect{|r| r[0] }
    end

  end

  # Assigns attributes to the columns by index, building a column for every
  # index that has none yet.
  #
  # NOTE(review): new entries are built as Column while #columns holds
  # ColumnAttribute records - confirm this is the intended class.
  def column_attributes=(column_attrs)
    column_attrs.each_with_index do |attrs, index|
      self.columns[index] ||= Quandl::Cassandra::Models::Column.new
      self.columns[index].assign_attributes(attrs)
    end
  end

  # Memoized list of each column's unit.
  #
  # NOTE(review): the column_attributes table declares `units`; confirm that
  # `unit` is a real accessor on the column records.
  def column_units
    @column_units ||= columns.collect(&:unit)
  end

  # Memoized list of each column's name.
  def column_names
    @column_names ||= columns.collect(&:name)
  end

  # Memoized column records, one per column id and in the same order. Ids
  # without a stored record get a fresh ColumnAttribute carrying that id.
  def columns
    return @columns if @columns
    # nothing to look up without column ids; the empty list is memoized so
    # that column_attributes= fills the very array later calls return
    return @columns = [] if column_ids.compact.blank?
    # find columns
    found = Quandl::Cassandra::Models::ColumnAttribute.where( id: column_ids ).all
    # build column where column_id was not found
    @columns = column_ids.collect{|cid| found.detect{|c| c.id.to_s == cid.to_s } || Quandl::Cassandra::Models::ColumnAttribute.new( id: cid ) }
  end

  # Memoized column ids of this dataset; empty while the dataset has no id.
  def column_ids
    @column_ids ||= id.blank? ? [] : self.class.find_column_ids_by_id(id)
  end


  protected

  # Forgets every memoized column value, including the derived name and unit
  # lists, so nothing stale survives a save or reload.
  def clear_attributes!
    @columns = nil
    @column_ids = nil
    @column_names = nil
    @column_units = nil
  end


  private

  # before_save hook: saves each column that is new or has changes.
  def save_columns
    columns.each{|c| c.save if c.changed? || c.new_record? }
  end

end
@@ -0,0 +1,6 @@
1
# Descriptive attributes of a dataset, stored in the :dataset_attributes table.
class Quandl::Cassandra::Models::DatasetAttribute < Quandl::Cassandra::Base

  table_name(:dataset_attributes)

  define_attributes(
    :id,
    :type,
    :updated_at,
    :created_at,
    :frequency
  )

end
@@ -0,0 +1,55 @@
1
# A dataset assembled from columns of other datasets.
#
# The columns are described by +datasets_columns+, a comma separated list of
# "dataset_id.column" entries.
class Quandl::Cassandra::Models::Multiset < Quandl::Cassandra::Models::Dataset

  table_name :datasets

  define_attributes :datasets_columns

  # Builds a multiset for the given "dataset_id.column,..." description.
  def self.with_columns(datasets_columns)
    new( datasets_columns: datasets_columns )
  end

  # Scope reading the source data of the selected columns, given their ids
  # and frequencies.
  def data_scope
    scope = Quandl::Cassandra::Models::Data.collapse(:source)
    scope = scope.column_ids( column_ids )
    scope.column_frequencies( column_frequencies )
  end

  # Memoized frequency of each selected column.
  def column_frequencies
    @column_frequencies ||= columns.collect(&:frequency)
  end

  # Memoized column ids resolved from +datasets_columns+.
  def column_ids
    @column_ids ||= column_ids_from_datasets_columns
  end

  # Resolves every "dataset_id.column" entry to a column id. The column ids
  # of each referenced dataset are looked up once and cached in #datasets;
  # entries whose dataset yields no Array of ids are skipped.
  #
  # NOTE(review): a missing or zero column number becomes index -1, which
  # selects the dataset's last column - confirm that is intended.
  def column_ids_from_datasets_columns
    datasets_columns.to_s.split(',').each_with_object([]) do |entry, ids|
      dataset_id, column = entry.split('.')
      datasets[dataset_id] ||= Quandl::Cassandra::Models::Dataset.find(dataset_id).try(:column_ids)
      known_ids = datasets[dataset_id]
      ids << known_ids[ column.to_i - 1 ] if known_ids.is_a?(Array)
    end
  end

  # Cache of column ids keyed by dataset id.
  def datasets
    @datasets ||= {}
  end

  # Frequency of the multiset: the last entry of
  # Quandl::Operation::Collapse.valid_collapses that is also the frequency of
  # at least one column. Stays nil when no column frequency matches.
  def frequency
    return @frequency if @frequency
    present = column_frequencies.compact.uniq
    Quandl::Operation::Collapse.valid_collapses.each do |collapse|
      candidate = collapse.to_s
      @frequency = candidate if present.include?(candidate)
    end
    @frequency
  end

  # Memoized, blank DatasetAttribute; no stored record is looked up.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::Models::DatasetAttribute.new
  end

end
@@ -0,0 +1,7 @@
1
module Quandl
  module Cassandra
    module Models
      # Version of the gem; frozen so the shared constant cannot be mutated.
      VERSION = '0.3.6'.freeze
    end
  end
end
@@ -0,0 +1,18 @@
1
# Creates the datasets table: one row per (dataset id, column id) pair with
# the position of that column.
class CreateDatasets < CassandraMigrations::Migration

  # Runs the raw CQL that creates the table.
  def up
    CassandraMigrations::Cassandra.execute(
      "CREATE TABLE datasets (
        id double,
        column_id uuid,
        position int,
        PRIMARY KEY (id, column_id)
      )
      WITH COMPACT STORAGE;"
    )
  end

  # Removes the table again.
  def down
    drop_table(:datasets)
  end

end
@@ -0,0 +1,18 @@
1
# Creates the columns table holding one value per (column id, type, time).
class CreateColumns < CassandraMigrations::Migration

  # Runs the raw CQL that creates the table.
  def up
    CassandraMigrations::Cassandra.execute(
      "CREATE TABLE columns (
        id uuid,
        type text,
        time int,
        value double,
        PRIMARY KEY (id, type, time)
      ) WITH COMPACT STORAGE;"
    )
  end

  # Removes the table again.
  def down
    drop_table(:columns)
  end

end
@@ -0,0 +1,17 @@
1
# Creates the dataset_attributes table keyed by dataset id.
class CreateDatasetAttributes < CassandraMigrations::Migration

  # Runs the raw CQL that creates the table.
  def up
    CassandraMigrations::Cassandra.execute(
      "CREATE TABLE dataset_attributes (
        id double PRIMARY KEY,
        created_at timestamp,
        frequency text,
        type text,
        updated_at timestamp) WITH caching='ALL';"
    )
  end

  # Removes the table again.
  def down
    drop_table(:dataset_attributes)
  end

end
@@ -0,0 +1,17 @@
1
# Creates the column_attributes table keyed by column id.
class CreateColumnAttributes < CassandraMigrations::Migration

  # Runs the raw CQL that creates the table.
  def up
    CassandraMigrations::Cassandra.execute(
      "CREATE TABLE column_attributes (
        id uuid PRIMARY KEY,
        frequency text,
        name text,
        units text
      ) WITH caching='ALL';"
    )
  end

  # Removes the table again.
  def down
    drop_table(:column_attributes)
  end

end
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "quandl/cassandra/models/version"
4
+
5
# Gem metadata, packaged files and dependencies.
Gem::Specification.new do |spec|
  spec.name        = "quandl_cassandra_models"
  spec.version     = Quandl::Cassandra::Models::VERSION
  spec.authors     = ["Blake Hilscher"]
  spec.email       = ["blake@hilscher.ca"]
  spec.homepage    = "http://blake.hilscher.ca/"
  spec.license     = "MIT"
  spec.summary     = "Quandl cassandra interface."
  spec.description = "Quandl cassandra interface. CQL."

  # file lists come from what git tracks
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map{ |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # development-only dependencies: name followed by optional version constraint
  [
    ["rake", "~> 10.0"],
    ["rspec", "~> 2.13"],
    ["factory_girl_rails"],
    ["fivemat", "~> 1.2"],
    ["pry"],
  ].each do |dependency|
    spec.add_development_dependency(*dependency)
  end

  spec.add_runtime_dependency "quandl_cassandra", "~> 1.1"

end
@@ -0,0 +1,5 @@
1
# Passes when both values have the same string representation.
RSpec::Matchers.define :be_same_string_as do |expected|
  match { |actual| expected.to_s == actual.to_s }
end
@@ -0,0 +1,5 @@
1
# Passes when both values convert to the same integer via #to_i.
RSpec::Matchers.define :be_same_second_as do |expected|
  match { |actual| expected.to_i == actual.to_i }
end
@@ -0,0 +1,8 @@
1
FactoryGirl.define do

  # Dataset with an id built from the current time in milliseconds followed
  # by the sequence number, and 60 rows x 4 columns of fabricated CSV data.
  factory :dataset, class: Quandl::Cassandra::Models::Dataset do
    sequence(:id) do |n|
      millis = (Time.now.to_f * 1000).to_i
      "#{millis}#{n}".to_i
    end
    data do
      Quandl::Fabricate::Data.rand( rows: 60, columns: 4, nils: false ).to_csv
    end
  end

end
@@ -0,0 +1,27 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
# Reads back freshly written column data with a growing set of read options.
#
# NOTE(review): this group describes Column::Write although it exercises
# Column.read - confirm the intended described class.
describe Quandl::Cassandra::Models::Column::Write do

  let(:id){ rand(10000*10000) + 10000*10000 }
  let(:data){ Quandl::Fabricate::Data.rand( columns: 1, rows: 15, nils: false, frequency: :annual ) }
  before(:each){ Quandl::Cassandra::Models::Column.write( id: id, data: data ) }

  attrs = {}
  {
    transform: :rdiff,
    collapse: :annual,
    row: 0,
    column: 1,
  }.each do |key, value|
    attrs[key] = value
    # Snapshot the options accumulated so far. Examples run after this loop
    # has finished, so sharing the one growing hash would make every example
    # read with the complete option set regardless of its description.
    options = attrs.dup
    it "should read #{options.to_query}" do
      params = options.merge(
        id: id,
        column_ids: Quandl::Cassandra::Models::Dataset.find(id).column_ids
      )
      Quandl::Cassandra::Models::Column.read(params).should be_present
    end

  end


end
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
# Checks how 12 rows of weekly data are grouped per frequency.
describe Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency do

  let(:data) do
    Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly )
  end
  let(:strategy) do
    described_class.new( data: data )
  end

  describe "#frequency_data" do
    subject { strategy.frequency_data }

    # one group per frequency: source, monthly, quarterly, annual
    its(:length) { should eq 4 }

    it "should have source" do
      subject[:source].count.should eq 12
    end

    it "should have monthly" do
      subject[:monthly].count.should <= 4
    end

    it "should have quarterly" do
      subject[:quarterly].count.should <= 2
    end

    it "should have annual" do
      subject[:annual].count.should eq 1
    end

  end

end
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
# Writing column data must leave the input untouched and be readable again.
describe Quandl::Cassandra::Models::Column::Write do

  let(:id) { rand(10000*10000) + 10000*10000 }
  let(:data) do
    Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly )
  end

  it "should not alter the original data" do
    # copy every row before writing so later mutation would be detected
    snapshot = data.to_a.map(&:clone)
    Quandl::Cassandra::Models::Column.write( id: id, data: data )
    data.should eq snapshot
  end

  context "given tiny data array" do
    let(:data) { Quandl::Fabricate::Data.rand( columns: 1, rows: 2, nils: false ) }

    before(:each) do
      Quandl::Cassandra::Models::Column.write( id: id, data: data )
    end

    it "should have written the data" do
      stored = Quandl::Cassandra::Models::Column.read( id: id )
      stored.should eq data
    end
  end

end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
# The first column of a freshly created dataset must be findable by its id.
describe Quandl::Cassandra::Models::ColumnAttribute do

  let(:id) { rand(10000*10000) + 10000*10000 }
  let(:data) { Quandl::Fabricate::Data.rand( rows: 10, columns: 2 ) }
  let(:dataset) do
    Quandl::Cassandra::Models::Dataset.create( id: id, data: data )
  end

  let(:column) do
    first_column_id = dataset.column_ids.first
    described_class.find(first_column_id)
  end

  subject { column }

  its(:id) { should eq dataset.column_ids.first }
  its(:frequency) { should eq 'daily' }

end