quandl_cassandra_models 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +7 -0
  5. data/README.md +7 -0
  6. data/Rakefile +11 -0
  7. data/UPGRADE.md +34 -0
  8. data/config/cassandra.yml +41 -0
  9. data/lib/quandl/cassandra/models.rb +16 -0
  10. data/lib/quandl/cassandra/models/column.rb +42 -0
  11. data/lib/quandl/cassandra/models/column/read.rb +49 -0
  12. data/lib/quandl/cassandra/models/column/read/collapse.rb +41 -0
  13. data/lib/quandl/cassandra/models/column/read/column.rb +19 -0
  14. data/lib/quandl/cassandra/models/column/read/data.rb +59 -0
  15. data/lib/quandl/cassandra/models/column/read/offset.rb +104 -0
  16. data/lib/quandl/cassandra/models/column/read/row.rb +20 -0
  17. data/lib/quandl/cassandra/models/column/read/select_columns.rb +63 -0
  18. data/lib/quandl/cassandra/models/column/read/transform.rb +53 -0
  19. data/lib/quandl/cassandra/models/column/read/trim.rb +14 -0
  20. data/lib/quandl/cassandra/models/column/read/type.rb +25 -0
  21. data/lib/quandl/cassandra/models/column/write.rb +25 -0
  22. data/lib/quandl/cassandra/models/column/write/group_data_by_column.rb +36 -0
  23. data/lib/quandl/cassandra/models/column/write/group_data_by_frequency.rb +24 -0
  24. data/lib/quandl/cassandra/models/column/write/insert_column_attributes.rb +22 -0
  25. data/lib/quandl/cassandra/models/column/write/insert_columns.rb +9 -0
  26. data/lib/quandl/cassandra/models/column_attribute.rb +11 -0
  27. data/lib/quandl/cassandra/models/data.rb +18 -0
  28. data/lib/quandl/cassandra/models/data/search.rb +105 -0
  29. data/lib/quandl/cassandra/models/dataset.rb +87 -0
  30. data/lib/quandl/cassandra/models/dataset/columns.rb +63 -0
  31. data/lib/quandl/cassandra/models/dataset_attribute.rb +6 -0
  32. data/lib/quandl/cassandra/models/multiset.rb +55 -0
  33. data/lib/quandl/cassandra/models/version.rb +7 -0
  34. data/migrations/20131105204200_create_datasets.rb +18 -0
  35. data/migrations/20131105204201_create_columns.rb +18 -0
  36. data/migrations/20131105204202_create_dataset_attributes.rb +17 -0
  37. data/migrations/20131105204203_create_column_attributes.rb +17 -0
  38. data/quandl_cassandra_models.gemspec +28 -0
  39. data/spec/expectations/string.rb +5 -0
  40. data/spec/expectations/time.rb +5 -0
  41. data/spec/factories/dataset.rb +8 -0
  42. data/spec/lib/quandl/cassandra/models/column/read_spec.rb +27 -0
  43. data/spec/lib/quandl/cassandra/models/column/write/group_data_by_frequency_spec.rb +28 -0
  44. data/spec/lib/quandl/cassandra/models/column/write_spec.rb +23 -0
  45. data/spec/lib/quandl/cassandra/models/column_attribute_spec.rb +16 -0
  46. data/spec/lib/quandl/cassandra/models/column_spec.rb +17 -0
  47. data/spec/lib/quandl/cassandra/models/data_spec.rb +105 -0
  48. data/spec/lib/quandl/cassandra/models/dataset/collapse_spec.rb +44 -0
  49. data/spec/lib/quandl/cassandra/models/dataset/column_spec.rb +24 -0
  50. data/spec/lib/quandl/cassandra/models/dataset/persistence_spec.rb +25 -0
  51. data/spec/lib/quandl/cassandra/models/dataset/row_spec.rb +26 -0
  52. data/spec/lib/quandl/cassandra/models/dataset/transform_spec.rb +16 -0
  53. data/spec/lib/quandl/cassandra/models/dataset/trim_spec.rb +74 -0
  54. data/spec/lib/quandl/cassandra/models/dataset/update_spec.rb +37 -0
  55. data/spec/lib/quandl/cassandra/models/dataset_attribute_spec.rb +18 -0
  56. data/spec/lib/quandl/cassandra/models/dataset_spec.rb +117 -0
  57. data/spec/lib/quandl/cassandra/models/multiset/collapse_spec.rb +122 -0
  58. data/spec/lib/quandl/cassandra/models/multiset/columns_spec.rb +57 -0
  59. data/spec/lib/quandl/cassandra/models/multiset/data_spec.rb +25 -0
  60. data/spec/lib/quandl/cassandra/models/multiset/transform_spec.rb +69 -0
  61. data/spec/spec_helper.rb +40 -0
  62. data/tasks/migrations.rake +14 -0
  63. metadata +212 -0
@@ -0,0 +1,87 @@
1
# Record backed by the :datasets table. Row data is reached through #data, and
# type / updated_at / created_at / frequency are delegated to #dataset_attribute.
class Quandl::Cassandra::Models::Dataset < Quandl::Cassandra::Base

  require_relative 'dataset/columns'

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data

  after_initialize :repair_frequency
  # Declared before the Columns include below, whose included hook registers
  # its own before_save (:save_columns).
  before_save :save_data, :save_dataset_attribute
  after_save :clear_attributes!

  delegate :type, :updated_at, :created_at, :frequency, to: :dataset_attribute, allow_nil: true

  include Quandl::Cassandra::Models::Dataset::Columns

  # Date built from the first value of the first row when the data is ordered
  # ascending. Memoized; returns nil if anything raises along the way.
  def trim_start
    return @trim_start if @trim_start
    first_row = data.scoped.limit(1).order(:asc)[0]
    @trim_start = Date.jd( first_row[0] )
  rescue
    nil
  end

  # Date built from the first value of the first row when the data is ordered
  # descending. Memoized; returns nil if anything raises along the way.
  def trim_end
    return @trim_end if @trim_end
    last_row = data.scoped.limit(1).order(:desc)[0]
    @trim_end = Date.jd( last_row[0] )
  rescue
    nil
  end

  # Returns the explicitly assigned data when present, otherwise a cached
  # data scope for this dataset.
  def data
    if data?
      read_attribute(:data)
    else
      @attributes[:data] ||= data_scope
    end
  end

  # Wraps rows in Quandl::Data when needed, converts them with #to_jd, and
  # marks the data attribute as changed.
  def data=(rows)
    wrapped = rows.is_a?(Quandl::Data) ? rows : Quandl::Data.new(rows)
    julian_rows = wrapped.to_jd
    data_will_change!
    @attributes[:data] = julian_rows
  end

  # Query scope over this dataset's stored data.
  def data_scope
    Quandl::Cassandra::Models::Data.dataset(self)
  end

  # Memoized DatasetAttribute looked up (or built) by this dataset's id.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::Models::DatasetAttribute.find_or_build(id)
  end

  # Drops all cached state so it is read again on next access.
  def reload
    clear_attributes!
  end

  protected

  # before_save hook: saves the dataset attribute only when it has changes.
  def save_dataset_attribute
    return unless dataset_attribute.changed?
    dataset_attribute.save
  end

  # before_save hook: writes the data through Column.write only when it changed.
  def save_data
    return unless data_changed?
    Quandl::Cassandra::Models::Column.write( id: id, data: data )
  end

  # Resets memoized values and shrinks the attribute hash to just the id.
  def clear_attributes!
    super if defined?(super)
    @trim_start = @trim_end = @dataset_attribute = nil
    @attributes = { id: id }
  end

  # after_initialize hook: for existing records with a blank frequency, sets
  # it from the first 100 rows and saves the dataset attribute. Errors are
  # logged rather than raised.
  def repair_frequency
    return if new_record?
    return unless frequency.blank?
    # repair dataset attribute
    dataset_attribute.frequency = data_scope.limit(100).to_table.frequency.to_s
    dataset_attribute.save
  rescue => e
    Quandl::Logger.error("#{self.class.name}.id #{id} #{e}")
  end

end
@@ -0,0 +1,63 @@
1
# Column handling for datasets: looks up the column ids stored for a dataset,
# loads the matching ColumnAttribute records, and saves them before the
# including record is saved.
module Quandl::Cassandra::Models::Dataset::Columns

  extend ActiveSupport::Concern

  included do
    # define_attributes :column_ids
    before_save :save_columns

  end

  module ClassMethods

    # Column ids stored for the given dataset id, ordered by their position.
    def find_column_ids_by_id(id)
      Quandl::Cassandra::Models::Dataset.where( id: id ).pluck(:column_id, :position).sort_by{|r| r[1] }.collect{|r| r[0] }
    end

  end

  # Assigns attributes to the columns by index, building a column for any
  # index that has none yet.
  #
  # #columns returns a fresh, un-memoized [] when there are no column ids, so
  # the array is fetched once and stored back in @columns; otherwise the newly
  # built column would be lost and assign_attributes would be called on nil.
  def column_attributes=(column_attrs)
    cols = columns
    column_attrs.each_with_index do |attrs, index|
      # NOTE(review): #columns yields ColumnAttribute instances but this builds
      # a Column - confirm which class is intended.
      cols[index] ||= Quandl::Cassandra::Models::Column.new
      cols[index].assign_attributes(attrs)
    end
    @columns = cols
  end

  # Memoized list of each column's unit.
  def column_units
    @column_units ||= columns.collect(&:unit)
  end

  # Memoized list of each column's name.
  def column_names
    @column_names ||= columns.collect(&:name)
  end

  # ColumnAttribute records in column_ids order; ids with no stored record get
  # a new ColumnAttribute carrying just that id.
  def columns
    return @columns if @columns
    # nothing to do without column ids
    return [] if column_ids.compact.blank?
    # find columns
    found = Quandl::Cassandra::Models::ColumnAttribute.where( id: column_ids ).all
    # build column where column_id was not found
    @columns = column_ids.collect{|cid| found.detect{|c| c.id.to_s == cid.to_s } || Quandl::Cassandra::Models::ColumnAttribute.new( id: cid ) }
  end

  # Memoized column ids for this record; empty when the record has no id.
  def column_ids
    @column_ids ||= id.blank? ? [] : self.class.find_column_ids_by_id(id)
  end


  protected

  # Drops every column-related memo, including the derived unit and name
  # lists, so none of them outlives the columns it was computed from.
  def clear_attributes!
    @columns = nil
    @column_units = nil
    @column_names = nil
    @column_ids = nil
  end


  private

  # before_save hook: saves each column that is new or has changes.
  def save_columns
    columns.each{|c| c.save if c.changed? || c.new_record? }
  end

end
@@ -0,0 +1,6 @@
1
# Record backed by the :dataset_attributes table, holding a dataset's type,
# timestamps and frequency.
class Quandl::Cassandra::Models::DatasetAttribute < Quandl::Cassandra::Base

  table_name(:dataset_attributes)
  define_attributes(:id, :type, :updated_at, :created_at, :frequency)

end
@@ -0,0 +1,55 @@
1
# A Dataset whose columns are picked from other datasets. The selection is a
# comma-separated string of "dataset_id.column_number" entries held in the
# datasets_columns attribute, with column numbers starting at 1.
class Quandl::Cassandra::Models::Multiset < Quandl::Cassandra::Models::Dataset

  table_name :datasets

  define_attributes :datasets_columns

  class << self

    # Builds a multiset for the given datasets_columns selection.
    def with_columns(datasets_columns)
      self.new( datasets_columns: datasets_columns )
    end

  end

  # Data scope collapsed to :source and restricted to the selected column ids
  # and their frequencies.
  def data_scope
    Quandl::Cassandra::Models::Data.collapse(:source).column_ids( column_ids ).column_frequencies( column_frequencies )
  end

  # Memoized frequency of each selected column.
  def column_frequencies
    @column_frequencies ||= columns.collect(&:frequency)
  end

  # Memoized column ids resolved from datasets_columns.
  def column_ids
    @column_ids ||= column_ids_from_datasets_columns
  end

  # Resolves every "dataset_id.column_number" entry to a column id.
  #
  # Entries whose column number is missing or below 1 are skipped: they would
  # otherwise produce index -1 and silently select the dataset's last column.
  # Entries whose dataset has no column id array are skipped as well.
  def column_ids_from_datasets_columns
    ids = []
    datasets_columns.to_s.split(',').each do |dataset_column|
      dataset_id, column = dataset_column.split('.')
      index = column.to_i - 1
      next if index < 0
      datasets[dataset_id] ||= Quandl::Cassandra::Models::Dataset.find(dataset_id).try(:column_ids)
      ids << datasets[dataset_id][index] if datasets[dataset_id].is_a?(Array)
    end
    ids
  end

  # Per-instance cache of column id lists keyed by dataset id.
  def datasets
    @datasets ||= {}
  end

  # Memoized frequency of the multiset: the last entry of
  # Quandl::Operation::Collapse.valid_collapses that appears among the column
  # frequencies, or nil when none matches.
  def frequency
    return @frequency if @frequency
    uniq_freqs = column_frequencies.compact.uniq
    Quandl::Operation::Collapse.valid_collapses.each do |collapse|
      @frequency = collapse.to_s if uniq_freqs.include?(collapse.to_s)
    end
    @frequency
  end

  # A multiset always uses a fresh DatasetAttribute instead of looking one up.
  def dataset_attribute
    # find or initialize dataset attribute object
    @dataset_attribute ||= Quandl::Cassandra::Models::DatasetAttribute.new
  end

end
@@ -0,0 +1,7 @@
1
module Quandl
  module Cassandra
    module Models
      # Gem version; frozen so the shared constant cannot be mutated in place.
      VERSION = '0.3.6'.freeze
    end
  end
end
@@ -0,0 +1,18 @@
1
# Migration for the datasets table, keyed by (id, column_id) with a position
# value per column.
class CreateDatasets < CassandraMigrations::Migration

  # Creates the table with a raw CQL statement.
  def up
    statement = "CREATE TABLE datasets (
      id double,
      column_id uuid,
      position int,
      PRIMARY KEY (id, column_id)
      )
      WITH COMPACT STORAGE;"
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table(:datasets)
  end

end
@@ -0,0 +1,18 @@
1
# Migration for the columns table, keyed by (id, type, time) with one double
# value per key.
class CreateColumns < CassandraMigrations::Migration

  # Creates the table with a raw CQL statement.
  def up
    statement = "CREATE TABLE columns (
      id uuid,
      type text,
      time int,
      value double,
      PRIMARY KEY (id, type, time)
      ) WITH COMPACT STORAGE;"
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table(:columns)
  end

end
@@ -0,0 +1,17 @@
1
# Migration for the dataset_attributes table (timestamps, frequency and type
# per dataset id).
class CreateDatasetAttributes < CassandraMigrations::Migration

  # Creates the table with a raw CQL statement.
  def up
    statement = "CREATE TABLE dataset_attributes (
      id double PRIMARY KEY,
      created_at timestamp,
      frequency text,
      type text,
      updated_at timestamp) WITH caching='ALL';"
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table(:dataset_attributes)
  end

end
@@ -0,0 +1,17 @@
1
# Migration for the column_attributes table (frequency, name and units per
# column id).
class CreateColumnAttributes < CassandraMigrations::Migration

  # Creates the table with a raw CQL statement.
  def up
    statement = "CREATE TABLE column_attributes (
      id uuid PRIMARY KEY,
      frequency text,
      name text,
      units text
      ) WITH caching='ALL';"
    CassandraMigrations::Cassandra.execute(statement)
  end

  # Drops the table again.
  def down
    drop_table(:column_attributes)
  end

end
@@ -0,0 +1,28 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for quandl_cassandra_models. The version comes from the
# library's own version file, so lib/ is put on the load path first.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "quandl/cassandra/models/version"

Gem::Specification.new do |spec|
  spec.name        = "quandl_cassandra_models"
  spec.version     = Quandl::Cassandra::Models::VERSION
  spec.authors     = ["Blake Hilscher"]
  spec.email       = ["blake@hilscher.ca"]
  spec.homepage    = "http://blake.hilscher.ca/"
  spec.license     = "MIT"
  spec.summary     = "Quandl cassandra interface."
  spec.description = "Quandl cassandra interface. CQL."

  # File lists are taken from what git tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Development dependencies with their version requirements (none when empty).
  [
    ["rake", "~> 10.0"],
    ["rspec", "~> 2.13"],
    ["factory_girl_rails"],
    ["fivemat", "~> 1.2"],
    ["pry"],
  ].each do |dependency|
    spec.add_development_dependency(*dependency)
  end

  spec.add_runtime_dependency "quandl_cassandra", "~> 1.1"

end
@@ -0,0 +1,5 @@
1
# Matcher that passes when both values have the same #to_s representation.
RSpec::Matchers.define :be_same_string_as do |expected|
  match { |actual| actual.to_s == expected.to_s }
end
@@ -0,0 +1,5 @@
1
# Matcher that passes when both values have the same #to_i representation.
RSpec::Matchers.define :be_same_second_as do |expected|
  match { |actual| actual.to_i == expected.to_i }
end
@@ -0,0 +1,8 @@
1
# Factory for Dataset records with generated ids and random CSV data.
FactoryGirl.define do

  factory :dataset, class: Quandl::Cassandra::Models::Dataset do
    # id is the current time in milliseconds with the sequence number appended
    sequence(:id) do |n|
      millis = (Time.now.to_f * 1000).to_i
      "#{millis}#{n}".to_i
    end
    data do
      Quandl::Fabricate::Data.rand( rows: 60, columns: 4, nils: false ).to_csv
    end
  end

end
@@ -0,0 +1,27 @@
1
# encoding: utf-8
require 'spec_helper'

# Reads back written column data with a growing set of read options.
# NOTE(review): this file is the read spec but describes Column::Write - confirm
# whether Column::Read was intended.
describe Quandl::Cassandra::Models::Column::Write do

  let(:id){ rand(10000*10000) + 10000*10000 }
  let(:data){ Quandl::Fabricate::Data.rand( columns: 1, rows: 15, nils: false, frequency: :annual ) }
  before(:each){ Quandl::Cassandra::Models::Column.write( id: id, data: data ) }

  attrs = {}
  {
    transform: :rdiff,
    collapse: :annual,
    row: 0,
    column: 1,
  }.each do |key, value|
    attrs[key] = value
    # Snapshot the options gathered so far. The examples run after this loop
    # has finished, so closing over the shared attrs hash would make every
    # example run with the complete option set instead of the one in its title.
    options = attrs.dup
    it "should read #{options.to_query}" do
      # NOTE(review): other code uses Quandl::Cassandra::Models::Dataset -
      # confirm that Quandl::Cassandra::Dataset exists.
      column_ids = Quandl::Cassandra::Dataset.find(id).column_ids
      read_options = options.merge( id: id, column_ids: column_ids )
      Quandl::Cassandra::Models::Column.read(read_options).should be_present
    end

  end


end
@@ -0,0 +1,28 @@
1
# encoding: utf-8
require 'spec_helper'

# Checks the row counts of each frequency group built from 12 weekly rows.
describe Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency do

  let(:data) do
    Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly )
  end
  let(:strategy) do
    Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency.new({ data: data })
  end

  describe "#frequency_data" do
    subject { strategy.frequency_data }

    its(:length) { should eq 4 }

    it "should have source" do
      source_rows = subject[:source]
      source_rows.count.should eq 12
    end

    it "should have monthly" do
      monthly_rows = subject[:monthly]
      monthly_rows.count.should <= 4
    end

    it "should have quarterly" do
      quarterly_rows = subject[:quarterly]
      quarterly_rows.count.should <= 2
    end

    it "should have annual" do
      annual_rows = subject[:annual]
      annual_rows.count.should eq 1
    end

  end

end
@@ -0,0 +1,23 @@
1
# encoding: utf-8
require 'spec_helper'

# Covers Column.write: it must leave its input untouched and the written data
# must be readable again.
describe Quandl::Cassandra::Models::Column::Write do

  let(:id) { rand(100_000_000) + 100_000_000 }
  let(:data) do
    Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly )
  end

  it "should not alter the original data" do
    # copy each row up front so the comparison is against the pre-write state
    rows_before_write = data.to_a.map { |row| row.clone }
    Quandl::Cassandra::Models::Column.write( id: id, data: data )
    data.should eq rows_before_write
  end

  context "given tiny data array" do
    let(:data) { Quandl::Fabricate::Data.rand( columns: 1, rows: 2, nils: false ) }

    before(:each) do
      Quandl::Cassandra::Models::Column.write( id: id, data: data )
    end

    it "should have written the data" do
      stored = Quandl::Cassandra::Models::Column.read( id: id )
      stored.should eq data
    end
  end

end
@@ -0,0 +1,16 @@
1
require 'spec_helper'

# Creates a dataset and checks the ColumnAttribute found for its first column id.
describe Quandl::Cassandra::Models::ColumnAttribute do

  let(:id) { rand(100_000_000) + 100_000_000 }
  let(:data) { Quandl::Fabricate::Data.rand( rows: 10, columns: 2 ) }
  let(:dataset) do
    Quandl::Cassandra::Models::Dataset.create( id: id, data: data )
  end

  let(:column) do
    first_column_id = dataset.column_ids.first
    Quandl::Cassandra::Models::ColumnAttribute.find(first_column_id)
  end

  subject { column }

  its(:id) { should eq dataset.column_ids.first }
  its(:frequency) { should eq 'daily' }

end