quandl_cassandra 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. data/.gitignore +7 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE +7 -0
  4. data/README.md +7 -0
  5. data/Rakefile +11 -0
  6. data/UPGRADE.md +3 -0
  7. data/lib/quandl/cassandra/base/attributes.rb +103 -0
  8. data/lib/quandl/cassandra/base/callbacks.rb +15 -0
  9. data/lib/quandl/cassandra/base/connection.rb +49 -0
  10. data/lib/quandl/cassandra/base/logging.rb +40 -0
  11. data/lib/quandl/cassandra/base/naming.rb +19 -0
  12. data/lib/quandl/cassandra/base/persistence.rb +67 -0
  13. data/lib/quandl/cassandra/base/sanitization.rb +38 -0
  14. data/lib/quandl/cassandra/base/schema.rb +79 -0
  15. data/lib/quandl/cassandra/base/scoping.rb +122 -0
  16. data/lib/quandl/cassandra/base.rb +51 -0
  17. data/lib/quandl/cassandra/configuration.rb +34 -0
  18. data/lib/quandl/cassandra/error.rb +10 -0
  19. data/lib/quandl/cassandra/types/abstract_type.rb +33 -0
  20. data/lib/quandl/cassandra/types/boolean_type.rb +10 -0
  21. data/lib/quandl/cassandra/types/decimal_type.rb +9 -0
  22. data/lib/quandl/cassandra/types/double_type.rb +9 -0
  23. data/lib/quandl/cassandra/types/float_type.rb +9 -0
  24. data/lib/quandl/cassandra/types/integer_type.rb +9 -0
  25. data/lib/quandl/cassandra/types/long_type.rb +9 -0
  26. data/lib/quandl/cassandra/types/timestamp_type.rb +15 -0
  27. data/lib/quandl/cassandra/types/utf8_type.rb +13 -0
  28. data/lib/quandl/cassandra/types/uuid_type.rb +21 -0
  29. data/lib/quandl/cassandra/types.rb +42 -0
  30. data/lib/quandl/cassandra/version.rb +5 -0
  31. data/lib/quandl/cassandra.rb +30 -0
  32. data/lib/quandl/cassandra_models/column/read/collapse.rb +64 -0
  33. data/lib/quandl/cassandra_models/column/read/column.rb +18 -0
  34. data/lib/quandl/cassandra_models/column/read/data_table.rb +57 -0
  35. data/lib/quandl/cassandra_models/column/read/offset.rb +114 -0
  36. data/lib/quandl/cassandra_models/column/read/query.rb +55 -0
  37. data/lib/quandl/cassandra_models/column/read/row.rb +20 -0
  38. data/lib/quandl/cassandra_models/column/read/transform.rb +53 -0
  39. data/lib/quandl/cassandra_models/column/read/type.rb +25 -0
  40. data/lib/quandl/cassandra_models/column/read.rb +28 -0
  41. data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +42 -0
  42. data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +24 -0
  43. data/lib/quandl/cassandra_models/column/write/insert_columns.rb +22 -0
  44. data/lib/quandl/cassandra_models/column/write/insert_data.rb +39 -0
  45. data/lib/quandl/cassandra_models/column/write.rb +22 -0
  46. data/lib/quandl/cassandra_models/column.rb +20 -0
  47. data/lib/quandl/cassandra_models/column_attribute.rb +11 -0
  48. data/lib/quandl/cassandra_models/data.rb +52 -0
  49. data/lib/quandl/cassandra_models/dataset.rb +83 -0
  50. data/lib/quandl/cassandra_models/dataset_attribute.rb +6 -0
  51. data/lib/quandl/cassandra_models/multiset.rb +50 -0
  52. data/lib/quandl/strategy.rb +59 -0
  53. data/quandl_cassandra.gemspec +35 -0
  54. data/spec/expectations/string.rb +5 -0
  55. data/spec/expectations/time.rb +5 -0
  56. data/spec/factories/dataset.rb +8 -0
  57. data/spec/lib/quandl/cassandra/base/scoping_spec.rb +40 -0
  58. data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +28 -0
  59. data/spec/lib/quandl/cassandra_models/column/write_spec.rb +15 -0
  60. data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +16 -0
  61. data/spec/lib/quandl/cassandra_models/column_spec.rb +17 -0
  62. data/spec/lib/quandl/cassandra_models/data_spec.rb +34 -0
  63. data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +41 -0
  64. data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +25 -0
  65. data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +24 -0
  66. data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +26 -0
  67. data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +16 -0
  68. data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +74 -0
  69. data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +37 -0
  70. data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +18 -0
  71. data/spec/lib/quandl/cassandra_models/dataset_spec.rb +63 -0
  72. data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +122 -0
  73. data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +57 -0
  74. data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +25 -0
  75. data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +68 -0
  76. data/spec/lib/quandl/cassandra_spec.rb +12 -0
  77. data/spec/spec_helper.rb +37 -0
  78. metadata +339 -0
@@ -0,0 +1,35 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "quandl/cassandra/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "quandl_cassandra"
7
+ s.version = Quandl::Cassandra::VERSION
8
+ s.authors = ["Blake Hilscher"]
9
+ s.email = ["blake@hilscher.ca"]
10
+ s.homepage = "http://blake.hilscher.ca/"
11
+ s.license = "MIT"
12
+ s.summary = "Quandl cassandra interface."
13
+ s.description = "Quandl cassandra interface. CQL."
14
+
15
+ s.files = `git ls-files`.split("\n")
16
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
+ s.require_paths = ["lib"]
19
+
20
+ s.add_development_dependency "rake", "~> 10.0"
21
+ s.add_development_dependency "rspec", "~> 2.13"
22
+ s.add_development_dependency "factory_girl_rails"
23
+ s.add_development_dependency "fivemat", "~> 1.2"
24
+ s.add_development_dependency "pry"
25
+
26
+ s.add_runtime_dependency "activesupport", ">= 3.0.0"
27
+ s.add_runtime_dependency "activemodel", ">= 3.0.0"
28
+
29
+ s.add_runtime_dependency "scope_composer", "~> 0.3"
30
+ s.add_runtime_dependency "quandl_data", "~> 1.0"
31
+ s.add_runtime_dependency "quandl_logger", "~> 0.1"
32
+ s.add_runtime_dependency "quandl_operation", "~> 0.1"
33
+ s.add_runtime_dependency 'cql-rb', '1.1.0.rc0'
34
+
35
+ end
@@ -0,0 +1,5 @@
1
+ RSpec::Matchers.define :be_same_string_as do |expected|
2
+ match do |actual|
3
+ actual.to_s == expected.to_s
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ RSpec::Matchers.define :be_same_second_as do |expected|
2
+ match do |actual|
3
+ actual.to_i == expected.to_i
4
+ end
5
+ end
@@ -0,0 +1,8 @@
1
+ FactoryGirl.define do
2
+
3
+ factory :dataset, class: Quandl::Cassandra::Dataset do
4
+ sequence(:id) { |n| "#{(Time.now.to_f * 1000).to_i}#{n}".to_i }
5
+ data { Quandl::Fabricate::Data.rand( rows: 60, columns: 4, nils: false ).to_csv }
6
+ end
7
+
8
+ end
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+
3
+ class TestModel < Quandl::Cassandra::Base; end
4
+
5
+ describe Quandl::Cassandra::Base::Scoping do
6
+
7
+ subject{ TestModel }
8
+
9
+ [:where, :select, :limit, :order].each do |name|
10
+ it{ should respond_to name }
11
+ end
12
+
13
+ describe ".to_cql" do
14
+
15
+ let(:scope) { TestModel.scope.new }
16
+ subject{ scope.to_cql }
17
+
18
+ describe "#where" do
19
+ before(:each){ scope.where( id: 1 ) }
20
+ it{ should eq "SELECT * FROM test_models WHERE id = 1" }
21
+
22
+ describe "#limit" do
23
+ before(:each){ scope.limit( 10 ) }
24
+ it{ should eq "SELECT * FROM test_models WHERE id = 1 LIMIT 10" }
25
+
26
+ describe "#select" do
27
+ before(:each){ scope.select( :name, :value ) }
28
+ it{ should eq "SELECT name,value FROM test_models WHERE id = 1 LIMIT 10" }
29
+
30
+ describe "#order" do
31
+ before(:each){ scope.order("name DESC") }
32
+ it{ should eq "SELECT name,value FROM test_models WHERE id = 1 ORDER BY name DESC LIMIT 10" }
33
+ end
34
+
35
+ end
36
+ end
37
+ end
38
+ end
39
+
40
+ end
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Column::Write::GroupDataByFrequency do
5
+
6
+ let(:data){ Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly ) }
7
+ let(:strategy){ Quandl::Cassandra::Column::Write::GroupDataByFrequency.new({ data: data }) }
8
+
9
+ describe "#frequency_data" do
10
+ subject{ strategy.frequency_data }
11
+ its(:length){ should eq 4 }
12
+
13
+ it "should have source" do
14
+ subject[:source].count.should eq 12
15
+ end
16
+ it "should have monthly" do
17
+ subject[:monthly].count.should <= 4
18
+ end
19
+ it "should have quarterly" do
20
+ subject[:quarterly].count.should <= 2
21
+ end
22
+ it "should have annual" do
23
+ subject[:annual].count.should eq 1
24
+ end
25
+
26
+ end
27
+
28
+ end
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Column::Write do
5
+
6
+ let(:id){ rand(10000*10000) + 10000*10000 }
7
+ let(:data){ Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly ) }
8
+
9
+ it "should not alter the original data" do
10
+ source_data = data.to_a.collect{|r| r.clone }
11
+ r = Quandl::Cassandra::Column.write( id: id, data: data )
12
+ data.should eq source_data
13
+ end
14
+
15
+ end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
+ describe Quandl::Cassandra::ColumnAttribute do
4
+
5
+ let(:id){ rand(10000*10000) + 10000*10000 }
6
+ let(:data){ Quandl::Fabricate::Data.rand( rows: 10, columns: 2 ) }
7
+ let(:dataset){ Quandl::Cassandra::Dataset.create( id: id, data: data ) }
8
+
9
+ let(:column){ Quandl::Cassandra::ColumnAttribute.find(dataset.column_ids.first) }
10
+
11
+ subject{ column }
12
+
13
+ its(:id){ should eq dataset.column_ids.first }
14
+ its(:frequency){ should eq 'daily' }
15
+
16
+ end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+ describe Quandl::Cassandra::Column do
4
+
5
+ let(:id){ rand(10000*10000) + 10000*10000 }
6
+ let(:data){ Quandl::Fabricate::Data.rand( rows: 10, columns: 2 ) }
7
+ let(:dataset){ Quandl::Cassandra::Dataset.create( id: id, data: data ) }
8
+
9
+ describe ".read" do
10
+ before(:each){ Quandl::Cassandra::Column.write( id: id, data: data ); sleep(0.2) }
11
+
12
+ subject{ Quandl::Cassandra::Column.read( id: id ) }
13
+ its(:count){ should eq 10 }
14
+ it{ should eq data }
15
+ end
16
+
17
+ end
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+
3
+ describe Quandl::Cassandra::Data do
4
+
5
+ let(:id){ rand(10000*10000) + 10000*10000 }
6
+ let(:data){ Quandl::Fabricate::Data.rand(rows: 10, columns: 2, nils: false) }
7
+ let(:dataset){ Quandl::Cassandra::Dataset.create( id: id, data: data ) }
8
+ subject{ Quandl::Cassandra::Data }
9
+
10
+ [:row, :id, :limit, :offset, :column, :accuracy, :frequency, :column_ids,
11
+ :collapse, :transform, :order, :trim_start, :trim_end ].each do |name|
12
+ it{ should respond_to name }
13
+ end
14
+
15
+ describe "#collapse" do
16
+ it "should become annual" do
17
+ scope = subject.collapse(:annual)
18
+ scope.attributes[:collapse].should eq :annual
19
+ end
20
+ it "should become annual given string" do
21
+ scope = subject.collapse('annual')
22
+ scope.attributes[:collapse].should eq :annual
23
+ end
24
+ it "should become monthly" do
25
+ scope = subject.collapse(:monthly)
26
+ scope.attributes[:collapse].should eq :monthly
27
+ end
28
+ it "should become weekly" do
29
+ scope = subject.collapse('weekly')
30
+ scope.attributes[:collapse].should eq :weekly
31
+ end
32
+ end
33
+
34
+ end
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ let(:dataset){ create(:dataset) }
7
+ subject { dataset }
8
+
9
+ it "should have columns" do
10
+ subject.columns.count.should eq 4
11
+ end
12
+
13
+ describe "#data" do
14
+
15
+ subject{ dataset.data }
16
+
17
+ its(:class){ should eq Quandl::Cassandra::Data::ScopeScope }
18
+
19
+ it "should have data" do
20
+ subject.count.should eq 60
21
+ end
22
+
23
+ it "should collapse to daily" do
24
+ subject.collapse(:daily).count.should eq 60
25
+ end
26
+
27
+ it "should collapse to weekly" do
28
+ subject.collapse(:weekly).count.should be < 12
29
+ end
30
+
31
+ it "should collapse to monthly" do
32
+ subject.collapse(:monthly).count.should be < 4
33
+ end
34
+
35
+ it "should collapse to annual" do
36
+ subject.collapse(:annual).count.should eq 1
37
+ end
38
+
39
+ end
40
+
41
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ context "column" do
7
+
8
+ subject { create(:dataset) }
9
+
10
+ it "should return first column" do
11
+ subject.data.column(1)[0][1].should eq subject.data_scope.to_table[0][1]
12
+ end
13
+
14
+ it "should return second column" do
15
+ # binding.pry
16
+ subject.data.column(2)[0][1].should eq subject.data_scope.to_table[0][2]
17
+ end
18
+
19
+ it "should return third column" do
20
+ subject.data.column(2)[0][1].should eq subject.data_scope.to_table[0][2]
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ let(:data){ Quandl::Fabricate::Data.rand( rows: 4, columns: 4, nils: false ) }
7
+ let(:dataset){ create( :dataset, data: data ) }
8
+
9
+ subject{ dataset }
10
+
11
+ its(:dataset_attribute){ should be_a DatasetAttribute }
12
+ its(:trim_start){ should eq Dataset.find(dataset.id).data[-1][0] }
13
+ its(:trim_end){ should eq Dataset.find(dataset.id).data[0][0] }
14
+ its(:updated_at){ should_not be_nil }
15
+
16
+ context "after save" do
17
+ before(:each){
18
+ @previously_updated_at = subject.updated_at
19
+ subject.save
20
+ }
21
+ its(:updated_at){ should_not eq @previously_updated_at }
22
+ end
23
+
24
+ end
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ let(:data){ Quandl::Fabricate::Data.rand( rows: 48, columns: 2, frequency: :monthly, nils: false ).to_csv }
7
+ let(:dataset){ create( :dataset, data: data ) }
8
+ subject { dataset }
9
+
10
+ it "should be monthly" do
11
+ subject.frequency.should eq 'monthly'
12
+ end
13
+
14
+ it "should pluck the second row" do
15
+ subject.data_scope.row(1).to_a.should eq [subject.data_scope[1]]
16
+ end
17
+
18
+ it "should collapse and pluck the second year" do
19
+ subject.data_scope.collapse(:annual).row(2).to_a.should eq [subject.data_scope.collapse(:annual)[2]]
20
+ end
21
+
22
+ it "should collapse and pluck the second year with a transformation" do
23
+ subject.data_scope.collapse(:annual).transform(:rdiff).row(2).to_a.should eq [subject.data_scope.collapse(:annual).transform(:rdiff)[2]]
24
+ end
25
+
26
+ end
@@ -0,0 +1,16 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Dataset do
5
+
6
+ let(:data){ Quandl::Data.new([ [1002, 10], [1001, 20], [1000, 30] ]) }
7
+ let(:dataset){ build(:dataset) }
8
+
9
+ it "should cumul data" do
10
+ dataset.data = data
11
+ dataset.save!
12
+ cumul_data = Dataset.find(dataset.id).data.transform(:cumul).to_table.data_array
13
+ cumul_data.should eq [ [1002, 60.0], [1001, 50.0], [1000, 30.0] ]
14
+ end
15
+
16
+ end
@@ -0,0 +1,74 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ describe ".trim_start,.trim_end" do
7
+ { weekly: 7, monthly: 30, quarterly: 92, annual: 365 }.each do | collapse_to, occurences |
8
+ context "when collapsed to #{collapse_to}" do
9
+
10
+ subject(:dataset){ create(:dataset, data: Quandl::Fabricate::Data.rand( rows: occurences * 3, columns: 1, nils: false ).to_csv ) }
11
+
12
+ it "should include trim_start" do
13
+ # raw data
14
+ source_data = subject.data_scope.collapse(collapse_to).to_table
15
+ trim_start = source_data[-1][0]
16
+ # trim and check
17
+ trim_data = subject.data.trim_start( trim_start ).collapse(collapse_to).to_table
18
+ trim_data.last[0].should eq trim_start
19
+ end
20
+ it "should include trim_end" do
21
+ # raw data
22
+ source_data = subject.data_scope.collapse(collapse_to).to_table
23
+ trim_end = source_data[1][0]
24
+ # trim and check
25
+ trim_data = subject.data.trim_end( trim_end ).collapse(collapse_to).to_table
26
+ trim_data.first[0].should eq trim_end
27
+ end
28
+ it "should include trim_start and trim_end" do
29
+ # raw data
30
+ source_data = subject.data_scope.collapse(collapse_to).to_table
31
+ trim_end = source_data[1][0]
32
+ trim_start = source_data[-1][0]
33
+ # trim and check
34
+ trim_data = subject.data.trim_start( trim_start ).trim_end( trim_end ).collapse(collapse_to).to_table
35
+ trim_data.first[0].should eq trim_end
36
+ trim_data.last[0].should eq trim_start
37
+ end
38
+
39
+ [:diff, :rdiff, :cumul].each do |transformed_to|
40
+ context "when transformed to #{transformed_to}" do
41
+ it "should include trim_start" do
42
+ # raw data
43
+ source_data = subject.data_scope.collapse(collapse_to).transform(transformed_to).to_table
44
+ trim_start = source_data[-1][0]
45
+ # trim and check
46
+ trim_data = subject.data.trim_start( trim_start ).transform(transformed_to).collapse(collapse_to).to_table
47
+ trim_data.last[0].should eq trim_start
48
+ end
49
+ it "should include trim_end" do
50
+ # raw data
51
+ source_data = subject.data_scope.collapse(collapse_to).transform(transformed_to).to_table
52
+ trim_end = source_data[-1][0]
53
+ # trim and check
54
+ trim_data = subject.data.trim_end( trim_end ).transform(transformed_to).collapse(collapse_to).to_table
55
+ trim_data.first[0].should eq trim_end
56
+ end
57
+ it "should include trim_start and trim_end" do
58
+ source_data = subject.data_scope.collapse(collapse_to).transform(transformed_to).to_table
59
+ trim_end = source_data[1][0]
60
+ trim_start = source_data[-1][0]
61
+ # trim and check
62
+ trim_data = subject.data.trim_start( trim_start ).trim_end( trim_end ).transform(transformed_to).collapse(collapse_to).to_table
63
+ trim_data.first[0].should eq trim_end
64
+ trim_data.last[0].should eq trim_start
65
+ end
66
+ end # each transform
67
+
68
+ end
69
+
70
+ end # each collapse
71
+ end
72
+ end
73
+
74
+ end
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Dataset do
5
+
6
+ context "update data with newer data" do
7
+
8
+ subject{ create(:dataset, data: Quandl::Fabricate::Data.rand( rows: 730, columns: 2, nils: false ).to_csv ) }
9
+
10
+ it "should update the collapse data" do
11
+ old_row = subject.data_scope[0]
12
+ old_row_month = subject.data_scope.collapse(:monthly)[0]
13
+ # update
14
+ dataset = Dataset.find(subject.id)
15
+ # advance data dates by 60 days
16
+ new_data = subject.data.collect{|r|
17
+ date = r[0] + 60
18
+ values = r[1..-1].collect{ rand(9102841).to_f / 1000 }
19
+ [date, values].flatten
20
+ }
21
+ # assign new data
22
+ dataset.data = new_data.to_a.collect{|r| r.to_csv }.join
23
+ dataset.save!
24
+
25
+ new_row = dataset.data_scope[0]
26
+ new_row_month = dataset.data_scope.collapse(:monthly)[0]
27
+
28
+ new_row[0].should_not eq new_row_month[0]
29
+ new_row[1].should eq new_row_month[1]
30
+
31
+ old_row[0].should_not eq new_row[0]
32
+ old_row_month[1].should_not eq new_row_month[1]
33
+ end
34
+
35
+ end
36
+
37
+ end