quandl_cassandra 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/.gitignore +7 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE +7 -0
  4. data/README.md +7 -0
  5. data/Rakefile +11 -0
  6. data/UPGRADE.md +3 -0
  7. data/lib/quandl/cassandra/base/attributes.rb +103 -0
  8. data/lib/quandl/cassandra/base/callbacks.rb +15 -0
  9. data/lib/quandl/cassandra/base/connection.rb +49 -0
  10. data/lib/quandl/cassandra/base/logging.rb +40 -0
  11. data/lib/quandl/cassandra/base/naming.rb +19 -0
  12. data/lib/quandl/cassandra/base/persistence.rb +67 -0
  13. data/lib/quandl/cassandra/base/sanitization.rb +38 -0
  14. data/lib/quandl/cassandra/base/schema.rb +79 -0
  15. data/lib/quandl/cassandra/base/scoping.rb +122 -0
  16. data/lib/quandl/cassandra/base.rb +51 -0
  17. data/lib/quandl/cassandra/configuration.rb +34 -0
  18. data/lib/quandl/cassandra/error.rb +10 -0
  19. data/lib/quandl/cassandra/types/abstract_type.rb +33 -0
  20. data/lib/quandl/cassandra/types/boolean_type.rb +10 -0
  21. data/lib/quandl/cassandra/types/decimal_type.rb +9 -0
  22. data/lib/quandl/cassandra/types/double_type.rb +9 -0
  23. data/lib/quandl/cassandra/types/float_type.rb +9 -0
  24. data/lib/quandl/cassandra/types/integer_type.rb +9 -0
  25. data/lib/quandl/cassandra/types/long_type.rb +9 -0
  26. data/lib/quandl/cassandra/types/timestamp_type.rb +15 -0
  27. data/lib/quandl/cassandra/types/utf8_type.rb +13 -0
  28. data/lib/quandl/cassandra/types/uuid_type.rb +21 -0
  29. data/lib/quandl/cassandra/types.rb +42 -0
  30. data/lib/quandl/cassandra/version.rb +5 -0
  31. data/lib/quandl/cassandra.rb +30 -0
  32. data/lib/quandl/cassandra_models/column/read/collapse.rb +64 -0
  33. data/lib/quandl/cassandra_models/column/read/column.rb +18 -0
  34. data/lib/quandl/cassandra_models/column/read/data_table.rb +57 -0
  35. data/lib/quandl/cassandra_models/column/read/offset.rb +114 -0
  36. data/lib/quandl/cassandra_models/column/read/query.rb +55 -0
  37. data/lib/quandl/cassandra_models/column/read/row.rb +20 -0
  38. data/lib/quandl/cassandra_models/column/read/transform.rb +53 -0
  39. data/lib/quandl/cassandra_models/column/read/type.rb +25 -0
  40. data/lib/quandl/cassandra_models/column/read.rb +28 -0
  41. data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +42 -0
  42. data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +24 -0
  43. data/lib/quandl/cassandra_models/column/write/insert_columns.rb +22 -0
  44. data/lib/quandl/cassandra_models/column/write/insert_data.rb +39 -0
  45. data/lib/quandl/cassandra_models/column/write.rb +22 -0
  46. data/lib/quandl/cassandra_models/column.rb +20 -0
  47. data/lib/quandl/cassandra_models/column_attribute.rb +11 -0
  48. data/lib/quandl/cassandra_models/data.rb +52 -0
  49. data/lib/quandl/cassandra_models/dataset.rb +83 -0
  50. data/lib/quandl/cassandra_models/dataset_attribute.rb +6 -0
  51. data/lib/quandl/cassandra_models/multiset.rb +50 -0
  52. data/lib/quandl/strategy.rb +59 -0
  53. data/quandl_cassandra.gemspec +35 -0
  54. data/spec/expectations/string.rb +5 -0
  55. data/spec/expectations/time.rb +5 -0
  56. data/spec/factories/dataset.rb +8 -0
  57. data/spec/lib/quandl/cassandra/base/scoping_spec.rb +40 -0
  58. data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +28 -0
  59. data/spec/lib/quandl/cassandra_models/column/write_spec.rb +15 -0
  60. data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +16 -0
  61. data/spec/lib/quandl/cassandra_models/column_spec.rb +17 -0
  62. data/spec/lib/quandl/cassandra_models/data_spec.rb +34 -0
  63. data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +41 -0
  64. data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +25 -0
  65. data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +24 -0
  66. data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +26 -0
  67. data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +16 -0
  68. data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +74 -0
  69. data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +37 -0
  70. data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +18 -0
  71. data/spec/lib/quandl/cassandra_models/dataset_spec.rb +63 -0
  72. data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +122 -0
  73. data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +57 -0
  74. data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +25 -0
  75. data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +68 -0
  76. data/spec/lib/quandl/cassandra_spec.rb +12 -0
  77. data/spec/spec_helper.rb +37 -0
  78. metadata +339 -0
@@ -0,0 +1,35 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "quandl/cassandra/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "quandl_cassandra"
7
+ s.version = Quandl::Cassandra::VERSION
8
+ s.authors = ["Blake Hilscher"]
9
+ s.email = ["blake@hilscher.ca"]
10
+ s.homepage = "http://blake.hilscher.ca/"
11
+ s.license = "MIT"
12
+ s.summary = "Quandl cassandra interface."
13
+ s.description = "Quandl cassandra interface. CQL."
14
+
15
+ s.files = `git ls-files`.split("\n")
16
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
17
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
+ s.require_paths = ["lib"]
19
+
20
+ s.add_development_dependency "rake", "~> 10.0"
21
+ s.add_development_dependency "rspec", "~> 2.13"
22
+ s.add_development_dependency "factory_girl_rails"
23
+ s.add_development_dependency "fivemat", "~> 1.2"
24
+ s.add_development_dependency "pry"
25
+
26
+ s.add_runtime_dependency "activesupport", ">= 3.0.0"
27
+ s.add_runtime_dependency "activemodel", ">= 3.0.0"
28
+
29
+ s.add_runtime_dependency "scope_composer", "~> 0.3"
30
+ s.add_runtime_dependency "quandl_data", "~> 1.0"
31
+ s.add_runtime_dependency "quandl_logger", "~> 0.1"
32
+ s.add_runtime_dependency "quandl_operation", "~> 0.1"
33
+ s.add_runtime_dependency 'cql-rb', '1.1.0.rc0'
34
+
35
+ end
@@ -0,0 +1,5 @@
1
+ RSpec::Matchers.define :be_same_string_as do |expected| # custom matcher comparing string representations
2
+ match do |actual|
3
+ actual.to_s == expected.to_s # passes when both sides render to the same String via to_s
4
+ end
5
+ end
@@ -0,0 +1,5 @@
1
+ RSpec::Matchers.define :be_same_second_as do |expected| # custom matcher comparing integer conversions
2
+ match do |actual|
3
+ actual.to_i == expected.to_i # passes when both sides have the same to_i; for Time values that is the same epoch second
4
+ end
5
+ end
@@ -0,0 +1,8 @@
1
+ FactoryGirl.define do
2
+
3
+ factory :dataset, class: Quandl::Cassandra::Dataset do # id below: current time in milliseconds with the sequence number appended as digits
4
+ sequence(:id) { |n| "#{(Time.now.to_f * 1000).to_i}#{n}".to_i }
5
+ data { Quandl::Fabricate::Data.rand( rows: 60, columns: 4, nils: false ).to_csv } # fabricated 60x4 data without nils, serialized to CSV
6
+ end
7
+
8
+ end
@@ -0,0 +1,40 @@
1
+ require 'spec_helper'
2
+
3
+ class TestModel < Quandl::Cassandra::Base; end
4
+
5
+ describe Quandl::Cassandra::Base::Scoping do
6
+
7
+ subject{ TestModel }
8
+
9
+ [:where, :select, :limit, :order].each do |name|
10
+ it{ should respond_to name }
11
+ end
12
+
13
+ describe ".to_cql" do
14
+
15
+ let(:scope) { TestModel.scope.new }
16
+ subject{ scope.to_cql }
17
+
18
+ describe "#where" do
19
+ before(:each){ scope.where( id: 1 ) }
20
+ it{ should eq "SELECT * FROM test_models WHERE id = 1" }
21
+
22
+ describe "#limit" do
23
+ before(:each){ scope.limit( 10 ) }
24
+ it{ should eq "SELECT * FROM test_models WHERE id = 1 LIMIT 10" }
25
+
26
+ describe "#select" do
27
+ before(:each){ scope.select( :name, :value ) }
28
+ it{ should eq "SELECT name,value FROM test_models WHERE id = 1 LIMIT 10" }
29
+
30
+ describe "#order" do
31
+ before(:each){ scope.order("name DESC") }
32
+ it{ should eq "SELECT name,value FROM test_models WHERE id = 1 ORDER BY name DESC LIMIT 10" }
33
+ end
34
+
35
+ end
36
+ end
37
+ end
38
+ end
39
+
40
+ end
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Column::Write::GroupDataByFrequency do
5
+
6
+ let(:data){ Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly ) }
7
+ let(:strategy){ Quandl::Cassandra::Column::Write::GroupDataByFrequency.new({ data: data }) }
8
+
9
+ describe "#frequency_data" do
10
+ subject{ strategy.frequency_data }
11
+ its(:length){ should eq 4 }
12
+
13
+ it "should have source" do
14
+ subject[:source].count.should eq 12
15
+ end
16
+ it "should have monthly" do
17
+ subject[:monthly].count.should <= 4
18
+ end
19
+ it "should have quarterly" do
20
+ subject[:quarterly].count.should <= 2
21
+ end
22
+ it "should have annual" do
23
+ subject[:annual].count.should eq 1
24
+ end
25
+
26
+ end
27
+
28
+ end
@@ -0,0 +1,15 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Column::Write do
5
+
6
+ let(:id){ rand(10000*10000) + 10000*10000 }
7
+ let(:data){ Quandl::Fabricate::Data.rand( columns: 4, rows: 12, nils: false, frequency: :weekly ) }
8
+
9
+ it "should not alter the original data" do
10
+ source_data = data.to_a.collect{|r| r.clone }
11
+ r = Quandl::Cassandra::Column.write( id: id, data: data )
12
+ data.should eq source_data
13
+ end
14
+
15
+ end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
+ describe Quandl::Cassandra::ColumnAttribute do
4
+
5
+ let(:id){ rand(10000*10000) + 10000*10000 }
6
+ let(:data){ Quandl::Fabricate::Data.rand( rows: 10, columns: 2 ) }
7
+ let(:dataset){ Quandl::Cassandra::Dataset.create( id: id, data: data ) }
8
+
9
+ let(:column){ Quandl::Cassandra::ColumnAttribute.find(dataset.column_ids.first) }
10
+
11
+ subject{ column }
12
+
13
+ its(:id){ should eq dataset.column_ids.first }
14
+ its(:frequency){ should eq 'daily' }
15
+
16
+ end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+ describe Quandl::Cassandra::Column do
4
+
5
+ let(:id){ rand(10000*10000) + 10000*10000 }
6
+ let(:data){ Quandl::Fabricate::Data.rand( rows: 10, columns: 2 ) }
7
+ let(:dataset){ Quandl::Cassandra::Dataset.create( id: id, data: data ) }
8
+
9
+ describe ".read" do
10
+ before(:each){ Quandl::Cassandra::Column.write( id: id, data: data ); sleep(0.2) }
11
+
12
+ subject{ Quandl::Cassandra::Column.read( id: id ) }
13
+ its(:count){ should eq 10 }
14
+ it{ should eq data }
15
+ end
16
+
17
+ end
@@ -0,0 +1,34 @@
1
+ require 'spec_helper'
2
+
3
+ describe Quandl::Cassandra::Data do
4
+
5
+ let(:id){ rand(10000*10000) + 10000*10000 }
6
+ let(:data){ Quandl::Fabricate::Data.rand(rows: 10, columns: 2, nils: false) }
7
+ let(:dataset){ Quandl::Cassandra::Dataset.create( id: id, data: data ) }
8
+ subject{ Quandl::Cassandra::Data }
9
+
10
+ [:row, :id, :limit, :offset, :column, :accuracy, :frequency, :column_ids,
11
+ :collapse, :transform, :order, :trim_start, :trim_end ].each do |name|
12
+ it{ should respond_to name }
13
+ end
14
+
15
+ describe "#collapse" do
16
+ it "should become annual" do
17
+ scope = subject.collapse(:annual)
18
+ scope.attributes[:collapse].should eq :annual
19
+ end
20
+ it "should become annual given string" do
21
+ scope = subject.collapse('annual')
22
+ scope.attributes[:collapse].should eq :annual
23
+ end
24
+ it "should become monthly" do
25
+ scope = subject.collapse(:monthly)
26
+ scope.attributes[:collapse].should eq :monthly
27
+ end
28
+ it "should become weekly" do
29
+ scope = subject.collapse('weekly')
30
+ scope.attributes[:collapse].should eq :weekly
31
+ end
32
+ end
33
+
34
+ end
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ let(:dataset){ create(:dataset) }
7
+ subject { dataset }
8
+
9
+ it "should have columns" do
10
+ subject.columns.count.should eq 4
11
+ end
12
+
13
+ describe "#data" do
14
+
15
+ subject{ dataset.data }
16
+
17
+ its(:class){ should eq Quandl::Cassandra::Data::ScopeScope }
18
+
19
+ it "should have data" do
20
+ subject.count.should eq 60
21
+ end
22
+
23
+ it "should collapse to daily" do
24
+ subject.collapse(:daily).count.should eq 60
25
+ end
26
+
27
+ it "should collapse to weekly" do
28
+ subject.collapse(:weekly).count.should be < 12
29
+ end
30
+
31
+ it "should collapse to monthly" do
32
+ subject.collapse(:monthly).count.should be < 4
33
+ end
34
+
35
+ it "should collapse to annual" do
36
+ subject.collapse(:annual).count.should eq 1
37
+ end
38
+
39
+ end
40
+
41
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ context "column" do
7
+
8
+ subject { create(:dataset) }
9
+
10
+ it "should return first column" do
11
+ subject.data.column(1)[0][1].should eq subject.data_scope.to_table[0][1]
12
+ end
13
+
14
+ it "should return second column" do
15
+ # binding.pry
16
+ subject.data.column(2)[0][1].should eq subject.data_scope.to_table[0][2]
17
+ end
18
+
19
+ it "should return third column" do
20
+ subject.data.column(2)[0][1].should eq subject.data_scope.to_table[0][2]
21
+ end
22
+
23
+ end
24
+
25
+ end
@@ -0,0 +1,24 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ let(:data){ Quandl::Fabricate::Data.rand( rows: 4, columns: 4, nils: false ) }
7
+ let(:dataset){ create( :dataset, data: data ) }
8
+
9
+ subject{ dataset }
10
+
11
+ its(:dataset_attribute){ should be_a DatasetAttribute }
12
+ its(:trim_start){ should eq Dataset.find(dataset.id).data[-1][0] }
13
+ its(:trim_end){ should eq Dataset.find(dataset.id).data[0][0] }
14
+ its(:updated_at){ should_not be_nil }
15
+
16
+ context "after save" do
17
+ before(:each){
18
+ @previously_updated_at = subject.updated_at
19
+ subject.save
20
+ }
21
+ its(:updated_at){ should_not eq @previously_updated_at }
22
+ end
23
+
24
+ end
@@ -0,0 +1,26 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ let(:data){ Quandl::Fabricate::Data.rand( rows: 48, columns: 2, frequency: :monthly, nils: false ).to_csv }
7
+ let(:dataset){ create( :dataset, data: data ) }
8
+ subject { dataset }
9
+
10
+ it "should be monthly" do
11
+ subject.frequency.should eq 'monthly'
12
+ end
13
+
14
+ it "should pluck the second row" do
15
+ subject.data_scope.row(1).to_a.should eq [subject.data_scope[1]]
16
+ end
17
+
18
+ it "should collapse and pluck the second year" do
19
+ subject.data_scope.collapse(:annual).row(2).to_a.should eq [subject.data_scope.collapse(:annual)[2]]
20
+ end
21
+
22
+ it "should collapse and pluck the second year with a transformation" do
23
+ subject.data_scope.collapse(:annual).transform(:rdiff).row(2).to_a.should eq [subject.data_scope.collapse(:annual).transform(:rdiff)[2]]
24
+ end
25
+
26
+ end
@@ -0,0 +1,16 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Dataset do
5
+
6
+ let(:data){ Quandl::Data.new([ [1002, 10], [1001, 20], [1000, 30] ]) }
7
+ let(:dataset){ build(:dataset) }
8
+
9
+ it "should cumul data" do
10
+ dataset.data = data
11
+ dataset.save!
12
+ cumul_data = Dataset.find(dataset.id).data.transform(:cumul).to_table.data_array
13
+ cumul_data.should eq [ [1002, 60.0], [1001, 50.0], [1000, 30.0] ]
14
+ end
15
+
16
+ end
@@ -0,0 +1,74 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Quandl::Cassandra::Dataset do
5
+
6
+ describe ".trim_start,.trim_end" do
7
+ { weekly: 7, monthly: 30, quarterly: 92, annual: 365 }.each do | collapse_to, occurences |
8
+ context "when collapsed to #{collapse_to}" do
9
+
10
+ subject(:dataset){ create(:dataset, data: Quandl::Fabricate::Data.rand( rows: occurences * 3, columns: 1, nils: false ).to_csv ) }
11
+
12
+ it "should include trim_start" do
13
+ # raw data
14
+ source_data = subject.data_scope.collapse(collapse_to).to_table
15
+ trim_start = source_data[-1][0]
16
+ # trim and check
17
+ trim_data = subject.data.trim_start( trim_start ).collapse(collapse_to).to_table
18
+ trim_data.last[0].should eq trim_start
19
+ end
20
+ it "should include trim_end" do
21
+ # raw data
22
+ source_data = subject.data_scope.collapse(collapse_to).to_table
23
+ trim_end = source_data[1][0]
24
+ # trim and check
25
+ trim_data = subject.data.trim_end( trim_end ).collapse(collapse_to).to_table
26
+ trim_data.first[0].should eq trim_end
27
+ end
28
+ it "should include trim_start and trim_end" do
29
+ # raw data
30
+ source_data = subject.data_scope.collapse(collapse_to).to_table
31
+ trim_end = source_data[1][0]
32
+ trim_start = source_data[-1][0]
33
+ # trim and check
34
+ trim_data = subject.data.trim_start( trim_start ).trim_end( trim_end ).collapse(collapse_to).to_table
35
+ trim_data.first[0].should eq trim_end
36
+ trim_data.last[0].should eq trim_start
37
+ end
38
+
39
+ [:diff, :rdiff, :cumul].each do |transformed_to|
40
+ context "when transformed to #{transformed_to}" do
41
+ it "should include trim_start" do
42
+ # raw data
43
+ source_data = subject.data_scope.collapse(collapse_to).transform(transformed_to).to_table
44
+ trim_start = source_data[-1][0]
45
+ # trim and check
46
+ trim_data = subject.data.trim_start( trim_start ).transform(transformed_to).collapse(collapse_to).to_table
47
+ trim_data.last[0].should eq trim_start
48
+ end
49
+ it "should include trim_end" do
50
+ # raw data
51
+ source_data = subject.data_scope.collapse(collapse_to).transform(transformed_to).to_table
52
+ trim_end = source_data[-1][0]
53
+ # trim and check
54
+ trim_data = subject.data.trim_end( trim_end ).transform(transformed_to).collapse(collapse_to).to_table
55
+ trim_data.first[0].should eq trim_end
56
+ end
57
+ it "should include trim_start and trim_end" do
58
+ source_data = subject.data_scope.collapse(collapse_to).transform(transformed_to).to_table
59
+ trim_end = source_data[1][0]
60
+ trim_start = source_data[-1][0]
61
+ # trim and check
62
+ trim_data = subject.data.trim_start( trim_start ).trim_end( trim_end ).transform(transformed_to).collapse(collapse_to).to_table
63
+ trim_data.first[0].should eq trim_end
64
+ trim_data.last[0].should eq trim_start
65
+ end
66
+ end # each transform
67
+
68
+ end
69
+
70
+ end # each collapse
71
+ end
72
+ end
73
+
74
+ end
@@ -0,0 +1,37 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe Dataset do
5
+
6
+ context "update data with newer data" do
7
+
8
+ subject{ create(:dataset, data: Quandl::Fabricate::Data.rand( rows: 730, columns: 2, nils: false ).to_csv ) }
9
+
10
+ it "should update the collapse data" do
11
+ old_row = subject.data_scope[0]
12
+ old_row_month = subject.data_scope.collapse(:monthly)[0]
13
+ # update
14
+ dataset = Dataset.find(subject.id)
15
+ # advance data dates by 60 days
16
+ new_data = subject.data.collect{|r|
17
+ date = r[0] + 60
18
+ values = r[1..-1].collect{ rand(9102841).to_f / 1000 }
19
+ [date, values].flatten
20
+ }
21
+ # assign new data
22
+ dataset.data = new_data.to_a.collect{|r| r.to_csv }.join
23
+ dataset.save!
24
+
25
+ new_row = dataset.data_scope[0]
26
+ new_row_month = dataset.data_scope.collapse(:monthly)[0]
27
+
28
+ new_row[0].should_not eq new_row_month[0]
29
+ new_row[1].should eq new_row_month[1]
30
+
31
+ old_row[0].should_not eq new_row[0]
32
+ old_row_month[1].should_not eq new_row_month[1]
33
+ end
34
+
35
+ end
36
+
37
+ end