quandl_cassandra_models 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +7 -0
  5. data/README.md +7 -0
  6. data/Rakefile +11 -0
  7. data/UPGRADE.md +34 -0
  8. data/config/cassandra.yml +41 -0
  9. data/lib/quandl/cassandra/models.rb +16 -0
  10. data/lib/quandl/cassandra/models/column.rb +42 -0
  11. data/lib/quandl/cassandra/models/column/read.rb +49 -0
  12. data/lib/quandl/cassandra/models/column/read/collapse.rb +41 -0
  13. data/lib/quandl/cassandra/models/column/read/column.rb +19 -0
  14. data/lib/quandl/cassandra/models/column/read/data.rb +59 -0
  15. data/lib/quandl/cassandra/models/column/read/offset.rb +104 -0
  16. data/lib/quandl/cassandra/models/column/read/row.rb +20 -0
  17. data/lib/quandl/cassandra/models/column/read/select_columns.rb +63 -0
  18. data/lib/quandl/cassandra/models/column/read/transform.rb +53 -0
  19. data/lib/quandl/cassandra/models/column/read/trim.rb +14 -0
  20. data/lib/quandl/cassandra/models/column/read/type.rb +25 -0
  21. data/lib/quandl/cassandra/models/column/write.rb +25 -0
  22. data/lib/quandl/cassandra/models/column/write/group_data_by_column.rb +36 -0
  23. data/lib/quandl/cassandra/models/column/write/group_data_by_frequency.rb +24 -0
  24. data/lib/quandl/cassandra/models/column/write/insert_column_attributes.rb +22 -0
  25. data/lib/quandl/cassandra/models/column/write/insert_columns.rb +9 -0
  26. data/lib/quandl/cassandra/models/column_attribute.rb +11 -0
  27. data/lib/quandl/cassandra/models/data.rb +18 -0
  28. data/lib/quandl/cassandra/models/data/search.rb +105 -0
  29. data/lib/quandl/cassandra/models/dataset.rb +87 -0
  30. data/lib/quandl/cassandra/models/dataset/columns.rb +63 -0
  31. data/lib/quandl/cassandra/models/dataset_attribute.rb +6 -0
  32. data/lib/quandl/cassandra/models/multiset.rb +55 -0
  33. data/lib/quandl/cassandra/models/version.rb +7 -0
  34. data/migrations/20131105204200_create_datasets.rb +18 -0
  35. data/migrations/20131105204201_create_columns.rb +18 -0
  36. data/migrations/20131105204202_create_dataset_attributes.rb +17 -0
  37. data/migrations/20131105204203_create_column_attributes.rb +17 -0
  38. data/quandl_cassandra_models.gemspec +28 -0
  39. data/spec/expectations/string.rb +5 -0
  40. data/spec/expectations/time.rb +5 -0
  41. data/spec/factories/dataset.rb +8 -0
  42. data/spec/lib/quandl/cassandra/models/column/read_spec.rb +27 -0
  43. data/spec/lib/quandl/cassandra/models/column/write/group_data_by_frequency_spec.rb +28 -0
  44. data/spec/lib/quandl/cassandra/models/column/write_spec.rb +23 -0
  45. data/spec/lib/quandl/cassandra/models/column_attribute_spec.rb +16 -0
  46. data/spec/lib/quandl/cassandra/models/column_spec.rb +17 -0
  47. data/spec/lib/quandl/cassandra/models/data_spec.rb +105 -0
  48. data/spec/lib/quandl/cassandra/models/dataset/collapse_spec.rb +44 -0
  49. data/spec/lib/quandl/cassandra/models/dataset/column_spec.rb +24 -0
  50. data/spec/lib/quandl/cassandra/models/dataset/persistence_spec.rb +25 -0
  51. data/spec/lib/quandl/cassandra/models/dataset/row_spec.rb +26 -0
  52. data/spec/lib/quandl/cassandra/models/dataset/transform_spec.rb +16 -0
  53. data/spec/lib/quandl/cassandra/models/dataset/trim_spec.rb +74 -0
  54. data/spec/lib/quandl/cassandra/models/dataset/update_spec.rb +37 -0
  55. data/spec/lib/quandl/cassandra/models/dataset_attribute_spec.rb +18 -0
  56. data/spec/lib/quandl/cassandra/models/dataset_spec.rb +117 -0
  57. data/spec/lib/quandl/cassandra/models/multiset/collapse_spec.rb +122 -0
  58. data/spec/lib/quandl/cassandra/models/multiset/columns_spec.rb +57 -0
  59. data/spec/lib/quandl/cassandra/models/multiset/data_spec.rb +25 -0
  60. data/spec/lib/quandl/cassandra/models/multiset/transform_spec.rb +69 -0
  61. data/spec/spec_helper.rb +40 -0
  62. data/tasks/migrations.rake +14 -0
  63. metadata +212 -0
@@ -0,0 +1,20 @@
1
# Read step that turns a requested :row index into the :order / :offset /
# :limit attributes used to select a single observation.
class Quandl::Cassandra::Models::Column::Read::Row < Quandl::Cassandra::Models::Column::Read

  # Leaves the attributes untouched when no :row was supplied.
  #
  # A negative row switches the order to ascending and counts from there
  # (-1 => offset 0, -2 => offset 1, ...). A positive (or zero) row is used
  # directly as the offset in descending order. The limit is always 1.
  def perform
    return unless attributes[:row].present?
    index = attributes[:row]
    inverted = index < 0
    attributes[:order] = inverted ? :asc : :desc
    attributes[:offset] = inverted ? -index - 1 : index
    attributes[:limit] = 1
  end

end
@@ -0,0 +1,63 @@
1
# Read step that runs one prepared query against the `columns` table per
# column id and stores the outcome in attributes[:data].
class Quandl::Cassandra::Models::Column::Read::SelectColumns < Quandl::Cassandra::Models::Column::Read

  # Stores either the count or the selected rows in attributes[:data].
  # Any error is logged together with the current attributes and re-raised.
  def perform
    attributes[:data] = if count?
      count_data
    else
      select_data
    end
  rescue => error
    Quandl::Logger.error("#{attributes} #{error}")
    raise
  end

  # Executes the statement once per column id (bound with that column's
  # collapse type, consistency :one) and returns the largest 'count' value
  # found in the first row of each result.
  def count_data
    query = Quandl::Cassandra::Base.prepare(statement)
    results = column_ids.each_with_index.map do |column_id, position|
      # the type bound to the query comes from the collapse at the same position
      collapse_type = attributes[:column_collapses][position].to_s
      query.execute(column_id, collapse_type, :one)
    end
    results.map { |result| result.first['count'] }.max
  end

  # Fires one asynchronous query per column id, then merges the results into
  # a hash of time => array of values, where each value sits at the position
  # of the column it was read for. The elapsed time is logged at debug level.
  def select_data
    started_at = Time.now
    query = Quandl::Cassandra::Base.prepare(statement)
    # start every query before waiting on any of them
    pending = column_ids.each_with_index.map do |column_id, position|
      collapse_type = attributes[:column_collapses][position].to_s
      query.async.execute(column_id, collapse_type, Quandl::Cassandra::Base.consistency)
    end
    table = {}
    pending.each_with_index do |future, position|
      future.value.each do |row|
        cells = (table[row['time']] ||= Array.new(column_ids.count))
        # keep the first value seen for a given time and column
        cells[position] ||= row['value']
      end
    end
    Quandl::Logger.debug("(#{started_at.elapsed_ms}) #{self.class.name}.select_data")
    table
  end

  # Builds the CQL text. The id and type are left as bind markers; the trims,
  # the order and the limit are interpolated from the attributes.
  def statement
    selection = count? ? "COUNT(*)" : "time,value"
    parts = ["SELECT #{selection} FROM columns WHERE"]
    parts << " time >= #{attributes[:trim_start]} AND " if attributes[:trim_start]
    parts << " time <= #{attributes[:trim_end]} AND " if attributes[:trim_end]
    parts << " id = ? AND type = ?"
    parts << " ORDER BY type #{order}"
    parts << " LIMIT #{attributes[:limit]}" if attributes[:limit]
    parts.join
  end

  # Memoized sort direction: :asc only when explicitly requested, else :desc.
  def order
    @order ||= (attributes[:order] == :asc ? :asc : :desc)
  end

end
@@ -0,0 +1,53 @@
1
# Read step that adjusts the query attributes (limit, trims, offset) when a
# transformation has been requested.
class Quandl::Cassandra::Models::Column::Read::Transform < Quandl::Cassandra::Models::Column::Read

  # No-op without a :transform; otherwise applies the diff adjustments first
  # and the cumul adjustments second.
  def perform
    return unless attributes[:transform].present?
    apply_rdiff
    apply_cumul
  end

  # For :cumul with a limit: hands the current attributes to the Offset step,
  # stores its result in :data_table, and then clears :row, :limit and
  # :offset so the query itself is neither limited nor offset.
  def apply_cumul
    return unless transform?(:cumul) && limit.present?
    attributes[:data_table] = Quandl::Cassandra::Models::Column::Read::Offset.call(attributes)
    attributes[:row] = nil
    attributes[:limit] = nil
    attributes[:offset] = nil
  end

  # For :rdiff and :diff: widens the request by one observation. The limit
  # (when set) grows by one, and the trim on the far side of the current
  # order is moved one occurrence of the collapse frequency outward.
  def apply_rdiff
    return unless transform?(:rdiff, :diff)
    attributes[:limit] += 1 if attributes[:limit]
    if trim_start.present? && order == :desc
      attributes[:trim_start] = trim_start.occurrences_of_frequency_ago(1, attributes[:collapse]).jd
    end
    if trim_end.present? && order == :asc
      attributes[:trim_end] = trim_end.occurrences_of_frequency_ahead(1, attributes[:collapse]).jd
    end
  end

  # True when the requested transform equals any of the given keys.
  def transform?(*keys)
    keys.include?(attributes[:transform])
  end

  # :trim_end converted from a Julian day number to a Date (memoized), or nil.
  def trim_end
    @trim_end ||= begin
      julian = attributes[:trim_end]
      Date.jd(julian) if julian.present?
    end
  end

  # :trim_start converted from a Julian day number to a Date (memoized), or nil.
  def trim_start
    @trim_start ||= begin
      julian = attributes[:trim_start]
      Date.jd(julian) if julian.present?
    end
  end

  # The requested limit, if any.
  def limit
    attributes[:limit]
  end

  # Memoized sort direction: :asc only when explicitly requested, else :desc.
  def order
    @order ||= (attributes[:order] == :asc ? :asc : :desc)
  end

end
@@ -0,0 +1,14 @@
1
# Read step that keeps the trim window consistent.
class Quandl::Cassandra::Models::Column::Read::Trim < Quandl::Cassandra::Models::Column::Read

  # Only acts when both trims are Integers. If trim_start lies after trim_end
  # the caller is asking for an empty range, so trim_end is pulled up to
  # trim_start.
  def perform
    lower = attributes[:trim_start]
    upper = attributes[:trim_end]
    return unless upper.is_a?(Integer) && lower.is_a?(Integer)
    attributes[:trim_end] = lower if lower > upper
  end

end
@@ -0,0 +1,25 @@
1
# Read step that coerces the known attributes to their expected types.
class Quandl::Cassandra::Models::Column::Read::Type < Quandl::Cassandra::Models::Column::Read

  # Replaces every attribute value with its coerced form, then stores a
  # clone of the coerced attributes under :pristine unless one already exists.
  def perform
    attributes.each { |name, raw| attributes[name] = enforce_type(name, raw) }
    attributes[:pristine] ||= attributes.clone
  end

  # Returns +value+ coerced according to +key+:
  # * numeric keys   -> to_i (nil stays nil)
  # * symbolic keys  -> to_sym (nil stays nil)
  # * :order         -> :asc only when the value is asc, otherwise :desc
  # * anything else  -> returned untouched
  def enforce_type(key, value)
    case key
    when :limit, :column, :trim_start, :trim_end, :offset, :accuracy, :row
      value.try(:to_i)
    when :collapse, :transform, :frequency
      value.try(:to_sym)
    when :order
      value.try(:to_sym) == :asc ? :asc : :desc
    else
      value
    end
  end

end
@@ -0,0 +1,25 @@
1
# Entry point for writing column data. Declares the attributes shared by the
# write steps and composes those steps into a single strategy.
class Quandl::Cassandra::Models::Column::Write < Quandl::Strategy::Strategize

  # strategy attributes
  define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data, :statement_values

  require_relative 'write/insert_columns'
  require_relative 'write/insert_column_attributes'
  require_relative 'write/group_data_by_column'
  require_relative 'write/group_data_by_frequency'

  # Builds a strategy from +attributes+, registers the four write steps
  # (group by frequency, group by column, insert columns, insert column
  # attributes) and performs it.
  #
  # Any error is logged and re-raised. The log line includes the strategy's
  # attributes when the strategy was built, and the incoming +attributes+
  # otherwise.
  def self.perform(attributes)
    strategy = Quandl::Strategy.new(attributes) do |c|
      c.use Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency
      c.use Quandl::Cassandra::Models::Column::Write::GroupDataByColumn
      c.use Quandl::Cassandra::Models::Column::Write::InsertColumns
      c.use Quandl::Cassandra::Models::Column::Write::InsertColumnAttributes
    end
    strategy.perform
  rescue => e
    # strategy is still nil when Quandl::Strategy.new itself raised; calling
    # strategy.attributes here would raise NoMethodError and hide the real error.
    logged_attributes = strategy ? strategy.attributes : attributes
    Quandl::Logger.error("#{e} #{logged_attributes}")
    raise
  end

end
@@ -0,0 +1,36 @@
1
# Write step that flattens the per-frequency rows into one list of
# [column id, frequency, date, value] tuples in statement_values.
class Quandl::Cassandra::Models::Column::Write::GroupDataByColumn < Quandl::Cassandra::Models::Column::Write

  # INPUTS
  # { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }

  def perform
    group_by_statement_values
  end

  # Rebuilds statement_values from frequency_data. The first cell of every
  # row is its date and each remaining cell belongs to the column at the same
  # position. Blank values are skipped, but a column id is still resolved for
  # their position.
  def group_by_statement_values
    self.statement_values = []
    frequency_data.each do |freq, observations|
      type = freq.to_s
      observations.each do |observation|
        date = observation[0]
        observation[1..-1].each_with_index do |value, position|
          # resolve the id before the blank check so every position gets one
          cid = column_id(position)
          next if value.blank?
          statement_values << [cid, type, date, value]
        end
      end
    end
  end

  # Returns the column id at +index+. The ids are looked up from the dataset
  # on first use, and a fresh UUID fills any position that has none yet.
  def column_id(index)
    self.column_ids ||= Quandl::Cassandra::Models::Dataset.find_column_ids_by_id(id)
    self.column_ids[index] ||= Cql::Uuid.new(SecureRandom.uuid)
  end

end
@@ -0,0 +1,24 @@
1
# Write step that records the data's frequency and builds frequency_data:
# the source rows plus one collapsed copy for each collapse returned by
# Quandl::Operation::Collapse.collapses_greater_than.
class Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency < Quandl::Cassandra::Models::Column::Write

  # INPUTS
  # [ [1,2,3], [2,4,8], ... ]

  # OUTPUTS
  # { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }

  def perform
    self.frequency = data.frequency
    self.frequency_data = {}
    # work on a clone of the data from here on
    self.data = data.clone
    frequency_data[:source] = data.data_array.clone
    coarser = Quandl::Operation::Collapse.collapses_greater_than(data.frequency)
    coarser.each do |freq|
      # collapse to this frequency, then snapshot the resulting rows
      data.collapse(freq)
      frequency_data[freq] = data.data_array.clone
    end
  end

end
@@ -0,0 +1,22 @@
1
# Write step that records the dataset's frequency and update time, links
# each column id to the dataset by position, and stores each column's
# frequency.
class Quandl::Cassandra::Models::Column::Write::InsertColumnAttributes < Quandl::Cassandra::Models::Column::Write

  # Skipped entirely when there are no column ids.
  def perform
    return if column_ids.blank?
    # one [dataset id, column id, position, frequency] tuple per column
    rows_values = column_ids.each_with_index.map do |column_id, position|
      [id, column_id, position, frequency]
    end
    # insert dataset_attribute; updated_at is the current time in milliseconds
    Quandl::Cassandra::Base.execute("INSERT INTO dataset_attributes (id, updated_at, frequency) VALUES (#{id}, #{(Time.now.to_f * 1000).to_i}, '#{frequency}')")
    # two statements per column: the dataset link and the column's frequency
    Quandl::Cassandra::Batch.insert(rows_values) do |dataset_id, column_id, position, column_frequency|
      %Q{INSERT INTO datasets (id, column_id, position) VALUES (#{dataset_id}, #{column_id}, #{position})
      INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{column_frequency}' )}
    end
  end

end
@@ -0,0 +1,9 @@
1
# Write step that batch-inserts statement_values into the `columns` table.
class Quandl::Cassandra::Models::Column::Write::InsertColumns < Quandl::Cassandra::Models::Column::Write

  # Hands every [id, type, time, value] tuple to the batch inserter, which
  # receives one INSERT statement per tuple from the block.
  def perform
    Quandl::Cassandra::Batch.insert(statement_values) do |column_id, collapse_type, julian, observed|
      "INSERT INTO columns (id, type, time, value) VALUES (#{column_id}, '#{collapse_type}', #{julian}, #{observed})"
    end
  end

end
@@ -0,0 +1,11 @@
1
# Model backed by the column_attributes table.
class Quandl::Cassandra::Models::ColumnAttribute < Quandl::Cassandra::Base

  table_name :column_attributes

  after_initialize :default_attributes

  # Assigns a freshly generated UUID as the id when none is set.
  def default_attributes
    self.id = Cql::Uuid.new(SecureRandom.uuid) if id.blank?
  end

end
@@ -0,0 +1,18 @@
1
# Quandl::Data subclass that remembers which dataset and columns it belongs
# to and mixes in the search scopes.
class Quandl::Cassandra::Models::Data < Quandl::Data

  require_relative 'data/search'

  include Quandl::Cassandra::Models::Data::Search

  attr_accessor :dataset_id, :column_ids, :column_frequencies

  # Returns a new scope seeded with this data's dataset id and, when they
  # are set, its column ids and column frequencies.
  def scoped
    self.class.scope.new.tap do |fresh|
      fresh.id(dataset_id)
      fresh.column_ids(column_ids) if column_ids
      fresh.column_frequencies(column_frequencies) if column_frequencies
    end
  end

end
@@ -0,0 +1,105 @@
1
# Search concern for Data: declares the query scopes (id, trims, order,
# collapse, transform, ...) and extends the generated scope class with the
# methods that fetch, count and delete the data.
module Quandl::Cassandra::Models::Data::Search

  extend ActiveSupport::Concern

  included do

    include ScopeComposer::Model

    def self.scope_names
      scope.scope_names
    end

    has_scope_composer

    delegate :where, to: :scope

    # scope to a dataset's id and column ids
    scope :dataset, ->(d){
      id(d.id)
      column_ids(d.column_ids)
    }

    scope :row, :id, :limit, :offset, :column, :accuracy, :frequency, :count, :delete

    scope :column_frequencies, ->(*freqs){ where( column_frequencies: Array(freqs).flatten ) }
    # nil ids are dropped; the scope is not applied when nothing is left
    scope :column_ids, ->(*ids){ cids = Array(ids).flatten.compact; where( column_ids: cids ) if cids.present? }

    # collapse and transform are only applied when the value is recognised
    scope :collapse, ->(v){ where( collapse: v.to_sym ) if Quandl::Operation::Collapse.valid_collapse?(v) }
    scope :transform, ->(v){ where( transform: v.to_sym ) if Quandl::Operation::Transform.valid_transformation?(v) }

    # anything other than asc is treated as desc
    scope :order, ->(v){
      order = ( v.to_sym == :asc ) ? :asc : :desc
      where( order: order )
    }

    # trims are stored as Julian day numbers and skipped when unparseable
    scope :trim_start, ->(date){ date = parse_date(date); where( trim_start: date ) if date }
    scope :trim_end, ->(date){ date = parse_date(date); where( trim_end: date ) if date }

    scope_helper :find, ->(id){ id(id).to_table }
    scope_helper :to_table, ->{ all }

    # Converts a numeric string, an Integer (Julian day), a YYYY-MM-DD string
    # or a Date into a Julian day number. Returns nil for anything else and
    # for values that fail to parse.
    scope_helper :parse_date, ->( value ){
      begin
        date = Date.jd(value.to_i) if value.kind_of?(String) && value.numeric?
        date = Date.jd(value) if value.is_a?(Integer)
        # \A and \z anchor the whole string; ^ and $ would accept any
        # multi-line string that merely contains a date-shaped line
        date = Date.parse(value) if value.is_a?(String) && value =~ /\A[0-9]{4}\-[0-9]{2}\-[0-9]{2}\z/
        date = value if value.is_a?(Date)
        date.jd
      rescue
        nil
      end
    }

    scope.class_eval do

      delegate :to_a, :==, :inspect, :first, :flatten, :[], :collect, :<=>, :each, :each_with_index, :to_date, :to_h, to: :all, allow_nil: true

      # Deletes the columns matching the scoped column ids.
      # Returns false when no column ids are scoped; otherwise the result of
      # the delete, with nil mapped to true.
      def delete_all(*args)
        if attributes[:column_ids].present?
          result = Quandl::Cassandra::Models::Column.where( id: attributes[:column_ids] ).delete_all
          return result.nil? ? true : result
        end
        false
      end

      # Returns the number of matching rows, or 0 when neither an id nor
      # column ids are scoped. The :count flag is reset in an ensure clause
      # so that a failing fetch cannot leave this scope in count mode.
      def count(*args)
        attributes[:count] = true
        dataset? ? fetch.to_i : 0
      ensure
        attributes[:count] = false
      end

      # Fetches the data once and memoizes it.
      def all
        @all ||= fetch
      end

      # True when an id or column ids have been scoped.
      def dataset?
        self.id.present? || attributes[:column_ids].present?
      end

      # True when data has been fetched and is present.
      def fetched?
        @all.present?
      end

      # Returns a new scope carrying over only the id, column ids and column
      # frequencies of this one.
      def scoped
        s = self.class.new
        s.id( self.id ) if id.present?
        s.column_ids( attributes[:column_ids] ) if attributes[:column_ids].present?
        s.column_frequencies(attributes[:column_frequencies]) if attributes[:column_frequencies].present?
        s
      end

      protected

      def fetch
        # without an id or columns there's nothing to be read
        return Quandl::Cassandra::Models::Data.new unless dataset?
        # otherwise read the data
        Quandl::Cassandra::Models::Column.read( attributes.merge(scope_attributes) )
      end

    end

  end
end