quandl_cassandra_models 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +7 -0
  5. data/README.md +7 -0
  6. data/Rakefile +11 -0
  7. data/UPGRADE.md +34 -0
  8. data/config/cassandra.yml +41 -0
  9. data/lib/quandl/cassandra/models.rb +16 -0
  10. data/lib/quandl/cassandra/models/column.rb +42 -0
  11. data/lib/quandl/cassandra/models/column/read.rb +49 -0
  12. data/lib/quandl/cassandra/models/column/read/collapse.rb +41 -0
  13. data/lib/quandl/cassandra/models/column/read/column.rb +19 -0
  14. data/lib/quandl/cassandra/models/column/read/data.rb +59 -0
  15. data/lib/quandl/cassandra/models/column/read/offset.rb +104 -0
  16. data/lib/quandl/cassandra/models/column/read/row.rb +20 -0
  17. data/lib/quandl/cassandra/models/column/read/select_columns.rb +63 -0
  18. data/lib/quandl/cassandra/models/column/read/transform.rb +53 -0
  19. data/lib/quandl/cassandra/models/column/read/trim.rb +14 -0
  20. data/lib/quandl/cassandra/models/column/read/type.rb +25 -0
  21. data/lib/quandl/cassandra/models/column/write.rb +25 -0
  22. data/lib/quandl/cassandra/models/column/write/group_data_by_column.rb +36 -0
  23. data/lib/quandl/cassandra/models/column/write/group_data_by_frequency.rb +24 -0
  24. data/lib/quandl/cassandra/models/column/write/insert_column_attributes.rb +22 -0
  25. data/lib/quandl/cassandra/models/column/write/insert_columns.rb +9 -0
  26. data/lib/quandl/cassandra/models/column_attribute.rb +11 -0
  27. data/lib/quandl/cassandra/models/data.rb +18 -0
  28. data/lib/quandl/cassandra/models/data/search.rb +105 -0
  29. data/lib/quandl/cassandra/models/dataset.rb +87 -0
  30. data/lib/quandl/cassandra/models/dataset/columns.rb +63 -0
  31. data/lib/quandl/cassandra/models/dataset_attribute.rb +6 -0
  32. data/lib/quandl/cassandra/models/multiset.rb +55 -0
  33. data/lib/quandl/cassandra/models/version.rb +7 -0
  34. data/migrations/20131105204200_create_datasets.rb +18 -0
  35. data/migrations/20131105204201_create_columns.rb +18 -0
  36. data/migrations/20131105204202_create_dataset_attributes.rb +17 -0
  37. data/migrations/20131105204203_create_column_attributes.rb +17 -0
  38. data/quandl_cassandra_models.gemspec +28 -0
  39. data/spec/expectations/string.rb +5 -0
  40. data/spec/expectations/time.rb +5 -0
  41. data/spec/factories/dataset.rb +8 -0
  42. data/spec/lib/quandl/cassandra/models/column/read_spec.rb +27 -0
  43. data/spec/lib/quandl/cassandra/models/column/write/group_data_by_frequency_spec.rb +28 -0
  44. data/spec/lib/quandl/cassandra/models/column/write_spec.rb +23 -0
  45. data/spec/lib/quandl/cassandra/models/column_attribute_spec.rb +16 -0
  46. data/spec/lib/quandl/cassandra/models/column_spec.rb +17 -0
  47. data/spec/lib/quandl/cassandra/models/data_spec.rb +105 -0
  48. data/spec/lib/quandl/cassandra/models/dataset/collapse_spec.rb +44 -0
  49. data/spec/lib/quandl/cassandra/models/dataset/column_spec.rb +24 -0
  50. data/spec/lib/quandl/cassandra/models/dataset/persistence_spec.rb +25 -0
  51. data/spec/lib/quandl/cassandra/models/dataset/row_spec.rb +26 -0
  52. data/spec/lib/quandl/cassandra/models/dataset/transform_spec.rb +16 -0
  53. data/spec/lib/quandl/cassandra/models/dataset/trim_spec.rb +74 -0
  54. data/spec/lib/quandl/cassandra/models/dataset/update_spec.rb +37 -0
  55. data/spec/lib/quandl/cassandra/models/dataset_attribute_spec.rb +18 -0
  56. data/spec/lib/quandl/cassandra/models/dataset_spec.rb +117 -0
  57. data/spec/lib/quandl/cassandra/models/multiset/collapse_spec.rb +122 -0
  58. data/spec/lib/quandl/cassandra/models/multiset/columns_spec.rb +57 -0
  59. data/spec/lib/quandl/cassandra/models/multiset/data_spec.rb +25 -0
  60. data/spec/lib/quandl/cassandra/models/multiset/transform_spec.rb +69 -0
  61. data/spec/spec_helper.rb +40 -0
  62. data/tasks/migrations.rake +14 -0
  63. metadata +212 -0
@@ -0,0 +1,20 @@
1
class Quandl::Cassandra::Models::Column::Read::Row < Quandl::Cassandra::Models::Column::Read

  # Translates the :row attribute into :order, :offset and :limit so that the
  # read is limited to a single row. Does nothing when :row is not present.
  def perform
    row = attributes[:row]
    return unless row.present?
    # a negative row is read in ascending order: -1 becomes offset 0, -2 offset 1, ...
    # a non-negative row is used as-is as an offset in descending order
    inverted = row < 0
    attributes[:order]  = inverted ? :asc : :desc
    attributes[:offset] = inverted ? -row - 1 : row
    attributes[:limit]  = 1
  end

end
@@ -0,0 +1,63 @@
1
class Quandl::Cassandra::Models::Column::Read::SelectColumns < Quandl::Cassandra::Models::Column::Read

  # Stores the query result in attributes[:data]: a count when count? is
  # truthy, otherwise the selected rows. Any error is logged together with the
  # attributes and then re-raised.
  def perform
    attributes[:data] =
      if count?
        count_data
      else
        select_data
      end
  rescue => e
    Quandl::Logger.error("#{attributes} #{e}")
    raise
  end

  # Executes the prepared statement once per column id (consistency :one) and
  # returns the largest 'count' read from the first row of each result.
  def count_data
    prepared = Quandl::Cassandra::Base.prepare( statement )
    results = column_ids.each_with_index.map do |id, index|
      # the collapse at the same position supplies the type bound to the query
      prepared.execute( id, attributes[:column_collapses][index].to_s, :one )
    end
    results.map { |result| result.first['count'] }.max
  end

  # Fires one asynchronous query per column id, then merges the results into a
  # hash keyed by row['time']. Each hash value is an array with one slot per
  # column id, filled with row['value'] at the position of the queried column.
  def select_data
    started_at = Time.now
    prepared = Quandl::Cassandra::Base.prepare( statement )
    # dispatch every query before waiting on any of them
    futures = column_ids.each_with_index.map do |id, index|
      collapse_type = attributes[:column_collapses][index].to_s
      prepared.async.execute( id, collapse_type, Quandl::Cassandra::Base.consistency )
    end
    # wait on each future in turn and merge its rows
    data = {}
    futures.each_with_index do |future, index|
      future.value.each do |row|
        slots = ( data[row['time']] ||= Array.new( column_ids.count ) )
        # the first value seen for a time/column pair wins
        slots[index] ||= row['value']
      end
    end
    Quandl::Logger.debug("(#{started_at.elapsed_ms}) #{self.class.name}.select_data")
    data
  end

  # Builds the CQL text that is prepared for each column. The id and type are
  # left as bind markers; trims, order and limit are interpolated.
  def statement
    selection = count? ? "COUNT(*)" : "time,value"
    fragments = [ "SELECT #{selection} FROM columns WHERE" ]
    fragments << " time >= #{attributes[:trim_start]} AND " if attributes[:trim_start]
    fragments << " time <= #{attributes[:trim_end]} AND " if attributes[:trim_end]
    fragments << " id = ? AND type = ?"
    fragments << " ORDER BY type #{order}"
    fragments << " LIMIT #{attributes[:limit]}" if attributes[:limit]
    fragments.join
  end

  # Memoized sort direction: :asc only when attributes[:order] is :asc,
  # otherwise :desc.
  def order
    @order ||= ( attributes[:order] == :asc ? :asc : :desc )
  end

end
@@ -0,0 +1,53 @@
1
class Quandl::Cassandra::Models::Column::Read::Transform < Quandl::Cassandra::Models::Column::Read

  # Adjusts the query attributes for the requested transform. Does nothing
  # when no :transform attribute is present.
  def perform
    return unless attributes[:transform].present?
    apply_rdiff
    apply_cumul
  end

  # For :cumul with a limit present: hands the current attributes to
  # Read::Offset and stores the result in :data_table, then clears :row,
  # :limit and :offset so the query itself is neither limited nor offset.
  def apply_cumul
    return unless transform?( :cumul ) && limit.present?
    attributes[:data_table] = Quandl::Cassandra::Models::Column::Read::Offset.call(attributes)
    [ :row, :limit, :offset ].each { |key| attributes[key] = nil }
  end

  # For :rdiff and :diff: widens the query by one observation, raising the
  # limit by one and moving the trim that applies to the current order by one
  # occurrence of the collapse frequency.
  def apply_rdiff
    return unless transform?( :rdiff, :diff )
    # one extra row is requested whenever a limit is set
    attributes[:limit] += 1 if attributes[:limit]
    # descending reads extend the start of the range backwards
    if trim_start.present? && order == :desc
      attributes[:trim_start] = trim_start.occurrences_of_frequency_ago( 1, attributes[:collapse] ).jd
    end
    # ascending reads extend the end of the range forwards
    if trim_end.present? && order == :asc
      attributes[:trim_end] = trim_end.occurrences_of_frequency_ahead( 1, attributes[:collapse] ).jd
    end
  end

  # True when attributes[:transform] equals any of the given keys.
  def transform?(*keys)
    keys.any? { |key| key == attributes[:transform] }
  end

  # attributes[:trim_end] converted with Date.jd, or nil when it is not present.
  def trim_end
    @trim_end ||= ( Date.jd( attributes[:trim_end] ) if attributes[:trim_end].present? )
  end

  # attributes[:trim_start] converted with Date.jd, or nil when it is not present.
  def trim_start
    @trim_start ||= ( Date.jd( attributes[:trim_start] ) if attributes[:trim_start].present? )
  end

  # Shorthand for attributes[:limit].
  def limit
    attributes[:limit]
  end

  # Memoized sort direction: :asc only when attributes[:order] is :asc,
  # otherwise :desc.
  def order
    @order ||= ( attributes[:order] == :asc ? :asc : :desc )
  end

end
@@ -0,0 +1,14 @@
1
class Quandl::Cassandra::Models::Column::Read::Trim < Quandl::Cassandra::Models::Column::Read

  # When both trims are Integers and :trim_start is greater than :trim_end,
  # sets :trim_end to :trim_start. Otherwise leaves the attributes untouched.
  def perform
    first_day = attributes[:trim_start]
    last_day  = attributes[:trim_end]
    # only act when both bounds were provided as integers
    return unless last_day.is_a?(Integer) && first_day.is_a?(Integer)
    # an inverted range is collapsed onto trim_start
    attributes[:trim_end] = first_day if first_day > last_day
  end

end
@@ -0,0 +1,25 @@
1
class Quandl::Cassandra::Models::Column::Read::Type < Quandl::Cassandra::Models::Column::Read

  # Coerces every attribute value to the type expected for its key, then keeps
  # a clone of the coerced attributes under :pristine unless one already exists.
  def perform
    attributes.keys.each do |key|
      attributes[key] = enforce_type(key, attributes[key])
    end
    attributes[:pristine] ||= attributes.clone
  end

  # Returns value coerced according to key:
  # - numeric keys go through to_i
  # - symbolic keys go through to_sym
  # - :order becomes :asc only when it converts to :asc, otherwise :desc
  # - any other key is returned unchanged
  # try is used for the conversions, so nil values stay nil.
  def enforce_type(key, value)
    case key
    when :limit, :column, :trim_start, :trim_end, :offset, :accuracy, :row then value.try(:to_i)
    when :collapse, :transform, :frequency then value.try(:to_sym)
    when :order then value.try(:to_sym) == :asc ? :asc : :desc
    else value
    end
  end

end
@@ -0,0 +1,25 @@
1
class Quandl::Cassandra::Models::Column::Write < Quandl::Strategy::Strategize

  # strategy attributes
  define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data, :statement_values

  # the steps subclass this class, so they are loaded after it has been opened
  require_relative 'write/insert_columns'
  require_relative 'write/insert_column_attributes'
  require_relative 'write/group_data_by_column'
  require_relative 'write/group_data_by_frequency'

  # Builds a strategy from the given attributes, registers the write steps in
  # order (group by frequency, group by column, insert columns, insert column
  # attributes) and performs it.
  #
  # Returns whatever strategy.perform returns.
  # Any StandardError is logged and re-raised unchanged.
  def self.perform(attributes)
    strategy = Quandl::Strategy.new( attributes ) do |c|
      c.use Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency
      c.use Quandl::Cassandra::Models::Column::Write::GroupDataByColumn
      c.use Quandl::Cassandra::Models::Column::Write::InsertColumns
      c.use Quandl::Cassandra::Models::Column::Write::InsertColumnAttributes
    end
    strategy.perform
  rescue => e
    # strategy is still nil when Quandl::Strategy.new itself raised; calling
    # strategy.attributes would then raise NoMethodError and hide the real
    # error, so fall back to the attributes that were passed in
    logged_attributes = strategy ? strategy.attributes : attributes
    Quandl::Logger.error("#{e} #{logged_attributes}")
    raise
  end

end
@@ -0,0 +1,36 @@
1
class Quandl::Cassandra::Models::Column::Write::GroupDataByColumn < Quandl::Cassandra::Models::Column::Write

  # INPUTS
  # frequency_data: { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }

  # Flattens frequency_data into statement_values.
  def perform
    group_by_statement_values
  end

  # Resets statement_values, then appends one [ column id, frequency, date, value ]
  # entry for every non-blank value. The first element of each row is taken as
  # the date; the remaining elements map to columns by position.
  def group_by_statement_values
    self.statement_values = []
    frequency_data.each do |frequency, rows|
      type = frequency.to_s
      rows.each do |row|
        date = row[0]
        row[1..-1].each_with_index do |value, index|
          # the column id is resolved for every position, even when the value
          # is blank and nothing is appended
          cid = column_id(index)
          next if value.blank?
          self.statement_values << [ cid, type, date, value ]
        end
      end
    end
  end

  # Returns the column id at the given position. column_ids is loaded from the
  # dataset when not yet set, and a new UUID is generated for a position that
  # has no id.
  def column_id(index)
    self.column_ids = Quandl::Cassandra::Models::Dataset.find_column_ids_by_id(id) unless column_ids
    self.column_ids[index] ||= Cql::Uuid.new(SecureRandom.uuid)
  end

end
@@ -0,0 +1,24 @@
1
class Quandl::Cassandra::Models::Column::Write::GroupDataByFrequency < Quandl::Cassandra::Models::Column::Write

  # INPUTS
  # data wrapping [ [1,2,3], [2,4,8], ... ]

  # OUTPUTS
  # frequency_data: { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }

  # Records the data's frequency, then fills frequency_data with a copy of the
  # data array under :source and one copy per frequency returned by
  # collapses_greater_than, taken after collapsing to that frequency.
  def perform
    self.frequency = data.frequency
    self.frequency_data = {}
    # from here on the strategy works on a clone of the incoming data
    self.data = data.clone
    self.frequency_data[:source] = data.data_array.clone
    higher_frequencies = Quandl::Operation::Collapse.collapses_greater_than(data.frequency)
    higher_frequencies.each do |freq|
      # collapse is called on the same data object for each frequency in turn
      data.collapse(freq)
      # copy the array so the next collapse cannot alter this entry
      self.frequency_data[freq] = data.data_array.clone
    end
  end

end
@@ -0,0 +1,22 @@
1
class Quandl::Cassandra::Models::Column::Write::InsertColumnAttributes < Quandl::Cassandra::Models::Column::Write

  # Writes the dataset_attributes row for this dataset, then batch-inserts one
  # datasets row and one column_attributes row per column id. Does nothing
  # when column_ids is blank.
  def perform
    return if column_ids.blank?
    # one [ dataset id, column id, position, frequency ] entry per column
    rows_values = column_ids.each_with_index.map do |column_id, position|
      [id, column_id, position, frequency]
    end
    # updated_at is the current time in milliseconds
    updated_at = (Time.now.to_f * 1000).to_i
    Quandl::Cassandra::Base.execute("INSERT INTO dataset_attributes (id, updated_at, frequency) VALUES (#{id}, #{updated_at}, '#{frequency}')")
    # the block parameters shadow the id and frequency attribute readers
    Quandl::Cassandra::Batch.insert(rows_values) do |id, column_id, position, frequency|
      [
        "INSERT INTO datasets (id, column_id, position) VALUES (#{id}, #{column_id}, #{position})",
        "INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{frequency}' )"
      ].join("\n")
    end
  end

end
@@ -0,0 +1,9 @@
1
class Quandl::Cassandra::Models::Column::Write::InsertColumns < Quandl::Cassandra::Models::Column::Write

  # Batch-inserts statement_values into the columns table. Each entry is
  # unpacked as id, type, time and value and rendered as one INSERT statement.
  def perform
    Quandl::Cassandra::Batch.insert(statement_values) { |id, type, time, value|
      "INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
    }
  end

end
@@ -0,0 +1,11 @@
1
class Quandl::Cassandra::Models::ColumnAttribute < Quandl::Cassandra::Base

  table_name :column_attributes

  after_initialize :default_attributes

  # Registered as the after_initialize callback: assigns a freshly generated
  # UUID as the id when none is present.
  def default_attributes
    return if id.present?
    self.id = Cql::Uuid.new(SecureRandom.uuid)
  end

end
@@ -0,0 +1,18 @@
1
class Quandl::Cassandra::Models::Data < Quandl::Data

  require_relative 'data/search'

  include Quandl::Cassandra::Models::Data::Search

  attr_accessor :dataset_id, :column_ids, :column_frequencies

  # Returns a new scope seeded from this data: always the dataset id, plus the
  # column ids and column frequencies when they are set.
  def scoped
    self.class.scope.new.tap do |fresh|
      fresh.id(dataset_id)
      fresh.column_ids(column_ids) if column_ids
      fresh.column_frequencies(column_frequencies) if column_frequencies
    end
  end

end
@@ -0,0 +1,105 @@
1
# Search scopes for Quandl::Cassandra::Models::Data. Including this module
# adds a composable scope whose accumulated attributes are handed to
# Quandl::Cassandra::Models::Column.read when the scope is fetched.
module Quandl::Cassandra::Models::Data::Search

  extend ActiveSupport::Concern

  included do

    include ScopeComposer::Model

    # names of the scopes defined on the composed scope
    def self.scope_names
      scope.scope_names
    end

    has_scope_composer

    delegate :where, to: :scope

    # seed the scope from a dataset's id and column ids
    scope :dataset, ->(d){
      id(d.id)
      column_ids(d.column_ids)
    }

    # scopes declared by name only
    scope :row, :id, :limit, :offset, :column, :accuracy, :frequency, :count, :delete

    scope :column_frequencies, ->(*freqs){ where( column_frequencies: Array(freqs).flatten ) }
    # nil ids are dropped; nothing is applied when no id remains
    scope :column_ids, ->(*ids){ cids = Array(ids).flatten.compact; where( column_ids: cids ) if cids.present? }

    # collapse and transform are applied only when the respective validator accepts the value
    scope :collapse, ->(v){ where( collapse: v.to_sym ) if Quandl::Operation::Collapse.valid_collapse?(v) }
    scope :transform, ->(v){ where( transform: v.to_sym ) if Quandl::Operation::Transform.valid_transformation?(v) }

    # anything other than :asc is treated as :desc
    scope :order, ->(v){
      order = ( v.to_sym == :asc ) ? :asc : :desc
      where( order: order )
    }

    # trims are stored as julian day numbers; values parse_date cannot handle are ignored
    scope :trim_start, ->(date){ date = parse_date(date); where( trim_start: date ) if date }
    scope :trim_end, ->(date){ date = parse_date(date); where( trim_end: date ) if date }

    scope_helper :find, ->(id){ id(id).to_table }
    scope_helper :to_table, ->{ all }

    # Converts a numeric string, an Integer (julian day), a YYYY-MM-DD string
    # or a Date into a julian day number. Returns nil for anything else,
    # including values that raise while being parsed.
    scope_helper :parse_date, ->( value ){
      begin
        date = Date.jd(value.to_i) if value.kind_of?(String) && value.numeric?
        date = Date.jd(value) if value.is_a?(Integer)
        date = Date.parse(value) if value.is_a?(String) && value =~ /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/
        date = value if value.is_a?(Date)
        date.jd
      rescue
        nil
      end
    }

    scope.class_eval do

      # collection-style calls are forwarded to the fetched result
      delegate :to_a, :==, :inspect, :first, :flatten, :[], :collect, :<=>, :each, :each_with_index, :to_date, :to_h, to: :all, allow_nil: true

      # Deletes the columns matching the scoped column ids.
      # Returns false when no column ids are scoped; otherwise the result of
      # the delete, with a nil result reported as true.
      def delete_all(*args)
        if attributes[:column_ids].present?
          result = Quandl::Cassandra::Models::Column.where( id: attributes[:column_ids] ).delete_all
          return result.nil? ? true : result
        end
        false
      end

      # Returns the fetched count as an Integer, or 0 when neither an id nor
      # column ids are scoped.
      def count(*args)
        attributes[:count] = true
        dataset? ? fetch.to_i : 0
      ensure
        # reset the flag even when fetch raises, so a failed count does not
        # leave the scope in count mode for later reads
        attributes[:count] = false
      end

      # memoized result of fetch
      def all
        @all ||= fetch
      end

      # true when an id or column ids are scoped
      def dataset?
        self.id.present? || attributes[:column_ids].present?
      end

      # true when a memoized result is present
      def fetched?
        @all.present?
      end

      # Returns a new scope carrying over only the id, column ids and column
      # frequencies that are present on this one.
      def scoped
        s = self.class.new
        s.id( self.id ) if id.present?
        s.column_ids( attributes[:column_ids] ) if attributes[:column_ids].present?
        s.column_frequencies(attributes[:column_frequencies]) if attributes[:column_frequencies].present?
        s
      end

      protected

      # Reads the data for the scoped attributes; returns an empty Data when
      # there is nothing to identify the dataset.
      def fetch
        # without an id or columns there's nothing to be read
        return Quandl::Cassandra::Models::Data.new unless dataset?
        # otherwise read the data
        Quandl::Cassandra::Models::Column.read( attributes.merge(scope_attributes) )
      end

    end

  end
end