quandl_cassandra 0.3.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. data/UPGRADE.md +6 -0
  2. data/lib/quandl/cassandra.rb +0 -7
  3. data/lib/quandl/cassandra/version.rb +1 -1
  4. data/spec/lib/quandl/cassandra/base/scoping_spec.rb +10 -10
  5. metadata +1 -61
  6. data/lib/quandl/cassandra_models/column.rb +0 -20
  7. data/lib/quandl/cassandra_models/column/read.rb +0 -32
  8. data/lib/quandl/cassandra_models/column/read/collapse.rb +0 -65
  9. data/lib/quandl/cassandra_models/column/read/column.rb +0 -19
  10. data/lib/quandl/cassandra_models/column/read/data.rb +0 -59
  11. data/lib/quandl/cassandra_models/column/read/offset.rb +0 -118
  12. data/lib/quandl/cassandra_models/column/read/row.rb +0 -20
  13. data/lib/quandl/cassandra_models/column/read/select_columns.rb +0 -60
  14. data/lib/quandl/cassandra_models/column/read/transform.rb +0 -53
  15. data/lib/quandl/cassandra_models/column/read/type.rb +0 -25
  16. data/lib/quandl/cassandra_models/column/write.rb +0 -22
  17. data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +0 -37
  18. data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +0 -24
  19. data/lib/quandl/cassandra_models/column/write/insert_column_attributes.rb +0 -20
  20. data/lib/quandl/cassandra_models/column/write/insert_columns.rb +0 -9
  21. data/lib/quandl/cassandra_models/column_attribute.rb +0 -11
  22. data/lib/quandl/cassandra_models/data.rb +0 -18
  23. data/lib/quandl/cassandra_models/data/search.rb +0 -104
  24. data/lib/quandl/cassandra_models/dataset.rb +0 -72
  25. data/lib/quandl/cassandra_models/dataset/columns.rb +0 -61
  26. data/lib/quandl/cassandra_models/dataset_attribute.rb +0 -6
  27. data/lib/quandl/cassandra_models/multiset.rb +0 -50
  28. data/spec/factories/dataset.rb +0 -8
  29. data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +0 -28
  30. data/spec/lib/quandl/cassandra_models/column/write_spec.rb +0 -23
  31. data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +0 -16
  32. data/spec/lib/quandl/cassandra_models/column_spec.rb +0 -17
  33. data/spec/lib/quandl/cassandra_models/data_spec.rb +0 -84
  34. data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +0 -44
  35. data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +0 -24
  36. data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +0 -24
  37. data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +0 -26
  38. data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +0 -16
  39. data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +0 -74
  40. data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +0 -37
  41. data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +0 -18
  42. data/spec/lib/quandl/cassandra_models/dataset_spec.rb +0 -111
  43. data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +0 -122
  44. data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +0 -57
  45. data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +0 -25
  46. data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +0 -69
@@ -1,20 +0,0 @@
# Read step that rewrites a single-row request (attributes[:row]) into
# :order, :offset and :limit entries on the same attributes hash.
class Quandl::Cassandra::Column::Read::Row < Quandl::Cassandra::Column::Read

  # Does nothing when no row was requested; otherwise sets :order, :offset
  # and :limit so that exactly one observation is selected.
  def perform
    row = attributes[:row]
    return unless row.present?
    # a negative row is read in ascending order with the offset inverted
    # (-1 becomes offset 0); a positive value is an offset from the current
    # observation, read in descending order
    attributes[:order], attributes[:offset] = row < 0 ? [ :asc, (row * -1) - 1 ] : [ :desc, row ]
    attributes[:limit] = 1
  end

end
@@ -1,60 +0,0 @@
# Read step that queries the `columns` table once per requested column id and
# stores the outcome in attributes[:data]: a count when count? is truthy,
# otherwise a hash of time => array of values.
class Quandl::Cassandra::Column::Read::SelectColumns < Quandl::Cassandra::Column::Read

  # Stores either the row count or the selected data in attributes[:data].
  def perform
    attributes[:data] =
      if count?
        count_data
      else
        select_data
      end
  end

  # Runs the COUNT(*) statement for every column id (one execute call per
  # id, in order) and returns the largest count.
  def count_data
    query = Quandl::Cassandra::Base.prepare( statement )
    results = attributes[:column_ids].each_with_index.map do |column_id, position|
      # the collapse at the same position is bound as the column type
      collapse_type = attributes[:column_collapses][position].to_s
      query.execute( column_id, collapse_type, :one )
    end
    results.map { |result| result.first['count'] }.max
  end

  # Issues one async query per column id, then merges the results into
  # { time => [ value_for_column_0, value_for_column_1, ... ] }.
  def select_data
    started_at = Time.now
    query = Quandl::Cassandra::Base.prepare( statement )
    # fire off every query before waiting on any of them
    futures = attributes[:column_ids].each_with_index.map do |column_id, position|
      collapse_type = attributes[:column_collapses][position].to_s
      query.async.execute( column_id, collapse_type, Quandl::Cassandra::Base.consistency )
    end
    # collect the results; a slot that is already filled is never overwritten
    data = {}
    futures.each_with_index do |future, position|
      future.value.each do |row|
        slots = ( data[row['time']] ||= Array.new( attributes[:column_ids].count ) )
        slots[position] ||= row['value']
      end
    end
    Quandl::Logger.debug("(#{started_at.elapsed_ms}) #{self.class.name}.select_data")
    data
  end

  # Builds the CQL text. id and type are bound parameters; the trims, order
  # and limit are interpolated into the string.
  def statement
    selection = count? ? "COUNT(*)" : "time,value"
    fragments = [ "SELECT #{selection} FROM columns WHERE" ]
    fragments << " time >= #{attributes[:trim_start]} AND " if attributes[:trim_start]
    fragments << " time <= #{attributes[:trim_end]} AND " if attributes[:trim_end]
    fragments << " id = ? AND type = ?"
    fragments << " ORDER BY type #{order}"
    fragments << " LIMIT #{attributes[:limit]}" if attributes[:limit]
    fragments.join
  end

  # Memoized sort direction: :asc only when explicitly requested, else :desc.
  def order
    @order ||= ( attributes[:order] == :asc ? :asc : :desc )
  end

end
@@ -1,53 +0,0 @@
# Read step that adjusts the query attributes (limit, trims, offset) when a
# transform (:rdiff, :diff or :cumul) has been requested.
class Quandl::Cassandra::Column::Read::Transform < Quandl::Cassandra::Column::Read

  # No-op unless attributes[:transform] is present.
  def perform
    return unless attributes[:transform].present?
    apply_rdiff
    apply_cumul
  end

  # For :cumul with a limit: hand the offset/limit handling over to the data
  # table (via Read::Offset) and clear row/limit/offset on the query.
  def apply_cumul
    return if !transform?( :cumul ) || limit.blank?
    # data table needs to handle the offset since cumul requires all data
    attributes[:data_table] = Quandl::Cassandra::Column::Read::Offset.call(attributes)
    # the query itself should not limit or offset the data
    [ :row, :limit, :offset ].each { |key| attributes[key] = nil }
    nil
  end

  # For :rdiff / :diff: widen the query by one observation.
  def apply_rdiff
    return unless transform?( :rdiff, :diff )
    # fetch one extra row when a limit is set
    attributes[:limit] += 1 if attributes[:limit]
    # move the trim on the far side of the ordering out by one period
    if trim_start.present? && order == :desc
      attributes[:trim_start] = trim_start.occurrences_of_frequency_ago( 1, attributes[:collapse] ).jd
    end
    if trim_end.present? && order == :asc
      attributes[:trim_end] = trim_end.occurrences_of_frequency_ahead( 1, attributes[:collapse] ).jd
    end
  end

  # True when the requested transform equals any of the given keys.
  def transform?(*keys)
    keys.include?( attributes[:transform] )
  end

  # attributes[:trim_end] (a Julian day number) as a Date, or nil. Memoized.
  def trim_end
    @trim_end ||= ( Date.jd( attributes[:trim_end] ) if attributes[:trim_end].present? )
  end

  # attributes[:trim_start] (a Julian day number) as a Date, or nil. Memoized.
  def trim_start
    @trim_start ||= ( Date.jd( attributes[:trim_start] ) if attributes[:trim_start].present? )
  end

  def limit
    attributes[:limit]
  end

  # Memoized sort direction: :asc only when explicitly requested, else :desc.
  def order
    @order ||= ( attributes[:order] == :asc ? :asc : :desc )
  end

end
@@ -1,25 +0,0 @@
# Read step that coerces the known attributes to their expected types and
# keeps a copy of the coerced attributes under :pristine.
class Quandl::Cassandra::Column::Read::Type < Quandl::Cassandra::Column::Read

  # keys coerced with to_i
  INTEGER_KEYS = [ :limit, :column, :trim_start, :trim_end, :offset, :accuracy, :row ].freeze
  # keys coerced with to_sym
  SYMBOL_KEYS = [ :collapse, :transform, :frequency ].freeze

  def perform
    # enforce types on every attribute currently present
    attributes.keys.each do |key|
      attributes[key] = enforce_type( key, attributes[key] )
    end
    # retain a copy of the attrs; an existing :pristine entry is kept as is
    attributes[:pristine] ||= attributes.clone
  end

  # Returns +value+ coerced for +key+. nil stays nil for integer and symbol
  # keys (try); :order always resolves to :asc or :desc; any other key is
  # passed through untouched.
  def enforce_type(key, value)
    case key
    when *INTEGER_KEYS
      value.try(:to_i)
    when *SYMBOL_KEYS
      value.try(:to_sym)
    when :order
      value.try(:to_sym) == :asc ? :asc : :desc
    else
      value
    end
  end

end
@@ -1,22 +0,0 @@
# Write strategy: declares the attributes shared by the write steps and runs
# those steps in a fixed order.
class Quandl::Cassandra::Column::Write < Quandl::Strategy::Strategize

  # strategy attributes
  define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data, :statement_values

  require_relative 'write/insert_columns'
  require_relative 'write/insert_column_attributes'
  require_relative 'write/group_data_by_column'
  require_relative 'write/group_data_by_frequency'

  # Builds a strategy over +attributes+ that uses the grouping steps first
  # and the insert steps after them, then performs it and returns the result.
  def self.perform(attributes)
    Quandl::Strategy.new( attributes ) do |chain|
      chain.use Quandl::Cassandra::Column::Write::GroupDataByFrequency
      chain.use Quandl::Cassandra::Column::Write::GroupDataByColumn
      chain.use Quandl::Cassandra::Column::Write::InsertColumns
      chain.use Quandl::Cassandra::Column::Write::InsertColumnAttributes
    end.perform
  end

end
@@ -1,37 +0,0 @@
# Write step that flattens frequency_data into statement_values, one
# [ column_id, frequency, date, value ] entry per non-blank value.
class Quandl::Cassandra::Column::Write::GroupDataByColumn < Quandl::Cassandra::Column::Write

  # INPUT  (frequency_data)
  # { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }

  # OUTPUT (statement_values)
  # [ [ UUID, "source", 1, 2 ], [ UUID, "source", 1, 3 ], ... ]

  def perform
    group_by_statement_values
  end

  # Rebuilds statement_values from scratch.
  def group_by_statement_values
    self.statement_values = []
    frequency_data.each do |freq, rows|
      type = freq.to_s
      # each row is [ date, val, val, ... ]
      rows.each do |row|
        date = row[0]
        row[1..-1].each_with_index do |value, index|
          # blank values are skipped, and no column id is looked up for them
          next if value.blank?
          statement_values << [ column_id(index), type, date, value ]
        end
      end
    end
  end

  # Returns the id for the column at +index+. column_ids is loaded from the
  # dataset on first use, and a fresh UUID is generated for a missing slot.
  def column_id(index)
    self.column_ids = Quandl::Cassandra::Dataset.find_column_ids_by_id(id) unless column_ids
    column_ids[index] ||= Cql::Uuid.new(SecureRandom.uuid)
  end

end
@@ -1,24 +0,0 @@
# Write step that records the source frequency and builds frequency_data:
# the source rows plus one collapsed copy per frequency returned by
# Quandl::Operation::Collapse.collapses_greater_than.
class Quandl::Cassandra::Column::Write::GroupDataByFrequency < Quandl::Cassandra::Column::Write

  # INPUT  (data.data_array)
  # [ [1,2,3], [2,4,8], ... ]

  # OUTPUT (frequency_data)
  # { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }

  def perform
    self.frequency = data.frequency
    self.frequency_data = {}
    # work on a clone, because collapse is called on the data object below
    self.data = data.clone
    frequency_data[:source] = data.data_array.clone
    # collapse step by step, snapshotting the data array after each collapse
    targets = Quandl::Operation::Collapse.collapses_greater_than(data.frequency)
    targets.each do |target|
      data.collapse(target)
      frequency_data[target] = data.data_array.clone
    end
  end

end
@@ -1,20 +0,0 @@
# Write step that records, for every column id, its position in the dataset
# (datasets table) and its frequency (column_attributes table).
class Quandl::Cassandra::Column::Write::InsertColumnAttributes < Quandl::Cassandra::Column::Write

  # INPUT
  # column_ids: [ UUID, UUID, ... ] in column order

  def perform
    return if column_ids.blank?
    # one [ dataset id, column id, position, frequency ] entry per column
    rows_values = column_ids.each_with_index.map do |column_id, position|
      [ id, column_id, position, frequency ]
    end
    # each entry yields two INSERT statements separated by a newline
    # NOTE(review): values are interpolated directly into the CQL text
    Quandl::Cassandra::Batch.insert(rows_values) do |dataset_id, column_id, position, freq|
      [
        "INSERT INTO datasets (id, column_id, position) VALUES (#{dataset_id}, #{column_id}, #{position})",
        "INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{freq}' )"
      ].join("\n")
    end
  end

end
@@ -1,9 +0,0 @@
# Write step that inserts every statement_values entry into the `columns`
# table through Quandl::Cassandra::Batch.
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write

  # Each entry is destructured as [ id, type, time, value ].
  # NOTE(review): values are interpolated directly into the CQL text.
  def perform
    Quandl::Cassandra::Batch.insert(statement_values) { |column_id, type, time, value|
      "INSERT INTO columns (id, type, time, value) VALUES (#{column_id}, '#{type}', #{time}, #{value})"
    }
  end

end
@@ -1,11 +0,0 @@
# Model backed by the `column_attributes` table.
class Quandl::Cassandra::ColumnAttribute < Quandl::Cassandra::Base

  table_name :column_attributes

  after_initialize :default_attributes

  # Assigns a freshly generated UUID string as the id.
  # NOTE(review): the assignment is unconditional, so any id already set when
  # the after_initialize callback fires is replaced; confirm this is intended.
  def default_attributes
    generated_id = SecureRandom.uuid
    self.id = generated_id
  end

end
@@ -1,18 +0,0 @@
# Quandl::Data subclass that carries the dataset/column identifiers and mixes
# in the Search scopes.
class Quandl::Cassandra::Data < Quandl::Data

  require_relative 'data/search'

  include Quandl::Cassandra::Data::Search

  attr_accessor :dataset_id, :column_ids, :column_frequencies

  # Starts a new scope from this data: the dataset id is always applied,
  # column ids and column frequencies only when they are set.
  def scoped
    self.class.scope.new.tap do |fresh|
      fresh.id(dataset_id)
      fresh.column_ids(column_ids) if column_ids
      fresh.column_frequencies(column_frequencies) if column_frequencies
    end
  end

end
@@ -1,104 +0,0 @@
# Search scopes for Quandl::Cassandra::Data. Including this concern sets up a
# ScopeComposer scope with the query attributes (id, limit, trims, order, ...)
# and extends the scope class with fetch / count / delete helpers.
module Quandl::Cassandra::Data::Search

  extend ActiveSupport::Concern

  included do

    include ScopeComposer::Model

    def self.scope_names
      scope.scope_names
    end

    has_scope_composer

    delegate :where, to: :scope

    # scope by a dataset object: applies its id and its column ids
    scope :dataset, ->(d){
      id(d.id)
      column_ids(d.column_ids)
    }

    scope :row, :id, :limit, :offset, :column, :accuracy, :frequency, :count, :delete

    scope :column_frequencies, ->(*freqs){ where( column_frequencies: Array(freqs).flatten ) }
    # nil ids are dropped; nothing is applied when no id remains
    scope :column_ids, ->(*ids){ cids = Array(ids).flatten.compact; where( column_ids: cids ) if cids.present? }

    # collapse / transform are only applied when the value is recognised
    scope :collapse, ->(v){ where( collapse: v.to_sym ) if Quandl::Operation::Collapse.valid_collapse?(v) }
    scope :transform, ->(v){ where( transform: v.to_sym ) if Quandl::Operation::Transform.valid_transformation?(v) }

    # anything other than :asc is treated as :desc
    scope :order, ->(v){
      order = ( v.to_sym == :asc ) ? :asc : :desc
      where( order: order )
    }

    # trims accept whatever parse_date accepts; unparseable values are ignored
    scope :trim_start, ->(date){ date = parse_date(date); where( trim_start: date ) if date }
    scope :trim_end, ->(date){ date = parse_date(date); where( trim_end: date ) if date }

    scope_helper :find, ->(id){ id(id).to_table }
    scope_helper :to_table, ->{ all }

    # Converts +value+ to a Julian day number.
    # Accepts an Integer (already a Julian day), a numeric String, or a
    # String that is exactly YYYY-MM-DD. Returns nil for anything else,
    # including dates that fail to parse.
    scope_helper :parse_date, ->( value ){
      begin
        date =
          if value.is_a?(Integer)
            Date.jd(value)
          elsif value.is_a?(String)
            # \A and \z anchor the whole string; ^ and $ only anchor a line,
            # which let multi-line input through
            if value =~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\z/
              Date.parse(value)
            elsif value.numeric?
              Date.jd(value.to_i)
            end
          end
        # unsupported input yields nil explicitly rather than via a rescued
        # NoMethodError on nil
        date.jd if date
      rescue
        # best effort: any failure while parsing means "no date"
        nil
      end
    }

    scope.class_eval do

      delegate :to_a, :==, :inspect, :first, :flatten, :[], :collect, :<=>, :each, :each_with_index, :to_date, :to_h, to: :all, allow_nil: true

      # Deletes the columns selected by attributes[:column_ids].
      # Returns false when no column ids are set; otherwise the result of the
      # delete, with nil mapped to true.
      def delete_all(*args)
        if attributes[:column_ids].present?
          result = Quandl::Cassandra::Column.where( id: attributes[:column_ids] ).delete_all
          return result.nil? ? true : result
        end
        false
      end

      # Returns the fetch result as an integer while attributes[:count] is
      # set, or 0 when the scope has neither an id nor column ids.
      def count(*args)
        attributes[:count] = true
        dataset? ? fetch.to_i : 0
      ensure
        # always leave count mode, even when the fetch raises
        attributes[:count] = false
      end

      # Memoized fetch result.
      def all
        @all ||= fetch
      end

      def dataset?
        self.id.present? || attributes[:column_ids].present?
      end

      def fetched?
        @all.present?
      end

      # New scope carrying over only id, column_ids and column_frequencies.
      def scoped
        s = self.class.new
        s.id( self.id ) if id.present?
        s.column_ids( attributes[:column_ids] ) if attributes[:column_ids].present?
        s.column_frequencies(attributes[:column_frequencies]) if attributes[:column_frequencies].present?
        s
      end

      protected

      def fetch
        # without an id or columns there's nothing to be read
        return Quandl::Cassandra::Data.new unless dataset?
        # otherwise read the data
        Quandl::Cassandra::Column.read( attributes.merge(scope_attributes) )
      end

    end

  end
end
@@ -1,72 +0,0 @@
# Model backed by the `datasets` table. Reads go through a
# Quandl::Cassandra::Data scope and writes through Quandl::Cassandra::Column.
class Quandl::Cassandra::Dataset < Quandl::Cassandra::Base

  require_relative 'dataset/columns'

  include Quandl::Cassandra::Dataset::Columns

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data

  before_save :save_data, :save_dataset_attribute
  after_save :clear_attributes!

  delegate :type, :updated_at, :created_at, :frequency, to: :dataset_attribute, allow_nil: true

  # Date of the first observation (ascending order), or nil when it cannot
  # be determined. Memoized once found.
  def trim_start
    @trim_start ||= boundary_date(:asc)
  end

  # Date of the first observation in descending order, or nil when it cannot
  # be determined. Memoized once found.
  def trim_end
    @trim_end ||= boundary_date(:desc)
  end

  # Returns the data attribute when data? is truthy, otherwise a memoized
  # data scope for this dataset.
  def data
    if data?
      read_attribute(:data)
    else
      @attributes[:data] ||= data_scope
    end
  end

  # Assigns new data, wrapping anything that is not already a Quandl::Data,
  # and marks the attribute as changed.
  def data=(rows)
    wrapped = rows.is_a?(Quandl::Data) ? rows : Quandl::Data.new(rows)
    data_will_change!
    @attributes[:data] = wrapped
  end

  def data_scope
    Quandl::Cassandra::Data.dataset(self)
  end

  # Memoized DatasetAttribute looked up (or built) by this dataset's id.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::DatasetAttribute.find_or_build(id)
  end

  def reload
    clear_attributes!
  end

  protected

  # before_save: copy the data frequency onto the dataset attribute and save it
  def save_dataset_attribute
    dataset_attribute.frequency = data.frequency.to_s
    dataset_attribute.save
  end

  # before_save: write the data through Column.write, only when it changed
  def save_data
    return unless data_changed?
    Quandl::Cassandra::Column.write( id: id, data: data )
  end

  # Drops the memoized trims and every attribute except the id.
  def clear_attributes!
    super if defined?(super)
    @trim_start = @trim_end = nil
    @attributes = { id: id }
  end

  # Date of the first row of the data scoped to a single row in +direction+.
  # Any error while reading or converting is swallowed and yields nil.
  def boundary_date(direction)
    Date.jd( data.scoped.limit(1).order(direction)[0][0] )
  rescue
    nil
  end

end