quandl_cassandra 0.3.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. data/UPGRADE.md +6 -0
  2. data/lib/quandl/cassandra.rb +0 -7
  3. data/lib/quandl/cassandra/version.rb +1 -1
  4. data/spec/lib/quandl/cassandra/base/scoping_spec.rb +10 -10
  5. metadata +1 -61
  6. data/lib/quandl/cassandra_models/column.rb +0 -20
  7. data/lib/quandl/cassandra_models/column/read.rb +0 -32
  8. data/lib/quandl/cassandra_models/column/read/collapse.rb +0 -65
  9. data/lib/quandl/cassandra_models/column/read/column.rb +0 -19
  10. data/lib/quandl/cassandra_models/column/read/data.rb +0 -59
  11. data/lib/quandl/cassandra_models/column/read/offset.rb +0 -118
  12. data/lib/quandl/cassandra_models/column/read/row.rb +0 -20
  13. data/lib/quandl/cassandra_models/column/read/select_columns.rb +0 -60
  14. data/lib/quandl/cassandra_models/column/read/transform.rb +0 -53
  15. data/lib/quandl/cassandra_models/column/read/type.rb +0 -25
  16. data/lib/quandl/cassandra_models/column/write.rb +0 -22
  17. data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +0 -37
  18. data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +0 -24
  19. data/lib/quandl/cassandra_models/column/write/insert_column_attributes.rb +0 -20
  20. data/lib/quandl/cassandra_models/column/write/insert_columns.rb +0 -9
  21. data/lib/quandl/cassandra_models/column_attribute.rb +0 -11
  22. data/lib/quandl/cassandra_models/data.rb +0 -18
  23. data/lib/quandl/cassandra_models/data/search.rb +0 -104
  24. data/lib/quandl/cassandra_models/dataset.rb +0 -72
  25. data/lib/quandl/cassandra_models/dataset/columns.rb +0 -61
  26. data/lib/quandl/cassandra_models/dataset_attribute.rb +0 -6
  27. data/lib/quandl/cassandra_models/multiset.rb +0 -50
  28. data/spec/factories/dataset.rb +0 -8
  29. data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +0 -28
  30. data/spec/lib/quandl/cassandra_models/column/write_spec.rb +0 -23
  31. data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +0 -16
  32. data/spec/lib/quandl/cassandra_models/column_spec.rb +0 -17
  33. data/spec/lib/quandl/cassandra_models/data_spec.rb +0 -84
  34. data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +0 -44
  35. data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +0 -24
  36. data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +0 -24
  37. data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +0 -26
  38. data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +0 -16
  39. data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +0 -74
  40. data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +0 -37
  41. data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +0 -18
  42. data/spec/lib/quandl/cassandra_models/dataset_spec.rb +0 -111
  43. data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +0 -122
  44. data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +0 -57
  45. data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +0 -25
  46. data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +0 -69
@@ -1,20 +0,0 @@
1
# Read step that translates a requested :row into order / offset / limit
# attributes so that a single row is selected.
class Quandl::Cassandra::Column::Read::Row < Quandl::Cassandra::Column::Read

  # Rewrites attributes[:order], attributes[:offset] and attributes[:limit].
  # Does nothing when attributes[:row] is not present.
  def perform
    index = attributes[:row]
    return unless index.present?

    # A negative row is addressed in ascending order (-1 => offset 0,
    # -2 => offset 1, ...); a non-negative row is used directly as the
    # offset in descending order.
    from_start = index < 0
    attributes[:order]  = from_start ? :asc : :desc
    attributes[:offset] = from_start ? (index * -1) - 1 : index
    attributes[:limit]  = 1
  end

end
@@ -1,60 +0,0 @@
1
# Read step that queries the `columns` table once per column id and stores
# the outcome in attributes[:data].
class Quandl::Cassandra::Column::Read::SelectColumns < Quandl::Cassandra::Column::Read

  # Stores either a row count or the selected data, depending on count?
  # (count? is defined outside this class).
  def perform
    attributes[:data] = count? ? count_data : select_data
  end

  # Runs the prepared COUNT statement for every column id and returns the
  # largest count seen.
  def count_data
    query = Quandl::Cassandra::Base.prepare( statement )
    results = attributes[:column_ids].each_with_index.map do |column_id, position|
      # the collapse at the same position names the column type to query
      collapse_type = attributes[:column_collapses][position].to_s
      query.execute( column_id, collapse_type, :one )
    end
    results.map { |result| result.first['count'] }.max
  end

  # Issues one asynchronous query per column id, then merges the results
  # into a hash of time => [value for column 0, value for column 1, ...].
  def select_data
    started_at = Time.now
    query = Quandl::Cassandra::Base.prepare( statement )
    # dispatch every query before waiting on any of them
    pending = attributes[:column_ids].each_with_index.map do |column_id, position|
      collapse_type = attributes[:column_collapses][position].to_s
      query.async.execute( column_id, collapse_type, Quandl::Cassandra::Base.consistency )
    end
    table = {}
    pending.each_with_index do |future, position|
      future.value.each do |row|
        # one slot per requested column; the first value seen for a slot wins
        table[row['time']] ||= Array.new( attributes[:column_ids].count )
        table[row['time']][position] ||= row['value']
      end
    end
    Quandl::Logger.debug("(#{started_at.elapsed_ms}) #{self.class.name}.select_data")
    table
  end

  # Builds the CQL text with placeholders for id and type. Trim bounds and
  # the limit are interpolated only when the matching attribute is set.
  def statement
    selection = count? ? "COUNT(*)" : "time,value"
    fragments = [ "SELECT #{selection} FROM columns WHERE" ]
    fragments << " time >= #{attributes[:trim_start]} AND " if attributes[:trim_start]
    fragments << " time <= #{attributes[:trim_end]} AND " if attributes[:trim_end]
    fragments << " id = ? AND type = ?"
    fragments << " ORDER BY type #{order}"
    fragments << " LIMIT #{attributes[:limit]}" if attributes[:limit]
    fragments.join
  end

  # Memoized sort direction: :asc only when explicitly requested, else :desc.
  def order
    return @order if @order
    @order = ( attributes[:order] == :asc ) ? :asc : :desc
  end

end
@@ -1,53 +0,0 @@
1
# Read step that adjusts the query attributes when a transform is requested.
class Quandl::Cassandra::Column::Read::Transform < Quandl::Cassandra::Column::Read

  # Applies the rdiff/diff adjustments first, then the cumul adjustments.
  # Does nothing when attributes[:transform] is not present.
  def perform
    return unless attributes[:transform].present?
    apply_rdiff
    apply_cumul
  end

  # For a :cumul transform with a limit, hands offset handling over to
  # Read::Offset (result stored in attributes[:data_table]) and removes
  # row, limit and offset from the query.
  def apply_cumul
    return unless transform?( :cumul )
    return unless limit.present?
    attributes[:data_table] = Quandl::Cassandra::Column::Read::Offset.call(attributes)
    # the query itself must not limit or offset the data
    [ :row, :limit, :offset ].each { |key| attributes[key] = nil }
    nil
  end

  # For :rdiff and :diff transforms, widens the query by one: one extra row
  # on the limit, and one collapse period on the trim bound that matches
  # the sort order.
  def apply_rdiff
    return unless transform?( :rdiff, :diff )
    attributes[:limit] += 1 if attributes[:limit]
    if trim_start.present? && order == :desc
      attributes[:trim_start] = trim_start.occurrences_of_frequency_ago( 1, attributes[:collapse] ).jd
    end
    if trim_end.present? && order == :asc
      attributes[:trim_end] = trim_end.occurrences_of_frequency_ahead( 1, attributes[:collapse] ).jd
    end
  end

  # True when attributes[:transform] equals any of the given keys.
  def transform?(*keys)
    keys.include?( attributes[:transform] )
  end

  # Memoized Date built from the julian day in attributes[:trim_end], or nil.
  def trim_end
    @trim_end ||= ( Date.jd( attributes[:trim_end] ) if attributes[:trim_end].present? )
  end

  # Memoized Date built from the julian day in attributes[:trim_start], or nil.
  def trim_start
    @trim_start ||= ( Date.jd( attributes[:trim_start] ) if attributes[:trim_start].present? )
  end

  # Current value of attributes[:limit].
  def limit
    attributes[:limit]
  end

  # Memoized sort direction: :asc only when explicitly requested, else :desc.
  def order
    return @order if @order
    @order = ( attributes[:order] == :asc ) ? :asc : :desc
  end

end
@@ -1,25 +0,0 @@
1
# Read step that coerces known attributes to their expected types.
class Quandl::Cassandra::Column::Read::Type < Quandl::Cassandra::Column::Read

  # Coerces every attribute in place, then keeps a copy of the coerced
  # attributes under :pristine unless one is already stored.
  def perform
    attributes.each_pair do |name, raw|
      attributes[name] = enforce_type(name, raw)
    end
    attributes[:pristine] ||= attributes.clone
  end

  # Returns value coerced according to key:
  # * numeric keys  -> value.try(:to_i)
  # * symbolic keys -> value.try(:to_sym)
  # * :order        -> :asc when the value symbolizes to :asc, otherwise :desc
  # * anything else -> value unchanged
  def enforce_type(key, value)
    case key
    when :limit, :column, :trim_start, :trim_end, :offset, :accuracy, :row then value.try(:to_i)
    when :collapse, :transform, :frequency then value.try(:to_sym)
    when :order then ( value.try(:to_sym) == :asc ) ? :asc : :desc
    else value
    end
  end

end
@@ -1,22 +0,0 @@
1
# Write strategy: declares the attributes shared by the write steps, loads
# the step classes and runs them in a fixed order.
class Quandl::Cassandra::Column::Write < Quandl::Strategy::Strategize

  # strategy attributes
  define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data, :statement_values

  # step classes subclass this class, so they are loaded from inside its body
  %w[ insert_columns insert_column_attributes group_data_by_column group_data_by_frequency ].each do |step_file|
    require_relative "write/#{step_file}"
  end

  # Builds a Quandl::Strategy over attributes, registers the write steps in
  # execution order and performs it.
  def self.perform(attributes)
    Quandl::Strategy.new( attributes ) { |chain|
      [
        Quandl::Cassandra::Column::Write::GroupDataByFrequency,
        Quandl::Cassandra::Column::Write::GroupDataByColumn,
        Quandl::Cassandra::Column::Write::InsertColumns,
        Quandl::Cassandra::Column::Write::InsertColumnAttributes
      ].each { |step| chain.use step }
    }.perform
  end

end
@@ -1,37 +0,0 @@
1
# Write step that flattens frequency_data into one value tuple per cell.
#
# Input (frequency_data):
#   { source: [ [date, val, val], ... ], weekly: ... }
#
# Output (statement_values):
#   [ [column_id, frequency, date, value], ... ]
class Quandl::Cassandra::Column::Write::GroupDataByColumn < Quandl::Cassandra::Column::Write

  def perform
    group_by_statement_values
  end

  # Resets statement_values, then appends [column_id, frequency, date, value]
  # for every non-blank value of every row of every frequency.
  def group_by_statement_values
    self.statement_values = []
    frequency_data.each do |frequency, rows|
      label = frequency.to_s
      rows.each do |row|
        # the first cell is the date, the remaining cells are the column values
        date = row[0]
        row[1..-1].each_with_index do |value, position|
          # blank cells are skipped before a column id is resolved for them
          next if value.blank?
          self.statement_values << [ column_id(position), label, date, value ]
        end
      end
    end
  end

  # Returns the column id stored at index. The id list is looked up by
  # dataset id on first use, and a new UUID is generated for a missing slot.
  def column_id(index)
    self.column_ids ||= Quandl::Cassandra::Dataset.find_column_ids_by_id(id)
    self.column_ids[index] ||= Cql::Uuid.new(SecureRandom.uuid)
  end

end
@@ -1,24 +0,0 @@
1
# Write step that records the data at its source frequency and at every
# frequency returned by Collapse.collapses_greater_than.
#
# Input (data):
#   [ [1,2,3], [2,4,8], ... ]
#
# Output (frequency_data):
#   { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }
class Quandl::Cassandra::Column::Write::GroupDataByFrequency < Quandl::Cassandra::Column::Write

  def perform
    self.frequency = data.frequency
    self.frequency_data = {}
    # work on a clone so the collapses below do not touch the caller's object
    self.data = data.clone
    # stores a copy of the current data array under the given key
    snapshot = ->(key) { self.frequency_data[key] = data.data_array.clone }
    snapshot.call(:source)
    targets = Quandl::Operation::Collapse.collapses_greater_than(data.frequency)
    targets.each do |target|
      data.collapse(target)
      snapshot.call(target)
    end
  end

end
@@ -1,20 +0,0 @@
1
# Write step that records, for every column id, its position within the
# dataset and its frequency.
class Quandl::Cassandra::Column::Write::InsertColumnAttributes < Quandl::Cassandra::Column::Write

  # Does nothing when column_ids is blank. Otherwise hands one
  # [id, column_id, position, frequency] tuple per column to
  # Quandl::Cassandra::Batch.insert, together with a block that renders the
  # two INSERT statements for a tuple.
  def perform
    return if column_ids.blank?
    rows_values = column_ids.each_with_index.map do |column_id, position|
      [ id, column_id, position, frequency ]
    end
    Quandl::Cassandra::Batch.insert(rows_values) do |dataset_id, column_id, position, column_frequency|
      [
        "INSERT INTO datasets (id, column_id, position) VALUES (#{dataset_id}, #{column_id}, #{position})",
        "INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{column_frequency}' )"
      ].join("\n")
    end
  end

end
@@ -1,9 +0,0 @@
1
# Write step that inserts every tuple of statement_values into `columns`.
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write

  # Passes statement_values to Quandl::Cassandra::Batch.insert with a block
  # that renders the INSERT statement for one
  # [id, type, time, value] tuple.
  def perform
    Quandl::Cassandra::Batch.insert(statement_values) { |column_id, column_type, julian_day, cell|
      "INSERT INTO columns (id, type, time, value) VALUES (#{column_id}, '#{column_type}', #{julian_day}, #{cell})"
    }
  end

end
@@ -1,11 +0,0 @@
1
# Model backed by the `column_attributes` table.
class Quandl::Cassandra::ColumnAttribute < Quandl::Cassandra::Base

  table_name :column_attributes

  after_initialize :default_attributes

  # Assigns a freshly generated UUID as the id.
  # NOTE(review): the assignment is unconditional, so it replaces any id
  # that is already set when the callback runs - confirm that is intended.
  def default_attributes
    generated = SecureRandom.uuid
    self.id = generated
  end

end
@@ -1,18 +0,0 @@
1
# Quandl::Data extended with the Cassandra search scopes.
class Quandl::Cassandra::Data < Quandl::Data

  require_relative 'data/search'

  include Quandl::Cassandra::Data::Search

  attr_accessor :dataset_id, :column_ids, :column_frequencies

  # Starts a new scope carrying this data's dataset id and, when they are
  # set, its column ids and column frequencies.
  def scoped
    self.class.scope.new.tap do |fresh|
      fresh.id(dataset_id)
      fresh.column_ids(column_ids) if column_ids
      fresh.column_frequencies(column_frequencies) if column_frequencies
    end
  end

end
@@ -1,104 +0,0 @@
1
# Search scopes for Quandl::Cassandra::Data, built on ScopeComposer.
# Including this module declares the scopes below on the including class
# and extends its scope class with fetch / count / delete helpers.
module Quandl::Cassandra::Data::Search

  extend ActiveSupport::Concern

  included do

    include ScopeComposer::Model

    # Names of the scopes declared on the scope class.
    def self.scope_names
      scope.scope_names
    end

    has_scope_composer

    delegate :where, to: :scope

    # Scope by an object that responds to id and column_ids.
    scope :dataset, ->(d){
      id(d.id)
      column_ids(d.column_ids)
    }

    scope :row, :id, :limit, :offset, :column, :accuracy, :frequency, :count, :delete

    scope :column_frequencies, ->(*freqs){ where( column_frequencies: Array(freqs).flatten ) }
    # nil ids are dropped; nothing is applied when no id remains
    scope :column_ids, ->(*ids){ cids = Array(ids).flatten.compact; where( column_ids: cids ) if cids.present? }

    # collapse and transform are applied only when the value is recognised
    scope :collapse, ->(v){ where( collapse: v.to_sym ) if Quandl::Operation::Collapse.valid_collapse?(v) }
    scope :transform, ->(v){ where( transform: v.to_sym ) if Quandl::Operation::Transform.valid_transformation?(v) }

    # anything other than :asc is treated as :desc
    scope :order, ->(v){
      order = ( v.to_sym == :asc ) ? :asc : :desc
      where( order: order )
    }

    # trims are stored as julian day numbers; unparseable values are ignored
    scope :trim_start, ->(date){ date = parse_date(date); where( trim_start: date ) if date }
    scope :trim_end, ->(date){ date = parse_date(date); where( trim_end: date ) if date }

    scope_helper :find, ->(id){ id(id).to_table }
    scope_helper :to_table, ->{ all }

    # Returns the julian day number for a numeric string, an Integer or a
    # "YYYY-MM-DD" string. Returns nil for anything else: when no format
    # matches, date stays nil and the resulting error is rescued, as is any
    # error raised while parsing.
    scope_helper :parse_date, ->( value ){
      begin
        date = Date.jd(value.to_i) if value.kind_of?(String) && value.numeric?
        date = Date.jd(value) if value.is_a?(Integer)
        date = Date.parse(value) if value.is_a?(String) && value =~ /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/
        date.jd
      rescue
        nil
      end
    }

    scope.class_eval do

      delegate :to_a, :==, :inspect, :first, :flatten, :[], :collect, :<=>, :each, :each_with_index, :to_date, :to_h, to: :all, allow_nil: true

      # Deletes the columns named by attributes[:column_ids].
      # Returns false when no column ids are set; otherwise the result of
      # the delete, with a nil result reported as true.
      def delete_all(*args)
        if attributes[:column_ids].present?
          result = Quandl::Cassandra::Column.where( id: attributes[:column_ids] ).delete_all
          return result.nil? ? true : result
        end
        false
      end

      # Returns the number of rows for this scope as an Integer, or 0 when
      # neither an id nor column ids are set.
      def count(*args)
        attributes[:count] = true
        dataset? ? fetch.to_i : 0
      ensure
        # reset the flag even when the fetch raises, so a later read on this
        # scope is not issued as a count query
        attributes[:count] = false
      end

      # Memoized result of fetch.
      def all
        @all ||= fetch
      end

      # True when there is an id or column ids to read from.
      def dataset?
        self.id.present? || attributes[:column_ids].present?
      end

      # True when a memoized result is present.
      def fetched?
        @all.present?
      end

      # Starts a new scope that keeps only the id, column ids and column
      # frequencies of this one.
      def scoped
        s = self.class.new
        s.id( self.id ) if id.present?
        s.column_ids( attributes[:column_ids] ) if attributes[:column_ids].present?
        s.column_frequencies(attributes[:column_frequencies]) if attributes[:column_frequencies].present?
        s
      end

      protected

      # Reads the data for this scope through Quandl::Cassandra::Column.read.
      def fetch
        # without an id or columns there's nothing to be read
        return Quandl::Cassandra::Data.new unless dataset?
        # otherwise read the data
        Quandl::Cassandra::Column.read( attributes.merge(scope_attributes) )
      end

    end

  end
end
@@ -1,72 +0,0 @@
1
# Model backed by the `datasets` table. Its data is written through
# Quandl::Cassandra::Column and read through a Quandl::Cassandra::Data scope.
class Quandl::Cassandra::Dataset < Quandl::Cassandra::Base

  require_relative 'dataset/columns'

  include Quandl::Cassandra::Dataset::Columns

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data

  before_save :save_data, :save_dataset_attribute
  after_save :clear_attributes!

  delegate :type, :updated_at, :created_at, :frequency, to: :dataset_attribute, allow_nil: true

  # Date of the first row in ascending order, memoized.
  # Returns nil when the lookup raises.
  def trim_start
    @trim_start ||= boundary_date(:asc)
  rescue
    nil
  end

  # Date of the first row in descending order, memoized.
  # Returns nil when the lookup raises.
  def trim_end
    @trim_end ||= boundary_date(:desc)
  rescue
    nil
  end

  # Returns the assigned data when data? is true; otherwise a memoized data
  # scope for this dataset.
  def data
    data? ? read_attribute(:data) : ( @attributes[:data] ||= data_scope )
  end

  # Assigns new data, wrapping anything that is not already a Quandl::Data,
  # and marks the attribute as changed.
  def data=(rows)
    wrapped = rows.is_a?(Quandl::Data) ? rows : Quandl::Data.new(rows)
    data_will_change!
    @attributes[:data] = wrapped
  end

  # Data scope restricted to this dataset.
  def data_scope
    Quandl::Cassandra::Data.dataset(self)
  end

  # Memoized DatasetAttribute looked up (or built) by this dataset's id.
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::DatasetAttribute.find_or_build(id)
  end

  # Discards cached state; only the id is kept.
  def reload
    clear_attributes!
  end

  protected

  # Julian day in the first cell of the first row for the given sort
  # direction, converted to a Date.
  def boundary_date(direction)
    first_row = data.scoped.limit(1).order(direction)[0]
    Date.jd( first_row[0] )
  end

  # Copies the data's frequency onto the dataset attribute and saves it.
  def save_dataset_attribute
    dataset_attribute.frequency = data.frequency.to_s
    dataset_attribute.save
  end

  # Writes the data through Quandl::Cassandra::Column when it has changed.
  def save_data
    return unless data_changed?
    Quandl::Cassandra::Column.write( id: id, data: data )
  end

  # Calls the parent implementation when there is one, then drops the cached
  # trims and every attribute except the id.
  def clear_attributes!
    super if defined?(super)
    @trim_start = @trim_end = nil
    @attributes = { id: id }
  end

end