quandl_cassandra 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. data/.gitignore +7 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE +7 -0
  4. data/README.md +7 -0
  5. data/Rakefile +11 -0
  6. data/UPGRADE.md +3 -0
  7. data/lib/quandl/cassandra/base/attributes.rb +103 -0
  8. data/lib/quandl/cassandra/base/callbacks.rb +15 -0
  9. data/lib/quandl/cassandra/base/connection.rb +49 -0
  10. data/lib/quandl/cassandra/base/logging.rb +40 -0
  11. data/lib/quandl/cassandra/base/naming.rb +19 -0
  12. data/lib/quandl/cassandra/base/persistence.rb +67 -0
  13. data/lib/quandl/cassandra/base/sanitization.rb +38 -0
  14. data/lib/quandl/cassandra/base/schema.rb +79 -0
  15. data/lib/quandl/cassandra/base/scoping.rb +122 -0
  16. data/lib/quandl/cassandra/base.rb +51 -0
  17. data/lib/quandl/cassandra/configuration.rb +34 -0
  18. data/lib/quandl/cassandra/error.rb +10 -0
  19. data/lib/quandl/cassandra/types/abstract_type.rb +33 -0
  20. data/lib/quandl/cassandra/types/boolean_type.rb +10 -0
  21. data/lib/quandl/cassandra/types/decimal_type.rb +9 -0
  22. data/lib/quandl/cassandra/types/double_type.rb +9 -0
  23. data/lib/quandl/cassandra/types/float_type.rb +9 -0
  24. data/lib/quandl/cassandra/types/integer_type.rb +9 -0
  25. data/lib/quandl/cassandra/types/long_type.rb +9 -0
  26. data/lib/quandl/cassandra/types/timestamp_type.rb +15 -0
  27. data/lib/quandl/cassandra/types/utf8_type.rb +13 -0
  28. data/lib/quandl/cassandra/types/uuid_type.rb +21 -0
  29. data/lib/quandl/cassandra/types.rb +42 -0
  30. data/lib/quandl/cassandra/version.rb +5 -0
  31. data/lib/quandl/cassandra.rb +30 -0
  32. data/lib/quandl/cassandra_models/column/read/collapse.rb +64 -0
  33. data/lib/quandl/cassandra_models/column/read/column.rb +18 -0
  34. data/lib/quandl/cassandra_models/column/read/data_table.rb +57 -0
  35. data/lib/quandl/cassandra_models/column/read/offset.rb +114 -0
  36. data/lib/quandl/cassandra_models/column/read/query.rb +55 -0
  37. data/lib/quandl/cassandra_models/column/read/row.rb +20 -0
  38. data/lib/quandl/cassandra_models/column/read/transform.rb +53 -0
  39. data/lib/quandl/cassandra_models/column/read/type.rb +25 -0
  40. data/lib/quandl/cassandra_models/column/read.rb +28 -0
  41. data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +42 -0
  42. data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +24 -0
  43. data/lib/quandl/cassandra_models/column/write/insert_columns.rb +22 -0
  44. data/lib/quandl/cassandra_models/column/write/insert_data.rb +39 -0
  45. data/lib/quandl/cassandra_models/column/write.rb +22 -0
  46. data/lib/quandl/cassandra_models/column.rb +20 -0
  47. data/lib/quandl/cassandra_models/column_attribute.rb +11 -0
  48. data/lib/quandl/cassandra_models/data.rb +52 -0
  49. data/lib/quandl/cassandra_models/dataset.rb +83 -0
  50. data/lib/quandl/cassandra_models/dataset_attribute.rb +6 -0
  51. data/lib/quandl/cassandra_models/multiset.rb +50 -0
  52. data/lib/quandl/strategy.rb +59 -0
  53. data/quandl_cassandra.gemspec +35 -0
  54. data/spec/expectations/string.rb +5 -0
  55. data/spec/expectations/time.rb +5 -0
  56. data/spec/factories/dataset.rb +8 -0
  57. data/spec/lib/quandl/cassandra/base/scoping_spec.rb +40 -0
  58. data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +28 -0
  59. data/spec/lib/quandl/cassandra_models/column/write_spec.rb +15 -0
  60. data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +16 -0
  61. data/spec/lib/quandl/cassandra_models/column_spec.rb +17 -0
  62. data/spec/lib/quandl/cassandra_models/data_spec.rb +34 -0
  63. data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +41 -0
  64. data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +25 -0
  65. data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +24 -0
  66. data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +26 -0
  67. data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +16 -0
  68. data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +74 -0
  69. data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +37 -0
  70. data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +18 -0
  71. data/spec/lib/quandl/cassandra_models/dataset_spec.rb +63 -0
  72. data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +122 -0
  73. data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +57 -0
  74. data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +25 -0
  75. data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +68 -0
  76. data/spec/lib/quandl/cassandra_spec.rb +12 -0
  77. data/spec/spec_helper.rb +37 -0
  78. metadata +339 -0
@@ -0,0 +1,42 @@
1
# Write step that regroups each frequency's row-oriented table into
# per-column series keyed by column id.
#
# INPUT  (frequency_data)
#   { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }
#
# OUTPUT (frequency_column_data)
#   { source: { UUID: [[1,2], [2,4]], UUID: [[1,3],[2,8]] }}
class Quandl::Cassandra::Column::Write::GroupDataByColumn < Quandl::Cassandra::Column::Write

  # Resets frequency_column_data and fills it with one grouped table per
  # entry of frequency_data.
  def perform
    self.frequency_column_data = {}
    frequency_data.each do |frequency, rows|
      frequency_column_data[frequency] = group_data_by_column(rows)
    end
  end

  # Converts [ [date, val, val], ... ] into { column_id => [ [date, val], ... ] }.
  # Blank values are left out, but every column position that appears in a
  # row still gets an array (possibly empty).
  def group_data_by_column(data)
    grouped = {}
    data.each do |row|
      date   = row[0]
      values = row[1..-1]
      values.each_with_index do |value, index|
        # look the id up once; it is memoized in column_ids after the first call
        series = (grouped[column_id(index)] ||= [])
        series << [date, value] unless value.blank?
      end
    end
    grouped
  end

  # Returns the id stored at +index+ in column_ids. The id list is fetched
  # through Dataset.find_column_ids_by_id when it is not set yet, and a
  # missing position is filled with a freshly generated UUID.
  def column_id(index)
    self.column_ids = Quandl::Cassandra::Dataset.find_column_ids_by_id(id) unless column_ids
    ids = column_ids
    ids[index] = SecureRandom.uuid unless ids[index]
    ids[index]
  end

end
@@ -0,0 +1,24 @@
1
# Write step that snapshots the data table at its own frequency and at every
# frequency returned by Collapse.collapses_greater_than.
#
# INPUT  (data)
#   [ [1,2,3], [2,4,8], ... ]
#
# OUTPUT (frequency_data)
#   { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }
class Quandl::Cassandra::Column::Write::GroupDataByFrequency < Quandl::Cassandra::Column::Write

  # Records the data's frequency, then stores a copy of data_array under
  # :source and under each collapsed frequency.
  def perform
    self.frequency      = data.frequency
    self.frequency_data = {}

    # swap in a clone before collapsing; the original object is not used again here
    self.data = data.clone
    frequency_data[:source] = data.data_array.clone

    coarser_frequencies = Quandl::Operation::Collapse.collapses_greater_than(data.frequency)
    coarser_frequencies.each do |freq|
      # presumably collapses the table in place: the return value is ignored
      # and data_array is read again right after
      data.collapse(freq)
      frequency_data[freq] = data.data_array.clone
    end
  end

end
@@ -0,0 +1,22 @@
1
# Write step that registers each column id of the dataset.
#
# For every entry of column_ids it issues one INSERT into `datasets`
# (dataset id, column id, position) and one INSERT into `column_attributes`
# (column id, frequency).
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write

  # Executes both statements for each column, in position order.
  # Does nothing when column_ids is blank.
  def perform
    return unless column_ids.present?
    column_ids.each_with_index do |column_id, position|
      [
        datasets_statement( column_id, position ),
        column_attributes_statement( column_id )
      ].each { |cql| Quandl::Cassandra::Base.execute( cql ) }
    end
  end

  # CQL linking +column_id+ to this dataset at +position+.
  # NOTE(review): values are interpolated directly into the statement text.
  def datasets_statement( column_id, position )
    "INSERT INTO datasets (id, column_id, position) VALUES (#{id}, #{column_id}, #{position})"
  end

  # CQL storing the current frequency for +column_id+.
  # NOTE(review): values are interpolated directly into the statement text.
  def column_attributes_statement( column_id )
    "INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{frequency}' )"
  end

end
@@ -0,0 +1,39 @@
1
# Write step that inserts every [time, value] pair of frequency_column_data
# into the `columns` table using asynchronous unlogged batches.
class Quandl::Cassandra::Column::Write::InsertData < Quandl::Cassandra::Column::Write

  # Dispatches all batches, then calls #value on each returned future.
  def perform
    insert_data_in_batches.map(&:value)
  end

  # Walks frequency -> column -> row, queuing one INSERT per row and
  # dispatching a batch whenever the queue reaches the configured
  # batch_size. Returns the futures of all dispatched batches.
  def insert_data_in_batches
    futures = []
    pending = []
    frequency_column_data.each do |frequency, column_data|
      column_data.each do |column_id, rows|
        rows.each do |time, value|
          pending << statement( column_id, frequency, time, value )
          # keep queuing until the configured batch size is reached
          next if pending.count < Quandl::Cassandra.configuration.batch_size
          futures << execute_async_batch(pending)
          pending = []
        end
      end
    end
    # dispatch whatever is left over
    futures << execute_async_batch(pending) unless pending.empty?
    futures
  end

  # Wraps +statements+ in a single UNLOGGED BATCH and hands it to
  # Base.execute_async, returning whatever that call returns.
  def execute_async_batch(statements)
    Quandl::Cassandra::Base.execute_async( "BEGIN UNLOGGED BATCH\n#{statements.join("\n")}\nAPPLY BATCH;" )
  end

  # CQL for one data point.
  # NOTE(review): values are interpolated directly into the statement text.
  def statement( id, type, time, value )
    "INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
  end

end
@@ -0,0 +1,22 @@
1
# Base class of the write steps and entry point of the write pipeline.
class Quandl::Cassandra::Column::Write < Quandl::Strategy::Strategize

  # attributes shared by every write step
  define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data

  require_relative 'write/insert_data'
  require_relative 'write/insert_columns'
  require_relative 'write/group_data_by_column'
  require_relative 'write/group_data_by_frequency'

  # Builds a Quandl::Strategy over +attributes+, registers the four write
  # steps in execution order, runs it and returns the strategy's result.
  def self.perform(attributes)
    steps = [
      Quandl::Cassandra::Column::Write::GroupDataByFrequency,
      Quandl::Cassandra::Column::Write::GroupDataByColumn,
      Quandl::Cassandra::Column::Write::InsertData,
      Quandl::Cassandra::Column::Write::InsertColumns
    ]
    pipeline = Quandl::Strategy.new( attributes ) do |c|
      steps.each { |step| c.use step }
    end
    pipeline.perform
  end

end
@@ -0,0 +1,20 @@
1
# Record class bound to the `columns` table; also the facade for the
# Read and Write pipelines.
class Quandl::Cassandra::Column < Quandl::Cassandra::Base

  table_name :columns

  require_relative 'column/read'
  require_relative 'column/write'

  # Runs the Read pipeline with +args+ and returns the :data entry of its result.
  def self.read(*args)
    result = Quandl::Cassandra::Column::Read.perform(*args)
    result[:data]
  end

  # Runs the Write pipeline with +args+ and returns its result.
  def self.write(*args)
    Quandl::Cassandra::Column::Write.perform(*args)
  end

end
@@ -0,0 +1,11 @@
1
# Record class bound to the `column_attributes` table.
class Quandl::Cassandra::ColumnAttribute < Quandl::Cassandra::Base

  table_name :column_attributes

  after_initialize :default_attributes

  # Gives the record a generated UUID when it has no id yet.
  #
  # The id is only a default: an id that is already present when the hook
  # runs is kept, so an instance built from existing attributes is not
  # re-keyed with a random value.
  def default_attributes
    self.id ||= SecureRandom.uuid
  end

end
@@ -0,0 +1,52 @@
1
# Composable query object for column data. Scopes accumulate conditions;
# the rows are loaded through Quandl::Cassandra::Column.read the first time
# #all (or any method delegated to it) is called on the scope.
class Quandl::Cassandra::Data

  include ScopeComposer::Model

  has_scope_composer

  delegate :where, to: :scope

  # restrict to one dataset: its id and its column ids
  scope :dataset, ->(d){ id(d.id).column_ids(d.column_ids) }

  scope :row, :id, :limit, :offset, :column, :accuracy, :frequency

  scope :column_frequencies, ->(*freqs){ where( column_frequencies: Array(freqs).flatten ) }
  scope :column_ids, ->(*ids){ where( column_ids: Array(ids).flatten ) }

  # unknown collapse / transform names add no condition
  scope :collapse, ->(v){ where( collapse: v.to_sym ) if Quandl::Operation::Collapse.valid_collapse?(v) }
  scope :transform, ->(v){ where( transform: v.to_sym ) if Quandl::Operation::Transform.valid_transformation?(v) }

  # anything other than :asc is treated as :desc
  scope :order, ->(v){
    order = ( v.to_sym == :asc ) ? :asc : :desc
    where( order: order )
  }

  # Trim bounds are stored as Julian day numbers. A value that parse_date
  # cannot turn into something responding to #jd adds no condition (same
  # convention as collapse / transform) instead of raising NoMethodError.
  scope :trim_start, ->(date){
    parsed = parse_date(date)
    where( trim_start: parsed.jd ) if parsed.respond_to?(:jd)
  }
  scope :trim_end, ->(date){
    parsed = parse_date(date)
    where( trim_end: parsed.jd ) if parsed.respond_to?(:jd)
  }

  scope_helper :find, ->(id){ id(id).to_table }
  scope_helper :to_table, ->{ all }

  # Converts +date+ to a Date when it is a numeric String or an Integer
  # (both read as a Julian day number) or a YYYY-MM-DD String.
  # Any other value is returned unchanged; nil is returned when the
  # conversion raises.
  scope_helper :parse_date, ->( date ){
    begin
      date = Date.jd(date.to_i) if date.kind_of?(String) && date.numeric?
      date = Date.jd(date) if date.is_a?(Integer)
      # \A..\Z anchors the whole string (a single trailing newline is still
      # accepted), whereas ^..$ would match any one line of a multi-line value
      date = Date.parse(date) if date.is_a?(String) && date =~ /\A[0-9]{4}-[0-9]{2}-[0-9]{2}\Z/
      date
    rescue
      nil
    end
  }

  scope.class_eval do

    delegate :inspect, :==, to: :all, allow_nil: true
    # NOTE(review): respond_to? is evaluated on the scope class itself (self
    # inside class_eval), not on its instances - confirm that is intended.
    Array.forwardable_methods.each{|mname| delegate(mname, to: :all, allow_nil: true ) unless self.respond_to?(mname) }

    # Loads the rows once and memoizes them.
    def all
      @all ||= Quandl::Cassandra::Column.read( attributes.merge(scope_attributes) )
    end

  end

end
@@ -0,0 +1,83 @@
1
# Record class bound to the `datasets` table. A dataset ties an id to an
# ordered list of column ids and exposes the data of those columns.
class Quandl::Cassandra::Dataset < Quandl::Cassandra::Base

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data, :column_ids

  before_save :save_columns, :save_data, :save_dataset_attribute
  after_save :clear_attributes

  delegate :type, :updated_at, :created_at, :frequency, to: :dataset_attribute, allow_nil: true

  # Returns the column ids registered for dataset +id+, ordered by position.
  #
  # The class is referenced by its full name on purpose: this class is opened
  # with the compact `class A::B::C` form, so a bare `Dataset` is not looked
  # up in Quandl::Cassandra and would resolve to an unrelated top-level
  # ::Dataset (or raise NameError when none exists).
  def self.find_column_ids_by_id(id)
    rows = Quandl::Cassandra::Dataset.where( id: id ).pluck(:column_id, :position)
    rows.sort_by{|r| r[1] }.collect{|r| r[0] }
  end

  # Assigns one attribute hash per column, by position.
  # NOTE(review): missing positions are filled with Quandl::Cassandra::Column
  # instances while #columns loads ColumnAttribute records - confirm which
  # class is intended.
  def column_attributes=(column_attrs)
    column_attrs.each_with_index do |attrs, index|
      self.columns[index] ||= Quandl::Cassandra::Column.new
      self.columns[index].assign_attributes(attrs)
    end
  end

  # ColumnAttribute records for each column id (memoized).
  def columns
    @columns ||= column_ids.collect{|cid| Quandl::Cassandra::ColumnAttribute.find(cid) }
  end

  # Column ids of this dataset in position order (memoized).
  def column_ids
    @column_ids ||= self.class.find_column_ids_by_id(id)
  end

  # First cell of the first row when ordered ascending (memoized).
  def trim_start
    @trim_start ||= data_scope.limit(1).order(:asc).try(:[], 0).try(:[], 0)
  end

  # First cell of the first row when ordered descending (memoized).
  def trim_end
    @trim_end ||= data_scope.limit(1).order(:desc).try(:[], 0).try(:[], 0)
  end

  # Returns the assigned data when present, otherwise the data scope for
  # this dataset (cached in the :data attribute).
  def data
    # data set?
    return read_attribute(:data) if data?
    # read data
    @attributes[:data] ||= data_scope
  end

  # Stores +rows+ as a Quandl::Data, wrapping it when it is not one already.
  def data=(rows)
    rows = Quandl::Data.new(rows) unless rows.is_a?(Quandl::Data)
    write_attribute(:data, rows)
  end

  # Data scope restricted to this dataset.
  def data_scope
    Quandl::Cassandra::Data.dataset(self)
  end

  # DatasetAttribute for this id, obtained through find_or_build (memoized).
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::DatasetAttribute.find_or_build(id)
  end

  protected

  # before_save: copies the data frequency onto the dataset attribute and saves it.
  def save_dataset_attribute
    dataset_attribute.frequency = data.frequency.to_s
    dataset_attribute.save
  end

  # before_save: saves every column record.
  def save_columns
    columns.each(&:save)
  end

  # before_save: runs the column write pipeline when the data attribute changed.
  def save_data
    Quandl::Cassandra::Column.write( id: id, data: data ) if data_changed?
  end

  # after_save: drops every memoized value so the next read reloads it.
  def clear_attributes
    @trim_start = nil
    @trim_end = nil
    @columns = nil
    @column_ids = nil
    @attributes[:data] = nil
  end

end
@@ -0,0 +1,6 @@
1
# Record class bound to the `dataset_attributes` table.
class Quandl::Cassandra::DatasetAttribute < Quandl::Cassandra::Base

  table_name(:dataset_attributes)
  define_attributes(:id, :type, :updated_at, :created_at, :frequency)

end
@@ -0,0 +1,50 @@
1
# A Dataset whose columns are picked from other datasets, described by a
# "dataset_id.column,dataset_id.column,..." string.
class Quandl::Cassandra::Multiset < Quandl::Cassandra::Dataset

  table_name :datasets

  define_attributes :datasets_columns

  # Builds a multiset from a datasets_columns string.
  def self.with_columns(datasets_columns)
    new( datasets_columns: datasets_columns )
  end

  # Data scope over the selected columns at their own frequencies,
  # requested with the :source collapse.
  def data_scope
    scope = Quandl::Cassandra::Data.collapse(:source)
    scope = scope.column_ids( column_ids )
    scope.column_frequencies( column_frequencies )
  end

  # Frequency of each selected column (memoized).
  def column_frequencies
    @column_frequencies ||= columns.collect(&:frequency)
  end

  # Column ids resolved from datasets_columns (memoized).
  def column_ids
    @column_ids ||= column_ids_from_datasets_columns
  end

  # Resolves every "dataset_id.column" entry to a column id. The column part
  # is 1-based; each dataset's id list is looked up once and cached in #datasets.
  # NOTE(review): a column of 0 or a missing column part gives index -1,
  # which picks the last id of an Array - confirm that is intended.
  def column_ids_from_datasets_columns
    datasets_columns.split(',').map do |dataset_column|
      dataset_id, column = dataset_column.split('.')
      datasets[dataset_id] ||= Quandl::Cassandra::Dataset.find(dataset_id).column_ids
      datasets[dataset_id][ column.to_i - 1 ]
    end
  end

  # Cache of dataset id => column ids.
  def datasets
    @datasets ||= {}
  end

  # Frequency reported by the data itself.
  def frequency
    data.frequency
  end

  # A new, blank DatasetAttribute (memoized).
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::DatasetAttribute.new
  end

end
@@ -0,0 +1,59 @@
1
# Runs an ordered list of step classes over a shared attributes object.
# Each step is instantiated with the current attributes, and the step's
# resulting attributes are handed to the next one.
class Quandl::Strategy

  attr_accessor :attributes

  def initialize(*args, &block)
    setup(*args, &block)
  end

  # Stores +attributes+ and passes self to the block, when one is given,
  # so steps can be registered with #use.
  def setup(attributes, &block)
    self.attributes = attributes
    block.call(self) if block
  end

  # Instantiates every registered class in order, replacing attributes with
  # each step's attributes, and returns the final attributes.
  def perform
    classes.each { |klass| self.attributes = klass.new(attributes).attributes }
    attributes
  end

  # Registers +klass+ as a step unless it is already registered.
  def use(klass)
    return if classes.include?(klass)
    classes << klass
  end

  # Registered step classes, in registration order.
  def classes
    @classes ||= []
  end

  # Base class for steps: receives a copy of the attributes, runs #perform
  # when the subclass defines it, and exposes the resulting attributes.
  class Strategize

    attr_accessor :attributes

    # Runs the step on +attrs+ and returns the resulting attributes.
    def self.call(attrs)
      new(attrs).attributes
    end

    # Works on a clone of +attrs+ when the object can be cloned.
    def initialize(attrs)
      self.attributes = attrs.respond_to?(:clone) ? attrs.clone : attrs
      perform if respond_to?(:perform)
    end

    # Defines a reader and a writer for each name, backed by the
    # symbol-keyed entry of the attributes object.
    def self.define_attributes(*names)
      names.each do |name|
        key = name.to_sym
        define_method(name) { attributes[key] }
        define_method("#{name}=") { |value| attributes[key] = value }
      end
    end

  end

end