quandl_cassandra 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/.gitignore +7 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE +7 -0
  4. data/README.md +7 -0
  5. data/Rakefile +11 -0
  6. data/UPGRADE.md +3 -0
  7. data/lib/quandl/cassandra/base/attributes.rb +103 -0
  8. data/lib/quandl/cassandra/base/callbacks.rb +15 -0
  9. data/lib/quandl/cassandra/base/connection.rb +49 -0
  10. data/lib/quandl/cassandra/base/logging.rb +40 -0
  11. data/lib/quandl/cassandra/base/naming.rb +19 -0
  12. data/lib/quandl/cassandra/base/persistence.rb +67 -0
  13. data/lib/quandl/cassandra/base/sanitization.rb +38 -0
  14. data/lib/quandl/cassandra/base/schema.rb +79 -0
  15. data/lib/quandl/cassandra/base/scoping.rb +122 -0
  16. data/lib/quandl/cassandra/base.rb +51 -0
  17. data/lib/quandl/cassandra/configuration.rb +34 -0
  18. data/lib/quandl/cassandra/error.rb +10 -0
  19. data/lib/quandl/cassandra/types/abstract_type.rb +33 -0
  20. data/lib/quandl/cassandra/types/boolean_type.rb +10 -0
  21. data/lib/quandl/cassandra/types/decimal_type.rb +9 -0
  22. data/lib/quandl/cassandra/types/double_type.rb +9 -0
  23. data/lib/quandl/cassandra/types/float_type.rb +9 -0
  24. data/lib/quandl/cassandra/types/integer_type.rb +9 -0
  25. data/lib/quandl/cassandra/types/long_type.rb +9 -0
  26. data/lib/quandl/cassandra/types/timestamp_type.rb +15 -0
  27. data/lib/quandl/cassandra/types/utf8_type.rb +13 -0
  28. data/lib/quandl/cassandra/types/uuid_type.rb +21 -0
  29. data/lib/quandl/cassandra/types.rb +42 -0
  30. data/lib/quandl/cassandra/version.rb +5 -0
  31. data/lib/quandl/cassandra.rb +30 -0
  32. data/lib/quandl/cassandra_models/column/read/collapse.rb +64 -0
  33. data/lib/quandl/cassandra_models/column/read/column.rb +18 -0
  34. data/lib/quandl/cassandra_models/column/read/data_table.rb +57 -0
  35. data/lib/quandl/cassandra_models/column/read/offset.rb +114 -0
  36. data/lib/quandl/cassandra_models/column/read/query.rb +55 -0
  37. data/lib/quandl/cassandra_models/column/read/row.rb +20 -0
  38. data/lib/quandl/cassandra_models/column/read/transform.rb +53 -0
  39. data/lib/quandl/cassandra_models/column/read/type.rb +25 -0
  40. data/lib/quandl/cassandra_models/column/read.rb +28 -0
  41. data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +42 -0
  42. data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +24 -0
  43. data/lib/quandl/cassandra_models/column/write/insert_columns.rb +22 -0
  44. data/lib/quandl/cassandra_models/column/write/insert_data.rb +39 -0
  45. data/lib/quandl/cassandra_models/column/write.rb +22 -0
  46. data/lib/quandl/cassandra_models/column.rb +20 -0
  47. data/lib/quandl/cassandra_models/column_attribute.rb +11 -0
  48. data/lib/quandl/cassandra_models/data.rb +52 -0
  49. data/lib/quandl/cassandra_models/dataset.rb +83 -0
  50. data/lib/quandl/cassandra_models/dataset_attribute.rb +6 -0
  51. data/lib/quandl/cassandra_models/multiset.rb +50 -0
  52. data/lib/quandl/strategy.rb +59 -0
  53. data/quandl_cassandra.gemspec +35 -0
  54. data/spec/expectations/string.rb +5 -0
  55. data/spec/expectations/time.rb +5 -0
  56. data/spec/factories/dataset.rb +8 -0
  57. data/spec/lib/quandl/cassandra/base/scoping_spec.rb +40 -0
  58. data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +28 -0
  59. data/spec/lib/quandl/cassandra_models/column/write_spec.rb +15 -0
  60. data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +16 -0
  61. data/spec/lib/quandl/cassandra_models/column_spec.rb +17 -0
  62. data/spec/lib/quandl/cassandra_models/data_spec.rb +34 -0
  63. data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +41 -0
  64. data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +25 -0
  65. data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +24 -0
  66. data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +26 -0
  67. data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +16 -0
  68. data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +74 -0
  69. data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +37 -0
  70. data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +18 -0
  71. data/spec/lib/quandl/cassandra_models/dataset_spec.rb +63 -0
  72. data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +122 -0
  73. data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +57 -0
  74. data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +25 -0
  75. data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +68 -0
  76. data/spec/lib/quandl/cassandra_spec.rb +12 -0
  77. data/spec/spec_helper.rb +37 -0
  78. metadata +339 -0
@@ -0,0 +1,42 @@
1
# Strategy step that regroups row-oriented data into one series per column.
#
# Reads  frequency_data:
#   { source: [ [date, v1, v2], [date, v1, v2], ... ], weekly: ... }
#
# Writes frequency_column_data:
#   { source: { UUID => [[date, v1], ...], UUID => [[date, v2], ...] }, weekly: ... }
class Quandl::Cassandra::Column::Write::GroupDataByColumn < Quandl::Cassandra::Column::Write

  # Rebuilds frequency_column_data from scratch, one entry per frequency key.
  def perform
    self.frequency_column_data = {}
    frequency_data.each do |freq, rows|
      frequency_column_data[freq] = group_data_by_column(rows)
    end
  end

  # Converts [[date, value, value, ...], ...] into
  # { column_id => [[date, value], ...] }.
  #
  # Every value position encountered gets a (possibly empty) array; blank
  # values themselves are left out of the series.
  def group_data_by_column(data)
    data.each_with_object({}) do |row, grouped|
      date = row[0]
      row[1..-1].each_with_index do |value, position|
        series = (grouped[column_id(position)] ||= [])
        series << [date, value] unless value.blank?
      end
    end
  end

  # Returns the column id stored at +index+.
  #
  # column_ids is loaded through Dataset.find_column_ids_by_id the first time
  # it is needed; a position with no id yet is assigned a fresh random UUID,
  # which is stored back into column_ids.
  def column_id(index)
    self.column_ids ||= Quandl::Cassandra::Dataset.find_column_ids_by_id(id)
    column_ids[index] ||= SecureRandom.uuid
  end

end
@@ -0,0 +1,24 @@
1
# Strategy step that snapshots the data array at several frequencies.
#
# Reads  data (an object exposing frequency, data_array, clone and collapse):
#   rows shaped like [ [1,2,3], [2,4,8], ... ]
#
# Writes frequency, data (a clone of the input) and frequency_data:
#   { source: [ [1,2,3], [2,4,8], ... ], weekly: ... }
class Quandl::Cassandra::Column::Write::GroupDataByFrequency < Quandl::Cassandra::Column::Write

  def perform
    self.frequency = data.frequency
    # work on a clone so the collapses below are not applied to the caller's object
    self.data = data.clone
    self.frequency_data = { source: data.data_array.clone }
    # the same data object is collapsed repeatedly, so each step starts from
    # the result of the previous collapse
    targets = Quandl::Operation::Collapse.collapses_greater_than(data.frequency)
    targets.each do |freq|
      data.collapse(freq)
      # snapshot the internal array before the next collapse changes it
      frequency_data[freq] = data.data_array.clone
    end
  end

end
@@ -0,0 +1,22 @@
1
# Strategy step that records which columns belong to the dataset.
#
# Reads id, column_ids and frequency. For every column id it issues one
# INSERT into `datasets` (with the column's position) and one INSERT into
# `column_attributes` (with the frequency).
#
# NOTE(review): values are interpolated straight into the CQL text; confirm
# they are always trusted, or use bound parameters if Base.execute supports them.
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write

  # Does nothing when there are no column ids.
  def perform
    return if column_ids.blank?
    column_ids.each_with_index do |column_id, position|
      queries = [
        datasets_statement(column_id, position),
        column_attributes_statement(column_id)
      ]
      queries.each { |cql| Quandl::Cassandra::Base.execute(cql) }
    end
  end

  # CQL linking +column_id+ to this dataset id at +position+.
  def datasets_statement(column_id, position)
    "INSERT INTO datasets (id, column_id, position) VALUES (#{id}, #{column_id}, #{position})"
  end

  # CQL storing the current frequency for +column_id+.
  def column_attributes_statement(column_id)
    "INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{frequency}' )"
  end

end
@@ -0,0 +1,39 @@
1
# Strategy step that writes every [time, value] pair of
# frequency_column_data into the `columns` table using unlogged batches.
#
# NOTE(review): values are interpolated straight into the CQL text; confirm
# they are always trusted, or use bound parameters if Base supports them.
class Quandl::Cassandra::Column::Write::InsertData < Quandl::Cassandra::Column::Write

  # Dispatches all batches, then calls +value+ on each object returned by
  # execute_async (presumably a future that resolves when the batch is done -
  # confirm against Base).
  def perform
    insert_data_in_batches.map(&:value)
  end

  # Walks frequency -> column -> row, queueing one INSERT per row. A batch is
  # dispatched as soon as the queue reaches the configured batch_size; any
  # leftover statements go out in a final batch. Returns the collected results
  # of execute_async_batch.
  def insert_data_in_batches
    futures = []
    pending = []
    frequency_column_data.each do |freq, column_data|
      column_data.each do |column_id, rows|
        rows.each do |point|
          time, value = point
          pending << statement(column_id, freq, time, value)
          next if pending.count < Quandl::Cassandra.configuration.batch_size
          futures << execute_async_batch(pending)
          # start a new queue; the dispatched array is left untouched
          pending = []
        end
      end
    end
    futures << execute_async_batch(pending) unless pending.empty?
    futures
  end

  # Wraps +statements+ in a single UNLOGGED BATCH and submits it asynchronously.
  def execute_async_batch(statements)
    batch = %Q{BEGIN UNLOGGED BATCH\n#{statements.join("\n")}\nAPPLY BATCH;}
    Quandl::Cassandra::Base.execute_async(batch)
  end

  # CQL for one row of the `columns` table.
  def statement(id, type, time, value)
    "INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
  end

end
@@ -0,0 +1,22 @@
1
# Write pipeline for column data. Also the base class of the individual
# steps, which share the attribute accessors declared here.
class Quandl::Cassandra::Column::Write < Quandl::Strategy::Strategize

  # attributes handed from step to step
  define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data

  # the step classes subclass Write, so they are loaded only once it exists
  require_relative 'write/insert_data'
  require_relative 'write/insert_columns'
  require_relative 'write/group_data_by_column'
  require_relative 'write/group_data_by_frequency'

  # Runs the steps in order against +attributes+ and returns the attributes
  # produced by the last step.
  def self.perform(attributes)
    steps = [
      Quandl::Cassandra::Column::Write::GroupDataByFrequency,
      Quandl::Cassandra::Column::Write::GroupDataByColumn,
      Quandl::Cassandra::Column::Write::InsertData,
      Quandl::Cassandra::Column::Write::InsertColumns
    ]
    pipeline = Quandl::Strategy.new(attributes) do |chain|
      steps.each { |step| chain.use(step) }
    end
    pipeline.perform
  end

end
@@ -0,0 +1,20 @@
1
# Model for the `columns` table; entry point to the read and write pipelines.
class Quandl::Cassandra::Column < Quandl::Cassandra::Base

  table_name :columns

  require_relative 'column/read'
  require_relative 'column/write'

  # Runs the Read pipeline with +args+ and returns its :data entry.
  def self.read(*args)
    result = Quandl::Cassandra::Column::Read.perform(*args)
    result[:data]
  end

  # Runs the Write pipeline with +args+ and returns whatever it returns.
  def self.write(*args)
    Quandl::Cassandra::Column::Write.perform(*args)
  end

end
@@ -0,0 +1,11 @@
1
# Model for the `column_attributes` table.
class Quandl::Cassandra::ColumnAttribute < Quandl::Cassandra::Base

  table_name :column_attributes

  after_initialize :default_attributes

  # Gives a record without an id a random UUID.
  #
  # Uses ||= so an id that is already present when the callback fires is kept
  # rather than replaced with a new UUID.
  # NOTE(review): relies on Base providing an `id` reader next to `id=` - confirm.
  def default_attributes
    self.id ||= SecureRandom.uuid
  end

end
@@ -0,0 +1,52 @@
1
# Composable query object for column data. Scopes accumulate attributes, and
# the first access to the results (+all+) hands them to Column.read.
class Quandl::Cassandra::Data

  include ScopeComposer::Model

  has_scope_composer

  delegate :where, to: :scope

  # scope to a dataset-like object exposing id and column_ids
  scope :dataset, ->(d){ id(d.id).column_ids(d.column_ids) }

  scope :row, :id, :limit, :offset, :column, :accuracy, :frequency

  # both accept a list or several arguments; nested arrays are flattened
  scope :column_frequencies, ->(*freqs){ where( column_frequencies: Array(freqs).flatten ) }
  scope :column_ids, ->(*ids){ where( column_ids: Array(ids).flatten ) }

  # applied only when the value is recognised; otherwise the scope is left as is
  scope :collapse, ->(v){ where( collapse: v.to_sym ) if Quandl::Operation::Collapse.valid_collapse?(v) }
  scope :transform, ->(v){ where( transform: v.to_sym ) if Quandl::Operation::Transform.valid_transformation?(v) }

  # anything other than :asc is treated as :desc
  scope :order, ->(v){
    order = ( v.to_sym == :asc ) ? :asc : :desc
    where( order: order )
  }

  # Trim bounds are stored as Julian day numbers. parse_date can return nil or
  # an unparsed String, so the bound is applied only when the result really has
  # a Julian day; an unusable value is skipped, like an invalid collapse or
  # transform above.
  scope :trim_start, ->(date){
    parsed = parse_date(date)
    where( trim_start: parsed.jd ) if parsed.respond_to?(:jd)
  }
  scope :trim_end, ->(date){
    parsed = parse_date(date)
    where( trim_end: parsed.jd ) if parsed.respond_to?(:jd)
  }

  scope_helper :find, ->(id){ id(id).to_table }
  scope_helper :to_table, ->{ all }

  # Converts a numeric String or Integer (Julian day) or a YYYY-MM-DD String
  # into a Date. Other values are returned unchanged; nil is returned when a
  # conversion raises.
  scope_helper :parse_date, ->( date ){
    begin
      date = Date.jd(date.to_i) if date.kind_of?(String) && date.numeric?
      date = Date.jd(date) if date.is_a?(Integer)
      date = Date.parse(date) if date.is_a?(String) && date =~ /^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/
      date
    rescue
      nil
    end
  }

  scope.class_eval do

    # make the scope behave like its result set
    delegate :inspect, :==, to: :all, allow_nil: true
    Array.forwardable_methods.each{|mname| delegate(mname, to: :all, allow_nil: true ) unless self.respond_to?(mname) }

    # Reads the data once and memoizes the result.
    def all
      @all ||= Quandl::Cassandra::Column.read( attributes.merge(scope_attributes) )
    end

  end

end
@@ -0,0 +1,83 @@
1
# Model for the `datasets` table: links a dataset id to its ordered column
# ids and gives access to the dataset's data.
class Quandl::Cassandra::Dataset < Quandl::Cassandra::Base

  table_name :datasets
  autosave_changes false

  define_attributes :id, :data, :column_ids

  before_save :save_columns, :save_data, :save_dataset_attribute
  after_save :clear_attributes

  delegate :type, :updated_at, :created_at, :frequency, to: :dataset_attribute, allow_nil: true

  # Returns the column ids of dataset +id+, ordered by position.
  #
  # The class is referenced by its full name: with the compact
  # `class Quandl::Cassandra::Dataset` form a bare `Dataset` is not looked up
  # inside Quandl::Cassandra, so it would either fail to resolve or pick up an
  # unrelated top-level ::Dataset.
  def self.find_column_ids_by_id(id)
    rows = Quandl::Cassandra::Dataset.where( id: id ).pluck(:column_id, :position)
    rows.sort_by { |row| row[1] }.map { |row| row[0] }
  end

  # Assigns each attribute hash to the column at the same index, building a
  # new object for positions that have none.
  #
  # NOTE(review): #columns holds ColumnAttribute records but missing entries
  # are built as Quandl::Cassandra::Column - confirm this is intended.
  def column_attributes=(column_attrs)
    column_attrs.each_with_index do |attrs, index|
      self.columns[index] ||= Quandl::Cassandra::Column.new
      self.columns[index].assign_attributes(attrs)
    end
  end

  # ColumnAttribute records for column_ids, looked up once and memoized.
  def columns
    @columns ||= column_ids.collect { |cid| Quandl::Cassandra::ColumnAttribute.find(cid) }
  end

  # Column ids ordered by position, memoized.
  def column_ids
    @column_ids ||= self.class.find_column_ids_by_id(id)
  end

  # First cell of the first row in ascending order (nil when there is none).
  def trim_start
    @trim_start ||= data_scope.limit(1).order(:asc).try(:[], 0).try(:[], 0)
  end

  # First cell of the first row in descending order (nil when there is none).
  def trim_end
    @trim_end ||= data_scope.limit(1).order(:desc).try(:[], 0).try(:[], 0)
  end

  # Returns explicitly assigned data when present, otherwise the memoized
  # data scope for this dataset.
  def data
    return read_attribute(:data) if data?
    @attributes[:data] ||= data_scope
  end

  # Wraps +rows+ in Quandl::Data unless it already is one.
  def data=(rows)
    rows = Quandl::Data.new(rows) unless rows.is_a?(Quandl::Data)
    write_attribute(:data, rows)
  end

  # A fresh Data scope restricted to this dataset.
  def data_scope
    Quandl::Cassandra::Data.dataset(self)
  end

  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::DatasetAttribute.find_or_build(id)
  end

  protected

  # Copies the data's frequency onto the dataset attribute and saves it.
  def save_dataset_attribute
    dataset_attribute.frequency = data.frequency.to_s
    dataset_attribute.save
  end

  def save_columns
    columns.each(&:save)
  end

  # Runs the write pipeline only when the data attribute has changed.
  def save_data
    Quandl::Cassandra::Column.write( id: id, data: data ) if data_changed?
  end

  # Drops memoized values so they are re-read after a save.
  def clear_attributes
    @trim_start = nil
    @trim_end = nil
    @columns = nil
    @column_ids = nil
    @attributes[:data] = nil
  end

end
@@ -0,0 +1,6 @@
1
# Model for the `dataset_attributes` table.
class Quandl::Cassandra::DatasetAttribute < Quandl::Cassandra::Base

  table_name :dataset_attributes

  # attributes exposed on each record
  define_attributes :id, :type, :updated_at, :created_at, :frequency

end
@@ -0,0 +1,50 @@
1
# A dataset assembled from columns of other datasets.
#
# datasets_columns is a comma-separated list of "dataset_id.column" tokens,
# where column is a 1-based position within that dataset's column ids.
class Quandl::Cassandra::Multiset < Quandl::Cassandra::Dataset

  table_name :datasets

  define_attributes :datasets_columns

  # Builds a multiset for the given datasets_columns string.
  def self.with_columns(datasets_columns)
    new( datasets_columns: datasets_columns )
  end

  # Source-level data restricted to the selected columns and their frequencies.
  def data_scope
    scope = Quandl::Cassandra::Data.collapse(:source)
    scope.column_ids( column_ids ).column_frequencies( column_frequencies )
  end

  # Frequency of each selected column, memoized.
  def column_frequencies
    @column_frequencies ||= columns.collect(&:frequency)
  end

  # Column ids resolved from datasets_columns, memoized.
  def column_ids
    @column_ids ||= column_ids_from_datasets_columns
  end

  # Resolves every "dataset_id.column" token to a column id. Each dataset's
  # column ids are looked up once and cached in #datasets.
  #
  # NOTE(review): a token with no column part, or with column 0, indexes -1
  # and therefore yields the dataset's last column id - confirm this is intended.
  def column_ids_from_datasets_columns
    datasets_columns.split(',').map do |token|
      dataset_id, column = token.split('.')
      datasets[dataset_id] ||= Quandl::Cassandra::Dataset.find(dataset_id).column_ids
      datasets[dataset_id][ column.to_i - 1 ]
    end
  end

  # Cache of dataset id => column ids.
  def datasets
    @datasets ||= {}
  end

  # Taken from the data itself rather than from a dataset attribute.
  def frequency
    data.frequency
  end

  # A new, unsaved DatasetAttribute (nothing is looked up by id here).
  def dataset_attribute
    @dataset_attribute ||= Quandl::Cassandra::DatasetAttribute.new
  end

end
@@ -0,0 +1,59 @@
1
# The namespace is opened in nested form so this file also loads when nothing
# has defined Quandl yet.
module Quandl
  # Runs a list of step classes in sequence, feeding the attributes produced
  # by one step into the next.
  #
  #   Quandl::Strategy.new(id: 1) { |c| c.use StepA; c.use StepB }.perform
  class Strategy

    attr_accessor :attributes

    # Accepts the initial attributes (default: empty hash) and an optional
    # block that receives the strategy so steps can be registered with #use.
    def initialize(*args, &block)
      setup(*args, &block)
    end

    # Stores +attributes+ (nil is treated as an empty hash) and yields self to
    # the block when one is given.
    def setup(attributes = {}, &block)
      self.attributes = attributes || {}
      yield(self) if block_given?
    end

    # Instantiates each registered class with the current attributes, adopts
    # that instance's attributes, and returns the final attributes.
    def perform
      classes.each do |klass|
        self.attributes = klass.new(attributes).attributes
      end
      attributes
    end

    # Registers a step class; a class already registered is not added again.
    def use(klass)
      classes << klass unless classes.include?(klass)
    end

    # Registered step classes, in registration order.
    def classes
      @classes ||= []
    end

    # Base class for steps. Constructing an instance clones the attributes and
    # immediately runs #perform when the subclass defines it publicly.
    class Strategize

      attr_accessor :attributes

      # Runs the step and returns the resulting attributes.
      def self.call(attrs)
        new(attrs).attributes
      end

      # +attrs+ is cloned so the caller's object is not reassigned into; nil
      # becomes an empty hash so the generated accessors always have a hash
      # to read from and write to.
      def initialize(attrs)
        self.attributes = attrs.nil? ? {} : attrs.clone
        perform if respond_to?(:perform)
      end

      # Defines a reader and a writer per name, both backed by +attributes+
      # under the symbolized name.
      def self.define_attributes(*names)
        names.each do |name|
          key = name.to_sym
          define_method(name) { attributes[key] }
          define_method("#{name}=") { |value| attributes[key] = value }
        end
      end

    end

  end
end