quandl_cassandra 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. data/.gitignore +7 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE +7 -0
  4. data/README.md +7 -0
  5. data/Rakefile +11 -0
  6. data/UPGRADE.md +3 -0
  7. data/lib/quandl/cassandra/base/attributes.rb +103 -0
  8. data/lib/quandl/cassandra/base/callbacks.rb +15 -0
  9. data/lib/quandl/cassandra/base/connection.rb +49 -0
  10. data/lib/quandl/cassandra/base/logging.rb +40 -0
  11. data/lib/quandl/cassandra/base/naming.rb +19 -0
  12. data/lib/quandl/cassandra/base/persistence.rb +67 -0
  13. data/lib/quandl/cassandra/base/sanitization.rb +38 -0
  14. data/lib/quandl/cassandra/base/schema.rb +79 -0
  15. data/lib/quandl/cassandra/base/scoping.rb +122 -0
  16. data/lib/quandl/cassandra/base.rb +51 -0
  17. data/lib/quandl/cassandra/configuration.rb +34 -0
  18. data/lib/quandl/cassandra/error.rb +10 -0
  19. data/lib/quandl/cassandra/types/abstract_type.rb +33 -0
  20. data/lib/quandl/cassandra/types/boolean_type.rb +10 -0
  21. data/lib/quandl/cassandra/types/decimal_type.rb +9 -0
  22. data/lib/quandl/cassandra/types/double_type.rb +9 -0
  23. data/lib/quandl/cassandra/types/float_type.rb +9 -0
  24. data/lib/quandl/cassandra/types/integer_type.rb +9 -0
  25. data/lib/quandl/cassandra/types/long_type.rb +9 -0
  26. data/lib/quandl/cassandra/types/timestamp_type.rb +15 -0
  27. data/lib/quandl/cassandra/types/utf8_type.rb +13 -0
  28. data/lib/quandl/cassandra/types/uuid_type.rb +21 -0
  29. data/lib/quandl/cassandra/types.rb +42 -0
  30. data/lib/quandl/cassandra/version.rb +5 -0
  31. data/lib/quandl/cassandra.rb +30 -0
  32. data/lib/quandl/cassandra_models/column/read/collapse.rb +64 -0
  33. data/lib/quandl/cassandra_models/column/read/column.rb +18 -0
  34. data/lib/quandl/cassandra_models/column/read/data_table.rb +57 -0
  35. data/lib/quandl/cassandra_models/column/read/offset.rb +114 -0
  36. data/lib/quandl/cassandra_models/column/read/query.rb +55 -0
  37. data/lib/quandl/cassandra_models/column/read/row.rb +20 -0
  38. data/lib/quandl/cassandra_models/column/read/transform.rb +53 -0
  39. data/lib/quandl/cassandra_models/column/read/type.rb +25 -0
  40. data/lib/quandl/cassandra_models/column/read.rb +28 -0
  41. data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +42 -0
  42. data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +24 -0
  43. data/lib/quandl/cassandra_models/column/write/insert_columns.rb +22 -0
  44. data/lib/quandl/cassandra_models/column/write/insert_data.rb +39 -0
  45. data/lib/quandl/cassandra_models/column/write.rb +22 -0
  46. data/lib/quandl/cassandra_models/column.rb +20 -0
  47. data/lib/quandl/cassandra_models/column_attribute.rb +11 -0
  48. data/lib/quandl/cassandra_models/data.rb +52 -0
  49. data/lib/quandl/cassandra_models/dataset.rb +83 -0
  50. data/lib/quandl/cassandra_models/dataset_attribute.rb +6 -0
  51. data/lib/quandl/cassandra_models/multiset.rb +50 -0
  52. data/lib/quandl/strategy.rb +59 -0
  53. data/quandl_cassandra.gemspec +35 -0
  54. data/spec/expectations/string.rb +5 -0
  55. data/spec/expectations/time.rb +5 -0
  56. data/spec/factories/dataset.rb +8 -0
  57. data/spec/lib/quandl/cassandra/base/scoping_spec.rb +40 -0
  58. data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +28 -0
  59. data/spec/lib/quandl/cassandra_models/column/write_spec.rb +15 -0
  60. data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +16 -0
  61. data/spec/lib/quandl/cassandra_models/column_spec.rb +17 -0
  62. data/spec/lib/quandl/cassandra_models/data_spec.rb +34 -0
  63. data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +41 -0
  64. data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +25 -0
  65. data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +24 -0
  66. data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +26 -0
  67. data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +16 -0
  68. data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +74 -0
  69. data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +37 -0
  70. data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +18 -0
  71. data/spec/lib/quandl/cassandra_models/dataset_spec.rb +63 -0
  72. data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +122 -0
  73. data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +57 -0
  74. data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +25 -0
  75. data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +68 -0
  76. data/spec/lib/quandl/cassandra_spec.rb +12 -0
  77. data/spec/spec_helper.rb +37 -0
  78. metadata +339 -0
@@ -0,0 +1,64 @@
1
# Read step that normalizes the :collapse and :frequency attributes and derives
# the per-column frequencies and per-column collapses from them.
class Quandl::Cassandra::Column::Read::Collapse < Quandl::Cassandra::Column::Read

  # Writes the resolved values back into the attributes.
  # The assignment order matters: #frequency and #column_collapses read
  # attributes[:collapse], and #column_frequencies reads attributes[:frequency],
  # each after it has been reassigned on a preceding line.
  def perform
    attributes[:collapse] = collapse
    attributes[:frequency] = frequency
    attributes[:column_frequencies] = column_frequencies
    attributes[:column_collapses] = column_collapses
  end

  # Resolves the effective collapse. Returns :source when no collapsing is needed.
  def collapse
    # start from the collapse that was passed in
    coll = attributes[:collapse]
    # when pristine.frequency and collapse are equal, the source data is being requested
    coll = :source if attributes[:pristine][:frequency] == attributes[:collapse]
    # when the collapse is daily (or missing), source data is being requested
    coll = :source if coll == :daily || coll.blank?
    coll
  end

  # Resolves the effective frequency.
  def frequency
    # start from the frequency that was passed in
    freq = attributes[:frequency]
    # otherwise grab it from the database; the constant is fully qualified because
    # this class is declared in compact form, so lexical lookup does not search
    # the Quandl::Cassandra namespace
    freq = Quandl::Cassandra::DatasetAttribute.where(id: id).pluck(:frequency).first if freq.blank? && id.present?
    # fall back to the collapse
    freq = attributes[:collapse] if freq.blank?
    # if freq is source, daily is being requested
    freq = :daily if freq == :source
    freq
  end

  # Returns the passed :column_frequencies, or one copy of attributes[:frequency]
  # per column id when none were passed.
  def column_frequencies
    freqs = attributes[:column_frequencies]
    freqs = attributes[:column_ids].collect { attributes[:frequency] } if freqs.blank?
    freqs
  end

  # INPUT  column_frequencies: [ :daily, :weekly, :monthly ], collapse: :monthly
  # OUTPUT [ :monthly, :monthly, :source ]

  # INPUT  column_frequencies: [ :daily, :weekly, :monthly ], collapse: :source
  # OUTPUT [ :source, :source, :source ]

  # INPUT  column_frequencies: [ :daily, :monthly, :annual ], collapse: :monthly
  # OUTPUT [ :monthly, :source, :source ]

  # Maps every column frequency to the collapse that column needs.
  def column_collapses
    attributes[:column_frequencies].collect do |column_frequency|
      # is the column's frequency less than the collapse?
      if Quandl::Operation::Collapse.collapses_greater_than( column_frequency ).include?( attributes[:collapse] )
        # this column needs to be collapsed
        attributes[:collapse]
      else
        # frequency is greater than or equal to collapse, so collapse is not needed
        :source
      end
    end
  end

end
@@ -0,0 +1,18 @@
1
# Read step that decides which column ids the query should cover.
class Quandl::Cassandra::Column::Read::Column < Quandl::Cassandra::Column::Read

  # Fills attributes[:column_ids] and, when a single column was requested,
  # narrows it down to that column's id.
  def perform
    # given an id, this is a dataset: take its column ids unless some were passed
    if id.present?
      attributes[:column_ids] ||= Quandl::Cassandra::Dataset.find(id).column_ids
    end
    # given a column, only that specific column is being requested
    if column.present?
      attributes[:column_ids] = pluck_column_id
    end
  end

  # Returns a one-element array holding the column id at the requested index.
  def pluck_column_id
    selected = attributes[:column_ids][ column ]
    [ selected ]
  end

  # Returns attributes[:column] minus one, or nil when no column was requested.
  def column
    requested = attributes[:column]
    return nil unless requested.present?
    requested - 1
  end

end
@@ -0,0 +1,57 @@
1
# Read step that wraps the queried rows in a Quandl::Data object and applies
# the post-processing operations to it.
class Quandl::Cassandra::Column::Read::DataTable < Quandl::Cassandra::Column::Read

  # Replaces attributes[:data] with the post-processed data.
  def perform
    attributes[:data] = quandl_data( attributes[:data] )
  end

  # Builds a Quandl::Data from +data+ and runs it through collapse, transform,
  # trim, sort, row and limit, in that order; each step receives the result
  # of the previous one.
  def quandl_data(data)
    steps = [ :collapse, :transform, :trim, :sort, :row, :limit ]
    steps.inject( Quandl::Data.new( data ) ) do |table, step|
      send( step, table )
    end
  end

  # Applies the trims stored under attributes[:data_table], if any.
  def trim(data)
    options = attributes[:data_table]
    return data unless options.present?
    from = options[:trim_start]
    to = options[:trim_end]
    # the data is put in ascending order before any trim is applied
    data = data.sort_order( :asc ) if from || to
    data = data.trim_start( from ) if from
    data = data.trim_end( to ) if to
    data
  end

  # Orders the data by attributes[:order].
  def sort(data)
    data.sort_order( attributes[:order] )
  end

  # Collapses the data when a collapse is set.
  def collapse(data)
    attributes[:collapse] ? data.collapse( attributes[:collapse] ) : data
  end

  # Transforms the data when a transform is set.
  def transform(data)
    attributes[:transform] ? data.transform( attributes[:transform] ) : data
  end

  # Limits the data by the pristine (originally requested) limit, if any.
  def limit(data)
    requested = attributes[:pristine][:limit]
    requested ? data.limit( requested ) : data
  end

  # When a row was requested and more than one row is present, keeps only the last row.
  def row(data)
    return data unless attributes[:row].present? && data.count > 1
    data.data_array = [ data.data_array[-1] ]
    data
  end

end
@@ -0,0 +1,114 @@
1
# Read step that turns the :offset / :limit / :row attributes into
# :trim_start / :trim_end boundaries (julian day numbers).
class Quandl::Cassandra::Column::Read::Offset < Quandl::Cassandra::Column::Read

  def perform
    apply_offset_with_transform
    apply_offset
  end

  # rdiff_from needs the data from the current observation to the offset one,
  # so the range is expressed as a trim and limit/offset are cleared.
  def apply_offset_with_transform
    return unless attributes[:row].present? && attributes[:transform] == :rdiff_from
    period = collapse_with_frequency
    # limit the results by trim; #order only ever yields :asc or :desc
    if order == :desc
      attributes[:trim_start] = trim_start.occurrences_of_frequency_ago( attributes[:row], period ).end_of_frequency( period ).jd
    else
      attributes[:trim_end] = trim_end.occurrences_of_frequency_ahead( attributes[:row], period ).end_of_frequency( period ).jd
    end
    # the query should not limit or offset the data
    attributes[:limit] = nil
    attributes[:offset] = nil
  end

  # Dispatches to the ascending or descending variant when an offset is set.
  def apply_offset
    return unless attributes[:offset].present?
    if order == :asc
      apply_offset_asc
    else
      apply_offset_desc
    end
  end

  # Ascending order: move ahead from #trim_end by the offset.
  def apply_offset_asc
    period = collapse_with_frequency
    # calculate ranges
    range_start = trim_end.occurrences_of_frequency_ahead( offset, period ).start_of_frequency( period )
    range_end = limit ? range_start.occurrences_of_frequency_ahead( accuracy_with_limit, period ).end_of_frequency( period ) : nil
    # set trims
    attributes[:trim_start] = range_start.jd
    attributes[:trim_end] = range_end.jd if limit
  end

  # Descending order: move back from #trim_start by the offset.
  def apply_offset_desc
    period = collapse_with_frequency
    # calculate ranges
    range_start = trim_start.occurrences_of_frequency_ago( offset, period ).end_of_frequency( period )
    range_end = limit ? range_start.occurrences_of_frequency_ago( accuracy_with_limit, period ).start_of_frequency( period ) : nil
    # set trims
    attributes[:trim_start] = range_end.jd if limit
    attributes[:trim_end] = range_start.jd
  end

  # Date built from the largest :time plucked with a "type DESC" ordering,
  # across all column ids. Memoized.
  def trim_start
    return @trim_start if defined?(@trim_start)
    @trim_start = Date.jd( max_time_ordered_by( "type DESC" ) )
  end

  # Date built from the largest :time plucked with a "type ASC" ordering,
  # across all column ids. Memoized.
  # NOTE(review): the per-column results are combined with max here as well,
  # exactly like #trim_start -- confirm that min was not intended.
  def trim_end
    return @trim_end if defined?(@trim_end)
    @trim_end = Date.jd( max_time_ordered_by( "type ASC" ) )
  end

  # The accuracy, widened by limit - 1 when a positive limit is set.
  def accuracy_with_limit
    base = accuracy
    return base unless limit && limit > 0
    base + limit - 1
  end

  # The accuracy passed with the query, or a guess based on the frequency.
  def accuracy
    given = attributes[:accuracy]
    return given unless given.blank?
    guesses = { daily: 0, weekly: 1, monthly: 1, quarterly: 1, annual: 0 }
    guesses.fetch( frequency, 3 )
  end

  def column_ids
    attributes[:column_ids]
  end

  # Memoized, so the value survives attributes[:limit] being cleared later on.
  def limit
    @limit ||= attributes[:limit]
  end

  def offset
    attributes[:offset]
  end

  # :asc only when explicitly requested, :desc otherwise. Memoized.
  def order
    @order ||= if attributes[:order] == :asc
      :asc
    else
      :desc
    end
  end

  # The frequency when source data is requested, the collapse otherwise.
  def collapse_with_frequency
    return frequency if collapse == :source
    collapse
  end

  def frequency
    attributes[:frequency]
  end

  def collapse
    attributes[:collapse]
  end

  private

  # Plucks one :time per column id using the given ordering and returns the
  # largest of the plucked values.
  def max_time_ordered_by(ordering)
    times = column_ids.collect do |column_id|
      Quandl::Cassandra::Column.limit(1).order( ordering ).where( id: column_id, type: collapse ).pluck(:time)
    end
    times.flatten.max
  end

end
@@ -0,0 +1,55 @@
1
# Read step that runs one prepared CQL query per column id and merges the
# results into rows keyed by time.
class Quandl::Cassandra::Column::Read::Query < Quandl::Cassandra::Column::Read

  # Stores the selected rows under attributes[:data].
  def perform
    attributes[:data] = select_data
  end

  # Executes the prepared statement once per column id and returns an array
  # of flattened [ time, values... ] rows, with one value slot per column id.
  def select_data
    prepared = Quandl::Cassandra::Base.prepare( statement )
    rows_by_time = {}
    attributes[:column_ids].each_with_index do | column_id, position |
      # the column type comes from the collapse computed for this column
      type = attributes[:column_collapses][position].to_s
      # bind and execute query, then slot each value into its column position
      prepared.execute( column_id, type, :one ).each do |record|
        time = record['time']
        rows_by_time[time] ||= Array.new( attributes[:column_ids].count )
        rows_by_time[time][position] ||= record['value']
      end
    end
    rows_by_time.map(&:flatten)
    # Disabled asynchronous variant, kept for reference:
    # # fire off the queries
    # futures = []
    # attrs[:column_ids].each_with_index do | id, index |
    # # pluck column type from collapses
    # type = attrs[:column_collapses][index].to_s
    # # bind and execute query
    # futures << Quandl::Cassandra::Base.connection.execute_async( statement.bind( id, type ) )
    # end
    # # collect the results
    # futures.each_with_index do |future, column_index|
    # t1 = Time.now
    # rows = JavaDriver::ResultSet.new( future.get_uninterruptibly ).to_a
    # rows.each do |row|
    # data[row[0]] ||= Array.new(attrs[:column_ids].count)
    # data[row[0]][column_index] ||= row[1]
    # end
    # JCQL::CommonLogger.info "#{cql} (#{attrs[:column_ids][column_index]}) (#{t1.elapsed.microseconds}ms)"
    # end
    # data
  end

  # Builds the CQL text. id and type are left as bind markers; the trims,
  # order and limit are interpolated from the attributes.
  def statement
    clauses = [ "SELECT time,value FROM columns WHERE" ]
    clauses << " time >= #{attributes[:trim_start]} AND " if attributes[:trim_start]
    clauses << " time <= #{attributes[:trim_end]} AND " if attributes[:trim_end]
    clauses << " id = ? AND type = ?"
    clauses << " ORDER BY type #{attributes[:order]}" if attributes[:order]
    clauses << " LIMIT #{attributes[:limit]}" if attributes[:limit]
    clauses.join
  end

end
@@ -0,0 +1,20 @@
1
# Read step that translates a requested :row into :order, :offset and :limit.
class Quandl::Cassandra::Column::Read::Row < Quandl::Cassandra::Column::Read

  # Does nothing unless a row was requested. A negative row is read in
  # ascending order with an inverted, zero-based offset; any other row is
  # read in descending order as an offset from the current observation.
  # In both cases exactly one row is requested.
  def perform
    row = attributes[:row]
    return unless row.present?
    inverted = row < 0
    attributes[:order] = inverted ? :asc : :desc
    attributes[:offset] = inverted ? -row - 1 : row
    attributes[:limit] = 1
  end

end
@@ -0,0 +1,53 @@
1
# Read step that adjusts limit, offset and trims so that the requested
# transform has the data it needs.
class Quandl::Cassandra::Column::Read::Transform < Quandl::Cassandra::Column::Read

  # Does nothing unless a transform was requested.
  def perform
    return unless attributes[:transform].present?
    apply_rdiff
    apply_cumul
  end

  # For :cumul with a limit, the offset handling is handed to the data table
  # and the query itself is left unlimited.
  def apply_cumul
    return unless transform?( :cumul ) && limit.present?
    # data table needs to handle the offset since cumul requires all data
    attributes[:data_table] = Quandl::Cassandra::Column::Read::Offset.call(attributes)
    # the query should not limit or offset the data
    attributes[:row] = nil
    attributes[:limit] = nil
    attributes[:offset] = nil
  end

  # For :rdiff and :diff, one extra observation is requested.
  def apply_rdiff
    return unless transform?( :rdiff, :diff )
    # the limit grows by one
    attributes[:limit] += 1 if attributes[:limit]
    # trims are widened by one occurrence of the collapse
    if trim_start.present? && order == :desc
      attributes[:trim_start] = trim_start.occurrences_of_frequency_ago( 1, attributes[:collapse] ).jd
    end
    if trim_end.present? && order == :asc
      attributes[:trim_end] = trim_end.occurrences_of_frequency_ahead( 1, attributes[:collapse] ).jd
    end
  end

  # True when the requested transform is one of +keys+.
  def transform?(*keys)
    keys.include?( attributes[:transform] )
  end

  # attributes[:trim_end] as a Date, or nil when it is not set.
  def trim_end
    @trim_end ||= ( Date.jd( attributes[:trim_end] ) if attributes[:trim_end].present? )
  end

  # attributes[:trim_start] as a Date, or nil when it is not set.
  def trim_start
    @trim_start ||= ( Date.jd( attributes[:trim_start] ) if attributes[:trim_start].present? )
  end

  def limit
    attributes[:limit]
  end

  # :asc only when explicitly requested, :desc otherwise. Memoized.
  def order
    @order ||= if attributes[:order] == :asc
      :asc
    else
      :desc
    end
  end

end
@@ -0,0 +1,25 @@
1
# Read step that coerces the known attributes to their expected types and
# keeps a copy of the coerced attributes under :pristine.
class Quandl::Cassandra::Column::Read::Type < Quandl::Cassandra::Column::Read

  def perform
    # enforce types on every attribute that is present
    attributes.each do |name, raw|
      attributes[name] = enforce_type(name, raw)
    end
    # retain a pristine (shallow) copy of the attributes, unless one already exists
    attributes[:pristine] = attributes.clone unless attributes[:pristine]
  end

  # Returns +value+ coerced according to +key+: integer-like keys via to_i,
  # symbol-like keys via to_sym, :order to either :asc or :desc (defaulting
  # to :desc). Values under any other key are returned untouched.
  def enforce_type(key, value)
    case key
    when :limit, :column, :trim_start, :trim_end, :offset, :accuracy, :row
      value.try(:to_i)
    when :collapse, :transform, :frequency
      value.try(:to_sym)
    when :order
      value.try(:to_sym) == :asc ? :asc : :desc
    else
      value
    end
  end

end
@@ -0,0 +1,28 @@
1
# Base class of the column read steps and entry point that runs them in sequence.
class Quandl::Cassandra::Column::Read < Quandl::Strategy::Strategize

  # the steps subclass this class, so they are loaded from inside its body
  require_relative 'read/collapse'
  require_relative 'read/column'
  require_relative 'read/data_table'
  require_relative 'read/offset'
  require_relative 'read/query'
  require_relative 'read/row'
  require_relative 'read/transform'
  require_relative 'read/type'

  define_attributes :id

  # Builds a Quandl::Strategy over +attributes+, registers the read steps in
  # the order listed below and returns the result of performing it.
  def self.perform(attributes)
    strategy = Quandl::Strategy.new( attributes ) do |c|
      [
        Quandl::Cassandra::Column::Read::Type,
        Quandl::Cassandra::Column::Read::Row,
        Quandl::Cassandra::Column::Read::Column,
        Quandl::Cassandra::Column::Read::Collapse,
        Quandl::Cassandra::Column::Read::Transform,
        Quandl::Cassandra::Column::Read::Offset,
        Quandl::Cassandra::Column::Read::Query,
        Quandl::Cassandra::Column::Read::DataTable
      ].each do |step|
        c.use step
      end
    end
    strategy.perform
  end

end