quandl_cassandra 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/.gitignore +7 -0
  2. data/Gemfile +2 -0
  3. data/LICENSE +7 -0
  4. data/README.md +7 -0
  5. data/Rakefile +11 -0
  6. data/UPGRADE.md +3 -0
  7. data/lib/quandl/cassandra/base/attributes.rb +103 -0
  8. data/lib/quandl/cassandra/base/callbacks.rb +15 -0
  9. data/lib/quandl/cassandra/base/connection.rb +49 -0
  10. data/lib/quandl/cassandra/base/logging.rb +40 -0
  11. data/lib/quandl/cassandra/base/naming.rb +19 -0
  12. data/lib/quandl/cassandra/base/persistence.rb +67 -0
  13. data/lib/quandl/cassandra/base/sanitization.rb +38 -0
  14. data/lib/quandl/cassandra/base/schema.rb +79 -0
  15. data/lib/quandl/cassandra/base/scoping.rb +122 -0
  16. data/lib/quandl/cassandra/base.rb +51 -0
  17. data/lib/quandl/cassandra/configuration.rb +34 -0
  18. data/lib/quandl/cassandra/error.rb +10 -0
  19. data/lib/quandl/cassandra/types/abstract_type.rb +33 -0
  20. data/lib/quandl/cassandra/types/boolean_type.rb +10 -0
  21. data/lib/quandl/cassandra/types/decimal_type.rb +9 -0
  22. data/lib/quandl/cassandra/types/double_type.rb +9 -0
  23. data/lib/quandl/cassandra/types/float_type.rb +9 -0
  24. data/lib/quandl/cassandra/types/integer_type.rb +9 -0
  25. data/lib/quandl/cassandra/types/long_type.rb +9 -0
  26. data/lib/quandl/cassandra/types/timestamp_type.rb +15 -0
  27. data/lib/quandl/cassandra/types/utf8_type.rb +13 -0
  28. data/lib/quandl/cassandra/types/uuid_type.rb +21 -0
  29. data/lib/quandl/cassandra/types.rb +42 -0
  30. data/lib/quandl/cassandra/version.rb +5 -0
  31. data/lib/quandl/cassandra.rb +30 -0
  32. data/lib/quandl/cassandra_models/column/read/collapse.rb +64 -0
  33. data/lib/quandl/cassandra_models/column/read/column.rb +18 -0
  34. data/lib/quandl/cassandra_models/column/read/data_table.rb +57 -0
  35. data/lib/quandl/cassandra_models/column/read/offset.rb +114 -0
  36. data/lib/quandl/cassandra_models/column/read/query.rb +55 -0
  37. data/lib/quandl/cassandra_models/column/read/row.rb +20 -0
  38. data/lib/quandl/cassandra_models/column/read/transform.rb +53 -0
  39. data/lib/quandl/cassandra_models/column/read/type.rb +25 -0
  40. data/lib/quandl/cassandra_models/column/read.rb +28 -0
  41. data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +42 -0
  42. data/lib/quandl/cassandra_models/column/write/group_data_by_frequency.rb +24 -0
  43. data/lib/quandl/cassandra_models/column/write/insert_columns.rb +22 -0
  44. data/lib/quandl/cassandra_models/column/write/insert_data.rb +39 -0
  45. data/lib/quandl/cassandra_models/column/write.rb +22 -0
  46. data/lib/quandl/cassandra_models/column.rb +20 -0
  47. data/lib/quandl/cassandra_models/column_attribute.rb +11 -0
  48. data/lib/quandl/cassandra_models/data.rb +52 -0
  49. data/lib/quandl/cassandra_models/dataset.rb +83 -0
  50. data/lib/quandl/cassandra_models/dataset_attribute.rb +6 -0
  51. data/lib/quandl/cassandra_models/multiset.rb +50 -0
  52. data/lib/quandl/strategy.rb +59 -0
  53. data/quandl_cassandra.gemspec +35 -0
  54. data/spec/expectations/string.rb +5 -0
  55. data/spec/expectations/time.rb +5 -0
  56. data/spec/factories/dataset.rb +8 -0
  57. data/spec/lib/quandl/cassandra/base/scoping_spec.rb +40 -0
  58. data/spec/lib/quandl/cassandra_models/column/write/group_data_by_frequency_spec.rb +28 -0
  59. data/spec/lib/quandl/cassandra_models/column/write_spec.rb +15 -0
  60. data/spec/lib/quandl/cassandra_models/column_attribute_spec.rb +16 -0
  61. data/spec/lib/quandl/cassandra_models/column_spec.rb +17 -0
  62. data/spec/lib/quandl/cassandra_models/data_spec.rb +34 -0
  63. data/spec/lib/quandl/cassandra_models/dataset/collapse_spec.rb +41 -0
  64. data/spec/lib/quandl/cassandra_models/dataset/column_spec.rb +25 -0
  65. data/spec/lib/quandl/cassandra_models/dataset/persistence_spec.rb +24 -0
  66. data/spec/lib/quandl/cassandra_models/dataset/row_spec.rb +26 -0
  67. data/spec/lib/quandl/cassandra_models/dataset/transform_spec.rb +16 -0
  68. data/spec/lib/quandl/cassandra_models/dataset/trim_spec.rb +74 -0
  69. data/spec/lib/quandl/cassandra_models/dataset/update_spec.rb +37 -0
  70. data/spec/lib/quandl/cassandra_models/dataset_attribute_spec.rb +18 -0
  71. data/spec/lib/quandl/cassandra_models/dataset_spec.rb +63 -0
  72. data/spec/lib/quandl/cassandra_models/multiset/collapse_spec.rb +122 -0
  73. data/spec/lib/quandl/cassandra_models/multiset/columns_spec.rb +57 -0
  74. data/spec/lib/quandl/cassandra_models/multiset/data_spec.rb +25 -0
  75. data/spec/lib/quandl/cassandra_models/multiset/transform_spec.rb +68 -0
  76. data/spec/lib/quandl/cassandra_spec.rb +12 -0
  77. data/spec/spec_helper.rb +37 -0
  78. metadata +339 -0
@@ -0,0 +1,64 @@
1
# Read-pipeline step that normalises the :collapse and :frequency attributes
# and derives the per-column frequencies and collapses from them.
class Quandl::Cassandra::Column::Read::Collapse < Quandl::Cassandra::Column::Read

  # Writes the four derived values back into attributes.
  # The order matters: frequency reads the already-normalised :collapse,
  # column_frequencies reads the normalised :frequency, and column_collapses
  # reads both :column_frequencies and :collapse.
  def perform
    attributes[:collapse]           = collapse
    attributes[:frequency]          = frequency
    attributes[:column_frequencies] = column_frequencies
    attributes[:column_collapses]   = column_collapses
  end

  # Returns :source when no real collapse is needed, i.e. when the requested
  # collapse is blank, is :daily, or equals the frequency held in the pristine
  # copy of the attributes; otherwise returns the requested collapse.
  def collapse
    requested = attributes[:collapse]
    same_as_pristine_frequency = attributes[:pristine][:frequency] == requested
    if same_as_pristine_frequency || requested == :daily || requested.blank?
      :source
    else
      requested
    end
  end

  # Resolves the frequency: the given value, else the stored value for id,
  # else the collapse. A frequency of :source is reported as :daily.
  def frequency
    freq = attributes[:frequency]
    if freq.blank? && id.present?
      # NOTE(review): DatasetAttribute is referenced without a namespace --
      # confirm it resolves from inside this class.
      freq = DatasetAttribute.where(id: id).pluck(:frequency).first
    end
    freq = attributes[:collapse] if freq.blank?
    freq == :source ? :daily : freq
  end

  # Returns the passed :column_frequencies, or -- when that is blank -- one
  # copy of attributes[:frequency] per entry in attributes[:column_ids].
  def column_frequencies
    given = attributes[:column_frequencies]
    return given unless given.blank?
    attributes[:column_ids].map { attributes[:frequency] }
  end

  # INPUT  column_frequencies: [ :daily, :weekly, :monthly ], collapse: :monthly
  # OUTPUT [ :monthly, :monthly, :source ]

  # INPUT  column_frequencies: [ :daily, :weekly, :monthly ], collapse: :source
  # OUTPUT [ :source, :source, :source ]

  # INPUT  column_frequencies: [ :daily, :monthly, :annual ], collapse: :monthly
  # OUTPUT [ :monthly, :source, :source ]

  # Maps every column frequency to the collapse that column needs: the
  # requested collapse when it is listed by collapses_greater_than for that
  # frequency, :source otherwise.
  def column_collapses
    target = attributes[:collapse]
    attributes[:column_frequencies].map do |column_frequency|
      coarser = Quandl::Operation::Collapse.collapses_greater_than(column_frequency)
      coarser.include?(target) ? target : :source
    end
  end

end
@@ -0,0 +1,18 @@
1
# Read-pipeline step that decides which column ids will be queried.
class Quandl::Cassandra::Column::Read::Column < Quandl::Cassandra::Column::Read

  # Fills :column_ids from the dataset when an id is given and none were
  # passed, then narrows the list to a single entry when :column is set.
  def perform
    if id.present?
      # an id means a dataset is being read; keep any column_ids already given
      attributes[:column_ids] ||= Quandl::Cassandra::Dataset.find(id).column_ids
    end
    # a :column means one specific column is being requested
    attributes[:column_ids] = pluck_column_id if column.present?
  end

  # Returns a one-element array holding the column id at the requested index.
  def pluck_column_id
    index = column
    [attributes[:column_ids][index]]
  end

  # Zero-based index for the :column attribute (attribute value minus one),
  # or nil when :column is not present.
  # NOTE(review): a :column of 0 yields -1, which indexes the last column id --
  # confirm whether 0 is a valid input.
  def column
    requested = attributes[:column]
    requested.present? ? requested - 1 : nil
  end

end
@@ -0,0 +1,57 @@
1
# Read-pipeline step that wraps attributes[:data] in a Quandl::Data and
# post-processes it in Ruby.
class Quandl::Cassandra::Column::Read::DataTable < Quandl::Cassandra::Column::Read

  # Replaces attributes[:data] with the post-processed data.
  def perform
    attributes[:data] = quandl_data(attributes[:data])
  end

  # Builds a Quandl::Data from the raw data and runs it through each
  # post-processing method of this class, in the listed order.
  def quandl_data(data)
    table = Quandl::Data.new(data)
    [:collapse, :transform, :trim, :sort, :row, :limit].inject(table) do |current, step|
      send(step, current)
    end
  end

  # Applies the trims stored under attributes[:data_table], if any.
  # The data is put in ascending order before any trim is applied.
  def trim(data)
    return data unless attributes[:data_table].present?
    from = attributes[:data_table][:trim_start]
    to   = attributes[:data_table][:trim_end]
    data = data.sort_order(:asc) if from || to
    data = data.trim_start(from) if from
    data = data.trim_end(to) if to
    data
  end

  # Orders the data by attributes[:order].
  def sort(data)
    data.sort_order(attributes[:order])
  end

  # Collapses the data when a :collapse attribute is set.
  def collapse(data)
    attributes[:collapse] ? data.collapse(attributes[:collapse]) : data
  end

  # Transforms the data when a :transform attribute is set.
  def transform(data)
    attributes[:transform] ? data.transform(attributes[:transform]) : data
  end

  # Limits the data using the limit from the pristine attributes, not the
  # current :limit attribute.
  def limit(data)
    cap = attributes[:pristine][:limit]
    cap ? data.limit(cap) : data
  end

  # When a row was requested and more than one row is present, keeps only
  # the last row. Mutates data.data_array in place.
  def row(data)
    wanted = attributes[:row].present?
    data.data_array = [data.data_array[-1]] if wanted && data.count > 1
    data
  end

end
@@ -0,0 +1,114 @@
1
# Read-pipeline step that turns :offset / :row requests into :trim_start and
# :trim_end values (Julian day numbers) on the attributes.
class Quandl::Cassandra::Column::Read::Offset < Quandl::Cassandra::Column::Read

  # accuracy used per frequency when no :accuracy attribute was given;
  # any frequency not listed here falls back to DEFAULT_ACCURACY
  ACCURACY_BY_FREQUENCY = { daily: 0, weekly: 1, monthly: 1, quarterly: 1, annual: 0 }.freeze
  DEFAULT_ACCURACY = 3

  def perform
    apply_offset_with_transform
    apply_offset
  end

  # rdiff_from needs every observation between the current one and the
  # requested row, so the row is expressed as a trim and the query-level
  # limit and offset are cleared.
  def apply_offset_with_transform
    return unless attributes[:row].present? && attributes[:transform] == :rdiff_from
    unit = collapse_with_frequency
    if order == :desc
      attributes[:trim_start] = trim_start.occurrences_of_frequency_ago(attributes[:row], unit).end_of_frequency(unit).jd
    end
    if order == :asc
      attributes[:trim_end] = trim_end.occurrences_of_frequency_ahead(attributes[:row], unit).end_of_frequency(unit).jd
    end
    # the query itself must neither limit nor offset the data
    attributes[:limit] = nil
    attributes[:offset] = nil
  end

  # Does nothing without an :offset; otherwise dispatches on the order.
  def apply_offset
    return unless attributes[:offset].present?
    order == :asc ? apply_offset_asc : apply_offset_desc
  end

  # Ascending: the window starts `offset` periods ahead of trim_end and,
  # when a limit is set, ends accuracy_with_limit periods after that.
  def apply_offset_asc
    unit = collapse_with_frequency
    window_start = trim_end.occurrences_of_frequency_ahead(offset, unit).start_of_frequency(unit)
    window_end = window_start.occurrences_of_frequency_ahead(accuracy_with_limit, unit).end_of_frequency(unit) if limit
    attributes[:trim_start] = window_start.jd
    attributes[:trim_end] = window_end.jd if limit
  end

  # Descending: the window ends `offset` periods before trim_start and,
  # when a limit is set, starts accuracy_with_limit periods before that.
  def apply_offset_desc
    unit = collapse_with_frequency
    window_end = trim_start.occurrences_of_frequency_ago(offset, unit).end_of_frequency(unit)
    window_start = window_end.occurrences_of_frequency_ago(accuracy_with_limit, unit).start_of_frequency(unit) if limit
    attributes[:trim_start] = window_start.jd if limit
    attributes[:trim_end] = window_end.jd
  end

  # Date built from the largest time returned by the per-column
  # "type DESC" / limit(1) queries. Memoised.
  def trim_start
    return @trim_start if defined?(@trim_start)
    @trim_start = Date.jd(boundary_times("type DESC").max)
  end

  # Date built from the largest time returned by the per-column
  # "type ASC" / limit(1) queries. Memoised.
  # NOTE(review): this takes the max across columns, same as trim_start --
  # confirm max (rather than min) is intended for the ascending bound.
  def trim_end
    return @trim_end if defined?(@trim_end)
    @trim_end = Date.jd(boundary_times("type ASC").max)
  end

  # accuracy widened by (limit - 1) when a positive limit is set.
  def accuracy_with_limit
    base = accuracy
    limit && limit > 0 ? base + limit - 1 : base
  end

  # The :accuracy attribute when given, otherwise a guess based on frequency.
  def accuracy
    explicit = attributes[:accuracy]
    return explicit unless explicit.blank?
    ACCURACY_BY_FREQUENCY.fetch(frequency, DEFAULT_ACCURACY)
  end

  def column_ids
    attributes[:column_ids]
  end

  # Memoised, so it keeps the limit first seen even if attributes[:limit]
  # is changed later.
  def limit
    @limit ||= attributes[:limit]
  end

  def offset
    attributes[:offset]
  end

  # :asc only when attributes[:order] is exactly :asc, :desc otherwise.
  def order
    @order ||= (attributes[:order] == :asc ? :asc : :desc)
  end

  # The unit used for date arithmetic: the frequency when the collapse is
  # :source, the collapse itself otherwise.
  def collapse_with_frequency
    collapse == :source ? frequency : collapse
  end

  def frequency
    attributes[:frequency]
  end

  def collapse
    attributes[:collapse]
  end

  private

  # Runs one single-row query per column id with the given ordering and
  # returns all plucked times as one flat array.
  def boundary_times(ordering)
    per_column = column_ids.collect do |column_id|
      Quandl::Cassandra::Column.limit(1).order(ordering).where( id: column_id, type: collapse ).pluck(:time)
    end
    per_column.flatten
  end

end
@@ -0,0 +1,55 @@
1
# Read-pipeline step that runs one prepared CQL query per column id and
# merges the results into rows keyed by time.
class Quandl::Cassandra::Column::Read::Query < Quandl::Cassandra::Column::Read

  # Stores the queried rows under attributes[:data].
  def perform
    attributes[:data] = select_data
  end

  # Prepares the statement once, executes it for every column id and returns
  # an array of flattened [time, value, value, ...] rows with one value slot
  # per column id (nil where a column has no value for that time).
  def select_data
    prepared = Quandl::Cassandra::Base.prepare( statement )
    column_ids = attributes[:column_ids]
    table = {}
    column_ids.each_with_index do |column_id, position|
      # the column type to read comes from the matching column collapse
      type = attributes[:column_collapses][position].to_s
      # NOTE(review): :one is passed as a third argument -- presumably the
      # consistency level; confirm against Base.prepare's statement API.
      rows = prepared.execute( column_id, type, :one )
      rows.each do |record|
        cells = (table[record['time']] ||= Array.new( column_ids.count ))
        # the first value seen for a time/column slot wins
        cells[position] ||= record['value']
      end
    end
    table.collect(&:flatten)
    # # fire off the queries
    # futures = []
    # attrs[:column_ids].each_with_index do | id, index |
    #   # pluck column type from collapses
    #   type = attrs[:column_collapses][index].to_s
    #   # bind and execute query
    #   futures << Quandl::Cassandra::Base.connection.execute_async( statement.bind( id, type ) )
    # end
    # # collect the results
    # futures.each_with_index do |future, column_index|
    #   t1 = Time.now
    #   rows = JavaDriver::ResultSet.new( future.get_uninterruptibly ).to_a
    #   rows.each do |row|
    #     data[row[0]] ||= Array.new(attrs[:column_ids].count)
    #     data[row[0]][column_index] ||= row[1]
    #   end
    #   JCQL::CommonLogger.info "#{cql} (#{attrs[:column_ids][column_index]}) (#{t1.elapsed.microseconds}ms)"
    # end
    # data
  end

  # Builds the CQL text. id and type are bound parameters; the trims, order
  # and limit are interpolated into the text, so they are validated here:
  # numeric values must pass Integer() and the order must be asc or desc.
  # For integer trims/limit and an :asc/:desc order the text is unchanged.
  #
  # Raises ArgumentError (or TypeError from Integer()) when a value cannot be
  # used safely in the statement.
  def statement
    clauses = ["SELECT time,value FROM columns WHERE"]
    clauses << " time >= #{Integer(attributes[:trim_start])} AND " if attributes[:trim_start]
    clauses << " time <= #{Integer(attributes[:trim_end])} AND " if attributes[:trim_end]
    clauses << " id = ? AND type = ?"
    clauses << " ORDER BY type #{order_keyword}" if attributes[:order]
    clauses << " LIMIT #{Integer(attributes[:limit])}" if attributes[:limit]
    clauses.join
  end

  private

  # Returns attributes[:order] as "asc" or "desc"; raises on anything else so
  # arbitrary text never reaches the statement.
  def order_keyword
    keyword = attributes[:order].to_s.downcase
    unless %w[asc desc].include?(keyword)
      raise ArgumentError, "invalid order: #{attributes[:order].inspect}"
    end
    keyword
  end

end
@@ -0,0 +1,20 @@
1
# Read-pipeline step that translates a :row request into :order, :offset
# and :limit attributes.
class Quandl::Cassandra::Column::Read::Row < Quandl::Cassandra::Column::Read

  # Does nothing unless :row is present. The value is compared and negated
  # as a number; no conversion happens in this class.
  def perform
    row = attributes[:row]
    return unless row.present?
    # a negative row counts in ascending order, so the sign is inverted and
    # shifted by one; a positive (or zero) row is an offset from the current
    # observation in descending order
    order, offset = row < 0 ? [:asc, (row * -1) - 1] : [:desc, row]
    attributes[:order]  = order
    attributes[:offset] = offset
    attributes[:limit]  = 1
  end

end
@@ -0,0 +1,53 @@
1
# Read-pipeline step that adjusts limit and trims so the requested transform
# has the extra observations it needs.
class Quandl::Cassandra::Column::Read::Transform < Quandl::Cassandra::Column::Read

  # Does nothing unless a :transform is present.
  def perform
    return unless attributes[:transform].present?
    apply_rdiff
    apply_cumul
  end

  # For :cumul with a limit: the result of Read::Offset.call is stored under
  # :data_table so the data table step can trim later, and the query-level
  # row, limit and offset are cleared because cumul needs all of the data.
  def apply_cumul
    return unless transform?( :cumul ) && limit.present?
    attributes[:data_table] = Quandl::Cassandra::Column::Read::Offset.call(attributes)
    attributes[:row]    = nil
    attributes[:limit]  = nil
    attributes[:offset] = nil
  end

  # For :rdiff and :diff: fetch one extra observation by raising the limit by
  # one and widening the relevant trim by one period.
  def apply_rdiff
    return unless transform?( :rdiff, :diff )
    attributes[:limit] += 1 if attributes[:limit]
    # NOTE(review): attributes[:collapse] is used directly as the period unit,
    # whereas Read::Offset substitutes the frequency when collapse is :source --
    # confirm that is intended.
    unit = attributes[:collapse]
    attributes[:trim_start] = trim_start.occurrences_of_frequency_ago( 1, unit ).jd if trim_start.present? && order == :desc
    attributes[:trim_end] = trim_end.occurrences_of_frequency_ahead( 1, unit ).jd if trim_end.present? && order == :asc
  end

  # True when attributes[:transform] equals any of the given keys.
  def transform?(*keys)
    keys.include?( attributes[:transform] )
  end

  # attributes[:trim_end] as a Date, or nil when it is not present.
  def trim_end
    @trim_end ||= (Date.jd( attributes[:trim_end] ) if attributes[:trim_end].present?)
  end

  # attributes[:trim_start] as a Date, or nil when it is not present.
  def trim_start
    @trim_start ||= (Date.jd( attributes[:trim_start] ) if attributes[:trim_start].present?)
  end

  def limit
    attributes[:limit]
  end

  # :asc only when attributes[:order] is exactly :asc, :desc otherwise.
  def order
    @order ||= (attributes[:order] == :asc ? :asc : :desc)
  end

end
@@ -0,0 +1,25 @@
1
# Read-pipeline step that coerces known attributes to their expected types
# and keeps a copy of the coerced attributes under :pristine.
class Quandl::Cassandra::Column::Read::Type < Quandl::Cassandra::Column::Read

  # attribute keys coerced with to_i
  INTEGER_KEYS = [:limit, :column, :trim_start, :trim_end, :offset, :accuracy, :row].freeze
  # attribute keys coerced with to_sym
  SYMBOL_KEYS = [:collapse, :transform, :frequency].freeze

  # Coerces every attribute already present, then stores a clone of the
  # attributes under :pristine unless one is already there.
  def perform
    attributes.each do |name, raw|
      attributes[name] = enforce_type(name, raw)
    end
    attributes[:pristine] ||= attributes.clone
  end

  # Returns value coerced according to key. nil stays nil for the integer and
  # symbol keys (via try); :order always becomes :asc or :desc; any other key
  # is returned unchanged.
  # NOTE(review): to_sym is applied to whatever value was passed in -- confirm
  # the values are bounded if they originate from user input.
  def enforce_type(key, value)
    if INTEGER_KEYS.include?(key)
      value.try(:to_i)
    elsif SYMBOL_KEYS.include?(key)
      value.try(:to_sym)
    elsif key == :order
      value.try(:to_sym) == :asc ? :asc : :desc
    else
      value
    end
  end

end
@@ -0,0 +1,28 @@
1
# Base class of the read pipeline and its entry point.
class Quandl::Cassandra::Column::Read < Quandl::Strategy::Strategize

  # The step classes subclass this class, so they are required from inside
  # its body, after the constant exists.
  require_relative 'read/collapse'
  require_relative 'read/column'
  require_relative 'read/data_table'
  require_relative 'read/offset'
  require_relative 'read/query'
  require_relative 'read/row'
  require_relative 'read/transform'
  require_relative 'read/type'

  define_attributes :id

  # Builds a Quandl::Strategy over the given attributes, registers the read
  # steps in the order listed below, and returns the result of performing it.
  def self.perform(attributes)
    steps = [
      Quandl::Cassandra::Column::Read::Type,
      Quandl::Cassandra::Column::Read::Row,
      Quandl::Cassandra::Column::Read::Column,
      Quandl::Cassandra::Column::Read::Collapse,
      Quandl::Cassandra::Column::Read::Transform,
      Quandl::Cassandra::Column::Read::Offset,
      Quandl::Cassandra::Column::Read::Query,
      Quandl::Cassandra::Column::Read::DataTable
    ]
    strategy = Quandl::Strategy.new( attributes ) do |c|
      steps.each { |step| c.use step }
    end
    strategy.perform
  end

end