quandl_cassandra_models 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +7 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE +7 -0
  5. data/README.md +7 -0
  6. data/Rakefile +11 -0
  7. data/UPGRADE.md +34 -0
  8. data/config/cassandra.yml +41 -0
  9. data/lib/quandl/cassandra/models.rb +16 -0
  10. data/lib/quandl/cassandra/models/column.rb +42 -0
  11. data/lib/quandl/cassandra/models/column/read.rb +49 -0
  12. data/lib/quandl/cassandra/models/column/read/collapse.rb +41 -0
  13. data/lib/quandl/cassandra/models/column/read/column.rb +19 -0
  14. data/lib/quandl/cassandra/models/column/read/data.rb +59 -0
  15. data/lib/quandl/cassandra/models/column/read/offset.rb +104 -0
  16. data/lib/quandl/cassandra/models/column/read/row.rb +20 -0
  17. data/lib/quandl/cassandra/models/column/read/select_columns.rb +63 -0
  18. data/lib/quandl/cassandra/models/column/read/transform.rb +53 -0
  19. data/lib/quandl/cassandra/models/column/read/trim.rb +14 -0
  20. data/lib/quandl/cassandra/models/column/read/type.rb +25 -0
  21. data/lib/quandl/cassandra/models/column/write.rb +25 -0
  22. data/lib/quandl/cassandra/models/column/write/group_data_by_column.rb +36 -0
  23. data/lib/quandl/cassandra/models/column/write/group_data_by_frequency.rb +24 -0
  24. data/lib/quandl/cassandra/models/column/write/insert_column_attributes.rb +22 -0
  25. data/lib/quandl/cassandra/models/column/write/insert_columns.rb +9 -0
  26. data/lib/quandl/cassandra/models/column_attribute.rb +11 -0
  27. data/lib/quandl/cassandra/models/data.rb +18 -0
  28. data/lib/quandl/cassandra/models/data/search.rb +105 -0
  29. data/lib/quandl/cassandra/models/dataset.rb +87 -0
  30. data/lib/quandl/cassandra/models/dataset/columns.rb +63 -0
  31. data/lib/quandl/cassandra/models/dataset_attribute.rb +6 -0
  32. data/lib/quandl/cassandra/models/multiset.rb +55 -0
  33. data/lib/quandl/cassandra/models/version.rb +7 -0
  34. data/migrations/20131105204200_create_datasets.rb +18 -0
  35. data/migrations/20131105204201_create_columns.rb +18 -0
  36. data/migrations/20131105204202_create_dataset_attributes.rb +17 -0
  37. data/migrations/20131105204203_create_column_attributes.rb +17 -0
  38. data/quandl_cassandra_models.gemspec +28 -0
  39. data/spec/expectations/string.rb +5 -0
  40. data/spec/expectations/time.rb +5 -0
  41. data/spec/factories/dataset.rb +8 -0
  42. data/spec/lib/quandl/cassandra/models/column/read_spec.rb +27 -0
  43. data/spec/lib/quandl/cassandra/models/column/write/group_data_by_frequency_spec.rb +28 -0
  44. data/spec/lib/quandl/cassandra/models/column/write_spec.rb +23 -0
  45. data/spec/lib/quandl/cassandra/models/column_attribute_spec.rb +16 -0
  46. data/spec/lib/quandl/cassandra/models/column_spec.rb +17 -0
  47. data/spec/lib/quandl/cassandra/models/data_spec.rb +105 -0
  48. data/spec/lib/quandl/cassandra/models/dataset/collapse_spec.rb +44 -0
  49. data/spec/lib/quandl/cassandra/models/dataset/column_spec.rb +24 -0
  50. data/spec/lib/quandl/cassandra/models/dataset/persistence_spec.rb +25 -0
  51. data/spec/lib/quandl/cassandra/models/dataset/row_spec.rb +26 -0
  52. data/spec/lib/quandl/cassandra/models/dataset/transform_spec.rb +16 -0
  53. data/spec/lib/quandl/cassandra/models/dataset/trim_spec.rb +74 -0
  54. data/spec/lib/quandl/cassandra/models/dataset/update_spec.rb +37 -0
  55. data/spec/lib/quandl/cassandra/models/dataset_attribute_spec.rb +18 -0
  56. data/spec/lib/quandl/cassandra/models/dataset_spec.rb +117 -0
  57. data/spec/lib/quandl/cassandra/models/multiset/collapse_spec.rb +122 -0
  58. data/spec/lib/quandl/cassandra/models/multiset/columns_spec.rb +57 -0
  59. data/spec/lib/quandl/cassandra/models/multiset/data_spec.rb +25 -0
  60. data/spec/lib/quandl/cassandra/models/multiset/transform_spec.rb +69 -0
  61. data/spec/spec_helper.rb +40 -0
  62. data/tasks/migrations.rake +14 -0
  63. metadata +212 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d22ebaef57fca90a9ffab18635ffb33f6cf4d85b
4
+ data.tar.gz: f9e5c8ab3b033b3478551e6833568c9e4b8c1bcf
5
+ SHA512:
6
+ metadata.gz: 9a59a536e6e8d8ca42f090a12606c30eb1758c1bc64018103aa1434af78f40b66dda50c3a21ced85212c71d03ac20606def5841844815880a5aefeed8ff3e037
7
+ data.tar.gz: b0a5d4a870bcbc01e157c0e56371ea4373a1bde000276f67ed43bc55b6f181097992bacf8fc0617dfa41072377dd913111a5048e8cb52c18a88f9aedba673c67
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ /Gemfile.lock
2
+ /pkg
3
+ /tmp
4
+ .rvmrc
5
+ *.gem
6
+ *.log
7
+ log/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+ gemspec
3
+
4
+ gem 'cassandra_migrations','~> 0.3', git: 'git@github.com:blakehilscher/cassandra_migrations.git'
data/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2012-2013 Blake Hilscher
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,7 @@
1
+ # Installation
2
+
3
+ ```ruby
4
+
5
+ gem 'quandl_cassandra_models', git: 'https://github.com/quandl/quandl_cassandra_models.git'
6
+
7
+ ```
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ require "bundler"
2
+ require "rake"
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
# Defines the `spec` rake task; its pattern selects every file matching
# spec/**/*_spec.rb.
desc "Run all specs"
RSpec::Core::RakeTask.new(:spec) { |spec_task| spec_task.pattern = "spec/**/*_spec.rb" }
10
+
11
+ load "tasks/migrations.rake"
data/UPGRADE.md ADDED
@@ -0,0 +1,34 @@
1
+ ## 0.3.5
2
+
3
+ * fixes quandl/wikiposit#417
4
+ * refactor column trim_start and trim_end to use Column.find_max_time_by_ids
5
+ * move error handling to strategy.perform
6
+
7
+
8
+ ## 0.3.3
9
+
10
+ * should handle column that is outside the range of available columns
11
+
12
+
13
+ ## 0.3.1
14
+
15
+ * given data with empty columns it should save the dataset
16
+
17
+
18
+ ## 0.3.0
19
+
20
+ * multiset: given a dataset whose columns have no saved column_attributes, it should not raise an error; it should guess at the frequency instead.
21
+
22
+ ## 0.2.1
23
+
24
+ * Dataset#data= accepts array data with dates
25
+
26
+
27
+ ## 0.2.0
28
+
29
+ * include Quandl::Cassandra::Models in Quandl::Cassandra
30
+
31
+
32
+ ## 0.0.1
33
+
34
+ * init
@@ -0,0 +1,41 @@
1
+ production:
2
+ host: 10.235.22.174
3
+ port: 9042
4
+ keyspace: wikiposit
5
+ consistency: one
6
+ replication:
7
+ class: SimpleStrategy
8
+ replication_factor: 1
9
+
10
+ staging:
11
+ host: 10.235.36.67
12
+ port: 9042
13
+ keyspace: wikiposit
14
+ consistency: one
15
+ replication:
16
+ class: SimpleStrategy
17
+ replication_factor: 1
18
+
19
+ development: &development
20
+ host: 127.0.0.1
21
+ port: 9042
22
+ keyspace: wikiposit
23
+ consistency: one
24
+ replication:
25
+ class: SimpleStrategy
26
+ replication_factor: 1
27
+
28
+ test:
29
+ <<: *development
30
+ keyspace: wikiposit_test
31
+
32
+ vagrant: &vagrant
33
+ <<: *development
34
+ host: 192.168.33.10
35
+
36
+ vagrant_staging:
37
+ <<: *vagrant
38
+
39
+ vagrant_test:
40
+ <<: *vagrant
41
+ keyspace: wikiposit_test
@@ -0,0 +1,16 @@
1
+ require 'quandl/cassandra/models/version'
2
+
3
+ require 'quandl/cassandra'
4
+
5
+ require 'quandl/cassandra/models/column'
6
+ require 'quandl/cassandra/models/column_attribute'
7
+ require 'quandl/cassandra/models/data'
8
+ require 'quandl/cassandra/models/dataset'
9
+ require 'quandl/cassandra/models/dataset_attribute'
10
+ require 'quandl/cassandra/models/multiset'
11
+
12
# Mixes Quandl::Cassandra::Models into Quandl::Cassandra, so constants defined
# under Models can also be resolved through Quandl::Cassandra.
module Quandl
  module Cassandra
    include Quandl::Cassandra::Models
  end
end
@@ -0,0 +1,42 @@
1
+ class Quandl::Cassandra::Models::Column < Quandl::Cassandra::Base
2
+
3
+ table_name :columns
4
+
5
+ require_relative 'column/read'
6
+ require_relative 'column/write'
7
+
8
+ class << self
9
+
10
# Runs Column::Read.perform with the given arguments and returns only the
# :data entry of its result.
def read(*args)
  result = Quandl::Cassandra::Models::Column::Read.perform(*args)
  result[:data]
end
13
+
14
# Runs Column::Write.perform with the given arguments and returns its result.
def write(*args)
  writer = Quandl::Cassandra::Models::Column::Write
  writer.perform(*args)
end
17
+
18
# Looks up a boundary time for the given column ids and converts it to a Date.
#
# ids       - column ids to search.
# collapses - per-column collapse values; when they are all equal a single
#             query covers every id, otherwise each column is queried on its own.
# order     - "ASC" or "DESC"; forwarded to the lookup that builds the ORDER BY.
#
# Returns a Date built from the Julian day number, or nil when the lookup does
# not produce an Integer.
def find_max_time_by_ids(ids, collapses, order)
  # Forward the caller's order. It used to be discarded in favour of a
  # hard-coded "DESC", so "ASC" callers received the same value as "DESC" ones.
  time = if collapses.uniq.count == 1
    find_max_time_by_ids_and_collapse(ids, collapses.first, order)
  else
    find_max_time_by_ids_and_collapses(ids, collapses, order)
  end
  Date.jd(time) if time.is_a?(Integer)
end
24
+
25
# Looks up the first time of each column separately (used when the columns do
# not all share one collapse) and returns the largest of those times.
#
# ids       - column ids; ids[i] is paired with collapses[i].
# collapses - per-column collapse values; a column with a blank collapse is skipped.
# order     - "ASC" or "DESC"; interpolated into the ORDER BY clause.
#
# Returns the largest time found, or nil when no column produced a row.
def find_max_time_by_ids_and_collapses(ids, collapses, order)
  times = []
  # Iterate over the `ids` argument: the previous code referenced an undefined
  # `column_ids` here and an undefined `collapse` in the query below.
  ids.each_with_index do |id, index|
    column_collapse = collapses[index]
    next if column_collapse.blank?
    row = limit(1).order("type #{order}, time #{order}").where( id: id, type: column_collapse ).select(:time, :type).to_a[0]
    # a column without any stored row contributes nothing instead of raising on nil
    times << row['time'] if row && row['time']
  end
  times.flatten.max
end
35
+
36
# Runs a single query over all the given ids for one shared collapse and
# returns the 'time' of the first row.
#
# ids      - column ids matched by the query.
# collapse - value matched against the `type` column.
# order    - "ASC" or "DESC"; interpolated into the ORDER BY clause.
#
# Returns the first row's 'time', or nil when the query yields no row (the
# caller already treats a non-Integer result as "no time").
def find_max_time_by_ids_and_collapse(ids, collapse, order)
  row = limit(1).order("type #{order}, time #{order}").where( id: ids, type: collapse ).select(:time, :type).to_a[0]
  row['time'] if row
end
39
+
40
+ end
41
+
42
+ end
@@ -0,0 +1,49 @@
1
+ class Quandl::Cassandra::Models::Column::Read < Quandl::Strategy::Strategize
2
+
3
+ require_relative 'read/collapse'
4
+ require_relative 'read/column'
5
+ require_relative 'read/data'
6
+ require_relative 'read/offset'
7
+ require_relative 'read/select_columns'
8
+ require_relative 'read/row'
9
+ require_relative 'read/transform'
10
+ require_relative 'read/trim'
11
+ require_relative 'read/type'
12
+
13
+ define_attributes :id
14
+
15
# Builds a Quandl::Strategy from the attributes, registers the read steps in
# the order listed below, runs it and returns the result of strategy.perform.
#
# Any StandardError is logged through Quandl::Logger together with the
# attributes and then re-raised unchanged.
def self.perform(attributes)
  strategy = Quandl::Strategy.new( attributes ) do |c|
    c.use Quandl::Cassandra::Models::Column::Read::Type
    c.use Quandl::Cassandra::Models::Column::Read::Row
    c.use Quandl::Cassandra::Models::Column::Read::Column
    c.use Quandl::Cassandra::Models::Column::Read::Collapse
    c.use Quandl::Cassandra::Models::Column::Read::Transform
    c.use Quandl::Cassandra::Models::Column::Read::Offset
    c.use Quandl::Cassandra::Models::Column::Read::Trim
    c.use Quandl::Cassandra::Models::Column::Read::SelectColumns
    c.use Quandl::Cassandra::Models::Column::Read::Data
  end
  strategy.perform
rescue => e
  # `strategy` is still nil when Quandl::Strategy.new itself raised; fall back
  # to the raw attributes so that logging cannot mask the original error with
  # a NoMethodError.
  logged_attributes = strategy ? strategy.attributes : attributes
  Quandl::Logger.error("#{e} #{logged_attributes}")
  raise
end
32
+
33
# True only when attributes[:count] compares equal to true.
def count?
  flag = attributes[:count]
  flag == true
end
36
+
37
# Stores the given id(s) in attributes[:column_ids] as an array: the value is
# wrapped with Array(), top-level nils are dropped, then nested arrays are
# flattened.
def column_ids=(value)
  ids = Array(value).compact
  attributes[:column_ids] = ids.flatten
end
40
+
41
# Reader for the :column_collapses entry of the attributes.
def column_collapses
  attributes[:column_collapses]
end
44
+
45
# Reader for the :column_ids entry of the attributes.
def column_ids
  attributes[:column_ids]
end
48
+
49
+ end
@@ -0,0 +1,41 @@
1
+ class Quandl::Cassandra::Models::Column::Read::Collapse < Quandl::Cassandra::Models::Column::Read
2
+
3
# Fills in the collapse-related attributes:
#   :collapse           defaults to :source
#   :column_frequencies defaults to the result of select_column_frequencies
#   :column_collapses   is rebuilt from the frequencies and the collapse
#   :frequency          is set from find_largest_frequency
def perform
  attributes[:collapse] ||= :source
  attributes[:column_frequencies] ||= select_column_frequencies
  frequencies = attributes[:column_frequencies]
  attributes[:column_collapses] = build_column_collapses_using_frequencies( frequencies, attributes[:collapse] )
  attributes[:frequency] = find_largest_frequency( frequencies )
end
9
+
10
# Queries ColumnAttribute for the id and frequency of every column id and
# returns the frequencies in the same order as column_ids; a column without a
# matching row yields nil.
def select_column_frequencies
  rows = Quandl::Cassandra::Models::ColumnAttribute.where( id: column_ids ).select(:id, :frequency).to_a
  column_ids.map do |column_id|
    match = rows.find { |row| row['id'] == column_id }
    match.try(:[], 'frequency')
  end
end
14
+
15
# Maps each column frequency to the collapse that column should use.
#
# freqs    - one frequency per column.
# collapse - the requested collapse.
#
# Returns an array, one entry per frequency: `collapse` when it is listed in
# Collapse.collapses_greater_than(frequency), otherwise :source (no collapse
# needed for that column).
def build_column_collapses_using_frequencies(freqs, collapse)
  freqs.map do |frequency|
    needs_collapse = Quandl::Operation::Collapse.collapses_greater_than( frequency ).include?( collapse )
    needs_collapse ? collapse : :source
  end
end
28
+
29
+ # [:daily, :weekly, :monthly]
30
# Picks, starting from :daily, the frequency with the highest position in
# Quandl::Operation::Collapse.valid_collapses.
#
# freqs - the column frequencies to compare. The argument is now actually
#         used; previously it was ignored and attributes[:column_frequencies]
#         was read instead.
#
# Returns the winning frequency (:daily when freqs is empty).
def find_largest_frequency(freqs)
  collapses = Quandl::Operation::Collapse.valid_collapses
  freqs.inject(:daily) do |largest, column_freq|
    # a frequency missing from valid_collapses has a nil index; to_i turns it
    # into 0 so it can never replace the current winner
    collapses.index(column_freq).to_i > collapses.index(largest) ? column_freq : largest
  end
end
40
+
41
+ end
@@ -0,0 +1,19 @@
1
+ class Quandl::Cassandra::Models::Column::Read::Column < Quandl::Cassandra::Models::Column::Read
2
+
3
# Resolves which column ids the read applies to.
#
# When an id is present and no column ids were given, they are looked up with
# Dataset.find_column_ids_by_id. Blank column ids become an empty array. When
# a specific column was requested and column ids exist, they are narrowed down
# to the result of pluck_column_id.
def perform
  if id.present?
    self.column_ids ||= Quandl::Cassandra::Models::Dataset.find_column_ids_by_id(id)
  end
  self.column_ids = [] if column_ids.blank?
  return unless column.present? && column_ids.present?
  self.column_ids = pluck_column_id
end
10
+
11
# Returns a one-element array holding the column id found at index `column`
# (nil inside the array when there is no id at that index).
def pluck_column_id
  selected = column_ids[ column ]
  [ selected ]
end
14
+
15
# Returns attributes[:column] minus one when a column was given, otherwise nil.
def column
  position = attributes[:column]
  position - 1 if position.present?
end
18
+
19
+ end
@@ -0,0 +1,59 @@
1
+ class Quandl::Cassandra::Models::Column::Read::Data < Quandl::Cassandra::Models::Column::Read
2
+
3
# Replaces attributes[:data] with the result of quandl_data, unless this is a
# count request, in which case nothing is changed.
def perform
  return if count?
  attributes[:data] = quandl_data( attributes[:data] )
end
6
+
7
# Wraps the raw data in a Quandl::Cassandra::Models::Data object, copies the
# dataset id onto it (and, unless a single column was requested, the column
# ids and column frequencies), then runs the post-processing steps in this
# order: collapse, transform, trim, sort, row, limit.
#
# Returns the result of the final step.
def quandl_data(data)
  table = Quandl::Cassandra::Models::Data.new( data )
  table.dataset_id = attributes[:id]
  unless attributes[:column].present?
    table.column_ids = column_ids
    table.column_frequencies = attributes[:column_frequencies]
  end
  # post process
  collapsed = collapse( table )
  transformed = transform( collapsed )
  trimmed = trim( transformed )
  sorted = sort( trimmed )
  selected = row( sorted )
  limit( selected )
end
22
+
23
# Applies the :trim_start / :trim_end values found in attributes[:data_table].
#
# Returns data untouched when there is no data_table. Otherwise, when either
# bound is set, the data is first put in ascending sort order and then each
# bound that is set is applied.
def trim(data)
  table = attributes[:data_table]
  return data unless table.present?
  start_bound = table[:trim_start]
  end_bound = table[:trim_end]
  data = data.sort_order( :asc ) if start_bound || end_bound
  data = data.trim_start( start_bound ) if start_bound
  data = data.trim_end( end_bound ) if end_bound
  data
end
30
+
31
# Returns the data put in the sort order given by attributes[:order].
def sort(data)
  data.sort_order( attributes[:order] )
end
34
+
35
# Collapses the data by attributes[:collapse] when one is set; otherwise
# returns the data unchanged.
def collapse(data)
  requested = attributes[:collapse]
  requested ? data.collapse( requested ) : data
end
39
+
40
# Transforms the data by attributes[:transform] when one is set; otherwise
# returns the data unchanged.
def transform(data)
  requested = attributes[:transform]
  requested ? data.transform( requested ) : data
end
44
+
45
# Limits the data to attributes[:pristine][:limit] rows when that limit is
# set; otherwise returns the data unchanged.
def limit(data)
  cap = attributes[:pristine][:limit]
  cap ? data.limit( cap ) : data
end
49
+
50
# When a row was requested and the data holds more than one row, keeps only
# the last row of data.data_array. Returns the same data object.
def row(data)
  return data unless attributes[:row].present? && data.count > 1
  # grab the last row
  data.data_array = [ data.data_array[-1] ]
  data
end
58
+
59
+ end
@@ -0,0 +1,104 @@
1
+ class Quandl::Cassandra::Models::Column::Read::Offset < Quandl::Cassandra::Models::Column::Read
2
+
3
# Applies the offset handling, but only when there are column ids to query:
# first the transform-specific variant, then the regular offset.
def perform
  if column_ids.present?
    apply_offset_with_transform
    apply_offset
  end
end
9
+
10
# Special case for a row request combined with the :rdiff_from transform,
# which needs the data from the current point to the offset.
#
# The result set is bounded with a trim instead of limit/offset: for :desc
# order :trim_start is moved back by attributes[:row] occurrences from
# trim_start, for :asc order :trim_end is moved ahead from trim_end. Afterwards
# :limit and :offset are cleared so the query itself does not limit or offset.
def apply_offset_with_transform
  return unless attributes[:row].present? && attributes[:transform] == :rdiff_from
  if order == :desc
    anchor = trim_start.occurrences_of_frequency_ago( attributes[:row], collapse_with_frequency )
    attributes[:trim_start] = anchor.end_of_frequency( collapse_with_frequency ).jd
  end
  if order == :asc
    anchor = trim_end.occurrences_of_frequency_ahead( attributes[:row], collapse_with_frequency )
    attributes[:trim_end] = anchor.end_of_frequency( collapse_with_frequency ).jd
  end
  # the query should not limit or offset the data
  attributes[:limit] = nil
  attributes[:offset] = nil
end
21
+
22
# Dispatches to the ascending or descending offset calculation when an offset
# was requested; does nothing otherwise.
def apply_offset
  return if attributes[:offset].blank?
  if order == :asc
    apply_offset_asc
  else
    apply_offset_desc
  end
end
26
+
27
# Turns offset (and limit) into trims for ascending order.
#
# The window starts `offset` occurrences ahead of trim_end, at the start of
# that period. When a limit is set it ends accuracy_with_limit occurrences
# further ahead, at the end of that period. :trim_start is always set;
# :trim_end only when a limit is set.
def apply_offset_asc
  period = collapse_with_frequency
  # calculate ranges
  window_start = trim_end.occurrences_of_frequency_ahead( offset, period ).start_of_frequency( period )
  window_end = nil
  if limit
    window_end = window_start.occurrences_of_frequency_ahead( accuracy_with_limit, period ).end_of_frequency( period )
  end
  # set trims
  attributes[:trim_start] = window_start.jd
  attributes[:trim_end] = window_end.jd if limit
end
35
+
36
# Turns offset (and limit) into trims for descending order.
#
# The window ends `offset` occurrences before trim_start, at the end of that
# period. When a limit is set it begins accuracy_with_limit occurrences
# earlier, at the start of that period. :trim_end is always set; :trim_start
# only when a limit is set.
def apply_offset_desc
  period = collapse_with_frequency
  # calculate ranges
  window_end = trim_start.occurrences_of_frequency_ago( offset, period ).end_of_frequency( period )
  window_start = nil
  if limit
    window_start = window_end.occurrences_of_frequency_ago( accuracy_with_limit, period ).start_of_frequency( period )
  end
  # set trims
  attributes[:trim_start] = window_start.jd if limit
  attributes[:trim_end] = window_end.jd
end
44
+
45
# Memoized result of Column.find_max_time_by_ids for the current column ids
# and collapses, requested with "DESC" order.
def trim_start
  return @trim_start if @trim_start
  @trim_start = Quandl::Cassandra::Column.find_max_time_by_ids(column_ids, column_collapses, "DESC")
end
48
+
49
# Memoized result of Column.find_max_time_by_ids for the current column ids
# and collapses, requested with "ASC" order.
def trim_end
  return @trim_end if @trim_end
  @trim_end = Quandl::Cassandra::Column.find_max_time_by_ids(column_ids, column_collapses, "ASC")
end
52
+
53
# Returns the accuracy widened by the limit: accuracy + limit - 1 when a
# positive limit is set, otherwise just the accuracy.
def accuracy_with_limit
  # a short circuit (always returning 0) was considered here;
  # revisit this at a later date
  return accuracy unless limit && limit > 0
  accuracy + limit - 1
end
61
+
62
# Returns attributes[:accuracy] when the query included one. Otherwise guesses
# it from the frequency: 0 for :daily and :annual, 1 for :weekly, :monthly and
# :quarterly, 3 for anything else.
def accuracy
  requested = attributes[:accuracy]
  return requested unless requested.blank?
  case frequency
  when :daily, :annual then 0
  when :weekly, :monthly, :quarterly then 1
  else 3
  end
end
79
+
80
# Reads attributes[:limit] and remembers the first truthy value it sees.
def limit
  @limit = attributes[:limit] unless @limit
  @limit
end
83
+
84
# Reader for the :offset entry of the attributes.
def offset
  attributes[:offset]
end
87
+
88
# Normalized, memoized sort order: :asc only when attributes[:order] is :asc,
# :desc for every other value.
def order
  return @order if @order
  @order = (attributes[:order] == :asc) ? :asc : :desc
end
91
+
92
# Returns the frequency when the collapse is :source, otherwise the collapse.
def collapse_with_frequency
  if collapse == :source
    frequency
  else
    collapse
  end
end
95
+
96
# Reader for the :frequency entry of the attributes.
def frequency
  attributes[:frequency]
end
99
+
100
# Reader for the :collapse entry of the attributes.
def collapse
  attributes[:collapse]
end
103
+
104
+ end