quandl_cassandra 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/UPGRADE.md +8 -0
- data/lib/quandl/cassandra.rb +2 -0
- data/lib/quandl/cassandra/base/connection.rb +34 -5
- data/lib/quandl/cassandra/base/logging.rb +1 -13
- data/lib/quandl/cassandra/batch.rb +9 -0
- data/lib/quandl/cassandra/batch/insert.rb +53 -0
- data/lib/quandl/cassandra/batch/logging.rb +22 -0
- data/lib/quandl/cassandra/version.rb +1 -1
- data/lib/quandl/cassandra_models/column/read/select_columns.rb +11 -23
- data/lib/quandl/cassandra_models/column/write/insert_column_attributes.rb +10 -12
- data/lib/quandl/cassandra_models/column/write/insert_columns.rb +2 -23
- data/lib/quandl/cassandra_models/dataset.rb +1 -1
- data/quandl_cassandra.gemspec +2 -1
- data/spec/lib/quandl/cassandra_models/column/write_spec.rb +8 -0
- data/spec/lib/quandl/cassandra_models/dataset_spec.rb +49 -45
- metadata +21 -2
data/UPGRADE.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.3.0
|
2
|
+
|
3
|
+
* Base::Connection methods use #with_connection
|
4
|
+
* InsertColumnAttributes uses Batch.insert
|
5
|
+
* InsertColumns uses Batch.insert
|
6
|
+
* add Quandl::Cassandra::Batch
|
7
|
+
* insert_columns_in_batches_with_threads does not care how many rows are sent
|
8
|
+
|
1
9
|
## 0.0.1
|
2
10
|
|
3
11
|
* Begin
|
data/lib/quandl/cassandra.rb
CHANGED
@@ -7,6 +7,7 @@ require "active_support/core_ext/object"
|
|
7
7
|
|
8
8
|
require 'cql'
|
9
9
|
require 'scope_composer'
|
10
|
+
require 'facter'
|
10
11
|
|
11
12
|
require 'quandl/logger'
|
12
13
|
require "quandl/data"
|
@@ -16,6 +17,7 @@ require 'quandl/cassandra/error'
|
|
16
17
|
require 'quandl/cassandra/types'
|
17
18
|
require 'quandl/cassandra/base'
|
18
19
|
require 'quandl/cassandra/configuration'
|
20
|
+
require 'quandl/cassandra/batch'
|
19
21
|
|
20
22
|
require 'quandl/cassandra_models/column'
|
21
23
|
require 'quandl/cassandra_models/column_attribute'
|
@@ -12,17 +12,40 @@ module Quandl::Cassandra::Base::Connection
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def prepare(statement)
|
15
|
-
|
15
|
+
with_connection do |c|
|
16
|
+
c.prepare(statement)
|
17
|
+
end
|
16
18
|
end
|
17
19
|
|
18
20
|
def execute_async(statement, query_consistency = nil)
|
19
21
|
query_consistency = consistency unless query_consistency.present?
|
20
|
-
|
22
|
+
with_connection do |c|
|
23
|
+
c.async.execute( statement, query_consistency )
|
24
|
+
end
|
21
25
|
end
|
22
26
|
|
23
|
-
def execute(statement,
|
24
|
-
|
25
|
-
|
27
|
+
def execute(statement, qconsistency = nil)
|
28
|
+
qconsistency = consistency unless qconsistency.present?
|
29
|
+
with_connection do |c|
|
30
|
+
c.execute( statement, qconsistency )
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def with_connection(&block)
|
35
|
+
begin
|
36
|
+
yield(connection)
|
37
|
+
|
38
|
+
rescue Cql::Io::ConnectionError => e
|
39
|
+
Quandl::Logger.error(e)
|
40
|
+
reset_connection
|
41
|
+
raise Cql::Io::ConnectionError
|
42
|
+
|
43
|
+
rescue Cql::NotConnectedError => e
|
44
|
+
Quandl::Logger.error(e)
|
45
|
+
reset_connection
|
46
|
+
raise Cql::NotConnectedError
|
47
|
+
|
48
|
+
end
|
26
49
|
end
|
27
50
|
|
28
51
|
def connection
|
@@ -33,6 +56,12 @@ module Quandl::Cassandra::Base::Connection
|
|
33
56
|
Quandl::Cassandra.configuration.consistency
|
34
57
|
end
|
35
58
|
|
59
|
+
def reset_connection
|
60
|
+
connection.close
|
61
|
+
@@connection = establish_connection
|
62
|
+
true
|
63
|
+
end
|
64
|
+
|
36
65
|
def establish_connection
|
37
66
|
c = Cql::Client.connect(
|
38
67
|
hosts: Quandl::Cassandra.configuration.hosts,
|
@@ -10,19 +10,7 @@ module Quandl::Cassandra::Base::Logging
|
|
10
10
|
Quandl::Logger.debug(statement)
|
11
11
|
super if defined?(super)
|
12
12
|
end
|
13
|
-
|
14
|
-
def execute_async(*args, &block)
|
15
|
-
statement = args.first.to_s
|
16
|
-
statement = "#{statement[0..200]} ... #{statement.length} chars" if statement.length > 200
|
17
|
-
t1 = Time.now
|
18
|
-
begin
|
19
|
-
r = super if defined?(super)
|
20
|
-
ensure
|
21
|
-
Quandl::Logger.debug("(#{t1.elapsed_ms}) #{statement}")
|
22
|
-
end
|
23
|
-
r
|
24
|
-
end
|
25
|
-
|
13
|
+
|
26
14
|
def execute(*args, &block)
|
27
15
|
statement = args.first.to_s
|
28
16
|
statement = "#{statement[0..200]} ... #{statement.length} chars" if statement.length > 200
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'quandl/cassandra/batch/logging'
|
2
|
+
require 'quandl/cassandra/batch/insert'
|
3
|
+
|
4
|
+
class Quandl::Cassandra::Batch
|
5
|
+
|
6
|
+
include Quandl::Cassandra::Batch::Insert
|
7
|
+
include Quandl::Cassandra::Batch::Logging if defined?(QUANDL_LOGGER) && QUANDL_LOGGER == true
|
8
|
+
|
9
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Quandl::Cassandra::Batch::Insert
|
2
|
+
|
3
|
+
extend ActiveSupport::Concern
|
4
|
+
|
5
|
+
module ClassMethods
|
6
|
+
|
7
|
+
# Quandl::Cassandra::Batch.insert(rows) do |id, type, time, value|
|
8
|
+
# "INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
|
9
|
+
# end
|
10
|
+
def insert(rows, &block)
|
11
|
+
insert_in_batches(rows, &block)
|
12
|
+
end
|
13
|
+
|
14
|
+
def batch_size
|
15
|
+
Quandl::Cassandra.configuration.batch_size
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
protected
|
20
|
+
|
21
|
+
def insert_in_batches_with_threads(rows, &block)
|
22
|
+
# split rows into groups by rows_per_thread
|
23
|
+
threads = rows.each_slice( rows_per_thread(rows) ).map do |rows_slice|
|
24
|
+
Thread.start{ insert_in_batches(rows_slice, &block) }
|
25
|
+
end
|
26
|
+
threads.each(&:join)
|
27
|
+
end
|
28
|
+
|
29
|
+
def insert_in_batches(rows, &block)
|
30
|
+
futures = []
|
31
|
+
rows.each_slice( batch_size ).each do |rows_slice|
|
32
|
+
statements = rows_slice.collect{|row| block.call( *row ) }
|
33
|
+
futures << execute_async_batch(statements)
|
34
|
+
end
|
35
|
+
futures.collect(&:value)
|
36
|
+
end
|
37
|
+
|
38
|
+
def rows_per_thread(rows)
|
39
|
+
r = rows.count / Facter.processorcount.to_i
|
40
|
+
r = 1 if r <= 0
|
41
|
+
r
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
def execute_async_batch(statements)
|
48
|
+
batch = %Q{BEGIN UNLOGGED BATCH\n#{statements.join("\n")}\nAPPLY BATCH;}
|
49
|
+
future = Quandl::Cassandra::Base.execute_async( batch )
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class Quandl::Cassandra::Batch
|
2
|
+
module Logging
|
3
|
+
|
4
|
+
extend ActiveSupport::Concern
|
5
|
+
|
6
|
+
module ClassMethods
|
7
|
+
|
8
|
+
def insert(rows, &block)
|
9
|
+
# log init
|
10
|
+
statement = block.call(rows[0].collect{'?'})
|
11
|
+
t1 = Time.now
|
12
|
+
# call method
|
13
|
+
r = super if defined?(super)
|
14
|
+
# log write
|
15
|
+
Quandl::Logger.debug("(#{t1.elapsed_ms}) [#{rows.count} rows] BATCH #{statement}")
|
16
|
+
r
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
@@ -18,39 +18,27 @@ class Quandl::Cassandra::Column::Read::SelectColumns < Quandl::Cassandra::Column
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def select_data
|
21
|
+
t1 = Time.now
|
22
|
+
# fire off the queries
|
21
23
|
prepared = Quandl::Cassandra::Base.prepare( statement )
|
22
24
|
data = {}
|
25
|
+
futures = []
|
23
26
|
attributes[:column_ids].each_with_index do | id, index |
|
24
27
|
# pluck column type from collapses
|
25
28
|
type = attributes[:column_collapses][index].to_s
|
26
29
|
# bind and execute query
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
+
futures << prepared.async.execute( id, type, Quandl::Cassandra::Base.consistency )
|
31
|
+
end
|
32
|
+
# collect the results
|
33
|
+
futures.each_with_index do |future, index|
|
34
|
+
# collect result
|
35
|
+
future.value.each do |row|
|
30
36
|
data[row['time']] ||= Array.new( attributes[:column_ids].count )
|
31
37
|
data[row['time']][index] ||= row['value']
|
32
38
|
end
|
33
39
|
end
|
34
|
-
|
35
|
-
|
36
|
-
# futures = []
|
37
|
-
# attrs[:column_ids].each_with_index do | id, index |
|
38
|
-
# # pluck column type from collapses
|
39
|
-
# type = attrs[:column_collapses][index].to_s
|
40
|
-
# # bind and execute query
|
41
|
-
# futures << Quandl::Cassandra::Base.connection.execute_async( statement.bind( id, type ) )
|
42
|
-
# end
|
43
|
-
# # collect the results
|
44
|
-
# futures.each_with_index do |future, column_index|
|
45
|
-
# t1 = Time.now
|
46
|
-
# rows = JavaDriver::ResultSet.new( future.get_uninterruptibly ).to_a
|
47
|
-
# rows.each do |row|
|
48
|
-
# data[row[0]] ||= Array.new(attrs[:column_ids].count)
|
49
|
-
# data[row[0]][column_index] ||= row[1]
|
50
|
-
# end
|
51
|
-
# JCQL::CommonLogger.info "#{cql} (#{attrs[:column_ids][column_index]}) (#{t1.elapsed.microseconds}ms)"
|
52
|
-
# end
|
53
|
-
# data
|
40
|
+
Quandl::Logger.debug("(#{t1.elapsed_ms}) #{self.class.name}.select_data")
|
41
|
+
data
|
54
42
|
end
|
55
43
|
|
56
44
|
def statement
|
@@ -5,18 +5,16 @@ class Quandl::Cassandra::Column::Write::InsertColumnAttributes < Quandl::Cassand
|
|
5
5
|
|
6
6
|
def perform
|
7
7
|
return if column_ids.blank?
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
def column_attributes_statement( column_id )
|
19
|
-
"INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{frequency}' )"
|
8
|
+
# format data for batch insertion
|
9
|
+
rows_values = []
|
10
|
+
column_ids.each_with_index do |column_id, position|
|
11
|
+
rows_values << [id, column_id, position, frequency]
|
12
|
+
end
|
13
|
+
# insert data
|
14
|
+
Quandl::Cassandra::Batch.insert(rows_values) do |id, column_id, position, frequency|
|
15
|
+
%Q{INSERT INTO datasets (id, column_id, position) VALUES (#{id}, #{column_id}, #{position})
|
16
|
+
INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{frequency}' )}
|
17
|
+
end
|
20
18
|
end
|
21
19
|
|
22
20
|
end
|
@@ -1,30 +1,9 @@
|
|
1
1
|
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write
|
2
2
|
|
3
3
|
def perform
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
def insert_columns_in_batches
|
8
|
-
threads = statement_values.each_slice( statement_values.size / 8 ).map do |threads_slice|
|
9
|
-
Thread.start do
|
10
|
-
futures = []
|
11
|
-
threads_slice.each_slice( Quandl::Cassandra.configuration.batch_size ).each do |batch_slice|
|
12
|
-
statements = batch_slice.collect{|row| statement( *row ) }
|
13
|
-
futures << execute_async_batch(statements)
|
14
|
-
end
|
15
|
-
futures.collect(&:value)
|
16
|
-
end
|
4
|
+
Quandl::Cassandra::Batch.insert(statement_values) do |id, type, time, value|
|
5
|
+
"INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
|
17
6
|
end
|
18
|
-
threads.each(&:join)
|
19
|
-
end
|
20
|
-
|
21
|
-
def execute_async_batch(statements)
|
22
|
-
batch = %Q{BEGIN UNLOGGED BATCH\n#{statements.join("\n")}\nAPPLY BATCH;}
|
23
|
-
future = Quandl::Cassandra::Base.execute_async( batch )
|
24
|
-
end
|
25
|
-
|
26
|
-
def statement( id, type, time, value )
|
27
|
-
"INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
|
28
7
|
end
|
29
8
|
|
30
9
|
end
|
data/quandl_cassandra.gemspec
CHANGED
@@ -25,7 +25,8 @@ Gem::Specification.new do |s|
|
|
25
25
|
|
26
26
|
s.add_runtime_dependency "activesupport", ">= 3.0.0"
|
27
27
|
s.add_runtime_dependency "activemodel", ">= 3.0.0"
|
28
|
-
|
28
|
+
|
29
|
+
s.add_runtime_dependency "facter", "~> 1.7.3"
|
29
30
|
s.add_runtime_dependency "scope_composer", "~> 0.4"
|
30
31
|
s.add_runtime_dependency "quandl_data", "~> 1.0"
|
31
32
|
s.add_runtime_dependency "quandl_logger", "~> 0.1"
|
@@ -12,4 +12,12 @@ describe Quandl::Cassandra::Column::Write do
|
|
12
12
|
data.should eq source_data
|
13
13
|
end
|
14
14
|
|
15
|
+
context "given tiny data array" do
|
16
|
+
let(:data){ Quandl::Fabricate::Data.rand( columns: 1, rows: 2, nils: false ) }
|
17
|
+
before(:each){ Quandl::Cassandra::Column.write( id: id, data: data ) }
|
18
|
+
it "should have written the data" do
|
19
|
+
Quandl::Cassandra::Column.read( id: id ).should eq data
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
15
23
|
end
|
@@ -41,62 +41,66 @@ describe Quandl::Cassandra::Dataset do
|
|
41
41
|
its(:table_name){ should eq 'datasets' }
|
42
42
|
end
|
43
43
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
dataset.save
|
49
|
-
}
|
44
|
+
context "given data" do
|
45
|
+
before(:each){ dataset.data = Quandl::Fabricate::Data.rand(rows: 10, columns: 2, nils: false) }
|
46
|
+
|
47
|
+
describe "#save" do
|
48
|
+
before(:each){ dataset.save }
|
50
49
|
|
51
|
-
|
50
|
+
subject{ dataset }
|
52
51
|
|
53
|
-
|
52
|
+
its(:changes){ should be_blank }
|
53
|
+
its(:frequency){ should eq 'daily' }
|
54
54
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
55
|
+
describe ".find" do
|
56
|
+
subject{ Quandl::Cassandra::Dataset.find(id) }
|
57
|
+
its(:data){ should eq dataset.data.to_table }
|
58
|
+
its(:frequency){ should eq 'daily' }
|
59
|
+
|
60
|
+
it "data should count and return data" do
|
61
|
+
subject.data.count.should eq 10
|
62
|
+
subject.data.to_table.should be_a Quandl::Cassandra::Data
|
63
|
+
end
|
64
|
+
it "columns should eq dataset.columns" do
|
65
|
+
subject.columns.collect{|c| c.id.to_s }.should eq dataset.columns.collect{|c| c.id.to_s }
|
66
|
+
end
|
67
|
+
it "column_ids should eq dataset.column_ids" do
|
68
|
+
subject.column_ids.collect(&:to_s).should eq dataset.column_ids.collect(&:to_s)
|
69
|
+
end
|
67
70
|
end
|
68
|
-
end
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
72
|
+
describe "#reload" do
|
73
|
+
before(:each){
|
74
|
+
dataset.data.limit(5).to_a
|
75
|
+
dataset.reload
|
76
|
+
}
|
75
77
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
78
|
+
describe "#attributes" do
|
79
|
+
subject{ dataset.attributes }
|
80
|
+
its([:data]){ should eq nil }
|
81
|
+
end
|
80
82
|
|
81
|
-
|
83
|
+
end
|
82
84
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
85
|
+
describe "#data" do
|
86
|
+
subject{ dataset.data }
|
87
|
+
its(:count){ should eq 10 }
|
88
|
+
end
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
90
|
+
describe "#column_ids" do
|
91
|
+
subject{ dataset.column_ids }
|
92
|
+
its(:count){ should eq 2 }
|
93
|
+
its(:first){ should be_a Cql::Uuid }
|
94
|
+
end
|
93
95
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
96
|
+
describe "#columns" do
|
97
|
+
subject{ dataset.columns }
|
98
|
+
its(:count){ should eq 2 }
|
99
|
+
its(:first){ should be_a Quandl::Cassandra::ColumnAttribute }
|
100
|
+
end
|
99
101
|
|
102
|
+
end
|
103
|
+
|
100
104
|
end
|
101
105
|
|
102
106
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quandl_cassandra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -123,6 +123,22 @@ dependencies:
|
|
123
123
|
- - '>='
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: 3.0.0
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: facter
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ~>
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 1.7.3
|
134
|
+
type: :runtime
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 1.7.3
|
126
142
|
- !ruby/object:Gem::Dependency
|
127
143
|
name: scope_composer
|
128
144
|
requirement: !ruby/object:Gem::Requirement
|
@@ -227,6 +243,9 @@ files:
|
|
227
243
|
- lib/quandl/cassandra/base/sanitization.rb
|
228
244
|
- lib/quandl/cassandra/base/schema.rb
|
229
245
|
- lib/quandl/cassandra/base/scoping.rb
|
246
|
+
- lib/quandl/cassandra/batch.rb
|
247
|
+
- lib/quandl/cassandra/batch/insert.rb
|
248
|
+
- lib/quandl/cassandra/batch/logging.rb
|
230
249
|
- lib/quandl/cassandra/configuration.rb
|
231
250
|
- lib/quandl/cassandra/error.rb
|
232
251
|
- lib/quandl/cassandra/types.rb
|