quandl_cassandra 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/UPGRADE.md +8 -0
- data/lib/quandl/cassandra.rb +2 -0
- data/lib/quandl/cassandra/base/connection.rb +34 -5
- data/lib/quandl/cassandra/base/logging.rb +1 -13
- data/lib/quandl/cassandra/batch.rb +9 -0
- data/lib/quandl/cassandra/batch/insert.rb +53 -0
- data/lib/quandl/cassandra/batch/logging.rb +22 -0
- data/lib/quandl/cassandra/version.rb +1 -1
- data/lib/quandl/cassandra_models/column/read/select_columns.rb +11 -23
- data/lib/quandl/cassandra_models/column/write/insert_column_attributes.rb +10 -12
- data/lib/quandl/cassandra_models/column/write/insert_columns.rb +2 -23
- data/lib/quandl/cassandra_models/dataset.rb +1 -1
- data/quandl_cassandra.gemspec +2 -1
- data/spec/lib/quandl/cassandra_models/column/write_spec.rb +8 -0
- data/spec/lib/quandl/cassandra_models/dataset_spec.rb +49 -45
- metadata +21 -2
data/UPGRADE.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 0.3.0
|
2
|
+
|
3
|
+
* Base::Connection methods use #with_connection
|
4
|
+
* InsertColumnAttributes uses Batch.insert
|
5
|
+
* InsertColumns uses Batch.insert
|
6
|
+
* add Quandl::Cassandra::Batch
|
7
|
+
* insert_columns_in_batches_with_threads does not care how many rows are sent
|
8
|
+
|
1
9
|
## 0.0.1
|
2
10
|
|
3
11
|
* Begin
|
data/lib/quandl/cassandra.rb
CHANGED
@@ -7,6 +7,7 @@ require "active_support/core_ext/object"
|
|
7
7
|
|
8
8
|
require 'cql'
|
9
9
|
require 'scope_composer'
|
10
|
+
require 'facter'
|
10
11
|
|
11
12
|
require 'quandl/logger'
|
12
13
|
require "quandl/data"
|
@@ -16,6 +17,7 @@ require 'quandl/cassandra/error'
|
|
16
17
|
require 'quandl/cassandra/types'
|
17
18
|
require 'quandl/cassandra/base'
|
18
19
|
require 'quandl/cassandra/configuration'
|
20
|
+
require 'quandl/cassandra/batch'
|
19
21
|
|
20
22
|
require 'quandl/cassandra_models/column'
|
21
23
|
require 'quandl/cassandra_models/column_attribute'
|
@@ -12,17 +12,40 @@ module Quandl::Cassandra::Base::Connection
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def prepare(statement)
|
15
|
-
|
15
|
+
with_connection do |c|
|
16
|
+
c.prepare(statement)
|
17
|
+
end
|
16
18
|
end
|
17
19
|
|
18
20
|
def execute_async(statement, query_consistency = nil)
|
19
21
|
query_consistency = consistency unless query_consistency.present?
|
20
|
-
|
22
|
+
with_connection do |c|
|
23
|
+
c.async.execute( statement, query_consistency )
|
24
|
+
end
|
21
25
|
end
|
22
26
|
|
23
|
-
def execute(statement,
|
24
|
-
|
25
|
-
|
27
|
+
def execute(statement, qconsistency = nil)
|
28
|
+
qconsistency = consistency unless qconsistency.present?
|
29
|
+
with_connection do |c|
|
30
|
+
c.execute( statement, qconsistency )
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# Yields the shared connection to the given block and returns the block's result.
#
# When the block raises Cql::Io::ConnectionError or Cql::NotConnectedError the
# error is logged, the connection is rebuilt through #reset_connection, and the
# very same exception is re-raised, so callers still rescue the same class but
# keep the original message and backtrace. The block is not retried.
#
# Any other exception propagates untouched and does not reset the connection.
def with_connection(&block)
  yield(connection)
rescue Cql::Io::ConnectionError, Cql::NotConnectedError => e
  Quandl::Logger.error(e)
  reset_connection
  # bare raise re-raises the rescued exception instead of a fresh, message-less one
  raise
end
|
27
50
|
|
28
51
|
def connection
|
@@ -33,6 +56,12 @@ module Quandl::Cassandra::Base::Connection
|
|
33
56
|
Quandl::Cassandra.configuration.consistency
|
34
57
|
end
|
35
58
|
|
59
|
+
# Closes the current connection, stores a newly established one in
# @@connection, and returns true.
def reset_connection
  stale = connection
  stale.close
  @@connection = establish_connection
  true
end
|
64
|
+
|
36
65
|
def establish_connection
|
37
66
|
c = Cql::Client.connect(
|
38
67
|
hosts: Quandl::Cassandra.configuration.hosts,
|
@@ -10,19 +10,7 @@ module Quandl::Cassandra::Base::Logging
|
|
10
10
|
Quandl::Logger.debug(statement)
|
11
11
|
super if defined?(super)
|
12
12
|
end
|
13
|
-
|
14
|
-
def execute_async(*args, &block)
|
15
|
-
statement = args.first.to_s
|
16
|
-
statement = "#{statement[0..200]} ... #{statement.length} chars" if statement.length > 200
|
17
|
-
t1 = Time.now
|
18
|
-
begin
|
19
|
-
r = super if defined?(super)
|
20
|
-
ensure
|
21
|
-
Quandl::Logger.debug("(#{t1.elapsed_ms}) #{statement}")
|
22
|
-
end
|
23
|
-
r
|
24
|
-
end
|
25
|
-
|
13
|
+
|
26
14
|
def execute(*args, &block)
|
27
15
|
statement = args.first.to_s
|
28
16
|
statement = "#{statement[0..200]} ... #{statement.length} chars" if statement.length > 200
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'quandl/cassandra/batch/logging'
|
2
|
+
require 'quandl/cassandra/batch/insert'
|
3
|
+
|
4
|
+
# Batch write entry point. Always mixes in Batch::Insert; Batch::Logging is
# mixed in on top only when the QUANDL_LOGGER constant is defined and is true.
class Quandl::Cassandra::Batch

  include Quandl::Cassandra::Batch::Insert

  if defined?(QUANDL_LOGGER) && QUANDL_LOGGER == true
    include Quandl::Cassandra::Batch::Logging
  end

end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Class-level helpers that write rows as CQL UNLOGGED BATCH statements.
module Quandl::Cassandra::Batch::Insert

  extend ActiveSupport::Concern

  module ClassMethods

    # Inserts rows in batches of #batch_size.
    #
    #   Quandl::Cassandra::Batch.insert(rows) do |id, type, time, value|
    #     "INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
    #   end
    #
    # rows  - enumerable of rows; each row is splatted into the block.
    # block - returns the statement text for a single row.
    #
    # Returns an array holding the #value of every batch that was executed.
    def insert(rows, &block)
      insert_in_batches(rows, &block)
    end

    # Number of statements per batch, read from the gem configuration.
    def batch_size
      Quandl::Cassandra.configuration.batch_size
    end


    protected

    # Splits rows into slices of #rows_per_thread, inserts every slice in its
    # own thread and waits for all threads to finish.
    def insert_in_batches_with_threads(rows, &block)
      threads = rows.each_slice( rows_per_thread(rows) ).map do |rows_slice|
        Thread.start{ insert_in_batches(rows_slice, &block) }
      end
      threads.each(&:join)
    end

    # Fires one asynchronous batch per slice of #batch_size rows, then collects
    # the value of every returned future once all batches have been started.
    def insert_in_batches(rows, &block)
      futures = rows.each_slice( batch_size ).map do |rows_slice|
        execute_async_batch( rows_slice.map{|row| block.call( *row ) } )
      end
      futures.map(&:value)
    end

    # Rows handed to each thread: row count divided by the processor count
    # reported by Facter, never less than 1.
    def rows_per_thread(rows)
      # a nil or blank processorcount converts to 0; clamp so the division cannot raise
      processors = [ Facter.processorcount.to_i, 1 ].max
      [ rows.count / processors, 1 ].max
    end


    private

    # Wraps the statements in BEGIN UNLOGGED BATCH / APPLY BATCH and returns
    # whatever Base.execute_async returns for it.
    def execute_async_batch(statements)
      batch = %Q{BEGIN UNLOGGED BATCH\n#{statements.join("\n")}\nAPPLY BATCH;}
      Quandl::Cassandra::Base.execute_async( batch )
    end

  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class Quandl::Cassandra::Batch
  # Optional mixin that wraps Batch.insert with a debug log line containing the
  # elapsed time, the row count and a sample statement.
  module Logging

    extend ActiveSupport::Concern

    module ClassMethods

      # Logs around the inherited insert and returns its result unchanged.
      #
      # rows  - rows being inserted; may be empty.
      # block - statement builder, called once here with '?' placeholders.
      def insert(rows, &block)
        # build a sample statement with one '?' per field of the first row;
        # splat it the same way the insert path splats each row, and skip
        # the sample entirely when there are no rows to inspect
        sample = rows.first
        statement = sample ? block.call( *sample.collect{'?'} ) : ''
        t1 = Time.now
        # call method
        r = super if defined?(super)
        # log write
        Quandl::Logger.debug("(#{t1.elapsed_ms}) [#{rows.count} rows] BATCH #{statement}")
        r
      end

    end

  end
end
|
@@ -18,39 +18,27 @@ class Quandl::Cassandra::Column::Read::SelectColumns < Quandl::Cassandra::Column
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def select_data
|
21
|
+
t1 = Time.now
|
22
|
+
# fire off the queries
|
21
23
|
prepared = Quandl::Cassandra::Base.prepare( statement )
|
22
24
|
data = {}
|
25
|
+
futures = []
|
23
26
|
attributes[:column_ids].each_with_index do | id, index |
|
24
27
|
# pluck column type from collapses
|
25
28
|
type = attributes[:column_collapses][index].to_s
|
26
29
|
# bind and execute query
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
+
futures << prepared.async.execute( id, type, Quandl::Cassandra::Base.consistency )
|
31
|
+
end
|
32
|
+
# collect the results
|
33
|
+
futures.each_with_index do |future, index|
|
34
|
+
# collect result
|
35
|
+
future.value.each do |row|
|
30
36
|
data[row['time']] ||= Array.new( attributes[:column_ids].count )
|
31
37
|
data[row['time']][index] ||= row['value']
|
32
38
|
end
|
33
39
|
end
|
34
|
-
|
35
|
-
|
36
|
-
# futures = []
|
37
|
-
# attrs[:column_ids].each_with_index do | id, index |
|
38
|
-
# # pluck column type from collapses
|
39
|
-
# type = attrs[:column_collapses][index].to_s
|
40
|
-
# # bind and execute query
|
41
|
-
# futures << Quandl::Cassandra::Base.connection.execute_async( statement.bind( id, type ) )
|
42
|
-
# end
|
43
|
-
# # collect the results
|
44
|
-
# futures.each_with_index do |future, column_index|
|
45
|
-
# t1 = Time.now
|
46
|
-
# rows = JavaDriver::ResultSet.new( future.get_uninterruptibly ).to_a
|
47
|
-
# rows.each do |row|
|
48
|
-
# data[row[0]] ||= Array.new(attrs[:column_ids].count)
|
49
|
-
# data[row[0]][column_index] ||= row[1]
|
50
|
-
# end
|
51
|
-
# JCQL::CommonLogger.info "#{cql} (#{attrs[:column_ids][column_index]}) (#{t1.elapsed.microseconds}ms)"
|
52
|
-
# end
|
53
|
-
# data
|
40
|
+
Quandl::Logger.debug("(#{t1.elapsed_ms}) #{self.class.name}.select_data")
|
41
|
+
data
|
54
42
|
end
|
55
43
|
|
56
44
|
def statement
|
@@ -5,18 +5,16 @@ class Quandl::Cassandra::Column::Write::InsertColumnAttributes < Quandl::Cassand
|
|
5
5
|
|
6
6
|
def perform
|
7
7
|
return if column_ids.blank?
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
def column_attributes_statement( column_id )
|
19
|
-
"INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{frequency}' )"
|
8
|
+
# format data for batch insertion
|
9
|
+
rows_values = []
|
10
|
+
column_ids.each_with_index do |column_id, position|
|
11
|
+
rows_values << [id, column_id, position, frequency]
|
12
|
+
end
|
13
|
+
# insert data
|
14
|
+
Quandl::Cassandra::Batch.insert(rows_values) do |id, column_id, position, frequency|
|
15
|
+
%Q{INSERT INTO datasets (id, column_id, position) VALUES (#{id}, #{column_id}, #{position})
|
16
|
+
INSERT INTO column_attributes ( id, frequency ) VALUES ( #{column_id}, '#{frequency}' )}
|
17
|
+
end
|
20
18
|
end
|
21
19
|
|
22
20
|
end
|
@@ -1,30 +1,9 @@
|
|
1
1
|
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write
|
2
2
|
|
3
3
|
def perform
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
def insert_columns_in_batches
|
8
|
-
threads = statement_values.each_slice( statement_values.size / 8 ).map do |threads_slice|
|
9
|
-
Thread.start do
|
10
|
-
futures = []
|
11
|
-
threads_slice.each_slice( Quandl::Cassandra.configuration.batch_size ).each do |batch_slice|
|
12
|
-
statements = batch_slice.collect{|row| statement( *row ) }
|
13
|
-
futures << execute_async_batch(statements)
|
14
|
-
end
|
15
|
-
futures.collect(&:value)
|
16
|
-
end
|
4
|
+
Quandl::Cassandra::Batch.insert(statement_values) do |id, type, time, value|
|
5
|
+
"INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
|
17
6
|
end
|
18
|
-
threads.each(&:join)
|
19
|
-
end
|
20
|
-
|
21
|
-
def execute_async_batch(statements)
|
22
|
-
batch = %Q{BEGIN UNLOGGED BATCH\n#{statements.join("\n")}\nAPPLY BATCH;}
|
23
|
-
future = Quandl::Cassandra::Base.execute_async( batch )
|
24
|
-
end
|
25
|
-
|
26
|
-
def statement( id, type, time, value )
|
27
|
-
"INSERT INTO columns (id, type, time, value) VALUES (#{id}, '#{type}', #{time}, #{value})"
|
28
7
|
end
|
29
8
|
|
30
9
|
end
|
data/quandl_cassandra.gemspec
CHANGED
@@ -25,7 +25,8 @@ Gem::Specification.new do |s|
|
|
25
25
|
|
26
26
|
s.add_runtime_dependency "activesupport", ">= 3.0.0"
|
27
27
|
s.add_runtime_dependency "activemodel", ">= 3.0.0"
|
28
|
-
|
28
|
+
|
29
|
+
s.add_runtime_dependency "facter", "~> 1.7.3"
|
29
30
|
s.add_runtime_dependency "scope_composer", "~> 0.4"
|
30
31
|
s.add_runtime_dependency "quandl_data", "~> 1.0"
|
31
32
|
s.add_runtime_dependency "quandl_logger", "~> 0.1"
|
@@ -12,4 +12,12 @@ describe Quandl::Cassandra::Column::Write do
|
|
12
12
|
data.should eq source_data
|
13
13
|
end
|
14
14
|
|
15
|
+
context "given tiny data array" do
|
16
|
+
let(:data){ Quandl::Fabricate::Data.rand( columns: 1, rows: 2, nils: false ) }
|
17
|
+
before(:each){ Quandl::Cassandra::Column.write( id: id, data: data ) }
|
18
|
+
it "should have written the data" do
|
19
|
+
Quandl::Cassandra::Column.read( id: id ).should eq data
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
15
23
|
end
|
@@ -41,62 +41,66 @@ describe Quandl::Cassandra::Dataset do
|
|
41
41
|
its(:table_name){ should eq 'datasets' }
|
42
42
|
end
|
43
43
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
dataset.save
|
49
|
-
}
|
44
|
+
context "given data" do
|
45
|
+
before(:each){ dataset.data = Quandl::Fabricate::Data.rand(rows: 10, columns: 2, nils: false) }
|
46
|
+
|
47
|
+
describe "#save" do
|
48
|
+
before(:each){ dataset.save }
|
50
49
|
|
51
|
-
|
50
|
+
subject{ dataset }
|
52
51
|
|
53
|
-
|
52
|
+
its(:changes){ should be_blank }
|
53
|
+
its(:frequency){ should eq 'daily' }
|
54
54
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
55
|
+
describe ".find" do
|
56
|
+
subject{ Quandl::Cassandra::Dataset.find(id) }
|
57
|
+
its(:data){ should eq dataset.data.to_table }
|
58
|
+
its(:frequency){ should eq 'daily' }
|
59
|
+
|
60
|
+
it "data should count and return data" do
|
61
|
+
subject.data.count.should eq 10
|
62
|
+
subject.data.to_table.should be_a Quandl::Cassandra::Data
|
63
|
+
end
|
64
|
+
it "columns should eq dataset.columns" do
|
65
|
+
subject.columns.collect{|c| c.id.to_s }.should eq dataset.columns.collect{|c| c.id.to_s }
|
66
|
+
end
|
67
|
+
it "column_ids should eq dataset.column_ids" do
|
68
|
+
subject.column_ids.collect(&:to_s).should eq dataset.column_ids.collect(&:to_s)
|
69
|
+
end
|
67
70
|
end
|
68
|
-
end
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
72
|
+
describe "#reload" do
|
73
|
+
before(:each){
|
74
|
+
dataset.data.limit(5).to_a
|
75
|
+
dataset.reload
|
76
|
+
}
|
75
77
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
78
|
+
describe "#attributes" do
|
79
|
+
subject{ dataset.attributes }
|
80
|
+
its([:data]){ should eq nil }
|
81
|
+
end
|
80
82
|
|
81
|
-
|
83
|
+
end
|
82
84
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
85
|
+
describe "#data" do
|
86
|
+
subject{ dataset.data }
|
87
|
+
its(:count){ should eq 10 }
|
88
|
+
end
|
87
89
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
90
|
+
describe "#column_ids" do
|
91
|
+
subject{ dataset.column_ids }
|
92
|
+
its(:count){ should eq 2 }
|
93
|
+
its(:first){ should be_a Cql::Uuid }
|
94
|
+
end
|
93
95
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
96
|
+
describe "#columns" do
|
97
|
+
subject{ dataset.columns }
|
98
|
+
its(:count){ should eq 2 }
|
99
|
+
its(:first){ should be_a Quandl::Cassandra::ColumnAttribute }
|
100
|
+
end
|
99
101
|
|
102
|
+
end
|
103
|
+
|
100
104
|
end
|
101
105
|
|
102
106
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quandl_cassandra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-04 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -123,6 +123,22 @@ dependencies:
|
|
123
123
|
- - '>='
|
124
124
|
- !ruby/object:Gem::Version
|
125
125
|
version: 3.0.0
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: facter
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ~>
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 1.7.3
|
134
|
+
type: :runtime
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 1.7.3
|
126
142
|
- !ruby/object:Gem::Dependency
|
127
143
|
name: scope_composer
|
128
144
|
requirement: !ruby/object:Gem::Requirement
|
@@ -227,6 +243,9 @@ files:
|
|
227
243
|
- lib/quandl/cassandra/base/sanitization.rb
|
228
244
|
- lib/quandl/cassandra/base/schema.rb
|
229
245
|
- lib/quandl/cassandra/base/scoping.rb
|
246
|
+
- lib/quandl/cassandra/batch.rb
|
247
|
+
- lib/quandl/cassandra/batch/insert.rb
|
248
|
+
- lib/quandl/cassandra/batch/logging.rb
|
230
249
|
- lib/quandl/cassandra/configuration.rb
|
231
250
|
- lib/quandl/cassandra/error.rb
|
232
251
|
- lib/quandl/cassandra/types.rb
|