quandl_cassandra 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/lib/quandl/cassandra/configuration.rb +1 -1
- data/lib/quandl/cassandra/version.rb +1 -1
- data/lib/quandl/cassandra_models/column/write.rb +1 -1
- data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +14 -19
- data/lib/quandl/cassandra_models/column/write/insert_columns.rb +10 -19
- metadata +2 -2
data/.gitignore
CHANGED
data/lib/quandl/cassandra_models/column/write.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
class Quandl::Cassandra::Column::Write < Quandl::Strategy::Strategize
|
2
2
|
|
3
3
|
# strategy attributes
|
4
|
-
define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data
|
4
|
+
define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data, :statement_values
|
5
5
|
|
6
6
|
require_relative 'write/insert_columns'
|
7
7
|
require_relative 'write/insert_column_attributes'
|
@@ -7,36 +7,31 @@ class Quandl::Cassandra::Column::Write::GroupDataByColumn < Quandl::Cassandra::C
|
|
7
7
|
# { source: { UUID: [[1,2], [2,4]], UUID: [[1,3],[2,8]] }}
|
8
8
|
|
9
9
|
def perform
|
10
|
-
|
11
|
-
# for each { frequency: [ [12,3,4], ... ] }
|
12
|
-
frequency_data.each do |frequency, data|
|
13
|
-
# assign grouped data to frequency_column_data
|
14
|
-
self.frequency_column_data[frequency] = group_data_by_column(data)
|
15
|
-
end
|
10
|
+
group_by_statement_values
|
16
11
|
end
|
17
12
|
|
18
|
-
def group_data_by_column(data)
|
19
|
-
column_data = {}
|
13
|
+
def group_by_statement_values
|
20
14
|
# for each [ [date, val, val], ... ]
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
15
|
+
self.statement_values = []
|
16
|
+
frequency_data.each do |frequency, rows|
|
17
|
+
frequency = frequency.to_s
|
18
|
+
rows.each do |row|
|
19
|
+
# extract date
|
20
|
+
date = row[0]
|
21
|
+
# for each [ val, val, ... ]
|
22
|
+
row[1..-1].each_with_index do |value, index|
|
23
|
+
# ensure array
|
24
|
+
self.statement_values << [ column_id(index), frequency, date, value] unless value.blank?
|
25
|
+
end
|
30
26
|
end
|
31
27
|
end
|
32
|
-
column_data
|
33
28
|
end
|
34
29
|
|
35
30
|
def column_id(index)
|
36
31
|
# ensure column_ids is defined
|
37
32
|
self.column_ids ||= Quandl::Cassandra::Dataset.find_column_ids_by_id(id)
|
38
33
|
# ensure column_ids[index] is present
|
39
|
-
self.column_ids[index] ||= SecureRandom.uuid
|
34
|
+
self.column_ids[index] ||= Cql::Uuid.new(SecureRandom.uuid)
|
40
35
|
end
|
41
36
|
|
42
37
|
end
|
@@ -1,30 +1,21 @@
|
|
1
1
|
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write
|
2
2
|
|
3
3
|
def perform
|
4
|
-
|
4
|
+
insert_columns_in_batches
|
5
5
|
end
|
6
6
|
|
7
|
-
def
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
statements << statement( column_id, frequency, time_value[0], time_value[1] )
|
15
|
-
# after 30 statements are collected, execute a batch insert
|
16
|
-
if statements.count >= Quandl::Cassandra.configuration.batch_size
|
17
|
-
# collect the futures
|
18
|
-
futures << execute_async_batch(statements)
|
19
|
-
# clear statements
|
20
|
-
statements = []
|
21
|
-
end
|
7
|
+
def insert_columns_in_batches
|
8
|
+
threads = statement_values.each_slice( statement_values.size / 8 ).map do |threads_slice|
|
9
|
+
Thread.start do
|
10
|
+
futures = []
|
11
|
+
threads_slice.each_slice( Quandl::Cassandra.configuration.batch_size ).each do |batch_slice|
|
12
|
+
statements = batch_slice.collect{|row| statement( *row ) }
|
13
|
+
futures << execute_async_batch(statements)
|
22
14
|
end
|
15
|
+
futures.collect(&:value)
|
23
16
|
end
|
24
17
|
end
|
25
|
-
|
26
|
-
futures << execute_async_batch(statements) if statements.count > 0
|
27
|
-
futures
|
18
|
+
threads.each(&:join)
|
28
19
|
end
|
29
20
|
|
30
21
|
def execute_async_batch(statements)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quandl_cassandra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|