quandl_cassandra 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/lib/quandl/cassandra/configuration.rb +1 -1
- data/lib/quandl/cassandra/version.rb +1 -1
- data/lib/quandl/cassandra_models/column/write.rb +1 -1
- data/lib/quandl/cassandra_models/column/write/group_data_by_column.rb +14 -19
- data/lib/quandl/cassandra_models/column/write/insert_columns.rb +10 -19
- metadata +2 -2
data/.gitignore
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
class Quandl::Cassandra::Column::Write < Quandl::Strategy::Strategize
|
2
2
|
|
3
3
|
# strategy attributes
|
4
|
-
define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data
|
4
|
+
define_attributes :id, :data, :frequency, :column_ids, :frequency_data, :frequency_column_data, :statement_values
|
5
5
|
|
6
6
|
require_relative 'write/insert_columns'
|
7
7
|
require_relative 'write/insert_column_attributes'
|
@@ -7,36 +7,31 @@ class Quandl::Cassandra::Column::Write::GroupDataByColumn < Quandl::Cassandra::C
|
|
7
7
|
# { source: { UUID: [[1,2], [2,4]], UUID: [[1,3],[2,8]] }}
|
8
8
|
|
9
9
|
def perform
|
10
|
-
|
11
|
-
# for each { frequency: [ [12,3,4], ... ] }
|
12
|
-
frequency_data.each do |frequency, data|
|
13
|
-
# assign grouped data to frequency_column_data
|
14
|
-
self.frequency_column_data[frequency] = group_data_by_column(data)
|
15
|
-
end
|
10
|
+
group_by_statement_values
|
16
11
|
end
|
17
12
|
|
18
|
-
def group_data_by_column(data)
|
19
|
-
column_data = {}
|
13
|
+
def group_by_statement_values
|
20
14
|
# for each [ [date, val, val], ... ]
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
15
|
+
self.statement_values = []
|
16
|
+
frequency_data.each do |frequency, rows|
|
17
|
+
frequency = frequency.to_s
|
18
|
+
rows.each do |row|
|
19
|
+
# extract date
|
20
|
+
date = row[0]
|
21
|
+
# for each [ val, val, ... ]
|
22
|
+
row[1..-1].each_with_index do |value, index|
|
23
|
+
# ensure array
|
24
|
+
self.statement_values << [ column_id(index), frequency, date, value] unless value.blank?
|
25
|
+
end
|
30
26
|
end
|
31
27
|
end
|
32
|
-
column_data
|
33
28
|
end
|
34
29
|
|
35
30
|
def column_id(index)
|
36
31
|
# ensure column_ids is defined
|
37
32
|
self.column_ids ||= Quandl::Cassandra::Dataset.find_column_ids_by_id(id)
|
38
33
|
# ensure column_ids[index] is present
|
39
|
-
self.column_ids[index] ||= SecureRandom.uuid
|
34
|
+
self.column_ids[index] ||= Cql::Uuid.new(SecureRandom.uuid)
|
40
35
|
end
|
41
36
|
|
42
37
|
end
|
@@ -1,30 +1,21 @@
|
|
1
1
|
class Quandl::Cassandra::Column::Write::InsertColumns < Quandl::Cassandra::Column::Write
|
2
2
|
|
3
3
|
def perform
|
4
|
-
|
4
|
+
insert_columns_in_batches
|
5
5
|
end
|
6
6
|
|
7
|
-
def
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
statements << statement( column_id, frequency, time_value[0], time_value[1] )
|
15
|
-
# after 30 statements are collected, execute a batch insert
|
16
|
-
if statements.count >= Quandl::Cassandra.configuration.batch_size
|
17
|
-
# collect the futures
|
18
|
-
futures << execute_async_batch(statements)
|
19
|
-
# clear statements
|
20
|
-
statements = []
|
21
|
-
end
|
7
|
+
def insert_columns_in_batches
|
8
|
+
threads = statement_values.each_slice( statement_values.size / 8 ).map do |threads_slice|
|
9
|
+
Thread.start do
|
10
|
+
futures = []
|
11
|
+
threads_slice.each_slice( Quandl::Cassandra.configuration.batch_size ).each do |batch_slice|
|
12
|
+
statements = batch_slice.collect{|row| statement( *row ) }
|
13
|
+
futures << execute_async_batch(statements)
|
22
14
|
end
|
15
|
+
futures.collect(&:value)
|
23
16
|
end
|
24
17
|
end
|
25
|
-
|
26
|
-
futures << execute_async_batch(statements) if statements.count > 0
|
27
|
-
futures
|
18
|
+
threads.each(&:join)
|
28
19
|
end
|
29
20
|
|
30
21
|
def execute_async_batch(statements)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: quandl_cassandra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-11-
|
12
|
+
date: 2013-11-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|