gooddata_datawarehouse 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to gain access to all of the features.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/lib/gooddata_datawarehouse/datawarehouse.rb +24 -5
- data/lib/gooddata_datawarehouse/sql_generator.rb +4 -0
- data/lib/gooddata_datawarehouse/version.rb +1 -1
- data/new-version.sh +23 -0
- data/spec/data/emptyheader-bike.csv +4 -0
- data/spec/datawarehouse_spec.rb +47 -4
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 453c08405c8e4e9c2fc46b6b2fb1f1df22f2f4ae
|
4
|
+
data.tar.gz: ce89580d666c0d660a61a72def8f6d7d28d4ab9d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d40a9ece4f77255e2d2f750d0082bc5ca0d19bb1d99ae013d8da5331000b1c86022e3dc4c8b70b54cda7da130332a6b3b19ac4eeb280d27573d19bd8cafc1ed
|
7
|
+
data.tar.gz: 69fc15c0a428e8e32db4b20f07b7fa7ef686342df98e210dc1a4540e6af85569f31cebb00a867d9fe5e15a62f3326c1d3c74db0b4a31fb6d7b4c777fc6755c4a
|
data/README.md
CHANGED
@@ -51,7 +51,13 @@ dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS inst
|
|
51
51
|
# import a csv
|
52
52
|
dwh.csv_to_new_table('my_table', 'path/to/my.csv')
|
53
53
|
|
54
|
+
# or multiple csvs (running in parallel threads)
|
55
|
+
dwh.csv_to_new_table('my_table', ['path/to/my.csv', 'path/to/my2.csv'])
|
56
|
+
dwh.csv_to_new_table('my_table', 'path/to/*.csv')
|
57
|
+
dwh.csv_to_new_table('my_table', 'path/to/directory/')
|
58
|
+
|
54
59
|
dwh.table_exists?('my_table') # true
|
60
|
+
dwh.table_row_count('my_table') # 55
|
55
61
|
dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]
|
56
62
|
|
57
63
|
# run an arbitrary sql
|
@@ -9,7 +9,7 @@ require_relative 'sql_generator'
|
|
9
9
|
|
10
10
|
module GoodData
|
11
11
|
class Datawarehouse
|
12
|
-
PARALEL_COPY_THREAD_COUNT =
|
12
|
+
PARALEL_COPY_THREAD_COUNT = 10
|
13
13
|
def initialize(username, password, instance_id, opts={})
|
14
14
|
@logger = Logger.new(STDOUT)
|
15
15
|
@username = username
|
@@ -42,6 +42,8 @@ module GoodData
|
|
42
42
|
csv << row.values_at(*col_keys)
|
43
43
|
end
|
44
44
|
end
|
45
|
+
@logger.info "Table #{table_name} exported to #{csv_path.respond_to?(:path)? csv_path.path : csv_path}"
|
46
|
+
csv_path
|
45
47
|
end
|
46
48
|
|
47
49
|
def rename_table(old_name, new_name)
|
@@ -55,18 +57,27 @@ module GoodData
|
|
55
57
|
def csv_to_new_table(table_name, csvs, opts={})
|
56
58
|
csv_list = list_files(csvs)
|
57
59
|
cols = create_table_from_csv_header(table_name, csv_list[0], opts)
|
58
|
-
load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
|
60
|
+
load_data_from_csv(table_name, csv_list, opts.merge(columns: cols, append: true))
|
61
|
+
end
|
62
|
+
|
63
|
+
def truncate_table(table_name)
|
64
|
+
execute(GoodData::SQLGenerator.truncate_table(table_name))
|
59
65
|
end
|
60
66
|
|
61
67
|
def load_data_from_csv(table_name, csvs, opts={})
|
68
|
+
thread_count = opts[:paralel_copy_thread_count] || PARALEL_COPY_THREAD_COUNT
|
62
69
|
# get the list of files to load and columns in the csv
|
63
70
|
csv_list = list_files(csvs)
|
64
71
|
columns = opts[:columns] || get_csv_headers(csv_list[0])
|
65
72
|
|
73
|
+
# truncate_table unless data should be appended
|
74
|
+
unless opts[:append]
|
75
|
+
truncate_table(table_name)
|
76
|
+
end
|
77
|
+
|
66
78
|
# load each csv from the list
|
67
79
|
single_file = (csv_list.size == 1)
|
68
|
-
|
69
|
-
csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
|
80
|
+
csv_list.peach(thread_count) do |csv_path|
|
70
81
|
if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
|
71
82
|
exc = nil
|
72
83
|
rej = nil
|
@@ -209,7 +220,15 @@ module GoodData
|
|
209
220
|
if header_str.nil? || header_str.empty?
|
210
221
|
return []
|
211
222
|
end
|
212
|
-
|
223
|
+
empty_count = 0
|
224
|
+
header_str.split(',').map{|s| s.gsub(/[\s"-]/,'')}.map do |c|
|
225
|
+
if c.nil? || c.empty?
|
226
|
+
empty_count += 1
|
227
|
+
"empty#{empty_count}"
|
228
|
+
else
|
229
|
+
c
|
230
|
+
end
|
231
|
+
end
|
213
232
|
end
|
214
233
|
end
|
215
234
|
end
|
data/new-version.sh
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/bin/sh
|
2
|
+
|
3
|
+
# get the new version
|
4
|
+
VERSION=`bundle exec ruby <<-EORUBY
|
5
|
+
|
6
|
+
require 'gooddata_datawarehouse'
|
7
|
+
puts GoodData::Datawarehouse::VERSION
|
8
|
+
|
9
|
+
EORUBY`
|
10
|
+
|
11
|
+
# create tag and push it
|
12
|
+
TAG="v$VERSION"
|
13
|
+
git tag $TAG
|
14
|
+
git push origin $TAG
|
15
|
+
|
16
|
+
# build and push the gem
|
17
|
+
gem build gooddata_datawarehouse.gemspec
|
18
|
+
gem push "gooddata_datawarehouse-$VERSION.gem"
|
19
|
+
|
20
|
+
# update the gem after a few secs
|
21
|
+
echo "Sleeping.."
|
22
|
+
sleep 30
|
23
|
+
gem update gooddata_datawarehouse
|
data/spec/datawarehouse_spec.rb
CHANGED
@@ -4,6 +4,7 @@ require 'gooddata_datawarehouse/datawarehouse'
|
|
4
4
|
CSV_PATH = 'spec/data/bike.csv'
|
5
5
|
CSV_PATH2 = 'spec/data/bike2.csv'
|
6
6
|
WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
|
7
|
+
EMPTY_HEADER_CSV_PATH = 'spec/data/emptyheader-bike.csv'
|
7
8
|
CSV_REGEXP = 'spec/data/bike*.csv'
|
8
9
|
|
9
10
|
class Helper
|
@@ -107,9 +108,10 @@ describe GoodData::Datawarehouse do
|
|
107
108
|
expect(@dwh.table_exists?(@random_table_name)).to eq true
|
108
109
|
end
|
109
110
|
|
110
|
-
def check_row_count
|
111
|
+
def check_row_count(files=[CSV_PATH, CSV_PATH2])
|
112
|
+
expected_count = files.map {|f| Helper.line_count(f)}.reduce(:+)
|
111
113
|
# there are lines from both of the csvs
|
112
|
-
expect(@dwh.table_row_count(@random_table_name)).to eq
|
114
|
+
expect(@dwh.table_row_count(@random_table_name)).to eq expected_count
|
113
115
|
end
|
114
116
|
|
115
117
|
describe '#rename_table' do
|
@@ -141,13 +143,13 @@ describe GoodData::Datawarehouse do
|
|
141
143
|
end
|
142
144
|
|
143
145
|
|
144
|
-
it "loads all files in a directory" do
|
146
|
+
it "loads all files in a directory, in paralel" do
|
145
147
|
# make a tempdir and copy the csvs there
|
146
148
|
Dir.mktmpdir('foo') do |dir|
|
147
149
|
FileUtils.cp(CSV_PATH, dir)
|
148
150
|
FileUtils.cp(CSV_PATH2, dir)
|
149
151
|
|
150
|
-
@dwh.csv_to_new_table(@random_table_name, dir)
|
152
|
+
@dwh.csv_to_new_table(@random_table_name, dir, :paralel_copy_thread_count => 2)
|
151
153
|
end
|
152
154
|
|
153
155
|
check_table_exists
|
@@ -279,6 +281,11 @@ describe GoodData::Datawarehouse do
|
|
279
281
|
expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
280
282
|
expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
281
283
|
end
|
284
|
+
it "creates empty1, etc. columns for empty header columns" do
|
285
|
+
@dwh.csv_to_new_table(@random_table_name, EMPTY_HEADER_CSV_PATH)
|
286
|
+
# the created table should have columns empty1 and empty2
|
287
|
+
expect(@dwh.get_columns(@random_table_name).map {|c| c[:column_name]}).to include('empty1', 'empty2')
|
288
|
+
end
|
282
289
|
end
|
283
290
|
|
284
291
|
describe '#export_table' do
|
@@ -343,6 +350,42 @@ describe GoodData::Datawarehouse do
|
|
343
350
|
# load the data there - expect fail
|
344
351
|
expect{@dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH)}.to raise_error(ArgumentError)
|
345
352
|
end
|
353
|
+
|
354
|
+
it 'truncates the data that is already there' do
|
355
|
+
@dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
|
356
|
+
check_table_exists
|
357
|
+
check_cols
|
358
|
+
|
359
|
+
# load the data there
|
360
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH)
|
361
|
+
check_row_count([CSV_PATH])
|
362
|
+
|
363
|
+
# load a different csv without :append — the table is truncated first, so the count reflects only the new file
|
364
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH2)
|
365
|
+
check_row_count([CSV_PATH2])
|
366
|
+
end
|
367
|
+
|
368
|
+
it "keeps the data that is there if append option passed" do
|
369
|
+
@dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
|
370
|
+
check_table_exists
|
371
|
+
check_cols
|
372
|
+
|
373
|
+
# load the data there
|
374
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH)
|
375
|
+
check_row_count([CSV_PATH])
|
376
|
+
|
377
|
+
# append the data
|
378
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH2, :append => true)
|
379
|
+
check_row_count([CSV_PATH, CSV_PATH2])
|
380
|
+
end
|
381
|
+
end
|
382
|
+
|
383
|
+
describe "#truncate_table" do
|
384
|
+
it "truncates the given table" do
|
385
|
+
@dwh.csv_to_new_table(@random_table_name, CSV_PATH)
|
386
|
+
@dwh.truncate_table(@random_table_name)
|
387
|
+
expect(@dwh.table_row_count(@random_table_name)).to eq 0
|
388
|
+
end
|
346
389
|
end
|
347
390
|
|
348
391
|
describe '#get_columns' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gooddata_datawarehouse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Petr Cvengros
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -149,8 +149,10 @@ files:
|
|
149
149
|
- lib/gooddata_datawarehouse/datawarehouse.rb
|
150
150
|
- lib/gooddata_datawarehouse/sql_generator.rb
|
151
151
|
- lib/gooddata_datawarehouse/version.rb
|
152
|
+
- new-version.sh
|
152
153
|
- spec/data/bike.csv
|
153
154
|
- spec/data/bike2.csv
|
155
|
+
- spec/data/emptyheader-bike.csv
|
154
156
|
- spec/data/wrong-bike.csv
|
155
157
|
- spec/datawarehouse_spec.rb
|
156
158
|
- spec/spec_helper.rb
|
@@ -181,6 +183,7 @@ summary: Convenient work with GoodData's Datawarehouse (ADS)
|
|
181
183
|
test_files:
|
182
184
|
- spec/data/bike.csv
|
183
185
|
- spec/data/bike2.csv
|
186
|
+
- spec/data/emptyheader-bike.csv
|
184
187
|
- spec/data/wrong-bike.csv
|
185
188
|
- spec/datawarehouse_spec.rb
|
186
189
|
- spec/spec_helper.rb
|