gooddata_datawarehouse 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/lib/gooddata_datawarehouse/datawarehouse.rb +24 -5
- data/lib/gooddata_datawarehouse/sql_generator.rb +4 -0
- data/lib/gooddata_datawarehouse/version.rb +1 -1
- data/new-version.sh +23 -0
- data/spec/data/emptyheader-bike.csv +4 -0
- data/spec/datawarehouse_spec.rb +47 -4
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 453c08405c8e4e9c2fc46b6b2fb1f1df22f2f4ae
|
4
|
+
data.tar.gz: ce89580d666c0d660a61a72def8f6d7d28d4ab9d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d40a9ece4f77255e2d2f750d0082bc5ca0d19bb1d99ae013d8da5331000b1c86022e3dc4c8b70b54cda7da130332a6b3b19ac4eeb280d27573d19bd8cafc1ed
|
7
|
+
data.tar.gz: 69fc15c0a428e8e32db4b20f07b7fa7ef686342df98e210dc1a4540e6af85569f31cebb00a867d9fe5e15a62f3326c1d3c74db0b4a31fb6d7b4c777fc6755c4a
|
data/README.md
CHANGED
@@ -51,7 +51,13 @@ dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS inst
|
|
51
51
|
# import a csv
|
52
52
|
dwh.csv_to_new_table('my_table', 'path/to/my.csv')
|
53
53
|
|
54
|
+
# or multiple csvs (running in parallel threads)
|
55
|
+
dwh.csv_to_new_table('my_table', ['path/to/my.csv', 'path/to/my2.csv'])
|
56
|
+
dwh.csv_to_new_table('my_table', 'path/to/*.csv')
|
57
|
+
dwh.csv_to_new_table('my_table', 'path/to/directory/')
|
58
|
+
|
54
59
|
dwh.table_exists?('my_table') # true
|
60
|
+
dwh.table_row_count('my_table') # 55
|
55
61
|
dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]
|
56
62
|
|
57
63
|
# run an arbitrary sql
|
@@ -9,7 +9,7 @@ require_relative 'sql_generator'
|
|
9
9
|
|
10
10
|
module GoodData
|
11
11
|
class Datawarehouse
|
12
|
-
PARALEL_COPY_THREAD_COUNT =
|
12
|
+
PARALEL_COPY_THREAD_COUNT = 10
|
13
13
|
def initialize(username, password, instance_id, opts={})
|
14
14
|
@logger = Logger.new(STDOUT)
|
15
15
|
@username = username
|
@@ -42,6 +42,8 @@ module GoodData
|
|
42
42
|
csv << row.values_at(*col_keys)
|
43
43
|
end
|
44
44
|
end
|
45
|
+
@logger.info "Table #{table_name} exported to #{csv_path.respond_to?(:path)? csv_path.path : csv_path}"
|
46
|
+
csv_path
|
45
47
|
end
|
46
48
|
|
47
49
|
def rename_table(old_name, new_name)
|
@@ -55,18 +57,27 @@ module GoodData
|
|
55
57
|
def csv_to_new_table(table_name, csvs, opts={})
|
56
58
|
csv_list = list_files(csvs)
|
57
59
|
cols = create_table_from_csv_header(table_name, csv_list[0], opts)
|
58
|
-
load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
|
60
|
+
load_data_from_csv(table_name, csv_list, opts.merge(columns: cols, append: true))
|
61
|
+
end
|
62
|
+
|
63
|
+
def truncate_table(table_name)
|
64
|
+
execute(GoodData::SQLGenerator.truncate_table(table_name))
|
59
65
|
end
|
60
66
|
|
61
67
|
def load_data_from_csv(table_name, csvs, opts={})
|
68
|
+
thread_count = opts[:paralel_copy_thread_count] || PARALEL_COPY_THREAD_COUNT
|
62
69
|
# get the list of files to load and columns in the csv
|
63
70
|
csv_list = list_files(csvs)
|
64
71
|
columns = opts[:columns] || get_csv_headers(csv_list[0])
|
65
72
|
|
73
|
+
# truncate_table unless data should be appended
|
74
|
+
unless opts[:append]
|
75
|
+
truncate_table(table_name)
|
76
|
+
end
|
77
|
+
|
66
78
|
# load each csv from the list
|
67
79
|
single_file = (csv_list.size == 1)
|
68
|
-
|
69
|
-
csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
|
80
|
+
csv_list.peach(thread_count) do |csv_path|
|
70
81
|
if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
|
71
82
|
exc = nil
|
72
83
|
rej = nil
|
@@ -209,7 +220,15 @@ module GoodData
|
|
209
220
|
if header_str.nil? || header_str.empty?
|
210
221
|
return []
|
211
222
|
end
|
212
|
-
|
223
|
+
empty_count = 0
|
224
|
+
header_str.split(',').map{|s| s.gsub(/[\s"-]/,'')}.map do |c|
|
225
|
+
if c.nil? || c.empty?
|
226
|
+
empty_count += 1
|
227
|
+
"empty#{empty_count}"
|
228
|
+
else
|
229
|
+
c
|
230
|
+
end
|
231
|
+
end
|
213
232
|
end
|
214
233
|
end
|
215
234
|
end
|
data/new-version.sh
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/bin/sh
|
2
|
+
|
3
|
+
# get the new version
|
4
|
+
VERSION=`bundle exec ruby <<-EORUBY
|
5
|
+
|
6
|
+
require 'gooddata_datawarehouse'
|
7
|
+
puts GoodData::Datawarehouse::VERSION
|
8
|
+
|
9
|
+
EORUBY`
|
10
|
+
|
11
|
+
# create tag and push it
|
12
|
+
TAG="v$VERSION"
|
13
|
+
git tag $TAG
|
14
|
+
git push origin $TAG
|
15
|
+
|
16
|
+
# build and push the gem
|
17
|
+
gem build gooddata_datawarehouse.gemspec
|
18
|
+
gem push "gooddata_datawarehouse-$VERSION.gem"
|
19
|
+
|
20
|
+
# update the gem after a few secs
|
21
|
+
echo "Sleeping.."
|
22
|
+
sleep 30
|
23
|
+
gem update gooddata_datawarehouse
|
data/spec/datawarehouse_spec.rb
CHANGED
@@ -4,6 +4,7 @@ require 'gooddata_datawarehouse/datawarehouse'
|
|
4
4
|
CSV_PATH = 'spec/data/bike.csv'
|
5
5
|
CSV_PATH2 = 'spec/data/bike2.csv'
|
6
6
|
WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
|
7
|
+
EMPTY_HEADER_CSV_PATH = 'spec/data/emptyheader-bike.csv'
|
7
8
|
CSV_REGEXP = 'spec/data/bike*.csv'
|
8
9
|
|
9
10
|
class Helper
|
@@ -107,9 +108,10 @@ describe GoodData::Datawarehouse do
|
|
107
108
|
expect(@dwh.table_exists?(@random_table_name)).to eq true
|
108
109
|
end
|
109
110
|
|
110
|
-
def check_row_count
|
111
|
+
def check_row_count(files=[CSV_PATH, CSV_PATH2])
|
112
|
+
expected_count = files.map {|f| Helper.line_count(f)}.reduce(:+)
|
111
113
|
# there are lines from both of the csvs
|
112
|
-
expect(@dwh.table_row_count(@random_table_name)).to eq
|
114
|
+
expect(@dwh.table_row_count(@random_table_name)).to eq expected_count
|
113
115
|
end
|
114
116
|
|
115
117
|
describe '#rename_table' do
|
@@ -141,13 +143,13 @@ describe GoodData::Datawarehouse do
|
|
141
143
|
end
|
142
144
|
|
143
145
|
|
144
|
-
it "loads all files in a directory" do
|
146
|
+
it "loads all files in a directory, in paralel" do
|
145
147
|
# make a tempdir and copy the csvs there
|
146
148
|
Dir.mktmpdir('foo') do |dir|
|
147
149
|
FileUtils.cp(CSV_PATH, dir)
|
148
150
|
FileUtils.cp(CSV_PATH2, dir)
|
149
151
|
|
150
|
-
@dwh.csv_to_new_table(@random_table_name, dir)
|
152
|
+
@dwh.csv_to_new_table(@random_table_name, dir, :paralel_copy_thread_count => 2)
|
151
153
|
end
|
152
154
|
|
153
155
|
check_table_exists
|
@@ -279,6 +281,11 @@ describe GoodData::Datawarehouse do
|
|
279
281
|
expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
280
282
|
expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
281
283
|
end
|
284
|
+
it "creates empty1, etc. columns for empty header columns" do
|
285
|
+
@dwh.csv_to_new_table(@random_table_name, EMPTY_HEADER_CSV_PATH)
|
286
|
+
# it should have cols empty1,2
|
287
|
+
expect(@dwh.get_columns(@random_table_name).map {|c| c[:column_name]}).to include('empty1', 'empty2')
|
288
|
+
end
|
282
289
|
end
|
283
290
|
|
284
291
|
describe '#export_table' do
|
@@ -343,6 +350,42 @@ describe GoodData::Datawarehouse do
|
|
343
350
|
# load the data there - expect fail
|
344
351
|
expect{@dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH)}.to raise_error(ArgumentError)
|
345
352
|
end
|
353
|
+
|
354
|
+
it 'truncates the data that is already there' do
|
355
|
+
@dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
|
356
|
+
check_table_exists
|
357
|
+
check_cols
|
358
|
+
|
359
|
+
# load the data there
|
360
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH)
|
361
|
+
check_row_count([CSV_PATH])
|
362
|
+
|
363
|
+
# load the data there again, count should stay
|
364
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH2)
|
365
|
+
check_row_count([CSV_PATH2])
|
366
|
+
end
|
367
|
+
|
368
|
+
it "keeps the data that is there if append option passed" do
|
369
|
+
@dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
|
370
|
+
check_table_exists
|
371
|
+
check_cols
|
372
|
+
|
373
|
+
# load the data there
|
374
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH)
|
375
|
+
check_row_count([CSV_PATH])
|
376
|
+
|
377
|
+
# append the data
|
378
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH2, :append => true)
|
379
|
+
check_row_count([CSV_PATH, CSV_PATH2])
|
380
|
+
end
|
381
|
+
end
|
382
|
+
|
383
|
+
describe "#truncate_table" do
|
384
|
+
it "truncates the given table" do
|
385
|
+
@dwh.csv_to_new_table(@random_table_name, CSV_PATH)
|
386
|
+
@dwh.truncate_table(@random_table_name)
|
387
|
+
expect(@dwh.table_row_count(@random_table_name)).to eq 0
|
388
|
+
end
|
346
389
|
end
|
347
390
|
|
348
391
|
describe '#get_columns' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gooddata_datawarehouse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Petr Cvengros
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -149,8 +149,10 @@ files:
|
|
149
149
|
- lib/gooddata_datawarehouse/datawarehouse.rb
|
150
150
|
- lib/gooddata_datawarehouse/sql_generator.rb
|
151
151
|
- lib/gooddata_datawarehouse/version.rb
|
152
|
+
- new-version.sh
|
152
153
|
- spec/data/bike.csv
|
153
154
|
- spec/data/bike2.csv
|
155
|
+
- spec/data/emptyheader-bike.csv
|
154
156
|
- spec/data/wrong-bike.csv
|
155
157
|
- spec/datawarehouse_spec.rb
|
156
158
|
- spec/spec_helper.rb
|
@@ -181,6 +183,7 @@ summary: Convenient work with GoodData's Datawarehouse (ADS)
|
|
181
183
|
test_files:
|
182
184
|
- spec/data/bike.csv
|
183
185
|
- spec/data/bike2.csv
|
186
|
+
- spec/data/emptyheader-bike.csv
|
184
187
|
- spec/data/wrong-bike.csv
|
185
188
|
- spec/datawarehouse_spec.rb
|
186
189
|
- spec/spec_helper.rb
|