gooddata_datawarehouse 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +1 -0
- data/.rspec +3 -0
- data/README.md +15 -1
- data/gooddata_datawarehouse.gemspec +1 -0
- data/lib/gooddata_datawarehouse/datawarehouse.rb +69 -23
- data/lib/gooddata_datawarehouse/sql_generator.rb +4 -0
- data/lib/gooddata_datawarehouse/version.rb +1 -1
- data/spec/data/bike2.csv +4 -0
- data/spec/data/{bike-wrong.csv → wrong-bike.csv} +0 -0
- data/spec/datawarehouse_spec.rb +110 -7
- data/spec/spec_helper.rb +1 -8
- metadata +22 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a6a8637a21756e98d892ca84f9abbf1d4cb912e
|
4
|
+
data.tar.gz: a3853b73306ee86d97471d8bec24efa0482c1b26
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5b61278e1b619ad6a31ffe9fd947b734aadc61a27090479f51870608385ec07c9cd07a7d35a68b7bbc25b0d60725d0d83e8022e1135ff5e33f773a44062f934
|
7
|
+
data.tar.gz: 3976202ed90034b82109b9ca0e82d1c1fbd6074b7e8aa770ed447a9f1612113205ff2ecc3d2bc55f0c18ca5c6a72be7723237e916bce00c10aa1112e9b4c9429
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: travis-ci
|
data/.rspec
ADDED
data/README.md
CHANGED
@@ -44,6 +44,9 @@ require 'gooddata_datawarehouse'
|
|
44
44
|
|
45
45
|
# connect
|
46
46
|
dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS instance id')
|
47
|
+
# instance id is the identifier of your datawarehouse (ADS).
|
48
|
+
# E.g. for datawarehouse https://secure.gooddata.com/gdc/datawarehouse/instances/d4979ac54df8afb7b5192b0086de6270
|
49
|
+
# the instance id is d4979ac54df8afb7b5192b0086de6270
|
47
50
|
|
48
51
|
# import a csv
|
49
52
|
dwh.csv_to_new_table('my_table', 'path/to/my.csv')
|
@@ -51,7 +54,7 @@ dwh.csv_to_new_table('my_table', 'path/to/my.csv')
|
|
51
54
|
dwh.table_exists?('my_table') # true
|
52
55
|
dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]
|
53
56
|
|
54
|
-
# run an
|
57
|
+
# run an arbitrary SQL statement
|
55
58
|
dwh.execute('ALTER TABLE my_table ADD COLUMN col3 INTEGER')
|
56
59
|
|
57
60
|
# run a select and process results
|
@@ -69,6 +72,17 @@ dwh.export_table('my_new_table', 'path/to/my_new.csv')
|
|
69
72
|
dwh.drop_table('my_new_table')
|
70
73
|
```
|
71
74
|
|
75
|
+
## Troubleshooting
|
76
|
+
### Wrong driver version
|
77
|
+
If you get an error mentioning a handshake failure and a wrong DSS driver version, update your `gooddata-dss-jdbc` gem by running
|
78
|
+
|
79
|
+
$ bundle update
|
80
|
+
|
81
|
+
or
|
82
|
+
|
83
|
+
$ gem update gooddata-dss-jdbc
|
84
|
+
|
85
|
+
You should always have the latest version of this gem.
|
72
86
|
|
73
87
|
## Contributing
|
74
88
|
|
@@ -3,11 +3,13 @@ require 'sequel'
|
|
3
3
|
require 'logger'
|
4
4
|
require 'csv'
|
5
5
|
require 'tempfile'
|
6
|
+
require 'pmap'
|
6
7
|
|
7
8
|
require_relative 'sql_generator'
|
8
9
|
|
9
10
|
module GoodData
|
10
11
|
class Datawarehouse
|
12
|
+
PARALEL_COPY_THREAD_COUNT = 5
|
11
13
|
def initialize(username, password, instance_id, opts={})
|
12
14
|
@logger = Logger.new(STDOUT)
|
13
15
|
@username = username
|
@@ -50,37 +52,57 @@ module GoodData
|
|
50
52
|
execute(GoodData::SQLGenerator.drop_table(table_name,opts))
|
51
53
|
end
|
52
54
|
|
53
|
-
def csv_to_new_table(table_name,
|
54
|
-
|
55
|
-
|
55
|
+
def csv_to_new_table(table_name, csvs, opts={})
|
56
|
+
csv_list = list_files(csvs)
|
57
|
+
cols = create_table_from_csv_header(table_name, csv_list[0], opts)
|
58
|
+
load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
|
56
59
|
end
|
57
60
|
|
58
|
-
def load_data_from_csv(table_name,
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
61
|
+
def load_data_from_csv(table_name, csvs, opts={})
|
62
|
+
# get the list of files to load and columns in the csv
|
63
|
+
csv_list = list_files(csvs)
|
64
|
+
columns = opts[:columns] || get_csv_headers(csv_list[0])
|
65
|
+
|
66
|
+
# load each csv from the list
|
67
|
+
single_file = (csv_list.size == 1)
|
68
|
+
|
69
|
+
csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
|
70
|
+
if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
|
71
|
+
exc = nil
|
72
|
+
rej = nil
|
73
|
+
opts_file = opts
|
74
|
+
else
|
75
|
+
opts_file = opts.clone
|
76
|
+
# assign to both opts and exc -
|
77
|
+
# temporary files to get the excepted records (if not given)
|
78
|
+
exc = opts_file[:exceptions_file] = init_file(opts_file[:exceptions_file], 'exceptions', csv_path, single_file)
|
79
|
+
rej = opts_file[:rejections_file] = init_file(opts_file[:rejections_file], 'rejections', csv_path, single_file)
|
80
|
+
end
|
71
81
|
|
72
|
-
|
73
|
-
|
82
|
+
# execute the load
|
83
|
+
execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts_file))
|
74
84
|
|
75
|
-
|
76
|
-
|
85
|
+
exc.close if exc
|
86
|
+
rej.close if rej
|
77
87
|
|
78
|
-
|
79
|
-
|
80
|
-
|
88
|
+
# if there was something rejected and it shouldn't be ignored, raise an error
|
89
|
+
if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors])
|
90
|
+
fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}"
|
91
|
+
end
|
81
92
|
end
|
82
93
|
end
|
83
94
|
|
95
|
+
def init_file(given_filename, key, csv_path, single_file)
|
96
|
+
# only use file postfix if there are multiple files
|
97
|
+
postfix = single_file ? '' : "-#{File.basename(csv_path)}"
|
98
|
+
|
99
|
+
# take what we have and put the source csv name at the end
|
100
|
+
given_filename = given_filename.path if given_filename.is_a?(File)
|
101
|
+
f = "#{given_filename || Tempfile.new(key).path}#{postfix}"
|
102
|
+
f = File.new(f, 'w') unless f.is_a?(File)
|
103
|
+
f
|
104
|
+
end
|
105
|
+
|
84
106
|
# returns a list of columns created
|
85
107
|
# does nothing if file empty, returns []
|
86
108
|
def create_table_from_csv_header(table_name, csv_path, opts={})
|
@@ -99,6 +121,10 @@ module GoodData
|
|
99
121
|
count > 0
|
100
122
|
end
|
101
123
|
|
124
|
+
def table_row_count(table_name)
|
125
|
+
execute_select(GoodData::SQLGenerator.get_row_count(table_name), :count => true)
|
126
|
+
end
|
127
|
+
|
102
128
|
def get_columns(table_name)
|
103
129
|
res = execute_select(GoodData::SQLGenerator.get_columns(table_name))
|
104
130
|
end
|
@@ -158,6 +184,26 @@ module GoodData
|
|
158
184
|
|
159
185
|
private
|
160
186
|
|
187
|
+
# returns an array of file paths (strings)
|
188
|
+
def list_files(csvs)
|
189
|
+
# csvs can be:
|
190
|
+
case csvs
|
191
|
+
when String
|
192
|
+
|
193
|
+
# directory
|
194
|
+
if File.directory?(csvs)
|
195
|
+
return (Dir.entries(csvs) - ['.', '..']).map{|f| File.join(csvs, f)}
|
196
|
+
end
|
197
|
+
|
198
|
+
# filename or pattern
|
199
|
+
return Dir.glob(csvs)
|
200
|
+
|
201
|
+
# array
|
202
|
+
when Array
|
203
|
+
return csvs
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
161
207
|
def get_csv_headers(csv_path)
|
162
208
|
header_str = File.open(csv_path, &:gets)
|
163
209
|
if header_str.nil? || header_str.empty?
|
@@ -42,6 +42,10 @@ module GoodData
|
|
42
42
|
"SELECT COUNT(*) FROM tables WHERE table_name = '#{table_name}'"
|
43
43
|
end
|
44
44
|
|
45
|
+
def get_row_count(table_name)
|
46
|
+
"SELECT COUNT(*) FROM #{table_name}"
|
47
|
+
end
|
48
|
+
|
45
49
|
def get_columns(table_name)
|
46
50
|
"SELECT column_name, data_type FROM columns WHERE table_name = '#{table_name}'"
|
47
51
|
end
|
data/spec/data/bike2.csv
ADDED
File without changes
|
data/spec/datawarehouse_spec.rb
CHANGED
@@ -1,13 +1,25 @@
|
|
1
1
|
require 'tempfile'
|
2
2
|
require 'gooddata_datawarehouse/datawarehouse'
|
3
|
-
require_relative 'spec_helper'
|
4
3
|
|
5
4
|
CSV_PATH = 'spec/data/bike.csv'
|
6
|
-
|
5
|
+
CSV_PATH2 = 'spec/data/bike2.csv'
|
6
|
+
WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
|
7
|
+
CSV_REGEXP = 'spec/data/bike*.csv'
|
8
|
+
|
9
|
+
class Helper
|
10
|
+
def self.create_default_connection
|
11
|
+
GoodData::Datawarehouse.new(ENV['USERNAME'], ENV['PASSWORD'], ENV['INSTANCE_ID'])
|
12
|
+
end
|
13
|
+
def self.line_count(f)
|
14
|
+
i = 0
|
15
|
+
CSV.foreach(f, :headers => true) {|_| i += 1}
|
16
|
+
i
|
17
|
+
end
|
18
|
+
end
|
7
19
|
|
8
20
|
describe GoodData::Datawarehouse do
|
9
21
|
before(:each) do
|
10
|
-
@dwh =
|
22
|
+
@dwh = Helper::create_default_connection
|
11
23
|
@random = rand(10000000).to_s
|
12
24
|
@random_table_name = "temp_#{@random}"
|
13
25
|
@created_tables = nil
|
@@ -84,6 +96,22 @@ describe GoodData::Datawarehouse do
|
|
84
96
|
end
|
85
97
|
end
|
86
98
|
|
99
|
+
def check_cols
|
100
|
+
# cols are the same as in the csv
|
101
|
+
expected_cols = File.open(CSV_PATH, &:gets).strip.split(',')
|
102
|
+
expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
|
103
|
+
end
|
104
|
+
|
105
|
+
def check_table_exists
|
106
|
+
# table exists
|
107
|
+
expect(@dwh.table_exists?(@random_table_name)).to eq true
|
108
|
+
end
|
109
|
+
|
110
|
+
def check_row_count
|
111
|
+
# there are lines from both of the csvs
|
112
|
+
expect(@dwh.table_row_count(@random_table_name)).to eq Helper.line_count(CSV_PATH) + Helper.line_count(CSV_PATH2)
|
113
|
+
end
|
114
|
+
|
87
115
|
describe '#rename_table' do
|
88
116
|
it 'renames a table' do
|
89
117
|
cols = ['col1', 'col2', 'col3']
|
@@ -109,8 +137,38 @@ describe GoodData::Datawarehouse do
|
|
109
137
|
expect(@dwh.table_exists?(@random_table_name)).to eq true
|
110
138
|
|
111
139
|
# cols are the same as in the csv
|
112
|
-
|
113
|
-
|
140
|
+
check_cols
|
141
|
+
end
|
142
|
+
|
143
|
+
|
144
|
+
it "loads all files in a directory" do
|
145
|
+
# make a tempdir and copy the csvs there
|
146
|
+
Dir.mktmpdir('foo') do |dir|
|
147
|
+
FileUtils.cp(CSV_PATH, dir)
|
148
|
+
FileUtils.cp(CSV_PATH2, dir)
|
149
|
+
|
150
|
+
@dwh.csv_to_new_table(@random_table_name, dir)
|
151
|
+
end
|
152
|
+
|
153
|
+
check_table_exists
|
154
|
+
check_cols
|
155
|
+
check_row_count
|
156
|
+
end
|
157
|
+
|
158
|
+
it "loads all files given in a list" do
|
159
|
+
@dwh.csv_to_new_table(@random_table_name, [CSV_PATH, CSV_PATH2])
|
160
|
+
|
161
|
+
check_table_exists
|
162
|
+
check_cols
|
163
|
+
check_row_count
|
164
|
+
end
|
165
|
+
|
166
|
+
it "loads all files given by a regexp" do
|
167
|
+
@dwh.csv_to_new_table(@random_table_name, CSV_REGEXP)
|
168
|
+
|
169
|
+
check_table_exists
|
170
|
+
check_cols
|
171
|
+
check_row_count
|
114
172
|
end
|
115
173
|
|
116
174
|
it 'writes exceptions and rejections to files at given path, passed strings' do
|
@@ -123,6 +181,25 @@ describe GoodData::Datawarehouse do
|
|
123
181
|
expect(File.size(exc)).to eq 0
|
124
182
|
end
|
125
183
|
|
184
|
+
it 'overwrites the rejections and exceptions' do
|
185
|
+
rej = Tempfile.new('rejections.csv')
|
186
|
+
exc = Tempfile.new('exceptions.csv')
|
187
|
+
|
188
|
+
@dwh.csv_to_new_table(@random_table_name, WRONG_CSV_PATH, :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
|
189
|
+
|
190
|
+
rej_size = File.size(rej)
|
191
|
+
exc_size = File.size(exc)
|
192
|
+
|
193
|
+
expect(rej_size).to be > 0
|
194
|
+
expect(exc_size).to be > 0
|
195
|
+
|
196
|
+
# load it again and see if it was overwritten - has the same size
|
197
|
+
@dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH, :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
|
198
|
+
|
199
|
+
expect(File.size(rej)).to eq rej_size
|
200
|
+
expect(File.size(exc)).to be exc_size
|
201
|
+
end
|
202
|
+
|
126
203
|
it 'writes exceptions and rejections to files at given path, passed files' do
|
127
204
|
rej = Tempfile.new('rejections.csv')
|
128
205
|
exc = Tempfile.new('exceptions.csv')
|
@@ -162,7 +239,7 @@ describe GoodData::Datawarehouse do
|
|
162
239
|
expect(File.size(exc)).to be > 0
|
163
240
|
end
|
164
241
|
|
165
|
-
it "
|
242
|
+
it "loads fine when ignoring errors and not passing files" do
|
166
243
|
@dwh.csv_to_new_table(@random_table_name, CSV_PATH, :ignore_parse_errors => true)
|
167
244
|
|
168
245
|
# table exists
|
@@ -173,7 +250,7 @@ describe GoodData::Datawarehouse do
|
|
173
250
|
expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
|
174
251
|
end
|
175
252
|
|
176
|
-
it "works with non-existing files" do
|
253
|
+
it "works with non-existing rejection/exception files" do
|
177
254
|
t = Tempfile.new('haha')
|
178
255
|
d = File.dirname(t)
|
179
256
|
|
@@ -188,6 +265,20 @@ describe GoodData::Datawarehouse do
|
|
188
265
|
expect(File.size(rej)).to be > 0
|
189
266
|
expect(File.size(exc)).to be > 0
|
190
267
|
end
|
268
|
+
|
269
|
+
it "fails if one of the files is wrong" do
|
270
|
+
expect{@dwh.csv_to_new_table(@random_table_name, [CSV_PATH, WRONG_CSV_PATH])}.to raise_error(ArgumentError)
|
271
|
+
end
|
272
|
+
|
273
|
+
it "creates exceptions / rejections for each file when wanted" do
|
274
|
+
rej = Tempfile.new('rejections.csv')
|
275
|
+
exc = Tempfile.new('exceptions.csv')
|
276
|
+
|
277
|
+
@dwh.csv_to_new_table(@random_table_name, [CSV_PATH, WRONG_CSV_PATH], :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
|
278
|
+
|
279
|
+
expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
280
|
+
expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
281
|
+
end
|
191
282
|
end
|
192
283
|
|
193
284
|
describe '#export_table' do
|
@@ -232,6 +323,18 @@ describe GoodData::Datawarehouse do
|
|
232
323
|
expect(exported).to eq imported
|
233
324
|
end
|
234
325
|
|
326
|
+
it "can load multiple files" do
|
327
|
+
# create the table
|
328
|
+
@dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
|
329
|
+
check_table_exists
|
330
|
+
check_cols
|
331
|
+
|
332
|
+
# load the data there
|
333
|
+
@dwh.load_data_from_csv(@random_table_name, [CSV_PATH, CSV_PATH2])
|
334
|
+
|
335
|
+
check_row_count
|
336
|
+
end
|
337
|
+
|
235
338
|
it 'fails for a wrong csv' do
|
236
339
|
# create the table
|
237
340
|
@dwh.create_table_from_csv_header(@random_table_name, WRONG_CSV_PATH)
|
data/spec/spec_helper.rb
CHANGED
@@ -1,15 +1,8 @@
|
|
1
|
-
require 'gooddata_datawarehouse'
|
2
1
|
require 'coveralls'
|
3
|
-
|
4
|
-
Coveralls.wear_merged!
|
2
|
+
Coveralls.wear!
|
5
3
|
|
6
4
|
RSpec.configure do |c|
|
7
5
|
c.filter_run :focus => true
|
8
6
|
c.run_all_when_everything_filtered = true
|
9
7
|
end
|
10
8
|
|
11
|
-
class SpecHelper
|
12
|
-
def self.create_default_connection
|
13
|
-
GoodData::Datawarehouse.new(ENV['USERNAME'], ENV['PASSWORD'], ENV['INSTANCE_ID'])
|
14
|
-
end
|
15
|
-
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gooddata_datawarehouse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Petr Cvengros
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,6 +114,20 @@ dependencies:
|
|
114
114
|
- - ~>
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: '0.1'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
requirement: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ~>
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '1.0'
|
123
|
+
name: pmap
|
124
|
+
prerelease: false
|
125
|
+
type: :runtime
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ~>
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '1.0'
|
117
131
|
description: ''
|
118
132
|
email:
|
119
133
|
- petr.cvengros@gooddata.com
|
@@ -121,7 +135,9 @@ executables: []
|
|
121
135
|
extensions: []
|
122
136
|
extra_rdoc_files: []
|
123
137
|
files:
|
138
|
+
- .coveralls.yml
|
124
139
|
- .gitignore
|
140
|
+
- .rspec
|
125
141
|
- .travis.yml
|
126
142
|
- Gemfile
|
127
143
|
- LICENSE.txt
|
@@ -133,8 +149,9 @@ files:
|
|
133
149
|
- lib/gooddata_datawarehouse/datawarehouse.rb
|
134
150
|
- lib/gooddata_datawarehouse/sql_generator.rb
|
135
151
|
- lib/gooddata_datawarehouse/version.rb
|
136
|
-
- spec/data/bike-wrong.csv
|
137
152
|
- spec/data/bike.csv
|
153
|
+
- spec/data/bike2.csv
|
154
|
+
- spec/data/wrong-bike.csv
|
138
155
|
- spec/datawarehouse_spec.rb
|
139
156
|
- spec/spec_helper.rb
|
140
157
|
homepage: ''
|
@@ -162,7 +179,8 @@ signing_key:
|
|
162
179
|
specification_version: 4
|
163
180
|
summary: Convenient work with GoodData's Datawarehouse (ADS)
|
164
181
|
test_files:
|
165
|
-
- spec/data/bike-wrong.csv
|
166
182
|
- spec/data/bike.csv
|
183
|
+
- spec/data/bike2.csv
|
184
|
+
- spec/data/wrong-bike.csv
|
167
185
|
- spec/datawarehouse_spec.rb
|
168
186
|
- spec/spec_helper.rb
|