gooddata_datawarehouse 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +1 -0
- data/.rspec +3 -0
- data/README.md +15 -1
- data/gooddata_datawarehouse.gemspec +1 -0
- data/lib/gooddata_datawarehouse/datawarehouse.rb +69 -23
- data/lib/gooddata_datawarehouse/sql_generator.rb +4 -0
- data/lib/gooddata_datawarehouse/version.rb +1 -1
- data/spec/data/bike2.csv +4 -0
- data/spec/data/{bike-wrong.csv → wrong-bike.csv} +0 -0
- data/spec/datawarehouse_spec.rb +110 -7
- data/spec/spec_helper.rb +1 -8
- metadata +22 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9a6a8637a21756e98d892ca84f9abbf1d4cb912e
|
4
|
+
data.tar.gz: a3853b73306ee86d97471d8bec24efa0482c1b26
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5b61278e1b619ad6a31ffe9fd947b734aadc61a27090479f51870608385ec07c9cd07a7d35a68b7bbc25b0d60725d0d83e8022e1135ff5e33f773a44062f934
|
7
|
+
data.tar.gz: 3976202ed90034b82109b9ca0e82d1c1fbd6074b7e8aa770ed447a9f1612113205ff2ecc3d2bc55f0c18ca5c6a72be7723237e916bce00c10aa1112e9b4c9429
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: travis-ci
|
data/.rspec
ADDED
data/README.md
CHANGED
@@ -44,6 +44,9 @@ require 'gooddata_datawarehouse'
|
|
44
44
|
|
45
45
|
# connect
|
46
46
|
dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS instance id')
|
47
|
+
# The instance id is the identifier of your data warehouse (ADS) instance.
|
48
|
+
# E.g. for datawarehouse https://secure.gooddata.com/gdc/datawarehouse/instances/d4979ac54df8afb7b5192b0086de6270
|
49
|
+
# the instance id is d4979ac54df8afb7b5192b0086de6270
|
47
50
|
|
48
51
|
# import a csv
|
49
52
|
dwh.csv_to_new_table('my_table', 'path/to/my.csv')
|
@@ -51,7 +54,7 @@ dwh.csv_to_new_table('my_table', 'path/to/my.csv')
|
|
51
54
|
dwh.table_exists?('my_table') # true
|
52
55
|
dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]
|
53
56
|
|
54
|
-
# run an
|
57
|
+
# run an arbitrary SQL statement
|
55
58
|
dwh.execute('ALTER TABLE my_table ADD COLUMN col3 INTEGER')
|
56
59
|
|
57
60
|
# run a select and process results
|
@@ -69,6 +72,17 @@ dwh.export_table('my_new_table', 'path/to/my_new.csv')
|
|
69
72
|
dwh.drop_table('my_new_table')
|
70
73
|
```
|
71
74
|
|
75
|
+
## Troubleshooting
|
76
|
+
### Wrong driver version
|
77
|
+
If you get an error that mentions a handshake failure and a wrong DSS driver version, update your `gooddata-dss-jdbc` gem by running
|
78
|
+
|
79
|
+
$ bundle update
|
80
|
+
|
81
|
+
or
|
82
|
+
|
83
|
+
$ gem update gooddata-dss-jdbc
|
84
|
+
|
85
|
+
You should always have the latest version of this gem.
|
72
86
|
|
73
87
|
## Contributing
|
74
88
|
|
@@ -3,11 +3,13 @@ require 'sequel'
|
|
3
3
|
require 'logger'
|
4
4
|
require 'csv'
|
5
5
|
require 'tempfile'
|
6
|
+
require 'pmap'
|
6
7
|
|
7
8
|
require_relative 'sql_generator'
|
8
9
|
|
9
10
|
module GoodData
|
10
11
|
class Datawarehouse
|
12
|
+
PARALEL_COPY_THREAD_COUNT = 5
|
11
13
|
def initialize(username, password, instance_id, opts={})
|
12
14
|
@logger = Logger.new(STDOUT)
|
13
15
|
@username = username
|
@@ -50,37 +52,57 @@ module GoodData
|
|
50
52
|
execute(GoodData::SQLGenerator.drop_table(table_name,opts))
|
51
53
|
end
|
52
54
|
|
53
|
-
def csv_to_new_table(table_name,
|
54
|
-
|
55
|
-
|
55
|
+
def csv_to_new_table(table_name, csvs, opts={})
|
56
|
+
csv_list = list_files(csvs)
|
57
|
+
cols = create_table_from_csv_header(table_name, csv_list[0], opts)
|
58
|
+
load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
|
56
59
|
end
|
57
60
|
|
58
|
-
def load_data_from_csv(table_name,
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
61
|
+
def load_data_from_csv(table_name, csvs, opts={})
|
62
|
+
# get the list of files to load and columns in the csv
|
63
|
+
csv_list = list_files(csvs)
|
64
|
+
columns = opts[:columns] || get_csv_headers(csv_list[0])
|
65
|
+
|
66
|
+
# load each csv from the list
|
67
|
+
single_file = (csv_list.size == 1)
|
68
|
+
|
69
|
+
csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
|
70
|
+
if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
|
71
|
+
exc = nil
|
72
|
+
rej = nil
|
73
|
+
opts_file = opts
|
74
|
+
else
|
75
|
+
opts_file = opts.clone
|
76
|
+
# assign to both opts_file and the local exc/rej variables -
|
77
|
+
# temporary files to get the excepted records (if not given)
|
78
|
+
exc = opts_file[:exceptions_file] = init_file(opts_file[:exceptions_file], 'exceptions', csv_path, single_file)
|
79
|
+
rej = opts_file[:rejections_file] = init_file(opts_file[:rejections_file], 'rejections', csv_path, single_file)
|
80
|
+
end
|
71
81
|
|
72
|
-
|
73
|
-
|
82
|
+
# execute the load
|
83
|
+
execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts_file))
|
74
84
|
|
75
|
-
|
76
|
-
|
85
|
+
exc.close if exc
|
86
|
+
rej.close if rej
|
77
87
|
|
78
|
-
|
79
|
-
|
80
|
-
|
88
|
+
# if there was something rejected and it shouldn't be ignored, raise an error
|
89
|
+
if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors])
|
90
|
+
fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}"
|
91
|
+
end
|
81
92
|
end
|
82
93
|
end
|
83
94
|
|
95
|
+
def init_file(given_filename, key, csv_path, single_file)
|
96
|
+
# only use file postfix if there are multiple files
|
97
|
+
postfix = single_file ? '' : "-#{File.basename(csv_path)}"
|
98
|
+
|
99
|
+
# take what we have and put the source csv name at the end
|
100
|
+
given_filename = given_filename.path if given_filename.is_a?(File)
|
101
|
+
f = "#{given_filename || Tempfile.new(key).path}#{postfix}"
|
102
|
+
f = File.new(f, 'w') unless f.is_a?(File)
|
103
|
+
f
|
104
|
+
end
|
105
|
+
|
84
106
|
# returns a list of columns created
|
85
107
|
# does nothing if file empty, returns []
|
86
108
|
def create_table_from_csv_header(table_name, csv_path, opts={})
|
@@ -99,6 +121,10 @@ module GoodData
|
|
99
121
|
count > 0
|
100
122
|
end
|
101
123
|
|
124
|
+
def table_row_count(table_name)
|
125
|
+
execute_select(GoodData::SQLGenerator.get_row_count(table_name), :count => true)
|
126
|
+
end
|
127
|
+
|
102
128
|
def get_columns(table_name)
|
103
129
|
res = execute_select(GoodData::SQLGenerator.get_columns(table_name))
|
104
130
|
end
|
@@ -158,6 +184,26 @@ module GoodData
|
|
158
184
|
|
159
185
|
private
|
160
186
|
|
187
|
+
# returns an array of file paths (strings)
|
188
|
+
def list_files(csvs)
|
189
|
+
# csvs can be:
|
190
|
+
case csvs
|
191
|
+
when String
|
192
|
+
|
193
|
+
# directory
|
194
|
+
if File.directory?(csvs)
|
195
|
+
return (Dir.entries(csvs) - ['.', '..']).map{|f| File.join(csvs, f)}
|
196
|
+
end
|
197
|
+
|
198
|
+
# filename or pattern
|
199
|
+
return Dir.glob(csvs)
|
200
|
+
|
201
|
+
# array
|
202
|
+
when Array
|
203
|
+
return csvs
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
161
207
|
def get_csv_headers(csv_path)
|
162
208
|
header_str = File.open(csv_path, &:gets)
|
163
209
|
if header_str.nil? || header_str.empty?
|
@@ -42,6 +42,10 @@ module GoodData
|
|
42
42
|
"SELECT COUNT(*) FROM tables WHERE table_name = '#{table_name}'"
|
43
43
|
end
|
44
44
|
|
45
|
+
def get_row_count(table_name)
|
46
|
+
"SELECT COUNT(*) FROM #{table_name}"
|
47
|
+
end
|
48
|
+
|
45
49
|
def get_columns(table_name)
|
46
50
|
"SELECT column_name, data_type FROM columns WHERE table_name = '#{table_name}'"
|
47
51
|
end
|
data/spec/data/bike2.csv
ADDED
File without changes
|
data/spec/datawarehouse_spec.rb
CHANGED
@@ -1,13 +1,25 @@
|
|
1
1
|
require 'tempfile'
|
2
2
|
require 'gooddata_datawarehouse/datawarehouse'
|
3
|
-
require_relative 'spec_helper'
|
4
3
|
|
5
4
|
CSV_PATH = 'spec/data/bike.csv'
|
6
|
-
|
5
|
+
CSV_PATH2 = 'spec/data/bike2.csv'
|
6
|
+
WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
|
7
|
+
CSV_REGEXP = 'spec/data/bike*.csv'
|
8
|
+
|
9
|
+
class Helper
|
10
|
+
def self.create_default_connection
|
11
|
+
GoodData::Datawarehouse.new(ENV['USERNAME'], ENV['PASSWORD'], ENV['INSTANCE_ID'])
|
12
|
+
end
|
13
|
+
def self.line_count(f)
|
14
|
+
i = 0
|
15
|
+
CSV.foreach(f, :headers => true) {|_| i += 1}
|
16
|
+
i
|
17
|
+
end
|
18
|
+
end
|
7
19
|
|
8
20
|
describe GoodData::Datawarehouse do
|
9
21
|
before(:each) do
|
10
|
-
@dwh =
|
22
|
+
@dwh = Helper::create_default_connection
|
11
23
|
@random = rand(10000000).to_s
|
12
24
|
@random_table_name = "temp_#{@random}"
|
13
25
|
@created_tables = nil
|
@@ -84,6 +96,22 @@ describe GoodData::Datawarehouse do
|
|
84
96
|
end
|
85
97
|
end
|
86
98
|
|
99
|
+
def check_cols
|
100
|
+
# cols are the same as in the csv
|
101
|
+
expected_cols = File.open(CSV_PATH, &:gets).strip.split(',')
|
102
|
+
expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
|
103
|
+
end
|
104
|
+
|
105
|
+
def check_table_exists
|
106
|
+
# table exists
|
107
|
+
expect(@dwh.table_exists?(@random_table_name)).to eq true
|
108
|
+
end
|
109
|
+
|
110
|
+
def check_row_count
|
111
|
+
# there are lines from both of the csvs
|
112
|
+
expect(@dwh.table_row_count(@random_table_name)).to eq Helper.line_count(CSV_PATH) + Helper.line_count(CSV_PATH2)
|
113
|
+
end
|
114
|
+
|
87
115
|
describe '#rename_table' do
|
88
116
|
it 'renames a table' do
|
89
117
|
cols = ['col1', 'col2', 'col3']
|
@@ -109,8 +137,38 @@ describe GoodData::Datawarehouse do
|
|
109
137
|
expect(@dwh.table_exists?(@random_table_name)).to eq true
|
110
138
|
|
111
139
|
# cols are the same as in the csv
|
112
|
-
|
113
|
-
|
140
|
+
check_cols
|
141
|
+
end
|
142
|
+
|
143
|
+
|
144
|
+
it "loads all files in a directory" do
|
145
|
+
# make a tempdir and copy the csvs there
|
146
|
+
Dir.mktmpdir('foo') do |dir|
|
147
|
+
FileUtils.cp(CSV_PATH, dir)
|
148
|
+
FileUtils.cp(CSV_PATH2, dir)
|
149
|
+
|
150
|
+
@dwh.csv_to_new_table(@random_table_name, dir)
|
151
|
+
end
|
152
|
+
|
153
|
+
check_table_exists
|
154
|
+
check_cols
|
155
|
+
check_row_count
|
156
|
+
end
|
157
|
+
|
158
|
+
it "loads all files given in a list" do
|
159
|
+
@dwh.csv_to_new_table(@random_table_name, [CSV_PATH, CSV_PATH2])
|
160
|
+
|
161
|
+
check_table_exists
|
162
|
+
check_cols
|
163
|
+
check_row_count
|
164
|
+
end
|
165
|
+
|
166
|
+
it "loads all files given by a regexp" do
|
167
|
+
@dwh.csv_to_new_table(@random_table_name, CSV_REGEXP)
|
168
|
+
|
169
|
+
check_table_exists
|
170
|
+
check_cols
|
171
|
+
check_row_count
|
114
172
|
end
|
115
173
|
|
116
174
|
it 'writes exceptions and rejections to files at given path, passed strings' do
|
@@ -123,6 +181,25 @@ describe GoodData::Datawarehouse do
|
|
123
181
|
expect(File.size(exc)).to eq 0
|
124
182
|
end
|
125
183
|
|
184
|
+
it 'overwrites the rejections and exceptions' do
|
185
|
+
rej = Tempfile.new('rejections.csv')
|
186
|
+
exc = Tempfile.new('exceptions.csv')
|
187
|
+
|
188
|
+
@dwh.csv_to_new_table(@random_table_name, WRONG_CSV_PATH, :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
|
189
|
+
|
190
|
+
rej_size = File.size(rej)
|
191
|
+
exc_size = File.size(exc)
|
192
|
+
|
193
|
+
expect(rej_size).to be > 0
|
194
|
+
expect(exc_size).to be > 0
|
195
|
+
|
196
|
+
# load it again and see if it was overwritten - has the same size
|
197
|
+
@dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH, :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
|
198
|
+
|
199
|
+
expect(File.size(rej)).to eq rej_size
|
200
|
+
expect(File.size(exc)).to be exc_size
|
201
|
+
end
|
202
|
+
|
126
203
|
it 'writes exceptions and rejections to files at given path, passed files' do
|
127
204
|
rej = Tempfile.new('rejections.csv')
|
128
205
|
exc = Tempfile.new('exceptions.csv')
|
@@ -162,7 +239,7 @@ describe GoodData::Datawarehouse do
|
|
162
239
|
expect(File.size(exc)).to be > 0
|
163
240
|
end
|
164
241
|
|
165
|
-
it "
|
242
|
+
it "loads fine when ignoring errors and not passing files" do
|
166
243
|
@dwh.csv_to_new_table(@random_table_name, CSV_PATH, :ignore_parse_errors => true)
|
167
244
|
|
168
245
|
# table exists
|
@@ -173,7 +250,7 @@ describe GoodData::Datawarehouse do
|
|
173
250
|
expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
|
174
251
|
end
|
175
252
|
|
176
|
-
it "works with non-existing files" do
|
253
|
+
it "works with non-existing rejection/exception files" do
|
177
254
|
t = Tempfile.new('haha')
|
178
255
|
d = File.dirname(t)
|
179
256
|
|
@@ -188,6 +265,20 @@ describe GoodData::Datawarehouse do
|
|
188
265
|
expect(File.size(rej)).to be > 0
|
189
266
|
expect(File.size(exc)).to be > 0
|
190
267
|
end
|
268
|
+
|
269
|
+
it "fails if one of the files is wrong" do
|
270
|
+
expect{@dwh.csv_to_new_table(@random_table_name, [CSV_PATH, WRONG_CSV_PATH])}.to raise_error(ArgumentError)
|
271
|
+
end
|
272
|
+
|
273
|
+
it "creates exceptions / rejections for each file when wanted" do
|
274
|
+
rej = Tempfile.new('rejections.csv')
|
275
|
+
exc = Tempfile.new('exceptions.csv')
|
276
|
+
|
277
|
+
@dwh.csv_to_new_table(@random_table_name, [CSV_PATH, WRONG_CSV_PATH], :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
|
278
|
+
|
279
|
+
expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
280
|
+
expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
281
|
+
end
|
191
282
|
end
|
192
283
|
|
193
284
|
describe '#export_table' do
|
@@ -232,6 +323,18 @@ describe GoodData::Datawarehouse do
|
|
232
323
|
expect(exported).to eq imported
|
233
324
|
end
|
234
325
|
|
326
|
+
it "can load multiple files" do
|
327
|
+
# create the table
|
328
|
+
@dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
|
329
|
+
check_table_exists
|
330
|
+
check_cols
|
331
|
+
|
332
|
+
# load the data there
|
333
|
+
@dwh.load_data_from_csv(@random_table_name, [CSV_PATH, CSV_PATH2])
|
334
|
+
|
335
|
+
check_row_count
|
336
|
+
end
|
337
|
+
|
235
338
|
it 'fails for a wrong csv' do
|
236
339
|
# create the table
|
237
340
|
@dwh.create_table_from_csv_header(@random_table_name, WRONG_CSV_PATH)
|
data/spec/spec_helper.rb
CHANGED
@@ -1,15 +1,8 @@
|
|
1
|
-
require 'gooddata_datawarehouse'
|
2
1
|
require 'coveralls'
|
3
|
-
|
4
|
-
Coveralls.wear_merged!
|
2
|
+
Coveralls.wear!
|
5
3
|
|
6
4
|
RSpec.configure do |c|
|
7
5
|
c.filter_run :focus => true
|
8
6
|
c.run_all_when_everything_filtered = true
|
9
7
|
end
|
10
8
|
|
11
|
-
class SpecHelper
|
12
|
-
def self.create_default_connection
|
13
|
-
GoodData::Datawarehouse.new(ENV['USERNAME'], ENV['PASSWORD'], ENV['INSTANCE_ID'])
|
14
|
-
end
|
15
|
-
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gooddata_datawarehouse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Petr Cvengros
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,6 +114,20 @@ dependencies:
|
|
114
114
|
- - ~>
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: '0.1'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
requirement: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ~>
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '1.0'
|
123
|
+
name: pmap
|
124
|
+
prerelease: false
|
125
|
+
type: :runtime
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ~>
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '1.0'
|
117
131
|
description: ''
|
118
132
|
email:
|
119
133
|
- petr.cvengros@gooddata.com
|
@@ -121,7 +135,9 @@ executables: []
|
|
121
135
|
extensions: []
|
122
136
|
extra_rdoc_files: []
|
123
137
|
files:
|
138
|
+
- .coveralls.yml
|
124
139
|
- .gitignore
|
140
|
+
- .rspec
|
125
141
|
- .travis.yml
|
126
142
|
- Gemfile
|
127
143
|
- LICENSE.txt
|
@@ -133,8 +149,9 @@ files:
|
|
133
149
|
- lib/gooddata_datawarehouse/datawarehouse.rb
|
134
150
|
- lib/gooddata_datawarehouse/sql_generator.rb
|
135
151
|
- lib/gooddata_datawarehouse/version.rb
|
136
|
-
- spec/data/bike-wrong.csv
|
137
152
|
- spec/data/bike.csv
|
153
|
+
- spec/data/bike2.csv
|
154
|
+
- spec/data/wrong-bike.csv
|
138
155
|
- spec/datawarehouse_spec.rb
|
139
156
|
- spec/spec_helper.rb
|
140
157
|
homepage: ''
|
@@ -162,7 +179,8 @@ signing_key:
|
|
162
179
|
specification_version: 4
|
163
180
|
summary: Convenient work with GoodData's Datawarehouse (ADS)
|
164
181
|
test_files:
|
165
|
-
- spec/data/bike-wrong.csv
|
166
182
|
- spec/data/bike.csv
|
183
|
+
- spec/data/bike2.csv
|
184
|
+
- spec/data/wrong-bike.csv
|
167
185
|
- spec/datawarehouse_spec.rb
|
168
186
|
- spec/spec_helper.rb
|