gooddata_datawarehouse 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7f98f560684ad94b0c65263eb0b8bb39748d3fe8
4
- data.tar.gz: 5626327514feed7e85f5541871100b3e28373c0d
3
+ metadata.gz: 9a6a8637a21756e98d892ca84f9abbf1d4cb912e
4
+ data.tar.gz: a3853b73306ee86d97471d8bec24efa0482c1b26
5
5
  SHA512:
6
- metadata.gz: 52e171a5c74e80cc056020263d98fe3ca25a28952d7a3ac338356de34003944279ca427370002589d9fb2ad21bdbe32679732f28b19ca2e1c66482e6996fd175
7
- data.tar.gz: f285e427b6f9216cd578a9fa9f8ebf32d4a2769a62ed4bb1b03c4ea452009d36b91320463519524e9d97a1aece663155b4f45e731bd84af5f29c4893b41d1f0a
6
+ metadata.gz: c5b61278e1b619ad6a31ffe9fd947b734aadc61a27090479f51870608385ec07c9cd07a7d35a68b7bbc25b0d60725d0d83e8022e1135ff5e33f773a44062f934
7
+ data.tar.gz: 3976202ed90034b82109b9ca0e82d1c1fbd6074b7e8aa770ed447a9f1612113205ff2ecc3d2bc55f0c18ca5c6a72be7723237e916bce00c10aa1112e9b4c9429
data/.coveralls.yml ADDED
@@ -0,0 +1 @@
1
+ service_name: travis-ci
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --require spec_helper
data/README.md CHANGED
@@ -44,6 +44,9 @@ require 'gooddata_datawarehouse'
44
44
 
45
45
  # connect
46
46
  dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS instance id')
47
+ # instance id is the identifier of your datawarehouse (ADS).
48
+ # E.g. for datawarehouse https://secure.gooddata.com/gdc/datawarehouse/instances/d4979ac54df8afb7b5192b0086de6270
49
+ # the instance id is d4979ac54df8afb7b5192b0086de6270
47
50
 
48
51
  # import a csv
49
52
  dwh.csv_to_new_table('my_table', 'path/to/my.csv')
@@ -51,7 +54,7 @@ dwh.csv_to_new_table('my_table', 'path/to/my.csv')
51
54
  dwh.table_exists?('my_table') # true
52
55
  dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]
53
56
 
54
- # run an aribrary sql
57
+ # run an arbitrary sql
55
58
  dwh.execute('ALTER TABLE my_table ADD COLUMN col3 INTEGER')
56
59
 
57
60
  # run a select and process results
@@ -69,6 +72,17 @@ dwh.export_table('my_new_table', 'path/to/my_new.csv')
69
72
  dwh.drop_table('my_new_table')
70
73
  ```
71
74
 
75
+ ## Troubleshooting
76
+ ### Wrong driver version
77
+ If you get an error mentioning a handshake failure and a wrong DSS driver version, update your `gooddata-dss-jdbc` gem by running
78
+
79
+ $ bundle update
80
+
81
+ or
82
+
83
+ $ gem update gooddata-dss-jdbc
84
+
85
+ You should always have the latest version of this gem.
72
86
 
73
87
  ## Contributing
74
88
 
@@ -30,4 +30,5 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_dependency "sequel", "~> 4.17"
32
32
  spec.add_dependency "gooddata-dss-jdbc", "~> 0.1"
33
+ spec.add_dependency "pmap", "~> 1.0"
33
34
  end
@@ -3,11 +3,13 @@ require 'sequel'
3
3
  require 'logger'
4
4
  require 'csv'
5
5
  require 'tempfile'
6
+ require 'pmap'
6
7
 
7
8
  require_relative 'sql_generator'
8
9
 
9
10
  module GoodData
10
11
  class Datawarehouse
12
+ PARALEL_COPY_THREAD_COUNT = 5
11
13
  def initialize(username, password, instance_id, opts={})
12
14
  @logger = Logger.new(STDOUT)
13
15
  @username = username
@@ -50,37 +52,57 @@ module GoodData
50
52
  execute(GoodData::SQLGenerator.drop_table(table_name,opts))
51
53
  end
52
54
 
53
- def csv_to_new_table(table_name, csv_path, opts={})
54
- cols = create_table_from_csv_header(table_name, csv_path, opts)
55
- load_data_from_csv(table_name, csv_path, opts.merge(columns: cols))
55
+ def csv_to_new_table(table_name, csvs, opts={})
56
+ csv_list = list_files(csvs)
57
+ cols = create_table_from_csv_header(table_name, csv_list[0], opts)
58
+ load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
56
59
  end
57
60
 
58
- def load_data_from_csv(table_name, csv_path, opts={})
59
- columns = opts[:columns] || get_csv_headers(csv_path)
60
-
61
- if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
62
- exc = nil
63
- rej = nil
64
- else
65
- # temporary files to get the excepted records (if not given)
66
- exc = opts[:exceptions_file] ||= Tempfile.new('exceptions')
67
- rej = opts[:rejections_file] ||= Tempfile.new('rejections')
68
- exc = File.new(exc, 'w') unless exc.is_a?(File)
69
- rej = File.new(rej, 'w') unless rej.is_a?(File)
70
- end
61
+ def load_data_from_csv(table_name, csvs, opts={})
62
+ # get the list of files to load and columns in the csv
63
+ csv_list = list_files(csvs)
64
+ columns = opts[:columns] || get_csv_headers(csv_list[0])
65
+
66
+ # load each csv from the list
67
+ single_file = (csv_list.size == 1)
68
+
69
+ csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
70
+ if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
71
+ exc = nil
72
+ rej = nil
73
+ opts_file = opts
74
+ else
75
+ opts_file = opts.clone
76
+ # assign the file to both opts_file and exc/rej
77
+ # temporary files to get the excepted records (if not given)
78
+ exc = opts_file[:exceptions_file] = init_file(opts_file[:exceptions_file], 'exceptions', csv_path, single_file)
79
+ rej = opts_file[:rejections_file] = init_file(opts_file[:rejections_file], 'rejections', csv_path, single_file)
80
+ end
71
81
 
72
- # execute the load
73
- execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts))
82
+ # execute the load
83
+ execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts_file))
74
84
 
75
- exc.close if exc
76
- rej.close if rej
85
+ exc.close if exc
86
+ rej.close if rej
77
87
 
78
- # if there was something rejected and it shouldn't be ignored, raise an error
79
- if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors])
80
- fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}"
88
+ # if there was something rejected and it shouldn't be ignored, raise an error
89
+ if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors])
90
+ fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}"
91
+ end
81
92
  end
82
93
  end
83
94
 
95
+ def init_file(given_filename, key, csv_path, single_file)
96
+ # only use file postfix if there are multiple files
97
+ postfix = single_file ? '' : "-#{File.basename(csv_path)}"
98
+
99
+ # take what we have and put the source csv name at the end
100
+ given_filename = given_filename.path if given_filename.is_a?(File)
101
+ f = "#{given_filename || Tempfile.new(key).path}#{postfix}"
102
+ f = File.new(f, 'w') unless f.is_a?(File)
103
+ f
104
+ end
105
+
84
106
  # returns a list of columns created
85
107
  # does nothing if file empty, returns []
86
108
  def create_table_from_csv_header(table_name, csv_path, opts={})
@@ -99,6 +121,10 @@ module GoodData
99
121
  count > 0
100
122
  end
101
123
 
124
+ def table_row_count(table_name)
125
+ execute_select(GoodData::SQLGenerator.get_row_count(table_name), :count => true)
126
+ end
127
+
102
128
  def get_columns(table_name)
103
129
  res = execute_select(GoodData::SQLGenerator.get_columns(table_name))
104
130
  end
@@ -158,6 +184,26 @@ module GoodData
158
184
 
159
185
  private
160
186
 
187
+ # returns an array of file paths (strings)
188
+ def list_files(csvs)
189
+ # csvs can be:
190
+ case csvs
191
+ when String
192
+
193
+ # directory
194
+ if File.directory?(csvs)
195
+ return (Dir.entries(csvs) - ['.', '..']).map{|f| File.join(csvs, f)}
196
+ end
197
+
198
+ # filename or pattern
199
+ return Dir.glob(csvs)
200
+
201
+ # array
202
+ when Array
203
+ return csvs
204
+ end
205
+ end
206
+
161
207
  def get_csv_headers(csv_path)
162
208
  header_str = File.open(csv_path, &:gets)
163
209
  if header_str.nil? || header_str.empty?
@@ -42,6 +42,10 @@ module GoodData
42
42
  "SELECT COUNT(*) FROM tables WHERE table_name = '#{table_name}'"
43
43
  end
44
44
 
45
+ def get_row_count(table_name)
46
+ "SELECT COUNT(*) FROM #{table_name}"
47
+ end
48
+
45
49
  def get_columns(table_name)
46
50
  "SELECT column_name, data_type FROM columns WHERE table_name = '#{table_name}'"
47
51
  end
@@ -1,5 +1,5 @@
1
1
  module GoodData
2
2
  class Datawarehouse
3
- VERSION = "0.0.5"
3
+ VERSION = "0.0.6"
4
4
  end
5
5
  end
@@ -0,0 +1,4 @@
1
+ wheel_size,manufacturer
2
+ "29","Ibis"
3
+ "27.5","Seven"
4
+ "29","Moots"
File without changes
@@ -1,13 +1,25 @@
1
1
  require 'tempfile'
2
2
  require 'gooddata_datawarehouse/datawarehouse'
3
- require_relative 'spec_helper'
4
3
 
5
4
  CSV_PATH = 'spec/data/bike.csv'
6
- WRONG_CSV_PATH = 'spec/data/bike-wrong.csv'
5
+ CSV_PATH2 = 'spec/data/bike2.csv'
6
+ WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
7
+ CSV_REGEXP = 'spec/data/bike*.csv'
8
+
9
+ class Helper
10
+ def self.create_default_connection
11
+ GoodData::Datawarehouse.new(ENV['USERNAME'], ENV['PASSWORD'], ENV['INSTANCE_ID'])
12
+ end
13
+ def self.line_count(f)
14
+ i = 0
15
+ CSV.foreach(f, :headers => true) {|_| i += 1}
16
+ i
17
+ end
18
+ end
7
19
 
8
20
  describe GoodData::Datawarehouse do
9
21
  before(:each) do
10
- @dwh = SpecHelper::create_default_connection
22
+ @dwh = Helper::create_default_connection
11
23
  @random = rand(10000000).to_s
12
24
  @random_table_name = "temp_#{@random}"
13
25
  @created_tables = nil
@@ -84,6 +96,22 @@ describe GoodData::Datawarehouse do
84
96
  end
85
97
  end
86
98
 
99
+ def check_cols
100
+ # cols are the same as in the csv
101
+ expected_cols = File.open(CSV_PATH, &:gets).strip.split(',')
102
+ expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
103
+ end
104
+
105
+ def check_table_exists
106
+ # table exists
107
+ expect(@dwh.table_exists?(@random_table_name)).to eq true
108
+ end
109
+
110
+ def check_row_count
111
+ # there are lines from both of the csvs
112
+ expect(@dwh.table_row_count(@random_table_name)).to eq Helper.line_count(CSV_PATH) + Helper.line_count(CSV_PATH2)
113
+ end
114
+
87
115
  describe '#rename_table' do
88
116
  it 'renames a table' do
89
117
  cols = ['col1', 'col2', 'col3']
@@ -109,8 +137,38 @@ describe GoodData::Datawarehouse do
109
137
  expect(@dwh.table_exists?(@random_table_name)).to eq true
110
138
 
111
139
  # cols are the same as in the csv
112
- expected_cols = File.open(CSV_PATH, &:gets).strip.split(',')
113
- expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
140
+ check_cols
141
+ end
142
+
143
+
144
+ it "loads all files in a directory" do
145
+ # make a tempdir and copy the csvs there
146
+ Dir.mktmpdir('foo') do |dir|
147
+ FileUtils.cp(CSV_PATH, dir)
148
+ FileUtils.cp(CSV_PATH2, dir)
149
+
150
+ @dwh.csv_to_new_table(@random_table_name, dir)
151
+ end
152
+
153
+ check_table_exists
154
+ check_cols
155
+ check_row_count
156
+ end
157
+
158
+ it "loads all files given in a list" do
159
+ @dwh.csv_to_new_table(@random_table_name, [CSV_PATH, CSV_PATH2])
160
+
161
+ check_table_exists
162
+ check_cols
163
+ check_row_count
164
+ end
165
+
166
+ it "loads all files given by a regexp" do
167
+ @dwh.csv_to_new_table(@random_table_name, CSV_REGEXP)
168
+
169
+ check_table_exists
170
+ check_cols
171
+ check_row_count
114
172
  end
115
173
 
116
174
  it 'writes exceptions and rejections to files at given path, passed strings' do
@@ -123,6 +181,25 @@ describe GoodData::Datawarehouse do
123
181
  expect(File.size(exc)).to eq 0
124
182
  end
125
183
 
184
+ it 'overwrites the rejections and exceptions' do
185
+ rej = Tempfile.new('rejections.csv')
186
+ exc = Tempfile.new('exceptions.csv')
187
+
188
+ @dwh.csv_to_new_table(@random_table_name, WRONG_CSV_PATH, :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
189
+
190
+ rej_size = File.size(rej)
191
+ exc_size = File.size(exc)
192
+
193
+ expect(rej_size).to be > 0
194
+ expect(exc_size).to be > 0
195
+
196
+ # load it again and see if it was overwritten - has the same size
197
+ @dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH, :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
198
+
199
+ expect(File.size(rej)).to eq rej_size
200
+ expect(File.size(exc)).to be exc_size
201
+ end
202
+
126
203
  it 'writes exceptions and rejections to files at given path, passed files' do
127
204
  rej = Tempfile.new('rejections.csv')
128
205
  exc = Tempfile.new('exceptions.csv')
@@ -162,7 +239,7 @@ describe GoodData::Datawarehouse do
162
239
  expect(File.size(exc)).to be > 0
163
240
  end
164
241
 
165
- it "does something when ignoring errors and not passing files" do
242
+ it "loads fine when ignoring errors and not passing files" do
166
243
  @dwh.csv_to_new_table(@random_table_name, CSV_PATH, :ignore_parse_errors => true)
167
244
 
168
245
  # table exists
@@ -173,7 +250,7 @@ describe GoodData::Datawarehouse do
173
250
  expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
174
251
  end
175
252
 
176
- it "works with non-existing files" do
253
+ it "works with non-existing rejection/exception files" do
177
254
  t = Tempfile.new('haha')
178
255
  d = File.dirname(t)
179
256
 
@@ -188,6 +265,20 @@ describe GoodData::Datawarehouse do
188
265
  expect(File.size(rej)).to be > 0
189
266
  expect(File.size(exc)).to be > 0
190
267
  end
268
+
269
+ it "fails if one of the files is wrong" do
270
+ expect{@dwh.csv_to_new_table(@random_table_name, [CSV_PATH, WRONG_CSV_PATH])}.to raise_error(ArgumentError)
271
+ end
272
+
273
+ it "creates exceptions / rejections for each file when wanted" do
274
+ rej = Tempfile.new('rejections.csv')
275
+ exc = Tempfile.new('exceptions.csv')
276
+
277
+ @dwh.csv_to_new_table(@random_table_name, [CSV_PATH, WRONG_CSV_PATH], :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
278
+
279
+ expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
280
+ expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
281
+ end
191
282
  end
192
283
 
193
284
  describe '#export_table' do
@@ -232,6 +323,18 @@ describe GoodData::Datawarehouse do
232
323
  expect(exported).to eq imported
233
324
  end
234
325
 
326
+ it "can load multiple files" do
327
+ # create the table
328
+ @dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
329
+ check_table_exists
330
+ check_cols
331
+
332
+ # load the data there
333
+ @dwh.load_data_from_csv(@random_table_name, [CSV_PATH, CSV_PATH2])
334
+
335
+ check_row_count
336
+ end
337
+
235
338
  it 'fails for a wrong csv' do
236
339
  # create the table
237
340
  @dwh.create_table_from_csv_header(@random_table_name, WRONG_CSV_PATH)
data/spec/spec_helper.rb CHANGED
@@ -1,15 +1,8 @@
1
- require 'gooddata_datawarehouse'
2
1
  require 'coveralls'
3
-
4
- Coveralls.wear_merged!
2
+ Coveralls.wear!
5
3
 
6
4
  RSpec.configure do |c|
7
5
  c.filter_run :focus => true
8
6
  c.run_all_when_everything_filtered = true
9
7
  end
10
8
 
11
- class SpecHelper
12
- def self.create_default_connection
13
- GoodData::Datawarehouse.new(ENV['USERNAME'], ENV['PASSWORD'], ENV['INSTANCE_ID'])
14
- end
15
- end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gooddata_datawarehouse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Petr Cvengros
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-15 00:00:00.000000000 Z
11
+ date: 2015-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -114,6 +114,20 @@ dependencies:
114
114
  - - ~>
115
115
  - !ruby/object:Gem::Version
116
116
  version: '0.1'
117
+ - !ruby/object:Gem::Dependency
118
+ requirement: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ~>
121
+ - !ruby/object:Gem::Version
122
+ version: '1.0'
123
+ name: pmap
124
+ prerelease: false
125
+ type: :runtime
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ~>
129
+ - !ruby/object:Gem::Version
130
+ version: '1.0'
117
131
  description: ''
118
132
  email:
119
133
  - petr.cvengros@gooddata.com
@@ -121,7 +135,9 @@ executables: []
121
135
  extensions: []
122
136
  extra_rdoc_files: []
123
137
  files:
138
+ - .coveralls.yml
124
139
  - .gitignore
140
+ - .rspec
125
141
  - .travis.yml
126
142
  - Gemfile
127
143
  - LICENSE.txt
@@ -133,8 +149,9 @@ files:
133
149
  - lib/gooddata_datawarehouse/datawarehouse.rb
134
150
  - lib/gooddata_datawarehouse/sql_generator.rb
135
151
  - lib/gooddata_datawarehouse/version.rb
136
- - spec/data/bike-wrong.csv
137
152
  - spec/data/bike.csv
153
+ - spec/data/bike2.csv
154
+ - spec/data/wrong-bike.csv
138
155
  - spec/datawarehouse_spec.rb
139
156
  - spec/spec_helper.rb
140
157
  homepage: ''
@@ -162,7 +179,8 @@ signing_key:
162
179
  specification_version: 4
163
180
  summary: Convenient work with GoodData's Datawarehouse (ADS)
164
181
  test_files:
165
- - spec/data/bike-wrong.csv
166
182
  - spec/data/bike.csv
183
+ - spec/data/bike2.csv
184
+ - spec/data/wrong-bike.csv
167
185
  - spec/datawarehouse_spec.rb
168
186
  - spec/spec_helper.rb