gooddata_datawarehouse 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7f98f560684ad94b0c65263eb0b8bb39748d3fe8
4
- data.tar.gz: 5626327514feed7e85f5541871100b3e28373c0d
3
+ metadata.gz: 9a6a8637a21756e98d892ca84f9abbf1d4cb912e
4
+ data.tar.gz: a3853b73306ee86d97471d8bec24efa0482c1b26
5
5
  SHA512:
6
- metadata.gz: 52e171a5c74e80cc056020263d98fe3ca25a28952d7a3ac338356de34003944279ca427370002589d9fb2ad21bdbe32679732f28b19ca2e1c66482e6996fd175
7
- data.tar.gz: f285e427b6f9216cd578a9fa9f8ebf32d4a2769a62ed4bb1b03c4ea452009d36b91320463519524e9d97a1aece663155b4f45e731bd84af5f29c4893b41d1f0a
6
+ metadata.gz: c5b61278e1b619ad6a31ffe9fd947b734aadc61a27090479f51870608385ec07c9cd07a7d35a68b7bbc25b0d60725d0d83e8022e1135ff5e33f773a44062f934
7
+ data.tar.gz: 3976202ed90034b82109b9ca0e82d1c1fbd6074b7e8aa770ed447a9f1612113205ff2ecc3d2bc55f0c18ca5c6a72be7723237e916bce00c10aa1112e9b4c9429
data/.coveralls.yml ADDED
@@ -0,0 +1 @@
1
+ service_name: travis-ci
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --format documentation
3
+ --require spec_helper
data/README.md CHANGED
@@ -44,6 +44,9 @@ require 'gooddata_datawarehouse'
44
44
 
45
45
  # connect
46
46
  dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS instance id')
47
+ # instance id is the identifier of your datawarehouse (ADS).
48
+ # E.g. for datawarehouse https://secure.gooddata.com/gdc/datawarehouse/instances/d4979ac54df8afb7b5192b0086de6270
49
+ # the instance id is d4979ac54df8afb7b5192b0086de6270
47
50
 
48
51
  # import a csv
49
52
  dwh.csv_to_new_table('my_table', 'path/to/my.csv')
@@ -51,7 +54,7 @@ dwh.csv_to_new_table('my_table', 'path/to/my.csv')
51
54
  dwh.table_exists?('my_table') # true
52
55
  dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]
53
56
 
54
- # run an aribrary sql
57
+ # run an arbitrary SQL statement
55
58
  dwh.execute('ALTER TABLE my_table ADD COLUMN col3 INTEGER')
56
59
 
57
60
  # run a select and process results
@@ -69,6 +72,17 @@ dwh.export_table('my_new_table', 'path/to/my_new.csv')
69
72
  dwh.drop_table('my_new_table')
70
73
  ```
71
74
 
75
+ ## Troubleshooting
76
+ ### Wrong driver version
77
+ If you get an error that mentions a handshake error and a wrong DSS driver version, update your `gooddata-dss-jdbc` gem by running
78
+
79
+ $ bundle update
80
+
81
+ or
82
+
83
+ $ gem update gooddata-dss-jdbc
84
+
85
+ You should always have the latest version of this gem.
72
86
 
73
87
  ## Contributing
74
88
 
@@ -30,4 +30,5 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_dependency "sequel", "~> 4.17"
32
32
  spec.add_dependency "gooddata-dss-jdbc", "~> 0.1"
33
+ spec.add_dependency "pmap", "~> 1.0"
33
34
  end
@@ -3,11 +3,13 @@ require 'sequel'
3
3
  require 'logger'
4
4
  require 'csv'
5
5
  require 'tempfile'
6
+ require 'pmap'
6
7
 
7
8
  require_relative 'sql_generator'
8
9
 
9
10
  module GoodData
10
11
  class Datawarehouse
12
+ PARALEL_COPY_THREAD_COUNT = 5
11
13
  def initialize(username, password, instance_id, opts={})
12
14
  @logger = Logger.new(STDOUT)
13
15
  @username = username
@@ -50,37 +52,57 @@ module GoodData
50
52
  execute(GoodData::SQLGenerator.drop_table(table_name,opts))
51
53
  end
52
54
 
53
- def csv_to_new_table(table_name, csv_path, opts={})
54
- cols = create_table_from_csv_header(table_name, csv_path, opts)
55
- load_data_from_csv(table_name, csv_path, opts.merge(columns: cols))
55
+ def csv_to_new_table(table_name, csvs, opts={})
56
+ csv_list = list_files(csvs)
57
+ cols = create_table_from_csv_header(table_name, csv_list[0], opts)
58
+ load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
56
59
  end
57
60
 
58
- def load_data_from_csv(table_name, csv_path, opts={})
59
- columns = opts[:columns] || get_csv_headers(csv_path)
60
-
61
- if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
62
- exc = nil
63
- rej = nil
64
- else
65
- # temporary files to get the excepted records (if not given)
66
- exc = opts[:exceptions_file] ||= Tempfile.new('exceptions')
67
- rej = opts[:rejections_file] ||= Tempfile.new('rejections')
68
- exc = File.new(exc, 'w') unless exc.is_a?(File)
69
- rej = File.new(rej, 'w') unless rej.is_a?(File)
70
- end
61
+ def load_data_from_csv(table_name, csvs, opts={})
62
+ # get the list of files to load and columns in the csv
63
+ csv_list = list_files(csvs)
64
+ columns = opts[:columns] || get_csv_headers(csv_list[0])
65
+
66
+ # load each csv from the list
67
+ single_file = (csv_list.size == 1)
68
+
69
+ csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
70
+ if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
71
+ exc = nil
72
+ rej = nil
73
+ opts_file = opts
74
+ else
75
+ opts_file = opts.clone
76
+ # assign the file to both opts_file and exc/rej
77
+ # temporary files to get the excepted records (if not given)
78
+ exc = opts_file[:exceptions_file] = init_file(opts_file[:exceptions_file], 'exceptions', csv_path, single_file)
79
+ rej = opts_file[:rejections_file] = init_file(opts_file[:rejections_file], 'rejections', csv_path, single_file)
80
+ end
71
81
 
72
- # execute the load
73
- execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts))
82
+ # execute the load
83
+ execute(GoodData::SQLGenerator.load_data(table_name, csv_path, columns, opts_file))
74
84
 
75
- exc.close if exc
76
- rej.close if rej
85
+ exc.close if exc
86
+ rej.close if rej
77
87
 
78
- # if there was something rejected and it shouldn't be ignored, raise an error
79
- if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors])
80
- fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}"
88
+ # if there was something rejected and it shouldn't be ignored, raise an error
89
+ if ((exc && File.size?(exc)) || (rej && File.size?(rej))) && (! opts[:ignore_parse_errors])
90
+ fail ArgumentError, "Some lines in the CSV didn't go through. Exceptions: #{IO.read(exc)}\nRejected records: #{IO.read(rej)}"
91
+ end
81
92
  end
82
93
  end
83
94
 
95
# Opens (creating or truncating) the file that collects exception or
# rejection records for the load of one CSV.
#
# given_filename - String path, an object responding to #path (File,
#                  Tempfile), or nil to have a fresh temporary path generated
# key            - String prefix of the generated temporary file name
# csv_path       - path of the CSV being loaded; its basename becomes the
#                  postfix of the file name when several CSVs are loaded
# single_file    - true when only one CSV is loaded; no postfix is added then
#
# Returns a File opened for writing.
def init_file(given_filename, key, csv_path, single_file)
  # only use file postfix if there are multiple files
  postfix = single_file ? '' : "-#{File.basename(csv_path)}"

  # Accept anything path-like. A Tempfile delegates to File but is not a
  # File itself, so an is_a?(File) check would let its #inspect string be
  # interpolated into the file name.
  base = given_filename.respond_to?(:path) ? given_filename.path : given_filename

  # Build the temporary name by hand instead of Tempfile.new(key).path: an
  # unreferenced Tempfile deletes its file once it is garbage collected,
  # which could remove the file while it is still being written to.
  # Dir.tmpdir comes with 'tempfile', which loads 'tmpdir'.
  base ||= File.join(
    Dir.tmpdir,
    "#{key}-#{Process.pid}-#{Thread.current.object_id}-#{rand(0x100000000).to_s(36)}"
  )

  # take what we have and put the source csv name at the end
  File.new("#{base}#{postfix}", 'w')
end
105
+
84
106
  # returns a list of columns created
85
107
  # does nothing if file empty, returns []
86
108
  def create_table_from_csv_header(table_name, csv_path, opts={})
@@ -99,6 +121,10 @@ module GoodData
99
121
  count > 0
100
122
  end
101
123
 
124
+ def table_row_count(table_name)
125
+ execute_select(GoodData::SQLGenerator.get_row_count(table_name), :count => true)
126
+ end
127
+
102
128
  def get_columns(table_name)
103
129
  res = execute_select(GoodData::SQLGenerator.get_columns(table_name))
104
130
  end
@@ -158,6 +184,26 @@ module GoodData
158
184
 
159
185
  private
160
186
 
187
# Expands a CSV specification into an array of file paths (strings).
#
# csvs - one of:
#        * String naming a directory: every regular file directly inside it
#        * String file name or glob pattern: whatever Dir.glob matches
#        * Array: taken as the list of paths and returned as is
#
# Directory and glob results are sorted, so the first element (which the
# callers read the CSV header from) is the same on every run.
#
# Raises ArgumentError for any other kind of argument.
def list_files(csvs)
  case csvs
  when Array
    csvs
  when String
    if File.directory?(csvs)
      # Dir.entries also lists '.', '..' and subdirectories; keep plain files only
      Dir.entries(csvs)
         .map { |entry| File.join(csvs, entry) }
         .select { |path| File.file?(path) }
         .sort
    else
      # filename or pattern
      Dir.glob(csvs).sort
    end
  else
    # fail here with a clear message instead of returning nil to the caller
    fail ArgumentError, "csvs must be a String (file, directory or glob pattern) or an Array of paths, got #{csvs.class}"
  end
end
206
+
161
207
  def get_csv_headers(csv_path)
162
208
  header_str = File.open(csv_path, &:gets)
163
209
  if header_str.nil? || header_str.empty?
@@ -42,6 +42,10 @@ module GoodData
42
42
  "SELECT COUNT(*) FROM tables WHERE table_name = '#{table_name}'"
43
43
  end
44
44
 
45
+ def get_row_count(table_name)
46
+ "SELECT COUNT(*) FROM #{table_name}"
47
+ end
48
+
45
49
  def get_columns(table_name)
46
50
  "SELECT column_name, data_type FROM columns WHERE table_name = '#{table_name}'"
47
51
  end
@@ -1,5 +1,5 @@
1
1
  module GoodData
2
2
  class Datawarehouse
3
- VERSION = "0.0.5"
3
+ VERSION = "0.0.6"
4
4
  end
5
5
  end
@@ -0,0 +1,4 @@
1
+ wheel_size,manufacturer
2
+ "29","Ibis"
3
+ "27.5","Seven"
4
+ "29","Moots"
File without changes
@@ -1,13 +1,25 @@
1
1
  require 'tempfile'
2
2
  require 'gooddata_datawarehouse/datawarehouse'
3
- require_relative 'spec_helper'
4
3
 
5
4
  CSV_PATH = 'spec/data/bike.csv'
6
- WRONG_CSV_PATH = 'spec/data/bike-wrong.csv'
5
+ CSV_PATH2 = 'spec/data/bike2.csv'
6
+ WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
7
+ CSV_REGEXP = 'spec/data/bike*.csv'
8
+
9
+ class Helper
10
+ def self.create_default_connection
11
+ GoodData::Datawarehouse.new(ENV['USERNAME'], ENV['PASSWORD'], ENV['INSTANCE_ID'])
12
+ end
13
+ def self.line_count(f)
14
+ i = 0
15
+ CSV.foreach(f, :headers => true) {|_| i += 1}
16
+ i
17
+ end
18
+ end
7
19
 
8
20
  describe GoodData::Datawarehouse do
9
21
  before(:each) do
10
- @dwh = SpecHelper::create_default_connection
22
+ @dwh = Helper::create_default_connection
11
23
  @random = rand(10000000).to_s
12
24
  @random_table_name = "temp_#{@random}"
13
25
  @created_tables = nil
@@ -84,6 +96,22 @@ describe GoodData::Datawarehouse do
84
96
  end
85
97
  end
86
98
 
99
+ def check_cols
100
+ # cols are the same as in the csv
101
+ expected_cols = File.open(CSV_PATH, &:gets).strip.split(',')
102
+ expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
103
+ end
104
+
105
+ def check_table_exists
106
+ # table exists
107
+ expect(@dwh.table_exists?(@random_table_name)).to eq true
108
+ end
109
+
110
+ def check_row_count
111
+ # there are lines from both of the csvs
112
+ expect(@dwh.table_row_count(@random_table_name)).to eq Helper.line_count(CSV_PATH) + Helper.line_count(CSV_PATH2)
113
+ end
114
+
87
115
  describe '#rename_table' do
88
116
  it 'renames a table' do
89
117
  cols = ['col1', 'col2', 'col3']
@@ -109,8 +137,38 @@ describe GoodData::Datawarehouse do
109
137
  expect(@dwh.table_exists?(@random_table_name)).to eq true
110
138
 
111
139
  # cols are the same as in the csv
112
- expected_cols = File.open(CSV_PATH, &:gets).strip.split(',')
113
- expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
140
+ check_cols
141
+ end
142
+
143
+
144
+ it "loads all files in a directory" do
145
+ # make a tempdir and copy the csvs there
146
+ Dir.mktmpdir('foo') do |dir|
147
+ FileUtils.cp(CSV_PATH, dir)
148
+ FileUtils.cp(CSV_PATH2, dir)
149
+
150
+ @dwh.csv_to_new_table(@random_table_name, dir)
151
+ end
152
+
153
+ check_table_exists
154
+ check_cols
155
+ check_row_count
156
+ end
157
+
158
+ it "loads all files given in a list" do
159
+ @dwh.csv_to_new_table(@random_table_name, [CSV_PATH, CSV_PATH2])
160
+
161
+ check_table_exists
162
+ check_cols
163
+ check_row_count
164
+ end
165
+
166
+ it "loads all files given by a regexp" do
167
+ @dwh.csv_to_new_table(@random_table_name, CSV_REGEXP)
168
+
169
+ check_table_exists
170
+ check_cols
171
+ check_row_count
114
172
  end
115
173
 
116
174
  it 'writes exceptions and rejections to files at given path, passed strings' do
@@ -123,6 +181,25 @@ describe GoodData::Datawarehouse do
123
181
  expect(File.size(exc)).to eq 0
124
182
  end
125
183
 
184
+ it 'overwrites the rejections and exceptions' do
185
+ rej = Tempfile.new('rejections.csv')
186
+ exc = Tempfile.new('exceptions.csv')
187
+
188
+ @dwh.csv_to_new_table(@random_table_name, WRONG_CSV_PATH, :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
189
+
190
+ rej_size = File.size(rej)
191
+ exc_size = File.size(exc)
192
+
193
+ expect(rej_size).to be > 0
194
+ expect(exc_size).to be > 0
195
+
196
+ # load it again and see if it was overwritten - has the same size
197
+ @dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH, :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
198
+
199
+ expect(File.size(rej)).to eq rej_size
200
+ expect(File.size(exc)).to be exc_size
201
+ end
202
+
126
203
  it 'writes exceptions and rejections to files at given path, passed files' do
127
204
  rej = Tempfile.new('rejections.csv')
128
205
  exc = Tempfile.new('exceptions.csv')
@@ -162,7 +239,7 @@ describe GoodData::Datawarehouse do
162
239
  expect(File.size(exc)).to be > 0
163
240
  end
164
241
 
165
- it "does something when ignoring errors and not passing files" do
242
+ it "loads fine when ignoring errors and not passing files" do
166
243
  @dwh.csv_to_new_table(@random_table_name, CSV_PATH, :ignore_parse_errors => true)
167
244
 
168
245
  # table exists
@@ -173,7 +250,7 @@ describe GoodData::Datawarehouse do
173
250
  expect(Set.new(@dwh.get_columns(@random_table_name))).to eq Set.new(expected_cols.map {|c| {:column_name => c, :data_type => GoodData::SQLGenerator::DEFAULT_TYPE}})
174
251
  end
175
252
 
176
- it "works with non-existing files" do
253
+ it "works with non-existing rejection/exception files" do
177
254
  t = Tempfile.new('haha')
178
255
  d = File.dirname(t)
179
256
 
@@ -188,6 +265,20 @@ describe GoodData::Datawarehouse do
188
265
  expect(File.size(rej)).to be > 0
189
266
  expect(File.size(exc)).to be > 0
190
267
  end
268
+
269
+ it "fails if one of the files is wrong" do
270
+ expect{@dwh.csv_to_new_table(@random_table_name, [CSV_PATH, WRONG_CSV_PATH])}.to raise_error(ArgumentError)
271
+ end
272
+
273
+ it "creates exceptions / rejections for each file when wanted" do
274
+ rej = Tempfile.new('rejections.csv')
275
+ exc = Tempfile.new('exceptions.csv')
276
+
277
+ @dwh.csv_to_new_table(@random_table_name, [CSV_PATH, WRONG_CSV_PATH], :exceptions_file => exc.path, :rejections_file => rej.path, :ignore_parse_errors => true)
278
+
279
+ expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
280
+ expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
281
+ end
191
282
  end
192
283
 
193
284
  describe '#export_table' do
@@ -232,6 +323,18 @@ describe GoodData::Datawarehouse do
232
323
  expect(exported).to eq imported
233
324
  end
234
325
 
326
+ it "can load multiple files" do
327
+ # create the table
328
+ @dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
329
+ check_table_exists
330
+ check_cols
331
+
332
+ # load the data there
333
+ @dwh.load_data_from_csv(@random_table_name, [CSV_PATH, CSV_PATH2])
334
+
335
+ check_row_count
336
+ end
337
+
235
338
  it 'fails for a wrong csv' do
236
339
  # create the table
237
340
  @dwh.create_table_from_csv_header(@random_table_name, WRONG_CSV_PATH)
data/spec/spec_helper.rb CHANGED
@@ -1,15 +1,8 @@
1
- require 'gooddata_datawarehouse'
2
1
  require 'coveralls'
3
-
4
- Coveralls.wear_merged!
2
+ Coveralls.wear!
5
3
 
6
4
  RSpec.configure do |c|
7
5
  c.filter_run :focus => true
8
6
  c.run_all_when_everything_filtered = true
9
7
  end
10
8
 
11
- class SpecHelper
12
- def self.create_default_connection
13
- GoodData::Datawarehouse.new(ENV['USERNAME'], ENV['PASSWORD'], ENV['INSTANCE_ID'])
14
- end
15
- end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gooddata_datawarehouse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Petr Cvengros
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-15 00:00:00.000000000 Z
11
+ date: 2015-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -114,6 +114,20 @@ dependencies:
114
114
  - - ~>
115
115
  - !ruby/object:Gem::Version
116
116
  version: '0.1'
117
+ - !ruby/object:Gem::Dependency
118
+ requirement: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ~>
121
+ - !ruby/object:Gem::Version
122
+ version: '1.0'
123
+ name: pmap
124
+ prerelease: false
125
+ type: :runtime
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ~>
129
+ - !ruby/object:Gem::Version
130
+ version: '1.0'
117
131
  description: ''
118
132
  email:
119
133
  - petr.cvengros@gooddata.com
@@ -121,7 +135,9 @@ executables: []
121
135
  extensions: []
122
136
  extra_rdoc_files: []
123
137
  files:
138
+ - .coveralls.yml
124
139
  - .gitignore
140
+ - .rspec
125
141
  - .travis.yml
126
142
  - Gemfile
127
143
  - LICENSE.txt
@@ -133,8 +149,9 @@ files:
133
149
  - lib/gooddata_datawarehouse/datawarehouse.rb
134
150
  - lib/gooddata_datawarehouse/sql_generator.rb
135
151
  - lib/gooddata_datawarehouse/version.rb
136
- - spec/data/bike-wrong.csv
137
152
  - spec/data/bike.csv
153
+ - spec/data/bike2.csv
154
+ - spec/data/wrong-bike.csv
138
155
  - spec/datawarehouse_spec.rb
139
156
  - spec/spec_helper.rb
140
157
  homepage: ''
@@ -162,7 +179,8 @@ signing_key:
162
179
  specification_version: 4
163
180
  summary: Convenient work with GoodData's Datawarehouse (ADS)
164
181
  test_files:
165
- - spec/data/bike-wrong.csv
166
182
  - spec/data/bike.csv
183
+ - spec/data/bike2.csv
184
+ - spec/data/wrong-bike.csv
167
185
  - spec/datawarehouse_spec.rb
168
186
  - spec/spec_helper.rb