gooddata_datawarehouse 0.0.6 → 0.0.7

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 9a6a8637a21756e98d892ca84f9abbf1d4cb912e
- data.tar.gz: a3853b73306ee86d97471d8bec24efa0482c1b26
+ metadata.gz: 453c08405c8e4e9c2fc46b6b2fb1f1df22f2f4ae
+ data.tar.gz: ce89580d666c0d660a61a72def8f6d7d28d4ab9d
  SHA512:
- metadata.gz: c5b61278e1b619ad6a31ffe9fd947b734aadc61a27090479f51870608385ec07c9cd07a7d35a68b7bbc25b0d60725d0d83e8022e1135ff5e33f773a44062f934
- data.tar.gz: 3976202ed90034b82109b9ca0e82d1c1fbd6074b7e8aa770ed447a9f1612113205ff2ecc3d2bc55f0c18ca5c6a72be7723237e916bce00c10aa1112e9b4c9429
+ metadata.gz: 5d40a9ece4f77255e2d2f750d0082bc5ca0d19bb1d99ae013d8da5331000b1c86022e3dc4c8b70b54cda7da130332a6b3b19ac4eeb280d27573d19bd8cafc1ed
+ data.tar.gz: 69fc15c0a428e8e32db4b20f07b7fa7ef686342df98e210dc1a4540e6af85569f31cebb00a867d9fe5e15a62f3326c1d3c74db0b4a31fb6d7b4c777fc6755c4a
data/README.md CHANGED
@@ -51,7 +51,13 @@ dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS inst
  # import a csv
  dwh.csv_to_new_table('my_table', 'path/to/my.csv')

+ # or multiple csvs (running in parallel threads)
+ dwh.csv_to_new_table('my_table', ['path/to/my.csv', 'path/to/my2.csv'])
+ dwh.csv_to_new_table('my_table', 'path/to/*.csv')
+ dwh.csv_to_new_table('my_table', 'path/to/directory/')
+
  dwh.table_exists?('my_table') # true
+ dwh.table_row_count('my_table') # 55
  dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]

  # run an arbitrary sql
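
Putting the README additions together, the 0.0.7 surface can be exercised roughly as follows. This is a minimal sketch: the credentials, instance id, and paths are placeholders, and the :paralel_copy_thread_count option (spelled as in the gem) comes from the specs further below.

    require 'gooddata_datawarehouse'

    # placeholder credentials and instance id
    dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS instance')

    # load several CSVs into a new table, copied in parallel threads
    dwh.csv_to_new_table('my_table', 'path/to/*.csv', :paralel_copy_thread_count => 2)

    # by default a later load replaces the data; pass :append => true to keep it
    dwh.load_data_from_csv('my_table', 'path/to/another.csv', :append => true)

    dwh.table_row_count('my_table')   # row count of the loaded table
    dwh.truncate_table('my_table')    # new in 0.0.7: empties the table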
data/lib/gooddata_datawarehouse/datawarehouse.rb CHANGED
@@ -9,7 +9,7 @@ require_relative 'sql_generator'

  module GoodData
  class Datawarehouse
- PARALEL_COPY_THREAD_COUNT = 5
+ PARALEL_COPY_THREAD_COUNT = 10
  def initialize(username, password, instance_id, opts={})
  @logger = Logger.new(STDOUT)
  @username = username
@@ -42,6 +42,8 @@ module GoodData
  csv << row.values_at(*col_keys)
  end
  end
+ @logger.info "Table #{table_name} exported to #{csv_path.respond_to?(:path)? csv_path.path : csv_path}"
+ csv_path
  end

  def rename_table(old_name, new_name)
@@ -55,18 +57,27 @@ module GoodData
  def csv_to_new_table(table_name, csvs, opts={})
  csv_list = list_files(csvs)
  cols = create_table_from_csv_header(table_name, csv_list[0], opts)
- load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
+ load_data_from_csv(table_name, csv_list, opts.merge(columns: cols, append: true))
+ end
+
+ def truncate_table(table_name)
+ execute(GoodData::SQLGenerator.truncate_table(table_name))
  end

  def load_data_from_csv(table_name, csvs, opts={})
+ thread_count = opts[:paralel_copy_thread_count] || PARALEL_COPY_THREAD_COUNT
  # get the list of files to load and columns in the csv
  csv_list = list_files(csvs)
  columns = opts[:columns] || get_csv_headers(csv_list[0])

+ # truncate_table unless data should be appended
+ unless opts[:append]
+ truncate_table(table_name)
+ end
+
  # load each csv from the list
  single_file = (csv_list.size == 1)
-
- csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
+ csv_list.peach(thread_count) do |csv_path|
  if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
  exc = nil
  rej = nil
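
The net effect of this hunk: load_data_from_csv now truncates the target table unless :append => true is passed, csv_to_new_table opts back into appending (it has just created an empty table), and the parallel COPY thread count becomes configurable per call. A small illustration of the resulting behaviour, assuming a connected dwh instance and the two sample CSVs used in the specs:

    dwh.csv_to_new_table('bikes', 'spec/data/bike.csv')     # create the table and load bike.csv
    dwh.load_data_from_csv('bikes', 'spec/data/bike2.csv')  # default: truncates first,
                                                            # only bike2.csv rows remain
    dwh.load_data_from_csv('bikes', 'spec/data/bike.csv',
                           :append => true)                 # keeps existing rows and adds bike.csv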
@@ -209,7 +220,15 @@ module GoodData
  if header_str.nil? || header_str.empty?
  return []
  end
- header_str.split(',').map{ |s| s.gsub(/[\s"-]/,'') }
+ empty_count = 0
+ header_str.split(',').map{|s| s.gsub(/[\s"-]/,'')}.map do |c|
+ if c.nil? || c.empty?
+ empty_count += 1
+ "empty#{empty_count}"
+ else
+ c
+ end
+ end
  end
  end
  end
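
The header change above gives unnamed CSV columns synthetic names instead of producing empty identifiers. The same transformation, extracted into a standalone snippet for clarity (in the gem, the surrounding method first reads the header line from the file):

    header_str = '"","manufacturer",""'
    empty_count = 0
    columns = header_str.split(',').map { |s| s.gsub(/[\s"-]/, '') }.map do |c|
      if c.nil? || c.empty?
        empty_count += 1
        "empty#{empty_count}"
      else
        c
      end
    end
    columns # => ["empty1", "manufacturer", "empty2"]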
data/lib/gooddata_datawarehouse/sql_generator.rb CHANGED
@@ -54,6 +54,10 @@ module GoodData
  limit = opts[:limit] ? "LIMIT #{opts[:limit]}" : ''
  "SELECT * FROM #{table_name} #{limit}"
  end
+
+ def truncate_table(table_name)
+ "TRUNCATE TABLE #{table_name}"
+ end
  end
  end
  end
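
The generator side is a one-liner; Datawarehouse#truncate_table (added above) simply executes the statement this method returns. For example:

    GoodData::SQLGenerator.truncate_table('my_table') # => "TRUNCATE TABLE my_table"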
data/lib/gooddata_datawarehouse/version.rb CHANGED
@@ -1,5 +1,5 @@
  module GoodData
  class Datawarehouse
- VERSION = "0.0.6"
+ VERSION = "0.0.7"
  end
  end
data/new-version.sh ADDED
@@ -0,0 +1,23 @@
+ #!/bin/sh
+
+ # get the new version
+ VERSION=`bundle exec ruby <<-EORUBY
+
+ require 'gooddata_datawarehouse'
+ puts GoodData::Datawarehouse::VERSION
+
+ EORUBY`
+
+ # create tag and push it
+ TAG="v$VERSION"
+ git tag $TAG
+ git push origin $TAG
+
+ # build and push the gem
+ gem build gooddata_datawarehouse.gemspec
+ gem push "gooddata_datawarehouse-$VERSION.gem"
+
+ # update the gem after a few secs
+ echo "Sleeping.."
+ sleep 30
+ gem update gooddata_datawarehouse
data/spec/data/emptyheader-bike.csv ADDED
@@ -0,0 +1,4 @@
+ "","manufacturer",""
+ "29","Ibis","1"
+ "27.5","Seven","2"
+ "29","Moots","3"
data/spec/datawarehouse_spec.rb CHANGED
@@ -4,6 +4,7 @@ require 'gooddata_datawarehouse/datawarehouse'
  CSV_PATH = 'spec/data/bike.csv'
  CSV_PATH2 = 'spec/data/bike2.csv'
  WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
+ EMPTY_HEADER_CSV_PATH = 'spec/data/emptyheader-bike.csv'
  CSV_REGEXP = 'spec/data/bike*.csv'

  class Helper
@@ -107,9 +108,10 @@ describe GoodData::Datawarehouse do
  expect(@dwh.table_exists?(@random_table_name)).to eq true
  end

- def check_row_count
+ def check_row_count(files=[CSV_PATH, CSV_PATH2])
+ expected_count = files.map {|f| Helper.line_count(f)}.reduce(:+)
  # there are lines from both of the csvs
- expect(@dwh.table_row_count(@random_table_name)).to eq Helper.line_count(CSV_PATH) + Helper.line_count(CSV_PATH2)
+ expect(@dwh.table_row_count(@random_table_name)).to eq expected_count
  end

  describe '#rename_table' do
@@ -141,13 +143,13 @@ describe GoodData::Datawarehouse do
  end


- it "loads all files in a directory" do
+ it "loads all files in a directory, in paralel" do
  # make a tempdir and copy the csvs there
  Dir.mktmpdir('foo') do |dir|
  FileUtils.cp(CSV_PATH, dir)
  FileUtils.cp(CSV_PATH2, dir)

- @dwh.csv_to_new_table(@random_table_name, dir)
+ @dwh.csv_to_new_table(@random_table_name, dir, :paralel_copy_thread_count => 2)
  end

  check_table_exists
@@ -279,6 +281,11 @@ describe GoodData::Datawarehouse do
  expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
  expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
  end
+ it "creates empty1, etc. columns for empty header columns" do
+ @dwh.csv_to_new_table(@random_table_name, EMPTY_HEADER_CSV_PATH)
+ # it should have cols empty1,2
+ expect(@dwh.get_columns(@random_table_name).map {|c| c[:column_name]}).to include('empty1', 'empty2')
+ end
  end

  describe '#export_table' do
@@ -343,6 +350,42 @@ describe GoodData::Datawarehouse do
  # load the data there - expect fail
  expect{@dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH)}.to raise_error(ArgumentError)
  end
+
+ it 'truncates the data that is already there' do
+ @dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
+ check_table_exists
+ check_cols
+
+ # load the data there
+ @dwh.load_data_from_csv(@random_table_name, CSV_PATH)
+ check_row_count([CSV_PATH])
+
+ # load the data there again, count should stay
+ @dwh.load_data_from_csv(@random_table_name, CSV_PATH2)
+ check_row_count([CSV_PATH2])
+ end
+
+ it "keeps the data that is there if append option passed" do
+ @dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
+ check_table_exists
+ check_cols
+
+ # load the data there
+ @dwh.load_data_from_csv(@random_table_name, CSV_PATH)
+ check_row_count([CSV_PATH])
+
+ # append the data
+ @dwh.load_data_from_csv(@random_table_name, CSV_PATH2, :append => true)
+ check_row_count([CSV_PATH, CSV_PATH2])
+ end
+ end
+
+ describe "#truncate_table" do
+ it "truncates the given table" do
+ @dwh.csv_to_new_table(@random_table_name, CSV_PATH)
+ @dwh.truncate_table(@random_table_name)
+ expect(@dwh.table_row_count(@random_table_name)).to eq 0
+ end
  end

  describe '#get_columns' do
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: gooddata_datawarehouse
  version: !ruby/object:Gem::Version
- version: 0.0.6
+ version: 0.0.7
  platform: ruby
  authors:
  - Petr Cvengros
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2015-02-23 00:00:00.000000000 Z
+ date: 2015-03-09 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
@@ -149,8 +149,10 @@ files:
  - lib/gooddata_datawarehouse/datawarehouse.rb
  - lib/gooddata_datawarehouse/sql_generator.rb
  - lib/gooddata_datawarehouse/version.rb
+ - new-version.sh
  - spec/data/bike.csv
  - spec/data/bike2.csv
+ - spec/data/emptyheader-bike.csv
  - spec/data/wrong-bike.csv
  - spec/datawarehouse_spec.rb
  - spec/spec_helper.rb
@@ -181,6 +183,7 @@ summary: Convenient work with GoodData's Datawarehouse (ADS)
  test_files:
  - spec/data/bike.csv
  - spec/data/bike2.csv
+ - spec/data/emptyheader-bike.csv
  - spec/data/wrong-bike.csv
  - spec/datawarehouse_spec.rb
  - spec/spec_helper.rb