gooddata_datawarehouse 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 9a6a8637a21756e98d892ca84f9abbf1d4cb912e
-  data.tar.gz: a3853b73306ee86d97471d8bec24efa0482c1b26
+  metadata.gz: 453c08405c8e4e9c2fc46b6b2fb1f1df22f2f4ae
+  data.tar.gz: ce89580d666c0d660a61a72def8f6d7d28d4ab9d
 SHA512:
-  metadata.gz: c5b61278e1b619ad6a31ffe9fd947b734aadc61a27090479f51870608385ec07c9cd07a7d35a68b7bbc25b0d60725d0d83e8022e1135ff5e33f773a44062f934
-  data.tar.gz: 3976202ed90034b82109b9ca0e82d1c1fbd6074b7e8aa770ed447a9f1612113205ff2ecc3d2bc55f0c18ca5c6a72be7723237e916bce00c10aa1112e9b4c9429
+  metadata.gz: 5d40a9ece4f77255e2d2f750d0082bc5ca0d19bb1d99ae013d8da5331000b1c86022e3dc4c8b70b54cda7da130332a6b3b19ac4eeb280d27573d19bd8cafc1ed
+  data.tar.gz: 69fc15c0a428e8e32db4b20f07b7fa7ef686342df98e210dc1a4540e6af85569f31cebb00a867d9fe5e15a62f3326c1d3c74db0b4a31fb6d7b4c777fc6755c4a
data/README.md CHANGED
@@ -51,7 +51,13 @@ dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS inst
 # import a csv
 dwh.csv_to_new_table('my_table', 'path/to/my.csv')
 
+# or multiple csvs (running in parallel threads)
+dwh.csv_to_new_table('my_table', ['path/to/my.csv', 'path/to/my2.csv'])
+dwh.csv_to_new_table('my_table', 'path/to/*.csv')
+dwh.csv_to_new_table('my_table', 'path/to/directory/')
+
 dwh.table_exists?('my_table') # true
+dwh.table_row_count('my_table') # 55
 dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]
 
 # run an arbitrary sql
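Beyond the README examples above, this release adds two loading knobs that the README does not yet document: a :paralel_copy_thread_count option and a public truncate_table method (both visible in the datawarehouse.rb changes below). A minimal usage sketch, assuming a connected dwh instance created as shown above:

    # cap the number of parallel COPY threads (the built-in default, PARALEL_COPY_THREAD_COUNT, is now 10)
    dwh.csv_to_new_table('my_table', 'path/to/*.csv', :paralel_copy_thread_count => 2)

    # drop all rows from an existing table
    dwh.truncate_table('my_table')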
data/lib/gooddata_datawarehouse/datawarehouse.rb CHANGED
@@ -9,7 +9,7 @@ require_relative 'sql_generator'
 
 module GoodData
   class Datawarehouse
-    PARALEL_COPY_THREAD_COUNT = 5
+    PARALEL_COPY_THREAD_COUNT = 10
     def initialize(username, password, instance_id, opts={})
       @logger = Logger.new(STDOUT)
       @username = username
@@ -42,6 +42,8 @@ module GoodData
           csv << row.values_at(*col_keys)
         end
       end
+      @logger.info "Table #{table_name} exported to #{csv_path.respond_to?(:path)? csv_path.path : csv_path}"
+      csv_path
     end
 
     def rename_table(old_name, new_name)
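With this change, export_table logs the destination and returns the csv path it wrote to, so the result can be passed straight to other calls. A short sketch, assuming the method keeps its existing (table_name, csv_path) signature, where csv_path may be a plain path string or an IO-like object responding to :path:

    out = dwh.export_table('my_table', 'path/to/export.csv')
    out # => 'path/to/export.csv'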
@@ -55,18 +57,27 @@ module GoodData
     def csv_to_new_table(table_name, csvs, opts={})
       csv_list = list_files(csvs)
       cols = create_table_from_csv_header(table_name, csv_list[0], opts)
-      load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
+      load_data_from_csv(table_name, csv_list, opts.merge(columns: cols, append: true))
+    end
+
+    def truncate_table(table_name)
+      execute(GoodData::SQLGenerator.truncate_table(table_name))
     end
 
     def load_data_from_csv(table_name, csvs, opts={})
+      thread_count = opts[:paralel_copy_thread_count] || PARALEL_COPY_THREAD_COUNT
       # get the list of files to load and columns in the csv
       csv_list = list_files(csvs)
       columns = opts[:columns] || get_csv_headers(csv_list[0])
 
+      # truncate_table unless data should be appended
+      unless opts[:append]
+        truncate_table(table_name)
+      end
+
       # load each csv from the list
       single_file = (csv_list.size == 1)
-
-      csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
+      csv_list.peach(thread_count) do |csv_path|
         if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
           exc = nil
           rej = nil
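The net effect of the append handling above, mirrored by the new specs further down in this diff: csv_to_new_table now passes :append => true internally (the table was just created, so there is nothing to keep), while a bare load_data_from_csv call replaces whatever the table already holds. A minimal sketch, assuming a connected dwh and an existing my_table:

    dwh.load_data_from_csv('my_table', 'path/to/my.csv')                   # truncates first, then loads my.csv
    dwh.load_data_from_csv('my_table', 'path/to/my2.csv', :append => true) # keeps existing rows, adds my2.csv
    dwh.table_row_count('my_table') # rows from both files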
@@ -209,7 +220,15 @@ module GoodData
       if header_str.nil? || header_str.empty?
         return []
       end
-      header_str.split(',').map{ |s| s.gsub(/[\s"-]/,'') }
+      empty_count = 0
+      header_str.split(',').map{|s| s.gsub(/[\s"-]/,'')}.map do |c|
+        if c.nil? || c.empty?
+          empty_count += 1
+          "empty#{empty_count}"
+        else
+          c
+        end
+      end
     end
   end
 end
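The header handling above now substitutes placeholder names for blank header cells instead of producing empty column names. A standalone sketch of the same transformation, using the header row of the new spec/data/emptyheader-bike.csv fixture added below:

    empty_count = 0
    header_str = '"","manufacturer",""'
    columns = header_str.split(',').map { |s| s.gsub(/[\s"-]/, '') }.map do |c|
      if c.nil? || c.empty?
        empty_count += 1
        "empty#{empty_count}"   # blank headers become empty1, empty2, ...
      else
        c
      end
    end
    columns # => ["empty1", "manufacturer", "empty2"]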
data/lib/gooddata_datawarehouse/sql_generator.rb CHANGED
@@ -54,6 +54,10 @@ module GoodData
         limit = opts[:limit] ? "LIMIT #{opts[:limit]}" : ''
         "SELECT * FROM #{table_name} #{limit}"
       end
+
+      def truncate_table(table_name)
+        "TRUNCATE TABLE #{table_name}"
+      end
     end
   end
 end
data/lib/gooddata_datawarehouse/version.rb CHANGED
@@ -1,5 +1,5 @@
 module GoodData
   class Datawarehouse
-    VERSION = "0.0.6"
+    VERSION = "0.0.7"
   end
 end
data/new-version.sh ADDED
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+# get the new version
+VERSION=`bundle exec ruby <<-EORUBY
+
+require 'gooddata_datawarehouse'
+puts GoodData::Datawarehouse::VERSION
+
+EORUBY`
+
+# create tag and push it
+TAG="v$VERSION"
+git tag $TAG
+git push origin $TAG
+
+# build and push the gem
+gem build gooddata_datawarehouse.gemspec
+gem push "gooddata_datawarehouse-$VERSION.gem"
+
+# update the gem after a few secs
+echo "Sleeping.."
+sleep 30
+gem update gooddata_datawarehouse
data/spec/data/emptyheader-bike.csv ADDED
@@ -0,0 +1,4 @@
+"","manufacturer",""
+"29","Ibis","1"
+"27.5","Seven","2"
+"29","Moots","3"
data/spec/datawarehouse_spec.rb CHANGED
@@ -4,6 +4,7 @@ require 'gooddata_datawarehouse/datawarehouse'
 CSV_PATH = 'spec/data/bike.csv'
 CSV_PATH2 = 'spec/data/bike2.csv'
 WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
+EMPTY_HEADER_CSV_PATH = 'spec/data/emptyheader-bike.csv'
 CSV_REGEXP = 'spec/data/bike*.csv'
 
 class Helper
@@ -107,9 +108,10 @@ describe GoodData::Datawarehouse do
     expect(@dwh.table_exists?(@random_table_name)).to eq true
   end
 
-  def check_row_count
+  def check_row_count(files=[CSV_PATH, CSV_PATH2])
+    expected_count = files.map {|f| Helper.line_count(f)}.reduce(:+)
     # there are lines from both of the csvs
-    expect(@dwh.table_row_count(@random_table_name)).to eq Helper.line_count(CSV_PATH) + Helper.line_count(CSV_PATH2)
+    expect(@dwh.table_row_count(@random_table_name)).to eq expected_count
   end
 
   describe '#rename_table' do
@@ -141,13 +143,13 @@
     end
 
 
-    it "loads all files in a directory" do
+    it "loads all files in a directory, in paralel" do
       # make a tempdir and copy the csvs there
       Dir.mktmpdir('foo') do |dir|
         FileUtils.cp(CSV_PATH, dir)
         FileUtils.cp(CSV_PATH2, dir)
 
-        @dwh.csv_to_new_table(@random_table_name, dir)
+        @dwh.csv_to_new_table(@random_table_name, dir, :paralel_copy_thread_count => 2)
       end
 
       check_table_exists
@@ -279,6 +281,11 @@ describe GoodData::Datawarehouse do
       expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
       expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
     end
+    it "creates empty1, etc. columns for empty header columns" do
+      @dwh.csv_to_new_table(@random_table_name, EMPTY_HEADER_CSV_PATH)
+      # it should have cols empty1,2
+      expect(@dwh.get_columns(@random_table_name).map {|c| c[:column_name]}).to include('empty1', 'empty2')
+    end
   end
 
   describe '#export_table' do
@@ -343,6 +350,42 @@ describe GoodData::Datawarehouse do
       # load the data there - expect fail
       expect{@dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH)}.to raise_error(ArgumentError)
     end
+
+    it 'truncates the data that is already there' do
+      @dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
+      check_table_exists
+      check_cols
+
+      # load the data there
+      @dwh.load_data_from_csv(@random_table_name, CSV_PATH)
+      check_row_count([CSV_PATH])
+
+      # load the data there again, count should stay
+      @dwh.load_data_from_csv(@random_table_name, CSV_PATH2)
+      check_row_count([CSV_PATH2])
+    end
+
+    it "keeps the data that is there if append option passed" do
+      @dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
+      check_table_exists
+      check_cols
+
+      # load the data there
+      @dwh.load_data_from_csv(@random_table_name, CSV_PATH)
+      check_row_count([CSV_PATH])
+
+      # append the data
+      @dwh.load_data_from_csv(@random_table_name, CSV_PATH2, :append => true)
+      check_row_count([CSV_PATH, CSV_PATH2])
+    end
+  end
+
+  describe "#truncate_table" do
+    it "truncates the given table" do
+      @dwh.csv_to_new_table(@random_table_name, CSV_PATH)
+      @dwh.truncate_table(@random_table_name)
+      expect(@dwh.table_row_count(@random_table_name)).to eq 0
+    end
   end
 
   describe '#get_columns' do
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gooddata_datawarehouse
 version: !ruby/object:Gem::Version
-  version: 0.0.6
+  version: 0.0.7
 platform: ruby
 authors:
 - Petr Cvengros
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-02-23 00:00:00.000000000 Z
+date: 2015-03-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -149,8 +149,10 @@ files:
 - lib/gooddata_datawarehouse/datawarehouse.rb
 - lib/gooddata_datawarehouse/sql_generator.rb
 - lib/gooddata_datawarehouse/version.rb
+- new-version.sh
 - spec/data/bike.csv
 - spec/data/bike2.csv
+- spec/data/emptyheader-bike.csv
 - spec/data/wrong-bike.csv
 - spec/datawarehouse_spec.rb
 - spec/spec_helper.rb
@@ -181,6 +183,7 @@ summary: Convenient work with GoodData's Datawarehouse (ADS)
 test_files:
 - spec/data/bike.csv
 - spec/data/bike2.csv
+- spec/data/emptyheader-bike.csv
 - spec/data/wrong-bike.csv
 - spec/datawarehouse_spec.rb
 - spec/spec_helper.rb