gooddata_datawarehouse 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to gain access to all of the features.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/lib/gooddata_datawarehouse/datawarehouse.rb +24 -5
- data/lib/gooddata_datawarehouse/sql_generator.rb +4 -0
- data/lib/gooddata_datawarehouse/version.rb +1 -1
- data/new-version.sh +23 -0
- data/spec/data/emptyheader-bike.csv +4 -0
- data/spec/datawarehouse_spec.rb +47 -4
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 453c08405c8e4e9c2fc46b6b2fb1f1df22f2f4ae
|
4
|
+
data.tar.gz: ce89580d666c0d660a61a72def8f6d7d28d4ab9d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d40a9ece4f77255e2d2f750d0082bc5ca0d19bb1d99ae013d8da5331000b1c86022e3dc4c8b70b54cda7da130332a6b3b19ac4eeb280d27573d19bd8cafc1ed
|
7
|
+
data.tar.gz: 69fc15c0a428e8e32db4b20f07b7fa7ef686342df98e210dc1a4540e6af85569f31cebb00a867d9fe5e15a62f3326c1d3c74db0b4a31fb6d7b4c777fc6755c4a
|
data/README.md
CHANGED
@@ -51,7 +51,13 @@ dwh = GoodData::Datawarehouse.new('you@gooddata.com', 'yourpass', 'your ADS inst
|
|
51
51
|
# import a csv
|
52
52
|
dwh.csv_to_new_table('my_table', 'path/to/my.csv')
|
53
53
|
|
54
|
+
# or multiple csvs (running in parallel threads)
|
55
|
+
dwh.csv_to_new_table('my_table', ['path/to/my.csv', 'path/to/my2.csv'])
|
56
|
+
dwh.csv_to_new_table('my_table', 'path/to/*.csv')
|
57
|
+
dwh.csv_to_new_table('my_table', 'path/to/directory/')
|
58
|
+
|
54
59
|
dwh.table_exists?('my_table') # true
|
60
|
+
dwh.table_row_count('my_table') # 55
|
55
61
|
dwh.get_columns('my_table') # [{column_name: 'col1', data_type: 'varchar(88)'}, {column_name: 'col2', data_type: 'int'}]
|
56
62
|
|
57
63
|
# run an arbitrary sql
|
@@ -9,7 +9,7 @@ require_relative 'sql_generator'
|
|
9
9
|
|
10
10
|
module GoodData
|
11
11
|
class Datawarehouse
|
12
|
-
PARALEL_COPY_THREAD_COUNT =
|
12
|
+
PARALEL_COPY_THREAD_COUNT = 10
|
13
13
|
def initialize(username, password, instance_id, opts={})
|
14
14
|
@logger = Logger.new(STDOUT)
|
15
15
|
@username = username
|
@@ -42,6 +42,8 @@ module GoodData
|
|
42
42
|
csv << row.values_at(*col_keys)
|
43
43
|
end
|
44
44
|
end
|
45
|
+
@logger.info "Table #{table_name} exported to #{csv_path.respond_to?(:path)? csv_path.path : csv_path}"
|
46
|
+
csv_path
|
45
47
|
end
|
46
48
|
|
47
49
|
def rename_table(old_name, new_name)
|
@@ -55,18 +57,27 @@ module GoodData
|
|
55
57
|
def csv_to_new_table(table_name, csvs, opts={})
|
56
58
|
csv_list = list_files(csvs)
|
57
59
|
cols = create_table_from_csv_header(table_name, csv_list[0], opts)
|
58
|
-
load_data_from_csv(table_name, csv_list, opts.merge(columns: cols))
|
60
|
+
load_data_from_csv(table_name, csv_list, opts.merge(columns: cols, append: true))
|
61
|
+
end
|
62
|
+
|
63
|
+
def truncate_table(table_name)
|
64
|
+
execute(GoodData::SQLGenerator.truncate_table(table_name))
|
59
65
|
end
|
60
66
|
|
61
67
|
def load_data_from_csv(table_name, csvs, opts={})
|
68
|
+
thread_count = opts[:paralel_copy_thread_count] || PARALEL_COPY_THREAD_COUNT
|
62
69
|
# get the list of files to load and columns in the csv
|
63
70
|
csv_list = list_files(csvs)
|
64
71
|
columns = opts[:columns] || get_csv_headers(csv_list[0])
|
65
72
|
|
73
|
+
# truncate_table unless data should be appended
|
74
|
+
unless opts[:append]
|
75
|
+
truncate_table(table_name)
|
76
|
+
end
|
77
|
+
|
66
78
|
# load each csv from the list
|
67
79
|
single_file = (csv_list.size == 1)
|
68
|
-
|
69
|
-
csv_list.peach(PARALEL_COPY_THREAD_COUNT) do |csv_path|
|
80
|
+
csv_list.peach(thread_count) do |csv_path|
|
70
81
|
if opts[:ignore_parse_errors] && opts[:exceptions_file].nil? && opts[:rejections_file].nil?
|
71
82
|
exc = nil
|
72
83
|
rej = nil
|
@@ -209,7 +220,15 @@ module GoodData
|
|
209
220
|
if header_str.nil? || header_str.empty?
|
210
221
|
return []
|
211
222
|
end
|
212
|
-
|
223
|
+
empty_count = 0
|
224
|
+
header_str.split(',').map{|s| s.gsub(/[\s"-]/,'')}.map do |c|
|
225
|
+
if c.nil? || c.empty?
|
226
|
+
empty_count += 1
|
227
|
+
"empty#{empty_count}"
|
228
|
+
else
|
229
|
+
c
|
230
|
+
end
|
231
|
+
end
|
213
232
|
end
|
214
233
|
end
|
215
234
|
end
|
data/new-version.sh
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/bin/sh
|
2
|
+
|
3
|
+
# get the new version
|
4
|
+
VERSION=`bundle exec ruby <<-EORUBY
|
5
|
+
|
6
|
+
require 'gooddata_datawarehouse'
|
7
|
+
puts GoodData::Datawarehouse::VERSION
|
8
|
+
|
9
|
+
EORUBY`
|
10
|
+
|
11
|
+
# create tag and push it
|
12
|
+
TAG="v$VERSION"
|
13
|
+
git tag $TAG
|
14
|
+
git push origin $TAG
|
15
|
+
|
16
|
+
# build and push the gem
|
17
|
+
gem build gooddata_datawarehouse.gemspec
|
18
|
+
gem push "gooddata_datawarehouse-$VERSION.gem"
|
19
|
+
|
20
|
+
# update the gem after a few secs
|
21
|
+
echo "Sleeping.."
|
22
|
+
sleep 30
|
23
|
+
gem update gooddata_datawarehouse
|
data/spec/datawarehouse_spec.rb
CHANGED
@@ -4,6 +4,7 @@ require 'gooddata_datawarehouse/datawarehouse'
|
|
4
4
|
CSV_PATH = 'spec/data/bike.csv'
|
5
5
|
CSV_PATH2 = 'spec/data/bike2.csv'
|
6
6
|
WRONG_CSV_PATH = 'spec/data/wrong-bike.csv'
|
7
|
+
EMPTY_HEADER_CSV_PATH = 'spec/data/emptyheader-bike.csv'
|
7
8
|
CSV_REGEXP = 'spec/data/bike*.csv'
|
8
9
|
|
9
10
|
class Helper
|
@@ -107,9 +108,10 @@ describe GoodData::Datawarehouse do
|
|
107
108
|
expect(@dwh.table_exists?(@random_table_name)).to eq true
|
108
109
|
end
|
109
110
|
|
110
|
-
def check_row_count
|
111
|
+
def check_row_count(files=[CSV_PATH, CSV_PATH2])
|
112
|
+
expected_count = files.map {|f| Helper.line_count(f)}.reduce(:+)
|
111
113
|
# there are lines from both of the csvs
|
112
|
-
expect(@dwh.table_row_count(@random_table_name)).to eq
|
114
|
+
expect(@dwh.table_row_count(@random_table_name)).to eq expected_count
|
113
115
|
end
|
114
116
|
|
115
117
|
describe '#rename_table' do
|
@@ -141,13 +143,13 @@ describe GoodData::Datawarehouse do
|
|
141
143
|
end
|
142
144
|
|
143
145
|
|
144
|
-
it "loads all files in a directory" do
|
146
|
+
it "loads all files in a directory, in paralel" do
|
145
147
|
# make a tempdir and copy the csvs there
|
146
148
|
Dir.mktmpdir('foo') do |dir|
|
147
149
|
FileUtils.cp(CSV_PATH, dir)
|
148
150
|
FileUtils.cp(CSV_PATH2, dir)
|
149
151
|
|
150
|
-
@dwh.csv_to_new_table(@random_table_name, dir)
|
152
|
+
@dwh.csv_to_new_table(@random_table_name, dir, :paralel_copy_thread_count => 2)
|
151
153
|
end
|
152
154
|
|
153
155
|
check_table_exists
|
@@ -279,6 +281,11 @@ describe GoodData::Datawarehouse do
|
|
279
281
|
expect(File.size("#{rej.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
280
282
|
expect(File.size("#{exc.path}-#{File.basename(WRONG_CSV_PATH)}")).to be > 0
|
281
283
|
end
|
284
|
+
it "creates empty1, etc. columns for empty header columns" do
|
285
|
+
@dwh.csv_to_new_table(@random_table_name, EMPTY_HEADER_CSV_PATH)
|
286
|
+
# the created table should have columns empty1 and empty2
|
287
|
+
expect(@dwh.get_columns(@random_table_name).map {|c| c[:column_name]}).to include('empty1', 'empty2')
|
288
|
+
end
|
282
289
|
end
|
283
290
|
|
284
291
|
describe '#export_table' do
|
@@ -343,6 +350,42 @@ describe GoodData::Datawarehouse do
|
|
343
350
|
# load the data there - expect fail
|
344
351
|
expect{@dwh.load_data_from_csv(@random_table_name, WRONG_CSV_PATH)}.to raise_error(ArgumentError)
|
345
352
|
end
|
353
|
+
|
354
|
+
it 'truncates the data that is already there' do
|
355
|
+
@dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
|
356
|
+
check_table_exists
|
357
|
+
check_cols
|
358
|
+
|
359
|
+
# load the data there
|
360
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH)
|
361
|
+
check_row_count([CSV_PATH])
|
362
|
+
|
363
|
+
# load a different csv without :append — the table is truncated first, so the count reflects only the new file
|
364
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH2)
|
365
|
+
check_row_count([CSV_PATH2])
|
366
|
+
end
|
367
|
+
|
368
|
+
it "keeps the data that is there if append option passed" do
|
369
|
+
@dwh.create_table_from_csv_header(@random_table_name, CSV_PATH)
|
370
|
+
check_table_exists
|
371
|
+
check_cols
|
372
|
+
|
373
|
+
# load the data there
|
374
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH)
|
375
|
+
check_row_count([CSV_PATH])
|
376
|
+
|
377
|
+
# append the data
|
378
|
+
@dwh.load_data_from_csv(@random_table_name, CSV_PATH2, :append => true)
|
379
|
+
check_row_count([CSV_PATH, CSV_PATH2])
|
380
|
+
end
|
381
|
+
end
|
382
|
+
|
383
|
+
describe "#truncate_table" do
|
384
|
+
it "truncates the given table" do
|
385
|
+
@dwh.csv_to_new_table(@random_table_name, CSV_PATH)
|
386
|
+
@dwh.truncate_table(@random_table_name)
|
387
|
+
expect(@dwh.table_row_count(@random_table_name)).to eq 0
|
388
|
+
end
|
346
389
|
end
|
347
390
|
|
348
391
|
describe '#get_columns' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gooddata_datawarehouse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Petr Cvengros
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -149,8 +149,10 @@ files:
|
|
149
149
|
- lib/gooddata_datawarehouse/datawarehouse.rb
|
150
150
|
- lib/gooddata_datawarehouse/sql_generator.rb
|
151
151
|
- lib/gooddata_datawarehouse/version.rb
|
152
|
+
- new-version.sh
|
152
153
|
- spec/data/bike.csv
|
153
154
|
- spec/data/bike2.csv
|
155
|
+
- spec/data/emptyheader-bike.csv
|
154
156
|
- spec/data/wrong-bike.csv
|
155
157
|
- spec/datawarehouse_spec.rb
|
156
158
|
- spec/spec_helper.rb
|
@@ -181,6 +183,7 @@ summary: Convenient work with GoodData's Datawarehouse (ADS)
|
|
181
183
|
test_files:
|
182
184
|
- spec/data/bike.csv
|
183
185
|
- spec/data/bike2.csv
|
186
|
+
- spec/data/emptyheader-bike.csv
|
184
187
|
- spec/data/wrong-bike.csv
|
185
188
|
- spec/datawarehouse_spec.rb
|
186
189
|
- spec/spec_helper.rb
|