superset 0.1.6 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/README.md +36 -144
- data/doc/duplicate_dashboards.md +2 -5
- data/doc/migrating_dashboards_across_environments.md +173 -0
- data/doc/publishing.md +39 -0
- data/doc/setting_up_personal_api_credentials.md +43 -7
- data/doc/usage.md +105 -0
- data/env.sample +1 -1
- data/lib/superset/base_put_request.rb +30 -0
- data/lib/superset/chart/create.rb +40 -0
- data/lib/superset/chart/duplicate.rb +75 -0
- data/lib/superset/chart/put.rb +18 -0
- data/lib/superset/chart/update_dataset.rb +1 -1
- data/lib/superset/client.rb +7 -1
- data/lib/superset/dashboard/bulk_delete_cascade.rb +1 -1
- data/lib/superset/dashboard/compare.rb +2 -2
- data/lib/superset/dashboard/datasets/list.rb +37 -9
- data/lib/superset/dashboard/embedded/get.rb +2 -2
- data/lib/superset/dashboard/export.rb +56 -5
- data/lib/superset/dashboard/get.rb +5 -0
- data/lib/superset/dashboard/import.rb +84 -0
- data/lib/superset/dashboard/list.rb +8 -4
- data/lib/superset/dashboard/warm_up_cache.rb +1 -1
- data/lib/superset/database/export.rb +119 -0
- data/lib/superset/database/list.rb +5 -2
- data/lib/superset/dataset/get.rb +10 -11
- data/lib/superset/dataset/list.rb +1 -1
- data/lib/superset/dataset/put.rb +18 -0
- data/lib/superset/dataset/refresh.rb +38 -0
- data/lib/superset/dataset/update_schema.rb +4 -3
- data/lib/superset/file_utilities.rb +4 -3
- data/lib/superset/guest_token.rb +14 -7
- data/lib/superset/logger.rb +2 -2
- data/lib/superset/request.rb +7 -4
- data/lib/superset/services/dashboard_loader.rb +69 -0
- data/lib/superset/services/duplicate_dashboard.rb +14 -13
- data/lib/superset/services/import_dashboard_across_environment.rb +144 -0
- data/lib/superset/version.rb +1 -1
- metadata +16 -3
data/lib/superset/dashboard/list.rb
CHANGED
@@ -5,12 +5,14 @@
 module Superset
   module Dashboard
     class List < Superset::Request
-      attr_reader :title_contains, :tags_equal, :ids_not_in
+      attr_reader :title_contains, :title_equals, :tags_equal, :ids_not_in, :include_filter_dataset_schemas
 
-      def initialize(page_num: 0, title_contains: '', tags_equal: [], ids_not_in: [])
+      def initialize(page_num: 0, title_contains: '', title_equals: '', tags_equal: [], ids_not_in: [], include_filter_dataset_schemas: false)
         @title_contains = title_contains
+        @title_equals = title_equals
         @tags_equal = tags_equal
         @ids_not_in = ids_not_in
+        @include_filter_dataset_schemas = include_filter_dataset_schemas
         super(page_num: page_num)
       end
 
@@ -34,7 +36,7 @@ module Superset
       end
 
       def retrieve_schemas(id)
-        { schemas: Datasets::List.new(id).schemas }
+        { schemas: Datasets::List.new(dashboard_id: id, include_filter_datasets: include_filter_dataset_schemas).schemas }
       rescue StandardError => e
         # within Superset, a bug exists around deleting dashboards failing and the corrupting datasets configs, so handle errored datasets gracefully
         # ref NEP-17532
@@ -42,7 +44,7 @@ module Superset
       end
 
       def retrieve_embedded_details(id)
-        embedded_dashboard = Dashboard::Embedded::Get.new(id)
+        embedded_dashboard = Dashboard::Embedded::Get.new(dashboard_id: id)
         { allowed_embedded_domains: embedded_dashboard.allowed_domains,
           uuid: embedded_dashboard.uuid,}
       end
@@ -69,6 +71,7 @@ module Superset
       # TODO filtering across all list classes can be refactored to support multiple options in a more flexible way
       filter_set = []
       filter_set << "(col:dashboard_title,opr:ct,value:'#{title_contains}')" if title_contains.present?
+      filter_set << "(col:dashboard_title,opr:eq,value:'#{title_equals}')" if title_equals.present?
       filter_set << tag_filters if tags_equal.present?
       filter_set << ids_not_in_filters if ids_not_in.present?
       unless filter_set.empty?
@@ -90,6 +93,7 @@ module Superset
 
       def validate_constructor_args
         raise InvalidParameterError, "title_contains must be a String type" unless title_contains.is_a?(String)
+        raise InvalidParameterError, "title_equals must be a String type" unless title_equals.is_a?(String)
         raise InvalidParameterError, "tags_equal must be an Array type" unless tags_equal.is_a?(Array)
         raise InvalidParameterError, "tags_equal array must contain string only values" unless tags_equal.all? { |item| item.is_a?(String) }
         raise InvalidParameterError, "ids_not_in must be an Array type" unless ids_not_in.is_a?(Array)
data/lib/superset/dashboard/warm_up_cache.rb
CHANGED
@@ -35,7 +35,7 @@ module Superset
       end
 
       def fetch_dataset_details(dashboard_id)
-        Superset::Dashboard::Datasets::List.new(dashboard_id).datasets_details.map { |dataset| dataset['database'].slice('name').merge(dataset.slice('datasource_name'))}
+        Superset::Dashboard::Datasets::List.new(dashboard_id: dashboard_id).datasets_details.map { |dataset| dataset['database'].slice('name').merge(dataset.slice('datasource_name'))}
       end
     end
   end
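The only change here is the call-site update for the new keyword-argument constructor, e.g. (dashboard id illustrative):

  Superset::Dashboard::Datasets::List.new(dashboard_id: 101).datasets_details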
data/lib/superset/database/export.rb
ADDED
@@ -0,0 +1,119 @@
+# Will export the Database zip file to /tmp/superset_database_exports with zip filename adjusted to include the database_id
+# Example zipfile: database_#{database_id}_export_#{datestamp}.zip
+#
+# File will then be unzipped and all files copied into the destination_path with the database_id as a subfolder
+# Optionally remove the dataset yaml files from the export
+#
+# Usage
+# Superset::Database::Export.new(database_id: 1, destination_path: '/tmp/superset_database_exports/').perform
+
+# Superset::Database::Export.new(database_id: 1, destination_path: '/tmp/superset_database_exports/', remove_dataset_yamls: true).perform
+#
+
+require 'superset/file_utilities'
+
+module Superset
+  module Database
+    class Export < Request
+      include FileUtilities
+
+      TMP_SUPERSET_DATABASE_PATH = '/tmp/superset_database_exports'.freeze
+
+      attr_reader :database_id, :destination_path, :remove_dataset_yamls
+
+      def initialize(database_id:, destination_path:, remove_dataset_yamls: true)
+        @database_id = database_id
+        @destination_path = destination_path.chomp('/')
+        @remove_dataset_yamls = remove_dataset_yamls
+      end
+
+      def perform
+        create_tmp_dir
+        save_exported_zip_file
+        unzip_files
+        copy_export_files_to_destination_path
+
+        Dir.glob("#{destination_path_with_db_id}/databases/*")
+      end
+
+      def response
+        @response ||= client.call(
+          :get,
+          client.url(route),
+          client.param_check(params)
+        )
+      end
+
+      def exported_zip_path
+        @exported_zip_path ||= "#{tmp_uniq_database_path}/database_#{database_id}_export_#{datestamp}.zip"
+      end
+
+      private
+
+      def params
+        # The Swagger API interface indicates this endpoint should take an array of integers
+        # however this does not work within the Swagger interface or when testing the API
+        # Investigating the Superset GUI with Dev Tools shows that the format below is used
+
+        { "q": "!(#{database_id})" }
+      end
+
+      def save_exported_zip_file
+        File.open(exported_zip_path, 'wb') { |fp| fp.write(response.body) }
+      end
+
+      def unzip_files
+        @extracted_files = unzip_file(exported_zip_path, tmp_uniq_database_path)
+        remove_dataset_yaml_files if remove_dataset_yamls
+      end
+
+      def download_folder
+        File.dirname(extracted_files[0])
+      end
+
+      def destination_path_with_db_id
+        @destination_path_with_db_id ||= File.join(destination_path, database_id.to_s)
+      end
+
+      def copy_export_files_to_destination_path
+        FileUtils.mkdir_p(destination_path_with_db_id) unless File.directory?(destination_path_with_db_id)
+
+        Dir.glob("#{download_folder}/*").each do |item|
+          FileUtils.cp_r(item, destination_path_with_db_id)
+        end
+      end
+
+      def remove_dataset_yaml_files
+        datasets_directories = Dir.glob(File.join(tmp_uniq_database_path, '/*/datasets'))
+
+        datasets_directories.each do |directory|
+          FileUtils.rm_rf(directory) if Dir.exist?(directory)
+        end
+      end
+
+      def create_tmp_dir
+        FileUtils.mkdir_p(tmp_uniq_database_path) unless File.directory?(tmp_uniq_database_path)
+      end
+
+      def tmp_uniq_database_path
+        @tmp_uniq_database_path ||= File.join(TMP_SUPERSET_DATABASE_PATH, uuid)
+      end
+
+      def uuid
+        SecureRandom.uuid
+      end
+
+      def extracted_files
+        @extracted_files ||= []
+      end
+
+      def route
+        "database/export/"
+      end
+
+      def datestamp
+        @datestamp ||= Time.now.strftime('%Y%m%d')
+      end
+    end
+  end
+end
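A hedged sketch of the new export flow, following the usage comments at the top of the class (database id and output filename are illustrative):

  # exports to a uniq tmp dir, unzips, strips dataset YAMLs by default,
  # then copies everything under <destination_path>/<database_id>/
  files = Superset::Database::Export.new(
    database_id: 1,
    destination_path: '/tmp/superset_database_exports/'
  ).perform
  # => e.g. ['/tmp/superset_database_exports/1/databases/my_db.yaml']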
data/lib/superset/database/list.rb
CHANGED
@@ -4,10 +4,11 @@
 module Superset
   module Database
     class List < Superset::Request
-      attr_reader :title_contains
+      attr_reader :title_contains, :uuid_equals
 
-      def initialize(page_num: 0, title_contains: '')
+      def initialize(page_num: 0, title_contains: '', uuid_equals: '')
         @title_contains = title_contains
+        @uuid_equals = uuid_equals
         super(page_num: page_num)
       end
 
@@ -34,6 +35,7 @@ module Superset
       # TODO filtering across all list classes can be refactored to support multiple options in a more flexible way
       filter_set = []
       filter_set << "(col:database_name,opr:ct,value:'#{title_contains}')" if title_contains.present?
+      filter_set << "(col:uuid,opr:eq,value:'#{uuid_equals}')" if uuid_equals.present?
       unless filter_set.empty?
         "filters:!(" + filter_set.join(',') + "),"
       end
@@ -45,6 +47,7 @@ module Superset
 
       def validate_constructor_args
         raise InvalidParameterError, "title_contains must be a String type" unless title_contains.is_a?(String)
+        raise InvalidParameterError, "uuid_equals must be a String type" unless uuid_equals.is_a?(String)
       end
     end
   end
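Since database UUIDs are stable across exported environments, the new filter allows resolving the matching database record in a target environment. A sketch (UUID value is an illustrative placeholder):

  Superset::Database::List.new(uuid_equals: 'some-database-uuid').result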
data/lib/superset/dataset/get.rb
CHANGED
@@ -29,17 +29,6 @@ module Superset
       result['name']
     end
 
-    private
-
-    def route
-      "dataset/#{id}"
-    end
-
-    def display_headers
-      %w[title schema database_name, database_id]
-    end
-
-
     def database_name
       result['database']['database_name']
     end
@@ -51,6 +40,16 @@ module Superset
     def sql
       result['sql']
     end
+
+    private
+
+    def route
+      "dataset/#{id}"
+    end
+
+    def display_headers
+      %w[title schema database_name, database_id]
+    end
   end
 end
data/lib/superset/dataset/put.rb
ADDED
@@ -0,0 +1,18 @@
+# Updates a dataset in Superset with the given params
+#
+# Usage:
+#   params = { owners: [ 58, 3 ] }
+#   Superset::Dataset::Put.new(object_id: 101, params: params).perform
+
+module Superset
+  module Dataset
+    class Put < Superset::BasePutRequest
+
+      private
+
+      def route
+        "dataset/#{object_id}"
+      end
+    end
+  end
+end
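Mirroring the usage comment in the file, a sketch of reassigning dataset owners via the new PUT request (ids illustrative):

  params = { owners: [58, 3] }
  Superset::Dataset::Put.new(object_id: 101, params: params).perform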
data/lib/superset/dataset/refresh.rb
ADDED
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+# Description: This endpoint has the same functionality as the 'Sync Columns from Source' button in the Superset UI on a dataset.
+# Executes the dataset against the source to confirm the query runs, then syncs and caches the dataset columns.
+# NOTICE: only owners of the dataset can refresh it
+#
+# Usage: Superset::Dataset::Refresh.call(id)
+
+module Superset
+  module Dataset
+    class Refresh < Superset::Request
+
+      attr_reader :id
+
+      def initialize(id)
+        @id = id
+      end
+
+      def self.call(id)
+        self.new(id).perform
+      end
+
+      def perform
+        response
+      end
+
+      def response
+        @response ||= client.put(route)
+      end
+
+      private
+
+      def route
+        "dataset/#{id}/refresh"
+      end
+    end
+  end
+end
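Usage is a single call (dataset id illustrative); per the comments above, this mirrors the UI's 'Sync Columns from Source' button and only works for dataset owners:

  Superset::Dataset::Refresh.call(42)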
data/lib/superset/dataset/update_schema.rb
CHANGED
@@ -68,9 +68,10 @@ module Superset
 
       def validate_proposed_changes
        logger.info "  Validating Dataset ID: #{source_dataset_id} schema update to #{target_schema} on Database: #{target_database_id}"
-        raise "Error: source_dataset_id integer is required"
-        raise "Error: target_database_id integer is required"
-        raise "Error: target_schema string is required"
+        raise "Error: source_dataset_id integer is required" unless source_dataset_id.present? && source_dataset_id.is_a?(Integer)
+        raise "Error: target_database_id integer is required" unless target_database_id.present? && target_database_id.is_a?(Integer)
+        raise "Error: target_schema string is required" unless target_schema.present? && target_schema.is_a?(String)
+        raise "Error: schema must be set on the source dataset" unless source_dataset['schema'].present? # required for validating sql_query_includes_hard_coded_schema
 
        # confirm the dataset exist? ... no need as the load_source_dataset method will raise an error if the dataset does not exist
 
data/lib/superset/file_utilities.rb
CHANGED
@@ -9,11 +9,12 @@ module Superset
         entry_path = File.join(destination, entry.name)
         entries << entry_path
         FileUtils.mkdir_p(File.dirname(entry_path))
-
+
+        zip.extract(entry, entry_path) unless File.exist?(entry_path)
       end
     end
-
-    entries
+
+    entries
   end
 end
data/lib/superset/guest_token.rb
CHANGED
@@ -2,14 +2,16 @@ module Superset
   class GuestToken
     include Credential::EmbeddedUser
 
-    attr_accessor :embedded_dashboard_id, :
+    attr_accessor :embedded_dashboard_id, :rls_clause, :additional_params
 
-    def initialize(embedded_dashboard_id: ,
+    def initialize(embedded_dashboard_id:, rls_clause: [], **additional_params)
       @embedded_dashboard_id = embedded_dashboard_id
-      @
+      @rls_clause = rls_clause
+      @additional_params = additional_params
     end
 
     def guest_token
+      validate_params
       response_body['token']
     end
 
@@ -20,18 +22,23 @@ module Superset
         "id": embedded_dashboard_id.to_s,
         "type": "dashboard" }
       ],
-      "rls": []
+      "rls": rls_clause, # Ex: [{ "clause": "publisher = 'Nintendo'" }]
       "user": current_user_params
-    }
+    }.merge(additional_params)
     end
 
     private
 
+    def validate_params
+      raise Superset::Request::InvalidParameterError, "rls_clause should be an array. But it is #{rls_clause.class}" if rls_clause.nil? || rls_clause.class != Array
+    end
+
    # optional param to be available in Superset for query templating using jinja
    # ss expects username .. which could be used to query as current_user.id
    def current_user_params
-
-
+      current_user_id = additional_params[:embedded_app_current_user_id]
+      if current_user_id
+        { "username": current_user_id.to_s }
       else
         { }
       end
data/lib/superset/logger.rb
CHANGED
data/lib/superset/request.rb
CHANGED
@@ -5,7 +5,6 @@ module Superset
     class InvalidParameterError < StandardError; end
     class ValidationError < StandardError; end
 
-
    PAGE_SIZE = 100
 
    attr_accessor :page_num
@@ -43,8 +42,12 @@ module Superset
      raise NotImplementedError.new("You must implement route.")
    end
 
-    def client
-      @client ||=
+    def client(use_json: true)
+      @client ||= begin
+        c = Superset::Client.new
+        c.config.use_json = use_json
+        c
+      end
    end
 
    def pagination
@@ -59,4 +62,4 @@ module Superset
      @logger ||= Superset::Logger.new
    end
  end
-end
+end
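Note that @client is memoized, so use_json only takes effect on the first call for a given request object:

  client(use_json: false)  # builds the client with config.use_json = false
  client(use_json: true)   # returns the same cached client; the flag is ignored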
data/lib/superset/services/dashboard_loader.rb
ADDED
@@ -0,0 +1,69 @@
+# Given a path, load all yaml files
+
+require 'superset/file_utilities'
+require 'yaml'
+
+module Superset
+  module Services
+    class DashboardLoader
+      include FileUtilities
+
+      TMP_PATH = '/tmp/superset_dashboard_imports'.freeze
+
+      attr_reader :dashboard_export_zip
+
+      def initialize(dashboard_export_zip:)
+        @dashboard_export_zip = dashboard_export_zip
+      end
+
+      def perform
+        unzip_source_file
+        dashboard_config
+      end
+
+      def dashboard_config
+        @dashboard_config ||= DashboardConfig.new(
+          dashboard_export_zip: dashboard_export_zip,
+          tmp_uniq_dashboard_path: tmp_uniq_dashboard_path).config
+      end
+
+      private
+
+      def unzip_source_file
+        @extracted_files = unzip_file(dashboard_export_zip, tmp_uniq_dashboard_path)
+      end
+
+      def tmp_uniq_dashboard_path
+        @tmp_uniq_dashboard_path ||= File.join(TMP_PATH, uuid)
+      end
+
+      def uuid
+        SecureRandom.uuid
+      end
+
+      class DashboardConfig < ::OpenStruct
+        def config
+          {
+            tmp_uniq_dashboard_path: tmp_uniq_dashboard_path,
+            dashboards: load_yamls_for('dashboards'),
+            databases: load_yamls_for('databases'),
+            datasets: load_yamls_for('datasets'),
+            charts: load_yamls_for('charts'),
+            metadata: load_yamls_for('metadata.yaml', pattern_sufix: nil),
+          }
+        end
+
+        def load_yamls_for(object_path, pattern_sufix: '**/*.yaml')
+          pattern = File.join([tmp_uniq_dashboard_path, '**', object_path, pattern_sufix].compact)
+          Dir.glob(pattern).map do |file|
+            { filename: file, content: load_yaml_and_symbolize_keys(file) } if File.file?(file)
+          end.compact
+        end
+
+        def load_yaml_and_symbolize_keys(path)
+          YAML.load_file(path).deep_symbolize_keys
+        end
+      end
+    end
+  end
+end
data/lib/superset/services/duplicate_dashboard.rb
CHANGED
@@ -93,13 +93,18 @@ module Superset
        # duplicate the dataset, renaming to use of suffix as the target_schema
        # reason: there is a bug(or feature) in the SS API where a dataset name must be uniq when duplicating.
        # (note however renaming in the GUI to a dup name works fine)
-
-
+        new_dataset_name = "#{dataset[:datasource_name]}-#{target_schema}"
+        existing_datasets = Superset::Dataset::List.new(title_equals: new_dataset_name, schema_equals: target_schema).result
+        if existing_datasets.any?
+          logger.info "Dataset #{existing_datasets[0]["table_name"]} already exists. Reusing it"
+          new_dataset_id = existing_datasets[0]["id"] # assuming that we do not name multiple datasets with same name in a single schema
+        else
+          new_dataset_id = Superset::Dataset::Duplicate.new(source_dataset_id: dataset[:id], new_dataset_name: new_dataset_name).perform
+          # update the new dataset with the target schema and target database
+          Superset::Dataset::UpdateSchema.new(source_dataset_id: new_dataset_id, target_database_id: target_database_id, target_schema: target_schema).perform
+        end
        # keep track of the previous dataset and the matching new dataset_id
        dataset_duplication_tracker << { source_dataset_id: dataset[:id], new_dataset_id: new_dataset_id }
-
-        # update the new dataset with the target schema and target database
-        Superset::Dataset::UpdateSchema.new(source_dataset_id: new_dataset_id, target_database_id: target_database_id, target_schema: target_schema).perform
      end
    end
 
@@ -179,7 +184,7 @@ module Superset
 
      # retrieve the datasets that will be duplicated
      def source_dashboard_datasets
-        @source_dashboard_datasets ||= Superset::Dashboard::Datasets::List.new(source_dashboard_id).datasets_details
+        @source_dashboard_datasets ||= Superset::Dashboard::Datasets::List.new(dashboard_id: source_dashboard_id, include_filter_datasets: true).datasets_details
      rescue => e
        raise "Unable to retrieve datasets for source dashboard #{source_dashboard_id}: #{e.message}"
      end
@@ -199,7 +204,7 @@ module Superset
      raise ValidationError, "The source dashboard datasets are required to point to one schema only. Actual schema list is #{source_dashboard_schemas.join(',')}" if source_dashboard_has_more_than_one_schema?
      raise ValidationError, "One or more source dashboard filters point to a different schema than the dashboard charts. Identified Unpermittied Filter Dataset Ids are #{unpermitted_filter_dataset_ids.to_s}" if unpermitted_filter_dataset_ids.any?
 
-      # new dataset validations
+      # new dataset validations - Need to be commented for EU dashboard duplication as we are using the existing datasets for the new dashboard
      raise ValidationError, "DATASET NAME CONFLICT: The Target Schema #{target_schema} already has existing datasets named: #{target_schema_matching_dataset_names.join(',')}" unless target_schema_matching_dataset_names.empty?
      validate_source_dashboard_datasets_sql_does_not_hard_code_schema
 
@@ -241,7 +246,7 @@ module Superset
      # here we will need to decide if we want to use the existing dataset or not see NEP-????
      # for now we will exit with an error if we find any existing datasets of the same name
      def target_schema_matching_dataset_names
-        source_dashboard_dataset_names.map do |source_dataset_name|
+        @target_schema_matching_dataset_names ||= source_dashboard_dataset_names.map do |source_dataset_name|
          existing_names = Superset::Dataset::List.new(title_contains: source_dataset_name, schema_equals: target_schema).result.map{|t|t['table_name']}.uniq # contains match to cover with suffix as well
          unless existing_names.flatten.empty?
            logger.error "  HALTING PROCESS: Schema #{target_schema} already has Dataset called #{existing_names}"
@@ -255,11 +260,7 @@ module Superset
      end
 
      def source_dashboard_filter_dataset_ids
-
-        return Array.new unless filters_configuration && filters_configuration.any?
-
-        # pull only the filters dataset ids from the dashboard
-        filters_configuration.map { |c| c['targets'] }.flatten.compact.map { |c| c['datasetId'] }.flatten.compact
+        @filter_dataset_ids ||= source_dashboard.filter_configuration.map { |c| c['targets'] }.flatten.compact.map { |c| c['datasetId'] }.flatten.compact.uniq
      end
 
      # Primary Assumption is that all charts datasets on the source dashboard are pointing to the same database schema
|