pgdexter 0.5.6 → 0.6.1
This diff shows the changes between publicly released versions of the package, as they appear in the public registry to which they were published, and is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE.txt +1 -1
- data/README.md +4 -14
- data/lib/dexter/client.rb +46 -29
- data/lib/dexter/collector.rb +11 -19
- data/lib/dexter/column_resolver.rb +74 -0
- data/lib/dexter/connection.rb +96 -0
- data/lib/dexter/index_creator.rb +72 -0
- data/lib/dexter/indexer.rb +175 -423
- data/lib/dexter/logging.rb +1 -1
- data/lib/dexter/parsers/csv_log_parser.rb +25 -0
- data/lib/dexter/{json_log_parser.rb → parsers/json_log_parser.rb} +5 -3
- data/lib/dexter/{log_parser.rb → parsers/log_parser.rb} +1 -8
- data/lib/dexter/{sql_log_parser.rb → parsers/sql_log_parser.rb} +3 -2
- data/lib/dexter/{stderr_log_parser.rb → parsers/stderr_log_parser.rb} +4 -8
- data/lib/dexter/processor.rb +10 -24
- data/lib/dexter/query.rb +14 -31
- data/lib/dexter/sources/log_source.rb +26 -0
- data/lib/dexter/{pg_stat_activity_parser.rb → sources/pg_stat_activity_source.rb} +10 -6
- data/lib/dexter/sources/pg_stat_statements_source.rb +34 -0
- data/lib/dexter/sources/statement_source.rb +11 -0
- data/lib/dexter/table_resolver.rb +120 -0
- data/lib/dexter/version.rb +1 -1
- data/lib/dexter.rb +16 -7
- data/lib/pgdexter.rb +1 -0
- metadata +20 -12
- data/lib/dexter/csv_log_parser.rb +0 -24
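
The core of this release is a decomposition of the monolithic `Indexer` (423 lines removed, 175 added): connection management moves to a new `Connection` class, table and column resolution to `TableResolver` and `ColumnResolver`, index creation to `IndexCreator`, and the log parsers and query sources are regrouped under `parsers/` and `sources/`. A minimal sketch of the new wiring, as implied by the indexer diff below — the arguments to `Dexter::Connection.new` are an assumption (connection.rb is new in this release and its API is not shown here), while the keyword options and the three methods called on the connection are taken directly from the diff:

```ruby
# Sketch only: wiring implied by the indexer diff below, not the gem's
# documented API. The constructor arguments to Dexter::Connection are
# assumed; the diff only shows what Indexer calls on the injected object.
require "dexter"

connection = Dexter::Connection.new(dbname: "myapp_production") # assumed signature

indexer = Dexter::Indexer.new(
  connection: connection,
  create: false,              # suggest indexes without creating them
  log_level: "debug2",
  min_cost: 0,
  min_cost_savings_pct: 50
)

# Per the diff, the injected connection must provide:
#   connection.server_version_num
#   connection.quote_ident(table)
#   connection.execute(sql, params: [...])
```

Injecting the connection (rather than building it inside `Indexer#conn`, as 0.5.6 did) lets the new `TableResolver`, `ColumnResolver`, and `IndexCreator` share one connection object.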
data/lib/dexter/indexer.rb
CHANGED

Note: this diff was reconstructed from a degraded side-by-side rendering; lines marked `[… missing …]` were removed lines whose content did not survive, and a few truncated lines are marked with `[…]`.

```diff
@@ -2,77 +2,77 @@ module Dexter
   class Indexer
     include Logging
 
-    def initialize(options)
+    def initialize(connection:, **options)
+      @connection = connection
       @create = options[:create]
       @tablespace = options[:tablespace]
       @log_level = options[:log_level]
       @exclude_tables = options[:exclude]
       @include_tables = Array(options[:include].split(",")) if options[:include]
-      @log_sql = options[:log_sql]
       @log_explain = options[:log_explain]
-      @min_time = options[:min_time] || 0
-      @min_calls = options[:min_calls] || 0
       @analyze = options[:analyze]
+      @min_cost = options[:min_cost].to_i
       @min_cost_savings_pct = options[:min_cost_savings_pct].to_i
       @options = options
-      @mutex = Mutex.new
-
-      if server_version_num < 110000
-        raise Dexter::Abort, "This version of Dexter requires Postgres 11+"
-      end
-
-      check_extension
-
-      execute("SET lock_timeout = '5s'")
-    end
-
-    def process_stat_statements
-      queries = stat_statements.map { |q| Query.new(q) }.sort_by(&:fingerprint).group_by(&:fingerprint).map { |_, v| v.first }
-      log "Processing #{queries.size} new query fingerprints"
-      process_queries(queries)
+      @server_version_num = @connection.server_version_num
     end
 
+    # TODO recheck server version?
     def process_queries(queries)
-      [… 3 removed lines missing …]
-      tables = Set.new(database_tables + materialized_views)
+      TableResolver.new(@connection, queries, log_level: @log_level).perform
+      candidate_queries = queries.reject(&:missing_tables)
 
-      [… 3 removed lines missing …]
-      tables.group_by { |t| t.split(".")[-1] }.each do |group, t2|
-        no_schema_tables[group] = t2.sort_by { |t| [search_path_index[t.split(".")[0]] || 1000000, t] }[0]
+      tables = determine_tables(candidate_queries)
+      candidate_queries.each do |query|
+        query.candidate_tables = query.tables.select { |t| tables.include?(t) }.sort
       end
+      candidate_queries.select! { |q| q.candidate_tables.any? }
 
-      [… 5 removed lines missing …]
+      if tables.any?
+        # analyze tables if needed
+        # TODO remove @log_level in 0.7.0
+        analyze_tables(tables) if @analyze || @log_level == "debug2"
+
+        # get initial costs for queries
+        reset_hypothetical_indexes
+        calculate_plan(candidate_queries)
+        candidate_queries.select! { |q| q.initial_cost && q.initial_cost >= @min_cost }
+
+        # find columns
+        ColumnResolver.new(@connection, candidate_queries, log_level: @log_level).perform
+        candidate_queries.each do |query|
+          # no reason to use btree index for json columns
+          # TODO check type supports btree
+          query.candidate_columns = query.columns.reject { |c| ["json", "jsonb", "point"].include?(c[:type]) }.sort_by { |c| [c[:table], c[:column]] }
+        end
+        candidate_queries.select! { |q| q.candidate_columns.any? }
 
-      [… 2 removed lines missing …]
-      view_tables.each do |v, vt|
-        view_tables[v] = vt.flat_map { |t| view_tables[t] || [t] }.uniq
+        # create hypothetical indexes and explain queries
+        batch_hypothetical_indexes(candidate_queries)
       end
 
-      #
-      queries.each do |query|
-        # add schema to table if needed
-        query.tables = query.tables.map { |t| no_schema_tables[t] || t }
+      # see if new indexes were used and meet bar
+      new_indexes = determine_indexes(queries, tables)
 
-        [… 2 removed lines missing …]
-        query.tables_from_views = new_tables - query.tables
-        query.tables = new_tables
+      # display new indexes
+      show_new_indexes(new_indexes)
 
-        [… 3 removed lines missing …]
+      # display debug info
+      show_debug_info(new_indexes, queries) if @log_level.start_with?("debug")
+
+      # create new indexes
+      IndexCreator.new(@connection, self, new_indexes, @tablespace).perform if @create && new_indexes.any?
+    end
+
+    private
 
+    def reset_hypothetical_indexes
+      execute("SELECT hypopg_reset()")
+    end
+
+    def determine_tables(candidate_queries)
       # set tables
-      tables = Set.new(queries.reject(&:missing_tables).flat_map(&:tables))
+      tables = Set.new(candidate_queries.flat_map(&:tables))
 
       # must come after missing tables set
       if @include_tables
@@ -88,54 +88,10 @@ module Dexter
       # remove system tables
       tables.delete_if { |t| t.start_with?("information_schema.", "pg_catalog.") }
 
-      queries.each do |query|
-        query.candidate_tables = !query.missing_tables && query.tables.any? { |t| tables.include?(t) }
-      end
-
-      # analyze tables if needed
-      analyze_tables(tables) if tables.any? && (@analyze || @log_level == "debug2")
-
-      # create hypothetical indexes and explain queries
-      if tables.any?
-        # process in batches to prevent "hypopg: not more oid available" error
-        # https://hypopg.readthedocs.io/en/rel1_stable/usage.html#configuration
-        queries.select(&:candidate_tables).each_slice(500) do |batch|
-          create_hypothetical_indexes(batch)
-        end
-      end
-
-      # see if new indexes were used and meet bar
-      new_indexes = determine_indexes(queries, tables)
-
-      # display and create new indexes
-      show_and_create_indexes(new_indexes, queries)
-    end
-
-    private
-
-    def check_extension
-      extension = execute("SELECT installed_version FROM pg_available_extensions WHERE name = 'hypopg'").first
-
-      if extension.nil?
-        raise Dexter::Abort, "Install HypoPG first: https://github.com/ankane/dexter#installation"
-      end
-
-      if extension["installed_version"].nil?
-        if @options[:enable_hypopg]
-          execute("CREATE EXTENSION hypopg")
-        else
-          raise Dexter::Abort, "Run `CREATE EXTENSION hypopg` or pass --enable-hypopg"
-        end
-      end
+      tables
     end
 
-    def reset_hypothetical_indexes
-      execute("SELECT hypopg_reset()")
-    end
-
-    def analyze_tables(tables)
-      tables = tables.to_a.sort
-
+    def analyze_stats(tables)
       query = <<~SQL
         SELECT
           schemaname || '.' || relname AS table,
@@ -146,10 +102,14 @@ module Dexter
         WHERE
           schemaname || '.' || relname IN (#{tables.size.times.map { |i| "$#{i + 1}" }.join(", ")})
       SQL
-      analyze_stats = execute(query, params: tables)
+      execute(query, params: tables)
+    end
+
+    def analyze_tables(tables)
+      tables = tables.to_a.sort
 
       last_analyzed = {}
-      analyze_stats.each do |stats|
+      analyze_stats(tables).each do |stats|
         last_analyzed[stats["table"]] = Time.parse(stats["last_analyze"]) if stats["last_analyze"]
       end
 
@@ -162,7 +122,7 @@ module Dexter
       end
 
       if @analyze && (!la || la < Time.now - 3600)
-        statement = "ANALYZE #{quote_ident(table)}"
+        statement = "ANALYZE #{@connection.quote_ident(table)}"
         log "Running analyze: #{statement}"
         execute(statement)
       end
@@ -186,71 +146,83 @@ module Dexter
       end
     end
 
-    def create_hypothetical_indexes(queries)
-      candidates = {}
+    # process in batches to prevent "hypopg: not more oid available" error
+    # https://hypopg.readthedocs.io/en/rel1_stable/usage.html#configuration
+    def batch_hypothetical_indexes(candidate_queries)
+      batch_count = 0
+      batch = []
+      single_column_indexes = Set.new
+      multicolumn_indexes = Set.new
 
-      [… 1 removed line missing …]
+      # sort to improve batching
+      # TODO improve
+      candidate_queries.sort_by! { |q| q.candidate_columns.map { |c| [c[:table], c[:column]] } }
 
-      [… 2 removed lines missing …]
-      explainable_queries = queries.select { |q| q.plans.any? && q.high_cost? }
+      candidate_queries.each do |query|
+        batch << query
 
-      [… 1 removed line missing …]
-      tables = Set.new(explainable_queries.flat_map(&:tables))
-      tables_from_views = Set.new(explainable_queries.flat_map(&:tables_from_views))
+        single_column_indexes.merge(query.candidate_columns)
 
-      [… 4 removed lines missing …]
-      explainable_queries.each do |query|
-        log "Finding columns: #{query.statement}" if @log_level == "debug3"
-        begin
-          find_columns(query.tree).each do |col|
-            last_col = col["fields"].last
-            if last_col["String"]
-              possible_columns << last_col["String"]["sval"]
-            end
-          end
-        rescue JSON::NestingError
-          if @log_level.start_with?("debug")
-            log colorize("ERROR: Cannot get columns", :red)
-          end
-        end
+        # TODO for multicolumn indexes, use ordering
+        columns_by_table = query.candidate_columns.group_by { |c| c[:table] }
+        columns_by_table.each do |_, columns|
+          multicolumn_indexes.merge(columns.permutation(2).to_a)
         end
 
-      [… 3 removed lines missing …]
+        if single_column_indexes.size + multicolumn_indexes.size >= 500
+          create_hypothetical_indexes(batch, single_column_indexes, multicolumn_indexes, batch_count)
+          batch_count += 1
+          batch.clear
+          single_column_indexes.clear
+          multicolumn_indexes.clear
+        end
+      end
 
-      [… 2 removed lines missing …]
+      if batch.any?
+        create_hypothetical_indexes(batch, single_column_indexes, multicolumn_indexes, batch_count)
+      end
+    end
 
-      [… 2 removed lines missing …]
+    def create_candidate_indexes(candidate_indexes, index_mapping)
+      candidate_indexes.each do |columns|
+        begin
+          index_name = create_hypothetical_index(columns[0][:table], columns.map { |c| c[:column] })
+          index_mapping[index_name] = columns
+        rescue PG::UndefinedObject
+          # data type x has no default operator class for access method "btree"
+        end
+      end
+    rescue PG::InternalError
+      # hypopg: not more oid available
+      log colorize("WARNING: Limiting index candidates", :yellow) if @log_level == "debug2"
+    end
 
-      [… 2 removed lines missing …]
+    def create_hypothetical_indexes(queries, single_column_indexes, multicolumn_indexes, batch_count)
+      index_mapping = {}
+      reset_hypothetical_indexes
 
-      [… 3 removed lines missing …]
+      # check single column indexes
+      create_candidate_indexes(single_column_indexes.map { |c| [c] }, index_mapping)
+      calculate_plan(queries)
+
+      # check multicolumn indexes
+      create_candidate_indexes(multicolumn_indexes, index_mapping)
+      calculate_plan(queries)
 
+      # save index mapping for analysis
       queries.each do |query|
-        query.candidates = candidates
+        query.index_mapping = index_mapping
       end
-    end
 
-      [… 2 removed lines missing …]
-      find_by_key(plan, "ColumnRef")
+      # TODO different log level?
+      log "Batch #{batch_count + 1}: #{queries.size} queries, #{index_mapping.size} hypothetical indexes" if @log_level == "debug2"
     end
 
     def find_indexes(plan)
-      find_by_key(plan, "Index Name")
+      self.class.find_by_key(plan, "Index Name")
     end
 
-    def find_by_key(plan, key)
+    def self.find_by_key(plan, key)
      result = []
      queue = [plan]
      while queue.any?
@@ -271,16 +243,16 @@ module Dexter
       result
     end
 
-    def hypo_indexes_from_plan(candidates, plan, index_set)
+    def hypo_indexes_from_plan(index_mapping, plan, index_set)
       query_indexes = []
 
       find_indexes(plan).uniq.sort.each do |index_name|
-        col_set = candidates[index_name]
+        columns = index_mapping[index_name]
 
-        if col_set
+        if columns
           index = {
-            table: col_set[0][:table],
-            columns: col_set.map { |c| c[:column] }
+            table: columns[0][:table],
+            columns: columns.map { |c| c[:column] }
           }
 
           unless index_set.include?([index[:table], index[:columns]])
@@ -310,10 +282,10 @@ module Dexter
        end
       end
 
-      savings_ratio = […]
+      savings_ratio = 1 - @min_cost_savings_pct / 100.0
 
      queries.each do |query|
-        if query.[…]
+        if query.fully_analyzed?
          new_cost, new_cost2 = query.costs[1..2]
 
          cost_savings = new_cost < query.initial_cost * savings_ratio
@@ -322,11 +294,11 @@ module Dexter
          cost_savings2 = new_cost > 100 && new_cost2 < new_cost * savings_ratio
 
          key = cost_savings2 ? 2 : 1
-          query_indexes = hypo_indexes_from_plan(query.candidates, query.plans[key], index_set)
+          query_indexes = hypo_indexes_from_plan(query.index_mapping, query.plans[key], index_set)
 
          # likely a bad suggestion, so try single column
          if cost_savings2 && query_indexes.size > 1
-            query_indexes = hypo_indexes_from_plan(query.candidates, query.plans[1], index_set)
+            query_indexes = hypo_indexes_from_plan(query.index_mapping, query.plans[1], index_set)
            cost_savings2 = false
          end
 
@@ -348,7 +320,7 @@ module Dexter
 
          query_indexes.each do |query_index|
            reset_hypothetical_indexes
-            create_hypothetical_index(query_index[:table], query_index[:columns][…]
+            create_hypothetical_index(query_index[:table], query_index[:columns])
            plan3 = plan(query.statement)
            cost3 = plan3["Total Cost"]
 
@@ -399,8 +371,8 @@ module Dexter
 
        # TODO optimize
        if @log_level.start_with?("debug")
-          query.pass1_indexes = hypo_indexes_from_plan(query.candidates, query.plans[1], index_set)
-          query.pass2_indexes = hypo_indexes_from_plan(query.candidates, query.plans[2], index_set)
+          query.pass1_indexes = hypo_indexes_from_plan(query.index_mapping, query.plans[1], index_set)
+          query.pass2_indexes = hypo_indexes_from_plan(query.index_mapping, query.plans[2], index_set)
        end
      end
     end
@@ -424,8 +396,7 @@ module Dexter
      end
     end
 
-    def show_and_create_indexes(new_indexes, queries)
-      # print summary
+    def show_new_indexes(new_indexes)
      if new_indexes.any?
        new_indexes.each do |index|
          log colorize("Index found: #{index[:table]} (#{index[:columns].join(", ")})", :green)
@@ -433,121 +404,56 @@ module Dexter
      else
        log "No new indexes found"
      end
+    end
 
-      [… 32 removed lines missing …]
-              log "Need #{@min_cost_savings_pct}% cost savings to suggest index"
-            end
-          else
-            log "Could not run explain"
+    def show_debug_info(new_indexes, queries)
+      index_queries = new_indexes.flat_map { |i| i[:queries].sort_by(&:fingerprint) }
+      if @log_level == "debug2"
+        fingerprints = Set.new(index_queries.map(&:fingerprint))
+        index_queries.concat(queries.reject { |q| fingerprints.include?(q.fingerprint) }.sort_by(&:fingerprint))
+      end
+      index_queries.each do |query|
+        log "-" * 80
+        log "Query #{query.fingerprint}"
+        log "Total time: #{(query.total_time / 60000.0).round(1)} min, avg time: #{(query.total_time / query.calls.to_f).round} ms, calls: #{query.calls}" if query.calls > 0
+
+        if query.fingerprint == "unknown"
+          log "Could not parse query"
+        elsif query.tables.empty?
+          log "No tables"
+        elsif query.missing_tables
+          log "Tables not present in current database"
+        elsif query.candidate_tables.empty?
+          log "No candidate tables for indexes"
+        elsif !query.initial_cost
+          log "Could not run explain"
+        elsif query.initial_cost < @min_cost
+          log "Low initial cost: #{query.initial_cost}"
+        elsif query.candidate_columns.empty?
+          log "No candidate columns for indexes"
+        elsif query.fully_analyzed?
+          query_indexes = query.indexes || []
+          log "Start: #{query.costs[0]}"
+          log "Pass1: #{query.costs[1]} : #{log_indexes(query.pass1_indexes || [])}"
+          log "Pass2: #{query.costs[2]} : #{log_indexes(query.pass2_indexes || [])}"
+          if query.costs[3]
+            log "Pass3: #{query.costs[3]} : #{log_indexes(query.pass3_indexes || [])}"
          end
-          log
-          log query.statement
-          log
-        end
-      end
-
-      # create
-      if @create && new_indexes.any?
-        # 1. create lock
-        # 2. refresh existing index list
-        # 3. create indexes that still don't exist
-        # 4. release lock
-        with_advisory_lock do
-          new_indexes.each do |index|
-            unless index_exists?(index)
-              statement = String.new("CREATE INDEX CONCURRENTLY ON #{quote_ident(index[:table])} (#{index[:columns].map { |c| quote_ident(c) }.join(", ")})")
-              statement << " TABLESPACE #{quote_ident(@tablespace)}" if @tablespace
-              log "Creating index: #{statement}"
-              started_at = monotonic_time
-              begin
-                execute(statement)
-                log "Index created: #{((monotonic_time - started_at) * 1000).to_i} ms"
-              rescue PG::LockNotAvailable
-                log "Could not acquire lock: #{index[:table]}"
-              end
-            end
+          log "Final: #{query.new_cost} : #{log_indexes(query.suggest_index ? query_indexes : [])}"
+          if (query.pass1_indexes.any? || query.pass2_indexes.any?) && !query.suggest_index
+            log "Need #{@min_cost_savings_pct}% cost savings to suggest index"
          end
-        end
-      end
-
-      new_indexes
-    end
-
-    def monotonic_time
-      Process.clock_gettime(Process::CLOCK_MONOTONIC)
-    end
-
-    def conn
-      @conn ||= begin
-        # set connect timeout if none set
-        ENV["PGCONNECT_TIMEOUT"] ||= "3"
-
-        if @options[:dbname].start_with?("postgres://", "postgresql://")
-          config = @options[:dbname]
        else
-          config = {
-            host: @options[:host],
-            port: @options[:port],
-            dbname: @options[:dbname],
-            user: @options[:username]
-          }.reject { |_, value| value.to_s.empty? }
-          config = config[:dbname] if config.keys == [:dbname] && config[:dbname].include?("=")
+          log "Could not run explain"
        end
-        PG::Connection.new(config)
+        log
+        log query.statement
+        log
      end
-    rescue PG::ConnectionBad => e
-      raise Dexter::Abort, e.message
    end
 
-    def execute(query, pretty: true, params: [], use_exec: false)
-      # use exec_params instead of exec when possible for security
-      #
-      # Unlike PQexec, PQexecParams allows at most one SQL command in the given string.
-      # (There can be semicolons in it, but not more than one nonempty command.)
-      # This is a limitation of the underlying protocol, but has some usefulness
-      # as an extra defense against SQL-injection attacks.
-      # https://www.postgresql.org/docs/current/static/libpq-exec.html
-      query = squish(query) if pretty
-      log colorize("[sql] #{query}#{params.any? ? " /*#{params.to_json}*/" : ""}", :cyan) if @log_sql
-
-      @mutex.synchronize do
-        if use_exec
-          conn.exec("#{query} /*dexter*/").to_a
-        else
-          conn.exec_params("#{query} /*dexter*/", params).to_a
-        end
-      end
+    def execute(...)
+      @connection.execute(...)
    end
 
    def plan(query)
@@ -557,7 +463,7 @@ module Dexter
      # try to EXPLAIN normalized queries
      # https://dev.to/yugabyte/explain-from-pgstatstatements-normalized-queries-how-to-always-get-the-generic-plan-in--5cfi
      normalized = query.include?("$1")
-      generic_plan = normalized && server_version_num >= 160000
+      generic_plan = normalized && @server_version_num >= 160000
      explain_normalized = normalized && !generic_plan
      if explain_normalized
        prepared_name = "dexter_prepared"
@@ -569,16 +475,7 @@ module Dexter
        execute("BEGIN")
        transaction = true
 
-        if server_version_num >= 120000
-          execute("SET LOCAL plan_cache_mode = force_generic_plan")
-        else
-          execute("SET LOCAL cpu_operator_cost = 1e42")
-          5.times do
-            execute("EXPLAIN (FORMAT JSON) #{safe_statement(query)}", pretty: false)
-          end
-          execute("ROLLBACK")
-          execute("BEGIN")
-        end
+        execute("SET LOCAL plan_cache_mode = force_generic_plan")
      end
 
      explain_prefix = generic_plan ? "GENERIC_PLAN, " : ""
@@ -599,140 +496,8 @@ module Dexter
      end
    end
 
-      [… 2 removed lines missing …]
-      columns_by_table.each do |table, cols|
-        # no reason to use btree index for json columns
-        cols.reject { |c| ["json", "jsonb"].include?(c[:type]) }.permutation(n) do |col_set|
-          index_name = create_hypothetical_index(table, col_set)
-          candidates[index_name] = col_set
-        end
-      end
-    end
-
-    def create_hypothetical_index(table, col_set)
-      execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{quote_ident(table)} (#{col_set.map { |c| quote_ident(c[:column]) }.join(", ")})')").first["indexname"]
-    end
-
-    def database_tables
-      result = execute <<~SQL
-        SELECT
-          table_schema || '.' || table_name AS table_name
-        FROM
-          information_schema.tables
-        WHERE
-          table_catalog = current_database()
-          AND table_type IN ('BASE TABLE', 'VIEW')
-      SQL
-      result.map { |r| r["table_name"] }
-    end
-
-    def materialized_views
-      result = execute <<~SQL
-        SELECT
-          schemaname || '.' || matviewname AS table_name
-        FROM
-          pg_matviews
-      SQL
-      result.map { |r| r["table_name"] }
-    end
-
-    def server_version_num
-      execute("SHOW server_version_num").first["server_version_num"].to_i
-    end
-
-    def database_view_tables
-      result = execute <<~SQL
-        SELECT
-          schemaname || '.' || viewname AS table_name,
-          definition
-        FROM
-          pg_views
-        WHERE
-          schemaname NOT IN ('information_schema', 'pg_catalog')
-      SQL
-
-      view_tables = {}
-      result.each do |row|
-        begin
-          view_tables[row["table_name"]] = PgQuery.parse(row["definition"]).tables
-        rescue PgQuery::ParseError
-          if @log_level.start_with?("debug")
-            log colorize("ERROR: Cannot parse view definition: #{row["table_name"]}", :red)
-          end
-        end
-      end
-
-      view_tables
-    end
-
-    def stat_statements
-      total_time = server_version_num >= 130000 ? "(total_plan_time + total_exec_time)" : "total_time"
-      sql = <<~SQL
-        SELECT
-          DISTINCT query
-        FROM
-          pg_stat_statements
-        INNER JOIN
-          pg_database ON pg_database.oid = pg_stat_statements.dbid
-        WHERE
-          datname = current_database()
-          AND #{total_time} >= $1
-          AND calls >= $2
-        ORDER BY
-          1
-      SQL
-      execute(sql, params: [@min_time * 60000, @min_calls.to_i]).map { |q| q["query"] }
-    end
-
-    def with_advisory_lock
-      lock_id = 123456
-      first_time = true
-      while execute("SELECT pg_try_advisory_lock($1)", params: [lock_id]).first["pg_try_advisory_lock"] != "t"
-        if first_time
-          log "Waiting for lock..."
-          first_time = false
-        end
-        sleep(1)
-      end
-      yield
-    ensure
-      suppress_messages do
-        execute("SELECT pg_advisory_unlock($1)", params: [lock_id])
-      end
-    end
-
-    def suppress_messages
-      conn.set_notice_processor do |message|
-        # do nothing
-      end
-      yield
-    ensure
-      # clear notice processor
-      conn.set_notice_processor
-    end
-
-    def index_exists?(index)
-      indexes([index[:table]]).find { |i| i["columns"] == index[:columns] }
-    end
-
-    def columns(tables)
-      query = <<~SQL
-        SELECT
-          s.nspname || '.' || t.relname AS table_name,
-          a.attname AS column_name,
-          pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type
-        FROM pg_attribute a
-          JOIN pg_class t on a.attrelid = t.oid
-          JOIN pg_namespace s on t.relnamespace = s.oid
-        WHERE a.attnum > 0
-          AND NOT a.attisdropped
-          AND s.nspname || '.' || t.relname IN (#{tables.size.times.map { |i| "$#{i + 1}" }.join(", ")})
-        ORDER BY
-          1, 2
-      SQL
-      columns = execute(query, params: tables.to_a)
-      columns.map { |v| {table: v["table_name"], column: v["column_name"], type: v["data_type"]} }
+    def create_hypothetical_index(table, columns)
+      execute("SELECT * FROM hypopg_create_index('CREATE INDEX ON #{@connection.quote_ident(table)} (#{columns.map { |c| @connection.quote_ident(c) }.join(", ")})')").first["indexname"]
    end
 
    def indexes(tables)
@@ -761,10 +526,6 @@ module Dexter
      execute(query, params: tables.to_a).map { |v| v["columns"] = v["columns"].sub(") WHERE (", " WHERE ").split(", ").map { |c| unquote(c) }; v }
    end
 
-    def search_path
-      execute("SELECT current_schemas(true)")[0]["current_schemas"][1..-2].split(",")
-    end
-
    def unquote(part)
      if part && part.start_with?('"') && part.end_with?('"')
        part[1..-2]
@@ -773,15 +534,6 @@ module Dexter
      end
    end
 
-    def quote_ident(value)
-      value.split(".").map { |v| conn.quote_ident(v) }.join(".")
-    end
-
-    # from activesupport
-    def squish(str)
-      str.to_s.gsub(/\A[[:space:]]+/, "").gsub(/[[:space:]]+\z/, "").gsub(/[[:space:]]+/, " ")
-    end
-
    def safe_statement(statement)
      statement.gsub(";", "")
    end
```
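
One change worth calling out from the indexer diff: the old version sliced queries into fixed batches of 500 (`each_slice(500)`), while the new `batch_hypothetical_indexes` flushes a batch once the number of candidate indexes — single-column candidates plus per-table two-column permutations — reaches 500, since HypoPG's OID pool is consumed per hypothetical index, not per query. A minimal standalone sketch of that accounting (queries are plain hashes here for illustration, and the block stands in for `create_hypothetical_indexes`; the 500 threshold is the value shown in the diff):

```ruby
require "set"

# Flush a batch when the combined candidate-index count reaches the limit,
# mirroring the size check in batch_hypothetical_indexes above.
def batch_candidates(candidate_queries, limit: 500)
  batch = []
  single = Set.new
  multi = Set.new

  candidate_queries.each do |query|
    batch << query
    # every candidate column is a single-column index candidate
    single.merge(query[:candidate_columns])
    # every ordered pair of columns on the same table is a two-column candidate
    query[:candidate_columns].group_by { |c| c[:table] }.each_value do |columns|
      multi.merge(columns.permutation(2).to_a)
    end

    if single.size + multi.size >= limit
      yield batch, single, multi
      batch = []
      single = Set.new
      multi = Set.new
    end
  end

  yield batch, single, multi if batch.any?
end

# Example with two toy queries against a hypothetical users table:
queries = [
  {candidate_columns: [{table: "public.users", column: "city"}, {table: "public.users", column: "login"}]},
  {candidate_columns: [{table: "public.users", column: "city"}]}
]
batch_candidates(queries) do |batch, single, multi|
  puts "#{batch.size} queries: #{single.size} single-column, #{multi.size} multicolumn candidates"
end
```

Sizing batches by candidate count keeps each `hypopg_create_index` round within HypoPG's OID budget even when a single query yields many column permutations, which a fixed query count could not guarantee.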