RubyGems - pgslice - Versions diffs - 0.1.5 → 0.1.6 - Mend

pgslice 0.1.5 → 0.1.6

This diff shows the changes between two publicly released versions of a package, as they appear in the public registry they were published to. It is provided for informational purposes only.

Files changed (6)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 02a6779afe17f04bc97713d360ac5bbdcdbd68e0
-  data.tar.gz: e3aac8b61fed50af76c9bc7b4835b6f218a474d1
+  metadata.gz: 8b5f6975bd8ac6e9a10d0f9655a0b8a93f5ce1c1
+  data.tar.gz: 56f8ba1e389e723fa02aea9a2fdde9c39b64ea32
 SHA512:
-  metadata.gz: dde7c9b7330f0edf78297d4a3a61e1d8d253c893f14fbe808083e3d55253fa4e4ffc3351f12ee34053c5fe4c999066b2eb7cdd8f801f594013ea7a477f1985db
-  data.tar.gz: 6a578d1b2663fe77eedb21dfbf121284ed4a6ed026f4cfc4c4c60b911d6513168b4072bf30ab3fb3cb519227903e57c9bb11aa34400e1e9abb870e527d8a62d7
+  metadata.gz: 636dde577a3aaad53bccff51443f7ee82c0b72515af5db714c14c093bc8643a0715ae5fb3915535cc12a4eeed0baba6d988b47b5084b9ef5d3c69fe95d1ff5f6
+  data.tar.gz: 254f7d242c475accd162c2aa8143283cb9aae1f402b72ee9f168c4ab11b8538aabdf29fd2439d261f78ad69636b0304f682f575d321e2eecdb3497979f027e0d

data/CHANGELOG.md CHANGED

@@ -1,3 +1,8 @@
+## 0.1.6
+- Added `--no-partition` option to `prep`
+- Added `--url` option
 ## 0.1.5
 - Removed `activesupport` dependency for speed

data/README.md CHANGED

@@ -46,6 +46,8 @@ gem install pgslice
   Use the `--batch-size` and `--sleep` options to control the speed.
+  To sync data across different databases, check out [pgsync](https://github.com/ankane/pgsync).
 5. Swap the intermediate table with the original table
   ```sh
@@ -70,6 +72,30 @@ To add partitions, use:
 pgslice add_partitions <table> --future 3
 ```
+Add this as a cron job to create a new partition each day or month.
+```
+# day
+0 0 * * * pgslice add_partitions <table> --future 3 --url ...
+# month
+0 0 1 * * pgslice add_partitions <table> --future 3 --url ...
+```
+Add a monitor to ensure partitions are being created.
+```sql
+SELECT 1 FROM
+    pg_catalog.pg_class c
+INNER JOIN
+    pg_catalog.pg_namespace n ON n.oid = c.relnamespace
+WHERE
+    c.relkind = 'r' AND
+    n.nspname = 'public' AND
+    c.relname = '<table>_' || to_char(NOW() + INTERVAL '3 days', 'YYYYMMDD')
+    -- for months, use to_char(NOW() + INTERVAL '3 months', 'YYYYMM')
+```
 ## Additional Commands
 To undo prep (which will delete partitions), use:
@@ -186,6 +212,16 @@ CREATE INDEX ON locations_20160426 USING btree (updated_at, shopper_id);
 COMMIT;
 ```
+## One Off Tasks
+You can also reduce the size of a table without partitioning.
+```sh
+pgslice prep <table> --no-partition
+pgslice fill <table> --start 1000 # starting primary key
+pgslice swap <table>
+```
 ## Upgrading
 Run:

data/lib/pgslice.rb CHANGED

@@ -54,11 +54,18 @@ module PgSlice
       intermediate_table = "#{table}_intermediate"
       trigger_name = self.trigger_name(table)
-      abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
+      if options[:no_partition]
+        abort "Usage: pgslice prep <table> --no-partition" if arguments.length != 1
+      else
+        abort "Usage: pgslice prep <table> <column> <period>" if arguments.length != 3
+      end
       abort "Table not found: #{table}" unless table_exists?(table)
       abort "Table already exists: #{intermediate_table}" if table_exists?(intermediate_table)
-      abort "Column not found: #{column}" unless columns(table).include?(column)
-      abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
+      unless options[:no_partition]
+        abort "Column not found: #{column}" unless columns(table).include?(column)
+        abort "Invalid period: #{period}" unless SQL_FORMAT[period.to_sym]
+      end
       queries = []
@@ -66,22 +73,24 @@ module PgSlice
 CREATE TABLE #{intermediate_table} (LIKE #{table} INCLUDING ALL);
       SQL
-      sql_format = SQL_FORMAT[period.to_sym]
-      queries << <<-SQL
-CREATE FUNCTION #{trigger_name}()
-    RETURNS trigger AS $$
-    BEGIN
-        EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
-        RETURN NULL;
-    END;
-    $$ LANGUAGE plpgsql;
-      SQL
+      unless options[:no_partition]
+        sql_format = SQL_FORMAT[period.to_sym]
+        queries << <<-SQL
+  CREATE FUNCTION #{trigger_name}()
+      RETURNS trigger AS $$
+      BEGIN
+          EXECUTE 'INSERT INTO #{table}_' || to_char(NEW.#{column}, '#{sql_format}') || ' VALUES ($1.*)' USING NEW;
+          RETURN NULL;
+      END;
+      $$ LANGUAGE plpgsql;
+        SQL
-      queries << <<-SQL
-CREATE TRIGGER #{trigger_name}
-    BEFORE INSERT ON #{intermediate_table}
-    FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
-      SQL
+        queries << <<-SQL
+  CREATE TRIGGER #{trigger_name}
+      BEFORE INSERT ON #{intermediate_table}
+      FOR EACH ROW EXECUTE PROCEDURE #{trigger_name}();
+        SQL
+      end
       run_queries(queries)
     end
@@ -96,7 +105,7 @@ CREATE TRIGGER #{trigger_name}
       queries = [
         "DROP TABLE #{intermediate_table} CASCADE;",
-        "DROP FUNCTION #{trigger_name}();"
+        "DROP FUNCTION IF EXISTS #{trigger_name}();"
       ]
       run_queries(queries)
     end
@@ -122,6 +131,7 @@ CREATE TRIGGER #{trigger_name}
       queries = []
       period, field = settings_from_table(original_table, table)
+      abort "Could not read settings" unless period
       today = round_date(Date.today, period)
       range.each do |n|
         day = advance_date(today, period, n)
@@ -162,11 +172,14 @@ CREATE TABLE #{partition_name}
       abort "Table not found: #{dest_table}" unless table_exists?(dest_table)
       period, field = settings_from_table(table, dest_table)
-      name_format = self.name_format(period)
-      existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
-      starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
-      ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
+      if period
+        name_format = self.name_format(period)
+        existing_tables = self.existing_tables(like: "#{table}_%").select { |t| /#{Regexp.escape("#{table}_")}(\d{4,6})/.match(t) }.sort
+        starting_time = DateTime.strptime(existing_tables.first.split("_").last, name_format)
+        ending_time = advance_date(DateTime.strptime(existing_tables.last.split("_").last, name_format), period, 1)
+      end
       primary_key = self.primary_key(table)
       max_source_id = max_id(source_table, primary_key)
@@ -178,8 +191,12 @@ CREATE TABLE #{partition_name}
         end
       if max_dest_id == 0 && !options[:swapped]
-        min_source_id = min_id(source_table, primary_key, field, starting_time)
-        max_dest_id = min_source_id - 1 if min_source_id
+        if options[:start]
+          max_dest_id = options[:start]
+        else
+          min_source_id = min_id(source_table, primary_key, field, starting_time)
+          max_dest_id = min_source_id - 1 if min_source_id
+        end
       end
       starting_id = max_dest_id
@@ -189,11 +206,16 @@ CREATE TABLE #{partition_name}
       i = 1
       batch_count = ((max_source_id - starting_id) / batch_size.to_f).ceil
       while starting_id < max_source_id
+        where = "#{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size}"
+        if period
+          where << " AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}"
+        end
         query = <<-SQL
 /* #{i} of #{batch_count} */
 INSERT INTO #{dest_table} (#{fields})
     SELECT #{fields} FROM #{source_table}
-    WHERE #{primary_key} > #{starting_id} AND #{primary_key} <= #{starting_id + batch_size} AND #{field} >= #{sql_date(starting_time)} AND #{field} < #{sql_date(ending_time)}
+    WHERE #{where}
         SQL
         log_sql(query)
@@ -265,6 +287,9 @@ INSERT INTO #{dest_table} (#{fields})
         o.integer "--past", default: 0
         o.integer "--batch-size", default: 10000
         o.boolean "--dry-run", default: false
+        o.boolean "--no-partition", default: false
+        o.integer "--start"
+        o.string "--url"
         o.on "-v", "--version", "print the version" do
           log PgSlice::VERSION
           @exit = true
@@ -294,8 +319,9 @@ INSERT INTO #{dest_table} (#{fields})
     def connection
       @connection ||= begin
-        abort "Set PGSLICE_URL" unless ENV["PGSLICE_URL"]
-        uri = URI.parse(ENV["PGSLICE_URL"])
+        url = options[:url] || ENV["PGSLICE_URL"]
+        abort "Set PGSLICE_URL or use the --url option" unless url
+        uri = URI.parse(url)
         uri_parser = URI::Parser.new
         config = {
           host: uri.host,
@@ -369,8 +395,9 @@ INSERT INTO #{dest_table} (#{fields})
     end
     def min_id(table, primary_key, column, starting_time)
-      query = "SELECT MIN(#{primary_key}) FROM #{table} WHERE #{column} >= #{sql_date(starting_time)}"
-      execute(query)[0]["min"].to_i
+      query = "SELECT MIN(#{primary_key}) FROM #{table}"
+      query << " WHERE #{column} >= #{sql_date(starting_time)}" if starting_time
+      (execute(query)[0]["min"] || 1).to_i
     end
     def has_trigger?(trigger_name, table)
@@ -444,9 +471,11 @@ INSERT INTO #{dest_table} (#{fields})
     def settings_from_table(original_table, table)
       trigger_name = self.trigger_name(original_table)
-      function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]["pg_get_functiondef"]
+      function_def = execute("select pg_get_functiondef(oid) from pg_proc where proname = $1", [trigger_name])[0]
+      return [nil, nil] unless function_def
+      function_def = function_def["pg_get_functiondef"]
       sql_format = SQL_FORMAT.find { |_, f| function_def.include?("'#{f}'") }
-      abort "Could not read settings" unless sql_format
+      return [nil, nil] unless sql_format
       period = sql_format[0]
       field = /to_char\(NEW\.(\w+),/.match(function_def)[1]
       [period, field]

data/lib/pgslice/version.rb CHANGED

@@ -1,3 +1,3 @@
 module PgSlice
-  VERSION = "0.1.5"
+  VERSION = "0.1.6"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: pgslice
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.1.6
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-04-26 00:00:00.000000000 Z
+date: 2016-08-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: slop