rails_redshift_replicator 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +3 -0
  4. data/Rakefile +34 -0
  5. data/app/assets/javascripts/rails_redshift_replicator/application.js +13 -0
  6. data/app/assets/stylesheets/rails_redshift_replicator/application.css +15 -0
  7. data/app/controllers/rails_redshift_replicator/application_controller.rb +5 -0
  8. data/app/helpers/rails_redshift_replicator/application_helper.rb +4 -0
  9. data/app/models/rails_redshift_replicator/replication.rb +98 -0
  10. data/app/views/layouts/rails_redshift_replicator/application.html.erb +14 -0
  11. data/config/locales/rails_redshift_replicator.en.yml +20 -0
  12. data/config/routes.rb +2 -0
  13. data/db/migrate/20160503214955_create_rails_redshift_replicator_replications.rb +24 -0
  14. data/db/migrate/20160509193335_create_table_rails_redshift_replicator_deleted_ids.rb +8 -0
  15. data/lib/generators/rails_redshift_replicator/install_generator.rb +25 -0
  16. data/lib/generators/templates/rails_redshift_replicator.rb +74 -0
  17. data/lib/rails_redshift_replicator.rb +229 -0
  18. data/lib/rails_redshift_replicator/adapters/generic.rb +40 -0
  19. data/lib/rails_redshift_replicator/adapters/mysql2.rb +22 -0
  20. data/lib/rails_redshift_replicator/adapters/postgresql.rb +37 -0
  21. data/lib/rails_redshift_replicator/adapters/sqlite.rb +27 -0
  22. data/lib/rails_redshift_replicator/deleter.rb +67 -0
  23. data/lib/rails_redshift_replicator/engine.rb +14 -0
  24. data/lib/rails_redshift_replicator/exporters/base.rb +215 -0
  25. data/lib/rails_redshift_replicator/exporters/full_replicator.rb +9 -0
  26. data/lib/rails_redshift_replicator/exporters/identity_replicator.rb +9 -0
  27. data/lib/rails_redshift_replicator/exporters/timed_replicator.rb +9 -0
  28. data/lib/rails_redshift_replicator/file_manager.rb +134 -0
  29. data/lib/rails_redshift_replicator/importers/base.rb +158 -0
  30. data/lib/rails_redshift_replicator/importers/full_replicator.rb +17 -0
  31. data/lib/rails_redshift_replicator/importers/identity_replicator.rb +15 -0
  32. data/lib/rails_redshift_replicator/importers/timed_replicator.rb +18 -0
  33. data/lib/rails_redshift_replicator/model/extension.rb +45 -0
  34. data/lib/rails_redshift_replicator/model/hair_trigger_extension.rb +8 -0
  35. data/lib/rails_redshift_replicator/replicable.rb +143 -0
  36. data/lib/rails_redshift_replicator/rlogger.rb +12 -0
  37. data/lib/rails_redshift_replicator/tools/analyze.rb +18 -0
  38. data/lib/rails_redshift_replicator/tools/vacuum.rb +77 -0
  39. data/lib/rails_redshift_replicator/version.rb +3 -0
  40. data/lib/tasks/rails_redshift_replicator_tasks.rake +4 -0
  41. data/spec/dummy/README.rdoc +28 -0
  42. data/spec/dummy/Rakefile +6 -0
  43. data/spec/dummy/app/assets/javascripts/application.js +13 -0
  44. data/spec/dummy/app/assets/stylesheets/application.css +15 -0
  45. data/spec/dummy/app/controllers/application_controller.rb +5 -0
  46. data/spec/dummy/app/helpers/application_helper.rb +2 -0
  47. data/spec/dummy/app/models/post.rb +4 -0
  48. data/spec/dummy/app/models/tag.rb +4 -0
  49. data/spec/dummy/app/models/user.rb +5 -0
  50. data/spec/dummy/app/views/layouts/application.html.erb +14 -0
  51. data/spec/dummy/bin/bundle +3 -0
  52. data/spec/dummy/bin/rails +4 -0
  53. data/spec/dummy/bin/rake +4 -0
  54. data/spec/dummy/bin/setup +29 -0
  55. data/spec/dummy/config.ru +4 -0
  56. data/spec/dummy/config/application.rb +26 -0
  57. data/spec/dummy/config/boot.rb +5 -0
  58. data/spec/dummy/config/database.yml +37 -0
  59. data/spec/dummy/config/environment.rb +5 -0
  60. data/spec/dummy/config/environments/development.rb +41 -0
  61. data/spec/dummy/config/environments/production.rb +79 -0
  62. data/spec/dummy/config/environments/test.rb +42 -0
  63. data/spec/dummy/config/initializers/assets.rb +11 -0
  64. data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
  65. data/spec/dummy/config/initializers/cookies_serializer.rb +3 -0
  66. data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  67. data/spec/dummy/config/initializers/inflections.rb +16 -0
  68. data/spec/dummy/config/initializers/mime_types.rb +4 -0
  69. data/spec/dummy/config/initializers/rails_redshift_replicator.rb +59 -0
  70. data/spec/dummy/config/initializers/session_store.rb +3 -0
  71. data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
  72. data/spec/dummy/config/locales/en.yml +23 -0
  73. data/spec/dummy/config/locales/rails_redshift_replicator.en.yml +19 -0
  74. data/spec/dummy/config/routes.rb +4 -0
  75. data/spec/dummy/config/secrets.yml +22 -0
  76. data/spec/dummy/db/development.sqlite3 +0 -0
  77. data/spec/dummy/db/migrate/20160504120421_create_test_tables.rb +40 -0
  78. data/spec/dummy/db/migrate/20160509225445_create_triggers_posts_delete_or_tags_delete_or_users_delete.rb +33 -0
  79. data/spec/dummy/db/migrate/20160511000937_create_rails_redshift_replicator_replications.rails_redshift_replicator.rb +25 -0
  80. data/spec/dummy/db/migrate/20160511000938_create_table_rails_redshift_replicator_deleted_ids.rails_redshift_replicator.rb +9 -0
  81. data/spec/dummy/db/schema.rb +99 -0
  82. data/spec/dummy/db/test.sqlite3 +0 -0
  83. data/spec/dummy/log/development.log +1623 -0
  84. data/spec/dummy/log/test.log +95379 -0
  85. data/spec/dummy/public/404.html +67 -0
  86. data/spec/dummy/public/422.html +67 -0
  87. data/spec/dummy/public/500.html +66 -0
  88. data/spec/dummy/public/favicon.ico +0 -0
  89. data/spec/dummy/rails_redshift_replicator_development +0 -0
  90. data/spec/factories/rails_redshift_replicator_replications.rb +31 -0
  91. data/spec/integration/rails_redshift_replicator_spec.rb +148 -0
  92. data/spec/integration/setup_spec.rb +149 -0
  93. data/spec/lib/rails_redshift_replicator/deleter_spec.rb +90 -0
  94. data/spec/lib/rails_redshift_replicator/exporters/base_spec.rb +326 -0
  95. data/spec/lib/rails_redshift_replicator/exporters/full_replicator_spec.rb +33 -0
  96. data/spec/lib/rails_redshift_replicator/exporters/identity_replicator_spec.rb +40 -0
  97. data/spec/lib/rails_redshift_replicator/exporters/timed_replicator_spec.rb +43 -0
  98. data/spec/lib/rails_redshift_replicator/file_manager_spec.rb +90 -0
  99. data/spec/lib/rails_redshift_replicator/importers/base_spec.rb +102 -0
  100. data/spec/lib/rails_redshift_replicator/importers/full_replicator_spec.rb +27 -0
  101. data/spec/lib/rails_redshift_replicator/importers/identity_replicator_spec.rb +26 -0
  102. data/spec/lib/rails_redshift_replicator/importers/timed_replicator_spec.rb +26 -0
  103. data/spec/lib/rails_redshift_replicator/model/extension_spec.rb +36 -0
  104. data/spec/lib/rails_redshift_replicator/replicable_spec.rb +230 -0
  105. data/spec/lib/rails_redshift_replicator/rlogger_spec.rb +22 -0
  106. data/spec/lib/rails_redshift_replicator/tools/analyze_spec.rb +15 -0
  107. data/spec/lib/rails_redshift_replicator/tools/vacuum_spec.rb +65 -0
  108. data/spec/lib/rails_redshift_replicator_spec.rb +110 -0
  109. data/spec/models/rails_redshift_replicator/replication_spec.rb +104 -0
  110. data/spec/spec_helper.rb +36 -0
  111. data/spec/support/csv/invalid_user.csv +12 -0
  112. data/spec/support/csv/valid_post.csv +2 -0
  113. data/spec/support/csv/valid_tags_users.csv +1 -0
  114. data/spec/support/csv/valid_user.csv +2 -0
  115. data/spec/support/rails_redshift_replicator_helpers.rb +95 -0
  116. metadata +430 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: 091d07d2dc935184033dc7be33159df8b0eaca82
4
+ data.tar.gz: 16cc2eac454114f7b1e2ce146039e04b032a912d
5
+ SHA512:
6
+ metadata.gz: e14f322cb08776821dc29c403ac176dd4546dd85c94251e9d6d0bf03eedd48a97a90493e03eac43b3ff9e3b0cdc0661d8549b3a7facbd0193a6e467874e293a6
7
+ data.tar.gz: 79c1f83dc575f1bb80428122770a8aab0dff9b91534a207013882944cc850c0ae2c81872f95877f3b9ed9dd5511366c1c0699d3b7d9b6bb4d7d63c22c8dd900e
@@ -0,0 +1,20 @@
1
+ Copyright 2016 Alexandre Angelim
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,3 @@
1
+ = RailsRedshiftReplicator
2
+
3
+ This project rocks and uses MIT-LICENSE.
@@ -0,0 +1,34 @@
1
# Rakefile for the gem: wires up Bundler, RDoc, the engine rake tasks and RSpec.

# Load Bundler's setup; if Bundler is missing, print a hint and carry on.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# Defines the `rdoc` task, which documents README.rdoc and everything under lib/.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'RailsRedshiftReplicator'
  rdoc.options << '--line-numbers'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Absolute path to the dummy application's Rakefile.
# NOTE(review): presumably read by rails/tasks/engine.rake, so it is set before the load - confirm.
APP_RAKEFILE = File.expand_path("../spec/dummy/Rakefile", __FILE__)
load 'rails/tasks/engine.rake'


load 'rails/tasks/statistics.rake'


Bundler::GemHelper.install_tasks

# Load any extra *.rake files found under a tasks/ directory next to this Rakefile.
Dir[File.join(File.dirname(__FILE__), 'tasks/**/*.rake')].each {|f| load f }

require 'rspec/core'
require 'rspec/core/rake_task'

# The :spec task depends on app:db:test:prepare.
desc "Run all specs in spec directory (excluding plugin specs)"
RSpec::Core::RakeTask.new(:spec => 'app:db:test:prepare')

# Running plain `rake` runs the specs.
task :default => :spec
@@ -0,0 +1,13 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
@@ -0,0 +1,15 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the bottom of the
9
+ * compiled file so the styles you add here take precedence over styles defined in any styles
10
+ * defined in the other CSS/SCSS files in this directory. It is generally better to create a new
11
+ * file per style scope.
12
+ *
13
+ *= require_tree .
14
+ *= require_self
15
+ */
@@ -0,0 +1,5 @@
1
module RailsRedshiftReplicator
  # Base controller for the engine; it only configures CSRF protection.
  class ApplicationController < ActionController::Base
    # Use the :exception strategy for requests that fail forgery protection.
    protect_from_forgery with: :exception
  end
end
@@ -0,0 +1,4 @@
1
module RailsRedshiftReplicator
  # View helper namespace for the engine. It currently defines no helpers.
  module ApplicationHelper
  end
end
@@ -0,0 +1,98 @@
1
module RailsRedshiftReplicator
  # ActiveRecord model describing one replication of a source table.
  # It stores the current state, the export format and the last error, and
  # generates state/format helper methods from the STATES and FORMATS lists.
  class Replication < ActiveRecord::Base
    STATES = %w(enqueued exporting exported uploading uploaded importing imported canceled)
    FORMATS = %w(gzip csv)

    # @return [Array] ids from source_table to delete on the next replication.
    serialize :ids_to_delete, Array

    validates :state, inclusion: { in: STATES }
    validates :export_format, inclusion: { in: FORMATS }
    validates_presence_of :replication_type, :key, :source_table, :target_table
    before_validation :setup_target_table

    # Wipes the stored error message and persists the change.
    def clear_errors!
      update_attributes(last_error: nil)
    end

    # Tells whether an error message is currently stored.
    # @return [true, false] true when last_error is present
    def error?
      !last_error.blank?
    end

    # Falls back to the source table name when no target table was given.
    def setup_target_table
      return unless target_table.blank?

      self.target_table = source_table
    end

    # Marks the replication as canceled.
    def cancel!
      update_attribute(:state, 'canceled')
    end

    # Looks up the replicable registered under this record's source table.
    # @return [RailsRedshiftReplicator::Replicable] replicable for this model/table
    def replicable
      RailsRedshiftReplicator.replicables[source_table]
    end

    # Non-canceled replications for one table or a list of tables.
    scope :from_table, lambda { |table|
      table_names = Array(table).map(&:to_s)
      where(source_table: table_names).where.not(state: 'canceled')
    }
    # Replications currently in the given state(s).
    scope :with_state, lambda { |state| where(state: state) }
    # Replications whose id is lower than the cap id computed for the table.
    scope :older_than, lambda { |table, cap| where("id < ?", cap_id(table, cap)) }

    # Finds the id of the cap-th most recent replication of a table.
    # @param table [String] source table name
    # @param cap [Integer, nil] how many of the newest records to count
    # @return [Integer, nil] nil when cap is not given or fewer than cap records exist
    def self.cap_id(table, cap)
      return unless cap

      newest_ids = where(source_table: table).order("id desc").limit(cap).pluck(:id)
      newest_ids[cap - 1]
    end

    # One predicate per export format.
    # @example
    #   self.export_format = "gzip"
    #   self.csv? #=> false
    FORMATS.each do |format_name|
      define_method("#{format_name}?") { export_format == format_name }
    end

    STATES.each do |state_name|
      # Moves the replication to the state and persists it, together with any
      # extra attributes given.
      # @example
      #   self.uploaded! upload_duration: 10
      #   self.state #=> "uploaded"
      #   self.upload_duration #=> 10
      # @return [Time] current time
      define_method("#{state_name}!") do |options = {}|
        update_attributes({ state: state_name }.merge(options))
        Time.now
      end

      # Same as the bang variant, but only assigns the attributes _without_ saving.
      # @example
      #   self.uploaded upload_duration: 10
      #   self.state #=> "uploaded"
      #   self.upload_duration #=> 10
      # @return [Time] current time
      define_method(state_name) do |options = {}|
        assign_attributes({ state: state_name }.merge(options))
        Time.now
      end

      # Predicate for the current state.
      # @example
      #   self.state = "uploaded"
      #   self.uploaded? #=> true
      define_method("#{state_name}?") { self.state == state_name }

      # Class-level scope named after the state.
      # @example
      #   Replication.uploaded # same as where(state: "uploaded")
      scope state_name, -> { where(state: state_name) }
    end
  end
end
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>RailsRedshiftReplicator</title>
5
+ <%= stylesheet_link_tag "rails_redshift_replicator/application", media: "all" %>
6
+ <%= javascript_include_tag "rails_redshift_replicator/application" %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,20 @@
1
+ en:
2
+ rails_redshift_replicator:
3
+ uploading_notice: "uploading %{file} to %{key}"
4
+ gzip_notice: "gzip %{file} to %{gzip_file} with command: %{command}"
5
+ exporting_results: "exporting %{counts} results"
6
+ importing_file: "importing %{file} to %{target_table}"
7
+ no_new_records: "No new records for %{table_name} to export"
8
+ missing_indexes: "'%{replication_field}' index is missing on table '%{table_name}'. Export performance can be improved by adding indexes to replication fields"
9
+ missing_table: "Couldn't find table %{table_name} on Redshift"
10
+ missing_replicator_type: Replication type not implemented
11
+ must_specify_tables: "Specify tables to export or use :all for all eligible tables"
12
+ replicable_added: "Added replicable for table '%{table_name}'"
13
+ executing_query: "Executing query with %{adapter}: %{sql}"
14
+ nothing_to_import: "No replication for table '%{table_name}' was pending import"
15
+ table_not_replicable: "The table '%{table_name}' is not registered as replicable"
16
+ max_retries_reached: "The replication #%{id} for table '%{table_name}' reached the maximum number of retries"
17
+ resuming_replication: "Resuming %{action} replication for table '%{table_name}', which was on %{state} state"
18
+ propagating_deletes: "Propagating %{count} deleted records from table '%{table_name}'"
19
+ delete_propagation_error: "Failed to propagate %{count} deleted records from table '%{table_name}'"
20
+ deleting_file: "Deleting file on s3: %{key}"
@@ -0,0 +1,2 @@
1
# Route definitions for the engine. The draw block is empty, so no routes are defined here.
RailsRedshiftReplicator::Engine.routes.draw do
end
@@ -0,0 +1,24 @@
1
+ # 20160503214955
2
# Creates the rails_redshift_replicator_replications table.
# Besides the columns below, create_table adds its default primary key.
class CreateRailsRedshiftReplicatorReplications < ActiveRecord::Migration
  def change
    create_table(:rails_redshift_replicator_replications) do |table|
      table.string   :replication_type
      table.string   :key
      table.string   :state, default: "enqueued"
      table.string   :last_record
      table.integer  :retries, default: 0
      table.text     :last_error
      table.string   :source_table
      table.string   :target_table
      table.integer  :slices
      table.string   :first_record
      table.integer  :record_count
      table.string   :export_format
      table.integer  :export_duration
      table.integer  :upload_duration
      table.integer  :import_duration
      table.datetime :created_at, null: false
      table.datetime :updated_at, null: false
    end
  end
end
@@ -0,0 +1,8 @@
1
# Creates the rails_redshift_replicator_deleted_ids table (no primary key):
# an indexed source table name plus an integer object_id column.
class CreateTableRailsRedshiftReplicatorDeletedIds < ActiveRecord::Migration
  def change
    create_table(:rails_redshift_replicator_deleted_ids, id: false) do |table|
      table.string(:source_table, index: true)
      table.integer(:object_id)
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'rails/generators/base'
2
+ require 'securerandom'
3
+
4
module RailsRedshiftReplicator
  module Generators

    # Install generator: writes the initializer from the bundled template and
    # copies the gem's English locale file into the host application.
    class InstallGenerator < Rails::Generators::Base
      source_root File.expand_path("../../templates", __FILE__)

      desc "Creates a RRR initializer and copy locale files to your application."

      # Renders the initializer template into config/initializers.
      def copy_initializer
        initializer_name = "rails_redshift_replicator.rb"
        template initializer_name, "config/initializers/#{initializer_name}"
      end

      # Copies the locale file; the source path is relative to source_root.
      def copy_locale
        locale_name = "rails_redshift_replicator.en.yml"
        copy_file "../../../config/locales/#{locale_name}", "config/locales/#{locale_name}"
      end

      # @return [true, false] whether the running Rails major version is 4
      def rails_4?
        4 == Rails::VERSION::MAJOR
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
# Initializer template for RailsRedshiftReplicator (RRR).
# Every option is assigned on the object yielded by RailsRedshiftReplicator.setup.
RailsRedshiftReplicator.setup do |config|
  # RRR already provides a logger pointing to STDOUT, but you can point it to your own logger.
  # Just be sure to make it inherit from RailsRedshiftReplicator::RLogger or you will lose
  # the notifications feature.
  # config.logger = MyLogger.new

  # Connection parameters for Redshift. Defaults to environment variables.
  config.redshift_connection_params = {
    host: ENV['RRR_REDSHIFT_HOST'],
    dbname: ENV['RRR_REDSHIFT_DATABASE'],
    port: ENV['RRR_REDSHIFT_PORT'],
    user: ENV['RRR_REDSHIFT_USER'],
    password: ENV['RRR_REDSHIFT_PASSWORD']
  }

  # AWS S3 Replication bucket credentials. Defaults to environment variables.
  config.aws_credentials = {
    key: ENV['RRR_AWS_ACCESS_KEY_ID'],
    secret: ENV['RRR_AWS_SECRET_ACCESS_KEY']
  }

  # AWS S3 replication bucket parameters.
  # region defaults to environment variable or US East (N. Virginia)
  # bucket defaults to environment variable
  # prefix defaults to environment variable
  config.s3_bucket_params = {
    region: (ENV['RRR_REPLICATION_REGION'] || 'us-east-1'),
    bucket: ENV['RRR_REPLICATION_BUCKET'],
    prefix: ENV['RRR_REPLICATION_PREFIX']
  }

  # see [http://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html]
  # You can add other keys aside from changing these.
  # The keys won't be used on the copy commands. Just their values.
  # To remove one of the defaults, set it to nil.
  # @example:
  #   config.copy_options = {
  #     statupdate: nil,
  #   }
  config.copy_options = {
    statupdate: 'STATUPDATE TRUE',
    acceptinvchars: 'ACCEPTINVCHARS',
    empty: 'EMPTYASNULL',
    truncate: 'TRUNCATECOLUMNS'
  }

  # Number of slices available on Redshift cluster. Used to split export files. Defaults to 1.
  # see [http://docs.aws.amazon.com/redshift/latest/dg/t_splitting-data-files.html]
  config.redshift_slices = 1

  # Folder to store temporary replication files until the S3 upload. Defaults to /tmp
  config.local_replication_path = '/tmp'

  # Command or path to executable that splits files
  config.split_command = 'split'

  # Command or path to executable that compresses files to gzip
  config.gzip_command = 'gzip'

  # Enable debug mode to output messages to STDOUT. Defaults to false
  config.debug_mode = false

  # Defines how many replication records are kept in history. Defaults to nil, keeping full history.
  config.history_cap = nil

  # Preferred format for export file
  config.preferred_format = 'csv'

  # Maximum number of retries for a replication before cancelling and starting another
  config.max_retries = 5

  # If deletes should be tracked and propagated to redshift
  # Take a look at the "A word on tracking deletions" section
  config.enable_delete_tracking = false
end
@@ -0,0 +1,229 @@
1
+ require 'hair_trigger'
2
+ require 'active_support'
3
+ require "rails_redshift_replicator/engine"
4
+ require 'rails_redshift_replicator/model/extension'
5
+ require 'rails_redshift_replicator/model/hair_trigger_extension'
6
+ require 'rails_redshift_replicator/replicable'
7
+ require 'rails_redshift_replicator/deleter'
8
+ require 'rails_redshift_replicator/rlogger'
9
+ require 'rails_redshift_replicator/file_manager'
10
+
11
+ require 'rails_redshift_replicator/exporters/base'
12
+ require 'rails_redshift_replicator/exporters/identity_replicator'
13
+ require 'rails_redshift_replicator/exporters/timed_replicator'
14
+ require 'rails_redshift_replicator/exporters/full_replicator'
15
+
16
+ require 'rails_redshift_replicator/importers/base'
17
+ require 'rails_redshift_replicator/importers/identity_replicator'
18
+ require 'rails_redshift_replicator/importers/timed_replicator'
19
+ require 'rails_redshift_replicator/importers/full_replicator'
20
+
21
+ require 'rails_redshift_replicator/tools/analyze'
22
+ require 'rails_redshift_replicator/tools/vacuum'
23
+
24
+
25
# Entry point of the gem: holds the configuration options, the registry of
# replicable tables and the top-level export / import / replicate commands.
module RailsRedshiftReplicator
  mattr_accessor :replicables, :logger, :redshift_connection_params, :aws_credentials, :s3_bucket_params,
                 :redshift_slices, :local_replication_path, :debug_mode, :history_cap,
                 :split_command, :gzip_command, :preferred_format, :max_retries, :enable_delete_tracking,
                 :delete_s3_file_after_import, :copy_options

  class << self

    # Resets every configuration option (and the replicables registry) to its default.
    # @note Useful for testing
    # @return [nil]
    def define_defaults
      @@replicables = {}.with_indifferent_access
      @@logger = RLogger.new(STDOUT).tap{ |l| l.level = Logger::WARN }

      # Connection parameters for Redshift. Defaults to environment variables.
      @@redshift_connection_params = {
        host: ENV['RRR_REDSHIFT_HOST'],
        dbname: ENV['RRR_REDSHIFT_DATABASE'],
        port: ENV['RRR_REDSHIFT_PORT'],
        user: ENV['RRR_REDSHIFT_USER'],
        password: ENV['RRR_REDSHIFT_PASSWORD']
      }

      # AWS S3 Replication bucket credentials. Defaults to environment variables.
      @@aws_credentials = {
        key: ENV['RRR_AWS_ACCESS_KEY_ID'],
        secret: ENV['RRR_AWS_SECRET_ACCESS_KEY']
      }

      # AWS S3 replication bucket parameters.
      # region defaults to environment variable or US East (N. Virginia)
      # bucket defaults to environment variable
      @@s3_bucket_params = {
        region: (ENV['RRR_REPLICATION_REGION'] || 'us-east-1'),
        bucket: ENV['RRR_REPLICATION_BUCKET'],
        prefix: ENV['RRR_REPLICATION_PREFIX']
      }

      # see [http://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html]
      # You can add other keys aside from changing these.
      # The keys won't be used on the copy commands. Just their values.
      # To remove one of the defaults, set it to nil.
      # @example:
      #   @@copy_options = {
      #     statupdate: nil,
      #   }
      @@copy_options = {
        statupdate: 'STATUPDATE TRUE',
        acceptinvchars: 'ACCEPTINVCHARS',
        empty: 'EMPTYASNULL',
        truncate: 'TRUNCATECOLUMNS'
      }

      # Number of slices available on Redshift cluster. Used to split export files. Defaults to 1.
      # see [http://docs.aws.amazon.com/redshift/latest/dg/t_splitting-data-files.html]
      @@redshift_slices = 1

      # Folder to store temporary replication files until the S3 upload. Defaults to /tmp
      @@local_replication_path = '/tmp'

      # Command or path to executable that splits files
      @@split_command = 'split'

      # Command or path to executable that compresses files to gzip
      @@gzip_command = 'gzip'

      # Enable debug mode to output messages to STDOUT. Defaults to false
      @@debug_mode = false

      # Defines how many replication records are kept in history. Defaults to nil, keeping full history.
      @@history_cap = nil

      # Maximum number of retries for a replication before cancelling and starting another.
      # nil by default.
      @@preferred_format = 'csv'
      @@max_retries = nil

      # If deletes should be tracked and propagated to redshift
      @@enable_delete_tracking = false

      # If exported files on s3 should be deleted after imported
      @@delete_s3_file_after_import = true

      nil
    end
    alias reload define_defaults

    # Stores the debug flag and adjusts the logger level accordingly:
    # DEBUG when value is exactly true, WARN otherwise.
    # @param value [true, false]
    def debug_mode=(value)
      logger.level = value == true ? Logger::DEBUG : Logger::WARN
      @@debug_mode = value
    end

    # Stores the history cap. A non-nil value is raised to a minimum of 2;
    # nil/false is stored unchanged.
    # @param value [Integer, nil]
    def history_cap=(value)
      @@history_cap = value && [value,2].max
    end

    # Yields the module itself so options can be assigned inside a block.
    # @yieldparam config [RailsRedshiftReplicator]
    # @return [Object] whatever the block returns
    def setup
      yield self
    end

    # Registers one or more replicables.
    # @param hash [Hash] table name => replicable; merged into the registry
    # @return [Hash] the updated registry
    def add_replicable(hash)
      logger.debug I18n.t(:replicable_added, table_name: hash.keys.first, scope: :rails_redshift_replicator)
      RailsRedshiftReplicator.replicables.merge! hash
    end

    # Rebuilds every registered replicable from its replication type and options.
    # The registry is snapshotted first: a bare `replicables = {}` would only create
    # an empty local variable (making the loop a no-op), and iterating the live
    # registry while add_replicable merges into it is best avoided.
    # @return [Hash] the snapshot that was iterated
    def reload_replicables
      registered = RailsRedshiftReplicator.replicables.dup
      registered.each do |name, replicable|
        add_replicable(name => RailsRedshiftReplicator::Replicable.new(replicable.replication_type, replicable.options))
      end
    end

    # Performs full replication (export + import)
    # @param tables [Array<Symbol>, Argument list] tables to replicate or :all
    # @example Replicate user and post models.
    #   RailsRedshiftReplicator.replicate(:user, :publication)
    # @example Replicate all models
    #   RailsRedshiftReplicator.replicate(:all)
    # @raise [StandardError] when no table is given
    def replicate(*tables)
      check_args(tables)
      replicable_definitions(tables_to_perform(tables)).each do |_, replicable|
        replicable.export
        replicable.import
      end
    end

    # Export step only.
    # @see .replicate
    def export(*tables)
      check_args(tables)
      replicable_definitions(tables_to_perform(tables)).each { |_, replicable| replicable.export }
    end

    # Import step only.
    # @see .replicate
    def import(*tables)
      check_args(tables)
      replicable_definitions(tables_to_perform(tables)).each { |_, replicable| replicable.import }
    end

    # Logs and raises when the argument list is an empty array.
    # @param tables [Array]
    # @raise [StandardError] when tables is empty
    def check_args(tables)
      return unless tables == []

      error_message = I18n.t(:must_specify_tables, scope: :rails_redshift_replicator)
      logger.error error_message
      raise StandardError, error_message
    end

    # Builds a Tools::Vacuum with the given arguments and performs it.
    def vacuum(*args)
      Tools::Vacuum.new(*args).perform
    end

    # Builds a Tools::Analyze with the given arguments and performs it.
    def analyze(*args)
      Tools::Analyze.new(*args).perform
    end

    # Lists exporters names
    # @return [Array<String>]
    def base_exporter_types
      [
        'identity_replicator',
        'timed_replicator',
        'full_replicator'
      ]
    end

    # All replicable tables registered in RailsRedshiftReplicator
    # either from the model or directly.
    # @return [Array<String>] tables
    def replicable_tables
      RailsRedshiftReplicator.replicables.keys.map(&:to_s)
    end

    # Target table of every registered replicable.
    # NOTE(review): this indexes each registry value with [:target_table]; confirm that
    # Replicable supports #[] (it is not defined in this file).
    def replicable_target_tables
      RailsRedshiftReplicator.replicables.map{ |k,v| v[:target_table] }
    end

    # @param tables [Array<String, Symbol>] table names to pick
    # @return [Hash] subset of key pairs of replicables
    def replicable_definitions(tables)
      wanted = tables.map(&:to_s) # computed once instead of once per registry key
      RailsRedshiftReplicator.replicables.select { |k,_| wanted.include?(k.to_s) }
    end

    # Returns tables to export. :all selects all eligible.
    # Requested tables that are not registered are reported through the logger.
    # @param tables [Array<String, Symbol>]
    # @return [Array<String>] tables to export
    def tables_to_perform(tables)
      tables = Array(tables).map(&:to_s)
      if tables[0] == 'all'
        replicable_tables
      else
        (replicable_tables & tables).tap do |selected|
          warn_if_unreplicable tables-selected
        end
      end
    end

    # Logs one warning per table name given.
    # @param tables [Array<String>]
    def warn_if_unreplicable(tables)
      tables.each { |table| logger.warn I18n.t(:table_not_replicable, table_name: table, scope: :rails_redshift_replicator) }
    end

    # Redshift connection, created on first use and then reused.
    # @return [PG::Connection]
    def connection
      @redshift ||= PG.connect(redshift_connection_params)
    end
  end
end
229
+ RailsRedshiftReplicator.define_defaults