rails_redshift_replicator 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (116) hide show
  1. checksums.yaml +7 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README.rdoc +3 -0
  4. data/Rakefile +34 -0
  5. data/app/assets/javascripts/rails_redshift_replicator/application.js +13 -0
  6. data/app/assets/stylesheets/rails_redshift_replicator/application.css +15 -0
  7. data/app/controllers/rails_redshift_replicator/application_controller.rb +5 -0
  8. data/app/helpers/rails_redshift_replicator/application_helper.rb +4 -0
  9. data/app/models/rails_redshift_replicator/replication.rb +98 -0
  10. data/app/views/layouts/rails_redshift_replicator/application.html.erb +14 -0
  11. data/config/locales/rails_redshift_replicator.en.yml +20 -0
  12. data/config/routes.rb +2 -0
  13. data/db/migrate/20160503214955_create_rails_redshift_replicator_replications.rb +24 -0
  14. data/db/migrate/20160509193335_create_table_rails_redshift_replicator_deleted_ids.rb +8 -0
  15. data/lib/generators/rails_redshift_replicator/install_generator.rb +25 -0
  16. data/lib/generators/templates/rails_redshift_replicator.rb +74 -0
  17. data/lib/rails_redshift_replicator.rb +229 -0
  18. data/lib/rails_redshift_replicator/adapters/generic.rb +40 -0
  19. data/lib/rails_redshift_replicator/adapters/mysql2.rb +22 -0
  20. data/lib/rails_redshift_replicator/adapters/postgresql.rb +37 -0
  21. data/lib/rails_redshift_replicator/adapters/sqlite.rb +27 -0
  22. data/lib/rails_redshift_replicator/deleter.rb +67 -0
  23. data/lib/rails_redshift_replicator/engine.rb +14 -0
  24. data/lib/rails_redshift_replicator/exporters/base.rb +215 -0
  25. data/lib/rails_redshift_replicator/exporters/full_replicator.rb +9 -0
  26. data/lib/rails_redshift_replicator/exporters/identity_replicator.rb +9 -0
  27. data/lib/rails_redshift_replicator/exporters/timed_replicator.rb +9 -0
  28. data/lib/rails_redshift_replicator/file_manager.rb +134 -0
  29. data/lib/rails_redshift_replicator/importers/base.rb +158 -0
  30. data/lib/rails_redshift_replicator/importers/full_replicator.rb +17 -0
  31. data/lib/rails_redshift_replicator/importers/identity_replicator.rb +15 -0
  32. data/lib/rails_redshift_replicator/importers/timed_replicator.rb +18 -0
  33. data/lib/rails_redshift_replicator/model/extension.rb +45 -0
  34. data/lib/rails_redshift_replicator/model/hair_trigger_extension.rb +8 -0
  35. data/lib/rails_redshift_replicator/replicable.rb +143 -0
  36. data/lib/rails_redshift_replicator/rlogger.rb +12 -0
  37. data/lib/rails_redshift_replicator/tools/analyze.rb +18 -0
  38. data/lib/rails_redshift_replicator/tools/vacuum.rb +77 -0
  39. data/lib/rails_redshift_replicator/version.rb +3 -0
  40. data/lib/tasks/rails_redshift_replicator_tasks.rake +4 -0
  41. data/spec/dummy/README.rdoc +28 -0
  42. data/spec/dummy/Rakefile +6 -0
  43. data/spec/dummy/app/assets/javascripts/application.js +13 -0
  44. data/spec/dummy/app/assets/stylesheets/application.css +15 -0
  45. data/spec/dummy/app/controllers/application_controller.rb +5 -0
  46. data/spec/dummy/app/helpers/application_helper.rb +2 -0
  47. data/spec/dummy/app/models/post.rb +4 -0
  48. data/spec/dummy/app/models/tag.rb +4 -0
  49. data/spec/dummy/app/models/user.rb +5 -0
  50. data/spec/dummy/app/views/layouts/application.html.erb +14 -0
  51. data/spec/dummy/bin/bundle +3 -0
  52. data/spec/dummy/bin/rails +4 -0
  53. data/spec/dummy/bin/rake +4 -0
  54. data/spec/dummy/bin/setup +29 -0
  55. data/spec/dummy/config.ru +4 -0
  56. data/spec/dummy/config/application.rb +26 -0
  57. data/spec/dummy/config/boot.rb +5 -0
  58. data/spec/dummy/config/database.yml +37 -0
  59. data/spec/dummy/config/environment.rb +5 -0
  60. data/spec/dummy/config/environments/development.rb +41 -0
  61. data/spec/dummy/config/environments/production.rb +79 -0
  62. data/spec/dummy/config/environments/test.rb +42 -0
  63. data/spec/dummy/config/initializers/assets.rb +11 -0
  64. data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
  65. data/spec/dummy/config/initializers/cookies_serializer.rb +3 -0
  66. data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  67. data/spec/dummy/config/initializers/inflections.rb +16 -0
  68. data/spec/dummy/config/initializers/mime_types.rb +4 -0
  69. data/spec/dummy/config/initializers/rails_redshift_replicator.rb +59 -0
  70. data/spec/dummy/config/initializers/session_store.rb +3 -0
  71. data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
  72. data/spec/dummy/config/locales/en.yml +23 -0
  73. data/spec/dummy/config/locales/rails_redshift_replicator.en.yml +19 -0
  74. data/spec/dummy/config/routes.rb +4 -0
  75. data/spec/dummy/config/secrets.yml +22 -0
  76. data/spec/dummy/db/development.sqlite3 +0 -0
  77. data/spec/dummy/db/migrate/20160504120421_create_test_tables.rb +40 -0
  78. data/spec/dummy/db/migrate/20160509225445_create_triggers_posts_delete_or_tags_delete_or_users_delete.rb +33 -0
  79. data/spec/dummy/db/migrate/20160511000937_create_rails_redshift_replicator_replications.rails_redshift_replicator.rb +25 -0
  80. data/spec/dummy/db/migrate/20160511000938_create_table_rails_redshift_replicator_deleted_ids.rails_redshift_replicator.rb +9 -0
  81. data/spec/dummy/db/schema.rb +99 -0
  82. data/spec/dummy/db/test.sqlite3 +0 -0
  83. data/spec/dummy/log/development.log +1623 -0
  84. data/spec/dummy/log/test.log +95379 -0
  85. data/spec/dummy/public/404.html +67 -0
  86. data/spec/dummy/public/422.html +67 -0
  87. data/spec/dummy/public/500.html +66 -0
  88. data/spec/dummy/public/favicon.ico +0 -0
  89. data/spec/dummy/rails_redshift_replicator_development +0 -0
  90. data/spec/factories/rails_redshift_replicator_replications.rb +31 -0
  91. data/spec/integration/rails_redshift_replicator_spec.rb +148 -0
  92. data/spec/integration/setup_spec.rb +149 -0
  93. data/spec/lib/rails_redshift_replicator/deleter_spec.rb +90 -0
  94. data/spec/lib/rails_redshift_replicator/exporters/base_spec.rb +326 -0
  95. data/spec/lib/rails_redshift_replicator/exporters/full_replicator_spec.rb +33 -0
  96. data/spec/lib/rails_redshift_replicator/exporters/identity_replicator_spec.rb +40 -0
  97. data/spec/lib/rails_redshift_replicator/exporters/timed_replicator_spec.rb +43 -0
  98. data/spec/lib/rails_redshift_replicator/file_manager_spec.rb +90 -0
  99. data/spec/lib/rails_redshift_replicator/importers/base_spec.rb +102 -0
  100. data/spec/lib/rails_redshift_replicator/importers/full_replicator_spec.rb +27 -0
  101. data/spec/lib/rails_redshift_replicator/importers/identity_replicator_spec.rb +26 -0
  102. data/spec/lib/rails_redshift_replicator/importers/timed_replicator_spec.rb +26 -0
  103. data/spec/lib/rails_redshift_replicator/model/extension_spec.rb +36 -0
  104. data/spec/lib/rails_redshift_replicator/replicable_spec.rb +230 -0
  105. data/spec/lib/rails_redshift_replicator/rlogger_spec.rb +22 -0
  106. data/spec/lib/rails_redshift_replicator/tools/analyze_spec.rb +15 -0
  107. data/spec/lib/rails_redshift_replicator/tools/vacuum_spec.rb +65 -0
  108. data/spec/lib/rails_redshift_replicator_spec.rb +110 -0
  109. data/spec/models/rails_redshift_replicator/replication_spec.rb +104 -0
  110. data/spec/spec_helper.rb +36 -0
  111. data/spec/support/csv/invalid_user.csv +12 -0
  112. data/spec/support/csv/valid_post.csv +2 -0
  113. data/spec/support/csv/valid_tags_users.csv +1 -0
  114. data/spec/support/csv/valid_user.csv +2 -0
  115. data/spec/support/rails_redshift_replicator_helpers.rb +95 -0
  116. metadata +430 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: 091d07d2dc935184033dc7be33159df8b0eaca82
4
+ data.tar.gz: 16cc2eac454114f7b1e2ce146039e04b032a912d
5
+ SHA512:
6
+ metadata.gz: e14f322cb08776821dc29c403ac176dd4546dd85c94251e9d6d0bf03eedd48a97a90493e03eac43b3ff9e3b0cdc0661d8549b3a7facbd0193a6e467874e293a6
7
+ data.tar.gz: 79c1f83dc575f1bb80428122770a8aab0dff9b91534a207013882944cc850c0ae2c81872f95877f3b9ed9dd5511366c1c0699d3b7d9b6bb4d7d63c22c8dd900e
@@ -0,0 +1,20 @@
1
+ Copyright 2016 Alexandre Angelim
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,3 @@
1
+ = RailsRedshiftReplicator
2
+
3
+ This project rocks and uses MIT-LICENSE.
@@ -0,0 +1,34 @@
1
# Rakefile for the rails_redshift_replicator engine.
# Wires up rdoc, the Rails engine tasks (run against the dummy app), gem
# packaging tasks and the RSpec suite. Statement order is significant: the
# engine tasks need APP_RAKEFILE to be defined before they are loaded.

# Prefer the bundled environment; print a hint when Bundler cannot be loaded.
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc`: API documentation for the README and everything under lib/.
RDoc::Task.new(:rdoc) do |doc_task|
  doc_task.rdoc_dir = 'rdoc'
  doc_task.title = 'RailsRedshiftReplicator'
  doc_task.options.push('--line-numbers')
  doc_task.rdoc_files.include('README.rdoc', 'lib/**/*.rb')
end

# Engine tasks are executed through the dummy application's Rakefile.
APP_RAKEFILE = File.expand_path("../spec/dummy/Rakefile", __FILE__)
load 'rails/tasks/engine.rake'
load 'rails/tasks/statistics.rake'

Bundler::GemHelper.install_tasks

# Pick up any extra task files shipped next to this Rakefile.
Dir.glob(File.join(File.dirname(__FILE__), 'tasks/**/*.rake')).each do |rake_file|
  load rake_file
end

require 'rspec/core'
require 'rspec/core/rake_task'

desc "Run all specs in spec directory (excluding plugin specs)"
RSpec::Core::RakeTask.new(spec: 'app:db:test:prepare')

task default: :spec
@@ -0,0 +1,13 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or any plugin's vendor/assets/javascripts directory can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/rails/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
@@ -0,0 +1,15 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or any plugin's vendor/assets/stylesheets directory can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the bottom of the
9
+ * compiled file so the styles you add here take precedence over styles defined in any styles
10
+ * defined in the other CSS/SCSS files in this directory. It is generally better to create a new
11
+ * file per style scope.
12
+ *
13
+ *= require_tree .
14
+ *= require_self
15
+ */
@@ -0,0 +1,5 @@
1
+ module RailsRedshiftReplicator
2
+ class ApplicationController < ActionController::Base # Base controller for controllers defined inside the engine namespace.
3
+ protect_from_forgery with: :exception # Enable CSRF protection; requests failing verification raise an exception.
4
+ end
5
+ end
@@ -0,0 +1,4 @@
1
+ module RailsRedshiftReplicator
2
+ module ApplicationHelper # Helper module for the engine; it currently defines no helpers.
3
+ end
4
+ end
@@ -0,0 +1,98 @@
1
module RailsRedshiftReplicator
  # One replication run of a source table towards its target table.
  #
  # A record moves through the states listed in STATES. For every state the
  # class generates three instance methods and one scope (see the loop at the
  # bottom of the class):
  #
  #   replication.uploaded! upload_duration: 10  # persist the new state
  #   replication.uploaded  upload_duration: 10  # assign without saving
  #   replication.uploaded?                      # => true
  #   Replication.uploaded                       # scope
  class Replication < ActiveRecord::Base
    # Valid values for the `state` column, in lifecycle order.
    STATES = %w(enqueued exporting exported uploading uploaded importing imported canceled).freeze
    # Valid values for the `export_format` column.
    FORMATS = %w(gzip csv).freeze

    # @return [Array] ids from source_table to delete on the next replication.
    # NOTE(review): the create-table migration shipped with this model does not
    # declare an `ids_to_delete` column - confirm it is added elsewhere.
    serialize :ids_to_delete, Array

    validates :state, inclusion: { in: STATES }
    validates :export_format, inclusion: { in: FORMATS }
    validates_presence_of :replication_type, :key, :source_table, :target_table
    before_validation :setup_target_table

    # Clears the error column.
    # @return [true, false] whether the record was saved
    def clear_errors!
      update(last_error: nil)
    end

    # Tells whether the replication recorded an error.
    # @return [true, false] true when last_error is present
    def error?
      last_error.present?
    end

    # Defaults the target table to the source table when none was given.
    # Runs as a before_validation callback.
    def setup_target_table
      self.target_table = source_table if target_table.blank?
    end

    # Cancels the replication.
    # Uses update_attribute, so validations are skipped on purpose.
    def cancel!
      update_attribute :state, 'canceled'
    end

    # @return [RailsRedshiftReplicator::Replicable] replicable registered for source_table
    def replicable
      RailsRedshiftReplicator.replicables[source_table]
    end

    # Non-canceled replications for one or more source tables.
    scope :from_table, ->(table) { where(source_table: Array(table).map(&:to_s)).where.not(state: 'canceled') }
    # Replications currently in the given state(s).
    scope :with_state, ->(state) { where(state: state) }
    # Replications older than the `cap` most recent ones of `table`.
    # When cap_id returns nil the condition compares against NULL and matches nothing.
    scope :older_than, ->(table, cap) { where("id < ?", cap_id(table, cap)) }

    # Id of the cap-th most recent replication of a table.
    # @param table [String, Symbol] source table name
    # @param cap [Integer, nil] number of replications to keep
    # @return [Integer, nil] nil when cap is nil or fewer than cap records exist
    def self.cap_id(table, cap)
      return unless cap
      where(source_table: table).order("id desc").limit(cap).pluck(:id)[cap - 1]
    end

    # Builds helper methods to identify the export format.
    # @example
    #   replication.export_format = "gzip"
    #   replication.csv? #=> false
    FORMATS.each do |format_name|
      define_method "#{format_name}?" do
        export_format == format_name
      end
    end

    STATES.each do |state_name|
      # Moves the replication to the state and persists it together with any
      # extra attributes.
      # @example
      #   replication.uploaded! upload_duration: 10
      #   replication.state           #=> "uploaded"
      #   replication.upload_duration #=> 10
      # @return [Time] current time (returned whether or not the save succeeded)
      define_method "#{state_name}!" do |options = {}|
        update({ state: state_name }.merge(options))
        Time.now
      end

      # Moves the replication to the state _without_ persisting changes.
      # @example
      #   replication.uploaded upload_duration: 10
      #   replication.state           #=> "uploaded"
      #   replication.upload_duration #=> 10
      # @return [Time] current time
      define_method "#{state_name}" do |options = {}|
        assign_attributes({ state: state_name }.merge(options))
        Time.now
      end

      # Tells whether the replication is in the state.
      # @example
      #   replication.state = "uploaded"
      #   replication.uploaded? #=> true
      define_method "#{state_name}?" do
        self.state == state_name
      end

      # Class-level scope named after the state.
      # @example
      #   Replication.uploaded # same as where(state: "uploaded")
      scope state_name, -> { where state: state_name }
    end
  end
end
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>RailsRedshiftReplicator</title>
5
+ <%= stylesheet_link_tag "rails_redshift_replicator/application", media: "all" %>
6
+ <%= javascript_include_tag "rails_redshift_replicator/application" %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,20 @@
1
+ en:
2
+ rails_redshift_replicator:
3
+ uploading_notice: "uploading %{file} to %{key}"
4
+ gzip_notice: "gzip %{file} to %{gzip_file} with command: %{command}"
5
+ exporting_results: "exporting %{counts} results"
6
+ importing_file: "importing %{file} to %{target_table}"
7
+ no_new_records: "No new records for %{table_name} to export"
8
+ missing_indexes: "'%{replication_field}' index is missing on table '%{table_name}'. Export performance can be improved by adding indexes to replication fields"
9
+ missing_table: "Couldn't find table %{table_name} on Redshift"
10
+ missing_replicator_type: Replication type not implemented
11
+ must_specify_tables: "Specify tables to export or use :all for all eligible tables"
12
+ replicable_added: "Added replicable for table '%{table_name}'"
13
+ executing_query: "Executing query with %{adapter}: %{sql}"
14
+ nothing_to_import: "No replication for table '%{table_name}' was pending import"
15
+ table_not_replicable: "The table '%{table_name}' is not registered as replicable"
16
+ max_retries_reached: "The replication #%{id} for table '%{table_name}' reached the maximum number of retries"
17
+ resuming_replication: "Resuming %{action} replication for table '%{table_name}', which was on %{state} state"
18
+ propagating_deletes: "Propagating %{count} deleted records from table '%{table_name}'"
19
+ delete_propagation_error: "Failed to propagate %{count} deleted records from table '%{table_name}'"
20
+ deleting_file: "Deleting file on s3: %{key}"
@@ -0,0 +1,2 @@
1
+ RailsRedshiftReplicator::Engine.routes.draw do
2
+ end
@@ -0,0 +1,24 @@
1
+ # 20160503214955
2
# Creates the table backing RailsRedshiftReplicator::Replication.
class CreateRailsRedshiftReplicatorReplications < ActiveRecord::Migration
  def change
    create_table(:rails_redshift_replicator_replications) do |table|
      table.string   :replication_type
      table.string   :key
      table.string   :state, default: "enqueued"
      table.string   :last_record
      table.integer  :retries, default: 0
      table.text     :last_error
      table.string   :source_table
      table.string   :target_table
      table.integer  :slices
      table.string   :first_record
      table.integer  :record_count
      table.string   :export_format
      # Durations of each replication phase.
      table.integer  :export_duration
      table.integer  :upload_duration
      table.integer  :import_duration
      table.datetime :created_at, null: false
      table.datetime :updated_at, null: false
    end
  end
end
@@ -0,0 +1,8 @@
1
+ class CreateTableRailsRedshiftReplicatorDeletedIds < ActiveRecord::Migration # Creates rails_redshift_replicator_deleted_ids (source_table, object_id).
2
+ def change
3
+ create_table :rails_redshift_replicator_deleted_ids, id: false do |t| # id: false - the table gets no primary key column.
4
+ t.string :source_table, index: true # Indexed source table name.
5
+ t.integer :object_id # NOTE(review): shares its name with Ruby's Object#object_id - confirm no ActiveRecord model maps this table.
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,25 @@
1
+ require 'rails/generators/base'
2
+ require 'securerandom'
3
+
4
module RailsRedshiftReplicator
  module Generators
    # Install generator: writes the initializer and copies the English locale
    # file into the host application.
    class InstallGenerator < Rails::Generators::Base
      source_root File.expand_path("../../templates", __FILE__)

      desc "Creates a RRR initializer and copy locale files to your application."

      # Renders the initializer template into config/initializers.
      def copy_initializer
        initializer_name = "rails_redshift_replicator.rb"
        template(initializer_name, "config/initializers/#{initializer_name}")
      end

      # Copies the gem's locale file; the source path is relative to source_root.
      def copy_locale
        locale_path = "config/locales/rails_redshift_replicator.en.yml"
        copy_file("../../../#{locale_path}", locale_path)
      end

      # @return [true, false] whether the host application runs on Rails 4
      def rails_4?
        Rails::VERSION::MAJOR == 4
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
+ RailsRedshiftReplicator.setup do |config|
2
+ # RRR already provides a logger pointing to STDOUT, but you can point it to your own logger.
3
+ # Just be sure to make it inherit from RailsRedshiftReplicator::RLogger or you will lose
4
+ # the notifications feature.
5
+ # config.logger = MyLogger.new
6
+
7
+ # Connection parameters for Redshift. Defaults to environment variables.
8
+ config.redshift_connection_params = {
9
+ host: ENV['RRR_REDSHIFT_HOST'],
10
+ dbname: ENV['RRR_REDSHIFT_DATABASE'],
11
+ port: ENV['RRR_REDSHIFT_PORT'],
12
+ user: ENV['RRR_REDSHIFT_USER'],
13
+ password: ENV['RRR_REDSHIFT_PASSWORD']
14
+ }
15
+
16
+ # AWS S3 Replication bucket credentials. Defaults to environment variables.
17
+ config.aws_credentials = {
18
+ key: ENV['RRR_AWS_ACCESS_KEY_ID'],
19
+ secret: ENV['RRR_AWS_SECRET_ACCESS_KEY']
20
+ }
21
+
22
+ # AWS S3 replication bucket parameters.
23
+ # region defaults to environment variable or US East (N. Virginia)
24
+ # bucket defaults to environment variable
25
+ config.s3_bucket_params = {
26
+ region: (ENV['RRR_REPLICATION_REGION'] || 'us-east-1'),
27
+ bucket: ENV['RRR_REPLICATION_BUCKET'],
28
+ prefix: ENV['RRR_REPLICATION_PREFIX']
29
+ }
30
+
31
+ # see [http://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html]
32
+ # You can add other keys aside from changing these.
33
+ # The keys won't be used on the copy commands. Just their values.
34
+ # To remove one of the defaults, set it to nil.
35
+ # @example:
36
+ # config.copy_options = {
37
+ # statupdate: nil,
38
+ # }
39
+ config.copy_options = {
40
+ statupdate: 'STATUPDATE TRUE',
41
+ acceptinvchars: 'ACCEPTINVCHARS',
42
+ empty: 'EMPTYASNULL',
43
+ truncate: 'TRUNCATECOLUMNS'
44
+ }
45
+
46
+ # Number of slices available on Redshift cluster. Used to split export files. Defaults to 1.
47
+ # see [http://docs.aws.amazon.com/redshift/latest/dg/t_splitting-data-files.html]
48
+ config.redshift_slices = 1
49
+
50
+ # Folder to store temporary replication files until the S3 upload. Defaults to /tmp
51
+ config.local_replication_path = '/tmp'
52
+
53
+ # Command or path to executable that splits files
54
+ config.split_command = 'split'
55
+
56
+ # Command or path to executable that compresses files to gzip
57
+ config.gzip_command = 'gzip'
58
+
59
+ # Enable debug mode to output messages to STDOUT. Default to false
60
+ config.debug_mode = false
61
+
62
+ # Defines how many replication records are kept in history. Default to nil keeping full history.
63
+ config.history_cap = nil
64
+
65
+ # Preferred format for export file
66
+ config.preferred_format = 'csv'
67
+
68
+ # Maximum number of retries for a replication before cancelling and starting another
69
+ config.max_retries = 5
70
+
71
+ # If deletes should be tracked and propagated to redshift
72
+ # Take a look at the "A word on tracking deletions" section
73
+ config.enable_delete_tracking = false
74
+ end
@@ -0,0 +1,229 @@
1
+ require 'hair_trigger'
2
+ require 'active_support'
3
+ require "rails_redshift_replicator/engine"
4
+ require 'rails_redshift_replicator/model/extension'
5
+ require 'rails_redshift_replicator/model/hair_trigger_extension'
6
+ require 'rails_redshift_replicator/replicable'
7
+ require 'rails_redshift_replicator/deleter'
8
+ require 'rails_redshift_replicator/rlogger'
9
+ require 'rails_redshift_replicator/file_manager'
10
+
11
+ require 'rails_redshift_replicator/exporters/base'
12
+ require 'rails_redshift_replicator/exporters/identity_replicator'
13
+ require 'rails_redshift_replicator/exporters/timed_replicator'
14
+ require 'rails_redshift_replicator/exporters/full_replicator'
15
+
16
+ require 'rails_redshift_replicator/importers/base'
17
+ require 'rails_redshift_replicator/importers/identity_replicator'
18
+ require 'rails_redshift_replicator/importers/timed_replicator'
19
+ require 'rails_redshift_replicator/importers/full_replicator'
20
+
21
+ require 'rails_redshift_replicator/tools/analyze'
22
+ require 'rails_redshift_replicator/tools/vacuum'
23
+
24
+
25
# Entry point and configuration holder for the gem.
#
# Settings are module-level accessors (see the mattr_accessor list); they are
# given their defaults by .define_defaults and are usually overridden from an
# initializer through .setup. The module also keeps the registry of replicable
# tables and exposes the export / import / replicate operations.
module RailsRedshiftReplicator
  mattr_accessor :replicables, :logger, :redshift_connection_params, :aws_credentials, :s3_bucket_params,
                 :redshift_slices, :local_replication_path, :debug_mode, :history_cap,
                 :split_command, :gzip_command, :preferred_format, :max_retries, :enable_delete_tracking,
                 :delete_s3_file_after_import, :copy_options

  class << self
    # Resets every setting (and the replicables registry) to its default.
    # @note Useful for testing
    # @return [nil]
    def define_defaults
      @@replicables = {}.with_indifferent_access
      @@logger = RLogger.new(STDOUT).tap { |l| l.level = Logger::WARN }

      # Connection parameters for Redshift. Defaults to environment variables.
      @@redshift_connection_params = {
        host: ENV['RRR_REDSHIFT_HOST'],
        dbname: ENV['RRR_REDSHIFT_DATABASE'],
        port: ENV['RRR_REDSHIFT_PORT'],
        user: ENV['RRR_REDSHIFT_USER'],
        password: ENV['RRR_REDSHIFT_PASSWORD']
      }

      # AWS S3 Replication bucket credentials. Defaults to environment variables.
      @@aws_credentials = {
        key: ENV['RRR_AWS_ACCESS_KEY_ID'],
        secret: ENV['RRR_AWS_SECRET_ACCESS_KEY']
      }

      # AWS S3 replication bucket parameters.
      # region defaults to environment variable or US East (N. Virginia)
      # bucket defaults to environment variable
      @@s3_bucket_params = {
        region: (ENV['RRR_REPLICATION_REGION'] || 'us-east-1'),
        bucket: ENV['RRR_REPLICATION_BUCKET'],
        prefix: ENV['RRR_REPLICATION_PREFIX']
      }

      # see [http://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html]
      # You can add other keys aside from changing these.
      # The keys won't be used on the copy commands. Just their values.
      # To remove one of the defaults, set it to nil.
      # @example
      #   RailsRedshiftReplicator.copy_options[:statupdate] = nil
      @@copy_options = {
        statupdate: 'STATUPDATE TRUE',
        acceptinvchars: 'ACCEPTINVCHARS',
        empty: 'EMPTYASNULL',
        truncate: 'TRUNCATECOLUMNS'
      }

      # Number of slices available on Redshift cluster. Used to split export files. Defaults to 1.
      # see [http://docs.aws.amazon.com/redshift/latest/dg/t_splitting-data-files.html]
      @@redshift_slices = 1

      # Folder to store temporary replication files until the S3 upload. Defaults to /tmp
      @@local_replication_path = '/tmp'

      # Command or path to executable that splits files
      @@split_command = 'split'

      # Command or path to executable that compresses files to gzip
      @@gzip_command = 'gzip'

      # Enable debug mode to output messages to STDOUT. Defaults to false
      @@debug_mode = false

      # Defines how many replication records are kept in history. Defaults to nil, keeping full history.
      @@history_cap = nil

      # Preferred format for export file
      @@preferred_format = 'csv'

      # Maximum number of retries for a replication before cancelling and starting another
      @@max_retries = nil

      # If deletes should be tracked and propagated to redshift
      @@enable_delete_tracking = false

      # If exported files on s3 should be deleted after imported
      @@delete_s3_file_after_import = true

      nil
    end
    alias reload define_defaults

    # Sets debug mode and adjusts the logger level accordingly:
    # DEBUG when value is exactly true, WARN otherwise.
    def debug_mode=(value)
      logger.level = value == true ? Logger::DEBUG : Logger::WARN
      @@debug_mode = value
    end

    # Sets the history cap. A non-nil value is raised to at least 2.
    def history_cap=(value)
      @@history_cap = value && [value, 2].max
    end

    # Yields this module so settings can be assigned in a block.
    # @example
    #   RailsRedshiftReplicator.setup { |config| config.redshift_slices = 2 }
    # @yieldparam config [Module] RailsRedshiftReplicator itself
    # @return [Object] whatever the block returns
    def setup
      yield self
    end

    # Registers one or more replicables.
    # @param hash [Hash] table name => RailsRedshiftReplicator::Replicable
    def add_replicable(hash)
      logger.debug I18n.t(:replicable_added, table_name: hash.keys.first, scope: :rails_redshift_replicator)
      RailsRedshiftReplicator.replicables.merge! hash
    end

    # Rebuilds the registry, re-instantiating every registered replicable from
    # its replication type and options.
    #
    # The registered entries are captured before the registry is emptied; the
    # previous implementation assigned a new *local* hash and iterated that,
    # so nothing was ever reloaded.
    # @return [Hash] the rebuilt registry
    def reload_replicables
      previous = RailsRedshiftReplicator.replicables || {}
      RailsRedshiftReplicator.replicables = {}.with_indifferent_access
      previous.each do |name, replicable|
        add_replicable(name => RailsRedshiftReplicator::Replicable.new(replicable.replication_type, replicable.options))
      end
      RailsRedshiftReplicator.replicables
    end

    # Performs full replication (export + import)
    # @param tables [Array<Symbol>, Argument list] tables to replicate or :all
    # @example Replicate users and posts.
    #   RailsRedshiftReplicator.replicate(:users, :posts)
    # @example Replicate all registered tables
    #   RailsRedshiftReplicator.replicate(:all)
    # @raise [StandardError] when no table is given
    def replicate(*tables)
      check_args(tables)
      replicable_definitions(tables_to_perform(tables)).each do |_, replicable|
        replicable.export
        replicable.import
      end
    end

    # Exports the given tables without importing them.
    # @see .replicate
    def export(*tables)
      check_args(tables)
      replicable_definitions(tables_to_perform(tables)).each { |_, replicable| replicable.export }
    end

    # Imports the given tables.
    # @see .replicate
    def import(*tables)
      check_args(tables)
      replicable_definitions(tables_to_perform(tables)).each { |_, replicable| replicable.import }
    end

    # Logs and raises when the table list is empty.
    # @param tables [Array] arguments received by replicate/export/import
    # @raise [StandardError] when tables is an empty array
    def check_args(tables)
      return unless tables == []

      error_message = I18n.t(:must_specify_tables, scope: :rails_redshift_replicator)
      logger.error error_message
      raise StandardError, error_message
    end

    # Runs Tools::Vacuum with the given arguments.
    def vacuum(*args)
      Tools::Vacuum.new(*args).perform
    end

    # Runs Tools::Analyze with the given arguments.
    def analyze(*args)
      Tools::Analyze.new(*args).perform
    end

    # Lists exporters names
    # @return [Array<String>]
    def base_exporter_types
      %w(identity_replicator timed_replicator full_replicator)
    end

    # All replicable tables registered in RailsRedshiftReplicator,
    # either from the model or directly.
    # @return [Array<String>] tables
    def replicable_tables
      RailsRedshiftReplicator.replicables.keys.map(&:to_s)
    end

    # Target table of every registered replicable.
    # NOTE(review): reads each entry with [:target_table]; the registry holds
    # Replicable objects, so this assumes Replicable responds to #[] - confirm.
    def replicable_target_tables
      RailsRedshiftReplicator.replicables.map { |_, replicable| replicable[:target_table] }
    end

    # @param tables [Array<String, Symbol>] table names to keep
    # @return [Hash] subset of key pairs of replicables
    def replicable_definitions(tables)
      wanted = tables.map(&:to_s) # computed once instead of once per registry key
      RailsRedshiftReplicator.replicables.select { |name, _| wanted.include?(name.to_s) }
    end

    # Returns tables to export. :all (as first element) selects all eligible.
    # Names that are not registered are dropped after logging a warning.
    # @return [Array<String>] tables to export
    def tables_to_perform(tables)
      tables = Array(tables).map(&:to_s)
      return replicable_tables if tables.first == 'all'

      (replicable_tables & tables).tap do |selected|
        warn_if_unreplicable(tables - selected)
      end
    end

    # Logs one warning per table that is not registered as replicable.
    def warn_if_unreplicable(tables)
      tables.each { |table| logger.warn I18n.t(:table_not_replicable, table_name: table, scope: :rails_redshift_replicator) }
    end

    # Redshift connection, opened on first use and then reused.
    # @return [PG::Connection]
    def connection
      @redshift ||= PG.connect(redshift_connection_params)
    end
  end
end
229
+ RailsRedshiftReplicator.define_defaults