db_blaster 0.1.1 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d612afe45176a37521310c9f694cb2d4b0efe1d42406da82f04bf5fca3187b86
4
- data.tar.gz: 25aae3fae57025cfcc72d14e192b21950c73baa804b4270c7de2c1377bc09686
3
+ metadata.gz: 2de00cf222282fde0e0a3e38078ccc398a80ff19c41ccf7b79ecd043450db6b1
4
+ data.tar.gz: 66b93222044a44bcd7dd35a6233de5c4839a58d5106e527a7d540be2daee9821
5
5
  SHA512:
6
- metadata.gz: 28412dbbfae7b89fe57afd4e40fd4bc43bfb2c126a5e3fc67e632d776017f07d2f15d7d70da537344e59aa3021bff30131f905305d00c0784ab156a71185efe9
7
- data.tar.gz: 49c856817e7c650bb36b5a74b3395fcdcd075439c2831aebe720eeda381fcc5e36fd08fbacdb1a4c5bd2fde2ec86e647a14a9a1975a023cc40e76019665e7bb0
6
+ metadata.gz: 7305baf42dff923be85d8105625bf3ccf8200988d7895c80202a32232b062a9c613863a002a7b8e57d7e57a3660725ab4ed6765d2b2d3ba49f3b216da2614dac
7
+ data.tar.gz: 8652430ce86f2ee275997ec35e52bf2ba99befa4c3a8630b13e0ca57f309d4b0fb013974806e52882bc37bd95a6c4e9e9d4ea51fa5a2096d2767df0d3608b796
data/README.md CHANGED
@@ -2,15 +2,15 @@
2
2
  # DbBlaster
3
3
  ![Image of DB to SNS](https://lucid.app/publicSegments/view/c70feed3-2f48-46ee-8734-423474488feb/image.png)
4
4
 
5
- DbBlaster publishes changed database rows to AWS SNS. The first time `DbBlaster::PublishAllJob.perform_later` is ran,
6
- the entire database will be incrementally published to SNS. Subsequent runs will publish rows whose `updated_at` column
5
+ DbBlaster can either publish changed database rows to AWS SNS or push the changes to S3. The first time `DbBlaster::PublishAllJob.perform_later` is run,
6
+ the entire database will be incrementally published. Subsequent runs will publish rows whose `updated_at` column
7
7
  is more recent than the last run.
8
8
 
9
9
  Consuming the published messages is functionality not provided by DbBlaster.
10
10
 
11
11
  ## Usage
12
12
 
13
- Update `config/initializers/db_blaster_config.rb` with valid AWS credentials, topics, and options.
13
+ Update `config/initializers/db_blaster_config.rb` with valid AWS credentials and options. Either `sns_topic` or `s3_bucket` must be set!
14
14
 
15
15
  Schedule `DbBlaster::PublishAllJob.perform_later` to run periodically with something
16
16
  like [sidekiq-cron](https://github.com/ondrejbartas/sidekiq-cron) or [whenever](https://github.com/javan/whenever)
@@ -13,8 +13,12 @@ module DbBlaster
13
13
  .build_all(DbBlaster.configuration))
14
14
 
15
15
  DbBlaster::SourceTable.pluck(:id).each do |source_table_id|
16
- PublishSourceTableJob.perform_later(source_table_id)
16
+ PublishSourceTableJob.perform_later(source_table_id, batch_start_time)
17
17
  end
18
18
  end
19
+
20
+ def batch_start_time
21
+ @batch_start_time ||= DateTime.now.utc.strftime(DbBlaster::Configuration::DEFAULT_DATETIME_FORMAT)
22
+ end
19
23
  end
20
24
  end
@@ -6,11 +6,11 @@ module DbBlaster
6
6
  class PublishSourceTableJob < ApplicationJob
7
7
  queue_as 'default'
8
8
 
9
- def perform(source_table_id)
9
+ def perform(source_table_id, batch_start_time)
10
10
  source_table = SourceTable.find_by(id: source_table_id)
11
11
  return unless source_table
12
12
 
13
- PublishSourceTable.execute(source_table)
13
+ PublishSourceTable.execute(source_table: source_table, batch_start_time: batch_start_time)
14
14
  end
15
15
  end
16
16
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ # publish records to an SNS topic or an S3 bucket
4
+ module DbBlaster
5
+ # Base class for publishing
6
+ class BasePublisher
7
+ attr_reader :source_table, :records, :batch_start_time
8
+
9
+ def initialize(source_table, records, batch_start_time)
10
+ @source_table = source_table
11
+ @records = records
12
+ @batch_start_time = batch_start_time
13
+ end
14
+
15
+ def self.publish(source_table:, records:, batch_start_time:)
16
+ publisher_class =
17
+ if DbBlaster.configuration.sns_topic
18
+ SnsPublisher
19
+ else
20
+ S3Publisher
21
+ end
22
+ publisher_class.new(source_table, records, batch_start_time).publish
23
+ end
24
+
25
+ def publish
26
+ raise NotImplementedError
27
+ end
28
+ end
29
+ end
@@ -2,17 +2,55 @@
2
2
 
3
3
  module DbBlaster
4
4
  # Configuration class for providing credentials, topics, and customizations.
5
+ # Either `sns_topic` or `s3_bucket` must be set
5
6
  class Configuration
6
7
  DEFAULT_BATCH_SIZE = 100
7
8
  DEFAULT_MAX_MESSAGE_SIZE_IN_KILOBYTES = 256 # max size allowed by AWS SNS
9
+ DEFAULT_S3_KEY = '<batch_date>/<batch_time>/db_blaster/<table_name>/<uuid>.json'
10
+ DEFAULT_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%LZ'
8
11
 
9
12
  # The required configuration fields
10
- REQUIRED_FIELDS = %i[aws_access_key aws_access_secret aws_region sns_topic].freeze
13
+ REQUIRED_FIELDS = %i[aws_access_key aws_access_secret aws_region].freeze
14
+
15
+ # exactly one of these fields needs to be set
16
+ EITHER_OR_FIELDS = %i[sns_topic s3_bucket].freeze
11
17
 
12
18
  # The topic to which messages will be published
13
19
  attr_accessor :sns_topic
20
+ # The s3 bucket name
21
+ attr_accessor :s3_bucket
14
22
  attr_accessor :aws_access_key, :aws_access_secret, :aws_region
15
23
 
24
+ # Optional
25
+ # Applicable only when `sns_topic` is set
26
+ # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
27
+ # Attributes set here will be included in every published message
28
+ # example: config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
29
+ attr_accessor :extra_sns_message_attributes
30
+
31
+ # Optional
32
+ # Applicable only when `s3_bucket` is set
33
+ # The value set here will be included in every payload pushed to S3
34
+ # example: config.s3_meta = {'infra_id' => '061', 'source_app' => 'kcp-api'}
35
+ # The resulting JSON will include the `meta` merged into every record
36
+ attr_accessor :s3_meta
37
+
38
+ # Optional
39
+ # Applicable only when `s3_bucket` is set
40
+ # The S3 key. The following values will get substituted:
41
+ # <batch_date_time> - a timestamp signifying the beginning of the batch processing
42
+ # <timestamp> - the current time
43
+ # <table_name> - the name of the table associated with the S3 body
44
+ # <uuid> - a universal identifier
45
+ # example: '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
46
+ attr_accessor :s3_key
47
+
48
+ # Optional
49
+ # Applicable only when `s3_bucket` is set
50
+ # S3 Tags
51
+ # example: config.s3_tags = { infra_id: '001', source_app: 'kcp-api', source_table: 'meetings' }
52
+ attr_accessor :s3_tags
53
+
16
54
  # Global list of column names not to include in published SNS messages
17
55
  # example: config.ignored_column_names = ['email', 'phone_number']
18
56
  attr_accessor :ignored_column_names
@@ -22,6 +60,11 @@ module DbBlaster
22
60
  # example: config.only_source_tables = ['posts', 'tags', 'comments']
23
61
  attr_accessor :only_source_tables
24
62
 
63
+ # Optional
64
+ # If set, ignore source tables specified.
65
+ # example: config.ignore_source_tables = ['active_storage_blobs']
66
+ attr_accessor :ignore_source_tables
67
+
25
68
  # Optional
26
69
  # Customize batch_size and/or ignored_columns
27
70
  # example:
@@ -29,12 +72,6 @@ module DbBlaster
29
72
  # { source_table_name: 'comments', ignored_column_names: ['tags'] }]
30
73
  attr_accessor :source_table_options
31
74
 
32
- # Optional
33
- # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
34
- # Attributes set here will be included in every published message
35
- # example: config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
36
- attr_accessor :extra_sns_message_attributes
37
-
38
75
  # Optional
39
76
  # db_blaster will select and then publish `batch_size` rows at a time
40
77
  # Default value is 100
@@ -47,12 +84,22 @@ module DbBlaster
47
84
 
48
85
  # Raises error if a required field is not set
49
86
  def verify!
50
- no_values = REQUIRED_FIELDS.select do |attribute|
87
+ verify_required
88
+ verify_either_or
89
+ end
90
+
91
+ def verify_either_or
92
+ either_or = EITHER_OR_FIELDS.select do |attribute|
51
93
  send(attribute).nil? || send(attribute).strip.empty?
52
94
  end
53
- return if no_values.empty?
95
+ raise "only one of [#{either_or.join(', ')}] should be set" unless either_or.length == 1
96
+ end
54
97
 
55
- raise "missing configuration values for [#{no_values.join(', ')}]"
98
+ def verify_required
99
+ no_values = REQUIRED_FIELDS.select do |attribute|
100
+ send(attribute).nil? || send(attribute).strip.empty?
101
+ end
102
+ raise "missing configuration values for [#{no_values.join(', ')}]" unless no_values.empty?
56
103
  end
57
104
  end
58
105
  end
@@ -4,14 +4,15 @@ module DbBlaster
4
4
  # Given a `source_table` providing the table name,
5
5
  # finds rows in `batch_size` chunks that are published to SNS
6
6
  class PublishSourceTable
7
- attr_reader :source_table
7
+ attr_reader :source_table, :batch_start_time
8
8
 
9
- def initialize(source_table)
9
+ def initialize(source_table, batch_start_time)
10
10
  @source_table = source_table
11
+ @batch_start_time = batch_start_time
11
12
  end
12
13
 
13
- def self.execute(source_table)
14
- new(source_table).execute
14
+ def self.execute(source_table:, batch_start_time:)
15
+ new(source_table, batch_start_time).execute
15
16
  end
16
17
 
17
18
  def execute
@@ -20,7 +21,7 @@ module DbBlaster
20
21
  # pessimistically lock row for the duration
21
22
  source_table.with_lock do
22
23
  Finder.find(source_table) do |records|
23
- Publisher.publish(source_table, records)
24
+ BasePublisher.publish(source_table: source_table, records: records, batch_start_time: batch_start_time)
24
25
  source_table.update(last_published_updated_at: records.last['updated_at'])
25
26
  end
26
27
  end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DbBlaster
4
+ # Builds the key to be used for the uploaded S3 Object
5
+ class S3KeyBuilder
6
+ attr_reader :source_table_name, :batch_start_time
7
+
8
+ def initialize(source_table_name, batch_start_time)
9
+ @source_table_name = source_table_name
10
+ @batch_start_time = batch_start_time
11
+ end
12
+
13
+ def self.build(source_table_name:, batch_start_time:)
14
+ new(source_table_name, batch_start_time).build
15
+ end
16
+
17
+ def build
18
+ key = starting_key
19
+ substitutions.each do |replace, value|
20
+ key = key.gsub(replace, value)
21
+ end
22
+ key
23
+ end
24
+
25
+ def substitutions
26
+ date, time = batch_start_time.split('T')
27
+ { '<batch_date_time>' => batch_start_time,
28
+ '<batch_date>' => date,
29
+ '<batch_time>' => time,
30
+ '<uuid>' => SecureRandom.uuid,
31
+ '<table_name>' => source_table_name }
32
+ end
33
+
34
+ def starting_key
35
+ DbBlaster.configuration.s3_key.presence || Configuration::DEFAULT_S3_KEY
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aws-sdk-s3'
4
+
5
+ module DbBlaster
6
+ # Pushes records to S3
7
+ class S3Publisher < BasePublisher
8
+ def publish
9
+ client.put_object(bucket: DbBlaster.configuration.s3_bucket,
10
+ key: S3KeyBuilder.build(source_table_name: source_table.name,
11
+ batch_start_time: batch_start_time),
12
+ tagging: tagging,
13
+ body: content.to_json)
14
+ end
15
+
16
+ def content
17
+ meta_records
18
+ end
19
+
20
+ def tagging
21
+ URI.encode_www_form(tags_hash)
22
+ end
23
+
24
+ def tags_hash
25
+ @tags_hash ||= { source_table: source_table.name }
26
+ .merge(DbBlaster.configuration.s3_tags.presence || {})
27
+ end
28
+
29
+ def meta
30
+ @meta ||= (DbBlaster.configuration.s3_meta.presence || {}).merge(source_table: source_table.name)
31
+ end
32
+
33
+ def meta_records
34
+ records.collect { |record| record.merge(meta) }
35
+ end
36
+
37
+ def client
38
+ @client ||= Aws::S3::Client.new(region: DbBlaster.configuration.aws_region,
39
+ credentials: Aws::Credentials.new(DbBlaster.configuration.aws_access_key,
40
+ DbBlaster.configuration.aws_access_secret))
41
+ end
42
+ end
43
+ end
@@ -1,20 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # publish records to SNS topic
4
- module DbBlaster
5
- # Publishes records to AWS SNS
6
- class Publisher
7
- attr_reader :source_table, :records
8
-
9
- def initialize(source_table, records)
10
- @source_table = source_table
11
- @records = records
12
- end
3
+ require 'aws-sdk-sns'
13
4
 
14
- def self.publish(source_table, records)
15
- new(source_table, records).publish
16
- end
5
+ # frozen_string_literal: true
17
6
 
7
+ module DbBlaster
8
+ # Publishes records to AWS SNS
9
+ class SnsPublisher < BasePublisher
18
10
  def publish
19
11
  topic.publish(message_attributes: message_attributes,
20
12
  message: records.to_json)
@@ -45,11 +45,12 @@ module DbBlaster
45
45
  end
46
46
 
47
47
  def table_names_for_configuration
48
- if configuration.only_source_tables&.length&.positive?
49
- available_tables & configuration.only_source_tables
50
- else
51
- available_tables
52
- end
48
+ table_names = if configuration.only_source_tables&.length&.positive?
49
+ available_tables & configuration.only_source_tables
50
+ else
51
+ available_tables
52
+ end
53
+ table_names - (configuration.ignore_source_tables || [])
53
54
  end
54
55
  end
55
56
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DbBlaster
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.5'
5
5
  end
data/lib/db_blaster.rb CHANGED
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'aws-sdk-sns'
4
3
  require 'db_blaster/version'
5
4
  require 'db_blaster/engine'
6
5
  require 'db_blaster/one_record_too_large_error'
@@ -8,7 +7,10 @@ require 'db_blaster/available_tables'
8
7
  require 'db_blaster/configuration'
9
8
  require 'db_blaster/source_table_configuration'
10
9
  require 'db_blaster/source_table_configuration_builder'
11
- require 'db_blaster/publisher'
10
+ require 'db_blaster/base_publisher'
11
+ require 'db_blaster/s3_key_builder'
12
+ require 'db_blaster/s3_publisher'
13
+ require 'db_blaster/sns_publisher'
12
14
  require 'db_blaster/publish_source_table'
13
15
  require 'db_blaster/chunker'
14
16
  require 'db_blaster/finder_sql'
@@ -1,12 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Either `sns_topic` or `s3_bucket` must be set
3
4
  DbBlaster.configure do |config|
4
- # SNS topic to receive database changes
5
+ # SNS topic to receive database changes. Either `sns_topic` or `s3_bucket` must be set
5
6
  config.sns_topic = 'the-topic'
6
7
  config.aws_access_key = 'access-key'
7
8
  config.aws_access_secret = 'secret'
8
9
  config.aws_region = 'region'
9
10
 
11
+ # Optional
12
+ # Applicable only when `sns_topic` is set
13
+ # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
14
+ # Attributes set here will be included in every published message
15
+ # config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
16
+
17
+ # S3 bucket where JSON will be pushed. Either `sns_topic` or `s3_bucket` must be set
18
+ # config.s3_bucket = 'bucket-name'
19
+
20
+ # Optional
21
+ # Applicable only when `s3_bucket` is set
22
+ # The S3 key path. The following values will get substituted:
23
+ # <batch_date_time> - a timestamp signifying the beginning of the batch processing
24
+ # <batch_date> - a date signifying the beginning of the batch processing
25
+ # <batch_time> - a time signifying the beginning of the batch processing
26
+ # <timestamp> - the current time
27
+ # <table_name> - the name of the table associated with the S3 body
28
+ # <uuid> - a universal identifier
29
+ # config.s3_key = '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
30
+
31
+ # Optional
32
+ # Applicable only when `s3_bucket` is set
33
+ # Extra meta values sent along with each payload
34
+ # example: config.s3_meta = {'infra_id' => '061'}
35
+ # The resulting JSON will include the `meta` merged into every record.
36
+ # config.s3_meta = {'infra_id' => '061'}
37
+
10
38
  # Optional
11
39
  # db_blaster will select and then publish `batch_size` rows at a time
12
40
  # config.batch_size = 100
@@ -14,12 +42,7 @@ DbBlaster.configure do |config|
14
42
  # Optional
15
43
  # db_blaster will publish no messages larger than this value
16
44
  # Default value is 256
17
- # attr_accessor :max_message_size_in_kilobytes
18
-
19
- # Optional
20
- # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
21
- # Attributes set here will be included in every published message
22
- # config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
45
+ # config.max_message_size_in_kilobytes = 256
23
46
 
24
47
  # Global list of column names not to include in published SNS messages
25
48
  # example: config.ignored_column_names = ['email', 'phone_number']
@@ -29,6 +52,10 @@ DbBlaster.configure do |config|
29
52
  # If set, only publish tables specified.
30
53
  # config.only_source_tables = ['posts', 'tags', 'comments']
31
54
 
55
+ # Optional
56
+ # If set, ignore source tables specified.
57
+ # config.ignore_source_tables = ['active_storage_blobs']
58
+
32
59
  # Optional
33
60
  # Customize batch_size and/or ignored_columns
34
61
  # example:
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: db_blaster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Perry Hertler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-11 00:00:00.000000000 Z
11
+ date: 2021-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: aws-sdk-s3
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: aws-sdk-sns
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -119,6 +133,7 @@ files:
119
133
  - db/migrate/20210727222252_create_source_tables.rb
120
134
  - lib/db_blaster.rb
121
135
  - lib/db_blaster/available_tables.rb
136
+ - lib/db_blaster/base_publisher.rb
122
137
  - lib/db_blaster/chunker.rb
123
138
  - lib/db_blaster/configuration.rb
124
139
  - lib/db_blaster/engine.rb
@@ -126,8 +141,10 @@ files:
126
141
  - lib/db_blaster/finder_sql.rb
127
142
  - lib/db_blaster/one_record_too_large_error.rb
128
143
  - lib/db_blaster/publish_source_table.rb
129
- - lib/db_blaster/publisher.rb
130
144
  - lib/db_blaster/rspec.rb
145
+ - lib/db_blaster/s3_key_builder.rb
146
+ - lib/db_blaster/s3_publisher.rb
147
+ - lib/db_blaster/sns_publisher.rb
131
148
  - lib/db_blaster/source_table_configuration.rb
132
149
  - lib/db_blaster/source_table_configuration_builder.rb
133
150
  - lib/db_blaster/version.rb