db_blaster 0.1.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d612afe45176a37521310c9f694cb2d4b0efe1d42406da82f04bf5fca3187b86
4
- data.tar.gz: 25aae3fae57025cfcc72d14e192b21950c73baa804b4270c7de2c1377bc09686
3
+ metadata.gz: 2de00cf222282fde0e0a3e38078ccc398a80ff19c41ccf7b79ecd043450db6b1
4
+ data.tar.gz: 66b93222044a44bcd7dd35a6233de5c4839a58d5106e527a7d540be2daee9821
5
5
  SHA512:
6
- metadata.gz: 28412dbbfae7b89fe57afd4e40fd4bc43bfb2c126a5e3fc67e632d776017f07d2f15d7d70da537344e59aa3021bff30131f905305d00c0784ab156a71185efe9
7
- data.tar.gz: 49c856817e7c650bb36b5a74b3395fcdcd075439c2831aebe720eeda381fcc5e36fd08fbacdb1a4c5bd2fde2ec86e647a14a9a1975a023cc40e76019665e7bb0
6
+ metadata.gz: 7305baf42dff923be85d8105625bf3ccf8200988d7895c80202a32232b062a9c613863a002a7b8e57d7e57a3660725ab4ed6765d2b2d3ba49f3b216da2614dac
7
+ data.tar.gz: 8652430ce86f2ee275997ec35e52bf2ba99befa4c3a8630b13e0ca57f309d4b0fb013974806e52882bc37bd95a6c4e9e9d4ea51fa5a2096d2767df0d3608b796
data/README.md CHANGED
@@ -2,15 +2,15 @@
2
2
  # DbBlaster
3
3
  ![Image of DB to SNS](https://lucid.app/publicSegments/view/c70feed3-2f48-46ee-8734-423474488feb/image.png)
4
4
 
5
- DbBlaster publishes changed database rows to AWS SNS. The first time `DbBlaster::PublishAllJob.perform_later` is ran,
6
- the entire database will be incrementally published to SNS. Subsequent runs will publish rows whose `updated_at` column
5
+ DbBlaster can either publish changed database rows to AWS SNS or push the changes to S3. The first time `DbBlaster::PublishAllJob.perform_later` is run,
6
+ the entire database will be incrementally published. Subsequent runs will publish rows whose `updated_at` column
7
7
  is more recent than the last run.
8
8
 
9
9
  Consuming the published messages is functionality not provided by DbBlaster.
10
10
 
11
11
  ## Usage
12
12
 
13
- Update `config/initializers/db_blaster_config.rb` with valid AWS credentials, topics, and options.
13
+ Update `config/initializers/db_blaster_config.rb` with valid AWS credentials and options. Either `sns_topic` or `s3_bucket` must be set!
14
14
 
15
15
  Schedule `DbBlaster::PublishAllJob.perform_later` to run periodically with something
16
16
  like [sidekiq-cron](https://github.com/ondrejbartas/sidekiq-cron) or [whenever](https://github.com/javan/whenever)
@@ -13,8 +13,12 @@ module DbBlaster
13
13
  .build_all(DbBlaster.configuration))
14
14
 
15
15
  DbBlaster::SourceTable.pluck(:id).each do |source_table_id|
16
- PublishSourceTableJob.perform_later(source_table_id)
16
+ PublishSourceTableJob.perform_later(source_table_id, batch_start_time)
17
17
  end
18
18
  end
19
+
20
+ def batch_start_time
21
+ @batch_start_time ||= DateTime.now.utc.strftime(DbBlaster::Configuration::DEFAULT_DATETIME_FORMAT)
22
+ end
19
23
  end
20
24
  end
@@ -6,11 +6,11 @@ module DbBlaster
6
6
  class PublishSourceTableJob < ApplicationJob
7
7
  queue_as 'default'
8
8
 
9
- def perform(source_table_id)
9
+ def perform(source_table_id, batch_start_time)
10
10
  source_table = SourceTable.find_by(id: source_table_id)
11
11
  return unless source_table
12
12
 
13
- PublishSourceTable.execute(source_table)
13
+ PublishSourceTable.execute(source_table: source_table, batch_start_time: batch_start_time)
14
14
  end
15
15
  end
16
16
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ # publish records to either an SNS topic or an S3 bucket
4
+ module DbBlaster
5
+ # Base class for publishing
6
+ class BasePublisher
7
+ attr_reader :source_table, :records, :batch_start_time
8
+
9
+ def initialize(source_table, records, batch_start_time)
10
+ @source_table = source_table
11
+ @records = records
12
+ @batch_start_time = batch_start_time
13
+ end
14
+
15
+ def self.publish(source_table:, records:, batch_start_time:)
16
+ publisher_class =
17
+ if DbBlaster.configuration.sns_topic
18
+ SnsPublisher
19
+ else
20
+ S3Publisher
21
+ end
22
+ publisher_class.new(source_table, records, batch_start_time).publish
23
+ end
24
+
25
+ def publish
26
+ raise NotImplementedError
27
+ end
28
+ end
29
+ end
@@ -2,17 +2,55 @@
2
2
 
3
3
  module DbBlaster
4
4
  # Configuration class for providing credentials, topics, and customizations.
5
+ # Either `sns_topic` or `s3_bucket` must be set (but not both)
5
6
  class Configuration
6
7
  DEFAULT_BATCH_SIZE = 100
7
8
  DEFAULT_MAX_MESSAGE_SIZE_IN_KILOBYTES = 256 # max size allowed by AWS SNS
9
+ DEFAULT_S3_KEY = '<batch_date>/<batch_time>/db_blaster/<table_name>/<uuid>.json'
10
+ DEFAULT_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%LZ'
8
11
 
9
12
  # The required configuration fields
10
- REQUIRED_FIELDS = %i[aws_access_key aws_access_secret aws_region sns_topic].freeze
13
+ REQUIRED_FIELDS = %i[aws_access_key aws_access_secret aws_region].freeze
14
+
15
+ # exactly one of these fields needs to be set
16
+ EITHER_OR_FIELDS = %i[sns_topic s3_bucket].freeze
11
17
 
12
18
  # The topic to which messages will be published
13
19
  attr_accessor :sns_topic
20
+ # The s3 bucket name
21
+ attr_accessor :s3_bucket
14
22
  attr_accessor :aws_access_key, :aws_access_secret, :aws_region
15
23
 
24
+ # Optional
25
+ # Applicable only when `sns_topic` is set
26
+ # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
27
+ # Attributes set here will be included in every published message
28
+ # example: config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
29
+ attr_accessor :extra_sns_message_attributes
30
+
31
+ # Optional
32
+ # Applicable only when `s3_bucket` is set
33
+ # The value set here will be included in every payload pushed to S3
34
+ # example: config.s3_meta = {'infra_id' => '061', 'source_app' => 'kcp-api'}
35
+ # The resulting JSON will include the `meta` merged into every record
36
+ attr_accessor :s3_meta
37
+
38
+ # Optional
39
+ # Applicable only when `s3_bucket` is set
40
+ # The S3 key. The following values will get substituted:
41
+ # <batch_date_time> - a timestamp signifying the beginning of the batch processing (also available split as <batch_date> and <batch_time>)
42
+ # <timestamp> - the current time (NOTE: S3KeyBuilder does not currently substitute this placeholder)
43
+ # <table_name> - the name of the table associated with the S3 body
44
+ # <uuid> - a universal identifier
45
+ # example: '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
46
+ attr_accessor :s3_key
47
+
48
+ # Optional
49
+ # Applicable only when `s3_bucket` is set
50
+ # S3 Tags
51
+ # example: config.s3_tags = { infra_id: '001', source_app: 'kcp-api', source_table: 'meetings' }
52
+ attr_accessor :s3_tags
53
+
16
54
  # Global list of column names not to include in published SNS messages
17
55
  # example: config.ignored_column_names = ['email', 'phone_number']
18
56
  attr_accessor :ignored_column_names
@@ -22,6 +60,11 @@ module DbBlaster
22
60
  # example: config.only_source_tables = ['posts', 'tags', 'comments']
23
61
  attr_accessor :only_source_tables
24
62
 
63
+ # Optional
64
+ # If set, ignore source tables specified.
65
+ # example: config.ignore_source_tables = ['active_storage_blobs']
66
+ attr_accessor :ignore_source_tables
67
+
25
68
  # Optional
26
69
  # Customize batch_size and/or ignored_columns
27
70
  # example:
@@ -29,12 +72,6 @@ module DbBlaster
29
72
  # { source_table_name: 'comments', ignored_column_names: ['tags'] }]
30
73
  attr_accessor :source_table_options
31
74
 
32
- # Optional
33
- # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
34
- # Attributes set here will be included in every published message
35
- # example: config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
36
- attr_accessor :extra_sns_message_attributes
37
-
38
75
  # Optional
39
76
  # db_blaster will select and then publish `batch_size` rows at a time
40
77
  # Default value is 100
@@ -47,12 +84,22 @@ module DbBlaster
47
84
 
48
85
  # Raises error if a required field is not set
49
86
  def verify!
50
- no_values = REQUIRED_FIELDS.select do |attribute|
87
+ verify_required
88
+ verify_either_or
89
+ end
90
+
91
+ def verify_either_or
92
+ either_or = EITHER_OR_FIELDS.select do |attribute|
51
93
  send(attribute).nil? || send(attribute).strip.empty?
52
94
  end
53
- return if no_values.empty?
95
+ raise "only one of [#{either_or.join(', ')}] should be set" unless either_or.length == 1
96
+ end
54
97
 
55
- raise "missing configuration values for [#{no_values.join(', ')}]"
98
+ def verify_required
99
+ no_values = REQUIRED_FIELDS.select do |attribute|
100
+ send(attribute).nil? || send(attribute).strip.empty?
101
+ end
102
+ raise "missing configuration values for [#{no_values.join(', ')}]" unless no_values.empty?
56
103
  end
57
104
  end
58
105
  end
@@ -4,14 +4,15 @@ module DbBlaster
4
4
  # Given a `source_table` providing the table name,
5
5
  # finds rows in `batch_size` chunks that are published to SNS
6
6
  class PublishSourceTable
7
- attr_reader :source_table
7
+ attr_reader :source_table, :batch_start_time
8
8
 
9
- def initialize(source_table)
9
+ def initialize(source_table, batch_start_time)
10
10
  @source_table = source_table
11
+ @batch_start_time = batch_start_time
11
12
  end
12
13
 
13
- def self.execute(source_table)
14
- new(source_table).execute
14
+ def self.execute(source_table:, batch_start_time:)
15
+ new(source_table, batch_start_time).execute
15
16
  end
16
17
 
17
18
  def execute
@@ -20,7 +21,7 @@ module DbBlaster
20
21
  # pessimistically lock row for the duration
21
22
  source_table.with_lock do
22
23
  Finder.find(source_table) do |records|
23
- Publisher.publish(source_table, records)
24
+ BasePublisher.publish(source_table: source_table, records: records, batch_start_time: batch_start_time)
24
25
  source_table.update(last_published_updated_at: records.last['updated_at'])
25
26
  end
26
27
  end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DbBlaster
4
+ # Builds the key to be used for the uploaded S3 Object
5
+ class S3KeyBuilder
6
+ attr_reader :source_table_name, :batch_start_time
7
+
8
+ def initialize(source_table_name, batch_start_time)
9
+ @source_table_name = source_table_name
10
+ @batch_start_time = batch_start_time
11
+ end
12
+
13
+ def self.build(source_table_name:, batch_start_time:)
14
+ new(source_table_name, batch_start_time).build
15
+ end
16
+
17
+ def build
18
+ key = starting_key
19
+ substitutions.each do |replace, value|
20
+ key = key.gsub(replace, value)
21
+ end
22
+ key
23
+ end
24
+
25
+ def substitutions
26
+ date, time = batch_start_time.split('T')
27
+ { '<batch_date_time>' => batch_start_time,
28
+ '<batch_date>' => date,
29
+ '<batch_time>' => time,
30
+ '<uuid>' => SecureRandom.uuid,
31
+ '<table_name>' => source_table_name }
32
+ end
33
+
34
+ def starting_key
35
+ DbBlaster.configuration.s3_key.presence || Configuration::DEFAULT_S3_KEY
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'aws-sdk-s3'
4
+
5
+ module DbBlaster
6
+ # Pushes records to S3
7
+ class S3Publisher < BasePublisher
8
+ def publish
9
+ client.put_object(bucket: DbBlaster.configuration.s3_bucket,
10
+ key: S3KeyBuilder.build(source_table_name: source_table.name,
11
+ batch_start_time: batch_start_time),
12
+ tagging: tagging,
13
+ body: content.to_json)
14
+ end
15
+
16
+ def content
17
+ meta_records
18
+ end
19
+
20
+ def tagging
21
+ URI.encode_www_form(tags_hash)
22
+ end
23
+
24
+ def tags_hash
25
+ @tags_hash ||= { source_table: source_table.name }
26
+ .merge(DbBlaster.configuration.s3_tags.presence || {})
27
+ end
28
+
29
+ def meta
30
+ @meta ||= (DbBlaster.configuration.s3_meta.presence || {}).merge(source_table: source_table.name)
31
+ end
32
+
33
+ def meta_records
34
+ records.collect { |record| record.merge(meta) }
35
+ end
36
+
37
+ def client
38
+ @client ||= Aws::S3::Client.new(region: DbBlaster.configuration.aws_region,
39
+ credentials: Aws::Credentials.new(DbBlaster.configuration.aws_access_key,
40
+ DbBlaster.configuration.aws_access_secret))
41
+ end
42
+ end
43
+ end
@@ -1,20 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # publish records to SNS topic
4
- module DbBlaster
5
- # Publishes records to AWS SNS
6
- class Publisher
7
- attr_reader :source_table, :records
8
-
9
- def initialize(source_table, records)
10
- @source_table = source_table
11
- @records = records
12
- end
3
+ require 'aws-sdk-sns'
13
4
 
14
- def self.publish(source_table, records)
15
- new(source_table, records).publish
16
- end
5
+ # frozen_string_literal: true
17
6
 
7
+ module DbBlaster
8
+ # Publishes records to AWS SNS
9
+ class SnsPublisher < BasePublisher
18
10
  def publish
19
11
  topic.publish(message_attributes: message_attributes,
20
12
  message: records.to_json)
@@ -45,11 +45,12 @@ module DbBlaster
45
45
  end
46
46
 
47
47
  def table_names_for_configuration
48
- if configuration.only_source_tables&.length&.positive?
49
- available_tables & configuration.only_source_tables
50
- else
51
- available_tables
52
- end
48
+ table_names = if configuration.only_source_tables&.length&.positive?
49
+ available_tables & configuration.only_source_tables
50
+ else
51
+ available_tables
52
+ end
53
+ table_names - (configuration.ignore_source_tables || [])
53
54
  end
54
55
  end
55
56
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DbBlaster
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.5'
5
5
  end
data/lib/db_blaster.rb CHANGED
@@ -1,6 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'aws-sdk-sns'
4
3
  require 'db_blaster/version'
5
4
  require 'db_blaster/engine'
6
5
  require 'db_blaster/one_record_too_large_error'
@@ -8,7 +7,10 @@ require 'db_blaster/available_tables'
8
7
  require 'db_blaster/configuration'
9
8
  require 'db_blaster/source_table_configuration'
10
9
  require 'db_blaster/source_table_configuration_builder'
11
- require 'db_blaster/publisher'
10
+ require 'db_blaster/base_publisher'
11
+ require 'db_blaster/s3_key_builder'
12
+ require 'db_blaster/s3_publisher'
13
+ require 'db_blaster/sns_publisher'
12
14
  require 'db_blaster/publish_source_table'
13
15
  require 'db_blaster/chunker'
14
16
  require 'db_blaster/finder_sql'
@@ -1,12 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Either `sns_topic` or `s3_bucket` must be set
3
4
  DbBlaster.configure do |config|
4
- # SNS topic to receive database changes
5
+ # SNS topic to receive database changes. Either `sns_topic` or `s3_bucket` must be set
5
6
  config.sns_topic = 'the-topic'
6
7
  config.aws_access_key = 'access-key'
7
8
  config.aws_access_secret = 'secret'
8
9
  config.aws_region = 'region'
9
10
 
11
+ # Optional
12
+ # Applicable only when `sns_topic` is set
13
+ # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
14
+ # Attributes set here will be included in every published message
15
+ # config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
16
+
17
+ # S3 bucket where JSON will be pushed. Either `sns_topic` or `s3_bucket` must be set
18
+ # config.s3_bucket = 'bucket-name'
19
+
20
+ # Optional
21
+ # Applicable only when `s3_bucket` is set
22
+ # The S3 key path. The following values will get substituted:
23
+ # <batch_date_time> - a timestamp signifying the beginning of the batch processing
24
+ # <batch_date> - a date signifying the beginning of the batch processing
25
+ # <batch_time> - a time signifying the beginning of the batch processing
26
+ # <timestamp> - the current time (NOTE: S3KeyBuilder does not currently substitute this placeholder)
27
+ # <table_name> - the name of the table associated with the S3 body
28
+ # <uuid> - a universal identifier
29
+ # config.s3_key = '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
30
+
31
+ # Optional
32
+ # Applicable only when `s3_bucket` is set
33
+ # Extra meta values sent along with each payload
34
+ # example: config.s3_meta = {'infra_id' => '061'}
35
+ # The resulting JSON will include the `meta` merged into every record.
36
+ # config.s3_meta = {'infra_id' => '061'}
37
+
10
38
  # Optional
11
39
  # db_blaster will select and then publish `batch_size` rows at a time
12
40
  # config.batch_size = 100
@@ -14,12 +42,7 @@ DbBlaster.configure do |config|
14
42
  # Optional
15
43
  # db_blaster will publish no messages larger than this value
16
44
  # Default value is 256
17
- # attr_accessor :max_message_size_in_kilobytes
18
-
19
- # Optional
20
- # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
21
- # Attributes set here will be included in every published message
22
- # config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
45
+ # config.max_message_size_in_kilobytes = 256
23
46
 
24
47
  # Global list of column names not to include in published SNS messages
25
48
  # example: config.ignored_column_names = ['email', 'phone_number']
@@ -29,6 +52,10 @@ DbBlaster.configure do |config|
29
52
  # If set, only publish tables specified.
30
53
  # config.only_source_tables = ['posts', 'tags', 'comments']
31
54
 
55
+ # Optional
56
+ # If set, ignore source tables specified.
57
+ # config.ignore_source_tables = ['active_storage_blobs']
58
+
32
59
  # Optional
33
60
  # Customize batch_size and/or ignored_columns
34
61
  # example:
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: db_blaster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Perry Hertler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-08-11 00:00:00.000000000 Z
11
+ date: 2021-09-02 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: aws-sdk-s3
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: aws-sdk-sns
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -119,6 +133,7 @@ files:
119
133
  - db/migrate/20210727222252_create_source_tables.rb
120
134
  - lib/db_blaster.rb
121
135
  - lib/db_blaster/available_tables.rb
136
+ - lib/db_blaster/base_publisher.rb
122
137
  - lib/db_blaster/chunker.rb
123
138
  - lib/db_blaster/configuration.rb
124
139
  - lib/db_blaster/engine.rb
@@ -126,8 +141,10 @@ files:
126
141
  - lib/db_blaster/finder_sql.rb
127
142
  - lib/db_blaster/one_record_too_large_error.rb
128
143
  - lib/db_blaster/publish_source_table.rb
129
- - lib/db_blaster/publisher.rb
130
144
  - lib/db_blaster/rspec.rb
145
+ - lib/db_blaster/s3_key_builder.rb
146
+ - lib/db_blaster/s3_publisher.rb
147
+ - lib/db_blaster/sns_publisher.rb
131
148
  - lib/db_blaster/source_table_configuration.rb
132
149
  - lib/db_blaster/source_table_configuration_builder.rb
133
150
  - lib/db_blaster/version.rb