db_blaster 0.1.2 → 0.1.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d35e663180807e21555f50de79be57ebaf73a7a2effbc86fda8cb2b59ea538bb
-  data.tar.gz: fcd1df984229d196a858e7e81fc94503eabb476bb3569afc0ff66871f6ab0e6e
+  metadata.gz: 31207f1a8613b779dffcf5a8c17c5220508ae6da66088611a4f66ec457e9c210
+  data.tar.gz: 2ff279b9cee2ce471fd99bb3f2e8f786eed3f94d42a4a6488e2248b476d71080
 SHA512:
-  metadata.gz: 270045284ee3ff5032bbb5b34de7bd3ad6add7252b6c7af05dfa6e1525f6b156c2d11138a62cbd1a3630606bb52e6df25b3e050ce24262ab99d4679b2ba84dd3
-  data.tar.gz: af1f643cae181fdbfba7f6a5ec11acef540f33d5bda76ce9753e6e0065dd40ca8640af80fcc9bd530ada2645ef8b4131f4afc5cb00f5c0f439836ab700425de5
+  metadata.gz: 9f83e6757ccdc32c57a0e64c004e9b07fb543b54fafcf3c3838969bb84853601d959f178977a14b2bf5244ba7b194a2a427ed8ba74b96af7aef557a786875f01
+  data.tar.gz: 01f717e65dc1def37cddf11af9ba0df201d213713a8a0d83aa9490d70e796bffabf17edf004e946cd87f35ac8afde64a09b6c9aa37b31488e4f1422329d82838
data/README.md CHANGED
@@ -2,15 +2,15 @@
 # DbBlaster
 ![Image of DB to SNS](https://lucid.app/publicSegments/view/c70feed3-2f48-46ee-8734-423474488feb/image.png)
 
-DbBlaster publishes changed database rows to AWS SNS. The first time `DbBlaster::PublishAllJob.perform_later` is ran,
-the entire database will be incrementally published to SNS. Subsequent runs will publish rows whose `updated_at` column
+DbBlaster can either publish changed database rows to AWS SNS or push the changes to S3. The first time `DbBlaster::PublishAllJob.perform_later` is run,
+the entire database will be incrementally published. Subsequent runs will publish rows whose `updated_at` column
 is more recent than the last run.
 
 Consuming the published messages is functionality not provided by DbBlaster.
 
 ## Usage
 
-Update `config/initializers/db_blaster_config.rb` with valid AWS credentials, topics, and options.
+Update `config/initializers/db_blaster_config.rb` with valid AWS credentials and options. Either `sns_topic` or `s3_bucket` must be set!
 
 Schedule `DbBlaster::PublishAllJob.perform_later` to run periodically with something
 like [sidekiq-cron](https://github.com/ondrejbartas/sidekiq-cron) or [whenever](https://github.com/javan/whenever)
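
DbBlaster does not schedule anything itself; a minimal sidekiq-cron entry for the job named above might look like the sketch below, where the schedule name and cadence are illustrative assumptions:

```ruby
# config/initializers/sidekiq_cron.rb (illustrative; only the job class comes from the README)
Sidekiq::Cron::Job.create(
  name: 'db_blaster publish all',
  cron: '*/5 * * * *',               # every five minutes
  class: 'DbBlaster::PublishAllJob'
)
```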
@@ -13,8 +13,12 @@ module DbBlaster
                                     .build_all(DbBlaster.configuration))
 
       DbBlaster::SourceTable.pluck(:id).each do |source_table_id|
-        PublishSourceTableJob.perform_later(source_table_id)
+        PublishSourceTableJob.perform_later(source_table_id, batch_start_time)
       end
     end
+
+    def batch_start_time
+      @batch_start_time ||= DateTime.now.utc.strftime(DbBlaster::Configuration::DEFAULT_DATETIME_FORMAT)
+    end
   end
 end
@@ -6,11 +6,11 @@ module DbBlaster
   class PublishSourceTableJob < ApplicationJob
     queue_as 'default'
 
-    def perform(source_table_id)
+    def perform(source_table_id, batch_start_time)
       source_table = SourceTable.find_by(id: source_table_id)
       return unless source_table
 
-      PublishSourceTable.execute(source_table)
+      PublishSourceTable.execute(source_table: source_table, batch_start_time: batch_start_time)
     end
   end
 end
data/lib/db_blaster/base_publisher.rb ADDED
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+# publish records to SNS topic
+module DbBlaster
+  # Base class for publishing
+  class BasePublisher
+    attr_reader :source_table, :records, :batch_start_time
+
+    def initialize(source_table, records, batch_start_time)
+      @source_table = source_table
+      @records = records
+      @batch_start_time = batch_start_time
+    end
+
+    def self.publish(source_table:, records:, batch_start_time:)
+      publisher_class =
+        if DbBlaster.configuration.sns_topic
+          SnsPublisher
+        else
+          S3Publisher
+        end
+      publisher_class.new(source_table, records, batch_start_time).publish
+    end
+
+    def publish
+      raise NotImplementedError
+    end
+  end
+end
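
A small usage sketch of the dispatch above. The table lookup and record values are illustrative stand-ins (they assume at least one synced `DbBlaster::SourceTable` row exists); only the method names come from this diff.

```ruby
# Which concrete publisher runs is decided purely by configuration:
# sns_topic set -> SnsPublisher, otherwise -> S3Publisher (expects s3_bucket).
source_table = DbBlaster::SourceTable.find_by(name: 'meetings')   # stand-in lookup
records      = [{ 'id' => 1, 'updated_at' => '2021-09-03T00:00:00.000Z' }]

DbBlaster::BasePublisher.publish(
  source_table: source_table,
  records: records,
  batch_start_time: '2021-09-03T00:00:00.000Z'
)
```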
data/lib/db_blaster/configuration.rb CHANGED
@@ -2,17 +2,65 @@
 
 module DbBlaster
   # Configuration class for providing credentials, topics, and customizations.
+  # Either `sns_topic` or `s3_bucket` must be set
   class Configuration
     DEFAULT_BATCH_SIZE = 100
     DEFAULT_MAX_MESSAGE_SIZE_IN_KILOBYTES = 256 # max size allowed by AWS SNS
+    DEFAULT_S3_KEY = '<batch_date>/<batch_time>/db_blaster/<table_name>/<uuid>.json'
+    DEFAULT_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%LZ'
+    ATTRIBUTE_S3_META_FORMAT = 'attribute' # { meta: {your: :value}, records: [] }
+    INLINE_S3_META_FORMAT = 'inline' # records.collect{|record| record.merge(meta) }
 
     # The required configuration fields
-    REQUIRED_FIELDS = %i[aws_access_key aws_access_secret aws_region sns_topic].freeze
+    REQUIRED_FIELDS = %i[aws_access_key aws_access_secret aws_region].freeze
+
+    # exactly one of these fields needs to be set
+    EITHER_OR_FIELDS = %i[sns_topic s3_bucket].freeze
 
     # The topic to which messages will be published
     attr_accessor :sns_topic
+    # The S3 bucket name
+    attr_accessor :s3_bucket
     attr_accessor :aws_access_key, :aws_access_secret, :aws_region
 
+    # Optional
+    # Applicable only when `sns_topic` is set
+    # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
+    # Attributes set here will be included in every published message
+    # example: config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
+    attr_accessor :extra_sns_message_attributes
+
+    # Optional
+    # Applicable only when `s3_bucket` is set
+    # The value set here will be included in every payload pushed to S3
+    # example: config.s3_meta = {'infra_id' => '061', 'source_app' => 'kcp-api'}
+    attr_accessor :s3_meta
+
+    # Optional
+    # Options: ['attribute', 'inline']
+    # Defaults to 'attribute'
+    # 'attribute' payload: { meta: `s3_meta`, records: [source_table_records] }
+    # 'inline' payload: records.collect{|record| record.merge(meta) }
+    attr_accessor :s3_meta_format
+
+    # Optional
+    # Applicable only when `s3_bucket` is set
+    # The S3 key. The following values will get substituted:
+    #   <batch_date_time> - date time when batch started
+    #   <batch_date> - date when batch started
+    #   <batch_time> - time when batch started
+    #   <date_time> - the datetime just before pushing to S3
+    #   <table_name> - the name of the table associated with the S3 body
+    #   <uuid> - a universal identifier
+    # example: config.s3_key = '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
+    attr_accessor :s3_key
+
+    # Optional
+    # Applicable only when `s3_bucket` is set
+    # S3 Tags
+    # example: config.s3_tags = { infra_id: '001', source_app: 'kcp-api', source_table: 'meetings' }
+    attr_accessor :s3_tags
+
     # Global list of column names not to include in published SNS messages
     # example: config.ignored_column_names = ['email', 'phone_number']
     attr_accessor :ignored_column_names
@@ -34,12 +82,6 @@ module DbBlaster
     #                           { source_table_name: 'comments', ignored_column_names: ['tags'] }]
     attr_accessor :source_table_options
 
-    # Optional
-    # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
-    # Attributes set here will be included in every published message
-    # example: config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
-    attr_accessor :extra_sns_message_attributes
-
     # Optional
     # db_blaster will select and then publish `batch_size` rows at a time
     # Default value is 100
@@ -52,12 +94,22 @@ module DbBlaster
 
     # Raises error if a required field is not set
     def verify!
-      no_values = REQUIRED_FIELDS.select do |attribute|
+      verify_required
+      verify_either_or
+    end
+
+    def verify_either_or
+      either_or = EITHER_OR_FIELDS.select do |attribute|
         send(attribute).nil? || send(attribute).strip.empty?
       end
-      return if no_values.empty?
+      raise "only one of [#{either_or.join(', ')}] should be set" unless either_or.length == 1
+    end
 
-      raise "missing configuration values for [#{no_values.join(', ')}]"
+    def verify_required
+      no_values = REQUIRED_FIELDS.select do |attribute|
+        send(attribute).nil? || send(attribute).strip.empty?
+      end
+      raise "missing configuration values for [#{no_values.join(', ')}]" unless no_values.empty?
     end
   end
 end
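
A rough sketch of what the reworked validation accepts and rejects; credential and bucket values are placeholders:

```ruby
config = DbBlaster::Configuration.new
config.aws_access_key    = 'access-key'
config.aws_access_secret = 'secret'
config.aws_region        = 'us-east-1'

config.s3_bucket = 'changes-bucket'
config.verify!                      # passes: required fields present, exactly one target set

config.sns_topic = 'db-changes'
config.verify!                      # raises: sns_topic and s3_bucket are both set now
```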
data/lib/db_blaster/publish_source_table.rb CHANGED
@@ -4,14 +4,15 @@ module DbBlaster
   # Given a `source_table` providing the table name,
   # finds rows in `batch_size` chunks that are published to SNS
   class PublishSourceTable
-    attr_reader :source_table
+    attr_reader :source_table, :batch_start_time
 
-    def initialize(source_table)
+    def initialize(source_table, batch_start_time)
       @source_table = source_table
+      @batch_start_time = batch_start_time
     end
 
-    def self.execute(source_table)
-      new(source_table).execute
+    def self.execute(source_table:, batch_start_time:)
+      new(source_table, batch_start_time).execute
     end
 
     def execute
@@ -20,7 +21,7 @@ module DbBlaster
       # pessimistically lock row for the duration
       source_table.with_lock do
         Finder.find(source_table) do |records|
-          Publisher.publish(source_table, records)
+          BasePublisher.publish(source_table: source_table, records: records, batch_start_time: batch_start_time)
           source_table.update(last_published_updated_at: records.last['updated_at'])
         end
       end
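
Rough illustration of the cursor the code above advances, which is what makes later runs incremental; the table name is a stand-in and assumes the table has been synced:

```ruby
meetings = DbBlaster::SourceTable.find_by(name: 'meetings')  # stand-in lookup
meetings.last_published_updated_at
# => the updated_at of the last row published in the previous chunk; the next
#    run only selects rows whose updated_at is more recent than this value
```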
data/lib/db_blaster/s3_key_builder.rb ADDED
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+module DbBlaster
+  # Builds the key to be used for the uploaded S3 Object
+  class S3KeyBuilder
+    attr_reader :source_table_name, :batch_start_time
+
+    def initialize(source_table_name, batch_start_time)
+      @source_table_name = source_table_name
+      @batch_start_time = batch_start_time
+    end
+
+    def self.build(source_table_name:, batch_start_time:)
+      new(source_table_name, batch_start_time).build
+    end
+
+    def build
+      key = starting_key
+      substitutions.each do |replace, value|
+        key = key.gsub(replace, value)
+      end
+      key
+    end
+
+    def substitutions
+      date_time = DateTime.now.utc.strftime(DbBlaster::Configuration::DEFAULT_DATETIME_FORMAT)
+      date, time = batch_start_time.split('T')
+      { '<batch_date_time>' => batch_start_time,
+        '<batch_date>' => date,
+        '<batch_time>' => time,
+        '<date_time>' => date_time,
+        '<uuid>' => SecureRandom.uuid,
+        '<table_name>' => source_table_name }
+    end
+
+    def starting_key
+      DbBlaster.configuration.s3_key.presence || Configuration::DEFAULT_S3_KEY
+    end
+  end
+end
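
A worked example of the substitution above, assuming `config.s3_key` is left unset so `Configuration::DEFAULT_S3_KEY` applies; the table name and batch timestamp are invented:

```ruby
DbBlaster::S3KeyBuilder.build(
  source_table_name: 'meetings',
  batch_start_time: '2021-09-03T12:30:05.123Z'
)
# Template: '<batch_date>/<batch_time>/db_blaster/<table_name>/<uuid>.json'
# => "2021-09-03/12:30:05.123Z/db_blaster/meetings/<random-uuid>.json"
```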
data/lib/db_blaster/s3_publisher.rb ADDED
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+require 'aws-sdk-s3'
+
+module DbBlaster
+  # Pushes records to S3
+  class S3Publisher < BasePublisher
+    def publish
+      client.put_object(bucket: DbBlaster.configuration.s3_bucket,
+                        key: S3KeyBuilder.build(source_table_name: source_table.name,
+                                                batch_start_time: batch_start_time),
+                        tagging: tagging,
+                        body: content.to_json)
+    end
+
+    def content
+      return meta_records if DbBlaster.configuration.s3_meta_format == Configuration::INLINE_S3_META_FORMAT
+
+      { meta: meta,
+        records: records }
+    end
+
+    def tagging
+      URI.encode_www_form(tags_hash)
+    end
+
+    def tags_hash
+      @tags_hash ||= { source_table: source_table.name }
+                     .merge(DbBlaster.configuration.s3_tags.presence || {})
+    end
+
+    def meta
+      @meta ||= (DbBlaster.configuration.s3_meta.presence || {}).merge(source_table: source_table.name)
+    end
+
+    def meta_records
+      records.collect { |record| record.merge(meta) }
+    end
+
+    def client
+      @client ||= Aws::S3::Client.new(region: DbBlaster.configuration.aws_region,
+                                      credentials: Aws::Credentials.new(DbBlaster.configuration.aws_access_key,
+                                                                        DbBlaster.configuration.aws_access_secret))
+    end
+  end
+end
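
A sketch of the two body shapes `content` can produce, using invented rows and the `s3_meta` example from the configuration comments:

```ruby
records = [{ 'id' => 1, 'topic' => 'standup' }]               # invented rows
meta    = { 'infra_id' => '061', source_table: 'meetings' }   # s3_meta merged with the table name

# 'attribute' format (the default): meta sits alongside the records
{ meta: meta, records: records }.to_json

# 'inline' format: meta is merged into every record
records.collect { |record| record.merge(meta) }.to_json
```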
@@ -1,20 +1,12 @@
 # frozen_string_literal: true
 
-# publish records to SNS topic
-module DbBlaster
-  # Publishes records to AWS SNS
-  class Publisher
-    attr_reader :source_table, :records
-
-    def initialize(source_table, records)
-      @source_table = source_table
-      @records = records
-    end
+require 'aws-sdk-sns'
 
-    def self.publish(source_table, records)
-      new(source_table, records).publish
-    end
+# frozen_string_literal: true
 
+module DbBlaster
+  # Publishes records to AWS SNS
+  class SnsPublisher < BasePublisher
     def publish
       topic.publish(message_attributes: message_attributes,
                     message: records.to_json)
data/lib/db_blaster/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module DbBlaster
-  VERSION = '0.1.2'
+  VERSION = '0.1.6'
 end
data/lib/db_blaster.rb CHANGED
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
 
-require 'aws-sdk-sns'
 require 'db_blaster/version'
 require 'db_blaster/engine'
 require 'db_blaster/one_record_too_large_error'
@@ -8,7 +7,10 @@ require 'db_blaster/available_tables'
 require 'db_blaster/configuration'
 require 'db_blaster/source_table_configuration'
 require 'db_blaster/source_table_configuration_builder'
-require 'db_blaster/publisher'
+require 'db_blaster/base_publisher'
+require 'db_blaster/s3_key_builder'
+require 'db_blaster/s3_publisher'
+require 'db_blaster/sns_publisher'
 require 'db_blaster/publish_source_table'
 require 'db_blaster/chunker'
 require 'db_blaster/finder_sql'
@@ -1,12 +1,39 @@
 # frozen_string_literal: true
 
+# Either `sns_topic` or `s3_bucket` must be set
 DbBlaster.configure do |config|
-  # SNS topic to receive database changes
+  # SNS topic to receive database changes. Either `sns_topic` or `s3_bucket` must be set
   config.sns_topic = 'the-topic'
   config.aws_access_key = 'access-key'
   config.aws_access_secret = 'secret'
   config.aws_region = 'region'
 
+  # Optional
+  # Applicable only when `sns_topic` is set
+  # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
+  # Attributes set here will be included in every published message
+  # config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
+
+  # S3 bucket where JSON will be pushed. Either `sns_topic` or `s3_bucket` must be set
+  # config.s3_bucket = 'bucket-name'
+
+  # Optional
+  # Applicable only when `s3_bucket` is set
+  # The S3 key path. The following values will get substituted:
+  #   <batch_date_time> - a timestamp signifying the beginning of the batch processing
+  #   <batch_date> - a date signifying the beginning of the batch processing
+  #   <date_time> - the datetime just before pushing to S3
+  #   <table_name> - the name of the table associated with the S3 body
+  #   <uuid> - a universal identifier
+  # config.s3_key = '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
+
+  # Optional
+  # Applicable only when `s3_bucket` is set
+  # Extra meta values sent along with each payload
+  # example: config.s3_meta = {'infra_id' => '061'}
+  # Depending on `s3_meta_format`, `meta` is included either as a top-level key or merged into every record.
+  # config.s3_meta = {'infra_id' => '061'}
+
   # Optional
   # db_blaster will select and then publish `batch_size` rows at a time
   # config.batch_size = 100
@@ -16,11 +43,6 @@ DbBlaster.configure do |config|
   # Default value is 256
   # config.max_message_size_in_kilobytes = 256
 
-  # Optional
-  # Extra [SNS message_attributes](https://docs.aws.amazon.com/sns/latest/dg/sns-message-attributes.html)
-  # Attributes set here will be included in every published message
-  # config.extra_sns_message_attributes = {'infra_id' => {data_type: 'String', value: '061'}}
-
   # Global list of column names not to include in published SNS messages
   # example: config.ignored_column_names = ['email', 'phone_number']
   # config.ignored_column_names = ['email', 'phone_number']
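
The generated template leaves the S3 settings commented out; an S3-only variant of the same initializer could look like this, with bucket, key template, meta, and tag values as placeholders drawn from the examples above:

```ruby
DbBlaster.configure do |config|
  config.aws_access_key    = 'access-key'
  config.aws_access_secret = 'secret'
  config.aws_region        = 'us-east-1'

  # leave sns_topic unset so publishing routes to S3
  config.s3_bucket = 'db-blaster-changes'
  config.s3_key    = '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
  config.s3_meta   = { 'infra_id' => '061' }
  config.s3_tags   = { source_app: 'kcp-api' }
end
```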
metadata CHANGED
@@ -1,15 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: db_blaster
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.6
 platform: ruby
 authors:
 - Perry Hertler
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-08-12 00:00:00.000000000 Z
+date: 2021-09-03 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: aws-sdk-s3
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: aws-sdk-sns
   requirement: !ruby/object:Gem::Requirement
@@ -119,6 +133,7 @@ files:
 - db/migrate/20210727222252_create_source_tables.rb
 - lib/db_blaster.rb
 - lib/db_blaster/available_tables.rb
+- lib/db_blaster/base_publisher.rb
 - lib/db_blaster/chunker.rb
 - lib/db_blaster/configuration.rb
 - lib/db_blaster/engine.rb
@@ -126,8 +141,10 @@ files:
 - lib/db_blaster/finder_sql.rb
 - lib/db_blaster/one_record_too_large_error.rb
 - lib/db_blaster/publish_source_table.rb
-- lib/db_blaster/publisher.rb
 - lib/db_blaster/rspec.rb
+- lib/db_blaster/s3_key_builder.rb
+- lib/db_blaster/s3_publisher.rb
+- lib/db_blaster/sns_publisher.rb
 - lib/db_blaster/source_table_configuration.rb
 - lib/db_blaster/source_table_configuration_builder.rb
 - lib/db_blaster/version.rb