db_blaster 0.1.3 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e7b7ce6209a45251fe19c47d6248e932e042bca2ef60e4507b7365dd517555b
4
- data.tar.gz: 774d0e7784134bfc8b14759cb7eb832851245d24a1afbcb7f1bd300be49f6537
3
+ metadata.gz: 21570da32145568b451fa60f6e48f2dbf889f30dcdb6baed94b51043d4e1d46e
4
+ data.tar.gz: 198f2fa9de6915d44e7c7717ed905a8357b65a3a2b10ba841b46b96e494016c1
5
5
  SHA512:
6
- metadata.gz: 2ab4c82a9debbaeb34f40353d5e0fae79077920bd4509469ea3a4bf51788f504a813c20b57aced15c68025ae34de6dd3fb97e112d2240613adb33fddcd64f429
7
- data.tar.gz: 600d83c0a603c961b68a5751dbfd5685c4aefd916484905abb00e4997977fed4f63c7f79adb18291a6b64d9dab3c89d48a89aa3b19b47218b5b5ab20fec3c8b0
6
+ metadata.gz: bfe0f16131ce6e7359129da73e24d51e8499fdbeb40ecc958d422cdb968b2ffcd944e768352d8495350fe9109f706d003158304c4b3ed058c0d71775bc15d9ef
7
+ data.tar.gz: 86508a7a04b99cf10d92ceb08bcb4259fb985b727229eda85d9cce191ea5724a93819fc45afe60f24b29dddfad8bf0336a92d1125eb925947c6fb09f6063dd41
@@ -18,7 +18,7 @@ module DbBlaster
18
18
  end
19
19
 
20
20
  def batch_start_time
21
- @batch_start_time ||= DateTime.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
21
+ @batch_start_time ||= DateTime.now.utc.strftime(DbBlaster::Configuration::DEFAULT_DATETIME_FORMAT)
22
22
  end
23
23
  end
24
24
  end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Adds last_published_id to avoid selecting duplicate records from source tables
4
+ class AddLastPublishedId < ActiveRecord::Migration[6.1]
5
+ def change
6
+ add_column :db_blaster_source_tables, :last_published_id, :string, default: '0'
7
+ end
8
+ end
@@ -6,7 +6,10 @@ module DbBlaster
6
6
  class Configuration
7
7
  DEFAULT_BATCH_SIZE = 100
8
8
  DEFAULT_MAX_MESSAGE_SIZE_IN_KILOBYTES = 256 # max size allowed by AWS SNS
9
- DEFAULT_S3_KEY = '<batch_timestamp>/db_blaster/<table_name>/<uuid>.json'
9
+ DEFAULT_S3_KEY = '<batch_date>/<batch_time>/db_blaster/<table_name>/<uuid>.json'
10
+ DEFAULT_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%LZ'
11
+ ATTRIBUTE_S3_META_FORMAT = 'attribute' # { meta: {your: :value}, records: [] }
12
+ INLINE_S3_META_FORMAT = 'inline' # records.collect{|record| record.merge(meta) }
10
13
 
11
14
  # The required configuration fields
12
15
  REQUIRED_FIELDS = %i[aws_access_key aws_access_secret aws_region].freeze
@@ -30,21 +33,34 @@ module DbBlaster
30
33
  # Optional
31
34
  # Applicable only when `s3_bucket' is set
32
35
  # The value set here will be included in every payload pushed to S3
33
- # example: config.s3_meta = {'infra_id' => '061', 'src_app' => 'kcp-api'}}
34
- # The resulting JSON:
35
- # {"meta" : {"infra_id" : "061", "src_app" : "kcp-api", "src_table" : "the-table"}, "records" : [] }
36
+ # example: config.s3_meta = {'infra_id' => '061', 'source_app' => 'kcp-api'}
36
37
  attr_accessor :s3_meta
37
38
 
39
+ # Optional
40
+ # Options: ['attribute', 'inline']
41
+ # Defaults to 'attribute'
42
+ # 'attribute' payload: { meta: `s3_meta`, records: [source_table_records] }
43
+ # 'inline' payload: records.collect{|record| record.merge(meta) }
44
+ attr_accessor :s3_meta_format
45
+
38
46
  # Optional
39
47
  # Applicable only when `s3_bucket` is set
40
48
  # The S3 key. The following values will get substituted:
41
- # <batch_timestamp> - a timestamp signifying the beginning of the batch processing
42
- # <timestamp> - the current time
49
+ # <batch_date_time> - date time when batch started
50
+ # <batch_date> - date when batch started
51
+ # <batch_time> - time when batch started
52
+ # <date_time> - the datetime just before pushing to S3
43
53
  # <table_name> - the name of the table associated with the S3 body
44
54
  # <uuid> - a universal identifier
45
55
  # example: '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
46
56
  attr_accessor :s3_key
47
57
 
58
+ # Optional
59
+ # Applicable only when `s3_bucket` is set
60
+ # S3 Tags
61
+ # example: config.s3_tags = { infra_id: '001', source_app: 'kcp-api', source_table: 'meetings' }
62
+ attr_accessor :s3_tags
63
+
48
64
  # Global list of column names not to include in published SNS messages
49
65
  # example: config.ignored_column_names = ['email', 'phone_number']
50
66
  attr_accessor :ignored_column_names
@@ -17,16 +17,26 @@ module DbBlaster
17
17
  "SELECT * FROM #{source_table.name} #{where} ORDER BY updated_at ASC LIMIT #{source_table.batch_size}"
18
18
  end
19
19
 
20
+ # if we just use updated_at > from_updated_at, it's possible to miss records
21
+ # that share the same `updated_at`
22
+ # if we use updated_at >= from_updated_at, we'll get redundant records on every run
23
+ # settled on the approach below
20
24
  def where
21
25
  return '' unless from_updated_at
22
26
 
23
27
  ActiveRecord::Base.sanitize_sql_for_conditions(
24
- ['WHERE updated_at >= :updated_at', { updated_at: from_updated_at.to_s(:db) }]
28
+ ['WHERE updated_at > :updated_at OR (updated_at = :updated_at AND id <> :updated_id)',
29
+ { updated_at: from_updated_at.to_s(:db),
30
+ updated_id: last_published_id }]
25
31
  )
26
32
  end
27
33
 
28
34
  def from_updated_at
29
35
  @from_updated_at ||= source_table.last_published_updated_at
30
36
  end
37
+
38
+ def last_published_id
39
+ @last_published_id ||= source_table.last_published_id
40
+ end
31
41
  end
32
42
  end
@@ -22,7 +22,8 @@ module DbBlaster
22
22
  source_table.with_lock do
23
23
  Finder.find(source_table) do |records|
24
24
  BasePublisher.publish(source_table: source_table, records: records, batch_start_time: batch_start_time)
25
- source_table.update(last_published_updated_at: records.last['updated_at'])
25
+ source_table.update(last_published_updated_at: records.last['updated_at'],
26
+ last_published_id: records.last['id'])
26
27
  end
27
28
  end
28
29
  self
@@ -23,7 +23,12 @@ module DbBlaster
23
23
  end
24
24
 
25
25
  def substitutions
26
- { '<batch_timestamp>' => batch_start_time,
26
+ date_time = DateTime.now.utc.strftime(DbBlaster::Configuration::DEFAULT_DATETIME_FORMAT)
27
+ date, time = batch_start_time.split('T')
28
+ { '<batch_date_time>' => batch_start_time,
29
+ '<batch_date>' => date,
30
+ '<batch_time>' => time,
31
+ '<date_time>' => date_time,
27
32
  '<uuid>' => SecureRandom.uuid,
28
33
  '<table_name>' => source_table_name }
29
34
  end
@@ -9,16 +9,32 @@ module DbBlaster
9
9
  client.put_object(bucket: DbBlaster.configuration.s3_bucket,
10
10
  key: S3KeyBuilder.build(source_table_name: source_table.name,
11
11
  batch_start_time: batch_start_time),
12
+ tagging: tagging,
12
13
  body: content.to_json)
13
14
  end
14
15
 
15
16
  def content
17
+ return meta_records if DbBlaster.configuration.s3_meta_format == Configuration::INLINE_S3_META_FORMAT
18
+
16
19
  { meta: meta,
17
20
  records: records }
18
21
  end
19
22
 
23
+ def tagging
24
+ URI.encode_www_form(tags_hash)
25
+ end
26
+
27
+ def tags_hash
28
+ @tags_hash ||= { source_table: source_table.name }
29
+ .merge(DbBlaster.configuration.s3_tags.presence || {})
30
+ end
31
+
20
32
  def meta
21
- (DbBlaster.configuration.s3_meta || {}).merge(source_table: source_table.name)
33
+ @meta ||= (DbBlaster.configuration.s3_meta.presence || {}).merge(source_table: source_table.name)
34
+ end
35
+
36
+ def meta_records
37
+ records.collect { |record| record.merge(meta) }
22
38
  end
23
39
 
24
40
  def client
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DbBlaster
4
+ # Builds a hash mapping each table name to an array of its column definitions
5
+ class SourceTablesSchemaBuilder
6
+ def self.build_schema
7
+ new.build_schema
8
+ end
9
+
10
+ def build_schema
11
+ ActiveRecord::Base.connection.tables.each_with_object({}) do |table_name, hash|
12
+ unless AvailableTables::SYSTEM_TABLES.include?(table_name)
13
+ hash[table_name] = build_columns_from_table_name(table_name)
14
+ end
15
+ end
16
+ end
17
+
18
+ def build_columns_from_table_name(table_name)
19
+ ActiveRecord::Base.connection.columns(table_name).collect do |column|
20
+ next if ignored_column?(column.name)
21
+
22
+ { name: column.name,
23
+ type: column.type,
24
+ limit: column.limit }
25
+ end.compact
26
+ end
27
+
28
+ def ignored_column?(column)
29
+ (DbBlaster.configuration.ignored_column_names || []).include?(column)
30
+ end
31
+ end
32
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DbBlaster
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.7'
5
5
  end
data/lib/db_blaster.rb CHANGED
@@ -15,6 +15,7 @@ require 'db_blaster/publish_source_table'
15
15
  require 'db_blaster/chunker'
16
16
  require 'db_blaster/finder_sql'
17
17
  require 'db_blaster/finder'
18
+ require 'db_blaster/source_tables_schema_builder'
18
19
 
19
20
  # Top-level module that serves as an entry point
20
21
  # into the engine gem
@@ -20,8 +20,9 @@ DbBlaster.configure do |config|
20
20
  # Optional
21
21
  # Applicable only when `s3_bucket` is set
22
22
  # The S3 key path. The following values will get substituted:
23
- # <batch_timestamp> - a timestamp signifying the beginning of the batch processing
24
- # <timestamp> - the current time
23
+ # <batch_date_time> - a timestamp signifying the beginning of the batch processing
24
+ # <batch_date> - a date signifying the beginning of the batch processing
25
+ # <date_time> - the datetime just before pushing to S3
25
26
  # <table_name> - the name of the table associated with the S3 body
26
27
  # <uuid> - a universal identifier
27
28
  # config.s3_key = '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
@@ -30,8 +31,7 @@ DbBlaster.configure do |config|
30
31
  # Applicable only when `s3_bucket' is set
31
32
  # Extra meta values sent along with each payload
32
33
  # example: config.s3_meta = {'infra_id' => '061'}
33
- # The resulting JSON:
34
- # {"meta" : {"infra_id" : "061", "src_app" : "kcp-api", "src_table" : "the-table"}, "records" : [] }
34
+ # With s3_meta_format 'inline' the `meta` is merged into every record; otherwise it is sent under a top-level "meta" key.
35
35
  # config.s3_meta = {'infra_id' => '061'}
36
36
 
37
37
  # Optional
@@ -1,5 +1,14 @@
1
1
  # frozen_string_literal: true
2
- # desc "Explaining what the task does"
3
- # task :db_blaster do
4
- # # Task goes here
5
- # end
2
+
3
+ require 'db_blaster'
4
+
5
+ namespace :db_blaster do
6
+ desc 'generate table schema'
7
+ task generate_table_schema: :environment do
8
+ schema_name = 'kcp-api-schema.json'
9
+ puts "Generating #{schema_name}......."
10
+ built = DbBlaster::SourceTablesSchemaBuilder.build_schema
11
+ File.open(schema_name, 'w') { |f| f << built.to_json }
12
+ puts 'Success!'
13
+ end
14
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: db_blaster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Perry Hertler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-01 00:00:00.000000000 Z
11
+ date: 2021-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-s3
@@ -131,6 +131,7 @@ files:
131
131
  - config/brakeman.ignore
132
132
  - config/routes.rb
133
133
  - db/migrate/20210727222252_create_source_tables.rb
134
+ - db/migrate/20210908214439_add_last_published_id.rb
134
135
  - lib/db_blaster.rb
135
136
  - lib/db_blaster/available_tables.rb
136
137
  - lib/db_blaster/base_publisher.rb
@@ -147,6 +148,7 @@ files:
147
148
  - lib/db_blaster/sns_publisher.rb
148
149
  - lib/db_blaster/source_table_configuration.rb
149
150
  - lib/db_blaster/source_table_configuration_builder.rb
151
+ - lib/db_blaster/source_tables_schema_builder.rb
150
152
  - lib/db_blaster/version.rb
151
153
  - lib/generators/db_blaster/install/install_generator.rb
152
154
  - lib/generators/db_blaster/install/templates/db_blaster_config.rb