db_blaster 0.1.3 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9e7b7ce6209a45251fe19c47d6248e932e042bca2ef60e4507b7365dd517555b
4
- data.tar.gz: 774d0e7784134bfc8b14759cb7eb832851245d24a1afbcb7f1bd300be49f6537
3
+ metadata.gz: 21570da32145568b451fa60f6e48f2dbf889f30dcdb6baed94b51043d4e1d46e
4
+ data.tar.gz: 198f2fa9de6915d44e7c7717ed905a8357b65a3a2b10ba841b46b96e494016c1
5
5
  SHA512:
6
- metadata.gz: 2ab4c82a9debbaeb34f40353d5e0fae79077920bd4509469ea3a4bf51788f504a813c20b57aced15c68025ae34de6dd3fb97e112d2240613adb33fddcd64f429
7
- data.tar.gz: 600d83c0a603c961b68a5751dbfd5685c4aefd916484905abb00e4997977fed4f63c7f79adb18291a6b64d9dab3c89d48a89aa3b19b47218b5b5ab20fec3c8b0
6
+ metadata.gz: bfe0f16131ce6e7359129da73e24d51e8499fdbeb40ecc958d422cdb968b2ffcd944e768352d8495350fe9109f706d003158304c4b3ed058c0d71775bc15d9ef
7
+ data.tar.gz: 86508a7a04b99cf10d92ceb08bcb4259fb985b727229eda85d9cce191ea5724a93819fc45afe60f24b29dddfad8bf0336a92d1125eb925947c6fb09f6063dd41
@@ -18,7 +18,7 @@ module DbBlaster
18
18
  end
19
19
 
20
20
  def batch_start_time
21
- @batch_start_time ||= DateTime.now.utc.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
21
+ @batch_start_time ||= DateTime.now.utc.strftime(DbBlaster::Configuration::DEFAULT_DATETIME_FORMAT)
22
22
  end
23
23
  end
24
24
  end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Adding last_published_id to avoid duplicates when selecting from source tables
4
+ class AddLastPublishedId < ActiveRecord::Migration[6.1]
5
+ def change
6
+ add_column :db_blaster_source_tables, :last_published_id, :string, default: '0'
7
+ end
8
+ end
@@ -6,7 +6,10 @@ module DbBlaster
6
6
  class Configuration
7
7
  DEFAULT_BATCH_SIZE = 100
8
8
  DEFAULT_MAX_MESSAGE_SIZE_IN_KILOBYTES = 256 # max size allowed by AWS SNS
9
- DEFAULT_S3_KEY = '<batch_timestamp>/db_blaster/<table_name>/<uuid>.json'
9
+ DEFAULT_S3_KEY = '<batch_date>/<batch_time>/db_blaster/<table_name>/<uuid>.json'
10
+ DEFAULT_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%LZ'
11
+ ATTRIBUTE_S3_META_FORMAT = 'attribute' # { meta: {your: :value}, records: [] }
12
+ INLINE_S3_META_FORMAT = 'inline' # records.collect{|record| record.merge(meta) }
10
13
 
11
14
  # The required configuration fields
12
15
  REQUIRED_FIELDS = %i[aws_access_key aws_access_secret aws_region].freeze
@@ -30,21 +33,34 @@ module DbBlaster
30
33
  # Optional
31
34
  # Applicable only when `s3_bucket' is set
32
35
  # The value set here will be included in every payload pushed to S3
33
- # example: config.s3_meta = {'infra_id' => '061', 'src_app' => 'kcp-api'}}
34
- # The resulting JSON:
35
- # {"meta" : {"infra_id" : "061", "src_app" : "kcp-api", "src_table" : "the-table"}, "records" : [] }
36
+ # example: config.s3_meta = {'infra_id' => '061', 'source_app' => 'kcp-api'}
36
37
  attr_accessor :s3_meta
37
38
 
39
+ # Optional
40
+ # Options: ['attribute', 'inline']
41
+ # Defaults to 'attribute'
42
+ # 'attribute' payload: { meta: `s3_meta`, records: [source_table_records] }
43
+ # 'inline' payload: records.collect{|record| record.merge(meta) }
44
+ attr_accessor :s3_meta_format
45
+
38
46
  # Optional
39
47
  # Applicable only when `s3_bucket` is set
40
48
  # The S3 key. The following values will get substituted:
41
- # <batch_timestamp> - a timestamp signifying the beginning of the batch processing
42
- # <timestamp> - the current time
49
+ # <batch_date_time> - date time when batch started
50
+ # <batch_date> - date when batch started
51
+ # <batch_time> - time when batch started
52
+ # <date_time> - the datetime just before pushing to S3
43
53
  # <table_name> - the name of the table associated with the S3 body
44
54
  # <uuid> - a universal identifier
45
55
  '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
46
56
  attr_accessor :s3_key
47
57
 
58
+ # Optional
59
+ # Applicable only when `s3_bucket` is set
60
+ # S3 Tags
61
+ # example: config.s3_tags = { infra_id: '001', source_app: 'kcp-api', source_table: 'meetings' }
62
+ attr_accessor :s3_tags
63
+
48
64
  # Global list of column names not to include in published SNS messages
49
65
  # example: config.ignored_column_names = ['email', 'phone_number']
50
66
  attr_accessor :ignored_column_names
@@ -17,16 +17,26 @@ module DbBlaster
17
17
  "SELECT * FROM #{source_table.name} #{where} ORDER BY updated_at ASC LIMIT #{source_table.batch_size}"
18
18
  end
19
19
 
20
+ # if we just use updated_at > from_updated_at, it's possible to miss records
21
+ # that share the same `updated_at`
22
+ # if we use updated_at >= from_updated_at, we'll get redundant records on every run
23
+ # settled on the approach below
20
24
  def where
21
25
  return '' unless from_updated_at
22
26
 
23
27
  ActiveRecord::Base.sanitize_sql_for_conditions(
24
- ['WHERE updated_at >= :updated_at', { updated_at: from_updated_at.to_s(:db) }]
28
+ ['WHERE updated_at > :updated_at OR (updated_at = :updated_at AND id <> :updated_id)',
29
+ { updated_at: from_updated_at.to_s(:db),
30
+ updated_id: last_published_id }]
25
31
  )
26
32
  end
27
33
 
28
34
  def from_updated_at
29
35
  @from_updated_at ||= source_table.last_published_updated_at
30
36
  end
37
+
38
+ def last_published_id
39
+ @last_published_id ||= source_table.last_published_id
40
+ end
31
41
  end
32
42
  end
@@ -22,7 +22,8 @@ module DbBlaster
22
22
  source_table.with_lock do
23
23
  Finder.find(source_table) do |records|
24
24
  BasePublisher.publish(source_table: source_table, records: records, batch_start_time: batch_start_time)
25
- source_table.update(last_published_updated_at: records.last['updated_at'])
25
+ source_table.update(last_published_updated_at: records.last['updated_at'],
26
+ last_published_id: records.last['id'])
26
27
  end
27
28
  end
28
29
  self
@@ -23,7 +23,12 @@ module DbBlaster
23
23
  end
24
24
 
25
25
  def substitutions
26
- { '<batch_timestamp>' => batch_start_time,
26
+ date_time = DateTime.now.utc.strftime(DbBlaster::Configuration::DEFAULT_DATETIME_FORMAT)
27
+ date, time = batch_start_time.split('T')
28
+ { '<batch_date_time>' => batch_start_time,
29
+ '<batch_date>' => date,
30
+ '<batch_time>' => time,
31
+ '<date_time>' => date_time,
27
32
  '<uuid>' => SecureRandom.uuid,
28
33
  '<table_name>' => source_table_name }
29
34
  end
@@ -9,16 +9,32 @@ module DbBlaster
9
9
  client.put_object(bucket: DbBlaster.configuration.s3_bucket,
10
10
  key: S3KeyBuilder.build(source_table_name: source_table.name,
11
11
  batch_start_time: batch_start_time),
12
+ tagging: tagging,
12
13
  body: content.to_json)
13
14
  end
14
15
 
15
16
  def content
17
+ return meta_records if DbBlaster.configuration.s3_meta_format == Configuration::INLINE_S3_META_FORMAT
18
+
16
19
  { meta: meta,
17
20
  records: records }
18
21
  end
19
22
 
23
+ def tagging
24
+ URI.encode_www_form(tags_hash)
25
+ end
26
+
27
+ def tags_hash
28
+ @tags_hash ||= { source_table: source_table.name }
29
+ .merge(DbBlaster.configuration.s3_tags.presence || {})
30
+ end
31
+
20
32
  def meta
21
- (DbBlaster.configuration.s3_meta || {}).merge(source_table: source_table.name)
33
+ @meta ||= (DbBlaster.configuration.s3_meta.presence || {}).merge(source_table: source_table.name)
34
+ end
35
+
36
+ def meta_records
37
+ records.collect { |record| record.merge(meta) }
22
38
  end
23
39
 
24
40
  def client
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DbBlaster
4
+ # Builds an array of tables and their columns
5
+ class SourceTablesSchemaBuilder
6
+ def self.build_schema
7
+ new.build_schema
8
+ end
9
+
10
+ def build_schema
11
+ ActiveRecord::Base.connection.tables.each_with_object({}) do |table_name, hash|
12
+ unless AvailableTables::SYSTEM_TABLES.include?(table_name)
13
+ hash[table_name] = build_columns_from_table_name(table_name)
14
+ end
15
+ end
16
+ end
17
+
18
+ def build_columns_from_table_name(table_name)
19
+ ActiveRecord::Base.connection.columns(table_name).collect do |column|
20
+ next if ignored_column?(column.name)
21
+
22
+ { name: column.name,
23
+ type: column.type,
24
+ limit: column.limit }
25
+ end.compact
26
+ end
27
+
28
+ def ignored_column?(column)
29
+ (DbBlaster.configuration.ignored_column_names || []).include?(column)
30
+ end
31
+ end
32
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DbBlaster
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.7'
5
5
  end
data/lib/db_blaster.rb CHANGED
@@ -15,6 +15,7 @@ require 'db_blaster/publish_source_table'
15
15
  require 'db_blaster/chunker'
16
16
  require 'db_blaster/finder_sql'
17
17
  require 'db_blaster/finder'
18
+ require 'db_blaster/source_tables_schema_builder'
18
19
 
19
20
  # Top-level module that serves as an entry point
20
21
  # into the engine gem
@@ -20,8 +20,9 @@ DbBlaster.configure do |config|
20
20
  # Optional
21
21
  # Applicable only when `s3_bucket` is set
22
22
  # The S3 key path. The following values will get substituted:
23
- # <batch_timestamp> - a timestamp signifying the beginning of the batch processing
24
- # <timestamp> - the current time
23
+ # <batch_date_time> - a timestamp signifying the beginning of the batch processing
24
+ # <batch_date> - a date signifying the beginning of the batch processing
25
+ # <date_time> - the datetime just before pushing to S3
25
26
  # <table_name> - the name of the table associated with the S3 body
26
27
  # <uuid> - a universal identifier
27
28
  # config.s3_key = '<batch_date_time>/kcp-api/001/<table_name>/<uuid>.json'
@@ -30,8 +31,7 @@ DbBlaster.configure do |config|
30
31
  # Applicable only when `s3_bucket' is set
31
32
  # Extra meta values sent along with each payload
32
33
  # example: config.s3_meta = {'infra_id' => '061'}
33
- # The resulting JSON:
34
- # {"meta" : {"infra_id" : "061", "src_app" : "kcp-api", "src_table" : "the-table"}, "records" : [] }
34
+ # With s3_meta_format 'inline' the `meta` is merged into every record; otherwise it is sent under a top-level `meta` key.
35
35
  # config.s3_meta = {'infra_id' => '061'}
36
36
 
37
37
  # Optional
@@ -1,5 +1,14 @@
1
1
  # frozen_string_literal: true
2
- # desc "Explaining what the task does"
3
- # task :db_blaster do
4
- # # Task goes here
5
- # end
2
+
3
+ require 'db_blaster'
4
+
5
+ namespace :db_blaster do
6
+ desc 'generate table schema'
7
+ task generate_table_schema: :environment do
8
+ schema_name = 'kcp-api-schema.json'
9
+ puts "Generating #{schema_name}......."
10
+ built = DbBlaster::SourceTablesSchemaBuilder.build_schema
11
+ File.open(schema_name, 'w') { |f| f << built.to_json }
12
+ puts 'Success!'
13
+ end
14
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: db_blaster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Perry Hertler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-09-01 00:00:00.000000000 Z
11
+ date: 2021-09-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: aws-sdk-s3
@@ -131,6 +131,7 @@ files:
131
131
  - config/brakeman.ignore
132
132
  - config/routes.rb
133
133
  - db/migrate/20210727222252_create_source_tables.rb
134
+ - db/migrate/20210908214439_add_last_published_id.rb
134
135
  - lib/db_blaster.rb
135
136
  - lib/db_blaster/available_tables.rb
136
137
  - lib/db_blaster/base_publisher.rb
@@ -147,6 +148,7 @@ files:
147
148
  - lib/db_blaster/sns_publisher.rb
148
149
  - lib/db_blaster/source_table_configuration.rb
149
150
  - lib/db_blaster/source_table_configuration_builder.rb
151
+ - lib/db_blaster/source_tables_schema_builder.rb
150
152
  - lib/db_blaster/version.rb
151
153
  - lib/generators/db_blaster/install/install_generator.rb
152
154
  - lib/generators/db_blaster/install/templates/db_blaster_config.rb