bricolage-streamingload 0.15.0 → 0.15.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c74f043409945f4dc970472a1a7735c67a7ba930
4
- data.tar.gz: 9ce73867f02aff49a7391d168c791ce4be4d3727
2
+ SHA256:
3
+ metadata.gz: 3e5d795da31bb8940f14d95be7ae2710ffdf026ab07bed08c619c20b25ce91e6
4
+ data.tar.gz: ce467fbb158a9a9a3a42eaf07a23c9741cf0f8721511a2cf66295d3f59f0c131
5
5
  SHA512:
6
- metadata.gz: 4a6a1a3c6fbb28d1048481072596fc944e0732218bcddfd09dd9555a4d0a43b900f7806627c2f4ec4685b36cbf3a93ded8b173bd7a75917d652bef7d7a6a0739
7
- data.tar.gz: ffa357b2233b11b9820ffeda35480c74a328904d531a1f67b716fc115d584979e4eff2332239c557179687121527d3e38c90afcc02d102b4e9a3f1e3fe12de70
6
+ metadata.gz: 22008a5432ecf609084750804e39b5ac20e71f477c86cdb71066b4c5adea0a370ee15316bfba9972196ad431b0751afa4ea1f18b9c0d1524acd9f1a896994ad6
7
+ data.tar.gz: 84b45b8a683d971965ce2162ce5a2e9b47ffff015242290266dd64409e9c9890c7c52c5ed692a91010cbed076de79d1383d9f1c0d31619924f158dbb789ed76c
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+
4
+ /config/development/
5
+ /config/test/
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "https://rubygems.org"
2
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,54 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ bricolage-streamingload (0.15.1)
5
+ aws-sdk-s3 (~> 1.8)
6
+ aws-sdk-sqs (~> 1.3)
7
+ bricolage (>= 5.29.2)
8
+ pg (~> 0.18.0)
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ aws-eventstream (1.0.1)
14
+ aws-partitions (1.100.0)
15
+ aws-sdk-core (3.24.1)
16
+ aws-eventstream (~> 1.0)
17
+ aws-partitions (~> 1.0)
18
+ aws-sigv4 (~> 1.0)
19
+ jmespath (~> 1.0)
20
+ aws-sdk-kms (1.7.0)
21
+ aws-sdk-core (~> 3)
22
+ aws-sigv4 (~> 1.0)
23
+ aws-sdk-s3 (1.17.0)
24
+ aws-sdk-core (~> 3, >= 3.21.2)
25
+ aws-sdk-kms (~> 1)
26
+ aws-sigv4 (~> 1.0)
27
+ aws-sdk-sns (1.3.0)
28
+ aws-sdk-core (~> 3)
29
+ aws-sigv4 (~> 1.0)
30
+ aws-sdk-sqs (1.4.0)
31
+ aws-sdk-core (~> 3)
32
+ aws-sigv4 (~> 1.0)
33
+ aws-sigv4 (1.0.3)
34
+ bricolage (5.29.2)
35
+ aws-sdk-s3 (~> 1)
36
+ aws-sdk-sns (~> 1)
37
+ pg (~> 0.18.0)
38
+ jmespath (1.4.0)
39
+ pg (0.18.4)
40
+ power_assert (1.1.3)
41
+ rake (12.3.1)
42
+ test-unit (3.2.8)
43
+ power_assert
44
+
45
+ PLATFORMS
46
+ ruby
47
+
48
+ DEPENDENCIES
49
+ bricolage-streamingload!
50
+ rake
51
+ test-unit
52
+
53
+ BUNDLED WITH
54
+ 1.16.1
data/LICENSES ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2016 Minero Aoki, Shimpei Kodama
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/RELEASE.md ADDED
@@ -0,0 +1,116 @@
1
+ # Bricolage Streaming Load Release Note
2
+
3
+ ## version 0.15.1
4
+
5
+ - [new] Use the schema name in the parameters table by default (no more "no such variable: XXXX_schema" error).
6
+
7
+ ## version 0.15.0
8
+
9
+ - [CHANGE] Upgrade AWS-SDK to v3. This version requires at least Bricolage 5.25.
10
+
11
+ ## version 0.14.2
12
+
13
+ - Reduces loading retry count
14
+
15
+ ## version 0.14.1
16
+
17
+ - [fix] task logging did not work
18
+
19
+ ## version 0.14.0
20
+
21
+ - [new] Logs (task - object) relationships to S3 on dispatch, as a DB backup.
22
+
23
+ ## version 0.13.0
24
+
25
+ - grand refactoring
26
+
27
+ ## version 0.12.0
28
+
29
+ - [CHANGE] Adds task_id column to the log table (strload_load_logs).
30
+
31
+ ## version 0.11.0
32
+
33
+ - Loosen dependent Bricolage version
34
+
35
+ ## version 0.10.2
36
+
37
+ - [new] New parameter dispatch-interval.
38
+
39
+ ## version 0.10.1
40
+
41
+ - [fix] Fixes simple variable ref bug.
42
+
43
+ ## version 0.10.0
44
+
45
+ - [new] Automatically complement strload_jobs status with Redshift-side log table.
46
+
47
+ ## version 0.9.0
48
+
49
+ - [new] Introduces Redshift-side load log table (strload_load_logs) for load duplication checking.
50
+
51
+ ## version 0.8.1
52
+
53
+ - [fix] tmp: Do not retry on data connection failure.
54
+
55
+ ## version 0.8.0
56
+
57
+ - [CHANGE] Loader retries failed load tasks automatically. Streaming loader does NOT delete a task from the queue on job failures.
58
+ - [enhancement] Rewrites loader to get better error handling (ensure to write log record in the wider range of erroneous situations).
59
+
60
+ ## version 0.7.1
61
+
62
+ - fix utilities
63
+
64
+ ## version 0.7.0
65
+
66
+ - [CHANGE] SQS data source requires "region" attribute.
67
+
68
+ ## version 0.6.2
69
+
70
+ - [new] AWS access key id and secret key are now optional for SQS data sources (to allow using EC2 instance or ECS task attached IAM roles).
71
+ - [new] New utility commands send-data-event, send-shutdown, send-checkpoint, send-load-task.
72
+ - Adds sample config files.
73
+
74
+ ## version 0.6.1
75
+
76
+ - [fix] dispatcher: Default ctl data source was wrong.
77
+ - [fix] dispatcher: Detects S3 events by "s3" attribute instead of "eventSource" attribute, to allow fake S3 events (from non-S3 system).
78
+ - [fix] dispatcher: SNS alert is now optional.
79
+ - [fix] dispatcher: Correctly deletes unknown format messages.
80
+ - [enhancement] Adds more logging messages.
81
+
82
+ ## version 0.6.0
83
+
84
+ - [CHANGE] Adds loaded column to strload_objects table to record if the object is really loaded or not.
85
+ - [CHANGE] Now strload_objects' object_url is unique. Duplicated objects are stored in another table, strload_dup_objects.
86
+ - [CHANGE] Now strload_table has table_id column, which is the primary key.
87
+ - [new] Loader daemon supports new command line option --working-dir, to support symbolic linked path, such as Capistrano deploy target (current/).
88
+ - [new] Keeps Redshift manifest file for later inspection.
89
+ - [enhancement] Reduces the number of Redshift writer transactions (1 transaction for 1 loading).
90
+ - [enhancement] Delay dispatching tasks until current event batch is processed, to avoid unexpected visibility timeout.
91
+ - [enhancement] Adds more logging messages.
92
+
93
+ ## version 0.5.1
94
+
95
+ - [fix] Fixes slow query
96
+
97
+ ## version 0.5.0
98
+
99
+ - [new] Introduces FLUSHTABLE dispatcher event
100
+
101
+ ## version 0.4.0
102
+
103
+ - [new] Introduces CHECKPOINT dispatcher event
104
+
105
+ ## version 0.3.0
106
+
107
+ - [new] Supports SNS notification
108
+
109
+ ## version 0.2.0
110
+
111
+ - not released
112
+ - [fix] Fixes async delete timing
113
+
114
+ ## version 0.1.0
115
+
116
+ - 2016-07-13 works 1 month
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ task :test do
2
+ load "#{__dir__}/test/all.rb"
3
+ end
@@ -0,0 +1,26 @@
1
+ require_relative 'lib/bricolage/streamingload/version'
2
+
3
+ Gem::Specification.new do |s|
4
+ s.platform = Gem::Platform::RUBY
5
+ s.name = 'bricolage-streamingload'
6
+ s.version = Bricolage::StreamingLoad::VERSION
7
+ s.summary = 'Bricolage Streaming Load Daemon'
8
+ s.description = 'Bricolage Streaming Load Daemon loads S3 data files to Redshift continuously.'
9
+ s.license = 'MIT'
10
+
11
+ s.author = ['Minero Aoki', 'Shimpei Kodama']
12
+ s.email = ['aamine@loveruby.net']
13
+ s.homepage = 'https://github.com/aamine/bricolage-streamingload'
14
+
15
+ s.files = `git ls-files -z`.split("\x0").reject {|f| f.match(%r{^(test|spec|features)/}) }
16
+ s.executables = s.files.grep(%r{bin/}).map {|path| File.basename(path) }
17
+ s.require_path = 'lib'
18
+
19
+ s.required_ruby_version = '>= 2.3.0'
20
+ s.add_dependency 'bricolage', '>= 5.29.2'
21
+ s.add_dependency 'pg', '~> 0.18.0'
22
+ s.add_dependency 'aws-sdk-s3', '~> 1.8'
23
+ s.add_dependency 'aws-sdk-sqs', '~> 1.3'
24
+ s.add_development_dependency 'rake'
25
+ s.add_development_dependency 'test-unit'
26
+ end
@@ -0,0 +1,66 @@
1
+ ### Databases
2
+
3
+ db_ctl:
4
+ type: psql
5
+ host: localhost
6
+ port: 5432
7
+ database: bricolage
8
+ username: bricolage
9
+ # Get password from password.yml
10
+ password: <%= password 'postgres_bricolage_password' %>
11
+ encoding: utf8
12
+
13
+ db_data: &db_data
14
+ type: psql
15
+ host: redshift.host
16
+ port: 5439
17
+ database: production
18
+ username: bricolage
19
+ # Get password from password.yml
20
+ password: <%= password 'redshift_bricolage_password' %>
21
+ encoding: utf8
22
+
23
+ sql:
24
+ <<: *db_data
25
+
26
+ ### SQS
27
+
28
+ sqs_event:
29
+ type: sqs
30
+ region: ap-northeast-1
31
+ url: https://sqs.ap-northeast-1.amazonaws.com/111111111111/bricolage-events
32
+ max_number_of_messages: 10
33
+ visibility_timeout: 600
34
+ wait_time_seconds: 10
35
+ # Enable following lines if you use access key explicitly.
36
+ # Otherwise Bricolage uses EC2 instance or ECS task attached IAM role.
37
+ #access_key_id: "<%%= password 'aws_access_key_id' %>"
38
+ #secret_access_key: "<%%= password 'aws_secret_access_key' %>"
39
+
40
+ sqs_task:
41
+ type: sqs
42
+ region: ap-northeast-1
43
+ url: https://sqs.ap-northeast-1.amazonaws.com/111111111111/bricolage-tasks
44
+ max_number_of_messages: 1
45
+ visibility_timeout: 1800
46
+ wait_time_seconds: 10
47
+ # Enable following lines if you use access key explicitly.
48
+ # Otherwise Bricolage uses EC2 instance or ECS task attached IAM role.
49
+ #access_key_id: "<%%= password 'aws_access_key_id' %>"
50
+ #secret_access_key: "<%%= password 'aws_secret_access_key' %>"
51
+
52
+ ### S3
53
+
54
+ s3_ctl: &s3_ctl
55
+ type: s3
56
+ region: ap-northeast-1
57
+ endpoint: s3-ap-northeast-1.amazonaws.com
58
+ bucket: bricolagectl.ap-northeast-1
59
+ prefix: development/strload
60
+ # Enable following lines if you use access key explicitly.
61
+ # Otherwise Bricolage uses EC2 instance or ECS task attached IAM role.
62
+ #access_key_id: "<%%= password 'aws_access_key_id' %>"
63
+ #secret_access_key: "<%%= password 'aws_secret_access_key' %>"
64
+
65
+ s3:
66
+ <<: *s3_ctl
@@ -0,0 +1,5 @@
1
+ # Never commit this file
2
+ redshift_bricolage_password: xxxxxxxxxxx
3
+ postgres_bricolage_password: xxxxxxxxxxx
4
+ aws_access_key_id: "AKIAAAAAAAAAAAAAAAAA"
5
+ aws_secret_access_key: "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
@@ -0,0 +1,20 @@
1
+ #event-queue-ds: sqs_event
2
+ #task-queue-ds: sqs_task
3
+
4
+ #ctl-postgres-ds: db_ctl
5
+ #ctl-s3-ds: s3_ctl
6
+
7
+ #redshift-ds: db_data
8
+ #log-table: strload_load_logs
9
+
10
+ #dispatch-interval: 60
11
+
12
+ # To Enable SNS notification
13
+ #sns-ds: sns
14
+ #alert-level: warn
15
+
16
+ url_patterns:
17
+ -
18
+ url: "s3://some-log-bucket/\\w{4}\\.\\w+?\\.\\w+?\\.(?<schema>\\w+)\\.(?<table>\\w+)/\\d{4}/\\d{2}/\\d{2}/.*\\.gz"
19
+ schema: "%schema"
20
+ table: "%table"
@@ -0,0 +1,5 @@
1
+ defaults:
2
+ redshift-ds: db_data
3
+ ctl-ds: s3_ctl
4
+
5
+ dwh_schema: dwh
@@ -35,7 +35,7 @@ module Bricolage
35
35
  end
36
36
 
37
37
  def JobParams.resolve_schema(ctx, schema)
38
- ctx.global_variables["#{schema}_schema"] || schema
38
+ ctx.global_variables.get_force("#{schema}_schema") || schema
39
39
  end
40
40
  private_class_method :resolve_schema
41
41
 
@@ -1,5 +1,5 @@
1
1
  module Bricolage
2
2
  module StreamingLoad
3
- VERSION = '0.15.0'
3
+ VERSION = '0.15.1'
4
4
  end
5
5
  end
@@ -0,0 +1,38 @@
1
+ {
2
+ "Records" => [
3
+ {
4
+ "eventVersion" => "2.0",
5
+ "eventSource" => "aws:s3",
6
+ "awsRegion" => "ap-northeast-1",
7
+ "eventTime" => "2016-02-13T11:40:08.001Z",
8
+ "eventName" => "ObjectCreated:Put",
9
+ "userIdentity" => {
10
+ "principalId" => "AWS:AIXXXXXXXXXXXXXXXXX6A"
11
+ },
12
+ "requestParameters" => {
13
+ "sourceIPAddress" => "111.222.111.90"
14
+ },
15
+ "responseElements" => {
16
+ "x-amz-request-id" => "1111AAAA9999AAAA",
17
+ "x-amz-id-2" => "6p9IZG+R+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxFyXiNMMB4="
18
+ },
19
+ "s3" => {
20
+ "s3SchemaVersion" => "1.0",
21
+ "configurationId" => "ObjectCreatedEvents",
22
+ "bucket" => {
23
+ "name" => "some-bucket",
24
+ "ownerIdentity" => {
25
+ "principalId" => "AAAAIIIIBBBB11"
26
+ },
27
+ "arn" => "arn:aws:s3:::some-bucket"
28
+ },
29
+ "object" => {
30
+ "key" => "development/logs/schema.table/20160125_0000_0_de37c5ad-d26a-42cc-a141-475676b65f69.gz",
31
+ "size" => 1302957,
32
+ "eTag" => "d704db7f9cb77b1ecb981c534526b542",
33
+ "sequencer" => "111122223333444499"
34
+ }
35
+ }
36
+ }
37
+ ]
38
+ }
@@ -0,0 +1,18 @@
1
+ #<struct Aws::SQS::Types::ReceiveMessageResult
2
+ messages = [
3
+ #<struct Aws::SQS::Types::Message
4
+ message_id = "11112222-b37f-4281-aee2-696408c482be",
5
+ receipt_handle = "AQEBErp4Vv159Hvx5oXSioWu7Ov1Jj1ht423LwTTUlgLohmKh02gAMAZ7kN1J4+aLzM54BRuMatnifWPH3cBh6rtdSWlllysBVmXDRKj83VCaseKPpq+Vdw/bLZrPEYRSGPuhKsHa2DFjo67KeND4AqasZO7lOCJX+YcsbqsqAtcODlDWlVbfedPkKIGM3Scra3uY9ysGjNx/zWuJ88fCtvRpu9tgk0nG7srHAm84Mxj5ArakJnKnJMbtkD/+lvm0Vqi2YCkXonS8+BP8gK4kSTbl1uv/Gp7ZfxkjKQJBB8xQiYo7e7qKLwhbImMZtO5tqDKzIKFVfMi1G2ODF+tEj4Ce+ryMRGA50GhON2ETCyofsl1T7Wdr61IEOV0NFlonGTWFf4q/1r3OPhFAchp+tYnyA==",
6
+ md5_of_body = "00006bf43abdff178ca0ffa96205aaaa",
7
+ body = "{\"Records\":[{\"eventVersion\":\"2.0\",\"eventSource\":\"aws:s3\",\"awsRegion\":\"ap-northeast-1\",\"eventTime\":\"2016-02-13T11:40:07.268Z\",\"eventName\":\"ObjectCreated:Put\",\"userIdentity\":{\"principalId\":\"AWS:AAAAJKS3A4VEF45XCAAAA\"},\"requestParameters\":{\"sourceIPAddress\":\"111.222.111.90\"},\"responseElements\":{\"x-amz-request-id\":\"10AAAA31A1EDCCCC\",\"x-amz-id-2\":\"YT+U/PxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxzfFGPjqP5AQg=\"},\"s3\":{\"s3SchemaVersion\":\"1.0\",\"configurationId\":\"LogStreamDev\",\"bucket\":{\"name\":\"redshift-copy-buffer\",\"ownerIdentity\":{\"principalId\":\"AAAAFIDWR40000\"},\"arn\":\"arn:aws:s3:::redshift-copy-buffer\"},\"object\":{\"key\":\"development/logs/schema.table/20160125_0000_0_2d0b43ad-0db9-4655-bd98-b7464b123763.gz\",\"size\":1238953,\"eTag\":\"aaaa196c3935f4957c7bb645f9780000\",\"sequencer\":\"0000BF161697C1AAAA\"}}}]}",
8
+ attributes = {
9
+ "SenderId" => "FACE0VEO02BJMF37H2JKW",
10
+ "ApproximateFirstReceiveTimestamp" => "1455364193429",
11
+ "ApproximateReceiveCount" => "1",
12
+ "SentTimestamp" => "1455363607341"
13
+ },
14
+ md5_of_message_attributes = nil,
15
+ message_attributes = {}
16
+ >
17
+ ]
18
+ >
@@ -0,0 +1,13 @@
1
+ --dest-table: bricolage.strload_load_logs
2
+
3
+ /*
4
+ Redshift-side log table
5
+ */
6
+ create table $dest_table
7
+ ( job_id bigint encode raw
8
+ , task_id bigint encode zstd
9
+ , finish_time timestamp encode delta
10
+ )
11
+ distkey (job_id)
12
+ sortkey (job_id)
13
+ ;