embulk-input-dynamodb 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/master.yml +34 -0
- data/.github/workflows/test.yml +30 -0
- data/.scalafmt.conf +5 -0
- data/CHANGELOG.md +49 -0
- data/README.md +204 -54
- data/build.gradle +53 -44
- data/example/config-deprecated.yml +20 -0
- data/example/config-query-as-json.yml +18 -0
- data/example/config-query.yml +22 -0
- data/example/config-scan.yml +18 -0
- data/example/prepare_dynamodb_table.sh +67 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +1 -2
- data/gradlew +67 -48
- data/gradlew.bat +20 -10
- data/{test/run_dynamodb_local.sh → run_dynamodb_local.sh} +2 -1
- data/settings.gradle +1 -0
- data/src/main/scala/org/embulk/input/dynamodb/DeprecatedDynamodbInputPlugin.scala +73 -0
- data/src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala +76 -25
- data/src/main/scala/org/embulk/input/dynamodb/PluginTask.scala +132 -32
- data/src/main/scala/org/embulk/input/dynamodb/aws/Aws.scala +44 -0
- data/src/main/scala/org/embulk/input/dynamodb/aws/AwsClientConfiguration.scala +37 -0
- data/src/main/scala/org/embulk/input/dynamodb/aws/AwsCredentials.scala +240 -0
- data/src/main/scala/org/embulk/input/dynamodb/aws/AwsDynamodbConfiguration.scala +35 -0
- data/src/main/scala/org/embulk/input/dynamodb/aws/AwsEndpointConfiguration.scala +79 -0
- data/src/main/scala/org/embulk/input/dynamodb/aws/HttpProxy.scala +61 -0
- data/src/main/scala/org/embulk/input/dynamodb/deprecated/AttributeValueHelper.scala +72 -0
- data/src/main/scala/org/embulk/input/dynamodb/{Filter.scala → deprecated/Filter.scala} +3 -3
- data/src/main/scala/org/embulk/input/dynamodb/{FilterConfig.scala → deprecated/FilterConfig.scala} +13 -13
- data/src/main/scala/org/embulk/input/dynamodb/{ope → deprecated/ope}/AbstractOperation.scala +36 -18
- data/src/main/scala/org/embulk/input/dynamodb/{ope → deprecated/ope}/QueryOperation.scala +21 -13
- data/src/main/scala/org/embulk/input/dynamodb/{ope → deprecated/ope}/ScanOperation.scala +20 -13
- data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbAttributeValue.scala +154 -0
- data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbAttributeValueEmbulkTypeTransformable.scala +245 -0
- data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbAttributeValueType.scala +33 -0
- data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemColumnVisitor.scala +50 -0
- data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemConsumer.scala +40 -0
- data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemIterator.scala +19 -0
- data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemReader.scala +64 -0
- data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemSchema.scala +135 -0
- data/src/main/scala/org/embulk/input/dynamodb/operation/AbstractDynamodbOperation.scala +169 -0
- data/src/main/scala/org/embulk/input/dynamodb/operation/DynamodbOperationProxy.scala +59 -0
- data/src/main/scala/org/embulk/input/dynamodb/operation/DynamodbQueryOperation.scala +72 -0
- data/src/main/scala/org/embulk/input/dynamodb/operation/DynamodbScanOperation.scala +93 -0
- data/src/main/scala/org/embulk/input/dynamodb/operation/EmbulkDynamodbOperation.scala +15 -0
- data/src/main/scala/org/embulk/input/dynamodb/package.scala +4 -9
- data/src/test/scala/org/embulk/input/dynamodb/AttributeValueHelperTest.scala +245 -101
- data/src/test/scala/org/embulk/input/dynamodb/AwsCredentialsTest.scala +150 -97
- data/src/test/scala/org/embulk/input/dynamodb/DynamodbQueryOperationTest.scala +188 -0
- data/src/test/scala/org/embulk/input/dynamodb/DynamodbScanOperationTest.scala +181 -0
- data/src/test/scala/org/embulk/input/dynamodb/testutil/EmbulkTestBase.scala +85 -0
- metadata +73 -49
- data/circle.yml +0 -16
- data/config/checkstyle/checkstyle.xml +0 -128
- data/config/checkstyle/default.xml +0 -108
- data/src/main/scala/org/embulk/input/dynamodb/AttributeValueHelper.scala +0 -41
- data/src/main/scala/org/embulk/input/dynamodb/AwsCredentials.scala +0 -63
- data/src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala +0 -23
- data/src/test/resources/yaml/authMethodBasic.yml +0 -21
- data/src/test/resources/yaml/authMethodBasic_Error.yml +0 -19
- data/src/test/resources/yaml/authMethodEnv.yml +0 -19
- data/src/test/resources/yaml/authMethodProfile.yml +0 -20
- data/src/test/resources/yaml/dynamodb-local-query.yml +0 -25
- data/src/test/resources/yaml/dynamodb-local-scan.yml +0 -23
- data/src/test/resources/yaml/notSetAuthMethod.yml +0 -20
- data/src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala +0 -83
- data/src/test/scala/org/embulk/input/dynamodb/ope/ScanOperationTest.scala +0 -83
- data/test/create_table.sh +0 -16
- data/test/put_items.sh +0 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 85976c690e023ebd1405fa2990e5dc6995511f08
|
4
|
+
data.tar.gz: e7aa6e0eed7b15581be57fbbb69028ed6a7d1621
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8ad60d0ea024cab88469c6afec78c37eb6341035b33effdbe2b30ba823e3239f4d6c07d347da8e095f3d1f43cf61f77f3df654629c2a073f7a4daebb8550dce9
|
7
|
+
data.tar.gz: 67718ce3f0d8fd97ce3a2c33bbdfb77e1891b9b50ad59815c2282ec1ea56a56e8b7c0e8725d44f0c9fa88f5e50c9542aeb80e62108662ed1c23c85d59cb56aba
|
@@ -0,0 +1,34 @@
|
|
1
|
+
name: Master CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
pull_request:
|
5
|
+
branches:
|
6
|
+
- master
|
7
|
+
types:
|
8
|
+
- closed
|
9
|
+
|
10
|
+
jobs:
|
11
|
+
test:
|
12
|
+
|
13
|
+
runs-on: ubuntu-latest
|
14
|
+
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v1
|
17
|
+
- name: Set up JDK 1.8
|
18
|
+
uses: actions/setup-java@v1
|
19
|
+
with:
|
20
|
+
java-version: 1.8
|
21
|
+
- name: scalafmt
|
22
|
+
run: ./gradlew spotlessCheck
|
23
|
+
- name: Set up DynamoDBLocal
|
24
|
+
run: docker run -d -p 8000:8000 amazon/dynamodb-local:latest -jar ./DynamoDBLocal.jar -inMemory -sharedDb -port 8000
|
25
|
+
- name: Test with Gradle
|
26
|
+
run: ./gradlew test
|
27
|
+
env:
|
28
|
+
RUN_AWS_CREDENTIALS_TEST: false
|
29
|
+
- name: Archive test results
|
30
|
+
if: always()
|
31
|
+
uses: actions/upload-artifact@v1
|
32
|
+
with:
|
33
|
+
name: test-report
|
34
|
+
path: build/reports/tests/test
|
@@ -0,0 +1,30 @@
|
|
1
|
+
name: Test CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
- push
|
5
|
+
|
6
|
+
jobs:
|
7
|
+
test:
|
8
|
+
|
9
|
+
runs-on: ubuntu-latest
|
10
|
+
|
11
|
+
steps:
|
12
|
+
- uses: actions/checkout@v1
|
13
|
+
- name: Set up JDK 1.8
|
14
|
+
uses: actions/setup-java@v1
|
15
|
+
with:
|
16
|
+
java-version: 1.8
|
17
|
+
- name: scalafmt
|
18
|
+
run: ./gradlew spotlessCheck
|
19
|
+
- name: Set up DynamoDBLocal
|
20
|
+
run: docker run -d -p 8000:8000 amazon/dynamodb-local:latest -jar ./DynamoDBLocal.jar -inMemory -sharedDb -port 8000
|
21
|
+
- name: Test with Gradle
|
22
|
+
run: ./gradlew test
|
23
|
+
env:
|
24
|
+
RUN_AWS_CREDENTIALS_TEST: false
|
25
|
+
- name: Archive test results
|
26
|
+
if: always()
|
27
|
+
uses: actions/upload-artifact@v1
|
28
|
+
with:
|
29
|
+
name: test-report
|
30
|
+
path: build/reports/tests/test
|
data/.scalafmt.conf
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
0.3.0 (2020-03-09)
|
2
|
+
==================
|
3
|
+
|
4
|
+
- [Enhancement] Update dependencies
|
5
|
+
- [#5](https://github.com/lulichn/embulk-input-dynamodb/pull/5) JRuby Gradle Plugin (0.1.5 => 1.5.0)
|
6
|
+
- [#6](https://github.com/lulichn/embulk-input-dynamodb/pull/6) Scala (2.11.8 => 2.13.1)
|
7
|
+
- [#7](https://github.com/lulichn/embulk-input-dynamodb/pull/7) AWS DynamoDB SDK (1.10.43 => 1.11.711)
|
8
|
+
- [#8](https://github.com/lulichn/embulk-input-dynamodb/pull/8) Embulk (0.8.13 => 0.9.23)
|
9
|
+
- [Enhancement] [#9](https://github.com/lulichn/embulk-input-dynamodb/pull/9) Use TestingEmbulk instead of EmbulkEmbed when testing
|
10
|
+
- [Enhancement] [#10](https://github.com/lulichn/embulk-input-dynamodb/pull/10) Reduce test dependencies
|
11
|
+
- [Enhancement] [#13](https://github.com/lulichn/embulk-input-dynamodb/pull/13) Use Github Actions instead of CircleCI.
|
12
|
+
- [Enhancement] [#15](https://github.com/lulichn/embulk-input-dynamodb/pull/15) Improve development environments
|
13
|
+
- Introduce [scalafmt](https://scalameta.org/scalafmt/) with [spotless](https://github.com/diffplug/spotless)
|
14
|
+
- Fix the format violations
|
15
|
+
- Add [scalafmt](https://scalameta.org/scalafmt/) to CI
|
16
|
+
- Add [CHANGELOG](./CHANGELOG.md)
|
17
|
+
- Add [an example](./example)
|
18
|
+
- Update README about development
|
19
|
+
- [Enhancement] [#16](https://github.com/lulichn/embulk-input-dynamodb/pull/16) Cleanup gradle settings
|
20
|
+
- [New Feature] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) Introduce new `auth_method`: `"session"`, `"anonymous"`, `"web_identity_token"`, `"default"`.
|
21
|
+
- `"anonymous"`: uses anonymous access. This auth method can access only public files.
|
22
|
+
- `"session"`: uses temporary-generated **access_key_id**, **secret_access_key** and **session_token**.
|
23
|
+
- `"assume_role"`: uses temporary-generated credentials by assuming **role_arn** role.
|
24
|
+
- `"web_identity_token"`: uses temporary-generated credentials by assuming **role_arn** role with web identity.
|
25
|
+
- `"default"`: uses AWS SDK's default strategy to look up available credentials from runtime environment. This method behaves like the combination of the following methods.
|
26
|
+
1. `"env"`
|
27
|
+
1. `"properties"`
|
28
|
+
1. `"web_identity_token"`
|
29
|
+
1. `"profile"`
|
30
|
+
1. `"instance"`
|
31
|
+
- [New Feature] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) Support `http_proxy` option when generating aws credentials.
|
32
|
+
- [Enhancement] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) The default value of `auth_method` option becomes `"default"`. When `access_key_id` and `secret_access_key` options are set, use `"basic"` as `auth_method` for backward compatibility.
|
33
|
+
- [Deprecated] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) Make `access_key` and `secret_key` options deprecated. Use `access_key_id` and `secret_access_key` options instead.
|
34
|
+
- [Deprecated] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) Make `end_point` option deprecated. Use `endpoint` option instead.
|
35
|
+
- [Deprecated] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) The original operation implementation is deprecated, so the below options become deprecated.
|
36
|
+
- **operation**: Use **query** option or **scan** option instead.
|
37
|
+
- **limit**: Use **scan.batch_size** option or **query.batch_size** option instead.
|
38
|
+
- **scan_limit**: Use **scan.batch_size** option or **query.batch_size** option instead.
|
39
|
+
- **record_limit**: Use **scan.limit** option or **query.limit** option instead.
|
40
|
+
- **filters**: Use **scan.filter_expression** option or **query.filter_expression** option instead.
|
41
|
+
- [New Feature] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) Introduce new options **scan**, **query** to support all configurations for Dynamodb Scan/Query Operation API except legacy configurations.
|
42
|
+
- NOTE: This operation stores `null` AttributeValue as `null`, though, in the deprecated operation, `null` is converted arbitrarily. (`string` -> empty string, `long` -> `0`, `double` -> `0.0`, `boolean` -> `false`)
|
43
|
+
- NOTE: This operation stores timestamp values by parsing user-defined format, though the deprecated operation skips storing values when the column type is defined as `timestamp` without any errors.
|
44
|
+
- NOTE: This operation can convert the specific type of the attribute that you specify in **column.attribute_type** to Embulk types, though the deprecated operation can only convert Embulk types that match a particular Dynamodb Attribute type.
|
45
|
+
- [Enhancement] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) You can store each dynamodb item as JSON, so **columns** option becomes optional.
|
46
|
+
- [Enhancement] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) You can specify the `AttributeValue` type (like `"S"`, `"N"`, `"SS"` and so on) used when converting AttributeValue to Embulk type.
|
47
|
+
- [BugFix] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) Avoid `NullPointerException` when Type `N` AttributeValue has `null` in the deprecated operation.
|
48
|
+
- [Enhancement] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) Examples work without real Dynamodb.
|
49
|
+
- [Enhancement] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) Add more examples.
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Dynamodb input plugin for Embulk
|
2
2
|
|
3
|
+
![Master CI Status Badge](https://github.com/lulichn/embulk-input-dynamodb/workflows/Master%20CI/badge.svg) ![Test CI Status Badge](https://github.com/lulichn/embulk-input-dynamodb/workflows/Test%20CI/badge.svg)
|
4
|
+
|
3
5
|
## Overview
|
4
6
|
|
5
7
|
* **Plugin type**: input
|
@@ -7,39 +9,140 @@
|
|
7
9
|
* **Resume supported**: no
|
8
10
|
* **Cleanup supported**: no
|
9
11
|
|
10
|
-
|
11
12
|
## Configuration
|
12
|
-
- **auth_method**:
|
13
|
-
|
14
|
-
-
|
15
|
-
-
|
16
|
-
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
-
|
26
|
-
`
|
27
|
-
- **
|
28
|
-
|
13
|
+
- **auth_method**: name of mechanism to authenticate requests (`"basic"`, `"env"`, `"instance"`, `"profile"`, `"properties"`, `"anonymous"`, `"session"`, `"assume_role"`, `"web_identity_token"`, default: `"default"`)
|
14
|
+
- `"basic"`: uses **access_key_id** and **secret_access_key** to authenticate.
|
15
|
+
- `"env"`: uses `AWS_ACCESS_KEY_ID` (or `AWS_ACCESS_KEY`) and `AWS_SECRET_KEY` (or `AWS_SECRET_ACCESS_KEY`) environment variables.
|
16
|
+
- `"instance"`: uses EC2 instance profile or attached ECS task role.
|
17
|
+
- `"profile"`: uses credentials written in a file. Format of the file is as following, where `[...]` is a name of profile.
|
18
|
+
```
|
19
|
+
[default]
|
20
|
+
aws_access_key_id=YOUR_ACCESS_KEY_ID
|
21
|
+
aws_secret_access_key=YOUR_SECRET_ACCESS_KEY
|
22
|
+
|
23
|
+
[profile2]
|
24
|
+
...
|
25
|
+
```
|
26
|
+
- `"properties"`: uses aws.accessKeyId and aws.secretKey Java system properties.
|
27
|
+
- `"anonymous"`: uses anonymous access. This auth method can access only public files.
|
28
|
+
- `"session"`: uses temporary-generated **access_key_id**, **secret_access_key** and **session_token**.
|
29
|
+
- `"assume_role"`: uses temporary-generated credentials by assuming **role_arn** role.
|
30
|
+
- `"web_identity_token"`: uses temporary-generated credentials by assuming **role_arn** role with web identity.
|
31
|
+
- `"default"`: uses AWS SDK's default strategy to look up available credentials from runtime environment. This method behaves like the combination of the following methods.
|
32
|
+
1. `"env"`
|
33
|
+
1. `"properties"`
|
34
|
+
1. `"web_identity_token"`
|
35
|
+
1. `"profile"`
|
36
|
+
1. `"instance"`
|
37
|
+
- **profile_file**: path to a profile file. this is optionally used when **auth_method** is `"profile"`. (string, default: given by `AWS_CREDENTIAL_PROFILES_FILE` environment variable, or ~/.aws/credentials).
|
38
|
+
- **profile_name**: name of a profile. this is optionally used when **auth_method** is `"profile"`. (string, default: `"default"`)
|
39
|
+
- **access_key_id**: aws access key id. this is required when **auth_method** is `"basic"` or `"session"`. (string, optional)
|
40
|
+
- **secret_access_key**: aws secret access key. this is required when **auth_method** is `"basic"` or `"session"`. (string, optional)
|
41
|
+
- **session_token**: aws session token. this is required when **auth_method** is `"session"`. (string, optional)
|
42
|
+
- **role_arn**: arn of the role to assume. this is required when **auth_method** is `"assume_role"` or `"web_identity_token"`. (string, optional)
|
43
|
+
- **role_session_name**: an identifier for the assumed role session. this is required when **auth_method** is `"assume_role"` or `"web_identity_token"`. (string, optional)
|
44
|
+
- **role_external_id**: a unique identifier that is used by third parties when assuming roles in their customers' accounts. this is optionally used for **auth_method**: `"assume_role"`. (string, optional)
|
45
|
+
- **role_session_duration_seconds**: duration, in seconds, of the role session. this is optionally used for **auth_method**: `"assume_role"`. (int, optional)
|
46
|
+
- **web_identity_token_file**: the absolute path to the web identity token file. this is required when **auth_method** is `"web_identity_token"`. (string, optional)
|
47
|
+
- **scope_down_policy**: an iam policy in json format. this is optionally used for **auth_method**: `"assume_role"`. (string, optional)
|
48
|
+
- **endpoint**: The AWS Service endpoint (string, optional)
|
49
|
+
- **region**: The AWS region (string, optional)
|
50
|
+
- **http_proxy**: Proxy settings to use when accessing AWS via an http proxy. (optional)
|
51
|
+
- **host** proxy host (string, required)
|
52
|
+
- **port** proxy port (int, optional)
|
53
|
+
- **protocol** proxy protocol (string, default: `"https"`)
|
54
|
+
- **user** proxy user (string, optional)
|
55
|
+
- **password** proxy password (string, optional)
|
56
|
+
- **scan**: scan operation configuration. This option cannot be used with **query** option. (See [Operation Configuration Details](#operation-configuration-details), optional)
|
57
|
+
- **query**: query operation configuration. This option cannot be used with **scan** option. (See [Operation Configuration Details](#operation-configuration-details), optional)
|
29
58
|
- **table**: Table Name (string, required)
|
30
|
-
- **
|
31
|
-
|
59
|
+
- **default_timestamp_format**: Format of the timestamp if **columns.type** is `"timestamp"`. (string, optional, default: `"%Y-%m-%d %H:%M:%S.%N %z"`)
|
60
|
+
- **default_timezone**: Time zone of timestamp columns if the value itself doesn’t include time zone description (eg. Asia/Tokyo). (string, optional, default: `"UTC"`)
|
61
|
+
- **default_date**: Set date part if the format doesn’t include date part. (string, optional, default: `"1970-01-01"`)
|
62
|
+
- **columns**: key-value pairs where the key is a column name and the value is the options for the column. If you do not specify this option, each dynamodb item is processed as a single json. (array of string-to-string map, optional, default: `[]`)
|
63
|
+
- **name**: Name of the column. (string, required)
|
64
|
+
- **type**: Embulk type of the column. The dynamodb attribute value is converted to this type where possible. (`"boolean"`, `"long"`, `"timestamp"`, `"double"`, `"string"` or `"json"`, required)
|
65
|
+
- **attribute_type**: Type of the Dynamodb attribute whose name matches **name** of the column. When this option is specified, attributes of any other type are stored as `null`. (`"S"`, `"N"`, `"B"`, `"SS"`, `"NS"`, `"BS"`, `"M"`, `"L"`, `"NULL"` or `"BOOL"`, optional)
|
66
|
+
- **format**: Format of the timestamp if **type** is `"timestamp"`. (string, optional, default value is specified by **default_timestamp_format**)
|
67
|
+
- **timezone**: Timezone of the timestamp if the value itself doesn’t include time zone description (eg. Asia/Tokyo). (string, optional, default value is specified by **default_timezone**)
|
68
|
+
- **date**: Set date part if the **format** doesn’t include date part. (string, optional, default value is specified by **default_date**)
|
69
|
+
- **json_column_name**: Name of the column when each dynamodb item is processed as a single json. (string, optional, default: `"record"`)
|
70
|
+
|
71
|
+
### Operation Configuration Details
|
72
|
+
|
73
|
+
Here is the explanation of the configuration for **scan** option or **query** option. The configuration has common options and specific options. Sometimes a type called `DynamodbAttributeValue` appears, see the end of this section first if you are worried about it.
|
74
|
+
|
75
|
+
#### Common Options
|
76
|
+
|
77
|
+
- **consistent_read**: Whether to require strongly consistent reads. (boolean, optional, default: `false`)
|
78
|
+
- See the docs ([Read Consistency for Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.ReadConsistency) or [Read Consistency for Query](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.ReadConsistency)) for more details.
|
79
|
+
- **exclusive_start_key**: When you want to read the middle of the table, specify the attribute as the start key. (string to `DynamodbAttributeValue` map, optional)
|
80
|
+
- See the docs ([Paginating Table Scan Results](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.Pagination) or [Paginating Table Query Results](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.Pagination.html)) for more details.
|
81
|
+
- **expression_attribute_names**: An expression attribute name is a placeholder that you use in an Amazon DynamoDB expression as an alternative to an actual attribute name. An expression attribute name must begin with a pound sign (#), and be followed by one or more alphanumeric characters. (string to string map, optional, default: `{}`)
|
82
|
+
- See the doc ([Expression Attribute Names](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.ExpressionAttributeNames.html)) for more details.
|
83
|
+
- **expression_attribute_values**: If you need to compare an attribute with a value, define an expression attribute value as a placeholder. Expression attribute values in Amazon DynamoDB are substitutes for the actual values that you want to compare—values that you might not know until runtime. An expression attribute value must begin with a colon (:) and be followed by one or more alphanumeric characters. (string to `DynamodbAttributeValue` map, optional, default: `{}`)
|
84
|
+
- See the doc ([Expression Attribute Values](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.ExpressionAttributeValues.html)) for more details.
|
85
|
+
- **filter_expression**: A filter expression is applied after the operation finishes, but before the results are returned. Therefore, the operation consumes the same amount of read capacity, regardless of whether a filter expression is present. (string, optional)
|
86
|
+
- See the docs ([Filter Expressions for Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.FilterExpression) or [Filter Expressions for Query](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.FilterExpression)) for more details.
|
87
|
+
- **index_name**: Amazon DynamoDB provides fast access to items in a table by specifying primary key values. However, many applications might benefit from having one or more secondary (or alternate) keys available, to allow efficient access to data with attributes other than the primary key. To address this, you can create one or more secondary indexes on a table and issue **query** or **scan** operations against these indexes. (string, optional)
|
88
|
+
- See the doc ([Improving Data Access with Secondary Indexes](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/SecondaryIndexes.html)) for more details.
|
89
|
+
- **batch_size**: The limit of items by an operation. The final result contains specified number of items or fewer when **filter_expression** is specified. (int, optional)
|
90
|
+
- See the docs ([Limiting the Number of Items in the Result Set for Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.Limit) or [Limiting the Number of Items in the Result Set for Query](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.Limit)) for more details.
|
91
|
+
- **limit**: The limit of total items by operations. (long, optional)
|
92
|
+
- **projection_expression**: To read data from a table, you use operations. Amazon DynamoDB returns all the item attributes by default. To get only some, rather than all of the attributes, use a projection expression. (string, optional)
|
93
|
+
- See the doc ([Projection Expressions](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.ProjectionExpressions.html)) for more details.
|
94
|
+
- **select**: The attributes to be returned in the result. You can retrieve all item attributes, specific item attributes, the count of matching items, or in the case of an index, some or all of the attributes projected into the index. (`"ALL_ATTRIBUTES"`, `"ALL_PROJECTED_ATTRIBUTES"`, `"SPECIFIC_ATTRIBUTES"` or `"COUNT"`, optional)
|
95
|
+
- See the docs ([Select for Scan](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_Scan.html#DDB-Scan-request-Select) or [Select for Query](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_Query.html#DDB-Query-request-Select)) for more details.
|
96
|
+
|
97
|
+
#### Options for **scan**
|
98
|
+
|
99
|
+
- **segment**: A segment to be scanned by a particular worker. Each worker should use a different value for **segment**. If **segment** is not specified and **total_segment** is specified, this plugin automatically sets **segment** according to the number of embulk workers. If both **segment** and **total_segment** are specified, this plugin loads only that **segment**, so you need to load the other segments in other processes. (int, optional)
|
100
|
+
- See the doc ([Parallel Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.ParallelScan)) for more details.
|
101
|
+
- **total_segment**: The total number of segments for the parallel scan. If **segment** is not specified and **total_segment** is specified, this plugin automatically sets **segment** according to the number of embulk workers.
|
102
|
+
- See the doc ([Parallel Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.ParallelScan)) for more details.
|
103
|
+
|
104
|
+
#### Options for **query**
|
105
|
+
|
106
|
+
- **key_condition_expression**: To specify the search criteria, you use a key condition expression—a string that determines the items to be read from the table or index. (string, required)
|
107
|
+
- See the doc ([Key Condition Expression](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.KeyConditionExpressions)) for more details.
|
108
|
+
- **scan_index_forward**: By default, the sort order is ascending. To reverse the order, set this option to `false`. (boolean, optional, default: `false`)
|
109
|
+
- See the doc ([Key Condition Expression](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.KeyConditionExpressions)) for more details
|
110
|
+
|
111
|
+
#### About `DynamodbAttributeValue` Type
|
112
|
+
|
113
|
+
This type of `DynamodbAttributeValue` is one that can express Dynamodb `AttributeValue` as Embulk configuration. This configuration has the below options. Only one of these options can be set.
|
114
|
+
|
115
|
+
- **S**: string value (string, optional)
|
116
|
+
- **N**: number value. (string, optional)
|
117
|
+
- **B**: binary value. (string, optional)
|
118
|
+
- **SS**: array of string value. (array of string, optional)
|
119
|
+
- **NS**: array of number value. (array of number, optional)
|
120
|
+
- **BS**: array of binary value. (array of binary, optional)
|
121
|
+
- **M**: map value. (string to `DynamodbAttributeValue` map, optional)
|
122
|
+
- **L**: list value. (array of `DynamodbAttributeValue`, optional)
|
123
|
+
- **NULL**: null or not. (boolean, optional)
|
124
|
+
- **BOOL**: `true` or `false`. (boolean, optional)
|
125
|
+
|
126
|
+
### Deprecated Configuration
|
127
|
+
|
128
|
+
You can still use the options below for backward compatibility with versions before `v0.3.0`. However, they are already deprecated, so please use the new options instead.
|
129
|
+
|
130
|
+
- **access_key**: *[Deprecated: Use **access_key_id** instead]* aws access key id. this is required when **auth_method** is `"basic"` or `"session"`. (string, optional)
|
131
|
+
- **secret_key**: *[Deprecated: Use **secret_access_key** instead]* aws secret access key. this is required when **auth_method** is `"basic"` or `"session"`. (string, optional)
|
132
|
+
- **end_point**: *[Deprecated: Use **endpoint** instead]* The AWS Service endpoint (string, optional)
|
133
|
+
- **operation**: *[Deprecated: Use **scan** or **query** option instead]* Operation Type (`"scan"` or `"query"`, required)
|
134
|
+
- **filters**: *[Deprecated: Use **scan.filter_expression** option or **query.filter_expression** option instead]* Query Filters. (Required if **operation** is `"query"`, optional if **operation** is `"scan"`)
|
32
135
|
- **name**: Column name.
|
33
136
|
- **type**: Column type.
|
34
137
|
- **condition**: Comparison Operator.
|
35
138
|
- **value(s)**: Attribute Value(s).
|
36
|
-
- **limit**: DynamoDB 1-time Scan/Query Operation size limit (
|
37
|
-
- **scan_limit**: DynamoDB 1-time Scan Query size limit (
|
38
|
-
- **record_limit**: Max Record Search limit (
|
39
|
-
- **columns**: a key-value pairs where key is a column name and value is options for the column (required)
|
40
|
-
- **name**: Column name.
|
41
|
-
- **type**: Column values are converted to this embulk type.
|
42
|
-
|
139
|
+
- **limit**: *[Deprecated: Use **scan.batch_size** option or **query.batch_size** option instead]* DynamoDB 1-time Scan/Query Operation size limit (int, optional)
|
140
|
+
- **scan_limit**: *[Deprecated: Use **scan.batch_size** option or **query.batch_size** option instead]* DynamoDB 1-time Scan Query size limit (int, optional)
|
141
|
+
- **record_limit**: *[Deprecated: Use **scan.limit** option or **query.limit** option instead]* Max Record Search limit (long, optional)
|
142
|
+
- **columns**: *[Deprecated: This **columns** option is for the deprecated operation. See the above **columns** option when using a new operation.]* key-value pairs where the key is a column name and the value is the options for the column (required)
|
143
|
+
- **name**: Column name. (string, required)
|
144
|
+
- **type**: Column values are converted to this embulk type. (`"boolean"`, `"long"`, `"double"`, `"string"`, `"json"`, required)
|
145
|
+
- NOTE: Be careful that storing values is skipped when you specify `"timestamp"`.
|
43
146
|
|
44
147
|
## Example
|
45
148
|
|
@@ -48,21 +151,18 @@ Required to `query` operation. Optional for `scan`.
|
|
48
151
|
```yaml
|
49
152
|
in:
|
50
153
|
type: dynamodb
|
51
|
-
auth_method:
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
table: YOUR_TABLE_NAME
|
154
|
+
auth_method: env
|
155
|
+
region: us-east-1
|
156
|
+
scan:
|
157
|
+
total_segment: 20
|
158
|
+
table: embulk-input-dynamodb_example
|
57
159
|
columns:
|
58
160
|
- {name: ColumnA, type: long}
|
59
161
|
- {name: ColumnB, type: double}
|
60
|
-
- {name: ColumnC, type: string}
|
162
|
+
- {name: ColumnC, type: string, attribute_type: S}
|
61
163
|
- {name: ColumnD, type: boolean}
|
62
|
-
- {name: ColumnE, type:
|
63
|
-
|
64
|
-
- {name: ColumnA, type: long, condition: BETWEEN, value: 10000, value2: 20000}
|
65
|
-
- {name: ColumnC, type: string, condition: EQ, value: foobar}
|
164
|
+
- {name: ColumnE, type: timestamp}
|
165
|
+
- {name: ColumnF, type: json}
|
66
166
|
|
67
167
|
out:
|
68
168
|
type: stdout
|
@@ -74,31 +174,81 @@ out:
|
|
74
174
|
in:
|
75
175
|
type: dynamodb
|
76
176
|
auth_method: env
|
77
|
-
region:
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
- {name: ColumnE, type: json}
|
86
|
-
filters:
|
87
|
-
- {name: ColumnA, type: long, condition: EQ, value: 10000}
|
177
|
+
region: us-east-1
|
178
|
+
query:
|
179
|
+
key_condition_expression: "#x = :v"
|
180
|
+
expression_attribute_names:
|
181
|
+
"#x": primary-key
|
182
|
+
expression_attribute_values:
|
183
|
+
":v": {S: key-1}
|
184
|
+
table: embulk-input-dynamodb_example
|
88
185
|
|
89
186
|
out:
|
90
187
|
type: stdout
|
91
188
|
```
|
92
189
|
|
93
|
-
|
190
|
+
You can see more examples [here](./example).
|
94
191
|
|
95
|
-
|
192
|
+
## Development
|
193
|
+
|
194
|
+
### Run examples
|
195
|
+
|
196
|
+
```shell
|
197
|
+
$ ./run_dynamodb_local.sh
|
198
|
+
$ ./example/prepare_dynamodb_table.sh
|
96
199
|
$ ./gradlew classpath
|
97
|
-
$ embulk
|
200
|
+
$ embulk run example/config-query.yml -Ilib
|
201
|
+
```
|
202
|
+
|
203
|
+
### Run tests
|
204
|
+
|
205
|
+
```shell
|
206
|
+
## Run dynamodb-local
|
207
|
+
$ ./run_dynamodb_local.sh
|
208
|
+
$ AWS_ACCESS_KEY_ID=${YOUR_AWS_ACCESS_KEY_ID} \
|
209
|
+
AWS_SECRET_ACCESS_KEY=${YOUR_AWS_SECRET_ACCESS_KEY} \
|
210
|
+
EMBULK_DYNAMODB_TEST_ACCESS_KEY=${YOUR_AWS_ACCESS_KEY_ID} \
|
211
|
+
EMBULK_DYNAMODB_TEST_SECRET_KEY=${YOUR_AWS_SECRET_ACCESS_KEY} \
|
212
|
+
EMBULK_DYNAMODB_TEST_PROFILE_NAME=${YOUR_AWS_PROFILE} \
|
213
|
+
EMBULK_DYNAMODB_TEST_ASSUME_ROLE_ROLE_ARN=${YOUR_ROLE_ARN} \
|
214
|
+
./gradlew test
|
215
|
+
```
|
216
|
+
|
217
|
+
If you do not have a real AWS account, you can skip the tests that require one.
|
218
|
+
|
219
|
+
```shell
|
220
|
+
$ ./run_dynamodb_local.sh
|
221
|
+
$ RUN_AWS_CREDENTIALS_TEST=false ./gradlew test
|
222
|
+
```
|
223
|
+
|
224
|
+
### Run the formatter
|
225
|
+
|
226
|
+
```shell
|
227
|
+
## Just check the format violations
|
228
|
+
$ ./gradlew spotlessCheck
|
229
|
+
|
230
|
+
## Fix all the format violations
|
231
|
+
$ ./gradlew spotlessApply
|
98
232
|
```
|
99
233
|
|
100
|
-
|
234
|
+
### Build
|
101
235
|
|
102
236
|
```
|
103
|
-
$ ./gradlew gem
|
237
|
+
$ ./gradlew gem # -t to watch for file changes and rebuild continuously
|
104
238
|
```
|
239
|
+
|
240
|
+
### Release gem:
|
241
|
+
Update the version in [build.gradle](./build.gradle), then
|
242
|
+
|
243
|
+
|
244
|
+
```shell
|
245
|
+
$ ./gradlew gemPush
|
246
|
+
```
|
247
|
+
|
248
|
+
## ChangeLog
|
249
|
+
|
250
|
+
[CHANGELOG.md](./CHANGELOG.md)
|
251
|
+
|
252
|
+
## License
|
253
|
+
|
254
|
+
[MIT LICENSE](./LICENSE)
|
data/build.gradle
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
plugins {
|
2
2
|
id "com.jfrog.bintray" version "1.1"
|
3
|
-
id "com.github.jruby-gradle.base" version "
|
3
|
+
id "com.github.jruby-gradle.base" version "1.5.0"
|
4
4
|
id "scala"
|
5
|
-
id "
|
5
|
+
id "com.diffplug.gradle.spotless" version "3.27.1"
|
6
6
|
}
|
7
7
|
|
8
8
|
import com.github.jrubygradle.JRubyExec
|
@@ -14,38 +14,44 @@ configurations {
|
|
14
14
|
provided
|
15
15
|
}
|
16
16
|
|
17
|
-
version = "0.
|
17
|
+
version = "0.3.0"
|
18
18
|
|
19
|
-
sourceCompatibility = 1.
|
20
|
-
targetCompatibility = 1.
|
19
|
+
sourceCompatibility = 1.8
|
20
|
+
targetCompatibility = 1.8
|
21
21
|
|
22
22
|
dependencies {
|
23
|
-
compile "org.scala-lang:scala-library:2.
|
23
|
+
compile "org.scala-lang:scala-library:2.13.1"
|
24
24
|
|
25
|
-
compile "org.embulk:embulk-core:0.
|
26
|
-
provided "org.embulk:embulk-core:0.
|
25
|
+
compile "org.embulk:embulk-core:0.9.23"
|
26
|
+
provided "org.embulk:embulk-core:0.9.23"
|
27
27
|
|
28
|
-
compile "com.amazonaws:aws-java-sdk-dynamodb:1.
|
28
|
+
compile "com.amazonaws:aws-java-sdk-dynamodb:1.11.711"
|
29
|
+
compile "com.amazonaws:aws-java-sdk-sts:1.11.711"
|
30
|
+
// For @delegate macro.
|
31
|
+
compile "dev.zio:zio-macros-core_2.13:0.6.2"
|
29
32
|
|
30
33
|
testCompile "junit:junit:4.+"
|
31
|
-
testCompile "org.embulk:embulk-standards:0.
|
32
|
-
testCompile "org.embulk:embulk-
|
34
|
+
testCompile "org.embulk:embulk-standards:0.9.23"
|
35
|
+
testCompile "org.embulk:embulk-deps-buffer:0.9.23"
|
36
|
+
testCompile "org.embulk:embulk-deps-config:0.9.23"
|
37
|
+
testCompile "org.embulk:embulk-test:0.9.23"
|
33
38
|
}
|
34
39
|
|
35
40
|
compileScala {
|
36
|
-
scalaCompileOptions.
|
41
|
+
scalaCompileOptions.additionalParameters = [
|
42
|
+
"-Ymacro-annotations"
|
43
|
+
]
|
37
44
|
}
|
38
45
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
jvmArgs = ['-XX:MaxPermSize=1024m']
|
43
|
-
}
|
46
|
+
test {
|
47
|
+
jvmArgs '-Xms4g', '-Xmx4g', '-XX:MaxMetaspaceSize=1g'
|
48
|
+
maxHeapSize = "4g"
|
44
49
|
}
|
45
50
|
|
46
|
-
|
47
|
-
|
48
|
-
|
51
|
+
spotless {
|
52
|
+
scala {
|
53
|
+
scalafmt('2.3.2').configFile('.scalafmt.conf')
|
54
|
+
}
|
49
55
|
}
|
50
56
|
|
51
57
|
task classpath(type: Copy, dependsOn: ["jar"]) {
|
@@ -53,39 +59,40 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
|
|
53
59
|
from (configurations.runtime - configurations.provided + files(jar.archivePath))
|
54
60
|
into "classpath"
|
55
61
|
}
|
56
|
-
clean { delete
|
62
|
+
clean { delete "classpath" }
|
57
63
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
}
|
62
|
-
|
63
|
-
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
64
|
-
ignoreFailures = true
|
65
|
-
}
|
66
|
-
checkstyleTest {
|
67
|
-
configFile = file("${project.rootDir}/config/checkstyle/default.xml")
|
68
|
-
ignoreFailures = true
|
64
|
+
task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
|
65
|
+
jrubyArgs "-S"
|
66
|
+
script "gem"
|
67
|
+
scriptArgs "build", "${project.name}.gemspec"
|
68
|
+
doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
|
69
69
|
}
|
70
|
-
|
71
|
-
|
72
|
-
|
70
|
+
|
71
|
+
task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
|
72
|
+
jrubyArgs "-S"
|
73
|
+
script "gem"
|
74
|
+
scriptArgs "push", "pkg/${project.name}-${project.version}.gem"
|
73
75
|
}
|
74
76
|
|
75
|
-
task
|
76
|
-
|
77
|
-
|
78
|
-
|
77
|
+
task "package"(dependsOn: ["gemspec", "classpath"]) {
|
78
|
+
doLast {
|
79
|
+
println "> Build succeeded."
|
80
|
+
println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
|
81
|
+
}
|
79
82
|
}
|
80
83
|
|
81
|
-
task gemspec
|
84
|
+
task gemspec {
|
85
|
+
ext.gemspecFile = file("${project.name}.gemspec")
|
86
|
+
inputs.file "build.gradle"
|
87
|
+
outputs.file gemspecFile
|
88
|
+
doLast { gemspecFile.write($/
|
82
89
|
Gem::Specification.new do |spec|
|
83
90
|
spec.name = "${project.name}"
|
84
91
|
spec.version = "${project.version}"
|
85
|
-
spec.authors = ["Daisuke Higashi"]
|
92
|
+
spec.authors = ["Daisuke Higashi", "Civitaspo"]
|
86
93
|
spec.summary = %[Dynamodb input plugin for Embulk]
|
87
94
|
spec.description = %["Loads records from Dynamodb."]
|
88
|
-
spec.email = ["daisuke.develop@gmail.com"]
|
95
|
+
spec.email = ["daisuke.develop@gmail.com", "civitaspo@gmail.com"]
|
89
96
|
spec.licenses = ["MIT"]
|
90
97
|
spec.homepage = "https://github.com/lulichn/embulk-input-dynamodb"
|
91
98
|
|
@@ -93,9 +100,11 @@ Gem::Specification.new do |spec|
|
|
93
100
|
spec.test_files = spec.files.grep(%r"^(test|spec)/")
|
94
101
|
spec.require_paths = ["lib"]
|
95
102
|
|
96
|
-
#spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
|
97
103
|
spec.add_development_dependency 'bundler', ['~> 1.0']
|
98
|
-
spec.add_development_dependency 'rake', ['
|
104
|
+
spec.add_development_dependency 'rake', ['~> 12.0']
|
99
105
|
end
|
100
106
|
/$)
|
107
|
+
}
|
101
108
|
}
|
109
|
+
clean { delete "${project.name}.gemspec" }
|
110
|
+
|