embulk-input-dynamodb 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/master.yml +34 -0
  3. data/.github/workflows/test.yml +30 -0
  4. data/.scalafmt.conf +5 -0
  5. data/CHANGELOG.md +49 -0
  6. data/README.md +204 -54
  7. data/build.gradle +53 -44
  8. data/example/config-deprecated.yml +20 -0
  9. data/example/config-query-as-json.yml +18 -0
  10. data/example/config-query.yml +22 -0
  11. data/example/config-scan.yml +18 -0
  12. data/example/prepare_dynamodb_table.sh +67 -0
  13. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  14. data/gradle/wrapper/gradle-wrapper.properties +1 -2
  15. data/gradlew +67 -48
  16. data/gradlew.bat +20 -10
  17. data/{test/run_dynamodb_local.sh → run_dynamodb_local.sh} +2 -1
  18. data/settings.gradle +1 -0
  19. data/src/main/scala/org/embulk/input/dynamodb/DeprecatedDynamodbInputPlugin.scala +73 -0
  20. data/src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala +76 -25
  21. data/src/main/scala/org/embulk/input/dynamodb/PluginTask.scala +132 -32
  22. data/src/main/scala/org/embulk/input/dynamodb/aws/Aws.scala +44 -0
  23. data/src/main/scala/org/embulk/input/dynamodb/aws/AwsClientConfiguration.scala +37 -0
  24. data/src/main/scala/org/embulk/input/dynamodb/aws/AwsCredentials.scala +240 -0
  25. data/src/main/scala/org/embulk/input/dynamodb/aws/AwsDynamodbConfiguration.scala +35 -0
  26. data/src/main/scala/org/embulk/input/dynamodb/aws/AwsEndpointConfiguration.scala +79 -0
  27. data/src/main/scala/org/embulk/input/dynamodb/aws/HttpProxy.scala +61 -0
  28. data/src/main/scala/org/embulk/input/dynamodb/deprecated/AttributeValueHelper.scala +72 -0
  29. data/src/main/scala/org/embulk/input/dynamodb/{Filter.scala → deprecated/Filter.scala} +3 -3
  30. data/src/main/scala/org/embulk/input/dynamodb/{FilterConfig.scala → deprecated/FilterConfig.scala} +13 -13
  31. data/src/main/scala/org/embulk/input/dynamodb/{ope → deprecated/ope}/AbstractOperation.scala +36 -18
  32. data/src/main/scala/org/embulk/input/dynamodb/{ope → deprecated/ope}/QueryOperation.scala +21 -13
  33. data/src/main/scala/org/embulk/input/dynamodb/{ope → deprecated/ope}/ScanOperation.scala +20 -13
  34. data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbAttributeValue.scala +154 -0
  35. data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbAttributeValueEmbulkTypeTransformable.scala +245 -0
  36. data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbAttributeValueType.scala +33 -0
  37. data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemColumnVisitor.scala +50 -0
  38. data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemConsumer.scala +40 -0
  39. data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemIterator.scala +19 -0
  40. data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemReader.scala +64 -0
  41. data/src/main/scala/org/embulk/input/dynamodb/item/DynamodbItemSchema.scala +135 -0
  42. data/src/main/scala/org/embulk/input/dynamodb/operation/AbstractDynamodbOperation.scala +169 -0
  43. data/src/main/scala/org/embulk/input/dynamodb/operation/DynamodbOperationProxy.scala +59 -0
  44. data/src/main/scala/org/embulk/input/dynamodb/operation/DynamodbQueryOperation.scala +72 -0
  45. data/src/main/scala/org/embulk/input/dynamodb/operation/DynamodbScanOperation.scala +93 -0
  46. data/src/main/scala/org/embulk/input/dynamodb/operation/EmbulkDynamodbOperation.scala +15 -0
  47. data/src/main/scala/org/embulk/input/dynamodb/package.scala +4 -9
  48. data/src/test/scala/org/embulk/input/dynamodb/AttributeValueHelperTest.scala +245 -101
  49. data/src/test/scala/org/embulk/input/dynamodb/AwsCredentialsTest.scala +150 -97
  50. data/src/test/scala/org/embulk/input/dynamodb/DynamodbQueryOperationTest.scala +188 -0
  51. data/src/test/scala/org/embulk/input/dynamodb/DynamodbScanOperationTest.scala +181 -0
  52. data/src/test/scala/org/embulk/input/dynamodb/testutil/EmbulkTestBase.scala +85 -0
  53. metadata +73 -49
  54. data/circle.yml +0 -16
  55. data/config/checkstyle/checkstyle.xml +0 -128
  56. data/config/checkstyle/default.xml +0 -108
  57. data/src/main/scala/org/embulk/input/dynamodb/AttributeValueHelper.scala +0 -41
  58. data/src/main/scala/org/embulk/input/dynamodb/AwsCredentials.scala +0 -63
  59. data/src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala +0 -23
  60. data/src/test/resources/yaml/authMethodBasic.yml +0 -21
  61. data/src/test/resources/yaml/authMethodBasic_Error.yml +0 -19
  62. data/src/test/resources/yaml/authMethodEnv.yml +0 -19
  63. data/src/test/resources/yaml/authMethodProfile.yml +0 -20
  64. data/src/test/resources/yaml/dynamodb-local-query.yml +0 -25
  65. data/src/test/resources/yaml/dynamodb-local-scan.yml +0 -23
  66. data/src/test/resources/yaml/notSetAuthMethod.yml +0 -20
  67. data/src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala +0 -83
  68. data/src/test/scala/org/embulk/input/dynamodb/ope/ScanOperationTest.scala +0 -83
  69. data/test/create_table.sh +0 -16
  70. data/test/put_items.sh +0 -25
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 324f33092c5bb362ecf9a804329e56eda0e545be
4
- data.tar.gz: 00c350b4d52c76adf8291bd6abeee9d9b284eb10
3
+ metadata.gz: 85976c690e023ebd1405fa2990e5dc6995511f08
4
+ data.tar.gz: e7aa6e0eed7b15581be57fbbb69028ed6a7d1621
5
5
  SHA512:
6
- metadata.gz: 09103d2d4bdbc12d22f51318b7e6be74bba3420d9ce1e0003e78c55b77f242c03e5a74d5f9a95233be08539bb2c0cce8fbb4b8ad151104f5730c3f76f3edfd7d
7
- data.tar.gz: c7d278c4eac6260264652ce74c6901d416dc3a4ebd287ce1162d16e911628f18430aad3b34205524b7e53e4b10ae32958cbe26872d359124b883bad67f78eb93
6
+ metadata.gz: 8ad60d0ea024cab88469c6afec78c37eb6341035b33effdbe2b30ba823e3239f4d6c07d347da8e095f3d1f43cf61f77f3df654629c2a073f7a4daebb8550dce9
7
+ data.tar.gz: 67718ce3f0d8fd97ce3a2c33bbdfb77e1891b9b50ad59815c2282ec1ea56a56e8b7c0e8725d44f0c9fa88f5e50c9542aeb80e62108662ed1c23c85d59cb56aba
@@ -0,0 +1,34 @@
1
+ name: Master CI
2
+
3
+ on:
4
+ pull_request:
5
+ branches:
6
+ - master
7
+ types:
8
+ - closed
9
+
10
+ jobs:
11
+ test:
12
+
13
+ runs-on: ubuntu-latest
14
+
15
+ steps:
16
+ - uses: actions/checkout@v1
17
+ - name: Set up JDK 1.8
18
+ uses: actions/setup-java@v1
19
+ with:
20
+ java-version: 1.8
21
+ - name: scalafmt
22
+ run: ./gradlew spotlessCheck
23
+ - name: Set up DynamoDBLocal
24
+ run: docker run -d -p 8000:8000 amazon/dynamodb-local:latest -jar ./DynamoDBLocal.jar -inMemory -sharedDb -port 8000
25
+ - name: Test with Gradle
26
+ run: ./gradlew test
27
+ env:
28
+ RUN_AWS_CREDENTIALS_TEST: false
29
+ - name: Archive test results
30
+ if: always()
31
+ uses: actions/upload-artifact@v1
32
+ with:
33
+ name: test-report
34
+ path: build/reports/tests/test
@@ -0,0 +1,30 @@
1
+ name: Test CI
2
+
3
+ on:
4
+ - push
5
+
6
+ jobs:
7
+ test:
8
+
9
+ runs-on: ubuntu-latest
10
+
11
+ steps:
12
+ - uses: actions/checkout@v1
13
+ - name: Set up JDK 1.8
14
+ uses: actions/setup-java@v1
15
+ with:
16
+ java-version: 1.8
17
+ - name: scalafmt
18
+ run: ./gradlew spotlessCheck
19
+ - name: Set up DynamoDBLocal
20
+ run: docker run -d -p 8000:8000 amazon/dynamodb-local:latest -jar ./DynamoDBLocal.jar -inMemory -sharedDb -port 8000
21
+ - name: Test with Gradle
22
+ run: ./gradlew test
23
+ env:
24
+ RUN_AWS_CREDENTIALS_TEST: false
25
+ - name: Archive test results
26
+ if: always()
27
+ uses: actions/upload-artifact@v1
28
+ with:
29
+ name: test-report
30
+ path: build/reports/tests/test
data/.scalafmt.conf ADDED
@@ -0,0 +1,5 @@
1
+ # https://scalameta.org/scalafmt/#Configuration
2
+
3
+ version = "2.3.2"
4
+ newlines.alwaysBeforeElseAfterCurlyIf = true
5
+ newlines.alwaysBeforeTopLevelStatements = true
data/CHANGELOG.md ADDED
@@ -0,0 +1,49 @@
1
+ 0.3.0 (2020-03-09)
2
+ ==================
3
+
4
+ - [Enhancement] Update dependencies
5
+ - [#5](https://github.com/lulichn/embulk-input-dynamodb/pull/5) JRuby Gradle Plugin (0.1.5 => 1.5.0)
6
+ - [#6](https://github.com/lulichn/embulk-input-dynamodb/pull/6) Scala (2.11.8 => 2.13.1)
7
+ - [#7](https://github.com/lulichn/embulk-input-dynamodb/pull/7) AWS DynamoDB SDK (1.10.43 => 1.11.711)
8
+ - [#8](https://github.com/lulichn/embulk-input-dynamodb/pull/8) Embulk (0.8.13 => 0.9.23)
9
+ - [Enhancement] [#9](https://github.com/lulichn/embulk-input-dynamodb/pull/9) Use TestingEmbulk instead of EmbulkEmbed when testing
10
+ - [Enhancement] [#10](https://github.com/lulichn/embulk-input-dynamodb/pull/10) Reduce test dependencies
11
+ - [Enhancement] [#13](https://github.com/lulichn/embulk-input-dynamodb/pull/13) Use Github Actions instead of CircleCI.
12
+ - [Enhancement] [#15](https://github.com/lulichn/embulk-input-dynamodb/pull/15) Improve development environments
13
+ - Introduce [scalafmt](https://scalameta.org/scalafmt/) with [spotless](https://github.com/diffplug/spotless)
14
+ - Fix the format violations
15
+ - Add [scalafmt](https://scalameta.org/scalafmt/) to CI
16
+ - Add [CHANGELOG](./CHANGELOG.md)
17
+ - Add [an example](./example)
18
+ - Update README about development
19
+ - [Enhancement] [#16](https://github.com/lulichn/embulk-input-dynamodb/pull/16) Cleanup gradle settings
20
+ - [New Feature] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) Introduce new `auth_method`: `"session"`, `"anonymous"`, `"assume_role"`, `"web_identity_token"`, `"default"`.
21
+ - `"anonymous"`: uses anonymous access. This auth method can access only public files.
22
+ - `"session"`: uses temporary-generated **access_key_id**, **secret_access_key** and **session_token**.
23
+ - `"assume_role"`: uses temporary-generated credentials by assuming **role_arn** role.
24
+ - `"web_identity_token"`: uses temporary-generated credentials by assuming **role_arn** role with web identity.
25
+ - `"default"`: uses AWS SDK's default strategy to look up available credentials from runtime environment. This method behaves like the combination of the following methods.
26
+ 1. `"env"`
27
+ 1. `"properties"`
28
+ 1. `"web_identity_token"`
29
+ 1. `"profile"`
30
+ 1. `"instance"`
31
+ - [New Feature] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) Support `http_proxy` option when generating aws credentials.
32
+ - [Enhancement] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) The default value of `auth_method` option becomes `"default"`. When `access_key_id` and `secret_access_key` options are set, use `"basic"` as `auth_method` for backward compatibility.
33
+ - [Deprecated] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) Make `access_key` and `secret_key` options deprecated. Use `access_key_id` and `secret_access_key` options instead.
34
+ - [Deprecated] [#18](https://github.com/lulichn/embulk-input-dynamodb/pull/18) Make `end_point` option deprecated. Use `endpoint` option instead.
35
+ - [Deprecated] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) The original operation implementation is deprecated, so the below options become deprecated.
36
+ - **operation**: Use **query** option or **scan** option instead.
37
+ - **limit**: Use **scan.batch_size** option or **query.batch_size** option instead.
38
+ - **scan_limit**: Use **scan.batch_size** option or **query.batch_size** option instead.
39
+ - **record_limit**: Use **scan.limit** option or **query.limit** option instead.
40
+ - **filters**: Use **scan.filter_expression** option or **query.filter_expression** option instead.
41
+ - [New Feature] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) Introduce new options **scan**, **query** to support all configurations for Dynamodb Scan/Query Operation API except legacy configurations.
42
+ - NOTE: This operation stores `null` AttributeValue as `null`, though, in the deprecated operation, `null` is converted arbitrarily. (`string` -> empty string, `long` -> `0`, `double` -> `0.0`, `boolean` -> `false`)
43
+ - NOTE: This operation stores timestamp values by parsing user-defined format, though the deprecated operation skips storing values when the column type is defined as `timestamp` without any errors.
44
+ - NOTE: This operation can convert the specific type of the attribute that you specify in **column.attribute_type** to Embulk types, though the deprecated operation can only convert Embulk types that match a particular Dynamodb Attribute type.
45
+ - [Enhancement] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) You can store each dynamodb item as JSON, so **columns** option becomes optional.
46
+ - [Enhancement] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) You can specify the `AttributeValue` type (like `"S"`, `"N"`, `"SS"` and so on) used when converting AttributeValue to Embulk type.
47
+ - [BugFix] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) Avoid `NullPointerException` when Type `N` AttributeValue has `null` in the deprecated operation.
48
+ - [Enhancement] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) Examples work without real Dynamodb.
49
+ - [Enhancement] [#19](https://github.com/lulichn/embulk-input-dynamodb/pull/19) Add more examples.
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # Dynamodb input plugin for Embulk
2
2
 
3
+ ![Master CI Status Badge](https://github.com/lulichn/embulk-input-dynamodb/workflows/Master%20CI/badge.svg) ![Test CI Status Badge](https://github.com/lulichn/embulk-input-dynamodb/workflows/Test%20CI/badge.svg)
4
+
3
5
  ## Overview
4
6
 
5
7
  * **Plugin type**: input
@@ -7,39 +9,140 @@
7
9
  * **Resume supported**: no
8
10
  * **Cleanup supported**: no
9
11
 
10
-
11
12
  ## Configuration
12
- - **auth_method**: AWS Credential Type.
13
- Available values options are: `basic`, `env`, `instance`, `profile`, `properties`
14
- - **basic**: AWS access key and secret access key
15
- - **env**: Environment variables
16
- - **instance**: EC2 Instance Metadata Service
17
- - **profile**: Profile configuration file
18
- - **properties**: Java system properties
19
- - If **auth_method** is set `basic`
20
- - **access_key**: AWS access key (string, required)
21
- - **secret_key**: AWS secret key (string, required)
22
- - If **auth_method** is set `profile`
23
- - **profile_name**: The name of a local configuration profile (string, optional)
24
- - **region**: Region Name (string, optional)
25
- - **end_point**: EndPoint URL (string, optional)
26
- `end_point` has priority when `region` and `end_point` are specified.
27
- - **operation**: Operation Type (string, required)
28
- Available types are: `scan`, `query`
13
+ - **auth_method**: name of mechanism to authenticate requests (`"basic"`, `"env"`, `"instance"`, `"profile"`, `"properties"`, `"anonymous"`, `"session"`, `"assume_role"`, `"web_identity_token"`, default: `"default"`)
14
+ - `"basic"`: uses **access_key_id** and **secret_access_key** to authenticate.
15
+ - `"env"`: uses `AWS_ACCESS_KEY_ID` (or `AWS_ACCESS_KEY`) and `AWS_SECRET_KEY` (or `AWS_SECRET_ACCESS_KEY`) environment variables.
16
+ - `"instance"`: uses EC2 instance profile or attached ECS task role.
17
+ - `"profile"`: uses credentials written in a file. Format of the file is as following, where `[...]` is a name of profile.
18
+ ```
19
+ [default]
20
+ aws_access_key_id=YOUR_ACCESS_KEY_ID
21
+ aws_secret_access_key=YOUR_SECRET_ACCESS_KEY
22
+
23
+ [profile2]
24
+ ...
25
+ ```
26
+ - `"properties"`: uses aws.accessKeyId and aws.secretKey Java system properties.
27
+ - `"anonymous"`: uses anonymous access. This auth method can access only public files.
28
+ - `"session"`: uses temporary-generated **access_key_id**, **secret_access_key** and **session_token**.
29
+ - `"assume_role"`: uses temporary-generated credentials by assuming **role_arn** role.
30
+ - `"web_identity_token"`: uses temporary-generated credentials by assuming **role_arn** role with web identity.
31
+ - `"default"`: uses AWS SDK's default strategy to look up available credentials from runtime environment. This method behaves like the combination of the following methods.
32
+ 1. `"env"`
33
+ 1. `"properties"`
34
+ 1. `"web_identity_token"`
35
+ 1. `"profile"`
36
+ 1. `"instance"`
37
+ - **profile_file**: path to a profile file. this is optionally used when **auth_method** is `"profile"`. (string, default: given by `AWS_CREDENTIAL_PROFILES_FILE` environment variable, or ~/.aws/credentials).
38
+ - **profile_name**: name of a profile. this is optionally used when **auth_method** is `"profile"`. (string, default: `"default"`)
39
+ - **access_key_id**: aws access key id. this is required when **auth_method** is `"basic"` or `"session"`. (string, optional)
40
+ - **secret_access_key**: aws secret access key. this is required when **auth_method** is `"basic"` or `"session"`. (string, optional)
41
+ - **session_token**: aws session token. this is required when **auth_method** is `"session"`. (string, optional)
42
+ - **role_arn**: arn of the role to assume. this is required when **auth_method** is `"assume_role"` or `"web_identity_token"`. (string, optional)
43
+ - **role_session_name**: an identifier for the assumed role session. this is required when **auth_method** is `"assume_role"` or `"web_identity_token"`. (string, optional)
44
+ - **role_external_id**: a unique identifier that is used by third parties when assuming roles in their customers' accounts. this is optionally used for **auth_method**: `"assume_role"`. (string, optional)
45
+ - **role_session_duration_seconds**: duration, in seconds, of the role session. this is optionally used for **auth_method**: `"assume_role"`. (int, optional)
46
+ - **web_identity_token_file**: the absolute path to the web identity token file. this is required when **auth_method** is `"web_identity_token"`. (string, optional)
47
+ - **scope_down_policy**: an iam policy in json format. this is optionally used for **auth_method**: `"assume_role"`. (string, optional)
48
+ - **endpoint**: The AWS Service endpoint (string, optional)
49
+ - **region**: The AWS region (string, optional)
50
+ - **http_proxy**: HTTP proxy configuration to use when accessing AWS via an http proxy. (optional)
51
+ - **host** proxy host (string, required)
52
+ - **port** proxy port (int, optional)
53
+ - **protocol** proxy protocol (string, default: `"https"`)
54
+ - **user** proxy user (string, optional)
55
+ - **password** proxy password (string, optional)
56
+ - **scan**: scan operation configuration. This option cannot be used with **query** option. (See [Operation Configuration Details](#operation-configuration-details), optional)
57
+ - **query**: query operation configuration. This option cannot be used with **scan** option. (See [Operation Configuration Details](#operation-configuration-details), optional)
29
58
  - **table**: Table Name (string, required)
30
- - **filters**: Query Filters
31
- Required to `query` operation. Optional for `scan`.
59
+ - **default_timestamp_format**: Format of the timestamp if **columns.type** is `"timestamp"`. (string, optional, default: `"%Y-%m-%d %H:%M:%S.%N %z"`)
60
+ - **default_timezone**: Time zone of timestamp columns if the value itself doesn’t include time zone description (eg. Asia/Tokyo). (string, optional, default: `"UTC"`)
61
+ - **default_date**: Set date part if the format doesn’t include date part. (string, optional, default: `"1970-01-01"`)
62
+ - **columns**: key-value pairs where key is a column name and value is options for the column. If you do not specify this option, each dynamodb item is processed as a single json. (array of string-to-string map, optional, default: `[]`)
63
+ - **name**: Name of the column. (string, required)
64
+ - **type**: Embulk Type of the column that is converted to from dynamodb attribute value as possible. (`"boolean"`, `"long"`,`"timestamp"`, `"double"`, `"string"` or `"json"`, required)
65
+ - **attribute_type**: Type of the Dynamodb attribute that name matches **name** of the column. The types except specified one are stored as `null` when this option is specified. (`"S"`, `"N"`, `"B"`, `"SS"`, `"NS"`, `"BS"`, `"M"`, `"L"`, `"NULL"` or `"BOOL"`, optional)
66
+ - **format**: Format of the timestamp if **type** is `"timestamp"`. (string, optional, default value is specified by **default_timestamp_format**)
67
+ - **timezone**: Timezone of the timestamp if the value itself doesn’t include time zone description (eg. Asia/Tokyo). (string, optional, default value is specified by **default_timezone**)
68
+ - **date**: Set date part if the **format** doesn’t include date part. (string, optional, default value is specified by **default_date**)
69
+ - **json_column_name**: Name of the column when each dynamodb item is processed as a single json. (string, optional, default: `"record"`)
70
+
71
+ ### Operation Configuration Details
72
+
73
+ Here is the explanation of the configuration for **scan** option or **query** option. The configuration has common options and specific options. Sometimes a type called `DynamodbAttributeValue` appears, see the end of this section first if you are worried about it.
74
+
75
+ #### Common Options
76
+
77
+ - **consistent_read**: Require strongly consistent reads or not. (boolean, optional, default: `false`)
78
+ - See the docs ([Read Consistency for Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.ReadConsistency) or [Read Consistency for Query](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.ReadConsistency)) for more details.
79
+ - **exclusive_start_key**: When you want to read the middle of the table, specify the attribute as the start key. (string to `DynamodbAttributeValue` map, optional)
80
+ - See the docs ([Paginating Table Scan Results](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.Pagination) or [Paginating Table Query Results](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.Pagination.html)) for more details.
81
+ - **expression_attribute_names**: An expression attribute name is a placeholder that you use in an Amazon DynamoDB expression as an alternative to an actual attribute name. An expression attribute name must begin with a pound sign (#), and be followed by one or more alphanumeric characters. (string to string map, optional, default: `{}`)
82
+ - See the doc ([Expression Attribute Names](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.ExpressionAttributeNames.html)) for more details.
83
+ - **expression_attribute_values**: If you need to compare an attribute with a value, define an expression attribute value as a placeholder. Expression attribute values in Amazon DynamoDB are substitutes for the actual values that you want to compare—values that you might not know until runtime. An expression attribute value must begin with a colon (:) and be followed by one or more alphanumeric characters. (string to `DynamodbAttributeValue` map, optional, default: `{}`)
84
+ - See the doc ([Expression Attribute Values](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.ExpressionAttributeValues.html)) for more details.
85
+ - **filter_expression**: A filter expression is applied after the operation finishes, but before the results are returned. Therefore, the operation consumes the same amount of read capacity, regardless of whether a filter expression is present. (string, optional)
86
+ - See the docs ([Filter Expressions for Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.FilterExpression) or [Filter Expressions for Query](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.FilterExpression)) for more details.
87
+ - **index_name**: Amazon DynamoDB provides fast access to items in a table by specifying primary key values. However, many applications might benefit from having one or more secondary (or alternate) keys available, to allow efficient access to data with attributes other than the primary key. To address this, you can create one or more secondary indexes on a table and issue **query** or **scan** operations against these indexes. (string, optional)
88
+ - See the doc ([Improving Data Access with Secondary Indexes](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/SecondaryIndexes.html)) for more details.
89
+ - **batch_size**: The limit of items by an operation. The final result contains specified number of items or fewer when **filter_expression** is specified. (int, optional)
90
+ - See the docs ([Limiting the Number of Items in the Result Set for Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.Limit) or [Limiting the Number of Items in the Result Set for Query](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.Limit)) for more details.
91
+ - **limit**: The limit of total items by operations. (long, optional)
92
+ - **projection_expression**: To read data from a table, you use operations. Amazon DynamoDB returns all the item attributes by default. To get only some, rather than all of the attributes, use a projection expression. (string, optional)
93
+ - See the doc ([Projection Expressions](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Expressions.ProjectionExpressions.html)) for more details.
94
+ - **select**: The attributes to be returned in the result. You can retrieve all item attributes, specific item attributes, the count of matching items, or in the case of an index, some or all of the attributes projected into the index. (`"ALL_ATTRIBUTES"`, `"ALL_PROJECTED_ATTRIBUTES"`, `"SPECIFIC_ATTRIBUTES"` or `"COUNT"`, optional)
95
+ - See the docs ([Select for Scan](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_Scan.html#DDB-Scan-request-Select) or [Select for Query](https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_Query.html#DDB-Query-request-Select)) for more details.
96
+
97
+ #### Options for **scan**
98
+
99
+ - **segment**: A segment to be scanned by a particular worker. Each worker should use a different value for **segment**. If **segment** is not specified and **total_segment** is specified, this plugin automatically sets **segment** following the number of embulk workers. If both **segment** and **total_segment** are specified, this plugin loads only that **segment**, so you need to load the other segments in other processes. (int, optional)
100
+ - See the doc ([Parallel Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.ParallelScan)) for more details.
101
+ - **total_segment**: The total number of segments for the parallel scan. If **segment** is not specified and **total_segment** is specified, this plugin automatically sets **segment** following the number of embulk workers. (int, optional)
102
+ - See the doc ([Parallel Scan](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Scan.html#Scan.ParallelScan)) for more details.
103
+
104
+ #### Options for **query**
105
+
106
+ - **key_condition_expression**: To specify the search criteria, you use a key condition expression—a string that determines the items to be read from the table or index. (string, required)
107
+ - See the doc ([Key Condition Expression](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.KeyConditionExpressions)) for more details.
108
+ - **scan_index_forward**: By default, the sort order is ascending. To reverse the order, set this option to `false`. (boolean, optional, default: `false`)
109
+ - See the doc ([Key Condition Expression](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.html#Query.KeyConditionExpressions)) for more details
110
+
111
+ #### About `DynamodbAttributeValue` Type
112
+
113
+ This type of `DynamodbAttributeValue` is one that can express Dynamodb `AttributeValue` as Embulk configuration. This configuration has the below options. Only one of these options can be set.
114
+
115
+ - **S**: string value (string, optional)
116
+ - **N**: number value. (string, optional)
117
+ - **B**: binary value. (string, optional)
118
+ - **SS**: array of string value. (array of string, optional)
119
+ - **NS**: array of number value. (array of number, optional)
120
+ - **BS**: array of binary value. (array of binary, optional)
121
+ - **M**: map value. (string to `DynamodbAttributeValue` map, optional)
122
+ - **L**: list value. (array of `DynamodbAttributeValue`, optional)
123
+ - **NULL**: null or not. (boolean, optional)
124
+ - **BOOL**: `true` or `false`. (boolean, optional)
125
+
126
+ ### Deprecated Configuration
127
+
128
+ You can still use the options below for backward compatibility with versions before `v0.3.0`. However, these are already deprecated, so please use the new options instead.
129
+
130
+ - **access_key**: *[Deprecated: Use **access_key_id** instead]* aws access key id. this is required when **auth_method** is `"basic"` or `"session"`. (string, optional)
131
+ - **secret_key**: *[Deprecated: Use **secret_access_key** instead]* aws secret access key. this is required when **auth_method** is `"basic"` or `"session"`. (string, optional)
132
+ - **end_point**: *[Deprecated: Use **endpoint** instead]* The AWS Service endpoint (string, optional)
133
+ - **operation**: *[Deprecated: Use **scan** or **query** option instead]* Operation Type (`"scan"` or `"query"`, required)
134
+ - **filters**: *[Deprecated: Use **scan.filter_expression** option or **query.filter_expression** option instead]* Query Filters. (Required if **operation** is `"query"`, optional if **operation** is `"scan"`)
32
135
  - **name**: Column name.
33
136
  - **type**: Column type.
34
137
  - **condition**: Comparison Operator.
35
138
  - **value(s)**: Attribute Value(s).
36
- - **limit**: DynamoDB 1-time Scan/Query Operation size limit (Int, optional)
37
- - **scan_limit**: DynamoDB 1-time Scan Query size limit (Deprecated, Int, optional)
38
- - **record_limit**: Max Record Search limit (Long, optional)
39
- - **columns**: a key-value pairs where key is a column name and value is options for the column (required)
40
- - **name**: Column name.
41
- - **type**: Column values are converted to this embulk type.
42
- Available values options are: `boolean`, `long`, `double`, `string`, `json`
139
+ - **limit**: *[Deprecated: Use **scan.batch_size** option or **query.batch_size** option instead]* DynamoDB 1-time Scan/Query Operation size limit (int, optional)
140
+ - **scan_limit**: *[Deprecated: Use **scan.batch_size** option or **query.batch_size** option instead]* DynamoDB 1-time Scan Query size limit (int, optional)
141
+ - **record_limit**: *[Deprecated: Use **scan.limit** option or **query.limit** option instead]* Max Record Search limit (long, optional)
142
+ - **columns**: *[Deprecated: This **columns** option is for the deprecated operation. See the above **columns** option when using a new operation.]* key-value pairs where key is a column name and value is options for the column (required)
143
+ - **name**: Column name. (string, required)
144
+ - **type**: Column values are converted to this embulk type. (`"boolean"`, `"long"`, `"double"`, `"string"`, `"json"`, required)
145
+ - NOTE: Be careful that storing values is skipped when you specify `"timestamp"`.
43
146
 
44
147
  ## Example
45
148
 
@@ -48,21 +151,18 @@ Required to `query` operation. Optional for `scan`.
48
151
  ```yaml
49
152
  in:
50
153
  type: dynamodb
51
- auth_method: basic
52
- access_key: YOUR_ACCESS_KEY
53
- secret_key: YOUR_SECRET_KEY
54
- region: ap-northeast-1
55
- operation: scan
56
- table: YOUR_TABLE_NAME
154
+ auth_method: env
155
+ region: us-east-1
156
+ scan:
157
+ total_segment: 20
158
+ table: embulk-input-dynamodb_example
57
159
  columns:
58
160
  - {name: ColumnA, type: long}
59
161
  - {name: ColumnB, type: double}
60
- - {name: ColumnC, type: string}
162
+ - {name: ColumnC, type: string, attribute_type: S}
61
163
  - {name: ColumnD, type: boolean}
62
- - {name: ColumnE, type: json} # DynamoDB Map, List and Set Column Type are json.
63
- filters:
64
- - {name: ColumnA, type: long, condition: BETWEEN, value: 10000, value2: 20000}
65
- - {name: ColumnC, type: string, condition: EQ, value: foobar}
164
+ - {name: ColumnE, type: timestamp}
165
+ - {name: ColumnF, type: json}
66
166
 
67
167
  out:
68
168
  type: stdout
@@ -74,31 +174,81 @@ out:
74
174
  in:
75
175
  type: dynamodb
76
176
  auth_method: env
77
- region: ap-northeast-1
78
- operation: query
79
- table: YOUR_TABLE_NAME
80
- columns:
81
- - {name: ColumnA, type: long}
82
- - {name: ColumnB, type: double}
83
- - {name: ColumnC, type: string}
84
- - {name: ColumnD, type: boolean}
85
- - {name: ColumnE, type: json}
86
- filters:
87
- - {name: ColumnA, type: long, condition: EQ, value: 10000}
177
+ region: us-east-1
178
+ query:
179
+ key_condition_expression: "#x = :v"
180
+ expression_attribute_names:
181
+ "#x": primary-key
182
+ expression_attribute_values:
183
+ ":v": {S: key-1}
184
+ table: embulk-input-dynamodb_example
88
185
 
89
186
  out:
90
187
  type: stdout
91
188
  ```
92
189
 
93
- ## Try
190
+ You can see more examples [here](./example).
94
191
 
95
- ```
192
+ ## Development
193
+
194
+ ### Run examples
195
+
196
+ ```shell
197
+ $ ./run_dynamodb_local.sh
198
+ $ ./example/prepare_dynamodb_table.sh
96
199
  $ ./gradlew classpath
97
- $ embulk preview -I lib your-config.yml
200
+ $ embulk run example/config-query.yml -Ilib
201
+ ```
202
+
203
+ ### Run tests
204
+
205
+ ```shell
206
+ ## Run dynamodb-local
207
+ $ ./run_dynamodb_local.sh
208
+ $ AWS_ACCESS_KEY_ID=${YOUR_AWS_ACCESS_KEY_ID} \
209
+ AWS_SECRET_ACCESS_KEY=${YOUR_AWS_SECRET_ACCESS_KEY} \
210
+ EMBULK_DYNAMODB_TEST_ACCESS_KEY=${YOUR_AWS_ACCESS_KEY_ID} \
211
+ EMBULK_DYNAMODB_TEST_SECRET_KEY=${YOUR_AWS_SECRET_ACCESS_KEY} \
212
+ EMBULK_DYNAMODB_TEST_PROFILE_NAME=${YOUR_AWS_PROFILE} \
213
+ EMBULK_DYNAMODB_TEST_ASSUME_ROLE_ROLE_ARN=${YOUR_ROLE_ARN} \
214
+ ./gradlew test
215
+ ```
216
+
217
+ If you do not have a real AWS account, you can skip the tests that require one.
218
+
219
+ ```shell
220
+ $ ./run_dynamodb_local.sh
221
+ $ RUN_AWS_CREDENTIALS_TEST=false ./gradlew test
222
+ ```
223
+
224
+ ### Run the formatter
225
+
226
+ ```shell
227
+ ## Just check the format violations
228
+ $ ./gradlew spotlessCheck
229
+
230
+ ## Fix all the format violations
231
+ $ ./gradlew spotlessApply
98
232
  ```
99
233
 
100
- ## Build
234
+ ### Build
101
235
 
102
236
  ```
103
- $ ./gradlew gem
237
+ $ ./gradlew gem # -t to watch for file changes and rebuild continuously
104
238
  ```
239
+
240
+ ### Release gem:
241
+ Fix [build.gradle](./build.gradle), then
242
+
243
+
244
+ ```shell
245
+ $ ./gradlew gemPush
246
+ ```
247
+
248
+ ## ChangeLog
249
+
250
+ [CHANGELOG.md](./CHANGELOG.md)
251
+
252
+ ## License
253
+
254
+ [MIT LICENSE](./LICENSE)
data/build.gradle CHANGED
@@ -1,8 +1,8 @@
1
1
  plugins {
2
2
  id "com.jfrog.bintray" version "1.1"
3
- id "com.github.jruby-gradle.base" version "0.1.5"
3
+ id "com.github.jruby-gradle.base" version "1.5.0"
4
4
  id "scala"
5
- id "checkstyle"
5
+ id "com.diffplug.gradle.spotless" version "3.27.1"
6
6
  }
7
7
 
8
8
  import com.github.jrubygradle.JRubyExec
@@ -14,38 +14,44 @@ configurations {
14
14
  provided
15
15
  }
16
16
 
17
- version = "0.2.0"
17
+ version = "0.3.0"
18
18
 
19
- sourceCompatibility = 1.7
20
- targetCompatibility = 1.7
19
+ sourceCompatibility = 1.8
20
+ targetCompatibility = 1.8
21
21
 
22
22
  dependencies {
23
- compile "org.scala-lang:scala-library:2.11.8"
23
+ compile "org.scala-lang:scala-library:2.13.1"
24
24
 
25
- compile "org.embulk:embulk-core:0.8.13"
26
- provided "org.embulk:embulk-core:0.8.13"
25
+ compile "org.embulk:embulk-core:0.9.23"
26
+ provided "org.embulk:embulk-core:0.9.23"
27
27
 
28
- compile "com.amazonaws:aws-java-sdk-dynamodb:1.10.43"
28
+ compile "com.amazonaws:aws-java-sdk-dynamodb:1.11.711"
29
+ compile "com.amazonaws:aws-java-sdk-sts:1.11.711"
30
+ // For @delegate macro.
31
+ compile "dev.zio:zio-macros-core_2.13:0.6.2"
29
32
 
30
33
  testCompile "junit:junit:4.+"
31
- testCompile "org.embulk:embulk-standards:0.8.13"
32
- testCompile "org.embulk:embulk-core:0.8.13:tests"
34
+ testCompile "org.embulk:embulk-standards:0.9.23"
35
+ testCompile "org.embulk:embulk-deps-buffer:0.9.23"
36
+ testCompile "org.embulk:embulk-deps-config:0.9.23"
37
+ testCompile "org.embulk:embulk-test:0.9.23"
33
38
  }
34
39
 
35
40
  compileScala {
36
- scalaCompileOptions.useCompileDaemon = true
41
+ scalaCompileOptions.additionalParameters = [
42
+ "-Ymacro-annotations"
43
+ ]
37
44
  }
38
45
 
39
- tasks.withType(ScalaCompile) {
40
- configure(scalaCompileOptions.forkOptions) {
41
- memoryMaximumSize = '1g'
42
- jvmArgs = ['-XX:MaxPermSize=1024m']
43
- }
46
+ test {
47
+ jvmArgs '-Xms4g', '-Xmx4g', '-XX:MaxMetaspaceSize=1g'
48
+ maxHeapSize = "4g"
44
49
  }
45
50
 
46
- // Activating the Zinc based compiler
47
- tasks.withType(ScalaCompile) {
48
- scalaCompileOptions.useAnt = false
51
+ spotless {
52
+ scala {
53
+ scalafmt('2.3.2').configFile('.scalafmt.conf')
54
+ }
49
55
  }
50
56
 
51
57
  task classpath(type: Copy, dependsOn: ["jar"]) {
@@ -53,39 +59,40 @@ task classpath(type: Copy, dependsOn: ["jar"]) {
53
59
  from (configurations.runtime - configurations.provided + files(jar.archivePath))
54
60
  into "classpath"
55
61
  }
56
- clean { delete 'classpath' }
62
+ clean { delete "classpath" }
57
63
 
58
- checkstyle {
59
- configFile = file("${project.rootDir}/config/checkstyle/checkstyle.xml")
60
- toolVersion = '6.14.1'
61
- }
62
- checkstyleMain {
63
- configFile = file("${project.rootDir}/config/checkstyle/default.xml")
64
- ignoreFailures = true
65
- }
66
- checkstyleTest {
67
- configFile = file("${project.rootDir}/config/checkstyle/default.xml")
68
- ignoreFailures = true
64
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
65
+ jrubyArgs "-S"
66
+ script "gem"
67
+ scriptArgs "build", "${project.name}.gemspec"
68
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
69
69
  }
70
- task checkstyle(type: Checkstyle) {
71
- classpath = sourceSets.main.output + sourceSets.test.output
72
- source = sourceSets.main.allJava + sourceSets.test.allJava
70
+
71
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
72
+ jrubyArgs "-S"
73
+ script "gem"
74
+ scriptArgs "push", "pkg/${project.name}-${project.version}.gem"
73
75
  }
74
76
 
75
- task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) {
76
- jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
77
- script "build/gemspec"
78
- doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
77
+ task "package"(dependsOn: ["gemspec", "classpath"]) {
78
+ doLast {
79
+ println "> Build succeeded."
80
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
81
+ }
79
82
  }
80
83
 
81
- task gemspec << { file("build/gemspec").write($/
84
+ task gemspec {
85
+ ext.gemspecFile = file("${project.name}.gemspec")
86
+ inputs.file "build.gradle"
87
+ outputs.file gemspecFile
88
+ doLast { gemspecFile.write($/
82
89
  Gem::Specification.new do |spec|
83
90
  spec.name = "${project.name}"
84
91
  spec.version = "${project.version}"
85
- spec.authors = ["Daisuke Higashi"]
92
+ spec.authors = ["Daisuke Higashi", "Civitaspo"]
86
93
  spec.summary = %[Dynamodb input plugin for Embulk]
87
94
  spec.description = %["Loads records from Dynamodb."]
88
- spec.email = ["daisuke.develop@gmail.com"]
95
+ spec.email = ["daisuke.develop@gmail.com", "civitaspo@gmail.com"]
89
96
  spec.licenses = ["MIT"]
90
97
  spec.homepage = "https://github.com/lulichn/embulk-input-dynamodb"
91
98
 
@@ -93,9 +100,11 @@ Gem::Specification.new do |spec|
93
100
  spec.test_files = spec.files.grep(%r"^(test|spec)/")
94
101
  spec.require_paths = ["lib"]
95
102
 
96
- #spec.add_dependency 'YOUR_GEM_DEPENDENCY', ['~> YOUR_GEM_DEPENDENCY_VERSION']
97
103
  spec.add_development_dependency 'bundler', ['~> 1.0']
98
- spec.add_development_dependency 'rake', ['>= 10.0']
104
+ spec.add_development_dependency 'rake', ['~> 12.0']
99
105
  end
100
106
  /$)
107
+ }
101
108
  }
109
+ clean { delete "${project.name}.gemspec" }
110
+