embulk-input-dynamodb 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +37 -4
- data/build.gradle +6 -6
- data/src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala +23 -0
- data/src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala +8 -2
- data/src/main/scala/org/embulk/input/dynamodb/PluginTask.scala +7 -0
- data/src/main/scala/org/embulk/input/dynamodb/ope/AbstractOperation.scala +101 -0
- data/src/main/scala/org/embulk/input/dynamodb/ope/QueryOperation.scala +50 -0
- data/src/main/scala/org/embulk/input/dynamodb/ope/ScanOperation.scala +51 -0
- data/src/test/resources/yaml/authMethodBasic.yml +1 -0
- data/src/test/resources/yaml/authMethodBasic_Error.yml +1 -0
- data/src/test/resources/yaml/authMethodEnv.yml +1 -0
- data/src/test/resources/yaml/authMethodProfile.yml +1 -0
- data/src/test/resources/yaml/dynamodb-local-query.yml +25 -0
- data/src/test/resources/yaml/{dynamodb-local.yml → dynamodb-local-scan.yml} +0 -0
- data/src/test/resources/yaml/notSetAuthMethod.yml +1 -0
- data/src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala +83 -0
- data/src/test/scala/org/embulk/input/dynamodb/{DynamoDBUtilTest.scala → ope/ScanOperationTest.scala} +5 -3
- data/test/run_dynamodb_local.sh +4 -4
- metadata +17 -12
- data/src/main/scala/org/embulk/input/dynamodb/DynamoDBUtil.scala +0 -155
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 324f33092c5bb362ecf9a804329e56eda0e545be
|
4
|
+
data.tar.gz: 00c350b4d52c76adf8291bd6abeee9d9b284eb10
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 09103d2d4bdbc12d22f51318b7e6be74bba3420d9ce1e0003e78c55b77f242c03e5a74d5f9a95233be08539bb2c0cce8fbb4b8ad151104f5730c3f76f3edfd7d
|
7
|
+
data.tar.gz: c7d278c4eac6260264652ce74c6901d416dc3a4ebd287ce1162d16e911628f18430aad3b34205524b7e53e4b10ae32958cbe26872d359124b883bad67f78eb93
|
data/README.md
CHANGED
@@ -24,17 +24,27 @@ Available values options are: `basic`, `env`, `instance`, `profile`, `properties
|
|
24
24
|
- **region**: Region Name (string, optional)
|
25
25
|
- **end_point**: EndPoint URL (string, optional)
|
26
26
|
`end_point` has priority when `region` and `end_point` are specified.
|
27
|
+
- **operation**: Operation Type (string, required)
|
28
|
+
Available types are: `scan`, `query`
|
27
29
|
- **table**: Table Name (string, required)
|
28
|
-
- **
|
29
|
-
|
30
|
+
- **filters**: Query Filters
|
31
|
+
Required for the `query` operation; optional for `scan`.
|
32
|
+
- **name**: Column name.
|
33
|
+
- **type**: Column type.
|
34
|
+
- **condition**: Comparison Operator.
|
35
|
+
- **value(s)**: Attribute Value(s).
|
36
|
+
- **limit**: Maximum number of items requested in a single DynamoDB Scan/Query call (Int, optional)
|
37
|
+
- **scan_limit**: Maximum number of items requested in a single DynamoDB Scan/Query call (Deprecated, Int, optional)
|
38
|
+
- **record_limit**: Maximum total number of records to read (Long, optional)
|
30
39
|
- **columns**: key-value pairs where the key is a column name and the value is the options for that column (required)
|
31
40
|
- **name**: Column name.
|
32
41
|
- **type**: Column values are converted to this embulk type.
|
33
42
|
Available values are: `boolean`, `long`, `double`, `string`, `json`
|
34
|
-
- **filters**: query filter (optional)
|
35
43
|
|
36
44
|
## Example
|
37
45
|
|
46
|
+
- Scan Operation
|
47
|
+
|
38
48
|
```yaml
|
39
49
|
in:
|
40
50
|
type: dynamodb
|
@@ -42,6 +52,7 @@ in:
|
|
42
52
|
access_key: YOUR_ACCESS_KEY
|
43
53
|
secret_key: YOUR_SECRET_KEY
|
44
54
|
region: ap-northeast-1
|
55
|
+
operation: scan
|
45
56
|
table: YOUR_TABLE_NAME
|
46
57
|
columns:
|
47
58
|
- {name: ColumnA, type: long}
|
@@ -57,11 +68,33 @@ out:
|
|
57
68
|
type: stdout
|
58
69
|
```
|
59
70
|
|
71
|
+
- Query Operation
|
72
|
+
|
73
|
+
```yaml
|
74
|
+
in:
|
75
|
+
type: dynamodb
|
76
|
+
auth_method: env
|
77
|
+
region: ap-northeast-1
|
78
|
+
operation: query
|
79
|
+
table: YOUR_TABLE_NAME
|
80
|
+
columns:
|
81
|
+
- {name: ColumnA, type: long}
|
82
|
+
- {name: ColumnB, type: double}
|
83
|
+
- {name: ColumnC, type: string}
|
84
|
+
- {name: ColumnD, type: boolean}
|
85
|
+
- {name: ColumnE, type: json}
|
86
|
+
filters:
|
87
|
+
- {name: ColumnA, type: long, condition: EQ, value: 10000}
|
88
|
+
|
89
|
+
out:
|
90
|
+
type: stdout
|
91
|
+
```
|
92
|
+
|
60
93
|
## Try
|
61
94
|
|
62
95
|
```
|
63
96
|
$ ./gradlew classpath
|
64
|
-
$ embulk preview -I lib your-
|
97
|
+
$ embulk preview -I lib your-config.yml
|
65
98
|
```
|
66
99
|
|
67
100
|
## Build
|
data/build.gradle
CHANGED
@@ -14,22 +14,22 @@ configurations {
|
|
14
14
|
provided
|
15
15
|
}
|
16
16
|
|
17
|
-
version = "0.
|
17
|
+
version = "0.2.0"
|
18
18
|
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
21
21
|
|
22
22
|
dependencies {
|
23
|
-
compile "org.scala-lang:scala-library:2.11.
|
23
|
+
compile "org.scala-lang:scala-library:2.11.8"
|
24
24
|
|
25
|
-
compile "org.embulk:embulk-core:0.8.
|
26
|
-
provided "org.embulk:embulk-core:0.8.
|
25
|
+
compile "org.embulk:embulk-core:0.8.13"
|
26
|
+
provided "org.embulk:embulk-core:0.8.13"
|
27
27
|
|
28
28
|
compile "com.amazonaws:aws-java-sdk-dynamodb:1.10.43"
|
29
29
|
|
30
30
|
testCompile "junit:junit:4.+"
|
31
|
-
testCompile "org.embulk:embulk-standards:0.8.
|
32
|
-
testCompile "org.embulk:embulk-core:0.8.
|
31
|
+
testCompile "org.embulk:embulk-standards:0.8.13"
|
32
|
+
testCompile "org.embulk:embulk-core:0.8.13:tests"
|
33
33
|
}
|
34
34
|
|
35
35
|
compileScala {
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package org.embulk.input.dynamodb
|
2
|
+
|
3
|
+
import com.amazonaws.ClientConfiguration
|
4
|
+
import com.amazonaws.regions.Regions
|
5
|
+
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
6
|
+
import org.embulk.config.ConfigException
|
7
|
+
|
8
|
+
object DynamoDBClient {
|
9
|
+
def create(task: PluginTask): AmazonDynamoDBClient = {
|
10
|
+
val client = new AmazonDynamoDBClient(
|
11
|
+
AwsCredentials.getCredentialsProvider(task),
|
12
|
+
new ClientConfiguration()
|
13
|
+
.withMaxConnections(50)) // SDK Default Value
|
14
|
+
|
15
|
+
if (task.getEndPoint.isPresent) {
|
16
|
+
client.withEndpoint(task.getEndPoint.get())
|
17
|
+
} else if (task.getRegion.isPresent) {
|
18
|
+
client.withRegion(Regions.fromName(task.getRegion.get()))
|
19
|
+
} else {
|
20
|
+
throw new ConfigException("At least one of EndPoint or Region must be set")
|
21
|
+
}
|
22
|
+
}
|
23
|
+
}
|
@@ -4,6 +4,7 @@ import java.util.{List => JList}
|
|
4
4
|
|
5
5
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
6
6
|
import org.embulk.config._
|
7
|
+
import org.embulk.input.dynamodb.ope.{QueryOperation, ScanOperation}
|
7
8
|
import org.embulk.spi._
|
8
9
|
|
9
10
|
class DynamodbInputPlugin extends InputPlugin {
|
@@ -24,8 +25,13 @@ class DynamodbInputPlugin extends InputPlugin {
|
|
24
25
|
def run(taskSource: TaskSource, schema: Schema, taskIndex: Int, output: PageOutput): TaskReport = {
|
25
26
|
val task: PluginTask = taskSource.loadTask(classOf[PluginTask])
|
26
27
|
|
27
|
-
|
28
|
-
|
28
|
+
val client: AmazonDynamoDBClient = DynamoDBClient.create(task)
|
29
|
+
|
30
|
+
val ope = task.getOperation.toLowerCase match {
|
31
|
+
case "scan" => new ScanOperation(client)
|
32
|
+
case "query" => new QueryOperation(client)
|
33
|
+
}
|
34
|
+
ope.execute(task, schema, output)
|
29
35
|
|
30
36
|
Exec.newTaskReport()
|
31
37
|
}
|
@@ -29,6 +29,13 @@ trait PluginTask extends Task {
|
|
29
29
|
@ConfigDefault("null")
|
30
30
|
def getEndPoint: Optional[String]
|
31
31
|
|
32
|
+
@Config("operation")
|
33
|
+
def getOperation: String
|
34
|
+
|
35
|
+
@Config("limit")
|
36
|
+
@ConfigDefault("0")
|
37
|
+
def getLimit: Long
|
38
|
+
|
32
39
|
@Config("scan_limit")
|
33
40
|
@ConfigDefault("0")
|
34
41
|
def getScanLimit: Long
|
@@ -0,0 +1,101 @@
|
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
|
+
|
3
|
+
import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition}
|
4
|
+
import org.embulk.input.dynamodb.{AttributeValueHelper, PluginTask}
|
5
|
+
import org.embulk.spi._
|
6
|
+
import org.embulk.spi.`type`.Types
|
7
|
+
import org.msgpack.value.{Value, ValueFactory}
|
8
|
+
|
9
|
+
import scala.collection.JavaConverters._
|
10
|
+
|
11
|
+
abstract class AbstractOperation {
|
12
|
+
def execute(task: PluginTask, schema: Schema, output: PageOutput): Unit
|
13
|
+
|
14
|
+
def getLimit(limit: Long, recordLimit: Long, recordCount: Long): Int = {
|
15
|
+
if (limit > 0 && recordLimit > 0) {
|
16
|
+
math.min(limit, recordLimit - recordCount).toInt
|
17
|
+
} else if (limit > 0 || recordLimit > 0) {
|
18
|
+
math.max(limit, recordLimit).toInt
|
19
|
+
} else { 0 }
|
20
|
+
}
|
21
|
+
|
22
|
+
def createFilters(task: PluginTask): Map[String, Condition] = {
|
23
|
+
val filterMap = collection.mutable.HashMap[String, Condition]()
|
24
|
+
|
25
|
+
Option(task.getFilters.orNull).map { filters =>
|
26
|
+
filters.getFilters.asScala.map { filter =>
|
27
|
+
val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
|
28
|
+
attributeValueList += createAttributeValue(filter.getType, filter.getValue)
|
29
|
+
Option(filter.getValue2).map { value2 =>
|
30
|
+
attributeValueList+= createAttributeValue(filter.getType, value2) }
|
31
|
+
|
32
|
+
filterMap += filter.getName -> new Condition()
|
33
|
+
.withComparisonOperator(filter.getCondition)
|
34
|
+
.withAttributeValueList(attributeValueList.asJava)
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
filterMap.toMap
|
39
|
+
}
|
40
|
+
|
41
|
+
def createAttributeValue(t: String, v: String): AttributeValue = {
|
42
|
+
t match {
|
43
|
+
case "string" =>
|
44
|
+
new AttributeValue().withS(v)
|
45
|
+
case "long" | "double" =>
|
46
|
+
new AttributeValue().withN(v)
|
47
|
+
case "boolean" =>
|
48
|
+
new AttributeValue().withBOOL(v.toBoolean)
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
def write(pageBuilder: PageBuilder, schema: Schema, items: Seq[Map[String, AttributeValue]]): Long = {
|
53
|
+
var count = 0
|
54
|
+
|
55
|
+
items.foreach { item =>
|
56
|
+
schema.getColumns.asScala.foreach { column =>
|
57
|
+
val value = item.get(column.getName)
|
58
|
+
column.getType match {
|
59
|
+
case Types.STRING =>
|
60
|
+
convert(column, value, pageBuilder.setString)
|
61
|
+
case Types.LONG =>
|
62
|
+
convert(column, value, pageBuilder.setLong)
|
63
|
+
case Types.DOUBLE =>
|
64
|
+
convert(column, value, pageBuilder.setDouble)
|
65
|
+
case Types.BOOLEAN =>
|
66
|
+
convert(column, value, pageBuilder.setBoolean)
|
67
|
+
case Types.JSON =>
|
68
|
+
convert(column, value, pageBuilder.setJson)
|
69
|
+
case _ => /* Do nothing */
|
70
|
+
}
|
71
|
+
}
|
72
|
+
pageBuilder.addRecord()
|
73
|
+
count += 1
|
74
|
+
}
|
75
|
+
|
76
|
+
count
|
77
|
+
}
|
78
|
+
|
79
|
+
def convert[A](column: Column,
|
80
|
+
value: Option[AttributeValue],
|
81
|
+
f: (Column, A) => Unit)(implicit f1: Option[AttributeValue] => A): Unit =
|
82
|
+
f(column, f1(value))
|
83
|
+
|
84
|
+
implicit def StringConvert(value: Option[AttributeValue]): String =
|
85
|
+
value.map(_.getS).getOrElse("")
|
86
|
+
|
87
|
+
implicit def LongConvert(value: Option[AttributeValue]): Long =
|
88
|
+
value.map(_.getN.toLong).getOrElse(0L)
|
89
|
+
|
90
|
+
implicit def DoubleConvert(value: Option[AttributeValue]): Double =
|
91
|
+
value.map(_.getN.toDouble).getOrElse(0D)
|
92
|
+
|
93
|
+
implicit def BooleanConvert(value: Option[AttributeValue]): Boolean =
|
94
|
+
value.exists(_.getBOOL)
|
95
|
+
|
96
|
+
implicit def JsonConvert(value: Option[AttributeValue]): Value = {
|
97
|
+
value.map { attr =>
|
98
|
+
AttributeValueHelper.decodeToValue(attr)
|
99
|
+
}.getOrElse(ValueFactory.newNil())
|
100
|
+
}
|
101
|
+
}
|
@@ -0,0 +1,50 @@
|
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
|
+
|
3
|
+
import java.util.{List => JList, Map => JMap}
|
4
|
+
|
5
|
+
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
6
|
+
import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, QueryRequest, QueryResult}
|
7
|
+
import org.embulk.input.dynamodb.PluginTask
|
8
|
+
import org.embulk.spi.{BufferAllocator, PageBuilder, PageOutput, Schema}
|
9
|
+
|
10
|
+
import scala.collection.JavaConverters._
|
11
|
+
|
12
|
+
class QueryOperation(client: AmazonDynamoDBClient) extends AbstractOperation {
|
13
|
+
override def execute(task: PluginTask,
|
14
|
+
schema: Schema,
|
15
|
+
output: PageOutput): Unit =
|
16
|
+
{
|
17
|
+
val allocator: BufferAllocator = task.getBufferAllocator
|
18
|
+
val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
|
19
|
+
|
20
|
+
val attributes: JList[String] = schema.getColumns.asScala.map(_.getName).asJava
|
21
|
+
val conditions: JMap[String, Condition] = createFilters(task).asJava
|
22
|
+
var evaluateKey: JMap[String, AttributeValue] = null
|
23
|
+
|
24
|
+
val limit: Long = math.max(task.getScanLimit, task.getLimit)
|
25
|
+
val recordLimit: Long = task.getRecordLimit
|
26
|
+
var recordCount: Long = 0
|
27
|
+
|
28
|
+
do {
|
29
|
+
val batchSize = getLimit(limit, recordLimit, recordCount)
|
30
|
+
|
31
|
+
val request: QueryRequest = new QueryRequest()
|
32
|
+
.withTableName(task.getTable)
|
33
|
+
.withAttributesToGet(attributes)
|
34
|
+
.withKeyConditions(conditions)
|
35
|
+
.withExclusiveStartKey(evaluateKey)
|
36
|
+
|
37
|
+
if (batchSize > 0) {
|
38
|
+
request.setLimit(batchSize)
|
39
|
+
}
|
40
|
+
|
41
|
+
val result: QueryResult = client.query(request)
|
42
|
+
evaluateKey = result.getLastEvaluatedKey
|
43
|
+
|
44
|
+
val items = result.getItems.asScala.map(_.asScala.toMap)
|
45
|
+
recordCount += write(pageBuilder, schema, items)
|
46
|
+
} while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
|
47
|
+
|
48
|
+
pageBuilder.finish()
|
49
|
+
}
|
50
|
+
}
|
@@ -0,0 +1,51 @@
|
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
|
+
|
3
|
+
import java.util.{List => JList, Map => JMap}
|
4
|
+
|
5
|
+
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
6
|
+
import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, ScanRequest, ScanResult}
|
7
|
+
import org.embulk.input.dynamodb.PluginTask
|
8
|
+
import org.embulk.spi.{BufferAllocator, PageBuilder, PageOutput, Schema}
|
9
|
+
|
10
|
+
import scala.collection.JavaConverters._
|
11
|
+
|
12
|
+
class ScanOperation(client: AmazonDynamoDBClient) extends AbstractOperation {
|
13
|
+
override def execute(
|
14
|
+
task: PluginTask,
|
15
|
+
schema: Schema,
|
16
|
+
output: PageOutput): Unit =
|
17
|
+
{
|
18
|
+
val allocator: BufferAllocator = task.getBufferAllocator
|
19
|
+
val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
|
20
|
+
|
21
|
+
val attributes: JList[String] = schema.getColumns.asScala.map(_.getName).asJava
|
22
|
+
val scanFilter: JMap[String, Condition] = createFilters(task).asJava
|
23
|
+
var evaluateKey: JMap[String, AttributeValue] = null
|
24
|
+
|
25
|
+
val scanLimit: Long = task.getScanLimit
|
26
|
+
val recordLimit: Long = task.getRecordLimit
|
27
|
+
var recordCount: Long = 0
|
28
|
+
|
29
|
+
do {
|
30
|
+
val batchSize = getLimit(scanLimit, recordLimit, recordCount)
|
31
|
+
|
32
|
+
val request: ScanRequest = new ScanRequest()
|
33
|
+
.withTableName(task.getTable)
|
34
|
+
.withAttributesToGet(attributes)
|
35
|
+
.withScanFilter(scanFilter)
|
36
|
+
.withExclusiveStartKey(evaluateKey)
|
37
|
+
|
38
|
+
if (batchSize > 0) {
|
39
|
+
request.setLimit(batchSize)
|
40
|
+
}
|
41
|
+
|
42
|
+
val result: ScanResult = client.scan(request)
|
43
|
+
evaluateKey = result.getLastEvaluatedKey
|
44
|
+
|
45
|
+
val items = result.getItems.asScala.map(_.asScala.toMap)
|
46
|
+
recordCount += write(pageBuilder, schema, items)
|
47
|
+
} while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
|
48
|
+
|
49
|
+
pageBuilder.finish()
|
50
|
+
}
|
51
|
+
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
in:
|
2
|
+
type: dynamodb
|
3
|
+
end_point: http://localhost:8000/
|
4
|
+
table: ENV_VAR
|
5
|
+
auth_method: basic
|
6
|
+
access_key: dummy
|
7
|
+
secret_key: dummy
|
8
|
+
filters:
|
9
|
+
- {name: pri-key, type: string, condition: EQ, value: key-1}
|
10
|
+
columns:
|
11
|
+
- {name: pri-key, type: string}
|
12
|
+
- {name: sort-key, type: long}
|
13
|
+
- {name: doubleValue, type: double}
|
14
|
+
- {name: boolValue, type: boolean}
|
15
|
+
- {name: listValue, type: json}
|
16
|
+
- {name: mapValue, type: json}
|
17
|
+
|
18
|
+
out:
|
19
|
+
type: file
|
20
|
+
path_prefix: dynamodb-local-result
|
21
|
+
file_ext: tsv
|
22
|
+
formatter:
|
23
|
+
type: csv
|
24
|
+
delimiter: "\t"
|
25
|
+
header_line: false
|
File without changes
|
@@ -0,0 +1,83 @@
|
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
|
+
|
3
|
+
import java.io.File
|
4
|
+
import java.nio.charset.Charset
|
5
|
+
import java.nio.file.{FileSystems, Files}
|
6
|
+
|
7
|
+
import com.fasterxml.jackson.databind.ObjectMapper
|
8
|
+
import com.google.inject.{Binder, Module}
|
9
|
+
import org.embulk.EmbulkEmbed
|
10
|
+
import org.embulk.config.ConfigSource
|
11
|
+
import org.embulk.input.dynamodb.DynamodbInputPlugin
|
12
|
+
import org.embulk.plugin.InjectedPluginSource
|
13
|
+
import org.embulk.spi.InputPlugin
|
14
|
+
import org.hamcrest.CoreMatchers._
|
15
|
+
import org.junit.Assert._
|
16
|
+
import org.junit.{Before, Test}
|
17
|
+
|
18
|
+
class QueryOperationTest {
|
19
|
+
private var embulk: EmbulkEmbed = null
|
20
|
+
|
21
|
+
private var EMBULK_DYNAMODB_TEST_TABLE: String = null
|
22
|
+
private var mapper: ObjectMapper = null
|
23
|
+
|
24
|
+
@Before
|
25
|
+
def createResources() {
|
26
|
+
// Get Environments
|
27
|
+
EMBULK_DYNAMODB_TEST_TABLE = System.getenv("EMBULK_DYNAMODB_TEST_TABLE")
|
28
|
+
|
29
|
+
val bootstrap = new EmbulkEmbed.Bootstrap()
|
30
|
+
bootstrap.addModules(new Module {
|
31
|
+
def configure(binder: Binder): Unit = {
|
32
|
+
InjectedPluginSource.registerPluginTo(binder,
|
33
|
+
classOf[InputPlugin],
|
34
|
+
"dynamodb",
|
35
|
+
classOf[DynamodbInputPlugin])
|
36
|
+
}
|
37
|
+
})
|
38
|
+
|
39
|
+
embulk = bootstrap.initializeCloseable()
|
40
|
+
|
41
|
+
mapper = new ObjectMapper()
|
42
|
+
}
|
43
|
+
|
44
|
+
|
45
|
+
def doTest(config: ConfigSource) {
|
46
|
+
embulk.run(config)
|
47
|
+
|
48
|
+
val fs = FileSystems.getDefault
|
49
|
+
val lines = Files.readAllLines(fs.getPath("dynamodb-local-result000.00.tsv"), Charset.forName("UTF-8"))
|
50
|
+
assertEquals(lines.size, 1)
|
51
|
+
|
52
|
+
val head = lines.get(0)
|
53
|
+
val values = head.split("\t")
|
54
|
+
|
55
|
+
assertThat(values(0), is("key-1"))
|
56
|
+
assertThat(values(1), is("0"))
|
57
|
+
assertThat(values(2), is("42.195"))
|
58
|
+
assertThat(values(3), is("true"))
|
59
|
+
|
60
|
+
val listValue = mapper.readValue(values(4).replaceAll("\"(?!\")", ""), classOf[java.util.List[Object]])
|
61
|
+
assertThat(listValue.size(), is(2))
|
62
|
+
assertThat(listValue.get(0).asInstanceOf[String], is("list-value"))
|
63
|
+
assertThat(listValue.get(1).asInstanceOf[Int], is(123))
|
64
|
+
|
65
|
+
val mapValue = mapper.readValue(values(5).replaceAll("\"(?!\")", ""), classOf[java.util.Map[String, Object]])
|
66
|
+
assert(mapValue.containsKey("map-key-1"))
|
67
|
+
assertThat(mapValue.get("map-key-1").asInstanceOf[String], is("map-value-1"))
|
68
|
+
assert(mapValue.containsKey("map-key-2"))
|
69
|
+
assertThat(mapValue.get("map-key-2").asInstanceOf[Int], is(456))
|
70
|
+
}
|
71
|
+
|
72
|
+
@Test
|
73
|
+
def queryTest() {
|
74
|
+
val config = embulk.newConfigLoader().fromYamlFile(
|
75
|
+
new File("src/test/resources/yaml/dynamodb-local-query.yml"))
|
76
|
+
|
77
|
+
config.getNested("in")
|
78
|
+
.set("operation", "query")
|
79
|
+
.set("table", EMBULK_DYNAMODB_TEST_TABLE)
|
80
|
+
|
81
|
+
doTest(config)
|
82
|
+
}
|
83
|
+
}
|
data/src/test/scala/org/embulk/input/dynamodb/{DynamoDBUtilTest.scala → ope/ScanOperationTest.scala}
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
package org.embulk.input.dynamodb
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
2
|
|
3
3
|
import java.io.File
|
4
4
|
import java.nio.charset.Charset
|
@@ -8,13 +8,14 @@ import com.fasterxml.jackson.databind.ObjectMapper
|
|
8
8
|
import com.google.inject.{Binder, Module}
|
9
9
|
import org.embulk.EmbulkEmbed
|
10
10
|
import org.embulk.config.ConfigSource
|
11
|
+
import org.embulk.input.dynamodb.DynamodbInputPlugin
|
11
12
|
import org.embulk.plugin.InjectedPluginSource
|
12
13
|
import org.embulk.spi.InputPlugin
|
13
14
|
import org.hamcrest.CoreMatchers._
|
14
15
|
import org.junit.Assert._
|
15
16
|
import org.junit.{Before, Test}
|
16
17
|
|
17
|
-
class
|
18
|
+
class ScanOperationTest {
|
18
19
|
private var embulk: EmbulkEmbed = null
|
19
20
|
|
20
21
|
private var EMBULK_DYNAMODB_TEST_TABLE: String = null
|
@@ -71,9 +72,10 @@ class DynamoDBUtilTest {
|
|
71
72
|
@Test
|
72
73
|
def scanTest() {
|
73
74
|
val config = embulk.newConfigLoader().fromYamlFile(
|
74
|
-
new File("src/test/resources/yaml/dynamodb-local.yml"))
|
75
|
+
new File("src/test/resources/yaml/dynamodb-local-scan.yml"))
|
75
76
|
|
76
77
|
config.getNested("in")
|
78
|
+
.set("operation", "scan")
|
77
79
|
.set("table", EMBULK_DYNAMODB_TEST_TABLE)
|
78
80
|
|
79
81
|
doTest(config)
|
data/test/run_dynamodb_local.sh
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-dynamodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daisuke Higashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,11 +59,14 @@ files:
|
|
59
59
|
- lib/embulk/input/dynamodb.rb
|
60
60
|
- src/main/scala/org/embulk/input/dynamodb/AttributeValueHelper.scala
|
61
61
|
- src/main/scala/org/embulk/input/dynamodb/AwsCredentials.scala
|
62
|
-
- src/main/scala/org/embulk/input/dynamodb/
|
62
|
+
- src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala
|
63
63
|
- src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala
|
64
64
|
- src/main/scala/org/embulk/input/dynamodb/Filter.scala
|
65
65
|
- src/main/scala/org/embulk/input/dynamodb/FilterConfig.scala
|
66
66
|
- src/main/scala/org/embulk/input/dynamodb/PluginTask.scala
|
67
|
+
- src/main/scala/org/embulk/input/dynamodb/ope/AbstractOperation.scala
|
68
|
+
- src/main/scala/org/embulk/input/dynamodb/ope/QueryOperation.scala
|
69
|
+
- src/main/scala/org/embulk/input/dynamodb/ope/ScanOperation.scala
|
67
70
|
- src/main/scala/org/embulk/input/dynamodb/package.scala
|
68
71
|
- src/test/resources/json/test.json
|
69
72
|
- src/test/resources/json/test.template
|
@@ -71,24 +74,26 @@ files:
|
|
71
74
|
- src/test/resources/yaml/authMethodBasic_Error.yml
|
72
75
|
- src/test/resources/yaml/authMethodEnv.yml
|
73
76
|
- src/test/resources/yaml/authMethodProfile.yml
|
74
|
-
- src/test/resources/yaml/dynamodb-local.yml
|
77
|
+
- src/test/resources/yaml/dynamodb-local-query.yml
|
78
|
+
- src/test/resources/yaml/dynamodb-local-scan.yml
|
75
79
|
- src/test/resources/yaml/notSetAuthMethod.yml
|
76
80
|
- src/test/scala/org/embulk/input/dynamodb/AttributeValueHelperTest.scala
|
77
81
|
- src/test/scala/org/embulk/input/dynamodb/AwsCredentialsTest.scala
|
78
|
-
- src/test/scala/org/embulk/input/dynamodb/
|
82
|
+
- src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala
|
83
|
+
- src/test/scala/org/embulk/input/dynamodb/ope/ScanOperationTest.scala
|
79
84
|
- test/create_table.sh
|
80
85
|
- test/put_items.sh
|
81
86
|
- test/run_dynamodb_local.sh
|
82
|
-
- classpath/
|
83
|
-
- classpath/commons-codec-1.6.jar
|
84
|
-
- classpath/scala-library-2.11.7.jar
|
87
|
+
- classpath/aws-java-sdk-core-1.10.43.jar
|
85
88
|
- classpath/aws-java-sdk-dynamodb-1.10.43.jar
|
86
|
-
- classpath/embulk-input-dynamodb-0.1.1.jar
|
87
89
|
- classpath/aws-java-sdk-kms-1.10.43.jar
|
88
|
-
- classpath/httpcore-4.3.3.jar
|
89
|
-
- classpath/httpclient-4.3.6.jar
|
90
|
-
- classpath/aws-java-sdk-core-1.10.43.jar
|
91
90
|
- classpath/aws-java-sdk-s3-1.10.43.jar
|
91
|
+
- classpath/commons-codec-1.6.jar
|
92
|
+
- classpath/commons-logging-1.1.3.jar
|
93
|
+
- classpath/embulk-input-dynamodb-0.2.0.jar
|
94
|
+
- classpath/httpclient-4.3.6.jar
|
95
|
+
- classpath/httpcore-4.3.3.jar
|
96
|
+
- classpath/scala-library-2.11.8.jar
|
92
97
|
homepage: https://github.com/lulichn/embulk-input-dynamodb
|
93
98
|
licenses:
|
94
99
|
- MIT
|
@@ -1,155 +0,0 @@
|
|
1
|
-
package org.embulk.input.dynamodb
|
2
|
-
|
3
|
-
import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
|
4
|
-
|
5
|
-
import com.amazonaws.ClientConfiguration
|
6
|
-
import com.amazonaws.regions.Regions
|
7
|
-
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
8
|
-
import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, ScanRequest, ScanResult}
|
9
|
-
import org.embulk.config.ConfigException
|
10
|
-
import org.embulk.spi._
|
11
|
-
import org.embulk.spi.`type`.Types
|
12
|
-
import org.msgpack.value.{Value, ValueFactory}
|
13
|
-
|
14
|
-
import scala.collection.JavaConverters._
|
15
|
-
|
16
|
-
object DynamoDBUtil {
|
17
|
-
def createClient(task: PluginTask): AmazonDynamoDBClient = {
|
18
|
-
val client = new AmazonDynamoDBClient(
|
19
|
-
AwsCredentials.getCredentialsProvider(task),
|
20
|
-
new ClientConfiguration()
|
21
|
-
.withMaxConnections(50)) // SDK Default Value
|
22
|
-
|
23
|
-
if (task.getEndPoint.isPresent) {
|
24
|
-
client.withEndpoint(task.getEndPoint.get())
|
25
|
-
} else if (task.getRegion.isPresent) {
|
26
|
-
client.withRegion(Regions.fromName(task.getRegion.get()))
|
27
|
-
} else {
|
28
|
-
throw new ConfigException("At least one of EndPoint or Region must be set")
|
29
|
-
}
|
30
|
-
}
|
31
|
-
|
32
|
-
|
33
|
-
def scan(
|
34
|
-
task: PluginTask,
|
35
|
-
schema: Schema,
|
36
|
-
output: PageOutput)
|
37
|
-
(implicit client: AmazonDynamoDBClient): Unit =
|
38
|
-
{
|
39
|
-
val allocator: BufferAllocator = task.getBufferAllocator
|
40
|
-
val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
|
41
|
-
|
42
|
-
val attributes: JList[String] = new JArrayList[String]()
|
43
|
-
|
44
|
-
schema.getColumns.asScala.foreach { column =>
|
45
|
-
attributes.add(column.getName)
|
46
|
-
}
|
47
|
-
val scanFilter: JMap[String, Condition] = createScanFilter(task).asJava
|
48
|
-
var evaluateKey: JMap[String, AttributeValue] = null
|
49
|
-
|
50
|
-
val scanLimit: Long = task.getScanLimit
|
51
|
-
val recordLimit: Long = task.getRecordLimit
|
52
|
-
var recordCount: Long = 0
|
53
|
-
|
54
|
-
do {
|
55
|
-
val batchSize = getScanLimit(scanLimit, recordLimit, recordCount)
|
56
|
-
|
57
|
-
val request: ScanRequest = new ScanRequest()
|
58
|
-
.withTableName(task.getTable)
|
59
|
-
.withAttributesToGet(attributes)
|
60
|
-
.withScanFilter(scanFilter)
|
61
|
-
.withExclusiveStartKey(evaluateKey)
|
62
|
-
|
63
|
-
if (batchSize > 0) {
|
64
|
-
request.setLimit(batchSize)
|
65
|
-
}
|
66
|
-
|
67
|
-
val result: ScanResult = client.scan(request)
|
68
|
-
evaluateKey = result.getLastEvaluatedKey
|
69
|
-
|
70
|
-
result.getItems.asScala.foreach { item =>
|
71
|
-
schema.getColumns.asScala.foreach { column =>
|
72
|
-
val value = item.asScala.get(column.getName)
|
73
|
-
column.getType match {
|
74
|
-
case Types.STRING =>
|
75
|
-
convert(column, value, pageBuilder.setString)
|
76
|
-
case Types.LONG =>
|
77
|
-
convert(column, value, pageBuilder.setLong)
|
78
|
-
case Types.DOUBLE =>
|
79
|
-
convert(column, value, pageBuilder.setDouble)
|
80
|
-
case Types.BOOLEAN =>
|
81
|
-
convert(column, value, pageBuilder.setBoolean)
|
82
|
-
case Types.JSON =>
|
83
|
-
convert(column, value, pageBuilder.setJson)
|
84
|
-
case _ => /* Do nothing */
|
85
|
-
}
|
86
|
-
}
|
87
|
-
pageBuilder.addRecord()
|
88
|
-
recordCount += 1
|
89
|
-
}
|
90
|
-
} while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
|
91
|
-
|
92
|
-
pageBuilder.finish()
|
93
|
-
}
|
94
|
-
|
95
|
-
private def getScanLimit(scanLimit: Long, recordLimit: Long, recordCount: Long): Int = {
|
96
|
-
if (scanLimit > 0 && recordLimit > 0) {
|
97
|
-
math.min(scanLimit, recordLimit - recordCount).toInt
|
98
|
-
} else if (scanLimit > 0 || recordLimit > 0) {
|
99
|
-
math.max(scanLimit, recordLimit).toInt
|
100
|
-
} else { 0 }
|
101
|
-
}
|
102
|
-
|
103
|
-
private def createScanFilter(task: PluginTask): Map[String, Condition] = {
|
104
|
-
val filterMap = collection.mutable.HashMap[String, Condition]()
|
105
|
-
|
106
|
-
Option(task.getFilters.orNull).map { filters =>
|
107
|
-
filters.getFilters.asScala.map { filter =>
|
108
|
-
val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
|
109
|
-
attributeValueList += createAttributeValue(filter.getType, filter.getValue)
|
110
|
-
Option(filter.getValue2).map { value2 =>
|
111
|
-
attributeValueList+= createAttributeValue(filter.getType, value2) }
|
112
|
-
|
113
|
-
filterMap += filter.getName -> new Condition()
|
114
|
-
.withComparisonOperator(filter.getCondition)
|
115
|
-
.withAttributeValueList(attributeValueList.asJava)
|
116
|
-
}
|
117
|
-
}
|
118
|
-
|
119
|
-
filterMap.toMap
|
120
|
-
}
|
121
|
-
|
122
|
-
private def createAttributeValue(t: String, v: String): AttributeValue = {
|
123
|
-
t match {
|
124
|
-
case "string" =>
|
125
|
-
new AttributeValue().withS(v)
|
126
|
-
case "long" | "double" =>
|
127
|
-
new AttributeValue().withN(v)
|
128
|
-
case "boolean" =>
|
129
|
-
new AttributeValue().withBOOL(v.toBoolean)
|
130
|
-
}
|
131
|
-
}
|
132
|
-
|
133
|
-
private def convert[A](column: Column,
|
134
|
-
value: Option[AttributeValue],
|
135
|
-
f: (Column, A) => Unit)(implicit f1: Option[AttributeValue] => A): Unit =
|
136
|
-
f(column, f1(value))
|
137
|
-
|
138
|
-
implicit private def StringConvert(value: Option[AttributeValue]): String =
|
139
|
-
value.map(_.getS).getOrElse("")
|
140
|
-
|
141
|
-
implicit private def LongConvert(value: Option[AttributeValue]): Long =
|
142
|
-
value.map(_.getN.toLong).getOrElse(0L)
|
143
|
-
|
144
|
-
implicit private def DoubleConvert(value: Option[AttributeValue]): Double =
|
145
|
-
value.map(_.getN.toDouble).getOrElse(0D)
|
146
|
-
|
147
|
-
implicit private def BooleanConvert(value: Option[AttributeValue]): Boolean =
|
148
|
-
value.exists(_.getBOOL)
|
149
|
-
|
150
|
-
implicit private def JsonConvert(value: Option[AttributeValue]): Value = {
|
151
|
-
value.map { attr =>
|
152
|
-
AttributeValueHelper.decodeToValue(attr)
|
153
|
-
}.getOrElse(ValueFactory.newNil())
|
154
|
-
}
|
155
|
-
}
|