embulk-input-dynamodb 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +37 -4
- data/build.gradle +6 -6
- data/src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala +23 -0
- data/src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala +8 -2
- data/src/main/scala/org/embulk/input/dynamodb/PluginTask.scala +7 -0
- data/src/main/scala/org/embulk/input/dynamodb/ope/AbstractOperation.scala +101 -0
- data/src/main/scala/org/embulk/input/dynamodb/ope/QueryOperation.scala +50 -0
- data/src/main/scala/org/embulk/input/dynamodb/ope/ScanOperation.scala +51 -0
- data/src/test/resources/yaml/authMethodBasic.yml +1 -0
- data/src/test/resources/yaml/authMethodBasic_Error.yml +1 -0
- data/src/test/resources/yaml/authMethodEnv.yml +1 -0
- data/src/test/resources/yaml/authMethodProfile.yml +1 -0
- data/src/test/resources/yaml/dynamodb-local-query.yml +25 -0
- data/src/test/resources/yaml/{dynamodb-local.yml → dynamodb-local-scan.yml} +0 -0
- data/src/test/resources/yaml/notSetAuthMethod.yml +1 -0
- data/src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala +83 -0
- data/src/test/scala/org/embulk/input/dynamodb/{DynamoDBUtilTest.scala → ope/ScanOperationTest.scala} +5 -3
- data/test/run_dynamodb_local.sh +4 -4
- metadata +17 -12
- data/src/main/scala/org/embulk/input/dynamodb/DynamoDBUtil.scala +0 -155
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 324f33092c5bb362ecf9a804329e56eda0e545be
|
4
|
+
data.tar.gz: 00c350b4d52c76adf8291bd6abeee9d9b284eb10
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 09103d2d4bdbc12d22f51318b7e6be74bba3420d9ce1e0003e78c55b77f242c03e5a74d5f9a95233be08539bb2c0cce8fbb4b8ad151104f5730c3f76f3edfd7d
|
7
|
+
data.tar.gz: c7d278c4eac6260264652ce74c6901d416dc3a4ebd287ce1162d16e911628f18430aad3b34205524b7e53e4b10ae32958cbe26872d359124b883bad67f78eb93
|
data/README.md
CHANGED
@@ -24,17 +24,27 @@ Available values options are: `basic`, `env`, `instance`, `profile`, `properties
|
|
24
24
|
- **region**: Region Name (string, optional)
|
25
25
|
- **end_point**: EndPoint URL (string, optional)
|
26
26
|
`end_point` has priority when `region` and `end_point` are specified.
|
27
|
+
- **operation**: Operation Type (string, required)
|
28
|
+
Available types are: `scan`, `query`
|
27
29
|
- **table**: Table Name (string, required)
|
28
|
-
- **
|
29
|
-
|
30
|
+
- **filters**: Query Filters
|
31
|
+
Required for the `query` operation; optional for `scan`.
|
32
|
+
- **name**: Column name.
|
33
|
+
- **type**: Column type.
|
34
|
+
- **condition**: Comparison Operator.
|
35
|
+
- **value(s)**: Attribute Value(s).
|
36
|
+
- **limit**: DynamoDB 1-time Scan/Query Operation size limit (Int, optional)
|
37
|
+
- **scan_limit**: DynamoDB 1-time Scan Query size limit (Deprecated, Int, optional)
|
38
|
+
- **record_limit**: Max Record Search limit (Long, optional)
|
30
39
|
- **columns**: key-value pairs where the key is a column name and the value is the options for that column (required)
|
31
40
|
- **name**: Column name.
|
32
41
|
- **type**: Column values are converted to this embulk type.
|
33
42
|
Available values options are: `boolean`, `long`, `double`, `string`, `json`
|
34
|
-
- **filters**: query filter (optional)
|
35
43
|
|
36
44
|
## Example
|
37
45
|
|
46
|
+
- Scan Operation
|
47
|
+
|
38
48
|
```yaml
|
39
49
|
in:
|
40
50
|
type: dynamodb
|
@@ -42,6 +52,7 @@ in:
|
|
42
52
|
access_key: YOUR_ACCESS_KEY
|
43
53
|
secret_key: YOUR_SECRET_KEY
|
44
54
|
region: ap-northeast-1
|
55
|
+
operation: scan
|
45
56
|
table: YOUR_TABLE_NAME
|
46
57
|
columns:
|
47
58
|
- {name: ColumnA, type: long}
|
@@ -57,11 +68,33 @@ out:
|
|
57
68
|
type: stdout
|
58
69
|
```
|
59
70
|
|
71
|
+
- Query Operation
|
72
|
+
|
73
|
+
```yaml
|
74
|
+
in:
|
75
|
+
type: dynamodb
|
76
|
+
auth_method: env
|
77
|
+
region: ap-northeast-1
|
78
|
+
operation: query
|
79
|
+
table: YOUR_TABLE_NAME
|
80
|
+
columns:
|
81
|
+
- {name: ColumnA, type: long}
|
82
|
+
- {name: ColumnB, type: double}
|
83
|
+
- {name: ColumnC, type: string}
|
84
|
+
- {name: ColumnD, type: boolean}
|
85
|
+
- {name: ColumnE, type: json}
|
86
|
+
filters:
|
87
|
+
- {name: ColumnA, type: long, condition: EQ, value: 10000}
|
88
|
+
|
89
|
+
out:
|
90
|
+
type: stdout
|
91
|
+
```
|
92
|
+
|
60
93
|
## Try
|
61
94
|
|
62
95
|
```
|
63
96
|
$ ./gradlew classpath
|
64
|
-
$ embulk preview -I lib your-
|
97
|
+
$ embulk preview -I lib your-config.yml
|
65
98
|
```
|
66
99
|
|
67
100
|
## Build
|
data/build.gradle
CHANGED
@@ -14,22 +14,22 @@ configurations {
|
|
14
14
|
provided
|
15
15
|
}
|
16
16
|
|
17
|
-
version = "0.
|
17
|
+
version = "0.2.0"
|
18
18
|
|
19
19
|
sourceCompatibility = 1.7
|
20
20
|
targetCompatibility = 1.7
|
21
21
|
|
22
22
|
dependencies {
|
23
|
-
compile "org.scala-lang:scala-library:2.11.
|
23
|
+
compile "org.scala-lang:scala-library:2.11.8"
|
24
24
|
|
25
|
-
compile "org.embulk:embulk-core:0.8.
|
26
|
-
provided "org.embulk:embulk-core:0.8.
|
25
|
+
compile "org.embulk:embulk-core:0.8.13"
|
26
|
+
provided "org.embulk:embulk-core:0.8.13"
|
27
27
|
|
28
28
|
compile "com.amazonaws:aws-java-sdk-dynamodb:1.10.43"
|
29
29
|
|
30
30
|
testCompile "junit:junit:4.+"
|
31
|
-
testCompile "org.embulk:embulk-standards:0.8.
|
32
|
-
testCompile "org.embulk:embulk-core:0.8.
|
31
|
+
testCompile "org.embulk:embulk-standards:0.8.13"
|
32
|
+
testCompile "org.embulk:embulk-core:0.8.13:tests"
|
33
33
|
}
|
34
34
|
|
35
35
|
compileScala {
|
@@ -0,0 +1,23 @@
|
|
1
|
+
package org.embulk.input.dynamodb
|
2
|
+
|
3
|
+
import com.amazonaws.ClientConfiguration
|
4
|
+
import com.amazonaws.regions.Regions
|
5
|
+
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
6
|
+
import org.embulk.config.ConfigException
|
7
|
+
|
8
|
+
/**
  * Factory for the DynamoDB client used by the input plugin.
  */
object DynamoDBClient {
  /**
    * Builds an `AmazonDynamoDBClient` from the plugin task.
    *
    * Credentials come from `AwsCredentials.getCredentialsProvider(task)`. The
    * end point is applied when it is present; the region is only consulted
    * when no end point is configured.
    *
    * @param task plugin task carrying the optional end point and region
    * @return the client with either the end point or the region applied
    * @throws ConfigException when neither an end point nor a region is set
    */
  def create(task: PluginTask): AmazonDynamoDBClient = {
    val configuration = new ClientConfiguration().withMaxConnections(50) // SDK Default Value
    val dynamoDB = new AmazonDynamoDBClient(AwsCredentials.getCredentialsProvider(task), configuration)

    val endPoint = task.getEndPoint
    if (endPoint.isPresent) {
      dynamoDB.withEndpoint(endPoint.get())
    } else {
      val region = task.getRegion
      if (!region.isPresent) {
        throw new ConfigException("At least one of EndPoint or Region must be set")
      }
      dynamoDB.withRegion(Regions.fromName(region.get()))
    }
  }
}
|
@@ -4,6 +4,7 @@ import java.util.{List => JList}
|
|
4
4
|
|
5
5
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
6
6
|
import org.embulk.config._
|
7
|
+
import org.embulk.input.dynamodb.ope.{QueryOperation, ScanOperation}
|
7
8
|
import org.embulk.spi._
|
8
9
|
|
9
10
|
class DynamodbInputPlugin extends InputPlugin {
|
@@ -24,8 +25,13 @@ class DynamodbInputPlugin extends InputPlugin {
|
|
24
25
|
/**
  * Runs one input task: loads the plugin task, creates the DynamoDB client,
  * executes the configured operation against it and returns a new task report.
  *
  * @param taskSource serialized task configuration
  * @param schema     output schema; its columns are passed to the operation
  * @param taskIndex  index of this task (not used here)
  * @param output     page output the operation writes records to
  * @return a new, empty task report
  * @throws ConfigException when `operation` is neither `scan` nor `query`
  */
def run(taskSource: TaskSource, schema: Schema, taskIndex: Int, output: PageOutput): TaskReport = {
  val task: PluginTask = taskSource.loadTask(classOf[PluginTask])

  val client: AmazonDynamoDBClient = DynamoDBClient.create(task)
  try {
    val ope = task.getOperation.toLowerCase match {
      case "scan" => new ScanOperation(client)
      case "query" => new QueryOperation(client)
      case _ =>
        // Report a configuration error instead of leaking a scala.MatchError.
        throw new ConfigException(
          s"Unknown operation '${task.getOperation}'. Available types are: scan, query")
    }
    ope.execute(task, schema, output)
  } finally {
    // The client is created per task, so release its resources here.
    client.shutdown()
  }

  Exec.newTaskReport()
}
|
@@ -29,6 +29,13 @@ trait PluginTask extends Task {
|
|
29
29
|
@ConfigDefault("null")
|
30
30
|
def getEndPoint: Optional[String]
|
31
31
|
|
32
|
+
@Config("operation")
|
33
|
+
def getOperation: String
|
34
|
+
|
35
|
+
@Config("limit")
|
36
|
+
@ConfigDefault("0")
|
37
|
+
def getLimit: Long
|
38
|
+
|
32
39
|
@Config("scan_limit")
|
33
40
|
@ConfigDefault("0")
|
34
41
|
def getScanLimit: Long
|
@@ -0,0 +1,101 @@
|
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
|
+
|
3
|
+
import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition}
|
4
|
+
import org.embulk.input.dynamodb.{AttributeValueHelper, PluginTask}
|
5
|
+
import org.embulk.spi._
|
6
|
+
import org.embulk.spi.`type`.Types
|
7
|
+
import org.msgpack.value.{Value, ValueFactory}
|
8
|
+
|
9
|
+
import scala.collection.JavaConverters._
|
10
|
+
|
11
|
+
/**
  * Shared building blocks for the DynamoDB read operations: request-size
  * calculation, filter construction and conversion of returned items into
  * Embulk records.
  */
abstract class AbstractOperation {
  /**
    * Executes the operation and writes its results to `output`.
    *
    * @param task   plugin task configuration
    * @param schema output schema
    * @param output destination for the produced pages
    */
  def execute(task: PluginTask, schema: Schema, output: PageOutput): Unit

  /**
    * Computes the size to request for the next call.
    *
    * - both limits set: the smaller of `limit` and the records still allowed
    *   (`recordLimit - recordCount`)
    * - only one set: that one
    * - none set: 0
    *
    * The result is capped at `Int.MaxValue` so that a large `Long` value cannot
    * wrap around when narrowed to `Int`.
    *
    * @param limit       per-request limit (0 or less means not set)
    * @param recordLimit overall record limit (0 or less means not set)
    * @param recordCount number of records counted so far
    * @return the size for the next request
    */
  def getLimit(limit: Long, recordLimit: Long, recordCount: Long): Int = {
    val size: Long =
      if (limit > 0 && recordLimit > 0) {
        math.min(limit, recordLimit - recordCount)
      } else if (limit > 0 || recordLimit > 0) {
        math.max(limit, recordLimit)
      } else { 0L }

    math.min(size, Int.MaxValue.toLong).toInt
  }

  /**
    * Builds the attribute-name to `Condition` map from the task's `filters`.
    *
    * Each filter contributes its `value` and, when `getValue2` is non-null, a
    * second value. When several filters share a name the last one wins.
    *
    * @param task plugin task whose optional filters are read
    * @return the conditions keyed by attribute name; empty when no filters are set
    */
  def createFilters(task: PluginTask): Map[String, Condition] = {
    Option(task.getFilters.orNull).map { filters =>
      filters.getFilters.asScala.map { filter =>
        val first = createAttributeValue(filter.getType, filter.getValue)
        val second = Option(filter.getValue2).map { value2 =>
          createAttributeValue(filter.getType, value2)
        }
        val attributeValueList: Seq[AttributeValue] = first +: second.toSeq

        filter.getName -> new Condition()
          .withComparisonOperator(filter.getCondition)
          .withAttributeValueList(attributeValueList.asJava)
      }.toMap
    }.getOrElse(Map.empty[String, Condition])
  }

  /**
    * Converts a filter value into an `AttributeValue` of the given type.
    *
    * @param t filter type: `string`, `long`, `double` or `boolean`
    * @param v filter value in its string form
    * @return the attribute value (S for string, N for long/double, BOOL for boolean)
    * @throws org.embulk.config.ConfigException when `t` is not a supported type
    */
  def createAttributeValue(t: String, v: String): AttributeValue = {
    t match {
      case "string" =>
        new AttributeValue().withS(v)
      case "long" | "double" =>
        new AttributeValue().withN(v)
      case "boolean" =>
        new AttributeValue().withBOOL(v.toBoolean)
      case _ =>
        // Surface a configuration error instead of a scala.MatchError.
        throw new org.embulk.config.ConfigException(
          s"Unsupported filter type '$t'. Available types are: string, long, double, boolean")
    }
  }

  /**
    * Writes every item as one record, setting each schema column from the
    * attribute of the same name.
    *
    * @param pageBuilder builder the records are added to
    * @param schema      schema whose columns are written
    * @param items       items to write
    * @return the number of records added
    */
  def write(pageBuilder: PageBuilder, schema: Schema, items: Seq[Map[String, AttributeValue]]): Long = {
    items.foreach { item =>
      schema.getColumns.asScala.foreach { column =>
        val value = item.get(column.getName)
        column.getType match {
          case Types.STRING =>
            convert(column, value, pageBuilder.setString)
          case Types.LONG =>
            convert(column, value, pageBuilder.setLong)
          case Types.DOUBLE =>
            convert(column, value, pageBuilder.setDouble)
          case Types.BOOLEAN =>
            convert(column, value, pageBuilder.setBoolean)
          case Types.JSON =>
            convert(column, value, pageBuilder.setJson)
          case _ => /* Do nothing */
        }
      }
      pageBuilder.addRecord()
    }

    // One record is added per item.
    items.size.toLong
  }

  /**
    * Converts `value` with the implicit converter for `A` and passes the
    * result to the setter `f` for `column`.
    */
  def convert[A](column: Column,
                 value: Option[AttributeValue],
                 f: (Column, A) => Unit)(implicit f1: Option[AttributeValue] => A): Unit =
    f(column, f1(value))

  /** String (S) value of the attribute; empty string when the attribute is absent. */
  implicit def StringConvert(value: Option[AttributeValue]): String =
    value.map(_.getS).getOrElse("")

  /** Number (N) value parsed as Long; 0 when the attribute is absent. */
  implicit def LongConvert(value: Option[AttributeValue]): Long =
    value.map(_.getN.toLong).getOrElse(0L)

  /** Number (N) value parsed as Double; 0 when the attribute is absent. */
  implicit def DoubleConvert(value: Option[AttributeValue]): Double =
    value.map(_.getN.toDouble).getOrElse(0D)

  /** BOOL value of the attribute; false when the attribute is absent. */
  implicit def BooleanConvert(value: Option[AttributeValue]): Boolean =
    value.exists(_.getBOOL)

  /** Attribute decoded through `AttributeValueHelper.decodeToValue`; nil when absent. */
  implicit def JsonConvert(value: Option[AttributeValue]): Value = {
    value.map { attr =>
      AttributeValueHelper.decodeToValue(attr)
    }.getOrElse(ValueFactory.newNil())
  }
}
|
@@ -0,0 +1,50 @@
|
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
|
+
|
3
|
+
import java.util.{List => JList, Map => JMap}
|
4
|
+
|
5
|
+
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
6
|
+
import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, QueryRequest, QueryResult}
|
7
|
+
import org.embulk.input.dynamodb.PluginTask
|
8
|
+
import org.embulk.spi.{BufferAllocator, PageBuilder, PageOutput, Schema}
|
9
|
+
|
10
|
+
import scala.collection.JavaConverters._
|
11
|
+
|
12
|
+
/**
  * Reads items with the DynamoDB Query API and writes them to the page output.
  *
  * @param client DynamoDB client used to issue the query requests
  */
class QueryOperation(client: AmazonDynamoDBClient) extends AbstractOperation {
  /**
    * Queries the task's table page by page, using the task's filters as key
    * conditions, until DynamoDB returns no last-evaluated key or the record
    * limit has been reached, then finishes the page builder.
    *
    * @param task   plugin task (table, filters, limits, buffer allocator)
    * @param schema output schema; its column names are the attributes requested
    * @param output destination for the produced pages
    */
  override def execute(task: PluginTask,
                       schema: Schema,
                       output: PageOutput): Unit =
  {
    val pageBuilder: PageBuilder = new PageBuilder(task.getBufferAllocator, schema, output)

    val columnNames: JList[String] = schema.getColumns.asScala.map(_.getName).asJava
    val keyConditions: JMap[String, Condition] = createFilters(task).asJava

    val limit: Long = math.max(task.getScanLimit, task.getLimit)
    val recordLimit: Long = task.getRecordLimit

    // One iteration per request; `startKey` is null for the first request.
    @scala.annotation.tailrec
    def fetch(startKey: JMap[String, AttributeValue], fetched: Long): Unit = {
      val batchSize = getLimit(limit, recordLimit, fetched)

      val request: QueryRequest = new QueryRequest()
        .withTableName(task.getTable)
        .withAttributesToGet(columnNames)
        .withKeyConditions(keyConditions)
        .withExclusiveStartKey(startKey)

      if (batchSize > 0) {
        request.setLimit(batchSize)
      }

      val result: QueryResult = client.query(request)
      val nextKey: JMap[String, AttributeValue] = result.getLastEvaluatedKey

      val items = result.getItems.asScala.map(_.asScala.toMap)
      val total: Long = fetched + write(pageBuilder, schema, items)

      if (nextKey != null && (recordLimit == 0 || recordLimit > total)) {
        fetch(nextKey, total)
      }
    }

    fetch(null, 0L)

    pageBuilder.finish()
  }
}
|
@@ -0,0 +1,51 @@
|
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
|
+
|
3
|
+
import java.util.{List => JList, Map => JMap}
|
4
|
+
|
5
|
+
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
6
|
+
import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, ScanRequest, ScanResult}
|
7
|
+
import org.embulk.input.dynamodb.PluginTask
|
8
|
+
import org.embulk.spi.{BufferAllocator, PageBuilder, PageOutput, Schema}
|
9
|
+
|
10
|
+
import scala.collection.JavaConverters._
|
11
|
+
|
12
|
+
/**
  * Reads items with the DynamoDB Scan API and writes them to the page output.
  *
  * @param client DynamoDB client used to issue the scan requests
  */
class ScanOperation(client: AmazonDynamoDBClient) extends AbstractOperation {
  /**
    * Scans the task's table page by page, using the task's filters as the
    * scan filter, until DynamoDB returns no last-evaluated key or the record
    * limit has been reached, then finishes the page builder.
    *
    * @param task   plugin task (table, filters, limits, buffer allocator)
    * @param schema output schema; its column names are the attributes requested
    * @param output destination for the produced pages
    */
  override def execute(
      task: PluginTask,
      schema: Schema,
      output: PageOutput): Unit =
  {
    val allocator: BufferAllocator = task.getBufferAllocator
    val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)

    val attributes: JList[String] = schema.getColumns.asScala.map(_.getName).asJava
    val scanFilter: JMap[String, Condition] = createFilters(task).asJava
    var evaluateKey: JMap[String, AttributeValue] = null

    // Honor the `limit` option as well as `scan_limit`, the same way
    // QueryOperation does; both default to 0, so the larger one is the one set.
    val scanLimit: Long = math.max(task.getScanLimit, task.getLimit)
    val recordLimit: Long = task.getRecordLimit
    var recordCount: Long = 0

    do {
      val batchSize = getLimit(scanLimit, recordLimit, recordCount)

      val request: ScanRequest = new ScanRequest()
        .withTableName(task.getTable)
        .withAttributesToGet(attributes)
        .withScanFilter(scanFilter)
        .withExclusiveStartKey(evaluateKey)

      // A batch size of 0 means no limit is configured, so none is sent.
      if (batchSize > 0) {
        request.setLimit(batchSize)
      }

      val result: ScanResult = client.scan(request)
      // A null key ends the loop below.
      evaluateKey = result.getLastEvaluatedKey

      val items = result.getItems.asScala.map(_.asScala.toMap)
      recordCount += write(pageBuilder, schema, items)
    } while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))

    pageBuilder.finish()
  }
}
|
@@ -0,0 +1,25 @@
|
|
1
|
+
in:
|
2
|
+
type: dynamodb
|
3
|
+
end_point: http://localhost:8000/
|
4
|
+
table: ENV_VAR
|
5
|
+
auth_method: basic
|
6
|
+
access_key: dummy
|
7
|
+
secret_key: dummy
|
8
|
+
filters:
|
9
|
+
- {name: pri-key, type: string, condition: EQ, value: key-1}
|
10
|
+
columns:
|
11
|
+
- {name: pri-key, type: string}
|
12
|
+
- {name: sort-key, type: long}
|
13
|
+
- {name: doubleValue, type: double}
|
14
|
+
- {name: boolValue, type: boolean}
|
15
|
+
- {name: listValue, type: json}
|
16
|
+
- {name: mapValue, type: json}
|
17
|
+
|
18
|
+
out:
|
19
|
+
type: file
|
20
|
+
path_prefix: dynamodb-local-result
|
21
|
+
file_ext: tsv
|
22
|
+
formatter:
|
23
|
+
type: csv
|
24
|
+
delimiter: "\t"
|
25
|
+
header_line: false
|
File without changes
|
@@ -0,0 +1,83 @@
|
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
|
+
|
3
|
+
import java.io.File
|
4
|
+
import java.nio.charset.Charset
|
5
|
+
import java.nio.file.{FileSystems, Files}
|
6
|
+
|
7
|
+
import com.fasterxml.jackson.databind.ObjectMapper
|
8
|
+
import com.google.inject.{Binder, Module}
|
9
|
+
import org.embulk.EmbulkEmbed
|
10
|
+
import org.embulk.config.ConfigSource
|
11
|
+
import org.embulk.input.dynamodb.DynamodbInputPlugin
|
12
|
+
import org.embulk.plugin.InjectedPluginSource
|
13
|
+
import org.embulk.spi.InputPlugin
|
14
|
+
import org.hamcrest.CoreMatchers._
|
15
|
+
import org.junit.Assert._
|
16
|
+
import org.junit.{Before, Test}
|
17
|
+
|
18
|
+
/**
  * Runs the plugin with `operation: query` through an embedded Embulk and
  * checks the single record written to the TSV output file.
  */
class QueryOperationTest {
  private var embulk: EmbulkEmbed = null

  private var EMBULK_DYNAMODB_TEST_TABLE: String = null
  private var mapper: ObjectMapper = null

  /** Reads the test table name from the environment and boots Embulk with the plugin registered. */
  @Before
  def createResources(): Unit = {
    // Get Environments
    EMBULK_DYNAMODB_TEST_TABLE = System.getenv("EMBULK_DYNAMODB_TEST_TABLE")

    val bootstrap = new EmbulkEmbed.Bootstrap()
    bootstrap.addModules(new Module {
      def configure(binder: Binder): Unit = {
        InjectedPluginSource.registerPluginTo(binder,
          classOf[InputPlugin],
          "dynamodb",
          classOf[DynamodbInputPlugin])
      }
    })

    embulk = bootstrap.initializeCloseable()

    mapper = new ObjectMapper()
  }


  /**
    * Runs `config` and verifies the output file holds exactly one line with
    * the expected key, numeric, boolean, list and map values.
    */
  def doTest(config: ConfigSource): Unit = {
    embulk.run(config)

    val fs = FileSystems.getDefault
    val lines = Files.readAllLines(fs.getPath("dynamodb-local-result000.00.tsv"), Charset.forName("UTF-8"))
    // JUnit expects (expected, actual); the order matters for the failure message.
    assertEquals(1, lines.size)

    val head = lines.get(0)
    val values = head.split("\t")

    assertThat(values(0), is("key-1"))
    assertThat(values(1), is("0"))
    assertThat(values(2), is("42.195"))
    assertThat(values(3), is("true"))

    // Strip the quoting from the cell before parsing it as JSON.
    val listValue = mapper.readValue(values(4).replaceAll("\"(?!\")", ""), classOf[java.util.List[Object]])
    assertThat(listValue.size(), is(2))
    assertThat(listValue.get(0).asInstanceOf[String], is("list-value"))
    assertThat(listValue.get(1).asInstanceOf[Int], is(123))

    val mapValue = mapper.readValue(values(5).replaceAll("\"(?!\")", ""), classOf[java.util.Map[String, Object]])
    assertTrue(mapValue.containsKey("map-key-1"))
    assertThat(mapValue.get("map-key-1").asInstanceOf[String], is("map-value-1"))
    assertTrue(mapValue.containsKey("map-key-2"))
    assertThat(mapValue.get("map-key-2").asInstanceOf[Int], is(456))
  }

  /** Loads the query YAML, points it at the test table and runs the checks. */
  @Test
  def queryTest(): Unit = {
    val config = embulk.newConfigLoader().fromYamlFile(
      new File("src/test/resources/yaml/dynamodb-local-query.yml"))

    config.getNested("in")
      .set("operation", "query")
      .set("table", EMBULK_DYNAMODB_TEST_TABLE)

    doTest(config)
  }
}
|
data/src/test/scala/org/embulk/input/dynamodb/{DynamoDBUtilTest.scala → ope/ScanOperationTest.scala}
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
package org.embulk.input.dynamodb
|
1
|
+
package org.embulk.input.dynamodb.ope
|
2
2
|
|
3
3
|
import java.io.File
|
4
4
|
import java.nio.charset.Charset
|
@@ -8,13 +8,14 @@ import com.fasterxml.jackson.databind.ObjectMapper
|
|
8
8
|
import com.google.inject.{Binder, Module}
|
9
9
|
import org.embulk.EmbulkEmbed
|
10
10
|
import org.embulk.config.ConfigSource
|
11
|
+
import org.embulk.input.dynamodb.DynamodbInputPlugin
|
11
12
|
import org.embulk.plugin.InjectedPluginSource
|
12
13
|
import org.embulk.spi.InputPlugin
|
13
14
|
import org.hamcrest.CoreMatchers._
|
14
15
|
import org.junit.Assert._
|
15
16
|
import org.junit.{Before, Test}
|
16
17
|
|
17
|
-
class
|
18
|
+
class ScanOperationTest {
|
18
19
|
private var embulk: EmbulkEmbed = null
|
19
20
|
|
20
21
|
private var EMBULK_DYNAMODB_TEST_TABLE: String = null
|
@@ -71,9 +72,10 @@ class DynamoDBUtilTest {
|
|
71
72
|
@Test
|
72
73
|
def scanTest() {
|
73
74
|
val config = embulk.newConfigLoader().fromYamlFile(
|
74
|
-
new File("src/test/resources/yaml/dynamodb-local.yml"))
|
75
|
+
new File("src/test/resources/yaml/dynamodb-local-scan.yml"))
|
75
76
|
|
76
77
|
config.getNested("in")
|
78
|
+
.set("operation", "scan")
|
77
79
|
.set("table", EMBULK_DYNAMODB_TEST_TABLE)
|
78
80
|
|
79
81
|
doTest(config)
|
data/test/run_dynamodb_local.sh
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-dynamodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daisuke Higashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,11 +59,14 @@ files:
|
|
59
59
|
- lib/embulk/input/dynamodb.rb
|
60
60
|
- src/main/scala/org/embulk/input/dynamodb/AttributeValueHelper.scala
|
61
61
|
- src/main/scala/org/embulk/input/dynamodb/AwsCredentials.scala
|
62
|
-
- src/main/scala/org/embulk/input/dynamodb/
|
62
|
+
- src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala
|
63
63
|
- src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala
|
64
64
|
- src/main/scala/org/embulk/input/dynamodb/Filter.scala
|
65
65
|
- src/main/scala/org/embulk/input/dynamodb/FilterConfig.scala
|
66
66
|
- src/main/scala/org/embulk/input/dynamodb/PluginTask.scala
|
67
|
+
- src/main/scala/org/embulk/input/dynamodb/ope/AbstractOperation.scala
|
68
|
+
- src/main/scala/org/embulk/input/dynamodb/ope/QueryOperation.scala
|
69
|
+
- src/main/scala/org/embulk/input/dynamodb/ope/ScanOperation.scala
|
67
70
|
- src/main/scala/org/embulk/input/dynamodb/package.scala
|
68
71
|
- src/test/resources/json/test.json
|
69
72
|
- src/test/resources/json/test.template
|
@@ -71,24 +74,26 @@ files:
|
|
71
74
|
- src/test/resources/yaml/authMethodBasic_Error.yml
|
72
75
|
- src/test/resources/yaml/authMethodEnv.yml
|
73
76
|
- src/test/resources/yaml/authMethodProfile.yml
|
74
|
-
- src/test/resources/yaml/dynamodb-local.yml
|
77
|
+
- src/test/resources/yaml/dynamodb-local-query.yml
|
78
|
+
- src/test/resources/yaml/dynamodb-local-scan.yml
|
75
79
|
- src/test/resources/yaml/notSetAuthMethod.yml
|
76
80
|
- src/test/scala/org/embulk/input/dynamodb/AttributeValueHelperTest.scala
|
77
81
|
- src/test/scala/org/embulk/input/dynamodb/AwsCredentialsTest.scala
|
78
|
-
- src/test/scala/org/embulk/input/dynamodb/
|
82
|
+
- src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala
|
83
|
+
- src/test/scala/org/embulk/input/dynamodb/ope/ScanOperationTest.scala
|
79
84
|
- test/create_table.sh
|
80
85
|
- test/put_items.sh
|
81
86
|
- test/run_dynamodb_local.sh
|
82
|
-
- classpath/
|
83
|
-
- classpath/commons-codec-1.6.jar
|
84
|
-
- classpath/scala-library-2.11.7.jar
|
87
|
+
- classpath/aws-java-sdk-core-1.10.43.jar
|
85
88
|
- classpath/aws-java-sdk-dynamodb-1.10.43.jar
|
86
|
-
- classpath/embulk-input-dynamodb-0.1.1.jar
|
87
89
|
- classpath/aws-java-sdk-kms-1.10.43.jar
|
88
|
-
- classpath/httpcore-4.3.3.jar
|
89
|
-
- classpath/httpclient-4.3.6.jar
|
90
|
-
- classpath/aws-java-sdk-core-1.10.43.jar
|
91
90
|
- classpath/aws-java-sdk-s3-1.10.43.jar
|
91
|
+
- classpath/commons-codec-1.6.jar
|
92
|
+
- classpath/commons-logging-1.1.3.jar
|
93
|
+
- classpath/embulk-input-dynamodb-0.2.0.jar
|
94
|
+
- classpath/httpclient-4.3.6.jar
|
95
|
+
- classpath/httpcore-4.3.3.jar
|
96
|
+
- classpath/scala-library-2.11.8.jar
|
92
97
|
homepage: https://github.com/lulichn/embulk-input-dynamodb
|
93
98
|
licenses:
|
94
99
|
- MIT
|
@@ -1,155 +0,0 @@
|
|
1
|
-
package org.embulk.input.dynamodb
|
2
|
-
|
3
|
-
import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
|
4
|
-
|
5
|
-
import com.amazonaws.ClientConfiguration
|
6
|
-
import com.amazonaws.regions.Regions
|
7
|
-
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
8
|
-
import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, ScanRequest, ScanResult}
|
9
|
-
import org.embulk.config.ConfigException
|
10
|
-
import org.embulk.spi._
|
11
|
-
import org.embulk.spi.`type`.Types
|
12
|
-
import org.msgpack.value.{Value, ValueFactory}
|
13
|
-
|
14
|
-
import scala.collection.JavaConverters._
|
15
|
-
|
16
|
-
object DynamoDBUtil {
|
17
|
-
def createClient(task: PluginTask): AmazonDynamoDBClient = {
|
18
|
-
val client = new AmazonDynamoDBClient(
|
19
|
-
AwsCredentials.getCredentialsProvider(task),
|
20
|
-
new ClientConfiguration()
|
21
|
-
.withMaxConnections(50)) // SDK Default Value
|
22
|
-
|
23
|
-
if (task.getEndPoint.isPresent) {
|
24
|
-
client.withEndpoint(task.getEndPoint.get())
|
25
|
-
} else if (task.getRegion.isPresent) {
|
26
|
-
client.withRegion(Regions.fromName(task.getRegion.get()))
|
27
|
-
} else {
|
28
|
-
throw new ConfigException("At least one of EndPoint or Region must be set")
|
29
|
-
}
|
30
|
-
}
|
31
|
-
|
32
|
-
|
33
|
-
def scan(
|
34
|
-
task: PluginTask,
|
35
|
-
schema: Schema,
|
36
|
-
output: PageOutput)
|
37
|
-
(implicit client: AmazonDynamoDBClient): Unit =
|
38
|
-
{
|
39
|
-
val allocator: BufferAllocator = task.getBufferAllocator
|
40
|
-
val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
|
41
|
-
|
42
|
-
val attributes: JList[String] = new JArrayList[String]()
|
43
|
-
|
44
|
-
schema.getColumns.asScala.foreach { column =>
|
45
|
-
attributes.add(column.getName)
|
46
|
-
}
|
47
|
-
val scanFilter: JMap[String, Condition] = createScanFilter(task).asJava
|
48
|
-
var evaluateKey: JMap[String, AttributeValue] = null
|
49
|
-
|
50
|
-
val scanLimit: Long = task.getScanLimit
|
51
|
-
val recordLimit: Long = task.getRecordLimit
|
52
|
-
var recordCount: Long = 0
|
53
|
-
|
54
|
-
do {
|
55
|
-
val batchSize = getScanLimit(scanLimit, recordLimit, recordCount)
|
56
|
-
|
57
|
-
val request: ScanRequest = new ScanRequest()
|
58
|
-
.withTableName(task.getTable)
|
59
|
-
.withAttributesToGet(attributes)
|
60
|
-
.withScanFilter(scanFilter)
|
61
|
-
.withExclusiveStartKey(evaluateKey)
|
62
|
-
|
63
|
-
if (batchSize > 0) {
|
64
|
-
request.setLimit(batchSize)
|
65
|
-
}
|
66
|
-
|
67
|
-
val result: ScanResult = client.scan(request)
|
68
|
-
evaluateKey = result.getLastEvaluatedKey
|
69
|
-
|
70
|
-
result.getItems.asScala.foreach { item =>
|
71
|
-
schema.getColumns.asScala.foreach { column =>
|
72
|
-
val value = item.asScala.get(column.getName)
|
73
|
-
column.getType match {
|
74
|
-
case Types.STRING =>
|
75
|
-
convert(column, value, pageBuilder.setString)
|
76
|
-
case Types.LONG =>
|
77
|
-
convert(column, value, pageBuilder.setLong)
|
78
|
-
case Types.DOUBLE =>
|
79
|
-
convert(column, value, pageBuilder.setDouble)
|
80
|
-
case Types.BOOLEAN =>
|
81
|
-
convert(column, value, pageBuilder.setBoolean)
|
82
|
-
case Types.JSON =>
|
83
|
-
convert(column, value, pageBuilder.setJson)
|
84
|
-
case _ => /* Do nothing */
|
85
|
-
}
|
86
|
-
}
|
87
|
-
pageBuilder.addRecord()
|
88
|
-
recordCount += 1
|
89
|
-
}
|
90
|
-
} while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
|
91
|
-
|
92
|
-
pageBuilder.finish()
|
93
|
-
}
|
94
|
-
|
95
|
-
private def getScanLimit(scanLimit: Long, recordLimit: Long, recordCount: Long): Int = {
|
96
|
-
if (scanLimit > 0 && recordLimit > 0) {
|
97
|
-
math.min(scanLimit, recordLimit - recordCount).toInt
|
98
|
-
} else if (scanLimit > 0 || recordLimit > 0) {
|
99
|
-
math.max(scanLimit, recordLimit).toInt
|
100
|
-
} else { 0 }
|
101
|
-
}
|
102
|
-
|
103
|
-
private def createScanFilter(task: PluginTask): Map[String, Condition] = {
|
104
|
-
val filterMap = collection.mutable.HashMap[String, Condition]()
|
105
|
-
|
106
|
-
Option(task.getFilters.orNull).map { filters =>
|
107
|
-
filters.getFilters.asScala.map { filter =>
|
108
|
-
val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
|
109
|
-
attributeValueList += createAttributeValue(filter.getType, filter.getValue)
|
110
|
-
Option(filter.getValue2).map { value2 =>
|
111
|
-
attributeValueList+= createAttributeValue(filter.getType, value2) }
|
112
|
-
|
113
|
-
filterMap += filter.getName -> new Condition()
|
114
|
-
.withComparisonOperator(filter.getCondition)
|
115
|
-
.withAttributeValueList(attributeValueList.asJava)
|
116
|
-
}
|
117
|
-
}
|
118
|
-
|
119
|
-
filterMap.toMap
|
120
|
-
}
|
121
|
-
|
122
|
-
private def createAttributeValue(t: String, v: String): AttributeValue = {
|
123
|
-
t match {
|
124
|
-
case "string" =>
|
125
|
-
new AttributeValue().withS(v)
|
126
|
-
case "long" | "double" =>
|
127
|
-
new AttributeValue().withN(v)
|
128
|
-
case "boolean" =>
|
129
|
-
new AttributeValue().withBOOL(v.toBoolean)
|
130
|
-
}
|
131
|
-
}
|
132
|
-
|
133
|
-
private def convert[A](column: Column,
|
134
|
-
value: Option[AttributeValue],
|
135
|
-
f: (Column, A) => Unit)(implicit f1: Option[AttributeValue] => A): Unit =
|
136
|
-
f(column, f1(value))
|
137
|
-
|
138
|
-
implicit private def StringConvert(value: Option[AttributeValue]): String =
|
139
|
-
value.map(_.getS).getOrElse("")
|
140
|
-
|
141
|
-
implicit private def LongConvert(value: Option[AttributeValue]): Long =
|
142
|
-
value.map(_.getN.toLong).getOrElse(0L)
|
143
|
-
|
144
|
-
implicit private def DoubleConvert(value: Option[AttributeValue]): Double =
|
145
|
-
value.map(_.getN.toDouble).getOrElse(0D)
|
146
|
-
|
147
|
-
implicit private def BooleanConvert(value: Option[AttributeValue]): Boolean =
|
148
|
-
value.exists(_.getBOOL)
|
149
|
-
|
150
|
-
implicit private def JsonConvert(value: Option[AttributeValue]): Value = {
|
151
|
-
value.map { attr =>
|
152
|
-
AttributeValueHelper.decodeToValue(attr)
|
153
|
-
}.getOrElse(ValueFactory.newNil())
|
154
|
-
}
|
155
|
-
}
|