embulk-input-dynamodb 0.1.1 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 603d3c6513d86bf773182a8214d70cb668fcf2a6
-  data.tar.gz: 60d9256bb5fac99e56f5424997037323e98f3868
+  metadata.gz: 324f33092c5bb362ecf9a804329e56eda0e545be
+  data.tar.gz: 00c350b4d52c76adf8291bd6abeee9d9b284eb10
 SHA512:
-  metadata.gz: 55d4bdb5960bdf069804360dd0ca9b627bb7cfab0be6bb92c1012b14335019f3c1fd9dd31548fcf9b11b3e22d207efc35b8faf1172b7a5be531d62d39966d61e
-  data.tar.gz: 9ae148915f273eca767e740b0cd523f7d355b6f793644106320b592695124682769c47df41b643139d91111d9e541b65e692a32d31b9072a04ea1721b1fd5b35
+  metadata.gz: 09103d2d4bdbc12d22f51318b7e6be74bba3420d9ce1e0003e78c55b77f242c03e5a74d5f9a95233be08539bb2c0cce8fbb4b8ad151104f5730c3f76f3edfd7d
+  data.tar.gz: c7d278c4eac6260264652ce74c6901d416dc3a4ebd287ce1162d16e911628f18430aad3b34205524b7e53e4b10ae32958cbe26872d359124b883bad67f78eb93
data/README.md CHANGED
@@ -24,17 +24,27 @@ Available values options are: `basic`, `env`, `instance`, `profile`, `properties
 - **region**: Region Name (string, optional)
 - **end_point**: EndPoint URL (string, optional)
   `end_point` takes priority when both `region` and `end_point` are specified.
+- **operation**: Operation Type (string, required)
+  Available types are: `scan`, `query`
 - **table**: Table Name (string, required)
-- **scan_limit**: DynamoDB 1time Scan Query size limit (Int, optional)
-- **record_limit**: Max Record Search limit (Long, optional)
+- **filters**: Query Filters
+  Required for the `query` operation; optional for `scan`.
+  - **name**: Column name.
+  - **type**: Column type.
+  - **condition**: Comparison Operator.
+  - **value(s)**: Attribute Value(s).
+- **limit**: DynamoDB 1-time Scan/Query Operation size limit (Int, optional)
+- **scan_limit**: DynamoDB 1-time Scan Query size limit (Deprecated, Int, optional)
+- **record_limit**: Max Record Search limit (Long, optional)
 - **columns**: key-value pairs where each key is a column name and each value holds options for the column (required)
   - **name**: Column name.
   - **type**: Column values are converted to this Embulk type.
     Available values are: `boolean`, `long`, `double`, `string`, `json`
-- **filters**: query filter (optional)
 
 ## Example
 
+- Scan Operation
+
 ```yaml
 in:
   type: dynamodb
@@ -42,6 +52,7 @@ in:
   access_key: YOUR_ACCESS_KEY
   secret_key: YOUR_SECRET_KEY
   region: ap-northeast-1
+  operation: scan
   table: YOUR_TABLE_NAME
   columns:
     - {name: ColumnA, type: long}
@@ -57,11 +68,33 @@ out:
   type: stdout
 ```
 
+- Query Operation
+
+```yaml
+in:
+  type: dynamodb
+  auth_method: env
+  region: ap-northeast-1
+  operation: query
+  table: YOUR_TABLE_NAME
+  columns:
+    - {name: ColumnA, type: long}
+    - {name: ColumnB, type: double}
+    - {name: ColumnC, type: string}
+    - {name: ColumnD, type: boolean}
+    - {name: ColumnE, type: json}
+  filters:
+    - {name: ColumnA, type: long, condition: EQ, value: 10000}
+
+out:
+  type: stdout
+```
+
 ## Try
 
 ```
 $ ./gradlew classpath
-$ embulk preview -I lib your-sample.yml
+$ embulk preview -I lib your-config.yml
 ```
 
 ## Build
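
A note on `filters`, inferred from the plugin code below rather than stated in the README: `condition` is handed straight to the DynamoDB `ComparisonOperator`, so the standard operator names (`EQ`, `LT`, `GE`, `BETWEEN`, ...) apply, and `Filter` exposes an optional second value (via `getValue2`) that range operators such as `BETWEEN` require.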
build.gradle CHANGED
@@ -14,22 +14,22 @@ configurations {
   provided
 }
 
-version = "0.1.1"
+version = "0.2.0"
 
 sourceCompatibility = 1.7
 targetCompatibility = 1.7
 
 dependencies {
-  compile "org.scala-lang:scala-library:2.11.7"
+  compile "org.scala-lang:scala-library:2.11.8"
 
-  compile "org.embulk:embulk-core:0.8.6"
-  provided "org.embulk:embulk-core:0.8.6"
+  compile "org.embulk:embulk-core:0.8.13"
+  provided "org.embulk:embulk-core:0.8.13"
 
   compile "com.amazonaws:aws-java-sdk-dynamodb:1.10.43"
 
   testCompile "junit:junit:4.+"
-  testCompile "org.embulk:embulk-standards:0.8.6"
-  testCompile "org.embulk:embulk-core:0.8.6:tests"
+  testCompile "org.embulk:embulk-standards:0.8.13"
+  testCompile "org.embulk:embulk-core:0.8.13:tests"
 }
 
 compileScala {
src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala ADDED
@@ -0,0 +1,23 @@
+package org.embulk.input.dynamodb
+
+import com.amazonaws.ClientConfiguration
+import com.amazonaws.regions.Regions
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
+import org.embulk.config.ConfigException
+
+object DynamoDBClient {
+  def create(task: PluginTask): AmazonDynamoDBClient = {
+    val client = new AmazonDynamoDBClient(
+      AwsCredentials.getCredentialsProvider(task),
+      new ClientConfiguration()
+        .withMaxConnections(50)) // SDK Default Value
+
+    if (task.getEndPoint.isPresent) {
+      client.withEndpoint(task.getEndPoint.get())
+    } else if (task.getRegion.isPresent) {
+      client.withRegion(Regions.fromName(task.getRegion.get()))
+    } else {
+      throw new ConfigException("At least one of EndPoint or Region must be set")
+    }
+  }
+}
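
Both `withEndpoint` and `withRegion` return the configured client, so the `if`/`else` chain is the return value of `create`. Because the endpoint check comes first, `end_point` wins over `region` when both are set, matching the priority note in the README; with neither set, `create` throws a `ConfigException`.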
src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala CHANGED
@@ -4,6 +4,7 @@ import java.util.{List => JList}
 
 import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
 import org.embulk.config._
+import org.embulk.input.dynamodb.ope.{QueryOperation, ScanOperation}
 import org.embulk.spi._
 
 class DynamodbInputPlugin extends InputPlugin {
@@ -24,8 +25,13 @@ class DynamodbInputPlugin extends InputPlugin {
   def run(taskSource: TaskSource, schema: Schema, taskIndex: Int, output: PageOutput): TaskReport = {
     val task: PluginTask = taskSource.loadTask(classOf[PluginTask])
 
-    implicit val client: AmazonDynamoDBClient = DynamoDBUtil.createClient(task)
-    DynamoDBUtil.scan(task, schema, output)
+    val client: AmazonDynamoDBClient = DynamoDBClient.create(task)
+
+    val ope = task.getOperation.toLowerCase match {
+      case "scan" => new ScanOperation(client)
+      case "query" => new QueryOperation(client)
+    }
+    ope.execute(task, schema, output)
 
     Exec.newTaskReport()
   }
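
The dispatch above matches on `operation` with no default case, so any value other than `scan` or `query` fails with a bare `scala.MatchError`. A minimal hardening sketch, not part of this release (the `OperationDispatch` name is invented for illustration):

```scala
package org.embulk.input.dynamodb

import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
import org.embulk.config.ConfigException
import org.embulk.input.dynamodb.ope.{AbstractOperation, QueryOperation, ScanOperation}

// Hypothetical helper, not in the plugin: surface an unknown operation as a
// readable ConfigException instead of a MatchError.
object OperationDispatch {
  def resolve(operation: String, client: AmazonDynamoDBClient): AbstractOperation =
    operation.toLowerCase match {
      case "scan"  => new ScanOperation(client)
      case "query" => new QueryOperation(client)
      case other   => throw new ConfigException(s"Unsupported operation: $other")
    }
}
```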
src/main/scala/org/embulk/input/dynamodb/PluginTask.scala CHANGED
@@ -29,6 +29,13 @@ trait PluginTask extends Task {
   @ConfigDefault("null")
   def getEndPoint: Optional[String]
 
+  @Config("operation")
+  def getOperation: String
+
+  @Config("limit")
+  @ConfigDefault("0")
+  def getLimit: Long
+
   @Config("scan_limit")
   @ConfigDefault("0")
   def getScanLimit: Long
src/main/scala/org/embulk/input/dynamodb/ope/AbstractOperation.scala ADDED
@@ -0,0 +1,101 @@
+package org.embulk.input.dynamodb.ope
+
+import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition}
+import org.embulk.input.dynamodb.{AttributeValueHelper, PluginTask}
+import org.embulk.spi._
+import org.embulk.spi.`type`.Types
+import org.msgpack.value.{Value, ValueFactory}
+
+import scala.collection.JavaConverters._
+
+abstract class AbstractOperation {
+  def execute(task: PluginTask, schema: Schema, output: PageOutput): Unit
+
+  def getLimit(limit: Long, recordLimit: Long, recordCount: Long): Int = {
+    if (limit > 0 && recordLimit > 0) {
+      math.min(limit, recordLimit - recordCount).toInt
+    } else if (limit > 0 || recordLimit > 0) {
+      math.max(limit, recordLimit).toInt
+    } else { 0 }
+  }
+
+  def createFilters(task: PluginTask): Map[String, Condition] = {
+    val filterMap = collection.mutable.HashMap[String, Condition]()
+
+    Option(task.getFilters.orNull).map { filters =>
+      filters.getFilters.asScala.map { filter =>
+        val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
+        attributeValueList += createAttributeValue(filter.getType, filter.getValue)
+        Option(filter.getValue2).map { value2 =>
+          attributeValueList += createAttributeValue(filter.getType, value2) }
+
+        filterMap += filter.getName -> new Condition()
+          .withComparisonOperator(filter.getCondition)
+          .withAttributeValueList(attributeValueList.asJava)
+      }
+    }
+
+    filterMap.toMap
+  }
+
+  def createAttributeValue(t: String, v: String): AttributeValue = {
+    t match {
+      case "string" =>
+        new AttributeValue().withS(v)
+      case "long" | "double" =>
+        new AttributeValue().withN(v)
+      case "boolean" =>
+        new AttributeValue().withBOOL(v.toBoolean)
+    }
+  }
+
+  def write(pageBuilder: PageBuilder, schema: Schema, items: Seq[Map[String, AttributeValue]]): Long = {
+    var count = 0
+
+    items.foreach { item =>
+      schema.getColumns.asScala.foreach { column =>
+        val value = item.get(column.getName)
+        column.getType match {
+          case Types.STRING =>
+            convert(column, value, pageBuilder.setString)
+          case Types.LONG =>
+            convert(column, value, pageBuilder.setLong)
+          case Types.DOUBLE =>
+            convert(column, value, pageBuilder.setDouble)
+          case Types.BOOLEAN =>
+            convert(column, value, pageBuilder.setBoolean)
+          case Types.JSON =>
+            convert(column, value, pageBuilder.setJson)
+          case _ => /* Do nothing */
+        }
+      }
+      pageBuilder.addRecord()
+      count += 1
+    }
+
+    count
+  }
+
+  def convert[A](column: Column,
+                 value: Option[AttributeValue],
+                 f: (Column, A) => Unit)(implicit f1: Option[AttributeValue] => A): Unit =
+    f(column, f1(value))
+
+  implicit def StringConvert(value: Option[AttributeValue]): String =
+    value.map(_.getS).getOrElse("")
+
+  implicit def LongConvert(value: Option[AttributeValue]): Long =
+    value.map(_.getN.toLong).getOrElse(0L)
+
+  implicit def DoubleConvert(value: Option[AttributeValue]): Double =
+    value.map(_.getN.toDouble).getOrElse(0D)
+
+  implicit def BooleanConvert(value: Option[AttributeValue]): Boolean =
+    value.exists(_.getBOOL)
+
+  implicit def JsonConvert(value: Option[AttributeValue]): Value = {
+    value.map { attr =>
+      AttributeValueHelper.decodeToValue(attr)
+    }.getOrElse(ValueFactory.newNil())
+  }
+}
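
Two details above are easy to misread. First, `convert` selects its converter through the implicit `Option[AttributeValue] => A` functions, with `A` fixed by whichever `PageBuilder` setter is passed in; missing attributes silently become `""`, `0`, `false`, or nil rather than nulls. Second, `getLimit` merges the per-request page cap with the whole-run record cap. A small sketch of that arithmetic (values are made up; `null` is safe here because `getLimit` never touches the client):

```scala
import org.embulk.input.dynamodb.ope.ScanOperation

object GetLimitDemo extends App {
  // Any concrete AbstractOperation will do; the client is unused by getLimit.
  val op = new ScanOperation(null)

  println(op.getLimit(limit = 100, recordLimit = 0, recordCount = 0))     // 100: only the page cap is set
  println(op.getLimit(limit = 0, recordLimit = 500, recordCount = 0))     // 500: the run cap doubles as page size
  println(op.getLimit(limit = 100, recordLimit = 500, recordCount = 450)) // 50: last page shrinks to what remains
  println(op.getLimit(limit = 0, recordLimit = 0, recordCount = 0))       // 0: leave Limit unset on the request
}
```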
src/main/scala/org/embulk/input/dynamodb/ope/QueryOperation.scala ADDED
@@ -0,0 +1,50 @@
+package org.embulk.input.dynamodb.ope
+
+import java.util.{List => JList, Map => JMap}
+
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
+import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, QueryRequest, QueryResult}
+import org.embulk.input.dynamodb.PluginTask
+import org.embulk.spi.{BufferAllocator, PageBuilder, PageOutput, Schema}
+
+import scala.collection.JavaConverters._
+
+class QueryOperation(client: AmazonDynamoDBClient) extends AbstractOperation {
+  override def execute(task: PluginTask,
+                       schema: Schema,
+                       output: PageOutput): Unit =
+  {
+    val allocator: BufferAllocator = task.getBufferAllocator
+    val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
+
+    val attributes: JList[String] = schema.getColumns.asScala.map(_.getName).asJava
+    val conditions: JMap[String, Condition] = createFilters(task).asJava
+    var evaluateKey: JMap[String, AttributeValue] = null
+
+    val limit: Long = math.max(task.getScanLimit, task.getLimit)
+    val recordLimit: Long = task.getRecordLimit
+    var recordCount: Long = 0
+
+    do {
+      val batchSize = getLimit(limit, recordLimit, recordCount)
+
+      val request: QueryRequest = new QueryRequest()
+        .withTableName(task.getTable)
+        .withAttributesToGet(attributes)
+        .withKeyConditions(conditions)
+        .withExclusiveStartKey(evaluateKey)
+
+      if (batchSize > 0) {
+        request.setLimit(batchSize)
+      }
+
+      val result: QueryResult = client.query(request)
+      evaluateKey = result.getLastEvaluatedKey
+
+      val items = result.getItems.asScala.map(_.asScala.toMap)
+      recordCount += write(pageBuilder, schema, items)
+    } while (evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
+
+    pageBuilder.finish()
+  }
+}
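
Note the compatibility shim: the query path takes its page cap from `math.max(task.getScanLimit, task.getLimit)`, so configurations still using the deprecated `scan_limit` keep working alongside the new `limit` option.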
src/main/scala/org/embulk/input/dynamodb/ope/ScanOperation.scala ADDED
@@ -0,0 +1,51 @@
+package org.embulk.input.dynamodb.ope
+
+import java.util.{List => JList, Map => JMap}
+
+import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
+import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, ScanRequest, ScanResult}
+import org.embulk.input.dynamodb.PluginTask
+import org.embulk.spi.{BufferAllocator, PageBuilder, PageOutput, Schema}
+
+import scala.collection.JavaConverters._
+
+class ScanOperation(client: AmazonDynamoDBClient) extends AbstractOperation {
+  override def execute(
+      task: PluginTask,
+      schema: Schema,
+      output: PageOutput): Unit =
+  {
+    val allocator: BufferAllocator = task.getBufferAllocator
+    val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
+
+    val attributes: JList[String] = schema.getColumns.asScala.map(_.getName).asJava
+    val scanFilter: JMap[String, Condition] = createFilters(task).asJava
+    var evaluateKey: JMap[String, AttributeValue] = null
+
+    val scanLimit: Long = task.getScanLimit
+    val recordLimit: Long = task.getRecordLimit
+    var recordCount: Long = 0
+
+    do {
+      val batchSize = getLimit(scanLimit, recordLimit, recordCount)
+
+      val request: ScanRequest = new ScanRequest()
+        .withTableName(task.getTable)
+        .withAttributesToGet(attributes)
+        .withScanFilter(scanFilter)
+        .withExclusiveStartKey(evaluateKey)
+
+      if (batchSize > 0) {
+        request.setLimit(batchSize)
+      }
+
+      val result: ScanResult = client.scan(request)
+      evaluateKey = result.getLastEvaluatedKey
+
+      val items = result.getItems.asScala.map(_.asScala.toMap)
+      recordCount += write(pageBuilder, schema, items)
+    } while (evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
+
+    pageBuilder.finish()
+  }
+}
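
Scan pages the same way as query: each response's `LastEvaluatedKey` feeds the next request's `ExclusiveStartKey` until the key comes back `null` or `record_limit` is hit. One asymmetry worth flagging: as diffed here, `ScanOperation` reads only the deprecated `scan_limit`, so the new `limit` option appears to take effect for `query` alone.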
src/test/resources/yaml/authMethodBasic.yml CHANGED
@@ -1,6 +1,7 @@
 in:
   type: dynamodb
   region: ENV_VAR
+  operation: scan
   table: ENV_VAR
   auth_method: basic
   access_key: ENV_VAR
src/test/resources/yaml/authMethodBasic_Error.yml CHANGED
@@ -1,6 +1,7 @@
 in:
   type: dynamodb
   region: ENV_VAR
+  operation: scan
   table: ENV_VAR
   auth_method: basic
   columns:
src/test/resources/yaml/authMethodEnv.yml CHANGED
@@ -1,6 +1,7 @@
 in:
   type: dynamodb
   region: ENV_VAR
+  operation: scan
   table: ENV_VAR
   auth_method: env
   columns:
src/test/resources/yaml/authMethodProfile.yml CHANGED
@@ -1,6 +1,7 @@
 in:
   type: dynamodb
   region: ENV_VAR
+  operation: scan
   table: ENV_VAR
   auth_method: profile
   profile_name: ENV_VAR
src/test/resources/yaml/dynamodb-local-query.yml ADDED
@@ -0,0 +1,25 @@
+in:
+  type: dynamodb
+  end_point: http://localhost:8000/
+  table: ENV_VAR
+  auth_method: basic
+  access_key: dummy
+  secret_key: dummy
+  filters:
+    - {name: pri-key, type: string, condition: EQ, value: key-1}
+  columns:
+    - {name: pri-key, type: string}
+    - {name: sort-key, type: long}
+    - {name: doubleValue, type: double}
+    - {name: boolValue, type: boolean}
+    - {name: listValue, type: json}
+    - {name: mapValue, type: json}
+
+out:
+  type: file
+  path_prefix: dynamodb-local-result
+  file_ext: tsv
+  formatter:
+    type: csv
+    delimiter: "\t"
+    header_line: false
src/test/resources/yaml/notSetAuthMethod.yml CHANGED
@@ -1,6 +1,7 @@
 in:
   type: dynamodb
   region: ENV_VAR
+  operation: scan
   table: ENV_VAR
   access_key: ENV_VAR
   secret_key: ENV_VAR
src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala ADDED
@@ -0,0 +1,83 @@
+package org.embulk.input.dynamodb.ope
+
+import java.io.File
+import java.nio.charset.Charset
+import java.nio.file.{FileSystems, Files}
+
+import com.fasterxml.jackson.databind.ObjectMapper
+import com.google.inject.{Binder, Module}
+import org.embulk.EmbulkEmbed
+import org.embulk.config.ConfigSource
+import org.embulk.input.dynamodb.DynamodbInputPlugin
+import org.embulk.plugin.InjectedPluginSource
+import org.embulk.spi.InputPlugin
+import org.hamcrest.CoreMatchers._
+import org.junit.Assert._
+import org.junit.{Before, Test}
+
+class QueryOperationTest {
+  private var embulk: EmbulkEmbed = null
+
+  private var EMBULK_DYNAMODB_TEST_TABLE: String = null
+  private var mapper: ObjectMapper = null
+
+  @Before
+  def createResources() {
+    // Get Environments
+    EMBULK_DYNAMODB_TEST_TABLE = System.getenv("EMBULK_DYNAMODB_TEST_TABLE")
+
+    val bootstrap = new EmbulkEmbed.Bootstrap()
+    bootstrap.addModules(new Module {
+      def configure(binder: Binder): Unit = {
+        InjectedPluginSource.registerPluginTo(binder,
+          classOf[InputPlugin],
+          "dynamodb",
+          classOf[DynamodbInputPlugin])
+      }
+    })
+
+    embulk = bootstrap.initializeCloseable()
+
+    mapper = new ObjectMapper()
+  }
+
+
+  def doTest(config: ConfigSource) {
+    embulk.run(config)
+
+    val fs = FileSystems.getDefault
+    val lines = Files.readAllLines(fs.getPath("dynamodb-local-result000.00.tsv"), Charset.forName("UTF-8"))
+    assertEquals(lines.size, 1)
+
+    val head = lines.get(0)
+    val values = head.split("\t")
+
+    assertThat(values(0), is("key-1"))
+    assertThat(values(1), is("0"))
+    assertThat(values(2), is("42.195"))
+    assertThat(values(3), is("true"))
+
+    val listValue = mapper.readValue(values(4).replaceAll("\"(?!\")", ""), classOf[java.util.List[Object]])
+    assertThat(listValue.size(), is(2))
+    assertThat(listValue.get(0).asInstanceOf[String], is("list-value"))
+    assertThat(listValue.get(1).asInstanceOf[Int], is(123))
+
+    val mapValue = mapper.readValue(values(5).replaceAll("\"(?!\")", ""), classOf[java.util.Map[String, Object]])
+    assert(mapValue.containsKey("map-key-1"))
+    assertThat(mapValue.get("map-key-1").asInstanceOf[String], is("map-value-1"))
+    assert(mapValue.containsKey("map-key-2"))
+    assertThat(mapValue.get("map-key-2").asInstanceOf[Int], is(456))
+  }
+
+  @Test
+  def queryTest() {
+    val config = embulk.newConfigLoader().fromYamlFile(
+      new File("src/test/resources/yaml/dynamodb-local-query.yml"))
+
+    config.getNested("in")
+      .set("operation", "query")
+      .set("table", EMBULK_DYNAMODB_TEST_TABLE)
+
+    doTest(config)
+  }
+}
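
The `replaceAll("\"(?!\")", "")` calls undo the CSV quoting on the JSON columns: every double quote not followed by another double quote is stripped, while escaped `""` pairs collapse to a single quote, leaving text Jackson can parse.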
src/test/scala/org/embulk/input/dynamodb/ope/ScanOperationTest.scala RENAMED (from src/test/scala/org/embulk/input/dynamodb/DynamoDBUtilTest.scala)
@@ -1,4 +1,4 @@
-package org.embulk.input.dynamodb
+package org.embulk.input.dynamodb.ope
 
 import java.io.File
 import java.nio.charset.Charset
@@ -8,13 +8,14 @@ import com.fasterxml.jackson.databind.ObjectMapper
 import com.google.inject.{Binder, Module}
 import org.embulk.EmbulkEmbed
 import org.embulk.config.ConfigSource
+import org.embulk.input.dynamodb.DynamodbInputPlugin
 import org.embulk.plugin.InjectedPluginSource
 import org.embulk.spi.InputPlugin
 import org.hamcrest.CoreMatchers._
 import org.junit.Assert._
 import org.junit.{Before, Test}
 
-class DynamoDBUtilTest {
+class ScanOperationTest {
   private var embulk: EmbulkEmbed = null
 
   private var EMBULK_DYNAMODB_TEST_TABLE: String = null
@@ -71,9 +72,10 @@ class DynamoDBUtilTest {
   @Test
   def scanTest() {
     val config = embulk.newConfigLoader().fromYamlFile(
-      new File("src/test/resources/yaml/dynamodb-local.yml"))
+      new File("src/test/resources/yaml/dynamodb-local-scan.yml"))
 
     config.getNested("in")
+      .set("operation", "scan")
      .set("table", EMBULK_DYNAMODB_TEST_TABLE)
 
     doTest(config)
test/run_dynamodb_local.sh CHANGED
@@ -1,7 +1,7 @@
 #!/bin/sh
 
-docker run -d \
+docker run -i -t -d \
   -p 8000:8000 \
-  -v $PWD/dynamodb-local:/data \
-  --env DYNAMO_OPT='-dbPath /data -sharedDb' \
-  lulichn/dynamodb-local
+  tray/dynamodb-local \
+  -inMemory -sharedDb -port 8000
+
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: embulk-input-dynamodb
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.2.0
 platform: ruby
 authors:
 - Daisuke Higashi
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-04-06 00:00:00.000000000 Z
+date: 2016-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -59,11 +59,14 @@ files:
 - lib/embulk/input/dynamodb.rb
 - src/main/scala/org/embulk/input/dynamodb/AttributeValueHelper.scala
 - src/main/scala/org/embulk/input/dynamodb/AwsCredentials.scala
-- src/main/scala/org/embulk/input/dynamodb/DynamoDBUtil.scala
+- src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala
 - src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala
 - src/main/scala/org/embulk/input/dynamodb/Filter.scala
 - src/main/scala/org/embulk/input/dynamodb/FilterConfig.scala
 - src/main/scala/org/embulk/input/dynamodb/PluginTask.scala
+- src/main/scala/org/embulk/input/dynamodb/ope/AbstractOperation.scala
+- src/main/scala/org/embulk/input/dynamodb/ope/QueryOperation.scala
+- src/main/scala/org/embulk/input/dynamodb/ope/ScanOperation.scala
 - src/main/scala/org/embulk/input/dynamodb/package.scala
 - src/test/resources/json/test.json
 - src/test/resources/json/test.template
@@ -71,24 +74,26 @@ files:
 - src/test/resources/yaml/authMethodBasic_Error.yml
 - src/test/resources/yaml/authMethodEnv.yml
 - src/test/resources/yaml/authMethodProfile.yml
-- src/test/resources/yaml/dynamodb-local.yml
+- src/test/resources/yaml/dynamodb-local-query.yml
+- src/test/resources/yaml/dynamodb-local-scan.yml
 - src/test/resources/yaml/notSetAuthMethod.yml
 - src/test/scala/org/embulk/input/dynamodb/AttributeValueHelperTest.scala
 - src/test/scala/org/embulk/input/dynamodb/AwsCredentialsTest.scala
-- src/test/scala/org/embulk/input/dynamodb/DynamoDBUtilTest.scala
+- src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala
+- src/test/scala/org/embulk/input/dynamodb/ope/ScanOperationTest.scala
 - test/create_table.sh
 - test/put_items.sh
 - test/run_dynamodb_local.sh
-- classpath/commons-logging-1.1.3.jar
-- classpath/commons-codec-1.6.jar
-- classpath/scala-library-2.11.7.jar
+- classpath/aws-java-sdk-core-1.10.43.jar
 - classpath/aws-java-sdk-dynamodb-1.10.43.jar
-- classpath/embulk-input-dynamodb-0.1.1.jar
 - classpath/aws-java-sdk-kms-1.10.43.jar
-- classpath/httpcore-4.3.3.jar
-- classpath/httpclient-4.3.6.jar
-- classpath/aws-java-sdk-core-1.10.43.jar
 - classpath/aws-java-sdk-s3-1.10.43.jar
+- classpath/commons-codec-1.6.jar
+- classpath/commons-logging-1.1.3.jar
+- classpath/embulk-input-dynamodb-0.2.0.jar
+- classpath/httpclient-4.3.6.jar
+- classpath/httpcore-4.3.3.jar
+- classpath/scala-library-2.11.8.jar
 homepage: https://github.com/lulichn/embulk-input-dynamodb
 licenses:
 - MIT
src/main/scala/org/embulk/input/dynamodb/DynamoDBUtil.scala DELETED
@@ -1,155 +0,0 @@
-package org.embulk.input.dynamodb
-
-import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
-
-import com.amazonaws.ClientConfiguration
-import com.amazonaws.regions.Regions
-import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
-import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, ScanRequest, ScanResult}
-import org.embulk.config.ConfigException
-import org.embulk.spi._
-import org.embulk.spi.`type`.Types
-import org.msgpack.value.{Value, ValueFactory}
-
-import scala.collection.JavaConverters._
-
-object DynamoDBUtil {
-  def createClient(task: PluginTask): AmazonDynamoDBClient = {
-    val client = new AmazonDynamoDBClient(
-      AwsCredentials.getCredentialsProvider(task),
-      new ClientConfiguration()
-        .withMaxConnections(50)) // SDK Default Value
-
-    if (task.getEndPoint.isPresent) {
-      client.withEndpoint(task.getEndPoint.get())
-    } else if (task.getRegion.isPresent) {
-      client.withRegion(Regions.fromName(task.getRegion.get()))
-    } else {
-      throw new ConfigException("At least one of EndPoint or Region must be set")
-    }
-  }
-
-
-  def scan(
-    task: PluginTask,
-    schema: Schema,
-    output: PageOutput)
-    (implicit client: AmazonDynamoDBClient): Unit =
-  {
-    val allocator: BufferAllocator = task.getBufferAllocator
-    val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
-
-    val attributes: JList[String] = new JArrayList[String]()
-
-    schema.getColumns.asScala.foreach { column =>
-      attributes.add(column.getName)
-    }
-    val scanFilter: JMap[String, Condition] = createScanFilter(task).asJava
-    var evaluateKey: JMap[String, AttributeValue] = null
-
-    val scanLimit: Long = task.getScanLimit
-    val recordLimit: Long = task.getRecordLimit
-    var recordCount: Long = 0
-
-    do {
-      val batchSize = getScanLimit(scanLimit, recordLimit, recordCount)
-
-      val request: ScanRequest = new ScanRequest()
-        .withTableName(task.getTable)
-        .withAttributesToGet(attributes)
-        .withScanFilter(scanFilter)
-        .withExclusiveStartKey(evaluateKey)
-
-      if (batchSize > 0) {
-        request.setLimit(batchSize)
-      }
-
-      val result: ScanResult = client.scan(request)
-      evaluateKey = result.getLastEvaluatedKey
-
-      result.getItems.asScala.foreach { item =>
-        schema.getColumns.asScala.foreach { column =>
-          val value = item.asScala.get(column.getName)
-          column.getType match {
-            case Types.STRING =>
-              convert(column, value, pageBuilder.setString)
-            case Types.LONG =>
-              convert(column, value, pageBuilder.setLong)
-            case Types.DOUBLE =>
-              convert(column, value, pageBuilder.setDouble)
-            case Types.BOOLEAN =>
-              convert(column, value, pageBuilder.setBoolean)
-            case Types.JSON =>
-              convert(column, value, pageBuilder.setJson)
-            case _ => /* Do nothing */
-          }
-        }
-        pageBuilder.addRecord()
-        recordCount += 1
-      }
-    } while (evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
-
-    pageBuilder.finish()
-  }
-
-  private def getScanLimit(scanLimit: Long, recordLimit: Long, recordCount: Long): Int = {
-    if (scanLimit > 0 && recordLimit > 0) {
-      math.min(scanLimit, recordLimit - recordCount).toInt
-    } else if (scanLimit > 0 || recordLimit > 0) {
-      math.max(scanLimit, recordLimit).toInt
-    } else { 0 }
-  }
-
-  private def createScanFilter(task: PluginTask): Map[String, Condition] = {
-    val filterMap = collection.mutable.HashMap[String, Condition]()
-
-    Option(task.getFilters.orNull).map { filters =>
-      filters.getFilters.asScala.map { filter =>
-        val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
-        attributeValueList += createAttributeValue(filter.getType, filter.getValue)
-        Option(filter.getValue2).map { value2 =>
-          attributeValueList += createAttributeValue(filter.getType, value2) }
-
-        filterMap += filter.getName -> new Condition()
-          .withComparisonOperator(filter.getCondition)
-          .withAttributeValueList(attributeValueList.asJava)
-      }
-    }
-
-    filterMap.toMap
-  }
-
-  private def createAttributeValue(t: String, v: String): AttributeValue = {
-    t match {
-      case "string" =>
-        new AttributeValue().withS(v)
-      case "long" | "double" =>
-        new AttributeValue().withN(v)
-      case "boolean" =>
-        new AttributeValue().withBOOL(v.toBoolean)
-    }
-  }
-
-  private def convert[A](column: Column,
-                         value: Option[AttributeValue],
-                         f: (Column, A) => Unit)(implicit f1: Option[AttributeValue] => A): Unit =
-    f(column, f1(value))
-
-  implicit private def StringConvert(value: Option[AttributeValue]): String =
-    value.map(_.getS).getOrElse("")
-
-  implicit private def LongConvert(value: Option[AttributeValue]): Long =
-    value.map(_.getN.toLong).getOrElse(0L)
-
-  implicit private def DoubleConvert(value: Option[AttributeValue]): Double =
-    value.map(_.getN.toDouble).getOrElse(0D)
-
-  implicit private def BooleanConvert(value: Option[AttributeValue]): Boolean =
-    value.exists(_.getBOOL)
-
-  implicit private def JsonConvert(value: Option[AttributeValue]): Value = {
-    value.map { attr =>
-      AttributeValueHelper.decodeToValue(attr)
-    }.getOrElse(ValueFactory.newNil())
-  }
-}