embulk-input-dynamodb 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 603d3c6513d86bf773182a8214d70cb668fcf2a6
4
- data.tar.gz: 60d9256bb5fac99e56f5424997037323e98f3868
3
+ metadata.gz: 324f33092c5bb362ecf9a804329e56eda0e545be
4
+ data.tar.gz: 00c350b4d52c76adf8291bd6abeee9d9b284eb10
5
5
  SHA512:
6
- metadata.gz: 55d4bdb5960bdf069804360dd0ca9b627bb7cfab0be6bb92c1012b14335019f3c1fd9dd31548fcf9b11b3e22d207efc35b8faf1172b7a5be531d62d39966d61e
7
- data.tar.gz: 9ae148915f273eca767e740b0cd523f7d355b6f793644106320b592695124682769c47df41b643139d91111d9e541b65e692a32d31b9072a04ea1721b1fd5b35
6
+ metadata.gz: 09103d2d4bdbc12d22f51318b7e6be74bba3420d9ce1e0003e78c55b77f242c03e5a74d5f9a95233be08539bb2c0cce8fbb4b8ad151104f5730c3f76f3edfd7d
7
+ data.tar.gz: c7d278c4eac6260264652ce74c6901d416dc3a4ebd287ce1162d16e911628f18430aad3b34205524b7e53e4b10ae32958cbe26872d359124b883bad67f78eb93
data/README.md CHANGED
@@ -24,17 +24,27 @@ Available values options are: `basic`, `env`, `instance`, `profile`, `properties
24
24
  - **region**: Region Name (string, optional)
25
25
  - **end_point**: EndPoint URL (string, optional)
26
26
  `end_point` has priority when `region` and `end_point` are specified.
27
+ - **operation**: Operation Type (string, required)
28
+ Available types are: `scan`, `query`
27
29
  - **table**: Table Name (string, required)
28
- - **scan_limit**: DynamoDB 1time Scan Query size limit (Int, optional)
29
- - **record_limit**: Max Record Search limit (Long, optional)
30
+ - **filters**: Query Filters
31
+ Required for the `query` operation; optional for `scan`.
32
+ - **name**: Column name.
33
+ - **type**: Column type.
34
+ - **condition**: Comparison Operator.
35
+ - **value(s)**: Attribute Value(s).
36
+ - **limit**: Size limit of a single DynamoDB Scan/Query request (Int, optional)
37
+ - **scan_limit**: Size limit of a single DynamoDB Scan request (Deprecated, Int, optional)
38
+ - **record_limit**: Max Record Search limit (Long, optional)
30
39
  - **columns**: key-value pairs where the key is a column name and the value is the options for that column (required)
31
40
  - **name**: Column name.
32
41
  - **type**: Column values are converted to this embulk type.
33
42
  Available values options are: `boolean`, `long`, `double`, `string`, `json`
34
- - **filters**: query filter (optional)
35
43
 
36
44
  ## Example
37
45
 
46
+ - Scan Operation
47
+
38
48
  ```yaml
39
49
  in:
40
50
  type: dynamodb
@@ -42,6 +52,7 @@ in:
42
52
  access_key: YOUR_ACCESS_KEY
43
53
  secret_key: YOUR_SECRET_KEY
44
54
  region: ap-northeast-1
55
+ operation: scan
45
56
  table: YOUR_TABLE_NAME
46
57
  columns:
47
58
  - {name: ColumnA, type: long}
@@ -57,11 +68,33 @@ out:
57
68
  type: stdout
58
69
  ```
59
70
 
71
+ - Query Operation
72
+
73
+ ```yaml
74
+ in:
75
+ type: dynamodb
76
+ auth_method: env
77
+ region: ap-northeast-1
78
+ operation: query
79
+ table: YOUR_TABLE_NAME
80
+ columns:
81
+ - {name: ColumnA, type: long}
82
+ - {name: ColumnB, type: double}
83
+ - {name: ColumnC, type: string}
84
+ - {name: ColumnD, type: boolean}
85
+ - {name: ColumnE, type: json}
86
+ filters:
87
+ - {name: ColumnA, type: long, condition: EQ, value: 10000}
88
+
89
+ out:
90
+ type: stdout
91
+ ```
92
+
60
93
  ## Try
61
94
 
62
95
  ```
63
96
  $ ./gradlew classpath
64
- $ embulk preview -I lib your-sample.yml
97
+ $ embulk preview -I lib your-config.yml
65
98
  ```
66
99
 
67
100
  ## Build
@@ -14,22 +14,22 @@ configurations {
14
14
  provided
15
15
  }
16
16
 
17
- version = "0.1.1"
17
+ version = "0.2.0"
18
18
 
19
19
  sourceCompatibility = 1.7
20
20
  targetCompatibility = 1.7
21
21
 
22
22
  dependencies {
23
- compile "org.scala-lang:scala-library:2.11.7"
23
+ compile "org.scala-lang:scala-library:2.11.8"
24
24
 
25
- compile "org.embulk:embulk-core:0.8.6"
26
- provided "org.embulk:embulk-core:0.8.6"
25
+ compile "org.embulk:embulk-core:0.8.13"
26
+ provided "org.embulk:embulk-core:0.8.13"
27
27
 
28
28
  compile "com.amazonaws:aws-java-sdk-dynamodb:1.10.43"
29
29
 
30
30
  testCompile "junit:junit:4.+"
31
- testCompile "org.embulk:embulk-standards:0.8.6"
32
- testCompile "org.embulk:embulk-core:0.8.6:tests"
31
+ testCompile "org.embulk:embulk-standards:0.8.13"
32
+ testCompile "org.embulk:embulk-core:0.8.13:tests"
33
33
  }
34
34
 
35
35
  compileScala {
@@ -0,0 +1,23 @@
1
+ package org.embulk.input.dynamodb
2
+
3
+ import com.amazonaws.ClientConfiguration
4
+ import com.amazonaws.regions.Regions
5
+ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
6
+ import org.embulk.config.ConfigException
7
+
8
+ object DynamoDBClient {
9
+ def create(task: PluginTask): AmazonDynamoDBClient = {
10
+ val client = new AmazonDynamoDBClient(
11
+ AwsCredentials.getCredentialsProvider(task),
12
+ new ClientConfiguration()
13
+ .withMaxConnections(50)) // SDK Default Value
14
+
15
+ if (task.getEndPoint.isPresent) {
16
+ client.withEndpoint(task.getEndPoint.get())
17
+ } else if (task.getRegion.isPresent) {
18
+ client.withRegion(Regions.fromName(task.getRegion.get()))
19
+ } else {
20
+ throw new ConfigException("At least one of EndPoint or Region must be set")
21
+ }
22
+ }
23
+ }
@@ -4,6 +4,7 @@ import java.util.{List => JList}
4
4
 
5
5
  import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
6
6
  import org.embulk.config._
7
+ import org.embulk.input.dynamodb.ope.{QueryOperation, ScanOperation}
7
8
  import org.embulk.spi._
8
9
 
9
10
  class DynamodbInputPlugin extends InputPlugin {
@@ -24,8 +25,15 @@ class DynamodbInputPlugin extends InputPlugin {
24
25
  def run(taskSource: TaskSource, schema: Schema, taskIndex: Int, output: PageOutput): TaskReport = {
25
26
  val task: PluginTask = taskSource.loadTask(classOf[PluginTask])
26
27
 
27
- implicit val client: AmazonDynamoDBClient = DynamoDBUtil.createClient(task)
28
- DynamoDBUtil.scan(task, schema, output)
28
+ val client: AmazonDynamoDBClient = DynamoDBClient.create(task)
29
+
30
+ val ope = task.getOperation.toLowerCase match {
31
+ case "scan" => new ScanOperation(client)
32
+ case "query" => new QueryOperation(client)
33
+ // Reject unknown values with a configuration error instead of an opaque scala.MatchError.
34
+ case other => throw new ConfigException(s"Unknown operation '$other'. Available types are: scan, query")
35
+ }
36
+ ope.execute(task, schema, output)
29
37
 
30
38
  Exec.newTaskReport()
31
39
  }
@@ -29,6 +29,13 @@ trait PluginTask extends Task {
29
29
  @ConfigDefault("null")
30
30
  def getEndPoint: Optional[String]
31
31
 
32
+ @Config("operation")
33
+ def getOperation: String
34
+
35
+ @Config("limit")
36
+ @ConfigDefault("0")
37
+ def getLimit: Long
38
+
32
39
  @Config("scan_limit")
33
40
  @ConfigDefault("0")
34
41
  def getScanLimit: Long
@@ -0,0 +1,101 @@
1
+ package org.embulk.input.dynamodb.ope
2
+
3
+ import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition}
4
+ import org.embulk.input.dynamodb.{AttributeValueHelper, PluginTask}
5
+ import org.embulk.spi._
6
+ import org.embulk.spi.`type`.Types
7
+ import org.msgpack.value.{Value, ValueFactory}
8
+
9
+ import scala.collection.JavaConverters._
10
+
11
+ abstract class AbstractOperation {
12
+ def execute(task: PluginTask, schema: Schema, output: PageOutput): Unit
13
+
14
+ def getLimit(limit: Long, recordLimit: Long, recordCount: Long): Int = {
15
+ // Records still allowed by recordLimit (values <= 0 mean "unlimited"); never negative.
16
+ val remaining = if (recordLimit > 0) math.max(recordLimit - recordCount, 0L) else Long.MaxValue
17
+ // Request size: `limit` capped by the remaining budget; 0 tells the caller not to set a limit.
18
+ val size = if (limit > 0) math.min(limit, remaining) else if (recordLimit > 0) remaining else 0L
19
+ math.min(size, Int.MaxValue.toLong).toInt // clamp rather than wrap around in .toInt
20
+ }
21
+
22
+ def createFilters(task: PluginTask): Map[String, Condition] = {
23
+ val filterMap = collection.mutable.HashMap[String, Condition]()
24
+
25
+ Option(task.getFilters.orNull).map { filters =>
26
+ filters.getFilters.asScala.map { filter =>
27
+ val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
28
+ attributeValueList += createAttributeValue(filter.getType, filter.getValue)
29
+ Option(filter.getValue2).map { value2 =>
30
+ attributeValueList+= createAttributeValue(filter.getType, value2) }
31
+
32
+ filterMap += filter.getName -> new Condition()
33
+ .withComparisonOperator(filter.getCondition)
34
+ .withAttributeValueList(attributeValueList.asJava)
35
+ }
36
+ }
37
+
38
+ filterMap.toMap
39
+ }
40
+
41
+ def createAttributeValue(t: String, v: String): AttributeValue = {
42
+ // Builds the DynamoDB attribute for a filter value from its configured type name.
43
+ t match {
44
+ case "string" => new AttributeValue().withS(v)
45
+ case "long" | "double" => new AttributeValue().withN(v) // both are sent through withN
46
+ case "boolean" => new AttributeValue().withBOOL(v.toBoolean)
47
+ // Fail with a configuration error instead of scala.MatchError on an unsupported type.
48
+ case other => throw new org.embulk.config.ConfigException(s"Unsupported filter type: $other")
49
+ }
50
+ }
51
+
52
+ def write(pageBuilder: PageBuilder, schema: Schema, items: Seq[Map[String, AttributeValue]]): Long = {
53
+ var count = 0
54
+
55
+ items.foreach { item =>
56
+ schema.getColumns.asScala.foreach { column =>
57
+ val value = item.get(column.getName)
58
+ column.getType match {
59
+ case Types.STRING =>
60
+ convert(column, value, pageBuilder.setString)
61
+ case Types.LONG =>
62
+ convert(column, value, pageBuilder.setLong)
63
+ case Types.DOUBLE =>
64
+ convert(column, value, pageBuilder.setDouble)
65
+ case Types.BOOLEAN =>
66
+ convert(column, value, pageBuilder.setBoolean)
67
+ case Types.JSON =>
68
+ convert(column, value, pageBuilder.setJson)
69
+ case _ => /* Do nothing */
70
+ }
71
+ }
72
+ pageBuilder.addRecord()
73
+ count += 1
74
+ }
75
+
76
+ count
77
+ }
78
+
79
+ def convert[A](column: Column,
80
+ value: Option[AttributeValue],
81
+ f: (Column, A) => Unit)(implicit f1: Option[AttributeValue] => A): Unit =
82
+ f(column, f1(value))
83
+
84
+ implicit def StringConvert(value: Option[AttributeValue]): String =
85
+ value.map(_.getS).getOrElse("")
86
+
87
+ implicit def LongConvert(value: Option[AttributeValue]): Long =
88
+ value.map(_.getN.toLong).getOrElse(0L)
89
+
90
+ implicit def DoubleConvert(value: Option[AttributeValue]): Double =
91
+ value.map(_.getN.toDouble).getOrElse(0D)
92
+
93
+ implicit def BooleanConvert(value: Option[AttributeValue]): Boolean =
94
+ value.exists(_.getBOOL)
95
+
96
+ implicit def JsonConvert(value: Option[AttributeValue]): Value = {
97
+ value.map { attr =>
98
+ AttributeValueHelper.decodeToValue(attr)
99
+ }.getOrElse(ValueFactory.newNil())
100
+ }
101
+ }
@@ -0,0 +1,50 @@
1
+ package org.embulk.input.dynamodb.ope
2
+
3
+ import java.util.{List => JList, Map => JMap}
4
+
5
+ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
6
+ import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, QueryRequest, QueryResult}
7
+ import org.embulk.input.dynamodb.PluginTask
8
+ import org.embulk.spi.{BufferAllocator, PageBuilder, PageOutput, Schema}
9
+
10
+ import scala.collection.JavaConverters._
11
+
12
+ class QueryOperation(client: AmazonDynamoDBClient) extends AbstractOperation {
13
+ override def execute(task: PluginTask,
14
+ schema: Schema,
15
+ output: PageOutput): Unit =
16
+ {
17
+ val allocator: BufferAllocator = task.getBufferAllocator
18
+ val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
19
+
20
+ val attributes: JList[String] = schema.getColumns.asScala.map(_.getName).asJava
21
+ val conditions: JMap[String, Condition] = createFilters(task).asJava
22
+ var evaluateKey: JMap[String, AttributeValue] = null
23
+
24
+ val limit: Long = math.max(task.getScanLimit, task.getLimit)
25
+ val recordLimit: Long = task.getRecordLimit
26
+ var recordCount: Long = 0
27
+
28
+ do {
29
+ val batchSize = getLimit(limit, recordLimit, recordCount)
30
+
31
+ val request: QueryRequest = new QueryRequest()
32
+ .withTableName(task.getTable)
33
+ .withAttributesToGet(attributes)
34
+ .withKeyConditions(conditions)
35
+ .withExclusiveStartKey(evaluateKey)
36
+
37
+ if (batchSize > 0) {
38
+ request.setLimit(batchSize)
39
+ }
40
+
41
+ val result: QueryResult = client.query(request)
42
+ evaluateKey = result.getLastEvaluatedKey
43
+
44
+ val items = result.getItems.asScala.map(_.asScala.toMap)
45
+ recordCount += write(pageBuilder, schema, items)
46
+ } while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
47
+
48
+ pageBuilder.finish()
49
+ }
50
+ }
@@ -0,0 +1,51 @@
1
+ package org.embulk.input.dynamodb.ope
2
+
3
+ import java.util.{List => JList, Map => JMap}
4
+
5
+ import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
6
+ import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, ScanRequest, ScanResult}
7
+ import org.embulk.input.dynamodb.PluginTask
8
+ import org.embulk.spi.{BufferAllocator, PageBuilder, PageOutput, Schema}
9
+
10
+ import scala.collection.JavaConverters._
11
+
12
+ class ScanOperation(client: AmazonDynamoDBClient) extends AbstractOperation {
13
+ override def execute(
14
+ task: PluginTask,
15
+ schema: Schema,
16
+ output: PageOutput): Unit =
17
+ {
18
+ val allocator: BufferAllocator = task.getBufferAllocator
19
+ val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
20
+
21
+ val attributes: JList[String] = schema.getColumns.asScala.map(_.getName).asJava
22
+ val scanFilter: JMap[String, Condition] = createFilters(task).asJava
23
+ var evaluateKey: JMap[String, AttributeValue] = null
24
+ // Honor the `limit` option as well as the deprecated `scan_limit`; the larger value wins, as in QueryOperation.
25
+ val scanLimit: Long = math.max(task.getScanLimit, task.getLimit)
26
+ val recordLimit: Long = task.getRecordLimit
27
+ var recordCount: Long = 0
28
+ // Page through the table until no LastEvaluatedKey is returned or record_limit is reached.
29
+ do {
30
+ val batchSize = getLimit(scanLimit, recordLimit, recordCount)
31
+
32
+ val request: ScanRequest = new ScanRequest()
33
+ .withTableName(task.getTable)
34
+ .withAttributesToGet(attributes)
35
+ .withScanFilter(scanFilter)
36
+ .withExclusiveStartKey(evaluateKey)
37
+
38
+ if (batchSize > 0) {
39
+ request.setLimit(batchSize)
40
+ }
41
+
42
+ val result: ScanResult = client.scan(request)
43
+ evaluateKey = result.getLastEvaluatedKey
44
+
45
+ val items = result.getItems.asScala.map(_.asScala.toMap)
46
+ recordCount += write(pageBuilder, schema, items)
47
+ } while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
48
+
49
+ pageBuilder.finish()
50
+ }
51
+ }
@@ -1,6 +1,7 @@
1
1
  in:
2
2
  type: dynamodb
3
3
  region: ENV_VAR
4
+ operation: scan
4
5
  table: ENV_VAR
5
6
  auth_method: basic
6
7
  access_key: ENV_VAR
@@ -1,6 +1,7 @@
1
1
  in:
2
2
  type: dynamodb
3
3
  region: ENV_VAR
4
+ operation: scan
4
5
  table: ENV_VAR
5
6
  auth_method: basic
6
7
  columns:
@@ -1,6 +1,7 @@
1
1
  in:
2
2
  type: dynamodb
3
3
  region: ENV_VAR
4
+ operation: scan
4
5
  table: ENV_VAR
5
6
  auth_method: env
6
7
  columns:
@@ -1,6 +1,7 @@
1
1
  in:
2
2
  type: dynamodb
3
3
  region: ENV_VAR
4
+ operation: scan
4
5
  table: ENV_VAR
5
6
  auth_method: profile
6
7
  profile_name: ENV_VAR
@@ -0,0 +1,25 @@
1
+ in:
2
+ type: dynamodb
3
+ end_point: http://localhost:8000/
4
+ table: ENV_VAR
5
+ auth_method: basic
6
+ access_key: dummy
7
+ secret_key: dummy
8
+ filters:
9
+ - {name: pri-key, type: string, condition: EQ, value: key-1}
10
+ columns:
11
+ - {name: pri-key, type: string}
12
+ - {name: sort-key, type: long}
13
+ - {name: doubleValue, type: double}
14
+ - {name: boolValue, type: boolean}
15
+ - {name: listValue, type: json}
16
+ - {name: mapValue, type: json}
17
+
18
+ out:
19
+ type: file
20
+ path_prefix: dynamodb-local-result
21
+ file_ext: tsv
22
+ formatter:
23
+ type: csv
24
+ delimiter: "\t"
25
+ header_line: false
@@ -1,6 +1,7 @@
1
1
  in:
2
2
  type: dynamodb
3
3
  region: ENV_VAR
4
+ operation: scan
4
5
  table: ENV_VAR
5
6
  access_key: ENV_VAR
6
7
  secret_key: ENV_VAR
@@ -0,0 +1,83 @@
1
+ package org.embulk.input.dynamodb.ope
2
+
3
+ import java.io.File
4
+ import java.nio.charset.Charset
5
+ import java.nio.file.{FileSystems, Files}
6
+
7
+ import com.fasterxml.jackson.databind.ObjectMapper
8
+ import com.google.inject.{Binder, Module}
9
+ import org.embulk.EmbulkEmbed
10
+ import org.embulk.config.ConfigSource
11
+ import org.embulk.input.dynamodb.DynamodbInputPlugin
12
+ import org.embulk.plugin.InjectedPluginSource
13
+ import org.embulk.spi.InputPlugin
14
+ import org.hamcrest.CoreMatchers._
15
+ import org.junit.Assert._
16
+ import org.junit.{Before, Test}
17
+
18
+ class QueryOperationTest {
19
+ private var embulk: EmbulkEmbed = null
20
+
21
+ private var EMBULK_DYNAMODB_TEST_TABLE: String = null
22
+ private var mapper: ObjectMapper = null
23
+
24
+ @Before
25
+ def createResources() {
26
+ // Get Environments
27
+ EMBULK_DYNAMODB_TEST_TABLE = System.getenv("EMBULK_DYNAMODB_TEST_TABLE")
28
+
29
+ val bootstrap = new EmbulkEmbed.Bootstrap()
30
+ bootstrap.addModules(new Module {
31
+ def configure(binder: Binder): Unit = {
32
+ InjectedPluginSource.registerPluginTo(binder,
33
+ classOf[InputPlugin],
34
+ "dynamodb",
35
+ classOf[DynamodbInputPlugin])
36
+ }
37
+ })
38
+
39
+ embulk = bootstrap.initializeCloseable()
40
+
41
+ mapper = new ObjectMapper()
42
+ }
43
+
44
+
45
+ def doTest(config: ConfigSource) {
46
+ embulk.run(config)
47
+
48
+ val fs = FileSystems.getDefault
49
+ val lines = Files.readAllLines(fs.getPath("dynamodb-local-result000.00.tsv"), Charset.forName("UTF-8"))
50
+ assertEquals(lines.size, 1)
51
+
52
+ val head = lines.get(0)
53
+ val values = head.split("\t")
54
+
55
+ assertThat(values(0), is("key-1"))
56
+ assertThat(values(1), is("0"))
57
+ assertThat(values(2), is("42.195"))
58
+ assertThat(values(3), is("true"))
59
+
60
+ val listValue = mapper.readValue(values(4).replaceAll("\"(?!\")", ""), classOf[java.util.List[Object]])
61
+ assertThat(listValue.size(), is(2))
62
+ assertThat(listValue.get(0).asInstanceOf[String], is("list-value"))
63
+ assertThat(listValue.get(1).asInstanceOf[Int], is(123))
64
+
65
+ val mapValue = mapper.readValue(values(5).replaceAll("\"(?!\")", ""), classOf[java.util.Map[String, Object]])
66
+ assert(mapValue.containsKey("map-key-1"))
67
+ assertThat(mapValue.get("map-key-1").asInstanceOf[String], is("map-value-1"))
68
+ assert(mapValue.containsKey("map-key-2"))
69
+ assertThat(mapValue.get("map-key-2").asInstanceOf[Int], is(456))
70
+ }
71
+
72
+ @Test
73
+ def queryTest() {
74
+ val config = embulk.newConfigLoader().fromYamlFile(
75
+ new File("src/test/resources/yaml/dynamodb-local-query.yml"))
76
+
77
+ config.getNested("in")
78
+ .set("operation", "query")
79
+ .set("table", EMBULK_DYNAMODB_TEST_TABLE)
80
+
81
+ doTest(config)
82
+ }
83
+ }
@@ -1,4 +1,4 @@
1
- package org.embulk.input.dynamodb
1
+ package org.embulk.input.dynamodb.ope
2
2
 
3
3
  import java.io.File
4
4
  import java.nio.charset.Charset
@@ -8,13 +8,14 @@ import com.fasterxml.jackson.databind.ObjectMapper
8
8
  import com.google.inject.{Binder, Module}
9
9
  import org.embulk.EmbulkEmbed
10
10
  import org.embulk.config.ConfigSource
11
+ import org.embulk.input.dynamodb.DynamodbInputPlugin
11
12
  import org.embulk.plugin.InjectedPluginSource
12
13
  import org.embulk.spi.InputPlugin
13
14
  import org.hamcrest.CoreMatchers._
14
15
  import org.junit.Assert._
15
16
  import org.junit.{Before, Test}
16
17
 
17
- class DynamoDBUtilTest {
18
+ class ScanOperationTest {
18
19
  private var embulk: EmbulkEmbed = null
19
20
 
20
21
  private var EMBULK_DYNAMODB_TEST_TABLE: String = null
@@ -71,9 +72,10 @@ class DynamoDBUtilTest {
71
72
  @Test
72
73
  def scanTest() {
73
74
  val config = embulk.newConfigLoader().fromYamlFile(
74
- new File("src/test/resources/yaml/dynamodb-local.yml"))
75
+ new File("src/test/resources/yaml/dynamodb-local-scan.yml"))
75
76
 
76
77
  config.getNested("in")
78
+ .set("operation", "scan")
77
79
  .set("table", EMBULK_DYNAMODB_TEST_TABLE)
78
80
 
79
81
  doTest(config)
@@ -1,7 +1,7 @@
1
1
  #!/bin/sh
2
2
 
3
- docker run -d \
3
+ docker run -i -t -d \
4
4
  -p 8000:8000 \
5
- -v $PWD/dynamodb-local:/data \
6
- --env DYNAMO_OPT='-dbPath /data -sharedDb' \
7
- lulichn/dynamodb-local
5
+ tray/dynamodb-local \
6
+ -inMemory -sharedDb -port 8000
7
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-dynamodb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daisuke Higashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-06 00:00:00.000000000 Z
11
+ date: 2016-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -59,11 +59,14 @@ files:
59
59
  - lib/embulk/input/dynamodb.rb
60
60
  - src/main/scala/org/embulk/input/dynamodb/AttributeValueHelper.scala
61
61
  - src/main/scala/org/embulk/input/dynamodb/AwsCredentials.scala
62
- - src/main/scala/org/embulk/input/dynamodb/DynamoDBUtil.scala
62
+ - src/main/scala/org/embulk/input/dynamodb/DynamoDBClient.scala
63
63
  - src/main/scala/org/embulk/input/dynamodb/DynamodbInputPlugin.scala
64
64
  - src/main/scala/org/embulk/input/dynamodb/Filter.scala
65
65
  - src/main/scala/org/embulk/input/dynamodb/FilterConfig.scala
66
66
  - src/main/scala/org/embulk/input/dynamodb/PluginTask.scala
67
+ - src/main/scala/org/embulk/input/dynamodb/ope/AbstractOperation.scala
68
+ - src/main/scala/org/embulk/input/dynamodb/ope/QueryOperation.scala
69
+ - src/main/scala/org/embulk/input/dynamodb/ope/ScanOperation.scala
67
70
  - src/main/scala/org/embulk/input/dynamodb/package.scala
68
71
  - src/test/resources/json/test.json
69
72
  - src/test/resources/json/test.template
@@ -71,24 +74,26 @@ files:
71
74
  - src/test/resources/yaml/authMethodBasic_Error.yml
72
75
  - src/test/resources/yaml/authMethodEnv.yml
73
76
  - src/test/resources/yaml/authMethodProfile.yml
74
- - src/test/resources/yaml/dynamodb-local.yml
77
+ - src/test/resources/yaml/dynamodb-local-query.yml
78
+ - src/test/resources/yaml/dynamodb-local-scan.yml
75
79
  - src/test/resources/yaml/notSetAuthMethod.yml
76
80
  - src/test/scala/org/embulk/input/dynamodb/AttributeValueHelperTest.scala
77
81
  - src/test/scala/org/embulk/input/dynamodb/AwsCredentialsTest.scala
78
- - src/test/scala/org/embulk/input/dynamodb/DynamoDBUtilTest.scala
82
+ - src/test/scala/org/embulk/input/dynamodb/ope/QueryOperationTest.scala
83
+ - src/test/scala/org/embulk/input/dynamodb/ope/ScanOperationTest.scala
79
84
  - test/create_table.sh
80
85
  - test/put_items.sh
81
86
  - test/run_dynamodb_local.sh
82
- - classpath/commons-logging-1.1.3.jar
83
- - classpath/commons-codec-1.6.jar
84
- - classpath/scala-library-2.11.7.jar
87
+ - classpath/aws-java-sdk-core-1.10.43.jar
85
88
  - classpath/aws-java-sdk-dynamodb-1.10.43.jar
86
- - classpath/embulk-input-dynamodb-0.1.1.jar
87
89
  - classpath/aws-java-sdk-kms-1.10.43.jar
88
- - classpath/httpcore-4.3.3.jar
89
- - classpath/httpclient-4.3.6.jar
90
- - classpath/aws-java-sdk-core-1.10.43.jar
91
90
  - classpath/aws-java-sdk-s3-1.10.43.jar
91
+ - classpath/commons-codec-1.6.jar
92
+ - classpath/commons-logging-1.1.3.jar
93
+ - classpath/embulk-input-dynamodb-0.2.0.jar
94
+ - classpath/httpclient-4.3.6.jar
95
+ - classpath/httpcore-4.3.3.jar
96
+ - classpath/scala-library-2.11.8.jar
92
97
  homepage: https://github.com/lulichn/embulk-input-dynamodb
93
98
  licenses:
94
99
  - MIT
@@ -1,155 +0,0 @@
1
- package org.embulk.input.dynamodb
2
-
3
- import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
4
-
5
- import com.amazonaws.ClientConfiguration
6
- import com.amazonaws.regions.Regions
7
- import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
8
- import com.amazonaws.services.dynamodbv2.model.{AttributeValue, Condition, ScanRequest, ScanResult}
9
- import org.embulk.config.ConfigException
10
- import org.embulk.spi._
11
- import org.embulk.spi.`type`.Types
12
- import org.msgpack.value.{Value, ValueFactory}
13
-
14
- import scala.collection.JavaConverters._
15
-
16
- object DynamoDBUtil {
17
- def createClient(task: PluginTask): AmazonDynamoDBClient = {
18
- val client = new AmazonDynamoDBClient(
19
- AwsCredentials.getCredentialsProvider(task),
20
- new ClientConfiguration()
21
- .withMaxConnections(50)) // SDK Default Value
22
-
23
- if (task.getEndPoint.isPresent) {
24
- client.withEndpoint(task.getEndPoint.get())
25
- } else if (task.getRegion.isPresent) {
26
- client.withRegion(Regions.fromName(task.getRegion.get()))
27
- } else {
28
- throw new ConfigException("At least one of EndPoint or Region must be set")
29
- }
30
- }
31
-
32
-
33
- def scan(
34
- task: PluginTask,
35
- schema: Schema,
36
- output: PageOutput)
37
- (implicit client: AmazonDynamoDBClient): Unit =
38
- {
39
- val allocator: BufferAllocator = task.getBufferAllocator
40
- val pageBuilder: PageBuilder = new PageBuilder(allocator, schema, output)
41
-
42
- val attributes: JList[String] = new JArrayList[String]()
43
-
44
- schema.getColumns.asScala.foreach { column =>
45
- attributes.add(column.getName)
46
- }
47
- val scanFilter: JMap[String, Condition] = createScanFilter(task).asJava
48
- var evaluateKey: JMap[String, AttributeValue] = null
49
-
50
- val scanLimit: Long = task.getScanLimit
51
- val recordLimit: Long = task.getRecordLimit
52
- var recordCount: Long = 0
53
-
54
- do {
55
- val batchSize = getScanLimit(scanLimit, recordLimit, recordCount)
56
-
57
- val request: ScanRequest = new ScanRequest()
58
- .withTableName(task.getTable)
59
- .withAttributesToGet(attributes)
60
- .withScanFilter(scanFilter)
61
- .withExclusiveStartKey(evaluateKey)
62
-
63
- if (batchSize > 0) {
64
- request.setLimit(batchSize)
65
- }
66
-
67
- val result: ScanResult = client.scan(request)
68
- evaluateKey = result.getLastEvaluatedKey
69
-
70
- result.getItems.asScala.foreach { item =>
71
- schema.getColumns.asScala.foreach { column =>
72
- val value = item.asScala.get(column.getName)
73
- column.getType match {
74
- case Types.STRING =>
75
- convert(column, value, pageBuilder.setString)
76
- case Types.LONG =>
77
- convert(column, value, pageBuilder.setLong)
78
- case Types.DOUBLE =>
79
- convert(column, value, pageBuilder.setDouble)
80
- case Types.BOOLEAN =>
81
- convert(column, value, pageBuilder.setBoolean)
82
- case Types.JSON =>
83
- convert(column, value, pageBuilder.setJson)
84
- case _ => /* Do nothing */
85
- }
86
- }
87
- pageBuilder.addRecord()
88
- recordCount += 1
89
- }
90
- } while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
91
-
92
- pageBuilder.finish()
93
- }
94
-
95
- private def getScanLimit(scanLimit: Long, recordLimit: Long, recordCount: Long): Int = {
96
- if (scanLimit > 0 && recordLimit > 0) {
97
- math.min(scanLimit, recordLimit - recordCount).toInt
98
- } else if (scanLimit > 0 || recordLimit > 0) {
99
- math.max(scanLimit, recordLimit).toInt
100
- } else { 0 }
101
- }
102
-
103
- private def createScanFilter(task: PluginTask): Map[String, Condition] = {
104
- val filterMap = collection.mutable.HashMap[String, Condition]()
105
-
106
- Option(task.getFilters.orNull).map { filters =>
107
- filters.getFilters.asScala.map { filter =>
108
- val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
109
- attributeValueList += createAttributeValue(filter.getType, filter.getValue)
110
- Option(filter.getValue2).map { value2 =>
111
- attributeValueList+= createAttributeValue(filter.getType, value2) }
112
-
113
- filterMap += filter.getName -> new Condition()
114
- .withComparisonOperator(filter.getCondition)
115
- .withAttributeValueList(attributeValueList.asJava)
116
- }
117
- }
118
-
119
- filterMap.toMap
120
- }
121
-
122
- private def createAttributeValue(t: String, v: String): AttributeValue = {
123
- t match {
124
- case "string" =>
125
- new AttributeValue().withS(v)
126
- case "long" | "double" =>
127
- new AttributeValue().withN(v)
128
- case "boolean" =>
129
- new AttributeValue().withBOOL(v.toBoolean)
130
- }
131
- }
132
-
133
- private def convert[A](column: Column,
134
- value: Option[AttributeValue],
135
- f: (Column, A) => Unit)(implicit f1: Option[AttributeValue] => A): Unit =
136
- f(column, f1(value))
137
-
138
- implicit private def StringConvert(value: Option[AttributeValue]): String =
139
- value.map(_.getS).getOrElse("")
140
-
141
- implicit private def LongConvert(value: Option[AttributeValue]): Long =
142
- value.map(_.getN.toLong).getOrElse(0L)
143
-
144
- implicit private def DoubleConvert(value: Option[AttributeValue]): Double =
145
- value.map(_.getN.toDouble).getOrElse(0D)
146
-
147
- implicit private def BooleanConvert(value: Option[AttributeValue]): Boolean =
148
- value.exists(_.getBOOL)
149
-
150
- implicit private def JsonConvert(value: Option[AttributeValue]): Value = {
151
- value.map { attr =>
152
- AttributeValueHelper.decodeToValue(attr)
153
- }.getOrElse(ValueFactory.newNil())
154
- }
155
- }