embulk-input-dynamodb 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +1 -1
- data/src/main/scala/org/embulk/input/DynamoDBUtil.scala +78 -32
- data/src/main/scala/org/embulk/input/PluginTask.scala +8 -4
- metadata +14 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d5bff72dc25c3fe5ce57da31798ac74d2ac7e8bf
|
4
|
+
data.tar.gz: a35560d2b4a29e5d5d71d2d2656210da7653e19b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41fd0fdc599929929d81519aefb58c72cb250f66650f31b8305d08ed26f77d41fe08fb8df275ada2383f388c6629a7d55c6b4211a519f74382781e6667cc85a6
|
7
|
+
data.tar.gz: d0f608e76092fe34e150a4d3e4076c7a3f94e36d6990f64d721c2628702492456e7d48cd713bca5e8442cab958e4e82d7d695fab5901d0d3c75ac3d5f340cd1a
|
data/README.md
CHANGED
@@ -14,7 +14,8 @@
|
|
14
14
|
If you don't specify keys, I'll use the profile configuration file for the default profile.
|
15
15
|
- **region**: Region Name (string, default: ap-northeast-1)
|
16
16
|
- **table**: Table Name (string, required)
|
17
|
-
- **
|
17
|
+
- **scan_limit**: `Limit` applied to each individual DynamoDB Scan request (integer, optional, default: 0 = no limit)
|
18
|
+
- **record_limit**: Stop scanning once this many records have been loaded (integer, optional, default: 0 = unlimited)
|
18
19
|
|
19
20
|
## Example
|
20
21
|
|
@@ -25,7 +26,6 @@ in:
|
|
25
26
|
secret_key: YOUR_SECRET_KEY
|
26
27
|
region: ap-northeast-1
|
27
28
|
table: YOUR_TABLE_NAME
|
28
|
-
limit: 1000
|
29
29
|
columns:
|
30
30
|
- {name: ColumnA, type: long}
|
31
31
|
- {name: ColumnB, type: double}
|
data/build.gradle
CHANGED
@@ -1,17 +1,27 @@
|
|
1
1
|
package org.embulk.input
|
2
2
|
|
3
|
-
import
|
4
|
-
|
3
|
+
import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
|
4
|
+
|
5
5
|
import com.amazonaws.auth.profile.ProfileCredentialsProvider
|
6
|
+
import com.amazonaws.auth.{AWSCredentials, AWSCredentialsProvider, BasicAWSCredentials}
|
6
7
|
import com.amazonaws.regions.Regions
|
7
8
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
8
9
|
import com.amazonaws.services.dynamodbv2.model._
|
10
|
+
import com.amazonaws.{AmazonClientException, ClientConfiguration}
|
9
11
|
import org.embulk.spi._
|
10
12
|
|
11
|
-
import java.util.{ArrayList => JArrayList, List => JList}
|
12
13
|
import scala.collection.JavaConversions._
|
13
14
|
|
14
15
|
object DynamoDBUtil {
|
16
|
+
/**
 * Builds the DynamoDB client used by this plugin.
 *
 * `createClientUsingIAMRole` is attempted first. If it throws an
 * `AmazonClientException`, the exception is discarded and
 * `createClientUsingCredentials` is used instead. Any other exception,
 * and any exception thrown by the fallback itself, propagates to the caller.
 *
 * @param task plugin configuration (region, table, optional keys)
 * @return a client that has already completed one `describeTable` call
 */
def createClient(task: PluginTask): AmazonDynamoDBClient =
  try createClientUsingIAMRole(task)
  catch {
    // The first attempt's failure is intentionally not reported; only the
    // fallback's outcome is visible to the caller.
    case _: AmazonClientException => createClientUsingCredentials(task)
  }
|
24
|
+
|
15
25
|
private def getCredentialsProvider(task: PluginTask): AWSCredentialsProvider = {
|
16
26
|
{for {
|
17
27
|
accessKey <- Option(task.getAccessKey.orNull)
|
@@ -23,18 +33,30 @@ object DynamoDBUtil {
|
|
23
33
|
new BasicAWSCredentials(accessKey, secretKey)
|
24
34
|
}
|
25
35
|
}
|
26
|
-
}}.getOrElse{
|
36
|
+
}}.getOrElse {
|
27
37
|
new ProfileCredentialsProvider()
|
28
38
|
}
|
29
39
|
}
|
30
40
|
|
31
|
-
def
|
41
|
+
/**
 * Creates a client without passing any credentials provider.
 *
 * NOTE(review): with no provider supplied, credential lookup is left to the
 * SDK; presumably this is what picks up an instance IAM role. Confirm
 * against the AWS SDK documentation.
 *
 * @param task supplies the region name and the table name used for the probe
 * @return the configured client, once the probe call has succeeded
 */
private def createClientUsingIAMRole(task: PluginTask): AmazonDynamoDBClient = {
  val config = new ClientConfiguration().withMaxConnections(10)
  val unregioned = new AmazonDynamoDBClient(config)
  val client: AmazonDynamoDBClient = unregioned.withRegion(Regions.fromName(task.getRegion))

  // FIXME (kept from the original): the result is discarded. The call appears
  // to exist only so that unusable credentials fail here, where createClient
  // can still fall back, rather than later during the scan.
  client.describeTable(task.getTable)

  client
}
|
50
|
+
|
51
|
+
/**
 * Creates a client from the provider returned by `getCredentialsProvider`.
 *
 * @param task supplies the credential settings, the region name and the
 *             table name used for the probe
 * @return the configured client, once the probe call has succeeded
 */
private def createClientUsingCredentials(task: PluginTask): AmazonDynamoDBClient = {
  val provider: AWSCredentialsProvider = getCredentialsProvider(task)
  val config = new ClientConfiguration().withMaxConnections(10)
  val unregioned = new AmazonDynamoDBClient(provider, config)
  val client: AmazonDynamoDBClient = unregioned.withRegion(Regions.fromName(task.getRegion))

  // FIXME (kept from the original): the result is discarded. The call appears
  // to serve only as an early check that the client can reach the table.
  client.describeTable(task.getTable)

  client
}
|
40
62
|
|
@@ -49,44 +71,68 @@ object DynamoDBUtil {
|
|
49
71
|
attributes.add(column.getName)
|
50
72
|
}
|
51
73
|
val scanFilter: Map[String, Condition] = createScanFilter(task)
|
52
|
-
|
53
|
-
|
54
|
-
val
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
+
var evaluateKey: JMap[String, AttributeValue] = null
|
75
|
+
|
76
|
+
val scanLimit: Long = task.getScanLimit
|
77
|
+
val recordLimit: Long = task.getRecordLimit
|
78
|
+
var recordCount: Long = 0
|
79
|
+
|
80
|
+
do {
|
81
|
+
val batchSize = getScanLimit(scanLimit, recordLimit, recordCount)
|
82
|
+
|
83
|
+
val request: ScanRequest = new ScanRequest()
|
84
|
+
.withTableName(task.getTable)
|
85
|
+
.withAttributesToGet(attributes)
|
86
|
+
.withScanFilter(scanFilter)
|
87
|
+
.withExclusiveStartKey(evaluateKey)
|
88
|
+
|
89
|
+
if (batchSize > 0) {
|
90
|
+
request.setLimit(batchSize)
|
91
|
+
}
|
92
|
+
|
93
|
+
val result: ScanResult = client.scan(request)
|
94
|
+
evaluateKey = result.getLastEvaluatedKey
|
95
|
+
|
96
|
+
result.getItems.foreach { item =>
|
97
|
+
schema.getColumns.foreach { column =>
|
98
|
+
val value = item.get(column.getName)
|
99
|
+
column.getType.getName match {
|
100
|
+
case "string" =>
|
101
|
+
pageBuilder.setString(column, Option(value) map { _.getS } getOrElse { "" })
|
102
|
+
case "long" =>
|
103
|
+
pageBuilder.setLong(column, Option(value) map { _.getN.toLong } getOrElse { 0L })
|
104
|
+
case "double" =>
|
105
|
+
pageBuilder.setDouble(column, Option(value) map { _.getN.toDouble } getOrElse { 0D })
|
106
|
+
case "boolean" =>
|
107
|
+
pageBuilder.setBoolean(column, Option(value) map { _.getBOOL == true } getOrElse { false })
|
108
|
+
case _ => /* Do nothing */
|
109
|
+
}
|
74
110
|
}
|
111
|
+
pageBuilder.addRecord()
|
112
|
+
recordCount += 1
|
75
113
|
}
|
76
|
-
|
77
|
-
}
|
114
|
+
println(s"$recordLimit $recordLimit $recordCount")
|
115
|
+
} while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
|
78
116
|
|
79
117
|
pageBuilder.finish()
|
80
118
|
}
|
81
119
|
|
120
|
+
/**
 * Computes the `Limit` to put on the next Scan request.
 *
 * The result is the smaller of the per-request cap and the number of records
 * still allowed under the total cap, considering only the caps that are
 * configured (a cap is "configured" when it is greater than 0).
 *
 * Differences from the previous implementation:
 *  - with only `recordLimit` configured, the remaining budget
 *    (`recordLimit - recordCount`) is returned instead of the full
 *    `recordLimit`, so later pages no longer request more than is left;
 *  - the result is clamped to `[0, Int.MaxValue]` instead of being truncated
 *    by `Long.toInt`, which wrapped for large configured values.
 *
 * @param scanLimit   per-request cap; values <= 0 mean "not configured"
 * @param recordLimit cap on the total number of records; values <= 0 mean
 *                    "not configured"
 * @param recordCount number of records emitted so far
 * @return the limit for the next request, or 0 when no cap applies. The
 *         caller only sets a limit when the result is positive, so 0 means
 *         "no limit"; callers should therefore stop scanning once
 *         `recordCount` has reached `recordLimit` rather than call this.
 */
private def getScanLimit(scanLimit: Long, recordLimit: Long, recordCount: Long): Int = {
  val perRequest: Option[Long] = if (scanLimit > 0) Some(scanLimit) else None
  // Budget left under the total cap; shrinks as records are emitted.
  val remaining: Option[Long] = if (recordLimit > 0) Some(recordLimit - recordCount) else None

  (perRequest.toList ++ remaining.toList) match {
    case Nil  => 0
    case caps =>
      // Clamp before narrowing so out-of-range Long values cannot wrap.
      math.max(0L, math.min(caps.min, Int.MaxValue.toLong)).toInt
  }
}
|
127
|
+
|
82
128
|
private def createScanFilter(task: PluginTask): Map[String, Condition] = {
|
83
129
|
val filterMap = collection.mutable.HashMap[String, Condition]()
|
84
130
|
|
85
131
|
Option(task.getFilters.orNull).map { filters =>
|
86
132
|
filters.getFilters.map { filter =>
|
87
133
|
val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
|
88
|
-
attributeValueList +=
|
89
|
-
Option(filter.getValue2).map { value2 => attributeValueList +=
|
134
|
+
attributeValueList += createAttributeValue(filter.getType, filter.getValue)
|
135
|
+
Option(filter.getValue2).map { value2 => attributeValueList += createAttributeValue(filter.getType, value2) }
|
90
136
|
|
91
137
|
filterMap += filter.getName -> new Condition()
|
92
138
|
.withComparisonOperator(filter.getCondition)
|
@@ -97,7 +143,7 @@ object DynamoDBUtil {
|
|
97
143
|
filterMap.toMap
|
98
144
|
}
|
99
145
|
|
100
|
-
private def
|
146
|
+
private def createAttributeValue(t: String, v: String): AttributeValue = {
|
101
147
|
t match {
|
102
148
|
case "string" =>
|
103
149
|
new AttributeValue().withS(v)
|
@@ -17,13 +17,17 @@ trait PluginTask extends Task {
|
|
17
17
|
@ConfigDefault("ap-northeast-1")
|
18
18
|
def getRegion: String
|
19
19
|
|
20
|
+
@Config("scan_limit")
|
21
|
+
@ConfigDefault("0")
|
22
|
+
def getScanLimit: Long
|
23
|
+
|
24
|
+
@Config("record_limit")
|
25
|
+
@ConfigDefault("0")
|
26
|
+
def getRecordLimit: Long
|
27
|
+
|
20
28
|
@Config("table")
|
21
29
|
def getTable: String
|
22
30
|
|
23
|
-
@Config("limit")
|
24
|
-
@ConfigDefault("100")
|
25
|
-
def getLimit: Int
|
26
|
-
|
27
31
|
@Config("columns")
|
28
32
|
def getColumns: SchemaConfig
|
29
33
|
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-dynamodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daisuke Higashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ~>
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- -
|
24
|
+
- - ~>
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - '>='
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: '"Loads records from Dynamodb."'
|
42
42
|
email:
|
43
43
|
- daisuke.develop@gmail.com
|
@@ -65,9 +65,10 @@ files:
|
|
65
65
|
- classpath/aws-java-sdk-s3-1.9.22.jar
|
66
66
|
- classpath/commons-codec-1.6.jar
|
67
67
|
- classpath/commons-logging-1.1.3.jar
|
68
|
-
- classpath/embulk-input-dynamodb-0.0.
|
68
|
+
- classpath/embulk-input-dynamodb-0.0.3.jar
|
69
69
|
- classpath/httpclient-4.3.4.jar
|
70
70
|
- classpath/httpcore-4.3.2.jar
|
71
|
+
- classpath/joda-time-2.8.1.jar
|
71
72
|
- classpath/scala-library-2.11.5.jar
|
72
73
|
homepage: https://github.com/lulichn/embulk-input-dynamodb
|
73
74
|
licenses:
|