embulk-input-dynamodb 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/build.gradle +1 -1
- data/src/main/scala/org/embulk/input/DynamoDBUtil.scala +78 -32
- data/src/main/scala/org/embulk/input/PluginTask.scala +8 -4
- metadata +14 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d5bff72dc25c3fe5ce57da31798ac74d2ac7e8bf
|
4
|
+
data.tar.gz: a35560d2b4a29e5d5d71d2d2656210da7653e19b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41fd0fdc599929929d81519aefb58c72cb250f66650f31b8305d08ed26f77d41fe08fb8df275ada2383f388c6629a7d55c6b4211a519f74382781e6667cc85a6
|
7
|
+
data.tar.gz: d0f608e76092fe34e150a4d3e4076c7a3f94e36d6990f64d721c2628702492456e7d48cd713bca5e8442cab958e4e82d7d695fab5901d0d3c75ac3d5f340cd1a
|
data/README.md
CHANGED
@@ -14,7 +14,8 @@
|
|
14
14
|
If you don't specify keys, I'll use the profile configuration file for the default profile.
|
15
15
|
- **region**: Region Name (string, default: ap-northeast-1)
|
16
16
|
- **table**: Table Name (string, required)
|
17
|
-
- **
|
17
|
+
- **scan_limit**: Maximum number of items requested per DynamoDB Scan query (Long, optional, default: 0)
|
18
|
+
- **record_limit**: Maximum total number of records to read; 0 means no limit (Long, optional, default: 0)
|
18
19
|
|
19
20
|
## Example
|
20
21
|
|
@@ -25,7 +26,6 @@ in:
|
|
25
26
|
secret_key: YOUR_SECRET_KEY
|
26
27
|
region: ap-northeast-1
|
27
28
|
table: YOUR_TABLE_NAME
|
28
|
-
limit: 1000
|
29
29
|
columns:
|
30
30
|
- {name: ColumnA, type: long}
|
31
31
|
- {name: ColumnB, type: double}
|
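data/build.gradle
CHANGED
(hunk for this file was not captured in this extract)
data/src/main/scala/org/embulk/input/DynamoDBUtil.scala
CHANGED
@@ -1,17 +1,27 @@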
|
|
1
1
|
package org.embulk.input
|
2
2
|
|
3
|
-
import
|
4
|
-
|
3
|
+
import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
|
4
|
+
|
5
5
|
import com.amazonaws.auth.profile.ProfileCredentialsProvider
|
6
|
+
import com.amazonaws.auth.{AWSCredentials, AWSCredentialsProvider, BasicAWSCredentials}
|
6
7
|
import com.amazonaws.regions.Regions
|
7
8
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
|
8
9
|
import com.amazonaws.services.dynamodbv2.model._
|
10
|
+
import com.amazonaws.{AmazonClientException, ClientConfiguration}
|
9
11
|
import org.embulk.spi._
|
10
12
|
|
11
|
-
import java.util.{ArrayList => JArrayList, List => JList}
|
12
13
|
import scala.collection.JavaConversions._
|
13
14
|
|
14
15
|
object DynamoDBUtil {
|
16
|
+
def createClient(task: PluginTask): AmazonDynamoDBClient = {
|
17
|
+
try {
|
18
|
+
createClientUsingIAMRole(task)
|
19
|
+
} catch {
|
20
|
+
case e: AmazonClientException =>
|
21
|
+
createClientUsingCredentials(task)
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
15
25
|
private def getCredentialsProvider(task: PluginTask): AWSCredentialsProvider = {
|
16
26
|
{for {
|
17
27
|
accessKey <- Option(task.getAccessKey.orNull)
|
@@ -23,18 +33,30 @@ object DynamoDBUtil {
|
|
23
33
|
new BasicAWSCredentials(accessKey, secretKey)
|
24
34
|
}
|
25
35
|
}
|
26
|
-
}}.getOrElse{
|
36
|
+
}}.getOrElse {
|
27
37
|
new ProfileCredentialsProvider()
|
28
38
|
}
|
29
39
|
}
|
30
40
|
|
31
|
-
def
|
41
|
+
private def createClientUsingIAMRole(task: PluginTask): AmazonDynamoDBClient = {
|
42
|
+
val client: AmazonDynamoDBClient = new AmazonDynamoDBClient(
|
43
|
+
new ClientConfiguration().withMaxConnections(10))
|
44
|
+
.withRegion(Regions.fromName(task.getRegion))
|
45
|
+
|
46
|
+
client.describeTable(task.getTable) // FIXME
|
47
|
+
|
48
|
+
client
|
49
|
+
}
|
50
|
+
|
51
|
+
private def createClientUsingCredentials(task: PluginTask): AmazonDynamoDBClient = {
|
32
52
|
val credentialsProvider: AWSCredentialsProvider = getCredentialsProvider(task)
|
33
53
|
val client: AmazonDynamoDBClient = new AmazonDynamoDBClient(
|
34
54
|
credentialsProvider,
|
35
55
|
new ClientConfiguration().withMaxConnections(10))
|
36
56
|
.withRegion(Regions.fromName(task.getRegion))
|
37
57
|
|
58
|
+
client.describeTable(task.getTable) // FIXME
|
59
|
+
|
38
60
|
client
|
39
61
|
}
|
40
62
|
|
@@ -49,44 +71,68 @@ object DynamoDBUtil {
|
|
49
71
|
attributes.add(column.getName)
|
50
72
|
}
|
51
73
|
val scanFilter: Map[String, Condition] = createScanFilter(task)
|
52
|
-
|
53
|
-
|
54
|
-
val
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
+
var evaluateKey: JMap[String, AttributeValue] = null
|
75
|
+
|
76
|
+
val scanLimit: Long = task.getScanLimit
|
77
|
+
val recordLimit: Long = task.getRecordLimit
|
78
|
+
var recordCount: Long = 0
|
79
|
+
|
80
|
+
do {
|
81
|
+
val batchSize = getScanLimit(scanLimit, recordLimit, recordCount)
|
82
|
+
|
83
|
+
val request: ScanRequest = new ScanRequest()
|
84
|
+
.withTableName(task.getTable)
|
85
|
+
.withAttributesToGet(attributes)
|
86
|
+
.withScanFilter(scanFilter)
|
87
|
+
.withExclusiveStartKey(evaluateKey)
|
88
|
+
|
89
|
+
if (batchSize > 0) {
|
90
|
+
request.setLimit(batchSize)
|
91
|
+
}
|
92
|
+
|
93
|
+
val result: ScanResult = client.scan(request)
|
94
|
+
evaluateKey = result.getLastEvaluatedKey
|
95
|
+
|
96
|
+
result.getItems.foreach { item =>
|
97
|
+
schema.getColumns.foreach { column =>
|
98
|
+
val value = item.get(column.getName)
|
99
|
+
column.getType.getName match {
|
100
|
+
case "string" =>
|
101
|
+
pageBuilder.setString(column, Option(value) map { _.getS } getOrElse { "" })
|
102
|
+
case "long" =>
|
103
|
+
pageBuilder.setLong(column, Option(value) map { _.getN.toLong } getOrElse { 0L })
|
104
|
+
case "double" =>
|
105
|
+
pageBuilder.setDouble(column, Option(value) map { _.getN.toDouble } getOrElse { 0D })
|
106
|
+
case "boolean" =>
|
107
|
+
pageBuilder.setBoolean(column, Option(value) map { _.getBOOL == true } getOrElse { false })
|
108
|
+
case _ => /* Do nothing */
|
109
|
+
}
|
74
110
|
}
|
111
|
+
pageBuilder.addRecord()
|
112
|
+
recordCount += 1
|
75
113
|
}
|
76
|
-
|
77
|
-
}
|
114
|
+
println(s"$recordLimit $recordLimit $recordCount")
|
115
|
+
} while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
|
78
116
|
|
79
117
|
pageBuilder.finish()
|
80
118
|
}
|
81
119
|
|
120
|
+
private def getScanLimit(scanLimit: Long, recordLimit: Long, recordCount: Long): Int = {
|
121
|
+
if (scanLimit > 0 && recordLimit > 0) {
|
122
|
+
math.min(scanLimit, recordLimit - recordCount).toInt
|
123
|
+
} else if (scanLimit > 0 || recordLimit > 0) {
|
124
|
+
math.max(scanLimit, recordLimit).toInt
|
125
|
+
} else { 0 }
|
126
|
+
}
|
127
|
+
|
82
128
|
private def createScanFilter(task: PluginTask): Map[String, Condition] = {
|
83
129
|
val filterMap = collection.mutable.HashMap[String, Condition]()
|
84
130
|
|
85
131
|
Option(task.getFilters.orNull).map { filters =>
|
86
132
|
filters.getFilters.map { filter =>
|
87
133
|
val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
|
88
|
-
attributeValueList +=
|
89
|
-
Option(filter.getValue2).map { value2 => attributeValueList +=
|
134
|
+
attributeValueList += createAttributeValue(filter.getType, filter.getValue)
|
135
|
+
Option(filter.getValue2).map { value2 => attributeValueList += createAttributeValue(filter.getType, value2) }
|
90
136
|
|
91
137
|
filterMap += filter.getName -> new Condition()
|
92
138
|
.withComparisonOperator(filter.getCondition)
|
@@ -97,7 +143,7 @@ object DynamoDBUtil {
|
|
97
143
|
filterMap.toMap
|
98
144
|
}
|
99
145
|
|
100
|
-
private def
|
146
|
+
private def createAttributeValue(t: String, v: String): AttributeValue = {
|
101
147
|
t match {
|
102
148
|
case "string" =>
|
103
149
|
new AttributeValue().withS(v)
|
data/src/main/scala/org/embulk/input/PluginTask.scala
CHANGED
@@ -17,13 +17,17 @@ trait PluginTask extends Task {
|
|
17
17
|
@ConfigDefault("ap-northeast-1")
|
18
18
|
def getRegion: String
|
19
19
|
|
20
|
+
@Config("scan_limit")
|
21
|
+
@ConfigDefault("0")
|
22
|
+
def getScanLimit: Long
|
23
|
+
|
24
|
+
@Config("record_limit")
|
25
|
+
@ConfigDefault("0")
|
26
|
+
def getRecordLimit: Long
|
27
|
+
|
20
28
|
@Config("table")
|
21
29
|
def getTable: String
|
22
30
|
|
23
|
-
@Config("limit")
|
24
|
-
@ConfigDefault("100")
|
25
|
-
def getLimit: Int
|
26
|
-
|
27
31
|
@Config("columns")
|
28
32
|
def getColumns: SchemaConfig
|
29
33
|
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-dynamodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daisuke Higashi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
15
|
-
version_requirements: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ~>
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
14
|
requirement: !ruby/object:Gem::Requirement
|
21
15
|
requirements:
|
22
16
|
- - ~>
|
23
17
|
- !ruby/object:Gem::Version
|
24
18
|
version: '1.0'
|
19
|
+
name: bundler
|
25
20
|
prerelease: false
|
26
21
|
type: :development
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
29
22
|
version_requirements: !ruby/object:Gem::Requirement
|
30
23
|
requirements:
|
31
|
-
- -
|
24
|
+
- - ~>
|
32
25
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - '>='
|
37
31
|
- !ruby/object:Gem::Version
|
38
32
|
version: '10.0'
|
33
|
+
name: rake
|
39
34
|
prerelease: false
|
40
35
|
type: :development
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
41
|
description: '"Loads records from Dynamodb."'
|
42
42
|
email:
|
43
43
|
- daisuke.develop@gmail.com
|
@@ -65,9 +65,10 @@ files:
|
|
65
65
|
- classpath/aws-java-sdk-s3-1.9.22.jar
|
66
66
|
- classpath/commons-codec-1.6.jar
|
67
67
|
- classpath/commons-logging-1.1.3.jar
|
68
|
-
- classpath/embulk-input-dynamodb-0.0.
|
68
|
+
- classpath/embulk-input-dynamodb-0.0.3.jar
|
69
69
|
- classpath/httpclient-4.3.4.jar
|
70
70
|
- classpath/httpcore-4.3.2.jar
|
71
|
+
- classpath/joda-time-2.8.1.jar
|
71
72
|
- classpath/scala-library-2.11.5.jar
|
72
73
|
homepage: https://github.com/lulichn/embulk-input-dynamodb
|
73
74
|
licenses:
|