embulk-input-dynamodb 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b499a6aa99576c0713943f0be90e2435b12ac017
4
- data.tar.gz: ebfbba02b9e38d42c97049c7742d4245a0f368f5
3
+ metadata.gz: d5bff72dc25c3fe5ce57da31798ac74d2ac7e8bf
4
+ data.tar.gz: a35560d2b4a29e5d5d71d2d2656210da7653e19b
5
5
  SHA512:
6
- metadata.gz: 220cdcde6ec63897c87a53b8cd656a1e2628e930410f8a7dca9873bf2d9ded244222ed867997282804a327f047c46eb51b9d7e0abf16666e96ebf6b1588ac149
7
- data.tar.gz: 5d951db675c6166da1b70b65d9d80584eadf0e0aaeb28d710a07741dd4a401955b80eef88619a124739d328a32b8d3b5328228be59b4c06970f871f1060659e1
6
+ metadata.gz: 41fd0fdc599929929d81519aefb58c72cb250f66650f31b8305d08ed26f77d41fe08fb8df275ada2383f388c6629a7d55c6b4211a519f74382781e6667cc85a6
7
+ data.tar.gz: d0f608e76092fe34e150a4d3e4076c7a3f94e36d6990f64d721c2628702492456e7d48cd713bca5e8442cab958e4e82d7d695fab5901d0d3c75ac3d5f340cd1a
data/README.md CHANGED
@@ -14,7 +14,8 @@
14
14
  If you don't specify keys, I'll use the profile configuration file for the default profile.
15
15
  - **region**: Region Name (string, default: ap-northeast-1)
16
16
  - **table**: Table Name (string, required)
17
- - **limit**: Scan Limit (integer, default: 100)
17
+ - **scan_limit**: Maximum number of items fetched per DynamoDB Scan request (Int, optional)
18
+ - **record_limit**: Maximum total number of records to read (Long, optional)
18
19
 
19
20
  ## Example
20
21
 
@@ -25,7 +26,6 @@ in:
25
26
  secret_key: YOUR_SECRET_KEY
26
27
  region: ap-northeast-1
27
28
  table: YOUR_TABLE_NAME
28
- limit: 1000
29
29
  columns:
30
30
  - {name: ColumnA, type: long}
31
31
  - {name: ColumnB, type: double}
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.2"
16
+ version = "0.0.3"
17
17
 
18
18
  dependencies {
19
19
  compile "org.scala-lang:scala-library:2.11.5"
@@ -1,17 +1,27 @@
1
1
  package org.embulk.input
2
2
 
3
- import com.amazonaws.ClientConfiguration
4
- import com.amazonaws.auth.{AWSCredentials, BasicAWSCredentials, AWSCredentialsProvider}
3
+ import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
4
+
5
5
  import com.amazonaws.auth.profile.ProfileCredentialsProvider
6
+ import com.amazonaws.auth.{AWSCredentials, AWSCredentialsProvider, BasicAWSCredentials}
6
7
  import com.amazonaws.regions.Regions
7
8
  import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
8
9
  import com.amazonaws.services.dynamodbv2.model._
10
+ import com.amazonaws.{AmazonClientException, ClientConfiguration}
9
11
  import org.embulk.spi._
10
12
 
11
- import java.util.{ArrayList => JArrayList, List => JList}
12
13
  import scala.collection.JavaConversions._
13
14
 
14
15
  object DynamoDBUtil {
16
+ def createClient(task: PluginTask): AmazonDynamoDBClient = {
17
+ try {
18
+ createClientUsingIAMRole(task)
19
+ } catch {
20
+ case e: AmazonClientException =>
21
+ createClientUsingCredentials(task)
22
+ }
23
+ }
24
+
15
25
  private def getCredentialsProvider(task: PluginTask): AWSCredentialsProvider = {
16
26
  {for {
17
27
  accessKey <- Option(task.getAccessKey.orNull)
@@ -23,18 +33,30 @@ object DynamoDBUtil {
23
33
  new BasicAWSCredentials(accessKey, secretKey)
24
34
  }
25
35
  }
26
- }}.getOrElse{
36
+ }}.getOrElse {
27
37
  new ProfileCredentialsProvider()
28
38
  }
29
39
  }
30
40
 
31
- def createClient(task: PluginTask): AmazonDynamoDBClient = {
41
+ private def createClientUsingIAMRole(task: PluginTask): AmazonDynamoDBClient = {
42
+ val client: AmazonDynamoDBClient = new AmazonDynamoDBClient(
43
+ new ClientConfiguration().withMaxConnections(10))
44
+ .withRegion(Regions.fromName(task.getRegion))
45
+
46
+ client.describeTable(task.getTable) // FIXME
47
+
48
+ client
49
+ }
50
+
51
+ private def createClientUsingCredentials(task: PluginTask): AmazonDynamoDBClient = {
32
52
  val credentialsProvider: AWSCredentialsProvider = getCredentialsProvider(task)
33
53
  val client: AmazonDynamoDBClient = new AmazonDynamoDBClient(
34
54
  credentialsProvider,
35
55
  new ClientConfiguration().withMaxConnections(10))
36
56
  .withRegion(Regions.fromName(task.getRegion))
37
57
 
58
+ client.describeTable(task.getTable) // FIXME
59
+
38
60
  client
39
61
  }
40
62
 
@@ -49,44 +71,68 @@ object DynamoDBUtil {
49
71
  attributes.add(column.getName)
50
72
  }
51
73
  val scanFilter: Map[String, Condition] = createScanFilter(task)
52
- val limit: Int = task.getLimit
53
-
54
- val request: ScanRequest = new ScanRequest()
55
- .withTableName(task.getTable)
56
- .withAttributesToGet(attributes)
57
- .withScanFilter(scanFilter)
58
- .withLimit(limit)
59
-
60
- val result: ScanResult = client.scan(request)
61
- result.getItems.foreach { item =>
62
- schema.getColumns.foreach { column =>
63
- val value = item.get(column.getName)
64
- column.getType.getName match {
65
- case "string" =>
66
- pageBuilder.setString(column, Option(value) map { _.getS } getOrElse { "" })
67
- case "long" =>
68
- pageBuilder.setLong(column, Option(value) map { _.getN.toLong } getOrElse { 0L })
69
- case "double" =>
70
- pageBuilder.setDouble(column, Option(value) map { _.getN.toDouble } getOrElse { 0D })
71
- case "boolean" =>
72
- pageBuilder.setBoolean(column, Option(value) map { _.getBOOL == true } getOrElse { false })
73
- case _ => /* Do nothing */
74
+ var evaluateKey: JMap[String, AttributeValue] = null
75
+
76
+ val scanLimit: Long = task.getScanLimit
77
+ val recordLimit: Long = task.getRecordLimit
78
+ var recordCount: Long = 0
79
+
80
+ do {
81
+ val batchSize = getScanLimit(scanLimit, recordLimit, recordCount)
82
+
83
+ val request: ScanRequest = new ScanRequest()
84
+ .withTableName(task.getTable)
85
+ .withAttributesToGet(attributes)
86
+ .withScanFilter(scanFilter)
87
+ .withExclusiveStartKey(evaluateKey)
88
+
89
+ if (batchSize > 0) {
90
+ request.setLimit(batchSize)
91
+ }
92
+
93
+ val result: ScanResult = client.scan(request)
94
+ evaluateKey = result.getLastEvaluatedKey
95
+
96
+ result.getItems.foreach { item =>
97
+ schema.getColumns.foreach { column =>
98
+ val value = item.get(column.getName)
99
+ column.getType.getName match {
100
+ case "string" =>
101
+ pageBuilder.setString(column, Option(value) map { _.getS } getOrElse { "" })
102
+ case "long" =>
103
+ pageBuilder.setLong(column, Option(value) map { _.getN.toLong } getOrElse { 0L })
104
+ case "double" =>
105
+ pageBuilder.setDouble(column, Option(value) map { _.getN.toDouble } getOrElse { 0D })
106
+ case "boolean" =>
107
+ pageBuilder.setBoolean(column, Option(value) map { _.getBOOL == true } getOrElse { false })
108
+ case _ => /* Do nothing */
109
+ }
74
110
  }
111
+ pageBuilder.addRecord()
112
+ recordCount += 1
75
113
  }
76
- pageBuilder.addRecord()
77
- }
114
+ println(s"$recordLimit $recordLimit $recordCount")
115
+ } while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
78
116
 
79
117
  pageBuilder.finish()
80
118
  }
81
119
 
120
+ private def getScanLimit(scanLimit: Long, recordLimit: Long, recordCount: Long): Int = {
121
+ if (scanLimit > 0 && recordLimit > 0) {
122
+ math.min(scanLimit, recordLimit - recordCount).toInt
123
+ } else if (scanLimit > 0 || recordLimit > 0) {
124
+ math.max(scanLimit, recordLimit).toInt
125
+ } else { 0 }
126
+ }
127
+
82
128
  private def createScanFilter(task: PluginTask): Map[String, Condition] = {
83
129
  val filterMap = collection.mutable.HashMap[String, Condition]()
84
130
 
85
131
  Option(task.getFilters.orNull).map { filters =>
86
132
  filters.getFilters.map { filter =>
87
133
  val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
88
- attributeValueList += createAttrinuteValue(filter.getType, filter.getValue)
89
- Option(filter.getValue2).map { value2 => attributeValueList += createAttrinuteValue(filter.getType, value2) }
134
+ attributeValueList += createAttributeValue(filter.getType, filter.getValue)
135
+ Option(filter.getValue2).map { value2 => attributeValueList += createAttributeValue(filter.getType, value2) }
90
136
 
91
137
  filterMap += filter.getName -> new Condition()
92
138
  .withComparisonOperator(filter.getCondition)
@@ -97,7 +143,7 @@ object DynamoDBUtil {
97
143
  filterMap.toMap
98
144
  }
99
145
 
100
- private def createAttrinuteValue(t: String, v: String): AttributeValue = {
146
+ private def createAttributeValue(t: String, v: String): AttributeValue = {
101
147
  t match {
102
148
  case "string" =>
103
149
  new AttributeValue().withS(v)
@@ -17,13 +17,17 @@ trait PluginTask extends Task {
17
17
  @ConfigDefault("ap-northeast-1")
18
18
  def getRegion: String
19
19
 
20
+ @Config("scan_limit")
21
+ @ConfigDefault("0")
22
+ def getScanLimit: Long
23
+
24
+ @Config("record_limit")
25
+ @ConfigDefault("0")
26
+ def getRecordLimit: Long
27
+
20
28
  @Config("table")
21
29
  def getTable: String
22
30
 
23
- @Config("limit")
24
- @ConfigDefault("100")
25
- def getLimit: Int
26
-
27
31
  @Config("columns")
28
32
  def getColumns: SchemaConfig
29
33
 
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-dynamodb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daisuke Higashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-16 00:00:00.000000000 Z
11
+ date: 2015-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: '"Loads records from Dynamodb."'
42
42
  email:
43
43
  - daisuke.develop@gmail.com
@@ -65,9 +65,10 @@ files:
65
65
  - classpath/aws-java-sdk-s3-1.9.22.jar
66
66
  - classpath/commons-codec-1.6.jar
67
67
  - classpath/commons-logging-1.1.3.jar
68
- - classpath/embulk-input-dynamodb-0.0.2.jar
68
+ - classpath/embulk-input-dynamodb-0.0.3.jar
69
69
  - classpath/httpclient-4.3.4.jar
70
70
  - classpath/httpcore-4.3.2.jar
71
+ - classpath/joda-time-2.8.1.jar
71
72
  - classpath/scala-library-2.11.5.jar
72
73
  homepage: https://github.com/lulichn/embulk-input-dynamodb
73
74
  licenses: