embulk-input-dynamodb 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b499a6aa99576c0713943f0be90e2435b12ac017
4
- data.tar.gz: ebfbba02b9e38d42c97049c7742d4245a0f368f5
3
+ metadata.gz: d5bff72dc25c3fe5ce57da31798ac74d2ac7e8bf
4
+ data.tar.gz: a35560d2b4a29e5d5d71d2d2656210da7653e19b
5
5
  SHA512:
6
- metadata.gz: 220cdcde6ec63897c87a53b8cd656a1e2628e930410f8a7dca9873bf2d9ded244222ed867997282804a327f047c46eb51b9d7e0abf16666e96ebf6b1588ac149
7
- data.tar.gz: 5d951db675c6166da1b70b65d9d80584eadf0e0aaeb28d710a07741dd4a401955b80eef88619a124739d328a32b8d3b5328228be59b4c06970f871f1060659e1
6
+ metadata.gz: 41fd0fdc599929929d81519aefb58c72cb250f66650f31b8305d08ed26f77d41fe08fb8df275ada2383f388c6629a7d55c6b4211a519f74382781e6667cc85a6
7
+ data.tar.gz: d0f608e76092fe34e150a4d3e4076c7a3f94e36d6990f64d721c2628702492456e7d48cd713bca5e8442cab958e4e82d7d695fab5901d0d3c75ac3d5f340cd1a
data/README.md CHANGED
@@ -14,7 +14,8 @@
14
14
  If you don't specify keys, I'll use the profile configuration file for the default profile.
15
15
  - **region**: Region Name (string, default: ap-northeast-1)
16
16
  - **table**: Table Name (string, required)
17
- - **limit**: Scan Limit (integer, default: 100)
17
+ - **scan_limit**: Page size limit for each DynamoDB Scan request (Int, optional)
18
+ - **record_limit**: Maximum total number of records to read (Long, optional)
18
19
 
19
20
  ## Example
20
21
 
@@ -25,7 +26,6 @@ in:
25
26
  secret_key: YOUR_SECRET_KEY
26
27
  region: ap-northeast-1
27
28
  table: YOUR_TABLE_NAME
28
- limit: 1000
29
29
  columns:
30
30
  - {name: ColumnA, type: long}
31
31
  - {name: ColumnB, type: double}
data/build.gradle CHANGED
@@ -13,7 +13,7 @@ configurations {
13
13
  provided
14
14
  }
15
15
 
16
- version = "0.0.2"
16
+ version = "0.0.3"
17
17
 
18
18
  dependencies {
19
19
  compile "org.scala-lang:scala-library:2.11.5"
@@ -1,17 +1,27 @@
1
1
  package org.embulk.input
2
2
 
3
- import com.amazonaws.ClientConfiguration
4
- import com.amazonaws.auth.{AWSCredentials, BasicAWSCredentials, AWSCredentialsProvider}
3
+ import java.util.{ArrayList => JArrayList, List => JList, Map => JMap}
4
+
5
5
  import com.amazonaws.auth.profile.ProfileCredentialsProvider
6
+ import com.amazonaws.auth.{AWSCredentials, AWSCredentialsProvider, BasicAWSCredentials}
6
7
  import com.amazonaws.regions.Regions
7
8
  import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient
8
9
  import com.amazonaws.services.dynamodbv2.model._
10
+ import com.amazonaws.{AmazonClientException, ClientConfiguration}
9
11
  import org.embulk.spi._
10
12
 
11
- import java.util.{ArrayList => JArrayList, List => JList}
12
13
  import scala.collection.JavaConversions._
13
14
 
14
15
  object DynamoDBUtil {
16
+ def createClient(task: PluginTask): AmazonDynamoDBClient = {
17
+ try {
18
+ createClientUsingIAMRole(task)
19
+ } catch {
20
+ case e: AmazonClientException =>
21
+ createClientUsingCredentials(task)
22
+ }
23
+ }
24
+
15
25
  private def getCredentialsProvider(task: PluginTask): AWSCredentialsProvider = {
16
26
  {for {
17
27
  accessKey <- Option(task.getAccessKey.orNull)
@@ -23,18 +33,30 @@ object DynamoDBUtil {
23
33
  new BasicAWSCredentials(accessKey, secretKey)
24
34
  }
25
35
  }
26
- }}.getOrElse{
36
+ }}.getOrElse {
27
37
  new ProfileCredentialsProvider()
28
38
  }
29
39
  }
30
40
 
31
- def createClient(task: PluginTask): AmazonDynamoDBClient = {
41
+ private def createClientUsingIAMRole(task: PluginTask): AmazonDynamoDBClient = {
42
+ val client: AmazonDynamoDBClient = new AmazonDynamoDBClient(
43
+ new ClientConfiguration().withMaxConnections(10))
44
+ .withRegion(Regions.fromName(task.getRegion))
45
+
46
+ client.describeTable(task.getTable) // FIXME
47
+
48
+ client
49
+ }
50
+
51
+ private def createClientUsingCredentials(task: PluginTask): AmazonDynamoDBClient = {
32
52
  val credentialsProvider: AWSCredentialsProvider = getCredentialsProvider(task)
33
53
  val client: AmazonDynamoDBClient = new AmazonDynamoDBClient(
34
54
  credentialsProvider,
35
55
  new ClientConfiguration().withMaxConnections(10))
36
56
  .withRegion(Regions.fromName(task.getRegion))
37
57
 
58
+ client.describeTable(task.getTable) // FIXME
59
+
38
60
  client
39
61
  }
40
62
 
@@ -49,44 +71,68 @@ object DynamoDBUtil {
49
71
  attributes.add(column.getName)
50
72
  }
51
73
  val scanFilter: Map[String, Condition] = createScanFilter(task)
52
- val limit: Int = task.getLimit
53
-
54
- val request: ScanRequest = new ScanRequest()
55
- .withTableName(task.getTable)
56
- .withAttributesToGet(attributes)
57
- .withScanFilter(scanFilter)
58
- .withLimit(limit)
59
-
60
- val result: ScanResult = client.scan(request)
61
- result.getItems.foreach { item =>
62
- schema.getColumns.foreach { column =>
63
- val value = item.get(column.getName)
64
- column.getType.getName match {
65
- case "string" =>
66
- pageBuilder.setString(column, Option(value) map { _.getS } getOrElse { "" })
67
- case "long" =>
68
- pageBuilder.setLong(column, Option(value) map { _.getN.toLong } getOrElse { 0L })
69
- case "double" =>
70
- pageBuilder.setDouble(column, Option(value) map { _.getN.toDouble } getOrElse { 0D })
71
- case "boolean" =>
72
- pageBuilder.setBoolean(column, Option(value) map { _.getBOOL == true } getOrElse { false })
73
- case _ => /* Do nothing */
74
+ var evaluateKey: JMap[String, AttributeValue] = null
75
+
76
+ val scanLimit: Long = task.getScanLimit
77
+ val recordLimit: Long = task.getRecordLimit
78
+ var recordCount: Long = 0
79
+
80
+ do {
81
+ val batchSize = getScanLimit(scanLimit, recordLimit, recordCount)
82
+
83
+ val request: ScanRequest = new ScanRequest()
84
+ .withTableName(task.getTable)
85
+ .withAttributesToGet(attributes)
86
+ .withScanFilter(scanFilter)
87
+ .withExclusiveStartKey(evaluateKey)
88
+
89
+ if (batchSize > 0) {
90
+ request.setLimit(batchSize)
91
+ }
92
+
93
+ val result: ScanResult = client.scan(request)
94
+ evaluateKey = result.getLastEvaluatedKey
95
+
96
+ result.getItems.foreach { item =>
97
+ schema.getColumns.foreach { column =>
98
+ val value = item.get(column.getName)
99
+ column.getType.getName match {
100
+ case "string" =>
101
+ pageBuilder.setString(column, Option(value) map { _.getS } getOrElse { "" })
102
+ case "long" =>
103
+ pageBuilder.setLong(column, Option(value) map { _.getN.toLong } getOrElse { 0L })
104
+ case "double" =>
105
+ pageBuilder.setDouble(column, Option(value) map { _.getN.toDouble } getOrElse { 0D })
106
+ case "boolean" =>
107
+ pageBuilder.setBoolean(column, Option(value) map { _.getBOOL == true } getOrElse { false })
108
+ case _ => /* Do nothing */
109
+ }
74
110
  }
111
+ pageBuilder.addRecord()
112
+ recordCount += 1
75
113
  }
76
- pageBuilder.addRecord()
77
- }
114
+ println(s"$recordLimit $recordLimit $recordCount")
115
+ } while(evaluateKey != null && (recordLimit == 0 || recordLimit > recordCount))
78
116
 
79
117
  pageBuilder.finish()
80
118
  }
81
119
 
120
+ private def getScanLimit(scanLimit: Long, recordLimit: Long, recordCount: Long): Int = {
121
+ if (scanLimit > 0 && recordLimit > 0) {
122
+ math.min(scanLimit, recordLimit - recordCount).toInt
123
+ } else if (scanLimit > 0 || recordLimit > 0) {
124
+ math.max(scanLimit, recordLimit).toInt
125
+ } else { 0 }
126
+ }
127
+
82
128
  private def createScanFilter(task: PluginTask): Map[String, Condition] = {
83
129
  val filterMap = collection.mutable.HashMap[String, Condition]()
84
130
 
85
131
  Option(task.getFilters.orNull).map { filters =>
86
132
  filters.getFilters.map { filter =>
87
133
  val attributeValueList = collection.mutable.ArrayBuffer[AttributeValue]()
88
- attributeValueList += createAttrinuteValue(filter.getType, filter.getValue)
89
- Option(filter.getValue2).map { value2 => attributeValueList += createAttrinuteValue(filter.getType, value2) }
134
+ attributeValueList += createAttributeValue(filter.getType, filter.getValue)
135
+ Option(filter.getValue2).map { value2 => attributeValueList += createAttributeValue(filter.getType, value2) }
90
136
 
91
137
  filterMap += filter.getName -> new Condition()
92
138
  .withComparisonOperator(filter.getCondition)
@@ -97,7 +143,7 @@ object DynamoDBUtil {
97
143
  filterMap.toMap
98
144
  }
99
145
 
100
- private def createAttrinuteValue(t: String, v: String): AttributeValue = {
146
+ private def createAttributeValue(t: String, v: String): AttributeValue = {
101
147
  t match {
102
148
  case "string" =>
103
149
  new AttributeValue().withS(v)
@@ -17,13 +17,17 @@ trait PluginTask extends Task {
17
17
  @ConfigDefault("ap-northeast-1")
18
18
  def getRegion: String
19
19
 
20
+ @Config("scan_limit")
21
+ @ConfigDefault("0")
22
+ def getScanLimit: Long
23
+
24
+ @Config("record_limit")
25
+ @ConfigDefault("0")
26
+ def getRecordLimit: Long
27
+
20
28
  @Config("table")
21
29
  def getTable: String
22
30
 
23
- @Config("limit")
24
- @ConfigDefault("100")
25
- def getLimit: Int
26
-
27
31
  @Config("columns")
28
32
  def getColumns: SchemaConfig
29
33
 
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-dynamodb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daisuke Higashi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-16 00:00:00.000000000 Z
11
+ date: 2015-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: bundler
15
- version_requirements: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ~>
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
14
  requirement: !ruby/object:Gem::Requirement
21
15
  requirements:
22
16
  - - ~>
23
17
  - !ruby/object:Gem::Version
24
18
  version: '1.0'
19
+ name: bundler
25
20
  prerelease: false
26
21
  type: :development
27
- - !ruby/object:Gem::Dependency
28
- name: rake
29
22
  version_requirements: !ruby/object:Gem::Requirement
30
23
  requirements:
31
- - - '>='
24
+ - - ~>
32
25
  - !ruby/object:Gem::Version
33
- version: '10.0'
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
34
28
  requirement: !ruby/object:Gem::Requirement
35
29
  requirements:
36
30
  - - '>='
37
31
  - !ruby/object:Gem::Version
38
32
  version: '10.0'
33
+ name: rake
39
34
  prerelease: false
40
35
  type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
41
  description: '"Loads records from Dynamodb."'
42
42
  email:
43
43
  - daisuke.develop@gmail.com
@@ -65,9 +65,10 @@ files:
65
65
  - classpath/aws-java-sdk-s3-1.9.22.jar
66
66
  - classpath/commons-codec-1.6.jar
67
67
  - classpath/commons-logging-1.1.3.jar
68
- - classpath/embulk-input-dynamodb-0.0.2.jar
68
+ - classpath/embulk-input-dynamodb-0.0.3.jar
69
69
  - classpath/httpclient-4.3.4.jar
70
70
  - classpath/httpcore-4.3.2.jar
71
+ - classpath/joda-time-2.8.1.jar
71
72
  - classpath/scala-library-2.11.5.jar
72
73
  homepage: https://github.com/lulichn/embulk-input-dynamodb
73
74
  licenses: