embulk-output-dynamodb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/CHANGELOG.md +1 -0
- data/README.md +210 -0
- data/build.gradle +92 -0
- data/config/checkstyle/checkstyle.xml +130 -0
- data/config/checkstyle/default.xml +110 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/output/dynamodb.rb +3 -0
- data/settings.gradle +2 -0
- data/src/main/java/org/embulk/output/dynamodb/AwsCredentials.java +182 -0
- data/src/main/java/org/embulk/output/dynamodb/AwsCredentialsTask.java +39 -0
- data/src/main/java/org/embulk/output/dynamodb/DynamodbOutputPlugin.java +425 -0
- data/src/main/java/org/embulk/output/dynamodb/DynamodbUtils.java +361 -0
- data/src/test/java/org/embulk/output/dynamodb/TestDynamodbOutputPlugin.java +249 -0
- data/src/test/java/org/embulk/output/dynamodb/TestDynamodbUtils.java +10 -0
- data/src/test/resources/sample_01.csv +5 -0
- metadata +100 -0
@@ -0,0 +1,39 @@
|
|
1
|
+
package org.embulk.output.dynamodb;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.Config;
|
5
|
+
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.spi.unit.LocalFile;
|
7
|
+
|
8
|
+
public interface AwsCredentialsTask
|
9
|
+
{
|
10
|
+
@Config("auth_method")
|
11
|
+
@ConfigDefault("\"basic\"")
|
12
|
+
String getAuthMethod();
|
13
|
+
void setAuthMethod(String method);
|
14
|
+
|
15
|
+
@Config("access_key_id")
|
16
|
+
@ConfigDefault("null")
|
17
|
+
Optional<String> getAccessKeyId();
|
18
|
+
void setAccessKeyId(Optional<String> value);
|
19
|
+
|
20
|
+
@Config("secret_access_key")
|
21
|
+
@ConfigDefault("null")
|
22
|
+
Optional<String> getSecretAccessKey();
|
23
|
+
void setSecretAccessKey(Optional<String> value);
|
24
|
+
|
25
|
+
@Config("session_token")
|
26
|
+
@ConfigDefault("null")
|
27
|
+
Optional<String> getSessionToken();
|
28
|
+
void setSessionToken(Optional<String> value);
|
29
|
+
|
30
|
+
@Config("profile_file")
|
31
|
+
@ConfigDefault("null")
|
32
|
+
Optional<LocalFile> getProfileFile();
|
33
|
+
void setProfileFile(Optional<LocalFile> value);
|
34
|
+
|
35
|
+
@Config("profile_name")
|
36
|
+
@ConfigDefault("null")
|
37
|
+
Optional<String> getProfileName();
|
38
|
+
void setProfileName(Optional<String> value);
|
39
|
+
}
|
@@ -0,0 +1,425 @@
|
|
1
|
+
package org.embulk.output.dynamodb;
|
2
|
+
|
3
|
+
import com.amazonaws.AmazonClientException;
|
4
|
+
import com.amazonaws.AmazonServiceException;
|
5
|
+
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
|
6
|
+
import com.amazonaws.services.dynamodbv2.document.Item;
|
7
|
+
import com.amazonaws.services.dynamodbv2.document.TableWriteItems;
|
8
|
+
import com.fasterxml.jackson.annotation.JsonCreator;
|
9
|
+
import com.fasterxml.jackson.annotation.JsonValue;
|
10
|
+
import com.google.common.base.Optional;
|
11
|
+
import com.google.common.base.Throwables;
|
12
|
+
import com.google.inject.Inject;
|
13
|
+
import org.embulk.config.Config;
|
14
|
+
import org.embulk.config.ConfigDefault;
|
15
|
+
import org.embulk.config.ConfigDiff;
|
16
|
+
import org.embulk.config.ConfigException;
|
17
|
+
import org.embulk.config.ConfigSource;
|
18
|
+
import org.embulk.config.Task;
|
19
|
+
import org.embulk.config.TaskReport;
|
20
|
+
import org.embulk.config.TaskSource;
|
21
|
+
import org.embulk.spi.Column;
|
22
|
+
import org.embulk.spi.ColumnVisitor;
|
23
|
+
import org.embulk.spi.Exec;
|
24
|
+
import org.embulk.spi.OutputPlugin;
|
25
|
+
import org.embulk.spi.Page;
|
26
|
+
import org.embulk.spi.PageReader;
|
27
|
+
import org.embulk.spi.Schema;
|
28
|
+
import org.embulk.spi.TransactionalPageOutput;
|
29
|
+
import org.slf4j.Logger;
|
30
|
+
|
31
|
+
import java.util.List;
|
32
|
+
import java.util.Locale;
|
33
|
+
|
34
|
+
public class DynamodbOutputPlugin
|
35
|
+
implements OutputPlugin
|
36
|
+
{
|
37
|
+
public interface CapacityTask
|
38
|
+
extends Task
|
39
|
+
{
|
40
|
+
@Config("normal")
|
41
|
+
@ConfigDefault("null")
|
42
|
+
Optional<Long> getNormal();
|
43
|
+
|
44
|
+
@Config("raise")
|
45
|
+
@ConfigDefault("null")
|
46
|
+
Optional<Long> getRaise();
|
47
|
+
}
|
48
|
+
|
49
|
+
public interface PluginTask
|
50
|
+
extends AwsCredentialsTask, Task
|
51
|
+
{
|
52
|
+
@Config("mode")
|
53
|
+
@ConfigDefault("\"upsert\"")
|
54
|
+
Mode getMode();
|
55
|
+
|
56
|
+
@Config("region")
|
57
|
+
String getRegion();
|
58
|
+
|
59
|
+
@Config("auto_create_table")
|
60
|
+
@ConfigDefault("false")
|
61
|
+
Boolean getAutoCreateTable();
|
62
|
+
|
63
|
+
@Config("table")
|
64
|
+
String getTable();
|
65
|
+
void setTable(String table);
|
66
|
+
|
67
|
+
@Config("update_expression")
|
68
|
+
@ConfigDefault("null")
|
69
|
+
Optional<String> getUpdateExpression();
|
70
|
+
|
71
|
+
@Config("write_capacity_units")
|
72
|
+
@ConfigDefault("null")
|
73
|
+
Optional<CapacityTask> getWriteCapacityUnits();
|
74
|
+
|
75
|
+
@Config("read_capacity_units")
|
76
|
+
@ConfigDefault("null")
|
77
|
+
Optional<CapacityTask> getReadCapacityUnits();
|
78
|
+
|
79
|
+
@Config("max_put_items")
|
80
|
+
@ConfigDefault("25")
|
81
|
+
int getMaxPutItems();
|
82
|
+
|
83
|
+
@Config("endpoint")
|
84
|
+
@ConfigDefault("null")
|
85
|
+
Optional<String> getEndpoint();
|
86
|
+
|
87
|
+
@Config("primary_key")
|
88
|
+
Optional<String> getPrimaryKey();
|
89
|
+
|
90
|
+
@Config("primary_key_type")
|
91
|
+
Optional<String> getPrimaryKeyType();
|
92
|
+
|
93
|
+
@Config("sort_key")
|
94
|
+
@ConfigDefault("null")
|
95
|
+
Optional<String> getSortKey();
|
96
|
+
|
97
|
+
@Config("sort_key_type")
|
98
|
+
@ConfigDefault("null")
|
99
|
+
Optional<String> getSortKeyType();
|
100
|
+
}
|
101
|
+
|
102
|
+
/** Logger bound to the runtime class of this plugin instance. */
private final Logger log;

/** Helper that performs the DynamoDB calls on behalf of the plugin. */
private final DynamodbUtils dynamoDbUtils;

/**
 * Creates the plugin with its own logger and a fresh {@link DynamodbUtils}.
 */
@Inject
public DynamodbOutputPlugin()
{
    this.log = Exec.getLogger(getClass());
    this.dynamoDbUtils = new DynamodbUtils();
}
|
111
|
+
|
112
|
+
@Override
|
113
|
+
public ConfigDiff transaction(ConfigSource config,
|
114
|
+
Schema schema, int taskCount,
|
115
|
+
OutputPlugin.Control control)
|
116
|
+
{
|
117
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
118
|
+
dynamoDbUtils.configCheck(task);
|
119
|
+
|
120
|
+
DynamoDB dynamoDB = null;
|
121
|
+
try {
|
122
|
+
dynamoDB = dynamoDbUtils.createDynamoDB(task);
|
123
|
+
log.info(String.format("Executing plugin with '%s' mode", task.getMode()));
|
124
|
+
task.setTable(dynamoDbUtils.generateTableName(task.getTable()));
|
125
|
+
if (task.getAutoCreateTable()) {
|
126
|
+
if (task.getPrimaryKey().isPresent() && task.getPrimaryKeyType().isPresent()) {
|
127
|
+
dynamoDbUtils.createTable(dynamoDB, task);
|
128
|
+
}
|
129
|
+
else {
|
130
|
+
throw new ConfigException("If auto_create_table is true, both primary_key and primary_key_type is necessary");
|
131
|
+
}
|
132
|
+
}
|
133
|
+
// Up to raised provisioned value
|
134
|
+
dynamoDbUtils.updateTableProvision(dynamoDB, task, true);
|
135
|
+
|
136
|
+
control.run(task.dump());
|
137
|
+
|
138
|
+
// Back to normal provisioned value
|
139
|
+
dynamoDbUtils.updateTableProvision(dynamoDB, task, false);
|
140
|
+
}
|
141
|
+
catch (AmazonClientException | InterruptedException ex) {
|
142
|
+
throw Throwables.propagate(ex);
|
143
|
+
}
|
144
|
+
finally {
|
145
|
+
if (dynamoDB != null) {
|
146
|
+
dynamoDB.shutdown();
|
147
|
+
}
|
148
|
+
}
|
149
|
+
return Exec.newConfigDiff();
|
150
|
+
}
|
151
|
+
|
152
|
+
@Override
|
153
|
+
public ConfigDiff resume(TaskSource taskSource,
|
154
|
+
Schema schema, int taskCount,
|
155
|
+
OutputPlugin.Control control)
|
156
|
+
{
|
157
|
+
// TODO
|
158
|
+
return Exec.newConfigDiff();
|
159
|
+
}
|
160
|
+
|
161
|
+
@Override
|
162
|
+
public void cleanup(TaskSource taskSource,
|
163
|
+
Schema schema, int taskCount,
|
164
|
+
List<TaskReport> successTaskReports)
|
165
|
+
{
|
166
|
+
}
|
167
|
+
|
168
|
+
@Override
|
169
|
+
public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
|
170
|
+
{
|
171
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
172
|
+
DynamodbPageOutput pageOutput = null;
|
173
|
+
try {
|
174
|
+
DynamoDB dynamoDB = dynamoDbUtils.createDynamoDB(task);
|
175
|
+
pageOutput = new DynamodbPageOutput(task, dynamoDB);
|
176
|
+
pageOutput.open(schema);
|
177
|
+
}
|
178
|
+
catch (AmazonClientException ex) {
|
179
|
+
Throwables.propagate(ex);
|
180
|
+
}
|
181
|
+
return pageOutput;
|
182
|
+
}
|
183
|
+
|
184
|
+
public static class DynamodbPageOutput implements TransactionalPageOutput
|
185
|
+
{
|
186
|
+
private final Logger log;
private final DynamodbUtils dynamodbUtils;
// Set to null once the client has been shut down in close().
private DynamoDB dynamoDB;
private PageReader pageReader;
// Number of items handed to the writer so far.
private int totalWroteItemSize = 0;
// Number of items currently waiting in the batch buffer.
private int currentBufferItemSize = 0;
// Batch buffer; only initialised in upsert mode (see open(Schema)).
private TableWriteItems items;

private final String table;
private final Mode mode;
private final Optional<String> updateExpression;
// Only resolved in upsert_with_expression mode, otherwise null.
private final String primaryKey;
private final int maxPutItems;

/**
 * Captures the task settings and the client used for all writes.
 *
 * @param task     plugin task providing table, mode, update expression and batch size
 * @param dynamoDB client used for every DynamoDB call made by this output
 */
public DynamodbPageOutput(PluginTask task, DynamoDB dynamoDB)
{
    this.log = Exec.getLogger(getClass());
    this.dynamodbUtils = new DynamodbUtils();
    this.dynamoDB = dynamoDB;
    this.table = task.getTable();
    this.mode = task.getMode();
    this.updateExpression = task.getUpdateExpression();
    if (mode.equals(Mode.UPSERT_WITH_EXPRESSION)) {
        // The primary key name is looked up from the table itself.
        this.primaryKey = dynamodbUtils.getPrimaryKeyName(dynamoDB, table);
    }
    else {
        this.primaryKey = null;
    }
    this.maxPutItems = task.getMaxPutItems();
}
|
211
|
+
|
212
|
+
void open(final Schema schema)
|
213
|
+
{
|
214
|
+
pageReader = new PageReader(schema);
|
215
|
+
if (mode.equals(Mode.UPSERT)) {
|
216
|
+
items = new TableWriteItems(table);
|
217
|
+
}
|
218
|
+
}
|
219
|
+
|
220
|
+
@Override
|
221
|
+
public void add(Page page)
|
222
|
+
{
|
223
|
+
pageReader.setPage(page);
|
224
|
+
while (pageReader.nextRecord()) {
|
225
|
+
try {
|
226
|
+
final Item item = new Item();
|
227
|
+
|
228
|
+
pageReader.getSchema().visitColumns(new ColumnVisitor() {
|
229
|
+
@Override
|
230
|
+
public void booleanColumn(Column column)
|
231
|
+
{
|
232
|
+
if (pageReader.isNull(column)) {
|
233
|
+
addNullValue(column.getName());
|
234
|
+
}
|
235
|
+
else {
|
236
|
+
item.withBoolean(column.getName(), pageReader.getBoolean(column));
|
237
|
+
}
|
238
|
+
}
|
239
|
+
|
240
|
+
@Override
|
241
|
+
public void longColumn(Column column)
|
242
|
+
{
|
243
|
+
if (pageReader.isNull(column)) {
|
244
|
+
addNullValue(column.getName());
|
245
|
+
}
|
246
|
+
else {
|
247
|
+
item.withLong(column.getName(), pageReader.getLong(column));
|
248
|
+
}
|
249
|
+
}
|
250
|
+
|
251
|
+
@Override
|
252
|
+
public void doubleColumn(Column column)
|
253
|
+
{
|
254
|
+
if (pageReader.isNull(column)) {
|
255
|
+
addNullValue(column.getName());
|
256
|
+
}
|
257
|
+
else {
|
258
|
+
item.withDouble(column.getName(), pageReader.getDouble(column));
|
259
|
+
}
|
260
|
+
}
|
261
|
+
|
262
|
+
@Override
|
263
|
+
public void stringColumn(Column column)
|
264
|
+
{
|
265
|
+
if (pageReader.isNull(column)) {
|
266
|
+
addNullValue(column.getName());
|
267
|
+
}
|
268
|
+
else {
|
269
|
+
item.withString(column.getName(), pageReader.getString(column));
|
270
|
+
}
|
271
|
+
}
|
272
|
+
|
273
|
+
@Override
|
274
|
+
public void timestampColumn(Column column)
|
275
|
+
{
|
276
|
+
if (pageReader.isNull(column)) {
|
277
|
+
addNullValue(column.getName());
|
278
|
+
}
|
279
|
+
else {
|
280
|
+
item.withString(column.getName(), String.valueOf(pageReader.getTimestamp(column)));
|
281
|
+
}
|
282
|
+
}
|
283
|
+
|
284
|
+
@Override
|
285
|
+
public void jsonColumn(Column column)
|
286
|
+
{
|
287
|
+
if (pageReader.isNull(column)) {
|
288
|
+
addNullValue(column.getName());
|
289
|
+
}
|
290
|
+
else {
|
291
|
+
item.withJSON(column.getName(), pageReader.getJson(column).toString());
|
292
|
+
}
|
293
|
+
}
|
294
|
+
|
295
|
+
private void addNullValue(String name)
|
296
|
+
{
|
297
|
+
item.withNull(name);
|
298
|
+
}
|
299
|
+
});
|
300
|
+
|
301
|
+
if (mode.equals(Mode.UPSERT)) {
|
302
|
+
addItemToBuffer(item);
|
303
|
+
}
|
304
|
+
else if (mode.equals(Mode.UPSERT_WITH_EXPRESSION)) {
|
305
|
+
updateItem(item);
|
306
|
+
}
|
307
|
+
}
|
308
|
+
catch (AmazonServiceException ex) {
|
309
|
+
throw Throwables.propagate(ex);
|
310
|
+
}
|
311
|
+
}
|
312
|
+
}
|
313
|
+
|
314
|
+
// upsert mode only
|
315
|
+
public void addItemToBuffer(Item item)
|
316
|
+
{
|
317
|
+
items.addItemToPut(item);
|
318
|
+
currentBufferItemSize++;
|
319
|
+
totalWroteItemSize++;
|
320
|
+
// @see http://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
|
321
|
+
if (currentBufferItemSize >= maxPutItems) {
|
322
|
+
flush();
|
323
|
+
}
|
324
|
+
}
|
325
|
+
|
326
|
+
// upsert mode only
|
327
|
+
public void flush()
|
328
|
+
{
|
329
|
+
if (currentBufferItemSize > 0) {
|
330
|
+
try {
|
331
|
+
dynamodbUtils.batchWriteItem(dynamoDB, items);
|
332
|
+
if (totalWroteItemSize % 1000 == 0) {
|
333
|
+
log.info(String.format("Wrote %s items", totalWroteItemSize));
|
334
|
+
}
|
335
|
+
}
|
336
|
+
catch (AmazonServiceException ex) {
|
337
|
+
if (ex.getErrorCode().equals("ValidationException")) {
|
338
|
+
log.error(String.format("Data was invalid. data:%s", items.getItemsToPut()));
|
339
|
+
}
|
340
|
+
throw Throwables.propagate(ex);
|
341
|
+
}
|
342
|
+
finally {
|
343
|
+
// Re-initialize for next loop
|
344
|
+
items = new TableWriteItems(table);
|
345
|
+
currentBufferItemSize = 0;
|
346
|
+
}
|
347
|
+
}
|
348
|
+
}
|
349
|
+
|
350
|
+
// upsert_with_expression mode only
|
351
|
+
public void updateItem(Item item)
|
352
|
+
{
|
353
|
+
try {
|
354
|
+
dynamodbUtils.updateItem(dynamoDB, table, item, primaryKey, updateExpression);
|
355
|
+
totalWroteItemSize++;
|
356
|
+
if (totalWroteItemSize % 1000 == 0) {
|
357
|
+
log.info(String.format("Updated %s items", totalWroteItemSize));
|
358
|
+
}
|
359
|
+
}
|
360
|
+
catch (AmazonServiceException ex) {
|
361
|
+
if (ex.getErrorCode().equals("ValidationException")) {
|
362
|
+
log.error(String.format("Data was invalid. data:%s", items.getItemsToPut()));
|
363
|
+
}
|
364
|
+
throw Throwables.propagate(ex);
|
365
|
+
}
|
366
|
+
}
|
367
|
+
|
368
|
+
@Override
|
369
|
+
public void finish()
|
370
|
+
{
|
371
|
+
close();
|
372
|
+
log.info(String.format("Completed to write total %s items", totalWroteItemSize));
|
373
|
+
}
|
374
|
+
|
375
|
+
@Override
|
376
|
+
public void close()
|
377
|
+
{
|
378
|
+
if (mode.equals(Mode.UPSERT)) {
|
379
|
+
flush();
|
380
|
+
}
|
381
|
+
if (dynamoDB != null) {
|
382
|
+
dynamoDB.shutdown();
|
383
|
+
dynamoDB = null;
|
384
|
+
}
|
385
|
+
}
|
386
|
+
|
387
|
+
@Override
|
388
|
+
public void abort()
|
389
|
+
{
|
390
|
+
// nothing
|
391
|
+
}
|
392
|
+
|
393
|
+
@Override
|
394
|
+
public TaskReport commit()
|
395
|
+
{
|
396
|
+
return Exec.newTaskReport();
|
397
|
+
}
|
398
|
+
}
|
399
|
+
|
400
|
+
public enum Mode
|
401
|
+
{
|
402
|
+
UPSERT,
|
403
|
+
UPSERT_WITH_EXPRESSION;
|
404
|
+
|
405
|
+
@JsonValue
|
406
|
+
@Override
|
407
|
+
public String toString()
|
408
|
+
{
|
409
|
+
return name().toLowerCase(Locale.ENGLISH);
|
410
|
+
}
|
411
|
+
|
412
|
+
@JsonCreator
|
413
|
+
public static Mode fromString(String value)
|
414
|
+
{
|
415
|
+
switch (value) {
|
416
|
+
case "upsert":
|
417
|
+
return UPSERT;
|
418
|
+
case "upsert_with_expression":
|
419
|
+
return UPSERT_WITH_EXPRESSION;
|
420
|
+
default:
|
421
|
+
throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are upsert and upsert_with_expression", value));
|
422
|
+
}
|
423
|
+
}
|
424
|
+
}
|
425
|
+
}
|