embulk-output-dynamodb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,39 @@
1
+ package org.embulk.output.dynamodb;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.spi.unit.LocalFile;
7
+
8
+ public interface AwsCredentialsTask // AWS credential options read from the plugin configuration via @Config keys
9
+ {
10
+ @Config("auth_method")
11
+ @ConfigDefault("\"basic\"") // declared default for auth_method is the string "basic"
12
+ String getAuthMethod();
13
+ void setAuthMethod(String method);
14
+ // Every key below declares a null default and an Optional getter; which ones a given auth_method needs is not visible here (TODO confirm in the credential-building code)
15
+ @Config("access_key_id")
16
+ @ConfigDefault("null")
17
+ Optional<String> getAccessKeyId();
18
+ void setAccessKeyId(Optional<String> value);
19
+
20
+ @Config("secret_access_key")
21
+ @ConfigDefault("null")
22
+ Optional<String> getSecretAccessKey();
23
+ void setSecretAccessKey(Optional<String> value);
24
+
25
+ @Config("session_token")
26
+ @ConfigDefault("null")
27
+ Optional<String> getSessionToken();
28
+ void setSessionToken(Optional<String> value);
29
+
30
+ @Config("profile_file")
31
+ @ConfigDefault("null")
32
+ Optional<LocalFile> getProfileFile();
33
+ void setProfileFile(Optional<LocalFile> value);
34
+
35
+ @Config("profile_name")
36
+ @ConfigDefault("null")
37
+ Optional<String> getProfileName();
38
+ void setProfileName(Optional<String> value);
39
+ }
@@ -0,0 +1,425 @@
1
+ package org.embulk.output.dynamodb;
2
+
3
+ import com.amazonaws.AmazonClientException;
4
+ import com.amazonaws.AmazonServiceException;
5
+ import com.amazonaws.services.dynamodbv2.document.DynamoDB;
6
+ import com.amazonaws.services.dynamodbv2.document.Item;
7
+ import com.amazonaws.services.dynamodbv2.document.TableWriteItems;
8
+ import com.fasterxml.jackson.annotation.JsonCreator;
9
+ import com.fasterxml.jackson.annotation.JsonValue;
10
+ import com.google.common.base.Optional;
11
+ import com.google.common.base.Throwables;
12
+ import com.google.inject.Inject;
13
+ import org.embulk.config.Config;
14
+ import org.embulk.config.ConfigDefault;
15
+ import org.embulk.config.ConfigDiff;
16
+ import org.embulk.config.ConfigException;
17
+ import org.embulk.config.ConfigSource;
18
+ import org.embulk.config.Task;
19
+ import org.embulk.config.TaskReport;
20
+ import org.embulk.config.TaskSource;
21
+ import org.embulk.spi.Column;
22
+ import org.embulk.spi.ColumnVisitor;
23
+ import org.embulk.spi.Exec;
24
+ import org.embulk.spi.OutputPlugin;
25
+ import org.embulk.spi.Page;
26
+ import org.embulk.spi.PageReader;
27
+ import org.embulk.spi.Schema;
28
+ import org.embulk.spi.TransactionalPageOutput;
29
+ import org.slf4j.Logger;
30
+
31
+ import java.util.List;
32
+ import java.util.Locale;
33
+
34
+ public class DynamodbOutputPlugin
35
+ implements OutputPlugin
36
+ {
37
+ public interface CapacityTask // nested settings used by write_capacity_units and read_capacity_units in PluginTask
38
+ extends Task
39
+ {
40
+ @Config("normal") // presumably the capacity restored after the run — TODO confirm in DynamodbUtils.updateTableProvision
41
+ @ConfigDefault("null")
42
+ Optional<Long> getNormal();
43
+
44
+ @Config("raise") // presumably the capacity applied while the run is in progress — TODO confirm in DynamodbUtils.updateTableProvision
45
+ @ConfigDefault("null")
46
+ Optional<Long> getRaise();
47
+ }
48
+
49
+ public interface PluginTask // plugin configuration: the credential keys from AwsCredentialsTask plus the DynamoDB output options below
50
+ extends AwsCredentialsTask, Task
51
+ {
52
+ @Config("mode")
53
+ @ConfigDefault("\"upsert\"")
54
+ Mode getMode();
55
+
56
+ @Config("region")
57
+ String getRegion();
58
+
59
+ @Config("auto_create_table")
60
+ @ConfigDefault("false")
61
+ Boolean getAutoCreateTable();
62
+
63
+ @Config("table")
64
+ String getTable();
65
+ void setTable(String table);
66
+
67
+ @Config("update_expression")
68
+ @ConfigDefault("null")
69
+ Optional<String> getUpdateExpression();
70
+
71
+ @Config("write_capacity_units")
72
+ @ConfigDefault("null")
73
+ Optional<CapacityTask> getWriteCapacityUnits();
74
+
75
+ @Config("read_capacity_units")
76
+ @ConfigDefault("null")
77
+ Optional<CapacityTask> getReadCapacityUnits();
78
+
79
+ @Config("max_put_items")
80
+ @ConfigDefault("25")
81
+ int getMaxPutItems();
82
+
83
+ @Config("endpoint")
84
+ @ConfigDefault("null")
85
+ Optional<String> getEndpoint();
86
+ // primary_key / primary_key_type are only checked when auto_create_table is true (see transaction), so they default to null like sort_key
87
+ @Config("primary_key") @ConfigDefault("null")
88
+ Optional<String> getPrimaryKey();
89
+
90
+ @Config("primary_key_type") @ConfigDefault("null")
91
+ Optional<String> getPrimaryKeyType();
92
+
93
+ @Config("sort_key")
94
+ @ConfigDefault("null")
95
+ Optional<String> getSortKey();
96
+
97
+ @Config("sort_key_type")
98
+ @ConfigDefault("null")
99
+ Optional<String> getSortKeyType();
100
+ }
101
+
102
+ private final Logger log; // logger obtained from Exec in the constructor
103
+ private final DynamodbUtils dynamoDbUtils; // helper used by transaction() and open()
104
+
105
+ @Inject
106
+ public DynamodbOutputPlugin()
107
+ {
108
+ this.log = Exec.getLogger(getClass());
109
+ this.dynamoDbUtils = new DynamodbUtils();
110
+ }
111
+
112
+ @Override
113
+ public ConfigDiff transaction(ConfigSource config,
114
+ Schema schema, int taskCount,
115
+ OutputPlugin.Control control)
116
+ {
117
+ PluginTask task = config.loadConfig(PluginTask.class);
118
+ dynamoDbUtils.configCheck(task);
119
+ // Flow: create client, optionally create the table, raise capacity, run the tasks, restore capacity, shut the client down
120
+ DynamoDB dynamoDB = null;
121
+ try {
122
+ dynamoDB = dynamoDbUtils.createDynamoDB(task);
123
+ log.info(String.format("Executing plugin with '%s' mode", task.getMode()));
124
+ task.setTable(dynamoDbUtils.generateTableName(task.getTable()));
125
+ if (task.getAutoCreateTable()) {
126
+ if (task.getPrimaryKey().isPresent() && task.getPrimaryKeyType().isPresent()) {
127
+ dynamoDbUtils.createTable(dynamoDB, task);
128
+ }
129
+ else {
130
+ throw new ConfigException("If auto_create_table is true, both primary_key and primary_key_type is necessary");
131
+ }
132
+ }
133
+ // Up to raised provisioned value
134
+ dynamoDbUtils.updateTableProvision(dynamoDB, task, true);
135
+ // NOTE(review): if control.run throws, the "back to normal" update below is skipped
136
+ control.run(task.dump());
137
+ // Back to normal provisioned value
138
+ dynamoDbUtils.updateTableProvision(dynamoDB, task, false);
139
+ }
140
+ catch (AmazonClientException | InterruptedException ex) {
141
+ if (ex instanceof InterruptedException) { Thread.currentThread().interrupt(); } // re-assert the interrupt before wrapping it
142
+ throw Throwables.propagate(ex);
143
+ }
144
+ finally {
145
+ if (dynamoDB != null) {
146
+ dynamoDB.shutdown();
147
+ }
148
+ }
149
+ return Exec.newConfigDiff();
150
+ }
151
+
152
+ @Override
153
+ public ConfigDiff resume(TaskSource taskSource,
154
+ Schema schema, int taskCount,
155
+ OutputPlugin.Control control)
156
+ {
157
+ // Resuming is not implemented; fail loudly instead of returning an empty diff without ever running control
158
+ throw new UnsupportedOperationException("dynamodb output plugin does not support resuming");
159
+ }
160
+
161
+ @Override
162
+ public void cleanup(TaskSource taskSource,
163
+ Schema schema, int taskCount,
164
+ List<TaskReport> successTaskReports)
165
+ { // empty body: no cleanup work is performed by this plugin
166
+ }
167
+
168
+ @Override
169
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
170
+ {
171
+ PluginTask task = taskSource.loadTask(PluginTask.class);
172
+ DynamoDB dynamoDB = dynamoDbUtils.createDynamoDB(task);
173
+ try {
174
+ DynamodbPageOutput pageOutput = new DynamodbPageOutput(task, dynamoDB); // takes ownership of the client; its close() shuts it down
175
+ pageOutput.open(schema);
176
+ return pageOutput;
177
+ }
178
+ catch (RuntimeException ex) {
179
+ dynamoDB.shutdown(); // the page output was never handed out, so nobody else would release the client
180
+ throw ex;
181
+ }
182
+ }
183
+
184
+ public static class DynamodbPageOutput implements TransactionalPageOutput
185
+ {
186
+ private Logger log;
187
+ private DynamodbUtils dynamodbUtils;
188
+ private DynamoDB dynamoDB; // client passed to the constructor; set to null once close() has shut it down
189
+ private PageReader pageReader; // created in open(Schema)
190
+ private int totalWroteItemSize = 0; // incremented once per item buffered (upsert) or updated (upsert_with_expression)
191
+ private int currentBufferItemSize = 0; // number of items in the pending batch; reset by flush()
192
+ private TableWriteItems items; // pending batch; only created in upsert mode, otherwise stays null
193
+
194
+ private final String table;
195
+ private final Mode mode;
196
+ private final Optional<String> updateExpression;
197
+ private final String primaryKey; // only looked up in upsert_with_expression mode; null otherwise
198
+ private final int maxPutItems;
199
+
200
+ public DynamodbPageOutput(PluginTask task, DynamoDB dynamoDB) // copies the settings it needs out of the task
201
+ {
202
+ this.log = Exec.getLogger(getClass());
203
+ this.dynamodbUtils = new DynamodbUtils();
204
+ this.dynamoDB = dynamoDB;
205
+ this.table = task.getTable();
206
+ this.mode = task.getMode();
207
+ this.updateExpression = task.getUpdateExpression();
208
+ this.primaryKey = (mode.equals(Mode.UPSERT_WITH_EXPRESSION)) ? dynamodbUtils.getPrimaryKeyName(dynamoDB, table) : null; // relies on dynamodbUtils, dynamoDB, table and mode being assigned above
209
+ this.maxPutItems = task.getMaxPutItems();
210
+ }
211
+
212
+ void open(final Schema schema) // creates the page reader and, in upsert mode, the first batch buffer
213
+ {
214
+ this.pageReader = new PageReader(schema);
215
+ if (this.mode.equals(Mode.UPSERT)) {
216
+ this.items = new TableWriteItems(this.table);
217
+ }
218
+ }
219
+
220
+ @Override
221
+ public void add(Page page) // turns every record of the page into an Item and hands it to the mode-specific writer
222
+ {
223
+ pageReader.setPage(page);
224
+ while (pageReader.nextRecord()) {
225
+ try {
226
+ final Item item = new Item();
227
+ // Copy each column of the current record into the item; null cells are stored through item.withNull
228
+ pageReader.getSchema().visitColumns(new ColumnVisitor() {
229
+ @Override
230
+ public void booleanColumn(Column column)
231
+ {
232
+ if (pageReader.isNull(column)) {
233
+ addNullValue(column.getName());
234
+ }
235
+ else {
236
+ item.withBoolean(column.getName(), pageReader.getBoolean(column));
237
+ }
238
+ }
239
+
240
+ @Override
241
+ public void longColumn(Column column)
242
+ {
243
+ if (pageReader.isNull(column)) {
244
+ addNullValue(column.getName());
245
+ }
246
+ else {
247
+ item.withLong(column.getName(), pageReader.getLong(column));
248
+ }
249
+ }
250
+
251
+ @Override
252
+ public void doubleColumn(Column column)
253
+ {
254
+ if (pageReader.isNull(column)) {
255
+ addNullValue(column.getName());
256
+ }
257
+ else {
258
+ item.withDouble(column.getName(), pageReader.getDouble(column));
259
+ }
260
+ }
261
+
262
+ @Override
263
+ public void stringColumn(Column column)
264
+ {
265
+ if (pageReader.isNull(column)) {
266
+ addNullValue(column.getName());
267
+ }
268
+ else {
269
+ item.withString(column.getName(), pageReader.getString(column));
270
+ }
271
+ }
272
+
273
+ @Override
274
+ public void timestampColumn(Column column)
275
+ {
276
+ if (pageReader.isNull(column)) {
277
+ addNullValue(column.getName());
278
+ }
279
+ else {
280
+ item.withString(column.getName(), String.valueOf(pageReader.getTimestamp(column))); // timestamps are stored as their String.valueOf text
281
+ }
282
+ }
283
+
284
+ @Override
285
+ public void jsonColumn(Column column)
286
+ {
287
+ if (pageReader.isNull(column)) {
288
+ addNullValue(column.getName());
289
+ }
290
+ else {
291
+ item.withJSON(column.getName(), pageReader.getJson(column).toString()); // JSON values are passed to withJSON as their toString() text
292
+ }
293
+ }
294
+
295
+ private void addNullValue(String name)
296
+ {
297
+ item.withNull(name);
298
+ }
299
+ });
300
+ // Dispatch on mode: upsert buffers the item for a batch write, upsert_with_expression updates it immediately
301
+ if (mode.equals(Mode.UPSERT)) {
302
+ addItemToBuffer(item);
303
+ }
304
+ else if (mode.equals(Mode.UPSERT_WITH_EXPRESSION)) {
305
+ updateItem(item);
306
+ }
307
+ }
308
+ catch (AmazonServiceException ex) {
309
+ throw Throwables.propagate(ex);
310
+ }
311
+ }
312
+ }
313
+
314
+ // upsert mode only: queue the item and flush once the buffer holds maxPutItems entries
315
+ public void addItemToBuffer(Item item)
316
+ {
317
+ items.addItemToPut(item);
318
+ currentBufferItemSize += 1;
319
+ totalWroteItemSize += 1;
320
+ // @see http://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
321
+ if (maxPutItems <= currentBufferItemSize) {
322
+ flush();
323
+ }
324
+ }
325
+
326
+ // upsert mode only: sends the buffered items as one batch write, then starts a fresh buffer whether or not the write succeeded
327
+ public void flush()
328
+ {
329
+ if (currentBufferItemSize > 0) {
330
+ try {
331
+ dynamodbUtils.batchWriteItem(dynamoDB, items);
332
+ if (totalWroteItemSize % 1000 == 0) {
333
+ log.info(String.format("Wrote %s items", totalWroteItemSize));
334
+ }
335
+ }
336
+ catch (AmazonServiceException ex) {
337
+ if ("ValidationException".equals(ex.getErrorCode())) { // constant first: a null error code must not hide the real exception behind an NPE
338
+ log.error(String.format("Data was invalid. data:%s", items.getItemsToPut()));
339
+ }
340
+ throw Throwables.propagate(ex);
341
+ }
342
+ finally {
343
+ // Re-initialize for next loop
344
+ items = new TableWriteItems(table);
345
+ currentBufferItemSize = 0;
346
+ }
347
+ }
348
+ }
349
+
350
+ // upsert_with_expression mode only: updates one item immediately and logs progress every 1000 updated items
351
+ public void updateItem(Item item)
352
+ {
353
+ try {
354
+ dynamodbUtils.updateItem(dynamoDB, table, item, primaryKey, updateExpression);
355
+ totalWroteItemSize++;
356
+ if (totalWroteItemSize % 1000 == 0) {
357
+ log.info(String.format("Updated %s items", totalWroteItemSize));
358
+ }
359
+ }
360
+ catch (AmazonServiceException ex) {
361
+ if ("ValidationException".equals(ex.getErrorCode())) { // constant first so a null error code cannot raise an NPE here
362
+ log.error(String.format("Data was invalid. data:%s", item)); // log the rejected item; the batch buffer "items" is never created in this mode
363
+ }
364
+ throw Throwables.propagate(ex);
365
+ }
366
+ }
367
+
368
+ @Override
369
+ public void finish() // runs close() (final flush in upsert mode plus client shutdown), then logs the total item count
370
+ {
371
+ close();
372
+ log.info(String.format("Completed to write total %s items", totalWroteItemSize));
373
+ }
374
+
375
+ @Override
376
+ public void close()
377
+ {
378
+ if (mode.equals(Mode.UPSERT)) {
379
+ flush();
380
+ }
381
+ if (dynamoDB != null) {
382
+ dynamoDB.shutdown();
383
+ dynamoDB = null;
384
+ }
385
+ }
386
+
387
+ @Override
388
+ public void abort()
389
+ {
390
+ // nothing: this method undoes no items that earlier flush()/updateItem() calls already sent
391
+ }
392
+
393
+ @Override
394
+ public TaskReport commit() // returns the report created by Exec.newTaskReport() without adding anything to it
395
+ {
396
+ return Exec.newTaskReport();
397
+ }
398
+ }
399
+
400
+ public enum Mode // write strategies selectable through the "mode" config key
401
+ {
402
+ UPSERT,
403
+ UPSERT_WITH_EXPRESSION;
404
+
405
+ @JsonValue
406
+ @Override
407
+ public String toString() // lower-case constant name, e.g. "upsert_with_expression"
408
+ {
409
+ return this.name().toLowerCase(Locale.ENGLISH);
410
+ }
411
+
412
+ @JsonCreator
413
+ public static Mode fromString(String value)
414
+ {
415
+ // Exact, case-sensitive match against the two supported names; anything else is a configuration error
416
+ if (value.equals("upsert")) {
417
+ return UPSERT;
418
+ }
419
+ if (value.equals("upsert_with_expression")) {
420
+ return UPSERT_WITH_EXPRESSION;
421
+ }
422
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are upsert and upsert_with_expression", value));
423
+ }
424
+ }
425
+ }