embulk-output-dynamodb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,39 @@
1
+ package org.embulk.output.dynamodb;
2
+
3
+ import com.google.common.base.Optional;
4
+ import org.embulk.config.Config;
5
+ import org.embulk.config.ConfigDefault;
6
+ import org.embulk.spi.unit.LocalFile;
7
+
8
+ public interface AwsCredentialsTask
9
+ {
10
+ @Config("auth_method")
11
+ @ConfigDefault("\"basic\"")
12
+ String getAuthMethod();
13
+ void setAuthMethod(String method);
14
+
15
+ @Config("access_key_id")
16
+ @ConfigDefault("null")
17
+ Optional<String> getAccessKeyId();
18
+ void setAccessKeyId(Optional<String> value);
19
+
20
+ @Config("secret_access_key")
21
+ @ConfigDefault("null")
22
+ Optional<String> getSecretAccessKey();
23
+ void setSecretAccessKey(Optional<String> value);
24
+
25
+ @Config("session_token")
26
+ @ConfigDefault("null")
27
+ Optional<String> getSessionToken();
28
+ void setSessionToken(Optional<String> value);
29
+
30
+ @Config("profile_file")
31
+ @ConfigDefault("null")
32
+ Optional<LocalFile> getProfileFile();
33
+ void setProfileFile(Optional<LocalFile> value);
34
+
35
+ @Config("profile_name")
36
+ @ConfigDefault("null")
37
+ Optional<String> getProfileName();
38
+ void setProfileName(Optional<String> value);
39
+ }
@@ -0,0 +1,425 @@
1
+ package org.embulk.output.dynamodb;
2
+
3
+ import com.amazonaws.AmazonClientException;
4
+ import com.amazonaws.AmazonServiceException;
5
+ import com.amazonaws.services.dynamodbv2.document.DynamoDB;
6
+ import com.amazonaws.services.dynamodbv2.document.Item;
7
+ import com.amazonaws.services.dynamodbv2.document.TableWriteItems;
8
+ import com.fasterxml.jackson.annotation.JsonCreator;
9
+ import com.fasterxml.jackson.annotation.JsonValue;
10
+ import com.google.common.base.Optional;
11
+ import com.google.common.base.Throwables;
12
+ import com.google.inject.Inject;
13
+ import org.embulk.config.Config;
14
+ import org.embulk.config.ConfigDefault;
15
+ import org.embulk.config.ConfigDiff;
16
+ import org.embulk.config.ConfigException;
17
+ import org.embulk.config.ConfigSource;
18
+ import org.embulk.config.Task;
19
+ import org.embulk.config.TaskReport;
20
+ import org.embulk.config.TaskSource;
21
+ import org.embulk.spi.Column;
22
+ import org.embulk.spi.ColumnVisitor;
23
+ import org.embulk.spi.Exec;
24
+ import org.embulk.spi.OutputPlugin;
25
+ import org.embulk.spi.Page;
26
+ import org.embulk.spi.PageReader;
27
+ import org.embulk.spi.Schema;
28
+ import org.embulk.spi.TransactionalPageOutput;
29
+ import org.slf4j.Logger;
30
+
31
+ import java.util.List;
32
+ import java.util.Locale;
33
+
34
+ public class DynamodbOutputPlugin
35
+ implements OutputPlugin
36
+ {
37
+ public interface CapacityTask
38
+ extends Task
39
+ {
40
+ @Config("normal")
41
+ @ConfigDefault("null")
42
+ Optional<Long> getNormal();
43
+
44
+ @Config("raise")
45
+ @ConfigDefault("null")
46
+ Optional<Long> getRaise();
47
+ }
48
+
49
+ public interface PluginTask
50
+ extends AwsCredentialsTask, Task
51
+ {
52
+ @Config("mode")
53
+ @ConfigDefault("\"upsert\"")
54
+ Mode getMode();
55
+
56
+ @Config("region")
57
+ String getRegion();
58
+
59
+ @Config("auto_create_table")
60
+ @ConfigDefault("false")
61
+ Boolean getAutoCreateTable();
62
+
63
+ @Config("table")
64
+ String getTable();
65
+ void setTable(String table);
66
+
67
+ @Config("update_expression")
68
+ @ConfigDefault("null")
69
+ Optional<String> getUpdateExpression();
70
+
71
+ @Config("write_capacity_units")
72
+ @ConfigDefault("null")
73
+ Optional<CapacityTask> getWriteCapacityUnits();
74
+
75
+ @Config("read_capacity_units")
76
+ @ConfigDefault("null")
77
+ Optional<CapacityTask> getReadCapacityUnits();
78
+
79
+ @Config("max_put_items")
80
+ @ConfigDefault("25")
81
+ int getMaxPutItems();
82
+
83
+ @Config("endpoint")
84
+ @ConfigDefault("null")
85
+ Optional<String> getEndpoint();
86
+
87
+ @Config("primary_key")
88
+ Optional<String> getPrimaryKey();
89
+
90
+ @Config("primary_key_type")
91
+ Optional<String> getPrimaryKeyType();
92
+
93
+ @Config("sort_key")
94
+ @ConfigDefault("null")
95
+ Optional<String> getSortKey();
96
+
97
+ @Config("sort_key_type")
98
+ @ConfigDefault("null")
99
+ Optional<String> getSortKeyType();
100
+ }
101
+
102
+ private final Logger log;
103
+ private final DynamodbUtils dynamoDbUtils;
104
+
105
+ @Inject
106
+ public DynamodbOutputPlugin()
107
+ {
108
+ log = Exec.getLogger(getClass());
109
+ dynamoDbUtils = new DynamodbUtils();
110
+ }
111
+
112
+ @Override
113
+ public ConfigDiff transaction(ConfigSource config,
114
+ Schema schema, int taskCount,
115
+ OutputPlugin.Control control)
116
+ {
117
+ PluginTask task = config.loadConfig(PluginTask.class);
118
+ dynamoDbUtils.configCheck(task);
119
+
120
+ DynamoDB dynamoDB = null;
121
+ try {
122
+ dynamoDB = dynamoDbUtils.createDynamoDB(task);
123
+ log.info(String.format("Executing plugin with '%s' mode", task.getMode()));
124
+ task.setTable(dynamoDbUtils.generateTableName(task.getTable()));
125
+ if (task.getAutoCreateTable()) {
126
+ if (task.getPrimaryKey().isPresent() && task.getPrimaryKeyType().isPresent()) {
127
+ dynamoDbUtils.createTable(dynamoDB, task);
128
+ }
129
+ else {
130
+ throw new ConfigException("If auto_create_table is true, both primary_key and primary_key_type is necessary");
131
+ }
132
+ }
133
+ // Up to raised provisioned value
134
+ dynamoDbUtils.updateTableProvision(dynamoDB, task, true);
135
+
136
+ control.run(task.dump());
137
+
138
+ // Back to normal provisioned value
139
+ dynamoDbUtils.updateTableProvision(dynamoDB, task, false);
140
+ }
141
+ catch (AmazonClientException | InterruptedException ex) {
142
+ throw Throwables.propagate(ex);
143
+ }
144
+ finally {
145
+ if (dynamoDB != null) {
146
+ dynamoDB.shutdown();
147
+ }
148
+ }
149
+ return Exec.newConfigDiff();
150
+ }
151
+
152
+ @Override
153
+ public ConfigDiff resume(TaskSource taskSource,
154
+ Schema schema, int taskCount,
155
+ OutputPlugin.Control control)
156
+ {
157
+ // TODO
158
+ return Exec.newConfigDiff();
159
+ }
160
+
161
+ @Override
162
+ public void cleanup(TaskSource taskSource,
163
+ Schema schema, int taskCount,
164
+ List<TaskReport> successTaskReports)
165
+ {
166
+ }
167
+
168
+ @Override
169
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema, int taskIndex)
170
+ {
171
+ PluginTask task = taskSource.loadTask(PluginTask.class);
172
+ DynamodbPageOutput pageOutput = null;
173
+ try {
174
+ DynamoDB dynamoDB = dynamoDbUtils.createDynamoDB(task);
175
+ pageOutput = new DynamodbPageOutput(task, dynamoDB);
176
+ pageOutput.open(schema);
177
+ }
178
+ catch (AmazonClientException ex) {
179
+ Throwables.propagate(ex);
180
+ }
181
+ return pageOutput;
182
+ }
183
+
184
+ public static class DynamodbPageOutput implements TransactionalPageOutput
185
+ {
186
+ private Logger log;
187
+ private DynamodbUtils dynamodbUtils;
188
+ private DynamoDB dynamoDB;
189
+ private PageReader pageReader;
190
+ private int totalWroteItemSize = 0;
191
+ private int currentBufferItemSize = 0;
192
+ private TableWriteItems items;
193
+
194
+ private final String table;
195
+ private final Mode mode;
196
+ private final Optional<String> updateExpression;
197
+ private final String primaryKey;
198
+ private final int maxPutItems;
199
+
200
+ public DynamodbPageOutput(PluginTask task, DynamoDB dynamoDB)
201
+ {
202
+ this.log = Exec.getLogger(getClass());
203
+ this.dynamodbUtils = new DynamodbUtils();
204
+ this.dynamoDB = dynamoDB;
205
+ this.table = task.getTable();
206
+ this.mode = task.getMode();
207
+ this.updateExpression = task.getUpdateExpression();
208
+ this.primaryKey = (mode.equals(Mode.UPSERT_WITH_EXPRESSION)) ? dynamodbUtils.getPrimaryKeyName(dynamoDB, table) : null;
209
+ this.maxPutItems = task.getMaxPutItems();
210
+ }
211
+
212
+ void open(final Schema schema)
213
+ {
214
+ pageReader = new PageReader(schema);
215
+ if (mode.equals(Mode.UPSERT)) {
216
+ items = new TableWriteItems(table);
217
+ }
218
+ }
219
+
220
+ @Override
221
+ public void add(Page page)
222
+ {
223
+ pageReader.setPage(page);
224
+ while (pageReader.nextRecord()) {
225
+ try {
226
+ final Item item = new Item();
227
+
228
+ pageReader.getSchema().visitColumns(new ColumnVisitor() {
229
+ @Override
230
+ public void booleanColumn(Column column)
231
+ {
232
+ if (pageReader.isNull(column)) {
233
+ addNullValue(column.getName());
234
+ }
235
+ else {
236
+ item.withBoolean(column.getName(), pageReader.getBoolean(column));
237
+ }
238
+ }
239
+
240
+ @Override
241
+ public void longColumn(Column column)
242
+ {
243
+ if (pageReader.isNull(column)) {
244
+ addNullValue(column.getName());
245
+ }
246
+ else {
247
+ item.withLong(column.getName(), pageReader.getLong(column));
248
+ }
249
+ }
250
+
251
+ @Override
252
+ public void doubleColumn(Column column)
253
+ {
254
+ if (pageReader.isNull(column)) {
255
+ addNullValue(column.getName());
256
+ }
257
+ else {
258
+ item.withDouble(column.getName(), pageReader.getDouble(column));
259
+ }
260
+ }
261
+
262
+ @Override
263
+ public void stringColumn(Column column)
264
+ {
265
+ if (pageReader.isNull(column)) {
266
+ addNullValue(column.getName());
267
+ }
268
+ else {
269
+ item.withString(column.getName(), pageReader.getString(column));
270
+ }
271
+ }
272
+
273
+ @Override
274
+ public void timestampColumn(Column column)
275
+ {
276
+ if (pageReader.isNull(column)) {
277
+ addNullValue(column.getName());
278
+ }
279
+ else {
280
+ item.withString(column.getName(), String.valueOf(pageReader.getTimestamp(column)));
281
+ }
282
+ }
283
+
284
+ @Override
285
+ public void jsonColumn(Column column)
286
+ {
287
+ if (pageReader.isNull(column)) {
288
+ addNullValue(column.getName());
289
+ }
290
+ else {
291
+ item.withJSON(column.getName(), pageReader.getJson(column).toString());
292
+ }
293
+ }
294
+
295
+ private void addNullValue(String name)
296
+ {
297
+ item.withNull(name);
298
+ }
299
+ });
300
+
301
+ if (mode.equals(Mode.UPSERT)) {
302
+ addItemToBuffer(item);
303
+ }
304
+ else if (mode.equals(Mode.UPSERT_WITH_EXPRESSION)) {
305
+ updateItem(item);
306
+ }
307
+ }
308
+ catch (AmazonServiceException ex) {
309
+ throw Throwables.propagate(ex);
310
+ }
311
+ }
312
+ }
313
+
314
+ // upsert mode only
315
+ public void addItemToBuffer(Item item)
316
+ {
317
+ items.addItemToPut(item);
318
+ currentBufferItemSize++;
319
+ totalWroteItemSize++;
320
+ // @see http://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
321
+ if (currentBufferItemSize >= maxPutItems) {
322
+ flush();
323
+ }
324
+ }
325
+
326
+ // upsert mode only
327
+ public void flush()
328
+ {
329
+ if (currentBufferItemSize > 0) {
330
+ try {
331
+ dynamodbUtils.batchWriteItem(dynamoDB, items);
332
+ if (totalWroteItemSize % 1000 == 0) {
333
+ log.info(String.format("Wrote %s items", totalWroteItemSize));
334
+ }
335
+ }
336
+ catch (AmazonServiceException ex) {
337
+ if (ex.getErrorCode().equals("ValidationException")) {
338
+ log.error(String.format("Data was invalid. data:%s", items.getItemsToPut()));
339
+ }
340
+ throw Throwables.propagate(ex);
341
+ }
342
+ finally {
343
+ // Re-initialize for next loop
344
+ items = new TableWriteItems(table);
345
+ currentBufferItemSize = 0;
346
+ }
347
+ }
348
+ }
349
+
350
+ // upsert_with_expression mode only
351
+ public void updateItem(Item item)
352
+ {
353
+ try {
354
+ dynamodbUtils.updateItem(dynamoDB, table, item, primaryKey, updateExpression);
355
+ totalWroteItemSize++;
356
+ if (totalWroteItemSize % 1000 == 0) {
357
+ log.info(String.format("Updated %s items", totalWroteItemSize));
358
+ }
359
+ }
360
+ catch (AmazonServiceException ex) {
361
+ if (ex.getErrorCode().equals("ValidationException")) {
362
+ log.error(String.format("Data was invalid. data:%s", items.getItemsToPut()));
363
+ }
364
+ throw Throwables.propagate(ex);
365
+ }
366
+ }
367
+
368
+ @Override
369
+ public void finish()
370
+ {
371
+ close();
372
+ log.info(String.format("Completed to write total %s items", totalWroteItemSize));
373
+ }
374
+
375
+ @Override
376
+ public void close()
377
+ {
378
+ if (mode.equals(Mode.UPSERT)) {
379
+ flush();
380
+ }
381
+ if (dynamoDB != null) {
382
+ dynamoDB.shutdown();
383
+ dynamoDB = null;
384
+ }
385
+ }
386
+
387
+ @Override
388
+ public void abort()
389
+ {
390
+ // nothing
391
+ }
392
+
393
+ @Override
394
+ public TaskReport commit()
395
+ {
396
+ return Exec.newTaskReport();
397
+ }
398
+ }
399
+
400
+ public enum Mode
401
+ {
402
+ UPSERT,
403
+ UPSERT_WITH_EXPRESSION;
404
+
405
+ @JsonValue
406
+ @Override
407
+ public String toString()
408
+ {
409
+ return name().toLowerCase(Locale.ENGLISH);
410
+ }
411
+
412
+ @JsonCreator
413
+ public static Mode fromString(String value)
414
+ {
415
+ switch (value) {
416
+ case "upsert":
417
+ return UPSERT;
418
+ case "upsert_with_expression":
419
+ return UPSERT_WITH_EXPRESSION;
420
+ default:
421
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are upsert and upsert_with_expression", value));
422
+ }
423
+ }
424
+ }
425
+ }