embulk-output-dynamodb 0.1.0

DynamodbUtils.java (new file)

package org.embulk.output.dynamodb;

import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
import com.amazonaws.services.dynamodbv2.document.BatchWriteItemOutcome;
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.document.TableWriteItems;
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
import com.amazonaws.services.dynamodbv2.model.CreateTableRequest;
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
import com.amazonaws.services.dynamodbv2.model.KeyType;
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;
import com.amazonaws.services.dynamodbv2.model.TableDescription;
import com.amazonaws.services.dynamodbv2.model.WriteRequest;
import com.google.common.base.Optional;
import com.google.inject.Inject;
import org.embulk.config.ConfigException;
import org.embulk.config.UserDataException;
import org.embulk.spi.Exec;
import org.jruby.embed.ScriptingContainer;
import org.slf4j.Logger;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class DynamodbUtils
{
    private final Logger log;

    @Inject
    public DynamodbUtils()
    {
        log = Exec.getLogger(getClass());
    }

    protected DynamoDB createDynamoDB(DynamodbOutputPlugin.PluginTask task)
    {
        DynamoDB dynamoDB;
        try {
            AmazonDynamoDBClient client = new AmazonDynamoDBClient(
                    getCredentialsProvider(task),
                    getClientConfiguration(task)
            ).withRegion(Regions.fromName(task.getRegion()));

            // An explicit endpoint (e.g. a local DynamoDB) overrides the regional one
            if (task.getEndpoint().isPresent()) {
                client.setEndpoint(task.getEndpoint().get());
            }

            dynamoDB = new DynamoDB(client);
            dynamoDB.getTable(task.getTable());
        }
        catch (AmazonServiceException ex) {
            int statusCode = ex.getStatusCode();
            if (statusCode == 400) {
                throw new ConfigException(ex);
            }
            else {
                throw new ConnectionException(ex);
            }
        }
        catch (AmazonClientException ex) {
            throw new ConnectionException(ex);
        }
        return dynamoDB;
    }

    protected ClientConfiguration getClientConfiguration(DynamodbOutputPlugin.PluginTask task)
    {
        ClientConfiguration clientConfig = new ClientConfiguration();

        //clientConfig.setProtocol(Protocol.HTTP);
        clientConfig.setMaxConnections(50); // SDK default: 50
        clientConfig.setMaxErrorRetry(3); // SDK default: 3
        clientConfig.setSocketTimeout(8 * 60 * 1000); // SDK default: 50*1000

        return clientConfig;
    }

    private AWSCredentialsProvider getCredentialsProvider(DynamodbOutputPlugin.PluginTask task)
    {
        return AwsCredentials.getAWSCredentialsProvider(task);
    }

    protected void configCheck(DynamodbOutputPlugin.PluginTask task)
    {
        // @see http://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
        if (task.getMode().equals(DynamodbOutputPlugin.Mode.UPSERT)) {
            if (task.getMaxPutItems() > 25) {
                throw new ConfigException("'max_put_items' must be less than or equal to 25");
            }
        }

        if (task.getMode().equals(DynamodbOutputPlugin.Mode.UPSERT_WITH_EXPRESSION)) {
            if (!task.getUpdateExpression().isPresent()) {
                throw new ConfigException("'update_expression' is required in upsert_with_expression mode");
            }
        }
    }

    protected void batchWriteItem(DynamoDB dynamoDB, TableWriteItems items)
    {
        BatchWriteItemOutcome outcome = dynamoDB.batchWriteItem(items);
        int retryCount = 0;
        try {
            do {
                Map<String, List<WriteRequest>> unprocessedItems = outcome.getUnprocessedItems();
                // @see http://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_BatchWriteItem.html
                // If DynamoDB returns any unprocessed items, you should retry the batch operation on those items.
                // However, we strongly recommend that you use an exponential backoff algorithm.
                // (The sleep below backs off linearly: 500ms, 1000ms, ...)
                if (outcome.getUnprocessedItems().size() > 0) {
                    retryCount++;
                    if (retryCount >= 5) {
                        throw new ConnectionException("Retry count expired while executing batchWriteItem");
                    }
                    Thread.sleep(500 * retryCount);
                    log.warn("Retrying the unprocessed items");
                    outcome = dynamoDB.batchWriteItemUnprocessed(unprocessedItems);
                }
            } while (outcome.getUnprocessedItems().size() > 0);
        }
        catch (InterruptedException ex) {
            Thread.currentThread().interrupt(); // restore the interrupt flag
            throw new ConnectionException("Retry of batchWriteItem was interrupted");
        }
    }
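
    // Usage sketch (hypothetical table and item, not from the plugin):
    //   TableWriteItems items = new TableWriteItems("my_table")
    //       .withItemsToPut(new Item().withPrimaryKey("id", "1").withString("comment", "embulk"));
    //   batchWriteItem(dynamoDB, items);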

    protected void updateItem(DynamoDB dynamoDB, String tableName, Item item, String primaryKey, Optional<String> expression)
    {
        Object primaryKeyValue = null;
        Map<String, String> attributeNames = new HashMap<>();
        Map<String, Object> attributeValues = new HashMap<>();

        Map<String, Object> itemMap = item.asMap();
        for (Map.Entry<String, Object> e : itemMap.entrySet()) {
            String keyName = e.getKey();
            if (keyName.equals(primaryKey)) {
                primaryKeyValue = e.getValue();
            }
            // Only bind placeholders for attributes the expression references.
            // (A plain substring check; a key whose name is contained in another
            // attribute's name would also match.)
            else if (expression.get().contains(keyName)) {
                attributeNames.put("#" + keyName, keyName);
                attributeValues.put(":" + keyName, e.getValue());
            }
        }
        log.debug("attribute names: " + attributeNames.toString());
        log.debug("attribute values: " + attributeValues.toString());
        log.debug(String.format("primary key %s:%s", primaryKey, primaryKeyValue));
        Table table = dynamoDB.getTable(tableName);
        table.updateItem(primaryKey, primaryKeyValue, expression.get(), attributeNames, attributeValues);
    }
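
    // Example (hypothetical values): with primary key "id", an item
    // {id: "1", score: 9} and update_expression "set #score = :score",
    // the call above becomes
    //   table.updateItem("id", "1", "set #score = :score",
    //                    {"#score" -> "score"}, {":score" -> 9});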

    protected String getPrimaryKeyName(DynamoDB dynamoDB, String tableName)
    {
        Table table = dynamoDB.getTable(tableName);

        TableDescription description = table.describe();
        Iterator<KeySchemaElement> schema = description.getKeySchema().iterator();
        String primaryKey = null;
        while (schema.hasNext()) {
            KeySchemaElement element = schema.next();
            // Take the partition (HASH) key; a composite key schema also lists a RANGE key.
            if (element.getKeyType().equals(KeyType.HASH.toString())) {
                primaryKey = element.getAttributeName();
            }
        }
        return primaryKey;
    }

    protected void createTable(DynamoDB dynamoDB, DynamodbOutputPlugin.PluginTask task)
            throws InterruptedException
    {
        ArrayList<KeySchemaElement> keySchema = getKeySchemaElements(task);
        ArrayList<AttributeDefinition> attributeDefinitions = getAttributeDefinitions(task);
        ProvisionedThroughput provisionedThroughput = new ProvisionedThroughput()
                .withReadCapacityUnits(task.getReadCapacityUnits().get().getNormal().get())
                .withWriteCapacityUnits(task.getWriteCapacityUnits().get().getNormal().get());

        dynamoDB.createTable(new CreateTableRequest()
                .withTableName(task.getTable())
                .withKeySchema(keySchema)
                .withAttributeDefinitions(attributeDefinitions)
                .withProvisionedThroughput(provisionedThroughput)
        );

        Table table = dynamoDB.getTable(task.getTable());
        table.waitForActive();
        log.info(String.format("Created table '%s'", task.getTable()));
    }

    protected void deleteTable(DynamoDB dynamoDB, String tableName)
            throws InterruptedException
    {
        Table table = dynamoDB.getTable(tableName);
        table.delete();
        table.waitForDelete();
        log.info(String.format("Deleted table '%s'", tableName));
    }

    protected boolean isExistsTable(DynamoDB dynamoDB, String tableName)
            throws InterruptedException
    {
        Table table = dynamoDB.getTable(tableName);
        try {
            switch (table.describe().getTableStatus()) {
                case "CREATING":
                case "UPDATING":
                    table.waitForActive();
                    return true;
                case "DELETING":
                    // once the deletion finishes the table no longer exists
                    table.waitForDelete();
                    return false;
                default:
                    return true;
            }
        }
        catch (ResourceNotFoundException e) {
            return false;
        }
        catch (AmazonClientException e) {
            return false;
        }
    }
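
    // Note: getTable() above makes no request; the DescribeTable call happens in
    // describe(), which is why a missing table surfaces as ResourceNotFoundException.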

    protected void updateTableProvision(DynamoDB dynamoDB, DynamodbOutputPlugin.PluginTask task, boolean isRaise)
            throws InterruptedException
    {
        if (!task.getReadCapacityUnits().isPresent() && !task.getWriteCapacityUnits().isPresent()) {
            return;
        }

        boolean needsChange = false;

        Table table = dynamoDB.getTable(task.getTable());
        TableDescription description = table.describe();
        long currentReadCapacityUnit = description.getProvisionedThroughput().getReadCapacityUnits();
        long currentWriteCapacityUnit = description.getProvisionedThroughput().getWriteCapacityUnits();

        ProvisionedThroughput throughput = new ProvisionedThroughput();
        Optional<Long> readUnits = Optional.absent();
        if (task.getReadCapacityUnits().isPresent()) {
            // guard the get(): only write units may have been configured
            readUnits = isRaise ? task.getReadCapacityUnits().get().getRaise() : task.getReadCapacityUnits().get().getNormal();
            if (readUnits.isPresent() && currentReadCapacityUnit != readUnits.get()) {
                throughput.withReadCapacityUnits(readUnits.get());
                needsChange = true;
            }
        }
        Optional<Long> writeUnits = Optional.absent();
        if (task.getWriteCapacityUnits().isPresent()) {
            writeUnits = isRaise ? task.getWriteCapacityUnits().get().getRaise() : task.getWriteCapacityUnits().get().getNormal();
            if (writeUnits.isPresent() && currentWriteCapacityUnit != writeUnits.get()) {
                throughput.withWriteCapacityUnits(writeUnits.get());
                needsChange = true;
            }
        }

        if (needsChange) {
            table.updateTable(throughput);
            log.info(String.format("Updated Provisioned Throughput of table[%s]. read_capacity_unit[%s], write_capacity_unit[%s]",
                    task.getTable(), readUnits.orNull(), writeUnits.orNull())
            );
            table.waitForActive();
        }
        else {
            log.info(String.format("No Provisioned Throughput update is needed for table[%s]. Current value is read_capacity_unit[%s], write_capacity_unit[%s]",
                    task.getTable(), currentReadCapacityUnit, currentWriteCapacityUnit)
            );
        }
    }
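
    // "raise" and "normal" are presumably paired: the caller raises throughput
    // before the bulk load (isRaise = true) and restores the normal value afterwards.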

    // Expanding strftime-style names such as "table_%Y_%m" (whether or not the name
    // actually contains a pattern) is awkward in plain Java, so delegate to JRuby.
    public String generateTableName(String tableName)
    {
        ScriptingContainer jruby = new ScriptingContainer();
        return jruby.runScriptlet("Time.now.strftime('" + tableName + "')").toString();
    }
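
    // Example: generateTableName("logs_%Y_%m") returns e.g. "logs_2016_01";
    // a name with no strftime pattern comes back unchanged.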

    private ArrayList<KeySchemaElement> getKeySchemaElements(DynamodbOutputPlugin.PluginTask task)
    {
        ArrayList<KeySchemaElement> keySchema = new ArrayList<>();
        keySchema.add(new KeySchemaElement().withAttributeName(task.getPrimaryKey().get()).withKeyType(KeyType.HASH));
        if (task.getSortKey().isPresent()) {
            String sortKey = task.getSortKey().get();
            keySchema.add(new KeySchemaElement().withAttributeName(sortKey).withKeyType(KeyType.RANGE));
        }
        return keySchema;
    }

    private ArrayList<AttributeDefinition> getAttributeDefinitions(DynamodbOutputPlugin.PluginTask task)
    {
        ArrayList<AttributeDefinition> attributeDefinitions = new ArrayList<>();
        attributeDefinitions.add(
                new AttributeDefinition()
                        .withAttributeName(task.getPrimaryKey().get())
                        .withAttributeType(getAttributeType(task.getPrimaryKeyType().get())));
        if (task.getSortKey().isPresent()) {
            String sortKey = task.getSortKey().get();
            attributeDefinitions.add(
                    new AttributeDefinition()
                            .withAttributeName(sortKey)
                            .withAttributeType(getAttributeType(task.getSortKeyType().get())));
        }
        return attributeDefinitions;
    }

    private ScalarAttributeType getAttributeType(String type)
    {
        switch (type.toLowerCase()) {
            case "string":
                return ScalarAttributeType.S;
            case "number":
                return ScalarAttributeType.N;
            case "binary":
                return ScalarAttributeType.B;
            default:
                throw new UnknownScalarAttributeTypeException("'" + type + "' is not a valid key type");
        }
    }

    public class ConnectionException extends RuntimeException implements UserDataException
    {
        protected ConnectionException()
        {
        }

        public ConnectionException(String message)
        {
            super(message);
        }

        public ConnectionException(Throwable cause)
        {
            super(cause);
        }
    }

    public class UnknownScalarAttributeTypeException extends RuntimeException implements UserDataException
    {
        protected UnknownScalarAttributeTypeException()
        {
        }

        public UnknownScalarAttributeTypeException(String message)
        {
            super(message);
        }

        public UnknownScalarAttributeTypeException(Throwable cause)
        {
            super(cause);
        }
    }
}
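
Taken together, a caller is expected to drive these helpers roughly as follows. This is a minimal sketch, not code shipped in this gem: the class name LoadSketch is invented, it sits in the plugin's package because the helpers are protected, and the per-page write loop is elided.

package org.embulk.output.dynamodb;

import com.amazonaws.services.dynamodbv2.document.DynamoDB;

class LoadSketch
{
    static void load(DynamodbOutputPlugin.PluginTask task) throws InterruptedException
    {
        DynamodbUtils utils = new DynamodbUtils();
        utils.configCheck(task);                        // validate mode-specific settings
        DynamoDB dynamoDB = utils.createDynamoDB(task);
        try {
            if (!utils.isExistsTable(dynamoDB, task.getTable())) {
                utils.createTable(dynamoDB, task);
            }
            utils.updateTableProvision(dynamoDB, task, true);  // "raise" units for the load
            // ... per page: build Items, then batchWriteItem() or updateItem() ...
            utils.updateTableProvision(dynamoDB, task, false); // back to "normal" units
        }
        finally {
            dynamoDB.shutdown();
        }
    }
}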

TestDynamodbOutputPlugin.java (new file)

package org.embulk.output.dynamodb;

import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import org.embulk.EmbulkTestRuntime;
import org.embulk.config.ConfigException;
import org.embulk.config.ConfigSource;
import org.embulk.config.TaskReport;
import org.embulk.config.TaskSource;
import org.embulk.output.dynamodb.DynamodbOutputPlugin.PluginTask;
import org.embulk.spi.Exec;
import org.embulk.spi.OutputPlugin;
import org.embulk.spi.Page;
import org.embulk.spi.PageTestUtils;
import org.embulk.spi.Schema;
import org.embulk.spi.TestPageBuilderReader.MockPageOutput;
import org.embulk.spi.TransactionalPageOutput;
import org.embulk.standards.CsvParserPlugin;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;

import java.util.Arrays;
import java.util.List;

import static org.junit.Assert.assertEquals;

public class TestDynamodbOutputPlugin
{
    private static String PATH_PREFIX;

    private MockPageOutput pageOutput;

    @BeforeClass
    public static void initializeConstant()
    {
        PATH_PREFIX = DynamodbOutputPlugin.class.getClassLoader().getResource("sample_01.csv").getPath();
    }

    @Rule
    public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
    private DynamodbOutputPlugin plugin;

    @Before
    public void createResources() throws Exception
    {
        ConfigSource config = config();
        plugin = new DynamodbOutputPlugin();
        PluginTask task = config.loadConfig(PluginTask.class);
        pageOutput = new MockPageOutput();

        DynamodbUtils dynamoDbUtils = new DynamodbUtils();
        DynamoDB dynamoDB = null;
        try {
            dynamoDB = dynamoDbUtils.createDynamoDB(task);
            if (dynamoDbUtils.isExistsTable(dynamoDB, task.getTable())) {
                dynamoDbUtils.deleteTable(dynamoDB, task.getTable());
            }
            dynamoDbUtils.createTable(dynamoDB, task);
        }
        finally {
            if (dynamoDB != null) {
                dynamoDB.shutdown();
            }
        }
    }

    @Test
    public void testDefaultValues()
    {
        // config() sets "region" explicitly, so this round-trips the configured value
        ConfigSource config = config();
        DynamodbOutputPlugin.PluginTask task = config.loadConfig(PluginTask.class);
        assertEquals("us-west-1", task.getRegion());
    }

    @Test
    public void testTransaction()
    {
        ConfigSource config = config();
        Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
        plugin.transaction(config, schema, 0, new OutputPlugin.Control()
        {
            @Override
            public List<TaskReport> run(TaskSource taskSource)
            {
                return Lists.newArrayList(Exec.newTaskReport());
            }
        });
        // no error happens
    }

    @Test
    public void testResume()
    {
        ConfigSource config = config();
        Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
        PluginTask task = config.loadConfig(PluginTask.class);
        plugin.resume(task.dump(), schema, 0, new OutputPlugin.Control()
        {
            @Override
            public List<TaskReport> run(TaskSource taskSource)
            {
                return Lists.newArrayList(Exec.newTaskReport());
            }
        });
    }

    @Test
    public void testCleanup()
    {
        ConfigSource config = config();
        Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
        PluginTask task = config.loadConfig(PluginTask.class);
        plugin.cleanup(task.dump(), schema, 0, Arrays.asList(Exec.newTaskReport()));
        // no error happens
    }

    @Test
    public void testOutputByOpen() throws Exception
    {
        ConfigSource config = config();
        Schema schema = config.getNested("parser").loadConfig(CsvParserPlugin.PluginTask.class).getSchemaConfig().toSchema();
        PluginTask task = config.loadConfig(PluginTask.class);
        plugin.transaction(config, schema, 0, new OutputPlugin.Control()
        {
            @Override
            public List<TaskReport> run(TaskSource taskSource)
            {
                return Lists.newArrayList(Exec.newTaskReport());
            }
        });
        TransactionalPageOutput output = plugin.open(task.dump(), schema, 0);

        List<Page> pages = PageTestUtils.buildPage(runtime.getBufferAllocator(), schema, 1L, 32864L, "2015-01-27T19:23:49", "2015-01-27T00:00:00", true, 123.45, "embulk");
        assertEquals(1, pages.size());
        for (Page page : pages) {
            output.add(page);
        }

        // Verification against the table is left disabled. Note the original loop
        // called items.iterator() on every pass, which restarts the scan, so a
        // single Iterator is held here instead.
        // output.finish();
        // output.commit();
        //
        // DynamodbUtils dynamoDbUtils = new DynamodbUtils();
        // DynamoDB dynamoDB = null;
        // try {
        //     dynamoDB = dynamoDbUtils.createDynamoDB(task);
        //
        //     Table table = dynamoDB.getTable(task.getTable());
        //     ItemCollection<ScanOutcome> items = table.scan();
        //     Iterator<Item> iterator = items.iterator();
        //
        //     while (iterator.hasNext()) {
        //         Map<String, Object> item = iterator.next().asMap();
        //         assertEquals(1, item.get("id"));
        //         assertEquals(32864, item.get("account"));
        //         assertEquals("2015-01-27T19:23:49", item.get("time"));
        //         assertEquals("2015-01-27T00:00:00", item.get("purchase"));
        //         assertEquals(true, item.get("flg"));
        //         assertEquals(123.45, item.get("score"));
        //         assertEquals("embulk", item.get("comment"));
        //     }
        // }
        // finally {
        //     if (dynamoDB != null) {
        //         dynamoDB.shutdown();
        //     }
        // }
    }

    @Test
    public void testMode()
    {
        assertEquals(2, DynamodbOutputPlugin.Mode.values().length);
        assertEquals(DynamodbOutputPlugin.Mode.UPSERT, DynamodbOutputPlugin.Mode.valueOf("UPSERT"));
    }

    @Test(expected = ConfigException.class)
    public void testModeThrowsConfigException()
    {
        DynamodbOutputPlugin.Mode.fromString("non-exists-mode");
    }

    private ConfigSource config()
    {
        return Exec.newConfigSource()
                .set("in", inputConfig())
                .set("parser", parserConfig(schemaConfig()))
                .set("type", "dynamodb")
                .set("mode", "upsert")
                .set("region", "us-west-1")
                .set("table", "dummy")
                .set("primary_key", "id")
                .set("primary_key_type", "string")
                .set("read_capacity_units", capacityUnitConfig())
                .set("write_capacity_units", capacityUnitConfig())
                .set("auth_method", "basic")
                .set("access_key_id", "dummy")
                .set("secret_access_key", "dummy")
                .set("endpoint", "http://localhost:8000");
    }
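
    // "endpoint" pointing at http://localhost:8000 assumes a locally running
    // DynamoDB (e.g. DynamoDB Local); the dummy credentials are accepted there.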

    private ImmutableMap<String, Object> capacityUnitConfig()
    {
        ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
        builder.put("normal", 5L);
        builder.put("raise", 8L);
        return builder.build();
    }

    private ImmutableMap<String, Object> inputConfig()
    {
        ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
        builder.put("type", "file");
        builder.put("path_prefix", PATH_PREFIX);
        builder.put("last_path", "");
        return builder.build();
    }

    private ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
    {
        ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
        builder.put("type", "csv");
        builder.put("newline", "CRLF");
        builder.put("delimiter", ",");
        builder.put("quote", "\"");
        builder.put("escape", "\"");
        builder.put("trim_if_not_quoted", false);
        builder.put("skip_header_lines", 1);
        builder.put("allow_extra_columns", false);
        builder.put("allow_optional_columns", false);
        builder.put("columns", schemaConfig);
        return builder.build();
    }

    private ImmutableList<Object> schemaConfig()
    {
        ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
        builder.add(ImmutableMap.of("name", "id", "type", "long"));
        builder.add(ImmutableMap.of("name", "account", "type", "long"));
        builder.add(ImmutableMap.of("name", "time", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
        builder.add(ImmutableMap.of("name", "purchase", "type", "timestamp", "format", "%Y%m%d"));
        builder.add(ImmutableMap.of("name", "flg", "type", "boolean"));
        builder.add(ImmutableMap.of("name", "score", "type", "double"));
        builder.add(ImmutableMap.of("name", "comment", "type", "string"));
        return builder.build();
    }
}