embulk-input-mongodb 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +1 -1
- data/gradle/wrapper/gradle-wrapper.properties +1 -1
- data/src/main/java/org/embulk/input/mongodb/HostTask.java +16 -0
- data/src/main/java/org/embulk/input/mongodb/MongodbInputPlugin.java +22 -113
- data/src/main/java/org/embulk/input/mongodb/PluginTask.java +96 -0
- data/src/main/java/org/embulk/input/mongodb/ValueCodec.java +9 -2
- data/src/test/java/org/embulk/input/mongodb/TestMongodbInputPlugin.java +29 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c0c287fc6caae40f480044e097b35c22748be2d
|
4
|
+
data.tar.gz: 31c40a09bfd9aa1d3333105a49b02807c7fefb5b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 013109760ca0b9040ea5367384d61c283801c97c833c1da0b52625ee8927dadefe71c6cd0f15a4115e4184b4fbdf40faed93eb2406fc3385ed4fb85eef5c3150
|
7
|
+
data.tar.gz: 2e1e7a1dcac758ede878293a37c9a95a532cb177964c9208d05f64f97c9ad3b1040ec5f9ce37abb25ddc53af07d379abbe24b9edf6c67f2b12e3474c300c2530
|
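data/build.gradle
CHANGED
(hunk not captured in this extract)
data/gradle/wrapper/gradle-wrapper.properties
CHANGED
(hunk not captured in this extract)
data/src/main/java/org/embulk/input/mongodb/HostTask.java
ADDED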
@@ -0,0 +1,16 @@
|
|
1
|
+
package org.embulk.input.mongodb;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.Task;
|
6
|
+
|
7
|
+
public interface HostTask
|
8
|
+
extends Task
|
9
|
+
{
|
10
|
+
@Config("host")
|
11
|
+
String getHost();
|
12
|
+
|
13
|
+
@Config("port")
|
14
|
+
@ConfigDefault("27017")
|
15
|
+
int getPort();
|
16
|
+
}
|
data/src/main/java/org/embulk/input/mongodb/MongodbInputPlugin.java
CHANGED
@@ -3,6 +3,7 @@ package org.embulk.input.mongodb;
|
|
3
3
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
4
4
|
import com.google.common.base.Optional;
|
5
5
|
import com.google.common.base.Throwables;
|
6
|
+
import com.mongodb.BasicDBObject;
|
6
7
|
import com.mongodb.MongoClient;
|
7
8
|
import com.mongodb.MongoClientURI;
|
8
9
|
import com.mongodb.MongoCredential;
|
@@ -11,21 +12,16 @@ import com.mongodb.ServerAddress;
|
|
11
12
|
import com.mongodb.client.MongoCollection;
|
12
13
|
import com.mongodb.client.MongoCursor;
|
13
14
|
import com.mongodb.client.MongoDatabase;
|
14
|
-
import com.mongodb.util.JSON;
|
15
|
-
import com.mongodb.util.JSONParseException;
|
16
15
|
import org.bson.codecs.configuration.CodecRegistries;
|
17
16
|
import org.bson.codecs.configuration.CodecRegistry;
|
18
17
|
import org.bson.conversions.Bson;
|
19
|
-
import org.embulk.config.Config;
|
20
|
-
import org.embulk.config.ConfigDefault;
|
18
|
+
import org.bson.json.JsonParseException;
|
21
19
|
import org.embulk.config.ConfigDiff;
|
22
20
|
import org.embulk.config.ConfigException;
|
23
|
-
import org.embulk.config.ConfigInject;
|
24
21
|
import org.embulk.config.ConfigSource;
|
25
22
|
import org.embulk.config.DataSource;
|
26
23
|
import org.embulk.config.DataSourceImpl;
|
27
24
|
import org.embulk.config.ModelManager;
|
28
|
-
import org.embulk.config.Task;
|
29
25
|
import org.embulk.config.TaskReport;
|
30
26
|
import org.embulk.config.TaskSource;
|
31
27
|
import org.embulk.spi.BufferAllocator;
|
@@ -35,13 +31,10 @@ import org.embulk.spi.InputPlugin;
|
|
35
31
|
import org.embulk.spi.PageBuilder;
|
36
32
|
import org.embulk.spi.PageOutput;
|
37
33
|
import org.embulk.spi.Schema;
|
38
|
-
import org.embulk.spi.SchemaConfig;
|
39
34
|
import org.embulk.spi.type.Types;
|
40
35
|
import org.msgpack.value.Value;
|
41
36
|
import org.slf4j.Logger;
|
42
37
|
|
43
|
-
import javax.validation.constraints.Min;
|
44
|
-
|
45
38
|
import java.io.IOException;
|
46
39
|
import java.net.UnknownHostException;
|
47
40
|
import java.util.ArrayList;
|
@@ -54,99 +47,6 @@ import java.util.Map;
|
|
54
47
|
public class MongodbInputPlugin
|
55
48
|
implements InputPlugin
|
56
49
|
{
|
57
|
-
public interface HostTask
|
58
|
-
extends Task
|
59
|
-
{
|
60
|
-
@Config("host")
|
61
|
-
String getHost();
|
62
|
-
|
63
|
-
@Config("port")
|
64
|
-
@ConfigDefault("27017")
|
65
|
-
int getPort();
|
66
|
-
}
|
67
|
-
|
68
|
-
public interface PluginTask
|
69
|
-
extends Task
|
70
|
-
{
|
71
|
-
// MongoDB connection string URI
|
72
|
-
@Config("uri")
|
73
|
-
@ConfigDefault("null")
|
74
|
-
Optional<String> getUri();
|
75
|
-
|
76
|
-
@Config("hosts")
|
77
|
-
@ConfigDefault("null")
|
78
|
-
Optional<List<HostTask>> getHosts();
|
79
|
-
|
80
|
-
@Config("user")
|
81
|
-
@ConfigDefault("null")
|
82
|
-
Optional<String> getUser();
|
83
|
-
|
84
|
-
@Config("password")
|
85
|
-
@ConfigDefault("null")
|
86
|
-
Optional<String> getPassword();
|
87
|
-
|
88
|
-
@Config("database")
|
89
|
-
@ConfigDefault("null")
|
90
|
-
Optional<String> getDatabase();
|
91
|
-
|
92
|
-
@Config("collection")
|
93
|
-
String getCollection();
|
94
|
-
|
95
|
-
@Config("fields")
|
96
|
-
@ConfigDefault("null")
|
97
|
-
Optional<SchemaConfig> getFields();
|
98
|
-
|
99
|
-
@Config("projection")
|
100
|
-
@ConfigDefault("\"{}\"")
|
101
|
-
String getProjection();
|
102
|
-
|
103
|
-
@Config("query")
|
104
|
-
@ConfigDefault("\"{}\"")
|
105
|
-
String getQuery();
|
106
|
-
void setQuery(String query);
|
107
|
-
|
108
|
-
@Config("sort")
|
109
|
-
@ConfigDefault("\"{}\"")
|
110
|
-
String getSort();
|
111
|
-
void setSort(String sort);
|
112
|
-
|
113
|
-
@Config("limit")
|
114
|
-
@ConfigDefault("null")
|
115
|
-
Optional<Integer> getLimit();
|
116
|
-
|
117
|
-
@Config("skip")
|
118
|
-
@ConfigDefault("null")
|
119
|
-
Optional<Integer> getSkip();
|
120
|
-
|
121
|
-
@Config("id_field_name")
|
122
|
-
@ConfigDefault("\"_id\"")
|
123
|
-
String getIdFieldName();
|
124
|
-
|
125
|
-
@Config("batch_size")
|
126
|
-
@ConfigDefault("10000")
|
127
|
-
@Min(1)
|
128
|
-
int getBatchSize();
|
129
|
-
|
130
|
-
@Config("stop_on_invalid_record")
|
131
|
-
@ConfigDefault("false")
|
132
|
-
boolean getStopOnInvalidRecord();
|
133
|
-
|
134
|
-
@Config("json_column_name")
|
135
|
-
@ConfigDefault("\"record\"")
|
136
|
-
String getJsonColumnName();
|
137
|
-
|
138
|
-
@Config("incremental_field")
|
139
|
-
@ConfigDefault("null")
|
140
|
-
Optional<List<String>> getIncrementalField();
|
141
|
-
|
142
|
-
@Config("last_record")
|
143
|
-
@ConfigDefault("null")
|
144
|
-
Optional<Map<String, Object>> getLastRecord();
|
145
|
-
|
146
|
-
@ConfigInject
|
147
|
-
BufferAllocator getBufferAllocator();
|
148
|
-
}
|
149
|
-
|
150
50
|
private final Logger log = Exec.getLogger(MongodbInputPlugin.class);
|
151
51
|
|
152
52
|
@Override
|
@@ -232,9 +132,9 @@ public class MongodbInputPlugin
|
|
232
132
|
throw new ConfigException(ex);
|
233
133
|
}
|
234
134
|
|
235
|
-
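Bson query = (Bson) JSON.parse(task.getQuery());
|
236
|
-
Bson projection = (Bson) JSON.parse(task.getProjection());
|
237
|
-
Bson sort = (Bson) JSON.parse(task.getSort());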
|
135
|
+
Bson query = BasicDBObject.parse(task.getQuery());
|
136
|
+
Bson projection = BasicDBObject.parse(task.getProjection());
|
137
|
+
Bson sort = BasicDBObject.parse(task.getSort());
|
238
138
|
|
239
139
|
log.trace("query: {}", query);
|
240
140
|
log.trace("projection: {}", projection);
|
@@ -263,11 +163,13 @@ public class MongodbInputPlugin
|
|
263
163
|
}
|
264
164
|
|
265
165
|
pageBuilder.finish();
|
166
|
+
return updateTaskReport(Exec.newTaskReport(), valueCodec, task);
|
167
|
+
}
|
266
168
|
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
169
|
+
private TaskReport updateTaskReport(TaskReport report, ValueCodec valueCodec, PluginTask task)
|
170
|
+
{
|
171
|
+
DataSource lastRecord = new DataSourceImpl(Exec.getInjector().getInstance(ModelManager.class));
|
172
|
+
if (valueCodec.getLastRecord() != null && valueCodec.getProcessedRecordCount() > 0) {
|
271
173
|
for (String k : valueCodec.getLastRecord().keySet()) {
|
272
174
|
String value = valueCodec.getLastRecord().get(k).toString();
|
273
175
|
Map<String, String> types = valueCodec.getLastRecordType();
|
@@ -299,8 +201,15 @@ public class MongodbInputPlugin
|
|
299
201
|
lastRecord.set(k, value);
|
300
202
|
}
|
301
203
|
}
|
302
|
-
report.setNested("last_record", lastRecord);
|
303
204
|
}
|
205
|
+
else if (task.getIncrementalField().isPresent() && task.getLastRecord().isPresent()) {
|
206
|
+
for (String field : task.getIncrementalField().get()) {
|
207
|
+
if (task.getLastRecord().get().containsKey(field)) {
|
208
|
+
lastRecord.set(field, task.getLastRecord().get().get(field));
|
209
|
+
}
|
210
|
+
}
|
211
|
+
}
|
212
|
+
report.setNested("last_record", lastRecord);
|
304
213
|
return report;
|
305
214
|
}
|
306
215
|
|
@@ -414,7 +323,7 @@ public class MongodbInputPlugin
|
|
414
323
|
|
415
324
|
return result;
|
416
325
|
}
|
417
|
-
catch (JSONParseException | IOException ex) {
|
326
|
+
catch (JsonParseException | IOException ex) {
|
418
327
|
throw new ConfigException("Could not generate new query for incremental load.");
|
419
328
|
}
|
420
329
|
}
|
@@ -422,9 +331,9 @@ public class MongodbInputPlugin
|
|
422
331
|
private void validateJsonField(String name, String jsonString)
|
423
332
|
{
|
424
333
|
try {
|
425
|
-
JSON.parse(jsonString);
|
334
|
+
BasicDBObject.parse(jsonString);
|
426
335
|
}
|
427
|
-
catch (JSONParseException ex) {
|
336
|
+
catch (JsonParseException ex) {
|
428
337
|
throw new ConfigException(String.format("Invalid JSON string was given for '%s' parameter. [%s]", name, jsonString));
|
429
338
|
}
|
430
339
|
}
|
data/src/main/java/org/embulk/input/mongodb/PluginTask.java
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
package org.embulk.input.mongodb;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import org.embulk.config.Config;
|
5
|
+
import org.embulk.config.ConfigDefault;
|
6
|
+
import org.embulk.config.ConfigInject;
|
7
|
+
import org.embulk.config.Task;
|
8
|
+
import org.embulk.spi.BufferAllocator;
|
9
|
+
import org.embulk.spi.SchemaConfig;
|
10
|
+
|
11
|
+
import javax.validation.constraints.Min;
|
12
|
+
|
13
|
+
import java.util.List;
|
14
|
+
import java.util.Map;
|
15
|
+
|
16
|
+
public interface PluginTask
|
17
|
+
extends Task
|
18
|
+
{
|
19
|
+
// MongoDB connection string URI
|
20
|
+
@Config("uri")
|
21
|
+
@ConfigDefault("null")
|
22
|
+
Optional<String> getUri();
|
23
|
+
|
24
|
+
@Config("hosts")
|
25
|
+
@ConfigDefault("null")
|
26
|
+
Optional<List<HostTask>> getHosts();
|
27
|
+
|
28
|
+
@Config("user")
|
29
|
+
@ConfigDefault("null")
|
30
|
+
Optional<String> getUser();
|
31
|
+
|
32
|
+
@Config("password")
|
33
|
+
@ConfigDefault("null")
|
34
|
+
Optional<String> getPassword();
|
35
|
+
|
36
|
+
@Config("database")
|
37
|
+
@ConfigDefault("null")
|
38
|
+
Optional<String> getDatabase();
|
39
|
+
|
40
|
+
@Config("collection")
|
41
|
+
String getCollection();
|
42
|
+
|
43
|
+
@Config("fields")
|
44
|
+
@ConfigDefault("null")
|
45
|
+
Optional<SchemaConfig> getFields();
|
46
|
+
|
47
|
+
@Config("projection")
|
48
|
+
@ConfigDefault("\"{}\"")
|
49
|
+
String getProjection();
|
50
|
+
|
51
|
+
@Config("query")
|
52
|
+
@ConfigDefault("\"{}\"")
|
53
|
+
String getQuery();
|
54
|
+
void setQuery(String query);
|
55
|
+
|
56
|
+
@Config("sort")
|
57
|
+
@ConfigDefault("\"{}\"")
|
58
|
+
String getSort();
|
59
|
+
void setSort(String sort);
|
60
|
+
|
61
|
+
@Config("limit")
|
62
|
+
@ConfigDefault("null")
|
63
|
+
Optional<Integer> getLimit();
|
64
|
+
|
65
|
+
@Config("skip")
|
66
|
+
@ConfigDefault("null")
|
67
|
+
Optional<Integer> getSkip();
|
68
|
+
|
69
|
+
@Config("id_field_name")
|
70
|
+
@ConfigDefault("\"_id\"")
|
71
|
+
String getIdFieldName();
|
72
|
+
|
73
|
+
@Config("batch_size")
|
74
|
+
@ConfigDefault("10000")
|
75
|
+
@Min(1)
|
76
|
+
int getBatchSize();
|
77
|
+
|
78
|
+
@Config("stop_on_invalid_record")
|
79
|
+
@ConfigDefault("false")
|
80
|
+
boolean getStopOnInvalidRecord();
|
81
|
+
|
82
|
+
@Config("json_column_name")
|
83
|
+
@ConfigDefault("\"record\"")
|
84
|
+
String getJsonColumnName();
|
85
|
+
|
86
|
+
@Config("incremental_field")
|
87
|
+
@ConfigDefault("null")
|
88
|
+
Optional<List<String>> getIncrementalField();
|
89
|
+
|
90
|
+
@Config("last_record")
|
91
|
+
@ConfigDefault("null")
|
92
|
+
Optional<Map<String, Object>> getLastRecord();
|
93
|
+
|
94
|
+
@ConfigInject
|
95
|
+
BufferAllocator getBufferAllocator();
|
96
|
+
}
|
data/src/main/java/org/embulk/input/mongodb/ValueCodec.java
CHANGED
@@ -34,12 +34,13 @@ public class ValueCodec implements Codec<Value>
|
|
34
34
|
private final SimpleDateFormat formatter;
|
35
35
|
private final Logger log = Exec.getLogger(MongodbInputPlugin.class);
|
36
36
|
private final boolean stopOnInvalidRecord;
|
37
|
-
private final MongodbInputPlugin.PluginTask task;
|
37
|
+
private final PluginTask task;
|
38
38
|
private final Optional<List<String>> incrementalField;
|
39
39
|
private Map<String, Object> lastRecord;
|
40
|
+
private long processedRecordCount = 0;
|
40
41
|
private Map<String, String> lastRecordType;
|
41
42
|
|
42
|
-
public ValueCodec(boolean stopOnInvalidRecord, MongodbInputPlugin.PluginTask task)
|
43
|
+
public ValueCodec(boolean stopOnInvalidRecord, PluginTask task)
|
43
44
|
{
|
44
45
|
this.formatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", java.util.Locale.ENGLISH);
|
45
46
|
formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
|
@@ -84,6 +85,7 @@ public class ValueCodec implements Codec<Value>
|
|
84
85
|
log.warn(String.format("Skipped document because field '%s' contains unsupported object type [%s]",
|
85
86
|
fieldName, type));
|
86
87
|
}
|
88
|
+
this.processedRecordCount++;
|
87
89
|
}
|
88
90
|
reader.readEndDocument();
|
89
91
|
|
@@ -166,6 +168,11 @@ public class ValueCodec implements Codec<Value>
|
|
166
168
|
return this.lastRecord;
|
167
169
|
}
|
168
170
|
|
171
|
+
public Long getProcessedRecordCount()
|
172
|
+
{
|
173
|
+
return this.processedRecordCount;
|
174
|
+
}
|
175
|
+
|
169
176
|
public Map<String, String> getLastRecordType()
|
170
177
|
{
|
171
178
|
return this.lastRecordType;
|
data/src/test/java/org/embulk/input/mongodb/TestMongodbInputPlugin.java
CHANGED
@@ -23,7 +23,6 @@ import org.embulk.config.ConfigException;
|
|
23
23
|
import org.embulk.config.ConfigSource;
|
24
24
|
import org.embulk.config.TaskReport;
|
25
25
|
import org.embulk.config.TaskSource;
|
26
|
-
import org.embulk.input.mongodb.MongodbInputPlugin.PluginTask;
|
27
26
|
import org.embulk.spi.Column;
|
28
27
|
import org.embulk.spi.Exec;
|
29
28
|
import org.embulk.spi.InputPlugin;
|
@@ -314,6 +313,35 @@ public class TestMongodbInputPlugin
|
|
314
313
|
assertEquals("true", lastRecord.get(String.class, "boolean_field"));
|
315
314
|
}
|
316
315
|
|
316
|
+
@Test
|
317
|
+
public void testRunWithLimitIncrementalLoadWithNoRecord() throws Exception
|
318
|
+
{
|
319
|
+
Map<String, Object> previousLastRecord = new HashMap<>();
|
320
|
+
previousLastRecord.put("int32_field", 1);
|
321
|
+
previousLastRecord.put("datetime_field", "{$date=2015-01-27T10:23:49.000Z}");
|
322
|
+
previousLastRecord.put("boolean_field", true);
|
323
|
+
ConfigSource config = Exec.newConfigSource()
|
324
|
+
.set("uri", MONGO_URI)
|
325
|
+
.set("collection", MONGO_COLLECTION)
|
326
|
+
.set("id_field_name", "int32_field")
|
327
|
+
.set("query", "{\"double_field\":{\"$gte\": 1.23}}")
|
328
|
+
.set("incremental_field", Optional.of(Arrays.asList("int32_field", "datetime_field", "boolean_field")))
|
329
|
+
.set("last_record", previousLastRecord);
|
330
|
+
|
331
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
332
|
+
|
333
|
+
dropCollection(task, MONGO_COLLECTION);
|
334
|
+
createCollection(task, MONGO_COLLECTION);
|
335
|
+
insertDocument(task, createValidDocuments());
|
336
|
+
|
337
|
+
ConfigDiff diff = plugin.transaction(config, new Control());
|
338
|
+
ConfigDiff lastRecord = diff.getNested("last_record");
|
339
|
+
|
340
|
+
assertEquals("1", lastRecord.get(String.class, "int32_field"));
|
341
|
+
assertEquals("{$date=2015-01-27T10:23:49.000Z}", lastRecord.get(String.class, "datetime_field"));
|
342
|
+
assertEquals("true", lastRecord.get(String.class, "boolean_field"));
|
343
|
+
}
|
344
|
+
|
317
345
|
@Test(expected = ConfigException.class)
|
318
346
|
public void testRunWithIncrementalLoadUnsupportedType() throws Exception
|
319
347
|
{
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mongodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuyuki Honda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,9 @@ files:
|
|
58
58
|
- gradlew
|
59
59
|
- gradlew.bat
|
60
60
|
- lib/embulk/input/mongodb.rb
|
61
|
+
- src/main/java/org/embulk/input/mongodb/HostTask.java
|
61
62
|
- src/main/java/org/embulk/input/mongodb/MongodbInputPlugin.java
|
63
|
+
- src/main/java/org/embulk/input/mongodb/PluginTask.java
|
62
64
|
- src/main/java/org/embulk/input/mongodb/ValueCodec.java
|
63
65
|
- src/test/java/org/embulk/input/mongodb/TestMongodbInputPlugin.java
|
64
66
|
- src/test/resources/basic.yml
|
@@ -68,7 +70,7 @@ files:
|
|
68
70
|
- src/test/resources/id_field_name.yml
|
69
71
|
- src/test/resources/id_field_name_expected.csv
|
70
72
|
- src/test/resources/my_collection.jsonl
|
71
|
-
- classpath/embulk-input-mongodb-0.6.0.jar
|
73
|
+
- classpath/embulk-input-mongodb-0.6.1.jar
|
72
74
|
- classpath/mongo-java-driver-3.6.1.jar
|
73
75
|
homepage: https://github.com/hakobera/embulk-input-mongodb
|
74
76
|
licenses:
|