embulk-input-mongodb 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +14 -3
- data/README.md +5 -12
- data/build.gradle +4 -4
- data/classpath/embulk-input-mongodb-0.2.0.jar +0 -0
- data/classpath/mongo-java-driver-3.2.2.jar +0 -0
- data/src/main/java/org/embulk/input/mongodb/MongodbInputPlugin.java +50 -24
- data/src/test/resources/basic.yml +1 -0
- data/src/test/resources/basic_expected.csv +2 -2
- data/src/test/resources/full.yml +1 -0
- data/src/test/resources/full_expected.csv +8 -8
- data/src/test/resources/my_collection.jsonl +9 -9
- metadata +4 -4
- data/classpath/embulk-input-mongodb-0.1.2.jar +0 -0
- data/classpath/mongo-java-driver-3.0.3.jar +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: eca1bf1b21b4a44bab520a7cb93f59df2c18ea82
|
|
4
|
+
data.tar.gz: 98f866d2313fe81dbf96da2fe61b9a303afcd698
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6251028e2ec5dc41523cbc43d3a06f57cde1a7b05a9a21a8098d169e6c3d0e05af74d12e325847fcbb74328f8c9cf98eb8e6ed167091cb2215bfbf2341fdb5ff
|
|
7
|
+
data.tar.gz: 7a6c238d53ab10733917a05a170a3d06aacb8cbbd8a631c87d86d5fa2a6a198e887df27b61bfebe10655eeefe48293bf0a174417435aed4926285d6ef5ba581f
|
data/.travis.yml
CHANGED
|
@@ -7,13 +7,21 @@ jdk:
|
|
|
7
7
|
|
|
8
8
|
sudo: required
|
|
9
9
|
|
|
10
|
+
# Work around fix for buffer overflow error on OpenJDK7
|
|
11
|
+
# ref: https://github.com/travis-ci/travis-ci/issues/5227#issuecomment-165131913
|
|
12
|
+
before_install:
|
|
13
|
+
- cat /etc/hosts # optionally check the content *before*
|
|
14
|
+
- sudo hostname "$(hostname | cut -c1-63)"
|
|
15
|
+
- sed -e "s/^\\(127\\.0\\.0\\.1.*\\)/\\1 $(hostname | cut -c1-63)/" /etc/hosts | sudo tee /etc/hosts
|
|
16
|
+
- cat /etc/hosts # optionally check the content *after*
|
|
17
|
+
|
|
10
18
|
install:
|
|
11
19
|
- sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
|
|
12
|
-
- echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.
|
|
20
|
+
- echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.2.list
|
|
13
21
|
- sudo apt-get update
|
|
14
|
-
- sudo apt-get install -y mongodb-org
|
|
22
|
+
- sudo apt-get install -y --force-yes mongodb-org
|
|
15
23
|
- mongod -version
|
|
16
|
-
- curl --create-dirs -o ~/.embulk/bin/embulk -L "
|
|
24
|
+
- curl --create-dirs -o ~/.embulk/bin/embulk -L "https://dl.bintray.com/embulk/maven/embulk-0.8.8.jar"
|
|
17
25
|
- chmod +x ~/.embulk/bin/embulk
|
|
18
26
|
- export PATH="$HOME/.embulk/bin:$PATH"
|
|
19
27
|
- embulk --version
|
|
@@ -27,5 +35,8 @@ before_script:
|
|
|
27
35
|
|
|
28
36
|
script:
|
|
29
37
|
- embulk run -L . src/test/resources/basic.yml
|
|
38
|
+
- cat tmp/basic000.00.csv
|
|
39
|
+
- cmp tmp/basic000.00.csv src/test/resources/basic_expected.csv || exit 1
|
|
30
40
|
- embulk run -L . src/test/resources/full.yml
|
|
41
|
+
- cat tmp/full000.00.csv
|
|
31
42
|
- cmp tmp/full000.00.csv src/test/resources/full_expected.csv || exit 1
|
data/README.md
CHANGED
|
@@ -4,17 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
MongoDB input plugin for Embulk loads records from MongoDB.
|
|
6
6
|
|
|
7
|
-
**CAUTION:** this plugin does not support array and object fields,
|
|
8
|
-
because embulk does not supported these types yet.
|
|
9
|
-
But these types will be supported, so when it supported I add support these types.
|
|
10
|
-
For more detail see following issues.
|
|
11
|
-
|
|
12
|
-
- https://github.com/embulk/embulk/issues/120
|
|
13
|
-
- https://github.com/embulk/embulk/issues/121
|
|
14
|
-
|
|
15
7
|
## Overview
|
|
16
8
|
|
|
17
|
-
This plugin only works with embulk >= 0.7.4.
|
|
9
|
+
This plugin only works with embulk >= 0.8.8.
|
|
18
10
|
|
|
19
11
|
* **Plugin type**: input
|
|
20
12
|
* **Resume supported**: no
|
|
@@ -43,26 +35,27 @@ This plugin only works with embulk >= 0.7.4.
|
|
|
43
35
|
```yaml
|
|
44
36
|
in:
|
|
45
37
|
type: mongodb
|
|
46
|
-
uri: mongodb://myuser@
|
|
38
|
+
uri: mongodb://myuser:mypassword@localhost:27017/my_database
|
|
47
39
|
collection: "my_collection"
|
|
48
40
|
fields:
|
|
49
41
|
- { name: id, type: string }
|
|
50
42
|
- { name: field1, type: long }
|
|
51
43
|
- { name: field2, type: timestamp }
|
|
44
|
+
- { name: field3, type: json }
|
|
52
45
|
```
|
|
53
46
|
|
|
54
47
|
### Filter object by query and sort
|
|
55
48
|
|
|
56
|
-
|
|
57
49
|
```yaml
|
|
58
50
|
in:
|
|
59
51
|
type: mongodb
|
|
60
|
-
uri: mongodb://myuser@
|
|
52
|
+
uri: mongodb://myuser:mypassword@localhost:27017/my_database
|
|
61
53
|
collection: "my_collection"
|
|
62
54
|
fields:
|
|
63
55
|
- { name: id, type: string }
|
|
64
56
|
- { name: field1, type: long }
|
|
65
57
|
- { name: field2, type: timestamp }
|
|
58
|
+
- { name: field3, type: json }
|
|
66
59
|
query: '{ field1: { $gte: 3 } }'
|
|
67
60
|
sort: '{ field1: 1 }'
|
|
68
61
|
```
|
data/build.gradle
CHANGED
|
@@ -15,15 +15,15 @@ configurations {
|
|
|
15
15
|
provided
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
version = "0.1.2"
|
|
18
|
+
version = "0.2.0"
|
|
19
19
|
|
|
20
20
|
sourceCompatibility = 1.7
|
|
21
21
|
targetCompatibility = 1.7
|
|
22
22
|
|
|
23
23
|
dependencies {
|
|
24
|
-
compile "org.embulk:embulk-core:0.
|
|
25
|
-
provided "org.embulk:embulk-core:0.
|
|
26
|
-
compile "org.mongodb:mongo-java-driver:3.0.3"
|
|
24
|
+
compile "org.embulk:embulk-core:0.8.8"
|
|
25
|
+
provided "org.embulk:embulk-core:0.8.8"
|
|
26
|
+
compile "org.mongodb:mongo-java-driver:3.2.2"
|
|
27
27
|
|
|
28
28
|
testCompile "junit:junit:4.+"
|
|
29
29
|
}
|
|
Binary file
|
|
Binary file
|
|
data/src/main/java/org/embulk/input/mongodb/MongodbInputPlugin.java
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
package org.embulk.input.mongodb;
|
|
2
2
|
|
|
3
|
+
import com.google.common.base.Throwables;
|
|
3
4
|
import com.mongodb.MongoClient;
|
|
4
5
|
import com.mongodb.MongoClientURI;
|
|
6
|
+
import com.mongodb.MongoException;
|
|
5
7
|
import com.mongodb.client.MongoCollection;
|
|
6
8
|
import com.mongodb.client.MongoCursor;
|
|
7
9
|
import com.mongodb.client.MongoDatabase;
|
|
@@ -11,6 +13,7 @@ import org.bson.conversions.Bson;
|
|
|
11
13
|
import org.embulk.config.Config;
|
|
12
14
|
import org.embulk.config.ConfigDefault;
|
|
13
15
|
import org.embulk.config.ConfigDiff;
|
|
16
|
+
import org.embulk.config.ConfigException;
|
|
14
17
|
import org.embulk.config.ConfigInject;
|
|
15
18
|
import org.embulk.config.ConfigSource;
|
|
16
19
|
import org.embulk.config.Task;
|
|
@@ -25,10 +28,13 @@ import org.embulk.spi.PageBuilder;
|
|
|
25
28
|
import org.embulk.spi.PageOutput;
|
|
26
29
|
import org.embulk.spi.Schema;
|
|
27
30
|
import org.embulk.spi.SchemaConfig;
|
|
31
|
+
import org.embulk.spi.json.JsonParser;
|
|
28
32
|
import org.embulk.spi.time.Timestamp;
|
|
29
33
|
import org.embulk.spi.type.Type;
|
|
30
34
|
import org.slf4j.Logger;
|
|
31
35
|
|
|
36
|
+
import javax.validation.constraints.Min;
|
|
37
|
+
import java.net.UnknownHostException;
|
|
32
38
|
import java.util.List;
|
|
33
39
|
|
|
34
40
|
public class MongodbInputPlugin
|
|
@@ -57,10 +63,11 @@ public class MongodbInputPlugin
|
|
|
57
63
|
|
|
58
64
|
@Config("batch_size")
|
|
59
65
|
@ConfigDefault("10000")
|
|
60
|
-
|
|
66
|
+
@Min(1)
|
|
67
|
+
int getBatchSize();
|
|
61
68
|
|
|
62
69
|
@ConfigInject
|
|
63
|
-
|
|
70
|
+
BufferAllocator getBufferAllocator();
|
|
64
71
|
}
|
|
65
72
|
|
|
66
73
|
private final Logger log = Exec.getLogger(MongodbInputPlugin.class);
|
|
@@ -70,6 +77,12 @@ public class MongodbInputPlugin
|
|
|
70
77
|
InputPlugin.Control control)
|
|
71
78
|
{
|
|
72
79
|
PluginTask task = config.loadConfig(PluginTask.class);
|
|
80
|
+
// Connect once to throw ConfigException in earlier stage of excecution
|
|
81
|
+
try {
|
|
82
|
+
connect(task);
|
|
83
|
+
} catch (UnknownHostException | MongoException ex) {
|
|
84
|
+
throw new ConfigException(ex);
|
|
85
|
+
}
|
|
73
86
|
Schema schema = task.getFields().toSchema();
|
|
74
87
|
return resume(task.dump(), schema, 1, control);
|
|
75
88
|
}
|
|
@@ -99,9 +112,16 @@ public class MongodbInputPlugin
|
|
|
99
112
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
|
100
113
|
BufferAllocator allocator = task.getBufferAllocator();
|
|
101
114
|
PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
|
|
115
|
+
JsonParser jsonParser = new JsonParser();
|
|
116
|
+
List<Column> columns = pageBuilder.getSchema().getColumns();
|
|
102
117
|
|
|
103
|
-
|
|
104
|
-
|
|
118
|
+
MongoCollection<Document> collection;
|
|
119
|
+
try {
|
|
120
|
+
MongoDatabase db = connect(task);
|
|
121
|
+
collection = db.getCollection(task.getCollection());
|
|
122
|
+
} catch (UnknownHostException | MongoException ex) {
|
|
123
|
+
throw new ConfigException(ex);
|
|
124
|
+
}
|
|
105
125
|
|
|
106
126
|
Bson query = (Bson) JSON.parse(task.getQuery());
|
|
107
127
|
Bson projection = getProjection(task);
|
|
@@ -111,25 +131,22 @@ public class MongodbInputPlugin
|
|
|
111
131
|
log.trace("projection: {}", projection);
|
|
112
132
|
log.trace("sort: {}", sort);
|
|
113
133
|
|
|
114
|
-
MongoCursor<Document> cursor = collection
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
try {
|
|
134
|
+
try (MongoCursor<Document> cursor = collection
|
|
135
|
+
.find(query)
|
|
136
|
+
.projection(projection)
|
|
137
|
+
.sort(sort)
|
|
138
|
+
.batchSize(task.getBatchSize())
|
|
139
|
+
.iterator()) {
|
|
122
140
|
while (cursor.hasNext()) {
|
|
123
|
-
fetch(cursor, pageBuilder);
|
|
141
|
+
fetch(cursor, pageBuilder, jsonParser, columns);
|
|
124
142
|
}
|
|
125
|
-
}
|
|
126
|
-
|
|
143
|
+
} catch (MongoException ex) {
|
|
144
|
+
Throwables.propagate(ex);
|
|
127
145
|
}
|
|
128
146
|
|
|
129
147
|
pageBuilder.finish();
|
|
130
148
|
|
|
131
|
-
|
|
132
|
-
return report;
|
|
149
|
+
return Exec.newTaskReport();
|
|
133
150
|
}
|
|
134
151
|
|
|
135
152
|
@Override
|
|
@@ -138,15 +155,19 @@ public class MongodbInputPlugin
|
|
|
138
155
|
return Exec.newConfigDiff();
|
|
139
156
|
}
|
|
140
157
|
|
|
141
|
-
private MongoDatabase connect(PluginTask task) {
|
|
158
|
+
private MongoDatabase connect(final PluginTask task) throws UnknownHostException, MongoException {
|
|
142
159
|
MongoClientURI uri = new MongoClientURI(task.getUri());
|
|
143
160
|
MongoClient mongoClient = new MongoClient(uri);
|
|
144
|
-
|
|
161
|
+
|
|
162
|
+
MongoDatabase db = mongoClient.getDatabase(uri.getDatabase());
|
|
163
|
+
// Get collection count for throw Exception
|
|
164
|
+
db.getCollection(task.getCollection()).count();
|
|
165
|
+
return db;
|
|
145
166
|
}
|
|
146
167
|
|
|
147
|
-
private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder
|
|
168
|
+
private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder,
|
|
169
|
+
JsonParser jsonParser, List<Column> columns) {
|
|
148
170
|
Document doc = cursor.next();
|
|
149
|
-
List<Column> columns = pageBuilder.getSchema().getColumns();
|
|
150
171
|
for (Column c : columns) {
|
|
151
172
|
Type t = c.getType();
|
|
152
173
|
String key = normalize(c.getName());
|
|
@@ -170,12 +191,17 @@ public class MongodbInputPlugin
|
|
|
170
191
|
break;
|
|
171
192
|
|
|
172
193
|
case "string":
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
194
|
+
// Enable output object like ObjectId as string, this is reason I don't use doc.getString(key).
|
|
195
|
+
pageBuilder.setString(c, doc.get(key).toString());
|
|
196
|
+
break;
|
|
176
197
|
|
|
177
198
|
case "timestamp":
|
|
178
199
|
pageBuilder.setTimestamp(c, Timestamp.ofEpochMilli(doc.getDate(key).getTime()));
|
|
200
|
+
break;
|
|
201
|
+
|
|
202
|
+
case "json":
|
|
203
|
+
pageBuilder.setJson(c, jsonParser.parse(((Document) doc.get(key)).toJson()));
|
|
204
|
+
break;
|
|
179
205
|
}
|
|
180
206
|
}
|
|
181
207
|
}
|
data/src/test/resources/full.yml
CHANGED
|
data/src/test/resources/full_expected.csv
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
id,name,rank,value,created_at
|
|
2
|
-
55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000
|
|
3
|
-
55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000
|
|
4
|
-
55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000
|
|
5
|
-
55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000
|
|
6
|
-
55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000
|
|
7
|
-
55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000
|
|
8
|
-
55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000
|
|
1
|
+
id,name,rank,value,created_at,embeded
|
|
2
|
+
55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000,"{""key"":""value9""}"
|
|
3
|
+
55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000,"{""key"":""value8""}"
|
|
4
|
+
55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000,"{""key"":""value7""}"
|
|
5
|
+
55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000,"{""key"":""value6""}"
|
|
6
|
+
55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000,"{""key"":""value5""}"
|
|
7
|
+
55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000,"{""key"":{""inner_key"":""value4""}}"
|
|
8
|
+
55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000,"{""key"":[""v3-1"",""v3-2""]}"
|
|
data/src/test/resources/my_collection.jsonl
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
{ "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 } }
|
|
2
|
-
{ "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 } }
|
|
3
|
-
{ "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 } }
|
|
4
|
-
{ "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 } }
|
|
5
|
-
{ "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 } }
|
|
6
|
-
{ "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 } }
|
|
7
|
-
{ "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 } }
|
|
8
|
-
{ "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 } }
|
|
9
|
-
{ "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 } }
|
|
1
|
+
{ "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 }, "embeded": { "key": "value1" } }
|
|
2
|
+
{ "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 }, "embeded": { "key": "value2" } }
|
|
3
|
+
{ "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 }, "embeded": { "key": ["v3-1", "v3-2"]} }
|
|
4
|
+
{ "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 }, "embeded": { "key": { "inner_key": "value4" } } }
|
|
5
|
+
{ "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 }, "embeded": { "key": "value5" } }
|
|
6
|
+
{ "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 }, "embeded": { "key": "value6" } }
|
|
7
|
+
{ "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 }, "embeded": { "key": "value7" } }
|
|
8
|
+
{ "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 }, "embeded": { "key": "value8" } }
|
|
9
|
+
{ "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 }, "embeded": { "key": "value9" } }
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: embulk-input-mongodb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kazuyuki Honda
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2016-05-30 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -62,8 +62,8 @@ files:
|
|
|
62
62
|
- src/test/resources/full.yml
|
|
63
63
|
- src/test/resources/full_expected.csv
|
|
64
64
|
- src/test/resources/my_collection.jsonl
|
|
65
|
-
- classpath/embulk-input-mongodb-0.1.2.jar
|
|
66
|
-
- classpath/mongo-java-driver-3.0.3.jar
|
|
65
|
+
- classpath/embulk-input-mongodb-0.2.0.jar
|
|
66
|
+
- classpath/mongo-java-driver-3.2.2.jar
|
|
67
67
|
homepage: https://github.com/hakobera/embulk-input-mongodb
|
|
68
68
|
licenses:
|
|
69
69
|
- MIT
|
|
Binary file
|
|
Binary file
|