embulk-input-mongodb 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +14 -3
- data/README.md +5 -12
- data/build.gradle +4 -4
- data/classpath/embulk-input-mongodb-0.2.0.jar +0 -0
- data/classpath/mongo-java-driver-3.2.2.jar +0 -0
- data/src/main/java/org/embulk/input/mongodb/MongodbInputPlugin.java +50 -24
- data/src/test/resources/basic.yml +1 -0
- data/src/test/resources/basic_expected.csv +2 -2
- data/src/test/resources/full.yml +1 -0
- data/src/test/resources/full_expected.csv +8 -8
- data/src/test/resources/my_collection.jsonl +9 -9
- metadata +4 -4
- data/classpath/embulk-input-mongodb-0.1.2.jar +0 -0
- data/classpath/mongo-java-driver-3.0.3.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eca1bf1b21b4a44bab520a7cb93f59df2c18ea82
|
4
|
+
data.tar.gz: 98f866d2313fe81dbf96da2fe61b9a303afcd698
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6251028e2ec5dc41523cbc43d3a06f57cde1a7b05a9a21a8098d169e6c3d0e05af74d12e325847fcbb74328f8c9cf98eb8e6ed167091cb2215bfbf2341fdb5ff
|
7
|
+
data.tar.gz: 7a6c238d53ab10733917a05a170a3d06aacb8cbbd8a631c87d86d5fa2a6a198e887df27b61bfebe10655eeefe48293bf0a174417435aed4926285d6ef5ba581f
|
data/.travis.yml
CHANGED
@@ -7,13 +7,21 @@ jdk:
|
|
7
7
|
|
8
8
|
sudo: required
|
9
9
|
|
10
|
+
# Workaround for the buffer overflow error on OpenJDK 7
|
11
|
+
# ref: https://github.com/travis-ci/travis-ci/issues/5227#issuecomment-165131913
|
12
|
+
before_install:
|
13
|
+
- cat /etc/hosts # optionally check the content *before*
|
14
|
+
- sudo hostname "$(hostname | cut -c1-63)"
|
15
|
+
- sed -e "s/^\\(127\\.0\\.0\\.1.*\\)/\\1 $(hostname | cut -c1-63)/" /etc/hosts | sudo tee /etc/hosts
|
16
|
+
- cat /etc/hosts # optionally check the content *after*
|
17
|
+
|
10
18
|
install:
|
11
19
|
- sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
|
12
|
-
- echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.
|
20
|
+
- echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.2.list
|
13
21
|
- sudo apt-get update
|
14
|
-
- sudo apt-get install -y mongodb-org
|
22
|
+
- sudo apt-get install -y --force-yes mongodb-org
|
15
23
|
- mongod -version
|
16
|
-
- curl --create-dirs -o ~/.embulk/bin/embulk -L "
|
24
|
+
- curl --create-dirs -o ~/.embulk/bin/embulk -L "https://dl.bintray.com/embulk/maven/embulk-0.8.8.jar"
|
17
25
|
- chmod +x ~/.embulk/bin/embulk
|
18
26
|
- export PATH="$HOME/.embulk/bin:$PATH"
|
19
27
|
- embulk --version
|
@@ -27,5 +35,8 @@ before_script:
|
|
27
35
|
|
28
36
|
script:
|
29
37
|
- embulk run -L . src/test/resources/basic.yml
|
38
|
+
- cat tmp/basic000.00.csv
|
39
|
+
- cmp tmp/basic000.00.csv src/test/resources/basic_expected.csv || exit 1
|
30
40
|
- embulk run -L . src/test/resources/full.yml
|
41
|
+
- cat tmp/full000.00.csv
|
31
42
|
- cmp tmp/full000.00.csv src/test/resources/full_expected.csv || exit 1
|
data/README.md
CHANGED
@@ -4,17 +4,9 @@
|
|
4
4
|
|
5
5
|
MongoDB input plugin for Embulk loads records from MongoDB.
|
6
6
|
|
7
|
-
**CAUTION:** this plugin does not support array and object fields,
|
8
|
-
because embulk does not support these types yet.
|
9
|
-
These types will be supported by embulk in the future; once they are, this plugin will add support for them.
|
10
|
-
For more detail see following issues.
|
11
|
-
|
12
|
-
- https://github.com/embulk/embulk/issues/120
|
13
|
-
- https://github.com/embulk/embulk/issues/121
|
14
|
-
|
15
7
|
## Overview
|
16
8
|
|
17
|
-
This plugin only works with embulk >= 0.
|
9
|
+
This plugin only works with embulk >= 0.8.8.
|
18
10
|
|
19
11
|
* **Plugin type**: input
|
20
12
|
* **Resume supported**: no
|
@@ -43,26 +35,27 @@ This plugin only works with embulk >= 0.7.4.
|
|
43
35
|
```yaml
|
44
36
|
in:
|
45
37
|
type: mongodb
|
46
|
-
uri: mongodb://myuser@
|
38
|
+
uri: mongodb://myuser:mypassword@localhost:27017/my_database
|
47
39
|
collection: "my_collection"
|
48
40
|
fields:
|
49
41
|
- { name: id, type: string }
|
50
42
|
- { name: field1, type: long }
|
51
43
|
- { name: field2, type: timestamp }
|
44
|
+
- { name: field3, type: json }
|
52
45
|
```
|
53
46
|
|
54
47
|
### Filter object by query and sort
|
55
48
|
|
56
|
-
|
57
49
|
```yaml
|
58
50
|
in:
|
59
51
|
type: mongodb
|
60
|
-
uri: mongodb://myuser@
|
52
|
+
uri: mongodb://myuser:mypassword@localhost:27017/my_database
|
61
53
|
collection: "my_collection"
|
62
54
|
fields:
|
63
55
|
- { name: id, type: string }
|
64
56
|
- { name: field1, type: long }
|
65
57
|
- { name: field2, type: timestamp }
|
58
|
+
- { name: field3, type: json }
|
66
59
|
query: '{ field1: { $gte: 3 } }'
|
67
60
|
sort: '{ field1: 1 }'
|
68
61
|
```
|
data/build.gradle
CHANGED
@@ -15,15 +15,15 @@ configurations {
|
|
15
15
|
provided
|
16
16
|
}
|
17
17
|
|
18
|
-
version = "0.
|
18
|
+
version = "0.2.0"
|
19
19
|
|
20
20
|
sourceCompatibility = 1.7
|
21
21
|
targetCompatibility = 1.7
|
22
22
|
|
23
23
|
dependencies {
|
24
|
-
compile "org.embulk:embulk-core:0.
|
25
|
-
provided "org.embulk:embulk-core:0.
|
26
|
-
compile "org.mongodb:mongo-java-driver:3.
|
24
|
+
compile "org.embulk:embulk-core:0.8.8"
|
25
|
+
provided "org.embulk:embulk-core:0.8.8"
|
26
|
+
compile "org.mongodb:mongo-java-driver:3.2.2"
|
27
27
|
|
28
28
|
testCompile "junit:junit:4.+"
|
29
29
|
}
|
Binary file
|
Binary file
|
@@ -1,7 +1,9 @@
|
|
1
1
|
package org.embulk.input.mongodb;
|
2
2
|
|
3
|
+
import com.google.common.base.Throwables;
|
3
4
|
import com.mongodb.MongoClient;
|
4
5
|
import com.mongodb.MongoClientURI;
|
6
|
+
import com.mongodb.MongoException;
|
5
7
|
import com.mongodb.client.MongoCollection;
|
6
8
|
import com.mongodb.client.MongoCursor;
|
7
9
|
import com.mongodb.client.MongoDatabase;
|
@@ -11,6 +13,7 @@ import org.bson.conversions.Bson;
|
|
11
13
|
import org.embulk.config.Config;
|
12
14
|
import org.embulk.config.ConfigDefault;
|
13
15
|
import org.embulk.config.ConfigDiff;
|
16
|
+
import org.embulk.config.ConfigException;
|
14
17
|
import org.embulk.config.ConfigInject;
|
15
18
|
import org.embulk.config.ConfigSource;
|
16
19
|
import org.embulk.config.Task;
|
@@ -25,10 +28,13 @@ import org.embulk.spi.PageBuilder;
|
|
25
28
|
import org.embulk.spi.PageOutput;
|
26
29
|
import org.embulk.spi.Schema;
|
27
30
|
import org.embulk.spi.SchemaConfig;
|
31
|
+
import org.embulk.spi.json.JsonParser;
|
28
32
|
import org.embulk.spi.time.Timestamp;
|
29
33
|
import org.embulk.spi.type.Type;
|
30
34
|
import org.slf4j.Logger;
|
31
35
|
|
36
|
+
import javax.validation.constraints.Min;
|
37
|
+
import java.net.UnknownHostException;
|
32
38
|
import java.util.List;
|
33
39
|
|
34
40
|
public class MongodbInputPlugin
|
@@ -57,10 +63,11 @@ public class MongodbInputPlugin
|
|
57
63
|
|
58
64
|
@Config("batch_size")
|
59
65
|
@ConfigDefault("10000")
|
60
|
-
|
66
|
+
@Min(1)
|
67
|
+
int getBatchSize();
|
61
68
|
|
62
69
|
@ConfigInject
|
63
|
-
|
70
|
+
BufferAllocator getBufferAllocator();
|
64
71
|
}
|
65
72
|
|
66
73
|
private final Logger log = Exec.getLogger(MongodbInputPlugin.class);
|
@@ -70,6 +77,12 @@ public class MongodbInputPlugin
|
|
70
77
|
InputPlugin.Control control)
|
71
78
|
{
|
72
79
|
PluginTask task = config.loadConfig(PluginTask.class);
|
80
|
+
// Connect once up front so that a ConfigException is thrown at an early stage of execution
|
81
|
+
try {
|
82
|
+
connect(task);
|
83
|
+
} catch (UnknownHostException | MongoException ex) {
|
84
|
+
throw new ConfigException(ex);
|
85
|
+
}
|
73
86
|
Schema schema = task.getFields().toSchema();
|
74
87
|
return resume(task.dump(), schema, 1, control);
|
75
88
|
}
|
@@ -99,9 +112,16 @@ public class MongodbInputPlugin
|
|
99
112
|
PluginTask task = taskSource.loadTask(PluginTask.class);
|
100
113
|
BufferAllocator allocator = task.getBufferAllocator();
|
101
114
|
PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
|
115
|
+
JsonParser jsonParser = new JsonParser();
|
116
|
+
List<Column> columns = pageBuilder.getSchema().getColumns();
|
102
117
|
|
103
|
-
|
104
|
-
|
118
|
+
MongoCollection<Document> collection;
|
119
|
+
try {
|
120
|
+
MongoDatabase db = connect(task);
|
121
|
+
collection = db.getCollection(task.getCollection());
|
122
|
+
} catch (UnknownHostException | MongoException ex) {
|
123
|
+
throw new ConfigException(ex);
|
124
|
+
}
|
105
125
|
|
106
126
|
Bson query = (Bson) JSON.parse(task.getQuery());
|
107
127
|
Bson projection = getProjection(task);
|
@@ -111,25 +131,22 @@ public class MongodbInputPlugin
|
|
111
131
|
log.trace("projection: {}", projection);
|
112
132
|
log.trace("sort: {}", sort);
|
113
133
|
|
114
|
-
MongoCursor<Document> cursor = collection
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
try {
|
134
|
+
try (MongoCursor<Document> cursor = collection
|
135
|
+
.find(query)
|
136
|
+
.projection(projection)
|
137
|
+
.sort(sort)
|
138
|
+
.batchSize(task.getBatchSize())
|
139
|
+
.iterator()) {
|
122
140
|
while (cursor.hasNext()) {
|
123
|
-
fetch(cursor, pageBuilder);
|
141
|
+
fetch(cursor, pageBuilder, jsonParser, columns);
|
124
142
|
}
|
125
|
-
}
|
126
|
-
|
143
|
+
} catch (MongoException ex) {
|
144
|
+
Throwables.propagate(ex);
|
127
145
|
}
|
128
146
|
|
129
147
|
pageBuilder.finish();
|
130
148
|
|
131
|
-
|
132
|
-
return report;
|
149
|
+
return Exec.newTaskReport();
|
133
150
|
}
|
134
151
|
|
135
152
|
@Override
|
@@ -138,15 +155,19 @@ public class MongodbInputPlugin
|
|
138
155
|
return Exec.newConfigDiff();
|
139
156
|
}
|
140
157
|
|
141
|
-
private MongoDatabase connect(PluginTask task) {
|
158
|
+
private MongoDatabase connect(final PluginTask task) throws UnknownHostException, MongoException {
|
142
159
|
MongoClientURI uri = new MongoClientURI(task.getUri());
|
143
160
|
MongoClient mongoClient = new MongoClient(uri);
|
144
|
-
|
161
|
+
|
162
|
+
MongoDatabase db = mongoClient.getDatabase(uri.getDatabase());
|
163
|
+
// Call count() on the collection so that any exception is thrown here
|
164
|
+
db.getCollection(task.getCollection()).count();
|
165
|
+
return db;
|
145
166
|
}
|
146
167
|
|
147
|
-
private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder
|
168
|
+
private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder,
|
169
|
+
JsonParser jsonParser, List<Column> columns) {
|
148
170
|
Document doc = cursor.next();
|
149
|
-
List<Column> columns = pageBuilder.getSchema().getColumns();
|
150
171
|
for (Column c : columns) {
|
151
172
|
Type t = c.getType();
|
152
173
|
String key = normalize(c.getName());
|
@@ -170,12 +191,17 @@ public class MongodbInputPlugin
|
|
170
191
|
break;
|
171
192
|
|
172
193
|
case "string":
|
173
|
-
|
174
|
-
|
175
|
-
|
194
|
+
// Use doc.get(key).toString() instead of doc.getString(key) so that objects such as ObjectId can be output as strings.
|
195
|
+
pageBuilder.setString(c, doc.get(key).toString());
|
196
|
+
break;
|
176
197
|
|
177
198
|
case "timestamp":
|
178
199
|
pageBuilder.setTimestamp(c, Timestamp.ofEpochMilli(doc.getDate(key).getTime()));
|
200
|
+
break;
|
201
|
+
|
202
|
+
case "json":
|
203
|
+
pageBuilder.setJson(c, jsonParser.parse(((Document) doc.get(key)).toJson()));
|
204
|
+
break;
|
179
205
|
}
|
180
206
|
}
|
181
207
|
}
|
data/src/test/resources/full.yml
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
id,name,rank,value,created_at
|
2
|
-
55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000
|
3
|
-
55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000
|
4
|
-
55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000
|
5
|
-
55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000
|
6
|
-
55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000
|
7
|
-
55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000
|
8
|
-
55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000
|
1
|
+
id,name,rank,value,created_at,embeded
|
2
|
+
55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000,"{""key"":""value9""}"
|
3
|
+
55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000,"{""key"":""value8""}"
|
4
|
+
55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000,"{""key"":""value7""}"
|
5
|
+
55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000,"{""key"":""value6""}"
|
6
|
+
55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000,"{""key"":""value5""}"
|
7
|
+
55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000,"{""key"":{""inner_key"":""value4""}}"
|
8
|
+
55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000,"{""key"":[""v3-1"",""v3-2""]}"
|
@@ -1,9 +1,9 @@
|
|
1
|
-
{ "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 } }
|
2
|
-
{ "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 } }
|
3
|
-
{ "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 } }
|
4
|
-
{ "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 } }
|
5
|
-
{ "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 } }
|
6
|
-
{ "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 } }
|
7
|
-
{ "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 } }
|
8
|
-
{ "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 } }
|
9
|
-
{ "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 } }
|
1
|
+
{ "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 }, "embeded": { "key": "value1" } }
|
2
|
+
{ "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 }, "embeded": { "key": "value2" } }
|
3
|
+
{ "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 }, "embeded": { "key": ["v3-1", "v3-2"]} }
|
4
|
+
{ "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 }, "embeded": { "key": { "inner_key": "value4" } } }
|
5
|
+
{ "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 }, "embeded": { "key": "value5" } }
|
6
|
+
{ "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 }, "embeded": { "key": "value6" } }
|
7
|
+
{ "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 }, "embeded": { "key": "value7" } }
|
8
|
+
{ "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 }, "embeded": { "key": "value8" } }
|
9
|
+
{ "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 }, "embeded": { "key": "value9" } }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: embulk-input-mongodb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kazuyuki Honda
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -62,8 +62,8 @@ files:
|
|
62
62
|
- src/test/resources/full.yml
|
63
63
|
- src/test/resources/full_expected.csv
|
64
64
|
- src/test/resources/my_collection.jsonl
|
65
|
-
- classpath/embulk-input-mongodb-0.
|
66
|
-
- classpath/mongo-java-driver-3.
|
65
|
+
- classpath/embulk-input-mongodb-0.2.0.jar
|
66
|
+
- classpath/mongo-java-driver-3.2.2.jar
|
67
67
|
homepage: https://github.com/hakobera/embulk-input-mongodb
|
68
68
|
licenses:
|
69
69
|
- MIT
|
Binary file
|
Binary file
|