embulk-input-mongodb 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f727800501212f31289b40afce7c6df6dd55b720
4
- data.tar.gz: 5aace5d182c198c5bd9cc937a6de69e69008b2ad
3
+ metadata.gz: eca1bf1b21b4a44bab520a7cb93f59df2c18ea82
4
+ data.tar.gz: 98f866d2313fe81dbf96da2fe61b9a303afcd698
5
5
  SHA512:
6
- metadata.gz: d338e4b5b5c1e117f3b57f9200d1224360af298bf1104567768f9e901285066e8a6124b6811a1697ec27520bbe52f1f295e1c2397632f2462e2ffb28e5332e30
7
- data.tar.gz: b017a18476c34ab56202584b63c61121e2a0a5098d13f11a25421e8b72974d979aeea3229ddff888836486ce644195fe6c22ad698f3955a64b37e82832ffc446
6
+ metadata.gz: 6251028e2ec5dc41523cbc43d3a06f57cde1a7b05a9a21a8098d169e6c3d0e05af74d12e325847fcbb74328f8c9cf98eb8e6ed167091cb2215bfbf2341fdb5ff
7
+ data.tar.gz: 7a6c238d53ab10733917a05a170a3d06aacb8cbbd8a631c87d86d5fa2a6a198e887df27b61bfebe10655eeefe48293bf0a174417435aed4926285d6ef5ba581f
data/.travis.yml CHANGED
@@ -7,13 +7,21 @@ jdk:
7
7
 
8
8
  sudo: required
9
9
 
10
+ # Workaround for buffer overflow error on OpenJDK7
11
+ # ref: https://github.com/travis-ci/travis-ci/issues/5227#issuecomment-165131913
12
+ before_install:
13
+ - cat /etc/hosts # optionally check the content *before*
14
+ - sudo hostname "$(hostname | cut -c1-63)"
15
+ - sed -e "s/^\\(127\\.0\\.0\\.1.*\\)/\\1 $(hostname | cut -c1-63)/" /etc/hosts | sudo tee /etc/hosts
16
+ - cat /etc/hosts # optionally check the content *after*
17
+
10
18
  install:
11
19
  - sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
12
- - echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
20
+ - echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.2.list
13
21
  - sudo apt-get update
14
- - sudo apt-get install -y mongodb-org=3.0.6
22
+ - sudo apt-get install -y --force-yes mongodb-org
15
23
  - mongod -version
16
- - curl --create-dirs -o ~/.embulk/bin/embulk -L "http://dl.embulk.org/embulk-latest.jar"
24
+ - curl --create-dirs -o ~/.embulk/bin/embulk -L "https://dl.bintray.com/embulk/maven/embulk-0.8.8.jar"
17
25
  - chmod +x ~/.embulk/bin/embulk
18
26
  - export PATH="$HOME/.embulk/bin:$PATH"
19
27
  - embulk --version
@@ -27,5 +35,8 @@ before_script:
27
35
 
28
36
  script:
29
37
  - embulk run -L . src/test/resources/basic.yml
38
+ - cat tmp/basic000.00.csv
39
+ - cmp tmp/basic000.00.csv src/test/resources/basic_expected.csv || exit 1
30
40
  - embulk run -L . src/test/resources/full.yml
41
+ - cat tmp/full000.00.csv
31
42
  - cmp tmp/full000.00.csv src/test/resources/full_expected.csv || exit 1
data/README.md CHANGED
@@ -4,17 +4,9 @@
4
4
 
5
5
  MongoDB input plugin for Embulk loads records from MongoDB.
6
6
 
7
- **CAUTION:** this plugin does not support array and object fields,
8
- because embulk does not supported these types yet.
9
- But these types will be supported, so when it supported I add support these types.
10
- For more detail see following issues.
11
-
12
- - https://github.com/embulk/embulk/issues/120
13
- - https://github.com/embulk/embulk/issues/121
14
-
15
7
  ## Overview
16
8
 
17
- This plugin only works with embulk >= 0.7.4.
9
+ This plugin only works with embulk >= 0.8.8.
18
10
 
19
11
  * **Plugin type**: input
20
12
  * **Resume supported**: no
@@ -43,26 +35,27 @@ This plugin only works with embulk >= 0.7.4.
43
35
  ```yaml
44
36
  in:
45
37
  type: mongodb
46
- uri: mongodb://myuser@mypassword:localhost:27017/my_database
38
+ uri: mongodb://myuser:mypassword@localhost:27017/my_database
47
39
  collection: "my_collection"
48
40
  fields:
49
41
  - { name: id, type: string }
50
42
  - { name: field1, type: long }
51
43
  - { name: field2, type: timestamp }
44
+ - { name: field3, type: json }
52
45
  ```
53
46
 
54
47
  ### Filter object by query and sort
55
48
 
56
-
57
49
  ```yaml
58
50
  in:
59
51
  type: mongodb
60
- uri: mongodb://myuser@mypassword:localhost:27017/my_database
52
+ uri: mongodb://myuser:mypassword@localhost:27017/my_database
61
53
  collection: "my_collection"
62
54
  fields:
63
55
  - { name: id, type: string }
64
56
  - { name: field1, type: long }
65
57
  - { name: field2, type: timestamp }
58
+ - { name: field3, type: json }
66
59
  query: '{ field1: { $gte: 3 } }'
67
60
  sort: '{ field1: 1 }'
68
61
  ```
data/build.gradle CHANGED
@@ -15,15 +15,15 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.1.2"
18
+ version = "0.2.0"
19
19
 
20
20
  sourceCompatibility = 1.7
21
21
  targetCompatibility = 1.7
22
22
 
23
23
  dependencies {
24
- compile "org.embulk:embulk-core:0.7.4"
25
- provided "org.embulk:embulk-core:0.7.4"
26
- compile "org.mongodb:mongo-java-driver:3.0.3"
24
+ compile "org.embulk:embulk-core:0.8.8"
25
+ provided "org.embulk:embulk-core:0.8.8"
26
+ compile "org.mongodb:mongo-java-driver:3.2.2"
27
27
 
28
28
  testCompile "junit:junit:4.+"
29
29
  }
@@ -1,7 +1,9 @@
1
1
  package org.embulk.input.mongodb;
2
2
 
3
+ import com.google.common.base.Throwables;
3
4
  import com.mongodb.MongoClient;
4
5
  import com.mongodb.MongoClientURI;
6
+ import com.mongodb.MongoException;
5
7
  import com.mongodb.client.MongoCollection;
6
8
  import com.mongodb.client.MongoCursor;
7
9
  import com.mongodb.client.MongoDatabase;
@@ -11,6 +13,7 @@ import org.bson.conversions.Bson;
11
13
  import org.embulk.config.Config;
12
14
  import org.embulk.config.ConfigDefault;
13
15
  import org.embulk.config.ConfigDiff;
16
+ import org.embulk.config.ConfigException;
14
17
  import org.embulk.config.ConfigInject;
15
18
  import org.embulk.config.ConfigSource;
16
19
  import org.embulk.config.Task;
@@ -25,10 +28,13 @@ import org.embulk.spi.PageBuilder;
25
28
  import org.embulk.spi.PageOutput;
26
29
  import org.embulk.spi.Schema;
27
30
  import org.embulk.spi.SchemaConfig;
31
+ import org.embulk.spi.json.JsonParser;
28
32
  import org.embulk.spi.time.Timestamp;
29
33
  import org.embulk.spi.type.Type;
30
34
  import org.slf4j.Logger;
31
35
 
36
+ import javax.validation.constraints.Min;
37
+ import java.net.UnknownHostException;
32
38
  import java.util.List;
33
39
 
34
40
  public class MongodbInputPlugin
@@ -57,10 +63,11 @@ public class MongodbInputPlugin
57
63
 
58
64
  @Config("batch_size")
59
65
  @ConfigDefault("10000")
60
- Integer getBatchSize();
66
+ @Min(1)
67
+ int getBatchSize();
61
68
 
62
69
  @ConfigInject
63
- public BufferAllocator getBufferAllocator();
70
+ BufferAllocator getBufferAllocator();
64
71
  }
65
72
 
66
73
  private final Logger log = Exec.getLogger(MongodbInputPlugin.class);
@@ -70,6 +77,12 @@ public class MongodbInputPlugin
70
77
  InputPlugin.Control control)
71
78
  {
72
79
  PluginTask task = config.loadConfig(PluginTask.class);
80
+ // Connect once to throw ConfigException in earlier stage of excecution
81
+ try {
82
+ connect(task);
83
+ } catch (UnknownHostException | MongoException ex) {
84
+ throw new ConfigException(ex);
85
+ }
73
86
  Schema schema = task.getFields().toSchema();
74
87
  return resume(task.dump(), schema, 1, control);
75
88
  }
@@ -99,9 +112,16 @@ public class MongodbInputPlugin
99
112
  PluginTask task = taskSource.loadTask(PluginTask.class);
100
113
  BufferAllocator allocator = task.getBufferAllocator();
101
114
  PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
115
+ JsonParser jsonParser = new JsonParser();
116
+ List<Column> columns = pageBuilder.getSchema().getColumns();
102
117
 
103
- MongoDatabase db = connect(task);
104
- MongoCollection<Document> collection = db.getCollection(task.getCollection());
118
+ MongoCollection<Document> collection;
119
+ try {
120
+ MongoDatabase db = connect(task);
121
+ collection = db.getCollection(task.getCollection());
122
+ } catch (UnknownHostException | MongoException ex) {
123
+ throw new ConfigException(ex);
124
+ }
105
125
 
106
126
  Bson query = (Bson) JSON.parse(task.getQuery());
107
127
  Bson projection = getProjection(task);
@@ -111,25 +131,22 @@ public class MongodbInputPlugin
111
131
  log.trace("projection: {}", projection);
112
132
  log.trace("sort: {}", sort);
113
133
 
114
- MongoCursor<Document> cursor = collection
115
- .find(query)
116
- .projection(projection)
117
- .sort(sort)
118
- .batchSize(task.getBatchSize())
119
- .iterator();
120
-
121
- try {
134
+ try (MongoCursor<Document> cursor = collection
135
+ .find(query)
136
+ .projection(projection)
137
+ .sort(sort)
138
+ .batchSize(task.getBatchSize())
139
+ .iterator()) {
122
140
  while (cursor.hasNext()) {
123
- fetch(cursor, pageBuilder);
141
+ fetch(cursor, pageBuilder, jsonParser, columns);
124
142
  }
125
- } finally {
126
- cursor.close();
143
+ } catch (MongoException ex) {
144
+ Throwables.propagate(ex);
127
145
  }
128
146
 
129
147
  pageBuilder.finish();
130
148
 
131
- TaskReport report = Exec.newTaskReport();
132
- return report;
149
+ return Exec.newTaskReport();
133
150
  }
134
151
 
135
152
  @Override
@@ -138,15 +155,19 @@ public class MongodbInputPlugin
138
155
  return Exec.newConfigDiff();
139
156
  }
140
157
 
141
- private MongoDatabase connect(PluginTask task) {
158
+ private MongoDatabase connect(final PluginTask task) throws UnknownHostException, MongoException {
142
159
  MongoClientURI uri = new MongoClientURI(task.getUri());
143
160
  MongoClient mongoClient = new MongoClient(uri);
144
- return mongoClient.getDatabase(uri.getDatabase());
161
+
162
+ MongoDatabase db = mongoClient.getDatabase(uri.getDatabase());
163
+ // Get collection count for throw Exception
164
+ db.getCollection(task.getCollection()).count();
165
+ return db;
145
166
  }
146
167
 
147
- private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder) {
168
+ private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder,
169
+ JsonParser jsonParser, List<Column> columns) {
148
170
  Document doc = cursor.next();
149
- List<Column> columns = pageBuilder.getSchema().getColumns();
150
171
  for (Column c : columns) {
151
172
  Type t = c.getType();
152
173
  String key = normalize(c.getName());
@@ -170,12 +191,17 @@ public class MongodbInputPlugin
170
191
  break;
171
192
 
172
193
  case "string":
173
- // Enable output object like ObjectId as string, this is reason I don't use doc.getString(key).
174
- pageBuilder.setString(c, doc.get(key).toString());
175
- break;
194
+ // Enable output object like ObjectId as string, this is reason I don't use doc.getString(key).
195
+ pageBuilder.setString(c, doc.get(key).toString());
196
+ break;
176
197
 
177
198
  case "timestamp":
178
199
  pageBuilder.setTimestamp(c, Timestamp.ofEpochMilli(doc.getDate(key).getTime()));
200
+ break;
201
+
202
+ case "json":
203
+ pageBuilder.setJson(c, jsonParser.parse(((Document) doc.get(key)).toJson()));
204
+ break;
179
205
  }
180
206
  }
181
207
  }
@@ -5,6 +5,7 @@ in:
5
5
  fields:
6
6
  - { name: name, type: string }
7
7
  - { name: rank, type: long }
8
+ sort: '{ rank: 1 }'
8
9
  out:
9
10
  type: file
10
11
  path_prefix: ./tmp/basic
@@ -1,10 +1,10 @@
1
1
  name,rank
2
2
  obj1,1
3
3
  obj2,2
4
- obj4,4
5
4
  obj3,3
5
+ obj4,4
6
6
  obj5,5
7
7
  obj6,6
8
8
  obj7,7
9
- obj9,9
10
9
  obj8,8
10
+ obj9,9
@@ -8,6 +8,7 @@ in:
8
8
  - { name: rank, type: long }
9
9
  - { name: value, type: double }
10
10
  - { name: created_at, type: timestamp }
11
+ - { name: embeded, type: json }
11
12
  query: '{ rank: { $gte: 3 } }'
12
13
  sort: '{ rank: -1 }'
13
14
  batch_size: 100
@@ -1,8 +1,8 @@
1
- id,name,rank,value,created_at
2
- 55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000
3
- 55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000
4
- 55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000
5
- 55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000
6
- 55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000
7
- 55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000
8
- 55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000
1
+ id,name,rank,value,created_at,embeded
2
+ 55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000,"{""key"":""value9""}"
3
+ 55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000,"{""key"":""value8""}"
4
+ 55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000,"{""key"":""value7""}"
5
+ 55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000,"{""key"":""value6""}"
6
+ 55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000,"{""key"":""value5""}"
7
+ 55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000,"{""key"":{""inner_key"":""value4""}}"
8
+ 55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000,"{""key"":[""v3-1"",""v3-2""]}"
@@ -1,9 +1,9 @@
1
- { "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 } }
2
- { "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 } }
3
- { "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 } }
4
- { "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 } }
5
- { "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 } }
6
- { "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 } }
7
- { "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 } }
8
- { "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 } }
9
- { "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 } }
1
+ { "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 }, "embeded": { "key": "value1" } }
2
+ { "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 }, "embeded": { "key": "value2" } }
3
+ { "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 }, "embeded": { "key": ["v3-1", "v3-2"]} }
4
+ { "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 }, "embeded": { "key": { "inner_key": "value4" } } }
5
+ { "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 }, "embeded": { "key": "value5" } }
6
+ { "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 }, "embeded": { "key": "value6" } }
7
+ { "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 }, "embeded": { "key": "value7" } }
8
+ { "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 }, "embeded": { "key": "value8" } }
9
+ { "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 }, "embeded": { "key": "value9" } }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-mongodb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuyuki Honda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-10 00:00:00.000000000 Z
11
+ date: 2016-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,8 +62,8 @@ files:
62
62
  - src/test/resources/full.yml
63
63
  - src/test/resources/full_expected.csv
64
64
  - src/test/resources/my_collection.jsonl
65
- - classpath/embulk-input-mongodb-0.1.2.jar
66
- - classpath/mongo-java-driver-3.0.3.jar
65
+ - classpath/embulk-input-mongodb-0.2.0.jar
66
+ - classpath/mongo-java-driver-3.2.2.jar
67
67
  homepage: https://github.com/hakobera/embulk-input-mongodb
68
68
  licenses:
69
69
  - MIT
Binary file