embulk-input-mongodb 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f727800501212f31289b40afce7c6df6dd55b720
4
- data.tar.gz: 5aace5d182c198c5bd9cc937a6de69e69008b2ad
3
+ metadata.gz: eca1bf1b21b4a44bab520a7cb93f59df2c18ea82
4
+ data.tar.gz: 98f866d2313fe81dbf96da2fe61b9a303afcd698
5
5
  SHA512:
6
- metadata.gz: d338e4b5b5c1e117f3b57f9200d1224360af298bf1104567768f9e901285066e8a6124b6811a1697ec27520bbe52f1f295e1c2397632f2462e2ffb28e5332e30
7
- data.tar.gz: b017a18476c34ab56202584b63c61121e2a0a5098d13f11a25421e8b72974d979aeea3229ddff888836486ce644195fe6c22ad698f3955a64b37e82832ffc446
6
+ metadata.gz: 6251028e2ec5dc41523cbc43d3a06f57cde1a7b05a9a21a8098d169e6c3d0e05af74d12e325847fcbb74328f8c9cf98eb8e6ed167091cb2215bfbf2341fdb5ff
7
+ data.tar.gz: 7a6c238d53ab10733917a05a170a3d06aacb8cbbd8a631c87d86d5fa2a6a198e887df27b61bfebe10655eeefe48293bf0a174417435aed4926285d6ef5ba581f
data/.travis.yml CHANGED
@@ -7,13 +7,21 @@ jdk:
7
7
 
8
8
  sudo: required
9
9
 
10
+ # Workaround for buffer overflow error on OpenJDK7
11
+ # ref: https://github.com/travis-ci/travis-ci/issues/5227#issuecomment-165131913
12
+ before_install:
13
+ - cat /etc/hosts # optionally check the content *before*
14
+ - sudo hostname "$(hostname | cut -c1-63)"
15
+ - sed -e "s/^\\(127\\.0\\.0\\.1.*\\)/\\1 $(hostname | cut -c1-63)/" /etc/hosts | sudo tee /etc/hosts
16
+ - cat /etc/hosts # optionally check the content *after*
17
+
10
18
  install:
11
19
  - sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
12
- - echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
20
+ - echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.2 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.2.list
13
21
  - sudo apt-get update
14
- - sudo apt-get install -y mongodb-org=3.0.6
22
+ - sudo apt-get install -y --force-yes mongodb-org
15
23
  - mongod -version
16
- - curl --create-dirs -o ~/.embulk/bin/embulk -L "http://dl.embulk.org/embulk-latest.jar"
24
+ - curl --create-dirs -o ~/.embulk/bin/embulk -L "https://dl.bintray.com/embulk/maven/embulk-0.8.8.jar"
17
25
  - chmod +x ~/.embulk/bin/embulk
18
26
  - export PATH="$HOME/.embulk/bin:$PATH"
19
27
  - embulk --version
@@ -27,5 +35,8 @@ before_script:
27
35
 
28
36
  script:
29
37
  - embulk run -L . src/test/resources/basic.yml
38
+ - cat tmp/basic000.00.csv
39
+ - cmp tmp/basic000.00.csv src/test/resources/basic_expected.csv || exit 1
30
40
  - embulk run -L . src/test/resources/full.yml
41
+ - cat tmp/full000.00.csv
31
42
  - cmp tmp/full000.00.csv src/test/resources/full_expected.csv || exit 1
data/README.md CHANGED
@@ -4,17 +4,9 @@
4
4
 
5
5
  MongoDB input plugin for Embulk loads records from MongoDB.
6
6
 
7
- **CAUTION:** this plugin does not support array and object fields,
8
- because embulk does not supported these types yet.
9
- But these types will be supported, so when it supported I add support these types.
10
- For more detail see following issues.
11
-
12
- - https://github.com/embulk/embulk/issues/120
13
- - https://github.com/embulk/embulk/issues/121
14
-
15
7
  ## Overview
16
8
 
17
- This plugin only works with embulk >= 0.7.4.
9
+ This plugin only works with embulk >= 0.8.8.
18
10
 
19
11
  * **Plugin type**: input
20
12
  * **Resume supported**: no
@@ -43,26 +35,27 @@ This plugin only works with embulk >= 0.7.4.
43
35
  ```yaml
44
36
  in:
45
37
  type: mongodb
46
- uri: mongodb://myuser@mypassword:localhost:27017/my_database
38
+ uri: mongodb://myuser:mypassword@localhost:27017/my_database
47
39
  collection: "my_collection"
48
40
  fields:
49
41
  - { name: id, type: string }
50
42
  - { name: field1, type: long }
51
43
  - { name: field2, type: timestamp }
44
+ - { name: field3, type: json }
52
45
  ```
53
46
 
54
47
  ### Filter object by query and sort
55
48
 
56
-
57
49
  ```yaml
58
50
  in:
59
51
  type: mongodb
60
- uri: mongodb://myuser@mypassword:localhost:27017/my_database
52
+ uri: mongodb://myuser:mypassword@localhost:27017/my_database
61
53
  collection: "my_collection"
62
54
  fields:
63
55
  - { name: id, type: string }
64
56
  - { name: field1, type: long }
65
57
  - { name: field2, type: timestamp }
58
+ - { name: field3, type: json }
66
59
  query: '{ field1: { $gte: 3 } }'
67
60
  sort: '{ field1: 1 }'
68
61
  ```
data/build.gradle CHANGED
@@ -15,15 +15,15 @@ configurations {
15
15
  provided
16
16
  }
17
17
 
18
- version = "0.1.2"
18
+ version = "0.2.0"
19
19
 
20
20
  sourceCompatibility = 1.7
21
21
  targetCompatibility = 1.7
22
22
 
23
23
  dependencies {
24
- compile "org.embulk:embulk-core:0.7.4"
25
- provided "org.embulk:embulk-core:0.7.4"
26
- compile "org.mongodb:mongo-java-driver:3.0.3"
24
+ compile "org.embulk:embulk-core:0.8.8"
25
+ provided "org.embulk:embulk-core:0.8.8"
26
+ compile "org.mongodb:mongo-java-driver:3.2.2"
27
27
 
28
28
  testCompile "junit:junit:4.+"
29
29
  }
@@ -1,7 +1,9 @@
1
1
  package org.embulk.input.mongodb;
2
2
 
3
+ import com.google.common.base.Throwables;
3
4
  import com.mongodb.MongoClient;
4
5
  import com.mongodb.MongoClientURI;
6
+ import com.mongodb.MongoException;
5
7
  import com.mongodb.client.MongoCollection;
6
8
  import com.mongodb.client.MongoCursor;
7
9
  import com.mongodb.client.MongoDatabase;
@@ -11,6 +13,7 @@ import org.bson.conversions.Bson;
11
13
  import org.embulk.config.Config;
12
14
  import org.embulk.config.ConfigDefault;
13
15
  import org.embulk.config.ConfigDiff;
16
+ import org.embulk.config.ConfigException;
14
17
  import org.embulk.config.ConfigInject;
15
18
  import org.embulk.config.ConfigSource;
16
19
  import org.embulk.config.Task;
@@ -25,10 +28,13 @@ import org.embulk.spi.PageBuilder;
25
28
  import org.embulk.spi.PageOutput;
26
29
  import org.embulk.spi.Schema;
27
30
  import org.embulk.spi.SchemaConfig;
31
+ import org.embulk.spi.json.JsonParser;
28
32
  import org.embulk.spi.time.Timestamp;
29
33
  import org.embulk.spi.type.Type;
30
34
  import org.slf4j.Logger;
31
35
 
36
+ import javax.validation.constraints.Min;
37
+ import java.net.UnknownHostException;
32
38
  import java.util.List;
33
39
 
34
40
  public class MongodbInputPlugin
@@ -57,10 +63,11 @@ public class MongodbInputPlugin
57
63
 
58
64
  @Config("batch_size")
59
65
  @ConfigDefault("10000")
60
- Integer getBatchSize();
66
+ @Min(1)
67
+ int getBatchSize();
61
68
 
62
69
  @ConfigInject
63
- public BufferAllocator getBufferAllocator();
70
+ BufferAllocator getBufferAllocator();
64
71
  }
65
72
 
66
73
  private final Logger log = Exec.getLogger(MongodbInputPlugin.class);
@@ -70,6 +77,12 @@ public class MongodbInputPlugin
70
77
  InputPlugin.Control control)
71
78
  {
72
79
  PluginTask task = config.loadConfig(PluginTask.class);
80
+ // Connect once to throw ConfigException at an earlier stage of execution
81
+ try {
82
+ connect(task);
83
+ } catch (UnknownHostException | MongoException ex) {
84
+ throw new ConfigException(ex);
85
+ }
73
86
  Schema schema = task.getFields().toSchema();
74
87
  return resume(task.dump(), schema, 1, control);
75
88
  }
@@ -99,9 +112,16 @@ public class MongodbInputPlugin
99
112
  PluginTask task = taskSource.loadTask(PluginTask.class);
100
113
  BufferAllocator allocator = task.getBufferAllocator();
101
114
  PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
115
+ JsonParser jsonParser = new JsonParser();
116
+ List<Column> columns = pageBuilder.getSchema().getColumns();
102
117
 
103
- MongoDatabase db = connect(task);
104
- MongoCollection<Document> collection = db.getCollection(task.getCollection());
118
+ MongoCollection<Document> collection;
119
+ try {
120
+ MongoDatabase db = connect(task);
121
+ collection = db.getCollection(task.getCollection());
122
+ } catch (UnknownHostException | MongoException ex) {
123
+ throw new ConfigException(ex);
124
+ }
105
125
 
106
126
  Bson query = (Bson) JSON.parse(task.getQuery());
107
127
  Bson projection = getProjection(task);
@@ -111,25 +131,22 @@ public class MongodbInputPlugin
111
131
  log.trace("projection: {}", projection);
112
132
  log.trace("sort: {}", sort);
113
133
 
114
- MongoCursor<Document> cursor = collection
115
- .find(query)
116
- .projection(projection)
117
- .sort(sort)
118
- .batchSize(task.getBatchSize())
119
- .iterator();
120
-
121
- try {
134
+ try (MongoCursor<Document> cursor = collection
135
+ .find(query)
136
+ .projection(projection)
137
+ .sort(sort)
138
+ .batchSize(task.getBatchSize())
139
+ .iterator()) {
122
140
  while (cursor.hasNext()) {
123
- fetch(cursor, pageBuilder);
141
+ fetch(cursor, pageBuilder, jsonParser, columns);
124
142
  }
125
- } finally {
126
- cursor.close();
143
+ } catch (MongoException ex) {
144
+ Throwables.propagate(ex);
127
145
  }
128
146
 
129
147
  pageBuilder.finish();
130
148
 
131
- TaskReport report = Exec.newTaskReport();
132
- return report;
149
+ return Exec.newTaskReport();
133
150
  }
134
151
 
135
152
  @Override
@@ -138,15 +155,19 @@ public class MongodbInputPlugin
138
155
  return Exec.newConfigDiff();
139
156
  }
140
157
 
141
- private MongoDatabase connect(PluginTask task) {
158
+ private MongoDatabase connect(final PluginTask task) throws UnknownHostException, MongoException {
142
159
  MongoClientURI uri = new MongoClientURI(task.getUri());
143
160
  MongoClient mongoClient = new MongoClient(uri);
144
- return mongoClient.getDatabase(uri.getDatabase());
161
+
162
+ MongoDatabase db = mongoClient.getDatabase(uri.getDatabase());
163
+ // Get the collection count so that any connection error is thrown here
164
+ db.getCollection(task.getCollection()).count();
165
+ return db;
145
166
  }
146
167
 
147
- private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder) {
168
+ private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder,
169
+ JsonParser jsonParser, List<Column> columns) {
148
170
  Document doc = cursor.next();
149
- List<Column> columns = pageBuilder.getSchema().getColumns();
150
171
  for (Column c : columns) {
151
172
  Type t = c.getType();
152
173
  String key = normalize(c.getName());
@@ -170,12 +191,17 @@ public class MongodbInputPlugin
170
191
  break;
171
192
 
172
193
  case "string":
173
- // Enable output object like ObjectId as string, this is reason I don't use doc.getString(key).
174
- pageBuilder.setString(c, doc.get(key).toString());
175
- break;
194
+ // Enable output object like ObjectId as string, this is reason I don't use doc.getString(key).
195
+ pageBuilder.setString(c, doc.get(key).toString());
196
+ break;
176
197
 
177
198
  case "timestamp":
178
199
  pageBuilder.setTimestamp(c, Timestamp.ofEpochMilli(doc.getDate(key).getTime()));
200
+ break;
201
+
202
+ case "json":
203
+ pageBuilder.setJson(c, jsonParser.parse(((Document) doc.get(key)).toJson()));
204
+ break;
179
205
  }
180
206
  }
181
207
  }
@@ -5,6 +5,7 @@ in:
5
5
  fields:
6
6
  - { name: name, type: string }
7
7
  - { name: rank, type: long }
8
+ sort: '{ rank: 1 }'
8
9
  out:
9
10
  type: file
10
11
  path_prefix: ./tmp/basic
@@ -1,10 +1,10 @@
1
1
  name,rank
2
2
  obj1,1
3
3
  obj2,2
4
- obj4,4
5
4
  obj3,3
5
+ obj4,4
6
6
  obj5,5
7
7
  obj6,6
8
8
  obj7,7
9
- obj9,9
10
9
  obj8,8
10
+ obj9,9
@@ -8,6 +8,7 @@ in:
8
8
  - { name: rank, type: long }
9
9
  - { name: value, type: double }
10
10
  - { name: created_at, type: timestamp }
11
+ - { name: embeded, type: json }
11
12
  query: '{ rank: { $gte: 3 } }'
12
13
  sort: '{ rank: -1 }'
13
14
  batch_size: 100
@@ -1,8 +1,8 @@
1
- id,name,rank,value,created_at
2
- 55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000
3
- 55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000
4
- 55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000
5
- 55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000
6
- 55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000
7
- 55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000
8
- 55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000
1
+ id,name,rank,value,created_at,embeded
2
+ 55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000,"{""key"":""value9""}"
3
+ 55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000,"{""key"":""value8""}"
4
+ 55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000,"{""key"":""value7""}"
5
+ 55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000,"{""key"":""value6""}"
6
+ 55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000,"{""key"":""value5""}"
7
+ 55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000,"{""key"":{""inner_key"":""value4""}}"
8
+ 55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000,"{""key"":[""v3-1"",""v3-2""]}"
@@ -1,9 +1,9 @@
1
- { "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 } }
2
- { "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 } }
3
- { "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 } }
4
- { "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 } }
5
- { "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 } }
6
- { "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 } }
7
- { "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 } }
8
- { "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 } }
9
- { "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 } }
1
+ { "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 }, "embeded": { "key": "value1" } }
2
+ { "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 }, "embeded": { "key": "value2" } }
3
+ { "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 }, "embeded": { "key": ["v3-1", "v3-2"]} }
4
+ { "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 }, "embeded": { "key": { "inner_key": "value4" } } }
5
+ { "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 }, "embeded": { "key": "value5" } }
6
+ { "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 }, "embeded": { "key": "value6" } }
7
+ { "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 }, "embeded": { "key": "value7" } }
8
+ { "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 }, "embeded": { "key": "value8" } }
9
+ { "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 }, "embeded": { "key": "value9" } }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: embulk-input-mongodb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kazuyuki Honda
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-10 00:00:00.000000000 Z
11
+ date: 2016-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -62,8 +62,8 @@ files:
62
62
  - src/test/resources/full.yml
63
63
  - src/test/resources/full_expected.csv
64
64
  - src/test/resources/my_collection.jsonl
65
- - classpath/embulk-input-mongodb-0.1.2.jar
66
- - classpath/mongo-java-driver-3.0.3.jar
65
+ - classpath/embulk-input-mongodb-0.2.0.jar
66
+ - classpath/mongo-java-driver-3.2.2.jar
67
67
  homepage: https://github.com/hakobera/embulk-input-mongodb
68
68
  licenses:
69
69
  - MIT
Binary file