embulk-input-mongodb 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 42557bf36917051e3ff5bedebcfce52239f4aa79
4
+ data.tar.gz: 7bf1fe7b6df8888f60f69ebad630242136aa2de2
5
+ SHA512:
6
+ metadata.gz: 4f299f4aad5fb16c592bb6c75fcb850629f086ddfb38f3b1f30f257b508655fce08bc2e49a5ab7f47054b004c9f656b2f597a410f1b0993f698b5c774bb20d32
7
+ data.tar.gz: 0a44d691bb62828f13fb0cf1e669d46d7f2125c9128a02c0ef93f06bd406a9615bb7d8c16861d6258b1de42110c93128dc30b7824863a5c48753ebddb180c27c
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ *.gemspec
5
+ .gradle/
6
+ /classpath/
7
+ build/
8
+ .idea
9
+ *.iml
data/.travis.yml ADDED
@@ -0,0 +1,31 @@
1
+ language: java
2
+
3
+ jdk:
4
+ - oraclejdk8
5
+ - oraclejdk7
6
+ - openjdk7
7
+
8
+ sudo: required
9
+
10
+ install:
11
+ - sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 7F0CEB10
12
+ - echo "deb http://repo.mongodb.org/apt/ubuntu "$(lsb_release -sc)"/mongodb-org/3.0 multiverse" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
13
+ - sudo apt-get update
14
+ - sudo apt-get install -y mongodb-org=3.0.6
15
+ - mongod -version
16
+ - curl --create-dirs -o ~/.embulk/bin/embulk -L "http://dl.embulk.org/embulk-latest.jar"
17
+ - chmod +x ~/.embulk/bin/embulk
18
+ - export PATH="$HOME/.embulk/bin:$PATH"
19
+ - embulk --version
20
+
21
+ before_script:
22
+ - echo "Wait mongodb wakeup"
23
+ - sleep 10
24
+ - mongoimport --db my_database --collection my_collection --type json --drop src/test/resources/my_collection.jsonl
25
+ - ./gradlew package
26
+ - mkdir -p ./tmp
27
+
28
+ script:
29
+ - embulk run -L . src/test/resources/basic.yml
30
+ - embulk run -L . src/test/resources/full.yml
31
+ - cmp tmp/full000.00.csv src/test/resources/full_expected.csv || exit 1
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,74 @@
1
+ # MongoDB input plugin for Embulk
2
+
3
+ [![Build Status](https://travis-ci.org/hakobera/embulk-input-mongodb.svg)](https://travis-ci.org/hakobera/embulk-input-mongodb)
4
+
5
+ MongoDB input plugin for Embulk loads records from MongoDB.
6
+
7
+ **CAUTION:** this plugin does not support array and object fields,
8
+ because Embulk does not support these types yet.
9
+ Embulk plans to support these types; once it does, this plugin will add support for them as well.
10
+ For more details, see the following issues:
11
+
12
+ - https://github.com/embulk/embulk/issues/120
13
+ - https://github.com/embulk/embulk/issues/121
14
+
15
+ ## Overview
16
+
17
+ This plugin only works with embulk >= 0.7.4.
18
+
19
+ * **Plugin type**: input
20
+ * **Resume supported**: no
21
+ * **Cleanup supported**: no
22
+ * **Guess supported**: no
23
+
24
+ ## Configuration
25
+
26
+ - **uri**: [MongoDB connection string URI](http://docs.mongodb.org/manual/reference/connection-string/) (e.g. 'mongodb://localhost:27017/mydb') (string, required)
27
+ - **collection**: source collection name (string, required)
28
+ - **fields**: list of hash records, each of which has the following two fields (array, required)
29
+ - name: Name of the column
30
+ - type: Column types as follows
31
+ - boolean
32
+ - long
33
+ - double
34
+ - string
35
+ - timestamp
36
+ - **query**: provides a JSON document as a query that optionally limits the documents returned (string, optional)
37
+ - **sort**: specifies an ordering for exported results (string, optional)
38
+
39
+ ## Example
40
+
41
+ ### Export all objects
42
+
43
+ ```yaml
44
+ in:
45
+ type: mongodb
46
+ uri: mongodb://myuser:mypassword@localhost:27017/my_database
47
+ collection: "my_collection"
48
+ fields:
49
+ - { name: id, type: string }
50
+ - { name: field1, type: long }
51
+ - { name: field2, type: timestamp }
52
+ ```
53
+
54
+ ### Filter objects by query and sort
55
+
56
+
57
+ ```yaml
58
+ in:
59
+ type: mongodb
60
+ uri: mongodb://myuser:mypassword@localhost:27017/my_database
61
+ collection: "my_collection"
62
+ fields:
63
+ - { name: id, type: string }
64
+ - { name: field1, type: long }
65
+ - { name: field2, type: timestamp }
66
+ query: '{ field1: { $gte: 3 } }'
67
+ sort: '{ field1: 1 }'
68
+ ```
69
+
70
+ ## Build
71
+
72
+ ```
73
+ $ ./gradlew gem
74
+ ```
data/build.gradle ADDED
@@ -0,0 +1,76 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ }
6
+ import com.github.jrubygradle.JRubyExec
7
+ repositories {
8
+ mavenCentral()
9
+ jcenter()
10
+ maven {
11
+ url "https://oss.sonatype.org/content/repositories/releases/"
12
+ }
13
+ }
14
+ configurations {
15
+ provided
16
+ }
17
+
18
+ version = "0.1.0"
19
+
20
+ dependencies {
21
+ compile "org.embulk:embulk-core:0.7.4"
22
+ provided "org.embulk:embulk-core:0.7.4"
23
+ compile "org.mongodb:mongo-java-driver:3.0.3"
24
+
25
+ testCompile "junit:junit:4.+"
26
+ }
27
+
28
+ task classpath(type: Copy, dependsOn: ["jar"]) {
29
+ doFirst { file("classpath").deleteDir() }
30
+ from (configurations.runtime - configurations.provided + files(jar.archivePath))
31
+ into "classpath"
32
+ }
33
+ clean { delete "classpath" }
34
+
35
+ task gem(type: JRubyExec, dependsOn: ["gemspec", "classpath"]) {
36
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build"
37
+ script "${project.name}.gemspec"
38
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") }
39
+ }
40
+
41
+ task gemPush(type: JRubyExec, dependsOn: ["gem"]) {
42
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "push"
43
+ script "pkg/${project.name}-${project.version}.gem"
44
+ }
45
+
46
+ task "package"(dependsOn: ["gemspec", "classpath"]) << {
47
+ println "> Build succeeded."
48
+ println "> You can run embulk with '-L ${file(".").absolutePath}' argument."
49
+ }
50
+
51
+ task gemspec {
52
+ ext.gemspecFile = file("${project.name}.gemspec")
53
+ inputs.file "build.gradle"
54
+ outputs.file gemspecFile
55
+ doLast { gemspecFile.write($/
56
+ Gem::Specification.new do |spec|
57
+ spec.name = "${project.name}"
58
+ spec.version = "${project.version}"
59
+ spec.authors = ["Kazuyuki Honda"]
60
+ spec.summary = %[Mongodb input plugin for Embulk]
61
+ spec.description = %[Loads records from Mongodb.]
62
+ spec.email = ["hakobera@gmail.com"]
63
+ spec.licenses = ["MIT"]
64
+ # TODO set this: spec.homepage = "https://github.com/hakobera/embulk-input-mongodb"
65
+
66
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
67
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
68
+ spec.require_paths = ["lib"]
69
+
70
+ spec.add_development_dependency 'bundler', ['~> 1.0']
71
+ spec.add_development_dependency 'rake', ['>= 10.0']
72
+ end
73
+ /$)
74
+ }
75
+ }
76
+ clean { delete "${project.name}.gemspec" }
Binary file
@@ -0,0 +1,6 @@
1
+ #Tue Aug 04 15:25:00 JST 2015
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.2.1-all.zip
data/gradlew ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
+ if $cygwin ; then
47
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
+ fi
49
+
50
+ # Attempt to set APP_HOME
51
+ # Resolve links: $0 may be a link
52
+ PRG="$0"
53
+ # Need this for relative symlinks.
54
+ while [ -h "$PRG" ] ; do
55
+ ls=`ls -ld "$PRG"`
56
+ link=`expr "$ls" : '.*-> \(.*\)$'`
57
+ if expr "$link" : '/.*' > /dev/null; then
58
+ PRG="$link"
59
+ else
60
+ PRG=`dirname "$PRG"`"/$link"
61
+ fi
62
+ done
63
+ SAVED="`pwd`"
64
+ cd "`dirname \"$PRG\"`/" >&-
65
+ APP_HOME="`pwd -P`"
66
+ cd "$SAVED" >&-
67
+
68
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
+
70
+ # Determine the Java command to use to start the JVM.
71
+ if [ -n "$JAVA_HOME" ] ; then
72
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
73
+ # IBM's JDK on AIX uses strange locations for the executables
74
+ JAVACMD="$JAVA_HOME/jre/sh/java"
75
+ else
76
+ JAVACMD="$JAVA_HOME/bin/java"
77
+ fi
78
+ if [ ! -x "$JAVACMD" ] ; then
79
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
80
+
81
+ Please set the JAVA_HOME variable in your environment to match the
82
+ location of your Java installation."
83
+ fi
84
+ else
85
+ JAVACMD="java"
86
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
87
+
88
+ Please set the JAVA_HOME variable in your environment to match the
89
+ location of your Java installation."
90
+ fi
91
+
92
+ # Increase the maximum file descriptors if we can.
93
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
94
+ MAX_FD_LIMIT=`ulimit -H -n`
95
+ if [ $? -eq 0 ] ; then
96
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
97
+ MAX_FD="$MAX_FD_LIMIT"
98
+ fi
99
+ ulimit -n $MAX_FD
100
+ if [ $? -ne 0 ] ; then
101
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
102
+ fi
103
+ else
104
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
105
+ fi
106
+ fi
107
+
108
+ # For Darwin, add options to specify how the application appears in the dock
109
+ if $darwin; then
110
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
111
+ fi
112
+
113
+ # For Cygwin, switch paths to Windows format before running java
114
+ if $cygwin ; then
115
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_input(
2
+ "mongodb", "org.embulk.input.mongodb.MongodbInputPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1,211 @@
1
+ package org.embulk.input.mongodb;
2
+
3
+ import com.mongodb.MongoClient;
4
+ import com.mongodb.MongoClientURI;
5
+ import com.mongodb.client.MongoCollection;
6
+ import com.mongodb.client.MongoCursor;
7
+ import com.mongodb.client.MongoDatabase;
8
+ import com.mongodb.util.JSON;
9
+ import org.bson.Document;
10
+ import org.bson.conversions.Bson;
11
+ import org.embulk.config.Config;
12
+ import org.embulk.config.ConfigDefault;
13
+ import org.embulk.config.ConfigDiff;
14
+ import org.embulk.config.ConfigInject;
15
+ import org.embulk.config.ConfigSource;
16
+ import org.embulk.config.Task;
17
+ import org.embulk.config.TaskReport;
18
+ import org.embulk.config.TaskSource;
19
+ import org.embulk.spi.BufferAllocator;
20
+ import org.embulk.spi.Column;
21
+ import org.embulk.spi.ColumnConfig;
22
+ import org.embulk.spi.Exec;
23
+ import org.embulk.spi.InputPlugin;
24
+ import org.embulk.spi.PageBuilder;
25
+ import org.embulk.spi.PageOutput;
26
+ import org.embulk.spi.Schema;
27
+ import org.embulk.spi.SchemaConfig;
28
+ import org.embulk.spi.time.Timestamp;
29
+ import org.embulk.spi.type.Type;
30
+ import org.slf4j.Logger;
31
+
32
+ import java.util.List;
33
+
34
+ public class MongodbInputPlugin
35
+ implements InputPlugin
36
+ {
37
+ public interface PluginTask
38
+ extends Task
39
+ {
40
+ // MongoDB connection string URI
41
+ @Config("uri")
42
+ String getUri();
43
+
44
+ @Config("collection")
45
+ String getCollection();
46
+
47
+ @Config("fields")
48
+ SchemaConfig getFields();
49
+
50
+ @Config("query")
51
+ @ConfigDefault("\"{}\"")
52
+ String getQuery();
53
+
54
+ @Config("sort")
55
+ @ConfigDefault("\"{}\"")
56
+ String getSort();
57
+
58
+ @Config("batch_size")
59
+ @ConfigDefault("10000")
60
+ Integer getBatchSize();
61
+
62
+ @ConfigInject
63
+ public BufferAllocator getBufferAllocator();
64
+ }
65
+
66
+ private final Logger log = Exec.getLogger(MongodbInputPlugin.class);
67
+
68
+ @Override
69
+ public ConfigDiff transaction(ConfigSource config,
70
+ InputPlugin.Control control)
71
+ {
72
+ PluginTask task = config.loadConfig(PluginTask.class);
73
+ Schema schema = task.getFields().toSchema();
74
+ return resume(task.dump(), schema, 1, control);
75
+ }
76
+
77
+ @Override
78
+ public ConfigDiff resume(TaskSource taskSource,
79
+ Schema schema, int taskCount,
80
+ InputPlugin.Control control)
81
+ {
82
+ control.run(taskSource, schema, taskCount);
83
+ return Exec.newConfigDiff();
84
+ }
85
+
86
+ @Override
87
+ public void cleanup(TaskSource taskSource,
88
+ Schema schema, int taskCount,
89
+ List<TaskReport> successCommitReports)
90
+ {
91
+ // do nothing
92
+ }
93
+
94
+ @Override
95
+ public TaskReport run(TaskSource taskSource,
96
+ Schema schema, int taskIndex,
97
+ PageOutput output)
98
+ {
99
+ PluginTask task = taskSource.loadTask(PluginTask.class);
100
+ BufferAllocator allocator = task.getBufferAllocator();
101
+ PageBuilder pageBuilder = new PageBuilder(allocator, schema, output);
102
+
103
+ MongoDatabase db = connect(task);
104
+ MongoCollection<Document> collection = db.getCollection(task.getCollection());
105
+
106
+ Bson query = (Bson) JSON.parse(task.getQuery());
107
+ Bson projection = getProjection(task);
108
+ Bson sort = (Bson) JSON.parse(task.getSort());
109
+
110
+ log.trace("query: {}", query);
111
+ log.trace("projection: {}", projection);
112
+ log.trace("sort: {}", sort);
113
+
114
+ MongoCursor<Document> cursor = collection
115
+ .find(query)
116
+ .projection(projection)
117
+ .sort(sort)
118
+ .batchSize(task.getBatchSize())
119
+ .iterator();
120
+
121
+ try {
122
+ while (cursor.hasNext()) {
123
+ fetch(cursor, pageBuilder);
124
+ }
125
+ } finally {
126
+ cursor.close();
127
+ }
128
+
129
+ pageBuilder.finish();
130
+
131
+ TaskReport report = Exec.newTaskReport();
132
+ return report;
133
+ }
134
+
135
+ @Override
136
+ public ConfigDiff guess(ConfigSource config)
137
+ {
138
+ return Exec.newConfigDiff();
139
+ }
140
+
141
+ private MongoDatabase connect(PluginTask task) {
142
+ MongoClientURI uri = new MongoClientURI(task.getUri());
143
+ MongoClient mongoClient = new MongoClient(uri);
144
+ return mongoClient.getDatabase(uri.getDatabase());
145
+ }
146
+
147
+ private void fetch(MongoCursor<Document> cursor, PageBuilder pageBuilder) {
148
+ Document doc = cursor.next();
149
+ List<Column> columns = pageBuilder.getSchema().getColumns();
150
+ for (Column c : columns) {
151
+ Type t = c.getType();
152
+ String key = normalize(c.getName());
153
+
154
+ if (!doc.containsKey(key) || doc.get(key) == null) {
155
+ pageBuilder.setNull(c);
156
+ } else {
157
+ switch (t.getName()) {
158
+ case "boolean":
159
+ pageBuilder.setBoolean(c, doc.getBoolean(key));
160
+ break;
161
+
162
+ case "long":
163
+ // MongoDB can contain both 'int' and 'long', but embulk only support 'long'
164
+ // So enable handling both 'int' and 'long', first get value as java.lang.Number, then convert it to long
165
+ pageBuilder.setLong(c, ((Number) doc.get(key)).longValue());
166
+ break;
167
+
168
+ case "double":
169
+ pageBuilder.setDouble(c, ((Number) doc.get(key)).doubleValue());
170
+ break;
171
+
172
+ case "string":
173
+ // Enable output object like ObjectId as string, this is reason I don't use doc.getString(key).
174
+ pageBuilder.setString(c, doc.get(key).toString());
175
+ break;
176
+
177
+ case "timestamp":
178
+ pageBuilder.setTimestamp(c, Timestamp.ofEpochMilli(doc.getDate(key).getTime()));
179
+ }
180
+ }
181
+ }
182
+ pageBuilder.addRecord();
183
+ }
184
+
185
+ private Bson getProjection(PluginTask task) {
186
+ SchemaConfig fields = task.getFields();
187
+ StringBuilder sb = new StringBuilder("{");
188
+ int l = fields.getColumnCount();
189
+
190
+ for (int i = 0; i < l; i++) {
191
+ ColumnConfig c = fields.getColumn(i);
192
+ if (i != 0) {
193
+ sb.append(",");
194
+ }
195
+ String key = normalize(c.getName());
196
+ sb.append(key).append(":1");
197
+ }
198
+ sb.append("}");
199
+
200
+ return (Bson) JSON.parse(sb.toString());
201
+ }
202
+
203
+ private String normalize(String key) {
204
+ // 'id' is special alias key name of MongoDB ObjectId
205
+ // http://docs.mongodb.org/manual/reference/object-id/
206
+ if (key.equals("id")) {
207
+ return "_id";
208
+ }
209
+ return key;
210
+ }
211
+ }
@@ -0,0 +1,5 @@
1
+ package org.embulk.input.mongodb;
2
+
3
+ public class TestMongodbInputPlugin
4
+ {
5
+ }
@@ -0,0 +1,16 @@
1
+ in:
2
+ type: mongodb
3
+ uri: mongodb://localhost:27017/my_database
4
+ collection: "my_collection"
5
+ fields:
6
+ - { name: name, type: string }
7
+ - { name: rank, type: long }
8
+ out:
9
+ type: file
10
+ path_prefix: ./tmp/basic
11
+ file_ext: csv
12
+ formatter:
13
+ type: csv
14
+ header_line: true
15
+ charset: UTF-8
16
+ newline: CRLF
@@ -0,0 +1,10 @@
1
+ name,rank
2
+ obj1,1
3
+ obj2,2
4
+ obj4,4
5
+ obj3,3
6
+ obj5,5
7
+ obj6,6
8
+ obj7,7
9
+ obj9,9
10
+ obj8,8
@@ -0,0 +1,22 @@
1
+ in:
2
+ type: mongodb
3
+ uri: mongodb://localhost:27017/my_database
4
+ collection: "my_collection"
5
+ fields:
6
+ - { name: id, type: string }
7
+ - { name: name, type: string }
8
+ - { name: rank, type: long }
9
+ - { name: value, type: double }
10
+ - { name: created_at, type: timestamp }
11
+ query: '{ rank: { $gte: 3 } }'
12
+ sort: '{ rank: -1 }'
13
+ batch_size: 100
14
+ out:
15
+ type: file
16
+ path_prefix: ./tmp/full
17
+ file_ext: csv
18
+ formatter:
19
+ type: csv
20
+ header_line: true
21
+ charset: UTF-8
22
+ newline: CRLF
@@ -0,0 +1,8 @@
1
+ id,name,rank,value,created_at
2
+ 55eae883689a08361045d652,obj9,9,9.9,2015-09-06 10:05:18.786000 +0000
3
+ 55eae883689a08361045d651,obj8,8,8.8,2015-09-06 10:05:28.786000 +0000
4
+ 55eae883689a08361045d650,obj7,7,7.7,2015-09-06 10:05:38.786000 +0000
5
+ 55eae883689a08361045d64f,obj6,6,6.6,2015-09-06 10:05:48.786000 +0000
6
+ 55eae883689a08361045d64e,obj5,5,5.5,2015-09-06 10:05:58.786000 +0000
7
+ 55eae883689a08361045d64d,obj4,4,4.4,2015-09-06 10:06:08.786000 +0000
8
+ 55eae883689a08361045d64c,obj3,3,3.3,2015-09-06 10:06:18.786000 +0000
@@ -0,0 +1,9 @@
1
+ { "_id": "55eae883689a08361045d64a", "name": "obj1", "rank": 1, "value": 1.1, "created_at": { "$date" : 1441533998786 } }
2
+ { "_id": "55eae883689a08361045d64b", "name": "obj2", "rank": 2, "value": 2.2, "created_at": { "$date" : 1441533988786 } }
3
+ { "_id": "55eae883689a08361045d64c", "name": "obj3", "rank": 3, "value": 3.3, "created_at": { "$date" : 1441533978786 } }
4
+ { "_id": "55eae883689a08361045d64d", "name": "obj4", "rank": 4, "value": 4.4, "created_at": { "$date" : 1441533968786 } }
5
+ { "_id": "55eae883689a08361045d64e", "name": "obj5", "rank": 5, "value": 5.5, "created_at": { "$date" : 1441533958786 } }
6
+ { "_id": "55eae883689a08361045d64f", "name": "obj6", "rank": 6, "value": 6.6, "created_at": { "$date" : 1441533948786 } }
7
+ { "_id": "55eae883689a08361045d650", "name": "obj7", "rank": 7, "value": 7.7, "created_at": { "$date" : 1441533938786 } }
8
+ { "_id": "55eae883689a08361045d651", "name": "obj8", "rank": 8, "value": 8.8, "created_at": { "$date" : 1441533928786 } }
9
+ { "_id": "55eae883689a08361045d652", "name": "obj9", "rank": 9, "value": 9.9, "created_at": { "$date" : 1441533918786 } }
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-input-mongodb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Kazuyuki Honda
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ~>
17
+ - !ruby/object:Gem::Version
18
+ version: '1.0'
19
+ name: bundler
20
+ prerelease: false
21
+ type: :development
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '10.0'
33
+ name: rake
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ description: Loads records from Mongodb.
42
+ email:
43
+ - hakobera@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - .gitignore
49
+ - .travis.yml
50
+ - LICENSE.txt
51
+ - README.md
52
+ - build.gradle
53
+ - gradle/wrapper/gradle-wrapper.jar
54
+ - gradle/wrapper/gradle-wrapper.properties
55
+ - gradlew
56
+ - gradlew.bat
57
+ - lib/embulk/input/mongodb.rb
58
+ - src/main/java/org/embulk/input/mongodb/MongodbInputPlugin.java
59
+ - src/test/java/org/embulk/input/mongodb/TestMongodbInputPlugin.java
60
+ - src/test/resources/basic.yml
61
+ - src/test/resources/basic_expected.csv
62
+ - src/test/resources/full.yml
63
+ - src/test/resources/full_expected.csv
64
+ - src/test/resources/my_collection.jsonl
65
+ - classpath/embulk-input-mongodb-0.1.0.jar
66
+ - classpath/mongo-java-driver-3.0.3.jar
67
+ homepage:
68
+ licenses:
69
+ - MIT
70
+ metadata: {}
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - '>='
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubyforge_project:
87
+ rubygems_version: 2.1.9
88
+ signing_key:
89
+ specification_version: 4
90
+ summary: Mongodb input plugin for Embulk
91
+ test_files: []