embulk-output-elasticsearch 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 81f91426171815e966260af0bc33010a33d6751a
4
+ data.tar.gz: 714ef2ccf5d6482e71f456bf882248aace621d1e
5
+ SHA512:
6
+ metadata.gz: ce73b2bc564f308d70f5f2b33e4c7d52760c2a1afc07edd7f4be8b49b705a22ee2f1e8ed649961ef26ca5f7a57de1ff9ac925bee3f9564296da6b9728d44b5c3
7
+ data.tar.gz: f763815c3f63f6115a03c62c4e13c36e8ad7420118f80e9640704527cc986cebbf32311fea9e350150a2c28eed5403d8dff76324a16e273d532d43a878d35cc9
data/.gitignore ADDED
@@ -0,0 +1,7 @@
1
+ *~
2
+ *.iml
3
+ .idea
4
+ build/
5
+ /classpath/
6
+ /.gradle
7
+ /pkg/
data/README.md ADDED
@@ -0,0 +1,30 @@
1
+ # Elasticsearch output plugin for Embulk
2
+
3
+ ## Overview
4
+
5
+ * **Plugin type**: output
6
+ * **Rollback supported**: no
7
+ * **Resume supported**: no
8
+ * **Cleanup supported**: no
9
+
10
+ ## Configuration
11
+
12
+ - **cluster**: cluster name (string, default: 'elasticsearch')
13
+ - **index_name**: index name (string, required)
14
+ - **index_type**: index type (string, required)
15
+
16
+ ## Example
17
+
18
+ ```yaml
19
+ out:
20
+ type: elasticsearch
21
+ cluster: elasticsearch
22
+ index_name: embulk
23
+ index_type: embulk
24
+ ```
25
+
26
+ ## Build
27
+
28
+ ```
29
+ $ ./gradlew gem
30
+ ```
data/build.gradle ADDED
@@ -0,0 +1,66 @@
1
+ plugins {
2
+ id "com.jfrog.bintray" version "1.1"
3
+ id "com.github.jruby-gradle.base" version "0.1.5"
4
+ id "java"
5
+ }
6
+ import com.github.jrubygradle.JRubyExec
7
+ repositories {
8
+ mavenCentral()
9
+ jcenter()
10
+ mavenLocal()
11
+ }
12
+ configurations {
13
+ provided
14
+ }
15
+
16
+ version = "0.1.0"
17
+
18
+ dependencies {
19
+ compile "org.embulk:embulk-core:0.4.0"
20
+ provided "org.embulk:embulk-core:0.4.0"
21
+ compile 'org.elasticsearch:elasticsearch:1.4.2'
22
+ testCompile "junit:junit:4.+"
23
+ testCompile "org.mockito:mockito-core:1.+"
24
+ }
25
+
26
+ task classpath(type: Copy, dependsOn: ["jar"]) { // Copy task, run after "jar": gathers the jars to ship into the "classpath" directory
27
+ doFirst { file("classpath").deleteDir() } // remove the previous "classpath" directory before copying
28
+ from (configurations.runtime - configurations.provided + files(jar.archivePath)) // runtime dependencies minus the "provided" configuration, plus this project's own jar
29
+ into "classpath"
30
+ }
31
+ clean { delete 'classpath' }
32
+
33
+ //task copyDependencies(type:Copy) {
34
+ // new File("$buildDir/libs/dependencies").mkdirs()
35
+ // into "$buildDir/libs/dependencies" from configurations.runtime
36
+ //}
37
+
38
+ task gem(type: JRubyExec, dependsOn: ["build", "gemspec", "classpath"]) { // builds the gem through JRuby once build, gemspec and classpath have run
39
+ jrubyArgs "-rrubygems/gem_runner", "-eGem::GemRunner.new.run(ARGV)", "build" // loads rubygems/gem_runner and runs Gem::GemRunner with the "build" command
40
+ script "build/gemspec" // the spec file written by the gemspec task
41
+ doLast { ant.move(file: "${project.name}-${project.version}.gem", todir: "pkg") } // move the generated <name>-<version>.gem into pkg/
42
+ }
43
+
44
+ task gemspec << { file("build/gemspec").write($/
45
+ Gem::Specification.new do |spec|
46
+ spec.name = "${project.name}"
47
+ spec.version = "${project.version}"
48
+ spec.authors = ["Muga Nishizawa"]
49
+ spec.summary = %[Elasticsearch output plugin for Embulk]
50
+ spec.description = %[Elasticsearch output plugin is an Embulk plugin that loads records to Elasticsearch read by any input plugins. Search the input plugins by "embulk-input" keyword.]
51
+ spec.email = ["muga.nishizawa@gmail.com"]
52
+ spec.licenses = ["Apache 2.0"]
53
+ spec.homepage = "https://github.com/muga/embulk-output-elasticsearch"
54
+
55
+ spec.files = `git ls-files`.split("\n") + Dir["classpath/*.jar"]
56
+ spec.test_files = spec.files.grep(%r"^(test|spec)/")
57
+ spec.require_paths = ["lib"]
58
+ spec.executables = spec.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
59
+ spec.has_rdoc = false
60
+
61
+ spec.add_development_dependency "bundler", [">= 1.0"]
62
+ spec.add_development_dependency "rake", [">= 10.0"]
63
+ spec.add_development_dependency "test-unit", ["~> 3.0.2"]
64
+ end
65
+ /$)
66
+ }
Binary file
@@ -0,0 +1,6 @@
1
+ #Thu Feb 05 00:05:43 PST 2015
2
+ distributionBase=GRADLE_USER_HOME
3
+ distributionPath=wrapper/dists
4
+ zipStoreBase=GRADLE_USER_HOME
5
+ zipStorePath=wrapper/dists
6
+ distributionUrl=https\://services.gradle.org/distributions/gradle-2.1-bin.zip
data/gradlew ADDED
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env bash
2
+
3
+ ##############################################################################
4
+ ##
5
+ ## Gradle start up script for UN*X
6
+ ##
7
+ ##############################################################################
8
+
9
+ # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
10
+ DEFAULT_JVM_OPTS=""
11
+
12
+ APP_NAME="Gradle"
13
+ APP_BASE_NAME=`basename "$0"`
14
+
15
+ # Use the maximum available, or set MAX_FD != -1 to use that value.
16
+ MAX_FD="maximum"
17
+
18
+ warn ( ) {
19
+ echo "$*"
20
+ }
21
+
22
+ die ( ) {
23
+ echo
24
+ echo "$*"
25
+ echo
26
+ exit 1
27
+ }
28
+
29
+ # OS specific support (must be 'true' or 'false').
30
+ cygwin=false
31
+ msys=false
32
+ darwin=false
33
+ case "`uname`" in
34
+ CYGWIN* )
35
+ cygwin=true
36
+ ;;
37
+ Darwin* )
38
+ darwin=true
39
+ ;;
40
+ MINGW* )
41
+ msys=true
42
+ ;;
43
+ esac
44
+
45
+ # For Cygwin, ensure paths are in UNIX format before anything is touched.
46
+ if $cygwin ; then
47
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
48
+ fi
49
+
50
+ # Attempt to set APP_HOME
51
+ # Resolve links: $0 may be a link
52
+ PRG="$0"
53
+ # Need this for relative symlinks.
54
+ while [ -h "$PRG" ] ; do
55
+ ls=`ls -ld "$PRG"`
56
+ link=`expr "$ls" : '.*-> \(.*\)$'`
57
+ if expr "$link" : '/.*' > /dev/null; then
58
+ PRG="$link"
59
+ else
60
+ PRG=`dirname "$PRG"`"/$link"
61
+ fi
62
+ done
63
+ SAVED="`pwd`"
64
+ cd "`dirname \"$PRG\"`/" >&-
65
+ APP_HOME="`pwd -P`"
66
+ cd "$SAVED" >&-
67
+
68
+ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
69
+
70
+ # Determine the Java command to use to start the JVM.
71
+ if [ -n "$JAVA_HOME" ] ; then
72
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
73
+ # IBM's JDK on AIX uses strange locations for the executables
74
+ JAVACMD="$JAVA_HOME/jre/sh/java"
75
+ else
76
+ JAVACMD="$JAVA_HOME/bin/java"
77
+ fi
78
+ if [ ! -x "$JAVACMD" ] ; then
79
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
80
+
81
+ Please set the JAVA_HOME variable in your environment to match the
82
+ location of your Java installation."
83
+ fi
84
+ else
85
+ JAVACMD="java"
86
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
87
+
88
+ Please set the JAVA_HOME variable in your environment to match the
89
+ location of your Java installation."
90
+ fi
91
+
92
+ # Increase the maximum file descriptors if we can.
93
+ if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then
94
+ MAX_FD_LIMIT=`ulimit -H -n`
95
+ if [ $? -eq 0 ] ; then
96
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
97
+ MAX_FD="$MAX_FD_LIMIT"
98
+ fi
99
+ ulimit -n $MAX_FD
100
+ if [ $? -ne 0 ] ; then
101
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
102
+ fi
103
+ else
104
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
105
+ fi
106
+ fi
107
+
108
+ # For Darwin, add options to specify how the application appears in the dock
109
+ if $darwin; then
110
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
111
+ fi
112
+
113
+ # For Cygwin, switch paths to Windows format before running java
114
+ if $cygwin ; then
115
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
116
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
117
+
118
+ # We build the pattern for arguments to be converted via cygpath
119
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
120
+ SEP=""
121
+ for dir in $ROOTDIRSRAW ; do
122
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
123
+ SEP="|"
124
+ done
125
+ OURCYGPATTERN="(^($ROOTDIRS))"
126
+ # Add a user-defined pattern to the cygpath arguments
127
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
128
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
129
+ fi
130
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
131
+ i=0
132
+ for arg in "$@" ; do
133
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
134
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
135
+
136
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
137
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
138
+ else
139
+ eval `echo args$i`="\"$arg\""
140
+ fi
141
+ i=$((i+1))
142
+ done
143
+ case $i in
144
+ (0) set -- ;;
145
+ (1) set -- "$args0" ;;
146
+ (2) set -- "$args0" "$args1" ;;
147
+ (3) set -- "$args0" "$args1" "$args2" ;;
148
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
149
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
150
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
151
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
152
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
153
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
154
+ esac
155
+ fi
156
+
157
+ # Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
158
+ function splitJvmOpts() {
159
+ JVM_OPTS=("$@")
160
+ }
161
+ eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
162
+ JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
163
+
164
+ exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
data/gradlew.bat ADDED
@@ -0,0 +1,90 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
12
+ set DEFAULT_JVM_OPTS=
13
+
14
+ set DIRNAME=%~dp0
15
+ if "%DIRNAME%" == "" set DIRNAME=.
16
+ set APP_BASE_NAME=%~n0
17
+ set APP_HOME=%DIRNAME%
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windowz variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+ if "%@eval[2+2]" == "4" goto 4NT_args
53
+
54
+ :win9xME_args
55
+ @rem Slurp the command line arguments.
56
+ set CMD_LINE_ARGS=
57
+ set _SKIP=2
58
+
59
+ :win9xME_args_slurp
60
+ if "x%~1" == "x" goto execute
61
+
62
+ set CMD_LINE_ARGS=%*
63
+ goto execute
64
+
65
+ :4NT_args
66
+ @rem Get arguments from the 4NT Shell from JP Software
67
+ set CMD_LINE_ARGS=%$
68
+
69
+ :execute
70
+ @rem Setup the command line
71
+
72
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
73
+
74
+ @rem Execute Gradle
75
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
76
+
77
+ :end
78
+ @rem End local scope for the variables with windows NT shell
79
+ if "%ERRORLEVEL%"=="0" goto mainEnd
80
+
81
+ :fail
82
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
+ rem the _cmd.exe /c_ return code!
84
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
85
+ exit /b 1
86
+
87
+ :mainEnd
88
+ if "%OS%"=="Windows_NT" endlocal
89
+
90
+ :omega
data/settings.gradle ADDED
@@ -0,0 +1 @@
1
+ rootProject.name = 'embulk-output-elasticsearch'
@@ -0,0 +1,351 @@
1
+ package org.embulk.output;
2
+
3
+ import com.google.common.base.Optional;
4
+ import com.google.common.base.Throwables;
5
+ import com.google.inject.Inject;
6
+ import org.elasticsearch.action.bulk.BulkItemResponse;
7
+ import org.elasticsearch.action.bulk.BulkProcessor;
8
+ import org.elasticsearch.action.bulk.BulkRequest;
9
+ import org.elasticsearch.action.bulk.BulkResponse;
10
+ import org.elasticsearch.action.index.IndexRequest;
11
+ import org.elasticsearch.client.Client;
12
+ import org.elasticsearch.client.Requests;
13
+ import org.elasticsearch.common.settings.ImmutableSettings;
14
+ import org.elasticsearch.common.settings.Settings;
15
+ import org.elasticsearch.common.xcontent.XContentBuilder;
16
+ import org.elasticsearch.common.xcontent.XContentFactory;
17
+ import org.elasticsearch.node.Node;
18
+ import org.elasticsearch.node.NodeBuilder;
19
+ import org.embulk.config.CommitReport;
20
+ import org.embulk.config.Config;
21
+ import org.embulk.config.ConfigDefault;
22
+ import org.embulk.config.ConfigDiff;
23
+ import org.embulk.config.ConfigSource;
24
+ import org.embulk.config.Task;
25
+ import org.embulk.config.TaskSource;
26
+ import org.embulk.spi.Column;
27
+ import org.embulk.spi.Exec;
28
+ import org.embulk.spi.OutputPlugin;
29
+ import org.embulk.spi.Page;
30
+ import org.embulk.spi.PageReader;
31
+ import org.embulk.spi.Schema;
32
+ import org.embulk.spi.ColumnVisitor;
33
+ import org.embulk.spi.TransactionalPageOutput;
34
+ import org.slf4j.Logger;
35
+
36
+ import java.io.IOException;
37
+ import java.util.List;
38
+ import java.util.concurrent.TimeUnit;
39
+
40
+ public class ElasticsearchOutputPlugin
41
+ implements OutputPlugin
42
+ {
43
+ public interface RunnerTask
44
+ extends Task
45
+ {
46
+ @Config("cluster")
47
+ @ConfigDefault("elasticsearch")
48
+ public String getClusterName();
49
+
50
+ @Config("index_name")
51
+ @ConfigDefault("embulk")
52
+ public String getIndex();
53
+
54
+ @Config("index_type")
55
+ @ConfigDefault("embulk")
56
+ public String getIndexType();
57
+
58
+ @Config("doc_id")
59
+ @ConfigDefault("null")
60
+ public Optional<String> getDocId();
61
+
62
+ @Config("bulk_actions")
63
+ @ConfigDefault("1000")
64
+ public int getBulkActions();
65
+
66
+ @Config("concurrent_requests")
67
+ @ConfigDefault("5")
68
+ public int getConcurrentRequests();
69
+
70
+ }
71
+
72
+ private final Logger log;
73
+
74
+ @Inject
75
+ public ElasticsearchOutputPlugin()
76
+ {
77
+ log = Exec.getLogger(getClass());
78
+ }
79
+
80
+ @Override
81
+ public ConfigDiff transaction(ConfigSource config, Schema schema,
82
+ int processorCount, Control control)
83
+ {
84
+ final RunnerTask task = config.loadConfig(RunnerTask.class);
85
+
86
+ try (Node node = createNode(task)) {
87
+ try (Client client = createClient(task, node)) {
88
+ }
89
+ }
90
+
91
+ try {
92
+ control.run(task.dump());
93
+ } catch (Exception e) {
94
+ throw Throwables.propagate(e);
95
+ }
96
+
97
+ ConfigDiff nextConfig = Exec.newConfigDiff();
98
+ return nextConfig;
99
+ }
100
+
101
+ @Override
102
+ public ConfigDiff resume(TaskSource taskSource,
103
+ Schema schema, int processorCount,
104
+ OutputPlugin.Control control)
105
+ {
106
+ // TODO
107
+ return Exec.newConfigDiff();
108
+ }
109
+
110
+ @Override
111
+ public void cleanup(TaskSource taskSource,
112
+ Schema schema, int processorCount,
113
+ List<CommitReport> successCommitReports)
114
+ { }
115
+
116
+ private Node createNode(final RunnerTask task)
117
+ {
118
+ // @see http://www.elasticsearch.org/guide/en/elasticsearch/client/java-api/current/client.html
119
+ Settings settings = ImmutableSettings.settingsBuilder()
120
+ .classLoader(Settings.class.getClassLoader())
121
+ .build();
122
+ return NodeBuilder.nodeBuilder()
123
+ .clusterName(task.getClusterName())
124
+ .settings(settings)
125
+ .node();
126
+ }
127
+
128
+ private Client createClient(final RunnerTask task, final Node node)
129
+ {
130
+ return node.client();
131
+ }
132
+
133
+ private BulkProcessor newBulkProcessor(final RunnerTask task, final Client client)
134
+ {
135
+ return BulkProcessor.builder(client, new BulkProcessor.Listener() {
136
+ @Override
137
+ public void beforeBulk(long executionId, BulkRequest request)
138
+ {
139
+ log.info("Execute {} bulk actions", request.numberOfActions());
140
+ }
141
+
142
+ @Override
143
+ public void afterBulk(long executionId, BulkRequest request, BulkResponse response)
144
+ {
145
+ if (response.hasFailures()) {
146
+ long items = 0;
147
+ if (log.isDebugEnabled()) {
148
+ for (BulkItemResponse item : response.getItems()) {
149
+ if (item.isFailed()) {
150
+ items += 1;
151
+ log.debug(" Error for {}/{}/{} for {} operation: {}",
152
+ item.getIndex(), item.getType(), item.getId(),
153
+ item.getOpType(), item.getFailureMessage());
154
+ }
155
+ }
156
+ }
157
+ log.warn("{} bulk actions failed: {}", items, response.buildFailureMessage());
158
+ } else {
159
+ log.info("{} bulk actions succeeded", request.numberOfActions());
160
+ }
161
+ }
162
+
163
+ @Override
164
+ public void afterBulk(long executionId, BulkRequest request, Throwable failure)
165
+ {
166
+ log.warn("Got the error during bulk processing", failure);
167
+ }
168
+ }).setBulkActions(task.getBulkActions())
169
+ .setConcurrentRequests(task.getConcurrentRequests())
170
+ .build();
171
+ }
172
+
173
+ @Override
174
+ public TransactionalPageOutput open(TaskSource taskSource, Schema schema,
175
+ int processorIndex)
176
+ {
177
+ final RunnerTask task = taskSource.loadTask(RunnerTask.class);
178
+
179
+ Node node = createNode(task);
180
+ Client client = createClient(task, node);
181
+ BulkProcessor bulkProcessor = newBulkProcessor(task, client);
182
+ ElasticsearchPageOutput pageOutput = new ElasticsearchPageOutput(task, node, client, bulkProcessor);
183
+ pageOutput.open(schema);
184
+ return pageOutput;
185
+ }
186
+
187
+ static class ElasticsearchPageOutput implements TransactionalPageOutput
188
+ {
189
+ private Logger log;
190
+
191
+ private Node node;
192
+ private Client client;
193
+ private BulkProcessor bulkProcessor;
194
+
195
+ private PageReader pageReader;
196
+
197
+ private final String index;
198
+ private final String indexType;
199
+ private final String docId;
200
+
201
+ ElasticsearchPageOutput(RunnerTask task, Node node, Client client,
202
+ BulkProcessor bulkProcessor)
203
+ {
204
+ this.log = Exec.getLogger(getClass());
205
+
206
+ this.node = node;
207
+ this.client = client;
208
+ this.bulkProcessor = bulkProcessor;
209
+
210
+ this.index = task.getIndex();
211
+ this.indexType = task.getIndexType();
212
+ this.docId = task.getDocId().orNull();
213
+ }
214
+
215
+ void open(final Schema schema)
216
+ {
217
+ pageReader = new PageReader(schema);
218
+ }
219
+
220
+ @Override
221
+ public void add(Page page)
222
+ {
223
+ pageReader.setPage(page);
224
+
225
+ while (pageReader.nextRecord()) {
226
+ try {
227
+ final XContentBuilder contextBuilder = XContentFactory.jsonBuilder().startObject(); // TODO reusable??
228
+ pageReader.getSchema().visitColumns(new ColumnVisitor() {
229
+ @Override
230
+ public void booleanColumn(Column column) {
231
+ try {
232
+ contextBuilder.field(column.getName(), pageReader.getBoolean(column));
233
+ } catch (IOException e) {
234
+ try {
235
+ contextBuilder.nullField(column.getName());
236
+ } catch (IOException ex) {
237
+ throw Throwables.propagate(ex);
238
+ }
239
+ }
240
+ }
241
+
242
+ @Override
243
+ public void longColumn(Column column) {
244
+ try {
245
+ contextBuilder.field(column.getName(), pageReader.getLong(column));
246
+ } catch (IOException e) {
247
+ try {
248
+ contextBuilder.nullField(column.getName());
249
+ } catch (IOException ex) {
250
+ throw Throwables.propagate(ex);
251
+ }
252
+ }
253
+ }
254
+
255
+ @Override
256
+ public void doubleColumn(Column column) {
257
+ try {
258
+ contextBuilder.field(column.getName(), pageReader.getDouble(column));
259
+ } catch (IOException e) {
260
+ try {
261
+ contextBuilder.nullField(column.getName());
262
+ } catch (IOException ex) {
263
+ throw Throwables.propagate(ex);
264
+ }
265
+ }
266
+ }
267
+
268
+ @Override
269
+ public void stringColumn(Column column) {
270
+ try {
271
+ contextBuilder.field(column.getName(), pageReader.getString(column));
272
+ } catch (IOException e) {
273
+ try {
274
+ contextBuilder.nullField(column.getName());
275
+ } catch (IOException ex) {
276
+ throw Throwables.propagate(ex);
277
+ }
278
+ }
279
+ }
280
+
281
+ @Override
282
+ public void timestampColumn(Column column) {
283
+ // TODO
284
+ }
285
+ });
286
+
287
+ contextBuilder.endObject();
288
+ bulkProcessor.add(newIndexRequest().source(contextBuilder));
289
+
290
+ } catch (IOException e) {
291
+ Throwables.propagate(e); // TODO error handling
292
+ }
293
+ }
294
+ }
295
+
296
+ private IndexRequest newIndexRequest()
297
+ {
298
+ return Requests.indexRequest(index).type(indexType).id(docId);
299
+ }
300
+
301
+ @Override
302
+ public void finish()
303
+ {
304
+ try {
305
+ bulkProcessor.flush();
306
+ } finally {
307
+ close();
308
+ }
309
+ }
310
+
311
+ @Override
312
+ public void close()
313
+ {
314
+ if (bulkProcessor != null) {
315
+ try {
316
+ while (!bulkProcessor.awaitClose(3, TimeUnit.SECONDS)) {
317
+ log.debug("wait for closing the bulk processing..");
318
+ }
319
+ } catch (InterruptedException e) {
320
+ Thread.currentThread().interrupt();
321
+ }
322
+ bulkProcessor = null;
323
+ }
324
+
325
+ if (client != null) {
326
+ client.close(); // ElasticsearchException
327
+ client = null;
328
+ }
329
+
330
+ if (node != null) {
331
+ node.close();
332
+ node = null;
333
+ }
334
+ }
335
+
336
+ @Override
337
+ public void abort()
338
+ {
339
+ // TODO do nothing
340
+ }
341
+
342
+ @Override
343
+ public CommitReport commit()
344
+ {
345
+ CommitReport report = Exec.newCommitReport();
346
+ // TODO
347
+ return report;
348
+ }
349
+
350
+ }
351
+ }
@@ -0,0 +1,31 @@
1
+ package org.embulk.output;
2
+
3
+ import com.google.common.base.Preconditions;
4
+ import com.google.common.collect.ImmutableList;
5
+ import com.google.inject.Binder;
6
+ import com.google.inject.Module;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.spi.Extension;
9
+ import org.embulk.spi.OutputPlugin;
10
+
11
+ import java.util.List;
12
+
13
+ import static org.embulk.plugin.InjectedPluginSource.registerPluginTo;
14
+
15
+ public class ElasticsearchOutputPluginModule
16
+ implements Extension, Module
17
+ {
18
+
19
+ @Override
20
+ public void configure(Binder binder)
21
+ {
22
+ Preconditions.checkNotNull(binder, "binder is null.");
23
+ registerPluginTo(binder, OutputPlugin.class, "elasticsearch", ElasticsearchOutputPlugin.class);
24
+ }
25
+
26
+ @Override
27
+ public List<Module> getModules(ConfigSource systemConfig)
28
+ {
29
+ return ImmutableList.<Module>of(this);
30
+ }
31
+ }
@@ -0,0 +1 @@
1
+ org.embulk.output.ElasticsearchOutputPluginModule
@@ -0,0 +1,5 @@
1
+ package org.embulk.output;
2
+
3
+ public class TestElasticsearchOutputPlugin // Placeholder test class: no test cases are defined yet.
4
+ {
5
+ }
metadata ADDED
@@ -0,0 +1,113 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-output-elasticsearch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Muga Nishizawa
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '1.0'
25
+ prerelease: false
26
+ type: :development
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '10.0'
39
+ prerelease: false
40
+ type: :development
41
+ - !ruby/object:Gem::Dependency
42
+ name: test-unit
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.2
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ~>
51
+ - !ruby/object:Gem::Version
52
+ version: 3.0.2
53
+ prerelease: false
54
+ type: :development
55
+ description: Elasticsearch output plugin is an Embulk plugin that loads records to Elasticsearch read by any input plugins. Search the input plugins by "embulk-input" keyword.
56
+ email:
57
+ - muga.nishizawa@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - .gitignore
63
+ - README.md
64
+ - build.gradle
65
+ - gradle/wrapper/gradle-wrapper.jar
66
+ - gradle/wrapper/gradle-wrapper.properties
67
+ - gradlew
68
+ - gradlew.bat
69
+ - settings.gradle
70
+ - src/main/java/org/embulk/output/ElasticsearchOutputPlugin.java
71
+ - src/main/java/org/embulk/output/ElasticsearchOutputPluginModule.java
72
+ - src/main/resources/META-INF/services/org.embulk.spi.Extension
73
+ - src/test/java/org/embulk/output/TestElasticsearchOutputPlugin.java
74
+ - classpath/elasticsearch-1.4.2.jar
75
+ - classpath/embulk-output-elasticsearch-0.1.0.jar
76
+ - classpath/lucene-analyzers-common-4.10.2.jar
77
+ - classpath/lucene-core-4.10.2.jar
78
+ - classpath/lucene-grouping-4.10.2.jar
79
+ - classpath/lucene-highlighter-4.10.2.jar
80
+ - classpath/lucene-join-4.10.2.jar
81
+ - classpath/lucene-memory-4.10.2.jar
82
+ - classpath/lucene-misc-4.10.2.jar
83
+ - classpath/lucene-queries-4.10.2.jar
84
+ - classpath/lucene-queryparser-4.10.2.jar
85
+ - classpath/lucene-sandbox-4.10.2.jar
86
+ - classpath/lucene-spatial-4.10.2.jar
87
+ - classpath/lucene-suggest-4.10.2.jar
88
+ - classpath/spatial4j-0.4.1.jar
89
+ homepage: https://github.com/muga/embulk-output-elasticsearch
90
+ licenses:
91
+ - Apache 2.0
92
+ metadata: {}
93
+ post_install_message:
94
+ rdoc_options: []
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ requirements:
99
+ - - '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - '>='
105
+ - !ruby/object:Gem::Version
106
+ version: '0'
107
+ requirements: []
108
+ rubyforge_project:
109
+ rubygems_version: 2.1.9
110
+ signing_key:
111
+ specification_version: 4
112
+ summary: Elasticsearch output plugin for Embulk
113
+ test_files: []