embulk-output-elasticsearch 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,286 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonValue;
5
+ import com.google.common.annotations.VisibleForTesting;
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.base.Throwables;
8
+ import org.eclipse.jetty.util.ssl.SslContextFactory;
9
+ import org.embulk.base.restclient.RestClientOutputPluginDelegate;
10
+ import org.embulk.base.restclient.RestClientOutputTaskBase;
11
+ import org.embulk.base.restclient.jackson.JacksonServiceRequestMapper;
12
+ import org.embulk.base.restclient.jackson.JacksonTopLevelValueLocator;
13
+ import org.embulk.base.restclient.jackson.scope.JacksonAllInObjectScope;
14
+ import org.embulk.base.restclient.record.RecordBuffer;
15
+ import org.embulk.config.Config;
16
+ import org.embulk.config.ConfigDefault;
17
+ import org.embulk.config.ConfigDiff;
18
+ import org.embulk.config.ConfigException;
19
+ import org.embulk.config.Task;
20
+ import org.embulk.config.TaskReport;
21
+ import org.embulk.spi.Exec;
22
+ import org.embulk.spi.Schema;
23
+ import org.embulk.spi.time.TimestampFormatter;
24
+ import org.embulk.util.retryhelper.jetty92.Jetty92ClientCreator;
25
+ import org.embulk.util.retryhelper.jetty92.Jetty92RetryHelper;
26
+ import org.joda.time.DateTimeZone;
27
+ import org.slf4j.Logger;
28
+
29
+ import java.util.List;
30
+ import java.util.Locale;
31
+
32
+ public class ElasticsearchOutputPluginDelegate
33
+ implements RestClientOutputPluginDelegate<ElasticsearchOutputPluginDelegate.PluginTask>
34
+ {
35
+ private final Logger log; // logger obtained from Exec for this class
36
+ private final ElasticsearchHttpClient client; // used below for version lookup, name validation and alias handling
37
+
38
+ public ElasticsearchOutputPluginDelegate() // sets up the logger and a new ElasticsearchHttpClient
39
+ {
40
+ this.log = Exec.getLogger(getClass());
41
+ this.client = new ElasticsearchHttpClient();
42
+ }
43
+
44
+ public interface NodeAddressTask // element type of the "nodes" list in PluginTask: one host/port pair
45
+ extends Task
46
+ {
47
+ @Config("host")
48
+ String getHost(); // no @ConfigDefault is declared for "host"
49
+
50
+ @Config("port")
51
+ @ConfigDefault("9200")
52
+ int getPort(); // defaults to 9200; validateOutputTask warns when it is 9300
53
+ }
54
+
55
+ public interface PluginTask // configuration options of this output plugin
56
+ extends RestClientOutputTaskBase, TimestampFormatter.Task
57
+ {
58
+ @Config("mode")
59
+ @ConfigDefault("\"insert\"")
60
+ Mode getMode(); // "insert" (default) or "replace", parsed by Mode.fromString
61
+
62
+ @Config("nodes")
63
+ List<NodeAddressTask> getNodes(); // checked node by node in validateOutputTask
64
+
65
+ @Config("cluster_name")
66
+ @ConfigDefault("\"elasticsearch\"")
67
+ String getClusterName();
68
+
69
+ @Config("index")
70
+ String getIndex();
71
+ void setIndex(String indexName); // called by validateOutputTask in replace mode with a generated index name
72
+
73
+ @Config("alias")
74
+ @ConfigDefault("null")
75
+ Optional<String> getAlias();
76
+ void setAlias(Optional<String> aliasName); // called by validateOutputTask in replace mode with the configured index name
77
+
78
+ @Config("index_type")
79
+ String getType(); // config key is "index_type" although the getter is getType
80
+
81
+ @Config("id")
82
+ @ConfigDefault("null")
83
+ Optional<String> getId();
84
+
85
+ @Config("use_ssl")
86
+ @ConfigDefault("false")
87
+ boolean getUseSsl();
88
+
89
+ @Config("auth_method")
90
+ @ConfigDefault("\"none\"")
91
+ AuthMethod getAuthMethod(); // "none" (default) or "basic", parsed by AuthMethod.fromString
92
+
93
+ @Config("user")
94
+ @ConfigDefault("null")
95
+ Optional<String> getUser(); // must be present when auth_method is "basic"
96
+
97
+ @Config("password")
98
+ @ConfigDefault("null")
99
+ Optional<String> getPassword(); // must be present when auth_method is "basic"
100
+
101
+ @Config("bulk_actions")
102
+ @ConfigDefault("1000")
103
+ int getBulkActions(); // record-count flush threshold read by ElasticsearchRecordBuffer
104
+
105
+ @Config("bulk_size")
106
+ @ConfigDefault("5242880")
107
+ long getBulkSize(); // byte-size flush threshold read by ElasticsearchRecordBuffer (default 5 * 1024 * 1024)
108
+
109
+ @Config("concurrent_requests")
110
+ @ConfigDefault("5")
111
+ int getConcurrentRequests();
112
+
113
+ @Config("maximum_retries")
114
+ @ConfigDefault("7")
115
+ int getMaximumRetries(); // passed to Jetty92RetryHelper in createRetryHelper
116
+
117
+ @Config("initial_retry_interval_millis")
118
+ @ConfigDefault("1000")
119
+ int getInitialRetryIntervalMillis(); // passed to Jetty92RetryHelper in createRetryHelper
120
+
121
+ @Config("maximum_retry_interval_millis")
122
+ @ConfigDefault("120000")
123
+ int getMaximumRetryIntervalMillis(); // passed to Jetty92RetryHelper in createRetryHelper
124
+
125
+ @Config("timeout_millis")
126
+ @ConfigDefault("60000")
127
+ int getTimeoutMills(); // NOTE(review): getter is spelled "Mills", not "Millis"; renaming would change the interface
128
+
129
+ @Config("time_zone")
130
+ @ConfigDefault("\"UTC\"")
131
+ String getTimeZone(); // resolved with DateTimeZone.forID in buildServiceRequestMapper
132
+ }
133
+
134
+ public enum Mode // write modes accepted by the "mode" option
135
+ {
136
+ INSERT,
137
+ REPLACE;
138
+
139
+ @JsonValue
140
+ @Override
141
+ public String toString() // lower-case constant name, the form accepted by fromString
142
+ {
143
+ return name().toLowerCase(Locale.ENGLISH);
144
+ }
145
+
146
+ @JsonCreator
147
+ public static Mode fromString(String value) // maps "insert"/"replace" to a constant; any other value throws ConfigException
148
+ {
149
+ switch (value) {
150
+ case "insert":
151
+ return INSERT;
152
+ case "replace":
153
+ return REPLACE;
154
+ default:
155
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are insert, replace", value)); // lists only the modes this enum accepts
156
+ }
157
+ }
158
+ }
159
+
160
+ public enum AuthMethod // authentication methods accepted by the "auth_method" option
161
+ {
162
+ NONE,
163
+ BASIC; // validateOutputTask requires "user" and "password" for this method
164
+
165
+ @JsonValue
166
+ @Override
167
+ public String toString() // lower-case constant name, the form accepted by fromString
168
+ {
169
+ return name().toLowerCase(Locale.ENGLISH);
170
+ }
171
+
172
+ @JsonCreator
173
+ public static AuthMethod fromString(String value) // maps "none"/"basic" to a constant; any other value throws ConfigException
174
+ {
175
+ switch (value) {
176
+ case "none":
177
+ return NONE;
178
+ case "basic":
179
+ return BASIC;
180
+ default:
181
+ throw new ConfigException(String.format("Unknown auth_method '%s'. Supported auth_method are none, basic", value));
182
+ }
183
+ }
184
+ }
185
+
186
+ @Override // Overridden from |OutputTaskValidatable|
187
+ public void validateOutputTask(PluginTask task, Schema embulkSchema, int taskCount)
188
+ {
189
+ // Check settings that need no network access first, so a bad configuration fails fast with ConfigException.
190
+ if (task.getAuthMethod() == AuthMethod.BASIC) {
191
+ if (!task.getUser().isPresent() || !task.getPassword().isPresent()) {
192
+ throw new ConfigException("'user' and 'password' are required when auth_method='basic'");
193
+ }
194
+ }
195
+
196
+ for (NodeAddressTask node : task.getNodes()) { // an empty list simply skips the loop
197
+ if (node.getHost().endsWith("es.amazonaws.com")) {
198
+ throw new ConfigException("This plugin doesn't support AWS Elasticsearch Service.");
199
+ }
200
+ if (node.getPort() == 9300) {
201
+ log.warn("Port:9300 is usually used by TransportClient. HTTP/Rest Client uses 9200.");
202
+ }
203
+ }
204
+
205
+ try (Jetty92RetryHelper retryHelper = createRetryHelper(task)) { // helper is closed when validation ends
206
+ log.info(String.format("Connecting to Elasticsearch version:%s", client.getEsVersion(task, retryHelper)));
207
+ log.info("Executing plugin with '{}' mode.", task.getMode());
208
+ client.validateIndexOrAliasName(task.getIndex(), "index");
209
+ client.validateIndexOrAliasName(task.getType(), "index_type");
210
+
211
+ if (task.getMode().equals(Mode.REPLACE)) {
212
+ // Replace mode: the configured index name becomes the alias and the task's index is set to a newly generated name.
213
+ task.setAlias(Optional.of(task.getIndex()));
214
+ task.setIndex(client.generateNewIndexName(task.getIndex()));
215
+ if (client.isIndexExisting(task.getAlias().orNull(), task, retryHelper) && !client.isAliasExisting(task.getAlias().orNull(), task, retryHelper)) {
216
+ throw new ConfigException(String.format("Invalid alias name [%s], an index exists with the same name as the alias", task.getAlias().orNull()));
217
+ }
218
+ }
219
+ log.info(String.format("Inserting data into index[%s]", task.getIndex()));
220
+ }
221
+ }
222
+
223
+ @Override // Overridden from |ServiceRequestMapperBuildable|
224
+ public JacksonServiceRequestMapper buildServiceRequestMapper(PluginTask task) // maps every Embulk column into one JSON object per record
225
+ {
226
+ TimestampFormatter formatter = new TimestampFormatter(task.getJRuby(), "%Y-%m-%dT%H:%M:%S.%3N%z", DateTimeZone.forID(task.getTimeZone())); // timestamp columns are formatted in the configured time_zone
227
+
228
+ return JacksonServiceRequestMapper.builder()
229
+ .add(new JacksonAllInObjectScope(formatter), new JacksonTopLevelValueLocator("record")) // placed under top-level "record", the key ElasticsearchRecordBuffer.bufferRecord reads
230
+ .build();
231
+ }
232
+
233
+ @Override // Overridden from |RecordBufferBuildable|
234
+ public RecordBuffer buildRecordBuffer(PluginTask task) // returns a new ElasticsearchRecordBuffer with its own retry helper
235
+ {
236
+ Jetty92RetryHelper retryHelper = createRetryHelper(task); // not closed here; ElasticsearchRecordBuffer.commitWithTaskReportUpdated closes it
237
+ return new ElasticsearchRecordBuffer("records", task, retryHelper);
238
+ }
239
+
240
+ @Override
241
+ public ConfigDiff egestEmbulkData(final PluginTask task, // sums the per-task "inserted" counts, logs the total, and reassigns the alias in replace mode
242
+ Schema schema,
243
+ int taskIndex,
244
+ List<TaskReport> taskReports)
245
+ {
246
+ long totalInserted = 0;
247
+ for (TaskReport taskReport : taskReports) {
248
+ if (taskReport.has("inserted")) { // reports without the key add nothing to the total
249
+ totalInserted += taskReport.get(Long.class, "inserted");
250
+ }
251
+ }
252
+
253
+ log.info("Insert completed. {} records", totalInserted);
254
+ try (Jetty92RetryHelper retryHelper = createRetryHelper(task)) {
255
+ // Reassign the alias only in replace mode
256
+ if (task.getMode().equals(Mode.REPLACE)) {
257
+ client.reassignAlias(task.getAlias().orNull(), task.getIndex(), task, retryHelper);
258
+ }
259
+ }
260
+
261
+ return Exec.newConfigDiff(); // a new ConfigDiff; nothing is set on it here
262
+ }
263
+
264
+ @VisibleForTesting
265
+ protected Jetty92RetryHelper createRetryHelper(PluginTask task) // builds a retry helper from the task's retry settings; callers in this file close it themselves
266
+ {
267
+ return new Jetty92RetryHelper(
268
+ task.getMaximumRetries(),
269
+ task.getInitialRetryIntervalMillis(),
270
+ task.getMaximumRetryIntervalMillis(),
271
+ new Jetty92ClientCreator() {
272
+ @Override
273
+ public org.eclipse.jetty.client.HttpClient createAndStart()
274
+ {
275
+ org.eclipse.jetty.client.HttpClient client = new org.eclipse.jetty.client.HttpClient(new SslContextFactory()); // local "client" shadows the delegate's field of the same name
276
+ try {
277
+ client.start();
278
+ return client;
279
+ }
280
+ catch (Exception e) {
281
+ throw Throwables.propagate(e); // start-up failures are rethrown via Guava's propagate
282
+ }
283
+ }
284
+ });
285
+ }
286
+ }
@@ -0,0 +1,99 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.fasterxml.jackson.databind.DeserializationFeature;
4
+ import com.fasterxml.jackson.databind.JsonNode;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
6
+ import com.fasterxml.jackson.databind.node.ArrayNode;
7
+ import com.fasterxml.jackson.databind.node.JsonNodeFactory;
8
+ import com.google.common.base.Throwables;
9
+ import org.embulk.base.restclient.jackson.JacksonServiceRecord;
10
+ import org.embulk.base.restclient.record.RecordBuffer;
11
+ import org.embulk.base.restclient.record.ServiceRecord;
12
+ import org.embulk.config.TaskReport;
13
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask;
14
+ import org.embulk.spi.Exec;
15
+ import org.embulk.util.retryhelper.jetty92.Jetty92RetryHelper;
16
+ import org.slf4j.Logger;
17
+
18
+ import java.io.IOException;
19
+
20
+ /**
21
+ * ElasticsearchRecordBuffer is an implementation of {@code RecordBuffer} that buffers records as JSON and pushes them to the Elasticsearch server in batches.
22
+ */
23
+ public class ElasticsearchRecordBuffer
24
+ extends RecordBuffer
25
+ {
26
+ private final String attributeName; // stored by the constructor; not read elsewhere in this class
27
+ private final PluginTask task;
28
+ private final long bulkActions; // flush threshold: number of buffered records (task.getBulkActions())
29
+ private final long bulkSize; // flush threshold: buffered bytes (task.getBulkSize())
30
+ private final ElasticsearchHttpClient client;
31
+ private final Jetty92RetryHelper retryHelper; // supplied by the caller; closed in commitWithTaskReportUpdated
32
+ private final ObjectMapper mapper;
33
+ private final Logger log;
34
+ private long totalCount; // records buffered since construction
35
+ private int requestCount; // records in the current, not yet pushed batch
36
+ private long requestBytes; // bytes counted for the current batch
37
+ private ArrayNode records; // the current batch
38
+
39
+ public ElasticsearchRecordBuffer(String attributeName, PluginTask task, Jetty92RetryHelper retryHelper) // starts with an empty batch and zeroed counters
40
+ {
41
+ this.attributeName = attributeName;
42
+ this.task = task;
43
+ this.bulkActions = task.getBulkActions();
44
+ this.bulkSize = task.getBulkSize();
45
+ this.client = new ElasticsearchHttpClient();
46
+ this.retryHelper = retryHelper;
47
+ this.mapper = new ObjectMapper()
48
+ .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
49
+ .configure(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, false);
50
+ this.records = JsonNodeFactory.instance.arrayNode();
51
+ this.totalCount = 0;
52
+ this.requestCount = 0;
53
+ this.requestBytes = 0;
54
+ this.log = Exec.getLogger(getClass());
55
+ }
56
+
57
+ @Override
58
+ public void bufferRecord(ServiceRecord serviceRecord) // adds one record to the batch and pushes the batch once a threshold is reached
59
+ {
60
+ JacksonServiceRecord jacksonServiceRecord;
61
+ try {
62
+ jacksonServiceRecord = (JacksonServiceRecord) serviceRecord;
63
+ JsonNode record = mapper.readTree(jacksonServiceRecord.toString()).get("record"); // "record" is the key set up in buildServiceRequestMapper
64
+
65
+ requestCount++;
66
+ totalCount++;
67
+ requestBytes += record.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8).length; // count UTF-8 bytes so bulk_size does not depend on the platform default charset
68
+
69
+ records.add(record);
70
+ if (requestCount >= bulkActions || requestBytes >= bulkSize) {
71
+ client.push(records, task, retryHelper);
72
+ if (totalCount % 10000 == 0) { // progress is logged only when a push lands on a multiple of 10000
73
+ log.info("Inserted {} records", totalCount);
74
+ }
75
+ records = JsonNodeFactory.instance.arrayNode(); // start a new batch
76
+ requestBytes = 0;
77
+ requestCount = 0;
78
+ }
79
+ }
80
+ catch (ClassCastException ex) {
81
+ throw new RuntimeException(ex); // kept as-is so callers see the same exception type
82
+ }
83
+ catch (IOException ex) {
84
+ throw Throwables.propagate(ex);
85
+ }
86
+ }
87
+
88
+ @Override
89
+ public TaskReport commitWithTaskReportUpdated(TaskReport taskReport) // pushes any remaining records, closes the retry helper, and reports the total as "inserted"
90
+ {
91
+ try (Jetty92RetryHelper helper = this.retryHelper) { // closes the helper even when the final push throws
92
+ if (records.size() > 0) {
93
+ client.push(records, task, helper);
94
+ log.info("Inserted {} records", totalCount); // running total, matching the progress log in bufferRecord
95
+ }
96
+ return Exec.newTaskReport().set("inserted", totalCount);
97
+ }
98
+ }
99
+ }
@@ -0,0 +1,159 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.google.common.base.Throwables;
4
+ import com.google.common.collect.ImmutableList;
5
+ import com.google.common.collect.ImmutableMap;
6
+ import org.eclipse.jetty.util.ssl.SslContextFactory;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask;
9
+ import org.embulk.spi.Exec;
10
+ import org.embulk.util.retryhelper.jetty92.Jetty92ClientCreator;
11
+ import org.embulk.util.retryhelper.jetty92.Jetty92RetryHelper;
12
+
13
+ import java.lang.reflect.Method;
14
+ import java.util.Arrays;
15
+ import java.util.List;
16
+
17
+ import static org.junit.Assume.assumeNotNull;
18
+
19
+ public class ElasticsearchTestUtils
20
+ {
21
+ public static String ES_HOST; // all fields below are assigned by initializeConstant()
22
+ public static int ES_PORT;
23
+ public static List ES_NODES; // NOTE(review): raw List; holds one host/port map
24
+ public static String ES_INDEX;
25
+ public static String ES_INDEX_TYPE;
26
+ public static String ES_ID;
27
+ public static int ES_BULK_ACTIONS;
28
+ public static int ES_BULK_SIZE;
29
+ public static int ES_CONCURRENT_REQUESTS;
30
+ public static String PATH_PREFIX;
31
+ public static String ES_INDEX2;
32
+ public static String ES_ALIAS;
33
+
34
+ /*
35
+ * This test case requires environment variables
36
+ * ES_HOST
37
+ * ES_INDEX
38
+ * ES_INDEX_TYPE
39
+ */
40
+ public void initializeConstant() // instance method that fills the static fields from environment variables
41
+ {
42
+ ES_HOST = System.getenv("ES_HOST") != null ? System.getenv("ES_HOST") : ""; // NOTE(review): falls back to "" and never null, so assumeNotNull below cannot skip on a missing ES_HOST
43
+ ES_PORT = System.getenv("ES_PORT") != null ? Integer.valueOf(System.getenv("ES_PORT")) : 9200;
44
+
45
+ ES_INDEX = System.getenv("ES_INDEX");
46
+ ES_INDEX2 = ES_INDEX + "_02"; // derived before the null check below
47
+ ES_ALIAS = ES_INDEX + "_alias";
48
+ ES_INDEX_TYPE = System.getenv("ES_INDEX_TYPE");
49
+ ES_ID = "id";
50
+ ES_BULK_ACTIONS = System.getenv("ES_BULK_ACTIONS") != null ? Integer.valueOf(System.getenv("ES_BULK_ACTIONS")) : 1000;
51
+ ES_BULK_SIZE = System.getenv("ES_BULK_SIZE") != null ? Integer.valueOf(System.getenv("ES_BULK_SIZE")) : 5242880;
52
+ ES_CONCURRENT_REQUESTS = System.getenv("ES_CONCURRENT_REQUESTS") != null ? Integer.valueOf(System.getenv("ES_CONCURRENT_REQUESTS")) : 5;
53
+
54
+ assumeNotNull(ES_HOST, ES_INDEX, ES_INDEX_TYPE); // JUnit assumption: fails when ES_INDEX or ES_INDEX_TYPE is unset
55
+
56
+ ES_NODES = Arrays.asList(ImmutableMap.of("host", ES_HOST, "port", ES_PORT));
57
+
58
+ PATH_PREFIX = ElasticsearchTestUtils.class.getClassLoader().getResource("sample_01.csv").getPath(); // path of the sample_01.csv resource; used as path_prefix in inputConfig
59
+ }
60
+
61
+ public void prepareBeforeTest(PluginTask task) throws Exception // removes the test alias and both test indices if they exist
62
+ {
63
+ ElasticsearchHttpClient client = new ElasticsearchHttpClient();
64
+ try (Jetty92RetryHelper retryHelper = createRetryHelper()) {
65
+ Method deleteIndex = ElasticsearchHttpClient.class.getDeclaredMethod("deleteIndex", String.class, PluginTask.class, Jetty92RetryHelper.class); // looked up reflectively; presumably not public - TODO confirm
66
+ deleteIndex.setAccessible(true);
67
+
68
+ // Delete alias
69
+ if (client.isAliasExisting(ES_ALIAS, task, retryHelper)) {
70
+ deleteIndex.invoke(client, ES_ALIAS, task, retryHelper); // the same deleteIndex call is used for the alias name
71
+ }
72
+
73
+ // Delete index
74
+ if (client.isIndexExisting(ES_INDEX, task, retryHelper)) {
75
+ deleteIndex.invoke(client, ES_INDEX, task, retryHelper);
76
+ }
77
+
78
+ if (client.isIndexExisting(ES_INDEX2, task, retryHelper)) {
79
+ deleteIndex.invoke(client, ES_INDEX2, task, retryHelper);
80
+ }
81
+ }
82
+ }
83
+
84
+ public ConfigSource config() // base plugin configuration built from the static test constants, in "insert" mode
85
+ {
86
+ return Exec.newConfigSource()
87
+ .set("in", inputConfig())
88
+ .set("parser", parserConfig(schemaConfig()))
89
+ .set("type", "elasticsearch")
90
+ .set("mode", "insert")
91
+ .set("nodes", ES_NODES)
92
+ .set("index", ES_INDEX)
93
+ .set("index_type", ES_INDEX_TYPE)
94
+ .set("id", ES_ID)
95
+ .set("bulk_actions", ES_BULK_ACTIONS)
96
+ .set("bulk_size", ES_BULK_SIZE)
97
+ .set("concurrent_requests", ES_CONCURRENT_REQUESTS);
98
+ }
99
+
100
+ public ImmutableMap<String, Object> inputConfig() // "file" input settings pointing at PATH_PREFIX
101
+ {
102
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
103
+ builder.put("type", "file");
104
+ builder.put("path_prefix", PATH_PREFIX); // set by initializeConstant()
105
+ builder.put("last_path", "");
106
+ return builder.build();
107
+ }
108
+
109
+ public ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig) // CSV parser settings; schemaConfig becomes the "columns" entry
110
+ {
111
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
112
+ builder.put("type", "csv");
113
+ builder.put("newline", "CRLF");
114
+ builder.put("delimiter", ",");
115
+ builder.put("quote", "\"");
116
+ builder.put("escape", "\"");
117
+ builder.put("trim_if_not_quoted", false);
118
+ builder.put("skip_header_lines", 1); // one header line is skipped
119
+ builder.put("allow_extra_columns", false);
120
+ builder.put("allow_optional_columns", false);
121
+ builder.put("columns", schemaConfig);
122
+ return builder.build();
123
+ }
124
+
125
+ public ImmutableList<Object> schemaConfig() // the seven column definitions passed to parserConfig by config()
126
+ {
127
+ ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
128
+ builder.add(ImmutableMap.of("name", "id", "type", "long")); // same name as ES_ID ("id")
129
+ builder.add(ImmutableMap.of("name", "account", "type", "long"));
130
+ builder.add(ImmutableMap.of("name", "time", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
131
+ builder.add(ImmutableMap.of("name", "purchase", "type", "timestamp", "format", "%Y%m%d"));
132
+ builder.add(ImmutableMap.of("name", "flg", "type", "boolean"));
133
+ builder.add(ImmutableMap.of("name", "score", "type", "double"));
134
+ builder.add(ImmutableMap.of("name", "comment", "type", "string"));
135
+ return builder.build();
136
+ }
137
+
138
+ public Jetty92RetryHelper createRetryHelper() // test retry helper with fixed settings (2, 1000, 32000); same argument order as the delegate's createRetryHelper
139
+ {
140
+ return new Jetty92RetryHelper(
141
+ 2,
142
+ 1000,
143
+ 32000,
144
+ new Jetty92ClientCreator() {
145
+ @Override
146
+ public org.eclipse.jetty.client.HttpClient createAndStart()
147
+ {
148
+ org.eclipse.jetty.client.HttpClient client = new org.eclipse.jetty.client.HttpClient(new SslContextFactory());
149
+ try {
150
+ client.start();
151
+ return client;
152
+ }
153
+ catch (Exception e) {
154
+ throw Throwables.propagate(e); // start-up failures are rethrown via Guava's propagate
155
+ }
156
+ }
157
+ });
158
+ }
159
+ }