embulk-output-elasticsearch 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,286 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.fasterxml.jackson.annotation.JsonCreator;
4
+ import com.fasterxml.jackson.annotation.JsonValue;
5
+ import com.google.common.annotations.VisibleForTesting;
6
+ import com.google.common.base.Optional;
7
+ import com.google.common.base.Throwables;
8
+ import org.eclipse.jetty.util.ssl.SslContextFactory;
9
+ import org.embulk.base.restclient.RestClientOutputPluginDelegate;
10
+ import org.embulk.base.restclient.RestClientOutputTaskBase;
11
+ import org.embulk.base.restclient.jackson.JacksonServiceRequestMapper;
12
+ import org.embulk.base.restclient.jackson.JacksonTopLevelValueLocator;
13
+ import org.embulk.base.restclient.jackson.scope.JacksonAllInObjectScope;
14
+ import org.embulk.base.restclient.record.RecordBuffer;
15
+ import org.embulk.config.Config;
16
+ import org.embulk.config.ConfigDefault;
17
+ import org.embulk.config.ConfigDiff;
18
+ import org.embulk.config.ConfigException;
19
+ import org.embulk.config.Task;
20
+ import org.embulk.config.TaskReport;
21
+ import org.embulk.spi.Exec;
22
+ import org.embulk.spi.Schema;
23
+ import org.embulk.spi.time.TimestampFormatter;
24
+ import org.embulk.util.retryhelper.jetty92.Jetty92ClientCreator;
25
+ import org.embulk.util.retryhelper.jetty92.Jetty92RetryHelper;
26
+ import org.joda.time.DateTimeZone;
27
+ import org.slf4j.Logger;
28
+
29
+ import java.util.List;
30
+ import java.util.Locale;
31
+
32
+ public class ElasticsearchOutputPluginDelegate
33
+ implements RestClientOutputPluginDelegate<ElasticsearchOutputPluginDelegate.PluginTask>
34
+ {
35
+ private final Logger log;
36
+ private final ElasticsearchHttpClient client;
37
+
38
+ public ElasticsearchOutputPluginDelegate()
39
+ {
40
+ this.log = Exec.getLogger(getClass());
41
+ this.client = new ElasticsearchHttpClient();
42
+ }
43
+
44
+ public interface NodeAddressTask
45
+ extends Task
46
+ {
47
+ @Config("host")
48
+ String getHost();
49
+
50
+ @Config("port")
51
+ @ConfigDefault("9200")
52
+ int getPort();
53
+ }
54
+
55
+ public interface PluginTask
56
+ extends RestClientOutputTaskBase, TimestampFormatter.Task
57
+ {
58
+ @Config("mode")
59
+ @ConfigDefault("\"insert\"")
60
+ Mode getMode();
61
+
62
+ @Config("nodes")
63
+ List<NodeAddressTask> getNodes();
64
+
65
+ @Config("cluster_name")
66
+ @ConfigDefault("\"elasticsearch\"")
67
+ String getClusterName();
68
+
69
+ @Config("index")
70
+ String getIndex();
71
+ void setIndex(String indexName);
72
+
73
+ @Config("alias")
74
+ @ConfigDefault("null")
75
+ Optional<String> getAlias();
76
+ void setAlias(Optional<String> aliasName);
77
+
78
+ @Config("index_type")
79
+ String getType();
80
+
81
+ @Config("id")
82
+ @ConfigDefault("null")
83
+ Optional<String> getId();
84
+
85
+ @Config("use_ssl")
86
+ @ConfigDefault("false")
87
+ boolean getUseSsl();
88
+
89
+ @Config("auth_method")
90
+ @ConfigDefault("\"none\"")
91
+ AuthMethod getAuthMethod();
92
+
93
+ @Config("user")
94
+ @ConfigDefault("null")
95
+ Optional<String> getUser();
96
+
97
+ @Config("password")
98
+ @ConfigDefault("null")
99
+ Optional<String> getPassword();
100
+
101
+ @Config("bulk_actions")
102
+ @ConfigDefault("1000")
103
+ int getBulkActions();
104
+
105
+ @Config("bulk_size")
106
+ @ConfigDefault("5242880")
107
+ long getBulkSize();
108
+
109
+ @Config("concurrent_requests")
110
+ @ConfigDefault("5")
111
+ int getConcurrentRequests();
112
+
113
+ @Config("maximum_retries")
114
+ @ConfigDefault("7")
115
+ int getMaximumRetries();
116
+
117
+ @Config("initial_retry_interval_millis")
118
+ @ConfigDefault("1000")
119
+ int getInitialRetryIntervalMillis();
120
+
121
+ @Config("maximum_retry_interval_millis")
122
+ @ConfigDefault("120000")
123
+ int getMaximumRetryIntervalMillis();
124
+
125
+ @Config("timeout_millis")
126
+ @ConfigDefault("60000")
127
+ int getTimeoutMills();
128
+
129
+ @Config("time_zone")
130
+ @ConfigDefault("\"UTC\"")
131
+ String getTimeZone();
132
+ }
133
+
134
+ public enum Mode
135
+ {
136
+ INSERT,
137
+ REPLACE;
138
+
139
+ @JsonValue
140
+ @Override
141
+ public String toString()
142
+ {
143
+ return name().toLowerCase(Locale.ENGLISH);
144
+ }
145
+
146
+ @JsonCreator
147
+ public static Mode fromString(String value)
148
+ {
149
+ switch (value) {
150
+ case "insert":
151
+ return INSERT;
152
+ case "replace":
153
+ return REPLACE;
154
+ default:
155
+ throw new ConfigException(String.format("Unknown mode '%s'. Supported modes are insert, truncate_insert, replace", value));
156
+ }
157
+ }
158
+ }
159
+
160
+ public enum AuthMethod
161
+ {
162
+ NONE,
163
+ BASIC;
164
+
165
+ @JsonValue
166
+ @Override
167
+ public String toString()
168
+ {
169
+ return name().toLowerCase(Locale.ENGLISH);
170
+ }
171
+
172
+ @JsonCreator
173
+ public static AuthMethod fromString(String value)
174
+ {
175
+ switch (value) {
176
+ case "none":
177
+ return NONE;
178
+ case "basic":
179
+ return BASIC;
180
+ default:
181
+ throw new ConfigException(String.format("Unknown auth_method '%s'. Supported auth_method are none, basic", value));
182
+ }
183
+ }
184
+ }
185
+
186
+ @Override // Overridden from |OutputTaskValidatable|
187
+ public void validateOutputTask(PluginTask task, Schema embulkSchema, int taskCount)
188
+ {
189
+ if (task.getNodes().size() > 0) {
190
+ for (NodeAddressTask node : task.getNodes()) {
191
+ if (node.getHost().endsWith("es.amazonaws.com")) {
192
+ throw new ConfigException("This plugin does't support AWS Elasticsearch Service.");
193
+ }
194
+ if (node.getPort() == 9300) {
195
+ log.warn("Port:9300 is usually used by TransportClient. HTTP/Rest Client uses 9200.");
196
+ }
197
+ }
198
+ }
199
+
200
+ try (Jetty92RetryHelper retryHelper = createRetryHelper(task)) {
201
+ log.info(String.format("Connecting to Elasticsearch version:%s", client.getEsVersion(task, retryHelper)));
202
+ log.info("Executing plugin with '{}' mode.", task.getMode());
203
+ client.validateIndexOrAliasName(task.getIndex(), "index");
204
+ client.validateIndexOrAliasName(task.getType(), "index_type");
205
+
206
+ if (task.getMode().equals(Mode.REPLACE)) {
207
+ task.setAlias(Optional.of(task.getIndex()));
208
+ task.setIndex(client.generateNewIndexName(task.getIndex()));
209
+ if (client.isIndexExisting(task.getAlias().orNull(), task, retryHelper) && !client.isAliasExisting(task.getAlias().orNull(), task, retryHelper)) {
210
+ throw new ConfigException(String.format("Invalid alias name [%s], an index exists with the same name as the alias", task.getAlias().orNull()));
211
+ }
212
+ }
213
+ log.info(String.format("Inserting data into index[%s]", task.getIndex()));
214
+ }
215
+
216
+ if (task.getAuthMethod() == AuthMethod.BASIC) {
217
+ if (!task.getUser().isPresent() || !task.getPassword().isPresent()) {
218
+ throw new ConfigException("'user' and 'password' are required when auth_method='basic'");
219
+ }
220
+ }
221
+ }
222
+
223
+ @Override // Overridden from |ServiceRequestMapperBuildable|
224
+ public JacksonServiceRequestMapper buildServiceRequestMapper(PluginTask task)
225
+ {
226
+ TimestampFormatter formatter = new TimestampFormatter(task.getJRuby(), "%Y-%m-%dT%H:%M:%S.%3N%z", DateTimeZone.forID(task.getTimeZone()));
227
+
228
+ return JacksonServiceRequestMapper.builder()
229
+ .add(new JacksonAllInObjectScope(formatter), new JacksonTopLevelValueLocator("record"))
230
+ .build();
231
+ }
232
+
233
+ @Override // Overridden from |RecordBufferBuildable|
234
+ public RecordBuffer buildRecordBuffer(PluginTask task)
235
+ {
236
+ Jetty92RetryHelper retryHelper = createRetryHelper(task);
237
+ return new ElasticsearchRecordBuffer("records", task, retryHelper);
238
+ }
239
+
240
+ @Override
241
+ public ConfigDiff egestEmbulkData(final PluginTask task,
242
+ Schema schema,
243
+ int taskIndex,
244
+ List<TaskReport> taskReports)
245
+ {
246
+ long totalInserted = 0;
247
+ for (TaskReport taskReport : taskReports) {
248
+ if (taskReport.has("inserted")) {
249
+ totalInserted += taskReport.get(Long.class, "inserted");
250
+ }
251
+ }
252
+
253
+ log.info("Insert completed. {} records", totalInserted);
254
+ try (Jetty92RetryHelper retryHelper = createRetryHelper(task)) {
255
+ // Re assign alias only when repale mode
256
+ if (task.getMode().equals(Mode.REPLACE)) {
257
+ client.reassignAlias(task.getAlias().orNull(), task.getIndex(), task, retryHelper);
258
+ }
259
+ }
260
+
261
+ return Exec.newConfigDiff();
262
+ }
263
+
264
+ @VisibleForTesting
265
+ protected Jetty92RetryHelper createRetryHelper(PluginTask task)
266
+ {
267
+ return new Jetty92RetryHelper(
268
+ task.getMaximumRetries(),
269
+ task.getInitialRetryIntervalMillis(),
270
+ task.getMaximumRetryIntervalMillis(),
271
+ new Jetty92ClientCreator() {
272
+ @Override
273
+ public org.eclipse.jetty.client.HttpClient createAndStart()
274
+ {
275
+ org.eclipse.jetty.client.HttpClient client = new org.eclipse.jetty.client.HttpClient(new SslContextFactory());
276
+ try {
277
+ client.start();
278
+ return client;
279
+ }
280
+ catch (Exception e) {
281
+ throw Throwables.propagate(e);
282
+ }
283
+ }
284
+ });
285
+ }
286
+ }
@@ -0,0 +1,99 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.fasterxml.jackson.databind.DeserializationFeature;
4
+ import com.fasterxml.jackson.databind.JsonNode;
5
+ import com.fasterxml.jackson.databind.ObjectMapper;
6
+ import com.fasterxml.jackson.databind.node.ArrayNode;
7
+ import com.fasterxml.jackson.databind.node.JsonNodeFactory;
8
+ import com.google.common.base.Throwables;
9
+ import org.embulk.base.restclient.jackson.JacksonServiceRecord;
10
+ import org.embulk.base.restclient.record.RecordBuffer;
11
+ import org.embulk.base.restclient.record.ServiceRecord;
12
+ import org.embulk.config.TaskReport;
13
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask;
14
+ import org.embulk.spi.Exec;
15
+ import org.embulk.util.retryhelper.jetty92.Jetty92RetryHelper;
16
+ import org.slf4j.Logger;
17
+
18
+ import java.io.IOException;
19
+
20
+ /**
21
+ * ElasticsearchRecordBuffer is an implementation of {@code RecordBuffer} which includes JSON output directly to Elasticsearch server.
22
+ */
23
+ public class ElasticsearchRecordBuffer
24
+ extends RecordBuffer
25
+ {
26
+ private final String attributeName;
27
+ private final PluginTask task;
28
+ private final long bulkActions;
29
+ private final long bulkSize;
30
+ private final ElasticsearchHttpClient client;
31
+ private final Jetty92RetryHelper retryHelper;
32
+ private final ObjectMapper mapper;
33
+ private final Logger log;
34
+ private long totalCount;
35
+ private int requestCount;
36
+ private long requestBytes;
37
+ private ArrayNode records;
38
+
39
+ public ElasticsearchRecordBuffer(String attributeName, PluginTask task, Jetty92RetryHelper retryHelper)
40
+ {
41
+ this.attributeName = attributeName;
42
+ this.task = task;
43
+ this.bulkActions = task.getBulkActions();
44
+ this.bulkSize = task.getBulkSize();
45
+ this.client = new ElasticsearchHttpClient();
46
+ this.retryHelper = retryHelper;
47
+ this.mapper = new ObjectMapper()
48
+ .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
49
+ .configure(com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, false);
50
+ this.records = JsonNodeFactory.instance.arrayNode();
51
+ this.totalCount = 0;
52
+ this.requestCount = 0;
53
+ this.requestBytes = 0;
54
+ this.log = Exec.getLogger(getClass());
55
+ }
56
+
57
+ @Override
58
+ public void bufferRecord(ServiceRecord serviceRecord)
59
+ {
60
+ JacksonServiceRecord jacksonServiceRecord;
61
+ try {
62
+ jacksonServiceRecord = (JacksonServiceRecord) serviceRecord;
63
+ JsonNode record = mapper.readTree(jacksonServiceRecord.toString()).get("record");
64
+
65
+ requestCount++;
66
+ totalCount++;
67
+ requestBytes += record.toString().getBytes().length;
68
+
69
+ records.add(record);
70
+ if (requestCount >= bulkActions || requestBytes >= bulkSize) {
71
+ client.push(records, task, retryHelper);
72
+ if (totalCount % 10000 == 0) {
73
+ log.info("Inserted {} records", totalCount);
74
+ }
75
+ records = JsonNodeFactory.instance.arrayNode();
76
+ requestBytes = 0;
77
+ requestCount = 0;
78
+ }
79
+ }
80
+ catch (ClassCastException ex) {
81
+ throw new RuntimeException(ex);
82
+ }
83
+ catch (IOException ex) {
84
+ throw Throwables.propagate(ex);
85
+ }
86
+ }
87
+
88
+ @Override
89
+ public TaskReport commitWithTaskReportUpdated(TaskReport taskReport)
90
+ {
91
+ if (records.size() > 0) {
92
+ client.push(records, task, retryHelper);
93
+ log.info("Inserted {} records", records.size());
94
+ }
95
+
96
+ this.retryHelper.close();
97
+ return Exec.newTaskReport().set("inserted", totalCount);
98
+ }
99
+ }
@@ -0,0 +1,159 @@
1
+ package org.embulk.output.elasticsearch;
2
+
3
+ import com.google.common.base.Throwables;
4
+ import com.google.common.collect.ImmutableList;
5
+ import com.google.common.collect.ImmutableMap;
6
+ import org.eclipse.jetty.util.ssl.SslContextFactory;
7
+ import org.embulk.config.ConfigSource;
8
+ import org.embulk.output.elasticsearch.ElasticsearchOutputPluginDelegate.PluginTask;
9
+ import org.embulk.spi.Exec;
10
+ import org.embulk.util.retryhelper.jetty92.Jetty92ClientCreator;
11
+ import org.embulk.util.retryhelper.jetty92.Jetty92RetryHelper;
12
+
13
+ import java.lang.reflect.Method;
14
+ import java.util.Arrays;
15
+ import java.util.List;
16
+
17
+ import static org.junit.Assume.assumeNotNull;
18
+
19
+ public class ElasticsearchTestUtils
20
+ {
21
+ public static String ES_HOST;
22
+ public static int ES_PORT;
23
+ public static List ES_NODES;
24
+ public static String ES_INDEX;
25
+ public static String ES_INDEX_TYPE;
26
+ public static String ES_ID;
27
+ public static int ES_BULK_ACTIONS;
28
+ public static int ES_BULK_SIZE;
29
+ public static int ES_CONCURRENT_REQUESTS;
30
+ public static String PATH_PREFIX;
31
+ public static String ES_INDEX2;
32
+ public static String ES_ALIAS;
33
+
34
+ /*
35
+ * This test case requires environment variables
36
+ * ES_HOST
37
+ * ES_INDEX
38
+ * ES_INDEX_TYPE
39
+ */
40
+ public void initializeConstant()
41
+ {
42
+ ES_HOST = System.getenv("ES_HOST") != null ? System.getenv("ES_HOST") : "";
43
+ ES_PORT = System.getenv("ES_PORT") != null ? Integer.valueOf(System.getenv("ES_PORT")) : 9200;
44
+
45
+ ES_INDEX = System.getenv("ES_INDEX");
46
+ ES_INDEX2 = ES_INDEX + "_02";
47
+ ES_ALIAS = ES_INDEX + "_alias";
48
+ ES_INDEX_TYPE = System.getenv("ES_INDEX_TYPE");
49
+ ES_ID = "id";
50
+ ES_BULK_ACTIONS = System.getenv("ES_BULK_ACTIONS") != null ? Integer.valueOf(System.getenv("ES_BULK_ACTIONS")) : 1000;
51
+ ES_BULK_SIZE = System.getenv("ES_BULK_SIZE") != null ? Integer.valueOf(System.getenv("ES_BULK_SIZE")) : 5242880;
52
+ ES_CONCURRENT_REQUESTS = System.getenv("ES_CONCURRENT_REQUESTS") != null ? Integer.valueOf(System.getenv("ES_CONCURRENT_REQUESTS")) : 5;
53
+
54
+ assumeNotNull(ES_HOST, ES_INDEX, ES_INDEX_TYPE);
55
+
56
+ ES_NODES = Arrays.asList(ImmutableMap.of("host", ES_HOST, "port", ES_PORT));
57
+
58
+ PATH_PREFIX = ElasticsearchTestUtils.class.getClassLoader().getResource("sample_01.csv").getPath();
59
+ }
60
+
61
+ public void prepareBeforeTest(PluginTask task) throws Exception
62
+ {
63
+ ElasticsearchHttpClient client = new ElasticsearchHttpClient();
64
+ try (Jetty92RetryHelper retryHelper = createRetryHelper()) {
65
+ Method deleteIndex = ElasticsearchHttpClient.class.getDeclaredMethod("deleteIndex", String.class, PluginTask.class, Jetty92RetryHelper.class);
66
+ deleteIndex.setAccessible(true);
67
+
68
+ // Delete alias
69
+ if (client.isAliasExisting(ES_ALIAS, task, retryHelper)) {
70
+ deleteIndex.invoke(client, ES_ALIAS, task, retryHelper);
71
+ }
72
+
73
+ // Delete index
74
+ if (client.isIndexExisting(ES_INDEX, task, retryHelper)) {
75
+ deleteIndex.invoke(client, ES_INDEX, task, retryHelper);
76
+ }
77
+
78
+ if (client.isIndexExisting(ES_INDEX2, task, retryHelper)) {
79
+ deleteIndex.invoke(client, ES_INDEX2, task, retryHelper);
80
+ }
81
+ }
82
+ }
83
+
84
+ public ConfigSource config()
85
+ {
86
+ return Exec.newConfigSource()
87
+ .set("in", inputConfig())
88
+ .set("parser", parserConfig(schemaConfig()))
89
+ .set("type", "elasticsearch")
90
+ .set("mode", "insert")
91
+ .set("nodes", ES_NODES)
92
+ .set("index", ES_INDEX)
93
+ .set("index_type", ES_INDEX_TYPE)
94
+ .set("id", ES_ID)
95
+ .set("bulk_actions", ES_BULK_ACTIONS)
96
+ .set("bulk_size", ES_BULK_SIZE)
97
+ .set("concurrent_requests", ES_CONCURRENT_REQUESTS);
98
+ }
99
+
100
+ public ImmutableMap<String, Object> inputConfig()
101
+ {
102
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
103
+ builder.put("type", "file");
104
+ builder.put("path_prefix", PATH_PREFIX);
105
+ builder.put("last_path", "");
106
+ return builder.build();
107
+ }
108
+
109
+ public ImmutableMap<String, Object> parserConfig(ImmutableList<Object> schemaConfig)
110
+ {
111
+ ImmutableMap.Builder<String, Object> builder = new ImmutableMap.Builder<>();
112
+ builder.put("type", "csv");
113
+ builder.put("newline", "CRLF");
114
+ builder.put("delimiter", ",");
115
+ builder.put("quote", "\"");
116
+ builder.put("escape", "\"");
117
+ builder.put("trim_if_not_quoted", false);
118
+ builder.put("skip_header_lines", 1);
119
+ builder.put("allow_extra_columns", false);
120
+ builder.put("allow_optional_columns", false);
121
+ builder.put("columns", schemaConfig);
122
+ return builder.build();
123
+ }
124
+
125
+ public ImmutableList<Object> schemaConfig()
126
+ {
127
+ ImmutableList.Builder<Object> builder = new ImmutableList.Builder<>();
128
+ builder.add(ImmutableMap.of("name", "id", "type", "long"));
129
+ builder.add(ImmutableMap.of("name", "account", "type", "long"));
130
+ builder.add(ImmutableMap.of("name", "time", "type", "timestamp", "format", "%Y-%m-%d %H:%M:%S"));
131
+ builder.add(ImmutableMap.of("name", "purchase", "type", "timestamp", "format", "%Y%m%d"));
132
+ builder.add(ImmutableMap.of("name", "flg", "type", "boolean"));
133
+ builder.add(ImmutableMap.of("name", "score", "type", "double"));
134
+ builder.add(ImmutableMap.of("name", "comment", "type", "string"));
135
+ return builder.build();
136
+ }
137
+
138
+ public Jetty92RetryHelper createRetryHelper()
139
+ {
140
+ return new Jetty92RetryHelper(
141
+ 2,
142
+ 1000,
143
+ 32000,
144
+ new Jetty92ClientCreator() {
145
+ @Override
146
+ public org.eclipse.jetty.client.HttpClient createAndStart()
147
+ {
148
+ org.eclipse.jetty.client.HttpClient client = new org.eclipse.jetty.client.HttpClient(new SslContextFactory());
149
+ try {
150
+ client.start();
151
+ return client;
152
+ }
153
+ catch (Exception e) {
154
+ throw Throwables.propagate(e);
155
+ }
156
+ }
157
+ });
158
+ }
159
+ }