embulk-filter-copy 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/build.gradle +104 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/example/config.yml +28 -0
- data/example/data.tsv +5 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +169 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/filter/copy.rb +8 -0
- data/settings.gradle +1 -0
- data/src/main/java/org/embulk/filter/copy/CopyFilterPlugin.java +149 -0
- data/src/main/java/org/embulk/filter/copy/forward/ForwardBaseTask.java +17 -0
- data/src/main/java/org/embulk/filter/copy/forward/InForwardEventReader.java +147 -0
- data/src/main/java/org/embulk/filter/copy/forward/InForwardService.java +187 -0
- data/src/main/java/org/embulk/filter/copy/forward/InForwardVisitor.java +63 -0
- data/src/main/java/org/embulk/filter/copy/forward/OutForwardEventBuilder.java +135 -0
- data/src/main/java/org/embulk/filter/copy/forward/OutForwardService.java +170 -0
- data/src/main/java/org/embulk/filter/copy/forward/OutForwardVisitor.java +63 -0
- data/src/main/java/org/embulk/filter/copy/plugin/InternalForwardInputPlugin.java +111 -0
- data/src/main/java/org/embulk/filter/copy/service/EmbulkExecutorService.java +111 -0
- data/src/main/java/org/embulk/filter/copy/service/StandardColumnVisitor.java +64 -0
- data/src/main/java/org/embulk/filter/copy/util/ElapsedTime.java +165 -0
- data/src/test/java/org/embulk/filter/copy/TestCopyFilterPlugin.java +5 -0
- data/src/test/java/org/embulk/filter/copy/plugin/TestInternalForwardInputPlugin.java +5 -0
- metadata +111 -0
@@ -0,0 +1,170 @@
|
|
1
|
+
package org.embulk.filter.copy.forward;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.collect.Maps;
|
5
|
+
import org.embulk.config.Config;
|
6
|
+
import org.embulk.config.ConfigDefault;
|
7
|
+
import org.embulk.spi.Exec;
|
8
|
+
import org.komamitsu.fluency.Fluency;
|
9
|
+
import org.slf4j.Logger;
|
10
|
+
|
11
|
+
import java.io.IOException;
|
12
|
+
import java.util.Map;
|
13
|
+
|
14
|
+
public class OutForwardService
|
15
|
+
{
|
16
|
+
private final static Logger logger = Exec.getLogger(OutForwardService.class);
|
17
|
+
|
18
|
+
public interface OutForwardTask
|
19
|
+
extends org.embulk.config.Task
|
20
|
+
{
|
21
|
+
@Config("host")
|
22
|
+
@ConfigDefault("\"localhost\"")
|
23
|
+
String getHost();
|
24
|
+
|
25
|
+
@Config("port")
|
26
|
+
@ConfigDefault("24224")
|
27
|
+
int getPort();
|
28
|
+
|
29
|
+
@Config("max_buffer_size")
|
30
|
+
@ConfigDefault("null")
|
31
|
+
Optional<Long> getMaxBufferSize();
|
32
|
+
|
33
|
+
@Config("buffer_chunk_initial_size")
|
34
|
+
@ConfigDefault("null")
|
35
|
+
Optional<Integer> getBufferChunkInitialSize();
|
36
|
+
|
37
|
+
@Config("buffer_chunk_retention_size")
|
38
|
+
@ConfigDefault("null")
|
39
|
+
Optional<Integer> getBufferChunkRetentionSize();
|
40
|
+
|
41
|
+
@Config("flush_interval_millis")
|
42
|
+
@ConfigDefault("null")
|
43
|
+
Optional<Integer> getFlushIntervalMillis();
|
44
|
+
|
45
|
+
@Config("sender_max_retry_count")
|
46
|
+
@ConfigDefault("null")
|
47
|
+
Optional<Integer> getSenderMaxRetryCount();
|
48
|
+
|
49
|
+
@Config("ack_response_mode")
|
50
|
+
@ConfigDefault("null")
|
51
|
+
Optional<Boolean> getAckResponseMode();
|
52
|
+
|
53
|
+
@Config("file_backup_dir")
|
54
|
+
@ConfigDefault("null")
|
55
|
+
Optional<String> getFileBackupDir();
|
56
|
+
|
57
|
+
@Config("wait_until_buffer_flushed")
|
58
|
+
@ConfigDefault("null")
|
59
|
+
Optional<Integer> getWaitUntilBufferFlushed();
|
60
|
+
|
61
|
+
@Config("wait_until_flusher_terminated")
|
62
|
+
@ConfigDefault("null")
|
63
|
+
Optional<Integer> getWaitUntilFlusherTerminated();
|
64
|
+
}
|
65
|
+
|
66
|
+
public interface Task
|
67
|
+
extends ForwardBaseTask
|
68
|
+
{
|
69
|
+
@Config("out_forward")
|
70
|
+
@ConfigDefault("{}")
|
71
|
+
OutForwardTask getOutForwardTask();
|
72
|
+
}
|
73
|
+
|
74
|
+
public static void sendShutdownMessage(Task task)
|
75
|
+
{
|
76
|
+
logger.info("out_forward: send shutdown message.");
|
77
|
+
OutForwardService outForward = new OutForwardService(task);
|
78
|
+
outForward.emit(task.getShutdownTag(), Maps.newHashMap());
|
79
|
+
outForward.finish();
|
80
|
+
outForward.close();
|
81
|
+
}
|
82
|
+
|
83
|
+
private final Task task;
|
84
|
+
private final Fluency client;
|
85
|
+
|
86
|
+
public OutForwardService(Task task)
|
87
|
+
{
|
88
|
+
this.task = task;
|
89
|
+
this.client = newFluency(task.getOutForwardTask());
|
90
|
+
}
|
91
|
+
|
92
|
+
private Fluency.Config configureFluencyConfig(OutForwardTask t)
|
93
|
+
{
|
94
|
+
Fluency.Config c = new Fluency.Config();
|
95
|
+
if (t.getMaxBufferSize().isPresent()) {
|
96
|
+
c.setMaxBufferSize(t.getMaxBufferSize().get());
|
97
|
+
}
|
98
|
+
if (t.getBufferChunkInitialSize().isPresent()) {
|
99
|
+
c.setBufferChunkInitialSize(t.getBufferChunkInitialSize().get());
|
100
|
+
}
|
101
|
+
if (t.getBufferChunkRetentionSize().isPresent()) {
|
102
|
+
c.setBufferChunkRetentionSize(t.getBufferChunkRetentionSize().get());
|
103
|
+
}
|
104
|
+
if (t.getFlushIntervalMillis().isPresent()) {
|
105
|
+
c.setFlushIntervalMillis(t.getFlushIntervalMillis().get());
|
106
|
+
}
|
107
|
+
if (t.getSenderMaxRetryCount().isPresent()) {
|
108
|
+
c.setSenderMaxRetryCount(t.getSenderMaxRetryCount().get());
|
109
|
+
}
|
110
|
+
if (t.getAckResponseMode().isPresent()) {
|
111
|
+
c.setAckResponseMode(t.getAckResponseMode().get());
|
112
|
+
}
|
113
|
+
if (t.getFileBackupDir().isPresent()) {
|
114
|
+
c.setFileBackupDir(t.getFileBackupDir().get());
|
115
|
+
}
|
116
|
+
if (t.getWaitUntilBufferFlushed().isPresent()) {
|
117
|
+
c.setWaitUntilBufferFlushed(t.getWaitUntilBufferFlushed().get());
|
118
|
+
}
|
119
|
+
if (t.getWaitUntilFlusherTerminated().isPresent()) {
|
120
|
+
c.setWaitUntilFlusherTerminated(t.getWaitUntilFlusherTerminated().get());
|
121
|
+
}
|
122
|
+
return c;
|
123
|
+
}
|
124
|
+
|
125
|
+
private Fluency newFluency(OutForwardTask t)
|
126
|
+
{
|
127
|
+
Fluency.Config c = configureFluencyConfig(t);
|
128
|
+
try {
|
129
|
+
return Fluency.defaultFluency(t.getHost(), t.getPort(), c);
|
130
|
+
}
|
131
|
+
catch (IOException e) {
|
132
|
+
throw new RuntimeException(e);
|
133
|
+
}
|
134
|
+
}
|
135
|
+
|
136
|
+
public void emit(String tag, Map<String, Object> message)
|
137
|
+
{
|
138
|
+
try {
|
139
|
+
client.emit(tag, message);
|
140
|
+
}
|
141
|
+
catch (IOException e) {
|
142
|
+
throw new RuntimeException(e);
|
143
|
+
}
|
144
|
+
}
|
145
|
+
|
146
|
+
public void emit(Map<String, Object> message)
|
147
|
+
{
|
148
|
+
emit(task.getMessageTag(), message);
|
149
|
+
}
|
150
|
+
|
151
|
+
public void finish()
|
152
|
+
{
|
153
|
+
try {
|
154
|
+
client.flush();
|
155
|
+
}
|
156
|
+
catch (IOException e) {
|
157
|
+
throw new RuntimeException(e);
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
public void close()
|
162
|
+
{
|
163
|
+
try {
|
164
|
+
client.close();
|
165
|
+
}
|
166
|
+
catch (IOException e) {
|
167
|
+
throw new RuntimeException(e);
|
168
|
+
}
|
169
|
+
}
|
170
|
+
}
|
@@ -0,0 +1,63 @@
|
|
1
|
+
package org.embulk.filter.copy.forward;
|
2
|
+
|
3
|
+
import org.embulk.spi.Column;
|
4
|
+
import org.embulk.spi.ColumnVisitor;
|
5
|
+
import org.embulk.spi.PageReader;
|
6
|
+
|
7
|
+
public class OutForwardVisitor
|
8
|
+
implements ColumnVisitor
|
9
|
+
{
|
10
|
+
private final PageReader reader;
|
11
|
+
private final OutForwardEventBuilder builder;
|
12
|
+
|
13
|
+
public OutForwardVisitor(PageReader reader, OutForwardEventBuilder builder)
|
14
|
+
{
|
15
|
+
this.reader = reader;
|
16
|
+
this.builder = builder;
|
17
|
+
}
|
18
|
+
|
19
|
+
private void nullOr(Column column, Runnable r)
|
20
|
+
{
|
21
|
+
if (reader.isNull(column)) {
|
22
|
+
builder.setNull(column);
|
23
|
+
return;
|
24
|
+
}
|
25
|
+
r.run();
|
26
|
+
}
|
27
|
+
|
28
|
+
@Override
|
29
|
+
public void booleanColumn(Column column)
|
30
|
+
{
|
31
|
+
nullOr(column, () -> builder.setBoolean(column, reader.getBoolean(column)));
|
32
|
+
}
|
33
|
+
|
34
|
+
@Override
|
35
|
+
public void longColumn(Column column)
|
36
|
+
{
|
37
|
+
nullOr(column, () -> builder.setLong(column, reader.getLong(column)));
|
38
|
+
}
|
39
|
+
|
40
|
+
@Override
|
41
|
+
public void doubleColumn(Column column)
|
42
|
+
{
|
43
|
+
nullOr(column, () -> builder.setDouble(column, reader.getDouble(column)));
|
44
|
+
}
|
45
|
+
|
46
|
+
@Override
|
47
|
+
public void stringColumn(Column column)
|
48
|
+
{
|
49
|
+
nullOr(column, () -> builder.setString(column, reader.getString(column)));
|
50
|
+
}
|
51
|
+
|
52
|
+
@Override
|
53
|
+
public void timestampColumn(Column column)
|
54
|
+
{
|
55
|
+
nullOr(column, () -> builder.setTimestamp(column, reader.getTimestamp(column)));
|
56
|
+
}
|
57
|
+
|
58
|
+
@Override
|
59
|
+
public void jsonColumn(Column column)
|
60
|
+
{
|
61
|
+
nullOr(column, () -> builder.setJson(column, reader.getJson(column)));
|
62
|
+
}
|
63
|
+
}
|
@@ -0,0 +1,111 @@
|
|
1
|
+
package org.embulk.filter.copy.plugin;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDiff;
|
5
|
+
import org.embulk.config.ConfigInject;
|
6
|
+
import org.embulk.config.ConfigSource;
|
7
|
+
import org.embulk.config.Task;
|
8
|
+
import org.embulk.config.TaskReport;
|
9
|
+
import org.embulk.config.TaskSource;
|
10
|
+
import org.embulk.filter.copy.forward.InForwardEventReader;
|
11
|
+
import org.embulk.filter.copy.forward.InForwardService;
|
12
|
+
import org.embulk.filter.copy.forward.InForwardVisitor;
|
13
|
+
import org.embulk.spi.BufferAllocator;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.InputPlugin;
|
16
|
+
import org.embulk.spi.PageBuilder;
|
17
|
+
import org.embulk.spi.PageOutput;
|
18
|
+
import org.embulk.spi.Schema;
|
19
|
+
import org.embulk.spi.SchemaConfig;
|
20
|
+
import org.embulk.spi.time.TimestampParser;
|
21
|
+
import org.slf4j.Logger;
|
22
|
+
|
23
|
+
import java.util.List;
|
24
|
+
|
25
|
+
public class InternalForwardInputPlugin
|
26
|
+
implements InputPlugin
|
27
|
+
{
|
28
|
+
public final static String PLUGIN_NAME = "internal_forward";
|
29
|
+
private final static Logger logger = Exec.getLogger(InternalForwardInputPlugin.class);
|
30
|
+
|
31
|
+
public interface PluginTask
|
32
|
+
extends Task, TimestampParser.Task, InForwardService.Task
|
33
|
+
{
|
34
|
+
@Config("columns")
|
35
|
+
SchemaConfig getColumns();
|
36
|
+
|
37
|
+
@ConfigInject
|
38
|
+
BufferAllocator getBufferAllocator();
|
39
|
+
}
|
40
|
+
|
41
|
+
@Override
|
42
|
+
public ConfigDiff transaction(ConfigSource config,
|
43
|
+
InputPlugin.Control control)
|
44
|
+
{
|
45
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
46
|
+
|
47
|
+
Schema schema = task.getColumns().toSchema();
|
48
|
+
int taskCount = 1; // number of run() method calls
|
49
|
+
|
50
|
+
return resume(task.dump(), schema, taskCount, control);
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
public ConfigDiff resume(TaskSource taskSource,
|
55
|
+
Schema schema, int taskCount,
|
56
|
+
InputPlugin.Control control)
|
57
|
+
{
|
58
|
+
control.run(taskSource, schema, taskCount);
|
59
|
+
return Exec.newConfigDiff();
|
60
|
+
}
|
61
|
+
|
62
|
+
@Override
|
63
|
+
public void cleanup(TaskSource taskSource,
|
64
|
+
Schema schema, int taskCount,
|
65
|
+
List<TaskReport> successTaskReports)
|
66
|
+
{
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public TaskReport run(TaskSource taskSource,
|
71
|
+
Schema schema, int taskIndex,
|
72
|
+
PageOutput output)
|
73
|
+
{
|
74
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
75
|
+
|
76
|
+
try (PageBuilder pageBuilder = new PageBuilder(task.getBufferAllocator(), schema, output)) {
|
77
|
+
TimestampParser timestampParser = new TimestampParser(
|
78
|
+
task.getJRuby(),
|
79
|
+
task.getDefaultTimestampFormat(),
|
80
|
+
task.getDefaultTimeZone());
|
81
|
+
InForwardEventReader eventReader = new InForwardEventReader(schema, timestampParser);
|
82
|
+
InForwardVisitor inForwardVisitor = new InForwardVisitor(eventReader, pageBuilder);
|
83
|
+
|
84
|
+
InForwardService.builder()
|
85
|
+
.task(task)
|
86
|
+
.forEachEventCallback(
|
87
|
+
event ->
|
88
|
+
{
|
89
|
+
// TODO: here is not thread-safe
|
90
|
+
eventReader.setEvent(event);
|
91
|
+
while (eventReader.nextMessage()) {
|
92
|
+
schema.visitColumns(inForwardVisitor);
|
93
|
+
pageBuilder.addRecord();
|
94
|
+
}
|
95
|
+
}
|
96
|
+
)
|
97
|
+
.build()
|
98
|
+
.runUntilShouldShutdown();
|
99
|
+
|
100
|
+
pageBuilder.finish();
|
101
|
+
}
|
102
|
+
|
103
|
+
return Exec.newTaskReport(); // TODO
|
104
|
+
}
|
105
|
+
|
106
|
+
@Override
|
107
|
+
public ConfigDiff guess(ConfigSource config)
|
108
|
+
{
|
109
|
+
return Exec.newConfigDiff();
|
110
|
+
}
|
111
|
+
}
|
@@ -0,0 +1,111 @@
|
|
1
|
+
package org.embulk.filter.copy.service;
|
2
|
+
|
3
|
+
import com.google.common.util.concurrent.FutureCallback;
|
4
|
+
import com.google.common.util.concurrent.Futures;
|
5
|
+
import com.google.common.util.concurrent.ListenableFuture;
|
6
|
+
import com.google.common.util.concurrent.ListeningExecutorService;
|
7
|
+
import com.google.common.util.concurrent.MoreExecutors;
|
8
|
+
import com.google.inject.Injector;
|
9
|
+
import org.embulk.EmbulkEmbed;
|
10
|
+
import org.embulk.config.ConfigException;
|
11
|
+
import org.embulk.config.ConfigSource;
|
12
|
+
import org.embulk.exec.ExecutionResult;
|
13
|
+
import org.embulk.filter.copy.util.ElapsedTime;
|
14
|
+
import org.embulk.guice.LifeCycleInjector;
|
15
|
+
import org.embulk.spi.Exec;
|
16
|
+
import org.slf4j.Logger;
|
17
|
+
|
18
|
+
import javax.annotation.Nullable;
|
19
|
+
|
20
|
+
import java.lang.reflect.Constructor;
|
21
|
+
import java.lang.reflect.InvocationTargetException;
|
22
|
+
import java.util.concurrent.Callable;
|
23
|
+
import java.util.concurrent.Executors;
|
24
|
+
|
25
|
+
public class EmbulkExecutorService
|
26
|
+
{
|
27
|
+
private final static String THREAD_NAME = "embulk executor service";
|
28
|
+
private static final int NUM_THREADS = 1;
|
29
|
+
private final static Logger logger = Exec.getLogger(EmbulkExecutorService.class);
|
30
|
+
private final Injector injector;
|
31
|
+
private final ListeningExecutorService es;
|
32
|
+
private ListenableFuture<ExecutionResult> future;
|
33
|
+
|
34
|
+
public EmbulkExecutorService(Injector injector)
|
35
|
+
{
|
36
|
+
this.injector = injector;
|
37
|
+
this.es = MoreExecutors.listeningDecorator(
|
38
|
+
Executors.newFixedThreadPool(
|
39
|
+
NUM_THREADS,
|
40
|
+
r -> new Thread(r, THREAD_NAME)
|
41
|
+
));
|
42
|
+
}
|
43
|
+
|
44
|
+
public void executeAsync(ConfigSource config)
|
45
|
+
{
|
46
|
+
logger.debug("execute with this config: {}", config);
|
47
|
+
if (future != null) {
|
48
|
+
throw new IllegalStateException("executeAsync is already called.");
|
49
|
+
}
|
50
|
+
future = es.submit(embulkRun(config));
|
51
|
+
Futures.addCallback(future, resultFutureCallback());
|
52
|
+
}
|
53
|
+
|
54
|
+
public void shutdown()
|
55
|
+
{
|
56
|
+
ElapsedTime.info(
|
57
|
+
logger,
|
58
|
+
"embulk executor service shutdown",
|
59
|
+
es::shutdown);
|
60
|
+
}
|
61
|
+
|
62
|
+
public void waitExecutionFinished()
|
63
|
+
{
|
64
|
+
if (future == null) {
|
65
|
+
throw new NullPointerException();
|
66
|
+
}
|
67
|
+
|
68
|
+
ElapsedTime.debugUntil(() -> future.isDone() || future.isCancelled(),
|
69
|
+
logger, "embulk executor", 3000L);
|
70
|
+
}
|
71
|
+
|
72
|
+
private Callable<ExecutionResult> embulkRun(ConfigSource config)
|
73
|
+
{
|
74
|
+
return () -> newEmbulkEmbed(injector).run(config);
|
75
|
+
}
|
76
|
+
|
77
|
+
private EmbulkEmbed newEmbulkEmbed(Injector injector)
|
78
|
+
{
|
79
|
+
try {
|
80
|
+
Constructor<EmbulkEmbed> constructor = EmbulkEmbed.class
|
81
|
+
.getDeclaredConstructor(ConfigSource.class, LifeCycleInjector.class);
|
82
|
+
constructor.setAccessible(true);
|
83
|
+
return constructor.newInstance(Exec.newConfigSource(), injector);
|
84
|
+
}
|
85
|
+
catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) {
|
86
|
+
throw new ConfigException(e);
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
private FutureCallback<ExecutionResult> resultFutureCallback()
|
91
|
+
{
|
92
|
+
return new FutureCallback<ExecutionResult>()
|
93
|
+
{
|
94
|
+
@Override
|
95
|
+
public void onSuccess(@Nullable ExecutionResult result)
|
96
|
+
{
|
97
|
+
for (Throwable throwable : result.getIgnoredExceptions()) {
|
98
|
+
logger.warn("Ignored error ", throwable);
|
99
|
+
}
|
100
|
+
logger.info("Config diff: {}", result.getConfigDiff());
|
101
|
+
logger.debug("ExecutionResult: {}", result);
|
102
|
+
}
|
103
|
+
|
104
|
+
@Override
|
105
|
+
public void onFailure(Throwable t)
|
106
|
+
{
|
107
|
+
throw new RuntimeException(t);
|
108
|
+
}
|
109
|
+
};
|
110
|
+
}
|
111
|
+
}
|