embulk-filter-speedometer 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
# Register the Java-implemented filter with Embulk under the plugin name
# "speedometer". The last argument is the `classpath` path resolved
# relative to this file.
Embulk::JavaPlugin.register_filter(
  "speedometer",
  "org.embulk.filter.SpeedometerFilterPlugin",
  File.expand_path('../../../../classpath', __FILE__)
)
@@ -0,0 +1,289 @@
1
+ package org.embulk.filter;
2
+
3
+ import javax.validation.constraints.Min;
4
+
5
+ import org.embulk.config.Config;
6
+ import org.embulk.config.ConfigDefault;
7
+ import org.embulk.config.ConfigInject;
8
+ import org.embulk.config.ConfigSource;
9
+ import org.embulk.config.Task;
10
+ import org.embulk.config.TaskSource;
11
+ import org.embulk.spi.BufferAllocator;
12
+ import org.embulk.spi.Column;
13
+ import org.embulk.spi.ColumnVisitor;
14
+ import org.embulk.spi.FilterPlugin;
15
+ import org.embulk.spi.Page;
16
+ import org.embulk.spi.PageBuilder;
17
+ import org.embulk.spi.PageOutput;
18
+ import org.embulk.spi.PageReader;
19
+ import org.embulk.spi.Schema;
20
+ import org.embulk.spi.time.Timestamp;
21
+ import org.embulk.spi.time.TimestampFormatter;
22
+ import org.embulk.spi.type.TimestampType;
23
+
24
+ import com.google.common.collect.ImmutableMap;
25
+
26
+ public class SpeedometerFilterPlugin
27
+ implements FilterPlugin
28
+ {
29
+ private static final int TRUE_LENGTH = Boolean.toString(true).length();
30
+ private static final int FALSE_LENGTH = Boolean.toString(false).length();
31
+
32
+ public interface PluginTask
33
+ extends Task, TimestampFormatter.FormatterTask
34
+ {
35
+ @Config("speed_limit")
36
+ @ConfigDefault("0")
37
+ @Min(0)
38
+ public long getSpeedLimit();
39
+
40
+ @Config("max_sleep_millisec")
41
+ @ConfigDefault("1000")
42
+ @Min(0)
43
+ public int getMaxSleepMillisec();
44
+
45
+ @Config("delimiter")
46
+ @ConfigDefault("\",\"")
47
+ public String getDelimiter();
48
+
49
+ @Config("record_padding_size")
50
+ @ConfigDefault("1")
51
+ public int getRecordPaddingSize();
52
+
53
+ @Config("log_interval_seconds")
54
+ @ConfigDefault("10")
55
+ @Min(0)
56
+ public int getLogIntervalSeconds();
57
+
58
+ @ConfigInject
59
+ public BufferAllocator getBufferAllocator();
60
+ }
61
+
62
+ @Override
63
+ public void transaction(ConfigSource config, Schema inputSchema,
64
+ FilterPlugin.Control control)
65
+ {
66
+ PluginTask task = config.loadConfig(PluginTask.class);
67
+ Schema outputSchema = inputSchema;
68
+ control.run(task.dump(), outputSchema);
69
+ }
70
+
71
+ @Override
72
+ public PageOutput open(TaskSource taskSource, Schema inputSchema,
73
+ Schema outputSchema, PageOutput output)
74
+ {
75
+ PluginTask task = taskSource.loadTask(PluginTask.class);
76
+
77
+ return new SpeedControlPageOutput(task, inputSchema, output);
78
+ }
79
+
80
+ static class SpeedControlPageOutput implements PageOutput {
81
+ private final SpeedometerSpeedController controller;
82
+ private final Schema schema;
83
+ private final ImmutableMap<Column, TimestampFormatter> timestampMap;
84
+ private final PageOutput pageOutput;
85
+ private final PageOutput addOnlyPageOutput;
86
+ private final PageReader pageReader;
87
+ private final BufferAllocator allocator;
88
+ private final int delimiterLength;
89
+ private final int recordPaddingSize;
90
+
91
+ SpeedControlPageOutput(PluginTask task, Schema schema, PageOutput pageOutput) {
92
+ this.controller = new SpeedometerSpeedController(task, SpeedometerSpeedAggregator.getInstance());
93
+ this.schema = schema;
94
+ this.pageOutput = pageOutput;
95
+ this.addOnlyPageOutput = new AddOnlyPageOutput(pageOutput);
96
+ this.allocator = task.getBufferAllocator();
97
+ this.delimiterLength = task.getDelimiter().length();
98
+ this.recordPaddingSize = task.getRecordPaddingSize();
99
+ pageReader = new PageReader(schema);
100
+ timestampMap = buildTimestampFormatterMap(task, schema);
101
+ }
102
+
103
+ @Override
104
+ public void add(Page page) {
105
+ try (final PageBuilder pageBuilder = new PageBuilder(allocator, schema, addOnlyPageOutput)) {
106
+ ColumnVisitorImpl visitor = new ColumnVisitorImpl(pageBuilder);
107
+ pageReader.setPage(page);
108
+ while (pageReader.nextRecord()) {
109
+ visitor.speedMonitorStartRecord();
110
+ schema.visitColumns(visitor);
111
+ visitor.speedMonitorEndRecord();
112
+ pageBuilder.addRecord();
113
+ }
114
+ pageBuilder.finish();
115
+ }
116
+ }
117
+
118
+ @Override
119
+ public void finish() {
120
+ pageOutput.finish();
121
+ }
122
+
123
+ @Override
124
+ public void close() {
125
+ controller.stop();
126
+ pageReader.close();
127
+ pageOutput.close();
128
+ }
129
+
130
+ private ImmutableMap<Column, TimestampFormatter> buildTimestampFormatterMap(final PluginTask task, Schema schema) {
131
+ final ImmutableMap.Builder<Column, TimestampFormatter> builder = new ImmutableMap.Builder<>();
132
+
133
+ schema.visitColumns(new ColumnVisitor() {
134
+ @Override
135
+ public void booleanColumn(Column column) { }
136
+
137
+ @Override
138
+ public void longColumn(Column column) { }
139
+
140
+ @Override
141
+ public void doubleColumn(Column column) { }
142
+
143
+ @Override
144
+ public void stringColumn(Column column) { }
145
+
146
+ @Override
147
+ public void timestampColumn(Column column) {
148
+ if (column.getType() instanceof TimestampType) {
149
+ TimestampType tt = (TimestampType) column.getType();
150
+ builder.put(column, new TimestampFormatter(tt.getFormat(), task));
151
+ } else {
152
+ throw new RuntimeException("Timestamp should be TimestampType.");
153
+ }
154
+ }
155
+ });
156
+
157
+ return builder.build();
158
+ }
159
+
160
+ // Ignore finish and close to avoid closing upper output stream.
161
+ static class AddOnlyPageOutput implements PageOutput {
162
+ protected final PageOutput output;
163
+
164
+ public AddOnlyPageOutput(PageOutput outptut) {
165
+ this.output = outptut;
166
+ }
167
+
168
+ @Override
169
+ public void add(Page page) {
170
+ output.add(page);
171
+ }
172
+
173
+ @Override
174
+ public void finish() { }
175
+
176
+ @Override
177
+ public void close() { }
178
+ }
179
+
180
+ class ColumnVisitorImpl implements ColumnVisitor {
181
+ private final PageBuilder pageBuilder;
182
+ private long startRecordTime;
183
+
184
+ ColumnVisitorImpl(PageBuilder pageBuilder) {
185
+ this.pageBuilder = pageBuilder;
186
+ }
187
+
188
+ @Override
189
+ public void booleanColumn(Column column) {
190
+ if (pageReader.isNull(column)) {
191
+ speedMonitor(column);
192
+ pageBuilder.setNull(column);
193
+ } else {
194
+ pageBuilder.setBoolean(column, speedMonitor(column, pageReader.getBoolean(column)));
195
+ }
196
+ }
197
+
198
+ @Override
199
+ public void longColumn(Column column) {
200
+ if (pageReader.isNull(column)) {
201
+ speedMonitor(column);
202
+ pageBuilder.setNull(column);
203
+ } else {
204
+ pageBuilder.setLong(column, speedMonitor(column, pageReader.getLong(column)));
205
+ }
206
+ }
207
+
208
+ @Override
209
+ public void doubleColumn(Column column) {
210
+ if (pageReader.isNull(column)) {
211
+ speedMonitor(column);
212
+ pageBuilder.setNull(column);
213
+ } else {
214
+ pageBuilder.setDouble(column, speedMonitor(column, pageReader.getDouble(column)));
215
+ }
216
+ }
217
+
218
+ @Override
219
+ public void stringColumn(Column column) {
220
+ if (pageReader.isNull(column)) {
221
+ speedMonitor(column);
222
+ pageBuilder.setNull(column);
223
+ } else {
224
+ pageBuilder.setString(column, speedMonitor(column, pageReader.getString(column)));
225
+ }
226
+ }
227
+
228
+ @Override
229
+ public void timestampColumn(Column column) {
230
+ if (pageReader.isNull(column)) {
231
+ speedMonitor(column);
232
+ pageBuilder.setNull(column);
233
+ } else {
234
+ pageBuilder.setTimestamp(column, speedMonitor(column, pageReader.getTimestamp(column)));
235
+ }
236
+ }
237
+
238
+ private void speedMonitorStartRecord() {
239
+ startRecordTime = System.currentTimeMillis();
240
+ }
241
+
242
+ private void speedMonitorEndRecord() {
243
+ controller.checkSpeedLimit(startRecordTime, recordPaddingSize);
244
+ }
245
+
246
+ // For null column
247
+ private void speedMonitor(Column column) {
248
+ speedMonitorForDelimiter(column);
249
+ }
250
+
251
+ private boolean speedMonitor(Column column, boolean b) {
252
+ speedMonitorForDelimiter(column);
253
+ controller.checkSpeedLimit(startRecordTime, b ? TRUE_LENGTH : FALSE_LENGTH);
254
+ return b;
255
+ }
256
+
257
+ private long speedMonitor(Column column, long l) {
258
+ speedMonitorForDelimiter(column);
259
+ controller.checkSpeedLimit(startRecordTime, String.valueOf(l).length());
260
+ return l;
261
+ }
262
+
263
+ private double speedMonitor(Column column, double d) {
264
+ speedMonitorForDelimiter(column);
265
+ controller.checkSpeedLimit(startRecordTime, String.valueOf(d).length());
266
+ return d;
267
+ }
268
+
269
+ private String speedMonitor(Column column, String s) {
270
+ speedMonitorForDelimiter(column);
271
+ controller.checkSpeedLimit(startRecordTime, s.length());
272
+ return s;
273
+ }
274
+
275
+ private Timestamp speedMonitor(Column column, Timestamp t) {
276
+ speedMonitorForDelimiter(column);
277
+ TimestampFormatter formatter = timestampMap.get(column);
278
+ controller.checkSpeedLimit(startRecordTime, formatter.format(t).length());
279
+ return t;
280
+ }
281
+
282
+ private void speedMonitorForDelimiter(Column column) {
283
+ if (column.getIndex() > 0) {
284
+ controller.checkSpeedLimit(startRecordTime, delimiterLength);
285
+ }
286
+ }
287
+ }
288
+ }
289
+ }
@@ -0,0 +1,141 @@
1
+ package org.embulk.filter;
2
+
3
+ import java.util.ArrayList;
4
+ import java.util.List;
5
+ import java.util.concurrent.atomic.AtomicInteger;
6
+ import java.util.concurrent.atomic.AtomicLong;
7
+
8
+ import org.embulk.spi.Exec;
9
+ import org.slf4j.Logger;
10
+
11
+ class SpeedometerSpeedAggregator {
12
+ private static class SpeedometerSpeedAggregatorHolder {
13
+ private static final SpeedometerSpeedAggregator INSTANCE = new SpeedometerSpeedAggregator();
14
+ }
15
+
16
+ private final long INITAL_START_TIME = 0;
17
+
18
+ private final AtomicInteger activeControllerCount = new AtomicInteger(0);
19
+ private final AtomicLong globalStartTime = new AtomicLong(INITAL_START_TIME);
20
+ private final AtomicLong globalTotalBytes = new AtomicLong(0);
21
+ private final AtomicLong previousLogReportTimeMillisec = new AtomicLong(INITAL_START_TIME);
22
+
23
+ // TODO: We can use google's library.
24
+ private final List<SpeedometerSpeedController> controllerList = new ArrayList<>();
25
+
26
+ public static SpeedometerSpeedAggregator getInstance() {
27
+ return SpeedometerSpeedAggregatorHolder.INSTANCE;
28
+ }
29
+
30
+ public SpeedometerSpeedAggregator() {
31
+ showLogMessage(activeControllerCount.get(), 0, 0, 0);
32
+ }
33
+
34
+ public void startController(SpeedometerSpeedController controller, long nowTime) {
35
+ globalStartTime.compareAndSet(INITAL_START_TIME, nowTime);
36
+ activeControllerCount.incrementAndGet();
37
+ synchronized (controllerList) {
38
+ controllerList.add(controller);
39
+ }
40
+ }
41
+
42
+ public void stopController(SpeedometerSpeedController controller) {
43
+ synchronized (controllerList) {
44
+ controllerList.remove(controller);
45
+ }
46
+ long runningCount = activeControllerCount.decrementAndGet();
47
+ globalTotalBytes.addAndGet(controller.getTotalBytes());
48
+
49
+ // NOTE: Sometimes, there is no running thread nevertheless there are remaining tasks.
50
+ // So, this message may be output while running tasks.
51
+ if (runningCount == 0) {
52
+ showOverallMessage();
53
+ }
54
+ }
55
+
56
+ public long getSpeedLimitForController(SpeedometerSpeedController controller) {
57
+ return controller.getSpeedLimit() / activeControllerCount.get();
58
+ }
59
+
60
+ public void checkProgress(long nowTime, int logIntervalMillisec) {
61
+ if (logIntervalMillisec <= 0) {
62
+ return;
63
+ }
64
+
65
+ long previousTime = previousLogReportTimeMillisec.get();
66
+ if (previousTime == INITAL_START_TIME) {
67
+ previousLogReportTimeMillisec.compareAndSet(INITAL_START_TIME, nowTime);
68
+ } else {
69
+ long nowInterval = previousTime + logIntervalMillisec;
70
+ if (nowInterval < nowTime) {
71
+ if (previousLogReportTimeMillisec.compareAndSet(previousTime, nowTime)) {
72
+ showProgressMessage(nowTime);
73
+ renewPeriods();
74
+ }
75
+ }
76
+ }
77
+ }
78
+
79
+ Logger getLogger() {
80
+ return Exec.getLogger(SpeedometerFilterPlugin.class);
81
+ }
82
+
83
+ long getGlobalStartTime() {
84
+ return globalStartTime.get();
85
+ }
86
+
87
+ int getActiveControllerCount() {
88
+ return activeControllerCount.get();
89
+ }
90
+
91
+ long getGlobalTotalBytes() {
92
+ return globalTotalBytes.get();
93
+ }
94
+
95
+ List<SpeedometerSpeedController> getControllerList() {
96
+ List<SpeedometerSpeedController> copyList;
97
+ synchronized (controllerList) {
98
+ copyList = new ArrayList<SpeedometerSpeedController>(controllerList);
99
+ }
100
+ return copyList;
101
+ }
102
+
103
+ private void renewPeriods() {
104
+ for (SpeedometerSpeedController controller : getControllerList()) {
105
+ controller.renewPeriod();
106
+ }
107
+ }
108
+
109
+ private void showProgressMessage(long nowTime) {
110
+ long currentTotalSize = globalTotalBytes.get();
111
+ long currentBytesPerSec = 0;
112
+ for (SpeedometerSpeedController controller : getControllerList()) {
113
+ currentTotalSize += controller.getTotalBytes();
114
+ currentBytesPerSec += controller.getPeriodBytesPerSec(nowTime);
115
+ }
116
+
117
+ long timeDelta = nowTime - globalStartTime.get();
118
+ timeDelta = timeDelta > 0 ? timeDelta : 1;
119
+
120
+ showLogMessage(activeControllerCount.get(), currentTotalSize, timeDelta, currentBytesPerSec);
121
+ }
122
+
123
+ private void showOverallMessage() {
124
+ long timeDelta = System.currentTimeMillis() - globalStartTime.get();
125
+ timeDelta = timeDelta > 0 ? timeDelta : 1;
126
+ long bytesPerSec = (globalTotalBytes.get() * 1000) / timeDelta;
127
+
128
+ showLogMessage(activeControllerCount.get(), globalTotalBytes.get(), timeDelta, bytesPerSec);
129
+ }
130
+
131
+ private void showLogMessage(int activeThreads, long totalBytes, long timeMilliSec, long bytesPerSec) {
132
+ Logger logger = getLogger();
133
+ if (logger != null) {
134
+ logger.info(String.format("{speedometer: {active: %d, total: %s, sec: %s, speed: %s/s}}",
135
+ activeThreads,
136
+ SpeedometerUtil.toNumberText(totalBytes),
137
+ SpeedometerUtil.toTimeText(timeMilliSec),
138
+ SpeedometerUtil.toNumberText(bytesPerSec)));
139
+ }
140
+ }
141
+ }
@@ -0,0 +1,107 @@
1
+ package org.embulk.filter;
2
+
3
+ import org.embulk.filter.SpeedometerFilterPlugin.PluginTask;
4
+
5
+
6
+ class SpeedometerSpeedController {
7
+ private final SpeedometerSpeedAggregator aggregator;
8
+
9
+ private final long limitBytesPerSec;
10
+ private final int maxSleepMillisec;
11
+ private final int logIntervalMillisec;
12
+
13
+ private long startTime;
14
+ private volatile long periodStartTime;
15
+ private volatile long periodTotalBytes;
16
+ private volatile long threadTotalBytes;
17
+ private volatile boolean renewFlag = true;
18
+
19
+ SpeedometerSpeedController(PluginTask task, SpeedometerSpeedAggregator aggregator) {
20
+ this.limitBytesPerSec = task.getSpeedLimit();
21
+ this.maxSleepMillisec = task.getMaxSleepMillisec();
22
+ this.logIntervalMillisec = task.getLogIntervalSeconds() * 1000;
23
+ this.aggregator = aggregator;
24
+ }
25
+
26
+ public void stop() {
27
+ startNewPeriod(0);
28
+ aggregator.stopController(this);
29
+ }
30
+
31
+ public long getSpeedLimit() {
32
+ return limitBytesPerSec;
33
+ }
34
+
35
+ public int getMaxSleepMillisec() {
36
+ return maxSleepMillisec;
37
+ }
38
+
39
+ public int getLogIntervalMillisec() {
40
+ return logIntervalMillisec;
41
+ }
42
+
43
+ public long getTotalBytes() {
44
+ return threadTotalBytes + periodTotalBytes;
45
+ }
46
+
47
+ public long getPeriodBytesPerSec(long nowTime) {
48
+ long timeDeltaMillisec = nowTime - periodStartTime;
49
+ if (timeDeltaMillisec <= 0) {
50
+ timeDeltaMillisec = 1;
51
+ }
52
+ return (periodTotalBytes * 1000) / timeDeltaMillisec;
53
+ }
54
+
55
+ public void checkSpeedLimit(long nowTime, long newDataSize) {
56
+ if (startTime == 0) {
57
+ startTime = nowTime;
58
+ aggregator.startController(this, startTime);
59
+ }
60
+
61
+ if (renewFlag) {
62
+ renewFlag = false;
63
+ startNewPeriod(nowTime);
64
+ }
65
+
66
+ periodTotalBytes += newDataSize;
67
+ aggregator.checkProgress(nowTime, logIntervalMillisec);
68
+
69
+ if (limitBytesPerSec <= 0) {
70
+ return;
71
+ }
72
+
73
+ long speedLimitForThread = aggregator.getSpeedLimitForController(this);
74
+ long timeDeltaMillisec = nowTime > periodStartTime ? nowTime - periodStartTime : 1;
75
+ long bytesPerSec = (periodTotalBytes * 1000) / timeDeltaMillisec;
76
+ long overBytes = bytesPerSec - speedLimitForThread;
77
+
78
+ if (overBytes > 0) {
79
+ try {
80
+ long sleepTime = (periodTotalBytes * 1000) / speedLimitForThread - timeDeltaMillisec;
81
+ sleepTime = sleepTime > maxSleepMillisec ? maxSleepMillisec : sleepTime > 0 ? sleepTime : 0;
82
+ Thread.sleep(sleepTime);
83
+ } catch (InterruptedException e) {
84
+ // TODO: Do I need to throw an exception ?
85
+ }
86
+ }
87
+ }
88
+
89
+ void renewPeriod() {
90
+ renewFlag = true;
91
+ }
92
+
93
+ boolean isRenewPeriodSet() {
94
+ return renewFlag;
95
+ }
96
+
97
+ SpeedometerSpeedAggregator getAggregator() {
98
+ return aggregator;
99
+ }
100
+
101
+ private void startNewPeriod(long newPeriodTime) {
102
+ threadTotalBytes += periodTotalBytes;
103
+ periodTotalBytes = 0;
104
+ periodStartTime = newPeriodTime;
105
+ }
106
+ }
107
+