embulk-filter-calc 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/build.gradle +104 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/example/config.yml +36 -0
- data/example/config2.yml +32 -0
- data/example/csv/sample_01.csv.gz +0 -0
- data/example/seed.yml +5 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/filter/calc.rb +3 -0
- data/src/main/antlr/org/embulk/filter/calc/Calculator.g4 +52 -0
- data/src/main/java/org/embulk/filter/calc/CalcConfigCheckVisitor.java +32 -0
- data/src/main/java/org/embulk/filter/calc/CalcConfigChecker.java +38 -0
- data/src/main/java/org/embulk/filter/calc/CalcFilterPlugin.java +140 -0
- data/src/main/java/org/embulk/filter/calc/CalcFormulaVisitor.java +130 -0
- data/src/main/java/org/embulk/filter/calc/CalcVisitorImpl.java +141 -0
- data/src/main/java/org/embulk/filter/calc/Calculator.java +30 -0
- data/src/main/java/org/embulk/filter/calc/ConfigErrorListener.java +27 -0
- data/src/test/java/org/embulk/filter/calc/TestCalcFilterPlugin.java +107 -0
- data/src/test/java/org/embulk/filter/calc/TestCalcVisitorImpl.java +448 -0
- metadata +100 -0
@@ -0,0 +1,140 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.ConfigSource;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.config.TaskSource;
|
8
|
+
import org.embulk.spi.Column;
|
9
|
+
import org.embulk.spi.ColumnConfig;
|
10
|
+
import org.embulk.spi.Exec;
|
11
|
+
import org.embulk.spi.FilterPlugin;
|
12
|
+
import org.embulk.spi.Page;
|
13
|
+
import org.embulk.spi.PageBuilder;
|
14
|
+
import org.embulk.spi.PageOutput;
|
15
|
+
import org.embulk.spi.PageReader;
|
16
|
+
import org.embulk.spi.Schema;
|
17
|
+
import org.embulk.spi.SchemaConfigException;
|
18
|
+
import org.embulk.spi.type.Type;
|
19
|
+
import org.embulk.spi.type.Types;
|
20
|
+
|
21
|
+
import java.util.List;
|
22
|
+
|
23
|
+
import static java.util.Locale.ENGLISH;
|
24
|
+
|
25
|
+
public class CalcFilterPlugin
|
26
|
+
implements FilterPlugin
|
27
|
+
{
|
28
|
+
|
29
|
+
// private Object IOException;
|
30
|
+
|
31
|
+
public interface CalcConfig
|
32
|
+
extends Task
|
33
|
+
{
|
34
|
+
@Config("formula")
|
35
|
+
String getFormula();
|
36
|
+
|
37
|
+
@Config("name")
|
38
|
+
String getName();
|
39
|
+
}
|
40
|
+
|
41
|
+
public interface PluginTask
|
42
|
+
extends Task
|
43
|
+
{
|
44
|
+
|
45
|
+
@Config("columns")
|
46
|
+
public List<CalcConfig> getCalcConfig();
|
47
|
+
|
48
|
+
@Config("output_columns")
|
49
|
+
@ConfigDefault("[]")
|
50
|
+
public List<ColumnConfig> getOutputColumns();
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
55
|
+
FilterPlugin.Control control)
|
56
|
+
{
|
57
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
58
|
+
|
59
|
+
Schema outputSchema = buildOutputSchema(task, inputSchema);
|
60
|
+
for (CalcConfig calcConfig : task.getCalcConfig()) {
|
61
|
+
CalcConfigChecker calc = new CalcConfigChecker(calcConfig.getName(), calcConfig.getFormula(), outputSchema);
|
62
|
+
calc.validateFormula();
|
63
|
+
}
|
64
|
+
|
65
|
+
control.run(task.dump(), outputSchema);
|
66
|
+
}
|
67
|
+
|
68
|
+
static Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
69
|
+
{
|
70
|
+
Schema.Builder builder = Schema.builder();
|
71
|
+
for (Column inputColumns : inputSchema.getColumns()) {
|
72
|
+
builder.add(inputColumns.getName(), inputColumns.getType());
|
73
|
+
}
|
74
|
+
|
75
|
+
List<ColumnConfig> outputColumns = task.getOutputColumns();
|
76
|
+
for (ColumnConfig outputColumn : outputColumns) {
|
77
|
+
|
78
|
+
String name = outputColumn.getName();
|
79
|
+
Type type = outputColumn.getType();
|
80
|
+
Column inputColumn;
|
81
|
+
try {
|
82
|
+
inputColumn = inputSchema.lookupColumn(name);
|
83
|
+
}
|
84
|
+
catch (SchemaConfigException ex) {
|
85
|
+
inputColumn = null;
|
86
|
+
}
|
87
|
+
if (inputColumn != null) {
|
88
|
+
throw new SchemaConfigException(String.format(ENGLISH, "The column \"%s\" already exists.", name));
|
89
|
+
}
|
90
|
+
|
91
|
+
if (Types.DOUBLE.equals(type)) {
|
92
|
+
builder.add(name, Types.DOUBLE);
|
93
|
+
}
|
94
|
+
else if (Types.LONG.equals(type)) {
|
95
|
+
builder.add(name, Types.LONG);
|
96
|
+
}
|
97
|
+
else {
|
98
|
+
throw new SchemaConfigException(String.format(ENGLISH, "The column \"%s\" must specify either long or double.", name));
|
99
|
+
}
|
100
|
+
}
|
101
|
+
return builder.build();
|
102
|
+
}
|
103
|
+
|
104
|
+
@Override
|
105
|
+
public PageOutput open(TaskSource taskSource, final Schema inputSchema,
|
106
|
+
final Schema outputSchema, final PageOutput output)
|
107
|
+
{
|
108
|
+
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
109
|
+
|
110
|
+
return new PageOutput()
|
111
|
+
{
|
112
|
+
private PageReader pageReader = new PageReader(inputSchema);
|
113
|
+
private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
114
|
+
private CalcVisitorImpl visitor = new CalcVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
|
115
|
+
|
116
|
+
@Override
|
117
|
+
public void finish()
|
118
|
+
{
|
119
|
+
pageBuilder.finish();
|
120
|
+
}
|
121
|
+
|
122
|
+
@Override
|
123
|
+
public void close()
|
124
|
+
{
|
125
|
+
pageBuilder.close();
|
126
|
+
}
|
127
|
+
|
128
|
+
@Override
|
129
|
+
public void add(Page page)
|
130
|
+
{
|
131
|
+
pageReader.setPage(page);
|
132
|
+
|
133
|
+
while (pageReader.nextRecord()) {
|
134
|
+
outputSchema.visitColumns(visitor);
|
135
|
+
pageBuilder.addRecord();
|
136
|
+
}
|
137
|
+
}
|
138
|
+
};
|
139
|
+
}
|
140
|
+
}
|
@@ -0,0 +1,130 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigException;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.Page;
|
6
|
+
import org.embulk.spi.PageReader;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
import org.embulk.spi.type.Types;
|
9
|
+
|
10
|
+
import static java.util.Locale.ENGLISH;
|
11
|
+
|
12
|
+
public class CalcFormulaVisitor
|
13
|
+
extends CalculatorBaseVisitor<Double>
|
14
|
+
{
|
15
|
+
private PageReader pageReader;
|
16
|
+
private Schema inputSchema;
|
17
|
+
|
18
|
+
public CalcFormulaVisitor(Schema inputSchema, PageReader pageReader)
|
19
|
+
{
|
20
|
+
this.pageReader = pageReader;
|
21
|
+
this.inputSchema = inputSchema;
|
22
|
+
}
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public Double visitMulDivMod(CalculatorParser.MulDivModContext ctx)
|
26
|
+
{
|
27
|
+
Double left = visit(ctx.expr(0));
|
28
|
+
Double right = visit(ctx.expr(1));
|
29
|
+
|
30
|
+
if (left == null || right == null) {
|
31
|
+
return null;
|
32
|
+
}
|
33
|
+
else if (ctx.op.getType() == CalculatorParser.MUL) {
|
34
|
+
return left * right;
|
35
|
+
}
|
36
|
+
|
37
|
+
else if (ctx.op.getType() == CalculatorParser.DIV) {
|
38
|
+
return left / right;
|
39
|
+
}
|
40
|
+
else {
|
41
|
+
return left % right;
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public Double visitAddSub(CalculatorParser.AddSubContext ctx)
|
47
|
+
{
|
48
|
+
Double left = visit(ctx.expr(0));
|
49
|
+
Double right = visit(ctx.expr(1));
|
50
|
+
if (left == null || right == null) {
|
51
|
+
return null;
|
52
|
+
}
|
53
|
+
else if (ctx.op.getType() == CalculatorParser.ADD) {
|
54
|
+
return left + right;
|
55
|
+
}
|
56
|
+
else {
|
57
|
+
return left - right;
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
@Override
|
62
|
+
public Double visitNumber(CalculatorParser.NumberContext ctx)
|
63
|
+
{
|
64
|
+
String id = ctx.NUM().getText();
|
65
|
+
|
66
|
+
return Double.parseDouble(id);
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public Double visitIdentifier(CalculatorParser.IdentifierContext ctx)
|
71
|
+
{
|
72
|
+
String id = ctx.ID().getText();
|
73
|
+
Double val;
|
74
|
+
Column column = inputSchema.lookupColumn(id);
|
75
|
+
|
76
|
+
if (pageReader.isNull(column)) {
|
77
|
+
val = null;
|
78
|
+
}
|
79
|
+
else if (Types.DOUBLE.equals(column.getType())) {
|
80
|
+
val = pageReader.getDouble(column);
|
81
|
+
}
|
82
|
+
else if (Types.LONG.equals(column.getType())) {
|
83
|
+
Long v;
|
84
|
+
v = pageReader.getLong(column);
|
85
|
+
val = v.doubleValue();
|
86
|
+
}
|
87
|
+
else {
|
88
|
+
// throw
|
89
|
+
val = null;
|
90
|
+
}
|
91
|
+
return val;
|
92
|
+
}
|
93
|
+
|
94
|
+
@Override
|
95
|
+
public Double visitParen(CalculatorParser.ParenContext ctx)
|
96
|
+
{
|
97
|
+
return visit(ctx.expr());
|
98
|
+
}
|
99
|
+
|
100
|
+
@Override
|
101
|
+
public Double visitPower(CalculatorParser.PowerContext ctx)
|
102
|
+
{
|
103
|
+
Double left = visit(ctx.expr(0));
|
104
|
+
Double right = visit(ctx.expr(1));
|
105
|
+
if (left == null || right == null) {
|
106
|
+
return null;
|
107
|
+
}
|
108
|
+
|
109
|
+
return Math.pow(left, right);
|
110
|
+
}
|
111
|
+
|
112
|
+
// Scientific Functions
|
113
|
+
@Override
|
114
|
+
public Double visitFuncCos(CalculatorParser.FuncCosContext ctx)
|
115
|
+
{
|
116
|
+
return Math.cos(visit(ctx.expr()));
|
117
|
+
}
|
118
|
+
|
119
|
+
@Override
|
120
|
+
public Double visitFuncSin(CalculatorParser.FuncSinContext ctx)
|
121
|
+
{
|
122
|
+
return Math.sin(visit(ctx.expr()));
|
123
|
+
}
|
124
|
+
|
125
|
+
@Override
|
126
|
+
public Double visitFuncTan(CalculatorParser.FuncTanContext ctx)
|
127
|
+
{
|
128
|
+
return Math.tan(visit(ctx.expr()));
|
129
|
+
}
|
130
|
+
}
|
@@ -0,0 +1,141 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.embulk.spi.Column;
|
4
|
+
import org.embulk.spi.ColumnVisitor;
|
5
|
+
import org.embulk.spi.Exec;
|
6
|
+
import org.embulk.spi.PageBuilder;
|
7
|
+
import org.embulk.spi.PageReader;
|
8
|
+
import org.embulk.spi.Schema;
|
9
|
+
import org.slf4j.Logger;
|
10
|
+
|
11
|
+
import java.util.HashMap;
|
12
|
+
|
13
|
+
public class CalcVisitorImpl
|
14
|
+
implements ColumnVisitor
|
15
|
+
{
|
16
|
+
|
17
|
+
private static final Logger logger = Exec.getLogger(CalcFilterPlugin.class);
|
18
|
+
private final CalcFilterPlugin.PluginTask task;
|
19
|
+
private final Schema inputSchema;
|
20
|
+
private final Schema outputSchema;
|
21
|
+
private final PageReader pageReader;
|
22
|
+
private final PageBuilder pageBuilder;
|
23
|
+
private final HashMap<String, Calculator> calcMap = new HashMap<>();
|
24
|
+
|
25
|
+
CalcVisitorImpl(CalcFilterPlugin.PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
|
26
|
+
{
|
27
|
+
this.task = task;
|
28
|
+
this.inputSchema = inputSchema;
|
29
|
+
this.outputSchema = outputSchema;
|
30
|
+
this.pageReader = pageReader;
|
31
|
+
this.pageBuilder = pageBuilder;
|
32
|
+
initializeCalcMap();
|
33
|
+
}
|
34
|
+
|
35
|
+
private void initializeCalcMap()
|
36
|
+
{
|
37
|
+
|
38
|
+
for (CalcFilterPlugin.CalcConfig calcConfig : task.getCalcConfig()) {
|
39
|
+
String name = calcConfig.getName();
|
40
|
+
String formula = calcConfig.getFormula();
|
41
|
+
Calculator calc = new Calculator(formula, inputSchema, pageReader);
|
42
|
+
calcMap.put(name, calc);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
@Override
|
47
|
+
public void booleanColumn(Column outputColumn)
|
48
|
+
{
|
49
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
50
|
+
if (pageReader.isNull(inputColumn)) {
|
51
|
+
pageBuilder.setNull(outputColumn);
|
52
|
+
}
|
53
|
+
else {
|
54
|
+
pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
@Override
|
59
|
+
public void longColumn(Column outputColumn)
|
60
|
+
{
|
61
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
62
|
+
Calculator calc = calcMap.get(outputColumn.getName());
|
63
|
+
if (calc == null) {
|
64
|
+
if (pageReader.isNull(inputColumn)) {
|
65
|
+
pageBuilder.setNull(outputColumn);
|
66
|
+
}
|
67
|
+
else {
|
68
|
+
pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
|
69
|
+
}
|
70
|
+
}
|
71
|
+
else {
|
72
|
+
Double val = calc.calc();
|
73
|
+
if (val == null) {
|
74
|
+
pageBuilder.setNull(outputColumn);
|
75
|
+
}
|
76
|
+
else {
|
77
|
+
pageBuilder.setLong(outputColumn, val.longValue());
|
78
|
+
}
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
@Override
|
83
|
+
public void doubleColumn(Column outputColumn)
|
84
|
+
{
|
85
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
86
|
+
Calculator calc = calcMap.get(outputColumn.getName());
|
87
|
+
if (calc == null) {
|
88
|
+
if (pageReader.isNull(inputColumn)) {
|
89
|
+
pageBuilder.setNull(outputColumn);
|
90
|
+
}
|
91
|
+
else {
|
92
|
+
pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
|
93
|
+
}
|
94
|
+
}
|
95
|
+
else {
|
96
|
+
Double val = calc.calc();
|
97
|
+
if (val == null) {
|
98
|
+
pageBuilder.setNull(outputColumn);
|
99
|
+
}
|
100
|
+
else {
|
101
|
+
pageBuilder.setDouble(outputColumn, val);
|
102
|
+
}
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
@Override
|
107
|
+
public void stringColumn(Column outputColumn)
|
108
|
+
{
|
109
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
110
|
+
if (pageReader.isNull(inputColumn)) {
|
111
|
+
pageBuilder.setNull(outputColumn);
|
112
|
+
}
|
113
|
+
else {
|
114
|
+
pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
@Override
|
119
|
+
public void jsonColumn(Column outputColumn)
|
120
|
+
{
|
121
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
122
|
+
if (pageReader.isNull(inputColumn)) {
|
123
|
+
pageBuilder.setNull(outputColumn);
|
124
|
+
}
|
125
|
+
else {
|
126
|
+
pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
@Override
|
131
|
+
public void timestampColumn(Column outputColumn)
|
132
|
+
{
|
133
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
134
|
+
if (pageReader.isNull(inputColumn)) {
|
135
|
+
pageBuilder.setNull(outputColumn);
|
136
|
+
}
|
137
|
+
else {
|
138
|
+
pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
139
|
+
}
|
140
|
+
}
|
141
|
+
}
|
@@ -0,0 +1,30 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.antlr.v4.runtime.ANTLRInputStream;
|
4
|
+
import org.antlr.v4.runtime.CommonTokenStream;
|
5
|
+
import org.antlr.v4.runtime.tree.ParseTree;
|
6
|
+
import org.embulk.spi.PageReader;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
|
9
|
+
public class Calculator
|
10
|
+
{
|
11
|
+
private ParseTree tree;
|
12
|
+
private CalcFormulaVisitor visitor;
|
13
|
+
|
14
|
+
public Calculator(String formula, Schema inputSchema, PageReader pageReader)
|
15
|
+
{
|
16
|
+
ANTLRInputStream input = new ANTLRInputStream(formula);
|
17
|
+
CalculatorLexer lexer = new CalculatorLexer(input);
|
18
|
+
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
19
|
+
CalculatorParser parser = new CalculatorParser(tokens);
|
20
|
+
|
21
|
+
this.tree = parser.expr();
|
22
|
+
this.visitor = new CalcFormulaVisitor(inputSchema, pageReader);
|
23
|
+
}
|
24
|
+
|
25
|
+
public Double calc()
|
26
|
+
{
|
27
|
+
return visitor.visit(tree);
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.antlr.v4.runtime.BaseErrorListener;
|
4
|
+
import org.antlr.v4.runtime.RecognitionException;
|
5
|
+
import org.antlr.v4.runtime.Recognizer;
|
6
|
+
import org.embulk.config.ConfigException;
|
7
|
+
|
8
|
+
import static java.util.Locale.ENGLISH;
|
9
|
+
|
10
|
+
public class ConfigErrorListener
|
11
|
+
extends BaseErrorListener
|
12
|
+
{
|
13
|
+
private String column_name;
|
14
|
+
|
15
|
+
public ConfigErrorListener(String column_name)
|
16
|
+
{
|
17
|
+
this.column_name = column_name;
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public void syntaxError(Recognizer<?, ?> recognizer,
|
22
|
+
Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e)
|
23
|
+
{
|
24
|
+
String err = String.format(ENGLISH, "The \"%s\" column has invalid formula. line: %d error: %s", column_name, line, msg);
|
25
|
+
throw new ConfigException(err);
|
26
|
+
}
|
27
|
+
}
|