embulk-filter-calc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +67 -0
- data/build.gradle +104 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/example/config.yml +36 -0
- data/example/config2.yml +32 -0
- data/example/csv/sample_01.csv.gz +0 -0
- data/example/seed.yml +5 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +160 -0
- data/gradlew.bat +90 -0
- data/lib/embulk/filter/calc.rb +3 -0
- data/src/main/antlr/org/embulk/filter/calc/Calculator.g4 +52 -0
- data/src/main/java/org/embulk/filter/calc/CalcConfigCheckVisitor.java +32 -0
- data/src/main/java/org/embulk/filter/calc/CalcConfigChecker.java +38 -0
- data/src/main/java/org/embulk/filter/calc/CalcFilterPlugin.java +140 -0
- data/src/main/java/org/embulk/filter/calc/CalcFormulaVisitor.java +130 -0
- data/src/main/java/org/embulk/filter/calc/CalcVisitorImpl.java +141 -0
- data/src/main/java/org/embulk/filter/calc/Calculator.java +30 -0
- data/src/main/java/org/embulk/filter/calc/ConfigErrorListener.java +27 -0
- data/src/test/java/org/embulk/filter/calc/TestCalcFilterPlugin.java +107 -0
- data/src/test/java/org/embulk/filter/calc/TestCalcVisitorImpl.java +448 -0
- metadata +100 -0
@@ -0,0 +1,140 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.embulk.config.Config;
|
4
|
+
import org.embulk.config.ConfigDefault;
|
5
|
+
import org.embulk.config.ConfigSource;
|
6
|
+
import org.embulk.config.Task;
|
7
|
+
import org.embulk.config.TaskSource;
|
8
|
+
import org.embulk.spi.Column;
|
9
|
+
import org.embulk.spi.ColumnConfig;
|
10
|
+
import org.embulk.spi.Exec;
|
11
|
+
import org.embulk.spi.FilterPlugin;
|
12
|
+
import org.embulk.spi.Page;
|
13
|
+
import org.embulk.spi.PageBuilder;
|
14
|
+
import org.embulk.spi.PageOutput;
|
15
|
+
import org.embulk.spi.PageReader;
|
16
|
+
import org.embulk.spi.Schema;
|
17
|
+
import org.embulk.spi.SchemaConfigException;
|
18
|
+
import org.embulk.spi.type.Type;
|
19
|
+
import org.embulk.spi.type.Types;
|
20
|
+
|
21
|
+
import java.util.List;
|
22
|
+
|
23
|
+
import static java.util.Locale.ENGLISH;
|
24
|
+
|
25
|
+
public class CalcFilterPlugin
|
26
|
+
implements FilterPlugin
|
27
|
+
{
|
28
|
+
|
29
|
+
// private Object IOException;
|
30
|
+
|
31
|
+
public interface CalcConfig
|
32
|
+
extends Task
|
33
|
+
{
|
34
|
+
@Config("formula")
|
35
|
+
String getFormula();
|
36
|
+
|
37
|
+
@Config("name")
|
38
|
+
String getName();
|
39
|
+
}
|
40
|
+
|
41
|
+
public interface PluginTask
|
42
|
+
extends Task
|
43
|
+
{
|
44
|
+
|
45
|
+
@Config("columns")
|
46
|
+
public List<CalcConfig> getCalcConfig();
|
47
|
+
|
48
|
+
@Config("output_columns")
|
49
|
+
@ConfigDefault("[]")
|
50
|
+
public List<ColumnConfig> getOutputColumns();
|
51
|
+
}
|
52
|
+
|
53
|
+
@Override
|
54
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
55
|
+
FilterPlugin.Control control)
|
56
|
+
{
|
57
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
58
|
+
|
59
|
+
Schema outputSchema = buildOutputSchema(task, inputSchema);
|
60
|
+
for (CalcConfig calcConfig : task.getCalcConfig()) {
|
61
|
+
CalcConfigChecker calc = new CalcConfigChecker(calcConfig.getName(), calcConfig.getFormula(), outputSchema);
|
62
|
+
calc.validateFormula();
|
63
|
+
}
|
64
|
+
|
65
|
+
control.run(task.dump(), outputSchema);
|
66
|
+
}
|
67
|
+
|
68
|
+
static Schema buildOutputSchema(PluginTask task, Schema inputSchema)
|
69
|
+
{
|
70
|
+
Schema.Builder builder = Schema.builder();
|
71
|
+
for (Column inputColumns : inputSchema.getColumns()) {
|
72
|
+
builder.add(inputColumns.getName(), inputColumns.getType());
|
73
|
+
}
|
74
|
+
|
75
|
+
List<ColumnConfig> outputColumns = task.getOutputColumns();
|
76
|
+
for (ColumnConfig outputColumn : outputColumns) {
|
77
|
+
|
78
|
+
String name = outputColumn.getName();
|
79
|
+
Type type = outputColumn.getType();
|
80
|
+
Column inputColumn;
|
81
|
+
try {
|
82
|
+
inputColumn = inputSchema.lookupColumn(name);
|
83
|
+
}
|
84
|
+
catch (SchemaConfigException ex) {
|
85
|
+
inputColumn = null;
|
86
|
+
}
|
87
|
+
if (inputColumn != null) {
|
88
|
+
throw new SchemaConfigException(String.format(ENGLISH, "The column \"%s\" already exists.", name));
|
89
|
+
}
|
90
|
+
|
91
|
+
if (Types.DOUBLE.equals(type)) {
|
92
|
+
builder.add(name, Types.DOUBLE);
|
93
|
+
}
|
94
|
+
else if (Types.LONG.equals(type)) {
|
95
|
+
builder.add(name, Types.LONG);
|
96
|
+
}
|
97
|
+
else {
|
98
|
+
throw new SchemaConfigException(String.format(ENGLISH, "The column \"%s\" must specify either long or double.", name));
|
99
|
+
}
|
100
|
+
}
|
101
|
+
return builder.build();
|
102
|
+
}
|
103
|
+
|
104
|
+
@Override
|
105
|
+
public PageOutput open(TaskSource taskSource, final Schema inputSchema,
|
106
|
+
final Schema outputSchema, final PageOutput output)
|
107
|
+
{
|
108
|
+
final PluginTask task = taskSource.loadTask(PluginTask.class);
|
109
|
+
|
110
|
+
return new PageOutput()
|
111
|
+
{
|
112
|
+
private PageReader pageReader = new PageReader(inputSchema);
|
113
|
+
private PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, output);
|
114
|
+
private CalcVisitorImpl visitor = new CalcVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
|
115
|
+
|
116
|
+
@Override
|
117
|
+
public void finish()
|
118
|
+
{
|
119
|
+
pageBuilder.finish();
|
120
|
+
}
|
121
|
+
|
122
|
+
@Override
|
123
|
+
public void close()
|
124
|
+
{
|
125
|
+
pageBuilder.close();
|
126
|
+
}
|
127
|
+
|
128
|
+
@Override
|
129
|
+
public void add(Page page)
|
130
|
+
{
|
131
|
+
pageReader.setPage(page);
|
132
|
+
|
133
|
+
while (pageReader.nextRecord()) {
|
134
|
+
outputSchema.visitColumns(visitor);
|
135
|
+
pageBuilder.addRecord();
|
136
|
+
}
|
137
|
+
}
|
138
|
+
};
|
139
|
+
}
|
140
|
+
}
|
@@ -0,0 +1,130 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.embulk.config.ConfigException;
|
4
|
+
import org.embulk.spi.Column;
|
5
|
+
import org.embulk.spi.Page;
|
6
|
+
import org.embulk.spi.PageReader;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
import org.embulk.spi.type.Types;
|
9
|
+
|
10
|
+
import static java.util.Locale.ENGLISH;
|
11
|
+
|
12
|
+
public class CalcFormulaVisitor
|
13
|
+
extends CalculatorBaseVisitor<Double>
|
14
|
+
{
|
15
|
+
private PageReader pageReader;
|
16
|
+
private Schema inputSchema;
|
17
|
+
|
18
|
+
public CalcFormulaVisitor(Schema inputSchema, PageReader pageReader)
|
19
|
+
{
|
20
|
+
this.pageReader = pageReader;
|
21
|
+
this.inputSchema = inputSchema;
|
22
|
+
}
|
23
|
+
|
24
|
+
@Override
|
25
|
+
public Double visitMulDivMod(CalculatorParser.MulDivModContext ctx)
|
26
|
+
{
|
27
|
+
Double left = visit(ctx.expr(0));
|
28
|
+
Double right = visit(ctx.expr(1));
|
29
|
+
|
30
|
+
if (left == null || right == null) {
|
31
|
+
return null;
|
32
|
+
}
|
33
|
+
else if (ctx.op.getType() == CalculatorParser.MUL) {
|
34
|
+
return left * right;
|
35
|
+
}
|
36
|
+
|
37
|
+
else if (ctx.op.getType() == CalculatorParser.DIV) {
|
38
|
+
return left / right;
|
39
|
+
}
|
40
|
+
else {
|
41
|
+
return left % right;
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public Double visitAddSub(CalculatorParser.AddSubContext ctx)
|
47
|
+
{
|
48
|
+
Double left = visit(ctx.expr(0));
|
49
|
+
Double right = visit(ctx.expr(1));
|
50
|
+
if (left == null || right == null) {
|
51
|
+
return null;
|
52
|
+
}
|
53
|
+
else if (ctx.op.getType() == CalculatorParser.ADD) {
|
54
|
+
return left + right;
|
55
|
+
}
|
56
|
+
else {
|
57
|
+
return left - right;
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
@Override
|
62
|
+
public Double visitNumber(CalculatorParser.NumberContext ctx)
|
63
|
+
{
|
64
|
+
String id = ctx.NUM().getText();
|
65
|
+
|
66
|
+
return Double.parseDouble(id);
|
67
|
+
}
|
68
|
+
|
69
|
+
@Override
|
70
|
+
public Double visitIdentifier(CalculatorParser.IdentifierContext ctx)
|
71
|
+
{
|
72
|
+
String id = ctx.ID().getText();
|
73
|
+
Double val;
|
74
|
+
Column column = inputSchema.lookupColumn(id);
|
75
|
+
|
76
|
+
if (pageReader.isNull(column)) {
|
77
|
+
val = null;
|
78
|
+
}
|
79
|
+
else if (Types.DOUBLE.equals(column.getType())) {
|
80
|
+
val = pageReader.getDouble(column);
|
81
|
+
}
|
82
|
+
else if (Types.LONG.equals(column.getType())) {
|
83
|
+
Long v;
|
84
|
+
v = pageReader.getLong(column);
|
85
|
+
val = v.doubleValue();
|
86
|
+
}
|
87
|
+
else {
|
88
|
+
// throw
|
89
|
+
val = null;
|
90
|
+
}
|
91
|
+
return val;
|
92
|
+
}
|
93
|
+
|
94
|
+
@Override
|
95
|
+
public Double visitParen(CalculatorParser.ParenContext ctx)
|
96
|
+
{
|
97
|
+
return visit(ctx.expr());
|
98
|
+
}
|
99
|
+
|
100
|
+
@Override
|
101
|
+
public Double visitPower(CalculatorParser.PowerContext ctx)
|
102
|
+
{
|
103
|
+
Double left = visit(ctx.expr(0));
|
104
|
+
Double right = visit(ctx.expr(1));
|
105
|
+
if (left == null || right == null) {
|
106
|
+
return null;
|
107
|
+
}
|
108
|
+
|
109
|
+
return Math.pow(left, right);
|
110
|
+
}
|
111
|
+
|
112
|
+
// Scientific Functions
|
113
|
+
@Override
|
114
|
+
public Double visitFuncCos(CalculatorParser.FuncCosContext ctx)
|
115
|
+
{
|
116
|
+
return Math.cos(visit(ctx.expr()));
|
117
|
+
}
|
118
|
+
|
119
|
+
@Override
|
120
|
+
public Double visitFuncSin(CalculatorParser.FuncSinContext ctx)
|
121
|
+
{
|
122
|
+
return Math.sin(visit(ctx.expr()));
|
123
|
+
}
|
124
|
+
|
125
|
+
@Override
|
126
|
+
public Double visitFuncTan(CalculatorParser.FuncTanContext ctx)
|
127
|
+
{
|
128
|
+
return Math.tan(visit(ctx.expr()));
|
129
|
+
}
|
130
|
+
}
|
@@ -0,0 +1,141 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.embulk.spi.Column;
|
4
|
+
import org.embulk.spi.ColumnVisitor;
|
5
|
+
import org.embulk.spi.Exec;
|
6
|
+
import org.embulk.spi.PageBuilder;
|
7
|
+
import org.embulk.spi.PageReader;
|
8
|
+
import org.embulk.spi.Schema;
|
9
|
+
import org.slf4j.Logger;
|
10
|
+
|
11
|
+
import java.util.HashMap;
|
12
|
+
|
13
|
+
public class CalcVisitorImpl
|
14
|
+
implements ColumnVisitor
|
15
|
+
{
|
16
|
+
|
17
|
+
private static final Logger logger = Exec.getLogger(CalcFilterPlugin.class);
|
18
|
+
private final CalcFilterPlugin.PluginTask task;
|
19
|
+
private final Schema inputSchema;
|
20
|
+
private final Schema outputSchema;
|
21
|
+
private final PageReader pageReader;
|
22
|
+
private final PageBuilder pageBuilder;
|
23
|
+
private final HashMap<String, Calculator> calcMap = new HashMap<>();
|
24
|
+
|
25
|
+
CalcVisitorImpl(CalcFilterPlugin.PluginTask task, Schema inputSchema, Schema outputSchema, PageReader pageReader, PageBuilder pageBuilder)
|
26
|
+
{
|
27
|
+
this.task = task;
|
28
|
+
this.inputSchema = inputSchema;
|
29
|
+
this.outputSchema = outputSchema;
|
30
|
+
this.pageReader = pageReader;
|
31
|
+
this.pageBuilder = pageBuilder;
|
32
|
+
initializeCalcMap();
|
33
|
+
}
|
34
|
+
|
35
|
+
private void initializeCalcMap()
|
36
|
+
{
|
37
|
+
|
38
|
+
for (CalcFilterPlugin.CalcConfig calcConfig : task.getCalcConfig()) {
|
39
|
+
String name = calcConfig.getName();
|
40
|
+
String formula = calcConfig.getFormula();
|
41
|
+
Calculator calc = new Calculator(formula, inputSchema, pageReader);
|
42
|
+
calcMap.put(name, calc);
|
43
|
+
}
|
44
|
+
}
|
45
|
+
|
46
|
+
@Override
|
47
|
+
public void booleanColumn(Column outputColumn)
|
48
|
+
{
|
49
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
50
|
+
if (pageReader.isNull(inputColumn)) {
|
51
|
+
pageBuilder.setNull(outputColumn);
|
52
|
+
}
|
53
|
+
else {
|
54
|
+
pageBuilder.setBoolean(outputColumn, pageReader.getBoolean(inputColumn));
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
@Override
|
59
|
+
public void longColumn(Column outputColumn)
|
60
|
+
{
|
61
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
62
|
+
Calculator calc = calcMap.get(outputColumn.getName());
|
63
|
+
if (calc == null) {
|
64
|
+
if (pageReader.isNull(inputColumn)) {
|
65
|
+
pageBuilder.setNull(outputColumn);
|
66
|
+
}
|
67
|
+
else {
|
68
|
+
pageBuilder.setLong(outputColumn, pageReader.getLong(inputColumn));
|
69
|
+
}
|
70
|
+
}
|
71
|
+
else {
|
72
|
+
Double val = calc.calc();
|
73
|
+
if (val == null) {
|
74
|
+
pageBuilder.setNull(outputColumn);
|
75
|
+
}
|
76
|
+
else {
|
77
|
+
pageBuilder.setLong(outputColumn, val.longValue());
|
78
|
+
}
|
79
|
+
}
|
80
|
+
}
|
81
|
+
|
82
|
+
@Override
|
83
|
+
public void doubleColumn(Column outputColumn)
|
84
|
+
{
|
85
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
86
|
+
Calculator calc = calcMap.get(outputColumn.getName());
|
87
|
+
if (calc == null) {
|
88
|
+
if (pageReader.isNull(inputColumn)) {
|
89
|
+
pageBuilder.setNull(outputColumn);
|
90
|
+
}
|
91
|
+
else {
|
92
|
+
pageBuilder.setDouble(outputColumn, pageReader.getDouble(inputColumn));
|
93
|
+
}
|
94
|
+
}
|
95
|
+
else {
|
96
|
+
Double val = calc.calc();
|
97
|
+
if (val == null) {
|
98
|
+
pageBuilder.setNull(outputColumn);
|
99
|
+
}
|
100
|
+
else {
|
101
|
+
pageBuilder.setDouble(outputColumn, val);
|
102
|
+
}
|
103
|
+
}
|
104
|
+
}
|
105
|
+
|
106
|
+
@Override
|
107
|
+
public void stringColumn(Column outputColumn)
|
108
|
+
{
|
109
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
110
|
+
if (pageReader.isNull(inputColumn)) {
|
111
|
+
pageBuilder.setNull(outputColumn);
|
112
|
+
}
|
113
|
+
else {
|
114
|
+
pageBuilder.setString(outputColumn, pageReader.getString(inputColumn));
|
115
|
+
}
|
116
|
+
}
|
117
|
+
|
118
|
+
@Override
|
119
|
+
public void jsonColumn(Column outputColumn)
|
120
|
+
{
|
121
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
122
|
+
if (pageReader.isNull(inputColumn)) {
|
123
|
+
pageBuilder.setNull(outputColumn);
|
124
|
+
}
|
125
|
+
else {
|
126
|
+
pageBuilder.setJson(outputColumn, pageReader.getJson(inputColumn));
|
127
|
+
}
|
128
|
+
}
|
129
|
+
|
130
|
+
@Override
|
131
|
+
public void timestampColumn(Column outputColumn)
|
132
|
+
{
|
133
|
+
Column inputColumn = inputSchema.lookupColumn(outputColumn.getName());
|
134
|
+
if (pageReader.isNull(inputColumn)) {
|
135
|
+
pageBuilder.setNull(outputColumn);
|
136
|
+
}
|
137
|
+
else {
|
138
|
+
pageBuilder.setTimestamp(outputColumn, pageReader.getTimestamp(inputColumn));
|
139
|
+
}
|
140
|
+
}
|
141
|
+
}
|
@@ -0,0 +1,30 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.antlr.v4.runtime.ANTLRInputStream;
|
4
|
+
import org.antlr.v4.runtime.CommonTokenStream;
|
5
|
+
import org.antlr.v4.runtime.tree.ParseTree;
|
6
|
+
import org.embulk.spi.PageReader;
|
7
|
+
import org.embulk.spi.Schema;
|
8
|
+
|
9
|
+
public class Calculator
|
10
|
+
{
|
11
|
+
private ParseTree tree;
|
12
|
+
private CalcFormulaVisitor visitor;
|
13
|
+
|
14
|
+
public Calculator(String formula, Schema inputSchema, PageReader pageReader)
|
15
|
+
{
|
16
|
+
ANTLRInputStream input = new ANTLRInputStream(formula);
|
17
|
+
CalculatorLexer lexer = new CalculatorLexer(input);
|
18
|
+
CommonTokenStream tokens = new CommonTokenStream(lexer);
|
19
|
+
CalculatorParser parser = new CalculatorParser(tokens);
|
20
|
+
|
21
|
+
this.tree = parser.expr();
|
22
|
+
this.visitor = new CalcFormulaVisitor(inputSchema, pageReader);
|
23
|
+
}
|
24
|
+
|
25
|
+
public Double calc()
|
26
|
+
{
|
27
|
+
return visitor.visit(tree);
|
28
|
+
}
|
29
|
+
}
|
30
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
package org.embulk.filter.calc;
|
2
|
+
|
3
|
+
import org.antlr.v4.runtime.BaseErrorListener;
|
4
|
+
import org.antlr.v4.runtime.RecognitionException;
|
5
|
+
import org.antlr.v4.runtime.Recognizer;
|
6
|
+
import org.embulk.config.ConfigException;
|
7
|
+
|
8
|
+
import static java.util.Locale.ENGLISH;
|
9
|
+
|
10
|
+
public class ConfigErrorListener
|
11
|
+
extends BaseErrorListener
|
12
|
+
{
|
13
|
+
private String column_name;
|
14
|
+
|
15
|
+
public ConfigErrorListener(String column_name)
|
16
|
+
{
|
17
|
+
this.column_name = column_name;
|
18
|
+
}
|
19
|
+
|
20
|
+
@Override
|
21
|
+
public void syntaxError(Recognizer<?, ?> recognizer,
|
22
|
+
Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e)
|
23
|
+
{
|
24
|
+
String err = String.format(ENGLISH, "The \"%s\" column has invalid formula. line: %d error: %s", column_name, line, msg);
|
25
|
+
throw new ConfigException(err);
|
26
|
+
}
|
27
|
+
}
|