embulk-filter-calc 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,107 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import com.google.common.collect.Lists;
4
+
5
+ import org.embulk.EmbulkTestRuntime;
6
+ import org.embulk.config.ConfigException;
7
+ import org.embulk.config.ConfigLoader;
8
+ import org.embulk.config.ConfigSource;
9
+ import org.embulk.config.TaskSource;
10
+ import org.embulk.filter.calc.CalcFilterPlugin.PluginTask;
11
+ import org.embulk.spi.Column;
12
+ import org.embulk.spi.Exec;
13
+ import org.embulk.spi.FilterPlugin;
14
+ import org.embulk.spi.Schema;
15
+ import org.junit.Before;
16
+ import org.junit.Rule;
17
+ import org.junit.Test;
18
+
19
+ import static org.embulk.spi.type.Types.BOOLEAN;
20
+ import static org.embulk.spi.type.Types.DOUBLE;
21
+ import static org.embulk.spi.type.Types.JSON;
22
+ import static org.embulk.spi.type.Types.LONG;
23
+ import static org.embulk.spi.type.Types.STRING;
24
+ import static org.embulk.spi.type.Types.TIMESTAMP;
25
+ import static org.junit.Assert.assertEquals;
26
+
27
+
28
+ public class TestCalcFilterPlugin
29
+ {
30
+
31
+ @Rule
32
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
33
+
34
+ private CalcFilterPlugin plugin;
35
+
36
+
37
+ private Schema schema(Column... columns)
38
+ {
39
+ return new Schema(Lists.newArrayList(columns));
40
+ }
41
+
42
+ private ConfigSource configFromYamlString(String... lines)
43
+ {
44
+ StringBuilder builder = new StringBuilder();
45
+ for (String line : lines) {
46
+ builder.append(line).append("\n");
47
+ }
48
+ String yamlString = builder.toString();
49
+
50
+ ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
51
+ return loader.fromYamlString(yamlString);
52
+ }
53
+
54
+ private PluginTask taskFromYamlString(String... lines)
55
+ {
56
+ ConfigSource config = configFromYamlString(lines);
57
+ return config.loadConfig(PluginTask.class);
58
+ }
59
+
60
+ private void transaction(ConfigSource config, Schema inputSchema)
61
+ {
62
+ plugin.transaction(config, inputSchema, new FilterPlugin.Control() {
63
+ @Override
64
+ public void run(TaskSource taskSource, Schema outputSchema)
65
+ {
66
+ }
67
+ });
68
+ }
69
+
70
+
71
+ @Before
72
+ public void createResource()
73
+ {
74
+ plugin = new CalcFilterPlugin();
75
+ }
76
+
77
+
78
+ @Test
79
+ public void buildOutputSchema_Columns()
80
+ {
81
+ PluginTask task = taskFromYamlString(
82
+ "type: calc",
83
+ "columns:",
84
+ " - { name: long, formula: \"long + 1\"}");
85
+ Schema inputSchema = Schema.builder()
86
+ .add("timestamp", TIMESTAMP)
87
+ .add("string", STRING)
88
+ .add("boolean", BOOLEAN)
89
+ .add("long", LONG)
90
+ .add("double", DOUBLE)
91
+ .add("json", JSON)
92
+ .add("remove_me", STRING)
93
+ .build();
94
+
95
+ Schema outputSchema = CalcFilterPlugin.buildOutputSchema(task, inputSchema);
96
+ assertEquals(7, outputSchema.size());
97
+
98
+ Column column;
99
+ {
100
+ column = outputSchema.getColumn(0);
101
+ assertEquals("timestamp", column.getName());
102
+ }
103
+ }
104
+
105
+
106
+
107
+ }
@@ -0,0 +1,448 @@
1
+ package org.embulk.filter.calc;
2
+
3
+ import org.embulk.EmbulkTestRuntime;
4
+ import org.embulk.config.ConfigLoader;
5
+ import org.embulk.config.ConfigSource;
6
+ import org.embulk.filter.calc.CalcFilterPlugin.PluginTask;
7
+ import org.embulk.spi.Exec;
8
+ import org.embulk.spi.Page;
9
+ import org.embulk.spi.PageBuilder;
10
+ import org.embulk.spi.PageReader;
11
+ import org.embulk.spi.PageTestUtils;
12
+ import org.embulk.spi.Schema;
13
+ import org.embulk.spi.TestPageBuilderReader;
14
+ import org.embulk.spi.time.Timestamp;
15
+ import org.embulk.spi.util.Pages;
16
+ import org.junit.Before;
17
+ import org.junit.Rule;
18
+ import org.junit.Test;
19
+ import org.msgpack.value.ValueFactory;
20
+
21
+ import static org.embulk.spi.type.Types.BOOLEAN;
22
+ import static org.embulk.spi.type.Types.DOUBLE;
23
+ import static org.embulk.spi.type.Types.JSON;
24
+ import static org.embulk.spi.type.Types.LONG;
25
+ import static org.embulk.spi.type.Types.STRING;
26
+ import static org.embulk.spi.type.Types.TIMESTAMP;
27
+ import static org.junit.Assert.assertEquals;
28
+
29
+ import java.util.List;
30
+
31
+
32
+ public class TestCalcVisitorImpl
33
+ {
34
+ @Rule
35
+ public EmbulkTestRuntime runtime = new EmbulkTestRuntime();
36
+
37
+ @Before
38
+ public void createResource()
39
+ {
40
+ }
41
+
42
+ private ConfigSource config()
43
+ {
44
+ return runtime.getExec().newConfigSource();
45
+ }
46
+
47
+ private PluginTask taskFromYamlString(String... lines)
48
+ {
49
+ StringBuilder builder = new StringBuilder();
50
+ for (String line : lines) {
51
+ builder.append(line).append("\n");
52
+ }
53
+ String yamlString = builder.toString();
54
+
55
+ ConfigLoader loader = new ConfigLoader(Exec.getModelManager());
56
+ ConfigSource config = loader.fromYamlString(yamlString);
57
+ return config.loadConfig(PluginTask.class);
58
+ }
59
+
60
+ private List<Object[]> filter(PluginTask task, Schema inputSchema, Object ... objects)
61
+ {
62
+ TestPageBuilderReader.MockPageOutput output = new TestPageBuilderReader.MockPageOutput();
63
+ Schema outputSchema = CalcFilterPlugin.buildOutputSchema(task, inputSchema);
64
+ PageBuilder pageBuilder = new PageBuilder(runtime.getBufferAllocator(), outputSchema, output);
65
+ PageReader pageReader = new PageReader(inputSchema);
66
+ CalcVisitorImpl visitor = new CalcVisitorImpl(task, inputSchema, outputSchema, pageReader, pageBuilder);
67
+
68
+ List<Page> pages = PageTestUtils.buildPage(runtime.getBufferAllocator(), inputSchema, objects);
69
+ for (Page page : pages) {
70
+ pageReader.setPage(page);
71
+
72
+ while (pageReader.nextRecord()) {
73
+ outputSchema.visitColumns(visitor);
74
+ pageBuilder.addRecord();
75
+ }
76
+ }
77
+ pageBuilder.finish();
78
+ pageBuilder.close();
79
+ return Pages.toObjects(outputSchema, output.pages);
80
+ }
81
+
82
+ @Test
83
+ public void visit_calc_NoFormula()
84
+ {
85
+ PluginTask task = taskFromYamlString(
86
+ "type: calc",
87
+ "columns: []");
88
+ Schema inputSchema = Schema.builder()
89
+ .add("timestamp",TIMESTAMP)
90
+ .add("string",STRING)
91
+ .add("boolean", BOOLEAN)
92
+ .add("long", LONG)
93
+ .add("double",DOUBLE)
94
+ .add("json",JSON)
95
+ .build();
96
+ List<Object[]> records = filter(task, inputSchema,
97
+ // row1
98
+ Timestamp.ofEpochSecond(1436745600), "string", new Boolean(true), new Long(0), new Double(0.5), ValueFactory.newString("json"),
99
+ // row2
100
+ Timestamp.ofEpochSecond(1436745600), "string", new Boolean(true), new Long(0), new Double(0.5), ValueFactory.newString("json"));
101
+
102
+ assertEquals(2, records.size());
103
+
104
+ Object[] record;
105
+ {
106
+ record = records.get(0);
107
+ assertEquals(6, record.length);
108
+ assertEquals(Timestamp.ofEpochSecond(1436745600),record[0]);
109
+ assertEquals("string",record[1]);
110
+ assertEquals(new Boolean(true),record[2]);
111
+ assertEquals(new Long(0),record[3]);
112
+ assertEquals(new Double(0.5),record[4]);
113
+ assertEquals(ValueFactory.newString("json"),record[5]);
114
+ }
115
+ }
116
+
117
+ @Test
118
+ public void visit_calc_NoFormulaWithNull()
119
+ {
120
+ PluginTask task = taskFromYamlString(
121
+ "type: calc",
122
+ "columns: []");
123
+ Schema inputSchema = Schema.builder()
124
+ .add("dummy",STRING)
125
+ .add("timestamp",TIMESTAMP)
126
+ .add("string",STRING)
127
+ .add("boolean", BOOLEAN)
128
+ .add("long", LONG)
129
+ .add("double",DOUBLE)
130
+ .add("json",JSON)
131
+ .build();
132
+ List<Object[]> records = filter(task, inputSchema,
133
+ // row1
134
+ "dummy",null,null,null,null,null,null,
135
+ // row2
136
+ "dummy",null,null,null,null,null,null);
137
+
138
+ assertEquals(2, records.size());
139
+
140
+ Object[] record;
141
+ {
142
+ record = records.get(0);
143
+ assertEquals(7, record.length);
144
+ assertEquals("dummy",record[0]);
145
+ assertEquals(null,record[1]);
146
+ assertEquals(null,record[2]);
147
+ assertEquals(null,record[3]);
148
+ assertEquals(null,record[4]);
149
+ assertEquals(null,record[5]);
150
+ assertEquals(null,record[6]);
151
+
152
+ }
153
+ }
154
+
155
+ @Test
156
+ public void visit_calc_NullFormula()
157
+ {
158
+ PluginTask task = taskFromYamlString(
159
+ "type: calc",
160
+ "columns:",
161
+ " - {name: long, formula: \" long + 10 \"}",
162
+ " - {name: double, formula: \" double + 10 \"}");
163
+ Schema inputSchema = Schema.builder()
164
+ .add("long", LONG)
165
+ .add("double", DOUBLE)
166
+ .build();
167
+ List<Object[]> records = filter(task, inputSchema,
168
+ // row1
169
+ null,null,
170
+ // row2
171
+ null,null);
172
+
173
+ assertEquals(2, records.size());
174
+
175
+ Object[] record;
176
+ {
177
+ record = records.get(0);
178
+ assertEquals(2, record.length);
179
+ assertEquals(null,record[0]);
180
+ assertEquals(null,record[1]);
181
+ }
182
+ }
183
+
184
+ @Test
185
+ public void visit_calc_SingleFormula()
186
+ {
187
+ PluginTask task = taskFromYamlString(
188
+ "type: calc",
189
+ "columns:",
190
+ " - {name: long1, formula: \" 100 \"}",
191
+ " - {name: long2, formula: \" long2 \"}",
192
+ " - {name: double1, formula: \" 11.1 \"}",
193
+ " - {name: double2, formula: \" double2 \"}");
194
+ Schema inputSchema = Schema.builder()
195
+ .add("long1", LONG)
196
+ .add("long2", LONG)
197
+ .add("double1", DOUBLE)
198
+ .add("double2", DOUBLE)
199
+ .build();
200
+ List<Object[]> records = filter(task, inputSchema,
201
+ // row1
202
+ new Long(521),new Long(521),new Double(523.5),new Double(523.5),
203
+ // row2
204
+ new Long(521),new Long(521),new Double(523.5),new Double(523.5));
205
+
206
+ assertEquals(2, records.size());
207
+
208
+ Object[] record;
209
+ {
210
+ record = records.get(0);
211
+ assertEquals(4, record.length);
212
+ assertEquals(new Long(100), record[0]);
213
+ assertEquals(new Long(521), record[1]);
214
+ assertEquals(new Double(11.1), record[2]);
215
+ assertEquals(new Double(523.5),record[3]);
216
+ }
217
+ }
218
+
219
+
220
+ @Test
221
+ public void visit_calc_MathFormula()
222
+ {
223
+ PluginTask task = taskFromYamlString(
224
+ "type: calc",
225
+ "columns:",
226
+ " - {name: sin_value, formula: \" sin(sin_value) \"}",
227
+ " - {name: cos_value, formula: \" cos(cos_value) \"}",
228
+ " - {name: tan_value, formula: \" tan(tan_value) \"}");
229
+ Schema inputSchema = Schema.builder()
230
+ .add("sin_value", DOUBLE)
231
+ .add("cos_value", DOUBLE)
232
+ .add("tan_value", DOUBLE)
233
+ .build();
234
+ List<Object[]> records = filter(task, inputSchema,
235
+ // row1
236
+ new Double(0.05),new Double(0.05),new Double(0.05),
237
+ // row2
238
+ new Double(0.05),new Double(0.05),new Double(0.05));
239
+
240
+ assertEquals(2, records.size());
241
+
242
+ Object[] record;
243
+ {
244
+ record = records.get(0);
245
+ assertEquals(3, record.length);
246
+ assertEquals(Math.sin(0.05),record[0]);
247
+ assertEquals(Math.cos(0.05),record[1]);
248
+ assertEquals(Math.tan(0.05),record[2]);
249
+ }
250
+ }
251
+
252
+ @Test
253
+ public void visit_calc_SinglePowerFormula()
254
+ {
255
+ PluginTask task = taskFromYamlString(
256
+ "type: calc",
257
+ "columns:",
258
+ " - {name: long1, formula: \" 2 ^ 8\"}",
259
+ " - {name: long2, formula: \" long2 ^ 8 \"}",
260
+ " - {name: double1, formula: \" 2 ^ 8 \"}",
261
+ " - {name: double2, formula: \" double2 ^ 8 \"}");
262
+ Schema inputSchema = Schema.builder()
263
+ .add("long1", LONG)
264
+ .add("long2", LONG)
265
+ .add("double1", DOUBLE)
266
+ .add("double2", DOUBLE)
267
+ .build();
268
+ List<Object[]> records = filter(task, inputSchema,
269
+ // row1
270
+ new Long(10),new Long(3),new Double(10.0),new Double(2.0),
271
+ // row2
272
+ new Long(10),new Long(2),new Double(10.0),new Double(2.0));
273
+
274
+ assertEquals(2, records.size());
275
+
276
+ Object[] record;
277
+ {
278
+ record = records.get(0);
279
+ assertEquals(4, record.length);
280
+ assertEquals(new Long(256), record[0]);
281
+ assertEquals(new Long(6561), record[1]);
282
+ assertEquals(new Double(256), record[2]);
283
+ assertEquals(new Double(256.0),record[3]);
284
+ }
285
+ }
286
+ @Test
287
+ public void visit_calc_BasicFormula()
288
+ {
289
+ PluginTask task = taskFromYamlString(
290
+ "type: calc",
291
+ "columns:",
292
+ " - {name: add_long, formula: \" add_long + 100\"}",
293
+ " - {name: sub_long, formula: \" sub_long - 100\"}",
294
+ " - {name: mul_long, formula: \" mul_long * 100\"}",
295
+ " - {name: div_long, formula: \" div_long / 100\"}",
296
+ " - {name: mod_long, formula: \" mod_long % 100\"}",
297
+ " - {name: add_double, formula: \" add_double + 100\"}",
298
+ " - {name: sub_double, formula: \" sub_double - 100\"}",
299
+ " - {name: mul_double, formula: \" mul_double * 100\"}",
300
+ " - {name: div_double, formula: \" div_double / 100\"}",
301
+ " - {name: mod_double, formula: \" mod_double % 100\"}");
302
+ Schema inputSchema = Schema.builder()
303
+ .add("add_long", LONG)
304
+ .add("sub_long", LONG)
305
+ .add("mul_long", LONG)
306
+ .add("div_long", LONG)
307
+ .add("mod_long", LONG)
308
+ .add("add_double", DOUBLE)
309
+ .add("sub_double", DOUBLE)
310
+ .add("mul_double", DOUBLE)
311
+ .add("div_double", DOUBLE)
312
+ .add("mod_double", DOUBLE)
313
+ .build();
314
+ List<Object[]> records = filter(task, inputSchema,
315
+ // row1
316
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
317
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),
318
+ // row2
319
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
320
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5));
321
+
322
+ assertEquals(2, records.size());
323
+
324
+ Object[] record;
325
+ {
326
+ record = records.get(0);
327
+ assertEquals(10, record.length);
328
+ assertEquals(new Long(621), record[0]);
329
+ assertEquals(new Long(421), record[1]);
330
+ assertEquals(new Long(52100), record[2]);
331
+ assertEquals(new Long(5), record[3]);
332
+ assertEquals(new Long(21), record[4]);
333
+ assertEquals(new Double(623.5),record[5]);
334
+ assertEquals(new Double(423.5),record[6]);
335
+ assertEquals(new Double(52350),record[7]);
336
+ assertEquals(new Double(5.235),record[8]);
337
+ assertEquals(new Double(23.5), record[9]);
338
+ }
339
+ }
340
+ @Test
341
+ public void visit_calc_PriorityChkFormula()
342
+ {
343
+ PluginTask task = taskFromYamlString(
344
+ "type: calc",
345
+ "columns:",
346
+ " - {name: add_long, formula: \" add_long + 100 * 3\"}",
347
+ " - {name: sub_long, formula: \" sub_long - 100 * 3\"}",
348
+ " - {name: mul_long, formula: \" mul_long * 100 * 3\"}",
349
+ " - {name: div_long, formula: \" div_long / 100 * 3\"}",
350
+ " - {name: mod_long, formula: \" mod_long % 100 * 3\"}",
351
+ " - {name: add_double, formula: \" add_double + 100 * 3\"}",
352
+ " - {name: sub_double, formula: \" sub_double - 100 * 3\"}",
353
+ " - {name: mul_double, formula: \" mul_double * 100 * 3\"}",
354
+ " - {name: div_double, formula: \" div_double / 100 * 3\"}",
355
+ " - {name: mod_double, formula: \" mod_double % 100 * 3\"}");
356
+ Schema inputSchema = Schema.builder()
357
+ .add("add_long", LONG)
358
+ .add("sub_long", LONG)
359
+ .add("mul_long", LONG)
360
+ .add("div_long", LONG)
361
+ .add("mod_long", LONG)
362
+ .add("add_double", DOUBLE)
363
+ .add("sub_double", DOUBLE)
364
+ .add("mul_double", DOUBLE)
365
+ .add("div_double", DOUBLE)
366
+ .add("mod_double", DOUBLE)
367
+ .build();
368
+ List<Object[]> records = filter(task, inputSchema,
369
+ // row1
370
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
371
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),
372
+ // row2
373
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
374
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5));
375
+
376
+ assertEquals(2, records.size());
377
+
378
+ Object[] record;
379
+ {
380
+ record = records.get(0);
381
+ assertEquals(10, record.length);
382
+ assertEquals(new Long(821), record[0]);
383
+ assertEquals(new Long(221), record[1]);
384
+ assertEquals(new Long(156300), record[2]);
385
+ assertEquals(new Long(15), record[3]);
386
+ assertEquals(new Long(63), record[4]);
387
+ assertEquals(new Double(823.5),record[5]);
388
+ assertEquals(new Double(223.5),record[6]);
389
+ assertEquals(new Double(157050),record[7]);
390
+ // assertEquals(new Double(15.705),record[8]); // TODO result 15.705000000000002
391
+ assertEquals(new Double(70.5), record[9]);
392
+ }
393
+ }
394
+ @Test
395
+ public void visit_calc_ParenChkFormula()
396
+ {
397
+ PluginTask task = taskFromYamlString(
398
+ "type: calc",
399
+ "columns:",
400
+ " - {name: add_long, formula: \" ( add_long + 100 ) * 3\"}",
401
+ " - {name: sub_long, formula: \" ( sub_long - 100 ) * 3\"}",
402
+ " - {name: mul_long, formula: \" ( mul_long * 100 ) * 3\"}",
403
+ " - {name: div_long, formula: \" ( div_long / 100 ) * 3\"}",
404
+ " - {name: mod_long, formula: \" ( mod_long % 100 ) * 3\"}",
405
+ " - {name: add_double, formula: \" ( add_double + 100 ) * 3\"}",
406
+ " - {name: sub_double, formula: \" ( sub_double - 100 ) * 3\"}",
407
+ " - {name: mul_double, formula: \" ( mul_double * 100 ) * 3\"}",
408
+ " - {name: div_double, formula: \" ( div_double / 100 ) * 3\"}",
409
+ " - {name: mod_double, formula: \" ( mod_double % 100 ) * 3\"}");
410
+ Schema inputSchema = Schema.builder()
411
+ .add("add_long", LONG)
412
+ .add("sub_long", LONG)
413
+ .add("mul_long", LONG)
414
+ .add("div_long", LONG)
415
+ .add("mod_long", LONG)
416
+ .add("add_double", DOUBLE)
417
+ .add("sub_double", DOUBLE)
418
+ .add("mul_double", DOUBLE)
419
+ .add("div_double", DOUBLE)
420
+ .add("mod_double", DOUBLE)
421
+ .build();
422
+ List<Object[]> records = filter(task, inputSchema,
423
+ // row1
424
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
425
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),
426
+ // row2
427
+ new Long(521),new Long(521),new Long(521),new Long(521),new Long(521),
428
+ new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5),new Double(523.5));
429
+
430
+ assertEquals(2, records.size());
431
+
432
+ Object[] record;
433
+ {
434
+ record = records.get(0);
435
+ assertEquals(10, record.length);
436
+ assertEquals(new Long(1863), record[0]);
437
+ assertEquals(new Long(1263), record[1]);
438
+ assertEquals(new Long(156300), record[2]);
439
+ assertEquals(new Long(15), record[3]);
440
+ assertEquals(new Long(63), record[4]);
441
+ assertEquals(new Double(1870.5),record[5]);
442
+ assertEquals(new Double(1270.5),record[6]);
443
+ assertEquals(new Double(157050),record[7]);
444
+ // assertEquals(new Double(15.705),record[8]); // TODO result 15.705000000000002
445
+ assertEquals(new Double(70.5), record[9]);
446
+ }
447
+ }
448
+ }