embulk-filter-csv_lookup 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +12 -12
- data/LICENSE +20 -20
- data/README.md +131 -131
- data/build.gradle +98 -98
- data/classpath/embulk-filter-csv_lookup-0.1.4.jar +0 -0
- data/config/checkstyle/checkstyle.xml +128 -128
- data/config/checkstyle/default.xml +108 -108
- data/example/config.yml +50 -50
- data/gradle/wrapper/gradle-wrapper.properties +4 -4
- data/gradlew +172 -172
- data/gradlew.bat +0 -0
- data/lib/embulk/filter/csv_lookup.rb +3 -3
- data/src/main/java/org/embulk/filter/csv_lookup/CsvLookupFilterPlugin.java +355 -355
- data/src/test/java/org/embulk/filter/csv_lookup/TestCsvLookupFilterPlugin.java +5 -5
- metadata +8 -7
- data/classpath/embulk-filter-csv_lookup-0.1.3.jar +0 -0
@@ -1,355 +1,355 @@
|
|
1
|
-
package org.embulk.filter.csv_lookup;
|
2
|
-
|
3
|
-
import com.google.common.base.Optional;
|
4
|
-
|
5
|
-
import com.google.common.collect.ImmutableList;
|
6
|
-
import com.opencsv.CSVReader;
|
7
|
-
import com.opencsv.exceptions.CsvValidationException;
|
8
|
-
import org.embulk.config.Config;
|
9
|
-
import org.embulk.config.ConfigDefault;
|
10
|
-
import org.embulk.config.ConfigDiff;
|
11
|
-
import org.embulk.config.ConfigSource;
|
12
|
-
import org.embulk.config.Task;
|
13
|
-
import org.embulk.config.TaskSource;
|
14
|
-
import org.embulk.spi.*;
|
15
|
-
import org.embulk.spi.time.Timestamp;
|
16
|
-
import org.embulk.spi.type.Types;
|
17
|
-
|
18
|
-
import java.io.BufferedReader;
|
19
|
-
import java.io.FileReader;
|
20
|
-
import java.io.IOException;
|
21
|
-
import java.sql.SQLException;
|
22
|
-
import java.time.Instant;
|
23
|
-
import java.util.*;
|
24
|
-
|
25
|
-
public class CsvLookupFilterPlugin
|
26
|
-
implements FilterPlugin
|
27
|
-
{
|
28
|
-
public interface PluginTask
|
29
|
-
extends Task
|
30
|
-
{
|
31
|
-
@Config("mapping_from")
|
32
|
-
public List<String> getMappingFrom();
|
33
|
-
|
34
|
-
@Config("mapping_to")
|
35
|
-
public List<String> getMappingTo();
|
36
|
-
|
37
|
-
@Config("new_columns")
|
38
|
-
public SchemaConfig getNewColumns();
|
39
|
-
|
40
|
-
@Config("path_of_lookup_file")
|
41
|
-
public String getPathOfLookupFile();
|
42
|
-
|
43
|
-
}
|
44
|
-
|
45
|
-
@Override
|
46
|
-
public void transaction(ConfigSource config, Schema inputSchema,
|
47
|
-
FilterPlugin.Control control)
|
48
|
-
{
|
49
|
-
PluginTask task = config.loadConfig(PluginTask.class);
|
50
|
-
|
51
|
-
List<String> inputColumns = task.getMappingFrom();
|
52
|
-
List<String> keyColumns = task.getMappingTo();
|
53
|
-
if(inputColumns.size()!=keyColumns.size()){
|
54
|
-
throw new RuntimeException("Number of mapping_from columns must be exactly equals to number of mapping_to columns");
|
55
|
-
}
|
56
|
-
|
57
|
-
Schema outputSchema = inputSchema;
|
58
|
-
|
59
|
-
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
60
|
-
int i = 0;
|
61
|
-
for (Column inputColumn : inputSchema.getColumns()) {
|
62
|
-
Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
|
63
|
-
builder.add(outputColumn);
|
64
|
-
}
|
65
|
-
|
66
|
-
for (ColumnConfig columnConfig : task.getNewColumns().getColumns()) {
|
67
|
-
builder.add(columnConfig.toColumn(i++));
|
68
|
-
}
|
69
|
-
outputSchema = new Schema(builder.build());
|
70
|
-
|
71
|
-
control.run(task.dump(), outputSchema);
|
72
|
-
}
|
73
|
-
|
74
|
-
@Override
|
75
|
-
public PageOutput open(TaskSource taskSource, Schema inputSchema,
|
76
|
-
Schema outputSchema, PageOutput output)
|
77
|
-
{
|
78
|
-
PluginTask task = taskSource.loadTask(PluginTask.class);
|
79
|
-
Map<String, List<String>> map = new HashMap<>();
|
80
|
-
try {
|
81
|
-
try {
|
82
|
-
map = getKeyValueMap(task);
|
83
|
-
} catch (CsvValidationException e) {
|
84
|
-
throw new RuntimeException(e);
|
85
|
-
}
|
86
|
-
} catch (SQLException e) {
|
87
|
-
e.printStackTrace();
|
88
|
-
} catch (IOException e) {
|
89
|
-
throw new RuntimeException(e);
|
90
|
-
}
|
91
|
-
PageReader pageReader = new PageReader(inputSchema);
|
92
|
-
return new MyOutput(pageReader, inputSchema, outputSchema, output, task, map);
|
93
|
-
}
|
94
|
-
private Map<String, List<String>> getKeyValueMap(PluginTask task) throws SQLException, IOException, CsvValidationException, CsvValidationException {
|
95
|
-
Map<String, List<String>> map = new LinkedHashMap<>();
|
96
|
-
|
97
|
-
List<String> targetColumns = task.getMappingTo();
|
98
|
-
List<String> newColumns = new ArrayList<>();
|
99
|
-
|
100
|
-
for (ColumnConfig columnConfig : task.getNewColumns().getColumns()) {
|
101
|
-
newColumns.add(columnConfig.getName());
|
102
|
-
}
|
103
|
-
BufferedReader reader = null;
|
104
|
-
String line = "";
|
105
|
-
reader = new BufferedReader(new FileReader(task.getPathOfLookupFile()));
|
106
|
-
String[] lineDataArray;
|
107
|
-
Map<String, Integer> map1 = new LinkedHashMap<>();
|
108
|
-
List<Integer> list1 = new ArrayList<>();
|
109
|
-
List<Integer> list2 = new ArrayList<>();
|
110
|
-
|
111
|
-
while((line = reader.readLine()) != null) {
|
112
|
-
lineDataArray = line.split(",");
|
113
|
-
for (int s = 0; s< lineDataArray.length; s++) {
|
114
|
-
map1.put(lineDataArray[s], s);
|
115
|
-
}
|
116
|
-
break;
|
117
|
-
}
|
118
|
-
|
119
|
-
for (int x = 0; x< targetColumns.size(); x++){
|
120
|
-
if (!map1.containsKey(targetColumns.get(x))){
|
121
|
-
throw new RuntimeException("Target Columns Not Found!!");
|
122
|
-
}
|
123
|
-
list1.add(map1.get(targetColumns.get(x)));
|
124
|
-
}
|
125
|
-
|
126
|
-
for (int x = 0; x< newColumns.size(); x++){
|
127
|
-
if (!map1.containsKey(newColumns.get(x))){
|
128
|
-
throw new RuntimeException("New Columns field Not Found!!");
|
129
|
-
}
|
130
|
-
list2.add(map1.get(newColumns.get(x)));
|
131
|
-
}
|
132
|
-
|
133
|
-
CSVReader reader1 = new CSVReader(new FileReader(task.getPathOfLookupFile()));
|
134
|
-
String [] nextLine;
|
135
|
-
int i = 0;
|
136
|
-
while ((nextLine = reader1.readNext()) != null) {
|
137
|
-
if (i!=0){
|
138
|
-
|
139
|
-
//for Key
|
140
|
-
String key = "";
|
141
|
-
for (int z = 0; z< list1.size(); z++) {
|
142
|
-
key += nextLine[list1.get(z)];
|
143
|
-
if (z != list1.size() - 1) {
|
144
|
-
key += ",";
|
145
|
-
}
|
146
|
-
}
|
147
|
-
|
148
|
-
//for Values
|
149
|
-
List<String> keyArray = new ArrayList<>();
|
150
|
-
for (int z = 0; z < newColumns.size(); z++) {
|
151
|
-
keyArray.add(nextLine[list2.get(z)]);
|
152
|
-
}
|
153
|
-
map.put(key, keyArray);
|
154
|
-
}i++;
|
155
|
-
}
|
156
|
-
return map;
|
157
|
-
}
|
158
|
-
|
159
|
-
public static class MyOutput implements PageOutput {
|
160
|
-
private PageReader reader;
|
161
|
-
private PageBuilder builder;
|
162
|
-
private PluginTask task;
|
163
|
-
private Schema inputSchema;
|
164
|
-
private Map<String, List<String>> keyValuePair;
|
165
|
-
|
166
|
-
public MyOutput(PageReader pageReader, Schema inputSchema, Schema outputSchema, PageOutput pageOutput, PluginTask task, Map<String, List<String>> keyValuePair) {
|
167
|
-
this.reader = pageReader;
|
168
|
-
this.builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput);
|
169
|
-
this.task = task;
|
170
|
-
this.inputSchema = inputSchema;
|
171
|
-
this.keyValuePair = keyValuePair;
|
172
|
-
}
|
173
|
-
|
174
|
-
@Override
|
175
|
-
public void add(Page page) {
|
176
|
-
reader.setPage(page);
|
177
|
-
List<ColumnConfig> columnConfigList = new ArrayList<>();
|
178
|
-
for (ColumnConfig columnConfig : task.getNewColumns().getColumns()) {
|
179
|
-
columnConfigList.add(columnConfig);
|
180
|
-
}
|
181
|
-
|
182
|
-
while (reader.nextRecord()) {
|
183
|
-
|
184
|
-
int colNum = 0;
|
185
|
-
List<String> inputColumns = task.getMappingFrom();
|
186
|
-
List<String> searchingKeyData = new ArrayList<>();
|
187
|
-
Map<String, Integer> keyMap = new HashMap<>();
|
188
|
-
keyMap.put("Key", 0);
|
189
|
-
|
190
|
-
for (Column column : inputSchema.getColumns()) {
|
191
|
-
if (reader.isNull(column)) {
|
192
|
-
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
193
|
-
searchingKeyData.add("");
|
194
|
-
int key = keyMap.get("Key");
|
195
|
-
keyMap.put("Key", ++key);
|
196
|
-
}
|
197
|
-
builder.setNull(colNum++);
|
198
|
-
} else {
|
199
|
-
add_builder(colNum++, column, searchingKeyData, inputColumns, keyMap);
|
200
|
-
}
|
201
|
-
}
|
202
|
-
|
203
|
-
String key = "";
|
204
|
-
for (int k = 0; k < searchingKeyData.size(); k++) {
|
205
|
-
key += searchingKeyData.get(k);
|
206
|
-
if (k != searchingKeyData.size() - 1) {
|
207
|
-
key += ",";
|
208
|
-
}
|
209
|
-
}
|
210
|
-
|
211
|
-
List<String> matchedData = new ArrayList<>();
|
212
|
-
if (keyValuePair.containsKey(key)) {
|
213
|
-
matchedData = keyValuePair.get(key);
|
214
|
-
}
|
215
|
-
|
216
|
-
if (matchedData.size() == 0) {
|
217
|
-
for (int k = 0; k < columnConfigList.size(); k++) {
|
218
|
-
add_builder_for_new_column(colNum, columnConfigList.get(k).getType().getName(), "", false);
|
219
|
-
colNum++;
|
220
|
-
}
|
221
|
-
} else {
|
222
|
-
for (int k = 0; k < columnConfigList.size(); k++) {
|
223
|
-
add_builder_for_new_column(colNum, columnConfigList.get(k).getType().getName(), matchedData.get(k), true);
|
224
|
-
colNum++;
|
225
|
-
}
|
226
|
-
}
|
227
|
-
builder.addRecord();
|
228
|
-
}
|
229
|
-
|
230
|
-
}
|
231
|
-
|
232
|
-
@Override
|
233
|
-
public void finish() {
|
234
|
-
builder.finish();
|
235
|
-
}
|
236
|
-
|
237
|
-
@Override
|
238
|
-
public void close() {
|
239
|
-
builder.close();
|
240
|
-
}
|
241
|
-
|
242
|
-
private void add_builder(int colNum, Column column, List<String> searchingKeyData, List<String> inputColumns, Map<String, Integer> keyMap) {
|
243
|
-
if (Types.STRING.equals(column.getType())) {
|
244
|
-
if (keyMap.get("Key") < inputColumns.size()) {
|
245
|
-
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
246
|
-
searchingKeyData.add(reader.getString(column));
|
247
|
-
int key = keyMap.get("Key");
|
248
|
-
keyMap.put("Key", ++key);
|
249
|
-
}
|
250
|
-
}
|
251
|
-
builder.setString(colNum, reader.getString(column));
|
252
|
-
} else if (Types.BOOLEAN.equals(column.getType())) {
|
253
|
-
if (keyMap.get("Key") < inputColumns.size()) {
|
254
|
-
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
255
|
-
searchingKeyData.add(String.valueOf(reader.getBoolean(column)));
|
256
|
-
int key = keyMap.get("Key");
|
257
|
-
keyMap.put("Key", ++key);
|
258
|
-
}
|
259
|
-
}
|
260
|
-
builder.setBoolean(colNum, reader.getBoolean(column));
|
261
|
-
} else if (Types.DOUBLE.equals(column.getType())) {
|
262
|
-
if (keyMap.get("Key") < inputColumns.size()) {
|
263
|
-
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
264
|
-
searchingKeyData.add(String.valueOf(reader.getDouble(column)));
|
265
|
-
int key = keyMap.get("Key");
|
266
|
-
keyMap.put("Key", ++key);
|
267
|
-
}
|
268
|
-
}
|
269
|
-
builder.setDouble(colNum, reader.getDouble(column));
|
270
|
-
} else if (Types.LONG.equals(column.getType())) {
|
271
|
-
if (keyMap.get("Key") < inputColumns.size()) {
|
272
|
-
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
273
|
-
searchingKeyData.add(String.valueOf(reader.getLong(column)));
|
274
|
-
int key = keyMap.get("Key");
|
275
|
-
keyMap.put("Key", ++key);
|
276
|
-
}
|
277
|
-
}
|
278
|
-
|
279
|
-
builder.setLong(colNum, reader.getLong(column));
|
280
|
-
} else if (Types.TIMESTAMP.equals(column.getType())) {
|
281
|
-
if (keyMap.get("Key") < inputColumns.size()) {
|
282
|
-
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
283
|
-
searchingKeyData.add(String.valueOf(reader.getTimestamp(column)));
|
284
|
-
int key = keyMap.get("Key");
|
285
|
-
keyMap.put("Key", ++key);
|
286
|
-
}
|
287
|
-
}
|
288
|
-
builder.setTimestamp(colNum, reader.getTimestamp(column));
|
289
|
-
}
|
290
|
-
}
|
291
|
-
|
292
|
-
private void add_builder_for_new_column(int colNum, String newlyAddedColumnType, String matchedData, Boolean isDataMatched) {
|
293
|
-
try{
|
294
|
-
if (newlyAddedColumnType.equalsIgnoreCase("string")) {
|
295
|
-
if (isDataMatched) {
|
296
|
-
builder.setString(colNum, matchedData);
|
297
|
-
} else {
|
298
|
-
builder.setString(colNum, "");
|
299
|
-
}
|
300
|
-
|
301
|
-
} else if (newlyAddedColumnType.equalsIgnoreCase("long")) {
|
302
|
-
if (isDataMatched) {
|
303
|
-
if (matchedData.length() == 0) {
|
304
|
-
builder.setLong(colNum, 0);
|
305
|
-
}else{
|
306
|
-
builder.setLong(colNum, Long.parseLong(matchedData));
|
307
|
-
}
|
308
|
-
} else {
|
309
|
-
builder.setLong(colNum, 0);
|
310
|
-
}
|
311
|
-
|
312
|
-
} else if (newlyAddedColumnType.equalsIgnoreCase("double")) {
|
313
|
-
if (isDataMatched) {
|
314
|
-
if (matchedData.length() == 0) {
|
315
|
-
builder.setDouble(colNum, 0.0);
|
316
|
-
}else{
|
317
|
-
builder.setDouble(colNum, Double.parseDouble(matchedData));
|
318
|
-
}
|
319
|
-
} else {
|
320
|
-
builder.setDouble(colNum, 0.0);
|
321
|
-
}
|
322
|
-
} else if (newlyAddedColumnType.equalsIgnoreCase("boolean")) {
|
323
|
-
if (isDataMatched) {
|
324
|
-
if (matchedData.length() == 0) {
|
325
|
-
builder.setNull(colNum);
|
326
|
-
}else{
|
327
|
-
builder.setBoolean(colNum, Boolean.parseBoolean(matchedData));
|
328
|
-
}
|
329
|
-
} else {
|
330
|
-
builder.setNull(colNum);
|
331
|
-
}
|
332
|
-
} else if (newlyAddedColumnType.equalsIgnoreCase("timestamp")) {
|
333
|
-
if (isDataMatched) {
|
334
|
-
if (matchedData.length() == 0) {
|
335
|
-
builder.setNull(colNum);
|
336
|
-
}else{
|
337
|
-
java.sql.Timestamp timestamp = java.sql.Timestamp.valueOf(matchedData);
|
338
|
-
Instant instant = timestamp.toInstant();
|
339
|
-
Timestamp spiTimeStamp = Timestamp.ofInstant(instant);
|
340
|
-
builder.setTimestamp(colNum, spiTimeStamp);
|
341
|
-
}
|
342
|
-
} else {
|
343
|
-
builder.setNull(colNum);
|
344
|
-
}
|
345
|
-
|
346
|
-
}
|
347
|
-
}catch (Exception e){
|
348
|
-
e.printStackTrace();
|
349
|
-
throw new RuntimeException("Data type could not be cast due to wrong data or issue in typecasting timestamp",e);
|
350
|
-
}
|
351
|
-
|
352
|
-
}
|
353
|
-
|
354
|
-
}
|
355
|
-
}
|
1
|
+
package org.embulk.filter.csv_lookup;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
|
5
|
+
import com.google.common.collect.ImmutableList;
|
6
|
+
import com.opencsv.CSVReader;
|
7
|
+
import com.opencsv.exceptions.CsvValidationException;
|
8
|
+
import org.embulk.config.Config;
|
9
|
+
import org.embulk.config.ConfigDefault;
|
10
|
+
import org.embulk.config.ConfigDiff;
|
11
|
+
import org.embulk.config.ConfigSource;
|
12
|
+
import org.embulk.config.Task;
|
13
|
+
import org.embulk.config.TaskSource;
|
14
|
+
import org.embulk.spi.*;
|
15
|
+
import org.embulk.spi.time.Timestamp;
|
16
|
+
import org.embulk.spi.type.Types;
|
17
|
+
|
18
|
+
import java.io.BufferedReader;
|
19
|
+
import java.io.FileReader;
|
20
|
+
import java.io.IOException;
|
21
|
+
import java.sql.SQLException;
|
22
|
+
import java.time.Instant;
|
23
|
+
import java.util.*;
|
24
|
+
|
25
|
+
public class CsvLookupFilterPlugin
|
26
|
+
implements FilterPlugin
|
27
|
+
{
|
28
|
+
public interface PluginTask
|
29
|
+
extends Task
|
30
|
+
{
|
31
|
+
@Config("mapping_from")
|
32
|
+
public List<String> getMappingFrom();
|
33
|
+
|
34
|
+
@Config("mapping_to")
|
35
|
+
public List<String> getMappingTo();
|
36
|
+
|
37
|
+
@Config("new_columns")
|
38
|
+
public SchemaConfig getNewColumns();
|
39
|
+
|
40
|
+
@Config("path_of_lookup_file")
|
41
|
+
public String getPathOfLookupFile();
|
42
|
+
|
43
|
+
}
|
44
|
+
|
45
|
+
@Override
|
46
|
+
public void transaction(ConfigSource config, Schema inputSchema,
|
47
|
+
FilterPlugin.Control control)
|
48
|
+
{
|
49
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
50
|
+
|
51
|
+
List<String> inputColumns = task.getMappingFrom();
|
52
|
+
List<String> keyColumns = task.getMappingTo();
|
53
|
+
if(inputColumns.size()!=keyColumns.size()){
|
54
|
+
throw new RuntimeException("Number of mapping_from columns must be exactly equals to number of mapping_to columns");
|
55
|
+
}
|
56
|
+
|
57
|
+
Schema outputSchema = inputSchema;
|
58
|
+
|
59
|
+
ImmutableList.Builder<Column> builder = ImmutableList.builder();
|
60
|
+
int i = 0;
|
61
|
+
for (Column inputColumn : inputSchema.getColumns()) {
|
62
|
+
Column outputColumn = new Column(i++, inputColumn.getName(), inputColumn.getType());
|
63
|
+
builder.add(outputColumn);
|
64
|
+
}
|
65
|
+
|
66
|
+
for (ColumnConfig columnConfig : task.getNewColumns().getColumns()) {
|
67
|
+
builder.add(columnConfig.toColumn(i++));
|
68
|
+
}
|
69
|
+
outputSchema = new Schema(builder.build());
|
70
|
+
|
71
|
+
control.run(task.dump(), outputSchema);
|
72
|
+
}
|
73
|
+
|
74
|
+
@Override
|
75
|
+
public PageOutput open(TaskSource taskSource, Schema inputSchema,
|
76
|
+
Schema outputSchema, PageOutput output)
|
77
|
+
{
|
78
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
79
|
+
Map<String, List<String>> map = new HashMap<>();
|
80
|
+
try {
|
81
|
+
try {
|
82
|
+
map = getKeyValueMap(task);
|
83
|
+
} catch (CsvValidationException e) {
|
84
|
+
throw new RuntimeException(e);
|
85
|
+
}
|
86
|
+
} catch (SQLException e) {
|
87
|
+
e.printStackTrace();
|
88
|
+
} catch (IOException e) {
|
89
|
+
throw new RuntimeException(e);
|
90
|
+
}
|
91
|
+
PageReader pageReader = new PageReader(inputSchema);
|
92
|
+
return new MyOutput(pageReader, inputSchema, outputSchema, output, task, map);
|
93
|
+
}
|
94
|
+
private Map<String, List<String>> getKeyValueMap(PluginTask task) throws SQLException, IOException, CsvValidationException, CsvValidationException {
|
95
|
+
Map<String, List<String>> map = new LinkedHashMap<>();
|
96
|
+
|
97
|
+
List<String> targetColumns = task.getMappingTo();
|
98
|
+
List<String> newColumns = new ArrayList<>();
|
99
|
+
|
100
|
+
for (ColumnConfig columnConfig : task.getNewColumns().getColumns()) {
|
101
|
+
newColumns.add(columnConfig.getName());
|
102
|
+
}
|
103
|
+
BufferedReader reader = null;
|
104
|
+
String line = "";
|
105
|
+
reader = new BufferedReader(new FileReader(task.getPathOfLookupFile()));
|
106
|
+
String[] lineDataArray;
|
107
|
+
Map<String, Integer> map1 = new LinkedHashMap<>();
|
108
|
+
List<Integer> list1 = new ArrayList<>();
|
109
|
+
List<Integer> list2 = new ArrayList<>();
|
110
|
+
|
111
|
+
while((line = reader.readLine()) != null) {
|
112
|
+
lineDataArray = line.split(",");
|
113
|
+
for (int s = 0; s< lineDataArray.length; s++) {
|
114
|
+
map1.put(lineDataArray[s], s);
|
115
|
+
}
|
116
|
+
break;
|
117
|
+
}
|
118
|
+
|
119
|
+
for (int x = 0; x< targetColumns.size(); x++){
|
120
|
+
if (!map1.containsKey(targetColumns.get(x))){
|
121
|
+
throw new RuntimeException("Target Columns Not Found!!");
|
122
|
+
}
|
123
|
+
list1.add(map1.get(targetColumns.get(x)));
|
124
|
+
}
|
125
|
+
|
126
|
+
for (int x = 0; x< newColumns.size(); x++){
|
127
|
+
if (!map1.containsKey(newColumns.get(x))){
|
128
|
+
throw new RuntimeException("New Columns field Not Found!!");
|
129
|
+
}
|
130
|
+
list2.add(map1.get(newColumns.get(x)));
|
131
|
+
}
|
132
|
+
|
133
|
+
CSVReader reader1 = new CSVReader(new FileReader(task.getPathOfLookupFile()));
|
134
|
+
String [] nextLine;
|
135
|
+
int i = 0;
|
136
|
+
while ((nextLine = reader1.readNext()) != null) {
|
137
|
+
if (i!=0){
|
138
|
+
|
139
|
+
//for Key
|
140
|
+
String key = "";
|
141
|
+
for (int z = 0; z< list1.size(); z++) {
|
142
|
+
key += nextLine[list1.get(z)];
|
143
|
+
if (z != list1.size() - 1) {
|
144
|
+
key += ",";
|
145
|
+
}
|
146
|
+
}
|
147
|
+
|
148
|
+
//for Values
|
149
|
+
List<String> keyArray = new ArrayList<>();
|
150
|
+
for (int z = 0; z < newColumns.size(); z++) {
|
151
|
+
keyArray.add(nextLine[list2.get(z)]);
|
152
|
+
}
|
153
|
+
map.put(key, keyArray);
|
154
|
+
}i++;
|
155
|
+
}
|
156
|
+
return map;
|
157
|
+
}
|
158
|
+
|
159
|
+
public static class MyOutput implements PageOutput {
|
160
|
+
private PageReader reader;
|
161
|
+
private PageBuilder builder;
|
162
|
+
private PluginTask task;
|
163
|
+
private Schema inputSchema;
|
164
|
+
private Map<String, List<String>> keyValuePair;
|
165
|
+
|
166
|
+
public MyOutput(PageReader pageReader, Schema inputSchema, Schema outputSchema, PageOutput pageOutput, PluginTask task, Map<String, List<String>> keyValuePair) {
|
167
|
+
this.reader = pageReader;
|
168
|
+
this.builder = new PageBuilder(Exec.getBufferAllocator(), outputSchema, pageOutput);
|
169
|
+
this.task = task;
|
170
|
+
this.inputSchema = inputSchema;
|
171
|
+
this.keyValuePair = keyValuePair;
|
172
|
+
}
|
173
|
+
|
174
|
+
@Override
|
175
|
+
public void add(Page page) {
|
176
|
+
reader.setPage(page);
|
177
|
+
List<ColumnConfig> columnConfigList = new ArrayList<>();
|
178
|
+
for (ColumnConfig columnConfig : task.getNewColumns().getColumns()) {
|
179
|
+
columnConfigList.add(columnConfig);
|
180
|
+
}
|
181
|
+
|
182
|
+
while (reader.nextRecord()) {
|
183
|
+
|
184
|
+
int colNum = 0;
|
185
|
+
List<String> inputColumns = task.getMappingFrom();
|
186
|
+
List<String> searchingKeyData = new ArrayList<>();
|
187
|
+
Map<String, Integer> keyMap = new HashMap<>();
|
188
|
+
keyMap.put("Key", 0);
|
189
|
+
|
190
|
+
for (Column column : inputSchema.getColumns()) {
|
191
|
+
if (reader.isNull(column)) {
|
192
|
+
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
193
|
+
searchingKeyData.add("");
|
194
|
+
int key = keyMap.get("Key");
|
195
|
+
keyMap.put("Key", ++key);
|
196
|
+
}
|
197
|
+
builder.setNull(colNum++);
|
198
|
+
} else {
|
199
|
+
add_builder(colNum++, column, searchingKeyData, inputColumns, keyMap);
|
200
|
+
}
|
201
|
+
}
|
202
|
+
|
203
|
+
String key = "";
|
204
|
+
for (int k = 0; k < searchingKeyData.size(); k++) {
|
205
|
+
key += searchingKeyData.get(k);
|
206
|
+
if (k != searchingKeyData.size() - 1) {
|
207
|
+
key += ",";
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
211
|
+
List<String> matchedData = new ArrayList<>();
|
212
|
+
if (keyValuePair.containsKey(key)) {
|
213
|
+
matchedData = keyValuePair.get(key);
|
214
|
+
}
|
215
|
+
|
216
|
+
if (matchedData.size() == 0) {
|
217
|
+
for (int k = 0; k < columnConfigList.size(); k++) {
|
218
|
+
add_builder_for_new_column(colNum, columnConfigList.get(k).getType().getName(), "", false);
|
219
|
+
colNum++;
|
220
|
+
}
|
221
|
+
} else {
|
222
|
+
for (int k = 0; k < columnConfigList.size(); k++) {
|
223
|
+
add_builder_for_new_column(colNum, columnConfigList.get(k).getType().getName(), matchedData.get(k), true);
|
224
|
+
colNum++;
|
225
|
+
}
|
226
|
+
}
|
227
|
+
builder.addRecord();
|
228
|
+
}
|
229
|
+
|
230
|
+
}
|
231
|
+
|
232
|
+
@Override
|
233
|
+
public void finish() {
|
234
|
+
builder.finish();
|
235
|
+
}
|
236
|
+
|
237
|
+
@Override
|
238
|
+
public void close() {
|
239
|
+
builder.close();
|
240
|
+
}
|
241
|
+
|
242
|
+
private void add_builder(int colNum, Column column, List<String> searchingKeyData, List<String> inputColumns, Map<String, Integer> keyMap) {
|
243
|
+
if (Types.STRING.equals(column.getType())) {
|
244
|
+
if (keyMap.get("Key") < inputColumns.size()) {
|
245
|
+
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
246
|
+
searchingKeyData.add(reader.getString(column));
|
247
|
+
int key = keyMap.get("Key");
|
248
|
+
keyMap.put("Key", ++key);
|
249
|
+
}
|
250
|
+
}
|
251
|
+
builder.setString(colNum, reader.getString(column));
|
252
|
+
} else if (Types.BOOLEAN.equals(column.getType())) {
|
253
|
+
if (keyMap.get("Key") < inputColumns.size()) {
|
254
|
+
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
255
|
+
searchingKeyData.add(String.valueOf(reader.getBoolean(column)));
|
256
|
+
int key = keyMap.get("Key");
|
257
|
+
keyMap.put("Key", ++key);
|
258
|
+
}
|
259
|
+
}
|
260
|
+
builder.setBoolean(colNum, reader.getBoolean(column));
|
261
|
+
} else if (Types.DOUBLE.equals(column.getType())) {
|
262
|
+
if (keyMap.get("Key") < inputColumns.size()) {
|
263
|
+
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
264
|
+
searchingKeyData.add(String.valueOf(reader.getDouble(column)));
|
265
|
+
int key = keyMap.get("Key");
|
266
|
+
keyMap.put("Key", ++key);
|
267
|
+
}
|
268
|
+
}
|
269
|
+
builder.setDouble(colNum, reader.getDouble(column));
|
270
|
+
} else if (Types.LONG.equals(column.getType())) {
|
271
|
+
if (keyMap.get("Key") < inputColumns.size()) {
|
272
|
+
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
273
|
+
searchingKeyData.add(String.valueOf(reader.getLong(column)));
|
274
|
+
int key = keyMap.get("Key");
|
275
|
+
keyMap.put("Key", ++key);
|
276
|
+
}
|
277
|
+
}
|
278
|
+
|
279
|
+
builder.setLong(colNum, reader.getLong(column));
|
280
|
+
} else if (Types.TIMESTAMP.equals(column.getType())) {
|
281
|
+
if (keyMap.get("Key") < inputColumns.size()) {
|
282
|
+
if (column.getName().equalsIgnoreCase(inputColumns.get(keyMap.get("Key")))) {
|
283
|
+
searchingKeyData.add(String.valueOf(reader.getTimestamp(column)));
|
284
|
+
int key = keyMap.get("Key");
|
285
|
+
keyMap.put("Key", ++key);
|
286
|
+
}
|
287
|
+
}
|
288
|
+
builder.setTimestamp(colNum, reader.getTimestamp(column));
|
289
|
+
}
|
290
|
+
}
|
291
|
+
|
292
|
+
private void add_builder_for_new_column(int colNum, String newlyAddedColumnType, String matchedData, Boolean isDataMatched) {
|
293
|
+
try{
|
294
|
+
if (newlyAddedColumnType.equalsIgnoreCase("string")) {
|
295
|
+
if (isDataMatched) {
|
296
|
+
builder.setString(colNum, matchedData);
|
297
|
+
} else {
|
298
|
+
builder.setString(colNum, "");
|
299
|
+
}
|
300
|
+
|
301
|
+
} else if (newlyAddedColumnType.equalsIgnoreCase("long")) {
|
302
|
+
if (isDataMatched) {
|
303
|
+
if (matchedData.length() == 0) {
|
304
|
+
builder.setLong(colNum, 0);
|
305
|
+
}else{
|
306
|
+
builder.setLong(colNum, Long.parseLong(matchedData));
|
307
|
+
}
|
308
|
+
} else {
|
309
|
+
builder.setLong(colNum, 0);
|
310
|
+
}
|
311
|
+
|
312
|
+
} else if (newlyAddedColumnType.equalsIgnoreCase("double")) {
|
313
|
+
if (isDataMatched) {
|
314
|
+
if (matchedData.length() == 0) {
|
315
|
+
builder.setDouble(colNum, 0.0);
|
316
|
+
}else{
|
317
|
+
builder.setDouble(colNum, Double.parseDouble(matchedData));
|
318
|
+
}
|
319
|
+
} else {
|
320
|
+
builder.setDouble(colNum, 0.0);
|
321
|
+
}
|
322
|
+
} else if (newlyAddedColumnType.equalsIgnoreCase("boolean")) {
|
323
|
+
if (isDataMatched) {
|
324
|
+
if (matchedData.length() == 0) {
|
325
|
+
builder.setNull(colNum);
|
326
|
+
}else{
|
327
|
+
builder.setBoolean(colNum, Boolean.parseBoolean(matchedData));
|
328
|
+
}
|
329
|
+
} else {
|
330
|
+
builder.setNull(colNum);
|
331
|
+
}
|
332
|
+
} else if (newlyAddedColumnType.equalsIgnoreCase("timestamp")) {
|
333
|
+
if (isDataMatched) {
|
334
|
+
if (matchedData.length() == 0) {
|
335
|
+
builder.setNull(colNum);
|
336
|
+
}else{
|
337
|
+
java.sql.Timestamp timestamp = java.sql.Timestamp.valueOf(matchedData);
|
338
|
+
Instant instant = timestamp.toInstant();
|
339
|
+
Timestamp spiTimeStamp = Timestamp.ofInstant(instant);
|
340
|
+
builder.setTimestamp(colNum, spiTimeStamp);
|
341
|
+
}
|
342
|
+
} else {
|
343
|
+
builder.setNull(colNum);
|
344
|
+
}
|
345
|
+
|
346
|
+
}
|
347
|
+
}catch (Exception e){
|
348
|
+
e.printStackTrace();
|
349
|
+
throw new RuntimeException("Data type could not be cast due to wrong data or issue in typecasting timestamp",e);
|
350
|
+
}
|
351
|
+
|
352
|
+
}
|
353
|
+
|
354
|
+
}
|
355
|
+
}
|