embulk-output-kintone 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +57 -1
- data/build.gradle +1 -0
- data/classpath/commons-csv-1.9.0.jar +0 -0
- data/classpath/embulk-output-kintone-1.1.0.jar +0 -0
- data/classpath/externalsortinginjava-0.6.2.jar +0 -0
- data/classpath/{shadow-kintone-java-client-1.0.0-all.jar → shadow-kintone-java-client-1.1.0-all.jar} +0 -0
- data/src/main/java/org/embulk/output/kintone/KintoneColumnOption.java +5 -0
- data/src/main/java/org/embulk/output/kintone/KintoneColumnType.java +209 -5
- data/src/main/java/org/embulk/output/kintone/KintoneColumnVisitor.java +28 -9
- data/src/main/java/org/embulk/output/kintone/KintoneOutputPlugin.java +12 -3
- data/src/main/java/org/embulk/output/kintone/KintonePageOutput.java +20 -12
- data/src/main/java/org/embulk/output/kintone/KintoneSortColumn.java +33 -0
- data/src/main/java/org/embulk/output/kintone/PluginTask.java +27 -0
- data/src/main/java/org/embulk/output/kintone/deserializer/DeserializeApplier.java +19 -0
- data/src/main/java/org/embulk/output/kintone/deserializer/DeserializeException.java +7 -0
- data/src/main/java/org/embulk/output/kintone/deserializer/Deserializer.java +279 -0
- data/src/main/java/org/embulk/output/kintone/reducer/CSVInputColumnVisitor.java +78 -0
- data/src/main/java/org/embulk/output/kintone/reducer/CSVOutputColumnVisitor.java +79 -0
- data/src/main/java/org/embulk/output/kintone/reducer/ReduceException.java +11 -0
- data/src/main/java/org/embulk/output/kintone/reducer/ReduceType.java +190 -0
- data/src/main/java/org/embulk/output/kintone/reducer/ReducedPageOutput.java +100 -0
- data/src/main/java/org/embulk/output/kintone/reducer/Reducer.java +355 -0
- data/src/test/java/org/embulk/output/kintone/KintoneColumnOptionBuilder.java +7 -0
- data/src/test/java/org/embulk/output/kintone/KintoneColumnTypeTest.java +194 -0
- data/src/test/java/org/embulk/output/kintone/KintoneColumnVisitorTest.java +153 -34
- data/src/test/java/org/embulk/output/kintone/KintoneColumnVisitorVerifier.java +13 -3
- data/src/test/java/org/embulk/output/kintone/KintonePageOutputVerifier.java +44 -1
- data/src/test/java/org/embulk/output/kintone/TestKintoneOutputPlugin.java +89 -12
- data/src/test/java/org/embulk/output/kintone/TestTaskReduce.java +46 -0
- data/src/test/java/org/embulk/output/kintone/TestTaskReduceException.java +50 -0
- data/src/test/java/org/embulk/output/kintone/TestTaskReduceSubtable.java +46 -0
- data/src/test/java/org/embulk/output/kintone/deserializer/DeserializerTest.java +165 -0
- data/src/test/java/org/embulk/output/kintone/reducer/ReduceTypeTest.java +154 -0
- data/src/test/resources/org/embulk/output/kintone/task/config.yml +1 -1
- data/src/test/resources/org/embulk/output/kintone/task/mode/config.yml +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/mode/input.csv +7 -7
- data/src/test/resources/org/embulk/output/kintone/task/mode/insert_add_ignore_nulls_records.jsonl +2 -2
- data/src/test/resources/org/embulk/output/kintone/task/mode/insert_add_prefer_nulls_records.jsonl +6 -6
- data/src/test/resources/org/embulk/output/kintone/task/mode/insert_add_records.jsonl +6 -6
- data/src/test/resources/org/embulk/output/kintone/task/mode/update_update_ignore_nulls_records.jsonl +2 -2
- data/src/test/resources/org/embulk/output/kintone/task/mode/update_update_prefer_nulls_records.jsonl +3 -3
- data/src/test/resources/org/embulk/output/kintone/task/mode/update_update_records.jsonl +6 -6
- data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_add_prefer_nulls_records.jsonl +3 -3
- data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_add_records.jsonl +2 -2
- data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_update_ignore_nulls_records.jsonl +2 -2
- data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_update_prefer_nulls_records.jsonl +3 -3
- data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_update_records.jsonl +4 -4
- data/src/test/resources/org/embulk/output/kintone/task/reduce/config.yml +171 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/input.csv +7 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/insert_add_ignore_nulls_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/insert_add_prefer_nulls_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/insert_add_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/update_update_ignore_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/update_update_prefer_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/update_update_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_add_ignore_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_add_prefer_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_add_records.jsonl +2 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_update_ignore_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_update_prefer_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_update_records.jsonl +4 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/values.json +1 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/values_ignore_nulls.json +1 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce/values_prefer_nulls.json +1 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/config.yml +36 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/derived_columns.json +1 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/input.csv +13 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/insert_add_records.jsonl +2 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/update_update_records.jsonl +2 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/config.yml +343 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/derived_columns.json +1 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/input.csv +13 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/insert_add_ignore_nulls_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/insert_add_prefer_nulls_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/insert_add_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/update_update_ignore_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/update_update_prefer_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/update_update_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_add_ignore_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_add_prefer_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_add_records.jsonl +0 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_update_ignore_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_update_prefer_nulls_records.jsonl +3 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_update_records.jsonl +6 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/values.json +1 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/values_ignore_nulls.json +1 -0
- data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/values_prefer_nulls.json +1 -0
- metadata +62 -4
- data/classpath/embulk-output-kintone-1.0.0.jar +0 -0
@@ -0,0 +1,190 @@
|
|
1
|
+
package org.embulk.output.kintone.reducer;
|
2
|
+
|
3
|
+
import java.time.Instant;
|
4
|
+
import java.util.ArrayList;
|
5
|
+
import java.util.Comparator;
|
6
|
+
import java.util.List;
|
7
|
+
import java.util.Map;
|
8
|
+
import java.util.function.Supplier;
|
9
|
+
import org.embulk.output.kintone.KintoneColumnOption;
|
10
|
+
import org.embulk.output.kintone.KintoneColumnType;
|
11
|
+
import org.embulk.output.kintone.KintoneSortColumn;
|
12
|
+
import org.embulk.spi.Column;
|
13
|
+
import org.embulk.spi.time.Timestamp;
|
14
|
+
import org.msgpack.value.ArrayValue;
|
15
|
+
import org.msgpack.value.MapValue;
|
16
|
+
import org.msgpack.value.Value;
|
17
|
+
import org.msgpack.value.ValueFactory;
|
18
|
+
|
19
|
+
public enum ReduceType {
|
20
|
+
BOOLEAN {
|
21
|
+
@Override
|
22
|
+
public MapValue value(String value, KintoneColumnOption option) {
|
23
|
+
KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.NUMBER);
|
24
|
+
Supplier<Value> supplier =
|
25
|
+
() -> type.asValue(type.getFieldValue(Boolean.parseBoolean(value), option));
|
26
|
+
return value(type, value, supplier);
|
27
|
+
}
|
28
|
+
|
29
|
+
@Override
|
30
|
+
public Comparator<String> comparator(KintoneSortColumn.Order order) {
|
31
|
+
return Comparator.comparing(Boolean::parseBoolean, order(order));
|
32
|
+
}
|
33
|
+
},
|
34
|
+
LONG {
|
35
|
+
@Override
|
36
|
+
public MapValue value(String value, KintoneColumnOption option) {
|
37
|
+
KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.NUMBER);
|
38
|
+
Supplier<Value> supplier =
|
39
|
+
() -> type.asValue(type.getFieldValue(Long.parseLong(value), option));
|
40
|
+
return value(type, value, supplier);
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public Comparator<String> comparator(KintoneSortColumn.Order order) {
|
45
|
+
return Comparator.comparing(Long::parseLong, order(order));
|
46
|
+
}
|
47
|
+
},
|
48
|
+
DOUBLE {
|
49
|
+
@Override
|
50
|
+
public MapValue value(String value, KintoneColumnOption option) {
|
51
|
+
KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.NUMBER);
|
52
|
+
Supplier<Value> supplier =
|
53
|
+
() -> type.asValue(type.getFieldValue(Double.parseDouble(value), option));
|
54
|
+
return value(type, value, supplier);
|
55
|
+
}
|
56
|
+
|
57
|
+
@Override
|
58
|
+
public Comparator<String> comparator(KintoneSortColumn.Order order) {
|
59
|
+
return Comparator.comparing(Double::parseDouble, order(order));
|
60
|
+
}
|
61
|
+
},
|
62
|
+
STRING {
|
63
|
+
@Override
|
64
|
+
public MapValue value(String value, KintoneColumnOption option) {
|
65
|
+
KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.MULTI_LINE_TEXT);
|
66
|
+
Supplier<Value> supplier = () -> type.asValue(type.getFieldValue(value, option));
|
67
|
+
return value(type, value, supplier);
|
68
|
+
}
|
69
|
+
|
70
|
+
@Override
|
71
|
+
public Comparator<String> comparator(KintoneSortColumn.Order order) {
|
72
|
+
return order(order);
|
73
|
+
}
|
74
|
+
},
|
75
|
+
TIMESTAMP {
|
76
|
+
@Override
|
77
|
+
public MapValue value(String value, KintoneColumnOption option) {
|
78
|
+
KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.DATETIME);
|
79
|
+
Supplier<Value> supplier =
|
80
|
+
() -> type.asValue(type.getFieldValue(Timestamp.ofInstant(Instant.parse(value)), option));
|
81
|
+
return value(type, value, supplier);
|
82
|
+
}
|
83
|
+
|
84
|
+
@Override
|
85
|
+
public Comparator<String> comparator(KintoneSortColumn.Order order) {
|
86
|
+
return Comparator.comparing(Instant::parse, order(order));
|
87
|
+
}
|
88
|
+
},
|
89
|
+
JSON {
|
90
|
+
@Override
|
91
|
+
public MapValue value(String value, KintoneColumnOption option) {
|
92
|
+
KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.MULTI_LINE_TEXT);
|
93
|
+
Supplier<Value> supplier =
|
94
|
+
() -> type.asValue(type.getFieldValue(Reducer.PARSER.parse(value), option));
|
95
|
+
return value(type, value, supplier);
|
96
|
+
}
|
97
|
+
|
98
|
+
@Override
|
99
|
+
public Comparator<String> comparator(KintoneSortColumn.Order order) {
|
100
|
+
return order(order);
|
101
|
+
}
|
102
|
+
};
|
103
|
+
private static final Value NIL = ValueFactory.newNil();
|
104
|
+
private static final Value ID = ValueFactory.newString("id");
|
105
|
+
private static final Value TYPE = ValueFactory.newString("type");
|
106
|
+
private static final Value VALUE = ValueFactory.newString("value");
|
107
|
+
private static final Value KEY_SET = ValueFactory.newString("$$key_set");
|
108
|
+
private static final Value SORT_VALUE = ValueFactory.newString("$$sort_value");
|
109
|
+
|
110
|
+
public abstract MapValue value(String value, KintoneColumnOption option);
|
111
|
+
|
112
|
+
public abstract Comparator<String> comparator(KintoneSortColumn.Order order);
|
113
|
+
|
114
|
+
public static Comparator<String> comparator(Column column, KintoneSortColumn.Order order) {
|
115
|
+
return valueOf(column).comparator(order);
|
116
|
+
}
|
117
|
+
|
118
|
+
public static String asString(Value value, KintoneSortColumn sortColumn) {
|
119
|
+
return asString(sortValue(value).map().get(value(sortColumn.getName())));
|
120
|
+
}
|
121
|
+
|
122
|
+
public static boolean isEmpty(MapValue value) {
|
123
|
+
return value.values().stream()
|
124
|
+
.map(Value::asMapValue)
|
125
|
+
.map(MapValue::map)
|
126
|
+
.map(map -> map.get(VALUE))
|
127
|
+
.allMatch(Value::isNilValue);
|
128
|
+
}
|
129
|
+
|
130
|
+
public static Value value(String value) {
|
131
|
+
return value == null ? NIL : ValueFactory.newString(value);
|
132
|
+
}
|
133
|
+
|
134
|
+
public static MapValue value(Value value) {
|
135
|
+
ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
|
136
|
+
Map<Value, Value> map = value.asMapValue().map();
|
137
|
+
builder.put(ID, map.get(ID));
|
138
|
+
builder.put(VALUE, value(map.get(VALUE).asMapValue().map(), map.get(KEY_SET).asArrayValue()));
|
139
|
+
return builder.build();
|
140
|
+
}
|
141
|
+
|
142
|
+
public static MapValue value(Long id, MapValue value, MapValue sortValue) {
|
143
|
+
ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
|
144
|
+
builder.put(ID, id == null ? NIL : ValueFactory.newString(id.toString()));
|
145
|
+
builder.put(VALUE, value == null ? ValueFactory.emptyMap() : value);
|
146
|
+
builder.put(KEY_SET, value == null ? ValueFactory.emptyArray() : keySet(value));
|
147
|
+
builder.put(SORT_VALUE, sortValue == null ? ValueFactory.emptyMap() : sortValue);
|
148
|
+
return builder.build();
|
149
|
+
}
|
150
|
+
|
151
|
+
public static MapValue value(Column column, List<String> values, KintoneColumnOption option) {
|
152
|
+
return valueOf(column).value(values.get(column.getIndex()), option);
|
153
|
+
}
|
154
|
+
|
155
|
+
protected static MapValue value(KintoneColumnType type, String value, Supplier<Value> supplier) {
|
156
|
+
ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
|
157
|
+
builder.put(TYPE, value(type.name()));
|
158
|
+
builder.put(VALUE, value == null ? NIL : supplier.get());
|
159
|
+
return builder.build();
|
160
|
+
}
|
161
|
+
|
162
|
+
private static <T extends Comparable<? super T>> Comparator<T> order(
|
163
|
+
KintoneSortColumn.Order order) {
|
164
|
+
return order == KintoneSortColumn.Order.DESC
|
165
|
+
? Comparator.reverseOrder()
|
166
|
+
: Comparator.naturalOrder();
|
167
|
+
}
|
168
|
+
|
169
|
+
private static String asString(Value value) {
|
170
|
+
return value.isNilValue() ? null : value.asStringValue().asString();
|
171
|
+
}
|
172
|
+
|
173
|
+
private static MapValue sortValue(Value value) {
|
174
|
+
return value.asMapValue().map().get(SORT_VALUE).asMapValue();
|
175
|
+
}
|
176
|
+
|
177
|
+
private static MapValue value(Map<Value, Value> map, ArrayValue keySet) {
|
178
|
+
ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
|
179
|
+
keySet.forEach(key -> builder.put(key, map.get(key)));
|
180
|
+
return builder.build();
|
181
|
+
}
|
182
|
+
|
183
|
+
private static ArrayValue keySet(MapValue value) {
|
184
|
+
return ValueFactory.newArray(new ArrayList<>(value.asMapValue().keySet()));
|
185
|
+
}
|
186
|
+
|
187
|
+
private static ReduceType valueOf(Column column) {
|
188
|
+
return valueOf(column.getType().getName().toUpperCase());
|
189
|
+
}
|
190
|
+
}
|
@@ -0,0 +1,100 @@
|
|
1
|
+
package org.embulk.output.kintone.reducer;
|
2
|
+
|
3
|
+
import java.io.Closeable;
|
4
|
+
import java.io.File;
|
5
|
+
import java.io.IOException;
|
6
|
+
import java.io.OutputStreamWriter;
|
7
|
+
import java.lang.invoke.MethodHandles;
|
8
|
+
import java.nio.charset.StandardCharsets;
|
9
|
+
import java.nio.file.Files;
|
10
|
+
import org.apache.commons.csv.CSVPrinter;
|
11
|
+
import org.embulk.config.TaskReport;
|
12
|
+
import org.embulk.output.kintone.KintoneOutputPlugin;
|
13
|
+
import org.embulk.spi.ColumnVisitor;
|
14
|
+
import org.embulk.spi.Exec;
|
15
|
+
import org.embulk.spi.Page;
|
16
|
+
import org.embulk.spi.PageReader;
|
17
|
+
import org.embulk.spi.Schema;
|
18
|
+
import org.embulk.spi.TransactionalPageOutput;
|
19
|
+
import org.slf4j.Logger;
|
20
|
+
import org.slf4j.LoggerFactory;
|
21
|
+
|
22
|
+
public class ReducedPageOutput implements TransactionalPageOutput {
|
23
|
+
private static final Logger LOGGER =
|
24
|
+
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
25
|
+
private final PageReader reader;
|
26
|
+
private final File file;
|
27
|
+
private final CSVPrinter printer;
|
28
|
+
private final ColumnVisitor visitor;
|
29
|
+
|
30
|
+
public ReducedPageOutput(Schema schema, int taskIndex) {
|
31
|
+
reader = new PageReader(schema);
|
32
|
+
file = file(taskIndex);
|
33
|
+
printer = printer(file);
|
34
|
+
visitor = new CSVOutputColumnVisitor(reader, printer);
|
35
|
+
}
|
36
|
+
|
37
|
+
@Override
|
38
|
+
public void add(Page page) {
|
39
|
+
reader.setPage(page);
|
40
|
+
while (reader.nextRecord()) visitColumns();
|
41
|
+
}
|
42
|
+
|
43
|
+
@Override
|
44
|
+
public void finish() {}
|
45
|
+
|
46
|
+
@Override
|
47
|
+
public void close() {
|
48
|
+
reader.close();
|
49
|
+
close(printer);
|
50
|
+
}
|
51
|
+
|
52
|
+
@Override
|
53
|
+
public void abort() {}
|
54
|
+
|
55
|
+
@Override
|
56
|
+
public TaskReport commit() {
|
57
|
+
return Exec.newTaskReport().set("path", file.getPath());
|
58
|
+
}
|
59
|
+
|
60
|
+
private void visitColumns() {
|
61
|
+
reader.getSchema().visitColumns(visitor);
|
62
|
+
println(printer);
|
63
|
+
}
|
64
|
+
|
65
|
+
private static File file(int taskIndex) {
|
66
|
+
try {
|
67
|
+
return File.createTempFile(
|
68
|
+
String.format("%s.", KintoneOutputPlugin.class.getName()),
|
69
|
+
String.format(".%d", taskIndex));
|
70
|
+
} catch (IOException e) {
|
71
|
+
throw new ReduceException(e);
|
72
|
+
}
|
73
|
+
}
|
74
|
+
|
75
|
+
private static CSVPrinter printer(File file) {
|
76
|
+
try {
|
77
|
+
return new CSVPrinter(
|
78
|
+
new OutputStreamWriter(Files.newOutputStream(file.toPath()), StandardCharsets.UTF_8),
|
79
|
+
Reducer.FORMAT);
|
80
|
+
} catch (IOException e) {
|
81
|
+
throw new ReduceException(e);
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
private static void println(CSVPrinter printer) {
|
86
|
+
try {
|
87
|
+
printer.println();
|
88
|
+
} catch (IOException e) {
|
89
|
+
throw new ReduceException(e);
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
private static void close(Closeable closeable) {
|
94
|
+
try {
|
95
|
+
closeable.close();
|
96
|
+
} catch (IOException e) {
|
97
|
+
LOGGER.warn("close error", e);
|
98
|
+
}
|
99
|
+
}
|
100
|
+
}
|
@@ -0,0 +1,355 @@
|
|
1
|
+
package org.embulk.output.kintone.reducer;
|
2
|
+
|
3
|
+
import com.google.code.externalsorting.csv.CsvExternalSort;
|
4
|
+
import com.google.code.externalsorting.csv.CsvSortOptions;
|
5
|
+
import java.io.File;
|
6
|
+
import java.io.IOException;
|
7
|
+
import java.io.OutputStream;
|
8
|
+
import java.lang.invoke.MethodHandles;
|
9
|
+
import java.nio.charset.StandardCharsets;
|
10
|
+
import java.nio.file.Files;
|
11
|
+
import java.nio.file.Path;
|
12
|
+
import java.util.ArrayList;
|
13
|
+
import java.util.Collections;
|
14
|
+
import java.util.Comparator;
|
15
|
+
import java.util.List;
|
16
|
+
import java.util.Objects;
|
17
|
+
import java.util.concurrent.atomic.AtomicInteger;
|
18
|
+
import java.util.function.Function;
|
19
|
+
import java.util.function.Predicate;
|
20
|
+
import java.util.stream.Collectors;
|
21
|
+
import java.util.stream.IntStream;
|
22
|
+
import org.apache.commons.csv.CSVFormat;
|
23
|
+
import org.apache.commons.csv.CSVParser;
|
24
|
+
import org.apache.commons.csv.CSVRecord;
|
25
|
+
import org.apache.commons.csv.QuoteMode;
|
26
|
+
import org.embulk.config.ConfigDiff;
|
27
|
+
import org.embulk.config.TaskReport;
|
28
|
+
import org.embulk.output.kintone.KintoneColumnOption;
|
29
|
+
import org.embulk.output.kintone.KintoneColumnType;
|
30
|
+
import org.embulk.output.kintone.KintoneOutputPlugin;
|
31
|
+
import org.embulk.output.kintone.KintonePageOutput;
|
32
|
+
import org.embulk.output.kintone.KintoneSortColumn;
|
33
|
+
import org.embulk.output.kintone.PluginTask;
|
34
|
+
import org.embulk.spi.Column;
|
35
|
+
import org.embulk.spi.Exec;
|
36
|
+
import org.embulk.spi.PageBuilder;
|
37
|
+
import org.embulk.spi.Schema;
|
38
|
+
import org.embulk.spi.json.JsonParser;
|
39
|
+
import org.embulk.spi.type.Type;
|
40
|
+
import org.embulk.spi.type.Types;
|
41
|
+
import org.msgpack.value.ArrayValue;
|
42
|
+
import org.msgpack.value.MapValue;
|
43
|
+
import org.msgpack.value.Value;
|
44
|
+
import org.msgpack.value.ValueFactory;
|
45
|
+
import org.slf4j.Logger;
|
46
|
+
import org.slf4j.LoggerFactory;
|
47
|
+
|
48
|
+
public class Reducer {
|
49
|
+
protected static final CSVFormat FORMAT =
|
50
|
+
CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL_NON_NULL).build();
|
51
|
+
protected static final JsonParser PARSER = new JsonParser();
|
52
|
+
private static final Logger LOGGER =
|
53
|
+
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
54
|
+
private final PluginTask task;
|
55
|
+
private final List<Integer> indices;
|
56
|
+
private final int size;
|
57
|
+
private final Schema schema;
|
58
|
+
|
59
|
+
public Reducer(PluginTask task, Schema schema) {
|
60
|
+
this.task = task;
|
61
|
+
indices =
|
62
|
+
schema.getColumns().stream()
|
63
|
+
.filter(column -> !column.getName().matches("^.*\\..*$"))
|
64
|
+
.map(Column::getIndex)
|
65
|
+
.collect(Collectors.toList());
|
66
|
+
size = schema.size();
|
67
|
+
this.schema = schema(task, schema);
|
68
|
+
this.task.setDerivedColumns(
|
69
|
+
range().mapToObj(this.schema::getColumn).collect(Collectors.toSet()));
|
70
|
+
}
|
71
|
+
|
72
|
+
public ConfigDiff reduce(List<TaskReport> taskReports, Column column) {
|
73
|
+
File merged = file(".merged");
|
74
|
+
merge(taskReports, merged);
|
75
|
+
File sorted = file(".sorted");
|
76
|
+
sort(merged, sorted, sortOptions(task, schema, column));
|
77
|
+
AtomicInteger reduced = new AtomicInteger();
|
78
|
+
try (CSVParser parser = parser(sorted);
|
79
|
+
PageBuilder builder = builder(task, schema)) {
|
80
|
+
addRecords(column, reduced, parser, builder);
|
81
|
+
} catch (IOException e) {
|
82
|
+
throw new ReduceException(e);
|
83
|
+
}
|
84
|
+
if (reduced.get() % task.getChunkSize() != 0) {
|
85
|
+
LOGGER.info(String.format("Number of records reduced: %d", reduced.get()));
|
86
|
+
}
|
87
|
+
return Exec.newConfigDiff();
|
88
|
+
}
|
89
|
+
|
90
|
+
private void addRecords(
|
91
|
+
Column column, AtomicInteger reduced, CSVParser parser, PageBuilder builder) {
|
92
|
+
List<String> values = null;
|
93
|
+
for (CSVRecord record : parser) {
|
94
|
+
values = addRecord(column, reduced, builder, values, record);
|
95
|
+
}
|
96
|
+
if (values != null) {
|
97
|
+
addRecord(column, reduced, builder, values, null);
|
98
|
+
}
|
99
|
+
builder.finish();
|
100
|
+
}
|
101
|
+
|
102
|
+
private List<String> addRecord(
|
103
|
+
Column column,
|
104
|
+
AtomicInteger reduced,
|
105
|
+
PageBuilder builder,
|
106
|
+
List<String> values,
|
107
|
+
CSVRecord record) {
|
108
|
+
if (values == null && record == null) {
|
109
|
+
return null;
|
110
|
+
}
|
111
|
+
if (values == null) {
|
112
|
+
return values(record);
|
113
|
+
}
|
114
|
+
int index = column.getIndex();
|
115
|
+
if (record != null
|
116
|
+
&& values.get(index) != null
|
117
|
+
&& record.get(index) != null
|
118
|
+
&& values.get(index).equals(record.get(index))) {
|
119
|
+
return values(column, values, record);
|
120
|
+
}
|
121
|
+
schema.visitColumns(new CSVInputColumnVisitor(builder, values(values)));
|
122
|
+
builder.addRecord();
|
123
|
+
reduced.getAndIncrement();
|
124
|
+
if (reduced.get() % task.getChunkSize() == 0) {
|
125
|
+
LOGGER.info(String.format("Number of records reduced: %d", reduced.get()));
|
126
|
+
}
|
127
|
+
return record == null ? null : values(record);
|
128
|
+
}
|
129
|
+
|
130
|
+
private List<String> values(CSVRecord record) {
|
131
|
+
List<String> values = new ArrayList<>(record.toList());
|
132
|
+
range().forEach(index -> values.add(value(record, index).toJson()));
|
133
|
+
return values;
|
134
|
+
}
|
135
|
+
|
136
|
+
private ArrayValue value(CSVRecord record, int index) {
|
137
|
+
ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
|
138
|
+
String name = schema.getColumnName(index);
|
139
|
+
Predicate<Column> isId = column -> column.getName().equals(String.format("%s.$id", name));
|
140
|
+
Long id =
|
141
|
+
schema.getColumns().stream()
|
142
|
+
.filter(isId)
|
143
|
+
.findFirst()
|
144
|
+
.map(column -> record.get(column.getIndex()))
|
145
|
+
.filter(value -> !value.isEmpty())
|
146
|
+
.map(Long::parseLong)
|
147
|
+
.orElse(null);
|
148
|
+
Predicate<Column> predicate =
|
149
|
+
column -> column.getName().matches(String.format("^%s\\..*$", name));
|
150
|
+
Function<Column, String> function =
|
151
|
+
column -> column.getName().replaceFirst(String.format("^%s\\.", name), "");
|
152
|
+
schema.getColumns().stream()
|
153
|
+
.filter(isId.negate().and(predicate))
|
154
|
+
.forEach(column -> builder.put(key(function, column), value(record, column)));
|
155
|
+
MapValue value = builder.build();
|
156
|
+
return id == null && ReduceType.isEmpty(value)
|
157
|
+
? ValueFactory.emptyArray()
|
158
|
+
: ValueFactory.newArray(ReduceType.value(id, value, sortValue(record, index)));
|
159
|
+
}
|
160
|
+
|
161
|
+
private Value key(Function<Column, String> function, Column column) {
|
162
|
+
KintoneColumnOption option = task.getColumnOptions().get(column.getName());
|
163
|
+
return ReduceType.value(option != null ? option.getFieldCode() : function.apply(column));
|
164
|
+
}
|
165
|
+
|
166
|
+
private MapValue value(CSVRecord record, Column column) {
|
167
|
+
return ReduceType.value(column, record.toList(), task.getColumnOptions().get(column.getName()));
|
168
|
+
}
|
169
|
+
|
170
|
+
private MapValue sortValue(CSVRecord record, int index) {
|
171
|
+
ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
|
172
|
+
String name = schema.getColumnName(index);
|
173
|
+
Function<KintoneSortColumn, Column> column = sortColumn -> lookupColumn(name, sortColumn);
|
174
|
+
Function<KintoneSortColumn, Value> key = sortColumn -> ReduceType.value(sortColumn.getName());
|
175
|
+
Function<KintoneSortColumn, Value> value =
|
176
|
+
sortColumn -> ReduceType.value(record.get(column.apply(sortColumn).getIndex()));
|
177
|
+
getSortColumns(index)
|
178
|
+
.forEach(sortColumn -> builder.put(key.apply(sortColumn), value.apply(sortColumn)));
|
179
|
+
return builder.build();
|
180
|
+
}
|
181
|
+
|
182
|
+
private List<String> values(Column column, List<String> values, CSVRecord record) {
|
183
|
+
if (!indices.stream().allMatch(index -> Objects.equals(values.get(index), record.get(index)))) {
|
184
|
+
throw new ReduceException(
|
185
|
+
String.format(
|
186
|
+
"Couldn't reduce because column %s is not unique to %s\n%s expected %s but actual %s",
|
187
|
+
column.getName(),
|
188
|
+
range().mapToObj(schema::getColumnName).collect(Collectors.toList()),
|
189
|
+
indices.stream().map(schema::getColumnName).collect(Collectors.toList()),
|
190
|
+
indices.stream().map(values::get).collect(Collectors.toList()),
|
191
|
+
indices.stream().map(record::get).collect(Collectors.toList())));
|
192
|
+
}
|
193
|
+
range().forEach(index -> values.set(index, value(values, record, index).toJson()));
|
194
|
+
return values;
|
195
|
+
}
|
196
|
+
|
197
|
+
private ArrayValue value(List<String> values, CSVRecord record, int index) {
|
198
|
+
List<Value> list = new ArrayList<>(list(values, index));
|
199
|
+
list.addAll(value(record, index).list());
|
200
|
+
return list.isEmpty() ? ValueFactory.emptyArray() : ValueFactory.newArray(list);
|
201
|
+
}
|
202
|
+
|
203
|
+
private List<String> values(List<String> values) {
|
204
|
+
range().forEach(index -> values.set(index, value(values, index).toJson()));
|
205
|
+
return values;
|
206
|
+
}
|
207
|
+
|
208
|
+
private ArrayValue value(List<String> values, int index) {
|
209
|
+
List<Value> list =
|
210
|
+
list(values, index).stream()
|
211
|
+
.sorted(comparator(index))
|
212
|
+
.map(ReduceType::value)
|
213
|
+
.collect(Collectors.toList());
|
214
|
+
return list.isEmpty() ? ValueFactory.emptyArray() : ValueFactory.newArray(list);
|
215
|
+
}
|
216
|
+
|
217
|
+
private Comparator<Value> comparator(int index) {
|
218
|
+
String name = schema.getColumnName(index);
|
219
|
+
return getSortColumns(index).stream()
|
220
|
+
.map(sortColumn -> comparator(name, sortColumn))
|
221
|
+
.reduce(Comparator::thenComparing)
|
222
|
+
.orElse(Comparator.comparing(value -> 0));
|
223
|
+
}
|
224
|
+
|
225
|
+
private Comparator<Value> comparator(String name, KintoneSortColumn sortColumn) {
|
226
|
+
Column column = lookupColumn(name, sortColumn);
|
227
|
+
return Comparator.comparing(
|
228
|
+
value -> ReduceType.asString(value, sortColumn),
|
229
|
+
Comparator.nullsLast(ReduceType.comparator(column, sortColumn.getOrder())));
|
230
|
+
}
|
231
|
+
|
232
|
+
private List<KintoneSortColumn> getSortColumns(int index) {
|
233
|
+
KintoneColumnOption option = task.getColumnOptions().get(schema.getColumnName(index));
|
234
|
+
return option != null ? option.getSortColumns() : Collections.emptyList();
|
235
|
+
}
|
236
|
+
|
237
|
+
private Column lookupColumn(String name, KintoneSortColumn sortColumn) {
|
238
|
+
return schema.lookupColumn(String.format("%s.%s", name, sortColumn.getName()));
|
239
|
+
}
|
240
|
+
|
241
|
+
private IntStream range() {
|
242
|
+
return IntStream.range(size, schema.size());
|
243
|
+
}
|
244
|
+
|
245
|
+
private static Schema schema(PluginTask task, Schema schema) {
|
246
|
+
Schema.Builder builder = Schema.builder();
|
247
|
+
schema.getColumns().forEach(column -> builder.add(column.getName(), column.getType()));
|
248
|
+
schema.getColumns().stream()
|
249
|
+
.map(Column::getName)
|
250
|
+
.filter(name -> name.matches("^.*\\..*$"))
|
251
|
+
.map(name -> name.replaceFirst("\\..*$", ""))
|
252
|
+
.distinct()
|
253
|
+
.forEach(name -> builder.add(name, type(task, name)));
|
254
|
+
return builder.build();
|
255
|
+
}
|
256
|
+
|
257
|
+
private static Type type(PluginTask task, String name) {
|
258
|
+
return KintoneColumnType.getType(task.getColumnOptions().get(name), KintoneColumnType.SUBTABLE)
|
259
|
+
== KintoneColumnType.SUBTABLE
|
260
|
+
? Types.JSON
|
261
|
+
: Types.STRING;
|
262
|
+
}
|
263
|
+
|
264
|
+
private static File file(String suffix) {
|
265
|
+
try {
|
266
|
+
return File.createTempFile(String.format("%s.", KintoneOutputPlugin.class.getName()), suffix);
|
267
|
+
} catch (IOException e) {
|
268
|
+
throw new ReduceException(e);
|
269
|
+
}
|
270
|
+
}
|
271
|
+
|
272
|
+
private static void merge(List<TaskReport> taskReports, File merged) {
|
273
|
+
try (OutputStream out = Files.newOutputStream(merged.toPath())) {
|
274
|
+
long bytes =
|
275
|
+
taskReports.stream()
|
276
|
+
.map(taskReport -> new File(taskReport.get(String.class, "path")).toPath())
|
277
|
+
.mapToLong(source -> copy(source, out))
|
278
|
+
.sum();
|
279
|
+
LOGGER.info(String.format("Number of bytes merged: %d", bytes));
|
280
|
+
} catch (IOException e) {
|
281
|
+
throw new ReduceException(e);
|
282
|
+
}
|
283
|
+
}
|
284
|
+
|
285
|
+
private static long copy(Path source, OutputStream out) {
|
286
|
+
try {
|
287
|
+
long bytes = Files.copy(source, out);
|
288
|
+
LOGGER.info(String.format("Number of bytes copied: %d", bytes));
|
289
|
+
return bytes;
|
290
|
+
} catch (IOException e) {
|
291
|
+
throw new ReduceException(e);
|
292
|
+
}
|
293
|
+
}
|
294
|
+
|
295
|
+
private static void sort(File merged, File sorted, CsvSortOptions sortOptions) {
|
296
|
+
try {
|
297
|
+
int lines =
|
298
|
+
CsvExternalSort.mergeSortedFiles(
|
299
|
+
CsvExternalSort.sortInBatch(merged, null, sortOptions, new ArrayList<>()),
|
300
|
+
sorted,
|
301
|
+
sortOptions,
|
302
|
+
false,
|
303
|
+
Collections.emptyList());
|
304
|
+
LOGGER.info(String.format("Number of lines sorted: %d", lines));
|
305
|
+
} catch (IOException | ClassNotFoundException e) {
|
306
|
+
throw new ReduceException(e);
|
307
|
+
}
|
308
|
+
}
|
309
|
+
|
310
|
+
private static CsvSortOptions sortOptions(PluginTask task, Schema schema, Column column) {
|
311
|
+
List<KintoneSortColumn> sortColumns = new ArrayList<>();
|
312
|
+
sortColumns.add(new KintoneSortColumn(column.getName(), KintoneSortColumn.Order.ASC));
|
313
|
+
sortColumns.addAll(task.getSortColumns());
|
314
|
+
return new CsvSortOptions.Builder(
|
315
|
+
comparator(schema, sortColumns),
|
316
|
+
task.getMaxSortTmpFiles().orElse(CsvExternalSort.DEFAULTMAXTEMPFILES),
|
317
|
+
task.getMaxSortMemory().orElse(CsvExternalSort.estimateAvailableMemory()))
|
318
|
+
.charset(StandardCharsets.UTF_8)
|
319
|
+
.format(FORMAT)
|
320
|
+
.build();
|
321
|
+
}
|
322
|
+
|
323
|
+
private static Comparator<CSVRecord> comparator(
|
324
|
+
Schema schema, List<KintoneSortColumn> sortColumns) {
|
325
|
+
Function<KintoneSortColumn, Comparator<CSVRecord>> function =
|
326
|
+
sortColumn -> comparator(schema, sortColumn);
|
327
|
+
return sortColumns.stream()
|
328
|
+
.skip(1)
|
329
|
+
.map(function)
|
330
|
+
.reduce(function.apply(sortColumns.get(0)), Comparator::thenComparing);
|
331
|
+
}
|
332
|
+
|
333
|
+
private static Comparator<CSVRecord> comparator(Schema schema, KintoneSortColumn sortColumn) {
|
334
|
+
Column column = schema.lookupColumn(sortColumn.getName());
|
335
|
+
return Comparator.comparing(
|
336
|
+
record -> record.get(column.getIndex()),
|
337
|
+
Comparator.nullsLast(ReduceType.comparator(column, sortColumn.getOrder())));
|
338
|
+
}
|
339
|
+
|
340
|
+
private static CSVParser parser(File sorted) {
|
341
|
+
try {
|
342
|
+
return CSVParser.parse(sorted, StandardCharsets.UTF_8, FORMAT);
|
343
|
+
} catch (IOException e) {
|
344
|
+
throw new ReduceException(e);
|
345
|
+
}
|
346
|
+
}
|
347
|
+
|
348
|
+
private static PageBuilder builder(PluginTask task, Schema schema) {
|
349
|
+
return new PageBuilder(Exec.getBufferAllocator(), schema, new KintonePageOutput(task, schema));
|
350
|
+
}
|
351
|
+
|
352
|
+
private static List<Value> list(List<String> values, int index) {
|
353
|
+
return PARSER.parse(values.get(index)).asArrayValue().list();
|
354
|
+
}
|
355
|
+
}
|
@@ -1,5 +1,7 @@
|
|
1
1
|
package org.embulk.output.kintone;
|
2
2
|
|
3
|
+
import java.util.Collections;
|
4
|
+
import java.util.List;
|
3
5
|
import org.embulk.config.TaskSource;
|
4
6
|
|
5
7
|
public class KintoneColumnOptionBuilder {
|
@@ -50,6 +52,11 @@ public class KintoneColumnOptionBuilder {
|
|
50
52
|
return valueSeparator;
|
51
53
|
}
|
52
54
|
|
55
|
+
@Override
|
56
|
+
public List<KintoneSortColumn> getSortColumns() {
|
57
|
+
return Collections.emptyList();
|
58
|
+
}
|
59
|
+
|
53
60
|
@Override
|
54
61
|
public void validate() {}
|
55
62
|
|