embulk-output-kintone 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +57 -1
  3. data/build.gradle +1 -0
  4. data/classpath/commons-csv-1.9.0.jar +0 -0
  5. data/classpath/embulk-output-kintone-1.1.0.jar +0 -0
  6. data/classpath/externalsortinginjava-0.6.2.jar +0 -0
  7. data/classpath/{shadow-kintone-java-client-1.0.0-all.jar → shadow-kintone-java-client-1.1.0-all.jar} +0 -0
  8. data/src/main/java/org/embulk/output/kintone/KintoneColumnOption.java +5 -0
  9. data/src/main/java/org/embulk/output/kintone/KintoneColumnType.java +209 -5
  10. data/src/main/java/org/embulk/output/kintone/KintoneColumnVisitor.java +28 -9
  11. data/src/main/java/org/embulk/output/kintone/KintoneOutputPlugin.java +12 -3
  12. data/src/main/java/org/embulk/output/kintone/KintonePageOutput.java +20 -12
  13. data/src/main/java/org/embulk/output/kintone/KintoneSortColumn.java +33 -0
  14. data/src/main/java/org/embulk/output/kintone/PluginTask.java +27 -0
  15. data/src/main/java/org/embulk/output/kintone/deserializer/DeserializeApplier.java +19 -0
  16. data/src/main/java/org/embulk/output/kintone/deserializer/DeserializeException.java +7 -0
  17. data/src/main/java/org/embulk/output/kintone/deserializer/Deserializer.java +279 -0
  18. data/src/main/java/org/embulk/output/kintone/reducer/CSVInputColumnVisitor.java +78 -0
  19. data/src/main/java/org/embulk/output/kintone/reducer/CSVOutputColumnVisitor.java +79 -0
  20. data/src/main/java/org/embulk/output/kintone/reducer/ReduceException.java +11 -0
  21. data/src/main/java/org/embulk/output/kintone/reducer/ReduceType.java +190 -0
  22. data/src/main/java/org/embulk/output/kintone/reducer/ReducedPageOutput.java +100 -0
  23. data/src/main/java/org/embulk/output/kintone/reducer/Reducer.java +355 -0
  24. data/src/test/java/org/embulk/output/kintone/KintoneColumnOptionBuilder.java +7 -0
  25. data/src/test/java/org/embulk/output/kintone/KintoneColumnTypeTest.java +194 -0
  26. data/src/test/java/org/embulk/output/kintone/KintoneColumnVisitorTest.java +153 -34
  27. data/src/test/java/org/embulk/output/kintone/KintoneColumnVisitorVerifier.java +13 -3
  28. data/src/test/java/org/embulk/output/kintone/KintonePageOutputVerifier.java +44 -1
  29. data/src/test/java/org/embulk/output/kintone/TestKintoneOutputPlugin.java +89 -12
  30. data/src/test/java/org/embulk/output/kintone/TestTaskReduce.java +46 -0
  31. data/src/test/java/org/embulk/output/kintone/TestTaskReduceException.java +50 -0
  32. data/src/test/java/org/embulk/output/kintone/TestTaskReduceSubtable.java +46 -0
  33. data/src/test/java/org/embulk/output/kintone/deserializer/DeserializerTest.java +165 -0
  34. data/src/test/java/org/embulk/output/kintone/reducer/ReduceTypeTest.java +154 -0
  35. data/src/test/resources/org/embulk/output/kintone/task/config.yml +1 -1
  36. data/src/test/resources/org/embulk/output/kintone/task/mode/config.yml +6 -0
  37. data/src/test/resources/org/embulk/output/kintone/task/mode/input.csv +7 -7
  38. data/src/test/resources/org/embulk/output/kintone/task/mode/insert_add_ignore_nulls_records.jsonl +2 -2
  39. data/src/test/resources/org/embulk/output/kintone/task/mode/insert_add_prefer_nulls_records.jsonl +6 -6
  40. data/src/test/resources/org/embulk/output/kintone/task/mode/insert_add_records.jsonl +6 -6
  41. data/src/test/resources/org/embulk/output/kintone/task/mode/update_update_ignore_nulls_records.jsonl +2 -2
  42. data/src/test/resources/org/embulk/output/kintone/task/mode/update_update_prefer_nulls_records.jsonl +3 -3
  43. data/src/test/resources/org/embulk/output/kintone/task/mode/update_update_records.jsonl +6 -6
  44. data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_add_prefer_nulls_records.jsonl +3 -3
  45. data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_add_records.jsonl +2 -2
  46. data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_update_ignore_nulls_records.jsonl +2 -2
  47. data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_update_prefer_nulls_records.jsonl +3 -3
  48. data/src/test/resources/org/embulk/output/kintone/task/mode/upsert_update_records.jsonl +4 -4
  49. data/src/test/resources/org/embulk/output/kintone/task/reduce/config.yml +171 -0
  50. data/src/test/resources/org/embulk/output/kintone/task/reduce/input.csv +7 -0
  51. data/src/test/resources/org/embulk/output/kintone/task/reduce/insert_add_ignore_nulls_records.jsonl +6 -0
  52. data/src/test/resources/org/embulk/output/kintone/task/reduce/insert_add_prefer_nulls_records.jsonl +6 -0
  53. data/src/test/resources/org/embulk/output/kintone/task/reduce/insert_add_records.jsonl +6 -0
  54. data/src/test/resources/org/embulk/output/kintone/task/reduce/update_update_ignore_nulls_records.jsonl +3 -0
  55. data/src/test/resources/org/embulk/output/kintone/task/reduce/update_update_prefer_nulls_records.jsonl +3 -0
  56. data/src/test/resources/org/embulk/output/kintone/task/reduce/update_update_records.jsonl +6 -0
  57. data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_add_ignore_nulls_records.jsonl +3 -0
  58. data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_add_prefer_nulls_records.jsonl +3 -0
  59. data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_add_records.jsonl +2 -0
  60. data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_update_ignore_nulls_records.jsonl +3 -0
  61. data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_update_prefer_nulls_records.jsonl +3 -0
  62. data/src/test/resources/org/embulk/output/kintone/task/reduce/upsert_update_records.jsonl +4 -0
  63. data/src/test/resources/org/embulk/output/kintone/task/reduce/values.json +1 -0
  64. data/src/test/resources/org/embulk/output/kintone/task/reduce/values_ignore_nulls.json +1 -0
  65. data/src/test/resources/org/embulk/output/kintone/task/reduce/values_prefer_nulls.json +1 -0
  66. data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/config.yml +36 -0
  67. data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/derived_columns.json +1 -0
  68. data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/input.csv +13 -0
  69. data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/insert_add_records.jsonl +2 -0
  70. data/src/test/resources/org/embulk/output/kintone/task/reduce_exception/update_update_records.jsonl +2 -0
  71. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/config.yml +343 -0
  72. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/derived_columns.json +1 -0
  73. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/input.csv +13 -0
  74. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/insert_add_ignore_nulls_records.jsonl +6 -0
  75. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/insert_add_prefer_nulls_records.jsonl +6 -0
  76. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/insert_add_records.jsonl +6 -0
  77. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/update_update_ignore_nulls_records.jsonl +3 -0
  78. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/update_update_prefer_nulls_records.jsonl +3 -0
  79. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/update_update_records.jsonl +6 -0
  80. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_add_ignore_nulls_records.jsonl +3 -0
  81. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_add_prefer_nulls_records.jsonl +3 -0
  82. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_add_records.jsonl +0 -0
  83. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_update_ignore_nulls_records.jsonl +3 -0
  84. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_update_prefer_nulls_records.jsonl +3 -0
  85. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/upsert_update_records.jsonl +6 -0
  86. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/values.json +1 -0
  87. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/values_ignore_nulls.json +1 -0
  88. data/src/test/resources/org/embulk/output/kintone/task/reduce_subtable/values_prefer_nulls.json +1 -0
  89. metadata +62 -4
  90. data/classpath/embulk-output-kintone-1.0.0.jar +0 -0
@@ -0,0 +1,190 @@
1
+ package org.embulk.output.kintone.reducer;
2
+
3
+ import java.time.Instant;
4
+ import java.util.ArrayList;
5
+ import java.util.Comparator;
6
+ import java.util.List;
7
+ import java.util.Map;
8
+ import java.util.function.Supplier;
9
+ import org.embulk.output.kintone.KintoneColumnOption;
10
+ import org.embulk.output.kintone.KintoneColumnType;
11
+ import org.embulk.output.kintone.KintoneSortColumn;
12
+ import org.embulk.spi.Column;
13
+ import org.embulk.spi.time.Timestamp;
14
+ import org.msgpack.value.ArrayValue;
15
+ import org.msgpack.value.MapValue;
16
+ import org.msgpack.value.Value;
17
+ import org.msgpack.value.ValueFactory;
18
+
19
+ public enum ReduceType {
20
+ BOOLEAN {
21
+ @Override
22
+ public MapValue value(String value, KintoneColumnOption option) {
23
+ KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.NUMBER);
24
+ Supplier<Value> supplier =
25
+ () -> type.asValue(type.getFieldValue(Boolean.parseBoolean(value), option));
26
+ return value(type, value, supplier);
27
+ }
28
+
29
+ @Override
30
+ public Comparator<String> comparator(KintoneSortColumn.Order order) {
31
+ return Comparator.comparing(Boolean::parseBoolean, order(order));
32
+ }
33
+ },
34
+ LONG {
35
+ @Override
36
+ public MapValue value(String value, KintoneColumnOption option) {
37
+ KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.NUMBER);
38
+ Supplier<Value> supplier =
39
+ () -> type.asValue(type.getFieldValue(Long.parseLong(value), option));
40
+ return value(type, value, supplier);
41
+ }
42
+
43
+ @Override
44
+ public Comparator<String> comparator(KintoneSortColumn.Order order) {
45
+ return Comparator.comparing(Long::parseLong, order(order));
46
+ }
47
+ },
48
+ DOUBLE {
49
+ @Override
50
+ public MapValue value(String value, KintoneColumnOption option) {
51
+ KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.NUMBER);
52
+ Supplier<Value> supplier =
53
+ () -> type.asValue(type.getFieldValue(Double.parseDouble(value), option));
54
+ return value(type, value, supplier);
55
+ }
56
+
57
+ @Override
58
+ public Comparator<String> comparator(KintoneSortColumn.Order order) {
59
+ return Comparator.comparing(Double::parseDouble, order(order));
60
+ }
61
+ },
62
+ STRING {
63
+ @Override
64
+ public MapValue value(String value, KintoneColumnOption option) {
65
+ KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.MULTI_LINE_TEXT);
66
+ Supplier<Value> supplier = () -> type.asValue(type.getFieldValue(value, option));
67
+ return value(type, value, supplier);
68
+ }
69
+
70
+ @Override
71
+ public Comparator<String> comparator(KintoneSortColumn.Order order) {
72
+ return order(order);
73
+ }
74
+ },
75
+ TIMESTAMP {
76
+ @Override
77
+ public MapValue value(String value, KintoneColumnOption option) {
78
+ KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.DATETIME);
79
+ Supplier<Value> supplier =
80
+ () -> type.asValue(type.getFieldValue(Timestamp.ofInstant(Instant.parse(value)), option));
81
+ return value(type, value, supplier);
82
+ }
83
+
84
+ @Override
85
+ public Comparator<String> comparator(KintoneSortColumn.Order order) {
86
+ return Comparator.comparing(Instant::parse, order(order));
87
+ }
88
+ },
89
+ JSON {
90
+ @Override
91
+ public MapValue value(String value, KintoneColumnOption option) {
92
+ KintoneColumnType type = KintoneColumnType.getType(option, KintoneColumnType.MULTI_LINE_TEXT);
93
+ Supplier<Value> supplier =
94
+ () -> type.asValue(type.getFieldValue(Reducer.PARSER.parse(value), option));
95
+ return value(type, value, supplier);
96
+ }
97
+
98
+ @Override
99
+ public Comparator<String> comparator(KintoneSortColumn.Order order) {
100
+ return order(order);
101
+ }
102
+ };
103
+ private static final Value NIL = ValueFactory.newNil();
104
+ private static final Value ID = ValueFactory.newString("id");
105
+ private static final Value TYPE = ValueFactory.newString("type");
106
+ private static final Value VALUE = ValueFactory.newString("value");
107
+ private static final Value KEY_SET = ValueFactory.newString("$$key_set");
108
+ private static final Value SORT_VALUE = ValueFactory.newString("$$sort_value");
109
+
110
+ public abstract MapValue value(String value, KintoneColumnOption option);
111
+
112
+ public abstract Comparator<String> comparator(KintoneSortColumn.Order order);
113
+
114
+ public static Comparator<String> comparator(Column column, KintoneSortColumn.Order order) {
115
+ return valueOf(column).comparator(order);
116
+ }
117
+
118
+ public static String asString(Value value, KintoneSortColumn sortColumn) {
119
+ return asString(sortValue(value).map().get(value(sortColumn.getName())));
120
+ }
121
+
122
+ public static boolean isEmpty(MapValue value) {
123
+ return value.values().stream()
124
+ .map(Value::asMapValue)
125
+ .map(MapValue::map)
126
+ .map(map -> map.get(VALUE))
127
+ .allMatch(Value::isNilValue);
128
+ }
129
+
130
+ public static Value value(String value) {
131
+ return value == null ? NIL : ValueFactory.newString(value);
132
+ }
133
+
134
+ public static MapValue value(Value value) {
135
+ ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
136
+ Map<Value, Value> map = value.asMapValue().map();
137
+ builder.put(ID, map.get(ID));
138
+ builder.put(VALUE, value(map.get(VALUE).asMapValue().map(), map.get(KEY_SET).asArrayValue()));
139
+ return builder.build();
140
+ }
141
+
142
+ public static MapValue value(Long id, MapValue value, MapValue sortValue) {
143
+ ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
144
+ builder.put(ID, id == null ? NIL : ValueFactory.newString(id.toString()));
145
+ builder.put(VALUE, value == null ? ValueFactory.emptyMap() : value);
146
+ builder.put(KEY_SET, value == null ? ValueFactory.emptyArray() : keySet(value));
147
+ builder.put(SORT_VALUE, sortValue == null ? ValueFactory.emptyMap() : sortValue);
148
+ return builder.build();
149
+ }
150
+
151
+ public static MapValue value(Column column, List<String> values, KintoneColumnOption option) {
152
+ return valueOf(column).value(values.get(column.getIndex()), option);
153
+ }
154
+
155
+ protected static MapValue value(KintoneColumnType type, String value, Supplier<Value> supplier) {
156
+ ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
157
+ builder.put(TYPE, value(type.name()));
158
+ builder.put(VALUE, value == null ? NIL : supplier.get());
159
+ return builder.build();
160
+ }
161
+
162
+ private static <T extends Comparable<? super T>> Comparator<T> order(
163
+ KintoneSortColumn.Order order) {
164
+ return order == KintoneSortColumn.Order.DESC
165
+ ? Comparator.reverseOrder()
166
+ : Comparator.naturalOrder();
167
+ }
168
+
169
+ private static String asString(Value value) {
170
+ return value.isNilValue() ? null : value.asStringValue().asString();
171
+ }
172
+
173
+ private static MapValue sortValue(Value value) {
174
+ return value.asMapValue().map().get(SORT_VALUE).asMapValue();
175
+ }
176
+
177
+ private static MapValue value(Map<Value, Value> map, ArrayValue keySet) {
178
+ ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
179
+ keySet.forEach(key -> builder.put(key, map.get(key)));
180
+ return builder.build();
181
+ }
182
+
183
+ private static ArrayValue keySet(MapValue value) {
184
+ return ValueFactory.newArray(new ArrayList<>(value.asMapValue().keySet()));
185
+ }
186
+
187
+ private static ReduceType valueOf(Column column) {
188
+ return valueOf(column.getType().getName().toUpperCase());
189
+ }
190
+ }
@@ -0,0 +1,100 @@
1
+ package org.embulk.output.kintone.reducer;
2
+
3
+ import java.io.Closeable;
4
+ import java.io.File;
5
+ import java.io.IOException;
6
+ import java.io.OutputStreamWriter;
7
+ import java.lang.invoke.MethodHandles;
8
+ import java.nio.charset.StandardCharsets;
9
+ import java.nio.file.Files;
10
+ import org.apache.commons.csv.CSVPrinter;
11
+ import org.embulk.config.TaskReport;
12
+ import org.embulk.output.kintone.KintoneOutputPlugin;
13
+ import org.embulk.spi.ColumnVisitor;
14
+ import org.embulk.spi.Exec;
15
+ import org.embulk.spi.Page;
16
+ import org.embulk.spi.PageReader;
17
+ import org.embulk.spi.Schema;
18
+ import org.embulk.spi.TransactionalPageOutput;
19
+ import org.slf4j.Logger;
20
+ import org.slf4j.LoggerFactory;
21
+
22
+ public class ReducedPageOutput implements TransactionalPageOutput {
23
+ private static final Logger LOGGER =
24
+ LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
25
+ private final PageReader reader;
26
+ private final File file;
27
+ private final CSVPrinter printer;
28
+ private final ColumnVisitor visitor;
29
+
30
+ public ReducedPageOutput(Schema schema, int taskIndex) {
31
+ reader = new PageReader(schema);
32
+ file = file(taskIndex);
33
+ printer = printer(file);
34
+ visitor = new CSVOutputColumnVisitor(reader, printer);
35
+ }
36
+
37
+ @Override
38
+ public void add(Page page) {
39
+ reader.setPage(page);
40
+ while (reader.nextRecord()) visitColumns();
41
+ }
42
+
43
+ @Override
44
+ public void finish() {}
45
+
46
+ @Override
47
+ public void close() {
48
+ reader.close();
49
+ close(printer);
50
+ }
51
+
52
+ @Override
53
+ public void abort() {}
54
+
55
+ @Override
56
+ public TaskReport commit() {
57
+ return Exec.newTaskReport().set("path", file.getPath());
58
+ }
59
+
60
+ private void visitColumns() {
61
+ reader.getSchema().visitColumns(visitor);
62
+ println(printer);
63
+ }
64
+
65
+ private static File file(int taskIndex) {
66
+ try {
67
+ return File.createTempFile(
68
+ String.format("%s.", KintoneOutputPlugin.class.getName()),
69
+ String.format(".%d", taskIndex));
70
+ } catch (IOException e) {
71
+ throw new ReduceException(e);
72
+ }
73
+ }
74
+
75
+ private static CSVPrinter printer(File file) {
76
+ try {
77
+ return new CSVPrinter(
78
+ new OutputStreamWriter(Files.newOutputStream(file.toPath()), StandardCharsets.UTF_8),
79
+ Reducer.FORMAT);
80
+ } catch (IOException e) {
81
+ throw new ReduceException(e);
82
+ }
83
+ }
84
+
85
+ private static void println(CSVPrinter printer) {
86
+ try {
87
+ printer.println();
88
+ } catch (IOException e) {
89
+ throw new ReduceException(e);
90
+ }
91
+ }
92
+
93
+ private static void close(Closeable closeable) {
94
+ try {
95
+ closeable.close();
96
+ } catch (IOException e) {
97
+ LOGGER.warn("close error", e);
98
+ }
99
+ }
100
+ }
@@ -0,0 +1,355 @@
1
+ package org.embulk.output.kintone.reducer;
2
+
3
+ import com.google.code.externalsorting.csv.CsvExternalSort;
4
+ import com.google.code.externalsorting.csv.CsvSortOptions;
5
+ import java.io.File;
6
+ import java.io.IOException;
7
+ import java.io.OutputStream;
8
+ import java.lang.invoke.MethodHandles;
9
+ import java.nio.charset.StandardCharsets;
10
+ import java.nio.file.Files;
11
+ import java.nio.file.Path;
12
+ import java.util.ArrayList;
13
+ import java.util.Collections;
14
+ import java.util.Comparator;
15
+ import java.util.List;
16
+ import java.util.Objects;
17
+ import java.util.concurrent.atomic.AtomicInteger;
18
+ import java.util.function.Function;
19
+ import java.util.function.Predicate;
20
+ import java.util.stream.Collectors;
21
+ import java.util.stream.IntStream;
22
+ import org.apache.commons.csv.CSVFormat;
23
+ import org.apache.commons.csv.CSVParser;
24
+ import org.apache.commons.csv.CSVRecord;
25
+ import org.apache.commons.csv.QuoteMode;
26
+ import org.embulk.config.ConfigDiff;
27
+ import org.embulk.config.TaskReport;
28
+ import org.embulk.output.kintone.KintoneColumnOption;
29
+ import org.embulk.output.kintone.KintoneColumnType;
30
+ import org.embulk.output.kintone.KintoneOutputPlugin;
31
+ import org.embulk.output.kintone.KintonePageOutput;
32
+ import org.embulk.output.kintone.KintoneSortColumn;
33
+ import org.embulk.output.kintone.PluginTask;
34
+ import org.embulk.spi.Column;
35
+ import org.embulk.spi.Exec;
36
+ import org.embulk.spi.PageBuilder;
37
+ import org.embulk.spi.Schema;
38
+ import org.embulk.spi.json.JsonParser;
39
+ import org.embulk.spi.type.Type;
40
+ import org.embulk.spi.type.Types;
41
+ import org.msgpack.value.ArrayValue;
42
+ import org.msgpack.value.MapValue;
43
+ import org.msgpack.value.Value;
44
+ import org.msgpack.value.ValueFactory;
45
+ import org.slf4j.Logger;
46
+ import org.slf4j.LoggerFactory;
47
+
48
+ public class Reducer {
49
+ protected static final CSVFormat FORMAT =
50
+ CSVFormat.DEFAULT.builder().setNullString("").setQuoteMode(QuoteMode.ALL_NON_NULL).build();
51
+ protected static final JsonParser PARSER = new JsonParser();
52
+ private static final Logger LOGGER =
53
+ LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
54
+ private final PluginTask task;
55
+ private final List<Integer> indices;
56
+ private final int size;
57
+ private final Schema schema;
58
+
59
+ public Reducer(PluginTask task, Schema schema) {
60
+ this.task = task;
61
+ indices =
62
+ schema.getColumns().stream()
63
+ .filter(column -> !column.getName().matches("^.*\\..*$"))
64
+ .map(Column::getIndex)
65
+ .collect(Collectors.toList());
66
+ size = schema.size();
67
+ this.schema = schema(task, schema);
68
+ this.task.setDerivedColumns(
69
+ range().mapToObj(this.schema::getColumn).collect(Collectors.toSet()));
70
+ }
71
+
72
+ public ConfigDiff reduce(List<TaskReport> taskReports, Column column) {
73
+ File merged = file(".merged");
74
+ merge(taskReports, merged);
75
+ File sorted = file(".sorted");
76
+ sort(merged, sorted, sortOptions(task, schema, column));
77
+ AtomicInteger reduced = new AtomicInteger();
78
+ try (CSVParser parser = parser(sorted);
79
+ PageBuilder builder = builder(task, schema)) {
80
+ addRecords(column, reduced, parser, builder);
81
+ } catch (IOException e) {
82
+ throw new ReduceException(e);
83
+ }
84
+ if (reduced.get() % task.getChunkSize() != 0) {
85
+ LOGGER.info(String.format("Number of records reduced: %d", reduced.get()));
86
+ }
87
+ return Exec.newConfigDiff();
88
+ }
89
+
90
+ private void addRecords(
91
+ Column column, AtomicInteger reduced, CSVParser parser, PageBuilder builder) {
92
+ List<String> values = null;
93
+ for (CSVRecord record : parser) {
94
+ values = addRecord(column, reduced, builder, values, record);
95
+ }
96
+ if (values != null) {
97
+ addRecord(column, reduced, builder, values, null);
98
+ }
99
+ builder.finish();
100
+ }
101
+
102
+ private List<String> addRecord(
103
+ Column column,
104
+ AtomicInteger reduced,
105
+ PageBuilder builder,
106
+ List<String> values,
107
+ CSVRecord record) {
108
+ if (values == null && record == null) {
109
+ return null;
110
+ }
111
+ if (values == null) {
112
+ return values(record);
113
+ }
114
+ int index = column.getIndex();
115
+ if (record != null
116
+ && values.get(index) != null
117
+ && record.get(index) != null
118
+ && values.get(index).equals(record.get(index))) {
119
+ return values(column, values, record);
120
+ }
121
+ schema.visitColumns(new CSVInputColumnVisitor(builder, values(values)));
122
+ builder.addRecord();
123
+ reduced.getAndIncrement();
124
+ if (reduced.get() % task.getChunkSize() == 0) {
125
+ LOGGER.info(String.format("Number of records reduced: %d", reduced.get()));
126
+ }
127
+ return record == null ? null : values(record);
128
+ }
129
+
130
+ private List<String> values(CSVRecord record) {
131
+ List<String> values = new ArrayList<>(record.toList());
132
+ range().forEach(index -> values.add(value(record, index).toJson()));
133
+ return values;
134
+ }
135
+
136
+ private ArrayValue value(CSVRecord record, int index) {
137
+ ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
138
+ String name = schema.getColumnName(index);
139
+ Predicate<Column> isId = column -> column.getName().equals(String.format("%s.$id", name));
140
+ Long id =
141
+ schema.getColumns().stream()
142
+ .filter(isId)
143
+ .findFirst()
144
+ .map(column -> record.get(column.getIndex()))
145
+ .filter(value -> !value.isEmpty())
146
+ .map(Long::parseLong)
147
+ .orElse(null);
148
+ Predicate<Column> predicate =
149
+ column -> column.getName().matches(String.format("^%s\\..*$", name));
150
+ Function<Column, String> function =
151
+ column -> column.getName().replaceFirst(String.format("^%s\\.", name), "");
152
+ schema.getColumns().stream()
153
+ .filter(isId.negate().and(predicate))
154
+ .forEach(column -> builder.put(key(function, column), value(record, column)));
155
+ MapValue value = builder.build();
156
+ return id == null && ReduceType.isEmpty(value)
157
+ ? ValueFactory.emptyArray()
158
+ : ValueFactory.newArray(ReduceType.value(id, value, sortValue(record, index)));
159
+ }
160
+
161
+ private Value key(Function<Column, String> function, Column column) {
162
+ KintoneColumnOption option = task.getColumnOptions().get(column.getName());
163
+ return ReduceType.value(option != null ? option.getFieldCode() : function.apply(column));
164
+ }
165
+
166
+ private MapValue value(CSVRecord record, Column column) {
167
+ return ReduceType.value(column, record.toList(), task.getColumnOptions().get(column.getName()));
168
+ }
169
+
170
+ private MapValue sortValue(CSVRecord record, int index) {
171
+ ValueFactory.MapBuilder builder = ValueFactory.newMapBuilder();
172
+ String name = schema.getColumnName(index);
173
+ Function<KintoneSortColumn, Column> column = sortColumn -> lookupColumn(name, sortColumn);
174
+ Function<KintoneSortColumn, Value> key = sortColumn -> ReduceType.value(sortColumn.getName());
175
+ Function<KintoneSortColumn, Value> value =
176
+ sortColumn -> ReduceType.value(record.get(column.apply(sortColumn).getIndex()));
177
+ getSortColumns(index)
178
+ .forEach(sortColumn -> builder.put(key.apply(sortColumn), value.apply(sortColumn)));
179
+ return builder.build();
180
+ }
181
+
182
+ private List<String> values(Column column, List<String> values, CSVRecord record) {
183
+ if (!indices.stream().allMatch(index -> Objects.equals(values.get(index), record.get(index)))) {
184
+ throw new ReduceException(
185
+ String.format(
186
+ "Couldn't reduce because column %s is not unique to %s\n%s expected %s but actual %s",
187
+ column.getName(),
188
+ range().mapToObj(schema::getColumnName).collect(Collectors.toList()),
189
+ indices.stream().map(schema::getColumnName).collect(Collectors.toList()),
190
+ indices.stream().map(values::get).collect(Collectors.toList()),
191
+ indices.stream().map(record::get).collect(Collectors.toList())));
192
+ }
193
+ range().forEach(index -> values.set(index, value(values, record, index).toJson()));
194
+ return values;
195
+ }
196
+
197
+ private ArrayValue value(List<String> values, CSVRecord record, int index) {
198
+ List<Value> list = new ArrayList<>(list(values, index));
199
+ list.addAll(value(record, index).list());
200
+ return list.isEmpty() ? ValueFactory.emptyArray() : ValueFactory.newArray(list);
201
+ }
202
+
203
+ private List<String> values(List<String> values) {
204
+ range().forEach(index -> values.set(index, value(values, index).toJson()));
205
+ return values;
206
+ }
207
+
208
+ private ArrayValue value(List<String> values, int index) {
209
+ List<Value> list =
210
+ list(values, index).stream()
211
+ .sorted(comparator(index))
212
+ .map(ReduceType::value)
213
+ .collect(Collectors.toList());
214
+ return list.isEmpty() ? ValueFactory.emptyArray() : ValueFactory.newArray(list);
215
+ }
216
+
217
+ private Comparator<Value> comparator(int index) {
218
+ String name = schema.getColumnName(index);
219
+ return getSortColumns(index).stream()
220
+ .map(sortColumn -> comparator(name, sortColumn))
221
+ .reduce(Comparator::thenComparing)
222
+ .orElse(Comparator.comparing(value -> 0));
223
+ }
224
+
225
+ private Comparator<Value> comparator(String name, KintoneSortColumn sortColumn) {
226
+ Column column = lookupColumn(name, sortColumn);
227
+ return Comparator.comparing(
228
+ value -> ReduceType.asString(value, sortColumn),
229
+ Comparator.nullsLast(ReduceType.comparator(column, sortColumn.getOrder())));
230
+ }
231
+
232
+ private List<KintoneSortColumn> getSortColumns(int index) {
233
+ KintoneColumnOption option = task.getColumnOptions().get(schema.getColumnName(index));
234
+ return option != null ? option.getSortColumns() : Collections.emptyList();
235
+ }
236
+
237
+ private Column lookupColumn(String name, KintoneSortColumn sortColumn) {
238
+ return schema.lookupColumn(String.format("%s.%s", name, sortColumn.getName()));
239
+ }
240
+
241
+ private IntStream range() {
242
+ return IntStream.range(size, schema.size());
243
+ }
244
+
245
+ private static Schema schema(PluginTask task, Schema schema) {
246
+ Schema.Builder builder = Schema.builder();
247
+ schema.getColumns().forEach(column -> builder.add(column.getName(), column.getType()));
248
+ schema.getColumns().stream()
249
+ .map(Column::getName)
250
+ .filter(name -> name.matches("^.*\\..*$"))
251
+ .map(name -> name.replaceFirst("\\..*$", ""))
252
+ .distinct()
253
+ .forEach(name -> builder.add(name, type(task, name)));
254
+ return builder.build();
255
+ }
256
+
257
+ private static Type type(PluginTask task, String name) {
258
+ return KintoneColumnType.getType(task.getColumnOptions().get(name), KintoneColumnType.SUBTABLE)
259
+ == KintoneColumnType.SUBTABLE
260
+ ? Types.JSON
261
+ : Types.STRING;
262
+ }
263
+
264
+ private static File file(String suffix) {
265
+ try {
266
+ return File.createTempFile(String.format("%s.", KintoneOutputPlugin.class.getName()), suffix);
267
+ } catch (IOException e) {
268
+ throw new ReduceException(e);
269
+ }
270
+ }
271
+
272
+ private static void merge(List<TaskReport> taskReports, File merged) {
273
+ try (OutputStream out = Files.newOutputStream(merged.toPath())) {
274
+ long bytes =
275
+ taskReports.stream()
276
+ .map(taskReport -> new File(taskReport.get(String.class, "path")).toPath())
277
+ .mapToLong(source -> copy(source, out))
278
+ .sum();
279
+ LOGGER.info(String.format("Number of bytes merged: %d", bytes));
280
+ } catch (IOException e) {
281
+ throw new ReduceException(e);
282
+ }
283
+ }
284
+
285
+ private static long copy(Path source, OutputStream out) {
286
+ try {
287
+ long bytes = Files.copy(source, out);
288
+ LOGGER.info(String.format("Number of bytes copied: %d", bytes));
289
+ return bytes;
290
+ } catch (IOException e) {
291
+ throw new ReduceException(e);
292
+ }
293
+ }
294
+
295
+ private static void sort(File merged, File sorted, CsvSortOptions sortOptions) {
296
+ try {
297
+ int lines =
298
+ CsvExternalSort.mergeSortedFiles(
299
+ CsvExternalSort.sortInBatch(merged, null, sortOptions, new ArrayList<>()),
300
+ sorted,
301
+ sortOptions,
302
+ false,
303
+ Collections.emptyList());
304
+ LOGGER.info(String.format("Number of lines sorted: %d", lines));
305
+ } catch (IOException | ClassNotFoundException e) {
306
+ throw new ReduceException(e);
307
+ }
308
+ }
309
+
310
+ private static CsvSortOptions sortOptions(PluginTask task, Schema schema, Column column) {
311
+ List<KintoneSortColumn> sortColumns = new ArrayList<>();
312
+ sortColumns.add(new KintoneSortColumn(column.getName(), KintoneSortColumn.Order.ASC));
313
+ sortColumns.addAll(task.getSortColumns());
314
+ return new CsvSortOptions.Builder(
315
+ comparator(schema, sortColumns),
316
+ task.getMaxSortTmpFiles().orElse(CsvExternalSort.DEFAULTMAXTEMPFILES),
317
+ task.getMaxSortMemory().orElse(CsvExternalSort.estimateAvailableMemory()))
318
+ .charset(StandardCharsets.UTF_8)
319
+ .format(FORMAT)
320
+ .build();
321
+ }
322
+
323
+ private static Comparator<CSVRecord> comparator(
324
+ Schema schema, List<KintoneSortColumn> sortColumns) {
325
+ Function<KintoneSortColumn, Comparator<CSVRecord>> function =
326
+ sortColumn -> comparator(schema, sortColumn);
327
+ return sortColumns.stream()
328
+ .skip(1)
329
+ .map(function)
330
+ .reduce(function.apply(sortColumns.get(0)), Comparator::thenComparing);
331
+ }
332
+
333
+ private static Comparator<CSVRecord> comparator(Schema schema, KintoneSortColumn sortColumn) {
334
+ Column column = schema.lookupColumn(sortColumn.getName());
335
+ return Comparator.comparing(
336
+ record -> record.get(column.getIndex()),
337
+ Comparator.nullsLast(ReduceType.comparator(column, sortColumn.getOrder())));
338
+ }
339
+
340
+ private static CSVParser parser(File sorted) {
341
+ try {
342
+ return CSVParser.parse(sorted, StandardCharsets.UTF_8, FORMAT);
343
+ } catch (IOException e) {
344
+ throw new ReduceException(e);
345
+ }
346
+ }
347
+
348
+ private static PageBuilder builder(PluginTask task, Schema schema) {
349
+ return new PageBuilder(Exec.getBufferAllocator(), schema, new KintonePageOutput(task, schema));
350
+ }
351
+
352
+ private static List<Value> list(List<String> values, int index) {
353
+ return PARSER.parse(values.get(index)).asArrayValue().list();
354
+ }
355
+ }
@@ -1,5 +1,7 @@
1
1
  package org.embulk.output.kintone;
2
2
 
3
+ import java.util.Collections;
4
+ import java.util.List;
3
5
  import org.embulk.config.TaskSource;
4
6
 
5
7
  public class KintoneColumnOptionBuilder {
@@ -50,6 +52,11 @@ public class KintoneColumnOptionBuilder {
50
52
  return valueSeparator;
51
53
  }
52
54
 
55
+ @Override
56
+ public List<KintoneSortColumn> getSortColumns() {
57
+ return Collections.emptyList();
58
+ }
59
+
53
60
  @Override
54
61
  public void validate() {}
55
62