embulk-parser-csv_guessable 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/LICENSE.txt +21 -0
- data/README.md +53 -0
- data/build.gradle +97 -0
- data/config/checkstyle/checkstyle.xml +128 -0
- data/config/checkstyle/default.xml +108 -0
- data/gradle/wrapper/gradle-wrapper.jar +0 -0
- data/gradle/wrapper/gradle-wrapper.properties +6 -0
- data/gradlew +169 -0
- data/gradlew.bat +84 -0
- data/lib/embulk/guess/csv_guessable.rb +61 -0
- data/lib/embulk/parser/csv_guessable.rb +3 -0
- data/libs/embulk-standards-0.8.22.jar +0 -0
- data/src/main/java/org/embulk/parser/csv_guessable/CsvGuessableParserPlugin.java +371 -0
- data/src/main/java/org/embulk/parser/csv_guessable/CsvTokenizer.java +512 -0
- data/src/test/java/org/embulk/parser/csv_guessable/TestCsvGuessableParserPlugin.java +81 -0
- data/src/test/resources/data/test.csv +3 -0
- data/src/test/resources/data/test_alias.yml +3 -0
- data/src/test/resources/yml/guess_from_header.yml +9 -0
- data/src/test/resources/yml/original-csv.yml +12 -0
- data/src/test/resources/yml/replace_column_name.yml +13 -0
- metadata +100 -0
data/gradlew.bat
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
@if "%DEBUG%" == "" @echo off
|
2
|
+
@rem ##########################################################################
|
3
|
+
@rem
|
4
|
+
@rem Gradle startup script for Windows
|
5
|
+
@rem
|
6
|
+
@rem ##########################################################################
|
7
|
+
|
8
|
+
@rem Set local scope for the variables with windows NT shell
|
9
|
+
if "%OS%"=="Windows_NT" setlocal
|
10
|
+
|
11
|
+
@rem Resolve the directory containing this script, falling back to the current directory when it is empty.
set DIRNAME=%~dp0
|
12
|
+
if "%DIRNAME%" == "" set DIRNAME=.
|
13
|
+
set APP_BASE_NAME=%~n0
|
14
|
+
set APP_HOME=%DIRNAME%
|
15
|
+
|
16
|
+
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
17
|
+
set DEFAULT_JVM_OPTS=
|
18
|
+
|
19
|
+
@rem Find java.exe
|
20
|
+
if defined JAVA_HOME goto findJavaFromJavaHome
|
21
|
+
|
22
|
+
@rem No JAVA_HOME: probe for java.exe on PATH, discarding its output and checking only the exit code.
set JAVA_EXE=java.exe
|
23
|
+
%JAVA_EXE% -version >NUL 2>&1
|
24
|
+
if "%ERRORLEVEL%" == "0" goto init
|
25
|
+
|
26
|
+
echo.
|
27
|
+
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
28
|
+
echo.
|
29
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
30
|
+
echo location of your Java installation.
|
31
|
+
|
32
|
+
goto fail
|
33
|
+
|
34
|
+
:findJavaFromJavaHome
|
35
|
+
@rem Strip double quotes from JAVA_HOME before building the java.exe path.
set JAVA_HOME=%JAVA_HOME:"=%
|
36
|
+
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
37
|
+
|
38
|
+
if exist "%JAVA_EXE%" goto init
|
39
|
+
|
40
|
+
echo.
|
41
|
+
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
|
42
|
+
echo.
|
43
|
+
echo Please set the JAVA_HOME variable in your environment to match the
|
44
|
+
echo location of your Java installation.
|
45
|
+
|
46
|
+
goto fail
|
47
|
+
|
48
|
+
:init
|
49
|
+
@rem Get command-line arguments, handling Windows variants
|
50
|
+
|
51
|
+
@rem NOTE: the label targeted below is also the next label in the file, so both outcomes of this check continue at win9xME_args.
if not "%OS%" == "Windows_NT" goto win9xME_args
|
52
|
+
|
53
|
+
:win9xME_args
|
54
|
+
@rem Slurp the command line arguments.
|
55
|
+
set CMD_LINE_ARGS=
|
56
|
+
@rem _SKIP is assigned here but not read anywhere else in this script.
set _SKIP=2
|
57
|
+
|
58
|
+
:win9xME_args_slurp
|
59
|
+
@rem With no first argument CMD_LINE_ARGS stays empty; otherwise it receives the whole argument list.
if "x%~1" == "x" goto execute
|
60
|
+
|
61
|
+
set CMD_LINE_ARGS=%*
|
62
|
+
|
63
|
+
:execute
|
64
|
+
@rem Setup the command line
|
65
|
+
|
66
|
+
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
67
|
+
|
68
|
+
@rem Execute Gradle
|
69
|
+
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
|
70
|
+
|
71
|
+
:end
|
72
|
+
@rem End local scope for the variables with windows NT shell
|
73
|
+
@rem A zero exit code from Gradle skips the failure handling below.
if "%ERRORLEVEL%"=="0" goto mainEnd
|
74
|
+
|
75
|
+
:fail
|
76
|
+
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
77
|
+
rem the _cmd.exe /c_ return code!
|
78
|
+
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
|
79
|
+
exit /b 1
|
80
|
+
|
81
|
+
:mainEnd
|
82
|
+
if "%OS%"=="Windows_NT" endlocal
|
83
|
+
|
84
|
+
:omega
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Embulk
|
2
|
+
module Guess
|
3
|
+
|
4
|
+
# NOTE: this file currently registers no guess plugin. The module body contains
# only the three commented-out template classes below.
#
# TODO implement guess plugin to make this command work:
|
5
|
+
# $ embulk guess -g "csv_guessable" partial-config.yml
|
6
|
+
#
|
7
|
+
# Depending on the file format the plugin uses, you can choose
|
8
|
+
# one of binary guess (GuessPlugin), text guess (TextGuessPlugin),
|
9
|
+
# or line guess (LineGuessPlugin).
|
10
|
+
|
11
|
+
# Template 1: binary guess, inspects the raw sample buffer.
# class CsvGuessable < GuessPlugin
|
12
|
+
# Plugin.register_guess("csv_guessable", self)
|
13
|
+
#
|
14
|
+
# def guess(config, sample_buffer)
|
15
|
+
# if sample_buffer[0,2] == GZIP_HEADER
|
16
|
+
# guessed = {}
|
17
|
+
# guessed["type"] = "csv_guessable"
|
18
|
+
# guessed["property1"] = "guessed-value"
|
19
|
+
# return {"parser" => guessed}
|
20
|
+
# else
|
21
|
+
# return {}
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
# end
|
25
|
+
|
26
|
+
# Template 2: text guess, inspects the decoded sample text.
# class CsvGuessable < TextGuessPlugin
|
27
|
+
# Plugin.register_guess("csv_guessable", self)
|
28
|
+
#
|
29
|
+
# def guess_text(config, sample_text)
|
30
|
+
# js = JSON.parse(sample_text) rescue nil
|
31
|
+
# if js && js["mykeyword"] == "keyword"
|
32
|
+
# guessed = {}
|
33
|
+
# guessed["type"] = "csv_guessable"
|
34
|
+
# guessed["property1"] = "guessed-value"
|
35
|
+
# return {"parser" => guessed}
|
36
|
+
# else
|
37
|
+
# return {}
|
38
|
+
# end
|
39
|
+
# end
|
40
|
+
# end
|
41
|
+
|
42
|
+
# Template 3: line guess, inspects the sample split into lines.
# class CsvGuessable < LineGuessPlugin
|
43
|
+
# Plugin.register_guess("csv_guessable", self)
|
44
|
+
#
|
45
|
+
# def guess_lines(config, sample_lines)
|
46
|
+
# all_line_matched = sample_lines.all? do |line|
|
47
|
+
# line =~ /mypattern/
|
48
|
+
# end
|
49
|
+
# if all_line_matched
|
50
|
+
# guessed = {}
|
51
|
+
# guessed["type"] = "csv_guessable"
|
52
|
+
# guessed["property1"] = "guessed-value"
|
53
|
+
# return {"parser" => guessed}
|
54
|
+
# else
|
55
|
+
# return {}
|
56
|
+
# end
|
57
|
+
# end
|
58
|
+
# end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
Binary file
|
@@ -0,0 +1,371 @@
|
|
1
|
+
package org.embulk.parser.csv_guessable;
|
2
|
+
|
3
|
+
import com.google.common.base.Optional;
|
4
|
+
import com.google.common.collect.ImmutableSet;
|
5
|
+
import java.io.BufferedReader;
|
6
|
+
import com.opencsv.CSVReader; // TODO: use embulk's parser
|
7
|
+
import java.io.IOException;
|
8
|
+
import java.io.StringReader;
|
9
|
+
import java.nio.charset.StandardCharsets;
|
10
|
+
import java.nio.file.Files;
|
11
|
+
import java.nio.file.Path;
|
12
|
+
import java.util.ArrayList;
|
13
|
+
import org.slf4j.Logger;
|
14
|
+
|
15
|
+
import org.embulk.config.Config;
|
16
|
+
import org.embulk.config.ConfigDefault;
|
17
|
+
import org.embulk.config.ConfigDiff;
|
18
|
+
import org.embulk.config.ConfigException;
|
19
|
+
import org.embulk.config.ConfigSource;
|
20
|
+
import org.embulk.config.Task;
|
21
|
+
import org.embulk.config.TaskSource;
|
22
|
+
import org.embulk.spi.Column;
|
23
|
+
import org.embulk.spi.ColumnConfig;
|
24
|
+
import org.embulk.spi.ColumnVisitor;
|
25
|
+
import org.embulk.spi.DataException;
|
26
|
+
import org.embulk.spi.Exec;
|
27
|
+
import org.embulk.spi.FileInput;
|
28
|
+
import org.embulk.spi.json.JsonParser;
|
29
|
+
import org.embulk.spi.json.JsonParseException;
|
30
|
+
import org.embulk.spi.PageBuilder;
|
31
|
+
import org.embulk.spi.PageOutput;
|
32
|
+
import org.embulk.spi.ParserPlugin;
|
33
|
+
import org.embulk.spi.Schema;
|
34
|
+
import org.embulk.spi.SchemaConfig;
|
35
|
+
import org.embulk.spi.time.TimestampParser;
|
36
|
+
import org.embulk.spi.time.TimestampParseException;
|
37
|
+
import org.embulk.spi.type.Types;
|
38
|
+
import org.embulk.spi.unit.LocalFile;
|
39
|
+
import org.embulk.spi.util.LineDecoder;
|
40
|
+
import org.embulk.spi.util.Timestamps;
|
41
|
+
|
42
|
+
import org.embulk.standards.CsvParserPlugin;
|
43
|
+
|
44
|
+
public class CsvGuessableParserPlugin
|
45
|
+
extends CsvParserPlugin
|
46
|
+
{
|
47
|
+
/**
 * Literals that a boolean column accepts as {@code true}.
 * Membership is tested with an exact, case-sensitive match.
 */
private static final ImmutableSet<String> TRUE_STRINGS = ImmutableSet.of(
        "true", "True", "TRUE",
        "yes", "Yes", "YES",
        "t", "T", "y", "Y",
        "on", "On", "ON",
        "1");

/** Logger for this plugin, obtained once per plugin instance. */
private final Logger log;

/**
 * Creates the plugin and acquires its logger through {@code Exec.getLogger}.
 */
public CsvGuessableParserPlugin()
{
    this.log = Exec.getLogger(CsvGuessableParserPlugin.class);
}
|
61
|
+
|
62
|
+
public interface PluginTask
|
63
|
+
extends Task, LineDecoder.DecoderTask, TimestampParser.Task
|
64
|
+
{
|
65
|
+
@Config("columns")
|
66
|
+
@ConfigDefault("null")
|
67
|
+
Optional<SchemaConfig> getSchemaConfig();
|
68
|
+
|
69
|
+
@Config("header_line")
|
70
|
+
@ConfigDefault("null")
|
71
|
+
Optional<Boolean> getHeaderLine();
|
72
|
+
|
73
|
+
@Config("skip_header_lines")
|
74
|
+
@ConfigDefault("0")
|
75
|
+
int getSkipHeaderLines();
|
76
|
+
void setSkipHeaderLines(int n);
|
77
|
+
|
78
|
+
@Config("delimiter")
|
79
|
+
@ConfigDefault("\",\"")
|
80
|
+
String getDelimiter();
|
81
|
+
|
82
|
+
@Config("quote")
|
83
|
+
@ConfigDefault("\"\\\"\"")
|
84
|
+
Optional<QuoteCharacter> getQuoteChar();
|
85
|
+
|
86
|
+
@Config("escape")
|
87
|
+
@ConfigDefault("\"\\\\\"")
|
88
|
+
Optional<EscapeCharacter> getEscapeChar();
|
89
|
+
|
90
|
+
// Null value handling: if the CsvParser found 'non-quoted empty string's,
|
91
|
+
// it replaces them to string that users specified like "\N", "NULL".
|
92
|
+
@Config("null_string")
|
93
|
+
@ConfigDefault("null")
|
94
|
+
Optional<String> getNullString();
|
95
|
+
|
96
|
+
@Config("trim_if_not_quoted")
|
97
|
+
@ConfigDefault("false")
|
98
|
+
boolean getTrimIfNotQuoted();
|
99
|
+
|
100
|
+
@Config("max_quoted_size_limit")
|
101
|
+
@ConfigDefault("131072") //128kB
|
102
|
+
long getMaxQuotedSizeLimit();
|
103
|
+
|
104
|
+
@Config("comment_line_marker")
|
105
|
+
@ConfigDefault("null")
|
106
|
+
Optional<String> getCommentLineMarker();
|
107
|
+
|
108
|
+
@Config("allow_optional_columns")
|
109
|
+
@ConfigDefault("false")
|
110
|
+
boolean getAllowOptionalColumns();
|
111
|
+
|
112
|
+
@Config("allow_extra_columns")
|
113
|
+
@ConfigDefault("false")
|
114
|
+
boolean getAllowExtraColumns();
|
115
|
+
|
116
|
+
@Config("stop_on_invalid_record")
|
117
|
+
@ConfigDefault("false")
|
118
|
+
boolean getStopOnInvalidRecord();
|
119
|
+
|
120
|
+
@Config("schema_file")
|
121
|
+
@ConfigDefault("null")
|
122
|
+
public Optional<LocalFile> getSchemaFile();
|
123
|
+
|
124
|
+
@Config("schema_line")
|
125
|
+
@ConfigDefault("1")
|
126
|
+
public int getSchemaLine();
|
127
|
+
}
|
128
|
+
|
129
|
+
@Override
|
130
|
+
public void transaction(ConfigSource config, ParserPlugin.Control control)
|
131
|
+
{
|
132
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
133
|
+
SchemaConfig schemaConfig = null;
|
134
|
+
|
135
|
+
if (task.getSchemaFile().isPresent()) { /* embulk-parser-csv_guessable */
|
136
|
+
if (task.getHeaderLine().isPresent()) {
|
137
|
+
// TODO: use 'columns' as hints for guess
|
138
|
+
throw new ConfigException("embulk-parsre-csv_gussable will use 'columnes' as hints for guess as hints for guess. Please delete 'columnes' now.");
|
139
|
+
} else { /* guess from header */
|
140
|
+
int schemaLine = task.getSchemaLine();
|
141
|
+
task.setSkipHeaderLines(schemaLine); // TODO: use 'skip_header_line'
|
142
|
+
|
143
|
+
String header = readHeader(task.getSchemaFile().get().getPath(), schemaLine);
|
144
|
+
log.debug(header);
|
145
|
+
ArrayList<ColumnConfig> columns = newColumns(header, config);
|
146
|
+
log.debug(columns.toString());
|
147
|
+
schemaConfig = new SchemaConfig(columns);
|
148
|
+
}
|
149
|
+
} else { /* embulk-parser-csv embulk */
|
150
|
+
// backward compatibility
|
151
|
+
if (task.getHeaderLine().isPresent()) {
|
152
|
+
if (task.getSkipHeaderLines() > 0) {
|
153
|
+
throw new ConfigException("'header_line' option is invalid if 'skip_header_lines' is set.");
|
154
|
+
}
|
155
|
+
if (task.getHeaderLine().get()) {
|
156
|
+
task.setSkipHeaderLines(1);
|
157
|
+
} else {
|
158
|
+
task.setSkipHeaderLines(0);
|
159
|
+
}
|
160
|
+
}
|
161
|
+
schemaConfig = task.getSchemaConfig().get();
|
162
|
+
}
|
163
|
+
|
164
|
+
control.run(task.dump(), schemaConfig.toSchema());
|
165
|
+
}
|
166
|
+
|
167
|
+
@Override
|
168
|
+
public void run(TaskSource taskSource, final Schema schema,
|
169
|
+
FileInput input, PageOutput output)
|
170
|
+
{
|
171
|
+
PluginTask task = taskSource.loadTask(PluginTask.class);
|
172
|
+
TimestampParser[] timestampParsers = null;
|
173
|
+
if (task.getSchemaConfig().isPresent()) {
|
174
|
+
timestampParsers = Timestamps.newTimestampColumnParsers(task, task.getSchemaConfig().get());
|
175
|
+
}
|
176
|
+
final JsonParser jsonParser = new JsonParser();
|
177
|
+
final CsvTokenizer tokenizer = new CsvTokenizer(new LineDecoder(input, task), task);
|
178
|
+
final boolean allowOptionalColumns = task.getAllowOptionalColumns();
|
179
|
+
final boolean allowExtraColumns = task.getAllowExtraColumns();
|
180
|
+
final boolean stopOnInvalidRecord = task.getStopOnInvalidRecord();
|
181
|
+
int skipHeaderLines = task.getSkipHeaderLines();
|
182
|
+
|
183
|
+
try (final PageBuilder pageBuilder = new PageBuilder(Exec.getBufferAllocator(), schema, output)) {
|
184
|
+
while (tokenizer.nextFile()) {
|
185
|
+
// skip the header lines for each file
|
186
|
+
for (; skipHeaderLines > 0; skipHeaderLines--) {
|
187
|
+
if (!tokenizer.skipHeaderLine()) {
|
188
|
+
break;
|
189
|
+
}
|
190
|
+
}
|
191
|
+
|
192
|
+
if (!tokenizer.nextRecord()) {
|
193
|
+
// empty file
|
194
|
+
continue;
|
195
|
+
}
|
196
|
+
|
197
|
+
while (true) {
|
198
|
+
boolean hasNextRecord;
|
199
|
+
|
200
|
+
try {
|
201
|
+
schema.visitColumns(new ColumnVisitor() {
|
202
|
+
public void booleanColumn(Column column)
|
203
|
+
{
|
204
|
+
String v = nextColumn();
|
205
|
+
if (v == null) {
|
206
|
+
pageBuilder.setNull(column);
|
207
|
+
} else {
|
208
|
+
pageBuilder.setBoolean(column, TRUE_STRINGS.contains(v));
|
209
|
+
}
|
210
|
+
}
|
211
|
+
|
212
|
+
public void longColumn(Column column)
|
213
|
+
{
|
214
|
+
String v = nextColumn();
|
215
|
+
if (v == null) {
|
216
|
+
pageBuilder.setNull(column);
|
217
|
+
} else {
|
218
|
+
try {
|
219
|
+
pageBuilder.setLong(column, Long.parseLong(v));
|
220
|
+
} catch (NumberFormatException e) {
|
221
|
+
// TODO support default value
|
222
|
+
throw new CsvRecordValidateException(e);
|
223
|
+
}
|
224
|
+
}
|
225
|
+
}
|
226
|
+
|
227
|
+
public void doubleColumn(Column column)
|
228
|
+
{
|
229
|
+
String v = nextColumn();
|
230
|
+
if (v == null) {
|
231
|
+
pageBuilder.setNull(column);
|
232
|
+
} else {
|
233
|
+
try {
|
234
|
+
pageBuilder.setDouble(column, Double.parseDouble(v));
|
235
|
+
} catch (NumberFormatException e) {
|
236
|
+
// TODO support default value
|
237
|
+
throw new CsvRecordValidateException(e);
|
238
|
+
}
|
239
|
+
}
|
240
|
+
}
|
241
|
+
|
242
|
+
public void stringColumn(Column column)
|
243
|
+
{
|
244
|
+
String v = nextColumn();
|
245
|
+
if (v == null) {
|
246
|
+
pageBuilder.setNull(column);
|
247
|
+
} else {
|
248
|
+
pageBuilder.setString(column, v);
|
249
|
+
}
|
250
|
+
}
|
251
|
+
|
252
|
+
public void timestampColumn(Column column)
|
253
|
+
{
|
254
|
+
String v = nextColumn();
|
255
|
+
if (v == null) {
|
256
|
+
pageBuilder.setNull(column);
|
257
|
+
} else {
|
258
|
+
try {
|
259
|
+
// pageBuilder.setTimestamp(column, timestampParsers[column.getIndex()].parse(v));
|
260
|
+
} catch (TimestampParseException e) {
|
261
|
+
// TODO support default value
|
262
|
+
throw new CsvRecordValidateException(e);
|
263
|
+
}
|
264
|
+
}
|
265
|
+
}
|
266
|
+
|
267
|
+
public void jsonColumn(Column column)
|
268
|
+
{
|
269
|
+
String v = nextColumn();
|
270
|
+
if (v == null) {
|
271
|
+
pageBuilder.setNull(column);
|
272
|
+
} else {
|
273
|
+
try {
|
274
|
+
pageBuilder.setJson(column, jsonParser.parse(v));
|
275
|
+
} catch (JsonParseException e) {
|
276
|
+
// TODO support default value
|
277
|
+
throw new CsvRecordValidateException(e);
|
278
|
+
}
|
279
|
+
}
|
280
|
+
}
|
281
|
+
|
282
|
+
private String nextColumn()
|
283
|
+
{
|
284
|
+
if (allowOptionalColumns && !tokenizer.hasNextColumn()) {
|
285
|
+
//TODO warning
|
286
|
+
return null;
|
287
|
+
}
|
288
|
+
return tokenizer.nextColumnOrNull();
|
289
|
+
}
|
290
|
+
});
|
291
|
+
|
292
|
+
try {
|
293
|
+
hasNextRecord = tokenizer.nextRecord();
|
294
|
+
} catch (CsvTokenizer.TooManyColumnsException ex) {
|
295
|
+
if (allowExtraColumns) {
|
296
|
+
String tooManyColumnsLine = tokenizer.skipCurrentLine();
|
297
|
+
// TODO warning
|
298
|
+
hasNextRecord = tokenizer.nextRecord();
|
299
|
+
} else {
|
300
|
+
// this line will be skipped at the following catch section
|
301
|
+
throw ex;
|
302
|
+
}
|
303
|
+
}
|
304
|
+
pageBuilder.addRecord();
|
305
|
+
|
306
|
+
} catch (CsvTokenizer.InvalidFormatException | CsvTokenizer.InvalidValueException | CsvRecordValidateException e) {
|
307
|
+
String skippedLine = tokenizer.skipCurrentLine();
|
308
|
+
long lineNumber = tokenizer.getCurrentLineNumber();
|
309
|
+
if (stopOnInvalidRecord) {
|
310
|
+
throw new DataException(String.format("Invalid record at line %d: %s", lineNumber, skippedLine), e);
|
311
|
+
}
|
312
|
+
log.warn(String.format("Skipped line %d (%s): %s", lineNumber, e.getMessage(), skippedLine));
|
313
|
+
//exec.notice().skippedLine(skippedLine);
|
314
|
+
|
315
|
+
hasNextRecord = tokenizer.nextRecord();
|
316
|
+
}
|
317
|
+
|
318
|
+
if (!hasNextRecord) {
|
319
|
+
break;
|
320
|
+
}
|
321
|
+
}
|
322
|
+
}
|
323
|
+
|
324
|
+
pageBuilder.finish();
|
325
|
+
}
|
326
|
+
}
|
327
|
+
|
328
|
+
static class CsvRecordValidateException
|
329
|
+
extends DataException
|
330
|
+
{
|
331
|
+
CsvRecordValidateException(Throwable cause)
|
332
|
+
{
|
333
|
+
super(cause);
|
334
|
+
}
|
335
|
+
}
|
336
|
+
|
337
|
+
private String readHeader(Path path, int schemaLine) {
|
338
|
+
if (schemaLine <= 0) {
|
339
|
+
throw new ConfigException("'schemaLine' must be set '> 0'");
|
340
|
+
}
|
341
|
+
|
342
|
+
String line = null;
|
343
|
+
try (BufferedReader br = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
|
344
|
+
for (int i=1; i <= schemaLine; ++i) {
|
345
|
+
line = br.readLine();
|
346
|
+
if (line == null) {
|
347
|
+
throw new ConfigException("not found 'schema_line' in 'schema_file'");
|
348
|
+
}
|
349
|
+
}
|
350
|
+
} catch (IOException e) {
|
351
|
+
throw new ConfigException(e);
|
352
|
+
}
|
353
|
+
return line;
|
354
|
+
}
|
355
|
+
|
356
|
+
private ArrayList<ColumnConfig> newColumns(String header, ConfigSource config) {
|
357
|
+
ArrayList columns = new ArrayList<ArrayList>();
|
358
|
+
PluginTask task = config.loadConfig(PluginTask.class);
|
359
|
+
|
360
|
+
try (CSVReader reader = new CSVReader(new StringReader(header))) {
|
361
|
+
String[] csv = reader.readNext();
|
362
|
+
for (String column: csv) {
|
363
|
+
columns.add(new ColumnConfig(column, Types.STRING, config));
|
364
|
+
}
|
365
|
+
} catch (IOException e) {
|
366
|
+
throw new ConfigException(e);
|
367
|
+
}
|
368
|
+
|
369
|
+
return columns;
|
370
|
+
}
|
371
|
+
}
|