embulk-input-marketo_extended 0.6.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/.github/PULL_REQUEST_TEMPLATE.md +37 -0
  3. data/.gitignore +14 -0
  4. data/.travis.yml +6 -0
  5. data/CHANGELOG.md +170 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +213 -0
  8. data/build.gradle +103 -0
  9. data/config/checkstyle/checkstyle.xml +128 -0
  10. data/config/checkstyle/default.xml +108 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  13. data/gradlew +169 -0
  14. data/gradlew.bat +84 -0
  15. data/lib/embulk/input/marketo.rb +3 -0
  16. data/settings.gradle +1 -0
  17. data/src/main/java/org/embulk/input/marketo/CsvTokenizer.java +700 -0
  18. data/src/main/java/org/embulk/input/marketo/MarketoInputPlugin.java +15 -0
  19. data/src/main/java/org/embulk/input/marketo/MarketoInputPluginDelegate.java +100 -0
  20. data/src/main/java/org/embulk/input/marketo/MarketoService.java +38 -0
  21. data/src/main/java/org/embulk/input/marketo/MarketoServiceImpl.java +245 -0
  22. data/src/main/java/org/embulk/input/marketo/MarketoUtils.java +212 -0
  23. data/src/main/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPlugin.java +167 -0
  24. data/src/main/java/org/embulk/input/marketo/delegate/CampaignInputPlugin.java +48 -0
  25. data/src/main/java/org/embulk/input/marketo/delegate/CustomObjectInputPlugin.java +75 -0
  26. data/src/main/java/org/embulk/input/marketo/delegate/CustomObjectResponseMapperBuilder.java +81 -0
  27. data/src/main/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPlugin.java +66 -0
  28. data/src/main/java/org/embulk/input/marketo/delegate/LeadServiceResponseMapperBuilder.java +85 -0
  29. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithListInputPlugin.java +64 -0
  30. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPlugin.java +60 -0
  31. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPlugin.java +441 -0
  32. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseInputPluginDelegate.java +92 -0
  33. data/src/main/java/org/embulk/input/marketo/delegate/ProgramInputPlugin.java +228 -0
  34. data/src/main/java/org/embulk/input/marketo/exception/MarketoAPIException.java +30 -0
  35. data/src/main/java/org/embulk/input/marketo/model/BulkExtractRangeHeader.java +26 -0
  36. data/src/main/java/org/embulk/input/marketo/model/MarketoAccessTokenResponse.java +92 -0
  37. data/src/main/java/org/embulk/input/marketo/model/MarketoBulkExtractRequest.java +68 -0
  38. data/src/main/java/org/embulk/input/marketo/model/MarketoError.java +40 -0
  39. data/src/main/java/org/embulk/input/marketo/model/MarketoField.java +126 -0
  40. data/src/main/java/org/embulk/input/marketo/model/MarketoResponse.java +82 -0
  41. data/src/main/java/org/embulk/input/marketo/model/filter/DateRangeFilter.java +40 -0
  42. data/src/main/java/org/embulk/input/marketo/rest/MarketoBaseRestClient.java +306 -0
  43. data/src/main/java/org/embulk/input/marketo/rest/MarketoInputStreamResponseEntityReader.java +69 -0
  44. data/src/main/java/org/embulk/input/marketo/rest/MarketoRESTEndpoint.java +47 -0
  45. data/src/main/java/org/embulk/input/marketo/rest/MarketoResponseJetty92EntityReader.java +89 -0
  46. data/src/main/java/org/embulk/input/marketo/rest/MarketoRestClient.java +569 -0
  47. data/src/main/java/org/embulk/input/marketo/rest/RecordPagingIterable.java +180 -0
  48. data/src/test/java/org/embulk/input/marketo/MarketoServiceImplTest.java +140 -0
  49. data/src/test/java/org/embulk/input/marketo/MarketoUtilsTest.java +87 -0
  50. data/src/test/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPluginTest.java +128 -0
  51. data/src/test/java/org/embulk/input/marketo/delegate/CampaignInputPluginTest.java +73 -0
  52. data/src/test/java/org/embulk/input/marketo/delegate/CustomObjectInputPluginTest.java +102 -0
  53. data/src/test/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPluginTest.java +99 -0
  54. data/src/test/java/org/embulk/input/marketo/delegate/LeadServiceResponseMapperBuilderTest.java +119 -0
  55. data/src/test/java/org/embulk/input/marketo/delegate/LeadWithListInputPluginTest.java +101 -0
  56. data/src/test/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPluginTest.java +103 -0
  57. data/src/test/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPluginTest.java +169 -0
  58. data/src/test/java/org/embulk/input/marketo/delegate/ProgramInputPluginTest.java +343 -0
  59. data/src/test/java/org/embulk/input/marketo/rest/MarketoBaseRestClientTest.java +368 -0
  60. data/src/test/java/org/embulk/input/marketo/rest/MarketoRestClientTest.java +584 -0
  61. data/src/test/resources/config/activity_bulk_extract_config.yaml +7 -0
  62. data/src/test/resources/config/custom_object_config.yaml +8 -0
  63. data/src/test/resources/config/lead_bulk_extract_config.yaml +8 -0
  64. data/src/test/resources/config/rest_config.yaml +3 -0
  65. data/src/test/resources/fixtures/activity_extract1.csv +35 -0
  66. data/src/test/resources/fixtures/activity_extract2.csv +22 -0
  67. data/src/test/resources/fixtures/activity_types.json +22 -0
  68. data/src/test/resources/fixtures/all_program_full.json +53 -0
  69. data/src/test/resources/fixtures/campaign_response.json +38 -0
  70. data/src/test/resources/fixtures/campaign_response_full.json +102 -0
  71. data/src/test/resources/fixtures/custom_object_describe.json +124 -0
  72. data/src/test/resources/fixtures/custom_object_describe_marketo_fields_full.json +22 -0
  73. data/src/test/resources/fixtures/custom_object_expected.json +66 -0
  74. data/src/test/resources/fixtures/custom_object_response.json +24 -0
  75. data/src/test/resources/fixtures/custom_object_response_full.json +23 -0
  76. data/src/test/resources/fixtures/lead_by_list.json +33 -0
  77. data/src/test/resources/fixtures/lead_by_program_response.json +47 -0
  78. data/src/test/resources/fixtures/lead_describe.json +221 -0
  79. data/src/test/resources/fixtures/lead_describe_expected.json +66 -0
  80. data/src/test/resources/fixtures/lead_describe_marketo_fields_full.json +518 -0
  81. data/src/test/resources/fixtures/lead_extract1.csv +11 -0
  82. data/src/test/resources/fixtures/lead_response_full.json +2402 -0
  83. data/src/test/resources/fixtures/lead_with_program_full.json +17 -0
  84. data/src/test/resources/fixtures/leads_extract2.csv +10 -0
  85. data/src/test/resources/fixtures/list_reponse_full.json +191 -0
  86. data/src/test/resources/fixtures/lists_response.json +31 -0
  87. data/src/test/resources/fixtures/program_response.json +71 -0
  88. metadata +171 -0
@@ -0,0 +1,84 @@
1
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Set local scope for the variables with windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ set DIRNAME=%~dp0
12
+ if "%DIRNAME%" == "" set DIRNAME=.
13
+ set APP_BASE_NAME=%~n0
14
+ set APP_HOME=%DIRNAME%
15
+
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
+ @rem Find java.exe
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+
53
+ :win9xME_args
54
+ @rem Slurp the command line arguments.
55
+ set CMD_LINE_ARGS=
56
+ set _SKIP=2
57
+
58
+ :win9xME_args_slurp
59
+ if "x%~1" == "x" goto execute
60
+
61
+ set CMD_LINE_ARGS=%*
62
+
63
+ :execute
64
+ @rem Setup the command line
65
+
66
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67
+
68
+ @rem Execute Gradle
69
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70
+
71
+ :end
72
+ @rem End local scope for the variables with windows NT shell
73
+ if "%ERRORLEVEL%"=="0" goto mainEnd
74
+
75
+ :fail
76
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77
+ rem the _cmd.exe /c_ return code!
78
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79
+ exit /b 1
80
+
81
+ :mainEnd
82
+ if "%OS%"=="Windows_NT" endlocal
83
+
84
+ :omega
@@ -0,0 +1,3 @@
1
+ Embulk::JavaPlugin.register_input(
2
+ "marketo", "org.embulk.input.marketo.MarketoInputPlugin",
3
+ File.expand_path('../../../../classpath', __FILE__))
@@ -0,0 +1 @@
1
+ rootProject.name = "embulk-input-marketo_extended"
@@ -0,0 +1,700 @@
1
+ package org.embulk.input.marketo;
2
+ import com.fasterxml.jackson.annotation.JsonCreator;
3
+ import com.fasterxml.jackson.annotation.JsonIgnore;
4
+ import com.fasterxml.jackson.annotation.JsonValue;
5
+ import com.google.common.base.Optional;
6
+ import com.google.common.base.Preconditions;
7
+ import org.embulk.config.Config;
8
+ import org.embulk.config.ConfigDefault;
9
+ import org.embulk.config.ConfigException;
10
+ import org.embulk.spi.DataException;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.util.LineDecoder;
13
+
14
+ import java.util.ArrayDeque;
15
+ import java.util.ArrayList;
16
+ import java.util.Deque;
17
+ import java.util.List;
18
+
19
+ /**
20
+ * Created by tai.khuu on 9/15/17.
21
+ */
22
+ public class CsvTokenizer
23
+ {
24
+ static enum RecordState
25
+ {
26
+ NOT_END, END,
27
+ }
28
+
29
+ static enum ColumnState
30
+ {
31
+ BEGIN, VALUE, QUOTED_VALUE, AFTER_QUOTED_VALUE, FIRST_TRIM, LAST_TRIM_OR_VALUE,
32
+ }
33
+
34
+ private static final char END_OF_LINE = '\0';
35
+ static final char NO_QUOTE = '\0';
36
+ static final char NO_ESCAPE = '\0';
37
+
38
+ public interface PluginTask extends LineDecoder.DecoderTask
39
+ {
40
+ @Config("delimiter")
41
+ @ConfigDefault("\",\"")
42
+ String getDelimiter();
43
+
44
+ @Config("quote")
45
+ @ConfigDefault("\"\\\"\"")
46
+ Optional<QuoteCharacter> getQuoteChar();
47
+
48
+ @Config("escape")
49
+ @ConfigDefault("\"\\\\\"")
50
+ Optional<EscapeCharacter> getEscapeChar();
51
+
52
+ // Null value handling: when null_string is set, nextColumnOrNull() returns null for any
53
+ // column whose value equals it; when it is unset, a non-quoted empty string becomes null.
54
+ @Config("null_string")
55
+ @ConfigDefault("\"null\"")
56
+ Optional<String> getNullString();
57
+
58
+ @Config("trim_if_not_quoted")
59
+ @ConfigDefault("false")
60
+ boolean getTrimIfNotQuoted();
61
+
62
+ @Config("max_quoted_size_limit")
63
+ @ConfigDefault("131072") //128kB
64
+ long getMaxQuotedSizeLimit();
65
+
66
+ @Config("comment_line_marker")
67
+ @ConfigDefault("null")
68
+ Optional<String> getCommentLineMarker();
69
+ }
70
+
71
+ private final char delimiterChar;
72
+ private final String delimiterFollowingString;
73
+ private final char quote;
74
+ private final char escape;
75
+ private final String newline;
76
+ private final boolean trimIfNotQuoted;
77
+ private final long maxQuotedSizeLimit;
78
+ private final String commentLineMarker;
79
+ private final LineDecoder input;
80
+ private final String nullStringOrNull;
81
+
82
+ private RecordState recordState = RecordState.END; // initial state is end of a record. nextRecord() must be called first
83
+ private long lineNumber = 0;
84
+
85
+ private String line = null;
86
+ private int linePos = 0;
87
+ private boolean wasQuotedColumn = false;
88
+ private List<String> quotedValueLines = new ArrayList<>();
89
+ private Deque<String> unreadLines = new ArrayDeque<>();
90
+
91
+ public CsvTokenizer(LineDecoder input, PluginTask task)
92
+ {
93
+ this(task.getDelimiter(), task.getQuoteChar().or(QuoteCharacter.noQuote()).getCharacter(),
94
+ task.getEscapeChar().or(EscapeCharacter.noEscape()).getCharacter(), task.getNewline().getString(),
95
+ task.getTrimIfNotQuoted(), task.getMaxQuotedSizeLimit(), task.getCommentLineMarker().orNull(), input, task.getNullString().orNull());
96
+ }
97
+
98
+ public CsvTokenizer(String delimiter, char quote, char escape, String newline, boolean trimIfNotQuoted, long maxQuotedSizeLimit, String commentLineMarker, LineDecoder input, String nullStringOrNull)
99
+ {
100
+ if (delimiter.length() == 0) {
101
+ throw new ConfigException("Empty delimiter is not allowed");
102
+ }
103
+ else {
104
+ this.delimiterChar = delimiter.charAt(0);
105
+ if (delimiter.length() > 1) {
106
+ delimiterFollowingString = delimiter.substring(1);
107
+ }
108
+ else {
109
+ delimiterFollowingString = null;
110
+ }
111
+ }
112
+ this.quote = quote;
113
+ this.escape = escape;
114
+ this.newline = newline;
115
+ this.trimIfNotQuoted = trimIfNotQuoted;
116
+ this.maxQuotedSizeLimit = maxQuotedSizeLimit;
117
+ this.commentLineMarker = commentLineMarker;
118
+ this.input = input;
119
+ this.nullStringOrNull = nullStringOrNull;
120
+ }
121
+
122
+ public long getCurrentLineNumber()
123
+ {
124
+ return lineNumber;
125
+ }
126
+
127
+ public boolean skipHeaderLine()
128
+ {
129
+ boolean skipped = input.poll() != null;
130
+ if (skipped) {
131
+ lineNumber++;
132
+ }
133
+ return skipped;
134
+ }
135
+
136
+ // returns skipped line
137
+ public String skipCurrentLine()
138
+ {
139
+ String skippedLine;
140
+ if (quotedValueLines.isEmpty()) {
141
+ skippedLine = line;
142
+ }
143
+ else {
144
+ // recover lines of quoted value
145
+ skippedLine = quotedValueLines.remove(0); // TODO optimize performance
146
+ unreadLines.addAll(quotedValueLines);
147
+ lineNumber -= quotedValueLines.size();
148
+ if (line != null) {
149
+ unreadLines.add(line);
150
+ lineNumber -= 1;
151
+ }
152
+ quotedValueLines.clear();
153
+ }
154
+ recordState = RecordState.END;
155
+ return skippedLine;
156
+ }
157
+
158
+ public boolean nextFile()
159
+ {
160
+ boolean next = input.nextFile();
161
+ if (next) {
162
+ lineNumber = 0;
163
+ }
164
+ return next;
165
+ }
166
+
167
+ // used by guess-csv
168
+ public boolean nextRecord()
169
+ {
170
+ return nextRecord(true);
171
+ }
172
+
173
+ public boolean nextRecord(boolean skipEmptyLine)
174
+ {
175
+ // If at the end of record, read the next line and initialize the state
176
+ if (recordState != RecordState.END) {
177
+ throw new TooManyColumnsException("Too many columns");
178
+ }
179
+
180
+ boolean hasNext = nextLine(skipEmptyLine);
181
+ if (hasNext) {
182
+ recordState = RecordState.NOT_END;
183
+ return true;
184
+ }
185
+ else {
186
+ return false;
187
+ }
188
+ }
189
+
190
+ private boolean nextLine(boolean skipEmptyLine)
191
+ {
192
+ while (true) {
193
+ if (!unreadLines.isEmpty()) {
194
+ line = unreadLines.removeFirst();
195
+ }
196
+ else {
197
+ line = input.poll();
198
+ if (line == null) {
199
+ return false;
200
+ }
201
+ }
202
+ linePos = 0;
203
+ lineNumber++;
204
+
205
+ boolean skip = skipEmptyLine && (
206
+ line.isEmpty() ||
207
+ (commentLineMarker != null && line.startsWith(commentLineMarker)));
208
+ if (!skip) {
209
+ return true;
210
+ }
211
+ }
212
+ }
213
+
214
+ public boolean hasNextColumn()
215
+ {
216
+ return recordState == RecordState.NOT_END;
217
+ }
218
+
219
+ public String nextColumn()
220
+ {
221
+ if (!hasNextColumn()) {
222
+ throw new TooFewColumnsException("Too few columns");
223
+ }
224
+
225
+ // reset last state
226
+ wasQuotedColumn = false;
227
+ quotedValueLines.clear();
228
+
229
+ // local state
230
+ int valueStartPos = linePos;
231
+ int valueEndPos = 0; // initialized by VALUE state and used by LAST_TRIM_OR_VALUE and
232
+ StringBuilder quotedValue = null; // initial by VALUE or FIRST_TRIM state and used by QUOTED_VALUE state
233
+ ColumnState columnState = ColumnState.BEGIN;
234
+
235
+ while (true) {
236
+ final char c = nextChar();
237
+
238
+ switch (columnState) {
239
+ case BEGIN:
240
+ // TODO optimization: state is BEGIN only at the first character of a column.
241
+ // this block can be moved out of the loop.
242
+ if (isDelimiter(c)) {
243
+ // empty value
244
+ if (delimiterFollowingString == null) {
245
+ return "";
246
+ }
247
+ else if (isDelimiterFollowingFrom(linePos)) {
248
+ linePos += delimiterFollowingString.length();
249
+ return "";
250
+ }
251
+ // not a delimiter
252
+ }
253
+ if (isEndOfLine(c)) {
254
+ // empty value
255
+ recordState = RecordState.END;
256
+ return "";
257
+ }
258
+ else if (isSpace(c) && trimIfNotQuoted) {
259
+ columnState = ColumnState.FIRST_TRIM;
260
+ }
261
+ else if (isQuote(c)) {
262
+ valueStartPos = linePos; // == 1
263
+ wasQuotedColumn = true;
264
+ quotedValue = new StringBuilder();
265
+ columnState = ColumnState.QUOTED_VALUE;
266
+ }
267
+ else {
268
+ columnState = ColumnState.VALUE;
269
+ }
270
+ break;
271
+
272
+ case FIRST_TRIM:
273
+ if (isDelimiter(c)) {
274
+ // empty value
275
+ if (delimiterFollowingString == null) {
276
+ return "";
277
+ }
278
+ else if (isDelimiterFollowingFrom(linePos)) {
279
+ linePos += delimiterFollowingString.length();
280
+ return "";
281
+ }
282
+ // not a delimiter
283
+ }
284
+ if (isEndOfLine(c)) {
285
+ // empty value
286
+ recordState = RecordState.END;
287
+ return "";
288
+ }
289
+ else if (isQuote(c)) {
290
+ // column has heading spaces and quoted. TODO should this be rejected?
291
+ valueStartPos = linePos;
292
+ wasQuotedColumn = true;
293
+ quotedValue = new StringBuilder();
294
+ columnState = ColumnState.QUOTED_VALUE;
295
+ }
296
+ else if (isSpace(c)) {
297
+ // skip this character
298
+ } else {
299
+ valueStartPos = linePos - 1;
300
+ columnState = ColumnState.VALUE;
301
+ }
302
+ break;
303
+
304
+ case VALUE:
305
+ if (isDelimiter(c)) {
306
+ if (delimiterFollowingString == null) {
307
+ return line.substring(valueStartPos, linePos - 1);
308
+ }
309
+ else if (isDelimiterFollowingFrom(linePos)) {
310
+ String value = line.substring(valueStartPos, linePos - 1);
311
+ linePos += delimiterFollowingString.length();
312
+ return value;
313
+ }
314
+ // not a delimiter
315
+ }
316
+ if (isEndOfLine(c)) {
317
+ recordState = RecordState.END;
318
+ return line.substring(valueStartPos, linePos);
319
+ }
320
+ else if (isSpace(c) && trimIfNotQuoted) {
321
+ valueEndPos = linePos - 1; // this is possibly end of value
322
+ columnState = ColumnState.LAST_TRIM_OR_VALUE;
323
+
324
+ // TODO not implemented yet foo""bar""baz -> [foo, bar, baz].append
325
+ //} else if (isQuote(c)) {
326
+ // // In RFC4180, If fields are not enclosed with double quotes, then
327
+ // // double quotes may not appear inside the fields. But they are often
328
+ // // included in the fields. We should care about them later.
329
+ }
330
+ else {
331
+ // keep VALUE state
332
+ }
333
+ break;
334
+
335
+ case LAST_TRIM_OR_VALUE:
336
+ if (isDelimiter(c)) {
337
+ if (delimiterFollowingString == null) {
338
+ return line.substring(valueStartPos, valueEndPos);
339
+ }
340
+ else if (isDelimiterFollowingFrom(linePos)) {
341
+ linePos += delimiterFollowingString.length();
342
+ return line.substring(valueStartPos, valueEndPos);
343
+ }
344
+ else {
345
+ // not a delimiter
346
+ }
347
+ }
348
+ if (isEndOfLine(c)) {
349
+ recordState = RecordState.END;
350
+ return line.substring(valueStartPos, valueEndPos);
351
+ }
352
+ else if (isSpace(c)) {
353
+ // keep LAST_TRIM_OR_VALUE state
354
+ } else {
355
+ // these spaces are not trailing spaces. go back to VALUE state
356
+ columnState = ColumnState.VALUE;
357
+ }
358
+ break;
359
+
360
+ case QUOTED_VALUE:
361
+ if (isEndOfLine(c)) {
362
+ // multi-line quoted value
363
+ quotedValue.append(line.substring(valueStartPos, linePos));
364
+ quotedValue.append(newline);
365
+ quotedValueLines.add(line);
366
+ if (!nextLine(false)) {
367
+ throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
368
+ }
369
+ valueStartPos = 0;
370
+ }
371
+ else if (isQuote(c)) {
372
+ char next = peekNextChar();
373
+ if (isQuote(next)) { // escaped quote
374
+ quotedValue.append(line.substring(valueStartPos, linePos));
375
+ valueStartPos = ++linePos;
376
+ }
377
+ else {
378
+ quotedValue.append(line.substring(valueStartPos, linePos - 1));
379
+ columnState = ColumnState.AFTER_QUOTED_VALUE;
380
+ }
381
+ }
382
+ else if (isEscape(c)) { // isQuote must be checked first in case of quote == escape
383
+ // In RFC 4180, CSV's escape char is '\"'. But '\\' is often used.
384
+ char next = peekNextChar();
385
+ if (isEndOfLine(c)) {
386
+ // escape end of line. TODO assuming multi-line quoted value without newline?
387
+ quotedValue.append(line.substring(valueStartPos, linePos));
388
+ quotedValueLines.add(line);
389
+ if (!nextLine(false)) {
390
+ throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
391
+ }
392
+ valueStartPos = 0;
393
+ }
394
+ else if (isQuote(next) || isEscape(next)) { // escaped quote
395
+ quotedValue.append(line.substring(valueStartPos, linePos - 1));
396
+ quotedValue.append(next);
397
+ valueStartPos = ++linePos;
398
+ }
399
+ }
400
+ else {
401
+ if ((linePos - valueStartPos) + quotedValue.length() > maxQuotedSizeLimit) {
402
+ throw new QuotedSizeLimitExceededException("The size of the quoted value exceeds the limit size (" + maxQuotedSizeLimit + ")");
403
+ }
404
+ // keep QUOTED_VALUE state
405
+ }
406
+ break;
407
+
408
+ case AFTER_QUOTED_VALUE:
409
+ if (isDelimiter(c)) {
410
+ if (delimiterFollowingString == null) {
411
+ return quotedValue.toString();
412
+ }
413
+ else if (isDelimiterFollowingFrom(linePos)) {
414
+ linePos += delimiterFollowingString.length();
415
+ return quotedValue.toString();
416
+ }
417
+ // not a delimiter
418
+ }
419
+ if (isEndOfLine(c)) {
420
+ recordState = RecordState.END;
421
+ return quotedValue.toString();
422
+ }
423
+ else if (isSpace(c)) {
424
+ // column has trailing spaces and quoted. TODO should this be rejected?
425
+ } else {
426
+ // I do not see a reason to reject record if stray quotes happen:
427
+ // ACCEPT_STRAY_QUOTES_ASSUMING_NO_DELIMITERS_IN_FIELDS Accept stray quotes as-is in the field. Instead, it behaves undefined if delimiters are in fields. "a"b" goes a"b. "a""b" goes a"b.
428
+ // https://www.embulk.org/docs/built-in.html#csv-parser-plugin
429
+ // throw new InvalidValueException(String.format("Unexpected extra character '%c' after a value quoted by '%c'", c, quote));
430
+ Exec.getLogger(CsvTokenizer.class).warn(String.format("Unexpected extra character '%c' after a value quoted by '%c', Record= %s", c, quote, line));
431
+
432
+ }
433
+ break;
434
+
435
+ default:
436
+ assert false;
437
+ }
438
+ }
439
+ }
440
+
441
+ public String nextColumnOrNull()
442
+ {
443
+ String v = nextColumn();
444
+ if (nullStringOrNull == null) {
445
+ if (v.isEmpty()) {
446
+ if (wasQuotedColumn) {
447
+ return "";
448
+ }
449
+ else {
450
+ return null;
451
+ }
452
+ }
453
+ else {
454
+ return v;
455
+ }
456
+ }
457
+ else {
458
+ if (v.equals(nullStringOrNull)) {
459
+ return null;
460
+ }
461
+ else {
462
+ return v;
463
+ }
464
+ }
465
+ }
466
+
467
+ public boolean wasQuotedColumn()
468
+ {
469
+ return wasQuotedColumn;
470
+ }
471
+
472
+ private char nextChar()
473
+ {
474
+ Preconditions.checkState(line != null, "nextColumn is called after end of file");
475
+
476
+ if (linePos >= line.length()) {
477
+ return END_OF_LINE;
478
+ }
479
+ else {
480
+ return line.charAt(linePos++);
481
+ }
482
+ }
483
+
484
+ private char peekNextChar()
485
+ {
486
+ Preconditions.checkState(line != null, "peekNextChar is called after end of file");
487
+
488
+ if (linePos >= line.length()) {
489
+ return END_OF_LINE;
490
+ }
491
+ else {
492
+ return line.charAt(linePos);
493
+ }
494
+ }
495
+
496
+ private boolean isSpace(char c)
497
+ {
498
+ return c == ' ';
499
+ }
500
+
501
+ private boolean isDelimiterFollowingFrom(int pos)
502
+ {
503
+ if (line.length() < pos + delimiterFollowingString.length()) {
504
+ return false;
505
+ }
506
+ for (int i = 0; i < delimiterFollowingString.length(); i++) {
507
+ if (delimiterFollowingString.charAt(i) != line.charAt(pos + i)) {
508
+ return false;
509
+ }
510
+ }
511
+ return true;
512
+ }
513
+
514
+ private boolean isDelimiter(char c)
515
+ {
516
+ return c == delimiterChar;
517
+ }
518
+
519
+ private boolean isEndOfLine(char c)
520
+ {
521
+ return c == END_OF_LINE;
522
+ }
523
+
524
+ private boolean isQuote(char c)
525
+ {
526
+ return quote != NO_QUOTE && c == quote;
527
+ }
528
+
529
+ private boolean isEscape(char c)
530
+ {
531
+ return escape != NO_ESCAPE && c == escape;
532
+ }
533
+
534
+ public static class InvalidFormatException
535
+ extends DataException
536
+ {
537
+ public InvalidFormatException(String message)
538
+ {
539
+ super(message);
540
+ }
541
+ }
542
+
543
+ public static class InvalidValueException
544
+ extends DataException
545
+ {
546
+ public InvalidValueException(String message)
547
+ {
548
+ super(message);
549
+ }
550
+ }
551
+
552
+ public static class QuotedSizeLimitExceededException
553
+ extends InvalidValueException
554
+ {
555
+ public QuotedSizeLimitExceededException(String message)
556
+ {
557
+ super(message);
558
+ }
559
+ }
560
+
561
+ /** Thrown by nextRecord() when the current record still has unread columns. Static so the exception keeps no hidden reference to the enclosing tokenizer, matching the sibling exception classes. */ public static class TooManyColumnsException
562
+ extends InvalidFormatException
563
+ {
564
+ public TooManyColumnsException(String message)
565
+ {
566
+ super(message);
567
+ }
568
+ }
569
+
570
+ /** Thrown by nextColumn() when no column is left in the current record. Static so the exception keeps no hidden reference to the enclosing tokenizer, matching the sibling exception classes. */ public static class TooFewColumnsException
571
+ extends InvalidFormatException
572
+ {
573
+ public TooFewColumnsException(String message)
574
+ {
575
+ super(message);
576
+ }
577
+ }
578
+
579
+ public static class QuoteCharacter
580
+ {
581
+ private final char character;
582
+
583
+ public QuoteCharacter(char character)
584
+ {
585
+ this.character = character;
586
+ }
587
+
588
+ public static QuoteCharacter noQuote()
589
+ {
590
+ return new QuoteCharacter(CsvTokenizer.NO_QUOTE);
591
+ }
592
+
593
+ @JsonCreator
594
+ public static QuoteCharacter ofString(String str)
595
+ {
596
+ if (str.length() >= 2) {
597
+ throw new ConfigException("\"quote\" option accepts only 1 character.");
598
+ }
599
+ else if (str.isEmpty()) {
600
+ Exec.getLogger(CsvTokenizer.class).warn("Setting '' (empty string) to \"quote\" option is obsoleted. Currently it becomes '\"' automatically but this behavior will be removed. Please set '\"' explicitly.");
601
+ return new QuoteCharacter('"');
602
+ }
603
+ else {
604
+ return new QuoteCharacter(str.charAt(0));
605
+ }
606
+ }
607
+
608
+ @JsonIgnore
609
+ public char getCharacter()
610
+ {
611
+ return character;
612
+ }
613
+
614
+ @JsonValue
615
+ public String getOptionalString()
616
+ {
617
+ return new String(new char[] { character });
618
+ }
619
+
620
+ @Override
621
+ public int hashCode()
622
+ {
623
+ final int prime = 31;
624
+ int result = 1;
625
+ result = prime * result + character;
626
+ return result;
627
+ }
628
+
629
+ @Override
630
+ public boolean equals(Object obj)
631
+ {
632
+ if (!(obj instanceof QuoteCharacter)) {
633
+ return false;
634
+ }
635
+ QuoteCharacter o = (QuoteCharacter) obj;
636
+ return character == o.character;
637
+ }
638
+ }
639
+
640
+ public static class EscapeCharacter
641
+ {
642
+ private final char character;
643
+
644
+ public EscapeCharacter(char character)
645
+ {
646
+ this.character = character;
647
+ }
648
+
649
+ public static EscapeCharacter noEscape()
650
+ {
651
+ return new EscapeCharacter(CsvTokenizer.NO_ESCAPE);
652
+ }
653
+
654
+ @JsonCreator
655
+ public static EscapeCharacter ofString(String str)
656
+ {
657
+ if (str.length() >= 2) {
658
+ throw new ConfigException("\"escape\" option accepts only 1 character.");
659
+ }
660
+ else if (str.isEmpty()) {
661
+ Exec.getLogger(CsvTokenizer.class).warn("Setting '' (empty string) to \"escape\" option is obsoleted. Currently it becomes null automatically but this behavior will be removed. Please set \"escape: null\" explicitly.");
662
+ return noEscape();
663
+ }
664
+ else {
665
+ return new EscapeCharacter(str.charAt(0));
666
+ }
667
+ }
668
+
669
+ @JsonIgnore
670
+ public char getCharacter()
671
+ {
672
+ return character;
673
+ }
674
+
675
+ @JsonValue
676
+ public String getOptionalString()
677
+ {
678
+ return new String(new char[] { character });
679
+ }
680
+
681
+ @Override
682
+ public boolean equals(Object obj)
683
+ {
684
+ if (!(obj instanceof EscapeCharacter)) {
685
+ return false;
686
+ }
687
+ EscapeCharacter o = (EscapeCharacter) obj;
688
+ return character == o.character;
689
+ }
690
+
691
+ @Override
692
+ public int hashCode()
693
+ {
694
+ final int prime = 31;
695
+ int result = 1;
696
+ result = prime * result + character;
697
+ return result;
698
+ }
699
+ }
700
+ }