embulk-input-marketo-extended 0.6.18

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/.github/PULL_REQUEST_TEMPLATE.md +37 -0
  3. data/.gitignore +14 -0
  4. data/.travis.yml +6 -0
  5. data/CHANGELOG.md +170 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +213 -0
  8. data/build.gradle +103 -0
  9. data/config/checkstyle/checkstyle.xml +128 -0
  10. data/config/checkstyle/default.xml +108 -0
  11. data/gradle/wrapper/gradle-wrapper.jar +0 -0
  12. data/gradle/wrapper/gradle-wrapper.properties +6 -0
  13. data/gradlew +169 -0
  14. data/gradlew.bat +84 -0
  15. data/lib/embulk/input/marketo.rb +3 -0
  16. data/settings.gradle +1 -0
  17. data/src/main/java/org/embulk/input/marketo/CsvTokenizer.java +700 -0
  18. data/src/main/java/org/embulk/input/marketo/MarketoInputPlugin.java +15 -0
  19. data/src/main/java/org/embulk/input/marketo/MarketoInputPluginDelegate.java +100 -0
  20. data/src/main/java/org/embulk/input/marketo/MarketoService.java +38 -0
  21. data/src/main/java/org/embulk/input/marketo/MarketoServiceImpl.java +245 -0
  22. data/src/main/java/org/embulk/input/marketo/MarketoUtils.java +212 -0
  23. data/src/main/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPlugin.java +167 -0
  24. data/src/main/java/org/embulk/input/marketo/delegate/CampaignInputPlugin.java +48 -0
  25. data/src/main/java/org/embulk/input/marketo/delegate/CustomObjectInputPlugin.java +75 -0
  26. data/src/main/java/org/embulk/input/marketo/delegate/CustomObjectResponseMapperBuilder.java +81 -0
  27. data/src/main/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPlugin.java +66 -0
  28. data/src/main/java/org/embulk/input/marketo/delegate/LeadServiceResponseMapperBuilder.java +85 -0
  29. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithListInputPlugin.java +64 -0
  30. data/src/main/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPlugin.java +60 -0
  31. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPlugin.java +441 -0
  32. data/src/main/java/org/embulk/input/marketo/delegate/MarketoBaseInputPluginDelegate.java +92 -0
  33. data/src/main/java/org/embulk/input/marketo/delegate/ProgramInputPlugin.java +228 -0
  34. data/src/main/java/org/embulk/input/marketo/exception/MarketoAPIException.java +30 -0
  35. data/src/main/java/org/embulk/input/marketo/model/BulkExtractRangeHeader.java +26 -0
  36. data/src/main/java/org/embulk/input/marketo/model/MarketoAccessTokenResponse.java +92 -0
  37. data/src/main/java/org/embulk/input/marketo/model/MarketoBulkExtractRequest.java +68 -0
  38. data/src/main/java/org/embulk/input/marketo/model/MarketoError.java +40 -0
  39. data/src/main/java/org/embulk/input/marketo/model/MarketoField.java +126 -0
  40. data/src/main/java/org/embulk/input/marketo/model/MarketoResponse.java +82 -0
  41. data/src/main/java/org/embulk/input/marketo/model/filter/DateRangeFilter.java +40 -0
  42. data/src/main/java/org/embulk/input/marketo/rest/MarketoBaseRestClient.java +306 -0
  43. data/src/main/java/org/embulk/input/marketo/rest/MarketoInputStreamResponseEntityReader.java +69 -0
  44. data/src/main/java/org/embulk/input/marketo/rest/MarketoRESTEndpoint.java +47 -0
  45. data/src/main/java/org/embulk/input/marketo/rest/MarketoResponseJetty92EntityReader.java +89 -0
  46. data/src/main/java/org/embulk/input/marketo/rest/MarketoRestClient.java +569 -0
  47. data/src/main/java/org/embulk/input/marketo/rest/RecordPagingIterable.java +180 -0
  48. data/src/test/java/org/embulk/input/marketo/MarketoServiceImplTest.java +140 -0
  49. data/src/test/java/org/embulk/input/marketo/MarketoUtilsTest.java +87 -0
  50. data/src/test/java/org/embulk/input/marketo/delegate/ActivityBulkExtractInputPluginTest.java +128 -0
  51. data/src/test/java/org/embulk/input/marketo/delegate/CampaignInputPluginTest.java +73 -0
  52. data/src/test/java/org/embulk/input/marketo/delegate/CustomObjectInputPluginTest.java +102 -0
  53. data/src/test/java/org/embulk/input/marketo/delegate/LeadBulkExtractInputPluginTest.java +99 -0
  54. data/src/test/java/org/embulk/input/marketo/delegate/LeadServiceResponseMapperBuilderTest.java +119 -0
  55. data/src/test/java/org/embulk/input/marketo/delegate/LeadWithListInputPluginTest.java +101 -0
  56. data/src/test/java/org/embulk/input/marketo/delegate/LeadWithProgramInputPluginTest.java +103 -0
  57. data/src/test/java/org/embulk/input/marketo/delegate/MarketoBaseBulkExtractInputPluginTest.java +169 -0
  58. data/src/test/java/org/embulk/input/marketo/delegate/ProgramInputPluginTest.java +343 -0
  59. data/src/test/java/org/embulk/input/marketo/rest/MarketoBaseRestClientTest.java +368 -0
  60. data/src/test/java/org/embulk/input/marketo/rest/MarketoRestClientTest.java +584 -0
  61. data/src/test/resources/config/activity_bulk_extract_config.yaml +7 -0
  62. data/src/test/resources/config/custom_object_config.yaml +8 -0
  63. data/src/test/resources/config/lead_bulk_extract_config.yaml +8 -0
  64. data/src/test/resources/config/rest_config.yaml +3 -0
  65. data/src/test/resources/fixtures/activity_extract1.csv +35 -0
  66. data/src/test/resources/fixtures/activity_extract2.csv +22 -0
  67. data/src/test/resources/fixtures/activity_types.json +22 -0
  68. data/src/test/resources/fixtures/all_program_full.json +53 -0
  69. data/src/test/resources/fixtures/campaign_response.json +38 -0
  70. data/src/test/resources/fixtures/campaign_response_full.json +102 -0
  71. data/src/test/resources/fixtures/custom_object_describe.json +124 -0
  72. data/src/test/resources/fixtures/custom_object_describe_marketo_fields_full.json +22 -0
  73. data/src/test/resources/fixtures/custom_object_expected.json +66 -0
  74. data/src/test/resources/fixtures/custom_object_response.json +24 -0
  75. data/src/test/resources/fixtures/custom_object_response_full.json +23 -0
  76. data/src/test/resources/fixtures/lead_by_list.json +33 -0
  77. data/src/test/resources/fixtures/lead_by_program_response.json +47 -0
  78. data/src/test/resources/fixtures/lead_describe.json +221 -0
  79. data/src/test/resources/fixtures/lead_describe_expected.json +66 -0
  80. data/src/test/resources/fixtures/lead_describe_marketo_fields_full.json +518 -0
  81. data/src/test/resources/fixtures/lead_extract1.csv +11 -0
  82. data/src/test/resources/fixtures/lead_response_full.json +2402 -0
  83. data/src/test/resources/fixtures/lead_with_program_full.json +17 -0
  84. data/src/test/resources/fixtures/leads_extract2.csv +10 -0
  85. data/src/test/resources/fixtures/list_reponse_full.json +191 -0
  86. data/src/test/resources/fixtures/lists_response.json +31 -0
  87. data/src/test/resources/fixtures/program_response.json +71 -0
  88. metadata +171 -0
@@ -0,0 +1,84 @@
1
+ @rem Command echoing is switched off unless the DEBUG variable is set.
+ @if "%DEBUG%" == "" @echo off
2
+ @rem ##########################################################################
3
+ @rem
4
+ @rem Gradle startup script for Windows
5
+ @rem
6
+ @rem ##########################################################################
7
+
8
+ @rem Keep variable changes local to this script when running under a Windows NT shell
9
+ if "%OS%"=="Windows_NT" setlocal
10
+
11
+ @rem DIRNAME is taken from %~dp0 and falls back to "." when that expands to nothing;
+ @rem APP_BASE_NAME is taken from %~n0; APP_HOME is the same value as DIRNAME.
+ set DIRNAME=%~dp0
12
+ if "%DIRNAME%" == "" set DIRNAME=.
13
+ set APP_BASE_NAME=%~n0
14
+ set APP_HOME=%DIRNAME%
15
+
16
+ @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
17
+ set DEFAULT_JVM_OPTS=
18
+
19
+ @rem Find java.exe: use JAVA_HOME when it is defined, otherwise try "java.exe" as a plain command
20
+ if defined JAVA_HOME goto findJavaFromJavaHome
21
+
22
+ set JAVA_EXE=java.exe
23
+ @rem Probe the command with all output discarded; an ERRORLEVEL of 0 means it can be run
+ %JAVA_EXE% -version >NUL 2>&1
24
+ if "%ERRORLEVEL%" == "0" goto init
25
+
26
+ echo.
27
+ echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
28
+ echo.
29
+ echo Please set the JAVA_HOME variable in your environment to match the
30
+ echo location of your Java installation.
31
+
32
+ goto fail
33
+
34
+ :findJavaFromJavaHome
35
+ @rem Remove any double-quote characters from JAVA_HOME before building the java.exe path
+ set JAVA_HOME=%JAVA_HOME:"=%
36
+ set JAVA_EXE=%JAVA_HOME%/bin/java.exe
37
+
38
+ if exist "%JAVA_EXE%" goto init
39
+
40
+ echo.
41
+ echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
42
+ echo.
43
+ echo Please set the JAVA_HOME variable in your environment to match the
44
+ echo location of your Java installation.
45
+
46
+ goto fail
47
+
48
+ :init
49
+ @rem Get command-line arguments, handling Windows variants
50
+
51
+ @rem NOTE: the goto below targets the label that immediately follows it, so both
+ @rem the Windows_NT and the non-NT case continue with the same code.
+ if not "%OS%" == "Windows_NT" goto win9xME_args
52
+
53
+ :win9xME_args
54
+ @rem Slurp the command line arguments.
55
+ set CMD_LINE_ARGS=
56
+ @rem NOTE: _SKIP is assigned here but not read anywhere else in this script.
+ set _SKIP=2
57
+
58
+ :win9xME_args_slurp
59
+ @rem With no first argument CMD_LINE_ARGS stays empty; otherwise it receives all arguments (%*)
+ if "x%~1" == "x" goto execute
60
+
61
+ set CMD_LINE_ARGS=%*
62
+
63
+ :execute
64
+ @rem Setup the command line
65
+
66
+ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
67
+
68
+ @rem Execute Gradle
69
+ "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
70
+
71
+ :end
72
+ @rem An ERRORLEVEL of 0 from the command above skips the failure path; the local
+ @rem variable scope itself is ended at :mainEnd, any other value falls through to :fail
73
+ if "%ERRORLEVEL%"=="0" goto mainEnd
74
+
75
+ :fail
76
+ rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
77
+ rem the _cmd.exe /c_ return code!
78
+ if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
79
+ exit /b 1
80
+
81
+ :mainEnd
82
+ if "%OS%"=="Windows_NT" endlocal
83
+
84
+ @rem Final label of the script; no commands follow it.
+ :omega
@@ -0,0 +1,3 @@
1
# Registers the Java-implemented input plugin with Embulk under the name
# "marketo". The classpath directory is resolved relative to this file.
plugin_classpath = File.expand_path('../../../../classpath', __FILE__)

Embulk::JavaPlugin.register_input(
  "marketo",
  "org.embulk.input.marketo.MarketoInputPlugin",
  plugin_classpath
)
@@ -0,0 +1 @@
1
// Name of the root Gradle project for this build.
rootProject.name = 'embulk-input-marketo-extended'
@@ -0,0 +1,700 @@
1
+ package org.embulk.input.marketo;
2
+ import com.fasterxml.jackson.annotation.JsonCreator;
3
+ import com.fasterxml.jackson.annotation.JsonIgnore;
4
+ import com.fasterxml.jackson.annotation.JsonValue;
5
+ import com.google.common.base.Optional;
6
+ import com.google.common.base.Preconditions;
7
+ import org.embulk.config.Config;
8
+ import org.embulk.config.ConfigDefault;
9
+ import org.embulk.config.ConfigException;
10
+ import org.embulk.spi.DataException;
11
+ import org.embulk.spi.Exec;
12
+ import org.embulk.spi.util.LineDecoder;
13
+
14
+ import java.util.ArrayDeque;
15
+ import java.util.ArrayList;
16
+ import java.util.Deque;
17
+ import java.util.List;
18
+
19
+ /**
20
+ * Created by tai.khuu on 9/15/17.
21
+ */
22
+ public class CsvTokenizer
23
+ {
24
+ static enum RecordState
25
+ {
26
+ NOT_END, END,
27
+ }
28
+
29
+ static enum ColumnState
30
+ {
31
+ BEGIN, VALUE, QUOTED_VALUE, AFTER_QUOTED_VALUE, FIRST_TRIM, LAST_TRIM_OR_VALUE,
32
+ }
33
+
34
+ private static final char END_OF_LINE = '\0';
35
+ static final char NO_QUOTE = '\0';
36
+ static final char NO_ESCAPE = '\0';
37
+
38
+ public interface PluginTask extends LineDecoder.DecoderTask
39
+ {
40
+ @Config("delimiter")
41
+ @ConfigDefault("\",\"")
42
+ String getDelimiter();
43
+
44
+ @Config("quote")
45
+ @ConfigDefault("\"\\\"\"")
46
+ Optional<QuoteCharacter> getQuoteChar();
47
+
48
+ @Config("escape")
49
+ @ConfigDefault("\"\\\\\"")
50
+ Optional<EscapeCharacter> getEscapeChar();
51
+
52
+ // Null value handling: if the CsvParser found 'non-quoted empty string's,
53
+ // it replaces them to string that users specified like "\N", "NULL".
54
+ @Config("null_string")
55
+ @ConfigDefault("\"null\"")
56
+ Optional<String> getNullString();
57
+
58
+ @Config("trim_if_not_quoted")
59
+ @ConfigDefault("false")
60
+ boolean getTrimIfNotQuoted();
61
+
62
+ @Config("max_quoted_size_limit")
63
+ @ConfigDefault("131072") //128kB
64
+ long getMaxQuotedSizeLimit();
65
+
66
+ @Config("comment_line_marker")
67
+ @ConfigDefault("null")
68
+ Optional<String> getCommentLineMarker();
69
+ }
70
+
71
+ private final char delimiterChar;
72
+ private final String delimiterFollowingString;
73
+ private final char quote;
74
+ private final char escape;
75
+ private final String newline;
76
+ private final boolean trimIfNotQuoted;
77
+ private final long maxQuotedSizeLimit;
78
+ private final String commentLineMarker;
79
+ private final LineDecoder input;
80
+ private final String nullStringOrNull;
81
+
82
+ private RecordState recordState = RecordState.END; // initial state is end of a record. nextRecord() must be called first
83
+ private long lineNumber = 0;
84
+
85
+ private String line = null;
86
+ private int linePos = 0;
87
+ private boolean wasQuotedColumn = false;
88
+ private List<String> quotedValueLines = new ArrayList<>();
89
+ private Deque<String> unreadLines = new ArrayDeque<>();
90
+
91
+ public CsvTokenizer(LineDecoder input, PluginTask task)
92
+ {
93
+ this(task.getDelimiter(), task.getQuoteChar().or(QuoteCharacter.noQuote()).getCharacter(),
94
+ task.getEscapeChar().or(EscapeCharacter.noEscape()).getCharacter(), task.getNewline().getString(),
95
+ task.getTrimIfNotQuoted(), task.getMaxQuotedSizeLimit(), task.getCommentLineMarker().orNull(), input, task.getNullString().orNull());
96
+ }
97
+
98
+ public CsvTokenizer(String delimiter, char quote, char escape, String newline, boolean trimIfNotQuoted, long maxQuotedSizeLimit, String commentLineMarker, LineDecoder input, String nullStringOrNull)
99
+ {
100
+ if (delimiter.length() == 0) {
101
+ throw new ConfigException("Empty delimiter is not allowed");
102
+ }
103
+ else {
104
+ this.delimiterChar = delimiter.charAt(0);
105
+ if (delimiter.length() > 1) {
106
+ delimiterFollowingString = delimiter.substring(1);
107
+ }
108
+ else {
109
+ delimiterFollowingString = null;
110
+ }
111
+ }
112
+ this.quote = quote;
113
+ this.escape = escape;
114
+ this.newline = newline;
115
+ this.trimIfNotQuoted = trimIfNotQuoted;
116
+ this.maxQuotedSizeLimit = maxQuotedSizeLimit;
117
+ this.commentLineMarker = commentLineMarker;
118
+ this.input = input;
119
+ this.nullStringOrNull = nullStringOrNull;
120
+ }
121
+
122
+ public long getCurrentLineNumber()
123
+ {
124
+ return lineNumber;
125
+ }
126
+
127
+ public boolean skipHeaderLine()
128
+ {
129
+ boolean skipped = input.poll() != null;
130
+ if (skipped) {
131
+ lineNumber++;
132
+ }
133
+ return skipped;
134
+ }
135
+
136
+ // returns skipped line
137
+ public String skipCurrentLine()
138
+ {
139
+ String skippedLine;
140
+ if (quotedValueLines.isEmpty()) {
141
+ skippedLine = line;
142
+ }
143
+ else {
144
+ // recover lines of quoted value
145
+ skippedLine = quotedValueLines.remove(0); // TODO optimize performance
146
+ unreadLines.addAll(quotedValueLines);
147
+ lineNumber -= quotedValueLines.size();
148
+ if (line != null) {
149
+ unreadLines.add(line);
150
+ lineNumber -= 1;
151
+ }
152
+ quotedValueLines.clear();
153
+ }
154
+ recordState = RecordState.END;
155
+ return skippedLine;
156
+ }
157
+
158
+ public boolean nextFile()
159
+ {
160
+ boolean next = input.nextFile();
161
+ if (next) {
162
+ lineNumber = 0;
163
+ }
164
+ return next;
165
+ }
166
+
167
+ // used by guess-csv
168
+ public boolean nextRecord()
169
+ {
170
+ return nextRecord(true);
171
+ }
172
+
173
+ public boolean nextRecord(boolean skipEmptyLine)
174
+ {
175
+ // If at the end of record, read the next line and initialize the state
176
+ if (recordState != RecordState.END) {
177
+ throw new TooManyColumnsException("Too many columns");
178
+ }
179
+
180
+ boolean hasNext = nextLine(skipEmptyLine);
181
+ if (hasNext) {
182
+ recordState = RecordState.NOT_END;
183
+ return true;
184
+ }
185
+ else {
186
+ return false;
187
+ }
188
+ }
189
+
190
+ private boolean nextLine(boolean skipEmptyLine)
191
+ {
192
+ while (true) {
193
+ if (!unreadLines.isEmpty()) {
194
+ line = unreadLines.removeFirst();
195
+ }
196
+ else {
197
+ line = input.poll();
198
+ if (line == null) {
199
+ return false;
200
+ }
201
+ }
202
+ linePos = 0;
203
+ lineNumber++;
204
+
205
+ boolean skip = skipEmptyLine && (
206
+ line.isEmpty() ||
207
+ (commentLineMarker != null && line.startsWith(commentLineMarker)));
208
+ if (!skip) {
209
+ return true;
210
+ }
211
+ }
212
+ }
213
+
214
+ public boolean hasNextColumn()
215
+ {
216
+ return recordState == RecordState.NOT_END;
217
+ }
218
+
219
+ public String nextColumn()
220
+ {
221
+ if (!hasNextColumn()) {
222
+ throw new TooFewColumnsException("Too few columns");
223
+ }
224
+
225
+ // reset last state
226
+ wasQuotedColumn = false;
227
+ quotedValueLines.clear();
228
+
229
+ // local state
230
+ int valueStartPos = linePos;
231
+ int valueEndPos = 0; // initialized by VALUE state and used by LAST_TRIM_OR_VALUE and
232
+ StringBuilder quotedValue = null; // initial by VALUE or FIRST_TRIM state and used by QUOTED_VALUE state
233
+ ColumnState columnState = ColumnState.BEGIN;
234
+
235
+ while (true) {
236
+ final char c = nextChar();
237
+
238
+ switch (columnState) {
239
+ case BEGIN:
240
+ // TODO optimization: state is BEGIN only at the first character of a column.
241
+ // this block can be out of the looop.
242
+ if (isDelimiter(c)) {
243
+ // empty value
244
+ if (delimiterFollowingString == null) {
245
+ return "";
246
+ }
247
+ else if (isDelimiterFollowingFrom(linePos)) {
248
+ linePos += delimiterFollowingString.length();
249
+ return "";
250
+ }
251
+ // not a delimiter
252
+ }
253
+ if (isEndOfLine(c)) {
254
+ // empty value
255
+ recordState = RecordState.END;
256
+ return "";
257
+ }
258
+ else if (isSpace(c) && trimIfNotQuoted) {
259
+ columnState = ColumnState.FIRST_TRIM;
260
+ }
261
+ else if (isQuote(c)) {
262
+ valueStartPos = linePos; // == 1
263
+ wasQuotedColumn = true;
264
+ quotedValue = new StringBuilder();
265
+ columnState = ColumnState.QUOTED_VALUE;
266
+ }
267
+ else {
268
+ columnState = ColumnState.VALUE;
269
+ }
270
+ break;
271
+
272
+ case FIRST_TRIM:
273
+ if (isDelimiter(c)) {
274
+ // empty value
275
+ if (delimiterFollowingString == null) {
276
+ return "";
277
+ }
278
+ else if (isDelimiterFollowingFrom(linePos)) {
279
+ linePos += delimiterFollowingString.length();
280
+ return "";
281
+ }
282
+ // not a delimiter
283
+ }
284
+ if (isEndOfLine(c)) {
285
+ // empty value
286
+ recordState = RecordState.END;
287
+ return "";
288
+ }
289
+ else if (isQuote(c)) {
290
+ // column has heading spaces and quoted. TODO should this be rejected?
291
+ valueStartPos = linePos;
292
+ wasQuotedColumn = true;
293
+ quotedValue = new StringBuilder();
294
+ columnState = ColumnState.QUOTED_VALUE;
295
+ }
296
+ else if (isSpace(c)) {
297
+ // skip this character
298
+ } else {
299
+ valueStartPos = linePos - 1;
300
+ columnState = ColumnState.VALUE;
301
+ }
302
+ break;
303
+
304
+ case VALUE:
305
+ if (isDelimiter(c)) {
306
+ if (delimiterFollowingString == null) {
307
+ return line.substring(valueStartPos, linePos - 1);
308
+ }
309
+ else if (isDelimiterFollowingFrom(linePos)) {
310
+ String value = line.substring(valueStartPos, linePos - 1);
311
+ linePos += delimiterFollowingString.length();
312
+ return value;
313
+ }
314
+ // not a delimiter
315
+ }
316
+ if (isEndOfLine(c)) {
317
+ recordState = RecordState.END;
318
+ return line.substring(valueStartPos, linePos);
319
+ }
320
+ else if (isSpace(c) && trimIfNotQuoted) {
321
+ valueEndPos = linePos - 1; // this is possibly end of value
322
+ columnState = ColumnState.LAST_TRIM_OR_VALUE;
323
+
324
+ // TODO not implemented yet foo""bar""baz -> [foo, bar, baz].append
325
+ //} else if (isQuote(c)) {
326
+ // // In RFC4180, If fields are not enclosed with double quotes, then
327
+ // // double quotes may not appear inside the fields. But they are often
328
+ // // included in the fields. We should care about them later.
329
+ }
330
+ else {
331
+ // keep VALUE state
332
+ }
333
+ break;
334
+
335
+ case LAST_TRIM_OR_VALUE:
336
+ if (isDelimiter(c)) {
337
+ if (delimiterFollowingString == null) {
338
+ return line.substring(valueStartPos, valueEndPos);
339
+ }
340
+ else if (isDelimiterFollowingFrom(linePos)) {
341
+ linePos += delimiterFollowingString.length();
342
+ return line.substring(valueStartPos, valueEndPos);
343
+ }
344
+ else {
345
+ // not a delimiter
346
+ }
347
+ }
348
+ if (isEndOfLine(c)) {
349
+ recordState = RecordState.END;
350
+ return line.substring(valueStartPos, valueEndPos);
351
+ }
352
+ else if (isSpace(c)) {
353
+ // keep LAST_TRIM_OR_VALUE state
354
+ } else {
355
+ // this spaces are not trailing spaces. go back to VALUE state
356
+ columnState = ColumnState.VALUE;
357
+ }
358
+ break;
359
+
360
+ case QUOTED_VALUE:
361
+ if (isEndOfLine(c)) {
362
+ // multi-line quoted value
363
+ quotedValue.append(line.substring(valueStartPos, linePos));
364
+ quotedValue.append(newline);
365
+ quotedValueLines.add(line);
366
+ if (!nextLine(false)) {
367
+ throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
368
+ }
369
+ valueStartPos = 0;
370
+ }
371
+ else if (isQuote(c)) {
372
+ char next = peekNextChar();
373
+ if (isQuote(next)) { // escaped quote
374
+ quotedValue.append(line.substring(valueStartPos, linePos));
375
+ valueStartPos = ++linePos;
376
+ }
377
+ else {
378
+ quotedValue.append(line.substring(valueStartPos, linePos - 1));
379
+ columnState = ColumnState.AFTER_QUOTED_VALUE;
380
+ }
381
+ }
382
+ else if (isEscape(c)) { // isQuote must be checked first in case of quote == escape
383
+ // In RFC 4180, CSV's escape char is '\"'. But '\\' is often used.
384
+ char next = peekNextChar();
385
+ if (isEndOfLine(c)) {
386
+ // escape end of line. TODO assuming multi-line quoted value without newline?
387
+ quotedValue.append(line.substring(valueStartPos, linePos));
388
+ quotedValueLines.add(line);
389
+ if (!nextLine(false)) {
390
+ throw new InvalidValueException("Unexpected end of line during parsing a quoted value");
391
+ }
392
+ valueStartPos = 0;
393
+ }
394
+ else if (isQuote(next) || isEscape(next)) { // escaped quote
395
+ quotedValue.append(line.substring(valueStartPos, linePos - 1));
396
+ quotedValue.append(next);
397
+ valueStartPos = ++linePos;
398
+ }
399
+ }
400
+ else {
401
+ if ((linePos - valueStartPos) + quotedValue.length() > maxQuotedSizeLimit) {
402
+ throw new QuotedSizeLimitExceededException("The size of the quoted value exceeds the limit size (" + maxQuotedSizeLimit + ")");
403
+ }
404
+ // keep QUOTED_VALUE state
405
+ }
406
+ break;
407
+
408
+ case AFTER_QUOTED_VALUE:
409
+ if (isDelimiter(c)) {
410
+ if (delimiterFollowingString == null) {
411
+ return quotedValue.toString();
412
+ }
413
+ else if (isDelimiterFollowingFrom(linePos)) {
414
+ linePos += delimiterFollowingString.length();
415
+ return quotedValue.toString();
416
+ }
417
+ // not a delimiter
418
+ }
419
+ if (isEndOfLine(c)) {
420
+ recordState = RecordState.END;
421
+ return quotedValue.toString();
422
+ }
423
+ else if (isSpace(c)) {
424
+ // column has trailing spaces and quoted. TODO should this be rejected?
425
+ } else {
426
+ // I do not see a reason to reject record if stray quotes happen:
427
+ // ACCEPT_STRAY_QUOTES_ASSUMING_NO_DELIMITERS_IN_FIELDS Accept stray quotes as-is in the field. Instead, it behaves undefined if delimiters are in fields. "a"b" goes a"b. "a""b" goes a"b.
428
+ // https://www.embulk.org/docs/built-in.html#csv-parser-plugin
429
+ // throw new InvalidValueException(String.format("Unexpected extra character '%c' after a value quoted by '%c'", c, quote));
430
+ Exec.getLogger(CsvTokenizer.class).warn(String.format("Unexpected extra character '%c' after a value quoted by '%c', Record= %s", c, quote, line));
431
+
432
+ }
433
+ break;
434
+
435
+ default:
436
+ assert false;
437
+ }
438
+ }
439
+ }
440
+
441
+ public String nextColumnOrNull()
442
+ {
443
+ String v = nextColumn();
444
+ if (nullStringOrNull == null) {
445
+ if (v.isEmpty()) {
446
+ if (wasQuotedColumn) {
447
+ return "";
448
+ }
449
+ else {
450
+ return null;
451
+ }
452
+ }
453
+ else {
454
+ return v;
455
+ }
456
+ }
457
+ else {
458
+ if (v.equals(nullStringOrNull)) {
459
+ return null;
460
+ }
461
+ else {
462
+ return v;
463
+ }
464
+ }
465
+ }
466
+
467
+ public boolean wasQuotedColumn()
468
+ {
469
+ return wasQuotedColumn;
470
+ }
471
+
472
+ private char nextChar()
473
+ {
474
+ Preconditions.checkState(line != null, "nextColumn is called after end of file");
475
+
476
+ if (linePos >= line.length()) {
477
+ return END_OF_LINE;
478
+ }
479
+ else {
480
+ return line.charAt(linePos++);
481
+ }
482
+ }
483
+
484
+ private char peekNextChar()
485
+ {
486
+ Preconditions.checkState(line != null, "peekNextChar is called after end of file");
487
+
488
+ if (linePos >= line.length()) {
489
+ return END_OF_LINE;
490
+ }
491
+ else {
492
+ return line.charAt(linePos);
493
+ }
494
+ }
495
+
496
+ private boolean isSpace(char c)
497
+ {
498
+ return c == ' ';
499
+ }
500
+
501
+ private boolean isDelimiterFollowingFrom(int pos)
502
+ {
503
+ if (line.length() < pos + delimiterFollowingString.length()) {
504
+ return false;
505
+ }
506
+ for (int i = 0; i < delimiterFollowingString.length(); i++) {
507
+ if (delimiterFollowingString.charAt(i) != line.charAt(pos + i)) {
508
+ return false;
509
+ }
510
+ }
511
+ return true;
512
+ }
513
+
514
+ private boolean isDelimiter(char c)
515
+ {
516
+ return c == delimiterChar;
517
+ }
518
+
519
+ private boolean isEndOfLine(char c)
520
+ {
521
+ return c == END_OF_LINE;
522
+ }
523
+
524
+ private boolean isQuote(char c)
525
+ {
526
+ return quote != NO_QUOTE && c == quote;
527
+ }
528
+
529
+ private boolean isEscape(char c)
530
+ {
531
+ return escape != NO_ESCAPE && c == escape;
532
+ }
533
+
534
+ public static class InvalidFormatException
535
+ extends DataException
536
+ {
537
+ public InvalidFormatException(String message)
538
+ {
539
+ super(message);
540
+ }
541
+ }
542
+
543
+ public static class InvalidValueException
544
+ extends DataException
545
+ {
546
+ public InvalidValueException(String message)
547
+ {
548
+ super(message);
549
+ }
550
+ }
551
+
552
+ public static class QuotedSizeLimitExceededException
553
+ extends InvalidValueException
554
+ {
555
+ public QuotedSizeLimitExceededException(String message)
556
+ {
557
+ super(message);
558
+ }
559
+ }
560
+
561
+ public class TooManyColumnsException
562
+ extends InvalidFormatException
563
+ {
564
+ public TooManyColumnsException(String message)
565
+ {
566
+ super(message);
567
+ }
568
+ }
569
+
570
+ public class TooFewColumnsException
571
+ extends InvalidFormatException
572
+ {
573
+ public TooFewColumnsException(String message)
574
+ {
575
+ super(message);
576
+ }
577
+ }
578
+
579
+ public static class QuoteCharacter
580
+ {
581
+ private final char character;
582
+
583
+ public QuoteCharacter(char character)
584
+ {
585
+ this.character = character;
586
+ }
587
+
588
+ public static QuoteCharacter noQuote()
589
+ {
590
+ return new QuoteCharacter(CsvTokenizer.NO_QUOTE);
591
+ }
592
+
593
+ @JsonCreator
594
+ public static QuoteCharacter ofString(String str)
595
+ {
596
+ if (str.length() >= 2) {
597
+ throw new ConfigException("\"quote\" option accepts only 1 character.");
598
+ }
599
+ else if (str.isEmpty()) {
600
+ Exec.getLogger(CsvTokenizer.class).warn("Setting '' (empty string) to \"quote\" option is obsoleted. Currently it becomes '\"' automatically but this behavior will be removed. Please set '\"' explicitly.");
601
+ return new QuoteCharacter('"');
602
+ }
603
+ else {
604
+ return new QuoteCharacter(str.charAt(0));
605
+ }
606
+ }
607
+
608
+ @JsonIgnore
609
+ public char getCharacter()
610
+ {
611
+ return character;
612
+ }
613
+
614
+ @JsonValue
615
+ public String getOptionalString()
616
+ {
617
+ return new String(new char[] { character });
618
+ }
619
+
620
+ @Override
621
+ public int hashCode()
622
+ {
623
+ final int prime = 31;
624
+ int result = 1;
625
+ result = prime * result + character;
626
+ return result;
627
+ }
628
+
629
+ @Override
630
+ public boolean equals(Object obj)
631
+ {
632
+ if (!(obj instanceof QuoteCharacter)) {
633
+ return false;
634
+ }
635
+ QuoteCharacter o = (QuoteCharacter) obj;
636
+ return character == o.character;
637
+ }
638
+ }
639
+
640
+ public static class EscapeCharacter
641
+ {
642
+ private final char character;
643
+
644
+ public EscapeCharacter(char character)
645
+ {
646
+ this.character = character;
647
+ }
648
+
649
+ public static EscapeCharacter noEscape()
650
+ {
651
+ return new EscapeCharacter(CsvTokenizer.NO_ESCAPE);
652
+ }
653
+
654
+ @JsonCreator
655
+ public static EscapeCharacter ofString(String str)
656
+ {
657
+ if (str.length() >= 2) {
658
+ throw new ConfigException("\"escape\" option accepts only 1 character.");
659
+ }
660
+ else if (str.isEmpty()) {
661
+ Exec.getLogger(CsvTokenizer.class).warn("Setting '' (empty string) to \"escape\" option is obsoleted. Currently it becomes null automatically but this behavior will be removed. Please set \"escape: null\" explicitly.");
662
+ return noEscape();
663
+ }
664
+ else {
665
+ return new EscapeCharacter(str.charAt(0));
666
+ }
667
+ }
668
+
669
+ @JsonIgnore
670
+ public char getCharacter()
671
+ {
672
+ return character;
673
+ }
674
+
675
+ @JsonValue
676
+ public String getOptionalString()
677
+ {
678
+ return new String(new char[] { character });
679
+ }
680
+
681
+ @Override
682
+ public boolean equals(Object obj)
683
+ {
684
+ if (!(obj instanceof EscapeCharacter)) {
685
+ return false;
686
+ }
687
+ EscapeCharacter o = (EscapeCharacter) obj;
688
+ return character == o.character;
689
+ }
690
+
691
+ @Override
692
+ public int hashCode()
693
+ {
694
+ final int prime = 31;
695
+ int result = 1;
696
+ result = prime * result + character;
697
+ return result;
698
+ }
699
+ }
700
+ }