embulk 0.8.26-java → 0.8.27-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/build.gradle +4 -3
- data/embulk-core/build.gradle +2 -0
- data/embulk-core/src/main/java/org/embulk/spi/time/TimestampParser.java +126 -27
- data/embulk-core/src/test/java/org/embulk/spi/time/TestTimestampFormatterParser.java +3 -3
- data/embulk-docs/src/release.rst +1 -0
- data/embulk-docs/src/release/release-0.8.27.rst +15 -0
- data/embulk-jruby-strptime/build.gradle +3 -0
- data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/RubyDateParser.java +121 -0
- data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/StrptimeFormat.java +53 -0
- data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/StrptimeParser.java +884 -0
- data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/StrptimeToken.java +111 -0
- data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/TimeZoneConverter.java +466 -0
- data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/lexer/StrptimeLexer.flex +62 -0
- data/embulk-jruby-strptime/src/main/java/org/embulk/spi/time/lexer/StrptimeLexer.java +577 -0
- data/lib/embulk/version.rb +1 -1
- data/settings.gradle +1 -0
- data/test/run-test.rb +14 -0
- metadata +15 -5
@@ -0,0 +1,884 @@
|
|
1
|
+
package org.embulk.spi.time;
|
2
|
+
|
3
|
+
import java.io.IOException;
|
4
|
+
import java.io.Reader;
|
5
|
+
import java.io.StringReader;
|
6
|
+
import java.util.EnumSet;
|
7
|
+
import java.util.LinkedList;
|
8
|
+
import java.util.List;
|
9
|
+
import java.util.Arrays;
|
10
|
+
import java.util.regex.Matcher;
|
11
|
+
import java.util.regex.Pattern;
|
12
|
+
|
13
|
+
import org.embulk.spi.time.lexer.StrptimeLexer;
|
14
|
+
|
15
|
+
/**
|
16
|
+
* This is Java implementation of ext/date/date_strptime.c in Ruby v2.3.1.
|
17
|
+
* @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strptime.c">date_strptime.c</a>
|
18
|
+
*
|
19
|
+
* TODO
|
20
|
+
* This class is tentatively required for {@code TimestampParser} class.
|
21
|
+
* The {@code StrptimeParser} and {@code RubyDateParser} will be merged into JRuby
|
22
|
+
* (jruby/jruby#4591). embulk-jruby-strptime will be removed once Embulk starts using
* a JRuby that bundles embulk-jruby-strptime.
|
24
|
+
*/
|
25
|
+
public class StrptimeParser
|
26
|
+
{
|
27
|
+
// day_names
|
28
|
+
private static final String[] DAY_NAMES = new String[] {
|
29
|
+
"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
|
30
|
+
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
|
31
|
+
};
|
32
|
+
|
33
|
+
// month_names
|
34
|
+
private static final String[] MONTH_NAMES = new String[] {
|
35
|
+
"January", "February", "March", "April", "May", "June", "July", "August", "September",
|
36
|
+
"October", "November", "December", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
37
|
+
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
|
38
|
+
};
|
39
|
+
|
40
|
+
// merid_names
|
41
|
+
private static final String[] MERID_NAMES = new String[] {
|
42
|
+
"am", "pm", "a.m.", "p.m."
|
43
|
+
};
|
44
|
+
|
45
|
+
/**
|
46
|
+
* Ported Date::Format::Bag from JRuby 9.1.5.0's lib/ruby/stdlib/date/format.rb.
|
47
|
+
* @see <a href="https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/lib/ruby/stdlib/date/format.rb">format.rb</a>
|
48
|
+
*/
|
49
|
+
public static class FormatBag
|
50
|
+
{
|
51
|
+
private int mDay = Integer.MIN_VALUE;
|
52
|
+
private int wDay = Integer.MIN_VALUE;
|
53
|
+
private int cWDay = Integer.MIN_VALUE;
|
54
|
+
private int yDay = Integer.MIN_VALUE;
|
55
|
+
private int cWeek = Integer.MIN_VALUE;
|
56
|
+
private int cWYear = Integer.MIN_VALUE;
|
57
|
+
private int min = Integer.MIN_VALUE;
|
58
|
+
private int mon = Integer.MIN_VALUE;
|
59
|
+
private int hour = Integer.MIN_VALUE;
|
60
|
+
private int year = Integer.MIN_VALUE;
|
61
|
+
private int sec = Integer.MIN_VALUE;
|
62
|
+
private int wNum0 = Integer.MIN_VALUE;
|
63
|
+
private int wNum1 = Integer.MIN_VALUE;
|
64
|
+
|
65
|
+
private String zone = null;
|
66
|
+
|
67
|
+
private int secFraction = Integer.MIN_VALUE; // Rational
|
68
|
+
private int secFractionSize = Integer.MIN_VALUE;
|
69
|
+
|
70
|
+
private long seconds = Long.MIN_VALUE; // long or Rational
|
71
|
+
private int secondsSize = Integer.MIN_VALUE;
|
72
|
+
|
73
|
+
private int merid = Integer.MIN_VALUE;
|
74
|
+
private int cent = Integer.MIN_VALUE;
|
75
|
+
|
76
|
+
private boolean fail = false;
|
77
|
+
private String leftover = null;
|
78
|
+
|
79
|
+
public int getMDay()
|
80
|
+
{
|
81
|
+
return mDay;
|
82
|
+
}
|
83
|
+
|
84
|
+
public int getWDay()
|
85
|
+
{
|
86
|
+
return wDay;
|
87
|
+
}
|
88
|
+
|
89
|
+
public int getCWDay()
|
90
|
+
{
|
91
|
+
return cWDay;
|
92
|
+
}
|
93
|
+
|
94
|
+
public int getYDay()
|
95
|
+
{
|
96
|
+
return yDay;
|
97
|
+
}
|
98
|
+
|
99
|
+
public int getCWeek()
|
100
|
+
{
|
101
|
+
return cWeek;
|
102
|
+
}
|
103
|
+
|
104
|
+
public int getCWYear()
|
105
|
+
{
|
106
|
+
return cWYear;
|
107
|
+
}
|
108
|
+
|
109
|
+
public int getMin()
|
110
|
+
{
|
111
|
+
return min;
|
112
|
+
}
|
113
|
+
|
114
|
+
public int getMon()
|
115
|
+
{
|
116
|
+
return mon;
|
117
|
+
}
|
118
|
+
|
119
|
+
public int getHour()
|
120
|
+
{
|
121
|
+
return hour;
|
122
|
+
}
|
123
|
+
|
124
|
+
public int getYear()
|
125
|
+
{
|
126
|
+
return year;
|
127
|
+
}
|
128
|
+
|
129
|
+
public int getSec()
|
130
|
+
{
|
131
|
+
return sec;
|
132
|
+
}
|
133
|
+
|
134
|
+
public int getWNum0()
|
135
|
+
{
|
136
|
+
return wNum0;
|
137
|
+
}
|
138
|
+
|
139
|
+
public int getWNum1()
|
140
|
+
{
|
141
|
+
return wNum1;
|
142
|
+
}
|
143
|
+
|
144
|
+
public String getZone()
|
145
|
+
{
|
146
|
+
return zone;
|
147
|
+
}
|
148
|
+
|
149
|
+
public int getSecFraction()
|
150
|
+
{
|
151
|
+
return secFraction;
|
152
|
+
}
|
153
|
+
|
154
|
+
public int getSecFractionSize()
|
155
|
+
{
|
156
|
+
return secFractionSize;
|
157
|
+
}
|
158
|
+
|
159
|
+
public long getSeconds()
|
160
|
+
{
|
161
|
+
return seconds;
|
162
|
+
}
|
163
|
+
|
164
|
+
public int getSecondsSize()
|
165
|
+
{
|
166
|
+
return secondsSize;
|
167
|
+
}
|
168
|
+
|
169
|
+
public int getMerid()
|
170
|
+
{
|
171
|
+
return merid;
|
172
|
+
}
|
173
|
+
|
174
|
+
public int getCent()
|
175
|
+
{
|
176
|
+
return cent;
|
177
|
+
}
|
178
|
+
|
179
|
+
void fail()
|
180
|
+
{
|
181
|
+
fail = true;
|
182
|
+
}
|
183
|
+
|
184
|
+
public String getLeftover()
|
185
|
+
{
|
186
|
+
return leftover;
|
187
|
+
}
|
188
|
+
|
189
|
+
public boolean setYearIfNotSet(int v)
|
190
|
+
{
|
191
|
+
if (has(year)) {
|
192
|
+
return false;
|
193
|
+
}
|
194
|
+
else {
|
195
|
+
year = v;
|
196
|
+
return true;
|
197
|
+
}
|
198
|
+
}
|
199
|
+
|
200
|
+
public boolean setMonthIfNotSet(int v)
|
201
|
+
{
|
202
|
+
if (has(mon)) {
|
203
|
+
return false;
|
204
|
+
}
|
205
|
+
else {
|
206
|
+
mon = v;
|
207
|
+
return true;
|
208
|
+
}
|
209
|
+
}
|
210
|
+
|
211
|
+
public boolean setMdayIfNotSet(int v)
|
212
|
+
{
|
213
|
+
if (has(mDay)) {
|
214
|
+
return false;
|
215
|
+
}
|
216
|
+
else {
|
217
|
+
mDay = v;
|
218
|
+
return true;
|
219
|
+
}
|
220
|
+
}
|
221
|
+
|
222
|
+
public boolean hasSeconds()
|
223
|
+
{
|
224
|
+
return seconds != Long.MIN_VALUE;
|
225
|
+
}
|
226
|
+
|
227
|
+
public static boolean has(int v)
|
228
|
+
{
|
229
|
+
return v != Integer.MIN_VALUE;
|
230
|
+
}
|
231
|
+
}
|
232
|
+
|
233
|
+
private final StrptimeLexer lexer;
|
234
|
+
|
235
|
+
public StrptimeParser()
|
236
|
+
{
|
237
|
+
this.lexer = new StrptimeLexer((Reader) null);
|
238
|
+
}
|
239
|
+
|
240
|
+
/**
|
241
|
+
* Ported from org.jruby.util.RubyDateFormatter#addToPattern in JRuby 9.1.5.0
|
242
|
+
* under EPL.
|
243
|
+
* @see <a href="https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java">RubyDateFormatter.java</a>
|
244
|
+
*/
|
245
|
+
private void addToPattern(final List<StrptimeToken> compiledPattern, final String str)
|
246
|
+
{
|
247
|
+
for (int i = 0; i < str.length(); i++) {
|
248
|
+
final char c = str.charAt(i);
|
249
|
+
if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
|
250
|
+
compiledPattern.add(StrptimeToken.format(c));
|
251
|
+
}
|
252
|
+
else {
|
253
|
+
compiledPattern.add(StrptimeToken.str(Character.toString(c)));
|
254
|
+
}
|
255
|
+
}
|
256
|
+
}
|
257
|
+
|
258
|
+
/**
|
259
|
+
* Ported from org.jruby.util.RubyDateFormatter#compilePattern in JRuby 9.1.5.0
|
260
|
+
* under EPL.
|
261
|
+
* @see <a href="https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java">RubyDateFormatter.java</a>
|
262
|
+
*/
|
263
|
+
public List<StrptimeToken> compilePattern(final String pattern)
|
264
|
+
{
|
265
|
+
final List<StrptimeToken> compiledPattern = new LinkedList<>();
|
266
|
+
final Reader reader = new StringReader(pattern); // TODO Use try-with-resource statement
|
267
|
+
lexer.yyreset(reader);
|
268
|
+
|
269
|
+
StrptimeToken token;
|
270
|
+
try {
|
271
|
+
while ((token = lexer.yylex()) != null) {
|
272
|
+
if (token.getFormat() != StrptimeFormat.FORMAT_SPECIAL) {
|
273
|
+
compiledPattern.add(token);
|
274
|
+
}
|
275
|
+
else {
|
276
|
+
char c = (Character) token.getData();
|
277
|
+
switch (c) {
|
278
|
+
case 'c':
|
279
|
+
addToPattern(compiledPattern, "a b e H:M:S Y");
|
280
|
+
break;
|
281
|
+
case 'D':
|
282
|
+
case 'x':
|
283
|
+
addToPattern(compiledPattern, "m/d/y");
|
284
|
+
break;
|
285
|
+
case 'F':
|
286
|
+
addToPattern(compiledPattern, "Y-m-d");
|
287
|
+
break;
|
288
|
+
case 'n':
|
289
|
+
compiledPattern.add(StrptimeToken.str("\n"));
|
290
|
+
break;
|
291
|
+
case 'R':
|
292
|
+
addToPattern(compiledPattern, "H:M");
|
293
|
+
break;
|
294
|
+
case 'r':
|
295
|
+
addToPattern(compiledPattern, "I:M:S p");
|
296
|
+
break;
|
297
|
+
case 'T':
|
298
|
+
case 'X':
|
299
|
+
addToPattern(compiledPattern, "H:M:S");
|
300
|
+
break;
|
301
|
+
case 't':
|
302
|
+
compiledPattern.add(StrptimeToken.str("\t"));
|
303
|
+
break;
|
304
|
+
case 'v':
|
305
|
+
addToPattern(compiledPattern, "e-b-Y");
|
306
|
+
break;
|
307
|
+
case 'Z':
|
308
|
+
// +HH:MM in 'date', never zone name
|
309
|
+
compiledPattern.add(StrptimeToken.zoneOffsetColons(1));
|
310
|
+
break;
|
311
|
+
case '+':
|
312
|
+
addToPattern(compiledPattern, "a b e H:M:S ");
|
313
|
+
// %Z: +HH:MM in 'date', never zone name
|
314
|
+
compiledPattern.add(StrptimeToken.zoneOffsetColons(1));
|
315
|
+
addToPattern(compiledPattern, " Y");
|
316
|
+
break;
|
317
|
+
default:
|
318
|
+
throw new Error("Unknown special char: " + c);
|
319
|
+
}
|
320
|
+
}
|
321
|
+
}
|
322
|
+
}
|
323
|
+
catch (IOException e) {
|
324
|
+
e.printStackTrace();
|
325
|
+
}
|
326
|
+
|
327
|
+
return compiledPattern;
|
328
|
+
}
|
329
|
+
|
330
|
+
public FormatBag parse(final List<StrptimeToken> compiledPattern, final String text)
|
331
|
+
{
|
332
|
+
final FormatBag bag = new StringParser(text).parse(compiledPattern);
|
333
|
+
if (bag == null) {
|
334
|
+
return null;
|
335
|
+
}
|
336
|
+
|
337
|
+
if (FormatBag.has(bag.cent)) {
|
338
|
+
if (FormatBag.has(bag.cWYear)) {
|
339
|
+
bag.cWYear += bag.cent * 100;
|
340
|
+
}
|
341
|
+
if (FormatBag.has(bag.year)) {
|
342
|
+
bag.year += bag.cent * 100;
|
343
|
+
}
|
344
|
+
|
345
|
+
// delete bag._cent
|
346
|
+
bag.cent = Integer.MIN_VALUE;
|
347
|
+
}
|
348
|
+
|
349
|
+
if (FormatBag.has(bag.merid)) {
|
350
|
+
if (FormatBag.has(bag.hour)) {
|
351
|
+
bag.hour %= 12;
|
352
|
+
bag.hour += bag.merid;
|
353
|
+
}
|
354
|
+
|
355
|
+
// delete bag._merid
|
356
|
+
bag.merid = Integer.MIN_VALUE;
|
357
|
+
}
|
358
|
+
|
359
|
+
return bag;
|
360
|
+
}
|
361
|
+
|
362
|
+
private static class StringParser
|
363
|
+
{
|
364
|
+
private static final Pattern ZONE_PARSE_REGEX = Pattern.compile("\\A(" +
|
365
|
+
"(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?" +
|
366
|
+
"|(?-i:[[\\p{Alpha}].\\s]+)(?:standard|daylight)\\s+time\\b" +
|
367
|
+
"|(?-i:[[\\p{Alpha}]]+)(?:\\s+dst)?\\b" +
|
368
|
+
")", Pattern.CASE_INSENSITIVE);
|
369
|
+
|
370
|
+
private final String text;
|
371
|
+
private final FormatBag bag;
|
372
|
+
|
373
|
+
private int pos;
|
374
|
+
private boolean fail;
|
375
|
+
|
376
|
+
/**
 * Prepares a single-use parser over {@code text}: an empty result bag,
 * the cursor at the first character, and no failure recorded.
 */
private StringParser(String text)
{
    this.fail = false;
    this.pos = 0;

    this.bag = new FormatBag();
    this.text = text;
}
|
384
|
+
|
385
|
+
private FormatBag parse(final List<StrptimeToken> compiledPattern)
|
386
|
+
{
|
387
|
+
for (int tokenIndex = 0; tokenIndex < compiledPattern.size(); tokenIndex++) {
|
388
|
+
final StrptimeToken token = compiledPattern.get(tokenIndex);
|
389
|
+
|
390
|
+
switch (token.getFormat()) {
|
391
|
+
case FORMAT_STRING: {
|
392
|
+
final String str = token.getData().toString();
|
393
|
+
for (int i = 0; i < str.length(); i++) {
|
394
|
+
final char c = str.charAt(i);
|
395
|
+
if (isSpace(c)) {
|
396
|
+
while (!isEndOfText(text, pos) && isSpace(text.charAt(pos))) {
|
397
|
+
pos++;
|
398
|
+
}
|
399
|
+
}
|
400
|
+
else {
|
401
|
+
if (isEndOfText(text, pos) || c != text.charAt(pos)) {
|
402
|
+
fail = true;
|
403
|
+
}
|
404
|
+
pos++;
|
405
|
+
}
|
406
|
+
}
|
407
|
+
break;
|
408
|
+
}
|
409
|
+
case FORMAT_WEEK_LONG: // %A - The full weekday name (``Sunday'')
|
410
|
+
case FORMAT_WEEK_SHORT: { // %a - The abbreviated name (``Sun'')
|
411
|
+
final int dayIndex = findIndexInPatterns(DAY_NAMES);
|
412
|
+
if (dayIndex >= 0) {
|
413
|
+
bag.wDay = dayIndex % 7;
|
414
|
+
pos += DAY_NAMES[dayIndex].length();
|
415
|
+
}
|
416
|
+
else {
|
417
|
+
fail = true;
|
418
|
+
}
|
419
|
+
break;
|
420
|
+
}
|
421
|
+
case FORMAT_MONTH_LONG: // %B - The full month name (``January'')
|
422
|
+
case FORMAT_MONTH_SHORT: { // %b, %h - The abbreviated month name (``Jan'')
|
423
|
+
final int monIndex = findIndexInPatterns(MONTH_NAMES);
|
424
|
+
if (monIndex >= 0) {
|
425
|
+
bag.mon = monIndex % 12 + 1;
|
426
|
+
pos += MONTH_NAMES[monIndex].length();
|
427
|
+
}
|
428
|
+
else {
|
429
|
+
fail = true;
|
430
|
+
}
|
431
|
+
break;
|
432
|
+
}
|
433
|
+
case FORMAT_CENTURY: { // %C - year / 100 (round down. 20 in 2009)
|
434
|
+
final long cent;
|
435
|
+
if (isNumberPattern(compiledPattern, tokenIndex)) {
|
436
|
+
cent = readDigits(2);
|
437
|
+
}
|
438
|
+
else {
|
439
|
+
cent = readDigitsMax();
|
440
|
+
}
|
441
|
+
bag.cent = (int)cent;
|
442
|
+
break;
|
443
|
+
}
|
444
|
+
case FORMAT_DAY: // %d, %Od - Day of the month, zero-padded (01..31)
|
445
|
+
case FORMAT_DAY_S: { // %e, %Oe - Day of the month, blank-padded ( 1..31)
|
446
|
+
final long day;
|
447
|
+
if (isBlank(text, pos)) {
|
448
|
+
pos += 1; // blank
|
449
|
+
day = readDigits(1);
|
450
|
+
}
|
451
|
+
else {
|
452
|
+
day = readDigits(2);
|
453
|
+
}
|
454
|
+
|
455
|
+
if (!validRange(day, 1, 31)) {
|
456
|
+
fail = true;
|
457
|
+
}
|
458
|
+
bag.mDay = (int)day;
|
459
|
+
break;
|
460
|
+
}
|
461
|
+
case FORMAT_WEEKYEAR: { // %G - The week-based year
|
462
|
+
final long year;
|
463
|
+
if (isNumberPattern(compiledPattern, tokenIndex)) {
|
464
|
+
year = readDigits(4);
|
465
|
+
}
|
466
|
+
else {
|
467
|
+
year = readDigitsMax();
|
468
|
+
}
|
469
|
+
bag.cWYear = (int)year;
|
470
|
+
break;
|
471
|
+
}
|
472
|
+
case FORMAT_WEEKYEAR_SHORT: { // %g - The last 2 digits of the week-based year (00..99)
|
473
|
+
final long v = readDigits(2);
|
474
|
+
if (!validRange(v, 0, 99)) {
|
475
|
+
fail = true;
|
476
|
+
}
|
477
|
+
bag.cWYear = (int)v;
|
478
|
+
if (!bag.has(bag.cent)) {
|
479
|
+
bag.cent = v >= 69 ? 19 : 20;
|
480
|
+
}
|
481
|
+
break;
|
482
|
+
}
|
483
|
+
case FORMAT_HOUR: // %H, %OH - Hour of the day, 24-hour clock, zero-padded (00..23)
|
484
|
+
case FORMAT_HOUR_BLANK: { // %k - Hour of the day, 24-hour clock, blank-padded ( 0..23)
|
485
|
+
final long hour;
|
486
|
+
if (isBlank(text, pos)) {
|
487
|
+
pos += 1; // blank
|
488
|
+
hour = readDigits(1);
|
489
|
+
}
|
490
|
+
else {
|
491
|
+
hour = readDigits(2);
|
492
|
+
}
|
493
|
+
|
494
|
+
if (!validRange(hour, 0, 24)) {
|
495
|
+
fail = true;
|
496
|
+
}
|
497
|
+
bag.hour = (int)hour;
|
498
|
+
break;
|
499
|
+
}
|
500
|
+
case FORMAT_HOUR_M: // %I, %OI - Hour of the day, 12-hour clock, zero-padded (01..12)
|
501
|
+
case FORMAT_HOUR_S: { // %l - Hour of the day, 12-hour clock, blank-padded ( 1..12)
|
502
|
+
final long hour;
|
503
|
+
if (isBlank(text, pos)) {
|
504
|
+
pos += 1; // blank
|
505
|
+
hour = readDigits(1);
|
506
|
+
}
|
507
|
+
else {
|
508
|
+
hour = readDigits(2);
|
509
|
+
}
|
510
|
+
|
511
|
+
if (!validRange(hour, 1, 12)) {
|
512
|
+
fail = true;
|
513
|
+
}
|
514
|
+
bag.hour = (int)hour;
|
515
|
+
break;
|
516
|
+
}
|
517
|
+
case FORMAT_DAY_YEAR: { // %j - Day of the year (001..366)
|
518
|
+
final long day = readDigits(3);
|
519
|
+
if (!validRange(day, 1, 365)) {
|
520
|
+
fail = true;
|
521
|
+
}
|
522
|
+
bag.yDay = (int)day;
|
523
|
+
break;
|
524
|
+
}
|
525
|
+
case FORMAT_MILLISEC: // %L - Millisecond of the second (000..999)
|
526
|
+
case FORMAT_NANOSEC: { // %N - Fractional seconds digits, default is 9 digits (nanosecond)
|
527
|
+
boolean negative = false;
|
528
|
+
if (isSign(text, pos)) {
|
529
|
+
negative = text.charAt(pos) == '-';
|
530
|
+
pos++;
|
531
|
+
}
|
532
|
+
|
533
|
+
final long v;
|
534
|
+
final int initPos = pos;
|
535
|
+
if (isNumberPattern(compiledPattern, tokenIndex)) {
|
536
|
+
if (token.getFormat() == StrptimeFormat.FORMAT_MILLISEC) {
|
537
|
+
v = readDigits(3);
|
538
|
+
}
|
539
|
+
else {
|
540
|
+
v = readDigits(9);
|
541
|
+
}
|
542
|
+
}
|
543
|
+
else {
|
544
|
+
v = readDigitsMax();
|
545
|
+
}
|
546
|
+
|
547
|
+
bag.secFraction = (int)(!negative ? v : -v);
|
548
|
+
bag.secFractionSize = pos - initPos;
|
549
|
+
break;
|
550
|
+
}
|
551
|
+
case FORMAT_MINUTES: { // %M, %OM - Minute of the hour (00..59)
|
552
|
+
final long min = readDigits(2);
|
553
|
+
if (!validRange(min, 0, 59)) {
|
554
|
+
fail = true;
|
555
|
+
}
|
556
|
+
bag.min = (int)min;
|
557
|
+
break;
|
558
|
+
}
|
559
|
+
case FORMAT_MONTH: { // %m, %Om - Month of the year, zero-padded (01..12)
|
560
|
+
final long mon = readDigits(2);
|
561
|
+
if (!validRange(mon, 1, 12)) {
|
562
|
+
fail = true;
|
563
|
+
}
|
564
|
+
bag.mon = (int)mon;
|
565
|
+
break;
|
566
|
+
}
|
567
|
+
case FORMAT_MERIDIAN: // %P - Meridian indicator, lowercase (``am'' or ``pm'')
|
568
|
+
case FORMAT_MERIDIAN_LOWER_CASE: { // %p - Meridian indicator, uppercase (``AM'' or ``PM'')
|
569
|
+
final int meridIndex = findIndexInPatterns(MERID_NAMES);
|
570
|
+
if (meridIndex >= 0) {
|
571
|
+
bag.merid = meridIndex % 2 == 0 ? 0 : 12;
|
572
|
+
pos += MERID_NAMES[meridIndex].length();
|
573
|
+
}
|
574
|
+
else {
|
575
|
+
fail = true;
|
576
|
+
}
|
577
|
+
break;
|
578
|
+
}
|
579
|
+
case FORMAT_MICROSEC_EPOCH: { // %Q - Number of microseconds since 1970-01-01 00:00:00 UTC.
|
580
|
+
boolean negative = false;
|
581
|
+
if (isMinus(text, pos)) {
|
582
|
+
negative = true;
|
583
|
+
pos++;
|
584
|
+
}
|
585
|
+
|
586
|
+
final long sec = readDigitsMax();
|
587
|
+
bag.seconds = !negative ? sec : -sec;
|
588
|
+
bag.secondsSize = 3;
|
589
|
+
break;
|
590
|
+
}
|
591
|
+
case FORMAT_SECONDS: { // %S - Second of the minute (00..59)
|
592
|
+
final long sec = readDigits(2);
|
593
|
+
if (!validRange(sec, 0, 60)) {
|
594
|
+
fail = true;
|
595
|
+
}
|
596
|
+
bag.sec = (int)sec;
|
597
|
+
break;
|
598
|
+
}
|
599
|
+
case FORMAT_EPOCH: { // %s - Number of seconds since 1970-01-01 00:00:00 UTC.
|
600
|
+
boolean negative = false;
|
601
|
+
if (isMinus(text, pos)) {
|
602
|
+
negative = true;
|
603
|
+
pos++;
|
604
|
+
}
|
605
|
+
|
606
|
+
final long sec = readDigitsMax();
|
607
|
+
bag.seconds = (int)(!negative ? sec : -sec);
|
608
|
+
break;
|
609
|
+
}
|
610
|
+
case FORMAT_WEEK_YEAR_S: // %U, %OU - Week number of the year. The week starts with Sunday. (00..53)
|
611
|
+
case FORMAT_WEEK_YEAR_M: { // %W, %OW - Week number of the year. The week starts with Monday. (00..53)
|
612
|
+
final long week = readDigits(2);
|
613
|
+
if (!validRange(week, 0, 53)) {
|
614
|
+
fail = true;
|
615
|
+
}
|
616
|
+
|
617
|
+
if (token.getFormat() == StrptimeFormat.FORMAT_WEEK_YEAR_S) {
|
618
|
+
bag.wNum0 = (int)week;
|
619
|
+
} else {
|
620
|
+
bag.wNum1 = (int)week;
|
621
|
+
}
|
622
|
+
break;
|
623
|
+
}
|
624
|
+
case FORMAT_DAY_WEEK2: { // %u, %Ou - Day of the week (Monday is 1, 1..7)
|
625
|
+
final long day = readDigits(1);
|
626
|
+
if (!validRange(day, 1, 7)) {
|
627
|
+
fail = true;
|
628
|
+
}
|
629
|
+
bag.cWDay = (int)day;
|
630
|
+
break;
|
631
|
+
}
|
632
|
+
case FORMAT_WEEK_WEEKYEAR: { // %V, %OV - Week number of the week-based year (01..53)
|
633
|
+
final long week = readDigits(2);
|
634
|
+
if (!validRange(week, 1, 53)) {
|
635
|
+
fail = true;
|
636
|
+
}
|
637
|
+
bag.cWeek = (int)week;
|
638
|
+
break;
|
639
|
+
}
|
640
|
+
case FORMAT_DAY_WEEK: { // %w - Day of the week (Sunday is 0, 0..6)
|
641
|
+
final long day = readDigits(1);
|
642
|
+
if (!validRange(day, 0, 6)) {
|
643
|
+
fail = true;
|
644
|
+
}
|
645
|
+
bag.wDay = (int)day;
|
646
|
+
break;
|
647
|
+
}
|
648
|
+
case FORMAT_YEAR_LONG: {
|
649
|
+
// %Y, %EY - Year with century (can be negative, 4 digits at least)
|
650
|
+
// -0001, 0000, 1995, 2009, 14292, etc.
|
651
|
+
boolean negative = false;
|
652
|
+
if (isSign(text, pos)) {
|
653
|
+
negative = text.charAt(pos) == '-';
|
654
|
+
pos++;
|
655
|
+
}
|
656
|
+
|
657
|
+
final long year;
|
658
|
+
if (isNumberPattern(compiledPattern, tokenIndex)) {
|
659
|
+
year = readDigits(4);
|
660
|
+
} else {
|
661
|
+
year = readDigitsMax();
|
662
|
+
}
|
663
|
+
|
664
|
+
bag.year = (int)(!negative ? year : -year);
|
665
|
+
break;
|
666
|
+
}
|
667
|
+
case FORMAT_YEAR_SHORT: { // %y, %Ey, %Oy - year % 100 (00..99)
|
668
|
+
final long y = readDigits(2);
|
669
|
+
if (!validRange(y, 0, 99)) {
|
670
|
+
fail = true;
|
671
|
+
}
|
672
|
+
bag.year = (int)y;
|
673
|
+
if (!bag.has(bag.cent)) {
|
674
|
+
bag.cent = y >= 69 ? 19 : 20;
|
675
|
+
}
|
676
|
+
break;
|
677
|
+
}
|
678
|
+
case FORMAT_ZONE_ID: // %Z - Time zone abbreviation name
|
679
|
+
case FORMAT_COLON_ZONE_OFF: {
|
680
|
+
// %z - Time zone as hour and minute offset from UTC (e.g. +0900)
|
681
|
+
// %:z - hour and minute offset from UTC with a colon (e.g. +09:00)
|
682
|
+
// %::z - hour, minute and second offset from UTC (e.g. +09:00:00)
|
683
|
+
// %:::z - hour, minute and second offset from UTC
|
684
|
+
// (e.g. +09, +09:30, +09:30:30)
|
685
|
+
if (isEndOfText(text, pos)) {
|
686
|
+
fail = true;
|
687
|
+
break;
|
688
|
+
}
|
689
|
+
|
690
|
+
final Matcher m = ZONE_PARSE_REGEX.matcher(text.substring(pos));
|
691
|
+
if (m.find()) {
|
692
|
+
// zone
|
693
|
+
String zone = text.substring(pos, pos + m.end());
|
694
|
+
bag.zone = zone;
|
695
|
+
pos += zone.length();
|
696
|
+
} else {
|
697
|
+
fail = true;
|
698
|
+
}
|
699
|
+
break;
|
700
|
+
}
|
701
|
+
case FORMAT_SPECIAL:
|
702
|
+
{
|
703
|
+
throw new Error("FORMAT_SPECIAL is a special token only for the lexer.");
|
704
|
+
}
|
705
|
+
}
|
706
|
+
}
|
707
|
+
|
708
|
+
if (fail) {
|
709
|
+
return null;
|
710
|
+
}
|
711
|
+
|
712
|
+
if (text.length() > pos) {
|
713
|
+
bag.leftover = text.substring(pos, text.length());
|
714
|
+
}
|
715
|
+
|
716
|
+
return bag;
|
717
|
+
}
|
718
|
+
|
719
|
+
/**
|
720
|
+
* Ported read_digits in MRI 2.3.1's ext/date/date_strptime.c
|
721
|
+
* @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c">date_strftime.c</a>
|
722
|
+
*/
|
723
|
+
private long readDigits(final int len)
|
724
|
+
{
|
725
|
+
char c;
|
726
|
+
long v = 0;
|
727
|
+
final int initPos = pos;
|
728
|
+
|
729
|
+
for (int i = 0; i < len; i++) {
|
730
|
+
if (isEndOfText(text, pos)) {
|
731
|
+
break;
|
732
|
+
}
|
733
|
+
|
734
|
+
c = text.charAt(pos);
|
735
|
+
if (!isDigit(c)) {
|
736
|
+
break;
|
737
|
+
}
|
738
|
+
else {
|
739
|
+
v = v * 10 + toInt(c);
|
740
|
+
}
|
741
|
+
pos += 1;
|
742
|
+
}
|
743
|
+
|
744
|
+
if (pos == initPos) {
|
745
|
+
fail = true;
|
746
|
+
}
|
747
|
+
|
748
|
+
return v;
|
749
|
+
}
|
750
|
+
|
751
|
+
/**
|
752
|
+
* Ported from READ_DIGITS_MAX in MRI 2.3.1's ext/date/date_strptime.c under BSDL.
|
753
|
+
* @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c">date_strftime.c</a>
|
754
|
+
*/
|
755
|
+
private long readDigitsMax()
|
756
|
+
{
|
757
|
+
return readDigits(Integer.MAX_VALUE);
|
758
|
+
}
|
759
|
+
|
760
|
+
/**
|
761
|
+
* Returns -1 if text doesn't match with patterns.
|
762
|
+
*/
|
763
|
+
private int findIndexInPatterns(final String[] patterns)
|
764
|
+
{
|
765
|
+
if (isEndOfText(text, pos)) {
|
766
|
+
return -1;
|
767
|
+
}
|
768
|
+
|
769
|
+
for (int i = 0; i < patterns.length; i++) {
|
770
|
+
final String pattern = patterns[i];
|
771
|
+
final int len = pattern.length();
|
772
|
+
if (!isEndOfText(text, pos + len - 1)
|
773
|
+
&& pattern.equalsIgnoreCase(text.substring(pos, pos + len))) { // strncasecmp
|
774
|
+
return i;
|
775
|
+
}
|
776
|
+
}
|
777
|
+
|
778
|
+
return -1; // text doesn't match at any patterns.
|
779
|
+
}
|
780
|
+
|
781
|
+
/**
|
782
|
+
* Ported from num_pattern_p in MRI 2.3.1's ext/date/date_strptime.c under BSDL.
|
783
|
+
* @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c">date_strftime.c</a>
|
784
|
+
*/
|
785
|
+
private static boolean isNumberPattern(final List<StrptimeToken> compiledPattern, final int i)
|
786
|
+
{
|
787
|
+
if (compiledPattern.size() <= i + 1) {
|
788
|
+
return false;
|
789
|
+
}
|
790
|
+
else {
|
791
|
+
final StrptimeToken nextToken = compiledPattern.get(i + 1);
|
792
|
+
final StrptimeFormat f = nextToken.getFormat();
|
793
|
+
if (f == StrptimeFormat.FORMAT_STRING && isDigit(((String) nextToken.getData()).charAt(0))) {
|
794
|
+
return true;
|
795
|
+
}
|
796
|
+
else if (NUMBER_PATTERNS.contains(f)) {
|
797
|
+
return true;
|
798
|
+
}
|
799
|
+
else {
|
800
|
+
return false;
|
801
|
+
}
|
802
|
+
}
|
803
|
+
}
|
804
|
+
|
805
|
+
// CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy
|
806
|
+
private static final EnumSet<StrptimeFormat> NUMBER_PATTERNS =
|
807
|
+
EnumSet.copyOf(Arrays.asList(
|
808
|
+
StrptimeFormat.FORMAT_CENTURY, // 'C'
|
809
|
+
// D
|
810
|
+
StrptimeFormat.FORMAT_DAY, // 'd'
|
811
|
+
StrptimeFormat.FORMAT_DAY_S, // 'e'
|
812
|
+
// F
|
813
|
+
StrptimeFormat.FORMAT_WEEKYEAR, // 'G'
|
814
|
+
StrptimeFormat.FORMAT_WEEKYEAR_SHORT, // 'g'
|
815
|
+
StrptimeFormat.FORMAT_HOUR, // 'H'
|
816
|
+
StrptimeFormat.FORMAT_HOUR_M, // 'I'
|
817
|
+
StrptimeFormat.FORMAT_DAY_YEAR, // 'j'
|
818
|
+
StrptimeFormat.FORMAT_HOUR_BLANK, // 'k'
|
819
|
+
StrptimeFormat.FORMAT_MILLISEC, // 'L'
|
820
|
+
StrptimeFormat.FORMAT_HOUR_S, // 'l'
|
821
|
+
StrptimeFormat.FORMAT_MINUTES, // 'M'
|
822
|
+
StrptimeFormat.FORMAT_MONTH, // 'm'
|
823
|
+
StrptimeFormat.FORMAT_NANOSEC, // 'N'
|
824
|
+
// Q, R, r
|
825
|
+
StrptimeFormat.FORMAT_SECONDS, // 'S'
|
826
|
+
StrptimeFormat.FORMAT_EPOCH, // 's'
|
827
|
+
// T
|
828
|
+
StrptimeFormat.FORMAT_WEEK_YEAR_S, // 'U'
|
829
|
+
StrptimeFormat.FORMAT_DAY_WEEK2, // 'u'
|
830
|
+
StrptimeFormat.FORMAT_WEEK_WEEKYEAR, // 'V'
|
831
|
+
// v
|
832
|
+
StrptimeFormat.FORMAT_WEEK_YEAR_M, // 'W'
|
833
|
+
StrptimeFormat.FORMAT_DAY_WEEK, // 'w'
|
834
|
+
// X, x
|
835
|
+
StrptimeFormat.FORMAT_YEAR_LONG, // 'Y'
|
836
|
+
StrptimeFormat.FORMAT_YEAR_SHORT // 'y'
|
837
|
+
));
|
838
|
+
|
839
|
+
/**
|
840
|
+
* Ported from valid_pattern_p in MRI 2.3.1's ext/date/date_strptime.c under BSDL.
|
841
|
+
* @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c">date_strftime.c</a>
|
842
|
+
*/
|
843
|
+
private static boolean validRange(long v, int lower, int upper)
|
844
|
+
{
|
845
|
+
return lower <= v && v <= upper;
|
846
|
+
}
|
847
|
+
|
848
|
+
private static boolean isSpace(char c)
|
849
|
+
{
|
850
|
+
return c == ' ' || c == '\t' || c == '\n' ||
|
851
|
+
c == '\u000b' || c == '\f' || c == '\r';
|
852
|
+
}
|
853
|
+
|
854
|
+
private static boolean isDigit(char c)
|
855
|
+
{
|
856
|
+
return '0' <= c && c <= '9';
|
857
|
+
}
|
858
|
+
|
859
|
+
private static boolean isEndOfText(String text, int pos)
|
860
|
+
{
|
861
|
+
return pos >= text.length();
|
862
|
+
}
|
863
|
+
|
864
|
+
private static boolean isSign(String text, int pos)
|
865
|
+
{
|
866
|
+
return !isEndOfText(text, pos) && (text.charAt(pos) == '+' || text.charAt(pos) == '-');
|
867
|
+
}
|
868
|
+
|
869
|
+
private static boolean isMinus(String text, int pos)
|
870
|
+
{
|
871
|
+
return !isEndOfText(text, pos) && text.charAt(pos) == '-';
|
872
|
+
}
|
873
|
+
|
874
|
+
private static boolean isBlank(String text, int pos)
|
875
|
+
{
|
876
|
+
return !isEndOfText(text, pos) && text.charAt(pos) == ' ';
|
877
|
+
}
|
878
|
+
|
879
|
+
private static int toInt(char c)
|
880
|
+
{
|
881
|
+
return c - '0';
|
882
|
+
}
|
883
|
+
}
|
884
|
+
}
|