embulk 0.8.26 → 0.8.27

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,884 @@
1
+ package org.embulk.spi.time;
2
+
3
+ import java.io.IOException;
4
+ import java.io.Reader;
5
+ import java.io.StringReader;
6
+ import java.util.EnumSet;
7
+ import java.util.LinkedList;
8
+ import java.util.List;
9
+ import java.util.Arrays;
10
+ import java.util.regex.Matcher;
11
+ import java.util.regex.Pattern;
12
+
13
+ import org.embulk.spi.time.lexer.StrptimeLexer;
14
+
15
+ /**
16
+ * This is a Java implementation of ext/date/date_strptime.c in Ruby v2.3.1.
17
+ * @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strptime.c">date_strptime.c</a>
18
+ *
19
+ * TODO
20
+ * This class is tentatively required for {@code TimestampParser} class.
21
+ * The {@code StrptimeParser} and {@code RubyDateParser} will be merged into JRuby
22
+ * (jruby/jruby#4591). embulk-jruby-strptime will be removed when Embulk starts using
23
+ * the JRuby that bundles embulk-jruby-strptime.
24
+ */
25
+ public class StrptimeParser
26
+ {
27
+ // day_names
28
+ private static final String[] DAY_NAMES = new String[] {
29
+ "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
30
+ "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
31
+ };
32
+
33
+ // month_names
34
+ private static final String[] MONTH_NAMES = new String[] {
35
+ "January", "February", "March", "April", "May", "June", "July", "August", "September",
36
+ "October", "November", "December", "Jan", "Feb", "Mar", "Apr", "May", "Jun",
37
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
38
+ };
39
+
40
+ // merid_names
41
+ private static final String[] MERID_NAMES = new String[] {
42
+ "am", "pm", "a.m.", "p.m."
43
+ };
44
+
45
+ /**
46
+ * Ported Date::Format::Bag from JRuby 9.1.5.0's lib/ruby/stdlib/date/format.rb.
47
+ * @see <a href="https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/lib/ruby/stdlib/date/format.rb">format.rb</a>
48
+ */
49
+ public static class FormatBag
50
+ {
51
+ private int mDay = Integer.MIN_VALUE;
52
+ private int wDay = Integer.MIN_VALUE;
53
+ private int cWDay = Integer.MIN_VALUE;
54
+ private int yDay = Integer.MIN_VALUE;
55
+ private int cWeek = Integer.MIN_VALUE;
56
+ private int cWYear = Integer.MIN_VALUE;
57
+ private int min = Integer.MIN_VALUE;
58
+ private int mon = Integer.MIN_VALUE;
59
+ private int hour = Integer.MIN_VALUE;
60
+ private int year = Integer.MIN_VALUE;
61
+ private int sec = Integer.MIN_VALUE;
62
+ private int wNum0 = Integer.MIN_VALUE;
63
+ private int wNum1 = Integer.MIN_VALUE;
64
+
65
+ private String zone = null;
66
+
67
+ private int secFraction = Integer.MIN_VALUE; // Rational
68
+ private int secFractionSize = Integer.MIN_VALUE;
69
+
70
+ private long seconds = Long.MIN_VALUE; // long or Rational
71
+ private int secondsSize = Integer.MIN_VALUE;
72
+
73
+ private int merid = Integer.MIN_VALUE;
74
+ private int cent = Integer.MIN_VALUE;
75
+
76
+ private boolean fail = false;
77
+ private String leftover = null;
78
+
79
+ public int getMDay()
80
+ {
81
+ return mDay;
82
+ }
83
+
84
+ public int getWDay()
85
+ {
86
+ return wDay;
87
+ }
88
+
89
+ public int getCWDay()
90
+ {
91
+ return cWDay;
92
+ }
93
+
94
+ public int getYDay()
95
+ {
96
+ return yDay;
97
+ }
98
+
99
+ public int getCWeek()
100
+ {
101
+ return cWeek;
102
+ }
103
+
104
+ public int getCWYear()
105
+ {
106
+ return cWYear;
107
+ }
108
+
109
+ public int getMin()
110
+ {
111
+ return min;
112
+ }
113
+
114
+ public int getMon()
115
+ {
116
+ return mon;
117
+ }
118
+
119
+ public int getHour()
120
+ {
121
+ return hour;
122
+ }
123
+
124
+ public int getYear()
125
+ {
126
+ return year;
127
+ }
128
+
129
+ public int getSec()
130
+ {
131
+ return sec;
132
+ }
133
+
134
+ public int getWNum0()
135
+ {
136
+ return wNum0;
137
+ }
138
+
139
+ public int getWNum1()
140
+ {
141
+ return wNum1;
142
+ }
143
+
144
+ public String getZone()
145
+ {
146
+ return zone;
147
+ }
148
+
149
+ public int getSecFraction()
150
+ {
151
+ return secFraction;
152
+ }
153
+
154
+ public int getSecFractionSize()
155
+ {
156
+ return secFractionSize;
157
+ }
158
+
159
+ public long getSeconds()
160
+ {
161
+ return seconds;
162
+ }
163
+
164
+ public int getSecondsSize()
165
+ {
166
+ return secondsSize;
167
+ }
168
+
169
+ public int getMerid()
170
+ {
171
+ return merid;
172
+ }
173
+
174
+ public int getCent()
175
+ {
176
+ return cent;
177
+ }
178
+
179
+ void fail()
180
+ {
181
+ fail = true;
182
+ }
183
+
184
+ public String getLeftover()
185
+ {
186
+ return leftover;
187
+ }
188
+
189
+ public boolean setYearIfNotSet(int v)
190
+ {
191
+ if (has(year)) {
192
+ return false;
193
+ }
194
+ else {
195
+ year = v;
196
+ return true;
197
+ }
198
+ }
199
+
200
+ public boolean setMonthIfNotSet(int v)
201
+ {
202
+ if (has(mon)) {
203
+ return false;
204
+ }
205
+ else {
206
+ mon = v;
207
+ return true;
208
+ }
209
+ }
210
+
211
+ public boolean setMdayIfNotSet(int v)
212
+ {
213
+ if (has(mDay)) {
214
+ return false;
215
+ }
216
+ else {
217
+ mDay = v;
218
+ return true;
219
+ }
220
+ }
221
+
222
+ public boolean hasSeconds()
223
+ {
224
+ return seconds != Long.MIN_VALUE;
225
+ }
226
+
227
+ public static boolean has(int v)
228
+ {
229
+ return v != Integer.MIN_VALUE;
230
+ }
231
+ }
232
+
233
/** Lexer used by {@code compilePattern}, which points it at each new pattern via {@code yyreset}. */
private final StrptimeLexer lexer;

/**
 * Creates a parser whose lexer starts with a {@code null} reader; an input is attached
 * later, when a pattern is compiled.
 */
public StrptimeParser()
{
    lexer = new StrptimeLexer((Reader) null);
}
239
+
240
+ /**
241
+ * Ported from org.jruby.util.RubyDateFormatter#addToPattern in JRuby 9.1.5.0
242
+ * under EPL.
243
+ * @see <a href="https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java">RubyDateFormatter.java</a>
244
+ */
245
+ private void addToPattern(final List<StrptimeToken> compiledPattern, final String str)
246
+ {
247
+ for (int i = 0; i < str.length(); i++) {
248
+ final char c = str.charAt(i);
249
+ if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')) {
250
+ compiledPattern.add(StrptimeToken.format(c));
251
+ }
252
+ else {
253
+ compiledPattern.add(StrptimeToken.str(Character.toString(c)));
254
+ }
255
+ }
256
+ }
257
+
258
+ /**
259
+ * Ported from org.jruby.util.RubyDateFormatter#compilePattern in JRuby 9.1.5.0
260
+ * under EPL.
261
+ * @see <a href="https://github.com/jruby/jruby/blob/036ce39f0476d4bd718e23e64caff36bb50b8dbc/core/src/main/java/org/jruby/util/RubyDateFormatter.java">RubyDateFormatter.java</a>
262
+ */
263
+ public List<StrptimeToken> compilePattern(final String pattern)
264
+ {
265
+ final List<StrptimeToken> compiledPattern = new LinkedList<>();
266
+ final Reader reader = new StringReader(pattern); // TODO Use try-with-resource statement
267
+ lexer.yyreset(reader);
268
+
269
+ StrptimeToken token;
270
+ try {
271
+ while ((token = lexer.yylex()) != null) {
272
+ if (token.getFormat() != StrptimeFormat.FORMAT_SPECIAL) {
273
+ compiledPattern.add(token);
274
+ }
275
+ else {
276
+ char c = (Character) token.getData();
277
+ switch (c) {
278
+ case 'c':
279
+ addToPattern(compiledPattern, "a b e H:M:S Y");
280
+ break;
281
+ case 'D':
282
+ case 'x':
283
+ addToPattern(compiledPattern, "m/d/y");
284
+ break;
285
+ case 'F':
286
+ addToPattern(compiledPattern, "Y-m-d");
287
+ break;
288
+ case 'n':
289
+ compiledPattern.add(StrptimeToken.str("\n"));
290
+ break;
291
+ case 'R':
292
+ addToPattern(compiledPattern, "H:M");
293
+ break;
294
+ case 'r':
295
+ addToPattern(compiledPattern, "I:M:S p");
296
+ break;
297
+ case 'T':
298
+ case 'X':
299
+ addToPattern(compiledPattern, "H:M:S");
300
+ break;
301
+ case 't':
302
+ compiledPattern.add(StrptimeToken.str("\t"));
303
+ break;
304
+ case 'v':
305
+ addToPattern(compiledPattern, "e-b-Y");
306
+ break;
307
+ case 'Z':
308
+ // +HH:MM in 'date', never zone name
309
+ compiledPattern.add(StrptimeToken.zoneOffsetColons(1));
310
+ break;
311
+ case '+':
312
+ addToPattern(compiledPattern, "a b e H:M:S ");
313
+ // %Z: +HH:MM in 'date', never zone name
314
+ compiledPattern.add(StrptimeToken.zoneOffsetColons(1));
315
+ addToPattern(compiledPattern, " Y");
316
+ break;
317
+ default:
318
+ throw new Error("Unknown special char: " + c);
319
+ }
320
+ }
321
+ }
322
+ }
323
+ catch (IOException e) {
324
+ e.printStackTrace();
325
+ }
326
+
327
+ return compiledPattern;
328
+ }
329
+
330
+ public FormatBag parse(final List<StrptimeToken> compiledPattern, final String text)
331
+ {
332
+ final FormatBag bag = new StringParser(text).parse(compiledPattern);
333
+ if (bag == null) {
334
+ return null;
335
+ }
336
+
337
+ if (FormatBag.has(bag.cent)) {
338
+ if (FormatBag.has(bag.cWYear)) {
339
+ bag.cWYear += bag.cent * 100;
340
+ }
341
+ if (FormatBag.has(bag.year)) {
342
+ bag.year += bag.cent * 100;
343
+ }
344
+
345
+ // delete bag._cent
346
+ bag.cent = Integer.MIN_VALUE;
347
+ }
348
+
349
+ if (FormatBag.has(bag.merid)) {
350
+ if (FormatBag.has(bag.hour)) {
351
+ bag.hour %= 12;
352
+ bag.hour += bag.merid;
353
+ }
354
+
355
+ // delete bag._merid
356
+ bag.merid = Integer.MIN_VALUE;
357
+ }
358
+
359
+ return bag;
360
+ }
361
+
362
+ private static class StringParser
363
+ {
364
/**
 * Matches a zone at the very start of the remaining input, case-insensitively unless a
 * group turns that off: a numeric offset optionally prefixed by gmt/ut/utc, a name followed
 * by "standard time" or "daylight time", or an alphabetic name optionally followed by "dst".
 */
private static final Pattern ZONE_PARSE_REGEX = Pattern.compile(
        "\\A("
        + "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
        + "|(?-i:[[\\p{Alpha}].\\s]+)(?:standard|daylight)\\s+time\\b"
        + "|(?-i:[[\\p{Alpha}]]+)(?:\\s+dst)?\\b"
        + ")",
        Pattern.CASE_INSENSITIVE);

/** The input being parsed. */
private final String text;
/** Receives every field as it is parsed. */
private final FormatBag bag;

/** Index into {@code text} of the next character to consume. */
private int pos;
/** Set once any token fails to match; never cleared afterwards. */
private boolean fail;

/**
 * Prepares a parser positioned at the start of {@code text} with an empty bag.
 *
 * @param text the input to parse
 */
private StringParser(String text)
{
    this.fail = false;
    this.pos = 0;

    this.bag = new FormatBag();
    this.text = text;
}
384
+
385
+ private FormatBag parse(final List<StrptimeToken> compiledPattern)
386
+ {
387
+ for (int tokenIndex = 0; tokenIndex < compiledPattern.size(); tokenIndex++) {
388
+ final StrptimeToken token = compiledPattern.get(tokenIndex);
389
+
390
+ switch (token.getFormat()) {
391
+ case FORMAT_STRING: {
392
+ final String str = token.getData().toString();
393
+ for (int i = 0; i < str.length(); i++) {
394
+ final char c = str.charAt(i);
395
+ if (isSpace(c)) {
396
+ while (!isEndOfText(text, pos) && isSpace(text.charAt(pos))) {
397
+ pos++;
398
+ }
399
+ }
400
+ else {
401
+ if (isEndOfText(text, pos) || c != text.charAt(pos)) {
402
+ fail = true;
403
+ }
404
+ pos++;
405
+ }
406
+ }
407
+ break;
408
+ }
409
+ case FORMAT_WEEK_LONG: // %A - The full weekday name (``Sunday'')
410
+ case FORMAT_WEEK_SHORT: { // %a - The abbreviated name (``Sun'')
411
+ final int dayIndex = findIndexInPatterns(DAY_NAMES);
412
+ if (dayIndex >= 0) {
413
+ bag.wDay = dayIndex % 7;
414
+ pos += DAY_NAMES[dayIndex].length();
415
+ }
416
+ else {
417
+ fail = true;
418
+ }
419
+ break;
420
+ }
421
+ case FORMAT_MONTH_LONG: // %B - The full month name (``January'')
422
+ case FORMAT_MONTH_SHORT: { // %b, %h - The abbreviated month name (``Jan'')
423
+ final int monIndex = findIndexInPatterns(MONTH_NAMES);
424
+ if (monIndex >= 0) {
425
+ bag.mon = monIndex % 12 + 1;
426
+ pos += MONTH_NAMES[monIndex].length();
427
+ }
428
+ else {
429
+ fail = true;
430
+ }
431
+ break;
432
+ }
433
+ case FORMAT_CENTURY: { // %C - year / 100 (round down. 20 in 2009)
434
+ final long cent;
435
+ if (isNumberPattern(compiledPattern, tokenIndex)) {
436
+ cent = readDigits(2);
437
+ }
438
+ else {
439
+ cent = readDigitsMax();
440
+ }
441
+ bag.cent = (int)cent;
442
+ break;
443
+ }
444
+ case FORMAT_DAY: // %d, %Od - Day of the month, zero-padded (01..31)
445
+ case FORMAT_DAY_S: { // %e, %Oe - Day of the month, blank-padded ( 1..31)
446
+ final long day;
447
+ if (isBlank(text, pos)) {
448
+ pos += 1; // blank
449
+ day = readDigits(1);
450
+ }
451
+ else {
452
+ day = readDigits(2);
453
+ }
454
+
455
+ if (!validRange(day, 1, 31)) {
456
+ fail = true;
457
+ }
458
+ bag.mDay = (int)day;
459
+ break;
460
+ }
461
+ case FORMAT_WEEKYEAR: { // %G - The week-based year
462
+ final long year;
463
+ if (isNumberPattern(compiledPattern, tokenIndex)) {
464
+ year = readDigits(4);
465
+ }
466
+ else {
467
+ year = readDigitsMax();
468
+ }
469
+ bag.cWYear = (int)year;
470
+ break;
471
+ }
472
+ case FORMAT_WEEKYEAR_SHORT: { // %g - The last 2 digits of the week-based year (00..99)
473
+ final long v = readDigits(2);
474
+ if (!validRange(v, 0, 99)) {
475
+ fail = true;
476
+ }
477
+ bag.cWYear = (int)v;
478
+ if (!bag.has(bag.cent)) {
479
+ bag.cent = v >= 69 ? 19 : 20;
480
+ }
481
+ break;
482
+ }
483
+ case FORMAT_HOUR: // %H, %OH - Hour of the day, 24-hour clock, zero-padded (00..23)
484
+ case FORMAT_HOUR_BLANK: { // %k - Hour of the day, 24-hour clock, blank-padded ( 0..23)
485
+ final long hour;
486
+ if (isBlank(text, pos)) {
487
+ pos += 1; // blank
488
+ hour = readDigits(1);
489
+ }
490
+ else {
491
+ hour = readDigits(2);
492
+ }
493
+
494
+ if (!validRange(hour, 0, 24)) {
495
+ fail = true;
496
+ }
497
+ bag.hour = (int)hour;
498
+ break;
499
+ }
500
+ case FORMAT_HOUR_M: // %I, %OI - Hour of the day, 12-hour clock, zero-padded (01..12)
501
+ case FORMAT_HOUR_S: { // %l - Hour of the day, 12-hour clock, blank-padded ( 1..12)
502
+ final long hour;
503
+ if (isBlank(text, pos)) {
504
+ pos += 1; // blank
505
+ hour = readDigits(1);
506
+ }
507
+ else {
508
+ hour = readDigits(2);
509
+ }
510
+
511
+ if (!validRange(hour, 1, 12)) {
512
+ fail = true;
513
+ }
514
+ bag.hour = (int)hour;
515
+ break;
516
+ }
517
+ case FORMAT_DAY_YEAR: { // %j - Day of the year (001..366)
518
+ final long day = readDigits(3);
519
+ if (!validRange(day, 1, 365)) {
520
+ fail = true;
521
+ }
522
+ bag.yDay = (int)day;
523
+ break;
524
+ }
525
+ case FORMAT_MILLISEC: // %L - Millisecond of the second (000..999)
526
+ case FORMAT_NANOSEC: { // %N - Fractional seconds digits, default is 9 digits (nanosecond)
527
+ boolean negative = false;
528
+ if (isSign(text, pos)) {
529
+ negative = text.charAt(pos) == '-';
530
+ pos++;
531
+ }
532
+
533
+ final long v;
534
+ final int initPos = pos;
535
+ if (isNumberPattern(compiledPattern, tokenIndex)) {
536
+ if (token.getFormat() == StrptimeFormat.FORMAT_MILLISEC) {
537
+ v = readDigits(3);
538
+ }
539
+ else {
540
+ v = readDigits(9);
541
+ }
542
+ }
543
+ else {
544
+ v = readDigitsMax();
545
+ }
546
+
547
+ bag.secFraction = (int)(!negative ? v : -v);
548
+ bag.secFractionSize = pos - initPos;
549
+ break;
550
+ }
551
+ case FORMAT_MINUTES: { // %M, %OM - Minute of the hour (00..59)
552
+ final long min = readDigits(2);
553
+ if (!validRange(min, 0, 59)) {
554
+ fail = true;
555
+ }
556
+ bag.min = (int)min;
557
+ break;
558
+ }
559
+ case FORMAT_MONTH: { // %m, %Om - Month of the year, zero-padded (01..12)
560
+ final long mon = readDigits(2);
561
+ if (!validRange(mon, 1, 12)) {
562
+ fail = true;
563
+ }
564
+ bag.mon = (int)mon;
565
+ break;
566
+ }
567
+ case FORMAT_MERIDIAN: // %P - Meridian indicator, lowercase (``am'' or ``pm'')
568
+ case FORMAT_MERIDIAN_LOWER_CASE: { // %p - Meridian indicator, uppercase (``AM'' or ``PM'')
569
+ final int meridIndex = findIndexInPatterns(MERID_NAMES);
570
+ if (meridIndex >= 0) {
571
+ bag.merid = meridIndex % 2 == 0 ? 0 : 12;
572
+ pos += MERID_NAMES[meridIndex].length();
573
+ }
574
+ else {
575
+ fail = true;
576
+ }
577
+ break;
578
+ }
579
+ case FORMAT_MICROSEC_EPOCH: { // %Q - Number of microseconds since 1970-01-01 00:00:00 UTC.
580
+ boolean negative = false;
581
+ if (isMinus(text, pos)) {
582
+ negative = true;
583
+ pos++;
584
+ }
585
+
586
+ final long sec = readDigitsMax();
587
+ bag.seconds = !negative ? sec : -sec;
588
+ bag.secondsSize = 3;
589
+ break;
590
+ }
591
+ case FORMAT_SECONDS: { // %S - Second of the minute (00..59)
592
+ final long sec = readDigits(2);
593
+ if (!validRange(sec, 0, 60)) {
594
+ fail = true;
595
+ }
596
+ bag.sec = (int)sec;
597
+ break;
598
+ }
599
+ case FORMAT_EPOCH: { // %s - Number of seconds since 1970-01-01 00:00:00 UTC.
600
+ boolean negative = false;
601
+ if (isMinus(text, pos)) {
602
+ negative = true;
603
+ pos++;
604
+ }
605
+
606
+ final long sec = readDigitsMax();
607
+ bag.seconds = (int)(!negative ? sec : -sec);
608
+ break;
609
+ }
610
+ case FORMAT_WEEK_YEAR_S: // %U, %OU - Week number of the year. The week starts with Sunday. (00..53)
611
+ case FORMAT_WEEK_YEAR_M: { // %W, %OW - Week number of the year. The week starts with Monday. (00..53)
612
+ final long week = readDigits(2);
613
+ if (!validRange(week, 0, 53)) {
614
+ fail = true;
615
+ }
616
+
617
+ if (token.getFormat() == StrptimeFormat.FORMAT_WEEK_YEAR_S) {
618
+ bag.wNum0 = (int)week;
619
+ } else {
620
+ bag.wNum1 = (int)week;
621
+ }
622
+ break;
623
+ }
624
+ case FORMAT_DAY_WEEK2: { // %u, %Ou - Day of the week (Monday is 1, 1..7)
625
+ final long day = readDigits(1);
626
+ if (!validRange(day, 1, 7)) {
627
+ fail = true;
628
+ }
629
+ bag.cWDay = (int)day;
630
+ break;
631
+ }
632
+ case FORMAT_WEEK_WEEKYEAR: { // %V, %OV - Week number of the week-based year (01..53)
633
+ final long week = readDigits(2);
634
+ if (!validRange(week, 1, 53)) {
635
+ fail = true;
636
+ }
637
+ bag.cWeek = (int)week;
638
+ break;
639
+ }
640
+ case FORMAT_DAY_WEEK: { // %w - Day of the week (Sunday is 0, 0..6)
641
+ final long day = readDigits(1);
642
+ if (!validRange(day, 0, 6)) {
643
+ fail = true;
644
+ }
645
+ bag.wDay = (int)day;
646
+ break;
647
+ }
648
+ case FORMAT_YEAR_LONG: {
649
+ // %Y, %EY - Year with century (can be negative, 4 digits at least)
650
+ // -0001, 0000, 1995, 2009, 14292, etc.
651
+ boolean negative = false;
652
+ if (isSign(text, pos)) {
653
+ negative = text.charAt(pos) == '-';
654
+ pos++;
655
+ }
656
+
657
+ final long year;
658
+ if (isNumberPattern(compiledPattern, tokenIndex)) {
659
+ year = readDigits(4);
660
+ } else {
661
+ year = readDigitsMax();
662
+ }
663
+
664
+ bag.year = (int)(!negative ? year : -year);
665
+ break;
666
+ }
667
+ case FORMAT_YEAR_SHORT: { // %y, %Ey, %Oy - year % 100 (00..99)
668
+ final long y = readDigits(2);
669
+ if (!validRange(y, 0, 99)) {
670
+ fail = true;
671
+ }
672
+ bag.year = (int)y;
673
+ if (!bag.has(bag.cent)) {
674
+ bag.cent = y >= 69 ? 19 : 20;
675
+ }
676
+ break;
677
+ }
678
+ case FORMAT_ZONE_ID: // %Z - Time zone abbreviation name
679
+ case FORMAT_COLON_ZONE_OFF: {
680
+ // %z - Time zone as hour and minute offset from UTC (e.g. +0900)
681
+ // %:z - hour and minute offset from UTC with a colon (e.g. +09:00)
682
+ // %::z - hour, minute and second offset from UTC (e.g. +09:00:00)
683
+ // %:::z - hour, minute and second offset from UTC
684
+ // (e.g. +09, +09:30, +09:30:30)
685
+ if (isEndOfText(text, pos)) {
686
+ fail = true;
687
+ break;
688
+ }
689
+
690
+ final Matcher m = ZONE_PARSE_REGEX.matcher(text.substring(pos));
691
+ if (m.find()) {
692
+ // zone
693
+ String zone = text.substring(pos, pos + m.end());
694
+ bag.zone = zone;
695
+ pos += zone.length();
696
+ } else {
697
+ fail = true;
698
+ }
699
+ break;
700
+ }
701
+ case FORMAT_SPECIAL:
702
+ {
703
+ throw new Error("FORMAT_SPECIAL is a special token only for the lexer.");
704
+ }
705
+ }
706
+ }
707
+
708
+ if (fail) {
709
+ return null;
710
+ }
711
+
712
+ if (text.length() > pos) {
713
+ bag.leftover = text.substring(pos, text.length());
714
+ }
715
+
716
+ return bag;
717
+ }
718
+
719
+ /**
720
+ * Ported read_digits in MRI 2.3.1's ext/date/date_strptime.c
721
+ * @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c">date_strftime.c</a>
722
+ */
723
+ private long readDigits(final int len)
724
+ {
725
+ char c;
726
+ long v = 0;
727
+ final int initPos = pos;
728
+
729
+ for (int i = 0; i < len; i++) {
730
+ if (isEndOfText(text, pos)) {
731
+ break;
732
+ }
733
+
734
+ c = text.charAt(pos);
735
+ if (!isDigit(c)) {
736
+ break;
737
+ }
738
+ else {
739
+ v = v * 10 + toInt(c);
740
+ }
741
+ pos += 1;
742
+ }
743
+
744
+ if (pos == initPos) {
745
+ fail = true;
746
+ }
747
+
748
+ return v;
749
+ }
750
+
751
+ /**
752
+ * Ported from READ_DIGITS_MAX in MRI 2.3.1's ext/date/date_strptime.c under BSDL.
753
+ * @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c">date_strftime.c</a>
754
+ */
755
+ private long readDigitsMax()
756
+ {
757
+ return readDigits(Integer.MAX_VALUE);
758
+ }
759
+
760
+ /**
761
+ * Returns -1 if text doesn't match with patterns.
762
+ */
763
+ private int findIndexInPatterns(final String[] patterns)
764
+ {
765
+ if (isEndOfText(text, pos)) {
766
+ return -1;
767
+ }
768
+
769
+ for (int i = 0; i < patterns.length; i++) {
770
+ final String pattern = patterns[i];
771
+ final int len = pattern.length();
772
+ if (!isEndOfText(text, pos + len - 1)
773
+ && pattern.equalsIgnoreCase(text.substring(pos, pos + len))) { // strncasecmp
774
+ return i;
775
+ }
776
+ }
777
+
778
+ return -1; // text doesn't match at any patterns.
779
+ }
780
+
781
+ /**
782
+ * Ported from num_pattern_p in MRI 2.3.1's ext/date/date_strptime.c under BSDL.
783
+ * @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c">date_strftime.c</a>
784
+ */
785
+ private static boolean isNumberPattern(final List<StrptimeToken> compiledPattern, final int i)
786
+ {
787
+ if (compiledPattern.size() <= i + 1) {
788
+ return false;
789
+ }
790
+ else {
791
+ final StrptimeToken nextToken = compiledPattern.get(i + 1);
792
+ final StrptimeFormat f = nextToken.getFormat();
793
+ if (f == StrptimeFormat.FORMAT_STRING && isDigit(((String) nextToken.getData()).charAt(0))) {
794
+ return true;
795
+ }
796
+ else if (NUMBER_PATTERNS.contains(f)) {
797
+ return true;
798
+ }
799
+ else {
800
+ return false;
801
+ }
802
+ }
803
+ }
804
+
805
+ // CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy
806
+ private static final EnumSet<StrptimeFormat> NUMBER_PATTERNS =
807
+ EnumSet.copyOf(Arrays.asList(
808
+ StrptimeFormat.FORMAT_CENTURY, // 'C'
809
+ // D
810
+ StrptimeFormat.FORMAT_DAY, // 'd'
811
+ StrptimeFormat.FORMAT_DAY_S, // 'e'
812
+ // F
813
+ StrptimeFormat.FORMAT_WEEKYEAR, // 'G'
814
+ StrptimeFormat.FORMAT_WEEKYEAR_SHORT, // 'g'
815
+ StrptimeFormat.FORMAT_HOUR, // 'H'
816
+ StrptimeFormat.FORMAT_HOUR_M, // 'I'
817
+ StrptimeFormat.FORMAT_DAY_YEAR, // 'j'
818
+ StrptimeFormat.FORMAT_HOUR_BLANK, // 'k'
819
+ StrptimeFormat.FORMAT_MILLISEC, // 'L'
820
+ StrptimeFormat.FORMAT_HOUR_S, // 'l'
821
+ StrptimeFormat.FORMAT_MINUTES, // 'M'
822
+ StrptimeFormat.FORMAT_MONTH, // 'm'
823
+ StrptimeFormat.FORMAT_NANOSEC, // 'N'
824
+ // Q, R, r
825
+ StrptimeFormat.FORMAT_SECONDS, // 'S'
826
+ StrptimeFormat.FORMAT_EPOCH, // 's'
827
+ // T
828
+ StrptimeFormat.FORMAT_WEEK_YEAR_S, // 'U'
829
+ StrptimeFormat.FORMAT_DAY_WEEK2, // 'u'
830
+ StrptimeFormat.FORMAT_WEEK_WEEKYEAR, // 'V'
831
+ // v
832
+ StrptimeFormat.FORMAT_WEEK_YEAR_M, // 'W'
833
+ StrptimeFormat.FORMAT_DAY_WEEK, // 'w'
834
+ // X, x
835
+ StrptimeFormat.FORMAT_YEAR_LONG, // 'Y'
836
+ StrptimeFormat.FORMAT_YEAR_SHORT // 'y'
837
+ ));
838
+
839
+ /**
840
+ * Ported from valid_pattern_p in MRI 2.3.1's ext/date/date_strptime.c under BSDL.
841
+ * @see <a href="https://github.com/ruby/ruby/blob/394fa89c67722d35bdda89f10c7de5c304a5efb1/ext/date/date_strftime.c">date_strftime.c</a>
842
+ */
843
+ private static boolean validRange(long v, int lower, int upper)
844
+ {
845
+ return lower <= v && v <= upper;
846
+ }
847
+
848
+ private static boolean isSpace(char c)
849
+ {
850
+ return c == ' ' || c == '\t' || c == '\n' ||
851
+ c == '\u000b' || c == '\f' || c == '\r';
852
+ }
853
+
854
+ private static boolean isDigit(char c)
855
+ {
856
+ return '0' <= c && c <= '9';
857
+ }
858
+
859
+ private static boolean isEndOfText(String text, int pos)
860
+ {
861
+ return pos >= text.length();
862
+ }
863
+
864
+ private static boolean isSign(String text, int pos)
865
+ {
866
+ return !isEndOfText(text, pos) && (text.charAt(pos) == '+' || text.charAt(pos) == '-');
867
+ }
868
+
869
+ private static boolean isMinus(String text, int pos)
870
+ {
871
+ return !isEndOfText(text, pos) && text.charAt(pos) == '-';
872
+ }
873
+
874
+ private static boolean isBlank(String text, int pos)
875
+ {
876
+ return !isEndOfText(text, pos) && text.charAt(pos) == ' ';
877
+ }
878
+
879
+ private static int toInt(char c)
880
+ {
881
+ return c - '0';
882
+ }
883
+ }
884
+ }