re2js 2.7.0 → 2.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  * re2js
3
3
  * RE2JS is the JavaScript port of RE2, a regular expression engine that provides linear time matching
4
4
  *
5
- * @version v2.7.0
5
+ * @version v2.7.1
6
6
  * @author Oleksii Vasyliev
7
7
  * @homepage https://github.com/le0pard/re2js#readme
8
8
  * @repository github:le0pard/re2js
@@ -2337,30 +2337,35 @@ class Machine {
2337
2337
  }
2338
2338
  this.matched = false;
2339
2339
  this.matchcap.fill(-1);
2340
+
2341
+ // Lookbehinds must scan from the beginning of the string to build their state table,
2342
+ // even if the main pattern search is requested to start mid-string.
2343
+ let currentPos = this.prog.numLb > 0 ? 0 : pos;
2344
+ let matchStartPos = pos;
2340
2345
  let runq = this.q0;
2341
2346
  let nextq = this.q1;
2342
- let r = input.step(pos);
2347
+ let r = input.step(currentPos);
2343
2348
  let rune = r >> 3;
2344
2349
  let width = r & 7;
2345
2350
  let rune1 = -1;
2346
2351
  let width1 = 0;
2347
2352
  if (r !== MachineInputBase.EOF()) {
2348
- r = input.step(pos + width);
2353
+ r = input.step(currentPos + width);
2349
2354
  rune1 = r >> 3;
2350
2355
  width1 = r & 7;
2351
2356
  }
2352
2357
  let flag;
2353
- if (pos === 0) {
2358
+ if (currentPos === 0) {
2354
2359
  flag = Utils.emptyOpContext(-1, rune);
2355
2360
  } else {
2356
- flag = input.context(pos);
2361
+ flag = input.context(currentPos);
2357
2362
  }
2358
2363
  while (true) {
2359
2364
  if (runq.isEmpty()) {
2360
- if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) {
2365
+ if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && currentPos !== 0) {
2361
2366
  break;
2362
2367
  }
2363
- if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2368
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && currentPos !== 0) {
2364
2369
  break;
2365
2370
  }
2366
2371
  if (this.matched) {
@@ -2370,43 +2375,50 @@ class Machine {
2370
2375
  // Fast-forwarding the string pointer will skip over the positions where
2371
2376
  // the parallel lookbehind automata need to be spawned.
2372
2377
  if (this.prog.numLb === 0 && !(this.re2.prefix.length === 0) && rune1 !== this.re2.prefixRune && input.canCheckPrefix()) {
2373
- const advance = input.index(this.re2, pos);
2378
+ const advance = input.index(this.re2, currentPos);
2374
2379
  if (advance < 0) {
2375
2380
  break;
2376
2381
  }
2377
- pos += advance;
2378
- r = input.step(pos);
2382
+ currentPos += advance;
2383
+ r = input.step(currentPos);
2379
2384
  rune = r >> 3;
2380
2385
  width = r & 7;
2381
- r = input.step(pos + width);
2386
+ r = input.step(currentPos + width);
2382
2387
  rune1 = r >> 3;
2383
2388
  width1 = r & 7;
2384
2389
  }
2385
2390
  }
2386
- if (!this.matched && (pos === 0 || anchor === RE2Flags.UNANCHORED)) {
2387
- if (this.ncap > 0) {
2388
- this.matchcap[0] = pos;
2389
- }
2390
- // Spawn Lookbehind threads BEFORE the main pattern
2391
+
2392
+ // Optimize lookbehind spawning. Because lookbehinds are prefixed with `.*` by the compiler,
2393
+ // they only need to be spawned exactly once at the beginning of the string (currentPos === 0).
2394
+ if (currentPos === 0 && this.prog.numLb > 0) {
2391
2395
  for (let i = 0; i < this.prog.lbStarts.length; i++) {
2392
- this.add(runq, this.prog.lbStarts[i], pos, this.matchcap, flag, null);
2396
+ this.add(runq, this.prog.lbStarts[i], currentPos, this.matchcap, flag, null);
2397
+ }
2398
+ }
2399
+ if (!this.matched && (currentPos === 0 || anchor === RE2Flags.UNANCHORED)) {
2400
+ // ONLY spawn the main pattern if we have reached the requested search start boundary
2401
+ if (currentPos >= matchStartPos) {
2402
+ if (this.ncap > 0) {
2403
+ this.matchcap[0] = currentPos;
2404
+ }
2405
+ this.add(runq, this.prog.start, currentPos, this.matchcap, flag, null);
2393
2406
  }
2394
- this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
2395
2407
  }
2396
- const nextPos = pos + width;
2408
+ const nextPos = currentPos + width;
2397
2409
  flag = input.context(nextPos);
2398
- this.step(runq, nextq, pos, nextPos, rune, flag, anchor, pos === input.endPos());
2410
+ this.step(runq, nextq, currentPos, nextPos, rune, flag, anchor, currentPos === input.endPos());
2399
2411
  if (width === 0) {
2400
2412
  break;
2401
2413
  }
2402
2414
  if (this.ncap === 0 && this.matched) {
2403
2415
  break;
2404
2416
  }
2405
- pos += width;
2417
+ currentPos += width;
2406
2418
  rune = rune1;
2407
2419
  width = width1;
2408
2420
  if (rune !== -1) {
2409
- r = input.step(pos + width);
2421
+ r = input.step(currentPos + width);
2410
2422
  rune1 = r >> 3;
2411
2423
  width1 = r & 7;
2412
2424
  }
@@ -2423,35 +2435,46 @@ class Machine {
2423
2435
  if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2424
2436
  return [];
2425
2437
  }
2438
+
2439
+ // Lookbehinds must scan from the beginning of the string to build their state table,
2440
+ // even if the main pattern search is requested to start mid-string.
2441
+ let currentPos = this.prog.numLb > 0 ? 0 : pos;
2442
+ let matchStartPos = pos;
2426
2443
  let runq = this.q0;
2427
2444
  let nextq = this.q1;
2428
- let r = input.step(pos);
2445
+ let r = input.step(currentPos);
2429
2446
  let rune = r >> 3;
2430
2447
  let width = r & 7;
2431
2448
  let rune1 = -1;
2432
2449
  let width1 = 0;
2433
2450
  if (r !== MachineInputBase.EOF()) {
2434
- r = input.step(pos + width);
2451
+ r = input.step(currentPos + width);
2435
2452
  rune1 = r >> 3;
2436
2453
  width1 = r & 7;
2437
2454
  }
2438
- let flag = pos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(pos);
2455
+ let flag = currentPos === 0 ? Utils.emptyOpContext(-1, rune) : input.context(currentPos);
2439
2456
  const matches = new Set();
2440
2457
  while (true) {
2441
2458
  if (runq.isEmpty()) {
2442
- if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && pos !== 0) break;
2443
- if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && pos !== 0) {
2459
+ if ((startCond & Utils.EMPTY_BEGIN_TEXT) !== 0 && currentPos !== 0) break;
2460
+ if ((anchor === RE2Flags.ANCHOR_START || anchor === RE2Flags.ANCHOR_BOTH) && currentPos !== 0) {
2444
2461
  break;
2445
2462
  }
2446
2463
  }
2447
- if (pos === 0 || anchor === RE2Flags.UNANCHORED) {
2448
- // Spawn Lookbehind threads BEFORE the main pattern
2464
+
2465
+ // Optimize lookbehind spawning to exactly once at BOF
2466
+ if (currentPos === 0 && this.prog.numLb > 0) {
2449
2467
  for (let i = 0; i < this.prog.lbStarts.length; i++) {
2450
- this.add(runq, this.prog.lbStarts[i], pos, this.matchcap, flag, null);
2468
+ this.add(runq, this.prog.lbStarts[i], currentPos, this.matchcap, flag, null);
2469
+ }
2470
+ }
2471
+ if (currentPos === 0 || anchor === RE2Flags.UNANCHORED) {
2472
+ // ONLY spawn the main pattern if we have reached the requested search start boundary
2473
+ if (currentPos >= matchStartPos) {
2474
+ this.add(runq, this.prog.start, currentPos, this.matchcap, flag, null);
2451
2475
  }
2452
- this.add(runq, this.prog.start, pos, this.matchcap, flag, null);
2453
2476
  }
2454
- const nextPos = pos + width;
2477
+ const nextPos = currentPos + width;
2455
2478
  flag = input.context(nextPos);
2456
2479
  for (let j = 0; j < runq.size; j++) {
2457
2480
  let t = runq.denseThreads[j];
@@ -2460,7 +2483,7 @@ class Machine {
2460
2483
  let add = false;
2461
2484
  switch (i.op) {
2462
2485
  case Inst.MATCH:
2463
- if (anchor === RE2Flags.ANCHOR_BOTH && pos !== input.endPos()) break;
2486
+ if (anchor === RE2Flags.ANCHOR_BOTH && currentPos !== input.endPos()) break;
2464
2487
  matches.add(i.arg); // Record the matched Set ID
2465
2488
  break;
2466
2489
  case Inst.RUNE:
@@ -2488,11 +2511,11 @@ class Machine {
2488
2511
  }
2489
2512
  runq.clear();
2490
2513
  if (width === 0) break;
2491
- pos += width;
2514
+ currentPos += width;
2492
2515
  rune = rune1;
2493
2516
  width = width1;
2494
2517
  if (rune !== -1) {
2495
- r = input.step(pos + width);
2518
+ r = input.step(currentPos + width);
2496
2519
  rune1 = r >> 3;
2497
2520
  width1 = r & 7;
2498
2521
  }