@swagger-api/apidom-parser-adapter-openapi-yaml-3-1 1.0.0-beta.40 → 1.0.0-beta.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/apidom-parser-adapter-openapi-yaml-3-1.browser.js +1233 -1233
- package/package.json +5 -5
|
@@ -23310,1239 +23310,1239 @@ __webpack_require__.r(__webpack_exports__);
|
|
|
23310
23310
|
/* harmony export */ identifiers: () => (/* binding */ identifiers),
|
|
23311
23311
|
/* harmony export */ utilities: () => (/* binding */ utilities)
|
|
23312
23312
|
/* harmony export */ });
|
|
23313
|
-
/* *************************************************************************************
|
|
23314
|
-
* copyright: Copyright (c) 2023 Lowell D. Thomas, all rights reserved
|
|
23315
|
-
* license: BSD-2-Clause (https://opensource.org/licenses/BSD-2-Clause)
|
|
23316
|
-
*
|
|
23317
|
-
* Redistribution and use in source and binary forms, with or without
|
|
23318
|
-
* modification, are permitted provided that the following conditions are met:
|
|
23319
|
-
*
|
|
23320
|
-
* 1. Redistributions of source code must retain the above copyright notice, this
|
|
23321
|
-
* list of conditions and the following disclaimer.
|
|
23322
|
-
*
|
|
23323
|
-
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
23324
|
-
* this list of conditions and the following disclaimer in the documentation
|
|
23325
|
-
* and/or other materials provided with the distribution.
|
|
23326
|
-
*
|
|
23327
|
-
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
23328
|
-
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
23329
|
-
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
23330
|
-
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23331
|
-
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
23332
|
-
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
23333
|
-
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
23334
|
-
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
23335
|
-
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
23336
|
-
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
23337
|
-
*
|
|
23338
|
-
* ********************************************************************************* */
|
|
23339
|
-
|
|
23340
|
-
|
|
23341
|
-
/**
 * Parser constructor: interprets the opcodes of a generated grammar object
 * against an input string.
 *
 * Use with `new`; the public surface is attached to the instance:
 *  - `ast`, `stats`, `trace` - optional helper objects. When set, `parse()`
 *    calls their `init()` and notifies them while walking the opcodes.
 *  - `callbacks` - map of rule/UDT name (matched case-insensitively) to a
 *    syntax callback `(sysData, chars, phraseIndex, userData)`.
 *  - `parse(grammar, startName, inputString, callbackData)` - run the parser.
 *
 * Relies on the module-level `identifiers` and `utilities` bindings.
 */
const Parser = function fnparser() {
  const id = identifiers;
  const utils = utilities;
  const p = this;
  const thisFileName = 'parser.js: Parser(): ';

  /* The mutable result record shared by every operator: each operator
     reports its outcome by writing `state` and `phraseLength` here. */
  const systemData = function systemData() {
    this.state = id.ACTIVE;
    this.phraseLength = 0;
    this.refresh = () => {
      this.state = id.ACTIVE;
      this.phraseLength = 0;
    };
  };

  p.ast = undefined;
  p.stats = undefined;
  p.trace = undefined;
  p.callbacks = [];

  let lookAhead = 0;
  let treeDepth = 0;
  let maxTreeDepth = 0;
  let nodeHits = 0;
  let maxMatched = 0;
  let rules = undefined;
  let udts = undefined;
  let opcodes = undefined;
  let chars = undefined;
  let sysData = new systemData();
  let ruleCallbacks = undefined;
  let udtCallbacks = undefined;
  let userData = undefined;

  /* Reset all per-parse state so one Parser instance can be reused. */
  const clear = () => {
    lookAhead = 0;
    treeDepth = 0;
    maxTreeDepth = 0;
    nodeHits = 0;
    maxMatched = 0;
    rules = undefined;
    udts = undefined;
    opcodes = undefined;
    chars = undefined;
    sysData.refresh();
    ruleCallbacks = undefined;
    udtCallbacks = undefined;
    userData = undefined;
  };

  /* Resolve the user's name-keyed `p.callbacks` into two index-keyed arrays
     (`ruleCallbacks`, `udtCallbacks`). Throws if a key is not a rule or UDT
     name, or if a truthy entry is not a function. */
  const initializeCallbacks = () => {
    const functionName = `${thisFileName}initializeCallbacks(): `;
    ruleCallbacks = rules.map(() => undefined);
    udtCallbacks = udts.map(() => undefined);
    /* rule names first, then UDT names: an index >= rules.length is a UDT */
    const list = [...rules.map((rule) => rule.lower), ...udts.map((udt) => udt.lower)];
    for (const index in p.callbacks) {
      /* call through Object.prototype so null-prototype maps are accepted */
      if (Object.prototype.hasOwnProperty.call(p.callbacks, index)) {
        const i = list.indexOf(index.toLowerCase());
        if (i < 0) {
          throw new Error(`${functionName}syntax callback '${index}' not a rule or udt name`);
        }
        /* any falsy entry means "no callback" */
        const func = p.callbacks[index] ? p.callbacks[index] : undefined;
        if (typeof func === 'function' || func === undefined) {
          if (i < rules.length) {
            ruleCallbacks[i] = func;
          } else {
            udtCallbacks[i - rules.length] = func;
          }
        } else {
          throw new Error(`${functionName}syntax callback[${index}] must be function reference or falsy)`);
        }
      }
    }
  };

  /**
   * Parse `inputString` starting from the rule named `startName`.
   *
   * @param {object} grammar - grammar object exposing `rules` and `udts` arrays.
   * @param {string} startName - name of the start rule (case-insensitive).
   * @param {string} inputString - the text to parse.
   * @param {*} callbackData - passed through untouched to syntax callbacks.
   * @returns {object} `{ success, state, stateName, length, matched,
   *   maxMatched, maxTreeDepth, nodeHits }`; `success` is true only when the
   *   start rule matched the entire input.
   * @throws {Error} if the start rule is unknown, a callback is misconfigured,
   *   or the parse ends in an invalid state.
   */
  p.parse = (grammar, startName, inputString, callbackData) => {
    const functionName = `${thisFileName}parse(): `;
    clear();
    chars = utils.stringToChars(inputString);
    rules = grammar.rules;
    udts = grammar.udts;
    const lower = startName.toLowerCase();
    let startIndex = undefined;
    for (let i = 0; i < rules.length; i += 1) {
      if (lower === rules[i].lower) {
        startIndex = rules[i].index;
        break;
      }
    }
    if (startIndex === undefined) {
      /* report the name the caller supplied */
      throw new Error(`${functionName}start rule name '${startName}' not recognized`);
    }
    initializeCallbacks();
    if (p.trace) {
      p.trace.init(rules, udts, chars);
    }
    if (p.stats) {
      p.stats.init(rules, udts);
    }
    if (p.ast) {
      p.ast.init(rules, udts, chars);
    }
    userData = callbackData;
    /* create a dummy opcode for the start rule */
    opcodes = [
      {
        type: id.RNM,
        index: startIndex,
      },
    ];
    /* execute the start rule */
    opExecute(0, 0);
    opcodes = undefined;
    /* test and return the sysData */
    let success = false;
    switch (sysData.state) {
      case id.ACTIVE:
        throw new Error(`${functionName}final state should never be 'ACTIVE'`);
      case id.NOMATCH:
        success = false;
        break;
      case id.EMPTY:
      case id.MATCH:
        /* a match only counts as success if it consumed the whole input */
        success = sysData.phraseLength === chars.length;
        break;
      default:
        throw new Error('unrecognized state');
    }
    return {
      success,
      state: sysData.state,
      stateName: id.idName(sysData.state),
      length: chars.length,
      matched: sysData.phraseLength,
      maxMatched,
      maxTreeDepth,
      nodeHits,
    };
  };

  // The `ALT` operator.
  // Executes its child nodes, from left to right, until it finds a match.
  // Fails if *all* of its child nodes fail.
  const opALT = (opIndex, phraseIndex) => {
    const op = opcodes[opIndex];
    for (let i = 0; i < op.children.length; i += 1) {
      opExecute(op.children[i], phraseIndex);
      if (sysData.state !== id.NOMATCH) {
        break;
      }
    }
  };

  // The `CAT` operator.
  // Executes all of its child nodes, from left to right,
  // concatenating the matched phrases.
  // Fails if *any* child nodes fail.
  const opCAT = (opIndex, phraseIndex) => {
    let success;
    let astLength;
    let catCharIndex;
    let catPhrase;
    const op = opcodes[opIndex];
    if (p.ast) {
      /* remember the AST length so a failure can discard partial records */
      astLength = p.ast.getLength();
    }
    success = true;
    catCharIndex = phraseIndex;
    catPhrase = 0;
    for (let i = 0; i < op.children.length; i += 1) {
      opExecute(op.children[i], catCharIndex);
      if (sysData.state === id.NOMATCH) {
        success = false;
        break;
      } else {
        catCharIndex += sysData.phraseLength;
        catPhrase += sysData.phraseLength;
      }
    }
    if (success) {
      sysData.state = catPhrase === 0 ? id.EMPTY : id.MATCH;
      sysData.phraseLength = catPhrase;
    } else {
      sysData.state = id.NOMATCH;
      sysData.phraseLength = 0;
      if (p.ast) {
        p.ast.setLength(astLength);
      }
    }
  };

  // The `REP` operator.
  // Repeatedly executes its single child node,
  // concatenating each of the matched phrases found.
  // The number of repetitions executed and its final sysData depends
  // on its `min` & `max` repetition values.
  const opREP = (opIndex, phraseIndex) => {
    let astLength;
    let repCharIndex;
    let repPhrase;
    let repCount;
    const op = opcodes[opIndex];
    if (op.max === 0) {
      // this is an empty-string acceptor
      // deprecated: use the TLS empty string operator, "", instead
      sysData.state = id.EMPTY;
      sysData.phraseLength = 0;
      return;
    }
    repCharIndex = phraseIndex;
    repPhrase = 0;
    repCount = 0;
    if (p.ast) {
      astLength = p.ast.getLength();
    }
    while (1) {
      if (repCharIndex >= chars.length) {
        /* exit on end of input string */
        break;
      }
      /* the repeated child is the opcode immediately following this one */
      opExecute(opIndex + 1, repCharIndex);
      if (sysData.state === id.NOMATCH) {
        /* always end if the child node fails */
        break;
      }
      if (sysData.state === id.EMPTY) {
        /* REP always succeeds when the child node returns an empty phrase */
        /* this may not seem obvious, but that's the way it works out */
        break;
      }
      repCount += 1;
      repPhrase += sysData.phraseLength;
      repCharIndex += sysData.phraseLength;
      if (repCount === op.max) {
        /* end on maxed out reps */
        break;
      }
    }
    /* evaluate the match count according to the min, max values */
    if (sysData.state === id.EMPTY) {
      sysData.state = repPhrase === 0 ? id.EMPTY : id.MATCH;
      sysData.phraseLength = repPhrase;
    } else if (repCount >= op.min) {
      sysData.state = repPhrase === 0 ? id.EMPTY : id.MATCH;
      sysData.phraseLength = repPhrase;
    } else {
      sysData.state = id.NOMATCH;
      sysData.phraseLength = 0;
      if (p.ast) {
        p.ast.setLength(astLength);
      }
    }
  };

  // Validate the callback function's returned sysData values.
  // It's the user's responsibility to get them right
  // but `RNM` fails if not.
  // `down` is true for the call made before the rule's opcodes are executed;
  // only then may the callback leave the state ACTIVE.
  const validateRnmCallbackResult = (rule, sysData, charsLeft, down) => {
    if (sysData.phraseLength > charsLeft) {
      let str = `${thisFileName}opRNM(${rule.name}): callback function error: `;
      str += `sysData.phraseLength: ${sysData.phraseLength}`;
      str += ` must be <= remaining chars: ${charsLeft}`;
      throw new Error(str);
    }
    switch (sysData.state) {
      case id.ACTIVE:
        if (!down) {
          throw new Error(
            `${thisFileName}opRNM(${rule.name}): callback function return error. ACTIVE state not allowed.`
          );
        }
        break;
      case id.EMPTY:
        sysData.phraseLength = 0;
        break;
      case id.MATCH:
        /* normalize a zero-length MATCH to EMPTY */
        if (sysData.phraseLength === 0) {
          sysData.state = id.EMPTY;
        }
        break;
      case id.NOMATCH:
        sysData.phraseLength = 0;
        break;
      default:
        throw new Error(
          `${thisFileName}opRNM(${rule.name}): callback function return error. Unrecognized return state: ${sysData.state}`
        );
    }
  };

  // The `RNM` operator.
  // This operator acts as a root node for a parse tree branch below and
  // returns the matched phrase to its parent.
  // However, its larger responsibility is handling user-defined callback functions and `AST` nodes.
  // Note that the `AST` is a separate object, but `RNM` calls its functions to create its nodes.
  const opRNM = (opIndex, phraseIndex) => {
    let astLength;
    let astDefined;
    let savedOpcodes;
    const op = opcodes[opIndex];
    const rule = rules[op.index];
    const callback = ruleCallbacks[rule.index];
    /* ignore AST in look ahead (AND or NOT operator above) */
    if (!lookAhead) {
      astDefined = p.ast && p.ast.ruleDefined(op.index);
      if (astDefined) {
        astLength = p.ast.getLength();
        p.ast.down(op.index, rules[op.index].name);
      }
    }
    if (callback) {
      /* call user's callback going down the parse tree */
      const charsLeft = chars.length - phraseIndex;
      callback(sysData, chars, phraseIndex, userData);
      validateRnmCallbackResult(rule, sysData, charsLeft, true);
      if (sysData.state === id.ACTIVE) {
        /* swap in the rule's own opcodes while its branch is executed */
        savedOpcodes = opcodes;
        opcodes = rule.opcodes;
        opExecute(0, phraseIndex);
        opcodes = savedOpcodes;
        /* call user's callback going up the parse tree */
        callback(sysData, chars, phraseIndex, userData);
        validateRnmCallbackResult(rule, sysData, charsLeft, false);
      } /* implied else clause: just accept the callback sysData - RNM acting as UDT */
    } else {
      /* no callback - just execute the rule */
      savedOpcodes = opcodes;
      opcodes = rule.opcodes;
      opExecute(0, phraseIndex);
      opcodes = savedOpcodes;
    }
    if (!lookAhead) {
      /* end AST */
      if (astDefined) {
        if (sysData.state === id.NOMATCH) {
          p.ast.setLength(astLength);
        } else {
          p.ast.up(op.index, rule.name, phraseIndex, sysData.phraseLength);
        }
      }
    }
  };

  // The `TRG` operator.
  // Succeeds if the single first character of the phrase is
  // within the `min - max` range.
  const opTRG = (opIndex, phraseIndex) => {
    const op = opcodes[opIndex];
    sysData.state = id.NOMATCH;
    if (phraseIndex < chars.length) {
      if (op.min <= chars[phraseIndex] && chars[phraseIndex] <= op.max) {
        sysData.state = id.MATCH;
        sysData.phraseLength = 1;
      }
    }
  };

  // The `TBS` operator.
  // Matches its pre-defined phrase against the input string.
  // All characters must match exactly.
  // Case-sensitive literal strings (`'string'` & `%s"string"`) are translated to `TBS`
  // operators by `apg`.
  // Phrase length of zero is not allowed.
  // Empty phrases can only be defined with `TLS` operators.
  const opTBS = (opIndex, phraseIndex) => {
    const op = opcodes[opIndex];
    const len = op.string.length;
    sysData.state = id.NOMATCH;
    if (phraseIndex + len <= chars.length) {
      for (let i = 0; i < len; i += 1) {
        if (chars[phraseIndex + i] !== op.string[i]) {
          return;
        }
      }
      sysData.state = id.MATCH;
      sysData.phraseLength = len;
    } /* implied else NOMATCH */
  };

  // The `TLS` operator.
  // Matches its pre-defined phrase against the input string.
  // A case-insensitive match is attempted for ASCII alphabetical characters.
  // `TLS` is the only operator that explicitly allows empty phrases.
  // `apg` will fail for empty `TBS`, case-sensitive strings (`''`) or
  // zero repetitions (`0*0RuleName` or `0RuleName`).
  const opTLS = (opIndex, phraseIndex) => {
    let code;
    const op = opcodes[opIndex];
    sysData.state = id.NOMATCH;
    const len = op.string.length;
    if (len === 0) {
      /* EMPTY match allowed for TLS */
      sysData.state = id.EMPTY;
      return;
    }
    if (phraseIndex + len <= chars.length) {
      for (let i = 0; i < len; i += 1) {
        code = chars[phraseIndex + i];
        /* fold ASCII 'A'-'Z' (65-90) to lower case before comparing */
        if (code >= 65 && code <= 90) {
          code += 32;
        }
        if (code !== op.string[i]) {
          return;
        }
      }
      sysData.state = id.MATCH;
      sysData.phraseLength = len;
    } /* implied else NOMATCH */
  };

  // Validate the callback function's returned sysData values.
  // It's the user's responsibility to get it right but `UDT` fails if not.
  const validateUdtCallbackResult = (udt, sysData, charsLeft) => {
    if (sysData.phraseLength > charsLeft) {
      let str = `${thisFileName}opUDT(${udt.name}): callback function error: `;
      str += `sysData.phraseLength: ${sysData.phraseLength}`;
      str += ` must be <= remaining chars: ${charsLeft}`;
      throw new Error(str);
    }
    switch (sysData.state) {
      case id.ACTIVE:
        throw new Error(`${thisFileName}opUDT(${udt.name}) ACTIVE state return not allowed.`);
      case id.EMPTY:
        if (udt.empty) {
          sysData.phraseLength = 0;
        } else {
          throw new Error(`${thisFileName}opUDT(${udt.name}) may not return EMPTY.`);
        }
        break;
      case id.MATCH:
        if (sysData.phraseLength === 0) {
          if (udt.empty) {
            sysData.state = id.EMPTY;
          } else {
            throw new Error(`${thisFileName}opUDT(${udt.name}) may not return EMPTY.`);
          }
        }
        break;
      case id.NOMATCH:
        sysData.phraseLength = 0;
        break;
      default:
        throw new Error(
          `${thisFileName}opUDT(${udt.name}): callback function return error. Unrecognized return state: ${sysData.state}`
        );
    }
  };

  // The `UDT` operator.
  // Simply calls the user's callback function, but operates like `RNM` with regard to the `AST`
  // and back referencing.
  // There is some ambiguity here. `UDT`s act as terminals for phrase recognition but as named rules
  // for `AST` nodes and back referencing.
  // See [`ast.js`](./ast.html) for usage.
  const opUDT = (opIndex, phraseIndex) => {
    let astLength;
    let astIndex;
    let astDefined;
    const op = opcodes[opIndex];
    const udt = udts[op.index];
    sysData.UdtIndex = udt.index;
    /* ignore AST in look ahead */
    if (!lookAhead) {
      astDefined = p.ast && p.ast.udtDefined(op.index);
      if (astDefined) {
        /* AST callback indexes place UDTs after all of the rules */
        astIndex = rules.length + op.index;
        astLength = p.ast.getLength();
        p.ast.down(astIndex, udt.name);
      }
    }
    /* call the UDT */
    const charsLeft = chars.length - phraseIndex;
    udtCallbacks[op.index](sysData, chars, phraseIndex, userData);
    validateUdtCallbackResult(udt, sysData, charsLeft);
    if (!lookAhead) {
      /* end AST */
      if (astDefined) {
        if (sysData.state === id.NOMATCH) {
          p.ast.setLength(astLength);
        } else {
          p.ast.up(astIndex, udt.name, phraseIndex, sysData.phraseLength);
        }
      }
    }
  };

  // The `AND` operator.
  // This is the positive `look ahead` operator.
  // Executes its single child node, returning the EMPTY state
  // if it succeeds and NOMATCH if it fails.
  // *Always* backtracks on any matched phrase and returns EMPTY on success.
  const opAND = (opIndex, phraseIndex) => {
    lookAhead += 1;
    opExecute(opIndex + 1, phraseIndex);
    lookAhead -= 1;
    sysData.phraseLength = 0;
    switch (sysData.state) {
      case id.EMPTY:
      case id.MATCH:
        sysData.state = id.EMPTY;
        break;
      case id.NOMATCH:
        sysData.state = id.NOMATCH;
        break;
      default:
        throw new Error(`opAND: invalid state ${sysData.state}`);
    }
  };

  // The `NOT` operator.
  // This is the negative `look ahead` operator.
  // Executes its single child node, returning the EMPTY state
  // if it *fails* and NOMATCH if it succeeds.
  // *Always* backtracks on any matched phrase and returns EMPTY
  // on success (failure of its child node).
  const opNOT = (opIndex, phraseIndex) => {
    lookAhead += 1;
    opExecute(opIndex + 1, phraseIndex);
    lookAhead -= 1;
    sysData.phraseLength = 0;
    switch (sysData.state) {
      case id.EMPTY:
      case id.MATCH:
        sysData.state = id.NOMATCH;
        break;
      case id.NOMATCH:
        sysData.state = id.EMPTY;
        break;
      default:
        throw new Error(`opNOT: invalid state ${sysData.state}`);
    }
  };

  /* Dispatch a single opcode to its operator function, maintaining the
     node-hit / tree-depth / max-matched counters and notifying the optional
     `stats` and `trace` objects. */
  const opExecute = (opIndex, phraseIndex) => {
    const functionName = `${thisFileName}opExecute(): `;
    const op = opcodes[opIndex];
    nodeHits += 1;
    if (treeDepth > maxTreeDepth) {
      maxTreeDepth = treeDepth;
    }
    treeDepth += 1;
    sysData.refresh();
    if (p.trace) {
      p.trace.down(op, phraseIndex);
    }
    switch (op.type) {
      case id.ALT:
        opALT(opIndex, phraseIndex);
        break;
      case id.CAT:
        opCAT(opIndex, phraseIndex);
        break;
      case id.REP:
        opREP(opIndex, phraseIndex);
        break;
      case id.RNM:
        opRNM(opIndex, phraseIndex);
        break;
      case id.TRG:
        opTRG(opIndex, phraseIndex);
        break;
      case id.TBS:
        opTBS(opIndex, phraseIndex);
        break;
      case id.TLS:
        opTLS(opIndex, phraseIndex);
        break;
      case id.UDT:
        opUDT(opIndex, phraseIndex);
        break;
      case id.AND:
        opAND(opIndex, phraseIndex);
        break;
      case id.NOT:
        opNOT(opIndex, phraseIndex);
        break;
      default:
        throw new Error(`${functionName}unrecognized operator`);
    }
    if (!lookAhead) {
      /* look-ahead matches do not count toward the furthest matched position */
      if (phraseIndex + sysData.phraseLength > maxMatched) {
        maxMatched = phraseIndex + sysData.phraseLength;
      }
    }
    if (p.stats) {
      p.stats.collect(op, sysData);
    }
    if (p.trace) {
      p.trace.up(op, sysData.state, phraseIndex, sysData.phraseLength);
    }
    treeDepth -= 1;
  };
};
|
|
23940
|
-
|
|
23941
|
-
const Ast = function fnast() {
|
|
23942
|
-
const thisFileName = 'parser.js: Ast()): ';
|
|
23943
|
-
const id = identifiers;
|
|
23944
|
-
const utils = utilities;
|
|
23945
|
-
const a = this;
|
|
23946
|
-
let rules = undefined;
|
|
23947
|
-
let udts = undefined;
|
|
23948
|
-
let chars = undefined;
|
|
23949
|
-
let nodeCount = 0;
|
|
23950
|
-
const nodeCallbacks = [];
|
|
23951
|
-
const stack = [];
|
|
23952
|
-
const records = [];
|
|
23953
|
-
a.callbacks = [];
|
|
23954
|
-
/* called by the parser to initialize the AST with the rules, UDTs and the input characters */
|
|
23955
|
-
a.init = (rulesIn, udtsIn, charsIn) => {
|
|
23956
|
-
stack.length = 0;
|
|
23957
|
-
records.length = 0;
|
|
23958
|
-
nodeCount = 0;
|
|
23959
|
-
rules = rulesIn;
|
|
23960
|
-
udts = udtsIn;
|
|
23961
|
-
chars = charsIn;
|
|
23962
|
-
let i;
|
|
23963
|
-
const list = [];
|
|
23964
|
-
for (i = 0; i < rules.length; i += 1) {
|
|
23965
|
-
list.push(rules[i].lower);
|
|
23966
|
-
}
|
|
23967
|
-
for (i = 0; i < udts.length; i += 1) {
|
|
23968
|
-
list.push(udts[i].lower);
|
|
23969
|
-
}
|
|
23970
|
-
nodeCount = rules.length + udts.length;
|
|
23971
|
-
for (i = 0; i < nodeCount; i += 1) {
|
|
23972
|
-
nodeCallbacks[i] = undefined;
|
|
23973
|
-
}
|
|
23974
|
-
for (const index in a.callbacks) {
|
|
23975
|
-
if (a.callbacks.hasOwnProperty(index)) {
|
|
23976
|
-
const lower = index.toLowerCase();
|
|
23977
|
-
i = list.indexOf(lower);
|
|
23978
|
-
if (i < 0) {
|
|
23979
|
-
throw new Error(`${thisFileName}init: node '${index}' not a rule or udt name`);
|
|
23980
|
-
}
|
|
23981
|
-
nodeCallbacks[i] = a.callbacks[index];
|
|
23982
|
-
}
|
|
23983
|
-
}
|
|
23984
|
-
};
|
|
23985
|
-
/* AST node rule callbacks - called by the parser's `RNM` operator */
|
|
23986
|
-
a.ruleDefined = (index) => !!nodeCallbacks[index];
|
|
23987
|
-
/* AST node UDT callbacks - called by the parser's `UDT` operator */
|
|
23988
|
-
a.udtDefined = (index) => !!nodeCallbacks[rules.length + index];
|
|
23989
|
-
/* called by the parser's `RNM` & `UDT` operators
|
|
23990
|
-
builds a record for the downward traversal of the node */
|
|
23991
|
-
a.down = (callbackIndex, name) => {
|
|
23992
|
-
const thisIndex = records.length;
|
|
23993
|
-
stack.push(thisIndex);
|
|
23994
|
-
records.push({
|
|
23995
|
-
name,
|
|
23996
|
-
thisIndex,
|
|
23997
|
-
thatIndex: undefined,
|
|
23998
|
-
state: id.SEM_PRE,
|
|
23999
|
-
callbackIndex,
|
|
24000
|
-
phraseIndex: undefined,
|
|
24001
|
-
phraseLength: undefined,
|
|
24002
|
-
stack: stack.length,
|
|
24003
|
-
});
|
|
24004
|
-
return thisIndex;
|
|
24005
|
-
};
|
|
24006
|
-
/* called by the parser's `RNM` & `UDT` operators */
|
|
24007
|
-
/* builds a record for the upward traversal of the node */
|
|
24008
|
-
a.up = (callbackIndex, name, phraseIndex, phraseLength) => {
|
|
24009
|
-
const thisIndex = records.length;
|
|
24010
|
-
const thatIndex = stack.pop();
|
|
24011
|
-
records.push({
|
|
24012
|
-
name,
|
|
24013
|
-
thisIndex,
|
|
24014
|
-
thatIndex,
|
|
24015
|
-
state: id.SEM_POST,
|
|
24016
|
-
callbackIndex,
|
|
24017
|
-
phraseIndex,
|
|
24018
|
-
phraseLength,
|
|
24019
|
-
stack: stack.length,
|
|
24020
|
-
});
|
|
24021
|
-
records[thatIndex].thatIndex = thisIndex;
|
|
24022
|
-
records[thatIndex].phraseIndex = phraseIndex;
|
|
24023
|
-
records[thatIndex].phraseLength = phraseLength;
|
|
24024
|
-
return thisIndex;
|
|
24025
|
-
};
|
|
24026
|
-
// Called by the user to translate the AST.
|
|
24027
|
-
// Translate means to associate or apply some semantic action to the
|
|
24028
|
-
// phrases that were syntactically matched to the AST nodes according
|
|
24029
|
-
// to the defining grammar.
|
|
24030
|
-
// ```
|
|
24031
|
-
// data - optional user-defined data
|
|
24032
|
-
// passed to the callback functions by the translator
|
|
24033
|
-
// ```
|
|
24034
|
-
a.translate = (data) => {
|
|
24035
|
-
let ret;
|
|
24036
|
-
let callback;
|
|
24037
|
-
let record;
|
|
24038
|
-
for (let i = 0; i < records.length; i += 1) {
|
|
24039
|
-
record = records[i];
|
|
24040
|
-
callback = nodeCallbacks[record.callbackIndex];
|
|
24041
|
-
if (callback) {
|
|
24042
|
-
if (record.state === id.SEM_PRE) {
|
|
24043
|
-
callback(id.SEM_PRE, chars, record.phraseIndex, record.phraseLength, data);
|
|
24044
|
-
} else if (callback) {
|
|
24045
|
-
callback(id.SEM_POST, chars, record.phraseIndex, record.phraseLength, data);
|
|
24046
|
-
}
|
|
24047
|
-
}
|
|
24048
|
-
}
|
|
24049
|
-
};
|
|
24050
|
-
/* called by the parser to reset the length of the records array */
|
|
24051
|
-
/* necessary on backtracking */
|
|
24052
|
-
a.setLength = (length) => {
|
|
24053
|
-
records.length = length;
|
|
24054
|
-
if (length > 0) {
|
|
24055
|
-
stack.length = records[length - 1].stack;
|
|
24056
|
-
} else {
|
|
24057
|
-
stack.length = 0;
|
|
24058
|
-
}
|
|
24059
|
-
};
|
|
24060
|
-
/* called by the parser to get the length of the records array */
|
|
24061
|
-
a.getLength = () => records.length;
|
|
24062
|
-
/* helper for XML display */
|
|
24063
|
-
function indent(n) {
|
|
24064
|
-
let ret = '';
|
|
24065
|
-
while (n-- > 0) {
|
|
24066
|
-
ret += ' ';
|
|
24067
|
-
}
|
|
24068
|
-
return ret;
|
|
24069
|
-
}
|
|
24070
|
-
// Generate an `XML` version of the AST.
|
|
24071
|
-
// Useful if you want to use a special or favorite XML parser to translate the
|
|
24072
|
-
// AST. Node data are JavaScript strings.
|
|
24073
|
-
a.toXml = () => {
  // Serializes the AST records as an XML document string. The pieces are
  // collected in order and joined once at the end.
  const pieces = [
    '<?xml version="1.0" encoding="utf-8"?>\n',
    `<root nodes="${records.length / 2}" characters="${chars.length}">\n`,
    `<!-- input string -->\n`,
    indent(2),
    utils.charsToString(chars),
    '\n',
  ];
  // Nesting level of the node currently being written.
  let level = 0;
  records.forEach((rec) => {
    if (rec.state !== id.SEM_PRE) {
      // Closing visit: emit the end tag at the node's own level, then step out.
      pieces.push(indent(level), `</node><!-- name="${rec.name}" -->\n`);
      level -= 1;
      return;
    }
    // Opening visit: step in, emit the start tag followed by the matched phrase.
    level += 1;
    pieces.push(indent(level));
    pieces.push(`<node name="${rec.name}" index="${rec.phraseIndex}" length="${rec.phraseLength}">\n`);
    pieces.push(indent(level + 2));
    pieces.push(utils.charsToString(chars, rec.phraseIndex, rec.phraseLength));
    pieces.push('\n');
  });
  pieces.push('</root>\n');
  return pieces.join('');
};
|
|
24100
|
-
};
|
|
24101
|
-
|
|
24102
|
-
// Trace constructor. An instance accumulates a textual trace of the parser's
// walk over the parse tree: one line when an operator node is entered
// (`down`) and one line when it is left (`up`). The text is retrieved with
// `displayTrace()`.
const Trace = function fntrace() {
  const id = identifiers;
  const utils = utilities;
  const thisFile = 'parser.js: Trace(): ';
  // Longest phrase excerpt (in characters) shown on a trace line.
  const MAX_PHRASE = 100;
  let chars;
  let rules;
  let udts;
  let out = '';
  let treeDepth = 0;

  // Leader for a trace line at depth `n`: one '.' per level, with every
  // fifth position drawn as '|'. Negative depths give an empty leader.
  const indent = (n) => {
    let leader = '';
    for (let position = 1; position <= n; position += 1) {
      leader += position % 5 === 0 ? '|' : '.';
    }
    return leader;
  };

  // Label for a terminal string operator: strings longer than six characters
  // are abbreviated to their first three characters plus an ellipsis.
  const terminalName = (label, string) => {
    if (string.length > 6) {
      return `${label}(${utils.charsToString(string, 0, 3)}...)`;
    }
    return `${label}(${utils.charsToString(string, 0, 6)})`;
  };

  // Human-readable name of an opcode; throws on an unknown opcode type.
  const opName = (op) => {
    switch (op.type) {
      case id.ALT:
        return 'ALT';
      case id.CAT:
        return 'CAT';
      case id.REP:
        return op.max === Infinity ? `REP(${op.min},inf)` : `REP(${op.min},${op.max})`;
      case id.RNM:
        return `RNM(${rules[op.index].name})`;
      case id.TRG:
        return `TRG(${op.min},${op.max})`;
      case id.TBS:
        return terminalName('TBS', op.string);
      case id.TLS:
        return terminalName('TLS', op.string);
      case id.UDT:
        return `UDT(${udts[op.index].name})`;
      case id.AND:
        return 'AND';
      case id.NOT:
        return 'NOT';
      default:
        throw new Error(`${thisFile}Trace: opName: unrecognized opcode`);
    }
  };

  // Stores the rule list, UDT list and input characters used by later calls.
  this.init = (r, u, c) => {
    rules = r;
    udts = u;
    chars = c;
  };

  // Records entry into operator `op` at input position `offset`, showing up
  // to MAX_PHRASE characters of the remaining input, then goes one level deeper.
  this.down = (op, offset) => {
    const lead = indent(treeDepth);
    const remaining = chars.length - offset;
    const shown = Math.min(MAX_PHRASE, remaining);
    let excerpt = utils.charsToString(chars, offset, shown);
    if (shown < remaining) {
      excerpt += '...';
    }
    out += `${lead}|-|[${opName(op)}]${excerpt}\n`;
    treeDepth += 1;
  };

  // Records the result of operator `op`: comes back up one level and writes
  // the state marker (|E|, |M| or |N|) with the matched phrase, if any.
  // Throws when `state` is not EMPTY, MATCH or NOMATCH.
  this.up = (op, state, offset, phraseLength) => {
    const thisFunc = `${thisFile}trace.up: `;
    treeDepth -= 1;
    const lead = indent(treeDepth);
    let marker;
    let text;
    if (state === id.EMPTY) {
      marker = '|E|';
      text = `''`;
    } else if (state === id.MATCH) {
      marker = '|M|';
      const shown = Math.min(MAX_PHRASE, phraseLength);
      const ellipsis = shown < phraseLength ? '...' : '';
      text = `'${utils.charsToString(chars, offset, shown)}${ellipsis}'`;
    } else if (state === id.NOMATCH) {
      marker = '|N|';
      text = '';
    } else {
      throw new Error(`${thisFunc} unrecognized state`);
    }
    out += `${lead}${marker}[${opName(op)}]${text}\n`;
  };

  // Returns everything traced so far.
  this.displayTrace = () => out;
};
|
|
24228
|
-
|
|
24229
|
-
// Stats constructor. An instance counts, per operator type and per rule/UDT,
// how often the parser's node visits ended in MATCH, EMPTY or NOMATCH.
//   init(rules, udts)   - called by the parser; resets all counters.
//   collect(op, sys)    - called by the parser after each node is traversed.
//   displayStats()      - returns a text table of the per-operator counts.
//   displayHits(type)   - returns a text table of the per-rule/UDT counts.
const Stats = function fnstats() {
  const id = identifiers;
  const thisFileName = 'parser.js: Stats(): ';
  let rules;
  let udts;
  let totals;
  const stats = [];
  const ruleStats = [];
  const udtStats = [];
  /* called by parser to initialize the stats */
  this.init = (r, u) => {
    rules = r;
    udts = u;
    clear();
  };
  /* This function is the main interaction with the parser. */
  /* The parser calls it after each node has been traversed. */
  this.collect = (op, sys) => {
    incStat(totals, sys.state);
    incStat(stats[op.type], sys.state);
    if (op.type === id.RNM) {
      incStat(ruleStats[op.index], sys.state);
    }
    if (op.type === id.UDT) {
      incStat(udtStats[op.index], sys.state);
    }
  };
  /* Returns the operator table: one row per operator type plus a TOTAL row. */
  this.displayStats = () => {
    // Sums of the displayed rows; kept separate from the collected `totals`.
    const shown = {
      match: 0,
      empty: 0,
      nomatch: 0,
      total: 0,
    };
    const displayRow = (op, m, e, n, t) => {
      shown.match += m;
      shown.empty += e;
      shown.nomatch += n;
      shown.total += t;
      return `${op} | ${normalize(m)} | ${normalize(e)} | ${normalize(n)} | ${normalize(t)} |\n`;
    };
    const operators = [
      [' ALT', id.ALT],
      [' CAT', id.CAT],
      [' REP', id.REP],
      [' RNM', id.RNM],
      [' TRG', id.TRG],
      [' TBS', id.TBS],
      [' TLS', id.TLS],
      [' UDT', id.UDT],
      [' AND', id.AND],
      [' NOT', id.NOT],
    ];
    let out = '';
    out += ' OPERATOR STATS\n';
    out += ' | MATCH | EMPTY | NOMATCH | TOTAL |\n';
    for (const [label, type] of operators) {
      const s = stats[type];
      out += displayRow(label, s.match, s.empty, s.nomatch, s.total);
    }
    out += displayRow('TOTAL', shown.match, shown.empty, shown.nomatch, shown.total);
    return out;
  };
  /*
  Display rule/udt
  type - a string starting with 'a' sorts alphabetically, one starting with
         'i' sorts by index; anything else sorts by hit count.
  Only rules/UDTs with at least one hit are listed.
  */
  this.displayHits = (type) => {
    let out = '';
    // Formatting only: the collected counters must not be altered by a display call.
    const displayRow = (m, e, n, t, name) =>
      `| ${normalize(m)} | ${normalize(e)} | ${normalize(n)} | ${normalize(t)} | ${name}\n`;
    let compare;
    if (typeof type === 'string' && type.toLowerCase()[0] === 'a') {
      compare = sortAlpha;
      out += ' RULES/UDTS ALPHABETICALLY\n';
    } else if (typeof type === 'string' && type.toLowerCase()[0] === 'i') {
      compare = sortIndex;
      out += ' RULES/UDTS BY INDEX\n';
    } else {
      compare = sortHits;
      out += ' RULES/UDTS BY HIT COUNT\n';
    }
    // Sort copies: collect() addresses ruleStats/udtStats by rule/UDT index,
    // so the live arrays have to keep their original order.
    const orderedRules = [...ruleStats].sort(compare);
    const orderedUdts = [...udtStats].sort(compare);
    out += '| MATCH | EMPTY | NOMATCH | TOTAL | NAME\n';
    for (const r of orderedRules) {
      if (r.total) {
        out += displayRow(r.match, r.empty, r.nomatch, r.total, r.name);
      }
    }
    for (const r of orderedUdts) {
      if (r.total) {
        out += displayRow(r.match, r.empty, r.nomatch, r.total, r.name);
      }
    }
    return out;
  };
  /* Formats a count for a table cell; the prefix is chosen by magnitude. */
  const normalize = (n) => {
    if (n < 10) {
      return ` ${n}`;
    }
    if (n < 100) {
      return ` ${n}`;
    }
    if (n < 1000) {
      return ` ${n}`;
    }
    if (n < 10000) {
      return ` ${n}`;
    }
    if (n < 100000) {
      return ` ${n}`;
    }
    if (n < 1000000) {
      return ` ${n}`;
    }
    return `${n}`;
  };
  /* Comparator: ascending by lower-case name. */
  const sortAlpha = (lhs, rhs) => {
    if (lhs.lower < rhs.lower) {
      return -1;
    }
    if (lhs.lower > rhs.lower) {
      return 1;
    }
    return 0;
  };
  /* Comparator: descending by hit count, ties broken alphabetically. */
  const sortHits = (lhs, rhs) => {
    if (lhs.total < rhs.total) {
      return 1;
    }
    if (lhs.total > rhs.total) {
      return -1;
    }
    return sortAlpha(lhs, rhs);
  };
  /* Comparator: ascending by index. */
  const sortIndex = (lhs, rhs) => {
    if (lhs.index < rhs.index) {
      return -1;
    }
    if (lhs.index > rhs.index) {
      return 1;
    }
    return 0;
  };
  const EmptyStat = function fnempty() {
    this.empty = 0;
    this.match = 0;
    this.nomatch = 0;
    this.total = 0;
  };
  /* A zeroed counter record carrying the identity of a rule or UDT. */
  const namedStat = (item) => ({
    empty: 0,
    match: 0,
    nomatch: 0,
    total: 0,
    name: item.name,
    lower: item.lower,
    index: item.index,
  });
  /* Zero out all stats */
  const clear = () => {
    stats.length = 0;
    totals = new EmptyStat();
    stats[id.ALT] = new EmptyStat();
    stats[id.CAT] = new EmptyStat();
    stats[id.REP] = new EmptyStat();
    stats[id.RNM] = new EmptyStat();
    stats[id.TRG] = new EmptyStat();
    stats[id.TBS] = new EmptyStat();
    stats[id.TLS] = new EmptyStat();
    stats[id.UDT] = new EmptyStat();
    stats[id.AND] = new EmptyStat();
    stats[id.NOT] = new EmptyStat();
    ruleStats.length = 0;
    for (let i = 0; i < rules.length; i += 1) {
      ruleStats.push(namedStat(rules[i]));
    }
    // Always reset, so UDT rows from an earlier init() never linger when the
    // current grammar has no UDTs.
    udtStats.length = 0;
    for (let i = 0; i < udts.length; i += 1) {
      udtStats.push(namedStat(udts[i]));
    }
  };
  /* increment the designated operator hit count by one */
  const incStat = (stat, state) => {
    stat.total += 1;
    switch (state) {
      case id.EMPTY:
        stat.empty += 1;
        break;
      case id.MATCH:
        stat.match += 1;
        break;
      case id.NOMATCH:
        stat.nomatch += 1;
        break;
      default:
        throw new Error(`${thisFileName}collect(): incStat(): unrecognized state: ${state}`);
    }
  };
};
|
|
24447
|
-
|
|
24448
|
-
const utilities = {
  // utility functions

  // Converts a string to an array of Unicode code points (one entry per
  // code point, so surrogate pairs become a single number).
  stringToChars: (string) => [...string].map((cp) => cp.codePointAt(0)),

  // Converts code points back to a string.
  //   chars - array (or typed array) of code points
  //   beg   - optional start index; when undefined or negative the whole
  //           of `chars` is converted and `len` is ignored
  //   len   - optional number of code points; when undefined everything
  //           from `beg` on is converted; when <= 0 the result is ''
  charsToString: (chars, beg, len) => {
    let subChars = chars;
    if (beg !== undefined && !(beg < 0)) {
      if (len === undefined) {
        subChars = chars.slice(beg);
      } else if (len <= 0) {
        // always an empty string
        return '';
      } else {
        subChars = chars.slice(beg, beg + len);
      }
    }
    // Inputs without a numeric length (other iterables) are materialized once.
    const codes = typeof subChars.length === 'number' ? subChars : [...subChars];
    // Spreading a very large array into a single call exceeds the engine's
    // argument limit and throws a RangeError, so convert in bounded chunks.
    const CHUNK = 8192;
    if (codes.length <= CHUNK) {
      return String.fromCodePoint(...codes);
    }
    const parts = [];
    for (let i = 0; i < codes.length; i += CHUNK) {
      parts.push(String.fromCodePoint(...codes.slice(i, i + CHUNK)));
    }
    return parts.join('');
  },
};
|
|
24471
|
-
|
|
24472
|
-
const identifiers = {
  // Operator type identifiers.
  // NB: These must match the values in apg-js 4.3.0, apg-lib/identifiers.
  /* the original ABNF operators */
  ALT: 1 /* alternation */,
  CAT: 2 /* concatenation */,
  REP: 3 /* repetition */,
  RNM: 4 /* rule name */,
  TRG: 5 /* terminal range */,
  TBS: 6 /* terminal binary string, case sensitive */,
  TLS: 7 /* terminal literal string, case insensitive */,
  /* the super set, SABNF operators */
  UDT: 11 /* user-defined terminal */,
  AND: 12 /* positive look ahead */,
  NOT: 13 /* negative look ahead */,
  // Parser states, used by the parser and by the user's `RNM` and `UDT`
  // callback functions while the parse tree nodes are traversed.
  // - *ACTIVE* - downward direction through the parse tree node
  // - *MATCH* - upward direction; a phrase of length \> 0 was matched
  // - *EMPTY* - upward direction; a phrase of length = 0 was matched
  // - *NOMATCH* - upward direction; no phrase was matched at all
  ACTIVE: 100,
  MATCH: 101,
  EMPTY: 102,
  NOMATCH: 103,
  // Direction of flow through the `AST` nodes, used by the
  // [`AST` translator](./ast.html) (semantic analysis) and the user's callbacks.
  // - *SEM_PRE* - downward (pre-branch) direction through the `AST` node
  // - *SEM_POST* - upward (post-branch) direction through the `AST` node
  SEM_PRE: 200,
  SEM_POST: 201,
  // Ignored. Retained for backwards compatibility.
  SEM_OK: 300,
  // Returns the name of the identifier whose value is strictly equal to `s`,
  // or 'UNRECOGNIZED STATE' when there is none. Values are read from
  // `identifiers` at call time, in the order listed below.
  idName: (s) => {
    const names = [
      'ALT',
      'CAT',
      'REP',
      'RNM',
      'TRG',
      'TBS',
      'TLS',
      'UDT',
      'AND',
      'NOT',
      'ACTIVE',
      'EMPTY',
      'MATCH',
      'NOMATCH',
      'SEM_PRE',
      'SEM_POST',
      'SEM_OK',
    ];
    const found = names.find((name) => identifiers[name] === s);
    return found === undefined ? 'UNRECOGNIZED STATE' : found;
  },
};
|
|
23313
|
+
/* *************************************************************************************
|
|
23314
|
+
* copyright: Copyright (c) 2023 Lowell D. Thomas, all rights reserved
|
|
23315
|
+
* license: BSD-2-Clause (https://opensource.org/licenses/BSD-2-Clause)
|
|
23316
|
+
*
|
|
23317
|
+
* Redistribution and use in source and binary forms, with or without
|
|
23318
|
+
* modification, are permitted provided that the following conditions are met:
|
|
23319
|
+
*
|
|
23320
|
+
* 1. Redistributions of source code must retain the above copyright notice, this
|
|
23321
|
+
* list of conditions and the following disclaimer.
|
|
23322
|
+
*
|
|
23323
|
+
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
23324
|
+
* this list of conditions and the following disclaimer in the documentation
|
|
23325
|
+
* and/or other materials provided with the distribution.
|
|
23326
|
+
*
|
|
23327
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
23328
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
23329
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
23330
|
+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23331
|
+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
23332
|
+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
23333
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
23334
|
+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
23335
|
+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
23336
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
23337
|
+
*
|
|
23338
|
+
* ********************************************************************************* */
|
|
23339
|
+
|
|
23340
|
+
|
|
23341
|
+
const Parser = function fnparser() {
|
|
23342
|
+
const id = identifiers;
|
|
23343
|
+
const utils = utilities;
|
|
23344
|
+
const p = this;
|
|
23345
|
+
const thisFileName = 'parser.js: Parser(): ';
|
|
23346
|
+
const systemData = function systemData() {
  // Holds the parser's current state and matched phrase length.
  // `refresh()` puts both back to their initial values (ACTIVE, 0).
  const reset = () => {
    this.state = id.ACTIVE;
    this.phraseLength = 0;
  };
  reset();
  this.refresh = reset;
};
|
|
23354
|
+
p.ast = undefined;
|
|
23355
|
+
p.stats = undefined;
|
|
23356
|
+
p.trace = undefined;
|
|
23357
|
+
p.callbacks = [];
|
|
23358
|
+
let lookAhead = 0;
|
|
23359
|
+
let treeDepth = 0;
|
|
23360
|
+
let maxTreeDepth = 0;
|
|
23361
|
+
let nodeHits = 0;
|
|
23362
|
+
let maxMatched = 0;
|
|
23363
|
+
let rules = undefined;
|
|
23364
|
+
let udts = undefined;
|
|
23365
|
+
let opcodes = undefined;
|
|
23366
|
+
let chars = undefined;
|
|
23367
|
+
let sysData = new systemData();
|
|
23368
|
+
let ruleCallbacks = undefined;
|
|
23369
|
+
let udtCallbacks = undefined;
|
|
23370
|
+
let userData = undefined;
|
|
23371
|
+
const clear = () => {
  // Returns the parser's working state to its initial values:
  // counters to zero, grammar/input/callback references to undefined,
  // and the shared system data to its refreshed state.
  lookAhead = treeDepth = maxTreeDepth = nodeHits = maxMatched = 0;
  rules = udts = opcodes = chars = undefined;
  sysData.refresh();
  ruleCallbacks = udtCallbacks = userData = undefined;
};
|
|
23386
|
+
|
|
23387
|
+
const initializeCallbacks = () => {
  // Builds `ruleCallbacks` and `udtCallbacks` (one slot per rule / UDT) from
  // the user's `p.callbacks` dictionary, whose keys are rule or UDT names
  // matched case-insensitively.
  // Throws when a key names neither a rule nor a UDT, or when a value is
  // neither a function nor falsy.
  const functionName = `${thisFileName}initializeCallbacks(): `;
  ruleCallbacks = [];
  udtCallbacks = [];
  for (let i = 0; i < rules.length; i += 1) {
    ruleCallbacks[i] = undefined;
  }
  for (let i = 0; i < udts.length; i += 1) {
    udtCallbacks[i] = undefined;
  }
  // Lower-case names: rules first, then UDTs, so a position in `list` at or
  // beyond rules.length identifies a UDT.
  const list = [];
  for (let i = 0; i < rules.length; i += 1) {
    list.push(rules[i].lower);
  }
  for (let i = 0; i < udts.length; i += 1) {
    list.push(udts[i].lower);
  }
  // Object.keys yields exactly the own enumerable keys and does not rely on
  // the callbacks object having `hasOwnProperty` on its prototype chain.
  for (const index of Object.keys(p.callbacks || {})) {
    const position = list.indexOf(index.toLowerCase());
    if (position < 0) {
      throw new Error(`${functionName}syntax callback '${index}' not a rule or udt name`);
    }
    // Any falsy entry means "no callback".
    const func = p.callbacks[index] ? p.callbacks[index] : undefined;
    if (typeof func !== 'function' && func !== undefined) {
      throw new Error(`${functionName}syntax callback[${index}] must be function reference or falsy)`);
    }
    if (position < rules.length) {
      ruleCallbacks[position] = func;
    } else {
      udtCallbacks[position - rules.length] = func;
    }
  }
};
|
|
23425
|
+
|
|
23426
|
+
p.parse = (grammar, startName, inputString, callbackData) => {
  // Parses `inputString` against `grammar`, starting at the rule named
  // `startName` (matched case-insensitively).
  //   grammar      - object providing `rules` and `udts`
  //   startName    - name of the start rule
  //   inputString  - the string to parse
  //   callbackData - optional user value passed to the syntax callbacks
  // Returns { success, state, stateName, length, matched, maxMatched,
  // maxTreeDepth, nodeHits }. `success` is true only when the start rule
  // matched (or matched empty) and consumed the entire input.
  // Throws when the start rule is unknown or the final state is invalid.
  const functionName = `${thisFileName}parse(): `;
  clear();
  chars = utils.stringToChars(inputString);
  rules = grammar.rules;
  udts = grammar.udts;
  const lower = startName.toLowerCase();
  let startIndex = undefined;
  for (const i in rules) {
    if (rules.hasOwnProperty(i)) {
      if (lower === rules[i].lower) {
        startIndex = rules[i].index;
        break;
      }
    }
  }
  if (startIndex === undefined) {
    // Report the name the caller passed in (`startName`).
    throw new Error(`${functionName}start rule name '${startName}' not recognized`);
  }
  initializeCallbacks();
  if (p.trace) {
    p.trace.init(rules, udts, chars);
  }
  if (p.stats) {
    p.stats.init(rules, udts);
  }
  if (p.ast) {
    p.ast.init(rules, udts, chars);
  }
  userData = callbackData;
  /* create a dummy opcode for the start rule */
  opcodes = [
    {
      type: id.RNM,
      index: startIndex,
    },
  ];
  /* execute the start rule */
  opExecute(0, 0);
  opcodes = undefined;
  /* test and return the sysData */
  let success = false;
  switch (sysData.state) {
    case id.ACTIVE:
      throw new Error(`${functionName}final state should never be 'ACTIVE'`);
    case id.NOMATCH:
      success = false;
      break;
    case id.EMPTY:
    case id.MATCH:
      // A match only counts as success when the whole input was consumed.
      success = sysData.phraseLength === chars.length;
      break;
    default:
      throw new Error('unrecognized state');
  }
  return {
    success,
    state: sysData.state,
    stateName: id.idName(sysData.state),
    length: chars.length,
    matched: sysData.phraseLength,
    maxMatched,
    maxTreeDepth,
    nodeHits,
  };
};
|
|
23496
|
+
// The `ALT` operator.<br>
|
|
23497
|
+
// Executes its child nodes, from left to right, until it finds a match.
|
|
23498
|
+
// Fails if *all* of its child nodes fail.
|
|
23499
|
+
const opALT = (opIndex, phraseIndex) => {
  // Try each alternative at the same input position and stop at the first
  // one that does not end in NOMATCH; its result is left in sysData.
  const { children } = opcodes[opIndex];
  let next = 0;
  while (next < children.length) {
    opExecute(children[next], phraseIndex);
    if (sysData.state !== id.NOMATCH) {
      return;
    }
    next += 1;
  }
};
|
|
23508
|
+
// The `CAT` operator.<br>
|
|
23509
|
+
// Executes all of its child nodes, from left to right,
|
|
23510
|
+
// concatenating the matched phrases.
|
|
23511
|
+
// Fails if *any* child nodes fail.
|
|
23512
|
+
const opCAT = (opIndex, phraseIndex) => {
  // Execute every child in sequence, each starting where the previous one
  // stopped. The first NOMATCH fails the whole concatenation and rolls the
  // AST back to the length it had on entry.
  const op = opcodes[opIndex];
  const astLength = p.ast ? p.ast.getLength() : undefined;
  let cursor = phraseIndex;
  let matched = 0;
  for (let i = 0; i < op.children.length; i += 1) {
    opExecute(op.children[i], cursor);
    if (sysData.state === id.NOMATCH) {
      sysData.state = id.NOMATCH;
      sysData.phraseLength = 0;
      if (p.ast) {
        p.ast.setLength(astLength);
      }
      return;
    }
    cursor += sysData.phraseLength;
    matched += sysData.phraseLength;
  }
  // All children succeeded: report the combined phrase.
  sysData.state = matched === 0 ? id.EMPTY : id.MATCH;
  sysData.phraseLength = matched;
};
|
|
23545
|
+
// The `REP` operator.<br>
|
|
23546
|
+
// Repeatedly executes its single child node,
|
|
23547
|
+
// concatenating each of the matched phrases found.
|
|
23548
|
+
// The number of repetitions executed and its final sysData depends
|
|
23549
|
+
// on its `min` & `max` repetition values.
|
|
23550
|
+
const opREP = (opIndex, phraseIndex) => {
  // Executes the child opcode (the one right after this REP) repeatedly,
  // accumulating the matched phrases, and then judges the repetition count
  // against the operator's min/max.
  const op = opcodes[opIndex];
  if (op.max === 0) {
    // this is an empty-string acceptor
    // deprecated: use the TLS empty string operator, "", instead
    sysData.state = id.EMPTY;
    sysData.phraseLength = 0;
    return;
  }
  let cursor = phraseIndex;
  let matched = 0;
  let reps = 0;
  const astLength = p.ast ? p.ast.getLength() : undefined;
  for (;;) {
    /* exit on end of input string */
    if (cursor >= chars.length) {
      break;
    }
    opExecute(opIndex + 1, cursor);
    /* a failed child always ends the loop; an empty child ends it too, */
    /* and is treated as success in the evaluation below */
    if (sysData.state === id.NOMATCH || sysData.state === id.EMPTY) {
      break;
    }
    reps += 1;
    matched += sysData.phraseLength;
    cursor += sysData.phraseLength;
    /* end on maxed out reps */
    if (reps === op.max) {
      break;
    }
  }
  /* evaluate the match count according to the min, max values */
  if (sysData.state === id.EMPTY || reps >= op.min) {
    sysData.state = matched === 0 ? id.EMPTY : id.MATCH;
    sysData.phraseLength = matched;
    return;
  }
  sysData.state = id.NOMATCH;
  sysData.phraseLength = 0;
  if (p.ast) {
    p.ast.setLength(astLength);
  }
};
|
|
23607
|
+
// Validate the callback function's returned sysData values.
|
|
23608
|
+
// It's the user's responsibility to get them right
|
|
23609
|
+
// but `RNM` fails if not.
|
|
23610
|
+
const validateRnmCallbackResult = (rule, sysData, charsLeft, down) => {
  // Checks, and where possible normalizes, what a rule callback left in
  // `sysData`:
  //   - phraseLength may not exceed the remaining input (`charsLeft`)
  //   - ACTIVE is only acceptable on the way down (`down` truthy)
  //   - EMPTY and NOMATCH force phraseLength to 0
  //   - MATCH with phraseLength 0 is turned into EMPTY
  // Throws on any violation or unknown state.
  const where = () => `${thisFileName}opRNM(${rule.name}): `;
  if (sysData.phraseLength > charsLeft) {
    throw new Error(
      `${where()}callback function error: sysData.phraseLength: ${sysData.phraseLength} must be <= remaining chars: ${charsLeft}`
    );
  }
  const { state } = sysData;
  if (state === id.ACTIVE) {
    if (!down) {
      throw new Error(`${where()}callback function return error. ACTIVE state not allowed.`);
    }
    return;
  }
  if (state === id.EMPTY || state === id.NOMATCH) {
    sysData.phraseLength = 0;
    return;
  }
  if (state === id.MATCH) {
    if (sysData.phraseLength === 0) {
      sysData.state = id.EMPTY;
    }
    return;
  }
  throw new Error(`${where()}callback function return error. Unrecognized return state: ${sysData.state}`);
};
|
|
23642
|
+
// The `RNM` operator.<br>
|
|
23643
|
+
// This operator acts as a root node for a parse tree branch below and
|
|
23644
|
+
// returns the matched phrase to its parent.
|
|
23645
|
+
// However, its larger responsibility is handling user-defined callback functions and `AST` nodes.
|
|
23646
|
+
// Note that the `AST` is a separate object, but `RNM` calls its functions to create its nodes.
|
|
23647
|
+
const opRNM = (opIndex, phraseIndex) => {
  // Executes the rule referenced by this opcode. Around the rule's own
  // opcodes it (a) opens/closes an AST node when the AST tracks this rule and
  // the parser is not inside a look-ahead, and (b) calls the user's rule
  // callback before and after, validating what the callback leaves in sysData.
  const op = opcodes[opIndex];
  const rule = rules[op.index];
  const callback = ruleCallbacks[rule.index];
  /* ignore AST in look ahead (AND or NOT operator above) */
  const astDefined = !lookAhead ? p.ast && p.ast.ruleDefined(op.index) : undefined;
  let astLength;
  if (astDefined) {
    astLength = p.ast.getLength();
    p.ast.down(op.index, rule.name);
  }
  if (!callback) {
    /* no callback - just execute the rule */
    const savedOpcodes = opcodes;
    opcodes = rule.opcodes;
    opExecute(0, phraseIndex, sysData);
    opcodes = savedOpcodes;
  } else {
    /* call user's callback going down the parse tree*/
    const charsLeft = chars.length - phraseIndex;
    callback(sysData, chars, phraseIndex, userData);
    validateRnmCallbackResult(rule, sysData, charsLeft, true);
    /* any state other than ACTIVE is accepted as is - RNM acting as UDT */
    if (sysData.state === id.ACTIVE) {
      const savedOpcodes = opcodes;
      opcodes = rule.opcodes;
      opExecute(0, phraseIndex);
      opcodes = savedOpcodes;
      /* call user's callback going up the parse tree*/
      callback(sysData, chars, phraseIndex, userData);
      validateRnmCallbackResult(rule, sysData, charsLeft, false);
    }
  }
  /* end AST */
  if (!lookAhead && astDefined) {
    if (sysData.state === id.NOMATCH) {
      // The rule failed: discard the records added since the node was opened.
      p.ast.setLength(astLength);
    } else {
      p.ast.up(op.index, rule.name, phraseIndex, sysData.phraseLength);
    }
  }
};
|
|
23694
|
+
// The `TRG` operator.<br>
|
|
23695
|
+
// Succeeds if the single first character of the phrase is
|
|
23696
|
+
// within the `min - max` range.
|
|
23697
|
+
const opTRG = (opIndex, phraseIndex) => {
  /* Terminal range: MATCH with phrase length 1 when the character at
     `phraseIndex` exists and lies within [op.min, op.max]; NOMATCH otherwise. */
  const range = opcodes[opIndex];
  sysData.state = id.NOMATCH;
  if (phraseIndex < chars.length) {
    const code = chars[phraseIndex];
    const inRange = range.min <= code && code <= range.max;
    if (inRange) {
      sysData.state = id.MATCH;
      sysData.phraseLength = 1;
    }
  }
};
|
|
23707
|
+
// The `TBS` operator.<br>
|
|
23708
|
+
// Matches its pre-defined phrase against the input string.
|
|
23709
|
+
// All characters must match exactly.
|
|
23710
|
+
// Case-sensitive literal strings (`'string'` & `%s"string"`) are translated to `TBS`
|
|
23711
|
+
// operators by `apg`.
|
|
23712
|
+
// Phrase length of zero is not allowed.
|
|
23713
|
+
// Empty phrases can only be defined with `TLS` operators.
|
|
23714
|
+
const opTBS = (opIndex, phraseIndex) => {
  /* Terminal binary string: MATCH when every element of `op.string` equals the
     input element at the same offset from `phraseIndex`; NOMATCH otherwise,
     including when the input is too short to hold the whole string. */
  const op = opcodes[opIndex];
  const len = op.string.length;
  sysData.state = id.NOMATCH;
  const fits = phraseIndex + len <= chars.length;
  if (!fits) {
    return;
  }
  let offset = 0;
  while (offset < len) {
    if (chars[phraseIndex + offset] !== op.string[offset]) {
      /* first mismatch - state stays NOMATCH */
      return;
    }
    offset += 1;
  }
  sysData.state = id.MATCH;
  sysData.phraseLength = len;
};
|
|
23728
|
+
// The `TLS` operator.<br>
|
|
23729
|
+
// Matches its pre-defined phrase against the input string.
|
|
23730
|
+
// A case-insensitive match is attempted for ASCII alphabetical characters.
|
|
23731
|
+
// `TLS` is the only operator that explicitly allows empty phrases.
|
|
23732
|
+
// `apg` will fail for empty `TBS`, case-sensitive strings (`''`) or
|
|
23733
|
+
// zero repetitions (`0*0RuleName` or `0RuleName`).
|
|
23734
|
+
const opTLS = (opIndex, phraseIndex) => {
  /* Terminal literal string: an empty `op.string` yields EMPTY. Otherwise each
     input code in 65..90 (ASCII upper case) has 32 added before it is compared
     with the corresponding `op.string` element; all must be equal for MATCH. */
  const op = opcodes[opIndex];
  sysData.state = id.NOMATCH;
  const len = op.string.length;
  if (len === 0) {
    /* EMPTY match allowed for TLS */
    sysData.state = id.EMPTY;
    return;
  }
  const fits = phraseIndex + len <= chars.length;
  if (!fits) {
    return;
  }
  for (let offset = 0; offset < len; offset += 1) {
    const raw = chars[phraseIndex + offset];
    const folded = raw >= 65 && raw <= 90 ? raw + 32 : raw;
    if (folded !== op.string[offset]) {
      return;
    }
  }
  sysData.state = id.MATCH;
  sysData.phraseLength = len;
};
|
|
23758
|
+
// Validate the callback function's returned sysData values.
|
|
23759
|
+
// It's the user's responsibility to get it right but `UDT` fails if not.
|
|
23760
|
+
const validateUdtCallbackResult = (udt, sysData, charsLeft) => {
  /* Checks and normalizes the `sysData` a UDT callback handed back.
     udt       - the UDT descriptor (`udt.name` for messages, `udt.empty` as the
                 flag permitting an empty result)
     sysData   - the state/phraseLength pair the callback may have modified
     charsLeft - number of input characters remaining at the phrase start
     Throws on an over-long phrase, on ACTIVE, on an empty result when
     `udt.empty` is falsy, and on an unknown state. */
  const site = () => `${thisFileName}opUDT(${udt.name})`;
  if (sysData.phraseLength > charsLeft) {
    throw new Error(
      `${site()}: callback function error: sysData.phraseLength: ${sysData.phraseLength} must be <= remaining chars: ${charsLeft}`
    );
  }
  const { state } = sysData;
  if (state === id.ACTIVE) {
    throw new Error(`${site()} ACTIVE state return not allowed.`);
  }
  if (state === id.NOMATCH) {
    sysData.phraseLength = 0;
    return;
  }
  if (state === id.EMPTY) {
    if (!udt.empty) {
      throw new Error(`${site()} may not return EMPTY.`);
    }
    sysData.phraseLength = 0;
    return;
  }
  if (state === id.MATCH) {
    if (sysData.phraseLength !== 0) {
      return;
    }
    /* a zero-length MATCH is an EMPTY result in disguise */
    if (!udt.empty) {
      throw new Error(`${site()} may not return EMPTY.`);
    }
    sysData.state = id.EMPTY;
    return;
  }
  throw new Error(`${site()}: callback function return error. Unrecognized return state: ${sysData.state}`);
};
|
|
23795
|
+
// The `UDT` operator.<br>
|
|
23796
|
+
// Simply calls the user's callback function, but operates like `RNM` with regard to the `AST`
|
|
23797
|
+
// and back referencing.
|
|
23798
|
+
// There is some ambiguity here. `UDT`s act as terminals for phrase recognition but as named rules
|
|
23799
|
+
// for `AST` nodes and back referencing.
|
|
23800
|
+
// See [`ast.js`](./ast.html) for usage.
|
|
23801
|
+
const opUDT = (opIndex, phraseIndex) => {
  /* Runs the user-defined terminal named by opcode `opIndex` at `phraseIndex`:
     records the UDT index in `sysData`, calls the user's callback and validates
     what it returned. When not in look ahead and the AST has this UDT defined,
     a down record is opened before the call and closed (or rolled back on
     NOMATCH) after it. UDT nodes are numbered after the rules in the AST. */
  const op = opcodes[opIndex];
  const udt = udts[op.index];
  sysData.UdtIndex = udt.index;

  /* ignore AST in look ahead */
  let astIndex;
  let astLength;
  let astDefined;
  if (!lookAhead) {
    astDefined = p.ast && p.ast.udtDefined(op.index);
    if (astDefined) {
      astIndex = rules.length + op.index;
      astLength = p.ast.getLength();
      p.ast.down(astIndex, udt.name);
    }
  }

  /* call the UDT */
  const charsLeft = chars.length - phraseIndex;
  udtCallbacks[op.index](sysData, chars, phraseIndex, userData);
  validateUdtCallbackResult(udt, sysData, charsLeft);

  /* end AST */
  if (!lookAhead && astDefined) {
    if (sysData.state === id.NOMATCH) {
      p.ast.setLength(astLength);
    } else {
      p.ast.up(astIndex, udt.name, phraseIndex, sysData.phraseLength);
    }
  }
};
|
|
23832
|
+
// The `AND` operator.<br>
|
|
23833
|
+
// This is the positive `look ahead` operator.
|
|
23834
|
+
// Executes its single child node, returning the EMPTY state
|
|
23835
|
+
// if it succeeds and NOMATCH if it fails.
|
|
23836
|
+
// *Always* backtracks on any matched phrase and returns EMPTY on success.
|
|
23837
|
+
const opAND = (opIndex, phraseIndex) => {
  /* Positive look ahead: executes the opcode following this one with the
     look-ahead counter raised, then discards any matched phrase. A child
     result of EMPTY or MATCH becomes EMPTY; NOMATCH stays NOMATCH. */
  lookAhead += 1;
  opExecute(opIndex + 1, phraseIndex);
  lookAhead -= 1;
  sysData.phraseLength = 0;
  const childState = sysData.state;
  if (childState === id.EMPTY || childState === id.MATCH) {
    sysData.state = id.EMPTY;
    return;
  }
  if (childState === id.NOMATCH) {
    sysData.state = id.NOMATCH;
    return;
  }
  throw new Error(`opAND: invalid state ${sysData.state}`);
};
|
|
23856
|
+
// The `NOT` operator.<br>
|
|
23857
|
+
// This is the negative `look ahead` operator.
|
|
23858
|
+
// Executes its single child node, returning the EMPTY state
|
|
23859
|
+
// if it *fails* and NOMATCH if it succeeds.
|
|
23860
|
+
// *Always* backtracks on any matched phrase and returns EMPTY
|
|
23861
|
+
// on success (failure of its child node).
|
|
23862
|
+
const opNOT = (opIndex, phraseIndex) => {
  /* Negative look ahead: executes the opcode following this one with the
     look-ahead counter raised, then discards any matched phrase and inverts
     the outcome - a child EMPTY or MATCH becomes NOMATCH, NOMATCH becomes EMPTY. */
  lookAhead += 1;
  opExecute(opIndex + 1, phraseIndex);
  lookAhead -= 1;
  sysData.phraseLength = 0;
  const childState = sysData.state;
  if (childState === id.EMPTY || childState === id.MATCH) {
    sysData.state = id.NOMATCH;
    return;
  }
  if (childState === id.NOMATCH) {
    sysData.state = id.EMPTY;
    return;
  }
  throw new Error(`opNOT: invalid state ${sysData.state}`);
};
|
|
23879
|
+
|
|
23880
|
+
// Executes a single opcode of the current opcode list at `phraseIndex`.
// Updates the node-hit and tree-depth counters, refreshes `sysData`, notifies
// the optional trace object on the way down and up, dispatches to the handler
// for the opcode's type, tracks the furthest matched position (outside look
// ahead only) and feeds the optional stats object.
const opExecute = (opIndex, phraseIndex) => {
  const op = opcodes[opIndex];
  nodeHits += 1;
  maxTreeDepth = Math.max(maxTreeDepth, treeDepth);
  treeDepth += 1;
  sysData.refresh();
  if (p.trace) {
    p.trace.down(op, phraseIndex);
  }

  /* pick the handler for this operator type */
  let handler;
  switch (op.type) {
    case id.ALT:
      handler = opALT;
      break;
    case id.CAT:
      handler = opCAT;
      break;
    case id.REP:
      handler = opREP;
      break;
    case id.RNM:
      handler = opRNM;
      break;
    case id.TRG:
      handler = opTRG;
      break;
    case id.TBS:
      handler = opTBS;
      break;
    case id.TLS:
      handler = opTLS;
      break;
    case id.UDT:
      handler = opUDT;
      break;
    case id.AND:
      handler = opAND;
      break;
    case id.NOT:
      handler = opNOT;
      break;
    default:
      throw new Error(`${thisFileName}opExecute(): unrecognized operator`);
  }
  handler(opIndex, phraseIndex);

  /* matches made during look ahead do not count towards the furthest match */
  if (!lookAhead) {
    maxMatched = Math.max(maxMatched, phraseIndex + sysData.phraseLength);
  }
  if (p.stats) {
    p.stats.collect(op, sysData);
  }
  if (p.trace) {
    p.trace.up(op, sysData.state, phraseIndex, sysData.phraseLength);
  }
  treeDepth -= 1;
};
|
|
23939
|
+
};
|
|
23940
|
+
|
|
23941
|
+
// Constructor for the AST object the parser fills in while parsing.
// The parser's `RNM` and `UDT` operators call `down`/`up` to append a pair of
// records per matched node; the user then calls `translate` to replay the
// records through the callbacks assigned on `callbacks`, or `toXml` to dump them.
const Ast = function fnast() {
  const thisFileName = 'parser.js: Ast()): ';
  const id = identifiers;
  const utils = utilities;
  const a = this;
  let rules = undefined;
  let udts = undefined;
  let chars = undefined;
  let nodeCount = 0;
  /* callback per node index: rules first, then UDTs */
  const nodeCallbacks = [];
  /* indexes of the down records that are still waiting for their up record */
  const stack = [];
  const records = [];
  /* user-assigned callbacks, keyed by rule or UDT name (case-insensitive) */
  a.callbacks = [];

  /* helper for XML display - a string of `n` blanks */
  function indent(n) {
    return n > 0 ? ' '.repeat(n) : '';
  }

  /* called by the parser to initialize the AST with the rules, UDTs and the input characters */
  a.init = (rulesIn, udtsIn, charsIn) => {
    stack.length = 0;
    records.length = 0;
    nodeCount = 0;
    rules = rulesIn;
    udts = udtsIn;
    chars = charsIn;
    /* lower-case names in node-index order */
    const names = [];
    for (const { lower } of [...rules, ...udts]) {
      names.push(lower);
    }
    nodeCount = rules.length + udts.length;
    for (let n = 0; n < nodeCount; n += 1) {
      nodeCallbacks[n] = undefined;
    }
    for (const key in a.callbacks) {
      if (!a.callbacks.hasOwnProperty(key)) {
        continue;
      }
      const nodeIndex = names.indexOf(key.toLowerCase());
      if (nodeIndex < 0) {
        throw new Error(`${thisFileName}init: node '${key}' not a rule or udt name`);
      }
      nodeCallbacks[nodeIndex] = a.callbacks[key];
    }
  };

  /* AST node rule callbacks - called by the parser's `RNM` operator */
  a.ruleDefined = (index) => Boolean(nodeCallbacks[index]);

  /* AST node UDT callbacks - called by the parser's `UDT` operator */
  a.udtDefined = (index) => Boolean(nodeCallbacks[rules.length + index]);

  /* called by the parser's `RNM` & `UDT` operators
     builds a record for the downward traversal of the node;
     the phrase is not known yet and is filled in by `up` */
  a.down = (callbackIndex, name) => {
    const thisIndex = records.length;
    stack.push(thisIndex);
    const record = {
      name,
      thisIndex,
      thatIndex: undefined,
      state: id.SEM_PRE,
      callbackIndex,
      phraseIndex: undefined,
      phraseLength: undefined,
      stack: stack.length,
    };
    records.push(record);
    return thisIndex;
  };

  /* called by the parser's `RNM` & `UDT` operators
     builds a record for the upward traversal of the node
     and completes the matching down record */
  a.up = (callbackIndex, name, phraseIndex, phraseLength) => {
    const thisIndex = records.length;
    const thatIndex = stack.pop();
    records.push({
      name,
      thisIndex,
      thatIndex,
      state: id.SEM_POST,
      callbackIndex,
      phraseIndex,
      phraseLength,
      stack: stack.length,
    });
    Object.assign(records[thatIndex], { thatIndex: thisIndex, phraseIndex, phraseLength });
    return thisIndex;
  };

  // Called by the user to translate the AST.
  // Every record whose node has a callback is replayed in order; the callback
  // receives the direction (SEM_PRE or SEM_POST), the input characters, the
  // phrase index and length, and `data`.
  // ```
  // data - optional user-defined data
  //        passed to the callback functions by the translator
  // ```
  a.translate = (data) => {
    for (const record of records) {
      const callback = nodeCallbacks[record.callbackIndex];
      if (callback) {
        const direction = record.state === id.SEM_PRE ? id.SEM_PRE : id.SEM_POST;
        callback(direction, chars, record.phraseIndex, record.phraseLength, data);
      }
    }
  };

  /* called by the parser to reset the length of the records array
     necessary on backtracking; the open-node stack is restored to the
     depth stored in the last surviving record */
  a.setLength = (length) => {
    records.length = length;
    stack.length = length > 0 ? records[length - 1].stack : 0;
  };

  /* called by the parser to get the length of the records array */
  a.getLength = () => records.length;

  // Generate an `XML` version of the AST.
  // Useful if you want to use a special or favorite XML parser to translate the
  // AST. Node data are JavaScript strings.
  a.toXml = () => {
    let depth = 0;
    let xml = '<?xml version="1.0" encoding="utf-8"?>\n';
    xml += `<root nodes="${records.length / 2}" characters="${chars.length}">\n`;
    xml += `<!-- input string -->\n`;
    xml += `${indent(depth + 2)}${utils.charsToString(chars)}\n`;
    for (const rec of records) {
      if (rec.state === id.SEM_PRE) {
        depth += 1;
        xml += `${indent(depth)}<node name="${rec.name}" index="${rec.phraseIndex}" length="${rec.phraseLength}">\n`;
        xml += `${indent(depth + 2)}${utils.charsToString(chars, rec.phraseIndex, rec.phraseLength)}\n`;
      } else {
        xml += `${indent(depth)}</node><!-- name="${rec.name}" -->\n`;
        depth -= 1;
      }
    }
    xml += '</root>\n';
    return xml;
  };
};
|
|
24101
|
+
|
|
24102
|
+
// Constructor for the optional trace object.
// The parser calls `down` before and `up` after each opcode it executes; each
// call appends one line to an internal text buffer that `displayTrace` returns.
const Trace = function fntrace() {
  const id = identifiers;
  const utils = utilities;
  const thisFile = 'parser.js: Trace(): ';
  /* longest phrase (in characters) shown on a trace line before it is cut with '...' */
  const MAX_PHRASE = 100;
  const t = this;
  let chars = undefined;
  let rules = undefined;
  let udts = undefined;
  let out = '';
  let treeDepth = 0;

  /* Depth marker: one character per level, every fifth one a '|'. */
  const indent = (n) => {
    let ret = '';
    for (let level = 1; level <= n; level += 1) {
      ret += level % 5 === 0 ? '|' : '.';
    }
    return ret;
  };

  /* Display name of a TBS/TLS opcode: up to 6 characters of the literal are
     shown in full, longer literals are cut to their first 3 characters. */
  const literalName = (tag, string) => {
    if (string.length > 6) {
      return `${tag}(${utils.charsToString(string, 0, 3)}...)`;
    }
    return `${tag}(${utils.charsToString(string, 0, 6)})`;
  };

  /* Display name of an opcode. Throws on an unrecognized opcode type. */
  const opName = (op) => {
    switch (op.type) {
      case id.ALT:
        return 'ALT';
      case id.CAT:
        return 'CAT';
      case id.REP:
        return op.max === Infinity ? `REP(${op.min},inf)` : `REP(${op.min},${op.max})`;
      case id.RNM:
        return `RNM(${rules[op.index].name})`;
      case id.TRG:
        return `TRG(${op.min},${op.max})`;
      case id.TBS:
        return literalName('TBS', op.string);
      case id.TLS:
        return literalName('TLS', op.string);
      case id.UDT:
        return `UDT(${udts[op.index].name})`;
      case id.AND:
        return 'AND';
      case id.NOT:
        return 'NOT';
      default:
        throw new Error(`${thisFile}Trace: opName: unrecognized opcode`);
    }
  };

  /* Stores the rules, UDTs and input characters for a new trace.
     The text buffer and the depth counter are reset as well, so that a Trace
     object that is initialized again does not append to the previous trace or
     start at a depth left over from a run that ended early. */
  t.init = (r, u, c) => {
    rules = r;
    udts = u;
    chars = c;
    out = '';
    treeDepth = 0;
  };

  /* Appends the line for entering opcode `op` at input offset `offset`,
     followed by up to MAX_PHRASE characters of the remaining input. */
  t.down = (op, offset) => {
    const lead = indent(treeDepth);
    const remaining = chars.length - offset;
    const len = Math.min(MAX_PHRASE, remaining);
    let phrase = utils.charsToString(chars, offset, len);
    if (len < remaining) {
      phrase += '...';
    }
    out += `${lead}|-|[${opName(op)}]${phrase}\n`;
    treeDepth += 1;
  };

  /* Appends the line for leaving opcode `op` with result `state`.
     For MATCH the matched phrase (cut to MAX_PHRASE characters) is shown.
     Throws on an unrecognized state. */
  t.up = (op, state, offset, phraseLength) => {
    treeDepth -= 1;
    const lead = indent(treeDepth);
    let st;
    let phrase;
    switch (state) {
      case id.EMPTY:
        st = '|E|';
        phrase = `''`;
        break;
      case id.MATCH: {
        st = '|M|';
        const len = Math.min(MAX_PHRASE, phraseLength);
        const shown = utils.charsToString(chars, offset, len);
        phrase = len < phraseLength ? `'${shown}...'` : `'${shown}'`;
        break;
      }
      case id.NOMATCH:
        st = '|N|';
        phrase = '';
        break;
      default:
        throw new Error(`${thisFile}trace.up:  unrecognized state`);
    }
    out += `${lead}${st}[${opName(op)}]${phrase}\n`;
  };

  /* Returns the trace text accumulated since the last `init`. */
  t.displayTrace = () => out;
};
|
|
24228
|
+
|
|
24229
|
+
// Constructor for the optional statistics object.
// The parser calls `init` before parsing and `collect` after every opcode it
// executes; `displayStats` and `displayHits` format the counters as text.
const Stats = function fnstats() {
  const id = identifiers;
  const thisFileName = 'parser.js: Stats(): ';
  /* width every counter is right-aligned to in the displayed tables */
  const COLUMN_WIDTH = 7;
  let rules;
  let udts;
  /* counters over all operators; kept by collect() */
  let totals;
  /* per-operator counters, indexed by operator type */
  const stats = [];
  /* per-rule and per-UDT counters; collect() indexes these by `op.index`,
     so their order must never be changed */
  const ruleStats = [];
  const udtStats = [];

  /* a fresh set of zeroed counters */
  const newCounters = () => ({ empty: 0, match: 0, nomatch: 0, total: 0 });

  /* zeroed counters for a named rule or UDT */
  const newNamedCounters = ({ name, lower, index }) => ({ ...newCounters(), name, lower, index });

  /* right-align a counter in its column */
  const normalize = (n) => String(n).padStart(COLUMN_WIDTH, ' ');

  /* comparators for displayHits */
  const sortAlpha = (lhs, rhs) => {
    if (lhs.lower < rhs.lower) {
      return -1;
    }
    if (lhs.lower > rhs.lower) {
      return 1;
    }
    return 0;
  };
  /* most hits first, ties broken alphabetically */
  const sortHits = (lhs, rhs) => {
    if (lhs.total < rhs.total) {
      return 1;
    }
    if (lhs.total > rhs.total) {
      return -1;
    }
    return sortAlpha(lhs, rhs);
  };
  const sortIndex = (lhs, rhs) => {
    if (lhs.index < rhs.index) {
      return -1;
    }
    if (lhs.index > rhs.index) {
      return 1;
    }
    return 0;
  };

  /* Zero out all stats */
  const clear = () => {
    totals = newCounters();
    stats.length = 0;
    const operatorTypes = [id.ALT, id.CAT, id.REP, id.RNM, id.TRG, id.TBS, id.TLS, id.UDT, id.AND, id.NOT];
    for (const type of operatorTypes) {
      stats[type] = newCounters();
    }
    ruleStats.length = 0;
    for (const rule of rules) {
      ruleStats.push(newNamedCounters(rule));
    }
    /* always reset, so rows from a previous grammar's UDTs cannot survive */
    udtStats.length = 0;
    for (const udt of udts) {
      udtStats.push(newNamedCounters(udt));
    }
  };

  /* increment the designated operator hit count by one
     throws on an unrecognized state */
  const incStat = (stat, state) => {
    stat.total += 1;
    switch (state) {
      case id.EMPTY:
        stat.empty += 1;
        break;
      case id.MATCH:
        stat.match += 1;
        break;
      case id.NOMATCH:
        stat.nomatch += 1;
        break;
      default:
        throw new Error(`${thisFileName}collect(): incStat(): unrecognized state: ${state}`);
    }
  };

  /* called by parser to initialize the stats */
  this.init = (r, u) => {
    rules = r;
    udts = u;
    clear();
  };

  /* This function is the main interaction with the parser. */
  /* The parser calls it after each node has been traversed. */
  this.collect = (op, sys) => {
    incStat(totals, sys.state);
    incStat(stats[op.type], sys.state);
    if (op.type === id.RNM) {
      incStat(ruleStats[op.index], sys.state);
    }
    if (op.type === id.UDT) {
      incStat(udtStats[op.index], sys.state);
    }
  };

  /* Returns a table with one row of counters per operator type
     and a final row with their sums. */
  this.displayStats = () => {
    const operators = [
      [' ALT', id.ALT],
      [' CAT', id.CAT],
      [' REP', id.REP],
      [' RNM', id.RNM],
      [' TRG', id.TRG],
      [' TBS', id.TBS],
      [' TLS', id.TLS],
      [' UDT', id.UDT],
      [' AND', id.AND],
      [' NOT', id.NOT],
    ];
    const formatRow = (label, s) =>
      `${label} | ${normalize(s.match)} | ${normalize(s.empty)} | ${normalize(s.nomatch)} | ${normalize(s.total)} |\n`;
    /* local sums - the collector's own totals are left alone */
    const sums = newCounters();
    let out = ' OPERATOR STATS\n';
    out += ' | MATCH | EMPTY | NOMATCH | TOTAL |\n';
    for (const [label, type] of operators) {
      const s = stats[type];
      sums.match += s.match;
      sums.empty += s.empty;
      sums.nomatch += s.nomatch;
      sums.total += s.total;
      out += formatRow(label, s);
    }
    out += formatRow('TOTAL', sums);
    return out;
  };

  /*
     Display rule/udt hit counts; rules and UDTs that were never hit are omitted.
     type - a string starting with 'a' (any case) sorts alphabetically,
            one starting with 'i' sorts by index, anything else by hit count.
     Sorted copies are displayed: the collected counters are neither reordered
     nor modified, so collecting may continue after a display.
  */
  this.displayHits = (type) => {
    const key = typeof type === 'string' ? type.toLowerCase()[0] : undefined;
    let comparator = sortHits;
    let out = ' RULES/UDTS BY HIT COUNT\n';
    if (key === 'a') {
      comparator = sortAlpha;
      out = ' RULES/UDTS ALPHABETICALLY\n';
    } else if (key === 'i') {
      comparator = sortIndex;
      out = ' RULES/UDTS BY INDEX\n';
    }
    out += '| MATCH | EMPTY | NOMATCH | TOTAL | NAME\n';
    const rows = [...[...ruleStats].sort(comparator), ...[...udtStats].sort(comparator)];
    for (const r of rows) {
      if (r.total) {
        out += `| ${normalize(r.match)} | ${normalize(r.empty)} | ${normalize(r.nomatch)} | ${normalize(r.total)} | ${r.name}\n`;
      }
    }
    return out;
  };
};
|
|
24447
|
+
|
|
24448
|
+
const utilities = {
  // utility functions

  // Converts a string to an array of its Unicode code points
  // (one element per code point, not per UTF-16 code unit).
  stringToChars: (string) => [...string].map((cp) => cp.codePointAt(0)),

  // Converts an array of code points, or a section of it, to a string.
  //   chars - the code points
  //   beg   - index of the first code point to convert; when undefined or
  //           negative the whole of `chars` is converted and `len` is ignored
  //   len   - number of code points to convert; when undefined everything from
  //           `beg` on is converted, when zero or negative the result is ''
  // The conversion is done in chunks because passing a very large array to
  // `String.fromCodePoint` as spread arguments exceeds the engine's
  // argument limit and throws a RangeError.
  charsToString: (chars, beg, len) => {
    const CHUNK_SIZE = 8192;
    let subChars = chars;
    if (beg !== undefined && !(beg < 0)) {
      if (len === undefined) {
        subChars = chars.slice(beg);
      } else if (len <= 0) {
        // always an empty string
        return '';
      } else {
        subChars = chars.slice(beg, beg + len);
      }
    }
    // anything that cannot be sliced is copied into a plain array first
    const codes = typeof subChars.slice === 'function' ? subChars : Array.from(subChars);
    if (codes.length <= CHUNK_SIZE) {
      return String.fromCodePoint(...codes);
    }
    let out = '';
    for (let start = 0; start < codes.length; start += CHUNK_SIZE) {
      out += String.fromCodePoint(...codes.slice(start, start + CHUNK_SIZE));
    }
    return out;
  },
};
|
|
24471
|
+
|
|
24472
|
+
const identifiers = {
  // Operator type identifiers.
  // NB: These must match the values in apg-js 4.3.0, apg-lib/identifiers.

  /* original ABNF operators */
  ALT: 1, // alternation
  CAT: 2, // concatenation
  REP: 3, // repetition
  RNM: 4, // rule name
  TRG: 5, // terminal range
  TBS: 6, // terminal binary string, case sensitive
  TLS: 7, // terminal literal string, case insensitive

  /* SABNF superset operators */
  UDT: 11, // user-defined terminal
  AND: 12, // positive look ahead
  NOT: 13, // negative look ahead

  // Parser states, used by the parser and by the user's `RNM` and `UDT`
  // callback functions while the parse tree nodes are traversed.
  // - *ACTIVE*  - moving downward through the parse tree node
  // - *MATCH*   - moving upward; a phrase of length > 0 was matched
  // - *EMPTY*   - moving upward; a phrase of length = 0 was matched
  // - *NOMATCH* - moving upward; no phrase could be matched at all
  ACTIVE: 100,
  MATCH: 101,
  EMPTY: 102,
  NOMATCH: 103,

  // Direction of flow through the `AST` nodes, used by the
  // [`AST` translator](./ast.html) (semantic analysis) and the user's callbacks.
  // - *SEM_PRE*  - downward (pre-branch) direction through the `AST` node
  // - *SEM_POST* - upward (post-branch) direction through the `AST` node
  SEM_PRE: 200,
  SEM_POST: 201,

  // Ignored. Retained for backward compatibility.
  SEM_OK: 300,

  /**
   * Translate an identifier value into its symbolic name.
   *
   * @param {number} s - an operator, state or semantic identifier value
   * @returns {string} the matching name, or 'UNRECOGNIZED STATE' when `s`
   *   equals none of the identifier values
   */
  idName: (s) => {
    // Checked in this order, each against the current value of identifiers[name].
    const knownNames = [
      'ALT',
      'CAT',
      'REP',
      'RNM',
      'TRG',
      'TBS',
      'TLS',
      'UDT',
      'AND',
      'NOT',
      'ACTIVE',
      'EMPTY',
      'MATCH',
      'NOMATCH',
      'SEM_PRE',
      'SEM_POST',
      'SEM_OK',
    ];
    const hit = knownNames.find((name) => identifiers[name] === s);
    return hit === undefined ? 'UNRECOGNIZED STATE' : hit;
  },
};
|
|
24546
24546
|
|
|
24547
24547
|
|
|
24548
24548
|
/***/ }),
|