quantible 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +130 -27
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +3 -3
- package/dist/index.d.ts +3 -3
- package/dist/index.iife.min.js +1 -1
- package/dist/index.iife.min.js.map +1 -1
- package/dist/index.js +130 -27
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -293,6 +293,18 @@ var units = {
|
|
|
293
293
|
Mbps: { singular: "megabit per second", plural: "megabits per second" },
|
|
294
294
|
Gbps: { singular: "gigabit per second", plural: "gigabits per second" }
|
|
295
295
|
};
|
|
296
|
+
var commonSymbols = {
|
|
297
|
+
"&": " and ",
|
|
298
|
+
"@": " at ",
|
|
299
|
+
"#": " hash ",
|
|
300
|
+
"|": " pipe ",
|
|
301
|
+
".": " dot ",
|
|
302
|
+
_: " underscore "
|
|
303
|
+
};
|
|
304
|
+
var abbreviationMap = {
|
|
305
|
+
"e.g.": "example given",
|
|
306
|
+
"i.e.": "that is"
|
|
307
|
+
};
|
|
296
308
|
var numbers = {
|
|
297
309
|
ones: ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"],
|
|
298
310
|
teens: ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"],
|
|
@@ -325,6 +337,23 @@ var currencySymbols = {
|
|
|
325
337
|
fraction: { singular: "paisa", plural: "paise" }
|
|
326
338
|
}
|
|
327
339
|
};
|
|
340
|
+
var trailingCurrencySymbols = {
|
|
341
|
+
"\u20A9": {
|
|
342
|
+
singular: "won",
|
|
343
|
+
plural: "won",
|
|
344
|
+
fraction: null
|
|
345
|
+
},
|
|
346
|
+
"\u20AB": {
|
|
347
|
+
singular: "dong",
|
|
348
|
+
plural: "dong",
|
|
349
|
+
fraction: null
|
|
350
|
+
},
|
|
351
|
+
"\u0E3F": {
|
|
352
|
+
singular: "baht",
|
|
353
|
+
plural: "baht",
|
|
354
|
+
fraction: { singular: "satang", plural: "satang" }
|
|
355
|
+
}
|
|
356
|
+
};
|
|
328
357
|
var currencyCodes = {
|
|
329
358
|
USD: {
|
|
330
359
|
singular: "dollar",
|
|
@@ -369,6 +398,7 @@ var currencyCodes = {
|
|
|
369
398
|
};
|
|
370
399
|
var currencies = {
|
|
371
400
|
...currencySymbols,
|
|
401
|
+
...trailingCurrencySymbols,
|
|
372
402
|
...currencyCodes
|
|
373
403
|
};
|
|
374
404
|
var math = {
|
|
@@ -380,7 +410,14 @@ var math = {
|
|
|
380
410
|
"/": "divided by",
|
|
381
411
|
"\xF7": "divided by",
|
|
382
412
|
":": "divided by",
|
|
383
|
-
"^": "to the power of"
|
|
413
|
+
"^": "to the power of",
|
|
414
|
+
"=": "equals",
|
|
415
|
+
"<": "is less than",
|
|
416
|
+
">": "is greater than",
|
|
417
|
+
"<=": "is less than or equal to",
|
|
418
|
+
">=": "is greater than or equal to",
|
|
419
|
+
"\u2260": "is not equal to",
|
|
420
|
+
"\u2248": "is approximately equal to"
|
|
384
421
|
};
|
|
385
422
|
|
|
386
423
|
// src/utils/extraction/regexPatterns.ts
|
|
@@ -388,28 +425,47 @@ var sanitizeString = (str) => {
|
|
|
388
425
|
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
389
426
|
};
|
|
390
427
|
var currencySymbolsSanitized = Object.keys(currencySymbols).map((key) => sanitizeString(key)).join("|");
|
|
428
|
+
var trailingCurrencySymbolsSanitized = Object.keys(trailingCurrencySymbols).map((key) => sanitizeString(key)).join("|");
|
|
391
429
|
var currencyCodesSanitized = Object.keys(currencyCodes).join("|");
|
|
392
430
|
var allUnits = Object.keys(units).map((key) => sanitizeString(key)).sort((a, b) => b.length - a.length).join("|");
|
|
393
|
-
var
|
|
431
|
+
var commonSymbolsBase = Object.keys(commonSymbols).filter(
|
|
432
|
+
(symbol) => symbol !== "." && symbol !== "-" && symbol !== "_"
|
|
433
|
+
);
|
|
434
|
+
var commonSymbolsBaseSanitized = commonSymbolsBase.map((symbol) => sanitizeString(symbol)).join("|");
|
|
435
|
+
var dotPattern = `\\.(?![0-9])(?=\\w)`;
|
|
436
|
+
var underscorePattern = `_`;
|
|
437
|
+
var commonSymbolsSanitized = `(?:${commonSymbolsBaseSanitized}|${dotPattern}|${underscorePattern})`;
|
|
438
|
+
var operatorSymbols = Object.keys(math).sort((a, b) => b.length - a.length).map((operator) => sanitizeString(operator)).join("|");
|
|
394
439
|
var regexMatches = new RegExp(
|
|
395
440
|
[
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
441
|
+
[
|
|
442
|
+
// 0. ABBREVIATIONS (special multi‑character sequences)
|
|
443
|
+
// Priority: Highest, to prevent mis‑matching as units or symbols
|
|
444
|
+
`(?<abbreviation>e\\.g\\.|i\\.e\\.)`,
|
|
445
|
+
// 1. COMMON SYMBOLS
|
|
446
|
+
// Priority: High
|
|
447
|
+
// Anchoring: Loose (allows matching symbols like '@' in emails or '&' in 'A&B')
|
|
448
|
+
`|(?<commonSymbol>(${commonSymbolsSanitized}))`,
|
|
449
|
+
// 2. CURRENCY MATCHING
|
|
450
|
+
// Strategy: Atomic anchoring per subtype to prevent 'loose' matches from being stolen by the 'number' group.
|
|
451
|
+
`|(?<=^|\\s)(?:(?<symbolCurrency>(?<negativeSignSymbol>-)?(?<currencySymbol>${currencySymbolsSanitized})\\s*(?<symbolInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<symbolDec>\\d+))?)|(?<trailingSymbolCurrency>(?<negativeSignTrailing>-)?(?<trailingInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<trailingDec>\\d+))?[ \\t]*(?<trailingCurrencySymbol>${trailingCurrencySymbolsSanitized}))|(?<codeCurrency>(?<negativeSignCode>-)?(?<codeInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<codeDec>\\d+))?[ \\t]*(?<currencyCode>${currencyCodesSanitized})))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
|
|
452
|
+
// 3. SCIENTIFIC NOTATION
|
|
453
|
+
// Target: 5e-10, 5.5E10, etc.
|
|
454
|
+
`|(?<scientific>(?<negativeSignScientific>-)?(?<scientificInt>\\d+)(?:\\.(?<scientificDec>\\d+))?(?:[eE])(?<scientificExponent>[+-]?\\d+))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
|
|
455
|
+
// 4. NUMERIC UNITS
|
|
456
|
+
// Target: 5km, 10m/s^2, 10m²/s, etc.
|
|
457
|
+
`|(?<unit>(?<negativeSignUnit>-)?(?<unitInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<unitDec>\\d+))?\\s*(?:\\^(?<unitCaretExponentPre>-?\\d+)|(?<unitSuperExponentPre>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?\\s*(?<unitName>(?:${allUnits})\\b)(?!\\s*(?:${commonSymbolsSanitized}))(?:\\^(?<unitCaretExponentPost>-?\\d+)|(?<unitSuperExponentPost>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
|
|
458
|
+
// 5. VERSIONED NUMBERS (e.g., v2, _5, V10)
|
|
459
|
+
// Priority: Higher than generic numbers to catch specific prefixes within words/boundaries.
|
|
460
|
+
`|(?<versionedNumber>(?:v|V|_)\\d+)|(?<!\\w)(?<number>(?<negativeSignInteger>-)?(?<integerInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<integerDec>\\d+))?(?:(?:\\^(?<integerCaretExponent>-?\\d+))|(?<integerSuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
|
|
461
|
+
// 6. UNIT ONLY
|
|
462
|
+
// Target: stand-alone units (m, kg, m²)
|
|
463
|
+
`|(?<unitOnly>(?<![\\w.]|(?:${commonSymbolsSanitized})\\s+)(?<unitNameOnly>(?:${allUnits})\\b)(?!\\s*(?:${commonSymbolsSanitized}))(?:\\^(?<unitOnlyCaretExponent>-?\\d+)|(?<unitOnlySuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?(?!\\w))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
|
|
464
|
+
// 7. MATHEMATICAL OPERATORS
|
|
465
|
+
// Target: standalone +, -, *, /, =, <, >, etc.
|
|
466
|
+
// Anchoring: Must be preceded by start of input or whitespace, and followed by whitespace, end of input, or a period not followed by a digit, to avoid splitting numbers/words.
|
|
467
|
+
`|(?<operator>(?:(?<=^)|(?<=\\s))(${operatorSymbols})(?=(?:\\s|$|(?:\\.(?!\\d)))))`
|
|
468
|
+
].join("")
|
|
413
469
|
// Matches mathematical operators
|
|
414
470
|
].join(""),
|
|
415
471
|
"u"
|
|
@@ -423,15 +479,36 @@ function extractFirstMatch(input) {
|
|
|
423
479
|
let matchType = Object.keys(matchedGroup)[0];
|
|
424
480
|
const extractedGroup = {};
|
|
425
481
|
switch (matchType) {
|
|
482
|
+
case "abbreviation":
|
|
483
|
+
extractedGroup.symbol = matchedGroup["abbreviation"];
|
|
484
|
+
break;
|
|
485
|
+
case "commonSymbol":
|
|
486
|
+
extractedGroup.symbol = matchedGroup["commonSymbol"];
|
|
487
|
+
break;
|
|
426
488
|
case "symbolCurrency":
|
|
427
489
|
extractedGroup.integer = matchedGroup["symbolInt"];
|
|
428
490
|
extractedGroup.decimal = matchedGroup["symbolDec"];
|
|
491
|
+
if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
|
|
492
|
+
extractedGroup.decimal += "0";
|
|
493
|
+
}
|
|
429
494
|
extractedGroup.negativeInt = matchedGroup["negativeSignSymbol"] !== void 0;
|
|
430
495
|
extractedGroup.currency = matchedGroup["currencySymbol"];
|
|
431
496
|
break;
|
|
497
|
+
case "trailingSymbolCurrency":
|
|
498
|
+
extractedGroup.integer = matchedGroup["trailingInt"];
|
|
499
|
+
extractedGroup.decimal = matchedGroup["trailingDec"];
|
|
500
|
+
if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
|
|
501
|
+
extractedGroup.decimal += "0";
|
|
502
|
+
}
|
|
503
|
+
extractedGroup.negativeInt = matchedGroup["negativeSignTrailing"] !== void 0;
|
|
504
|
+
extractedGroup.currency = matchedGroup["trailingCurrencySymbol"];
|
|
505
|
+
break;
|
|
432
506
|
case "codeCurrency":
|
|
433
507
|
extractedGroup.integer = matchedGroup["codeInt"];
|
|
434
508
|
extractedGroup.decimal = matchedGroup["codeDec"];
|
|
509
|
+
if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
|
|
510
|
+
extractedGroup.decimal += "0";
|
|
511
|
+
}
|
|
435
512
|
extractedGroup.negativeInt = matchedGroup["negativeSignCode"] !== void 0;
|
|
436
513
|
extractedGroup.currency = matchedGroup["currencyCode"];
|
|
437
514
|
break;
|
|
@@ -461,6 +538,12 @@ function extractFirstMatch(input) {
|
|
|
461
538
|
extractedGroup.exponent = matchedGroup["integerSuperExponent"] || matchedGroup["integerCaretExponent"];
|
|
462
539
|
extractedGroup.negativeInt = matchedGroup["negativeSignInteger"] !== void 0;
|
|
463
540
|
break;
|
|
541
|
+
case "versionedNumber": {
|
|
542
|
+
const vMatch = matchedGroup["versionedNumber"];
|
|
543
|
+
extractedGroup.symbol = vMatch[0];
|
|
544
|
+
extractedGroup.integer = vMatch.slice(1);
|
|
545
|
+
break;
|
|
546
|
+
}
|
|
464
547
|
}
|
|
465
548
|
extractedGroup.matchType = matchType;
|
|
466
549
|
extractedGroup.input = matchedGroup[matchType];
|
|
@@ -476,15 +559,13 @@ function extractAllMatches(input) {
|
|
|
476
559
|
let remainingInput = input;
|
|
477
560
|
let currentExtraction = extractFirstMatch(remainingInput);
|
|
478
561
|
while (currentExtraction !== null && currentExtraction.input) {
|
|
479
|
-
const remainingLength = input.length - remainingInput.length;
|
|
480
|
-
if (results.length > 0) {
|
|
481
|
-
currentExtraction.index = remainingInput.indexOf(currentExtraction.input) + remainingLength;
|
|
482
|
-
}
|
|
483
562
|
results.push(currentExtraction);
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
);
|
|
563
|
+
const lastMatchEnd = currentExtraction.index + currentExtraction.input.length;
|
|
564
|
+
remainingInput = input.substring(lastMatchEnd);
|
|
487
565
|
currentExtraction = extractFirstMatch(remainingInput);
|
|
566
|
+
if (currentExtraction) {
|
|
567
|
+
currentExtraction.index += lastMatchEnd;
|
|
568
|
+
}
|
|
488
569
|
}
|
|
489
570
|
return results;
|
|
490
571
|
}
|
|
@@ -1022,9 +1103,32 @@ var convertQuantities = {
|
|
|
1022
1103
|
case "number":
|
|
1023
1104
|
result = convertNumberToSpokenWord(extractionResult);
|
|
1024
1105
|
break;
|
|
1106
|
+
case "abbreviation": {
|
|
1107
|
+
const abbrev = extractionResult.symbol;
|
|
1108
|
+
result = abbreviationMap[abbrev] || abbrev;
|
|
1109
|
+
break;
|
|
1110
|
+
}
|
|
1111
|
+
case "versionedNumber": {
|
|
1112
|
+
const vNum = extractionResult;
|
|
1113
|
+
const prefixChar = vNum.symbol || "";
|
|
1114
|
+
const numericPart = convertNumberToSpokenWord({
|
|
1115
|
+
integer: vNum.integer,
|
|
1116
|
+
negativeInt: false,
|
|
1117
|
+
input: vNum.integer,
|
|
1118
|
+
index: vNum.index
|
|
1119
|
+
});
|
|
1120
|
+
result = `${prefixChar} ${numericPart}`;
|
|
1121
|
+
break;
|
|
1122
|
+
}
|
|
1123
|
+
case "commonSymbol":
|
|
1124
|
+
result = commonSymbols[extractionResult.symbol] || "";
|
|
1125
|
+
break;
|
|
1025
1126
|
case "symbolCurrency":
|
|
1026
1127
|
result = convertCurrencyToSpokenWord(extractionResult);
|
|
1027
1128
|
break;
|
|
1129
|
+
case "trailingSymbolCurrency":
|
|
1130
|
+
result = convertCurrencyToSpokenWord(extractionResult);
|
|
1131
|
+
break;
|
|
1028
1132
|
case "codeCurrency":
|
|
1029
1133
|
result = convertCurrencyToSpokenWord(extractionResult);
|
|
1030
1134
|
break;
|
|
@@ -1079,7 +1183,6 @@ var convertQuantities = {
|
|
|
1079
1183
|
return output.replace(/\s+/g, " ").trim();
|
|
1080
1184
|
}
|
|
1081
1185
|
};
|
|
1082
|
-
console.log(convertQuantities.autoReplaceAllMatches("25 EUR"));
|
|
1083
1186
|
|
|
1084
1187
|
export { convertQuantities, extractQuantities, validateExtractionResult };
|
|
1085
1188
|
//# sourceMappingURL=index.js.map
|