quantible 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -293,6 +293,18 @@ var units = {
293
293
  Mbps: { singular: "megabit per second", plural: "megabits per second" },
294
294
  Gbps: { singular: "gigabit per second", plural: "gigabits per second" }
295
295
  };
296
+ var commonSymbols = { // Spoken-word replacement for each supported symbol; every value carries a leading and trailing space. Key order is the order Object.keys() feeds into the symbol regex alternation.
297
+ "&": " and ",
298
+ "@": " at ",
299
+ "#": " hash ",
300
+ "|": " pipe ",
301
+ ".": " dot ", // Filtered out of the plain symbol alternation; matched through dotPattern instead.
302
+ _: " underscore " // Also filtered out of the plain alternation; matched through underscorePattern.
303
+ };
304
/**
 * Spoken-word expansions for the Latin abbreviations recognised by the
 * `abbreviation` alternative of the extraction regex (`e.g.` and `i.e.`).
 *
 * The converter looks entries up by the exact matched text and falls back to
 * the raw text when no entry exists, so keys must include the dots.
 */
var abbreviationMap = {
  // "e.g." stands for "exempli gratia" and is read aloud as "for example".
  "e.g.": "for example",
  // "i.e." stands for "id est" and is read aloud as "that is".
  "i.e.": "that is"
};
296
308
  var numbers = {
297
309
  ones: ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"],
298
310
  teens: ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"],
@@ -325,6 +337,23 @@ var currencySymbols = {
325
337
  fraction: { singular: "paisa", plural: "paise" }
326
338
  }
327
339
  };
340
+ var trailingCurrencySymbols = { // Currency symbols that the extraction regex matches after the amount (digits, optional spaces/tabs, then the symbol). Each entry names the main unit and, where defined, the fractional unit.
341
+ "\u20A9": { // U+20A9 WON SIGN
342
+ singular: "won",
343
+ plural: "won",
344
+ fraction: null // No fractional unit name is defined for this currency.
345
+ },
346
+ "\u20AB": { // U+20AB DONG SIGN
347
+ singular: "dong",
348
+ plural: "dong",
349
+ fraction: null // No fractional unit name is defined for this currency.
350
+ },
351
+ "\u0E3F": { // U+0E3F THAI CURRENCY SYMBOL BAHT
352
+ singular: "baht",
353
+ plural: "baht",
354
+ fraction: { singular: "satang", plural: "satang" }
355
+ }
356
+ };
328
357
  var currencyCodes = {
329
358
  USD: {
330
359
  singular: "dollar",
@@ -369,6 +398,7 @@ var currencyCodes = {
369
398
  };
370
399
  var currencies = { // Single lookup table built by merging the three currency maps; if two maps share a key, the later spread wins.
371
400
  ...currencySymbols,
401
+ ...trailingCurrencySymbols,
372
402
  ...currencyCodes
373
403
  };
374
404
  var math = {
@@ -380,7 +410,14 @@ var math = {
380
410
  "/": "divided by",
381
411
  "\xF7": "divided by",
382
412
  ":": "divided by",
383
- "^": "to the power of"
413
+ "^": "to the power of",
414
+ "=": "equals",
415
+ "<": "is less than",
416
+ ">": "is greater than",
417
+ "<=": "is less than or equal to",
418
+ ">=": "is greater than or equal to",
419
+ "\u2260": "is not equal to",
420
+ "\u2248": "is approximately equal to"
384
421
  };
385
422
 
386
423
  // src/utils/extraction/regexPatterns.ts
@@ -388,28 +425,47 @@ var sanitizeString = (str) => {
388
425
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
389
426
  };
390
427
  var currencySymbolsSanitized = Object.keys(currencySymbols).map((key) => sanitizeString(key)).join("|"); // Regex-escaped alternation of the currency symbols that precede the amount.
428
+ var trailingCurrencySymbolsSanitized = Object.keys(trailingCurrencySymbols).map((key) => sanitizeString(key)).join("|"); // Same escaping for the symbols that follow the amount.
391
429
  var currencyCodesSanitized = Object.keys(currencyCodes).join("|"); // Currency codes are joined as-is, without escaping.
392
430
  var allUnits = Object.keys(units).map((key) => sanitizeString(key)).sort((a, b) => b.length - a.length).join("|"); // Longest unit names first, so a longer unit is tried before a shorter one that is its prefix.
393
- var operatorSymbols = Object.keys(math).map((operator) => sanitizeString(operator)).join("|");
431
+ var commonSymbolsBase = Object.keys(commonSymbols).filter( // Symbols that go into a plain alternation; "." and "_" are handled by dedicated patterns.
432
+ (symbol) => symbol !== "." && symbol !== "-" && symbol !== "_" // NOTE(review): commonSymbols has no "-" key, so the "-" test never excludes anything.
433
+ );
434
+ var commonSymbolsBaseSanitized = commonSymbolsBase.map((symbol) => sanitizeString(symbol)).join("|");
435
+ var dotPattern = `\\.(?![0-9])(?=\\w)`; // A literal dot followed by a word character that is not a digit, so decimal points and sentence-final periods are not matched.
436
+ var underscorePattern = `_`; // Underscore is matched with no lookaround.
437
+ var commonSymbolsSanitized = `(?:${commonSymbolsBaseSanitized}|${dotPattern}|${underscorePattern})`; // Non-capturing group combining the three symbol patterns.
438
+ var operatorSymbols = Object.keys(math).sort((a, b) => b.length - a.length).map((operator) => sanitizeString(operator)).join("|"); // Longest operators first, so "<=" and ">=" are tried before "<" and ">".
394
439
  var regexMatches = new RegExp(
395
440
  [
396
- // Currency Matching (Highest Priority)
397
- `(?<=^|\\s)(?:(?<symbolCurrency>(?<negativeSignSymbol>-)?(?<currencySymbol>${currencySymbolsSanitized})\\s*(?<symbolInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<symbolDec>\\d+))?)|(?<codeCurrency>(?<negativeSignCode>-)?(?<codeInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<codeDec>\\d+))?[ \\t]*(?<currencyCode>${currencyCodesSanitized})))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
398
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
399
- // Scientific Notation 5e-10, 5e10, 5.5e10, 5.5e-10
400
- `|(?<scientific>(?<negativeSignScientific>-)?(?<scientificInt>\\d+)(?:\\.(?<scientificDec>\\d+))?(?:[eE])(?<scientificExponent>[+-]?\\d+))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
401
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
402
- // Numeric Units 5km, 10m/s, 10m/s^2, 10m/s², 10m²/s, 10m²/s²
403
- `|(?<unit>(?<negativeSignUnit>-)?(?<unitInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<unitDec>\\d+))?\\s*(?:\\^(?<unitCaretExponentPre>-?\\d+)|(?<unitSuperExponentPre>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?\\s*(?<unitName>(?:${allUnits})\\b)(?:\\^(?<unitCaretExponentPost>-?\\d+)|(?<unitSuperExponentPost>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
404
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
405
- // Number with optional exponent
406
- `|(?<!\\w)(?<number>(?<negativeSignInteger>-)?(?<integerInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<integerDec>\\d+))?(?:(?:\\^(?<integerCaretExponent>-?\\d+))|(?<integerSuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
407
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
408
- // Unit Only (e.g., m, kg, m², kg³)
409
- `|(?<unitOnly>(?<![\\w.])(?<unitNameOnly>(?:${allUnits})\\b)(?:\\^(?<unitOnlyCaretExponent>-?\\d+)|(?<unitOnlySuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?(?!\\w))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
410
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
411
- // Mathematical Operators
412
- `|(?<operator>(?:(?<=^)|(?<=\\s))(${operatorSymbols})(?=\\s|$|(?:\\.(?!\\d))))`
441
+ [
442
+ // 0. ABBREVIATIONS (special multi‑character sequences)
443
+ // Priority: Highest, to prevent mis‑matching as units or symbols
444
+ `(?<abbreviation>e\\.g\\.|i\\.e\\.)`,
445
+ // 1. COMMON SYMBOLS
446
+ // Priority: High
447
+ // Anchoring: Loose (allows matching symbols like '@' in emails or '&' in 'A&B')
448
+ `|(?<commonSymbol>(${commonSymbolsSanitized}))`,
449
+ // 2. CURRENCY MATCHING
450
+ // Strategy: Atomic anchoring per subtype to prevent 'loose' matches from being stolen by the 'number' group.
451
+ `|(?<=^|\\s)(?:(?<symbolCurrency>(?<negativeSignSymbol>-)?(?<currencySymbol>${currencySymbolsSanitized})\\s*(?<symbolInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<symbolDec>\\d+))?)|(?<trailingSymbolCurrency>(?<negativeSignTrailing>-)?(?<trailingInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<trailingDec>\\d+))?[ \\t]*(?<trailingCurrencySymbol>${trailingCurrencySymbolsSanitized}))|(?<codeCurrency>(?<negativeSignCode>-)?(?<codeInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<codeDec>\\d+))?[ \\t]*(?<currencyCode>${currencyCodesSanitized})))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
452
+ // 3. SCIENTIFIC NOTATION
453
+ // Target: 5e-10, 5.5E10, etc.
454
+ `|(?<scientific>(?<negativeSignScientific>-)?(?<scientificInt>\\d+)(?:\\.(?<scientificDec>\\d+))?(?:[eE])(?<scientificExponent>[+-]?\\d+))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
455
+ // 4. NUMERIC UNITS
456
+ // Target: 5km, 10m/s^2, 10m²/s, etc.
457
+ `|(?<unit>(?<negativeSignUnit>-)?(?<unitInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<unitDec>\\d+))?\\s*(?:\\^(?<unitCaretExponentPre>-?\\d+)|(?<unitSuperExponentPre>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?\\s*(?<unitName>(?:${allUnits})\\b)(?!\\s*(?:${commonSymbolsSanitized}))(?:\\^(?<unitCaretExponentPost>-?\\d+)|(?<unitSuperExponentPost>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
458
+ // 5. VERSIONED NUMBERS (e.g., v2, _5, V10)
459
+ // Priority: Higher than generic numbers to catch specific prefixes within words/boundaries.
460
+ `|(?<versionedNumber>(?:v|V|_)\\d+)|(?<!\\w)(?<number>(?<negativeSignInteger>-)?(?<integerInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<integerDec>\\d+))?(?:(?:\\^(?<integerCaretExponent>-?\\d+))|(?<integerSuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
461
+ // 7. UNIT ONLY
462
+ // Target: stand-alone units (m, kg, m²)
463
+ `|(?<unitOnly>(?<![\\w.]|(?:${commonSymbolsSanitized})\\s+)(?<unitNameOnly>(?:${allUnits})\\b)(?!\\s*(?:${commonSymbolsSanitized}))(?:\\^(?<unitOnlyCaretExponent>-?\\d+)|(?<unitOnlySuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?(?!\\w))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
464
+ // 8. MATHEMATICAL OPERATORS
465
+ // Target: standalone +, -, *, /, =, <, >, etc.
466
+ // Anchoring: Must be surrounded by whitespace or EOL to avoid splitting numbers/words.
467
+ `|(?<operator>(?:(?<=^)|(?<=\\s))(${operatorSymbols})(?=(?:\\s|$|(?:\\.(?!\\d)))))`
468
+ ].join("")
413
469
  // Matches mathematical operators
414
470
  ].join(""),
415
471
  "u"
@@ -423,15 +479,36 @@ function extractFirstMatch(input) {
423
479
  let matchType = Object.keys(matchedGroup)[0];
424
480
  const extractedGroup = {};
425
481
  switch (matchType) {
482
+ case "abbreviation":
483
+ extractedGroup.symbol = matchedGroup["abbreviation"];
484
+ break;
485
+ case "commonSymbol":
486
+ extractedGroup.symbol = matchedGroup["commonSymbol"];
487
+ break;
426
488
  case "symbolCurrency":
427
489
  extractedGroup.integer = matchedGroup["symbolInt"];
428
490
  extractedGroup.decimal = matchedGroup["symbolDec"];
491
+ if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
492
+ extractedGroup.decimal += "0";
493
+ }
429
494
  extractedGroup.negativeInt = matchedGroup["negativeSignSymbol"] !== void 0;
430
495
  extractedGroup.currency = matchedGroup["currencySymbol"];
431
496
  break;
497
+ case "trailingSymbolCurrency":
498
+ extractedGroup.integer = matchedGroup["trailingInt"];
499
+ extractedGroup.decimal = matchedGroup["trailingDec"];
500
+ if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
501
+ extractedGroup.decimal += "0";
502
+ }
503
+ extractedGroup.negativeInt = matchedGroup["negativeSignTrailing"] !== void 0;
504
+ extractedGroup.currency = matchedGroup["trailingCurrencySymbol"];
505
+ break;
432
506
  case "codeCurrency":
433
507
  extractedGroup.integer = matchedGroup["codeInt"];
434
508
  extractedGroup.decimal = matchedGroup["codeDec"];
509
+ if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
510
+ extractedGroup.decimal += "0";
511
+ }
435
512
  extractedGroup.negativeInt = matchedGroup["negativeSignCode"] !== void 0;
436
513
  extractedGroup.currency = matchedGroup["currencyCode"];
437
514
  break;
@@ -461,6 +538,12 @@ function extractFirstMatch(input) {
461
538
  extractedGroup.exponent = matchedGroup["integerSuperExponent"] || matchedGroup["integerCaretExponent"];
462
539
  extractedGroup.negativeInt = matchedGroup["negativeSignInteger"] !== void 0;
463
540
  break;
541
+ case "versionedNumber": {
542
+ const vMatch = matchedGroup["versionedNumber"];
543
+ extractedGroup.symbol = vMatch[0];
544
+ extractedGroup.integer = vMatch.slice(1);
545
+ break;
546
+ }
464
547
  }
465
548
  extractedGroup.matchType = matchType;
466
549
  extractedGroup.input = matchedGroup[matchType];
@@ -476,15 +559,13 @@ function extractAllMatches(input) {
476
559
  let remainingInput = input;
477
560
  let currentExtraction = extractFirstMatch(remainingInput);
478
561
  while (currentExtraction !== null && currentExtraction.input) {
479
- const remainingLength = input.length - remainingInput.length;
480
- if (results.length > 0) {
481
- currentExtraction.index = remainingInput.indexOf(currentExtraction.input) + remainingLength;
482
- }
483
562
  results.push(currentExtraction);
484
- remainingInput = remainingInput.substring(
485
- remainingInput.indexOf(currentExtraction.input) + currentExtraction.input.length
486
- );
563
+ const lastMatchEnd = currentExtraction.index + currentExtraction.input.length;
564
+ remainingInput = input.substring(lastMatchEnd);
487
565
  currentExtraction = extractFirstMatch(remainingInput);
566
+ if (currentExtraction) {
567
+ currentExtraction.index += lastMatchEnd;
568
+ }
488
569
  }
489
570
  return results;
490
571
  }
@@ -1022,9 +1103,32 @@ var convertQuantities = {
1022
1103
  case "number":
1023
1104
  result = convertNumberToSpokenWord(extractionResult);
1024
1105
  break;
1106
+ case "abbreviation": {
1107
+ const abbrev = extractionResult.symbol;
1108
+ result = abbreviationMap[abbrev] || abbrev;
1109
+ break;
1110
+ }
1111
+ case "versionedNumber": {
1112
+ const vNum = extractionResult;
1113
+ const prefixChar = vNum.symbol || "";
1114
+ const numericPart = convertNumberToSpokenWord({
1115
+ integer: vNum.integer,
1116
+ negativeInt: false,
1117
+ input: vNum.integer,
1118
+ index: vNum.index
1119
+ });
1120
+ result = `${prefixChar} ${numericPart}`;
1121
+ break;
1122
+ }
1123
+ case "commonSymbol":
1124
+ result = commonSymbols[extractionResult.symbol] || "";
1125
+ break;
1025
1126
  case "symbolCurrency":
1026
1127
  result = convertCurrencyToSpokenWord(extractionResult);
1027
1128
  break;
1129
+ case "trailingSymbolCurrency":
1130
+ result = convertCurrencyToSpokenWord(extractionResult);
1131
+ break;
1028
1132
  case "codeCurrency":
1029
1133
  result = convertCurrencyToSpokenWord(extractionResult);
1030
1134
  break;
@@ -1079,7 +1183,6 @@ var convertQuantities = {
1079
1183
  return output.replace(/\s+/g, " ").trim();
1080
1184
  }
1081
1185
  };
1082
- console.log(convertQuantities.autoReplaceAllMatches("25 EUR"));
1083
1186
 
1084
1187
  export { convertQuantities, extractQuantities, validateExtractionResult };
1085
1188
  //# sourceMappingURL=index.js.map