quantible 0.2.1 → 0.3.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
package/dist/index.cjs CHANGED
@@ -295,6 +295,18 @@ var units = {
295
295
  Mbps: { singular: "megabit per second", plural: "megabits per second" },
296
296
  Gbps: { singular: "gigabit per second", plural: "gigabits per second" }
297
297
  };
298
+ var commonSymbols = {
299
+ "&": " and ",
300
+ "@": " at ",
301
+ "#": " hash ",
302
+ "|": " pipe ",
303
+ ".": " dot ",
304
+ _: " underscore "
305
+ };
306
+ var abbreviationMap = {
307
+ "e.g.": "example given",
308
+ "i.e.": "that is"
309
+ };
298
310
  var numbers = {
299
311
  ones: ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"],
300
312
  teens: ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"],
@@ -327,6 +339,23 @@ var currencySymbols = {
327
339
  fraction: { singular: "paisa", plural: "paise" }
328
340
  }
329
341
  };
342
+ var trailingCurrencySymbols = {
343
+ "\u20A9": {
344
+ singular: "won",
345
+ plural: "won",
346
+ fraction: null
347
+ },
348
+ "\u20AB": {
349
+ singular: "dong",
350
+ plural: "dong",
351
+ fraction: null
352
+ },
353
+ "\u0E3F": {
354
+ singular: "baht",
355
+ plural: "baht",
356
+ fraction: { singular: "satang", plural: "satang" }
357
+ }
358
+ };
330
359
  var currencyCodes = {
331
360
  USD: {
332
361
  singular: "dollar",
@@ -371,6 +400,7 @@ var currencyCodes = {
371
400
  };
372
401
  var currencies = {
373
402
  ...currencySymbols,
403
+ ...trailingCurrencySymbols,
374
404
  ...currencyCodes
375
405
  };
376
406
  var math = {
@@ -382,7 +412,14 @@ var math = {
382
412
  "/": "divided by",
383
413
  "\xF7": "divided by",
384
414
  ":": "divided by",
385
- "^": "to the power of"
415
+ "^": "to the power of",
416
+ "=": "equals",
417
+ "<": "is less than",
418
+ ">": "is greater than",
419
+ "<=": "is less than or equal to",
420
+ ">=": "is greater than or equal to",
421
+ "\u2260": "is not equal to",
422
+ "\u2248": "is approximately equal to"
386
423
  };
387
424
 
388
425
  // src/utils/extraction/regexPatterns.ts
@@ -390,28 +427,47 @@ var sanitizeString = (str) => {
390
427
  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
391
428
  };
392
429
  var currencySymbolsSanitized = Object.keys(currencySymbols).map((key) => sanitizeString(key)).join("|");
430
+ var trailingCurrencySymbolsSanitized = Object.keys(trailingCurrencySymbols).map((key) => sanitizeString(key)).join("|");
393
431
  var currencyCodesSanitized = Object.keys(currencyCodes).join("|");
394
432
  var allUnits = Object.keys(units).map((key) => sanitizeString(key)).sort((a, b) => b.length - a.length).join("|");
395
- var operatorSymbols = Object.keys(math).map((operator) => sanitizeString(operator)).join("|");
433
+ var commonSymbolsBase = Object.keys(commonSymbols).filter(
434
+ (symbol) => symbol !== "." && symbol !== "-" && symbol !== "_"
435
+ );
436
+ var commonSymbolsBaseSanitized = commonSymbolsBase.map((symbol) => sanitizeString(symbol)).join("|");
437
+ var dotPattern = `\\.(?![0-9])(?=\\w)`;
438
+ var underscorePattern = `_`;
439
+ var commonSymbolsSanitized = `(?:${commonSymbolsBaseSanitized}|${dotPattern}|${underscorePattern})`;
440
+ var operatorSymbols = Object.keys(math).sort((a, b) => b.length - a.length).map((operator) => sanitizeString(operator)).join("|");
396
441
  var regexMatches = new RegExp(
397
442
  [
398
- // Currency Matching (Highest Priority)
399
- `(?<=^|\\s)(?:(?<symbolCurrency>(?<negativeSignSymbol>-)?(?<currencySymbol>${currencySymbolsSanitized})\\s*(?<symbolInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<symbolDec>\\d+))?)|(?<codeCurrency>(?<negativeSignCode>-)?(?<codeInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<codeDec>\\d+))?[ \\t]*(?<currencyCode>${currencyCodesSanitized})))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
400
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
401
- // Scientific Notation 5e-10, 5e10, 5.5e10, 5.5e-10
402
- `|(?<scientific>(?<negativeSignScientific>-)?(?<scientificInt>\\d+)(?:\\.(?<scientificDec>\\d+))?(?:[eE])(?<scientificExponent>[+-]?\\d+))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
403
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
404
- // Numeric Units 5km, 10m/s, 10m/s^2, 10m/s², 10m²/s, 10m²/s²
405
- `|(?<unit>(?<negativeSignUnit>-)?(?<unitInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<unitDec>\\d+))?\\s*(?:\\^(?<unitCaretExponentPre>-?\\d+)|(?<unitSuperExponentPre>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?\\s*(?<unitName>(?:${allUnits})\\b)(?:\\^(?<unitCaretExponentPost>-?\\d+)|(?<unitSuperExponentPost>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
406
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
407
- // Number with optional exponent
408
- `|(?<!\\w)(?<number>(?<negativeSignInteger>-)?(?<integerInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<integerDec>\\d+))?(?:(?:\\^(?<integerCaretExponent>-?\\d+))|(?<integerSuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
409
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
410
- // Unit Only (e.g., m, kg, m², kg³)
411
- `|(?<unitOnly>(?<![\\w.])(?<unitNameOnly>(?:${allUnits})\\b)(?:\\^(?<unitOnlyCaretExponent>-?\\d+)|(?<unitOnlySuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?(?!\\w))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
412
- // Ensures the match is followed by whitespace, end of string, or non-numeric characters
413
- // Mathematical Operators
414
- `|(?<operator>(?:(?<=^)|(?<=\\s))(${operatorSymbols})(?=\\s|$|(?:\\.(?!\\d))))`
443
+ [
444
+ // 0. ABBREVIATIONS (special multi‑character sequences)
445
+ // Priority: Highest, to prevent mis‑matching as units or symbols
446
+ `(?<abbreviation>e\\.g\\.|i\\.e\\.)`,
447
+ // 1. COMMON SYMBOLS
448
+ // Priority: High
449
+ // Anchoring: Loose (allows matching symbols like '@' in emails or '&' in 'A&B')
450
+ `|(?<commonSymbol>(${commonSymbolsSanitized}))`,
451
+ // 2. CURRENCY MATCHING
452
+ // Strategy: Atomic anchoring per subtype to prevent 'loose' matches from being stolen by the 'number' group.
453
+ `|(?<=^|\\s)(?:(?<symbolCurrency>(?<negativeSignSymbol>-)?(?<currencySymbol>${currencySymbolsSanitized})\\s*(?<symbolInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<symbolDec>\\d+))?)|(?<trailingSymbolCurrency>(?<negativeSignTrailing>-)?(?<trailingInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<trailingDec>\\d+))?[ \\t]*(?<trailingCurrencySymbol>${trailingCurrencySymbolsSanitized}))|(?<codeCurrency>(?<negativeSignCode>-)?(?<codeInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<codeDec>\\d+))?[ \\t]*(?<currencyCode>${currencyCodesSanitized})))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
454
+ // 3. SCIENTIFIC NOTATION
455
+ // Target: 5e-10, 5.5E10, etc.
456
+ `|(?<scientific>(?<negativeSignScientific>-)?(?<scientificInt>\\d+)(?:\\.(?<scientificDec>\\d+))?(?:[eE])(?<scientificExponent>[+-]?\\d+))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
457
+ // 4. NUMERIC UNITS
458
+ // Target: 5km, 10m/s^2, 10m²/s, etc.
459
+ `|(?<unit>(?<negativeSignUnit>-)?(?<unitInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<unitDec>\\d+))?\\s*(?:\\^(?<unitCaretExponentPre>-?\\d+)|(?<unitSuperExponentPre>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?\\s*(?<unitName>(?:${allUnits})\\b)(?!\\s*(?:${commonSymbolsSanitized}))(?:\\^(?<unitCaretExponentPost>-?\\d+)|(?<unitSuperExponentPost>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
460
+ // 5. VERSIONED NUMBERS (e.g., v2, _5, V10), then GENERIC NUMBERS (e.g., 42, 3.14, 2^3)
461
+ // Priority: Higher than generic numbers to catch specific prefixes within words/boundaries.
462
+ `|(?<versionedNumber>(?:v|V|_)\\d+)|(?<!\\w)(?<number>(?<negativeSignInteger>-)?(?<integerInt>(?:\\d{1,3}(?:,\\d{3})*|\\d+))(?:\\.(?<integerDec>\\d+))?(?:(?:\\^(?<integerCaretExponent>-?\\d+))|(?<integerSuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?)(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
463
+ // 6. UNIT ONLY
464
+ // Target: stand-alone units (m, kg, m²)
465
+ `|(?<unitOnly>(?<![\\w.]|(?:${commonSymbolsSanitized})\\s+)(?<unitNameOnly>(?:${allUnits})\\b)(?!\\s*(?:${commonSymbolsSanitized}))(?:\\^(?<unitOnlyCaretExponent>-?\\d+)|(?<unitOnlySuperExponent>\u207B?[\xB9\xB2\xB3\u2074\u2075\u2076\u2077\u2078\u2079]))?(?!\\w))(?=(?:\\s|$|(?:\\.(?!\\d))|[^\\d.]))`,
466
+ // 7. MATHEMATICAL OPERATORS
467
+ // Target: standalone +, -, *, /, =, <, >, etc.
468
+ // Anchoring: Must be preceded by start of input or whitespace and followed by whitespace, end of input, or a period not followed by a digit, to avoid splitting numbers/words.
469
+ `|(?<operator>(?:(?<=^)|(?<=\\s))(${operatorSymbols})(?=(?:\\s|$|(?:\\.(?!\\d)))))`
470
+ ].join("")
415
471
  // Matches mathematical operators
416
472
  ].join(""),
417
473
  "u"
@@ -425,15 +481,36 @@ function extractFirstMatch(input) {
425
481
  let matchType = Object.keys(matchedGroup)[0];
426
482
  const extractedGroup = {};
427
483
  switch (matchType) {
484
+ case "abbreviation":
485
+ extractedGroup.symbol = matchedGroup["abbreviation"];
486
+ break;
487
+ case "commonSymbol":
488
+ extractedGroup.symbol = matchedGroup["commonSymbol"];
489
+ break;
428
490
  case "symbolCurrency":
429
491
  extractedGroup.integer = matchedGroup["symbolInt"];
430
492
  extractedGroup.decimal = matchedGroup["symbolDec"];
493
+ if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
494
+ extractedGroup.decimal += "0";
495
+ }
431
496
  extractedGroup.negativeInt = matchedGroup["negativeSignSymbol"] !== void 0;
432
497
  extractedGroup.currency = matchedGroup["currencySymbol"];
433
498
  break;
499
+ case "trailingSymbolCurrency":
500
+ extractedGroup.integer = matchedGroup["trailingInt"];
501
+ extractedGroup.decimal = matchedGroup["trailingDec"];
502
+ if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
503
+ extractedGroup.decimal += "0";
504
+ }
505
+ extractedGroup.negativeInt = matchedGroup["negativeSignTrailing"] !== void 0;
506
+ extractedGroup.currency = matchedGroup["trailingCurrencySymbol"];
507
+ break;
434
508
  case "codeCurrency":
435
509
  extractedGroup.integer = matchedGroup["codeInt"];
436
510
  extractedGroup.decimal = matchedGroup["codeDec"];
511
+ if (extractedGroup.decimal && extractedGroup.decimal.length === 1) {
512
+ extractedGroup.decimal += "0";
513
+ }
437
514
  extractedGroup.negativeInt = matchedGroup["negativeSignCode"] !== void 0;
438
515
  extractedGroup.currency = matchedGroup["currencyCode"];
439
516
  break;
@@ -463,6 +540,12 @@ function extractFirstMatch(input) {
463
540
  extractedGroup.exponent = matchedGroup["integerSuperExponent"] || matchedGroup["integerCaretExponent"];
464
541
  extractedGroup.negativeInt = matchedGroup["negativeSignInteger"] !== void 0;
465
542
  break;
543
+ case "versionedNumber": {
544
+ const vMatch = matchedGroup["versionedNumber"];
545
+ extractedGroup.symbol = vMatch[0];
546
+ extractedGroup.integer = vMatch.slice(1);
547
+ break;
548
+ }
466
549
  }
467
550
  extractedGroup.matchType = matchType;
468
551
  extractedGroup.input = matchedGroup[matchType];
@@ -478,15 +561,13 @@ function extractAllMatches(input) {
478
561
  let remainingInput = input;
479
562
  let currentExtraction = extractFirstMatch(remainingInput);
480
563
  while (currentExtraction !== null && currentExtraction.input) {
481
- const remainingLength = input.length - remainingInput.length;
482
- if (results.length > 0) {
483
- currentExtraction.index = remainingInput.indexOf(currentExtraction.input) + remainingLength;
484
- }
485
564
  results.push(currentExtraction);
486
- remainingInput = remainingInput.substring(
487
- remainingInput.indexOf(currentExtraction.input) + currentExtraction.input.length
488
- );
565
+ const lastMatchEnd = currentExtraction.index + currentExtraction.input.length;
566
+ remainingInput = input.substring(lastMatchEnd);
489
567
  currentExtraction = extractFirstMatch(remainingInput);
568
+ if (currentExtraction) {
569
+ currentExtraction.index += lastMatchEnd;
570
+ }
490
571
  }
491
572
  return results;
492
573
  }
@@ -508,7 +589,7 @@ function zeroTo999(num) {
508
589
  words += numbers.tens[Math.floor(num / 10)];
509
590
  num %= 10;
510
591
  if (num > 0) {
511
- words += " " + numbers.ones[num];
592
+ words += "-" + numbers.ones[num];
512
593
  }
513
594
  } else if (num >= 10) {
514
595
  words += numbers.teens[num - 10];
@@ -1024,9 +1105,32 @@ var convertQuantities = {
1024
1105
  case "number":
1025
1106
  result = convertNumberToSpokenWord(extractionResult);
1026
1107
  break;
1108
+ case "abbreviation": {
1109
+ const abbrev = extractionResult.symbol;
1110
+ result = abbreviationMap[abbrev] || abbrev;
1111
+ break;
1112
+ }
1113
+ case "versionedNumber": {
1114
+ const vNum = extractionResult;
1115
+ const prefixChar = vNum.symbol || "";
1116
+ const numericPart = convertNumberToSpokenWord({
1117
+ integer: vNum.integer,
1118
+ negativeInt: false,
1119
+ input: vNum.integer,
1120
+ index: vNum.index
1121
+ });
1122
+ result = `${prefixChar} ${numericPart}`;
1123
+ break;
1124
+ }
1125
+ case "commonSymbol":
1126
+ result = commonSymbols[extractionResult.symbol] || "";
1127
+ break;
1027
1128
  case "symbolCurrency":
1028
1129
  result = convertCurrencyToSpokenWord(extractionResult);
1029
1130
  break;
1131
+ case "trailingSymbolCurrency":
1132
+ result = convertCurrencyToSpokenWord(extractionResult);
1133
+ break;
1030
1134
  case "codeCurrency":
1031
1135
  result = convertCurrencyToSpokenWord(extractionResult);
1032
1136
  break;
@@ -1081,7 +1185,6 @@ var convertQuantities = {
1081
1185
  return output.replace(/\s+/g, " ").trim();
1082
1186
  }
1083
1187
  };
1084
- console.log(convertQuantities.autoReplaceAllMatches("25 EUR"));
1085
1188
 
1086
1189
  exports.convertQuantities = convertQuantities;
1087
1190
  exports.extractQuantities = extractQuantities;