resuml 1.11.0 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -481,6 +481,7 @@ var init_en = __esm({
481
481
  ],
482
482
  pronouns: ["i", "me", "my", "mine", "myself", "we", "our", "ours"],
483
483
  stopWords: [
484
+ // Articles & determiners
484
485
  "a",
485
486
  "an",
486
487
  "the",
@@ -551,7 +552,217 @@ var init_en = __esm({
551
552
  "such",
552
553
  "than",
553
554
  "too",
554
- "very"
555
+ "very",
556
+ // Pronouns & possessives (also caught by the pronoun check, but filtered out of JD keywords here)
557
+ "you",
558
+ "your",
559
+ "yours",
560
+ "yourself",
561
+ "we",
562
+ "our",
563
+ "ours",
564
+ "ourselves",
565
+ "they",
566
+ "them",
567
+ "their",
568
+ "theirs",
569
+ "he",
570
+ "she",
571
+ "his",
572
+ "her",
573
+ "hers",
574
+ "who",
575
+ "whom",
576
+ "whose",
577
+ "which",
578
+ "what",
579
+ "where",
580
+ "when",
581
+ "how",
582
+ "why",
583
+ // Common JD filler words (not meaningful for skill matching)
584
+ "able",
585
+ "also",
586
+ "across",
587
+ "already",
588
+ "always",
589
+ "among",
590
+ "any",
591
+ "apply",
592
+ "become",
593
+ "believe",
594
+ "best",
595
+ "bring",
596
+ "change",
597
+ "come",
598
+ "committed",
599
+ "company",
600
+ "comfortable",
601
+ "critical",
602
+ "current",
603
+ "day",
604
+ "desired",
605
+ "either",
606
+ "end",
607
+ "ensure",
608
+ "environment",
609
+ "equal",
610
+ "even",
611
+ "excellent",
612
+ "exciting",
613
+ "exceptional",
614
+ "expected",
615
+ "experience",
616
+ "fast",
617
+ "field",
618
+ "find",
619
+ "first",
620
+ "focused",
621
+ "follow",
622
+ "get",
623
+ "give",
624
+ "go",
625
+ "going",
626
+ "good",
627
+ "great",
628
+ "group",
629
+ "grow",
630
+ "growing",
631
+ "growth",
632
+ "help",
633
+ "here",
634
+ "high",
635
+ "highly",
636
+ "ideal",
637
+ "impact",
638
+ "important",
639
+ "include",
640
+ "includes",
641
+ "including",
642
+ "industry",
643
+ "interested",
644
+ "job",
645
+ "join",
646
+ "just",
647
+ "keep",
648
+ "key",
649
+ "know",
650
+ "large",
651
+ "latest",
652
+ "lead",
653
+ "level",
654
+ "like",
655
+ "location",
656
+ "long",
657
+ "look",
658
+ "looking",
659
+ "love",
660
+ "make",
661
+ "many",
662
+ "much",
663
+ "must",
664
+ "need",
665
+ "new",
666
+ "next",
667
+ "offer",
668
+ "one",
669
+ "only",
670
+ "open",
671
+ "opportunity",
672
+ "order",
673
+ "others",
674
+ "own",
675
+ "pace",
676
+ "part",
677
+ "partner",
678
+ "passionate",
679
+ "people",
680
+ "per",
681
+ "play",
682
+ "plus",
683
+ "position",
684
+ "preferred",
685
+ "provide",
686
+ "put",
687
+ "qualifications",
688
+ "quickly",
689
+ "range",
690
+ "related",
691
+ "required",
692
+ "requirements",
693
+ "requirement",
694
+ "responsible",
695
+ "responsibilities",
696
+ "responsibility",
697
+ "result",
698
+ "right",
699
+ "role",
700
+ "run",
701
+ "same",
702
+ "see",
703
+ "seek",
704
+ "seeking",
705
+ "set",
706
+ "several",
707
+ "since",
708
+ "skills",
709
+ "someone",
710
+ "start",
711
+ "state",
712
+ "still",
713
+ "strong",
714
+ "success",
715
+ "successful",
716
+ "support",
717
+ "sure",
718
+ "take",
719
+ "team",
720
+ "then",
721
+ "there",
722
+ "thing",
723
+ "think",
724
+ "through",
725
+ "time",
726
+ "together",
727
+ "top",
728
+ "truly",
729
+ "try",
730
+ "two",
731
+ "type",
732
+ "use",
733
+ "used",
734
+ "using",
735
+ "value",
736
+ "want",
737
+ "way",
738
+ "well",
739
+ "while",
740
+ "within",
741
+ "without",
742
+ "work",
743
+ "working",
744
+ "world",
745
+ "would",
746
+ "year",
747
+ "years",
748
+ // Section headers & structural words (not technical skills)
749
+ "description",
750
+ "overview",
751
+ "summary",
752
+ "duties",
753
+ "bachelor",
754
+ "bachelors",
755
+ "master",
756
+ "masters",
757
+ "degree",
758
+ "phd",
759
+ "minimum",
760
+ "preferred",
761
+ "implement",
762
+ "process",
763
+ "robust",
764
+ "consistent",
765
+ "operations"
555
766
  ]
556
767
  };
557
768
  en_default = en;
@@ -1123,7 +1334,7 @@ var init_genericChecks = __esm({
1123
1334
 
1124
1335
  // src/ats/jdMatcher.ts
1125
1336
// Lowercases `text`, strips everything except letters (incl. common Latin
// diacritics), digits, whitespace, and the skill-relevant symbols / + -
// (so tokens like "ci/cd" and "c++" survive), then returns the words that
// are longer than 2 chars and not in `stopWords`.
function tokenize(text, stopWords) {
  const cleaned = text
    .toLowerCase()
    .replace(/[^a-zA-Z0-9äöüßÄÖÜàáâãéèêëíìîïóòôõúùûüñç\s/+-]/g, " ");
  const words = cleaned.split(/\s+/);
  return words.filter((word) => word.length > 2 && !stopWords.has(word));
}
1128
1339
  function simpleStem(word, language) {
1129
1340
  if (language === "de") {
@@ -1166,20 +1377,158 @@ function buildTfMap(tokens) {
1166
1377
  }
1167
1378
  return tf;
1168
1379
  }
1380
// Splits a job description into requirement-ish text and everything else.
// A line whose (trimmed, punctuation-stripped) start matches a requirements
// header switches collection on; an "about/benefits/…" style header switches
// it off. Lines are routed to whichever bucket is active when they appear.
function splitJdSections(text) {
  const reqHeaderRe = /^(required|requirements?|minimum|preferred|qualifications?|must[\s-]have|nice[\s-]to[\s-]have|what you.?ll|what we.?re looking|skills|technical|you.?ll need|responsibilities)/i;
  const otherHeaderRe = /^(about|summary|who we are|our (company|team|mission)|description|overview|benefits|perks|compensation|salary)/i;
  const requirementLines = [];
  const otherLines = [];
  let collectingRequirements = false;
  for (const rawLine of text.split("\n")) {
    // Strip list/heading punctuation before testing, e.g. "## Requirements:".
    const header = rawLine.trim().replace(/[:#*-]/g, "").trim();
    if (reqHeaderRe.test(header)) {
      collectingRequirements = true;
    } else if (otherHeaderRe.test(header)) {
      collectingRequirements = false;
    }
    (collectingRequirements ? requirementLines : otherLines).push(rawLine);
  }
  return {
    requirementText: requirementLines.join("\n"),
    otherText: otherLines.join("\n")
  };
}
1404
// Scans `text` for well-known multi-word technical terms ("machine learning",
// "ci/cd", …) and returns them lowercased with whitespace collapsed, in
// pattern order, de-duplicated.
function extractCompoundTerms(text) {
  const compoundPatterns = [
    /\b(machine\s+learning)\b/gi,
    /\b(deep\s+learning)\b/gi,
    /\b(computer\s+vision)\b/gi,
    /\b(natural\s+language\s+processing)\b/gi,
    /\b(data\s+pipelines?)\b/gi,
    /\b(data\s+models?)\b/gi,
    /\b(data\s+engineering)\b/gi,
    /\b(data\s+structures?)\b/gi,
    /\b(data\s+quality)\b/gi,
    /\b(data\s+flows?)\b/gi,
    /\b(data\s+orchestration)\b/gi,
    /\b(data\s+warehou\w+)\b/gi,
    /\b(synthetic\s+data)\b/gi,
    /\b(ci\s*\/?\s*cd)\b/gi,
    /\b(rest\s+api)\b/gi,
    /\b(open\s+source)\b/gi,
    /\b(human[\s-]+in[\s-]+the[\s-]+loop)\b/gi,
    /\b(self[\s-]+service)\b/gi,
    /\b(agentic\s+workflows?)\b/gi,
    /\b(distributed\s+systems?)\b/gi,
    /\b(cloud\s+infrastructure)\b/gi,
    /\b(micro\s*services?)\b/gi,
    /\b(full[\s-]+stack)\b/gi,
    /\b(front[\s-]*end)\b/gi,
    /\b(back[\s-]*end)\b/gi,
    /\b(sql\s*\/?\s*nosql)\b/gi
  ];
  const seen = /* @__PURE__ */ new Set();
  const terms = [];
  for (const pattern of compoundPatterns) {
    for (const match of text.matchAll(pattern)) {
      // Normalize: lowercase and collapse internal runs of whitespace.
      const normalized = match[1]?.toLowerCase().replace(/\s+/g, " ").trim();
      if (normalized && !seen.has(normalized)) {
        seen.add(normalized);
        terms.push(normalized);
      }
    }
  }
  return terms;
}
1445
// Collects lowercase words that look like brand/company names so the keyword
// extractor can exclude them. Combines heuristic patterns ("at <Name>",
// "<Name> is/Inc/Corp/…", "join/about <Name>") with a static list of
// well-known companies and products.
// Fix: the lead words were matched case-sensitively in lowercase only, so the
// very common sentence-initial / header forms "About Acme", "Join Acme",
// "At Acme" were missed. The lead words now accept either case; the captured
// name must still start with a capital letter, so all previous matches remain.
function extractBrandNames(text) {
  const brands = /* @__PURE__ */ new Set();
  const brandPatterns = [
    // "at Acme Corp" / "At Acme Corp"
    /\b[Aa]t\s+([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)/g,
    // "Acme is …" / "Acme Inc" at start of text or start of a sentence
    /(?:^|\.\s+)([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)\s+(?:is|are|was|has|Inc|Corp|Ltd|GmbH)/g,
    // "Join Acme" / "Joining Acme" / "About Acme" — common JD section leads
    /\b(?:[Jj]oin(?:ing)?|[Aa]bout)\s+([A-Z][a-zA-Z]+)/g
  ];
  for (const pattern of brandPatterns) {
    const matches = text.matchAll(pattern);
    for (const m of matches) {
      const name = m[1]?.toLowerCase();
      if (name) {
        // Store each word of a multi-word name; skip tokens tokenize() would
        // drop anyway (length <= 2).
        for (const word of name.split(/\s+/)) {
          if (word.length > 2) brands.add(word);
        }
      }
    }
  }
  const knownBrands = [
    "apple",
    "google",
    "meta",
    "facebook",
    "amazon",
    "microsoft",
    "netflix",
    "uber",
    "airbnb",
    "twitter",
    "linkedin",
    "spotify",
    "stripe",
    "shopify",
    "iphone",
    "ipad",
    "mac",
    "macbook",
    "airpods",
    "android",
    "windows",
    "alexa",
    "siri",
    "cortana",
    "gmail",
    "chrome",
    "safari",
    "firefox"
  ];
  for (const b of knownBrands) brands.add(b);
  return brands;
}
1169
1496
// Extracts up to `maxKeywords` ranked keywords from a job description.
// Compound terms (e.g. "machine learning") are detected first and always lead
// the list; single-word keywords are ranked by term frequency, with tokens
// from requirement sections counted three times so requirement vocabulary
// outranks boilerplate. Brand/company names and stop words are filtered out.
// Fixes: (1) `maxKeywords - compoundTerms.length` could go negative, and a
// negative slice end wrongly kept keywords from the tail of the ranking —
// now clamped to 0; (2) requirement-token membership is checked via a Set
// instead of Array#includes inside the filter (O(n·m) -> O(n)).
function extractKeywords(text, language, maxKeywords = 30) {
  const langData = getLanguageData(language);
  const stopWords = new Set(langData.stopWords);
  const compoundTerms = extractCompoundTerms(text);
  const brandNames = extractBrandNames(text);
  const { requirementText, otherText } = splitJdSections(text);
  const reqTokens = tokenize(requirementText, stopWords).filter((t) => !brandNames.has(t));
  const otherTokens = tokenize(otherText, stopWords).filter((t) => !brandNames.has(t));
  // Requirement tokens repeated 3x => 3x weight in the TF ranking below.
  const allTokens = [...reqTokens, ...reqTokens, ...reqTokens, ...otherTokens];
  const stemmed = allTokens.map((t) => simpleStem(t, language));
  const tf = buildTfMap(stemmed);
  // Remember the first surface form seen for each stem so output is readable.
  const stemToOriginal = /* @__PURE__ */ new Map();
  for (let i = 0; i < allTokens.length; i++) {
    const stem = stemmed[i] ?? "";
    if (!stemToOriginal.has(stem)) {
      stemToOriginal.set(stem, allTokens[i] ?? "");
    }
  }
  // Words that only occur as part of a compound term (and never on their own
  // in the requirements) are dropped so e.g. "learning" doesn't duplicate
  // "machine learning".
  const compoundWordSet = new Set(compoundTerms.join(" ").split(/\s+/));
  const reqTokenSet = new Set(reqTokens);
  const singleSlots = Math.max(0, maxKeywords - compoundTerms.length);
  const singleKeywords = [...tf.entries()]
    .filter(([stem]) => stem.length > 2)
    .filter(([stem]) => {
      const original = stemToOriginal.get(stem) || stem;
      return !(compoundWordSet.has(original) && !reqTokenSet.has(original));
    })
    .sort((a, b) => b[1] - a[1])
    .slice(0, singleSlots)
    .map(([stem]) => stemToOriginal.get(stem) || stem);
  // Merge with compounds first, de-duplicated, capped at maxKeywords.
  const seen = /* @__PURE__ */ new Set();
  const keywords = [];
  for (const term of [...compoundTerms, ...singleKeywords]) {
    if (!seen.has(term)) {
      seen.add(term);
      keywords.push(term);
    }
  }
  return keywords.slice(0, maxKeywords);
}
1184
1533
  function matchJobDescription(resume, jobDescription, language = "en") {
1185
1534
  const langData = getLanguageData(language);
@@ -1192,11 +1541,24 @@ function matchJobDescription(resume, jobDescription, language = "en") {
1192
1541
  const matched = [];
1193
1542
  const missing = [];
1194
1543
  for (const keyword of jdKeywords) {
1195
- const stem = simpleStem(keyword, language);
1196
- if (resumeStems.has(stem) || resumeTokenSet.has(keyword.toLowerCase())) {
1197
- matched.push(keyword);
1544
+ if (keyword.includes(" ")) {
1545
+ const parts = keyword.split(/\s+/);
1546
+ const allPartsMatch = parts.every((part) => {
1547
+ const stem = simpleStem(part, language);
1548
+ return resumeStems.has(stem) || resumeTokenSet.has(part.toLowerCase());
1549
+ });
1550
+ if (allPartsMatch) {
1551
+ matched.push(keyword);
1552
+ } else {
1553
+ missing.push(keyword);
1554
+ }
1198
1555
  } else {
1199
- missing.push(keyword);
1556
+ const stem = simpleStem(keyword, language);
1557
+ if (resumeStems.has(stem) || resumeTokenSet.has(keyword.toLowerCase())) {
1558
+ matched.push(keyword);
1559
+ } else {
1560
+ missing.push(keyword);
1561
+ }
1200
1562
  }
1201
1563
  }
1202
1564
  const matchPercentage = jdKeywords.length > 0 ? Math.round(matched.length / jdKeywords.length * 100) : 0;