resuml 1.12.0 → 1.12.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries and as they appear in that public registry. It is provided for informational purposes only.
package/dist/index.cjs CHANGED
@@ -762,7 +762,206 @@ var init_en = __esm({
762
762
  "process",
763
763
  "robust",
764
764
  "consistent",
765
- "operations"
765
+ "operations",
766
+ // URL/email/domain fragments
767
+ "http",
768
+ "https",
769
+ "www",
770
+ "com",
771
+ "org",
772
+ "net",
773
+ "mailto",
774
+ // Resume/YAML schema field names (in case raw YAML is pasted)
775
+ "name",
776
+ "keywords",
777
+ "highlights",
778
+ "startdate",
779
+ "enddate",
780
+ "website",
781
+ "profiles",
782
+ "basics",
783
+ "position",
784
+ "institution",
785
+ "studytype",
786
+ "fluency",
787
+ "issuer",
788
+ "network",
789
+ "username",
790
+ "countrycode",
791
+ "region",
792
+ // Generic nouns that aren't skills
793
+ "product",
794
+ "company",
795
+ "service",
796
+ "services",
797
+ "platform",
798
+ "solutions",
799
+ "ability",
800
+ "opportunity",
801
+ "candidate",
802
+ "applicant",
803
+ "position",
804
+ "salary",
805
+ "compensation",
806
+ "benefits",
807
+ "perks",
808
+ "bonus",
809
+ "development",
810
+ "management",
811
+ "knowledge",
812
+ "modern",
813
+ "advanced",
814
+ "practices",
815
+ "nice",
816
+ "technologies",
817
+ "technology",
818
+ "frameworks",
819
+ "framework",
820
+ "tools",
821
+ "data",
822
+ "based",
823
+ "contribute",
824
+ "contributions",
825
+ "migration",
826
+ "leading",
827
+ "source",
828
+ "visit",
829
+ // Common verbs & verb forms (not technical skills, supplement action verbs list)
830
+ "collaborate",
831
+ "collaborating",
832
+ "collaboratively",
833
+ "communicate",
834
+ "communicating",
835
+ "contributing",
836
+ "coordinate",
837
+ "coordinating",
838
+ "demonstrate",
839
+ "demonstrating",
840
+ "design",
841
+ "designing",
842
+ "designed",
843
+ "develop",
844
+ "developing",
845
+ "developed",
846
+ "drive",
847
+ "driving",
848
+ "driven",
849
+ "enable",
850
+ "enabling",
851
+ "evaluate",
852
+ "evaluating",
853
+ "execute",
854
+ "executing",
855
+ "facilitate",
856
+ "facilitating",
857
+ "identify",
858
+ "identifying",
859
+ "influence",
860
+ "influencing",
861
+ "interact",
862
+ "interacting",
863
+ "lead",
864
+ "leverage",
865
+ "leveraging",
866
+ "manage",
867
+ "managing",
868
+ "mentor",
869
+ "mentoring",
870
+ "operate",
871
+ "operating",
872
+ "optimize",
873
+ "optimizing",
874
+ "participate",
875
+ "participating",
876
+ "report",
877
+ "reporting",
878
+ "solve",
879
+ "solving",
880
+ "understand",
881
+ "understanding",
882
+ // Common adjectives & descriptors (not technical skills)
883
+ "fluent",
884
+ "proficient",
885
+ "deep",
886
+ "solid",
887
+ "proven",
888
+ "hands-on",
889
+ "detail-oriented",
890
+ "results-driven",
891
+ "self-motivated",
892
+ "proactive",
893
+ "creative",
894
+ "innovative",
895
+ "dynamic",
896
+ "strategic",
897
+ "analytical",
898
+ "collaborative",
899
+ "effective",
900
+ "efficient",
901
+ "reliable",
902
+ "flexible",
903
+ "adaptable",
904
+ "motivated",
905
+ "dedicated",
906
+ "capable",
907
+ "qualified",
908
+ "diverse",
909
+ "inclusive",
910
+ "global",
911
+ "local",
912
+ "remote",
913
+ "hybrid",
914
+ "onsite",
915
+ "full-time",
916
+ "part-time",
917
+ "contract",
918
+ "permanent",
919
+ // Role titles & department names (not skills themselves)
920
+ "designer",
921
+ "designers",
922
+ "developer",
923
+ "developers",
924
+ "engineer",
925
+ "engineers",
926
+ "manager",
927
+ "managers",
928
+ "director",
929
+ "analyst",
930
+ "analysts",
931
+ "architect",
932
+ "architects",
933
+ "consultant",
934
+ "consultants",
935
+ "specialist",
936
+ "specialists",
937
+ "coordinator",
938
+ "lead",
939
+ "principal",
940
+ "staff",
941
+ "junior",
942
+ "mid",
943
+ "department",
944
+ "organization",
945
+ "division",
946
+ "stakeholder",
947
+ "stakeholders",
948
+ "client",
949
+ "clients",
950
+ "customer",
951
+ "customers",
952
+ // Date & time words
953
+ "date",
954
+ "dates",
955
+ "month",
956
+ "months",
957
+ "week",
958
+ "weeks",
959
+ "daily",
960
+ "weekly",
961
+ "monthly",
962
+ "quarterly",
963
+ "annual",
964
+ "annually"
766
965
  ]
767
966
  };
768
967
  en_default = en;
@@ -1333,8 +1532,20 @@ var init_genericChecks = __esm({
1333
1532
  });
1334
1533
 
1335
1534
  // src/ats/jdMatcher.ts
1535
/**
 * Blank out text that must never be scored as a keyword, and break apart
 * camelCase identifiers so their parts can be tokenized individually.
 *
 * Applied in order:
 *  1. `http://` / `https://` URLs        -> single space
 *  2. bare `www.` hosts                  -> single space
 *  3. e-mail addresses                   -> single space
 *  4. slash-led paths at the start of the string or after whitespace
 *                                        -> single space
 *  5. identifiers that start lowercase and contain an uppercase letter
 *     (e.g. `startDate`) are split into words (`start Date`).
 *
 * Step 5 only inserts a break where at least two lowercase letters are
 * followed by a Capitalised word (uppercase + lowercase). Breaking at every
 * lower->Upper boundary would turn names such as `iOS`, `macOS`, `gRPC`,
 * `jQuery` or `nodeJS` into fragments like "i OS" / "g RPC"; tokenize()
 * discards tokens of two characters or fewer, so those skills would vanish.
 *
 * @param {string} text Raw text (job description or pasted resume/YAML).
 * @returns {string} The text with noise replaced by spaces; casing of the
 *   remaining text is left untouched.
 */
function stripNoise(text) {
  const withoutLinksAndPaths = text
    .replace(/https?:\/\/[^\s]+/gi, " ")
    .replace(/www\.[^\s]+/gi, " ")
    .replace(/[\w.+-]+@[\w.-]+\.[a-z]{2,}/gi, " ")
    .replace(/(?:^|\s)\/[\w/.-]+/g, " ");

  // Only lowercase-led identifiers are candidates, so "JavaScript",
  // "TypeScript", "GitHub" etc. are never touched.
  return withoutLinksAndPaths.replace(
    /\b[a-z]+[A-Z][a-zA-Z]*\b/g,
    (identifier) => identifier.replace(/(?<=[a-z]{2})(?=[A-Z][a-z])/g, " ")
  );
}
1336
1540
  function tokenize(text, stopWords) {
1337
- return text.toLowerCase().replace(/[^a-zA-Z0-9äöüßÄÖÜàáâãéèêëíìîïóòôõúùûüñç\s/+-]/g, " ").split(/\s+/).filter((word) => word.length > 2 && !stopWords.has(word));
1541
+ return text.toLowerCase().replace(/[^a-zA-Z0-9äöüßÄÖÜàáâãéèêëíìîïóòôõúùûüñç\s/+-]/g, " ").split(/\s+/).filter((word) => {
1542
+ if (word.length <= 2) return false;
1543
+ if (stopWords.has(word)) return false;
1544
+ if (word.startsWith("//") || word.startsWith("http")) return false;
1545
+ if (/^\d+$/.test(word)) return false;
1546
+ if (/^[/+-]+$/.test(word)) return false;
1547
+ return true;
1548
+ });
1338
1549
  }
1339
1550
  function simpleStem(word, language) {
1340
1551
  if (language === "de") {
@@ -1491,17 +1702,84 @@ function extractBrandNames(text) {
1491
1702
  "firefox"
1492
1703
  ];
1493
1704
  for (const b of knownBrands) brands.add(b);
1705
+ const locations = [
1706
+ "zurich",
1707
+ "z\xFCrich",
1708
+ "berlin",
1709
+ "london",
1710
+ "paris",
1711
+ "amsterdam",
1712
+ "munich",
1713
+ "m\xFCnchen",
1714
+ "new york",
1715
+ "san francisco",
1716
+ "seattle",
1717
+ "austin",
1718
+ "boston",
1719
+ "chicago",
1720
+ "toronto",
1721
+ "vancouver",
1722
+ "singapore",
1723
+ "tokyo",
1724
+ "sydney",
1725
+ "dublin",
1726
+ "bangalore",
1727
+ "hyderabad",
1728
+ "remote",
1729
+ "hybrid",
1730
+ "onsite",
1731
+ "switzerland",
1732
+ "germany",
1733
+ "france",
1734
+ "spain",
1735
+ "italy",
1736
+ "netherlands",
1737
+ "united states",
1738
+ "united kingdom",
1739
+ "canada",
1740
+ "australia",
1741
+ "india",
1742
+ "japan",
1743
+ "china",
1744
+ "brazil",
1745
+ "israel",
1746
+ "sweden",
1747
+ "norway",
1748
+ "denmark",
1749
+ "finland",
1750
+ "austria",
1751
+ "belgium",
1752
+ "portugal",
1753
+ "ireland",
1754
+ "poland"
1755
+ ];
1756
+ for (const loc of locations) {
1757
+ brands.add(loc);
1758
+ for (const part of loc.split(/\s+/)) {
1759
+ if (part.length > 2) brands.add(part);
1760
+ }
1761
+ }
1494
1762
  return brands;
1495
1763
  }
1496
- function extractKeywords(text, language, maxKeywords = 30) {
1764
+ function extractKeywords(text, language, maxKeywords = 30, extraStopWords) {
1497
1765
  const langData = getLanguageData(language);
1498
- const stopWords = new Set(langData.stopWords);
1499
- const compoundTerms = extractCompoundTerms(text);
1766
+ const stopWords = /* @__PURE__ */ new Set([
1767
+ ...langData.stopWords,
1768
+ ...langData.actionVerbs,
1769
+ ...langData.actionVerbs.map((v) => simpleStem(v, language)),
1770
+ ...extraStopWords || []
1771
+ ]);
1772
+ const cleanText = stripNoise(text);
1773
+ const compoundTerms = extractCompoundTerms(cleanText);
1500
1774
  const brandNames = extractBrandNames(text);
1501
- const { requirementText, otherText } = splitJdSections(text);
1502
- const reqTokens = tokenize(requirementText, stopWords).filter((t) => !brandNames.has(t));
1503
- const otherTokens = tokenize(otherText, stopWords).filter((t) => !brandNames.has(t));
1504
- const allTokens = [...reqTokens, ...reqTokens, ...reqTokens, ...otherTokens];
1775
+ const { requirementText } = splitJdSections(cleanText);
1776
+ const hasRequirementSections = requirementText.trim().length > 0;
1777
+ let allTokens;
1778
+ if (hasRequirementSections) {
1779
+ allTokens = tokenize(requirementText, stopWords).filter((t) => !brandNames.has(t));
1780
+ } else {
1781
+ allTokens = tokenize(cleanText, stopWords).filter((t) => !brandNames.has(t));
1782
+ }
1505
1783
  const stemmed = allTokens.map((t) => simpleStem(t, language));
1506
1784
  const tf = buildTfMap(stemmed);
1507
1785
  const stemToOriginal = /* @__PURE__ */ new Map();
@@ -1515,7 +1793,7 @@ function extractKeywords(text, language, maxKeywords = 30) {
1515
1793
  const compoundWordSet = new Set(compoundsFlat);
1516
1794
  const singleKeywords = [...tf.entries()].filter(([stem]) => stem.length > 2).filter(([stem]) => {
1517
1795
  const original = stemToOriginal.get(stem) || stem;
1518
- if (compoundWordSet.has(original) && !reqTokens.includes(original)) {
1796
+ if (compoundWordSet.has(original) && !allTokens.includes(original)) {
1519
1797
  return false;
1520
1798
  }
1521
1799
  return true;
@@ -1533,7 +1811,19 @@ function extractKeywords(text, language, maxKeywords = 30) {
1533
1811
  function matchJobDescription(resume, jobDescription, language = "en") {
1534
1812
  const langData = getLanguageData(language);
1535
1813
  const stopWords = new Set(langData.stopWords);
1536
- const jdKeywords = extractKeywords(jobDescription, language);
1814
+ const nameStopWords = /* @__PURE__ */ new Set();
1815
+ if (resume.basics?.name) {
1816
+ for (const part of resume.basics.name.toLowerCase().split(/\s+/)) {
1817
+ if (part.length > 2) nameStopWords.add(part);
1818
+ }
1819
+ }
1820
+ if (resume.basics?.location?.city) {
1821
+ nameStopWords.add(resume.basics.location.city.toLowerCase());
1822
+ }
1823
+ if (resume.basics?.location?.region) {
1824
+ nameStopWords.add(resume.basics.location.region.toLowerCase());
1825
+ }
1826
+ const jdKeywords = extractKeywords(jobDescription, language, 30, nameStopWords);
1537
1827
  const resumeText = extractResumeText(resume);
1538
1828
  const resumeTokens = tokenize(resumeText, stopWords);
1539
1829
  const resumeStems = new Set(resumeTokens.map((t) => simpleStem(t, language)));