npm - resuml - Versions diffs - 1.12.0 → 1.12.1 - Mend

resuml 1.12.0 → 1.12.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.cjs CHANGED Viewed

@@ -762,7 +762,71 @@ var init_en = __esm({
         "process",
         "robust",
         "consistent",
-        "operations"
+        "operations",
+        // URL/email/domain fragments
+        "http",
+        "https",
+        "www",
+        "com",
+        "org",
+        "net",
+        "mailto",
+        // Resume/YAML schema field names (in case raw YAML is pasted)
+        "name",
+        "keywords",
+        "highlights",
+        "startdate",
+        "enddate",
+        "website",
+        "profiles",
+        "basics",
+        "position",
+        "institution",
+        "studytype",
+        "fluency",
+        "issuer",
+        "network",
+        "username",
+        "countrycode",
+        "region",
+        // Generic nouns that aren't skills
+        "product",
+        "company",
+        "service",
+        "services",
+        "platform",
+        "solutions",
+        "ability",
+        "opportunity",
+        "candidate",
+        "applicant",
+        "position",
+        "salary",
+        "compensation",
+        "benefits",
+        "perks",
+        "bonus",
+        "development",
+        "management",
+        "knowledge",
+        "modern",
+        "advanced",
+        "practices",
+        "nice",
+        "technologies",
+        "technology",
+        "frameworks",
+        "framework",
+        "tools",
+        "data",
+        "based",
+        "contribute",
+        "contributions",
+        "migration",
+        "leading",
+        "source",
+        "visit",
+        "join"
       ]
     };
     en_default = en;
@@ -1333,8 +1397,20 @@ var init_genericChecks = __esm({
 });
 // src/ats/jdMatcher.ts
+function stripNoise(text) {
+  return text.replace(/https?:\/\/[^\s]+/gi, " ").replace(/www\.[^\s]+/gi, " ").replace(/[\w.+-]+@[\w.-]+\.[a-z]{2,}/gi, " ").replace(/(?:^|\s)\/[\w/.-]+/g, " ").replace(/\b[a-z]+[A-Z][a-zA-Z]*\b/g, (match2) => {
+    return match2.replace(/([a-z])([A-Z])/g, "$1 $2");
+  });
+}
 function tokenize(text, stopWords) {
-  return text.toLowerCase().replace(/[^a-zA-Z0-9äöüßÄÖÜàáâãéèêëíìîïóòôõúùûüñç\s/+-]/g, " ").split(/\s+/).filter((word) => word.length > 2 && !stopWords.has(word));
+  return text.toLowerCase().replace(/[^a-zA-Z0-9äöüßÄÖÜàáâãéèêëíìîïóòôõúùûüñç\s/+-]/g, " ").split(/\s+/).filter((word) => {
+    if (word.length <= 2) return false;
+    if (stopWords.has(word)) return false;
+    if (word.startsWith("//") || word.startsWith("http")) return false;
+    if (/^\d+$/.test(word)) return false;
+    if (/^[/+-]+$/.test(word)) return false;
+    return true;
+  });
 }
 function simpleStem(word, language) {
   if (language === "de") {
@@ -1496,12 +1572,17 @@ function extractBrandNames(text) {
 function extractKeywords(text, language, maxKeywords = 30) {
   const langData = getLanguageData(language);
   const stopWords = new Set(langData.stopWords);
-  const compoundTerms = extractCompoundTerms(text);
+  const cleanText = stripNoise(text);
+  const compoundTerms = extractCompoundTerms(cleanText);
   const brandNames = extractBrandNames(text);
-  const { requirementText, otherText } = splitJdSections(text);
-  const reqTokens = tokenize(requirementText, stopWords).filter((t) => !brandNames.has(t));
-  const otherTokens = tokenize(otherText, stopWords).filter((t) => !brandNames.has(t));
-  const allTokens = [...reqTokens, ...reqTokens, ...reqTokens, ...otherTokens];
+  const { requirementText } = splitJdSections(cleanText);
+  const hasRequirementSections = requirementText.trim().length > 0;
+  let allTokens;
+  if (hasRequirementSections) {
+    allTokens = tokenize(requirementText, stopWords).filter((t) => !brandNames.has(t));
+  } else {
+    allTokens = tokenize(cleanText, stopWords).filter((t) => !brandNames.has(t));
+  }
   const stemmed = allTokens.map((t) => simpleStem(t, language));
   const tf = buildTfMap(stemmed);
   const stemToOriginal = /* @__PURE__ */ new Map();
@@ -1515,7 +1596,7 @@ function extractKeywords(text, language, maxKeywords = 30) {
   const compoundWordSet = new Set(compoundsFlat);
   const singleKeywords = [...tf.entries()].filter(([stem]) => stem.length > 2).filter(([stem]) => {
     const original = stemToOriginal.get(stem) || stem;
-    if (compoundWordSet.has(original) && !reqTokens.includes(original)) {
+    if (compoundWordSet.has(original) && !allTokens.includes(original)) {
       return false;
     }
     return true;