npm - @pranavraut033/ats-checker - Versions diffs - 1.3.0 → 1.3.2 - Mend

@pranavraut033/ats-checker 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md CHANGED

@@ -52,9 +52,12 @@ import { analyzeResume } from "@pranavraut033/ats-checker";
 const result = analyzeResume({
   resumeText: `
     Software Engineer with 5 years of experience.
-    Skills: JavaScript, TypeScript, React, Node.js, SQL
-    Experience: Senior Engineer at ExampleCorp (Jan 2020 - Present)
-    Education: B.S. Computer Science
+    Skills
+    JavaScript, TypeScript, React, Node.js, SQL
+    Experience
+    Senior Engineer at ExampleCorp (Jan 2020 - Present)
+    Education
+    B.S. Computer Science
   `,
   jobDescription: `
     Frontend engineer role. Must have React, TypeScript, accessibility best practices.
@@ -63,11 +66,11 @@ const result = analyzeResume({
   config: { referenceDate: "2026-01-01" }, // freeze clock for reproducible scores
 });
-console.log(result.score);            // e.g. 72.45
+console.log(result.score);            // 44.44
 console.log(result.matchedSkills);    // ["javascript", "node", "react", "typescript"]
-console.log(result.missingSkills);    // ["accessibility best practices", "graphql"]
+console.log(result.missingSkills);    // ["accessibility", "frontend", "graphql"]
 console.log(result.experienceGap);    // 0 (requirement met)
-console.log(result.suggestions);      // ["Add GraphQL to your skills section", ...]
+console.log(result.suggestions);      // ["Highlight these required skills: accessibility, frontend, graphql", ...]
 ```
 ---
@@ -102,6 +105,10 @@ console.log(result.suggestions);      // ["Add GraphQL to your skills section",
 The `keywords` sub-score is a **weighted** coverage ratio, not a flat count: each JD keyword gets a weight from its location (required > preferred > body text) and frequency, so missing a required keyword drops the score more than missing one mentioned once in the body.
+> **Caveat — malformed/copy-pasted JD text:** required/preferred detection scans each line for literal trigger phrases (`required`, `must`, `nice to have`, `preferred`). Job postings copy-pasted from a wrapped/columned source sometimes split words across line breaks (e.g. `"Nice to\n\nhaveExperience..."`), which breaks these phrases across two lines and silently drops them into the unweighted body-keyword bucket instead of required/preferred. Skill keywords themselves (e.g. `react`, `python/fastapi`) are still picked up via the whole-text token scan and unaffected. If a JD looks oddly broken, paste it through a plain-text cleanup pass first, or expect required/preferred weighting to under-count.
+The `education` sub-score normalizes degree abbreviations on both sides to a canonical level (`bachelor`, `master`, `phd`, `mba`, `associate`) before comparing — so a resume listing "B.S. Computer Science" satisfies a JD requiring "Bachelor's degree".
 ---
 ## Configuration
@@ -288,6 +295,23 @@ if (result.warnings.length) {
 }
 ```
+### OCR fallback for scanned PDFs
+`extractTextFromPDF` accepts an optional `ocrFallback` that's only invoked when the text layer comes back too short (default threshold: 100 chars). The OCR engine and its dependency are entirely your choice — the core library never bundles one:
+```typescript
+const resumeText = await extractTextFromPDF(bytes, {
+  ocrFallback: async (data) => {
+    // bring your own OCR engine, e.g. tesseract.js or a cloud OCR API
+    const { recognize } = await import("tesseract.js");
+    const { data: { text } } = await recognize(data, "eng");
+    return text;
+  },
+});
+```
+If `ocrFallback` throws or returns text that isn't longer than the text-layer result, `extractTextFromPDF` silently keeps the original result — OCR failures never break the deterministic extraction path.
 ---
 ## LLM Integration (deprecated)

package/dist/index.cjs CHANGED

@@ -282,15 +282,38 @@ var LEVEL_RANK = {
   fluent: 5,
   native: 6,
   "native speaker": 6,
-  bilingual: 6
+  bilingual: 6,
+  // German
+  grundkenntnisse: 1,
+  gering: 2,
+  gut: 3,
+  fortgeschritten: 4,
+  flie\u00DFend: 5,
+  muttersprache: 6,
+  muttersprachler: 6,
+  // French
+  "d\xE9butant": 1,
+  "\xE9l\xE9mentaire": 1,
+  "limit\xE9": 2,
+  "interm\xE9diaire": 3,
+  "avanc\xE9": 4,
+  courant: 5,
+  natif: 6,
+  "langue maternelle": 6,
+  bilingue: 6
 };
 var LANGUAGE_GROUP = KNOWN_LANGUAGES.join("|");
 var LEVEL_GROUP = Object.keys(LEVEL_RANK).sort((a, b) => b.length - a.length).map((l) => l.replace(/\s+/g, "\\s+")).join("|");
+var BOUNDARY_START = "(?:^|(?<=[^a-z\xE0-\xFF]))";
+var BOUNDARY_END = "(?:$|(?=[^a-z\xE0-\xFF]))";
 var LANGUAGE_LEVEL_RE = new RegExp(
-  `\\b(${LANGUAGE_GROUP})\\b(?:\\s*[\\(:\\-]?\\s*(${LEVEL_GROUP}|[abc][12]))?`,
+  `\\b(${LANGUAGE_GROUP})\\b(?:\\s*[\\(:\\-]?\\s*(${BOUNDARY_START}(?:${LEVEL_GROUP})${BOUNDARY_END}|[abc][12]))?`,
+  "gi"
+);
+var LEVEL_BEFORE_LANGUAGE_RE = new RegExp(
+  `${BOUNDARY_START}(${LEVEL_GROUP})${BOUNDARY_END}\\s+(?:in\\s+)?(${LANGUAGE_GROUP})\\b`,
   "gi"
 );
-var LEVEL_BEFORE_LANGUAGE_RE = new RegExp(`\\b(${LEVEL_GROUP})\\s+(?:in\\s+)?(${LANGUAGE_GROUP})\\b`, "gi");
 function canonicalLanguage(name) {
   const lower = name.toLowerCase();
   return LANGUAGE_ALIASES[lower] ?? lower;
@@ -340,9 +363,9 @@ function diffLanguages(resumeLanguages, requiredLanguages) {
 // src/core/parser/jd.parser.ts
 var DEGREE_VARIANTS = [
-  [/\b(?:bachelor(?:'s)?|b\.s\.?|bs\.?|bsc\.?)\b/i, "bachelor"],
-  [/\b(?:master(?:'s)?|m\.s\.?|ms\.?|msc\.?)\b/i, "master"],
-  [/\b(?:phd|ph\.d\.?|doctorate)\b/i, "phd"],
+  [/\b(?:bachelor(?:'s)?|b\.s\.?|bs\.?|bsc\.?|licence)\b/i, "bachelor"],
+  [/\b(?:master(?:'s)?|m\.s\.?|ms\.?|msc\.?|diplom)\b/i, "master"],
+  [/\b(?:phd|ph\.d\.?|doctorate|doktor|doctorat)\b/i, "phd"],
   [/\bmba\b/i, "mba"],
   [/\bassociate(?:'s)?\b/i, "associate"]
 ];
@@ -365,16 +388,15 @@ function extractPreferredSkills(lines) {
   return preferred;
 }
 function extractRoleKeywords(text) {
-  const roleMatch = text.match(/(engineer|developer|manager|scientist|analyst|designer|architect)/i);
-  const titleTokens = roleMatch ? roleMatch[0].split(/\s+/) : [];
-  return unique(tokenize(titleTokens.join(" ") || text.split(/\n/)[0] || ""));
+  const roleMatches = text.match(/(engineer|developer|manager|scientist|analyst|designer|architect|director|consultant|lead|vp)/gi) ?? [];
+  const fallback = roleMatches.length === 0 ? [text.split(/\n/)[0] ?? ""] : [];
+  return unique(tokenize([...roleMatches, ...fallback].join(" ")));
 }
 function extractMinExperience(text) {
-  const match = text.match(/(\d{1,2})\+?\s+(?:years|yrs)/i);
-  if (match) {
-    return Number.parseInt(match[1], 10);
-  }
-  return void 0;
+  const match = text.match(/(\d{1,2})\+?\s*(?:years?|yrs\.?|jahre?|ans?|années?)/i);
+  if (!match) return void 0;
+  const parsed = Number.parseInt(match[1], 10);
+  return parsed <= 60 ? parsed : void 0;
 }
 var SURFACE_TOKEN_RE = /[a-z0-9][a-z0-9.#+\-/]*[a-z0-9#+]/gi;
 function collectKeywordSurfaceForms(rawText, aliases) {
@@ -388,7 +410,16 @@ function collectKeywordSurfaceForms(rawText, aliases) {
   }
   return surfaceForms;
 }
-function extractEducationRequirements(text) {
+var LANG_SECTION_RE = /^\s*(?:languages?|sprache|langue)s?\s*[:\-–—]?\s*/i;
+var LANG_REQUIREMENT_HINT_RE = /\b(fluent|required|must|need|speak|proficient|native|conversational|intermediate|advanced|professional|[abc][12])\b/i;
+function isLanguageRequired(lang, jobDescription) {
+  return splitLines(jobDescription).some((line) => {
+    const lower = line.toLowerCase();
+    if (!lower.includes(lang.name)) return false;
+    return LANG_SECTION_RE.test(line) || LANG_REQUIREMENT_HINT_RE.test(line);
+  });
+}
+function extractDegreeLevels(text) {
   const found = /* @__PURE__ */ new Set();
   for (const [pattern, canonical] of DEGREE_VARIANTS) {
     if (pattern.test(text)) found.add(canonical);
@@ -426,11 +457,13 @@ function parseJobDescription(jobDescription, config) {
     roleKeywords,
     keywords,
     minExperienceYears: extractMinExperience(jobDescription),
-    educationRequirements: extractEducationRequirements(jobDescription),
+    educationRequirements: extractDegreeLevels(jobDescription),
     keywordSurfaceForms: collectKeywordSurfaceForms(jobDescription, config.skillAliases),
-    // ponytail: any language mention in the JD is treated as a requirement — good enough until
-    // JDs that merely *reference* a language (not require it) show up as false positives.
-    requiredLanguages: parseLanguageMentions(jobDescription)
+    // A language only counts as required if its mention carries a requirement/level cue
+    // or sits in a "Languages:" line — plain references ("our Berlin office") don't count.
+    requiredLanguages: parseLanguageMentions(jobDescription).filter(
+      (lang) => isLanguageRequired(lang, jobDescription)
+    )
   };
 }
@@ -459,11 +492,37 @@ var MONTHS = {
   nov: 11,
   november: 11,
   dec: 12,
-  december: 12
+  december: 12,
+  // German
+  januar: 1,
+  j\u00E4nner: 1,
+  februar: 2,
+  m\u00E4rz: 3,
+  maerz: 3,
+  mai: 5,
+  juni: 6,
+  juli: 7,
+  oktober: 10,
+  dezember: 12,
+  // French
+  janvier: 1,
+  f\u00E9vrier: 2,
+  fevrier: 2,
+  mars: 3,
+  avril: 4,
+  juin: 6,
+  juillet: 7,
+  ao\u00FBt: 8,
+  aout: 8,
+  septembre: 9,
+  octobre: 10,
+  novembre: 11,
+  d\u00E9cembre: 12,
+  decembre: 12
 };
 function parseDateToken(raw) {
   const cleaned = raw.trim().toLowerCase();
-  const monthMatch = cleaned.match(/([a-z]{3,9})\s*(\d{4})/i);
+  const monthMatch = cleaned.match(/([a-zà-ÿ]{3,9})\s*(\d{4})/i);
   if (monthMatch) {
     const monthName = monthMatch[1].toLowerCase();
     const year = Number.parseInt(monthMatch[2], 10);
@@ -495,14 +554,14 @@ function monthsBetween(start, end) {
 function parseDateRange(text, referenceDate) {
   const normalized = text.trim();
   const rangeMatch = normalized.match(
-    /(\d{1,2}\/\d{4}|[A-Za-z]{3,9}\s+\d{4}|\d{4})\s*(?:-|to|–|—)\s*(Present|Current|Now|\d{1,2}\/\d{4}|[A-Za-z]{3,9}\s+\d{4}|\d{4})/i
+    /(\d{1,2}\/\d{4}|[A-Za-zà-ÿ]{3,9}\s+\d{4}|\d{4})\s*(?:-|to|through|until|bis|jusqu'à|à|–|—)\s*(Present|Current|Now|Aktuell|Heute|Actuellement|Présent|\d{1,2}\/\d{4}|[A-Za-zà-ÿ]{3,9}\s+\d{4}|\d{4})/i
   );
   if (!rangeMatch) {
     return null;
   }
   const startToken = parseDateToken(rangeMatch[1]);
   const endRaw = rangeMatch[2];
-  const isPresent = /present|current|now/i.test(endRaw);
+  const isPresent = /present|current|now|aktuell|heute|actuellement|présent|actuel/i.test(endRaw);
   const endToken = isPresent ? void 0 : parseDateToken(endRaw);
   if (!startToken) {
     return null;
@@ -556,12 +615,21 @@ function sumExperienceYears(ranges) {
 // src/core/parser/resume.parser.ts
 var SECTION_ALIASES = {
-  summary: ["summary", "profile", "about"],
-  experience: ["experience", "work experience", "professional experience", "employment"],
-  skills: ["skills", "technical skills", "technologies"],
-  education: ["education", "academics", "academic background"],
-  projects: ["projects", "portfolio"],
-  certifications: ["certifications", "licenses"]
+  summary: ["summary", "profile", "about", "zusammenfassung", "profil", "r\xE9sum\xE9", "\xE0 propos"],
+  experience: [
+    "experience",
+    "work experience",
+    "professional experience",
+    "employment",
+    "erfahrung",
+    "berufserfahrung",
+    "exp\xE9rience",
+    "exp\xE9rience professionnelle"
+  ],
+  skills: ["skills", "technical skills", "technologies", "f\xE4higkeiten", "kenntnisse", "comp\xE9tences"],
+  education: ["education", "academics", "academic background", "ausbildung", "formation", "\xE9tudes"],
+  projects: ["projects", "portfolio", "projekte", "projets"],
+  certifications: ["certifications", "licenses", "zertifizierungen", "certifications professionnelles"]
 };
 var STRONG_VERBS = [
   "led",
@@ -639,7 +707,9 @@ function extractSections(text) {
 }
 function parseSkills(sectionContent, aliases) {
   if (!sectionContent) return [];
-  const raw = sectionContent.split(/[,;\n]/).map((skill) => skill.trim()).filter(Boolean);
+  const hasBullets = /[•·‣▪○●◦]/.test(sectionContent);
+  const normalized = hasBullets ? sectionContent.replace(/\n/g, " ") : sectionContent;
+  const raw = normalized.split(/[,;\n]|[•·‣▪○●◦]/).map((skill) => skill.trim().replace(/^[-•·‣▪○●◦\s]+|[-•·‣▪○●◦\s]+$/g, "").trim()).filter(Boolean);
   return normalizeSkills(raw, aliases);
 }
 function parseActionVerbs(text) {
@@ -672,7 +742,7 @@ function parseExperience(sectionContent, referenceDate) {
       }
       continue;
     }
-    const titleMatch = line.match(/^(Senior|Lead|Principal|Staff|Software|Full\s*Stack|Frontend|Backend|Engineer|Developer|Manager|Analyst)[^,-]*/i);
+    const titleMatch = line.match(/^(Senior|Lead|Principal|Staff|VP|Director|Consultant|Architect|Software|Full\s*Stack|Frontend|Backend|Engineer|Developer|Manager|Analyst)[^,-]*/i);
     if (titleMatch) {
       const title = titleMatch[0].trim();
       jobTitles.push(title.toLowerCase());
@@ -710,7 +780,8 @@ function parseResume(resumeText, config) {
     const textToScan = sections.summary ?? normalizedText;
     const yearsMatch = textToScan.match(/(\d{1,2})\+?\s*years?/i);
     if (yearsMatch) {
-      totalExperienceYears = Number.parseInt(yearsMatch[1], 10);
+      const parsed = Number.parseInt(yearsMatch[1], 10);
+      totalExperienceYears = parsed <= 60 ? parsed : 0;
     }
   }
   const requiredSections = ["summary", "experience", "skills", "education"];
@@ -946,10 +1017,9 @@ function scoreEducation(resume, job) {
   if (job.educationRequirements.length === 0) {
     return 100;
   }
-  const resumeEducationText = resume.educationEntries.join(" ");
-  const normalizedEducation = resumeEducationText.toLowerCase();
+  const resumeDegreeLevels = extractDegreeLevels(resume.educationEntries.join(" "));
   const matched = job.educationRequirements.filter(
-    (requirement) => normalizedEducation.includes(requirement.toLowerCase())
+    (requirement) => resumeDegreeLevels.includes(requirement)
   );
   if (matched.length === 0) {
     return 0;