npm - @pranavraut033/ats-checker - Versions diffs - 1.1.0 → 1.2.0 - Mend

@pranavraut033/ats-checker 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/README.md CHANGED

@@ -19,6 +19,7 @@ Zero-dependency TypeScript library that scores a resume against a job descriptio
 - **Explainable** — breakdown by category (skills / experience / keywords / education) plus matched and missing skill/keyword lists
 - **Configurable** — adjust weights, add skill aliases, define custom penalty rules
 - **Zero dependencies** — core library has no runtime deps; ships ESM + CJS
+- **PDF input** — optional `/pdf` subpath extracts resume text from a PDF buffer (requires `pdfjs-dist` peer dep)
 - **Built-in profiles** — software engineer, data scientist, product manager out of the box
 ---
@@ -82,7 +83,8 @@ console.log(result.suggestions);      // ["Add GraphQL to your skills section",
 | `warnings` | `string[]` | Parse warnings and section alerts |
 | `experienceGap` | `number` | Years below JD minimum; `0` when met |
 | `detectedSections` | `string[]` | Resume sections the parser found |
-| `parsedExperienceYears` | `number` | Total years from resume date ranges |
+| `parsedExperienceYears` | `number` | Total years from resume date ranges (overlap-deduplicated) |
+| `experienceEntries` | `ParsedExperienceEntry[]` | Parsed job entries: `title`, `company`, `dates` (with `start`/`end`/`durationInMonths`) |
 **Scoring formula:**
 `score = skills×0.30 + experience×0.30 + keywords×0.25 + education×0.15` → clamped to 0–100 → rule penalties subtracted.
@@ -183,6 +185,41 @@ const result = analyzeResume({
 ---
+## PDF Input
+Extract text from a PDF resume before passing it to `analyzeResume`. This uses `pdfjs-dist` as an optional peer dependency — the core library stays zero-dep.
+```bash
+npm install pdfjs-dist
+```
+```typescript
+import { extractTextFromPDF } from "@pranavraut033/ats-checker/pdf";
+import { analyzeResume } from "@pranavraut033/ats-checker";
+import { readFileSync } from "fs";
+const bytes = readFileSync("resume.pdf");
+const resumeText = await extractTextFromPDF(bytes);
+const result = analyzeResume({ resumeText, jobDescription: "..." });
+```
+`extractTextFromPDF` accepts a `Uint8Array` or `ArrayBuffer` and returns a plain `string`. Works in Node.js and the browser (text-layer PDFs only).
+**Multi-column layouts are handled automatically.** The extractor uses glyph x/y coordinates to detect column boundaries and process each column independently, so a two-column resume parses cleanly without interleaved text.
+For PDFs that can't be recovered — scanned/image resumes or exports with no text layer — `analyzeResume` surfaces an actionable message in `result.warnings`. Always check it after PDF input:
+```typescript
+const result = analyzeResume({ resumeText, jobDescription: "..." });
+if (result.warnings.length) {
+  console.warn("Parsing issues:", result.warnings);
+  // e.g. "Almost no text was extracted — the resume may be a scanned/image PDF."
+}
+```
+---
 ## LLM Integration (deprecated)
 `analyzeResumeAsync` accepts an optional `llm` config that rewrites suggestion text via a caller-supplied LLM client. **This path is deprecated** — scores and breakdowns are never touched by LLM. Prefer calling `analyzeResume` and running your own LLM pass on `result.suggestions` if you want AI-enhanced wording.

package/dist/index.d.mts CHANGED

@@ -4,6 +4,11 @@ interface ParsedDateRange {
     start?: string;
     end?: string;
     durationInMonths?: number;
+    /** Numeric year/month of the start and end, for overlap-aware summing. */
+    startYear?: number;
+    startMonth?: number;
+    endYear?: number;
+    endMonth?: number;
 }
 interface ParsedExperienceEntry {
     title?: string;
@@ -248,6 +253,8 @@ interface ATSAnalysisResult {
     detectedSections: string[];
     /** Total years of experience parsed from the resume's date ranges. */
     parsedExperienceYears: number;
+    /** Parsed experience entries from the resume, with titles and date ranges. */
+    experienceEntries: ParsedExperienceEntry[];
 }
 declare const defaultSkillAliases: SkillAliases;

package/dist/index.d.ts CHANGED

@@ -4,6 +4,11 @@ interface ParsedDateRange {
     start?: string;
     end?: string;
     durationInMonths?: number;
+    /** Numeric year/month of the start and end, for overlap-aware summing. */
+    startYear?: number;
+    startMonth?: number;
+    endYear?: number;
+    endMonth?: number;
 }
 interface ParsedExperienceEntry {
     title?: string;
@@ -248,6 +253,8 @@ interface ATSAnalysisResult {
     detectedSections: string[];
     /** Total years of experience parsed from the resume's date ranges. */
     parsedExperienceYears: number;
+    /** Parsed experience entries from the resume, with titles and date ranges. */
+    experienceEntries: ParsedExperienceEntry[];
 }
 declare const defaultSkillAliases: SkillAliases;

package/dist/index.js CHANGED

@@ -2,6 +2,7 @@
 // src/utils/text.ts
 var STOP_WORDS = /* @__PURE__ */ new Set([
+  // articles / prepositions / conjunctions
   "the",
   "and",
   "or",
@@ -17,12 +18,16 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "by",
   "from",
   "as",
+  "into",
+  "onto",
+  "upon",
+  "via",
+  "per",
+  "plus",
+  // verbs / modals
   "is",
   "are",
   "be",
-  "this",
-  "that",
-  "it",
   "was",
   "were",
   "will",
@@ -31,7 +36,98 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
   "must",
   "have",
   "has",
-  "had"
+  "had",
+  "do",
+  "does",
+  "did",
+  "get",
+  "give",
+  "go",
+  "use",
+  "see",
+  "help",
+  "work",
+  "build",
+  "show",
+  "need",
+  "want",
+  "make",
+  "let",
+  // pronouns / determiners
+  "it",
+  "its",
+  "this",
+  "that",
+  "these",
+  "those",
+  "we",
+  "our",
+  "you",
+  "your",
+  "they",
+  "their",
+  "us",
+  "who",
+  "what",
+  "which",
+  "how",
+  // common English fillers that leak into JDs
+  "no",
+  "not",
+  "all",
+  "any",
+  "also",
+  "more",
+  "well",
+  "very",
+  "highly",
+  "across",
+  "over",
+  "under",
+  "within",
+  "about",
+  "out",
+  "up",
+  "down",
+  "new",
+  "if",
+  "so",
+  "such",
+  "both",
+  "each",
+  "one",
+  "many",
+  "only",
+  // JD/HR boilerplate — never skills
+  "years",
+  "year",
+  "experience",
+  "required",
+  "requirement",
+  "requirements",
+  "preferred",
+  "role",
+  "degree",
+  "practices",
+  "best",
+  "skills",
+  "team",
+  "field",
+  "related",
+  "relevant",
+  "desired",
+  "strong",
+  "solid",
+  "good",
+  "first",
+  "based",
+  "day",
+  "week",
+  "month",
+  "time",
+  "fast",
+  "open",
+  "dynamic"
 ]);
 function normalizeWhitespace(text) {
   return text.replace(/\r\n?/g, "\n").replace(/\s+/g, " ").trim();
@@ -45,7 +141,12 @@ function splitLines(text) {
 var TECH_TOKEN_RE = /[a-z0-9][a-z0-9.#+\-/]*[a-z0-9#+]/g;
 function tokenize(text) {
   const normalized = normalizeForComparison(text);
-  return (normalized.match(TECH_TOKEN_RE) ?? []).filter((t) => !STOP_WORDS.has(t));
+  return (normalized.match(TECH_TOKEN_RE) ?? []).filter(
+    (t) => /[a-z]/.test(t) && !STOP_WORDS.has(t)
+  );
+}
+function escapeRegExp(input) {
+  return input.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
 }
 function unique(values) {
   const seen = /* @__PURE__ */ new Set();
@@ -101,37 +202,30 @@ function normalizeSkills(skills, aliases) {
 }
 // src/core/parser/jd.parser.ts
-var DEGREE_KEYWORDS = [
-  "bachelor",
-  "b.s",
-  "bs",
-  "bsc",
-  "master",
-  "m.s",
-  "ms",
-  "msc",
-  "phd",
-  "doctorate",
-  "mba",
-  "associate"
+var DEGREE_VARIANTS = [
+  [/\b(?:bachelor(?:'s)?|b\.s\.?|bs\.?|bsc\.?)\b/i, "bachelor"],
+  [/\b(?:master(?:'s)?|m\.s\.?|ms\.?|msc\.?)\b/i, "master"],
+  [/\b(?:phd|ph\.d\.?|doctorate)\b/i, "phd"],
+  [/\bmba\b/i, "mba"],
+  [/\bassociate(?:'s)?\b/i, "associate"]
 ];
 function extractRequiredSkills(lines) {
   const required = [];
   for (const line of lines) {
     if (/must|require|required|need/i.test(line)) {
-      required.push(...line.split(/[,.;•-]/));
+      required.push(...tokenize(line));
     }
   }
-  return required.map((value) => value.trim()).filter(Boolean);
+  return required;
 }
 function extractPreferredSkills(lines) {
   const preferred = [];
   for (const line of lines) {
     if (/preferred|nice to have|plus/i.test(line)) {
-      preferred.push(...line.split(/[,.;•-]/));
+      preferred.push(...tokenize(line));
     }
   }
-  return preferred.map((value) => value.trim()).filter(Boolean);
+  return preferred;
 }
 function extractRoleKeywords(text) {
   const roleMatch = text.match(/(engineer|developer|manager|scientist|analyst|designer|architect)/i);
@@ -146,23 +240,41 @@ function extractMinExperience(text) {
   return void 0;
 }
 function extractEducationRequirements(text) {
-  const normalized = normalizeForComparison(text);
-  return DEGREE_KEYWORDS.filter((degree) => normalized.includes(degree));
+  const found = /* @__PURE__ */ new Set();
+  for (const [pattern, canonical] of DEGREE_VARIANTS) {
+    if (pattern.test(text)) found.add(canonical);
+  }
+  return [...found];
 }
 function parseJobDescription(jobDescription, config) {
   const normalizedText = normalizeWhitespace(jobDescription);
   const lines = splitLines(jobDescription);
-  const requiredSkillsRaw = extractRequiredSkills(lines);
-  const preferredSkillsRaw = extractPreferredSkills(lines);
+  const skillVocab = /* @__PURE__ */ new Set();
+  for (const [canonical, aliases] of Object.entries(config.skillAliases)) {
+    skillVocab.add(canonical.toLowerCase());
+    for (const alias of aliases) skillVocab.add(alias.toLowerCase());
+  }
+  for (const s of config.profile?.mandatorySkills ?? []) skillVocab.add(s.toLowerCase());
+  for (const s of config.profile?.optionalSkills ?? []) skillVocab.add(s.toLowerCase());
+  const isSkillLike = (t) => {
+    if (skillVocab.has(t)) return true;
+    if (/[.#+]/.test(t) && /[a-z]/.test(t)) return true;
+    if (t.includes("/")) return t.split("/").some((p) => p.length >= 2 && !STOP_WORDS.has(p));
+    return false;
+  };
+  const requiredSkillsRaw = extractRequiredSkills(lines).filter(isSkillLike);
+  const preferredSkillsRaw = extractPreferredSkills(lines).filter(isSkillLike);
   const requiredSkills = normalizeSkills(requiredSkillsRaw, config.skillAliases);
   const preferredSkills = normalizeSkills(preferredSkillsRaw, config.skillAliases);
-  const keywords = unique([...requiredSkills, ...preferredSkills, ...tokenize(normalizedText)]);
+  const bodyTokens = tokenize(normalizedText).filter(isSkillLike);
+  const roleKeywords = extractRoleKeywords(jobDescription);
+  const keywords = unique([...requiredSkills, ...preferredSkills, ...roleKeywords, ...bodyTokens]);
   return {
     raw: jobDescription,
     normalizedText,
     requiredSkills,
     preferredSkills,
-    roleKeywords: extractRoleKeywords(jobDescription),
+    roleKeywords,
     keywords,
     minExperienceYears: extractMinExperience(jobDescription),
     educationRequirements: extractEducationRequirements(jobDescription)
@@ -207,6 +319,14 @@ function parseDateToken(raw) {
       return { year, month };
     }
   }
+  const slashMatch = cleaned.match(/^(\d{1,2})\/(\d{4})$/);
+  if (slashMatch) {
+    const month = Number.parseInt(slashMatch[1], 10);
+    const year = Number.parseInt(slashMatch[2], 10);
+    if (month >= 1 && month <= 12 && !Number.isNaN(year)) {
+      return { year, month };
+    }
+  }
   const yearMatch = cleaned.match(/(20\d{2}|19\d{2})/);
   if (yearMatch) {
     const year = Number.parseInt(yearMatch[1], 10);
@@ -221,7 +341,9 @@ function monthsBetween(start, end) {
 }
 function parseDateRange(text, referenceDate) {
   const normalized = text.trim();
-  const rangeMatch = normalized.match(/([A-Za-z]{3,9}\s+\d{4}|\d{4})\s*(?:-|to|–|—)\s*(Present|Current|Now|[A-Za-z]{3,9}\s+\d{4}|\d{4})/i);
+  const rangeMatch = normalized.match(
+    /(\d{1,2}\/\d{4}|[A-Za-z]{3,9}\s+\d{4}|\d{4})\s*(?:-|to|–|—)\s*(Present|Current|Now|\d{1,2}\/\d{4}|[A-Za-z]{3,9}\s+\d{4}|\d{4})/i
+  );
   if (!rangeMatch) {
     return null;
   }
@@ -242,11 +364,40 @@ function parseDateRange(text, referenceDate) {
     raw: normalized,
     start: rangeMatch[1],
     end: isPresent ? "present" : rangeMatch[2],
-    durationInMonths: durationInMonths > 0 ? durationInMonths : void 0
+    durationInMonths: durationInMonths > 0 ? durationInMonths : void 0,
+    startYear: startToken.year,
+    startMonth: startToken.month,
+    endYear: endTokenResolved.year,
+    endMonth: endTokenResolved.month
   };
 }
 function sumExperienceYears(ranges) {
-  const months = ranges.map((range) => range.durationInMonths ?? 0).reduce((total, value) => total + value, 0);
+  const withBounds = ranges.filter(
+    (r) => r.startYear !== void 0 && r.endYear !== void 0
+  );
+  if (withBounds.length === ranges.length && ranges.length > 0) {
+    const toIndex = (year, month) => year * 12 + month;
+    const intervals = withBounds.map((r) => ({
+      s: toIndex(r.startYear, r.startMonth ?? 1),
+      e: toIndex(r.endYear, r.endMonth ?? 12)
+    })).sort((a, b) => a.s - b.s);
+    let totalMonths = 0;
+    let curStart = intervals[0].s;
+    let curEnd = intervals[0].e;
+    for (let i = 1; i < intervals.length; i++) {
+      const { s, e } = intervals[i];
+      if (s <= curEnd) {
+        curEnd = Math.max(curEnd, e);
+      } else {
+        totalMonths += curEnd - curStart + 1;
+        curStart = s;
+        curEnd = e;
+      }
+    }
+    totalMonths += curEnd - curStart + 1;
+    return Number((totalMonths / 12).toFixed(2));
+  }
+  const months = ranges.reduce((total, r) => total + (r.durationInMonths ?? 0), 0);
   return Number((months / 12).toFixed(2));
 }
@@ -280,9 +431,6 @@ var ACTION_VERBS = [
   "reduced",
   "increased"
 ];
-function escapeRegExp(input) {
-  return input.replace(/[.*+?^${}()|[\\]\\]/g, "\\$&");
-}
 function detectSection(line) {
   const normalized = line.trim().toLowerCase();
   for (const [section, aliases] of Object.entries(SECTION_ALIASES)) {
@@ -381,11 +529,28 @@ function parseResume(resumeText, config) {
   const actionVerbs = parseActionVerbs(normalizedText);
   const experienceData = parseExperience(sections.experience, config.referenceDate);
   const educationEntries = parseEducation(sections.education);
-  const totalExperienceYears = sumExperienceYears(
+  let totalExperienceYears = sumExperienceYears(
     experienceData.entries.map((entry) => entry.dates).filter((range) => Boolean(range))
   );
+  if (totalExperienceYears === 0) {
+    const textToScan = sections.summary ?? normalizedText;
+    const yearsMatch = textToScan.match(/(\d{1,2})\+?\s*years?/i);
+    if (yearsMatch) {
+      totalExperienceYears = Number.parseInt(yearsMatch[1], 10);
+    }
+  }
   const requiredSections = ["summary", "experience", "skills", "education"];
   const warnings = [];
+  const lineCount = splitLines(resumeText).length;
+  if (resumeText.trim().length < 100) {
+    warnings.push(
+      "Almost no text was extracted \u2014 the resume may be a scanned/image PDF. Upload a text-based PDF or paste the text directly."
+    );
+  } else if (lineCount <= 2) {
+    warnings.push(
+      "Resume text has no line breaks \u2014 the PDF layout likely didn't export cleanly (common with multi-column designs). Export as a single-column PDF or paste plain text for accurate parsing."
+    );
+  }
   for (const section of requiredSections) {
     if (!detected.includes(section)) {
       warnings.push(`${section} section not detected`);
@@ -586,10 +751,11 @@ function calculateScore(resume, job, config) {
     overusedKeywords: keywordResult.overusedKeywords,
     suggestions: [],
     warnings: [],
-    // detectedSections / parsedExperienceYears / experienceGap: filled by index.ts
+    // detectedSections / parsedExperienceYears / experienceGap / experienceEntries: filled by index.ts
     experienceGap: experienceResult.missingYears,
     detectedSections: [],
     parsedExperienceYears: 0,
+    experienceEntries: [],
     missingExperienceYears: experienceResult.missingYears,
     educationScore
   };
@@ -613,7 +779,31 @@ var defaultSkillAliases = {
   docker: ["containers"],
   kubernetes: ["k8s"],
   html: ["html5"],
-  css: ["css3"]
+  css: ["css3"],
+  // ML / data science
+  pytorch: ["torch"],
+  tensorflow: ["tf"],
+  "scikit-learn": ["sklearn"],
+  pandas: [],
+  numpy: [],
+  fastapi: [],
+  flask: [],
+  django: [],
+  // data / infra
+  kafka: [],
+  redis: [],
+  elasticsearch: ["elastic"],
+  spark: ["apache spark"],
+  // common pure-letter tech skills (no symbol chars)
+  accessibility: ["a11y"],
+  frontend: ["front-end"],
+  backend: ["back-end"],
+  security: ["cybersecurity"],
+  testing: ["unittest", "pytest"],
+  microservices: [],
+  agile: ["scrum"],
+  blockchain: [],
+  devops: []
 };
 var softwareEngineerProfile = {
   name: "software-engineer",
@@ -740,6 +930,11 @@ var SuggestionEngine = class {
         "Strengthen bullet points with impact verbs (led, built, improved, delivered)."
       );
     }
+    if (input.resume.detectedSections.length < 2 && input.resume.raw.trim().length > 300) {
+      suggestions.push(
+        "Your resume may use a multi-column layout. Export as a single-column PDF or paste plain text \u2014 most ATS systems and this parser work best with a linear layout."
+      );
+    }
     return { suggestions, warnings };
   }
 };
@@ -1400,6 +1595,7 @@ function analyzeResume(input) {
     experienceGap: scoring.experienceGap,
     detectedSections: parsedResume.detectedSections,
     parsedExperienceYears: parsedResume.totalExperienceYears,
+    experienceEntries: parsedResume.experience,
     suggestions,
     warnings: [...suggestionResult.warnings, ...llmWarnings]
   };
@@ -1464,6 +1660,7 @@ async function analyzeResumeAsync(input) {
     experienceGap: scoring.experienceGap,
     detectedSections: parsedResume.detectedSections,
     parsedExperienceYears: parsedResume.totalExperienceYears,
+    experienceEntries: parsedResume.experience,
     suggestions,
     warnings: [...suggestionResult.warnings, ...llmWarnings]
   };