@pranavraut033/ats-checker 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -52,9 +52,12 @@ import { analyzeResume } from "@pranavraut033/ats-checker";
52
52
  const result = analyzeResume({
53
53
  resumeText: `
54
54
  Software Engineer with 5 years of experience.
55
- Skills: JavaScript, TypeScript, React, Node.js, SQL
56
- Experience: Senior Engineer at ExampleCorp (Jan 2020 - Present)
57
- Education: B.S. Computer Science
55
+ Skills
56
+ JavaScript, TypeScript, React, Node.js, SQL
57
+ Experience
58
+ Senior Engineer at ExampleCorp (Jan 2020 - Present)
59
+ Education
60
+ B.S. Computer Science
58
61
  `,
59
62
  jobDescription: `
60
63
  Frontend engineer role. Must have React, TypeScript, accessibility best practices.
@@ -63,11 +66,11 @@ const result = analyzeResume({
63
66
  config: { referenceDate: "2026-01-01" }, // freeze clock for reproducible scores
64
67
  });
65
68
 
66
- console.log(result.score); // e.g. 72.45
69
+ console.log(result.score); // 44.44
67
70
  console.log(result.matchedSkills); // ["javascript", "node", "react", "typescript"]
68
- console.log(result.missingSkills); // ["accessibility best practices", "graphql"]
71
+ console.log(result.missingSkills); // ["accessibility", "frontend", "graphql"]
69
72
  console.log(result.experienceGap); // 0 (requirement met)
70
- console.log(result.suggestions); // ["Add GraphQL to your skills section", ...]
73
+ console.log(result.suggestions); // ["Highlight these required skills: accessibility, frontend, graphql", ...]
71
74
  ```
72
75
 
73
76
  ---
@@ -102,6 +105,10 @@ console.log(result.suggestions); // ["Add GraphQL to your skills section",
102
105
 
103
106
  The `keywords` sub-score is a **weighted** coverage ratio, not a flat count: each JD keyword gets a weight from its location (required > preferred > body text) and frequency, so missing a required keyword drops the score more than missing one mentioned once in the body.
104
107
 
108
+ > **Caveat — malformed/copy-pasted JD text:** required/preferred detection scans each line for literal trigger phrases (`required`, `must`, `nice to have`, `preferred`). Job postings copy-pasted from a wrapped/columned source sometimes split words across line breaks (e.g. `"Nice to\n\nhaveExperience..."`), which breaks these phrases across two lines and silently drops them into the unweighted body-keyword bucket instead of required/preferred. Skill keywords themselves (e.g. `react`, `python/fastapi`) are still picked up via the whole-text token scan and are unaffected. If a JD looks oddly broken, run it through a plain-text cleanup pass first, or expect required/preferred weighting to under-count.
109
+
110
+ The `education` sub-score normalizes degree abbreviations on both sides to a canonical level (`bachelor`, `master`, `phd`, `mba`, `associate`) before comparing — so a resume listing "B.S. Computer Science" satisfies a JD requiring "Bachelor's degree".
111
+
105
112
  ---
106
113
 
107
114
  ## Configuration
@@ -288,6 +295,23 @@ if (result.warnings.length) {
288
295
  }
289
296
  ```
290
297
 
298
+ ### OCR fallback for scanned PDFs
299
+
300
+ `extractTextFromPDF` accepts an optional `ocrFallback` that's only invoked when the text layer comes back too short (default threshold: 100 chars). The OCR engine and its dependency are entirely your choice — the core library never bundles one:
301
+
302
+ ```typescript
303
+ const resumeText = await extractTextFromPDF(bytes, {
304
+ ocrFallback: async (data) => {
305
+ // bring your own OCR engine, e.g. tesseract.js or a cloud OCR API
306
+ const { recognize } = await import("tesseract.js");
307
+ const { data: { text } } = await recognize(data, "eng");
308
+ return text;
309
+ },
310
+ });
311
+ ```
312
+
313
+ If `ocrFallback` throws or returns text that isn't longer than the text-layer result, `extractTextFromPDF` silently keeps the original result — OCR failures never break the deterministic extraction path.
314
+
291
315
  ---
292
316
 
293
317
  ## LLM Integration (deprecated)
package/dist/index.cjs CHANGED
@@ -282,15 +282,38 @@ var LEVEL_RANK = {
282
282
  fluent: 5,
283
283
  native: 6,
284
284
  "native speaker": 6,
285
- bilingual: 6
285
+ bilingual: 6,
286
+ // German
287
+ grundkenntnisse: 1,
288
+ gering: 2,
289
+ gut: 3,
290
+ fortgeschritten: 4,
291
+ flie\u00DFend: 5,
292
+ muttersprache: 6,
293
+ muttersprachler: 6,
294
+ // French
295
+ "d\xE9butant": 1,
296
+ "\xE9l\xE9mentaire": 1,
297
+ "limit\xE9": 2,
298
+ "interm\xE9diaire": 3,
299
+ "avanc\xE9": 4,
300
+ courant: 5,
301
+ natif: 6,
302
+ "langue maternelle": 6,
303
+ bilingue: 6
286
304
  };
287
305
  var LANGUAGE_GROUP = KNOWN_LANGUAGES.join("|");
288
306
  var LEVEL_GROUP = Object.keys(LEVEL_RANK).sort((a, b) => b.length - a.length).map((l) => l.replace(/\s+/g, "\\s+")).join("|");
307
+ var BOUNDARY_START = "(?:^|(?<=[^a-z\xE0-\xFF]))";
308
+ var BOUNDARY_END = "(?:$|(?=[^a-z\xE0-\xFF]))";
289
309
  var LANGUAGE_LEVEL_RE = new RegExp(
290
- `\\b(${LANGUAGE_GROUP})\\b(?:\\s*[\\(:\\-]?\\s*(${LEVEL_GROUP}|[abc][12]))?`,
310
+ `\\b(${LANGUAGE_GROUP})\\b(?:\\s*[\\(:\\-]?\\s*(${BOUNDARY_START}(?:${LEVEL_GROUP})${BOUNDARY_END}|[abc][12]))?`,
311
+ "gi"
312
+ );
313
+ var LEVEL_BEFORE_LANGUAGE_RE = new RegExp(
314
+ `${BOUNDARY_START}(${LEVEL_GROUP})${BOUNDARY_END}\\s+(?:in\\s+)?(${LANGUAGE_GROUP})\\b`,
291
315
  "gi"
292
316
  );
293
- var LEVEL_BEFORE_LANGUAGE_RE = new RegExp(`\\b(${LEVEL_GROUP})\\s+(?:in\\s+)?(${LANGUAGE_GROUP})\\b`, "gi");
294
317
  function canonicalLanguage(name) {
295
318
  const lower = name.toLowerCase();
296
319
  return LANGUAGE_ALIASES[lower] ?? lower;
@@ -340,9 +363,9 @@ function diffLanguages(resumeLanguages, requiredLanguages) {
340
363
 
341
364
  // src/core/parser/jd.parser.ts
342
365
  var DEGREE_VARIANTS = [
343
- [/\b(?:bachelor(?:'s)?|b\.s\.?|bs\.?|bsc\.?)\b/i, "bachelor"],
344
- [/\b(?:master(?:'s)?|m\.s\.?|ms\.?|msc\.?)\b/i, "master"],
345
- [/\b(?:phd|ph\.d\.?|doctorate)\b/i, "phd"],
366
+ [/\b(?:bachelor(?:'s)?|b\.s\.?|bs\.?|bsc\.?|licence)\b/i, "bachelor"],
367
+ [/\b(?:master(?:'s)?|m\.s\.?|ms\.?|msc\.?|diplom)\b/i, "master"],
368
+ [/\b(?:phd|ph\.d\.?|doctorate|doktor|doctorat)\b/i, "phd"],
346
369
  [/\bmba\b/i, "mba"],
347
370
  [/\bassociate(?:'s)?\b/i, "associate"]
348
371
  ];
@@ -365,16 +388,15 @@ function extractPreferredSkills(lines) {
365
388
  return preferred;
366
389
  }
367
390
  function extractRoleKeywords(text) {
368
- const roleMatch = text.match(/(engineer|developer|manager|scientist|analyst|designer|architect)/i);
369
- const titleTokens = roleMatch ? roleMatch[0].split(/\s+/) : [];
370
- return unique(tokenize(titleTokens.join(" ") || text.split(/\n/)[0] || ""));
391
+ const roleMatches = text.match(/(engineer|developer|manager|scientist|analyst|designer|architect|director|consultant|lead|vp)/gi) ?? [];
392
+ const fallback = roleMatches.length === 0 ? [text.split(/\n/)[0] ?? ""] : [];
393
+ return unique(tokenize([...roleMatches, ...fallback].join(" ")));
371
394
  }
372
395
  function extractMinExperience(text) {
373
- const match = text.match(/(\d{1,2})\+?\s+(?:years|yrs)/i);
374
- if (match) {
375
- return Number.parseInt(match[1], 10);
376
- }
377
- return void 0;
396
+ const match = text.match(/(\d{1,2})\+?\s*(?:years?|yrs\.?|jahre?|ans?|années?)/i);
397
+ if (!match) return void 0;
398
+ const parsed = Number.parseInt(match[1], 10);
399
+ return parsed <= 60 ? parsed : void 0;
378
400
  }
379
401
  var SURFACE_TOKEN_RE = /[a-z0-9][a-z0-9.#+\-/]*[a-z0-9#+]/gi;
380
402
  function collectKeywordSurfaceForms(rawText, aliases) {
@@ -388,7 +410,16 @@ function collectKeywordSurfaceForms(rawText, aliases) {
388
410
  }
389
411
  return surfaceForms;
390
412
  }
391
- function extractEducationRequirements(text) {
413
+ var LANG_SECTION_RE = /^\s*(?:languages?|sprache|langue)s?\s*[:\-–—]?\s*/i;
414
+ var LANG_REQUIREMENT_HINT_RE = /\b(fluent|required|must|need|speak|proficient|native|conversational|intermediate|advanced|professional|[abc][12])\b/i;
415
+ function isLanguageRequired(lang, jobDescription) {
416
+ return splitLines(jobDescription).some((line) => {
417
+ const lower = line.toLowerCase();
418
+ if (!lower.includes(lang.name)) return false;
419
+ return LANG_SECTION_RE.test(line) || LANG_REQUIREMENT_HINT_RE.test(line);
420
+ });
421
+ }
422
+ function extractDegreeLevels(text) {
392
423
  const found = /* @__PURE__ */ new Set();
393
424
  for (const [pattern, canonical] of DEGREE_VARIANTS) {
394
425
  if (pattern.test(text)) found.add(canonical);
@@ -426,11 +457,13 @@ function parseJobDescription(jobDescription, config) {
426
457
  roleKeywords,
427
458
  keywords,
428
459
  minExperienceYears: extractMinExperience(jobDescription),
429
- educationRequirements: extractEducationRequirements(jobDescription),
460
+ educationRequirements: extractDegreeLevels(jobDescription),
430
461
  keywordSurfaceForms: collectKeywordSurfaceForms(jobDescription, config.skillAliases),
431
- // ponytail: any language mention in the JD is treated as a requirement — good enough until
432
- // JDs that merely *reference* a language (not require it) show up as false positives.
433
- requiredLanguages: parseLanguageMentions(jobDescription)
462
+ // A language only counts as required if its mention carries a requirement/level cue
463
+ // or sits in a "Languages:" line; plain references ("our Berlin office") don't count.
464
+ requiredLanguages: parseLanguageMentions(jobDescription).filter(
465
+ (lang) => isLanguageRequired(lang, jobDescription)
466
+ )
434
467
  };
435
468
  }
436
469
 
@@ -459,11 +492,37 @@ var MONTHS = {
459
492
  nov: 11,
460
493
  november: 11,
461
494
  dec: 12,
462
- december: 12
495
+ december: 12,
496
+ // German
497
+ januar: 1,
498
+ j\u00E4nner: 1,
499
+ februar: 2,
500
+ m\u00E4rz: 3,
501
+ maerz: 3,
502
+ mai: 5,
503
+ juni: 6,
504
+ juli: 7,
505
+ oktober: 10,
506
+ dezember: 12,
507
+ // French
508
+ janvier: 1,
509
+ f\u00E9vrier: 2,
510
+ fevrier: 2,
511
+ mars: 3,
512
+ avril: 4,
513
+ juin: 6,
514
+ juillet: 7,
515
+ ao\u00FBt: 8,
516
+ aout: 8,
517
+ septembre: 9,
518
+ octobre: 10,
519
+ novembre: 11,
520
+ d\u00E9cembre: 12,
521
+ decembre: 12
463
522
  };
464
523
  function parseDateToken(raw) {
465
524
  const cleaned = raw.trim().toLowerCase();
466
- const monthMatch = cleaned.match(/([a-z]{3,9})\s*(\d{4})/i);
525
+ const monthMatch = cleaned.match(/([a-zà-ÿ]{3,9})\s*(\d{4})/i);
467
526
  if (monthMatch) {
468
527
  const monthName = monthMatch[1].toLowerCase();
469
528
  const year = Number.parseInt(monthMatch[2], 10);
@@ -495,14 +554,14 @@ function monthsBetween(start, end) {
495
554
  function parseDateRange(text, referenceDate) {
496
555
  const normalized = text.trim();
497
556
  const rangeMatch = normalized.match(
498
- /(\d{1,2}\/\d{4}|[A-Za-z]{3,9}\s+\d{4}|\d{4})\s*(?:-|to|–|—)\s*(Present|Current|Now|\d{1,2}\/\d{4}|[A-Za-z]{3,9}\s+\d{4}|\d{4})/i
557
+ /(\d{1,2}\/\d{4}|[A-Za-zà-ÿ]{3,9}\s+\d{4}|\d{4})\s*(?:-|to|through|until|bis|jusqu'à|à|–|—)\s*(Present|Current|Now|Aktuell|Heute|Actuellement|Présent|\d{1,2}\/\d{4}|[A-Za-zà-ÿ]{3,9}\s+\d{4}|\d{4})/i
499
558
  );
500
559
  if (!rangeMatch) {
501
560
  return null;
502
561
  }
503
562
  const startToken = parseDateToken(rangeMatch[1]);
504
563
  const endRaw = rangeMatch[2];
505
- const isPresent = /present|current|now/i.test(endRaw);
564
+ const isPresent = /present|current|now|aktuell|heute|actuellement|présent|actuel/i.test(endRaw);
506
565
  const endToken = isPresent ? void 0 : parseDateToken(endRaw);
507
566
  if (!startToken) {
508
567
  return null;
@@ -556,12 +615,21 @@ function sumExperienceYears(ranges) {
556
615
 
557
616
  // src/core/parser/resume.parser.ts
558
617
  var SECTION_ALIASES = {
559
- summary: ["summary", "profile", "about"],
560
- experience: ["experience", "work experience", "professional experience", "employment"],
561
- skills: ["skills", "technical skills", "technologies"],
562
- education: ["education", "academics", "academic background"],
563
- projects: ["projects", "portfolio"],
564
- certifications: ["certifications", "licenses"]
618
+ summary: ["summary", "profile", "about", "zusammenfassung", "profil", "r\xE9sum\xE9", "\xE0 propos"],
619
+ experience: [
620
+ "experience",
621
+ "work experience",
622
+ "professional experience",
623
+ "employment",
624
+ "erfahrung",
625
+ "berufserfahrung",
626
+ "exp\xE9rience",
627
+ "exp\xE9rience professionnelle"
628
+ ],
629
+ skills: ["skills", "technical skills", "technologies", "f\xE4higkeiten", "kenntnisse", "comp\xE9tences"],
630
+ education: ["education", "academics", "academic background", "ausbildung", "formation", "\xE9tudes"],
631
+ projects: ["projects", "portfolio", "projekte", "projets"],
632
+ certifications: ["certifications", "licenses", "zertifizierungen", "certifications professionnelles"]
565
633
  };
566
634
  var STRONG_VERBS = [
567
635
  "led",
@@ -639,7 +707,9 @@ function extractSections(text) {
639
707
  }
640
708
  function parseSkills(sectionContent, aliases) {
641
709
  if (!sectionContent) return [];
642
- const raw = sectionContent.split(/[,;\n]/).map((skill) => skill.trim()).filter(Boolean);
710
+ const hasBullets = /[•·‣▪○●◦]/.test(sectionContent);
711
+ const normalized = hasBullets ? sectionContent.replace(/\n/g, " ") : sectionContent;
712
+ const raw = normalized.split(/[,;\n]|[•·‣▪○●◦]/).map((skill) => skill.trim().replace(/^[-•·‣▪○●◦\s]+|[-•·‣▪○●◦\s]+$/g, "").trim()).filter(Boolean);
643
713
  return normalizeSkills(raw, aliases);
644
714
  }
645
715
  function parseActionVerbs(text) {
@@ -672,7 +742,7 @@ function parseExperience(sectionContent, referenceDate) {
672
742
  }
673
743
  continue;
674
744
  }
675
- const titleMatch = line.match(/^(Senior|Lead|Principal|Staff|Software|Full\s*Stack|Frontend|Backend|Engineer|Developer|Manager|Analyst)[^,-]*/i);
745
+ const titleMatch = line.match(/^(Senior|Lead|Principal|Staff|VP|Director|Consultant|Architect|Software|Full\s*Stack|Frontend|Backend|Engineer|Developer|Manager|Analyst)[^,-]*/i);
676
746
  if (titleMatch) {
677
747
  const title = titleMatch[0].trim();
678
748
  jobTitles.push(title.toLowerCase());
@@ -710,7 +780,8 @@ function parseResume(resumeText, config) {
710
780
  const textToScan = sections.summary ?? normalizedText;
711
781
  const yearsMatch = textToScan.match(/(\d{1,2})\+?\s*years?/i);
712
782
  if (yearsMatch) {
713
- totalExperienceYears = Number.parseInt(yearsMatch[1], 10);
783
+ const parsed = Number.parseInt(yearsMatch[1], 10);
784
+ totalExperienceYears = parsed <= 60 ? parsed : 0;
714
785
  }
715
786
  }
716
787
  const requiredSections = ["summary", "experience", "skills", "education"];
@@ -946,10 +1017,9 @@ function scoreEducation(resume, job) {
946
1017
  if (job.educationRequirements.length === 0) {
947
1018
  return 100;
948
1019
  }
949
- const resumeEducationText = resume.educationEntries.join(" ");
950
- const normalizedEducation = resumeEducationText.toLowerCase();
1020
+ const resumeDegreeLevels = extractDegreeLevels(resume.educationEntries.join(" "));
951
1021
  const matched = job.educationRequirements.filter(
952
- (requirement) => normalizedEducation.includes(requirement.toLowerCase())
1022
+ (requirement) => resumeDegreeLevels.includes(requirement)
953
1023
  );
954
1024
  if (matched.length === 0) {
955
1025
  return 0;