@pranavraut033/ats-checker 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,6 +19,7 @@ Zero-dependency TypeScript library that scores a resume against a job descriptio
19
19
  - **Explainable** — breakdown by category (skills / experience / keywords / education) plus matched and missing skill/keyword lists
20
20
  - **Configurable** — adjust weights, add skill aliases, define custom penalty rules
21
21
  - **Zero dependencies** — core library has no runtime deps; ships ESM + CJS
22
+ - **PDF input** — optional `/pdf` subpath extracts resume text from a PDF buffer (requires `pdfjs-dist` peer dep)
22
23
  - **Built-in profiles** — software engineer, data scientist, product manager out of the box
23
24
 
24
25
  ---
@@ -82,7 +83,8 @@ console.log(result.suggestions); // ["Add GraphQL to your skills section",
82
83
  | `warnings` | `string[]` | Parse warnings and section alerts |
83
84
  | `experienceGap` | `number` | Years below JD minimum; `0` when met |
84
85
  | `detectedSections` | `string[]` | Resume sections the parser found |
85
- | `parsedExperienceYears` | `number` | Total years from resume date ranges |
86
+ | `parsedExperienceYears` | `number` | Total years from resume date ranges (overlap-deduplicated) |
87
+ | `experienceEntries` | `ParsedExperienceEntry[]` | Parsed job entries: `title`, `company`, `dates` (with `start`/`end`/`durationInMonths`) |
86
88
 
87
89
  **Scoring formula:**
88
90
  `score = skills×0.30 + experience×0.30 + keywords×0.25 + education×0.15` → clamped to 0–100 → rule penalties subtracted.
@@ -183,6 +185,41 @@ const result = analyzeResume({
183
185
 
184
186
  ---
185
187
 
188
+ ## PDF Input
189
+
190
+ Extract text from a PDF resume before passing it to `analyzeResume`. The `/pdf` subpath uses `pdfjs-dist` as an optional peer dependency — the core library stays zero-dep.
191
+
192
+ ```bash
193
+ npm install pdfjs-dist
194
+ ```
195
+
196
+ ```typescript
197
+ import { extractTextFromPDF } from "@pranavraut033/ats-checker/pdf";
198
+ import { analyzeResume } from "@pranavraut033/ats-checker";
199
+ import { readFileSync } from "fs";
200
+
201
+ const bytes = readFileSync("resume.pdf");
202
+ const resumeText = await extractTextFromPDF(bytes);
203
+
204
+ const result = analyzeResume({ resumeText, jobDescription: "..." });
205
+ ```
206
+
207
+ `extractTextFromPDF` accepts a `Uint8Array` or `ArrayBuffer` and returns a plain `string`. Works in Node.js and the browser (text-layer PDFs only).
208
+
209
+ **Multi-column layouts are handled automatically.** The extractor uses glyph x/y coordinates to detect column boundaries and process each column independently, so a two-column resume parses cleanly without interleaved text.
210
+
211
+ For PDFs that can't be recovered — scanned/image resumes or exports with no text layer — `analyzeResume` surfaces an actionable message in `result.warnings`. Always check it after PDF input:
212
+
213
+ ```typescript
214
+ const result = analyzeResume({ resumeText, jobDescription: "..." });
215
+ if (result.warnings.length) {
216
+ console.warn("Parsing issues:", result.warnings);
217
+ // e.g. "Almost no text was extracted — the resume may be a scanned/image PDF."
218
+ }
219
+ ```
220
+
221
+ ---
222
+
186
223
  ## LLM Integration (deprecated)
187
224
 
188
225
  `analyzeResumeAsync` accepts an optional `llm` config that rewrites suggestion text via a caller-supplied LLM client. **This path is deprecated** — scores and breakdowns are never touched by the LLM. Prefer calling `analyzeResume` and running your own LLM pass on `result.suggestions` if you want AI-enhanced wording.
package/dist/index.d.mts CHANGED
@@ -4,6 +4,11 @@ interface ParsedDateRange {
4
4
  start?: string;
5
5
  end?: string;
6
6
  durationInMonths?: number;
7
+ /** Numeric year/month of the start and end, for overlap-aware summing. */
8
+ startYear?: number;
9
+ startMonth?: number;
10
+ endYear?: number;
11
+ endMonth?: number;
7
12
  }
8
13
  interface ParsedExperienceEntry {
9
14
  title?: string;
@@ -248,6 +253,8 @@ interface ATSAnalysisResult {
248
253
  detectedSections: string[];
249
254
  /** Total years of experience parsed from the resume's date ranges. */
250
255
  parsedExperienceYears: number;
256
+ /** Parsed experience entries from the resume, with titles and date ranges. */
257
+ experienceEntries: ParsedExperienceEntry[];
251
258
  }
252
259
 
253
260
  declare const defaultSkillAliases: SkillAliases;
package/dist/index.d.ts CHANGED
@@ -4,6 +4,11 @@ interface ParsedDateRange {
4
4
  start?: string;
5
5
  end?: string;
6
6
  durationInMonths?: number;
7
+ /** Numeric year/month of the start and end, for overlap-aware summing. */
8
+ startYear?: number;
9
+ startMonth?: number;
10
+ endYear?: number;
11
+ endMonth?: number;
7
12
  }
8
13
  interface ParsedExperienceEntry {
9
14
  title?: string;
@@ -248,6 +253,8 @@ interface ATSAnalysisResult {
248
253
  detectedSections: string[];
249
254
  /** Total years of experience parsed from the resume's date ranges. */
250
255
  parsedExperienceYears: number;
256
+ /** Parsed experience entries from the resume, with titles and date ranges. */
257
+ experienceEntries: ParsedExperienceEntry[];
251
258
  }
252
259
 
253
260
  declare const defaultSkillAliases: SkillAliases;
package/dist/index.js CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  // src/utils/text.ts
4
4
  var STOP_WORDS = /* @__PURE__ */ new Set([
5
+ // articles / prepositions / conjunctions
5
6
  "the",
6
7
  "and",
7
8
  "or",
@@ -17,12 +18,16 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
17
18
  "by",
18
19
  "from",
19
20
  "as",
21
+ "into",
22
+ "onto",
23
+ "upon",
24
+ "via",
25
+ "per",
26
+ "plus",
27
+ // verbs / modals
20
28
  "is",
21
29
  "are",
22
30
  "be",
23
- "this",
24
- "that",
25
- "it",
26
31
  "was",
27
32
  "were",
28
33
  "will",
@@ -31,7 +36,98 @@ var STOP_WORDS = /* @__PURE__ */ new Set([
31
36
  "must",
32
37
  "have",
33
38
  "has",
34
- "had"
39
+ "had",
40
+ "do",
41
+ "does",
42
+ "did",
43
+ "get",
44
+ "give",
45
+ "go",
46
+ "use",
47
+ "see",
48
+ "help",
49
+ "work",
50
+ "build",
51
+ "show",
52
+ "need",
53
+ "want",
54
+ "make",
55
+ "let",
56
+ // pronouns / determiners
57
+ "it",
58
+ "its",
59
+ "this",
60
+ "that",
61
+ "these",
62
+ "those",
63
+ "we",
64
+ "our",
65
+ "you",
66
+ "your",
67
+ "they",
68
+ "their",
69
+ "us",
70
+ "who",
71
+ "what",
72
+ "which",
73
+ "how",
74
+ // common English fillers that leak into JDs
75
+ "no",
76
+ "not",
77
+ "all",
78
+ "any",
79
+ "also",
80
+ "more",
81
+ "well",
82
+ "very",
83
+ "highly",
84
+ "across",
85
+ "over",
86
+ "under",
87
+ "within",
88
+ "about",
89
+ "out",
90
+ "up",
91
+ "down",
92
+ "new",
93
+ "if",
94
+ "so",
95
+ "such",
96
+ "both",
97
+ "each",
98
+ "one",
99
+ "many",
100
+ "only",
101
+ // JD/HR boilerplate — never skills
102
+ "years",
103
+ "year",
104
+ "experience",
105
+ "required",
106
+ "requirement",
107
+ "requirements",
108
+ "preferred",
109
+ "role",
110
+ "degree",
111
+ "practices",
112
+ "best",
113
+ "skills",
114
+ "team",
115
+ "field",
116
+ "related",
117
+ "relevant",
118
+ "desired",
119
+ "strong",
120
+ "solid",
121
+ "good",
122
+ "first",
123
+ "based",
124
+ "day",
125
+ "week",
126
+ "month",
127
+ "time",
128
+ "fast",
129
+ "open",
130
+ "dynamic"
35
131
  ]);
36
132
  function normalizeWhitespace(text) {
37
133
  return text.replace(/\r\n?/g, "\n").replace(/\s+/g, " ").trim();
@@ -45,7 +141,12 @@ function splitLines(text) {
45
141
  var TECH_TOKEN_RE = /[a-z0-9][a-z0-9.#+\-/]*[a-z0-9#+]/g;
46
142
  function tokenize(text) {
47
143
  const normalized = normalizeForComparison(text);
48
- return (normalized.match(TECH_TOKEN_RE) ?? []).filter((t) => !STOP_WORDS.has(t));
144
+ return (normalized.match(TECH_TOKEN_RE) ?? []).filter(
145
+ (t) => /[a-z]/.test(t) && !STOP_WORDS.has(t)
146
+ );
147
+ }
148
+ function escapeRegExp(input) {
149
+ return input.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
49
150
  }
50
151
  function unique(values) {
51
152
  const seen = /* @__PURE__ */ new Set();
@@ -101,37 +202,30 @@ function normalizeSkills(skills, aliases) {
101
202
  }
102
203
 
103
204
  // src/core/parser/jd.parser.ts
104
- var DEGREE_KEYWORDS = [
105
- "bachelor",
106
- "b.s",
107
- "bs",
108
- "bsc",
109
- "master",
110
- "m.s",
111
- "ms",
112
- "msc",
113
- "phd",
114
- "doctorate",
115
- "mba",
116
- "associate"
205
+ var DEGREE_VARIANTS = [
206
+ [/\b(?:bachelor(?:'s)?|b\.s\.?|bs\.?|bsc\.?)\b/i, "bachelor"],
207
+ [/\b(?:master(?:'s)?|m\.s\.?|ms\.?|msc\.?)\b/i, "master"],
208
+ [/\b(?:phd|ph\.d\.?|doctorate)\b/i, "phd"],
209
+ [/\bmba\b/i, "mba"],
210
+ [/\bassociate(?:'s)?\b/i, "associate"]
117
211
  ];
118
212
  function extractRequiredSkills(lines) {
119
213
  const required = [];
120
214
  for (const line of lines) {
121
215
  if (/must|require|required|need/i.test(line)) {
122
- required.push(...line.split(/[,.;•-]/));
216
+ required.push(...tokenize(line));
123
217
  }
124
218
  }
125
- return required.map((value) => value.trim()).filter(Boolean);
219
+ return required;
126
220
  }
127
221
  function extractPreferredSkills(lines) {
128
222
  const preferred = [];
129
223
  for (const line of lines) {
130
224
  if (/preferred|nice to have|plus/i.test(line)) {
131
- preferred.push(...line.split(/[,.;•-]/));
225
+ preferred.push(...tokenize(line));
132
226
  }
133
227
  }
134
- return preferred.map((value) => value.trim()).filter(Boolean);
228
+ return preferred;
135
229
  }
136
230
  function extractRoleKeywords(text) {
137
231
  const roleMatch = text.match(/(engineer|developer|manager|scientist|analyst|designer|architect)/i);
@@ -146,23 +240,41 @@ function extractMinExperience(text) {
146
240
  return void 0;
147
241
  }
148
242
  function extractEducationRequirements(text) {
149
- const normalized = normalizeForComparison(text);
150
- return DEGREE_KEYWORDS.filter((degree) => normalized.includes(degree));
243
+ const found = /* @__PURE__ */ new Set();
244
+ for (const [pattern, canonical] of DEGREE_VARIANTS) {
245
+ if (pattern.test(text)) found.add(canonical);
246
+ }
247
+ return [...found];
151
248
  }
152
249
  function parseJobDescription(jobDescription, config) {
153
250
  const normalizedText = normalizeWhitespace(jobDescription);
154
251
  const lines = splitLines(jobDescription);
155
- const requiredSkillsRaw = extractRequiredSkills(lines);
156
- const preferredSkillsRaw = extractPreferredSkills(lines);
252
+ const skillVocab = /* @__PURE__ */ new Set();
253
+ for (const [canonical, aliases] of Object.entries(config.skillAliases)) {
254
+ skillVocab.add(canonical.toLowerCase());
255
+ for (const alias of aliases) skillVocab.add(alias.toLowerCase());
256
+ }
257
+ for (const s of config.profile?.mandatorySkills ?? []) skillVocab.add(s.toLowerCase());
258
+ for (const s of config.profile?.optionalSkills ?? []) skillVocab.add(s.toLowerCase());
259
+ const isSkillLike = (t) => {
260
+ if (skillVocab.has(t)) return true;
261
+ if (/[.#+]/.test(t) && /[a-z]/.test(t)) return true;
262
+ if (t.includes("/")) return t.split("/").some((p) => p.length >= 2 && !STOP_WORDS.has(p));
263
+ return false;
264
+ };
265
+ const requiredSkillsRaw = extractRequiredSkills(lines).filter(isSkillLike);
266
+ const preferredSkillsRaw = extractPreferredSkills(lines).filter(isSkillLike);
157
267
  const requiredSkills = normalizeSkills(requiredSkillsRaw, config.skillAliases);
158
268
  const preferredSkills = normalizeSkills(preferredSkillsRaw, config.skillAliases);
159
- const keywords = unique([...requiredSkills, ...preferredSkills, ...tokenize(normalizedText)]);
269
+ const bodyTokens = tokenize(normalizedText).filter(isSkillLike);
270
+ const roleKeywords = extractRoleKeywords(jobDescription);
271
+ const keywords = unique([...requiredSkills, ...preferredSkills, ...roleKeywords, ...bodyTokens]);
160
272
  return {
161
273
  raw: jobDescription,
162
274
  normalizedText,
163
275
  requiredSkills,
164
276
  preferredSkills,
165
- roleKeywords: extractRoleKeywords(jobDescription),
277
+ roleKeywords,
166
278
  keywords,
167
279
  minExperienceYears: extractMinExperience(jobDescription),
168
280
  educationRequirements: extractEducationRequirements(jobDescription)
@@ -207,6 +319,14 @@ function parseDateToken(raw) {
207
319
  return { year, month };
208
320
  }
209
321
  }
322
+ const slashMatch = cleaned.match(/^(\d{1,2})\/(\d{4})$/);
323
+ if (slashMatch) {
324
+ const month = Number.parseInt(slashMatch[1], 10);
325
+ const year = Number.parseInt(slashMatch[2], 10);
326
+ if (month >= 1 && month <= 12 && !Number.isNaN(year)) {
327
+ return { year, month };
328
+ }
329
+ }
210
330
  const yearMatch = cleaned.match(/(20\d{2}|19\d{2})/);
211
331
  if (yearMatch) {
212
332
  const year = Number.parseInt(yearMatch[1], 10);
@@ -221,7 +341,9 @@ function monthsBetween(start, end) {
221
341
  }
222
342
  function parseDateRange(text, referenceDate) {
223
343
  const normalized = text.trim();
224
- const rangeMatch = normalized.match(/([A-Za-z]{3,9}\s+\d{4}|\d{4})\s*(?:-|to|–|—)\s*(Present|Current|Now|[A-Za-z]{3,9}\s+\d{4}|\d{4})/i);
344
+ const rangeMatch = normalized.match(
345
+ /(\d{1,2}\/\d{4}|[A-Za-z]{3,9}\s+\d{4}|\d{4})\s*(?:-|to|–|—)\s*(Present|Current|Now|\d{1,2}\/\d{4}|[A-Za-z]{3,9}\s+\d{4}|\d{4})/i
346
+ );
225
347
  if (!rangeMatch) {
226
348
  return null;
227
349
  }
@@ -242,11 +364,40 @@ function parseDateRange(text, referenceDate) {
242
364
  raw: normalized,
243
365
  start: rangeMatch[1],
244
366
  end: isPresent ? "present" : rangeMatch[2],
245
- durationInMonths: durationInMonths > 0 ? durationInMonths : void 0
367
+ durationInMonths: durationInMonths > 0 ? durationInMonths : void 0,
368
+ startYear: startToken.year,
369
+ startMonth: startToken.month,
370
+ endYear: endTokenResolved.year,
371
+ endMonth: endTokenResolved.month
246
372
  };
247
373
  }
248
374
  function sumExperienceYears(ranges) {
249
- const months = ranges.map((range) => range.durationInMonths ?? 0).reduce((total, value) => total + value, 0);
375
+ const withBounds = ranges.filter(
376
+ (r) => r.startYear !== void 0 && r.endYear !== void 0
377
+ );
378
+ if (withBounds.length === ranges.length && ranges.length > 0) {
379
+ const toIndex = (year, month) => year * 12 + month;
380
+ const intervals = withBounds.map((r) => ({
381
+ s: toIndex(r.startYear, r.startMonth ?? 1),
382
+ e: toIndex(r.endYear, r.endMonth ?? 12)
383
+ })).sort((a, b) => a.s - b.s);
384
+ let totalMonths = 0;
385
+ let curStart = intervals[0].s;
386
+ let curEnd = intervals[0].e;
387
+ for (let i = 1; i < intervals.length; i++) {
388
+ const { s, e } = intervals[i];
389
+ if (s <= curEnd) {
390
+ curEnd = Math.max(curEnd, e);
391
+ } else {
392
+ totalMonths += curEnd - curStart + 1;
393
+ curStart = s;
394
+ curEnd = e;
395
+ }
396
+ }
397
+ totalMonths += curEnd - curStart + 1;
398
+ return Number((totalMonths / 12).toFixed(2));
399
+ }
400
+ const months = ranges.reduce((total, r) => total + (r.durationInMonths ?? 0), 0);
250
401
  return Number((months / 12).toFixed(2));
251
402
  }
252
403
 
@@ -280,9 +431,6 @@ var ACTION_VERBS = [
280
431
  "reduced",
281
432
  "increased"
282
433
  ];
283
- function escapeRegExp(input) {
284
- return input.replace(/[.*+?^${}()|[\\]\\]/g, "\\$&");
285
- }
286
434
  function detectSection(line) {
287
435
  const normalized = line.trim().toLowerCase();
288
436
  for (const [section, aliases] of Object.entries(SECTION_ALIASES)) {
@@ -381,11 +529,28 @@ function parseResume(resumeText, config) {
381
529
  const actionVerbs = parseActionVerbs(normalizedText);
382
530
  const experienceData = parseExperience(sections.experience, config.referenceDate);
383
531
  const educationEntries = parseEducation(sections.education);
384
- const totalExperienceYears = sumExperienceYears(
532
+ let totalExperienceYears = sumExperienceYears(
385
533
  experienceData.entries.map((entry) => entry.dates).filter((range) => Boolean(range))
386
534
  );
535
+ if (totalExperienceYears === 0) {
536
+ const textToScan = sections.summary ?? normalizedText;
537
+ const yearsMatch = textToScan.match(/(\d{1,2})\+?\s*years?/i);
538
+ if (yearsMatch) {
539
+ totalExperienceYears = Number.parseInt(yearsMatch[1], 10);
540
+ }
541
+ }
387
542
  const requiredSections = ["summary", "experience", "skills", "education"];
388
543
  const warnings = [];
544
+ const lineCount = splitLines(resumeText).length;
545
+ if (resumeText.trim().length < 100) {
546
+ warnings.push(
547
+ "Almost no text was extracted \u2014 the resume may be a scanned/image PDF. Upload a text-based PDF or paste the text directly."
548
+ );
549
+ } else if (lineCount <= 2) {
550
+ warnings.push(
551
+ "Resume text has no line breaks \u2014 the PDF layout likely didn't export cleanly (common with multi-column designs). Export as a single-column PDF or paste plain text for accurate parsing."
552
+ );
553
+ }
389
554
  for (const section of requiredSections) {
390
555
  if (!detected.includes(section)) {
391
556
  warnings.push(`${section} section not detected`);
@@ -586,10 +751,11 @@ function calculateScore(resume, job, config) {
586
751
  overusedKeywords: keywordResult.overusedKeywords,
587
752
  suggestions: [],
588
753
  warnings: [],
589
- // detectedSections / parsedExperienceYears / experienceGap: filled by index.ts
754
+ // detectedSections / parsedExperienceYears / experienceGap / experienceEntries: filled by index.ts
590
755
  experienceGap: experienceResult.missingYears,
591
756
  detectedSections: [],
592
757
  parsedExperienceYears: 0,
758
+ experienceEntries: [],
593
759
  missingExperienceYears: experienceResult.missingYears,
594
760
  educationScore
595
761
  };
@@ -613,7 +779,31 @@ var defaultSkillAliases = {
613
779
  docker: ["containers"],
614
780
  kubernetes: ["k8s"],
615
781
  html: ["html5"],
616
- css: ["css3"]
782
+ css: ["css3"],
783
+ // ML / data science
784
+ pytorch: ["torch"],
785
+ tensorflow: ["tf"],
786
+ "scikit-learn": ["sklearn"],
787
+ pandas: [],
788
+ numpy: [],
789
+ fastapi: [],
790
+ flask: [],
791
+ django: [],
792
+ // data / infra
793
+ kafka: [],
794
+ redis: [],
795
+ elasticsearch: ["elastic"],
796
+ spark: ["apache spark"],
797
+ // common pure-letter tech skills (no symbol chars)
798
+ accessibility: ["a11y"],
799
+ frontend: ["front-end"],
800
+ backend: ["back-end"],
801
+ security: ["cybersecurity"],
802
+ testing: ["unittest", "pytest"],
803
+ microservices: [],
804
+ agile: ["scrum"],
805
+ blockchain: [],
806
+ devops: []
617
807
  };
618
808
  var softwareEngineerProfile = {
619
809
  name: "software-engineer",
@@ -740,6 +930,11 @@ var SuggestionEngine = class {
740
930
  "Strengthen bullet points with impact verbs (led, built, improved, delivered)."
741
931
  );
742
932
  }
933
+ if (input.resume.detectedSections.length < 2 && input.resume.raw.trim().length > 300) {
934
+ suggestions.push(
935
+ "Your resume may use a multi-column layout. Export as a single-column PDF or paste plain text \u2014 most ATS systems and this parser work best with a linear layout."
936
+ );
937
+ }
743
938
  return { suggestions, warnings };
744
939
  }
745
940
  };
@@ -1400,6 +1595,7 @@ function analyzeResume(input) {
1400
1595
  experienceGap: scoring.experienceGap,
1401
1596
  detectedSections: parsedResume.detectedSections,
1402
1597
  parsedExperienceYears: parsedResume.totalExperienceYears,
1598
+ experienceEntries: parsedResume.experience,
1403
1599
  suggestions,
1404
1600
  warnings: [...suggestionResult.warnings, ...llmWarnings]
1405
1601
  };
@@ -1464,6 +1660,7 @@ async function analyzeResumeAsync(input) {
1464
1660
  experienceGap: scoring.experienceGap,
1465
1661
  detectedSections: parsedResume.detectedSections,
1466
1662
  parsedExperienceYears: parsedResume.totalExperienceYears,
1663
+ experienceEntries: parsedResume.experience,
1467
1664
  suggestions,
1468
1665
  warnings: [...suggestionResult.warnings, ...llmWarnings]
1469
1666
  };