@pranavraut033/ats-checker 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ 'use strict';
2
+
3
+ // src/pdf/index.ts
4
/**
 * Extract plain text from a PDF buffer.
 *
 * `pdfjs-dist` is imported lazily and must be installed separately. For every
 * page the text items are collected with their start coordinates, the page is
 * split into a left and a right column when the widest horizontal gap between
 * item start positions reaches the threshold, and each column is rendered
 * line by line with `renderColumn`.
 *
 * @param data - Raw PDF bytes as Uint8Array or ArrayBuffer
 * @returns The text of all pages, joined with "\n"
 * @throws {Error} When `pdfjs-dist` cannot be imported. The message is fixed;
 *   the underlying import failure is exposed on the error's `cause` property.
 */
async function extractTextFromPDF(data) {
  let pdfjsLib;
  try {
    pdfjsLib = await import('pdfjs-dist');
  } catch (importError) {
    // Keep the friendly message, but do not throw away the real reason the
    // import failed (it may be something other than "not installed").
    throw Object.assign(
      new Error(
        "pdfjs-dist is required for PDF extraction. Install it: npm install pdfjs-dist"
      ),
      { cause: importError }
    );
  }
  const bytes = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
  const loadingTask = pdfjsLib.getDocument({ data: bytes });
  try {
    const doc = await loadingTask.promise;
    const pages = [];
    for (let pageNumber = 1; pageNumber <= doc.numPages; pageNumber++) {
      const page = await doc.getPage(pageNumber);
      const content = await page.getTextContent();
      const columns = splitIntoColumns(collectPositionedItems(content.items));
      const columnTexts = columns.map((column) => renderColumn(column));
      pages.push(columnTexts.filter(Boolean).join("\n"));
    }
    return pages.join("\n");
  } finally {
    // Release the document once extraction finishes or fails. The call is
    // guarded because a stand-in for pdfjs may not implement destroy(), and it
    // is best-effort so a cleanup failure never replaces the real outcome.
    if (loadingTask && typeof loadingTask.destroy === "function") {
      try {
        await loadingTask.destroy();
      } catch {
        // ignore cleanup errors
      }
    }
  }
}

/**
 * Turn the text items of one page into `{ x, y, str }` records.
 *
 * Items without a `str` property or with whitespace-only text are skipped.
 * Items whose `transform` is missing, too short, or not numeric are placed at
 * (0, 0) so they never introduce NaN coordinates downstream.
 */
function collectPositionedItems(textItems) {
  const items = [];
  for (const item of textItems) {
    if (!("str" in item) || !item.str.trim()) continue;
    const transform = Array.isArray(item.transform) ? item.transform : void 0;
    const x = transform ? transform[4] : void 0;
    const y = transform ? transform[5] : void 0;
    if (Number.isFinite(x) && Number.isFinite(y)) {
      items.push({ x, y, str: item.str });
    } else {
      items.push({ x: 0, y: 0, str: item.str });
    }
  }
  return items;
}

/**
 * Split items into [left, right] at the midpoint of the widest gap between
 * distinct rounded x start positions, or return [items] when that gap is
 * smaller than `gapThreshold` (a heuristic; 80 by default).
 */
function splitIntoColumns(items, gapThreshold = 80) {
  const xPositions = [...new Set(items.map((it) => Math.round(it.x)))].sort(
    (a, b) => a - b
  );
  let columnBoundary = null;
  let maxGap = 0;
  for (let j = 1; j < xPositions.length; j++) {
    const gap = xPositions[j] - xPositions[j - 1];
    if (gap > maxGap) {
      maxGap = gap;
      columnBoundary = (xPositions[j - 1] + xPositions[j]) / 2;
    }
  }
  if (columnBoundary === null || maxGap < gapThreshold) return [items];
  return [
    items.filter((it) => it.x < columnBoundary),
    items.filter((it) => it.x >= columnBoundary)
  ];
}
54
/**
 * Render the text fragments of one column as lines of text.
 *
 * Fragments are grouped into a line when their y lies within Y_TOLERANCE of
 * the y of the fragment that opened that line. Lines are emitted from the
 * largest y to the smallest (pdfjs y=0 is the bottom of the page, so this is
 * top to bottom); inside a line fragments are ordered by ascending x and
 * joined with single spaces. Lines that end up empty are dropped.
 *
 * @param items - Fragments as `{ x, y, str }`
 * @returns The column text, one rendered line per "\n"-separated row
 */
function renderColumn(items) {
  const Y_TOLERANCE = 2;
  const fragmentsByBaseline = /* @__PURE__ */ new Map();
  const baselines = [];
  for (const fragment of items) {
    const { x, y, str } = fragment;
    // Reuse the first known baseline that is close enough, else open a new one.
    let baseline = baselines.find((known) => Math.abs(known - y) <= Y_TOLERANCE);
    if (baseline === undefined) {
      baseline = y;
      baselines.push(y);
      fragmentsByBaseline.set(y, []);
    }
    fragmentsByBaseline.get(baseline).push({ x, str });
  }
  baselines.sort((first, second) => second - first);
  const renderedLines = [];
  for (const baseline of baselines) {
    const fragments = fragmentsByBaseline.get(baseline) ?? [];
    fragments.sort((left, right) => left.x - right.x);
    const text = fragments
      .map((part) => part.str)
      .join(" ")
      .replace(/[^\S\n]+/g, " ")
      .trim();
    if (text) renderedLines.push(text);
  }
  return renderedLines.join("\n");
}
78
+
79
+ exports.extractTextFromPDF = extractTextFromPDF;
80
+ //# sourceMappingURL=index.cjs.map
81
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/pdf/index.ts"],"names":[],"mappings":";;;AASA,eAAsB,mBACpB,IAAA,EACiB;AAEjB,EAAA,IAAI,QAAA;AACJ,EAAA,IAAI;AACF,IAAA,QAAA,GAAW,MAAM,OAAO,YAAY,CAAA;AAAA,EACtC,CAAA,CAAA,MAAQ;AACN,IAAA,MAAM,IAAI,KAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AAEA,EAAA,MAAM,QACJ,IAAA,YAAgB,WAAA,GAAc,IAAI,UAAA,CAAW,IAAI,CAAA,GAAI,IAAA;AAEvD,EAAA,MAAM,GAAA,GAAM,MAAM,QAAA,CAAS,WAAA,CAAY,EAAE,IAAA,EAAM,KAAA,EAAO,CAAA,CAAE,OAAA;AACxD,EAAA,MAAM,QAAkB,EAAC;AAEzB,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,GAAA,CAAI,UAAU,CAAA,EAAA,EAAK;AACtC,IAAA,MAAM,IAAA,GAAO,MAAM,GAAA,CAAI,OAAA,CAAQ,CAAC,CAAA;AAChC,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,cAAA,EAAe;AAG1C,IAAA,MAAM,QAAmB,EAAC;AAE1B,IAAA,KAAA,MAAW,IAAA,IAAQ,QAAQ,KAAA,EAAO;AAChC,MAAA,IAAI,EAAE,KAAA,IAAS,IAAA,CAAA,IAAS,CAAC,IAAA,CAAK,GAAA,CAAI,MAAK,EAAG;AAC1C,MAAA,MAAM,YAAkC,KAAA,CAAM,OAAA;AAAA,QAC3C,IAAA,CAAkC;AAAA,OACrC,GACK,KAAiC,SAAA,GAClC,MAAA;AAEJ,MAAA,IAAI,CAAC,SAAA,EAAW;AAEd,QAAA,KAAA,CAAM,IAAA,CAAK,EAAE,CAAA,EAAG,CAAA,EAAG,GAAG,CAAA,EAAG,GAAA,EAAK,IAAA,CAAK,GAAA,EAAK,CAAA;AAAA,MAC1C,CAAA,MAAO;AACL,QAAA,KAAA,CAAM,IAAA,CAAK,EAAE,CAAA,EAAG,SAAA,CAAU,CAAC,CAAA,EAAG,CAAA,EAAG,SAAA,CAAU,CAAC,CAAA,EAAG,GAAA,EAAK,IAAA,CAAK,KAAK,CAAA;AAAA,MAChE;AAAA,IACF;AAWA,IAAA,MAAM,oBAAA,GAAuB,EAAA;AAC7B,IAAA,MAAM,aAAa,CAAC,GAAG,IAAI,GAAA,CAAI,MAAM,GAAA,CAAI,CAAC,EAAA,KAAO,IAAA,CAAK,MAAM,EAAA,CAAG,CAAC,CAAC,CAAC,CAAC,CAAA,CAAE,IAAA;AAAA,MACnE,CAAC,CAAA,EAAG,CAAA,KAAM,CAAA,GAAI;AAAA,KAChB;AAEA,IAAA,IAAI,cAAA,GAAgC,IAAA;AACpC,IAAA,IAAI,MAAA,GAAS,CAAA;AACb,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,UAAA,CAAW,QAAQ,CAAA,EAAA,EAAK;AAC1C,MAAA,MAAM,MAAM,UAAA,CAAW,CAAC,CAAA,GAAI,UAAA,CAAW,IAAI,CAAC,CAAA;AAC5C,MAAA,IAAI,MAAM,MAAA,EAAQ;AAChB,QAAA,MAAA,GAAS,GAAA;AACT,QAAA,cAAA,GAAA,CAAkB,WAAW,CAAA,GAAI,CAAC,CAAA,GAAI,UAAA,CAAW,CAAC,CAAA,IAAK,CAAA;AAAA,MACzD;AAAA,IACF;AACA,IAAA,IAAI,MAAA,GAAS,sBAAsB,cAAA,GAAiB,IAAA;AAEpD,IAAA,MAAM,OAAA,GACJ,mBAAmB,IAAA,GACf;AAAA,MACE,MAAM,MAAA,CAAO,CAAC,EAAA,KAAO,EAAA,CAAG,IAAI,cAAe,CAAA;AAAA,MAC3C,MAAM,MAAA,CAAO,CA
AC,EAAA,KAAO,EAAA,CAAG,KAAK,cAAe;AAAA,KAC9C,GACA,CAAC,KAAK,CAAA;AAEZ,IAAA,MAAM,cAAc,OAAA,CAAQ,GAAA,CAAI,CAAC,GAAA,KAAQ,YAAA,CAAa,GAAG,CAAC,CAAA;AAC1D,IAAA,KAAA,CAAM,KAAK,WAAA,CAAY,MAAA,CAAO,OAAO,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,EACnD;AAEA,EAAA,OAAO,KAAA,CAAM,KAAK,IAAI,CAAA;AACxB;AAEA,SAAS,aAAa,KAAA,EAA6D;AACjF,EAAA,MAAM,WAAA,GAAc,CAAA;AACpB,EAAA,MAAM,OAAA,uBAA8D,GAAA,EAAI;AACxE,EAAA,MAAM,YAAsB,EAAC;AAE7B,EAAA,KAAA,MAAW,EAAE,CAAA,EAAG,CAAA,EAAG,GAAA,MAAS,KAAA,EAAO;AACjC,IAAA,IAAI,SAAA;AACJ,IAAA,KAAA,MAAW,OAAO,SAAA,EAAW;AAC3B,MAAA,IAAI,IAAA,CAAK,GAAA,CAAI,GAAA,GAAM,CAAC,KAAK,WAAA,EAAa;AACpC,QAAA,SAAA,GAAY,GAAA;AACZ,QAAA;AAAA,MACF;AAAA,IACF;AACA,IAAA,IAAI,cAAc,MAAA,EAAW;AAC3B,MAAA,SAAA,GAAY,CAAA;AACZ,MAAA,SAAA,CAAU,KAAK,CAAC,CAAA;AAChB,MAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,EAAG,EAAE,CAAA;AAAA,IACnB;AACA,IAAA,OAAA,CAAQ,IAAI,SAAS,CAAA,CAAG,KAAK,EAAE,CAAA,EAAG,KAAK,CAAA;AAAA,EACzC;AAGA,EAAA,SAAA,CAAU,IAAA,CAAK,CAAC,CAAA,EAAG,CAAA,KAAM,IAAI,CAAC,CAAA;AAE9B,EAAA,OAAO,SAAA,CACJ,GAAA;AAAA,IAAI,CAAC,GAAA,KAAA,CACH,OAAA,CAAQ,GAAA,CAAI,GAAG,CAAA,IAAK,EAAC,EACnB,IAAA,CAAK,CAAC,CAAA,EAAG,CAAA,KAAM,EAAE,CAAA,GAAI,CAAA,CAAE,CAAC,CAAA,CACxB,GAAA,CAAI,CAAC,EAAA,KAAO,EAAA,CAAG,GAAG,CAAA,CAClB,IAAA,CAAK,GAAG,CAAA,CACR,OAAA,CAAQ,WAAA,EAAa,GAAG,EACxB,IAAA;AAAK,GACV,CACC,MAAA,CAAO,OAAO,CAAA,CACd,KAAK,IAAI,CAAA;AACd","file":"index.cjs","sourcesContent":["/**\n * Extract plain text from a PDF buffer.\n *\n * Requires `pdfjs-dist` to be installed (optional peerDependency):\n * npm install pdfjs-dist\n *\n * @param data - Raw PDF bytes as Uint8Array or ArrayBuffer\n * @returns Extracted text, ready to pass as `resumeText` to analyzeResume\n */\nexport async function extractTextFromPDF(\n data: Uint8Array | ArrayBuffer\n): Promise<string> {\n // ponytail: lazy import keeps core zero-dep; missing peer throws with clear message\n let pdfjsLib: typeof import(\"pdfjs-dist\");\n try {\n pdfjsLib = await import(\"pdfjs-dist\");\n } catch {\n throw new Error(\n \"pdfjs-dist is required for PDF 
extraction. Install it: npm install pdfjs-dist\"\n );\n }\n\n const bytes =\n data instanceof ArrayBuffer ? new Uint8Array(data) : data;\n\n const doc = await pdfjsLib.getDocument({ data: bytes }).promise;\n const pages: string[] = [];\n\n for (let i = 1; i <= doc.numPages; i++) {\n const page = await doc.getPage(i);\n const content = await page.getTextContent();\n\n type RawItem = { x: number; y: number; str: string };\n const items: RawItem[] = [];\n\n for (const item of content.items) {\n if (!(\"str\" in item) || !item.str.trim()) continue;\n const transform: number[] | undefined = Array.isArray(\n (item as { transform?: number[] }).transform\n )\n ? (item as { transform: number[] }).transform\n : undefined;\n\n if (!transform) {\n // No positional info (unit-test mocks) — treat as single-column item\n items.push({ x: 0, y: 0, str: item.str });\n } else {\n items.push({ x: transform[4], y: transform[5], str: item.str });\n }\n }\n\n // Detect column boundary: find the largest x-gap among item start positions.\n // If it exceeds COLUMN_GAP_THRESHOLD, split into left / right columns and\n // process each independently so headers in different columns don't merge.\n // ponytail: single largest-gap heuristic handles the common 2-column resume;\n // n-column needs k-means on x-distribution — upgrade if this proves insufficient.\n // Column boundary heuristic: the largest gap in item x-positions.\n // Real PDF column gutters show as a gap >>80px; normal word spacing is <50px.\n // ponytail: magic number calibrated to PranavRaut2026.pdf (104px gap); raise\n // if single-column PDFs with wide indentation start getting falsely split.\n const COLUMN_GAP_THRESHOLD = 80;\n const xPositions = [...new Set(items.map((it) => Math.round(it.x)))].sort(\n (a, b) => a - b\n );\n\n let columnBoundary: number | null = null;\n let maxGap = 0;\n for (let j = 1; j < xPositions.length; j++) {\n const gap = xPositions[j] - xPositions[j - 1];\n if (gap > maxGap) {\n maxGap = gap;\n 
columnBoundary = (xPositions[j - 1] + xPositions[j]) / 2;\n }\n }\n if (maxGap < COLUMN_GAP_THRESHOLD) columnBoundary = null;\n\n const columns =\n columnBoundary !== null\n ? [\n items.filter((it) => it.x < columnBoundary!),\n items.filter((it) => it.x >= columnBoundary!),\n ]\n : [items];\n\n const columnTexts = columns.map((col) => renderColumn(col));\n pages.push(columnTexts.filter(Boolean).join(\"\\n\"));\n }\n\n return pages.join(\"\\n\");\n}\n\nfunction renderColumn(items: Array<{ x: number; y: number; str: string }>): string {\n const Y_TOLERANCE = 2;\n const lineMap: Map<number, Array<{ x: number; str: string }>> = new Map();\n const lineOrder: number[] = [];\n\n for (const { x, y, str } of items) {\n let bucketKey: number | undefined;\n for (const key of lineOrder) {\n if (Math.abs(key - y) <= Y_TOLERANCE) {\n bucketKey = key;\n break;\n }\n }\n if (bucketKey === undefined) {\n bucketKey = y;\n lineOrder.push(y);\n lineMap.set(y, []);\n }\n lineMap.get(bucketKey)!.push({ x, str });\n }\n\n // pdfjs y=0 is bottom of page — sort descending so top comes first\n lineOrder.sort((a, b) => b - a);\n\n return lineOrder\n .map((key) =>\n (lineMap.get(key) ?? [])\n .sort((a, b) => a.x - b.x)\n .map((it) => it.str)\n .join(\" \")\n .replace(/[^\\S\\n]+/g, \" \")\n .trim()\n )\n .filter(Boolean)\n .join(\"\\n\");\n}\n"]}
@@ -0,0 +1,12 @@
1
/**
 * Reads a PDF document and returns its text as a single string.
 *
 * `pdfjs-dist` is an optional peerDependency and has to be installed for
 * this function to work:
 *   npm install pdfjs-dist
 *
 * @param data - The raw bytes of the PDF, as a Uint8Array or an ArrayBuffer
 * @returns The extracted text, ready to be passed as `resumeText` to analyzeResume
 */
declare function extractTextFromPDF(data: Uint8Array | ArrayBuffer): Promise<string>;
11
+
12
+ export { extractTextFromPDF };
@@ -0,0 +1,12 @@
1
/**
 * Reads a PDF document and returns its text as a single string.
 *
 * `pdfjs-dist` is an optional peerDependency and has to be installed for
 * this function to work:
 *   npm install pdfjs-dist
 *
 * @param data - The raw bytes of the PDF, as a Uint8Array or an ArrayBuffer
 * @returns The extracted text, ready to be passed as `resumeText` to analyzeResume
 */
declare function extractTextFromPDF(data: Uint8Array | ArrayBuffer): Promise<string>;
11
+
12
+ export { extractTextFromPDF };
@@ -0,0 +1,79 @@
1
+ // src/pdf/index.ts
2
/**
 * Extract plain text from a PDF buffer.
 *
 * `pdfjs-dist` is imported lazily and must be installed separately. For every
 * page the text items are collected with their start coordinates, the page is
 * split into a left and a right column when the widest horizontal gap between
 * item start positions reaches the threshold, and each column is rendered
 * line by line with `renderColumn`.
 *
 * @param data - Raw PDF bytes as Uint8Array or ArrayBuffer
 * @returns The text of all pages, joined with "\n"
 * @throws {Error} When `pdfjs-dist` cannot be imported. The message is fixed;
 *   the underlying import failure is exposed on the error's `cause` property.
 */
async function extractTextFromPDF(data) {
  let pdfjsLib;
  try {
    pdfjsLib = await import('pdfjs-dist');
  } catch (importError) {
    // Keep the friendly message, but do not throw away the real reason the
    // import failed (it may be something other than "not installed").
    throw Object.assign(
      new Error(
        "pdfjs-dist is required for PDF extraction. Install it: npm install pdfjs-dist"
      ),
      { cause: importError }
    );
  }
  const bytes = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
  const loadingTask = pdfjsLib.getDocument({ data: bytes });
  try {
    const doc = await loadingTask.promise;
    const pages = [];
    for (let pageNumber = 1; pageNumber <= doc.numPages; pageNumber++) {
      const page = await doc.getPage(pageNumber);
      const content = await page.getTextContent();
      const columns = splitIntoColumns(collectPositionedItems(content.items));
      const columnTexts = columns.map((column) => renderColumn(column));
      pages.push(columnTexts.filter(Boolean).join("\n"));
    }
    return pages.join("\n");
  } finally {
    // Release the document once extraction finishes or fails. The call is
    // guarded because a stand-in for pdfjs may not implement destroy(), and it
    // is best-effort so a cleanup failure never replaces the real outcome.
    if (loadingTask && typeof loadingTask.destroy === "function") {
      try {
        await loadingTask.destroy();
      } catch {
        // ignore cleanup errors
      }
    }
  }
}

/**
 * Turn the text items of one page into `{ x, y, str }` records.
 *
 * Items without a `str` property or with whitespace-only text are skipped.
 * Items whose `transform` is missing, too short, or not numeric are placed at
 * (0, 0) so they never introduce NaN coordinates downstream.
 */
function collectPositionedItems(textItems) {
  const items = [];
  for (const item of textItems) {
    if (!("str" in item) || !item.str.trim()) continue;
    const transform = Array.isArray(item.transform) ? item.transform : void 0;
    const x = transform ? transform[4] : void 0;
    const y = transform ? transform[5] : void 0;
    if (Number.isFinite(x) && Number.isFinite(y)) {
      items.push({ x, y, str: item.str });
    } else {
      items.push({ x: 0, y: 0, str: item.str });
    }
  }
  return items;
}

/**
 * Split items into [left, right] at the midpoint of the widest gap between
 * distinct rounded x start positions, or return [items] when that gap is
 * smaller than `gapThreshold` (a heuristic; 80 by default).
 */
function splitIntoColumns(items, gapThreshold = 80) {
  const xPositions = [...new Set(items.map((it) => Math.round(it.x)))].sort(
    (a, b) => a - b
  );
  let columnBoundary = null;
  let maxGap = 0;
  for (let j = 1; j < xPositions.length; j++) {
    const gap = xPositions[j] - xPositions[j - 1];
    if (gap > maxGap) {
      maxGap = gap;
      columnBoundary = (xPositions[j - 1] + xPositions[j]) / 2;
    }
  }
  if (columnBoundary === null || maxGap < gapThreshold) return [items];
  return [
    items.filter((it) => it.x < columnBoundary),
    items.filter((it) => it.x >= columnBoundary)
  ];
}
52
/**
 * Render the text fragments of one column as lines of text.
 *
 * Fragments are grouped into a line when their y lies within Y_TOLERANCE of
 * the y of the fragment that opened that line. Lines are emitted from the
 * largest y to the smallest (pdfjs y=0 is the bottom of the page, so this is
 * top to bottom); inside a line fragments are ordered by ascending x and
 * joined with single spaces. Lines that end up empty are dropped.
 *
 * @param items - Fragments as `{ x, y, str }`
 * @returns The column text, one rendered line per "\n"-separated row
 */
function renderColumn(items) {
  const Y_TOLERANCE = 2;
  const fragmentsByBaseline = /* @__PURE__ */ new Map();
  const baselines = [];
  for (const fragment of items) {
    const { x, y, str } = fragment;
    // Reuse the first known baseline that is close enough, else open a new one.
    let baseline = baselines.find((known) => Math.abs(known - y) <= Y_TOLERANCE);
    if (baseline === undefined) {
      baseline = y;
      baselines.push(y);
      fragmentsByBaseline.set(y, []);
    }
    fragmentsByBaseline.get(baseline).push({ x, str });
  }
  baselines.sort((first, second) => second - first);
  const renderedLines = [];
  for (const baseline of baselines) {
    const fragments = fragmentsByBaseline.get(baseline) ?? [];
    fragments.sort((left, right) => left.x - right.x);
    const text = fragments
      .map((part) => part.str)
      .join(" ")
      .replace(/[^\S\n]+/g, " ")
      .trim();
    if (text) renderedLines.push(text);
  }
  return renderedLines.join("\n");
}
76
+
77
+ export { extractTextFromPDF };
78
+ //# sourceMappingURL=index.mjs.map
79
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/pdf/index.ts"],"names":[],"mappings":";AASA,eAAsB,mBACpB,IAAA,EACiB;AAEjB,EAAA,IAAI,QAAA;AACJ,EAAA,IAAI;AACF,IAAA,QAAA,GAAW,MAAM,OAAO,YAAY,CAAA;AAAA,EACtC,CAAA,CAAA,MAAQ;AACN,IAAA,MAAM,IAAI,KAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AAEA,EAAA,MAAM,QACJ,IAAA,YAAgB,WAAA,GAAc,IAAI,UAAA,CAAW,IAAI,CAAA,GAAI,IAAA;AAEvD,EAAA,MAAM,GAAA,GAAM,MAAM,QAAA,CAAS,WAAA,CAAY,EAAE,IAAA,EAAM,KAAA,EAAO,CAAA,CAAE,OAAA;AACxD,EAAA,MAAM,QAAkB,EAAC;AAEzB,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,GAAA,CAAI,UAAU,CAAA,EAAA,EAAK;AACtC,IAAA,MAAM,IAAA,GAAO,MAAM,GAAA,CAAI,OAAA,CAAQ,CAAC,CAAA;AAChC,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,cAAA,EAAe;AAG1C,IAAA,MAAM,QAAmB,EAAC;AAE1B,IAAA,KAAA,MAAW,IAAA,IAAQ,QAAQ,KAAA,EAAO;AAChC,MAAA,IAAI,EAAE,KAAA,IAAS,IAAA,CAAA,IAAS,CAAC,IAAA,CAAK,GAAA,CAAI,MAAK,EAAG;AAC1C,MAAA,MAAM,YAAkC,KAAA,CAAM,OAAA;AAAA,QAC3C,IAAA,CAAkC;AAAA,OACrC,GACK,KAAiC,SAAA,GAClC,MAAA;AAEJ,MAAA,IAAI,CAAC,SAAA,EAAW;AAEd,QAAA,KAAA,CAAM,IAAA,CAAK,EAAE,CAAA,EAAG,CAAA,EAAG,GAAG,CAAA,EAAG,GAAA,EAAK,IAAA,CAAK,GAAA,EAAK,CAAA;AAAA,MAC1C,CAAA,MAAO;AACL,QAAA,KAAA,CAAM,IAAA,CAAK,EAAE,CAAA,EAAG,SAAA,CAAU,CAAC,CAAA,EAAG,CAAA,EAAG,SAAA,CAAU,CAAC,CAAA,EAAG,GAAA,EAAK,IAAA,CAAK,KAAK,CAAA;AAAA,MAChE;AAAA,IACF;AAWA,IAAA,MAAM,oBAAA,GAAuB,EAAA;AAC7B,IAAA,MAAM,aAAa,CAAC,GAAG,IAAI,GAAA,CAAI,MAAM,GAAA,CAAI,CAAC,EAAA,KAAO,IAAA,CAAK,MAAM,EAAA,CAAG,CAAC,CAAC,CAAC,CAAC,CAAA,CAAE,IAAA;AAAA,MACnE,CAAC,CAAA,EAAG,CAAA,KAAM,CAAA,GAAI;AAAA,KAChB;AAEA,IAAA,IAAI,cAAA,GAAgC,IAAA;AACpC,IAAA,IAAI,MAAA,GAAS,CAAA;AACb,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,UAAA,CAAW,QAAQ,CAAA,EAAA,EAAK;AAC1C,MAAA,MAAM,MAAM,UAAA,CAAW,CAAC,CAAA,GAAI,UAAA,CAAW,IAAI,CAAC,CAAA;AAC5C,MAAA,IAAI,MAAM,MAAA,EAAQ;AAChB,QAAA,MAAA,GAAS,GAAA;AACT,QAAA,cAAA,GAAA,CAAkB,WAAW,CAAA,GAAI,CAAC,CAAA,GAAI,UAAA,CAAW,CAAC,CAAA,IAAK,CAAA;AAAA,MACzD;AAAA,IACF;AACA,IAAA,IAAI,MAAA,GAAS,sBAAsB,cAAA,GAAiB,IAAA;AAEpD,IAAA,MAAM,OAAA,GACJ,mBAAmB,IAAA,GACf;AAAA,MACE,MAAM,MAAA,CAAO,CAAC,EAAA,KAAO,EAAA,CAAG,IAAI,cAAe,CAAA;AAAA,MAC3C,MAAM,MAAA,CAAO,CAAC
,EAAA,KAAO,EAAA,CAAG,KAAK,cAAe;AAAA,KAC9C,GACA,CAAC,KAAK,CAAA;AAEZ,IAAA,MAAM,cAAc,OAAA,CAAQ,GAAA,CAAI,CAAC,GAAA,KAAQ,YAAA,CAAa,GAAG,CAAC,CAAA;AAC1D,IAAA,KAAA,CAAM,KAAK,WAAA,CAAY,MAAA,CAAO,OAAO,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,EACnD;AAEA,EAAA,OAAO,KAAA,CAAM,KAAK,IAAI,CAAA;AACxB;AAEA,SAAS,aAAa,KAAA,EAA6D;AACjF,EAAA,MAAM,WAAA,GAAc,CAAA;AACpB,EAAA,MAAM,OAAA,uBAA8D,GAAA,EAAI;AACxE,EAAA,MAAM,YAAsB,EAAC;AAE7B,EAAA,KAAA,MAAW,EAAE,CAAA,EAAG,CAAA,EAAG,GAAA,MAAS,KAAA,EAAO;AACjC,IAAA,IAAI,SAAA;AACJ,IAAA,KAAA,MAAW,OAAO,SAAA,EAAW;AAC3B,MAAA,IAAI,IAAA,CAAK,GAAA,CAAI,GAAA,GAAM,CAAC,KAAK,WAAA,EAAa;AACpC,QAAA,SAAA,GAAY,GAAA;AACZ,QAAA;AAAA,MACF;AAAA,IACF;AACA,IAAA,IAAI,cAAc,MAAA,EAAW;AAC3B,MAAA,SAAA,GAAY,CAAA;AACZ,MAAA,SAAA,CAAU,KAAK,CAAC,CAAA;AAChB,MAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,EAAG,EAAE,CAAA;AAAA,IACnB;AACA,IAAA,OAAA,CAAQ,IAAI,SAAS,CAAA,CAAG,KAAK,EAAE,CAAA,EAAG,KAAK,CAAA;AAAA,EACzC;AAGA,EAAA,SAAA,CAAU,IAAA,CAAK,CAAC,CAAA,EAAG,CAAA,KAAM,IAAI,CAAC,CAAA;AAE9B,EAAA,OAAO,SAAA,CACJ,GAAA;AAAA,IAAI,CAAC,GAAA,KAAA,CACH,OAAA,CAAQ,GAAA,CAAI,GAAG,CAAA,IAAK,EAAC,EACnB,IAAA,CAAK,CAAC,CAAA,EAAG,CAAA,KAAM,EAAE,CAAA,GAAI,CAAA,CAAE,CAAC,CAAA,CACxB,GAAA,CAAI,CAAC,EAAA,KAAO,EAAA,CAAG,GAAG,CAAA,CAClB,IAAA,CAAK,GAAG,CAAA,CACR,OAAA,CAAQ,WAAA,EAAa,GAAG,EACxB,IAAA;AAAK,GACV,CACC,MAAA,CAAO,OAAO,CAAA,CACd,KAAK,IAAI,CAAA;AACd","file":"index.mjs","sourcesContent":["/**\n * Extract plain text from a PDF buffer.\n *\n * Requires `pdfjs-dist` to be installed (optional peerDependency):\n * npm install pdfjs-dist\n *\n * @param data - Raw PDF bytes as Uint8Array or ArrayBuffer\n * @returns Extracted text, ready to pass as `resumeText` to analyzeResume\n */\nexport async function extractTextFromPDF(\n data: Uint8Array | ArrayBuffer\n): Promise<string> {\n // ponytail: lazy import keeps core zero-dep; missing peer throws with clear message\n let pdfjsLib: typeof import(\"pdfjs-dist\");\n try {\n pdfjsLib = await import(\"pdfjs-dist\");\n } catch {\n throw new Error(\n \"pdfjs-dist is required for PDF 
extraction. Install it: npm install pdfjs-dist\"\n );\n }\n\n const bytes =\n data instanceof ArrayBuffer ? new Uint8Array(data) : data;\n\n const doc = await pdfjsLib.getDocument({ data: bytes }).promise;\n const pages: string[] = [];\n\n for (let i = 1; i <= doc.numPages; i++) {\n const page = await doc.getPage(i);\n const content = await page.getTextContent();\n\n type RawItem = { x: number; y: number; str: string };\n const items: RawItem[] = [];\n\n for (const item of content.items) {\n if (!(\"str\" in item) || !item.str.trim()) continue;\n const transform: number[] | undefined = Array.isArray(\n (item as { transform?: number[] }).transform\n )\n ? (item as { transform: number[] }).transform\n : undefined;\n\n if (!transform) {\n // No positional info (unit-test mocks) — treat as single-column item\n items.push({ x: 0, y: 0, str: item.str });\n } else {\n items.push({ x: transform[4], y: transform[5], str: item.str });\n }\n }\n\n // Detect column boundary: find the largest x-gap among item start positions.\n // If it exceeds COLUMN_GAP_THRESHOLD, split into left / right columns and\n // process each independently so headers in different columns don't merge.\n // ponytail: single largest-gap heuristic handles the common 2-column resume;\n // n-column needs k-means on x-distribution — upgrade if this proves insufficient.\n // Column boundary heuristic: the largest gap in item x-positions.\n // Real PDF column gutters show as a gap >>80px; normal word spacing is <50px.\n // ponytail: magic number calibrated to PranavRaut2026.pdf (104px gap); raise\n // if single-column PDFs with wide indentation start getting falsely split.\n const COLUMN_GAP_THRESHOLD = 80;\n const xPositions = [...new Set(items.map((it) => Math.round(it.x)))].sort(\n (a, b) => a - b\n );\n\n let columnBoundary: number | null = null;\n let maxGap = 0;\n for (let j = 1; j < xPositions.length; j++) {\n const gap = xPositions[j] - xPositions[j - 1];\n if (gap > maxGap) {\n maxGap = gap;\n 
columnBoundary = (xPositions[j - 1] + xPositions[j]) / 2;\n }\n }\n if (maxGap < COLUMN_GAP_THRESHOLD) columnBoundary = null;\n\n const columns =\n columnBoundary !== null\n ? [\n items.filter((it) => it.x < columnBoundary!),\n items.filter((it) => it.x >= columnBoundary!),\n ]\n : [items];\n\n const columnTexts = columns.map((col) => renderColumn(col));\n pages.push(columnTexts.filter(Boolean).join(\"\\n\"));\n }\n\n return pages.join(\"\\n\");\n}\n\nfunction renderColumn(items: Array<{ x: number; y: number; str: string }>): string {\n const Y_TOLERANCE = 2;\n const lineMap: Map<number, Array<{ x: number; str: string }>> = new Map();\n const lineOrder: number[] = [];\n\n for (const { x, y, str } of items) {\n let bucketKey: number | undefined;\n for (const key of lineOrder) {\n if (Math.abs(key - y) <= Y_TOLERANCE) {\n bucketKey = key;\n break;\n }\n }\n if (bucketKey === undefined) {\n bucketKey = y;\n lineOrder.push(y);\n lineMap.set(y, []);\n }\n lineMap.get(bucketKey)!.push({ x, str });\n }\n\n // pdfjs y=0 is bottom of page — sort descending so top comes first\n lineOrder.sort((a, b) => b - a);\n\n return lineOrder\n .map((key) =>\n (lineMap.get(key) ?? [])\n .sort((a, b) => a.x - b.x)\n .map((it) => it.str)\n .join(\" \")\n .replace(/[^\\S\\n]+/g, \" \")\n .trim()\n )\n .filter(Boolean)\n .join(\"\\n\");\n}\n"]}
@@ -0,0 +1,319 @@
1
/** The section names a resume can be divided into. */
type ResumeSection =
  | "summary"
  | "experience"
  | "skills"
  | "education"
  | "projects"
  | "certifications";

/** A date range belonging to an experience entry; every field is optional. */
interface ParsedDateRange {
  raw?: string;
  start?: string;
  end?: string;
  durationInMonths?: number;
  /** Start and end as numeric year/month, used for overlap-aware summing. */
  startYear?: number;
  startMonth?: number;
  endYear?: number;
  endMonth?: number;
}

/** One experience entry of a resume; every field is optional. */
interface ParsedExperienceEntry {
  title?: string;
  company?: string;
  location?: string;
  dates?: ParsedDateRange;
  description?: string;
}

/** An achievement statement together with its strong/weak classification. */
interface ParsedAchievement {
  text: string;
  strength: "strong" | "weak";
  reason: string;
}

/** A language with an optional proficiency level. */
interface ParsedLanguage {
  /** Canonical language name in lowercase, e.g. "german". */
  name: string;
  /** The level as written/normalized, e.g. "c1", "fluent", "native". */
  level?: string;
  /** CEFR-aligned rank from 1 (A1/basic) to 6 (C2/native), used to compare proficiency. */
  levelRank?: number;
}

/** Everything extracted from a resume. */
interface ParsedResume {
  raw: string;
  normalizedText: string;
  detectedSections: Array<ResumeSection>;
  sectionContent: Partial<Record<ResumeSection, string>>;
  skills: Array<string>;
  jobTitles: Array<string>;
  actionVerbs: Array<string>;
  /** Weak verbs (helped, worked, performed, ...) that occur in the resume text. */
  weakVerbs: Array<string>;
  /** Experience bullets, each classified as a strong or weak achievement statement. */
  achievements: Array<ParsedAchievement>;
  educationEntries: Array<string>;
  experience: Array<ParsedExperienceEntry>;
  totalExperienceYears: number;
  keywords: Array<string>;
  languages: Array<ParsedLanguage>;
  warnings: Array<string>;
}

/** Everything extracted from a job description (JD). */
interface ParsedJobDescription {
  raw: string;
  normalizedText: string;
  requiredSkills: Array<string>;
  preferredSkills: Array<string>;
  roleKeywords: Array<string>;
  keywords: Array<string>;
  minExperienceYears?: number;
  educationRequirements: Array<string>;
  /** Maps a canonical keyword to the surface form (original casing/spelling) used in the JD. */
  keywordSurfaceForms: Record<string, string>;
  requiredLanguages: Array<ParsedLanguage>;
}
65
+
66
/** One numeric weight per scoring dimension. */
interface ATSWeights {
  skills: number;
  experience: number;
  keywords: number;
  education: number;
}

/** Maps a string key to a list of alias strings. */
type SkillAliases = Record<string, Array<string>>;

/** The categories a keyword can be filed under. */
type KeywordCategory =
  | "technical"
  | "tool"
  | "concept"
  | "soft"
  | "marketing"
  | "domain";

/** A canonical keyword with its aliases and category. */
interface KeywordEntry {
  canonical: string;
  aliases: Array<string>;
  category: KeywordCategory;
}

/** A list of keyword entries. */
type KeywordRegistry = Array<KeywordEntry>;

/** A named profile listing mandatory and optional skills. */
interface ATSProfile {
  name: string;
  mandatorySkills: Array<string>;
  optionalSkills: Array<string>;
  minExperience?: number;
}

/** Density bounds for keywords and the penalty for exceeding the upper bound. */
interface KeywordDensityConfig {
  /** Density below which a keyword counts as underused (informational only). */
  min: number;
  /** Density above which a keyword counts as stuffed. */
  max: number;
  /** Penalty that applies once density exceeds `max`. */
  overusePenalty: number;
}

/** Optional penalty values, one per missing resume section. */
interface SectionPenaltyConfig {
  missingSummary?: number;
  missingExperience?: number;
  missingSkills?: number;
  missingEducation?: number;
}
100
/** A rule: a penalty and optional warning paired with a predicate over the rule context. */
interface ATSRule {
  id: string;
  description?: string;
  penalty: number;
  warning?: string;
  condition: (context: RuleContext) => boolean;
}

/** Caller-supplied configuration; every field is optional. */
interface ATSConfig {
  weights?: Partial<ATSWeights>;
  skillAliases?: SkillAliases;
  /**
   * Categorized keyword/alias entries (technical, tool, concept, soft, marketing, domain).
   * Merged over the default registry by canonical term.
   */
  keywordRegistry?: KeywordRegistry;
  profile?: ATSProfile;
  rules?: Array<ATSRule>;
  keywordDensity?: KeywordDensityConfig;
  sectionPenalties?: SectionPenaltyConfig;
  allowPartialMatches?: boolean;
  /**
   * ISO date string (e.g. "2024-06-01") that serves as "today" when the
   * duration of an open-ended date range ("Present"/"Current"/"Now") is computed.
   * Leave it out to use the actual current date (live/production behaviour).
   * Pin it to a fixed value in tests or batch processing to guarantee determinism.
   */
  referenceDate?: string;
}

/** `ATSWeights` extended with a `normalizedTotal` field. */
interface NormalizedWeights extends ATSWeights {
  /** Weights normalized so they sum to 1. */
  normalizedTotal: number;
}

/** The resolved form of `ATSConfig`; only `profile` and `referenceDate` stay optional. */
interface ResolvedATSConfig {
  weights: NormalizedWeights;
  skillAliases: SkillAliases;
  keywordRegistry: KeywordRegistry;
  /** Maps a canonical term to its category; derived once from `keywordRegistry`. */
  categoryIndex: Map<string, KeywordCategory>;
  profile?: ATSProfile;
  rules: Array<ATSRule>;
  keywordDensity: KeywordDensityConfig;
  sectionPenalties: Required<SectionPenaltyConfig>;
  allowPartialMatches: boolean;
  /** Resolved reference date used for "Present" duration calculations. */
  referenceDate?: Date;
}

/** The value handed to `ATSRule.condition`. */
interface RuleContext {
  resume: ParsedResume;
  job: ParsedJobDescription;
  weights: NormalizedWeights;
  keywordDensity: KeywordDensityConfig;
  breakdown?: ATSBreakdown;
  matchedKeywords?: Array<string>;
  overusedKeywords?: Array<string>;
}
152
+
153
+ /**
154
+ * LLM v2 Support Types - Optional, Backward Compatible
155
+ */
156
+
157
/**
 * JSON Schema object used to validate responses.
 */
interface JSONSchema {
  type: string;
  properties?: Record<string, unknown>;
  required?: Array<string>;
  items?: unknown;
  [key: string]: unknown;
}

/**
 * Abstraction over an LLM client; the user supplies the implementation.
 * This keeps the choice of LLM provider open without adding direct dependencies.
 */
interface LLMClient {
  /**
   * Create a structured completion from the LLM.
   * Implementations must validate and return only valid JSON matching the schema.
   */
  createCompletion(input: {
    model: string;
    messages: Array<{
      role: "system" | "user";
      content: string;
    }>;
    max_tokens: number;
    response_format: JSONSchema;
  }): Promise<{
    content: unknown;
    usage?: {
      prompt_tokens?: number;
      completion_tokens?: number;
      total_tokens?: number;
    };
  }>;
}

/**
 * Budget limits for LLM usage; guards against runaway spending.
 */
interface LLMBudget {
  maxCalls: number;
  maxTokensPerCall: number;
  maxTotalTokens: number;
}

/**
 * Switches for the individual LLM capabilities.
 */
interface LLMFeatures {
  skillNormalization?: boolean;
  sectionClassification?: boolean;
  suggestions?: boolean;
}

/**
 * The full LLM configuration.
 */
interface LLMConfig {
  /** LLM client provided by the user (e.g., an OpenAI wrapper) */
  client: LLMClient;
  /** Model identifiers */
  models?: {
    /** Default model for fast, structured output (e.g., "gpt-4o-mini") */
    default: string;
    /** Optional thinking model for complex reasoning (e.g., "o4-mini") */
    thinking?: string;
  };
  /** Budget constraints */
  limits: LLMBudget;
  /** The LLM features to enable */
  enable?: LLMFeatures;
  /** Request timeout, in milliseconds */
  timeoutMs?: number;
}
229
/**
 * AnalyzeResumeInput variant that carries optional LLM support.
 */
interface AnalyzeResumeInputV2 {
  resumeText: string;
  jobDescription: string;
  config?: ATSConfig;
  llm?: LLMConfig;
}

/**
 * LLM usage counters, intended for debugging.
 */
interface LLMUsageStats {
  totalCalls: number;
  totalTokensUsed: number;
  callsRemaining: number;
  tokensRemaining: number;
  features: Partial<Record<keyof LLMFeatures, boolean>>;
}

/**
 * Outcome of an LLM operation, including fallback information.
 */
interface LLMResult<T> {
  success: boolean;
  data?: T;
  fallback: boolean;
  error?: string;
  tokensUsed?: number;
}
258
+
259
/** One number per scoring dimension. */
interface ATSBreakdown {
  skills: number;
  experience: number;
  keywords: number;
  education: number;
}

/** Input of an analysis: the two texts plus optional configuration. */
interface AnalyzeResumeInput {
  resumeText: string;
  jobDescription: string;
  config?: ATSConfig;
  llm?: LLMConfig;
}

/** Weight figures for a single keyword. */
interface KeywordWeight {
  term: string;
  category: KeywordCategory;
  /** How important the term is in the job description (location + frequency based). */
  jdWeight: number;
  /** How often the term appears in the resume. */
  resumeWeight: number;
  /** Alias of jdWeight — how much the term matters for the role. */
  importance: number;
}

/** The result of an analysis. */
interface ATSAnalysisResult {
  score: number;
  breakdown: ATSBreakdown;
  /** Skills present in the resume that satisfy JD + profile requirements. */
  matchedSkills: Array<string>;
  /** Required skills that the resume lacks. */
  missingSkills: Array<string>;
  matchedKeywords: Array<string>;
  missingKeywords: Array<string>;
  overusedKeywords: Array<string>;
  /** Matched/missing keywords grouped by category (technical, tool, concept, soft, marketing, domain). */
  keywordsByCategory: Record<
    KeywordCategory,
    {
      matched: Array<string>;
      missing: Array<string>;
    }
  >;
  /** JD importance and resume usage per keyword, for callers that want the raw numbers. */
  keywordWeights: Array<KeywordWeight>;
  /** How many resume achievement bullets were classified as strong and as weak. */
  achievementStrength: {
    strong: number;
    weak: number;
  };
  /** JD-required languages for which the resume meets or exceeds the proficiency. */
  matchedLanguages: Array<ParsedLanguage>;
  /** JD-required languages that are absent from the resume or below the required proficiency. */
  missingLanguages: Array<ParsedLanguage>;
  suggestions: Array<string>;
  warnings: Array<string>;
  /** Years short of the JD's minimum experience requirement; 0 when the requirement is met. */
  experienceGap: number;
  /** Resume sections the parser detected successfully (e.g. "summary", "skills"). */
  detectedSections: Array<string>;
  /** Total years of experience parsed from the date ranges of the resume. */
  parsedExperienceYears: number;
  /** Experience entries parsed from the resume, with titles and date ranges. */
  experienceEntries: Array<ParsedExperienceEntry>;
}
318
+
319
+ export type { ATSProfile as A, JSONSchema as J, KeywordRegistry as K, LLMConfig as L, NormalizedWeights as N, ParsedDateRange as P, ResolvedATSConfig as R, SkillAliases as S, LLMResult as a, LLMBudget as b, AnalyzeResumeInput as c, ATSAnalysisResult as d, ATSWeights as e, KeywordCategory as f, KeywordEntry as g, KeywordDensityConfig as h, SectionPenaltyConfig as i, ATSRule as j, ATSConfig as k, RuleContext as l, ResumeSection as m, ParsedExperienceEntry as n, ParsedAchievement as o, ParsedLanguage as p, ParsedResume as q, ParsedJobDescription as r, ATSBreakdown as s, KeywordWeight as t, LLMClient as u, LLMFeatures as v, AnalyzeResumeInputV2 as w, LLMUsageStats as x };