@pranavraut033/ats-checker 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Extract plain text from a PDF buffer.
3
+ *
4
+ * Requires `pdfjs-dist` to be installed (optional peerDependency):
5
+ * npm install pdfjs-dist
6
+ *
7
+ * @param data - Raw PDF bytes as Uint8Array or ArrayBuffer
8
+ * @returns Extracted text, ready to pass as `resumeText` to analyzeResume
+ * @throws Error (as a rejection of the returned promise) when `pdfjs-dist` cannot be imported
9
+ */
10
+ declare function extractTextFromPDF(data: Uint8Array | ArrayBuffer): Promise<string>;
11
+
12
+ export { extractTextFromPDF };
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Extract plain text from a PDF buffer.
3
+ *
4
+ * Requires `pdfjs-dist` to be installed (optional peerDependency):
5
+ * npm install pdfjs-dist
6
+ *
7
+ * @param data - Raw PDF bytes as Uint8Array or ArrayBuffer
8
+ * @returns Extracted text, ready to pass as `resumeText` to analyzeResume
+ * @throws Error (as a rejection of the returned promise) when `pdfjs-dist` cannot be imported
9
+ */
10
+ declare function extractTextFromPDF(data: Uint8Array | ArrayBuffer): Promise<string>;
11
+
12
+ export { extractTextFromPDF };
@@ -0,0 +1,81 @@
1
+ 'use strict';
2
+
3
+ // src/pdf/index.ts
4
+ async function extractTextFromPDF(data) {
5
+ let pdfjsLib;
6
+ try {
7
+ pdfjsLib = await import('pdfjs-dist');
8
+ } catch {
9
+ throw new Error(
10
+ "pdfjs-dist is required for PDF extraction. Install it: npm install pdfjs-dist"
11
+ );
12
+ }
13
+ const bytes = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
14
+ const doc = await pdfjsLib.getDocument({ data: bytes }).promise;
15
+ const pages = [];
16
+ for (let i = 1; i <= doc.numPages; i++) {
17
+ const page = await doc.getPage(i);
18
+ const content = await page.getTextContent();
19
+ const items = [];
20
+ for (const item of content.items) {
21
+ if (!("str" in item) || !item.str.trim()) continue;
22
+ const transform = Array.isArray(
23
+ item.transform
24
+ ) ? item.transform : void 0;
25
+ if (!transform) {
26
+ items.push({ x: 0, y: 0, str: item.str });
27
+ } else {
28
+ items.push({ x: transform[4], y: transform[5], str: item.str });
29
+ }
30
+ }
31
+ const COLUMN_GAP_THRESHOLD = 80;
32
+ const xPositions = [...new Set(items.map((it) => Math.round(it.x)))].sort(
33
+ (a, b) => a - b
34
+ );
35
+ let columnBoundary = null;
36
+ let maxGap = 0;
37
+ for (let j = 1; j < xPositions.length; j++) {
38
+ const gap = xPositions[j] - xPositions[j - 1];
39
+ if (gap > maxGap) {
40
+ maxGap = gap;
41
+ columnBoundary = (xPositions[j - 1] + xPositions[j]) / 2;
42
+ }
43
+ }
44
+ if (maxGap < COLUMN_GAP_THRESHOLD) columnBoundary = null;
45
+ const columns = columnBoundary !== null ? [
46
+ items.filter((it) => it.x < columnBoundary),
47
+ items.filter((it) => it.x >= columnBoundary)
48
+ ] : [items];
49
+ const columnTexts = columns.map((col) => renderColumn(col));
50
+ pages.push(columnTexts.filter(Boolean).join("\n"));
51
+ }
52
+ return pages.join("\n");
53
+ }
54
/**
 * Render one column of positioned text fragments as newline-separated lines.
 *
 * Fragments whose `y` lies within `Y_TOLERANCE` of an already-seen baseline
 * join that baseline's line (first matching baseline wins). Lines are emitted
 * from the largest `y` to the smallest, fragments within a line from the
 * smallest `x` to the largest, joined by single spaces.
 *
 * @param items - Fragments as `{ x, y, str }`.
 * @returns The column text; lines that are empty after trimming are dropped.
 */
function renderColumn(items) {
  const Y_TOLERANCE = 2;
  const fragmentsByBaseline = /* @__PURE__ */ new Map();
  const baselines = [];

  for (const fragment of items) {
    const existing = baselines.find(
      (candidate) => Math.abs(candidate - fragment.y) <= Y_TOLERANCE
    );
    const baseline = existing === void 0 ? fragment.y : existing;
    if (existing === void 0) {
      // First fragment on this line: its y becomes the line's baseline key.
      baselines.push(baseline);
      fragmentsByBaseline.set(baseline, []);
    }
    fragmentsByBaseline.get(baseline).push({ x: fragment.x, str: fragment.str });
  }

  // Larger y first, so output runs from the highest baseline downwards.
  baselines.sort((first, second) => second - first);

  const renderedLines = [];
  for (const baseline of baselines) {
    const fragments = fragmentsByBaseline.get(baseline) ?? [];
    fragments.sort((left, right) => left.x - right.x);
    const text = fragments
      .map((fragment) => fragment.str)
      .join(" ")
      .replace(/[^\S\n]+/g, " ")
      .trim();
    if (text) renderedLines.push(text);
  }
  return renderedLines.join("\n");
}
78
+
79
+ exports.extractTextFromPDF = extractTextFromPDF;
80
+ //# sourceMappingURL=index.js.map
81
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/pdf/index.ts"],"names":[],"mappings":";;;AASA,eAAsB,mBACpB,IAAA,EACiB;AAEjB,EAAA,IAAI,QAAA;AACJ,EAAA,IAAI;AACF,IAAA,QAAA,GAAW,MAAM,OAAO,YAAY,CAAA;AAAA,EACtC,CAAA,CAAA,MAAQ;AACN,IAAA,MAAM,IAAI,KAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AAEA,EAAA,MAAM,QACJ,IAAA,YAAgB,WAAA,GAAc,IAAI,UAAA,CAAW,IAAI,CAAA,GAAI,IAAA;AAEvD,EAAA,MAAM,GAAA,GAAM,MAAM,QAAA,CAAS,WAAA,CAAY,EAAE,IAAA,EAAM,KAAA,EAAO,CAAA,CAAE,OAAA;AACxD,EAAA,MAAM,QAAkB,EAAC;AAEzB,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,GAAA,CAAI,UAAU,CAAA,EAAA,EAAK;AACtC,IAAA,MAAM,IAAA,GAAO,MAAM,GAAA,CAAI,OAAA,CAAQ,CAAC,CAAA;AAChC,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,cAAA,EAAe;AAG1C,IAAA,MAAM,QAAmB,EAAC;AAE1B,IAAA,KAAA,MAAW,IAAA,IAAQ,QAAQ,KAAA,EAAO;AAChC,MAAA,IAAI,EAAE,KAAA,IAAS,IAAA,CAAA,IAAS,CAAC,IAAA,CAAK,GAAA,CAAI,MAAK,EAAG;AAC1C,MAAA,MAAM,YAAkC,KAAA,CAAM,OAAA;AAAA,QAC3C,IAAA,CAAkC;AAAA,OACrC,GACK,KAAiC,SAAA,GAClC,MAAA;AAEJ,MAAA,IAAI,CAAC,SAAA,EAAW;AAEd,QAAA,KAAA,CAAM,IAAA,CAAK,EAAE,CAAA,EAAG,CAAA,EAAG,GAAG,CAAA,EAAG,GAAA,EAAK,IAAA,CAAK,GAAA,EAAK,CAAA;AAAA,MAC1C,CAAA,MAAO;AACL,QAAA,KAAA,CAAM,IAAA,CAAK,EAAE,CAAA,EAAG,SAAA,CAAU,CAAC,CAAA,EAAG,CAAA,EAAG,SAAA,CAAU,CAAC,CAAA,EAAG,GAAA,EAAK,IAAA,CAAK,KAAK,CAAA;AAAA,MAChE;AAAA,IACF;AAWA,IAAA,MAAM,oBAAA,GAAuB,EAAA;AAC7B,IAAA,MAAM,aAAa,CAAC,GAAG,IAAI,GAAA,CAAI,MAAM,GAAA,CAAI,CAAC,EAAA,KAAO,IAAA,CAAK,MAAM,EAAA,CAAG,CAAC,CAAC,CAAC,CAAC,CAAA,CAAE,IAAA;AAAA,MACnE,CAAC,CAAA,EAAG,CAAA,KAAM,CAAA,GAAI;AAAA,KAChB;AAEA,IAAA,IAAI,cAAA,GAAgC,IAAA;AACpC,IAAA,IAAI,MAAA,GAAS,CAAA;AACb,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,UAAA,CAAW,QAAQ,CAAA,EAAA,EAAK;AAC1C,MAAA,MAAM,MAAM,UAAA,CAAW,CAAC,CAAA,GAAI,UAAA,CAAW,IAAI,CAAC,CAAA;AAC5C,MAAA,IAAI,MAAM,MAAA,EAAQ;AAChB,QAAA,MAAA,GAAS,GAAA;AACT,QAAA,cAAA,GAAA,CAAkB,WAAW,CAAA,GAAI,CAAC,CAAA,GAAI,UAAA,CAAW,CAAC,CAAA,IAAK,CAAA;AAAA,MACzD;AAAA,IACF;AACA,IAAA,IAAI,MAAA,GAAS,sBAAsB,cAAA,GAAiB,IAAA;AAEpD,IAAA,MAAM,OAAA,GACJ,mBAAmB,IAAA,GACf;AAAA,MACE,MAAM,MAAA,CAAO,CAAC,EAAA,KAAO,EAAA,CAAG,IAAI,cAAe,CAAA;AAAA,MAC3C,MAAM,MAAA,CAAO,CA
AC,EAAA,KAAO,EAAA,CAAG,KAAK,cAAe;AAAA,KAC9C,GACA,CAAC,KAAK,CAAA;AAEZ,IAAA,MAAM,cAAc,OAAA,CAAQ,GAAA,CAAI,CAAC,GAAA,KAAQ,YAAA,CAAa,GAAG,CAAC,CAAA;AAC1D,IAAA,KAAA,CAAM,KAAK,WAAA,CAAY,MAAA,CAAO,OAAO,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,EACnD;AAEA,EAAA,OAAO,KAAA,CAAM,KAAK,IAAI,CAAA;AACxB;AAEA,SAAS,aAAa,KAAA,EAA6D;AACjF,EAAA,MAAM,WAAA,GAAc,CAAA;AACpB,EAAA,MAAM,OAAA,uBAA8D,GAAA,EAAI;AACxE,EAAA,MAAM,YAAsB,EAAC;AAE7B,EAAA,KAAA,MAAW,EAAE,CAAA,EAAG,CAAA,EAAG,GAAA,MAAS,KAAA,EAAO;AACjC,IAAA,IAAI,SAAA;AACJ,IAAA,KAAA,MAAW,OAAO,SAAA,EAAW;AAC3B,MAAA,IAAI,IAAA,CAAK,GAAA,CAAI,GAAA,GAAM,CAAC,KAAK,WAAA,EAAa;AACpC,QAAA,SAAA,GAAY,GAAA;AACZ,QAAA;AAAA,MACF;AAAA,IACF;AACA,IAAA,IAAI,cAAc,MAAA,EAAW;AAC3B,MAAA,SAAA,GAAY,CAAA;AACZ,MAAA,SAAA,CAAU,KAAK,CAAC,CAAA;AAChB,MAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,EAAG,EAAE,CAAA;AAAA,IACnB;AACA,IAAA,OAAA,CAAQ,IAAI,SAAS,CAAA,CAAG,KAAK,EAAE,CAAA,EAAG,KAAK,CAAA;AAAA,EACzC;AAGA,EAAA,SAAA,CAAU,IAAA,CAAK,CAAC,CAAA,EAAG,CAAA,KAAM,IAAI,CAAC,CAAA;AAE9B,EAAA,OAAO,SAAA,CACJ,GAAA;AAAA,IAAI,CAAC,GAAA,KAAA,CACH,OAAA,CAAQ,GAAA,CAAI,GAAG,CAAA,IAAK,EAAC,EACnB,IAAA,CAAK,CAAC,CAAA,EAAG,CAAA,KAAM,EAAE,CAAA,GAAI,CAAA,CAAE,CAAC,CAAA,CACxB,GAAA,CAAI,CAAC,EAAA,KAAO,EAAA,CAAG,GAAG,CAAA,CAClB,IAAA,CAAK,GAAG,CAAA,CACR,OAAA,CAAQ,WAAA,EAAa,GAAG,EACxB,IAAA;AAAK,GACV,CACC,MAAA,CAAO,OAAO,CAAA,CACd,KAAK,IAAI,CAAA;AACd","file":"index.js","sourcesContent":["/**\n * Extract plain text from a PDF buffer.\n *\n * Requires `pdfjs-dist` to be installed (optional peerDependency):\n * npm install pdfjs-dist\n *\n * @param data - Raw PDF bytes as Uint8Array or ArrayBuffer\n * @returns Extracted text, ready to pass as `resumeText` to analyzeResume\n */\nexport async function extractTextFromPDF(\n data: Uint8Array | ArrayBuffer\n): Promise<string> {\n // ponytail: lazy import keeps core zero-dep; missing peer throws with clear message\n let pdfjsLib: typeof import(\"pdfjs-dist\");\n try {\n pdfjsLib = await import(\"pdfjs-dist\");\n } catch {\n throw new Error(\n \"pdfjs-dist is required for PDF 
extraction. Install it: npm install pdfjs-dist\"\n );\n }\n\n const bytes =\n data instanceof ArrayBuffer ? new Uint8Array(data) : data;\n\n const doc = await pdfjsLib.getDocument({ data: bytes }).promise;\n const pages: string[] = [];\n\n for (let i = 1; i <= doc.numPages; i++) {\n const page = await doc.getPage(i);\n const content = await page.getTextContent();\n\n type RawItem = { x: number; y: number; str: string };\n const items: RawItem[] = [];\n\n for (const item of content.items) {\n if (!(\"str\" in item) || !item.str.trim()) continue;\n const transform: number[] | undefined = Array.isArray(\n (item as { transform?: number[] }).transform\n )\n ? (item as { transform: number[] }).transform\n : undefined;\n\n if (!transform) {\n // No positional info (unit-test mocks) — treat as single-column item\n items.push({ x: 0, y: 0, str: item.str });\n } else {\n items.push({ x: transform[4], y: transform[5], str: item.str });\n }\n }\n\n // Detect column boundary: find the largest x-gap among item start positions.\n // If it exceeds COLUMN_GAP_THRESHOLD, split into left / right columns and\n // process each independently so headers in different columns don't merge.\n // ponytail: single largest-gap heuristic handles the common 2-column resume;\n // n-column needs k-means on x-distribution — upgrade if this proves insufficient.\n // Column boundary heuristic: the largest gap in item x-positions.\n // Real PDF column gutters show as a gap >>80px; normal word spacing is <50px.\n // ponytail: magic number calibrated to PranavRaut2026.pdf (104px gap); raise\n // if single-column PDFs with wide indentation start getting falsely split.\n const COLUMN_GAP_THRESHOLD = 80;\n const xPositions = [...new Set(items.map((it) => Math.round(it.x)))].sort(\n (a, b) => a - b\n );\n\n let columnBoundary: number | null = null;\n let maxGap = 0;\n for (let j = 1; j < xPositions.length; j++) {\n const gap = xPositions[j] - xPositions[j - 1];\n if (gap > maxGap) {\n maxGap = gap;\n 
columnBoundary = (xPositions[j - 1] + xPositions[j]) / 2;\n }\n }\n if (maxGap < COLUMN_GAP_THRESHOLD) columnBoundary = null;\n\n const columns =\n columnBoundary !== null\n ? [\n items.filter((it) => it.x < columnBoundary!),\n items.filter((it) => it.x >= columnBoundary!),\n ]\n : [items];\n\n const columnTexts = columns.map((col) => renderColumn(col));\n pages.push(columnTexts.filter(Boolean).join(\"\\n\"));\n }\n\n return pages.join(\"\\n\");\n}\n\nfunction renderColumn(items: Array<{ x: number; y: number; str: string }>): string {\n const Y_TOLERANCE = 2;\n const lineMap: Map<number, Array<{ x: number; str: string }>> = new Map();\n const lineOrder: number[] = [];\n\n for (const { x, y, str } of items) {\n let bucketKey: number | undefined;\n for (const key of lineOrder) {\n if (Math.abs(key - y) <= Y_TOLERANCE) {\n bucketKey = key;\n break;\n }\n }\n if (bucketKey === undefined) {\n bucketKey = y;\n lineOrder.push(y);\n lineMap.set(y, []);\n }\n lineMap.get(bucketKey)!.push({ x, str });\n }\n\n // pdfjs y=0 is bottom of page — sort descending so top comes first\n lineOrder.sort((a, b) => b - a);\n\n return lineOrder\n .map((key) =>\n (lineMap.get(key) ?? [])\n .sort((a, b) => a.x - b.x)\n .map((it) => it.str)\n .join(\" \")\n .replace(/[^\\S\\n]+/g, \" \")\n .trim()\n )\n .filter(Boolean)\n .join(\"\\n\");\n}\n"]}
@@ -0,0 +1,79 @@
1
+ // src/pdf/index.ts
2
+ async function extractTextFromPDF(data) {
3
+ let pdfjsLib;
4
+ try {
5
+ pdfjsLib = await import('pdfjs-dist');
6
+ } catch {
7
+ throw new Error(
8
+ "pdfjs-dist is required for PDF extraction. Install it: npm install pdfjs-dist"
9
+ );
10
+ }
11
+ const bytes = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
12
+ const doc = await pdfjsLib.getDocument({ data: bytes }).promise;
13
+ const pages = [];
14
+ for (let i = 1; i <= doc.numPages; i++) {
15
+ const page = await doc.getPage(i);
16
+ const content = await page.getTextContent();
17
+ const items = [];
18
+ for (const item of content.items) {
19
+ if (!("str" in item) || !item.str.trim()) continue;
20
+ const transform = Array.isArray(
21
+ item.transform
22
+ ) ? item.transform : void 0;
23
+ if (!transform) {
24
+ items.push({ x: 0, y: 0, str: item.str });
25
+ } else {
26
+ items.push({ x: transform[4], y: transform[5], str: item.str });
27
+ }
28
+ }
29
+ const COLUMN_GAP_THRESHOLD = 80;
30
+ const xPositions = [...new Set(items.map((it) => Math.round(it.x)))].sort(
31
+ (a, b) => a - b
32
+ );
33
+ let columnBoundary = null;
34
+ let maxGap = 0;
35
+ for (let j = 1; j < xPositions.length; j++) {
36
+ const gap = xPositions[j] - xPositions[j - 1];
37
+ if (gap > maxGap) {
38
+ maxGap = gap;
39
+ columnBoundary = (xPositions[j - 1] + xPositions[j]) / 2;
40
+ }
41
+ }
42
+ if (maxGap < COLUMN_GAP_THRESHOLD) columnBoundary = null;
43
+ const columns = columnBoundary !== null ? [
44
+ items.filter((it) => it.x < columnBoundary),
45
+ items.filter((it) => it.x >= columnBoundary)
46
+ ] : [items];
47
+ const columnTexts = columns.map((col) => renderColumn(col));
48
+ pages.push(columnTexts.filter(Boolean).join("\n"));
49
+ }
50
+ return pages.join("\n");
51
+ }
52
/**
 * Render one column of positioned text fragments as newline-separated lines.
 *
 * Fragments whose `y` lies within `Y_TOLERANCE` of an already-seen baseline
 * join that baseline's line (first matching baseline wins). Lines are emitted
 * from the largest `y` to the smallest, fragments within a line from the
 * smallest `x` to the largest, joined by single spaces.
 *
 * @param items - Fragments as `{ x, y, str }`.
 * @returns The column text; lines that are empty after trimming are dropped.
 */
function renderColumn(items) {
  const Y_TOLERANCE = 2;
  const fragmentsByBaseline = /* @__PURE__ */ new Map();
  const baselines = [];

  for (const fragment of items) {
    const existing = baselines.find(
      (candidate) => Math.abs(candidate - fragment.y) <= Y_TOLERANCE
    );
    const baseline = existing === void 0 ? fragment.y : existing;
    if (existing === void 0) {
      // First fragment on this line: its y becomes the line's baseline key.
      baselines.push(baseline);
      fragmentsByBaseline.set(baseline, []);
    }
    fragmentsByBaseline.get(baseline).push({ x: fragment.x, str: fragment.str });
  }

  // Larger y first, so output runs from the highest baseline downwards.
  baselines.sort((first, second) => second - first);

  const renderedLines = [];
  for (const baseline of baselines) {
    const fragments = fragmentsByBaseline.get(baseline) ?? [];
    fragments.sort((left, right) => left.x - right.x);
    const text = fragments
      .map((fragment) => fragment.str)
      .join(" ")
      .replace(/[^\S\n]+/g, " ")
      .trim();
    if (text) renderedLines.push(text);
  }
  return renderedLines.join("\n");
}
76
+
77
+ export { extractTextFromPDF };
78
+ //# sourceMappingURL=index.mjs.map
79
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/pdf/index.ts"],"names":[],"mappings":";AASA,eAAsB,mBACpB,IAAA,EACiB;AAEjB,EAAA,IAAI,QAAA;AACJ,EAAA,IAAI;AACF,IAAA,QAAA,GAAW,MAAM,OAAO,YAAY,CAAA;AAAA,EACtC,CAAA,CAAA,MAAQ;AACN,IAAA,MAAM,IAAI,KAAA;AAAA,MACR;AAAA,KACF;AAAA,EACF;AAEA,EAAA,MAAM,QACJ,IAAA,YAAgB,WAAA,GAAc,IAAI,UAAA,CAAW,IAAI,CAAA,GAAI,IAAA;AAEvD,EAAA,MAAM,GAAA,GAAM,MAAM,QAAA,CAAS,WAAA,CAAY,EAAE,IAAA,EAAM,KAAA,EAAO,CAAA,CAAE,OAAA;AACxD,EAAA,MAAM,QAAkB,EAAC;AAEzB,EAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,IAAK,GAAA,CAAI,UAAU,CAAA,EAAA,EAAK;AACtC,IAAA,MAAM,IAAA,GAAO,MAAM,GAAA,CAAI,OAAA,CAAQ,CAAC,CAAA;AAChC,IAAA,MAAM,OAAA,GAAU,MAAM,IAAA,CAAK,cAAA,EAAe;AAG1C,IAAA,MAAM,QAAmB,EAAC;AAE1B,IAAA,KAAA,MAAW,IAAA,IAAQ,QAAQ,KAAA,EAAO;AAChC,MAAA,IAAI,EAAE,KAAA,IAAS,IAAA,CAAA,IAAS,CAAC,IAAA,CAAK,GAAA,CAAI,MAAK,EAAG;AAC1C,MAAA,MAAM,YAAkC,KAAA,CAAM,OAAA;AAAA,QAC3C,IAAA,CAAkC;AAAA,OACrC,GACK,KAAiC,SAAA,GAClC,MAAA;AAEJ,MAAA,IAAI,CAAC,SAAA,EAAW;AAEd,QAAA,KAAA,CAAM,IAAA,CAAK,EAAE,CAAA,EAAG,CAAA,EAAG,GAAG,CAAA,EAAG,GAAA,EAAK,IAAA,CAAK,GAAA,EAAK,CAAA;AAAA,MAC1C,CAAA,MAAO;AACL,QAAA,KAAA,CAAM,IAAA,CAAK,EAAE,CAAA,EAAG,SAAA,CAAU,CAAC,CAAA,EAAG,CAAA,EAAG,SAAA,CAAU,CAAC,CAAA,EAAG,GAAA,EAAK,IAAA,CAAK,KAAK,CAAA;AAAA,MAChE;AAAA,IACF;AAWA,IAAA,MAAM,oBAAA,GAAuB,EAAA;AAC7B,IAAA,MAAM,aAAa,CAAC,GAAG,IAAI,GAAA,CAAI,MAAM,GAAA,CAAI,CAAC,EAAA,KAAO,IAAA,CAAK,MAAM,EAAA,CAAG,CAAC,CAAC,CAAC,CAAC,CAAA,CAAE,IAAA;AAAA,MACnE,CAAC,CAAA,EAAG,CAAA,KAAM,CAAA,GAAI;AAAA,KAChB;AAEA,IAAA,IAAI,cAAA,GAAgC,IAAA;AACpC,IAAA,IAAI,MAAA,GAAS,CAAA;AACb,IAAA,KAAA,IAAS,CAAA,GAAI,CAAA,EAAG,CAAA,GAAI,UAAA,CAAW,QAAQ,CAAA,EAAA,EAAK;AAC1C,MAAA,MAAM,MAAM,UAAA,CAAW,CAAC,CAAA,GAAI,UAAA,CAAW,IAAI,CAAC,CAAA;AAC5C,MAAA,IAAI,MAAM,MAAA,EAAQ;AAChB,QAAA,MAAA,GAAS,GAAA;AACT,QAAA,cAAA,GAAA,CAAkB,WAAW,CAAA,GAAI,CAAC,CAAA,GAAI,UAAA,CAAW,CAAC,CAAA,IAAK,CAAA;AAAA,MACzD;AAAA,IACF;AACA,IAAA,IAAI,MAAA,GAAS,sBAAsB,cAAA,GAAiB,IAAA;AAEpD,IAAA,MAAM,OAAA,GACJ,mBAAmB,IAAA,GACf;AAAA,MACE,MAAM,MAAA,CAAO,CAAC,EAAA,KAAO,EAAA,CAAG,IAAI,cAAe,CAAA;AAAA,MAC3C,MAAM,MAAA,CAAO,CAAC
,EAAA,KAAO,EAAA,CAAG,KAAK,cAAe;AAAA,KAC9C,GACA,CAAC,KAAK,CAAA;AAEZ,IAAA,MAAM,cAAc,OAAA,CAAQ,GAAA,CAAI,CAAC,GAAA,KAAQ,YAAA,CAAa,GAAG,CAAC,CAAA;AAC1D,IAAA,KAAA,CAAM,KAAK,WAAA,CAAY,MAAA,CAAO,OAAO,CAAA,CAAE,IAAA,CAAK,IAAI,CAAC,CAAA;AAAA,EACnD;AAEA,EAAA,OAAO,KAAA,CAAM,KAAK,IAAI,CAAA;AACxB;AAEA,SAAS,aAAa,KAAA,EAA6D;AACjF,EAAA,MAAM,WAAA,GAAc,CAAA;AACpB,EAAA,MAAM,OAAA,uBAA8D,GAAA,EAAI;AACxE,EAAA,MAAM,YAAsB,EAAC;AAE7B,EAAA,KAAA,MAAW,EAAE,CAAA,EAAG,CAAA,EAAG,GAAA,MAAS,KAAA,EAAO;AACjC,IAAA,IAAI,SAAA;AACJ,IAAA,KAAA,MAAW,OAAO,SAAA,EAAW;AAC3B,MAAA,IAAI,IAAA,CAAK,GAAA,CAAI,GAAA,GAAM,CAAC,KAAK,WAAA,EAAa;AACpC,QAAA,SAAA,GAAY,GAAA;AACZ,QAAA;AAAA,MACF;AAAA,IACF;AACA,IAAA,IAAI,cAAc,MAAA,EAAW;AAC3B,MAAA,SAAA,GAAY,CAAA;AACZ,MAAA,SAAA,CAAU,KAAK,CAAC,CAAA;AAChB,MAAA,OAAA,CAAQ,GAAA,CAAI,CAAA,EAAG,EAAE,CAAA;AAAA,IACnB;AACA,IAAA,OAAA,CAAQ,IAAI,SAAS,CAAA,CAAG,KAAK,EAAE,CAAA,EAAG,KAAK,CAAA;AAAA,EACzC;AAGA,EAAA,SAAA,CAAU,IAAA,CAAK,CAAC,CAAA,EAAG,CAAA,KAAM,IAAI,CAAC,CAAA;AAE9B,EAAA,OAAO,SAAA,CACJ,GAAA;AAAA,IAAI,CAAC,GAAA,KAAA,CACH,OAAA,CAAQ,GAAA,CAAI,GAAG,CAAA,IAAK,EAAC,EACnB,IAAA,CAAK,CAAC,CAAA,EAAG,CAAA,KAAM,EAAE,CAAA,GAAI,CAAA,CAAE,CAAC,CAAA,CACxB,GAAA,CAAI,CAAC,EAAA,KAAO,EAAA,CAAG,GAAG,CAAA,CAClB,IAAA,CAAK,GAAG,CAAA,CACR,OAAA,CAAQ,WAAA,EAAa,GAAG,EACxB,IAAA;AAAK,GACV,CACC,MAAA,CAAO,OAAO,CAAA,CACd,KAAK,IAAI,CAAA;AACd","file":"index.mjs","sourcesContent":["/**\n * Extract plain text from a PDF buffer.\n *\n * Requires `pdfjs-dist` to be installed (optional peerDependency):\n * npm install pdfjs-dist\n *\n * @param data - Raw PDF bytes as Uint8Array or ArrayBuffer\n * @returns Extracted text, ready to pass as `resumeText` to analyzeResume\n */\nexport async function extractTextFromPDF(\n data: Uint8Array | ArrayBuffer\n): Promise<string> {\n // ponytail: lazy import keeps core zero-dep; missing peer throws with clear message\n let pdfjsLib: typeof import(\"pdfjs-dist\");\n try {\n pdfjsLib = await import(\"pdfjs-dist\");\n } catch {\n throw new Error(\n \"pdfjs-dist is required for PDF 
extraction. Install it: npm install pdfjs-dist\"\n );\n }\n\n const bytes =\n data instanceof ArrayBuffer ? new Uint8Array(data) : data;\n\n const doc = await pdfjsLib.getDocument({ data: bytes }).promise;\n const pages: string[] = [];\n\n for (let i = 1; i <= doc.numPages; i++) {\n const page = await doc.getPage(i);\n const content = await page.getTextContent();\n\n type RawItem = { x: number; y: number; str: string };\n const items: RawItem[] = [];\n\n for (const item of content.items) {\n if (!(\"str\" in item) || !item.str.trim()) continue;\n const transform: number[] | undefined = Array.isArray(\n (item as { transform?: number[] }).transform\n )\n ? (item as { transform: number[] }).transform\n : undefined;\n\n if (!transform) {\n // No positional info (unit-test mocks) — treat as single-column item\n items.push({ x: 0, y: 0, str: item.str });\n } else {\n items.push({ x: transform[4], y: transform[5], str: item.str });\n }\n }\n\n // Detect column boundary: find the largest x-gap among item start positions.\n // If it exceeds COLUMN_GAP_THRESHOLD, split into left / right columns and\n // process each independently so headers in different columns don't merge.\n // ponytail: single largest-gap heuristic handles the common 2-column resume;\n // n-column needs k-means on x-distribution — upgrade if this proves insufficient.\n // Column boundary heuristic: the largest gap in item x-positions.\n // Real PDF column gutters show as a gap >>80px; normal word spacing is <50px.\n // ponytail: magic number calibrated to PranavRaut2026.pdf (104px gap); raise\n // if single-column PDFs with wide indentation start getting falsely split.\n const COLUMN_GAP_THRESHOLD = 80;\n const xPositions = [...new Set(items.map((it) => Math.round(it.x)))].sort(\n (a, b) => a - b\n );\n\n let columnBoundary: number | null = null;\n let maxGap = 0;\n for (let j = 1; j < xPositions.length; j++) {\n const gap = xPositions[j] - xPositions[j - 1];\n if (gap > maxGap) {\n maxGap = gap;\n 
columnBoundary = (xPositions[j - 1] + xPositions[j]) / 2;\n }\n }\n if (maxGap < COLUMN_GAP_THRESHOLD) columnBoundary = null;\n\n const columns =\n columnBoundary !== null\n ? [\n items.filter((it) => it.x < columnBoundary!),\n items.filter((it) => it.x >= columnBoundary!),\n ]\n : [items];\n\n const columnTexts = columns.map((col) => renderColumn(col));\n pages.push(columnTexts.filter(Boolean).join(\"\\n\"));\n }\n\n return pages.join(\"\\n\");\n}\n\nfunction renderColumn(items: Array<{ x: number; y: number; str: string }>): string {\n const Y_TOLERANCE = 2;\n const lineMap: Map<number, Array<{ x: number; str: string }>> = new Map();\n const lineOrder: number[] = [];\n\n for (const { x, y, str } of items) {\n let bucketKey: number | undefined;\n for (const key of lineOrder) {\n if (Math.abs(key - y) <= Y_TOLERANCE) {\n bucketKey = key;\n break;\n }\n }\n if (bucketKey === undefined) {\n bucketKey = y;\n lineOrder.push(y);\n lineMap.set(y, []);\n }\n lineMap.get(bucketKey)!.push({ x, str });\n }\n\n // pdfjs y=0 is bottom of page — sort descending so top comes first\n lineOrder.sort((a, b) => b - a);\n\n return lineOrder\n .map((key) =>\n (lineMap.get(key) ?? [])\n .sort((a, b) => a.x - b.x)\n .map((it) => it.str)\n .join(\" \")\n .replace(/[^\\S\\n]+/g, \" \")\n .trim()\n )\n .filter(Boolean)\n .join(\"\\n\");\n}\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pranavraut033/ats-checker",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "Deterministic, configurable ATS (Applicant Tracking System) compatibility checker with no external dependencies. Analyze resumes, generate scores, and get actionable suggestions.",
5
5
  "license": "MIT",
6
6
  "author": {
@@ -34,6 +34,20 @@
34
34
  "import": "./dist/index.mjs",
35
35
  "require": "./dist/index.cjs",
36
36
  "default": "./dist/index.mjs"
37
+ },
38
+ "./pdf": {
39
+ "types": "./dist/pdf/index.d.ts",
40
+ "import": "./dist/pdf/index.mjs",
41
+ "require": "./dist/pdf/index.cjs",
42
+ "default": "./dist/pdf/index.mjs"
43
+ }
44
+ },
45
+ "peerDependencies": {
46
+ "pdfjs-dist": ">=4"
47
+ },
48
+ "peerDependenciesMeta": {
49
+ "pdfjs-dist": {
50
+ "optional": true
37
51
  }
38
52
  },
39
53
  "sideEffects": false,
@@ -56,8 +70,8 @@
56
70
  ],
57
71
  "scripts": {
58
72
  "build": "tsup && npm run build:ui",
59
- "build:ui": "mkdir -p ui/public/dist && cp dist/index.mjs ui/public/dist/ && cp dist/index.d.ts ui/public/dist/",
60
- "serve": "npx http-server ui/public -p 3005",
73
+ "build:ui": "rm -rf ui/public/dist && cp -r dist ui/public/dist",
74
+ "serve": "npx http-server ui/public -p ${PORT:-3005}",
61
75
  "dev": "npm run serve",
62
76
  "test": "vitest run",
63
77
  "test:watch": "vitest",
@@ -71,8 +85,9 @@
71
85
  "@types/node": "^25.0.3",
72
86
  "@vitest/ui": "^4.0.16",
73
87
  "express": "^4.18.2",
74
- "tsx": "^4.7.0",
88
+ "pdfjs-dist": "^4.10.38",
75
89
  "tsup": "^8.3.0",
90
+ "tsx": "^4.7.0",
76
91
  "typescript": "^5.6.3",
77
92
  "vitest": "^4.0.16"
78
93
  }