job-forge 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/.codex/config.toml +8 -0
  2. package/.cursor/mcp.json +21 -0
  3. package/.cursor/rules/main.mdc +519 -0
  4. package/.mcp.json +21 -0
  5. package/.opencode/agents/general-free.md +85 -0
  6. package/.opencode/agents/general-paid.md +39 -0
  7. package/.opencode/agents/glm-minimal.md +50 -0
  8. package/.opencode/skills/job-forge.md +185 -0
  9. package/AGENTS.md +514 -0
  10. package/CLAUDE.md +514 -0
  11. package/LICENSE +21 -0
  12. package/README.md +195 -0
  13. package/batch/README.md +60 -0
  14. package/batch/batch-prompt.md +399 -0
  15. package/batch/batch-runner.sh +673 -0
  16. package/bin/create-job-forge.mjs +375 -0
  17. package/bin/job-forge.mjs +120 -0
  18. package/bin/sync.mjs +141 -0
  19. package/config/profile.example.yml +67 -0
  20. package/cv-sync-check.mjs +128 -0
  21. package/dedup-tracker.mjs +201 -0
  22. package/docs/ARCHITECTURE.md +220 -0
  23. package/docs/CUSTOMIZATION.md +101 -0
  24. package/docs/MODEL-ROUTING.md +195 -0
  25. package/docs/README.md +54 -0
  26. package/docs/SETUP.md +186 -0
  27. package/docs/demo.gif +0 -0
  28. package/fonts/dm-sans-latin-ext.woff2 +0 -0
  29. package/fonts/dm-sans-latin.woff2 +0 -0
  30. package/fonts/space-grotesk-latin-ext.woff2 +0 -0
  31. package/fonts/space-grotesk-latin.woff2 +0 -0
  32. package/generate-pdf.mjs +168 -0
  33. package/iso/agents/general-free.md +90 -0
  34. package/iso/agents/general-paid.md +44 -0
  35. package/iso/agents/glm-minimal.md +55 -0
  36. package/iso/commands/job-forge.md +188 -0
  37. package/iso/config.json +7 -0
  38. package/iso/instructions.md +514 -0
  39. package/iso/mcp.json +15 -0
  40. package/merge-tracker.mjs +377 -0
  41. package/modes/README.md +30 -0
  42. package/modes/_shared-calibration.md +26 -0
  43. package/modes/_shared.md +272 -0
  44. package/modes/apply.md +257 -0
  45. package/modes/auto-pipeline.md +70 -0
  46. package/modes/batch.md +110 -0
  47. package/modes/compare.md +23 -0
  48. package/modes/contact.md +82 -0
  49. package/modes/deep.md +99 -0
  50. package/modes/followup.md +68 -0
  51. package/modes/negotiation.md +146 -0
  52. package/modes/offer.md +199 -0
  53. package/modes/pdf.md +121 -0
  54. package/modes/pipeline.md +83 -0
  55. package/modes/project.md +30 -0
  56. package/modes/rejection.md +92 -0
  57. package/modes/scan.md +185 -0
  58. package/modes/tracker.md +31 -0
  59. package/modes/training.md +27 -0
  60. package/normalize-statuses.mjs +152 -0
  61. package/opencode.json +28 -0
  62. package/package.json +78 -0
  63. package/scripts/add-tags.mjs +894 -0
  64. package/scripts/cursor-agent-loop.sh +211 -0
  65. package/scripts/cursor-agent-stream-format.py +134 -0
  66. package/scripts/next-num.mjs +33 -0
  67. package/scripts/release/check-source.mjs +37 -0
  68. package/scripts/render-report-header.mjs +78 -0
  69. package/scripts/session-report.mjs +129 -0
  70. package/scripts/slugify.mjs +27 -0
  71. package/scripts/today.mjs +20 -0
  72. package/scripts/token-usage-report.mjs +315 -0
  73. package/scripts/tracker-line.mjs +67 -0
  74. package/scripts/verify-greenhouse-urls.mjs +195 -0
  75. package/templates/cv-template.html +395 -0
  76. package/templates/portals.example.yml +3140 -0
  77. package/templates/states.yml +62 -0
  78. package/tracker-lib.mjs +257 -0
  79. package/verify-pipeline.mjs +267 -0
@@ -0,0 +1,315 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * token-usage-report.mjs — Query opencode's SQLite DB for per-session token usage.
4
+ *
5
+ * Outputs a day-by-day breakdown of sessions, tokens, cost, and model usage.
6
+ * Helps identify which sessions / models are consuming the most tokens.
7
+ *
8
+ * Usage:
9
+ * node scripts/token-usage-report.mjs # last 7 days
10
+ * node scripts/token-usage-report.mjs --days 1 # today only
11
+ * node scripts/token-usage-report.mjs --days 30 # last 30 days
12
+ * node scripts/token-usage-report.mjs --tsv # TSV on stdout; use --append to append the rows to data/token-usage.tsv instead
13
+ * node scripts/token-usage-report.mjs --session <id> # drill into one session
14
+ */
15
+
16
import { execFileSync, execSync } from 'child_process';
import { existsSync, mkdirSync, writeFileSync } from 'fs';
import { join } from 'path';
19
+
20
// Consumer's project dir (for locating data/token-usage.tsv).
const PROJECT_DIR = process.env.JOB_FORGE_PROJECT || process.cwd();

// Location of the opencode binary. Resolution order:
//   1. OPENCODE_BIN, when set;
//   2. the per-user install at ~/.opencode/bin/opencode, when that file exists;
//   3. plain `opencode`, resolved through PATH.
// The default used to be an absolute path inside one specific user's home
// directory, which cannot exist on anybody else's machine.
const HOME_OPENCODE = join(
  process.env.HOME || process.env.USERPROFILE || '',
  '.opencode',
  'bin',
  'opencode',
);
const OPENCODE =
  process.env.OPENCODE_BIN || (existsSync(HOME_OPENCODE) ? HOME_OPENCODE : 'opencode');
23
+
24
+ // ---------- CLI args ----------
25
+
26
const args = process.argv.slice(2);

/**
 * Report whether the bare switch `--<name>` appears on the command line.
 * @param {string} name - switch name without the leading dashes
 * @returns {boolean}
 */
function flag(name) {
  return args.includes(`--${name}`);
}

/**
 * Return the token that follows `--<name>` on the command line.
 * @param {string} name - option name without the leading dashes
 * @returns {string|null} the following token, or null when the option is
 *   absent or is the last token
 */
function flagVal(name) {
  const idx = args.indexOf(`--${name}`);
  return idx >= 0 && idx + 1 < args.length ? args[idx + 1] : null;
}

/**
 * Turn the raw `--days` value into a positive whole number of days.
 * A non-numeric or non-positive value would end up as `NaN` (or a cutoff in
 * the future) inside the SQL text and produce an empty report with no
 * explanation, so such values are rejected with a warning instead.
 * @param {string|null} raw - value taken from the command line
 * @param {number} [fallback=7] - used when raw is missing or invalid
 * @returns {number}
 */
function parseDays(raw, fallback = 7) {
  if (raw == null) return fallback;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isInteger(parsed) && parsed > 0) return parsed;
  console.error(`Ignoring invalid --days value "${raw}"; using ${fallback}.`);
  return fallback;
}

const DAYS = parseDays(flagVal('days'));
const TSV_MODE = flag('tsv');
const SESSION_ID = flagVal('session');
const APPEND_LOG = flag('append');
37
+
38
+ // ---------- Helpers ----------
39
+
40
/**
 * Run one SQL statement through `opencode db <sql> --format json` from the
 * project directory and return the parsed result.
 *
 * The binary is started directly with an argument vector (no shell), so
 * quotes, `$`, backticks or `;` inside the SQL text cannot be interpreted by
 * a shell. The child's stderr is discarded, as before.
 *
 * @param {string} sql - SQL text passed to opencode as a single argument
 * @returns {Array} parsed JSON output; an empty array when opencode fails,
 *   prints nothing, or prints something that is not JSON
 */
function query(sql) {
  let out;
  try {
    out = execFileSync(OPENCODE, ['db', sql, '--format', 'json'], {
      cwd: PROJECT_DIR,
      encoding: 'utf-8',
      maxBuffer: 10 * 1024 * 1024,
      stdio: ['ignore', 'pipe', 'ignore'],
    }).trim();
  } catch (err) {
    // Still best-effort (the report renders as empty), but say why on stderr
    // so a missing binary is distinguishable from "no usage recorded".
    console.error(`token-usage-report: opencode query failed (${err.code ?? `exit ${err.status}`})`);
    return [];
  }

  if (!out) return [];

  try {
    return JSON.parse(out);
  } catch (err) {
    console.error(`token-usage-report: could not parse opencode output (${err.message})`);
    return [];
  }
}
49
+
50
/**
 * Format a count compactly: `1.5M`, `12.3K`, or the plain number below 1000.
 *
 * Values that would round up to "1000.0K" (for example 999_999) are promoted
 * to the millions unit so the output never shows a four-digit K figure.
 *
 * @param {number} n - value to format
 * @returns {string}
 */
function fmtNum(n) {
  const millions = () => `${(n / 1_000_000).toFixed(1)}M`;
  if (n >= 1_000_000) return millions();
  if (n >= 1_000) {
    const thousands = (n / 1_000).toFixed(1);
    return thousands === '1000.0' ? millions() : `${thousands}K`;
  }
  return String(n);
}
55
+
56
/**
 * Render a cost as a dollar amount with four decimal places.
 * @param {number} n - cost value
 * @returns {string} for example "$0.0123"
 */
function fmtCost(n) {
  const fixed = n.toFixed(4);
  return '$' + fixed;
}
57
+
58
/**
 * Convert an epoch timestamp in milliseconds to its UTC calendar date.
 * @param {number} ms - milliseconds since the Unix epoch
 * @returns {string} the first ten characters of the ISO-8601 form (YYYY-MM-DD)
 */
function epochToDate(ms) {
  const iso = new Date(ms).toISOString();
  return iso.substring(0, 10);
}
61
+
62
/**
 * Convert an epoch timestamp in milliseconds to its UTC time of day.
 * @param {number} ms - milliseconds since the Unix epoch
 * @returns {string} characters 11-18 of the ISO-8601 form (HH:MM:SS)
 */
function epochToTime(ms) {
  const iso = new Date(ms).toISOString();
  return iso.substring(11, 19);
}
65
+
66
+ // ---------- Queries ----------
67
+
68
// Lower bound (epoch ms) on session creation time for the summary queries.
const cutoffMs = Date.now() - DAYS * 86400000;

if (SESSION_ID) {
  // Drill into one session: show per-message token breakdown.

  // SESSION_ID comes straight from the command line and is spliced into SQL
  // string literals, so double any single quote to keep it inside the literal.
  const sessionIdSql = SESSION_ID.replace(/'/g, "''");

  const messages = query(`
    SELECT
      json_extract(data, '$.tokens.input') as input_tokens,
      json_extract(data, '$.tokens.output') as output_tokens,
      json_extract(data, '$.tokens.cache.read') as cache_read,
      json_extract(data, '$.tokens.cache.write') as cache_write,
      json_extract(data, '$.cost') as cost,
      json_extract(data, '$.modelID') as model,
      json_extract(data, '$.providerID') as provider,
      time_created
    FROM message
    WHERE session_id = '${sessionIdSql}'
      AND json_extract(data, '$.tokens.input') > 0
    ORDER BY time_created ASC
  `);

  const session = query(`SELECT title, time_created FROM session WHERE id = '${sessionIdSql}'`);
  // Fall back to the raw id / "?" when the session row is not found.
  const title = session[0]?.title || SESSION_ID;
  const created = session[0]?.time_created ? epochToDate(session[0].time_created) : '?';

  const rule = '─'.repeat(110);

  console.log(`\nSession: ${title}`);
  console.log(`Date: ${created} | Messages with tokens: ${messages.length}`);
  console.log(rule);
  console.log(
    'Time'.padEnd(10),
    'Model'.padEnd(25),
    'Input'.padStart(10),
    'Output'.padStart(10),
    'CacheRd'.padStart(10),
    'CacheWr'.padStart(10),
    'Cost'.padStart(10),
    'CumCost'.padStart(10),
  );
  console.log(rule);

  // One pass: print each message and accumulate the totals; the running
  // totals.cost doubles as the cumulative-cost column.
  const totals = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0 };
  for (const m of messages) {
    totals.input += m.input_tokens || 0;
    totals.output += m.output_tokens || 0;
    totals.cacheRead += m.cache_read || 0;
    totals.cacheWrite += m.cache_write || 0;
    totals.cost += m.cost || 0;
    console.log(
      epochToTime(m.time_created).padEnd(10),
      `${m.provider}/${m.model}`.padEnd(25),
      fmtNum(m.input_tokens || 0).padStart(10),
      fmtNum(m.output_tokens || 0).padStart(10),
      fmtNum(m.cache_read || 0).padStart(10),
      fmtNum(m.cache_write || 0).padStart(10),
      fmtCost(m.cost || 0).padStart(10),
      fmtCost(totals.cost).padStart(10),
    );
  }

  console.log(rule);
  console.log(
    'TOTAL'.padEnd(10),
    ''.padEnd(25),
    fmtNum(totals.input).padStart(10),
    fmtNum(totals.output).padStart(10),
    fmtNum(totals.cacheRead).padStart(10),
    fmtNum(totals.cacheWrite).padStart(10),
    fmtCost(totals.cost).padStart(10),
  );
  process.exit(0);
}
143
+
144
+ // ---------- Day-by-day summary ----------
145
+
146
// One row per session created since the cutoff, aggregated over its
// assistant messages.
const sessions = query(`
  SELECT
    s.id,
    s.title,
    s.time_created,
    SUM(json_extract(m.data, '$.tokens.input')) as total_input,
    SUM(json_extract(m.data, '$.tokens.output')) as total_output,
    SUM(json_extract(m.data, '$.tokens.cache.read')) as total_cache_read,
    SUM(json_extract(m.data, '$.tokens.cache.write')) as total_cache_write,
    SUM(json_extract(m.data, '$.cost')) as total_cost,
    COUNT(CASE WHEN json_extract(m.data, '$.tokens.input') > 0 THEN 1 END) as msg_count,
    GROUP_CONCAT(DISTINCT json_extract(m.data, '$.modelID')) as models
  FROM session s
  JOIN message m ON m.session_id = s.id
  WHERE s.time_created >= ${cutoffMs}
    AND json_extract(m.data, '$.role') = 'assistant'
  GROUP BY s.id
  ORDER BY s.time_created DESC
`);

if (TSV_MODE || APPEND_LOG) {
  // TSV output: one row per session.
  const header = 'date\ttime\tsession_id\ttitle\tmodels\tmessages\tinput_tokens\toutput_tokens\tcache_read\tcache_write\tcost';

  // A tab shifts columns and a line break splits the record, so flatten both
  // to spaces in the free-text fields.
  const tsvText = (value) => (value || '').replace(/[\t\r\n]/g, ' ');

  const rows = sessions.map((s) => [
    epochToDate(s.time_created),
    epochToTime(s.time_created),
    s.id,
    tsvText(s.title),
    tsvText(s.models),
    s.msg_count || 0,
    s.total_input || 0,
    s.total_output || 0,
    s.total_cache_read || 0,
    s.total_cache_write || 0,
    (s.total_cost || 0).toFixed(4),
  ].join('\t'));

  if (APPEND_LOG) {
    const dataDir = join(PROJECT_DIR, 'data');
    const logFile = join(dataDir, 'token-usage.tsv');
    // Appending fails with ENOENT when data/ does not exist yet.
    mkdirSync(dataDir, { recursive: true });
    // The header is written only when the log file is being created.
    const lines = existsSync(logFile) ? rows : [header, ...rows];
    // Terminate every line individually so that an empty result appends
    // nothing instead of a stray blank line.
    const chunk = lines.map((l) => `${l}\n`).join('');
    if (chunk) writeFileSync(logFile, chunk, { flag: 'a' });
    console.log(`Appended ${rows.length} sessions to ${logFile}`);
  } else {
    console.log(header);
    rows.forEach((r) => console.log(r));
  }
  process.exit(0);
}
194
+
195
+ // ---------- Pretty print: group by day ----------
196
+
197
// Bucket sessions by UTC creation date, keeping the query's ordering.
const byDay = new Map();
sessions.forEach((entry) => {
  const key = epochToDate(entry.time_created);
  const bucket = byDay.get(key);
  if (bucket) {
    bucket.push(entry);
  } else {
    byDay.set(key, [entry]);
  }
});

// One banner, one header row and one line per session for every day.
byDay.forEach((daySessions, day) => {
  const dayTotals = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0, msgs: 0 };
  for (const s of daySessions) {
    dayTotals.input += s.total_input || 0;
    dayTotals.output += s.total_output || 0;
    dayTotals.cacheRead += s.total_cache_read || 0;
    dayTotals.cacheWrite += s.total_cache_write || 0;
    dayTotals.cost += s.total_cost || 0;
    dayTotals.msgs += s.msg_count || 0;
  }

  const thinRule = '─'.repeat(90);
  // Column layout shared by the header row and the session rows.
  const printRow = (time, label, input, output, cacheRd, cost, msgs) => {
    console.log(
      time.padEnd(10),
      label.padEnd(40),
      input.padStart(8),
      output.padStart(8),
      cacheRd.padStart(9),
      cost.padStart(9),
      msgs.padStart(6),
    );
  };

  console.log(`\n${'═'.repeat(90)}`);
  console.log(` ${day} | ${daySessions.length} sessions | ${dayTotals.msgs} messages | Input: ${fmtNum(dayTotals.input)} | CacheRd: ${fmtNum(dayTotals.cacheRead)} | Cost: ${fmtCost(dayTotals.cost)}`);
  console.log(thinRule);
  printRow(' Time', 'Title', 'Input', 'Output', 'CacheRd', 'Cost', 'Msgs');
  console.log(thinRule);

  daySessions.forEach((s) => {
    const title = (s.title || '(untitled)').slice(0, 38);
    printRow(
      ` ${epochToTime(s.time_created)}`,
      title,
      fmtNum(s.total_input || 0),
      fmtNum(s.total_output || 0),
      fmtNum(s.total_cache_read || 0),
      fmtCost(s.total_cost || 0),
      String(s.msg_count || 0),
    );
  });
});
242
+
243
+ // ---------- Grand totals ----------
244
+
245
// Sum every session in the window into one totals record.
const grand = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, cost: 0, msgs: 0 };
for (const s of sessions) {
  grand.input += s.total_input || 0;
  grand.output += s.total_output || 0;
  grand.cacheRead += s.total_cache_read || 0;
  grand.cacheWrite += s.total_cache_write || 0;
  grand.cost += s.total_cost || 0;
  grand.msgs += s.msg_count || 0;
}

const grandRule = '═'.repeat(90);
[
  `\n${grandRule}`,
  ` TOTAL (${DAYS} days) | ${sessions.length} sessions | ${grand.msgs} messages`,
  ` Input: ${fmtNum(grand.input)} | Output: ${fmtNum(grand.output)} | CacheRd: ${fmtNum(grand.cacheRead)} | CacheWr: ${fmtNum(grand.cacheWrite)}`,
  ` Cost: ${fmtCost(grand.cost)}`,
  grandRule,
].forEach((text) => console.log(text));
260
+
261
+ // ---------- Model breakdown ----------
262
+
263
// Per provider/model aggregates over messages that report input tokens,
// most expensive model first.
const modelStats = query(`
  SELECT
    json_extract(m.data, '$.providerID') || '/' || json_extract(m.data, '$.modelID') as model,
    SUM(json_extract(m.data, '$.tokens.input')) as total_input,
    SUM(json_extract(m.data, '$.tokens.output')) as total_output,
    SUM(json_extract(m.data, '$.tokens.cache.read')) as total_cache_read,
    SUM(json_extract(m.data, '$.cost')) as total_cost,
    COUNT(*) as msg_count
  FROM message m
  JOIN session s ON s.id = m.session_id
  WHERE s.time_created >= ${cutoffMs}
    AND json_extract(m.data, '$.tokens.input') > 0
  GROUP BY model
  ORDER BY total_cost DESC
`);

if (modelStats.length) {
  const modelRule = '─'.repeat(80);
  // Column layout shared by the header row and the data rows.
  const printModelRow = (label, input, output, cacheRd, cost, msgs) => {
    console.log(
      label.padEnd(35),
      input.padStart(9),
      output.padStart(9),
      cacheRd.padStart(9),
      cost.padStart(10),
      msgs.padStart(7),
    );
  };

  console.log(`\nModel breakdown:`);
  console.log(modelRule);
  printModelRow(' Model', 'Input', 'Output', 'CacheRd', 'Cost', 'Msgs');
  console.log(modelRule);

  modelStats.forEach((m) => {
    printModelRow(
      ` ${(m.model || '?').slice(0, 33)}`,
      fmtNum(m.total_input || 0),
      fmtNum(m.total_output || 0),
      fmtNum(m.total_cache_read || 0),
      fmtCost(m.total_cost || 0),
      String(m.msg_count || 0),
    );
  });
}
302
+
303
+ // ---------- Top sessions by cost ----------
304
+
305
// Ten most expensive sessions; sort a copy so `sessions` keeps its order.
const byCostDesc = (a, b) => (b.total_cost || 0) - (a.total_cost || 0);
const topSessions = [...sessions].sort(byCostDesc).slice(0, 10);

if (topSessions.length) {
  console.log(`\nTop ${topSessions.length} sessions by cost:`);
  console.log('─'.repeat(90));
  topSessions.forEach((s) => {
    const title = (s.title || '(untitled)').slice(0, 50);
    const cost = fmtCost(s.total_cost || 0).padStart(9);
    const day = epochToDate(s.time_created);
    const input = fmtNum(s.total_input || 0);
    console.log(
      ` ${cost} ${day} ${title} (${input} in, ${s.msg_count} msgs)`,
    );
  });
}
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * tracker-line — emit a single 9-column TSV row for batch/tracker-additions/.
4
+ *
5
+ * Saves the agent from having to remember exact column order, delimiters,
6
+ * or the status-before-score TSV convention. Writes to stdout by default,
7
+ * or to batch/tracker-additions/{id}.tsv with --write.
8
+ *
9
+ * Column order matches merge-tracker.mjs:
10
+ * num \t date \t company \t role \t status \t score/5 \t pdf \t [num](reports/...) \t notes
11
+ *
12
+ * Usage:
13
+ * job-forge tracker-line \
14
+ * --num 521 --date 2026-04-15 \
15
+ * --company "Anthropic" --role "Manager, FDE" \
16
+ * --status Evaluated --score 4.2 \
17
+ * --pdf ✅ --slug anthropic-mgr-fde \
18
+ * --notes "Strong fit; founding team" \
19
+ * [--write]
20
+ *
21
+ * The --slug is used to build the report link; --num/--date/--slug together
22
+ * reproduce the canonical `reports/{num}-{slug}-{date}.md` path.
23
+ */
24
+
25
+ import { writeFileSync, mkdirSync, existsSync } from 'fs';
26
+ import { join } from 'path';
27
+
28
// Root of the consumer's project; defaults to the current working directory.
const PROJECT_DIR = process.env.JOB_FORGE_PROJECT || process.cwd();

/**
 * Fetch the token that follows `--<name>` in process.argv.
 *
 * @param {string} name - option name without the leading dashes
 * @param {boolean} [required=false] - when true and the option (or its value)
 *   is missing, print `missing --<name>` to stderr and exit with status 2
 * @returns {string} the option's value, or '' when an optional one is missing
 */
function arg(name, required = false) {
  const argv = process.argv;
  const pos = argv.indexOf(`--${name}`);
  const hasValue = pos >= 0 && pos + 1 < argv.length;

  if (hasValue) return argv[pos + 1];

  if (required) {
    console.error(`missing --${name}`);
    process.exit(2);
  }
  return '';
}
41
+
42
const num = arg('num', true);
const date = arg('date', true);
const company = arg('company', true);
const role = arg('role', true);
const status = arg('status', true);
const score = arg('score', true); // bare number like "4.2"; will be suffixed "/5"
const pdf = arg('pdf') || '❌';
const slug = arg('slug', true);
const notes = arg('notes') || '';
const write = process.argv.includes('--write');

// A literal tab inside a value would add a column and a line break would
// split the record, so collapse any run of them to a single space.
const tsvSafe = (value) => String(value).replace(/[\t\r\n]+/g, ' ');

// The link label keeps the number as given; the report path uses it
// zero-padded to three digits.
const paddedNum = String(num).padStart(3, '0');
const reportLink = `[${num}](reports/${paddedNum}-${slug}-${date}.md)`;
// Accept both "4.2" and an already-suffixed "4.2/5".
const scoreField = score.includes('/') ? score : `${score}/5`;

// Column order: num, date, company, role, status, score, pdf, report link, notes.
const line = [num, date, company, role, status, scoreField, pdf, reportLink, notes]
  .map(tsvSafe)
  .join('\t');

if (write) {
  const dir = join(PROJECT_DIR, 'batch/tracker-additions');
  // Recursive mkdir is a no-op when the directory already exists.
  mkdirSync(dir, { recursive: true });
  const path = join(dir, `${num}.tsv`);
  writeFileSync(path, line + '\n', 'utf-8');
  // Print the written file's path instead of the row.
  console.log(path);
} else {
  console.log(line);
}
@@ -0,0 +1,195 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * verify-greenhouse-urls.mjs
5
+ * Reads portals.example.yml, extracts Greenhouse API URLs, Ashby URLs, and Lever URLs,
6
+ * then verifies each with a GET request (all Greenhouse URLs, the first 20 Ashby URLs and the first 10 Lever URLs).
7
+ */
8
+
9
+ import { readFileSync } from 'node:fs';
10
+ import { resolve, dirname } from 'node:path';
11
+ import { fileURLToPath } from 'node:url';
12
+
13
// ES modules have no built-in __dirname; derive it from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// The portal list lives in templates/, one level above this script's directory.
const YAML_PATH = resolve(__dirname, '..', 'templates', 'portals.example.yml');

// Per-request abort deadline and the pause after each request, in milliseconds.
const TIMEOUT_MS = 5 * 1000;
const DELAY_MS = 200;
18
+
19
/**
 * Promise-based delay.
 * @param {number} ms - delay in milliseconds, handed to setTimeout
 * @returns {Promise<void>} resolves once the timer fires
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
22
+
23
/**
 * Reduce a YAML scalar to its plain text: drop matching surrounding quotes,
 * or, for an unquoted value, a trailing ` # comment`.
 */
function cleanScalar(raw) {
  const text = raw.trim();
  const quoted = text.match(/^(["'])(.*)\1\s*(?:#.*)?$/);
  if (quoted) return quoted[2];
  return text.replace(/\s+#.*$/, '').trim();
}

/**
 * Parse the YAML file line-by-line to extract company entries with their URLs.
 * We avoid pulling in a YAML library by doing simple regex extraction.
 *
 * Each `- name:` line sets the current company; every later `api:` /
 * `careers_url:` line holding a recognised URL is attributed to it. URLs seen
 * before the first name are ignored. Values may be bare or wrapped in single
 * or double quotes, and lines may end in LF or CRLF.
 *
 * @param {string} content - raw text of the portals YAML file
 * @returns {{greenhouse: Array<{name: string, url: string}>,
 *            ashby: Array<{name: string, url: string}>,
 *            lever: Array<{name: string, url: string}>}}
 *   entries in file order, not de-duplicated
 */
function parseEntries(content) {
  // An optional opening quote is skipped; the capture stops at whitespace or
  // at the closing quote.
  const nameRe = /^\s*-\s*name:\s*(.+)/;
  const greenhouseRe = /^\s*api:\s*["']?(https:\/\/boards-api\.greenhouse\.io[^\s"']+)/;
  const ashbyRe = /^\s*careers_url:\s*["']?(https:\/\/jobs\.ashbyhq\.com\/[^\s"']+)/;
  const leverRe = /^\s*careers_url:\s*["']?(https:\/\/jobs\.lever\.co\/[^\s"']+)/;

  const greenhouse = [];
  const ashby = [];
  const lever = [];

  let currentName = null;

  for (const line of content.split(/\r?\n/)) {
    // Match "- name: Something"
    const nameMatch = line.match(nameRe);
    if (nameMatch) {
      currentName = cleanScalar(nameMatch[1]);
    }
    if (!currentName) continue;

    // Greenhouse API URLs
    const apiMatch = line.match(greenhouseRe);
    if (apiMatch) greenhouse.push({ name: currentName, url: apiMatch[1] });

    // Ashby careers URLs
    const ashbyMatch = line.match(ashbyRe);
    if (ashbyMatch) ashby.push({ name: currentName, url: ashbyMatch[1] });

    // Lever careers URLs
    const leverMatch = line.match(leverRe);
    if (leverMatch) lever.push({ name: currentName, url: leverMatch[1] });
  }

  return { greenhouse, ashby, lever };
}
63
+
64
/**
 * Probe one URL with a GET request (redirects followed) and report whether
 * it answered with a 2xx/3xx status before the deadline.
 *
 * Never rejects: a timeout or a network/URL error is reported in the result.
 *
 * @param {{name: string, url: string}} entry - company entry to probe
 * @param {number} [timeoutMs=TIMEOUT_MS] - abort the request after this many ms
 * @returns {Promise<object>} the entry's own fields plus
 *   `status` (HTTP status number, 'TIMEOUT', or 'ERR: <code or message>') and
 *   `pass` (boolean)
 */
async function checkUrl(entry, timeoutMs = TIMEOUT_MS) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  try {
    const res = await fetch(entry.url, {
      method: 'GET',
      signal: controller.signal,
      headers: { 'User-Agent': 'JobForge-URLVerifier/1.0' },
      redirect: 'follow',
    });
    // Only the status is needed. Cancel the unread body so the connection is
    // released now; a failure to cancel does not change the verdict.
    await res.body?.cancel().catch(() => {});
    return {
      ...entry,
      status: res.status,
      pass: res.status >= 200 && res.status < 400,
    };
  } catch (err) {
    // Node's fetch wraps network failures in a generic error whose `cause`
    // carries the code (ENOTFOUND, ECONNREFUSED, ...), so look there first.
    const detail = err.cause?.code || err.code || err.message;
    return {
      ...entry,
      status: err.name === 'AbortError' ? 'TIMEOUT' : `ERR: ${detail}`,
      pass: false,
    };
  } finally {
    clearTimeout(timer);
  }
}
90
+
91
/**
 * Print one result section to stdout: a banner with the title and URL count,
 * a fixed-width row per result, a totals line and, when anything failed, the
 * failed entries again with their full URLs.
 *
 * @param {string} title - section heading
 * @param {Array<{name: string, url: string, status: (number|string), pass: boolean}>} results
 */
function printTable(title, results) {
  const widths = { name: 30, url: 55, status: 10 };
  const heavyRule = '='.repeat(100);
  const lightRule = '-'.repeat(100);

  // Fixed-width layout shared by the header row and the data rows.
  const renderRow = (name, url, status, verdict) =>
    name.padEnd(widths.name) + url.padEnd(widths.url) + status.padEnd(widths.status) + verdict;

  // Long URLs are cut and suffixed with an ellipsis so the status column stays aligned.
  const shorten = (url) =>
    (url.length > widths.url - 2 ? url.slice(0, widths.url - 5) + '...' : url);

  console.log(`\n${heavyRule}`);
  console.log(` ${title} (${results.length} URLs)`);
  console.log(heavyRule);
  console.log(renderRow('Company', 'URL', 'Status', 'Result'));
  console.log(lightRule);

  results.forEach((r) => {
    console.log(
      renderRow(
        r.name.slice(0, widths.name - 1),
        shorten(r.url),
        String(r.status),
        r.pass ? 'PASS' : 'FAIL',
      ),
    );
  });

  const failed = results.filter((r) => !r.pass);
  const failCount = failed.length;

  console.log(lightRule);
  console.log(`Total: ${results.length} | Passed: ${results.length - failCount} | Failed: ${failCount}`);

  if (failCount > 0) {
    console.log(`\nFailed URLs:`);
    failed.forEach((r) => {
      console.log(` - ${r.name}: ${r.url} (${r.status})`);
    });
  }
}
131
+
132
/** Keep the first entry for each distinct URL, preserving order. */
function dedupeByUrl(entries) {
  const seen = new Set();
  return entries.filter((e) => {
    if (seen.has(e.url)) return false;
    seen.add(e.url);
    return true;
  });
}

/** Check entries one at a time, pausing DELAY_MS after every request. */
async function verifySequentially(entries) {
  const results = [];
  for (const entry of entries) {
    results.push(await checkUrl(entry));
    await sleep(DELAY_MS);
  }
  return results;
}

/**
 * Entry point: read the portals file, extract and de-duplicate the URLs,
 * verify all Greenhouse API URLs plus the first 20 Ashby and first 10 Lever
 * URLs, print one table per provider and then an overall summary.
 *
 * @returns {Promise<void>} rejects when the portals file cannot be read
 */
async function main() {
  const content = readFileSync(YAML_PATH, 'utf-8');
  const { greenhouse, ashby, lever } = parseEntries(content);

  // Deduplicate by URL
  const ghUnique = dedupeByUrl(greenhouse);
  const ashbyUnique = dedupeByUrl(ashby);
  const leverUnique = dedupeByUrl(lever);

  console.log(`Found: ${ghUnique.length} Greenhouse APIs, ${ashbyUnique.length} Ashby URLs, ${leverUnique.length} Lever URLs`);

  // --- Greenhouse (all) ---
  console.log(`\nVerifying all ${ghUnique.length} Greenhouse API URLs...`);
  const ghResults = await verifySequentially(ghUnique);
  printTable('Greenhouse API URLs', ghResults);

  // --- Ashby (sample of 20) ---
  const ashbySample = ashbyUnique.slice(0, 20);
  console.log(`\nVerifying ${ashbySample.length} Ashby URLs (sample)...`);
  const ashbyResults = await verifySequentially(ashbySample);
  printTable('Ashby URLs (sample of 20)', ashbyResults);

  // --- Lever (sample of 10) ---
  const leverSample = leverUnique.slice(0, 10);
  console.log(`\nVerifying ${leverSample.length} Lever URLs (sample)...`);
  const leverResults = await verifySequentially(leverSample);
  printTable('Lever URLs (sample of 10)', leverResults);

  // --- Summary ---
  const allResults = [...ghResults, ...ashbyResults, ...leverResults];
  const allFails = allResults.filter((r) => !r.pass);
  console.log(`\n${'='.repeat(100)}`);
  console.log(`OVERALL SUMMARY: ${allResults.length} checked, ${allResults.length - allFails.length} passed, ${allFails.length} failed`);
  if (allFails.length > 0) {
    console.log('\nAll failures:');
    for (const r of allFails) {
      console.log(` [${r.status}] ${r.name} -- ${r.url}`);
    }
  }
  console.log('');
}

// Do not leave the promise floating: report a failure (for example an
// unreadable portals file) in one line and exit non-zero.
main().catch((err) => {
  console.error(`verify-greenhouse-urls failed: ${err?.message ?? err}`);
  process.exitCode = 1;
});