@forwardimpact/basecamp 2.5.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Extract text from PowerPoint (.pptx) slides.
4
+ *
5
+ * PPTX files are ZIP archives containing XML. This script extracts all text
6
+ * from each slide and outputs it as structured markdown with slide headings.
7
+ * Handles multiple files and outputs to stdout or a file.
8
+ *
9
+ * Usage:
10
+ * node scripts/extract-pptx.mjs <path-to-pptx>
11
+ * node scripts/extract-pptx.mjs <path-to-pptx> -o /tmp/extract.txt
12
+ * node scripts/extract-pptx.mjs file1.pptx file2.pptx
13
+ * node scripts/extract-pptx.mjs -h|--help
14
+ *
15
+ * No external dependencies — uses Node.js built-in modules only.
16
+ */
17
+
18
+ import { readFileSync, writeFileSync } from "node:fs";
19
+ import { basename } from "node:path";
20
+
21
+ const HELP = `extract-pptx — extract slide text from .pptx files
22
+
23
+ Usage:
24
+ node scripts/extract-pptx.mjs <file.pptx> [file2.pptx ...]
25
+ node scripts/extract-pptx.mjs <file.pptx> -o <output.txt>
26
+ node scripts/extract-pptx.mjs -h|--help
27
+
28
+ Options:
29
+ -o <path> Write output to file instead of stdout
30
+ -h, --help Show this help
31
+
32
+ Output: Markdown-formatted text with ## Slide N headings per slide.
33
+ Multiple files get # Deck: filename.pptx headings.`;
34
+
// Print usage when help is requested explicitly (exit 0) or when the script is
// invoked without any arguments (exit 1, because that is a usage error).
if (
  process.argv.includes("-h") ||
  process.argv.includes("--help") ||
  process.argv.length < 3
) {
  console.log(HELP);
  process.exit(process.argv.length < 3 ? 1 : 0);
}

// --- Parse arguments ---

const args = process.argv.slice(2);
// Destination file given with -o; null means "write to stdout".
let outputPath = null;
// Positional arguments, treated as paths of .pptx files to extract.
const files = [];

for (let i = 0; i < args.length; i++) {
  const arg = args[i];
  if (arg === "-o") {
    // A trailing -o used to be ignored silently, sending output to stdout
    // instead of the file the user evidently intended. Report it instead.
    if (i + 1 >= args.length) {
      console.error("Error: -o requires an output path");
      process.exit(1);
    }
    outputPath = args[++i];
  } else if (!arg.startsWith("-")) {
    files.push(arg);
  }
  // Any other dash-prefixed argument is ignored, as before.
}

if (files.length === 0) {
  console.error("Error: no .pptx files provided");
  process.exit(1);
}
62
+
63
+ // --- ZIP parsing (no dependencies) ---
64
+
/**
 * Parse a ZIP file's central directory to extract file entries.
 *
 * Locates the End of Central Directory (EOCD) record by scanning backwards
 * from the end of the buffer, then walks the central directory headers it
 * points to. Only the fields needed to read an entry later are returned.
 *
 * @param {Buffer} buf - Complete contents of the ZIP archive.
 * @returns {Array<{name: string, offset: number, compressedSize: number, compressionMethod: number}>}
 *   One record per central directory header, in directory order. `offset` is
 *   the position of the entry's local file header within `buf`.
 * @throws {Error} If no EOCD record is found, the archive uses a ZIP64
 *   central directory offset, or the central directory runs past the end of
 *   the buffer.
 */
function parseZipEntries(buf) {
  const EOCD_SIGNATURE = 0x06054b50;
  const EOCD_MIN_SIZE = 22;
  const CD_HEADER_SIGNATURE = 0x02014b50;
  const CD_HEADER_SIZE = 46;
  const ZIP64_OFFSET_SENTINEL = 0xffffffff;

  // The EOCD record sits at the end of the file, optionally followed by a
  // comment, so search backwards from the last position it could start at.
  let eocdOffset = -1;
  for (let i = buf.length - EOCD_MIN_SIZE; i >= 0; i--) {
    if (buf.readUInt32LE(i) === EOCD_SIGNATURE) {
      eocdOffset = i;
      break;
    }
  }
  if (eocdOffset === -1) throw new Error("Not a valid ZIP file");

  const cdEntries = buf.readUInt16LE(eocdOffset + 10);
  const cdOffset = buf.readUInt32LE(eocdOffset + 16);

  // 0xFFFFFFFF means the real offset lives in a ZIP64 record, which this
  // dependency-free parser does not read.
  if (cdOffset === ZIP64_OFFSET_SENTINEL) {
    throw new Error("ZIP64 archives are not supported");
  }

  const entries = [];
  let pos = cdOffset;

  for (let e = 0; e < cdEntries; e++) {
    if (pos + 4 > buf.length) {
      throw new Error("Corrupt ZIP file: central directory is truncated");
    }
    // Stop quietly at the first non-header record (kept from the original
    // behaviour so archives with an overstated entry count still load).
    if (buf.readUInt32LE(pos) !== CD_HEADER_SIGNATURE) break;
    if (pos + CD_HEADER_SIZE > buf.length) {
      throw new Error("Corrupt ZIP file: central directory is truncated");
    }

    const compressionMethod = buf.readUInt16LE(pos + 10);
    const compressedSize = buf.readUInt32LE(pos + 20);
    const nameLen = buf.readUInt16LE(pos + 28);
    const extraLen = buf.readUInt16LE(pos + 30);
    const commentLen = buf.readUInt16LE(pos + 32);
    const localHeaderOffset = buf.readUInt32LE(pos + 42);

    const nameStart = pos + CD_HEADER_SIZE;
    // Buffer#toString would silently clamp an out-of-range end and return a
    // shortened name, so reject it explicitly.
    if (nameStart + nameLen > buf.length) {
      throw new Error("Corrupt ZIP file: central directory is truncated");
    }
    const name = buf.toString("utf8", nameStart, nameStart + nameLen);

    entries.push({
      name,
      offset: localHeaderOffset,
      compressedSize,
      compressionMethod,
    });
    pos = nameStart + nameLen + extraLen + commentLen;
  }

  return entries;
}
115
+
116
+ const { inflateRawSync } = await import("node:zlib");
117
+
/**
 * Read the uncompressed content of a ZIP entry.
 *
 * Uses the entry's local file header to find where its data starts, then
 * returns the data as-is (method 0, stored) or inflates it (method 8,
 * deflate). The compressed size comes from `entry`, not the local header.
 *
 * @param {Buffer} buf - Complete contents of the ZIP archive.
 * @param {{offset: number, compressedSize: number, compressionMethod: number, name?: string}} entry
 *   Entry record as produced from the central directory; `name` is only used
 *   in error messages.
 * @returns {Buffer} The entry's uncompressed bytes. For stored entries this is
 *   a view onto `buf`, not a copy.
 * @throws {Error} If the local header is missing or truncated, the data runs
 *   past the end of the buffer, or the compression method is unsupported.
 */
function readEntry(buf, entry) {
  const LOCAL_HEADER_SIGNATURE = 0x04034b50;
  const LOCAL_HEADER_SIZE = 30;

  const pos = entry.offset;
  // Validate before reading: a bad offset would otherwise surface as a
  // RangeError or as an opaque zlib failure on garbage bytes.
  if (
    pos + LOCAL_HEADER_SIZE > buf.length ||
    buf.readUInt32LE(pos) !== LOCAL_HEADER_SIGNATURE
  ) {
    throw new Error(
      `Corrupt ZIP file: no local file header for ${entry.name} at offset ${pos}`,
    );
  }

  const nameLen = buf.readUInt16LE(pos + 26);
  const extraLen = buf.readUInt16LE(pos + 28);
  const dataStart = pos + LOCAL_HEADER_SIZE + nameLen + extraLen;
  const dataEnd = dataStart + entry.compressedSize;

  // subarray() clamps silently, which would hand back truncated content.
  if (dataEnd > buf.length) {
    throw new Error(
      `Corrupt ZIP file: data for ${entry.name} extends past end of file`,
    );
  }
  const raw = buf.subarray(dataStart, dataEnd);

  if (entry.compressionMethod === 0) return raw;
  if (entry.compressionMethod === 8) return inflateRawSync(raw);
  throw new Error(
    `Unsupported compression method ${entry.compressionMethod} for ${entry.name}`,
  );
}
137
+
// The five entities predefined by XML, keyed by name.
const XML_NAMED_ENTITIES = Object.freeze({
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
});

/**
 * Replace XML predefined entities and numeric character references with the
 * characters they stand for. Done in a single pass so that `&amp;lt;` becomes
 * `&lt;` rather than being decoded twice. Unknown or out-of-range references
 * are left untouched.
 * @param {string} text
 * @returns {string}
 */
function decodeXmlEntities(text) {
  return text.replace(
    /&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g,
    (whole, ref) => {
      if (ref[0] !== "#") return XML_NAMED_ENTITIES[ref];
      const codePoint =
        ref[1] === "x"
          ? Number.parseInt(ref.slice(2), 16)
          : Number.parseInt(ref.slice(1), 10);
      // String.fromCodePoint throws above U+10FFFF; keep the raw text instead.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
    },
  );
}

/**
 * Extract all text content from slide XML using the DrawingML namespace.
 *
 * Matches `<a:t>text</a:t>` elements, including ones that carry attributes
 * such as `xml:space="preserve"`. Each match is entity-decoded and trimmed;
 * matches that are empty after trimming are dropped.
 *
 * @param {string} xml - Slide XML as a string.
 * @returns {string[]} One string per non-empty `<a:t>` element, in document
 *   order.
 */
function extractTextFromXml(xml) {
  // `(?:\s[^>]*)?` admits attributes while still rejecting other elements
  // whose names merely start with "a:t" (e.g. <a:tab/>, <a:tbl>).
  const textElement = /<a:t(?:\s[^>]*)?>([^<]*)<\/a:t>/g;
  const texts = [];
  for (const [, rawText] of xml.matchAll(textElement)) {
    const text = decodeXmlEntities(rawText).trim();
    if (text) texts.push(text);
  }
  return texts;
}
154
+
/**
 * Read a .pptx file from disk and render the text of its slides as markdown.
 *
 * Slide entries are the archive members named `ppt/slides/slide*.xml`. They
 * are ordered by the first run of digits in their entry name, and each slide
 * that yields any text becomes a `## Slide N` heading followed by its text
 * lines and a blank line. Slides without text are left out.
 *
 * @param {string} filePath - Path to the .pptx file.
 * @returns {string} Markdown-formatted slide text
 */
function extractPptx(filePath) {
  const archive = readFileSync(filePath);

  // First run of digits in an entry name, e.g. "12" for slide12.xml.
  const slideDigits = (entryName) => entryName.match(/(\d+)/)?.[1];

  const slides = parseZipEntries(archive).filter(
    ({ name }) => name.startsWith("ppt/slides/slide") && name.endsWith(".xml"),
  );
  slides.sort(
    (left, right) =>
      parseInt(slideDigits(left.name) || "0", 10) -
      parseInt(slideDigits(right.name) || "0", 10),
  );

  const sections = [];
  for (const slide of slides) {
    const slideXml = readEntry(archive, slide).toString("utf8");
    const texts = extractTextFromXml(slideXml);
    if (texts.length === 0) continue;

    sections.push(
      `## Slide ${slideDigits(slide.name) || "?"}`,
      texts.join("\n"),
      "",
    );
  }

  return sections.join("\n");
}
191
+
// --- Main ---

// Markdown chunks in output order: an optional "# Deck:" heading per file
// (only when several files were given) followed by that file's slide text.
const outputs = [];

for (const file of files) {
  if (files.length > 1) {
    outputs.push(`# Deck: ${basename(file)}\n`);
  }
  try {
    outputs.push(extractPptx(file));
  } catch (err) {
    // Name the offending file and exit non-zero instead of dying with a raw
    // stack trace; nothing is written when any input fails.
    console.error(`Error: could not extract ${file}: ${err.message}`);
    process.exit(1);
  }
}

const result = outputs.join("\n");

if (outputPath) {
  writeFileSync(outputPath, result);
  console.log(`Extracted ${files.length} deck(s) → ${outputPath}`);
} else {
  process.stdout.write(result);
}
@@ -36,6 +36,7 @@ Run this skill:
36
36
 
37
37
  - `knowledge/Candidates/{Full Name}/brief.md` — candidate profile note
38
38
  - `knowledge/Candidates/{Full Name}/CV.pdf` — local copy of CV (or `CV.docx`)
39
+ - `knowledge/Candidates/{Full Name}/headshot.jpeg` — candidate headshot photo
39
40
  - `~/.cache/fit/basecamp/state/graph_processed` — updated with processed threads
40
41
 
41
42
  ---
@@ -192,6 +193,48 @@ cp "~/.cache/fit/basecamp/apple_mail/attachments/{thread_id}/{filename}" \
192
193
  Use `CV.pdf` for PDF files and `CV.docx` for Word documents. The `## CV` link in
193
194
  the brief uses a relative path: `./CV.pdf`.
194
195
 
196
+ ### Headshot Discovery
197
+
198
+ Search two locations for candidate headshot photos:
199
+
200
+ 1. **Email attachments** —
201
+ `~/.cache/fit/basecamp/apple_mail/attachments/{thread_id}/` may contain
202
+ headshot images sent by recruiters alongside CVs. Look for `.jpg`, `.jpeg`,
203
+ or `.png` files with candidate name fragments in the filename or that are
204
+ clearly portrait photos (not logos, signatures, or email decorations like
205
+ `image001.png`).
206
+
207
+ 2. **Downloads folder** — search `~/Downloads/` and its subdirectories (up to
208
+ three levels deep, per `-maxdepth 3`) for headshot images:
209
+
210
+ ```bash
211
+ find ~/Downloads -maxdepth 3 -type f \( -iname "*.jpg" -o -iname "*.jpeg" -o -iname "*.png" -o -iname "*.heic" \) 2>/dev/null
212
+ ```
213
+
214
+ Match images to candidates by name similarity in the filename (e.g.
215
+ `vitalii.jpeg` matches "Vitalii Huliai", `qazi.jpeg` matches "Qazi Rehman"). Use
216
+ first name, last name, or full name matching — case-insensitive.
217
+
218
+ When a headshot is found, copy it into the candidate directory with a
219
+ standardized name:
220
+
221
+ ```bash
222
+ cp "{source_path}" "knowledge/Candidates/{Full Name}/headshot.jpeg"
223
+ ```
224
+
225
+ Always use `headshot.jpeg` as the filename regardless of the source format. If
226
+ the source is PNG or HEIC, convert it first:
227
+
228
+ ```bash
229
+ # PNG to JPEG
230
+ magick "{source}.png" "knowledge/Candidates/{Full Name}/headshot.jpeg"
231
+ # HEIC to JPEG
232
+ magick "{source}.heic" "knowledge/Candidates/{Full Name}/headshot.jpeg"
233
+ ```
234
+
235
+ If headshots exist in both locations, prefer the Downloads folder version (more
236
+ likely to be a curated, high-quality photo).
237
+
195
238
  ## Step 3: Determine Pipeline Status
196
239
 
197
240
  Assign a status based on the email context:
@@ -430,3 +473,5 @@ produces a full framework-aligned assessment.
430
473
  - [ ] Skills tagged using framework skill IDs where possible
431
474
  - [ ] Gender field populated only from explicit pronouns/titles (never
432
475
  name-inferred)
476
+ - [ ] Headshots searched in email attachments and `~/Downloads/` (recursive)
477
+ - [ ] Found headshots copied as `headshot.jpeg` into candidate directory
@@ -70,56 +70,56 @@ detection.
70
70
 
71
71
  **New format** — stage-count summary (no HM/Recruiter/Location):
72
72
 
73
- | Row | Field | Example |
74
- | --- | ------------------------ | -------------------------------------- |
75
- | 1 | Title header | `4951493 Principal Software Engineer…` |
76
- | 2 | Active Candidates | `74 of 74` |
77
- | 3 | Active Referrals | `3 of 3` |
78
- | 4 | Active Internal | `4 of 4` |
79
- | 7+ | Stage counts | `56 → Considered` |
73
+ | Row | Field | Example |
74
+ | --- | ----------------- | -------------------------------------- |
75
+ | 1 | Title header | `4951493 Principal Software Engineer…` |
76
+ | 2 | Active Candidates | `74 of 74` |
77
+ | 3 | Active Referrals | `3 of 3` |
78
+ | 4 | Active Internal | `4 of 4` |
79
+ | 7+ | Stage counts | `56 → Considered` |
80
80
 
81
81
  ### Candidates Sheet
82
82
 
83
83
  The parser auto-detects the candidates sheet and header row:
84
- - **Old format**: 3+ sheets; candidates on "Candidates" sheet or Sheet3;
85
- header at row 3 (index 2); two "Job Application" columns
84
+
85
+ - **Old format**: 3+ sheets; candidates on "Candidates" sheet or Sheet3; header
86
+ at row 3 (index 2); two "Job Application" columns
86
87
  - **New format**: 2 sheets; candidates on Sheet2; header at row 8 (index 7);
87
88
  single "Job Application" column
88
89
 
89
90
  Column mapping is header-driven — the parser reads the header row and maps
90
- columns by name, not position. Columns that vary between exports (e.g.
91
- "Jobs Applied to", "Referred by", "Convenience Task") are handled
92
- automatically.
91
+ columns by name, not position. Columns that vary between exports (e.g. "Jobs
92
+ Applied to", "Referred by", "Convenience Task") are handled automatically.
93
93
 
94
94
  **Core columns** (present in all formats):
95
95
 
96
- | Header | Maps to brief field… |
97
- | ---------------------- | ------------------------ |
98
- | Job Application | `# {Name}` |
96
+ | Header | Maps to brief field… |
97
+ | ---------------------- | ---------------------------------------- |
98
+ | Job Application | `# {Name}` |
99
99
  | Stage | Row detection only (not used for status) |
100
- | Step / Disposition | **Workday step** → status derivation |
101
- | Resume | Reference only (no file) |
102
- | Date Applied | **First seen** |
103
- | Current Job Title | **Current title**, Title |
104
- | Current Company | **Current title** suffix |
105
- | Source | **Source** |
106
- | Referred by | **Source** suffix |
107
- | Candidate Location | **Location** |
108
- | Phone | **Phone** |
109
- | Email | **Email** |
110
- | Availability Date | **Availability** |
111
- | Visa Requirement | Notes |
112
- | Eligible to Work | Notes |
113
- | Relocation | Notes |
114
- | Salary Expectations | **Rate** |
115
- | Non-Compete | Notes |
116
- | Total Years Experience | Summary context |
117
- | All Job Titles | Work History context |
118
- | Companies | Work History context |
119
- | Degrees | Education |
120
- | Fields of Study | Education |
121
- | Language | **English** / Language |
122
- | Resume Text | `CV.md` content |
100
+ | Step / Disposition | **Workday step** → status derivation |
101
+ | Resume | Reference only (no file) |
102
+ | Date Applied | **First seen** |
103
+ | Current Job Title | **Current title**, Title |
104
+ | Current Company | **Current title** suffix |
105
+ | Source | **Source** |
106
+ | Referred by | **Source** suffix |
107
+ | Candidate Location | **Location** |
108
+ | Phone | **Phone** |
109
+ | Email | **Email** |
110
+ | Availability Date | **Availability** |
111
+ | Visa Requirement | Notes |
112
+ | Eligible to Work | Notes |
113
+ | Relocation | Notes |
114
+ | Salary Expectations | **Rate** |
115
+ | Non-Compete | Notes |
116
+ | Total Years Experience | Summary context |
117
+ | All Job Titles | Work History context |
118
+ | Companies | Work History context |
119
+ | Degrees | Education |
120
+ | Fields of Study | Education |
121
+ | Language | **English** / Language |
122
+ | Resume Text | `CV.md` content |
123
123
 
124
124
  #### Name Annotations
125
125
 
@@ -175,28 +175,28 @@ Use fuzzy matching — the Workday name may differ slightly from an existing not
175
175
 
176
176
  ## Step 3: Determine Pipeline Status
177
177
 
178
- Map the **Step / Disposition** column to the `track-candidates` pipeline
179
- status. Do NOT use the Stage column for status — it is only used for row
180
- detection (stop condition):
181
-
182
- | Workday Step / Disposition | Pipeline Status |
183
- | ------------------------------------------ | ------------------ |
184
- | `Considered` | `new` |
185
- | `Review` | `new` |
186
- | `Manager Resume Screen` | `screening` |
187
- | `Schedule Recruiter Phone Screen` | `screening` |
188
- | `Manager Request to Move Forward (HS)` | `screening` |
189
- | `Proposed Interview Slate` | `screening` |
190
- | `Assessment` | `screening` |
191
- | `Manager Request to Decline (HS)` | `rejected` |
192
- | `Interview` / `Phone Screen` | `first-interview` |
193
- | `Second Interview` | `second-interview` |
194
- | `Reference Check` | `second-interview` |
195
- | `Offer` | `offer` |
196
- | `Employment Agreement` | `offer` |
197
- | `Background Check` | `hired` |
198
- | `Ready for Hire` | `hired` |
199
- | `Rejected` / `Declined` | `rejected` |
178
+ Map the **Step / Disposition** column to the `track-candidates` pipeline status.
179
+ Do NOT use the Stage column for status — it is only used for row detection (stop
180
+ condition):
181
+
182
+ | Workday Step / Disposition | Pipeline Status |
183
+ | -------------------------------------- | ------------------ |
184
+ | `Considered` | `new` |
185
+ | `Review` | `new` |
186
+ | `Manager Resume Screen` | `screening` |
187
+ | `Schedule Recruiter Phone Screen` | `screening` |
188
+ | `Manager Request to Move Forward (HS)` | `screening` |
189
+ | `Proposed Interview Slate` | `screening` |
190
+ | `Assessment` | `screening` |
191
+ | `Manager Request to Decline (HS)` | `rejected` |
192
+ | `Interview` / `Phone Screen` | `first-interview` |
193
+ | `Second Interview` | `second-interview` |
194
+ | `Reference Check` | `second-interview` |
195
+ | `Offer` | `offer` |
196
+ | `Employment Agreement` | `offer` |
197
+ | `Background Check` | `hired` |
198
+ | `Ready for Hire` | `hired` |
199
+ | `Rejected` / `Declined` | `rejected` |
200
200
 
201
201
  If the step value is empty or not recognized, default to `new`.
202
202
 
@@ -122,11 +122,7 @@ const candRows = XLSX.utils.sheet_to_json(ws3, { header: 1, defval: "" });
122
122
  // Old format: row 3 (index 2). New format: row 8 (index 7).
123
123
  let HEADER_ROW = 2;
124
124
  for (let i = 0; i < Math.min(15, candRows.length); i++) {
125
- if (
126
- candRows[i].some(
127
- (c) => String(c).trim().toLowerCase() === "stage",
128
- )
129
- ) {
125
+ if (candRows[i].some((c) => String(c).trim().toLowerCase() === "stage")) {
130
126
  HEADER_ROW = i;
131
127
  break;
132
128
  }
@@ -99,8 +99,8 @@ cycle. The naming convention is `{agent}_triage.md`:
99
99
  - `recruiter_triage.md` — candidate pipeline, assessments, track distribution
100
100
  - `head_hunter_triage.md` — prospect pipeline, source rotation, match strength
101
101
 
102
- The **chief-of-staff** reads all five triage files to synthesize daily
103
- briefings in `knowledge/Briefings/`.
102
+ The **chief-of-staff** reads all five triage files to synthesize daily briefings
103
+ in `knowledge/Briefings/`.
104
104
 
105
105
  ## Cache Directory (`~/.cache/fit/basecamp/`)
106
106