@reconcrap/boss-recommend-mcp 2.0.7 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@reconcrap/boss-recommend-mcp",
3
- "version": "2.0.7",
3
+ "version": "2.0.9",
4
4
  "description": "Unified MCP pipeline for recommend-page filtering and screening on Boss Zhipin",
5
5
  "keywords": [
6
6
  "boss",
package/src/chat-mcp.js CHANGED
@@ -863,15 +863,24 @@ function getRunOptions(args, normalized, session, { workspaceRoot = "", configRe
863
863
  llmConfig: resolvedConfig.ok ? {
864
864
  ...resolvedConfig.config
865
865
  } : null,
866
- llmTimeoutMs: parsePositiveInteger(args.llm_timeout_ms, slowLive ? 180000 : 120000),
867
- llmImageLimit: parsePositiveInteger(args.llm_image_limit, 8),
868
- llmImageDetail: normalizeText(args.llm_image_detail) || "high",
866
+ llmTimeoutMs: parsePositiveInteger(
867
+ args.llm_timeout_ms,
868
+ parsePositiveInteger(resolvedConfig.config?.llmTimeoutMs || resolvedConfig.config?.timeoutMs, slowLive ? 180000 : 120000)
869
+ ),
870
+ llmImageLimit: parsePositiveInteger(
871
+ args.llm_image_limit,
872
+ parsePositiveInteger(resolvedConfig.config?.llmImageLimit || resolvedConfig.config?.imageLimit, 8)
873
+ ),
874
+ llmImageDetail: normalizeText(
875
+ args.llm_image_detail || resolvedConfig.config?.llmImageDetail || resolvedConfig.config?.imageDetail
876
+ ) || "low",
869
877
  screeningMode: normalizeScreeningModeArg(args),
870
878
  listMaxScrolls: parsePositiveInteger(args.list_max_scrolls, 200),
871
879
  listStableSignatureLimit: parsePositiveInteger(args.list_stable_signature_limit, 2),
872
880
  listWheelDeltaY: parsePositiveInteger(args.list_wheel_delta_y, 850),
873
881
  listSettleMs: parsePositiveInteger(args.list_settle_ms, slowLive ? 1800 : 1200),
874
882
  listFallbackPoint: null,
883
+ imageOutputDir: resolveBossConfiguredOutputDir("", getChatRunsDir()),
875
884
  name: "mcp-boss-chat-run"
876
885
  };
877
886
  }
@@ -0,0 +1,199 @@
1
+ import { htmlToText, normalizeText } from "../screening/index.js";
2
+
3
/**
 * Cleans a list of raw values and removes repeats.
 *
 * Each entry is passed through `normalizeText`; entries whose normalized form
 * is falsy are discarded, and the remaining values are de-duplicated while
 * keeping the order in which they were first seen.
 *
 * @param {Array<*>} [values=[]] Raw values to normalize.
 * @returns {Array<*>} Distinct, truthy results of `normalizeText`
 *   (presumably strings — confirm against `normalizeText`).
 */
function uniqueTexts(values = []) {
  const seen = new Set();
  values.forEach((entry) => {
    const text = normalizeText(entry);
    if (text) {
      seen.add(text);
    }
  });
  return [...seen];
}
6
+
7
/**
 * Splits a `class` attribute value into its individual class names.
 *
 * @param {*} [value=""] Attribute value; falsy input is treated as "".
 * @returns {string[]} Non-empty, whitespace-separated tokens in source order.
 */
function classList(value = "") {
  const tokens = [];
  for (const piece of String(value || "").split(/\s+/)) {
    const token = piece.trim();
    if (token) {
      tokens.push(token);
    }
  }
  return tokens;
}
10
+
11
/**
 * Tells whether a `class` attribute value contains every required class.
 *
 * @param {*} [classValue=""] Raw `class` attribute value.
 * @param {string[]} [requiredClasses=[]] Class names that must all be present.
 * @returns {boolean} `true` when no required class is missing (vacuously
 *   `true` for an empty requirement list).
 */
function hasAllClasses(classValue = "", requiredClasses = []) {
  const present = classList(classValue);
  const hasMissing = requiredClasses.some((wanted) => !present.includes(wanted));
  return !hasMissing;
}
15
+
16
/**
 * Finds the first `class="…"` attribute, at or after `startIndex`, whose value
 * contains every class in `requiredClasses`.
 *
 * @param {*} [html=""] Markup to scan; falsy input is treated as "".
 * @param {string[]} [requiredClasses=[]] Classes the attribute must contain.
 * @param {number} [startIndex=0] Offset to start scanning from; negative or
 *   non-numeric values are treated as 0.
 * @returns {number} Offset of the matching `class` attribute name, or -1.
 */
function findClassAttributeIndex(html = "", requiredClasses = [], startIndex = 0) {
  // Stringify once instead of on every loop iteration.
  const source = String(html || "");
  // The lookbehind keeps attributes that merely end in "class" (data-class,
  // ng-class, :class, v-bind:class) from being mistaken for the real one.
  // Whitespace around "=" and newlines inside the value are legal HTML.
  const classAttribute = /(?<![\w:.-])class\s*=\s*(["'])([\s\S]*?)\1/gi;
  classAttribute.lastIndex = Math.max(0, Number(startIndex) || 0);
  let match;
  while ((match = classAttribute.exec(source)) !== null) {
    if (hasAllClasses(match[2], requiredClasses)) {
      return match.index;
    }
  }
  return -1;
}
25
+
26
/**
 * Cuts out the slice of markup that starts at the tag carrying `startClasses`
 * and stops just before the nearest following tag that matches any of the
 * `endClassGroups`.
 *
 * @param {*} [html=""] Markup to slice; falsy input is treated as "".
 * @param {string[]} [startClasses=[]] Classes identifying the opening tag.
 * @param {string[][]} [endClassGroups=[]] Class groups; the earliest tag that
 *   satisfies any one group ends the section.
 * @returns {string} The slice, or "" when no start tag is found. Without a
 *   matching end tag the slice runs to the end of the markup.
 */
function sectionByClasses(html = "", startClasses = [], endClassGroups = []) {
  const markup = String(html || "");
  const anchor = findClassAttributeIndex(markup, startClasses);
  if (anchor < 0) {
    return "";
  }

  // Walk back from a class attribute to the "<" that opens its tag.
  const openTagAt = (attributeAt) => markup.lastIndexOf("<", attributeAt);

  const sliceFrom = Math.max(0, openTagAt(anchor));
  let sliceTo = markup.length;
  for (const boundaryClasses of endClassGroups) {
    const boundaryAt = findClassAttributeIndex(markup, boundaryClasses, anchor + 1);
    if (boundaryAt < 0) {
      continue;
    }
    const boundaryTagAt = openTagAt(boundaryAt);
    const cut = boundaryTagAt >= 0 ? boundaryTagAt : boundaryAt;
    if (cut < sliceTo) {
      sliceTo = cut;
    }
  }
  return markup.slice(sliceFrom, sliceTo);
}
41
+
42
/**
 * Converts an HTML fragment into a single line of normalized text.
 *
 * @param {string} [fragment=""] HTML fragment.
 * @returns {*} Output of `normalizeText` applied to the `htmlToText` result
 *   after runs of newlines have been replaced by one space.
 */
function textFromHtmlFragment(fragment = "") {
  const plain = htmlToText(fragment);
  const singleLine = plain.replace(/\n+/g, " ");
  return normalizeText(singleLine);
}
45
+
46
/**
 * Removes a trailing online/activity status label from a name.
 *
 * @param {*} [value=""] Raw name text.
 * @returns {string} Normalized name with at most one trailing status label
 *   (and the whitespace before it) removed.
 */
function stripNameSuffixes(value = "") {
  // Status labels that can follow the name; only a match at the very end counts.
  const activitySuffix = /\s*(在线|刚刚活跃|今日活跃|本周活跃|本月活跃)$/u;
  const cleaned = normalizeText(value);
  return cleaned.replace(activitySuffix, "").trim();
}
51
+
52
/**
 * Returns the text of the first `<span>` whose class list includes
 * `className`.
 *
 * @param {*} [html=""] Markup to scan; falsy input is treated as "".
 * @param {string} [className=""] Class name to look for.
 * @returns {*} Text of the first matching span via `textFromHtmlFragment`,
 *   or "" when no span matches.
 */
function extractFirstSpanWithClass(html = "", className = "") {
  const spanPattern = /<span\b[^>]*class=(["'])(.*?)\1[^>]*>([\s\S]*?)<\/span>/gi;
  for (const [, , classValue, inner] of String(html || "").matchAll(spanPattern)) {
    if (classList(classValue).includes(className)) {
      return textFromHtmlFragment(inner);
    }
  }
  return "";
}
62
+
63
/**
 * Collects the distinct text contents of every `<span>` in a fragment.
 *
 * @param {*} [fragment=""] Markup to scan; falsy input is treated as "".
 * @returns {Array<*>} Span texts in document order, cleaned and de-duplicated
 *   by `uniqueTexts`.
 */
function extractSpanTexts(fragment = "") {
  const spanPattern = /<span\b[^>]*>([\s\S]*?)<\/span>/gi;
  const texts = Array.from(
    String(fragment || "").matchAll(spanPattern),
    ([, inner]) => textFromHtmlFragment(inner)
  );
  return uniqueTexts(texts);
}
72
+
73
/**
 * For every `<div>` carrying all `requiredClasses`, gathers the span texts
 * found inside it.
 *
 * @param {*} [fragment=""] Markup to scan; falsy input is treated as "".
 * @param {string[]} [requiredClasses=[]] Classes a div must carry.
 * @returns {Array<Array<*>>} One non-empty list of span texts per matching
 *   div, in document order; divs with no span text are left out.
 */
function extractDivTextsWithClasses(fragment = "", requiredClasses = []) {
  const divPattern = /<div\b[^>]*class=(["'])(.*?)\1[^>]*>([\s\S]*?)<\/div>/gi;
  const rows = [];
  for (const [, , classValue, inner] of String(fragment || "").matchAll(divPattern)) {
    if (!hasAllClasses(classValue, requiredClasses)) {
      continue;
    }
    const spanTexts = extractSpanTexts(inner);
    if (spanTexts.length) {
      rows.push(spanTexts);
    }
  }
  return rows;
}
84
+
85
/**
 * Parses an age label made of exactly two digits followed by "岁".
 *
 * @param {*} [value=""] Candidate age text.
 * @returns {number|null} The age as an integer, or `null` when the normalized
 *   text is not exactly of that form.
 */
function parseAgeValue(value = "") {
  const [, digits] = normalizeText(value).match(/^(\d{2})岁$/u) ?? [];
  if (digits === undefined) {
    return null;
  }
  const age = Number.parseInt(digits, 10);
  return Number.isFinite(age) ? age : null;
}
91
+
92
/**
 * Picks the first known education-degree keyword out of a piece of text.
 *
 * @param {*} [value=""] Text that may mention a degree.
 * @returns {string} The matched keyword exactly as it appears, or "".
 */
function parseDegreeValue(value = "") {
  // The combined "中专/中技" form is listed before its parts so it wins.
  const degreePattern = /博士|硕士|本科|大专|专科|高中|中专\/中技|中专|中技|初中及以下/u;
  const hit = normalizeText(value).match(degreePattern);
  if (!hit) {
    return "";
  }
  return hit[0];
}
97
+
98
/**
 * Decides whether a piece of text has the shape of a salary label.
 *
 * Three shapes are accepted after normalization: a "negotiable" label, a
 * K-denominated figure or range with an optional months multiplier, and a
 * per-day range in yuan.
 *
 * @param {*} [value=""] Text to classify.
 * @returns {boolean} `true` when the whole text matches one of the shapes.
 */
function isSalaryLike(value = "") {
  const text = normalizeText(value);
  const salaryShapes = [
    /^(?:面议|薪资面议)$/i,
    /^\d+(?:\.\d+)?(?:\s*-\s*\d+(?:\.\d+)?)?\s*[kK](?:\s*[·xX*]\s*\d+\s*薪?)?$/,
    /^\d+\s*-\s*\d+\s*元\s*\/\s*天$/
  ];
  return salaryShapes.some((shape) => shape.test(text));
}
106
+
107
/**
 * Extracts the salary label from the `salary-wrap` section of a card.
 *
 * @param {string} [html=""] Card markup.
 * @returns {*} The first span text in that section that `isSalaryLike`
 *   accepts, or "" when there is none.
 */
function extractSalary(html = "") {
  const salarySection = sectionByClasses(html, ["salary-wrap"], [
    ["name-wrap"],
    ["col-2"]
  ]);
  for (const text of extractSpanTexts(salarySection)) {
    if (isSalaryLike(text)) {
      return text;
    }
  }
  return "";
}
114
+
115
/**
 * Reads the `base-info` section of a card and derives age and degree from it.
 *
 * @param {string} [html=""] Card markup.
 * @returns {{parts: Array<*>, age: (number|null), degree: string}} The span
 *   texts of the section, the first one that parses as an age (or `null`),
 *   and the first degree keyword found in any of them (or "").
 */
function extractBaseInfo(html = "") {
  const baseInfoSection = sectionByClasses(html, ["base-info"], [
    ["expect-wrap"],
    ["geek-desc"],
    ["timeline-wrap"]
  ]);
  const parts = extractSpanTexts(baseInfoSection);

  let age = null;
  for (const part of parts) {
    const parsedAge = parseAgeValue(part);
    if (parsedAge != null) {
      age = parsedAge;
      break;
    }
  }

  let degree = "";
  for (const part of parts) {
    const parsedDegree = parseDegreeValue(part);
    if (parsedDegree) {
      degree = parsedDegree;
      break;
    }
  }

  return { parts, age, degree };
}
128
+
129
/**
 * Returns the span texts of the first content row inside a timeline section.
 *
 * @param {string} [html=""] Card markup.
 * @param {string} [timelineClass=""] Timeline class; "work-exps" ends at the
 *   `edu-exps` timeline, any other value ends at `card-btns`. Both also end
 *   at `action-wrap`.
 * @returns {Array<*>} Span texts of the first `join-text-wrap content` div in
 *   the section, or an empty array when there is none.
 */
function extractFirstTimelineContent(html = "", timelineClass = "") {
  const nextSection = timelineClass === "work-exps"
    ? ["timeline-wrap", "edu-exps"]
    : ["card-btns"];
  const timeline = sectionByClasses(
    html,
    ["timeline-wrap", timelineClass],
    [nextSection, ["action-wrap"]]
  );
  const [firstRow] = extractDivTextsWithClasses(timeline, ["join-text-wrap", "content"]);
  return firstRow || [];
}
137
+
138
/**
 * Collects the distinct texts of every `<span>` whose class list includes
 * `tag-item`.
 *
 * @param {*} [html=""] Markup to scan; falsy input is treated as "".
 * @returns {Array<*>} Tag texts in document order, cleaned and de-duplicated
 *   by `uniqueTexts`.
 */
function extractTagTexts(html = "") {
  const spanPattern = /<span\b[^>]*class=(["'])(.*?)\1[^>]*>([\s\S]*?)<\/span>/gi;
  const collected = [];
  for (const [, , classValue, inner] of String(html || "").matchAll(spanPattern)) {
    if (classList(classValue).includes("tag-item")) {
      collected.push(textFromHtmlFragment(inner));
    }
  }
  return uniqueTexts(collected);
}
149
+
150
/**
 * Parses the structured fields of a candidate card out of its HTML.
 *
 * @param {string} [html=""] Outer HTML of one candidate card.
 * @returns {{
 *   identity: {name: string, current_company: string, current_position: string,
 *     school: string, major: string, degree: string, age: (number|null)},
 *   salary: string, base_info: Array, work: Array, education: Array, tags: Array
 * }} Parsed fields. Absent values are "" (or `null` for `age`, `[]` for
 *   lists). A name that looks like a salary label is rejected. The degree
 *   found in the education row takes precedence over the base-info one.
 */
export function parseBossCandidateCardFieldsFromHtml(html = "") {
  const rawName = stripNameSuffixes(extractFirstSpanWithClass(html, "name"));
  const { parts: baseInfoParts, age, degree: baseInfoDegree } = extractBaseInfo(html);
  const work = extractFirstTimelineContent(html, "work-exps");
  const education = extractFirstTimelineContent(html, "edu-exps");
  const educationDegree = education.map(parseDegreeValue).find(Boolean) || "";

  const nameIsUsable = Boolean(rawName) && !isSalaryLike(rawName);
  const identity = {
    name: nameIsUsable ? rawName : "",
    current_company: work[0] || "",
    current_position: work[1] || "",
    school: education[0] || "",
    major: education[1] || "",
    degree: educationDegree || baseInfoDegree || "",
    age
  };

  return {
    identity,
    salary: extractSalary(html),
    base_info: baseInfoParts,
    work,
    education,
    tags: extractTagTexts(html)
  };
}
173
+
174
/**
 * Merges the fields parsed from a card's HTML into a candidate record without
 * mutating the record.
 *
 * Parsed identity values overwrite existing ones only when they are non-empty,
 * parsed tags are appended to the existing tags (de-duplicated), and the raw
 * parsed lists are stored under `metadata[metadataKey]`.
 *
 * @param {object|null|undefined} candidate Existing candidate record. A
 *   missing or non-object value is treated as an empty record instead of
 *   throwing.
 * @param {string} [outerHTML=""] Outer HTML of the candidate card.
 * @param {{metadataKey?: string}} [options] `metadataKey` is the metadata
 *   property that receives the parsed lists (default "boss_card_fields").
 * @returns {object} A new record with merged `identity`, `tags`, `metadata`.
 */
export function mergeBossCandidateCardFields(candidate, outerHTML = "", {
  metadataKey = "boss_card_fields"
} = {}) {
  // Reading `.identity` off null/undefined would throw; fall back to {}.
  const base = candidate && typeof candidate === "object" ? candidate : {};
  const parsed = parseBossCandidateCardFieldsFromHtml(outerHTML);

  const identity = { ...(base.identity || {}) };
  for (const [key, value] of Object.entries(parsed.identity || {})) {
    // Empty parses must not erase a value the record already has.
    if (value !== "" && value != null) {
      identity[key] = value;
    }
  }

  // Spreading a string would explode it into characters, so a lone string is
  // kept as one tag and any other non-array value is ignored.
  let existingTags = [];
  if (Array.isArray(base.tags)) {
    existingTags = base.tags;
  } else if (typeof base.tags === "string" && base.tags) {
    existingTags = [base.tags];
  }

  return {
    ...base,
    identity,
    tags: uniqueTexts([...existingTags, ...(parsed.tags || [])]),
    metadata: {
      ...(base.metadata || {}),
      [metadataKey]: {
        salary: parsed.salary || "",
        base_info: parsed.base_info || [],
        work: parsed.work || [],
        education: parsed.education || [],
        tags: parsed.tags || []
      }
    }
  };
}
@@ -1,6 +1,7 @@
1
1
  import fs from "node:fs";
2
2
  import crypto from "node:crypto";
3
3
  import path from "node:path";
4
+ import sharp from "sharp";
4
5
  import {
5
6
  getAttributesMap,
6
7
  getNodeBox,
@@ -149,6 +150,63 @@ function screenshotHash(buffer) {
149
150
  return crypto.createHash("sha256").update(buffer).digest("hex");
150
151
  }
151
152
 
153
/**
 * Re-encodes a screenshot with `sharp`, optionally capping its width.
 *
 * Nothing is done (and `sharp` is never touched) when optimization is disabled
 * and no positive width cap is requested. Failures are reported through
 * `optimization_error` instead of being thrown, and the original buffer is
 * handed back in that case.
 *
 * @param {Buffer} buffer Encoded screenshot bytes.
 * @param {object} [options]
 * @param {boolean} [options.enabled=false] Re-encode even without a width cap.
 * @param {string} [options.format="png"] "jpeg"/"jpg", "webp", or anything
 *   else for PNG.
 * @param {number} [options.quality] JPEG/WebP quality, clamped to 35..95;
 *   defaults to 72 (JPEG) or 76 (WebP) when missing or non-numeric.
 * @param {number} [options.resizeMaxWidth=0] Maximum width in pixels; values
 *   that are not positive numbers disable resizing.
 * @returns {Promise<{buffer: Buffer, optimized: boolean,
 *   original_byte_length: number, optimization_error: (string|null)}>}
 */
async function optimizeScreenshotBuffer(buffer, {
  enabled = false,
  format = "png",
  quality,
  resizeMaxWidth = 0
} = {}) {
  // Read once up front so the error path cannot throw on a missing buffer.
  const originalByteLength = buffer?.length ?? 0;
  // Normalize before the guard: a negative or non-numeric cap is "no cap" and
  // must not force a re-encode on its own.
  const safeMaxWidth = Math.max(0, Number(resizeMaxWidth) || 0);
  if (!enabled && safeMaxWidth === 0) {
    return {
      buffer,
      optimized: false,
      original_byte_length: originalByteLength,
      optimization_error: null
    };
  }

  // Clamp a caller-supplied quality into 35..95, else use the codec default.
  const resolveQuality = (fallback) => (
    quality == null ? fallback : Math.max(35, Math.min(95, Number(quality) || fallback))
  );

  try {
    const normalizedFormat = format === "jpg" ? "jpeg" : format;
    let pipeline = sharp(buffer, { failOn: "none" });
    const metadata = await pipeline.metadata();
    const width = Number(metadata.width) || 0;
    if (safeMaxWidth > 0 && width > safeMaxWidth) {
      pipeline = pipeline.resize({
        width: safeMaxWidth,
        withoutEnlargement: true
      });
    }
    if (normalizedFormat === "jpeg") {
      pipeline = pipeline.jpeg({
        quality: resolveQuality(72),
        mozjpeg: true
      });
    } else if (normalizedFormat === "webp") {
      pipeline = pipeline.webp({
        quality: resolveQuality(76)
      });
    } else {
      pipeline = pipeline.png({
        compressionLevel: 9,
        adaptiveFiltering: true
      });
    }
    const optimizedBuffer = await pipeline.toBuffer();
    return {
      buffer: optimizedBuffer,
      optimized: true,
      original_byte_length: originalByteLength,
      optimization_error: null
    };
  } catch (error) {
    // Best effort: keep the original bytes and surface the reason.
    return {
      buffer,
      optimized: false,
      original_byte_length: originalByteLength,
      optimization_error: error?.message || String(error)
    };
  }
}
209
+
152
210
  export async function captureScrolledNodeScreenshots(client, nodeId, {
153
211
  filePath,
154
212
  format = "png",
@@ -156,21 +214,34 @@ export async function captureScrolledNodeScreenshots(client, nodeId, {
156
214
  padding = 0,
157
215
  captureBeyondViewport = true,
158
216
  fromSurface = true,
217
+ captureViewport = false,
159
218
  maxScreenshots = 6,
160
219
  wheelDeltaY = 650,
161
220
  settleMs = 900,
162
221
  duplicateStopCount = 2,
222
+ skipDuplicateScreenshots = false,
223
+ optimize = false,
224
+ resizeMaxWidth = 0,
163
225
  metadata = {}
164
226
  } = {}) {
165
227
  if (!nodeId) throw new Error("captureScrolledNodeScreenshots requires nodeId");
228
+ const sequenceStarted = Date.now();
166
229
  const screenshots = [];
167
230
  let consecutiveDuplicates = 0;
168
231
  let previousHash = "";
232
+ let captureCount = 0;
233
+ let droppedDuplicateCount = 0;
169
234
 
170
235
  for (let index = 0; index < Math.max(1, Number(maxScreenshots) || 1); index += 1) {
236
+ captureCount += 1;
237
+ const captureStarted = Date.now();
171
238
  const box = await getNodeBox(client, nodeId);
172
239
  const clip = withPadding(box.rect, padding);
173
- const captureOptions = {
240
+ const captureOptions = captureViewport ? {
241
+ format,
242
+ fromSurface,
243
+ captureBeyondViewport: false
244
+ } : {
174
245
  format,
175
246
  fromSurface,
176
247
  captureBeyondViewport,
@@ -180,7 +251,14 @@ export async function captureScrolledNodeScreenshots(client, nodeId, {
180
251
  captureOptions.quality = quality;
181
252
  }
182
253
  const screenshot = await client.Page.captureScreenshot(captureOptions);
183
- const buffer = Buffer.from(screenshot.data || "", "base64");
254
+ const originalBuffer = Buffer.from(screenshot.data || "", "base64");
255
+ const optimized = await optimizeScreenshotBuffer(originalBuffer, {
256
+ enabled: optimize,
257
+ format,
258
+ quality,
259
+ resizeMaxWidth
260
+ });
261
+ const buffer = optimized.buffer;
184
262
  const hash = screenshotHash(buffer);
185
263
  const duplicateOfPrevious = previousHash && previousHash === hash;
186
264
  if (duplicateOfPrevious) {
@@ -189,29 +267,40 @@ export async function captureScrolledNodeScreenshots(client, nodeId, {
189
267
  consecutiveDuplicates = 0;
190
268
  }
191
269
 
192
- const outputPath = filePath ? filePathForSequence(filePath, index, format) : null;
193
- if (outputPath) {
194
- fs.writeFileSync(outputPath, buffer);
195
- }
270
+ let outputPath = null;
271
+ if (duplicateOfPrevious && skipDuplicateScreenshots) {
272
+ droppedDuplicateCount += 1;
273
+ } else {
274
+ outputPath = filePath ? filePathForSequence(filePath, screenshots.length, format) : null;
275
+ if (outputPath) {
276
+ fs.writeFileSync(outputPath, buffer);
277
+ }
196
278
 
197
- screenshots.push({
198
- index,
199
- source: "image",
200
- captured_at: nowIso(),
201
- node_id: nodeId,
202
- format,
203
- mime_type: `image/${format === "jpeg" ? "jpeg" : "png"}`,
204
- byte_length: buffer.length,
205
- file_path: outputPath,
206
- sha256: hash,
207
- duplicate_of_previous: Boolean(duplicateOfPrevious),
208
- clip,
209
- node_rect: box.rect,
210
- scroll: index === 0
211
- ? { before_capture: "initial" }
212
- : { before_capture: `wheel_down_${index}` },
213
- metadata
214
- });
279
+ screenshots.push({
280
+ index: screenshots.length,
281
+ capture_index: index,
282
+ source: "image",
283
+ captured_at: nowIso(),
284
+ node_id: nodeId,
285
+ format,
286
+ mime_type: `image/${format === "jpeg" ? "jpeg" : "png"}`,
287
+ byte_length: buffer.length,
288
+ original_byte_length: optimized.original_byte_length || originalBuffer.length,
289
+ optimized: Boolean(optimized.optimized),
290
+ optimization_error: optimized.optimization_error || null,
291
+ elapsed_ms: Date.now() - captureStarted,
292
+ file_path: outputPath,
293
+ sha256: hash,
294
+ duplicate_of_previous: Boolean(duplicateOfPrevious),
295
+ clip,
296
+ capture_viewport: Boolean(captureViewport),
297
+ node_rect: box.rect,
298
+ scroll: index === 0
299
+ ? { before_capture: "initial" }
300
+ : { before_capture: `wheel_down_${index}` },
301
+ metadata
302
+ });
303
+ }
215
304
 
216
305
  previousHash = hash;
217
306
  if (consecutiveDuplicates >= Math.max(1, Number(duplicateStopCount) || 1)) {
@@ -238,8 +327,21 @@ export async function captureScrolledNodeScreenshots(client, nodeId, {
238
327
  source: "image-scroll-sequence",
239
328
  captured_at: nowIso(),
240
329
  node_id: nodeId,
330
+ elapsed_ms: Date.now() - sequenceStarted,
331
+ capture_count: captureCount,
241
332
  screenshot_count: screenshots.length,
242
333
  unique_screenshot_count: new Set(screenshots.map((item) => item.sha256)).size,
334
+ duplicate_screenshot_count: captureCount - new Set(screenshots.map((item) => item.sha256)).size,
335
+ dropped_duplicate_count: droppedDuplicateCount,
336
+ total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.byte_length) || 0), 0),
337
+ original_total_byte_length: screenshots.reduce((sum, item) => sum + (Number(item.original_byte_length) || 0), 0),
338
+ optimization: {
339
+ enabled: Boolean(optimize),
340
+ resize_max_width: Math.max(0, Number(resizeMaxWidth) || 0),
341
+ capture_viewport: Boolean(captureViewport),
342
+ format,
343
+ quality: quality ?? null
344
+ },
243
345
  file_paths: screenshots.map((item) => item.file_path).filter(Boolean),
244
346
  screenshots,
245
347
  metadata
@@ -125,8 +125,14 @@ export function summarizeImageEvidence(imageEvidence = null) {
125
125
  if (!imageEvidence) return null;
126
126
  return {
127
127
  source: imageEvidence.source || "",
128
+ elapsed_ms: imageEvidence.elapsed_ms || 0,
129
+ capture_count: imageEvidence.capture_count || imageEvidence.screenshot_count || 0,
128
130
  screenshot_count: imageEvidence.screenshot_count || 0,
129
131
  unique_screenshot_count: imageEvidence.unique_screenshot_count || 0,
132
+ dropped_duplicate_count: imageEvidence.dropped_duplicate_count || 0,
133
+ total_byte_length: imageEvidence.total_byte_length || 0,
134
+ original_total_byte_length: imageEvidence.original_total_byte_length || 0,
135
+ optimization: imageEvidence.optimization || null,
130
136
  file_paths: imageEvidence.file_paths || [],
131
137
  first_clip: imageEvidence.screenshots?.[0]?.clip || imageEvidence.clip || null
132
138
  };
@@ -227,8 +227,17 @@ function pickCandidate(row = {}) {
227
227
 
228
228
  function timingValue(row = {}, ...keys) {
229
229
  const timings = row.timings || row.timing || {};
230
+ const detail = row.detail || {};
231
+ const acquisition = detail.cv_acquisition || {};
232
+ const fallbackByKey = {
233
+ network_cv_wait_ms: acquisition.network_wait?.elapsed_ms,
234
+ screenshot_capture_ms: acquisition.image_evidence?.elapsed_ms || detail.image_evidence?.elapsed_ms,
235
+ dom_fallback_ms: acquisition.content_wait?.elapsed_ms,
236
+ close_detail_ms: detail.close_result?.elapsed_ms,
237
+ post_action_ms: row.post_action?.elapsed_ms
238
+ };
230
239
  for (const key of keys) {
231
- const value = firstDefined(row[key], timings[key]);
240
+ const value = firstDefined(row[key], timings[key], fallbackByKey[key]);
232
241
  if (value !== "") return value;
233
242
  }
234
243
  return "";
@@ -0,0 +1,33 @@
1
+ import path from "node:path";
2
+
3
/**
 * Adds a duration to the running total stored at `timings[key]`.
 *
 * The call is a no-op when `timings` or `key` is falsy, or when `value` is
 * not a usable measurement: it must be a number, bigint or non-blank string
 * that converts to a finite, non-negative number. The duration is rounded to
 * the nearest integer before being added.
 *
 * @param {object|null|undefined} timings Mutable map of accumulated timings.
 * @param {string} key Name of the timing bucket.
 * @param {*} value Duration to add (milliseconds, judging by the callers'
 *   use of `Date.now()` differences).
 * @returns {void}
 */
export function addTiming(timings, key, value) {
  if (!timings || !key) return;
  // Number(null), Number("") and Number(false) are all 0, which would record
  // a missing measurement as a real 0 ms timing; treat them as absent.
  const kind = typeof value;
  if (kind !== "number" && kind !== "bigint" && kind !== "string") return;
  if (kind === "string" && value.trim() === "") return;
  const elapsed = Number(value);
  if (!Number.isFinite(elapsed) || elapsed < 0) return;
  timings[key] = (Number(timings[key]) || 0) + Math.round(elapsed);
}
9
+
10
/**
 * Runs `task` and records how long it took, whether it succeeds or throws.
 *
 * @param {object|null|undefined} timings Map handed to `addTiming`.
 * @param {string} key Timing bucket handed to `addTiming`.
 * @param {function(): *} task Work to run; its result is awaited.
 * @returns {Promise<*>} Whatever `task` resolves to; rejections propagate
 *   after the elapsed time has been recorded.
 */
export async function measureTiming(timings, key, task) {
  const startedAt = Date.now();
  try {
    const outcome = await task();
    return outcome;
  } finally {
    const elapsedMs = Date.now() - startedAt;
    addTiming(timings, key, elapsedMs);
  }
}
18
+
19
/**
 * Builds the path of an image-evidence file:
 * `<imageOutputDir>/<runId>/<domain>-candidate-<NNN>.<extension>`.
 *
 * All caller-supplied segments are sanitised so the result always stays
 * inside `imageOutputDir`.
 *
 * @param {object} [options]
 * @param {string} [options.imageOutputDir=""] Base directory; when blank the
 *   function returns "".
 * @param {string} [options.domain="candidate"] File-name prefix.
 * @param {string} [options.runId=""] Sub-directory name; defaults to
 *   `<domain>-run`.
 * @param {number} [options.index=0] Zero-based index; rendered one-based and
 *   zero-padded to three digits. Negative or non-finite values count as 0 and
 *   fractions are truncated.
 * @param {string} [options.extension="png"] File extension, with or without
 *   leading dots.
 * @returns {string} The joined path, or "" when no output directory is set.
 */
export function imageEvidenceFilePath({
  imageOutputDir = "",
  domain = "candidate",
  runId = "",
  index = 0,
  extension = "png"
} = {}) {
  const dir = String(imageOutputDir || "").trim();
  if (!dir) return "";

  // Anything outside [A-Za-z0-9_.-], path separators included, becomes "_".
  const sanitize = (raw) => String(raw).replace(/[^\w.-]+/g, "_");

  const safeDomain = sanitize(domain || "candidate");

  let safeRunId = sanitize(runId || `${safeDomain}-run`);
  // "." and ".." pass the character filter but would let path.join climb out
  // of the output directory, so dot-only names are neutralised.
  if (/^\.+$/.test(safeRunId)) {
    safeRunId = safeRunId.replace(/\./g, "_");
  }

  const numericIndex = Number(index);
  const ordinal = Number.isFinite(numericIndex) && numericIndex > 0
    ? Math.trunc(numericIndex) + 1
    : 1;
  const safeIndex = String(ordinal).padStart(3, "0");

  // The extension ends up in the file name, so it gets the same filtering.
  const safeExt = sanitize(extension || "png").replace(/^\.+/, "") || "png";

  return path.join(dir, safeRunId, `${safeDomain}-candidate-${safeIndex}.${safeExt}`);
}