@lightcone-ai/daemon 0.15.52 → 0.15.54
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/mcp-servers/_thin-proxy/forward.js +80 -0
- package/mcp-servers/official/audience-research/index.js +24 -376
- package/mcp-servers/official/hook-pattern-library/index.js +17 -410
- package/mcp-servers/official/keyword-research/index.js +17 -324
- package/mcp-servers/official/page-understanding/index.js +17 -96
- package/mcp-servers/official/platform-policy-db/index.js +19 -264
- package/mcp-servers/official/video-narration-planner/index.js +30 -130
- package/package.json +1 -1
- package/mcp-servers/official/keyword-research/keyword-fixtures.json +0 -58
- package/mcp-servers/official/platform-policy-db/policy-fixtures.json +0 -257
- package/mcp-servers/official/video-narration-planner/core.js +0 -1403
- package/mcp-servers/official/video-narration-planner/planner-config.json +0 -112
- package/src/_vendor/video/understanding/analyze-page.js +0 -737
- package/src/_vendor/video/understanding/heuristics.js +0 -826
- package/src/_vendor/video/understanding/index.js +0 -11
- package/src/_vendor/video/understanding/llm-client.js +0 -261
- package/src/_vendor/video/understanding/schema.js +0 -254
- package/src/_vendor/video/understanding/site-selectors.js +0 -47
|
@@ -1,826 +0,0 @@
|
|
|
1
|
-
// Case-insensitive pattern of Chinese and English terms used to decide whether a URL or
// page text is recruitment-related, and as the generic fallback when classifying the
// recruitment type.
// NOTE(review): the English alternatives are not word-anchored, so "intern" also matches
// inside longer words such as "internet" — confirm this looseness is intended.
const RECRUITMENT_KEYWORDS = /招聘|校招|春招|秋招|实习|应届|岗位|职位|网申|career|job|intern|campus/i;
|
|
2
|
-
// Case-insensitive pattern for campus / new-graduate hiring wording; a hit classifies the
// recruitment type as 'campus'.
const CAMPUS_KEYWORDS = /校招|校园招聘|应届|毕业生|春招|秋招|campus/i;
|
|
3
|
-
// Case-insensitive pattern for internship wording; a hit classifies the recruitment type
// as 'intern'. The English branch is anchored on both sides so it only accepts "intern",
// "interns" and "internship(s)" — an unanchored /intern/ would also fire on unrelated
// words such as "internet", "internal" or "international".
const INTERN_KEYWORDS = /实习|(?<![a-z])intern(?:s|ships?)?(?![a-z])/i;
|
|
4
|
-
// Case-insensitive pattern for experienced-hire wording; a hit classifies the recruitment
// type as 'experienced'.
// NOTE(review): the "experienced hire" alternative can never win because the shorter
// "experienced" alternative precedes it; harmless, since only the presence of a match is used.
const EXPERIENCED_KEYWORDS = /社招|社会招聘|experienced|experienced hire|社会人才/i;
|
|
5
|
-
|
|
6
|
-
/**
 * Frozen list of job-function keywords. Used as a substring fallback to infer job
 * directions from page text when no labelled "positions" field is found.
 */
const JOB_DIRECTION_HINTS = Object.freeze([
  '算法', '研发', '开发', '前端', '后端', '测试',
  '运维', '产品', '运营', '市场', '销售', '设计',
  '财务', '人力', 'hr', '数据', '供应链', '法务',
  '风控', '投研', '量化', '内容', '客服',
]);
|
|
31
|
-
|
|
32
|
-
/**
 * Frozen list of city names plus remote-work markers. Used as a substring fallback to
 * infer work locations from page text when no labelled location field is found.
 */
const LOCATION_HINTS = Object.freeze([
  '北京', '上海', '广州', '深圳', '杭州', '成都',
  '南京', '武汉', '西安', '苏州', '天津', '重庆',
  '长沙', '郑州', '青岛', '厦门', '合肥', '宁波',
  '佛山', '东莞', '珠海', '无锡', 'remote', '远程',
]);
|
|
58
|
-
|
|
59
|
-
/**
 * Frozen, ordered list of the semantic slot names produced for a recruitment page.
 */
export const RECRUITMENT_SLOT_KEYS = Object.freeze([
  'company', 'published_at', 'recruitment_type',
  'cohort', 'job_directions', 'locations',
  'target_or_requirements', 'process', 'entry_or_cta',
]);
|
|
70
|
-
|
|
71
|
-
/**
 * Frozen confidence thresholds used when deriving the mode hint:
 * `strong` / `support` / `weakSignal` gate individual slots, `floor` gates the
 * average confidence required for the summary mode.
 */
const MODE_MIN_CONFIDENCE = Object.freeze({ strong: 0.6, support: 0.52, weakSignal: 0.42, floor: 0.36 });
|
|
77
|
-
|
|
78
|
-
/**
 * Converts any value to a trimmed single-line string.
 *
 * @param {*} value - Anything; `null` and `undefined` become the empty string.
 * @returns {string} The stringified value with every whitespace run collapsed to one
 *   space and leading/trailing whitespace removed.
 */
function cleanText(value) {
  const raw = String(value ?? '');
  const singleSpaced = raw.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|
|
81
|
-
|
|
82
|
-
/**
 * Returns the rounded midpoint of a two-element numeric range.
 *
 * @param {Array} [range=[0, 0]] - `[start, end]`; a missing start counts as 0 and a
 *   missing end falls back to the start.
 * @returns {number} `Math.round((start + end) / 2)`.
 */
function midpoint(range = [0, 0]) {
  const low = Number(range[0] ?? 0);
  const high = Number(range[1] ?? low);
  const total = low + high;
  return Math.round(total / 2);
}
|
|
87
|
-
|
|
88
|
-
/**
 * Coerces a value to a number and clamps it into `[min, max]`.
 *
 * @param {*} value - Value to coerce with `Number()`.
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @param {*} fallback - Returned as-is when the coerced value is not finite.
 * @returns {*} The clamped number, or `fallback`.
 */
function clampNumber(value, min, max, fallback) {
  const parsed = Number(value);
  if (Number.isFinite(parsed)) {
    const capped = Math.min(max, parsed);
    return Math.max(min, capped);
  }
  return fallback;
}
|
|
93
|
-
|
|
94
|
-
/**
 * Normalizes a source-type label to one of the known values.
 *
 * @param {*} value - Candidate label; it is cleaned and lower-cased before comparison.
 * @returns {string} One of 'title', 'meta', 'body_text', 'image_ocr', 'heuristic';
 *   anything unrecognized maps to 'heuristic'.
 */
function toSourceType(value) {
  const label = cleanText(value).toLowerCase();
  switch (label) {
    case 'title':
    case 'meta':
    case 'body_text':
    case 'image_ocr':
    case 'heuristic':
      return label;
    default:
      return 'heuristic';
  }
}
|
|
101
|
-
|
|
102
|
-
/**
 * Returns the base confidence assigned to a value depending on where it was found.
 *
 * @param {*} sourceType - Source-type label; normalized with `toSourceType` first.
 * @returns {number} 0.9 for title, 0.84 for meta, 0.74 for body text, 0.7 for image OCR,
 *   and 0.45 for everything else.
 */
function baseConfidenceForSourceType(sourceType) {
  const confidenceByType = {
    title: 0.9,
    meta: 0.84,
    body_text: 0.74,
    image_ocr: 0.7,
  };
  // toSourceType only ever returns one of five fixed labels, so a plain lookup is safe;
  // 'heuristic' is deliberately absent and takes the default.
  return confidenceByType[toSourceType(sourceType)] ?? 0.45;
}
|
|
110
|
-
|
|
111
|
-
/**
 * Tells whether a slot value carries any content.
 *
 * @param {*} value - An array, or anything that can be stringified.
 * @returns {boolean} For arrays, whether they are non-empty; otherwise whether the
 *   cleaned string form is non-empty.
 */
function hasValue(value) {
  return Array.isArray(value)
    ? value.length > 0
    : cleanText(value).length > 0;
}
|
|
115
|
-
|
|
116
|
-
/**
 * Cleans a list of values and removes blanks and case-insensitive duplicates.
 *
 * @param {Iterable} [values=[]] - Values to clean.
 * @returns {string[]} Cleaned strings in first-seen order; the first spelling of each
 *   case-insensitive duplicate is the one kept.
 */
function uniqueStrings(values = []) {
  // A Map keeps insertion order, so its values come out in first-seen order.
  const firstSpellingByKey = new Map();
  for (const entry of values) {
    const cleaned = cleanText(entry);
    if (cleaned === '') continue;
    const foldedKey = cleaned.toLowerCase();
    if (!firstSpellingByKey.has(foldedKey)) {
      firstSpellingByKey.set(foldedKey, cleaned);
    }
  }
  return [...firstSpellingByKey.values()];
}
|
|
129
|
-
|
|
130
|
-
/**
 * Validates and normalizes a `[start, end]` pair into an ordered integer region.
 *
 * @param {*} range - Expected to be an array with at least two numeric-like entries.
 * @returns {?number[]} `[low, high]` with both bounds rounded and `low < high`, or
 *   `null` when the input is not an array of two finite numbers or the rounded region
 *   is empty.
 */
function normalizeFocusRegion(range) {
  const hasTwoEntries = Array.isArray(range) && range.length >= 2;
  if (!hasTwoEntries) return null;

  const first = Number(range[0]);
  const second = Number(range[1]);
  if (!(Number.isFinite(first) && Number.isFinite(second))) return null;

  const lower = Math.round(Math.min(first, second));
  const upper = Math.round(Math.max(first, second));
  return upper > lower ? [lower, upper] : null;
}
|
|
140
|
-
|
|
141
|
-
/**
 * Derives the region at the top of the page that title/meta-derived values point to.
 *
 * The focus range is preferred over the core range. The returned slice starts at the
 * base region's start and spans 22% of it (at least 180 units), capped at the base end.
 *
 * @param {{coreRange: *, focusRange: *}} ranges - Candidate `[start, end]` pairs.
 * @returns {?number[]} The normalized top slice, or `null` when neither range is usable.
 */
function resolveTopFocusRegion({ coreRange, focusRange }) {
  const region = normalizeFocusRegion(focusRange) ?? normalizeFocusRegion(coreRange);
  if (region === null) return null;

  const [top, bottom] = region;
  const sliceHeight = Math.max(180, Math.round((bottom - top) * 0.22));
  const sliceBottom = Math.min(bottom, top + sliceHeight);
  return normalizeFocusRegion([top, sliceBottom]);
}
|
|
151
|
-
|
|
152
|
-
/**
 * Builds the placeholder slot used when nothing could be extracted.
 *
 * @returns {{value: null, source_type: string, confidence: number, focus_region: null, status: string}}
 *   A fresh object on every call, with status 'missing' and zero confidence.
 */
function createMissingSlot() {
  const emptySlot = {
    value: null,
    source_type: 'heuristic',
    confidence: 0,
    focus_region: null,
    status: 'missing',
  };
  return emptySlot;
}
|
|
161
|
-
|
|
162
|
-
/**
 * Builds a normalized semantic slot.
 *
 * Accepts the camelCase options (`sourceType`, `focusRegion`) and also their snake_case
 * spellings (`source_type`, `focus_region`). The snake_case form is what
 * `findRegexCandidate` returns; without accepting it, candidates passed straight into
 * this function silently lose their source type (falling back to 'heuristic') and their
 * focus region (falling back to null). camelCase wins when both are supplied.
 *
 * @param {object} [options]
 * @param {*} [options.value] - Array (deduplicated via `uniqueStrings`) or scalar
 *   (cleaned to a string; empty becomes null).
 * @param {string} [options.sourceType='heuristic'] - Where the value came from.
 * @param {*} [options.confidence=0.45] - Clamped to [0, 1] and rounded to 2 decimals;
 *   non-numeric input becomes 0.
 * @param {*} [options.focusRegion=null] - `[start, end]` pair, normalized or nulled.
 * @param {boolean} [options.forceMissing=false] - Forces a 'missing' slot even when a
 *   value is present.
 * @param {string} [options.source_type] - Snake_case alias of `sourceType`.
 * @param {*} [options.focus_region] - Snake_case alias of `focusRegion`.
 * @returns {{value: *, source_type: string, confidence: number, focus_region: ?number[], status: string}}
 */
function createSlot({
  value,
  sourceType,
  confidence = 0.45,
  focusRegion,
  forceMissing = false,
  source_type: sourceTypeSnake,
  focus_region: focusRegionSnake,
} = {}) {
  const resolvedSourceType = toSourceType(sourceType ?? sourceTypeSnake ?? 'heuristic');
  const resolvedFocusRegion = focusRegion ?? focusRegionSnake ?? null;

  const normalizedValue = Array.isArray(value)
    ? uniqueStrings(value)
    : (cleanText(value) || null);

  const valuePresent = !forceMissing && hasValue(normalizedValue);
  return {
    value: valuePresent ? normalizedValue : null,
    source_type: resolvedSourceType,
    confidence: Number(clampNumber(confidence, 0, 1, 0).toFixed(2)),
    focus_region: normalizeFocusRegion(resolvedFocusRegion),
    status: valuePresent ? 'present' : 'missing',
  };
}
|
|
186
|
-
|
|
187
|
-
/**
 * Returns a copy of a regular expression without the global (`g`) flag, so that
 * `exec` on the copy does not carry `lastIndex` state from one string to the next.
 *
 * @param {RegExp} regex - Source pattern.
 * @returns {RegExp} New RegExp with the same source and all other flags preserved.
 */
function toNonGlobalRegex(regex) {
  const keptFlags = [...String(regex.flags ?? '')]
    .filter(flag => flag !== 'g')
    .join('');
  return new RegExp(regex.source, keptFlags);
}
|
|
191
|
-
|
|
192
|
-
/**
 * Computes the vertical region covered by a text bin.
 *
 * @param {object} [bin={}] - Bin record; `y_start` and `y_center` are read.
 * @returns {?number[]} `[y_start, y_start + 400]` when `y_start` is numeric, otherwise
 *   `[y_center - 180, y_center + 180]` when `y_center` is numeric, otherwise `null`
 *   (each candidate region is passed through `normalizeFocusRegion`).
 */
function focusRegionFromBin(bin = {}) {
  const top = Number(bin?.y_start);
  if (Number.isFinite(top)) return normalizeFocusRegion([top, top + 400]);

  const center = Number(bin?.y_center);
  return Number.isFinite(center)
    ? normalizeFocusRegion([center - 180, center + 180])
    : null;
}
|
|
202
|
-
|
|
203
|
-
/**
 * Collects every piece of page text that slot extraction may search, tagged with its
 * origin and the page region it maps to.
 *
 * Order of the result: title, meta description, one entry per non-empty bin, then one
 * entry per hotspot with a non-empty reason. Bins and hotspot reasons are both tagged
 * as 'body_text'.
 *
 * @param {object} input
 * @param {object} [input.meta] - `og_title`/`title` and `og_description`/`description` are read.
 * @param {*} [input.bins] - Array of bins (`text`, `y_start`, `y_center`); non-arrays are ignored.
 * @param {*} [input.hotspots] - Array of hotspots (`reason`, `y`); non-arrays are ignored.
 * @param {*} [input.coreRange] - Used to locate the top region for title/meta.
 * @param {*} [input.focusRange] - Preferred over `coreRange` for the top region.
 * @returns {Array<{source_type: string, text: string, focus_region: ?number[]}>}
 */
function buildSources({ meta, bins, hotspots, coreRange, focusRange }) {
  const headRegion = resolveTopFocusRegion({ coreRange, focusRange });
  const pageTitle = cleanText(meta?.og_title || meta?.title || '');
  const pageDescription = cleanText(meta?.og_description || meta?.description || '');

  const headSources = [
    { source_type: 'title', text: pageTitle, focus_region: headRegion },
    { source_type: 'meta', text: pageDescription, focus_region: headRegion },
  ].filter(entry => entry.text);

  const binSources = (Array.isArray(bins) ? bins : []).flatMap((bin) => {
    const text = cleanText(bin?.text);
    if (!text) return [];
    return [{ source_type: 'body_text', text, focus_region: focusRegionFromBin(bin) }];
  });

  const hotspotSources = (Array.isArray(hotspots) ? hotspots : []).flatMap((hotspot) => {
    const text = cleanText(hotspot?.reason);
    if (!text) return [];
    const y = Number(hotspot?.y);
    // A hotspot maps to a 320-unit window centred on its y coordinate.
    const focus_region = Number.isFinite(y) ? normalizeFocusRegion([y - 160, y + 160]) : null;
    return [{ source_type: 'body_text', text, focus_region }];
  });

  return [...headSources, ...binSources, ...hotspotSources];
}
|
|
248
|
-
|
|
249
|
-
/**
 * Finds the first acceptable regex hit across a prioritized list of patterns.
 *
 * Patterns are tried in order, and for each pattern the sources are scanned in order,
 * so an earlier pattern always wins over a later one regardless of source position.
 *
 * @param {object} input
 * @param {Array<{source_type: string, text: string, focus_region: ?number[]}>} input.sources
 * @param {Array<RegExp|object>} input.patterns - Either a bare RegExp or a spec with
 *   `regex`, optional `parser(raw, match, source)`, optional `sourceTypes` (whitelist)
 *   and optional `confidenceBoost`. Specs without a RegExp are skipped.
 * @returns {?{value: *, source_type: string, confidence: number, focus_region: ?number[]}}
 *   The first hit whose parsed value is non-empty, or `null`. Confidence is the
 *   source-type base plus the boost, clamped to [0, 0.99].
 */
function findRegexCandidate({ sources, patterns }) {
  for (const entry of patterns) {
    const spec = entry instanceof RegExp ? { regex: entry } : entry;
    if (!(spec.regex instanceof RegExp)) continue;

    const matcher = toNonGlobalRegex(spec.regex);
    const permittedTypes = Array.isArray(spec.sourceTypes)
      ? new Set(spec.sourceTypes.map(toSourceType))
      : null;

    for (const source of sources) {
      const typeAllowed = permittedTypes === null
        || permittedTypes.has(toSourceType(source.source_type));
      if (!typeAllowed) continue;

      const haystack = cleanText(source?.text);
      if (!haystack) continue;

      const hit = matcher.exec(haystack);
      if (hit === null) continue;

      // Prefer the first capture group; fall back to the whole match.
      const captured = cleanText(hit[1] ?? hit[0]);
      const parsed = typeof spec.parser === 'function'
        ? spec.parser(captured, hit, source)
        : captured;
      if (!hasValue(parsed)) continue;

      const boost = Number(spec.confidenceBoost ?? 0);
      const boosted = baseConfidenceForSourceType(source.source_type) + boost;
      return {
        value: parsed,
        source_type: source.source_type,
        confidence: clampNumber(boosted, 0, 0.99, 0.45),
        focus_region: source.focus_region ?? null,
      };
    }
  }

  return null;
}
|
|
291
|
-
|
|
292
|
-
/**
 * Splits a delimited text fragment into a short list of distinct tokens.
 *
 * @param {*} raw - Fragment to split; leading colons, dashes and spaces are dropped.
 * @param {object} [options]
 * @param {number} [options.maxItems=6] - Maximum number of tokens returned.
 * @param {boolean} [options.allowChineseComma=true] - Whether the enumeration comma
 *   "、" also acts as a delimiter.
 * @returns {string[]} Cleaned, case-insensitively deduplicated tokens, in order.
 */
function parseListTokens(raw, { maxItems = 6, allowChineseComma = true } = {}) {
  const body = cleanText(raw).replace(/^[::\-\s]+/, '');
  if (body === '') return [];

  let separator;
  if (allowChineseComma) {
    separator = /[、,,/|;;]+/;
  } else {
    separator = /[,/|;]+/;
  }

  const tokens = [];
  for (const piece of body.split(separator)) {
    // Strip leading bullet marks and trailing punctuation from each token.
    const token = cleanText(piece)
      .replace(/^[·•\-\s]+/, '')
      .replace(/[。;;,,\s]+$/, '');
    if (token) tokens.push(token);
  }

  return uniqueStrings(tokens).slice(0, maxItems);
}
|
|
304
|
-
|
|
305
|
-
/**
 * Normalizes a date fragment to `YYYY-MM-DD` or `YYYY-MM` when possible.
 *
 * Year/month/day markers (年, 月, 日) and `/` or `.` separators are first mapped to
 * dashes. Only years of the form 20xx are recognized; the month must be 1-12 and the
 * day 1-31 (no per-month day validation).
 *
 * @param {*} raw - Date fragment.
 * @returns {string} The zero-padded normalized date, or — when nothing recognizable is
 *   found — the cleaned input truncated to 24 characters.
 */
function normalizeDateToken(raw) {
  const pad = part => String(part).padStart(2, '0');
  const compact = cleanText(raw)
    .replace(/年|\/|\./g, '-')
    .replace(/月/g, '-')
    .replace(/日/g, '')
    .replace(/\s+/g, '');

  const ymd = /(20\d{2})-(\d{1,2})-(\d{1,2})/.exec(compact);
  if (ymd !== null) {
    const year = Number(ymd[1]);
    const month = Number(ymd[2]);
    const day = Number(ymd[3]);
    const monthValid = month >= 1 && month <= 12;
    const dayValid = day >= 1 && day <= 31;
    if (monthValid && dayValid) return `${year}-${pad(month)}-${pad(day)}`;
  }

  // Year-month only; must sit at the end of the compacted text.
  const ym = /(20\d{2})-(\d{1,2})$/.exec(compact);
  if (ym !== null) {
    const year = Number(ym[1]);
    const month = Number(ym[2]);
    if (month >= 1 && month <= 12) return `${year}-${pad(month)}`;
  }

  return cleanText(raw).slice(0, 24);
}
|
|
333
|
-
|
|
334
|
-
/**
 * Cleans a raw company-name capture and rejects captures that are clearly not a name.
 *
 * Steps: strip punctuation/brackets from both ends, then strip a trailing year or cohort
 * marker, a trailing recruitment term, and a trailing bare year (in that order).
 *
 * @param {*} raw - Raw captured text.
 * @returns {string} The cleaned name, or '' when the result is empty, shorter than 2 or
 *   longer than 40 characters, purely digits, purely date markers, or just a generic
 *   recruitment word.
 */
function normalizeCompanyCandidate(raw) {
  const stripAll = (text, patterns) => patterns.reduce((acc, pattern) => acc.replace(pattern, ''), text);

  const edgeNoise = [
    /^[\s::\-—_【】\[\]()()||]+/,
    /[\s::\-—_【】\[\]()()||]+$/,
  ];
  const trailingNoise = [
    /(?:20\d{2}年(?:春招|秋招|暑期|寒假|实习)?|20\d{2}届)\s*$/i,
    /(?:春招|秋招|校园招聘|校招|暑期实习|实习招聘|实习生招聘|实习|招聘信息|招聘公告|招聘简章|招聘|职位招聘|岗位招聘|职位|岗位)\s*$/i,
    /(?:20\d{2}年?)\s*$/i,
  ];
  const rejectPatterns = [
    /^\d+$/,
    /^(?:年|月|日)+$/,
    /^(?:年)?(?:春招|秋招|校招|暑期|寒假|实习|招聘|岗位|职位|公告|信息|简章|海报|计划|入口|流程|网申|投递|campus|intern|career|jobs?)$/i,
  ];

  const name = stripAll(stripAll(cleanText(raw), edgeNoise), trailingNoise).trim();

  const badLength = name.length < 2 || name.length > 40;
  const rejected = badLength || rejectPatterns.some(pattern => pattern.test(name));
  return rejected ? '' : name;
}
|
|
354
|
-
|
|
355
|
-
/**
 * Concatenates the text of all sources into one searchable string.
 *
 * @param {Array<{text: *}>} [sources=[]] - Source entries.
 * @returns {string} Cleaned, non-empty texts joined with single spaces.
 */
function buildCombinedText(sources = []) {
  const parts = [];
  for (const source of sources) {
    const text = cleanText(source?.text);
    if (text) parts.push(text);
  }
  return parts.join(' ');
}
|
|
358
|
-
|
|
359
|
-
/**
 * Tells whether a slot object carries a non-empty value.
 *
 * @param {*} slot - Candidate slot.
 * @returns {boolean} `false` for non-objects; otherwise whether `slot.value` has content.
 */
function slotHasValue(slot) {
  const isObject = Boolean(slot) && typeof slot === 'object';
  return isObject ? hasValue(slot.value) : false;
}
|
|
363
|
-
|
|
364
|
-
/**
 * Reads a slot's confidence as a number in [0, 1].
 *
 * @param {*} slot - Candidate slot; may be null or undefined.
 * @returns {number} The clamped confidence, or 0 when it is missing or not finite.
 */
function slotConfidence(slot) {
  const raw = Number(slot?.confidence);
  if (Number.isFinite(raw)) {
    return Math.min(1, Math.max(0, raw));
  }
  return 0;
}
|
|
369
|
-
|
|
370
|
-
/**
 * Tells whether a slot has a value and reaches a minimum confidence.
 *
 * @param {*} slot - Candidate slot.
 * @param {number} [min=0] - Minimum confidence (inclusive).
 * @returns {boolean}
 */
function slotHasMinConfidence(slot, min = 0) {
  if (!slotHasValue(slot)) return false;
  return slotConfidence(slot) >= min;
}
|
|
373
|
-
|
|
374
|
-
/**
 * Averages the confidence of the slots that actually have one.
 *
 * @param {Array} [slots=[]] - Slots; entries whose confidence is missing, non-finite or
 *   not greater than zero are left out of the average.
 * @returns {number} The mean rounded to two decimals, or 0 when no slot qualifies.
 */
function averageConfidence(slots = []) {
  let total = 0;
  let counted = 0;
  for (const slot of slots) {
    const confidence = Number(slot?.confidence);
    if (!Number.isFinite(confidence) || !(confidence > 0)) continue;
    total += confidence;
    counted += 1;
  }
  if (counted === 0) return 0;
  return Number((total / counted).toFixed(2));
}
|
|
382
|
-
|
|
383
|
-
/**
 * Extracts the hiring company name.
 *
 * Patterns, in priority order: (1) a name directly preceding a recruitment term in the
 * title or meta description, (2) a labelled company field in body text, (3) any token
 * ending in a company-like suffix, in any source. Every capture is cleaned by
 * `normalizeCompanyCandidate`.
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @returns {object} The company slot, or a missing slot when nothing matches.
 */
function extractCompanySlot(sources) {
  const toCompany = value => normalizeCompanyCandidate(value);
  const companyPatterns = [
    {
      regex: /([\u4e00-\u9fa5A-Za-z0-9·()()\-]{2,40})(?:20\d{2}届)?(?:校园招聘|校招|春招|秋招|招聘简章|招聘公告|招聘信息|招聘|实习招聘|实习生招聘|社招|社会招聘|职位招聘|岗位招聘)/i,
      parser: toCompany,
      sourceTypes: ['title', 'meta'],
      confidenceBoost: 0.06,
    },
    {
      regex: /(?:公司名称|招聘单位|用人单位)[::]?\s*([\u4e00-\u9fa5A-Za-z0-9·()()\-]{2,40})/i,
      parser: toCompany,
      sourceTypes: ['body_text'],
      confidenceBoost: 0.04,
    },
    {
      regex: /([\u4e00-\u9fa5A-Za-z0-9·()()\-]{2,40}(?:公司|集团|科技|信息|网络|银行|证券|研究院|大学|股份有限公司|有限公司|Inc\.?|Ltd\.?))/i,
      parser: toCompany,
      confidenceBoost: 0.02,
    },
  ];

  const hit = findRegexCandidate({ sources, patterns: companyPatterns });
  return hit ? createSlot(hit) : createMissingSlot();
}
|
|
410
|
-
|
|
411
|
-
/**
 * Extracts the publication date.
 *
 * A date following a "published/updated" label is preferred (day optional); otherwise
 * the first full year-month-day date found in any source is used. Captures are
 * normalized with `normalizeDateToken`.
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @returns {object} The published-at slot, or a missing slot when nothing matches.
 */
function extractPublishedAtSlot(sources) {
  const datePatterns = [
    {
      regex: /(?:发布时间|发布日期|发布于|更新于)[::]?\s*([12]\d{3}[./\-年]\d{1,2}(?:[./\-月]\d{1,2}日?)?)/i,
      parser: normalizeDateToken,
      confidenceBoost: 0.06,
    },
    {
      regex: /([12]\d{3}[./\-年]\d{1,2}[./\-月]\d{1,2}日?)/,
      parser: normalizeDateToken,
      confidenceBoost: 0.02,
    },
  ];

  const hit = findRegexCandidate({ sources, patterns: datePatterns });
  return hit ? createSlot(hit) : createMissingSlot();
}
|
|
431
|
-
|
|
432
|
-
/**
 * Classifies the kind of recruitment a page describes.
 *
 * Keyword families are tried in priority order — campus, intern, experienced, then
 * generic recruitment wording (labelled 'job_posting_unknown' with a confidence penalty).
 * When nothing matches but the page is known to be recruitment-related, a low-confidence
 * 'job_posting_unknown' slot is returned instead of a missing one.
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @param {object} [options]
 * @param {boolean} [options.recruitmentRelated=false] - Whether the page was already
 *   judged to be recruitment-related.
 * @returns {object} The recruitment-type slot.
 */
function extractRecruitmentTypeSlot(sources, { recruitmentRelated = false } = {}) {
  const keywordFamilies = [
    [CAMPUS_KEYWORDS, 'campus', 0.06],
    [INTERN_KEYWORDS, 'intern', 0.06],
    [EXPERIENCED_KEYWORDS, 'experienced', 0.06],
    [RECRUITMENT_KEYWORDS, 'job_posting_unknown', -0.08],
  ];
  const typePatterns = keywordFamilies.map(([regex, label, confidenceBoost]) => ({
    regex,
    parser: () => label,
    confidenceBoost,
  }));

  const hit = findRegexCandidate({ sources, patterns: typePatterns });
  if (hit) return createSlot(hit);

  if (recruitmentRelated) {
    return createSlot({
      value: 'job_posting_unknown',
      sourceType: 'heuristic',
      confidence: 0.35,
    });
  }
  return createMissingSlot();
}
|
|
468
|
-
|
|
469
|
-
/**
 * Extracts the graduating cohort or hiring season (e.g. a "届" class year, or a 20xx
 * year followed by a season/campus/intern term).
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @returns {object} The cohort slot, or a missing slot when nothing matches.
 */
function extractCohortSlot(sources) {
  const cohortPatterns = [
    /((?:20\d{2}|\d{2})届)/,
    /(20\d{2}\s*(?:春招|秋招|校招|实习))/,
  ].map(regex => ({ regex, confidenceBoost: 0.05 }));

  const hit = findRegexCandidate({ sources, patterns: cohortPatterns });
  return hit ? createSlot(hit) : createMissingSlot();
}
|
|
487
|
-
|
|
488
|
-
/**
 * Extracts the list of job directions / functions on offer.
 *
 * A labelled field is preferred (specific labels first, then the generic
 * position labels) and parsed into at most 8 tokens. Failing that, up to 6 entries of
 * `JOB_DIRECTION_HINTS` that occur anywhere in the combined text are returned as a
 * heuristic guess.
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @param {*} combinedText - All source text joined together.
 * @returns {object} The job-directions slot, or a missing slot.
 */
function extractJobDirectionsSlot(sources, combinedText) {
  const toTokens = value => parseListTokens(value, { maxItems: 8 });
  const labelledPatterns = [
    {
      regex: /(?:岗位方向|招聘岗位|职位方向|岗位类别|职位类别|招聘方向)[::]?\s*([^\n。;;]{4,90})/i,
      parser: toTokens,
      confidenceBoost: 0.05,
    },
    {
      regex: /(?:岗位|职位)[::]?\s*([^\n。;;]{4,90})/i,
      parser: toTokens,
      confidenceBoost: 0.01,
    },
  ];

  const hit = findRegexCandidate({ sources, patterns: labelledPatterns });
  if (hit) return createSlot(hit);

  const haystack = cleanText(combinedText).toLowerCase();
  const guessed = [];
  for (const hint of JOB_DIRECTION_HINTS) {
    if (guessed.length === 6) break;
    if (haystack.includes(hint.toLowerCase())) guessed.push(hint);
  }

  return guessed.length > 0
    ? createSlot({ value: guessed, sourceType: 'heuristic', confidence: 0.52 })
    : createMissingSlot();
}
|
|
517
|
-
|
|
518
|
-
/**
 * Extracts the list of work locations.
 *
 * A labelled location field is preferred and parsed into at most 8 tokens. Failing that,
 * up to 8 entries of `LOCATION_HINTS` that occur anywhere in the combined text are
 * returned as a heuristic guess.
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @param {*} combinedText - All source text joined together.
 * @returns {object} The locations slot, or a missing slot.
 */
function extractLocationsSlot(sources, combinedText) {
  const labelledPatterns = [
    {
      regex: /(?:工作地点|工作城市|地点|城市|base地|工作地)[::]?\s*([^\n。;;]{2,70})/i,
      parser: value => parseListTokens(value, { maxItems: 8 }),
      confidenceBoost: 0.04,
    },
  ];

  const hit = findRegexCandidate({ sources, patterns: labelledPatterns });
  if (hit) return createSlot(hit);

  const haystack = cleanText(combinedText).toLowerCase();
  const guessed = [];
  for (const hint of LOCATION_HINTS) {
    if (guessed.length === 8) break;
    if (haystack.includes(hint.toLowerCase())) guessed.push(hint);
  }

  return guessed.length > 0
    ? createSlot({ value: guessed, sourceType: 'heuristic', confidence: 0.5 })
    : createMissingSlot();
}
|
|
544
|
-
|
|
545
|
-
/**
 * Extracts the target audience or applicant requirements from a labelled field.
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @returns {object} A slot holding at most 140 characters of the field's text, or a
 *   missing slot when no labelled field is found.
 */
function extractTargetOrRequirementsSlot(sources) {
  const requirementPatterns = [
    {
      regex: /(?:面向对象|招聘对象|任职要求|岗位要求|任职资格|申请条件|target|requirements?)[::]?\s*([^\n。]{8,140})/i,
      parser: value => cleanText(value).slice(0, 140),
      confidenceBoost: 0.03,
    },
  ];

  const hit = findRegexCandidate({ sources, patterns: requirementPatterns });
  return hit ? createSlot(hit) : createMissingSlot();
}
|
|
560
|
-
|
|
561
|
-
/**
 * Extracts a description of the hiring process.
 *
 * A labelled process field is preferred (at most 140 characters). Failing that, any
 * process-stage keywords present in the combined text are joined with ' -> ' into a
 * low-confidence heuristic clue.
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @param {*} combinedText - All source text joined together.
 * @returns {object} The process slot, or a missing slot when there is no signal.
 */
function extractProcessSlot(sources, combinedText) {
  const candidate = findRegexCandidate({
    sources,
    patterns: [
      {
        regex: /(?:招聘流程|流程安排|投递流程|面试流程|笔试流程|流程)[::]?\s*([^\n。]{8,140})/i,
        parser: value => cleanText(value).slice(0, 140),
        confidenceBoost: 0.03,
      },
    ],
  });
  if (candidate) return createSlot(candidate);

  const lower = cleanText(combinedText).toLowerCase();
  const signals = ['网申', '测评', '笔试', '面试', 'offer'].filter(item => lower.includes(item.toLowerCase()));
  if (signals.length === 0) return createMissingSlot();

  // signals is guaranteed non-empty here, so the suffix is always appended; the former
  // `signals.length > 0 ? … : ''` guard was unreachable.
  return createSlot({
    value: `${signals.join(' -> ')}(流程线索)`,
    sourceType: 'heuristic',
    confidence: 0.48,
  });
}
|
|
584
|
-
|
|
585
|
-
/**
 * Extracts the application entry point or call to action.
 *
 * Priority: (1) text following an apply/submit label (at most 120 characters),
 * (2) a bare call-to-action phrase, (3) a fixed low-confidence hint when the combined
 * text merely mentions applying.
 *
 * @param {Array} sources - Source entries as built by `buildSources`.
 * @param {*} combinedText - All source text joined together.
 * @returns {object} The entry/CTA slot, or a missing slot when there is no signal.
 */
function extractEntryOrCtaSlot(sources, combinedText) {
  const ctaPatterns = [
    {
      regex: /(?:投递入口|网申入口|申请入口|报名入口|简历投递|投递方式|投递链接|apply|立即申请|点击申请|马上投递)[::]?\s*([^\n。]{4,120})/i,
      parser: value => cleanText(value).slice(0, 120),
      confidenceBoost: 0.04,
    },
    {
      regex: /((?:立即申请|点击申请|马上投递|网申入口|投递入口|简历投递))/i,
      parser: value => cleanText(value),
      confidenceBoost: 0.02,
    },
  ];

  const hit = findRegexCandidate({ sources, patterns: ctaPatterns });
  if (hit) return createSlot(hit);

  const mentionsApplying = /(投递|网申|申请|报名|简历)/i.test(combinedText);
  if (mentionsApplying) {
    return createSlot({
      value: '页面存在投递/申请线索,建议定位入口后确认截止时间',
      sourceType: 'heuristic',
      confidence: 0.46,
    });
  }
  return createMissingSlot();
}
|
|
610
|
-
|
|
611
|
-
/**
 * Chooses a presentation mode from the extracted slots.
 *
 * Decision ladder (first match wins):
 *  1. 'job_intel_broadcast' — strong company plus one of: strong date and strong known
 *     type; supported cohort and job directions; strong known type and supported
 *     locations. Confidence is at least 0.7.
 *  2. 'job_alert' — strong company, strong date and any recruitment-type signal.
 *     Confidence is at least 0.6.
 *  3. 'info_summary' — recruitment-related page with at least one reliable signal and an
 *     average confidence at or above the floor. Confidence is at least 0.42.
 *  4. 'refuse_auto_broadcast' — everything else; confidence is 60% of the average, at
 *     least 0.2.
 *
 * @param {object} input
 * @param {object} input.slots - Semantic slots keyed by slot name.
 * @param {boolean} input.recruitmentRelated - Whether the page is recruitment-related.
 * @returns {{value: string, confidence: number}} Mode and confidence (2 decimals).
 */
function computeModeHint({ slots, recruitmentRelated }) {
  const meets = (slot, level) => slotHasMinConfidence(slot, MODE_MIN_CONFIDENCE[level]);
  const verdict = (value, minimum, score) => ({
    value,
    confidence: Number(Math.max(minimum, score).toFixed(2)),
  });

  const companyStrong = meets(slots.company, 'strong');
  const publishedStrong = meets(slots.published_at, 'strong');
  const cohortSupport = meets(slots.cohort, 'support');
  const jobDirectionsSupport = meets(slots.job_directions, 'support');
  const locationsSupport = meets(slots.locations, 'support');
  const processSignal = meets(slots.process, 'weakSignal');

  const typeValue = cleanText(slots.recruitment_type?.value);
  const typeKnown = ['campus', 'intern', 'experienced'].includes(typeValue);
  const typePresent = typeKnown || typeValue === 'job_posting_unknown';
  const typeStrong = typeKnown && meets(slots.recruitment_type, 'strong');
  const typeSignal = typePresent && meets(slots.recruitment_type, 'weakSignal');

  const avgConfidence = averageConfidence([
    slots.company,
    slots.published_at,
    slots.recruitment_type,
    slots.job_directions,
    slots.locations,
    slots.process,
  ]);
  const reliableSignalCount = [
    companyStrong,
    publishedStrong,
    typeSignal,
    jobDirectionsSupport,
    locationsSupport,
    processSignal,
  ].filter(Boolean).length;

  const hasBroadcastEvidence = (publishedStrong && typeStrong)
    || (cohortSupport && jobDirectionsSupport)
    || (typeStrong && locationsSupport);
  if (companyStrong && hasBroadcastEvidence) {
    return verdict('job_intel_broadcast', 0.7, avgConfidence);
  }

  if (companyStrong && publishedStrong && typeSignal) {
    return verdict('job_alert', 0.6, avgConfidence);
  }

  const summaryWorthy = recruitmentRelated
    && reliableSignalCount > 0
    && avgConfidence >= MODE_MIN_CONFIDENCE.floor;
  if (summaryWorthy) {
    return verdict('info_summary', 0.42, avgConfidence);
  }

  return verdict('refuse_auto_broadcast', 0.2, avgConfidence * 0.6);
}
|
|
673
|
-
|
|
674
|
-
/**
 * Decides whether a page is recruitment-related.
 *
 * @param {object} input
 * @param {*} input.pageType - Page type; 'job_detail' is accepted immediately.
 * @param {*} input.url - Page URL, tested against `RECRUITMENT_KEYWORDS`.
 * @param {*} input.combinedText - All source text, tested against `RECRUITMENT_KEYWORDS`.
 * @returns {boolean}
 */
function isRecruitmentPage({ pageType, url, combinedText }) {
  return String(pageType) === 'job_detail'
    || RECRUITMENT_KEYWORDS.test(cleanText(url))
    || RECRUITMENT_KEYWORDS.test(combinedText);
}
|
|
679
|
-
|
|
680
|
-
/**
 * Guesses the page type from its hostname and title.
 *
 * Hostname rules take precedence over title rules; the title is lower-cased before
 * matching, so the title patterns are effectively case-insensitive.
 *
 * @param {object} input
 * @param {*} input.hostname - Page hostname.
 * @param {object} [input.meta] - `og_title` / `title` are read.
 * @returns {string} 'news_article', 'article', 'job_detail', 'product_landing' or
 *   'generic_article'.
 */
export function derivePageType({ hostname, meta }) {
  const host = String(hostname ?? '').toLowerCase();
  const title = cleanText(meta?.og_title || meta?.title || '').toLowerCase();
  const hostContainsAny = (...domains) => domains.some(domain => host.includes(domain));

  if (hostContainsAny('mp.weixin.qq.com', 'zhuanlan.zhihu.com')) return 'news_article';
  if (hostContainsAny('jianshu.com', 'juejin.cn')) return 'article';

  const titleRules = [
    [/招聘|校招|实习|职位|job|career/, 'job_detail'],
    [/产品|feature|pricing|方案|落地页|landing/, 'product_landing'],
  ];
  const matchedRule = titleRules.find(([pattern]) => pattern.test(title));
  return matchedRule ? matchedRule[1] : 'generic_article';
}
|
|
690
|
-
|
|
691
|
-
/**
 * Builds a one-line core message for a page without calling a model.
 *
 * Preference order: title + description (80 chars), title + first bin text (80 chars),
 * title alone, first bin text (100 chars), a persona-based sentence, a generic sentence.
 *
 * The description falls back from `og_description` to `description`, matching how the
 * title falls back from `og_title` to `title`; previously a page that only exposed a
 * plain meta description lost it here.
 *
 * @param {object} input
 * @param {*} input.persona - Target persona label, used only in the fallback sentence.
 * @param {object} [input.meta] - `og_title`/`title` and `og_description`/`description` are read.
 * @param {Array} [input.bins] - Text bins; only the first bin's `text` is used.
 * @returns {string} The core message.
 */
export function buildHeuristicCoreMessage({ persona, meta, bins }) {
  const p = cleanText(persona);
  const title = cleanText(meta?.og_title || meta?.title || '');
  const desc = cleanText(meta?.og_description || meta?.description || '');
  const firstBin = cleanText(bins?.[0]?.text || '');

  if (title && desc) return `${title}:${desc.slice(0, 80)}`;
  if (title && firstBin) return `${title}。${firstBin.slice(0, 80)}`;
  if (title) return title;
  if (firstBin) return firstBin.slice(0, 100);
  if (p) return `页面围绕「${p}」的关键信息组织,建议聚焦核心段落。`;
  return '页面包含可用于短视频介绍的核心信息。';
}
|
|
704
|
-
|
|
705
|
-
/**
 * Greedily picks the highest-scoring candidates that are vertically far enough apart.
 *
 * @param {Iterable<{y: number, score: number}>} candidates - Candidate rows; not mutated.
 * @param {object} [options]
 * @param {number} [options.maxCount=3] - Picking stops once this many rows are chosen.
 * @param {number} [options.minGap=750] - Minimum |y| distance between any two picks.
 * @returns {Array} The chosen rows, sorted by ascending `y`.
 */
function pickSpaced(candidates, { maxCount = 3, minGap = 750 } = {}) {
  const ranked = [...candidates];
  ranked.sort((left, right) => right.score - left.score);

  const chosen = [];
  for (const candidate of ranked) {
    // Written as a negated ">=" so a NaN distance still counts as "too close".
    const tooClose = chosen.some(kept => !(Math.abs(kept.y - candidate.y) >= minGap));
    if (tooClose) continue;
    chosen.push(candidate);
    if (chosen.length >= maxCount) break;
  }

  chosen.sort((upper, lower) => upper.y - lower.y);
  return chosen;
}
|
|
718
|
-
|
|
719
|
-
/**
 * Picks highlight positions inside the focus range without calling a model.
 *
 * Candidates are DOM hotspots (scored by weight × 10) and text bins with at least 16
 * characters of text (scored by text length, clamped to 20-100). Candidates outside the
 * focus range are ignored, and the survivors are thinned with `pickSpaced` using a
 * 750-unit minimum gap. When there is no candidate at all, a single fallback highlight
 * at the midpoint of the focus range is returned.
 *
 * @param {object} input
 * @param {*} input.bins - Array of bins (`y_center`, `text`); non-arrays are ignored.
 * @param {*} input.hotspots - Array of hotspots (`y`, `reason`, `weight`); non-arrays are ignored.
 * @param {*} input.coreRange - `[start, end]`; defaults to `[0, 0]` when not an array.
 * @param {*} input.focusRange - `[start, end]`; defaults to the core range when not an array.
 * @param {number} [input.maxCount=3] - Maximum number of highlights.
 * @returns {Array<{y: number, reason: string, from_chunk: string}>} Highlights sorted by `y`.
 */
export function buildHeuristicHighlights({
  bins,
  hotspots,
  coreRange,
  focusRange,
  maxCount = 3,
}) {
  const [coreStart, coreEnd] = Array.isArray(coreRange) ? coreRange : [0, 0];
  const [focusStart, focusEnd] = Array.isArray(focusRange) ? focusRange : [coreStart, coreEnd];
  const candidates = [];

  for (const hotspot of (Array.isArray(hotspots) ? hotspots : [])) {
    const y = Number(hotspot?.y);
    if (!Number.isFinite(y) || y < focusStart || y > focusEnd) continue;
    // A non-numeric weight would yield a NaN score, which makes the ranking comparator
    // return NaN and the resulting order unreliable; use the default weight instead.
    const weight = Number(hotspot?.weight ?? 5);
    candidates.push({
      y: Math.round(y),
      reason: cleanText(hotspot?.reason) || '结构锚点',
      from_chunk: 'dom_hotspot',
      score: (Number.isFinite(weight) ? weight : 5) * 10,
    });
  }

  for (const [idx, bin] of (Array.isArray(bins) ? bins : []).entries()) {
    const y = Number(bin?.y_center);
    if (!Number.isFinite(y) || y < focusStart || y > focusEnd) continue;
    const text = cleanText(bin?.text);
    if (text.length < 16) continue;
    const score = Math.min(100, Math.max(20, text.length));
    candidates.push({
      y: Math.round(y),
      reason: `正文信息密集段(bin_${idx})`,
      from_chunk: `bin_${idx}`,
      score,
    });
  }

  if (candidates.length === 0) {
    return [{
      y: midpoint([focusStart, focusEnd]),
      reason: '核心区中点回退',
      from_chunk: 'fallback_center',
    }];
  }

  // Drop the internal score before returning.
  return pickSpaced(candidates, { maxCount, minGap: 750 }).map((row) => ({
    y: row.y,
    reason: row.reason,
    from_chunk: row.from_chunk,
  }));
}
|
|
769
|
-
|
|
770
|
-
/**
 * Narrows the core range to the part worth focusing on.
 *
 * @param {*} coreRange - `[start, end]`; anything that is not an array counts as `[0, 0]`.
 * @returns {number[]} The unchanged `[start, end]` when the span is at most 3000 units;
 *   otherwise the first 70% of the span, starting at `start`.
 */
export function computeFocusRange(coreRange) {
  const bounds = Array.isArray(coreRange) ? coreRange : [0, 0];
  const [top, bottom] = bounds;
  const height = Math.max(0, bottom - top);
  return height <= 3000
    ? [top, bottom]
    : [top, top + Math.round(height * 0.7)];
}
|
|
776
|
-
|
|
777
|
-
/**
 * Runs the full heuristic extraction for a page: gathers text sources, extracts every
 * recruitment slot, and derives the presentation mode hint.
 *
 * @param {object} [input]
 * @param {string} [input.pageType='generic_article'] - Page type; 'job_detail' marks the
 *   page as recruitment-related.
 * @param {string} [input.url=''] - Page URL.
 * @param {object} [input.meta={}] - Page metadata (title / description fields).
 * @param {Array} [input.bins=[]] - Text bins.
 * @param {Array} [input.hotspots=[]] - DOM hotspots.
 * @param {Array} [input.coreRange=[0, 0]] - Core `[start, end]` range.
 * @param {Array} [input.focusRange=[0, 0]] - Focus `[start, end]` range.
 * @returns {{semantic_slots: object, mode_hint: string, mode_hint_confidence: number}}
 *   `semantic_slots` has an entry for every key in `RECRUITMENT_SLOT_KEYS`.
 */
export function buildRecruitmentSemanticSlots({
  pageType = 'generic_article',
  url = '',
  meta = {},
  bins = [],
  hotspots = [],
  coreRange = [0, 0],
  focusRange = [0, 0],
} = {}) {
  const sources = buildSources({ meta, bins, hotspots, coreRange, focusRange });
  const combinedText = buildCombinedText(sources);
  const recruitmentRelated = isRecruitmentPage({ pageType, url, combinedText });

  const slots = {
    company: extractCompanySlot(sources),
    published_at: extractPublishedAtSlot(sources),
    recruitment_type: extractRecruitmentTypeSlot(sources, { recruitmentRelated }),
    cohort: extractCohortSlot(sources),
    job_directions: extractJobDirectionsSlot(sources, combinedText),
    locations: extractLocationsSlot(sources, combinedText),
    target_or_requirements: extractTargetOrRequirementsSlot(sources),
    process: extractProcessSlot(sources, combinedText),
    entry_or_cta: extractEntryOrCtaSlot(sources, combinedText),
  };

  // Defensive backfill so every declared slot key is present in the result.
  RECRUITMENT_SLOT_KEYS
    .filter(key => !slots[key])
    .forEach((key) => {
      slots[key] = createMissingSlot();
    });

  const { value: mode_hint, confidence: mode_hint_confidence } = computeModeHint({
    slots,
    recruitmentRelated,
  });

  return {
    semantic_slots: slots,
    mode_hint,
    mode_hint_confidence,
  };
}
|