@lightcone-ai/daemon 0.15.53 → 0.15.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +0,0 @@
1
- export { analyzePage, analyzePageFromHtmlFixture } from './analyze-page.js';
2
- export { SITE_CORE_SELECTORS, SEMANTIC_CORE_SELECTORS, resolveSiteSelectors } from './site-selectors.js';
3
- export {
4
- RECRUITMENT_SLOT_KEYS,
5
- MIN_BIN_CHAR_COUNT,
6
- MIN_TEXT_BIN_COUNT,
7
- chooseUnderstandingPath,
8
- hasEnoughTextBins,
9
- normalizePageUnderstanding,
10
- validatePageUnderstanding,
11
- } from './schema.js';
@@ -1,261 +0,0 @@
1
- const ANTHROPIC_API_URL = 'https://api.anthropic.com/v1/messages';
2
-
3
/**
 * Coerce a loosely-typed value to an integer within `[min, max]`.
 *
 * @param {unknown} value - Candidate value (number, numeric string, ...).
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @param {number} fallback - Returned as-is when `value` is missing, blank or
 *   does not convert to a finite number.
 * @returns {number} The rounded, clamped integer, or `fallback`.
 */
function clampInt(value, min, max, fallback) {
  // Number(null) and Number('') are both 0, which would silently clamp a
  // missing/blank setting to `min` instead of using the fallback.
  if (value == null) return fallback;
  if (typeof value === 'string' && value.trim() === '') return fallback;

  const n = Number(value);
  if (!Number.isFinite(n)) return fallback;
  return Math.max(min, Math.min(max, Math.round(n)));
}
8
-
9
/**
 * Parse `text` as JSON without ever throwing.
 *
 * @param {string} text - Candidate JSON text.
 * @returns {*} The parsed value, or `null` when `text` is not valid JSON.
 */
function parseJsonMaybe(text) {
  let parsed = null;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Invalid JSON is an expected outcome here; the caller receives null.
  }
  return parsed;
}
16
-
17
/**
 * Find the index of the `}` that closes the `{` located at `start`.
 *
 * Braces that appear inside double-quoted strings (including after escaped
 * quotes) are ignored so they cannot unbalance the depth counter.
 *
 * @param {string} input - Text being scanned.
 * @param {number} start - Index of an opening `{`.
 * @returns {number} Index of the matching `}`, or -1 when it is never closed.
 */
function findJsonObjectEnd(input, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;

  for (let i = start; i < input.length; i += 1) {
    const ch = input[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) return i;
    }
  }

  return -1;
}

/**
 * Extract the first JSON object (or array, when the whole text is one)
 * from free-form text such as a model reply.
 *
 * The whole trimmed text is tried first. Otherwise every `{` is tried in order
 * as a possible object start, so leading prose that happens to contain braces
 * does not hide a valid object that follows it.
 *
 * @param {unknown} text - Raw text; nullish values are treated as empty.
 * @returns {object|null} The parsed object, or `null` when none is found.
 */
function extractFirstJsonObject(text) {
  const input = String(text ?? '').trim();
  if (!input) return null;

  const direct = parseJsonMaybe(input);
  if (direct && typeof direct === 'object') return direct;

  for (
    let start = input.indexOf('{');
    start >= 0;
    start = input.indexOf('{', start + 1)
  ) {
    const end = findJsonObjectEnd(input, start);
    // Unbalanced from this position; a later `{` may still be well-formed.
    if (end < 0) continue;

    const parsed = parseJsonMaybe(input.slice(start, end + 1));
    if (parsed && typeof parsed === 'object') return parsed;
  }

  return null;
}
41
-
42
/**
 * Build the Anthropic client configuration from explicit overrides,
 * environment variables and built-in defaults (in that priority order).
 *
 * Blank environment variables are treated as unset so that an empty
 * `PAGE_UNDERSTANDING_*` variable cannot shadow a later fallback. An explicit
 * `overrides.apiKey` is always honoured verbatim (after trimming), including
 * the empty string.
 *
 * @param {object} [overrides]
 * @param {string} [overrides.apiKey]
 * @param {string} [overrides.model]
 * @param {number|string} [overrides.maxTokens] - Clamped to 128..4096, default 1024.
 * @param {number|string} [overrides.temperature] - Clamped to 0..1, default 0.1.
 * @param {number|string} [overrides.timeoutMs] - Clamped to 5000..120000, default 45000.
 * @returns {{apiKey: string, model: string, maxTokens: number, temperature: number, timeoutMs: number}}
 */
export function resolveAnthropicConfig(overrides = {}) {
  const { env } = process;

  // First candidate that is neither nullish nor a blank string.
  const firstUsable = (...candidates) =>
    candidates.find((item) => item != null && String(item).trim() !== '');

  const apiKey = String(
    overrides.apiKey
      ?? firstUsable(env.PAGE_UNDERSTANDING_ANTHROPIC_API_KEY, env.ANTHROPIC_API_KEY)
      ?? ''
  ).trim();

  // A blank model name is never valid, so blanks fall through to the default.
  const model = String(
    firstUsable(overrides.model, env.PAGE_UNDERSTANDING_ANTHROPIC_MODEL)
      ?? 'claude-3-5-sonnet-20241022'
  ).trim();

  // `undefined` makes the numeric parsers below pick their defaults, whereas
  // null/'' would be coerced to 0 by Number().
  const temperatureValue = Number(firstUsable(overrides.temperature));
  const temperature = Number.isFinite(temperatureValue)
    ? Math.min(1, Math.max(0, temperatureValue))
    : 0.1;

  return {
    apiKey,
    model,
    maxTokens: clampInt(
      firstUsable(overrides.maxTokens, env.PAGE_UNDERSTANDING_ANTHROPIC_MAX_TOKENS),
      128,
      4096,
      1024
    ),
    temperature,
    timeoutMs: clampInt(
      firstUsable(overrides.timeoutMs, env.PAGE_UNDERSTANDING_ANTHROPIC_TIMEOUT_MS),
      5000,
      120000,
      45000
    ),
  };
}
74
-
75
/**
 * Report whether a usable API key is present in the configuration.
 *
 * @param {{apiKey?: string}} [config] - Defaults to the resolved configuration.
 * @returns {boolean} `true` when `config.apiKey` is truthy.
 */
export function isAnthropicConfigured(config = resolveAnthropicConfig()) {
  const { apiKey } = config;
  return apiKey ? true : false;
}
78
-
79
/**
 * Send one user message to the Anthropic Messages endpoint and return the
 * JSON object found in the text blocks of the reply.
 *
 * The request is aborted through an AbortController once `config.timeoutMs`
 * elapses; the timer is always cleared, whether the call succeeds or fails.
 *
 * @param {object} args
 * @param {string} args.system - System prompt.
 * @param {*} args.userContent - Value sent as the `content` of the user message.
 * @param {object} [args.config] - Defaults to the resolved configuration.
 * @returns {Promise<{model: string, data: object, rawText: string}>}
 * @throws {Error} `anthropic_api_key_missing`, `anthropic_http_<status>:<details>`
 *   or `anthropic_json_parse_failed`.
 */
async function callAnthropic({
  system,
  userContent,
  config = resolveAnthropicConfig(),
}) {
  const { apiKey, model, maxTokens, temperature, timeoutMs } = config;
  if (!apiKey) {
    throw new Error('anthropic_api_key_missing');
  }

  const abort = new AbortController();
  const timeoutHandle = setTimeout(() => abort.abort(), timeoutMs);

  try {
    const requestBody = JSON.stringify({
      model,
      max_tokens: maxTokens,
      temperature,
      system,
      messages: [{ role: 'user', content: userContent }],
    });

    const response = await fetch(ANTHROPIC_API_URL, {
      method: 'POST',
      headers: {
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
        'content-type': 'application/json',
      },
      body: requestBody,
      signal: abort.signal,
    });

    const bodyText = await response.text();
    // Non-JSON bodies are kept (truncated) under `raw`.
    const payload = parseJsonMaybe(bodyText) ?? { raw: bodyText.slice(0, 1000) };

    if (!response.ok) {
      const details = payload?.error?.message ?? bodyText.slice(0, 300);
      throw new Error(`anthropic_http_${response.status}:${details}`);
    }

    // Only `text` blocks contribute to the answer.
    const textParts = [];
    for (const block of Array.isArray(payload.content) ? payload.content : []) {
      if (block?.type === 'text') textParts.push(String(block.text ?? ''));
    }
    const answerText = textParts.join('\n').trim();

    const json = extractFirstJsonObject(answerText);
    if (!json) {
      throw new Error('anthropic_json_parse_failed');
    }

    return {
      model: payload.model ?? model,
      data: json,
      rawText: answerText,
    };
  } finally {
    clearTimeout(timeoutHandle);
  }
}
141
-
142
/**
 * Normalise model-provided highlight rows to at most three entries.
 *
 * A row is kept only when its `y` is a finite number or a non-blank numeric
 * string. Values such as `null`, `''`, booleans or arrays are rejected
 * explicitly, because `Number()` would coerce them to 0/1 and create a
 * highlight at a position the model never reported.
 *
 * @param {unknown} input - Expected to be an array of rows; anything else yields `[]`.
 * @param {object} [options]
 * @param {string} [options.fallbackChunk='unknown'] - Used when a row has no chunk id.
 * @returns {Array<{y: number, from_chunk: string, reason: string}>}
 */
function sanitizeHighlights(input, { fallbackChunk = 'unknown' } = {}) {
  if (!Array.isArray(input)) return [];

  const highlights = [];
  for (const row of input) {
    const rawY = row?.y;
    const isNumericCandidate = typeof rawY === 'number'
      || (typeof rawY === 'string' && rawY.trim() !== '');
    if (!isNumericCandidate) continue;

    const y = Number(rawY);
    if (!Number.isFinite(y)) continue;

    highlights.push({
      y: Math.round(y),
      from_chunk: String(row?.from_chunk ?? row?.fromChunk ?? fallbackChunk).trim() || fallbackChunk,
      reason: String(row?.reason ?? '').trim(),
    });
    if (highlights.length === 3) break;
  }

  return highlights;
}
153
-
154
/**
 * Ask the model to pick highlights from the page's text bins.
 *
 * At most 24 bins are sent, each reduced to a rounded `y_center` and the first
 * 220 characters of its text. The prompt is a pretty-printed JSON document.
 *
 * @param {object} args
 * @param {string} [args.persona] - Blank or missing values use the default persona string.
 * @param {object} [args.structure] - Page structure; `bins`, `url`, `hostname`,
 *   `core_y_range`, `focus_y_range` and `meta` are read from it.
 * @param {object} [args.configOverrides] - Passed to `resolveAnthropicConfig`.
 * @returns {Promise<{core_message: string, highlights: Array, model_used: string}>}
 */
export async function runTextLlm({
  persona,
  structure,
  configOverrides = {},
}) {
  const config = resolveAnthropicConfig(configOverrides);

  const compactBins = [];
  if (Array.isArray(structure?.bins)) {
    for (const bin of structure.bins.slice(0, 24)) {
      compactBins.push({
        y_center: Math.round(Number(bin?.y_center ?? 0)),
        text: String(bin?.text ?? '').slice(0, 220),
      });
    }
  }

  const request = {
    task: '阅读页面结构化文本 bin,输出 1-3 个 highlight',
    persona: String(persona ?? '').trim() || '通用用户',
    url: structure?.url ?? '',
    hostname: structure?.hostname ?? '',
    core_y_range: structure?.core_y_range ?? [],
    focus_y_range: structure?.focus_y_range ?? [],
    meta: structure?.meta ?? {},
    bins: compactBins,
    output_schema: {
      core_message: 'string',
      highlights: [{ y: 1200, from_chunk: 'bin_3', reason: 'string' }],
    },
  };
  const userContent = [{ type: 'text', text: JSON.stringify(request, null, 2) }];

  const result = await callAnthropic({
    system: '你是页面理解助手。必须返回 JSON,不要返回额外解释。',
    userContent,
    config,
  });

  const coreMessage = String(result.data?.core_message ?? '').trim();
  const highlights = sanitizeHighlights(result.data?.highlights, { fallbackChunk: 'bin' });
  return {
    core_message: coreMessage,
    highlights,
    model_used: result.model,
  };
}
193
-
194
/**
 * Ask the model to pick highlights from page screenshots.
 *
 * Each usable chunk is sent as a base64 PNG image block, followed by one text
 * block carrying the JSON prompt. A missing or non-array `chunks`, null
 * entries, and chunks without image data are skipped instead of throwing or
 * producing an image block with no data. Null hotspot entries are ignored.
 *
 * @param {object} args
 * @param {string} [args.persona] - Blank or missing values use the default persona string.
 * @param {object} [args.structure] - Page structure; `hotspots`, `url`,
 *   `hostname`, `core_y_range` and `focus_y_range` are read from it.
 * @param {Array<{id: string, y_start: number, y_end: number, image_base64: string}>} [args.chunks]
 * @param {object} [args.configOverrides] - Passed to `resolveAnthropicConfig`.
 * @returns {Promise<{core_message: string, highlights: Array, model_used: string}>}
 */
export async function runVisionLlm({
  persona,
  structure,
  chunks,
  configOverrides = {},
}) {
  const config = resolveAnthropicConfig(configOverrides);

  // Keep only chunks that can actually be sent as an image, so the image
  // blocks and the chunk metadata in the prompt stay aligned.
  const usableChunks = (Array.isArray(chunks) ? chunks : []).filter(
    (chunk) => typeof chunk?.image_base64 === 'string' && chunk.image_base64 !== ''
  );

  const content = usableChunks.map((chunk) => ({
    type: 'image',
    source: {
      type: 'base64',
      media_type: 'image/png',
      data: chunk.image_base64,
    },
  }));

  const candidatePool = (Array.isArray(structure?.hotspots) ? structure.hotspots : [])
    .filter((item) => item != null && typeof item === 'object')
    .slice(0, 24)
    .map(({ y, type, reason }) => ({ y, type, reason }));

  const prompt = JSON.stringify({
    task: '从页面截图中为短视频挑选 1-3 个 highlights',
    persona: String(persona ?? '').trim() || '通用用户',
    url: structure?.url ?? '',
    hostname: structure?.hostname ?? '',
    core_y_range: structure?.core_y_range ?? [],
    focus_y_range: structure?.focus_y_range ?? [],
    candidate_pool: candidatePool,
    chunks: usableChunks.map((chunk) => ({
      chunk_id: chunk.id,
      y_start: chunk.y_start,
      y_end: chunk.y_end,
    })),
    constraints: [
      '只返回 JSON',
      'highlights 数量 1-3',
      'y 必须在 focus_y_range 内',
      '优先 candidate_pool',
    ],
    output_schema: {
      core_message: 'string',
      highlights: [{ y: 1200, from_chunk: 'chunk_1', reason: 'string' }],
    },
  }, null, 2);

  // The text prompt goes last, after all image blocks.
  content.push({
    type: 'text',
    text: prompt,
  });

  const result = await callAnthropic({
    system: '你是页面视觉理解助手。必须输出 JSON。',
    userContent: content,
    config,
  });

  return {
    core_message: String(result.data?.core_message ?? '').trim(),
    highlights: sanitizeHighlights(result.data?.highlights, { fallbackChunk: 'chunk' }),
    model_used: result.model,
  };
}
@@ -1,254 +0,0 @@
1
/** Default number of qualifying text bins required by `hasEnoughTextBins`. */
export const MIN_TEXT_BIN_COUNT = 10;

/** Default character threshold a bin's trimmed text is compared against. */
export const MIN_BIN_CHAR_COUNT = 30;

/** Ordered, frozen list of the semantic slot keys of a recruitment page. */
export const RECRUITMENT_SLOT_KEYS = Object.freeze([
  'company', 'published_at', 'recruitment_type',
  'cohort', 'job_directions', 'locations',
  'target_or_requirements', 'process', 'entry_or_cta',
]);

// Accepted values for a slot's `source_type`.
const SLOT_SOURCE_TYPE_SET = new Set([
  'title',
  'meta',
  'body_text',
  'image_ocr',
  'heuristic',
]);

// Accepted values for a slot's `status`.
const SLOT_STATUS_SET = new Set([
  'present',
  'missing',
]);

// Accepted values for the page-level `mode_hint`.
const MODE_HINT_SET = new Set([
  'job_intel_broadcast', 'job_alert', 'info_summary', 'refuse_auto_broadcast',
]);
23
-
24
/**
 * Coerce a loosely-typed value to an integer within `[min, max]`.
 *
 * @param {unknown} value - Candidate value (number, numeric string, ...).
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @param {number} fallback - Returned as-is when `value` is missing, blank or
 *   does not convert to a finite number.
 * @returns {number} The rounded, clamped integer, or `fallback`.
 */
function clampInt(value, min, max, fallback) {
  // Number(null) and Number('') are both 0; without this guard a missing
  // field would be clamped to `min` instead of taking the fallback.
  if (value == null) return fallback;
  if (typeof value === 'string' && value.trim() === '') return fallback;

  const n = Number(value);
  if (!Number.isFinite(n)) return fallback;
  return Math.max(min, Math.min(max, Math.round(n)));
}
29
-
30
/**
 * Normalise a `[start, end]` pair to clamped integers with `end >= start`.
 *
 * @param {unknown} input - Candidate pair; non-arrays are replaced by `fallback`.
 * @param {object} [options]
 * @param {number} [options.floor=0] - Inclusive lower bound for both ends.
 * @param {number} [options.ceil=Number.MAX_SAFE_INTEGER] - Inclusive upper bound.
 * @param {number[]} [options.fallback=[0, 0]] - Pair used for missing or invalid ends.
 * @returns {number[]} Two-element array; an inverted range collapses to `[start, start]`.
 */
function normalizeRange(input, { floor = 0, ceil = Number.MAX_SAFE_INTEGER, fallback = [0, 0] } = {}) {
  const pair = Array.isArray(input) ? input : fallback;
  const lower = clampInt(pair[0], floor, ceil, fallback[0]);
  const upper = clampInt(pair[1], floor, ceil, fallback[1]);
  return [lower, upper < lower ? lower : upper];
}
37
-
38
/**
 * Normalise a recommended duration range (in seconds) to a pair within
 * 5..240 whose end is strictly greater than its start.
 *
 * A collapsed range is widened to a 5-second window. When the start sits too
 * close to the 240 ceiling the window is shifted down so the widened end never
 * exceeds the ceiling the range was just clamped to.
 *
 * @param {unknown} input - Candidate `[start, end]` pair.
 * @param {number[]} [fallback=[35, 55]] - Pair used for missing or invalid ends.
 * @returns {number[]} Two-element array `[start, end]` with `start < end <= 240`.
 */
function normalizeDurationRange(input, fallback = [35, 55]) {
  const floor = 5;
  const ceil = 240;
  const minSpan = 5;

  const [start, end] = normalizeRange(input, { floor, ceil, fallback });
  if (end > start) return [start, end];

  const widenedStart = Math.min(start, ceil - minSpan);
  return [widenedStart, widenedStart + minSpan];
}
43
-
44
/**
 * Normalise a confidence score to a number in `[0, 1]` with two decimals.
 *
 * @param {unknown} value - Candidate score (number or numeric string).
 * @param {number} [fallback=0] - Returned as-is when `value` is missing, blank
 *   or not a finite number.
 * @returns {number} The clamped, rounded score, or `fallback`.
 */
function normalizeConfidence(value, fallback = 0) {
  // Number(null) and Number('') are 0, which would hide a missing score
  // behind a confident-looking 0 instead of the caller's fallback.
  if (value == null) return fallback;
  if (typeof value === 'string' && value.trim() === '') return fallback;

  const n = Number(value);
  if (!Number.isFinite(n)) return fallback;
  return Number(Math.max(0, Math.min(1, n)).toFixed(2));
}
49
-
50
/**
 * Normalise a slot value to a trimmed string, a list of unique trimmed
 * strings, or `null` when nothing usable remains.
 *
 * List entries are de-duplicated case-insensitively; the first spelling seen
 * is the one that is kept, in its original position.
 *
 * @param {unknown} value - Raw slot value (scalar or array).
 * @returns {string|string[]|null}
 */
function normalizeSlotValue(value) {
  if (!Array.isArray(value)) {
    const single = String(value ?? '').trim();
    return single === '' ? null : single;
  }

  // Map keeps insertion order, so values() yields first occurrences in order.
  const firstSpellingByKey = new Map();
  for (const entry of value) {
    const cleaned = String(entry ?? '').trim();
    if (cleaned === '') continue;
    const foldedKey = cleaned.toLowerCase();
    if (!firstSpellingByKey.has(foldedKey)) {
      firstSpellingByKey.set(foldedKey, cleaned);
    }
  }

  return firstSpellingByKey.size > 0 ? [...firstSpellingByKey.values()] : null;
}
68
-
69
/**
 * Return the trimmed source type when it is an accepted value, otherwise
 * `'heuristic'`.
 *
 * @param {unknown} value - Raw source type.
 * @returns {string}
 */
function normalizeSourceType(value) {
  const candidate = String(value ?? '').trim();
  return SLOT_SOURCE_TYPE_SET.has(candidate) ? candidate : 'heuristic';
}
74
-
75
/**
 * Return the trimmed mode hint when it is an accepted value, otherwise
 * `'refuse_auto_broadcast'`.
 *
 * @param {unknown} value - Raw mode hint.
 * @returns {string}
 */
function normalizeModeHint(value) {
  const candidate = String(value ?? '').trim();
  return MODE_HINT_SET.has(candidate) ? candidate : 'refuse_auto_broadcast';
}
80
-
81
/**
 * Normalise one semantic slot record.
 *
 * `status` is taken from the row when it is an accepted value; otherwise it is
 * derived from whether a value survived normalisation. `focus_region` is kept
 * only when it spans a non-empty range after clamping to the page height.
 *
 * @param {object} [row] - Raw slot record.
 * @param {object} [options]
 * @param {number} [options.totalHeight] - Upper bound for `focus_region`.
 * @param {string} [options.fallbackSourceType='heuristic'] - Used when the row has no `source_type`.
 * @returns {{value: (string|string[]|null), source_type: string, confidence: number, focus_region: (number[]|null), status: string}}
 */
function normalizeSlot(row, { totalHeight, fallbackSourceType = 'heuristic' } = {}) {
  const value = normalizeSlotValue(row?.value);

  let focusRegion = null;
  if (row?.focus_region != null) {
    const region = normalizeRange(row.focus_region, { floor: 0, ceil: totalHeight, fallback: [0, 0] });
    if (Array.isArray(region) && region[1] > region[0]) {
      focusRegion = region;
    }
  }

  const declaredStatus = String(row?.status ?? '').trim();
  let status = declaredStatus;
  if (!SLOT_STATUS_SET.has(declaredStatus)) {
    status = value == null ? 'missing' : 'present';
  }

  return {
    value,
    source_type: normalizeSourceType(row?.source_type ?? fallbackSourceType),
    confidence: normalizeConfidence(row?.confidence, 0),
    focus_region: focusRegion,
    status,
  };
}
100
-
101
/**
 * Build a complete slot map with one normalised entry per recruitment slot
 * key, in the order of `RECRUITMENT_SLOT_KEYS`. Keys absent from the input
 * are normalised from `undefined`; unknown input keys are dropped.
 *
 * @param {unknown} input - Raw slot map; anything but a plain object is treated as empty.
 * @param {object} options
 * @param {number} options.totalHeight - Forwarded to `normalizeSlot`.
 * @returns {Object<string, object>}
 */
function normalizeSemanticSlots(input, { totalHeight }) {
  const isRecord = Boolean(input) && typeof input === 'object' && !Array.isArray(input);
  const source = isRecord ? input : {};

  return Object.fromEntries(
    RECRUITMENT_SLOT_KEYS.map((key) => [
      key,
      normalizeSlot(source[key], { totalHeight, fallbackSourceType: 'heuristic' }),
    ])
  );
}
111
-
112
/**
 * Decide whether a page has enough substantial text bins.
 *
 * A bin qualifies when its trimmed text is strictly longer than `minChars`;
 * at least `minCount` qualifying bins are required.
 *
 * @param {unknown} bins - Expected to be an array of `{ text }` records.
 * @param {object} [options]
 * @param {number} [options.minCount=MIN_TEXT_BIN_COUNT]
 * @param {number} [options.minChars=MIN_BIN_CHAR_COUNT]
 * @returns {boolean} `false` for non-array input.
 */
export function hasEnoughTextBins(
  bins,
  {
    minCount = MIN_TEXT_BIN_COUNT,
    minChars = MIN_BIN_CHAR_COUNT,
  } = {}
) {
  if (!Array.isArray(bins)) return false;

  const qualifyingCount = bins.reduce((count, bin) => {
    const textLength = String(bin?.text ?? '').trim().length;
    return textLength > minChars ? count + 1 : count;
  }, 0);

  return qualifyingCount >= minCount;
}
123
-
124
/**
 * Choose the understanding path for a page: `'text'` when its bins pass
 * `hasEnoughTextBins`, otherwise `'vision'`.
 *
 * @param {object} [structure] - Page structure; only `bins` is read.
 * @param {object} [options] - Forwarded to `hasEnoughTextBins`.
 * @returns {'text'|'vision'}
 */
export function chooseUnderstandingPath(structure, options = {}) {
  if (hasEnoughTextBins(structure?.bins, options)) {
    return 'text';
  }
  return 'vision';
}
127
-
128
/**
 * Normalise candidate highlight rows to at most three entries whose `y` is
 * clamped into the focus range.
 *
 * Rows whose `y` does not yield a finite number are skipped; scanning stops
 * as soon as three rows have been kept.
 *
 * @param {unknown} input - Expected to be an array of rows; anything else yields `[]`.
 * @param {object} [options]
 * @param {number[]} [options.focusRange] - `[start, end]` bounds for `y`.
 * @param {string} [options.fallbackReason='heuristic'] - Used when a row has no reason.
 * @returns {Array<{y: number, from_chunk: string, reason: string}>}
 */
function normalizeHighlights(input, { focusRange, fallbackReason = 'heuristic' } = {}) {
  const [lowerBound, upperBound] = normalizeRange(focusRange, { fallback: [0, 0] });
  const rows = Array.isArray(input) ? input : [];
  const kept = [];

  let index = 0;
  while (kept.length < 3 && index < rows.length) {
    const row = rows[index];
    index += 1;

    // NaN as the fallback marks "no usable y" so the row can be dropped.
    const y = clampInt(row?.y, lowerBound, upperBound, NaN);
    if (Number.isFinite(y)) {
      const chunkLabel = String(row?.from_chunk ?? row?.fromChunk ?? '').trim();
      const reasonText = String(row?.reason ?? '').trim();
      kept.push({
        y,
        from_chunk: chunkLabel || 'unknown',
        reason: reasonText || fallbackReason,
      });
    }
  }

  return kept;
}
146
-
147
/**
 * Normalise the list of page zones to skip.
 *
 * When no zones are provided, the parts of the page above and below the core
 * range are used instead. Every zone is clamped to the page height and zones
 * that end up empty are dropped.
 *
 * @param {unknown} input - Expected to be an array of `{ y_range, reason }`.
 * @param {object} options
 * @param {number} options.totalHeight - Page height used as the upper bound.
 * @param {number[]} options.coreRange - `[start, end]` of the core content.
 * @returns {Array<{y_range: number[], reason: string}>}
 */
function normalizeSkipZones(input, { totalHeight, coreRange }) {
  const bounds = { floor: 0, ceil: totalHeight };
  const [coreStart, coreEnd] = normalizeRange(coreRange, { ...bounds, fallback: [0, totalHeight] });

  let zones = Array.isArray(input) ? input : [];
  if (zones.length === 0) {
    // Nothing supplied: derive the zones from whatever lies outside the core.
    zones = [];
    if (coreStart > 0) {
      zones.push({ y_range: [0, coreStart], reason: 'non_core_top' });
    }
    if (coreEnd < totalHeight) {
      zones.push({ y_range: [coreEnd, totalHeight], reason: 'non_core_bottom' });
    }
  }

  const normalized = [];
  for (const zone of zones) {
    const yRange = normalizeRange(zone?.y_range, { ...bounds, fallback: [0, 0] });
    if (!(yRange[1] > yRange[0])) continue;
    normalized.push({
      y_range: yRange,
      reason: String(zone?.reason ?? '').trim() || 'non_core',
    });
  }
  return normalized;
}
167
-
168
/**
 * Normalise a raw page-understanding record to the canonical shape.
 *
 * Every field is coerced, clamped or defaulted, so the result always carries
 * the full set of keys. `total_height` is clamped to 100..200000 (default
 * 1920); the core range is clamped to the page and the focus range to the part
 * of the page from the core start downwards.
 *
 * @param {object|null} [raw] - Raw record. `null` is treated like an empty
 *   record (the default parameter only covers `undefined`).
 * @returns {object} Normalised record; `meta` is the caller's object when it is
 *   a plain object, otherwise `{}`.
 */
export function normalizePageUnderstanding(raw = {}) {
  // An explicit null (e.g. from a failed upstream parse) bypasses the default
  // parameter and would otherwise throw on the first property read.
  const source = raw ?? {};

  const totalHeight = clampInt(source.total_height, 100, 200000, 1920);
  const coreRange = normalizeRange(source.core_y_range, { floor: 0, ceil: totalHeight, fallback: [0, totalHeight] });
  const focusRange = normalizeRange(source.focus_y_range, { floor: coreRange[0], ceil: totalHeight, fallback: coreRange });
  const highlights = normalizeHighlights(source.candidate_hotspots, { focusRange, fallbackReason: 'candidate' });
  const semanticSlots = normalizeSemanticSlots(source.semantic_slots, { totalHeight });
  const modeHint = normalizeModeHint(source.mode_hint);
  const modeHintConfidence = normalizeConfidence(source.mode_hint_confidence, 0.2);

  return {
    url: String(source.url ?? '').trim(),
    page_type: String(source.page_type ?? 'generic_article').trim() || 'generic_article',
    hostname: String(source.hostname ?? '').trim(),
    core_message: String(source.core_message ?? '').trim(),
    core_y_range: coreRange,
    focus_y_range: focusRange,
    total_height: totalHeight,
    candidate_hotspots: highlights,
    skip_zones: normalizeSkipZones(source.skip_zones, { totalHeight, coreRange }),
    recommended_duration_s: normalizeDurationRange(source.recommended_duration_s),
    semantic_slots: semanticSlots,
    mode_hint: modeHint,
    mode_hint_confidence: modeHintConfidence,
    meta: source.meta && typeof source.meta === 'object' && !Array.isArray(source.meta)
      ? source.meta
      : {},
  };
}
196
-
197
/**
 * Check that a slot value has an allowed shape: nullish, a string, or an
 * array containing only strings.
 *
 * @param {unknown} value - Slot value to check.
 * @returns {boolean}
 */
function isSlotValueValid(value) {
  const isText = (item) => typeof item === 'string';

  if (value === null || value === undefined) return true;
  return Array.isArray(value) ? value.every(isText) : isText(value);
}
202
-
203
/**
 * Validate the shape of a page-understanding record.
 *
 * All checks run and every failure is reported, in a fixed order: top-level
 * fields, then each recruitment slot, then the mode hint fields.
 *
 * @param {object} [payload] - Record to validate; nullish is treated as `{}`.
 * @returns {{ok: boolean, errors: string[]}} `ok` is true when `errors` is empty.
 */
export function validatePageUnderstanding(payload) {
  const model = payload ?? {};
  const errors = [];

  const isNonEmptyString = (v) => Boolean(v) && typeof v === 'string';
  const isPair = (v) => Array.isArray(v) && v.length === 2;
  const isRecord = (v) => Boolean(v) && typeof v === 'object' && !Array.isArray(v);
  const inSet = (set, v) => set.has(String(v ?? '').trim());
  const isNumeric = (v) => Number.isFinite(Number(v));

  // [passes, message] pairs for the top-level fields, in reporting order.
  const topLevelChecks = [
    [isNonEmptyString(model.url), 'url required'],
    [isNonEmptyString(model.hostname), 'hostname required'],
    [isPair(model.core_y_range), 'core_y_range invalid'],
    [isPair(model.focus_y_range), 'focus_y_range invalid'],
    [Number.isFinite(model.total_height) && model.total_height > 0, 'total_height invalid'],
    [Array.isArray(model.candidate_hotspots), 'candidate_hotspots invalid'],
    [Array.isArray(model.skip_zones), 'skip_zones invalid'],
    [isPair(model.recommended_duration_s), 'recommended_duration_s invalid'],
  ];
  for (const [passes, message] of topLevelChecks) {
    if (!passes) errors.push(message);
  }

  const slots = model.semantic_slots;
  if (!isRecord(slots)) {
    errors.push('semantic_slots invalid');
  } else {
    for (const key of RECRUITMENT_SLOT_KEYS) {
      const slot = slots[key];
      const path = `semantic_slots.${key}`;

      if (!isRecord(slot)) {
        errors.push(`${path} invalid`);
        continue;
      }

      const slotChecks = [
        [isSlotValueValid(slot.value), 'value'],
        [inSet(SLOT_SOURCE_TYPE_SET, slot.source_type), 'source_type'],
        [isNumeric(slot.confidence), 'confidence'],
        [inSet(SLOT_STATUS_SET, slot.status), 'status'],
        // focus_region is optional; only a present value must be a pair.
        [slot.focus_region == null || isPair(slot.focus_region), 'focus_region'],
      ];
      for (const [passes, field] of slotChecks) {
        if (!passes) errors.push(`${path}.${field} invalid`);
      }
    }
  }

  if (!inSet(MODE_HINT_SET, model.mode_hint)) {
    errors.push('mode_hint invalid');
  }
  if (!isNumeric(model.mode_hint_confidence)) {
    errors.push('mode_hint_confidence invalid');
  }

  return { ok: errors.length === 0, errors };
}
@@ -1,47 +0,0 @@
1
/**
 * Per-site CSS selectors for the main content container, keyed by domain.
 * Both the map and each selector list are frozen; lists keep their declared
 * order.
 */
export const SITE_CORE_SELECTORS = Object.freeze(
  Object.fromEntries(
    [
      ['mp.weixin.qq.com', ['#js_content', '#img-content', 'article', 'main']],
      ['zhuanlan.zhihu.com', ['.Post-RichTextContainer', '.RichText.ztext', 'article', 'main']],
      ['jianshu.com', ['article', '.note .post', '.article', 'main']],
      ['juejin.cn', ['article', '.article-content', '.main-area article', 'main']],
    ].map(([domain, selectors]) => [domain, Object.freeze(selectors)])
  )
);
27
-
28
/** Frozen, ordered list of generic semantic selectors for main page content. */
export const SEMANTIC_CORE_SELECTORS = Object.freeze(['article', 'main', '[role="main"]']);
33
-
34
/**
 * Look up the site-specific content selectors for a hostname.
 *
 * A hostname matches a configured domain when it equals the domain or is one
 * of its subdomains. Matching is done on label boundaries, not by substring,
 * so `notjuejin.cn` or `juejin.cn.evil.example` do not pick up the selectors
 * of `juejin.cn`. A trailing `:port` or trailing dot is ignored.
 *
 * @param {unknown} hostname - Hostname to resolve; nullish or blank yields `null`.
 * @returns {{domain: string, selectors: string[]}|null} The matched domain with
 *   a fresh copy of its selector list, or `null` when no domain matches.
 */
export function resolveSiteSelectors(hostname) {
  const normalizedHost = String(hostname ?? '')
    .trim()
    .toLowerCase()
    .replace(/:\d+$/, '')
    .replace(/\.$/, '');
  if (!normalizedHost) return null;

  for (const [domain, selectors] of Object.entries(SITE_CORE_SELECTORS)) {
    const isMatch = normalizedHost === domain || normalizedHost.endsWith(`.${domain}`);
    if (!isMatch) continue;
    return {
      domain,
      // Copy so callers cannot be tripped up by the frozen shared list.
      selectors: [...selectors],
    };
  }

  return null;
}