@tikomni/skills 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/creator-analysis/SKILL.md +34 -10
- package/skills/creator-analysis/references/contracts/creator-card-fields.md +2 -0
- package/skills/creator-analysis/references/contracts/work-card-fields.md +40 -4
- package/skills/creator-analysis/references/platform-guides/douyin.md +41 -36
- package/skills/creator-analysis/references/platform-guides/generic.md +11 -7
- package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +45 -30
- package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +224 -95
- package/skills/creator-analysis/references/workflow.md +8 -3
- package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +205 -21
- package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +54 -11
- package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +200 -13
- package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +113 -42
- package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +65 -7
- package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +82 -18
- package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +198 -32
- package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +374 -31
- package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +68 -12
- package/skills/creator-analysis/scripts/core/storage_router.py +3 -0
- package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +3 -2
- package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +314 -137
package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py
CHANGED
@@ -10,9 +10,13 @@ import subprocess
 from typing import Any, Dict, List, Tuple

 from scripts.author_home.analyzers.author_analysis_v2_support import (
+    AnalysisResourceError,
+    OUTPUT_SCHEMA_PATH,
+    PROMPT_CONTRACT_PATH,
     build_author_analysis_input_v1,
     build_fallback_author_analysis_v2,
     derive_legacy_summary,
+    prepare_author_analysis_bundle,
     prompt_contract_text,
     validate_author_analysis_v2,
 )
@@ -106,8 +110,7 @@ def _compact_analysis_input_for_prompt(analysis_input: Dict[str, Any]) -> Dict[s



-def _build_prompt(analysis_input: Dict[str, Any], sampled_work_explanations: Dict[str, Any]) -> str:
-    contract_prompt = prompt_contract_text()
+def _build_prompt(analysis_input: Dict[str, Any], sampled_work_explanations: Dict[str, Any], *, contract_prompt: str) -> str:
     prompt_input = _compact_analysis_input_for_prompt(analysis_input)
     prompt_payload = {"author_analysis_input_v1": prompt_input}
     if isinstance(sampled_work_explanations, dict) and sampled_work_explanations.get("sampled_work_explanations"):
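Context for this hunk: _build_prompt no longer loads the prompt contract itself; the caller loads it once and injects it as a keyword-only argument, so a load failure surfaces before any prompt assembly. A minimal standalone sketch of that injection pattern (names other than contract_prompt are illustrative, not from the package):

from typing import Any, Dict


def build_prompt(payload: Dict[str, Any], *, contract_prompt: str) -> str:
    # The contract text is injected, so the caller decides how to handle a
    # missing or unreadable contract before the prompt is ever built.
    return f"{contract_prompt}\n\nINPUT:\n{payload}"


contract = "Respond with JSON only."  # in the package this comes from prompt_contract_text()
print(build_prompt({"author": "demo"}, contract_prompt=contract))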
@@ -126,6 +129,17 @@ def _build_prompt(analysis_input: Dict[str, Any], sampled_work_explanations: Dic
     )


+def _stage_status(*, status: str, ok_count: int, failed_count: int, degraded_count: int, reason_codes: List[str], failure_kind: str = "") -> Dict[str, Any]:
+    return {
+        "status": status,
+        "ok_count": ok_count,
+        "failed_count": failed_count,
+        "degraded_count": degraded_count,
+        "reason_codes": list(dict.fromkeys([code for code in reason_codes if code])),
+        "failure_kind": failure_kind or None,
+    }
+
+
 def _extract_json_block(text: str) -> Dict[str, Any]:
     content = (text or "").strip()
     if not content:
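The _stage_status helper added here normalizes a stage summary dict: reason codes are de-duplicated in first-seen order with empty strings dropped, and failure_kind falls back to None. A minimal standalone sketch reproducing that behavior outside the package:

from typing import Any, Dict, List


def stage_status(*, status: str, ok_count: int, failed_count: int, degraded_count: int, reason_codes: List[str], failure_kind: str = "") -> Dict[str, Any]:
    return {
        "status": status,
        "ok_count": ok_count,
        "failed_count": failed_count,
        "degraded_count": degraded_count,
        # dict.fromkeys keeps first-seen order while dropping duplicates; the
        # comprehension drops empty codes.
        "reason_codes": list(dict.fromkeys([code for code in reason_codes if code])),
        "failure_kind": failure_kind or None,
    }


print(stage_status(status="degraded", ok_count=0, failed_count=0, degraded_count=3,
                   reason_codes=["timeout", "", "timeout", "rate_limit"]))
# reason_codes -> ["timeout", "rate_limit"], failure_kind -> None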
@@ -159,9 +173,29 @@ def _unwrap_author_analysis(payload: Dict[str, Any]) -> Dict[str, Any]:
     return payload


-def run_prompt_first_author_analysis(
-
-
+def run_prompt_first_author_analysis(
+    profile: Dict[str, Any],
+    works: List[Dict[str, Any]],
+    *,
+    analysis_bundle: Dict[str, Any] | None = None,
+) -> Tuple[Dict[str, Any], List[Dict[str, str]], List[Dict[str, Any]]]:
+    prepared = analysis_bundle if isinstance(analysis_bundle, dict) else prepare_author_analysis_bundle(
+        profile=profile,
+        works=works,
+        platform=str(profile.get("platform") or "unknown"),
+    )
+    analysis_input = prepared.get("analysis_input") if isinstance(prepared.get("analysis_input"), dict) else {}
+    input_errors: List[Dict[str, str]] = []
+    input_resource_error: AnalysisResourceError | None = None
+    try:
+        input_errors = build_author_analysis_input_v1(
+            profile=profile,
+            works=works,
+            platform=str(profile.get("platform") or "unknown"),
+        )[1]
+    except AnalysisResourceError as error:
+        input_resource_error = error
+    sampled_work_explanations, sampled_explanation_errors, sampled_explanation_trace, sampled_explanations_status = run_sampled_work_batch_explanations(analysis_input)
     sampled_works_count = len(analysis_input.get("sampled_works") or [])
     total_works = ((analysis_input.get("aggregate_stats") or {}).get("total_works") if isinstance(analysis_input.get("aggregate_stats"), dict) else 0)
     llm_timeout_sec = max(int(os.getenv("TIKOMNI_AUTHOR_ANALYSIS_TIMEOUT_SEC", str(DEFAULT_ANALYSIS_TIMEOUT_SEC))), 5)
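The rewritten signature accepts an optional precomputed analysis_bundle so a caller can reuse work it has already done; when the argument is absent or not a dict, the bundle is rebuilt via prepare_author_analysis_bundle. A hedged sketch of that reuse-or-rebuild pattern (prepare_bundle below is a stand-in, not the package function):

from typing import Any, Dict, Optional


def prepare_bundle(profile: Dict[str, Any]) -> Dict[str, Any]:
    # stand-in for the package's prepare_author_analysis_bundle
    return {"analysis_input": {"platform": profile.get("platform", "unknown")}}


def run_analysis(profile: Dict[str, Any], *, analysis_bundle: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    # reuse the caller's bundle when it is a dict, otherwise rebuild it
    prepared = analysis_bundle if isinstance(analysis_bundle, dict) else prepare_bundle(profile)
    return prepared.get("analysis_input", {})


profile = {"platform": "douyin"}
shared = prepare_bundle(profile)                      # built once by the orchestrator
print(run_analysis(profile, analysis_bundle=shared))  # reused, not rebuilt
print(run_analysis(profile))                          # rebuilt on demand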
@@ -173,20 +207,46 @@ def run_prompt_first_author_analysis(profile: Dict[str, Any], works: List[Dict[s
             "total_works": total_works,
             "sampled_works_count": sampled_works_count,
             "prompt_contract": f"prompt-contracts/{AUTHOR_ANALYSIS_PROMPT_FILE}@v1",
+            "contract_path": str(PROMPT_CONTRACT_PATH),
+            "schema_path": str(OUTPUT_SCHEMA_PATH),
             "llm_timeout_sec": llm_timeout_sec,
             "small_sample_skip_threshold": small_sample_skip_threshold,
         }
     ] + sampled_explanation_trace
     if input_errors:
         trace.append({"step": "analysis.input_validation_failed", "error_count": len(input_errors)})
+    if input_resource_error is not None:
+        trace.append({"step": "analysis.input_resource_error", "error": str(input_resource_error)})
+        result = {
+            **derive_legacy_summary({}, analysis_input=analysis_input, validation_errors=[]),
+            "author_analysis_v2": {},
+            "author_analysis_input_v1": analysis_input,
+            "sampled_work_explanations": sampled_work_explanations,
+            "sampled_explanations_status": sampled_explanations_status,
+            "author_analysis_status": _stage_status(
+                status="failed",
+                ok_count=0,
+                failed_count=1,
+                degraded_count=0,
+                reason_codes=[input_resource_error.code],
+                failure_kind="configuration",
+            ),
+            "quality_tier": "failed",
+            "validation": {
+                "ok": False,
+                "errors": [],
+            },
+        }
+        return result, [], trace
     if sampled_explanation_errors:
         trace.append({"step": "analysis.sampled_work_explanations_validation_failed", "error_count": len(sampled_explanation_errors)})

-    prompt = _build_prompt(analysis_input, sampled_work_explanations)
     response_text = ""
     analysis_v2: Dict[str, Any] = {}
     llm_ok = False
     skip_llm = sampled_works_count < small_sample_skip_threshold
+    author_reason_codes: List[str] = []
+    author_status = _stage_status(status="failed", ok_count=0, failed_count=1, degraded_count=0, reason_codes=["analysis_not_started"])
     if skip_llm:
         trace.append(
             {
@@ -196,7 +256,42 @@ def run_prompt_first_author_analysis(profile: Dict[str, Any], works: List[Dict[s
                 "threshold": small_sample_skip_threshold,
             }
         )
+        author_reason_codes.append("small_sample_below_threshold")
     else:
+        try:
+            contract_prompt = prompt_contract_text()
+            trace.append(
+                {
+                    "step": "analysis.resources_loaded",
+                    "contract_loaded": True,
+                    "contract_chars": len(contract_prompt),
+                }
+            )
+            prompt = _build_prompt(analysis_input, sampled_work_explanations, contract_prompt=contract_prompt)
+        except AnalysisResourceError as error:
+            trace.append({"step": "analysis.resource_error", "error": str(error)})
+            author_status = _stage_status(
+                status="failed",
+                ok_count=0,
+                failed_count=1,
+                degraded_count=0,
+                reason_codes=[error.code],
+                failure_kind="configuration",
+            )
+            result = {
+                **derive_legacy_summary({}, analysis_input=analysis_input, validation_errors=input_errors),
+                "author_analysis_v2": {},
+                "author_analysis_input_v1": analysis_input,
+                "sampled_work_explanations": sampled_work_explanations,
+                "sampled_explanations_status": sampled_explanations_status,
+                "author_analysis_status": author_status,
+                "quality_tier": "failed",
+                "validation": {
+                    "ok": False,
+                    "errors": input_errors,
+                },
+            }
+            return result, input_errors, trace
         try:
             run = subprocess.run(
                 ["openclaw", "agent", "--agent", "main", "--message", prompt, "--json"],
@@ -226,32 +321,124 @@ def run_prompt_first_author_analysis(profile: Dict[str, Any], works: List[Dict[s
             )
         except Exception as error:
             trace.append({"step": "analysis.llm_error", "error": f"{type(error).__name__}:{error}"})
+            author_reason_codes.append("author_llm_runtime_error")
+
+    validation_errors: List[Dict[str, str]] = []
+    resource_error: AnalysisResourceError | None = None
+    if analysis_v2:
+        try:
+            validation_errors = validate_author_analysis_v2(analysis_v2, analysis_input=analysis_input)
+            trace.append({"step": "analysis.output_schema_loaded", "schema_loaded": True})
+        except AnalysisResourceError as error:
+            resource_error = error
+
+    if resource_error is not None:
+        trace.append(
+            {
+                "step": "analysis.resource_error",
+                "error": str(resource_error),
+                "contract_path": str(resource_error.path) if resource_error.code == "contract_load_failed" else str(resource_error.path),
+            }
+        )
+        author_status = _stage_status(
+            status="failed",
+            ok_count=0,
+            failed_count=1,
+            degraded_count=0,
+            reason_codes=[resource_error.code],
+            failure_kind="configuration",
+        )
+        result = {
+            **derive_legacy_summary({}, analysis_input=analysis_input, validation_errors=input_errors),
+            "author_analysis_v2": {},
+            "author_analysis_input_v1": analysis_input,
+            "sampled_work_explanations": sampled_work_explanations,
+            "sampled_explanations_status": sampled_explanations_status,
+            "author_analysis_status": author_status,
+            "quality_tier": "failed",
+            "validation": {
+                "ok": False,
+                "errors": input_errors,
+            },
+        }
+        return result, input_errors, trace
+
+    if not analysis_v2 or validation_errors or skip_llm:
+        try:
+            fallback = build_fallback_author_analysis_v2(analysis_input)
+            fallback_errors = validate_author_analysis_v2(fallback, analysis_input=analysis_input)
+        except AnalysisResourceError as error:
+            trace.append({"step": "analysis.fallback_resource_error", "error": str(error)})
+            author_status = _stage_status(
+                status="failed",
+                ok_count=0,
+                failed_count=1,
+                degraded_count=0,
+                reason_codes=[error.code],
+                failure_kind="configuration",
+            )
+            result = {
+                **derive_legacy_summary({}, analysis_input=analysis_input, validation_errors=input_errors),
+                "author_analysis_v2": {},
+                "author_analysis_input_v1": analysis_input,
+                "sampled_work_explanations": sampled_work_explanations,
+                "sampled_explanations_status": sampled_explanations_status,
+                "author_analysis_status": author_status,
+                "quality_tier": "failed",
+                "validation": {
+                    "ok": False,
+                    "errors": input_errors,
+                },
+            }
+            return result, input_errors, trace

-    validation_errors = validate_author_analysis_v2(analysis_v2, analysis_input=analysis_input) if analysis_v2 else []
-    if not analysis_v2 or validation_errors:
-        fallback = build_fallback_author_analysis_v2(analysis_input)
-        fallback_errors = validate_author_analysis_v2(fallback, analysis_input=analysis_input)
         trace.append(
             {
                 "step": "analysis.fallback_used",
-                "reason": "llm_empty_or_validation_failed",
+                "reason": "small_sample_below_threshold" if skip_llm else "llm_empty_or_validation_failed",
                 "llm_ok": llm_ok,
                 "validation_error_count": len(validation_errors),
                 "fallback_error_count": len(fallback_errors),
             }
         )
         analysis_v2 = fallback
-        validation_errors = input_errors +
+        validation_errors = input_errors + validation_errors + fallback_errors
+        author_status = _stage_status(
+            status="fallback",
+            ok_count=1 if analysis_v2 else 0,
+            failed_count=0 if analysis_v2 else 1,
+            degraded_count=0,
+            reason_codes=author_reason_codes or ["fallback_used"],
+            failure_kind="runtime",
+        )
     else:
-        validation_errors = input_errors +
+        validation_errors = input_errors + validation_errors
         trace.append({"step": "analysis.schema_validation_passed"})
+        author_status = _stage_status(
+            status="full",
+            ok_count=1,
+            failed_count=0,
+            degraded_count=0,
+            reason_codes=[],
+        )

     legacy = derive_legacy_summary(analysis_v2, analysis_input=analysis_input, validation_errors=validation_errors)
+    if author_status.get("status") == "failed":
+        quality_tier = "failed"
+    elif author_status.get("status") == "fallback":
+        quality_tier = "fallback"
+    elif sampled_explanations_status.get("status") != "full":
+        quality_tier = "degraded_author_only"
+    else:
+        quality_tier = "full"
     result = {
         **legacy,
         "author_analysis_v2": analysis_v2,
         "author_analysis_input_v1": analysis_input,
         "sampled_work_explanations": sampled_work_explanations,
+        "sampled_explanations_status": sampled_explanations_status,
+        "author_analysis_status": author_status,
+        "quality_tier": quality_tier,
         "validation": {
             "ok": not bool(validation_errors),
             "errors": validation_errors,
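The tail of this hunk collapses the two stage statuses into a single quality_tier: a failed or fallback author stage wins outright, and otherwise a non-full sampled-explanations stage downgrades the run to degraded_author_only. A minimal standalone sketch of that precedence:

def derive_quality_tier(author_status: dict, sampled_status: dict) -> str:
    # author-stage failures dominate; sampled-explanation problems only degrade
    if author_status.get("status") == "failed":
        return "failed"
    if author_status.get("status") == "fallback":
        return "fallback"
    if sampled_status.get("status") != "full":
        return "degraded_author_only"
    return "full"


print(derive_quality_tier({"status": "full"}, {"status": "degraded"}))  # degraded_author_only
print(derive_quality_tier({"status": "fallback"}, {"status": "full"}))  # fallback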
package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py
CHANGED
@@ -13,8 +13,9 @@ from typing import Any, Dict, List, Tuple
 import jsonschema


-
-
+SKILL_ROOT = Path(__file__).resolve().parents[3]
+PROMPT_CONTRACT_PATH = SKILL_ROOT / "references" / "prompt-contracts" / "sampled-work-batch-explanations.md"
+SCHEMA_PATH = SKILL_ROOT / "references" / "schemas" / "sampled-work-batch-explanations.schema.json"
 DEFAULT_TIMEOUT_SEC = 45
 TEXT_LIMITS = {
     "title": 120,
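The module now anchors its reference files to the skill root by walking three directories up from its own path. A minimal sketch of the same Path.parents idiom (the layout below is illustrative):

from pathlib import Path

# For a file at <root>/scripts/author_home/analyzers/module.py,
# parents[0] is analyzers/, parents[1] is author_home/, parents[2] is scripts/,
# and parents[3] is the skill root directory.
module_path = Path("/tmp/skill/scripts/author_home/analyzers/module.py")
skill_root = module_path.resolve().parents[3]
print(skill_root / "references" / "schemas" / "example.schema.json")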
@@ -24,6 +25,17 @@ TEXT_LIMITS = {
 }


+class SampledExplanationResourceError(RuntimeError):
+    def __init__(self, *, code: str, path: Path, detail: str = "") -> None:
+        self.code = code
+        self.path = path
+        self.detail = detail
+        message = f"{code}:{path}"
+        if detail:
+            message = f"{message}:{detail}"
+        super().__init__(message)
+
+
 def _safe_text(value: Any) -> str:
     if value is None:
         return ""
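The new SampledExplanationResourceError carries a machine-readable code, the offending path, and an optional detail, rendered as "code:path" or "code:path:detail". A minimal standalone sketch of the same pattern:

from pathlib import Path


class ResourceError(RuntimeError):
    def __init__(self, *, code: str, path: Path, detail: str = "") -> None:
        self.code = code
        self.path = path
        self.detail = detail
        # the message keeps the code first so callers can match on str(error)
        message = f"{code}:{path}"
        if detail:
            message = f"{message}:{detail}"
        super().__init__(message)


try:
    raise ResourceError(code="schema_load_failed", path=Path("schema.json"), detail="FileNotFoundError:missing")
except ResourceError as error:
    print(error.code, str(error))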
@@ -42,14 +54,13 @@ def _truncate_text(value: Any, limit: int) -> str:
 def _load_json(path: Path) -> Dict[str, Any]:
     try:
         return json.loads(path.read_text(encoding="utf-8"))
-    except Exception:
-
+    except Exception as error:
+        raise SampledExplanationResourceError(code="schema_load_failed", path=path, detail=f"{type(error).__name__}:{error}") from error


-def _schema_errors(payload: Any) -> List[Dict[str, str]]:
-    schema = _load_json(SCHEMA_PATH)
+def _schema_errors(payload: Any, schema: Dict[str, Any]) -> List[Dict[str, str]]:
     if not schema:
-
+        raise SampledExplanationResourceError(code="schema_empty", path=SCHEMA_PATH)
     try:
         validator = jsonschema.Draft202012Validator(schema)
         errors: List[Dict[str, str]] = []
@@ -64,8 +75,8 @@ def _schema_errors(payload: Any) -> List[Dict[str, str]]:
 def _prompt_contract_text() -> str:
     try:
         return PROMPT_CONTRACT_PATH.read_text(encoding="utf-8").strip()
-    except Exception:
-
+    except Exception as error:
+        raise SampledExplanationResourceError(code="contract_load_failed", path=PROMPT_CONTRACT_PATH, detail=f"{type(error).__name__}:{error}") from error


 def _extract_json_block(text: str) -> Dict[str, Any]:
@@ -144,40 +155,20 @@ def _compact_input(analysis_input: Dict[str, Any]) -> Dict[str, Any]:
     }


-def _build_prompt(analysis_input: Dict[str, Any]) -> str:
+def _build_prompt(analysis_input: Dict[str, Any], *, contract_text: str) -> str:
     compacted = _compact_input(analysis_input)
     return (
         "请严格根据以下提示词原文输出,结果必须是 JSON 对象,且只输出 JSON。\n"
         "顶层对象必须是 sampled_work_explanations。\n"
         "不得输出 markdown,不得输出解释。\n\n"
         "=== 提示词原文开始 ===\n"
-        f"{
+        f"{contract_text}\n"
         "=== 提示词原文结束 ===\n\n"
         "=== 输入数据(JSON) ===\n"
         f"{json.dumps(compacted, ensure_ascii=False)}"
     )


-def _fallback_explanations(analysis_input: Dict[str, Any]) -> Dict[str, Any]:
-    sampled = analysis_input.get("sampled_works") if isinstance(analysis_input.get("sampled_works"), list) else []
-    explanations: Dict[str, Any] = {}
-    for item in sampled:
-        if not isinstance(item, dict):
-            continue
-        work_id = _safe_text(item.get("platform_work_id"))
-        if not work_id:
-            continue
-        explanations[work_id] = {
-            "why_it_worked_or_failed": f"该样本主要依赖 { _safe_text(item.get('hook_type')) or 'hook' }、{ _safe_text(item.get('structure_type')) or 'structure' } 与 { _safe_text(item.get('content_form')) or 'content_form' } 的组合。",
-            "copyable_elements": [value for value in [_safe_text(item.get("hook_type")), _safe_text(item.get("structure_type")), _safe_text(item.get("cta_type"))] if value],
-            "non_copyable_elements": ["具体个人经历或原始案例背书"],
-            "emotional_triggers": [_safe_text(item.get("hook_type")) or "结果预期"],
-            "cognitive_gap": "观众想知道为什么这个结构能成立,以及自己如何快速套用。",
-            "commercial_signal": "从 CTA 与内容结构看,具备基础商业承接意图,但证据仍有限。",
-        }
-    return {"sampled_work_explanations": explanations}
-
-
 def _coverage_errors(payload: Dict[str, Any], analysis_input: Dict[str, Any]) -> List[Dict[str, str]]:
     sampled = analysis_input.get("sampled_works") if isinstance(analysis_input.get("sampled_works"), list) else []
     explanations = payload.get("sampled_work_explanations") if isinstance(payload.get("sampled_work_explanations"), dict) else {}
@@ -193,24 +184,84 @@ def _coverage_errors(payload: Dict[str, Any], analysis_input: Dict[str, Any]) ->
     return errors


-def
+def _classify_runtime_reason(text: str) -> str:
+    lowered = (text or "").lower()
+    if "timeoutexpired" in lowered or "timeout" in lowered:
+        return "timeout"
+    if "rate limit" in lowered or "ratelimit" in lowered or "429" in lowered:
+        return "rate_limit"
+    return "transient_llm_error"
+
+
+def _stage_status(*, status: str, ok_count: int, failed_count: int, degraded_count: int, reason_codes: List[str], failure_kind: str = "") -> Dict[str, Any]:
+    return {
+        "status": status,
+        "ok_count": ok_count,
+        "failed_count": failed_count,
+        "degraded_count": degraded_count,
+        "reason_codes": list(dict.fromkeys([code for code in reason_codes if code])),
+        "failure_kind": failure_kind or None,
+    }
+
+
+def run_sampled_work_batch_explanations(analysis_input: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dict[str, str]], List[Dict[str, Any]], Dict[str, Any]]:
     sampled = analysis_input.get("sampled_works") if isinstance(analysis_input.get("sampled_works"), list) else []
     trace: List[Dict[str, Any]] = [
         {
             "step": "sampled_work_explanations.input_built",
             "sampled_works_count": len(sampled),
             "prompt_contract": "prompt-contracts/sampled-work-batch-explanations.md@v1",
+            "contract_path": str(PROMPT_CONTRACT_PATH),
+            "schema_path": str(SCHEMA_PATH),
         }
     ]

     if not sampled:
         trace.append({"step": "sampled_work_explanations.skipped", "reason": "empty_sampled_works"})
-        return {"sampled_work_explanations": {}}, [], trace
+        return {"sampled_work_explanations": {}}, [], trace, _stage_status(
+            status="skipped",
+            ok_count=0,
+            failed_count=0,
+            degraded_count=0,
+            reason_codes=["empty_sampled_works"],
+        )

     llm_timeout_sec = max(int(os.getenv("TIKOMNI_SAMPLED_EXPLANATION_TIMEOUT_SEC", str(DEFAULT_TIMEOUT_SEC))), 5)
-    prompt = _build_prompt(analysis_input)
     result: Dict[str, Any] = {}
     errors: List[Dict[str, str]] = []
+    reason_codes: List[str] = []
+
+    try:
+        contract_text = _prompt_contract_text()
+        schema = _load_json(SCHEMA_PATH)
+        trace.append(
+            {
+                "step": "sampled_work_explanations.resources_loaded",
+                "contract_loaded": True,
+                "contract_chars": len(contract_text),
+                "schema_loaded": True,
+            }
+        )
+    except SampledExplanationResourceError as error:
+        trace.append(
+            {
+                "step": "sampled_work_explanations.resource_error",
+                "contract_loaded": error.code != "contract_load_failed",
+                "contract_chars": 0,
+                "schema_loaded": error.code not in {"schema_load_failed", "schema_empty"},
+                "error": str(error),
+            }
+        )
+        return {"sampled_work_explanations": {}}, [], trace, _stage_status(
+            status="failed",
+            ok_count=0,
+            failed_count=len(sampled),
+            degraded_count=0,
+            reason_codes=[error.code],
+            failure_kind="configuration",
+        )
+
+    prompt = _build_prompt(analysis_input, contract_text=contract_text)

     try:
         run = subprocess.run(
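_classify_runtime_reason added above buckets an LLM runtime failure into timeout, rate_limit, or a generic transient_llm_error by substring-matching the stringified exception. A minimal standalone sketch:

def classify_runtime_reason(text: str) -> str:
    lowered = (text or "").lower()
    if "timeoutexpired" in lowered or "timeout" in lowered:
        return "timeout"
    if "rate limit" in lowered or "ratelimit" in lowered or "429" in lowered:
        return "rate_limit"
    return "transient_llm_error"


print(classify_runtime_reason("TimeoutExpired:command timed out"))  # timeout
print(classify_runtime_reason("HTTPError:429 Too Many Requests"))   # rate_limit
print(classify_runtime_reason("ValueError:bad json"))               # transient_llm_error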
@@ -238,23 +289,43 @@ def run_sampled_work_batch_explanations(analysis_input: Dict[str, Any]) -> Tuple
             }
         )
     except Exception as error:
-
+        reason_code = _classify_runtime_reason(f"{type(error).__name__}:{error}")
+        trace.append({"step": "sampled_work_explanations.llm_error", "error": f"{type(error).__name__}:{error}", "reason_code": reason_code})
+        return {"sampled_work_explanations": {}}, [], trace, _stage_status(
+            status="degraded",
+            ok_count=0,
+            failed_count=0,
+            degraded_count=len(sampled),
+            reason_codes=[reason_code],
+            failure_kind="runtime",
+        )

-    errors = _schema_errors(result) if result else [{"field": "$", "reason": "empty_result"}]
+    errors = _schema_errors(result, schema) if result else [{"field": "$", "reason": "empty_result"}]
     if not errors:
         errors.extend(_coverage_errors(result, analysis_input))
     if errors:
-        fallback = _fallback_explanations(analysis_input)
-        fallback_errors = _schema_errors(fallback) + _coverage_errors(fallback, analysis_input)
         trace.append(
             {
-                "step": "sampled_work_explanations.
+                "step": "sampled_work_explanations.validation_failed",
                 "reason": "llm_empty_or_validation_failed",
                 "validation_error_count": len(errors),
-                "fallback_error_count": len(fallback_errors),
             }
         )
-
+        reason_codes.append("validation_failed")
+        return {"sampled_work_explanations": {}}, errors, trace, _stage_status(
+            status="degraded",
+            ok_count=0,
+            failed_count=0,
+            degraded_count=len(sampled),
+            reason_codes=reason_codes,
+            failure_kind="runtime",
+        )

     trace.append({"step": "sampled_work_explanations.schema_validation_passed"})
-    return result, [], trace
+    return result, [], trace, _stage_status(
+        status="full",
+        ok_count=len(sampled),
+        failed_count=0,
+        degraded_count=0,
+        reason_codes=[],
+    )
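With this hunk the explainer returns a fourth element, the stage status dict, alongside the payload, validation errors, and trace. A hedged sketch of how a caller might unpack the new shape (the stub below fakes the return value; it is not the package function):

from typing import Any, Dict, List, Tuple


def run_batch_explanations_stub(analysis_input: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dict[str, str]], List[Dict[str, Any]], Dict[str, Any]]:
    # stand-in returning the same four-part shape as run_sampled_work_batch_explanations
    return {"sampled_work_explanations": {}}, [], [{"step": "stub"}], {"status": "skipped", "reason_codes": ["empty_sampled_works"]}


payload, errors, trace, status = run_batch_explanations_stub({"sampled_works": []})
if status["status"] != "full":
    print("explanations degraded or skipped:", status["reason_codes"])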
package/skills/creator-analysis/scripts/author_home/asr/home_asr.py
CHANGED
@@ -363,17 +363,68 @@ def _iter_xhs_interface_text_candidates(work: Dict[str, Any]) -> List[Tuple[str,
     return deduped


-def
+def _classify_xhs_subtitle_failure(*, work: Dict[str, Any], interface_candidates: List[Tuple[str, str]], subtitle_urls: List[str], invalid_reason: str) -> str:
+    raw_ref = work.get("raw_ref") if isinstance(work.get("raw_ref"), dict) else {}
+    has_subtitle_signal = any(
+        normalize_text(raw_ref.get(key))
+        for key in (
+            "subtitle_inline",
+            "subtitle_text",
+            "subtitle_raw",
+            "caption_text",
+            "transcript_text",
+        )
+    )
+    if interface_candidates:
+        return "subtitle_content_invalid"
+    if subtitle_urls:
+        return "subtitle_url_unavailable"
+    if has_subtitle_signal:
+        return "subtitle_structure_unrecognized"
+    if invalid_reason == "subtitle_empty":
+        return "subtitle_missing"
+    return "subtitle_content_invalid"
+
+
+def _resolve_xhs_subtitle(work: Dict[str, Any], timeout_ms: int) -> Dict[str, Any]:
     raw_ref = work.get("raw_ref") if isinstance(work.get("raw_ref"), dict) else {}
     subtitle_urls = raw_ref.get("subtitle_urls") if isinstance(raw_ref.get("subtitle_urls"), list) else []
     subtitle_urls = [normalize_text(item) for item in subtitle_urls if normalize_text(item)]
-
-
-
-
+    interface_candidates = _iter_xhs_interface_text_candidates(work)
+    invalid_reasons: List[Dict[str, str]] = []
+
+    for source, candidate in interface_candidates:
+        invalid_reason = _invalid_subtitle_reason(candidate)
+        if invalid_reason is None:
+            return {
+                "text": candidate,
+                "subtitle_source": "interface",
+                "subtitle_field": source,
+                "subtitle_urls": subtitle_urls,
+                "invalid_reasons": invalid_reasons,
+                "failure_category": "",
+            }
+        invalid_reasons.append({"field": source, "reason": invalid_reason})

     fetched = _fetch_subtitle_text(subtitle_urls, timeout_ms=timeout_ms)
-
+    cleaned = _clean_text(fetched)
+    fetched_invalid = _invalid_subtitle_reason(cleaned)
+    if fetched_invalid is not None and subtitle_urls:
+        invalid_reasons.append({"field": "subtitle_url", "reason": fetched_invalid})
+
+    return {
+        "text": cleaned,
+        "subtitle_source": "url" if subtitle_urls else "missing",
+        "subtitle_field": "subtitle_url" if subtitle_urls else "",
+        "subtitle_urls": subtitle_urls,
+        "invalid_reasons": invalid_reasons,
+        "failure_category": _classify_xhs_subtitle_failure(
+            work=work,
+            interface_candidates=interface_candidates,
+            subtitle_urls=subtitle_urls,
+            invalid_reason=fetched_invalid or "subtitle_empty",
+        ),
+    }


 def _dedupe_works_by_platform_id(works: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int]:
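_classify_xhs_subtitle_failure resolves a failure bucket by priority: interface candidates that parsed but were invalid, then unreachable subtitle URLs, then subtitle-like fields whose structure was not recognized, then a truly missing subtitle. A minimal standalone sketch of that priority chain (the boolean inputs are illustrative stand-ins for the package's richer arguments):

def classify_subtitle_failure(*, has_interface_candidates: bool, has_subtitle_urls: bool, has_subtitle_signal: bool, invalid_reason: str) -> str:
    if has_interface_candidates:
        return "subtitle_content_invalid"
    if has_subtitle_urls:
        return "subtitle_url_unavailable"
    if has_subtitle_signal:
        return "subtitle_structure_unrecognized"
    if invalid_reason == "subtitle_empty":
        return "subtitle_missing"
    return "subtitle_content_invalid"


print(classify_subtitle_failure(has_interface_candidates=False, has_subtitle_urls=True,
                                has_subtitle_signal=True, invalid_reason="subtitle_empty"))
# -> subtitle_url_unavailable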
@@ -734,7 +785,11 @@ def enrich_author_home_asr(
             )
             continue

-
+        subtitle_probe = _resolve_xhs_subtitle(work, timeout_ms=timeout_ms)
+        subtitle_text = normalize_text(subtitle_probe.get("text"))
+        subtitle_source = normalize_text(subtitle_probe.get("subtitle_source"))
+        subtitle_urls = subtitle_probe.get("subtitle_urls") if isinstance(subtitle_probe.get("subtitle_urls"), list) else []
+        subtitle_field = normalize_text(subtitle_probe.get("subtitle_field"))
         subtitle_invalid = _invalid_subtitle_reason(subtitle_text)
         if subtitle_invalid is None:
             work.update(
@@ -761,6 +816,7 @@ def enrich_author_home_asr(
                     "subtitle_source": subtitle_source,
                     "subtitle_field": subtitle_field,
                     "subtitle_url_count": len(subtitle_urls),
+                    "failure_category": "",
                 }
             )
         else:
@@ -774,6 +830,8 @@ def enrich_author_home_asr(
                     "subtitle_source": subtitle_source,
                     "subtitle_field": subtitle_field,
                     "subtitle_url_count": len(subtitle_urls),
+                    "failure_category": subtitle_probe.get("failure_category"),
+                    "invalid_reasons": subtitle_probe.get("invalid_reasons"),
                 }
             )
