@tikomni/skills 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. package/package.json +1 -1
  2. package/skills/creator-analysis/SKILL.md +34 -10
  3. package/skills/creator-analysis/references/contracts/creator-card-fields.md +2 -0
  4. package/skills/creator-analysis/references/contracts/work-card-fields.md +40 -4
  5. package/skills/creator-analysis/references/platform-guides/douyin.md +41 -36
  6. package/skills/creator-analysis/references/platform-guides/generic.md +11 -7
  7. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +45 -30
  8. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +224 -95
  9. package/skills/creator-analysis/references/workflow.md +8 -3
  10. package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +205 -21
  11. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +54 -11
  12. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +200 -13
  13. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +113 -42
  14. package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +65 -7
  15. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +82 -18
  16. package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +198 -32
  17. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +374 -31
  18. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +68 -12
  19. package/skills/creator-analysis/scripts/core/storage_router.py +3 -0
  20. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +3 -2
  21. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +314 -137
package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py

@@ -58,6 +58,46 @@ def _pick_http_urls(payload: Any, keys: List[str]) -> List[str]:
     return deduped
 
 
+def _extract_first_url(value: Any) -> str:
+    if isinstance(value, str):
+        text = value.strip()
+        return text if text.startswith("http://") or text.startswith("https://") else ""
+    if isinstance(value, list):
+        for item in value:
+            url = _extract_first_url(item)
+            if url:
+                return url
+        return ""
+    if isinstance(value, dict):
+        for key in ("url_list", "url", "uri", "avatar_url", "cover_url", "src"):
+            if key in value:
+                url = _extract_first_url(value.get(key))
+                if url:
+                    return url
+        return ""
+    return ""
+
+
+def _normalize_douyin_tags(value: Any) -> List[str]:
+    if not isinstance(value, list):
+        return []
+    tags: List[str] = []
+    for item in value:
+        if isinstance(item, str):
+            text = item.strip().lstrip("#")
+            if text:
+                tags.append(text)
+            continue
+        if not isinstance(item, dict):
+            continue
+        for key in ("hashtag_name", "search_text", "tag_name", "name", "text"):
+            text = _t(item.get(key)).lstrip("#")
+            if text:
+                tags.append(text)
+                break
+    return list(dict.fromkeys(tags))
+
+
 def _is_probable_video_url(url: str) -> bool:
     lower = (url or "").lower()
     if not (lower.startswith("http://") or lower.startswith("https://")):
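For illustration only (not part of the published diff): a minimal sketch of how the two new helpers behave, using hypothetical payload shapes modeled on Douyin responses; `_t` is the module's existing text-coercion helper.

    # Sketch with hypothetical payloads; shapes modeled on Douyin responses.
    avatar_larger = {"url_list": ["https://p3.example.com/avatar.jpeg", "https://backup.example.com/a.jpeg"]}
    _extract_first_url(avatar_larger)   # -> "https://p3.example.com/avatar.jpeg" (first http(s) hit wins)
    _extract_first_url("not-a-url")     # -> "" (plain strings must start with http:// or https://)

    _normalize_douyin_tags([
        {"hashtag_name": "职场干货"},   # dict entries are read via known key names
        "#效率工具",                    # bare strings lose the leading "#"
        {"hashtag_name": "职场干货"},   # duplicate, dropped by dict.fromkeys
    ])
    # -> ["职场干货", "效率工具"]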
@@ -93,13 +133,54 @@ def _extract_xhs_video_down_url(item: Dict[str, Any]) -> str:
     return ""
 
 
+def _normalize_text_list(value: Any) -> List[str]:
+    values: List[str] = []
+    if isinstance(value, list):
+        items = value
+    else:
+        items = [value]
+    for item in items:
+        if isinstance(item, str):
+            text = item.strip().lstrip("#")
+            if text:
+                values.append(text)
+            continue
+        if not isinstance(item, dict):
+            continue
+        for key in ("name", "tag_name", "tag", "text", "display_text", "title"):
+            text = _t(item.get(key)).lstrip("#")
+            if text:
+                values.append(text)
+                break
+    return list(dict.fromkeys(values))
+
+
 def _extract_xhs_subtitle_inline(item: Dict[str, Any]) -> str:
     lines: List[str] = []
-    for container in deep_find_all(item, ["subtitles", "subtitle_list", "subtitleList"]):
+    for container in deep_find_all(
+        item,
+        [
+            "subtitles",
+            "subtitle_list",
+            "subtitleList",
+            "subtitle",
+            "subtitle_text",
+            "caption_text",
+            "transcript",
+            "transcript_text",
+            "subtitle_content",
+            "subtitle_inline",
+        ],
+    ):
+        if isinstance(container, str):
+            value = _t(container)
+            if value:
+                lines.append(value)
+            continue
         if isinstance(container, list):
             for entry in container:
                 if isinstance(entry, dict):
-                    for key in ["text", "content", "sentence", "line"]:
+                    for key in ["text", "content", "sentence", "line", "subtitle_text", "caption_text"]:
                         value = _t(entry.get(key))
                         if value:
                             lines.append(value)
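`_normalize_text_list` is the Xiaohongshu counterpart of `_normalize_douyin_tags`; unlike the Douyin helper it also wraps a bare (non-list) value. A hedged sketch of the shapes it tolerates:

    # Sketch: single values are wrapped, unusable items are skipped.
    _normalize_text_list({"name": "#露营"})                # -> ["露营"]
    _normalize_text_list(["#露营", {"tag_name": "户外"}])  # -> ["露营", "户外"]
    _normalize_text_list(None)                             # -> []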
@@ -118,7 +199,21 @@ def _extract_xhs_subtitle_inline(item: Dict[str, Any]) -> str:
 
 
 def _extract_xhs_subtitle_urls(item: Dict[str, Any]) -> List[str]:
-    return _pick_http_urls(item, ["subtitle_url", "subtitleUrl", "srt_url", "srtUrl", "vtt_url", "vttUrl"])
+    return _pick_http_urls(
+        item,
+        [
+            "subtitle_url",
+            "subtitleUrl",
+            "srt_url",
+            "srtUrl",
+            "vtt_url",
+            "vttUrl",
+            "caption_url",
+            "captionUrl",
+            "subtitle_urls",
+            "subtitleUrls",
+        ],
+    )
 
 
 def _extract_xhs_work_modality(item: Dict[str, Any], *, video_download_url: str, subtitle_inline: str) -> str:
@@ -132,6 +227,66 @@ def _extract_xhs_work_modality(item: Dict[str, Any], *, video_download_url: str,
     return "text"
 
 
+def _extract_xhs_avatar_url(payload: Any) -> str:
+    return (
+        _extract_first_url(_first(payload, ["image"], ""))
+        or _extract_first_url(_first(payload, ["avatar"], ""))
+        or _extract_first_url(_first(payload, ["avatar_url"], ""))
+        or _extract_first_url(_first(payload, ["images"], ""))
+        or _extract_first_url(_first(payload, ["avatar_info"], ""))
+    )
+
+
+def _extract_xhs_cover_image(item: Dict[str, Any]) -> str:
+    return (
+        _extract_first_url(_first(item, ["cover"], ""))
+        or _extract_first_url(_first(item, ["cover_url"], ""))
+        or _extract_first_url(_first(item, ["cover_image"], ""))
+        or _extract_first_url(_first(item, ["image"], ""))
+        or _extract_first_url(_first(item, ["image_url"], ""))
+        or _extract_first_url(_first(item, ["images"], ""))
+    )
+
+
+def _extract_xhs_share_url(item: Dict[str, Any], note_id: str) -> str:
+    return (
+        _t(_first(item, ["share_url", "share_link", "url", "note_url", "short_url"]))
+        or (f"https://www.xiaohongshu.com/explore/{note_id}" if note_id else "")
+    )
+
+
+def _extract_xhs_source_url(item: Dict[str, Any], note_id: str) -> str:
+    return (
+        _t(_first(item, ["source_url", "note_url", "url", "share_url", "share_link"]))
+        or (f"https://www.xiaohongshu.com/explore/{note_id}" if note_id else "")
+    )
+
+
+def _extract_xhs_title(item: Dict[str, Any]) -> str:
+    return _t(_first(item, ["title", "display_title", "note_title", "name"]))
+
+
+def _extract_xhs_caption(item: Dict[str, Any]) -> str:
+    return _t(_first(item, ["desc", "content", "note_desc", "description", "text"]))
+
+
+def _extract_xhs_tags(item: Dict[str, Any]) -> List[str]:
+    for key in ("tag_list", "tags", "hashtags", "topics"):
+        value = _first(item, [key], [])
+        tags = _normalize_text_list(value)
+        if tags:
+            return tags
+    return []
+
+
+def _extract_xhs_profile_payload(raw: Dict[str, Any]) -> Any:
+    profile_response = raw.get("profile_response") if isinstance(raw.get("profile_response"), dict) else {}
+    profile_data = profile_response.get("data")
+    if isinstance(profile_data, dict):
+        return profile_data
+    return profile_response
+
+
 def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dict[str, Any]], List[Dict[str, str]]]:
     missing: List[Dict[str, str]] = []
     profile_data = raw.get("profile_response", {}).get("data")
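A hedged usage sketch for the new XHS helpers: the profile extractor tolerates responses without a `data` envelope, and the URL helpers synthesize a canonical explore link when the item carries none (sample dicts are hypothetical; `_first`/`_t` are assumed to return empty values on missing keys, as elsewhere in this module):

    # Sketch with hypothetical response shapes.
    _extract_xhs_profile_payload({"profile_response": {"data": {"user_id": "u1"}}})  # -> {"user_id": "u1"}
    _extract_xhs_profile_payload({"profile_response": {"user_id": "u1"}})            # -> {"user_id": "u1"} (no "data" envelope)

    _extract_xhs_share_url({}, "65f0abcd")  # -> "https://www.xiaohongshu.com/explore/65f0abcd"
    _extract_xhs_share_url({}, "")          # -> "" (no note_id to build a fallback link from)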
@@ -151,7 +306,11 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
         liked_count=_i(_first(profile_data, ["total_favorited", "liked_count", "favoriting_count"])),
         collected_count=_i(_first(profile_data, ["collect_count", "collected_count", "total_collected_count"])),
         signature=_t(_first(profile_data, ["signature", "desc"])),
-        avatar_url=_t(_first(profile_data, ["avatar_larger", "avatar_thumb", "avatar_url", "avatar"])),
+        avatar_url=(
+            _extract_first_url(_first(profile_data, ["avatar_larger"], ""))
+            or _extract_first_url(_first(profile_data, ["avatar_thumb"], ""))
+            or _extract_first_url(_first(profile_data, ["avatar_url", "avatar"], ""))
+        ),
         works_count=_i(_first(profile_data, ["aweme_count", "works_count", "video_count"])),
         verified=bool(_first(profile_data, ["verification_type", "verified"], 0) not in (0, None, "", "false", False)),
         snapshot_at=datetime.now().isoformat(timespec="seconds"),
@@ -177,6 +336,7 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
             "play": _i(_first(item, ["play_count", "view_count"], 0)),
         }
         video_down_url = _extract_douyin_video_down_url(item)
+        tags = _normalize_douyin_tags(_first(item, ["hashtags", "tags", "text_extra"], []))
         work = build_work_item(
             platform="douyin",
             platform_work_id=aweme_id,
@@ -191,9 +351,13 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
             work_modality="video",
             content_type="video",
             duration_ms=_i(_first(item, ["duration_ms", "duration"], 0)),
-            tags=list(_first(item, ["hashtags", "tags", "text_extra"], [])) if isinstance(_first(item, ["hashtags", "tags", "text_extra"], []), list) else [],
+            tags=tags,
             metrics=metrics,
-            cover_image=_t(_first(item, ["cover_url", "cover", "origin_cover"], "")),
+            cover_image=(
+                _extract_first_url(_first(item, ["cover_url"], ""))
+                or _extract_first_url(_first(item, ["cover"], ""))
+                or _extract_first_url(_first(item, ["origin_cover"], ""))
+            ),
             source_url=f"https://www.douyin.com/video/{aweme_id}" if aweme_id else "",
             share_url=_t(_first(item, ["share_url", "share_link"])),
             video_download_url=video_down_url,
@@ -206,6 +370,15 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
             },
             raw_ref={"aweme_id": aweme_id, "raw_item": item},
         )
+        work.update(
+            {
+                "digg_count": metrics["like"],
+                "comment_count": metrics["comment"],
+                "collect_count": metrics["collect"],
+                "share_count": metrics["share"],
+                "play_count": metrics["play"],
+            }
+        )
 
         missing.extend(validate_work_item(work))
         works.append(work)
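The `work.update(...)` block mirrors the `metrics` map onto flat top-level counters, presumably so downstream readers of `digg_count` and friends (for example the analyzers' `_normalize_work`) see the same numbers without reaching into `metrics`. The resulting shape, sketched:

    # Sketch: both views of the counters coexist on the work dict.
    metrics = {"like": 1200, "comment": 45, "collect": 300, "share": 20, "play": 98000}
    work = {"metrics": metrics}
    work.update({"digg_count": metrics["like"], "play_count": metrics["play"]})  # etc.
    assert work["digg_count"] == work["metrics"]["like"] == 1200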
@@ -216,7 +389,7 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
 
 def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dict[str, Any]], List[Dict[str, str]]]:
     missing: List[Dict[str, str]] = []
-    profile_data = raw.get("profile_response", {}).get("data")
+    profile_data = _extract_xhs_profile_payload(raw)
 
     author_id = _t(_first(profile_data, ["user_id", "userid", "id"], raw.get("resolved_author_id")))
     author_handle = _t(_first(profile_data, ["red_id", "redid", "display_id", "username"]))
@@ -230,7 +403,7 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
         liked_count=_i(_first(profile_data, ["liked_count", "likes", "total_liked", "like_count"])),
         collected_count=_i(_first(profile_data, ["collected_count", "collect_count", "total_collected", "favorite_count"])),
         signature=_t(_first(profile_data, ["desc", "signature", "bio", "introduction"])),
-        avatar_url=_t(_first(profile_data, ["image", "avatar", "avatar_url", "images"])),
+        avatar_url=_extract_xhs_avatar_url(profile_data),
         works_count=_i(_first(profile_data, ["notes", "note_count", "works_count", "post_count"])),
         verified=bool(_first(profile_data, ["official_verified", "verified"], False)),
         snapshot_at=datetime.now().isoformat(timespec="seconds"),
@@ -245,13 +418,12 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
         if not isinstance(item, dict):
             continue
         note_id = _t(_first(item, ["note_id", "id", "item_id"]))
-        interact = _first(item, ["interact_info", "interaction_info", "statistics"], {})
         metrics = {
-            "like": _i(_first(interact, ["liked_count", "like_count", "digg_count"], 0)),
-            "comment": _i(_first(interact, ["comment_count"], 0)),
-            "collect": _i(_first(interact, ["collected_count", "collect_count"], 0)),
-            "share": _i(_first(interact, ["share_count"], 0)),
-            "play": _i(_first(interact, ["view_count", "play_count"], 0)),
+            "like": _i(_first(item, ["liked_count", "like_count", "digg_count"], 0)),
+            "comment": _i(_first(item, ["comment_count"], 0)),
+            "collect": _i(_first(item, ["collected_count", "collect_count"], 0)),
+            "share": _i(_first(item, ["share_count"], 0)),
+            "play": _i(_first(item, ["view_count", "play_count"], 0)),
         }
         subtitle_inline = _extract_xhs_subtitle_inline(item)
         subtitle_urls = _extract_xhs_subtitle_urls(item)
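Note the dropped `interact_info` indirection: counters are now read off the note item itself, which assumes the updated collectors (see `homepage_collectors.py` in the file list) deliver flattened items. Under that assumption:

    # Hypothetical flattened note item.
    item = {"note_id": "n1", "liked_count": 321, "comment_count": 12,
            "collected_count": 88, "share_count": 5, "view_count": 40000}
    # The loop above would then yield:
    # metrics == {"like": 321, "comment": 12, "collect": 88, "share": 5, "play": 40000}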
@@ -259,6 +431,9 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
         content_type_raw = _t(_first(item, ["type", "note_type", "model_type"]))
         work_modality = _extract_xhs_work_modality(item, video_download_url=video_down_url, subtitle_inline=subtitle_inline)
         content_type = "video" if work_modality == "video" else (content_type_raw or "text")
+        cover_image = _extract_xhs_cover_image(item)
+        source_url = _extract_xhs_source_url(item, note_id)
+        share_url = _extract_xhs_share_url(item, note_id)
 
         work = build_work_item(
             platform="xiaohongshu",
@@ -266,19 +441,19 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
             platform_author_id=author_id,
             author_handle=author_handle,
             author_platform_id=author_id,
-            title=_t(_first(item, ["title", "display_title"])),
-            caption_raw=_t(_first(item, ["desc", "content"])),
+            title=_extract_xhs_title(item),
+            caption_raw=_extract_xhs_caption(item),
             subtitle_raw=subtitle_inline,
             subtitle_source="native_subtitle" if subtitle_inline else "missing",
-            publish_time=_t(_first(item, ["publish_time", "time", "create_time"])),
+            publish_time=_t(_first(item, ["publish_time", "time", "create_time", "publishTime", "created_at"])),
             work_modality=work_modality,
             content_type=content_type,
             duration_ms=_i(_first(item, ["duration_ms", "duration", "video_duration"], 0)),
-            tags=list(_first(item, ["tag_list", "tags", "hashtags"], [])) if isinstance(_first(item, ["tag_list", "tags", "hashtags"], []), list) else [],
+            tags=_extract_xhs_tags(item),
             metrics=metrics,
-            cover_image=_t(_first(item, ["cover", "cover_url", "cover_image", "image", "image_url"], "")),
-            source_url=f"https://www.xiaohongshu.com/explore/{note_id}" if note_id else "",
-            share_url=_t(_first(item, ["share_url", "share_link", "url", "note_url"])),
+            cover_image=cover_image,
+            source_url=source_url,
+            share_url=share_url,
             video_download_url=video_down_url,
             asr_status="subtitle_ready" if subtitle_inline else "pending",
             asr_error_reason="",
@@ -291,6 +466,15 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
                 "subtitle_urls": subtitle_urls,
             },
         )
+        work.update(
+            {
+                "digg_count": metrics["like"],
+                "comment_count": metrics["comment"],
+                "collect_count": metrics["collect"],
+                "share_count": metrics["share"],
+                "play_count": metrics["play"],
+            }
+        )
 
         missing.extend(validate_work_item(work))
         works.append(work)
package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py

@@ -13,9 +13,10 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
 
 import jsonschema
 
-INPUT_SCHEMA_PATH = Path(__file__).resolve().parents[2] / "references" / "schemas" / "author-analysis-input-v1.schema.json"
-OUTPUT_SCHEMA_PATH = Path(__file__).resolve().parents[2] / "references" / "schemas" / "author-analysis-v2.schema.json"
-PROMPT_CONTRACT_PATH = Path(__file__).resolve().parents[2] / "references" / "prompt-contracts" / "author-analysis-v2.md"
+SKILL_ROOT = Path(__file__).resolve().parents[3]
+INPUT_SCHEMA_PATH = SKILL_ROOT / "references" / "schemas" / "author-analysis-input-v1.schema.json"
+OUTPUT_SCHEMA_PATH = SKILL_ROOT / "references" / "schemas" / "author-analysis-v2.schema.json"
+PROMPT_CONTRACT_PATH = SKILL_ROOT / "references" / "prompt-contracts" / "author-analysis-v2.md"
 
 LOW_HIGH_MID = {"low", "mid", "high"}
 RELATIONSHIP_DISTANCE = {"near", "mid", "far"}
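The `parents` bump matters because this module sits three directories below the skill root, so the old `parents[2]` base pointed at `scripts/`, which contains no `references/` directory. The resolution, spelled out:

    # Path(__file__).resolve() is
    #   .../creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py
    # parents[0] -> .../scripts/author_home/analyzers
    # parents[1] -> .../scripts/author_home
    # parents[2] -> .../scripts                (old base; no references/ here)
    # parents[3] -> .../creator-analysis       (skill root containing references/)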
@@ -49,6 +50,17 @@ STOPWORDS = {
 }
 SCHEMA_CACHE: Dict[Path, Dict[str, Any]] = {}
 
+
+class AnalysisResourceError(RuntimeError):
+    def __init__(self, *, code: str, path: Path, detail: str = "") -> None:
+        self.code = code
+        self.path = path
+        self.detail = detail
+        message = f"{code}:{path}"
+        if detail:
+            message = f"{message}:{detail}"
+        super().__init__(message)
+
 REQUIRED_V2_FIELDS = {
     "author_positioning": ["one_liner", "author_type", "primary_role", "target_audience", "core_problem_solved", "core_value_proposition", "evidence"],
     "trust_model": ["primary_trust_source", "secondary_trust_sources", "trust_building_mechanisms", "trust_risks", "relationship_posture", "evidence"],
@@ -105,15 +117,15 @@ def _clamp(value: float, low: float, high: float) -> float:
 def load_json_schema(path: Path) -> Dict[str, Any]:
     try:
         return json.loads(path.read_text(encoding="utf-8"))
-    except Exception:
-        return {}
+    except Exception as error:
+        raise AnalysisResourceError(code="schema_load_failed", path=path, detail=f"{type(error).__name__}:{error}") from error
 
 
 def prompt_contract_text() -> str:
     try:
         return PROMPT_CONTRACT_PATH.read_text(encoding="utf-8").strip()
-    except Exception:
-        return ""
+    except Exception as error:
+        raise AnalysisResourceError(code="contract_load_failed", path=PROMPT_CONTRACT_PATH, detail=f"{type(error).__name__}:{error}") from error
 
 
 def _load_schema(path: Path) -> Dict[str, Any]:
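With the loaders now raising instead of returning empty defaults, a caller can distinguish a broken skill install from a merely invalid payload. An illustrative (not packaged) handler:

    # Illustrative only: surfacing the structured error at a call site.
    try:
        schema = load_json_schema(OUTPUT_SCHEMA_PATH)
    except AnalysisResourceError as error:
        # error.code is machine-readable ("schema_load_failed", "schema_empty", ...);
        # error.path names the resource and error.detail carries the original cause.
        raise SystemExit(f"analysis resources unavailable: {error}") from error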
@@ -128,7 +140,7 @@ def _load_schema(path: Path) -> Dict[str, Any]:
 def _schema_errors(payload: Any, path: Path) -> List[Dict[str, str]]:
     schema = _load_schema(path)
     if not schema:
-        return []
+        raise AnalysisResourceError(code="schema_empty", path=path)
     try:
         validator = jsonschema.Draft202012Validator(schema)
         rows: List[Dict[str, str]] = []
@@ -152,6 +164,24 @@ def _dedupe_keep_order(values: Sequence[str]) -> List[str]:
     return result
 
 
+def _safe_text_list(value: Any) -> List[str]:
+    if not isinstance(value, list):
+        return []
+    result: List[str] = []
+    for item in value:
+        if isinstance(item, dict):
+            for key in ("name", "value", "label", "hashtag_name", "search_text", "tag_name", "text"):
+                text = _safe_text(item.get(key))
+                if text:
+                    result.append(text)
+                    break
+            continue
+        text = _safe_text(item)
+        if text:
+            result.append(text)
+    return _dedupe_keep_order(result)
+
+
 def _dedupe_error_list(errors: Sequence[Dict[str, str]]) -> List[Dict[str, str]]:
     result: List[Dict[str, str]] = []
     seen = set()
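`_safe_text_list` gives the analyzer side the same tag hardening the adapters gained; a quick sketch with hypothetical values:

    # Sketch: mixed tag shapes collapse to deduped plain strings.
    _safe_text_list(["旅行", {"hashtag_name": "旅行"}, {"label": "vlog"}])  # -> ["旅行", "vlog"]
    _safe_text_list("旅行")  # -> [] (non-list input is rejected outright)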
@@ -396,7 +426,7 @@ def _normalize_work(profile: Dict[str, Any], work: Dict[str, Any]) -> Dict[str,
         "share_count": share,
         "play_count": play,
         "content_form": _pick_content_form(work),
-        "tags": list(work.get("tags") or []) if isinstance(work.get("tags"), list) else [],
+        "tags": _safe_text_list(work.get("tags")),
         "author_id": _safe_text(profile.get("author_platform_id") or profile.get("platform_author_id")),
         "author_name": _safe_text(profile.get("nickname")) or "作者",
         "performance_score": performance_score,
@@ -636,12 +666,13 @@ def _compare_bucket_groups(items: List[Dict[str, Any]]) -> Dict[str, Any]:
     return result
 
 
-def build_author_analysis_input_v1(*, profile: Dict[str, Any], works: List[Dict[str, Any]], platform: str) -> Tuple[Dict[str, Any], List[Dict[str, str]]]:
+def prepare_author_analysis_bundle(*, profile: Dict[str, Any], works: List[Dict[str, Any]], platform: str) -> Dict[str, Any]:
     normalized = [_normalize_work(profile, work) for work in works if isinstance(work, dict)]
     eligible = [item for item in normalized if _safe_text(item.get("analysis_eligibility")) == "eligible"]
     excluded_count = len(normalized) - len(eligible)
     ranked = _assign_buckets(eligible)
     sampled = _sample_standard_works(ranked)
+    sampled_work_ids = [_safe_text(item.get("platform_work_id")) for item in sampled if _safe_text(item.get("platform_work_id"))]
     aggregate_stats = {
         "total_works": len(ranked),
         "excluded_works_count": excluded_count,
@@ -663,7 +694,7 @@ def build_author_analysis_input_v1(*, profile: Dict[str, Any], works: List[Dict[
         "global_bucket_distribution": _distribution_from_values([_safe_text(item.get("bucket")) for item in ranked], limit=4),
         "global_top_vs_mid_vs_bottom_deltas": _compare_bucket_groups(ranked),
     }
-    payload = {
+    analysis_input = {
         "author_profile": {
             "platform": _safe_text(profile.get("platform")) or platform,
             "platform_author_id": _safe_text(profile.get("author_platform_id") or profile.get("platform_author_id")),
@@ -696,6 +727,18 @@ def build_author_analysis_input_v1(*, profile: Dict[str, Any], works: List[Dict[
             "sampled_works_count": len(sampled),
         },
     }
+    return {
+        "analysis_input": analysis_input,
+        "normalized_works": normalized,
+        "ranked_works": ranked,
+        "sampled_works": sampled,
+        "sampled_work_ids": sampled_work_ids,
+        "excluded_works_count": excluded_count,
+    }
+
+
+def build_author_analysis_input_v1(*, profile: Dict[str, Any], works: List[Dict[str, Any]], platform: str) -> Tuple[Dict[str, Any], List[Dict[str, str]]]:
+    payload = prepare_author_analysis_bundle(profile=profile, works=works, platform=platform).get("analysis_input") or {}
     return payload, validate_author_analysis_input_v1(payload)
 
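The split makes the intermediate products (normalized, ranked, sampled works) reachable by the orchestrator while keeping `build_author_analysis_input_v1` signature-compatible. A hedged call-site sketch, with `profile` and `works` as produced by the platform adapters above:

    # Illustrative call site.
    bundle = prepare_author_analysis_bundle(profile=profile, works=works, platform="douyin")
    analysis_input = bundle["analysis_input"]    # the payload the old builder returned
    sampled_ids = bundle["sampled_work_ids"]     # newly exposed for artifact writers
    # The legacy entry point is now a thin wrapper over the same bundle:
    payload, errors = build_author_analysis_input_v1(profile=profile, works=works, platform="douyin")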