@tikomni/skills 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/creator-analysis/SKILL.md +34 -10
- package/skills/creator-analysis/references/contracts/creator-card-fields.md +2 -0
- package/skills/creator-analysis/references/contracts/work-card-fields.md +40 -4
- package/skills/creator-analysis/references/platform-guides/douyin.md +41 -36
- package/skills/creator-analysis/references/platform-guides/generic.md +11 -7
- package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +45 -30
- package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +224 -95
- package/skills/creator-analysis/references/workflow.md +8 -3
- package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +205 -21
- package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +54 -11
- package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +200 -13
- package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +113 -42
- package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +65 -7
- package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +82 -18
- package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +198 -32
- package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +374 -31
- package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +68 -12
- package/skills/creator-analysis/scripts/core/storage_router.py +3 -0
- package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +3 -2
- package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +314 -137
|
@@ -11,51 +11,111 @@ from typing import Any, Dict, List, Optional
|
|
|
11
11
|
from scripts.writers.write_benchmark_card import write_benchmark_card
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
AUTHOR_SAMPLE_CARD_ROLE = "author_sample_card"
|
|
15
|
+
SAMPLE_WORK_CARD_ROLE = "sample_work_card"
|
|
16
|
+
AUTHOR_CARD_ROLE = "author_card"
|
|
17
|
+
|
|
18
|
+
|
|
14
19
|
def build_work_cards(
    *,
    platform: str,
    profile: Dict[str, Any],
    works: List[Dict[str, Any]],
    render_payloads: Dict[str, Dict[str, Any]],
    sampled_work_ids: Optional[List[str]],
    sampled_work_explanations: Optional[Dict[str, Any]],
    card_root: Optional[str],
    storage_config: Optional[Dict[str, Any]],
    write_card: bool,
    failed_items: Optional[List[Dict[str, Any]]] = None,
) -> Dict[str, Any]:
    """Write one author-sample card per work, plus a sample-work card for sampled works.

    Each work's render payload is looked up in ``render_payloads`` by its
    ``platform_work_id``. Works without a dict payload are recorded as
    ``missing_work_analysis_artifact`` failures. For every remaining work an
    author-sample card is written; if that succeeds and the work id is listed
    in ``sampled_work_ids``, a second card carrying ``sampled_explanation`` is
    written as well. Writer exceptions are captured into the matching
    ``failed_items`` list instead of propagating.

    Returns a dict with ``author_sample_cards`` and ``sample_work_cards``
    summaries. When ``write_card`` is false both summaries are disabled and
    empty, and nothing is written.
    """
    if not write_card:
        return {
            "author_sample_cards": {"enabled": False, "count": 0, "results": []},
            "sample_work_cards": {"enabled": False, "count": 0, "results": []},
        }

    sample_author = str(profile.get("nickname") or profile.get("platform_author_id") or "作者")

    author_cards: List[Dict[str, Any]] = []
    sample_cards: List[Dict[str, Any]] = []
    # Failures reported by the caller are carried over into the author-sample list.
    author_failures: List[Dict[str, Any]] = list(failed_items or [])
    sample_failures: List[Dict[str, Any]] = []

    sampled_ids = set()
    for raw_id in sampled_work_ids or []:
        cleaned_id = str(raw_id).strip()
        if cleaned_id:
            sampled_ids.add(cleaned_id)

    explanations = sampled_work_explanations if isinstance(sampled_work_explanations, dict) else {}

    # Keyword arguments shared by both writer calls below.
    shared_kwargs: Dict[str, Any] = {
        "platform": platform,
        "card_type": "author_sample_work",
        "card_root": card_root,
        "sample_author": sample_author,
        "content_kind": "author_home",
        "storage_config": storage_config,
    }

    for work in works:
        work_id = str(work.get("platform_work_id") or "").strip()
        rendered = render_payloads.get(work_id)
        if not isinstance(rendered, dict):
            author_failures.append(
                {
                    "platform_work_id": work_id,
                    "error_reason": "missing_work_analysis_artifact",
                }
            )
            continue

        base_payload = dict(rendered)
        try:
            author_card = write_benchmark_card(
                payload=base_payload,
                card_role=AUTHOR_SAMPLE_CARD_ROLE,
                **shared_kwargs,
            )
        except Exception as error:
            # A failed author-sample card also skips the sample-work card for this work.
            author_failures.append(
                {
                    "platform_work_id": work_id,
                    "error_reason": f"author_sample_card_write_failed:{type(error).__name__}:{error}",
                }
            )
            continue
        else:
            author_cards.append(author_card)

        if work_id not in sampled_ids:
            continue

        # Copy only after the first write so the sample card sees the same payload state.
        explanation = explanations.get(work_id)
        sample_payload = dict(base_payload)
        sample_payload["sampled_explanation"] = explanation if isinstance(explanation, dict) else {}
        try:
            sample_card = write_benchmark_card(
                payload=sample_payload,
                route_card_type="author",
                route_extra_parts=["sample_work"],
                card_role=SAMPLE_WORK_CARD_ROLE,
                **shared_kwargs,
            )
        except Exception as error:
            sample_failures.append(
                {
                    "platform_work_id": work_id,
                    "error_reason": f"sample_work_card_write_failed:{type(error).__name__}:{error}",
                }
            )
        else:
            sample_cards.append(sample_card)

    return {
        "author_sample_cards": {
            "enabled": True,
            "count": len(author_cards),
            "results": author_cards,
            "failed_items": author_failures,
        },
        "sample_work_cards": {
            "enabled": True,
            "count": len(sample_cards),
            "results": sample_cards,
            "failed_items": sample_failures,
        },
    }
|
|
60
120
|
|
|
61
121
|
|
|
@@ -127,6 +187,9 @@ def build_author_card(
|
|
|
127
187
|
"author_analysis_v2": author_analysis_v2,
|
|
128
188
|
"author_analysis_input_v1": analysis_payload.get("author_analysis_input_v1") if isinstance(analysis_payload.get("author_analysis_input_v1"), dict) else {},
|
|
129
189
|
"sampled_work_explanations": analysis_payload.get("sampled_work_explanations") if isinstance(analysis_payload.get("sampled_work_explanations"), dict) else {},
|
|
190
|
+
"stage_status": analysis_payload.get("stage_status") if isinstance(analysis_payload.get("stage_status"), dict) else {},
|
|
191
|
+
"quality_tier": analysis_payload.get("quality_tier") or "",
|
|
192
|
+
"sampled_work_ids": list(analysis_payload.get("sampled_work_ids") or []) if isinstance(analysis_payload.get("sampled_work_ids"), list) else [],
|
|
130
193
|
"author_card_highlights": author_card_highlights,
|
|
131
194
|
"business_score": int(analysis_payload.get("business_score", 0) or 0),
|
|
132
195
|
"benchmark_gap_score": int(analysis_payload.get("benchmark_gap_score", 0) or 0),
|
|
@@ -146,4 +209,5 @@ def build_author_card(
|
|
|
146
209
|
sample_author=None,
|
|
147
210
|
content_kind="author_analysis",
|
|
148
211
|
storage_config=storage_config,
|
|
212
|
+
card_role=AUTHOR_CARD_ROLE,
|
|
149
213
|
)
|
|
@@ -8,7 +8,7 @@ from urllib.parse import parse_qs, urlparse
|
|
|
8
8
|
|
|
9
9
|
from scripts.core.extract_pipeline import build_api_trace
|
|
10
10
|
from scripts.core.progress_report import ProgressReporter
|
|
11
|
-
from scripts.core.tikomni_common import call_json_api, deep_find_first
|
|
11
|
+
from scripts.core.tikomni_common import call_json_api, deep_find_all, deep_find_first
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def _to_text(value: Any) -> str:
|
|
@@ -140,6 +140,30 @@ def _pick_request_id(responses: List[Optional[Dict[str, Any]]], trace: Optional[
|
|
|
140
140
|
return None
|
|
141
141
|
|
|
142
142
|
|
|
143
|
+
def _extract_first_url(value: Any) -> str:
|
|
144
|
+
if isinstance(value, str):
|
|
145
|
+
text = value.strip()
|
|
146
|
+
return text if text.startswith("http://") or text.startswith("https://") else ""
|
|
147
|
+
if isinstance(value, list):
|
|
148
|
+
for item in value:
|
|
149
|
+
url = _extract_first_url(item)
|
|
150
|
+
if url:
|
|
151
|
+
return url
|
|
152
|
+
return ""
|
|
153
|
+
if isinstance(value, dict):
|
|
154
|
+
for key in ("url_list", "url", "src", "avatar_url", "cover_url", "image", "images", "default"):
|
|
155
|
+
if key in value:
|
|
156
|
+
url = _extract_first_url(value.get(key))
|
|
157
|
+
if url:
|
|
158
|
+
return url
|
|
159
|
+
for nested in value.values():
|
|
160
|
+
url = _extract_first_url(nested)
|
|
161
|
+
if url:
|
|
162
|
+
return url
|
|
163
|
+
return ""
|
|
164
|
+
return ""
|
|
165
|
+
|
|
166
|
+
|
|
143
167
|
def _build_field_completeness(fields: Dict[str, bool], *, core_keys: List[str]) -> Dict[str, Any]:
|
|
144
168
|
filled_count = sum(1 for value in fields.values() if value)
|
|
145
169
|
missing_core = [key for key in core_keys if not fields.get(key)]
|
|
@@ -155,31 +179,149 @@ def _build_field_completeness(fields: Dict[str, bool], *, core_keys: List[str])
|
|
|
155
179
|
|
|
156
180
|
def _xhs_profile_field_completeness(payload: Any, resolved_author_id: str) -> Dict[str, Any]:
    """Build the field-completeness report for a Xiaohongshu profile payload.

    Records whether the author id, nickname, avatar URL, fan count and works
    count can be found in ``payload``. ``resolved_author_id`` counts as an
    author id when the payload itself carries none. Only the author id and
    nickname are passed on as core keys.
    """
    has_author_id = bool(_pick_text(payload, ["user_id", "userid", "uid", "id"]) or resolved_author_id)
    has_nickname = bool(_pick_text(payload, ["nickname", "name"]))

    avatar_candidate = _first_url_candidate(payload, ["image", "avatar", "avatar_url", "images"])
    has_avatar = bool(_extract_first_url(avatar_candidate))

    fans_total = _pick_int(payload, ["fans", "fans_count", "follower_count"], default=0)
    works_total = _pick_int(payload, ["notes", "note_count", "works_count"], default=0)

    presence = {
        "platform_author_id": has_author_id,
        "nickname": has_nickname,
        "avatar_url": has_avatar,
        "fans_count": fans_total > 0,
        "works_count": works_total > 0,
    }
    return _build_field_completeness(presence, core_keys=["platform_author_id", "nickname"])
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _first_url_candidate(payload: Any, keys: List[str]) -> Any:
    """Return the first value found under ``keys`` that contains an http(s) URL.

    Keys are tried in the given order. For each key, every value yielded by
    ``deep_find_all(payload, [key])`` is checked with ``_extract_first_url``.
    The matching value itself is returned, not the extracted URL. Returns
    ``""`` when nothing matches.
    """
    matches = (
        candidate
        for name in keys
        for candidate in deep_find_all(payload, [name])
        if _extract_first_url(candidate)
    )
    return next(matches, "")
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _pick_first_mapping(items: List[Any]) -> Dict[str, Any]:
|
|
201
|
+
for item in items:
|
|
202
|
+
if isinstance(item, dict):
|
|
203
|
+
return item
|
|
204
|
+
return {}
|
|
165
205
|
|
|
166
206
|
|
|
167
207
|
def _xhs_posts_field_completeness(payload: Any) -> Dict[str, Any]:
    """Build the field-completeness report for a Xiaohongshu posts-page payload.

    Page-level signals (item list, cursor, has-more flag) are read from
    ``payload``. Per-work signals (id, title/caption, publish time, links,
    interaction counters) are read from the first dict item of the page only.
    """
    notes = _pick_list(payload, ["notes", "note_list", "noteList", "items", "list"])
    sample = _pick_first_mapping(notes)

    # A non-negative value means some has-more key was present in the payload.
    has_more_seen = _pick_int(payload, ["has_more", "hasMore"], default=-1) >= 0
    cursor_seen = bool(_pick_text(payload, ["cursor", "next_cursor", "last_cursor", "last_note_id"]))

    cover_candidate = _first_url_candidate(sample, ["cover", "cover_url", "cover_image", "image", "image_url"])
    cover_seen = bool(_extract_first_url(cover_candidate))

    # Either an explicit share link or, failing that, a work id.
    link_or_id_seen = bool(_pick_text(sample, ["share_url", "share_link", "url", "note_url"]))
    if not link_or_id_seen:
        link_or_id_seen = bool(_pick_text(sample, ["note_id", "id", "item_id"]))

    counter_key_groups = (
        ["liked_count", "like_count", "digg_count"],
        ["comment_count"],
        ["collected_count", "collect_count"],
        ["share_count"],
        ["view_count", "play_count"],
    )
    counters = [_pick_int(sample, group, default=-1) for group in counter_key_groups]

    has_items = len(notes) > 0
    presence = {
        "items": has_items,
        "platform_work_id": bool(_pick_text(sample, ["note_id", "id", "item_id"])),
        "title_or_caption": bool(_pick_text(sample, ["title", "display_title", "desc", "content"])),
        "published_date": bool(_pick_text(sample, ["publish_time", "time", "create_time"])),
        "base_link_fields": cover_seen or link_or_id_seen,
        "interaction_fields": any(counter >= 0 for counter in counters),
        "cursor": cursor_seen,
        "has_more_flag": has_more_seen,
        "response_shape": has_items or cursor_seen or has_more_seen,
    }
    return _build_field_completeness(
        presence,
        core_keys=["items", "platform_work_id", "title_or_caption", "published_date"],
    )
|
|
179
233
|
|
|
180
234
|
|
|
181
|
-
def
|
|
182
|
-
|
|
235
|
+
def _xhs_route_failure_reason(response: Dict[str, Any]) -> str:
|
|
236
|
+
if response.get("timeout_retry_exhausted"):
|
|
237
|
+
return "primary_timeout_retry_exhausted"
|
|
238
|
+
if response.get("error_reason"):
|
|
239
|
+
return "primary_non_timeout_failure"
|
|
240
|
+
return "primary_unknown_failure"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _xhs_profile_accept_decision(response: Dict[str, Any], completeness: Dict[str, Any]) -> Dict[str, Any]:
|
|
244
|
+
if not response.get("ok"):
|
|
245
|
+
return {
|
|
246
|
+
"accepted": False,
|
|
247
|
+
"accept_reason": "response_not_ok",
|
|
248
|
+
"fallback_reason": _xhs_route_failure_reason(response),
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
missing_core = list(completeness.get("missing_core") or [])
|
|
252
|
+
if missing_core:
|
|
253
|
+
return {
|
|
254
|
+
"accepted": False,
|
|
255
|
+
"accept_reason": "profile_missing_core_fields",
|
|
256
|
+
"fallback_reason": f"profile_missing_core:{','.join(missing_core)}",
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
fields = completeness.get("fields") if isinstance(completeness.get("fields"), dict) else {}
|
|
260
|
+
optional_missing = [
|
|
261
|
+
field_name
|
|
262
|
+
for field_name in ("avatar_url", "fans_count", "works_count")
|
|
263
|
+
if not fields.get(field_name)
|
|
264
|
+
]
|
|
265
|
+
accept_reason = "profile_core_fields_ready"
|
|
266
|
+
if optional_missing:
|
|
267
|
+
accept_reason = f"profile_core_fields_ready_optional_missing:{','.join(optional_missing)}"
|
|
268
|
+
return {
|
|
269
|
+
"accepted": True,
|
|
270
|
+
"accept_reason": accept_reason,
|
|
271
|
+
"fallback_reason": "",
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _xhs_posts_accept_decision(response: Dict[str, Any], completeness: Dict[str, Any]) -> Dict[str, Any]:
|
|
276
|
+
if not response.get("ok"):
|
|
277
|
+
return {
|
|
278
|
+
"accepted": False,
|
|
279
|
+
"accept_reason": "response_not_ok",
|
|
280
|
+
"fallback_reason": _xhs_route_failure_reason(response),
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
missing_core = list(completeness.get("missing_core") or [])
|
|
284
|
+
if missing_core:
|
|
285
|
+
return {
|
|
286
|
+
"accepted": False,
|
|
287
|
+
"accept_reason": "posts_missing_core_fields",
|
|
288
|
+
"fallback_reason": f"posts_missing_core:{','.join(missing_core)}",
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
fields = completeness.get("fields") if isinstance(completeness.get("fields"), dict) else {}
|
|
292
|
+
if not fields.get("base_link_fields"):
|
|
293
|
+
return {
|
|
294
|
+
"accepted": False,
|
|
295
|
+
"accept_reason": "posts_missing_base_link_fields",
|
|
296
|
+
"fallback_reason": "posts_missing_base_link_fields",
|
|
297
|
+
}
|
|
298
|
+
if not fields.get("interaction_fields"):
|
|
299
|
+
return {
|
|
300
|
+
"accepted": False,
|
|
301
|
+
"accept_reason": "posts_missing_interaction_fields",
|
|
302
|
+
"fallback_reason": "posts_missing_interaction_fields",
|
|
303
|
+
}
|
|
304
|
+
return {
|
|
305
|
+
"accepted": True,
|
|
306
|
+
"accept_reason": "posts_contract_fields_ready",
|
|
307
|
+
"fallback_reason": "",
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _xhs_route_plan(kind: str) -> List[Tuple[str, str, str]]:
|
|
312
|
+
if kind == "profile":
|
|
313
|
+
return [
|
|
314
|
+
("xhs.profile.app_v2", "/api/u1/v1/xiaohongshu/app_v2/get_user_info", "app_v2"),
|
|
315
|
+
("xhs.profile.app", "/api/u1/v1/xiaohongshu/app/get_user_info", "app"),
|
|
316
|
+
("xhs.profile.web_v2", "/api/u1/v1/xiaohongshu/web_v2/fetch_user_info_app", "web_v2"),
|
|
317
|
+
]
|
|
318
|
+
if kind == "posts":
|
|
319
|
+
return [
|
|
320
|
+
("xhs.posts.app_v2", "/api/u1/v1/xiaohongshu/app_v2/get_user_posted_notes", "app_v2"),
|
|
321
|
+
("xhs.posts.app", "/api/u1/v1/xiaohongshu/app/get_user_notes", "app"),
|
|
322
|
+
("xhs.posts.web_v2", "/api/u1/v1/xiaohongshu/web_v2/fetch_home_notes_app", "web_v2"),
|
|
323
|
+
]
|
|
324
|
+
raise ValueError(f"unsupported_xhs_route_kind:{kind}")
|
|
183
325
|
|
|
184
326
|
|
|
185
327
|
def _call_xhs_route(
|
|
@@ -459,13 +601,10 @@ def collect_xhs_author_home_raw(
|
|
|
459
601
|
if not xsec_token:
|
|
460
602
|
xsec_token = _pick_text(data, ["xsec_token", "xsecToken"])
|
|
461
603
|
|
|
462
|
-
profile_routes =
|
|
463
|
-
("xhs.profile.primary", "/api/u1/v1/xiaohongshu/app_v2/get_user_info", "app_v2_primary"),
|
|
464
|
-
("xhs.profile.secondary", "/api/u1/v1/xiaohongshu/web_v2/fetch_user_info_app", "web_v2_secondary"),
|
|
465
|
-
("xhs.profile.fallback", "/api/u1/v1/xiaohongshu/app/get_user_info", "app_v1_fallback"),
|
|
466
|
-
]
|
|
604
|
+
profile_routes = _xhs_route_plan("profile")
|
|
467
605
|
profile_resp: Dict[str, Any] = {}
|
|
468
606
|
profile_reason: Optional[str] = None
|
|
607
|
+
profile_attempts: List[Dict[str, Any]] = []
|
|
469
608
|
for step_name, path, route_label in profile_routes:
|
|
470
609
|
profile_resp = _call_xhs_route(
|
|
471
610
|
base_url=base_url,
|
|
@@ -477,6 +616,18 @@ def collect_xhs_author_home_raw(
|
|
|
477
616
|
fallback_reason=profile_reason,
|
|
478
617
|
completeness_builder=lambda data, resolved_author_id=user_id: _xhs_profile_field_completeness(data, resolved_author_id),
|
|
479
618
|
)
|
|
619
|
+
profile_decision = _xhs_profile_accept_decision(profile_resp, profile_resp.get("_field_completeness") or {})
|
|
620
|
+
profile_attempts.append(
|
|
621
|
+
{
|
|
622
|
+
"route_label": route_label,
|
|
623
|
+
"endpoint": path,
|
|
624
|
+
"accepted": bool(profile_decision.get("accepted")),
|
|
625
|
+
"accept_reason": profile_decision.get("accept_reason"),
|
|
626
|
+
"fallback_reason": profile_decision.get("fallback_reason"),
|
|
627
|
+
"field_completeness": profile_resp.get("_field_completeness"),
|
|
628
|
+
"request_id": profile_resp.get("request_id"),
|
|
629
|
+
}
|
|
630
|
+
)
|
|
480
631
|
trace.append(
|
|
481
632
|
build_api_trace(
|
|
482
633
|
step=step_name,
|
|
@@ -485,17 +636,17 @@ def collect_xhs_author_home_raw(
|
|
|
485
636
|
extra={
|
|
486
637
|
"route_label": route_label,
|
|
487
638
|
"field_completeness": profile_resp.get("_field_completeness"),
|
|
639
|
+
"accept_reason": profile_decision.get("accept_reason"),
|
|
640
|
+
"route_accepted": bool(profile_decision.get("accepted")),
|
|
488
641
|
},
|
|
489
642
|
)
|
|
490
643
|
)
|
|
491
644
|
request_id_candidates.append(profile_resp)
|
|
492
|
-
if
|
|
645
|
+
if profile_decision.get("accepted"):
|
|
646
|
+
profile_resp["_accept_reason"] = profile_decision.get("accept_reason")
|
|
493
647
|
break
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
profile_reason = "field_completeness_below_threshold"
|
|
497
|
-
else:
|
|
498
|
-
profile_reason = "primary_timeout_retry_exhausted" if profile_resp.get("timeout_retry_exhausted") else "primary_non_timeout_failure"
|
|
648
|
+
profile_reason = str(profile_decision.get("fallback_reason") or "field_completeness_below_threshold")
|
|
649
|
+
profile_resp["fallback_trigger_reason"] = profile_reason
|
|
499
650
|
|
|
500
651
|
trace.append(
|
|
501
652
|
{
|
|
@@ -503,6 +654,9 @@ def collect_xhs_author_home_raw(
|
|
|
503
654
|
"chosen_route": profile_resp.get("_route_label"),
|
|
504
655
|
"request_id": profile_resp.get("request_id"),
|
|
505
656
|
"field_completeness": profile_resp.get("_field_completeness"),
|
|
657
|
+
"accept_reason": profile_resp.get("_accept_reason"),
|
|
658
|
+
"fallback_reason": profile_reason,
|
|
659
|
+
"attempted_routes": profile_attempts,
|
|
506
660
|
}
|
|
507
661
|
)
|
|
508
662
|
|
|
@@ -521,13 +675,10 @@ def collect_xhs_author_home_raw(
|
|
|
521
675
|
message="xiaohongshu pagination page requested",
|
|
522
676
|
data={"page": page, "cursor_in": cursor},
|
|
523
677
|
)
|
|
524
|
-
posts_routes =
|
|
525
|
-
("xhs.posts.primary", "/api/u1/v1/xiaohongshu/app_v2/get_user_posted_notes", "app_v2_primary"),
|
|
526
|
-
("xhs.posts.secondary", "/api/u1/v1/xiaohongshu/web_v2/fetch_home_notes_app", "web_v2_secondary"),
|
|
527
|
-
("xhs.posts.fallback", "/api/u1/v1/xiaohongshu/app/get_user_notes", "app_v1_fallback"),
|
|
528
|
-
]
|
|
678
|
+
posts_routes = _xhs_route_plan("posts")
|
|
529
679
|
posts_resp: Dict[str, Any] = {}
|
|
530
680
|
posts_reason: Optional[str] = None
|
|
681
|
+
posts_attempts: List[Dict[str, Any]] = []
|
|
531
682
|
for step_name, path, route_label in posts_routes:
|
|
532
683
|
posts_resp = _call_xhs_route(
|
|
533
684
|
base_url=base_url,
|
|
@@ -545,6 +696,18 @@ def collect_xhs_author_home_raw(
|
|
|
545
696
|
fallback_reason=posts_reason,
|
|
546
697
|
completeness_builder=_xhs_posts_field_completeness,
|
|
547
698
|
)
|
|
699
|
+
posts_decision = _xhs_posts_accept_decision(posts_resp, posts_resp.get("_field_completeness") or {})
|
|
700
|
+
posts_attempts.append(
|
|
701
|
+
{
|
|
702
|
+
"route_label": route_label,
|
|
703
|
+
"endpoint": path,
|
|
704
|
+
"accepted": bool(posts_decision.get("accepted")),
|
|
705
|
+
"accept_reason": posts_decision.get("accept_reason"),
|
|
706
|
+
"fallback_reason": posts_decision.get("fallback_reason"),
|
|
707
|
+
"field_completeness": posts_resp.get("_field_completeness"),
|
|
708
|
+
"request_id": posts_resp.get("request_id"),
|
|
709
|
+
}
|
|
710
|
+
)
|
|
548
711
|
trace.append(
|
|
549
712
|
build_api_trace(
|
|
550
713
|
step=step_name,
|
|
@@ -555,17 +718,17 @@ def collect_xhs_author_home_raw(
|
|
|
555
718
|
"cursor": cursor,
|
|
556
719
|
"route_label": route_label,
|
|
557
720
|
"field_completeness": posts_resp.get("_field_completeness"),
|
|
721
|
+
"accept_reason": posts_decision.get("accept_reason"),
|
|
722
|
+
"route_accepted": bool(posts_decision.get("accepted")),
|
|
558
723
|
},
|
|
559
724
|
)
|
|
560
725
|
)
|
|
561
726
|
request_id_candidates.append(posts_resp)
|
|
562
|
-
if
|
|
727
|
+
if posts_decision.get("accepted"):
|
|
728
|
+
posts_resp["_accept_reason"] = posts_decision.get("accept_reason")
|
|
563
729
|
break
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
posts_reason = "field_completeness_below_threshold"
|
|
567
|
-
else:
|
|
568
|
-
posts_reason = "primary_timeout_retry_exhausted" if posts_resp.get("timeout_retry_exhausted") else "primary_non_timeout_failure"
|
|
730
|
+
posts_reason = str(posts_decision.get("fallback_reason") or "field_completeness_below_threshold")
|
|
731
|
+
posts_resp["fallback_trigger_reason"] = posts_reason
|
|
569
732
|
|
|
570
733
|
trace.append(
|
|
571
734
|
{
|
|
@@ -575,6 +738,9 @@ def collect_xhs_author_home_raw(
|
|
|
575
738
|
"chosen_route": posts_resp.get("_route_label"),
|
|
576
739
|
"request_id": posts_resp.get("request_id"),
|
|
577
740
|
"field_completeness": posts_resp.get("_field_completeness"),
|
|
741
|
+
"accept_reason": posts_resp.get("_accept_reason"),
|
|
742
|
+
"fallback_reason": posts_reason,
|
|
743
|
+
"attempted_routes": posts_attempts,
|
|
578
744
|
}
|
|
579
745
|
)
|
|
580
746
|
|