@tikomni/skills 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/social-media-crawl/scripts/pipelines/douyin_metadata.py +151 -0
- package/skills/social-media-crawl/scripts/pipelines/home_asr.py +40 -37
- package/skills/social-media-crawl/scripts/pipelines/homepage_collectors.py +5 -11
- package/skills/social-media-crawl/scripts/pipelines/input_contracts.py +318 -0
- package/skills/social-media-crawl/scripts/pipelines/media_url_rules.py +86 -0
- package/skills/social-media-crawl/scripts/pipelines/platform_adapters.py +77 -30
- package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +84 -6
- package/skills/social-media-crawl/scripts/pipelines/run_douyin_single_work.py +79 -73
- package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +84 -6
- package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py +86 -60
- package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +5 -3
|
@@ -28,9 +28,14 @@ from scripts.core.progress_report import build_progress_reporter
|
|
|
28
28
|
from scripts.core.storage_router import resolve_author_directory_name
|
|
29
29
|
from scripts.core.tikomni_common import resolve_runtime, write_json_stdout
|
|
30
30
|
from scripts.pipelines.home_asr import enrich_author_home_asr
|
|
31
|
+
from scripts.pipelines.input_contracts import normalize_xhs_creator_input
|
|
32
|
+
from scripts.pipelines.schema import build_author_profile
|
|
31
33
|
from scripts.pipelines.xiaohongshu_creator_home_helpers import collect_and_adapt
|
|
32
34
|
from scripts.writers.write_work_fact_card import build_work_fact_card, persist_output_envelope, write_work_fact_card
|
|
33
35
|
|
|
36
|
+
DEFAULT_MAX_ITEMS = 200
|
|
37
|
+
MAX_ITEMS_HARD_LIMIT = 200
|
|
38
|
+
|
|
34
39
|
|
|
35
40
|
def _write_collection_artifacts(
|
|
36
41
|
*,
|
|
@@ -81,11 +86,12 @@ def run_xiaohongshu_creator_home(
|
|
|
81
86
|
*,
|
|
82
87
|
input_value: str,
|
|
83
88
|
config: Dict[str, Any],
|
|
84
|
-
runtime: Dict[str, Any],
|
|
89
|
+
runtime: Dict[str, Any] | None,
|
|
85
90
|
max_items: int,
|
|
86
91
|
write_card: bool,
|
|
87
92
|
persist_output: bool,
|
|
88
93
|
) -> Dict[str, Any]:
|
|
94
|
+
bounded_max_items = max(1, min(int(max_items), MAX_ITEMS_HARD_LIMIT))
|
|
89
95
|
progress = build_progress_reporter(
|
|
90
96
|
workflow="social-media-crawl",
|
|
91
97
|
platform="xiaohongshu",
|
|
@@ -94,15 +100,69 @@ def run_xiaohongshu_creator_home(
|
|
|
94
100
|
scope="workflow",
|
|
95
101
|
)
|
|
96
102
|
progress.started(stage="author_home.workflow", message="xiaohongshu author_home workflow started")
|
|
103
|
+
preflight = normalize_xhs_creator_input(input_value)
|
|
104
|
+
normalized_input_value = str(preflight.get("input_value") or "")
|
|
105
|
+
if preflight.get("error_reason"):
|
|
106
|
+
request_id = ensure_request_id(None, fallback_seed=input_value)
|
|
107
|
+
empty_profile = build_author_profile(platform="xiaohongshu", request_id=request_id)
|
|
108
|
+
extract_trace = [
|
|
109
|
+
{
|
|
110
|
+
"step": "input.preflight",
|
|
111
|
+
"ok": False,
|
|
112
|
+
"input_kind": "creator_url_or_user_id",
|
|
113
|
+
"normalized_input_value": normalized_input_value or None,
|
|
114
|
+
"error_reason": preflight.get("error_reason"),
|
|
115
|
+
"missing_fields": list(preflight.get("missing_fields") or []),
|
|
116
|
+
}
|
|
117
|
+
]
|
|
118
|
+
envelope = {
|
|
119
|
+
"object_type": "creator",
|
|
120
|
+
"platform": "xiaohongshu",
|
|
121
|
+
"input": input_value,
|
|
122
|
+
"normalized": {
|
|
123
|
+
"creator_profile": {**empty_profile, "request_id": request_id, "extract_trace": extract_trace},
|
|
124
|
+
"work_collection": {
|
|
125
|
+
"platform": "xiaohongshu",
|
|
126
|
+
"platform_author_id": "",
|
|
127
|
+
"count": 0,
|
|
128
|
+
"items": [],
|
|
129
|
+
"request_id": request_id,
|
|
130
|
+
"extract_trace": extract_trace,
|
|
131
|
+
},
|
|
132
|
+
},
|
|
133
|
+
"completeness": evaluate_collection(empty_profile, []),
|
|
134
|
+
"missing_fields": normalize_missing_fields(preflight.get("missing_fields")),
|
|
135
|
+
"error_reason": str(preflight.get("error_reason") or "invalid_creator_input"),
|
|
136
|
+
"extract_trace": extract_trace,
|
|
137
|
+
"request_id": request_id,
|
|
138
|
+
"card_write": {
|
|
139
|
+
"enabled": bool(write_card),
|
|
140
|
+
"ok": False,
|
|
141
|
+
"count": 0,
|
|
142
|
+
"results": [],
|
|
143
|
+
"reason": "skipped_invalid_input",
|
|
144
|
+
},
|
|
145
|
+
"collection_artifacts": {},
|
|
146
|
+
"output_persist": {"enabled": False, "skipped": True, "reason": "invalid_input"},
|
|
147
|
+
}
|
|
148
|
+
progress.done(
|
|
149
|
+
stage="author_home.workflow",
|
|
150
|
+
message="xiaohongshu author_home workflow finished",
|
|
151
|
+
data={"request_id": request_id, "works_count": 0, "error_reason": envelope["error_reason"]},
|
|
152
|
+
)
|
|
153
|
+
return envelope
|
|
154
|
+
|
|
155
|
+
if runtime is None:
|
|
156
|
+
raise ValueError("runtime_required_for_valid_input")
|
|
97
157
|
|
|
98
158
|
raw, profile, works, missing = collect_and_adapt(
|
|
99
|
-
input_value=input_value,
|
|
159
|
+
input_value=normalized_input_value or input_value,
|
|
100
160
|
base_url=runtime["base_url"],
|
|
101
161
|
token=runtime["token"],
|
|
102
162
|
timeout_ms=runtime["timeout_ms"],
|
|
103
163
|
page_size=20,
|
|
104
164
|
pages_max=50,
|
|
105
|
-
max_items=
|
|
165
|
+
max_items=bounded_max_items,
|
|
106
166
|
progress=progress.child(scope="author_home.collect"),
|
|
107
167
|
)
|
|
108
168
|
|
|
@@ -138,7 +198,7 @@ def run_xiaohongshu_creator_home(
|
|
|
138
198
|
|
|
139
199
|
request_id = ensure_request_id(
|
|
140
200
|
raw.get("request_id") or profile.get("request_id"),
|
|
141
|
-
fallback_seed=input_value,
|
|
201
|
+
fallback_seed=normalized_input_value or input_value,
|
|
142
202
|
)
|
|
143
203
|
extract_trace = list(raw.get("extract_trace") or []) + list(asr_bundle.get("trace") or [])
|
|
144
204
|
|
|
@@ -206,7 +266,12 @@ def main() -> None:
|
|
|
206
266
|
parser.add_argument("--allow-process-env", action="store_true", help="Allow process env overrides")
|
|
207
267
|
parser.add_argument("--base-url", default=None, help="Override Tikomni base URL")
|
|
208
268
|
parser.add_argument("--timeout-ms", type=int, default=None, help="Override timeout in ms")
|
|
209
|
-
parser.add_argument(
|
|
269
|
+
parser.add_argument(
|
|
270
|
+
"--max-items",
|
|
271
|
+
type=int,
|
|
272
|
+
default=DEFAULT_MAX_ITEMS,
|
|
273
|
+
help=f"Max works to collect from homepage (default full crawl, capped at {MAX_ITEMS_HARD_LIMIT})",
|
|
274
|
+
)
|
|
210
275
|
parser.set_defaults(write_card=True, persist_output=True)
|
|
211
276
|
parser.add_argument("--write-card", dest="write_card", action="store_true", help="Write work fact cards")
|
|
212
277
|
parser.add_argument("--no-write-card", dest="write_card", action="store_false", help="Skip card writing")
|
|
@@ -215,6 +280,19 @@ def main() -> None:
|
|
|
215
280
|
args = parser.parse_args()
|
|
216
281
|
|
|
217
282
|
config, _ = load_tikomni_config(args.config, env_file=args.env_file, allow_process_env=args.allow_process_env)
|
|
283
|
+
preflight = normalize_xhs_creator_input(args.input)
|
|
284
|
+
if preflight.get("error_reason"):
|
|
285
|
+
write_json_stdout(
|
|
286
|
+
run_xiaohongshu_creator_home(
|
|
287
|
+
input_value=args.input,
|
|
288
|
+
config=config,
|
|
289
|
+
runtime=None,
|
|
290
|
+
max_items=int(args.max_items),
|
|
291
|
+
write_card=bool(args.write_card),
|
|
292
|
+
persist_output=bool(args.persist_output),
|
|
293
|
+
)
|
|
294
|
+
)
|
|
295
|
+
return
|
|
218
296
|
runtime = resolve_runtime(
|
|
219
297
|
env_file=args.env_file,
|
|
220
298
|
api_key_env=str(config_get(config, "runtime.auth_env_key", "TIKOMNI_API_KEY")),
|
|
@@ -224,7 +302,7 @@ def main() -> None:
|
|
|
224
302
|
)
|
|
225
303
|
write_json_stdout(
|
|
226
304
|
run_xiaohongshu_creator_home(
|
|
227
|
-
input_value=args.input,
|
|
305
|
+
input_value=str(preflight.get("input_value") or args.input),
|
|
228
306
|
config=config,
|
|
229
307
|
runtime=runtime,
|
|
230
308
|
max_items=int(args.max_items),
|
|
@@ -40,6 +40,12 @@ from scripts.core.tikomni_common import (
|
|
|
40
40
|
summarize_content,
|
|
41
41
|
write_json_stdout,
|
|
42
42
|
)
|
|
43
|
+
from scripts.pipelines.input_contracts import (
|
|
44
|
+
extract_xhs_note_id as extract_shared_xhs_note_id,
|
|
45
|
+
normalize_xhs_note_input,
|
|
46
|
+
text_has_xhs_short_link,
|
|
47
|
+
)
|
|
48
|
+
from scripts.pipelines.media_url_rules import filter_video_urls, is_probable_video_url
|
|
43
49
|
from scripts.writers.write_work_fact_card import (
|
|
44
50
|
build_work_output_envelope,
|
|
45
51
|
persist_output_envelope,
|
|
@@ -194,36 +200,15 @@ def _finalize_result(
|
|
|
194
200
|
|
|
195
201
|
|
|
196
202
|
def _normalize_input(input_value: Optional[str], share_text: Optional[str], note_id: Optional[str]) -> Dict[str, Optional[str]]:
|
|
197
|
-
|
|
198
|
-
normalized_note_id = normalize_text(note_id) or None
|
|
199
|
-
|
|
200
|
-
if input_value and not normalized_share and not normalized_note_id:
|
|
201
|
-
candidate = input_value.strip()
|
|
202
|
-
if candidate.startswith("http://") or candidate.startswith("https://"):
|
|
203
|
-
normalized_share = candidate
|
|
204
|
-
else:
|
|
205
|
-
normalized_note_id = candidate
|
|
206
|
-
|
|
203
|
+
normalized = normalize_xhs_note_input(input_value, share_text, note_id)
|
|
207
204
|
return {
|
|
208
|
-
"share_text":
|
|
209
|
-
"note_id":
|
|
205
|
+
"share_text": normalize_text(normalized.get("share_text")) or None,
|
|
206
|
+
"note_id": normalize_text(normalized.get("note_id")) or None,
|
|
210
207
|
}
|
|
211
208
|
|
|
212
209
|
|
|
213
210
|
def _extract_note_id_from_share(share_text: Optional[str]) -> Optional[str]:
|
|
214
|
-
|
|
215
|
-
return None
|
|
216
|
-
text = share_text.strip()
|
|
217
|
-
patterns = [
|
|
218
|
-
r"/explore/([0-9a-zA-Z]+)",
|
|
219
|
-
r"/discovery/item/([0-9a-zA-Z]+)",
|
|
220
|
-
r"note_id=([0-9a-zA-Z]+)",
|
|
221
|
-
]
|
|
222
|
-
for pattern in patterns:
|
|
223
|
-
match = re.search(pattern, text)
|
|
224
|
-
if match:
|
|
225
|
-
return match.group(1)
|
|
226
|
-
return None
|
|
211
|
+
return extract_shared_xhs_note_id(share_text)
|
|
227
212
|
|
|
228
213
|
|
|
229
214
|
def _resolve_note_id(payload: Any, source_input: Dict[str, Optional[str]]) -> Optional[str]:
|
|
@@ -256,13 +241,7 @@ def _resolve_note_id(payload: Any, source_input: Dict[str, Optional[str]]) -> Op
|
|
|
256
241
|
|
|
257
242
|
|
|
258
243
|
def _is_short_share_url(share_text: Optional[str]) -> bool:
|
|
259
|
-
|
|
260
|
-
return False
|
|
261
|
-
try:
|
|
262
|
-
host = urllib.parse.urlparse(share_text).netloc.lower()
|
|
263
|
-
except Exception:
|
|
264
|
-
return False
|
|
265
|
-
return "xhslink.com" in host
|
|
244
|
+
return text_has_xhs_short_link(share_text)
|
|
266
245
|
|
|
267
246
|
|
|
268
247
|
def _app_response_has_core_fields(response_data: Any) -> bool:
|
|
@@ -609,17 +588,19 @@ def _extract_xhs_metadata(
|
|
|
609
588
|
if not cover_image and selected_image_urls:
|
|
610
589
|
cover_image = selected_image_urls[0]
|
|
611
590
|
|
|
612
|
-
|
|
613
|
-
payload,
|
|
614
|
-
[
|
|
591
|
+
video_down_url_candidates = [
|
|
592
|
+
_pick_text_from_paths(payload, [path])
|
|
593
|
+
for path in [
|
|
615
594
|
["video_down_url"],
|
|
616
595
|
["original_video_url"],
|
|
617
596
|
["video_url"],
|
|
618
597
|
["play_url"],
|
|
619
598
|
["master_url"],
|
|
620
599
|
["selected_video_url"],
|
|
621
|
-
]
|
|
622
|
-
|
|
600
|
+
]
|
|
601
|
+
]
|
|
602
|
+
filtered_video_down_urls = filter_video_urls(video_down_url_candidates)
|
|
603
|
+
video_down_url = filtered_video_down_urls[0] if filtered_video_down_urls else ""
|
|
623
604
|
if not video_down_url:
|
|
624
605
|
video_down_url = normalize_text(selected_video_url)
|
|
625
606
|
|
|
@@ -1091,24 +1072,9 @@ def _url_likely_image(url: str) -> bool:
|
|
|
1091
1072
|
|
|
1092
1073
|
|
|
1093
1074
|
def _url_likely_video(url: str) -> bool:
|
|
1094
|
-
lower = url.lower()
|
|
1095
|
-
video_tokens = [
|
|
1096
|
-
".mp4",
|
|
1097
|
-
".m3u8",
|
|
1098
|
-
".m4a",
|
|
1099
|
-
".mp3",
|
|
1100
|
-
"video",
|
|
1101
|
-
"play",
|
|
1102
|
-
"stream",
|
|
1103
|
-
"master",
|
|
1104
|
-
"sns-video",
|
|
1105
|
-
"redvideo",
|
|
1106
|
-
"vod",
|
|
1107
|
-
"/audio/",
|
|
1108
|
-
]
|
|
1109
1075
|
if _url_likely_image(url):
|
|
1110
1076
|
return False
|
|
1111
|
-
return
|
|
1077
|
+
return is_probable_video_url(url)
|
|
1112
1078
|
|
|
1113
1079
|
|
|
1114
1080
|
def _video_quality_hint(url: str) -> int:
|
|
@@ -1174,7 +1140,7 @@ def _extract_video_candidates(payload: Any) -> List[str]:
|
|
|
1174
1140
|
unique.append(url)
|
|
1175
1141
|
seen.add(url)
|
|
1176
1142
|
|
|
1177
|
-
video_only = [u for u in unique if _url_likely_video(u)]
|
|
1143
|
+
video_only = filter_video_urls([u for u in unique if _url_likely_video(u)])
|
|
1178
1144
|
if not video_only:
|
|
1179
1145
|
return []
|
|
1180
1146
|
|
|
@@ -1299,10 +1265,7 @@ def _detect_note_content_type(payload: Any, video_candidates: List[str], image_c
|
|
|
1299
1265
|
if "image" in note_type_value:
|
|
1300
1266
|
return "image"
|
|
1301
1267
|
|
|
1302
|
-
|
|
1303
|
-
has_note_audio = bool(note_sound_url and any(token in note_sound_url for token in [".m4a", ".mp3", "/audio/"]))
|
|
1304
|
-
|
|
1305
|
-
has_video = bool(video_candidates) or has_note_audio
|
|
1268
|
+
has_video = bool(video_candidates)
|
|
1306
1269
|
has_image = bool(image_candidates)
|
|
1307
1270
|
if has_video and has_image:
|
|
1308
1271
|
return "mixed"
|
|
@@ -1494,7 +1457,11 @@ def run_xiaohongshu_extract(
|
|
|
1494
1457
|
workflow_started_at = time.perf_counter()
|
|
1495
1458
|
timings = _empty_timings()
|
|
1496
1459
|
parse_started_at = time.perf_counter()
|
|
1497
|
-
|
|
1460
|
+
preflight = normalize_xhs_note_input(input_value, share_text, note_id)
|
|
1461
|
+
source_input = {
|
|
1462
|
+
"share_text": normalize_text(preflight.get("share_text")) or None,
|
|
1463
|
+
"note_id": normalize_text(preflight.get("note_id")) or None,
|
|
1464
|
+
}
|
|
1498
1465
|
timings["url_parse_ms"] = _elapsed_ms(parse_started_at)
|
|
1499
1466
|
if progress is not None:
|
|
1500
1467
|
progress.started(
|
|
@@ -1503,13 +1470,72 @@ def run_xiaohongshu_extract(
|
|
|
1503
1470
|
data={"analysis_mode": analysis_mode, "write_card": bool(write_card), "persist_output": bool(persist_output)},
|
|
1504
1471
|
)
|
|
1505
1472
|
metadata_fields: Dict[str, Any] = {}
|
|
1473
|
+
preflight_trace = [
|
|
1474
|
+
{
|
|
1475
|
+
"step": "input.preflight",
|
|
1476
|
+
"ok": preflight.get("error_reason") is None,
|
|
1477
|
+
"input_kind": "share_text_or_note_id",
|
|
1478
|
+
"normalized_share_text": source_input.get("share_text"),
|
|
1479
|
+
"normalized_note_id": source_input.get("note_id"),
|
|
1480
|
+
"error_reason": preflight.get("error_reason"),
|
|
1481
|
+
"missing_fields": list(preflight.get("missing_fields") or []),
|
|
1482
|
+
}
|
|
1483
|
+
]
|
|
1484
|
+
if preflight.get("error_reason"):
|
|
1485
|
+
result = _build_result(
|
|
1486
|
+
source_input=source_input,
|
|
1487
|
+
raw_content="",
|
|
1488
|
+
confidence="low",
|
|
1489
|
+
error_reason=str(preflight.get("error_reason") or "invalid_note_id"),
|
|
1490
|
+
extract_trace=preflight_trace,
|
|
1491
|
+
fallback_trace=[],
|
|
1492
|
+
request_id=None,
|
|
1493
|
+
text_source="none",
|
|
1494
|
+
note_id=None,
|
|
1495
|
+
subtitle_hit=False,
|
|
1496
|
+
u2_task_id=None,
|
|
1497
|
+
u2_task_status="UNKNOWN",
|
|
1498
|
+
note_content_type="unknown",
|
|
1499
|
+
analysis_mode=analysis_mode,
|
|
1500
|
+
selected_video_url=None,
|
|
1501
|
+
selected_video_candidates=[],
|
|
1502
|
+
selected_image_urls=[],
|
|
1503
|
+
downloaded_assets=[],
|
|
1504
|
+
missing_fields=list(preflight.get("missing_fields") or []),
|
|
1505
|
+
metadata_fields=metadata_fields,
|
|
1506
|
+
timings=timings,
|
|
1507
|
+
)
|
|
1508
|
+
if write_card:
|
|
1509
|
+
card_started_at = time.perf_counter()
|
|
1510
|
+
result["card_write"] = write_work_fact_card(
|
|
1511
|
+
payload=result,
|
|
1512
|
+
platform="xiaohongshu",
|
|
1513
|
+
card_type=card_type,
|
|
1514
|
+
card_root=card_root,
|
|
1515
|
+
content_kind="note",
|
|
1516
|
+
storage_config=storage_config,
|
|
1517
|
+
analysis_mode=analysis_mode,
|
|
1518
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1519
|
+
)
|
|
1520
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1521
|
+
timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
|
|
1522
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
1523
|
+
result["timings"] = dict(timings)
|
|
1524
|
+
_update_pipeline_status(result)
|
|
1525
|
+
return _finalize_result(
|
|
1526
|
+
result=result,
|
|
1527
|
+
source_input=source_input,
|
|
1528
|
+
note_id=None,
|
|
1529
|
+
storage_config=storage_config,
|
|
1530
|
+
persist_output=persist_output,
|
|
1531
|
+
)
|
|
1506
1532
|
if not source_input["share_text"] and not source_input["note_id"]:
|
|
1507
1533
|
result = _build_result(
|
|
1508
1534
|
source_input=source_input,
|
|
1509
1535
|
raw_content="",
|
|
1510
1536
|
confidence="low",
|
|
1511
1537
|
error_reason="missing_share_text_or_note_id",
|
|
1512
|
-
extract_trace=
|
|
1538
|
+
extract_trace=preflight_trace,
|
|
1513
1539
|
fallback_trace=[],
|
|
1514
1540
|
request_id=None,
|
|
1515
1541
|
text_source="none",
|
|
@@ -215,10 +215,10 @@ def _resolve_primary_text(payload: Dict[str, Any], caption_raw: str) -> Dict[str
|
|
|
215
215
|
subtitle_raw = _safe_text(payload.get("subtitle_raw"))
|
|
216
216
|
asr_clean = _safe_text(payload.get("asr_clean"))
|
|
217
217
|
asr_raw = _safe_text(payload.get("asr_raw"))
|
|
218
|
-
if subtitle_raw:
|
|
219
|
-
return {"primary_text": subtitle_raw, "primary_text_source": "subtitle_raw"}
|
|
220
218
|
if asr_clean:
|
|
221
219
|
return {"primary_text": asr_clean, "primary_text_source": "asr_clean"}
|
|
220
|
+
if subtitle_raw:
|
|
221
|
+
return {"primary_text": subtitle_raw, "primary_text_source": "subtitle_raw"}
|
|
222
222
|
if asr_raw:
|
|
223
223
|
return {"primary_text": asr_raw, "primary_text_source": "asr_raw"}
|
|
224
224
|
if caption_raw:
|
|
@@ -356,7 +356,9 @@ def _markdown_lines(card: Dict[str, Any]) -> List[str]:
|
|
|
356
356
|
lines.extend(["", "## 主文本", primary_text or ""])
|
|
357
357
|
if caption_raw and caption_raw != primary_text:
|
|
358
358
|
lines.extend(["", "## 原始文案", caption_raw])
|
|
359
|
-
if subtitle_raw and subtitle_raw != primary_text:
|
|
359
|
+
if asr_raw and subtitle_raw and asr_raw == subtitle_raw and asr_raw != primary_text:
|
|
360
|
+
lines.extend(["", "## 原始转写", asr_raw])
|
|
361
|
+
elif subtitle_raw and subtitle_raw != primary_text:
|
|
360
362
|
lines.extend(["", "## 原始字幕", subtitle_raw])
|
|
361
363
|
if asr_raw and asr_raw not in {primary_text, subtitle_raw}:
|
|
362
364
|
lines.extend(["", "## 原始转写", asr_raw])
|