npm - yuanflow-cli - Versions diffs - 0.1.10 → 0.1.12 - Mend

yuanflow-cli 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/skills/yuanflow-skill//346/234/254/345/234/260/351/237/263/350/247/206/351/242/221/350/275/254/346/226/207/345/255/227/scripts/common/utils.py ADDED Viewed

@@ -0,0 +1,62 @@
+from __future__ import annotations
+import json
+import re
+from pathlib import Path
+from typing import Iterable
+# File extensions (lowercase, leading dot included) recognised as video.
+VIDEO_SUFFIXES = {".mp4", ".mov", ".mkv", ".avi", ".webm", ".m4v"}
+# File extensions (lowercase, leading dot included) recognised as audio.
+AUDIO_SUFFIXES = {".wav", ".mp3", ".m4a", ".aac", ".flac", ".ogg", ".opus"}
+# Union of the two sets above: every extension treated as a media file.
+MEDIA_SUFFIXES = VIDEO_SUFFIXES | AUDIO_SUFFIXES
+# Matches one character that sanitize_filename replaces: < > : " / \ | ? *
+# or any control character in the range U+0000-U+001F.
+INVALID_FILENAME_RE = re.compile('[<>:"/\\\\|?*\\x00-\\x1F]')
+def sanitize_filename(name: str, max_length: int = 120) -> str:
+    """Turn an arbitrary string into a non-empty file name component.
+
+    Characters matched by ``INVALID_FILENAME_RE`` are replaced with ``_``,
+    runs of whitespace collapse to a single space, and the result is cut to
+    at most ``max_length`` characters with trailing dots and spaces removed.
+
+    Args:
+        name: Raw name to clean, for example a media file stem.
+        max_length: Maximum length of the returned name. Values below 1 are
+            treated as 1 so the result can never be empty.
+
+    Returns:
+        The cleaned name, or ``"untitled"`` (cut to the length limit) when
+        nothing usable remains.
+    """
+    limit = max(1, max_length)
+    cleaned = INVALID_FILENAME_RE.sub("_", name).strip()
+    cleaned = re.sub(r"\s+", " ", cleaned)
+    # Truncate first, then trim: the cut can expose a trailing dot or space.
+    cleaned = cleaned[:limit].rstrip(". ")
+    # Apply the fallback after truncation so that a name such as ". a" with
+    # a small limit cannot collapse into an empty string.
+    return cleaned or "untitled"[:limit]
+def ensure_parent(path: Path) -> Path:
+    """Create the parent directory of ``path`` if needed and return ``path``.
+
+    Missing intermediate directories are created as well; an already
+    existing parent directory is not an error.
+    """
+    parent_dir = path.parent
+    parent_dir.mkdir(parents=True, exist_ok=True)
+    return path
+def ensure_dir(path: Path) -> Path:
+    """Create the directory ``path`` if needed and return ``path``.
+
+    Missing intermediate directories are created as well; an already
+    existing directory is not an error.
+    """
+    directory = path
+    directory.mkdir(parents=True, exist_ok=True)
+    return directory
+def write_json(path: Path, data: object) -> Path:
+    """Serialise ``data`` as indented UTF-8 JSON at ``path`` and return ``path``.
+
+    The parent directory is created first. Non-ASCII characters are written
+    as-is rather than escaped, and the output is indented by two spaces.
+    """
+    ensure_parent(path)
+    payload = json.dumps(data, ensure_ascii=False, indent=2)
+    path.write_text(payload, encoding="utf-8")
+    return path
+def write_text(path: Path, text: str) -> Path:
+    """Write ``text`` to ``path`` as UTF-8 and return ``path``.
+
+    The parent directory is created first if it does not exist.
+    """
+    # ensure_parent hands back the very same path object it was given.
+    target = ensure_parent(path)
+    target.write_text(text, encoding="utf-8")
+    return target
+def iter_media_files(path: Path, recursive: bool = False) -> Iterable[Path]:
+    """Yield the media files found at ``path``.
+
+    A file counts as media when its lowercased suffix is in
+    ``MEDIA_SUFFIXES``.
+
+    Args:
+        path: A single file or a directory to scan.
+        recursive: When ``path`` is a directory, also descend into its
+            subdirectories.
+
+    Yields:
+        ``path`` itself if it is a media file; otherwise the media files
+        found under the directory, in sorted path order. Nothing is yielded
+        for a non-media file or when no media file is found.
+    """
+    if path.is_file():
+        if path.suffix.lower() in MEDIA_SUFFIXES:
+            yield path
+        return
+    candidates = path.rglob("*") if recursive else path.glob("*")
+    # Directory listing order depends on the filesystem; sorting keeps batch
+    # runs reproducible across machines and repeated invocations.
+    for candidate in sorted(candidates):
+        if candidate.is_file() and candidate.suffix.lower() in MEDIA_SUFFIXES:
+            yield candidate
+def is_video_file(path: Path) -> bool:
+    """Return True when the suffix of ``path`` is a known video extension.
+
+    The comparison is case-insensitive and looks at the name only.
+    """
+    suffix = path.suffix.lower()
+    return suffix in VIDEO_SUFFIXES
+def is_audio_file(path: Path) -> bool:
+    """Return True when the suffix of ``path`` is a known audio extension.
+
+    The comparison is case-insensitive and looks at the name only.
+    """
+    suffix = path.suffix.lower()
+    return suffix in AUDIO_SUFFIXES

package/skills/yuanflow-skill//346/234/254/345/234/260/351/237/263/350/247/206/351/242/221/350/275/254/346/226/207/345/255/227/scripts/requirements-transcribe.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ funasr>=1.1.6
2	+ modelscope>=1.18.1

package/skills/yuanflow-skill//346/234/254/345/234/260/351/237/263/350/247/206/351/242/221/350/275/254/346/226/207/345/255/227/scripts/transcribe_media.py ADDED Viewed

@@ -0,0 +1,126 @@
+from __future__ import annotations
+import argparse
+from pathlib import Path
+from common.media import extract_audio
+from common.sensevoice import build_model
+from common.sensevoice import clean_transcript
+from common.utils import ensure_dir
+from common.utils import is_audio_file
+from common.utils import is_video_file
+from common.utils import iter_media_files
+from common.utils import sanitize_filename
+from common.utils import write_text
+# Directory that contains this script, with symlinks resolved.
+SCRIPT_DIR = Path(__file__).resolve().parent
+# Default value of --cache-root: a "cache" folder next to this script.
+DEFAULT_CACHE_ROOT = SCRIPT_DIR / "cache"
+# Default value of --models-root: a "models" folder next to this script.
+DEFAULT_MODELS_ROOT = SCRIPT_DIR / "models"
+def prepare_audio(
+    source: Path,
+    *,
+    audio_cache_dir: Path,
+    ffmpeg_bin: str,
+    overwrite: bool,
+) -> tuple[Path, str]:
+    """Return an audio file for ``source`` and a tag naming the input kind.
+
+    Audio inputs are returned unchanged with the tag ``"audio"``. For video
+    inputs, ``extract_audio`` is asked to write
+    ``audio_cache_dir/<sanitized stem>.wav`` and that path is returned with
+    the tag ``"video"``.
+
+    Args:
+        source: Media file to prepare.
+        audio_cache_dir: Directory that receives extracted ``.wav`` files.
+        ffmpeg_bin: Forwarded to ``extract_audio``.
+        overwrite: Forwarded to ``extract_audio``.
+
+    Raises:
+        ValueError: If ``source`` has neither an audio nor a video suffix.
+    """
+    if is_audio_file(source):
+        return source, "audio"
+    if is_video_file(source):
+        wav_name = f"{sanitize_filename(source.stem)}.wav"
+        wav_path = audio_cache_dir / wav_name
+        extract_audio(source, wav_path, ffmpeg_bin=ffmpeg_bin, overwrite=overwrite)
+        return wav_path, "video"
+    raise ValueError(f"不支持的媒体文件类型：{source}")
+def transcribe_file(
+    model,
+    audio_file: Path,
+    *,
+    output_dir: Path,
+    language: str,
+    batch_size_s: int,
+    overwrite: bool,
+) -> Path:
+    """Transcribe ``audio_file`` and return the path of the transcript.
+
+    The transcript is stored at ``output_dir/<sanitized stem>.txt``. When
+    that file already exists and ``overwrite`` is false, the model is not
+    called and the existing path is returned.
+
+    NOTE: the output name is derived from the stem alone, so two inputs
+    with the same stem map to the same transcript file.
+
+    Args:
+        model: Object exposing ``generate(...)``; its result is expected to
+            be a list whose first item has a ``get`` method.
+        audio_file: Audio file handed to the model.
+        output_dir: Directory that receives the ``.txt`` transcript.
+        language: Passed to ``model.generate`` as ``language``.
+        batch_size_s: Passed to ``model.generate`` as ``batch_size_s``.
+        overwrite: Re-run transcription even if the transcript exists.
+    """
+    transcript_path = output_dir / f"{sanitize_filename(audio_file.stem)}.txt"
+    already_transcribed = transcript_path.exists()
+    if already_transcribed and not overwrite:
+        return transcript_path
+    generate_options = {
+        "input": str(audio_file),
+        "cache": {},
+        "language": language,
+        "use_itn": True,
+        "batch_size_s": batch_size_s,
+        "merge_vad": True,
+        "merge_length_s": 15,
+        "ban_emo_unk": True,
+    }
+    result = model.generate(**generate_options)
+    # Only the first entry of a non-empty list result is used; any other
+    # result produces an empty transcript.
+    if result and isinstance(result, list):
+        raw_text = str(result[0].get("text", ""))
+    else:
+        raw_text = ""
+    return write_text(transcript_path, clean_transcript(raw_text))
+def main() -> None:
+    """Command-line entry point: transcribe a media file or a directory.
+
+    Parses the arguments, creates the ``audio`` and ``transcripts`` folders
+    under the cache root, collects the media files, builds the model, and
+    then processes each file in turn, printing the paths involved.
+
+    Raises:
+        SystemExit: If no media file is found, or if ``build_model`` raises
+            ``ImportError``.
+    """
+    cli = argparse.ArgumentParser(description="本地音视频转文字：视频先抽音频，音频直接转写。")
+    cli.add_argument("input_path", type=Path, help="音频文件、视频文件或目录")
+    cli.add_argument("--cache-root", type=Path, default=DEFAULT_CACHE_ROOT, help="缓存目录")
+    cli.add_argument("--models-root", type=Path, default=DEFAULT_MODELS_ROOT, help="模型目录")
+    cli.add_argument("--recursive", action="store_true", help="目录模式下递归扫描")
+    cli.add_argument("--overwrite", action="store_true", help="覆盖已有缓存和文本结果")
+    cli.add_argument("--ffmpeg-bin", default="ffmpeg", help="ffmpeg 可执行文件名或路径")
+    cli.add_argument("--device", default="auto", help="auto、cpu、cuda:0 等")
+    cli.add_argument("--language", default="auto", help="zh、en、yue、ja、ko、auto")
+    cli.add_argument("--batch-size-s", type=int, default=60, help="动态 batch 秒数")
+    options = cli.parse_args()
+
+    cache_root = options.cache_root.resolve()
+    models_root = options.models_root.resolve()
+    audio_cache_dir = ensure_dir(cache_root / "audio")
+    transcript_dir = ensure_dir(cache_root / "transcripts")
+
+    input_root = options.input_path.resolve()
+    media_files = list(iter_media_files(input_root, recursive=options.recursive))
+    if not media_files:
+        raise SystemExit("没有找到可处理的音频或视频文件。")
+
+    try:
+        model, downloaded_models = build_model(models_root, device=options.device)
+    except ImportError as exc:
+        missing_deps_hint = (
+            "缺少 FunASR 或 ModelScope 依赖。请先在 scripts/.venv 中安装 requirements-transcribe.txt。"
+        )
+        raise SystemExit(missing_deps_hint) from exc
+
+    first_download_label = "是" if downloaded_models else "否"
+    print(f"模型目录: {models_root}")
+    print(f"缓存目录: {cache_root}")
+    print(f"模型首次下载: {first_download_label}")
+
+    # A non-positive --batch-size-s is clamped to 1; the value is the same
+    # for every file, so compute it once.
+    batch_size_s = max(1, options.batch_size_s)
+    for media_file in media_files:
+        audio_file, source_type = prepare_audio(
+            media_file,
+            audio_cache_dir=audio_cache_dir,
+            ffmpeg_bin=options.ffmpeg_bin,
+            overwrite=options.overwrite,
+        )
+        text_path = transcribe_file(
+            model,
+            audio_file,
+            output_dir=transcript_dir,
+            language=options.language,
+            batch_size_s=batch_size_s,
+            overwrite=options.overwrite,
+        )
+        if source_type == "video":
+            source_label = "视频，已先抽取音频"
+        else:
+            source_label = "音频"
+        print(f"输入文件: {media_file}")
+        print(f"输入类型: {source_label}")
+        print(f"音频文件: {audio_file}")
+        print(f"转写结果: {text_path}")
+# Run the command-line interface only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()