pyetool 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyetool/__init__.py +0 -0
- pyetool/tool/__init__.py +0 -0
- pyetool/tool/ai/__init__.py +6 -0
- pyetool/tool/ai/cli.py +29 -0
- pyetool/tool/ai/visual/__init__.py +5 -0
- pyetool/tool/ai/visual/cli.py +82 -0
- pyetool/tool/ai/visual/model.py +18 -0
- pyetool/tool/ai/visual/visual.py +173 -0
- pyetool/tool/obj_storage/__init__.py +11 -0
- pyetool/tool/obj_storage/backend.py +14 -0
- pyetool/tool/obj_storage/cli.py +135 -0
- pyetool/tool/obj_storage/factory.py +36 -0
- pyetool/tool/obj_storage/volc_tos.py +69 -0
- pyetool/tool/site/__init__.py +5 -0
- pyetool/tool/site/bili/__init__.py +7 -0
- pyetool/tool/site/bili/bili.py +462 -0
- pyetool/tool/site/bili/cli.py +157 -0
- pyetool/tool/site/bili/model.py +21 -0
- pyetool/tool/site/cli.py +26 -0
- pyetool/tool/vidkit/__init__.py +9 -0
- pyetool/tool/vidkit/cli.py +29 -0
- pyetool/tool/vidkit/ffmpeg/__init__.py +28 -0
- pyetool/tool/vidkit/ffmpeg/cli.py +473 -0
- pyetool/tool/vidkit/ffmpeg/ffmpeg.py +248 -0
- pyetool/tool/vidkit/ffmpeg/ffprobe.py +140 -0
- pyetool/tool/vidkit/ffmpeg/model.py +94 -0
- pyetool/tool/vidkit/ffmpeg/typing.py +40 -0
- pyetool/tool/vidkit/ffmpeg/util.py +16 -0
- pyetool/tool/vidkit/recipes.py +237 -0
- pyetool/tool/vidkit/ytdlp/__init__.py +14 -0
- pyetool/tool/vidkit/ytdlp/cli.py +188 -0
- pyetool/tool/vidkit/ytdlp/format_selector.py +72 -0
- pyetool/tool/vidkit/ytdlp/model.py +115 -0
- pyetool/tool/vidkit/ytdlp/ytdlp.py +95 -0
- pyetool/tool/web_api/__init__.py +0 -0
- pyetool/tool/web_api/pexels/__init__.py +43 -0
- pyetool/tool/web_api/pexels/cli.py +555 -0
- pyetool/tool/web_api/pexels/model.py +109 -0
- pyetool/tool/web_api/pexels/pexels.py +187 -0
- pyetool/tool/web_api/pexels/typing.py +67 -0
- pyetool/tool/web_api/seedasr/__init__.py +41 -0
- pyetool/tool/web_api/seedasr/cli.py +698 -0
- pyetool/tool/web_api/seedasr/model.py +87 -0
- pyetool/tool/web_api/seedasr/seedasr.py +511 -0
- pyetool/tool/web_api/seedasr/typing.py +45 -0
- pyetool/util/__init__.py +0 -0
- pyetool/util/cli_util.py +109 -0
- pyetool/util/env.py +19 -0
- pyetool/util/iter.py +9 -0
- pyetool/util/monad.py +37 -0
- pyetool/util/path.py +13 -0
- pyetool-0.15.1.dist-info/METADATA +33 -0
- pyetool-0.15.1.dist-info/RECORD +55 -0
- pyetool-0.15.1.dist-info/WHEEL +4 -0
- pyetool-0.15.1.dist-info/entry_points.txt +8 -0
pyetool/__init__.py
ADDED
|
File without changes
|
pyetool/tool/__init__.py
ADDED
|
File without changes
|
pyetool/tool/ai/cli.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Top-level AI CLI: dispatches to per-tool subcommands.
|
|
2
|
+
|
|
3
|
+
Each AI tool under `pyetool/tool/ai/` contributes its own click command
|
|
4
|
+
(e.g. `visual`) which is registered here.
|
|
5
|
+
|
|
6
|
+
All subcommands emit JSONL on stdout (one record per line) so they compose
|
|
7
|
+
cleanly with `jq` and other JSONL tools.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
|
|
12
|
+
from .visual import cli as visual_cli
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Root click group for the AI tools. The callback body is intentionally empty:
# the group only hosts the subcommands registered below, and its docstring is
# what click renders as the `--help` text.
@click.group()
def cli() -> None:
    """pyetool AI tools — LLM-based analyses, pipeline-friendly via JSONL.

    \b
    Subcommands:
      visual VIDEO analyse a video's visual content with Gemini, emit Visuals JSONL.
    """


# Register the `visual` command imported from the sibling `.visual` package.
cli.add_command(visual_cli)


# Allow running this module directly as a script.
if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""CLI for `aix visual`: analyse a video's visual content with Gemini, emit JSONL."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
import click
|
|
6
|
+
|
|
7
|
+
from pyetool.util.cli_util import collect_records, fail
|
|
8
|
+
from pyetool.util.monad import Err, Ok
|
|
9
|
+
|
|
10
|
+
from .visual import VisualAnalyser
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# `visual` subcommand: every input record is analysed and echoed back as one
# JSON line with the analysis fields merged in. The docstring below doubles as
# the click help text, so it is reproduced verbatim.
@click.command("visual")
@click.argument("video", required=False)
@click.option(
    "--model",
    default="gemini-2.5-flash",
    show_default=True,
    help="Gemini model id (e.g. gemini-2.5-flash, gemini-2.5-pro).",
)
@click.option(
    "--max-attempts",
    type=int,
    default=5,
    show_default=True,
    help="Total tries (initial + retries) for the generate call when rate-limited (429).",
)
@click.option(
    "--api-key",
    default=None,
    help="Gemini API key. Falls back to $GEMINI_API_KEY in ~/.config/pyetool/.env or ./.env.",
)
@click.option(
    "--input-key",
    default="path",
    show_default=True,
    help="JSONL field name carrying the source video path or URL. Default 'path' "
    "matches the pyetool batch convention (vidkit / pexels download / etc.).",
)
def cli(video, model, max_attempts, api_key, input_key):
    """Analyse a video's visual content with a Gemini model.

    \b
    VIDEO is a local file path or an http(s) URL (e.g. a YouTube URL).
    Pass as a positional argument and/or pipe records via stdin (one per
    line: a plain path/URL string, or a JSON object whose `--input-key` field
    is the source).

    \b
    Output: pass-through JSONL — each input record with the following
    fields appended:
      summary — overall 2-4 sentence description
      scenes — list of {summary, mood} per distinct shot, chronological
      mood — overall mood / tone

    \b
    Examples:
      aix visual ./clip.mp4
      aix visual 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
      printf './a.mp4\\n./b.mp4\\n' | aix visual --model gemini-2.5-pro
      pexels videos search 'sunset' --per-page 3 \\
        | pexels videos download ./pack/ \\
        | vidkit batch-shrink ./shrunk/ \\
        | aix visual
    """
    # One analyser is shared by all records of this invocation.
    analyser = VisualAnalyser(model=model, api_key=api_key, max_attempts=max_attempts)
    records = collect_records(video, default_key=input_key, label="VIDEO")

    for record in records:
        source = record.get(input_key)
        if not source:
            raise click.UsageError(f"record missing {input_key!r} field: {record!r}")

        outcome = analyser.analyse(source)
        if isinstance(outcome, Ok):
            # Pass-through: input fields first, analysis fields layered on top.
            merged = dict(record)
            merged.update(outcome.value.model_dump(mode="json"))
            click.echo(json.dumps(merged, ensure_ascii=False))
        elif isinstance(outcome, Err):
            fail(outcome)


if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Pydantic models for video visual analysis output."""
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Element type of `Visuals.scenes`. Both fields are required strings; the
# `description` texts are part of the field definitions, so they are left
# untouched here.
class Scene(BaseModel):
    """One distinct shot or scene in the video."""

    # Free-text account of the scene's content.
    summary: str = Field(description="What happens in this scene")
    # Free-text mood / tone label for this scene only.
    mood: str = Field(description="Mood or tone of this scene")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Top-level analysis result. `VisualAnalyser` passes this class as the agent's
# `output_type`, and the `visual` CLI serialises instances with
# `model_dump(mode="json")` before merging them into each output record.
class Visuals(BaseModel):
    """Structured description of a video's visual content."""

    # Whole-video description.
    summary: str = Field(description="2-4 sentence overall description of the video")
    # Per-shot breakdown; see `Scene` for the element fields.
    scenes: list[Scene] = Field(description="Distinct shots in chronological order")
    # Whole-video mood / tone, as opposed to the per-scene `Scene.mood`.
    mood: str = Field(description="Overall mood or tone of the video")
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Gemini-based video visual analysis via pydantic-ai.
|
|
2
|
+
|
|
3
|
+
Source: a local file path or an http(s) URL (e.g. YouTube). Local files are
|
|
4
|
+
uploaded via the Gemini Files API and referenced by their generated URI;
|
|
5
|
+
URLs are passed through to Gemini as-is.
|
|
6
|
+
|
|
7
|
+
Output: `Visuals` — overall summary, per-scene breakdown, overall mood.
|
|
8
|
+
|
|
9
|
+
Auth: pass `api_key` to `VisualAnalyser`, or set `GEMINI_API_KEY` in
|
|
10
|
+
`~/.config/pyetool/.env` or `./.env`.
|
|
11
|
+
|
|
12
|
+
Retries: on 429 RESOURCE_EXHAUSTED the server returns a `RetryInfo.retryDelay`
|
|
13
|
+
hint inside the response body (not in the `Retry-After` header). The
|
|
14
|
+
google-genai SDK's built-in retry ignores it; we extract it from
|
|
15
|
+
`ModelHTTPError.body` surfaced by pydantic-ai and sleep for that duration.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import time
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any, ClassVar
|
|
22
|
+
|
|
23
|
+
from google.genai.types import File
|
|
24
|
+
from pydantic_ai import Agent, DocumentUrl, VideoUrl
|
|
25
|
+
from pydantic_ai.exceptions import ModelHTTPError
|
|
26
|
+
from pydantic_ai.models.google import GoogleModel
|
|
27
|
+
from pydantic_ai.providers.google import GoogleProvider
|
|
28
|
+
|
|
29
|
+
from pyetool.util.env import load_env
|
|
30
|
+
from pyetool.util.monad import Err, Ok, Result
|
|
31
|
+
|
|
32
|
+
from .model import Visuals
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class VisualAnalyser:
    """Analyse a video's visual content with a Gemini model via pydantic-ai.

    `analyse()` accepts a local path or an http(s) URL; local files are
    uploaded via the Gemini Files API and then referenced by their URI.
    Failures anticipated by `analyse()` are returned as `Err` values rather
    than raised.
    """

    # Instruction sent alongside the video; the response structure itself is
    # dictated by the agent's `output_type`.
    _PROMPT: ClassVar[str] = "Describe the visual content of this video as structured data."
    # First fallback delay in seconds; doubled after each retryable failure.
    _BACKOFF_BASE: ClassVar[float] = 5.0
    # Seconds between state polls while an uploaded file is PROCESSING.
    _POLL_INTERVAL: ClassVar[float] = 2.0
    # Upper bound, in seconds, on waiting for an upload to leave PROCESSING.
    _UPLOAD_TIMEOUT: ClassVar[float] = 600.0

    def __init__(
        self,
        model: str = "gemini-2.5-flash",
        api_key: str | None = None,
        max_attempts: int = 5,
    ) -> None:
        """Auth: pass `api_key`, or set `GEMINI_API_KEY` in `~/.config/pyetool/.env` / `./.env`.

        `max_attempts` is the total number of tries (initial + retries) for
        the generate call; the upload step is single-shot. Values below 1
        are treated as 1 when generating.

        Raises:
            ValueError: if no API key is passed and `GEMINI_API_KEY` is unset.
        """
        load_env()
        if not api_key and not (api_key := os.environ.get("GEMINI_API_KEY")):
            raise ValueError("api_key is not provided as parameter or GEMINI_API_KEY in .env")

        self._provider = GoogleProvider(api_key=api_key)
        self._agent = Agent(
            GoogleModel(model, provider=self._provider),
            output_type=Visuals,
        )
        self._max_attempts = max_attempts

    def analyse(self, video: str | Path) -> Result[Visuals]:
        """Analyse a local file or an http(s) URL.

        Returns `Ok(Visuals)` on success. Returns `Err` when the local file
        is missing, when the upload cannot be prepared (`RuntimeError` from
        the upload/poll step), or when the generate call ends in a
        `ModelHTTPError` (retries exhausted or a non-retryable status); in the
        last case `returncode` carries the HTTP status code.
        """
        try:
            content = self._prepare_content(video)
        except (FileNotFoundError, RuntimeError) as e:
            return Err(err=str(e))

        try:
            return Ok(self._generate(content))
        except ModelHTTPError as e:
            return Err(err=f"Gemini API error: {e.body or e}", returncode=e.status_code)

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _prepare_content(self, video: str | Path) -> list[Any]:
        """Build the agent input: the prompt plus a reference to the video.

        http(s) strings are passed through as `VideoUrl`; anything else is
        treated as a local path, uploaded, and referenced by the uploaded
        file's URI and MIME type.

        Raises:
            FileNotFoundError: if the local path does not exist.
            RuntimeError: if the upload fails or returns incomplete metadata.
        """
        if isinstance(video, str) and video.startswith(("http://", "https://")):
            return [self._PROMPT, VideoUrl(url=video)]

        path = Path(video)
        if not path.exists():
            raise FileNotFoundError(f"Video file not found: {path}")
        file = self._upload(path)
        # Explicit check instead of `assert`: asserts vanish under `python -O`,
        # and a RuntimeError is what `analyse()` converts into an `Err`.
        if file.uri is None or file.mime_type is None:
            raise RuntimeError(f"Gemini returned no URI or MIME type for uploaded file '{path.name}'")
        return [self._PROMPT, DocumentUrl(url=file.uri, media_type=file.mime_type)]

    def _upload(self, path: Path) -> File:
        """Upload then poll until the file leaves PROCESSING.

        Raises:
            RuntimeError: if the file ends up FAILED, stays in PROCESSING past
                `_UPLOAD_TIMEOUT`, or has no name to poll with.
        """
        file = self._provider.client.files.upload(file=path)
        deadline = time.monotonic() + self._UPLOAD_TIMEOUT

        while file.state is not None and file.state.name == "PROCESSING":
            if time.monotonic() >= deadline:
                raise RuntimeError(f"Gemini file '{path.name}' stayed in PROCESSING for over {self._UPLOAD_TIMEOUT}s")
            if file.name is None:
                # Without a name the file cannot be re-fetched, so polling
                # could never make progress.
                raise RuntimeError(f"Gemini returned no name for uploaded file '{path.name}'; cannot poll its state")
            time.sleep(self._POLL_INTERVAL)
            file = self._provider.client.files.get(name=file.name)

        if file.state is not None and file.state.name == "FAILED":
            raise RuntimeError(
                f"Gemini failed to process '{path.name}'; the file may be corrupted, "
                f"in an unsupported format, or exceed size limits"
            )
        return file

    def _generate(self, content: list[Any]) -> Visuals:
        """Run the agent with retry on transient errors.

        Retries on 429 and on any status >= 500. The server's
        `RetryInfo.retryDelay` hint is honored when the error body carries a
        usable one; otherwise the delay is `_BACKOFF_BASE * 2**attempt`.
        Non-retryable statuses, and the final failed attempt, re-raise the
        `ModelHTTPError`.
        """
        # Always make at least one call: with a non-positive `max_attempts`
        # the loop would otherwise never run and nothing could be returned.
        attempts = max(1, self._max_attempts)
        for attempt in range(attempts):
            try:
                return self._agent.run_sync(content).output
            except ModelHTTPError as e:
                retryable = e.status_code == 429 or e.status_code >= 500
                if not retryable or attempt == attempts - 1:
                    raise
                delay = self._parse_retry_delay(e.body) or self._BACKOFF_BASE * (2**attempt)
                time.sleep(delay)

        # Every iteration either returns or raises on the last attempt.
        raise RuntimeError("unreachable: retry loop exited without returning or raising")

    @staticmethod
    def _parse_retry_delay(body: object) -> float | None:
        """Extract `retryDelay` seconds from a 429 body's `RetryInfo` entry.

        Gemini's 429 body shape (per google.rpc.Status):
            {"error": {"code": 429, "details": [
                {"@type": ".../google.rpc.RetryInfo", "retryDelay": "53s"}, ...
            ]}}

        Returns the delay as a float, or None when the body has a different
        shape or carries no usable value. Negative and non-finite values are
        rejected because `time.sleep` cannot accept them.
        """
        if not isinstance(body, dict):
            return None
        error = body.get("error")
        if not isinstance(error, dict):
            return None
        details = error.get("details", [])
        if not isinstance(details, list):
            return None

        for entry in details:
            if not isinstance(entry, dict):
                continue
            if not str(entry.get("@type", "")).endswith("RetryInfo"):
                continue
            raw = str(entry.get("retryDelay", ""))
            if not raw.endswith("s"):
                continue
            try:
                seconds = float(raw[:-1])
            except ValueError:
                continue
            # NaN fails both comparisons, so it is rejected here as well.
            if 0 <= seconds < float("inf"):
                return seconds
        return None
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Object storage abstraction with a pluggable backend, plus a Volcano TOS implementation."""
|
|
2
|
+
|
|
3
|
+
from .cli import cli
|
|
4
|
+
from .factory import obj_storage
|
|
5
|
+
from .volc_tos import VolcTosBackend
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"VolcTosBackend",
|
|
9
|
+
"cli", # pyproject script depends on it
|
|
10
|
+
"obj_storage",
|
|
11
|
+
]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Protocol that any object storage backend must satisfy."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
from pyetool.util.monad import Result
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ObjStorageBackend(Protocol):
    """Structural interface for object-storage backends.

    Each operation reports its outcome as a `Result` value; the CLI in this
    package matches on `Ok` / `Err` rather than catching exceptions.
    """

    def put(self, file_path: Path | str, key: str) -> Result[str]:
        """Upload the local file at `file_path` under object key `key`.

        The success value is a string; the TOS backend in this package
        returns the object's URL.
        """
        ...

    def get(self, key: str, file_path: Path | str) -> Result[Path]:
        """Download object `key` to the local path `file_path`.

        The success value is a `Path`; the TOS backend in this package
        returns the path it wrote to.
        """
        ...

    def delete(self, key: str) -> Result[None]:
        """Delete object `key`; success carries no value."""
        ...
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Click-based CLI for object storage.
|
|
2
|
+
|
|
3
|
+
Pipeline-friendly: on success a single line (URL / local path / key) is printed to stdout;
|
|
4
|
+
errors go to stderr with non-zero exit, so pipes break cleanly.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
|
|
12
|
+
from pyetool.util.monad import Err, Ok
|
|
13
|
+
|
|
14
|
+
from .factory import obj_storage
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Root group for the object-storage CLI. It builds the backend once and hands
# it to the subcommands through `ctx.obj`. The docstring is the click help
# text and is kept verbatim.
@click.group()
@click.option("-e", "--endpoint", default=None, help="Endpoint host. Falls back to $OBJ_STORAGE_ENDPOINT.")
@click.option("-b", "--bucket", default=None, help="Bucket name. Falls back to $OBJ_STORAGE_BUCKET.")
@click.option(
    "-r", "--region", default=None, help='Region (e.g. cn-beijing). Falls back to $OBJ_STORAGE_EXTRA_ARGS["region"].'
)
@click.option("--backend-type", default="tos", help='Storage backend. Only "tos" is implemented for now.')
@click.pass_context
def cli(ctx, endpoint, bucket, region, backend_type):
    """Upload, download, and delete files in object storage.

    \b
    Environment variables (loaded from ~/.config/pyetool/.env then ./.env):
      TOS_ACCESS_KEY_ID Required. Volc TOS access key ID.
      TOS_SECRET_ACCESS_KEY Required. Volc TOS secret access key.
      OBJ_STORAGE_ENDPOINT Fallback for --endpoint.
      OBJ_STORAGE_BUCKET Fallback for --bucket.
      OBJ_STORAGE_EXTRA_ARGS Semicolon-separated key:value pairs that
      supply backend-specific args, e.g.
      "region:cn-beijing;<key>:<value>". Fallback source for
      --region.

    \b
    Resolution order for endpoint / bucket / region: CLI flag > env var.
    """
    # Forward --region only when it was given, so the factory can fall back
    # to the environment otherwise.
    extra = {"region": region} if region else {}
    try:
        ctx.obj = obj_storage(backend_type=backend_type, endpoint=endpoint, bucket=bucket, **extra)
    except ValueError as exc:
        # Missing or invalid configuration is a user-facing error: print a
        # one-line message on stderr and exit with status 1, no traceback.
        raise click.ClickException(str(exc)) from exc
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# `put` subcommand; the docstring is the click help text and is kept verbatim.
@cli.command()
@click.argument("file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.argument("key")
@click.pass_obj
def put(backend, file: Path, key: str):
    """Upload a local file to object storage.

    \b
    Arguments:
      FILE Path to the local file to upload. Must exist and be a regular file.
      KEY Object key (path within the bucket) the file will be stored under,
      e.g. "videos/2026/clip.mp4". No leading slash.

    \b
    On success: the resulting public URL is printed to stdout (one line).
    On failure: error goes to stderr, exit code is non-zero.

    \b
    Example:
      obj-storage put ./clip.mp4 videos/2026/clip.mp4
    """
    outcome = backend.put(file, key)
    if isinstance(outcome, Ok):
        # Success: the URL is the only thing written to stdout.
        click.echo(outcome.value)
    elif isinstance(outcome, Err):
        click.echo(f"error: {outcome.err}", err=True)
        sys.exit(1)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# `get` subcommand; the docstring is the click help text and is kept verbatim.
@cli.command()
@click.argument("key")
@click.argument("file", type=click.Path(dir_okay=False, path_type=Path))
@click.pass_obj
def get(backend, key: str, file: Path):
    """Download an object from storage to a local file.

    \b
    Arguments:
      KEY Object key (path within the bucket) to download,
      e.g. "videos/2026/clip.mp4".
      FILE Local destination path. Existing files are overwritten;
      the parent directory must already exist.

    \b
    On success: the local file path is printed to stdout (one line).
    On failure: error goes to stderr, exit code is non-zero.

    \b
    Example:
      obj-storage get videos/2026/clip.mp4 ./clip.mp4
    """
    outcome = backend.get(key, file)
    if isinstance(outcome, Ok):
        # Success: print the destination path as text.
        click.echo(str(outcome.value))
    elif isinstance(outcome, Err):
        click.echo(f"error: {outcome.err}", err=True)
        sys.exit(1)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# `delete` subcommand; the docstring is the click help text and is kept
# verbatim.
@cli.command()
@click.argument("key")
@click.pass_obj
def delete(backend, key: str):
    """Delete an object from storage.

    \b
    Arguments:
      KEY Object key (path within the bucket) to delete,
      e.g. "videos/2026/clip.mp4".

    \b
    On success: KEY is echoed back to stdout (useful for piping into another step).
    On failure: error goes to stderr, exit code is non-zero.

    \b
    Example:
      obj-storage delete videos/2026/clip.mp4
    """
    outcome = backend.delete(key)
    if isinstance(outcome, Ok):
        # Nothing is returned on success, so echo the key that was removed.
        click.echo(key)
    elif isinstance(outcome, Err):
        click.echo(f"error: {outcome.err}", err=True)
        sys.exit(1)


if __name__ == "__main__":
    cli()
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Factory that picks a concrete object-storage backend based on type and env config."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from pyetool.util.env import load_env
|
|
7
|
+
|
|
8
|
+
from .backend import ObjStorageBackend
|
|
9
|
+
from .volc_tos import VolcTosBackend
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def obj_storage(
    backend_type: Literal["tos"] = "tos",
    endpoint: str | None = None,
    bucket: str | None = None,
    **kwargs,
) -> ObjStorageBackend:
    """Build the object-storage backend selected by `backend_type`.

    Explicit arguments take precedence; anything missing is read from the
    environment after `load_env()` has run.

    Args:
        backend_type: Backend identifier; only "tos" is implemented.
        endpoint: Endpoint host. Falls back to $OBJ_STORAGE_ENDPOINT.
        bucket: Bucket name. Falls back to $OBJ_STORAGE_BUCKET.
        **kwargs: Backend-specific arguments; `region` is the one consumed
            here. Keys that are absent (or None) are filled in from
            $OBJ_STORAGE_EXTRA_ARGS, a semicolon-separated list of
            `key:value` pairs such as "region:cn-beijing".

    Returns:
        A `VolcTosBackend` configured with the resolved endpoint, bucket and
        region.

    Raises:
        ValueError: for an unsupported `backend_type`, a missing endpoint,
            bucket or region, or a malformed $OBJ_STORAGE_EXTRA_ARGS entry.
    """
    if backend_type != "tos":
        raise ValueError("tos is the only option for now")

    load_env()

    if not endpoint and not (endpoint := os.environ.get("OBJ_STORAGE_ENDPOINT")):
        raise ValueError("endpoint is not given or OBJ_STORAGE_ENDPOINT is not in .env")

    if not bucket and not (bucket := os.environ.get("OBJ_STORAGE_BUCKET")):
        raise ValueError("bucket is not given or OBJ_STORAGE_BUCKET is not in .env")

    if extra_args := os.environ.get("OBJ_STORAGE_EXTRA_ARGS"):
        # example: OBJ_STORAGE_EXTRA_ARGS=key1:value1;key2:value2...
        for section in extra_args.split(";"):
            if not section.strip():
                # Tolerate empty entries such as the one after a trailing ';'.
                continue
            # Split on the first ':' only, so values may themselves contain ':'.
            name, sep, value = section.partition(":")
            name = name.strip()
            if not sep or not name:
                raise ValueError(f"malformed OBJ_STORAGE_EXTRA_ARGS entry {section!r}; expected key:value")
            # The environment is only a fallback: never overwrite a value the
            # caller passed explicitly.
            if kwargs.get(name) is None:
                kwargs[name] = value.strip()

    if not (region := kwargs.get("region")):
        raise ValueError("region is not provided in kwargs or in OBJ_STORAGE_EXTRA_ARGS")

    return VolcTosBackend(endpoint, bucket, region=region)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""VolcEngine TOS object-storage backend."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import tos
|
|
7
|
+
|
|
8
|
+
from pyetool.util.env import load_env
|
|
9
|
+
from pyetool.util.monad import Err, Ok, Result
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class VolcTosBackend:
    """Object-storage backend backed by VolcEngine TOS (`tos.TosClientV2`).

    Credentials come from $TOS_ACCESS_KEY_ID / $TOS_SECRET_ACCESS_KEY, read
    after `load_env()`. `put`, `get` and `delete` report failures as `Err`
    values instead of raising.
    """

    def __init__(self, endpoint: str, bucket: str, region: str, **_) -> None:
        """Create the TOS client.

        Args:
            endpoint: Endpoint host; surrounding whitespace, a leading
                "https://" and trailing slashes are removed.
            bucket: Bucket name; surrounding whitespace and one trailing "/"
                are removed.
            region: Region name; surrounding whitespace is removed.
            **_: Extra keyword arguments are accepted and ignored.

        Raises:
            ValueError: if either credential variable is missing or empty.
        """
        load_env()
        self._access_key_id = os.environ.get("TOS_ACCESS_KEY_ID")
        self._access_key_secret = os.environ.get("TOS_SECRET_ACCESS_KEY")

        if not self._access_key_id:
            raise ValueError("TOS_ACCESS_KEY_ID is not in .env")
        if not self._access_key_secret:
            raise ValueError("TOS_SECRET_ACCESS_KEY is not in .env")

        # Drop trailing slashes too; otherwise the URL built in `put()` would
        # contain "//" before the key.
        self._endpoint = endpoint.strip().removeprefix("https://").rstrip("/")
        self._bucket = bucket.strip().removesuffix("/")
        self._region = region.strip()

        self._client = tos.TosClientV2(
            ak=self._access_key_id,
            sk=self._access_key_secret,
            endpoint=self._endpoint,
            region=self._region,
        )

    def put(self, file_path: Path | str, key: str) -> Result[str]:
        """Upload `file_path` under `key`.

        Returns `Ok` with the URL `https://<bucket>.<endpoint>/<key>` (key
        percent-encoded), or `Err` if the path is not a regular file or the
        upload raises.
        """
        from urllib.parse import quote

        file_path = Path(file_path)
        # `is_file()` also rejects directories, which `exists()` let through
        # only to fail later inside `open()`.
        if not file_path.is_file():
            return Err(err=f"{file_path} does not exist or is not a regular file")

        try:
            with file_path.open("rb") as f:
                self._client.put_object(bucket=self._bucket, key=key, content=f)
        except Exception as e:
            return Err(err=f"upload failed, {e}")

        # Percent-encode the key so spaces, '#', '?' or non-ASCII characters
        # still give a valid URL; '/' is kept as the path separator.
        url = f"https://{self._bucket}.{self._endpoint}/{quote(key)}"
        return Ok(url)

    def get(self, key: str, file_path: Path | str) -> Result[Path]:
        """Download object `key` to `file_path`.

        The whole object is read into memory before being written. Returns
        `Ok` with the destination path, or `Err` if either the download or
        the local write fails.
        """
        try:
            content = self._client.get_object(bucket=self._bucket, key=key).read()
        except Exception as e:
            return Err(err=str(e))

        file_path = Path(file_path)

        # A failed write (missing parent directory, permissions, ...) is
        # reported as `Err` like every other failure of this backend.
        try:
            if isinstance(content, bytes):
                file_path.write_bytes(content)
            else:
                file_path.write_text(content)
        except OSError as e:
            return Err(err=f"cannot write {file_path}: {e}")

        return Ok(file_path)

    def delete(self, key: str) -> Result[None]:
        """Delete object `key`; returns `Ok(None)` or `Err` if the call raises."""
        try:
            self._client.delete_object(bucket=self._bucket, key=key)
        except Exception as e:
            return Err(err=f"delete failed: {e}")

        return Ok(None)
|