rahcp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rahcp/__init__.py +7 -0
- rahcp/cli/__init__.py +3 -0
- rahcp/cli/_client.py +32 -0
- rahcp/cli/_output.py +36 -0
- rahcp/cli/_run.py +27 -0
- rahcp/cli/auth.py +40 -0
- rahcp/cli/config.py +130 -0
- rahcp/cli/iiif.py +632 -0
- rahcp/cli/main.py +143 -0
- rahcp/cli/namespace.py +154 -0
- rahcp/cli/s3.py +623 -0
- rahcp/cli/transkribus.py +555 -0
- rahcp-0.1.0.dist-info/METADATA +40 -0
- rahcp-0.1.0.dist-info/RECORD +61 -0
- rahcp-0.1.0.dist-info/WHEEL +5 -0
- rahcp-0.1.0.dist-info/entry_points.txt +2 -0
- rahcp-0.1.0.dist-info/licenses/LICENSE +73 -0
- rahcp-0.1.0.dist-info/top_level.txt +7 -0
- rahcp_client/__init__.py +47 -0
- rahcp_client/_transfer.py +70 -0
- rahcp_client/bulk/__init__.py +34 -0
- rahcp_client/bulk/config.py +150 -0
- rahcp_client/bulk/download.py +157 -0
- rahcp_client/bulk/helpers.py +549 -0
- rahcp_client/bulk/protocol.py +61 -0
- rahcp_client/bulk/stream.py +183 -0
- rahcp_client/bulk/upload.py +212 -0
- rahcp_client/client.py +303 -0
- rahcp_client/config.py +27 -0
- rahcp_client/errors.py +57 -0
- rahcp_client/mapi.py +92 -0
- rahcp_client/s3.py +427 -0
- rahcp_client/tracing.py +120 -0
- rahcp_etl/__init__.py +16 -0
- rahcp_etl/checkpointing.py +65 -0
- rahcp_etl/consumer.py +98 -0
- rahcp_etl/dlq.py +146 -0
- rahcp_etl/pipeline.py +146 -0
- rahcp_iiif/__init__.py +17 -0
- rahcp_iiif/config.py +9 -0
- rahcp_iiif/downloader.py +270 -0
- rahcp_iiif/manifest.py +125 -0
- rahcp_iiif/verify.py +146 -0
- rahcp_tracker/__init__.py +26 -0
- rahcp_tracker/_base.py +211 -0
- rahcp_tracker/factory.py +46 -0
- rahcp_tracker/models.py +31 -0
- rahcp_tracker/postgres.py +43 -0
- rahcp_tracker/protocol.py +49 -0
- rahcp_tracker/sqlite.py +62 -0
- rahcp_transkribus/__init__.py +46 -0
- rahcp_transkribus/alto.py +71 -0
- rahcp_transkribus/client.py +252 -0
- rahcp_transkribus/config.py +22 -0
- rahcp_transkribus/errors.py +11 -0
- rahcp_transkribus/exporter.py +424 -0
- rahcp_transkribus/models.py +95 -0
- rahcp_transkribus/versions.py +75 -0
- rahcp_validate/__init__.py +39 -0
- rahcp_validate/images.py +132 -0
- rahcp_validate/rules.py +92 -0
rahcp/__init__.py
ADDED
rahcp/cli/__init__.py
ADDED
rahcp/cli/_client.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Shared client factory — auto-authenticates from config/flags/env."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from rahcp_client import HCPClient
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def make_client(ctx: typer.Context) -> HCPClient:
    """Build an ``HCPClient`` from the settings stored on ``ctx.obj``.

    ``endpoint`` is mandatory and is looked up first, so a context without it
    fails before anything else is read. Every other setting is optional and
    falls back to the value listed in ``fallbacks`` below.

    Priority: CLI flags > env vars > config file > defaults (that resolution
    happens before the values reach ``ctx.obj``; this function only reads the
    result).

    If username/password are available, the client auto-logs in on __aenter__.
    Tenant is passed during login so the backend routes to the correct HCP tenant.
    """
    from rahcp_client import HCPClient

    settings = ctx.obj
    endpoint = settings["endpoint"]

    # Optional settings and the value used when the context does not carry them.
    fallbacks = {
        "username": "",
        "password": "",
        "tenant": None,
        "verify_ssl": True,
        "timeout": 30.0,
        "multipart_threshold": 100 * 1024 * 1024,
        "multipart_chunk": 64 * 1024 * 1024,
    }
    resolved = {key: settings.get(key, default) for key, default in fallbacks.items()}

    return HCPClient(endpoint=endpoint, **resolved)
|
rahcp/cli/_output.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Rich table/JSON formatting helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from rich.console import Console
|
|
9
|
+
from rich.table import Table
|
|
10
|
+
|
|
11
|
+
# Shared Rich console instance that the output helpers in this module print through.
console = Console()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def print_json(data: Any) -> None:
    """Render ``data`` as 2-space-indented JSON on the shared console.

    Values that ``json`` cannot encode natively are converted with ``str``.
    """
    serialized = json.dumps(data, default=str, indent=2)
    console.print_json(serialized)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def print_table(
    rows: list[dict[str, Any]],
    columns: list[str] | None = None,
    *,
    title: str | None = None,
) -> None:
    """Render ``rows`` as a Rich table on the shared console.

    Args:
        rows: One dict per table row. When empty, a dimmed "No results."
            line is printed instead of a table.
        columns: Keys to show, in order. When omitted (or empty), the keys of
            the first row are used.
        title: Optional table title.

    Cells are stringified with ``str``; a key missing from a row renders as
    an empty cell.
    """
    if rows:
        headers = columns if columns else list(rows[0].keys())
        table = Table(title=title)
        for header in headers:
            table.add_column(header)
        for record in rows:
            cells = (str(record.get(header, "")) for header in headers)
            table.add_row(*cells)
        console.print(table)
    else:
        console.print("[dim]No results.[/dim]")
|
rahcp/cli/_run.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Shared async runner with error handling."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import sys
|
|
7
|
+
from collections.abc import Coroutine
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from rahcp.cli._output import console
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def run(coro: Coroutine[Any, Any, None]) -> None:
    """Run an async coroutine with clean error output.

    Args:
        coro: The coroutine to drive to completion with ``asyncio.run``.

    Exit behaviour:
        * ``KeyboardInterrupt`` exits silently with status 130 (the
          conventional 128 + SIGINT code).
        * ``typer.Exit`` / ``typer.Abort`` are re-raised untouched so Typer
          can apply the exit code the command asked for.
        * ``HCPError`` prints ``<ClassName>: <message>`` in red and exits 1.
        * Any other ``Exception`` prints ``Error: <exc>`` and exits 1.
    """
    try:
        asyncio.run(coro)
    except KeyboardInterrupt:
        sys.exit(130)
    except Exception as exc:
        # Imported lazily: only needed once an error has to be classified.
        import typer

        from rahcp_client.errors import HCPError

        # typer.Exit / typer.Abort are control-flow signals that subclass
        # RuntimeError. Without this guard a command's deliberate
        # ``raise typer.Exit(1)`` would be reported as "Error: 1".
        if isinstance(exc, (typer.Exit, typer.Abort)):
            raise

        if isinstance(exc, HCPError):
            label = type(exc).__name__
            console.print(f"[red]{label}:[/red] {exc.message}")
        else:
            console.print(f"[red]Error:[/red] {exc}")
        sys.exit(1)
|
rahcp/cli/auth.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Auth commands — whoami."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
|
|
10
|
+
from rahcp.cli._client import make_client
|
|
11
|
+
from rahcp.cli._output import console, print_json
|
|
12
|
+
from rahcp.cli._run import run
|
|
13
|
+
|
|
14
|
+
# Typer application for the authentication commands; with no arguments it shows help.
app = typer.Typer(help="Authentication", no_args_is_help=True)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@app.command()
def whoami(ctx: typer.Context) -> None:
    """Show current user info by decoding the JWT token."""
    # NOTE: the docstring above doubles as the command's --help text, so it is
    # kept short; implementation notes live in comments instead.
    #
    # Only the payload segment of the token is decoded. The signature is not
    # verified here, so the output is informational.

    async def _whoami() -> None:
        async with make_client(ctx) as client:
            token = client.token
            if not token:
                console.print("[red]Not authenticated. Check your config.[/red]")
                raise typer.Exit(1)
            try:
                # A JWT is "header.payload.signature"; index 1 is the payload.
                payload_b64 = token.split(".")[1]
                # JWTs strip base64 padding. ``-len % 4`` restores exactly the
                # 0-3 "=" characters that are missing (never a spurious 4).
                payload_b64 += "=" * (-len(payload_b64) % 4)
                payload = json.loads(base64.urlsafe_b64decode(payload_b64))
                # Valid JSON that is not an object would otherwise fail later
                # on ``payload.get`` with an unrelated AttributeError.
                if not isinstance(payload, dict):
                    raise ValueError("token payload is not a JSON object")
            except Exception as exc:
                # Any failure while decoding means the token is unusable.
                console.print(f"[red]Invalid token format:[/red] {exc}")
                raise typer.Exit(1) from exc
            if ctx.obj["json"]:
                print_json(payload)
            else:
                console.print(f"User: [bold]{payload.get('sub', '?')}[/bold]")
                console.print(f"Tenant: {payload.get('tenant', '(system)')}")

    run(_whoami())
|
rahcp/cli/config.py
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""YAML config file with named profiles.
|
|
2
|
+
|
|
3
|
+
Config file: ~/.rahcp/config.yaml (or --config / RAHCP_CONFIG)
|
|
4
|
+
|
|
5
|
+
Example::
|
|
6
|
+
|
|
7
|
+
default: dev
|
|
8
|
+
|
|
9
|
+
profiles:
|
|
10
|
+
dev:
|
|
11
|
+
endpoint: http://localhost:8000/api/v1
|
|
12
|
+
username: admin
|
|
13
|
+
password: secret
|
|
14
|
+
tenant: dev-ai
|
|
15
|
+
verify_ssl: false
|
|
16
|
+
prod:
|
|
17
|
+
endpoint: http://localhost:8000/api/v1
|
|
18
|
+
username: prod-user
|
|
19
|
+
password: secret
|
|
20
|
+
tenant: prod-archive
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import logging
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
import yaml
|
|
29
|
+
from pydantic import BaseModel, Field
|
|
30
|
+
|
|
31
|
+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Per-user configuration directory: ~/.rahcp
CONFIG_DIR = Path.home() / ".rahcp"
# Default configuration file inside CONFIG_DIR.
CONFIG_PATH = CONFIG_DIR / "config.yaml"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Profile(BaseModel):
    """A named connection profile."""

    # Every field has a default, so ``Profile()`` is a complete, usable
    # profile. Field names are the keys accepted in a profile's YAML mapping.

    # -- Connection --------------------------------------------------------
    endpoint: str = "http://localhost:8000/api/v1"
    username: str = ""
    password: str = ""
    tenant: str = ""
    verify_ssl: bool = True
    timeout: float = 30.0

    # -- Multipart upload (sizes in bytes) ---------------------------------
    multipart_threshold: int = 100 * 1024**2  # 100 MB
    multipart_chunk: int = 64 * 1024**2  # 64 MB
    multipart_concurrency: int = 6

    # -- Bulk transfer defaults --------------------------------------------
    bulk_workers: int = 10
    bulk_progress_interval: float = 5.0
    bulk_queue_depth: int = 8
    bulk_tracker_flush_every: int = 500
    bulk_tracker_dir: str = ""
    bulk_tracker_prefix: str = ""
    bulk_presign_batch_size: int = 200
    bulk_chunk_size: int = 4 * 1024**2  # 4 MB
    bulk_stream_threshold: int = 100 * 1024**2  # 100 MB

    # -- IIIF --------------------------------------------------------------
    iiif_url: str = "https://iiifintern-ai.ra.se"
    iiif_timeout: float = 60.0
    iiif_query_params: str = "full/max/0/default.jpg"
    iiif_workers: int = 4
    iiif_referer: str = ""

    # -- Transkribus -------------------------------------------------------
    transkribus_url: str = "https://transkribus.eu/TrpServer/rest"
    transkribus_username: str = ""
    transkribus_password: str = ""
    transkribus_timeout: float = 60.0
    transkribus_workers: int = 8

    # -- Observability -----------------------------------------------------
    log_level: str = "warning"
    otel_endpoint: str = ""
    otel_protocol: str = "http/protobuf"
    otel_service_name: str = "rahcp-cli"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class CLIConfig(BaseModel):
    """Config with named profiles."""

    # Name of the profile used when the caller does not ask for one.
    default: str = ""
    # Profiles keyed by name.
    profiles: dict[str, Profile] = Field(default_factory=dict)

    def resolve(self, name: str | None = None) -> Profile:
        """Resolve a profile by name, falling back to default.

        Args:
            name: Profile to look up. When ``None`` or empty, ``self.default``
                is used instead.

        Returns:
            The matching profile. If no profile matches, the only configured
            profile when exactly one exists, otherwise a ``Profile`` made of
            built-in defaults.

        A warning is logged when a profile name was requested (explicitly or
        via ``default``) but is not configured, so a mistyped name does not
        silently connect with different settings.
        """
        key = name or self.default
        if key:
            if key in self.profiles:
                return self.profiles[key]
            log.warning(
                "Profile %r not found in config; using a fallback profile", key
            )
        if len(self.profiles) == 1:
            return next(iter(self.profiles.values()))
        return Profile()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def load_config(path: str | None = None) -> CLIConfig:
    """Load config from a YAML file.

    Resolution: explicit path > RAHCP_CONFIG env > ~/.rahcp/config.yaml

    Args:
        path: Explicit config file location. When ``None`` or empty, the
            ``RAHCP_CONFIG`` environment variable is consulted, then the
            default ``CONFIG_PATH``.

    Returns:
        The parsed configuration. An empty ``CLIConfig`` is returned when the
        file does not exist, cannot be read or parsed, or does not contain a
        mapping at the top level (the last three cases log a warning).

    Two layouts are accepted: the multi-profile layout (a top-level
    ``profiles`` mapping plus optional ``default``) and the flat layout, where
    the whole document is treated as a single profile named ``"default"``.
    """
    import os  # local import: only needed for the RAHCP_CONFIG lookup

    source = path or os.environ.get("RAHCP_CONFIG")
    config_path = Path(source) if source else CONFIG_PATH
    if not config_path.exists():
        return CLIConfig()
    try:
        # YAML is read as UTF-8 rather than the platform's locale encoding.
        raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
    except Exception as exc:
        # Deliberately best-effort: a broken config must not crash the CLI,
        # but the cause is reported so the user can fix the file.
        log.warning("Failed to parse config file: %s (%s)", config_path, exc)
        return CLIConfig()

    # A document that is a list or a scalar cannot describe profiles.
    if not isinstance(raw, dict):
        log.warning(
            "Config file %s does not contain a mapping at the top level",
            config_path,
        )
        return CLIConfig()

    # Multi-profile format
    if "profiles" in raw:
        entries = raw["profiles"]
        # "profiles:" with no children parses as None; treat it as empty.
        if not isinstance(entries, dict):
            entries = {}
        return CLIConfig(
            # "default:" left blank parses as None; fall back to "".
            default=raw.get("default") or "",
            profiles={
                name: Profile(**vals)
                for name, vals in entries.items()
                if isinstance(vals, dict)
            },
        )

    # Flat format (backwards compat) — single "default" profile
    return CLIConfig(
        default="default",
        profiles={"default": Profile(**raw)},
    )
|