rahcp-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. rahcp/__init__.py +7 -0
  2. rahcp/cli/__init__.py +3 -0
  3. rahcp/cli/_client.py +32 -0
  4. rahcp/cli/_output.py +36 -0
  5. rahcp/cli/_run.py +27 -0
  6. rahcp/cli/auth.py +40 -0
  7. rahcp/cli/config.py +130 -0
  8. rahcp/cli/iiif.py +632 -0
  9. rahcp/cli/main.py +143 -0
  10. rahcp/cli/namespace.py +154 -0
  11. rahcp/cli/s3.py +623 -0
  12. rahcp/cli/transkribus.py +555 -0
  13. rahcp-0.1.0.dist-info/METADATA +40 -0
  14. rahcp-0.1.0.dist-info/RECORD +61 -0
  15. rahcp-0.1.0.dist-info/WHEEL +5 -0
  16. rahcp-0.1.0.dist-info/entry_points.txt +2 -0
  17. rahcp-0.1.0.dist-info/licenses/LICENSE +73 -0
  18. rahcp-0.1.0.dist-info/top_level.txt +7 -0
  19. rahcp_client/__init__.py +47 -0
  20. rahcp_client/_transfer.py +70 -0
  21. rahcp_client/bulk/__init__.py +34 -0
  22. rahcp_client/bulk/config.py +150 -0
  23. rahcp_client/bulk/download.py +157 -0
  24. rahcp_client/bulk/helpers.py +549 -0
  25. rahcp_client/bulk/protocol.py +61 -0
  26. rahcp_client/bulk/stream.py +183 -0
  27. rahcp_client/bulk/upload.py +212 -0
  28. rahcp_client/client.py +303 -0
  29. rahcp_client/config.py +27 -0
  30. rahcp_client/errors.py +57 -0
  31. rahcp_client/mapi.py +92 -0
  32. rahcp_client/s3.py +427 -0
  33. rahcp_client/tracing.py +120 -0
  34. rahcp_etl/__init__.py +16 -0
  35. rahcp_etl/checkpointing.py +65 -0
  36. rahcp_etl/consumer.py +98 -0
  37. rahcp_etl/dlq.py +146 -0
  38. rahcp_etl/pipeline.py +146 -0
  39. rahcp_iiif/__init__.py +17 -0
  40. rahcp_iiif/config.py +9 -0
  41. rahcp_iiif/downloader.py +270 -0
  42. rahcp_iiif/manifest.py +125 -0
  43. rahcp_iiif/verify.py +146 -0
  44. rahcp_tracker/__init__.py +26 -0
  45. rahcp_tracker/_base.py +211 -0
  46. rahcp_tracker/factory.py +46 -0
  47. rahcp_tracker/models.py +31 -0
  48. rahcp_tracker/postgres.py +43 -0
  49. rahcp_tracker/protocol.py +49 -0
  50. rahcp_tracker/sqlite.py +62 -0
  51. rahcp_transkribus/__init__.py +46 -0
  52. rahcp_transkribus/alto.py +71 -0
  53. rahcp_transkribus/client.py +252 -0
  54. rahcp_transkribus/config.py +22 -0
  55. rahcp_transkribus/errors.py +11 -0
  56. rahcp_transkribus/exporter.py +424 -0
  57. rahcp_transkribus/models.py +95 -0
  58. rahcp_transkribus/versions.py +75 -0
  59. rahcp_validate/__init__.py +39 -0
  60. rahcp_validate/images.py +132 -0
  61. rahcp_validate/rules.py +92 -0
rahcp/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """rahcp — Python SDK for HCP Unified API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from rahcp_client import HCPClient, HCPSettings
6
+
7
+ __all__ = ["HCPClient", "HCPSettings"]
rahcp/cli/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """rahcp-cli — CLI for HCP Unified API."""
2
+
3
+ from __future__ import annotations
rahcp/cli/_client.py ADDED
@@ -0,0 +1,32 @@
1
+ """Shared client factory — auto-authenticates from config/flags/env."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ import typer
8
+
9
+ if TYPE_CHECKING:
10
+ from rahcp_client import HCPClient
11
+
12
+
13
def make_client(ctx: typer.Context) -> HCPClient:
    """Build an ``HCPClient`` from the settings stored on the Typer context.

    The values are read from ``ctx.obj``, which is expected to already hold
    the resolved settings (priority: CLI flags > env vars > config file >
    defaults).  ``endpoint`` is looked up by index and therefore must be
    present; every other key falls back to the default shown below.

    When username/password are available the client logs in automatically
    on ``__aenter__``; the tenant is handed over at construction so the
    backend can route the login to the correct HCP tenant.
    """
    # Imported lazily so that importing this module does not pull in the
    # client package.
    from rahcp_client import HCPClient

    settings = ctx.obj
    return HCPClient(
        endpoint=settings["endpoint"],
        username=settings.get("username", ""),
        password=settings.get("password", ""),
        tenant=settings.get("tenant"),
        verify_ssl=settings.get("verify_ssl", True),
        timeout=settings.get("timeout", 30.0),
        # 100 MiB threshold / 64 MiB chunk unless overridden.
        multipart_threshold=settings.get("multipart_threshold", 100 * 1024 * 1024),
        multipart_chunk=settings.get("multipart_chunk", 64 * 1024 * 1024),
    )
rahcp/cli/_output.py ADDED
@@ -0,0 +1,36 @@
1
+ """Rich table/JSON formatting helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Any
7
+
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+
11
+ console = Console()
12
+
13
+
14
def print_json(data: Any) -> None:
    """Render *data* as indented JSON on the shared console.

    Values the ``json`` module cannot encode natively are converted with
    ``str`` before printing.
    """
    rendered = json.dumps(data, indent=2, default=str)
    console.print_json(rendered)
17
+
18
+
19
def print_table(
    rows: list[dict[str, Any]],
    columns: list[str] | None = None,
    *,
    title: str | None = None,
) -> None:
    """Print *rows* as a Rich table on the shared console.

    Args:
        rows: One dict per table row.
        columns: Keys to show, in order.  When omitted (or empty) the keys
            of the first row are used.
        title: Optional table title.

    A key missing from a row is rendered as an empty cell; every value is
    converted with ``str``.  An empty *rows* prints a dim "No results."
    line instead of a table.
    """
    if rows:
        headers = columns if columns else list(rows[0].keys())
        table = Table(title=title)
        for header in headers:
            table.add_column(header)
        for record in rows:
            cells = [str(record.get(header, "")) for header in headers]
            table.add_row(*cells)
        console.print(table)
    else:
        console.print("[dim]No results.[/dim]")
rahcp/cli/_run.py ADDED
@@ -0,0 +1,27 @@
1
+ """Shared async runner with error handling."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import sys
7
+ from collections.abc import Coroutine
8
+ from typing import Any
9
+
10
+ from rahcp.cli._output import console
11
+
12
+
13
def run(coro: Coroutine[Any, Any, None]) -> None:
    """Run an async coroutine to completion with clean error output.

    Args:
        coro: The coroutine to execute via ``asyncio.run``.

    Behaviour on failure:
        * ``KeyboardInterrupt`` exits the process with status 130.
        * ``typer.Exit`` / ``typer.Abort`` raised inside the coroutine are
          re-raised untouched so the CLI framework applies the requested
          exit code instead of it being reported as an error.
        * ``HCPError`` prints ``<ErrorClass>: <message>`` and exits with 1.
        * Any other exception prints ``Error: <exc>`` and exits with 1.
    """
    try:
        asyncio.run(coro)
    except KeyboardInterrupt:
        sys.exit(130)
    except Exception as exc:
        import typer

        # typer.Exit and typer.Abort derive from RuntimeError, so they land
        # here.  They are control flow, not failures: a command that already
        # printed its own message and raised typer.Exit(1) must not get an
        # extra "Error: 1" line or have its exit code overwritten.
        if isinstance(exc, (typer.Exit, typer.Abort)):
            raise

        # Imported lazily, only when an error actually has to be classified.
        from rahcp_client.errors import HCPError

        if isinstance(exc, HCPError):
            label = type(exc).__name__
            console.print(f"[red]{label}:[/red] {exc.message}")
        else:
            console.print(f"[red]Error:[/red] {exc}")
        sys.exit(1)
rahcp/cli/auth.py ADDED
@@ -0,0 +1,40 @@
1
+ """Auth commands — whoami."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import base64
6
+ import json
7
+
8
+ import typer
9
+
10
+ from rahcp.cli._client import make_client
11
+ from rahcp.cli._output import console, print_json
12
+ from rahcp.cli._run import run
13
+
14
+ app = typer.Typer(help="Authentication", no_args_is_help=True)
15
+
16
+
17
def _decode_jwt_payload(token: str) -> dict[str, object]:
    """Return the claims in the payload segment of a JWT, without verifying it.

    Raises:
        IndexError: The token has no second dot-separated segment.
        ValueError: The segment is not valid base64url, not valid JSON, or
            does not decode to a JSON object.
    """
    segment = token.split(".")[1]
    # JWT segments are base64url with the padding stripped; add back only
    # the characters that are actually missing (0-3).
    segment += "=" * (-len(segment) % 4)
    claims = json.loads(base64.urlsafe_b64decode(segment))
    if not isinstance(claims, dict):
        raise ValueError("JWT payload is not a JSON object")
    return claims


@app.command()
def whoami(ctx: typer.Context) -> None:
    """Show current user info by decoding the JWT token."""
    # NOTE: the docstring above is kept short on purpose; Typer shows it as
    # the command's help text.

    async def _whoami() -> None:
        async with make_client(ctx) as client:
            token = client.token
            if not token:
                console.print("[red]Not authenticated. Check your config.[/red]")
                raise typer.Exit(1)
            try:
                payload = _decode_jwt_payload(token)
            except (IndexError, ValueError) as exc:
                # binascii.Error, json.JSONDecodeError and UnicodeDecodeError
                # are all ValueError subclasses, so this covers every way a
                # malformed token can fail to decode.
                console.print(f"[red]Invalid token format:[/red] {exc}")
                raise typer.Exit(1) from exc
            if ctx.obj["json"]:
                print_json(payload)
            else:
                console.print(f"User: [bold]{payload.get('sub', '?')}[/bold]")
                console.print(f"Tenant: {payload.get('tenant', '(system)')}")

    run(_whoami())
rahcp/cli/config.py ADDED
@@ -0,0 +1,130 @@
1
+ """YAML config file with named profiles.
2
+
3
+ Config file: ~/.rahcp/config.yaml (or --config / RAHCP_CONFIG)
4
+
5
+ Example::
6
+
7
+ default: dev
8
+
9
+ profiles:
10
+ dev:
11
+ endpoint: http://localhost:8000/api/v1
12
+ username: admin
13
+ password: secret
14
+ tenant: dev-ai
15
+ verify_ssl: false
16
+ prod:
17
+ endpoint: http://localhost:8000/api/v1
18
+ username: prod-user
19
+ password: secret
20
+ tenant: prod-archive
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import logging
26
+ from pathlib import Path
27
+
28
+ import yaml
29
+ from pydantic import BaseModel, Field
30
+
31
+ log = logging.getLogger(__name__)
32
+
33
+ CONFIG_DIR = Path.home() / ".rahcp"
34
+ CONFIG_PATH = CONFIG_DIR / "config.yaml"
35
+
36
+
37
class Profile(BaseModel):
    """A named connection profile."""

    # --- Connection -------------------------------------------------------
    endpoint: str = "http://localhost:8000/api/v1"
    username: str = ""
    password: str = ""
    tenant: str = ""
    verify_ssl: bool = True
    timeout: float = 30.0

    # --- Multipart upload -------------------------------------------------
    multipart_threshold: int = 100 * 1024**2  # 100 MiB
    multipart_chunk: int = 64 * 1024**2  # 64 MiB
    multipart_concurrency: int = 6

    # --- Bulk transfer defaults -------------------------------------------
    bulk_workers: int = 10
    bulk_progress_interval: float = 5.0
    bulk_queue_depth: int = 8
    bulk_tracker_flush_every: int = 500
    bulk_tracker_dir: str = ""
    bulk_tracker_prefix: str = ""
    bulk_presign_batch_size: int = 200
    bulk_chunk_size: int = 4 * 1024**2  # 4 MiB
    bulk_stream_threshold: int = 100 * 1024**2  # 100 MiB

    # --- IIIF -------------------------------------------------------------
    iiif_url: str = "https://iiifintern-ai.ra.se"
    iiif_timeout: float = 60.0
    iiif_query_params: str = "full/max/0/default.jpg"
    iiif_workers: int = 4
    iiif_referer: str = ""

    # --- Transkribus ------------------------------------------------------
    transkribus_url: str = "https://transkribus.eu/TrpServer/rest"
    transkribus_username: str = ""
    transkribus_password: str = ""
    transkribus_timeout: float = 60.0
    transkribus_workers: int = 8

    # --- Observability ----------------------------------------------------
    log_level: str = "warning"
    otel_endpoint: str = ""
    otel_protocol: str = "http/protobuf"
    otel_service_name: str = "rahcp-cli"
83
+
84
+
85
class CLIConfig(BaseModel):
    """Config with named profiles."""

    # Name of the profile used when the caller does not ask for one.
    default: str = ""
    # Profiles keyed by their name.
    profiles: dict[str, Profile] = Field(default_factory=dict)

    def resolve(self, name: str | None = None) -> Profile:
        """Pick the profile to use.

        Lookup order:
          1. *name* if given, otherwise ``self.default`` — when that key
             exists in ``profiles``.
          2. The only configured profile, when exactly one exists.
          3. A ``Profile`` built purely from field defaults.
        """
        wanted = name if name else self.default
        if wanted and wanted in self.profiles:
            return self.profiles[wanted]
        if len(self.profiles) != 1:
            return Profile()
        (only,) = self.profiles.values()
        return only
99
+
100
+
101
def load_config(path: str | None = None) -> CLIConfig:
    """Load the CLI configuration from a YAML file.

    Resolution: explicit *path* > ``RAHCP_CONFIG`` env > ``~/.rahcp/config.yaml``.
    A leading ``~`` in an explicit or env-supplied path is expanded.

    Two layouts are accepted:
      * multi-profile — a top-level ``profiles`` mapping plus an optional
        ``default`` profile name; entries that are not mappings are skipped;
      * flat (backwards compat) — the top-level keys are the fields of a
        single profile, stored under the name ``"default"``.

    Returns:
        The parsed ``CLIConfig``.  An empty ``CLIConfig`` is returned when
        the file does not exist, cannot be read or parsed, or does not
        contain a mapping at the top level (the last two are logged as
        warnings).

    Raises:
        Whatever ``Profile`` / ``CLIConfig`` raise for field values they
        reject; those errors are deliberately not swallowed.
    """
    import os

    chosen = path or os.environ.get("RAHCP_CONFIG")
    config_path = Path(chosen).expanduser() if chosen else CONFIG_PATH
    if not config_path.exists():
        return CLIConfig()
    try:
        # Read as UTF-8 explicitly so parsing does not depend on the locale.
        raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
    except Exception as exc:
        # Best effort: an unreadable config must not stop the CLI, but the
        # reason is logged so the user can fix the file.
        log.warning("Failed to parse config file: %s (%s)", config_path, exc)
        return CLIConfig()

    # A YAML document may legally be a list or a scalar; neither is a config.
    if not isinstance(raw, dict):
        log.warning(
            "Ignoring config file %s: expected a mapping at the top level",
            config_path,
        )
        return CLIConfig()

    # Multi-profile format
    if "profiles" in raw:
        entries = raw["profiles"]
        if not isinstance(entries, dict):
            # e.g. "profiles:" left empty, which YAML loads as None.
            entries = {}
        return CLIConfig(
            # "default:" left empty loads as None; treat it as unset.
            default=raw.get("default") or "",
            profiles={
                name: Profile(**vals)
                for name, vals in entries.items()
                if isinstance(vals, dict)
            },
        )

    # Flat format (backwards compat) — single "default" profile
    return CLIConfig(
        default="default",
        profiles={"default": Profile(**raw)},
    )