heritage-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ """heritage-cli — Unified CLI for the heritage science open-source ecosystem.
2
+
3
+ Provides a single `heritage` command that routes to sibling projects:
4
+ heritage run → hoard run (HOARD pipeline)
5
+ heritage calibrate → libby (radiocarbon calibration)
6
+ heritage lithics → dibble (lithic analysis)
7
+ heritage review → trowel (review dashboard)
8
+ heritage matrix → stratigraph (Harris Matrix)
9
+ heritage publish → hoard export (final report)
10
+ heritage tools → list installed ecosystem tools
11
+
12
+ Usage:
13
+ heritage --help
14
+ heritage run --project X --phase 0
15
+ heritage calibrate --project X --input samples.json
16
+ heritage tools list
17
+
18
+ Configuration: ~/.config/heritage/config.toml
19
+ """
20
+
21
+ __version__ = "1.0.0"
@@ -0,0 +1,6 @@
1
+ """Command dispatch modules for sibling tools.
2
+
3
+ Each module exposes `tool_name` and `dispatch()` for the plugin
4
+ discovery system. Tools can be invoked via `heritage <tool>` or
5
+ programmatically via `dispatch()`.
6
+ """
@@ -0,0 +1,19 @@
1
+ """HOARD command dispatch — run pipeline phases.
2
+
3
+ Registered as heritage_cli.commands.hoard in entry_points.
4
+ """
5
+
6
+ tool_name = "hoard"
7
+ description = "Heritage Observation And Report Drafter — AI pipeline"
8
+
9
+
10
def dispatch(args: list[str] | None = None) -> int:
    """Run the ``hoard`` executable with the given CLI arguments.

    Args:
        args: Arguments placed after ``hoard`` on the command line.
            ``None`` means "forward whatever followed ``heritage <tool>``
            on our own command line" (``sys.argv[2:]``). An explicit empty
            list runs ``hoard`` with no arguments.

    Returns:
        The subprocess exit code, or 127 (the shell convention for
        "command not found") when no ``hoard`` executable is on PATH.
    """
    import shutil
    import subprocess
    import sys

    # `args or sys.argv[2:]` would treat an explicit [] as "not given" and
    # leak this process's own argv into a programmatic call.
    forwarded = sys.argv[2:] if args is None else list(args)

    hoard_bin = shutil.which("hoard")
    if hoard_bin is None:
        print("hoard: command not found. Install with: pip install hoard", file=sys.stderr)
        return 127

    return subprocess.run([hoard_bin, *forwarded]).returncode
heritage_cli/main.py ADDED
@@ -0,0 +1,389 @@
1
+ """main.py — Heritage CLI entry point and command tree.
2
+
3
+ Uses Typer for the command hierarchy with Rich for output formatting.
4
+ Routes commands to sibling project executables or Python packages.
5
+
6
+ Usage:
7
+ heritage --help
8
+ heritage run --project X --phase 0
9
+ heritage run --project X --auto
10
+ heritage calibrate --project X
11
+ heritage lithics --project X --input ./scans/
12
+ heritage review --project X
13
+ heritage matrix --project X
14
+ heritage publish --project X --format docx,pdf
15
+ heritage tools list
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from pathlib import Path
21
+
22
+ import typer
23
+ from rich.console import Console
24
+ from rich.table import Table
25
+
26
+ from heritage_cli import __version__
27
+
28
+ app = typer.Typer(
29
+ name="heritage",
30
+ help="Heritage science ecosystem CLI — orchestrates HOARD, StratiGraph, Trowel, Libby, and Dibble",
31
+ no_args_is_help=True,
32
+ rich_markup_mode="rich",
33
+ )
34
+ console = Console()
35
+
36
+
37
+ # ── Version callback ─────────────────────────────────────────────────────────
38
+
39
+
40
def _version_callback(value: bool) -> None:
    """Print the package version and exit when ``--version`` was passed.

    Does nothing when *value* is false, so normal command processing
    continues.
    """
    if not value:
        return
    console.print(f"heritage-cli v{__version__}")
    raise typer.Exit()
44
+
45
+
46
@app.callback()
def _main(
    # All the work happens in the option's callback; this function body is
    # intentionally empty and exists to host the global --version flag.
    version: bool = typer.Option(
        False,
        "--version",
        "-V",
        help="Show version and exit",
        callback=_version_callback,
    ),
) -> None:
    """Heritage science ecosystem CLI."""
51
+
52
+
53
+ # ── Config ───────────────────────────────────────────────────────────────────
54
+
55
+
56
+ def load_config() -> dict:
57
+ """Load ~/.config/heritage/config.toml, returning defaults on failure."""
58
+ import tomllib
59
+ config_path = Path.home() / ".config" / "heritage" / "config.toml"
60
+ if not config_path.exists():
61
+ return {}
62
+ try:
63
+ with open(config_path, "rb") as f:
64
+ return tomllib.load(f)
65
+ except (tomllib.TOMLDecodeError, OSError):
66
+ return {}
67
+
68
+
69
def find_tool(name: str) -> str | None:
    """Locate *name* on PATH.

    Returns:
        The executable's path as reported by ``shutil.which``, or ``None``
        when no such executable is found.
    """
    from shutil import which

    return which(name)
73
+
74
+
75
+ # ── Commands ─────────────────────────────────────────────────────────────────
76
+
77
+
78
@app.command()
def run(
    project: str = typer.Option(..., "--project", "-p", help="Project ID"),
    phase: int | None = typer.Option(None, "--phase", help="Run a single phase only"),
    from_phase: int | None = typer.Option(None, "--from-phase", help="Run from this phase onward"),
    pipeline: str | None = typer.Option(None, "--pipeline", "-P", help="Path to pipeline YAML file"),
    auto: bool = typer.Option(False, "--auto", help="Run full pipeline from Phase 0 (or skip review gates with --pipeline)"),
    input_dir: str = typer.Option("./input", "--input", "-i", help="Input directory"),
    strict: bool = typer.Option(False, "--strict", "-s", help="Halt on schema validation failure"),
    extractor: str = typer.Option("glm-ocr", "--extractor", "-e", help="Extraction model"),
    workspace: str = typer.Option("./erd_workspace", "--workspace", "-w", help="Workspace root"),
) -> None:
    """Run the HOARD pipeline (or a single phase, or a multi-tool pipeline).

    Use --pipeline to run a declarative multi-tool pipeline YAML with
    automated steps and human review gates.

    Examples:
        heritage run --project X --phase 0 # Single phase
        heritage run --project X --auto # Full HOARD pipeline
        heritage run --project X --pipeline pipe.yaml # Multi-tool pipeline
    """
    # Three routes, tried in order:
    #   1. --pipeline: hand over to the PipelineOrchestrator.
    #   2. `hoard` executable on PATH: shell out to it.
    #   3. Otherwise: import HOARD as a Python package.
    # Every failure path exits non-zero so scripts can detect it.
    if pipeline:
        from heritage_cli.orchestrator import PipelineOrchestrator

        orch = PipelineOrchestrator(
            pipeline_path=pipeline,
            project_id=project,
            workspace=workspace,
            auto=auto,
        )
        try:
            orch.load()
            orch.run()
        except (FileNotFoundError, ValueError) as e:
            console.print(f"[red]✗[/] {e}")
            raise typer.Exit(1) from e
        return

    hoard_bin = find_tool("hoard")
    if hoard_bin:
        import subprocess

        cmd = [hoard_bin, "run", "--project", project, "--workspace", workspace]
        if phase is not None:
            cmd.extend(["--phase", str(phase)])
        # Emit --from-phase exactly once. --auto takes precedence over an
        # explicit --from-phase, which matches what passing both flags used
        # to produce, assuming hoard's parser lets the last occurrence win
        # — TODO confirm against hoard's CLI.
        if auto:
            cmd.extend(["--from-phase", "0"])
        elif from_phase is not None:
            cmd.extend(["--from-phase", str(from_phase)])
        if input_dir:
            cmd.extend(["--input", input_dir])
        if strict:
            cmd.append("--strict")
        if extractor:
            cmd.extend(["--extractor", extractor])
        console.print(f"[blue]→[/] Running: {' '.join(cmd)}")
        result = subprocess.run(cmd)
        # Propagate hoard's failure instead of always exiting 0.
        if result.returncode != 0:
            raise typer.Exit(result.returncode)
        return

    # Fallback: import and run HOARD directly.
    # NOTE(review): --from-phase and --auto are not forwarded on this path;
    # how run_pipeline() accepts a starting phase is not visible from here.
    try:
        from hoard.config import Config
        from hoard.cli.run import run_pipeline, run_single_phase

        cfg = Config(
            project_id=project,
            project_name=project,
            jurisdiction="historic_england_cl3",
            workspace_root=Path(workspace).resolve(),
            input_dir=Path(input_dir).resolve(),
            strict=strict,
            extractor=extractor,
        )
        if phase is not None:
            run_single_phase(cfg, phase)
        else:
            run_pipeline(cfg)
    except ImportError as e:
        console.print("[red]✗[/] HOARD not installed. Run: pip install hoard")
        raise typer.Exit(1) from e
155
+
156
+
157
@app.command()
def calibrate(
    project: str = typer.Option(..., "--project", "-p", help="Project ID"),
    input_file: str = typer.Option("", "--input", "-i", help="Samples JSON file path"),
    workspace: str = typer.Option("./erd_workspace", "--workspace", "-w", help="Workspace root"),
) -> None:
    """Calibrate radiocarbon samples using Libby.

    Reads sample data from the project workspace or specified input file,
    runs Libby calibration, and writes calibrated dates back to the workspace.
    """
    libby_bin = find_tool("libby")
    if not libby_bin:
        console.print("[yellow]ℹ[/] Libby not installed. Install with: pip install libby")
        console.print(" Or manually calibrate samples at a Libby web instance.")
        # Nothing was calibrated, so report failure to the calling shell.
        raise typer.Exit(1)

    import subprocess

    # Default to the samples file inside the project workspace when no
    # explicit --input was given.
    input_path = input_file or f"{workspace}/{project}/01_digitised/samples.json"
    # NOTE(review): this passes --workspace, while the orchestrator's Libby
    # step passes --output; confirm which flag libby actually accepts.
    cmd = [libby_bin, "calibrate", "--input", input_path, "--workspace", f"{workspace}/{project}"]
    console.print(f"[blue]→[/] Running: {' '.join(cmd)}")
    result = subprocess.run(cmd)
    # Propagate Libby's failure instead of always exiting 0.
    if result.returncode != 0:
        raise typer.Exit(result.returncode)
178
+
179
+
180
@app.command(name="lithics")
def lithics(
    project: str = typer.Option(..., "--project", "-p", help="Project ID"),
    input_dir: str = typer.Option("./scans", "--input", "-i", help="Directory with 3D scans or photos"),
    workspace: str = typer.Option("./erd_workspace", "--workspace", "-w", help="Workspace root"),
) -> None:
    """Run lithic analysis using Dibble.

    Processes 3D scans or photographs through Dibble's classification pipeline
    and writes results into the project workspace for HOARD Phase 3 consumption.
    """
    dibble_bin = find_tool("dibble")
    if not dibble_bin:
        console.print("[yellow]ℹ[/] Dibble not installed. Install with: pip install dibble")
        console.print(" Or manually add lithic analysis to the specialist appendices.")
        # No analysis ran, so report failure to the calling shell.
        raise typer.Exit(1)

    import subprocess

    cmd = [dibble_bin, "process", "--input", input_dir, "--output", f"{workspace}/{project}/02_spatial/lithics/"]
    console.print(f"[blue]→[/] Running: {' '.join(cmd)}")
    result = subprocess.run(cmd)
    # Propagate Dibble's failure instead of always exiting 0.
    if result.returncode != 0:
        raise typer.Exit(result.returncode)
200
+
201
+
202
@app.command()
def review(
    project: str = typer.Option(..., "--project", "-p", help="Project ID"),
    workspace: str = typer.Option("./erd_workspace", "--workspace", "-w", help="Workspace root"),
    reset: bool = typer.Option(False, "--reset", "-r", help="Reset all review decisions"),
) -> None:
    """Open the interactive review dashboard.

    Delegates to Trowel (if installed as desktop app) or HOARD's terminal
    review dashboard (CLI fallback). Both share the same flag data format.
    """
    trowel_bin = find_tool("trowel")
    if trowel_bin:
        import subprocess

        cmd = [trowel_bin, "open", "--project", project, "--workspace", workspace]
        if reset:
            cmd.append("--reset")
        console.print(f"[blue]→[/] Running: {' '.join(cmd)}")
        result = subprocess.run(cmd)
        # Propagate Trowel's failure instead of always exiting 0.
        if result.returncode != 0:
            raise typer.Exit(result.returncode)
        return

    # Fallback: use HOARD's review dashboard.
    if reset:
        # The fallback never forwarded --reset; say so rather than silently
        # dropping a flag the user asked for.
        console.print("[yellow]ℹ[/] --reset is only applied when Trowel is installed; ignoring it.")
    try:
        from hoard.config import Config
        from hoard.review import ReviewSession

        hoard_cfg = Config(
            project_id=project,
            project_name=project,
            jurisdiction="historic_england_cl3",
            workspace_root=Path(workspace).resolve(),
            input_dir=Path("./input"),
        )
        session = ReviewSession(hoard_cfg)
        session.load()
        if session.total == 0:
            console.print(f"[yellow]ℹ[/] No flagged items for project '{project}'.")
            return
        session.run_interactive()
    except ImportError as e:
        console.print("[red]✗[/] Neither Trowel nor HOARD review dashboard available.")
        console.print(" Install HOARD: pip install hoard")
        raise typer.Exit(1) from e
242
+
243
+
244
@app.command()
def matrix(
    project: str = typer.Option(..., "--project", "-p", help="Project ID"),
    workspace: str = typer.Option("./erd_workspace", "--workspace", "-w", help="Workspace root"),
) -> None:
    """Open the Harris Matrix in StratiGraph.

    Imports HOARD Phase 1 context data into StratiGraph for interactive
    Harris Matrix visualisation, validation, and EEDP export.
    """
    # Directory StratiGraph is pointed at; also shown in the manual hint.
    data_dir = f"{workspace}/{project}/01_digitised/"

    stratigraph_bin = find_tool("stratigraph")
    if not stratigraph_bin:
        console.print("[yellow]ℹ[/] StratiGraph not installed.")
        console.print(" Install from: https://github.com/mabo-du/stratigraph")
        console.print(f" Or import {data_dir} manually.")
        # Nothing was opened, so report failure to the calling shell.
        raise typer.Exit(1)

    import subprocess

    cmd = [stratigraph_bin, "import", "--data", data_dir]
    console.print(f"[blue]→[/] Running: {' '.join(cmd)}")
    result = subprocess.run(cmd)
    # Propagate StratiGraph's failure instead of always exiting 0.
    if result.returncode != 0:
        raise typer.Exit(result.returncode)
264
+
265
+
266
@app.command()
def publish(
    project: str = typer.Option(..., "--project", "-p", help="Project ID"),
    fmt: str = typer.Option("docx,pdf", "--format", "-f", help="Output formats (comma-separated)"),
    workspace: str = typer.Option("./erd_workspace", "--workspace", "-w", help="Workspace root"),
) -> None:
    """Publish the final report (Phase 5 assembly + export).

    Delegates to HOARD's Phase 5 export, which generates DOCX, PDF/A-2b,
    TEI-XML, and ZIP outputs from the assembled report data.
    """
    try:
        # Aliased so it does not shadow this module's own load_config().
        from hoard.config import load_config as load_project_config

        cfg = load_project_config(project, Path(workspace))
        if cfg is None:
            console.print(f"[red]✗[/] Project '{project}' not found at {workspace}")
            console.print(" Run 'hoard init' first.")
            raise typer.Exit(1)

        from hoard.phases.phase5 import run_phase5

        # Drop empty tokens so "docx," or "docx,,pdf" does not hand an
        # empty format name to the exporter.
        formats = [token.strip() for token in fmt.split(",") if token.strip()]
        if not formats:
            console.print("[red]✗[/] No output format given. Example: --format docx,pdf")
            raise typer.Exit(1)

        console.print(f"[blue]→[/] Publishing [bold]{project}[/] as: {', '.join(formats)}")
        result = run_phase5(cfg, formats=formats)
        export_paths = result.get("export_paths", {})
        if export_paths:
            console.print("[green]✓[/] Published:")
            for name, path in export_paths.items():
                console.print(f" • {name}: {path}")
        else:
            console.print("[yellow]ℹ[/] No output generated. Run the pipeline first.")
    except ImportError as e:
        console.print("[red]✗[/] HOARD not installed. Run: pip install hoard")
        raise typer.Exit(1) from e
298
+
299
+
300
+ # ── Pipeline Status ──────────────────────────────────────────────────────────
301
+
302
+
303
@app.command(name="pipeline-status")
def pipeline_status(
    project: str = typer.Option(..., "--project", "-p", help="Project ID"),
    workspace: str = typer.Option("./erd_workspace", "--workspace", "-w", help="Workspace root"),
) -> None:
    """Show the status of the most recent pipeline run for a project."""
    import json

    # The orchestrator keeps one state file per project, directly under the
    # project's workspace directory.
    state_file = Path(workspace) / project / "pipeline_state.json"
    if not state_file.exists():
        console.print(f"[yellow]ℹ[/] No pipeline state found for project '{project}'")
        console.print(f" Run [bold]heritage run --project {project} --pipeline <file>[/] first")
        return

    try:
        data = json.loads(state_file.read_text())
    except (json.JSONDecodeError, OSError) as e:
        console.print(f"[red]✗[/] Failed to read pipeline state: {e}")
        raise typer.Exit(1) from e
    if not isinstance(data, dict):
        # Valid JSON that is not an object (e.g. a list) has no fields to show.
        console.print("[red]✗[/] Failed to read pipeline state: unexpected file format")
        raise typer.Exit(1)

    # The orchestrator writes the project under "project_id"; "project" is
    # still accepted so a file using that key keeps working.
    shown_project = data.get("project_id") or data.get("project", project)
    console.print(f"[bold]Pipeline Status:[/] {shown_project}")
    console.print(f" State file: {state_file}")
    console.print(f" Pipeline: {data.get('pipeline', 'unknown')}")
    console.print(f" Last update: {data.get('updated_at', 'unknown')}")
    console.print()

    steps = data.get("steps", {})
    if not isinstance(steps, dict):
        steps = {}
    console.print(f"[bold]Steps ({len(steps)}):[/]")
    # Built once rather than on every loop iteration.
    icon = {"pending": "○", "running": "→", "complete": "✓",
            "skipped": "−", "failed": "✗", "blocked": "⊘"}
    for step_id, status in steps.items():
        marker = icon.get(status, "?")
        console.print(f" {marker} {step_id}: {status}")
336
+
337
+
338
+ # ── Tools sub-command ────────────────────────────────────────────────────────
339
+
340
+
341
@app.command(name="tools")
def tools_list(
    action: str = typer.Argument("list", help="Action to perform (only 'list' is supported)"),
) -> None:
    """List installed heritage ecosystem tools and their status."""
    import subprocess

    # The documented usage is `heritage tools list`. Without this optional
    # argument the trailing "list" is rejected as an unexpected extra
    # argument. A bare `heritage tools` keeps working via the default.
    if action != "list":
        console.print(f"[red]✗[/] Unknown tools action '{action}'. Try: heritage tools list")
        raise typer.Exit(2)

    table = Table(title="Heritage Ecosystem Tools")
    table.add_column("Tool", style="cyan")
    table.add_column("Installed", style="green")
    table.add_column("Version", style="yellow")
    table.add_column("Description")

    # (tool name, resolved executable path or None, description)
    tools = [
        ("hoard", find_tool("hoard"), "HOARD pipeline"),
        ("stratigraph", find_tool("stratigraph"), "Harris Matrix editor"),
        ("trowel", find_tool("trowel"), "Desktop report drafter"),
        ("libby", find_tool("libby"), "Radiocarbon calibration"),
        ("dibble", find_tool("dibble"), "Lithic analysis"),
        ("fritts", find_tool("fritts"), "Dendrochronology"),
        ("argus", find_tool("argus"), "Site surveillance"),
    ]

    for name, tool_path, desc in tools:
        status = "[green]✓[/]" if tool_path else "[red]✗[/]"
        version = ""
        if tool_path:
            try:
                # Probe the path that was actually resolved, not the bare name.
                result = subprocess.run(
                    [tool_path, "--version"],
                    capture_output=True,
                    text=True,
                    errors="replace",
                    timeout=5,
                )
            except (OSError, subprocess.TimeoutExpired):
                version = "?"
            else:
                output = result.stdout.strip() or result.stderr.strip()
                if "Traceback" in output or "Error" in output:
                    # The tool crashed or complained instead of reporting a version.
                    version = "?"
                elif output:
                    # Keep the table cell to a single short line.
                    version = output.splitlines()[0][:30]
        table.add_row(name, status, version or "-", desc)

    console.print(table)
    console.print("\n[yellow]ℹ[/] Run [bold]pip install <tool>[/] for tools marked ✗")
378
+
379
+
380
+ # ── Entry point ──────────────────────────────────────────────────────────────
381
+
382
+
383
def entry_point() -> None:
    """Invoke the Typer application.

    Thin wrapper so packaging metadata can point at a plain function.
    """
    app()
386
+
387
+
388
+ if __name__ == "__main__":
389
+ app()
@@ -0,0 +1,507 @@
1
+ """orchestrator.py — Pipeline orchestration engine with review gates.
2
+
3
+ Implements a checkpoint-based execution model where a declarative YAML
4
+ pipeline definition is run step by step, pausing at human review gates
5
+ for expert validation. Pipeline state is persisted to a JSON file for
6
+ resumability after interruption.
7
+
8
+ Pipeline YAML format:
9
+ steps:
10
+ - project: hoard
11
+ phases: [0, 1, 2]
12
+ - gate: review
13
+ message: "Review the Harris Matrix in StratiGraph before proceeding"
14
+ action: "stratigraph import --path output/01_digitised"
15
+ - project: libby
16
+ action: calibrate
17
+ input: output/01_digitised/samples.json
18
+ - project: hoard
19
+ phases: [3, 4, 5]
20
+ - gate: review
21
+ message: "Review the draft before final export"
22
+ - project: hoard
23
+ action: export
24
+ formats: [docx, pdf]
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import json
30
+ import subprocess
31
+ import sys
32
+ import time
33
+ from dataclasses import dataclass, field
34
+ from enum import Enum
35
+ from pathlib import Path
36
+ from typing import Any
37
+
38
+
39
+ # ── Step types ────────────────────────────────────────────────────────────────
40
+
41
+
42
class StepStatus(str, Enum):
    """Execution state of a pipeline step.

    Members are also ``str`` instances; the state file stores each step's
    ``.value``.
    """

    PENDING = "pending"      # not run yet
    RUNNING = "running"      # currently executing
    COMPLETE = "complete"    # finished successfully
    SKIPPED = "skipped"      # bypassed
    FAILED = "failed"        # raised an error while executing
    BLOCKED = "blocked"      # dependencies not met
49
+
50
+
51
class StepKind(str, Enum):
    """What a pipeline step does; decides which executor handles it."""

    # Run HOARD pipeline phases
    HOARD = "hoard"
    # Human review gate — pauses execution
    GATE = "gate"
    # Run an arbitrary shell command
    COMMAND = "command"
    # Calibrate samples
    LIBBY = "libby"
    # Run lithic analysis
    DIBBLE = "dibble"
    # Final report export
    EXPORT = "export"
58
+
59
+
60
+ # ── Data models ───────────────────────────────────────────────────────────────
61
+
62
+
63
@dataclass
class PipelineStep:
    """One step of a pipeline definition plus its runtime status."""

    # Identifier used as the key in the persisted state and in depends_on.
    id: str
    # Which executor handles the step.
    kind: StepKind
    # Current execution state; every step starts out pending.
    status: StepStatus = StepStatus.PENDING
    # For GATE steps: prompt shown to user.
    message: str = ""
    # For GATE steps: suggestion of what to do.
    action: str = ""
    # Kind-specific arguments (phases, input path, formats, command, ...).
    tool_args: dict[str, Any] = field(default_factory=dict)
    # IDs of steps that must be complete before this one may run.
    depends_on: list[str] = field(default_factory=list)

    # Runtime-only field (not part of pipeline.yaml): message of the
    # exception that failed the step.
    error: str = ""
77
+
78
+
79
@dataclass
class PipelineState:
    """Record describing a pipeline run's persisted state."""

    # Project the run belongs to.
    project_id: str
    # Path of the pipeline definition that was executed.
    pipeline_path: str
    # Step ID -> status.
    steps: dict[str, StepStatus] = field(default_factory=dict)
    # Timestamps as strings; empty until set.
    started_at: str = ""
    updated_at: str = ""
88
+
89
+
90
+ # ── Orchestrator ──────────────────────────────────────────────────────────────
91
+
92
+
93
class PipelineOrchestrator:
    """Executes a pipeline definition step by step with review gates.

    State is written to ``<workspace>/<project_id>/pipeline_state.json``
    after every step, so an interrupted run can be resumed: steps recorded
    as complete or skipped are not executed again.

    Usage:
        orch = PipelineOrchestrator("path/to/pipeline.yaml", project_id="my_site")
        orch.load()
        orch.run()
    """

    # Status value -> marker glyph used by status_report().
    _STATUS_ICONS = {
        "pending": "○", "running": "→", "complete": "✓",
        "skipped": "−", "failed": "✗", "blocked": "⊘",
    }

    def __init__(
        self,
        pipeline_path: str | Path,
        project_id: str = "",
        workspace: str = "./erd_workspace",
        auto: bool = False,
    ) -> None:
        """Store the run parameters; nothing is read from disk yet.

        Args:
            pipeline_path: Path of the pipeline YAML definition.
            project_id: Project whose workspace directory holds the state file.
            workspace: Workspace root directory.
            auto: If True, review gates are passed without prompting.
        """
        self.pipeline_path = Path(pipeline_path)
        self.project_id = project_id
        self.workspace = Path(workspace)
        self.auto = auto  # If True, skip review gates automatically
        self.steps: list[PipelineStep] = []
        self.state_dir = self.workspace / project_id
        self.state_file = self.state_dir / "pipeline_state.json"
        self._step_map: dict[str, PipelineStep] = {}

    # ── Loading ──────────────────────────────────────────────────────────

    def load(self) -> None:
        """Load, parse, and validate the pipeline YAML definition.

        Raises:
            FileNotFoundError: The pipeline file does not exist.
            ValueError: The file has no ``steps`` list, a step is not a
                mapping, two steps share an ID, or a ``depends_on`` entry
                names a step that is not defined.
        """
        import yaml

        if not self.pipeline_path.exists():
            raise FileNotFoundError(f"Pipeline file not found: {self.pipeline_path}")

        raw = yaml.safe_load(self.pipeline_path.read_text())
        if not isinstance(raw, dict) or not isinstance(raw.get("steps"), list):
            raise ValueError("Pipeline file must contain a 'steps' list")

        # Build into locals so a failed (re)load never leaves half-updated
        # or stale entries behind in self.steps / self._step_map.
        steps: list[PipelineStep] = []
        step_map: dict[str, PipelineStep] = {}
        for i, step_raw in enumerate(raw["steps"]):
            if not isinstance(step_raw, dict):
                raise ValueError(f"Step {i} must be a mapping, got {type(step_raw).__name__}")
            step = self._parse_step(step_raw, i)
            if step.id in step_map:
                # Duplicate IDs would share one entry in the state file.
                raise ValueError(f"Duplicate step id '{step.id}'")
            steps.append(step)
            step_map[step.id] = step

        # A misspelt dependency must not silently count as satisfied.
        for step in steps:
            for dep_id in step.depends_on:
                if dep_id not in step_map:
                    raise ValueError(f"Step '{step.id}' depends on unknown step '{dep_id}'")

        self.steps = steps
        self._step_map = step_map

    def _parse_step(self, raw: dict, index: int) -> PipelineStep:
        """Parse a single step mapping from the YAML.

        The kind comes from the ``gate`` key, else the ``project`` key, else
        the step is a plain command. IDs default to ``<prefix>_<index>``.

        Raises:
            ValueError: ``project`` names a tool that has no step kind.
        """
        if "gate" in raw:
            kind = StepKind.GATE
            default_id = f"gate_{index}"
        elif "project" in raw:
            project = raw["project"]
            try:
                kind = StepKind(project)
            except ValueError:
                known = ", ".join(k.value for k in StepKind)
                raise ValueError(
                    f"Step {index}: unknown project '{project}' (expected one of: {known})"
                ) from None
            # Computed before the export remap below so auto-generated IDs,
            # and therefore existing state files, stay unchanged.
            default_id = f"{kind.value}_{index}"
            # The documented pipeline format spells the export step as
            # `project: hoard` + `action: export`. Without this remap it
            # would be run as a full HOARD pipeline and `formats` ignored.
            if kind == StepKind.HOARD and raw.get("action") == "export":
                kind = StepKind.EXPORT
        else:
            kind = StepKind.COMMAND
            default_id = f"cmd_{index}"

        # Accept `depends_on: some_id` as well as a list; None means no deps.
        depends_on = raw.get("depends_on") or []
        if isinstance(depends_on, str):
            depends_on = [depends_on]

        return PipelineStep(
            id=raw.get("id", default_id),
            kind=kind,
            message=raw.get("message", ""),
            action=raw.get("action", ""),
            tool_args=self._extract_args(raw, kind),
            depends_on=list(depends_on),
        )

    def _extract_args(self, raw: dict, kind: StepKind) -> dict[str, Any]:
        """Extract the arguments relevant to *kind* from the step mapping."""
        args: dict[str, Any] = {}
        if kind == StepKind.HOARD:
            phases = raw.get("phases")
            if phases is None:
                phases = []
            elif not isinstance(phases, (list, tuple)):
                # `phases: 0` — a bare scalar — means that single phase. Left
                # as-is it would be falsy (0) or not iterable (other ints).
                phases = [phases]
            args["phases"] = list(phases)
            args["from_phase"] = raw.get("from_phase")
            args["extractor"] = raw.get("extractor", "glm-ocr")
            args["strict"] = raw.get("strict", False)
        elif kind == StepKind.LIBBY:
            args["input"] = raw.get("input", "")
            args["action"] = raw.get("action", "calibrate")
        elif kind == StepKind.DIBBLE:
            args["input"] = raw.get("input", "./scans")
            args["output"] = raw.get("output", "")
        elif kind == StepKind.EXPORT:
            args["formats"] = raw.get("formats", ["docx", "pdf"])
        elif kind == StepKind.COMMAND:
            args["command"] = raw.get("command", "")
        return args

    # ── State Management ─────────────────────────────────────────────────

    def _load_state(self) -> dict[str, StepStatus]:
        """Load saved step statuses from disk.

        Returns:
            Step ID -> status, or an empty dict when there is no state file
            or it cannot be read or understood (the run then starts over).
        """
        if not self.state_file.exists():
            return {}
        try:
            data = json.loads(self.state_file.read_text())
        except (OSError, ValueError):  # JSONDecodeError is a ValueError
            return {}
        raw_steps = data.get("steps", {}) if isinstance(data, dict) else {}
        if not isinstance(raw_steps, dict):
            return {}
        try:
            return {k: StepStatus(v) for k, v in raw_steps.items()}
        except ValueError:
            # Unknown status value: treat the whole file as unusable.
            return {}

    def _save_state(self) -> None:
        """Persist the current status of every step to the state file."""
        from datetime import datetime, timezone

        self.state_dir.mkdir(parents=True, exist_ok=True)
        state = {
            "project_id": self.project_id,
            "pipeline": str(self.pipeline_path),
            "steps": {s.id: s.status.value for s in self.steps},
            "updated_at": datetime.now(timezone.utc).isoformat(),
        }
        # Write to a sibling temp file, then swap it in. replace() overwrites
        # an existing target on every platform; rename() fails on Windows
        # once the state file exists.
        tmp = self.state_file.with_suffix(".json.tmp")
        tmp.write_text(json.dumps(state, indent=2))
        tmp.replace(self.state_file)

    # ── Execution ────────────────────────────────────────────────────────

    def run(self) -> None:
        """Execute the loaded steps in order, pausing at review gates.

        Steps already complete or skipped in a saved state are not re-run.
        A step whose dependencies are not complete is marked blocked and
        the run stops there. A failing step re-raises its exception after
        the state has been saved.
        """
        saved_state = self._load_state()
        if saved_state:
            self._apply_saved_state(saved_state)

        console = _get_console()

        console.print(f"[bold]Pipeline:[/] {self.pipeline_path}")
        console.print(f"[bold]Project:[/] {self.project_id}")
        console.print(f"[bold]Steps:[/] {len(self.steps)}")
        console.print()

        for step in self.steps:
            if step.status == StepStatus.COMPLETE:
                console.print(f" [dim]• {step.id}[/] [green](already complete)[/]")
                continue
            if step.status == StepStatus.SKIPPED:
                console.print(f" [dim]• {step.id}[/] [yellow](skipped)[/]")
                continue

            if not self._dependencies_met(step):
                console.print(f" [red]✗ {step.id}[/] dependencies not met — blocking")
                step.status = StepStatus.BLOCKED
                self._save_state()
                return

            if step.kind == StepKind.GATE:
                self._execute_gate(step)
            else:
                self._execute_step(step)

            self._save_state()

        console.print()
        console.print("[green]✓[/] Pipeline complete!")

    def _apply_saved_state(self, saved: dict[str, StepStatus]) -> None:
        """Mark steps that a previous run already completed or skipped."""
        finished = (StepStatus.COMPLETE, StepStatus.SKIPPED)
        for step in self.steps:
            # Any other saved status (pending, running, failed, blocked) is
            # ignored so that the step gets executed again.
            if saved.get(step.id) in finished:
                step.status = saved[step.id]

    def _dependencies_met(self, step: PipelineStep) -> bool:
        """Return True only if every dependency exists and is complete."""
        for dep_id in step.depends_on:
            dep = self._step_map.get(dep_id)
            # An unknown ID cannot have completed, so it counts as unmet.
            if dep is None or dep.status != StepStatus.COMPLETE:
                return False
        return True

    # ── Step Executors ───────────────────────────────────────────────────

    def _execute_step(self, step: PipelineStep) -> None:
        """Execute a non-gate step (HOARD, Libby, Dibble, Export, Command).

        Marks the step running, dispatches on its kind, then marks it
        complete. On any exception the step is marked failed, the state is
        saved, and the exception is re-raised.
        """
        console = _get_console()
        step.status = StepStatus.RUNNING
        self._save_state()

        console.print(f" [blue]→[/] Running step: [bold]{step.id}[/]")

        handlers = {
            StepKind.HOARD: self._run_hoard,
            StepKind.LIBBY: self._run_libby,
            StepKind.DIBBLE: self._run_dibble,
            StepKind.EXPORT: self._run_export,
            StepKind.COMMAND: self._run_command,
        }
        try:
            handler = handlers.get(step.kind)
            if handler is None:
                console.print(f" [yellow]⚠[/] Unknown step kind: {step.kind} — skipping")
                step.status = StepStatus.SKIPPED
            else:
                handler(step)
                step.status = StepStatus.COMPLETE
                console.print(f" [green]✓[/] {step.id} complete")
        except Exception as e:
            # Record the failure before letting it propagate to the caller.
            step.status = StepStatus.FAILED
            step.error = str(e)
            console.print(f" [red]✗[/] {step.id} failed: {e}")
            self._save_state()
            raise

    def _run_hoard(self, step: PipelineStep) -> None:
        """Execute HOARD phases via the ``hoard`` executable or Python import.

        With ``phases`` set, each listed phase runs as its own invocation.
        Otherwise one invocation runs from ``from_phase`` if given, else the
        whole pipeline.
        """
        import shutil

        args = step.tool_args
        phases = args.get("phases", [])
        extractor = args.get("extractor", "glm-ocr")
        strict = args.get("strict", False)

        hoard_bin = shutil.which("hoard")
        if hoard_bin:
            base = [hoard_bin, "run", "--project", self.project_id, "--workspace", str(self.workspace)]
            # Flags shared by every invocation, appended after the phase flag.
            trailing: list[str] = []
            if extractor:
                trailing.extend(["--extractor", extractor])
            if strict:
                trailing.append("--strict")

            if phases:
                for phase in phases:
                    _run_subprocess(base + ["--phase", str(phase)] + trailing, step.id)
            else:
                cmd = list(base)
                from_phase = args.get("from_phase")
                if from_phase is not None:
                    cmd.extend(["--from-phase", str(from_phase)])
                _run_subprocess(cmd + trailing, step.id)
            return

        # Fallback: Python import.
        # NOTE(review): from_phase is not honoured on this path; how
        # run_pipeline() takes a starting phase is not visible from here.
        from hoard.config import Config
        from hoard.cli.run import run_single_phase

        cfg = Config(
            project_id=self.project_id,
            project_name=self.project_id,
            jurisdiction="historic_england_cl3",
            workspace_root=self.workspace.resolve(),
            input_dir=(self.workspace / self.project_id / "input").resolve(),
            strict=strict,
            extractor=extractor,
        )
        if phases:
            for phase in phases:
                run_single_phase(cfg, phase)
        else:
            from hoard.cli.run import run_pipeline
            run_pipeline(cfg)

    def _run_libby(self, step: PipelineStep) -> None:
        """Execute Libby radiocarbon calibration.

        Raises:
            RuntimeError: ``libby`` is not on PATH, or it exits non-zero.
        """
        import shutil

        project_dir = self.workspace / self.project_id
        # Fall back to the project's samples file when the step names no input.
        input_path = step.tool_args.get("input", "") or str(project_dir / "01_digitised" / "samples.json")
        output_dir = str(project_dir / "03_draft")

        libby_bin = shutil.which("libby")
        if not libby_bin:
            raise RuntimeError(
                "Libby not installed. Install with: pip install libby\n"
                f" Or manually calibrate: {input_path}"
            )
        _run_subprocess([
            libby_bin, "calibrate",
            "--input", input_path,
            "--output", output_dir,
        ], step.id)

    def _run_dibble(self, step: PipelineStep) -> None:
        """Execute Dibble lithic analysis.

        Raises:
            RuntimeError: ``dibble`` is not on PATH, or it exits non-zero.
        """
        import shutil

        args = step.tool_args
        input_dir = args.get("input", "./scans")
        output_dir = args.get("output", "") or str(self.workspace / self.project_id / "02_spatial" / "lithics")

        dibble_bin = shutil.which("dibble")
        if not dibble_bin:
            raise RuntimeError(
                "Dibble not installed. Install with: pip install dibble\n"
                " Or skip lithic analysis by removing the step."
            )
        _run_subprocess([
            dibble_bin, "process",
            "--input", input_dir,
            "--output", output_dir,
        ], step.id)

    def _run_export(self, step: PipelineStep) -> None:
        """Execute HOARD Phase 5 export and print the produced paths.

        Raises:
            RuntimeError: HOARD cannot be imported, or the project has no
                loadable configuration.
        """
        formats = step.tool_args.get("formats", ["docx", "pdf"])

        try:
            from hoard.config import load_config
            cfg = load_config(self.project_id, self.workspace)
            if cfg is None:
                raise RuntimeError(f"Project '{self.project_id}' not initialised")
            from hoard.phases.phase5 import run_phase5
            result = run_phase5(cfg, formats=formats)
            export_paths = result.get("export_paths", {})
            if export_paths:
                console = _get_console()
                for name, path in export_paths.items():
                    console.print(f" • {name}: {path}")
        except ImportError as err:
            # Chain the cause so the real import failure stays visible.
            raise RuntimeError("HOARD not installed. Run: pip install hoard") from err

    def _run_command(self, step: PipelineStep) -> None:
        """Execute the step's ``command`` string as a subprocess (no shell).

        Raises:
            ValueError: The step has no ``command``.
            RuntimeError: The command exits non-zero.
        """
        import shlex

        cmd_str = step.tool_args.get("command", "")
        if not cmd_str:
            raise ValueError(f"Step '{step.id}' of kind 'command' has no 'command' field")
        # Substitute placeholders after splitting, so a project ID or
        # workspace containing spaces stays a single argument.
        cmd_parts = [
            part.replace("{project_id}", self.project_id).replace("{workspace}", str(self.workspace))
            for part in shlex.split(cmd_str)
        ]
        _run_subprocess(cmd_parts, step.id)

    # ── Review Gates ─────────────────────────────────────────────────────

    def _execute_gate(self, step: PipelineStep) -> None:
        """Pause for human review and record the reviewer's decision.

        In auto mode the gate is passed immediately. Otherwise the user is
        prompted until they answer yes, skip, or quit. Quitting or
        interrupting saves the gate as pending and exits the process
        (status 0 for quit, 1 for interrupt / end of input).
        """
        console = _get_console()

        console.print()
        console.print("=" * 60)
        console.print(f"[bold yellow]🔍 REVIEW GATE: {step.id}[/]")
        if step.message:
            console.print(f" {step.message}")
        if step.action:
            console.print(f" [dim]Suggested action:[/] {step.action}")
        console.print("=" * 60)

        if self.auto:
            console.print(" [yellow](auto mode — proceeding)[/]")
            step.status = StepStatus.COMPLETE
            return

        while True:
            try:
                response = input(" Continue? [Y]es / [s]kip / [q]uit: ").strip().lower()
            except (EOFError, KeyboardInterrupt):
                print()
                console.print("[red]✗ Pipeline interrupted[/]")
                step.status = StepStatus.PENDING
                self._save_state()
                sys.exit(1)

            if response in ("", "y", "yes"):
                step.status = StepStatus.COMPLETE
                console.print(" [green]✓[/] Proceeding...")
                break
            elif response in ("s", "skip"):
                step.status = StepStatus.SKIPPED
                console.print(" [yellow]→[/] Gate skipped")
                break
            elif response in ("q", "quit"):
                console.print(" [red]✗ Pipeline paused — resume with 'heritage run --pipeline ...'[/]")
                step.status = StepStatus.PENDING
                self._save_state()
                sys.exit(0)
            # Any other answer: ask again.

    # ── Reporting ────────────────────────────────────────────────────────

    def status_report(self) -> str:
        """Return a multi-line, human-readable summary of every step."""
        lines = [
            f"Pipeline: {self.pipeline_path}",
            f"Project: {self.project_id}",
            f"State: {self.state_file}",
            "",
            "Steps:",
        ]
        for step in self.steps:
            marker = self._STATUS_ICONS.get(step.status.value, "?")
            lines.append(f" {marker} {step.id}: {step.status.value}")
            if step.error:
                lines.append(f" error: {step.error}")
        return "\n".join(lines)
490
+
491
+
492
+ # ── Helpers ────────────────────────────────────────────────────────────────────
493
+
494
+
495
def _run_subprocess(cmd: list[str], step_id: str) -> None:
    """Echo *cmd*, run it with inherited stdout/stderr, and fail on non-zero exit.

    Raises:
        RuntimeError: The process exited with a non-zero code; the message
            names *step_id* and the code.
    """
    _get_console().print(f" $ {' '.join(cmd)}")
    exit_code = subprocess.run(cmd).returncode
    if exit_code == 0:
        return
    raise RuntimeError(f"Step '{step_id}' exited with code {exit_code}")
502
+
503
+
504
def _get_console():
    """Return the shared Rich Console, creating it on first use.

    The instance is cached on the function object, so every caller in this
    module prints through the same Console instead of building a new one
    per call.
    """
    from rich.console import Console

    console = getattr(_get_console, "_console", None)
    if console is None:
        console = Console()
        _get_console._console = console
    return console
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: heritage-cli
3
+ Version: 1.0.0
4
+ Summary: Unified CLI for the heritage science open-source ecosystem — orchestrates HOARD, StratiGraph, Trowel, Libby, Dibble, and other tools
5
+ Author: Marcus Quinn
6
+ License: MIT
7
+ Requires-Python: >=3.11
8
+ Requires-Dist: typer>=0.12
9
+ Requires-Dist: rich>=13
10
+ Requires-Dist: platformdirs>=4
11
+ Requires-Dist: tomli>=2; python_version < "3.11"
@@ -0,0 +1,10 @@
1
+ heritage_cli/__init__.py,sha256=hqEfPIisES2devRHemr6MAZg-anABvhD3V6ZBCqHL_M,758
2
+ heritage_cli/main.py,sha256=vnu2VCWnnreB0V_Vg2eeKBtpXyzPOHv3rqt8_ceLrA8,16000
3
+ heritage_cli/orchestrator.py,sha256=Ru9uZrXqRglgtNgyV5FqwKHKhsLxcwbgo5q1O-_6ToY,19895
4
+ heritage_cli/commands/__init__.py,sha256=IIsQBPZxOllXFD85bsItdor-R2RUVLBEis3IzN-zJQM,215
5
+ heritage_cli/commands/hoard.py,sha256=rFSEZSeVQfMTN0BzmWrd_472guwxeADJnKgr6oJRubA,496
6
+ heritage_cli-1.0.0.dist-info/METADATA,sha256=aFeU_SCjG5FSrM4vxcD3vafyJ0wuPYAN45i_QEnsrmc,389
7
+ heritage_cli-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
8
+ heritage_cli-1.0.0.dist-info/entry_points.txt,sha256=xfD1N_Huyz7HJbdwhoF6470KEEwexyLvziF5QWJspTM,265
9
+ heritage_cli-1.0.0.dist-info/top_level.txt,sha256=HDowMMpJEJf_zdm0IvpxhYOwS9WDw12BUs0lX8D53lA,13
10
+ heritage_cli-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,9 @@
1
+ [console_scripts]
2
+ heritage = heritage_cli.main:app
3
+
4
+ [heritage.tools]
5
+ dibble = heritage_cli.commands.dibble
6
+ hoard = heritage_cli.commands.hoard
7
+ libby = heritage_cli.commands.libby
8
+ stratigraph = heritage_cli.commands.stratigraph
9
+ trowel = heritage_cli.commands.trowel
@@ -0,0 +1 @@
1
+ heritage_cli