calkit-python 0.41.0__py3-none-any.whl → 0.41.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- calkit/cli/core.py +8 -3
- calkit/cli/main/core.py +296 -14
- calkit/cli/scheduler.py +293 -63
- calkit/core.py +18 -0
- calkit/dependencies.py +3 -11
- calkit/dvc/core.py +42 -0
- calkit/models/pipeline.py +16 -0
- calkit/pipeline.py +100 -0
- calkit/tests/cli/main/test_core.py +424 -0
- calkit/tests/cli/test_scheduler.py +163 -0
- calkit/tests/test_pipeline.py +133 -4
- {calkit_python-0.41.0.dist-info → calkit_python-0.41.2.dist-info}/METADATA +1 -1
- {calkit_python-0.41.0.dist-info → calkit_python-0.41.2.dist-info}/RECORD +31 -30
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/etc/jupyter/jupyter_server_config.d/calkit.json +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/install.json +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/package.json +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/schemas/calkit/package.json.orig +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/schemas/calkit/plugin.json +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/502.9a2c5772a15466e923ef.js +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/695.2c41003a452d43d2b358.js +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/867.a42a046aa5108f54f8fb.js +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/909.e3f9cc3408834a7fdcc3.js +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/946.050af2abf7845cfbdbd2.js +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/946.050af2abf7845cfbdbd2.js.LICENSE.txt +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/b2f1c3efe70cb539d121.png +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/remoteEntry.65469af996e7a96aa983.js +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/style.js +0 -0
- {calkit_python-0.41.0.data → calkit_python-0.41.2.data}/data/share/jupyter/labextensions/calkit/static/third-party-licenses.json +0 -0
- {calkit_python-0.41.0.dist-info → calkit_python-0.41.2.dist-info}/WHEEL +0 -0
- {calkit_python-0.41.0.dist-info → calkit_python-0.41.2.dist-info}/entry_points.txt +0 -0
- {calkit_python-0.41.0.dist-info → calkit_python-0.41.2.dist-info}/licenses/LICENSE +0 -0
calkit/cli/core.py
CHANGED
|
@@ -32,11 +32,16 @@ class AliasGroup(TyperGroup):
|
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
def complete_stage_names(
|
|
35
|
-
ctx: "click.Context",
|
|
36
|
-
param: "click.Parameter",
|
|
35
|
+
ctx: "click.Context | None",
|
|
36
|
+
param: "click.Parameter | None",
|
|
37
37
|
incomplete: str,
|
|
38
38
|
) -> list:
|
|
39
|
-
"""Return pipeline stage names for shell tab-completion.
|
|
39
|
+
"""Return pipeline stage names for shell tab-completion.
|
|
40
|
+
|
|
41
|
+
``ctx`` and ``param`` are part of the Click completion-callback signature
|
|
42
|
+
but are unused here, so they accept ``None`` (e.g. when called directly in
|
|
43
|
+
tests).
|
|
44
|
+
"""
|
|
40
45
|
if not os.path.isfile("calkit.yaml"):
|
|
41
46
|
return []
|
|
42
47
|
try:
|
calkit/cli/main/core.py
CHANGED
|
@@ -421,7 +421,15 @@ def get_status(
|
|
|
421
421
|
else:
|
|
422
422
|
categories = valid_categories
|
|
423
423
|
pipeline_status = None
|
|
424
|
+
running_status = None
|
|
424
425
|
if "pipeline" in categories or "dvc" in categories:
|
|
426
|
+
# If a run is in progress it holds the DVC lock, so computing the full
|
|
427
|
+
# status would just error out (and would mutate files like dvc.yaml or
|
|
428
|
+
# notebooks mid-run). Report the running stage from the run log instead.
|
|
429
|
+
running_status = _get_running_pipeline_status()
|
|
430
|
+
if running_status is None and (
|
|
431
|
+
"pipeline" in categories or "dvc" in categories
|
|
432
|
+
):
|
|
425
433
|
# Sync zips so the zip files reflect current workspace state before
|
|
426
434
|
# reporting status
|
|
427
435
|
calkit.dvc.zip.sync_all(direction="to-zip")
|
|
@@ -501,7 +509,9 @@ def get_status(
|
|
|
501
509
|
except Exception as e:
|
|
502
510
|
status_dict["dvc"] = {"error": f"{e.__class__.__name__}: {e}"}
|
|
503
511
|
if "pipeline" in categories or "dvc" in categories:
|
|
504
|
-
if
|
|
512
|
+
if running_status is not None:
|
|
513
|
+
status_dict["pipeline"] = running_status
|
|
514
|
+
elif pipeline_status is None:
|
|
505
515
|
status_dict["pipeline"] = None
|
|
506
516
|
else:
|
|
507
517
|
status_dict["pipeline"] = pipeline_status.model_dump(
|
|
@@ -550,6 +560,9 @@ def get_status(
|
|
|
550
560
|
typer.echo(_format_dvc_data_status(raw, zip_path_map))
|
|
551
561
|
if "pipeline" in categories or "dvc" in categories:
|
|
552
562
|
print_sep("Pipeline")
|
|
563
|
+
if running_status is not None:
|
|
564
|
+
_print_running_pipeline_status(running_status)
|
|
565
|
+
return
|
|
553
566
|
# Nicely format the results from pipeline status
|
|
554
567
|
if pipeline_status and pipeline_status.errors:
|
|
555
568
|
warn("Pipeline status unavailable due to errors:")
|
|
@@ -1304,11 +1317,12 @@ def _stage_run_info_from_log_content(log_content: str) -> dict:
|
|
|
1304
1317
|
# If we were already running a stage, add its end time
|
|
1305
1318
|
add_stage_info(current_stage_name, "end_time", timestamp)
|
|
1306
1319
|
add_stage_info(current_stage_name, "status", "completed")
|
|
1307
|
-
# This is a stage run
|
|
1320
|
+
# This is a stage run. The line is ``Running stage '<name>':``;
|
|
1321
|
+
# strip only the trailing ``:`` delimiter and surrounding quotes
|
|
1322
|
+
# so colons inside the name (e.g. ``sub1/dvc.yaml:stage-a`` for an
|
|
1323
|
+
# inline subproject target) are preserved.
|
|
1308
1324
|
current_stage_name = (
|
|
1309
|
-
message.removeprefix("Running stage ")
|
|
1310
|
-
.replace("'", "")
|
|
1311
|
-
.replace(":", "")
|
|
1325
|
+
message.removeprefix("Running stage ").rstrip(":").strip("'")
|
|
1312
1326
|
)
|
|
1313
1327
|
current_stage_status = "running"
|
|
1314
1328
|
add_stage_info(current_stage_name, "start_time", timestamp)
|
|
@@ -1344,6 +1358,239 @@ def _stage_run_info_from_log_content(log_content: str) -> dict:
|
|
|
1344
1358
|
return res
|
|
1345
1359
|
|
|
1346
1360
|
|
|
1361
|
+
def _prune_run_logs(
|
|
1362
|
+
logs_dir: str, keep: int = 10, protect: str | None = None
|
|
1363
|
+
) -> None:
|
|
1364
|
+
"""Keep only the most recent ``keep`` run logs in ``logs_dir``.
|
|
1365
|
+
|
|
1366
|
+
Run logs are named by their start timestamp, so sorting by name orders
|
|
1367
|
+
them by time; the oldest beyond ``keep`` are removed so the private log
|
|
1368
|
+
directory doesn't grow without bound. ``protect`` (the active run's log
|
|
1369
|
+
filename) is never deleted, guarding against clock skew or odd names that
|
|
1370
|
+
could otherwise sort the live log into the prune set.
|
|
1371
|
+
"""
|
|
1372
|
+
if not os.path.isdir(logs_dir):
|
|
1373
|
+
return
|
|
1374
|
+
logs = sorted(f for f in os.listdir(logs_dir) if f.endswith(".log"))
|
|
1375
|
+
for fname in logs[:-keep]:
|
|
1376
|
+
if fname == protect:
|
|
1377
|
+
continue
|
|
1378
|
+
try:
|
|
1379
|
+
os.remove(os.path.join(logs_dir, fname))
|
|
1380
|
+
except OSError:
|
|
1381
|
+
pass
|
|
1382
|
+
|
|
1383
|
+
|
|
1384
|
+
def _get_latest_run_log_content() -> str | None:
|
|
1385
|
+
"""Return the contents of the most recent run log, or ``None``.
|
|
1386
|
+
|
|
1387
|
+
Looks in the private ``.calkit/local/logs`` directory (always written)
|
|
1388
|
+
and the tracked ``.calkit/logs`` directory, choosing the latest ``.log``
|
|
1389
|
+
file across both by name (logs are named by start timestamp).
|
|
1390
|
+
"""
|
|
1391
|
+
candidates = []
|
|
1392
|
+
for d in [
|
|
1393
|
+
os.path.join(".calkit", "local", "logs"),
|
|
1394
|
+
os.path.join(".calkit", "logs"),
|
|
1395
|
+
]:
|
|
1396
|
+
if os.path.isdir(d):
|
|
1397
|
+
candidates += [
|
|
1398
|
+
os.path.join(d, f) for f in os.listdir(d) if f.endswith(".log")
|
|
1399
|
+
]
|
|
1400
|
+
if not candidates:
|
|
1401
|
+
return None
|
|
1402
|
+
latest = max(candidates, key=os.path.basename)
|
|
1403
|
+
try:
|
|
1404
|
+
with open(latest) as f:
|
|
1405
|
+
return f.read()
|
|
1406
|
+
except OSError:
|
|
1407
|
+
return None
|
|
1408
|
+
|
|
1409
|
+
|
|
1410
|
+
def _format_run_elapsed(start_iso: str) -> str:
    """Render how long ago ``start_iso`` (a UTC ISO timestamp) was.

    The result looks like ``1h2m3s``, ``2m3s`` or ``3s`` depending on the
    largest non-zero unit. ``"?"`` is returned if the timestamp cannot be
    parsed, and a start time in the future is shown as ``0s``.
    """
    try:
        started_at = datetime.fromisoformat(start_iso)
    except ValueError:
        return "?"
    delta = calkit.utcnow(remove_tz=True) - started_at
    # Clamp so a start time slightly in the future never shows as negative
    elapsed = max(int(delta.total_seconds()), 0)
    whole_minutes, secs = divmod(elapsed, 60)
    hrs, mins = divmod(whole_minutes, 60)
    if hrs:
        return f"{hrs}h{mins}m{secs}s"
    if mins:
        return f"{mins}m{secs}s"
    return f"{secs}s"
|
|
1425
|
+
|
|
1426
|
+
|
|
1427
|
+
def _stage_target_from_cmd(cmd: str) -> str | None:
|
|
1428
|
+
"""Extract a DVC stage target from a ``dvc repro`` command string.
|
|
1429
|
+
|
|
1430
|
+
Concurrent scheduler items (an ``iterate_over`` sweep) run as separate
|
|
1431
|
+
``dvc repro --single-item <stage>`` processes whose stage name is the
|
|
1432
|
+
trailing positional argument recorded in the DVC lock. This lets the
|
|
1433
|
+
sweep items be named even before the main run log exists. Returns
|
|
1434
|
+
``None`` for commands without an explicit target (e.g. a full-pipeline
|
|
1435
|
+
``dvc repro`` or the parent ``calkit run``).
|
|
1436
|
+
"""
|
|
1437
|
+
tokens = cmd.split()
|
|
1438
|
+
if "repro" not in tokens:
|
|
1439
|
+
return None
|
|
1440
|
+
after = tokens[tokens.index("repro") + 1 :]
|
|
1441
|
+
targets = [t for t in after if not t.startswith("-")]
|
|
1442
|
+
return targets[-1] if targets else None
|
|
1443
|
+
|
|
1444
|
+
|
|
1445
|
+
def _get_running_pipeline_status() -> dict | None:
    """Describe the pipeline run in progress, or return ``None`` if idle.

    A run is considered in progress when
    ``calkit.dvc.get_running_pipeline_processes`` reports any processes.
    The returned dict has the keys ``running`` (always ``True``),
    ``processes``, ``stages`` and ``running_stages``.

    Two cases are distinguished:

    - Some process command names an explicit ``repro`` target. These are
      concurrently-run scheduler items (an ``iterate_over`` sweep) executing
      in their own processes during a prepass, before the current run's log
      exists, so the latest log on disk is from a previous run. Only the
      targets from the lock are reported (``stages`` is empty) to avoid
      mixing in stale stages.
    - Otherwise the most recent run log is parsed, and every stage that has
      no ``status`` entry yet is reported as running.
    """
    processes = calkit.dvc.get_running_pipeline_processes()
    if not processes:
        return None
    lock_targets = [
        _stage_target_from_cmd(proc.get("cmd", "")) for proc in processes
    ]
    # De-duplicate while keeping first-seen order
    concurrent_stages: list[str] = list(
        dict.fromkeys(t for t in lock_targets if t is not None)
    )
    if concurrent_stages:
        stages: dict = {}
        running_stages = concurrent_stages
    else:
        log_text = _get_latest_run_log_content()
        if log_text is None:
            stages = {}
        else:
            stages = _stage_run_info_from_log_content(log_text)
        running_stages = [
            name for name, info in stages.items() if "status" not in info
        ]
    return {
        "running": True,
        "processes": processes,
        "stages": stages,
        "running_stages": running_stages,
    }
|
|
1489
|
+
|
|
1490
|
+
|
|
1491
|
+
def _print_running_pipeline_status(running_status: dict) -> None:
    """Echo a human-readable summary of an in-progress pipeline run.

    ``running_status`` must carry the ``processes``, ``stages`` and
    ``running_stages`` keys. The output lists the process IDs, then any
    stages whose status is ``completed`` or ``skipped``, then one line per
    running stage (with elapsed time when its ``start_time`` is known). If
    nothing has finished and nothing is running, a start-up notice is
    printed instead.
    """
    pid_list = ", ".join(
        str(proc["pid"]) for proc in running_status["processes"]
    )
    typer.echo(f"Run in progress (PID {pid_list})")
    stage_info = running_status["stages"]
    done = [
        stage_name
        for stage_name, details in stage_info.items()
        if details.get("status") in ("completed", "skipped")
    ]
    active = running_status["running_stages"]
    if done:
        typer.echo(f" completed: {', '.join(done)}")
    if not active:
        if not done:
            typer.echo(" starting up...")
        return
    for stage_name in active:
        # Stages recovered from the lock have no log entry, hence no start
        started = stage_info.get(stage_name, {}).get("start_time")
        line = f" running: {typer.style(stage_name, fg='green')}"
        if started:
            line += f" ({_format_run_elapsed(started)})"
        typer.echo(line)
|
|
1518
|
+
|
|
1519
|
+
|
|
1520
|
+
def _concurrent_scheduler_prepass(
    ck_info: dict,
    targets: list[str],
    keep_going: bool,
    quiet: bool,
) -> None:
    """Submit iterated scheduler-stage jobs concurrently before ``dvc repro``.

    DVC's ``repro`` runs matrix items serially, so each scheduler item would
    otherwise submit-and-wait one at a time. For every eligible stage this
    first builds the stage's upstreams (serially, to avoid an rwlock race),
    then fans its items out as concurrent ``dvc repro --single-item``
    processes, all at once, leaving it to the cluster's scheduler to queue
    them. The trailing ``dvc repro`` then sees the items as up-to-date and
    records them, preserving granular per-item caching so a failed sweep
    resumes only the failed items. Not used under --force (the caller runs
    those serially).

    Parameters
    ----------
    ck_info
        Project info passed to
        ``calkit.pipeline.get_concurrent_scheduler_stages``.
    targets
        If non-empty, only eligible stages named here are processed.
    keep_going
        When true, failed items produce a warning instead of an error.
    quiet
        When true, the per-stage submission message is suppressed.
    """
    import sys

    import calkit.pipeline

    # Each item holds a local polling process for the job's lifetime, so
    # cap the fan-out to avoid exhausting local resources; a sweep larger
    # than this should be split into multiple runs.
    job_limit = 100
    candidates = calkit.pipeline.get_concurrent_scheduler_stages(ck_info)
    if targets:
        stage_names = [name for name in candidates if name in targets]
    else:
        stage_names = candidates
    if not stage_names:
        return
    for stage_name in stage_names:
        items, upstreams = calkit.pipeline.get_matrix_item_targets(
            stage_name
        )
        if not items:
            continue
        n_items = len(items)
        if n_items > job_limit:
            raise_error(
                f"Stage '{stage_name}' would submit {n_items} jobs "
                f"at once, exceeding the limit of {job_limit}. Each concurrent "
                "submission holds a local process, so reduce the sweep size "
                "or split it across multiple runs."
            )
        if not quiet:
            calkit.echo(f"🧵 Submitting {n_items} '{stage_name}' jobs")
        # Build shared upstreams first; if two items raced to build the same
        # stale dependency, one would fail with an rwlock "busy" error. Go
        # through `calkit dvc` so the ck:// remote scheme is registered.
        if upstreams:
            upstream_cmd = [
                sys.executable,
                "-m",
                "calkit",
                "dvc",
                "repro",
                *upstreams,
            ]
            if subprocess.run(upstream_cmd).returncode != 0:
                raise_error(
                    f"Failed to build dependencies for stage '{stage_name}'"
                )
        return_codes = calkit.pipeline.reproduce_targets_concurrently(
            items, max_workers=n_items
        )
        failed = [t for t, rc in return_codes.items() if rc != 0]
        if not failed:
            continue
        summary = (
            f"{len(failed)} of {n_items} '{stage_name}' jobs "
            f"failed: {', '.join(failed)}"
        )
        if keep_going:
            warn(summary)
        else:
            raise_error(
                summary + ". Successful jobs are cached; rerun to resume."
            )
|
|
1592
|
+
|
|
1593
|
+
|
|
1347
1594
|
@app.command(name="run")
|
|
1348
1595
|
def run(
|
|
1349
1596
|
targets: Annotated[
|
|
@@ -1767,24 +2014,48 @@ def run(
|
|
|
1767
2014
|
if downstream is not None:
|
|
1768
2015
|
args.append("--downstream")
|
|
1769
2016
|
args += downstream
|
|
2017
|
+
# Pre-submit iterated scheduler-stage jobs concurrently; the main repro
|
|
2018
|
+
# below then records them. Skipped for --dry so the dry plan stays intact,
|
|
2019
|
+
# and for --force, which re-runs every item: those go serially through the
|
|
2020
|
+
# main repro so we don't both pre-run and re-run each job. Also skipped
|
|
2021
|
+
# when a selector narrows the run (--downstream/--pipeline/--recursive/
|
|
2022
|
+
# --glob/--all-pipelines): positional ``targets`` is empty then, so the
|
|
2023
|
+
# prepass can't tell which sweeps will actually run and would otherwise
|
|
2024
|
+
# submit all of them.
|
|
2025
|
+
run_is_narrowed = bool(
|
|
2026
|
+
downstream or pipeline or recursive or glob or all_pipelines
|
|
2027
|
+
)
|
|
2028
|
+
if dvc_stages and not dry and not force and not run_is_narrowed:
|
|
2029
|
+
_concurrent_scheduler_prepass(
|
|
2030
|
+
ck_info=ck_info,
|
|
2031
|
+
targets=targets,
|
|
2032
|
+
keep_going=keep_going,
|
|
2033
|
+
quiet=quiet,
|
|
2034
|
+
)
|
|
1770
2035
|
start_time_no_tz = calkit.utcnow(remove_tz=True)
|
|
1771
2036
|
start_time = calkit.utcnow(remove_tz=False)
|
|
1772
2037
|
run_id = uuid.uuid4().hex
|
|
1773
|
-
|
|
1774
|
-
log_fpath = os.path.join(
|
|
1775
|
-
".calkit",
|
|
1776
|
-
"logs",
|
|
2038
|
+
log_fname = (
|
|
1777
2039
|
start_time_no_tz.isoformat(timespec="seconds").replace(":", "-")
|
|
1778
2040
|
+ "-"
|
|
1779
2041
|
+ run_id
|
|
1780
|
-
+ ".log"
|
|
2042
|
+
+ ".log"
|
|
1781
2043
|
)
|
|
2044
|
+
# Always write the run log under the gitignored .calkit/local/logs so
|
|
2045
|
+
# `calkit status` can report which stage is running while the pipeline
|
|
2046
|
+
# holds the DVC lock. With --log, the log is additionally saved to the
|
|
2047
|
+
# tracked .calkit/logs directory along with run information.
|
|
2048
|
+
local_logs_dir = os.path.join(calkit.ensure_local_dir(), "logs")
|
|
2049
|
+
os.makedirs(local_logs_dir, exist_ok=True)
|
|
2050
|
+
log_fpath = os.path.join(local_logs_dir, log_fname)
|
|
1782
2051
|
if verbose:
|
|
1783
2052
|
typer.echo(f"Starting run ID: {run_id}")
|
|
1784
2053
|
typer.echo(f"Saving logs to {log_fpath}")
|
|
1785
|
-
os.makedirs(os.path.dirname(log_fpath), exist_ok=True)
|
|
1786
2054
|
# Create a file handler for dvc.stage.run logger
|
|
1787
2055
|
file_handler = logging.FileHandler(log_fpath, mode="w")
|
|
2056
|
+
# Keep the private log directory bounded; the new log counts toward the
|
|
2057
|
+
# cap and is protected so it can never be pruned out from under this run.
|
|
2058
|
+
_prune_run_logs(local_logs_dir, keep=10, protect=log_fname)
|
|
1788
2059
|
file_handler.setLevel(logging.DEBUG)
|
|
1789
2060
|
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
|
|
1790
2061
|
formatter.converter = time.gmtime # Use UTC time for asctime
|
|
@@ -1794,7 +2065,14 @@ def run(
|
|
|
1794
2065
|
dvc.repo.reproduce.logger.setLevel(logging.ERROR)
|
|
1795
2066
|
# Disable other misc DVC output
|
|
1796
2067
|
dvc.ui.ui.write = lambda *args, **kwargs: None
|
|
1797
|
-
|
|
2068
|
+
# Tell `calkit scheduler batch` to resubmit completed jobs under --force;
|
|
2069
|
+
# otherwise it skips jobs it sees as already done.
|
|
2070
|
+
if force:
|
|
2071
|
+
os.environ["CALKIT_FORCE"] = "1"
|
|
2072
|
+
try:
|
|
2073
|
+
res = dvc_cli_main(["repro"] + args)
|
|
2074
|
+
finally:
|
|
2075
|
+
os.environ.pop("CALKIT_FORCE", None)
|
|
1798
2076
|
failed = failed or res != 0
|
|
1799
2077
|
# Parse log to get timing and which stages ran
|
|
1800
2078
|
with open(log_fpath, "r") as f:
|
|
@@ -1877,8 +2155,12 @@ def run(
|
|
|
1877
2155
|
os.makedirs(os.path.dirname(run_info_fpath), exist_ok=True)
|
|
1878
2156
|
with open(run_info_fpath, "w") as f:
|
|
1879
2157
|
json.dump(run_info, f, indent=2)
|
|
1880
|
-
|
|
1881
|
-
os.
|
|
2158
|
+
# Also keep the raw log in the tracked .calkit/logs directory
|
|
2159
|
+
saved_log_fpath = os.path.join(".calkit", "logs", log_fname)
|
|
2160
|
+
os.makedirs(os.path.dirname(saved_log_fpath), exist_ok=True)
|
|
2161
|
+
shutil.copy2(log_fpath, saved_log_fpath)
|
|
2162
|
+
# The private log under .calkit/local/logs is retained either way so the
|
|
2163
|
+
# last run's status stays inspectable; it is gitignored.
|
|
1882
2164
|
os.environ.pop("CALKIT_PIPELINE_RUNNING", None)
|
|
1883
2165
|
if failed:
|
|
1884
2166
|
raise_error("Pipeline failed")
|