@hallucination-studio/harness-engine 1.0.0-beta.8.87407 → 1.0.0-beta.9.bb2cd30

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  import json
4
+ import os
4
5
  import subprocess
5
6
  import sys
6
7
  import tempfile
@@ -67,6 +68,10 @@ def test_empty_repo_init(tmp_root):
67
68
  raise AssertionError("Analysis should report missing exec-plan state")
68
69
  if not analysis["missing_sops"]:
69
70
  raise AssertionError("Analysis should report missing SOPs")
71
+ nested_output = tmp_root / "nested" / "generated" / "analysis.json"
72
+ run_manager("analyze", "--repo", str(repo), "--output", str(nested_output))
73
+ if not nested_output.exists():
74
+ raise AssertionError("analyze --output should create missing parent directories")
70
75
 
71
76
  run_manager("init", "--repo", str(repo), "--answers", str(answers))
72
77
  for relative_path in [
@@ -74,12 +79,14 @@ def test_empty_repo_init(tmp_root):
74
79
  "ARCHITECTURE.md",
75
80
  "docs/PLANS.md",
76
81
  "docs/QUALITY_SCORE.md",
82
+ "docs/exec-plans/workstreams.md",
77
83
  "docs/exec-plans/active/_template.md",
78
84
  "docs/exec-plans/completed/README.md",
79
85
  "docs/sops/encode-unseen-knowledge.md",
80
86
  ]:
81
87
  assert_exists(repo, relative_path)
82
88
  assert_contains(repo, "AGENTS.md", "docs/exec-plans/active/")
89
+ assert_contains(repo, "AGENTS.md", "docs/exec-plans/workstreams.md")
83
90
  assert_contains(repo, "AGENTS.md", "docs/sops/")
84
91
  assert_contains(repo, "AGENTS.md", ".codex/skills/harness-repo-bootstrap/scripts/manage_harness.py check")
85
92
 
@@ -185,6 +192,69 @@ def test_closed_loop_plan(tmp_root):
185
192
  "--append",
186
193
  )
187
194
  assert_contains(repo, "docs/PRODUCT_SENSE.md", fact)
195
+ run_manager(
196
+ "plan-close",
197
+ "--repo",
198
+ str(repo),
199
+ "--plan",
200
+ relative_plan,
201
+ "--summary",
202
+ "done",
203
+ expect_success=False,
204
+ )
205
+ failing_score = run_manager(
206
+ "quality-score",
207
+ "--repo",
208
+ str(repo),
209
+ "--plan",
210
+ relative_plan,
211
+ "--product-correctness",
212
+ "9",
213
+ "--ux-operator-clarity",
214
+ "8",
215
+ "--architecture-maintainability",
216
+ "7",
217
+ "--reliability-observability",
218
+ "8",
219
+ "--security-data-handling",
220
+ "8",
221
+ "--architecture-note",
222
+ "Plan closure needs a deterministic quality gate before handoff",
223
+ expect_success=False,
224
+ )
225
+ if failing_score["status"] != "fail":
226
+ raise AssertionError("Low dimension score should fail the quality gate")
227
+ plan_text_after_fail = plan_path.read_text()
228
+ if "## Rework Required" not in plan_text_after_fail:
229
+ raise AssertionError("Failing quality score should keep a rework section")
230
+ if "Improve Architecture and maintainability" not in plan_text_after_fail:
231
+ raise AssertionError("Failing quality score should name the low dimension")
232
+ check_after_fail = run_manager("check", "--repo", str(repo), expect_success=False)
233
+ if check_after_fail["status"] != "fail":
234
+ raise AssertionError("Harness check should fail while an active plan has a failed quality gate")
235
+ passing_score = run_manager(
236
+ "quality-score",
237
+ "--repo",
238
+ str(repo),
239
+ "--plan",
240
+ relative_plan,
241
+ "--product-correctness",
242
+ "9",
243
+ "--ux-operator-clarity",
244
+ "8",
245
+ "--architecture-maintainability",
246
+ "8",
247
+ "--reliability-observability",
248
+ "8",
249
+ "--security-data-handling",
250
+ "8",
251
+ "--product-note",
252
+ "Requested behavior is complete",
253
+ "--architecture-note",
254
+ "Plan closure now has a deterministic quality gate",
255
+ )
256
+ if passing_score["status"] != "pass":
257
+ raise AssertionError("Scores at or above the minimum should pass")
188
258
  close_result = run_manager(
189
259
  "plan-close",
190
260
  "--repo",
@@ -247,6 +317,8 @@ def test_closed_loop_plan(tmp_root):
247
317
  handle.write(
248
318
  "\nThe `main` package owns keyboard input and rendering, while `game` contains pure state transitions.\n"
249
319
  )
320
+ evidence_file = tmp_root / "evidence.txt"
321
+ evidence_file.write_text("main package owns keyboard input and rendering\n")
250
322
  run_manager(
251
323
  "knowledge-mark-written",
252
324
  "--repo",
@@ -255,8 +327,25 @@ def test_closed_loop_plan(tmp_root):
255
327
  id_relative_plan,
256
328
  "--id",
257
329
  log_result["id"],
258
- "--evidence",
259
- "main package owns keyboard input and rendering",
330
+ "--evidence-file",
331
+ str(evidence_file),
332
+ )
333
+ run_manager(
334
+ "quality-score",
335
+ "--repo",
336
+ str(repo),
337
+ "--plan",
338
+ id_relative_plan,
339
+ "--product-correctness",
340
+ "8",
341
+ "--ux-operator-clarity",
342
+ "8",
343
+ "--architecture-maintainability",
344
+ "8",
345
+ "--reliability-observability",
346
+ "8",
347
+ "--security-data-handling",
348
+ "8",
260
349
  )
261
350
  plan_text = id_plan_path.read_text()
262
351
  if id_fact in (repo / "ARCHITECTURE.md").read_text():
@@ -279,6 +368,21 @@ def create_formatted_plan(repo):
279
368
  plan_path.write_text(
280
369
  """# Execution Plan: Formatted Plan
281
370
 
371
+ ## Quality Gate
372
+
373
+ Status: pass
374
+ Minimum score: 8.0
375
+ Average score: 8.0
376
+ Last scored: 2026-06-11T00:00:00Z
377
+
378
+ | Dimension | Score | Notes |
379
+ | --- | ---: | --- |
380
+ | Product correctness | 8.0 | ok |
381
+ | UX and operator clarity | 8.0 | ok |
382
+ | Architecture and maintainability | 8.0 | ok |
383
+ | Reliability and observability | 8.0 | ok |
384
+ | Security and data handling | 8.0 | ok |
385
+
282
386
  ## Durable Knowledge To Capture
283
387
 
284
388
  - [ ] `snake.sh` is the single runtime entrypoint and owns terminal control directly with `stty` and `tput`. -> `ARCHITECTURE.md`
@@ -301,10 +405,357 @@ def test_preserve_unmanaged_docs(tmp_root):
301
405
  assert_exists(repo, "docs/PLANS.md")
302
406
 
303
407
 
408
+ def test_phase_continuity_workstream(tmp_root):
409
+ repo = tmp_root / "phase-repo"
410
+ repo.mkdir()
411
+ answers = tmp_root / "phase-answers.json"
412
+ write_answers(answers, project_name="phase-demo")
413
+ run_manager("init", "--repo", str(repo), "--answers", str(answers))
414
+
415
+ plan_result = run_manager(
416
+ "plan-start",
417
+ "--repo",
418
+ str(repo),
419
+ "--slug",
420
+ "local-workbench-phase-1",
421
+ "--goal",
422
+ "Complete Local Workbench Phase 1",
423
+ )
424
+ plan_path = Path(plan_result["plan"])
425
+ relative_plan = str(plan_path.resolve().relative_to(repo.resolve()))
426
+ run_manager(
427
+ "quality-score",
428
+ "--repo",
429
+ str(repo),
430
+ "--plan",
431
+ relative_plan,
432
+ "--product-correctness",
433
+ "8",
434
+ "--ux-operator-clarity",
435
+ "8",
436
+ "--architecture-maintainability",
437
+ "8",
438
+ "--reliability-observability",
439
+ "8",
440
+ "--security-data-handling",
441
+ "8",
442
+ )
443
+ close_without_continuity = run_manager(
444
+ "plan-close",
445
+ "--repo",
446
+ str(repo),
447
+ "--plan",
448
+ relative_plan,
449
+ "--summary",
450
+ "Phase 1 done",
451
+ expect_success=False,
452
+ )
453
+ if close_without_continuity:
454
+ raise AssertionError("plan-close should not produce JSON when phase continuity blocks closure")
455
+ check_without_continuity = run_manager("check", "--repo", str(repo), expect_success=False)
456
+ issue_codes = {issue["code"] for issue in check_without_continuity["issues"]}
457
+ if "phase-mode-not-declared" not in issue_codes:
458
+ raise AssertionError("check should flag phased plans that do not declare continuation")
459
+
460
+ run_manager(
461
+ "phase-set",
462
+ "--repo",
463
+ str(repo),
464
+ "--plan",
465
+ relative_plan,
466
+ "--mode",
467
+ "multi-phase",
468
+ "--workstream",
469
+ "local-workbench",
470
+ "--current-phase",
471
+ "1",
472
+ "--next-phase",
473
+ "2",
474
+ "--continuation",
475
+ "docs/exec-plans/workstreams.md#local-workbench",
476
+ "--next-action",
477
+ "Create Phase 2 plan for command adapters",
478
+ "--resume-notes",
479
+ "Read completed Phase 1 plan and ARCHITECTURE.md before continuing",
480
+ )
481
+ close_without_workstream = run_manager(
482
+ "plan-close",
483
+ "--repo",
484
+ str(repo),
485
+ "--plan",
486
+ relative_plan,
487
+ "--summary",
488
+ "Phase 1 done",
489
+ expect_success=False,
490
+ )
491
+ if close_without_workstream:
492
+ raise AssertionError("plan-close should not allow a workstreams continuation without a ledger entry")
493
+ run_manager(
494
+ "workstream-upsert",
495
+ "--repo",
496
+ str(repo),
497
+ "--id",
498
+ "local-workbench",
499
+ "--status",
500
+ "active",
501
+ "--current-plan",
502
+ relative_plan,
503
+ "--next-action",
504
+ "Create Phase 2 plan for command adapters",
505
+ "--goal",
506
+ "Refactor local workbench into a maintainable terminal workflow",
507
+ "--resume-notes",
508
+ "Read completed Phase 1 plan and ARCHITECTURE.md before continuing",
509
+ )
510
+ assert_contains(repo, "docs/exec-plans/workstreams.md", "local-workbench")
511
+ assert_contains(repo, "docs/exec-plans/workstreams.md", "Create Phase 2 plan for command adapters")
512
+ close_result = run_manager(
513
+ "plan-close",
514
+ "--repo",
515
+ str(repo),
516
+ "--plan",
517
+ relative_plan,
518
+ "--summary",
519
+ "Phase 1 done; Phase 2 recovery is recorded in workstreams.",
520
+ )
521
+ if close_result["status"] != "closed":
522
+ raise AssertionError("Phased plan should close after continuity and workstream recovery are recorded")
523
+ completed_relative_plan = "docs/exec-plans/completed/" + plan_path.name
524
+ workstreams_text = (repo / "docs/exec-plans/workstreams.md").read_text()
525
+ if completed_relative_plan not in workstreams_text:
526
+ raise AssertionError("plan-close should update workstream ledger to the completed plan path")
527
+ if relative_plan in workstreams_text:
528
+ raise AssertionError("workstream ledger should not keep stale active plan references after plan-close")
529
+ broken = workstreams_text.replace(completed_relative_plan, relative_plan)
530
+ (repo / "docs/exec-plans/workstreams.md").write_text(broken)
531
+ broken_check = run_manager("check", "--repo", str(repo), expect_success=False)
532
+ broken_codes = {issue["code"] for issue in broken_check["issues"]}
533
+ if "missing-workstream-plan-reference" not in broken_codes:
534
+ raise AssertionError("check should fail when workstream ledger points to a missing plan")
535
+
536
+
537
+ def test_plan_path_canonicalization(tmp_root):
538
+ repo = tmp_root / "canonical-repo"
539
+ repo.mkdir()
540
+ answers = tmp_root / "canonical-answers.json"
541
+ write_answers(answers, project_name="canonical-demo")
542
+ run_manager("init", "--repo", str(repo), "--answers", str(answers))
543
+
544
+ plan_result = run_manager(
545
+ "plan-start",
546
+ "--repo",
547
+ str(repo),
548
+ "--slug",
549
+ "canonical-close",
550
+ "--goal",
551
+ "Close a plan when repo and plan paths use different filesystem spellings",
552
+ )
553
+ plan_path = Path(plan_result["plan"])
554
+ relative_plan = str(plan_path.resolve().relative_to(repo.resolve()))
555
+ run_manager(
556
+ "quality-score",
557
+ "--repo",
558
+ str(repo),
559
+ "--plan",
560
+ str(plan_path),
561
+ "--product-correctness",
562
+ "8",
563
+ "--ux-operator-clarity",
564
+ "8",
565
+ "--architecture-maintainability",
566
+ "8",
567
+ "--reliability-observability",
568
+ "8",
569
+ "--security-data-handling",
570
+ "8",
571
+ )
572
+ run_manager(
573
+ "workstream-upsert",
574
+ "--repo",
575
+ str(repo),
576
+ "--id",
577
+ "canonical-close",
578
+ "--status",
579
+ "active",
580
+ "--current-plan",
581
+ relative_plan,
582
+ "--next-action",
583
+ "Close after canonical path validation",
584
+ "--goal",
585
+ "Verify plan-close updates workstreams with normalized relative paths",
586
+ "--resume-notes",
587
+ "No special resume notes",
588
+ )
589
+
590
+ repo_arg = os.path.realpath(repo)
591
+ plan_arg = str(plan_path)
592
+ if repo_arg == str(repo) and plan_arg == str(plan_path.resolve()):
593
+ repo_arg = str(repo)
594
+ plan_arg = str(plan_path.resolve())
595
+
596
+ close_result = run_manager(
597
+ "plan-close",
598
+ "--repo",
599
+ repo_arg,
600
+ "--plan",
601
+ plan_arg,
602
+ "--summary",
603
+ "Closed with canonicalized plan path.",
604
+ )
605
+ if close_result["status"] != "closed":
606
+ raise AssertionError("plan-close should accept absolute plan paths inside the repo")
607
+ completed_relative_plan = "docs/exec-plans/completed/" + plan_path.name
608
+ workstreams_text = (repo / "docs/exec-plans/workstreams.md").read_text()
609
+ if completed_relative_plan not in workstreams_text:
610
+ raise AssertionError("canonicalized plan-close should update last completed plan")
611
+ if relative_plan in workstreams_text:
612
+ raise AssertionError("canonicalized plan-close should remove stale current plan references")
613
+ check_result = run_manager("check", "--repo", str(repo))
614
+ if check_result["status"] != "pass":
615
+ raise AssertionError("canonicalized plan-close should leave harness check passing")
616
+
617
+
618
+ def test_defect_recovery_loop(tmp_root):
619
+ repo = tmp_root / "defect-repo"
620
+ repo.mkdir()
621
+ answers = tmp_root / "defect-answers.json"
622
+ write_answers(answers, project_name="defect-demo")
623
+ run_manager("init", "--repo", str(repo), "--answers", str(answers))
624
+
625
+ plan_result = run_manager(
626
+ "plan-start",
627
+ "--repo",
628
+ str(repo),
629
+ "--slug",
630
+ "snake-tail-collision",
631
+ "--goal",
632
+ "Validate defect recovery when Snake tail-cell collision behavior fails",
633
+ )
634
+ plan_path = Path(plan_result["plan"])
635
+ relative_plan = str(plan_path.resolve().relative_to(repo.resolve()))
636
+ defect_summary = (
637
+ "Snake marks game over when the head moves into the current tail cell during a non-eating tick"
638
+ )
639
+ defect_result = run_manager(
640
+ "defect-log",
641
+ "--repo",
642
+ str(repo),
643
+ "--plan",
644
+ relative_plan,
645
+ "--severity",
646
+ "P1",
647
+ "--summary",
648
+ defect_summary,
649
+ "--evidence",
650
+ "go test ./internal/game -run TestCanMoveIntoVacatedTailCell failed",
651
+ expect_success=False,
652
+ )
653
+ defect_id = defect_result["id"]
654
+ plan_text = plan_path.read_text()
655
+ if "## Defects To Resolve" not in plan_text or defect_id not in plan_text:
656
+ raise AssertionError("defect-log should record the open defect in the plan")
657
+ if "Status: fail" not in plan_text:
658
+ raise AssertionError("defect-log should force the quality gate to fail")
659
+ if "Resolve all open defects" not in plan_text:
660
+ raise AssertionError("defect-log should turn the bug into rework input")
661
+
662
+ score_with_open_defect = run_manager(
663
+ "quality-score",
664
+ "--repo",
665
+ str(repo),
666
+ "--plan",
667
+ relative_plan,
668
+ "--product-correctness",
669
+ "10",
670
+ "--ux-operator-clarity",
671
+ "10",
672
+ "--architecture-maintainability",
673
+ "10",
674
+ "--reliability-observability",
675
+ "10",
676
+ "--security-data-handling",
677
+ "10",
678
+ expect_success=False,
679
+ )
680
+ if score_with_open_defect["status"] != "fail" or defect_id not in score_with_open_defect["open_defects"]:
681
+ raise AssertionError("quality-score should fail while any defect is open")
682
+ check_with_open_defect = run_manager("check", "--repo", str(repo), expect_success=False)
683
+ issue_codes = {issue["code"] for issue in check_with_open_defect["issues"]}
684
+ if "open-defect" not in issue_codes:
685
+ raise AssertionError("check should surface unresolved defects")
686
+ close_with_open_defect = run_manager(
687
+ "plan-close",
688
+ "--repo",
689
+ str(repo),
690
+ "--plan",
691
+ relative_plan,
692
+ "--summary",
693
+ "Should not close with open defects",
694
+ expect_success=False,
695
+ )
696
+ if close_with_open_defect:
697
+ raise AssertionError("plan-close should not close while defects are open")
698
+
699
+ run_manager(
700
+ "defect-resolve",
701
+ "--repo",
702
+ str(repo),
703
+ "--plan",
704
+ relative_plan,
705
+ "--id",
706
+ defect_id,
707
+ "--fix-evidence",
708
+ "go test ./internal/game -run TestCanMoveIntoVacatedTailCell passed",
709
+ )
710
+ plan_text_after_resolve = plan_path.read_text()
711
+ if f"- [x] [bug:{defect_id}]" not in plan_text_after_resolve:
712
+ raise AssertionError("defect-resolve should close the defect checkbox")
713
+ if "Defects resolved. Re-run validation and `quality-score` before closing." not in plan_text_after_resolve:
714
+ raise AssertionError("defect-resolve should require a fresh quality score")
715
+
716
+ passing_score = run_manager(
717
+ "quality-score",
718
+ "--repo",
719
+ str(repo),
720
+ "--plan",
721
+ relative_plan,
722
+ "--product-correctness",
723
+ "9",
724
+ "--ux-operator-clarity",
725
+ "8",
726
+ "--architecture-maintainability",
727
+ "8",
728
+ "--reliability-observability",
729
+ "9",
730
+ "--security-data-handling",
731
+ "10",
732
+ )
733
+ if passing_score["status"] != "pass":
734
+ raise AssertionError("quality-score should pass after defects are resolved")
735
+ close_result = run_manager(
736
+ "plan-close",
737
+ "--repo",
738
+ str(repo),
739
+ "--plan",
740
+ relative_plan,
741
+ "--summary",
742
+ "Closed after defect recovery and fresh quality score.",
743
+ )
744
+ if close_result["status"] != "closed":
745
+ raise AssertionError("plan-close should close after defect recovery")
746
+ completed_plan = repo / "docs" / "exec-plans" / "completed" / plan_path.name
747
+ completed_text = completed_plan.read_text()
748
+ if "- [x] Add durable facts here as they emerge" in completed_text:
749
+ raise AssertionError("plan-close should not mark the default knowledge placeholder as completed")
750
+
751
+
304
752
  EVALS = [
305
753
  ("empty-repo-init", test_empty_repo_init),
306
754
  ("frontend-analysis", test_frontend_analysis),
307
755
  ("closed-loop-plan", test_closed_loop_plan),
756
+ ("phase-continuity-workstream", test_phase_continuity_workstream),
757
+ ("plan-path-canonicalization", test_plan_path_canonicalization),
758
+ ("defect-recovery-loop", test_defect_recovery_loop),
308
759
  ("preserve-unmanaged-docs", test_preserve_unmanaged_docs),
309
760
  ]
310
761
 
@@ -12,6 +12,8 @@ Use this loop when changing the skill, templates, scripts, or policy references:
12
12
  - first-time initialization of an empty repository
13
13
  - frontend-aware repository analysis
14
14
  - execution-plan and knowledge-capture closure
15
+ - quality gates that block closure and force rework when scores fail
16
+ - phase continuity and workstream recovery for resumable work
15
17
  - preservation of unmanaged user-owned docs
16
18
  - local harness checks that do not require user-project CI
17
19
 
@@ -11,6 +11,7 @@ Execution plans are required for multi-step work, risky changes, or tasks that n
11
11
 
12
12
  ## Location
13
13
 
14
+ - Workstream recovery ledger: `docs/exec-plans/workstreams.md`
14
15
  - Active: `docs/exec-plans/active/`
15
16
  - Completed: `docs/exec-plans/completed/`
16
17
 
@@ -21,19 +22,28 @@ Execution plans are required for multi-step work, risky changes, or tasks that n
21
22
  - constraints
22
23
  - steps
23
24
  - validation
25
+ - quality gate
26
+ - defects to resolve
27
+ - rework required
28
+ - phase continuity
24
29
  - durable knowledge to capture
25
30
  - completion notes
26
31
 
27
32
  ## Operating Rule
28
33
 
29
- Update the active plan during the work. When the work is done, move it to `completed` and leave behind any durable facts in the right permanent docs.
34
+ Update the active plan during the work. When the work is done, score it, complete any required rework, record phase continuity for resumable work, move it to `completed`, and leave behind any durable facts in the right permanent docs.
30
35
 
31
36
  ## Closed Loop
32
37
 
33
38
  Use the script, not ad hoc manual edits, for the lifecycle:
34
39
 
35
40
  - `plan-start`: create a new active execution plan
36
- - `knowledge-log`: append a durable fact that still needs to be written into permanent docs and return its stable id
37
- - `knowledge-mark-written`: verify and mark a logged fact as written into its permanent doc; prefer `--id <knowledge-id> --evidence "<doc text>"`, and use `--append` only to append the exact fact first
38
- - `plan-close`: refuse to close cleanly until the listed knowledge items are marked as written to durable docs
41
+ - `knowledge-log`: append a durable fact that still needs to be written into permanent docs and return its stable id; use `--fact-file` for shell-sensitive facts
42
+ - `knowledge-mark-written`: verify and mark a logged fact as written into its permanent doc; prefer `--id <knowledge-id> --evidence-file <file>` for shell-sensitive evidence, and use `--append` only to append the exact fact first
43
+ - `defect-log`: record a bug found by validation, evals, browser testing, or code review; this forces the quality gate to fail and makes the defect the next rework input
44
+ - `defect-resolve`: mark a logged defect fixed with validation or code evidence; re-run validation and `quality-score` before closing
45
+ - `quality-score`: write a scored quality gate into the plan; if it fails, the generated `## Rework Required` section becomes the next implementation input
46
+ - `phase-set`: declare whether phased or resumable work continues, pauses, stops, or completes
47
+ - `workstream-upsert`: update `docs/exec-plans/workstreams.md` so interrupted work can be recovered without chat history
48
+ - `plan-close`: refuse to close cleanly until the quality gate passes, phase continuity is recorded, and the listed knowledge items are marked as written to durable docs
39
49
  - `check`: run a local handoff check without requiring target-repo CI
@@ -36,12 +36,18 @@ After the script runs, read the generated docs once and tighten weak generic phr
36
36
 
37
37
  After the scaffold exists:
38
38
 
39
+ - read `docs/exec-plans/workstreams.md` before resuming interrupted or long-running work
39
40
  - create an execution plan before multi-step work
40
41
  - use `plan-start` instead of creating plan files manually when possible
41
42
  - log durable facts during execution instead of waiting until the end
42
43
  - follow the matching SOP for architecture, UI, observability, or knowledge capture work
43
44
  - encode durable knowledge back into the repository before closing the task
44
45
  - mark logged knowledge items as written after updating the permanent docs
46
+ - log every defect found by tests, evals, browser validation, or code review with `defect-log`
47
+ - resolve logged defects with `defect-resolve` only after fixing the implementation, citing passing validation as the fix evidence
48
+ - run `quality-score` after implementation and validation
49
+ - if `quality-score` fails, implement the `## Rework Required` items and score again
50
+ - use `phase-set` and `workstream-upsert` when a plan belongs to phased or resumable work
45
51
  - use `plan-close` to verify the quality gate passes, phase continuity is recorded, and no durable knowledge is left stranded in the active plan
46
52
  - run `.codex/skills/harness-repo-bootstrap/scripts/manage_harness.py check --repo <target-repo>` before handoff
47
53
  - do not add CI to the target repository unless the human explicitly asks for it