codex-genesis-harness 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codebase/COMPRESSED_CONTEXT.md +80 -0
- package/.codebase/CURRENT_STATE.md +37 -11
- package/.codebase/DEPENDENCY_GRAPH.md +14 -1
- package/.codebase/IMPLEMENTATION_HANDOFF.md +34 -336
- package/.codebase/KNOWN_PROBLEMS.md +54 -3
- package/.codebase/MODULE_INDEX.md +8 -0
- package/.codebase/PIPELINE_FLOW.md +7 -5
- package/.codebase/RECOVERY_POINTS.md +17 -78
- package/.codebase/TECH_DEBT.md +6 -0
- package/.codebase/TEST_MATRIX.md +4 -3
- package/.codebase/VISUAL_GRAPH.md +127 -0
- package/.codebase/context-policy.json +68 -0
- package/.codebase/memories/lessons_learned.md +21 -0
- package/.codebase/memories/preferences.md +17 -0
- package/.codebase/state.json +45 -24
- package/.codex/skills/genesis-architecture/SKILL.md +5 -0
- package/.codex/skills/genesis-debug-guide/SKILL.md +10 -4
- package/.codex/skills/genesis-docs-automation/SKILL.md +52 -973
- package/.codex/skills/genesis-executing-plans/SKILL.md +54 -0
- package/.codex/skills/genesis-executing-plans/agents/openai.yaml +6 -0
- package/.codex/skills/genesis-executing-plans/checklists/.gitkeep +0 -0
- package/.codex/skills/genesis-executing-plans/examples/.gitkeep +0 -0
- package/.codex/skills/genesis-executing-plans/templates/.gitkeep +0 -0
- package/.codex/skills/genesis-harness/SKILL.md +64 -1385
- package/.codex/skills/genesis-harness/scripts/check-docs-sync.sh +3 -3
- package/.codex/skills/genesis-harness/scripts/init-planning.sh +1 -1
- package/.codex/skills/genesis-new-design/SKILL.md +4 -1
- package/.codex/skills/genesis-new-design/agents/openai.yaml +2 -0
- package/.codex/skills/genesis-observability-automation/SKILL.md +69 -303
- package/.codex/skills/genesis-observability-automation/references/common-mistakes-and-recovery.md +84 -0
- package/.codex/skills/genesis-observability-automation/references/workflow-phases.md +78 -0
- package/.codex/skills/genesis-performance-profiling/SKILL.md +1 -22
- package/.codex/skills/genesis-performance-profiling/agents/openai.yaml +1 -1
- package/.codex/skills/genesis-planning/SKILL.md +6 -1
- package/.codex/skills/genesis-release/SKILL.md +5 -0
- package/.codex/skills/genesis-research-first/SKILL.md +6 -0
- package/.codex/skills/genesis-spec-propagation/SKILL.md +52 -504
- package/.codex/skills/genesis-test-driven-development/SKILL.md +55 -0
- package/.codex/skills/genesis-test-driven-development/agents/openai.yaml +6 -0
- package/.codex/skills/genesis-test-driven-development/checklists/.gitkeep +0 -0
- package/.codex/skills/genesis-test-driven-development/examples/.gitkeep +0 -0
- package/.codex/skills/genesis-test-driven-development/templates/.gitkeep +0 -0
- package/.codex/skills/genesis-upgrade-design/SKILL.md +4 -2
- package/.codex/skills/genesis-upgrade-design/agents/openai.yaml +2 -0
- package/.codex/skills/genesis-using-git-worktrees/SKILL.md +54 -0
- package/.codex/skills/genesis-using-git-worktrees/agents/openai.yaml +6 -0
- package/.codex/skills/genesis-using-git-worktrees/checklists/.gitkeep +0 -0
- package/.codex/skills/genesis-using-git-worktrees/examples/.gitkeep +0 -0
- package/.codex/skills/genesis-using-git-worktrees/templates/.gitkeep +0 -0
- package/.codex/skills/genesis-verification-before-completion/SKILL.md +53 -0
- package/.codex/skills/genesis-verification-before-completion/agents/openai.yaml +6 -0
- package/.codex/skills/genesis-verification-before-completion/checklists/.gitkeep +0 -0
- package/.codex/skills/genesis-verification-before-completion/examples/.gitkeep +0 -0
- package/.codex/skills/genesis-verification-before-completion/templates/.gitkeep +0 -0
- package/.codex/skills/spec-impact-engine/SKILL.md +77 -500
- package/.codex/skills/spec-impact-engine/checklists/checklist.md +10 -0
- package/.codex-plugin/plugin.json +3 -4
- package/CHANGELOG.md +4 -1
- package/README.EN.md +32 -17
- package/README.VI.md +35 -19
- package/README.md +48 -10
- package/VERSION +1 -1
- package/bin/genesis-harness.js +735 -5
- package/contracts/features/registry-schema.json +15 -0
- package/contracts/observability/agent-run-schema.json +34 -0
- package/contracts/observability/failure-schema.json +35 -0
- package/contracts/ui/auth/login-screen-contract.json +43 -0
- package/features/REGISTRY.md +63 -0
- package/features/SCOPE-template.md +65 -0
- package/fixtures/planning/MOCKUP_PROMPT_TEMPLATE.md +16 -0
- package/observability/agent-runs/sample-run.json +13 -0
- package/observability/decision-logs/sample-decision.md +43 -0
- package/observability/failures/sample-failure.json +12 -0
- package/package.json +9 -3
- package/playwright/e2e/app-template.spec.js +37 -0
- package/playwright/e2e/auth/login-screen.spec.js +65 -0
- package/playwright/e2e/web-template.spec.js +28 -0
- package/scripts/check-scope.sh +100 -0
- package/scripts/cold-start-check.js +133 -0
- package/scripts/install.sh +4 -0
- package/scripts/prompt_sentinel.js +35 -4
- package/scripts/run-evals.sh +119 -3
- package/scripts/scratch_parser.js +49 -0
- package/scripts/spec_visual_sync.js +1 -1
- package/scripts/test_generator.js +2 -2
- package/scripts/uninstall.sh +4 -0
- package/scripts/verify.sh +16 -1
- package/tests/integration/cli-smoke.test.js +103 -0
- package/tests/unit/feature_registry.test.js +152 -0
- package/tests/unit/prompt_sentinel.test.js +1 -1
- package/tests/unit/spec_visual_sync.test.js +1 -1
- package/tests/unit/test_generator.test.js +1 -1
- package/playwright/e2e/e2e-template.md +0 -4
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1.0.0",
|
|
3
|
+
"description": "Schema for features/REGISTRY.md — the canonical machine-readable feature list primitive (L08 Harness Engineering)",
|
|
4
|
+
"changed_at": "2026-06-03T02:37:00Z",
|
|
5
|
+
"required_columns": ["id", "status", "title", "verify_cmd", "skill"],
|
|
6
|
+
"valid_statuses": ["planned", "in-progress", "done", "verified", "deprecated"],
|
|
7
|
+
"notes": "Every feature MUST have a verify_cmd. 'verified' status requires evidence from the last CI run.",
|
|
8
|
+
"changelog": [
|
|
9
|
+
{
|
|
10
|
+
"version": "1.0.0",
|
|
11
|
+
"date": "2026-06-03",
|
|
12
|
+
"change": "Initial schema — establishes feature list as harness primitive per L08"
|
|
13
|
+
}
|
|
14
|
+
]
|
|
15
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1.0.0",
|
|
3
|
+
"description": "Schema for observability/agent-runs/*.json — records of agent execution sessions (L11 Harness Engineering)",
|
|
4
|
+
"changed_at": "2026-06-03T02:37:00Z",
|
|
5
|
+
"required_fields": [
|
|
6
|
+
"session_id",
|
|
7
|
+
"timestamp",
|
|
8
|
+
"skill",
|
|
9
|
+
"phase",
|
|
10
|
+
"outcome",
|
|
11
|
+
"evidence"
|
|
12
|
+
],
|
|
13
|
+
"field_types": {
|
|
14
|
+
"session_id": "string — unique identifier for the agent session (e.g. UUID or date-prefixed slug)",
|
|
15
|
+
"timestamp": "ISO 8601 string",
|
|
16
|
+
"skill": "string — name of the genesis skill invoked",
|
|
17
|
+
"phase": "string — one of: init, plan, execute, verify, handoff",
|
|
18
|
+
"outcome": "string — one of: success, failure, partial, skipped",
|
|
19
|
+
"evidence": "string — CLI output snippet or file path proving the outcome"
|
|
20
|
+
},
|
|
21
|
+
"optional_fields": {
|
|
22
|
+
"task_id": "string — task item from task.md this run belongs to",
|
|
23
|
+
"duration_ms": "number — wall-clock time of the session",
|
|
24
|
+
"tokens_used": "number — approximate tokens consumed",
|
|
25
|
+
"recovery_needed": "boolean — whether a recovery point was consulted"
|
|
26
|
+
},
|
|
27
|
+
"changelog": [
|
|
28
|
+
{
|
|
29
|
+
"version": "1.0.0",
|
|
30
|
+
"date": "2026-06-03",
|
|
31
|
+
"change": "Initial schema — establishes observability as first-class harness artifact per L11"
|
|
32
|
+
}
|
|
33
|
+
]
|
|
34
|
+
}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1.0.0",
|
|
3
|
+
"description": "Schema for observability/failures/*.json — records agent failures for post-mortem analysis (L11 Harness Engineering)",
|
|
4
|
+
"changed_at": "2026-06-03T02:37:00Z",
|
|
5
|
+
"required_fields": [
|
|
6
|
+
"failure_id",
|
|
7
|
+
"timestamp",
|
|
8
|
+
"skill",
|
|
9
|
+
"phase",
|
|
10
|
+
"error_type",
|
|
11
|
+
"error_message",
|
|
12
|
+
"recovery_action"
|
|
13
|
+
],
|
|
14
|
+
"field_types": {
|
|
15
|
+
"failure_id": "string — unique id for the failure event",
|
|
16
|
+
"timestamp": "ISO 8601 string",
|
|
17
|
+
"skill": "string — skill that was active when failure occurred",
|
|
18
|
+
"phase": "string — one of: init, plan, execute, verify, handoff",
|
|
19
|
+
"error_type": "string — one of: assertion, timeout, permission, contract_violation, scope_overreach, under_finish",
|
|
20
|
+
"error_message": "string — exact CLI error output or description",
|
|
21
|
+
"recovery_action": "string — what was done to recover (e.g. 'reverted via git checkout', 'resumed from RECOVERY_POINTS.md')"
|
|
22
|
+
},
|
|
23
|
+
"optional_fields": {
|
|
24
|
+
"session_id": "string — cross-reference to agent-runs/ entry",
|
|
25
|
+
"root_cause": "string — 5-why analysis result",
|
|
26
|
+
"prevention": "string — what harness change prevents recurrence"
|
|
27
|
+
},
|
|
28
|
+
"changelog": [
|
|
29
|
+
{
|
|
30
|
+
"version": "1.0.0",
|
|
31
|
+
"date": "2026-06-03",
|
|
32
|
+
"change": "Initial schema — failure observability as harness primitive"
|
|
33
|
+
}
|
|
34
|
+
]
|
|
35
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"contract_id": "UI-AUTH-LOGIN",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Concrete example of a UI contract for the demo login feature. Defines inputs, visual states, and outputs.",
|
|
5
|
+
"inputs": {
|
|
6
|
+
"data": [
|
|
7
|
+
{
|
|
8
|
+
"name": "email",
|
|
9
|
+
"type": "string",
|
|
10
|
+
"validation": "Valid email format",
|
|
11
|
+
"required": true
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"name": "password",
|
|
15
|
+
"type": "string",
|
|
16
|
+
"validation": "Minimum 8 characters",
|
|
17
|
+
"required": true
|
|
18
|
+
}
|
|
19
|
+
]
|
|
20
|
+
},
|
|
21
|
+
"states": {
|
|
22
|
+
"initial": "Empty fields, disabled submit button.",
|
|
23
|
+
"valid": "Both fields pass validation, submit button enabled with cyan neon glow.",
|
|
24
|
+
"loading": "Submit button shows spinner, fields disabled.",
|
|
25
|
+
"error": "Error message displayed below fields, fields have red error outline."
|
|
26
|
+
},
|
|
27
|
+
"outputs": {
|
|
28
|
+
"events": [
|
|
29
|
+
{
|
|
30
|
+
"name": "onLoginSubmit",
|
|
31
|
+
"payload": {
|
|
32
|
+
"email": "string",
|
|
33
|
+
"passwordHash": "string"
|
|
34
|
+
}
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"name": "onGoogleSignIn",
|
|
38
|
+
"payload": null
|
|
39
|
+
}
|
|
40
|
+
]
|
|
41
|
+
},
|
|
42
|
+
"mockup_reference": ".planning/features/auth/mockup-login.png"
|
|
43
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Feature Registry
|
|
2
|
+
|
|
3
|
+
> **Nguồn sự thật duy nhất** cho tất cả tính năng của Genesis Codex Harness.
|
|
4
|
+
> Schema: [`contracts/features/registry-schema.json`](../contracts/features/registry-schema.json)
|
|
5
|
+
> **RULE**: Mỗi feature phải có `verify_cmd` — lệnh thực thi xác nhận tính năng hoạt động.
|
|
6
|
+
|
|
7
|
+
## Status Definitions
|
|
8
|
+
|
|
9
|
+
| Status | Ý nghĩa |
|
|
10
|
+
|---|---|
|
|
11
|
+
| `planned` | Đã xác định scope, chưa implement |
|
|
12
|
+
| `in-progress` | Đang được implement trong phiên hiện tại |
|
|
13
|
+
| `done` | Code xong, chưa chạy verification gate |
|
|
14
|
+
| `verified` | Đã có CLI evidence — verification passed |
|
|
15
|
+
| `deprecated` | Không còn được duy trì |
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Feature Table
|
|
20
|
+
|
|
21
|
+
| id | status | title | verify_cmd | skill |
|
|
22
|
+
|---|---|---|---|---|
|
|
23
|
+
| F001 | verified | Skill system — 25 packaged Codex skills | `bash scripts/verify.sh` | genesis-harness |
|
|
24
|
+
| F002 | verified | CLI binary `genesis-harness` với install/uninstall/status/docs | `node tests/integration/cli-smoke.test.js` | genesis-harness |
|
|
25
|
+
| F003 | verified | LeanCTX context budget policy seeding | `bash scripts/run-evals.sh` | genesis-harness |
|
|
26
|
+
| F004 | verified | Beads memory system (remember/recall/forget/prime) | `bash scripts/run-evals.sh` | genesis-harness |
|
|
27
|
+
| F005 | verified | Mermaid VISUAL_GRAPH.md sync gate | `bash scripts/run-evals.sh` | genesis-harness |
|
|
28
|
+
| F006 | verified | docs-gate hook (check-docs-sync.sh) | `node bin/genesis-harness.js docs-gate` | genesis-harness |
|
|
29
|
+
| F007 | verified | PEV Loop enforcement (Plan → Execute → Verify) | `bash scripts/verify.sh` | genesis-harness-engineering |
|
|
30
|
+
| F008 | verified | Contract system (api/agents/events/ui) | `bash scripts/verify.sh` | genesis-api-contract |
|
|
31
|
+
| F009 | verified | TDD workflow (Red → Green → Refactor) | `node tests/unit/feature_registry.test.js` | genesis-test-driven-development |
|
|
32
|
+
| F010 | verified | Verification-before-completion gate | `bash scripts/verify.sh` | genesis-verification-before-completion |
|
|
33
|
+
| F011 | verified | git worktrees isolation for dangerous changes | `bash scripts/verify.sh .codex/skills/genesis-using-git-worktrees` | genesis-using-git-worktrees |
|
|
34
|
+
| F012 | verified | Observability schema + live data (L11) | `node tests/unit/feature_registry.test.js` | genesis-observability-automation |
|
|
35
|
+
| F013 | verified | Feature Registry as harness primitive (L08) | `node tests/unit/feature_registry.test.js` | genesis-harness-engineering |
|
|
36
|
+
| F014 | verified | npm pack / tarball smoke test | `bash scripts/run-evals.sh` | genesis-release |
|
|
37
|
+
| F015 | verified | spec-impact-engine propagation chain | `bash scripts/verify.sh .codex/skills/spec-impact-engine` | spec-impact-engine |
|
|
38
|
+
| F016 | in-progress | Cold-start test automation (L03) | `node scripts/cold-start-check.js` | genesis-harness |
|
|
39
|
+
| F017 | planned | Per-session Time-to-First-Verification KPI (L06) | `node bin/genesis-harness.js status --ttfv` | genesis-harness |
|
|
40
|
+
| F018 | planned | Scope ledger per task (L07) | `bash scripts/check-scope.sh` | genesis-harness |
|
|
41
|
+
| F019 | verified | Demo Feature (Mockup + Contract + E2E) | `npm test` (or npx playwright test) | genesis-harness-engineering |
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## Verification Evidence (Last Run)
|
|
46
|
+
|
|
47
|
+
> Update this section after each CI run.
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
Date: 2026-06-03T02:38:00Z
|
|
51
|
+
scripts/verify.sh → verify passed
|
|
52
|
+
scripts/run-evals.sh → evals passed
|
|
53
|
+
feature_registry.test.js → feature_registry tests passed
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Adding a New Feature
|
|
59
|
+
|
|
60
|
+
1. Add a row to the Feature Table above with a **unique `id`** and a **real `verify_cmd`**
|
|
61
|
+
2. Set initial status to `planned`
|
|
62
|
+
3. Update `.codebase/MODULE_INDEX.md` if new module is introduced
|
|
63
|
+
4. Run `node tests/unit/feature_registry.test.js` — must pass before status → `verified`
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# SCOPE — [Task Name]
|
|
2
|
+
|
|
3
|
+
> **File**: `.planning/tasks/[task-id]/SCOPE.md`
|
|
4
|
+
> **Purpose**: Hard boundary definition — lists exactly which files this task MAY modify.
|
|
5
|
+
> **Rule**: Agent MUST NOT touch any file not listed below. If a necessary file is missing, update this SCOPE.md first and get confirmation.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Task ID
|
|
10
|
+
`[task-id]` — e.g. `F013-feature-registry`
|
|
11
|
+
|
|
12
|
+
## Task Description
|
|
13
|
+
[One-sentence description of what this task does]
|
|
14
|
+
|
|
15
|
+
## Skill
|
|
16
|
+
`[genesis-skill-name]` — the skill governing this task
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Permitted File Changes
|
|
21
|
+
|
|
22
|
+
### ✅ Files this task MAY create or modify
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
[list each file on its own line, relative to repo root]
|
|
26
|
+
features/REGISTRY.md
|
|
27
|
+
contracts/features/registry-schema.json
|
|
28
|
+
tests/unit/feature_registry.test.js
|
|
29
|
+
.codebase/MODULE_INDEX.md
|
|
30
|
+
scripts/run-evals.sh
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### ❌ Files this task MUST NOT touch
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
[list critical files that must not be affected]
|
|
37
|
+
.codex/SOUL.md
|
|
38
|
+
AGENTS.md
|
|
39
|
+
package.json (unless adding to 'files' array only)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### 🟡 Files requiring explicit confirmation before touching
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
[files that might need updating but require user review first]
|
|
46
|
+
scripts/verify.sh
|
|
47
|
+
.codebase/state.json
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## Scope Boundary Rationale
|
|
53
|
+
|
|
54
|
+
[Why these boundaries? What would happen if the agent went outside them?]
|
|
55
|
+
|
|
56
|
+
Example: "Restricting to `features/` and `contracts/features/` prevents accidental changes to the core verification loop while adding the registry primitive."
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Scope Check Command
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# Verify no out-of-scope files were modified:
|
|
64
|
+
bash scripts/check-scope.sh .planning/tasks/[task-id]/SCOPE.md
|
|
65
|
+
```
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Mockup Generation Instruction
|
|
2
|
+
|
|
3
|
+
**Mục đích:** Bắt buộc AI Agent phải tạo ra bản nháp giao diện (Mockup) trực quan trước khi bắt đầu code bất kỳ file UI nào.
|
|
4
|
+
|
|
5
|
+
## Chỉ thị cho AI Agent:
|
|
6
|
+
Khi người dùng yêu cầu thiết kế một tính năng mới có giao diện người dùng (UI):
|
|
7
|
+
1. **Dừng viết code ngay lập tức.** Không tạo file `.tsx`, `.html`, `.css` nào.
|
|
8
|
+
2. Dùng công cụ `generate_image` (nếu có) để tạo ra một bản thiết kế UI Mockup dựa trên mô tả của người dùng.
|
|
9
|
+
- **Prompt cho generate_image:** Cần miêu tả chi tiết: Bố cục (Layout), Màu sắc (Colors), Nút bấm (Buttons), Trạng thái (States), và Phong cách (Style - ví dụ: Glassmorphism, Dark mode, Minimalist).
|
|
10
|
+
3. Sau khi ảnh được tạo thành công trong thư mục Artifacts:
|
|
11
|
+
- Hãy dùng lệnh terminal để copy file ảnh đó vào thư mục `.planning/features/<tên-tính-năng>/mockup.png`.
|
|
12
|
+
- Ví dụ: `mkdir -p .planning/features/auth && cp <đường-dẫn-ảnh-từ-artifact> .planning/features/auth/mockup.png`
|
|
13
|
+
4. Cập nhật `features/REGISTRY.md` để map tính năng này với file mockup vừa lưu.
|
|
14
|
+
5. Chỉ khi người dùng **phê duyệt (Approve)** ảnh Mockup đó, bạn mới được phép bắt đầu viết code Frontend.
|
|
15
|
+
|
|
16
|
+
> **Lý do:** Harness tuân thủ PEV Loop (Plan -> Execute -> Verify). Việc code UI mù mờ không có mockup được xem là vi phạm bước Plan.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"session_id": "2026-06-03-harness-engineering-L08-L11",
|
|
3
|
+
"timestamp": "2026-06-03T02:37:00Z",
|
|
4
|
+
"skill": "genesis-harness-engineering",
|
|
5
|
+
"phase": "execute",
|
|
6
|
+
"outcome": "success",
|
|
7
|
+
"evidence": "scripts/verify.sh exits 0; scripts/run-evals.sh exits 0; node tests/unit/feature_registry.test.js -> 'feature_registry tests passed'",
|
|
8
|
+
"task_id": "P1.1-feature-registry + P1.2-observability-live",
|
|
9
|
+
"duration_ms": 120000,
|
|
10
|
+
"tokens_used": 0,
|
|
11
|
+
"recovery_needed": false,
|
|
12
|
+
"_note": "This is the bootstrap sample demonstrating the observability format. Future agent runs should append new JSON files to this directory using the same schema (contracts/observability/agent-run-schema.json)."
|
|
13
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Decision: Feature Registry as Harness Primitive (L08)
|
|
2
|
+
|
|
3
|
+
**Date**: 2026-06-03
|
|
4
|
+
**Session**: `2026-06-03-harness-engineering-L08-L11`
|
|
5
|
+
**Skill**: `genesis-harness-engineering`
|
|
6
|
+
|
|
7
|
+
## Decision
|
|
8
|
+
|
|
9
|
+
Establish `features/REGISTRY.md` as the **single machine-readable source of truth** for all project features. Each feature entry must include: unique `id`, `status`, `title`, `verify_cmd` (executable verification command), and owning `skill`.
|
|
10
|
+
|
|
11
|
+
## Reason
|
|
12
|
+
|
|
13
|
+
**Harness Engineering Lecture 08** identifies the feature list as a "harness primitive" — not human prose, but an executable record the harness can validate. The previous state had features scattered across `ROADMAP.md` (prose) and `EVOLUTION_PLAN.md` (markdown narrative), with no per-feature verification command and no machine-readable status.
|
|
14
|
+
|
|
15
|
+
This meant:
|
|
16
|
+
1. An agent couldn't know which features were truly "verified" vs. "claimed done"
|
|
17
|
+
2. No CI gate could enforce feature status transitions
|
|
18
|
+
3. The harness couldn't generate per-feature test evidence
|
|
19
|
+
|
|
20
|
+
The fix closes this gap by creating a structured registry that `run-evals.sh` can parse and `feature_registry.test.js` can validate.
|
|
21
|
+
|
|
22
|
+
## Rejected Options
|
|
23
|
+
|
|
24
|
+
- **Option A**: Use a JSON file instead of Markdown table
|
|
25
|
+
*Rejected*: Markdown table is both human-readable and parseable. Keeps the "docs are code" principle without sacrificing discoverability.
|
|
26
|
+
|
|
27
|
+
- **Option B**: Use GitHub Issues as the feature list
|
|
28
|
+
*Rejected*: Violates L03 (repo is the single source of truth). External systems cannot be the harness primitive.
|
|
29
|
+
|
|
30
|
+
- **Option C**: Derive the list from `.planning/ROADMAP.md` automatically
|
|
31
|
+
*Rejected*: ROADMAP.md is narrative; auto-parsing prose is fragile. Explicit registry is safer.
|
|
32
|
+
|
|
33
|
+
## Verification
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
node tests/unit/feature_registry.test.js
|
|
37
|
+
→ feature_registry tests passed
|
|
38
|
+
|
|
39
|
+
scripts/run-evals.sh (L08 gate section)
|
|
40
|
+
→ evals passed
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Both gates pass with exit code 0 after implementation. Failure record before fix: `observability/failures/sample-failure.json`.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"failure_id": "2026-06-03-pre-fix-L08-missing-registry",
|
|
3
|
+
"timestamp": "2026-06-03T02:36:50Z",
|
|
4
|
+
"skill": "genesis-harness-engineering",
|
|
5
|
+
"phase": "verify",
|
|
6
|
+
"error_type": "assertion",
|
|
7
|
+
"error_message": "AssertionError: L08: features/REGISTRY.md must exist as machine-readable feature primitive",
|
|
8
|
+
"recovery_action": "Created features/REGISTRY.md + contracts/features/registry-schema.json + observability schemas as implementation fix",
|
|
9
|
+
"session_id": "2026-06-03-harness-engineering-L08-L11",
|
|
10
|
+
"root_cause": "Feature list existed only as human-readable prose in ROADMAP.md. No machine-readable canonical source of truth for feature status existed (violates L08 principle).",
|
|
11
|
+
"prevention": "Added verify gate in run-evals.sh that checks REGISTRY.md presence + schema validity. Test now in tests/unit/feature_registry.test.js ensures regression cannot re-occur."
|
|
12
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codex-genesis-harness",
|
|
3
|
-
"version": "0.1.7",
|
|
3
|
+
"version": "0.1.8",
|
|
4
4
|
"description": "Hệ thống quản trị (Harness) dành cho AI Agent (Codex) với cơ chế FSM State Persistence, Validation Gates và Test-First Workflow. Đảm bảo agent không bị trôi context.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"bin": {
|
|
@@ -15,13 +15,15 @@
|
|
|
15
15
|
"test:gen": "node scripts/test_generator.js",
|
|
16
16
|
"sentinel": "node scripts/prompt_sentinel.js",
|
|
17
17
|
"integrity": "node scripts/contract_integrity_gate.js",
|
|
18
|
-
"telemetry": "node scripts/healing_telemetry.js"
|
|
18
|
+
"telemetry": "node scripts/healing_telemetry.js",
|
|
19
|
+
"mcp:setup": "node bin/genesis-harness.js mcp"
|
|
19
20
|
},
|
|
20
21
|
"files": [
|
|
21
22
|
".codex-plugin",
|
|
22
23
|
".codex/skills",
|
|
23
24
|
".codebase",
|
|
24
25
|
"contracts",
|
|
26
|
+
"features",
|
|
25
27
|
"fixtures",
|
|
26
28
|
"tests",
|
|
27
29
|
"playwright",
|
|
@@ -64,5 +66,9 @@
|
|
|
64
66
|
"funding": {
|
|
65
67
|
"type": "momo",
|
|
66
68
|
"url": "tel:0865814259"
|
|
69
|
+
},
|
|
70
|
+
"dependencies": {
|
|
71
|
+
"@babel/parser": "^7.29.7",
|
|
72
|
+
"@babel/traverse": "^7.29.7"
|
|
67
73
|
}
|
|
68
|
-
}
|
|
74
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
const { test, expect, devices } = require('@playwright/test');

/**
 * MOBILE APP E2E TEST TEMPLATE
 * For mobile-style applications (React Native Web, PWA, or mobile-viewport testing).
 * Note: to test a real native app (iOS/Android) the project needs Appium or Detox instead.
 * This file uses Playwright mobile emulation to exercise app logic in a mobile browser.
 */

// Device emulation is configured at file level on purpose: the 'iPhone 13'
// descriptor includes `defaultBrowserType`, a worker-scoped option, and
// Playwright refuses test.use() with such options inside a describe group
// ("Cannot use({ defaultBrowserType }) in a describe group, because it forces
// a new worker"). Keeping it top-level applies it to every test in this file.
test.use({ ...devices['iPhone 13'] });

test.describe('Mobile App Feature Flow', () => {
  test.beforeEach(async ({ page }) => {
    // Replace with the actual URL of the Mobile Web / React Native Web server
    await page.goto('http://localhost:8081');
  });

  test('should render mobile-specific layout (Hamburger menu)', async ({ page }) => {
    // In mobile view, the hamburger menu should be visible instead of desktop navbar
    const hamburgerBtn = page.locator('[aria-label="Open Menu"]');
    await expect(hamburgerBtn).toBeVisible();

    // Tap the menu and expect the drawer to slide in
    await hamburgerBtn.click();
    await expect(page.locator('.mobile-drawer')).toBeVisible();
  });

  test('should support touch interactions and swipe', async ({ page }) => {
    // Simulating touch actions on a mobile carousel or list
    const listItem = page.locator('.list-item').first();
    await expect(listItem).toBeVisible();

    // tap() needs a touch-enabled context; the device profile above is
    // expected to provide it (NOTE(review): confirm if the profile is changed)
    await listItem.tap();
    await expect(page.locator('.item-details')).toBeVisible();
  });
});
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
const { test, expect } = require('@playwright/test');

/**
 * Concrete E2E Test Example for Demo Login Feature
 * Contract Reference: contracts/ui/auth/login-screen-contract.json
 * Mockup Reference: .planning/features/auth/mockup-login.png
 */

// Stand-in login page served by the route interception below. The inline
// script enables the submit button only once the email contains '@' and the
// password has at least 8 characters.
const LOGIN_PAGE_HTML = `
  <html>
    <body>
      <form id="login-form">
        <input type="email" id="email" required />
        <input type="password" id="password" required minlength="8" />
        <button type="submit" id="sign-in-btn" disabled>Sign In</button>
        <button type="button" id="google-btn">Sign in with Google</button>
      </form>
      <div id="error-msg" style="display: none;"></div>
      <script>
        const email = document.getElementById('email');
        const pwd = document.getElementById('password');
        const btn = document.getElementById('sign-in-btn');
        const checkValid = () => {
          btn.disabled = !(email.value.includes('@') && pwd.value.length >= 8);
        };
        email.addEventListener('input', checkValid);
        pwd.addEventListener('input', checkValid);
      </script>
    </body>
  </html>
`;

// Locators for the three controls every test in this file inspects.
const loginControls = (page) => ({
  email: page.locator('#email'),
  password: page.locator('#password'),
  submit: page.locator('#sign-in-btn'),
});

test.describe('UI-AUTH-LOGIN Contract Implementation', () => {
  test.beforeEach(async ({ page }) => {
    // In a real project, navigate to the dev server's login page instead.
    // For this harness template the /login route is intercepted and answered
    // with the stub page above.
    await page.route('**/login', (route) => {
      route.fulfill({
        status: 200,
        contentType: 'text/html',
        body: LOGIN_PAGE_HTML,
      });
    });

    await page.goto('http://localhost:3000/login');
  });

  test('Initial state: fields are empty and submit button is disabled', async ({ page }) => {
    const { email, password, submit } = loginControls(page);

    await expect(email).toBeEmpty();
    await expect(password).toBeEmpty();
    await expect(submit).toBeDisabled();
  });

  test('Valid state: entering valid email and password enables submit button', async ({ page }) => {
    const { email, password, submit } = loginControls(page);

    await email.fill('user@example.com');
    await password.fill('securepassword123');

    await expect(submit).toBeEnabled();
  });
});
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
const { test, expect } = require('@playwright/test');

/**
 * WEB E2E TEST TEMPLATE
 * Starting point for web application projects (Next.js, React, Vue, etc.).
 * Each test first opens the app root; adjust the URL and selectors to the
 * real application before relying on these checks.
 */
test.describe('Web App Feature Flow', () => {
  test.beforeEach(async ({ page }) => {
    // Point this at the deployed URL or the local dev server of the web app
    const appUrl = 'http://localhost:3000';
    await page.goto(appUrl);
  });

  test('should display the main Web UI component', async ({ page }) => {
    // Example: the page heading is rendered with the expected text
    const heading = page.locator('h1');
    await expect(heading).toBeVisible();
    await expect(heading).toHaveText('Welcome to Web App');
  });

  test('should handle web form submission', async ({ page }) => {
    // Example: complete the form and submit it
    await page.fill('input[name="username"]', 'testuser');
    await page.click('button[type="submit"]');

    // The success banner appearing is the pass criterion
    const successBanner = page.locator('.success-message');
    await expect(successBanner).toBeVisible();
  });
});
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# check-scope.sh — L07 Scope Ledger Enforcement
# Usage: bash scripts/check-scope.sh [SCOPE.md path] [optional: git diff base]
#
# Reads a SCOPE.md file and verifies that every changed file reported by git
# (tracked changes relative to the diff base, plus untracked files) matches an
# entry under "### ✅ Files this task MAY create or modify" in the ledger.
# An entry matches a changed path when the path equals it or starts with it.
#
# Environment:
#   VIBE_MODE=1  report out-of-scope files but exit 0, appending a record to
#                .codebase/TECH_DEBT.md next to this script's parent directory.
#
# Exit codes:
#   0 = all changes within scope, or advisory mode (no scope file argument,
#       scope file missing, no permitted entries parsed, no changed files,
#       or VIBE_MODE bypass)
#   1 = out-of-scope changes detected

set -euo pipefail

SCOPE_FILE="${1:-}"
GIT_BASE="${2:-HEAD}"
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Print a warning to stderr.
warn() {
  echo "scope-check WARN: $*" >&2
}

# Print the permitted paths of ledger file $1, one per line.
# Only lines between the "### ✅ ..." heading and the next "### " heading that
# start with a path-like character are kept; trailing blanks/CR and a leading
# "./" are stripped so entries compare cleanly against git's path output.
extract_permitted() {
  awk '
    /^### ✅ Files this task MAY create or modify/ { found = 1; next }
    /^### /                                        { found = 0 }
    found && /^[a-zA-Z_.\/-]/ {
      sub(/[ \t\r]+$/, "")
      sub(/^\.\//, "")
      if ($0 != "") print
    }
  ' "$1"
}

# Print every changed path (relative to the work-tree root), one per line:
# tracked changes against base $1 plus untracked, non-ignored files.
# `git diff` alone never lists untracked files, so newly created files would
# otherwise bypass the ledger entirely.
list_changed_files() {
  local base=$1
  local top
  top=$(git rev-parse --show-toplevel 2>/dev/null) || return 0
  {
    if ! git -C "$top" diff --name-only "$base" 2>/dev/null; then
      # e.g. repository without commits, or an unknown base revision
      warn "git diff against '$base' failed — using staged + unstaged changes instead"
      git -C "$top" diff --name-only --cached
      git -C "$top" diff --name-only
    fi
    git -C "$top" ls-files --others --exclude-standard
  } | LC_ALL=C sort -u
}

# Return 0 when path $1 equals or starts with one of the permitted entries.
is_permitted() {
  local candidate=$1
  local entry
  while IFS= read -r entry; do
    [[ -z "$entry" ]] && continue
    if [[ "$candidate" == "$entry"* ]]; then
      return 0
    fi
  done <<< "$PERMITTED"
  return 1
}

if [ -z "$SCOPE_FILE" ]; then
  echo "Usage: bash scripts/check-scope.sh <SCOPE.md path> [git-base]"
  echo "  If no SCOPE.md is provided, scope check is advisory only."
  echo "scope-check: no scope file provided — skipping (advisory mode)"
  exit 0
fi

if [ ! -f "$SCOPE_FILE" ]; then
  echo "scope-check: SCOPE.md not found at '$SCOPE_FILE' — using advisory mode (no hard boundary)"
  echo "scope-check: Create '$SCOPE_FILE' from features/SCOPE-template.md to enable enforcement."
  exit 0
fi

echo "scope-check: Enforcing boundaries from '$SCOPE_FILE'..."

PERMITTED=$(extract_permitted "$SCOPE_FILE")

if [ -z "$PERMITTED" ]; then
  warn "No permitted files extracted from '$SCOPE_FILE' — check format. Skipping enforcement."
  exit 0
fi

# Get list of changed files
if git rev-parse --git-dir > /dev/null 2>&1; then
  CHANGED=$(list_changed_files "$GIT_BASE")
else
  # Without a repository there is nothing to diff against.
  echo "scope-check: not inside a git repo — cannot determine changed files"
  CHANGED=""
fi

if [ -z "$CHANGED" ]; then
  echo "scope-check: no changed files detected — scope check trivially passes"
  exit 0
fi

OUT_OF_SCOPE=()
changed_count=0
while IFS= read -r changed_file; do
  [[ -z "$changed_file" ]] && continue
  changed_count=$((changed_count + 1))
  if ! is_permitted "$changed_file"; then
    OUT_OF_SCOPE+=("$changed_file")
  fi
done <<< "$CHANGED"

if [ ${#OUT_OF_SCOPE[@]} -gt 0 ]; then
  echo "scope-check FAIL: The following files are outside the permitted scope:"
  for f in "${OUT_OF_SCOPE[@]}"; do
    echo "  ❌ $f"
  done
  echo ""
  echo "To expand scope: edit '$SCOPE_FILE' and add the file to '## Permitted File Changes'."
  echo "To override: add the file to '🟡 Files requiring explicit confirmation' and get user approval."

  if [ "${VIBE_MODE:-0}" = "1" ]; then
    echo "scope-check WARN: VIBE_MODE is active. Bypassing fatal blocker."
    debt_log="$repo_root/.codebase/TECH_DEBT.md"
    # A missing or unwritable log must not turn the bypass into a failure.
    if ! { mkdir -p "$(dirname "$debt_log")" \
        && echo "- [$(date -u +"%Y-%m-%dT%H:%M:%SZ")] VIBE_MODE Bypass: ${#OUT_OF_SCOPE[@]} files modified out of scope. Files: ${OUT_OF_SCOPE[*]}" >> "$debt_log"; }; then
      warn "could not record the bypass in '$debt_log'"
    fi
    exit 0
  fi

  exit 1
fi

# Report the number of changed files (the out-of-scope list is empty here).
echo "scope-check passed: all ${changed_count} changed files are within scope"
echo "scope-check: ${changed_count} files changed, all permitted"
|