codex-genesis-harness 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/.codebase/COMPRESSED_CONTEXT.md +80 -0
  2. package/.codebase/CURRENT_STATE.md +37 -11
  3. package/.codebase/DEPENDENCY_GRAPH.md +14 -1
  4. package/.codebase/IMPLEMENTATION_HANDOFF.md +34 -336
  5. package/.codebase/KNOWN_PROBLEMS.md +54 -3
  6. package/.codebase/MODULE_INDEX.md +8 -0
  7. package/.codebase/PIPELINE_FLOW.md +7 -5
  8. package/.codebase/RECOVERY_POINTS.md +17 -78
  9. package/.codebase/TECH_DEBT.md +6 -0
  10. package/.codebase/TEST_MATRIX.md +4 -3
  11. package/.codebase/VISUAL_GRAPH.md +127 -0
  12. package/.codebase/context-policy.json +68 -0
  13. package/.codebase/memories/lessons_learned.md +21 -0
  14. package/.codebase/memories/preferences.md +17 -0
  15. package/.codebase/state.json +45 -24
  16. package/.codex/skills/genesis-architecture/SKILL.md +5 -0
  17. package/.codex/skills/genesis-debug-guide/SKILL.md +10 -4
  18. package/.codex/skills/genesis-docs-automation/SKILL.md +52 -973
  19. package/.codex/skills/genesis-executing-plans/SKILL.md +54 -0
  20. package/.codex/skills/genesis-executing-plans/agents/openai.yaml +6 -0
  21. package/.codex/skills/genesis-executing-plans/checklists/.gitkeep +0 -0
  22. package/.codex/skills/genesis-executing-plans/examples/.gitkeep +0 -0
  23. package/.codex/skills/genesis-executing-plans/templates/.gitkeep +0 -0
  24. package/.codex/skills/genesis-harness/SKILL.md +64 -1385
  25. package/.codex/skills/genesis-harness/scripts/check-docs-sync.sh +3 -3
  26. package/.codex/skills/genesis-harness/scripts/init-planning.sh +1 -1
  27. package/.codex/skills/genesis-new-design/SKILL.md +4 -1
  28. package/.codex/skills/genesis-new-design/agents/openai.yaml +2 -0
  29. package/.codex/skills/genesis-observability-automation/SKILL.md +69 -303
  30. package/.codex/skills/genesis-observability-automation/references/common-mistakes-and-recovery.md +84 -0
  31. package/.codex/skills/genesis-observability-automation/references/workflow-phases.md +78 -0
  32. package/.codex/skills/genesis-performance-profiling/SKILL.md +1 -22
  33. package/.codex/skills/genesis-performance-profiling/agents/openai.yaml +1 -1
  34. package/.codex/skills/genesis-planning/SKILL.md +6 -1
  35. package/.codex/skills/genesis-release/SKILL.md +5 -0
  36. package/.codex/skills/genesis-research-first/SKILL.md +6 -0
  37. package/.codex/skills/genesis-spec-propagation/SKILL.md +52 -504
  38. package/.codex/skills/genesis-test-driven-development/SKILL.md +55 -0
  39. package/.codex/skills/genesis-test-driven-development/agents/openai.yaml +6 -0
  40. package/.codex/skills/genesis-test-driven-development/checklists/.gitkeep +0 -0
  41. package/.codex/skills/genesis-test-driven-development/examples/.gitkeep +0 -0
  42. package/.codex/skills/genesis-test-driven-development/templates/.gitkeep +0 -0
  43. package/.codex/skills/genesis-upgrade-design/SKILL.md +4 -2
  44. package/.codex/skills/genesis-upgrade-design/agents/openai.yaml +2 -0
  45. package/.codex/skills/genesis-using-git-worktrees/SKILL.md +54 -0
  46. package/.codex/skills/genesis-using-git-worktrees/agents/openai.yaml +6 -0
  47. package/.codex/skills/genesis-using-git-worktrees/checklists/.gitkeep +0 -0
  48. package/.codex/skills/genesis-using-git-worktrees/examples/.gitkeep +0 -0
  49. package/.codex/skills/genesis-using-git-worktrees/templates/.gitkeep +0 -0
  50. package/.codex/skills/genesis-verification-before-completion/SKILL.md +53 -0
  51. package/.codex/skills/genesis-verification-before-completion/agents/openai.yaml +6 -0
  52. package/.codex/skills/genesis-verification-before-completion/checklists/.gitkeep +0 -0
  53. package/.codex/skills/genesis-verification-before-completion/examples/.gitkeep +0 -0
  54. package/.codex/skills/genesis-verification-before-completion/templates/.gitkeep +0 -0
  55. package/.codex/skills/spec-impact-engine/SKILL.md +77 -500
  56. package/.codex/skills/spec-impact-engine/checklists/checklist.md +10 -0
  57. package/.codex-plugin/plugin.json +3 -4
  58. package/CHANGELOG.md +4 -1
  59. package/README.EN.md +32 -17
  60. package/README.VI.md +35 -19
  61. package/README.md +48 -10
  62. package/VERSION +1 -1
  63. package/bin/genesis-harness.js +735 -5
  64. package/contracts/features/registry-schema.json +15 -0
  65. package/contracts/observability/agent-run-schema.json +34 -0
  66. package/contracts/observability/failure-schema.json +35 -0
  67. package/contracts/ui/auth/login-screen-contract.json +43 -0
  68. package/features/REGISTRY.md +63 -0
  69. package/features/SCOPE-template.md +65 -0
  70. package/fixtures/planning/MOCKUP_PROMPT_TEMPLATE.md +16 -0
  71. package/observability/agent-runs/sample-run.json +13 -0
  72. package/observability/decision-logs/sample-decision.md +43 -0
  73. package/observability/failures/sample-failure.json +12 -0
  74. package/package.json +9 -3
  75. package/playwright/e2e/app-template.spec.js +37 -0
  76. package/playwright/e2e/auth/login-screen.spec.js +65 -0
  77. package/playwright/e2e/web-template.spec.js +28 -0
  78. package/scripts/check-scope.sh +100 -0
  79. package/scripts/cold-start-check.js +133 -0
  80. package/scripts/install.sh +4 -0
  81. package/scripts/prompt_sentinel.js +35 -4
  82. package/scripts/run-evals.sh +119 -3
  83. package/scripts/scratch_parser.js +49 -0
  84. package/scripts/spec_visual_sync.js +1 -1
  85. package/scripts/test_generator.js +2 -2
  86. package/scripts/uninstall.sh +4 -0
  87. package/scripts/verify.sh +16 -1
  88. package/tests/integration/cli-smoke.test.js +103 -0
  89. package/tests/unit/feature_registry.test.js +152 -0
  90. package/tests/unit/prompt_sentinel.test.js +1 -1
  91. package/tests/unit/spec_visual_sync.test.js +1 -1
  92. package/tests/unit/test_generator.test.js +1 -1
  93. package/playwright/e2e/e2e-template.md +0 -4
@@ -0,0 +1,15 @@
1
+ {
2
+ "version": "1.0.0",
3
+ "description": "Schema for features/REGISTRY.md — the canonical machine-readable feature list primitive (L08 Harness Engineering)",
4
+ "changed_at": "2026-06-03T02:37:00Z",
5
+ "required_columns": ["id", "status", "title", "verify_cmd", "skill"],
6
+ "valid_statuses": ["planned", "in-progress", "done", "verified", "deprecated"],
7
+ "notes": "Every feature MUST have a verify_cmd. 'verified' status requires evidence from the last CI run.",
8
+ "changelog": [
9
+ {
10
+ "version": "1.0.0",
11
+ "date": "2026-06-03",
12
+ "change": "Initial schema — establishes feature list as harness primitive per L08"
13
+ }
14
+ ]
15
+ }
@@ -0,0 +1,34 @@
1
+ {
2
+ "version": "1.0.0",
3
+ "description": "Schema for observability/agent-runs/*.json — records of agent execution sessions (L11 Harness Engineering)",
4
+ "changed_at": "2026-06-03T02:37:00Z",
5
+ "required_fields": [
6
+ "session_id",
7
+ "timestamp",
8
+ "skill",
9
+ "phase",
10
+ "outcome",
11
+ "evidence"
12
+ ],
13
+ "field_types": {
14
+ "session_id": "string — unique identifier for the agent session (e.g. UUID or date-prefixed slug)",
15
+ "timestamp": "ISO 8601 string",
16
+ "skill": "string — name of the genesis skill invoked",
17
+ "phase": "string — one of: init, plan, execute, verify, handoff",
18
+ "outcome": "string — one of: success, failure, partial, skipped",
19
+ "evidence": "string — CLI output snippet or file path proving the outcome"
20
+ },
21
+ "optional_fields": {
22
+ "task_id": "string — task item from task.md this run belongs to",
23
+ "duration_ms": "number — wall-clock time of the session",
24
+ "tokens_used": "number — approximate tokens consumed",
25
+ "recovery_needed": "boolean — whether a recovery point was consulted"
26
+ },
27
+ "changelog": [
28
+ {
29
+ "version": "1.0.0",
30
+ "date": "2026-06-03",
31
+ "change": "Initial schema — establishes observability as first-class harness artifact per L11"
32
+ }
33
+ ]
34
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "version": "1.0.0",
3
+ "description": "Schema for observability/failures/*.json — records agent failures for post-mortem analysis (L11 Harness Engineering)",
4
+ "changed_at": "2026-06-03T02:37:00Z",
5
+ "required_fields": [
6
+ "failure_id",
7
+ "timestamp",
8
+ "skill",
9
+ "phase",
10
+ "error_type",
11
+ "error_message",
12
+ "recovery_action"
13
+ ],
14
+ "field_types": {
15
+ "failure_id": "string — unique id for the failure event",
16
+ "timestamp": "ISO 8601 string",
17
+ "skill": "string — skill that was active when failure occurred",
18
+ "phase": "string — one of: init, plan, execute, verify, handoff",
19
+ "error_type": "string — one of: assertion, timeout, permission, contract_violation, scope_overreach, under_finish",
20
+ "error_message": "string — exact CLI error output or description",
21
+ "recovery_action": "string — what was done to recover (e.g. 'reverted via git checkout', 'resumed from RECOVERY_POINTS.md')"
22
+ },
23
+ "optional_fields": {
24
+ "session_id": "string — cross-reference to agent-runs/ entry",
25
+ "root_cause": "string — 5-why analysis result",
26
+ "prevention": "string — what harness change prevents recurrence"
27
+ },
28
+ "changelog": [
29
+ {
30
+ "version": "1.0.0",
31
+ "date": "2026-06-03",
32
+ "change": "Initial schema — failure observability as harness primitive"
33
+ }
34
+ ]
35
+ }
@@ -0,0 +1,43 @@
1
+ {
2
+ "contract_id": "UI-AUTH-LOGIN",
3
+ "version": "1.0.0",
4
+ "description": "Concrete example of a UI contract for the demo login feature. Defines inputs, visual states, and outputs.",
5
+ "inputs": {
6
+ "data": [
7
+ {
8
+ "name": "email",
9
+ "type": "string",
10
+ "validation": "Valid email format",
11
+ "required": true
12
+ },
13
+ {
14
+ "name": "password",
15
+ "type": "string",
16
+ "validation": "Minimum 8 characters",
17
+ "required": true
18
+ }
19
+ ]
20
+ },
21
+ "states": {
22
+ "initial": "Empty fields, disabled submit button.",
23
+ "valid": "Both fields pass validation, submit button enabled with cyan neon glow.",
24
+ "loading": "Submit button shows spinner, fields disabled.",
25
+ "error": "Error message displayed below fields, fields have red error outline."
26
+ },
27
+ "outputs": {
28
+ "events": [
29
+ {
30
+ "name": "onLoginSubmit",
31
+ "payload": {
32
+ "email": "string",
33
+ "passwordHash": "string"
34
+ }
35
+ },
36
+ {
37
+ "name": "onGoogleSignIn",
38
+ "payload": null
39
+ }
40
+ ]
41
+ },
42
+ "mockup_reference": ".planning/features/auth/mockup-login.png"
43
+ }
@@ -0,0 +1,63 @@
1
+ # Feature Registry
2
+
3
+ > **Nguồn sự thật duy nhất** cho tất cả tính năng của Genesis Codex Harness.
4
+ > Schema: [`contracts/features/registry-schema.json`](../contracts/features/registry-schema.json)
5
+ > **RULE**: Mỗi feature phải có `verify_cmd` — lệnh thực thi xác nhận tính năng hoạt động.
6
+
7
+ ## Status Definitions
8
+
9
+ | Status | Ý nghĩa |
10
+ |---|---|
11
+ | `planned` | Đã xác định scope, chưa implement |
12
+ | `in-progress` | Đang được implement trong phiên hiện tại |
13
+ | `done` | Code xong, chưa chạy verification gate |
14
+ | `verified` | Đã có CLI evidence — verification passed |
15
+ | `deprecated` | Không còn được duy trì |
16
+
17
+ ---
18
+
19
+ ## Feature Table
20
+
21
+ | id | status | title | verify_cmd | skill |
22
+ |---|---|---|---|---|
23
+ | F001 | verified | Skill system — 25 packaged Codex skills | `bash scripts/verify.sh` | genesis-harness |
24
+ | F002 | verified | CLI binary `genesis-harness` với install/uninstall/status/docs | `node tests/integration/cli-smoke.test.js` | genesis-harness |
25
+ | F003 | verified | LeanCTX context budget policy seeding | `bash scripts/run-evals.sh` | genesis-harness |
26
+ | F004 | verified | Beads memory system (remember/recall/forget/prime) | `bash scripts/run-evals.sh` | genesis-harness |
27
+ | F005 | verified | Mermaid VISUAL_GRAPH.md sync gate | `bash scripts/run-evals.sh` | genesis-harness |
28
+ | F006 | verified | docs-gate hook (check-docs-sync.sh) | `node bin/genesis-harness.js docs-gate` | genesis-harness |
29
+ | F007 | verified | PEV Loop enforcement (Plan → Execute → Verify) | `bash scripts/verify.sh` | genesis-harness-engineering |
30
+ | F008 | verified | Contract system (api/agents/events/ui) | `bash scripts/verify.sh` | genesis-api-contract |
31
+ | F009 | verified | TDD workflow (Red → Green → Refactor) | `node tests/unit/feature_registry.test.js` | genesis-test-driven-development |
32
+ | F010 | verified | Verification-before-completion gate | `bash scripts/verify.sh` | genesis-verification-before-completion |
33
+ | F011 | verified | git worktrees isolation for dangerous changes | `bash scripts/verify.sh .codex/skills/genesis-using-git-worktrees` | genesis-using-git-worktrees |
34
+ | F012 | verified | Observability schema + live data (L11) | `node tests/unit/feature_registry.test.js` | genesis-observability-automation |
35
+ | F013 | verified | Feature Registry as harness primitive (L08) | `node tests/unit/feature_registry.test.js` | genesis-harness-engineering |
36
+ | F014 | verified | npm pack / tarball smoke test | `bash scripts/run-evals.sh` | genesis-release |
37
+ | F015 | verified | spec-impact-engine propagation chain | `bash scripts/verify.sh .codex/skills/spec-impact-engine` | spec-impact-engine |
38
+ | F016 | in-progress | Cold-start test automation (L03) | `node scripts/cold-start-check.js` | genesis-harness |
39
+ | F017 | planned | Per-session Time-to-First-Verification KPI (L06) | `node bin/genesis-harness.js status --ttfv` | genesis-harness |
40
+ | F018 | planned | Scope ledger per task (L07) | `bash scripts/check-scope.sh` | genesis-harness |
41
+ | F019 | verified | Demo Feature (Mockup + Contract + E2E) | `npm test` (or npx playwright test) | genesis-harness-engineering |
42
+
43
+ ---
44
+
45
+ ## Verification Evidence (Last Run)
46
+
47
+ > Update this section after each CI run.
48
+
49
+ ```
50
+ Date: 2026-06-03T02:38:00Z
51
+ scripts/verify.sh → verify passed
52
+ scripts/run-evals.sh → evals passed
53
+ feature_registry.test.js → feature_registry tests passed
54
+ ```
55
+
56
+ ---
57
+
58
+ ## Adding a New Feature
59
+
60
+ 1. Add a row to the Feature Table above with a **unique `id`** and a **real `verify_cmd`**
61
+ 2. Set initial status to `planned`
62
+ 3. Update `.codebase/MODULE_INDEX.md` if new module is introduced
63
+ 4. Run `node tests/unit/feature_registry.test.js` — must pass before status → `verified`
@@ -0,0 +1,65 @@
1
+ # SCOPE — [Task Name]
2
+
3
+ > **File**: `.planning/tasks/[task-id]/SCOPE.md`
4
+ > **Purpose**: Hard boundary definition — lists exactly which files this task MAY modify.
5
+ > **Rule**: Agent MUST NOT touch any file not listed below. If a necessary file is missing, update this SCOPE.md first and get confirmation.
6
+
7
+ ---
8
+
9
+ ## Task ID
10
+ `[task-id]` — e.g. `F013-feature-registry`
11
+
12
+ ## Task Description
13
+ [One-sentence description of what this task does]
14
+
15
+ ## Skill
16
+ `[genesis-skill-name]` — the skill governing this task
17
+
18
+ ---
19
+
20
+ ## Permitted File Changes
21
+
22
+ ### ✅ Files this task MAY create or modify
23
+
24
+ ```
25
+ [list each file on its own line, relative to repo root]
26
+ features/REGISTRY.md
27
+ contracts/features/registry-schema.json
28
+ tests/unit/feature_registry.test.js
29
+ .codebase/MODULE_INDEX.md
30
+ scripts/run-evals.sh
31
+ ```
32
+
33
+ ### ❌ Files this task MUST NOT touch
34
+
35
+ ```
36
+ [list critical files that must not be affected]
37
+ .codex/SOUL.md
38
+ AGENTS.md
39
+ package.json (unless adding to 'files' array only)
40
+ ```
41
+
42
+ ### 🟡 Files requiring explicit confirmation before touching
43
+
44
+ ```
45
+ [files that might need updating but require user review first]
46
+ scripts/verify.sh
47
+ .codebase/state.json
48
+ ```
49
+
50
+ ---
51
+
52
+ ## Scope Boundary Rationale
53
+
54
+ [Why these boundaries? What would happen if the agent went outside them?]
55
+
56
+ Example: "Restricting to `features/` and `contracts/features/` prevents accidental changes to the core verification loop while adding the registry primitive."
57
+
58
+ ---
59
+
60
+ ## Scope Check Command
61
+
62
+ ```bash
63
+ # Verify no out-of-scope files were modified:
64
+ bash scripts/check-scope.sh .planning/tasks/[task-id]/SCOPE.md
65
+ ```
@@ -0,0 +1,16 @@
1
+ # Mockup Generation Instruction
2
+
3
+ **Mục đích:** Bắt buộc AI Agent phải tạo ra bản nháp giao diện (Mockup) trực quan trước khi bắt đầu code bất kỳ file UI nào.
4
+
5
+ ## Chỉ thị cho AI Agent:
6
+ Khi người dùng yêu cầu thiết kế một tính năng mới có giao diện người dùng (UI):
7
+ 1. **Dừng viết code ngay lập tức.** Không tạo file `.tsx`, `.html`, `.css` nào.
8
+ 2. Dùng công cụ `generate_image` (nếu có) để tạo ra một bản thiết kế UI Mockup dựa trên mô tả của người dùng.
9
+ - **Prompt cho generate_image:** Cần miêu tả chi tiết: Bố cục (Layout), Màu sắc (Colors), Nút bấm (Buttons), Trạng thái (States), và Phong cách (Style - ví dụ: Glassmorphism, Dark mode, Minimalist).
10
+ 3. Sau khi ảnh được tạo thành công trong thư mục Artifacts:
11
+ - Hãy dùng lệnh terminal để copy file ảnh đó vào thư mục `.planning/features/<tên-tính-năng>/mockup.png`.
12
+ - Ví dụ: `mkdir -p .planning/features/auth && cp <đường-dẫn-ảnh-từ-artifact> .planning/features/auth/mockup.png`
13
+ 4. Cập nhật `features/REGISTRY.md` để map tính năng này với file mockup vừa lưu.
14
+ 5. Chỉ khi người dùng **phê duyệt (Approve)** ảnh Mockup đó, bạn mới được phép bắt đầu viết code Frontend.
15
+
16
+ > **Lý do:** Harness tuân thủ PEV Loop (Plan -> Execute -> Verify). Việc code UI mù mờ không có mockup được xem là vi phạm bước Plan.
@@ -0,0 +1,13 @@
1
+ {
2
+ "session_id": "2026-06-03-harness-engineering-L08-L11",
3
+ "timestamp": "2026-06-03T02:37:00Z",
4
+ "skill": "genesis-harness-engineering",
5
+ "phase": "execute",
6
+ "outcome": "success",
7
+ "evidence": "scripts/verify.sh exits 0; scripts/run-evals.sh exits 0; node tests/unit/feature_registry.test.js -> 'feature_registry tests passed'",
8
+ "task_id": "P1.1-feature-registry + P1.2-observability-live",
9
+ "duration_ms": 120000,
10
+ "tokens_used": 0,
11
+ "recovery_needed": false,
12
+ "_note": "This is the bootstrap sample demonstrating the observability format. Future agent runs should append new JSON files to this directory using the same schema (contracts/observability/agent-run-schema.json)."
13
+ }
@@ -0,0 +1,43 @@
1
+ # Decision: Feature Registry as Harness Primitive (L08)
2
+
3
+ **Date**: 2026-06-03
4
+ **Session**: `2026-06-03-harness-engineering-L08-L11`
5
+ **Skill**: `genesis-harness-engineering`
6
+
7
+ ## Decision
8
+
9
+ Establish `features/REGISTRY.md` as the **single machine-readable source of truth** for all project features. Each feature entry must include: unique `id`, `status`, `title`, `verify_cmd` (executable verification command), and owning `skill`.
10
+
11
+ ## Reason
12
+
13
+ **Harness Engineering Lecture 08** identifies the feature list as a "harness primitive" — not human prose, but an executable record the harness can validate. The previous state had features scattered across `ROADMAP.md` (prose) and `EVOLUTION_PLAN.md` (markdown narrative), with no per-feature verification command and no machine-readable status.
14
+
15
+ This meant:
16
+ 1. An agent couldn't know which features were truly "verified" vs. "claimed done"
17
+ 2. No CI gate could enforce feature status transitions
18
+ 3. The harness couldn't generate per-feature test evidence
19
+
20
+ The fix closes this gap by creating a structured registry that `run-evals.sh` can parse and `feature_registry.test.js` can validate.
21
+
22
+ ## Rejected Options
23
+
24
+ - **Option A**: Use a JSON file instead of Markdown table
25
+ *Rejected*: Markdown table is both human-readable and parseable. Keeps the "docs are code" principle without sacrificing discoverability.
26
+
27
+ - **Option B**: Use GitHub Issues as the feature list
28
+ *Rejected*: Violates L03 (repo is the single source of truth). External systems cannot be the harness primitive.
29
+
30
+ - **Option C**: Derive the list from `.planning/ROADMAP.md` automatically
31
+ *Rejected*: ROADMAP.md is narrative; auto-parsing prose is fragile. Explicit registry is safer.
32
+
33
+ ## Verification
34
+
35
+ ```
36
+ node tests/unit/feature_registry.test.js
37
+ → feature_registry tests passed
38
+
39
+ scripts/run-evals.sh (L08 gate section)
40
+ → evals passed
41
+ ```
42
+
43
+ Both gates pass with exit code 0 after implementation. Failure record before fix: `observability/failures/sample-failure.json`.
@@ -0,0 +1,12 @@
1
+ {
2
+ "failure_id": "2026-06-03-pre-fix-L08-missing-registry",
3
+ "timestamp": "2026-06-03T02:36:50Z",
4
+ "skill": "genesis-harness-engineering",
5
+ "phase": "verify",
6
+ "error_type": "assertion",
7
+ "error_message": "AssertionError: L08: features/REGISTRY.md must exist as machine-readable feature primitive",
8
+ "recovery_action": "Created features/REGISTRY.md + contracts/features/registry-schema.json + observability schemas as implementation fix",
9
+ "session_id": "2026-06-03-harness-engineering-L08-L11",
10
+ "root_cause": "Feature list existed only as human-readable prose in ROADMAP.md. No machine-readable canonical source of truth for feature status existed (violates L08 principle).",
11
+ "prevention": "Added verify gate in run-evals.sh that checks REGISTRY.md presence + schema validity. Test now in tests/unit/feature_registry.test.js ensures regression cannot re-occur."
12
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codex-genesis-harness",
3
- "version": "0.1.7",
3
+ "version": "0.1.8",
4
4
  "description": "Hệ thống quản trị (Harness) dành cho AI Agent (Codex) với cơ chế FSM State Persistence, Validation Gates và Test-First Workflow. Đảm bảo agent không bị trôi context.",
5
5
  "license": "MIT",
6
6
  "bin": {
@@ -15,13 +15,15 @@
15
15
  "test:gen": "node scripts/test_generator.js",
16
16
  "sentinel": "node scripts/prompt_sentinel.js",
17
17
  "integrity": "node scripts/contract_integrity_gate.js",
18
- "telemetry": "node scripts/healing_telemetry.js"
18
+ "telemetry": "node scripts/healing_telemetry.js",
19
+ "mcp:setup": "node bin/genesis-harness.js mcp"
19
20
  },
20
21
  "files": [
21
22
  ".codex-plugin",
22
23
  ".codex/skills",
23
24
  ".codebase",
24
25
  "contracts",
26
+ "features",
25
27
  "fixtures",
26
28
  "tests",
27
29
  "playwright",
@@ -64,5 +66,9 @@
64
66
  "funding": {
65
67
  "type": "momo",
66
68
  "url": "tel:0865814259"
69
+ },
70
+ "dependencies": {
71
+ "@babel/parser": "^7.29.7",
72
+ "@babel/traverse": "^7.29.7"
67
73
  }
68
- }
74
+ }
@@ -0,0 +1,37 @@
1
+ const { test, expect, devices } = require('@playwright/test');
2
+
3
+ /**
4
+ * MOBILE APP E2E TEST TEMPLATE
5
+ * Dành cho các ứng dụng Mobile App (React Native Web, PWA, hoặc Mobile Viewport testing)
6
+ * Lưu ý: Nếu test Native App thực sự (iOS/Android), dự án cần chuyển sang dùng Appium hoặc Detox.
7
+ * File này dùng Playwright Mobile Emulation để test App logic trên trình duyệt di động.
8
+ */
9
+ test.describe('Mobile App Feature Flow', () => {
10
+ // Use a mobile device profile for emulation
11
+ test.use({ ...devices['iPhone 13'] });
12
+
13
+ test.beforeEach(async ({ page }) => {
14
+ // Replace with the actual URL of the Mobile Web / React Native Web server
15
+ await page.goto('http://localhost:8081');
16
+ });
17
+
18
+ test('should render mobile-specific layout (Hamburger menu)', async ({ page }) => {
19
+ // In mobile view, the hamburger menu should be visible instead of desktop navbar
20
+ const hamburgerBtn = page.locator('[aria-label="Open Menu"]');
21
+ await expect(hamburgerBtn).toBeVisible();
22
+
23
+ // Tap the menu
24
+ await hamburgerBtn.click();
25
+ await expect(page.locator('.mobile-drawer')).toBeVisible();
26
+ });
27
+
28
+ test('should support touch interactions and swipe', async ({ page }) => {
29
+ // Simulating touch actions on a mobile carousel or list
30
+ const listItem = page.locator('.list-item').first();
31
+ await expect(listItem).toBeVisible();
32
+
33
+ // Playwright touch simulation (if applicable to the web-mobile app)
34
+ await listItem.tap();
35
+ await expect(page.locator('.item-details')).toBeVisible();
36
+ });
37
+ });
@@ -0,0 +1,65 @@
1
+ const { test, expect } = require('@playwright/test');
2
+
3
+ /**
4
+ * Concrete E2E Test Example for Demo Login Feature
5
+ * Contract Reference: contracts/ui/auth/login-screen-contract.json
6
+ * Mockup Reference: .planning/features/auth/mockup-login.png
7
+ */
8
+ test.describe('UI-AUTH-LOGIN Contract Implementation', () => {
9
+ test.beforeEach(async ({ page }) => {
10
+ // Navigate to the demo app's login page (replace with actual dev server URL in real projects)
11
+ // For this harness template, we intercept the route to mock a UI
12
+ await page.route('**/login', route => {
13
+ route.fulfill({
14
+ status: 200,
15
+ contentType: 'text/html',
16
+ body: `
17
+ <html>
18
+ <body>
19
+ <form id="login-form">
20
+ <input type="email" id="email" required />
21
+ <input type="password" id="password" required minlength="8" />
22
+ <button type="submit" id="sign-in-btn" disabled>Sign In</button>
23
+ <button type="button" id="google-btn">Sign in with Google</button>
24
+ </form>
25
+ <div id="error-msg" style="display: none;"></div>
26
+ <script>
27
+ const email = document.getElementById('email');
28
+ const pwd = document.getElementById('password');
29
+ const btn = document.getElementById('sign-in-btn');
30
+ const checkValid = () => {
31
+ btn.disabled = !(email.value.includes('@') && pwd.value.length >= 8);
32
+ };
33
+ email.addEventListener('input', checkValid);
34
+ pwd.addEventListener('input', checkValid);
35
+ </script>
36
+ </body>
37
+ </html>
38
+ `
39
+ });
40
+ });
41
+
42
+ await page.goto('http://localhost:3000/login');
43
+ });
44
+
45
+ test('Initial state: fields are empty and submit button is disabled', async ({ page }) => {
46
+ const emailInput = page.locator('#email');
47
+ const pwdInput = page.locator('#password');
48
+ const submitBtn = page.locator('#sign-in-btn');
49
+
50
+ await expect(emailInput).toBeEmpty();
51
+ await expect(pwdInput).toBeEmpty();
52
+ await expect(submitBtn).toBeDisabled();
53
+ });
54
+
55
+ test('Valid state: entering valid email and password enables submit button', async ({ page }) => {
56
+ const emailInput = page.locator('#email');
57
+ const pwdInput = page.locator('#password');
58
+ const submitBtn = page.locator('#sign-in-btn');
59
+
60
+ await emailInput.fill('user@example.com');
61
+ await pwdInput.fill('securepassword123');
62
+
63
+ await expect(submitBtn).toBeEnabled();
64
+ });
65
+ });
@@ -0,0 +1,28 @@
1
+ const { test, expect } = require('@playwright/test');
2
+
3
+ /**
4
+ * WEB E2E TEST TEMPLATE
5
+ * Dành cho các dự án Web App (Next.js, React, Vue, v.v.)
6
+ */
7
+ test.describe('Web App Feature Flow', () => {
8
+ test.beforeEach(async ({ page }) => {
9
+ // Replace with the actual URL or local dev server of the Web App
10
+ await page.goto('http://localhost:3000');
11
+ });
12
+
13
+ test('should display the main Web UI component', async ({ page }) => {
14
+ // Example: verify a specific web element is visible
15
+ const mainHeading = page.locator('h1');
16
+ await expect(mainHeading).toBeVisible();
17
+ await expect(mainHeading).toHaveText('Welcome to Web App');
18
+ });
19
+
20
+ test('should handle web form submission', async ({ page }) => {
21
+ // Example: fill out a web form and submit
22
+ await page.fill('input[name="username"]', 'testuser');
23
+ await page.click('button[type="submit"]');
24
+
25
+ // Verify success state
26
+ await expect(page.locator('.success-message')).toBeVisible();
27
+ });
28
+ });
@@ -0,0 +1,100 @@
1
+ #!/usr/bin/env bash
2
+ # check-scope.sh — L07 Scope Ledger Enforcement
3
+ # Usage: bash scripts/check-scope.sh [SCOPE.md path] [optional: git diff base]
4
+ #
5
+ # Reads a SCOPE.md file and verifies that all modified files (from git diff)
6
+ # are within the permitted boundaries defined in the scope ledger.
7
+ #
8
+ # Exit codes:
9
+ # 0 = all changes within scope
10
+ # 1 = out-of-scope changes detected
11
+ # 2 = SCOPE.md not found or invalid
12
+
13
+ set -euo pipefail
14
+
15
+ SCOPE_FILE="${1:-}"
16
+ GIT_BASE="${2:-HEAD}"
17
+ repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
18
+
19
+ fail() {
20
+ echo "scope-check FAIL: $*" >&2
21
+ exit 1
22
+ }
23
+
24
+ warn() {
25
+ echo "scope-check WARN: $*" >&2
26
+ }
27
+
28
+ if [ -z "$SCOPE_FILE" ]; then
29
+ echo "Usage: bash scripts/check-scope.sh <SCOPE.md path> [git-base]"
30
+ echo " If no SCOPE.md is provided, scope check is advisory only."
31
+ echo "scope-check: no scope file provided — skipping (advisory mode)"
32
+ exit 0
33
+ fi
34
+
35
+ if [ ! -f "$SCOPE_FILE" ]; then
36
+ echo "scope-check: SCOPE.md not found at '$SCOPE_FILE' — using advisory mode (no hard boundary)"
37
+ echo "scope-check: Create '$SCOPE_FILE' from features/SCOPE-template.md to enable enforcement."
38
+ exit 0
39
+ fi
40
+
41
+ echo "scope-check: Enforcing boundaries from '$SCOPE_FILE'..."
42
+
43
+ # Extract permitted files section (lines between "## Permitted File Changes" and next ##)
44
+ PERMITTED=$(awk '/^### ✅ Files this task MAY create or modify/{found=1; next} /^### /{found=0} found && /^[a-zA-Z_.\/-]/{print $0}' "$SCOPE_FILE")
45
+
46
+ if [ -z "$PERMITTED" ]; then
47
+ warn "No permitted files extracted from '$SCOPE_FILE' — check format. Skipping enforcement."
48
+ exit 0
49
+ fi
50
+
51
+ # Get list of changed files
52
+ if git rev-parse --git-dir > /dev/null 2>&1; then
53
+ CHANGED=$(git diff --name-only "$GIT_BASE" 2>/dev/null || git status --porcelain | awk '{print $2}')
54
+ else
55
+ echo "scope-check: not inside a git repo — using git status fallback"
56
+ CHANGED=$(git status --porcelain 2>/dev/null | awk '{print $2}' || echo "")
57
+ fi
58
+
59
+ if [ -z "$CHANGED" ]; then
60
+ echo "scope-check: no changed files detected — scope check trivially passes"
61
+ exit 0
62
+ fi
63
+
64
+ OUT_OF_SCOPE=()
65
+ while IFS= read -r changed_file; do
66
+ [ -z "$changed_file" ] && continue
67
+ IN_SCOPE=false
68
+ while IFS= read -r permitted; do
69
+ [ -z "$permitted" ] && continue
70
+ # Check if changed file matches permitted entry (exact or prefix)
71
+ if [[ "$changed_file" == "$permitted" ]] || [[ "$changed_file" == "$permitted"* ]]; then
72
+ IN_SCOPE=true
73
+ break
74
+ fi
75
+ done <<< "$PERMITTED"
76
+ if [ "$IN_SCOPE" = false ]; then
77
+ OUT_OF_SCOPE+=("$changed_file")
78
+ fi
79
+ done <<< "$CHANGED"
80
+
81
+ if [ ${#OUT_OF_SCOPE[@]} -gt 0 ]; then
82
+ echo "scope-check FAIL: The following files are outside the permitted scope:"
83
+ for f in "${OUT_OF_SCOPE[@]}"; do
84
+ echo " ❌ $f"
85
+ done
86
+ echo ""
87
+ echo "To expand scope: edit '$SCOPE_FILE' and add the file to '## Permitted File Changes'."
88
+ echo "To override: add the file to '🟡 Files requiring explicit confirmation' and get user approval."
89
+
90
+ if [ "${VIBE_MODE:-0}" = "1" ]; then
91
+ echo "scope-check WARN: VIBE_MODE is active. Bypassing fatal blocker."
92
+ echo "- [$(date -u +"%Y-%m-%dT%H:%M:%SZ")] VIBE_MODE Bypass: ${#OUT_OF_SCOPE[@]} files modified out of scope. Files: ${OUT_OF_SCOPE[*]}" >> "$repo_root/.codebase/TECH_DEBT.md"
93
+ exit 0
94
+ fi
95
+
96
+ exit 1
97
+ fi
98
+
99
+ echo "scope-check passed: all ${#OUT_OF_SCOPE[@]} changed files are within scope"
100
+ echo "scope-check: $(echo "$CHANGED" | wc -l | tr -d ' ') files changed, all permitted"