npm - codeforge-dev - Versions diffs - 1.9.0 → 1.10.0 - Mend

codeforge-dev 1.9.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/scripts/advisory-test-runner.py CHANGED Viewed

@@ -2,9 +2,10 @@
 """
 Advisory test runner — Stop hook that injects test results as context.
-Detects the project's test framework and runs the test suite. Results are
-returned as additionalContext so Claude sees pass/fail info without being
-blocked. If tests fail, Claude's next response will naturally address them.
+Reads the list of files edited this session (written by collect-edited-files.py),
+maps them to affected test files, and runs only those tests. Skips entirely
+if no files were edited. Results are returned as additionalContext so Claude
+sees pass/fail info without being blocked.
 Reads hook input from stdin (JSON). Returns JSON on stdout.
 Always exits 0 (advisory, never blocking).
@@ -15,15 +16,37 @@ import os
 import subprocess
 import sys
+TIMEOUT_SECONDS = 15  # seconds the test subprocess may run before the hook reports a timeout
+def get_edited_files(session_id: str) -> list[str]:
+    """Read the list of files edited this session.
+    Relies on collect-edited-files.py writing paths to a temp file.
+    Returns deduplicated list of paths that still exist on disk.
+    """
+    tmp_path = f"/tmp/claude-edited-files-{session_id}"
+    try:
+        with open(tmp_path, "r") as f:
+            raw = f.read()
+    except OSError:
+        return []
+    seen: set[str] = set()
+    result: list[str] = []
+    for line in raw.strip().splitlines():
+        path = line.strip()
+        if path and path not in seen and os.path.isfile(path):
+            seen.add(path)
+            result.append(path)
+    return result
 def detect_test_framework(cwd: str) -> tuple[str, list[str]]:
     """Detect which test framework is available in the project.
-    Checks for: pytest, vitest, jest, mocha, go test, cargo test.
-    Falls back to npm test if a test script is defined.
     Returns:
-        Tuple of (framework_name, command_list) or ("", []) if none found.
+        Tuple of (framework_name, base_command) or ("", []) if none found.
     """
     try:
         entries = set(os.listdir(cwd))
@@ -102,7 +125,7 @@ def detect_test_framework(cwd: str) -> tuple[str, list[str]]:
     # --- Go ---
     if "go.mod" in entries:
-        return ("go", ["go", "test", "./...", "-count=1"])
+        return ("go", ["go", "test", "-count=1"])
     # --- Rust ---
     if "Cargo.toml" in entries:
@@ -111,6 +134,139 @@ def detect_test_framework(cwd: str) -> tuple[str, list[str]]:
     return ("", [])
+def resolve_pytest_tests(edited_files: list[str], cwd: str) -> tuple[list[str], bool]:
+    """Map edited Python files to their corresponding pytest test files.
+    Returns:
+        (test_files, run_all) — if run_all is True, run the whole suite
+        (e.g. conftest.py was edited).
+    """
+    test_files: list[str] = []
+    for path in edited_files:
+        if not path.endswith(".py"):
+            continue
+        basename = os.path.basename(path)
+        # conftest changes can affect anything — run full suite
+        if basename == "conftest.py":
+            return ([], True)
+        # Already a test file — include directly
+        if basename.startswith("test_") or "/tests/" in path:
+            if os.path.isfile(path):
+                test_files.append(path)
+            continue
+        # Map source → test via directory mirroring
+        # e.g. src/engine/db/sessions.py → tests/engine/db/test_sessions.py
+        # e.g. src/engine/api/routes/github.py → tests/engine/api/test_routes_github.py
+        rel = os.path.relpath(path, cwd)
+        parts = rel.split(os.sep)
+        # Strip leading "src/" if present
+        if parts and parts[0] == "src":
+            parts = parts[1:]
+        if not parts:
+            continue
+        module = parts[-1]  # e.g. "sessions.py"
+        module_name = module.removesuffix(".py")
+        parent_parts = parts[:-1]  # e.g. ["engine", "db"]
+        # Standard mapping: tests/<parent>/test_<module>.py
+        test_path = os.path.join(cwd, "tests", *parent_parts, f"test_{module_name}.py")
+        if os.path.isfile(test_path):
+            test_files.append(test_path)
+            continue
+        # Routes mapping: src/engine/api/routes/github.py
+        # → tests/engine/api/test_routes_github.py
+        if len(parent_parts) >= 2 and parent_parts[-1] == "routes":
+            route_test = os.path.join(
+                cwd,
+                "tests",
+                *parent_parts[:-1],
+                f"test_routes_{module_name}.py",
+            )
+            if os.path.isfile(route_test):
+                test_files.append(route_test)
+    # Deduplicate while preserving order
+    seen: set[str] = set()
+    unique: list[str] = []
+    for t in test_files:
+        if t not in seen:
+            seen.add(t)
+            unique.append(t)
+    return (unique, False)
+def resolve_affected_tests(
+    edited_files: list[str], cwd: str, framework: str
+) -> tuple[list[str], bool]:
+    """Resolve edited files to framework-specific test arguments.
+    Returns:
+        (extra_args, run_all) — extra_args to append to the base command.
+        If run_all is True, run the whole suite (no extra args needed).
+        If extra_args is empty and run_all is False, skip testing entirely.
+    """
+    if framework == "pytest":
+        test_files, run_all = resolve_pytest_tests(edited_files, cwd)
+        return (test_files, run_all)
+    if framework == "vitest":
+        # vitest --related does dep-graph analysis natively
+        source_files = [
+            f
+            for f in edited_files
+            if not f.endswith(
+                (".md", ".json", ".yaml", ".yml", ".toml", ".txt", ".css")
+            )
+        ]
+        if not source_files:
+            return ([], False)
+        return (["--related"] + source_files, False)
+    if framework == "jest":
+        source_files = [
+            f
+            for f in edited_files
+            if not f.endswith(
+                (".md", ".json", ".yaml", ".yml", ".toml", ".txt", ".css")
+            )
+        ]
+        if not source_files:
+            return ([], False)
+        return (["--findRelatedTests"] + source_files, False)
+    if framework == "go":
+        # Map edited .go files to their package directories
+        pkgs: set[str] = set()
+        for path in edited_files:
+            if path.endswith(".go"):
+                pkg_dir = os.path.dirname(path)
+                rel = os.path.relpath(pkg_dir, cwd)
+                pkgs.add(f"./{rel}")
+        if not pkgs:
+            return ([], False)
+        return (sorted(pkgs), False)
+    # cargo, mocha, npm-test — no granular selection, run full suite
+    code_files = [
+        f
+        for f in edited_files
+        if not f.endswith((".md", ".json", ".yaml", ".yml", ".toml", ".txt"))
+    ]
+    if not code_files:
+        return ([], False)
+    return ([], True)
 def main():
     try:
         input_data = json.load(sys.stdin)
@@ -121,34 +277,51 @@ def main():
     if input_data.get("stop_hook_active"):
         sys.exit(0)
+    session_id = input_data.get("session_id", "")
+    if not session_id:
+        sys.exit(0)
+    # No files edited this session — nothing to test
+    edited_files = get_edited_files(session_id)
+    if not edited_files:
+        sys.exit(0)
     cwd = os.getcwd()
-    framework, cmd = detect_test_framework(cwd)
+    framework, base_cmd = detect_test_framework(cwd)
     if not framework:
         sys.exit(0)
+    extra_args, run_all = resolve_affected_tests(edited_files, cwd, framework)
+    # No affected tests and not a run-all situation — skip
+    if not extra_args and not run_all:
+        sys.exit(0)
+    cmd = base_cmd + extra_args
     try:
         result = subprocess.run(
             cmd,
             cwd=cwd,
             capture_output=True,
             text=True,
-            timeout=60,
+            timeout=TIMEOUT_SECONDS,
         )
     except subprocess.TimeoutExpired:
         json.dump(
-            {"additionalContext": f"[Tests] {framework} timed out after 60s"},
+            {
+                "additionalContext": f"[Tests] {framework} timed out after {TIMEOUT_SECONDS}s"
+            },
             sys.stdout,
         )
         sys.exit(0)
     except (FileNotFoundError, OSError):
-        # Test runner not installed or not accessible
         sys.exit(0)
     output = (result.stdout + "\n" + result.stderr).strip()
     if result.returncode == 0:
-        # Extract test count from output if possible
         json.dump(
             {"additionalContext": f"[Tests] All tests passed ({framework})"},
             sys.stdout,

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/scripts/spec-reminder.py CHANGED Viewed

@@ -110,7 +110,8 @@ def main():
         f"[Spec Reminder] Code was modified in {dirs_str} "
         "but no specs were updated. "
         "Use /spec-update to update the relevant spec, "
-        "or /spec-new if no spec exists for this feature."
+        "/spec-new if no spec exists for this feature, "
+        "or /spec-refine if the spec is still in draft status."
     )
     json.dump({"additionalContext": message}, sys.stdout)

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/skills/documentation-patterns/SKILL.md CHANGED Viewed

@@ -65,7 +65,7 @@ Development setup, how to run tests, how to submit changes. Link to CONTRIBUTING
 ## Sizing Rules
-Documentation files consumed by AI tools (CLAUDE.md, specs, architecture docs) should be **≤200 lines** each. Split large documents by concern. Each file should be independently useful.
+Documentation files consumed by AI tools (CLAUDE.md, specs, architecture docs) should aim for **~200 lines** each. Split large documents by concern when practical. Each file should be independently useful.
 For human-facing docs (README, API reference), there is no hard limit, but prefer shorter docs that link to detailed sub-pages over monolithic documents.

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/skills/spec-check/SKILL.md CHANGED Viewed

@@ -38,9 +38,11 @@ For each spec file, extract:
 - **Version** from the `**Version:**` field
 - **Status** from the `**Status:**` field
 - **Last Updated** from the `**Last Updated:**` field
+- **Approval** from the `**Approval:**` field (default `draft` if missing)
 - **Line count** (wc -l)
 - **Sections present** — check for each required section header
 - **Acceptance criteria** — count total, count checked `[x]`
+- **Requirements** — count total, count `[assumed]`, count `[user-approved]`
 - **Discrepancies** — check if section has content
 ### Step 3: Flag Issues
@@ -49,13 +51,17 @@ For each spec, check these conditions:
 | Issue | Condition | Severity |
 |-------|-----------|----------|
+| **Unapproved** | Approval is `draft` or missing | High |
+| **Assumed requirements** | Has requirements tagged `[assumed]` | Medium |
 | **Stale** | Status is `planned` but Last Updated is >30 days ago | High |
 | **Incomplete** | Missing required sections (Intent, Acceptance Criteria, Key Files, Requirements, Out of Scope) | High |
-| **Oversized** | Exceeds 200 lines | Medium |
+| **Long spec** | Exceeds ~200 lines — consider splitting | Info |
 | **No criteria** | Acceptance Criteria section is empty or has no checkboxes | High |
 | **Open discrepancies** | Discrepancies section has content | Medium |
 | **Missing as-built** | Status is `implemented` but Implementation Notes is empty | Medium |
 | **Stale paths** | Key Files references paths that don't exist | Low |
+| **Draft + implemented** | Status is `implemented` but Approval is `draft` — approval gate was bypassed | High |
+| **Inconsistent approval** | Approval is `user-approved` but spec has `[assumed]` requirements | High |
 ### Step 4: Report
@@ -64,11 +70,11 @@ Output a summary table:
 ```
 ## Spec Health Report
-| Feature | Version | Status | Updated | Lines | Issues |
-|---------|---------|--------|---------|-------|--------|
-| Session History | v0.2.0 | implemented | 2026-02-08 | 74 | None |
-| Auth Flow | v0.3.0 | planned | 2026-01-15 | 45 | Stale (26 days) |
-| Settings Page | v0.2.0 | partial | 2026-02-05 | 210 | Oversized |
+| Feature | Version | Status | Approval | Updated | Lines | Issues |
+|---------|---------|--------|----------|---------|-------|--------|
+| Session History | v0.2.0 | implemented | user-approved | 2026-02-08 | 74 | None |
+| Auth Flow | v0.3.0 | planned | draft | 2026-01-15 | 45 | Unapproved, Stale (26 days) |
+| Settings Page | v0.2.0 | partial | draft | 2026-02-05 | 210 | Unapproved, Long spec |
 ## Issues Found
@@ -76,11 +82,17 @@ Output a summary table:
 - **Auth Flow** (`.specs/v0.3.0/auth-flow.md`): Status is `planned` but last updated 26 days ago. Either implementation is stalled or the spec needs an as-built update.
 ### Medium Priority
-- **Settings Page** (`.specs/v0.2.0/settings-page.md`): 210 lines exceeds the 200-line limit. Split into sub-specs.
+- **Settings Page** (`.specs/v0.2.0/settings-page.md`): 210 lines — consider splitting into sub-specs for easier consumption.
 ### Suggested Actions
-1. Run `/spec-update auth-flow` to update the auth flow spec
-2. Split settings-page.md into sub-specs
+1. Run `/spec-refine auth-flow` to validate assumptions and get user approval
+2. Run `/spec-update auth-flow` to update the auth flow spec
+3. Split settings-page.md into sub-specs
+### Approval Summary
+- **User-approved:** 1 spec
+- **Draft (needs /spec-refine):** 2 specs
+- **Assumed requirements across all specs:** 8
 ```
-If no issues are found, report: "All specs healthy. N specs across M versions."
+If no issues are found, report: "All specs healthy. N specs across M versions. All user-approved."

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/skills/spec-init/SKILL.md CHANGED Viewed

@@ -67,18 +67,20 @@ Created:
 - `.specs/BACKLOG.md` — deferred items list
 Next steps:
-- Use `/spec-new <feature-name> <version>` to create your first feature spec
+- Add features to `BACKLOG.md` with priority grades (P0–P3)
+- Pull features into a version in `ROADMAP.md` when ready to scope
+- Use `/spec-new <feature-name> <version>` to create a spec
+- Use `/spec-refine <feature-name>` to validate before implementation
+- After implementing, use `/spec-update` to close the loop
 - Use `/spec-check` to audit spec health at any time
 ```
 ---
-## Hard Constraints
+## Constraints
 - **Never overwrite** an existing `.specs/` directory or its contents.
-- **ROADMAP.md** must stay under 30 lines (it's a summary, not a plan document).
-- **BACKLOG.md** must stay under 15 lines (it grows as items are added).
-- Templates are starting points — the user will extend them.
+- Templates are starting points — the user will extend them as the project grows.
 ---

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/skills/spec-init/references/backlog-template.md CHANGED Viewed

@@ -1,7 +1,23 @@
 # Backlog
-Deferred items not yet scheduled for a version.
+Priority-graded feature and infrastructure backlog. Items are pulled into versions when ready to scope and spec. See `ROADMAP.md` for the versioning workflow.
-## Items
+## P0 — High Priority
-- [ ] [Item description] — [context/rationale]
+- [ ] [Feature] — [Description]
+## P1 — Important
+- [ ] [Feature] — [Description]
+## P2 — Desired
+- [ ] [Feature] — [Description]
+## P3 — Nice to Have
+- [ ] [Feature] — [Description]
+## Infrastructure & CI
+- [ ] [Item] — [Description]

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/skills/spec-init/references/roadmap-template.md CHANGED Viewed

@@ -1,13 +1,33 @@
 # Roadmap
-What each version delivers and why.
+> Features live in the priority-graded backlog until pulled into a version.
+> Versions are scoped and spec'd when ready to build — not pre-assigned.
+> See `BACKLOG.md` for the feature backlog.
-| Version | Status | Summary |
-|---------|--------|---------|
-| v0.1.0  | planned | [Initial release — describe core features] |
+## How Versioning Works
-## Versioning
+1. **Backlog** — All desired features live in `BACKLOG.md`, graded by priority.
+2. **Version scoping** — When ready to start a new version, pull features from the backlog.
+3. **Spec first** — Each feature in a version gets a spec before implementation begins.
+4. **Ship** — Version is done when all its specs are implemented and verified.
-- **Major**: Breaking changes to public APIs or data models
-- **Minor**: New features, non-breaking changes
-- **Patch**: Bug fixes, documentation, internal refactors
+Only the **next version** is defined in detail. Everything else is backlog.
+## Released
+_None yet._
+## Current
+### v0.1.0 — [Name] 🔧
+- [ ] [Feature pulled from backlog]
+- [ ] [Feature pulled from backlog]
+## Next
+> Scoped from `BACKLOG.md` when current version is complete.
+## Out of Scope
+- [Items explicitly not planned]

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/skills/spec-new/SKILL.md CHANGED Viewed

@@ -14,7 +14,7 @@ version: 0.1.0
 A specification is a contract between the person requesting a feature and the person building it. Writing the spec BEFORE implementation forces you to think through edge cases, acceptance criteria, and scope boundaries while changes are cheap — before any code exists.
-Every project uses `.specs/` as the specification directory. Specs are version-organized, independently loadable, and capped at 200 lines.
+Every project uses `.specs/` as the specification directory. Specs are version-organized, independently loadable, and should aim for ~200 lines.
 ---
@@ -30,6 +30,8 @@ If arguments are missing, ask the user for:
 1. Feature name (what is being built)
 2. Target version (which release this belongs to)
+**Note:** Features should be pulled from the project's backlog (`BACKLOG.md`) into a version before creating a spec. If the feature isn't in the backlog yet, add it first, then assign it to a version.
 ### Step 2: Determine File Path
 - **Multi-feature version** (directory already exists or multiple features planned):
@@ -49,6 +51,7 @@ Pre-fill:
 - **Version**: from arguments
 - **Status**: `planned`
 - **Last Updated**: today's date (YYYY-MM-DD)
+- **Approval**: `draft`
 - **Feature name**: from arguments
 Leave all other sections as placeholders for the user to fill.
@@ -62,24 +65,30 @@ After creating the file, guide the user through filling it out:
 3. **Key Files** — Glob the codebase to identify existing files relevant to this feature
 4. **Schema / Data Model** — Reference file paths only, never inline schemas
 5. **API Endpoints** — Table format: Method | Path | Description
-6. **Requirements** — EARS format, numbered FR-1, FR-2, NFR-1, etc.
+6. **Requirements** — EARS format, numbered FR-1, FR-2, NFR-1, etc. Tag all requirements `[assumed]` at creation time — they become `[user-approved]` only after explicit user validation via `/spec-refine`.
 7. **Dependencies** — What this feature depends on
 8. **Out of Scope** — Explicit non-goals to prevent scope creep
+9. **Resolved Questions** — Leave empty at creation; populated by `/spec-refine`
 ### Step 5: Validate
 Before finishing:
-- [ ] File is ≤200 lines
+- [ ] If the file exceeds ~200 lines, consider splitting into sub-specs
 - [ ] No source code, SQL, or type definitions reproduced inline
-- [ ] Status is `planned`
+- [ ] Status is `planned` and Approval is `draft`
 - [ ] All required sections present (even if some are "N/A" or "TBD")
 - [ ] Acceptance criteria are testable
+- [ ] All requirements are tagged `[assumed]`
+After validation, inform the user: "**This spec MUST go through `/spec-refine` before implementation begins.** All requirements are marked `[assumed]` until explicitly validated."
+The `/spec-refine` skill walks through every `[assumed]` requirement with the user, validates tech decisions and scope boundaries, and upgrades approved items to `[user-approved]`. The spec's `**Approval:**` becomes `user-approved` only after all requirements pass review.
 ---
-## Hard Constraints
+## Sizing Guidelines
-- **≤200 lines per spec.** If a feature needs more, split into sub-specs with a parent `_overview.md` (≤50 lines) linking them.
+- **Aim for ~200 lines per spec.** If a feature needs more, consider splitting into sub-specs with a parent `_overview.md` linking them.
 - **Reference, don't reproduce.** Write `see src/engine/db/migrations/002.sql lines 48-70` — never paste the SQL.
 - **Independently loadable.** Each spec file must be useful without loading any other file.
 - **EARS format for requirements.** Use the `specification-writing` skill for templates and examples.

package/.devcontainer/plugins/devs-marketplace/plugins/code-directive/skills/spec-new/references/template.md CHANGED Viewed

@@ -12,6 +12,7 @@ Standard template for all feature specifications. Copy this structure when creat
 **Version:** v0.X.0
 **Status:** planned
 **Last Updated:** YYYY-MM-DD
+**Approval:** draft
 ## Intent
@@ -56,14 +57,14 @@ Standard template for all feature specifications. Copy this structure when creat
 ### Functional Requirements
-- FR-1: [EARS format requirement — see specification-writing skill for templates]
-- FR-2: When [event], the system shall [action].
-- FR-3: If [unwanted condition], then the system shall [action].
+- FR-1 [assumed]: [EARS format requirement — see specification-writing skill for templates]
+- FR-2 [assumed]: When [event], the system shall [action].
+- FR-3 [assumed]: If [unwanted condition], then the system shall [action].
 ### Non-Functional Requirements
-- NFR-1: The system shall respond to [endpoint] within [N]ms at the [percentile] percentile.
-- NFR-2: [Security, accessibility, scalability requirement]
+- NFR-1 [assumed]: The system shall respond to [endpoint] within [N]ms at the [percentile] percentile.
+- NFR-2 [assumed]: [Security, accessibility, scalability requirement]
 ## Dependencies
@@ -75,6 +76,10 @@ Standard template for all feature specifications. Copy this structure when creat
 - [Explicit non-goal 1 — prevents scope creep]
 - [Explicit non-goal 2]
+## Resolved Questions
+[Decisions explicitly approved by the user via `/spec-refine`. Each entry: decision topic, chosen option, date, brief rationale.]
 ## Implementation Notes
 [Post-implementation only. Leave empty in planned specs. After building, document what actually shipped vs. what was planned.]
@@ -108,3 +113,17 @@ Standard template for all feature specifications. Copy this structure when creat
 | `planned` | Spec written, implementation not started |
 | `partial` | Some acceptance criteria implemented, work ongoing |
 | `implemented` | All acceptance criteria met, as-built notes complete |
+## Approval Workflow
+| Tag | Meaning |
+|-----|---------|
+| `[assumed]` | Requirement was drafted by AI or inferred — treated as a hypothesis |
+| `[user-approved]` | Requirement was explicitly reviewed and approved by the user via `/spec-refine` |
+| Approval Status | Meaning |
+|-----------------|---------|
+| `draft` | Spec has unvalidated assumptions — NOT approved for implementation |
+| `user-approved` | All requirements are `[user-approved]` — ready for implementation |
+**Workflow:** `/spec-new` creates → `/spec-refine` validates → implementation begins → `/spec-update` closes the loop.