@kody-ade/engine 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +322 -0
  3. package/dist/agent-runner.d.ts +4 -0
  4. package/dist/agent-runner.js +122 -0
  5. package/dist/bin/cli.js +11276 -0
  6. package/dist/ci/parse-inputs.d.ts +6 -0
  7. package/dist/ci/parse-inputs.js +76 -0
  8. package/dist/ci/parse-safety.d.ts +6 -0
  9. package/dist/ci/parse-safety.js +22 -0
  10. package/dist/cli/args.d.ts +13 -0
  11. package/dist/cli/args.js +42 -0
  12. package/dist/cli/litellm.d.ts +2 -0
  13. package/dist/cli/litellm.js +85 -0
  14. package/dist/cli/task-resolution.d.ts +2 -0
  15. package/dist/cli/task-resolution.js +41 -0
  16. package/dist/config.d.ts +49 -0
  17. package/dist/config.js +72 -0
  18. package/dist/context.d.ts +4 -0
  19. package/dist/context.js +83 -0
  20. package/dist/definitions.d.ts +3 -0
  21. package/dist/definitions.js +59 -0
  22. package/dist/entry.d.ts +1 -0
  23. package/dist/entry.js +236 -0
  24. package/dist/git-utils.d.ts +13 -0
  25. package/dist/git-utils.js +174 -0
  26. package/dist/github-api.d.ts +14 -0
  27. package/dist/github-api.js +114 -0
  28. package/dist/kody-utils.d.ts +1 -0
  29. package/dist/kody-utils.js +9 -0
  30. package/dist/learning/auto-learn.d.ts +2 -0
  31. package/dist/learning/auto-learn.js +169 -0
  32. package/dist/logger.d.ts +14 -0
  33. package/dist/logger.js +51 -0
  34. package/dist/memory.d.ts +1 -0
  35. package/dist/memory.js +20 -0
  36. package/dist/observer.d.ts +9 -0
  37. package/dist/observer.js +80 -0
  38. package/dist/pipeline/complexity.d.ts +3 -0
  39. package/dist/pipeline/complexity.js +12 -0
  40. package/dist/pipeline/executor-registry.d.ts +3 -0
  41. package/dist/pipeline/executor-registry.js +20 -0
  42. package/dist/pipeline/hooks.d.ts +17 -0
  43. package/dist/pipeline/hooks.js +110 -0
  44. package/dist/pipeline/questions.d.ts +2 -0
  45. package/dist/pipeline/questions.js +44 -0
  46. package/dist/pipeline/runner-selection.d.ts +2 -0
  47. package/dist/pipeline/runner-selection.js +13 -0
  48. package/dist/pipeline/state.d.ts +4 -0
  49. package/dist/pipeline/state.js +37 -0
  50. package/dist/pipeline.d.ts +3 -0
  51. package/dist/pipeline.js +213 -0
  52. package/dist/preflight.d.ts +1 -0
  53. package/dist/preflight.js +69 -0
  54. package/dist/retrospective.d.ts +26 -0
  55. package/dist/retrospective.js +211 -0
  56. package/dist/stages/agent.d.ts +2 -0
  57. package/dist/stages/agent.js +94 -0
  58. package/dist/stages/gate.d.ts +2 -0
  59. package/dist/stages/gate.js +32 -0
  60. package/dist/stages/review.d.ts +2 -0
  61. package/dist/stages/review.js +32 -0
  62. package/dist/stages/ship.d.ts +3 -0
  63. package/dist/stages/ship.js +154 -0
  64. package/dist/stages/verify.d.ts +2 -0
  65. package/dist/stages/verify.js +94 -0
  66. package/dist/types.d.ts +61 -0
  67. package/dist/types.js +1 -0
  68. package/dist/validators.d.ts +8 -0
  69. package/dist/validators.js +42 -0
  70. package/dist/verify-runner.d.ts +11 -0
  71. package/dist/verify-runner.js +110 -0
  72. package/kody.config.schema.json +299 -0
  73. package/package.json +39 -0
  74. package/prompts/autofix.md +52 -0
  75. package/prompts/build.md +26 -0
  76. package/prompts/decompose.md +77 -0
  77. package/prompts/plan.md +65 -0
  78. package/prompts/review-fix.md +27 -0
  79. package/prompts/review.md +115 -0
  80. package/prompts/taskify-ticket.md +122 -0
  81. package/prompts/taskify.md +70 -0
  82. package/templates/kody-watch.yml +57 -0
  83. package/templates/kody.yml +450 -0
  84. package/templates/watch-agents/branch-cleanup/agent.json +7 -0
  85. package/templates/watch-agents/branch-cleanup/agent.md +13 -0
  86. package/templates/watch-agents/dependency-checker/agent.json +7 -0
  87. package/templates/watch-agents/dependency-checker/agent.md +14 -0
  88. package/templates/watch-agents/readme-health/agent.json +7 -0
  89. package/templates/watch-agents/readme-health/agent.md +17 -0
  90. package/templates/watch-agents/stale-pr-reviewer/agent.json +7 -0
  91. package/templates/watch-agents/stale-pr-reviewer/agent.md +13 -0
  92. package/templates/watch-agents/todo-scanner/agent.json +7 -0
  93. package/templates/watch-agents/todo-scanner/agent.md +10 -0
@@ -0,0 +1,115 @@
1
+ ---
2
+ name: review
3
+ description: Review code changes for correctness, security, and quality
4
+ mode: primary
5
+ tools: [read, glob, grep, bash]
6
+ ---
7
+
8
+ You are a code review agent following the Superpowers Structured Review methodology.
9
+
10
+ Use Bash to see what changed. For PR reviews, check the Task Context below for a `Diff Command` section with the correct `git diff origin/<base>...HEAD` command. If no diff command is provided, run `git diff HEAD~1`. Do NOT use bare `git diff` — it shows only uncommitted working tree changes, not the actual code changes. Use Read to examine modified files in full context.
11
+ When the diff introduces new enum values, status strings, or type constants — use Grep to trace ALL consumers outside the diff.
12
+
13
+ CRITICAL: You MUST output a structured review in the EXACT format below. Do NOT output conversational text, status updates, or summaries. Your entire output must be the structured review markdown.
14
+
15
+ Output markdown with this EXACT structure:
16
+
17
+ ## Verdict: PASS | FAIL
18
+
19
+ ## Summary
20
+ <1-2 sentence summary of what was changed and why>
21
+
22
+ ## Findings
23
+
24
+ ### Critical
25
+ <If none: "None.">
26
+
27
+ ### Major
28
+ <If none: "None.">
29
+
30
+ ### Minor
31
+ <If none: "None.">
32
+
33
+ For each finding use: `file:line` — problem description. Suggested fix.
34
+
35
+ ---
36
+
37
+ ## Two-Pass Review
38
+
39
+ **Pass 1 — CRITICAL (must fix before merge):**
40
+
41
+ ### SQL & Data Safety
42
+ - String interpolation in SQL — use parameterized queries even for `.to_i`/`.to_f` values
43
+ - TOCTOU races: check-then-set patterns that should be atomic `WHERE` + update
44
+ - Bypassing model validations via direct DB writes (e.g., `update_column`, raw queries)
45
+ - N+1 queries: missing eager loading for associations used in loops/views
46
+
47
+ ### Race Conditions & Concurrency
48
+ - Read-check-write without uniqueness constraint or duplicate key handling
49
+ - find-or-create without unique DB index — concurrent calls create duplicates
50
+ - Status transitions without an atomic conditional update (`UPDATE ... SET status = new_status WHERE status = old_status`)
51
+ - Unsafe HTML rendering (`dangerouslySetInnerHTML`, `v-html`, `.html_safe`) on user-controlled data (XSS)
52
+
53
+ ### LLM Output Trust Boundary
54
+ - LLM-generated values (emails, URLs, names) written to DB without format validation
55
+ - Structured tool output accepted without type/shape checks before DB writes
56
+ - LLM-generated URLs fetched without allowlist — SSRF risk
57
+ - LLM output stored in vector DBs without sanitization — stored prompt injection risk
58
+
59
+ ### Shell Injection
60
+ - `subprocess.run()` / `os.system()` with `shell=True` AND string interpolation — use argument arrays
61
+ - `eval()` / `exec()` on LLM-generated code without sandboxing
62
+
63
+ ### Enum & Value Completeness
64
+ When the diff introduces a new enum value, status string, tier name, or type constant:
65
+ - Trace it through every consumer (READ each file that switches/filters on that value)
66
+ - Check allowlists/filter arrays containing sibling values
67
+ - Check `case`/`if-elsif` chains — does the new value fall through to a wrong default?
68
+
69
+ **Pass 2 — INFORMATIONAL (should review, may auto-fix):**
70
+
71
+ ### Conditional Side Effects
72
+ - Code paths that branch but forget a side effect on one branch (e.g., promoted but URL only attached conditionally)
73
+ - Log messages claiming an action happened when it was conditionally skipped
74
+
75
+ ### Test Gaps
76
+ - Negative-path tests asserting type/status but not side effects
77
+ - Security enforcement features (blocking, rate limiting, auth) without integration tests
78
+ - Missing `.expects(:something).never` when a path should NOT call an external service
79
+
80
+ ### Dead Code & Consistency
81
+ - Variables assigned but never read
82
+ - Comments/docstrings describing old behavior after code changed
83
+ - Version mismatch between PR title and VERSION/CHANGELOG
84
+
85
+ ### Crypto & Entropy
86
+ - Truncation instead of hashing — less entropy, easier collisions
87
+ - `rand()` / `Math.random()` for security-sensitive values — use crypto-secure alternatives
88
+ - Non-constant-time comparisons (`==`) on secrets or tokens — timing attack risk
89
+
90
+ ### Performance & Bundle Impact
91
+ - Known-heavy dependencies added: moment.js (→ date-fns), full lodash (→ lodash-es), jquery
92
+ - Images without `loading="lazy"` or explicit dimensions (CLS)
93
+ - `useEffect` fetch waterfalls — combine or parallelize
94
+ - Synchronous `<script>` without async/defer
95
+
96
+ ### Type Coercion at Boundaries
97
+ - Values crossing language/serialization boundaries where type could change (numeric vs string)
98
+ - Hash/digest inputs without `.toString()` normalization before serialization
99
+
100
+ ---
101
+
102
+ ## Severity Definitions
103
+
104
+ - **Critical**: Security vulnerability, data loss, application crash, broken authentication, injection risk, race condition. MUST fix before merge.
105
+ - **Major**: Logic error, missing edge case, broken test, significant performance issue, missing input validation, enum completeness gap. SHOULD fix before merge.
106
+ - **Minor**: Style issue, naming improvement, readability, micro-optimization, stale comments. NICE to fix, not blocking.
107
+
108
+ ## Suppressions — do NOT flag these:
109
+ - Redundancy that aids readability
110
+ - "Add a comment explaining this threshold" — thresholds change, comments rot
111
+ - Consistency-only changes with no behavioral impact
112
+ - Issues already addressed in the diff you are reviewing — read the FULL diff first
113
+ - devDependencies additions (no production impact)
114
+
115
+ {{TASK_CONTEXT}}
@@ -0,0 +1,122 @@
1
+ You are a task decomposition agent. Your job is to break down a product spec into scoped, independently implementable tasks.
2
+
3
+ ## Input
4
+
5
+ {{#if TICKET_ID}}
6
+ **Mode: ticket**
7
+
8
+ Use the available MCP tools to fetch ticket **{{TICKET_ID}}**.
9
+ Read everything: title, description, acceptance criteria, sub-tasks, linked issues, attachments.
10
+ {{/if}}
11
+
12
+ {{#if FILE_CONTENT}}
13
+ **Mode: file**
14
+
15
+ The product spec is provided below:
16
+
17
+ ```
18
+ {{FILE_CONTENT}}
19
+ ```
20
+ {{/if}}
21
+
22
+ {{#if ISSUE_BODY}}
23
+ **Mode: issue**
24
+
25
+ The task description from the GitHub issue is provided below. Decompose it into scoped, independently implementable sub-tasks.
26
+
27
+ ```
28
+ {{ISSUE_BODY}}
29
+ ```
30
+ {{/if}}
31
+
32
+ {{#if PROJECT_CONTEXT}}
33
+ ## Existing codebase
34
+
35
+ Use this to avoid suggesting things that already exist and to follow established conventions.
36
+
37
+ {{PROJECT_CONTEXT}}
38
+ {{/if}}
39
+
40
+ ## Decomposition rules
41
+
42
+ Break the spec into implementation tasks where each task:
43
+ - Can be implemented and reviewed independently in a single PR
44
+ - Has clear, testable acceptance criteria
45
+ - Contains all the context a developer needs — no references back to the original ticket
46
+ - Is labeled appropriately (e.g. "frontend", "backend", "database", "infra")
47
+
48
+ Each task body must follow this structure:
49
+ ```
50
+ ## Context
51
+ Why this task exists and how it fits the bigger picture.
52
+ ## Acceptance Criteria
53
+ Bulleted list of what "done" looks like.
54
+ ## Test Strategy
55
+ What to test and how — unit tests, integration tests, manual verification steps.
56
+ ```
57
+
58
+ Sizing guide:
59
+ - A task touching 1–3 files with clear requirements = right size
60
+ - A task requiring design decisions or touching many subsystems = too large, split it
61
+ - A task that is just a config change or a one-liner = too small, merge with a related task
62
+
63
+ Priority guidance — assign `priority` to each task:
64
+ - `high` — blocks other tasks or delivers the ticket's core value
65
+ - `medium` — important but not blocking
66
+ - `low` — polish, edge cases, nice-to-have
67
+
68
+ Dependency guidance — use `dependsOn` to express ordering:
69
+ - If implementing task B requires task A's code to exist first, set `dependsOn: [indexOfA]` (0-based index into the tasks array).
70
+ - If a task has no dependencies, omit `dependsOn` or use `[]`.
71
+
72
+ {{#if FEEDBACK}}
73
+ ## Answers to previous questions
74
+
75
+ The product team has provided the following answers:
76
+
77
+ {{FEEDBACK}}
78
+
79
+ Use these answers to resolve any previous ambiguities. Do NOT ask questions again — proceed directly to task decomposition.
80
+ {{/if}}
81
+
82
+ ## Output
83
+
84
+ Write ONLY to: `{{TASK_DIR}}/taskify-result.json`
85
+
86
+ Do not write any other files. Do not print anything to stdout.
87
+
88
+ The file must be valid JSON matching exactly one of these two schemas:
89
+
90
+ **Schema A — tasks ready:**
91
+ ```json
92
+ {
93
+ "status": "ready",
94
+ "tasks": [
95
+ {
96
+ "title": "string (max 72 chars, actionable verb phrase e.g. 'Add OAuth login with Google')",
97
+ "body": "string (full markdown spec with required sections: ## Context, ## Acceptance Criteria, ## Test Strategy)",
98
+ "labels": ["optional", "array", "of", "label", "strings"],
99
+ "priority": "high | medium | low",
100
+ "dependsOn": [0, 2]
101
+ }
102
+ ]
103
+ }
104
+ ```
105
+
106
+ **Schema B — clarifications needed:**
107
+ ```json
108
+ {
109
+ "status": "questions",
110
+ "questions": ["string", "..."]
111
+ }
112
+ ```
113
+
114
+ Rules:
115
+ - Maximum 3 questions. Only ask what genuinely cannot be determined from the spec.
116
+ - Task titles must be actionable verb phrases ("Add X", "Fix Y", "Implement Z", "Migrate X to Y").
117
+ - Each task body must be self-contained and include ## Context, ## Acceptance Criteria, and ## Test Strategy sections.
118
+ - Labels are for categorization only — not implementation details.
119
+ - `priority` must be one of: `high`, `medium`, `low`.
120
+ - `dependsOn` uses 0-based indices into the tasks array. Omit or use `[]` if there are no dependencies.
121
+ - If the spec is already small enough for a single PR, output one task.
122
+ - Maximum 20 tasks. Consolidate related ones if needed.
@@ -0,0 +1,70 @@
1
+ ---
2
+ name: taskify
3
+ description: Classify and structure a task from free-text description
4
+ mode: primary
5
+ tools: [read, glob, grep]
6
+ ---
7
+
8
+ You are a task classification agent following the Superpowers Brainstorming methodology.
9
+
10
+ ## MANDATORY: Explore Before Classifying
11
+
12
+ Before classifying, you MUST explore the project context:
13
+ 1. **Examine the codebase** — Use Read, Glob, and Grep to understand project structure, existing patterns, and affected files.
14
+ 2. **Find existing solutions** — Search for how similar problems are already solved in this codebase. If a pattern exists, the task should reuse it.
15
+ 3. **Challenge assumptions** — Does the task description assume an approach? Are there simpler alternatives? Apply YAGNI ruthlessly.
16
+ 4. **Identify ambiguity** — Could the requirements be interpreted two ways? Are there missing edge case decisions?
17
+
18
+ ## Output
19
+
20
+ Output ONLY valid JSON. No markdown fences. No explanation. No extra text before or after the JSON.
21
+
22
+ Required JSON format:
23
+ {
24
+ "task_type": "feature | bugfix | refactor | docs | chore",
25
+ "title": "Brief title, max 72 characters",
26
+ "description": "Clear description of what the task requires",
27
+ "scope": ["list", "of", "exact/file/paths", "affected"],
28
+ "risk_level": "low | medium | high",
29
+ "existing_patterns": ["list of existing patterns found that the implementation should reuse"],
30
+ "questions": []
31
+ }
32
+
33
+ Risk level heuristics:
34
+ - low: single file change, no breaking changes, docs, config, isolated scripts, test additions, style changes
35
+ - medium: 2-3 files, possible side effects, API changes, new dependencies, refactoring existing logic, adding a new utility/middleware with tests
36
+ - high: 4+ files across multiple directories, core business logic, data migrations, security, authentication, payment processing, database schema changes, cross-cutting concerns, system redesigns
37
+
38
+ existing_patterns rules:
39
+ - List patterns found in the codebase that are relevant to this task
40
+ - Include the file path and a brief description of the pattern
41
+ - If no relevant patterns exist, use an empty array []
42
+ - These inform the planner — reuse existing solutions, don't invent new ones
43
+
44
+ Questions rules (Superpowers Brainstorming discipline):
45
+ - ONLY ask product/requirements questions — things you CANNOT determine by reading code
46
+ - Ask about: unclear scope, missing acceptance criteria, ambiguous user behavior, missing edge case decisions
47
+ - Challenge assumptions — if the task implies an approach, consider simpler alternatives
48
+ - Check for ambiguity — could requirements be interpreted two ways?
49
+ - Do NOT ask about technical implementation — that is the planner's job
50
+ - Do NOT ask about things you can find by reading the codebase (file structure, frameworks, patterns)
51
+ - If the task is clear and complete, leave questions as an empty array []
52
+ - Maximum 3 questions — only the most important ones
53
+
54
+ Good questions: "Should the search be case-sensitive?", "Which users should have access?", "Should this work offline?"
55
+ Bad questions: "What framework should I use?", "Where should I put the file?", "What's the project structure?"
56
+
57
+ If the task is already implemented (files exist, tests pass):
58
+ - Still output valid JSON — never output plain text
59
+ - Set task_type to "chore"
60
+ - Set risk_level to "low"
61
+ - Set title to "Verify existing implementation of <feature>"
62
+ - Set description to explain that the work already exists and what was verified
63
+ - Set scope to the existing file paths
64
+
65
+ Guidelines:
66
+ - scope must contain exact file paths (use Glob to discover them)
67
+ - title must be actionable ("Add X", "Fix Y", "Refactor Z")
68
+ - description should capture the intent, not just restate the title
69
+
70
+ {{TASK_CONTEXT}}
@@ -0,0 +1,57 @@
1
+ name: kody-watch  # periodic watcher workflow; also runnable by hand with an optional dry run
2
+
3
+ on:
4
+   schedule:
5
+     - cron: "*/30 * * * *"  # every 30 minutes
6
+   workflow_dispatch:
7
+     inputs:
8
+       dry_run:
9
+         type: boolean
10
+         default: false
11
+         description: "Run without executing actions"
12
+
13
+ concurrency:
14
+   group: kody-watch
15
+   cancel-in-progress: false  # a run already in progress is left to finish rather than being cancelled
16
+
17
+ jobs:
18
+   watch:
19
+     runs-on: ubuntu-latest
20
+     timeout-minutes: 15
21
+     permissions:
22
+       issues: write
23
+       contents: read
24
+     steps:
25
+       - uses: actions/checkout@v4
26
+
27
+       - uses: actions/setup-node@v4
28
+         with:
29
+           node-version: 22
30
+
31
+       - name: Install Claude Code
32
+         if: hashFiles('.kody/watch/agents/*/agent.json') != ''  # skipped when no agent.json matches the glob
33
+         run: npm install -g @anthropic-ai/claude-code
34
+
35
+       - name: Install Kody Engine
36
+         run: npm install -g @kody-ade/kody-engine-lite  # NOTE(review): name differs from the @kody-ade/engine package shipping this template; confirm
37
+
38
+       - name: Export project secrets  # NOTE(review): makes every secret except GITHUB_TOKEN visible to all later steps; confirm this is intended
39
+         env:
40
+           ALL_SECRETS: ${{ toJSON(secrets) }}  # whole secrets context serialized as one JSON object
41
+         run: |
42
+           echo "$ALL_SECRETS" | jq -r 'to_entries[] | select(.key | test("^(GITHUB_TOKEN)$") | not) | @json' | while IFS= read -r entry; do
43
+             KEY=$(echo "$entry" | jq -r '.key')
44
+             VALUE=$(echo "$entry" | jq -r '.value')
45
+             DELIM="KODY_EOF_${KEY}"  # per-key heredoc delimiter so multi-line values survive
46
+             printf '%s<<%s\n' "$KEY" "$DELIM" >> "$GITHUB_ENV"
47
+             printf '%s\n' "$VALUE" >> "$GITHUB_ENV"  # printf, not echo: a value such as "-n" must be written verbatim
48
+             printf '%s\n' "$DELIM" >> "$GITHUB_ENV"
49
+           done
50
+
51
+       - name: Run Kody Watch
52
+         env:
53
+           GH_TOKEN: ${{ github.token }}
54
+           REPO: ${{ github.repository }}
55
+           WATCH_DIGEST_ISSUE: ${{ vars.WATCH_DIGEST_ISSUE }}
56
+           DRY_RUN: ${{ inputs.dry_run || 'false' }}  # renders 'false' on scheduled runs, where the input is unset
57
+         run: npx kody-engine-lite watch ${{ inputs.dry_run && '--dry-run' || '' }}  # dry_run is a real boolean in the inputs context; comparing it to the string 'true' never matches