npm - @compilr-dev/agents - Versions diffs - 0.3.0 → 0.3.2 - Mend

@compilr-dev/agents 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

package/dist/agent.d.ts +82 -1
package/dist/agent.js +162 -23
package/dist/context/manager.d.ts +8 -0
package/dist/context/manager.js +25 -2
package/dist/errors.d.ts +20 -1
package/dist/errors.js +44 -2
package/dist/index.d.ts +6 -1
package/dist/index.js +7 -1
package/dist/messages/index.d.ts +12 -5
package/dist/messages/index.js +53 -15
package/dist/providers/gemini-native.d.ts +86 -0
package/dist/providers/gemini-native.js +339 -0
package/dist/providers/index.d.ts +7 -2
package/dist/providers/index.js +7 -2
package/dist/providers/ollama.d.ts +2 -2
package/dist/providers/ollama.js +3 -3
package/dist/providers/types.d.ts +11 -0
package/dist/skills/index.js +474 -56
package/dist/state/agent-state.d.ts +1 -0
package/dist/state/agent-state.js +2 -0
package/dist/state/serializer.js +20 -2
package/dist/state/types.d.ts +5 -0
package/dist/utils/index.d.ts +119 -4
package/dist/utils/index.js +164 -13
package/package.json +4 -1

package/dist/skills/index.js CHANGED Viewed

@@ -172,93 +172,439 @@ export const builtinSkills = [
     defineSkill({
         name: 'code-review',
         description: 'Perform a thorough code review',
-        prompt: `You are now in code review mode. When reviewing code:
+        prompt: `You are now in CODE REVIEW MODE.
-1. **Security**: Check for vulnerabilities (injection, XSS, auth issues)
-2. **Performance**: Identify bottlenecks and optimization opportunities
-3. **Maintainability**: Evaluate code clarity, naming, and structure
-4. **Best Practices**: Check for anti-patterns and code smells
-5. **Testing**: Assess test coverage and edge cases
+## Purpose
+Perform a thorough, actionable code review.
-Provide specific, actionable feedback with line references.
-Format: List issues by severity (Critical, High, Medium, Low).`,
+## When to Use
+- User asks to review code, PR, or changes
+- Before merging a feature branch
+- After implementing a feature
+## When NOT to Use
+- User just wants to understand code → use explain skill
+- Code is already in production and can't change
+- User wants security-specific review → use security-review skill
+## Workflow
+### Phase 1: Understand Context
+1. Read the files being reviewed
+2. Check git status for what changed
+3. Review the diff to see actual changes
+### Phase 2: Review by Severity
+**Critical** (must fix before merge):
+- Security vulnerabilities (injection, XSS, auth bypass)
+- Data loss risks
+- Breaking changes to public APIs
+**High** (should fix):
+- Logic errors / bugs
+- Missing error handling for likely cases
+- Performance issues (N+1 queries, unbounded loops)
+**Medium** (consider fixing):
+- Code clarity / naming issues
+- Missing tests for new code
+- Inconsistent patterns
+**Low** (optional):
+- Style preferences
+- Minor optimizations
+### Phase 3: Report
+Format findings with file:line references:
+\`\`\`
+## Code Review: [file/feature]
+### Critical Issues
+- **src/auth.ts:42**: SQL injection risk → use parameterized query
+### High Priority
+- **src/api.ts:15**: Missing null check → add guard
+### Suggestions
+- Consider extracting duplicate logic
+### What's Good
+- Clear naming, good test coverage
+\`\`\`
+## Rules
+- Read the code before reviewing - never review blind
+- Be specific with line references
+- Suggest fixes, not just problems
+- Distinguish blocking vs non-blocking issues
+- Don't nitpick style if code works
+## Completion Criteria
+✓ All changed files reviewed
+✓ Issues categorized by severity
+✓ Actionable suggestions provided`,
         tags: ['development', 'review'],
     }),
     defineSkill({
         name: 'debug',
         description: 'Systematic debugging approach',
-        prompt: `You are now in debugging mode. Follow this systematic approach:
-1. **Reproduce**: Ensure the issue can be consistently reproduced
-2. **Isolate**: Narrow down the scope to specific components
-3. **Analyze**: Examine logs, stack traces, and state
-4. **Hypothesize**: Form theories about root cause
-5. **Test**: Verify hypotheses with targeted tests
-6. **Fix**: Implement and validate the fix
-7. **Document**: Note the issue and solution for future reference
-Be methodical and avoid jumping to conclusions.`,
+        prompt: `You are now in DEBUG MODE.
+## Purpose
+Systematically investigate and fix issues.
+## When to Use
+- User reports a bug or error
+- Tests are failing
+- Unexpected behavior occurs
+## When NOT to Use
+- User wants to add a feature → use build skill
+- User wants to understand code → use explain skill
+- User wants code improvements → use refactor skill
+## Workflow
+### Phase 1: Reproduce
+1. Understand expected vs actual behavior
+2. Get error messages/stack traces if available
+3. Identify steps to reproduce
+### Phase 2: Isolate
+1. Find relevant code using grep or glob
+2. Read the suspected files
+3. Narrow down to specific function/line
+### Phase 3: Hypothesize
+Form theories about root cause:
+- Input validation issue?
+- State management bug?
+- Race condition?
+- Missing error handling?
+- Wrong assumption about data?
+### Phase 4: Verify
+1. Add logging/checks to confirm hypothesis
+2. Run tests to validate
+3. If hypothesis wrong, return to Phase 2
+### Phase 5: Fix
+1. Make minimal change to fix the issue
+2. Run tests to confirm fix works
+3. Check for regressions
+### Phase 6: Document
+- Add test case for the bug if missing
+- Add comment if fix is non-obvious
+## Common Patterns
+| Symptom | Likely Cause |
+|---------|--------------|
+| "undefined is not a function" | Missing import, typo |
+| "Cannot read property of null" | Unhandled null case |
+| "Network error" | API/CORS issue |
+| Works locally, fails in CI | Environment difference |
+## Rules
+- Read the code before suggesting fixes
+- Don't guess - investigate systematically
+- Make minimal changes - fix the bug, don't refactor
+- Verify the fix actually works
+## Completion Criteria
+✓ Bug is reproduced and understood
+✓ Root cause identified
+✓ Fix implemented and tested
+✓ No regressions introduced`,
         tags: ['development', 'troubleshooting'],
     }),
     defineSkill({
         name: 'explain',
         description: 'Explain code or concepts clearly',
-        prompt: `You are now in explanation mode. When explaining:
-1. Start with a high-level overview
-2. Break down complex concepts into smaller parts
-3. Use analogies and examples where helpful
-4. Highlight key points and takeaways
-5. Provide context for why things work the way they do
-6. Tailor complexity to the audience level
-Be clear, concise, and avoid unnecessary jargon.`,
+        prompt: `You are now in EXPLAIN MODE.
+## Purpose
+Clearly explain code, concepts, or architecture.
+## When to Use
+- User asks "what does X do?"
+- User asks "how does X work?"
+- User is learning the codebase
+## When NOT to Use
+- User wants to change the code → use appropriate skill (refactor, debug, build)
+- User wants a code review → use code-review skill
+- User wants security analysis → use security-review skill
+## Approach
+### For Code Explanation
+1. Read the actual code first - never explain from memory
+2. Start with high-level purpose (1-2 sentences)
+3. Walk through key parts with line references (file.ts:42)
+4. Explain non-obvious logic
+5. Note dependencies and side effects
+### For Concept Explanation
+1. Start with simple definition
+2. Use analogies if helpful
+3. Show concrete example
+4. Mention common pitfalls
+### For Architecture Explanation
+1. Start with component overview
+2. Explain data flow
+3. Note key decisions and trade-offs
+4. Reference relevant files
+## Rules
+- Read the actual code - don't explain from memory
+- Use line references: src/auth.ts:42
+- Match complexity to user's apparent level
+- Avoid jargon unless necessary
+- If you don't know, say so
+## Completion Criteria
+✓ User's question is answered
+✓ Explanation is grounded in actual code
+✓ Complex parts are broken down clearly`,
         tags: ['education', 'documentation'],
     }),
     defineSkill({
         name: 'refactor',
         description: 'Guide code refactoring',
-        prompt: `You are now in refactoring mode. When refactoring:
-1. **Understand**: Fully understand existing behavior before changing
-2. **Test First**: Ensure tests exist to catch regressions
-3. **Small Steps**: Make incremental, reversible changes
-4. **Single Purpose**: Each refactor should have one clear goal
-5. **Preserve Behavior**: No functional changes during refactoring
-6. **Clean Up**: Remove dead code, simplify complexity
-Follow established patterns and maintain consistency with the codebase.`,
+        prompt: `You are now in REFACTOR MODE.
+## Purpose
+Safely restructure code without changing behavior.
+## When to Use
+- Code is hard to understand/maintain
+- Preparing code for a new feature
+- Reducing duplication
+- Improving performance
+## When NOT to Use
+- Adding new features → use build skill
+- Fixing bugs → use debug skill (behavior change)
+- Just cosmetic changes → not worth the risk
+- No tests exist → add tests first
+## Workflow
+### Phase 1: Understand
+1. Read the code to refactor
+2. Identify what calls this code (find references)
+3. Check for existing tests
+### Phase 2: Ensure Safety
+1. Run existing tests first
+2. If no tests, consider adding them first
+3. Note the current behavior precisely
+### Phase 3: Plan
+Identify the specific refactoring:
+- Extract function/method
+- Rename for clarity
+- Reduce duplication
+- Simplify conditionals
+- Split large file
+### Phase 4: Execute
+1. Make ONE refactoring at a time
+2. Run tests after each change
+3. If tests fail, revert and try smaller step
+### Phase 5: Verify
+1. Run full test suite
+2. Run linter
+3. Review the diff
+## Red Flags - Stop and Reconsider
+- No tests exist for this code
+- You're tempted to add features
+- The change touches many files
+- You don't fully understand the code
+## Rules
+- NEVER change behavior during refactoring
+- Make small, reversible changes
+- Test after every change
+- Read the code before changing it
+- If also fixing bugs, do that separately
+## Completion Criteria
+✓ Code is improved (clearer, simpler, less duplication)
+✓ Behavior is unchanged (tests pass)
+✓ No new warnings/errors introduced`,
         tags: ['development', 'improvement'],
     }),
     defineSkill({
         name: 'planning',
         description: 'Help plan and structure work',
-        prompt: `You are now in planning mode. When planning:
+        prompt: `You are now in PLANNING MODE.
+## Purpose
+Break down work into clear, actionable steps.
+## When to Use
+- Complex feature with multiple parts
+- User asks "how should I approach X?"
+- Before starting significant work
+## When NOT to Use
+- Simple, obvious tasks → just do them
+- User already has a clear plan
+- Pure research questions → use explain skill
+## Workflow
+### Phase 1: Clarify Goals
+1. What is the desired outcome?
+2. What are the constraints?
+3. What's out of scope?
+If unclear, ask the user to clarify.
+### Phase 2: Assess Current State
+1. Read relevant code
+2. Check project structure
+3. Identify what exists vs needs to be built
+### Phase 3: Break Down
+Create actionable items:
+- Each item completable in one session
+- Items have clear completion criteria
+- Order by dependencies (what blocks what?)
+### Phase 4: Identify Risks
+- What could go wrong?
+- What unknowns need investigation?
+- What dependencies might cause issues?
+### Phase 5: Present Plan
+Format as:
+\`\`\`
+## Plan: [Feature Name]
+### Goal
+[1-2 sentences]
+### Steps
+1. [ ] [Task] - [brief description]
+2. [ ] [Task] - [brief description]
+...
-1. **Clarify Goals**: Understand the desired outcome
-2. **Break Down**: Divide into manageable tasks
-3. **Dependencies**: Identify task dependencies and order
-4. **Risks**: Anticipate potential blockers
-5. **Milestones**: Define checkpoints to measure progress
-6. **Resources**: Identify what's needed
+### Risks
+- [Risk]: [mitigation]
+### Open Questions
+- [Question needing user input]
+\`\`\`
-Provide actionable plans with clear next steps.`,
+## Rules
+- Don't plan trivial tasks - just do them
+- Be specific - "implement auth" is too vague
+- Consider dependencies between steps
+- Include verification steps (tests, manual check)
+- Don't give time estimates
+## Completion Criteria
+✓ Goal is clear
+✓ Steps are actionable and ordered
+✓ Risks are identified
+✓ User agrees with the approach`,
         tags: ['project-management', 'organization'],
     }),
     defineSkill({
         name: 'security-review',
         description: 'Focus on security aspects',
-        prompt: `You are now in security review mode. Focus on:
+        prompt: `You are now in SECURITY REVIEW MODE.
+## Purpose
+Identify security vulnerabilities in code.
+## When to Use
+- Before deploying to production
+- Reviewing authentication/authorization code
+- Handling user input or sensitive data
+- Third-party integration code
+## When NOT to Use
+- General code quality issues → use code-review skill
+- Performance issues → use code-optimization skill
+- Just want to understand code → use explain skill
+## Workflow
+### Phase 1: Identify Attack Surface
+1. Find entry points: APIs, forms, file uploads
+2. Find data flows: user input → storage → output
+3. Find sensitive operations: auth, payments, admin
+### Phase 2: Check OWASP Top 10
+**1. Injection (SQL, Command, XSS)**
+- Is user input sanitized before queries?
+- Are parameterized queries used?
+- Is output encoded/escaped?
+**2. Broken Authentication**
+- Are passwords hashed properly (bcrypt, argon2)?
+- Are sessions managed securely?
+- Is MFA available for sensitive ops?
+**3. Sensitive Data Exposure**
+- Is data encrypted in transit (HTTPS)?
+- Are secrets in environment variables (not code)?
+- Is sensitive data logged?
+**4. Broken Access Control**
+- Are authorization checks on every endpoint?
+- Can users access other users' data?
+- Are admin functions protected?
+**5. Security Misconfiguration**
+- Are default credentials changed?
+- Are error messages generic (no stack traces)?
+- Are unnecessary features disabled?
+**6. Using Vulnerable Components**
+- Run findVulnerabilities() or npm audit
+- Check for outdated dependencies
+### Phase 3: Report Findings
+Format as:
+\`\`\`
+## Security Review: [component]
-1. **Authentication**: Verify proper auth mechanisms
-2. **Authorization**: Check access control at every level
-3. **Input Validation**: Look for injection vulnerabilities
-4. **Data Protection**: Ensure sensitive data is protected
-5. **Dependencies**: Check for known vulnerabilities
-6. **Configuration**: Review security settings
-7. **Logging**: Verify security events are logged
+### Critical (immediate action)
+- **file:line**: [vulnerability]
+  Risk: [impact]
+  Fix: [solution]
-Apply OWASP guidelines and security best practices.`,
+### High (fix before deploy)
+...
+### Recommendations
+- [proactive improvements]
+\`\`\`
+## Rules
+- Be specific about the vulnerability
+- Explain the real-world impact
+- Provide actionable fix suggestions
+- Don't cry wolf - prioritize real risks
+- Check for existing security controls first
+## Completion Criteria
+✓ Attack surface identified
+✓ OWASP categories checked
+✓ Findings prioritized by severity
+✓ Fix suggestions provided`,
         tags: ['security', 'review'],
     }),
     defineSkill({
@@ -266,6 +612,16 @@ Apply OWASP guidelines and security best practices.`,
         description: 'Guide user through project design and requirements gathering',
         prompt: `You are in DESIGN MODE. Your goal is to gather enough information to populate the project backlog with 5-15 actionable items.
+## When to Use
+- Starting a new project from scratch
+- User has an idea but no clear requirements
+- Need to create initial backlog
+## When NOT to Use
+- Project already has requirements → use refine skill
+- Quick project outline needed → use sketch skill
+- Just need to update PRD → use prd skill
 ## PHASES
 ### Phase 1: Vision (3-5 questions)
@@ -333,6 +689,17 @@ When you have enough information:
         description: 'Iteratively refine and expand project requirements',
         prompt: `You are in REFINE MODE. Your goal is to deepen and expand existing requirements based on user feedback.
+## When to Use
+- Backlog items are too vague
+- Need to add acceptance criteria
+- Reprioritizing existing items
+- Adding missing items to backlog
+## When NOT to Use
+- No backlog exists yet → use design skill
+- Quick outline needed → use sketch skill
+- Refining single item → use refine-item skill
 ## STARTUP
 1. Use todo_write to create a task list:
@@ -400,6 +767,16 @@ For complex features:
         description: 'Quick project outline with simple questions',
         prompt: `You are in SKETCH MODE. Ask 6 quick questions, then create backlog items.
+## When to Use
+- Quick project kickoff
+- User wants fast outline without deep dive
+- Initial brainstorming session
+## When NOT to Use
+- Detailed requirements needed → use design skill
+- Project already has backlog → use refine skill
+- Single item focus → use refine-item skill
 STEPS:
 1. Use todo_write with 6 tasks (one per question)
 2. Ask each question using ask_user_simple tool
@@ -429,6 +806,16 @@ RULES:
         description: 'Focused refinement of a specific backlog item',
         prompt: `You are in FOCUSED REFINE MODE. Your goal is to refine a specific backlog item.
+## When to Use
+- User wants to work on a specific backlog item
+- Item needs more detail or breakdown
+- Changing priority of specific item
+## When NOT to Use
+- Multiple items need refinement → use refine skill
+- No items exist yet → use design or sketch skill
+- Ready to implement → use build skill
 ## STARTUP
 1. Use backlog_read with id parameter to get the specific item
@@ -487,6 +874,17 @@ Based on user's choice:
         description: 'Create architecture documentation (ADRs, diagrams, data models, API designs)',
         prompt: `You are in ARCHITECTURE MODE. Your goal is to create architecture documentation.
+## When to Use
+- Creating ADRs (Architecture Decision Records)
+- Documenting system diagrams
+- Designing data models or API specs
+- Recording technical decisions
+## When NOT to Use
+- Need product requirements → use design or prd skill
+- Need to implement features → use build skill
+- Just want to understand existing code → use explain skill
 ## DOCUMENT TYPE: {{doc_type}}
 {{#if custom_topic}}Custom Topic: {{custom_topic}}{{/if}}
@@ -621,6 +1019,16 @@ Generate appropriate documentation.
         description: 'Amend or enhance the Product Requirements Document',
         prompt: `You are in PRD MODE. Your goal is to update or enhance the existing Product Requirements Document.
+## When to Use
+- Updating existing PRD sections
+- Adding new requirements to PRD
+- Refining vision or scope
+## When NOT to Use
+- No PRD exists yet → use design skill first
+- Need architecture docs → use architecture skill
+- Need session summary → use session-notes skill
 ## STARTUP
 1. Use todo_write to track your progress:
@@ -693,6 +1101,16 @@ Based on section selected:
         description: 'Create structured session notes capturing work done and decisions made',
         prompt: `You are in SESSION NOTES MODE. Your goal is to create a structured summary of the current session.
+## When to Use
+- End of a work session
+- User asks to document what was done
+- Need to capture decisions for future reference
+## When NOT to Use
+- Need product requirements → use prd skill
+- Need architecture docs → use architecture skill
+- In the middle of active work
 ## STARTUP
 1. Use todo_write to track your progress:

package/dist/state/agent-state.d.ts CHANGED Viewed

@@ -24,6 +24,7 @@ export declare function createAgentState(options: {
     messages: Message[];
     todos: TodoItem[];
     currentIteration: number;
+    turnCount: number;
     totalTokensUsed: number;
     createdAt?: string;
 }): AgentState;

package/dist/state/agent-state.js CHANGED Viewed

@@ -22,6 +22,7 @@ export function createEmptyState(sessionId, systemPrompt) {
         systemPrompt,
         todos: [],
         currentIteration: 0,
+        turnCount: 0,
         totalTokensUsed: 0,
         createdAt: now,
         updatedAt: now,
@@ -40,6 +41,7 @@ export function createAgentState(options) {
         model: options.model,
         todos: serializeTodos(options.todos),
         currentIteration: options.currentIteration,
+        turnCount: options.turnCount,
         totalTokensUsed: options.totalTokensUsed,
         createdAt: options.createdAt || now,
         updatedAt: now,

package/dist/state/serializer.js CHANGED Viewed

@@ -34,7 +34,11 @@ export class JsonSerializer {
             throw StateError.deserialization('Invalid JSON format', error instanceof Error ? error : undefined);
         }
         this.validateParsed(parsed);
-        return parsed;
+        // Ensure turnCount has a default value for backward compatibility
+        // with saved states from before turnCount was added (pre-v2 states)
+        const partialState = parsed;
+        const turnCount = partialState.turnCount ?? partialState.messages.filter((m) => m.role === 'user').length;
+        return { ...partialState, turnCount };
     }
     /**
      * Validate state before serialization (public interface method).
@@ -77,6 +81,11 @@ export class JsonSerializer {
         if (typeof state.version !== 'number') {
             throw StateError.invalidState('version must be a number');
         }
+        // turnCount is optional for backward compatibility
+        // (old saved states may not have it)
+        if (state.turnCount !== undefined && typeof state.turnCount !== 'number') {
+            throw StateError.invalidState('turnCount must be a number if present');
+        }
         // Version check - safe to cast since we validated it's a number
         const version = state.version;
         if (version > CURRENT_STATE_VERSION) {
@@ -119,7 +128,11 @@ export class CompactJsonSerializer {
             throw StateError.deserialization('Invalid JSON format', error instanceof Error ? error : undefined);
         }
         this.validateParsed(parsed);
-        return parsed;
+        // Ensure turnCount has a default value for backward compatibility
+        // with saved states from before turnCount was added (pre-v2 states)
+        const partialState = parsed;
+        const turnCount = partialState.turnCount ?? partialState.messages.filter((m) => m.role === 'user').length;
+        return { ...partialState, turnCount };
     }
     /**
      * Validate state before serialization (public interface method).
@@ -159,6 +172,11 @@ export class CompactJsonSerializer {
         if (typeof state.version !== 'number') {
             throw StateError.invalidState('version must be a number');
         }
+        // turnCount is optional for backward compatibility
+        // (old saved states may not have it)
+        if (state.turnCount !== undefined && typeof state.turnCount !== 'number') {
+            throw StateError.invalidState('turnCount must be a number if present');
+        }
         // Version check - safe to cast since we validated it's a number
         const version = state.version;
         if (version > CURRENT_STATE_VERSION) {