npm - @claudetools/tools - Versions diffs - 0.8.2 → 0.8.3 - Mend

@claudetools/tools 0.8.2 → 0.8.3

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (20)

package/dist/evaluation/build-dataset.d.ts +1 -0
package/dist/evaluation/build-dataset.js +135 -0
package/dist/evaluation/threshold-eval.d.ts +63 -0
package/dist/evaluation/threshold-eval.js +250 -0
package/dist/handlers/codedna-handlers.d.ts +1 -1
package/dist/handlers/tool-handlers.js +44 -155
package/dist/helpers/compact-formatter.d.ts +51 -0
package/dist/helpers/compact-formatter.js +130 -0
package/dist/helpers/engagement-tracker.d.ts +10 -0
package/dist/helpers/engagement-tracker.js +61 -0
package/dist/helpers/session-validation.d.ts +76 -0
package/dist/helpers/session-validation.js +221 -0
package/dist/index.d.ts +1 -0
package/dist/index.js +1 -0
package/dist/resources.js +3 -0
package/dist/templates/claude-md.d.ts +1 -1
package/dist/templates/claude-md.js +23 -35
package/dist/templates/worker-prompt.js +35 -202
package/dist/tools.js +22 -20
package/package.json +4 -2

package/dist/templates/worker-prompt.js CHANGED Viewed

@@ -67,136 +67,42 @@ function buildIdentitySection(worker) {
 function buildBehavioralSection(taskId) {
     return `<!-- Layer 2: Behavioral Guidelines -->
 <behavioral_guidelines>
-  <core_behaviors>
-    <behavior id="codedna_first" priority="MANDATORY">
-      BEFORE writing code manually, check if CodeDNA can generate it:
-      DISCOVERY WORKFLOW:
-      1. Call codedna_list_generators() to see available generators
-      2. Check if task includes Entity DSL format
-         Example: "User(email:string:unique, password:string:hashed)"
-      3. Detect framework from project (package.json, existing code)
-      4. Match detected framework to generator capabilities
-      5. Call appropriate generator with detected settings
-      BENEFITS:
-      → Saves 95-99% tokens vs manual coding
-      → Generates production-ready code with validation, auth, tests
-      ONLY write code manually when:
-      - Logic is too complex for generation
-      - Modifying existing code (not creating new)
-      - Custom business rules that can't be templated
-    </behavior>
-    <behavior id="tool_first" priority="MANDATORY">
-      BEFORE writing code, use available tools:
-      1. codebase_map: START HERE - Get project overview
-         → Understand project structure and key entry points
-         → Identify frameworks and patterns in use
-         → Find relevant directories before diving in
-         → Use BEFORE Grep/Glob for unfamiliar code
-      2. memory_search: Check for existing patterns and decisions
-         → "How was authentication implemented?"
-         → "What patterns are used for X?"
-      3. codebase_find: Find specific symbols/files
-         → Search for existing code to extend/adapt
-      4. codebase_context: Understand file dependencies
-         → See what a file imports/exports
-      5. docs_get: Retrieve cached documentation
-         → Get up-to-date API references
-    </behavior>
-    <behavior id="minimal_changes" priority="IMPORTANT">
-      Make ONLY the changes required for this task.
-      NEVER:
-      - Refactor unrelated code
-      - Add features not requested
-      - Over-engineer solutions
-      - Add unnecessary abstractions
-    </behavior>
-  </core_behaviors>
+  <behavior id="codedna_first" priority="MANDATORY">
+    BEFORE writing code: codedna_list_generators() → check if task matches generator
+    Entity DSL: "User(email:string:unique, password:string:hashed)"
+    Only write manually for: complex logic, modifications, custom business rules
+  </behavior>
+  <behavior id="tool_first" priority="MANDATORY">
+    Tool precedence: codebase_map → memory_search → codebase_find → docs_get
+  </behavior>
+  <behavior id="minimal_changes" priority="IMPORTANT">
+    Make ONLY task-required changes. No refactoring, features, or abstractions.
+  </behavior>
 </behavioral_guidelines>`;
 }
 function buildStandardsSection() {
     return `<!-- Layer 3: Standards & Best Practices -->
 <standards>
   <code_quality>
-    - Write code that others can understand
-    - Prefer explicit over implicit
-    - Validate inputs at system boundaries
-    - Handle errors with clear messages
+    Australian English. Explicit over implicit. Validate boundaries. Clear errors.
   </code_quality>
-  <formatting>
-    - Use Australian English in comments and messages
-    - Follow existing code style in the project
-    - Include file paths with line numbers in references
-  </formatting>
   <memory_usage priority="CRITICAL">
-    When storing facts with memory_store_fact(), follow these rules:
-    ✅ STORE (learned contextual knowledge):
-    - Project discoveries: Anti-patterns found, bugs fixed, patterns discovered
-    - User preferences: Workflows, communication styles learned through interaction
-    - Architectural decisions WHY: Reasoning behind choices, rejected alternatives
-    - Solutions that worked: Specific implementations with context
-    ❌ DON'T STORE (generic documentation):
-    - Tool behavior: How the tool system works (already in API docs)
-    - Universal knowledge: Programming concepts, language features
-    - Temporary state: Current task status, session-specific data
-    Rule of thumb: "Would I need to rediscover this in 3 months?"
-    → YES = store it (learned pattern, specific to this context)
-    → NO = don't store it (universal knowledge, temporary state)
+    Store: project discoveries, user preferences, decision rationale, working solutions
+    Don't store: tool behavior, universal knowledge, temporary state
+    Rule: "Would I rediscover this in 3 months?" → YES = store
   </memory_usage>
   <documentation_files priority="MANDATORY">
-    NEVER create .md files in random locations. Follow these rules:
-    DIRECTORY STRUCTURE:
-    - docs/           → Project documentation, guides, specs
-    - docs/research/  → Research notes, analysis, investigations
-    - docs/decisions/ → Architecture Decision Records (ADRs)
-    - CHANGELOG.md    → Only in project root
-    - README.md       → Only in project root or package roots
-    NAMING CONVENTIONS:
-    - Use lowercase with hyphens: user-authentication-guide.md
-    - NEVER use spaces or underscores in filenames
-    - Include date prefix for time-sensitive docs: YYYY-MM-DD-topic.md
-      Example: 2025-12-05-api-migration-plan.md
-    - Research docs: YYYY-MM-DD-research-topic.md
-    - Decision records: NNNN-decision-title.md (e.g., 0001-use-jwt-auth.md)
-    ANTI-PATTERNS (NEVER DO):
-    - Creating PLAN.md, NOTES.md, TODO.md in project root
-    - Random capitalised filenames like IMPLEMENTATION_GUIDE.md
-    - Nested docs in src/ or lib/ directories
-    - Multiple README files outside package roots
-    - Temporary docs without dates (impossible to clean up later)
-    BEFORE CREATING ANY .md FILE:
-    1. Check if docs/ directory exists - create if needed
-    2. Determine correct subdirectory (research/, decisions/, etc.)
-    3. Use proper naming convention with date if temporal
-    4. Ask user if uncertain about placement
+    Structure: docs/research/, docs/decisions/, README.md/CHANGELOG.md in root only
+    Naming: lowercase-with-hyphens.md, YYYY-MM-DD-topic.md for temporal docs
+    Never: PLAN.md/NOTES.md in root, random caps, nested in src/, undated temp files
   </documentation_files>
   <completion_summary>
-    When calling task_complete, include:
-    - Implementation: What you built/changed
-    - Files: List of modified files with paths
-    - Decisions: Any architectural choices made
-    - Testing: How changes were verified
-    - Notes: Caveats, limitations, follow-up needed
+    Include: implementation, files modified, decisions, testing, notes
   </completion_summary>
 </standards>`;
 }
@@ -204,31 +110,10 @@ function buildDomainSection(worker) {
     return `<!-- Layer 4: Domain Knowledge -->
 <domain_knowledge>
   <codedna_capabilities>
-    CODEDNA DISCOVERY PATTERN:
-    1. Call codedna_list_generators() to see available generators
-    2. Each generator lists supported frameworks and options
-    3. Detect project framework from package.json/pyproject.toml
-    4. Match detected framework to generator capabilities
-    5. If no match, ASK the user which framework to use
-    ENTITY DSL FORMAT:
-    EntityName(field:type:constraint, field:type:constraint, ...)
-    TYPES:
-    - string, integer, decimal, boolean, datetime
-    - ref(EntityName) - foreign key reference
-    - enum(val1|val2|val3) - enumeration
-    CONSTRAINTS:
-    - unique, required, min(n), max(n), hashed, default(value)
-    - UI hints: textarea, switch, radio (for form rendering)
-    WORKFLOW:
-    1. codedna_list_generators() → discover capabilities
-    2. codedna_validate_spec(spec) → validate DSL syntax
-    3. codedna_generate_*(spec, framework, options) → generate code
-    DO NOT assume frameworks exist - always discover via codedna_list_generators
+    Discovery: codedna_list_generators() → detect framework → validate spec → generate
+    DSL: EntityName(field:type:constraint, ...)
+    Types: string, integer, decimal, boolean, datetime, ref(Entity), enum(a|b)
+    Constraints: unique, required, min(n), max(n), hashed, default(v), textarea, switch
   </codedna_capabilities>
   <worker_expertise>
@@ -239,26 +124,9 @@ function buildDomainSection(worker) {
 function buildCrossCuttingSection() {
     return `<!-- Layer 5: Cross-Cutting Concerns -->
 <cross_cutting_concerns>
-  <error_handling>
-    - Validate inputs at boundaries (API, CLI, file I/O)
-    - Fail fast with clear error messages
-    - Log errors with sufficient context for debugging
-    - Provide actionable guidance to users
-  </error_handling>
-  <security>
-    CRITICAL: Follow OWASP Top 10 guidelines
-    - Never expose sensitive data (API keys, passwords, tokens)
-    - Sanitize all user inputs
-    - Use parameterized queries (never string concat for SQL)
-    - Apply principle of least privilege
-  </security>
-  <performance>
-    - Don't optimise prematurely
-    - Consider token efficiency in prompts
-    - Use CodeDNA for boilerplate (saves 95%+ tokens)
-  </performance>
+  <error_handling>Validate boundaries. Fail fast. Clear messages. Actionable guidance.</error_handling>
+  <security>OWASP Top 10. No exposed secrets. Sanitize inputs. Parameterized queries. Least privilege.</security>
+  <performance>No premature optimization. Token efficiency. CodeDNA for boilerplate.</performance>
 </cross_cutting_concerns>`;
 }
 function buildTaskSection(task, epicContext) {
@@ -309,54 +177,19 @@ function buildSiblingSection(siblingTasks) {
 function buildProtocolSection(taskId) {
     return `<!-- Protocol -->
 <protocol>
-  <step number="1" action="START">
-    Call: task_start(task_id="${taskId}", agent_id="your-agent-id")
-    This claims the task and prevents conflicts.
-  </step>
-  <step number="2" action="CHECK_CODEDNA">
-    IF task involves creating entities/APIs:
-    → Look for Entity DSL in task description
-    → Call codedna_generate_api with the spec
-    → Review generated code, make adjustments if needed
-    IF task is modification/complex logic:
-    → Use memory_search and codebase_find first
-    → Write code manually only when necessary
-  </step>
-  <step number="3" action="IMPLEMENT">
-    Complete the requirements described in the task.
-    Make minimal changes. Don't over-engineer.
-  </step>
-  <step number="4" action="COMPLETE">
-    Call: task_complete(task_id="${taskId}", summary="detailed summary")
-    Include in summary:
-    - What you implemented
-    - Files created/modified
-    - Decisions made
-    - How you verified it works
-  </step>
+  <step number="1">task_start(task_id="${taskId}")</step>
+  <step number="2">Check CodeDNA for entities/APIs, else use tools (memory_search, codebase_find)</step>
+  <step number="3">Implement requirements. Minimal changes.</step>
+  <step number="4">task_complete(task_id="${taskId}", summary="impl, files, decisions, testing")</step>
   <error_handling>
-    IF you encounter blocking issues:
-    1. Log error:
-       task_add_context(task_id="${taskId}", context_type="work_log",
-         content="ERROR: description", added_by="your-agent-id")
-    2. Release task:
-       task_release(task_id="${taskId}", agent_id="your-agent-id",
-         new_status="blocked", work_log="summary of issue")
-    Do NOT mark incomplete work as complete.
+    Blocking issues: task_add_context + task_release(status="blocked")
+    Never mark incomplete work as complete.
   </error_handling>
 </protocol>
 ---
-**Begin work now. Remember to call task_start first!**`;
+**Begin work now. Call task_start first!**`;
 }
 /**
  * Build a minimal prompt for simple tasks

package/dist/tools.js CHANGED Viewed

@@ -7,7 +7,14 @@ export function registerToolDefinitions(server) {
         tools: [
             {
                 name: 'memory_search',
-                description: 'Search the memory system for relevant facts and entities based on a query. Use this to recall past conversations, facts, and relationships. NOTE: Context is now automatically injected - use this only for explicit searches.',
+                description: `⚠️ RARELY NEEDED - Context is AUTO-INJECTED via hooks on every message.
+Only use this tool when:
+- You need to search with DIFFERENT parameters than the user's query
+- You're debugging what's in memory
+- The user explicitly asks to search memory
+DO NOT use this tool to "recall context" - that happens automatically.`,
                 inputSchema: {
                     type: 'object',
                     properties: {
@@ -29,7 +36,7 @@ export function registerToolDefinitions(server) {
             },
             {
                 name: 'memory_explain',
-                description: 'Explain what memory context was automatically injected in the last response. Use this when you want to understand what memories are being used.',
+                description: 'Debug tool: Show what memory context was auto-injected. Only use if debugging memory injection.',
                 inputSchema: {
                     type: 'object',
                     properties: {},
@@ -37,7 +44,13 @@ export function registerToolDefinitions(server) {
             },
             {
                 name: 'memory_inject',
-                description: 'Manually trigger context injection for a specific query. Normally context is injected automatically, but use this for explicit control.',
+                description: `⚠️ RARELY NEEDED - Context is AUTO-INJECTED via user-prompt-submit hook.
+Only use this tool when:
+- You need to inject context for a DIFFERENT query than the user's message
+- The user explicitly asks to refresh context
+DO NOT call this routinely - it wastes context tokens.`,
                 inputSchema: {
                     type: 'object',
                     properties: {
@@ -139,7 +152,9 @@ EXAMPLES:
             },
             {
                 name: 'memory_get_context',
-                description: 'Get the current memory context, optionally filtered by a query. Returns recent facts and entities.',
+                description: `⚠️ RARELY NEEDED - Context is AUTO-INJECTED via hooks.
+Only use for debugging or when user explicitly asks "what context do you have?"`,
                 inputSchema: {
                     type: 'object',
                     properties: {
@@ -185,23 +200,10 @@ EXAMPLES:
             // =========================================================================
             {
                 name: 'memory_index',
-                description: `Get a lightweight index of available memories WITHOUT fetching full content. Use this FIRST to scan what context is available before deciding what to retrieve.
-Returns for each memory:
-- id: Unique identifier for fetching details
-- topic: Short topic/subject description
-- relevance: Score (0-1) indicating how relevant to query
-- token_cost: Estimated tokens if fetched
-- importance: "critical" | "high" | "normal" - Critical facts are auto-injected
-- category: "architecture" | "pattern" | "decision" | "preference" | "fact"
-- last_accessed: When this memory was last used
-WORKFLOW:
-1. Call memory_index with your query to see what's available
-2. Review the index - critical items are auto-injected
-3. Call memory_detail for specific IDs you want full content for
+                description: `⚠️ RARELY NEEDED - Critical facts are AUTO-INJECTED via hooks.
-This saves tokens by letting you selectively fetch only relevant memories.`,
+Only use when you need to browse ALL available memories or debug what's stored.
+For normal work, context is injected automatically - don't call this.`,
                 inputSchema: {
                     type: 'object',
                     properties: {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@claudetools/tools",
-  "version": "0.8.2",
+  "version": "0.8.3",
   "description": "Persistent AI memory, task management, and codebase intelligence for Claude Code",
   "type": "module",
   "main": "dist/index.js",
@@ -26,7 +26,9 @@
     "codedna:analytics": "tsx -e \"import { weeklyAnalyticsSummary } from './src/helpers/usage-analytics.js'; weeklyAnalyticsSummary()\"",
     "codedna:analytics:24h": "tsx -e \"import { getLast24HoursAnalytics, printAnalytics } from './src/helpers/usage-analytics.js'; const r = await getLast24HoursAnalytics(); printAnalytics(r, 'Last 24 Hours')\"",
     "codedna:analytics:30d": "tsx -e \"import { getLast30DaysAnalytics, printAnalytics } from './src/helpers/usage-analytics.js'; const r = await getLast30DaysAnalytics(); printAnalytics(r, 'Last 30 Days')\"",
-    "prompt:verify": "scripts/verify-prompt-compliance.sh"
+    "prompt:verify": "scripts/verify-prompt-compliance.sh",
+    "eval:build-dataset": "tsx src/evaluation/build-dataset.ts",
+    "eval:threshold": "tsx src/evaluation/threshold-eval.ts"
   },
   "repository": {
     "type": "git",