universal-agent-protocol 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +462 -0
- package/dist/analyzers/index.d.ts +3 -0
- package/dist/analyzers/index.d.ts.map +1 -0
- package/dist/analyzers/index.js +656 -0
- package/dist/analyzers/index.js.map +1 -0
- package/dist/bin/cli.d.ts +3 -0
- package/dist/bin/cli.d.ts.map +1 -0
- package/dist/bin/cli.js +506 -0
- package/dist/bin/cli.js.map +1 -0
- package/dist/bin/tool-calls.d.ts +3 -0
- package/dist/bin/tool-calls.d.ts.map +1 -0
- package/dist/bin/tool-calls.js +4 -0
- package/dist/bin/tool-calls.js.map +1 -0
- package/dist/cli/agent.d.ts +20 -0
- package/dist/cli/agent.d.ts.map +1 -0
- package/dist/cli/agent.js +434 -0
- package/dist/cli/agent.js.map +1 -0
- package/dist/cli/analyze.d.ts +7 -0
- package/dist/cli/analyze.d.ts.map +1 -0
- package/dist/cli/analyze.js +103 -0
- package/dist/cli/analyze.js.map +1 -0
- package/dist/cli/coord.d.ts +7 -0
- package/dist/cli/coord.d.ts.map +1 -0
- package/dist/cli/coord.js +138 -0
- package/dist/cli/coord.js.map +1 -0
- package/dist/cli/dashboard.d.ts +8 -0
- package/dist/cli/dashboard.d.ts.map +1 -0
- package/dist/cli/dashboard.js +704 -0
- package/dist/cli/dashboard.js.map +1 -0
- package/dist/cli/deploy.d.ts +19 -0
- package/dist/cli/deploy.d.ts.map +1 -0
- package/dist/cli/deploy.js +267 -0
- package/dist/cli/deploy.js.map +1 -0
- package/dist/cli/droids.d.ts +9 -0
- package/dist/cli/droids.d.ts.map +1 -0
- package/dist/cli/droids.js +227 -0
- package/dist/cli/droids.js.map +1 -0
- package/dist/cli/generate.d.ts +17 -0
- package/dist/cli/generate.d.ts.map +1 -0
- package/dist/cli/generate.js +432 -0
- package/dist/cli/generate.js.map +1 -0
- package/dist/cli/hooks.d.ts +9 -0
- package/dist/cli/hooks.d.ts.map +1 -0
- package/dist/cli/hooks.js +374 -0
- package/dist/cli/hooks.js.map +1 -0
- package/dist/cli/init.d.ts +11 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +316 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/mcp-router.d.ts +16 -0
- package/dist/cli/mcp-router.d.ts.map +1 -0
- package/dist/cli/mcp-router.js +143 -0
- package/dist/cli/mcp-router.js.map +1 -0
- package/dist/cli/memory.d.ts +24 -0
- package/dist/cli/memory.d.ts.map +1 -0
- package/dist/cli/memory.js +877 -0
- package/dist/cli/memory.js.map +1 -0
- package/dist/cli/model.d.ts +15 -0
- package/dist/cli/model.d.ts.map +1 -0
- package/dist/cli/model.js +270 -0
- package/dist/cli/model.js.map +1 -0
- package/dist/cli/patterns.d.ts +26 -0
- package/dist/cli/patterns.d.ts.map +1 -0
- package/dist/cli/patterns.js +587 -0
- package/dist/cli/patterns.js.map +1 -0
- package/dist/cli/setup-mcp-router.d.ts +8 -0
- package/dist/cli/setup-mcp-router.d.ts.map +1 -0
- package/dist/cli/setup-mcp-router.js +163 -0
- package/dist/cli/setup-mcp-router.js.map +1 -0
- package/dist/cli/setup.d.ts +13 -0
- package/dist/cli/setup.d.ts.map +1 -0
- package/dist/cli/setup.js +146 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/cli/sync.d.ts +7 -0
- package/dist/cli/sync.d.ts.map +1 -0
- package/dist/cli/sync.js +26 -0
- package/dist/cli/sync.js.map +1 -0
- package/dist/cli/task.d.ts +33 -0
- package/dist/cli/task.d.ts.map +1 -0
- package/dist/cli/task.js +616 -0
- package/dist/cli/task.js.map +1 -0
- package/dist/cli/tool-calls.d.ts +8 -0
- package/dist/cli/tool-calls.d.ts.map +1 -0
- package/dist/cli/tool-calls.js +239 -0
- package/dist/cli/tool-calls.js.map +1 -0
- package/dist/cli/update.d.ts +10 -0
- package/dist/cli/update.d.ts.map +1 -0
- package/dist/cli/update.js +300 -0
- package/dist/cli/update.js.map +1 -0
- package/dist/cli/visualize.d.ts +77 -0
- package/dist/cli/visualize.d.ts.map +1 -0
- package/dist/cli/visualize.js +287 -0
- package/dist/cli/visualize.js.map +1 -0
- package/dist/cli/worktree.d.ts +9 -0
- package/dist/cli/worktree.d.ts.map +1 -0
- package/dist/cli/worktree.js +175 -0
- package/dist/cli/worktree.js.map +1 -0
- package/dist/coordination/capability-router.d.ts +79 -0
- package/dist/coordination/capability-router.d.ts.map +1 -0
- package/dist/coordination/capability-router.js +324 -0
- package/dist/coordination/capability-router.js.map +1 -0
- package/dist/coordination/database.d.ts +13 -0
- package/dist/coordination/database.d.ts.map +1 -0
- package/dist/coordination/database.js +131 -0
- package/dist/coordination/database.js.map +1 -0
- package/dist/coordination/deploy-batcher.d.ts +101 -0
- package/dist/coordination/deploy-batcher.d.ts.map +1 -0
- package/dist/coordination/deploy-batcher.js +565 -0
- package/dist/coordination/deploy-batcher.js.map +1 -0
- package/dist/coordination/index.d.ts +5 -0
- package/dist/coordination/index.d.ts.map +1 -0
- package/dist/coordination/index.js +5 -0
- package/dist/coordination/index.js.map +1 -0
- package/dist/coordination/service.d.ts +81 -0
- package/dist/coordination/service.d.ts.map +1 -0
- package/dist/coordination/service.js +603 -0
- package/dist/coordination/service.js.map +1 -0
- package/dist/generators/claude-md.d.ts +3 -0
- package/dist/generators/claude-md.d.ts.map +1 -0
- package/dist/generators/claude-md.js +977 -0
- package/dist/generators/claude-md.js.map +1 -0
- package/dist/generators/template-loader.d.ts +105 -0
- package/dist/generators/template-loader.d.ts.map +1 -0
- package/dist/generators/template-loader.js +291 -0
- package/dist/generators/template-loader.js.map +1 -0
- package/dist/index.d.ts +47 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +59 -0
- package/dist/index.js.map +1 -0
- package/dist/mcp-router/config/parser.d.ts +9 -0
- package/dist/mcp-router/config/parser.d.ts.map +1 -0
- package/dist/mcp-router/config/parser.js +174 -0
- package/dist/mcp-router/config/parser.js.map +1 -0
- package/dist/mcp-router/executor/client.d.ts +31 -0
- package/dist/mcp-router/executor/client.d.ts.map +1 -0
- package/dist/mcp-router/executor/client.js +187 -0
- package/dist/mcp-router/executor/client.js.map +1 -0
- package/dist/mcp-router/index.d.ts +22 -0
- package/dist/mcp-router/index.d.ts.map +1 -0
- package/dist/mcp-router/index.js +18 -0
- package/dist/mcp-router/index.js.map +1 -0
- package/dist/mcp-router/output-compressor.d.ts +26 -0
- package/dist/mcp-router/output-compressor.d.ts.map +1 -0
- package/dist/mcp-router/output-compressor.js +236 -0
- package/dist/mcp-router/output-compressor.js.map +1 -0
- package/dist/mcp-router/search/fuzzy.d.ts +26 -0
- package/dist/mcp-router/search/fuzzy.d.ts.map +1 -0
- package/dist/mcp-router/search/fuzzy.js +94 -0
- package/dist/mcp-router/search/fuzzy.js.map +1 -0
- package/dist/mcp-router/server.d.ts +50 -0
- package/dist/mcp-router/server.d.ts.map +1 -0
- package/dist/mcp-router/server.js +229 -0
- package/dist/mcp-router/server.js.map +1 -0
- package/dist/mcp-router/session-stats.d.ts +37 -0
- package/dist/mcp-router/session-stats.d.ts.map +1 -0
- package/dist/mcp-router/session-stats.js +56 -0
- package/dist/mcp-router/session-stats.js.map +1 -0
- package/dist/mcp-router/tools/discover.d.ts +37 -0
- package/dist/mcp-router/tools/discover.d.ts.map +1 -0
- package/dist/mcp-router/tools/discover.js +65 -0
- package/dist/mcp-router/tools/discover.js.map +1 -0
- package/dist/mcp-router/tools/execute.d.ts +43 -0
- package/dist/mcp-router/tools/execute.d.ts.map +1 -0
- package/dist/mcp-router/tools/execute.js +103 -0
- package/dist/mcp-router/tools/execute.js.map +1 -0
- package/dist/mcp-router/types.d.ts +62 -0
- package/dist/mcp-router/types.d.ts.map +1 -0
- package/dist/mcp-router/types.js +6 -0
- package/dist/mcp-router/types.js.map +1 -0
- package/dist/memory/adaptive-context.d.ts +146 -0
- package/dist/memory/adaptive-context.d.ts.map +1 -0
- package/dist/memory/adaptive-context.js +1022 -0
- package/dist/memory/adaptive-context.js.map +1 -0
- package/dist/memory/agent-scoped-memory.d.ts +67 -0
- package/dist/memory/agent-scoped-memory.d.ts.map +1 -0
- package/dist/memory/agent-scoped-memory.js +126 -0
- package/dist/memory/agent-scoped-memory.js.map +1 -0
- package/dist/memory/backends/base.d.ts +18 -0
- package/dist/memory/backends/base.d.ts.map +1 -0
- package/dist/memory/backends/base.js +2 -0
- package/dist/memory/backends/base.js.map +1 -0
- package/dist/memory/backends/factory.d.ts +4 -0
- package/dist/memory/backends/factory.d.ts.map +1 -0
- package/dist/memory/backends/factory.js +53 -0
- package/dist/memory/backends/factory.js.map +1 -0
- package/dist/memory/backends/github.d.ts +22 -0
- package/dist/memory/backends/github.d.ts.map +1 -0
- package/dist/memory/backends/github.js +118 -0
- package/dist/memory/backends/github.js.map +1 -0
- package/dist/memory/backends/qdrant-cloud.d.ts +32 -0
- package/dist/memory/backends/qdrant-cloud.d.ts.map +1 -0
- package/dist/memory/backends/qdrant-cloud.js +168 -0
- package/dist/memory/backends/qdrant-cloud.js.map +1 -0
- package/dist/memory/context-compressor.d.ts +74 -0
- package/dist/memory/context-compressor.d.ts.map +1 -0
- package/dist/memory/context-compressor.js +289 -0
- package/dist/memory/context-compressor.js.map +1 -0
- package/dist/memory/correction-propagator.d.ts +44 -0
- package/dist/memory/correction-propagator.d.ts.map +1 -0
- package/dist/memory/correction-propagator.js +156 -0
- package/dist/memory/correction-propagator.js.map +1 -0
- package/dist/memory/daily-log.d.ts +67 -0
- package/dist/memory/daily-log.d.ts.map +1 -0
- package/dist/memory/daily-log.js +143 -0
- package/dist/memory/daily-log.js.map +1 -0
- package/dist/memory/dynamic-retrieval.d.ts +110 -0
- package/dist/memory/dynamic-retrieval.d.ts.map +1 -0
- package/dist/memory/dynamic-retrieval.js +688 -0
- package/dist/memory/dynamic-retrieval.js.map +1 -0
- package/dist/memory/embeddings.d.ts +116 -0
- package/dist/memory/embeddings.d.ts.map +1 -0
- package/dist/memory/embeddings.js +461 -0
- package/dist/memory/embeddings.js.map +1 -0
- package/dist/memory/hierarchical-memory.d.ts +141 -0
- package/dist/memory/hierarchical-memory.d.ts.map +1 -0
- package/dist/memory/hierarchical-memory.js +477 -0
- package/dist/memory/hierarchical-memory.js.map +1 -0
- package/dist/memory/memory-consolidator.d.ts +124 -0
- package/dist/memory/memory-consolidator.d.ts.map +1 -0
- package/dist/memory/memory-consolidator.js +514 -0
- package/dist/memory/memory-consolidator.js.map +1 -0
- package/dist/memory/memory-maintenance.d.ts +39 -0
- package/dist/memory/memory-maintenance.d.ts.map +1 -0
- package/dist/memory/memory-maintenance.js +305 -0
- package/dist/memory/memory-maintenance.js.map +1 -0
- package/dist/memory/model-router.d.ts +102 -0
- package/dist/memory/model-router.d.ts.map +1 -0
- package/dist/memory/model-router.js +448 -0
- package/dist/memory/model-router.js.map +1 -0
- package/dist/memory/multi-view-memory.d.ts +134 -0
- package/dist/memory/multi-view-memory.d.ts.map +1 -0
- package/dist/memory/multi-view-memory.js +420 -0
- package/dist/memory/multi-view-memory.js.map +1 -0
- package/dist/memory/prepopulate.d.ts +76 -0
- package/dist/memory/prepopulate.d.ts.map +1 -0
- package/dist/memory/prepopulate.js +815 -0
- package/dist/memory/prepopulate.js.map +1 -0
- package/dist/memory/semantic-compression.d.ts +77 -0
- package/dist/memory/semantic-compression.d.ts.map +1 -0
- package/dist/memory/semantic-compression.js +348 -0
- package/dist/memory/semantic-compression.js.map +1 -0
- package/dist/memory/serverless-qdrant.d.ts +102 -0
- package/dist/memory/serverless-qdrant.d.ts.map +1 -0
- package/dist/memory/serverless-qdrant.js +369 -0
- package/dist/memory/serverless-qdrant.js.map +1 -0
- package/dist/memory/short-term/factory.d.ts +26 -0
- package/dist/memory/short-term/factory.d.ts.map +1 -0
- package/dist/memory/short-term/factory.js +28 -0
- package/dist/memory/short-term/factory.js.map +1 -0
- package/dist/memory/short-term/indexeddb.d.ts +25 -0
- package/dist/memory/short-term/indexeddb.d.ts.map +1 -0
- package/dist/memory/short-term/indexeddb.js +64 -0
- package/dist/memory/short-term/indexeddb.js.map +1 -0
- package/dist/memory/short-term/schema.d.ts +6 -0
- package/dist/memory/short-term/schema.d.ts.map +1 -0
- package/dist/memory/short-term/schema.js +119 -0
- package/dist/memory/short-term/schema.js.map +1 -0
- package/dist/memory/short-term/sqlite.d.ts +50 -0
- package/dist/memory/short-term/sqlite.d.ts.map +1 -0
- package/dist/memory/short-term/sqlite.js +221 -0
- package/dist/memory/short-term/sqlite.js.map +1 -0
- package/dist/memory/speculative-cache.d.ts +111 -0
- package/dist/memory/speculative-cache.d.ts.map +1 -0
- package/dist/memory/speculative-cache.js +409 -0
- package/dist/memory/speculative-cache.js.map +1 -0
- package/dist/memory/task-classifier.d.ts +34 -0
- package/dist/memory/task-classifier.d.ts.map +1 -0
- package/dist/memory/task-classifier.js +300 -0
- package/dist/memory/task-classifier.js.map +1 -0
- package/dist/memory/terminal-bench-knowledge.d.ts +48 -0
- package/dist/memory/terminal-bench-knowledge.d.ts.map +1 -0
- package/dist/memory/terminal-bench-knowledge.js +399 -0
- package/dist/memory/terminal-bench-knowledge.js.map +1 -0
- package/dist/memory/write-gate.d.ts +39 -0
- package/dist/memory/write-gate.d.ts.map +1 -0
- package/dist/memory/write-gate.js +190 -0
- package/dist/memory/write-gate.js.map +1 -0
- package/dist/models/executor.d.ts +130 -0
- package/dist/models/executor.d.ts.map +1 -0
- package/dist/models/executor.js +383 -0
- package/dist/models/executor.js.map +1 -0
- package/dist/models/index.d.ts +15 -0
- package/dist/models/index.d.ts.map +1 -0
- package/dist/models/index.js +17 -0
- package/dist/models/index.js.map +1 -0
- package/dist/models/planner.d.ts +71 -0
- package/dist/models/planner.d.ts.map +1 -0
- package/dist/models/planner.js +344 -0
- package/dist/models/planner.js.map +1 -0
- package/dist/models/router.d.ts +75 -0
- package/dist/models/router.d.ts.map +1 -0
- package/dist/models/router.js +344 -0
- package/dist/models/router.js.map +1 -0
- package/dist/models/types.d.ts +370 -0
- package/dist/models/types.d.ts.map +1 -0
- package/dist/models/types.js +181 -0
- package/dist/models/types.js.map +1 -0
- package/dist/tasks/coordination.d.ts +74 -0
- package/dist/tasks/coordination.d.ts.map +1 -0
- package/dist/tasks/coordination.js +237 -0
- package/dist/tasks/coordination.js.map +1 -0
- package/dist/tasks/database.d.ts +14 -0
- package/dist/tasks/database.d.ts.map +1 -0
- package/dist/tasks/database.js +128 -0
- package/dist/tasks/database.js.map +1 -0
- package/dist/tasks/index.d.ts +5 -0
- package/dist/tasks/index.d.ts.map +1 -0
- package/dist/tasks/index.js +5 -0
- package/dist/tasks/index.js.map +1 -0
- package/dist/tasks/service.d.ts +39 -0
- package/dist/tasks/service.d.ts.map +1 -0
- package/dist/tasks/service.js +582 -0
- package/dist/tasks/service.js.map +1 -0
- package/dist/tasks/types.d.ts +224 -0
- package/dist/tasks/types.d.ts.map +1 -0
- package/dist/tasks/types.js +64 -0
- package/dist/tasks/types.js.map +1 -0
- package/dist/types/analysis.d.ts +82 -0
- package/dist/types/analysis.d.ts.map +1 -0
- package/dist/types/analysis.js +2 -0
- package/dist/types/analysis.js.map +1 -0
- package/dist/types/config.d.ts +3023 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +292 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/coordination.d.ts +240 -0
- package/dist/types/coordination.d.ts.map +1 -0
- package/dist/types/coordination.js +43 -0
- package/dist/types/coordination.js.map +1 -0
- package/dist/types/index.d.ts +4 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +4 -0
- package/dist/types/index.js.map +1 -0
- package/dist/utils/calculate-average.d.ts +15 -0
- package/dist/utils/calculate-average.d.ts.map +1 -0
- package/dist/utils/calculate-average.js +21 -0
- package/dist/utils/calculate-average.js.map +1 -0
- package/dist/utils/config-manager.d.ts +30 -0
- package/dist/utils/config-manager.d.ts.map +1 -0
- package/dist/utils/config-manager.js +41 -0
- package/dist/utils/config-manager.js.map +1 -0
- package/dist/utils/dijkstra.d.ts +17 -0
- package/dist/utils/dijkstra.d.ts.map +1 -0
- package/dist/utils/dijkstra.js +91 -0
- package/dist/utils/dijkstra.js.map +1 -0
- package/dist/utils/fetch-with-retry.d.ts +5 -0
- package/dist/utils/fetch-with-retry.d.ts.map +1 -0
- package/dist/utils/fetch-with-retry.js +61 -0
- package/dist/utils/fetch-with-retry.js.map +1 -0
- package/dist/utils/merge-claude-md.d.ts +28 -0
- package/dist/utils/merge-claude-md.d.ts.map +1 -0
- package/dist/utils/merge-claude-md.js +342 -0
- package/dist/utils/merge-claude-md.js.map +1 -0
- package/dist/utils/order-processor-refactored.d.ts +126 -0
- package/dist/utils/order-processor-refactored.d.ts.map +1 -0
- package/dist/utils/order-processor-refactored.js +165 -0
- package/dist/utils/order-processor-refactored.js.map +1 -0
- package/dist/utils/order-processor-strategy.d.ts +72 -0
- package/dist/utils/order-processor-strategy.d.ts.map +1 -0
- package/dist/utils/order-processor-strategy.js +158 -0
- package/dist/utils/order-processor-strategy.js.map +1 -0
- package/dist/utils/order-processor.d.ts +242 -0
- package/dist/utils/order-processor.d.ts.map +1 -0
- package/dist/utils/order-processor.js +370 -0
- package/dist/utils/order-processor.js.map +1 -0
- package/dist/utils/rate-limiter-simple.d.ts +58 -0
- package/dist/utils/rate-limiter-simple.d.ts.map +1 -0
- package/dist/utils/rate-limiter-simple.js +100 -0
- package/dist/utils/rate-limiter-simple.js.map +1 -0
- package/dist/utils/rate-limiter.d.ts +62 -0
- package/dist/utils/rate-limiter.d.ts.map +1 -0
- package/dist/utils/rate-limiter.js +150 -0
- package/dist/utils/rate-limiter.js.map +1 -0
- package/dist/utils/string-similarity.d.ts +37 -0
- package/dist/utils/string-similarity.d.ts.map +1 -0
- package/dist/utils/string-similarity.js +114 -0
- package/dist/utils/string-similarity.js.map +1 -0
- package/dist/utils/validate-json.d.ts +51 -0
- package/dist/utils/validate-json.d.ts.map +1 -0
- package/dist/utils/validate-json.js +99 -0
- package/dist/utils/validate-json.js.map +1 -0
- package/package.json +96 -0
- package/templates/CLAUDE.template.md +11 -0
- package/templates/CLAUDE_ARCHITECTURE.template.md +103 -0
- package/templates/CLAUDE_CODING.template.md +125 -0
- package/templates/CLAUDE_DROIDS.template.md +109 -0
- package/templates/CLAUDE_MEMORY.template.md +130 -0
- package/templates/CLAUDE_WORKFLOWS.template.md +136 -0
- package/templates/PROJECT.template.md +209 -0
- package/templates/SCHEMA.md +57 -0
- package/templates/archive/CLAUDE.template.root-v6.md +762 -0
- package/templates/archive/CLAUDE.template.v6.md +762 -0
- package/templates/hooks/pre-compact.sh +68 -0
- package/templates/hooks/session-start.sh +106 -0
- package/tools/agents/README.md +224 -0
- package/tools/agents/UAP/README.md +351 -0
- package/tools/agents/UAP/__init__.py +9 -0
- package/tools/agents/UAP/cli.py +675 -0
- package/tools/agents/UAP/version.py +2 -0
- package/tools/agents/benchmarks/benchmark_memory_systems.py +637 -0
- package/tools/agents/benchmarks/results/benchmark_20260106_064817.json +170 -0
- package/tools/agents/benchmarks/results/benchmark_20260106_064817.md +51 -0
- package/tools/agents/config/chat_template.jinja +172 -0
- package/tools/agents/docker-compose.qdrant.yml +24 -0
- package/tools/agents/migrations/apply.py +256 -0
- package/tools/agents/scripts/fix_qwen_chat_template.py +314 -0
- package/tools/agents/scripts/init_qdrant.py +151 -0
- package/tools/agents/scripts/memory_migration.py +518 -0
- package/tools/agents/scripts/migrate_memory_to_qdrant.py +113 -0
- package/tools/agents/scripts/query_memory.py +189 -0
- package/tools/agents/scripts/qwen_tool_call_test.py +419 -0
- package/tools/agents/scripts/qwen_tool_call_wrapper.py +517 -0
- package/tools/agents/scripts/start-services.sh +96 -0
- package/tools/agents/tests/test_uap_compliance.py +257 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Query agent memory systems (both short-term SQLite and long-term Qdrant).
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
# Query short-term memory (recent actions)
|
|
7
|
+
agents/.venv/bin/python agents/scripts/query_memory.py short
|
|
8
|
+
|
|
9
|
+
# Semantic search in long-term memory
|
|
10
|
+
agents/.venv/bin/python agents/scripts/query_memory.py long "Redis caching"
|
|
11
|
+
|
|
12
|
+
# Add to short-term memory
|
|
13
|
+
agents/.venv/bin/python agents/scripts/query_memory.py add action "Deployed new feature X"
|
|
14
|
+
|
|
15
|
+
# Add to long-term memory (with embedding)
|
|
16
|
+
agents/.venv/bin/python agents/scripts/query_memory.py store lesson "Always check network policies" --tags networking,kubernetes --importance 8
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import json
|
|
21
|
+
import sqlite3
|
|
22
|
+
import uuid
|
|
23
|
+
from datetime import datetime
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
from sentence_transformers import SentenceTransformer
|
|
28
|
+
from qdrant_client import QdrantClient
|
|
29
|
+
from qdrant_client.models import PointStruct
|
|
30
|
+
QDRANT_AVAILABLE = True
|
|
31
|
+
except ImportError:
|
|
32
|
+
QDRANT_AVAILABLE = False
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_project_root():
    """Return the grandparent of the directory that contains this script.

    The result is derived from ``__file__`` alone; it is neither resolved
    to an absolute path nor checked for existence.
    """
    script_dir = Path(__file__).parent
    one_level_up = script_dir.parent
    return one_level_up.parent
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def query_short_term(limit=50):
    """Print the newest entries of the short-term SQLite memory.

    Args:
        limit: Maximum number of rows to fetch; rows are ordered by ``id``
            descending, so the most recently inserted come first.

    Returns:
        None. Output goes to stdout. If the database file does not exist a
        notice is printed and nothing is queried.
    """
    db_path = get_project_root() / "agents/data/memory/short_term.db"

    if not db_path.exists():
        print("Short-term memory not initialized.")
        return

    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(
            """
            SELECT id, timestamp, type, content
            FROM memories
            ORDER BY id DESC
            LIMIT ?
            """,
            (limit,),
        ).fetchall()
    finally:
        # Close the handle even if the query raises (e.g. the table is
        # missing); previously the connection leaked on that path.
        conn.close()

    print(f"=== Short-term Memory (last {len(rows)} entries) ===\n")
    for id_, timestamp, type_, content in rows:
        # A NULL column arrives as None, which would break the ``:11s``
        # format spec and the slice below, so normalise to text first.
        type_ = "" if type_ is None else str(type_)
        content = "" if content is None else str(content)
        print(f"[{id_:3d}] {timestamp} [{type_:11s}]")
        print(f" {content[:100]}{'...' if len(content) > 100 else ''}")
        print()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def add_short_term(type_: str, content: str):
    """Insert one entry into the short-term SQLite memory.

    Args:
        type_: Value stored in the ``type`` column.
        content: Value stored in the ``content`` column.

    Returns:
        None. A confirmation (or a "not initialized" notice) is printed.

    The row is stamped with the current UTC time in ISO-8601 form with a
    trailing ``Z``.
    """
    # Imported locally: the file-level imports only bring in ``datetime``.
    from datetime import timezone

    db_path = get_project_root() / "agents/data/memory/short_term.db"

    # Same guard as query_short_term(). Without it sqlite3.connect() would
    # create a brand-new, table-less database file and the INSERT below
    # would then fail with "no such table".
    if not db_path.exists():
        print("Short-term memory not initialized.")
        return

    # Equivalent to the deprecated datetime.utcnow(): a naive UTC timestamp.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    timestamp = now_utc.isoformat() + "Z"

    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            "INSERT INTO memories (timestamp, type, content) VALUES (?, ?, ?)",
            (timestamp, type_, content),
        )
        conn.commit()
    finally:
        # Always release the handle, including when the INSERT fails.
        conn.close()

    # Only show an ellipsis when the preview actually truncates the content.
    preview = content[:50] + ("..." if len(content) > 50 else "")
    print(f"Added to short-term memory: [{type_}] {preview}")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def query_long_term(query: str, limit=5):
    """Run a semantic search against the long-term Qdrant memory and print hits.

    Args:
        query: Free-text query; it is embedded with the
            ``all-MiniLM-L6-v2`` sentence-transformer model.
        limit: Maximum number of points requested from Qdrant.

    Returns:
        None. Results are printed. If the optional Qdrant /
        sentence-transformers imports failed, install instructions are
        printed instead and no search is attempted.
    """
    if not QDRANT_AVAILABLE:
        print("Qdrant client not available. Install with:")
        print(" agents/.venv/bin/pip install sentence-transformers qdrant-client")
        return

    client = QdrantClient(host="localhost", port=6333)
    model = SentenceTransformer("all-MiniLM-L6-v2")

    query_embedding = model.encode(query).tolist()

    results = client.query_points(
        collection_name="claude_memory",
        query=query_embedding,
        limit=limit
    )

    print(f"=== Long-term Memory Search: '{query}' ===\n")
    for i, point in enumerate(results.points, 1):
        # Points written by other tools may lack some keys (or, presumably,
        # carry no payload at all -- TODO confirm against the client API);
        # fall back to placeholders instead of raising.
        payload = point.payload or {}
        type_label = str(payload.get("type", "unknown"))
        content = str(payload.get("content", ""))
        tags = payload.get("tags") or []

        print(f"{i}. [{type_label:10s}] Score: {point.score:.3f}")
        print(f" Tags: {', '.join(str(tag) for tag in tags)}")
        # Only show an ellipsis when the content was actually truncated.
        print(f" {content[:100]}{'...' if len(content) > 100 else ''}")
        print()
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def store_long_term(type_: str, content: str, tags: list, importance: int):
    """Embed ``content`` and upsert it into the long-term Qdrant memory.

    Args:
        type_: Stored in the payload as ``type`` and used as the prefix of
            the generated ``original_id``.
        content: Text to embed (``all-MiniLM-L6-v2``) and store.
        tags: List of tag strings stored in the payload.
        importance: Integer stored in the payload as ``importance``.

    Returns:
        None. A confirmation is printed. If the optional Qdrant /
        sentence-transformers imports failed, install instructions are
        printed instead and nothing is stored.
    """
    # Imported locally: the file-level imports only bring in ``datetime``.
    from datetime import timezone

    if not QDRANT_AVAILABLE:
        print("Qdrant client not available. Install with:")
        print(" agents/.venv/bin/pip install sentence-transformers qdrant-client")
        return

    client = QdrantClient(host="localhost", port=6333)
    model = SentenceTransformer("all-MiniLM-L6-v2")

    embedding = model.encode(content).tolist()

    # Equivalent to the deprecated datetime.utcnow(): a naive UTC timestamp.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    point = PointStruct(
        id=str(uuid.uuid4()),
        vector=embedding,
        payload={
            "original_id": f"{type_}-{uuid.uuid4().hex[:8]}",
            "type": type_,
            "tags": tags,
            "importance": importance,
            "content": content,
            "timestamp": now_utc.isoformat() + "Z",
        },
    )

    client.upsert(
        collection_name="claude_memory",
        points=[point]
    )

    print(f"Stored in long-term memory: [{type_}] importance={importance}")
    print(f" Tags: {', '.join(tags)}")
    # Only show an ellipsis when the preview actually truncates the content.
    print(f" {content[:80]}{'...' if len(content) > 80 else ''}")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def main():
    """Parse the command line and run the selected memory operation.

    Sub-commands: ``short`` and ``long`` query, ``add`` and ``store`` write.
    When no sub-command is given the help text is printed.
    """
    parser = argparse.ArgumentParser(description="Query agent memory systems")
    commands = parser.add_subparsers(dest="command", help="Command")

    # short: list recent short-term entries
    short_cmd = commands.add_parser("short", help="Query short-term memory")
    short_cmd.add_argument("-n", "--limit", type=int, default=50, help="Number of entries")

    # long: semantic search in long-term memory
    long_cmd = commands.add_parser("long", help="Semantic search in long-term memory")
    long_cmd.add_argument("query", help="Search query")
    long_cmd.add_argument("-n", "--limit", type=int, default=5, help="Number of results")

    # add: write to short-term memory
    add_cmd = commands.add_parser("add", help="Add to short-term memory")
    add_cmd.add_argument("type", choices=["action", "observation", "thought", "goal"])
    add_cmd.add_argument("content", help="Memory content")

    # store: write to long-term memory
    store_cmd = commands.add_parser("store", help="Store in long-term memory")
    store_cmd.add_argument("type", choices=["fact", "skill", "preference", "lesson", "discovery"])
    store_cmd.add_argument("content", help="Memory content")
    store_cmd.add_argument("--tags", default="", help="Comma-separated tags")
    store_cmd.add_argument("--importance", type=int, default=5, help="Importance 1-10")

    args = parser.parse_args()

    def _store(ns):
        # Split the comma-separated tag string, dropping blank pieces.
        stripped = (piece.strip() for piece in ns.tags.split(","))
        tags = [tag for tag in stripped if tag]
        store_long_term(ns.type, ns.content, tags, ns.importance)

    handlers = {
        "short": lambda ns: query_short_term(ns.limit),
        "long": lambda ns: query_long_term(ns.query, ns.limit),
        "add": lambda ns: add_short_term(ns.type, ns.content),
        "store": _store,
    }

    handler = handlers.get(args.command)
    if handler is None:
        parser.print_help()
        return
    handler(args)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Qwen3.5 Tool Call Reliability Test Suite
|
|
4
|
+
|
|
5
|
+
Tests tool calling reliability across different scenarios:
|
|
6
|
+
1. Single tool call (baseline)
|
|
7
|
+
2. Multiple consecutive tool calls
|
|
8
|
+
3. Long context scenarios
|
|
9
|
+
4. With and without template fixes
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python3 qwen_tool_call_test.py [--verbose] [--iterations N]
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import sys
|
|
16
|
+
import time
|
|
17
|
+
import argparse
|
|
18
|
+
import logging
|
|
19
|
+
from typing import List, Dict, Any, Tuple
|
|
20
|
+
from dataclasses import dataclass, asdict
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
import json
|
|
23
|
+
|
|
24
|
+
# Add this script's own directory to sys.path so the sibling module
# ``qwen_tool_call_wrapper`` can be imported regardless of the current
# working directory.
# ``Path`` is imported here because the file-level imports above do not
# include it; without this line the sys.path statement raises NameError
# before anything else runs.
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))

try:
    from qwen_tool_call_wrapper import Qwen35ToolCallClient, Qwen35ToolCallError
except ImportError:
    # The wrapper is required by everything below; abort with a hint.
    print("❌ Error: qwen_tool_call_wrapper.py not found")
    print(" Run from: tools/agents/")
    sys.exit(1)

# Configure logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("qwen35_test")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ``Optional`` is not among the file-level ``typing`` imports, so it is
# brought in here for the ``error`` annotation below.
from typing import Optional


@dataclass
class TestResult:
    """Result of a single test"""

    # Name the test was run under.
    test_name: str
    # True when the test was judged to have passed.
    success: bool
    # Elapsed time for the whole test, in milliseconds.
    latency_ms: float
    # Number of attempts that were made.
    attempts: int
    # Error text when the test raised; None otherwise. The original
    # annotation was ``str`` even though the default is None.
    error: Optional[str] = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class TestSummary:
    """Aggregate counts and individual results for a batch of tests."""

    total_tests: int
    passed_tests: int
    failed_tests: int
    results: List[TestResult]

    @property
    def success_rate(self) -> float:
        """Passed tests as a percentage of all tests (0.0 when there are none)."""
        return (
            self.passed_tests / self.total_tests * 100
            if self.total_tests != 0
            else 0.0
        )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class Qwen35TestSuite:
|
|
69
|
+
"""Test suite for Qwen3.5 tool calling reliability"""
|
|
70
|
+
|
|
71
|
+
def __init__(self, client: Qwen35ToolCallClient, verbose: bool = False):
    """Store the client and options and define the tool schemas used by tests.

    Args:
        client: Tool-call client the tests send their requests through.
        verbose: When True, extra warnings/errors are logged per attempt.
    """

    def _function_tool(name, description, properties, required):
        # Wrap one function description in the common tool-schema envelope.
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                    "required": required,
                },
            },
        }

    self.client = client
    self.verbose = verbose
    self.results: List[TestResult] = []

    # Define test tools
    read_file_tool = _function_tool(
        "read_file",
        "Read file contents from specified path",
        {
            "path": {
                "type": "string",
                "description": "Absolute file path",
            }
        },
        ["path"],
    )
    calculate_tool = _function_tool(
        "calculate",
        "Perform mathematical calculation",
        {
            "operation": {
                "type": "string",
                "enum": ["add", "subtract", "multiply", "divide"],
            },
            "a": {"type": "number"},
            "b": {"type": "number"},
        },
        ["operation", "a", "b"],
    )
    system_info_tool = _function_tool(
        "get_system_info",
        "Get system information",
        {
            "info_type": {
                "type": "string",
                "enum": ["cpu", "memory", "disk", "all"],
            }
        },
        ["info_type"],
    )
    self.tools = [read_file_tool, calculate_tool, system_info_tool]
|
|
132
|
+
|
|
133
|
+
def run_test(
|
|
134
|
+
self,
|
|
135
|
+
test_name: str,
|
|
136
|
+
messages: List[Dict],
|
|
137
|
+
expected_tool_calls: int = 1,
|
|
138
|
+
timeout: int = 60,
|
|
139
|
+
) -> TestResult:
|
|
140
|
+
"""Run a single test"""
|
|
141
|
+
start_time = time.time()
|
|
142
|
+
attempts = 0
|
|
143
|
+
success = False
|
|
144
|
+
error = None
|
|
145
|
+
latency_ms = 0
|
|
146
|
+
|
|
147
|
+
try:
|
|
148
|
+
for attempt in range(3): # Allow up to 3 attempts per test
|
|
149
|
+
attempts += 1
|
|
150
|
+
|
|
151
|
+
response = self.client.chat_with_tools(
|
|
152
|
+
messages=messages, tools=self.tools, timeout=timeout
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
tool_calls = response.choices[0].message.tool_calls
|
|
156
|
+
|
|
157
|
+
if tool_calls and len(tool_calls) >= expected_tool_calls:
|
|
158
|
+
success = True
|
|
159
|
+
break
|
|
160
|
+
else:
|
|
161
|
+
if self.verbose:
|
|
162
|
+
logger.warning(
|
|
163
|
+
f"Test '{test_name}' attempt {attempt + 1}: "
|
|
164
|
+
f"Expected {expected_tool_calls} tool calls, "
|
|
165
|
+
f"got {len(tool_calls) if tool_calls else 0}"
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
latency_ms = (time.time() - start_time) * 1000
|
|
169
|
+
|
|
170
|
+
except Qwen35ToolCallError as e:
|
|
171
|
+
error = str(e)
|
|
172
|
+
latency_ms = (time.time() - start_time) * 1000
|
|
173
|
+
if self.verbose:
|
|
174
|
+
logger.error(f"Test '{test_name}' failed: {error}")
|
|
175
|
+
|
|
176
|
+
result = TestResult(
|
|
177
|
+
test_name=test_name,
|
|
178
|
+
success=success,
|
|
179
|
+
latency_ms=latency_ms,
|
|
180
|
+
attempts=attempts,
|
|
181
|
+
error=error,
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
self.results.append(result)
|
|
185
|
+
|
|
186
|
+
status = "✅ PASS" if success else "❌ FAIL"
|
|
187
|
+
logger.info(
|
|
188
|
+
f"{status} - {test_name} ({latency_ms:.0f}ms, {attempts} attempt(s))"
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
return result
|
|
192
|
+
|
|
193
|
+
def test_single_tool_call(self) -> TestResult:
|
|
194
|
+
"""Test 1: Single tool call (baseline)"""
|
|
195
|
+
return self.run_test(
|
|
196
|
+
"Single Tool Call",
|
|
197
|
+
[{"role": "user", "content": "Read file at /etc/hostname"}],
|
|
198
|
+
expected_tool_calls=1,
|
|
199
|
+
)
|
|
200
|
+
|
|
201
|
+
def test_two_consecutive_tool_calls(self) -> TestResult:
|
|
202
|
+
"""Test 2: Two consecutive tool calls"""
|
|
203
|
+
return self.run_test(
|
|
204
|
+
"Two Consecutive Tool Calls",
|
|
205
|
+
[{"role": "user", "content": "Read /etc/hostname and calculate 5 + 3"}],
|
|
206
|
+
expected_tool_calls=2,
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
def test_three_tool_calls(self) -> TestResult:
|
|
210
|
+
"""Test 3: Three consecutive tool calls"""
|
|
211
|
+
return self.run_test(
|
|
212
|
+
"Three Tool Calls",
|
|
213
|
+
[
|
|
214
|
+
{
|
|
215
|
+
"role": "user",
|
|
216
|
+
"content": "Read /etc/hostname, calculate 10 * 5, and get system info for cpu",
|
|
217
|
+
}
|
|
218
|
+
],
|
|
219
|
+
expected_tool_calls=3,
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
def test_five_tool_calls(self) -> TestResult:
|
|
223
|
+
"""Test 4: Five consecutive tool calls (stress test)"""
|
|
224
|
+
return self.run_test(
|
|
225
|
+
"Five Tool Calls (Stress)",
|
|
226
|
+
[
|
|
227
|
+
{
|
|
228
|
+
"role": "user",
|
|
229
|
+
"content": """
|
|
230
|
+
Read /etc/hostname
|
|
231
|
+
Calculate 100 / 4
|
|
232
|
+
Calculate 7 * 8
|
|
233
|
+
Get system info for memory
|
|
234
|
+
Get system info for disk
|
|
235
|
+
""",
|
|
236
|
+
}
|
|
237
|
+
],
|
|
238
|
+
expected_tool_calls=5,
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
def test_with_reasoning_content(self) -> TestResult:
|
|
242
|
+
"""Test 5: Test that reasoning content doesn't interfere"""
|
|
243
|
+
# This test verifies the thinking mode is disabled
|
|
244
|
+
messages = [
|
|
245
|
+
{"role": "system", "content": "Think step by step before answering"},
|
|
246
|
+
{"role": "user", "content": "Read /etc/hosts"},
|
|
247
|
+
]
|
|
248
|
+
|
|
249
|
+
return self.run_test("Reasoning Content Test", messages, expected_tool_calls=1)
|
|
250
|
+
|
|
251
|
+
def test_invalid_tool_format_recovery(self) -> TestResult:
|
|
252
|
+
"""Test 6: Recovery from invalid tool format"""
|
|
253
|
+
# This tests the retry logic
|
|
254
|
+
return self.run_test(
|
|
255
|
+
"Invalid Format Recovery",
|
|
256
|
+
[{"role": "user", "content": "Call read_file with path /test.txt"}],
|
|
257
|
+
expected_tool_calls=1,
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
def run_all_tests(self) -> TestSummary:
|
|
261
|
+
"""Run all tests"""
|
|
262
|
+
logger.info("=" * 70)
|
|
263
|
+
logger.info("Qwen3.5 Tool Call Reliability Test Suite")
|
|
264
|
+
logger.info("=" * 70)
|
|
265
|
+
logger.info(f"Model: {self.client.config['model']}")
|
|
266
|
+
logger.info(f"Base URL: {self.client.config['base_url']}")
|
|
267
|
+
logger.info(f"Temperature: {self.client.config['temperature']}")
|
|
268
|
+
logger.info(f"Thinking Mode: {self.client.config['enable_thinking']}")
|
|
269
|
+
logger.info("=" * 70)
|
|
270
|
+
logger.info("")
|
|
271
|
+
|
|
272
|
+
# Run tests
|
|
273
|
+
tests = [
|
|
274
|
+
self.test_single_tool_call,
|
|
275
|
+
self.test_two_consecutive_tool_calls,
|
|
276
|
+
self.test_three_tool_calls,
|
|
277
|
+
self.test_five_tool_calls,
|
|
278
|
+
self.test_with_reasoning_content,
|
|
279
|
+
self.test_invalid_tool_format_recovery,
|
|
280
|
+
]
|
|
281
|
+
|
|
282
|
+
for test in tests:
|
|
283
|
+
test()
|
|
284
|
+
if self.verbose:
|
|
285
|
+
time.sleep(1) # Small delay between tests
|
|
286
|
+
|
|
287
|
+
# Calculate summary
|
|
288
|
+
passed = sum(1 for r in self.results if r.success)
|
|
289
|
+
failed = len(self.results) - passed
|
|
290
|
+
|
|
291
|
+
summary = TestSummary(
|
|
292
|
+
total_tests=len(self.results),
|
|
293
|
+
passed_tests=passed,
|
|
294
|
+
failed_tests=failed,
|
|
295
|
+
results=self.results,
|
|
296
|
+
)
|
|
297
|
+
|
|
298
|
+
return summary
|
|
299
|
+
|
|
300
|
+
def print_summary(self, summary: TestSummary):
|
|
301
|
+
"""Print test summary"""
|
|
302
|
+
print("\n" + "=" * 70)
|
|
303
|
+
print("TEST SUMMARY")
|
|
304
|
+
print("=" * 70)
|
|
305
|
+
print(f"Total Tests: {summary.total_tests}")
|
|
306
|
+
print(f"Passed: {summary.passed_tests}")
|
|
307
|
+
print(f"Failed: {summary.failed_tests}")
|
|
308
|
+
print(f"Success Rate: {summary.success_rate:.1f}%")
|
|
309
|
+
print("=" * 70)
|
|
310
|
+
|
|
311
|
+
print("\nDetailed Results:")
|
|
312
|
+
print("-" * 70)
|
|
313
|
+
|
|
314
|
+
for result in summary.results:
|
|
315
|
+
status = "✅ PASS" if result.success else "❌ FAIL"
|
|
316
|
+
print(f"{status} {result.test_name}")
|
|
317
|
+
print(
|
|
318
|
+
f" Latency: {result.latency_ms:.0f}ms | Attempts: {result.attempts}"
|
|
319
|
+
)
|
|
320
|
+
if result.error:
|
|
321
|
+
print(f" Error: {result.error[:100]}...")
|
|
322
|
+
print()
|
|
323
|
+
|
|
324
|
+
print("-" * 70)
|
|
325
|
+
|
|
326
|
+
# Performance analysis
|
|
327
|
+
if summary.success_rate >= 90:
|
|
328
|
+
print("✅ EXCELLENT: Tool calling is highly reliable")
|
|
329
|
+
elif summary.success_rate >= 70:
|
|
330
|
+
print("⚠️ GOOD: Tool calling is reliable with minor issues")
|
|
331
|
+
elif summary.success_rate >= 50:
|
|
332
|
+
print("❌ NEEDS IMPROVEMENT: Apply template fixes and retry logic")
|
|
333
|
+
else:
|
|
334
|
+
print("❌ CRITICAL: Tool calling is unreliable, review configuration")
|
|
335
|
+
|
|
336
|
+
print("=" * 70)
|
|
337
|
+
|
|
338
|
+
# Client metrics
|
|
339
|
+
metrics = self.client.get_metrics()
|
|
340
|
+
print("\nClient Metrics:")
|
|
341
|
+
print(f" Total Attempts: {metrics.total_attempts}")
|
|
342
|
+
print(f" Successful Calls: {metrics.successful_calls}")
|
|
343
|
+
print(f" Failed Calls: {metrics.failed_calls}")
|
|
344
|
+
print(f" Retries: {metrics.retries}")
|
|
345
|
+
print(f" Avg Latency: {metrics.avg_latency_ms:.0f}ms")
|
|
346
|
+
print("=" * 70)
|
|
347
|
+
|
|
348
|
+
return summary.success_rate >= 90
|
|
349
|
+
|
|
350
|
+
def save_results(self, filename: str = None):
|
|
351
|
+
"""Save test results to JSON file"""
|
|
352
|
+
if not filename:
|
|
353
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
354
|
+
filename = f"qwen35_test_results_{timestamp}.json"
|
|
355
|
+
|
|
356
|
+
data = {
|
|
357
|
+
"timestamp": datetime.now().isoformat(),
|
|
358
|
+
"model": self.client.config["model"],
|
|
359
|
+
"config": self.client.get_status(),
|
|
360
|
+
"summary": {
|
|
361
|
+
"total_tests": len(self.results),
|
|
362
|
+
"passed_tests": sum(1 for r in self.results if r.success),
|
|
363
|
+
"failed_tests": sum(1 for r in self.results if not r.success),
|
|
364
|
+
"success_rate": sum(1 for r in self.results if r.success)
|
|
365
|
+
/ len(self.results)
|
|
366
|
+
* 100
|
|
367
|
+
if self.results
|
|
368
|
+
else 0,
|
|
369
|
+
},
|
|
370
|
+
"results": [asdict(r) for r in self.results],
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
with open(filename, "w") as f:
|
|
374
|
+
json.dump(data, f, indent=2)
|
|
375
|
+
|
|
376
|
+
logger.info(f"Results saved to: {filename}")
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def main():
    """Main test execution.

    Parses the ``--verbose`` and ``--output`` flags, runs the whole suite
    against a freshly created client, prints the summary and, when an output
    path was given, saves the results as JSON.

    The process exits with status 0 when ``print_summary`` reports success
    and with status 1 otherwise, including on Ctrl-C or any other exception.
    """
    cli = argparse.ArgumentParser(description="Qwen3.5 Tool Call Reliability Test")
    cli.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
    cli.add_argument("--output", "-o", type=str, help="Output results to JSON file")
    options = cli.parse_args()

    try:
        # Build the client first, then the suite that drives it.
        logger.info("Initializing Qwen3.5 tool call client...")
        suite = Qwen35TestSuite(Qwen35ToolCallClient(), verbose=options.verbose)

        # Run everything and report; the report tells us whether we passed.
        reliable = suite.print_summary(suite.run_all_tests())

        # Persist results only when the caller asked for a file.
        if options.output:
            suite.save_results(options.output)

        # SystemExit is not an Exception subclass, so the handlers below
        # do not intercept this.
        exit_code = 0 if reliable else 1
        sys.exit(exit_code)

    except KeyboardInterrupt:
        print("\n❌ Test interrupted by user")
        sys.exit(1)
    except Exception as exc:
        logger.error(f"Test failed with error: {exc}")
        import traceback

        traceback.print_exc()
        sys.exit(1)
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
# Run the test suite only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|