@miller-tech/uap 1.9.1 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195)
  1. package/README.md +285 -642
  2. package/config/chat_template.jinja +76 -0
  3. package/config/lora-finetune.yaml +82 -0
  4. package/config/model-profiles/claude-haiku-3.5.json +62 -0
  5. package/config/model-profiles/claude-opus-4.6.json +63 -0
  6. package/config/model-profiles/claude-sonnet-4.6.json +63 -0
  7. package/config/model-profiles/gemini-2.5-flash.json +62 -0
  8. package/config/model-profiles/gemini-2.5-pro.json +62 -0
  9. package/config/model-profiles/generic.json +54 -0
  10. package/config/model-profiles/glm-5.json +61 -0
  11. package/config/model-profiles/gpt-4.1.json +64 -0
  12. package/config/model-profiles/gpt-4o.json +64 -0
  13. package/config/model-profiles/gpt-o3.json +61 -0
  14. package/config/model-profiles/kimi-k2.5.json +70 -0
  15. package/config/model-profiles/llama.json +86 -0
  16. package/config/model-profiles/qwen35.json +218 -0
  17. package/dist/.tsbuildinfo +1 -1
  18. package/dist/bin/cli.js +78 -9
  19. package/dist/bin/cli.js.map +1 -1
  20. package/dist/bin/llama-server-optimize.js +0 -10
  21. package/dist/bin/llama-server-optimize.js.map +1 -1
  22. package/dist/bin/policy.js +11 -195
  23. package/dist/bin/policy.js.map +1 -1
  24. package/dist/cli/dashboard.d.ts.map +1 -1
  25. package/dist/cli/dashboard.js +10 -12
  26. package/dist/cli/dashboard.js.map +1 -1
  27. package/dist/cli/generate.d.ts +0 -7
  28. package/dist/cli/generate.d.ts.map +1 -1
  29. package/dist/cli/generate.js +10 -22
  30. package/dist/cli/generate.js.map +1 -1
  31. package/dist/cli/memory.d.ts.map +1 -1
  32. package/dist/cli/memory.js +43 -109
  33. package/dist/cli/memory.js.map +1 -1
  34. package/dist/cli/model.d.ts.map +1 -1
  35. package/dist/cli/model.js +8 -25
  36. package/dist/cli/model.js.map +1 -1
  37. package/dist/cli/patterns.d.ts.map +1 -1
  38. package/dist/cli/patterns.js +11 -9
  39. package/dist/cli/patterns.js.map +1 -1
  40. package/dist/cli/policy.d.ts.map +1 -1
  41. package/dist/cli/policy.js +165 -1
  42. package/dist/cli/policy.js.map +1 -1
  43. package/dist/cli/rtk-validation.d.ts +0 -4
  44. package/dist/cli/rtk-validation.d.ts.map +1 -1
  45. package/dist/cli/rtk-validation.js +5 -23
  46. package/dist/cli/rtk-validation.js.map +1 -1
  47. package/dist/cli/schema-diff.d.ts.map +1 -1
  48. package/dist/cli/schema-diff.js +25 -16
  49. package/dist/cli/schema-diff.js.map +1 -1
  50. package/dist/cli/setup-wizard.d.ts.map +1 -1
  51. package/dist/cli/setup-wizard.js +11 -7
  52. package/dist/cli/setup-wizard.js.map +1 -1
  53. package/dist/cli/setup.d.ts.map +1 -1
  54. package/dist/cli/setup.js +3 -4
  55. package/dist/cli/setup.js.map +1 -1
  56. package/dist/cli/visualize.d.ts +0 -13
  57. package/dist/cli/visualize.d.ts.map +1 -1
  58. package/dist/cli/visualize.js +1 -60
  59. package/dist/cli/visualize.js.map +1 -1
  60. package/dist/cli/worktree.d.ts.map +1 -1
  61. package/dist/cli/worktree.js +12 -13
  62. package/dist/cli/worktree.js.map +1 -1
  63. package/dist/coordination/adaptive-patterns.d.ts.map +1 -1
  64. package/dist/coordination/adaptive-patterns.js +7 -3
  65. package/dist/coordination/adaptive-patterns.js.map +1 -1
  66. package/dist/coordination/deploy-batcher.d.ts.map +1 -1
  67. package/dist/coordination/deploy-batcher.js +8 -8
  68. package/dist/coordination/deploy-batcher.js.map +1 -1
  69. package/dist/dashboard/data-service.d.ts.map +1 -1
  70. package/dist/dashboard/data-service.js +12 -13
  71. package/dist/dashboard/data-service.js.map +1 -1
  72. package/dist/dashboard/server.d.ts.map +1 -1
  73. package/dist/dashboard/server.js +74 -13
  74. package/dist/dashboard/server.js.map +1 -1
  75. package/dist/index.d.ts +4 -4
  76. package/dist/index.d.ts.map +1 -1
  77. package/dist/index.js +3 -3
  78. package/dist/index.js.map +1 -1
  79. package/dist/mcp-router/executor/client.d.ts +0 -6
  80. package/dist/mcp-router/executor/client.d.ts.map +1 -1
  81. package/dist/mcp-router/executor/client.js +1 -27
  82. package/dist/mcp-router/executor/client.js.map +1 -1
  83. package/dist/mcp-router/index.d.ts +1 -1
  84. package/dist/mcp-router/index.d.ts.map +1 -1
  85. package/dist/mcp-router/index.js.map +1 -1
  86. package/dist/mcp-router/output-compressor.js +114 -112
  87. package/dist/mcp-router/output-compressor.js.map +1 -1
  88. package/dist/mcp-router/types.d.ts +0 -5
  89. package/dist/mcp-router/types.d.ts.map +1 -1
  90. package/dist/memory/backends/github.d.ts.map +1 -1
  91. package/dist/memory/backends/github.js +21 -13
  92. package/dist/memory/backends/github.js.map +1 -1
  93. package/dist/memory/context-pruner.d.ts +2 -9
  94. package/dist/memory/context-pruner.d.ts.map +1 -1
  95. package/dist/memory/context-pruner.js +5 -22
  96. package/dist/memory/context-pruner.js.map +1 -1
  97. package/dist/memory/correction-propagator.d.ts.map +1 -1
  98. package/dist/memory/correction-propagator.js +19 -19
  99. package/dist/memory/correction-propagator.js.map +1 -1
  100. package/dist/memory/dynamic-retrieval.d.ts.map +1 -1
  101. package/dist/memory/dynamic-retrieval.js +263 -132
  102. package/dist/memory/dynamic-retrieval.js.map +1 -1
  103. package/dist/memory/embeddings.d.ts.map +1 -1
  104. package/dist/memory/embeddings.js +2 -15
  105. package/dist/memory/embeddings.js.map +1 -1
  106. package/dist/memory/hierarchical-memory.d.ts.map +1 -1
  107. package/dist/memory/hierarchical-memory.js +6 -0
  108. package/dist/memory/hierarchical-memory.js.map +1 -1
  109. package/dist/memory/knowledge-graph.d.ts.map +1 -1
  110. package/dist/memory/knowledge-graph.js +2 -1
  111. package/dist/memory/knowledge-graph.js.map +1 -1
  112. package/dist/memory/memory-consolidator.d.ts +1 -0
  113. package/dist/memory/memory-consolidator.d.ts.map +1 -1
  114. package/dist/memory/memory-consolidator.js +27 -3
  115. package/dist/memory/memory-consolidator.js.map +1 -1
  116. package/dist/memory/predictive-memory.d.ts +9 -1
  117. package/dist/memory/predictive-memory.d.ts.map +1 -1
  118. package/dist/memory/predictive-memory.js +77 -1
  119. package/dist/memory/predictive-memory.js.map +1 -1
  120. package/dist/memory/serverless-qdrant.d.ts +1 -0
  121. package/dist/memory/serverless-qdrant.d.ts.map +1 -1
  122. package/dist/memory/serverless-qdrant.js +3 -9
  123. package/dist/memory/serverless-qdrant.js.map +1 -1
  124. package/dist/memory/short-term/schema.d.ts.map +1 -1
  125. package/dist/memory/short-term/schema.js +44 -6
  126. package/dist/memory/short-term/schema.js.map +1 -1
  127. package/dist/memory/short-term/sqlite.d.ts +4 -3
  128. package/dist/memory/short-term/sqlite.d.ts.map +1 -1
  129. package/dist/memory/short-term/sqlite.js +3 -12
  130. package/dist/memory/short-term/sqlite.js.map +1 -1
  131. package/dist/memory/speculative-cache.js +2 -2
  132. package/dist/memory/speculative-cache.js.map +1 -1
  133. package/dist/models/executor.d.ts +21 -0
  134. package/dist/models/executor.d.ts.map +1 -1
  135. package/dist/models/executor.js +116 -4
  136. package/dist/models/executor.js.map +1 -1
  137. package/dist/models/planner.d.ts +1 -0
  138. package/dist/models/planner.d.ts.map +1 -1
  139. package/dist/models/planner.js +13 -1
  140. package/dist/models/planner.js.map +1 -1
  141. package/dist/policies/enforced-tool-router.d.ts +3 -1
  142. package/dist/policies/enforced-tool-router.d.ts.map +1 -1
  143. package/dist/policies/enforced-tool-router.js.map +1 -1
  144. package/dist/tasks/service.d.ts +1 -0
  145. package/dist/tasks/service.d.ts.map +1 -1
  146. package/dist/tasks/service.js +10 -6
  147. package/dist/tasks/service.js.map +1 -1
  148. package/dist/telemetry/session-telemetry.d.ts.map +1 -1
  149. package/dist/telemetry/session-telemetry.js +14 -11
  150. package/dist/telemetry/session-telemetry.js.map +1 -1
  151. package/dist/types/config.d.ts +426 -359
  152. package/dist/types/config.d.ts.map +1 -1
  153. package/dist/types/config.js +12 -67
  154. package/dist/types/config.js.map +1 -1
  155. package/dist/utils/config-loader.d.ts +34 -0
  156. package/dist/utils/config-loader.d.ts.map +1 -0
  157. package/dist/utils/config-loader.js +93 -0
  158. package/dist/utils/config-loader.js.map +1 -0
  159. package/dist/utils/lazy-imports.d.ts +18 -0
  160. package/dist/utils/lazy-imports.d.ts.map +1 -0
  161. package/dist/utils/lazy-imports.js +39 -0
  162. package/dist/utils/lazy-imports.js.map +1 -0
  163. package/dist/utils/stopwords.d.ts +12 -0
  164. package/dist/utils/stopwords.d.ts.map +1 -0
  165. package/dist/utils/stopwords.js +196 -0
  166. package/dist/utils/stopwords.js.map +1 -0
  167. package/dist/utils/string-similarity.d.ts +10 -3
  168. package/dist/utils/string-similarity.d.ts.map +1 -1
  169. package/dist/utils/string-similarity.js +49 -25
  170. package/dist/utils/string-similarity.js.map +1 -1
  171. package/docs/INDEX.md +35 -34
  172. package/package.json +13 -14
  173. package/scripts/maintenance/publish-npm.sh +82 -0
  174. package/scripts/maintenance/publish.sh +29 -0
  175. package/scripts/maintenance/update-droids.py +93 -0
  176. package/scripts/maintenance/update-skills.py +148 -0
  177. package/scripts/maintenance/update-uap-compliance.sh +45 -0
  178. package/scripts/maintenance/validate-skills.py +83 -0
  179. package/scripts/maintenance/verify-compliance.sh +117 -0
  180. package/scripts/setup/install-cloakbrowser.ts +14 -0
  181. package/scripts/setup/install-desktop.sh +105 -0
  182. package/scripts/setup/install-rtk.sh +184 -0
  183. package/scripts/setup/install-web.sh +73 -0
  184. package/scripts/setup/setup.sh +375 -0
  185. package/scripts/validate-build.sh +62 -0
  186. package/scripts/version-bump.sh +130 -0
  187. package/tools/agents/scripts/migrate_memory_to_qdrant.py +1 -1
  188. /package/docs/{BENCHMARK_GAPS_AND_PLAN.md → archive/BENCHMARK_GAPS_AND_PLAN.md} +0 -0
  189. /package/docs/{MODEL_ROUTING_IMPLEMENTATION_SUMMARY.md → archive/MODEL_ROUTING_IMPLEMENTATION_SUMMARY.md} +0 -0
  190. /package/docs/{MODEL_ROUTING_OPTIMIZATION_PLAN.md → archive/MODEL_ROUTING_OPTIMIZATION_PLAN.md} +0 -0
  191. /package/docs/{PARALLELISM_GAPS_AND_OPTIONS.md → archive/PARALLELISM_GAPS_AND_OPTIONS.md} +0 -0
  192. /package/docs/{POLICY_GATE_IMPLEMENTATION.md → archive/POLICY_GATE_IMPLEMENTATION.md} +0 -0
  193. /package/docs/{UAP_OPTIMIZATION_PLAN.md → archive/UAP_OPTIMIZATION_PLAN.md} +0 -0
  194. /package/docs/{opencode-integration-guide.md → archive/opencode-integration-guide.md} +0 -0
  195. /package/docs/{opencode-integration-quickref.md → archive/opencode-integration-quickref.md} +0 -0
@@ -0,0 +1,76 @@
1
+ {#- Qwen3.5 Chat Template - Optimized for 35B A3B tool calling -#}
2
+ {#- Compatible with llama.cpp / vLLM / OpenAI-compatible servers -#}
3
+ {#- Uses official Qwen3 tool call format for maximum reliability -#}
4
+ {%- if tools %}
5
+ {{- '<|im_start|>system\n' }}
6
+ {%- if messages[0].role == 'system' %}
7
+ {{- messages[0].content + '\n\n' }}
8
+ {%- endif %}
9
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
10
+ {%- for tool in tools %}
11
+ {{- "\n" }}
12
+ {{- tool | tojson }}
13
+ {%- endfor %}
14
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
15
+ {%- else %}
16
+ {%- if messages[0].role == 'system' %}
17
+ {{- '<|im_start|>system\n' + messages[0].content | trim + '<|im_end|>\n' }}
18
+ {%- endif %}
19
+ {%- endif %}
20
+
21
+ {%- for message in messages %}
22
+ {#- User message or non-first system message -#}
23
+ {%- if (message.role == 'user') or (message.role == 'system' and not loop.first) %}
24
+ {{- '<|im_start|>' + message.role + '\n' + message.content | trim + '<|im_end|>\n' }}
25
+
26
+ {#- Assistant message -#}
27
+ {%- elif message.role == 'assistant' %}
28
+ {{- '<|im_start|>assistant\n' }}{#- Header newline is unconditional: a tool-call-only turn must render 'assistant\n<tool_call>', matching the generation prompt -#}
29
+ {%- if message.content is defined and message.content %}
30
+ {{- message.content | trim }}
31
+ {%- endif %}
32
+ {%- if message.tool_calls is defined and message.tool_calls %}
33
+ {%- for tool_call in message.tool_calls %}
34
+ {%- if tool_call.function is defined %}
35
+ {%- set tc = tool_call.function %}
36
+ {%- else %}
37
+ {%- set tc = tool_call %}
38
+ {%- endif %}
39
+ {%- if (loop.first and message.content is defined and message.content) or (not loop.first) %}
40
+ {{- '\n' }}
41
+ {%- endif %}
42
+ {{- '<tool_call>\n{"name": "' }}
43
+ {{- tc.name }}
44
+ {{- '", "arguments": ' }}
45
+ {%- if tc.arguments is string %}
46
+ {{- tc.arguments }}
47
+ {%- else %}
48
+ {{- tc.arguments | tojson }}
49
+ {%- endif %}
50
+ {{- '}\n</tool_call>' }}
51
+ {%- endfor %}
52
+ {%- endif %}
53
+ {{- '<|im_end|>\n' }}
54
+
55
+ {#- Tool response (grouped under user role per Qwen3 spec) -#}
56
+ {%- elif message.role == 'tool' %}
57
+ {%- if loop.first or (messages[loop.index0 - 1].role != 'tool') %}
58
+ {{- '<|im_start|>user' }}
59
+ {%- endif %}
60
+ {{- '\n<tool_response>\n' }}
61
+ {{- message.content | trim }}
62
+ {{- '\n</tool_response>' }}
63
+ {%- if loop.last or (messages[loop.index0 + 1].role != 'tool') %}
64
+ {{- '<|im_end|>\n' }}
65
+ {%- endif %}
66
+
67
+ {%- endif %}
68
+ {%- endfor %}
69
+
70
+ {#- Generation prompt for assistant turn -#}
71
+ {%- if add_generation_prompt is defined and add_generation_prompt %}
72
+ {{- '<|im_start|>assistant\n' }}
73
+ {%- if enable_thinking is defined and enable_thinking is false %}
74
+ {{- '<think>\n\n</think>\n\n' }}
75
+ {%- endif %}
76
+ {%- endif %}
@@ -0,0 +1,82 @@
1
+ # LoRA Fine-Tune Configuration for Qwen3.5 35B A3B Tool Calling
2
+ # Compatible with: axolotl, LLaMA-Factory, unsloth
3
+ #
4
+ # This config trains a LoRA adapter to improve tool call format adherence.
5
+ # The adapter is small (~50MB) and can be merged into the base model or
6
+ # loaded at runtime with llama.cpp --lora flag.
7
+ #
8
+ # Usage:
9
+ # 1. Generate training data:
10
+ # python3 tools/agents/scripts/generate_lora_training_data.py -n 500
11
+ # 2. Fine-tune with axolotl:
12
+ # accelerate launch -m axolotl.cli.train config/lora-finetune.yaml
13
+ # 3. Or with unsloth (faster, less VRAM):
14
+ # unsloth train --config config/lora-finetune.yaml
15
+
16
+ # Base model
17
+ base_model: Qwen/Qwen3.5-35B-A3B
18
+ model_type: AutoModelForCausalLM
19
+ tokenizer_type: AutoTokenizer
20
+ trust_remote_code: true
21
+
22
+ # LoRA configuration
23
+ adapter: lora
24
+ lora_r: 16
25
+ lora_alpha: 32
26
+ lora_dropout: 0.05
27
+ lora_target_modules:
28
+ - q_proj
29
+ - k_proj
30
+ - v_proj
31
+ - o_proj
32
+ - gate_proj
33
+ - up_proj
34
+ - down_proj
35
+ lora_target_linear: true
36
+
37
+ # Dataset
38
+ datasets:
39
+ - path: tool_call_training_data.jsonl
40
+ type: chat_template
41
+ chat_template: chatml
42
+ field_messages: messages
43
+
44
+ # Training parameters
45
+ num_epochs: 3
46
+ micro_batch_size: 1
47
+ gradient_accumulation_steps: 8
48
+ learning_rate: 2.0e-4
49
+ lr_scheduler: cosine
50
+ warmup_ratio: 0.1
51
+ optimizer: adamw_torch
52
+ weight_decay: 0.01
53
+ max_grad_norm: 1.0
54
+
55
+ # Sequence length
56
+ sequence_len: 4096
57
+ sample_packing: true
58
+ pad_to_sequence_len: true
59
+
60
+ # Memory optimization
61
+ bf16: true
62
+ tf32: true
63
+ gradient_checkpointing: true
64
+ flash_attention: true
65
+
66
+ # Output
67
+ output_dir: ./output/qwen35-tool-call-lora
68
+ save_strategy: epoch
69
+ save_total_limit: 2
70
+ logging_steps: 10
71
+
72
+ # Evaluation
73
+ val_set_size: 0.05
74
+ eval_steps: 50
75
+
76
+ # Wandb (optional)
77
+ # wandb_project: qwen35-tool-call-lora
78
+ # wandb_run_id:
79
+
80
+ # Special tokens
81
+ special_tokens:
82
+ pad_token: '<|endoftext|>'
@@ -0,0 +1,62 @@
1
+ {
2
+ "_profile": "claude-haiku-3.5",
3
+ "_description": "Anthropic Claude 3.5 Haiku (claude-3-5-haiku-20241022). Fastest and most cost-effective Claude model. 200K context with strong tool calling for high-throughput tasks. Best for rapid iteration, simple tool use, classification, and cost-sensitive workloads where latency matters.",
4
+
5
+ "model": "claude-3-5-haiku-20241022",
6
+ "provider": "anthropic",
7
+ "api_base_url": "https://api.anthropic.com",
8
+ "max_tokens": 8192,
9
+ "temperature": 1.0,
10
+ "top_p": 1.0,
11
+ "timeout_ms": 60000,
12
+ "context_window": 200000,
13
+
14
+ "optimize_for_tool_calls": true,
15
+ "enable_thinking": false,
16
+
17
+ "tool_calling": {
18
+ "parallel_tool_calls": true,
19
+ "tool_choice": "auto",
20
+ "max_tool_calls_per_turn": 128,
21
+ "_comment": "Haiku 3.5 supports parallel tool use with very low latency. Great for high-volume tool calling."
22
+ },
23
+
24
+ "extended_thinking": {
25
+ "enabled": false,
26
+ "_comment": "Extended thinking not available on Haiku 3.5. Use Sonnet 4.6 or Opus 4.6 for thinking mode."
27
+ },
28
+
29
+ "dynamic_temperature": {
30
+ "enabled": false,
31
+ "_comment": "Anthropic API handles temperature differently. Default temp=1.0 is recommended."
32
+ },
33
+
34
+ "tool_call_batching": {
35
+ "enabled": true,
36
+ "system_prompt_suffix": "When multiple tools are needed, call ALL of them in a single response. Do not call one tool and wait - emit all tool calls together.",
37
+ "_comment": "Haiku 3.5 supports parallel tool calls with sub-second latency."
38
+ },
39
+
40
+ "pricing": {
41
+ "input_per_1m": 0.8,
42
+ "output_per_1m": 4.0,
43
+ "cache_write_per_1m": 1.0,
44
+ "cache_read_per_1m": 0.08,
45
+ "currency": "USD",
46
+ "_comment": "As of 2025. Extremely cost-effective. ~19x cheaper than Opus 4.6 on input."
47
+ },
48
+
49
+ "rate_limits": {
50
+ "requests_per_minute": 4000,
51
+ "tokens_per_minute": 400000,
52
+ "_comment": "Highest throughput of all Claude models. Designed for high-volume use cases."
53
+ },
54
+
55
+ "running_config": {
56
+ "description": "Anthropic hosted API. Requires ANTHROPIC_API_KEY environment variable.",
57
+ "env_vars": {
58
+ "ANTHROPIC_API_KEY": "<your-api-key>",
59
+ "UAP_MODEL_PROFILE": "claude-haiku-3.5"
60
+ }
61
+ }
62
+ }
@@ -0,0 +1,63 @@
1
+ {
2
+ "_profile": "claude-opus-4.6",
3
+ "_description": "Anthropic Claude Opus 4.6 (claude-opus-4-6-20250616). Most capable Claude model with extended thinking, parallel tool use, and 200K context. Best for complex multi-step reasoning, large codebase navigation, and agentic workflows where accuracy matters more than speed.",
4
+
5
+ "model": "claude-opus-4-6-20250616",
6
+ "provider": "anthropic",
7
+ "api_base_url": "https://api.anthropic.com",
8
+ "max_tokens": 32768,
9
+ "temperature": 1.0,
10
+ "top_p": 1.0,
11
+ "timeout_ms": 300000,
12
+ "context_window": 200000,
13
+
14
+ "optimize_for_tool_calls": true,
15
+ "enable_thinking": true,
16
+
17
+ "tool_calling": {
18
+ "parallel_tool_calls": true,
19
+ "tool_choice": "auto",
20
+ "max_tool_calls_per_turn": 128,
21
+ "_comment": "Opus 4.6 has native parallel tool use. tool_choice='auto' is recommended; 'required' forces tool use every turn."
22
+ },
23
+
24
+ "extended_thinking": {
25
+ "enabled": true,
26
+ "budget_tokens": 10000,
27
+ "_comment": "Extended thinking gives Opus 4.6 a scratchpad for complex reasoning. Budget controls max thinking tokens per turn. Set to 0 to disable."
28
+ },
29
+
30
+ "dynamic_temperature": {
31
+ "enabled": false,
32
+ "_comment": "Anthropic API handles temperature differently. Default temp=1.0 is recommended by Anthropic for most use cases."
33
+ },
34
+
35
+ "tool_call_batching": {
36
+ "enabled": true,
37
+ "system_prompt_suffix": "When multiple tools are needed, call ALL of them in a single response. Do not call one tool and wait - emit all tool calls together.",
38
+ "_comment": "Opus 4.6 natively supports parallel tool calls. This prompt reinforces the behavior."
39
+ },
40
+
41
+ "pricing": {
42
+ "input_per_1m": 15.0,
43
+ "output_per_1m": 75.0,
44
+ "cache_write_per_1m": 18.75,
45
+ "cache_read_per_1m": 1.5,
46
+ "currency": "USD",
47
+ "_comment": "As of 2025. Prompt caching gives 90% discount on cache hits."
48
+ },
49
+
50
+ "rate_limits": {
51
+ "requests_per_minute": 1000,
52
+ "tokens_per_minute": 80000,
53
+ "_comment": "Default tier limits. Higher tiers available via Anthropic."
54
+ },
55
+
56
+ "running_config": {
57
+ "description": "Anthropic hosted API. Requires ANTHROPIC_API_KEY environment variable.",
58
+ "env_vars": {
59
+ "ANTHROPIC_API_KEY": "<your-api-key>",
60
+ "UAP_MODEL_PROFILE": "claude-opus-4.6"
61
+ }
62
+ }
63
+ }
@@ -0,0 +1,63 @@
1
+ {
2
+ "_profile": "claude-sonnet-4.6",
3
+ "_description": "Anthropic Claude Sonnet 4.6 (claude-sonnet-4-6-20250514). Best balance of speed, cost, and capability. Excellent tool calling, 200K context, and strong coding performance. Ideal for most agentic tasks where you want fast iteration without sacrificing quality.",
4
+
5
+ "model": "claude-sonnet-4-6-20250514",
6
+ "provider": "anthropic",
7
+ "api_base_url": "https://api.anthropic.com",
8
+ "max_tokens": 16384,
9
+ "temperature": 1.0,
10
+ "top_p": 1.0,
11
+ "timeout_ms": 120000,
12
+ "context_window": 200000,
13
+
14
+ "optimize_for_tool_calls": true,
15
+ "enable_thinking": true,
16
+
17
+ "tool_calling": {
18
+ "parallel_tool_calls": true,
19
+ "tool_choice": "auto",
20
+ "max_tool_calls_per_turn": 128,
21
+ "_comment": "Sonnet 4.6 has native parallel tool use. Excellent tool call reliability out of the box."
22
+ },
23
+
24
+ "extended_thinking": {
25
+ "enabled": true,
26
+ "budget_tokens": 5000,
27
+ "_comment": "Extended thinking available on Sonnet 4.6. Lower budget than Opus since Sonnet is faster and cheaper."
28
+ },
29
+
30
+ "dynamic_temperature": {
31
+ "enabled": false,
32
+ "_comment": "Anthropic API handles temperature differently. Default temp=1.0 is recommended."
33
+ },
34
+
35
+ "tool_call_batching": {
36
+ "enabled": true,
37
+ "system_prompt_suffix": "When multiple tools are needed, call ALL of them in a single response. Do not call one tool and wait - emit all tool calls together.",
38
+ "_comment": "Sonnet 4.6 natively supports parallel tool calls."
39
+ },
40
+
41
+ "pricing": {
42
+ "input_per_1m": 3.0,
43
+ "output_per_1m": 15.0,
44
+ "cache_write_per_1m": 3.75,
45
+ "cache_read_per_1m": 0.3,
46
+ "currency": "USD",
47
+ "_comment": "As of 2025. 5x cheaper than Opus 4.6 with strong performance."
48
+ },
49
+
50
+ "rate_limits": {
51
+ "requests_per_minute": 2000,
52
+ "tokens_per_minute": 160000,
53
+ "_comment": "Default tier limits. Higher throughput than Opus."
54
+ },
55
+
56
+ "running_config": {
57
+ "description": "Anthropic hosted API. Requires ANTHROPIC_API_KEY environment variable.",
58
+ "env_vars": {
59
+ "ANTHROPIC_API_KEY": "<your-api-key>",
60
+ "UAP_MODEL_PROFILE": "claude-sonnet-4.6"
61
+ }
62
+ }
63
+ }
@@ -0,0 +1,62 @@
1
+ {
2
+ "_profile": "gemini-2.5-flash",
3
+ "_description": "Google Gemini 2.5 Flash (gemini-2.5-flash-preview-05-20). Fast and cost-effective with 1M context and thinking mode. Best for high-throughput agentic tasks where speed and cost matter. Strong tool calling with very low latency.",
4
+
5
+ "model": "gemini-2.5-flash-preview-05-20",
6
+ "provider": "google",
7
+ "api_base_url": "https://generativelanguage.googleapis.com/v1beta",
8
+ "max_tokens": 65536,
9
+ "temperature": 1.0,
10
+ "top_p": 0.95,
11
+ "timeout_ms": 120000,
12
+ "context_window": 1048576,
13
+
14
+ "optimize_for_tool_calls": true,
15
+ "enable_thinking": true,
16
+
17
+ "tool_calling": {
18
+ "parallel_tool_calls": true,
19
+ "tool_choice": "auto",
20
+ "_comment": "Gemini 2.5 Flash supports function calling with parallel tool use. Very fast response times."
21
+ },
22
+
23
+ "thinking": {
24
+ "enabled": true,
25
+ "thinking_budget": 4096,
26
+ "_comment": "Flash has thinking mode but with lower budget for speed. Increase for harder tasks."
27
+ },
28
+
29
+ "dynamic_temperature": {
30
+ "enabled": false,
31
+ "_comment": "Google recommends temperature=1.0 for most use cases."
32
+ },
33
+
34
+ "tool_call_batching": {
35
+ "enabled": true,
36
+ "system_prompt_suffix": "When multiple tools are needed, call ALL of them in a single response.",
37
+ "_comment": "Flash supports parallel function calls with very low latency."
38
+ },
39
+
40
+ "pricing": {
41
+ "input_per_1m_under_200k": 0.15,
42
+ "input_per_1m_over_200k": 0.3,
43
+ "output_per_1m": 0.6,
44
+ "thinking_per_1m": 0.7,
45
+ "currency": "USD",
46
+ "_comment": "As of 2025. Extremely cost-effective. Free tier available."
47
+ },
48
+
49
+ "rate_limits": {
50
+ "requests_per_minute": 2000,
51
+ "tokens_per_minute": 4000000,
52
+ "_comment": "Very high throughput. Designed for high-volume use cases."
53
+ },
54
+
55
+ "running_config": {
56
+ "description": "Google AI Studio or Vertex AI. Requires GOOGLE_API_KEY or GOOGLE_APPLICATION_CREDENTIALS.",
57
+ "env_vars": {
58
+ "GOOGLE_API_KEY": "<your-api-key>",
59
+ "UAP_MODEL_PROFILE": "gemini-2.5-flash"
60
+ }
61
+ }
62
+ }
@@ -0,0 +1,62 @@
1
+ {
2
+ "_profile": "gemini-2.5-pro",
3
+ "_description": "Google Gemini 2.5 Pro (gemini-2.5-pro-preview-06-05). Top-tier reasoning model with 1M token context, native tool use, and thinking mode. Excels at code generation, long-document analysis, and complex multi-step tasks. Competitive with Opus 4 at lower cost.",
4
+
5
+ "model": "gemini-2.5-pro-preview-06-05",
6
+ "provider": "google",
7
+ "api_base_url": "https://generativelanguage.googleapis.com/v1beta",
8
+ "max_tokens": 65536,
9
+ "temperature": 1.0,
10
+ "top_p": 0.95,
11
+ "timeout_ms": 300000,
12
+ "context_window": 1048576,
13
+
14
+ "optimize_for_tool_calls": true,
15
+ "enable_thinking": true,
16
+
17
+ "tool_calling": {
18
+ "parallel_tool_calls": true,
19
+ "tool_choice": "auto",
20
+ "_comment": "Gemini 2.5 Pro supports function calling with automatic tool selection. Parallel calls supported."
21
+ },
22
+
23
+ "thinking": {
24
+ "enabled": true,
25
+ "thinking_budget": 8192,
26
+ "_comment": "Gemini 2.5 Pro has a thinking mode that improves reasoning quality. Budget controls max thinking tokens."
27
+ },
28
+
29
+ "dynamic_temperature": {
30
+ "enabled": false,
31
+ "_comment": "Google recommends temperature=1.0 for most use cases with Gemini 2.5."
32
+ },
33
+
34
+ "tool_call_batching": {
35
+ "enabled": true,
36
+ "system_prompt_suffix": "When multiple tools are needed, call ALL of them in a single response.",
37
+ "_comment": "Gemini 2.5 Pro supports parallel function calls."
38
+ },
39
+
40
+ "pricing": {
41
+ "input_per_1m_under_200k": 1.25,
42
+ "input_per_1m_over_200k": 2.5,
43
+ "output_per_1m": 10.0,
44
+ "thinking_per_1m": 3.5,
45
+ "currency": "USD",
46
+ "_comment": "As of 2025. Tiered input pricing based on context length. Thinking tokens billed separately."
47
+ },
48
+
49
+ "rate_limits": {
50
+ "requests_per_minute": 1000,
51
+ "tokens_per_minute": 4000000,
52
+ "_comment": "Very high token throughput. Free tier available with lower limits."
53
+ },
54
+
55
+ "running_config": {
56
+ "description": "Google AI Studio or Vertex AI. Requires GOOGLE_API_KEY or GOOGLE_APPLICATION_CREDENTIALS.",
57
+ "env_vars": {
58
+ "GOOGLE_API_KEY": "<your-api-key>",
59
+ "UAP_MODEL_PROFILE": "gemini-2.5-pro"
60
+ }
61
+ }
62
+ }
@@ -0,0 +1,54 @@
1
+ {
2
+ "_profile": "generic",
3
+ "_description": "Generic defaults for any OpenAI-compatible model. Works out of the box with llama.cpp, vLLM, Ollama, OpenAI, or any server that speaks the /v1/chat/completions API. Conservative settings that prioritize reliability over speed.",
4
+
5
+ "model": "default",
6
+ "max_tokens": 4096,
7
+ "temperature": 0.6,
8
+ "top_p": 0.9,
9
+ "timeout_ms": 120000,
10
+ "context_window": 32768,
11
+
12
+ "optimize_for_tool_calls": true,
13
+ "enable_thinking": false,
14
+
15
+ "dynamic_temperature": {
16
+ "enabled": true,
17
+ "decay": 0.5,
18
+ "floor": 0.2,
19
+ "_comment": "On retry, temp = max(floor, base_temp * decay^attempt). Reduces randomness for structured output."
20
+ },
21
+
22
+ "tool_call_batching": {
23
+ "enabled": true,
24
+ "system_prompt_suffix": "When multiple tools are needed, call ALL of them in a single response. Do not call one tool and wait - emit all tool calls together.",
25
+ "_comment": "Reduces round-trips by encouraging multi-tool emission in one turn."
26
+ },
27
+
28
+ "server_optimization": {
29
+ "flash_attention": false,
30
+ "kv_cache_type_k": "f16",
31
+ "kv_cache_type_v": "f16",
32
+ "prompt_cache": {
33
+ "enabled": false
34
+ },
35
+ "speculative_decoding": {
36
+ "enabled": false,
37
+ "_comment": "Enable and configure a draft model for 2-3x speedup. Requires a small model from the same family."
38
+ },
39
+ "_comment": "Generic defaults with no hardware-specific optimizations. Override per your GPU/CPU setup."
40
+ },
41
+
42
+ "running_config": {
43
+ "description": "Generic OpenAI-compatible server. Point TARGET_URL to your inference server.",
44
+ "llm_server": {
45
+ "description": "Any OpenAI-compatible server on port 8080",
46
+ "command": "llama-server --model /path/to/model.gguf --port 8080 --host 0.0.0.0 --ctx-size 32768",
47
+ "vram_estimate": "Depends on model size and quantization"
48
+ },
49
+ "env_vars": {
50
+ "TARGET_URL": "http://127.0.0.1:8080",
51
+ "UAP_MODEL_PROFILE": "generic"
52
+ }
53
+ }
54
+ }
@@ -0,0 +1,61 @@
1
+ {
2
+ "_profile": "glm-5",
3
+ "_description": "Zhipu GLM-5 (glm-5-plus). Chinese-English bilingual reasoning model with deep thinking, tool calling, and 128K context. Strong at code generation, math, and structured output. Competitive with frontier models at significantly lower cost.",
4
+
5
+ "model": "glm-5-plus",
6
+ "provider": "zhipu",
7
+ "api_base_url": "https://open.bigmodel.cn/api/paas/v4",
8
+ "max_tokens": 16384,
9
+ "temperature": 0.7,
10
+ "top_p": 0.9,
11
+ "timeout_ms": 180000,
12
+ "context_window": 128000,
13
+
14
+ "optimize_for_tool_calls": true,
15
+ "enable_thinking": true,
16
+
17
+ "tool_calling": {
18
+ "parallel_tool_calls": true,
19
+ "tool_choice": "auto",
20
+ "_comment": "GLM-5 supports OpenAI-compatible function calling with parallel tool use."
21
+ },
22
+
23
+ "thinking": {
24
+ "enabled": true,
25
+ "_comment": "GLM-5 has deep thinking mode for complex reasoning tasks. Enabled by default."
26
+ },
27
+
28
+ "dynamic_temperature": {
29
+ "enabled": true,
30
+ "decay": 0.5,
31
+ "floor": 0.2,
32
+ "_comment": "Lower temperature on retries for more deterministic tool call output."
33
+ },
34
+
35
+ "tool_call_batching": {
36
+ "enabled": true,
37
+ "system_prompt_suffix": "When multiple tools are needed, call ALL of them in a single response. Do not call one tool and wait - emit all tool calls together.",
38
+ "_comment": "GLM-5 supports parallel function calls."
39
+ },
40
+
41
+ "pricing": {
42
+ "input_per_1m_cny": 5.0,
43
+ "output_per_1m_cny": 5.0,
44
+ "currency": "CNY",
45
+ "_comment": "As of 2025. Priced in CNY. Very competitive pricing for frontier-level performance."
46
+ },
47
+
48
+ "rate_limits": {
49
+ "requests_per_minute": 300,
50
+ "tokens_per_minute": 300000,
51
+ "_comment": "Default tier limits. Higher tiers available via Zhipu."
52
+ },
53
+
54
+ "running_config": {
55
+ "description": "Zhipu AI hosted API. Requires ZHIPU_API_KEY environment variable. OpenAI-compatible endpoint.",
56
+ "env_vars": {
57
+ "ZHIPU_API_KEY": "<your-api-key>",
58
+ "UAP_MODEL_PROFILE": "glm-5"
59
+ }
60
+ }
61
+ }
@@ -0,0 +1,64 @@
1
+ {
2
+ "_profile": "gpt-4.1",
3
+ "_description": "OpenAI GPT-4.1 (gpt-4.1-2025-04-14). Purpose-built for coding and instruction following. 1M token context window, superior at long-document understanding, and optimized for agentic tool use. Best OpenAI model for software engineering tasks.",
4
+
5
+ "model": "gpt-4.1-2025-04-14",
6
+ "provider": "openai",
7
+ "api_base_url": "https://api.openai.com/v1",
8
+ "max_tokens": 32768,
9
+ "temperature": 0.7,
10
+ "top_p": 1.0,
11
+ "timeout_ms": 180000,
12
+ "context_window": 1047576,
13
+
14
+ "optimize_for_tool_calls": true,
15
+ "enable_thinking": false,
16
+
17
+ "tool_calling": {
18
+ "parallel_tool_calls": true,
19
+ "tool_choice": "auto",
20
+ "strict_mode": true,
21
+ "_comment": "GPT-4.1 excels at agentic tool use with very high reliability. Parallel function calling supported."
22
+ },
23
+
24
+ "structured_output": {
25
+ "json_mode": true,
26
+ "strict_json_schema": true,
27
+ "_comment": "Supports structured outputs with guaranteed schema adherence."
28
+ },
29
+
30
+ "dynamic_temperature": {
31
+ "enabled": true,
32
+ "decay": 0.5,
33
+ "floor": 0.2,
34
+ "_comment": "Lower temperature on retries for more deterministic output."
35
+ },
36
+
37
+ "tool_call_batching": {
38
+ "enabled": true,
39
+ "system_prompt_suffix": "When multiple tools are needed, call ALL of them in a single response using parallel function calling.",
40
+ "_comment": "GPT-4.1 natively supports parallel function calls with high reliability."
41
+ },
42
+
43
+ "pricing": {
44
+ "input_per_1m": 2.0,
45
+ "output_per_1m": 8.0,
46
+ "cached_input_per_1m": 0.5,
47
+ "currency": "USD",
48
+ "_comment": "As of 2025. Cheaper than GPT-4o with better coding performance."
49
+ },
50
+
51
+ "rate_limits": {
52
+ "requests_per_minute": 5000,
53
+ "tokens_per_minute": 800000,
54
+ "_comment": "Tier 3+ limits."
55
+ },
56
+
57
+ "running_config": {
58
+ "description": "OpenAI hosted API. Requires OPENAI_API_KEY environment variable.",
59
+ "env_vars": {
60
+ "OPENAI_API_KEY": "<your-api-key>",
61
+ "UAP_MODEL_PROFILE": "gpt-4.1"
62
+ }
63
+ }
64
+ }