@agentic-patterns/runtime 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1148,16 +1148,21 @@ declare const openrouterProvider: ProviderProtocol;
1148
1148
  /**
1149
1149
  * Ollama — local-only OSS models via HTTP.
1150
1150
  *
1151
- * Default tier map uses the Qwen3 family because Qwen's team explicitly
1152
- * prioritizes tool-calling and keeps the same grammar across sizes — so
1153
- * agents scale between haiku↔sonnet↔opus without prompt changes.
1151
+ * Default tier map uses the qwen3.5/3.6 families because Qwen's team
1152
+ * explicitly prioritizes tool-calling and keeps the same grammar across
1153
+ * sizes — so agents scale between haiku↔sonnet↔opus without prompt
1154
+ * changes. Three sizes selected from a single bench pass on a 4080 Super-
1155
+ * class box:
1154
1156
  *
1155
- * Sized for 16GB-class consumer GPUs (tested on 4080 Super):
1156
- * opus (30B MoE, activates 3B/token) ~14 GB VRAM, 50–80 tok/s
1157
- * sonnet (14B dense) ~9 GB VRAM, 30–50 tok/s
1158
- * haiku (4B dense) — ~3 GB VRAM, 100+ tok/s
1157
+ * opus (35B MoE, activates 3B/token) 15.1 GB VRAM + 10.5 GB RAM spill, ~15 tok/s
1158
+ * sonnet (9B dense) 8.2 GB VRAM, 0 spill, ~98 tok/s
1159
+ * haiku (4B dense) 3.4 GB VRAM, 0 spill, ~145 tok/s
1159
1160
  *
1160
- * Override with `options.modelId` if you want a different family.
1161
+ * Earlier qwen3:14b is in the same speed class as 3.5:9b and worse at most
1162
+ * tool-calling tasks; 3.5:9b is preferred for the sonnet slot.
1163
+ *
1164
+ * Override with `options.modelId` (or `AGENT_MODEL` env) for any other
1165
+ * family — the tier map is just the default when nothing is pinned.
1161
1166
  */
1162
1167
  declare const ollamaProvider: ProviderProtocol;
1163
1168
 
package/dist/index.d.ts CHANGED
@@ -1148,16 +1148,21 @@ declare const openrouterProvider: ProviderProtocol;
1148
1148
  /**
1149
1149
  * Ollama — local-only OSS models via HTTP.
1150
1150
  *
1151
- * Default tier map uses the Qwen3 family because Qwen's team explicitly
1152
- * prioritizes tool-calling and keeps the same grammar across sizes — so
1153
- * agents scale between haiku↔sonnet↔opus without prompt changes.
1151
+ * Default tier map uses the qwen3.5/3.6 families because Qwen's team
1152
+ * explicitly prioritizes tool-calling and keeps the same grammar across
1153
+ * sizes — so agents scale between haiku↔sonnet↔opus without prompt
1154
+ * changes. Three sizes selected from a single bench pass on a 4080 Super-
1155
+ * class box:
1154
1156
  *
1155
- * Sized for 16GB-class consumer GPUs (tested on 4080 Super):
1156
- * opus (30B MoE, activates 3B/token) ~14 GB VRAM, 50–80 tok/s
1157
- * sonnet (14B dense) ~9 GB VRAM, 30–50 tok/s
1158
- * haiku (4B dense) — ~3 GB VRAM, 100+ tok/s
1157
+ * opus (35B MoE, activates 3B/token) 15.1 GB VRAM + 10.5 GB RAM spill, ~15 tok/s
1158
+ * sonnet (9B dense) 8.2 GB VRAM, 0 spill, ~98 tok/s
1159
+ * haiku (4B dense) 3.4 GB VRAM, 0 spill, ~145 tok/s
1159
1160
  *
1160
- * Override with `options.modelId` if you want a different family.
1161
+ * Earlier qwen3:14b is in the same speed class as 3.5:9b and worse at most
1162
+ * tool-calling tasks; 3.5:9b is preferred for the sonnet slot.
1163
+ *
1164
+ * Override with `options.modelId` (or `AGENT_MODEL` env) for any other
1165
+ * family — the tier map is just the default when nothing is pinned.
1161
1166
  */
1162
1167
  declare const ollamaProvider: ProviderProtocol;
1163
1168
 
package/dist/index.js CHANGED
@@ -2506,9 +2506,9 @@ var openrouterProvider = {
2506
2506
  var ollamaProvider = {
2507
2507
  name: "ollama",
2508
2508
  tiers: {
2509
- opus: "qwen3:30b-a3b",
2510
- sonnet: "qwen3:14b",
2511
- haiku: "qwen3:4b"
2509
+ opus: "qwen3.6:35b-a3b",
2510
+ sonnet: "qwen3.5:9b",
2511
+ haiku: "qwen3.5:4b"
2512
2512
  },
2513
2513
  envVars: ["OLLAMA_HOST"],
2514
2514
  async load(modelId) {