@avasis-ai/synthcode 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -99,6 +99,7 @@ __export(src_exports, {
99
99
  AnthropicProvider: () => AnthropicProvider,
100
100
  BaseProvider: () => BaseProvider,
101
101
  BashTool: () => BashTool,
102
+ ClusterProvider: () => ClusterProvider,
102
103
  ContextManager: () => ContextManager,
103
104
  CostTracker: () => CostTracker,
104
105
  DEFAULT_COMPACT_THRESHOLD: () => DEFAULT_COMPACT_THRESHOLD,
@@ -124,6 +125,7 @@ __export(src_exports, {
124
125
  WebFetchTool: () => WebFetchTool,
125
126
  agentLoop: () => agentLoop,
126
127
  anthropic: () => anthropic,
128
+ cluster: () => cluster,
127
129
  createProvider: () => createProvider,
128
130
  createStreamAggregator: () => createStreamAggregator,
129
131
  defineTool: () => defineTool,
@@ -2400,6 +2402,338 @@ var OllamaProvider = class {
2400
2402
  }
2401
2403
  };
2402
2404
 
2405
// src/llm/cluster.ts
var DEFAULT_BASE = "http://localhost:11434/v1";
/**
 * Builds the default slot layout for a local cluster: one planner, three
 * workers/reviewers, and one fast drafter, all pointed at the same endpoint.
 *
 * @param {string} [baseURL] endpoint for every slot; falls back to DEFAULT_BASE
 * @returns {Array<{model: string, role: string, baseURL: string, temperature: number}>}
 */
function defaultCluster(baseURL) {
  const url = baseURL || DEFAULT_BASE;
  // [model, role, temperature] — one entry per slot, expanded below.
  const layout = [
    ["gemma4:31b", "planner", 0.3],
    ["gemma4:26b", "worker", 0.3],
    ["gemma4:26b", "worker", 0.35],
    ["gemma4:26b", "reviewer", 0.3],
    ["gemma4:e4b", "drafter", 0.3]
  ];
  return layout.map(([model, role, temperature]) => ({ model, role, baseURL: url, temperature }));
}
2417
/**
 * Heuristically grades a conversation as "simple" | "medium" | "complex" to
 * pick an orchestration strategy. Token count is estimated at ~4 chars/token.
 *
 * @param {Array<{role: string, content: any}>} messages conversation so far
 * @param {boolean} hasTools whether the request carries tool definitions
 * @returns {"simple"|"medium"|"complex"}
 */
function classifyComplexity(messages, hasTools) {
  const turnCount = messages.length;
  let estimatedTokens = 0;
  let sawToolResult = false;
  let sawCode = false;
  let sawError = false;
  for (const message of messages) {
    const text = typeof message.content === "string" ? message.content : JSON.stringify(message.content);
    estimatedTokens += text.length / 4;
    sawToolResult = sawToolResult || message.role === "tool";
    sawCode = sawCode || text.includes("```") || text.includes("function ") || text.includes("class ");
    sawError = sawError || text.includes("error") || text.includes("Error") || text.includes("FAIL");
  }
  // Long transcripts, or tool output mixed with failures, need full debate.
  if (estimatedTokens > 1e4 || (sawToolResult && sawError)) return "complex";
  if (estimatedTokens > 2e3 || hasTools || sawCode || sawError || turnCount > 4) return "medium";
  return "simple";
}
2434
/**
 * Picks which cluster slots participate for a given complexity grade.
 * "complex" uses every worker/reviewer; "medium", "simple", and any
 * unrecognized grade all use a single worker/reviewer pair (the original
 * switch returned identical objects for those arms).
 *
 * @param {{slots: Array<{role: string}>}} config cluster configuration
 * @param {"simple"|"medium"|"complex"} complexity grade from classifyComplexity
 * @returns {{planner: object|undefined, workers: object[], reviewers: object[], drafters: object|undefined}}
 */
function selectSlots(config, complexity) {
  const byRole = (role) => config.slots.filter((slot) => slot.role === role);
  const planners = byRole("planner");
  const workers = byRole("worker");
  const reviewers = byRole("reviewer");
  const drafters = byRole("drafter");
  if (complexity === "complex") {
    return { planner: planners[0], workers, reviewers, drafters: drafters[0] };
  }
  return { planner: planners[0], workers: workers.slice(0, 1), reviewers: reviewers.slice(0, 1), drafters: drafters[0] };
}
2449
/**
 * Sends one non-streaming chat-completion request to a slot's endpoint and
 * normalizes the reply into the cluster's internal result shape.
 *
 * @param {object} slot      cluster slot ({ model, baseURL?, temperature?, maxTokens? })
 * @param {object} request   { systemPrompt?, messages, tools?, maxOutputTokens?, temperature? }
 * @param {string} baseURL   fallback base URL when the slot has none
 * @param {number} timeoutMs request timeout, enforced via AbortSignal.timeout
 * @param {object} [extraOpts] extra top-level fields spread into the request body
 * @returns {Promise<{content: string, usage: object, stopReason: string, model: string, durationMs: number}>}
 * @throws {RetryableError} on connection failure or HTTP 429/503/529
 * @throws {Error} on any other non-OK response or an empty choices array
 */
async function callModel(slot, request, baseURL, timeoutMs, extraOpts) {
  const url = (slot.baseURL || baseURL) + "/chat/completions";
  const messages = [];
  if (request.systemPrompt) {
    messages.push({ role: "system", content: request.systemPrompt });
  }
  for (const m of request.messages) {
    if (m.role === "tool") {
      messages.push({ role: "tool", tool_call_id: m.tool_use_id, content: m.content });
      continue;
    }
    if (m.role === "assistant" && Array.isArray(m.content)) {
      // Split Anthropic-style content blocks into OpenAI text + tool_calls.
      const textParts = m.content.filter((b) => b.type === "text");
      const toolParts = m.content.filter((b) => b.type === "tool_use");
      const msg = {};
      if (textParts.length) msg.content = textParts.map((p) => p.text).join("");
      if (toolParts.length) {
        msg.tool_calls = toolParts.map((b) => {
          const tb = b;
          return { id: tb.id, type: "function", function: { name: tb.name, arguments: JSON.stringify(tb.input) } };
        });
      }
      msg.role = "assistant";
      messages.push(msg);
      continue;
    }
    messages.push({ role: m.role, content: m.content });
  }
  const body = {
    model: slot.model,
    messages,
    stream: false,
    // NOTE(review): "options" is Ollama's native tuning block; a strict
    // OpenAI-compatible server may silently ignore it — confirm against the
    // target endpoint.
    options: {
      num_predict: request.maxOutputTokens || slot.maxTokens || 4096,
      temperature: request.temperature ?? slot.temperature ?? 0.3,
      top_p: 0.95,
      top_k: 64
    },
    ...extraOpts
  };
  if (request.tools?.length) {
    body.tools = request.tools.map((t) => ({
      type: "function",
      function: { name: t.name, description: t.description, parameters: t.input_schema }
    }));
  }
  const start = Date.now();
  let response;
  try {
    response = await fetch(url, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
      signal: AbortSignal.timeout(timeoutMs)
    });
  } catch (err) {
    if (err instanceof RetryableError) throw err;
    throw new RetryableError(`Cluster slot ${slot.model} connection failed: ${err instanceof Error ? err.message : String(err)}`);
  }
  if (!response.ok) {
    const text2 = await response.text();
    if (response.status === 429 || response.status === 503 || response.status === 529) {
      throw new RetryableError(`Cluster slot ${slot.model} rate limited: ${response.status}`);
    }
    throw new Error(`Cluster slot ${slot.model} error ${response.status}: ${text2.slice(0, 200)}`);
  }
  const data = await response.json();
  const choice = data.choices?.[0];
  if (!choice) throw new Error(`Cluster slot ${slot.model} returned no choices`);
  // Strip visible reasoning traces emitted by "thinking" models.
  let text = choice.message?.content || "";
  text = text.replace(/<think[^>]*>[\s\S]*?<\/think>/gi, "").trim();
  text = text.replace(/<thinking>[\s\S]*?<\/thinking>/gi, "").trim();
  // BUG FIX: the alternation was previously unparenthesized
  // (/<channel>thought[\s\S]*?<channel|>/), so the pattern's right alternative
  // was a bare ">" and the replace deleted EVERY ">" in the reply ("=>", "->",
  // HTML, generics). Grouping with (?:...) scopes the alternation to the
  // thought-block terminator only.
  text = text.replace(/<channel>thought[\s\S]*?(?:<channel|>)/gi, "").trim();
  text = text.replace(/\[Thinking[^\]]*\]/gi, "").trim();
  // NOTE(review): choice.message.tool_calls is never propagated — only the
  // finish_reason maps to "tool_use". Confirm callers don't expect tool_use
  // content blocks from cluster slots.
  return {
    content: text,
    usage: {
      inputTokens: data.usage?.prompt_tokens ?? 0,
      outputTokens: data.usage?.completion_tokens ?? 0,
      cacheReadTokens: 0,
      cacheWriteTokens: 0
    },
    stopReason: choice.finish_reason === "tool_calls" ? "tool_use" : "end_turn",
    model: slot.model,
    durationMs: Date.now() - start
  };
}
2536
/**
 * Sums any number of usage records into one. Cache-token fields are treated
 * as 0 when absent; the result always carries all four fields.
 *
 * @param {...{inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number}} usages
 * @returns {{inputTokens: number, outputTokens: number, cacheReadTokens: number, cacheWriteTokens: number}}
 */
function mergeUsage(...usages) {
  const total = { inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 };
  for (const u of usages) {
    total.inputTokens += u.inputTokens;
    total.outputTokens += u.outputTokens;
    total.cacheReadTokens += u.cacheReadTokens || 0;
    total.cacheWriteTokens += u.cacheWriteTokens || 0;
  }
  return total;
}
2544
// Provider that orchestrates several local model slots (planner / workers /
// reviewers / drafter) behind a single chat() interface, choosing between
// speculative decoding, draft-and-verify, and multi-round debate based on
// request complexity.
var ClusterProvider = class {
  model;   // display name, e.g. "cluster[gemma4:31b,gemma4:26b,...]"
  config;  // resolved config: defaults merged with the caller's overrides
  stats;   // cumulative { calls, tokensIn, tokensOut, byModel } counters
  // Merges caller config over defaults. Note: a caller-supplied `slots`
  // array replaces the defaultCluster() layout entirely (spread overrides).
  constructor(config) {
    this.config = {
      baseURL: DEFAULT_BASE,
      timeoutMs: 18e4,
      strategy: "auto",
      draftCount: 1,
      debateRounds: 1,
      slots: defaultCluster(config?.baseURL),
      ...config
    };
    const models = [...new Set(this.config.slots.map((s) => s.model))];
    this.model = `cluster[${models.join(",")}]`;
    this.stats = { calls: 0, tokensIn: 0, tokensOut: 0, byModel: {} };
    for (const m of models) this.stats.byModel[m] = 0;
  }
  // Shallow snapshot of the counters (byModel is still shared by reference).
  getStats() {
    return { ...this.stats };
  }
  // Main entry point: grade the request, dispatch to a strategy, and wrap the
  // winning result in a content-block response.
  async chat(request) {
    const strategy = this.config.strategy || "auto";
    const complexity = strategy === "auto" ? classifyComplexity(request.messages, !!request.tools?.length) : "medium";
    let result;
    // NOTE(review): when strategy !== "auto" the switch argument is hard-coded
    // to "simple", so ANY explicit strategy value always runs speculative()
    // and the "medium" complexity computed above is discarded — confirm this
    // is intended rather than `: strategy`.
    switch (strategy === "auto" ? complexity : "simple") {
      case "simple":
        result = await this.speculative(request);
        break;
      case "complex":
        result = await this.debate(request);
        break;
      default:
        result = await this.draftVerify(request);
        break;
    }
    const content = [];
    if (result.content) {
      content.push({ type: "text", text: result.content });
    }
    return {
      content,
      stopReason: result.stopReason,
      usage: result.usage
    };
  }
  // Speculative decoding: the small drafter answers first, then a worker
  // verifies; if the verified text is close enough (Jaccard > 0.7) the draft
  // is kept as-is.
  async speculative(request) {
    const slots = selectSlots(this.config, "simple");
    const drafter = slots.drafters;
    const worker = slots.workers[0];
    if (!drafter || !worker) {
      return this.fallback(request);
    }
    const draft = await this.safeCall(drafter, request, { num_predict: request.maxOutputTokens || 4096 });
    // A missing or trivially short draft (< 20 chars) isn't worth verifying.
    if (!draft || draft.content.length < 20) {
      const fb = await this.safeCall(worker, request);
      return fb || this.fallback(request);
    }
    // Re-ask the last user turn with the draft appended for review.
    const verifyMessages = [
      ...request.messages.slice(0, -1),
      { role: "user", content: `${typeof request.messages[request.messages.length - 1].content === "string" ? request.messages[request.messages.length - 1].content : ""}

Here is a draft response. Review it. If it is correct and complete, return exactly the same text. If it has errors, fix them. Return ONLY the final corrected text, nothing else.

DRAFT:
${draft.content.slice(0, 8e3)}` }
    ];
    const verifyRequest = { ...request, messages: verifyMessages };
    const verified = await this.safeCall(worker, verifyRequest);
    if (!verified) return { ...draft, model: this.model };
    const similarity2 = jaccardSimilarity(draft.content, verified.content);
    if (similarity2 > 0.7) {
      // NOTE(review): verified.usage is discarded here even though the
      // verification call consumed tokens — the reported usage undercounts.
      return { ...draft, usage: mergeUsage(draft.usage) };
    }
    return { ...verified, usage: mergeUsage(draft.usage, verified.usage) };
  }
  // Draft-and-verify: one worker answers, a reviewer optionally corrects it.
  async draftVerify(request) {
    const slots = selectSlots(this.config, "medium");
    const worker = slots.workers[0];
    const reviewer = slots.reviewers[0];
    if (!worker) return this.fallback(request);
    const primary = await this.safeCall(worker, request);
    if (!primary) return this.fallback(request);
    // Short answers (< 50 chars) skip review entirely.
    if (!reviewer || primary.content.length < 50) {
      return primary;
    }
    const reviewMessages = [
      { role: "user", content: `Review this response for correctness. Fix any errors. Return ONLY the corrected text.

ORIGINAL REQUEST:
${typeof request.messages[request.messages.length - 1].content === "string" ? request.messages[request.messages.length - 1].content : ""}

RESPONSE TO REVIEW:
${primary.content.slice(0, 6e3)}` }
    ];
    const reviewed = await this.safeCall(reviewer, { ...request, messages: reviewMessages, maxOutputTokens: request.maxOutputTokens });
    if (!reviewed) return primary;
    return { ...reviewed, usage: mergeUsage(primary.usage, reviewed.usage) };
  }
  // Debate: iterative refinement across workers for debateRounds rounds, then
  // (when a planner and 2+ workers exist) fresh candidates are generated and
  // the planner arbitrates.
  async debate(request) {
    const slots = selectSlots(this.config, "complex");
    const workers = slots.workers;
    const planner = slots.planner;
    if (!workers.length) return this.fallback(request);
    const roundRounds = this.config.debateRounds || 1;
    let currentContent = "";
    for (let round = 0; round < roundRounds; round++) {
      const workerSlot = workers[round % workers.length];
      // Round 0 uses the request verbatim; later rounds feed back the
      // previous attempt for improvement.
      const implMessages = round === 0 ? request.messages : [
        ...request.messages.slice(0, -1),
        {
          role: "user",
          content: `${typeof request.messages[request.messages.length - 1].content === "string" ? request.messages[request.messages.length - 1].content : ""}

PREVIOUS ATTEMPT (Round ${round}):
${currentContent.slice(0, 4e3)}

Improve this. Fix any issues.`
        }
      ];
      const impl = await this.safeCall(workerSlot, { ...request, messages: implMessages });
      if (!impl) continue;
      currentContent = impl.content;
    }
    if (!currentContent) return this.fallback(request);
    if (planner && workers.length > 1) {
      const candidates = [];
      for (const w of workers) {
        const c = await this.safeCall(w, request);
        if (c && c.content.length > 30) candidates.push(c);
      }
      if (candidates.length > 1) {
        // NOTE(review): `best` is computed (longest candidate) but never used;
        // the planner sees all candidates regardless.
        const best = candidates.sort((a, b) => b.content.length - a.content.length)[0];
        const arbMessages = [
          {
            role: "user",
            content: `Select the best response or synthesize a better one. Return ONLY the final text.

TASK:
${typeof request.messages[request.messages.length - 1].content === "string" ? request.messages[request.messages.length - 1].content : ""}

CANDIDATES:
${candidates.map((c, i) => `--- Candidate ${i + 1} (${c.model}) ---
${c.content.slice(0, 3e3)}`).join("\n\n")}`
          }
        ];
        const arbitrated = await this.safeCall(planner, { ...request, messages: arbMessages, maxOutputTokens: request.maxOutputTokens });
        if (arbitrated && arbitrated.content.length > 20) {
          const allUsages = candidates.map((c) => c.usage).concat(arbitrated.usage);
          return { ...arbitrated, usage: mergeUsage(...allUsages) };
        }
      }
    }
    // NOTE(review): this path reports zero token usage even though refinement
    // rounds consumed tokens (they are tracked in this.stats but not returned).
    return {
      content: currentContent,
      usage: { inputTokens: 0, outputTokens: 0 },
      stopReason: "end_turn",
      model: this.model,
      durationMs: 0
    };
  }
  // Calls one slot, records stats on success, and swallows ALL errors into
  // null so strategies can fall through to alternatives.
  async safeCall(slot, request, extraOpts) {
    try {
      const result = await callModel(slot, request, this.config.baseURL || DEFAULT_BASE, this.config.timeoutMs || 18e4, extraOpts);
      this.stats.calls++;
      this.stats.tokensIn += result.usage.inputTokens;
      this.stats.tokensOut += result.usage.outputTokens;
      this.stats.byModel[slot.model] = (this.stats.byModel[slot.model] || 0) + 1;
      return result;
    } catch (e) {
      return null;
    }
  }
  // Last resort: call the first configured slot directly; unlike safeCall,
  // errors here propagate to the caller.
  async fallback(request) {
    const anySlot = this.config.slots[0];
    const result = await callModel(anySlot, request, this.config.baseURL || DEFAULT_BASE, this.config.timeoutMs || 18e4);
    this.stats.calls++;
    this.stats.tokensIn += result.usage.inputTokens;
    this.stats.tokensOut += result.usage.outputTokens;
    this.stats.byModel[anySlot.model] = (this.stats.byModel[anySlot.model] || 0) + 1;
    return result;
  }
};
2728
/**
 * Jaccard similarity of two strings over their sets of lowercase words
 * longer than 2 characters. Returns 1 when both word sets are empty, 0 when
 * exactly one is empty.
 *
 * @param {string} a first text
 * @param {string} b second text
 * @returns {number} similarity in [0, 1]
 */
function jaccardSimilarity(a, b) {
  const tokenize = (s) => new Set(s.toLowerCase().split(/\s+/).filter((w) => w.length > 2));
  const left = tokenize(a);
  const right = tokenize(b);
  if (left.size === 0 && right.size === 0) return 1;
  let shared = 0;
  for (const word of left) {
    if (right.has(word)) shared++;
  }
  // |A ∪ B| by inclusion-exclusion — avoids building a merged Set.
  const union = left.size + right.size - shared;
  return union > 0 ? shared / union : 0;
}
2736
+
2403
2737
  // src/llm/index.ts
2404
2738
  function anthropic(config) {
2405
2739
  return new AnthropicProvider(config);
@@ -2410,6 +2744,9 @@ function openai(config) {
2410
2744
  function ollama(config) {
2411
2745
  return new OllamaProvider(config);
2412
2746
  }
2747
// Factory: build a ClusterProvider from an optional partial config,
// mirroring the anthropic()/openai()/ollama() helpers above.
function cluster(config) {
  return new ClusterProvider(config);
}
2413
2750
  var CustomProvider = class {
2414
2751
  model;
2415
2752
  chatFn;
@@ -2868,6 +3205,7 @@ describe("Agent", () => { it("should have tools registered", () => { expect(true
2868
3205
  AnthropicProvider,
2869
3206
  BaseProvider,
2870
3207
  BashTool,
3208
+ ClusterProvider,
2871
3209
  ContextManager,
2872
3210
  CostTracker,
2873
3211
  DEFAULT_COMPACT_THRESHOLD,
@@ -2893,6 +3231,7 @@ describe("Agent", () => { it("should have tools registered", () => { expect(true
2893
3231
  WebFetchTool,
2894
3232
  agentLoop,
2895
3233
  anthropic,
3234
+ cluster,
2896
3235
  createProvider,
2897
3236
  createStreamAggregator,
2898
3237
  defineTool,