pi-free 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -370,24 +370,46 @@ function findBestVariantByPrefix(
370
370
  }
371
371
 
372
372
  // =============================================================================
373
- // Main lookup
373
+ // Variant alias mappings
374
374
  // =============================================================================
375
375
 
376
- export function findHardcodedBenchmark(
377
- modelName: string,
378
- modelId: string,
379
- provider?: string,
380
- ): HardcodedBenchmark | null {
381
- const search = `${modelName} ${modelId}`.toLowerCase();
376
/**
 * Alias table used by the variant-alias step of the benchmark lookup.
 *
 * - Key: the name used to index `HARDCODED_BENCHMARKS`.
 * - Value: name fragments that are compared with
 *   `search.includes(alias.toLowerCase())`, where `search` is the lowercased
 *   `"<modelName> <modelId>"` string.
 *
 * The table is walked with `Object.entries`, i.e. in the order written here.
 * Because matching is by substring, a short alias also matches longer names
 * that contain it (for example "gpt-4" is contained in "gpt-4o"), so more
 * specific entries are listed before more general ones.
 * NOTE(review): this relies on the consumer stopping at the first entry that
 * yields a benchmark — confirm against the lookup loop before reordering.
 */
const MODEL_VARIANTS: Record<string, string[]> = {
  "gpt-4o-aug-24": [
    "gpt-4o",
    "gpt-4-o",
  ],
  "gpt-4": [
    "gpt-4",
    "gpt4",
  ],
  "claude-3.5-sonnet-oct-24": [
    "claude-3.5-sonnet",
    "claude-3-5-sonnet",
    "sonnet-3.5",
  ],
  "claude-3-opus": [
    "claude-3-opus",
    "opus-3",
  ],
  "llama-3.1-instruct-405b": [
    "llama-3.1-405b",
    "llama3.1-405b",
    "llama-405b",
  ],
  "llama-3.1-instruct-70b": [
    "llama-3.1-70b",
    "llama3.1-70b",
    "llama-70b",
  ],
  "gemini-1.5-pro": [
    "gemini-1.5-pro",
    "gemini1.5-pro",
    "gemini-pro-1.5",
  ],
  "qwen2.5-instruct-72b": [
    "qwen2.5-72b",
    "qwen-2.5-72b",
  ],
  "deepseek-v3.2-non-reasoning": [
    "deepseek-v3",
    "deepseekv3",
    "deepseek-chat",
  ],
  "mimo-v2-pro": [
    "mimo-v2-pro",
    "mimo-v2-pro-free",
    "mimo-pro",
  ],
  "mimo-v2-omni": [
    "mimo-v2-omni",
    "mimo-v2-omni-free",
    "mimo-omni",
  ],
  "mimo-v2-flash": [
    "mimo-v2-flash",
    "mimo-v2-flash-free",
    "mimo-flash",
  ],
  "big-pickle": [
    "big-pickle",
    "bigpickle",
  ],
  "minimax-m2.5": [
    "minimax-m2.5",
    "minimax-m2.5-free",
    "minimax-m25",
  ],
  "nvidia-nemotron-3-super-120b-a12b-reasoning": [
    "nemotron-3-super",
    "nemotron-3-super-free",
    "nemotron-super",
    "nemotron-3",
  ],
};
382
402
 
383
- logDebug({
384
- provider,
385
- modelId,
386
- modelName,
387
- action: "attempt",
388
- });
403
+ // =============================================================================
404
+ // Strategy steps
405
+ // =============================================================================
389
406
 
390
- // 1. Direct lookup — check if any benchmark key is a substring of the search
407
+ function tryDirectSubstringMatch(
408
+ search: string,
409
+ provider: string | undefined,
410
+ modelId: string,
411
+ modelName: string,
412
+ ): HardcodedBenchmark | null {
391
413
  for (const [key, data] of Object.entries(HARDCODED_BENCHMARKS) as [
392
414
  string,
393
415
  HardcodedBenchmark,
@@ -405,44 +427,16 @@ export function findHardcodedBenchmark(
405
427
  return data;
406
428
  }
407
429
  }
430
+ return null;
431
+ }
408
432
 
409
- // 2. Variant matching — aliases for models with different naming conventions
410
- const variants: Record<string, string[]> = {
411
- "gpt-4o-aug-24": ["gpt-4o", "gpt-4-o"],
412
- "gpt-4": ["gpt-4", "gpt4"],
413
- "claude-3.5-sonnet-oct-24": [
414
- "claude-3.5-sonnet",
415
- "claude-3-5-sonnet",
416
- "sonnet-3.5",
417
- ],
418
- "claude-3-opus": ["claude-3-opus", "opus-3"],
419
- "llama-3.1-instruct-405b": [
420
- "llama-3.1-405b",
421
- "llama3.1-405b",
422
- "llama-405b",
423
- ],
424
- "llama-3.1-instruct-70b": ["llama-3.1-70b", "llama3.1-70b", "llama-70b"],
425
- "gemini-1.5-pro": ["gemini-1.5-pro", "gemini1.5-pro", "gemini-pro-1.5"],
426
- "qwen2.5-instruct-72b": ["qwen2.5-72b", "qwen-2.5-72b"],
427
- "deepseek-v3.2-non-reasoning": [
428
- "deepseek-v3",
429
- "deepseekv3",
430
- "deepseek-chat",
431
- ],
432
- "mimo-v2-pro": ["mimo-v2-pro", "mimo-v2-pro-free", "mimo-pro"],
433
- "mimo-v2-omni": ["mimo-v2-omni", "mimo-v2-omni-free", "mimo-omni"],
434
- "mimo-v2-flash": ["mimo-v2-flash", "mimo-v2-flash-free", "mimo-flash"],
435
- "big-pickle": ["big-pickle", "bigpickle"],
436
- "minimax-m2.5": ["minimax-m2.5", "minimax-m2.5-free", "minimax-m25"],
437
- "nvidia-nemotron-3-super-120b-a12b-reasoning": [
438
- "nemotron-3-super",
439
- "nemotron-3-super-free",
440
- "nemotron-super",
441
- "nemotron-3",
442
- ],
443
- };
444
-
445
- for (const [canonical, names] of Object.entries(variants)) {
433
+ function tryVariantAliasMatch(
434
+ search: string,
435
+ provider: string | undefined,
436
+ modelId: string,
437
+ modelName: string,
438
+ ): HardcodedBenchmark | null {
439
+ for (const [canonical, names] of Object.entries(MODEL_VARIANTS)) {
446
440
  if (names.some((n) => search.includes(n.toLowerCase()))) {
447
441
  const data = HARDCODED_BENCHMARKS[canonical];
448
442
  if (data) {
@@ -459,66 +453,115 @@ export function findHardcodedBenchmark(
459
453
  }
460
454
  }
461
455
  }
456
+ return null;
457
+ }
462
458
 
463
- // 3. Provider-specific normalization
464
- const { normalized: providerNormalized, strategy: providerStrategy } =
465
- applyProviderNormalization(modelId, provider);
466
-
467
- if (providerNormalized !== modelId.toLowerCase()) {
468
- logDebug({
469
- provider,
470
- modelId,
471
- modelName,
472
- action: "normalized",
473
- strategy: providerStrategy,
474
- normalizedId: providerNormalized,
475
- });
459
+ function tryProviderNormalizedMatch(
460
+ modelId: string,
461
+ provider: string | undefined,
462
+ modelName: string,
463
+ ): { result: HardcodedBenchmark | null; normalized: string } {
464
+ const { normalized, strategy } = applyProviderNormalization(
465
+ modelId,
466
+ provider,
467
+ );
476
468
 
477
- // Try exact match on normalized ID
478
- for (const [key, data] of Object.entries(HARDCODED_BENCHMARKS) as [
479
- string,
480
- HardcodedBenchmark,
481
- ][]) {
482
- if (providerNormalized.includes(key.toLowerCase())) {
483
- logDebug({
484
- provider,
485
- modelId,
486
- modelName,
487
- action: "match",
488
- strategy: `provider-normalized:${providerStrategy}`,
489
- matchKey: key,
490
- codingIndex: data.codingIndex,
491
- });
492
- return data;
493
- }
494
- }
469
+ if (normalized === modelId.toLowerCase()) {
470
+ return { result: null, normalized };
495
471
  }
496
472
 
497
- // 4. Prefix fallback — extract base model ID and find best variant
498
- // Handles cases where benchmark keys have variant suffixes
499
- // (reasoning/non-reasoning, effort levels, dates) that the model ID lacks
500
- const baseId = extractBaseModelId(providerNormalized);
501
- if (baseId) {
502
- let best = findBestVariantByPrefix(baseId, provider, modelId);
503
- if (best) return best;
504
-
505
- // 4b. Try with word-order normalization
506
- // (e.g., llama-3.3-70b-instruct llama-3.3-instruct-70b)
507
- const normalizedId = normalizeSizeTokenOrder(baseId);
508
- if (normalizedId !== baseId) {
473
+ logDebug({
474
+ provider,
475
+ modelId,
476
+ modelName,
477
+ action: "normalized",
478
+ strategy,
479
+ normalizedId: normalized,
480
+ });
481
+
482
+ for (const [key, data] of Object.entries(HARDCODED_BENCHMARKS) as [
483
+ string,
484
+ HardcodedBenchmark,
485
+ ][]) {
486
+ if (normalized.includes(key.toLowerCase())) {
509
487
  logDebug({
510
488
  provider,
511
489
  modelId,
512
490
  modelName,
513
- action: "normalized",
514
- strategy: "size-token-reorder",
515
- normalizedId: normalizedId,
491
+ action: "match",
492
+ strategy: `provider-normalized:${strategy}`,
493
+ matchKey: key,
494
+ codingIndex: data.codingIndex,
516
495
  });
517
- best = findBestVariantByPrefix(normalizedId, provider, modelId);
518
- if (best) return best;
496
+ return { result: data, normalized };
519
497
  }
520
498
  }
521
499
 
500
+ return { result: null, normalized };
501
+ }
502
+
503
/**
 * Prefix-fallback lookup step.
 *
 * Derives a base model ID from `normalizedId` with `extractBaseModelId` and
 * asks `findBestVariantByPrefix` for a benchmark. If that finds nothing, the
 * base ID is passed through `normalizeSizeTokenOrder`
 * (e.g., llama-3.3-70b-instruct → llama-3.3-instruct-70b) and, only when that
 * actually changed the ID, the prefix lookup is retried once.
 *
 * @param normalizedId - ID to derive the base model ID from.
 * @param provider - Forwarded to the prefix lookup and the debug log.
 * @param modelId - Forwarded to the prefix lookup and the debug log.
 * @param modelName - Used only in the debug log entry.
 * @returns The benchmark found by either attempt, or `null` when no base ID
 *   could be extracted or neither attempt matched.
 */
function tryPrefixFallback(
  normalizedId: string,
  provider: string | undefined,
  modelId: string,
  modelName: string,
): HardcodedBenchmark | null {
  const baseId = extractBaseModelId(normalizedId);

  if (baseId) {
    const firstAttempt = findBestVariantByPrefix(baseId, provider, modelId);
    if (firstAttempt) {
      return firstAttempt;
    }

    const reorderedId = normalizeSizeTokenOrder(baseId);
    if (reorderedId !== baseId) {
      // Record the rewritten ID before retrying so the log shows what was tried.
      logDebug({
        provider,
        modelId,
        modelName,
        action: "normalized",
        strategy: "size-token-reorder",
        normalizedId: reorderedId,
      });

      return findBestVariantByPrefix(reorderedId, provider, modelId);
    }
  }

  return null;
}
531
+
532
+ // =============================================================================
533
+ // Main lookup
534
+ // =============================================================================
535
+
536
+ export function findHardcodedBenchmark(
537
+ modelName: string,
538
+ modelId: string,
539
+ provider?: string,
540
+ ): HardcodedBenchmark | null {
541
+ const search = `${modelName} ${modelId}`.toLowerCase();
542
+
543
+ logDebug({ provider, modelId, modelName, action: "attempt" });
544
+
545
+ // 1. Direct substring match
546
+ const direct = tryDirectSubstringMatch(search, provider, modelId, modelName);
547
+ if (direct) return direct;
548
+
549
+ // 2. Variant alias matching
550
+ const variant = tryVariantAliasMatch(search, provider, modelId, modelName);
551
+ if (variant) return variant;
552
+
553
+ // 3. Provider-specific normalization
554
+ const { result: normalizedResult, normalized } = tryProviderNormalizedMatch(
555
+ modelId,
556
+ provider,
557
+ modelName,
558
+ );
559
+ if (normalizedResult) return normalizedResult;
560
+
561
+ // 4. Prefix fallback with base model extraction
562
+ const prefix = tryPrefixFallback(normalized, provider, modelId, modelName);
563
+ if (prefix) return prefix;
564
+
522
565
  // No match found
523
566
  logDebug({
524
567
  provider,
@@ -526,8 +569,8 @@ export function findHardcodedBenchmark(
526
569
  modelName,
527
570
  action: "miss",
528
571
  strategy: "all-strategies-failed",
529
- normalizedId: baseId || providerNormalized,
530
- details: `Final normalized: ${baseId || providerNormalized}`,
572
+ normalizedId: normalized,
573
+ details: `Final normalized: ${normalized}`,
531
574
  });
532
575
 
533
576
  return null;
@@ -569,6 +612,45 @@ export function enhanceModelNameWithCodingIndex(
569
612
  * Get statistics about model matching from the current session
570
613
  * Note: This reads the log file and computes stats
571
614
  */
615
/**
 * Mutable tallies accumulated while scanning the matching log line by line.
 * The derived match rate is intentionally not stored here; it is computed
 * from `matches` and `misses` once scanning is done.
 */
interface LogStats {
  /** Count of log lines whose action field is "attempt". */
  totalAttempts: number;
  /** Count of log lines whose action field is "match". */
  matches: number;
  /** Count of log lines whose action field is "miss". */
  misses: number;
  /** The same three counters, bucketed by the provider field of each line. */
  byProvider: {
    [provider: string]: { attempts: number; matches: number; misses: number };
  };
}
624
+
625
/**
 * Folds one pipe-delimited log line into `stats` (mutated in place).
 *
 * Blank lines and lines with fewer than five `|`-separated fields are
 * ignored. Field 1 is the provider (an empty provider is bucketed as
 * "unknown") and field 4 is the action; only the actions "attempt", "match"
 * and "miss" change counters, but any well-formed line creates the provider
 * bucket.
 *
 * The provider bucket is looked up and created as an OWN property of
 * `stats.byProvider`. A plain truthiness test would resolve provider names
 * such as "constructor", "toString" or "__proto__" to inherited
 * `Object.prototype` members, skip bucket creation, and increment counters on
 * those shared built-ins instead.
 *
 * @param stats - Accumulator to update.
 * @param line - A single line of the log file, without the trailing newline.
 */
function parseLogLine(stats: LogStats, line: string): void {
  if (!line.trim()) return;

  const fields = line.split("|");
  if (fields.length < 5) return;

  const provider = fields[1] || "unknown";
  const action = fields[4];

  let bucket = Object.prototype.hasOwnProperty.call(stats.byProvider, provider)
    ? stats.byProvider[provider]
    : undefined;

  if (!bucket) {
    bucket = { attempts: 0, matches: 0, misses: 0 };
    // defineProperty (not assignment) so that even "__proto__" becomes a
    // regular own, enumerable entry rather than replacing the prototype.
    Object.defineProperty(stats.byProvider, provider, {
      value: bucket,
      writable: true,
      enumerable: true,
      configurable: true,
    });
  }

  switch (action) {
    case "attempt":
      stats.totalAttempts++;
      bucket.attempts++;
      break;
    case "match":
      stats.matches++;
      bucket.matches++;
      break;
    case "miss":
      stats.misses++;
      bucket.misses++;
      break;
    default:
      // Other actions (e.g. "normalized") are not counted.
      break;
  }
}
648
+
649
/**
 * Percentage of resolved lookups that were matches, rounded to the nearest
 * integer.
 *
 * "Resolved" means `matches + misses`; attempts that logged neither outcome
 * do not enter the denominator.
 *
 * @param stats - Tallies to derive the rate from.
 * @returns A rounded percentage, or 0 when there are no resolved lookups.
 */
function computeMatchRate(stats: LogStats): number {
  const { matches, misses } = stats;
  const resolved = matches + misses;

  // Written as a negated ">" so a NaN sum also falls back to 0.
  if (!(resolved > 0)) {
    return 0;
  }

  return Math.round((matches / resolved) * 100);
}
653
+
572
654
  export function getMatchingStats(): {
573
655
  totalAttempts: number;
574
656
  matches: number;
@@ -579,58 +661,27 @@ export function getMatchingStats(): {
579
661
  { attempts: number; matches: number; misses: number }
580
662
  >;
581
663
  } {
582
- const stats = {
664
+ const stats: LogStats = {
583
665
  totalAttempts: 0,
584
666
  matches: 0,
585
667
  misses: 0,
586
- matchRate: 0,
587
- byProvider: {} as Record<
588
- string,
589
- { attempts: number; matches: number; misses: number }
590
- >,
668
+ byProvider: {},
591
669
  };
592
670
 
593
671
  try {
594
672
  if (!existsSync(LOG_FILE)) {
595
- return stats;
673
+ return { ...stats, matchRate: 0 };
596
674
  }
597
675
 
598
676
  const content = readFileSync(LOG_FILE, "utf-8");
599
- const lines = content.split("\n").slice(1); // Skip header
600
-
601
- for (const line of lines) {
602
- if (!line.trim()) continue;
603
- const parts = line.split("|");
604
- if (parts.length < 5) continue;
605
-
606
- const provider = parts[1] || "unknown";
607
- const action = parts[4];
608
-
609
- if (!stats.byProvider[provider]) {
610
- stats.byProvider[provider] = { attempts: 0, matches: 0, misses: 0 };
611
- }
612
-
613
- if (action === "attempt") {
614
- stats.totalAttempts++;
615
- stats.byProvider[provider].attempts++;
616
- } else if (action === "match") {
617
- stats.matches++;
618
- stats.byProvider[provider].matches++;
619
- } else if (action === "miss") {
620
- stats.misses++;
621
- stats.byProvider[provider].misses++;
622
- }
677
+ for (const line of content.split("\n").slice(1)) {
678
+ parseLogLine(stats, line);
623
679
  }
624
-
625
- stats.matchRate =
626
- stats.totalAttempts > 0
627
- ? Math.round((stats.matches / (stats.matches + stats.misses)) * 100)
628
- : 0;
629
680
  } catch {
630
681
  // Return empty stats on error
631
682
  }
632
683
 
633
- return stats;
684
+ return { ...stats, matchRate: computeMatchRate(stats) };
634
685
  }
635
686
 
636
687
  // Need to import readFileSync for stats
@@ -138,19 +138,24 @@ function extractTaskBody(content: unknown): string {
138
138
  return "";
139
139
  }
140
140
 
141
- function shapeMessagesForCline(messages: any[]): any[] {
142
- let lastWrappedIdx = -1;
143
- let baseTranscript = "";
141
+ function findLastClineWrappedMessage(messages: any[]): {
142
+ index: number;
143
+ transcript: string;
144
+ } {
144
145
  for (let i = messages.length - 1; i >= 0; i--) {
145
146
  if (messages[i]?.role !== "user") continue;
146
147
  if (!isClineWrapped(messages[i]?.content)) continue;
147
- lastWrappedIdx = i;
148
- baseTranscript = extractTaskBody(messages[i].content);
149
- break;
148
+ return { index: i, transcript: extractTaskBody(messages[i].content) };
150
149
  }
150
+ return { index: -1, transcript: "" };
151
+ }
151
152
 
153
+ function buildTranscriptParts(
154
+ messages: any[],
155
+ startIdx: number,
156
+ baseTranscript: string,
157
+ ): string[] {
152
158
  const parts: string[] = baseTranscript ? [baseTranscript] : [];
153
- const startIdx = lastWrappedIdx >= 0 ? lastWrappedIdx + 1 : 0;
154
159
 
155
160
  for (let i = startIdx; i < messages.length; i++) {
156
161
  const msg = messages[i];
@@ -167,9 +172,10 @@ function shapeMessagesForCline(messages: any[]): any[] {
167
172
  }
168
173
  }
169
174
 
170
- const transcript = parts.join("\n\n").trim() || "(no conversation yet)";
171
- const envDetails = buildEnvironmentDetails();
175
+ return parts;
176
+ }
172
177
 
178
+ function buildCollapsedMessage(messages: any[], transcript: string): any[] {
173
179
  const collapsed: any[] = [];
174
180
  const systemMsg = messages.find((m: any) => m?.role === "system");
175
181
  if (systemMsg) {
@@ -182,13 +188,24 @@ function shapeMessagesForCline(messages: any[]): any[] {
182
188
  content: [
183
189
  { type: "text", text: `<task>\n${transcript}\n</task>` },
184
190
  { type: "text", text: TASK_PROGRESS_BLOCK },
185
- { type: "text", text: envDetails },
191
+ { type: "text", text: buildEnvironmentDetails() },
186
192
  ],
187
193
  });
188
194
 
189
195
  return collapsed;
190
196
  }
191
197
 
198
/**
 * Collapses a message list into the shape built by `buildCollapsedMessage`.
 *
 * Steps:
 *  1. Locate the last Cline-wrapped user message and take its transcript as
 *     the starting text.
 *  2. Gather transcript parts from the message after it (or from the first
 *     message when none was found).
 *  3. Join the parts with blank lines, trim, and substitute
 *     "(no conversation yet)" when nothing remains.
 *
 * @param messages - Message list to reshape.
 * @returns The collapsed message list.
 */
function shapeMessagesForCline(messages: any[]): any[] {
  const lastWrapped = findLastClineWrappedMessage(messages);

  // index is -1 when no wrapped message exists; start from the beginning then.
  const firstUnseen = lastWrapped.index >= 0 ? lastWrapped.index + 1 : 0;

  const joined = buildTranscriptParts(
    messages,
    firstUnseen,
    lastWrapped.transcript,
  )
    .join("\n\n")
    .trim();

  return buildCollapsedMessage(messages, joined || "(no conversation yet)");
}
208
+
192
209
  // =============================================================================
193
210
  // Extension entry point
194
211
  // =============================================================================
@@ -58,7 +58,9 @@ interface FetchModelsOptions {
58
58
  async function fetchModelsFromEndpoint(
59
59
  opts: FetchModelsOptions,
60
60
  ): Promise<ProviderModelConfig[]> {
61
- const url = `${opts.baseUrl.replace(/\/+$/, "")}/models`;
61
+ let cleanBase = opts.baseUrl;
62
+ while (cleanBase.endsWith("/")) cleanBase = cleanBase.slice(0, -1);
63
+ const url = `${cleanBase}/models`;
62
64
  const headers: Record<string, string> = {
63
65
  Accept: "application/json",
64
66
  Authorization: `Bearer ${opts.apiKey}`,
@@ -170,39 +170,35 @@ function inferModelFromId(id: string): ModelsDevModel | null {
170
170
  // Fetch + map
171
171
  // =============================================================================
172
172
 
173
- async function fetchNvidiaModels(
174
- apiKey?: string,
175
- ): Promise<ProviderModelConfig[]> {
176
- // ── 1. Query NVIDIA's actual API (source of truth) ─────────────────
177
- let apiModelIds = new Set<string>();
178
- if (apiKey) {
179
- try {
180
- const response = await fetchWithRetry(
181
- `${BASE_URL_NVIDIA}/models`,
182
- {
183
- headers: {
184
- Authorization: `Bearer ${apiKey}`,
185
- "User-Agent": "pi-free-providers",
186
- },
173
+ async function fetchNvidiaApiModelIds(apiKey: string): Promise<Set<string>> {
174
+ try {
175
+ const response = await fetchWithRetry(
176
+ `${BASE_URL_NVIDIA}/models`,
177
+ {
178
+ headers: {
179
+ Authorization: `Bearer ${apiKey}`,
180
+ "User-Agent": "pi-free-providers",
187
181
  },
188
- 3,
189
- 1000,
190
- DEFAULT_FETCH_TIMEOUT_MS,
191
- );
192
- if (response.ok) {
193
- const json = (await response.json()) as {
194
- data?: Array<{ id: string }>;
195
- };
196
- if (json.data) {
197
- apiModelIds = new Set(json.data.map((m) => m.id));
198
- }
182
+ },
183
+ 3,
184
+ 1000,
185
+ DEFAULT_FETCH_TIMEOUT_MS,
186
+ );
187
+ if (response.ok) {
188
+ const json = (await response.json()) as {
189
+ data?: Array<{ id: string }>;
190
+ };
191
+ if (json.data) {
192
+ return new Set(json.data.map((m) => m.id));
199
193
  }
200
- } catch (error) {
201
- console.error("[nvidia] Failed to fetch models from NVIDIA API", error);
202
194
  }
195
+ } catch (error) {
196
+ console.error("[nvidia] Failed to fetch models from NVIDIA API", error);
203
197
  }
198
+ return new Set();
199
+ }
204
200
 
205
- // ── 2. Fetch models.dev for rich metadata (cost, limits, etc.) ─────
201
+ async function fetchModelsDevMetadata(): Promise<Map<string, ModelsDevModel>> {
206
202
  const devModels = new Map<string, ModelsDevModel>();
207
203
  try {
208
204
  const response = await fetchWithRetry(
@@ -226,6 +222,27 @@ async function fetchNvidiaModels(
226
222
  } catch (error) {
227
223
  console.error("[nvidia] Failed to fetch models.dev", error);
228
224
  }
225
+ return devModels;
226
+ }
227
+
228
/**
 * Tells whether a model can be used for text chat.
 *
 * A model with no `modalities` information is accepted. Otherwise both its
 * output and its input modality lists must contain "text"; a missing list is
 * treated as empty.
 *
 * @param m - Model metadata to inspect.
 * @returns `true` when the model is accepted by the rule above.
 */
function isChatModel(m: ModelsDevModel): boolean {
  if (!m.modalities) {
    return true;
  }

  const { output, input } = m.modalities;
  return [output, input].every((list) => (list ?? []).includes("text"));
}
235
+
236
+ async function fetchNvidiaModels(
237
+ apiKey?: string,
238
+ ): Promise<ProviderModelConfig[]> {
239
+ // ── 1. Query NVIDIA's actual API (source of truth) ─────────────────
240
+ const apiModelIds = apiKey
241
+ ? await fetchNvidiaApiModelIds(apiKey)
242
+ : new Set<string>();
243
+
244
+ // ── 2. Fetch models.dev for rich metadata (cost, limits, etc.) ─────
245
+ const devModels = await fetchModelsDevMetadata();
229
246
 
230
247
  // ── 3. Build unified list (NVIDIA API wins; fallback to models.dev) ─
231
248
  const modelIds =
@@ -233,30 +250,11 @@ async function fetchNvidiaModels(
233
250
 
234
251
  const result = applyHidden(
235
252
  modelIds
236
- .map((id) => {
237
- const dev = devModels.get(id);
238
- if (dev) return dev;
239
- return inferModelFromId(id);
240
- })
253
+ .map((id) => devModels.get(id) ?? inferModelFromId(id))
241
254
  .filter((m): m is ModelsDevModel => m !== null)
242
255
  .filter((m) => isUsableModel(m.id, NVIDIA_MIN_SIZE_B))
243
- .filter((m) => {
244
- const modalities = m.modalities;
245
- if (modalities) {
246
- const output = modalities.output ?? [];
247
- const input = modalities.input ?? [];
248
- if (!output.includes("text")) return false;
249
- if (!input.includes("text")) return false;
250
- }
251
- return true;
252
- })
253
- // Filter out known 404 models (listed but not provisioned for chat)
254
- .filter((m) => {
255
- if (NVIDIA_KNOWN_404_MODELS.has(m.id)) {
256
- return false;
257
- }
258
- return true;
259
- })
256
+ .filter(isChatModel)
257
+ .filter((m) => !NVIDIA_KNOWN_404_MODELS.has(m.id))
260
258
  // NVIDIA is freemium — all models are usable with free credits.
261
259
  // No cost filtering applied.
262
260
  .map(