pi-free 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/update-benchmarks.yml +67 -0
- package/.pi/skills/pi-extension-dev/SKILL.md +155 -0
- package/CHANGELOG.md +59 -0
- package/LICENSE +21 -0
- package/README.md +289 -0
- package/config.ts +224 -0
- package/constants.ts +110 -0
- package/docs/free-tier-limits.md +213 -0
- package/docs/model-hopping.md +214 -0
- package/docs/plans/file-reorganization.md +172 -0
- package/docs/plans/package-json-fix.md +143 -0
- package/docs/provider-failover-plan.md +279 -0
- package/lib/json-persistence.ts +102 -0
- package/lib/logger.ts +94 -0
- package/lib/model-enhancer.ts +20 -0
- package/lib/types.ts +108 -0
- package/lib/util.ts +256 -0
- package/package.json +52 -0
- package/provider-factory.ts +221 -0
- package/provider-failover/errors.ts +275 -0
- package/provider-failover/hardcoded-benchmarks.ts +9889 -0
- package/provider-failover/index.ts +194 -0
- package/provider-helper.ts +336 -0
- package/providers/cline-auth.ts +473 -0
- package/providers/cline-models.ts +77 -0
- package/providers/cline.ts +257 -0
- package/providers/factory.ts +125 -0
- package/providers/fireworks.ts +49 -0
- package/providers/kilo-auth.ts +172 -0
- package/providers/kilo-models.ts +26 -0
- package/providers/kilo.ts +144 -0
- package/providers/mistral.ts +144 -0
- package/providers/model-fetcher.ts +138 -0
- package/providers/nvidia.ts +97 -0
- package/providers/ollama.ts +113 -0
- package/providers/openrouter.ts +175 -0
- package/providers/zen.ts +416 -0
- package/scripts/update-benchmarks.ts +255 -0
- package/tests/cline.test.ts +149 -0
- package/tests/errors.test.ts +139 -0
- package/tests/failover.test.ts +94 -0
- package/tests/fireworks.test.ts +148 -0
- package/tests/free-tier-limits.test.ts +191 -0
- package/tests/json-persistence.test.ts +105 -0
- package/tests/kilo.test.ts +186 -0
- package/tests/mistral.test.ts +138 -0
- package/tests/nvidia.test.ts +55 -0
- package/tests/ollama.test.ts +261 -0
- package/tests/openrouter.test.ts +192 -0
- package/tests/usage-tracking.test.ts +150 -0
- package/tests/util.test.ts +413 -0
- package/tests/zen.test.ts +180 -0
- package/todo.md +153 -0
- package/tsconfig.json +26 -0
- package/usage/commands.ts +17 -0
- package/usage/cumulative.ts +193 -0
- package/usage/formatters.ts +131 -0
- package/usage/index.ts +46 -0
- package/usage/limits.ts +166 -0
- package/usage/metrics.ts +222 -0
- package/usage/sessions.ts +355 -0
- package/usage/store.ts +99 -0
- package/usage/tracking.ts +329 -0
- package/usage/widget.ts +90 -0
- package/vitest.config.ts +20 -0
- package/widget/data.ts +113 -0
- package/widget/format.ts +26 -0
- package/widget/render.ts +117 -0
package/config.ts
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared config for pi-free-providers.
|
|
3
|
+
*
|
|
4
|
+
* Keys and flags are resolved in this order (first wins):
|
|
5
|
+
* 1. Environment variable
|
|
6
|
+
* 2. ~/.pi/free.json
|
|
7
|
+
*
|
|
8
|
+
* Per-provider paid model flags:
|
|
9
|
+
* OPENROUTER_SHOW_PAID=true or openrouter_show_paid: true
|
|
10
|
+
* NVIDIA_SHOW_PAID=true or nvidia_show_paid: true
|
|
11
|
+
* FIREWORKS_SHOW_PAID=true or fireworks_show_paid: true
|
|
12
|
+
* CLINE_SHOW_PAID=true or cline_show_paid: true
|
|
13
|
+
* OLLAMA_SHOW_PAID=true or ollama_show_paid: true
* ZEN_SHOW_PAID=true or zen_show_paid: true
* MISTRAL_SHOW_PAID=true or mistral_show_paid: true
|
|
14
|
+
*
|
|
15
|
+
* PI_FREE_KILO_FREE_ONLY=true or kilo_free_only: true — restrict Kilo to free models even after login.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
19
|
+
import { join } from "node:path";
|
|
20
|
+
import { createLogger } from "./lib/logger.ts";
|
|
21
|
+
|
|
22
|
+
const _logger = createLogger("config");
|
|
23
|
+
|
|
24
|
+
interface PiFreeConfig {
|
|
25
|
+
openrouter_api_key?: string;
|
|
26
|
+
nvidia_api_key?: string;
|
|
27
|
+
opencode_api_key?: string;
|
|
28
|
+
fireworks_api_key?: string;
|
|
29
|
+
mistral_api_key?: string;
|
|
30
|
+
ollama_api_key?: string;
|
|
31
|
+
kilo_free_only?: boolean;
|
|
32
|
+
hidden_models?: string[];
|
|
33
|
+
// Per-provider paid model flags
|
|
34
|
+
openrouter_show_paid?: boolean;
|
|
35
|
+
nvidia_show_paid?: boolean;
|
|
36
|
+
fireworks_show_paid?: boolean;
|
|
37
|
+
cline_show_paid?: boolean;
|
|
38
|
+
zen_show_paid?: boolean;
|
|
39
|
+
mistral_show_paid?: boolean;
|
|
40
|
+
ollama_show_paid?: boolean;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const CONFIG_TEMPLATE: PiFreeConfig = {
|
|
44
|
+
openrouter_api_key: "",
|
|
45
|
+
nvidia_api_key: "",
|
|
46
|
+
opencode_api_key: "",
|
|
47
|
+
fireworks_api_key: "",
|
|
48
|
+
mistral_api_key: "",
|
|
49
|
+
ollama_api_key: "",
|
|
50
|
+
kilo_free_only: false,
|
|
51
|
+
hidden_models: [],
|
|
52
|
+
openrouter_show_paid: false,
|
|
53
|
+
nvidia_show_paid: false,
|
|
54
|
+
fireworks_show_paid: false,
|
|
55
|
+
cline_show_paid: false,
|
|
56
|
+
zen_show_paid: false,
|
|
57
|
+
mistral_show_paid: false,
|
|
58
|
+
ollama_show_paid: false,
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
const PI_DIR = join(process.env.HOME || process.env.USERPROFILE || "", ".pi");
|
|
62
|
+
const CONFIG_PATH = join(PI_DIR, "free.json");
|
|
63
|
+
|
|
64
|
+
/**
 * Make sure the config file exists and contains every key of CONFIG_TEMPLATE.
 *
 * - Missing file: the template is written as-is.
 * - Existing file: template keys that are absent are added; values already
 *   present are never changed. The file is only rewritten when a template key
 *   is actually missing, so a complete file keeps the user's own key order.
 * - Existing file whose top-level JSON value is not an object (null, array,
 *   primitive): replaced by the template instead of spreading junk keys.
 *
 * Best-effort: any failure (unwritable directory, malformed JSON, ...) is
 * logged as a warning and never thrown.
 */
function ensureConfigFile(): void {
  try {
    mkdirSync(PI_DIR, { recursive: true });

    if (!existsSync(CONFIG_PATH)) {
      writeFileSync(
        CONFIG_PATH,
        `${JSON.stringify(CONFIG_TEMPLATE, null, 2)}\n`,
        "utf8",
      );
      return;
    }

    // Malformed JSON throws here and is reported by the catch below; the
    // file is left untouched in that case.
    const parsed: unknown = JSON.parse(readFileSync(CONFIG_PATH, "utf8"));
    const isPlainObject =
      typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
    const existing: PiFreeConfig = isPlainObject
      ? (parsed as PiFreeConfig)
      : {};

    // Compare by key presence, not by serialized text: a stringify comparison
    // is key-order sensitive and would rewrite a complete file on every load.
    const missingTemplateKey = Object.keys(CONFIG_TEMPLATE).some(
      (key) => !(key in existing),
    );

    if (!isPlainObject || missingTemplateKey) {
      // Merge: add any new template keys without touching existing values
      const merged = { ...CONFIG_TEMPLATE, ...existing };
      writeFileSync(
        CONFIG_PATH,
        `${JSON.stringify(merged, null, 2)}\n`,
        "utf8",
      );
    }
  } catch (err) {
    _logger.warn("Could not create config file", {
      path: CONFIG_PATH,
      error: err instanceof Error ? err.message : String(err),
    });
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Read and parse the config file at CONFIG_PATH.
 *
 * @returns The parsed config object, or an empty object when the file is
 *   missing, unreadable, not valid JSON, or its top-level value is not a
 *   plain object.
 */
function loadConfigFile(): PiFreeConfig {
  try {
    const parsed: unknown = JSON.parse(readFileSync(CONFIG_PATH, "utf8"));
    // JSON.parse happily returns null, arrays and primitives. Callers read
    // properties off the result, so anything but a plain object must be
    // treated as "no config" (a null result would throw on property access).
    if (
      typeof parsed !== "object" ||
      parsed === null ||
      Array.isArray(parsed)
    ) {
      return {};
    }
    return parsed as PiFreeConfig;
  } catch {
    // Missing/unreadable file or malformed JSON: fall back to an empty config.
    return {};
  }
}
|
|
102
|
+
|
|
103
|
+
ensureConfigFile();
|
|
104
|
+
const file = loadConfigFile();
|
|
105
|
+
|
|
106
|
+
/**
 * Resolve a string setting: the environment variable takes priority over the
 * config file.
 *
 * Both sources are trimmed, and an empty or whitespace-only value counts as
 * unset, so a blank environment variable no longer shadows a real value in the
 * config file. A non-string file value (the file is hand-edited JSON) is
 * ignored instead of throwing.
 *
 * @param envKey - Name of the environment variable to check first.
 * @param fileVal - Value read from the config file, if any.
 * @returns The trimmed value, or `undefined` when neither source provides one.
 */
function resolve(envKey: string, fileVal?: string): string | undefined {
  const fromEnv = process.env[envKey]?.trim();
  if (fromEnv) return fromEnv;

  const fromFile = typeof fileVal === "string" ? fileVal.trim() : "";
  return fromFile || undefined;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Resolve a boolean flag: the environment variable takes priority, then the
 * config file. If neither is set, defaults to false (free-only mode).
 *
 * The environment value is trimmed and compared case-insensitively, so
 * `TRUE`, `True` or `"true "` are honored rather than silently ignored. Any
 * other environment value is treated as unset and the file value decides.
 *
 * @param envKey - Name of the environment variable to check first.
 * @param fileVal - Value read from the config file, if any.
 * @returns The resolved flag; only a literal `true` in the file enables it.
 */
function resolveBool(envKey: string, fileVal?: boolean): boolean {
  const envValue = process.env[envKey]?.trim().toLowerCase();
  if (envValue === "true") return true;
  if (envValue === "false") return false;
  return fileVal === true;
}
|
|
120
|
+
|
|
121
|
+
// Global fallback (deprecated, use per-provider flags)
|
|
122
|
+
// Returns true only if explicitly enabled via env var
|
|
123
|
+
export const SHOW_PAID = process.env.PI_FREE_SHOW_PAID === "true";
|
|
124
|
+
|
|
125
|
+
// Per-provider paid model flags - default to false (free-only) if not set
|
|
126
|
+
export const OPENROUTER_SHOW_PAID = resolveBool(
|
|
127
|
+
"OPENROUTER_SHOW_PAID",
|
|
128
|
+
file.openrouter_show_paid,
|
|
129
|
+
);
|
|
130
|
+
|
|
131
|
+
export const NVIDIA_SHOW_PAID = resolveBool(
|
|
132
|
+
"NVIDIA_SHOW_PAID",
|
|
133
|
+
file.nvidia_show_paid,
|
|
134
|
+
);
|
|
135
|
+
|
|
136
|
+
export const FIREWORKS_SHOW_PAID = resolveBool(
|
|
137
|
+
"FIREWORKS_SHOW_PAID",
|
|
138
|
+
file.fireworks_show_paid,
|
|
139
|
+
);
|
|
140
|
+
|
|
141
|
+
export const CLINE_SHOW_PAID = resolveBool(
|
|
142
|
+
"CLINE_SHOW_PAID",
|
|
143
|
+
file.cline_show_paid,
|
|
144
|
+
);
|
|
145
|
+
|
|
146
|
+
export const ZEN_SHOW_PAID = resolveBool("ZEN_SHOW_PAID", file.zen_show_paid);
|
|
147
|
+
|
|
148
|
+
export const MISTRAL_SHOW_PAID = resolveBool(
|
|
149
|
+
"MISTRAL_SHOW_PAID",
|
|
150
|
+
file.mistral_show_paid,
|
|
151
|
+
);
|
|
152
|
+
|
|
153
|
+
export const OLLAMA_SHOW_PAID = resolveBool(
|
|
154
|
+
"OLLAMA_SHOW_PAID",
|
|
155
|
+
file.ollama_show_paid,
|
|
156
|
+
);
|
|
157
|
+
|
|
158
|
+
export const KILO_FREE_ONLY = resolveBool(
|
|
159
|
+
"PI_FREE_KILO_FREE_ONLY",
|
|
160
|
+
file.kilo_free_only,
|
|
161
|
+
);
|
|
162
|
+
|
|
163
|
+
const HIDDEN: Set<string> = new Set(file.hidden_models ?? []);
|
|
164
|
+
|
|
165
|
+
/** Removes any models whose id appears in hidden_models. */
|
|
166
|
+
export function applyHidden<T extends { id: string }>(models: T[]): T[] {
|
|
167
|
+
if (HIDDEN.size === 0) return models;
|
|
168
|
+
return models.filter((m) => !HIDDEN.has(m.id));
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
export const OPENROUTER_API_KEY = resolve(
|
|
172
|
+
"OPENROUTER_API_KEY",
|
|
173
|
+
file.openrouter_api_key,
|
|
174
|
+
);
|
|
175
|
+
export const NVIDIA_API_KEY = resolve("NVIDIA_API_KEY", file.nvidia_api_key);
|
|
176
|
+
export const OPENCODE_API_KEY = resolve(
|
|
177
|
+
"OPENCODE_API_KEY",
|
|
178
|
+
file.opencode_api_key,
|
|
179
|
+
);
|
|
180
|
+
export const FIREWORKS_API_KEY = resolve(
|
|
181
|
+
"FIREWORKS_API_KEY",
|
|
182
|
+
file.fireworks_api_key,
|
|
183
|
+
);
|
|
184
|
+
export const MISTRAL_API_KEY = resolve("MISTRAL_API_KEY", file.mistral_api_key);
|
|
185
|
+
export const OLLAMA_API_KEY = resolve("OLLAMA_API_KEY", file.ollama_api_key);
|
|
186
|
+
|
|
187
|
+
// Re-export provider names for consistency
|
|
188
|
+
export {
|
|
189
|
+
PROVIDER_CLINE,
|
|
190
|
+
PROVIDER_FIREWORKS,
|
|
191
|
+
PROVIDER_KILO,
|
|
192
|
+
PROVIDER_MISTRAL,
|
|
193
|
+
PROVIDER_NVIDIA,
|
|
194
|
+
PROVIDER_OLLAMA,
|
|
195
|
+
PROVIDER_OPENROUTER,
|
|
196
|
+
PROVIDER_ZEN,
|
|
197
|
+
} from "./constants.ts";
|
|
198
|
+
|
|
199
|
+
// =============================================================================
|
|
200
|
+
// Config Persistence
|
|
201
|
+
// =============================================================================
|
|
202
|
+
|
|
203
|
+
/**
 * Save updated config values to ~/.pi/free.json.
 *
 * The current file is read with loadConfigFile, `updates` is shallow-merged
 * over it, and the whole file is rewritten. Failures are logged and never
 * thrown.
 *
 * NOTE(review): loadConfigFile returns an empty object when the file cannot be
 * parsed, so saving over a malformed file replaces it with `updates` only.
 *
 * @param updates - Keys to add or overwrite in the stored config.
 */
export function saveConfig(updates: Partial<PiFreeConfig>): void {
  try {
    // The directory can be absent if its creation failed at startup or it was
    // removed afterwards; without this the write below fails with ENOENT.
    mkdirSync(PI_DIR, { recursive: true });

    const existing = loadConfigFile();
    const merged = { ...existing, ...updates };
    writeFileSync(CONFIG_PATH, `${JSON.stringify(merged, null, 2)}\n`, "utf8");
    _logger.info("Config saved", {
      path: CONFIG_PATH,
      keys: Object.keys(updates),
    });
  } catch (err) {
    _logger.error("Failed to save config", {
      path: CONFIG_PATH,
      error: err instanceof Error ? err.message : String(err),
    });
  }
}
|
|
220
|
+
|
|
221
|
+
/** Get current config values (for checking state) */
|
|
222
|
+
export function getConfig(): PiFreeConfig {
|
|
223
|
+
return loadConfigFile();
|
|
224
|
+
}
|
package/constants.ts
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared constants for pi-free-providers.
|
|
3
|
+
* Centralizes provider names, URLs, and configuration values.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// =============================================================================
|
|
7
|
+
// Provider names (must match registerProvider calls)
|
|
8
|
+
// =============================================================================
|
|
9
|
+
|
|
10
|
+
export const PROVIDER_KILO = "kilo";
|
|
11
|
+
export const PROVIDER_ZEN = "zen";
|
|
12
|
+
export const PROVIDER_OPENROUTER = "openrouter";
|
|
13
|
+
export const PROVIDER_NVIDIA = "nvidia";
|
|
14
|
+
export const PROVIDER_CLINE = "cline";
|
|
15
|
+
export const PROVIDER_FIREWORKS = "fireworks";
|
|
16
|
+
export const PROVIDER_OLLAMA = "ollama";
|
|
17
|
+
|
|
18
|
+
export const ALL_PROVIDERS = [
|
|
19
|
+
PROVIDER_KILO,
|
|
20
|
+
PROVIDER_ZEN,
|
|
21
|
+
PROVIDER_OPENROUTER,
|
|
22
|
+
PROVIDER_NVIDIA,
|
|
23
|
+
PROVIDER_CLINE,
|
|
24
|
+
PROVIDER_FIREWORKS,
|
|
25
|
+
PROVIDER_OLLAMA,
|
|
26
|
+
] as const;
|
|
27
|
+
|
|
28
|
+
// =============================================================================
|
|
29
|
+
// Provider base URLs
|
|
30
|
+
// =============================================================================
|
|
31
|
+
|
|
32
|
+
export const BASE_URL_KILO = "https://api.kilo.ai/api/gateway";
|
|
33
|
+
export const BASE_URL_ZEN = "https://opencode.ai/zen/v1";
|
|
34
|
+
export const BASE_URL_OPENROUTER = "https://openrouter.ai/api/v1";
|
|
35
|
+
export const BASE_URL_NVIDIA = "https://integrate.api.nvidia.com/v1";
|
|
36
|
+
export const BASE_URL_CLINE = "https://api.cline.bot/api/v1";
|
|
37
|
+
export const BASE_URL_FIREWORKS = "https://api.fireworks.ai/inference/v1";
|
|
38
|
+
export const BASE_URL_OLLAMA = "https://ollama.com/v1";
|
|
39
|
+
|
|
40
|
+
// =============================================================================
|
|
41
|
+
// External URLs
|
|
42
|
+
// =============================================================================
|
|
43
|
+
|
|
44
|
+
export const URL_MODELS_DEV = "https://models.dev/api.json";
|
|
45
|
+
export const URL_KILO_TOS = "https://kilo.ai/terms";
|
|
46
|
+
export const URL_ZEN_TOS = "https://opencode.ai/terms";
|
|
47
|
+
export const URL_CLINE_TOS = "https://cline.bot/tos";
|
|
48
|
+
|
|
49
|
+
// =============================================================================
|
|
50
|
+
// Cline auth
|
|
51
|
+
// =============================================================================
|
|
52
|
+
|
|
53
|
+
export const CLINE_AUTH_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
|
|
54
|
+
|
|
55
|
+
// =============================================================================
|
|
56
|
+
// Configuration thresholds
|
|
57
|
+
// =============================================================================
|
|
58
|
+
|
|
59
|
+
export const NVIDIA_MIN_SIZE_B = 70; // Minimum model size for NVIDIA NIM
|
|
60
|
+
export const DEFAULT_MIN_SIZE_B = 30; // Default minimum model size for filtering
|
|
61
|
+
|
|
62
|
+
// =============================================================================
|
|
63
|
+
// Timeouts (milliseconds)
|
|
64
|
+
// =============================================================================
|
|
65
|
+
|
|
66
|
+
// Timeout for fetch operations
|
|
67
|
+
export const DEFAULT_FETCH_TIMEOUT_MS: number = 10_000;
|
|
68
|
+
|
|
69
|
+
export interface TestConfig {
|
|
70
|
+
timeout: number;
|
|
71
|
+
retries: number;
|
|
72
|
+
label: string;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// LSP test - fixed - added missing property
|
|
76
|
+
export const testConfig: TestConfig = {
|
|
77
|
+
timeout: 5000,
|
|
78
|
+
retries: 3,
|
|
79
|
+
label: "test",
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
// LSP test - fixed return type
|
|
83
|
+
export function calculateTimeout(base: number): number {
|
|
84
|
+
return base * 2;
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// LSP test - unused variable (should show hint/warning if configured)
|
|
88
|
+
export function unusedParamTest(required: string, _unused: number): string {
|
|
89
|
+
return required.toUpperCase();
|
|
90
|
+
}
|
|
91
|
+
export const KILO_POLL_INTERVAL_MS = 3_000;
|
|
92
|
+
export const KILO_TOKEN_EXPIRATION_MS = 365 * 24 * 60 * 60 * 1000; // 1 year
|
|
93
|
+
|
|
94
|
+
// =============================================================================
|
|
95
|
+
// Additional OpenAI-compatible providers
|
|
96
|
+
// =============================================================================
|
|
97
|
+
|
|
98
|
+
export const PROVIDER_GROQ = "groq";
|
|
99
|
+
export const PROVIDER_TOGETHER = "together";
|
|
100
|
+
export const PROVIDER_DEEPINFRA = "deepinfra";
|
|
101
|
+
export const PROVIDER_MISTRAL = "mistral";
|
|
102
|
+
export const PROVIDER_PERPLEXITY = "perplexity";
|
|
103
|
+
export const PROVIDER_XAI = "xai";
|
|
104
|
+
|
|
105
|
+
export const BASE_URL_GROQ = "https://api.groq.com/openai/v1";
|
|
106
|
+
export const BASE_URL_TOGETHER = "https://api.together.xyz/v1";
|
|
107
|
+
export const BASE_URL_DEEPINFRA = "https://api.deepinfra.com/v1/openai";
|
|
108
|
+
export const BASE_URL_MISTRAL = "https://api.mistral.ai/v1";
|
|
109
|
+
export const BASE_URL_PERPLEXITY = "https://api.perplexity.ai";
|
|
110
|
+
export const BASE_URL_XAI = "https://api.x.ai/v1";
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# Free Tier Rate Limits
|
|
2
|
+
|
|
3
|
+
This document tracks the free tier usage limits for each provider in pi-free-providers.
|
|
4
|
+
|
|
5
|
+
## Provider Limits
|
|
6
|
+
|
|
7
|
+
| Provider | Free Tier Limit | Notes |
|
|
8
|
+
|----------|-----------------|-------|
|
|
9
|
+
| **Kilo** | 200 requests/hour | Per IP (anonymous) or per account (authenticated) |
|
|
10
|
+
| **OpenRouter** | 1000 requests/day | For free tier (no API key) |
|
|
11
|
+
| **Zen (OpenCode)** | Fair use | No hard limits, but abuse may be throttled |
|
|
12
|
+
| **NVIDIA** | 1000 requests/month | NIM free tier |
|
|
13
|
+
| **Fireworks** | 1000 requests/month | Free tier |
|
|
14
|
+
| **Cline** | Undisclosed | Rate limited but limits not documented |
|
|
15
|
+
|
|
16
|
+
## Usage Tracking
|
|
17
|
+
|
|
18
|
+
The extension automatically tracks your usage against these limits:
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
// Check current usage
|
|
22
|
+
const usage = getFreeTierUsage("kilo");
|
|
23
|
+
console.log(`Used ${usage.requestsThisHour}/${usage.limit.requestsPerHour} this hour`);
|
|
24
|
+
|
|
25
|
+
// Get warning if approaching limit
|
|
26
|
+
const warning = getLimitWarning("openrouter");
|
|
27
|
+
if (warning) {
|
|
28
|
+
console.warn(warning); // "⚠️ openrouter: 85% of free tier used. ~150 requests remaining."
|
|
29
|
+
}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Status Indicators
|
|
33
|
+
|
|
34
|
+
| Status | Meaning | Action |
|
|
35
|
+
|--------|---------|--------|
|
|
36
|
+
| 🟢 **OK** | < 70% used | No action needed |
|
|
37
|
+
| 🟡 **Warning** | 70-90% used | Consider using other providers |
|
|
38
|
+
| 🔴 **Critical** | > 90% used | Switch provider soon to avoid 429s |
|
|
39
|
+
| ⚪ **Unknown** | Limits not documented | Monitor for errors |
|
|
40
|
+
|
|
41
|
+
## What Happens When You Hit Limits
|
|
42
|
+
|
|
43
|
+
When you approach or hit a rate limit:
|
|
44
|
+
|
|
45
|
+
1. **Warning**: The system warns you before hitting the limit
|
|
46
|
+
2. **429 Error**: Provider returns rate limit error
|
|
47
|
+
3. **Auto-failover**: If `auto_model_hop` is enabled, automatically switches to another provider
|
|
48
|
+
4. **Autocompact**: Suggests compacting conversation to reduce tokens
|
|
49
|
+
|
|
50
|
+
## Understanding Exact Limits
|
|
51
|
+
|
|
52
|
+
Per-model tracking helps discover exact limits empirically:
|
|
53
|
+
|
|
54
|
+
### Example: Discovering Kilo's Per-Model Limits
|
|
55
|
+
|
|
56
|
+
```typescript
|
|
57
|
+
// After heavy usage, check the report
|
|
58
|
+
logModelUsageReport("kilo");
|
|
59
|
+
// [usage-report] kilo: 267 total requests
|
|
60
|
+
// - xiaomi/mimo-v2-pro:free: 201 requests ← Hmm, stopped at ~200?
|
|
61
|
+
// - minimax/minimax-m2.5:free: 66 requests
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
This pattern suggests Kilo may have **per-model** limits (around 200/hour per model) in addition to IP-level limits.
|
|
65
|
+
|
|
66
|
+
### Example: OpenRouter Daily Pattern
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
logModelUsageReport("openrouter");
|
|
70
|
+
// [usage-report] openrouter: 847 total requests
|
|
71
|
+
// - anthropic/claude-sonnet-4.6: 423 requests
|
|
72
|
+
// - google/gemini-3.1-pro-preview: 424 requests
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Neither model was cut off on its own while the combined total stayed under 1000, which is consistent with OpenRouter's 1000/day limit being **provider-wide**, not per-model.
|
|
76
|
+
|
|
77
|
+
### Example: Model-Specific Throttling
|
|
78
|
+
|
|
79
|
+
```typescript
|
|
80
|
+
// Compare two models at same provider
|
|
81
|
+
const claudeUsage = getModelUsage("zen", "claude-opus-4-6");
|
|
82
|
+
const geminiUsage = getModelUsage("zen", "gemini-3.1-pro");
|
|
83
|
+
|
|
84
|
+
// If Claude hits 429s at 50 requests but Gemini works at 200,
|
|
85
|
+
// Claude may have stricter rate limiting
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Avoiding Rate Limits
|
|
89
|
+
|
|
90
|
+
### Strategy 1: Provider Rotation
|
|
91
|
+
Use multiple providers to distribute load:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
# Start with Kilo
|
|
95
|
+
/model kilo/mimo-v2-pro:free
|
|
96
|
+
|
|
97
|
+
# If rate limited, hop to OpenRouter
|
|
98
|
+
/model openrouter/mimo-v2-pro:free
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Strategy 2: Session Management
|
|
102
|
+
Start fresh sessions periodically to reset the extension's local usage counters (this does not reset the provider's own server-side limits):
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
/session # New session = fresh rate limit counters
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Strategy 3: Upgrade to Paid
|
|
109
|
+
When free tier isn't enough:
|
|
110
|
+
|
|
111
|
+
- **Kilo**: `/login kilo` for higher limits
|
|
112
|
+
- **OpenRouter**: Set `OPENROUTER_API_KEY` for paid access
|
|
113
|
+
- **NVIDIA**: Sign up for paid NIM access
|
|
114
|
+
|
|
115
|
+
## Configuration
|
|
116
|
+
|
|
117
|
+
Add to `~/.pi/free.json`:
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"auto_model_hop": true,
|
|
122
|
+
"max_model_hops": 3,
|
|
123
|
+
"allow_downgrades": "minor"
|
|
124
|
+
}
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
This enables automatic failover when rate limits are hit.
|
|
128
|
+
|
|
129
|
+
## Implementation Details
|
|
130
|
+
|
|
131
|
+
Usage tracking is done via:
|
|
132
|
+
- **In-memory counters**: Per session (resets when Pi restarts)
|
|
133
|
+
- **Daily tracking**: Tracks requests per provider per day
|
|
134
|
+
- **Hourly estimates**: Based on session activity
|
|
135
|
+
- **Per-model tracking**: Granular tracking of which models you use most
|
|
136
|
+
|
|
137
|
+
### Per-Model Tracking (NEW)
|
|
138
|
+
|
|
139
|
+
We now track usage per model per provider:
|
|
140
|
+
|
|
141
|
+
```typescript
|
|
142
|
+
// Track a request for specific model
|
|
143
|
+
incrementModelRequestCount("kilo", "xiaomi/mimo-v2-pro:free");
|
|
144
|
+
|
|
145
|
+
// Get usage for specific model
|
|
146
|
+
const usage = getModelUsage("kilo", "xiaomi/mimo-v2-pro:free");
|
|
147
|
+
// { count: 45, lastUsed: 1711731234567 }
|
|
148
|
+
|
|
149
|
+
// See which models you use most with a provider
|
|
150
|
+
const kiloModels = getProviderModelUsage("kilo");
|
|
151
|
+
// [
|
|
152
|
+
// { modelId: "xiaomi/mimo-v2-pro:free", count: 45, lastUsed: ... },
|
|
153
|
+
// { modelId: "minimax/minimax-m2.5:free", count: 32, lastUsed: ... }
|
|
154
|
+
// ]
|
|
155
|
+
|
|
156
|
+
// Get top models across all providers
|
|
157
|
+
const top = getTopModels(5);
|
|
158
|
+
// [
|
|
159
|
+
// { provider: "kilo", modelId: "xiaomi/mimo-v2-pro:free", count: 45 },
|
|
160
|
+
// { provider: "zen", modelId: "mimo-v2-pro-free", count: 38 },
|
|
161
|
+
// ...
|
|
162
|
+
// ]
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
This helps identify:
|
|
166
|
+
- Which models consume your quota fastest
|
|
167
|
+
- If certain models have stricter limits
|
|
168
|
+
- Optimal model rotation strategies
|
|
169
|
+
|
|
170
|
+
Note: These are estimates. Provider-side counters are authoritative.
|
|
171
|
+
|
|
172
|
+
## Known Limitations
|
|
173
|
+
|
|
174
|
+
1. **No server-side sync**: We can't know the provider's exact count
|
|
175
|
+
2. **Hourly estimates**: Hourly usage is estimated from daily total
|
|
176
|
+
3. **IP-based limits**: Kilo's IP-based limits are shared by all users on the same network
|
|
177
|
+
4. **Undocumented limits**: Cline and Zen limits are not publicly documented
|
|
178
|
+
|
|
179
|
+
## API Reference
|
|
180
|
+
|
|
181
|
+
### Provider-Level Functions
|
|
182
|
+
|
|
183
|
+
#### `getFreeTierUsage(provider: string)`
|
|
184
|
+
Returns current usage against free tier limits.
|
|
185
|
+
|
|
186
|
+
#### `isApproachingLimit(provider: string)`
|
|
187
|
+
Returns `true` if usage is > 70% of limit.
|
|
188
|
+
|
|
189
|
+
#### `getLimitWarning(provider: string)`
|
|
190
|
+
Returns warning message if approaching limit, `null` otherwise.
|
|
191
|
+
|
|
192
|
+
#### `formatFreeTierStatus(provider: string)`
|
|
193
|
+
Returns formatted status string for display.
|
|
194
|
+
|
|
195
|
+
### Per-Model Functions
|
|
196
|
+
|
|
197
|
+
#### `incrementModelRequestCount(provider, modelId)`
|
|
198
|
+
Track a request for a specific model. Call this on every request.
|
|
199
|
+
|
|
200
|
+
#### `getModelUsage(provider, modelId)`
|
|
201
|
+
Get usage stats for a specific model:
|
|
202
|
+
```typescript
|
|
203
|
+
{ count: 45, lastUsed: 1711731234567 }
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
#### `getProviderModelUsage(provider)`
|
|
207
|
+
Get all model usage sorted by count (highest first) for a provider.
|
|
208
|
+
|
|
209
|
+
#### `getTopModels(n)`
|
|
210
|
+
Get top N most used models across all providers.
|
|
211
|
+
|
|
212
|
+
#### `logModelUsageReport(provider?)`
|
|
213
|
+
Prints a usage report to the console. If `provider` is omitted, shows the top 10 models across all providers.
|