parishad-0.1.0-py3-none-any.whl
This diff represents the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the public registry.
- parishad/__init__.py +70 -0
- parishad/__main__.py +10 -0
- parishad/checker/__init__.py +25 -0
- parishad/checker/deterministic.py +644 -0
- parishad/checker/ensemble.py +496 -0
- parishad/checker/retrieval.py +546 -0
- parishad/cli/__init__.py +6 -0
- parishad/cli/code.py +3254 -0
- parishad/cli/main.py +1158 -0
- parishad/cli/prarambh.py +99 -0
- parishad/cli/sthapana.py +368 -0
- parishad/config/modes.py +139 -0
- parishad/config/pipeline.core.yaml +128 -0
- parishad/config/pipeline.extended.yaml +172 -0
- parishad/config/pipeline.fast.yaml +89 -0
- parishad/config/user_config.py +115 -0
- parishad/data/catalog.py +118 -0
- parishad/data/models.json +108 -0
- parishad/memory/__init__.py +79 -0
- parishad/models/__init__.py +181 -0
- parishad/models/backends/__init__.py +247 -0
- parishad/models/backends/base.py +211 -0
- parishad/models/backends/huggingface.py +318 -0
- parishad/models/backends/llama_cpp.py +239 -0
- parishad/models/backends/mlx_lm.py +141 -0
- parishad/models/backends/ollama.py +253 -0
- parishad/models/backends/openai_api.py +193 -0
- parishad/models/backends/transformers_hf.py +198 -0
- parishad/models/costs.py +385 -0
- parishad/models/downloader.py +1557 -0
- parishad/models/optimizations.py +871 -0
- parishad/models/profiles.py +610 -0
- parishad/models/reliability.py +876 -0
- parishad/models/runner.py +651 -0
- parishad/models/tokenization.py +287 -0
- parishad/orchestrator/__init__.py +24 -0
- parishad/orchestrator/config_loader.py +210 -0
- parishad/orchestrator/engine.py +1113 -0
- parishad/orchestrator/exceptions.py +14 -0
- parishad/roles/__init__.py +71 -0
- parishad/roles/base.py +712 -0
- parishad/roles/dandadhyaksha.py +163 -0
- parishad/roles/darbari.py +246 -0
- parishad/roles/majumdar.py +274 -0
- parishad/roles/pantapradhan.py +150 -0
- parishad/roles/prerak.py +357 -0
- parishad/roles/raja.py +345 -0
- parishad/roles/sacheev.py +203 -0
- parishad/roles/sainik.py +427 -0
- parishad/roles/sar_senapati.py +164 -0
- parishad/roles/vidushak.py +69 -0
- parishad/tools/__init__.py +7 -0
- parishad/tools/base.py +57 -0
- parishad/tools/fs.py +110 -0
- parishad/tools/perception.py +96 -0
- parishad/tools/retrieval.py +74 -0
- parishad/tools/shell.py +103 -0
- parishad/utils/__init__.py +7 -0
- parishad/utils/hardware.py +122 -0
- parishad/utils/logging.py +79 -0
- parishad/utils/scanner.py +164 -0
- parishad/utils/text.py +61 -0
- parishad/utils/tracing.py +133 -0
- parishad-0.1.0.dist-info/METADATA +256 -0
- parishad-0.1.0.dist-info/RECORD +68 -0
- parishad-0.1.0.dist-info/WHEEL +4 -0
- parishad-0.1.0.dist-info/entry_points.txt +2 -0
- parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/models/costs.py
ADDED
@@ -0,0 +1,385 @@
+"""
+Cost estimation for Parishad model inference.
+
+Provides cost estimation for different backends and models:
+- API-based models (OpenAI, Anthropic) have per-token costs
+- Local models (llama.cpp, transformers) are "free" (compute cost only)
+
+This module tracks:
+- Dollar cost estimates for API models
+- Token usage metrics
+- Approximate FLOP estimates for local models
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+# =============================================================================
+# Cost Data
+# =============================================================================
+
+
+@dataclass
+class ModelPricing:
+    """
+    Pricing information for a model.
+
+    Prices are in USD per 1M tokens.
+    """
+
+    input_price: float = 0.0
+    """Price per 1M input tokens in USD."""
+
+    output_price: float = 0.0
+    """Price per 1M output tokens in USD."""
+
+    name: str = ""
+    """Model name for display."""
+
+    def cost_for_tokens(self, tokens_in: int, tokens_out: int) -> float:
+        """
+        Calculate cost for a given token usage.
+
+        Args:
+            tokens_in: Number of input tokens
+            tokens_out: Number of output tokens
+
+        Returns:
+            Estimated cost in USD
+        """
+        input_cost = (tokens_in / 1_000_000) * self.input_price
+        output_cost = (tokens_out / 1_000_000) * self.output_price
+        return input_cost + output_cost
+
+
+# Default pricing for known models (as of Dec 2024)
+# Prices in USD per 1M tokens
+MODEL_PRICING: dict[str, ModelPricing] = {
+    # OpenAI
+    "gpt-4o": ModelPricing(input_price=2.50, output_price=10.00, name="GPT-4o"),
+    "gpt-4o-mini": ModelPricing(input_price=0.15, output_price=0.60, name="GPT-4o Mini"),
+    "gpt-4-turbo": ModelPricing(input_price=10.00, output_price=30.00, name="GPT-4 Turbo"),
+    "gpt-4": ModelPricing(input_price=30.00, output_price=60.00, name="GPT-4"),
+    "gpt-3.5-turbo": ModelPricing(input_price=0.50, output_price=1.50, name="GPT-3.5 Turbo"),
+
+    # Anthropic
+    "claude-3-5-sonnet-20241022": ModelPricing(input_price=3.00, output_price=15.00, name="Claude 3.5 Sonnet"),
+    "claude-3-opus-20240229": ModelPricing(input_price=15.00, output_price=75.00, name="Claude 3 Opus"),
+    "claude-3-haiku-20240307": ModelPricing(input_price=0.25, output_price=1.25, name="Claude 3 Haiku"),
+
+    # Local models (free)
+    "stub": ModelPricing(input_price=0.0, output_price=0.0, name="Stub"),
+    "mock": ModelPricing(input_price=0.0, output_price=0.0, name="Mock"),
+}
+
+# Backend-level defaults (for unknown models)
+BACKEND_DEFAULT_PRICING: dict[str, ModelPricing] = {
+    "openai": ModelPricing(input_price=0.50, output_price=1.50, name="OpenAI Default"),
+    "anthropic": ModelPricing(input_price=3.00, output_price=15.00, name="Anthropic Default"),
+    "stub": ModelPricing(input_price=0.0, output_price=0.0, name="Stub"),
+    "mock": ModelPricing(input_price=0.0, output_price=0.0, name="Mock"),
+    "llama_cpp": ModelPricing(input_price=0.0, output_price=0.0, name="Local"),
+    "transformers": ModelPricing(input_price=0.0, output_price=0.0, name="Local"),
+}
+
+
+def get_model_pricing(model_id: str, backend: str = "") -> ModelPricing:
+    """
+    Get pricing for a model.
+
+    Looks up in order:
+    1. Exact model_id match
+    2. Backend default
+    3. Zero pricing (local/unknown)
+
+    Args:
+        model_id: Model identifier
+        backend: Optional backend name
+
+    Returns:
+        ModelPricing instance
+    """
+    # Check exact model match
+    if model_id in MODEL_PRICING:
+        return MODEL_PRICING[model_id]
+
+    # Check backend default
+    if backend in BACKEND_DEFAULT_PRICING:
+        return BACKEND_DEFAULT_PRICING[backend]
+
+    # Default to free (local models)
+    return ModelPricing(name=model_id or "Unknown")
+
+
+# =============================================================================
+# Cost Tracking
+# =============================================================================
+
+
+@dataclass
+class CostMetrics:
+    """
+    Accumulated cost metrics for a session or query.
+    """
+
+    total_tokens_in: int = 0
+    """Total input tokens used."""
+
+    total_tokens_out: int = 0
+    """Total output tokens generated."""
+
+    total_cost_usd: float = 0.0
+    """Total estimated cost in USD."""
+
+    calls: int = 0
+    """Number of API/inference calls."""
+
+    latency_ms: float = 0.0
+    """Total latency in milliseconds."""
+
+    # Per-slot breakdown
+    slot_metrics: dict[str, dict[str, Any]] = field(default_factory=dict)
+    """Metrics broken down by slot."""
+
+    def add_call(
+        self,
+        tokens_in: int,
+        tokens_out: int,
+        cost_usd: float,
+        latency_ms: float = 0.0,
+        slot: str = "",
+    ) -> None:
+        """
+        Add metrics from a call.
+
+        Args:
+            tokens_in: Input tokens for this call
+            tokens_out: Output tokens for this call
+            cost_usd: Cost in USD for this call
+            latency_ms: Latency in milliseconds
+            slot: Optional slot name (small/mid/big)
+        """
+        self.total_tokens_in += tokens_in
+        self.total_tokens_out += tokens_out
+        self.total_cost_usd += cost_usd
+        self.calls += 1
+        self.latency_ms += latency_ms
+
+        if slot:
+            if slot not in self.slot_metrics:
+                self.slot_metrics[slot] = {
+                    "tokens_in": 0,
+                    "tokens_out": 0,
+                    "cost_usd": 0.0,
+                    "calls": 0,
+                }
+            self.slot_metrics[slot]["tokens_in"] += tokens_in
+            self.slot_metrics[slot]["tokens_out"] += tokens_out
+            self.slot_metrics[slot]["cost_usd"] += cost_usd
+            self.slot_metrics[slot]["calls"] += 1
+
+    @property
+    def total_tokens(self) -> int:
+        """Total tokens (in + out)."""
+        return self.total_tokens_in + self.total_tokens_out
+
+    @property
+    def avg_latency_ms(self) -> float:
+        """Average latency per call."""
+        if self.calls == 0:
+            return 0.0
+        return self.latency_ms / self.calls
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "total_tokens_in": self.total_tokens_in,
+            "total_tokens_out": self.total_tokens_out,
+            "total_tokens": self.total_tokens,
+            "total_cost_usd": round(self.total_cost_usd, 6),
+            "calls": self.calls,
+            "total_latency_ms": round(self.latency_ms, 2),
+            "avg_latency_ms": round(self.avg_latency_ms, 2),
+            "slot_metrics": self.slot_metrics,
+        }
+
+
+# =============================================================================
+# Cost Estimation Functions
+# =============================================================================
+
+
+def estimate_cost(
+    backend: str,
+    model_id: str,
+    tokens_in: int,
+    tokens_out: int,
+) -> float:
+    """
+    Estimate cost for a generation call.
+
+    Args:
+        backend: Backend name (e.g., 'openai', 'llama_cpp')
+        model_id: Model identifier
+        tokens_in: Number of input tokens
+        tokens_out: Number of output tokens
+
+    Returns:
+        Estimated cost in USD (0.0 for local models)
+    """
+    pricing = get_model_pricing(model_id, backend)
+    return pricing.cost_for_tokens(tokens_in, tokens_out)
+
+
+def estimate_query_cost(
+    backend: str,
+    model_id: str,
+    prompt_tokens: int,
+    estimated_output_tokens: int = 500,
+) -> float:
+    """
+    Estimate cost for a query before running it.
+
+    Useful for budget checks before making API calls.
+
+    Args:
+        backend: Backend name
+        model_id: Model identifier
+        prompt_tokens: Number of input tokens
+        estimated_output_tokens: Expected output tokens
+
+    Returns:
+        Estimated cost in USD
+    """
+    return estimate_cost(backend, model_id, prompt_tokens, estimated_output_tokens)
+
+
+# =============================================================================
+# FLOP Estimation (for local models)
+# =============================================================================
+
+
+def estimate_flops(
+    model_params_billions: float,
+    tokens: int,
+    is_generation: bool = True,
+) -> float:
+    """
+    Estimate FLOPs for local model inference.
+
+    Uses the approximation: FLOPs ≈ 2 * params * tokens
+    For generation, each token requires a forward pass.
+
+    Args:
+        model_params_billions: Model size in billions of parameters
+        tokens: Number of tokens processed/generated
+        is_generation: Whether this is generation (vs prefill)
+
+    Returns:
+        Estimated FLOPs
+    """
+    params = model_params_billions * 1e9
+
+    if is_generation:
+        # Each generated token requires ~2 * params FLOPs
+        return 2 * params * tokens
+    else:
+        # Prefill is more efficient (parallelized)
+        return 2 * params * tokens
+
+
+def estimate_local_inference_time_ms(
+    model_params_billions: float,
+    tokens_in: int,
+    tokens_out: int,
+    tflops: float = 100.0,
+) -> float:
+    """
+    Estimate inference time for local models.
+
+    Args:
+        model_params_billions: Model size in billions
+        tokens_in: Input tokens (prefill)
+        tokens_out: Output tokens (generation)
+        tflops: Hardware capability in TFLOPS
+
+    Returns:
+        Estimated time in milliseconds
+    """
+    # Prefill FLOPs
+    prefill_flops = estimate_flops(model_params_billions, tokens_in, is_generation=False)
+
+    # Generation FLOPs
+    gen_flops = estimate_flops(model_params_billions, tokens_out, is_generation=True)
+
+    total_flops = prefill_flops + gen_flops
+
+    # Convert TFLOPS to FLOPS
+    flops_per_second = tflops * 1e12
+
+    # Time in seconds, then milliseconds
+    time_seconds = total_flops / flops_per_second
+    return time_seconds * 1000
+
+
+# =============================================================================
+# Model Size Registry
+# =============================================================================
+
+
+# Known model sizes in billions of parameters
+MODEL_SIZES: dict[str, float] = {
+    # Qwen
+    "qwen2.5-0.5b": 0.5,
+    "qwen2.5-1.5b": 1.5,
+    "qwen2.5-3b": 3.0,
+    "qwen2.5-7b": 7.0,
+    "qwen2.5-14b": 14.0,
+    "qwen2.5-32b": 32.0,
+    "qwen2.5-72b": 72.0,
+
+    # Llama
+    "llama-3.2-1b": 1.0,
+    "llama-3.2-3b": 3.0,
+    "llama-3.1-8b": 8.0,
+    "llama-3.1-70b": 70.0,
+
+    # Mistral
+    "mistral-7b": 7.0,
+    "mixtral-8x7b": 47.0,  # Sparse
+
+    # Stubs
+    "stub": 0.0,
+    "mock": 0.0,
+}
+
+
+def get_model_size(model_id: str) -> float:
+    """
+    Get model size in billions of parameters.
+
+    Tries to parse from model_id if not in registry.
+
+    Args:
+        model_id: Model identifier
+
+    Returns:
+        Model size in billions (0.0 if unknown)
+    """
+    model_lower = model_id.lower()
+
+    # Check registry
+    for key, size in MODEL_SIZES.items():
+        if key in model_lower:
+            return size
+
+    # Try to parse from name (e.g., "7b", "14b")
+    import re
+    match = re.search(r'(\d+(?:\.\d+)?)\s*b', model_lower)
+    if match:
+        return float(match.group(1))
+
+    return 0.0