parishad 0.1.0__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their public registries.
Files changed (68)
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/models/costs.py
@@ -0,0 +1,385 @@
"""
Cost estimation for Parishad model inference.

Provides cost estimation for different backends and models:
- API-based models (OpenAI, Anthropic) have per-token costs
- Local models (llama.cpp, transformers) are "free" (compute cost only)

This module tracks:
- Dollar cost estimates for API models
- Token usage metrics
- Approximate FLOP estimates for local models
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from typing import Any


# =============================================================================
# Cost Data
# =============================================================================


@dataclass
class ModelPricing:
    """
    Pricing information for a model.

    Prices are in USD per 1M tokens.
    """

    input_price: float = 0.0
    """Price per 1M input tokens in USD."""

    output_price: float = 0.0
    """Price per 1M output tokens in USD."""

    name: str = ""
    """Model name for display."""

    def cost_for_tokens(self, tokens_in: int, tokens_out: int) -> float:
        """
        Calculate cost for a given token usage.

        Args:
            tokens_in: Number of input tokens
            tokens_out: Number of output tokens

        Returns:
            Estimated cost in USD
        """
        input_cost = (tokens_in / 1_000_000) * self.input_price
        output_cost = (tokens_out / 1_000_000) * self.output_price
        return input_cost + output_cost

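# Worked example (illustrative, not part of the shipped module): at GPT-4o's
# listed rates of $2.50/M input and $10.00/M output, a call with 1,000 input
# and 500 output tokens costs (1_000 / 1e6) * 2.50 + (500 / 1e6) * 10.00,
# i.e. about $0.0075:
#
#   pricing = ModelPricing(input_price=2.50, output_price=10.00, name="GPT-4o")
#   pricing.cost_for_tokens(1_000, 500)  # ~0.0075
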
# Default pricing for known models (as of Dec 2024)
# Prices in USD per 1M tokens
MODEL_PRICING: dict[str, ModelPricing] = {
    # OpenAI
    "gpt-4o": ModelPricing(input_price=2.50, output_price=10.00, name="GPT-4o"),
    "gpt-4o-mini": ModelPricing(input_price=0.15, output_price=0.60, name="GPT-4o Mini"),
    "gpt-4-turbo": ModelPricing(input_price=10.00, output_price=30.00, name="GPT-4 Turbo"),
    "gpt-4": ModelPricing(input_price=30.00, output_price=60.00, name="GPT-4"),
    "gpt-3.5-turbo": ModelPricing(input_price=0.50, output_price=1.50, name="GPT-3.5 Turbo"),

    # Anthropic
    "claude-3-5-sonnet-20241022": ModelPricing(input_price=3.00, output_price=15.00, name="Claude 3.5 Sonnet"),
    "claude-3-opus-20240229": ModelPricing(input_price=15.00, output_price=75.00, name="Claude 3 Opus"),
    "claude-3-haiku-20240307": ModelPricing(input_price=0.25, output_price=1.25, name="Claude 3 Haiku"),

    # Local models (free)
    "stub": ModelPricing(input_price=0.0, output_price=0.0, name="Stub"),
    "mock": ModelPricing(input_price=0.0, output_price=0.0, name="Mock"),
}

# Backend-level defaults (for unknown models)
BACKEND_DEFAULT_PRICING: dict[str, ModelPricing] = {
    "openai": ModelPricing(input_price=0.50, output_price=1.50, name="OpenAI Default"),
    "anthropic": ModelPricing(input_price=3.00, output_price=15.00, name="Anthropic Default"),
    "stub": ModelPricing(input_price=0.0, output_price=0.0, name="Stub"),
    "mock": ModelPricing(input_price=0.0, output_price=0.0, name="Mock"),
    "llama_cpp": ModelPricing(input_price=0.0, output_price=0.0, name="Local"),
    "transformers": ModelPricing(input_price=0.0, output_price=0.0, name="Local"),
}


def get_model_pricing(model_id: str, backend: str = "") -> ModelPricing:
    """
    Get pricing for a model.

    Looks up in order:
    1. Exact model_id match
    2. Backend default
    3. Zero pricing (local/unknown)

    Args:
        model_id: Model identifier
        backend: Optional backend name

    Returns:
        ModelPricing instance
    """
    # Check exact model match
    if model_id in MODEL_PRICING:
        return MODEL_PRICING[model_id]

    # Check backend default
    if backend in BACKEND_DEFAULT_PRICING:
        return BACKEND_DEFAULT_PRICING[backend]

    # Default to free (local models)
    return ModelPricing(name=model_id or "Unknown")
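
# Illustrative lookups showing the fallback chain (model ids here are
# examples, not module data):
#
#   get_model_pricing("gpt-4o").name                       # "GPT-4o" (exact match)
#   get_model_pricing("some-new-gpt", backend="openai")    # OpenAI Default (backend fallback)
#   get_model_pricing("my-local-7b").input_price           # 0.0 (unknown -> free)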


# =============================================================================
# Cost Tracking
# =============================================================================

@dataclass
class CostMetrics:
    """
    Accumulated cost metrics for a session or query.
    """

    total_tokens_in: int = 0
    """Total input tokens used."""

    total_tokens_out: int = 0
    """Total output tokens generated."""

    total_cost_usd: float = 0.0
    """Total estimated cost in USD."""

    calls: int = 0
    """Number of API/inference calls."""

    latency_ms: float = 0.0
    """Total latency in milliseconds."""

    # Per-slot breakdown
    slot_metrics: dict[str, dict[str, Any]] = field(default_factory=dict)
    """Metrics broken down by slot."""

    def add_call(
        self,
        tokens_in: int,
        tokens_out: int,
        cost_usd: float,
        latency_ms: float = 0.0,
        slot: str = "",
    ) -> None:
        """
        Add metrics from a call.

        Args:
            tokens_in: Input tokens for this call
            tokens_out: Output tokens for this call
            cost_usd: Cost in USD for this call
            latency_ms: Latency in milliseconds
            slot: Optional slot name (small/mid/big)
        """
        self.total_tokens_in += tokens_in
        self.total_tokens_out += tokens_out
        self.total_cost_usd += cost_usd
        self.calls += 1
        self.latency_ms += latency_ms

        if slot:
            if slot not in self.slot_metrics:
                self.slot_metrics[slot] = {
                    "tokens_in": 0,
                    "tokens_out": 0,
                    "cost_usd": 0.0,
                    "calls": 0,
                }
            self.slot_metrics[slot]["tokens_in"] += tokens_in
            self.slot_metrics[slot]["tokens_out"] += tokens_out
            self.slot_metrics[slot]["cost_usd"] += cost_usd
            self.slot_metrics[slot]["calls"] += 1

    @property
    def total_tokens(self) -> int:
        """Total tokens (in + out)."""
        return self.total_tokens_in + self.total_tokens_out

    @property
    def avg_latency_ms(self) -> float:
        """Average latency per call."""
        if self.calls == 0:
            return 0.0
        return self.latency_ms / self.calls

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            "total_tokens_in": self.total_tokens_in,
            "total_tokens_out": self.total_tokens_out,
            "total_tokens": self.total_tokens,
            "total_cost_usd": round(self.total_cost_usd, 6),
            "calls": self.calls,
            "total_latency_ms": round(self.latency_ms, 2),
            "avg_latency_ms": round(self.avg_latency_ms, 2),
            "slot_metrics": self.slot_metrics,
        }
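
# Usage sketch (illustrative; the slot names and numbers are examples):
#
#   metrics = CostMetrics()
#   metrics.add_call(tokens_in=1_200, tokens_out=300, cost_usd=0.006,
#                    latency_ms=850.0, slot="small")
#   metrics.add_call(tokens_in=4_000, tokens_out=900, cost_usd=0.019,
#                    latency_ms=2_100.0, slot="big")
#   metrics.total_tokens      # 6400
#   metrics.avg_latency_ms    # 1475.0
#   metrics.to_dict()         # JSON-ready summary with per-slot breakdown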


# =============================================================================
# Cost Estimation Functions
# =============================================================================

def estimate_cost(
    backend: str,
    model_id: str,
    tokens_in: int,
    tokens_out: int,
) -> float:
    """
    Estimate cost for a generation call.

    Args:
        backend: Backend name (e.g., 'openai', 'llama_cpp')
        model_id: Model identifier
        tokens_in: Number of input tokens
        tokens_out: Number of output tokens

    Returns:
        Estimated cost in USD (0.0 for local models)
    """
    pricing = get_model_pricing(model_id, backend)
    return pricing.cost_for_tokens(tokens_in, tokens_out)
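
# Example (illustrative): the call is priced by whatever the lookup above
# resolves to, so local backends come out free:
#
#   estimate_cost("openai", "gpt-4o-mini", 10_000, 2_000)       # ~0.0027
#   estimate_cost("llama_cpp", "llama-3.1-8b", 10_000, 2_000)   # 0.0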


def estimate_query_cost(
    backend: str,
    model_id: str,
    prompt_tokens: int,
    estimated_output_tokens: int = 500,
) -> float:
    """
    Estimate cost for a query before running it.

    Useful for budget checks before making API calls.

    Args:
        backend: Backend name
        model_id: Model identifier
        prompt_tokens: Number of input tokens
        estimated_output_tokens: Expected output tokens

    Returns:
        Estimated cost in USD
    """
    return estimate_cost(backend, model_id, prompt_tokens, estimated_output_tokens)
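
# A minimal pre-flight budget check built on this helper (a sketch; the
# budget constant and check_budget function are hypothetical, not module API):
#
#   MAX_QUERY_BUDGET_USD = 0.05
#
#   def check_budget(backend: str, model_id: str, prompt_tokens: int) -> None:
#       projected = estimate_query_cost(backend, model_id, prompt_tokens)
#       if projected > MAX_QUERY_BUDGET_USD:
#           raise RuntimeError(f"Projected cost ${projected:.4f} exceeds budget")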


# =============================================================================
# FLOP Estimation (for local models)
# =============================================================================

def estimate_flops(
    model_params_billions: float,
    tokens: int,
    is_generation: bool = True,
) -> float:
    """
    Estimate FLOPs for local model inference.

    Uses the approximation: FLOPs ≈ 2 * params * tokens.

    The per-token FLOP count is the same for prefill and generation; the
    difference between the two phases is wall-clock time (prefill processes
    tokens in parallel), which is handled in estimate_local_inference_time_ms.
    The is_generation flag lets callers label the phase but does not change
    the estimate.

    Args:
        model_params_billions: Model size in billions of parameters
        tokens: Number of tokens processed/generated
        is_generation: Whether this is generation (vs prefill)

    Returns:
        Estimated FLOPs
    """
    params = model_params_billions * 1e9

    # ~2 FLOPs per parameter per token (one multiply-accumulate) for a
    # forward pass, in both prefill and generation.
    return 2 * params * tokens
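
# Worked example (illustrative): a 7B-parameter model generating 100 tokens
# needs roughly 2 * 7e9 * 100 = 1.4e12 FLOPs, i.e. 1.4 TFLOPs:
#
#   estimate_flops(7.0, 100)  # 1.4e12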


def estimate_local_inference_time_ms(
    model_params_billions: float,
    tokens_in: int,
    tokens_out: int,
    tflops: float = 100.0,
) -> float:
    """
    Estimate inference time for local models.

    Args:
        model_params_billions: Model size in billions
        tokens_in: Input tokens (prefill)
        tokens_out: Output tokens (generation)
        tflops: Hardware capability in TFLOPS

    Returns:
        Estimated time in milliseconds
    """
    # Prefill FLOPs
    prefill_flops = estimate_flops(model_params_billions, tokens_in, is_generation=False)

    # Generation FLOPs
    gen_flops = estimate_flops(model_params_billions, tokens_out, is_generation=True)

    total_flops = prefill_flops + gen_flops

    # Convert TFLOPS to FLOPS
    flops_per_second = tflops * 1e12

    # Time in seconds, then milliseconds
    time_seconds = total_flops / flops_per_second
    return time_seconds * 1000
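
# Worked example (illustrative): a 7B model with 1,000 prefill tokens and 500
# generated tokens totals 2 * 7e9 * 1_500 = 2.1e13 FLOPs; on 100 TFLOPS of
# compute that is 2.1e13 / 1e14 = 0.21 s, i.e. 210 ms:
#
#   estimate_local_inference_time_ms(7.0, 1_000, 500)  # 210.0
#
# Note this is a compute-bound lower bound; generation is often limited by
# memory bandwidth in practice, so real latency can be higher.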


# =============================================================================
# Model Size Registry
# =============================================================================

# Known model sizes in billions of parameters
MODEL_SIZES: dict[str, float] = {
    # Qwen
    "qwen2.5-0.5b": 0.5,
    "qwen2.5-1.5b": 1.5,
    "qwen2.5-3b": 3.0,
    "qwen2.5-7b": 7.0,
    "qwen2.5-14b": 14.0,
    "qwen2.5-32b": 32.0,
    "qwen2.5-72b": 72.0,

    # Llama
    "llama-3.2-1b": 1.0,
    "llama-3.2-3b": 3.0,
    "llama-3.1-8b": 8.0,
    "llama-3.1-70b": 70.0,

    # Mistral
    "mistral-7b": 7.0,
    "mixtral-8x7b": 47.0,  # Sparse

    # Stubs
    "stub": 0.0,
    "mock": 0.0,
}


def get_model_size(model_id: str) -> float:
    """
    Get model size in billions of parameters.

    Tries to parse from model_id if not in registry.

    Args:
        model_id: Model identifier

    Returns:
        Model size in billions (0.0 if unknown)
    """
    model_lower = model_id.lower()

    # Check registry (substring match, so suffixes like "-instruct" still hit)
    for key, size in MODEL_SIZES.items():
        if key in model_lower:
            return size

    # Try to parse a size from the name (e.g., "7b", "14b", "1.5b")
    match = re.search(r'(\d+(?:\.\d+)?)\s*b', model_lower)
    if match:
        return float(match.group(1))

    return 0.0
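
# Illustrative lookups (model ids here are examples): registry hits win,
# then the regex fallback parses a size from the name, and anything else
# reports unknown:
#
#   get_model_size("Qwen2.5-7B-Instruct")   # 7.0  (registry substring match)
#   get_model_size("my-custom-13b-chat")    # 13.0 (regex fallback)
#   get_model_size("whisper-large")         # 0.0  (unknown)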