parishad-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. parishad/__init__.py +70 -0
  2. parishad/__main__.py +10 -0
  3. parishad/checker/__init__.py +25 -0
  4. parishad/checker/deterministic.py +644 -0
  5. parishad/checker/ensemble.py +496 -0
  6. parishad/checker/retrieval.py +546 -0
  7. parishad/cli/__init__.py +6 -0
  8. parishad/cli/code.py +3254 -0
  9. parishad/cli/main.py +1158 -0
  10. parishad/cli/prarambh.py +99 -0
  11. parishad/cli/sthapana.py +368 -0
  12. parishad/config/modes.py +139 -0
  13. parishad/config/pipeline.core.yaml +128 -0
  14. parishad/config/pipeline.extended.yaml +172 -0
  15. parishad/config/pipeline.fast.yaml +89 -0
  16. parishad/config/user_config.py +115 -0
  17. parishad/data/catalog.py +118 -0
  18. parishad/data/models.json +108 -0
  19. parishad/memory/__init__.py +79 -0
  20. parishad/models/__init__.py +181 -0
  21. parishad/models/backends/__init__.py +247 -0
  22. parishad/models/backends/base.py +211 -0
  23. parishad/models/backends/huggingface.py +318 -0
  24. parishad/models/backends/llama_cpp.py +239 -0
  25. parishad/models/backends/mlx_lm.py +141 -0
  26. parishad/models/backends/ollama.py +253 -0
  27. parishad/models/backends/openai_api.py +193 -0
  28. parishad/models/backends/transformers_hf.py +198 -0
  29. parishad/models/costs.py +385 -0
  30. parishad/models/downloader.py +1557 -0
  31. parishad/models/optimizations.py +871 -0
  32. parishad/models/profiles.py +610 -0
  33. parishad/models/reliability.py +876 -0
  34. parishad/models/runner.py +651 -0
  35. parishad/models/tokenization.py +287 -0
  36. parishad/orchestrator/__init__.py +24 -0
  37. parishad/orchestrator/config_loader.py +210 -0
  38. parishad/orchestrator/engine.py +1113 -0
  39. parishad/orchestrator/exceptions.py +14 -0
  40. parishad/roles/__init__.py +71 -0
  41. parishad/roles/base.py +712 -0
  42. parishad/roles/dandadhyaksha.py +163 -0
  43. parishad/roles/darbari.py +246 -0
  44. parishad/roles/majumdar.py +274 -0
  45. parishad/roles/pantapradhan.py +150 -0
  46. parishad/roles/prerak.py +357 -0
  47. parishad/roles/raja.py +345 -0
  48. parishad/roles/sacheev.py +203 -0
  49. parishad/roles/sainik.py +427 -0
  50. parishad/roles/sar_senapati.py +164 -0
  51. parishad/roles/vidushak.py +69 -0
  52. parishad/tools/__init__.py +7 -0
  53. parishad/tools/base.py +57 -0
  54. parishad/tools/fs.py +110 -0
  55. parishad/tools/perception.py +96 -0
  56. parishad/tools/retrieval.py +74 -0
  57. parishad/tools/shell.py +103 -0
  58. parishad/utils/__init__.py +7 -0
  59. parishad/utils/hardware.py +122 -0
  60. parishad/utils/logging.py +79 -0
  61. parishad/utils/scanner.py +164 -0
  62. parishad/utils/text.py +61 -0
  63. parishad/utils/tracing.py +133 -0
  64. parishad-0.1.0.dist-info/METADATA +256 -0
  65. parishad-0.1.0.dist-info/RECORD +68 -0
  66. parishad-0.1.0.dist-info/WHEEL +4 -0
  67. parishad-0.1.0.dist-info/entry_points.txt +2 -0
  68. parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/models/backends/base.py
@@ -0,0 +1,211 @@
+ """
+ Base classes and types for Parishad backends.
+
+ This module contains:
+ - BackendError: Exception for backend failures
+ - BackendConfig: Configuration dataclass
+ - BackendResult: Result dataclass
+ - ModelBackend: Protocol for backend implementations
+ - BaseBackend: Abstract base class
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass, field
+ from typing import Any, Protocol, runtime_checkable
+
+ logger = logging.getLogger(__name__)
+
+
+ class BackendError(Exception):
+     """
+     Raised when a backend operation fails.
+
+     Attributes:
+         backend_name: Name of the backend that failed
+         model_id: Model identifier (if known)
+         original_error: The underlying exception
+     """
+
+     def __init__(
+         self,
+         message: str,
+         backend_name: str = "",
+         model_id: str = "",
+         original_error: Exception | None = None,
+     ):
+         super().__init__(message)
+         self.backend_name = backend_name
+         self.model_id = model_id
+         self.original_error = original_error
+
+
+ @dataclass
+ class BackendConfig:
+     """
+     Configuration for a model backend.
+
+     This is a unified config structure that backends can use.
+     Backend-specific options go in `extra`.
+     """
+
+     model_id: str
+     """Model identifier (path, HuggingFace ID, or API model name)."""
+
+     context_length: int = 4096
+     """Maximum context window size in tokens."""
+
+     temperature: float = 0.5
+     """Default sampling temperature."""
+
+     top_p: float = 0.9
+     """Default nucleus sampling parameter."""
+
+     max_tokens: int = 1024
+     """Default maximum tokens to generate."""
+
+     stop: list[str] | None = None
+     """Default stop sequences."""
+
+     timeout: float = 120.0
+     """Request timeout in seconds."""
+
+     extra: dict[str, Any] = field(default_factory=dict)
+     """Backend-specific options (e.g., n_gpu_layers for llama.cpp)."""
+
+
+ @dataclass
+ class BackendResult:
+     """
+     Result from a backend generation call.
+
+     All backends must return this structure for consistent handling.
+     """
+
+     text: str
+     """Generated text content."""
+
+     tokens_in: int
+     """Number of input/prompt tokens."""
+
+     tokens_out: int
+     """Number of output/generated tokens."""
+
+     model_id: str = ""
+     """Model identifier used for generation."""
+
+     finish_reason: str = "stop"
+     """Why generation stopped: 'stop', 'length', 'error'."""
+
+     latency_ms: float = 0.0
+     """Generation latency in milliseconds."""
+
+     extra: dict[str, Any] = field(default_factory=dict)
+     """Backend-specific metadata."""
+
+
+ @runtime_checkable
+ class ModelBackend(Protocol):
+     """
+     Protocol for model backend implementations.
+
+     All backends must implement these methods to be usable by ModelRunner.
+     """
+
+     @property
+     def name(self) -> str:
+         """Backend name (e.g., 'llama_cpp', 'openai', 'stub')."""
+         ...
+
+     @property
+     def is_loaded(self) -> bool:
+         """Whether the backend is ready to generate."""
+         ...
+
+     @property
+     def model_id(self) -> str:
+         """Current model identifier."""
+         ...
+
+     def load(self, config: BackendConfig) -> None:
+         """Load/initialize the backend with the given config."""
+         ...
+
+     def generate(
+         self,
+         prompt: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         stop: list[str] | None = None,
+     ) -> BackendResult:
+         """Generate text completion."""
+         ...
+
+     def unload(self) -> None:
+         """Unload the model to free resources."""
+         ...
+
+
+ class BaseBackend(ABC):
+     """
+     Abstract base class for backend implementations.
+     """
+
+     _name: str = "base"
+     _model_id: str = ""
+     _loaded: bool = False
+     _config: BackendConfig | None = None
+
+     @property
+     def name(self) -> str:
+         """Backend name."""
+         return self._name
+
+     @property
+     def is_loaded(self) -> bool:
+         """Whether backend is ready."""
+         return self._loaded
+
+     @property
+     def model_id(self) -> str:
+         """Current model ID."""
+         return self._model_id
+
+     @abstractmethod
+     def load(self, config: BackendConfig) -> None:
+         """Load the backend. Must be implemented by subclasses."""
+         pass
+
+     @abstractmethod
+     def generate(
+         self,
+         prompt: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         stop: list[str] | None = None,
+     ) -> BackendResult:
+         """Generate text. Must be implemented by subclasses."""
+         pass
+
+     def unload(self) -> None:
+         """Default unload implementation."""
+         self._loaded = False
+         self._model_id = ""
+         self._config = None
+
+     def _estimate_tokens(self, text: str) -> int:
+         """
+         Cheap token estimation heuristic.
+
+         Uses word count * 1.3 as a rough approximation.
+         Override in subclasses for more accurate counting.
+         """
+         if not text:
+             return 0
+         # Rough approximation: ~1.3 tokens per word for English
+         words = len(text.split())
+         return int(words * 1.3)
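
To make the contract above concrete, the following is a minimal sketch, not part of the package, of how a backend built on this base module could be implemented and exercised. EchoBackend and the "echo-model" id are invented for illustration; BackendConfig, BackendResult, BaseBackend, and ModelBackend are the names defined in base.py above, and the import path follows the file list.

from __future__ import annotations

from parishad.models.backends.base import (
    BackendConfig,
    BackendResult,
    BaseBackend,
    ModelBackend,
)


class EchoBackend(BaseBackend):
    """Hypothetical backend that just echoes the prompt; illustration only."""

    _name = "echo"

    def load(self, config: BackendConfig) -> None:
        # Nothing to load; record the config and mark the backend ready.
        self._config = config
        self._model_id = config.model_id
        self._loaded = True

    def generate(
        self,
        prompt: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
        stop: list[str] | None = None,
    ) -> BackendResult:
        text = prompt  # a real backend would call its model here
        return BackendResult(
            text=text,
            tokens_in=self._estimate_tokens(prompt),
            tokens_out=self._estimate_tokens(text),
            model_id=self._model_id,
        )


backend = EchoBackend()
backend.load(BackendConfig(model_id="echo-model"))
print(isinstance(backend, ModelBackend))  # True: ModelBackend is runtime-checkable
result = backend.generate("Hello", max_tokens=32, temperature=0.5, top_p=0.9)
print(result.text, result.tokens_out)

Because ModelBackend is a runtime-checkable Protocol, any object exposing these properties and methods passes the isinstance check, whether or not it inherits from BaseBackend.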
parishad/models/backends/huggingface.py
@@ -0,0 +1,318 @@
+ """
+ HuggingFace backends for inference.
+
+ Provides:
+ - HuggingFaceBackend: Uses HuggingFace Inference API (cloud)
+ - HuggingFaceLocalBackend: Uses local transformers (alias for TransformersBackend)
+ """
+
+ from __future__ import annotations
+
+ import logging
+ import os
+ import time
+
+ from .base import BackendConfig, BackendError, BackendResult, BaseBackend
+
+ logger = logging.getLogger(__name__)
+
+ # Lazy imports
+ _huggingface_hub = None
+
+
+ def _get_huggingface_hub():
+     """Lazy import of huggingface_hub."""
+     global _huggingface_hub
+     if _huggingface_hub is None:
+         try:
+             import huggingface_hub
+             _huggingface_hub = huggingface_hub
+         except ImportError:
+             raise ImportError(
+                 "huggingface_hub is required for HuggingFaceBackend. "
+                 "Install with: pip install huggingface_hub"
+             )
+     return _huggingface_hub
+
+
+ class HuggingFaceBackend(BaseBackend):
+     """
+     Backend for HuggingFace Inference API (cloud-based).
+
+     Uses HuggingFace's serverless Inference API or dedicated Inference Endpoints.
+     Requires HF_TOKEN environment variable or token in config.
+     """
+
+     _name = "huggingface"
+
+     def __init__(self):
+         """Initialize HuggingFaceBackend."""
+         super().__init__()
+         self._client = None
+
+     def load(self, config: BackendConfig) -> None:
+         """Initialize HuggingFace Inference client."""
+         hf = _get_huggingface_hub()
+
+         extra = config.extra or {}
+
+         # Get token
+         token = extra.get("token") or os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN")
+
+         if not token:
+             logger.warning(
+                 "No HuggingFace token found. Some models may not be accessible. "
+                 "Set HF_TOKEN environment variable."
+             )
+
+         try:
+             # Check if it's an Inference Endpoint URL or model ID
+             model_id = config.model_id
+
+             if model_id.startswith("https://"):
+                 # Dedicated Inference Endpoint
+                 self._client = hf.InferenceClient(
+                     model=model_id,
+                     token=token,
+                     timeout=config.timeout,
+                 )
+                 logger.info(f"✅ Connected to HuggingFace Inference Endpoint")
+             else:
+                 # Serverless Inference API
+                 self._client = hf.InferenceClient(
+                     model=model_id,
+                     token=token,
+                     timeout=config.timeout,
+                 )
+                 logger.info(f"✅ Using HuggingFace Serverless API for {model_id}")
+
+             self._config = config
+             self._model_id = model_id
+             self._loaded = True
+
+         except Exception as e:
+             raise BackendError(
+                 f"Failed to initialize HuggingFace client: {e}",
+                 backend_name=self._name,
+                 model_id=config.model_id,
+                 original_error=e,
+             )
+
+     def generate(
+         self,
+         prompt: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         stop: list[str] | None = None,
+     ) -> BackendResult:
+         """Generate text using HuggingFace Inference API."""
+         if not self._loaded or self._client is None:
+             raise BackendError(
+                 "Client not initialized",
+                 backend_name=self._name,
+                 model_id=self._model_id,
+             )
+
+         start_time = time.perf_counter()
+
+         try:
+             # Use text_generation for LLMs
+             response = self._client.text_generation(
+                 prompt=prompt,
+                 max_new_tokens=max_tokens,
+                 temperature=max(temperature, 0.01),
+                 top_p=top_p,
+                 stop_sequences=stop or [],
+                 return_full_text=False, # Only return generated text
+                 details=True, # Get token counts
+             )
+
+             # Extract text and details
+             if hasattr(response, 'generated_text'):
+                 text = response.generated_text
+                 tokens_out = response.details.generated_tokens if hasattr(response, 'details') else self._estimate_tokens(text)
+                 finish_reason = response.details.finish_reason if hasattr(response, 'details') else "stop"
+             else:
+                 # Simple string response
+                 text = str(response)
+                 tokens_out = self._estimate_tokens(text)
+                 finish_reason = "stop"
+
+             tokens_in = self._estimate_tokens(prompt)
+             latency_ms = (time.perf_counter() - start_time) * 1000
+
+             return BackendResult(
+                 text=text,
+                 tokens_in=tokens_in,
+                 tokens_out=tokens_out,
+                 model_id=self._model_id,
+                 finish_reason=finish_reason,
+                 latency_ms=latency_ms,
+             )
+
+         except Exception as e:
+             # Check for common HF API errors
+             error_msg = str(e)
+             if "429" in error_msg:
+                 error_msg = f"Rate limited by HuggingFace API. Try again later. {e}"
+             elif "401" in error_msg or "403" in error_msg:
+                 error_msg = f"Authentication failed. Check your HF_TOKEN. {e}"
+             elif "Model is currently loading" in error_msg:
+                 error_msg = f"Model is loading on HuggingFace servers. Retry in ~30s. {e}"
+
+             raise BackendError(
+                 f"HuggingFace generation failed: {error_msg}",
+                 backend_name=self._name,
+                 model_id=self._model_id,
+                 original_error=e,
+             )
+
+     def unload(self) -> None:
+         """Close the client."""
+         self._client = None
+         super().unload()
+
+
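A short usage sketch for HuggingFaceBackend follows; it is not taken from the package's documentation. The model id is a placeholder, and it assumes huggingface_hub is installed and an HF_TOKEN is available, either in the environment or passed through extra, as the load() code above reads it.

import os

from parishad.models.backends.base import BackendConfig, BackendError
from parishad.models.backends.huggingface import HuggingFaceBackend

config = BackendConfig(
    model_id="some-org/some-instruct-model",  # placeholder HuggingFace model id
    timeout=60.0,
    extra={"token": os.environ.get("HF_TOKEN")},
)

backend = HuggingFaceBackend()
try:
    backend.load(config)
    result = backend.generate(
        "Summarize the Parishad backend protocol in one sentence.",
        max_tokens=128,
        temperature=0.3,
        top_p=0.9,
    )
    print(result.text)
    print(f"{result.tokens_in} -> {result.tokens_out} tokens in {result.latency_ms:.0f} ms")
except BackendError as err:
    # load()/generate() wrap rate limits (429), auth failures (401/403), and
    # cold starts ("Model is currently loading") in BackendError with a hint.
    print(f"{err.backend_name} failed for {err.model_id}: {err}")
finally:
    backend.unload()
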
+ class HuggingFaceChatBackend(BaseBackend):
+     """
+     Backend for HuggingFace Inference API with chat/conversation support.
+
+     Uses the chat_completion endpoint for models that support it.
+     """
+
+     _name = "huggingface_chat"
+
+     def __init__(self):
+         """Initialize HuggingFaceChatBackend."""
+         super().__init__()
+         self._client = None
+
+     def load(self, config: BackendConfig) -> None:
+         """Initialize HuggingFace Inference client."""
+         hf = _get_huggingface_hub()
+
+         extra = config.extra or {}
+         token = extra.get("token") or os.environ.get("HF_TOKEN")
+
+         try:
+             self._client = hf.InferenceClient(
+                 model=config.model_id,
+                 token=token,
+                 timeout=config.timeout,
+             )
+
+             self._config = config
+             self._model_id = config.model_id
+             self._loaded = True
+
+         except Exception as e:
+             raise BackendError(
+                 f"Failed to initialize HuggingFace chat client: {e}",
+                 backend_name=self._name,
+                 model_id=config.model_id,
+                 original_error=e,
+             )
+
+     def generate(
+         self,
+         prompt: str,
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         stop: list[str] | None = None,
+     ) -> BackendResult:
+         """Generate text using HuggingFace chat completion."""
+         if not self._loaded or self._client is None:
+             raise BackendError(
+                 "Client not initialized",
+                 backend_name=self._name,
+                 model_id=self._model_id,
+             )
+
+         start_time = time.perf_counter()
+
+         try:
+             # Parse prompt into messages
+             messages = self._parse_prompt_to_messages(prompt)
+
+             response = self._client.chat_completion(
+                 messages=messages,
+                 max_tokens=max_tokens,
+                 temperature=max(temperature, 0.01),
+                 top_p=top_p,
+                 stop=stop,
+             )
+
+             # Extract response
+             choice = response.choices[0]
+             text = choice.message.content or ""
+             finish_reason = choice.finish_reason or "stop"
+
+             # Token counts
+             if hasattr(response, 'usage') and response.usage:
+                 tokens_in = response.usage.prompt_tokens
+                 tokens_out = response.usage.completion_tokens
+             else:
+                 tokens_in = self._estimate_tokens(prompt)
+                 tokens_out = self._estimate_tokens(text)
+
+             latency_ms = (time.perf_counter() - start_time) * 1000
+
+             return BackendResult(
+                 text=text,
+                 tokens_in=tokens_in,
+                 tokens_out=tokens_out,
+                 model_id=self._model_id,
+                 finish_reason=finish_reason,
+                 latency_ms=latency_ms,
+             )
+
+         except Exception as e:
+             raise BackendError(
+                 f"HuggingFace chat generation failed: {e}",
+                 backend_name=self._name,
+                 model_id=self._model_id,
+                 original_error=e,
+             )
+
+     def _parse_prompt_to_messages(self, prompt: str) -> list[dict]:
+         """Parse prompt string into message format."""
+         messages = []
+
+         # Try to parse Llama-3 format
+         if "<|start_header_id|>" in prompt:
+             parts = prompt.split("<|start_header_id|>")
+             for part in parts:
+                 if part.startswith("system"):
+                     content = part.split("<|end_header_id|>")[1].split("<|eot_id|>")[0].strip()
+                     if content:
+                         messages.append({"role": "system", "content": content})
+                 elif part.startswith("user"):
+                     content = part.split("<|end_header_id|>")[1].split("<|eot_id|>")[0].strip()
+                     if content:
+                         messages.append({"role": "user", "content": content})
+                 elif part.startswith("assistant"):
+                     content = part.split("<|end_header_id|>")[1].split("<|eot_id|>")[0].strip()
+                     if content:
+                         messages.append({"role": "assistant", "content": content})
+         elif "System:" in prompt and "User:" in prompt:
+             # Simple format
+             parts = prompt.split("User:", 1)
+             system = parts[0].replace("System:", "").strip()
+             user = parts[1].strip() if len(parts) > 1 else ""
+
+             if system:
+                 messages.append({"role": "system", "content": system})
+             if user:
+                 messages.append({"role": "user", "content": user})
+         else:
+             # Single user message
+             messages.append({"role": "user", "content": prompt})
+
+         return messages
+
+     def unload(self) -> None:
+         """Close client."""
+         self._client = None
+         super().unload()
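
HuggingFaceChatBackend receives a single flat prompt string, so _parse_prompt_to_messages reconstructs the chat turns before calling chat_completion. The sketch below is illustrative only (it calls a private helper directly) and shows the two prompt layouts the parser recognizes, plus the fallback.

from parishad.models.backends.huggingface import HuggingFaceChatBackend

chat = HuggingFaceChatBackend()

# Llama-3 style header/eot markers are split into system/user/assistant turns.
llama3_prompt = (
    "<|start_header_id|>system<|end_header_id|>You are terse.<|eot_id|>"
    "<|start_header_id|>user<|end_header_id|>Name one planet.<|eot_id|>"
)
print(chat._parse_prompt_to_messages(llama3_prompt))
# [{'role': 'system', 'content': 'You are terse.'},
#  {'role': 'user', 'content': 'Name one planet.'}]

# A plain "System: ... User: ..." prompt is split on the first "User:".
simple_prompt = "System: You are terse. User: Name one planet."
print(chat._parse_prompt_to_messages(simple_prompt))

# Anything else becomes a single user message.
print(chat._parse_prompt_to_messages("Name one planet."))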