parishad 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parishad/__init__.py +70 -0
- parishad/__main__.py +10 -0
- parishad/checker/__init__.py +25 -0
- parishad/checker/deterministic.py +644 -0
- parishad/checker/ensemble.py +496 -0
- parishad/checker/retrieval.py +546 -0
- parishad/cli/__init__.py +6 -0
- parishad/cli/code.py +3254 -0
- parishad/cli/main.py +1158 -0
- parishad/cli/prarambh.py +99 -0
- parishad/cli/sthapana.py +368 -0
- parishad/config/modes.py +139 -0
- parishad/config/pipeline.core.yaml +128 -0
- parishad/config/pipeline.extended.yaml +172 -0
- parishad/config/pipeline.fast.yaml +89 -0
- parishad/config/user_config.py +115 -0
- parishad/data/catalog.py +118 -0
- parishad/data/models.json +108 -0
- parishad/memory/__init__.py +79 -0
- parishad/models/__init__.py +181 -0
- parishad/models/backends/__init__.py +247 -0
- parishad/models/backends/base.py +211 -0
- parishad/models/backends/huggingface.py +318 -0
- parishad/models/backends/llama_cpp.py +239 -0
- parishad/models/backends/mlx_lm.py +141 -0
- parishad/models/backends/ollama.py +253 -0
- parishad/models/backends/openai_api.py +193 -0
- parishad/models/backends/transformers_hf.py +198 -0
- parishad/models/costs.py +385 -0
- parishad/models/downloader.py +1557 -0
- parishad/models/optimizations.py +871 -0
- parishad/models/profiles.py +610 -0
- parishad/models/reliability.py +876 -0
- parishad/models/runner.py +651 -0
- parishad/models/tokenization.py +287 -0
- parishad/orchestrator/__init__.py +24 -0
- parishad/orchestrator/config_loader.py +210 -0
- parishad/orchestrator/engine.py +1113 -0
- parishad/orchestrator/exceptions.py +14 -0
- parishad/roles/__init__.py +71 -0
- parishad/roles/base.py +712 -0
- parishad/roles/dandadhyaksha.py +163 -0
- parishad/roles/darbari.py +246 -0
- parishad/roles/majumdar.py +274 -0
- parishad/roles/pantapradhan.py +150 -0
- parishad/roles/prerak.py +357 -0
- parishad/roles/raja.py +345 -0
- parishad/roles/sacheev.py +203 -0
- parishad/roles/sainik.py +427 -0
- parishad/roles/sar_senapati.py +164 -0
- parishad/roles/vidushak.py +69 -0
- parishad/tools/__init__.py +7 -0
- parishad/tools/base.py +57 -0
- parishad/tools/fs.py +110 -0
- parishad/tools/perception.py +96 -0
- parishad/tools/retrieval.py +74 -0
- parishad/tools/shell.py +103 -0
- parishad/utils/__init__.py +7 -0
- parishad/utils/hardware.py +122 -0
- parishad/utils/logging.py +79 -0
- parishad/utils/scanner.py +164 -0
- parishad/utils/text.py +61 -0
- parishad/utils/tracing.py +133 -0
- parishad-0.1.0.dist-info/METADATA +256 -0
- parishad-0.1.0.dist-info/RECORD +68 -0
- parishad-0.1.0.dist-info/WHEEL +4 -0
- parishad-0.1.0.dist-info/entry_points.txt +2 -0
- parishad-0.1.0.dist-info/licenses/LICENSE +21 -0
parishad/models/backends/openai_api.py
@@ -0,0 +1,193 @@
"""
OpenAI and Ollama backends for API-based inference.
"""

from __future__ import annotations

import logging
import os
import time

from .base import BackendConfig, BackendError, BackendResult, BaseBackend

logger = logging.getLogger(__name__)

# Lazy import
_openai = None


def _get_openai():
    """Lazy import of openai package."""
    global _openai
    if _openai is None:
        try:
            import openai
            _openai = openai
        except ImportError:
            raise ImportError(
                "openai package is required for OpenAIBackend. "
                "Install with: pip install openai"
            )
    return _openai


class OpenAIBackend(BaseBackend):
    """Backend for OpenAI API and compatible endpoints."""

    _name = "openai"

    def __init__(self):
        """Initialize OpenAIBackend."""
        super().__init__()
        self._client = None

    def load(self, config: BackendConfig) -> None:
        """Initialize OpenAI client."""
        openai = _get_openai()

        extra = config.extra or {}

        api_key_env = extra.get("api_key_env", "OPENAI_API_KEY")
        api_key = extra.get("api_key") or os.environ.get(api_key_env)

        if not api_key:
            raise BackendError(
                f"OpenAI API key not found. Set {api_key_env} environment variable.",
                backend_name=self._name,
                model_id=config.model_id,
            )

        client_kwargs = {
            "api_key": api_key,
            "timeout": extra.get("timeout", config.timeout),
        }

        if "base_url" in extra:
            client_kwargs["base_url"] = extra["base_url"]

        if "organization" in extra:
            client_kwargs["organization"] = extra["organization"]

        try:
            self._client = openai.OpenAI(**client_kwargs)
            self._config = config
            self._model_id = config.model_id
            self._loaded = True

        except Exception as e:
            raise BackendError(
                f"Failed to initialize OpenAI client: {e}",
                backend_name=self._name,
                model_id=config.model_id,
                original_error=e,
            )

    def generate(
        self,
        prompt: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
        stop: list[str] | None = None,
    ) -> BackendResult:
        """Generate text using OpenAI API."""
        if not self._loaded or self._client is None:
            raise BackendError(
                "Client not initialized",
                backend_name=self._name,
                model_id=self._model_id,
            )

        start_time = time.perf_counter()

        try:
            messages = self._parse_prompt_to_messages(prompt)

            request_kwargs = {
                "model": self._model_id,
                "messages": messages,
                "max_tokens": max_tokens,
                "temperature": temperature,
                "top_p": top_p,
            }

            if stop:
                request_kwargs["stop"] = stop

            response = self._client.chat.completions.create(**request_kwargs)

            choice = response.choices[0]
            text = choice.message.content or ""
            finish_reason = choice.finish_reason or "stop"

            usage = response.usage
            if usage:
                tokens_in = usage.prompt_tokens
                tokens_out = usage.completion_tokens
            else:
                tokens_in = self._estimate_tokens(prompt)
                tokens_out = self._estimate_tokens(text)

            latency_ms = (time.perf_counter() - start_time) * 1000

            return BackendResult(
                text=text,
                tokens_in=tokens_in,
                tokens_out=tokens_out,
                model_id=self._model_id,
                finish_reason=finish_reason,
                latency_ms=latency_ms,
                extra={"response_id": response.id, "created": response.created},
            )

        except Exception as e:
            raise BackendError(
                f"OpenAI API call failed: {e}",
                backend_name=self._name,
                model_id=self._model_id,
                original_error=e,
            )

    def _parse_prompt_to_messages(self, prompt: str) -> list[dict[str, str]]:
        """Parse a prompt string into chat messages."""
        messages = []

        if "System:" in prompt and "User:" in prompt:
            parts = prompt.split("User:", 1)
            system_part = parts[0].replace("System:", "").strip()
            user_part = parts[1].strip() if len(parts) > 1 else ""

            if system_part:
                messages.append({"role": "system", "content": system_part})
            if user_part:
                if "Assistant:" in user_part:
                    user_part = user_part.split("Assistant:")[0].strip()
                messages.append({"role": "user", "content": user_part})
        else:
            messages.append({"role": "user", "content": prompt})

        return messages

    def unload(self) -> None:
        """Close the client."""
        self._client = None
        super().unload()


class OllamaBackend(OpenAIBackend):
    """Backend for Ollama (via OpenAI compatibility layer)."""

    _name = "ollama"

    def load(self, config: BackendConfig) -> None:
        """Load Ollama backend with defaults."""
        if config.extra is None:
            config.extra = {}

        config.extra.setdefault("base_url", "http://localhost:11434/v1")
        config.extra.setdefault("api_key", "ollama")

        if config.model_id.startswith("ollama:"):
            config.model_id = config.model_id.replace("ollama:", "", 1)

        super().load(config)
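A minimal usage sketch for the backends above. The load/generate/unload calls and the BackendResult fields come straight from the file; the BackendConfig constructor arguments (model_id, timeout, extra) are assumptions inferred from the attributes load() reads, since base.py is not shown here, and the model tag is only an example.

from parishad.models.backends.base import BackendConfig
from parishad.models.backends.openai_api import OllamaBackend

# Assumed constructor fields, based on config.model_id / config.timeout / config.extra
# being read in load(); check base.py for the real signature.
config = BackendConfig(
    model_id="ollama:llama3.1:8b",  # "ollama:" prefix is stripped by OllamaBackend.load()
    timeout=120,
    extra={},                       # base_url/api_key default to the local Ollama server
)

backend = OllamaBackend()
backend.load(config)

# "System:" / "User:" markers are split into chat messages by _parse_prompt_to_messages().
result = backend.generate(
    prompt="System: You are terse.\nUser: Name one prime number.",
    max_tokens=32,
    temperature=0.2,
    top_p=0.9,
)
print(result.text, result.tokens_in, result.tokens_out)

backend.unload()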
parishad/models/backends/transformers_hf.py
@@ -0,0 +1,198 @@
"""
HuggingFace Transformers backend.
"""

from __future__ import annotations

import logging
import time
from pathlib import Path

from .base import BackendConfig, BackendError, BackendResult, BaseBackend

logger = logging.getLogger(__name__)

# Lazy imports
_transformers = None
_torch = None


def _get_transformers():
    """Lazy import of transformers and torch."""
    global _transformers, _torch
    if _transformers is None:
        try:
            import transformers
            import torch
            _transformers = transformers
            _torch = torch
        except ImportError:
            raise ImportError(
                "transformers and torch are required for TransformersBackend. "
                "Install with: pip install transformers torch"
            )
    return _transformers, _torch


class TransformersBackend(BaseBackend):
    """Backend for HuggingFace Transformers models."""

    _name = "transformers"

    def __init__(self):
        """Initialize TransformersBackend."""
        super().__init__()
        self._model = None
        self._tokenizer = None

    def load(self, config: BackendConfig) -> None:
        """Load a Transformers model."""
        transformers, torch = _get_transformers()

        extra = config.extra or {}

        model_id_or_path = config.model_id
        p = Path(model_id_or_path)
        if p.exists() and p.is_file():
            model_id_or_path = str(p.parent)

        try:
            self._tokenizer = transformers.AutoTokenizer.from_pretrained(
                model_id_or_path,
                trust_remote_code=extra.get("trust_remote_code", False),
            )

            model_kwargs = {
                "device_map": extra.get("device_map", "auto"),
                "trust_remote_code": extra.get("trust_remote_code", False),
            }

            dtype_str = extra.get("torch_dtype", "float16")
            if dtype_str == "float16":
                model_kwargs["torch_dtype"] = torch.float16
            elif dtype_str == "bfloat16":
                model_kwargs["torch_dtype"] = torch.bfloat16
            elif dtype_str == "float32":
                model_kwargs["torch_dtype"] = torch.float32

            quantization = extra.get("quantization")
            if quantization in ("4bit", "8bit"):
                try:
                    from transformers import BitsAndBytesConfig

                    cpu_offload = extra.get("cpu_offload", False)

                    if quantization == "4bit":
                        model_kwargs["quantization_config"] = BitsAndBytesConfig(
                            load_in_4bit=True,
                            bnb_4bit_compute_dtype=torch.float16,
                            llm_int8_enable_fp32_cpu_offload=cpu_offload,
                        )
                    else:
                        model_kwargs["quantization_config"] = BitsAndBytesConfig(
                            load_in_8bit=True,
                            llm_int8_enable_fp32_cpu_offload=cpu_offload,
                        )
                except ImportError:
                    pass

            self._model = transformers.AutoModelForCausalLM.from_pretrained(
                model_id_or_path,
                **model_kwargs,
            )

            self._config = config
            self._model_id = config.model_id
            self._loaded = True

        except Exception as e:
            raise BackendError(
                f"Failed to load model: {e}",
                backend_name=self._name,
                model_id=config.model_id,
                original_error=e,
            )

    def generate(
        self,
        prompt: str,
        max_tokens: int,
        temperature: float,
        top_p: float,
        stop: list[str] | None = None,
    ) -> BackendResult:
        """Generate text using Transformers."""
        if not self._loaded or self._model is None or self._tokenizer is None:
            raise BackendError(
                "Model not loaded",
                backend_name=self._name,
                model_id=self._model_id,
            )

        start_time = time.perf_counter()

        try:
            inputs = self._tokenizer(prompt, return_tensors="pt")
            inputs = inputs.to(self._model.device)

            input_len = inputs.input_ids.shape[1]

            with _torch.no_grad():
                outputs = self._model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    temperature=max(temperature, 0.01),
                    top_p=top_p,
                    do_sample=temperature > 0,
                    pad_token_id=self._tokenizer.eos_token_id,
                )

            generated_ids = outputs[0][input_len:]
            text = self._tokenizer.decode(generated_ids, skip_special_tokens=True)

            latency_ms = (time.perf_counter() - start_time) * 1000

            if "cuda" in str(self._model.device):
                _torch.cuda.empty_cache()

            finish_reason = "length" if len(generated_ids) >= max_tokens else "stop"
            if stop:
                for s in stop:
                    if s in text:
                        text = text.split(s)[0]
                        finish_reason = "stop"
                        break

            return BackendResult(
                text=text,
                tokens_in=input_len,
                tokens_out=len(generated_ids),
                model_id=self._model_id,
                finish_reason=finish_reason,
                latency_ms=latency_ms,
            )

        except Exception as e:
            raise BackendError(
                f"Generation failed: {e}",
                backend_name=self._name,
                model_id=self._model_id,
                original_error=e,
            )

    def unload(self) -> None:
        """Unload model."""
        if self._model is not None:
            del self._model
            self._model = None
        if self._tokenizer is not None:
            del self._tokenizer
            self._tokenizer = None

        if _torch is not None:
            if _torch.cuda.is_available():
                _torch.cuda.empty_cache()
            elif _torch.backends.mps.is_available():
                _torch.mps.empty_cache()

        super().unload()
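A corresponding sketch for the local Transformers backend. The extra keys (torch_dtype, quantization, device_map, cpu_offload, trust_remote_code) and the generate() signature are taken from the file above; the BackendConfig constructor and the Hugging Face model id are illustrative assumptions, and 4-bit quantization only takes effect when bitsandbytes is installed.

from parishad.models.backends.base import BackendConfig
from parishad.models.backends.transformers_hf import TransformersBackend

# Assumed constructor fields; only model_id and extra are read by this backend's load().
config = BackendConfig(
    model_id="Qwen/Qwen2.5-0.5B-Instruct",  # example hub id; a local directory also works
    extra={
        "torch_dtype": "bfloat16",   # one of float16 / bfloat16 / float32
        "quantization": "4bit",      # optional; silently skipped if bitsandbytes is missing
        "device_map": "auto",
    },
)

backend = TransformersBackend()
backend.load(config)

result = backend.generate(
    prompt="User: Summarize what a tokenizer does.",
    max_tokens=64,
    temperature=0.0,    # temperature <= 0 selects the do_sample=False (greedy) path
    top_p=1.0,
    stop=["\nUser:"],   # applied as post-hoc truncation of the decoded text
)
print(result.finish_reason, result.latency_ms)

backend.unload()        # also clears the CUDA/MPS cache when torch is loaded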