jarvis-cli-ck 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
core/__init__.py ADDED
File without changes
core/llm_engine.py ADDED
@@ -0,0 +1,347 @@
1
+ """
2
+ llm_engine.py — LLM interface for Jarvis CLI.
3
+
4
+ Primary: Ollama (local, phi3:mini, CPU-friendly, streaming).
5
+ Fallback 1: NVIDIA NIM DeepSeek V4 Flash (cloud, fast).
6
+ Fallback 2: NVIDIA NIM Llama 3.1 8B Instruct (cloud, backup).
7
+
8
+ Backend client libraries (ollama, requests) are lazy-loaded — never imported at module level.
9
+ Reads API keys from ~/.jarvis/config.yaml.
10
+ """
11
+
12
+ from pathlib import Path
13
+ from typing import Dict, Generator, List, Optional
14
+
15
+ import yaml
16
+
17
+ from utils.logger import log
18
+ from utils.language_detect import detect
19
+
20
+ # ── Config path ───────────────────────────────────────────────────────────────
21
# Location of the per-user Jarvis configuration file.
_CONFIG_PATH = Path.home().joinpath(".jarvis", "config.yaml")

# ── Defaults (overridden by config.yaml) ─────────────────────────────────────
# Ollama model used when the config does not name one.
DEFAULT_OLLAMA_MODEL = "phi3:mini"

# Chat-completions endpoint shared by every NVIDIA NIM model.
NIM_API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Seconds; raised to 30 to cover larger-model API calls and cold starts.
TIMEOUT = 30
28
+
29
+ # ── NIM Models Map ────────────────────────────────────────────────────────────
30
# Each entry maps a short model key to its NIM model id and to the name of the
# API-key entry used for it in the config. The key name mirrors the map key
# for every model, so both are derived from a single table of pairs.
NIM_MODELS = {
    key: {"id": model_id, "key_name": key}
    for key, model_id in (
        ("deepseek_v4_pro", "deepseek-ai/deepseek-v4-pro"),
        ("llama_3_3_70b_instruct", "meta/llama-3.3-70b-instruct"),
        (
            "mistral_large_3_675b_instruct_2512",
            "mistralai/mistral-large-3-675b-instruct-2512",
        ),
        ("llama_3_2_11b_vision_instruct", "meta/llama-3.2-11b-vision-instruct"),
        ("deepseek_v4_flash", "deepseek-ai/deepseek-v4-flash"),
        ("llama_3_1_8b_instruct", "meta/llama-3.1-8b-instruct"),
    )
}
56
+
57
+
58
def _load_config() -> Dict:
    """Load the Jarvis configuration from ``~/.jarvis/config.yaml``.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the file
        is missing, empty, cannot be read or parsed, or when its top-level
        YAML value is not a mapping.
    """
    if not _CONFIG_PATH.exists():
        return {}
    try:
        # Pin the encoding so parsing does not depend on the platform locale.
        with _CONFIG_PATH.open("r", encoding="utf-8") as f:
            cfg = yaml.safe_load(f)
    except Exception as exc:
        # Deliberately best-effort: a broken config must not take the CLI down.
        log.error(f"Config load failed: {exc}")
        return {}

    if cfg is None:
        # An empty file parses to None.
        return {}
    if not isinstance(cfg, dict):
        # Callers chain .get() on the result; a list or scalar would crash them.
        log.error(
            f"Config load failed: expected a mapping, got {type(cfg).__name__}"
        )
        return {}
    return cfg
72
+
73
+
74
def _get_model() -> str:
    """Return the configured Ollama model name.

    Reads ``jarvis.llm_model`` from the loaded config.

    Returns:
        The configured model name, or DEFAULT_OLLAMA_MODEL when the config,
        the ``jarvis`` section, or the ``llm_model`` value is missing or empty.
    """
    cfg = _load_config()
    section = cfg.get("jarvis") if isinstance(cfg, dict) else None
    if not isinstance(section, dict):
        # A bare `jarvis:` key parses to None, which has no .get().
        return DEFAULT_OLLAMA_MODEL
    # `or` also covers an explicitly empty/null llm_model entry.
    return section.get("llm_model") or DEFAULT_OLLAMA_MODEL
82
+
83
+
84
def _get_nim_keys() -> Dict[str, str]:
    """Return the NVIDIA NIM API keys from config.

    Reads the ``apis.nvidia_nim`` mapping from the loaded config.

    Returns:
        Dict mapping model key name to API key value, or an empty dict when
        the config, the ``apis`` section, or the ``nvidia_nim`` entry is
        missing or is not a mapping.
    """
    cfg = _load_config()
    apis = cfg.get("apis") if isinstance(cfg, dict) else None
    if not isinstance(apis, dict):
        # A bare `apis:` key parses to None, which has no .get().
        return {}
    nim_keys = apis.get("nvidia_nim")
    return nim_keys if isinstance(nim_keys, dict) else {}
95
+
96
+
97
+ # ── Ollama streaming ──────────────────────────────────────────────────────────
98
+
99
def stream_ollama(
    messages: List[Dict[str, str]],
    model: Optional[str] = None,
) -> Generator[str, None, None]:
    """Stream a chat completion from Ollama.

    The ``ollama`` package is imported lazily inside this function. Because
    this is a generator, nothing runs (and nothing is raised) until the
    caller starts iterating.

    Args:
        messages: List of {"role": ..., "content": ...} dicts.
        model: Ollama model name. Defaults to the configured model.

    Yields:
        Non-empty content strings taken from each streamed chunk.

    Raises:
        RuntimeError: If the ollama package is not installed, or if the chat
            call or the iteration over its stream raises. The original
            exception is attached as ``__cause__``.
    """
    try:
        import ollama  # lazy-load
    except ImportError as exc:
        raise RuntimeError(
            "ollama package not installed. Run: pip install ollama"
        ) from exc

    m = model or _get_model()
    log.debug("Ollama stream started.")

    try:
        # Named so it does not shadow this module's public stream() function.
        response_stream = ollama.chat(
            model=m,
            messages=messages,
            stream=True,
        )
        for chunk in response_stream:
            token = chunk.get("message", {}).get("content", "")
            if token:
                yield token
    except Exception as exc:
        raise RuntimeError(f"Ollama error: {exc}") from exc
138
+
139
+
140
+ # ── NVIDIA NIM streaming ──────────────────────────────────────────────────────
141
+
142
def stream_nim(
    messages: List[Dict[str, str]],
    model_key: str,
    max_tokens: int = 256,
) -> Generator[str, None, None]:
    """Stream a chat completion from the NVIDIA NIM API.

    ``json`` and ``requests`` are imported lazily inside this function. The
    API key is looked up in the config under the model's ``key_name``.
    Because this is a generator, nothing runs (and nothing is raised) until
    the caller starts iterating.

    Args:
        messages: List of {"role": ..., "content": ...} dicts.
        model_key: Key name in NIM_MODELS (e.g. "deepseek_v4_pro").
        max_tokens: Value sent as ``max_tokens`` in the request payload.

    Yields:
        Non-empty ``delta.content`` strings from the first choice of each
        server-sent event.

    Raises:
        RuntimeError: If the model key is unknown, the API key is missing,
            or the HTTP request fails.
    """
    import json
    import requests

    if model_key not in NIM_MODELS:
        raise RuntimeError(f"Unknown NIM model key: {model_key}")

    model_info = NIM_MODELS[model_key]
    model_id = model_info["id"]
    key_name = model_info["key_name"]

    key = _get_nim_keys().get(key_name)
    if not key:
        raise RuntimeError(
            f"NVIDIA NIM API key for {key_name} not found in config."
        )

    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
        "Accept": "text/event-stream",
    }
    payload = {
        "model": model_id,
        "messages": messages,
        "stream": True,
        "max_tokens": max_tokens,
    }

    try:
        with requests.post(
            NIM_API_URL,
            headers=headers,
            json=payload,
            stream=True,
            timeout=TIMEOUT,
        ) as resp:
            resp.raise_for_status()
            for line in resp.iter_lines():
                if not line:
                    continue
                decoded = line.decode("utf-8")
                # The space after "data:" is optional in the SSE format.
                if not decoded.startswith("data:"):
                    continue
                data_str = decoded[len("data:"):].strip()
                if data_str == "[DONE]":
                    break
                try:
                    data = json.loads(data_str)
                except json.JSONDecodeError:
                    # Skip malformed events rather than abort the stream.
                    continue
                if not isinstance(data, dict):
                    continue
                choices = data.get("choices")
                if not choices:
                    continue
                # A chunk may carry "delta": null; treat it as empty.
                delta = choices[0].get("delta") or {}
                token = delta.get("content", "")
                if token:
                    yield token
    except requests.RequestException as exc:
        raise RuntimeError(f"NVIDIA NIM API error for {model_id}: {exc}") from exc
220
+
221
+
222
+ # ── Public interface ──────────────────────────────────────────────────────────
223
+
224
def stream(
    messages: List[Dict[str, str]],
    model: Optional[str] = None,
    max_tokens: int = 256,
) -> Generator[str, None, None]:
    """Stream tokens from Ollama, falling back to NIM engines sequentially.

    This is the primary entry point for all LLM calls in Jarvis. The language
    of the most recent user message is detected and a matching "respond in
    ..." instruction is added to the system prompt (a default system prompt
    is created when none is present). The caller's ``messages`` list and its
    dicts are left untouched; the injection happens on a copy.

    Args:
        messages: List of {"role": ..., "content": ...} dicts.
        model: Optional model override. A value prefixed with "nim:" or equal
            to a NIM_MODELS key skips Ollama and puts that NIM model first in
            the fallback order; any other value is passed to Ollama.
        max_tokens: Token cap forwarded to the NIM backends.

    Yields:
        Token strings. If every backend fails, a single warning string is
        yielded instead of raising.
    """
    # Work on a copy so the prompt injection below never leaks into the
    # caller's list or message dicts (repeated calls would otherwise keep
    # appending the language instruction to the same system message).
    messages = [dict(msg) for msg in messages]

    # ── Language detection & System Prompt Injection ──
    # The most recent user message decides the response language.
    user_content = ""
    for msg in reversed(messages):
        if msg.get("role") == "user":
            user_content = msg.get("content", "")
            break

    lang = detect(user_content)

    lang_instructions = "Respond in English."
    if lang == "tamil":
        lang_instructions = "Respond in Tamil using Tamil script (Unicode)."
    elif lang == "tanglish":
        lang_instructions = (
            "Respond in Tanglish — Tamil words written in English letters/script, "
            "mixed naturally with English. Example: 'Nalla idea da, let me check panren.'"
        )

    # Extend the first system message if there is one, otherwise create one.
    system_index = next(
        (idx for idx, msg in enumerate(messages) if msg.get("role") == "system"),
        -1,
    )
    if system_index >= 0:
        existing = messages[system_index].get("content") or ""
        messages[system_index]["content"] = existing + f" {lang_instructions}"
    else:
        system_content = (
            f"You are Jarvis — a sharp, friendly AI partner built into a terminal. {lang_instructions} "
            "Your personality: smart, concise, professional, and warmly human. "
            "You speak like a knowledgeable friend, not a corporate chatbot. "
            "RULES you must always follow:\n"
            "1. Keep replies SHORT by default. 1-3 sentences max for casual conversation. Only elaborate when the user asks a technical or detailed question.\n"
            "2. For greetings like 'hi', 'hello', 'hey' — reply with ONE casual, friendly line. No bullet points, no long intros.\n"
            "3. Never wrap responses in markdown code fences (``` or ''').\n"
            "4. Never prefix your response with 'Jarvis:' or 'Assistant:'.\n"
            "5. Do NOT pad responses with unnecessary phrases like 'I am here to assist you with anything you need' — be direct.\n"
            "6. If the user asks a technical question, answer it clearly and precisely. Use brief bullet points only when truly needed.\n"
            "7. Match the user's energy — casual query = casual short reply; serious task = focused professional reply."
        )
        messages.insert(0, {"role": "system", "content": system_content})

    # Decide whether the override targets a NIM model; otherwise try Ollama.
    try_ollama = True
    nim_model = None
    if model:
        if model.startswith("nim:"):
            try_ollama = False
            # Strip only the leading prefix, not every "nim:" occurrence.
            nim_model = model[len("nim:"):]
        elif model in NIM_MODELS:
            try_ollama = False
            nim_model = model

    if try_ollama:
        if ping_ollama():
            try:
                yield from stream_ollama(messages, model)
                return
            except Exception as ollama_err:
                # NOTE(review): if Ollama fails after some tokens were already
                # yielded, the fallback below starts a fresh answer after that
                # partial output.
                log.warn(f"Ollama unavailable: {ollama_err}. Falling back to Cloud NIM...")
        else:
            log.debug("Ollama is not running. Falling back to Cloud NIM...")

    # Fallback order for the NIM models.
    fallbacks = [
        "deepseek_v4_flash",
        "llama_3_2_11b_vision_instruct",
        "llama_3_1_8b_instruct",
        "mistral_large_3_675b_instruct_2512",
        "deepseek_v4_pro",
        "llama_3_3_70b_instruct",
    ]

    # A specifically requested NIM model is tried first.
    if nim_model and nim_model in NIM_MODELS:
        fallbacks = [nim_model] + [f for f in fallbacks if f != nim_model]

    # Read the key table once instead of re-reading the config per model.
    keys = _get_nim_keys()
    for model_key in fallbacks:
        # Only attempt models that have a key entry in the config.
        if NIM_MODELS[model_key]["key_name"] in keys:
            try:
                yield from stream_nim(messages, model_key, max_tokens=max_tokens)
                return
            except Exception as e:
                log.warn(f"NIM {model_key} failed: {e}. Trying next fallback...")

    log.error("All LLM backends failed.")
    yield "⚠ Jarvis: No LLM backend available. Start Ollama or check internet connection."
333
+
334
+
335
def ping_ollama() -> bool:
    """Report whether the local Ollama server answers on its default port.

    Returns:
        True when a GET to http://localhost:11434 returns status 200; False
        for any other status or when anything in the check raises (including
        a missing ``requests`` package).
    """
    reachable = False
    try:
        import requests

        # One-second cap keeps this localhost probe fast.
        reply = requests.get("http://localhost:11434", timeout=1.0)
        reachable = reply.status_code == 200
    except Exception:
        reachable = False
    return reachable
core/mode_toggle.py ADDED
@@ -0,0 +1,70 @@
1
+ """
2
+ mode_toggle.py — Simplified mode toggle state for Jarvis CLI, supporting Chat Mode only.
3
+ """
4
+
5
+ from enum import Enum
6
+ from typing import Callable, List
7
+
8
+
9
class Mode(Enum):
    """Operating modes of Jarvis; CHAT is the only member."""

    CHAT = "chat"
12
+
13
+
14
def get() -> Mode:
    """Report the active mode.

    Returns:
        Mode.CHAT, unconditionally.
    """
    active = Mode.CHAT
    return active
21
+
22
+
23
def set_mode(mode: Mode) -> None:
    """Accept a mode change request and ignore it.

    Args:
        mode: The Mode to switch to. Not used; nothing is stored.
    """
    return None
30
+
31
+
32
def toggle() -> Mode:
    """Perform a no-op toggle.

    Returns:
        Mode.CHAT, unconditionally.
    """
    unchanged = Mode.CHAT
    return unchanged
39
+
40
+
41
def is_voice() -> bool:
    """Report whether voice mode is active.

    Returns:
        False, unconditionally.
    """
    voice_active = False
    return voice_active
48
+
49
+
50
def is_chat() -> bool:
    """Report whether chat mode is active.

    Returns:
        True, unconditionally.
    """
    chat_active = True
    return chat_active
57
+
58
+
59
def on_change(callback: Callable[[Mode], None]) -> None:
    """Accept a mode-change callback and discard it.

    Args:
        callback: Callable taking a Mode. It is neither stored nor called.
    """
    return None
62
+
63
+
64
def label() -> str:
    """Give the display label for the mode.

    Returns:
        The string "CHAT".
    """
    text = "CHAT"
    return text
core/router.py ADDED
@@ -0,0 +1,198 @@
1
+ """
2
+ router.py — Intent-based request dispatcher for Jarvis CLI.
3
+
4
+ Receives a user query, classifies the intent via intent_parser,
5
+ dispatches to the appropriate module handler, and returns the response.
6
+ Falls back to the LLM engine for general conversation.
7
+ """
8
+
9
+ from typing import Dict, Optional
10
+
11
+ from core import session
12
+ from core.llm_engine import stream
13
+ from utils.intent_parser import parse
14
+ from utils.logger import log
15
+ from utils.language_detect import detect
16
+
17
# System prompt sent as the first message of every general-chat request.
# The pieces are joined with no separator; each one carries its own trailing
# space where one is needed.
_SYSTEM_PROMPT = "".join(
    (
        "You are Jarvis — a sharp, friendly AI partner built into a terminal. ",
        "Your personality: smart, concise, professional, and warmly human. ",
        "You speak like a knowledgeable friend, not a corporate chatbot. ",
        "RULES: ",
        "1. Keep replies SHORT — 1-3 sentences for casual chat. ",
        "2. For greetings like 'hi', 'hello', 'hey' reply with ONE casual line. ",
        "3. Never use markdown code fences (``` or '''). ",
        "4. Never prefix with 'Jarvis:' or 'Assistant:'. ",
        "5. No padding phrases. Be direct. ",
        "6. For technical questions answer precisely; brief bullets only when truly needed. ",
        "7. Match the user's energy — casual = short, technical = focused.",
    )
)
30
+
31
+
32
def route(query: str) -> str:
    """Handle one user query end to end and return the reply.

    Steps, in order: record the query in the session, build the handler
    context, detect the query language, classify the intent, dispatch to the
    matching handler, optionally localize the reply, and record the reply in
    the session.

    Args:
        query: Raw user input string.

    Returns:
        The reply string, or "" when the query is blank.
    """
    if not query.strip():
        return ""

    session.add_message("user", query)
    handler_context = _build_context()

    # Language of the user's query, used for localization below.
    query_lang = detect(query)

    intent, confidence = parse(query)
    log.debug(f"Intent: {intent} (confidence={confidence:.2f})")

    reply = _dispatch(intent, query, handler_context)

    # Replies for a classified intent (anything but "UNKNOWN") are passed
    # through localization, unless the reply is empty or a "⚠" error message.
    classified = intent != "UNKNOWN"
    if classified and reply and not reply.startswith("⚠"):
        reply = _localize_response(reply, query_lang)

    session.add_message("assistant", reply)
    return reply
67
+
68
+
69
def _localize_response(response: str, lang: str) -> str:
    """Translate a module response into Tamil or Tanglish via the LLM.

    Args:
        response: The text to localize.
        lang: Detected language name. Only "tamil" and "tanglish" trigger a
            translation; any other value returns ``response`` unchanged.

    Returns:
        The localized text, or the original ``response`` when no translation
        is needed, the LLM call raises, or the LLM output is empty or is a
        "⚠" backend-failure notice.
    """
    if lang == "tamil":
        instruction = "Translate/respond to the following information in Tamil (using Tamil Unicode script):"
    elif lang == "tanglish":
        instruction = (
            "Translate/respond to the following information in natural Tanglish "
            "(Tamil words spelled in English, mixed with English naturally). "
            "Keep it short, simple, and friendly. "
            "Example: 'Iniku date vanthu Wednesday, July 1st da. Time 10:15 PM aachu.'"
        )
    else:
        return response

    messages = [
        {
            "role": "system",
            "content": "You are a translator/localizer. Keep the response concise, matching the exact meaning of the input.",
        },
        {
            "role": "user",
            "content": f"{instruction}\n\n{response}",
        },
    ]
    try:
        from core.llm_engine import stream as llm_stream

        localized = "".join(llm_stream(messages))
    except Exception as exc:
        log.warn(f"Localization failed: {exc}")
        return response

    # The LLM stream reports "no backend available" by yielding a "⚠ ..."
    # notice instead of raising; never let that (or an empty result) replace
    # the real module response.
    if not localized.strip() or localized.startswith("⚠"):
        return response
    return localized
102
+
103
+
104
# Intent name → dotted path of the module whose ``handle(query, context)``
# serves it. Modules are imported lazily, only when their intent is hit.
_INTENT_MODULES = {
    "NEWS": "modules.news",
    "TIME": "modules.time_cal",
    "CALENDAR": "modules.time_cal",
    "MAPS": "modules.maps",
    "SYSTEM": "modules.system_diag",
    "FILE": "modules.file_manager",
    "WEB_DEV": "modules.web_developer",
    "CODE": "modules.coder",
    "VIRUS": "modules.virus_scanner",
    "COMMAND": "modules.command_runner",
}


def _dispatch(intent: str, query: str, context: Dict) -> str:
    """Dispatch to the correct module handler based on intent.

    Intents listed in ``_INTENT_MODULES`` are served by that module's
    ``handle(query, context)``. Every other intent goes to the general LLM
    chat.

    Args:
        intent: Classified intent name.
        query: Original user query.
        context: Session context dict passed through to the handler.

    Returns:
        The handler's (or LLM's) response string. If importing or running the
        handler raises, the error is logged and a "⚠ Sorry, ..." message is
        returned instead of propagating the exception.
    """
    import importlib

    try:
        module_path = _INTENT_MODULES.get(intent)
        if module_path is not None:
            handler = importlib.import_module(module_path).handle
            return handler(query, context)

        # General LLM chat fallback — short by default for casual queries.
        # Use more tokens only if the query looks complex (> 8 words).
        word_count = len(query.split())
        tokens_cap = 512 if word_count > 8 else 256
        return _llm_chat(query, max_tokens=tokens_cap)

    except Exception as exc:
        log.error(f"Handler error for intent {intent}: {exc}", exc_info=True)
        return f"⚠ Sorry, I ran into an error: {exc}"
163
+
164
+
165
def _llm_chat(query: str, max_tokens: int = 256) -> str:
    """Run a general-chat completion and return it as one string.

    The request consists of the system prompt followed by the last 10 session
    messages; ``query`` itself is not added here.

    Args:
        query: User query for general chat.
        max_tokens: Token cap passed to the LLM stream.

    Returns:
        The concatenated streamed tokens, or a "⚠ LLM error: ..." message if
        streaming raises.
    """
    conversation = [{"role": "system", "content": _SYSTEM_PROMPT}]
    conversation.extend(session.history_as_dicts(n=10))
    try:
        return "".join(stream(conversation, max_tokens=max_tokens))
    except Exception as exc:
        log.error(f"LLM chat failed: {exc}")
        return f"⚠ LLM error: {exc}"
187
+
188
+
189
def _build_context() -> Dict:
    """Assemble the context dict handed to module handlers.

    Returns:
        Dict with "history" (``session.get_history(n=5)``) and "summary"
        (``session.summary()``).
    """
    recent_history = session.get_history(n=5)
    session_summary = session.summary()
    return {"history": recent_history, "summary": session_summary}