jarvis-ai-assistant 0.1.123__py3-none-any.whl → 0.1.125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of jarvis-ai-assistant might be problematic.

Files changed (67)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +19 -21
  3. jarvis/jarvis_code_agent/code_agent.py +205 -119
  4. jarvis/jarvis_code_agent/file_select.py +6 -105
  5. jarvis/jarvis_code_agent/patch.py +192 -259
  6. jarvis/jarvis_codebase/main.py +6 -2
  7. jarvis/jarvis_dev/main.py +6 -4
  8. jarvis/jarvis_git_squash/__init__.py +0 -0
  9. jarvis/jarvis_git_squash/main.py +81 -0
  10. jarvis/jarvis_lsp/cpp.py +1 -1
  11. jarvis/jarvis_lsp/go.py +1 -1
  12. jarvis/jarvis_lsp/registry.py +2 -2
  13. jarvis/jarvis_lsp/rust.py +1 -1
  14. jarvis/jarvis_multi_agent/__init__.py +1 -1
  15. jarvis/jarvis_platform/ai8.py +2 -1
  16. jarvis/jarvis_platform/base.py +20 -25
  17. jarvis/jarvis_platform/kimi.py +2 -3
  18. jarvis/jarvis_platform/ollama.py +3 -1
  19. jarvis/jarvis_platform/openai.py +1 -1
  20. jarvis/jarvis_platform/oyi.py +2 -1
  21. jarvis/jarvis_platform/registry.py +2 -1
  22. jarvis/jarvis_platform_manager/main.py +4 -6
  23. jarvis/jarvis_platform_manager/openai_test.py +0 -1
  24. jarvis/jarvis_rag/main.py +5 -2
  25. jarvis/jarvis_smart_shell/main.py +9 -4
  26. jarvis/jarvis_tools/ask_codebase.py +12 -7
  27. jarvis/jarvis_tools/ask_user.py +3 -2
  28. jarvis/jarvis_tools/base.py +21 -7
  29. jarvis/jarvis_tools/chdir.py +25 -1
  30. jarvis/jarvis_tools/code_review.py +13 -14
  31. jarvis/jarvis_tools/create_code_agent.py +4 -7
  32. jarvis/jarvis_tools/create_sub_agent.py +2 -2
  33. jarvis/jarvis_tools/execute_shell.py +3 -1
  34. jarvis/jarvis_tools/execute_shell_script.py +58 -0
  35. jarvis/jarvis_tools/file_operation.py +3 -2
  36. jarvis/jarvis_tools/git_commiter.py +26 -17
  37. jarvis/jarvis_tools/lsp_find_definition.py +1 -1
  38. jarvis/jarvis_tools/lsp_find_references.py +1 -1
  39. jarvis/jarvis_tools/lsp_get_diagnostics.py +19 -11
  40. jarvis/jarvis_tools/lsp_get_document_symbols.py +1 -1
  41. jarvis/jarvis_tools/lsp_prepare_rename.py +1 -1
  42. jarvis/jarvis_tools/lsp_validate_edit.py +1 -1
  43. jarvis/jarvis_tools/methodology.py +4 -1
  44. jarvis/jarvis_tools/rag.py +22 -15
  45. jarvis/jarvis_tools/read_code.py +4 -3
  46. jarvis/jarvis_tools/read_webpage.py +2 -1
  47. jarvis/jarvis_tools/registry.py +4 -1
  48. jarvis/jarvis_tools/{search.py → search_web.py} +5 -3
  49. jarvis/jarvis_tools/select_code_files.py +1 -1
  50. jarvis/jarvis_utils/__init__.py +19 -941
  51. jarvis/jarvis_utils/config.py +138 -0
  52. jarvis/jarvis_utils/embedding.py +201 -0
  53. jarvis/jarvis_utils/git_utils.py +120 -0
  54. jarvis/jarvis_utils/globals.py +82 -0
  55. jarvis/jarvis_utils/input.py +161 -0
  56. jarvis/jarvis_utils/methodology.py +128 -0
  57. jarvis/jarvis_utils/output.py +235 -0
  58. jarvis/jarvis_utils/utils.py +150 -0
  59. jarvis_ai_assistant-0.1.125.dist-info/METADATA +291 -0
  60. jarvis_ai_assistant-0.1.125.dist-info/RECORD +75 -0
  61. {jarvis_ai_assistant-0.1.123.dist-info → jarvis_ai_assistant-0.1.125.dist-info}/WHEEL +1 -1
  62. {jarvis_ai_assistant-0.1.123.dist-info → jarvis_ai_assistant-0.1.125.dist-info}/entry_points.txt +1 -0
  63. jarvis/jarvis_code_agent/relevant_files.py +0 -117
  64. jarvis_ai_assistant-0.1.123.dist-info/METADATA +0 -461
  65. jarvis_ai_assistant-0.1.123.dist-info/RECORD +0 -65
  66. {jarvis_ai_assistant-0.1.123.dist-info → jarvis_ai_assistant-0.1.125.dist-info}/LICENSE +0 -0
  67. {jarvis_ai_assistant-0.1.123.dist-info → jarvis_ai_assistant-0.1.125.dist-info}/top_level.txt +0 -0
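
The headline change is the breakup of the monolithic jarvis/jarvis_utils/__init__.py (+19 -941) into the focused submodules listed above (config, embedding, git_utils, globals, input, methodology, output, utils). The new __init__.py shown in the diff below carries a "Re-export from new modules" comment, but the hunk itself contains no re-export imports, so downstream modules presumably import straight from the submodules. For illustration only, here is a minimal sketch of what such a compatibility shim would look like; the submodule names come from the new docstring, while the exact symbols re-exported are an assumption based on the code removed in this diff:

# Hypothetical compatibility shim for jarvis/jarvis_utils/__init__.py.
# Symbol placement is assumed from the removed code, not confirmed by the diff.
from jarvis.jarvis_utils.config import get_max_token_count, get_thread_count
from jarvis.jarvis_utils.embedding import get_embedding, load_embedding_model
from jarvis.jarvis_utils.git_utils import find_git_root, has_uncommitted_changes
from jarvis.jarvis_utils.input import get_multiline_input, get_single_line_input
from jarvis.jarvis_utils.methodology import load_methodology
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
from jarvis.jarvis_utils.utils import init_env, user_confirm

With a shim like this in place, existing call sites such as "from jarvis.jarvis_utils import PrettyOutput, OutputType" would keep working unchanged across the refactor.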
@@ -1,946 +1,24 @@
- import hashlib
- from pathlib import Path
- import time
+ """
+ Jarvis Utils Module
+ This module provides utility functions and classes used throughout the Jarvis system.
+ It includes various helper functions, configuration management, and common operations.
+ The module is organized into several submodules:
+ - config: Configuration management
+ - embedding: Text embedding utilities
+ - git_utils: Git repository operations
+ - input: User input handling
+ - methodology: Methodology management
+ - output: Output formatting
+ - utils: General utilities
+ """
  import os
- from enum import Enum
- from datetime import datetime
- from typing import Any, Dict, List, Optional
  import colorama
- from colorama import Fore, Style as ColoramaStyle
- import numpy as np
- from prompt_toolkit import PromptSession
- from prompt_toolkit.styles import Style as PromptStyle
- from prompt_toolkit.formatted_text import FormattedText
- from sentence_transformers import SentenceTransformer
- from tqdm import tqdm
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
- import torch
- import yaml
- import faiss
- from pygments.lexers import guess_lexer
- from pygments.util import ClassNotFound
- import psutil
- from rich.console import Console
- from rich.theme import Theme
- from rich.panel import Panel
- from rich.box import HEAVY
- from rich.text import Text
  from rich.traceback import install as install_rich_traceback
- from rich.syntax import Syntax
- from rich.style import Style as RichStyle
-
- from prompt_toolkit.completion import Completer, Completion, PathCompleter
- from prompt_toolkit.document import Document
- from fuzzywuzzy import process
- from prompt_toolkit.key_binding import KeyBindings
-
- # Initialize colorama
+ # Re-export from new modules
+ # These imports are required for project functionality and may be used dynamically
+ # Initialize colorama for cross-platform colored text
  colorama.init()
-
+ # Disable tokenizers parallelism to avoid issues with multiprocessing
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
- global_agents = set()
- current_agent_name = ""
-
- # Install rich traceback handler
- install_rich_traceback()
-
- # Create console with custom theme
- custom_theme = Theme({
-     "INFO": "yellow",
-     "WARNING": "yellow",
-     "ERROR": "red",
-     "SUCCESS": "green",
-     "SYSTEM": "cyan",
-     "CODE": "green",
-     "RESULT": "blue",
-     "PLANNING": "magenta",
-     "PROGRESS": "white",
-     "DEBUG": "blue",
-     "USER": "green",
-     "TOOL": "yellow",
- })
-
- console = Console(theme=custom_theme)
-
- def make_agent_name(agent_name: str):
-     if agent_name in global_agents:
-         i = 1
-         while f"{agent_name}_{i}" in global_agents:
-             i += 1
-         return f"{agent_name}_{i}"
-     else:
-         return agent_name
-
- def set_agent(agent_name: str, agent: Any):
-     global_agents.add(agent_name)
-     global current_agent_name
-     current_agent_name = agent_name
-
- def get_agent_list():
-     return "[" + str(len(global_agents)) + "]" + current_agent_name if global_agents else ""
-
- def delete_agent(agent_name: str):
-     if agent_name in global_agents:
-         global_agents.remove(agent_name)
-         global current_agent_name
-         current_agent_name = ""
-
- class OutputType(Enum):
-     SYSTEM = "SYSTEM"      # AI assistant message
-     CODE = "CODE"          # Code related
-     RESULT = "RESULT"      # Tool execution result
-     ERROR = "ERROR"        # Error information
-     INFO = "INFO"          # System prompt
-     PLANNING = "PLANNING"  # Task planning
-     PROGRESS = "PROGRESS"  # Execution progress
-     SUCCESS = "SUCCESS"    # Success information
-     WARNING = "WARNING"    # Warning information
-     DEBUG = "DEBUG"        # Debug information
-     USER = "USER"          # User input
-     TOOL = "TOOL"          # Tool call
-
- class PrettyOutput:
-     """Pretty output using rich"""
-
-     # Icons for different output types
-     _ICONS = {
-         OutputType.SYSTEM: "🤖",    # Robot - AI assistant
-         OutputType.CODE: "📝",      # Notebook - Code
-         OutputType.RESULT: "✨",    # Flash - Result
-         OutputType.ERROR: "❌",     # Error - Error
-         OutputType.INFO: "ℹ️",      # Info - Prompt
-         OutputType.PLANNING: "📋",  # Clipboard - Planning
-         OutputType.PROGRESS: "⏳",  # Hourglass - Progress
-         OutputType.SUCCESS: "✅",   # Checkmark - Success
-         OutputType.WARNING: "⚠️",   # Warning - Warning
-         OutputType.DEBUG: "🔍",     # Magnifying glass - Debug
-         OutputType.USER: "👤",      # User - User
-         OutputType.TOOL: "🔧",      # Wrench - Tool
-     }
-
-     # Common language mapping dictionary
-     _lang_map = {
-         'Python': 'python',
-         'JavaScript': 'javascript',
-         'TypeScript': 'typescript',
-         'Java': 'java',
-         'C++': 'cpp',
-         'C#': 'csharp',
-         'Ruby': 'ruby',
-         'PHP': 'php',
-         'Go': 'go',
-         'Rust': 'rust',
-         'Bash': 'bash',
-         'HTML': 'html',
-         'CSS': 'css',
-         'SQL': 'sql',
-         'R': 'r',
-         'Kotlin': 'kotlin',
-         'Swift': 'swift',
-         'Scala': 'scala',
-         'Perl': 'perl',
-         'Lua': 'lua',
-         'YAML': 'yaml',
-         'JSON': 'json',
-         'XML': 'xml',
-         'Markdown': 'markdown',
-         'Text': 'text',
-         'Shell': 'bash',
-         'Dockerfile': 'dockerfile',
-         'Makefile': 'makefile',
-         'INI': 'ini',
-         'TOML': 'toml',
-     }
-
-     @staticmethod
-     def _detect_language(text: str, default_lang: str = 'markdown') -> str:
-         """Helper method to detect language and map it to syntax highlighting name"""
-         try:
-             lexer = guess_lexer(text)
-             detected_lang = lexer.name
-             return PrettyOutput._lang_map.get(detected_lang, default_lang)
-         except ClassNotFound:
-             return default_lang
-         except Exception:
-             return default_lang
-
-     @staticmethod
-     def _format(output_type: OutputType, timestamp: bool = True) -> Text:
-         """Format output text using rich Text"""
-         # Create rich Text object
-         formatted = Text()
-
-         # Add timestamp and agent info
-         if timestamp:
-             formatted.append(f"[{datetime.now().strftime('%H:%M:%S')}][{output_type.value}]", style=output_type.value)
-             agent_info = get_agent_list()
-             if agent_info:  # Only add brackets if there's agent info
-                 formatted.append(f"[{agent_info}]", style="blue")
-             # Add icon
-             icon = PrettyOutput._ICONS.get(output_type, "")
-             formatted.append(f" {icon} ", style=output_type.value)
-
-         return formatted
-
-     @staticmethod
-     def print(text: str, output_type: OutputType, timestamp: bool = True, lang: Optional[str] = None, traceback: bool = False):
-         """Print formatted output using rich console with styling
-
-         Args:
-             text: The text content to print
-             output_type: The type of output (affects styling)
-             timestamp: Whether to show timestamp
-             lang: Language for syntax highlighting
-             traceback: Whether to show traceback for errors
-         """
-
-         # Define styles for different output types
-         styles = {
-             OutputType.SYSTEM: RichStyle(
-                 color="bright_cyan",
-                 italic=True,
-                 bold=True,
-             ),
-             OutputType.CODE: RichStyle(
-                 color="green",
-                 italic=True,
-                 bgcolor="#1a1a1a",
-                 frame=True
-             ),
-             OutputType.RESULT: RichStyle(
-                 color="bright_blue",
-                 bold=True,
-                 italic=True,
-                 bgcolor="navy_blue"
-             ),
-             OutputType.ERROR: RichStyle(
-                 color="red",
-                 bold=True,
-                 italic=True,
-                 blink=True,
-                 bgcolor="dark_red",
-             ),
-             OutputType.INFO: RichStyle(
-                 color="gold1",
-                 dim=True,
-                 bgcolor="grey11",
-                 italic=True
-             ),
-             OutputType.PLANNING: RichStyle(
-                 color="purple",
-                 italic=True,
-                 bold=True,
-             ),
-             OutputType.PROGRESS: RichStyle(
-                 color="white",
-                 encircle=True,
-                 italic=True,
-             ),
-             OutputType.SUCCESS: RichStyle(
-                 color="bright_green",
-                 bold=True,
-                 strike=False,
-                 meta={"icon": "✓"},
-                 italic=True
-             ),
-             OutputType.WARNING: RichStyle(
-                 color="yellow",
-                 bold=True,
-                 blink2=True,
-                 bgcolor="dark_orange",
-                 italic=True
-             ),
-             OutputType.DEBUG: RichStyle(
-                 color="grey58",
-                 dim=True,
-                 italic=True,
-                 conceal=True
-             ),
-             OutputType.USER: RichStyle(
-                 color="spring_green2",
-                 reverse=True,
-                 frame=True,
-                 italic=True
-             ),
-             OutputType.TOOL: RichStyle(
-                 color="dark_sea_green4",
-                 italic=True,
-                 bgcolor="grey19",
-             )
-         }
-
-         # Get formatted header
-         lang = lang if lang is not None else PrettyOutput._detect_language(text, default_lang='markdown')
-         header = PrettyOutput._format(output_type, timestamp)
-
-         # Create syntax highlighted content
-         content = Syntax(
-             text,
-             lang,
-             theme="monokai",
-             word_wrap=True,
-         )
-
-         # Create panel with styling
-         panel = Panel(
-             content,
-             style=styles[output_type],
-             border_style=styles[output_type],
-             title=header,
-             title_align="left",
-             padding=(0, 0),
-             highlight=True,
-             box=HEAVY,
-         )
-
-         # Print panel
-         console.print(panel)
-
-         # Print stack trace for errors if requested
-         if traceback or output_type == OutputType.ERROR:
-             console.print_exception()
-
-     @staticmethod
-     def section(title: str, output_type: OutputType = OutputType.INFO):
-         """Print section title in a panel"""
-         panel = Panel(
-             Text(title, style=output_type.value, justify="center"),
-             border_style=output_type.value
-         )
-         console.print()
-         console.print(panel)
-         console.print()
-
-     @staticmethod
-     def print_stream(text: str):
-         """Print stream output without line break"""
-         # Use the progress type style
-         style = PrettyOutput._get_style(OutputType.SYSTEM)
-         console.print(text, style=style, end="")
-
-     @staticmethod
-     def print_stream_end():
-         """End stream output with line break"""
-         # End-marker style
-         end_style = PrettyOutput._get_style(OutputType.SUCCESS)
-         console.print("\n", style=end_style)
-         console.file.flush()
-
-     @staticmethod
-     def _get_style(output_type: OutputType) -> RichStyle:
-         """Get pre-defined RichStyle for output type"""
-         return console.get_style(output_type.value)
-
- def get_single_line_input(tip: str) -> str:
-     """Get single line input, support direction key, history function, etc."""
-     session = PromptSession(history=None)
-     style = PromptStyle.from_dict({
-         'prompt': 'ansicyan',
-     })
-     return session.prompt(f"{tip}", style=style)
-
- class FileCompleter(Completer):
-     """Custom completer for file paths with fuzzy matching."""
-     def __init__(self):
-         self.path_completer = PathCompleter()
-         self.max_suggestions = 10
-         self.min_score = 10
-
-     def get_completions(self, document: Document, complete_event):
-         text = document.text_before_cursor
-         cursor_pos = document.cursor_position
-
-         # Find all @ positions in text
-         at_positions = [i for i, char in enumerate(text) if char == '@']
-
-         if not at_positions:
-             return
-
-         # Get the last @ position
-         current_at_pos = at_positions[-1]
-
-         # If cursor is not after the last @, don't complete
-         if cursor_pos <= current_at_pos:
-             return
-
-         # Check if there's a space after @
-         text_after_at = text[current_at_pos + 1:cursor_pos]
-         if ' ' in text_after_at:
-             return
-
-         # Get the text after the current @
-         file_path = text_after_at.strip()
-
-         # Count the characters to replace (including the @ symbol)
-         replace_length = len(text_after_at) + 1  # +1 to include the @ symbol
-
-         # Get all possible files using git ls-files only
-         all_files = []
-         try:
-             # Use git ls-files to get tracked files
-             import subprocess
-             result = subprocess.run(['git', 'ls-files'],
-                                     stdout=subprocess.PIPE,
-                                     stderr=subprocess.PIPE,
-                                     text=True)
-             if result.returncode == 0:
-                 all_files = [line.strip() for line in result.stdout.splitlines() if line.strip()]
-         except Exception:
-             # If git command fails, just use an empty list
-             pass
-
-         # If no input after @, show all files
-         # Otherwise use fuzzy matching
-         if not file_path:
-             scored_files = [(path, 100) for path in all_files[:self.max_suggestions]]
-         else:
-             scored_files_data = process.extract(file_path, all_files, limit=self.max_suggestions)
-             scored_files = [
-                 (m[0], m[1])
-                 for m in scored_files_data
-             ]
-             # Sort by score and take top results
-             scored_files.sort(key=lambda x: x[1], reverse=True)
-             scored_files = scored_files[:self.max_suggestions]
-
-         # Return completions for files
-         for path, score in scored_files:
-             if not file_path or score > self.min_score:
-                 display_text = path  # Display without backticks
-                 if file_path and score < 100:
-                     display_text = f"{path} ({score}%)"
-                 completion = Completion(
-                     text=f"'{path}'",  # Wrap the path in single quotes
-                     start_position=-replace_length,
-                     display=display_text,
-                     display_meta="File"
-                 )
-                 yield completion
-
- def get_multiline_input(tip: str) -> str:
-     """Get multi-line input with enhanced completion confirmation"""
-     # One-line usage instructions
-     PrettyOutput.section("用户输入 - 使用 @ 触发文件补全,Tab 选择补全项,Ctrl+J 提交,按 Ctrl+C 取消输入", OutputType.USER)
-
-     print(f"{Fore.GREEN}{tip}{ColoramaStyle.RESET_ALL}")
-
-     # Custom key bindings
-     bindings = KeyBindings()
-
-     @bindings.add('enter')
-     def _(event):
-         # When a completion menu is open, Enter confirms the completion
-         if event.current_buffer.complete_state:
-             event.current_buffer.apply_completion(event.current_buffer.complete_state.current_completion)
-         else:
-             # Otherwise insert a newline
-             event.current_buffer.insert_text('\n')
-
-     @bindings.add('c-j')  # Changed to a supported key combination
-     def _(event):
-         # Submit the input with Ctrl+J
-         event.current_buffer.validate_and_handle()
-
-     style = PromptStyle.from_dict({
-         'prompt': 'ansicyan',
-     })
-
-     try:
-         session = PromptSession(
-             history=None,
-             completer=FileCompleter(),
-             key_bindings=bindings,
-             complete_while_typing=True,
-             multiline=True,  # Enable native multi-line support
-             vi_mode=False,
-             mouse_support=False
-         )
-
-         prompt = FormattedText([
-             ('class:prompt', '>>> ')
-         ])
-
-         # Get the multi-line input in a single call
-         text = session.prompt(
-             prompt,
-             style=style,
-         ).strip()
-
-         return text
-
-     except KeyboardInterrupt:
-         PrettyOutput.print("输入已取消", OutputType.INFO)
-         return ""
-
- def init_env():
-     """Load environment variables from ~/.jarvis/env"""
-     jarvis_dir = Path.home() / ".jarvis"
-     env_file = jarvis_dir / "env"
-
-     # Check if ~/.jarvis directory exists
-     if not jarvis_dir.exists():
-         jarvis_dir.mkdir(parents=True)
-
-     if env_file.exists():
-         try:
-             with open(env_file, "r", encoding="utf-8") as f:
-                 for line in f:
-                     line = line.strip()
-                     if line and not line.startswith(("#", ";")):
-                         try:
-                             key, value = line.split("=", 1)
-                             os.environ[key.strip()] = value.strip().strip("'").strip('"')
-                         except ValueError:
-                             continue
-         except Exception as e:
-             PrettyOutput.print(f"警告: 读取 {env_file} 失败: {e}", OutputType.WARNING)
-
-
- def while_success(func, sleep_time: float = 0.1):
-     while True:
-         try:
-             return func()
-         except Exception as e:
-             PrettyOutput.print(f"执行失败: {str(e)}, 等待 {sleep_time}s...", OutputType.ERROR)
-             time.sleep(sleep_time)
-             continue
-
- def while_true(func, sleep_time: float = 0.1):
-     """Loop execution function, until the function returns True"""
-     while True:
-         ret = func()
-         if ret:
-             break
-         PrettyOutput.print(f"执行失败, 等待 {sleep_time}s...", OutputType.WARNING)
-         time.sleep(sleep_time)
-     return ret
-
- def find_git_root(dir="."):
-     curr_dir = os.getcwd()
-     os.chdir(dir)
-     ret = os.popen("git rev-parse --show-toplevel").read().strip()
-     os.chdir(curr_dir)
-     return ret
-
- def has_uncommitted_changes():
-     import subprocess
-     # Add all changes silently
-     subprocess.run(["git", "add", "."], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-
-     # Check working directory changes
-     working_changes = subprocess.run(["git", "diff", "--exit-code"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).returncode != 0
-
-     # Check staged changes
-     staged_changes = subprocess.run(["git", "diff", "--cached", "--exit-code"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL).returncode != 0
-
-     # Reset changes silently
-     subprocess.run(["git", "reset"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-
-     return working_changes or staged_changes
-
- def load_embedding_model():
-     model_name = "BAAI/bge-m3"
-     cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
-
-     try:
-         # Load model
-         embedding_model = SentenceTransformer(
-             model_name,
-             cache_folder=cache_dir,
-             local_files_only=True
-         )
-     except Exception as e:
-         # Load model
-         embedding_model = SentenceTransformer(
-             model_name,
-             cache_folder=cache_dir,
-             local_files_only=False
-         )
-
-     return embedding_model
-
- def load_tokenizer():
-     """Load tokenizer"""
-     model_name = "gpt2"
-     cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
-
-     try:
-         tokenizer = AutoTokenizer.from_pretrained(
-             model_name,
-             cache_dir=cache_dir,
-             local_files_only=True
-         )
-     except Exception as e:
-         tokenizer = AutoTokenizer.from_pretrained(
-             model_name,
-             cache_dir=cache_dir,
-             local_files_only=False
-         )
-
-     return tokenizer
-
- def load_rerank_model():
-     """Load reranking model"""
-     model_name = "BAAI/bge-reranker-v2-m3"
-     cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
-
-     PrettyOutput.print(f"加载重排序模型: {model_name}...", OutputType.INFO)
-
-     try:
-         # Load model and tokenizer
-         tokenizer = AutoTokenizer.from_pretrained(
-             model_name,
-             cache_dir=cache_dir,
-             local_files_only=True
-         )
-         model = AutoModelForSequenceClassification.from_pretrained(
-             model_name,
-             cache_dir=cache_dir,
-             local_files_only=True
-         )
-     except Exception as e:
-         # Load model and tokenizer
-         tokenizer = AutoTokenizer.from_pretrained(
-             model_name,
-             cache_dir=cache_dir,
-             local_files_only=False
-         )
-         model = AutoModelForSequenceClassification.from_pretrained(
-             model_name,
-             cache_dir=cache_dir,
-             local_files_only=False
-         )
-
-     # Use GPU if available
-     if torch.cuda.is_available():
-         model = model.cuda()
-     model.eval()
-
-     return model, tokenizer
-
-
- def is_long_context(files: list) -> bool:
-     """Check if the file list belongs to a long context (total characters exceed 80% of the maximum context length)"""
-     max_token_count = get_max_token_count()
-     threshold = max_token_count * 0.8
-     total_tokens = 0
-
-     for file_path in files:
-         try:
-             with open(file_path, 'r', encoding='utf-8') as f:
-                 content = f.read()
-                 total_tokens += get_context_token_count(content)
-
-                 if total_tokens > threshold:
-                     return True
-         except Exception as e:
-             PrettyOutput.print(f"读取文件 {file_path} 失败: {e}", OutputType.WARNING)
-             continue
-
-     return total_tokens > threshold
-
-
- def get_file_md5(filepath: str) -> str:
-     return hashlib.md5(open(filepath, "rb").read(100*1024*1024)).hexdigest()
-
-
- def _create_methodology_embedding(embedding_model: Any, methodology_text: str) -> np.ndarray:
-     """Create embedding vector for methodology text"""
-     try:
-         # Truncate long text
-         max_length = 512
-         text = ' '.join(methodology_text.split()[:max_length])
-
-         # Get the embedding vector with the sentence_transformers model
-         embedding = embedding_model.encode([text],
-                                            convert_to_tensor=True,
-                                            normalize_embeddings=True)
-         vector = np.array(embedding.cpu().numpy(), dtype=np.float32)
-         return vector[0]  # Return first vector, because we only encoded one text
-     except Exception as e:
-         PrettyOutput.print(f"创建方法论嵌入向量失败: {str(e)}", OutputType.ERROR)
-         return np.zeros(1536, dtype=np.float32)
-
-
- def load_methodology(user_input: str) -> str:
-     """Load methodology and build vector index"""
-     PrettyOutput.print("加载方法论...", OutputType.PROGRESS)
-     user_jarvis_methodology = os.path.expanduser("~/.jarvis/methodology")
-     if not os.path.exists(user_jarvis_methodology):
-         return ""
-
-     def make_methodology_prompt(data: Dict) -> str:
-         ret = """This is the standard methodology for handling previous problems, if the current task is similar, you can refer to it, if not, just ignore it:\n"""
-         for key, value in data.items():
-             ret += f"Problem: {key}\nMethodology: {value}\n"
-         return ret
-
-     try:
-         with open(user_jarvis_methodology, "r", encoding="utf-8") as f:
-             data = yaml.safe_load(f)
-
-         if dont_use_local_model():
-             return make_methodology_prompt(data)
-
-         # Reset data structure
-         methodology_data = []
-         vectors = []
-         ids = []
-
-         # Get embedding model
-         embedding_model = load_embedding_model()
-
-         # Create test embedding to get correct dimension
-         test_embedding = _create_methodology_embedding(embedding_model, "test")
-         embedding_dimension = len(test_embedding)
-
-         # Create embedding vector for each methodology
-         for i, (key, value) in enumerate(data.items()):
-             methodology_text = f"{key}\n{value}"
-             embedding = _create_methodology_embedding(embedding_model, methodology_text)
-             vectors.append(embedding)
-             ids.append(i)
-             methodology_data.append({"key": key, "value": value})
-
-         if vectors:
-             vectors_array = np.vstack(vectors)
-             # Use correct dimension from test embedding
-             hnsw_index = faiss.IndexHNSWFlat(embedding_dimension, 16)
-             hnsw_index.hnsw.efConstruction = 40
-             hnsw_index.hnsw.efSearch = 16
-             methodology_index = faiss.IndexIDMap(hnsw_index)
-             methodology_index.add_with_ids(vectors_array, np.array(ids))  # type: ignore
-             query_embedding = _create_methodology_embedding(embedding_model, user_input)
-             k = min(3, len(methodology_data))
-             PrettyOutput.print(f"检索方法论...", OutputType.INFO)
-             distances, indices = methodology_index.search(
-                 query_embedding.reshape(1, -1), k
-             )  # type: ignore
-
-             relevant_methodologies = {}
-             output_lines = []
-             for dist, idx in zip(distances[0], indices[0]):
-                 if idx >= 0:
-                     similarity = 1.0 / (1.0 + float(dist))
-                     methodology = methodology_data[idx]
-                     output_lines.append(
-                         f"Methodology '{methodology['key']}' similarity: {similarity:.3f}"
-                     )
-                     if similarity >= 0.5:
-                         relevant_methodologies[methodology["key"]] = methodology["value"]
-
-             if output_lines:
-                 PrettyOutput.print("\n".join(output_lines), OutputType.INFO)
-
-             if relevant_methodologies:
-                 return make_methodology_prompt(relevant_methodologies)
-         return make_methodology_prompt(data)
-
-     except Exception as e:
-         PrettyOutput.print(f"加载方法论失败: {str(e)}", OutputType.ERROR)
-         return ""
-
-
- def user_confirm(tip: str, default: bool = True) -> bool:
-     """Prompt the user for confirmation.
-
-     Args:
-         tip: The message to show to the user
-         default: The default response if user hits enter
-
-     Returns:
-         bool: True if user confirmed, False otherwise
-     """
-     suffix = "[Y/n]" if default else "[y/N]"
-     ret = get_single_line_input(f"{tip} {suffix}: ")
-     return default if ret == "" else ret.lower() == "y"
-
- def get_file_line_count(filename: str) -> int:
-     try:
-         return len(open(filename, "r", encoding="utf-8").readlines())
-     except Exception as e:
-         return 0
-
-
- def init_gpu_config() -> Dict:
-     """Initialize GPU configuration based on available hardware
-
-     Returns:
-         Dict: GPU configuration including memory sizes and availability
-     """
-     config = {
-         "has_gpu": False,
-         "shared_memory": 0,
-         "device_memory": 0,
-         "memory_fraction": 0.8  # Use 80% of available memory by default
-     }
-
-     try:
-         import torch
-         if torch.cuda.is_available():
-             # Get GPU information
-             gpu_mem = torch.cuda.get_device_properties(0).total_memory
-             config["has_gpu"] = True
-             config["device_memory"] = gpu_mem
-
-             # Estimate shared memory (usually a portion of system memory)
-             system_memory = psutil.virtual_memory().total
-             config["shared_memory"] = min(system_memory * 0.5, gpu_mem * 2)  # The smaller of 50% of system memory or 2x GPU memory
-
-             # Set CUDA memory allocation
-             torch.cuda.set_per_process_memory_fraction(config["memory_fraction"])
-             torch.cuda.empty_cache()
-
-             PrettyOutput.print(
-                 f"GPU已初始化: {torch.cuda.get_device_name(0)}\n"
-                 f"设备内存: {gpu_mem / 1024**3:.1f}GB\n"
-                 f"共享内存: {config['shared_memory'] / 1024**3:.1f}GB",
-                 output_type=OutputType.SUCCESS
-             )
-         else:
-             PrettyOutput.print("没有GPU可用, 使用CPU模式", output_type=OutputType.WARNING)
-     except Exception as e:
-         PrettyOutput.print(f"GPU初始化失败: {str(e)}", output_type=OutputType.WARNING)
-
-     return config
-
-
- def get_embedding(embedding_model: Any, text: str) -> np.ndarray:
-     """Get the vector representation of the text"""
-     embedding = embedding_model.encode(text,
-                                        normalize_embeddings=True,
-                                        show_progress_bar=False)
-     return np.array(embedding, dtype=np.float32)
-
- def get_embedding_batch(embedding_model: Any, texts: List[str]) -> np.ndarray:
-     """Get embeddings for a batch of texts efficiently"""
-     try:
-         all_vectors = []
-         for text in texts:
-             vectors = get_embedding_with_chunks(embedding_model, text)
-             all_vectors.extend(vectors)
-         return np.vstack(all_vectors)
-     except Exception as e:
-         PrettyOutput.print(f"批量嵌入失败: {str(e)}", OutputType.ERROR)
-         return np.zeros((0, embedding_model.get_sentence_embedding_dimension()), dtype=np.float32)
-
-
- def get_max_token_count():
-     return int(os.getenv('JARVIS_MAX_TOKEN_COUNT', '131072'))  # 128k by default
-
- def get_thread_count():
-     return int(os.getenv('JARVIS_THREAD_COUNT', '1'))
-
- def dont_use_local_model():
-     return os.getenv('JARVIS_DONT_USE_LOCAL_MODEL', 'false') == 'true'
-
- def is_auto_complete() -> bool:
-     return os.getenv('JARVIS_AUTO_COMPLETE', 'false') == 'true'
-
- def is_use_methodology() -> bool:
-     return os.getenv('JARVIS_USE_METHODOLOGY', 'true') == 'true'
-
- def is_record_methodology() -> bool:
-     return os.getenv('JARVIS_RECORD_METHODOLOGY', 'true') == 'true'
-
- def is_need_summary() -> bool:
-     return os.getenv('JARVIS_NEED_SUMMARY', 'true') == 'true'
-
- def get_min_paragraph_length() -> int:
-     return int(os.getenv('JARVIS_MIN_PARAGRAPH_LENGTH', '50'))
-
- def get_max_paragraph_length() -> int:
-     return int(os.getenv('JARVIS_MAX_PARAGRAPH_LENGTH', '12800'))
-
- def get_shell_name() -> str:
-     return os.getenv('SHELL', 'bash')
-
- def get_normal_platform_name() -> str:
-     return os.getenv('JARVIS_PLATFORM', 'kimi')
-
- def get_normal_model_name() -> str:
-     return os.getenv('JARVIS_MODEL', 'kimi')
-
- def get_codegen_platform_name() -> str:
-     return os.getenv('JARVIS_CODEGEN_PLATFORM', os.getenv('JARVIS_PLATFORM', 'kimi'))
-
- def get_codegen_model_name() -> str:
-     return os.getenv('JARVIS_CODEGEN_MODEL', os.getenv('JARVIS_MODEL', 'kimi'))
-
- def get_thinking_platform_name() -> str:
-     return os.getenv('JARVIS_THINKING_PLATFORM', os.getenv('JARVIS_PLATFORM', 'kimi'))
-
- def get_thinking_model_name() -> str:
-     return os.getenv('JARVIS_THINKING_MODEL', os.getenv('JARVIS_MODEL', 'kimi'))
-
- def get_cheap_platform_name() -> str:
-     return os.getenv('JARVIS_CHEAP_PLATFORM', os.getenv('JARVIS_PLATFORM', 'kimi'))
-
- def get_cheap_model_name() -> str:
-     return os.getenv('JARVIS_CHEAP_MODEL', os.getenv('JARVIS_MODEL', 'kimi'))
-
- def is_execute_tool_confirm() -> bool:
-     return os.getenv('JARVIS_EXECUTE_TOOL_CONFIRM', 'false') == 'true'
-
- def split_text_into_chunks(text: str, max_length: int = 512) -> List[str]:
-     """Split text into chunks with overlapping windows"""
-     chunks = []
-     start = 0
-     while start < len(text):
-         end = start + max_length
-         # Find the nearest sentence boundary
-         if end < len(text):
-             while end > start and text[end] not in {'.', '!', '?', '\n'}:
-                 end -= 1
-             if end == start:  # No punctuation found, hard cut
-                 end = start + max_length
-         chunk = text[start:end]
-         chunks.append(chunk)
-         # Overlap 20% of the window
-         start = end - int(max_length * 0.2)
-     return chunks
-
- def get_embedding_with_chunks(embedding_model: Any, text: str) -> List[np.ndarray]:
-     """Get embeddings for text chunks"""
-     chunks = split_text_into_chunks(text, 512)
-     if not chunks:
-         return []
-
-     vectors = []
-     for chunk in chunks:
-         vector = get_embedding(embedding_model, chunk)
-         vectors.append(vector)
-     return vectors
-
-
- def get_context_token_count(text: str) -> int:
-     """Get the token count of the text using the tokenizer
-
-     Args:
-         text: The input text to count tokens for
-
-     Returns:
-         int: The number of tokens in the text
-     """
-     try:
-         # Use a fast tokenizer that's good at general text
-         tokenizer = load_tokenizer()
-         chunks = split_text_into_chunks(text, 512)
-         return sum([len(tokenizer.encode(chunk)) for chunk in chunks])
-
-     except Exception as e:
-         PrettyOutput.print(f"计算token失败: {str(e)}", OutputType.WARNING)
-         # Fallback to rough character-based estimate
-         return len(text) // 4  # Rough estimate of 4 chars per token
-
-
+ # Install rich traceback handler for better error messages
+ install_rich_traceback()
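
Most of the removed code above reappears in the new submodules (embedding.py, methodology.py, output.py, and so on in the file list). The most algorithmic piece is the methodology retrieval in load_methodology: it builds a FAISS HNSW index over methodology embeddings, searches it with the embedded user query, and converts each L2 distance d into a similarity score 1 / (1 + d), keeping matches scoring at least 0.5. A self-contained sketch of that index pattern, with random vectors standing in for the real BAAI/bge-m3 sentence embeddings:

import faiss
import numpy as np

# Stand-ins for real sentence embeddings; the dimension is arbitrary here.
dim, n = 64, 10
vectors = np.random.rand(n, dim).astype(np.float32)
ids = np.arange(n).astype(np.int64)

# Same index shape as the removed code: an HNSW graph (16 neighbors per
# node) wrapped in an ID map, so results come back as our own IDs rather
# than insertion offsets.
hnsw = faiss.IndexHNSWFlat(dim, 16)
hnsw.hnsw.efConstruction = 40
hnsw.hnsw.efSearch = 16
index = faiss.IndexIDMap(hnsw)
index.add_with_ids(vectors, ids)

# Search with one query vector and map L2 distance to a (0, 1] similarity.
query = np.random.rand(1, dim).astype(np.float32)
distances, indices = index.search(query, 3)
for dist, idx in zip(distances[0], indices[0]):
    if idx >= 0:  # FAISS returns -1 when fewer than k neighbors exist
        similarity = 1.0 / (1.0 + float(dist))
        print(f"id={idx} similarity={similarity:.3f}")

Note that 1 / (1 + d) is a monotone rescaling of L2 distance rather than cosine similarity; since the embeddings are normalized before indexing, the 0.5 similarity cutoff corresponds to an L2 distance of 1.0 between unit vectors.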