jarvis-ai-assistant 0.1.65__py3-none-any.whl → 0.1.67__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jarvis-ai-assistant might be problematic. Click here for more details.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_codebase/main.py +258 -153
- jarvis/jarvis_coder/main.py +1 -4
- jarvis/tools/__init__.py +0 -3
- {jarvis_ai_assistant-0.1.65.dist-info → jarvis_ai_assistant-0.1.67.dist-info}/METADATA +4 -5
- {jarvis_ai_assistant-0.1.65.dist-info → jarvis_ai_assistant-0.1.67.dist-info}/RECORD +10 -10
- {jarvis_ai_assistant-0.1.65.dist-info → jarvis_ai_assistant-0.1.67.dist-info}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.65.dist-info → jarvis_ai_assistant-0.1.67.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.65.dist-info → jarvis_ai_assistant-0.1.67.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.1.65.dist-info → jarvis_ai_assistant-0.1.67.dist-info}/top_level.txt +0 -0
jarvis/__init__.py
CHANGED
jarvis/jarvis_codebase/main.py
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import os
|
|
3
|
-
import sqlite3
|
|
4
|
-
import time
|
|
5
3
|
import numpy as np
|
|
6
4
|
import faiss
|
|
7
5
|
from typing import List, Tuple, Optional
|
|
@@ -13,13 +11,14 @@ from jarvis.utils import OutputType, PrettyOutput, find_git_root
|
|
|
13
11
|
from jarvis.utils import load_env_from_file
|
|
14
12
|
import argparse
|
|
15
13
|
from sentence_transformers import SentenceTransformer
|
|
14
|
+
import pickle
|
|
16
15
|
|
|
17
16
|
class CodeBase:
|
|
18
|
-
def __init__(self, root_dir: str
|
|
17
|
+
def __init__(self, root_dir: str):
|
|
19
18
|
load_env_from_file()
|
|
20
19
|
self.root_dir = root_dir
|
|
21
20
|
os.chdir(self.root_dir)
|
|
22
|
-
self.thread_count =
|
|
21
|
+
self.thread_count = os.environ.get("JARVIS_THREAD_COUNT") or 10
|
|
23
22
|
self.cheap_platform = os.environ.get("JARVIS_CHEAP_PLATFORM") or os.environ.get("JARVIS_PLATFORM") or "kimi"
|
|
24
23
|
self.cheap_model = os.environ.get("JARVIS_CHEAP_MODEL") or os.environ.get("JARVIS_MODEL") or "kimi"
|
|
25
24
|
self.normal_platform = os.environ.get("JARVIS_PLATFORM") or "kimi"
|
|
@@ -59,59 +58,37 @@ class CodeBase:
|
|
|
59
58
|
|
|
60
59
|
self.vector_dim = self.embedding_model.get_sentence_embedding_dimension()
|
|
61
60
|
|
|
62
|
-
|
|
63
|
-
self.db_path = os.path.join(self.data_dir, "codebase.db")
|
|
64
|
-
if not os.path.exists(self.db_path):
|
|
65
|
-
self.create_db()
|
|
66
61
|
self.git_file_list = self.get_git_file_list()
|
|
67
62
|
self.platform_registry = PlatformRegistry().get_global_platform_registry()
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
63
|
+
|
|
64
|
+
# 初始化缓存和索引
|
|
65
|
+
self.cache_path = os.path.join(self.data_dir, "cache.pkl")
|
|
66
|
+
self.vector_cache = {}
|
|
67
|
+
self.file_paths = []
|
|
68
|
+
|
|
69
|
+
# 加载缓存
|
|
70
|
+
if os.path.exists(self.cache_path):
|
|
71
|
+
try:
|
|
72
|
+
with open(self.cache_path, 'rb') as f:
|
|
73
|
+
cache_data = pickle.load(f)
|
|
74
|
+
self.vector_cache = cache_data["vectors"]
|
|
75
|
+
self.file_paths = cache_data["file_paths"]
|
|
76
|
+
PrettyOutput.print(f"加载了 {len(self.vector_cache)} 个向量缓存",
|
|
77
|
+
output_type=OutputType.INFO)
|
|
78
|
+
# 从缓存重建索引
|
|
79
|
+
self.build_index()
|
|
80
|
+
except Exception as e:
|
|
81
|
+
PrettyOutput.print(f"加载缓存失败: {str(e)}",
|
|
82
|
+
output_type=OutputType.WARNING)
|
|
83
|
+
self.vector_cache = {}
|
|
84
|
+
self.file_paths = []
|
|
85
|
+
self.index = None
|
|
73
86
|
|
|
74
87
|
def get_git_file_list(self):
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
""
|
|
79
|
-
return sqlite3.connect(self.db_path)
|
|
80
|
-
|
|
81
|
-
def clean_db(self) -> bool:
|
|
82
|
-
"""清理数据库和向量索引中的过期记录"""
|
|
83
|
-
db = self.get_db_connection()
|
|
84
|
-
try:
|
|
85
|
-
# 获取所有数据库记录
|
|
86
|
-
all_records = db.execute("SELECT path FROM codebase").fetchall()
|
|
87
|
-
files_to_delete = []
|
|
88
|
-
|
|
89
|
-
# 找出需要删除的文件
|
|
90
|
-
for row in all_records:
|
|
91
|
-
if row[0] not in self.git_file_list:
|
|
92
|
-
files_to_delete.append(row[0])
|
|
93
|
-
|
|
94
|
-
if not files_to_delete:
|
|
95
|
-
return False
|
|
96
|
-
|
|
97
|
-
for file_path in files_to_delete:
|
|
98
|
-
db.execute("DELETE FROM codebase WHERE path = ?", (file_path,))
|
|
99
|
-
|
|
100
|
-
db.commit()
|
|
101
|
-
|
|
102
|
-
PrettyOutput.print(f"清理了 {len(files_to_delete)} 个文件的记录",
|
|
103
|
-
output_type=OutputType.INFO)
|
|
104
|
-
return True
|
|
105
|
-
finally:
|
|
106
|
-
db.close()
|
|
107
|
-
|
|
108
|
-
def create_db(self):
|
|
109
|
-
db = self.get_db_connection()
|
|
110
|
-
try:
|
|
111
|
-
db.execute("CREATE TABLE IF NOT EXISTS codebase (path TEXT, md5 TEXT ,description TEXT)")
|
|
112
|
-
db.commit()
|
|
113
|
-
finally:
|
|
114
|
-
db.close()
|
|
88
|
+
"""获取 git 仓库中的文件列表,排除 .jarvis-codebase 目录"""
|
|
89
|
+
files = os.popen("git ls-files").read().splitlines()
|
|
90
|
+
# 过滤掉 .jarvis-codebase 目录下的文件
|
|
91
|
+
return [f for f in files if not f.startswith(".jarvis-codebase/")]
|
|
115
92
|
|
|
116
93
|
def is_text_file(self, file_path: str):
|
|
117
94
|
with open(file_path, "r", encoding="utf-8") as f:
|
|
@@ -144,6 +121,74 @@ class CodeBase:
|
|
|
144
121
|
response = model.chat(prompt)
|
|
145
122
|
return response
|
|
146
123
|
|
|
124
|
+
def save_cache(self):
|
|
125
|
+
"""保存缓存数据"""
|
|
126
|
+
try:
|
|
127
|
+
cache_data = {
|
|
128
|
+
"vectors": self.vector_cache,
|
|
129
|
+
"file_paths": self.file_paths
|
|
130
|
+
}
|
|
131
|
+
with open(self.cache_path, 'wb') as f:
|
|
132
|
+
pickle.dump(cache_data, f)
|
|
133
|
+
PrettyOutput.print(f"保存了 {len(self.vector_cache)} 个向量缓存",
|
|
134
|
+
output_type=OutputType.INFO)
|
|
135
|
+
except Exception as e:
|
|
136
|
+
PrettyOutput.print(f"保存缓存失败: {str(e)}",
|
|
137
|
+
output_type=OutputType.ERROR)
|
|
138
|
+
|
|
139
|
+
def get_cached_vector(self, file_path: str, description: str) -> Optional[np.ndarray]:
|
|
140
|
+
"""从缓存获取文件的向量表示"""
|
|
141
|
+
if file_path not in self.vector_cache:
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
# 检查文件是否被修改
|
|
145
|
+
try:
|
|
146
|
+
with open(file_path, "rb") as f:
|
|
147
|
+
current_md5 = hashlib.md5(f.read()).hexdigest()
|
|
148
|
+
except Exception as e:
|
|
149
|
+
PrettyOutput.print(f"计算文件MD5失败 {file_path}: {str(e)}",
|
|
150
|
+
output_type=OutputType.ERROR)
|
|
151
|
+
return None
|
|
152
|
+
|
|
153
|
+
cached_data = self.vector_cache[file_path]
|
|
154
|
+
if cached_data["md5"] != current_md5:
|
|
155
|
+
return None
|
|
156
|
+
|
|
157
|
+
# 检查描述是否变化
|
|
158
|
+
if cached_data["description"] != description:
|
|
159
|
+
return None
|
|
160
|
+
|
|
161
|
+
return cached_data["vector"]
|
|
162
|
+
|
|
163
|
+
def cache_vector(self, file_path: str, vector: np.ndarray, description: str):
|
|
164
|
+
"""缓存文件的向量表示"""
|
|
165
|
+
try:
|
|
166
|
+
with open(file_path, "rb") as f:
|
|
167
|
+
file_md5 = hashlib.md5(f.read()).hexdigest()
|
|
168
|
+
except Exception as e:
|
|
169
|
+
PrettyOutput.print(f"计算文件MD5失败 {file_path}: {str(e)}",
|
|
170
|
+
output_type=OutputType.ERROR)
|
|
171
|
+
file_md5 = ""
|
|
172
|
+
|
|
173
|
+
self.vector_cache[file_path] = {
|
|
174
|
+
"path": file_path, # 保存文件路径
|
|
175
|
+
"md5": file_md5, # 保存文件MD5
|
|
176
|
+
"description": description, # 保存文件描述
|
|
177
|
+
"vector": vector # 保存向量
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
# 保存缓存到文件
|
|
181
|
+
try:
|
|
182
|
+
with open(self.cache_path, 'wb') as f:
|
|
183
|
+
cache_data = {
|
|
184
|
+
"vectors": self.vector_cache,
|
|
185
|
+
"file_paths": self.file_paths
|
|
186
|
+
}
|
|
187
|
+
pickle.dump(cache_data, f)
|
|
188
|
+
except Exception as e:
|
|
189
|
+
PrettyOutput.print(f"保存向量缓存失败: {str(e)}",
|
|
190
|
+
output_type=OutputType.ERROR)
|
|
191
|
+
|
|
147
192
|
def get_embedding(self, text: str) -> np.ndarray:
|
|
148
193
|
"""使用 transformers 模型获取文本的向量表示"""
|
|
149
194
|
# 对长文本进行截断
|
|
@@ -152,148 +197,205 @@ class CodeBase:
|
|
|
152
197
|
|
|
153
198
|
# 获取嵌入向量
|
|
154
199
|
embedding = self.embedding_model.encode(text,
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
200
|
+
normalize_embeddings=True, # L2归一化
|
|
201
|
+
show_progress_bar=False)
|
|
202
|
+
vector = np.array(embedding, dtype=np.float32)
|
|
203
|
+
return vector
|
|
158
204
|
|
|
159
205
|
def vectorize_file(self, file_path: str, description: str) -> np.ndarray:
|
|
160
206
|
"""将文件内容和描述向量化"""
|
|
161
|
-
try:
|
|
207
|
+
try:
|
|
208
|
+
# 先尝试从缓存获取
|
|
209
|
+
cached_vector = self.get_cached_vector(file_path, description)
|
|
210
|
+
if cached_vector is not None:
|
|
211
|
+
return cached_vector
|
|
212
|
+
|
|
162
213
|
# 组合文件信息
|
|
163
214
|
combined_text = f"""
|
|
164
215
|
文件路径: {file_path}
|
|
165
216
|
文件描述: {description}
|
|
166
217
|
"""
|
|
167
|
-
|
|
218
|
+
vector = self.get_embedding(combined_text)
|
|
219
|
+
|
|
220
|
+
# 保存到缓存
|
|
221
|
+
self.cache_vector(file_path, vector, description)
|
|
222
|
+
return vector
|
|
168
223
|
except Exception as e:
|
|
169
224
|
PrettyOutput.print(f"Error vectorizing file {file_path}: {str(e)}",
|
|
170
225
|
output_type=OutputType.ERROR)
|
|
171
226
|
return np.zeros(self.vector_dim, dtype=np.float32)
|
|
172
227
|
|
|
173
|
-
def
|
|
174
|
-
"""
|
|
175
|
-
|
|
228
|
+
def clean_cache(self) -> bool:
|
|
229
|
+
"""清理过期的缓存记录,返回是否有文件被删除"""
|
|
230
|
+
files_to_delete = []
|
|
231
|
+
for file_path in list(self.vector_cache.keys()):
|
|
232
|
+
if file_path not in self.git_file_list:
|
|
233
|
+
del self.vector_cache[file_path]
|
|
234
|
+
files_to_delete.append(file_path)
|
|
235
|
+
|
|
236
|
+
if files_to_delete:
|
|
237
|
+
self.save_cache()
|
|
238
|
+
PrettyOutput.print(f"清理了 {len(files_to_delete)} 个文件的缓存",
|
|
239
|
+
output_type=OutputType.INFO)
|
|
240
|
+
return True
|
|
241
|
+
return False
|
|
242
|
+
|
|
243
|
+
def process_file(self, file_path: str):
|
|
244
|
+
"""处理单个文件"""
|
|
176
245
|
try:
|
|
177
|
-
|
|
246
|
+
# 跳过不存在的文件
|
|
247
|
+
if not os.path.exists(file_path):
|
|
178
248
|
return None
|
|
179
|
-
|
|
180
|
-
if
|
|
249
|
+
|
|
250
|
+
if not self.is_text_file(file_path):
|
|
181
251
|
return None
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
252
|
+
|
|
253
|
+
md5 = hashlib.md5(open(file_path, "rb").read()).hexdigest()
|
|
254
|
+
|
|
255
|
+
# 检查文件是否已经处理过且内容未变
|
|
256
|
+
if file_path in self.vector_cache:
|
|
257
|
+
if self.vector_cache[file_path].get("md5") == md5:
|
|
258
|
+
return None
|
|
259
|
+
|
|
260
|
+
description = self.make_description(file_path)
|
|
261
|
+
vector = self.vectorize_file(file_path, description)
|
|
262
|
+
|
|
263
|
+
# 保存到缓存,使用实际文件路径作为键
|
|
264
|
+
self.vector_cache[file_path] = {
|
|
265
|
+
"vector": vector,
|
|
266
|
+
"description": description,
|
|
267
|
+
"md5": md5
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
return file_path
|
|
271
|
+
|
|
272
|
+
except Exception as e:
|
|
273
|
+
PrettyOutput.print(f"处理文件失败 {file_path}: {str(e)}",
|
|
274
|
+
output_type=OutputType.ERROR,
|
|
275
|
+
traceback=True)
|
|
276
|
+
return None
|
|
186
277
|
|
|
187
|
-
def
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
self.
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
278
|
+
def build_index(self):
|
|
279
|
+
"""从向量缓存构建 faiss 索引"""
|
|
280
|
+
# 创建底层 HNSW 索引
|
|
281
|
+
hnsw_index = faiss.IndexHNSWFlat(self.vector_dim, 16)
|
|
282
|
+
hnsw_index.hnsw.efConstruction = 40
|
|
283
|
+
hnsw_index.hnsw.efSearch = 16
|
|
284
|
+
|
|
285
|
+
# 用 IndexIDMap 包装 HNSW 索引
|
|
286
|
+
self.index = faiss.IndexIDMap(hnsw_index)
|
|
287
|
+
|
|
288
|
+
vectors = []
|
|
289
|
+
ids = []
|
|
290
|
+
self.file_paths = [] # 重置文件路径列表
|
|
291
|
+
|
|
292
|
+
for i, (file_path, data) in enumerate(self.vector_cache.items()):
|
|
293
|
+
vectors.append(data["vector"].reshape(1, -1))
|
|
294
|
+
ids.append(i)
|
|
295
|
+
self.file_paths.append(file_path)
|
|
296
|
+
|
|
297
|
+
if vectors:
|
|
298
|
+
vectors = np.vstack(vectors)
|
|
299
|
+
self.index.add_with_ids(vectors, np.array(ids))
|
|
300
|
+
else:
|
|
301
|
+
self.index = None
|
|
302
|
+
|
|
303
|
+
def gen_vector_db_from_cache(self):
|
|
304
|
+
"""从缓存生成向量数据库"""
|
|
305
|
+
self.build_index()
|
|
306
|
+
self.save_cache()
|
|
203
307
|
|
|
204
308
|
def generate_codebase(self):
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
processed_files = []
|
|
208
|
-
|
|
209
|
-
def process_and_save(file):
|
|
210
|
-
result = self.process_file(file)
|
|
211
|
-
if result:
|
|
212
|
-
file, md5, description = result
|
|
213
|
-
db = self.get_db_connection()
|
|
214
|
-
try:
|
|
215
|
-
with db_lock:
|
|
216
|
-
db.execute("DELETE FROM codebase WHERE path = ?", (file,))
|
|
217
|
-
db.execute("INSERT INTO codebase (path, md5, description) VALUES (?, ?, ?)",
|
|
218
|
-
(file, md5, description))
|
|
219
|
-
db.commit()
|
|
220
|
-
PrettyOutput.print(f"索引文件: {file}", output_type=OutputType.INFO)
|
|
221
|
-
processed_files.append(file)
|
|
222
|
-
finally:
|
|
223
|
-
db.close()
|
|
309
|
+
"""生成代码库索引"""
|
|
310
|
+
files_deleted = self.clean_cache() # 清理过期缓存
|
|
311
|
+
processed_files = []
|
|
224
312
|
|
|
225
|
-
#
|
|
313
|
+
# 使用线程池处理文件
|
|
226
314
|
with ThreadPoolExecutor(max_workers=self.thread_count) as executor:
|
|
227
|
-
futures = [executor.submit(
|
|
228
|
-
|
|
229
|
-
|
|
315
|
+
futures = [executor.submit(self.process_file, file) for file in self.git_file_list]
|
|
316
|
+
for future in concurrent.futures.as_completed(futures):
|
|
317
|
+
result = future.result()
|
|
318
|
+
if result:
|
|
319
|
+
processed_files.append(result)
|
|
320
|
+
PrettyOutput.print(f"索引文件: {result}", output_type=OutputType.INFO)
|
|
230
321
|
|
|
231
|
-
if
|
|
232
|
-
PrettyOutput.print("
|
|
233
|
-
self.
|
|
322
|
+
if files_deleted or processed_files:
|
|
323
|
+
PrettyOutput.print("重新生成向量数据库", output_type=OutputType.INFO)
|
|
324
|
+
self.gen_vector_db_from_cache()
|
|
234
325
|
else:
|
|
235
|
-
PrettyOutput.print("
|
|
326
|
+
PrettyOutput.print("没有新的文件变更,跳过向量数据库生成", output_type=OutputType.INFO)
|
|
236
327
|
|
|
237
|
-
PrettyOutput.print(f"
|
|
328
|
+
PrettyOutput.print(f"成功为 {len(processed_files)} 个文件生成索引", output_type=OutputType.INFO)
|
|
238
329
|
|
|
239
330
|
def search_similar(self, query: str, top_k: int = 5) -> List[Tuple[str, float, str]]:
|
|
240
|
-
"""
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
top_k: 返回结果数量
|
|
245
|
-
|
|
246
|
-
Returns:
|
|
247
|
-
List of (file_path, similarity_score, description) tuples
|
|
248
|
-
"""
|
|
249
|
-
# 获取查询文本的向量表示
|
|
250
|
-
query_vector = self.get_embedding(query)
|
|
251
|
-
query_vector = query_vector.reshape(1, -1)
|
|
252
|
-
|
|
253
|
-
# 搜索最相似的向量
|
|
254
|
-
distances, indices = self.index.search(query_vector, top_k)
|
|
331
|
+
"""搜索相似文件"""
|
|
332
|
+
model = self.platform_registry.create_platform(self.normal_platform)
|
|
333
|
+
model.set_model_name(self.normal_model)
|
|
334
|
+
model.set_suppress_output(True)
|
|
255
335
|
|
|
256
|
-
# 获取对应的文件信息
|
|
257
|
-
db = self.get_db_connection()
|
|
258
336
|
try:
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
# 获取文件路径和描述
|
|
267
|
-
cursor = db.execute("SELECT path, description FROM codebase LIMIT 1 OFFSET ?", (offset,))
|
|
268
|
-
row = cursor.fetchone()
|
|
269
|
-
if row:
|
|
270
|
-
path, description = row
|
|
271
|
-
# 将distance转换为相似度分数(0-1之间)
|
|
272
|
-
similarity = 1.0 / (1.0 + float(distance)) # 确保使用Python float
|
|
273
|
-
results.append((path, similarity, description))
|
|
337
|
+
prompt = f"""请根据以下查询,生成意思完全相同的另一个表述。这个表述将用于代码搜索,所以要保持专业性和准确性。
|
|
338
|
+
原始查询: {query}
|
|
339
|
+
|
|
340
|
+
请直接输出新表述,不要有编号或其他标记。
|
|
341
|
+
"""
|
|
342
|
+
|
|
343
|
+
query = model.chat(prompt)
|
|
274
344
|
|
|
275
|
-
return results
|
|
276
345
|
finally:
|
|
277
|
-
|
|
346
|
+
model.delete_chat()
|
|
347
|
+
|
|
348
|
+
PrettyOutput.print(f"查询: {query}", output_type=OutputType.INFO)
|
|
349
|
+
|
|
350
|
+
# 为每个查询获取相似文件
|
|
351
|
+
all_results = {} # 文件路径 -> (总分数, 出现次数, 描述)
|
|
352
|
+
|
|
353
|
+
q_vector = self.get_embedding(query)
|
|
354
|
+
q_vector = q_vector.reshape(1, -1)
|
|
355
|
+
|
|
356
|
+
distances, indices = self.index.search(q_vector, top_k)
|
|
357
|
+
|
|
358
|
+
PrettyOutput.print(f"查询 {query} 的结果: ", output_type=OutputType.INFO)
|
|
359
|
+
|
|
360
|
+
ret = []
|
|
361
|
+
|
|
362
|
+
for i, distance in zip(indices[0], distances[0]):
|
|
363
|
+
if i == -1: # faiss返回-1表示无效结果
|
|
364
|
+
continue
|
|
365
|
+
|
|
366
|
+
similarity = 1.0 / (1.0 + float(distance))
|
|
367
|
+
PrettyOutput.print(f" {self.file_paths[i]} : 距离 {distance:.3f}, 相似度 {similarity:.3f}",
|
|
368
|
+
output_type=OutputType.INFO)
|
|
369
|
+
|
|
370
|
+
file_path = self.file_paths[i]
|
|
371
|
+
data = self.vector_cache[file_path]
|
|
372
|
+
ret.append((file_path, similarity, data["description"]))
|
|
373
|
+
return ret
|
|
278
374
|
|
|
279
375
|
def ask_codebase(self, query: str, top_k: int = 5) -> List[Tuple[str, float, str]]:
|
|
280
|
-
"""
|
|
281
|
-
# 使用搜索函数获取相似文件
|
|
376
|
+
"""查询代码库"""
|
|
282
377
|
results = self.search_similar(query, top_k)
|
|
283
378
|
PrettyOutput.print(f"找到的关联文件: ", output_type=OutputType.INFO)
|
|
284
379
|
for path, score, _ in results:
|
|
285
|
-
PrettyOutput.print(f"文件: {path} 关联度: {score:.3f}",
|
|
380
|
+
PrettyOutput.print(f"文件: {path} 关联度: {score:.3f}",
|
|
381
|
+
output_type=OutputType.INFO)
|
|
286
382
|
|
|
287
383
|
prompt = f"""你是一个代码专家,请根据以下文件信息回答用户的问题:
|
|
288
384
|
"""
|
|
289
385
|
for path, _, _ in results:
|
|
290
|
-
|
|
291
|
-
|
|
386
|
+
try:
|
|
387
|
+
content = open(path, "r", encoding="utf-8").read()
|
|
388
|
+
prompt += f"""
|
|
292
389
|
文件路径: {path}
|
|
293
390
|
文件内容:
|
|
294
391
|
{content}
|
|
295
392
|
========================================
|
|
296
393
|
"""
|
|
394
|
+
except Exception as e:
|
|
395
|
+
PrettyOutput.print(f"读取文件失败 {path}: {str(e)}",
|
|
396
|
+
output_type=OutputType.ERROR)
|
|
397
|
+
continue
|
|
398
|
+
|
|
297
399
|
prompt += f"""
|
|
298
400
|
用户问题: {query}
|
|
299
401
|
|
|
@@ -301,8 +403,11 @@ class CodeBase:
|
|
|
301
403
|
"""
|
|
302
404
|
model = self.platform_registry.create_platform(self.normal_platform)
|
|
303
405
|
model.set_model_name(self.normal_model)
|
|
304
|
-
|
|
305
|
-
|
|
406
|
+
try:
|
|
407
|
+
response = model.chat(prompt)
|
|
408
|
+
return response
|
|
409
|
+
finally:
|
|
410
|
+
model.delete_chat()
|
|
306
411
|
|
|
307
412
|
|
|
308
413
|
def main():
|
jarvis/jarvis_coder/main.py
CHANGED
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
2
|
-
import hashlib
|
|
3
1
|
import os
|
|
4
2
|
import re
|
|
5
|
-
import sqlite3
|
|
6
3
|
import threading
|
|
7
4
|
import time
|
|
8
|
-
from typing import Dict, Any, List,
|
|
5
|
+
from typing import Dict, Any, List, Tuple
|
|
9
6
|
|
|
10
7
|
import yaml
|
|
11
8
|
from jarvis.models.base import BasePlatform
|
jarvis/tools/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: jarvis-ai-assistant
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.67
|
|
4
4
|
Summary: Jarvis: An AI assistant that uses tools to interact with the system
|
|
5
5
|
Home-page: https://github.com/skyfireitdiy/Jarvis
|
|
6
6
|
Author: skyfire
|
|
@@ -123,7 +123,6 @@ Jarvis supports configuration through environment variables that can be set in t
|
|
|
123
123
|
|---------|------|--------|------|
|
|
124
124
|
| JARVIS_PLATFORM | AI platform to use, supports kimi/openai/ai8 etc | kimi | Yes |
|
|
125
125
|
| JARVIS_MODEL | Model name to use | - | No |
|
|
126
|
-
|
|
127
126
|
| JARVIS_CODEGEN_PLATFORM | AI platform for code generation | Same as JARVIS_PLATFORM | No |
|
|
128
127
|
| JARVIS_CODEGEN_MODEL | Model name for code generation | Same as JARVIS_MODEL | No |
|
|
129
128
|
| JARVIS_CHEAP_PLATFORM | AI platform for cheap operations | Same as JARVIS_PLATFORM | No |
|
|
@@ -155,17 +154,17 @@ jarvis -p openai # Use OpenAI platform
|
|
|
155
154
|
|
|
156
155
|
### Code Modification
|
|
157
156
|
```bash
|
|
158
|
-
jarvis
|
|
157
|
+
jarvis-coder --feature "Add new feature" # Modify code to add new feature
|
|
159
158
|
```
|
|
160
159
|
|
|
161
160
|
### Codebase Search
|
|
162
161
|
```bash
|
|
163
|
-
jarvis
|
|
162
|
+
jarvis-codebase --search "database connection" # Search codebase
|
|
164
163
|
```
|
|
165
164
|
|
|
166
165
|
### Codebase Question
|
|
167
166
|
```bash
|
|
168
|
-
jarvis
|
|
167
|
+
jarvis-codebase --ask "How to use the database?" # Ask about codebase
|
|
169
168
|
```
|
|
170
169
|
|
|
171
170
|
### Keep Chat History
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
jarvis/__init__.py,sha256=
|
|
1
|
+
jarvis/__init__.py,sha256=YEUpLs5Xl5YrTW6S6tl7I42rz6TcpyCTJZRczXXMmi0,50
|
|
2
2
|
jarvis/agent.py,sha256=kl6pwNrluzb-9eZKgwmsk5Jh4CpWi4F8B3RvEQNvc5U,14921
|
|
3
3
|
jarvis/main.py,sha256=7EcSlxa5JFFXBujzKDWdNtwX6axLhFFdJMc2GxTjfdk,6295
|
|
4
4
|
jarvis/utils.py,sha256=bjC0PAR58RvcXHgabIFmNmYL1L_GhhiEwMFytWurcN4,7499
|
|
5
5
|
jarvis/jarvis_codebase/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
-
jarvis/jarvis_codebase/main.py,sha256=
|
|
7
|
-
jarvis/jarvis_coder/main.py,sha256=
|
|
6
|
+
jarvis/jarvis_codebase/main.py,sha256=LdJGg4dUaK8naNkGoqF9i53OaYndKGs3jXfZQd4w-3M,18699
|
|
7
|
+
jarvis/jarvis_coder/main.py,sha256=L_i1Zp3UJbc891WrpxKgoD4EzoqvLRnqznmLQP2ZN2U,21721
|
|
8
8
|
jarvis/models/__init__.py,sha256=mrOt67nselz_H1gX9wdAO4y2DY5WPXzABqJbr5Des8k,63
|
|
9
9
|
jarvis/models/ai8.py,sha256=vgy-r_3HHxGMAalZrA65VWHC1PuwBTYgtprSgHkCbrk,12557
|
|
10
10
|
jarvis/models/base.py,sha256=ShV1H8Unee4RMaiFO4idROQA0Hc6wu4dyeRPX5fcszk,1433
|
|
@@ -12,7 +12,7 @@ jarvis/models/kimi.py,sha256=1iTB0Z_WOmCML3Ufsge6jmeKOYvccr7I5lS3JUXymU4,17611
|
|
|
12
12
|
jarvis/models/openai.py,sha256=ayaBWAN5VexMcKVrjEPDNB-Q9wx0sCV9Z4BCrvwYJ9w,4315
|
|
13
13
|
jarvis/models/oyi.py,sha256=X2c5SWDIuQDCCFBcEKbzIWEz3I34eOAi0d1XAFgxlpw,15001
|
|
14
14
|
jarvis/models/registry.py,sha256=hJyaROiOF_TkbtIXsjOD8-ArOvAvtxviawyqBFfLV6s,7617
|
|
15
|
-
jarvis/tools/__init__.py,sha256=
|
|
15
|
+
jarvis/tools/__init__.py,sha256=7Rqyj5hBAv5cWDVr5T9ZTZASO7ssBHeQNm2_4ZARdkA,72
|
|
16
16
|
jarvis/tools/base.py,sha256=EGRGbdfbLXDLwtyoWdvp9rlxNX7bzc20t0Vc2VkwIEY,652
|
|
17
17
|
jarvis/tools/codebase_qa.py,sha256=AEpusYxyWtALVVwPk1DMUH9cVI73mE1e3WFHJXDpXto,2333
|
|
18
18
|
jarvis/tools/coder.py,sha256=ZJfPInKms4Hj3-eQlBwamVsvZ-2nlZ-4jsqJ-tJc6mg,2040
|
|
@@ -24,9 +24,9 @@ jarvis/tools/search.py,sha256=1EqOVvLhg2Csh-i03-XeCrusbyfmH69FZ8khwZt8Tow,6131
|
|
|
24
24
|
jarvis/tools/shell.py,sha256=UPKshPyOaUwTngresUw-ot1jHjQIb4wCY5nkJqa38lU,2520
|
|
25
25
|
jarvis/tools/sub_agent.py,sha256=rEtAmSVY2ZjFOZEKr5m5wpACOQIiM9Zr_3dT92FhXYU,2621
|
|
26
26
|
jarvis/tools/webpage.py,sha256=d3w3Jcjcu1ESciezTkz3n3Zf-rp_l91PrVoDEZnckOo,2391
|
|
27
|
-
jarvis_ai_assistant-0.1.
|
|
28
|
-
jarvis_ai_assistant-0.1.
|
|
29
|
-
jarvis_ai_assistant-0.1.
|
|
30
|
-
jarvis_ai_assistant-0.1.
|
|
31
|
-
jarvis_ai_assistant-0.1.
|
|
32
|
-
jarvis_ai_assistant-0.1.
|
|
27
|
+
jarvis_ai_assistant-0.1.67.dist-info/LICENSE,sha256=AGgVgQmTqFvaztRtCAXsAMryUymB18gZif7_l2e1XOg,1063
|
|
28
|
+
jarvis_ai_assistant-0.1.67.dist-info/METADATA,sha256=Vpf669oYa2bpkQDzfAsgSP0bv0geylGiyuPq6ezarJY,12373
|
|
29
|
+
jarvis_ai_assistant-0.1.67.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
30
|
+
jarvis_ai_assistant-0.1.67.dist-info/entry_points.txt,sha256=QNUeqmUJd7nHufel2FO7cRttS1uKFfnbIyObv8eVyOY,140
|
|
31
|
+
jarvis_ai_assistant-0.1.67.dist-info/top_level.txt,sha256=1BOxyWfzOP_ZXj8rVTDnNCJ92bBGB0rwq8N1PCpoMIs,7
|
|
32
|
+
jarvis_ai_assistant-0.1.67.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{jarvis_ai_assistant-0.1.65.dist-info → jarvis_ai_assistant-0.1.67.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|