jarvis-ai-assistant 0.1.64__tar.gz → 0.1.65__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jarvis-ai-assistant might be problematic. Click here for more details.
- {jarvis_ai_assistant-0.1.64/src/jarvis_ai_assistant.egg-info → jarvis_ai_assistant-0.1.65}/PKG-INFO +38 -3
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/README.md +34 -2
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/pyproject.toml +6 -2
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/setup.py +6 -2
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/__init__.py +1 -1
- jarvis_ai_assistant-0.1.65/src/jarvis/jarvis_codebase/__init__.py +0 -0
- jarvis_ai_assistant-0.1.65/src/jarvis/jarvis_codebase/main.py +342 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/jarvis_coder/main.py +24 -361
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/main.py +0 -2
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/models/ai8.py +1 -2
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/models/openai.py +0 -1
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/models/oyi.py +1 -4
- jarvis_ai_assistant-0.1.65/src/jarvis/tools/__init__.py +9 -0
- jarvis_ai_assistant-0.1.65/src/jarvis/tools/codebase_qa.py +70 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/utils.py +7 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65/src/jarvis_ai_assistant.egg-info}/PKG-INFO +38 -3
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis_ai_assistant.egg-info/SOURCES.txt +3 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis_ai_assistant.egg-info/entry_points.txt +1 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis_ai_assistant.egg-info/requires.txt +3 -0
- jarvis_ai_assistant-0.1.64/src/jarvis/tools/__init__.py +0 -5
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/MANIFEST.in +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/setup.cfg +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/agent.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/models/__init__.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/models/base.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/models/kimi.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/models/registry.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/base.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/coder.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/file_ops.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/generator.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/methodology.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/registry.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/search.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/shell.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/sub_agent.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis/tools/webpage.py +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis_ai_assistant.egg-info/dependency_links.txt +0 -0
- {jarvis_ai_assistant-0.1.64 → jarvis_ai_assistant-0.1.65}/src/jarvis_ai_assistant.egg-info/top_level.txt +0 -0
{jarvis_ai_assistant-0.1.64/src/jarvis_ai_assistant.egg-info → jarvis_ai_assistant-0.1.65}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: jarvis-ai-assistant
|
|
3
|
-
Version: 0.1.64
|
|
3
|
+
Version: 0.1.65
|
|
4
4
|
Summary: Jarvis: An AI assistant that uses tools to interact with the system
|
|
5
5
|
Home-page: https://github.com/skyfireitdiy/Jarvis
|
|
6
6
|
Author: skyfire
|
|
@@ -44,6 +44,9 @@ Requires-Dist: colorama>=0.4.6
|
|
|
44
44
|
Requires-Dist: prompt_toolkit>=3.0.0
|
|
45
45
|
Requires-Dist: openai>=1.20.0
|
|
46
46
|
Requires-Dist: playwright>=1.41.1
|
|
47
|
+
Requires-Dist: numpy>=1.26.0
|
|
48
|
+
Requires-Dist: faiss-cpu>=1.8.1
|
|
49
|
+
Requires-Dist: sentence-transformers>=2.2.2
|
|
47
50
|
Provides-Extra: dev
|
|
48
51
|
Requires-Dist: pytest; extra == "dev"
|
|
49
52
|
Requires-Dist: black; extra == "dev"
|
|
@@ -120,8 +123,12 @@ Jarvis supports configuration through environment variables that can be set in t
|
|
|
120
123
|
|---------|------|--------|------|
|
|
121
124
|
| JARVIS_PLATFORM | AI platform to use, supports kimi/openai/ai8 etc | kimi | Yes |
|
|
122
125
|
| JARVIS_MODEL | Model name to use | - | No |
|
|
126
|
+
|
|
123
127
|
| JARVIS_CODEGEN_PLATFORM | AI platform for code generation | Same as JARVIS_PLATFORM | No |
|
|
124
128
|
| JARVIS_CODEGEN_MODEL | Model name for code generation | Same as JARVIS_MODEL | No |
|
|
129
|
+
| JARVIS_CHEAP_PLATFORM | AI platform for cheap operations | Same as JARVIS_PLATFORM | No |
|
|
130
|
+
| JARVIS_CHEAP_MODEL | Model name for cheap operations | Same as JARVIS_MODEL | No |
|
|
131
|
+
| JARVIS_EMBEDDING_MODEL | Embedding model for code analysis | BAAI/bge-large-zh-v1.5 | No |
|
|
125
132
|
| OPENAI_API_KEY | API key for OpenAI platform | - | Required for OpenAI |
|
|
126
133
|
| OPENAI_API_BASE | Base URL for OpenAI API | https://api.deepseek.com | No |
|
|
127
134
|
| OPENAI_MODEL_NAME | Model name for OpenAI | deepseek-chat | No |
|
|
@@ -139,15 +146,26 @@ Jarvis supports configuration through environment variables that can be set in t
|
|
|
139
146
|
jarvis
|
|
140
147
|
```
|
|
141
148
|
|
|
149
|
+
|
|
142
150
|
### With Specific Model
|
|
143
151
|
```bash
|
|
144
152
|
jarvis -p kimi # Use Kimi platform
|
|
145
153
|
jarvis -p openai # Use OpenAI platform
|
|
146
154
|
```
|
|
147
155
|
|
|
148
|
-
###
|
|
156
|
+
### Code Modification
|
|
157
|
+
```bash
|
|
158
|
+
jarvis coder --feature "Add new feature" # Modify code to add new feature
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Codebase Search
|
|
149
162
|
```bash
|
|
150
|
-
jarvis
|
|
163
|
+
jarvis codebase --search "database connection" # Search codebase
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Codebase Question
|
|
167
|
+
```bash
|
|
168
|
+
jarvis codebase --ask "How to use the database?" # Ask about codebase
|
|
151
169
|
```
|
|
152
170
|
|
|
153
171
|
### Keep Chat History
|
|
@@ -157,6 +175,7 @@ jarvis --keep-history # Don't delete chat session after completion
|
|
|
157
175
|
|
|
158
176
|
## 🛠️ Tools
|
|
159
177
|
|
|
178
|
+
|
|
160
179
|
### Built-in Tools
|
|
161
180
|
|
|
162
181
|
| Tool | Description |
|
|
@@ -166,27 +185,43 @@ jarvis --keep-history # Don't delete chat session after completion
|
|
|
166
185
|
| generate_tool | AI-powered tool generation and integration |
|
|
167
186
|
| methodology | Experience accumulation and methodology management |
|
|
168
187
|
| create_sub_agent | Create specialized sub-agents for specific tasks |
|
|
188
|
+
| coder | Automatic code modification and generation tool |
|
|
189
|
+
| codebase | Codebase management and search tool |
|
|
169
190
|
|
|
170
191
|
### Tool Locations
|
|
171
192
|
- Built-in tools: `src/jarvis/tools/`
|
|
172
193
|
- User tools: `~/.jarvis_tools/`
|
|
173
194
|
|
|
195
|
+
|
|
174
196
|
### Key Features
|
|
175
197
|
|
|
176
198
|
#### 1. Self-Extending Capabilities
|
|
177
199
|
- Tool generation through natural language description
|
|
178
200
|
- Automatic code generation and integration
|
|
179
201
|
- Dynamic capability expansion through sub-agents
|
|
202
|
+
- Automatic code modification with version control
|
|
203
|
+
- Codebase indexing and semantic search
|
|
180
204
|
|
|
181
205
|
#### 2. Methodology Learning
|
|
182
206
|
- Automatic experience accumulation from interactions
|
|
183
207
|
- Pattern recognition and methodology extraction
|
|
184
208
|
- Continuous refinement through usage
|
|
209
|
+
- Code modification history tracking
|
|
210
|
+
- Codebase analysis and documentation generation
|
|
185
211
|
|
|
186
212
|
#### 3. Adaptive Problem Solving
|
|
187
213
|
- Context-aware sub-agent creation
|
|
188
214
|
- Dynamic tool composition
|
|
189
215
|
- Learning from execution feedback
|
|
216
|
+
- Codebase-aware problem solving
|
|
217
|
+
- Multi-model collaboration for complex tasks
|
|
218
|
+
|
|
219
|
+
#### 4. Code Intelligence
|
|
220
|
+
- Automatic codebase indexing
|
|
221
|
+
- Semantic code search
|
|
222
|
+
- Code modification with git integration
|
|
223
|
+
- Code analysis and documentation
|
|
224
|
+
- Multi-model code generation
|
|
190
225
|
|
|
191
226
|
## 🎯 Extending Jarvis
|
|
192
227
|
|
|
@@ -65,8 +65,12 @@ Jarvis supports configuration through environment variables that can be set in t
|
|
|
65
65
|
|---------|------|--------|------|
|
|
66
66
|
| JARVIS_PLATFORM | AI platform to use, supports kimi/openai/ai8 etc | kimi | Yes |
|
|
67
67
|
| JARVIS_MODEL | Model name to use | - | No |
|
|
68
|
+
|
|
68
69
|
| JARVIS_CODEGEN_PLATFORM | AI platform for code generation | Same as JARVIS_PLATFORM | No |
|
|
69
70
|
| JARVIS_CODEGEN_MODEL | Model name for code generation | Same as JARVIS_MODEL | No |
|
|
71
|
+
| JARVIS_CHEAP_PLATFORM | AI platform for cheap operations | Same as JARVIS_PLATFORM | No |
|
|
72
|
+
| JARVIS_CHEAP_MODEL | Model name for cheap operations | Same as JARVIS_MODEL | No |
|
|
73
|
+
| JARVIS_EMBEDDING_MODEL | Embedding model for code analysis | BAAI/bge-large-zh-v1.5 | No |
|
|
70
74
|
| OPENAI_API_KEY | API key for OpenAI platform | - | Required for OpenAI |
|
|
71
75
|
| OPENAI_API_BASE | Base URL for OpenAI API | https://api.deepseek.com | No |
|
|
72
76
|
| OPENAI_MODEL_NAME | Model name for OpenAI | deepseek-chat | No |
|
|
@@ -84,15 +88,26 @@ Jarvis supports configuration through environment variables that can be set in t
|
|
|
84
88
|
jarvis
|
|
85
89
|
```
|
|
86
90
|
|
|
91
|
+
|
|
87
92
|
### With Specific Model
|
|
88
93
|
```bash
|
|
89
94
|
jarvis -p kimi # Use Kimi platform
|
|
90
95
|
jarvis -p openai # Use OpenAI platform
|
|
91
96
|
```
|
|
92
97
|
|
|
93
|
-
###
|
|
98
|
+
### Code Modification
|
|
99
|
+
```bash
|
|
100
|
+
jarvis coder --feature "Add new feature" # Modify code to add new feature
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Codebase Search
|
|
94
104
|
```bash
|
|
95
|
-
jarvis
|
|
105
|
+
jarvis codebase --search "database connection" # Search codebase
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Codebase Question
|
|
109
|
+
```bash
|
|
110
|
+
jarvis codebase --ask "How to use the database?" # Ask about codebase
|
|
96
111
|
```
|
|
97
112
|
|
|
98
113
|
### Keep Chat History
|
|
@@ -102,6 +117,7 @@ jarvis --keep-history # Don't delete chat session after completion
|
|
|
102
117
|
|
|
103
118
|
## 🛠️ Tools
|
|
104
119
|
|
|
120
|
+
|
|
105
121
|
### Built-in Tools
|
|
106
122
|
|
|
107
123
|
| Tool | Description |
|
|
@@ -111,27 +127,43 @@ jarvis --keep-history # Don't delete chat session after completion
|
|
|
111
127
|
| generate_tool | AI-powered tool generation and integration |
|
|
112
128
|
| methodology | Experience accumulation and methodology management |
|
|
113
129
|
| create_sub_agent | Create specialized sub-agents for specific tasks |
|
|
130
|
+
| coder | Automatic code modification and generation tool |
|
|
131
|
+
| codebase | Codebase management and search tool |
|
|
114
132
|
|
|
115
133
|
### Tool Locations
|
|
116
134
|
- Built-in tools: `src/jarvis/tools/`
|
|
117
135
|
- User tools: `~/.jarvis_tools/`
|
|
118
136
|
|
|
137
|
+
|
|
119
138
|
### Key Features
|
|
120
139
|
|
|
121
140
|
#### 1. Self-Extending Capabilities
|
|
122
141
|
- Tool generation through natural language description
|
|
123
142
|
- Automatic code generation and integration
|
|
124
143
|
- Dynamic capability expansion through sub-agents
|
|
144
|
+
- Automatic code modification with version control
|
|
145
|
+
- Codebase indexing and semantic search
|
|
125
146
|
|
|
126
147
|
#### 2. Methodology Learning
|
|
127
148
|
- Automatic experience accumulation from interactions
|
|
128
149
|
- Pattern recognition and methodology extraction
|
|
129
150
|
- Continuous refinement through usage
|
|
151
|
+
- Code modification history tracking
|
|
152
|
+
- Codebase analysis and documentation generation
|
|
130
153
|
|
|
131
154
|
#### 3. Adaptive Problem Solving
|
|
132
155
|
- Context-aware sub-agent creation
|
|
133
156
|
- Dynamic tool composition
|
|
134
157
|
- Learning from execution feedback
|
|
158
|
+
- Codebase-aware problem solving
|
|
159
|
+
- Multi-model collaboration for complex tasks
|
|
160
|
+
|
|
161
|
+
#### 4. Code Intelligence
|
|
162
|
+
- Automatic codebase indexing
|
|
163
|
+
- Semantic code search
|
|
164
|
+
- Code modification with git integration
|
|
165
|
+
- Code analysis and documentation
|
|
166
|
+
- Multi-model code generation
|
|
135
167
|
|
|
136
168
|
## 🎯 Extending Jarvis
|
|
137
169
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "jarvis-ai-assistant"
|
|
7
|
-
version = "0.1.64"
|
|
7
|
+
version = "0.1.65"
|
|
8
8
|
description = "Jarvis: An AI assistant that uses tools to interact with the system"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [{ name = "Your Name", email = "your.email@example.com" }]
|
|
@@ -26,6 +26,9 @@ dependencies = [
|
|
|
26
26
|
"prompt_toolkit>=3.0.0",
|
|
27
27
|
"openai>=1.20.0",
|
|
28
28
|
"playwright>=1.41.1",
|
|
29
|
+
"numpy>=1.26.0",
|
|
30
|
+
"faiss-cpu>=1.8.1",
|
|
31
|
+
"sentence-transformers>=2.2.2",
|
|
29
32
|
]
|
|
30
33
|
requires-python = ">=3.8"
|
|
31
34
|
|
|
@@ -37,4 +40,5 @@ Homepage = "https://github.com/skyfireitdiy/Jarvis"
|
|
|
37
40
|
|
|
38
41
|
[project.scripts]
|
|
39
42
|
jarvis = "jarvis.main:main"
|
|
40
|
-
jarvis-coder = "jarvis.jarvis_coder.main:main"
|
|
43
|
+
jarvis-coder = "jarvis.jarvis_coder.main:main"
|
|
44
|
+
jarvis-codebase = "jarvis.jarvis_codebase.main:main"
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="jarvis-ai-assistant",
|
|
5
|
-
version="0.1.64",
|
|
5
|
+
version="0.1.65",
|
|
6
6
|
author="skyfire",
|
|
7
7
|
author_email="skyfireitdiy@hotmail.com",
|
|
8
8
|
description="An AI assistant that uses various tools to interact with the system",
|
|
@@ -19,11 +19,15 @@ setup(
|
|
|
19
19
|
"prompt_toolkit>=3.0.0",
|
|
20
20
|
"openai>=1.20.0",
|
|
21
21
|
"playwright>=1.41.1",
|
|
22
|
+
"numpy>=1.26.0",
|
|
23
|
+
"faiss-cpu>=1.8.1",
|
|
24
|
+
"sentence-transformers>=2.2.2",
|
|
22
25
|
],
|
|
23
26
|
entry_points={
|
|
24
27
|
"console_scripts": [
|
|
25
28
|
"jarvis=jarvis.main:main",
|
|
26
|
-
"jarvis-coder=jarvis.jarvis_coder.main:main"
|
|
29
|
+
"jarvis-coder=jarvis.jarvis_coder.main:main",
|
|
30
|
+
"jarvis-codebase=jarvis.jarvis_codebase.main:main",
|
|
27
31
|
],
|
|
28
32
|
},
|
|
29
33
|
python_requires=">=3.8",
|
|
File without changes
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import os
|
|
3
|
+
import sqlite3
|
|
4
|
+
import time
|
|
5
|
+
import numpy as np
|
|
6
|
+
import faiss
|
|
7
|
+
from typing import List, Tuple, Optional
|
|
8
|
+
from jarvis.models.registry import PlatformRegistry
|
|
9
|
+
import concurrent.futures
|
|
10
|
+
from threading import Lock
|
|
11
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
12
|
+
from jarvis.utils import OutputType, PrettyOutput, find_git_root
|
|
13
|
+
from jarvis.utils import load_env_from_file
|
|
14
|
+
import argparse
|
|
15
|
+
from sentence_transformers import SentenceTransformer
|
|
16
|
+
|
|
17
|
+
class CodeBase:
    """Semantic index over the text files tracked by a git repository.

    For every tracked text file an LLM-written description is stored in a
    SQLite database under ``<root_dir>/.jarvis-codebase``. The descriptions
    are embedded with a sentence-transformers model and kept in a FAISS HNSW
    index so files can be retrieved with a natural-language query.
    """

    def __init__(self, root_dir: str, thread_count: int = 10):
        """Load configuration, the embedding model and any existing index.

        Args:
            root_dir: Repository root. The process working directory is
                changed to it, because the paths printed by ``git ls-files``
                are opened as-is by the other methods.
            thread_count: Number of worker threads used while describing files.

        Raises:
            Exception: Any error raised while loading or warming up the
                embedding model is logged and re-raised.
        """
        load_env_from_file()
        self.root_dir = root_dir
        os.chdir(self.root_dir)
        self.thread_count = thread_count

        # Every setting falls back to a non-empty default, so a separate
        # "is not set" validation could never trigger and is not needed.
        self.cheap_platform = os.environ.get("JARVIS_CHEAP_PLATFORM") or os.environ.get("JARVIS_PLATFORM") or "kimi"
        self.cheap_model = os.environ.get("JARVIS_CHEAP_MODEL") or os.environ.get("JARVIS_MODEL") or "kimi"
        self.normal_platform = os.environ.get("JARVIS_PLATFORM") or "kimi"
        self.normal_model = os.environ.get("JARVIS_MODEL") or "kimi"
        self.embedding_model_name = os.environ.get("JARVIS_EMBEDDING_MODEL") or "BAAI/bge-large-zh-v1.5"

        PrettyOutput.print(f"廉价模型使用平台: {self.cheap_platform} 模型: {self.cheap_model}", output_type=OutputType.INFO)
        PrettyOutput.print(f"分析模型使用平台: {self.normal_platform} 模型: {self.normal_model}", output_type=OutputType.INFO)
        PrettyOutput.print(f"嵌入模型: {self.embedding_model_name}", output_type=OutputType.INFO)
        PrettyOutput.print("检索算法:分层导航小世界算法", output_type=OutputType.INFO)

        # Data directory that holds the SQLite database and the vector index.
        self.data_dir = os.path.join(self.root_dir, ".jarvis-codebase")
        os.makedirs(self.data_dir, exist_ok=True)

        # Load the embedding model (downloaded into the library's default
        # cache directory on first use).
        try:
            PrettyOutput.print("正在加载/下载模型,请稍候...", output_type=OutputType.INFO)
            self.embedding_model = SentenceTransformer(self.embedding_model_name)

            # Encode a short multi-line sample once so that every model
            # component is loaded before real work starts.
            test_text = "\n这是一段测试文本,用于确保模型完全加载。\n包含多行内容,以模拟实际使用场景。\n"
            self.embedding_model.encode([test_text],
                                        convert_to_tensor=True,
                                        normalize_embeddings=True)
            PrettyOutput.print("模型加载完成", output_type=OutputType.SUCCESS)
        except Exception as e:
            PrettyOutput.print(f"加载模型失败: {str(e)}", output_type=OutputType.ERROR)
            raise

        self.vector_dim = self.embedding_model.get_sentence_embedding_dimension()

        self.db_path = os.path.join(self.data_dir, "codebase.db")
        if not os.path.exists(self.db_path):
            self.create_db()
        self.git_file_list = self.get_git_file_list()
        self.platform_registry = PlatformRegistry().get_global_platform_registry()
        self.index_path = os.path.join(self.data_dir, "vectors.index")
        # Stays None until an index is loaded from disk or generated.
        self.index = None
        if os.path.exists(self.index_path):
            PrettyOutput.print("正在加载向量数据库", output_type=OutputType.INFO)
            self.index = faiss.read_index(self.index_path)

    def get_git_file_list(self):
        """Return the paths printed by ``git ls-files`` in the current directory."""
        # The context manager closes the pipe instead of leaving it to the GC.
        with os.popen("git ls-files") as pipe:
            return pipe.read().splitlines()

    def get_db_connection(self):
        """Create and return a new SQLite connection; the caller closes it."""
        return sqlite3.connect(self.db_path)

    def clean_db(self) -> bool:
        """Delete database rows whose path is no longer in the git file list.

        Only the SQLite table is modified here; the caller is expected to
        rebuild the vector index when this returns True.

        Returns:
            True if at least one row was deleted, otherwise False.
        """
        db = self.get_db_connection()
        try:
            # A set gives O(1) membership tests instead of scanning the list
            # once per database row.
            tracked = set(self.git_file_list)
            all_records = db.execute("SELECT path FROM codebase").fetchall()
            files_to_delete = [row[0] for row in all_records if row[0] not in tracked]

            if not files_to_delete:
                return False

            db.executemany("DELETE FROM codebase WHERE path = ?",
                           [(file_path,) for file_path in files_to_delete])
            db.commit()

            PrettyOutput.print(f"清理了 {len(files_to_delete)} 个文件的记录",
                               output_type=OutputType.INFO)
            return True
        finally:
            db.close()

    def create_db(self):
        """Create the ``codebase`` table (path, md5, description) if missing."""
        db = self.get_db_connection()
        try:
            db.execute("CREATE TABLE IF NOT EXISTS codebase (path TEXT, md5 TEXT ,description TEXT)")
            db.commit()
        finally:
            db.close()

    def is_text_file(self, file_path: str):
        """Return True if *file_path* can be read completely as UTF-8 text.

        Paths that cannot be opened at all (missing file, directory such as a
        submodule entry, permission problem) are reported as non-text instead
        of raising, so callers can simply skip them.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                f.read()
            return True
        except (UnicodeDecodeError, OSError):
            return False

    def make_description(self, file_path: str) -> str:
        """Ask the cheap model to describe the file at *file_path*.

        Args:
            file_path: Path of a UTF-8 text file.

        Returns:
            The model's reply to the description prompt.
        """
        model = self.platform_registry.create_platform(self.cheap_platform)
        model.set_model_name(self.cheap_model)
        model.set_suppress_output(True)
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        prompt = f"""请分析以下代码文件,并生成一个详细的描述。描述应该包含以下要点:

1. 主要功能和用途
2. 关键类和方法的作用
3. 重要的依赖和技术特征(如使用了什么框架、算法、设计模式等)
4. 代码处理的主要数据类型和数据结构
5. 关键业务逻辑和处理流程
6. 特殊功能点和亮点特性

请用简洁专业的语言描述,突出代码的技术特征和功能特点,以便后续进行相似代码检索。

文件路径:{file_path}
代码内容:
{content}
"""
        response = model.chat(prompt)
        return response

    def get_embedding(self, text: str) -> np.ndarray:
        """Return the normalized float32 embedding of *text*.

        The text is first cut down to its first 512 whitespace-separated
        words, re-joined with single spaces.
        """
        max_length = 512  # maximum number of whitespace-separated words kept
        text = ' '.join(text.split()[:max_length])

        embedding = self.embedding_model.encode(text,
                                                normalize_embeddings=True,  # L2 normalization
                                                show_progress_bar=False)
        return np.array(embedding, dtype=np.float32)

    def vectorize_file(self, file_path: str, description: str) -> np.ndarray:
        """Embed a file's path together with its description.

        Returns:
            The embedding vector, or an all-zero vector of ``self.vector_dim``
            entries if embedding fails (the error is logged, not raised).
        """
        try:
            combined_text = f"\n文件路径: {file_path}\n文件描述: {description}\n"
            return self.get_embedding(combined_text)
        except Exception as e:
            # Best effort: one failing file must not abort the whole rebuild.
            PrettyOutput.print(f"Error vectorizing file {file_path}: {str(e)}",
                               output_type=OutputType.ERROR)
            return np.zeros(self.vector_dim, dtype=np.float32)

    def process_file(self, file):
        """Describe one file if it is text and new or changed.

        Args:
            file: Path of the file to process.

        Returns:
            ``(file, md5, description)`` when a fresh description was
            generated, or None when the file is not readable text or the
            database already holds this path with the same md5.
        """
        db = self.get_db_connection()
        try:
            if not self.is_text_file(file):
                return None
            with open(file, "rb") as f:
                md5 = hashlib.md5(f.read()).hexdigest()
            # Match on path AND md5: matching on md5 alone would skip every
            # other file that happens to have identical content, so it would
            # never be indexed.
            unchanged = db.execute(
                "SELECT 1 FROM codebase WHERE path = ? AND md5 = ?", (file, md5)
            ).fetchone()
            if unchanged:
                return None
            description = self.make_description(file)
            return (file, md5, description)
        finally:
            db.close()

    def gen_vector_db_from_sqlite(self):
        """Rebuild the FAISS HNSW index from all database rows and save it.

        Vectors are added in rowid order; ``search_similar`` relies on that
        order to map a vector position back to its row.
        """
        # 16 is the second constructor argument of IndexHNSWFlat (the HNSW
        # connectivity parameter M in the FAISS documentation).
        self.index = faiss.IndexHNSWFlat(self.vector_dim, 16)
        self.index.hnsw.efConstruction = 40
        self.index.hnsw.efSearch = 16
        db = self.get_db_connection()
        try:
            all_records = db.execute(
                "SELECT path, description FROM codebase ORDER BY rowid"
            ).fetchall()
            for file, description in all_records:
                PrettyOutput.print(f"正在向量化文件: {file}", output_type=OutputType.INFO)
                vector = self.vectorize_file(file, description).reshape(1, -1)
                self.index.add(vector)
            faiss.write_index(self.index, self.index_path)
        finally:
            db.close()

    def generate_codebase(self):
        """Bring the database and the vector index up to date.

        Removes rows for files no longer tracked, describes new or changed
        files in a thread pool, and rebuilds the vector index if anything
        was removed or added.
        """
        updated = self.clean_db()
        db_lock = Lock()
        processed_files = []  # paths that received a fresh description

        def process_and_save(file):
            result = self.process_file(file)
            if not result:
                return
            path, md5, description = result
            db = self.get_db_connection()
            try:
                # Serialize writers; the list append is guarded by the same lock.
                with db_lock:
                    db.execute("DELETE FROM codebase WHERE path = ?", (path,))
                    db.execute("INSERT INTO codebase (path, md5, description) VALUES (?, ?, ?)",
                               (path, md5, description))
                    db.commit()
                    PrettyOutput.print(f"索引文件: {path}", output_type=OutputType.INFO)
                    processed_files.append(path)
            finally:
                db.close()

        with ThreadPoolExecutor(max_workers=self.thread_count) as executor:
            futures = {executor.submit(process_and_save, file): file
                       for file in self.git_file_list}
            # Report worker failures instead of silently discarding them;
            # a failing file does not stop the remaining files.
            for future in concurrent.futures.as_completed(futures):
                error = future.exception()
                if error is not None:
                    PrettyOutput.print(f"Error indexing file {futures[future]}: {str(error)}",
                                       output_type=OutputType.ERROR)

        if updated or len(processed_files) > 0:
            PrettyOutput.print("有新的文件被删除或添加,正在重新生成向量数据库", output_type=OutputType.INFO)
            self.gen_vector_db_from_sqlite()
        else:
            PrettyOutput.print("没有新的文件被删除或添加,跳过向量数据库生成", output_type=OutputType.INFO)

        PrettyOutput.print(f"成功索引 {len(processed_files)} 个文件", output_type=OutputType.INFO)

    def search_similar(self, query: str, top_k: int = 5) -> List[Tuple[str, float, str]]:
        """Search for the files most similar to *query*.

        Args:
            query: Query text.
            top_k: Maximum number of results to return.

        Returns:
            List of (file_path, similarity_score, description) tuples. The
            list is empty when no vector index is available or ``top_k`` is
            not positive.
        """
        # Without an index (never generated, or generation failed) there is
        # nothing to search; report "no results" instead of crashing.
        if self.index is None or top_k <= 0:
            return []

        query_vector = self.get_embedding(query).reshape(1, -1)
        distances, indices = self.index.search(query_vector, top_k)

        db = self.get_db_connection()
        try:
            results = []
            for i, distance in zip(indices[0], distances[0]):
                if i == -1:  # FAISS pads missing results with -1
                    continue

                # Vector position i corresponds to the i-th row in rowid
                # order, the order used when the index was built.
                offset = int(i)  # numpy.int64 -> Python int for sqlite3
                row = db.execute(
                    "SELECT path, description FROM codebase ORDER BY rowid LIMIT 1 OFFSET ?",
                    (offset,),
                ).fetchone()
                if row:
                    path, description = row
                    # Map the distance (smaller means closer) into (0, 1].
                    similarity = 1.0 / (1.0 + float(distance))
                    results.append((path, similarity, description))

            return results
        finally:
            db.close()

    def ask_codebase(self, query: str, top_k: int = 5) -> str:
        """Answer a question about the codebase.

        The ``top_k`` most similar files are looked up with
        ``search_similar``; their full contents plus the question are sent
        to the normal model.

        Returns:
            The model's reply.
        """
        results = self.search_similar(query, top_k)
        PrettyOutput.print("找到的关联文件: ", output_type=OutputType.INFO)
        for path, score, _ in results:
            PrettyOutput.print(f"文件: {path} 关联度: {score:.3f}", output_type=OutputType.INFO)

        prompt = "你是一个代码专家,请根据以下文件信息回答用户的问题:\n"
        for path, _, _ in results:
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
            prompt += f"""
文件路径: {path}
文件内容:
{content}
========================================
"""
        prompt += f"""
用户问题: {query}

请用专业的语言回答用户的问题,如果给出的文件内容不足以回答用户的问题,请告诉用户,绝对不要胡编乱造。
"""
        model = self.platform_registry.create_platform(self.normal_platform)
        model.set_model_name(self.normal_model)
        response = model.chat(prompt)
        return response
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def main():
    """Command-line entry point: refresh the index, then search and/or ask.

    Options:
        --search QUERY  Print the files most similar to QUERY.
        --top-k N       Number of results to use (default: 5).
        --ask QUESTION  Ask the model a question about the codebase.

    The index is refreshed on every run; a failure during the refresh is
    logged and the requested search/ask still runs.
    """
    parser = argparse.ArgumentParser(description='Codebase management and search tool')
    parser.add_argument('--search', type=str, help='Search query to find similar code files')
    parser.add_argument('--top-k', type=int, default=5, help='Number of results to return (default: 5)')
    parser.add_argument('--ask', type=str, help='Ask a question about the codebase')
    args = parser.parse_args()

    current_dir = find_git_root()
    codebase = CodeBase(current_dir)

    try:
        codebase.generate_codebase()
        PrettyOutput.print("\nCodebase generation completed", output_type=OutputType.SUCCESS)
    except Exception as e:
        PrettyOutput.print(f"Error during codebase generation: {str(e)}", output_type=OutputType.ERROR)

    if args.search:
        results = codebase.search_similar(args.search, args.top_k)
        if not results:
            PrettyOutput.print("No similar files found", output_type=OutputType.WARNING)
            return

        PrettyOutput.print("\nSearch Results:", output_type=OutputType.INFO)
        for path, score, desc in results:
            PrettyOutput.print("\n" + "="*50, output_type=OutputType.INFO)
            PrettyOutput.print(f"File: {path}", output_type=OutputType.INFO)
            PrettyOutput.print(f"Similarity: {score:.3f}", output_type=OutputType.INFO)
            # Show the first 100 characters as a preview; slicing with
            # [100:] dropped the beginning of every description.
            PrettyOutput.print(f"Description: {desc[:100]}", output_type=OutputType.INFO)

    if args.ask:
        codebase.ask_codebase(args.ask, args.top_k)


if __name__ == "__main__":
    # SystemExit works even where the site-provided exit() helper is absent.
    raise SystemExit(main())
|