npcsh 0.3.31__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcsh/_state.py +942 -0
- npcsh/alicanto.py +1074 -0
- npcsh/guac.py +785 -0
- npcsh/mcp_helpers.py +357 -0
- npcsh/mcp_npcsh.py +822 -0
- npcsh/mcp_server.py +184 -0
- npcsh/npc.py +218 -0
- npcsh/npcsh.py +1161 -0
- npcsh/plonk.py +387 -269
- npcsh/pti.py +234 -0
- npcsh/routes.py +958 -0
- npcsh/spool.py +315 -0
- npcsh/wander.py +550 -0
- npcsh/yap.py +573 -0
- npcsh-1.0.0.dist-info/METADATA +596 -0
- npcsh-1.0.0.dist-info/RECORD +21 -0
- {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/WHEEL +1 -1
- npcsh-1.0.0.dist-info/entry_points.txt +9 -0
- {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/licenses/LICENSE +1 -1
- npcsh/audio.py +0 -210
- npcsh/cli.py +0 -545
- npcsh/command_history.py +0 -566
- npcsh/conversation.py +0 -291
- npcsh/data_models.py +0 -46
- npcsh/dataframes.py +0 -163
- npcsh/embeddings.py +0 -168
- npcsh/helpers.py +0 -641
- npcsh/image.py +0 -298
- npcsh/image_gen.py +0 -79
- npcsh/knowledge_graph.py +0 -1006
- npcsh/llm_funcs.py +0 -2027
- npcsh/load_data.py +0 -83
- npcsh/main.py +0 -5
- npcsh/model_runner.py +0 -189
- npcsh/npc_compiler.py +0 -2870
- npcsh/npc_sysenv.py +0 -383
- npcsh/npc_team/assembly_lines/test_pipeline.py +0 -181
- npcsh/npc_team/corca.npc +0 -13
- npcsh/npc_team/foreman.npc +0 -7
- npcsh/npc_team/npcsh.ctx +0 -11
- npcsh/npc_team/sibiji.npc +0 -4
- npcsh/npc_team/templates/analytics/celona.npc +0 -0
- npcsh/npc_team/templates/hr_support/raone.npc +0 -0
- npcsh/npc_team/templates/humanities/eriane.npc +0 -4
- npcsh/npc_team/templates/it_support/lineru.npc +0 -0
- npcsh/npc_team/templates/marketing/slean.npc +0 -4
- npcsh/npc_team/templates/philosophy/maurawa.npc +0 -0
- npcsh/npc_team/templates/sales/turnic.npc +0 -4
- npcsh/npc_team/templates/software/welxor.npc +0 -0
- npcsh/npc_team/tools/bash_executer.tool +0 -32
- npcsh/npc_team/tools/calculator.tool +0 -8
- npcsh/npc_team/tools/code_executor.tool +0 -16
- npcsh/npc_team/tools/generic_search.tool +0 -27
- npcsh/npc_team/tools/image_generation.tool +0 -25
- npcsh/npc_team/tools/local_search.tool +0 -149
- npcsh/npc_team/tools/npcsh_executor.tool +0 -9
- npcsh/npc_team/tools/screen_cap.tool +0 -27
- npcsh/npc_team/tools/sql_executor.tool +0 -26
- npcsh/response.py +0 -623
- npcsh/search.py +0 -248
- npcsh/serve.py +0 -1460
- npcsh/shell.py +0 -538
- npcsh/shell_helpers.py +0 -3529
- npcsh/stream.py +0 -700
- npcsh/video.py +0 -49
- npcsh-0.3.31.data/data/npcsh/npc_team/bash_executer.tool +0 -32
- npcsh-0.3.31.data/data/npcsh/npc_team/calculator.tool +0 -8
- npcsh-0.3.31.data/data/npcsh/npc_team/celona.npc +0 -0
- npcsh-0.3.31.data/data/npcsh/npc_team/code_executor.tool +0 -16
- npcsh-0.3.31.data/data/npcsh/npc_team/corca.npc +0 -13
- npcsh-0.3.31.data/data/npcsh/npc_team/eriane.npc +0 -4
- npcsh-0.3.31.data/data/npcsh/npc_team/foreman.npc +0 -7
- npcsh-0.3.31.data/data/npcsh/npc_team/generic_search.tool +0 -27
- npcsh-0.3.31.data/data/npcsh/npc_team/image_generation.tool +0 -25
- npcsh-0.3.31.data/data/npcsh/npc_team/lineru.npc +0 -0
- npcsh-0.3.31.data/data/npcsh/npc_team/local_search.tool +0 -149
- npcsh-0.3.31.data/data/npcsh/npc_team/maurawa.npc +0 -0
- npcsh-0.3.31.data/data/npcsh/npc_team/npcsh.ctx +0 -11
- npcsh-0.3.31.data/data/npcsh/npc_team/npcsh_executor.tool +0 -9
- npcsh-0.3.31.data/data/npcsh/npc_team/raone.npc +0 -0
- npcsh-0.3.31.data/data/npcsh/npc_team/screen_cap.tool +0 -27
- npcsh-0.3.31.data/data/npcsh/npc_team/sibiji.npc +0 -4
- npcsh-0.3.31.data/data/npcsh/npc_team/slean.npc +0 -4
- npcsh-0.3.31.data/data/npcsh/npc_team/sql_executor.tool +0 -26
- npcsh-0.3.31.data/data/npcsh/npc_team/test_pipeline.py +0 -181
- npcsh-0.3.31.data/data/npcsh/npc_team/turnic.npc +0 -4
- npcsh-0.3.31.data/data/npcsh/npc_team/welxor.npc +0 -0
- npcsh-0.3.31.dist-info/METADATA +0 -1853
- npcsh-0.3.31.dist-info/RECORD +0 -76
- npcsh-0.3.31.dist-info/entry_points.txt +0 -3
- {npcsh-0.3.31.dist-info → npcsh-1.0.0.dist-info}/top_level.txt +0 -0
npcsh/conversation.py
DELETED
|
@@ -1,291 +0,0 @@
|
|
|
1
|
-
########
|
|
2
|
-
########
|
|
3
|
-
########
|
|
4
|
-
########
|
|
5
|
-
######## CONVERSATION
|
|
6
|
-
########
|
|
7
|
-
from typing import Any, Dict, Generator, List
|
|
8
|
-
import os
|
|
9
|
-
import anthropic
|
|
10
|
-
|
|
11
|
-
from openai import OpenAI
|
|
12
|
-
from google.generativeai import types
|
|
13
|
-
import google.generativeai as genai
|
|
14
|
-
from .npc_sysenv import get_system_message
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def get_ollama_conversation(
    messages: List[Dict[str, str]],
    model: str,
    npc: Any = None,
    tools: list = None,
    images=None,
    **kwargs,
) -> List[Dict[str, str]]:
    """
    Function Description:
        Run one chat turn against an Ollama model and return the extended history.
    Args:
        messages (List[Dict[str, str]]): The conversation so far. It is not
            modified; a copy is extended and returned.
        model (str): The Ollama model to use for the conversation.
    Keyword Args:
        npc (Any): The NPC object. When given and the history does not start
            with a system message, its system message is prepended.
        tools, images, **kwargs: Accepted for signature parity with the other
            providers; they are not forwarded to Ollama here.
    Returns:
        List[Dict[str, str]]: A new list holding the (possibly system-prefixed)
        history plus the model's reply message.
    """
    import ollama

    # Shallow copy so the caller's list is left untouched.
    history = list(messages) if messages else []

    # `not history` covers the empty-history case, which used to raise
    # IndexError on `messages_copy[0]`.
    lacks_system = not history or history[0]["role"] != "system"
    if lacks_system and npc is not None:
        history.insert(0, {"role": "system", "content": get_system_message(npc)})

    response = ollama.chat(model=model, messages=history)
    history.append(response["message"])
    return history
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def get_openai_conversation(
    messages: List[Dict[str, str]],
    model: str,
    npc: Any = None,
    tools: list = None,
    api_key: str = None,
    images=None,
    **kwargs,
) -> List[Dict[str, str]]:
    """
    Function Description:
        Send the conversation to the OpenAI chat completions API and append the reply.
    Args:
        messages (List[Dict[str, str]]): The conversation so far; it should
            already contain the latest user message. The list is extended in place.
        model (str): The model to use for the conversation.
    Keyword Args:
        npc (Any): The NPC object used to build the system message.
        api_key (str): The OpenAI API key; read from OPENAI_API_KEY when omitted.
        **kwargs: Forwarded to `client.chat.completions.create`.
    Returns:
        List[Dict[str, str]]: The history including the assistant reply, or an
        error string if anything raised while talking to the API.
    """
    try:
        key = os.environ["OPENAI_API_KEY"] if api_key is None else api_key
        client = OpenAI(api_key=key)

        if npc:
            default_system = get_system_message(npc)
        else:
            default_system = "You are a helpful assistant."

        if messages is None:
            messages = []

        # Only add a system message when the history has none anywhere.
        for entry in messages:
            if entry["role"] == "system":
                break
        else:
            messages.insert(0, {"role": "system", "content": default_system})

        completion = client.chat.completions.create(
            model=model, messages=messages, **kwargs
        )
        reply_text = completion.choices[0].message.content
        messages.append({"role": "assistant", "content": reply_text})
        return messages
    except Exception as exc:
        # Errors are reported as a string rather than raised.
        return f"Error interacting with OpenAI: {exc}"
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def get_openai_like_conversation(
    messages: List[Dict[str, str]],
    model: str,
    api_url: str,
    npc: Any = None,
    images=None,
    tools: list = None,
    api_key: str = None,
    **kwargs,
) -> List[Dict[str, str]]:
    """
    Function Description:
        Send the conversation to an OpenAI-compatible endpoint and append the reply.
    Args:
        messages (List[Dict[str, str]]): The conversation so far; it should
            already contain the latest user message. The list is extended in place.
        model (str): The model to use for the conversation.
        api_url (str): Base URL of the OpenAI-compatible API endpoint.
    Keyword Args:
        npc (Any): The NPC object used to build the system message.
        api_key (str): The API key; a dummy value is sent when omitted.
        **kwargs: Forwarded to `client.chat.completions.create`.
    Returns:
        List[Dict[str, str]]: The history including the assistant reply, or an
        error string if anything raised while talking to the API.
    Raises:
        ValueError: If `api_url` is None.
    """
    if api_url is None:
        raise ValueError("api_url is required for openai-like provider")
    if api_key is None:
        # The client requires some key even when the server does not check it.
        api_key = "dummy_api_key"

    try:
        client = OpenAI(api_key=api_key, base_url=api_url)

        system_message = (
            get_system_message(npc) if npc else "You are a helpful assistant."
        )

        if messages is None:
            messages = []

        # Only add a system message when the history has none anywhere.
        if not any(msg["role"] == "system" for msg in messages):
            messages.insert(0, {"role": "system", "content": system_message})

        completion = client.chat.completions.create(
            model=model, messages=messages, **kwargs
        )
        response_message = completion.choices[0].message
        messages.append({"role": "assistant", "content": response_message.content})
        return messages

    except Exception as e:
        # Errors are reported as a string rather than raised. The unreachable
        # `return messages` that followed this handler has been removed.
        return f"Error interacting with OpenAI: {e}"
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
def get_anthropic_conversation(
    messages: List[Dict[str, str]],
    model: str,
    npc: Any = None,
    tools: list = None,
    images=None,
    api_key: str = None,
    **kwargs,
) -> List[Dict[str, str]]:
    """
    Function Description:
        Send the conversation to the Anthropic messages API and append the reply.
    Args:
        messages (List[Dict[str, str]]): The conversation so far. A leading
            system message is removed from this list, and the reply is
            appended to it in place.
        model (str): The model to use for the conversation.
    Keyword Args:
        npc (Any): The NPC object used to build the system message.
        api_key (str): The Anthropic API key; read from ANTHROPIC_API_KEY when omitted.
        **kwargs: Forwarded to `client.messages.create`. `max_tokens` defaults
            to 8192 when the caller does not supply it.
    Returns:
        List[Dict[str, str]]: The history including the assistant reply, or an
        error string if anything raised (including a history with no user message).
    """
    try:
        if api_key is None:
            api_key = os.getenv("ANTHROPIC_API_KEY", None)
        system_message = get_system_message(npc) if npc else ""
        client = anthropic.Anthropic(api_key=api_key)

        # The whole history is sent, so only the presence of a user turn matters.
        if not any(msg["role"] == "user" for msg in messages):
            raise ValueError("No user message found in the conversation history.")

        # The system prompt goes in the separate `system=` argument, so a
        # leading system entry is taken out of the message list.
        if messages[0]["role"] == "system":
            removed = messages.pop(0)
            if not system_message:
                # Without an NPC, keep the conversation's own system prompt
                # instead of silently discarding it.
                system_message = removed.get("content", "")

        # Let callers override the limit; passing it twice would raise TypeError.
        kwargs.setdefault("max_tokens", 8192)

        message = client.messages.create(
            model=model,
            system=system_message,
            messages=messages,
            **kwargs,
        )

        messages.append({"role": "assistant", "content": message.content[0].text})
        return messages

    except Exception as e:
        # Errors are reported as a string rather than raised.
        return f"Error interacting with Anthropic conversations: {e}"
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
def get_gemini_conversation(
    messages: List[Dict[str, str]],
    model: str,
    npc: Any = None,
    tools: list = None,
    api_key: str = None,
) -> List[Dict[str, str]]:
    """
    Function Description:
        Delegate one conversation turn to `get_gemini_response`.
    Args:
        messages (List[Dict[str, str]]): The conversation so far. The content
            of the last entry is used as the prompt, and every entry after the
            first is passed along as history.
        model (str): The model to use for the conversation.
    Keyword Args:
        npc (Any): The NPC object, forwarded to `get_gemini_response`.
        tools, api_key: Accepted but not used by this function.
    Returns:
        List[Dict[str, str]]: The "messages" entry of the helper's result, or
        an empty list when that key is absent.
    """
    # NOTE(review): get_gemini_response is not imported in the visible part of
    # this module — confirm where it is expected to come from.
    gemini_result = get_gemini_response(
        messages[-1]["content"],
        model,
        messages=messages[1:],
        npc=npc,
    )
    return gemini_result.get("messages", [])
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
def get_deepseek_conversation(
    messages: List[Dict[str, str]],
    model: str,
    npc: Any = None,
    tools: list = None,
    api_key: str = None,
) -> List[Dict[str, str]]:
    """
    Function Description:
        Delegate one conversation turn to `get_deepseek_response` and append the reply.
    Args:
        messages (List[Dict[str, str]]): The conversation so far. It is
            extended in place; an empty or missing history is replaced by one
            holding only the system message.
        model (str): The model to use for the conversation.
    Keyword Args:
        npc (Any): The NPC object used to build the system message.
        tools, api_key: Accepted but not used by this function.
    Returns:
        List[Dict[str, str]]: The history plus an assistant entry holding
        either the reply or an error description.
    """
    if npc:
        default_system = get_system_message(npc)
    else:
        default_system = "You are a helpful assistant."
    system_entry = {"role": "system", "content": default_system}

    if not messages:
        messages = [system_entry]
    else:
        # Only add a system message when the history has none anywhere.
        for entry in messages:
            if entry["role"] == "system":
                break
        else:
            messages.insert(0, system_entry)

    # NOTE(review): get_deepseek_response is not imported in the visible part
    # of this module — confirm where it is expected to come from.
    try:
        result = get_deepseek_response(
            messages[-1]["content"], model, messages=messages, npc=npc
        )
        assistant_text = result.get("response", "No response")
    except Exception as exc:
        # Failures are recorded as the assistant's turn rather than raised.
        assistant_text = f"Error interacting with DeepSeek: {str(exc)}"

    messages.append({"role": "assistant", "content": assistant_text})
    return messages
|
npcsh/data_models.py
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
from pydantic import BaseModel
|
|
2
|
-
from typing import List, Dict
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class NPC(BaseModel):
    """Pydantic schema for an NPC definition; every field is required."""

    name: str
    # presumably the instruction text that drives the NPC — confirm with callers
    primary_directive: str
    # Model and provider identifiers, plus the endpoint URL to use with them.
    model: str
    provider: str
    api_url: str
    # Tools listed by name (strings), not as Tool objects.
    tools: List[str]
    use_default_tools: bool
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class Tool(BaseModel):
    """Pydantic schema for a tool: a name, a description and a list of steps."""

    tool_name: str
    description: str
    # Each step is a plain string-to-string mapping.
    steps: List[Dict[str, str]]
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ToolStep(BaseModel):
    """Pydantic schema for one tool step: an engine name and the code it runs."""

    engine: str
    code: str
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class Context(BaseModel):
    """Pydantic schema for a context: databases, files and variables."""

    databases: List[str]
    files: List[str]
    # Variables are given as a list of string-to-string mappings.
    vars: List[Dict[str, str]]
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class Pipeline(BaseModel):
    """Pydantic schema for a pipeline, expressed as an ordered list of steps."""

    # Each step is a plain string-to-string mapping.
    steps: List[Dict[str, str]]
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class PipelineStep(BaseModel):
    """Pydantic schema for one pipeline step; every field is required."""

    # Tool name and its arguments, all given as strings.
    tool: str
    args: List[str]
    # Model and provider identifiers for this step.
    model: str
    provider: str
    task: str
    # The NPC is referenced by a string, not by an NPC object.
    npc: str
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class Fabrication(BaseModel):
    """Pydantic schema holding a single required string field, `spell`."""

    spell: str
|
npcsh/dataframes.py
DELETED
|
@@ -1,163 +0,0 @@
|
|
|
1
|
-
## functions for dataframes
|
|
2
|
-
import os
|
|
3
|
-
import sqlite3
|
|
4
|
-
import json
|
|
5
|
-
import pandas as pd
|
|
6
|
-
import numpy as np
|
|
7
|
-
import io
|
|
8
|
-
from PIL import Image
|
|
9
|
-
from typing import Optional
|
|
10
|
-
|
|
11
|
-
from .llm_funcs import get_llm_response
|
|
12
|
-
from .audio import process_audio
|
|
13
|
-
from .video import process_video
|
|
14
|
-
|
|
15
|
-
from .load_data import load_pdf, load_csv, load_json, load_excel, load_txt, load_image
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def load_data_into_table(
    file_path: str, table_name: str, cursor: sqlite3.Cursor, conn: sqlite3.Connection
) -> None:
    """
    Function Description:
        Load a file into a DataFrame chosen by its extension and store it as a
        SQLite table, replacing any existing table of that name.
    Args:
        file_path : str : The file path.
        table_name : str : The table name.
        cursor : sqlite3.Cursor : The SQLite cursor (not used here; kept for
            the existing call signature).
        conn : sqlite3.Connection : The SQLite connection written to.
    Keyword Args:
        None
    Returns:
        None
    Raises:
        FileNotFoundError: If `file_path` does not exist.
        Exception: Loader or database errors propagate to the caller.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    # Match every extension case-insensitively; previously only the media
    # branches did, so e.g. "DATA.CSV" fell through to the text fallback.
    lowered = file_path.lower()

    if lowered.endswith(".csv"):
        df = pd.read_csv(file_path)
    elif lowered.endswith(".pdf"):
        df = load_pdf(file_path)
    elif lowered.endswith((".txt", ".log", ".md")):
        df = load_txt(file_path)
    elif lowered.endswith((".xls", ".xlsx")):
        df = load_excel(file_path)
    elif lowered.endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff")):
        df = load_image(file_path)
    elif lowered.endswith((".mp4", ".avi", ".mov", ".mkv")):  # Video files
        video_frames, audio_array = process_video(file_path)
        has_audio = audio_array is not None
        df = pd.DataFrame(
            {
                "video_frames": [video_frames.tobytes()],
                # sqlite cannot bind a tuple, so store the shape as JSON text.
                "shape": [json.dumps(list(video_frames.shape))],
                "dtype": [video_frames.dtype.str],
                "audio_array": [audio_array.tobytes() if has_audio else None],
                # process_video returns no sample rate (the old code read an
                # undefined `sr` here), so the rate is stored as unknown.
                "audio_rate": [None],
            }
        )
    elif lowered.endswith((".mp3", ".wav", ".ogg")):  # Audio files
        audio_array, sr = process_audio(file_path)
        df = pd.DataFrame(
            {
                "audio_array": [audio_array.tobytes()],
                "audio_rate": [sr],
            }
        )
    else:
        # Unknown extension: best-effort attempt to read it as text.
        try:
            df = load_txt(file_path)
        except Exception as e:
            print(f"Could not load file: {e}")
            return

    df.to_sql(table_name, conn, if_exists="replace", index=False)
    print(f"Data from '{file_path}' loaded into table '{table_name}'")
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def create_new_table(cursor: sqlite3.Cursor, conn: sqlite3.Connection) -> None:
    """
    Function Description:
        Prompt for a table name and column definitions, then create the table
        with an auto-incrementing `id` primary key.
    Args:
        cursor : sqlite3.Cursor : The SQLite cursor.
        conn : sqlite3.Connection : The SQLite connection.
    Keyword Args:
        None
    Returns:
        None
    """
    table_name = input("Enter new table name: ").strip()
    columns = input("Enter column names separated by commas: ").strip()

    # Quote the table name so spaces, reserved words or stray punctuation in
    # it cannot change the meaning of the statement.
    quoted_table = '"' + table_name.replace('"', '""') + '"'

    # The column text is used verbatim (it may carry types), but an empty
    # answer must not leave a dangling comma in the statement.
    column_defs = "id INTEGER PRIMARY KEY AUTOINCREMENT"
    if columns:
        column_defs = f"{column_defs}, {columns}"

    cursor.execute(f"CREATE TABLE {quoted_table} ({column_defs})")
    conn.commit()
    print(f"Table '{table_name}' created successfully.")
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def delete_table(cursor: sqlite3.Cursor, conn: sqlite3.Connection) -> None:
    """
    Function Description:
        Prompt for a table name and drop that table if it exists.
    Args:
        cursor : sqlite3.Cursor : The SQLite cursor.
        conn : sqlite3.Connection : The SQLite connection.
    Keyword Args:
        None
    Returns:
        None
    """
    table_name = input("Enter table name to delete: ").strip()

    # Quote the name so the input is always treated as one identifier and
    # cannot inject extra SQL into the statement.
    quoted_table = '"' + table_name.replace('"', '""') + '"'

    cursor.execute(f"DROP TABLE IF EXISTS {quoted_table}")
    conn.commit()
    print(f"Table '{table_name}' deleted successfully.")
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
def add_observation(
    cursor: sqlite3.Cursor, conn: sqlite3.Connection, table_name: str
) -> None:
    """
    Function Description:
        Prompt for a value for every non-`id` column of a table and insert the
        answers as a new row.
    Args:
        cursor : sqlite3.Cursor : The SQLite cursor.
        conn : sqlite3.Connection : The SQLite connection.
        table_name : str : The table name.
    Keyword Args:
        None
    Returns:
        None
    """

    def _quote(identifier: str) -> str:
        # Double-quote an identifier, escaping any embedded double quotes.
        return '"' + identifier.replace('"', '""') + '"'

    quoted_table = _quote(table_name)

    cursor.execute(f"PRAGMA table_info({quoted_table})")
    # Index 1 of each table_info row is the column name.
    columns = [row[1] for row in cursor.fetchall() if row[1] != "id"]

    values = [input(f"Enter value for {column}: ").strip() for column in columns]

    if columns:
        column_list = ",".join(_quote(column) for column in columns)
        placeholders = ",".join("?" for _ in columns)
        cursor.execute(
            f"INSERT INTO {quoted_table} ({column_list}) VALUES ({placeholders})",
            values,
        )
    else:
        # "INSERT INTO t () VALUES ()" is not valid SQLite; a table with only
        # the id column gets a default row instead.
        cursor.execute(f"INSERT INTO {quoted_table} DEFAULT VALUES")

    conn.commit()
    print("Observation added successfully.")
|
npcsh/embeddings.py
DELETED
|
@@ -1,168 +0,0 @@
|
|
|
1
|
-
#######
|
|
2
|
-
#######
|
|
3
|
-
#######
|
|
4
|
-
#######
|
|
5
|
-
####### EMBEDDINGS
|
|
6
|
-
#######
|
|
7
|
-
from typing import List, Dict, Optional
|
|
8
|
-
import numpy as np
|
|
9
|
-
from npcsh.npc_sysenv import (
|
|
10
|
-
NPCSH_VECTOR_DB_PATH,
|
|
11
|
-
NPCSH_EMBEDDING_MODEL,
|
|
12
|
-
NPCSH_EMBEDDING_PROVIDER,
|
|
13
|
-
chroma_client,
|
|
14
|
-
)
|
|
15
|
-
from openai import OpenAI
|
|
16
|
-
import anthropic
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def get_ollama_embeddings(
    texts: List[str], model: str = "nomic-embed-text"
) -> List[List[float]]:
    """Embed each text with Ollama, one request per text, preserving order."""
    import ollama

    return [
        ollama.embeddings(model=model, prompt=item)["embedding"] for item in texts
    ]
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def get_openai_embeddings(
    texts: List[str], model: str = "text-embedding-3-small"
) -> List[List[float]]:
    """Generate embeddings using OpenAI.

    The API key is read from the OPENAI_API_KEY environment variable; the
    previous code referenced an undefined `openai_api_key` name and raised
    NameError on every call.
    """
    import os

    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
    response = client.embeddings.create(input=texts, model=model)
    return [item.embedding for item in response.data]
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def get_openai_like_embeddings(
    texts: List[str], model, api_url=None, api_key=None
) -> List[List[float]]:
    """Generate embeddings using an OpenAI-compatible endpoint.

    Args:
        texts: The texts to embed.
        model: The embedding model name understood by the endpoint.
        api_url: Base URL of the OpenAI-compatible API.
        api_key: Key for that endpoint. A dummy value is sent when omitted,
            because the client requires some key to be set.

    Returns:
        One embedding per input text, in order.
    """
    if api_key is None:
        # Use the caller's key if given. The real OpenAI key is deliberately
        # not used as a fallback, so it is never sent to a third-party URL.
        api_key = "dummy_api_key"

    client = OpenAI(api_key=api_key, base_url=api_url)
    response = client.embeddings.create(input=texts, model=model)
    return [item.embedding for item in response.data]
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
def get_anthropic_embeddings(
    texts: List[str], model: str = "claude-3-haiku-20240307"
) -> List[List[float]]:
    """Return placeholder embeddings for the Anthropic provider.

    No embedding request is made: every text maps to a 1024-dimensional zero
    vector. `model` is accepted for signature parity only.

    The previous code built an Anthropic client from an undefined
    `anthropic_api_key` name and raised NameError before returning anything.
    The client was never used, so it has been dropped.

    Returns:
        One independent zero vector per input text.
    """
    # Placeholder for actual embeddings; replace when a real source exists.
    return [[0.0] * 1024 for _ in texts]
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def store_embeddings_for_model(
    texts,
    embeddings,
    metadata=None,
    model: str = NPCSH_EMBEDDING_MODEL,
    provider: str = NPCSH_EMBEDDING_PROVIDER,
):
    """Add texts and their embeddings to the Chroma collection for a provider/model.

    The collection is looked up by the name "{provider}_{model}_embeddings".
    When no metadata is supplied, each document gets {"text_length": len(text)}.
    Document ids are the stringified positions "0", "1", ... within `texts`.
    """
    target = chroma_client.get_collection(f"{provider}_{model}_embeddings")

    if metadata is None:
        # Fall back to a minimal per-document metadata record.
        metadata = []
        for text in texts:
            metadata.append({"text_length": len(text)})
        print(
            "metadata is none, creating metadata for each document as the length of the text"
        )

    positional_ids = list(map(str, range(len(texts))))
    target.add(
        ids=positional_ids,
        embeddings=embeddings,
        metadatas=metadata,
        documents=texts,
    )
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def delete_embeddings_from_collection(collection, ids):
    """Delete the given ids from a Chroma collection; do nothing when ids is empty."""
    if not ids:
        # Nothing to remove, so the collection is not touched at all.
        return
    collection.delete(ids=ids)
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def search_similar_texts(
    query: str,
    docs_to_embed: Optional[List[str]] = None,
    top_k: int = 5,
    db_path: str = NPCSH_VECTOR_DB_PATH,
    embedding_model: str = NPCSH_EMBEDDING_MODEL,
    embedding_provider: str = NPCSH_EMBEDDING_PROVIDER,
) -> List[Dict[str, any]]:
    """
    Search for similar texts using either a Chroma database or direct embedding comparison.

    Args:
        query: The text to search for.
        docs_to_embed: Documents to rank against the query. When None, the
            Chroma collection "{embedding_provider}_{embedding_model}_embeddings"
            is queried instead.
        top_k: Maximum number of results to return.
        db_path: Accepted but not used by this function.
        embedding_model: Model name passed to the embedding helper.
        embedding_provider: Only used to build the Chroma collection name.

    Returns:
        A list of {"id", "score", "text"} dicts. For the Chroma path, "score"
        is the distance the collection reports. For the direct path it is the
        cosine similarity, best match first, and "id" is the document's
        position in `docs_to_embed`.

    Raises:
        ValueError: If no document produced an embedding, or the query
            embedding has zero norm.
    """
    print(f"\nQuery to embed: {query}")
    # NOTE(review): embeddings always come from Ollama here, whatever
    # `embedding_provider` says — confirm that is intended.
    embedded_search_term = get_ollama_embeddings([query], embedding_model)[0]

    if docs_to_embed is None:
        collection_name = f"{embedding_provider}_{embedding_model}_embeddings"
        collection = chroma_client.get_collection(collection_name)
        results = collection.query(
            query_embeddings=[embedded_search_term], n_results=top_k
        )
        return [
            {"id": doc_id, "score": float(distance), "text": document}
            for doc_id, distance, document in zip(
                results["ids"][0], results["distances"][0], results["documents"][0]
            )
        ]

    print(f"\nNumber of documents to embed: {len(docs_to_embed)}")

    raw_embeddings = get_ollama_embeddings(docs_to_embed, embedding_model)

    # Keep each surviving embedding with its original position. Dropping empty
    # embeddings without this shifted the indices, so results pointed at the
    # wrong document text.
    kept = [(pos, emb) for pos, emb in enumerate(raw_embeddings) if emb]
    if not kept:
        raise ValueError("No valid document embeddings found")

    doc_positions = [pos for pos, _ in kept]
    doc_embeddings = np.array([emb for _, emb in kept], dtype=float)
    query_embedding = np.array(embedded_search_term, dtype=float)

    query_norm = np.linalg.norm(query_embedding)
    if query_norm == 0:
        raise ValueError("Query embedding is zero-length")

    doc_norms = np.linalg.norm(doc_embeddings, axis=1)
    dots = np.dot(doc_embeddings, query_embedding)
    denominators = doc_norms * query_norm

    # A zero-norm document would give NaN, which sorts to the top after the
    # reversal below; score such documents 0.0 instead.
    cosine_similarities = np.divide(
        dots,
        denominators,
        out=np.zeros_like(dots, dtype=float),
        where=denominators != 0,
    )

    top_rows = np.argsort(cosine_similarities)[::-1][:top_k]

    return [
        {
            "id": str(doc_positions[row]),
            "score": float(cosine_similarities[row]),
            "text": docs_to_embed[doc_positions[row]],
        }
        for row in top_rows
    ]
|