npcpy 1.1.28-py3-none-any.whl → 1.2.32-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcpy/data/audio.py +16 -38
- npcpy/data/image.py +29 -29
- npcpy/data/load.py +4 -3
- npcpy/data/text.py +28 -28
- npcpy/data/video.py +6 -6
- npcpy/data/web.py +49 -21
- npcpy/ft/__init__.py +0 -0
- npcpy/ft/diff.py +110 -0
- npcpy/ft/ge.py +115 -0
- npcpy/ft/memory_trainer.py +171 -0
- npcpy/ft/model_ensembler.py +357 -0
- npcpy/ft/rl.py +360 -0
- npcpy/ft/sft.py +248 -0
- npcpy/ft/usft.py +128 -0
- npcpy/gen/audio_gen.py +24 -0
- npcpy/gen/embeddings.py +13 -13
- npcpy/gen/image_gen.py +37 -15
- npcpy/gen/response.py +287 -111
- npcpy/gen/video_gen.py +10 -9
- npcpy/llm_funcs.py +447 -79
- npcpy/memory/command_history.py +201 -48
- npcpy/memory/kg_vis.py +74 -74
- npcpy/memory/knowledge_graph.py +482 -115
- npcpy/memory/memory_processor.py +81 -0
- npcpy/memory/search.py +70 -70
- npcpy/mix/debate.py +192 -3
- npcpy/npc_compiler.py +1541 -879
- npcpy/npc_sysenv.py +250 -78
- npcpy/serve.py +1036 -321
- npcpy/sql/ai_function_tools.py +257 -0
- npcpy/sql/database_ai_adapters.py +186 -0
- npcpy/sql/database_ai_functions.py +163 -0
- npcpy/sql/model_runner.py +19 -19
- npcpy/sql/npcsql.py +706 -507
- npcpy/sql/sql_model_compiler.py +156 -0
- npcpy/tools.py +20 -20
- npcpy/work/plan.py +8 -8
- npcpy/work/trigger.py +3 -3
- {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/METADATA +169 -9
- npcpy-1.2.32.dist-info/RECORD +54 -0
- npcpy-1.1.28.dist-info/RECORD +0 -40
- {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/WHEEL +0 -0
- {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/licenses/LICENSE +0 -0
- {npcpy-1.1.28.dist-info → npcpy-1.2.32.dist-info}/top_level.txt +0 -0
npcpy/data/audio.py
CHANGED
@@ hunks at -25,7 · -35,12 · -49,7 · -79,7 · -98,7 · -175,7 · -216,7 · -255,7 · -349,7 · -368,12 · -387,7 · -401,8 @@
Whitespace-only changes (blank lines and trailing indentation trimmed), with no functional effect, around the audio constants (RATE = 16000, CHUNK = 512), the speech-state globals (is_speaking, should_stop_speaking, tts_sequence, last_speech_time, running), the queue setup (audio_queue, tts_queue, cleanup_files, pygame.mixer re-initialization), and the bodies of convert_mp3_to_wav, check_ffmpeg, get_context_string, cleanup_temp_files, run_transcription, load_history, audio_callback, play_audio_from_queue, create_and_queue_audio, and play_audio.

@@ -425,32 +425,10 @@ def process_response_chunk(text_chunk):
 def process_text_for_tts(text):
-    text = re.sub(r"[*<>{}()\[\]…        (character-stripping pattern; truncated in the rendered diff)
+    text = re.sub(r"[*<>{}()\[\]&%…      (modified pattern; also truncated in the rendered diff)
     text = text.strip()
     text = re.sub(r"(\w)\.(\w)\.", r"\1 \2 ", text)
     text = re.sub(r"([.!?])(\w)", r"\1 \2", text)
     return text
 
-"""
-
-To use this code, you'll need to have the following dependencies installed:
-
-```bash
-pip install numpy torch torchaudio faster-whisper pygame pyaudio gtts ollama
-```
-
-And optionally FFmpeg for audio speed adjustment:
-```bash
-# On Ubuntu/Debian
-sudo apt-get install ffmpeg
-
-# On MacOS with Homebrew
-brew install ffmpeg
-
-# On Windows with Chocolatey
-choco install ffmpeg
-```
-
-"""
npcpy/data/image.py
CHANGED
@@ hunks at -11,25 · -60,7 · -71,7 · -79,11 · -102,33 · -173,7 · -186,24 @@
Whitespace-only changes (blank lines and indentation trimmed), with no functional effect, throughout _windows_snip_to_file (the keybd_event sequence that triggers the Win+Shift+S snip, the time.sleep(1) wait, max_wait = 30), capture_screenshot (the macOS screencapture call and the Linux and Windows win32gui/win32ui capture branches), and compress_image (RGBA flattening onto a white background, thumbnailing to max_size, and JPEG re-encoding).
npcpy/data/load.py
CHANGED
@@ -112,10 +112,11 @@ extension_map = {
     "GZ": "archives",
 }
 
-def load_file_contents(file_path, chunk_size=250):
+def load_file_contents(file_path, chunk_size=None):
     file_ext = os.path.splitext(file_path)[1].upper().lstrip('.')
     full_content = ""
-
+    if not isinstance(chunk_size, int):
+        chunk_size=250
     try:
         if file_ext == 'PDF':
             full_content = load_pdf(file_path)
@@ -131,7 +132,7 @@ def load_file_contents(file_path, chunk_size=250):
     elif file_ext in ['XLS', 'XLSX']:
         df = load_excel(file_path)
         full_content = df.to_string()
-    elif file_ext in ['TXT', 'MD']:
+    elif file_ext in ['TXT', 'MD', 'PY', 'JSX', 'TSX', 'TS', 'JS', 'JSON', 'SQL', 'NPC', 'JINX', 'LINE', 'YAML', 'DART', 'JAVA']:
         full_content = load_txt(file_path)
     elif file_ext == 'JSON':
         data = load_json(file_path)
npcpy/data/text.py
CHANGED
@@ hunks at -37,70 · -122,10 · -139,11 · -159,7 @@
Whitespace-only changes (blank lines and indentation trimmed), with no functional effect, in rag_search (query and line embedding, cosine-similarity thresholding, and the ±10-line snippet windows for both the plain-string and per-file dictionary input paths) and in load_all_files (the default extension list and the depth-limited recursive directory walk).
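Although the hunks above are cosmetic, they pass through rag_search's retrieval loop, whose recoverable logic is: embed the query and the candidate lines, score them with cosine similarity, and expand every line above the threshold into a window of up to 10 lines on either side. A standalone sketch of that pattern (the model name and threshold are arbitrary choices for illustration, not npcpy's configuration):

```python
# Standalone illustration of the thresholded snippet-window retrieval seen in rag_search.
import numpy as np
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # arbitrary example model

text = "npcpy ships agents. Agents call tools. Tools can search the web. The web returns text."
lines = text.split(".")

query_emb = model.encode("which part talks to the web?", convert_to_tensor=True)
line_embs = model.encode(lines, convert_to_tensor=True)
scores = util.cos_sim(query_emb, line_embs)[0].cpu().numpy()

for idx in np.where(scores >= 0.3)[0]:
    idx = int(idx)
    # expand each hit into a window of up to 10 lines on either side
    snippet = ". ".join(lines[max(0, idx - 10): min(len(lines), idx + 11)])
    print(snippet)
```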
npcpy/data/video.py
CHANGED
@@ hunks at -1,8 · -18,11 @@
Whitespace-only changes (blank lines and indentation trimmed), with no functional effect, in process_video: the module header, the cv2 frame-reading loop, the n = 10 frame-sampling constant, and the embeddings/texts return paths (including the return [], [] fallback in the exception handler).
npcpy/data/web.py
CHANGED
@@ -1,4 +1,4 @@
Whitespace-only change on the first line of the module (context: import requests, import os).

@@ -21,6 +21,25 @@
     pass
 
 
+def search_exa(query:str,
+               api_key:str = None,
+               top_k = 5,
+               **kwargs):
+    from exa_py import Exa
+    if api_key is None:
+        api_key = os.environ.get('EXA_API_KEY')
+    exa = Exa(api_key)
+
+    results = exa.search_and_contents(
+        query,
+        text=True
+    )
+    return results.results[0:top_k]
+
+
 def search_perplexity(
     query: str,
     api_key: str = None,

@@ -30,8 +49,11 @@ def search_perplexity(
     top_p: float = 0.9,
 ):
     if api_key is None:
-        api_key = os.environ
-
+        api_key = os.environ.get("PERPLEXITY_API_KEY")
+        if api_key is None:
+            raise
+
+
     url = "https://api.perplexity.ai/chat/completions"
     payload = {
         "model": "sonar",

@@ -52,13 +74,17 @@ def search_perplexity(
         "response_format": None,
     }
 
-    headers = {"Authorization": f"Bearer {api_key}",…    (single-line dict; truncated in the rendered diff)
+    headers = {"Authorization": f"Bearer {api_key}",
+               "Content-Type": "application/json"}
+
+    response = requests.post(url,
+                             json=payload,
+                             headers=headers)
+
+    response = response.json()
 
-    # Make the POST request to the API
-    response = requests.post(url, json=payload, headers=headers)
-    response = json.loads(response.text)
-    #print(response)
     return [response["choices"][0]["message"]["content"], response["citations"]]

@@ -88,7 +114,7 @@ def search_web(
Whitespace-only change inside the provider == "perplexity" branch (around search_perplexity(...) / return search_result).

@@ -108,30 +134,32 @@ def search_web(
         print("DuckDuckGo search failed: ", e)
         urls = []
         results = []
+    elif provider =='exa':
+        return search_exa(query, api_key=api_key, )
 
     elif provider =='google':
         urls = list(search(query, num_results=num_results))
The remaining lines of this hunk (the per-URL requests.get / BeautifulSoup title-and-paragraph extraction loop) change only in whitespace.

@@ -147,8 +175,8 @@ def search_web(
Whitespace-only change ahead of the content_str = "\n".join(...) citation-joining block.
npcpy/ft/__init__.py
ADDED
File without changes
|