pygpt-net 2.4.31__py3-none-any.whl → 2.4.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- CHANGELOG.md +10 -0
- README.md +14 -4
- pygpt_net/CHANGELOG.txt +10 -0
- pygpt_net/__init__.py +3 -3
- pygpt_net/controller/attachment.py +52 -1
- pygpt_net/controller/chat/attachment.py +109 -44
- pygpt_net/controller/dialogs/confirm.py +17 -1
- pygpt_net/core/attachments/__init__.py +11 -7
- pygpt_net/core/attachments/context.py +171 -34
- pygpt_net/core/debug/attachments.py +3 -1
- pygpt_net/core/debug/context.py +5 -1
- pygpt_net/core/idx/indexing.py +123 -15
- pygpt_net/core/render/markdown/pid.py +2 -1
- pygpt_net/core/render/plain/pid.py +2 -1
- pygpt_net/core/render/web/body.py +34 -12
- pygpt_net/core/render/web/pid.py +2 -1
- pygpt_net/core/render/web/renderer.py +8 -3
- pygpt_net/data/config/config.json +3 -3
- pygpt_net/data/config/models.json +3 -3
- pygpt_net/data/config/modes.json +3 -3
- pygpt_net/data/css/web.css +70 -0
- pygpt_net/data/css/web.dark.css +4 -1
- pygpt_net/data/css/web.light.css +1 -1
- pygpt_net/data/locale/locale.de.ini +7 -1
- pygpt_net/data/locale/locale.en.ini +10 -4
- pygpt_net/data/locale/locale.es.ini +7 -1
- pygpt_net/data/locale/locale.fr.ini +7 -1
- pygpt_net/data/locale/locale.it.ini +7 -1
- pygpt_net/data/locale/locale.pl.ini +7 -1
- pygpt_net/data/locale/locale.uk.ini +7 -1
- pygpt_net/data/locale/locale.zh.ini +7 -1
- pygpt_net/item/attachment.py +9 -1
- pygpt_net/plugin/cmd_code_interpreter/runner.py +2 -2
- pygpt_net/plugin/cmd_mouse_control/__init__.py +4 -2
- pygpt_net/provider/core/attachment/json_file.py +4 -1
- pygpt_net/provider/loaders/base.py +10 -1
- pygpt_net/provider/loaders/web_yt.py +19 -1
- pygpt_net/tools/image_viewer/ui/dialogs.py +3 -1
- pygpt_net/ui/dialog/url.py +29 -0
- pygpt_net/ui/dialogs.py +5 -1
- pygpt_net/ui/layout/chat/attachments.py +20 -6
- pygpt_net/ui/layout/chat/attachments_ctx.py +4 -3
- pygpt_net/ui/layout/chat/attachments_uploaded.py +8 -4
- pygpt_net/ui/widget/dialog/url.py +59 -0
- pygpt_net/ui/widget/lists/attachment.py +22 -17
- pygpt_net/ui/widget/textarea/url.py +43 -0
- {pygpt_net-2.4.31.dist-info → pygpt_net-2.4.33.dist-info}/METADATA +15 -5
- {pygpt_net-2.4.31.dist-info → pygpt_net-2.4.33.dist-info}/RECORD +51 -48
- {pygpt_net-2.4.31.dist-info → pygpt_net-2.4.33.dist-info}/LICENSE +0 -0
- {pygpt_net-2.4.31.dist-info → pygpt_net-2.4.33.dist-info}/WHEEL +0 -0
- {pygpt_net-2.4.31.dist-info → pygpt_net-2.4.33.dist-info}/entry_points.txt +0 -0
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.11.
|
9
|
+
# Updated Date: 2024.11.26 04:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import copy
|
@@ -31,6 +31,11 @@ class Context:
|
|
31
31
|
"""
|
32
32
|
self.window = window
|
33
33
|
self.dir_index = "index"
|
34
|
+
self.last_used_item = None
|
35
|
+
self.last_used_content = None
|
36
|
+
self.last_used_context = None
|
37
|
+
self.last_files = []
|
38
|
+
self.last_urls = []
|
34
39
|
self.summary_prompt = """
|
35
40
|
Summarize the text below by extracting the most important information,
|
36
41
|
especially those that may help answer the question:
|
@@ -87,16 +92,38 @@ class Context:
|
|
87
92
|
context = ""
|
88
93
|
if os.path.exists(meta_path) and os.path.isdir(meta_path):
|
89
94
|
for file in meta.additional_ctx:
|
90
|
-
if "type" not in file
|
95
|
+
if ("type" not in file
|
96
|
+
or file["type"] not in ["local_file", "url"]):
|
91
97
|
continue
|
92
98
|
file_id = file["uuid"]
|
93
99
|
file_idx_path = os.path.join(meta_path, file_id)
|
94
100
|
text_path = os.path.join(file_idx_path, file_id + ".txt")
|
101
|
+
store_path = file["path"]
|
102
|
+
if "real_path" in file:
|
103
|
+
store_path = file["real_path"]
|
95
104
|
if filename:
|
96
|
-
|
105
|
+
if file["type"] == "url":
|
106
|
+
context += "URL: {}\n".format(file["path"]) + "\n"
|
107
|
+
else:
|
108
|
+
context += "Filename: {}\n".format(file["name"]) + "\n"
|
109
|
+
|
110
|
+
# store used files and URLs in ctx
|
111
|
+
if file["type"] == "url":
|
112
|
+
if store_path not in self.last_urls:
|
113
|
+
self.last_urls.append(store_path)
|
114
|
+
else:
|
115
|
+
if store_path not in self.last_files:
|
116
|
+
self.last_files.append(store_path)
|
117
|
+
|
97
118
|
if os.path.exists(text_path):
|
98
|
-
|
99
|
-
|
119
|
+
try:
|
120
|
+
with open(text_path, "r", encoding="utf-8") as f:
|
121
|
+
context += f.read() + "\n\n"
|
122
|
+
except Exception as e:
|
123
|
+
print("Attachments: read error: {}".format(e))
|
124
|
+
|
125
|
+
self.last_used_content = context
|
126
|
+
self.last_used_context = context
|
100
127
|
return context
|
101
128
|
|
102
129
|
def query_context(self, meta: CtxMeta, query: str) -> str:
|
@@ -119,13 +146,17 @@ class Context:
|
|
119
146
|
file_id = file["uuid"]
|
120
147
|
file_idx_path = os.path.join(meta_path, file_id)
|
121
148
|
file_path = os.path.join(file_idx_path, file["name"])
|
122
|
-
|
123
|
-
|
149
|
+
type = AttachmentItem.TYPE_FILE
|
150
|
+
source = file_path
|
151
|
+
if "type" in file:
|
152
|
+
if file["type"] == "url":
|
153
|
+
type = AttachmentItem.TYPE_URL
|
154
|
+
source = file["path"] # URL
|
155
|
+
doc_ids = self.index_attachment(type, source, idx_path)
|
124
156
|
if self.is_verbose():
|
125
157
|
print("Attachments: indexed. Doc IDs: {}".format(doc_ids))
|
126
158
|
file["indexed"] = True
|
127
159
|
file["doc_ids"] = doc_ids
|
128
|
-
#meta.additional_ctx[i] = file # update meta
|
129
160
|
indexed = True
|
130
161
|
|
131
162
|
if indexed:
|
@@ -133,8 +164,9 @@ class Context:
|
|
133
164
|
self.window.core.ctx.replace(meta)
|
134
165
|
self.window.core.ctx.save(meta.id)
|
135
166
|
|
136
|
-
model = None
|
167
|
+
model = None # no model, retrieval is used
|
137
168
|
result = self.window.core.idx.chat.query_attachment(query, idx_path, model)
|
169
|
+
self.last_used_context = result
|
138
170
|
|
139
171
|
if self.is_verbose():
|
140
172
|
print("Attachments: query result: {}".format(result))
|
@@ -160,9 +192,10 @@ class Context:
|
|
160
192
|
if self.is_verbose():
|
161
193
|
print("Attachments: using summary model: {}".format(model))
|
162
194
|
|
195
|
+
content = self.get_context_text(ctx, filename=True)
|
163
196
|
prompt = self.summary_prompt.format(
|
164
197
|
query=str(query).strip(),
|
165
|
-
content=str(
|
198
|
+
content=str(content).strip(),
|
166
199
|
)
|
167
200
|
if self.is_verbose():
|
168
201
|
print("Attachments: summary prompt: {}".format(prompt))
|
@@ -180,6 +213,7 @@ class Context:
|
|
180
213
|
})
|
181
214
|
self.window.dispatch(event)
|
182
215
|
response = event.data.get("response")
|
216
|
+
self.last_used_context = response
|
183
217
|
if self.is_verbose():
|
184
218
|
print("Attachments: summary received: {}".format(response))
|
185
219
|
return response
|
@@ -211,6 +245,7 @@ class Context:
|
|
211
245
|
meta_path = self.get_dir(meta)
|
212
246
|
file_idx_path = os.path.join(meta_path, file_id)
|
213
247
|
index_path = os.path.join(meta_path, self.dir_index)
|
248
|
+
|
214
249
|
os.makedirs(meta_path, exist_ok=True)
|
215
250
|
os.makedirs(file_idx_path, exist_ok=True)
|
216
251
|
|
@@ -219,54 +254,55 @@ class Context:
|
|
219
254
|
if auto_index:
|
220
255
|
print("Attachments: vector index path: {}".format(index_path))
|
221
256
|
|
222
|
-
#
|
223
|
-
|
224
|
-
if os.path.exists(raw_path):
|
225
|
-
os.remove(raw_path)
|
226
|
-
copyfile(attachment.path, raw_path)
|
257
|
+
# store content to read
|
258
|
+
src_file = self.store_content(attachment, file_idx_path)
|
227
259
|
|
228
260
|
# extract text content using data loader
|
229
|
-
|
230
|
-
|
231
|
-
} # extra loader kwargs
|
232
|
-
text = self.window.core.idx.indexing.read_text_content(
|
233
|
-
path=raw_path,
|
234
|
-
loader_kwargs=loader_kwargs,
|
235
|
-
)
|
236
|
-
if text:
|
261
|
+
content = self.read_content(attachment, src_file, prompt)
|
262
|
+
if content:
|
237
263
|
text_path = os.path.join(file_idx_path, file_id + ".txt")
|
238
|
-
with open(text_path, "w") as f:
|
239
|
-
f.write(
|
240
|
-
|
264
|
+
with open(text_path, "w", encoding="utf-8") as f:
|
265
|
+
f.write(content)
|
241
266
|
if self.is_verbose():
|
242
|
-
print("Attachments: read text content: {}".format(
|
267
|
+
print("Attachments: read text content: {}".format(content))
|
243
268
|
|
244
269
|
tokens = 0
|
245
|
-
if
|
246
|
-
tokens = self.window.core.tokens.from_str(
|
270
|
+
if content:
|
271
|
+
tokens = self.window.core.tokens.from_str(content)
|
272
|
+
|
273
|
+
type = "local_file"
|
274
|
+
size = 0
|
275
|
+
if attachment.type == AttachmentItem.TYPE_FILE:
|
276
|
+
size = os.path.getsize(attachment.path)
|
277
|
+
elif attachment.type == AttachmentItem.TYPE_URL:
|
278
|
+
size = os.path.getsize(src_file)
|
279
|
+
type = "url" # extra ctx type
|
247
280
|
|
248
281
|
# index file to ctx index
|
249
282
|
doc_ids = []
|
250
283
|
if auto_index:
|
251
|
-
|
252
|
-
|
284
|
+
source = src_file
|
285
|
+
if attachment.type == AttachmentItem.TYPE_URL:
|
286
|
+
source = attachment.path # URL
|
287
|
+
doc_ids = self.index_attachment(attachment.type, source, index_path)
|
253
288
|
if self.is_verbose():
|
254
289
|
print("Attachments: indexed. Doc IDs: {}".format(doc_ids))
|
255
290
|
|
256
291
|
result = {
|
257
292
|
"name": name,
|
258
293
|
"path": attachment.path,
|
259
|
-
"type":
|
294
|
+
"type": type,
|
260
295
|
"uuid": str(file_id),
|
261
296
|
"content_type": "text",
|
262
|
-
"size":
|
263
|
-
"length": len(
|
297
|
+
"size": size,
|
298
|
+
"length": len(content),
|
264
299
|
"tokens": tokens,
|
265
300
|
"indexed": False,
|
266
301
|
}
|
267
302
|
if auto_index:
|
268
303
|
result["indexed"] = True
|
269
304
|
result["doc_ids"] = doc_ids
|
305
|
+
|
270
306
|
if real_path:
|
271
307
|
result["real_path"] = real_path
|
272
308
|
|
@@ -275,6 +311,83 @@ class Context:
|
|
275
311
|
|
276
312
|
return result
|
277
313
|
|
314
|
+
def read_content(self, attachment: AttachmentItem, path: str, prompt: str) -> str:
|
315
|
+
"""
|
316
|
+
Read content from attachment
|
317
|
+
|
318
|
+
:param attachment: AttachmentItem instance
|
319
|
+
:param path: source file path
|
320
|
+
:param prompt: user input prompt
|
321
|
+
:return: content
|
322
|
+
"""
|
323
|
+
content = ""
|
324
|
+
if attachment.type == AttachmentItem.TYPE_FILE:
|
325
|
+
loader_kwargs = {
|
326
|
+
"prompt": prompt,
|
327
|
+
} # extra loader kwargs
|
328
|
+
content = self.window.core.idx.indexing.read_text_content(
|
329
|
+
path=path,
|
330
|
+
loader_kwargs=loader_kwargs,
|
331
|
+
)
|
332
|
+
elif attachment.type == AttachmentItem.TYPE_URL:
|
333
|
+
# directly from path
|
334
|
+
with open(path, "r", encoding="utf-8") as f:
|
335
|
+
content = f.read() # already crawled
|
336
|
+
|
337
|
+
return content
|
338
|
+
|
339
|
+
def store_content(self, attachment: AttachmentItem, dir: str) -> str:
|
340
|
+
"""
|
341
|
+
Prepare content for attachment
|
342
|
+
|
343
|
+
:param attachment: AttachmentItem instance
|
344
|
+
:param dir: directory to save content
|
345
|
+
:return: content
|
346
|
+
"""
|
347
|
+
path = None
|
348
|
+
if attachment.type == AttachmentItem.TYPE_FILE:
|
349
|
+
# copy raw file
|
350
|
+
name = os.path.basename(attachment.path)
|
351
|
+
path = os.path.join(dir, name)
|
352
|
+
if os.path.exists(path):
|
353
|
+
os.remove(path)
|
354
|
+
copyfile(attachment.path, path)
|
355
|
+
elif attachment.type == AttachmentItem.TYPE_URL:
|
356
|
+
web_type = self.window.core.idx.indexing.get_webtype(attachment.path)
|
357
|
+
content = self.window.core.idx.indexing.read_web_content(
|
358
|
+
url=attachment.path,
|
359
|
+
type=web_type, # webpage, default, TODO: add more types
|
360
|
+
extra_args={},
|
361
|
+
)
|
362
|
+
# src file save
|
363
|
+
name = "url.txt"
|
364
|
+
path = os.path.join(dir, name)
|
365
|
+
if os.path.exists(path):
|
366
|
+
os.remove(path)
|
367
|
+
with open(path, "w", encoding="utf-8") as f:
|
368
|
+
f.write(content)
|
369
|
+
return path
|
370
|
+
|
371
|
+
def index_attachment(self, type: str, source: str, idx_path: str, documents: list = None) -> list:
|
372
|
+
"""
|
373
|
+
Index attachment
|
374
|
+
|
375
|
+
:param type: attachment type
|
376
|
+
:param source: source file or URL
|
377
|
+
:param idx_path: index path
|
378
|
+
:param documents: list of documents (optional)
|
379
|
+
:return: list of doc IDs
|
380
|
+
"""
|
381
|
+
model = None
|
382
|
+
doc_ids = []
|
383
|
+
if type == AttachmentItem.TYPE_FILE:
|
384
|
+
doc_ids = self.window.core.idx.indexing.index_attachment(source, idx_path, model, documents)
|
385
|
+
elif type == AttachmentItem.TYPE_URL:
|
386
|
+
doc_ids = self.window.core.idx.indexing.index_attachment_web(source, idx_path, model, documents)
|
387
|
+
if self.is_verbose():
|
388
|
+
print("Attachments: indexed. Doc IDs: {}".format(doc_ids))
|
389
|
+
return doc_ids
|
390
|
+
|
278
391
|
def duplicate(self, from_meta_id: int, to_meta_id: int) -> bool:
|
279
392
|
"""
|
280
393
|
Duplicate attachments from one meta to another
|
@@ -427,6 +540,30 @@ class Context:
|
|
427
540
|
except Exception as e:
|
428
541
|
self.window.core.debug.error("Attachment.truncate", e)
|
429
542
|
|
543
|
+
def reset(self):
|
544
|
+
"""Reset context info"""
|
545
|
+
self.last_used_item = None
|
546
|
+
self.last_used_content = None
|
547
|
+
self.last_used_context = None
|
548
|
+
self.last_files = []
|
549
|
+
self.last_urls = []
|
550
|
+
|
551
|
+
def get_used_files(self) -> list:
|
552
|
+
"""
|
553
|
+
Get last used files
|
554
|
+
|
555
|
+
:return: list of files
|
556
|
+
"""
|
557
|
+
return self.last_files
|
558
|
+
|
559
|
+
def get_used_urls(self) -> list:
|
560
|
+
"""
|
561
|
+
Get last used URLs
|
562
|
+
|
563
|
+
:return: list of URLs
|
564
|
+
"""
|
565
|
+
return self.last_urls
|
566
|
+
|
430
567
|
def is_verbose(self) -> bool:
|
431
568
|
"""
|
432
569
|
Check if verbose mode is enabled
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.11.
|
9
|
+
# Updated Date: 2024.11.26 02:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
class AttachmentsDebug:
|
@@ -36,6 +36,8 @@ class AttachmentsDebug:
|
|
36
36
|
'send': attachment.send,
|
37
37
|
'key': key,
|
38
38
|
'mode': mode,
|
39
|
+
'type': attachment.type,
|
40
|
+
'consumed': attachment.consumed,
|
39
41
|
}
|
40
42
|
self.window.core.debug.add(self.id, attachment.name, str(data))
|
41
43
|
|
pygpt_net/core/debug/context.py
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.11.
|
9
|
+
# Updated Date: 2024.11.26 04:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
import json
|
12
12
|
|
@@ -45,6 +45,10 @@ class ContextDebug:
|
|
45
45
|
self.window.core.debug.add(self.id, 'CMD (current)', str(self.window.core.ctx.current_cmd))
|
46
46
|
self.window.core.debug.add(self.id, 'CMD schema (current)', str(self.window.core.ctx.current_cmd_schema))
|
47
47
|
self.window.core.debug.add(self.id, 'FUNCTIONS (current)', str(self.get_functions()))
|
48
|
+
self.window.core.debug.add(self.id, 'Attachments: last used content',
|
49
|
+
str(self.window.core.attachments.context.last_used_content))
|
50
|
+
self.window.core.debug.add(self.id, 'Attachments: last used context',
|
51
|
+
str(self.window.core.attachments.context.last_used_context))
|
48
52
|
|
49
53
|
current = None
|
50
54
|
if self.window.core.ctx.get_current() is not None:
|
pygpt_net/core/idx/indexing.py
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.11.
|
9
|
+
# Updated Date: 2024.11.26 04:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import datetime
|
@@ -349,6 +349,65 @@ class Indexing:
|
|
349
349
|
data.append(doc.text)
|
350
350
|
return "\n".join(data)
|
351
351
|
|
352
|
+
def read_web_content(
|
353
|
+
self,
|
354
|
+
url: str,
|
355
|
+
type: str = "webpage",
|
356
|
+
extra_args: dict = None
|
357
|
+
) -> str:
|
358
|
+
"""
|
359
|
+
Get content from external resource
|
360
|
+
|
361
|
+
:param url: external url to index
|
362
|
+
:param type: type of URL (webpage, feed, etc.)
|
363
|
+
:param extra_args: extra arguments for loader
|
364
|
+
:return: file content
|
365
|
+
"""
|
366
|
+
docs = self.read_web(url, type, extra_args)
|
367
|
+
data = []
|
368
|
+
for doc in docs:
|
369
|
+
data.append(doc.text)
|
370
|
+
return "\n".join(data)
|
371
|
+
|
372
|
+
def read_web(
|
373
|
+
self,
|
374
|
+
url: str,
|
375
|
+
type: str = "webpage",
|
376
|
+
extra_args: dict = None,
|
377
|
+
) -> list[Document]:
|
378
|
+
"""
|
379
|
+
Read data from external resource
|
380
|
+
|
381
|
+
:param url: external url to index
|
382
|
+
:param type: type of URL (webpage, feed, etc.)
|
383
|
+
:param extra_args: extra arguments for loader
|
384
|
+
:return: list of documents
|
385
|
+
"""
|
386
|
+
documents = []
|
387
|
+
|
388
|
+
# check if web loader for defined type exists
|
389
|
+
if type not in self.loaders["web"]:
|
390
|
+
raise ValueError("No web loader for type: {}".format(type))
|
391
|
+
|
392
|
+
try:
|
393
|
+
if "url" not in extra_args:
|
394
|
+
extra_args["url"] = url
|
395
|
+
|
396
|
+
# get unique external content identifier
|
397
|
+
unique_id = self.data_providers[type].get_external_id(extra_args)
|
398
|
+
self.window.core.idx.log("Loading web documents from: {}".format(unique_id))
|
399
|
+
self.window.core.idx.log("Using web loader for type: {}".format(type))
|
400
|
+
|
401
|
+
args = self.data_providers[type].prepare_args(**extra_args)
|
402
|
+
|
403
|
+
# get documents from external resource
|
404
|
+
documents = self.loaders["web"][type].load_data(
|
405
|
+
**args
|
406
|
+
)
|
407
|
+
except Exception as e:
|
408
|
+
self.window.core.debug.log(e)
|
409
|
+
return documents
|
410
|
+
|
352
411
|
def prepare_document(self, doc: Document):
|
353
412
|
"""
|
354
413
|
Prepare document to store
|
@@ -924,7 +983,8 @@ class Indexing:
|
|
924
983
|
self,
|
925
984
|
file_path: str,
|
926
985
|
index_path: str,
|
927
|
-
model: ModelItem = None
|
986
|
+
model: ModelItem = None,
|
987
|
+
documents: list = None,
|
928
988
|
) -> list:
|
929
989
|
"""
|
930
990
|
Index context attachment
|
@@ -932,6 +992,45 @@ class Indexing:
|
|
932
992
|
:param file_path: path to file to index
|
933
993
|
:param index_path: index path
|
934
994
|
:param model: model
|
995
|
+
:param documents: list of documents (optional)
|
996
|
+
:return: response
|
997
|
+
"""
|
998
|
+
if model is None:
|
999
|
+
model = self.window.core.models.from_defaults()
|
1000
|
+
|
1001
|
+
service_context = self.window.core.idx.llm.get_service_context(model=model)
|
1002
|
+
index = self.window.core.idx.storage.get_ctx_idx(index_path, service_context=service_context) # get or create ctx index
|
1003
|
+
|
1004
|
+
idx = "tmp:{}".format(index_path) # tmp index id
|
1005
|
+
self.window.core.idx.log("Indexing to context attachment index: {}...".format(idx))
|
1006
|
+
|
1007
|
+
doc_ids = []
|
1008
|
+
if documents is None:
|
1009
|
+
documents = self.get_documents(file_path)
|
1010
|
+
for d in documents:
|
1011
|
+
if self.is_stopped(): # force stop
|
1012
|
+
break
|
1013
|
+
self.prepare_document(d)
|
1014
|
+
self.index_document(index, d)
|
1015
|
+
doc_ids.append(d.id_) # add to index
|
1016
|
+
|
1017
|
+
self.window.core.idx.storage.store_ctx_idx(index_path, index)
|
1018
|
+
return doc_ids
|
1019
|
+
|
1020
|
+
def index_attachment_web(
|
1021
|
+
self,
|
1022
|
+
url: str,
|
1023
|
+
index_path: str,
|
1024
|
+
model: ModelItem = None,
|
1025
|
+
documents: list = None,
|
1026
|
+
) -> list:
|
1027
|
+
"""
|
1028
|
+
Index context attachment
|
1029
|
+
|
1030
|
+
:param url: URL to index
|
1031
|
+
:param index_path: index path
|
1032
|
+
:param model: model
|
1033
|
+
:param documents: list of documents (optional)
|
935
1034
|
:return: response
|
936
1035
|
"""
|
937
1036
|
if model is None:
|
@@ -943,8 +1042,14 @@ class Indexing:
|
|
943
1042
|
idx = "tmp:{}".format(index_path) # tmp index id
|
944
1043
|
self.window.core.idx.log("Indexing to context attachment index: {}...".format(idx))
|
945
1044
|
|
1045
|
+
web_type = self.get_webtype(url)
|
946
1046
|
doc_ids = []
|
947
|
-
documents
|
1047
|
+
if documents is None:
|
1048
|
+
documents = self.read_web(
|
1049
|
+
url=url,
|
1050
|
+
type=web_type,
|
1051
|
+
extra_args={},
|
1052
|
+
)
|
948
1053
|
for d in documents:
|
949
1054
|
if self.is_stopped(): # force stop
|
950
1055
|
break
|
@@ -955,20 +1060,23 @@ class Indexing:
|
|
955
1060
|
self.window.core.idx.storage.store_ctx_idx(index_path, index)
|
956
1061
|
return doc_ids
|
957
1062
|
|
1063
|
+
def get_webtype(self, url: str) -> str:
|
958
1064
|
"""
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
response = index.as_query_engine(
|
964
|
-
llm=llm,
|
965
|
-
streaming=False,
|
966
|
-
).query(query) # query with default prompt
|
967
|
-
if response:
|
968
|
-
ctx.add_doc_meta(self.get_metadata(response.source_nodes)) # store metadata
|
969
|
-
output = response.
|
970
|
-
return output
|
1065
|
+
Get web loader type by URL
|
1066
|
+
|
1067
|
+
:param url: URL
|
1068
|
+
:return: web loader type
|
971
1069
|
"""
|
1070
|
+
type = "webpage" # default
|
1071
|
+
for id in self.data_providers:
|
1072
|
+
loader = self.data_providers[id]
|
1073
|
+
if hasattr(loader, "is_supported_attachment"):
|
1074
|
+
if loader.is_supported_attachment(url):
|
1075
|
+
type = id
|
1076
|
+
break
|
1077
|
+
print("Selected web data loader: {}".format(type))
|
1078
|
+
return type
|
1079
|
+
|
972
1080
|
def remove_attachment(self, index_path: str, doc_id: str) -> bool:
|
973
1081
|
"""
|
974
1082
|
Remove document from index
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.11.
|
9
|
+
# Updated Date: 2024.11.26 04:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
class PidData():
|
@@ -17,6 +17,7 @@ class PidData():
|
|
17
17
|
self.meta = meta
|
18
18
|
self.images_appended = []
|
19
19
|
self.urls_appended = []
|
20
|
+
self.files_appended = []
|
20
21
|
self.buffer = ""
|
21
22
|
self.prev_position = None # previous cursor position (for chunk append)
|
22
23
|
self.is_cmd = False
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.11.
|
9
|
+
# Updated Date: 2024.11.26 04:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
class PidData():
|
@@ -17,5 +17,6 @@ class PidData():
|
|
17
17
|
self.meta = meta
|
18
18
|
self.images_appended = []
|
19
19
|
self.urls_appended = []
|
20
|
+
self.files_appended = []
|
20
21
|
self.buffer = ""
|
21
22
|
self.is_cmd = False
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.11.
|
9
|
+
# Updated Date: 2024.11.26 04:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
import os
|
@@ -167,10 +167,12 @@ class Body:
|
|
167
167
|
if num is not None and num_all is not None and num_all > 1:
|
168
168
|
num_str = " [{}]".format(num)
|
169
169
|
url, path = self.window.core.filesystem.extract_local_url(url)
|
170
|
-
|
171
|
-
<
|
170
|
+
basename = os.path.basename(path)
|
171
|
+
return """<div class="extra-src-img-box" title="{url}"><div class="img-outer"><div class="img-wrapper"><a href="{url}"><img src="{path}" class="image"></a></div>
|
172
|
+
<a href="{url}" class="title">{title}</a></div></div>""". \
|
172
173
|
format(prefix=trans('chat.prefix.img'),
|
173
174
|
url=url,
|
175
|
+
title=basename,
|
174
176
|
path=path,
|
175
177
|
num=num_str)
|
176
178
|
|
@@ -183,13 +185,19 @@ class Body:
|
|
183
185
|
:param num_all: number of all URLs
|
184
186
|
:return: HTML code
|
185
187
|
"""
|
188
|
+
icon_path = os.path.join(
|
189
|
+
self.window.core.config.get_app_path(),
|
190
|
+
"data", "icons", "public_filled.svg"
|
191
|
+
)
|
192
|
+
icon = '<img src="file://{}" width="25" height="25" valign="middle" class="extra-src-icon">'.format(icon_path)
|
186
193
|
num_str = ""
|
187
194
|
if num is not None and num_all is not None and num_all > 1:
|
188
195
|
num_str = " [{}]".format(num)
|
189
|
-
return """<b>{
|
190
|
-
format(
|
191
|
-
|
192
|
-
|
196
|
+
return """{icon}<b>{num}</b> <a href="{url}" title="{url}">{url}</a>""". \
|
197
|
+
format(url=url,
|
198
|
+
num=num_str,
|
199
|
+
icon=icon,
|
200
|
+
)
|
193
201
|
|
194
202
|
def get_docs_html(self, docs: list) -> str:
|
195
203
|
"""
|
@@ -223,8 +231,16 @@ class Body:
|
|
223
231
|
except Exception as e:
|
224
232
|
pass
|
225
233
|
|
234
|
+
icon_path = os.path.join(
|
235
|
+
self.window.core.config.get_app_path(),
|
236
|
+
"data", "icons", "db.svg"
|
237
|
+
)
|
238
|
+
icon = '<img src="file://{}" width="25" height="25" valign="middle" class="extra-src-icon">'.format(icon_path)
|
226
239
|
if html_sources != "":
|
227
|
-
html += "<p
|
240
|
+
html += "<p>{icon}<small><b>{prefix}:</b></small></p>".format(
|
241
|
+
prefix=trans('chat.prefix.doc'),
|
242
|
+
icon=icon,
|
243
|
+
)
|
228
244
|
html += "<div class=\"cmd\">"
|
229
245
|
html += "<p>" + html_sources + "</p>"
|
230
246
|
html += "</div> "
|
@@ -239,15 +255,21 @@ class Body:
|
|
239
255
|
:param num_all: number of all files
|
240
256
|
:return: HTML code
|
241
257
|
"""
|
258
|
+
icon_path = os.path.join(
|
259
|
+
self.window.core.config.get_app_path(),
|
260
|
+
"data", "icons", "attachments.svg"
|
261
|
+
)
|
262
|
+
icon = '<img src="file://{}" width="25" height="25" valign="middle" class="extra-src-icon">'.format(icon_path)
|
242
263
|
num_str = ""
|
243
264
|
if num is not None and num_all is not None and num_all > 1:
|
244
265
|
num_str = " [{}]".format(num)
|
245
266
|
url, path = self.window.core.filesystem.extract_local_url(url)
|
246
|
-
return """<
|
247
|
-
format(
|
248
|
-
url=url,
|
267
|
+
return """{icon} <b>{num}</b> <a href="{url}">{path}</a>""". \
|
268
|
+
format(url=url,
|
249
269
|
path=path,
|
250
|
-
num=num_str
|
270
|
+
num=num_str,
|
271
|
+
icon=icon,
|
272
|
+
)
|
251
273
|
|
252
274
|
def prepare_tool_extra(self, ctx: CtxItem) -> str:
|
253
275
|
"""
|
pygpt_net/core/render/web/pid.py
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
# GitHub: https://github.com/szczyglis-dev/py-gpt #
|
7
7
|
# MIT License #
|
8
8
|
# Created By : Marcin Szczygliński #
|
9
|
-
# Updated Date: 2024.11.
|
9
|
+
# Updated Date: 2024.11.26 04:00:00 #
|
10
10
|
# ================================================== #
|
11
11
|
|
12
12
|
from pygpt_net.utils import trans
|
@@ -20,6 +20,7 @@ class PidData():
|
|
20
20
|
self.meta = meta
|
21
21
|
self.images_appended = []
|
22
22
|
self.urls_appended = []
|
23
|
+
self.files_appended = []
|
23
24
|
self.buffer = "" # stream buffer
|
24
25
|
self.is_cmd = False
|
25
26
|
self.html = "" # html buffer
|