projectdavid-1.30.4-py3-none-any.whl → projectdavid-1.31.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of projectdavid might be problematic.
- projectdavid/clients/file_processor.py +150 -54
- projectdavid/clients/vectors.py +61 -61
- {projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/METADATA +3 -1
- {projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/RECORD +7 -7
- {projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/WHEEL +0 -0
- {projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/licenses/LICENSE +0 -0
- {projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/top_level.txt +0 -0
projectdavid/clients/file_processor.py CHANGED
@@ -1,19 +1,21 @@
 import asyncio
 import csv
+import json
 import re
+import textwrap
 from concurrent.futures import ThreadPoolExecutor
-from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, List, Tuple, Union
 
-try:
-    from typing import LiteralString
-except ImportError:
+try:  # Python 3.11+
+    from typing import LiteralString
+except ImportError:  # 3.9–3.10
     from typing_extensions import LiteralString
 
 import numpy as np
 import pdfplumber
-import
+from docx import Document
+from pptx import Presentation
 from projectdavid_common import UtilsInterface
 from sentence_transformers import SentenceTransformer
 
@@ -21,60 +23,106 @@ log = UtilsInterface.LoggingUtility()
 
 
 class FileProcessor:
+    # ------------------------------------------------------------------ #
+    # Construction
+    # ------------------------------------------------------------------ #
     def __init__(self, max_workers: int = 4, chunk_size: int = 512):
         self.embedding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
         self.embedding_model_name = "paraphrase-MiniLM-L6-v2"
         self._executor = ThreadPoolExecutor(max_workers=max_workers)
 
-        #
+        # token limits
         self.max_seq_length = self.embedding_model.get_max_seq_length()
         self.special_tokens_count = 2
         self.effective_max_length = self.max_seq_length - self.special_tokens_count
-
-        # chunk_size cannot exceed 4× model max
         self.chunk_size = min(chunk_size, self.effective_max_length * 4)
 
         log.info("Initialized optimized FileProcessor")
 
+    # ------------------------------------------------------------------ #
+    # Generic validators
+    # ------------------------------------------------------------------ #
     def validate_file(self, file_path: Path):
-        """Ensure file exists and is under 100
+        """Ensure file exists and is under 100 MB."""
         max_size = 100 * 1024 * 1024
         if not file_path.exists():
             raise FileNotFoundError(f"File not found: {file_path}")
         if file_path.stat().st_size > max_size:
             mb = max_size // (1024 * 1024)
-            raise ValueError(f"{file_path.name} > {mb}
+            raise ValueError(f"{file_path.name} > {mb} MB limit")
 
+    # ------------------------------------------------------------------ #
+    # File-type detection (simple extension map – NO libmagic)
+    # ------------------------------------------------------------------ #
     def _detect_file_type(self, file_path: Path) -> str:
-        """
+        """
+        Return one of:
+
+          • 'pdf'    • 'csv'    • 'json'
+          • 'office' (.doc/.docx/.pptx)
+          • 'text'   (code / markup / plain text)
+
+        Raises *ValueError* if the extension is not recognised.
+        """
         suffix = file_path.suffix.lower()
+
         if suffix == ".pdf":
             return "pdf"
         if suffix == ".csv":
             return "csv"
-        if suffix
+        if suffix == ".json":
+            return "json"
+        if suffix in {".doc", ".docx", ".pptx"}:
+            return "office"
+
+        text_exts = {
+            ".txt",
+            ".md",
+            ".rst",
+            ".c",
+            ".cpp",
+            ".cs",
+            ".go",
+            ".java",
+            ".js",
+            ".ts",
+            ".php",
+            ".py",
+            ".rb",
+            ".sh",
+            ".tex",
+            ".html",
+            ".css",
+        }
+        if suffix in text_exts:
             return "text"
-        return "unknown"
 
+        raise ValueError(f"Unsupported file type: {file_path.name} (ext={suffix})")
+
+    # ------------------------------------------------------------------ #
+    # Public entry-point
+    # ------------------------------------------------------------------ #
     async def process_file(self, file_path: Union[str, Path]) -> Dict[str, Any]:
-        """
-        Async entrypoint: validate, detect type, then dispatch to the
-        appropriate processor (_process_pdf, _process_text, or _process_csv).
-        """
+        """Validate → detect → dispatch to the appropriate processor."""
        file_path = Path(file_path)
         self.validate_file(file_path)
         ftype = self._detect_file_type(file_path)
 
-
-
-
-
-
-
-
+        dispatch_map = {
+            "pdf": self._process_pdf,
+            "text": self._process_text,
+            "csv": self._process_csv,
+            "office": self._process_office,
+            "json": self._process_json,
+        }
+        if ftype not in dispatch_map:
+            raise ValueError(f"Unsupported file type: {file_path.suffix}")
 
-
+        return await dispatch_map[ftype](file_path)
 
+    # ------------------------------------------------------------------ #
+    # PDF
+    # ------------------------------------------------------------------ #
     async def _process_pdf(self, file_path: Path) -> Dict[str, Any]:
         page_chunks, doc_meta = await self._extract_text(file_path)
         all_chunks, line_data = [], []
@@ -82,7 +130,6 @@ class FileProcessor:
         for page_text, page_num, line_nums in page_chunks:
             lines = page_text.split("\n")
             buf, buf_lines, length = [], [], 0
-
             for line, ln in zip(lines, line_nums):
                 l = len(line) + 1
                 if length + l <= self.chunk_size:
@@ -94,12 +141,9 @@ class FileProcessor:
                         all_chunks.append("\n".join(buf))
                         line_data.append({"page": page_num, "lines": buf_lines})
                         buf, buf_lines, length = [], [], 0
-
-                    # split any oversized line
                     for piece in self._split_oversized_chunk(line):
                         all_chunks.append(piece)
                         line_data.append({"page": page_num, "lines": [ln]})
-
             if buf:
                 all_chunks.append("\n".join(buf))
                 line_data.append({"page": page_num, "lines": buf_lines})
@@ -107,7 +151,6 @@ class FileProcessor:
         vectors = await asyncio.gather(
             *[self._encode_chunk_async(c) for c in all_chunks]
         )
-
         return {
             "content": "\n\n".join(all_chunks),
             "metadata": {
@@ -121,6 +164,9 @@ class FileProcessor:
                 "line_data": line_data,
         }
 
+    # ------------------------------------------------------------------ #
+    # Plain-text / code / markup
+    # ------------------------------------------------------------------ #
     async def _process_text(self, file_path: Path) -> Dict[str, Any]:
         text, extra_meta, _ = await self._extract_text(file_path)
         chunks = self._chunk_text(text)
@@ -137,15 +183,12 @@ class FileProcessor:
             "vectors": [v.tolist() for v in vectors],
         }
 
-    #
+    # ------------------------------------------------------------------ #
+    # CSV
+    # ------------------------------------------------------------------ #
     async def _process_csv(
         self, file_path: Path, text_field: str = "description"
     ) -> Dict[str, Any]:
-        """
-        Read each row, embed the `text_field`, and collect per-row metadata
-        from all other columns.
-        """
-        # load rows synchronously
         rows, texts, metas = [], [], []
         with file_path.open(newline="", encoding="utf-8") as f:
             reader = csv.DictReader(f)
@@ -154,27 +197,67 @@ class FileProcessor:
                 if not txt:
                     continue
                 texts.append(txt)
-
-                row_meta = {k: v for k, v in row.items() if k != text_field and v}
-                metas.append(row_meta)
+                metas.append({k: v for k, v in row.items() if k != text_field and v})
 
-        # embed in parallel
         vectors = await asyncio.gather(*[self._encode_chunk_async(t) for t in texts])
+        return {
+            "content": None,
+            "metadata": {"source": str(file_path), "rows": len(texts), "type": "csv"},
+            "chunks": texts,
+            "vectors": [v.tolist() for v in vectors],
+            "csv_row_metadata": metas,
+        }
+
+    # ------------------------------------------------------------------ #
+    # Office docs (.doc/.docx/.pptx)
+    # ------------------------------------------------------------------ #
+    async def _process_office(self, file_path: Path) -> Dict[str, Any]:
+        loop = asyncio.get_event_loop()
+        if file_path.suffix.lower() in {".doc", ".docx"}:
+            text = await loop.run_in_executor(
+                self._executor, self._read_docx, file_path
+            )
+        else:  # .pptx
+            text = await loop.run_in_executor(
+                self._executor, self._read_pptx, file_path
+            )
 
+        chunks = self._chunk_text(text)
+        vectors = await asyncio.gather(*[self._encode_chunk_async(c) for c in chunks])
         return {
-            "content":
+            "content": text,
             "metadata": {
                 "source": str(file_path),
-                "
-                "type": "
+                "chunks": len(chunks),
+                "type": "office",
             },
-            "chunks":
+            "chunks": chunks,
             "vectors": [v.tolist() for v in vectors],
-            "csv_row_metadata": metas,
         }
 
-    #
+    # ------------------------------------------------------------------ #
+    # JSON
+    # ------------------------------------------------------------------ #
+    async def _process_json(self, file_path: Path) -> Dict[str, Any]:
+        text = await asyncio.get_event_loop().run_in_executor(
+            self._executor, self._read_json, file_path
+        )
+        chunks = self._chunk_text(text)
+        vectors = await asyncio.gather(*[self._encode_chunk_async(c) for c in chunks])
+        return {
+            "content": text,
+            "metadata": {
+                "source": str(file_path),
+                "chunks": len(chunks),
+                "type": "json",
+            },
+            "chunks": chunks,
+            "vectors": [v.tolist() for v in vectors],
+        }
 
+    # ------------------------------------------------------------------ #
+    # Shared helpers
+    # ------------------------------------------------------------------ #
     async def _extract_text(self, file_path: Path) -> Union[
         Tuple[List[Tuple[str, int, List[int]]], Dict[str, Any]],
         Tuple[str, Dict[str, Any], List[int]],
@@ -202,10 +285,8 @@ class FileProcessor:
             )
             for i, page in enumerate(pdf.pages, start=1):
                 lines = page.extract_text_lines()
-                txts, nums = [], []
-                # sort by vertical position
                 sorted_lines = sorted(lines, key=lambda x: x["top"])
-
+                txts, nums = [], []
                 for ln_idx, L in enumerate(sorted_lines, start=1):
                     t = L.get("text", "").strip()
                     if t:
@@ -221,6 +302,23 @@ class FileProcessor:
         except UnicodeDecodeError:
             return file_path.read_text(encoding="latin-1")
 
+    def _read_docx(self, path: Path) -> str:
+        doc = Document(path)
+        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
+
+    def _read_pptx(self, path: Path) -> str:
+        prs = Presentation(path)
+        slides = []
+        for slide in prs.slides:
+            chunks = [sh.text for sh in slide.shapes if hasattr(sh, "text")]
+            slides.append("\n".join(filter(None, chunks)))
+        return "\n\n".join(slides)
+
+    def _read_json(self, path: Path) -> str:
+        obj = json.loads(path.read_text(encoding="utf-8"))
+        pretty = json.dumps(obj, indent=2, ensure_ascii=False)
+        return "\n".join(textwrap.wrap(pretty, width=120))
+
     async def _encode_chunk_async(self, chunk: str) -> np.ndarray:
         return await asyncio.get_event_loop().run_in_executor(
             self._executor,
@@ -233,11 +331,12 @@ class FileProcessor:
             )[0],
         )
 
+    # ------------------------------------------------------------------ #
+    # Text chunking helpers
+    # ------------------------------------------------------------------ #
     def _chunk_text(self, text: str) -> List[str]:
-        # split into sentences, then re-chunk to token limits
        sentences = re.split(r"(?<=[\.!?])\s+", text)
         chunks, buf, length = [], [], 0
-
         for sent in sentences:
             slen = len(sent) + 1
             if length + slen <= self.chunk_size:
@@ -247,15 +346,12 @@ class FileProcessor:
                 if buf:
                     chunks.append(" ".join(buf))
                     buf, length = [], 0
-                # sentence itself may be too big
                 while len(sent) > self.chunk_size:
                     part, sent = sent[: self.chunk_size], sent[self.chunk_size :]
                     chunks.append(part)
                 buf, length = [sent], len(sent)
-
         if buf:
             chunks.append(" ".join(buf))
-
         return chunks
 
     def _split_oversized_chunk(self, chunk: str, tokens: List[str] = None) -> List[str]:
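With the new dispatch map, process_file is a single entry point for all five supported types. A minimal usage sketch (the import path follows the package layout in RECORD below; the file name is hypothetical, and the new python-docx/python-pptx dependencies must be installed):

    import asyncio

    from projectdavid.clients.file_processor import FileProcessor

    async def main() -> None:
        processor = FileProcessor(max_workers=4, chunk_size=512)
        # .docx now routes through _process_office; an unrecognised extension
        # raises ValueError instead of falling through as "unknown".
        result = await processor.process_file("quarterly_report.docx")
        print(result["metadata"])       # {'source': ..., 'chunks': ..., 'type': 'office'}
        print(len(result["vectors"]))   # one embedding vector per chunk

    asyncio.run(main())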
projectdavid/clients/vectors.py CHANGED
@@ -561,6 +561,67 @@ class VectorStoreClient:
             raise FileNotFoundError(f"File not found: {p}")
         return self._run_sync(self._add_file_async(vector_store_id, p, user_metadata))
 
+    def delete_vector_store(
+        self,
+        vector_store_id: str,
+        permanent: bool = False,
+    ) -> Dict[str, Any]:
+        return self._run_sync(self._delete_vs_async(vector_store_id, permanent))
+
+    def delete_file_from_vector_store(
+        self,
+        vector_store_id: str,
+        file_path: str,
+    ) -> Dict[str, Any]:
+        return self._run_sync(self._delete_file_async(vector_store_id, file_path))
+
+    def list_store_files(
+        self,
+        vector_store_id: str,
+    ) -> List[ValidationInterface.VectorStoreFileRead]:
+        return self._run_sync(self._list_store_files_async(vector_store_id))
+
+    def update_vector_store_file_status(
+        self,
+        vector_store_id: str,
+        file_id: str,
+        status: ValidationInterface.StatusEnum,
+        error_message: Optional[str] = None,
+    ) -> ValidationInterface.VectorStoreFileRead:
+        return self._run_sync(
+            self._update_file_status_async(
+                vector_store_id, file_id, status, error_message
+            )
+        )
+
+    def get_vector_stores_for_assistant(
+        self,
+        assistant_id: str,
+    ) -> List[ValidationInterface.VectorStoreRead]:
+        return self._run_sync(self._get_assistant_vs_async(assistant_id))
+
+    def attach_vector_store_to_assistant(
+        self,
+        vector_store_id: str,
+        assistant_id: str,
+    ) -> bool:
+        return self._run_sync(self._attach_vs_async(vector_store_id, assistant_id))
+
+    def detach_vector_store_from_assistant(
+        self,
+        vector_store_id: str,
+        assistant_id: str,
+    ) -> bool:
+        return self._run_sync(self._detach_vs_async(vector_store_id, assistant_id))
+
+    def retrieve_vector_store_sync(
+        self,
+        vector_store_id: str,
+    ) -> ValidationInterface.VectorStoreRead:
+        resp = self._sync_api_client.get(f"/v1/vector-stores/{vector_store_id}")
+        resp.raise_for_status()
+        return ValidationInterface.VectorStoreRead.model_validate(resp.json())
+
     def vector_file_search_raw(
         self,
         vector_store_id: str,
@@ -627,67 +688,6 @@ class VectorStoreClient:
         # 4️⃣ Wrap everything into an OpenAI envelope
         return make_envelope(query_text, hits, answer_text)
 
-    def delete_vector_store(
-        self,
-        vector_store_id: str,
-        permanent: bool = False,
-    ) -> Dict[str, Any]:
-        return self._run_sync(self._delete_vs_async(vector_store_id, permanent))
-
-    def delete_file_from_vector_store(
-        self,
-        vector_store_id: str,
-        file_path: str,
-    ) -> Dict[str, Any]:
-        return self._run_sync(self._delete_file_async(vector_store_id, file_path))
-
-    def list_store_files(
-        self,
-        vector_store_id: str,
-    ) -> List[ValidationInterface.VectorStoreFileRead]:
-        return self._run_sync(self._list_store_files_async(vector_store_id))
-
-    def update_vector_store_file_status(
-        self,
-        vector_store_id: str,
-        file_id: str,
-        status: ValidationInterface.StatusEnum,
-        error_message: Optional[str] = None,
-    ) -> ValidationInterface.VectorStoreFileRead:
-        return self._run_sync(
-            self._update_file_status_async(
-                vector_store_id, file_id, status, error_message
-            )
-        )
-
-    def get_vector_stores_for_assistant(
-        self,
-        assistant_id: str,
-    ) -> List[ValidationInterface.VectorStoreRead]:
-        return self._run_sync(self._get_assistant_vs_async(assistant_id))
-
-    def attach_vector_store_to_assistant(
-        self,
-        vector_store_id: str,
-        assistant_id: str,
-    ) -> bool:
-        return self._run_sync(self._attach_vs_async(vector_store_id, assistant_id))
-
-    def detach_vector_store_from_assistant(
-        self,
-        vector_store_id: str,
-        assistant_id: str,
-    ) -> bool:
-        return self._run_sync(self._detach_vs_async(vector_store_id, assistant_id))
-
-    def retrieve_vector_store_sync(
-        self,
-        vector_store_id: str,
-    ) -> ValidationInterface.VectorStoreRead:
-        resp = self._sync_api_client.get(f"/v1/vector-stores/{vector_store_id}")
-        resp.raise_for_status()
-        return ValidationInterface.VectorStoreRead.model_validate(resp.json())
-
     # ────────────────────────────────────────────────────────────────
     # End‑to‑end: retrieve → (rerank) → synthesize → envelope
     # ────────────────────────────────────────────────────────────────
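The vectors.py change is pure code motion: the second hunk removes the identical sync-wrapper block from its old position below vector_file_search_raw, and each wrapper still delegates to its async counterpart via _run_sync. A hedged sketch of the wrapper surface from the caller's side (client construction and the IDs are assumed, not shown in this diff):

    # Assumes an already-configured VectorStoreClient instance named `client`
    # and illustrative IDs; only the method names and signatures come from the
    # diff above.
    store = client.retrieve_vector_store_sync("vs_123")

    client.attach_vector_store_to_assistant(
        vector_store_id=store.id,
        assistant_id="asst_456",
    )

    for f in client.list_store_files(store.id):
        print(f.id, f.status)

    client.delete_vector_store(store.id, permanent=False)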
{projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: projectdavid
-Version: 1.30.4
+Version: 1.31.1
 Summary: Python SDK for interacting with the Entities Assistant API.
 Author-email: Francis Neequaye Armah <francis.neequaye@projectdavid.co.uk>
 License: PolyForm Noncommercial License 1.0.0
@@ -27,6 +27,8 @@ Requires-Dist: validators<0.35.0,>=0.29.0
 Requires-Dist: sentence-transformers<5.0,>=3.4.0
 Requires-Dist: sseclient-py
 Requires-Dist: requests
+Requires-Dist: python-docx
+Requires-Dist: python-pptx
 Provides-Extra: dev
 Requires-Dist: black>=23.3; extra == "dev"
 Requires-Dist: isort>=5.12; extra == "dev"
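The two new Requires-Dist lines pull in python-docx and python-pptx to back the Office readers added in file_processor.py. A quick way to confirm they are declared on an installed copy (stdlib only; assumes projectdavid 1.31.1 is installed):

    from importlib.metadata import requires, version

    # Prints the installed version and its Requires-Dist entries;
    # python-docx and python-pptx should appear for 1.31.1.
    print(version("projectdavid"))
    for req in requires("projectdavid") or []:
        print(req)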
{projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/RECORD CHANGED
@@ -9,7 +9,7 @@ projectdavid/clients/assistants_client.py,sha256=SsIGa5wPr7ga9WX0ywam3djUF-uWFdk
 projectdavid/clients/base_client.py,sha256=UWl6nr6sxD1_xC6iyptQDR1tnNdFCOrEx5cEUPCRqJE,3417
 projectdavid/clients/base_vector_store.py,sha256=jXivmqAW1bgYcLgIeW-hPxOiWZbs2hCsLy4oWzSvpNI,2061
 projectdavid/clients/event_handler.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-projectdavid/clients/file_processor.py,sha256=
+projectdavid/clients/file_processor.py,sha256=t-Uw-kBP_VmlguMxO9PWY6ANuMAY0PstQDW37wLPF0Q,13980
 projectdavid/clients/file_search.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 projectdavid/clients/files_client.py,sha256=XkIDzbQFGDrd88taf0Kouc_4YJOPIYEHiIyWYLKDofI,15581
 projectdavid/clients/inference_client.py,sha256=xz4ACPv5Tkis604QxO5mJX1inH_TGDfQP-31geETYpE,6609
@@ -20,7 +20,7 @@ projectdavid/clients/threads_client.py,sha256=ekzU5w14zftmtmFkiec3NC90Of-_KVSUY1
 projectdavid/clients/tools_client.py,sha256=GkCVOmwpAoPqVt6aYmH0G1HIFha3iEwR9IIf9teR0j8,11487
 projectdavid/clients/users_client.py,sha256=eCuUb9qvyH1GUFhZu6TRL9zdoK-qzHSs8-Vmrk_0mmg,13729
 projectdavid/clients/vector_store_manager.py,sha256=lk-sWJjo6Z0EHZzjRoKiHPr0GpEXfE4bJBQzmKV8ezc,11372
-projectdavid/clients/vectors.py,sha256=
+projectdavid/clients/vectors.py,sha256=1UNnLN5nsMvVHXK4Yf7iTXGWZfgIjQ9eLQtCBe0Cqew,30986
 projectdavid/constants/platform.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 projectdavid/services/logging_service.py,sha256=jdoRL46E42Ar8JFTDOV-xVD67CulcHSN-xhcEqA5CXQ,2643
 projectdavid/synthesis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,8 +32,8 @@ projectdavid/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
 projectdavid/utils/monitor_launcher.py,sha256=3YAgJdeuaUvq3JGvpA4ymqFsAnk29nH5q93cwStP4hc,2836
 projectdavid/utils/run_monitor.py,sha256=F_WkqIP-qnWH-4llIbileWWLfRj2Q1Cg-ni23SR1rec,3786
 projectdavid/utils/vector_search_formatter.py,sha256=YTe3HPGec26qGY7uxY8_GS8lc4QaN6aNXMzkl29nZpI,1735
-projectdavid-1.
-projectdavid-1.
-projectdavid-1.
-projectdavid-1.
-projectdavid-1.
+projectdavid-1.31.1.dist-info/licenses/LICENSE,sha256=_8yjiEGttpS284BkfhXxfERqTRZW_tUaHiBB0GTJTMg,4563
+projectdavid-1.31.1.dist-info/METADATA,sha256=f-SkJ06HipWaVJZ0W-bECBP7-2OjCNqTNc58kN7A0qw,10781
+projectdavid-1.31.1.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+projectdavid-1.31.1.dist-info/top_level.txt,sha256=kil8GU4s7qYRfNnzGnFHhZnSNRSxgNG-J4HLgQMmMtw,13
+projectdavid-1.31.1.dist-info/RECORD,,
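Each RECORD row has the form path,sha256=<digest>,<size>, where the digest is the urlsafe-base64-encoded SHA-256 of the file with trailing "=" padding stripped (the standard wheel RECORD format). A small sketch for recomputing a row, e.g. to check the refreshed file_processor.py entry against a local checkout (the path is illustrative):

    import base64
    import hashlib
    from pathlib import Path

    def record_row(path: Path) -> str:
        """Rebuild a wheel RECORD row: urlsafe-b64 SHA-256 digest, no padding."""
        data = path.read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode()},{len(data)}"

    print(record_row(Path("projectdavid/clients/file_processor.py")))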
{projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/WHEEL: file without changes
{projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/licenses/LICENSE: file without changes
{projectdavid-1.30.4.dist-info → projectdavid-1.31.1.dist-info}/top_level.txt: file without changes