projectdavid 1.33.12__tar.gz → 1.33.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of projectdavid has been flagged as potentially problematic; see the registry listing for more details.
- {projectdavid-1.33.12 → projectdavid-1.33.14}/CHANGELOG.md +14 -0
- {projectdavid-1.33.12/src/projectdavid.egg-info → projectdavid-1.33.14}/PKG-INFO +1 -1
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/vector_store.md +7 -6
- {projectdavid-1.33.12 → projectdavid-1.33.14}/pyproject.toml +1 -1
- projectdavid-1.33.14/src/projectdavid/clients/file_processor.py +364 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/synchronous_inference_wrapper.py +44 -9
- projectdavid-1.33.12/src/projectdavid/clients/file_processor.py → projectdavid-1.33.14/src/projectdavid/clients/vision-file_processor.py +12 -124
- {projectdavid-1.33.12 → projectdavid-1.33.14/src/projectdavid.egg-info}/PKG-INFO +1 -1
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid.egg-info/SOURCES.txt +1 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/LICENSE +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/MANIFEST.in +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/README.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/assistants.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/code_interpretation.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/database.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/database_assistant_example.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/docker_comtainers.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/file_search.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/files.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/function_call_definition.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/function_calls.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/handling_function_calls.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/inference.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/messages.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/runs.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/streams.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/threads.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/tools.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/users.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/docs/versioning.md +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/setup.cfg +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/__init__.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/_version.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/actions_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/api_key_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/assistants_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/base_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/base_vector_store.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/event_handler.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/file_search.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/files_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/inference_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/messages_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/runs.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/threads_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/tools_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/users_client.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/vector_store_manager.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/vectors.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/constants/platform.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/decorators.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/entity.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/events.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/serializers.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/services/logging_service.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/synthesis/__init__.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/synthesis/llm_synthesizer.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/synthesis/prompt.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/synthesis/reranker.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/synthesis/retriever.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/utils/__init__.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/utils/function_call_suppressor.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/utils/monitor_launcher.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/utils/peek_gate.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/utils/run_monitor.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/utils/vector_search_formatter.py +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid.egg-info/dependency_links.txt +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid.egg-info/requires.txt +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid.egg-info/top_level.txt +0 -0
- {projectdavid-1.33.12 → projectdavid-1.33.14}/tests/test_clients.py +0 -0
{projectdavid-1.33.12 → projectdavid-1.33.14}/CHANGELOG.md

@@ -1,3 +1,17 @@
+## [1.33.14](https://github.com/frankie336/projectdavid/compare/v1.33.13...v1.33.14) (2025-06-16)
+
+
+### Bug Fixes
+
+* Back out from vision support - resource issue. Revisit in grand plan ([3199ba7](https://github.com/frankie336/projectdavid/commit/3199ba7a18b3cfcc0f7306cd8748105f593a1836))
+
+
+## [1.33.13](https://github.com/frankie336/projectdavid/compare/v1.33.12...v1.33.13) (2025-06-13)
+
+
+### Bug Fixes
+
+* restore code_interpreter_stream passthrough.14 ([df2a75f](https://github.com/frankie336/projectdavid/commit/df2a75f47a55d07d42af3a9949ef9bed4496a602))
 ## [1.33.12](https://github.com/frankie336/projectdavid/compare/v1.33.11...v1.33.12) (2025-06-13)
 
 
{projectdavid-1.33.12/src/projectdavid.egg-info → projectdavid-1.33.14}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: projectdavid
-Version: 1.33.12
+Version: 1.33.14
 Summary: Python SDK for interacting with the Entities Assistant API.
 Author-email: Francis Neequaye Armah <francis.neequaye@projectdavid.co.uk>
 License: PolyForm Noncommercial License 1.0.0
{projectdavid-1.33.12 → projectdavid-1.33.14}/docs/vector_store.md

@@ -24,9 +24,8 @@ print(test_user)
 
 # create a vector store
 store = client.vectors.create_vector_store(
-    name='Test Vector Store1'
-
-)
+    name='Test Vector Store1')
+
 print(store)
 ```
 
@@ -81,14 +80,16 @@ At this point, your file has been vectorized to your store.
 
 ---
 
+### Searches
 
-## Supporting image vectors
 
-Entities now ingests and vectorizes a wide range of image formats for semantic search. You can leverage these image embeddings to extend text-only models into powerful multi-modal workflows; enriching chatbots, document search, recommendation engines, and more.
 
 ---
 
-
+
+
+
+
 
 - The assistant will self-select appropriate vector store
 searches using its latent logic when responding to a prompt.
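For orientation, the first hunk above simply collapses the documented call into a single line. A hypothetical, minimal sketch of the reformatted snippet follows; the client construction is an assumption for illustration only (the SDK ships an Entity client in src/projectdavid/entity.py), and only the create_vector_store line is taken from the diff itself.

# Hypothetical sketch — client construction is assumed, not part of the diff.
from projectdavid import Entity  # assumed public export of src/projectdavid/entity.py

client = Entity(api_key="pd-...")  # hypothetical constructor arguments
store = client.vectors.create_vector_store(name='Test Vector Store1')
print(store)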
projectdavid-1.33.14/src/projectdavid/clients/file_processor.py

@@ -0,0 +1,364 @@
+import asyncio
+import csv
+import json
+import re
+import textwrap
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+from typing import Any, Dict, List, Tuple, Union
+
+try:  # Python 3.11+
+    from typing import LiteralString
+except ImportError:  # 3.9–3.10
+    from typing_extensions import LiteralString
+
+import numpy as np
+import pdfplumber
+from docx import Document
+from pptx import Presentation
+from projectdavid_common import UtilsInterface
+from sentence_transformers import SentenceTransformer
+
+log = UtilsInterface.LoggingUtility()
+
+
+class FileProcessor:
+    # ------------------------------------------------------------------ #
+    # Construction
+    # ------------------------------------------------------------------ #
+    def __init__(self, max_workers: int = 4, chunk_size: int = 512):
+        self.embedding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
+        self.embedding_model_name = "paraphrase-MiniLM-L6-v2"
+        self._executor = ThreadPoolExecutor(max_workers=max_workers)
+
+        # token limits
+        self.max_seq_length = self.embedding_model.get_max_seq_length()
+        self.special_tokens_count = 2
+        self.effective_max_length = self.max_seq_length - self.special_tokens_count
+        self.chunk_size = min(chunk_size, self.effective_max_length * 4)
+
+        log.info("Initialized optimized FileProcessor")
+
+    # ------------------------------------------------------------------ #
+    # Generic validators
+    # ------------------------------------------------------------------ #
+    def validate_file(self, file_path: Path):
+        """Ensure file exists and is under 100 MB."""
+        max_size = 100 * 1024 * 1024
+        if not file_path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+        if file_path.stat().st_size > max_size:
+            mb = max_size // (1024 * 1024)
+            raise ValueError(f"{file_path.name} > {mb} MB limit")
+
+    # ------------------------------------------------------------------ #
+    # File-type detection (simple extension map – NO libmagic)
+    # ------------------------------------------------------------------ #
+    def _detect_file_type(self, file_path: Path) -> str:
+        """
+        Return one of:
+
+        • 'pdf'  • 'csv'  • 'json'
+        • 'office' (.doc/.docx/.pptx)
+        • 'text' (code / markup / plain text)
+
+        Raises *ValueError* if the extension is not recognised.
+        """
+        suffix = file_path.suffix.lower()
+
+        if suffix == ".pdf":
+            return "pdf"
+        if suffix == ".csv":
+            return "csv"
+        if suffix == ".json":
+            return "json"
+        if suffix in {".doc", ".docx", ".pptx"}:
+            return "office"
+
+        text_exts = {
+            ".txt",
+            ".md",
+            ".rst",
+            ".c",
+            ".cpp",
+            ".cs",
+            ".go",
+            ".java",
+            ".js",
+            ".ts",
+            ".php",
+            ".py",
+            ".rb",
+            ".sh",
+            ".tex",
+            ".html",
+            ".css",
+        }
+        if suffix in text_exts:
+            return "text"
+
+        raise ValueError(f"Unsupported file type: {file_path.name} (ext={suffix})")
+
+    # ------------------------------------------------------------------ #
+    # Public entry-point
+    # ------------------------------------------------------------------ #
+    async def process_file(self, file_path: Union[str, Path]) -> Dict[str, Any]:
+        """Validate → detect → dispatch to the appropriate processor."""
+        file_path = Path(file_path)
+        self.validate_file(file_path)
+        ftype = self._detect_file_type(file_path)
+
+        dispatch_map = {
+            "pdf": self._process_pdf,
+            "text": self._process_text,
+            "csv": self._process_csv,
+            "office": self._process_office,
+            "json": self._process_json,
+        }
+        if ftype not in dispatch_map:
+            raise ValueError(f"Unsupported file type: {file_path.suffix}")
+
+        return await dispatch_map[ftype](file_path)
+
+    # ------------------------------------------------------------------ #
+    # PDF
+    # ------------------------------------------------------------------ #
+    async def _process_pdf(self, file_path: Path) -> Dict[str, Any]:
+        page_chunks, doc_meta = await self._extract_text(file_path)
+        all_chunks, line_data = [], []
+
+        for page_text, page_num, line_nums in page_chunks:
+            lines = page_text.split("\n")
+            buf, buf_lines, length = [], [], 0
+            for line, ln in zip(lines, line_nums):
+                l = len(line) + 1
+                if length + l <= self.chunk_size:
+                    buf.append(line)
+                    buf_lines.append(ln)
+                    length += l
+                else:
+                    if buf:
+                        all_chunks.append("\n".join(buf))
+                        line_data.append({"page": page_num, "lines": buf_lines})
+                        buf, buf_lines, length = [], [], 0
+                    for piece in self._split_oversized_chunk(line):
+                        all_chunks.append(piece)
+                        line_data.append({"page": page_num, "lines": [ln]})
+            if buf:
+                all_chunks.append("\n".join(buf))
+                line_data.append({"page": page_num, "lines": buf_lines})
+
+        vectors = await asyncio.gather(
+            *[self._encode_chunk_async(c) for c in all_chunks]
+        )
+        return {
+            "content": "\n\n".join(all_chunks),
+            "metadata": {
+                **doc_meta,
+                "source": str(file_path),
+                "chunks": len(all_chunks),
+                "type": "pdf",
+            },
+            "chunks": all_chunks,
+            "vectors": [v.tolist() for v in vectors],
+            "line_data": line_data,
+        }
+
+    # ------------------------------------------------------------------ #
+    # Plain-text / code / markup
+    # ------------------------------------------------------------------ #
+    async def _process_text(self, file_path: Path) -> Dict[str, Any]:
+        text, extra_meta, _ = await self._extract_text(file_path)
+        chunks = self._chunk_text(text)
+        vectors = await asyncio.gather(*[self._encode_chunk_async(c) for c in chunks])
+        return {
+            "content": text,
+            "metadata": {
+                **extra_meta,
+                "source": str(file_path),
+                "chunks": len(chunks),
+                "type": "text",
+            },
+            "chunks": chunks,
+            "vectors": [v.tolist() for v in vectors],
+        }
+
+    # ------------------------------------------------------------------ #
+    # CSV
+    # ------------------------------------------------------------------ #
+    async def _process_csv(
+        self, file_path: Path, text_field: str = "description"
+    ) -> Dict[str, Any]:
+        rows, texts, metas = [], [], []
+        with file_path.open(newline="", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                txt = row.get(text_field, "").strip()
+                if not txt:
+                    continue
+                texts.append(txt)
+                metas.append({k: v for k, v in row.items() if k != text_field and v})
+
+        vectors = await asyncio.gather(*[self._encode_chunk_async(t) for t in texts])
+        return {
+            "content": None,
+            "metadata": {"source": str(file_path), "rows": len(texts), "type": "csv"},
+            "chunks": texts,
+            "vectors": [v.tolist() for v in vectors],
+            "csv_row_metadata": metas,
+        }
+
+    # ------------------------------------------------------------------ #
+    # Office docs (.doc/.docx/.pptx)
+    # ------------------------------------------------------------------ #
+    async def _process_office(self, file_path: Path) -> Dict[str, Any]:
+        loop = asyncio.get_event_loop()
+        if file_path.suffix.lower() in {".doc", ".docx"}:
+            text = await loop.run_in_executor(
+                self._executor, self._read_docx, file_path
+            )
+        else:  # .pptx
+            text = await loop.run_in_executor(
+                self._executor, self._read_pptx, file_path
+            )
+
+        chunks = self._chunk_text(text)
+        vectors = await asyncio.gather(*[self._encode_chunk_async(c) for c in chunks])
+        return {
+            "content": text,
+            "metadata": {
+                "source": str(file_path),
+                "chunks": len(chunks),
+                "type": "office",
+            },
+            "chunks": chunks,
+            "vectors": [v.tolist() for v in vectors],
+        }
+
+    # ------------------------------------------------------------------ #
+    # JSON
+    # ------------------------------------------------------------------ #
+    async def _process_json(self, file_path: Path) -> Dict[str, Any]:
+        text = await asyncio.get_event_loop().run_in_executor(
+            self._executor, self._read_json, file_path
+        )
+        chunks = self._chunk_text(text)
+        vectors = await asyncio.gather(*[self._encode_chunk_async(c) for c in chunks])
+        return {
+            "content": text,
+            "metadata": {
+                "source": str(file_path),
+                "chunks": len(chunks),
+                "type": "json",
+            },
+            "chunks": chunks,
+            "vectors": [v.tolist() for v in vectors],
+        }
+
+    # ------------------------------------------------------------------ #
+    # Shared helpers
+    # ------------------------------------------------------------------ #
+    async def _extract_text(self, file_path: Path) -> Union[
+        Tuple[List[Tuple[str, int, List[int]]], Dict[str, Any]],
+        Tuple[str, Dict[str, Any], List[int]],
+    ]:
+        loop = asyncio.get_event_loop()
+        if file_path.suffix.lower() == ".pdf":
+            return await loop.run_in_executor(
+                self._executor, self._extract_pdf_text, file_path
+            )
+        else:
+            text = await loop.run_in_executor(
+                self._executor, self._read_text_file, file_path
+            )
+            return text, {}, []
+
+    def _extract_pdf_text(self, file_path: Path):
+        page_chunks, meta = [], {}
+        with pdfplumber.open(file_path) as pdf:
+            meta.update(
+                {
+                    "author": pdf.metadata.get("Author", ""),
+                    "title": pdf.metadata.get("Title", file_path.stem),
+                    "page_count": len(pdf.pages),
+                }
+            )
+            for i, page in enumerate(pdf.pages, start=1):
+                lines = page.extract_text_lines()
+                sorted_lines = sorted(lines, key=lambda x: x["top"])
+                txts, nums = [], []
+                for ln_idx, L in enumerate(sorted_lines, start=1):
+                    t = L.get("text", "").strip()
+                    if t:
+                        txts.append(t)
+                        nums.append(ln_idx)
+                if txts:
+                    page_chunks.append(("\n".join(txts), i, nums))
+        return page_chunks, meta
+
+    def _read_text_file(self, file_path: Path) -> str:
+        try:
+            return file_path.read_text(encoding="utf-8")
+        except UnicodeDecodeError:
+            return file_path.read_text(encoding="latin-1")
+
+    def _read_docx(self, path: Path) -> str:
+        doc = Document(path)
+        return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
+
+    def _read_pptx(self, path: Path) -> str:
+        prs = Presentation(path)
+        slides = []
+        for slide in prs.slides:
+            chunks = [sh.text for sh in slide.shapes if hasattr(sh, "text")]
+            slides.append("\n".join(filter(None, chunks)))
+        return "\n\n".join(slides)
+
+    def _read_json(self, path: Path) -> str:
+        obj = json.loads(path.read_text(encoding="utf-8"))
+        pretty = json.dumps(obj, indent=2, ensure_ascii=False)
+        return "\n".join(textwrap.wrap(pretty, width=120))
+
+    async def _encode_chunk_async(self, chunk: str) -> np.ndarray:
+        return await asyncio.get_event_loop().run_in_executor(
+            self._executor,
+            lambda: self.embedding_model.encode(
+                [chunk],
+                convert_to_numpy=True,
+                truncate="model_max_length",
+                normalize_embeddings=True,
+                show_progress_bar=False,
+            )[0],
+        )
+
+    # ------------------------------------------------------------------ #
+    # Text chunking helpers
+    # ------------------------------------------------------------------ #
+    def _chunk_text(self, text: str) -> List[str]:
+        sentences = re.split(r"(?<=[\.!?])\s+", text)
+        chunks, buf, length = [], [], 0
+        for sent in sentences:
+            slen = len(sent) + 1
+            if length + slen <= self.chunk_size:
+                buf.append(sent)
+                length += slen
+            else:
+                if buf:
+                    chunks.append(" ".join(buf))
+                    buf, length = [], 0
+                while len(sent) > self.chunk_size:
+                    part, sent = sent[: self.chunk_size], sent[self.chunk_size :]
+                    chunks.append(part)
+                buf, length = [sent], len(sent)
+        if buf:
+            chunks.append(" ".join(buf))
+        return chunks
+
+    def _split_oversized_chunk(self, chunk: str, tokens: List[str] = None) -> List[str]:
+        if tokens is None:
+            tokens = self.embedding_model.tokenizer.tokenize(chunk)
+        out = []
+        for i in range(0, len(tokens), self.effective_max_length):
+            seg = tokens[i : i + self.effective_max_length]
+            out.append(self.embedding_model.tokenizer.convert_tokens_to_string(seg))
+        return out
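As a rough usage sketch of the module added above (not part of the diff): process_file is a coroutine that validates the path, detects the file type, chunks the text, and returns the chunks alongside one embedding vector per chunk. The sample path and the printed fields below are assumptions for illustration; the optional dependencies imported at the top of the module (pdfplumber, python-docx, python-pptx, sentence-transformers) must be installed.

# Minimal sketch, assuming the module's optional dependencies are installed
# and "notes.txt" is a placeholder path.
import asyncio

from projectdavid.clients.file_processor import FileProcessor


async def main() -> None:
    processor = FileProcessor(max_workers=4, chunk_size=512)
    result = await processor.process_file("notes.txt")
    # metadata carries the source path, chunk count, and detected type
    print(result["metadata"])
    # one embedding vector per chunk
    print(len(result["chunks"]), len(result["vectors"]))


asyncio.run(main())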
{projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid/clients/synchronous_inference_wrapper.py

@@ -11,9 +11,15 @@ LOG = UtilsInterface.LoggingUtility()
 
 
 class SynchronousInferenceStream:
+    # ------------------------------------------------------------ #
+    # GLOBAL EVENT LOOP (single hidden thread for sync wrapper)
+    # ------------------------------------------------------------ #
     _GLOBAL_LOOP = asyncio.new_event_loop()
     asyncio.set_event_loop(_GLOBAL_LOOP)
 
+    # ------------------------------------------------------------ #
+    # Init / setup
+    # ------------------------------------------------------------ #
     def __init__(self, inference) -> None:
         self.inference_client = inference
         self.user_id: Optional[str] = None

@@ -32,6 +38,7 @@ class SynchronousInferenceStream:
         run_id: str,
         api_key: str,
     ) -> None:
+        """Populate IDs once, so callers only provide provider/model."""
         self.user_id = user_id
         self.thread_id = thread_id
         self.assistant_id = assistant_id

@@ -39,7 +46,10 @@ class SynchronousInferenceStream:
         self.run_id = run_id
         self.api_key = api_key
 
-
+    # ------------------------------------------------------------ #
+    # Core sync-to-async streaming wrapper
+    # ------------------------------------------------------------ #
+    def stream_chunks(  # noqa: PLR0915
         self,
         provider: str,
         model: str,

@@ -48,9 +58,15 @@ class SynchronousInferenceStream:
         timeout_per_chunk: float = 280.0,
         suppress_fc: bool = True,
     ) -> Generator[dict, None, None]:
+        """
+        Sync generator that mirrors async `inference_client.stream_inference_response`
+        but (optionally) removes raw <fc> … </fc> output *and* JSON
+        `{"type": "function_call" …}` objects from the stream.
+        """
 
         resolved_api_key = api_key or self.api_key
 
+        # ---------- async inner generator -------------------------------- #
         async def _stream_chunks_async():
             async for chk in self.inference_client.stream_inference_response(
                 provider=provider,

@@ -65,6 +81,7 @@ class SynchronousInferenceStream:
 
         agen = _stream_chunks_async().__aiter__()
 
+        # ---------- FC-suppressor plumbing -------------------------------- #
         if suppress_fc:
             _suppressor = FunctionCallSuppressor()
             _peek_gate = PeekGate(_suppressor)

@@ -72,11 +89,15 @@ class SynchronousInferenceStream:
             def _filter_text(txt: str) -> str:
                 return _peek_gate.feed(txt)
 
+            LOG.debug("[SyncStream] Function-call suppression ACTIVE")
         else:
 
             def _filter_text(txt: str) -> str:
                 return txt
 
+            LOG.debug("[SyncStream] Function-call suppression DISABLED")
+
+        # ---------- helper to flush residual buffered text ---------------- #
         def _drain_filters() -> Optional[dict]:
             if not suppress_fc:
                 return None

@@ -97,18 +118,17 @@ class SynchronousInferenceStream:
                 }
             return None
 
+        # ---------- main sync loop ---------------------------------------- #
         while True:
             try:
                 chunk = self._GLOBAL_LOOP.run_until_complete(
                     asyncio.wait_for(agen.__anext__(), timeout=timeout_per_chunk)
                 )
 
-                # Always attach run_id
+                # Always attach run_id for front-end helpers
                 chunk["run_id"] = self.run_id
 
-                #
-                # allow status chunks to bypass suppression suppression
-                # -------------------------------------------------------
+                # ----- bypass filters for status / code-exec related -------- #
                 if chunk.get("type") == "status":
                     yield chunk
                     continue

@@ -124,9 +144,19 @@ class SynchronousInferenceStream:
                     yield chunk
                     continue
 
+                # ----- NEW: swallow raw JSON function_call objects ---------- #
+                if suppress_fc and chunk.get("type") == "function_call":
+                    LOG.debug(
+                        "[SyncStream] Swallowing JSON function_call chunk: %s",
+                        chunk.get("name") or "<unnamed>",
+                    )
+                    continue
+
+                # ----- text-level suppression ------------------------------- #
                 if isinstance(chunk.get("content"), str):
                     chunk["content"] = _filter_text(chunk["content"])
                     if chunk["content"] == "":
+                        # Entire segment was inside <fc> … </fc>
                         continue
 
                 yield chunk

@@ -134,21 +164,26 @@ class SynchronousInferenceStream:
             except StopAsyncIteration:
                 if tail := _drain_filters():
                     yield tail
-                LOG.info("Stream completed normally.")
+                LOG.info("[SyncStream] Stream completed normally.")
                 break
 
             except asyncio.TimeoutError:
                 if tail := _drain_filters():
                     yield tail
-                LOG.error("[
+                LOG.error("[SyncStream] Timeout waiting for next chunk.")
                 break
 
-            except Exception as exc:
+            except Exception as exc:  # noqa: BLE001
                 if tail := _drain_filters():
                     yield tail
-                LOG.error(
+                LOG.error(
+                    "[SyncStream] Unexpected streaming error: %s", exc, exc_info=True
+                )
                 break
 
+    # ------------------------------------------------------------ #
+    # House-keeping
+    # ------------------------------------------------------------ #
     @classmethod
     def shutdown_loop(cls) -> None:
         if cls._GLOBAL_LOOP and not cls._GLOBAL_LOOP.is_closed():
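The hunks above mostly document, rather than change, the mechanism at the heart of SynchronousInferenceStream.stream_chunks: a private event loop drives an async generator one chunk at a time under a per-chunk timeout. A standalone sketch of that pattern follows; it uses a stand-in generator instead of inference_client.stream_inference_response, so it is an illustration of the technique, not the SDK's API.

# Sketch of the sync-over-async streaming pattern used by stream_chunks.
import asyncio
from typing import AsyncIterator, Generator

_LOOP = asyncio.new_event_loop()  # private loop, mirrors _GLOBAL_LOOP above


async def _fake_stream() -> AsyncIterator[dict]:
    # Stand-in for the real async inference stream.
    for i in range(3):
        await asyncio.sleep(0.01)
        yield {"type": "content", "content": f"chunk-{i}"}


def stream_sync(timeout_per_chunk: float = 5.0) -> Generator[dict, None, None]:
    agen = _fake_stream().__aiter__()
    while True:
        try:
            # Pull exactly one chunk from the async generator, synchronously.
            chunk = _LOOP.run_until_complete(
                asyncio.wait_for(agen.__anext__(), timeout=timeout_per_chunk)
            )
        except StopAsyncIteration:
            break  # stream finished normally
        except asyncio.TimeoutError:
            break  # no chunk arrived within the allotted time
        yield chunk


for c in stream_sync():
    print(c)
_LOOP.close()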
projectdavid-1.33.12/src/projectdavid/clients/file_processor.py → projectdavid-1.33.14/src/projectdavid/clients/vision-file_processor.py

@@ -21,17 +21,19 @@ import torch
 from docx import Document
 from PIL import Image
 from pptx import Presentation
-from
-from
+from projectdavid_common import UtilsInterface
+from sentence_transformers import SentenceTransformer
+
+# from transformers import Blip2ForConditionalGeneration, Blip2Processor
+
+# from ultralytics import YOLO
 
 # OCR fallback – optional
-try:
-    import pytesseract  # noqa: F401  # pylint: disable=unused-import
-except ImportError:
-    pytesseract = None
+# try:
+#     import pytesseract  # noqa: F401  # pylint: disable=unused-import
+# except ImportError:
+#     pytesseract = None
 
-from projectdavid_common import UtilsInterface
-from sentence_transformers import SentenceTransformer
 
 log = UtilsInterface.LoggingUtility()
 

@@ -81,14 +83,6 @@ class FileProcessor:
         self.device = torch.device("cpu")
         self.torch_dtype = torch.float32
 
-        # Feature flags
-        self.use_ocr = use_ocr and pytesseract is not None
-        self.use_detection = use_detection
-        if use_ocr and pytesseract is None:
-            log.warning("OCR requested but pytesseract not installed – skipping.")
-        if self.use_detection:
-            self.detector = YOLO("yolov8x.pt").to(self.device)
-
         # Text embedder
         self.embedding_model_name = "paraphrase-MiniLM-L6-v2"
         self.embedding_model = SentenceTransformer(self.embedding_model_name)

@@ -100,35 +94,13 @@ class FileProcessor:
         self.effective_max_length = self.max_seq_length - self.special_tokens_count
         self.chunk_size = min(chunk_size, self.effective_max_length * 4)
 
-        # Image embedder
-        self.clip_model, _, self.clip_preprocess = (
-            open_clip.create_model_and_transforms(
-                image_model_name,
-                pretrained="laion2b_s32b_b79k",
-                precision="fp16" if self.device.type == "cuda" else "fp32",
-            )
-        )
-        self.clip_model = self.clip_model.to(self.device).eval()
-        self.clip_tokenizer = open_clip.get_tokenizer(image_model_name)
-
-        # Caption generator
-        self.blip_processor = Blip2Processor.from_pretrained(caption_model_name)
-        self.blip_model = (
-            Blip2ForConditionalGeneration.from_pretrained(
-                caption_model_name,
-                torch_dtype=self.torch_dtype,
-            )
-            .to(self.device)
-            .eval()
-        )
-
         # Executor & logging
         self._executor = ThreadPoolExecutor(max_workers=max_workers)
         log.info(
             "FileProcessor ready (device=%s, OCR=%s, detection=%s)",
             self.device,
-            self.use_ocr,
-            self.use_detection,
+            # self.use_ocr,
+            # self.use_detection,
         )
 
         # ------------------------------------------------------------------ #

@@ -190,90 +162,6 @@ class FileProcessor:
         ftype = self._detect_file_type(path)
         return await getattr(self, f"_process_{ftype}")(path)
 
-    # ------------------------------------------------------------------ #
-    # Image processing (OpenCLIP + BLIP-2 + OCR + YOLO)
-    # ------------------------------------------------------------------ #
-    async def _process_image(self, file_path: Path) -> Dict[str, Any]:
-        loop = asyncio.get_event_loop()
-        img = await loop.run_in_executor(self._executor, Image.open, file_path)
-
-        # 1) Image vector
-        def enc_img():
-            with torch.no_grad():
-                t = self.clip_preprocess(img).unsqueeze(0).to(self.device)
-                v = self.clip_model.encode_image(t).squeeze()
-                return (v / v.norm()).float().cpu().numpy()
-
-        image_vec = await loop.run_in_executor(self._executor, enc_img)
-
-        # 2) Caption
-        def gen_cap():
-            inp = self.blip_processor(images=img, return_tensors="pt").to(self.device)
-            with torch.no_grad():
-                ids = self.blip_model.generate(**inp, max_new_tokens=50)
-            return self.blip_processor.decode(ids[0], skip_special_tokens=True)
-
-        caption = await loop.run_in_executor(self._executor, gen_cap)
-
-        # 3) OCR
-        if self.use_ocr:
-            text = await loop.run_in_executor(
-                self._executor, pytesseract.image_to_string, img
-            )
-            if t := text.strip():
-                caption += "\n" + t
-
-        # 4) Caption vector
-        def enc_txt():
-            with torch.no_grad():
-                tok = self.clip_tokenizer(caption).unsqueeze(0).to(self.device)
-                v = self.clip_model.encode_text(tok).squeeze()
-                return (v / v.norm()).float().cpu().numpy()
-
-        caption_vec = await loop.run_in_executor(self._executor, enc_txt)
-
-        # 5) YOLO regions
-        region_vectors = []
-        if self.use_detection:
-            dets = self.detector(img)[0]
-            for box in dets.boxes:
-                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().tolist())
-                crop = img.crop((x1, y1, x2, y2))
-                vec = self.encode_image(crop)
-                region_vectors.append(
-                    {
-                        "vector": vec.tolist(),
-                        "bbox": [x1, y1, x2, y2],
-                        "label": dets.names[int(box.cls)],
-                        "conf": float(box.conf),
-                    }
-                )
-
-        # Metadata
-        sha = hashlib.sha256(file_path.read_bytes()).hexdigest()
-        w, h = img.size
-        meta = {
-            "source": str(file_path),
-            "type": "image",
-            "width": w,
-            "height": h,
-            "mime": f"image/{file_path.suffix.lstrip('.')}",
-            "sha256": sha,
-            "embedding_model": "openclip-vit-h-14",
-            "caption": caption,
-        }
-
-        result = {
-            "content": None,
-            "metadata": meta,
-            "chunks": [caption],
-            "vectors": [image_vec.tolist()],
-            "caption_vector": caption_vec.tolist(),
-        }
-
-        if region_vectors:
-            result["region_vectors"] = region_vectors
-        return result
     # ------------------------------------------------------------------ #
     # PDF
     # ------------------------------------------------------------------ #
{projectdavid-1.33.12 → projectdavid-1.33.14/src/projectdavid.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: projectdavid
-Version: 1.33.12
+Version: 1.33.14
 Summary: Python SDK for interacting with the Entities Assistant API.
 Author-email: Francis Neequaye Armah <francis.neequaye@projectdavid.co.uk>
 License: PolyForm Noncommercial License 1.0.0
{projectdavid-1.33.12 → projectdavid-1.33.14}/src/projectdavid.egg-info/SOURCES.txt

@@ -51,6 +51,7 @@ src/projectdavid/clients/tools_client.py
 src/projectdavid/clients/users_client.py
 src/projectdavid/clients/vector_store_manager.py
 src/projectdavid/clients/vectors.py
+src/projectdavid/clients/vision-file_processor.py
 src/projectdavid/constants/platform.py
 src/projectdavid/services/logging_service.py
 src/projectdavid/synthesis/__init__.py