docsgraph 0.1.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. cairn/__init__.py +5 -0
  2. cairn/bench/__init__.py +37 -0
  3. cairn/bench/baseline.py +236 -0
  4. cairn/bench/dataset.py +109 -0
  5. cairn/bench/judge.py +126 -0
  6. cairn/bench/metrics.py +32 -0
  7. cairn/bench/report.py +143 -0
  8. cairn/bench/runner.py +219 -0
  9. cairn/cli/__init__.py +5 -0
  10. cairn/cli/app.py +776 -0
  11. cairn/cli/config.py +105 -0
  12. cairn/core/__init__.py +41 -0
  13. cairn/core/errors.py +68 -0
  14. cairn/core/types.py +147 -0
  15. cairn/embed/__init__.py +17 -0
  16. cairn/embed/base.py +31 -0
  17. cairn/embed/doubao.py +167 -0
  18. cairn/embed/fake.py +36 -0
  19. cairn/embed/openai_compatible.py +155 -0
  20. cairn/engine/__init__.py +18 -0
  21. cairn/engine/indexer.py +298 -0
  22. cairn/engine/manifest.py +83 -0
  23. cairn/entity/__init__.py +21 -0
  24. cairn/entity/base.py +52 -0
  25. cairn/entity/fake.py +34 -0
  26. cairn/entity/heuristic.py +148 -0
  27. cairn/index/__init__.py +39 -0
  28. cairn/index/entities.py +244 -0
  29. cairn/index/summaries.py +269 -0
  30. cairn/index/tree.py +274 -0
  31. cairn/index/vectors.py +287 -0
  32. cairn/index/xrefs.py +195 -0
  33. cairn/ingest/__init__.py +36 -0
  34. cairn/ingest/base.py +46 -0
  35. cairn/ingest/markdown.py +244 -0
  36. cairn/ingest/markitdown.py +145 -0
  37. cairn/ingest/pdf.py +357 -0
  38. cairn/inspection.py +971 -0
  39. cairn/mcp/__init__.py +12 -0
  40. cairn/mcp/schemas.py +547 -0
  41. cairn/mcp/server.py +363 -0
  42. cairn/providers.py +50 -0
  43. cairn/py.typed +0 -0
  44. cairn/repo.py +1486 -0
  45. cairn/repo_search.py +1505 -0
  46. cairn/summarize/__init__.py +18 -0
  47. cairn/summarize/base.py +56 -0
  48. cairn/summarize/cache.py +66 -0
  49. cairn/summarize/fake.py +43 -0
  50. cairn/summarize/openai_compatible.py +148 -0
  51. cairn/summarize/prompts.py +73 -0
  52. cairn/tools/__init__.py +31 -0
  53. cairn/tools/base.py +126 -0
  54. cairn/tools/find_mentions.py +93 -0
  55. cairn/tools/get_related.py +140 -0
  56. cairn/tools/get_section.py +130 -0
  57. cairn/tools/outline.py +75 -0
  58. cairn/tools/read_range.py +94 -0
  59. cairn/tools/search_keyword.py +94 -0
  60. cairn/tools/search_semantic.py +181 -0
  61. cairn/xref/__init__.py +24 -0
  62. cairn/xref/base.py +50 -0
  63. cairn/xref/fake.py +40 -0
  64. cairn/xref/heuristic.py +217 -0
  65. docsgraph-0.1.0a2.dist-info/METADATA +688 -0
  66. docsgraph-0.1.0a2.dist-info/RECORD +69 -0
  67. docsgraph-0.1.0a2.dist-info/WHEEL +4 -0
  68. docsgraph-0.1.0a2.dist-info/entry_points.txt +3 -0
  69. docsgraph-0.1.0a2.dist-info/licenses/LICENSE +201 -0
cairn/index/tree.py ADDED
@@ -0,0 +1,274 @@
1
+ """Tree sub-index — persistence and queries for the structural backbone.
2
+
3
+ `TreeBuilder` writes a deterministic ``tree.json`` from a parsed
4
+ :class:`Document`. `Tree` loads and queries it.
5
+
6
+ The tree is the primary navigation structure (ARCHITECTURE.md §2.1). All other
7
+ sub-indexes key into the ``section_id`` namespace it defines.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ from collections.abc import Iterator
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+ from typing import Any, Final
17
+
18
+ from cairn.core.errors import IndexBuildError, IndexNotFoundError
19
+ from cairn.core.types import Document, SectionNode, Span
20
+
21
+ TREE_FILENAME: Final = "tree.json"  # file name TreeBuilder.build writes and Tree.load reads inside a document directory
22
+ TREE_FORMAT_VERSION: Final = 1  # stored as "format_version"; Tree.load rejects any other value
23
+
24
+
25
class TreeBuilder:
    """Serializes the section tree of a Document into ``tree.json``."""

    def build(self, document: Document, *, out_dir: Path) -> Path:
        """Validate ``document.sections`` and dump them to ``out_dir/tree.json``.

        Args:
            document: The parsed document whose sections are written. Section
                ids must be unique, and every referenced parent or child id
                must belong to a section of the same document.
            out_dir: Target directory; created (including parents) if missing.

        Returns:
            The path of the ``tree.json`` file that was written.

        Raises:
            IndexBuildError: If the sections fail validation.
        """
        self._validate_tree(document)
        out_dir.mkdir(parents=True, exist_ok=True)
        target = out_dir / TREE_FILENAME

        payload: dict[str, Any] = {
            "format_version": TREE_FORMAT_VERSION,
            "doc_id": document.id,
            "source_path": str(document.source_path),
            "source_hash": document.source_hash,
            "indexed_at": document.indexed_at.isoformat(),
            "cairn_version": document.cairn_version,
            "sections": list(map(_section_to_dict, document.sections)),
        }

        with target.open("w", encoding="utf-8") as stream:
            # sort_keys=False keeps the keys in the order they are listed above.
            json.dump(payload, stream, ensure_ascii=False, indent=2, sort_keys=False)
            stream.write("\n")
        return target

    @staticmethod
    def _validate_tree(document: Document) -> None:
        """Check id uniqueness and that parent/child references resolve.

        Raises:
            IndexBuildError: On a duplicate section id, or when a section
                names a parent or child id that no section in the document has.
        """
        # Pass 1: collect ids, failing on the first repeat.
        known_ids: set[str] = set()
        for node in document.sections:
            node_id = node.id
            if node_id in known_ids:
                msg = f"duplicate section id in document: {node_id!r}"
                raise IndexBuildError(msg, details={"section_id": node_id})
            known_ids.add(node_id)

        # Pass 2: every reference must point at an id collected above.
        for node in document.sections:
            parent_id = node.parent
            if not (parent_id is None or parent_id in known_ids):
                msg = (
                    f"section {node.id!r} references unknown parent "
                    f"{parent_id!r}"
                )
                raise IndexBuildError(
                    msg,
                    details={"section_id": node.id, "parent": parent_id},
                )
            for child_id in node.children:
                if child_id in known_ids:
                    continue
                msg = (
                    f"section {node.id!r} references unknown child "
                    f"{child_id!r}"
                )
                raise IndexBuildError(
                    msg,
                    details={"section_id": node.id, "child": child_id},
                )
88
+
89
+
90
class Tree:
    """In-memory, read-only view of a loaded ``tree.json``."""

    def __init__(
        self,
        sections: tuple[SectionNode, ...],
        *,
        doc_id: str,
        source_hash: str,
        indexed_at: datetime,
    ) -> None:
        """Index ``sections`` by id and keep the document metadata."""
        self.doc_id = doc_id
        self.source_hash = source_hash
        self.indexed_at = indexed_at
        self._sections = sections

        id_index: dict[str, SectionNode] = {}
        for node in sections:
            id_index[node.id] = node
        self._by_id: dict[str, SectionNode] = id_index
        # Roots are the sections without a parent, kept in their given order.
        self._roots: tuple[SectionNode, ...] = tuple(
            filter(lambda node: node.parent is None, sections)
        )

    # -- construction --------------------------------------------------------

    @classmethod
    def load(cls, doc_dir: Path) -> Tree:
        """Read ``tree.json`` from ``doc_dir`` and build a :class:`Tree`.

        Raises:
            IndexNotFoundError: If the file does not exist or its
                ``format_version`` differs from ``TREE_FORMAT_VERSION``.
        """
        tree_path = doc_dir / TREE_FILENAME
        if not tree_path.exists():
            msg = f"tree.json not found in {doc_dir}"
            raise IndexNotFoundError(msg, details={"path": str(tree_path)})

        with tree_path.open("r", encoding="utf-8") as stream:
            data = json.load(stream)

        found_version = data.get("format_version")
        if found_version != TREE_FORMAT_VERSION:
            msg = (
                f"unsupported tree format version: {found_version!r} "
                f"(expected {TREE_FORMAT_VERSION})"
            )
            raise IndexNotFoundError(msg, details={"path": str(tree_path)})

        nodes = tuple(map(_section_from_dict, data["sections"]))
        return cls(
            nodes,
            doc_id=data["doc_id"],
            source_hash=data["source_hash"],
            indexed_at=datetime.fromisoformat(data["indexed_at"]),
        )

    # -- queries -------------------------------------------------------------

    def get(self, section_id: str) -> SectionNode | None:
        """Return the section with this id, or ``None`` when there is none."""
        try:
            return self._by_id[section_id]
        except KeyError:
            return None

    def require(self, section_id: str) -> SectionNode:
        """Return the section with this id or raise :class:`IndexNotFoundError`."""
        found = self.get(section_id)
        if found is not None:
            return found
        msg = f"section not found: {section_id!r}"
        raise IndexNotFoundError(msg, details={"section_id": section_id})

    def __contains__(self, section_id: object) -> bool:
        # Non-string keys are never members.
        if not isinstance(section_id, str):
            return False
        return section_id in self._by_id

    def __len__(self) -> int:
        return len(self._sections)

    def __iter__(self) -> Iterator[SectionNode]:
        """Iterate over all sections in the order they were supplied."""
        return iter(self._sections)

    def roots(self) -> tuple[SectionNode, ...]:
        """Sections whose ``parent`` is ``None``."""
        return self._roots

    def children_of(self, section_id: str) -> tuple[SectionNode, ...]:
        """Resolve the ids in a section's ``children`` to nodes, in that order."""
        parent = self.require(section_id)
        return tuple(map(self._by_id.__getitem__, parent.children))

    def descendants_of(self, section_id: str) -> Iterator[SectionNode]:
        """Yield every descendant of a section in pre-order (self excluded)."""
        start = self.require(section_id)
        # Explicit stack; children are pushed reversed so the first child pops first.
        pending: list[str] = list(start.children)[::-1]
        while pending:
            visited = self._by_id[pending.pop()]
            yield visited
            pending += reversed(visited.children)

    def ancestors_of(self, section_id: str) -> Iterator[SectionNode]:
        """Yield the parent, grandparent, ... of a section (self excluded)."""
        parent_id = self.require(section_id).parent
        while parent_id is not None:
            ancestor = self._by_id[parent_id]
            yield ancestor
            parent_id = ancestor.parent

    def outline(
        self,
        *,
        depth: int = 2,
        focus: str | None = None,
    ) -> list[dict[str, Any]]:
        """Build a nested outline of the tree, or of one focused subtree.

        Every entry carries ``id``, ``title``, ``level`` and ``children``
        (nested entries). An entry whose children were cut off by ``depth``
        additionally carries ``truncated: True``. No summaries are attached.

        Args:
            depth: How many levels to expand; must lie in ``[1, 6]``.
            focus: Section id to use as the only top-level entry. When
                omitted, all root sections are used.

        Raises:
            IndexNotFoundError: If ``depth`` is out of range or ``focus`` is
                not a known section id.
        """
        if depth < 1 or depth > 6:
            msg = f"depth must be in [1, 6]; got {depth}"
            raise IndexNotFoundError(msg)

        if focus is not None:
            anchor = self.require(focus)
            start_nodes: tuple[SectionNode, ...] = (anchor,)
            # Shift levels so the focused section counts as level 1.
            level_offset = anchor.level - 1
        else:
            start_nodes = self._roots
            level_offset = 0

        return [
            self._outline_node(start, depth, level_offset) for start in start_nodes
        ]

    def _outline_node(
        self,
        node: SectionNode,
        depth: int,
        base_level: int,
    ) -> dict[str, Any]:
        """Render ``node`` and, while depth allows, its children recursively."""
        budget = depth - (node.level - base_level)
        entry: dict[str, Any] = {
            "id": node.id,
            "title": node.title,
            "level": node.level,
            "children": [],
        }
        if node.children:
            if budget > 0:
                entry["children"] = [
                    self._outline_node(self._by_id[child_id], depth, base_level)
                    for child_id in node.children
                ]
            else:
                # Children exist but lie beyond the requested depth.
                entry["truncated"] = True
        return entry
243
+
244
+
245
+ # ---------------------------------------------------------------------------
246
+ # (de)serialization
247
+ # ---------------------------------------------------------------------------
248
+
249
+
250
+ def _section_to_dict(s: SectionNode) -> dict[str, Any]:
251
+ return {
252
+ "id": s.id,
253
+ "title": s.title,
254
+ "level": s.level,
255
+ "parent": s.parent,
256
+ "children": list(s.children),
257
+ "span": {"start": s.span.start, "end": s.span.end},
258
+ "path": list(s.path),
259
+ "raw_text": s.raw_text,
260
+ }
261
+
262
+
263
def _section_from_dict(d: dict[str, Any]) -> SectionNode:
    """Rebuild a :class:`SectionNode` from one ``sections`` entry of ``tree.json``.

    ``children`` and ``path`` are converted to tuples and ``span`` is turned
    back into a :class:`Span`.
    """
    span_data = d["span"]
    fields: dict[str, Any] = {
        "id": d["id"],
        "title": d["title"],
        "level": d["level"],
        "parent": d["parent"],
        "children": tuple(d["children"]),
        "span": Span(start=span_data["start"], end=span_data["end"]),
        "path": tuple(d["path"]),
        "raw_text": d["raw_text"],
    }
    return SectionNode(**fields)
cairn/index/vectors.py ADDED
@@ -0,0 +1,287 @@
1
+ """Vectors sub-index — dense embeddings over LanceDB.
2
+
3
+ Storage layout::
4
+
5
+ <doc_dir>/
6
+ ├── vectors.lance/ # LanceDB connect root
7
+ │ └── data.lance/ # table holding (id, vector)
8
+ └── vectors_manifest.json # embedder name, dim, build metadata
9
+
10
+ LanceDB is the v0.1 default per ARCHITECTURE.md §7. We use the sync API and
11
+ wrap blocking calls in ``asyncio.to_thread`` to satisfy our async-by-default
12
+ public surface without adopting LanceDB's still-evolving native async API.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import asyncio
18
+ import json
19
+ import math
20
+ import re
21
+ import shutil
22
+ from datetime import UTC, datetime
23
+ from pathlib import Path
24
+ from typing import Any, Final
25
+
26
+ import lancedb
27
+ import pyarrow as pa
28
+ from pydantic import BaseModel, ConfigDict, Field
29
+
30
+ from cairn.core.errors import IndexBuildError, IndexNotFoundError
31
+ from cairn.core.types import Document, SectionNode
32
+ from cairn.embed.base import Embedder
33
+
34
+ VECTORS_DB_DIRNAME: Final = "vectors.lance"  # directory under the document dir passed to lancedb.connect
35
+ VECTORS_TABLE_NAME: Final = "data"  # table name used for create_table / open_table
36
+ VECTORS_MANIFEST_FILENAME: Final = "vectors_manifest.json"  # JSON manifest written next to the LanceDB directory
37
+ VECTORS_FORMAT_VERSION: Final = 1  # stored as "format_version"; Vectors.load rejects any other value
38
+
39
+ _SCOPE_PATTERN = re.compile(r"^[a-z0-9][a-z0-9_/-]*$")  # accepted scope_prefix values in Vectors.search
40
+
41
+
42
class VectorHit(BaseModel):
    """A single vector-search result: a section id and its similarity score."""

    # frozen=True / extra="forbid" are passed straight to pydantic's ConfigDict.
    model_config = ConfigDict(frozen=True, extra="forbid")

    id: str
    # Constrained to the closed interval [0.0, 1.0].
    score: float = Field(ge=0.0, le=1.0)
49
+
50
+
51
class VectorEntry(BaseModel):
    """A stored section vector: the section id together with its components."""

    # frozen=True / extra="forbid" are passed straight to pydantic's ConfigDict.
    model_config = ConfigDict(frozen=True, extra="forbid")

    id: str
    vector: list[float]
58
+
59
+
60
def embedding_text(node: SectionNode) -> str:
    """Build the string that gets embedded for a section.

    The title always comes first so the heading contributes to the embedding.
    When the whitespace-stripped body is empty, the title alone is returned;
    otherwise the title and the stripped body are joined by a blank line.
    """
    stripped_body = node.raw_text.strip()
    return f"{node.title}\n\n{stripped_body}" if stripped_body else node.title
70
+
71
+
72
def l2_normalize(vec: list[float]) -> list[float]:
    """Scale ``vec`` to unit Euclidean length and return the result as a new list.

    A vector whose norm is zero (including the empty vector) cannot be scaled
    and is returned as an unchanged copy.
    """
    squared_total = sum(component * component for component in vec)
    magnitude = math.sqrt(squared_total)
    if magnitude == 0.0:
        return list(vec)
    return [component / magnitude for component in vec]
78
+
79
+
80
+ class VectorBuilder:
81
+ """Embed and persist section-level vectors for a Document."""
82
+
83
+ def __init__(
84
+ self,
85
+ embedder: Embedder,
86
+ *,
87
+ batch_size: int = 32,
88
+ ) -> None:
89
+ if batch_size < 1:
90
+ msg = f"batch_size must be >= 1; got {batch_size}"
91
+ raise IndexBuildError(msg)
92
+ self.embedder = embedder
93
+ self.batch_size = batch_size
94
+
95
+ async def build(self, document: Document, *, out_dir: Path) -> Path:
96
+ """Embed every section and write ``vectors.lance/`` + manifest.
97
+
98
+ Returns the path to the manifest file.
99
+ """
100
+ out_dir.mkdir(parents=True, exist_ok=True)
101
+ db_dir = out_dir / VECTORS_DB_DIRNAME
102
+ manifest_path = out_dir / VECTORS_MANIFEST_FILENAME
103
+
104
+ ids = [s.id for s in document.sections]
105
+ texts = [embedding_text(s) for s in document.sections]
106
+
107
+ vectors: list[list[float]] = []
108
+ for i in range(0, len(texts), self.batch_size):
109
+ batch = texts[i : i + self.batch_size]
110
+ raw = await self.embedder.embed(batch)
111
+ if len(raw) != len(batch):
112
+ msg = (
113
+ f"embedder returned {len(raw)} vectors for batch of "
114
+ f"{len(batch)}"
115
+ )
116
+ raise IndexBuildError(msg)
117
+ for vec in raw:
118
+ if len(vec) != self.embedder.dim:
119
+ msg = (
120
+ f"embedder returned dim={len(vec)} but expected "
121
+ f"dim={self.embedder.dim}"
122
+ )
123
+ raise IndexBuildError(msg)
124
+ vectors.append(l2_normalize(vec))
125
+
126
+ await asyncio.to_thread(self._write_table, db_dir, ids, vectors)
127
+
128
+ now = datetime.now(UTC)
129
+ manifest = {
130
+ "format_version": VECTORS_FORMAT_VERSION,
131
+ "doc_id": document.id,
132
+ "embedder": self.embedder.name,
133
+ "dim": self.embedder.dim,
134
+ "section_count": len(ids),
135
+ "generated_at": now.isoformat(),
136
+ }
137
+ with manifest_path.open("w", encoding="utf-8") as fh:
138
+ json.dump(manifest, fh, ensure_ascii=False, indent=2)
139
+ fh.write("\n")
140
+ return manifest_path
141
+
142
+ def _write_table(
143
+ self,
144
+ db_dir: Path,
145
+ ids: list[str],
146
+ vectors: list[list[float]],
147
+ ) -> None:
148
+ # Full rebuild: clear any previous table data for a clean schema state.
149
+ if db_dir.exists():
150
+ shutil.rmtree(db_dir)
151
+
152
+ db = lancedb.connect(str(db_dir))
153
+ schema = pa.schema(
154
+ [
155
+ pa.field("id", pa.string()),
156
+ pa.field("vector", pa.list_(pa.float32(), self.embedder.dim)),
157
+ ]
158
+ )
159
+ table = db.create_table(VECTORS_TABLE_NAME, schema=schema)
160
+ if ids:
161
+ records = [
162
+ {"id": sid, "vector": vec} for sid, vec in zip(ids, vectors, strict=True)
163
+ ]
164
+ table.add(records)
165
+
166
+
167
class Vectors:
    """Loaded vectors index. Cosine-similarity search via LanceDB.

    Blocking table calls are dispatched through ``asyncio.to_thread`` so the
    public methods can be awaited.
    """

    def __init__(
        self,
        table: Any,
        *,
        doc_id: str,
        embedder: str,
        dim: int,
    ) -> None:
        """Wrap an opened table together with the manifest metadata.

        Args:
            table: Table object providing ``search``, ``count_rows`` and
                ``to_arrow``.
            doc_id: Document id recorded in the manifest.
            embedder: Embedder name recorded in the manifest.
            dim: Vector dimensionality recorded in the manifest.
        """
        self._table = table
        self.doc_id = doc_id
        self.embedder = embedder
        self.dim = dim

    @classmethod
    def load(cls, doc_dir: Path) -> Vectors:
        """Load the vectors index from a document directory.

        Raises:
            IndexNotFoundError: If the manifest or the ``vectors.lance``
                directory is missing, or the manifest's ``format_version``
                differs from ``VECTORS_FORMAT_VERSION``.
        """
        manifest_path = doc_dir / VECTORS_MANIFEST_FILENAME
        db_dir = doc_dir / VECTORS_DB_DIRNAME
        if not manifest_path.exists():
            msg = f"vectors manifest not found in {doc_dir}"
            raise IndexNotFoundError(msg, details={"path": str(manifest_path)})
        if not db_dir.exists():
            msg = f"vectors.lance directory not found in {doc_dir}"
            raise IndexNotFoundError(msg, details={"path": str(db_dir)})

        with manifest_path.open("r", encoding="utf-8") as fh:
            manifest = json.load(fh)

        version = manifest.get("format_version")
        if version != VECTORS_FORMAT_VERSION:
            msg = (
                f"unsupported vectors format version: {version!r} "
                f"(expected {VECTORS_FORMAT_VERSION})"
            )
            raise IndexNotFoundError(msg, details={"path": str(manifest_path)})

        db = lancedb.connect(str(db_dir))
        table = db.open_table(VECTORS_TABLE_NAME)
        return cls(
            table,
            doc_id=manifest["doc_id"],
            embedder=manifest["embedder"],
            dim=manifest["dim"],
        )

    async def search(
        self,
        query: list[float],
        *,
        k: int = 8,
        scope_prefix: str | None = None,
    ) -> list[VectorHit]:
        """Return up to ``k`` nearest sections by cosine similarity.

        When ``scope_prefix`` is given, results are restricted to sections
        whose id equals the prefix or begins with ``f"{prefix}/"``.

        Args:
            query: Query vector; its length must equal ``self.dim``. It is
                L2-normalized before the search.
            k: Maximum number of hits; must be at least 1.
            scope_prefix: Optional id prefix; must match ``_SCOPE_PATTERN``.

        Raises:
            IndexBuildError: If ``k`` < 1, the query length differs from
                ``self.dim``, or ``scope_prefix`` contains disallowed
                characters.
        """
        if k < 1:
            msg = f"k must be >= 1; got {k}"
            raise IndexBuildError(msg)
        if len(query) != self.dim:
            msg = f"query dim {len(query)} != index dim {self.dim}"
            raise IndexBuildError(msg)
        if scope_prefix is not None and not _SCOPE_PATTERN.match(scope_prefix):
            msg = (
                f"invalid scope_prefix {scope_prefix!r}; only lowercase "
                "alphanumeric, '-', '_', '/' allowed"
            )
            raise IndexBuildError(msg)

        normalized = l2_normalize(query)
        return await asyncio.to_thread(
            self._sync_search, normalized, k, scope_prefix
        )

    @staticmethod
    def _score_from_distance(distance: float) -> float:
        """Map a cosine distance to a similarity score clamped to ``[0.0, 1.0]``.

        The score is ``1.0 - distance``. A NaN distance yields ``0.0``:
        ``min(1.0, nan)`` evaluates to ``1.0`` in Python, so without this guard
        an undefined distance would be reported as a perfect match.
        """
        if math.isnan(distance):
            return 0.0
        return max(0.0, min(1.0, 1.0 - distance))

    def _sync_search(
        self,
        vec: list[float],
        k: int,
        scope_prefix: str | None,
    ) -> list[VectorHit]:
        """Run the blocking table search and convert rows into hits.

        ``scope_prefix`` is interpolated into the filter string as-is; the
        public ``search`` method validates it against ``_SCOPE_PATTERN`` first.
        """
        q = self._table.search(vec).distance_type("cosine")
        if scope_prefix is not None:
            # NOTE(review): `_` is a single-character wildcard in SQL LIKE and
            # is allowed by _SCOPE_PATTERN, so a prefix containing `_` may
            # match more ids than intended — confirm against LanceDB's filter
            # dialect before relying on exact prefix semantics.
            predicate = (
                f"id = '{scope_prefix}' OR id LIKE '{scope_prefix}/%'"
            )
            q = q.where(predicate, prefilter=True)
        rows = q.limit(k).to_list()

        return [
            VectorHit(
                id=str(row["id"]),
                score=self._score_from_distance(float(row["_distance"])),
            )
            for row in rows
        ]

    async def count(self) -> int:
        """Total number of indexed sections."""
        return await asyncio.to_thread(self._table.count_rows)

    async def entries(self) -> list[VectorEntry]:
        """Return every stored vector.

        Repo-scoped search uses this to build a process-local flat index once,
        then answers repeated MCP queries without reopening every per-document
        LanceDB table.
        """
        return await asyncio.to_thread(self._sync_entries)

    def _sync_entries(self) -> list[VectorEntry]:
        """Read the whole table and convert each row into a ``VectorEntry``."""
        rows = self._table.to_arrow().to_pylist()
        return [
            VectorEntry(
                id=str(row["id"]),
                vector=[float(value) for value in row["vector"]],
            )
            for row in rows
        ]