deepagents-okf-backend 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ """OKF-aware filesystem backend for LangChain Deep Agents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .backend import OKFBackend
6
+ from .frontmatter import parse_frontmatter, serialize_frontmatter
7
+ from .okf import KNOWN_FIELDS, REQUIRED_FIELDS, validate_metadata
8
+ from .query import OKFHit, make_okf_query_tool, query_bundle
9
+
10
+ __all__ = [
11
+ "OKFBackend",
12
+ "OKFHit",
13
+ "make_okf_query_tool",
14
+ "query_bundle",
15
+ "parse_frontmatter",
16
+ "serialize_frontmatter",
17
+ "validate_metadata",
18
+ "REQUIRED_FIELDS",
19
+ "KNOWN_FIELDS",
20
+ ]
21
+
22
+ __version__ = "0.1.0"
@@ -0,0 +1,376 @@
1
+ """OKF-aware filesystem backend for LangChain Deep Agents.
2
+
3
+ ``OKFBackend`` implements deepagents' ``BackendProtocol`` over an Open Knowledge
4
+ Format (OKF) bundle: a directory of markdown files with YAML frontmatter. Reads and
5
+ searches work like a normal virtual filesystem; writes to ``.md`` documents are
6
+ validated as OKF and (optionally) auto-stamped with a ``timestamp``.
7
+
8
+ Every method returns a structured result with an ``error`` field and never raises —
9
+ this is the ``BackendProtocol`` contract. All disk IO is therefore guarded, and every
10
+ path (including entries discovered by ``ls``/``glob``/``grep``) is confined to the
11
+ bundle root, so a symlink inside the bundle cannot leak files from outside it.
12
+
13
+ OKF spec: https://cloud.google.com/blog/products/data-analytics/how-the-open-knowledge-format-can-improve-data-sharing
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ from collections.abc import Iterable
20
+ from datetime import datetime, timezone
21
+ from pathlib import Path
22
+
23
+ from deepagents.backends.protocol import (
24
+ BackendProtocol,
25
+ EditResult,
26
+ FileData,
27
+ FileDownloadResponse,
28
+ FileInfo,
29
+ FileUploadResponse,
30
+ GlobResult,
31
+ GrepMatch,
32
+ GrepResult,
33
+ LsResult,
34
+ ReadResult,
35
+ WriteResult,
36
+ )
37
+
38
+ from .frontmatter import parse_frontmatter, serialize_frontmatter
39
+ from .okf import is_okf_document, validate_metadata
40
+
41
+ DEFAULT_READ_LIMIT = 2000
42
+
43
+
44
class _PathEscapeError(Exception):
    """Private signal that a requested path does not stay inside the bundle root.

    Raised by the path-resolution helper and translated by the public backend
    methods into an ``error`` value on their result objects.
    """
46
+
47
+
48
def _iso(ts: float) -> str:
    """Render a POSIX timestamp as a UTC ISO-8601 string truncated to whole seconds."""
    moment = datetime.fromtimestamp(ts, timezone.utc)
    whole_seconds = moment.replace(microsecond=0)
    return whole_seconds.isoformat()
51
+
52
+
53
def _now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string truncated to whole seconds."""
    current = datetime.now(timezone.utc)
    return current.replace(microsecond=0).isoformat()
55
+
56
+
57
class OKFBackend(BackendProtocol):
    """A deepagents backend backed by a local OKF bundle directory.

    Public methods report failures through the ``error`` field of their result
    object rather than raising. Paths supplied by the agent are interpreted
    relative to ``root`` and must resolve (after symlink resolution) to a
    location inside it.

    Args:
        root: Directory holding the OKF bundle. Created if it does not exist.
        validate: When True, writes/edits to ``.md`` docs must be valid OKF
            (``type`` field required) or the operation fails without touching disk.
        auto_timestamp: When True, ``write`` sets/refreshes the ``timestamp``
            frontmatter field on ``.md`` docs that carry frontmatter. ``edit`` never
            rewrites the frontmatter block, so a body-only edit is byte-preserving.
    """

    def __init__(
        self,
        root: str | Path,
        *,
        validate: bool = True,
        auto_timestamp: bool = True,
    ) -> None:
        # Store the root fully resolved so containment checks compare like with like.
        self.root = Path(root).expanduser().resolve()
        self.validate = validate
        self.auto_timestamp = auto_timestamp
        self.root.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------ helpers
    def _resolve(self, path: str) -> Path:
        """Resolve an agent-supplied path inside the bundle root (no escaping).

        Leading slashes are stripped so ``/a/b`` means ``<root>/a/b``.

        Raises:
            _PathEscapeError: If the path contains a NUL byte, cannot be resolved,
                or resolves to a location outside the bundle root.
        """
        raw = str(path)
        if "\x00" in raw:
            raise _PathEscapeError(path)
        try:
            candidate = (self.root / raw.lstrip("/")).resolve()
        except (OSError, ValueError, RuntimeError) as exc:
            # OSError/ValueError: malformed path, drive on Windows, etc.
            # RuntimeError: Path.resolve() signals a symlink loop this way on
            # Python < 3.13; it must not leak out of the backend.
            raise _PathEscapeError(path) from exc
        if not self._is_contained(candidate):
            raise _PathEscapeError(path)
        return candidate

    def _is_contained(self, p: Path) -> bool:
        """Whether ``p`` (after symlink resolution) stays within the bundle root.

        An entry that cannot be resolved at all is treated as not contained.
        """
        try:
            resolved = p.resolve()
        except (OSError, RuntimeError):
            # RuntimeError covers symlink loops on Python < 3.13.
            return False
        return resolved == self.root or self.root in resolved.parents

    def _rel(self, p: Path) -> str:
        """Return ``p`` as a ``/``-rooted POSIX path relative to the bundle root."""
        if p == self.root:
            return "/"
        return "/" + p.relative_to(self.root).as_posix()

    def _safe_file_info(self, p: Path) -> FileInfo | None:
        """Build a FileInfo, or None if the entry vanished / cannot be stat'd."""
        try:
            stat = p.stat()
        except OSError:
            return None
        return FileInfo(
            path=self._rel(p),
            is_dir=p.is_dir(),
            size=stat.st_size,
            modified_at=_iso(stat.st_mtime),
        )

    def _contained_files(self, candidates: Iterable[Path]) -> list[Path]:
        """Filter an iterable of paths to regular files that stay within root.

        The result is sorted. Consuming ``candidates`` may raise whatever the
        underlying iterator raises; callers are responsible for guarding that.
        """
        out: list[Path] = []
        for p in sorted(candidates):
            if not self._is_contained(p):
                continue
            try:
                if p.is_file():
                    out.append(p)
            except OSError:
                continue
        return out

    def _stamp_and_validate(self, file_path: str, content: str) -> tuple[str | None, str]:
        """For ``write``: optionally stamp ``timestamp``, then validate OKF.

        Returns ``(error, content)``; ``content`` may be re-serialized to inject the
        timestamp. This is acceptable on a full-content write but never used by ``edit``.
        Non-OKF paths are passed through untouched.
        """
        if not is_okf_document(file_path):
            return None, content
        metadata, body = parse_frontmatter(content)
        # Only stamp documents that already carry frontmatter metadata.
        if self.auto_timestamp and metadata:
            metadata["timestamp"] = _now_iso()
            content = serialize_frontmatter(metadata, body)
        if self.validate:
            errors = validate_metadata(metadata)
            if errors:
                return f"OKF validation failed for {file_path}: {'; '.join(errors)}", content
        return None, content

    def _validate_only(self, file_path: str, content: str) -> str | None:
        """For ``edit``: validate without mutating ``content``. Returns an error or None."""
        if not is_okf_document(file_path) or not self.validate:
            return None
        metadata, _ = parse_frontmatter(content)
        errors = validate_metadata(metadata)
        if errors:
            return f"edit would make {file_path} invalid OKF: {'; '.join(errors)}"
        return None

    # ------------------------------------------------------------------- sync API
    def ls(self, path: str) -> LsResult:
        """List the direct children of directory ``path``.

        Children that resolve outside the bundle root, or that cannot be
        stat'd, are silently omitted from the listing.
        """
        try:
            target = self._resolve(path)
        except _PathEscapeError:
            return LsResult(error=f"path escapes bundle root: {path}")
        if not target.exists():
            return LsResult(error=f"no such path: {path}")
        if not target.is_dir():
            return LsResult(error=f"not a directory: {path}")
        try:
            children = sorted(target.iterdir())
        except OSError as exc:
            return LsResult(error=f"cannot list {path}: {exc}")
        entries: list[FileInfo] = []
        for child in children:
            if not self._is_contained(child):
                continue
            info = self._safe_file_info(child)
            if info is not None:
                entries.append(info)
        return LsResult(entries=entries)

    def read(self, file_path: str, offset: int = 0, limit: int = DEFAULT_READ_LIMIT) -> ReadResult:
        """Read a window of lines from a UTF-8 text file.

        Args:
            file_path: Bundle-relative path of the file.
            offset: Zero-based index of the first line to return.
            limit: Maximum number of lines to return.
        """
        try:
            target = self._resolve(file_path)
        except _PathEscapeError:
            return ReadResult(error=f"path escapes bundle root: {file_path}")
        if not target.is_file():
            return ReadResult(error=f"no such file: {file_path}")
        if offset < 0 or limit < 0:
            return ReadResult(error="offset and limit must be non-negative")
        try:
            stat = target.stat()
            text = target.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            return ReadResult(error=f"cannot read {file_path}: {exc}")
        # Keep line endings so the returned window is a verbatim slice of the file.
        lines = text.splitlines(keepends=True)
        content = "".join(lines[offset : offset + limit])
        file_data = FileData(
            content=content,
            encoding="utf-8",
            created_at=_iso(stat.st_ctime),
            modified_at=_iso(stat.st_mtime),
        )
        return ReadResult(file_data=file_data)

    def write(self, file_path: str, content: str) -> WriteResult:
        """Write ``content`` to ``file_path``, creating parent directories as needed.

        ``.md`` documents are stamped/validated first; on a validation error
        nothing is written.
        """
        try:
            target = self._resolve(file_path)
        except _PathEscapeError:
            return WriteResult(error=f"path escapes bundle root: {file_path}", path=None)
        error, content = self._stamp_and_validate(file_path, content)
        if error:
            return WriteResult(error=error, path=None)
        try:
            target.parent.mkdir(parents=True, exist_ok=True)
            target.write_text(content, encoding="utf-8")
        except OSError as exc:
            return WriteResult(error=f"cannot write {file_path}: {exc}", path=None)
        return WriteResult(error=None, path=self._rel(target))

    def edit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Replace ``old_string`` with ``new_string`` in an existing file.

        When ``replace_all`` is False only the first occurrence is replaced.
        The edited text is validated (for ``.md`` docs) before being written.
        """
        try:
            target = self._resolve(file_path)
        except _PathEscapeError:
            return EditResult(
                error=f"path escapes bundle root: {file_path}", path=None, occurrences=None
            )
        if not target.is_file():
            return EditResult(error=f"no such file: {file_path}", path=None, occurrences=None)
        try:
            text = target.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            return EditResult(error=f"cannot read {file_path}: {exc}", path=None, occurrences=None)
        count = text.count(old_string)
        if count == 0:
            return EditResult(
                error=f"old_string not found in {file_path}", path=None, occurrences=0
            )
        if replace_all:
            new_text, occurrences = text.replace(old_string, new_string), count
        else:
            new_text, occurrences = text.replace(old_string, new_string, 1), 1
        # Validate the result without rewriting the frontmatter block: a body-only
        # edit stays byte-for-byte what the agent asked for.
        error = self._validate_only(file_path, new_text)
        if error:
            return EditResult(error=error, path=None, occurrences=None)
        try:
            target.write_text(new_text, encoding="utf-8")
        except OSError as exc:
            return EditResult(error=f"cannot write {file_path}: {exc}", path=None, occurrences=None)
        return EditResult(error=None, path=self._rel(target), occurrences=occurrences)

    def glob(self, pattern: str, path: str | None = None) -> GlobResult:
        """Return regular files under ``path`` (default: bundle root) matching ``pattern``.

        A pattern that pathlib rejects (empty, absolute, malformed ``**``) is
        reported through ``error`` instead of raising.
        """
        try:
            base = self._resolve(path) if path else self.root
        except _PathEscapeError:
            return GlobResult(error=f"path escapes bundle root: {path}")
        try:
            files = self._contained_files(base.glob(pattern))
        except (OSError, ValueError, RuntimeError) as exc:
            # ValueError: empty/malformed pattern. RuntimeError also covers the
            # NotImplementedError pathlib raises for non-relative patterns.
            return GlobResult(error=f"cannot glob {pattern!r}: {exc}")
        matches: list[FileInfo] = []
        for p in files:
            info = self._safe_file_info(p)
            if info is not None:
                matches.append(info)
        return GlobResult(matches=matches)

    def grep(
        self,
        pattern: str,
        path: str | None = None,
        glob: str | None = None,
    ) -> GrepResult:
        """Find lines containing the literal substring ``pattern``.

        Files that cannot be read or decoded as UTF-8 are skipped. Line numbers
        in the result are 1-based.
        """
        try:
            base = self._resolve(path) if path else self.root
        except _PathEscapeError:
            return GrepResult(error=f"path escapes bundle root: {path}")
        # grep is always recursive; the optional `glob` only filters file names.
        try:
            files = self._contained_files(base.rglob(glob) if glob else base.rglob("*"))
        except (OSError, ValueError, RuntimeError) as exc:
            # Same failure modes as ``glob``: pathlib rejects some patterns by raising.
            return GrepResult(error=f"cannot grep with glob {glob!r}: {exc}")
        matches: list[GrepMatch] = []
        for p in files:
            try:
                text = p.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                continue
            for lineno, line in enumerate(text.splitlines(), start=1):
                if pattern in line:
                    matches.append(GrepMatch(path=self._rel(p), line=lineno, text=line))
        return GrepResult(matches=matches)

    # ----------------------------------------------------------------- binary IO
    # Raw byte transfer for non-markdown artifacts (images, exports, attachments).
    # OKF validation is intentionally skipped here — these are opaque blobs.
    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Write each ``(path, bytes)`` pair to disk; one response per input, in order."""
        responses: list[FileUploadResponse] = []
        for file_path, data in files:
            try:
                target = self._resolve(file_path)
            except _PathEscapeError:
                responses.append(FileUploadResponse(path=file_path, error="permission_denied"))
                continue
            try:
                target.parent.mkdir(parents=True, exist_ok=True)
                target.write_bytes(data)
            except OSError as exc:
                responses.append(FileUploadResponse(path=file_path, error=str(exc)))
                continue
            responses.append(FileUploadResponse(path=self._rel(target), error=None))
        return responses

    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Read each path as raw bytes; one response per input, in order."""
        responses: list[FileDownloadResponse] = []
        for file_path in paths:
            try:
                target = self._resolve(file_path)
            except _PathEscapeError:
                responses.append(
                    FileDownloadResponse(path=file_path, content=None, error="permission_denied")
                )
                continue
            if not target.is_file():
                responses.append(
                    FileDownloadResponse(path=file_path, content=None, error="file_not_found")
                )
                continue
            try:
                data = target.read_bytes()
            except OSError as exc:
                responses.append(FileDownloadResponse(path=file_path, content=None, error=str(exc)))
                continue
            responses.append(FileDownloadResponse(path=self._rel(target), content=data, error=None))
        return responses

    # ------------------------------------------------------------------ async API
    # Local filesystem IO is blocking; offload to a worker thread so async agents
    # never block the event loop.
    async def als(self, path: str) -> LsResult:
        """Async counterpart of :meth:`ls`, run in a worker thread."""
        return await asyncio.to_thread(self.ls, path)

    async def aread(
        self, file_path: str, offset: int = 0, limit: int = DEFAULT_READ_LIMIT
    ) -> ReadResult:
        """Async counterpart of :meth:`read`, run in a worker thread."""
        return await asyncio.to_thread(self.read, file_path, offset, limit)

    async def awrite(self, file_path: str, content: str) -> WriteResult:
        """Async counterpart of :meth:`write`, run in a worker thread."""
        return await asyncio.to_thread(self.write, file_path, content)

    async def aedit(
        self,
        file_path: str,
        old_string: str,
        new_string: str,
        replace_all: bool = False,
    ) -> EditResult:
        """Async counterpart of :meth:`edit`, run in a worker thread."""
        return await asyncio.to_thread(self.edit, file_path, old_string, new_string, replace_all)

    async def aglob(self, pattern: str, path: str | None = None) -> GlobResult:
        """Async counterpart of :meth:`glob`, run in a worker thread."""
        return await asyncio.to_thread(self.glob, pattern, path)

    async def agrep(
        self, pattern: str, path: str | None = None, glob: str | None = None
    ) -> GrepResult:
        """Async counterpart of :meth:`grep`, run in a worker thread."""
        return await asyncio.to_thread(self.grep, pattern, path, glob)

    async def aupload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
        """Async counterpart of :meth:`upload_files`, run in a worker thread."""
        return await asyncio.to_thread(self.upload_files, files)

    async def adownload_files(self, paths: list[str]) -> list[FileDownloadResponse]:
        """Async counterpart of :meth:`download_files`, run in a worker thread."""
        return await asyncio.to_thread(self.download_files, paths)
@@ -0,0 +1,57 @@
1
+ """YAML frontmatter parsing/serialization for OKF markdown documents.
2
+
3
+ Pure helper module — intentionally free of any ``deepagents`` import.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Any
9
+
10
+ import yaml
11
+
12
_DELIMITER = "---"


def parse_frontmatter(text: str) -> tuple[dict[str, Any], str]:
    """Split an OKF markdown document into ``(metadata, body)``.

    A document has frontmatter when its first line is exactly ``---``, followed
    by a YAML block, terminated by another ``---`` line. If there is no
    frontmatter — or the block is unterminated or is not parseable YAML —
    ``metadata`` is an empty dict and ``body`` is the original text. A block
    that parses to something other than a mapping yields empty metadata with
    the body that follows the block.

    This function does not raise on malformed YAML.
    """
    if not text.startswith(_DELIMITER):
        return {}, text

    lines = text.splitlines(keepends=True)
    # The opening fence must be a delimiter line on its own: a document that
    # merely starts with dashes (e.g. a ``----`` rule) is not frontmatter.
    if lines[0].strip() != _DELIMITER:
        return {}, text

    # lines[0] is the opening delimiter; find the closing one.
    closing = next(
        (i for i in range(1, len(lines)) if lines[i].strip() == _DELIMITER),
        None,
    )
    if closing is None:
        # Unterminated frontmatter block — treat the whole thing as body.
        return {}, text

    raw_yaml = "".join(lines[1:closing])
    body = "".join(lines[closing + 1 :])
    if not raw_yaml.strip():
        return {}, body
    try:
        loaded = yaml.safe_load(raw_yaml)
    except yaml.YAMLError:
        # Broken YAML is handled like an unterminated block so callers (which
        # promise never to raise) see "no metadata" rather than an exception.
        return {}, text
    metadata = loaded if isinstance(loaded, dict) else {}
    return metadata, body
41
+
42
+
43
def serialize_frontmatter(metadata: dict[str, Any], body: str) -> str:
    """Assemble an OKF markdown document from ``metadata`` and ``body``.

    With non-empty ``metadata`` the result is a ``---``-fenced YAML block
    (keys kept in insertion order, unicode emitted as-is) followed by the
    body. With empty ``metadata`` the body is returned as-is, without a block.
    """
    if metadata:
        yaml_block = yaml.safe_dump(metadata, sort_keys=False, allow_unicode=True)
        yaml_block = yaml_block.rstrip("\n")
        return "\n".join((_DELIMITER, yaml_block, _DELIMITER, body))
    return body
52
+
53
+
54
def has_frontmatter(text: str) -> bool:
    """Return whether ``text`` carries non-empty frontmatter metadata.

    The answer is derived from :func:`parse_frontmatter`: it is True only when
    the parsed metadata mapping is non-empty, so a terminated but empty block
    counts as "no frontmatter" here.
    """
    return bool(parse_frontmatter(text)[0])
@@ -0,0 +1,64 @@
1
+ """Open Knowledge Format (OKF) v0.1 conventions and validation.
2
+
3
+ Pure helper module — intentionally free of any ``deepagents`` import.
4
+
5
+ Spec: https://cloud.google.com/blog/products/data-analytics/how-the-open-knowledge-format-can-improve-data-sharing
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ #: The only frontmatter field OKF v0.1 makes mandatory.
13
+ REQUIRED_FIELDS: tuple[str, ...] = ("type",)
14
+
15
+ #: Structured fields OKF v0.1 gives meaning to. Everything else is producer-optional.
16
+ KNOWN_FIELDS: tuple[str, ...] = (
17
+ "type",
18
+ "title",
19
+ "description",
20
+ "resource",
21
+ "tags",
22
+ "timestamp",
23
+ )
24
+
25
+ #: Conventional per-directory index document.
26
+ INDEX_FILENAME = "index.md"
27
+
28
+ #: Extension of OKF concept documents.
29
+ DOC_SUFFIX = ".md"
30
+
31
+
32
class OKFValidationError(ValueError):
    """Error type for strict helpers to raise when a document breaks OKF v0.1 rules."""
34
+
35
+
36
def validate_metadata(metadata: dict[str, Any]) -> list[str]:
    """Return a list of human-readable validation errors (empty list == valid).

    OKF v0.1 only requires ``type``. We additionally sanity-check the shape of a
    couple of well-known fields so the agent cannot write a structurally broken doc.

    ``timestamp`` is accepted either as a string or as a ``date``/``datetime``
    value: YAML loaders turn an unquoted ISO timestamp into such an object, and
    rejecting it would make otherwise valid documents impossible to validate.

    Args:
        metadata: Parsed frontmatter mapping.

    Returns:
        Error messages in a stable order: required fields, ``tags``, then the
        scalar fields.
    """
    # Local import keeps this edit self-contained within the module.
    from datetime import date

    errors: list[str] = []

    for field in REQUIRED_FIELDS:
        if field not in metadata or metadata[field] in (None, ""):
            errors.append(f"missing required OKF field: '{field}'")

    tags = metadata.get("tags")
    if tags is not None and not (
        isinstance(tags, list) and all(isinstance(t, str) for t in tags)
    ):
        errors.append("'tags' must be a list of strings")

    for field in ("type", "title", "description", "resource"):
        value = metadata.get(field)
        if value is not None and not isinstance(value, str):
            errors.append(f"'{field}' must be a string")

    # ``datetime`` is a subclass of ``date``, so one check covers both forms.
    timestamp = metadata.get("timestamp")
    if timestamp is not None and not isinstance(timestamp, (str, date)):
        errors.append("'timestamp' must be a string")

    return errors
60
+
61
+
62
def is_okf_document(path: str) -> bool:
    """Tell whether ``path`` names an OKF concept document.

    The check is purely textual: the path must end with the document suffix
    (``.md``), compared case-sensitively.
    """
    looks_like_doc = path.endswith(DOC_SUFFIX)
    return looks_like_doc
File without changes
@@ -0,0 +1,127 @@
1
+ """Semantic query over an OKF bundle, plus a LangChain tool factory.
2
+
3
+ The six standard filesystem tools only let an agent ``grep`` raw text. OKF frontmatter
4
+ (``type``, ``tags``, ``title``) is *structured*, so this module adds a typed query on top
5
+ and exposes it as an optional LangChain tool the agent can call directly — kept separate so
6
+ ``OKFBackend`` itself stays a pure ``BackendProtocol`` implementation.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+
13
+ from langchain_core.tools import BaseTool, tool
14
+
15
+ from .backend import OKFBackend
16
+ from .frontmatter import parse_frontmatter
17
+
18
+
19
@dataclass
class OKFHit:
    """One OKF document returned by :func:`query_bundle`.

    Carries the document's bundle path together with the frontmatter fields
    that the query filters on or reports back.
    """

    # Path of the matched document as reported by the backend's glob.
    path: str
    # Frontmatter ``type``; None when the document does not set it.
    type: str | None = None
    # Frontmatter ``title``; None when absent.
    title: str | None = None
    # Frontmatter ``description``; None when absent.
    description: str | None = None
    # Frontmatter ``tags``; a fresh empty list per instance when absent.
    tags: list[str] = field(default_factory=list)
28
+
29
+
30
def _as_text(value: object) -> str | None:
    """Coerce a frontmatter scalar to ``str``, keeping ``None`` as ``None``."""
    return None if value is None else str(value)


def query_bundle(
    backend: OKFBackend,
    *,
    type: str | None = None,
    tags: list[str] | None = None,
    title_contains: str | None = None,
) -> list[OKFHit]:
    """Return OKF documents whose frontmatter matches every supplied filter.

    Filters are ANDed. ``tags`` matches when the document carries *all* given tags.
    ``type`` matches case-insensitively; ``title_contains`` is a case-insensitive substring.

    Documents that cannot be read, or that have no frontmatter metadata, are
    skipped. Frontmatter values that are not strings (e.g. a numeric ``title``
    in a hand-written bundle) are converted to text before filtering and
    before being stored on the hit, so one odd document cannot abort the query.

    Args:
        backend: Backend whose bundle is searched (all ``**/*.md`` files).
        type: Required document type, or None for any.
        tags: Tags the document must all carry, or None/empty for any.
        title_contains: Substring the title must contain, or None for any.

    Returns:
        Matching documents in the order the backend's glob reported them; an
        empty list when the glob fails or finds nothing.
    """
    glob_result = backend.glob("**/*.md")
    if glob_result.error or not glob_result.matches:
        return []

    # Normalize the filters once instead of on every document.
    wanted_type = None if type is None else type.lower()
    wanted_title = None if title_contains is None else title_contains.lower()
    wanted_tags = {t.lower() for t in (tags or [])}

    hits: list[OKFHit] = []
    for entry in glob_result.matches:
        read_result = backend.read(entry["path"])
        if read_result.error or read_result.file_data is None:
            continue
        metadata, _ = parse_frontmatter(read_result.file_data["content"])
        if not metadata:
            continue

        doc_type = _as_text(metadata.get("type"))
        if wanted_type is not None and (doc_type or "").lower() != wanted_type:
            continue

        raw_tags = metadata.get("tags") or []
        # A malformed bundle may store `tags` as a bare string; normalize to a list
        # so we never iterate it character-by-character.
        doc_tags = raw_tags if isinstance(raw_tags, list) else [raw_tags]
        if wanted_tags and not wanted_tags.issubset({str(t).lower() for t in doc_tags}):
            continue

        doc_title = _as_text(metadata.get("title"))
        if wanted_title is not None and wanted_title not in (doc_title or "").lower():
            continue

        hits.append(
            OKFHit(
                path=entry["path"],
                type=doc_type,
                title=doc_title,
                description=_as_text(metadata.get("description")),
                tags=[str(t) for t in doc_tags],
            )
        )
    return hits
81
+
82
+
83
def _format_hits(hits: list[OKFHit]) -> str:
    """Render query hits as a short plain-text report, one document per line.

    Each line starts with the path, followed by the bracketed type and the
    title when present, then an em-dash-separated description and a trailing
    tag list when present. An empty ``hits`` yields a fixed "nothing found"
    sentence.
    """
    if not hits:
        return "No matching OKF documents found."

    report = [f"Found {len(hits)} document(s):"]
    for hit in hits:
        # Optional pieces collapse to None and are dropped before joining.
        pieces = (
            f"- {hit.path}",
            f"[{hit.type}]" if hit.type else None,
            hit.title if hit.title else None,
        )
        entry = " ".join(piece for piece in pieces if piece is not None)
        if hit.description:
            entry = f"{entry} — {hit.description}"
        if hit.tags:
            entry = f"{entry} (tags: {', '.join(hit.tags)})"
        report.append(entry)
    return "\n".join(report)
100
+
101
+
102
def make_okf_query_tool(backend: OKFBackend) -> BaseTool:
    """Create a LangChain tool that queries ``backend``'s OKF bundle by frontmatter.

    The returned tool closes over ``backend`` and answers with a plain-text
    report. Pass it in ``create_deep_agent(tools=[...])`` to give the agent
    structured lookups (by ``type``/``tags``/``title``) alongside ``grep``.
    """

    # NOTE: the docstring below is the tool description shown to the model;
    # treat it as runtime text, not as a comment.
    @tool
    def okf_query(
        type: str | None = None,
        tags: list[str] | None = None,
        title_contains: str | None = None,
    ) -> str:
        """Search the OKF knowledge bundle by frontmatter fields.

        Args:
            type: Match documents whose ``type`` equals this (case-insensitive),
                e.g. "BigQuery Table" or "Metric".
            tags: Match documents carrying all of these tags.
            title_contains: Match documents whose title contains this substring.
        """
        found = query_bundle(backend, type=type, tags=tags, title_contains=title_contains)
        return _format_hits(found)

    return okf_query
@@ -0,0 +1,168 @@
1
+ Metadata-Version: 2.4
2
+ Name: deepagents-okf-backend
3
+ Version: 0.1.0
4
+ Summary: OKF-aware virtual filesystem backend for LangChain Deep Agents (Open Knowledge Format).
5
+ Project-URL: Homepage, https://github.com/emanueleielo/deepagents-okf-backend
6
+ Project-URL: Repository, https://github.com/emanueleielo/deepagents-okf-backend
7
+ Project-URL: Issues, https://github.com/emanueleielo/deepagents-okf-backend/issues
8
+ Project-URL: OKF spec, https://cloud.google.com/blog/products/data-analytics/how-the-open-knowledge-format-can-improve-data-sharing
9
+ Author-email: Emanuele Ielo <emanueleielo@gmail.com>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: agent,backend,deepagents,langchain,llm,okf,open-knowledge-format
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: deepagents>=0.6
23
+ Requires-Dist: langchain-core>=0.3
24
+ Requires-Dist: pyyaml>=6.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: mypy>=1.10; extra == 'dev'
27
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
28
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
29
+ Requires-Dist: pytest>=8.0; extra == 'dev'
30
+ Requires-Dist: ruff>=0.6; extra == 'dev'
31
+ Requires-Dist: types-pyyaml>=6.0; extra == 'dev'
32
+ Provides-Extra: examples
33
+ Requires-Dist: langchain-anthropic>=0.3; extra == 'examples'
34
+ Requires-Dist: python-dotenv>=1.0; extra == 'examples'
35
+ Description-Content-Type: text/markdown
36
+
37
+ # deepagents-okf-backend
38
+
39
+ [![CI](https://github.com/emanueleielo/deepagents-okf-backend/actions/workflows/ci.yml/badge.svg)](https://github.com/emanueleielo/deepagents-okf-backend/actions/workflows/ci.yml)
40
+ [![PyPI](https://img.shields.io/pypi/v/deepagents-okf-backend.svg)](https://pypi.org/project/deepagents-okf-backend/)
41
+ [![Python](https://img.shields.io/pypi/pyversions/deepagents-okf-backend.svg)](https://pypi.org/project/deepagents-okf-backend/)
42
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
43
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
44
+
45
+ An **OKF-aware virtual filesystem backend** for [LangChain Deep Agents](https://docs.langchain.com/oss/python/deepagents/overview).
46
+
47
+ It mounts an [Open Knowledge Format (OKF)](https://cloud.google.com/blog/products/data-analytics/how-the-open-knowledge-format-can-improve-data-sharing)
48
+ bundle — *"a directory of markdown files with YAML frontmatter"* — as the agent's filesystem,
49
+ so a deep agent can **read, search, and curate** organizational knowledge while every write
50
+ stays a **valid OKF document**.
51
+
52
+ > 🧩 Community backend (not maintained by LangChain), built for the
53
+ > [`integrations/backends`](https://docs.langchain.com/oss/python/integrations/backends) list.
54
+
55
+ ## Why
56
+
57
+ A deep agent's knowledge memory is usually either **ephemeral** (`StateBackend`) or **closed**
58
+ (`ContextHubBackend` → LangSmith Hub). OKF is the **open, vendor-neutral** alternative:
59
+ git-versionable markdown, human-readable, parseable by any agent or framework.
60
+
61
+ | | `StateBackend` | `StoreBackend` | `FilesystemBackend` | **`OKFBackend`** |
62
+ |---|---|---|---|---|
63
+ | Persists across threads | ❌ | ✅ | ✅ | ✅ |
64
+ | Human-readable on disk | ❌ | ❌ | ✅ | ✅ |
65
+ | Vendor-neutral / portable bundle | ❌ | ❌ | ➖ | ✅ |
66
+ | Structured frontmatter query | ❌ | ❌ | ❌ | ✅ |
67
+ | Validates writes as a shareable format | ❌ | ❌ | ❌ | ✅ |
68
+
69
+ What `OKFBackend` adds:
70
+
71
+ - **Open knowledge, no lock-in** — portable markdown bundles, not a proprietary store.
72
+ - **Semantic, not blind** — query by `type` / `tags` / `title`, not just `grep`.
73
+ - **Self-improving wiki** — the agent maintains the bundle; writes are validated as OKF.
74
+ - **Cross-agent sharing** — *"a bundle synthesized by one LLM can be queried by another."*
75
+ - **Sync + async**, path-sandboxed to the bundle root, fully typed (`py.typed`).
76
+
77
+ ## Install
78
+
79
+ ```bash
80
+ pip install deepagents-okf-backend
81
+ ```
82
+
83
+ ## Quickstart
84
+
85
+ ```python
86
+ from deepagents import create_deep_agent
87
+ from deepagents_okf_backend import OKFBackend
88
+
89
+ backend = OKFBackend("./knowledge", validate=True, auto_timestamp=True)
90
+
91
+ agent = create_deep_agent(
92
+ tools=[],
93
+ instructions="You curate the organization's OKF knowledge bundle.",
94
+ backend=backend,
95
+ )
96
+ ```
97
+
98
+ ### Knowledge surface + scratch space (`CompositeBackend`)
99
+
100
+ Mount the OKF bundle on `/knowledge` and keep an ephemeral scratch filesystem on `/`:
101
+
102
+ ```python
103
+ from deepagents.backends import CompositeBackend, StateBackend
104
+ from deepagents_okf_backend import OKFBackend
105
+
106
+ backend = CompositeBackend(
107
+ routes={"/knowledge": OKFBackend("./knowledge")},
108
+ default=StateBackend(),
109
+ )
110
+ ```
111
+
112
+ ### Structured query tool
113
+
114
+ The six standard filesystem tools only `grep` raw text. Give the agent a typed lookup over
115
+ OKF frontmatter:
116
+
117
+ ```python
118
+ from deepagents import create_deep_agent
119
+ from deepagents_okf_backend import OKFBackend, make_okf_query_tool
120
+
121
+ backend = OKFBackend("./knowledge")
122
+ agent = create_deep_agent(
123
+ tools=[make_okf_query_tool(backend)], # okf_query(type=..., tags=..., title_contains=...)
124
+ instructions="Use okf_query to find tables and metrics before answering.",
125
+ backend=backend,
126
+ )
127
+ ```
128
+
129
+ You can also call it directly:
130
+
131
+ ```python
132
+ from deepagents_okf_backend import query_bundle
133
+
134
+ hits = query_bundle(backend, type="Metric", tags=["growth"])
135
+ ```
136
+
137
+ ## What is OKF?
138
+
139
+ Open Knowledge Format (Google Cloud, 2026) represents knowledge as a directory of markdown
140
+ files with YAML frontmatter. The only required field is `type`; `title`, `description`,
141
+ `resource`, `tags`, and `timestamp` are optional. See the
142
+ [announcement](https://cloud.google.com/blog/products/data-analytics/how-the-open-knowledge-format-can-improve-data-sharing).
143
+
144
+ ```markdown
145
+ ---
146
+ type: BigQuery Table
147
+ title: Orders
148
+ description: One row per completed customer order.
149
+ tags: [sales, revenue]
150
+ ---
151
+ # Schema
152
+ | Column | Type | Description |
153
+ |--------|------|-------------|
154
+ | `order_id` | STRING | Globally unique order identifier. |
155
+ ```
156
+
157
+ ## Development
158
+
159
+ See [`DEVELOPMENT_PLAN.md`](DEVELOPMENT_PLAN.md). Contributions welcome — see [`CONTRIBUTING.md`](CONTRIBUTING.md).
160
+
161
+ ```bash
162
+ pip install -e ".[dev]"
163
+ ruff check . && mypy src && pytest --cov=deepagents_okf_backend
164
+ ```
165
+
166
+ ## License
167
+
168
+ MIT © Emanuele Ielo
@@ -0,0 +1,10 @@
1
+ deepagents_okf_backend/__init__.py,sha256=RWzcWJR2Aqjb5HCw4pTjtTLzx13jxkIkxm_ql1GfBd4,563
2
+ deepagents_okf_backend/backend.py,sha256=A0oNId_v0dNkpfW5YaY-Yw4z7yfKCpsymaWs_IWIr8Y,15396
3
+ deepagents_okf_backend/frontmatter.py,sha256=9R79Bn85xcFRvzA2yFZ9WQjL85XvmaSk5niOKEqCN0w,1892
4
+ deepagents_okf_backend/okf.py,sha256=6qTQdrtld7j8NBFvDyn48zIUX2bIs6xO1IZLgH1ELzE,2005
5
+ deepagents_okf_backend/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ deepagents_okf_backend/query.py,sha256=akWXe_xcZvxsoweoNVd9K_g0xnGW-Uy0fQLfK6-1x4g,4300
7
+ deepagents_okf_backend-0.1.0.dist-info/METADATA,sha256=_oRJeU07RlegnHY2xSySzNX8FkOQPdcv2MZgZ47wleI,6543
8
+ deepagents_okf_backend-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
9
+ deepagents_okf_backend-0.1.0.dist-info/licenses/LICENSE,sha256=LIAri2meHjPOuxPFxppzOk7HMUM3-sws8re8HioDyOo,1070
10
+ deepagents_okf_backend-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Emanuele Ielo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.