dig-client 0.1.0.dev4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dig-client
|
|
3
|
+
Version: 0.1.0.dev4
|
|
4
|
+
Summary: Python client for a local dig daemon — search, organize, reconcile, and export a knowledge base over HTTP.
|
|
5
|
+
Project-URL: Homepage, https://dig.vllnt.com
|
|
6
|
+
Project-URL: Repository, https://github.com/vllnt/dig
|
|
7
|
+
Author: vllnt
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
Keywords: ai-agents,dig,knowledge-base,memory,rag,retrieval
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
|
|
13
|
+
# dig-client
|
|
14
|
+
|
|
15
|
+
Python client for a local [dig](https://github.com/vllnt/dig) daemon — search,
|
|
16
|
+
organize, reconcile, and export a knowledge base over HTTP. Dependency-free
|
|
17
|
+
(standard library only), local-first.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```sh
|
|
22
|
+
pip install dig-client
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Start a daemon next to your KB (dig binary from https://dig.vllnt.com):
|
|
26
|
+
|
|
27
|
+
```sh
|
|
28
|
+
dig serve # binds 127.0.0.1:3978 (loopback only)
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Use
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from dig_client import DigClient
|
|
35
|
+
|
|
36
|
+
dig = DigClient() # http://127.0.0.1:3978
|
|
37
|
+
|
|
38
|
+
# search — fts (default), vector, or hybrid (semantic)
|
|
39
|
+
hits = dig.find("invoice acme 2024", mode="hybrid", limit=5)
|
|
40
|
+
|
|
41
|
+
# agent memory — capture, then recall a token-budgeted pack
|
|
42
|
+
dig.retain(session_markdown, as_="memory/sessions/today.md")
|
|
43
|
+
pack = dig.recall("billing ledger decision", budget=1000)
|
|
44
|
+
|
|
45
|
+
# reorganize by policy — preview, then apply (reversible)
|
|
46
|
+
dig.org(apply=False) # preview the plan
|
|
47
|
+
dig.org(apply=True) # commit it
|
|
48
|
+
dig.undo() # step back
|
|
49
|
+
|
|
50
|
+
# reproducible dataset export (JSONL text)
|
|
51
|
+
jsonl = dig.export(filter="label:finance")
|
|
52
|
+
|
|
53
|
+
# read-only inspection
|
|
54
|
+
dig.drift()
|
|
55
|
+
dig.log()
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Target a specific KB with `kb="/path/or/name"` on any call; omit it to use the
|
|
59
|
+
KB at the daemon's working directory. Errors raise `DigError` carrying the HTTP
|
|
60
|
+
status.
|
|
61
|
+
|
|
62
|
+
The client speaks the same contract as `dig serve`, a thin adapter over the dig
|
|
63
|
+
CLI — so it never drifts from the tool.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# dig-client
|
|
2
|
+
|
|
3
|
+
Python client for a local [dig](https://github.com/vllnt/dig) daemon — search,
|
|
4
|
+
organize, reconcile, and export a knowledge base over HTTP. Dependency-free
|
|
5
|
+
(standard library only), local-first.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```sh
|
|
10
|
+
pip install dig-client
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Start a daemon next to your KB (dig binary from https://dig.vllnt.com):
|
|
14
|
+
|
|
15
|
+
```sh
|
|
16
|
+
dig serve # binds 127.0.0.1:3978 (loopback only)
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Use
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from dig_client import DigClient
|
|
23
|
+
|
|
24
|
+
dig = DigClient() # http://127.0.0.1:3978
|
|
25
|
+
|
|
26
|
+
# search — fts (default), vector, or hybrid (semantic)
|
|
27
|
+
hits = dig.find("invoice acme 2024", mode="hybrid", limit=5)
|
|
28
|
+
|
|
29
|
+
# agent memory — capture, then recall a token-budgeted pack
|
|
30
|
+
dig.retain(session_markdown, as_="memory/sessions/today.md")
|
|
31
|
+
pack = dig.recall("billing ledger decision", budget=1000)
|
|
32
|
+
|
|
33
|
+
# reorganize by policy — preview, then apply (reversible)
|
|
34
|
+
dig.org(apply=False) # preview the plan
|
|
35
|
+
dig.org(apply=True) # commit it
|
|
36
|
+
dig.undo() # step back
|
|
37
|
+
|
|
38
|
+
# reproducible dataset export (JSONL text)
|
|
39
|
+
jsonl = dig.export(filter="label:finance")
|
|
40
|
+
|
|
41
|
+
# read-only inspection
|
|
42
|
+
dig.drift()
|
|
43
|
+
dig.log()
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Target a specific KB with `kb="/path/or/name"` on any call; omit it to use the
|
|
47
|
+
KB at the daemon's working directory. Errors raise `DigError` carrying the HTTP
|
|
48
|
+
status.
|
|
49
|
+
|
|
50
|
+
The client speaks the same contract as `dig serve`, a thin adapter over the dig
|
|
51
|
+
CLI — so it never drifts from the tool.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "dig-client"
|
|
7
|
+
version = "0.1.0.dev4"
|
|
8
|
+
description = "Python client for a local dig daemon — search, organize, reconcile, and export a knowledge base over HTTP."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
keywords = ["dig", "knowledge-base", "retrieval", "memory", "ai-agents", "rag"]
|
|
13
|
+
authors = [{ name = "vllnt" }]
|
|
14
|
+
dependencies = []
|
|
15
|
+
|
|
16
|
+
[project.urls]
|
|
17
|
+
Homepage = "https://dig.vllnt.com"
|
|
18
|
+
Repository = "https://github.com/vllnt/dig"
|
|
19
|
+
|
|
20
|
+
[tool.hatch.build.targets.wheel]
|
|
21
|
+
packages = ["src/dig_client"]
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""dig-client — a dependency-free Python client for a local dig daemon (`dig serve`).
|
|
2
|
+
|
|
3
|
+
Drives the same surface as the CLI over HTTP: search, recall, drift, history, export
|
|
4
|
+
(read) and retain / organize / reconcile / undo (mutations; organize and
reconcile preview by default).
|
|
5
|
+
Local-first: it talks only to the loopback daemon you run.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
from dig_client import DigClient
|
|
9
|
+
|
|
10
|
+
dig = DigClient() # http://127.0.0.1:3978
|
|
11
|
+
hits = dig.find("invoice acme", mode="hybrid", limit=5)
|
|
12
|
+
dig.org(apply=True) # commit a reorg (reversible)
|
|
13
|
+
dig.undo()
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import json
|
|
19
|
+
import urllib.error
|
|
20
|
+
import urllib.parse
|
|
21
|
+
import urllib.request
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
__all__ = ["DigClient", "DigError"]
|
|
25
|
+
|
|
26
|
+
DEFAULT_BASE_URL = "http://127.0.0.1:3978"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DigError(Exception):
    """Raised when the daemon returns a non-2xx response.

    Attributes:
        status: The HTTP status code supplied by the caller that raised the error.
    """

    def __init__(self, message: str, status: int) -> None:
        """Store ``message`` as the exception text and ``status`` as an attribute."""
        super().__init__(message)
        self.status = status

    def __reduce__(self) -> tuple[Any, ...]:
        """Rebuild with both constructor arguments.

        Only ``message`` is kept in ``args``, so the default exception reduce
        protocol would call ``DigError(message)`` and fail with ``TypeError``
        when the error is copied or pickled.
        """
        message = self.args[0] if self.args else ""
        return (type(self), (message, self.status))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DigClient:
    """Client for a local dig daemon.

    Each public method sends one HTTP request to ``base_url`` and returns the
    decoded JSON response. Keyword arguments left as ``None`` are omitted from
    the query string. An HTTP error status raises :class:`DigError`.
    """

    def __init__(self, base_url: str = DEFAULT_BASE_URL, timeout: float = 120.0) -> None:
        """Remember where the daemon lives and how long to wait for it.

        Args:
            base_url: Root URL of the daemon; trailing slashes are stripped.
            timeout: Value passed to ``urlopen`` as ``timeout`` on every request.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def health(self) -> dict[str, Any]:
        """Liveness + daemon version."""
        return self._request("GET", "/health", {})

    def find(
        self,
        query: str,
        kb: str | None = None,
        mode: str | None = None,
        limit: int | None = None,
    ) -> list[dict[str, Any]]:
        """Search the KB, ranked. ``mode`` is fts (default), vector, or hybrid."""
        return self._request(
            "GET", "/find", {"kb": kb, "query": query, "mode": mode, "limit": limit}
        )

    def recall(
        self,
        query: str,
        kb: str | None = None,
        mode: str | None = None,
        budget: int | None = None,
    ) -> dict[str, Any]:
        """Load a token-budgeted, provenance-tagged context pack for ``query``.

        The agent-memory recall primitive: snippets land on the matching passage.
        ``budget`` caps the pack in tokens; ``mode`` is fts (default), vector, or
        hybrid.
        """
        return self._request(
            "GET", "/recall", {"kb": kb, "query": query, "mode": mode, "budget": budget}
        )

    def retain(
        self, content: str, kb: str | None = None, as_: str | None = None
    ) -> Any:
        """Capture ``content`` into the KB and index it — the capture primitive.

        Writes to a dated ``memory/`` path by default; pass ``as_`` to choose the
        path. Reversible with :meth:`undo`. ``content`` is sent as the UTF-8
        encoded request body; ``as_`` travels as the ``as`` query parameter.
        """
        return self._request(
            "POST", "/retain", {"kb": kb, "as": as_}, data=content.encode("utf-8")
        )

    def drift(self, kb: str | None = None) -> Any:
        """Report how the KB diverges from its policy. Read-only."""
        return self._request("GET", "/drift", {"kb": kb})

    def log(self, kb: str | None = None) -> Any:
        """Browse change history, newest first. Read-only."""
        return self._request("GET", "/log", {"kb": kb})

    def export(
        self, kb: str | None = None, filter: str | None = None, at: str | None = None
    ) -> str:
        """Export a reproducible, provenance-tagged dataset (JSONL text). Read-only.

        Returns the ``output`` field of the response object as a string, or an
        empty string when the response is not a JSON object or has no ``output``.
        """
        body = self._request("GET", "/export", {"kb": kb, "filter": filter, "at": at})
        if isinstance(body, dict):
            return str(body.get("output", ""))
        return ""

    def org(self, kb: str | None = None, apply: bool = False) -> Any:
        """Apply organization policy. Previews unless ``apply`` is True (reversible)."""
        return self._request("POST", "/org", {"kb": kb, "apply": apply})

    def reconcile(self, kb: str | None = None, apply: bool = False) -> Any:
        """Converge the KB to policy. Previews unless ``apply`` is True (reversible)."""
        return self._request("POST", "/reconcile", {"kb": kb, "apply": apply})

    def undo(self, kb: str | None = None) -> Any:
        """Revert the last changeset."""
        return self._request("POST", "/undo", {"kb": kb})

    def _request(
        self, method: str, path: str, params: dict[str, Any], data: bytes | None = None
    ) -> Any:
        """Send one request to the daemon and decode the response.

        Args:
            method: HTTP method to use.
            path: Endpoint path appended to ``base_url`` (for example ``"/find"``).
            params: Query parameters. Entries whose value is ``None`` are dropped;
                booleans are sent as ``"true"`` / ``"false"``.
            data: Optional raw request body.

        Returns:
            The response body parsed as JSON, or ``None`` when the body is empty.

        Raises:
            DigError: When ``urlopen`` raises ``HTTPError``. The message is the
                ``error`` field of a JSON object body if present, otherwise the
                HTTP reason. Other failures (for example a refused connection)
                are not caught here and propagate unchanged.
        """
        query = {k: _str(v) for k, v in params.items() if v is not None}
        url = self.base_url + path
        if query:
            url += "?" + urllib.parse.urlencode(query)
        req = urllib.request.Request(url, data=data, method=method)  # noqa: S310 (loopback only)
        try:
            with urllib.request.urlopen(req, timeout=self.timeout) as resp:  # noqa: S310
                return _parse(resp.read())
        except urllib.error.HTTPError as exc:
            # HTTPError wraps the open response; release it once the body is read
            # instead of leaving the close to garbage collection.
            try:
                body = exc.read()
            finally:
                exc.close()
            raise DigError(_error(body) or exc.reason, exc.code) from None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _str(value: Any) -> str:
|
|
135
|
+
if isinstance(value, bool):
|
|
136
|
+
return "true" if value else "false"
|
|
137
|
+
return str(value)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _parse(raw: bytes) -> Any:
|
|
141
|
+
if not raw:
|
|
142
|
+
return None
|
|
143
|
+
return json.loads(raw.decode("utf-8"))
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _error(raw: bytes) -> str | None:
|
|
147
|
+
try:
|
|
148
|
+
body = json.loads(raw.decode("utf-8"))
|
|
149
|
+
except (ValueError, UnicodeDecodeError):
|
|
150
|
+
return None
|
|
151
|
+
if isinstance(body, dict) and "error" in body:
|
|
152
|
+
return str(body["error"])
|
|
153
|
+
return None
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Integration tests: the SDK drives a REAL `dig serve` against a real temp KB —
|
|
2
|
+
no mocks. The dig binary comes from $DIG_BIN (CI builds it) or `dig` on PATH.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import subprocess
|
|
9
|
+
import sys
|
|
10
|
+
import tempfile
|
|
11
|
+
import time
|
|
12
|
+
import unittest
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "src"))
|
|
16
|
+
|
|
17
|
+
from dig_client import DigClient, DigError # noqa: E402
|
|
18
|
+
|
|
19
|
+
DIG = os.environ.get("DIG_BIN", "dig")
|
|
20
|
+
PORT = 3986
|
|
21
|
+
POLICY = """
|
|
22
|
+
[[rule]]
|
|
23
|
+
name = "invoices"
|
|
24
|
+
match = { ext = ["pdf"], content_matches = "invoice" }
|
|
25
|
+
into = "finance/invoices"
|
|
26
|
+
label = ["finance"]
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _dig(*args: str) -> None:
    """Run the dig binary with ``args`` and fail loudly on a non-zero exit.

    Output is captured as text; on failure the ``RuntimeError`` message carries
    stderr, or stdout when stderr is empty.
    """
    command = [DIG, *args]
    result = subprocess.run(command, capture_output=True, text=True, check=False)
    if result.returncode == 0:
        return
    raise RuntimeError(f"dig {' '.join(args)} failed: {result.stderr or result.stdout}")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class TestDigClient(unittest.TestCase):
    """End-to-end tests: DigClient against a real ``dig serve`` and a temp KB.

    ``setUpClass`` creates a throwaway KB with two inbox files, initialises and
    scans it with the dig CLI, then starts one daemon on ``PORT`` that all
    tests share. The daemon and the KB directory are released through class
    cleanups so they are torn down even when setup itself fails.
    """

    daemon: subprocess.Popen[bytes]
    kb: str

    @classmethod
    def setUpClass(cls) -> None:
        """Build the KB, start the daemon, and wait until ``/health`` answers."""
        import shutil  # local import: only the fixture cleanup needs it

        cls.kb = tempfile.mkdtemp(prefix="dig-py-")
        # Class cleanups still run when setUpClass raises (tearDownClass does
        # not), so register the temp-dir removal before anything can fail.
        cls.addClassCleanup(shutil.rmtree, cls.kb, ignore_errors=True)
        inbox = Path(cls.kb) / "inbox"
        inbox.mkdir(parents=True, exist_ok=True)
        (inbox / "acme.pdf").write_text("ACME invoice #1007")
        (inbox / "todo.md").write_text("- [ ] things")
        _dig("init", cls.kb)
        (Path(cls.kb) / ".dig" / "policy.toml").write_text(POLICY)
        _dig("--kb", cls.kb, "scan")

        cls.daemon = subprocess.Popen(
            [DIG, "serve", "--addr", f"127.0.0.1:{PORT}"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        # Registered last, so it runs first: the daemon stops before the KB
        # directory is deleted, and it stops even if the health wait below fails.
        cls.addClassCleanup(cls._stop_daemon)
        client = DigClient(base_url=f"http://127.0.0.1:{PORT}")
        # Poll for up to ~5 seconds (50 tries, 0.1 s apart) for the daemon to listen.
        for _ in range(50):
            try:
                client.health()
                break
            except Exception:  # noqa: BLE001
                time.sleep(0.1)
        else:
            raise RuntimeError("daemon never became healthy")

    @classmethod
    def tearDownClass(cls) -> None:
        """Stop the shared daemon."""
        cls._stop_daemon()

    @classmethod
    def _stop_daemon(cls) -> None:
        """Terminate the daemon if it is still running; kill it if it will not exit."""
        daemon = getattr(cls, "daemon", None)
        if daemon is None or daemon.poll() is not None:
            return
        daemon.terminate()
        try:
            daemon.wait(timeout=5)
        except subprocess.TimeoutExpired:
            daemon.kill()
            daemon.wait()

    def client(self) -> DigClient:
        """Return a fresh client pointed at the test daemon."""
        return DigClient(base_url=f"http://127.0.0.1:{PORT}")

    def test_health(self) -> None:
        """``/health`` reports status ``ok`` and a non-empty version."""
        h = self.client().health()
        self.assertEqual(h["status"], "ok")
        self.assertTrue(h["version"])

    def test_find(self) -> None:
        """Searching for "invoice" returns the seeded ``acme.pdf``."""
        hits = self.client().find("invoice", kb=self.kb)
        self.assertTrue(any(h["Path"].endswith("acme.pdf") for h in hits))

    def test_org_preview_apply_undo(self) -> None:
        """Preview leaves files alone, apply moves them, undo moves them back."""

        def moved() -> bool:
            return (Path(self.kb) / "finance" / "invoices" / "acme.pdf").exists()

        self.client().org(kb=self.kb)  # preview
        self.assertFalse(moved(), "preview must not move files")

        self.client().org(kb=self.kb, apply=True)
        self.assertTrue(moved(), "apply must move the file")

        self.client().undo(kb=self.kb)
        self.assertFalse(moved(), "undo must revert")

    def test_log_and_drift(self) -> None:
        """``log`` and ``drift`` both return a non-None body."""
        self.assertIsNotNone(self.client().log(kb=self.kb))
        self.assertIsNotNone(self.client().drift(kb=self.kb))

    def test_retain_then_recall(self) -> None:
        """A retained fact is surfaced by a later recall within the budget."""
        fact = "Decision: adopt the new ledger in Q3; Dana owns the migration."
        retained = self.client().retain(fact, kb=self.kb, as_="memory/py.md")
        self.assertIn("Retained memory/py.md", retained["output"])

        pack = self.client().recall("ledger migration Dana", kb=self.kb, budget=400)
        self.assertEqual(pack["budgetTokens"], 400)
        self.assertTrue(pack["manifest"])
        self.assertTrue(
            any("new ledger in Q3" in item["content"] for item in pack["items"]),
            "recall should surface the retained fact",
        )

    def test_error_path(self) -> None:
        """A request against a missing KB raises DigError with a 4xx/5xx status."""
        with self.assertRaises(DigError) as ctx:
            self.client().find("anything", kb="/no/such/kb")
        self.assertGreaterEqual(ctx.exception.status, 400)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
if __name__ == "__main__":
|
|
119
|
+
unittest.main()
|