quackspace 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
quack/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """quack, a knowledge layer over your local work that helps LLMs navigate it."""
2
+
3
+ __version__ = "0.1.0"
quack/catalog.py ADDED
@@ -0,0 +1,217 @@
1
+ """The meta collection: one DuckDB catalog of all file metadata.
2
+
3
+ `quack reindex` rebuilds `.quack/quack.duckdb` from the files (+ the editable
4
+ .index.yaml store). It is a derived artifact, never the source of truth, so it
5
+ can be deleted and regenerated at any time. DuckDB is embedded (no server) and
6
+ gives real SQL plus BM25 full-text search over everything, the fast metadata
7
+ search `ls` can't do.
8
+
9
+ Schema:
10
+ files(name, rel, folder, ext, title, description, tags_csv, n_links,
11
+ n_inbound, is_orphan, is_binary, file_modified, described_at, stale,
12
+ body)
13
+ tags(name, tag) -- one row per (file, tag)
14
+ links(src, dst, dst_exists) -- one row per wikilink edge
15
+ A DuckDB FTS index is built over files(name, description, body) for `match_bm25`.
16
+ `stale` is true when the file changed after its description was written.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from collections import defaultdict
22
+ from pathlib import Path
23
+
24
+ import duckdb
25
+
26
+ from .core import Space
27
+
28
# File name of the derived DuckDB catalog kept inside a space's `.quack` folder.
DB_NAME = "quack.duckdb"


def db_path(space: Space) -> Path:
    """Return where the catalog for *space* lives: ``<root>/.quack/<DB_NAME>``.

    Only the path is computed; nothing is created or checked on disk.
    """
    return space.root.joinpath(".quack", DB_NAME)
33
+
34
+
35
def build(space: Space) -> dict:
    """Rebuild the catalog from scratch over the loaded space.

    The space already carries effective metadata (authored .index.yaml
    overlaid on each file), so this only projects ``space.entries`` into the
    ``files``, ``tags`` and ``links`` tables and then builds the FTS index.

    Returns a summary dict with keys ``db`` (catalog path as a string) and
    ``files`` / ``tags`` / ``links`` (row counts read back from the catalog).
    """
    path = db_path(space)

    # Rebuild clean; the files + .index.yaml are the truth. Make sure the
    # target directory exists, and also drop a write-ahead log left behind by
    # an interrupted run (DuckDB keeps it next to the database as
    # `<file>.wal`) so it cannot be replayed into the fresh database.
    path.parent.mkdir(parents=True, exist_ok=True)
    for leftover in (path, path.with_name(path.name + ".wal")):
        try:
            leftover.unlink()
        except FileNotFoundError:
            pass

    # Count inbound links per note; links to unknown names are ignored here
    # (they are still recorded in `links` with dst_exists = false).
    names = set(space.by_name)
    inbound: dict[str, int] = defaultdict(int)
    for e in space.entries:
        for target in e.links:
            if target in names:
                inbound[target] += 1

    # Collect all rows first so each table is loaded with one batched
    # statement instead of one autocommitted INSERT per row.
    file_rows: list[list] = []
    tag_rows: list[list] = []
    link_rows: list[list] = []
    for e in space.entries:
        n_inbound = inbound.get(e.name, 0)
        n_links = len(e.links)
        file_rows.append(
            [
                e.name,
                e.rel,
                e.folder,
                e.ext,
                # NOTE(review): `title` is populated from e.name, as before;
                # confirm whether entries expose a separate title.
                e.name,
                e.description,
                ",".join(e.tags),
                n_links,
                n_inbound,
                # orphan = nothing links here and it links nowhere
                n_inbound == 0 and n_links == 0,
                e.is_binary,
                e.modified,
                e.described_at,
                e.stale,
                e.body,
            ]
        )
        tag_rows.extend([e.name, tag] for tag in e.tags)
        link_rows.extend([e.name, dst, dst in names] for dst in e.links)

    con = duckdb.connect(str(path))
    try:
        _create_schema(con)
        # One explicit transaction around the bulk load. executemany rejects
        # an empty parameter list, hence the guards.
        con.begin()
        if file_rows:
            con.executemany(
                "INSERT INTO files VALUES "
                "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                file_rows,
            )
        if tag_rows:
            con.executemany("INSERT INTO tags VALUES (?, ?)", tag_rows)
        if link_rows:
            con.executemany("INSERT INTO links VALUES (?, ?, ?)", link_rows)
        con.commit()

        _build_fts(con)
        n_files = con.execute("SELECT count(*) FROM files").fetchone()[0]
        n_tags = con.execute("SELECT count(*) FROM tags").fetchone()[0]
        n_links = con.execute("SELECT count(*) FROM links").fetchone()[0]
    finally:
        con.close()

    return {"db": str(path), "files": n_files, "tags": n_tags, "links": n_links}
90
+
91
+
92
def _create_schema(con: duckdb.DuckDBPyConnection) -> None:
    """Create the three catalog tables on *con* in a single statement batch.

    ``files`` holds one row per file, ``tags`` one row per (file, tag) pair
    and ``links`` one row per wikilink edge. No keys or indexes are declared.
    """
    ddl = """
        CREATE TABLE files (
            name VARCHAR,
            rel VARCHAR,
            folder VARCHAR,
            ext VARCHAR,
            title VARCHAR,
            description VARCHAR,
            tags_csv VARCHAR,
            n_links INTEGER,
            n_inbound INTEGER,
            is_orphan BOOLEAN,
            is_binary BOOLEAN,
            file_modified VARCHAR,
            described_at VARCHAR,
            stale BOOLEAN,
            body VARCHAR
        );
        CREATE TABLE tags (name VARCHAR, tag VARCHAR);
        CREATE TABLE links (src VARCHAR, dst VARCHAR, dst_exists BOOLEAN);
        """
    con.execute(ddl)
116
+
117
+
118
def _build_fts(con: duckdb.DuckDBPyConnection) -> None:
    """Build the BM25 full-text index over the searchable note fields.

    Installs and loads DuckDB's ``fts`` extension, then (re)creates the index
    on ``files`` keyed by ``name`` and covering ``name``, ``description`` and
    ``body``.
    """
    statements = (
        "INSTALL fts; LOAD fts;",
        "PRAGMA create_fts_index('files', 'name', 'name', 'description', 'body', "
        "overwrite=1);",
    )
    for statement in statements:
        con.execute(statement)
125
+
126
+
127
def connect(explicit_root: str | None = None) -> duckdb.DuckDBPyConnection:
    """Open the catalog of the space at *explicit_root* in read-only mode.

    The caller owns the returned connection and must close it.

    Raises:
        RuntimeError: if the catalog file has not been built yet.
    """
    catalog = db_path(Space.load(explicit_root))
    if catalog.exists():
        return duckdb.connect(str(catalog), read_only=True)
    raise RuntimeError(
        f"No catalog at {catalog}. Run `quack reindex` to build it."
    )
136
+
137
+
138
def query(sql: str, explicit_root: str | None = None) -> tuple[list[str], list[tuple]]:
    """Execute *sql* against the read-only catalog.

    Returns ``(column_names, rows)``; ``column_names`` is empty when the
    statement produces no result description. The connection is always
    closed before returning.
    """
    con = connect(explicit_root)
    try:
        result = con.execute(sql)
        header = [col[0] for col in result.description] if result.description else []
        records = result.fetchall()
        return header, records
    finally:
        con.close()
147
+
148
+
149
def neighbours(
    names: list[str], explicit_root: str | None = None, hops: int = 1
) -> list[tuple[str, str, int, str]]:
    """Graph traversal in SQL: notes within `hops` of any seed name, in either
    link direction.

    Returns ``[(name, rel, distance, via_seed), ...]`` ordered by distance and
    then name, excluding the seeds themselves. ``via_seed`` is one seed that
    reaches the note at its minimum distance; when several seeds tie, the
    alphabetically first one is reported so the result is reproducible.
    An empty ``names`` list returns ``[]`` without opening the catalog.

    Uses a recursive CTE so only the relevant subgraph is materialized, the
    whole point of keeping the graph in DuckDB instead of a flat file.
    """
    if not names:
        return []
    con = connect(explicit_root)
    try:
        placeholders = ",".join("?" for _ in names)
        rows = con.execute(
            f"""
            WITH RECURSIVE
            -- undirected edge view over existing notes only
            edge(a, b) AS (
                SELECT src, dst FROM links WHERE dst_exists
                UNION ALL
                SELECT dst, src FROM links WHERE dst_exists
            ),
            -- UNION (not UNION ALL) dedupes (name, dist, seed); the hop
            -- bound is what stops the walk on cyclic graphs
            walk(name, dist, seed) AS (
                SELECT name, 0, name FROM files WHERE name IN ({placeholders})
                UNION
                SELECT e.b, w.dist + 1, w.seed
                FROM walk w JOIN edge e ON e.a = w.name
                WHERE w.dist < ?
            ),
            ranked AS (
                SELECT w.name, n.rel, w.dist, w.seed,
                       -- seed is the tie-break so equal-distance seeds
                       -- always resolve the same way
                       row_number() OVER (
                           PARTITION BY w.name ORDER BY w.dist, w.seed
                       ) AS rn
                FROM walk w JOIN files n ON n.name = w.name
                WHERE w.dist > 0
                  AND w.name NOT IN ({placeholders})  -- a seed is not its own neighbour
            )
            SELECT name, rel, dist, seed FROM ranked WHERE rn = 1
            ORDER BY dist, name
            """,
            # Parameter order follows the placeholders: seeds, hop bound, seeds.
            [*names, hops, *names],
        ).fetchall()
        return rows
    finally:
        con.close()
195
+
196
+
197
def fts_search(
    terms: str, explicit_root: str | None = None, limit: int = 10
) -> list[tuple[str, str, float]]:
    """Rank files against *terms* with the BM25 index.

    Returns up to *limit* rows of ``(rel, description, score)``, best score
    first. Files for which ``match_bm25`` yields NULL are left out.
    """
    sql = """
        SELECT rel, description, score FROM (
            SELECT rel, description,
                   fts_main_files.match_bm25(name, ?) AS score
            FROM files
        ) WHERE score IS NOT NULL
        ORDER BY score DESC
        LIMIT ?
        """
    con = connect(explicit_root)
    try:
        return con.execute(sql, [terms, limit]).fetchall()
    finally:
        con.close()