quackspace 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quack/__init__.py +3 -0
- quack/catalog.py +217 -0
- quack/cli.py +533 -0
- quack/config.py +147 -0
- quack/core.py +294 -0
- quack/diagram.py +127 -0
- quack/doctor.py +76 -0
- quack/embed.py +121 -0
- quack/generate.py +181 -0
- quack/graph.py +107 -0
- quack/index_store.py +127 -0
- quack/indexer.py +113 -0
- quack/kiro.py +82 -0
- quack/mcp_install.py +144 -0
- quack/mcp_server.py +183 -0
- quack/py.typed +0 -0
- quack/scaffold.py +134 -0
- quack/search.py +207 -0
- quack/setup.py +151 -0
- quackspace-0.1.0.dist-info/METADATA +188 -0
- quackspace-0.1.0.dist-info/RECORD +24 -0
- quackspace-0.1.0.dist-info/WHEEL +4 -0
- quackspace-0.1.0.dist-info/entry_points.txt +4 -0
- quackspace-0.1.0.dist-info/licenses/LICENSE +21 -0
quack/__init__.py
ADDED
quack/catalog.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""The meta collection: one DuckDB catalog of all file metadata.
|
|
2
|
+
|
|
3
|
+
`quack reindex` rebuilds `.quack/quack.duckdb` from the files (+ the editable
|
|
4
|
+
.index.yaml store). It is a derived artifact, never the source of truth, so it
|
|
5
|
+
can be deleted and regenerated at any time. DuckDB is embedded (no server) and
|
|
6
|
+
gives real SQL plus BM25 full-text search over everything, the fast metadata
|
|
7
|
+
search `ls` can't do.
|
|
8
|
+
|
|
9
|
+
Schema:
|
|
10
|
+
files(name, rel, folder, ext, title, description, tags_csv, n_links,
|
|
11
|
+
n_inbound, is_orphan, is_binary, file_modified, described_at, stale,
|
|
12
|
+
body)
|
|
13
|
+
tags(name, tag) -- one row per (file, tag)
|
|
14
|
+
links(src, dst, dst_exists) -- one row per wikilink edge
|
|
15
|
+
A DuckDB FTS index is built over files(name, description, body) for `match_bm25`.
|
|
16
|
+
`stale` is true when the file changed after its description was written.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from collections import defaultdict
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import duckdb
|
|
25
|
+
|
|
26
|
+
from .core import Space
|
|
27
|
+
|
|
28
|
+
DB_NAME = "quack.duckdb"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def db_path(space: Space) -> Path:
    """Return the location of the DuckDB catalog file for *space*.

    The catalog lives in the `.quack` directory directly under the space root.
    """
    catalog_dir = space.root / ".quack"
    return catalog_dir / DB_NAME
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def build(space: Space) -> dict:
    """Rebuild the catalog from scratch over the loaded space.

    The space already carries effective metadata (authored .index.yaml
    overlaid on each file), so every row is derived from ``space.entries``.

    Returns:
        A summary dict with the catalog path (``"db"``) and the row counts of
        the ``files``, ``tags`` and ``links`` tables.
    """
    path = db_path(space)
    # Make sure `.quack/` exists before DuckDB tries to create the file in it.
    path.parent.mkdir(parents=True, exist_ok=True)
    # Rebuild clean; the files + .index.yaml are the truth. `missing_ok`
    # avoids the exists()/unlink() race. The write-ahead log belongs to the
    # deleted database, so a leftover one from an interrupted run goes too.
    path.unlink(missing_ok=True)
    path.with_name(path.name + ".wal").unlink(missing_ok=True)

    # Count inbound links, considering only targets that are real notes.
    names = set(space.by_name)
    inbound: dict[str, int] = defaultdict(int)
    for e in space.entries:
        for target in e.links:
            if target in names:
                inbound[target] += 1

    con = duckdb.connect(str(path))
    try:
        _create_schema(con)
        # Load everything in a single transaction instead of committing
        # after every individual INSERT.
        con.begin()
        for e in space.entries:
            n_links = len(e.links)
            n_inbound = inbound.get(e.name, 0)
            con.execute(
                "INSERT INTO files VALUES "
                "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                [
                    e.name,
                    e.rel,
                    e.folder,
                    e.ext,
                    e.name,  # the `title` column is populated with the name
                    e.description,
                    ",".join(e.tags),
                    n_links,
                    n_inbound,
                    # orphan: nothing links here and it links nowhere
                    n_inbound == 0 and n_links == 0,
                    e.is_binary,
                    e.modified,
                    e.described_at,
                    e.stale,
                    e.body,
                ],
            )
            for tag in e.tags:
                con.execute("INSERT INTO tags VALUES (?, ?)", [e.name, tag])
            for dst in e.links:
                con.execute(
                    "INSERT INTO links VALUES (?, ?, ?)",
                    [e.name, dst, dst in names],
                )
        con.commit()
        _build_fts(con)
        n_files = con.execute("SELECT count(*) FROM files").fetchone()[0]
        n_tags = con.execute("SELECT count(*) FROM tags").fetchone()[0]
        n_links = con.execute("SELECT count(*) FROM links").fetchone()[0]
    finally:
        con.close()

    return {"db": str(path), "files": n_files, "tags": n_tags, "links": n_links}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _create_schema(con: duckdb.DuckDBPyConnection) -> None:
    """Create the empty `files`, `tags` and `links` tables on *con*."""
    ddl = """
        CREATE TABLE files (
            name VARCHAR,
            rel VARCHAR,
            folder VARCHAR,
            ext VARCHAR,
            title VARCHAR,
            description VARCHAR,
            tags_csv VARCHAR,
            n_links INTEGER,
            n_inbound INTEGER,
            is_orphan BOOLEAN,
            is_binary BOOLEAN,
            file_modified VARCHAR,
            described_at VARCHAR,
            stale BOOLEAN,
            body VARCHAR
        );
        CREATE TABLE tags (name VARCHAR, tag VARCHAR);
        CREATE TABLE links (src VARCHAR, dst VARCHAR, dst_exists BOOLEAN);
        """
    con.execute(ddl)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _build_fts(con: duckdb.DuckDBPyConnection) -> None:
    """Install/load the FTS extension and index the `files` table for BM25."""
    load_extension = "INSTALL fts; LOAD fts;"
    # The first 'name' is the document-id column; the remaining columns
    # (name, description, body) are the text fields that get indexed.
    create_index = (
        "PRAGMA create_fts_index('files', 'name', 'name', 'description', 'body', "
        "overwrite=1);"
    )
    con.execute(load_extension)
    con.execute(create_index)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def connect(explicit_root: str | None = None) -> duckdb.DuckDBPyConnection:
    """Return a read-only connection to the catalog; the caller must close it.

    Raises:
        RuntimeError: if the catalog file has not been built yet.
    """
    path = db_path(Space.load(explicit_root))
    if path.exists():
        return duckdb.connect(str(path), read_only=True)
    raise RuntimeError(
        f"No catalog at {path}. Run `quack reindex` to build it."
    )
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def query(sql: str, explicit_root: str | None = None) -> tuple[list[str], list[tuple]]:
    """Execute *sql* on the catalog and return ``(column_names, rows)``.

    The connection is opened for this call only and always closed afterwards.
    Column names are an empty list when the cursor reports no description.
    """
    catalog = connect(explicit_root)
    try:
        cursor = catalog.execute(sql)
        if cursor.description:
            columns = [column[0] for column in cursor.description]
        else:
            columns = []
        rows = cursor.fetchall()
        return columns, rows
    finally:
        catalog.close()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def neighbours(
    names: list[str], explicit_root: str | None = None, hops: int = 1
) -> list[tuple[str, str, int, str]]:
    """Graph traversal in SQL: notes within `hops` of any seed name, in either
    link direction.

    Uses a recursive CTE so only the relevant subgraph is materialized, the
    whole point of keeping the graph in DuckDB instead of a flat file.

    Args:
        names: Seed note names. An empty list returns ``[]`` without opening
            the catalog.
        explicit_root: Forwarded to ``connect`` to locate the space.
        hops: Maximum link distance from a seed to include.

    Returns:
        ``[(name, rel, distance, via_seed), ...]`` ordered by distance then
        name, excluding the seeds. ``via_seed`` is a seed that reaches the
        note at its minimum distance; when several seeds tie, the
        alphabetically first one is reported so results are reproducible.
    """
    if not names:
        return []
    con = connect(explicit_root)
    try:
        placeholders = ",".join("?" for _ in names)
        rows = con.execute(
            f"""
            WITH RECURSIVE
            -- undirected edge view over existing notes only
            edge(a, b) AS (
                SELECT src, dst FROM links WHERE dst_exists
                UNION ALL
                SELECT dst, src FROM links WHERE dst_exists
            ),
            walk(name, dist, seed) AS (
                SELECT name, 0, name FROM files WHERE name IN ({placeholders})
                UNION
                SELECT e.b, w.dist + 1, w.seed
                FROM walk w JOIN edge e ON e.a = w.name
                WHERE w.dist < ?
            ),
            ranked AS (
                SELECT w.name, n.rel, w.dist, w.seed,
                       -- w.seed breaks ties so the reported seed is deterministic
                       row_number() OVER (
                           PARTITION BY w.name ORDER BY w.dist, w.seed
                       ) AS rn
                FROM walk w JOIN files n ON n.name = w.name
                WHERE w.dist > 0
                  AND w.name NOT IN ({placeholders})  -- a seed is not its own neighbour
            )
            SELECT name, rel, dist, seed FROM ranked WHERE rn = 1
            ORDER BY dist, name
            """,
            # Bound in textual order: seed list, hop limit, seed list again.
            [*names, hops, *names],
        ).fetchall()
        return rows
    finally:
        con.close()
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def fts_search(
    terms: str, explicit_root: str | None = None, limit: int = 10
) -> list[tuple[str, str, float]]:
    """Rank catalog files against *terms* with BM25.

    Returns up to *limit* ``(rel, description, score)`` tuples, best score
    first; files that do not match (NULL score) are left out.
    """
    bm25_sql = """
            SELECT rel, description, score FROM (
                SELECT rel, description,
                       fts_main_files.match_bm25(name, ?) AS score
                FROM files
            ) WHERE score IS NOT NULL
            ORDER BY score DESC
            LIMIT ?
            """
    catalog = connect(explicit_root)
    try:
        return catalog.execute(bm25_sql, [terms, limit]).fetchall()
    finally:
        catalog.close()
|