daftari 1.10.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -0
- package/README.md +109 -432
- package/dist/cli.js +0 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +10 -1
- package/dist/server.js.map +1 -1
- package/dist/themes/clustering.d.ts +30 -0
- package/dist/themes/clustering.d.ts.map +1 -0
- package/dist/themes/clustering.js +439 -0
- package/dist/themes/clustering.js.map +1 -0
- package/dist/tools/curation.d.ts.map +1 -1
- package/dist/tools/curation.js +6 -0
- package/dist/tools/curation.js.map +1 -1
- package/dist/tools/read.d.ts +8 -0
- package/dist/tools/read.d.ts.map +1 -1
- package/dist/tools/read.js +6 -0
- package/dist/tools/read.js.map +1 -1
- package/dist/tools/search.d.ts +3 -0
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +17 -2
- package/dist/tools/search.js.map +1 -1
- package/dist/tools/themes.d.ts +21 -0
- package/dist/tools/themes.d.ts.map +1 -0
- package/dist/tools/themes.js +427 -0
- package/dist/tools/themes.js.map +1 -0
- package/dist/tools/write.d.ts.map +1 -1
- package/dist/tools/write.js +26 -3
- package/dist/tools/write.js.map +1 -1
- package/package.json +2 -1
- package/templates/reviewer-vault/.daftari/config.yaml +25 -0
- package/templates/reviewer-vault/.daftari/tensions.md +6 -0
- package/templates/reviewer-vault/_drafts/moonshot-agentic-curation.md +33 -0
- package/templates/reviewer-vault/_drafts/scratch-incomplete.md +18 -0
- package/templates/reviewer-vault/competitive-intel/aurora-pipelines-positioning.md +40 -0
- package/templates/reviewer-vault/competitive-intel/cirrus-realtime-ga.md +38 -0
- package/templates/reviewer-vault/competitive-intel/cirrus-realtime-preview.md +34 -0
- package/templates/reviewer-vault/competitive-intel/helios-connect-overview.md +40 -0
- package/templates/reviewer-vault/competitive-intel/northwind-governance-snapshot.md +35 -0
- package/templates/reviewer-vault/pricing/aurora-pricing-model.md +35 -0
- package/templates/reviewer-vault/pricing/cirrus-capacity-tiers.md +37 -0
- package/templates/reviewer-vault/pricing/helios-credits-model.md +37 -0
- package/templates/reviewer-vault/pricing/vega-pricing-strategy.md +36 -0
package/README.md
CHANGED
|
@@ -1,275 +1,119 @@
|
|
|
1
1
|
# Daftari
|
|
2
2
|
|
|
3
|
-
[](https://github.com/mavaali/daftari/actions/workflows/ci.yml)
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
[CI](https://github.com/mavaali/daftari/actions/workflows/ci.yml) [npm](https://www.npmjs.com/package/daftari) [License](LICENSE)
|
|
4
|
+
|
|
5
|
+
*Daftari* (دفتری) is the Urdu word for a ledger-keeper: the person in a
|
|
6
|
+
trading house who maintained the *daftar*, the bound register where every
|
|
7
|
+
transaction was recorded, cross-referenced, and preserved. The daftar was not a
|
|
8
|
+
filing cabinet. It was a living document. Entries referenced earlier entries.
|
|
9
|
+
Corrections were noted, not erased. The ledger got more valuable the longer it
|
|
10
|
+
was kept, because the accumulated record revealed patterns no single entry
|
|
11
|
+
could.
|
|
12
|
+
|
|
13
|
+
Daftari is an MCP server that gives AI agents the same thing: a persistent,
|
|
14
|
+
structured knowledge vault they can read, write, and curate over time. A
|
|
15
|
+
cortex, not a clipboard.
|
|
16
|
+
|
|
17
|
+
## The problem
|
|
18
|
+
|
|
19
|
+
Every agent conversation starts from zero. RAG retrieves chunks and hopes the
|
|
20
|
+
model stitches them together. AGENTS.md gives static context that nobody
|
|
21
|
+
updates. The knowledge an agent builds during a session evaporates when the
|
|
22
|
+
session ends.
|
|
23
|
+
|
|
24
|
+
Daftari takes the other path: **compilation over retrieval.** The agent
|
|
25
|
+
synthesizes an answer once, writes it back as a durable document, and every
|
|
26
|
+
later read starts from that compiled result. The vault gets better the more it
|
|
27
|
+
is used.
|
|
28
|
+
|
|
29
|
+
A human cortex doesn’t re-derive everything from sensory input each time it
|
|
30
|
+
thinks. It consolidates: experiences become memories, memories become
|
|
31
|
+
structure, structure shapes future thought. Daftari gives agents the same
|
|
32
|
+
loop. Drafts consolidate into canonical knowledge. Contradictions surface as
|
|
33
|
+
tensions. Stale knowledge decays on a schedule. The vault is a living system,
|
|
34
|
+
not a filing cabinet.
|
|
35
|
+
|
|
36
|
+
## What it is
|
|
37
|
+
|
|
38
|
+
A directory of markdown files with YAML frontmatter, exposed to agents as 14
|
|
39
|
+
MCP tools over stdio. The vault is plain text: you can read it in any editor,
|
|
40
|
+
`git log` it, grep it. Daftari adds the machinery agents need to treat it as a
|
|
41
|
+
shared workspace.
|
|
6
42
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Daftari is not RAG. It is not a chatbot. It is a *living, agent-maintained
|
|
10
|
-
knowledge vault* — a directory of markdown files that an AI agent reads from,
|
|
11
|
-
writes to, and curates over time, so that knowledge **compounds** instead of
|
|
12
|
-
being re-derived on every query.
|
|
13
|
-
|
|
14
|
-
> *Daftari — from دفتر (daftar): notebook, ledger, register. A word shared
|
|
15
|
-
> across Urdu, Hindi, Marathi, Arabic, Persian, and Turkish for the book you
|
|
16
|
-
> write things down in so you don't forget.*
|
|
17
|
-
|
|
18
|
-
RAG retrieves chunks and hopes the model stitches them together. Daftari takes
|
|
19
|
-
the other path: the agent does the stitching *once*, writes the synthesized
|
|
20
|
-
result back as a durable document, and every later read starts from that
|
|
21
|
-
compiled answer. Karpathy's framing fits — **compilation over retrieval**. The
|
|
22
|
-
vault gets better the more it is used.
|
|
23
|
-
|
|
24
|
-
A vault is just markdown. You can read it, `git log` it, and edit it by hand.
|
|
25
|
-
Daftari adds the machinery an agent needs to treat it as a shared workspace:
|
|
26
|
-
access control, write safety, provenance, and curation.
|
|
27
|
-
|
|
28
|
-
---
|
|
29
|
-
|
|
30
|
-
## The four-layer model
|
|
31
|
-
|
|
32
|
-
Daftari is built in four layers. The first two are table stakes. **The moat is
|
|
33
|
-
layers 3 and 4** — anyone can store markdown and check a permission; keeping
|
|
34
|
-
every write safe and attributable, and managing knowledge decay, is the hard
|
|
35
|
-
part.
|
|
36
|
-
|
|
37
|
-
| Layer | Concern | What Daftari provides |
|
|
38
|
-
|------:|---------|-----------------------|
|
|
39
|
-
| 1 | **Storage** | Markdown + YAML frontmatter on disk, a git history, a rebuildable SQLite index — FTS5 for lexical ranking, sqlite-vec for vector search. |
|
|
40
|
-
| 2 | **Multi-tenant ACL** | Config-driven RBAC. Roles and per-collection read/write/promote permissions declared in `.daftari/config.yaml`. |
|
|
41
|
-
| 3 | **Write safety** ⭐ | File-level write locks (SQLite-backed, 60s TTL) give single-writer-per-document safety — a competing writer fails cleanly instead of corrupting the file. This is a safety mechanism, not a coordination protocol. The ⭐ is for what is genuinely differentiated: every write auto-committed to git with a provenance log of who changed what and when. |
|
|
42
|
-
| 4 | **Curation decay** ⭐ | The draft → canonical → deprecated lifecycle, TTL-based staleness, tension logging for contradictions, and an advisory linter. Knowledge that stops being true is surfaced, not silently trusted. |
|
|
43
|
-
|
|
44
|
-
Layer 3 today is *safety*, not orchestration: the lock prevents file corruption
|
|
45
|
-
and simultaneous writers, but a writer can still overwrite another's work if it
|
|
46
|
-
composed its change against a since-changed version of the document. Closing
|
|
47
|
-
that gap — with optimistic concurrency, not queuing — is the v2 direction; see
|
|
48
|
-
[What's not in v1](#whats-not-in-v1).
|
|
49
|
-
|
|
50
|
-
Layers 1–2 keep the vault *stored and scoped*. Layers 3–4 keep it *coherent as
|
|
51
|
-
it grows* — which is the entire point of a vault that compounds.
|
|
52
|
-
|
|
53
|
-
---
|
|
54
|
-
|
|
55
|
-
## Quickstart
|
|
56
|
-
|
|
57
|
-
```bash
|
|
58
|
-
# 1. Scaffold a new vault (collections, config, example documents, git, index)
|
|
43
|
+
```
|
|
59
44
|
npx daftari --init ./my-vault
|
|
60
|
-
|
|
61
|
-
# 2. Start the MCP server against it, as an identity with a role
|
|
62
45
|
npx daftari --vault ./my-vault --user me --role admin
|
|
63
46
|
```
|
|
64
47
|
|
|
65
|
-
|
|
66
|
-
(Claude Desktop, an agent SDK, your own harness) at it. See
|
|
67
|
-
[docs/getting-started.md](docs/getting-started.md) for the full walkthrough,
|
|
68
|
-
including a `claude_desktop_config.json` snippet.
|
|
69
|
-
|
|
70
|
-
---
|
|
71
|
-
|
|
72
|
-
## The MCP tools
|
|
73
|
-
|
|
74
|
-
Daftari exposes 13 tools, grouped by layer.
|
|
48
|
+
Point any MCP client (Claude Desktop, Claude Code, an agent SDK) at it.
|
|
75
49
|
|
|
76
|
-
|
|
50
|
+
## The four layers
|
|
77
51
|
|
|
78
|
-
|
|
79
|
-
|------|-------------|
|
|
80
|
-
| `vault_read` | Read one document: markdown body, parsed frontmatter, an advisory validation report, and an inline decay assessment. |
|
|
81
|
-
| `vault_index` | List documents, filterable by collection, status, domain, or tags. |
|
|
82
|
-
| `vault_status` | Vault health dashboard: total file count, per-collection counts, count of documents with invalid frontmatter, a staleness distribution (fresh/aging/stale), unresolved tensions, and recent write history. |
|
|
52
|
+
Storage and access control are table stakes. The moat is layers 3 and 4: write arbitration and curation.
|
|
83
53
|
|
|
84
|
-
|
|
54
|
+
|Layer |What it does |Why it matters |
|
|
55
|
+
|---------------------|------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------|
|
|
56
|
+
|**Storage** |Markdown + frontmatter on disk, git history, rebuildable SQLite index for hybrid BM25 + vector search.|Plain text is the source of truth. Delete every `.db` file and rebuild.|
|
|
57
|
+
|**Access control** |Config-driven RBAC. Roles and per-collection read/write/promote permissions in `.daftari/config.yaml`.|Multiple agents, scoped access, no user-management system. |
|
|
58
|
+
|**Write arbitration**|File-level locks (60s TTL), auto-commit to git, structured provenance log. |Concurrent agents write safely. Every mutation is attributable. |
|
|
59
|
+
|**Curation** |Draft-to-canonical lifecycle, TTL-based staleness, tension logging, advisory linter. |Knowledge that stops being true gets surfaced, not silently trusted. |
|
|
85
60
|
|
|
86
|
-
|
|
87
|
-
|------|-------------|
|
|
88
|
-
| `vault_search` | Hybrid BM25 + vector search across the vault, with tunable ranking weights; each hit carries an inline decay assessment. |
|
|
89
|
-
| `vault_search_related` | Find documents thematically related to a given document. |
|
|
90
|
-
| `vault_reindex` | Rebuild the SQLite search index from the markdown files. |
|
|
61
|
+
## The tools
|
|
91
62
|
|
|
92
|
-
**
|
|
63
|
+
**Read:** `vault_read`, `vault_index`, `vault_status`
|
|
93
64
|
|
|
94
|
-
|
|
95
|
-
|------|-------------|
|
|
96
|
-
| `vault_write` | Create or overwrite a document. Stamps `updated`/`updated_by`, preserves `created`, auto-commits. |
|
|
97
|
-
| `vault_append` | Append a markdown section to a document. Re-stamps metadata, auto-commits. |
|
|
98
|
-
| `vault_promote` | Promote a draft to canonical — refuses unless the draft's frontmatter is complete. |
|
|
99
|
-
| `vault_deprecate` | Mark a document deprecated with a required reason and an optional `superseded_by`. |
|
|
65
|
+
**Search:** `vault_search` (hybrid BM25 + vector), `vault_search_related`, `vault_themes` (thematic clustering), `vault_reindex`
|
|
100
66
|
|
|
101
|
-
**
|
|
67
|
+
**Write:** `vault_write`, `vault_append`, `vault_promote`, `vault_deprecate`
|
|
102
68
|
|
|
103
|
-
|
|
104
|
-
|------|-------------|
|
|
105
|
-
| `vault_tension_log` | Record a contradiction between two documents to the advisory tension log. Records; does not resolve. |
|
|
106
|
-
| `vault_lint` | Run advisory curation checks: stale-past-TTL, orphans, old drafts, stagnant low-confidence files, deprecated-but-linked, unanswered questions. |
|
|
107
|
-
| `vault_provenance` | Return a single document's full write history from the provenance log. |
|
|
69
|
+
**Curate:** `vault_tension_log`, `vault_lint`, `vault_provenance`
|
|
108
70
|
|
|
109
|
-
The curation engine is
|
|
110
|
-
`vault_tension_log` records contradictions
|
|
111
|
-
|
|
71
|
+
The curation engine is advisory: `vault_lint` reports problems and
|
|
72
|
+
`vault_tension_log` records contradictions. Neither auto-fixes anything. Every
|
|
73
|
+
change is a deliberate, attributable act.
|
|
112
74
|
|
|
113
|
-
|
|
75
|
+
## Two kinds of knowledge
|
|
114
76
|
|
|
115
|
-
|
|
77
|
+
Every document declares a `domain`. The distinction drives how the curation
|
|
78
|
+
layer treats it.
|
|
116
79
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
80
|
+
**Accumulation** documents compile and compound. A competitive-intel note, a
|
|
81
|
+
pricing breakdown, a researched comparison. Each write builds on the last.
|
|
82
|
+
Going stale is a problem to fix.
|
|
120
83
|
|
|
121
|
-
**
|
|
122
|
-
|
|
123
|
-
```json
|
|
124
|
-
{ "method": "tools/call", "params": {
|
|
125
|
-
"name": "vault_search",
|
|
126
|
-
"arguments": { "query": "consumption pricing", "limit": 1 } } }
|
|
127
|
-
```
|
|
84
|
+
**Generative** documents speculate. A moonshot sketch, a brainstorm, a “what
|
|
85
|
+
if.” Going stale is expected, not a defect.
|
|
128
86
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
{
|
|
133
|
-
"query": "consumption pricing",
|
|
134
|
-
"count": 1,
|
|
135
|
-
"vectorUsed": true,
|
|
136
|
-
"weights": { "bm25": 0.5, "vector": 0.5 },
|
|
137
|
-
"hits": [
|
|
138
|
-
{
|
|
139
|
-
"path": "pricing/helios-consumption-pricing.md",
|
|
140
|
-
"title": "Helios Consumption Pricing (Compute Credit Model)",
|
|
141
|
-
"collection": "pricing", "status": "canonical",
|
|
142
|
-
"score": 1, "bm25Score": 1, "vectorScore": 1,
|
|
143
|
-
"snippet": "# Helios Consumption Pricing (Compute Credit Model) Helios is a fictional platform…",
|
|
144
|
-
"decay": null
|
|
145
|
-
}
|
|
146
|
-
]
|
|
147
|
-
}
|
|
148
|
-
```
|
|
87
|
+
The same curation rules applied uniformly would either nag about every
|
|
88
|
+
brainstorm or quietly trust every stale fact. The domain split lets the system
|
|
89
|
+
hold each to the right standard.
|
|
149
90
|
|
|
150
|
-
|
|
91
|
+
## Access control
|
|
151
92
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
`vault_search` is **hybrid**: a BM25 lexical score and a vector (semantic)
|
|
155
|
-
score, blended with tunable weights. Both halves are SQL-native — they
|
|
156
|
-
run inside SQLite, not in JavaScript.
|
|
157
|
-
|
|
158
|
-
- **Lexical half.** An FTS5 virtual table (`documents_fts`) over title,
|
|
159
|
-
tags, and body. SQLite's built-in BM25 ranks every MATCH'd row.
|
|
160
|
-
Triggers on the regular `documents` table keep the FTS index in sync
|
|
161
|
-
on every write, so the indexer never touches the virtual table
|
|
162
|
-
directly. Free-text queries are tokenised, stopword-filtered, and
|
|
163
|
-
prefix-OR'd (`cirrus pricing` becomes `cirrus* OR pricing*`) so a
|
|
164
|
-
partial-keystroke or stem variation still matches.
|
|
165
|
-
|
|
166
|
-
- **Vector half.** A sqlite-vec `vec0` virtual table
|
|
167
|
-
(`embeddings_vec`), sized at the active provider's dim and indexed for
|
|
168
|
-
KNN cosine queries. The durable `embeddings` cache (one row per
|
|
169
|
-
`(content_hash, model)`) is the source of truth; `embeddings_vec`
|
|
170
|
-
mirrors it for query-time access. Switching embedding providers
|
|
171
|
-
triggers a drop-and-rebuild of the vec table at the new dim — the
|
|
172
|
-
durable cache survives, so switching back is all cache hits.
|
|
173
|
-
|
|
174
|
-
**Prerequisite.** sqlite-vec is a loadable SQLite extension. The
|
|
175
|
-
`sqlite-vec` npm package ships pre-built binaries for darwin / linux /
|
|
176
|
-
windows on x64 and arm64; `better-sqlite3`'s npm prebuilt enables
|
|
177
|
-
extension loading by default. In the common case `npm install` is the
|
|
178
|
-
only setup step. If a custom `better-sqlite3` build with extension
|
|
179
|
-
loading disabled is in use, Daftari refuses to start with an actionable
|
|
180
|
-
error: `npm rebuild better-sqlite3 --build-from-source`.
|
|
181
|
-
|
|
182
|
-
The vector half is worth being explicit about, because a local-first
|
|
183
|
-
tool should never leave you guessing whether a query leaves your
|
|
184
|
-
machine.
|
|
185
|
-
|
|
186
|
-
### Embedding providers
|
|
187
|
-
|
|
188
|
-
Daftari ships with two embedding backends. Pick one in
|
|
189
|
-
`.daftari/config.yaml`:
|
|
93
|
+
There is no user-management system. Roles live in config, and the server starts with one of them:
|
|
190
94
|
|
|
191
95
|
```yaml
|
|
192
|
-
embeddings:
|
|
193
|
-
provider: local-minilm # default. Other values: openai-3-small.
|
|
194
|
-
```
|
|
195
|
-
|
|
196
|
-
- **`local-minilm`** (default). `all-MiniLM-L6-v2` (the
|
|
197
|
-
`Xenova/all-MiniLM-L6-v2` build), a 384-dimension sentence-transformer.
|
|
198
|
-
Runs entirely **local**: embeddings are computed in-process by
|
|
199
|
-
[`@huggingface/transformers`](https://www.npmjs.com/package/@huggingface/transformers)
|
|
200
|
-
(Transformers.js). No external embedding API — nothing is sent to
|
|
201
|
-
Hugging Face, OpenAI, or anyone else at index or query time. Just
|
|
202
|
-
`npm install` — no Python, no API key. The **first** reindex downloads
|
|
203
|
-
the model weights (~25 MB) from the Hugging Face hub and caches them on
|
|
204
|
-
disk; every run after that is fully offline. Slow on cold start
|
|
205
|
-
(~25 min CPU on a 44k-chunk vault), but free.
|
|
206
|
-
|
|
207
|
-
- **`openai-3-small`**. OpenAI's `text-embedding-3-small`, a 1536-dimension
|
|
208
|
-
hosted embedding. **Sends chunk text to OpenAI** at reindex time —
|
|
209
|
-
enable this only if you're comfortable with that. Requires
|
|
210
|
-
`OPENAI_API_KEY` in the server's environment (it is never read from
|
|
211
|
-
config files). ~10x faster than `local-minilm` on large vaults; on the
|
|
212
|
-
44k-chunk benchmark above, ~2 minutes and ~$0.10. Because Daftari's
|
|
213
|
-
embedding cache is content-addressed by `(content_hash, model)`, the
|
|
214
|
-
paid cost is a **one-time event per chunk text** — re-running
|
|
215
|
-
`vault_reindex` on an unchanged vault embeds zero new chunks. Switching
|
|
216
|
-
providers between server runs is safe: the cache keeps both providers'
|
|
217
|
-
rows, so switching back to the other later re-uses what was previously
|
|
218
|
-
embedded.
|
|
219
|
-
|
|
220
|
-
- **Graceful degradation.** Whichever provider is active, if it cannot
|
|
221
|
-
reach the model (no network on the very first `local-minilm` run, before
|
|
222
|
-
the weights are cached; or OpenAI unreachable), `vault_reindex` still
|
|
223
|
-
builds the FTS5 lexical index. The vector column is left empty,
|
|
224
|
-
`vectorUsed` reports `false`, and search transparently falls back to
|
|
225
|
-
lexical-only ranking.
|
|
226
|
-
|
|
227
|
-
- **Quality tradeoff.** MiniLM is small and fast, which keeps Daftari
|
|
228
|
-
dependency-light and snappy, but its recall/precision is below larger
|
|
229
|
-
hosted embedding models. `openai-3-small` is the obvious next step.
|
|
230
|
-
Pairing either with FTS5 BM25 covers the common case where a small
|
|
231
|
-
model misses an exact-term match.
|
|
232
|
-
|
|
233
|
-
---
|
|
234
|
-
|
|
235
|
-
## RBAC
|
|
236
|
-
|
|
237
|
-
Access is config-driven. There is no user-management system — roles and their
|
|
238
|
-
per-collection permissions live in `.daftari/config.yaml`, and the server is
|
|
239
|
-
started with `--role <name>` to select one:
|
|
240
|
-
|
|
241
|
-
```yaml
|
|
242
|
-
version: 1
|
|
243
|
-
vault_name: my-vault
|
|
244
|
-
|
|
245
96
|
roles:
|
|
246
97
|
analyst:
|
|
247
98
|
read: [competitive-intel, pricing]
|
|
248
99
|
write: [competitive-intel, _drafts]
|
|
249
100
|
researcher:
|
|
250
|
-
read: ["*"]
|
|
101
|
+
read: ["*"]
|
|
251
102
|
write: [moonshot, _drafts]
|
|
252
103
|
admin:
|
|
253
104
|
read: ["*"]
|
|
254
105
|
write: ["*"]
|
|
255
|
-
promote: true
|
|
106
|
+
promote: true
|
|
256
107
|
```
|
|
257
108
|
|
|
258
|
-
|
|
259
|
-
- `write` — collections the role may create, append to, or deprecate in
|
|
260
|
-
- `promote` — whether the role may promote a draft to canonical (default `false`)
|
|
261
|
-
|
|
262
|
-
Starting the server with no `--role`, or with a name not in the config, falls
|
|
263
|
-
back to a deny-all **guest**: every tool is denied.
|
|
264
|
-
|
|
265
|
-
---
|
|
109
|
+
Starting the server without `--role`, or with a role name that is not in the config, falls back to deny-all: every tool is denied.
|
|
266
110
|
|
|
267
111
|
## File format
|
|
268
112
|
|
|
269
|
-
|
|
270
|
-
|
|
113
|
+
Markdown with YAML frontmatter. Frontmatter is the metadata layer; there is no
|
|
114
|
+
separate database.
|
|
271
115
|
|
|
272
|
-
```
|
|
116
|
+
```yaml
|
|
273
117
|
---
|
|
274
118
|
title: "Aurora Pipelines — Positioning Overview"
|
|
275
119
|
domain: accumulation
|
|
@@ -279,231 +123,64 @@ confidence: medium
|
|
|
279
123
|
created: 2026-05-17
|
|
280
124
|
updated: 2026-05-17
|
|
281
125
|
updated_by: agent:claude-code
|
|
282
|
-
provenance:
|
|
126
|
+
provenance: synthesized
|
|
283
127
|
sources:
|
|
284
128
|
- aurora-product-page
|
|
285
|
-
superseded_by: null
|
|
286
129
|
ttl_days: 120
|
|
287
130
|
tags: [aurora, ingestion, competitive]
|
|
288
131
|
questions_answered:
|
|
289
|
-
- "How does Aurora frame the ingestion
|
|
132
|
+
- "How does Aurora frame the ingestion/transformation boundary?"
|
|
290
133
|
questions_raised:
|
|
291
134
|
- "Does an authored-pipeline model slow teams down at small scale?"
|
|
292
135
|
---
|
|
293
|
-
|
|
294
|
-
# Aurora Pipelines — Positioning Overview
|
|
295
|
-
|
|
296
|
-
Aurora Pipelines treats ingestion as an authored, version-controlled artifact
|
|
297
|
-
rather than a managed black box.
|
|
298
|
-
|
|
299
|
-
## Questions Answered
|
|
300
|
-
- How does Aurora frame the ingestion-vs-transformation boundary?
|
|
301
|
-
|
|
302
|
-
## Questions Raised
|
|
303
|
-
- Does an authored-pipeline model slow teams down at small scale?
|
|
304
136
|
```
|
|
305
137
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
The matching `## Questions Answered` / `## Questions Raised` body sections are
|
|
310
|
-
an optional human-readable mirror. Full field reference in
|
|
311
|
-
[docs/file-format.md](docs/file-format.md).
|
|
138
|
+
Documents can make their epistemic edges explicit: `questions_answered` is what
|
|
139
|
+
later agents can take as settled, `questions_raised` is where to build next.
|
|
140
|
+
`vault_lint` turns the open questions across the vault into a coverage map.
|
|
312
141
|
|
|
313
|
-
|
|
314
|
-
domain-specific fields, add a `schema_extensions` block to
|
|
315
|
-
`.daftari/config.yaml` — typed extension fields that participate in validation
|
|
316
|
-
and serialize in a stable order, with no core schema change. See
|
|
317
|
-
[docs/schema-extensions.md](docs/schema-extensions.md).
|
|
142
|
+
Full field reference in [docs/file-format.md](docs/file-format.md).
|
|
318
143
|
|
|
319
|
-
|
|
144
|
+
## How it compares
|
|
320
145
|
|
|
321
|
-
|
|
146
|
+
| |AGENTS.md |RAG |Daftari |
|
|
147
|
+
|--------------------|-----------------|-----------------------------|-------------------------------------|
|
|
148
|
+
|Who writes? |Humans |Nobody (retrieval only) |Agents + humans |
|
|
149
|
+
|Scales? |One file, doesn’t|Scales storage, not coherence|Structured collections with lifecycle|
|
|
150
|
+
|Knowledge compounds?|No |No |Yes, draft → canonical → deprecated |
|
|
151
|
+
|Contradictions? |Invisible |Invisible |Tension log surfaces them |
|
|
152
|
+
|Staleness? |Silent |Silent |TTL-based decay with advisory lint |
|
|
322
153
|
|
|
323
|
-
|
|
324
|
-
this vault. They let an organisation enforce conventions the built-in
|
|
325
|
-
frontmatter validator does not know about — naming rules, status-transition
|
|
326
|
-
guards, business-specific cross-field invariants, refusal lists — without
|
|
327
|
-
forking daftari or wrapping the MCP server. A hook is a plain ES module that
|
|
328
|
-
exports a default function and returns a list of `ValidationIssue` objects.
|
|
329
|
-
Any issue blocks the write, exactly the way a built-in schema violation does.
|
|
154
|
+
## What’s not in v1
|
|
330
155
|
|
|
331
|
-
|
|
156
|
+
Deliberately deferred to keep the surface tight:
|
|
332
157
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
158
|
+
- **Cloud-hosted multi-tenant server** with S3/GCS backend and token auth
|
|
159
|
+
- **Remote MCP transport** for claude.ai web, mobile, and Cowork (v1 is a local desktop extension for Claude Desktop and Claude Code)
|
|
160
|
+
- **Conflict resolution beyond file-level locks** (CRDTs, semantic merge)
|
|
161
|
+
- **Background curation agent** running lint on a cadence
|
|
162
|
+
- **LLM reranking** of search results
|
|
163
|
+
- **Enforced domain separation** (v1 documents the convention; v2 enforces it)
|
|
339
164
|
|
|
340
|
-
|
|
341
|
-
runs on every write, even if an earlier hook produced issues — the caller
|
|
342
|
-
gets one consolidated list back, not the first failure.
|
|
343
|
-
|
|
344
|
-
A hook looks like this:
|
|
345
|
-
|
|
346
|
-
```ts
|
|
347
|
-
// .daftari/hooks/forbid-status-skip.mjs
|
|
348
|
-
//
|
|
349
|
-
// ValidationIssue = { field: string; message: string }
|
|
350
|
-
// context = { path: string; operation: 'create' | 'update' | 'append' }
|
|
351
|
-
export default function forbidStatusSkip(frontmatter, context) {
|
|
352
|
-
if (context.operation !== "update") return [];
|
|
353
|
-
if (frontmatter.status === "canonical" && frontmatter.previous_status === "draft") {
|
|
354
|
-
return [
|
|
355
|
-
{
|
|
356
|
-
field: "status",
|
|
357
|
-
message: "draft → canonical is not allowed; promote via the dedicated tool",
|
|
358
|
-
},
|
|
359
|
-
];
|
|
360
|
-
}
|
|
361
|
-
return [];
|
|
362
|
-
}
|
|
363
|
-
```
|
|
364
|
-
|
|
365
|
-
A hook is called with the already-stamped frontmatter the write is about to
|
|
366
|
-
land (so `updated` and `updated_by` reflect this call, not the previous
|
|
367
|
-
version on disk). The hook **must not mutate its inputs**; v1 is
|
|
368
|
-
validate-only. Returning a non-array, or an array containing malformed issue
|
|
369
|
-
objects, is itself reported as a blocking issue tagged with the hook path —
|
|
370
|
-
hook bugs surface as loud failures, not silent passes.
|
|
371
|
-
|
|
372
|
-
### Transform hooks
|
|
373
|
-
|
|
374
|
-
A `pre_write` hook can observe and reject, but it cannot *change* the
|
|
375
|
-
frontmatter a write lands. **Transform hooks** can. A transform hook runs in an
|
|
376
|
-
earlier phase — before built-in schema validation — so it can derive or
|
|
377
|
-
override frontmatter fields the validator would otherwise reject as missing.
|
|
378
|
-
|
|
379
|
-
Transform hooks are declared under their own key, `pre_write_transform`:
|
|
380
|
-
|
|
381
|
-
```yaml
|
|
382
|
-
hooks:
|
|
383
|
-
pre_write_transform:
|
|
384
|
-
- path: .daftari/hooks/derive-status.mjs
|
|
385
|
-
pre_write:
|
|
386
|
-
- path: .daftari/hooks/forbid-status-skip.mjs
|
|
387
|
-
```
|
|
388
|
-
|
|
389
|
-
The phase order is fixed regardless of how the config lists the blocks: every
|
|
390
|
-
`pre_write_transform` hook runs (in declared order), then built-in schema
|
|
391
|
-
validation, then every `pre_write` validator (in declared order). A transform
|
|
392
|
-
always runs before any validator sees the frontmatter.
|
|
393
|
-
|
|
394
|
-
A transform hook returns a `Partial<Frontmatter>` patch — *not* a list of
|
|
395
|
-
issues:
|
|
396
|
-
|
|
397
|
-
```ts
|
|
398
|
-
// .daftari/hooks/derive-status.mjs
|
|
399
|
-
//
|
|
400
|
-
// context = { path: string; operation: 'create' | 'update' | 'append' }
|
|
401
|
-
export default function deriveStatus(frontmatter, context) {
|
|
402
|
-
if (frontmatter.decision_status === "ACTIVE") {
|
|
403
|
-
return { status: "canonical" };
|
|
404
|
-
}
|
|
405
|
-
return {}; // no change
|
|
406
|
-
}
|
|
407
|
-
```
|
|
408
|
-
|
|
409
|
-
The runner merges each patch into the candidate frontmatter **`Object.assign`
|
|
410
|
-
style**: shallow, last-writer-wins. A key present in the patch replaces the
|
|
411
|
-
existing value outright — arrays are replaced whole, never appended to or
|
|
412
|
-
merged element-wise. When two transforms target the same field, the
|
|
413
|
-
later-declared one wins. Each transform sees the merged output of every
|
|
414
|
-
transform declared before it.
|
|
415
|
-
|
|
416
|
-
A transform **refuses by throwing** — it does not return issues. A throw
|
|
417
|
-
becomes a synthetic blocking issue tagged with the hook path, identical to the
|
|
418
|
-
`pre_write` throw mechanism. Returning anything that is not a plain object (an
|
|
419
|
-
array, a primitive, `null`) is likewise a blocking issue.
|
|
420
|
-
|
|
421
|
-
Because transforms run before validation, a transform that sets an invalid
|
|
422
|
-
value — a `status` outside the allowed set, say — is caught by the built-in
|
|
423
|
-
validator exactly as a bad user-supplied value would be.
|
|
424
|
-
|
|
425
|
-
### Trust model
|
|
426
|
-
|
|
427
|
-
Hooks are **trusted code**. They run in the same Node process as the daftari
|
|
428
|
-
server, with the same filesystem and network access. v1 does no sandboxing,
|
|
429
|
-
no permission prompts, no signature checking — the vault owner is responsible
|
|
430
|
-
for the contents of `.daftari/hooks/`. Treat hook files the way you would
|
|
431
|
-
treat `package.json` scripts or git hooks: review every change, never run a
|
|
432
|
-
vault you don't trust, and pin hook code in source control next to the
|
|
433
|
-
config that loads it. If you need stronger isolation than that, don't
|
|
434
|
-
register hooks in v1.
|
|
435
|
-
|
|
436
|
-
### Scope and limits in v1
|
|
437
|
-
|
|
438
|
-
- **Surfaces:** `pre_write` (validators) and `pre_write_transform`
|
|
439
|
-
(field-deriving transforms). Future surfaces (`pre_read`, `post_write`,
|
|
440
|
-
etc.) are reserved — unrecognised keys under `hooks:` are a loud config
|
|
441
|
-
error, not a silent skip.
|
|
442
|
-
- **Operations:** both hook surfaces fire for `vault_write` (create + update)
|
|
443
|
-
and `vault_append`. `vault_promote` and `vault_deprecate` deliberately
|
|
444
|
-
bypass hooks — they're narrow metadata mutations the server controls
|
|
445
|
-
end-to-end.
|
|
446
|
-
- **Two phases:** a `pre_write` hook returns a list of issues and can only
|
|
447
|
-
reject. A `pre_write_transform` hook returns a `Partial<Frontmatter>` patch
|
|
448
|
-
and can derive or override fields before validation — see "Transform
|
|
449
|
-
hooks" above.
|
|
450
|
-
- **Sync:** hook bodies are synchronous functions. The loader is async
|
|
451
|
-
(it has to dynamic-import the module), but each individual hook call is
|
|
452
|
-
not awaited.
|
|
453
|
-
- **No caching across calls:** hooks are re-imported per write; expect to
|
|
454
|
-
pay one ESM dynamic-import per declared hook per call. The next iteration
|
|
455
|
-
may cache. Edits to a hook file are picked up on the next write — no
|
|
456
|
-
server restart required.
|
|
457
|
-
|
|
458
|
-
See [issue #29](https://github.com/mavaali/daftari/issues/29) for the design
|
|
459
|
-
rationale and the alternatives that were rejected.
|
|
460
|
-
|
|
461
|
-
---
|
|
462
|
-
|
|
463
|
-
## What's not in v1
|
|
464
|
-
|
|
465
|
-
A few capabilities were deliberately deferred so v1 ships with a tight,
|
|
466
|
-
defensible surface — a server that does its core job well rather than a wide
|
|
467
|
-
one that does many jobs partially. Not in this release:
|
|
468
|
-
|
|
469
|
-
- **Self-hosted server mode** — a long-lived HTTP/SSE server multiple clients
|
|
470
|
-
connect to, with pluggable cloud-storage backends (ADLS, S3, GCS) and OAuth
|
|
471
|
-
authentication. Self-hosted by the operator, *not* a managed service. v1 runs
|
|
472
|
-
against a local filesystem as a single stdio process.
|
|
473
|
-
- **LLM reranking of search results** — a model pass over the BM25 + vector
|
|
474
|
-
candidate set. v1 ships hybrid ranking without a rerank stage.
|
|
475
|
-
- **Enforced domain separation** — v1 *documents* the convention that
|
|
476
|
-
generative-domain documents are not cross-referenced into accumulation pages;
|
|
477
|
-
the write tools do not yet enforce it. v2 will.
|
|
478
|
-
|
|
479
|
-
Each of these is a clean increment on top of a surface that already works —
|
|
480
|
-
deliberately deferred, not forgotten.
|
|
481
|
-
|
|
482
|
-
---
|
|
483
|
-
|
|
484
|
-
## Documentation
|
|
485
|
-
|
|
486
|
-
- [docs/getting-started.md](docs/getting-started.md) — end-to-end walkthrough: scaffold, write, search, lint, promote, deprecate, and connect from Claude Desktop.
|
|
487
|
-
- [docs/worked-example.md](docs/worked-example.md) — the compilation thesis shown, not argued: one document maturing across three agent writes, contrasted with RAG.
|
|
488
|
-
- [docs/architecture.md](docs/architecture.md) — the layered architecture, the request path, and the accumulation-vs-generative domain split.
|
|
489
|
-
- [docs/curation-workflow.md](docs/curation-workflow.md) — the reference curation loop: how an agent acts on `vault_lint` output instead of letting it pile up.
|
|
490
|
-
- [docs/file-format.md](docs/file-format.md) — the complete frontmatter reference and markdown body conventions.
|
|
491
|
-
- [docs/schema-extensions.md](docs/schema-extensions.md) — declaring typed, vault-specific frontmatter fields with a `schema_extensions` config block.
|
|
492
|
-
|
|
493
|
-
---
|
|
165
|
+
Each is a clean increment on a surface that already works.
|
|
494
166
|
|
|
495
167
|
## Development
|
|
496
168
|
|
|
497
|
-
```
|
|
169
|
+
```
|
|
498
170
|
npm install
|
|
499
|
-
npm run build
|
|
500
|
-
npm test
|
|
501
|
-
npm run dev # run the server in watch mode against the sample vault
|
|
171
|
+
npm run build
|
|
172
|
+
npm test
|
|
502
173
|
```
|
|
503
174
|
|
|
504
175
|
Design tenets: functions and types, no classes; tool handlers return
|
|
505
176
|
`Result<T, Error>` rather than throwing; tests mirror the `src/` structure.
|
|
506
177
|
|
|
178
|
+
## Documentation
|
|
179
|
+
|
|
180
|
+
- [docs/getting-started.md](docs/getting-started.md) — scaffold, write, search, lint, promote, deprecate
|
|
181
|
+
- [docs/architecture.md](docs/architecture.md) — layered design, request path, accumulation vs. generative domains
|
|
182
|
+
- [docs/file-format.md](docs/file-format.md) — complete frontmatter reference
|
|
183
|
+
|
|
507
184
|
## License
|
|
508
185
|
|
|
509
|
-
MIT.
|
|
186
|
+
MIT.
|
package/dist/cli.js
CHANGED
|
File without changes
|
package/dist/server.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AAEnE,OAAO,EAAE,KAAK,aAAa,EAAe,MAAM,kBAAkB,CAAC;
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":"AAQA,OAAO,EAAE,MAAM,EAAE,MAAM,2CAA2C,CAAC;AAEnE,OAAO,EAAE,KAAK,aAAa,EAAe,MAAM,kBAAkB,CAAC;AAOnE,eAAO,MAAM,WAAW,YAAY,CAAC;AAQrC,eAAO,MAAM,cAAc,QAAmB,CAAC;AAK/C,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,MAAM,GAAE,aAA6B,GAAG,MAAM,CA6D7F"}
|
package/dist/server.js
CHANGED
|
@@ -11,6 +11,7 @@ import { guestAccess } from "./access/rbac.js";
|
|
|
11
11
|
import { curationTools } from "./tools/curation.js";
|
|
12
12
|
import { readTools } from "./tools/read.js";
|
|
13
13
|
import { searchTools } from "./tools/search.js";
|
|
14
|
+
import { themesTools } from "./tools/themes.js";
|
|
14
15
|
import { writeTools } from "./tools/write.js";
|
|
15
16
|
export const SERVER_NAME = "daftari";
|
|
16
17
|
// The version is read from the package manifest so it never drifts from the
|
|
@@ -23,13 +24,21 @@ export const SERVER_VERSION = manifest.version;
|
|
|
23
24
|
// Absent an explicit context the server falls back to the deny-all guest.
|
|
24
25
|
export function createServer(vaultRoot, access = guestAccess()) {
|
|
25
26
|
const server = new Server({ name: SERVER_NAME, version: SERVER_VERSION }, { capabilities: { tools: {} } });
|
|
26
|
-
const tools = [
|
|
27
|
+
const tools = [
|
|
28
|
+
...readTools,
|
|
29
|
+
...searchTools,
|
|
30
|
+
...themesTools,
|
|
31
|
+
...writeTools,
|
|
32
|
+
...curationTools,
|
|
33
|
+
];
|
|
27
34
|
const byName = new Map(tools.map((t) => [t.name, t]));
|
|
28
35
|
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
|
29
36
|
tools: tools.map((t) => ({
|
|
30
37
|
name: t.name,
|
|
38
|
+
...(t.title ? { title: t.title } : {}),
|
|
31
39
|
description: t.description,
|
|
32
40
|
inputSchema: t.inputSchema,
|
|
41
|
+
...(t.annotations ? { annotations: t.annotations } : {}),
|
|
33
42
|
})),
|
|
34
43
|
}));
|
|
35
44
|
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|