@openparachute/vault 0.4.8 → 0.4.9-rc.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/core/src/hooks.test.ts +320 -1
- package/core/src/hooks.ts +243 -38
- package/core/src/mcp.ts +35 -0
- package/core/src/portable-md.test.ts +252 -1
- package/core/src/portable-md.ts +370 -2
- package/core/src/schema.ts +51 -2
- package/core/src/store.ts +68 -2
- package/package.json +1 -1
- package/src/auth.ts +29 -1
- package/src/auto-transcribe.test.ts +7 -2
- package/src/auto-transcribe.ts +6 -2
- package/src/export-watch.test.ts +74 -0
- package/src/export-watch.ts +108 -7
- package/src/github-device-flow.test.ts +404 -0
- package/src/github-device-flow.ts +415 -0
- package/src/mcp-http.ts +24 -36
- package/src/mcp-tools.ts +286 -2
- package/src/mirror-config.test.ts +184 -14
- package/src/mirror-config.ts +220 -24
- package/src/mirror-credentials.test.ts +450 -0
- package/src/mirror-credentials.ts +577 -0
- package/src/mirror-deps.ts +42 -1
- package/src/mirror-import.test.ts +550 -0
- package/src/mirror-import.ts +484 -0
- package/src/mirror-manager.test.ts +423 -12
- package/src/mirror-manager.ts +579 -62
- package/src/mirror-routes.test.ts +966 -10
- package/src/mirror-routes.ts +1096 -5
- package/src/module-config.ts +11 -5
- package/src/routing.test.ts +92 -1
- package/src/routing.ts +165 -1
- package/src/server.ts +21 -8
- package/src/token-store.ts +158 -5
- package/src/transcription-worker.ts +9 -4
- package/src/triggers.ts +16 -3
- package/src/vault.test.ts +380 -1
- package/web/ui/dist/assets/{index-BOa-JJtV.css → index-DBe8Xiah.css} +1 -1
- package/web/ui/dist/assets/index-DE18QJMx.js +60 -0
- package/web/ui/dist/index.html +2 -2
- package/web/ui/dist/assets/index-BzA5LgE3.js +0 -60
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
|
|
20
20
|
import { describe, it, expect, beforeEach } from "bun:test";
|
|
21
21
|
import { Database } from "bun:sqlite";
|
|
22
|
-
import { mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync, existsSync, statSync } from "fs";
|
|
22
|
+
import { mkdirSync, readFileSync, readdirSync, rmSync, symlinkSync, writeFileSync, existsSync, statSync } from "fs";
|
|
23
23
|
import { join } from "path";
|
|
24
24
|
import { tmpdir } from "os";
|
|
25
25
|
|
|
@@ -33,6 +33,7 @@ import {
|
|
|
33
33
|
parseFrontmatter,
|
|
34
34
|
portableExportFilePath,
|
|
35
35
|
probeCaseSensitive,
|
|
36
|
+
pruneOrphans,
|
|
36
37
|
SIDECAR_DIR,
|
|
37
38
|
NOTES_META_DIR,
|
|
38
39
|
supportsInlineFrontmatter,
|
|
@@ -1799,3 +1800,253 @@ describe("case-collision detection (vault#327)", async () => {
|
|
|
1799
1800
|
expect(stats.disambiguated_paths).toHaveLength(1);
|
|
1800
1801
|
});
|
|
1801
1802
|
});
|
|
1803
|
+
|
|
1804
|
+
// ---------------------------------------------------------------------------
|
|
1805
|
+
// pruneOrphans (vault#382 — event-driven mirror delete propagation)
|
|
1806
|
+
// ---------------------------------------------------------------------------
|
|
1807
|
+
|
|
1808
|
+
describe("pruneOrphans", async () => {
  // Fixed timestamp so every export in this suite is deterministic.
  const EXPORTED_AT = "2026-01-01T00:00:00.000Z";

  let tmpBase: string;

  beforeEach(() => {
    // Unique scratch directory per test so cases never observe each other's files.
    tmpBase = join(tmpdir(), `parachute-prune-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`);
    mkdirSync(tmpBase, { recursive: true });
  });

  /** Fresh in-memory store. */
  const newStore = (): SqliteStore => new SqliteStore(new Database(":memory:"));

  /** Run a real export so the on-disk structure is realistic. */
  const exportTo = (store: SqliteStore, outDir: string) =>
    exportVaultToDir(store, { outDir, vaultName: "t", exportedAt: EXPORTED_AT });

  /** Call pruneOrphans keeping only the listed ids / tag names. */
  const sweep = (
    outDir: string,
    keep: { notes?: string[]; tags?: string[]; attachments?: string[] } = {},
  ) =>
    pruneOrphans({
      outDir,
      validNoteIds: new Set(keep.notes ?? []),
      validTagNames: new Set(keep.tags ?? []),
      validAttachmentIds: new Set(keep.attachments ?? []),
    });

  it("no-op on non-existent directory", () => {
    const stats = sweep(join(tmpBase, "doesnt-exist"));
    expect(stats.notes_removed).toBe(0);
    expect(stats.unparseable_files).toHaveLength(0);
  });

  it("removes orphaned note .md file", async () => {
    const outDir = join(tmpBase, "orphan-note");
    const store = newStore();
    await store.createNote("alive", { id: "01HFAA", path: "alive" });
    await store.createNote("doomed", { id: "01HFBB", path: "doomed" });
    await exportTo(store, outDir);
    expect(existsSync(join(outDir, "alive.md"))).toBe(true);
    expect(existsSync(join(outDir, "doomed.md"))).toBe(true);

    // Prune with only "alive" in the valid set.
    const stats = sweep(outDir, { notes: ["01HFAA"] });
    expect(stats.notes_removed).toBe(1);
    expect(existsSync(join(outDir, "alive.md"))).toBe(true);
    expect(existsSync(join(outDir, "doomed.md"))).toBe(false);
  });

  it("removes orphaned schema sidecar", async () => {
    const outDir = join(tmpBase, "orphan-schema");
    const store = newStore();
    await store.upsertTagRecord("alive-tag", { description: "stays" });
    await store.upsertTagRecord("doomed-tag", { description: "goes" });
    await exportTo(store, outDir);
    const schemasDir = join(outDir, SIDECAR_DIR, "schemas");
    expect(existsSync(join(schemasDir, "alive-tag.yaml"))).toBe(true);
    expect(existsSync(join(schemasDir, "doomed-tag.yaml"))).toBe(true);

    const stats = sweep(outDir, { tags: ["alive-tag"] });
    expect(stats.schemas_removed).toBe(1);
    expect(existsSync(join(schemasDir, "alive-tag.yaml"))).toBe(true);
    expect(existsSync(join(schemasDir, "doomed-tag.yaml"))).toBe(false);
  });

  it("removes orphaned attachment directories", async () => {
    const outDir = join(tmpBase, "orphan-att");
    // Build the export structure by hand (attachment binaries need
    // assetsDir wiring; cheaper to just create the dirs).
    const attachmentsDir = join(outDir, SIDECAR_DIR, "attachments");
    mkdirSync(attachmentsDir, { recursive: true });
    for (const id of ["att-alive", "att-doomed"]) {
      mkdirSync(join(attachmentsDir, id), { recursive: true });
      writeFileSync(join(attachmentsDir, id, "voice.m4a"), "");
    }
    // Need .parachute/vault.yaml so the structure is recognized (cheap to fake)
    writeFileSync(join(outDir, SIDECAR_DIR, "vault.yaml"), "name: t\n");

    const stats = sweep(outDir, { attachments: ["att-alive"] });
    expect(stats.attachment_dirs_removed).toBe(1);
    expect(existsSync(join(attachmentsDir, "att-alive"))).toBe(true);
    expect(existsSync(join(attachmentsDir, "att-doomed"))).toBe(false);
  });

  it("skips unparseable .md files without crashing", async () => {
    const outDir = join(tmpBase, "unparseable");
    mkdirSync(outDir, { recursive: true });
    writeFileSync(join(outDir, "no-frontmatter.md"), "just content, no frontmatter\n");
    writeFileSync(join(outDir, "garbage.md"), "---\nnot-real-yaml\n");

    const stats = sweep(outDir);
    // Both files lacked an `id`, so we record them but don't remove.
    expect(stats.notes_removed).toBe(0);
    expect(stats.unparseable_files.length).toBeGreaterThanOrEqual(2);
    expect(existsSync(join(outDir, "no-frontmatter.md"))).toBe(true);
    expect(existsSync(join(outDir, "garbage.md"))).toBe(true);
  });

  it("preserves all files when everything is in the valid sets", async () => {
    const outDir = join(tmpBase, "happy-path");
    const store = newStore();
    const a = await store.createNote("a", { path: "a" });
    const b = await store.createNote("b", { path: "b" });
    await store.upsertTagRecord("tag1", { description: "x" });
    await exportTo(store, outDir);

    const stats = sweep(outDir, { notes: [a.id, b.id], tags: ["tag1"] });
    expect(stats.notes_removed).toBe(0);
    expect(stats.schemas_removed).toBe(0);
    expect(stats.attachment_dirs_removed).toBe(0);
    expect(existsSync(join(outDir, "a.md"))).toBe(true);
    expect(existsSync(join(outDir, "b.md"))).toBe(true);
  });

  it("removes orphan note + corresponding notes-meta sidecar for csv/yaml notes", async () => {
    // For non-frontmatter extensions, the sidecar lives at
    // .parachute/notes-meta/<id>.yaml. Pruning the note should remove
    // both files.
    const outDir = join(tmpBase, "orphan-csv");
    const store = newStore();
    await store.createNote("col1,col2\n1,2\n", {
      id: "01CSV-DEL",
      path: "data/table",
      extension: "csv",
    });
    await exportTo(store, outDir);
    const contentFile = join(outDir, "data", "table.csv");
    const sidecarFile = join(outDir, SIDECAR_DIR, "notes-meta", "01CSV-DEL.yaml");
    expect(existsSync(contentFile)).toBe(true);
    expect(existsSync(sidecarFile)).toBe(true);

    const stats = sweep(outDir); // empty valid sets: doom it
    expect(stats.notes_removed).toBe(1);
    expect(stats.sidecars_removed).toBeGreaterThanOrEqual(1);
    expect(existsSync(contentFile)).toBe(false);
    expect(existsSync(sidecarFile)).toBe(false);
  });

  // Reviewer-flagged regression on vault#382 Critical #2 — pruneOrphans
  // walks via statSync (follows symlinks); without the safeRm guard a
  // symlink inside the mirror pointing OUTSIDE outDir would resurface
  // its target's files as orphans and rmSync would happily delete them
  // off-tree. The guard resolves each candidate and refuses anything
  // not under outDir; refusals get recorded in `unparseable_files` so
  // an operator can see what was skipped.
  it("refuses to delete files reached via a symlink pointing outside outDir", async () => {
    const outDir = join(tmpBase, "symlink-attack");
    const outside = join(tmpBase, "outside");
    mkdirSync(outside, { recursive: true });
    mkdirSync(outDir, { recursive: true });
    // A real, sensitive file in `outside/` we don't want pruneOrphans
    // to touch under any circumstance.
    const externalFile = join(outside, "do-not-touch.md");
    writeFileSync(externalFile, "---\nid: 01EXTERNAL\n---\nimportant\n");
    // A symlink inside outDir pointing at outside/ — walkContentFiles
    // would normally surface outside/do-not-touch.md as a candidate.
    try {
      symlinkSync(outside, join(outDir, "via-link"));
    } catch {
      // Some CI sandboxes refuse symlink creation. Skip the test in
      // that case rather than fail spuriously.
      return;
    }

    const stats = sweep(outDir); // empty valid sets: doom every id we see

    // Critical assertion: the external file MUST survive...
    expect(existsSync(externalFile)).toBe(true);
    // ...and must not be counted as removed.
    expect(stats.notes_removed).toBe(0);
    // And the refusal MUST be recorded so the operator sees it. Require the
    // path AND the reason on the same entry: accepting either one alone would
    // let an unrelated entry satisfy the check.
    expect(
      stats.unparseable_files.some(
        (u) => u.path.includes("via-link") && u.reason.includes("outside"),
      ),
    ).toBe(true);
  });

  // Reviewer-flagged regression on vault#382 Critical #1 — pruneOrphans
  // builds `validTagNames` from ALL tag-table rows in mirror-deps.ts.
  // After `deleteTagSchema(t)` the schema fields are cleared but the
  // tag row persists with the bare name, so the sidecar lingers
  // forever. The fix routes validTagNames through `hasSchemaContent`
  // before passing into pruneOrphans, and exports the predicate so
  // mirror-deps can reuse the single source of truth.
  it("considers a schema-content-free tag the same as a deleted tag for sidecar pruning", async () => {
    const outDir = join(tmpBase, "stale-schema");
    const store = newStore();
    await store.upsertTagRecord("bare", {}); // bare-name only
    await store.upsertTagRecord("with-schema", { description: "real" });
    await exportTo(store, outDir);
    const schemasDir = join(outDir, SIDECAR_DIR, "schemas");
    const bareSidecar = join(schemasDir, "bare.yaml");
    const realSidecar = join(schemasDir, "with-schema.yaml");
    // The bare tag SHOULDN'T have a sidecar; the schema-bearing one
    // SHOULD. This confirms the export-writer's contract before the
    // prune step.
    expect(existsSync(bareSidecar)).toBe(false);
    expect(existsSync(realSidecar)).toBe(true);

    // Now seed a stale sidecar for `bare` (simulating "the operator
    // previously had a schema for `bare`, then cleared it via
    // `deleteTagSchema`"). pruneOrphans should remove this iff the
    // caller correctly filtered validTagNames by hasSchemaContent.
    writeFileSync(bareSidecar, 'name: "bare"\ndescription: "stale"\n');
    expect(existsSync(bareSidecar)).toBe(true);

    // Filtered set — only `with-schema` has hasSchemaContent === true.
    const stats = sweep(outDir, { tags: ["with-schema"] });

    expect(stats.schemas_removed).toBe(1);
    expect(existsSync(bareSidecar)).toBe(false);
    expect(existsSync(realSidecar)).toBe(true);
  });
});
|
package/core/src/portable-md.ts
CHANGED
|
@@ -72,7 +72,7 @@
|
|
|
72
72
|
* See vault#308.
|
|
73
73
|
*/
|
|
74
74
|
|
|
75
|
-
import { readdirSync, readFileSync, statSync, mkdirSync, writeFileSync, copyFileSync, existsSync, rmSync } from "fs";
|
|
75
|
+
import { readdirSync, readFileSync, realpathSync, statSync, mkdirSync, writeFileSync, copyFileSync, existsSync, rmSync } from "fs";
|
|
76
76
|
import { basename, join, relative, extname, dirname, resolve as resolvePath, sep as pathSep } from "path";
|
|
77
77
|
import type { Store, Note, Link, Attachment } from "./types.js";
|
|
78
78
|
import type { TagRecord } from "./tag-schemas.js";
|
|
@@ -1001,7 +1001,375 @@ export async function exportVaultToDir(
|
|
|
1001
1001
|
};
|
|
1002
1002
|
}
|
|
1003
1003
|
|
|
1004
|
-
|
|
1004
|
+
// ---------------------------------------------------------------------------
|
|
1005
|
+
// Orphan sweep — for git-mirror's delete propagation
|
|
1006
|
+
// ---------------------------------------------------------------------------
|
|
1007
|
+
|
|
1008
|
+
/**
 * Counters and audit trail returned by one `pruneOrphans` sweep.
 */
export interface PruneOrphansStats {
  /** How many note content files were deleted because their id was not in `validNoteIds`. */
  notes_removed: number;
  /** How many per-note metadata sidecars (under `.parachute/notes-meta/`) were deleted. */
  sidecars_removed: number;
  /** How many tag schema sidecars (under `.parachute/schemas/`) were deleted. */
  schemas_removed: number;
  /** How many attachment directories (under `.parachute/attachments/`) were deleted. */
  attachment_dirs_removed: number;
  /**
   * Paths the sweep left in place, each with a human-readable reason.
   * Covers files that could not be parsed or classified, directory reads
   * that failed, and deletions that were refused or failed (for example a
   * path that resolves outside the mirror directory). Intended for operator
   * audit. Empty when everything classified and deleted cleanly.
   */
  unparseable_files: Array<{ path: string; reason: string }>;
}
|
|
1027
|
+
|
|
1028
|
+
/**
 * Inputs for a `pruneOrphans` sweep.
 *
 * The sweep is the catch-all companion to the event-driven delete path:
 * events handle the common case (one note deleted → the mirror removes that
 * one file), while the sweep picks up whatever the fast path missed (direct
 * SQL writes, a crash between dispatch and handler, restart gaps). Per the
 * design notes on vault#382 it is run periodically by the mirror manager
 * (safety-net poll, default 1h) and after operator-visible deletions.
 */
export interface PruneOrphansOptions {
  /** Directory to sweep; laid out the way `exportVaultToDir` writes its `outDir`. */
  outDir: string;
  /** Ids of notes to keep. Note files whose id is absent from this set are removed. */
  validNoteIds: Set<string>;
  /** Names of tags to keep. Schema sidecars under `.parachute/schemas/` for any other tag are removed. */
  validTagNames: Set<string>;
  /** Ids of attachments to keep. Directories under `.parachute/attachments/` named anything else are removed. */
  validAttachmentIds: Set<string>;
  /**
   * Optional predicate deciding, from a lower-cased file extension (no
   * leading dot), whether that file type carries inline frontmatter with an
   * `id:` field. When omitted, the module's own `supportsInlineFrontmatter`
   * is used. Files for which this returns false are attributed to a note via
   * their `.parachute/notes-meta/` sidecar instead.
   */
  supportsInlineFrontmatter?: (ext: string) => boolean;
}
|
|
1057
|
+
|
|
1058
|
+
/**
 * Sweep the export directory for files belonging to notes / tags /
 * attachments that no longer exist in the vault, and remove them so the
 * mirror's `git diff` reflects what the vault actually has.
 *
 * Strategy:
 *   1. Index `.parachute/notes-meta/<id>.yaml`. The filename gives the note
 *      id; the body gives the `(path, extension)` of the content file it
 *      describes.
 *   2. Walk content files. For inline-frontmatter extensions, read the
 *      frontmatter `id`; for every other extension, look the file up in the
 *      sidecar index by `(path, extension)`. If the owning id is not in
 *      `validNoteIds` the content file (and its sidecar, if any) is
 *      removed. Files whose owner cannot be determined are recorded in
 *      `unparseable_files` and left alone (no destructive guesses).
 *   3. Remove any remaining sidecar whose id is not in `validNoteIds`. A
 *      sidecar whose id IS valid is always kept, even when its content file
 *      is missing, so the next export can rewrite the content next to it.
 *   4. Walk `.parachute/schemas/`. Each `<tag>.yaml` is matched against
 *      `validTagNames` by its `name:` field, falling back to the filename
 *      when there is no name. Mismatch → remove.
 *   5. Walk `.parachute/attachments/`. Each subdirectory name is the
 *      attachment id. Not in `validAttachmentIds` → recursive remove.
 *
 * Every deletion goes through a guard that resolves symlinks and refuses to
 * delete anything outside `outDir`, or `outDir` itself.
 *
 * @param opts Directory to sweep plus the sets of ids / names to keep.
 * @returns Removal counts plus the list of paths that were skipped, so
 *   callers can log and decide whether to commit.
 * @throws If `outDir` exists but cannot be resolved with `realpathSync`.
 *
 * Safe to call on a directory that has never been exported to (returns
 * zero counts; doesn't create anything).
 */
export function pruneOrphans(opts: PruneOrphansOptions): PruneOrphansStats {
  const stats: PruneOrphansStats = {
    notes_removed: 0,
    sidecars_removed: 0,
    schemas_removed: 0,
    attachment_dirs_removed: 0,
    unparseable_files: [],
  };

  const outDir = opts.outDir;
  if (!existsSync(outDir)) return stats;

  const supportsInline = opts.supportsInlineFrontmatter ?? supportsInlineFrontmatter;
  const sidecarRoot = join(outDir, SIDECAR_DIR);
  const notesMetaRoot = join(sidecarRoot, NOTES_META_DIR);
  const schemasRoot = join(sidecarRoot, "schemas");
  const attachmentsRoot = join(sidecarRoot, "attachments");

  // Render a caught value for a reason string: prefer `.message`, else the value itself.
  const errText = (err: unknown): string =>
    String((err as { message?: unknown } | null | undefined)?.message ?? err);

  // Record a path the sweep is leaving in place, with the reason why.
  const skip = (path: string, reason: string): void => {
    stats.unparseable_files.push({ path, reason });
  };

  // Sidecar / schema files are bare YAML; wrap them in frontmatter fences so
  // `parseFrontmatter` can read them. Make sure the closing fence lands on
  // its own line even when the file has no trailing newline.
  const parseBareYaml = (text: string) =>
    parseFrontmatter(`---\n${text === "" || text.endsWith("\n") ? text : `${text}\n`}---\n`);

  // Path-traversal guard. `walkContentFiles` uses `statSync` which
  // follows symlinks — a symlink inside the mirror pointing OUTSIDE
  // `outDir` would resurface its target's files in the prune sweep,
  // and a bare `rmSync(filepath)` would delete them off-tree. Every
  // deletion in this function routes through `safeRm`, which calls
  // `realpathSync` (resolves through symlinks, unlike syntactic-only
  // `path.resolve`) and refuses to delete anything that isn't beneath
  // `outDir` after symlink resolution. Refusals get recorded in
  // `unparseable_files` so an operator can see what was skipped.
  // Reviewer-flagged on vault#382 (Critical #2).
  const outDirReal = realpathSync(outDir);
  const safeRm = (
    candidate: string,
    onSuccess: () => void,
    options: { recursive?: boolean } = {},
  ): void => {
    let real: string;
    try {
      real = realpathSync(candidate);
    } catch (err) {
      // realpathSync throws if the path doesn't exist or is unreadable.
      // Don't delete what we can't fully resolve.
      skip(candidate, `realpath failed: ${errText(err)}`);
      return;
    }
    if (!isWithinDir(real, outDirReal)) {
      skip(candidate, "real path resolved outside mirror outDir — refusing to delete (symlink?)");
      return;
    }
    if (real === outDirReal) {
      // A symlink inside the mirror that points back at the mirror root
      // must never turn into "delete the whole mirror".
      skip(candidate, "real path resolved to the mirror outDir itself — refusing to delete (symlink?)");
      return;
    }
    try {
      // Delete via the resolved real path; never via the unresolved
      // candidate (which could route through a symlink).
      rmSync(real, { force: true, recursive: options.recursive ?? false });
      onSuccess();
    } catch (err) {
      skip(candidate, `unlink failed: ${errText(err)}`);
    }
  };

  // ---- 1. Index notes-meta sidecars ----
  //
  // `sidecarFilesById` remembers every sidecar file by the id in its
  // filename. `sidecarIdByTarget` maps the `(path, extension)` a sidecar
  // claims to that id, so content files can be attributed in O(1). When two
  // sidecars claim the same target, the first one read wins.
  const sidecarFilesById = new Map<string, string>(); // id → absolute filepath
  const sidecarIdByTarget = new Map<string, string>(); // "<path>\0<ext>" → id
  const targetKey = (path: string, ext: string): string => `${path}\u0000${ext}`;
  if (existsSync(notesMetaRoot)) {
    try {
      for (const entry of readdirSync(notesMetaRoot)) {
        if (!entry.endsWith(".yaml")) continue;
        const id = entry.slice(0, -".yaml".length);
        const full = join(notesMetaRoot, entry);
        sidecarFilesById.set(id, full);
        try {
          const { frontmatter } = parseBareYaml(readFileSync(full, "utf-8"));
          const claimedPath: unknown = frontmatter.path;
          const claimedExt: unknown = frontmatter.extension;
          // A sidecar without a string `path` can never match a content file.
          if (typeof claimedPath !== "string") continue;
          // A missing extension means the default, "md".
          const ext = (typeof claimedExt === "string" ? claimedExt : "md").toLowerCase();
          const key = targetKey(claimedPath, ext);
          if (!sidecarIdByTarget.has(key)) sidecarIdByTarget.set(key, id);
        } catch (err) {
          skip(full, `failed to parse sidecar: ${errText(err)}`);
        }
      }
    } catch (err) {
      // Notes-meta dir read-error is non-fatal — record + carry on.
      skip(notesMetaRoot, `notes-meta walk failed: ${errText(err)}`);
    }
  }

  // ---- 2. Content files ----
  //
  // Walk content files (everything outside the .parachute sidecar).
  const contentFiles = walkContentFiles(outDir);
  // Sidecars for which a delete has already been attempted, so the orphan
  // sweep below doesn't try again and log a bogus "realpath failed" entry
  // for a file that was removed on purpose.
  const handledSidecarIds = new Set<string>();
  for (const filepath of contentFiles) {
    const dotExt = extname(filepath); // "" when the file has no extension
    const ext = dotExt.slice(1).toLowerCase();

    if (supportsInline(ext)) {
      // Inline frontmatter: the file itself names its owning note.
      try {
        const { frontmatter } = parseFrontmatter(readFileSync(filepath, "utf-8"));
        const rawId: unknown = frontmatter.id;
        const id = typeof rawId === "string" ? rawId : null;
        if (!id) {
          skip(filepath, "no `id` in frontmatter");
          continue;
        }
        if (!opts.validNoteIds.has(id)) {
          safeRm(filepath, () => {
            stats.notes_removed++;
          });
        }
      } catch (err) {
        skip(filepath, `read failed: ${errText(err)}`);
      }
      continue;
    }

    // Sidecar-required extension: attribute the file via (path, extension).
    const relPath = relative(outDir, filepath).replace(/\\/g, "/");
    // Vault paths don't carry extensions, so strip exactly the extension
    // text, and strip nothing when the file has none.
    const pathNoExt = dotExt.length > 0 ? relPath.slice(0, -dotExt.length) : relPath;
    const foundId = sidecarIdByTarget.get(targetKey(pathNoExt, ext));
    if (!foundId) {
      // Content file with no sidecar — can't tell which note owns it.
      // Conservative: leave alone, record as unparseable.
      skip(filepath, "no sidecar metadata could be matched by (path, extension)");
      continue;
    }
    if (opts.validNoteIds.has(foundId)) continue;

    // Note is orphaned — remove both content and sidecar.
    safeRm(filepath, () => {
      stats.notes_removed++;
    });
    const sidecarPath = sidecarFilesById.get(foundId);
    if (sidecarPath && !handledSidecarIds.has(foundId)) {
      handledSidecarIds.add(foundId);
      safeRm(sidecarPath, () => {
        stats.sidecars_removed++;
      });
    }
  }

  // ---- 3. Leftover sidecars ----
  //
  // Anything whose id is no longer valid goes. A sidecar for a valid note is
  // kept even if no content file matched it: that's an inconsistency, not an
  // orphan, and the next export can rewrite the content file alongside it.
  for (const [id, sidecarPath] of sidecarFilesById) {
    if (opts.validNoteIds.has(id) || handledSidecarIds.has(id)) continue;
    safeRm(sidecarPath, () => {
      stats.sidecars_removed++;
    });
  }

  // ---- 4. Schema sidecars ----
  if (existsSync(schemasRoot)) {
    try {
      for (const entry of readdirSync(schemasRoot)) {
        if (!entry.endsWith(".yaml")) continue;
        const full = join(schemasRoot, entry);
        try {
          const { frontmatter } = parseBareYaml(readFileSync(full, "utf-8"));
          const rawName: unknown = frontmatter.name;
          const name = typeof rawName === "string" ? rawName : null;
          let keep: boolean;
          if (name) {
            keep = opts.validTagNames.has(name);
          } else {
            // Fall back to the filename. sanitizeTagFilename replaces `/`
            // with `__`, so try the reversed form — but also the literal
            // stem, because a tag whose real name contains `__` would
            // otherwise be misread and deleted.
            const stem = entry.slice(0, -".yaml".length);
            keep =
              opts.validTagNames.has(stem) ||
              opts.validTagNames.has(stem.replace(/__/g, "/"));
          }
          if (!keep) {
            safeRm(full, () => {
              stats.schemas_removed++;
            });
          }
        } catch (err) {
          skip(full, `schema sweep failed: ${errText(err)}`);
        }
      }
    } catch (err) {
      skip(schemasRoot, `schemas walk failed: ${errText(err)}`);
    }
  }

  // ---- 5. Attachment directories ----
  if (existsSync(attachmentsRoot)) {
    try {
      for (const entry of readdirSync(attachmentsRoot)) {
        const full = join(attachmentsRoot, entry);
        let isDir: boolean;
        try {
          isDir = statSync(full).isDirectory();
        } catch (err) {
          skip(full, `stat failed: ${errText(err)}`);
          continue;
        }
        if (!isDir) continue;
        // The directory name IS the attachment id (per the export layout).
        if (opts.validAttachmentIds.has(entry)) continue;
        safeRm(
          full,
          () => {
            stats.attachment_dirs_removed++;
          },
          { recursive: true },
        );
      }
    } catch (err) {
      skip(attachmentsRoot, `attachments walk failed: ${errText(err)}`);
    }
  }

  return stats;
}
|
|
1358
|
+
|
|
1359
|
+
/**
|
|
1360
|
+
* True iff the tag carries content the export writer will emit as a
|
|
1361
|
+
* schema sidecar (`.parachute/schemas/<tag>.yaml`). Bare-name tags
|
|
1362
|
+
* (the `tags` table has a row but description/fields/relationships/
|
|
1363
|
+
* parents are all empty) get no sidecar — and crucially, after
|
|
1364
|
+
* `deleteTagSchema` clears those fields the row persists with the
|
|
1365
|
+
* bare name. Callers building the `validTagNames` set for
|
|
1366
|
+
* `pruneOrphans` MUST filter through this predicate, otherwise the
|
|
1367
|
+
* stale sidecar lingers indefinitely.
|
|
1368
|
+
*
|
|
1369
|
+
* Reviewer-flagged on vault#382: without this filter, a cleared
|
|
1370
|
+
* schema's sidecar never gets pruned.
|
|
1371
|
+
*/
|
|
1372
|
+
export function hasSchemaContent(tag: TagRecord): boolean {
|
|
1005
1373
|
if (tag.description !== undefined && tag.description.length > 0) return true;
|
|
1006
1374
|
if (tag.fields && Object.keys(tag.fields).length > 0) return true;
|
|
1007
1375
|
if (tag.relationships && Object.keys(tag.relationships).length > 0) return true;
|