tgo-wiki 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +255 -0
- package/docs/mcp-usage.md +631 -0
- package/docs/v0-acceptance.md +105 -0
- package/docs/v0-delivery-checklist.md +57 -0
- package/docs/v1-acceptance.md +39 -0
- package/docs/v2-acceptance.md +165 -0
- package/package.json +69 -0
- package/packages/core/src/config/config-loader.ts +109 -0
- package/packages/core/src/config/defaults.ts +74 -0
- package/packages/core/src/config/workspace-resolver.ts +40 -0
- package/packages/core/src/documents/command-document-parser.ts +206 -0
- package/packages/core/src/documents/document-id.ts +26 -0
- package/packages/core/src/documents/document-parser-registry.ts +126 -0
- package/packages/core/src/documents/document-service.ts +656 -0
- package/packages/core/src/documents/document-store.ts +132 -0
- package/packages/core/src/documents/document-types.ts +33 -0
- package/packages/core/src/documents/pdf-text-parser.ts +35 -0
- package/packages/core/src/documents/text-markdown-parser.ts +50 -0
- package/packages/core/src/errors.ts +46 -0
- package/packages/core/src/git/git-service.ts +68 -0
- package/packages/core/src/index.ts +38 -0
- package/packages/core/src/markdown/markdown-scanner.ts +90 -0
- package/packages/core/src/permissions/permission-service.ts +50 -0
- package/packages/core/src/publish/publish-service.ts +142 -0
- package/packages/core/src/result.ts +13 -0
- package/packages/core/src/services/session-workflow-service.ts +493 -0
- package/packages/core/src/services/wiki-service.ts +119 -0
- package/packages/core/src/services/workspace-service.ts +223 -0
- package/packages/core/src/session/session-id.ts +14 -0
- package/packages/core/src/session/session-service.ts +77 -0
- package/packages/core/src/session/session-store.ts +91 -0
- package/packages/core/src/session/session-types.ts +17 -0
- package/packages/core/src/sources/source-id.ts +19 -0
- package/packages/core/src/sources/source-paths.ts +15 -0
- package/packages/core/src/sources/source-service.ts +416 -0
- package/packages/core/src/sources/source-types.ts +77 -0
- package/packages/core/src/sources/source-validator.ts +132 -0
- package/packages/core/src/sources/source-writer.ts +419 -0
- package/packages/core/src/validation/frontmatter-validator.ts +128 -0
- package/packages/core/src/validation/link-validator.ts +55 -0
- package/packages/core/src/validation/path-validator.ts +65 -0
- package/packages/core/src/validation/source-reference-validator.ts +191 -0
- package/packages/core/src/validation/validation-service.ts +106 -0
- package/packages/core/src/vfs/vfs-command-parser.ts +69 -0
- package/packages/core/src/vfs/vfs-service.ts +498 -0
- package/packages/core/src/web/html-to-markdown.ts +144 -0
- package/packages/core/src/web/static-web-fetcher.ts +537 -0
- package/packages/core/src/web/web-id.ts +26 -0
- package/packages/core/src/web/web-ingestion-service.ts +335 -0
- package/packages/core/src/web/web-paths.ts +6 -0
- package/packages/core/src/web/web-types.ts +33 -0
- package/packages/server/src/cli.ts +56 -0
- package/packages/server/src/context.ts +7 -0
- package/packages/server/src/index.ts +2 -0
- package/packages/server/src/mcp-server.ts +111 -0
- package/packages/server/src/schemas/documents.ts +17 -0
- package/packages/server/src/schemas/read.ts +16 -0
- package/packages/server/src/schemas/session.ts +31 -0
- package/packages/server/src/schemas/sources.ts +12 -0
- package/packages/server/src/schemas/web.ts +23 -0
- package/packages/server/src/tools/document-tools.ts +46 -0
- package/packages/server/src/tools/publish-tools.ts +33 -0
- package/packages/server/src/tools/read-tools.ts +52 -0
- package/packages/server/src/tools/response.ts +24 -0
- package/packages/server/src/tools/session-tools.ts +100 -0
- package/packages/server/src/tools/source-tools.ts +32 -0
- package/packages/server/src/tools/web-tools.ts +26 -0
|
@@ -0,0 +1,656 @@
|
|
|
1
|
+
import { createHash, randomBytes } from "node:crypto";
|
|
2
|
+
import { lstat, mkdir, readFile, realpath, rm, writeFile } from "node:fs/promises";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { loadConfig } from "../config/config-loader.js";
|
|
5
|
+
import { resolveWorkspacePaths, type WorkspacePaths } from "../config/workspace-resolver.js";
|
|
6
|
+
import { WikiError, toWikiError } from "../errors.js";
|
|
7
|
+
import { err, ok, type Result } from "../result.js";
|
|
8
|
+
import { SessionStore } from "../session/session-store.js";
|
|
9
|
+
import type { SessionMetadata } from "../session/session-types.js";
|
|
10
|
+
import { sourceMetadataPath } from "../sources/source-paths.js";
|
|
11
|
+
import type { SourceMetadata } from "../sources/source-types.js";
|
|
12
|
+
import { parseSourceMetadata } from "../sources/source-validator.js";
|
|
13
|
+
import { assertNoExistingSourceForCreate, publishSource } from "../sources/source-writer.js";
|
|
14
|
+
import { assertValidDocumentId, generateDocumentId } from "./document-id.js";
|
|
15
|
+
import { DocumentParserRegistry } from "./document-parser-registry.js";
|
|
16
|
+
import { DocumentStore } from "./document-store.js";
|
|
17
|
+
import type {
|
|
18
|
+
DocumentIdGenerator,
|
|
19
|
+
DocumentSourceType,
|
|
20
|
+
DocumentUploadInput,
|
|
21
|
+
DocumentUploadResult,
|
|
22
|
+
PendingDocumentMetadata
|
|
23
|
+
} from "./document-types.js";
|
|
24
|
+
|
|
25
|
+
/** Arguments accepted by `DocumentService.parse`. */
export type DocumentParseInput = {
  /** Session the document belongs to; the session must have status "open". */
  sessionId: string;
  /** Id of the document to parse; checked with `assertValidDocumentId`. */
  documentId: string;
  /** Parser name handed to the registry; `parse` falls back to "auto" when omitted. */
  parser?: "auto" | string;
  /** When `true`, an already published source is replaced and its version incremented. */
  reparse?: boolean;
};

/** Success payload returned by `DocumentService.parse`. */
export type DocumentParseResult = {
  document_id: string;
  /** 1 for a first parse; previous version + 1 when an existing source is reparsed. */
  version: number;
  /** Relative path of the form `sources/<documentId>/raw.md`. */
  raw_markdown_path: string;
  /** Relative path of the form `sources/<documentId>/metadata.json`. */
  metadata_path: string;
  status: "parsed";
  /** Warnings reported by the parser. */
  warnings: string[];
};

/** Optional collaborators for `DocumentService`. */
export type DocumentServiceOptions = {
  /** Parser registry to use; when omitted one is built from the loaded workspace config. */
  parserRegistry?: DocumentParserRegistry;
  /** Time source for parse timestamps; defaults to `() => new Date()`. */
  clock?: () => Date;
};
|
|
45
|
+
|
|
46
|
+
/**
 * Accepts document uploads for a session and parses them into sources that are
 * written into the session's worktree.
 *
 * Both public methods return a `Result`: errors raised inside their `try`
 * blocks are passed through `toWikiError(error, "validation_failed")` and
 * returned with `err(...)`.
 */
export class DocumentService {
  private readonly paths: WorkspacePaths;
  private readonly sessions: SessionStore;
  private readonly documents: DocumentStore;
  private readonly parserRegistry?: DocumentParserRegistry;
  private readonly clock: () => Date;

  /**
   * @param workspaceRoot Workspace root handed to `resolveWorkspacePaths`.
   * @param idGenerator Produces the id assigned to each uploaded document.
   * @param options Optional parser registry and clock overrides.
   */
  constructor(
    workspaceRoot: string,
    private readonly idGenerator: DocumentIdGenerator = generateDocumentId,
    options: DocumentServiceOptions = {}
  ) {
    this.paths = resolveWorkspacePaths(workspaceRoot);
    this.sessions = new SessionStore(this.paths);
    this.documents = new DocumentStore(this.paths);
    this.parserRegistry = options.parserRegistry;
    this.clock = options.clock ?? (() => new Date());
  }

  /**
   * Stores an uploaded document as a blob plus pending metadata.
   *
   * The session must be open, the MIME type must map to a supported source
   * type, and the content must be strict base64. The blob is keyed by the
   * SHA-256 of the decoded bytes.
   *
   * @returns `ok` with the new document id, blob hash, decoded size and status
   *   "uploaded", or `err` with the converted error.
   */
  async upload(input: DocumentUploadInput): Promise<Result<DocumentUploadResult>> {
    try {
      const session = await this.sessions.read(input.sessionId);
      if (session.status !== "open") {
        throw new WikiError("validation_failed", `Session is not open: ${input.sessionId}`, {
          status: session.status
        });
      }

      const documentId = this.idGenerator();
      assertValidDocumentId(documentId);
      const sourceType = sourceTypeForMimeType(input.mimeType);
      const content = decodeBase64(input.contentBase64);
      const blobSha256 = createHash("sha256").update(content).digest("hex");

      await this.documents.writeBlobIfAbsent(blobSha256, content);

      // Use the injected clock (as `parse` does) so timestamps are controllable by callers.
      const now = this.clock().toISOString();
      const metadata: PendingDocumentMetadata = {
        documentId,
        sessionId: session.sessionId,
        sourceType,
        originalFileName: input.fileName,
        mimeType: input.mimeType,
        blobSha256,
        createdAt: now,
        updatedAt: now,
        createdBy: input.createdBy,
        status: "uploaded"
      };
      await this.documents.writePending(metadata);

      return ok({
        document_id: documentId,
        blob_sha256: blobSha256,
        size: content.byteLength,
        status: "uploaded"
      });
    } catch (error) {
      return err(toWikiError(error, "validation_failed"));
    }
  }

  /**
   * Parses a document's blob and publishes the result as a source in the
   * session worktree.
   *
   * The document is looked up first as an existing source in the worktree and
   * otherwise as pending upload metadata belonging to the same session. An
   * existing source is only accepted when `input.reparse` is `true`. A parse
   * lock for the (session, document) pair is held while the work runs.
   *
   * NOTE(review): the lock is released in `finally`; if `release()` throws,
   * the returned promise rejects instead of resolving to an `err` result.
   *
   * @returns `ok` with the source version and relative paths, or `err` with
   *   the converted error.
   */
  async parse(input: DocumentParseInput): Promise<Result<DocumentParseResult>> {
    let lock: ParseLock | undefined;

    try {
      assertValidDocumentId(input.documentId);
      const session = await this.sessions.read(input.sessionId);
      if (session.status !== "open") {
        throw new WikiError("validation_failed", `Session is not open: ${input.sessionId}`, {
          status: session.status
        });
      }

      const worktreeRoot = await this.sessionWorktreeRoot(session);
      lock = await acquireParseLock(this.paths, input.sessionId, input.documentId);
      const existing = await this.readExistingSourceMetadataIfExists(worktreeRoot, input.documentId);
      if (existing && input.reparse !== true) {
        throw new WikiError("source_exists", `Source already exists: ${input.documentId}`);
      }

      // Prefer the published source metadata; fall back to the pending upload record.
      let document: PendingDocumentMetadata | SourceMetadata | undefined = existing;
      if (!document) {
        document = await this.readPendingIfExists(input.documentId, input.sessionId);
      }

      if (!document) {
        throw new WikiError("document_not_found", `Document metadata not found: ${input.documentId}`);
      }

      if (document.sourceType === "web") {
        throw new WikiError("validation_failed", `Cannot parse web source as document: ${input.documentId}`);
      }

      if (input.reparse !== true) {
        await assertNoExistingSourceForCreate(worktreeRoot, input.documentId);
      }

      const blob = await this.readBlob(document.blobSha256);
      const registry = await this.resolveParserRegistry();
      const requestedParser = input.parser ?? "auto";
      const parser = registry.resolve(requestedParser, document.mimeType);
      const parsedAt = this.clock().toISOString();
      const parsed = await parser.parse({
        documentId: input.documentId,
        fileName: document.originalFileName,
        mimeType: document.mimeType,
        content: blob,
        parsedAt
      });

      const now = parsedAt;
      const relativeRawMarkdownPath = sourceRelativePath(input.documentId, "raw.md");
      const relativeMetadataPath = sourceRelativePath(input.documentId, "metadata.json");
      const metadata = {
        documentId: input.documentId,
        // A reparse of an existing source bumps its version; anything else starts at 1.
        version: existing && input.reparse ? existing.version + 1 : 1,
        sourceType: document.sourceType,
        originalFileName: document.originalFileName,
        mimeType: document.mimeType,
        blobSha256: document.blobSha256,
        rawMarkdownPath: relativeRawMarkdownPath,
        parser: {
          name: parser.name,
          version: parser.version
        },
        parserMetadata: parsed.metadata,
        createdAt: existing?.createdAt ?? document.createdAt,
        updatedAt: now,
        createdBy: document.createdBy,
        status: "parsed"
      };

      await publishSource({
        worktreeRoot,
        sourceId: input.documentId,
        markdown: parsed.markdown,
        assets: parsed.assets,
        metadataJson: `${JSON.stringify(metadata, null, 2)}\n`,
        mode: input.reparse ? "replace" : "create"
      });

      return ok({
        document_id: input.documentId,
        version: metadata.version,
        raw_markdown_path: relativeRawMarkdownPath,
        metadata_path: relativeMetadataPath,
        status: "parsed",
        warnings: parsed.warnings
      });
    } catch (error) {
      return err(toWikiError(error, "validation_failed"));
    } finally {
      await lock?.release();
    }
  }

  /** Returns the injected registry, or builds one from the workspace config's `documents` section. */
  private async resolveParserRegistry(): Promise<DocumentParserRegistry> {
    if (this.parserRegistry) {
      return this.parserRegistry;
    }

    const config = await loadConfig(this.paths);
    return new DocumentParserRegistry(config.documents);
  }

  /**
   * Reads and validates the pending metadata of an uploaded document.
   *
   * @returns The metadata, or `undefined` when the file does not exist or the
   *   record belongs to a different session.
   * @throws WikiError("validation_failed") when the stored record is malformed.
   */
  private async readPendingIfExists(documentId: string, sessionId: string): Promise<PendingDocumentMetadata | undefined> {
    try {
      const raw = await readFile(this.documents.pendingPath(documentId), "utf8");
      // Keep the parsed value as `unknown` until the type guard has vetted it.
      const metadata: unknown = JSON.parse(raw);
      if (!isPendingDocumentMetadata(metadata, documentId)) {
        throw new WikiError("validation_failed", `Pending document metadata is invalid: ${documentId}`);
      }

      if (metadata.sessionId !== sessionId) {
        return undefined;
      }

      return metadata;
    } catch (error) {
      if (isEnoent(error)) {
        return undefined;
      }

      throw error;
    }
  }

  /**
   * Reads the metadata of a source already present in the worktree.
   *
   * @returns The parsed metadata, or `undefined` when the metadata file does not exist.
   * @throws WikiError("validation_failed") when the stored document id or raw
   *   markdown path does not match the requested document.
   */
  private async readExistingSourceMetadataIfExists(worktreeRoot: string, documentId: string): Promise<SourceMetadata | undefined> {
    try {
      const raw = await readFile(sourceMetadataPath(worktreeRoot, documentId), "utf8");
      const metadata = parseSourceMetadata(JSON.parse(raw));
      const expectedRawMarkdownPath = sourceRelativePath(documentId, "raw.md");

      if (metadata.documentId !== documentId || metadata.rawMarkdownPath !== expectedRawMarkdownPath) {
        throw new WikiError("validation_failed", `Existing source metadata does not match document: ${documentId}`, {
          documentId: metadata.documentId,
          rawMarkdownPath: metadata.rawMarkdownPath,
          expectedDocumentId: documentId,
          expectedRawMarkdownPath
        });
      }

      return metadata;
    } catch (error) {
      if (isEnoent(error)) {
        return undefined;
      }

      throw error;
    }
  }

  /**
   * Loads the stored blob for a content hash.
   *
   * @throws WikiError("blob_not_found") when the blob file does not exist.
   */
  private async readBlob(blobSha256: string): Promise<Buffer> {
    try {
      return await readFile(this.documents.blobPath(blobSha256));
    } catch (error) {
      if (isEnoent(error)) {
        throw new WikiError("blob_not_found", `Document blob not found: ${blobSha256}`, { blobSha256 });
      }

      throw error;
    }
  }

  /**
   * Resolves the real path of a session's worktree after checking that it sits
   * strictly below the sessions worktree directory.
   *
   * The lexical path is checked first. Then each boundary directory
   * (worktrees, sessions, and every segment down to the worktree) is verified
   * or created via the `ensure*ForWrite` helpers, and the resolved real paths
   * are checked again for containment.
   *
   * @throws WikiError("session_metadata_invalid") when the lexical worktree
   *   path is not below the sessions root.
   */
  private async sessionWorktreeRoot(metadata: SessionMetadata): Promise<string> {
    const absolute = path.resolve(this.paths.workspaceRoot, metadata.worktree);
    const sessionsRoot = path.resolve(this.paths.sessionsWorktreePath);

    if (!absolute.startsWith(`${sessionsRoot}${path.sep}`)) {
      throw new WikiError("session_metadata_invalid", `Session worktree escapes sessions root: ${metadata.sessionId}`);
    }

    const realWorkspaceRoot = await realpath(this.paths.workspaceRoot);
    await ensureDirectoryForWrite(this.paths.worktreesPath, realWorkspaceRoot, "Worktrees boundary");
    const realWorktreesRoot = await realpath(this.paths.worktreesPath);

    await ensureDirectoryForWrite(this.paths.sessionsWorktreePath, realWorktreesRoot, "Sessions worktree boundary");
    const realSessionsRoot = await realpath(this.paths.sessionsWorktreePath);

    await ensureDescendantDirectoryForWrite(this.paths.sessionsWorktreePath, absolute, realSessionsRoot, "Session worktree root");
    const realWorktreeRoot = await realpath(absolute);
    ensureStrictDescendant(realWorktreesRoot, realSessionsRoot, this.paths.sessionsWorktreePath, "Sessions worktree boundary escapes worktrees boundary");
    ensureStrictDescendant(realSessionsRoot, realWorktreeRoot, absolute, "Session worktree root escapes sessions boundary");
    return realWorktreeRoot;
  }
}
|
|
293
|
+
|
|
294
|
+
/**
 * Maps an upload MIME type onto the document source type it is stored as.
 *
 * @throws WikiError("parser_not_supported") for any MIME type not listed here.
 */
function sourceTypeForMimeType(mimeType: string): DocumentSourceType {
  switch (mimeType) {
    case "application/pdf":
      return "pdf";
    case "text/markdown":
    case "text/x-markdown":
      return "markdown";
    case "text/plain":
      return "text";
    default:
      throw new WikiError("parser_not_supported", `Unsupported document MIME type: ${mimeType}`, { mimeType });
  }
}
|
|
309
|
+
|
|
310
|
+
/**
 * Decodes upload content, accepting only input that passes `isStrictBase64`.
 *
 * @throws WikiError("validation_failed") when the input fails that check.
 */
function decodeBase64(value: string): Buffer {
  if (isStrictBase64(value)) {
    return Buffer.from(value, "base64");
  }

  throw new WikiError("validation_failed", "Document content is not valid base64");
}
|
|
317
|
+
|
|
318
|
+
/**
 * Reports whether `value` is padded, standard-alphabet base64: length is a
 * multiple of four, and `=` appears only as final padding.
 */
function isStrictBase64(value: string): boolean {
  if (value.length % 4 !== 0) {
    return false;
  }

  const paddedBase64 = /^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$/;
  return paddedBase64.test(value);
}
|
|
321
|
+
|
|
322
|
+
/** Builds the `/`-separated relative path of a source file: `sources/<documentId>/<fileName>`. */
function sourceRelativePath(documentId: string, fileName: "metadata.json" | "raw.md"): string {
  return ["sources", documentId, fileName].join("/");
}
|
|
325
|
+
|
|
326
|
+
/** Handle for a held parse lock. */
type ParseLock = {
  /** Stops the heartbeat and removes the lock if it still carries this holder's token. */
  release(): Promise<void>;
};

// A lock whose last recorded timestamp is older than this is treated as stale and may be replaced.
const parseLockTtlMs = 30 * 60 * 1000;
// Interval at which the holder refreshes `updatedAt` in the lock metadata.
const parseLockHeartbeatIntervalMs = 60 * 1000;
// An admin guard directory whose mtime is older than this is removed and re-acquired.
const parseLockAdminTtlMs = 60 * 1000;
// Upper bound on how long a release keeps retrying while the admin guard is busy.
const parseLockReleaseRetryTimeoutMs = 1000;
// Pause between release retries.
const parseLockReleaseRetryDelayMs = 10;

/** Contents of `metadata.json` inside a lock directory. */
type ParseLockMetadata = {
  /** Random value identifying the holder. */
  token: string;
  /** ISO timestamp written when the lock was created. */
  createdAt: string;
  /** ISO timestamp of the latest heartbeat; optional when reading stored metadata. */
  updatedAt?: string;
};
|
|
341
|
+
|
|
342
|
+
/**
 * Takes the parse lock for one (session, document) pair.
 *
 * The lock is a directory `<documentsStatePath>/parse-locks/<sessionId>--<documentId>.lock`
 * holding a `metadata.json` with a random token and timestamps. Creation runs
 * under the admin guard so that inspecting, replacing and creating the lock
 * are not interleaved with another guard holder.
 *
 * @returns A handle whose `release()` stops the heartbeat and removes the lock.
 * @throws WikiError("validation_failed") when a non-stale lock already exists,
 *   or when the admin guard is busy.
 */
async function acquireParseLock(paths: WorkspacePaths, sessionId: string, documentId: string): Promise<ParseLock> {
  const locksRoot = path.join(paths.documentsStatePath, "parse-locks");
  await mkdir(locksRoot, { recursive: true });
  const lockPath = path.join(locksRoot, `${sessionId}--${documentId}.lock`);
  // The heartbeat and release only touch a lock whose metadata carries this token.
  const token = randomBytes(16).toString("hex");

  await withParseLockAdminGuard(lockPath, async () => {
    const existing = await readParseLockFreshness(lockPath);
    if (existing) {
      if (!isParseLockStale(existing)) {
        throw new WikiError("validation_failed", `Document parse is already in progress: ${documentId}`, {
          sessionId,
          documentId
        });
      }

      // Stale lock: discard it so the mkdir below can succeed.
      await rm(lockPath, { recursive: true, force: true });
    }

    // Non-recursive mkdir fails if the directory already exists.
    await mkdir(lockPath);
    try {
      const now = new Date().toISOString();
      await writeParseLockMetadata(lockPath, { token, createdAt: now, updatedAt: now });
    } catch (error) {
      // Do not leave behind a lock directory without metadata.
      await rm(lockPath, { recursive: true, force: true });
      throw error;
    }
  });

  const heartbeatTimer = startParseLockHeartbeat(lockPath, token);
  return {
    async release() {
      stopParseLockHeartbeat(heartbeatTimer);
      await releaseParseLockWithRetry(lockPath, token);
    }
  };
}
|
|
379
|
+
|
|
380
|
+
/** Writes `metadata` as pretty-printed JSON (with trailing newline) to `<lockPath>/metadata.json`. */
async function writeParseLockMetadata(lockPath: string, metadata: ParseLockMetadata): Promise<void> {
  const metadataFile = path.join(lockPath, "metadata.json");
  const serialized = `${JSON.stringify(metadata, null, 2)}\n`;
  await writeFile(metadataFile, serialized, "utf8");
}
|
|
383
|
+
|
|
384
|
+
/**
 * Starts the periodic heartbeat that refreshes a held lock's `updatedAt`.
 *
 * The heartbeat is best-effort: a failed refresh is dropped here rather than
 * left as an unhandled promise rejection raised from a timer callback. The
 * lock's recorded timestamp simply stops advancing in that case.
 *
 * @returns The interval handle, to be passed to `stopParseLockHeartbeat`.
 */
function startParseLockHeartbeat(lockPath: string, token: string): ReturnType<typeof setInterval> {
  const timer = setInterval(() => {
    updateParseLockHeartbeat(lockPath, token).catch(() => {
      // Intentionally ignored: nothing awaits this callback, so a rejection
      // here could only surface as an unhandled rejection.
    });
  }, parseLockHeartbeatIntervalMs);
  // Where the handle supports it, do not let the heartbeat keep the process alive.
  if (typeof timer === "object" && "unref" in timer && typeof timer.unref === "function") {
    timer.unref();
  }
  return timer;
}

/** Cancels a heartbeat started by `startParseLockHeartbeat`. */
function stopParseLockHeartbeat(timer: ReturnType<typeof setInterval>): void {
  clearInterval(timer);
}
|
|
397
|
+
|
|
398
|
+
/**
 * Refreshes `updatedAt` on the lock, provided its metadata still carries `token`.
 *
 * Runs under the admin guard. ENOENT errors and WikiError("validation_failed")
 * are swallowed; any other error is rethrown.
 */
async function updateParseLockHeartbeat(lockPath: string, token: string): Promise<void> {
  try {
    await withParseLockAdminGuard(lockPath, async () => {
      const metadata = await readParseLockMetadataIfExists(lockPath);
      if (metadata?.token !== token) {
        // Lock is gone or held under a different token: nothing to refresh.
        return;
      }

      const refreshed = { ...metadata, updatedAt: new Date().toISOString() };
      await writeParseLockMetadata(lockPath, refreshed);
    });
  } catch (error) {
    const ignorable = isEnoent(error) || isWikiErrorCode(error, "validation_failed");
    if (!ignorable) {
      throw error;
    }
  }
}
|
|
415
|
+
|
|
416
|
+
/** Removes the lock directory, but only when its metadata carries the caller's `token`. */
async function releaseParseLock(lockPath: string, token: string): Promise<void> {
  const current = await readParseLockMetadataIfExists(lockPath);
  const heldByCaller = current?.token === token;

  if (heldByCaller) {
    await rm(lockPath, { recursive: true, force: true });
  }
}
|
|
424
|
+
|
|
425
|
+
/**
 * Releases the lock under the admin guard, retrying while the attempt fails
 * with WikiError("validation_failed").
 *
 * Retries are spaced by `parseLockReleaseRetryDelayMs` and stop once
 * `parseLockReleaseRetryTimeoutMs` has elapsed, at which point the last
 * error is thrown. Any other error is thrown immediately.
 */
async function releaseParseLockWithRetry(lockPath: string, token: string): Promise<void> {
  const deadline = Date.now() + parseLockReleaseRetryTimeoutMs;

  for (;;) {
    try {
      await withParseLockAdminGuard(lockPath, () => releaseParseLock(lockPath, token));
      return;
    } catch (error) {
      if (!isWikiErrorCode(error, "validation_failed")) {
        throw error;
      }

      await delay(parseLockReleaseRetryDelayMs);
      if (Date.now() >= deadline) {
        throw error;
      }
    }
  }
}
|
|
447
|
+
|
|
448
|
+
/**
 * Runs `callback` while holding the admin guard directory `<lockPath>.admin`.
 * The guard is removed afterwards whether the callback resolves or throws.
 */
async function withParseLockAdminGuard<T>(lockPath: string, callback: () => Promise<T>): Promise<T> {
  const guardPath = `${lockPath}.admin`;
  await acquireParseLockAdminGuard(guardPath);

  try {
    const outcome = await callback();
    return outcome;
  } finally {
    await rm(guardPath, { recursive: true, force: true });
  }
}
|
|
458
|
+
|
|
459
|
+
/**
 * Creates the admin guard directory, making up to three attempts.
 *
 * When the directory already exists: if it vanished in the meantime the next
 * attempt runs; if its mtime is older than `parseLockAdminTtlMs` it is removed
 * and the next attempt runs; otherwise the guard is considered busy.
 *
 * @throws WikiError("validation_failed") when the guard is busy or all attempts are used up.
 */
async function acquireParseLockAdminGuard(guardPath: string): Promise<void> {
  const guardBusy = (): WikiError =>
    new WikiError("validation_failed", `Document parse lock administration is already in progress: ${path.basename(guardPath)}`);

  let attemptsLeft = 3;
  while (attemptsLeft > 0) {
    attemptsLeft -= 1;

    try {
      await mkdir(guardPath);
      return;
    } catch (error) {
      if (!isFileExists(error)) {
        throw error;
      }
    }

    const guardStats = await lstatIfExists(guardPath);
    if (!guardStats) {
      continue;
    }

    const guardAgeMs = Date.now() - guardStats.mtimeMs;
    if (guardAgeMs > parseLockAdminTtlMs) {
      await rm(guardPath, { recursive: true, force: true });
      continue;
    }

    throw guardBusy();
  }

  throw guardBusy();
}
|
|
485
|
+
|
|
486
|
+
/** Most recent point in time (epoch milliseconds) at which a lock showed activity. */
type ParseLockFreshness = {
  observedAtMs: number;
};

/**
 * Determines when a lock was last refreshed.
 *
 * Uses `updatedAt` (or `createdAt`) from the lock metadata when it parses as a
 * date; otherwise falls back to the lock path's mtime.
 *
 * @returns `undefined` when neither source is available, i.e. no lock exists.
 */
async function readParseLockFreshness(lockPath: string): Promise<ParseLockFreshness | undefined> {
  const metadata = await readParseLockMetadataIfExists(lockPath);
  if (metadata) {
    const recordedAtMs = Date.parse(metadata.updatedAt ?? metadata.createdAt);
    if (!Number.isNaN(recordedAtMs)) {
      return { observedAtMs: recordedAtMs };
    }
  }

  const lockStats = await lstatIfExists(lockPath);
  if (!lockStats) {
    return undefined;
  }

  return { observedAtMs: lockStats.mtimeMs };
}
|
|
500
|
+
|
|
501
|
+
/** A lock is stale once more than `parseLockTtlMs` has passed since it was last observed fresh. */
function isParseLockStale(freshness: ParseLockFreshness): boolean {
  const ageMs = Date.now() - freshness.observedAtMs;
  return ageMs > parseLockTtlMs;
}
|
|
504
|
+
|
|
505
|
+
/**
 * Reads `<lockPath>/metadata.json`.
 *
 * @returns The metadata, or `undefined` when the file is missing, is not valid
 *   JSON, or lacks a string `token` / `createdAt`. Read errors other than
 *   ENOENT are rethrown.
 */
async function readParseLockMetadataIfExists(lockPath: string): Promise<ParseLockMetadata | undefined> {
  const metadataFile = path.join(lockPath, "metadata.json");

  let raw: string;
  try {
    raw = await readFile(metadataFile, "utf8");
  } catch (error) {
    if (!isEnoent(error)) {
      throw error;
    }

    return undefined;
  }

  try {
    // Destructuring throws for a JSON `null`, which the catch below maps to `undefined`.
    const { token, createdAt, updatedAt } = JSON.parse(raw) as Partial<ParseLockMetadata>;
    if (typeof token === "string" && typeof createdAt === "string") {
      return {
        token,
        createdAt,
        updatedAt: typeof updatedAt === "string" ? updatedAt : undefined
      };
    }

    return undefined;
  } catch {
    return undefined;
  }
}
|
|
532
|
+
|
|
533
|
+
/** `lstat` that resolves to `undefined` for a missing path; other errors are rethrown. */
async function lstatIfExists(filePath: string): Promise<{ mtimeMs: number } | undefined> {
  try {
    const stats = await lstat(filePath);
    return stats;
  } catch (error) {
    if (!isEnoent(error)) {
      throw error;
    }

    return undefined;
  }
}
|
|
544
|
+
|
|
545
|
+
/**
 * Verifies (creating where missing) every directory on the way from `root`
 * down to `target`, one path segment at a time.
 *
 * @param root Directory the walk starts from; `target` must lie strictly below it.
 * @param target Directory to reach.
 * @param realBoundary Real path each visited directory must resolve inside.
 * @param label Prefix used in error messages.
 * @throws WikiError("invalid_path") when `target` equals `root` or lies outside it,
 *   or when a segment fails the `ensureDirectoryForWrite` checks.
 */
async function ensureDescendantDirectoryForWrite(root: string, target: string, realBoundary: string, label: string): Promise<void> {
  const relativePath = path.relative(root, target);
  // Only a leading ".." *segment* leaves the root; a name such as "..cache" does not.
  const climbsOut = relativePath === ".." || relativePath.startsWith(`..${path.sep}`);
  if (relativePath.length === 0 || climbsOut || path.isAbsolute(relativePath)) {
    throw new WikiError("invalid_path", `${label} escapes boundary: ${target}`);
  }

  const segments = relativePath.split(path.sep).filter(segment => segment.length > 0 && segment !== ".");
  let current = root;

  for (const segment of segments) {
    current = path.join(current, segment);
    await ensureDirectoryForWrite(current, realBoundary, label);
  }
}
|
|
559
|
+
|
|
560
|
+
/**
 * Ensures `directoryPath` is a real directory (not a symlink) whose resolved
 * path lies inside `realBoundary`, creating it when it does not exist.
 *
 * @param directoryPath Directory to verify or create.
 * @param realBoundary Real path the directory must resolve to or below.
 * @param label Prefix used in error messages.
 * @throws WikiError("invalid_path") when the path is a symlink, is not a
 *   directory, or resolves outside the boundary.
 */
async function ensureDirectoryForWrite(directoryPath: string, realBoundary: string, label: string): Promise<void> {
  try {
    // lstat does not follow symlinks, so a link is seen as a link here.
    const stat = await lstat(directoryPath);

    if (stat.isSymbolicLink()) {
      throw new WikiError("invalid_path", `${label} is a symlink: ${directoryPath}`);
    }

    if (!stat.isDirectory()) {
      throw new WikiError("invalid_path", `${label} is not a directory: ${directoryPath}`);
    }
  } catch (error) {
    if (!isEnoent(error)) {
      throw error;
    }

    // Missing: create exactly this level, then re-check what now exists at the path.
    await mkdir(directoryPath);
    await ensureDirectoryNotSymlink(directoryPath, label);
  }

  // Final containment check against the resolved location.
  const realDirectory = await realpath(directoryPath);
  ensureInsideRealRoot(realBoundary, realDirectory, directoryPath, `${label} escapes boundary`);
}
|
|
583
|
+
|
|
584
|
+
/**
 * Asserts that `directoryPath` exists as a plain directory.
 *
 * @throws WikiError("invalid_path") when it is a symlink or not a directory.
 */
async function ensureDirectoryNotSymlink(directoryPath: string, label: string): Promise<void> {
  const entry = await lstat(directoryPath);

  if (entry.isSymbolicLink()) {
    throw new WikiError("invalid_path", `${label} is a symlink: ${directoryPath}`);
  }

  if (entry.isDirectory()) {
    return;
  }

  throw new WikiError("invalid_path", `${label} is not a directory: ${directoryPath}`);
}
|
|
595
|
+
|
|
596
|
+
/**
 * Asserts that `target` lies strictly below `realRoot` (equal paths are rejected).
 *
 * Containment is decided with `path.relative` rather than a string prefix, so
 * a root that already ends in a separator (the filesystem root) is handled and
 * `..` segments are normalized before the comparison.
 *
 * @param realRoot Directory that must contain `target`.
 * @param target Path to check.
 * @param originalPath Path reported in the error message.
 * @param message Error message prefix.
 * @throws WikiError("invalid_path") when `target` is `realRoot` itself or outside it.
 */
function ensureStrictDescendant(realRoot: string, target: string, originalPath: string, message: string): void {
  const relativePath = path.relative(realRoot, target);
  const outside =
    relativePath === ".." || relativePath.startsWith(`..${path.sep}`) || path.isAbsolute(relativePath);

  // An empty relative path means target and root are the same directory.
  if (relativePath.length === 0 || outside) {
    throw new WikiError("invalid_path", `${message}: ${originalPath}`);
  }
}
|
|
601
|
+
|
|
602
|
+
/**
 * Asserts that `target` is `realRoot` itself or lies below it.
 *
 * Containment is decided with `path.relative` rather than a string prefix, so
 * a root that already ends in a separator (the filesystem root) is handled and
 * `..` segments are normalized before the comparison.
 *
 * @param realRoot Directory that must contain `target`.
 * @param target Path to check.
 * @param originalPath Path reported in the error message.
 * @param message Error message prefix.
 * @throws WikiError("invalid_path") when `target` is outside `realRoot`.
 */
function ensureInsideRealRoot(realRoot: string, target: string, originalPath: string, message: string): void {
  const relativePath = path.relative(realRoot, target);
  const outside =
    relativePath === ".." || relativePath.startsWith(`..${path.sep}`) || path.isAbsolute(relativePath);

  if (outside) {
    throw new WikiError("invalid_path", `${message}: ${originalPath}`);
  }
}
|
|
607
|
+
|
|
608
|
+
/**
 * Type guard for a stored pending-upload record.
 *
 * Accepts only an object whose `documentId` equals the requested id, with a
 * non-empty `sessionId`, a supported source type / MIME type pair, a non-blank
 * `originalFileName`, a 64-character lowercase hex `blobSha256`, string
 * timestamps, an optional string `createdBy`, and status "uploaded".
 */
function isPendingDocumentMetadata(value: unknown, documentId: string): value is PendingDocumentMetadata {
  if (typeof value !== "object" || value === null) {
    return false;
  }

  const candidate = value as Partial<PendingDocumentMetadata>;

  if (candidate.documentId !== documentId) {
    return false;
  }

  if (typeof candidate.sessionId !== "string" || candidate.sessionId.length === 0) {
    return false;
  }

  if (!isSupportedPendingSource(candidate.sourceType, candidate.mimeType)) {
    return false;
  }

  if (typeof candidate.originalFileName !== "string" || candidate.originalFileName.trim().length === 0) {
    return false;
  }

  if (typeof candidate.blobSha256 !== "string" || !/^[0-9a-f]{64}$/.test(candidate.blobSha256)) {
    return false;
  }

  if (typeof candidate.createdAt !== "string" || typeof candidate.updatedAt !== "string") {
    return false;
  }

  if (candidate.createdBy !== undefined && typeof candidate.createdBy !== "string") {
    return false;
  }

  return candidate.status === "uploaded";
}
|
|
629
|
+
|
|
630
|
+
/** Checks that `sourceType` is one of pdf / markdown / text and that `mimeType` is a MIME type accepted for it. */
function isSupportedPendingSource(sourceType: unknown, mimeType: unknown): sourceType is DocumentSourceType {
  switch (sourceType) {
    case "pdf":
      return mimeType === "application/pdf";
    case "markdown":
      return mimeType === "text/markdown" || mimeType === "text/x-markdown";
    case "text":
      return mimeType === "text/plain";
    default:
      return false;
  }
}
|
|
637
|
+
|
|
638
|
+
/** True when `error` is an object whose `code` property is "ENOENT". */
function isEnoent(error: unknown): boolean {
  if (typeof error !== "object" || error === null) {
    return false;
  }

  return "code" in error && error.code === "ENOENT";
}
|
|
641
|
+
|
|
642
|
+
/** True when `error` is an object whose `code` property is "EEXIST". */
function isFileExists(error: unknown): boolean {
  if (typeof error !== "object" || error === null) {
    return false;
  }

  return "code" in error && error.code === "EEXIST";
}
|
|
645
|
+
|
|
646
|
+
/** True when `error` is a `WikiError` carrying exactly the given `code`. */
function isWikiErrorCode(error: unknown, code: string): boolean {
  if (!(error instanceof WikiError)) {
    return false;
  }

  return error.code === code;
}
|
|
649
|
+
|
|
650
|
+
/** Resolves after roughly `milliseconds` have elapsed, using `setTimeout`. */
async function delay(milliseconds: number): Promise<void> {
  await new Promise(wake => {
    setTimeout(wake, milliseconds);
  });
}
|
|
653
|
+
|
|
654
|
+
/**
 * Exposes the otherwise module-private `acquireParseLock`.
 * NOTE(review): the name suggests this export exists for tests only — confirm
 * before relying on it from production code.
 */
export const __documentServiceTest = {
  acquireParseLock
};
|