tgo-wiki 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +255 -0
  3. package/docs/mcp-usage.md +631 -0
  4. package/docs/v0-acceptance.md +105 -0
  5. package/docs/v0-delivery-checklist.md +57 -0
  6. package/docs/v1-acceptance.md +39 -0
  7. package/docs/v2-acceptance.md +165 -0
  8. package/package.json +69 -0
  9. package/packages/core/src/config/config-loader.ts +109 -0
  10. package/packages/core/src/config/defaults.ts +74 -0
  11. package/packages/core/src/config/workspace-resolver.ts +40 -0
  12. package/packages/core/src/documents/command-document-parser.ts +206 -0
  13. package/packages/core/src/documents/document-id.ts +26 -0
  14. package/packages/core/src/documents/document-parser-registry.ts +126 -0
  15. package/packages/core/src/documents/document-service.ts +656 -0
  16. package/packages/core/src/documents/document-store.ts +132 -0
  17. package/packages/core/src/documents/document-types.ts +33 -0
  18. package/packages/core/src/documents/pdf-text-parser.ts +35 -0
  19. package/packages/core/src/documents/text-markdown-parser.ts +50 -0
  20. package/packages/core/src/errors.ts +46 -0
  21. package/packages/core/src/git/git-service.ts +68 -0
  22. package/packages/core/src/index.ts +38 -0
  23. package/packages/core/src/markdown/markdown-scanner.ts +90 -0
  24. package/packages/core/src/permissions/permission-service.ts +50 -0
  25. package/packages/core/src/publish/publish-service.ts +142 -0
  26. package/packages/core/src/result.ts +13 -0
  27. package/packages/core/src/services/session-workflow-service.ts +493 -0
  28. package/packages/core/src/services/wiki-service.ts +119 -0
  29. package/packages/core/src/services/workspace-service.ts +223 -0
  30. package/packages/core/src/session/session-id.ts +14 -0
  31. package/packages/core/src/session/session-service.ts +77 -0
  32. package/packages/core/src/session/session-store.ts +91 -0
  33. package/packages/core/src/session/session-types.ts +17 -0
  34. package/packages/core/src/sources/source-id.ts +19 -0
  35. package/packages/core/src/sources/source-paths.ts +15 -0
  36. package/packages/core/src/sources/source-service.ts +416 -0
  37. package/packages/core/src/sources/source-types.ts +77 -0
  38. package/packages/core/src/sources/source-validator.ts +132 -0
  39. package/packages/core/src/sources/source-writer.ts +419 -0
  40. package/packages/core/src/validation/frontmatter-validator.ts +128 -0
  41. package/packages/core/src/validation/link-validator.ts +55 -0
  42. package/packages/core/src/validation/path-validator.ts +65 -0
  43. package/packages/core/src/validation/source-reference-validator.ts +191 -0
  44. package/packages/core/src/validation/validation-service.ts +106 -0
  45. package/packages/core/src/vfs/vfs-command-parser.ts +69 -0
  46. package/packages/core/src/vfs/vfs-service.ts +498 -0
  47. package/packages/core/src/web/html-to-markdown.ts +144 -0
  48. package/packages/core/src/web/static-web-fetcher.ts +537 -0
  49. package/packages/core/src/web/web-id.ts +26 -0
  50. package/packages/core/src/web/web-ingestion-service.ts +335 -0
  51. package/packages/core/src/web/web-paths.ts +6 -0
  52. package/packages/core/src/web/web-types.ts +33 -0
  53. package/packages/server/src/cli.ts +56 -0
  54. package/packages/server/src/context.ts +7 -0
  55. package/packages/server/src/index.ts +2 -0
  56. package/packages/server/src/mcp-server.ts +111 -0
  57. package/packages/server/src/schemas/documents.ts +17 -0
  58. package/packages/server/src/schemas/read.ts +16 -0
  59. package/packages/server/src/schemas/session.ts +31 -0
  60. package/packages/server/src/schemas/sources.ts +12 -0
  61. package/packages/server/src/schemas/web.ts +23 -0
  62. package/packages/server/src/tools/document-tools.ts +46 -0
  63. package/packages/server/src/tools/publish-tools.ts +33 -0
  64. package/packages/server/src/tools/read-tools.ts +52 -0
  65. package/packages/server/src/tools/response.ts +24 -0
  66. package/packages/server/src/tools/session-tools.ts +100 -0
  67. package/packages/server/src/tools/source-tools.ts +32 -0
  68. package/packages/server/src/tools/web-tools.ts +26 -0
@@ -0,0 +1,416 @@
1
+ import type { Dirent } from "node:fs";
2
+ import { lstat, readFile, readdir, realpath } from "node:fs/promises";
3
+ import path from "node:path";
4
+ import { resolveWorkspacePaths, type WorkspacePaths } from "../config/workspace-resolver.js";
5
+ import { WikiError } from "../errors.js";
6
+ import { err, ok, type Result } from "../result.js";
7
+ import { assertValidSessionId } from "../session/session-id.js";
8
+ import { SessionStore } from "../session/session-store.js";
9
+ import type { SessionMetadata } from "../session/session-types.js";
10
+ import { assertValidSourceId } from "./source-id.js";
11
+ import { sourceDirectory } from "./source-paths.js";
12
+ import type { SourceListInput, SourceListResult, SourceMetadata, SourceReadInput, SourceReadResult } from "./source-types.js";
13
+ import { parseSourceMetadata } from "./source-validator.js";
14
+
15
/** A caller-supplied ref after resolution, paired with the worktree it is served from. */
type ResolvedSourceRef = {
  /** Canonical ref name reported back to the caller. */
  ref: string;
  /** Root directory of the worktree that backs `ref`. */
  worktreeRoot: string;
};

/** Bare ref names that are rejected outright instead of being looked up as session ids. */
const unsupportedBareRefs = new Set<string>(["draft"]);
21
+
22
/**
 * Read-only access to the source documents stored under a worktree's `sources/` directory.
 *
 * A ref selects the worktree to read from: the stable worktree, or the worktree recorded in
 * a session's metadata. Both public methods report failures through the returned `Result`
 * rather than by throwing.
 */
export class SourceService {
  private readonly paths: WorkspacePaths;
  private readonly store: SessionStore;

  /**
   * @param workspaceRoot Workspace directory passed to `resolveWorkspacePaths`.
   */
  constructor(workspaceRoot: string) {
    this.paths = resolveWorkspacePaths(workspaceRoot);
    this.store = new SessionStore(this.paths);
  }

  /**
   * Lists the sources found under `<worktree>/sources`, sorted by `document_id`.
   *
   * Entries without a `metadata.json` are skipped. Any other failure (invalid metadata,
   * symlinked paths, unknown ref, ...) fails the whole listing.
   *
   * @param input Optional ref selector; defaults to the stable worktree.
   * @returns `ok` with the list items, or `err` with a `WikiError`.
   */
  async list(input: SourceListInput = {}): Promise<Result<SourceListResult>> {
    try {
      const resolved = await this.resolveRef(input.ref);
      const sourcesRoot = path.join(resolved.worktreeRoot, "sources");
      const entries = await readSourcesDirectory(sourcesRoot);
      const sources: SourceListResult["sources"] = [];

      for (const entry of entries) {
        // Symlink entries are passed on (not skipped) so the read helpers get to reject them.
        if (!entry.isDirectory() && !entry.isSymbolicLink()) {
          continue;
        }

        // NOTE(review): an entry whose name fails assertValidSourceId aborts the whole
        // listing instead of being skipped - confirm this is intended.
        const metadata = await this.readMetadataIfExists(resolved.worktreeRoot, entry.name);
        if (metadata === undefined) {
          continue;
        }

        sources.push(toSourceListItem(metadata));
      }

      sources.sort((a, b) => a.document_id.localeCompare(b.document_id));
      return ok({ sources });
    } catch (error) {
      return err(toSourceError(error, "document_not_found"));
    }
  }

  /**
   * Reads one source's validated metadata together with its `raw.md` content.
   *
   * @param input Document id plus optional ref selector.
   * @returns `ok` with the metadata and raw markdown, or `err` with a `WikiError`.
   */
  async read(input: SourceReadInput): Promise<Result<SourceReadResult>> {
    try {
      assertValidSourceId(input.document_id);
      const resolved = await this.resolveRef(input.ref);
      const metadata = await this.readMetadata(resolved.worktreeRoot, input.document_id);
      const rawMarkdown = await readRequiredSourceFile(resolved.worktreeRoot, input.document_id, "raw.md");

      return ok({
        document_id: input.document_id,
        ref: resolved.ref,
        metadata: metadata as unknown as Record<string, unknown>,
        raw_markdown: rawMarkdown
      });
    } catch (error) {
      return err(toSourceError(error, "document_not_found"));
    }
  }

  /** Loads and validates `metadata.json` for a source that must exist. */
  private async readMetadata(worktreeRoot: string, documentId: string): Promise<SourceMetadata> {
    assertValidSourceId(documentId);
    const raw = await readRequiredSourceFile(worktreeRoot, documentId, "metadata.json");
    return parseMetadataJson(raw, documentId);
  }

  /** Like `readMetadata`, but yields `undefined` when `metadata.json` is absent. */
  private async readMetadataIfExists(worktreeRoot: string, documentId: string): Promise<SourceMetadata | undefined> {
    assertValidSourceId(documentId);
    const raw = await readOptionalSourceFile(worktreeRoot, documentId, "metadata.json");
    if (raw === undefined) {
      return undefined;
    }

    return parseMetadataJson(raw, documentId);
  }

  /**
   * Maps a caller-supplied ref to a canonical ref name and worktree root.
   *
   * Accepted forms: `undefined` / `"stable"`, `wiki/session/<sessionId>`, or a bare session id.
   * Names in `unsupportedBareRefs` and anything that is not a valid session id are rejected
   * with `unsupported_channel`.
   */
  private async resolveRef(ref: SourceListInput["ref"]): Promise<ResolvedSourceRef> {
    if (ref === undefined || ref === "stable") {
      return {
        ref: "stable",
        worktreeRoot: await resolveSafeStableWorktreeRoot(this.paths)
      };
    }

    const sessionRefPrefix = "wiki/session/";
    if (ref.startsWith(sessionRefPrefix)) {
      const sessionId = ref.slice(sessionRefPrefix.length);
      // The id comes straight from the caller: validate it before it reaches the session
      // store, exactly as the bare-id form below is validated.
      assertValidSessionId(sessionId);
      return await this.resolveSessionRef(sessionId);
    }

    if (unsupportedBareRefs.has(ref) || !isValidBareSessionRef(ref)) {
      throw new WikiError("unsupported_channel", `Unsupported source ref: ${ref}`);
    }

    return await this.resolveSessionRef(ref);
  }

  /** Reads the session's stored metadata and resolves its worktree root. */
  private async resolveSessionRef(sessionId: string): Promise<ResolvedSourceRef> {
    const metadata = await this.store.read(sessionId);
    return {
      ref: `wiki/session/${metadata.sessionId}`,
      worktreeRoot: await this.sessionWorktreeRoot(metadata)
    };
  }

  /**
   * Resolves the worktree recorded in session metadata against the workspace root and
   * refuses locations outside the configured sessions directory.
   */
  private async sessionWorktreeRoot(metadata: SessionMetadata): Promise<string> {
    const absolute = path.resolve(this.paths.workspaceRoot, metadata.worktree);
    const sessionsRoot = path.resolve(this.paths.sessionsWorktreePath);

    // Lexical pre-check only; symlink-aware checks follow in resolveSafeSessionWorktreeRoot.
    if (!absolute.startsWith(`${sessionsRoot}${path.sep}`) && absolute !== sessionsRoot) {
      throw new WikiError("session_metadata_invalid", `Session worktree escapes sessions root: ${metadata.sessionId}`);
    }

    return await resolveSafeSessionWorktreeRoot(absolute, this.paths);
  }
}
134
+
135
/**
 * Lists the entries of a `sources` directory.
 *
 * @param sourcesRoot Path of the directory to list.
 * @returns The directory entries, or an empty array when the path does not exist.
 * @throws WikiError `invalid_path` when the path is a symlink or is not a directory.
 */
async function readSourcesDirectory(sourcesRoot: string): Promise<Dirent[]> {
  try {
    const info = await lstat(sourcesRoot);

    let problem: string | undefined;
    if (info.isSymbolicLink()) {
      problem = `Sources directory is a symlink: ${sourcesRoot}`;
    } else if (!info.isDirectory()) {
      problem = `Sources path is not a directory: ${sourcesRoot}`;
    }

    if (problem !== undefined) {
      throw new WikiError("invalid_path", problem);
    }

    return await readdir(sourcesRoot, { withFileTypes: true });
  } catch (error) {
    // A missing directory simply means there are no sources yet.
    if (!isEnoent(error)) {
      throw error;
    }

    return [];
  }
}
156
+
157
/**
 * Resolves the real path of the stable worktree after checking that neither it nor the
 * worktrees directory is a symlink and that it lies strictly inside the worktrees directory.
 *
 * @param paths Workspace paths supplying `worktreesPath` and `stableWorktreePath`.
 * @returns The canonical (realpath) stable worktree root.
 * @throws WikiError `invalid_path` when any check fails.
 */
async function resolveSafeStableWorktreeRoot(paths: WorkspacePaths): Promise<string> {
  const { worktreesPath, stableWorktreePath } = paths;

  await ensureConfiguredDirectoryNotSymlink(worktreesPath, "Worktrees boundary");
  await ensureConfiguredDirectoryNotSymlink(stableWorktreePath, "Stable worktree root");

  const canonicalBoundary = await realpath(worktreesPath);
  const canonicalStable = await realpath(stableWorktreePath);
  ensureStrictDescendant(
    canonicalBoundary,
    canonicalStable,
    stableWorktreePath,
    "Stable worktree root escapes worktrees boundary"
  );

  return canonicalStable;
}
165
+
166
/**
 * Resolves the real path of a session worktree after checking the chain
 * worktrees directory -> sessions directory -> session worktree: none may be a symlink and
 * each must lie strictly inside the previous one.
 *
 * @param worktreeRoot Path of the session worktree.
 * @param paths Workspace paths supplying `worktreesPath` and `sessionsWorktreePath`.
 * @returns The canonical (realpath) session worktree root.
 * @throws WikiError `invalid_path` when any check fails.
 */
async function resolveSafeSessionWorktreeRoot(worktreeRoot: string, paths: WorkspacePaths): Promise<string> {
  const { worktreesPath, sessionsWorktreePath } = paths;

  await ensureConfiguredDirectoryNotSymlink(worktreesPath, "Worktrees boundary");
  await ensureConfiguredDirectoryNotSymlink(sessionsWorktreePath, "Sessions worktree boundary");
  await ensureConfiguredDirectoryNotSymlink(worktreeRoot, "Session worktree root");

  const canonicalWorktrees = await realpath(worktreesPath);
  const canonicalSessions = await realpath(sessionsWorktreePath);
  const canonicalSession = await realpath(worktreeRoot);

  ensureStrictDescendant(
    canonicalWorktrees,
    canonicalSessions,
    sessionsWorktreePath,
    "Sessions worktree boundary escapes worktrees boundary"
  );
  ensureStrictDescendant(
    canonicalSessions,
    canonicalSession,
    worktreeRoot,
    "Session worktree root escapes sessions boundary"
  );

  return canonicalSession;
}
177
+
178
/**
 * Reads a source file that must exist, as UTF-8 text.
 *
 * @param worktreeRoot Root of the worktree holding `sources/`.
 * @param documentId Source id.
 * @param fileName Which of the two source files to read.
 * @returns The file content.
 * @throws WikiError `document_not_found` when the file is missing; other errors propagate.
 */
async function readRequiredSourceFile(worktreeRoot: string, documentId: string, fileName: "metadata.json" | "raw.md"): Promise<string> {
  const target = await resolveSourceFileForRead(worktreeRoot, documentId, fileName, false);

  try {
    const content = await readFile(target, "utf8");
    return content;
  } catch (error) {
    // The file can still vanish between path resolution and the read.
    if (!isEnoent(error)) {
      throw error;
    }

    throw new WikiError("document_not_found", `Source file not found: ${target}`);
  }
}
191
+
192
/**
 * Reads a source file as UTF-8 text, treating a missing file as "no value".
 *
 * @param worktreeRoot Root of the worktree holding `sources/`.
 * @param documentId Source id.
 * @param fileName Which of the two source files to read.
 * @returns The file content, or `undefined` when the file does not exist.
 */
async function readOptionalSourceFile(
  worktreeRoot: string,
  documentId: string,
  fileName: "metadata.json" | "raw.md"
): Promise<string | undefined> {
  const target = await resolveSourceFileForRead(worktreeRoot, documentId, fileName, true);
  if (target === undefined) {
    return undefined;
  }

  try {
    const content = await readFile(target, "utf8");
    return content;
  } catch (error) {
    // The file can still vanish between path resolution and the read.
    if (!isEnoent(error)) {
      throw error;
    }

    return undefined;
  }
}
212
+
213
/**
 * Builds the path of a source file and verifies it is safe to read.
 *
 * Checks, in order: the document id is valid; `<worktree>/sources` and the source's own
 * directory exist and are real directories (not symlinks); the file itself is a regular
 * file (not a symlink); and the file's real path stays inside the source directory's real path.
 *
 * @param worktreeRoot Root of the worktree holding `sources/`.
 * @param documentId Source id.
 * @param fileName Which of the two source files to resolve.
 * @param optional When `true`, a missing file yields `undefined` instead of an error.
 *   Missing directories are reported by the directory checks regardless of this flag.
 * @returns The (non-canonical) file path, or `undefined` for a missing optional file.
 * @throws WikiError `document_not_found` for a missing required file, `invalid_path` for
 *   symlinks, wrong file types or escapes.
 */
async function resolveSourceFileForRead(
  worktreeRoot: string,
  documentId: string,
  fileName: "metadata.json" | "raw.md",
  optional: false
): Promise<string>;
async function resolveSourceFileForRead(
  worktreeRoot: string,
  documentId: string,
  fileName: "metadata.json" | "raw.md",
  optional: true
): Promise<string | undefined>;
async function resolveSourceFileForRead(
  worktreeRoot: string,
  documentId: string,
  fileName: "metadata.json" | "raw.md",
  optional: boolean
): Promise<string | undefined> {
  assertValidSourceId(documentId);

  const sourcesDir = path.join(worktreeRoot, "sources");
  await ensureDirectoryNotSymlink(sourcesDir, `Sources directory not found: ${sourcesDir}`);

  const sourceRoot = sourceDirectory(worktreeRoot, documentId);
  await ensureDirectoryNotSymlink(sourceRoot, `Source directory not found: ${documentId}`);
  const canonicalSourceRoot = await realpath(sourceRoot);
  await ensureParentDirectoriesNotSymlink(sourceRoot, fileName);

  const filePath = path.join(sourceRoot, fileName);
  let fileInfo;

  try {
    fileInfo = await lstat(filePath);
  } catch (error) {
    if (!isEnoent(error)) {
      throw error;
    }

    if (optional) {
      return undefined;
    }

    throw new WikiError("document_not_found", `Source file not found: ${filePath}`);
  }

  if (fileInfo.isSymbolicLink()) {
    throw new WikiError("invalid_path", `Source file is a symlink: ${filePath}`);
  }

  if (!fileInfo.isFile()) {
    throw new WikiError("invalid_path", `Source path is not a file: ${filePath}`);
  }

  const canonicalTarget = await realpath(filePath);
  ensureInsideRealSourceRoot(canonicalSourceRoot, canonicalTarget, filePath);
  return filePath;
}
267
+
268
/**
 * Checks every directory between `sourceRoot` and the file named by `relativeFilePath`,
 * outermost first, with `ensureDirectoryNotSymlink`.
 *
 * For a bare file name (no directory part) there is nothing to check.
 *
 * @param sourceRoot Directory the relative path starts from.
 * @param relativeFilePath File path relative to `sourceRoot`.
 */
async function ensureParentDirectoriesNotSymlink(sourceRoot: string, relativeFilePath: string): Promise<void> {
  const parentDirectory = path.dirname(relativeFilePath);
  const pieces = parentDirectory.split(path.sep);
  let walked = sourceRoot;

  for (const piece of pieces) {
    // Skip empty segments and the "." that dirname returns for a bare file name.
    if (piece.length === 0 || piece === ".") {
      continue;
    }

    walked = path.join(walked, piece);
    await ensureDirectoryNotSymlink(walked, `Source parent directory not found: ${walked}`);
  }
}
277
+
278
/**
 * Asserts that `directoryPath` exists and is a real directory rather than a symlink.
 *
 * @param directoryPath Path to inspect (via `lstat`, so symlinks are not followed).
 * @param missingMessage Message used when the path does not exist.
 * @throws WikiError `document_not_found` when the path is missing, `invalid_path` when it
 *   is a symlink or not a directory. Other `lstat` errors propagate unchanged.
 */
async function ensureDirectoryNotSymlink(directoryPath: string, missingMessage: string): Promise<void> {
  const info = await lstat(directoryPath).catch((error: unknown) => {
    throw isEnoent(error) ? new WikiError("document_not_found", missingMessage) : error;
  });

  if (info.isSymbolicLink()) {
    throw new WikiError("invalid_path", `Source directory is a symlink: ${directoryPath}`);
  }

  if (info.isDirectory()) {
    return;
  }

  throw new WikiError("invalid_path", `Source path is not a directory: ${directoryPath}`);
}
299
+
300
/**
 * Asserts that a configured directory exists and is a real directory rather than a symlink.
 *
 * Every failure, including a missing path, is reported as `invalid_path`.
 *
 * @param directoryPath Path to inspect (via `lstat`, so symlinks are not followed).
 * @param label Human-readable name of the directory, used as the message prefix.
 * @throws WikiError `invalid_path` when the path is missing, a symlink or not a directory.
 *   Other `lstat` errors propagate unchanged.
 */
async function ensureConfiguredDirectoryNotSymlink(directoryPath: string, label: string): Promise<void> {
  const info = await lstat(directoryPath).catch((error: unknown) => {
    throw isEnoent(error) ? new WikiError("invalid_path", `${label} not found: ${directoryPath}`) : error;
  });

  if (info.isSymbolicLink()) {
    throw new WikiError("invalid_path", `${label} is a symlink: ${directoryPath}`);
  }

  if (info.isDirectory()) {
    return;
  }

  throw new WikiError("invalid_path", `${label} is not a directory: ${directoryPath}`);
}
321
+
322
/**
 * Asserts that `target` is the source directory itself or lies inside it.
 *
 * @param realSourceRoot Canonical path of the source directory.
 * @param target Canonical path being checked.
 * @param originalPath Path to show in the error message.
 * @throws WikiError `invalid_path` when `target` is outside `realSourceRoot`.
 */
function ensureInsideRealSourceRoot(realSourceRoot: string, target: string, originalPath: string): void {
  const escapeMessage = "Source path escapes source directory";
  ensureInsideRealRoot(realSourceRoot, target, originalPath, escapeMessage);
}
325
+
326
/**
 * Asserts that `target` lies strictly inside `realRoot` (the root itself is rejected).
 *
 * Containment is decided with `path.relative` rather than a string-prefix test, so a root
 * that already ends in a separator (e.g. the filesystem root) is handled correctly and a
 * target containing `..` segments cannot slip through.
 *
 * @param realRoot Canonical path of the containing directory.
 * @param target Canonical path being checked.
 * @param originalPath Path to show in the error message.
 * @param message Error message prefix.
 * @throws WikiError `invalid_path` when `target` equals `realRoot` or is outside it.
 */
function ensureStrictDescendant(realRoot: string, target: string, originalPath: string, message: string): void {
  const relative = path.relative(realRoot, target);
  // "" means same directory; ".." / "../x" means outside; an absolute result means a
  // different drive on Windows.
  const isOutside = relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);

  if (relative === "" || isOutside) {
    throw new WikiError("invalid_path", `${message}: ${originalPath}`);
  }
}
331
+
332
/**
 * Asserts that `target` is `realRoot` itself or lies inside it.
 *
 * Containment is decided with `path.relative` rather than a string-prefix test, so a root
 * that already ends in a separator (e.g. the filesystem root) is handled correctly and a
 * target containing `..` segments cannot slip through.
 *
 * @param realRoot Canonical path of the containing directory.
 * @param target Canonical path being checked.
 * @param originalPath Path to show in the error message.
 * @param message Error message prefix.
 * @throws WikiError `invalid_path` when `target` is outside `realRoot`.
 */
function ensureInsideRealRoot(realRoot: string, target: string, originalPath: string, message: string): void {
  const relative = path.relative(realRoot, target);
  // ".." / "../x" means outside; an absolute result means a different drive on Windows.
  const isOutside = relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);

  if (isOutside) {
    throw new WikiError("invalid_path", `${message}: ${originalPath}`);
  }
}
337
+
338
/**
 * Parses and validates the text of a `metadata.json` file for the given source.
 *
 * Beyond the structural validation done by `parseSourceMetadata`, the metadata must name
 * the same document id and point at `sources/<documentId>/raw.md`.
 *
 * @param raw JSON text.
 * @param documentId Id of the source the metadata is expected to describe.
 * @returns The validated metadata.
 * @throws WikiError `validation_failed` for malformed JSON or mismatching id / raw path.
 */
function parseMetadataJson(raw: string, documentId: string): SourceMetadata {
  const expectedRawPath = `sources/${documentId}/raw.md`;

  try {
    const parsed = parseSourceMetadata(JSON.parse(raw));

    if (parsed.documentId !== documentId) {
      const details = { expected: documentId, actual: parsed.documentId };
      throw new WikiError("validation_failed", `Source metadata document id mismatch: ${documentId}`, details);
    }

    if (parsed.rawMarkdownPath !== expectedRawPath) {
      const details = { expected: expectedRawPath, actual: parsed.rawMarkdownPath };
      throw new WikiError("validation_failed", `Source metadata raw markdown path mismatch: ${documentId}`, details);
    }

    return parsed;
  } catch (error) {
    // Only JSON syntax errors are translated; validation errors pass through untouched.
    if (!(error instanceof SyntaxError)) {
      throw error;
    }

    throw new WikiError("validation_failed", `Source metadata is malformed JSON: ${documentId}`, {
      cause: error.message
    });
  }
}
366
+
367
/**
 * Converts validated source metadata into the snake_case item returned by a listing.
 *
 * Web sources have no file name or parser, so the page title (falling back to the final
 * URL) and the fetcher name are reported in those slots.
 *
 * @param metadata Validated source metadata.
 * @returns The list item for that source.
 */
function toSourceListItem(metadata: SourceMetadata): SourceListResult["sources"][number] {
  let displayName: string;
  let producedBy: string;

  if (metadata.sourceType === "web") {
    displayName = metadata.title ?? metadata.finalUrl;
    producedBy = metadata.fetcher.name;
  } else {
    displayName = metadata.originalFileName;
    producedBy = metadata.parser.name;
  }

  return {
    document_id: metadata.documentId,
    source_type: metadata.sourceType,
    original_file_name: displayName,
    version: metadata.version,
    parser: producedBy,
    raw_markdown_path: metadata.rawMarkdownPath,
    created_at: metadata.createdAt,
    updated_at: metadata.updatedAt
  };
}
392
+
393
/**
 * Normalises anything thrown into a `WikiError`.
 *
 * @param error The caught value.
 * @param fallbackCode Error code used when `error` is not already a `WikiError`.
 * @returns `error` itself when it is a `WikiError`; otherwise a new `WikiError` carrying
 *   the original message (or the stringified value for non-`Error` throwables).
 */
function toSourceError(error: unknown, fallbackCode: "document_not_found"): WikiError {
  if (error instanceof WikiError) {
    return error;
  }

  const message = error instanceof Error ? error.message : String(error);
  return new WikiError(fallbackCode, message);
}
404
+
405
/**
 * Tells whether a caught value is an object whose `code` property is `"ENOENT"`.
 *
 * @param error The caught value.
 * @returns `true` only for objects with `code === "ENOENT"`.
 */
function isEnoent(error: unknown): boolean {
  if (typeof error !== "object" || error === null) {
    return false;
  }

  return "code" in error && error.code === "ENOENT";
}
408
+
409
/**
 * Non-throwing wrapper around `assertValidSessionId`.
 *
 * @param ref Candidate session id.
 * @returns `true` when `assertValidSessionId` accepts `ref`, `false` when it throws.
 */
function isValidBareSessionRef(ref: string): boolean {
  let accepted = true;

  try {
    assertValidSessionId(ref);
  } catch {
    accepted = false;
  }

  return accepted;
}
@@ -0,0 +1,77 @@
1
/** Fields shared by every kind of source metadata record. */
export type SourceBaseMetadata = {
  /** Identifier of the source document. */
  documentId: string;
  /** Version number of the source. */
  version: number;
  /** Kind of source; narrowed to literal types by the concrete metadata types. */
  sourceType: string;
  /** Creation timestamp, as a string. */
  createdAt: string;
  /** Last-update timestamp, as a string. */
  updatedAt: string;
  /** Processing status; narrowed to a literal by the concrete metadata types. */
  status: string;
  /** Optional creator identifier. */
  createdBy?: string;
};
10
+
11
/** Name and version of the parser recorded for a document source. */
export type SourceParserMetadata = {
  /** Parser name. */
  name: string;
  /** Parser version. */
  version: string;
};
15
+
16
/** Metadata for a file-based source (PDF, Markdown or plain text). */
export type DocumentSourceMetadata = SourceBaseMetadata & {
  sourceType: "pdf" | "markdown" | "text";
  /** File name of the original document. */
  originalFileName: string;
  /** MIME type of the original document. */
  mimeType: string;
  /** SHA-256 digest recorded for the original blob - presumably hex-encoded; confirm against the writer. */
  blobSha256: string;
  /** Path of the extracted Markdown file. */
  rawMarkdownPath: string;
  /** Parser recorded for this source. */
  parser: SourceParserMetadata;
  status: "parsed";
};
25
+
26
/** Name and version of the fetcher recorded for a web source. */
export type WebFetcherMetadata = {
  /** Fetcher name; `"static-fetch"` is the only value this type allows. */
  name: "static-fetch";
  /** Fetcher version. */
  version: string;
};
30
+
31
/** Metadata for a source fetched from the web. */
export type WebSourceMetadata = SourceBaseMetadata & {
  sourceType: "web";
  /** Requested URL. */
  url: string;
  /** Final URL - presumably after redirects; confirm against the fetcher. */
  finalUrl: string;
  /** Page title, when one is recorded. */
  title?: string;
  /** Content type recorded for the response. */
  contentType: string;
  /** Status code recorded for the response - presumably the HTTP status; confirm. */
  statusCode: number;
  /** SHA-256 digest recorded for the HTML blob - presumably hex-encoded; confirm against the writer. */
  htmlBlobSha256: string;
  /** Path of the converted Markdown file. */
  rawMarkdownPath: string;
  /** Fetcher recorded for this source. */
  fetcher: WebFetcherMetadata;
  /** Size figures recorded for the fetch - presumably byte counts of the HTML and the Markdown. */
  fetchMetadata: {
    htmlBytes: number;
    markdownBytes: number;
  };
  status: "fetched";
};
47
+
48
/** Any source metadata record; `sourceType` discriminates between the two variants. */
export type SourceMetadata =
  | DocumentSourceMetadata
  | WebSourceMetadata;
49
+
50
/** Input for listing sources. */
export type SourceListInput = {
  /**
   * Ref to list from. Note the union collapses to plain `string` for the type checker;
   * `"stable"` is spelled out for documentation only.
   */
  ref?: "stable" | string;
};
53
+
54
/** Input for reading a single source. */
export type SourceReadInput = {
  /** Id of the source to read. */
  document_id: string;
  /**
   * Ref to read from. Note the union collapses to plain `string` for the type checker;
   * `"stable"` is spelled out for documentation only.
   */
  ref?: "stable" | string;
};
58
+
59
/** Result of listing sources: one snake_case summary item per source. */
export type SourceListResult = {
  sources: Array<{
    /** Id of the source. */
    document_id: string;
    /** Kind of source. */
    source_type: string;
    /** Display name of the source. */
    original_file_name: string;
    /** Version number of the source. */
    version: number;
    /** Name of the parser (or fetcher) recorded for the source. */
    parser: string;
    /** Path of the source's Markdown file. */
    raw_markdown_path: string;
    /** Creation timestamp, as a string. */
    created_at: string;
    /** Last-update timestamp, as a string. */
    updated_at: string;
  }>;
};
71
+
72
/** Result of reading a single source. */
export type SourceReadResult = {
  /** Id of the source that was read. */
  document_id: string;
  /** Ref the source was read from. */
  ref: string;
  /** The source's metadata as an untyped record. */
  metadata: Record<string, unknown>;
  /** Content of the source's Markdown file. */
  raw_markdown: string;
};
@@ -0,0 +1,132 @@
1
+ import { WikiError } from "../errors.js";
2
+ import { assertValidSourceId } from "./source-id.js";
3
+ import type { SourceMetadata, SourceParserMetadata, WebFetcherMetadata } from "./source-types.js";
4
+
5
/** Matches exactly 64 hexadecimal characters (either case), i.e. a hex-encoded SHA-256 digest. */
const sha256Pattern = /^[a-fA-F0-9]{64}$/;

/**
 * Validates an untrusted value (typically the result of `JSON.parse`) as source metadata.
 *
 * The common fields are checked first, then the fields specific to the `sourceType`
 * ("web" versus the file-based types). The input object is returned as-is, not copied.
 *
 * @param value Value to validate.
 * @returns `value`, typed as `SourceMetadata`.
 * @throws WikiError `validation_failed` when the value is not a plain object, a field is
 *   missing or invalid, or the source type is unsupported. For common-field failures the
 *   error details list the names of the fields that failed.
 */
export function parseSourceMetadata(value: unknown): SourceMetadata {
  // Arrays are objects too, but can never be a metadata record.
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    throw invalidMetadata("Source metadata must be an object");
  }

  const metadata = value as Record<string, unknown>;

  // The guard is kept inline (rather than derived from the list below) so the compiler
  // narrows `metadata.documentId` and friends for the code that follows.
  if (
    !isString(metadata.documentId) ||
    !isPositiveInteger(metadata.version) ||
    !isString(metadata.sourceType) ||
    !isIsoishString(metadata.createdAt) ||
    !isIsoishString(metadata.updatedAt) ||
    !isString(metadata.status) ||
    !isOptionalString(metadata.createdBy)
  ) {
    throw invalidMetadata("Source metadata common fields are invalid", { fields: listInvalidCommonFields(metadata) });
  }

  assertValidSourceId(metadata.documentId);

  if (!isSupportedSourceType(metadata.sourceType)) {
    throw invalidMetadata("Unsupported source metadata type", { sourceType: metadata.sourceType });
  }

  if (metadata.sourceType === "web") {
    if (
      !isNonEmptyString(metadata.url) ||
      !isNonEmptyString(metadata.finalUrl) ||
      !isOptionalString(metadata.title) ||
      !isNonEmptyString(metadata.contentType) ||
      !isPositiveInteger(metadata.statusCode) ||
      !isString(metadata.htmlBlobSha256) ||
      !sha256Pattern.test(metadata.htmlBlobSha256) ||
      !isString(metadata.rawMarkdownPath) ||
      !isFetcherMetadata(metadata.fetcher) ||
      !isWebFetchMetadata(metadata.fetchMetadata) ||
      metadata.status !== "fetched"
    ) {
      throw invalidMetadata("Source metadata web fields are invalid", { documentId: metadata.documentId });
    }

    return metadata as SourceMetadata;
  }

  if (
    !isNonEmptyString(metadata.originalFileName) ||
    !isSupportedSourceMime(metadata.sourceType, metadata.mimeType) ||
    !isString(metadata.blobSha256) ||
    !sha256Pattern.test(metadata.blobSha256) ||
    !isString(metadata.rawMarkdownPath) ||
    !isParserMetadata(metadata.parser) ||
    metadata.status !== "parsed"
  ) {
    throw invalidMetadata("Source metadata document fields are invalid", { documentId: metadata.documentId });
  }

  return metadata as SourceMetadata;
}

/** Names of the common metadata fields that fail validation, in declaration order. */
function listInvalidCommonFields(metadata: Record<string, unknown>): string[] {
  const checks: Array<[name: string, valid: boolean]> = [
    ["documentId", isString(metadata.documentId)],
    ["version", isPositiveInteger(metadata.version)],
    ["sourceType", isString(metadata.sourceType)],
    ["createdAt", isIsoishString(metadata.createdAt)],
    ["updatedAt", isIsoishString(metadata.updatedAt)],
    ["status", isString(metadata.status)],
    ["createdBy", isOptionalString(metadata.createdBy)]
  ];

  return checks.filter(([, valid]) => !valid).map(([name]) => name);
}
66
+
67
/** Type guard: `true` when `value` is one of the four supported source type names. */
function isSupportedSourceType(value: unknown): value is SourceMetadata["sourceType"] {
  switch (value) {
    case "pdf":
    case "markdown":
    case "text":
    case "web":
      return true;
    default:
      return false;
  }
}
70
+
71
/**
 * Checks that a MIME type is one accepted for the given file-based source type.
 *
 * @param sourceType Source type name ("pdf", "markdown" or "text").
 * @param mimeType MIME type to check.
 * @returns `true` for an accepted pairing; `false` for anything else, including "web".
 */
function isSupportedSourceMime(sourceType: unknown, mimeType: unknown): boolean {
  switch (sourceType) {
    case "pdf":
      return mimeType === "application/pdf";
    case "markdown":
      return mimeType === "text/markdown" || mimeType === "text/x-markdown";
    case "text":
      return mimeType === "text/plain";
    default:
      return false;
  }
}
78
+
79
/**
 * Builds (does not throw) the `validation_failed` error used for invalid metadata.
 *
 * @param message Error message.
 * @param details Extra diagnostic data attached to the error; defaults to an empty object.
 */
function invalidMetadata(message: string, details: Record<string, unknown> = {}): WikiError {
  const failure = new WikiError("validation_failed", message, details);
  return failure;
}
82
+
83
/** Type guard: an object whose `name` and `version` are both non-blank strings. */
function isParserMetadata(value: unknown): value is SourceParserMetadata {
  if (typeof value !== "object" || value === null) {
    return false;
  }

  const candidate = value as Partial<SourceParserMetadata>;
  if (!isNonEmptyString(candidate.name)) {
    return false;
  }

  return isNonEmptyString(candidate.version);
}
91
+
92
/** Type guard: an object with `name === "static-fetch"` and a non-blank string `version`. */
function isFetcherMetadata(value: unknown): value is WebFetcherMetadata {
  if (typeof value !== "object" || value === null) {
    return false;
  }

  const candidate = value as { name?: unknown; version?: unknown };
  if (candidate.name !== "static-fetch") {
    return false;
  }

  return isNonEmptyString(candidate.version);
}
100
+
101
/** Type guard: an object whose `htmlBytes` and `markdownBytes` are both non-negative integers. */
function isWebFetchMetadata(value: unknown): value is { htmlBytes: number; markdownBytes: number } {
  if (typeof value !== "object" || value === null) {
    return false;
  }

  const candidate = value as { htmlBytes?: unknown; markdownBytes?: unknown };
  if (!isNonNegativeInteger(candidate.htmlBytes)) {
    return false;
  }

  return isNonNegativeInteger(candidate.markdownBytes);
}
109
+
110
/** Type guard: `true` for any primitive string, including the empty string. */
function isString(value: unknown): value is string {
  const kind = typeof value;
  return kind === "string";
}
113
+
114
/** Type guard: a string that still has characters left after trimming whitespace. */
function isNonEmptyString(value: unknown): value is string {
  if (typeof value !== "string") {
    return false;
  }

  return value.trim() !== "";
}
117
+
118
/** Type guard: a string (possibly empty) or `undefined`; `null` is not accepted. */
function isOptionalString(value: unknown): value is string | undefined {
  if (value === undefined) {
    return true;
  }

  return typeof value === "string";
}
121
+
122
/** Type guard: a number that is an integer and greater than zero. */
function isPositiveInteger(value: unknown): value is number {
  if (typeof value !== "number") {
    return false;
  }

  return Number.isInteger(value) && value >= 1;
}
125
+
126
/** Type guard: a number that is an integer and zero or greater. */
function isNonNegativeInteger(value: unknown): value is number {
  if (typeof value !== "number") {
    return false;
  }

  return Number.isInteger(value) && !(value < 0);
}
129
+
130
/**
 * Type guard: a string that `Date.parse` can turn into a timestamp.
 *
 * This is deliberately loose: any format `Date.parse` accepts passes, not only ISO 8601.
 */
function isIsoishString(value: unknown): value is string {
  if (typeof value !== "string") {
    return false;
  }

  const timestamp = Date.parse(value);
  return !Number.isNaN(timestamp);
}