@arabold/docs-mcp-server 1.17.0 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
+ -- Migration: Complete normalization by removing obsolete library and version columns
+ -- This migration finalizes the schema normalization process
+ -- Note: Must recreate table because obsolete columns are part of UNIQUE constraint
+
+ -- 1. Create new documents table with only foreign key references
+ CREATE TABLE documents_new (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ library_id INTEGER NOT NULL REFERENCES libraries(id),
+ version_id INTEGER NOT NULL REFERENCES versions(id),
+ url TEXT NOT NULL,
+ content TEXT,
+ metadata JSON,
+ sort_order INTEGER NOT NULL,
+ indexed_at DATETIME,
+ UNIQUE(url, library_id, version_id, sort_order)
+ );
+
+ -- 2. Copy data from old table (excluding obsolete library and version columns)
+ INSERT INTO documents_new (id, library_id, version_id, url, content, metadata, sort_order, indexed_at)
+ SELECT id, library_id, version_id, url, content, metadata, sort_order, indexed_at
+ FROM documents;
+
+ -- 3. Drop the old documents table
+ DROP TABLE documents;
+
+ -- 4. Rename the new table to documents
+ ALTER TABLE documents_new RENAME TO documents;
+
+ -- 5. Recreate indexes that were lost when dropping the table
+ CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id);
+ CREATE INDEX IF NOT EXISTS idx_documents_version_id ON documents(version_id);
+ CREATE INDEX IF NOT EXISTS idx_documents_lib_ver_id ON documents(library_id, version_id);
+
+ -- 6. Recreate FTS5 virtual table (gets dropped when main table is dropped)
+ -- Using external content approach - FTS index is maintained entirely through triggers
+ CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
+ content,
+ title,
+ url,
+ path,
+ tokenize='porter unicode61'
+ );
+
+ -- 7. Recreate FTS triggers to maintain the index
+ -- Note: Triggers work directly with documents table, no JOIN needed for FTS content
+ CREATE TRIGGER IF NOT EXISTS documents_fts_after_delete AFTER DELETE ON documents BEGIN
+ INSERT INTO documents_fts(documents_fts, rowid, content, title, url, path)
+ VALUES('delete', old.id, old.content, json_extract(old.metadata, '$.title'), old.url, json_extract(old.metadata, '$.path'));
+ END;
+
+ CREATE TRIGGER IF NOT EXISTS documents_fts_after_update AFTER UPDATE ON documents BEGIN
+ INSERT INTO documents_fts(documents_fts, rowid, content, title, url, path)
+ VALUES('delete', old.id, old.content, json_extract(old.metadata, '$.title'), old.url, json_extract(old.metadata, '$.path'));
+ INSERT INTO documents_fts(rowid, content, title, url, path)
+ VALUES(new.id, new.content, json_extract(new.metadata, '$.title'), new.url, json_extract(new.metadata, '$.path'));
+ END;
+
+ CREATE TRIGGER IF NOT EXISTS documents_fts_after_insert AFTER INSERT ON documents BEGIN
+ INSERT INTO documents_fts(rowid, content, title, url, path)
+ VALUES(new.id, new.content, json_extract(new.metadata, '$.title'), new.url, json_extract(new.metadata, '$.path'));
+ END;
+
+ -- 8. Rebuild FTS index from existing documents data
+ -- Manually populate the FTS index since we're using external content approach
+ INSERT INTO documents_fts(rowid, content, title, url, path)
+ SELECT id, content, json_extract(metadata, '$.title'), url, json_extract(metadata, '$.path')
+ FROM documents;
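
For context, a minimal sketch of how the trigger-maintained FTS index created above can be queried by joining the FTS rowid back to the documents table. The search term and LIMIT are illustrative and not part of this release; bm25() is the standard FTS5 ranking function.

SELECT d.id, d.url, json_extract(d.metadata, '$.title') AS title
FROM documents_fts
JOIN documents d ON d.id = documents_fts.rowid
WHERE documents_fts MATCH 'fts5 triggers'   -- example search term
ORDER BY bm25(documents_fts)                -- FTS5 relevance ranking
LIMIT 10;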
@@ -0,0 +1,42 @@
+ -- Migration: Add status tracking and progress monitoring to versions table
+ -- This migration adds job status tracking directly to the versions table
+ -- enabling persistent job state and progress monitoring across server restarts
+
+ -- Add job status and progress tracking columns
+ ALTER TABLE versions ADD COLUMN status TEXT DEFAULT 'not_indexed';
+ ALTER TABLE versions ADD COLUMN progress_pages INTEGER DEFAULT 0;
+ ALTER TABLE versions ADD COLUMN progress_max_pages INTEGER DEFAULT 0;
+ ALTER TABLE versions ADD COLUMN error_message TEXT;
+ ALTER TABLE versions ADD COLUMN started_at DATETIME;
+ ALTER TABLE versions ADD COLUMN updated_at DATETIME;
+
+ -- Create indexes for efficient status queries
+ CREATE INDEX IF NOT EXISTS idx_versions_status ON versions(status);
+ CREATE INDEX IF NOT EXISTS idx_versions_started_at ON versions(started_at);
+ CREATE INDEX IF NOT EXISTS idx_versions_library_status ON versions(library_id, status);
+
+ -- Create trigger to automatically update updated_at timestamp
+ CREATE TRIGGER IF NOT EXISTS versions_updated_at
+ AFTER UPDATE ON versions BEGIN
+ UPDATE versions SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
+ END;
+
+ -- Set existing versions to 'completed' status based on whether they have documents
+ -- This ensures backward compatibility with existing data
+ UPDATE versions
+ SET status = 'completed', updated_at = CURRENT_TIMESTAMP
+ WHERE id IN (
+ SELECT DISTINCT v.id
+ FROM versions v
+ JOIN documents d ON v.id = d.version_id
+ );
+
+ -- Set updated_at for any remaining records without it
+ UPDATE versions
+ SET updated_at = CURRENT_TIMESTAMP
+ WHERE updated_at IS NULL;
+
+ -- Note: Versions without documents remain as 'not_indexed' which is correct
+ -- as they were created but never successfully indexed
+ -- The started_at field tracks when indexing jobs begin
+ -- The status field provides comprehensive state tracking for version indexing
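
As an illustration of the kind of monitoring query these columns and indexes are meant to serve (the join to libraries(name) follows the schema implied elsewhere in this diff; the query itself is not part of the release):

SELECT l.name AS library, v.name AS version, v.status,
       v.progress_pages, v.progress_max_pages, v.error_message, v.started_at
FROM versions v
JOIN libraries l ON l.id = v.library_id
WHERE v.status <> 'completed'   -- anything still pending, running, or failed
ORDER BY v.started_at;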
@@ -0,0 +1,16 @@
+ -- Migration: Add scraper options tracking to versions table
+ -- This migration adds scraper options storage to enable reproducible indexing
+ -- with the exact same parameters used in previous runs
+
+ -- Add scraper options tracking columns
+ ALTER TABLE versions ADD COLUMN source_url TEXT;
+ ALTER TABLE versions ADD COLUMN scraper_options JSON;
+
+ -- Create indexes for efficient queries
+ CREATE INDEX IF NOT EXISTS idx_versions_source_url ON versions(source_url);
+ CREATE INDEX IF NOT EXISTS idx_versions_scraper_options_scope
+ ON versions(json_extract(scraper_options, '$.scope'));
+
+ -- Note: No data migration needed - new columns default to NULL
+ -- Existing versions without stored options will gracefully fallback to manual configuration
+ -- Future indexing operations will store complete scraper options for reproducibility
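
A small sketch of the kind of lookup the idx_versions_scraper_options_scope expression index supports; the 'subpages' scope value and the example URL are illustrative assumptions, not values defined in this diff:

SELECT id, library_id, source_url
FROM versions
WHERE json_extract(scraper_options, '$.scope') = 'subpages'   -- hypothetical scope value
  AND source_url LIKE 'https://example.com/docs/%';           -- hypothetical source filter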
@@ -0,0 +1,62 @@
+ -- Migration 007: Deduplicate unversioned versions
+ -- Goal: collapse multiple NULL-name version rows per library to a single canonical row
+ -- Steps:
+ -- 1. For each library, choose canonical NULL-name row:
+ -- a) Prefer a row referenced by any documents (highest document count)
+ -- b) Fallback to lowest id
+ -- 2. Repoint any documents referencing non-canonical NULL rows to canonical
+ -- 3. Delete surplus NULL-name rows with zero documents
+ -- 4. Convert remaining NULL names to empty string '' for future uniqueness enforcement
+ -- Safe to run multiple times (idempotent)
+
+ -- 1 & 2: Repoint documents
+ -- Use TEMP tables instead of CTEs because we need the canonical mapping
+ -- across multiple subsequent statements. All TEMP objects are connection-scoped
+ -- and vanish automatically; safe for repeated runs (we DROP IF EXISTS first).
+
+ DROP TABLE IF EXISTS temp_null_versions;
+ CREATE TEMP TABLE temp_null_versions AS
+ SELECT v.id, v.library_id,
+ (SELECT COUNT(*) FROM documents d WHERE d.version_id = v.id) AS doc_count
+ FROM versions v
+ WHERE v.name IS NULL;
+
+ -- Build canonical mapping per library (one row per library_id)
+ DROP TABLE IF EXISTS temp_canonical_versions;
+ CREATE TEMP TABLE temp_canonical_versions AS
+ SELECT nv.library_id,
+ COALESCE(
+ (
+ SELECT id FROM temp_null_versions nv2
+ WHERE nv2.library_id = nv.library_id AND nv2.doc_count > 0
+ ORDER BY nv2.doc_count DESC, nv2.id ASC LIMIT 1
+ ),
+ (
+ SELECT id FROM temp_null_versions nv3
+ WHERE nv3.library_id = nv.library_id
+ ORDER BY nv3.id ASC LIMIT 1
+ )
+ ) AS keep_id
+ FROM temp_null_versions nv
+ GROUP BY nv.library_id;
+
+ -- Repoint documents from non-canonical NULL-name versions
+ UPDATE documents
+ SET version_id = (
+ SELECT keep_id FROM temp_canonical_versions c
+ WHERE c.library_id = documents.library_id
+ )
+ WHERE version_id IN (SELECT id FROM versions WHERE name IS NULL)
+ AND version_id NOT IN (SELECT keep_id FROM temp_canonical_versions);
+
+ -- 3: Delete surplus NULL-name rows now unreferenced
+ DELETE FROM versions
+ WHERE name IS NULL
+ AND id NOT IN (SELECT keep_id FROM temp_canonical_versions)
+ AND (SELECT COUNT(*) FROM documents d WHERE d.version_id = versions.id) = 0;
+
+ -- 4: Normalize remaining NULL names to ''
+ UPDATE versions SET name = '' WHERE name IS NULL;
+
+ -- (Optional) Unique index already exists if schema defined; if not, we could add:
+ -- CREATE UNIQUE INDEX IF NOT EXISTS idx_versions_library_name ON versions(library_id, name);
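
A quick post-migration check one might run to confirm the dedup result described above (not part of the release; the expectation is that it returns no rows once migration 007 has completed):

SELECT library_id, COUNT(*) AS unversioned_rows
FROM versions
WHERE name = ''          -- former NULL-name rows, normalized in step 4
GROUP BY library_id
HAVING COUNT(*) > 1;     -- any result would indicate a remaining duplicate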
@@ -0,0 +1,10 @@
+ -- We only need to normalize existing strings to lower-case and add expression unique indexes
+ -- for defense-in-depth. Idempotent: LOWER(name) is stable on re-run.
+
+ UPDATE libraries SET name = LOWER(name);
+ UPDATE versions SET name = LOWER(name) WHERE name IS NOT NULL AND name <> '';
+
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_libraries_lower_name ON libraries(LOWER(name));
+ CREATE UNIQUE INDEX IF NOT EXISTS idx_versions_library_lower_name ON versions(library_id, LOWER(name));
+
+ -- Existing UNIQUE(library_id, name) plus these expression indexes enforce case-insensitive uniqueness.
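
For illustration, a lookup written so that SQLite can satisfy it from the idx_libraries_lower_name expression index created above (the pre-lowercased literal 'react' is an arbitrary example):

SELECT id
FROM libraries
WHERE LOWER(name) = 'react';   -- the LOWER(name) expression matches the index definition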
@@ -0,0 +1,57 @@
+ import { createTRPCProxyClient, httpBatchLink } from "@trpc/client";
+ import { l as logger } from "./index.js";
+ class DocumentManagementClient {
+ baseUrl;
+ client;
+ constructor(serverUrl) {
+ this.baseUrl = serverUrl.replace(/\/$/, "");
+ this.client = createTRPCProxyClient({
+ links: [httpBatchLink({ url: this.baseUrl })]
+ });
+ logger.debug(`DocumentManagementClient (tRPC) created for: ${this.baseUrl}`);
+ }
+ async initialize() {
+ await this.client.ping.query();
+ }
+ async shutdown() {
+ }
+ async listLibraries() {
+ return this.client.listLibraries.query();
+ }
+ async validateLibraryExists(library) {
+ await this.client.validateLibraryExists.mutate({ library });
+ }
+ async findBestVersion(library, targetVersion) {
+ return this.client.findBestVersion.query({ library, targetVersion });
+ }
+ async searchStore(library, version, query, limit) {
+ return this.client.search.query({ library, version: version ?? null, query, limit });
+ }
+ async removeAllDocuments(library, version) {
+ await this.client.removeAllDocuments.mutate({ library, version: version ?? null });
+ }
+ async getVersionsByStatus(statuses) {
+ return this.client.getVersionsByStatus.query({
+ statuses
+ });
+ }
+ async findVersionsBySourceUrl(url) {
+ return this.client.findVersionsBySourceUrl.query({ url });
+ }
+ async getScraperOptions(versionId) {
+ return this.client.getScraperOptions.query({ versionId });
+ }
+ async updateVersionStatus(versionId, status, errorMessage) {
+ await this.client.updateVersionStatus.mutate({ versionId, status, errorMessage });
+ }
+ async updateVersionProgress(versionId, pages, maxPages) {
+ await this.client.updateVersionProgress.mutate({ versionId, pages, maxPages });
+ }
+ async storeScraperOptions(versionId, options) {
+ await this.client.storeScraperOptions.mutate({ versionId, options });
+ }
+ }
+ export {
+ DocumentManagementClient
+ };
+ //# sourceMappingURL=DocumentManagementClient-CAFdDwTu.js.map
@@ -0,0 +1 @@
+ {"version":3,"file":"DocumentManagementClient-CAFdDwTu.js","sources":["../src/store/DocumentManagementClient.ts"],"sourcesContent":["/**\n * tRPC client for the document management API.\n * Implements IDocumentManagement and delegates to /api data router.\n */\nimport { createTRPCProxyClient, httpBatchLink } from \"@trpc/client\";\nimport type { ScraperOptions } from \"../scraper/types\";\nimport { logger } from \"../utils/logger\";\nimport type { IDocumentManagement } from \"./trpc/interfaces\";\nimport type { DataRouter } from \"./trpc/router\";\nimport type {\n DbVersionWithLibrary,\n FindVersionResult,\n LibrarySummary,\n StoredScraperOptions,\n StoreSearchResult,\n VersionStatus,\n} from \"./types\";\n\nexport class DocumentManagementClient implements IDocumentManagement {\n private readonly baseUrl: string;\n private readonly client: ReturnType<typeof createTRPCProxyClient<DataRouter>>;\n\n constructor(serverUrl: string) {\n this.baseUrl = serverUrl.replace(/\\/$/, \"\");\n this.client = createTRPCProxyClient<DataRouter>({\n links: [httpBatchLink({ url: this.baseUrl })],\n });\n logger.debug(`DocumentManagementClient (tRPC) created for: ${this.baseUrl}`);\n }\n\n async initialize(): Promise<void> {\n // Connectivity check\n await (\n this.client as unknown as { ping: { query: () => Promise<unknown> } }\n ).ping.query();\n }\n\n async shutdown(): Promise<void> {\n // no-op for HTTP client\n }\n\n async listLibraries(): Promise<LibrarySummary[]> {\n return this.client.listLibraries.query();\n }\n\n async validateLibraryExists(library: string): Promise<void> {\n await this.client.validateLibraryExists.mutate({ library });\n }\n\n async findBestVersion(\n library: string,\n targetVersion?: string,\n ): Promise<FindVersionResult> {\n return this.client.findBestVersion.query({ library, targetVersion });\n }\n\n async searchStore(\n library: string,\n version: string | null | undefined,\n query: string,\n limit?: number,\n ): Promise<StoreSearchResult[]> {\n return this.client.search.query({ library, version: version ?? null, query, limit });\n }\n\n async removeAllDocuments(library: string, version?: string | null): Promise<void> {\n await this.client.removeAllDocuments.mutate({ library, version: version ?? 
null });\n }\n\n async getVersionsByStatus(statuses: VersionStatus[]): Promise<DbVersionWithLibrary[]> {\n return this.client.getVersionsByStatus.query({\n statuses: statuses as unknown as string[],\n });\n }\n\n async findVersionsBySourceUrl(url: string): Promise<DbVersionWithLibrary[]> {\n return this.client.findVersionsBySourceUrl.query({ url });\n }\n\n async getScraperOptions(versionId: number): Promise<StoredScraperOptions | null> {\n return this.client.getScraperOptions.query({ versionId });\n }\n\n async updateVersionStatus(\n versionId: number,\n status: VersionStatus,\n errorMessage?: string,\n ): Promise<void> {\n await this.client.updateVersionStatus.mutate({ versionId, status, errorMessage });\n }\n\n async updateVersionProgress(\n versionId: number,\n pages: number,\n maxPages: number,\n ): Promise<void> {\n await this.client.updateVersionProgress.mutate({ versionId, pages, maxPages });\n }\n\n async storeScraperOptions(versionId: number, options: ScraperOptions): Promise<void> {\n await this.client.storeScraperOptions.mutate({ versionId, options });\n }\n}\n"],"names":[],"mappings":";;AAkBO,MAAM,yBAAwD;AAAA,EAClD;AAAA,EACA;AAAA,EAEjB,YAAY,WAAmB;AAC7B,SAAK,UAAU,UAAU,QAAQ,OAAO,EAAE;AAC1C,SAAK,SAAS,sBAAkC;AAAA,MAC9C,OAAO,CAAC,cAAc,EAAE,KAAK,KAAK,QAAA,CAAS,CAAC;AAAA,IAAA,CAC7C;AACD,WAAO,MAAM,gDAAgD,KAAK,OAAO,EAAE;AAAA,EAC7E;AAAA,EAEA,MAAM,aAA4B;AAEhC,UACE,KAAK,OACL,KAAK,MAAA;AAAA,EACT;AAAA,EAEA,MAAM,WAA0B;AAAA,EAEhC;AAAA,EAEA,MAAM,gBAA2C;AAC/C,WAAO,KAAK,OAAO,cAAc,MAAA;AAAA,EACnC;AAAA,EAEA,MAAM,sBAAsB,SAAgC;AAC1D,UAAM,KAAK,OAAO,sBAAsB,OAAO,EAAE,SAAS;AAAA,EAC5D;AAAA,EAEA,MAAM,gBACJ,SACA,eAC4B;AAC5B,WAAO,KAAK,OAAO,gBAAgB,MAAM,EAAE,SAAS,eAAe;AAAA,EACrE;AAAA,EAEA,MAAM,YACJ,SACA,SACA,OACA,OAC8B;AAC9B,WAAO,KAAK,OAAO,OAAO,MAAM,EAAE,SAAS,SAAS,WAAW,MAAM,OAAO,MAAA,CAAO;AAAA,EACrF;AAAA,EAEA,MAAM,mBAAmB,SAAiB,SAAwC;AAChF,UAAM,KAAK,OAAO,mBAAmB,OAAO,EAAE,SAAS,SAAS,WAAW,MAAM;AAAA,EACnF;AAAA,EAEA,MAAM,oBAAoB,UAA4D;AACpF,WAAO,KAAK,OAAO,oBAAoB,MAAM;AAAA,MAC3C;AAAA,IAAA,CACD;AAAA,EACH;AAAA,EAEA,MAAM,wBAAwB,KAA8C;AAC1E,WAAO,KAAK,OAAO,wBAAwB,MAAM,EAAE,KAAK;AAAA,EAC1D;AAAA,EAEA,MAAM,kBAAkB,WAAyD;AAC/E,WAAO,KAAK,OAAO,kBAAkB,MAAM,EAAE,WAAW;AAAA,EAC1D;AAAA,EAEA,MAAM,oBACJ,WACA,QACA,cACe;AACf,UAAM,KAAK,OAAO,oBAAoB,OAAO,EAAE,WAAW,QAAQ,cAAc;AAAA,EAClF;AAAA,EAEA,MAAM,sBACJ,WACA,OACA,UACe;AACf,UAAM,KAAK,OAAO,sBAAsB,OAAO,EAAE,WAAW,OAAO,UAAU;AAAA,EAC/E;AAAA,EAEA,MAAM,oBAAoB,WAAmB,SAAwC;AACnF,UAAM,KAAK,OAAO,oBAAoB,OAAO,EAAE,WAAW,SAAS;AAAA,EACrE;AACF;"}