@arabold/docs-mcp-server 1.26.0 → 1.26.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,124 @@
1
+ -- Migration: Add pages table to normalize page-level metadata and support ETag tracking
2
+ -- This migration introduces a pages table to store page-level metadata once per URL
3
+ -- and links document chunks to their parent pages via page_id foreign key
4
+
5
+ -- 1. Create pages table: one row of page-level metadata per (version_id, url) pair
6
+ CREATE TABLE IF NOT EXISTS pages (
7
+ id INTEGER PRIMARY KEY AUTOINCREMENT, -- surrogate key; the new documents table references it through page_id
8
+ version_id INTEGER NOT NULL REFERENCES versions(id), -- versions is defined outside this migration
9
+ url TEXT NOT NULL,
10
+ title TEXT, -- backfilled in step 5 from the '$.title' key of the old documents.metadata JSON
11
+ etag TEXT, -- not populated by this migration; NULL for migrated rows
12
+ last_modified TEXT, -- not populated by this migration; NULL for migrated rows
13
+ content_type TEXT, -- not populated by this migration; NULL for migrated rows
14
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
15
+ updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
16
+ UNIQUE(version_id, url) -- a URL can appear at most once per version
17
+ );
18
+
19
+ -- 2. Add indexes on pages for lookups by version_id, by url, and by etag
20
+ CREATE INDEX IF NOT EXISTS idx_pages_version_id ON pages(version_id); -- NOTE(review): UNIQUE(version_id, url) already leads with version_id, so this index may be redundant — confirm
21
+ CREATE INDEX IF NOT EXISTS idx_pages_url ON pages(url); -- url on its own, independent of version_id
22
+ CREATE INDEX IF NOT EXISTS idx_pages_etag ON pages(etag);
23
+
24
+ -- 3. Create new documents table with page_id foreign key (renamed to documents in step 8)
25
+ CREATE TABLE documents_new ( -- no IF NOT EXISTS here: the statement fails if documents_new already exists
26
+ id INTEGER PRIMARY KEY AUTOINCREMENT, -- step 6 copies the old documents.id values into this column
27
+ page_id INTEGER NOT NULL REFERENCES pages(id), -- parent page of this chunk
28
+ content TEXT,
29
+ metadata JSON, -- Now contains only chunk-specific metadata (level, path); step 6 removes url, title, library and version
30
+ sort_order INTEGER NOT NULL,
31
+ embedding BLOB, -- Store embeddings directly in documents table; the step 6 INSERT does not list this column, so migrated rows start as NULL
32
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
33
+ );
34
+
35
+ -- 4. Create indexes for the new documents table (created on documents_new before the rename in step 8)
36
+ CREATE INDEX IF NOT EXISTS idx_documents_page_id ON documents_new(page_id); -- NOTE(review): page_id is also the leading column of idx_documents_sort_order, so this index may be redundant — confirm
37
+ CREATE INDEX IF NOT EXISTS idx_documents_sort_order ON documents_new(page_id, sort_order); -- chunks of one page in sort_order
38
+
39
+ -- 5. Migrate data from old documents table to new structure
40
+ -- First, populate pages with one row per distinct (version_id, url) found in the existing documents
41
+ -- GROUP BY collapses all chunks of a page into one row; MAX() reduces title and timestamp to a single value per group
42
+ INSERT INTO pages (version_id, url, title, created_at, updated_at) -- etag, last_modified and content_type are left at NULL
43
+ SELECT
44
+ version_id,
45
+ url,
46
+ MAX(json_extract(metadata, '$.title')) as title, -- MAX ignores NULLs, so any chunk whose metadata has a title can supply it
47
+ MAX(COALESCE(indexed_at, CURRENT_TIMESTAMP)) as created_at, -- largest indexed_at in the group; a NULL indexed_at counts as the current time
48
+ MAX(COALESCE(indexed_at, CURRENT_TIMESTAMP)) as updated_at -- same expression as created_at
49
+ FROM documents
50
+ GROUP BY version_id, url;
51
+
52
+ -- 6. Migrate document chunks to new table structure
53
+ -- Preserve all existing metadata except page-level fields (url, title, library, version)
54
+ -- that are now stored in pages and versions tables
55
+ INSERT INTO documents_new (id, page_id, content, metadata, sort_order, created_at) -- embedding is not listed and stays NULL
56
+ SELECT
57
+ d.id, -- keep the original chunk id
58
+ p.id as page_id, -- id of the pages row created in step 5 for this chunk's (version_id, url)
59
+ d.content,
60
+ json_remove( -- four nested calls; each one strips a single page-level key
61
+ json_remove(
62
+ json_remove(
63
+ json_remove(d.metadata, '$.url'),
64
+ '$.title'
65
+ ),
66
+ '$.library'
67
+ ),
68
+ '$.version'
69
+ ) as metadata,
70
+ d.sort_order,
71
+ COALESCE(d.indexed_at, CURRENT_TIMESTAMP) -- becomes created_at; falls back to the current time when indexed_at is NULL
72
+ FROM documents d
73
+ JOIN pages p ON d.version_id = p.version_id AND d.url = p.url; -- inner join: only chunks with a matching pages row are copied
74
+
75
+ -- 7. Drop the old documents table (its rows were copied into documents_new in step 6)
76
+ DROP TABLE documents;
77
+
78
+ -- 8. Rename the new table to documents so it takes the name of the table dropped in step 7
79
+ ALTER TABLE documents_new RENAME TO documents;
80
+
81
+ -- 9. Recreate FTS5 virtual table to work with new structure
82
+ -- Drop existing FTS table and triggers; IF EXISTS makes each statement a no-op when the object is absent
83
+ DROP TRIGGER IF EXISTS documents_fts_after_delete;
84
+ DROP TRIGGER IF EXISTS documents_fts_after_update;
85
+ DROP TRIGGER IF EXISTS documents_fts_after_insert;
86
+ DROP TABLE IF EXISTS documents_fts; -- old index contents are discarded and rebuilt in step 12
87
+
88
+ -- Create new FTS table; rows are filled from documents.content, pages.title, pages.url and the '$.path' key of documents.metadata
89
+ CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
90
+ content,
91
+ title,
92
+ url,
93
+ path,
94
+ tokenize='porter unicode61'
95
+ );
96
+
97
+ -- 10. Create new FTS triggers; the insert and update triggers read title and url from the pages table
98
+ CREATE TRIGGER IF NOT EXISTS documents_fts_after_delete AFTER DELETE ON documents BEGIN
99
+ DELETE FROM documents_fts WHERE rowid = old.id; -- FTS rows use the documents.id value as their rowid
100
+ END;
101
+
102
+ CREATE TRIGGER IF NOT EXISTS documents_fts_after_update AFTER UPDATE ON documents BEGIN -- re-index a chunk after any update to its documents row
103
+ DELETE FROM documents_fts WHERE rowid = old.id; -- remove the entry stored under the previous id
104
+ INSERT INTO documents_fts(rowid, content, title, url, path)
105
+ SELECT new.id, new.content, p.title, p.url, json_extract(new.metadata, '$.path') -- title and url come from the parent page
106
+ FROM pages p WHERE p.id = new.page_id; -- inserts nothing when no pages row has this id
107
+ END;
108
+
109
+ CREATE TRIGGER IF NOT EXISTS documents_fts_after_insert AFTER INSERT ON documents BEGIN -- index each newly inserted chunk
110
+ INSERT INTO documents_fts(rowid, content, title, url, path)
111
+ SELECT new.id, new.content, p.title, p.url, json_extract(new.metadata, '$.path') -- title and url come from the parent page
112
+ FROM pages p WHERE p.id = new.page_id; -- inserts nothing when no pages row has this id
113
+ END;
114
+
115
+ -- 11. Create trigger to set pages.updated_at to the current time after any UPDATE of a pages row (not only title changes)
116
+ CREATE TRIGGER IF NOT EXISTS pages_updated_at_trigger AFTER UPDATE ON pages BEGIN
117
+ UPDATE pages SET updated_at = CURRENT_TIMESTAMP WHERE id = new.id; -- NOTE(review): updates the table the trigger fires on; presumably relies on recursive triggers being off — confirm
118
+ END;
119
+
120
+ -- 12. Rebuild FTS index from migrated data (rows copied in step 6 were inserted before the new triggers existed)
121
+ INSERT INTO documents_fts(rowid, content, title, url, path)
122
+ SELECT d.id, d.content, p.title, p.url, json_extract(d.metadata, '$.path') -- same column mapping as the insert trigger
123
+ FROM documents d
124
+ JOIN pages p ON d.page_id = p.id;