harper-knowledge 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +276 -0
  3. package/config.yaml +17 -0
  4. package/dist/core/embeddings.d.ts +29 -0
  5. package/dist/core/embeddings.js +199 -0
  6. package/dist/core/entries.d.ts +85 -0
  7. package/dist/core/entries.js +235 -0
  8. package/dist/core/history.d.ts +30 -0
  9. package/dist/core/history.js +119 -0
  10. package/dist/core/search.d.ts +23 -0
  11. package/dist/core/search.js +306 -0
  12. package/dist/core/tags.d.ts +32 -0
  13. package/dist/core/tags.js +76 -0
  14. package/dist/core/triage.d.ts +55 -0
  15. package/dist/core/triage.js +126 -0
  16. package/dist/http-utils.d.ts +37 -0
  17. package/dist/http-utils.js +132 -0
  18. package/dist/index.d.ts +21 -0
  19. package/dist/index.js +76 -0
  20. package/dist/mcp/server.d.ts +24 -0
  21. package/dist/mcp/server.js +124 -0
  22. package/dist/mcp/tools.d.ts +13 -0
  23. package/dist/mcp/tools.js +497 -0
  24. package/dist/oauth/authorize.d.ts +27 -0
  25. package/dist/oauth/authorize.js +438 -0
  26. package/dist/oauth/github.d.ts +28 -0
  27. package/dist/oauth/github.js +62 -0
  28. package/dist/oauth/keys.d.ts +33 -0
  29. package/dist/oauth/keys.js +100 -0
  30. package/dist/oauth/metadata.d.ts +21 -0
  31. package/dist/oauth/metadata.js +55 -0
  32. package/dist/oauth/middleware.d.ts +22 -0
  33. package/dist/oauth/middleware.js +64 -0
  34. package/dist/oauth/register.d.ts +14 -0
  35. package/dist/oauth/register.js +83 -0
  36. package/dist/oauth/token.d.ts +15 -0
  37. package/dist/oauth/token.js +178 -0
  38. package/dist/oauth/validate.d.ts +30 -0
  39. package/dist/oauth/validate.js +52 -0
  40. package/dist/resources/HistoryResource.d.ts +38 -0
  41. package/dist/resources/HistoryResource.js +38 -0
  42. package/dist/resources/KnowledgeEntryResource.d.ts +64 -0
  43. package/dist/resources/KnowledgeEntryResource.js +157 -0
  44. package/dist/resources/QueryLogResource.d.ts +20 -0
  45. package/dist/resources/QueryLogResource.js +57 -0
  46. package/dist/resources/ServiceKeyResource.d.ts +51 -0
  47. package/dist/resources/ServiceKeyResource.js +132 -0
  48. package/dist/resources/TagResource.d.ts +25 -0
  49. package/dist/resources/TagResource.js +32 -0
  50. package/dist/resources/TriageResource.d.ts +51 -0
  51. package/dist/resources/TriageResource.js +107 -0
  52. package/dist/types.d.ts +317 -0
  53. package/dist/types.js +7 -0
  54. package/dist/webhooks/datadog.d.ts +26 -0
  55. package/dist/webhooks/datadog.js +120 -0
  56. package/dist/webhooks/github.d.ts +24 -0
  57. package/dist/webhooks/github.js +167 -0
  58. package/dist/webhooks/middleware.d.ts +14 -0
  59. package/dist/webhooks/middleware.js +161 -0
  60. package/dist/webhooks/types.d.ts +17 -0
  61. package/dist/webhooks/types.js +4 -0
  62. package/package.json +72 -0
  63. package/schema/knowledge.graphql +134 -0
  64. package/web/index.html +735 -0
  65. package/web/js/app.js +461 -0
  66. package/web/js/detail.js +223 -0
  67. package/web/js/editor.js +303 -0
  68. package/web/js/search.js +238 -0
  69. package/web/js/triage.js +305 -0
@@ -0,0 +1,497 @@
1
+ /**
2
+ * MCP Tool Registration
3
+ *
4
+ * Defines and registers all 8 MCP tools with the McpServer instance.
5
+ * Each tool wraps a core function from src/core/ and returns JSON-formatted results.
6
+ */
7
+ import * as z from "zod/v4";
8
+ import { search } from "../core/search.js";
9
+ import { createEntry, getEntry, updateEntry, stripEmbedding, } from "../core/entries.js";
10
+ import { listTags } from "../core/tags.js";
11
+ import { submitTriage } from "../core/triage.js";
12
+ import { generateEmbedding } from "../core/embeddings.js";
13
+ import { getHistory } from "../core/history.js";
14
/**
 * Wrap a value as a successful MCP tool result: a single text content item
 * holding the value pretty-printed as JSON with a 2-space indent.
 *
 * JSON.stringify returns `undefined` (not a string) for values that have no
 * JSON representation — `undefined`, functions, and symbols. Those are
 * emitted as the JSON literal "null" so that `text` is always a string.
 *
 * @param data - The value to serialize.
 * @returns An object of the form `{ content: [{ type: "text", text }] }`.
 */
function jsonContent(data) {
    const text = JSON.stringify(data, null, 2) ?? "null";
    return {
        content: [{ type: "text", text }],
    };
}
22
/**
 * Build a failed MCP tool result.
 *
 * The message is carried verbatim in a single text content item, and the
 * result is marked with `isError: true`.
 *
 * @param message - Text to report back to the tool caller.
 * @returns An object of the form `{ content: [{ type: "text", text }], isError: true }`.
 */
function errorContent(message) {
    const textItem = { type: "text", text: message };
    const failure = { content: [textItem], isError: true };
    return failure;
}
31
/**
 * Fetch the knowledge entries for a list of IDs concurrently.
 *
 * IDs for which getEntry resolves to a falsy value (no such entry) are
 * dropped; the remaining entries keep the order of `ids`.
 *
 * @param ids - Entry IDs to load; may be undefined or empty.
 * @returns The entries that were found.
 */
async function fetchEntries(ids) {
    if (!ids || ids.length === 0) {
        return [];
    }
    const fetched = await Promise.all(ids.map((entryId) => getEntry(entryId)));
    return fetched.filter(Boolean);
}
/**
 * Load every entry explicitly linked from `entry` (supersedesId,
 * supersededById, siblingIds, relatedIds).
 *
 * The lookups are independent of one another, so they are issued together
 * instead of being awaited one at a time.
 *
 * @param entry - The entry whose links should be resolved.
 * @returns `{ supersedes, supersededBy, siblings, related }`; the first two
 *   are falsy when the link is absent or dangling, the last two are arrays.
 */
async function loadLinkedEntries(entry) {
    const [supersedes, supersededBy, siblings, related] = await Promise.all([
        entry.supersedesId ? getEntry(entry.supersedesId) : null,
        entry.supersededById ? getEntry(entry.supersededById) : null,
        fetchEntries(entry.siblingIds),
        fetchEntries(entry.relatedIds),
    ]);
    return { supersedes, supersededBy, siblings, related };
}
/**
 * Register all knowledge base MCP tools on the given server.
 *
 * Eight tools are registered: knowledge_search, knowledge_add, knowledge_get,
 * knowledge_related, knowledge_list_tags, knowledge_triage, knowledge_update
 * and knowledge_history. Every handler returns either jsonContent(...) on
 * success or errorContent(...) on failure; handlers do not throw.
 *
 * The write tools (knowledge_add, knowledge_triage, knowledge_update) are
 * refused unless `caller.scopes` includes "mcp:write".
 *
 * @param server - Object exposing `registerTool(name, config, handler)`.
 * @param caller - Identity of the connected client; `scopes` gates write
 *   tools and `userId` is recorded as the editor on updates.
 */
export function registerTools(server, caller) {
    // A caller without a scopes list is treated as read-only rather than
    // letting the property access throw outside the handlers' try blocks.
    const canWrite = () => Boolean(caller?.scopes?.includes("mcp:write"));
    // =========================================================================
    // 1. knowledge_search — Search the knowledge base
    // =========================================================================
    server.registerTool("knowledge_search", {
        description: "Search the Harper knowledge base using keyword, semantic, or hybrid search. " +
            "Returns scored results sorted by relevance. Provide optional environment " +
            "context to boost results matching your setup.",
        inputSchema: {
            query: z.string().describe("Search query string"),
            tags: z.array(z.string()).optional().describe("Filter results by tags"),
            limit: z
                .number()
                .int()
                .min(1)
                .max(50)
                .optional()
                .describe("Maximum number of results (default 10)"),
            context: z
                .object({
                harper: z
                    .string()
                    .optional()
                    .describe('Harper version (e.g., "4.6.0" or ">=4.6.0")'),
                storageEngine: z
                    .string()
                    .optional()
                    .describe('Storage engine (e.g., "lmdb", "rocksdb")'),
                node: z
                    .string()
                    .optional()
                    .describe('Node.js version (e.g., "22.0.0")'),
                platform: z
                    .string()
                    .optional()
                    .describe('Platform (e.g., "linux", "darwin", "win32")'),
            })
                .optional()
                .describe("Caller's environment context for applicability filtering"),
        },
    }, async ({ query, tags, limit, context }) => {
        try {
            const results = await search({ query, tags, limit, context });
            const cleaned = results.map(stripEmbedding);
            return jsonContent({
                resultCount: cleaned.length,
                results: cleaned,
            });
        }
        catch {
            return errorContent("Search failed. Please try again.");
        }
    });
    // =========================================================================
    // 2. knowledge_add — Add a new knowledge entry
    // =========================================================================
    server.registerTool("knowledge_add", {
        description: "Add a new entry to the Harper knowledge base. Entries added via MCP " +
            'are automatically tagged with confidence "ai-generated". An embedding ' +
            "is generated from the title and content for semantic search.",
        inputSchema: {
            title: z
                .string()
                .max(500)
                .describe("Entry title — concise summary of the knowledge"),
            content: z
                .string()
                .max(100_000)
                .describe("Full content of the knowledge entry (Markdown supported)"),
            tags: z
                .array(z.string().max(100))
                .max(50)
                .describe('Tags for categorization (e.g., ["plugins", "config"])'),
            source: z
                .string()
                .max(200)
                .optional()
                .describe('Source identifier (e.g., "github-issue", "docs", "slack")'),
            sourceUrl: z
                .string()
                .max(2000)
                .optional()
                .describe("URL to the original source"),
            appliesTo: z
                .object({
                harper: z
                    .string()
                    .optional()
                    .describe("Harper version or semver range"),
                storageEngine: z
                    .string()
                    .optional()
                    .describe("Storage engine type"),
                node: z
                    .string()
                    .optional()
                    .describe("Node.js version or semver range"),
                platform: z.string().optional().describe("Platform identifier"),
            })
                .optional()
                .describe("Applicability scope — what environments this entry applies to"),
        },
    }, async ({ title, content, tags, source, sourceUrl, appliesTo }) => {
        if (!canWrite()) {
            return errorContent("Write access required. Authenticate with an authorized GitHub account to add entries.");
        }
        try {
            const entry = await createEntry({
                title,
                content,
                tags,
                source,
                sourceUrl,
                appliesTo,
                confidence: "ai-generated", // MCP callers are AI agents
            });
            return jsonContent({
                message: "Knowledge entry created successfully",
                entry: stripEmbedding(entry),
            });
        }
        catch {
            return errorContent("Failed to create entry. Please try again.");
        }
    });
    // =========================================================================
    // 3. knowledge_get — Get a knowledge entry by ID
    // =========================================================================
    server.registerTool("knowledge_get", {
        description: "Get a single knowledge entry by ID. If the entry has relationships " +
            "(supersedes, superseded by, siblings, related), the linked entries " +
            "are also fetched and included in the response.",
        inputSchema: {
            id: z.string().describe("The knowledge entry ID"),
        },
    }, async ({ id }) => {
        try {
            const entry = await getEntry(id);
            if (!entry) {
                return errorContent(`Knowledge entry not found: ${id}`);
            }
            const linked = await loadLinkedEntries(entry);
            // Keep only the relationship kinds that resolved to at least one
            // entry, in a fixed key order.
            const relationships = Object.fromEntries([
                ["supersedes", linked.supersedes && stripEmbedding(linked.supersedes)],
                ["supersededBy", linked.supersededBy && stripEmbedding(linked.supersededBy)],
                ["siblings", linked.siblings.length > 0 && linked.siblings.map((item) => stripEmbedding(item))],
                ["related", linked.related.length > 0 && linked.related.map((item) => stripEmbedding(item))],
            ].filter(([, value]) => Boolean(value)));
            // The `relationships` key is omitted entirely when nothing is linked.
            const result = Object.keys(relationships).length > 0
                ? { entry: stripEmbedding(entry), relationships }
                : { entry: stripEmbedding(entry) };
            return jsonContent(result);
        }
        catch {
            return errorContent("Failed to get entry. Please try again.");
        }
    });
    // =========================================================================
    // 4. knowledge_related — Find entries related to a given entry
    // =========================================================================
    server.registerTool("knowledge_related", {
        description: "Find knowledge entries related to a given entry. Combines explicit " +
            "relationships (siblings, related, supersedes chain) with semantic " +
            "similarity search using the entry's embedding.",
        inputSchema: {
            id: z
                .string()
                .describe("The knowledge entry ID to find related entries for"),
            limit: z
                .number()
                .int()
                .min(1)
                .max(50)
                .optional()
                .describe("Maximum number of results (default 10)"),
        },
    }, async ({ id, limit }) => {
        const maxResults = limit ?? 10;
        try {
            const entry = await getEntry(id);
            if (!entry) {
                return errorContent(`Knowledge entry not found: ${id}`);
            }
            const linked = await loadLinkedEntries(entry);
            // Keyed by entry ID so each entry appears once; Map iteration order
            // (insertion order) is the order of the final result list.
            const relatedMap = new Map();
            const record = (candidate, relationship) => {
                relatedMap.set(candidate.id, {
                    entry: stripEmbedding(candidate),
                    relationship,
                });
            };
            // Sibling/related links never replace an earlier relationship and
            // never list the queried entry as related to itself (the semantic
            // branch below applies the same self-exclusion).
            const recordIfNew = (candidate, relationship) => {
                if (candidate.id !== id && !relatedMap.has(candidate.id)) {
                    record(candidate, relationship);
                }
            };
            // Gather explicit relationships
            if (linked.supersedes) {
                record(linked.supersedes, "supersedes");
            }
            if (linked.supersededBy) {
                record(linked.supersededBy, "superseded_by");
            }
            for (const sibling of linked.siblings) {
                recordIfNew(sibling, "sibling");
            }
            for (const relatedEntry of linked.related) {
                recordIfNew(relatedEntry, "related");
            }
            // Semantic similarity search using entry content
            let semanticResults = [];
            try {
                const queryText = `${entry.title}\n\n${entry.content}`;
                const embedding = await generateEmbedding(queryText);
                // Collect similar entries from HNSW search.
                // NOTE(review): `databases` is not imported in this file —
                // presumably a global supplied by the Harper runtime; confirm.
                const matches = [];
                for await (const item of databases.kb.KnowledgeEntry.search({
                    sort: { attribute: "embedding", target: embedding },
                    limit: maxResults + 10, // Fetch extra to account for filtering
                })) {
                    if (item.id !== id && !item.deprecated) {
                        matches.push(item);
                    }
                }
                // Assigned only after the whole iteration succeeds, so a
                // failure part-way through contributes no semantic results.
                semanticResults = matches;
            }
            catch {
                // Embedding model may not be available — continue with explicit relationships only
            }
            // Merge semantic results (add those not already in explicit relationships)
            for (const result of semanticResults) {
                if (relatedMap.size >= maxResults) {
                    break;
                }
                if (!relatedMap.has(result.id)) {
                    record(result, "similar");
                }
            }
            // Convert to array, limited to maxResults (explicit relationships
            // alone may exceed the limit).
            const results = Array.from(relatedMap.values()).slice(0, maxResults);
            return jsonContent({
                entryId: id,
                entryTitle: entry.title,
                relatedCount: results.length,
                related: results,
            });
        }
        catch {
            return errorContent("Failed to find related entries. Please try again.");
        }
    });
    // =========================================================================
    // 5. knowledge_list_tags — List all knowledge tags
    // =========================================================================
    server.registerTool("knowledge_list_tags", {
        description: "List all tags in the knowledge base with their entry counts. " +
            "Useful for discovering available categories before searching.",
    }, async () => {
        try {
            const tags = await listTags();
            return jsonContent({
                tagCount: tags.length,
                tags,
            });
        }
        catch {
            return errorContent("Failed to list tags. Please try again.");
        }
    });
    // =========================================================================
    // 6. knowledge_triage — Submit an item to the triage queue
    // =========================================================================
    server.registerTool("knowledge_triage", {
        description: "Submit a new item to the knowledge triage queue for review. " +
            "Use this when you encounter information that should potentially " +
            "be added to the knowledge base but needs human review first.",
        inputSchema: {
            source: z
                .string()
                .describe('Source identifier (e.g., "claude-code", "github-issue", "slack")'),
            summary: z
                .string()
                .describe("Brief summary of the knowledge to triage"),
            payload: z
                .record(z.string(), z.unknown())
                .optional()
                .describe("Additional payload data from the source"),
        },
    }, async ({ source, summary, payload }) => {
        if (!canWrite()) {
            return errorContent("Write access required. Authenticate with an authorized GitHub account to submit triage items.");
        }
        try {
            const item = await submitTriage(source, summary, payload);
            return jsonContent({
                message: "Triage item submitted successfully",
                item,
            });
        }
        catch {
            return errorContent("Failed to submit triage item. Please try again.");
        }
    });
    // =========================================================================
    // 7. knowledge_update — Update an existing knowledge entry
    // =========================================================================
    server.registerTool("knowledge_update", {
        description: "Update an existing knowledge entry. Only provide fields you want to change. " +
            "Edits are tracked in the history log with who made the change and why.",
        inputSchema: {
            id: z.string().describe("The knowledge entry ID to update"),
            title: z.string().max(500).optional().describe("Updated title"),
            content: z
                .string()
                .max(100_000)
                .optional()
                .describe("Updated content (Markdown supported)"),
            tags: z
                .array(z.string().max(100))
                .max(50)
                .optional()
                .describe("Updated tags"),
            source: z
                .string()
                .max(200)
                .optional()
                .describe("Updated source identifier"),
            sourceUrl: z
                .string()
                .max(2000)
                .optional()
                .describe("Updated source URL"),
            confidence: z
                .enum(["ai-generated", "reviewed", "verified"])
                .optional()
                .describe("Updated confidence level"),
            appliesTo: z
                .object({
                harper: z.string().optional(),
                storageEngine: z.string().optional(),
                node: z.string().optional(),
                platform: z.string().optional(),
            })
                .optional()
                .describe("Updated applicability scope"),
            deprecated: z.boolean().optional().describe("Mark as deprecated"),
            editSummary: z
                .string()
                .max(1000)
                .optional()
                .describe("Brief description of what changed and why (for the edit log)"),
        },
    }, async ({ id, editSummary, ...updates }) => {
        if (!canWrite()) {
            return errorContent("Write access required. Authenticate with an authorized GitHub account to update entries.");
        }
        // MCP callers are AI agents — prevent confidence escalation. A
        // requested "reviewed"/"verified" value is dropped without an error;
        // the remaining fields are still applied.
        if (updates.confidence && updates.confidence !== "ai-generated") {
            delete updates.confidence;
        }
        try {
            const entry = await updateEntry(id, updates, {
                editedBy: caller.userId,
                editSummary,
            });
            return jsonContent({
                message: "Knowledge entry updated successfully",
                entry: stripEmbedding(entry),
            });
        }
        catch {
            return errorContent("Failed to update entry. Please try again.");
        }
    });
    // =========================================================================
    // 8. knowledge_history — Get edit history for an entry
    // =========================================================================
    server.registerTool("knowledge_history", {
        description: "Get the edit history for a knowledge entry. Shows who changed what, " +
            "when, and why — with snapshots of previous values for each changed field.",
        inputSchema: {
            id: z.string().describe("The knowledge entry ID to get history for"),
            limit: z
                .number()
                .int()
                .min(1)
                .max(100)
                .optional()
                .describe("Maximum number of edits to return (default 50)"),
        },
    }, async ({ id, limit }) => {
        try {
            // Look the entry up first so an unknown ID yields a "not found"
            // error instead of an empty history.
            const entry = await getEntry(id);
            if (!entry) {
                return errorContent(`Knowledge entry not found: ${id}`);
            }
            const edits = await getHistory(id, limit);
            return jsonContent({
                entryId: id,
                entryTitle: entry.title,
                editCount: edits.length,
                edits,
            });
        }
        catch {
            return errorContent("Failed to get edit history. Please try again.");
        }
    });
}
@@ -0,0 +1,27 @@
1
+ /**
2
+ * OAuth Authorization Endpoint
3
+ *
4
+ * GET /oauth/authorize — MCP OAuth 2.1 authorization endpoint.
5
+ *
6
+ * Shows a login page with GitHub as the primary auth method and a
7
+ * subtle link to fall back to Harper credentials. If the user has an
8
+ * active session (from a prior GitHub login), issues an auth code
9
+ * immediately.
10
+ *
11
+ * Org membership is checked against the ALLOWED_GITHUB_ORGS env var
12
+ * for GitHub logins. Harper credential logins bypass org checks.
13
+ */
14
+ import type { HarperRequest } from "../types.ts";
15
+ /**
16
+ * Handle GET /oauth/authorize
17
+ *
18
+ * Three modes:
19
+ * 1. Returning from GitHub login (`pending` param) — complete authorization.
20
+ * 2. User already has a session — issue auth code directly.
21
+ * 3. First visit — show login page with GitHub button + Harper credentials.
22
+ */
23
+ export declare function handleAuthorizeGet(request: HarperRequest): Promise<Response>;
24
+ /**
25
+ * Handle POST /oauth/authorize — Harper credential login.
26
+ */
27
+ export declare function handleAuthorizePost(request: HarperRequest): Promise<Response>;