@pella-labs/pinakes 0.1.10 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -20
- package/dist/cli/audit.d.ts +5 -3
- package/dist/cli/audit.d.ts.map +1 -1
- package/dist/cli/audit.js +3 -24
- package/dist/cli/audit.js.map +1 -1
- package/dist/cli/contradiction-cli.d.ts +12 -0
- package/dist/cli/contradiction-cli.d.ts.map +1 -0
- package/dist/cli/contradiction-cli.js +35 -0
- package/dist/cli/contradiction-cli.js.map +1 -0
- package/dist/cli/contradiction.d.ts +59 -0
- package/dist/cli/contradiction.d.ts.map +1 -0
- package/dist/cli/contradiction.js +172 -0
- package/dist/cli/contradiction.js.map +1 -0
- package/dist/cli/export.d.ts +1 -1
- package/dist/cli/export.d.ts.map +1 -1
- package/dist/cli/export.js +7 -28
- package/dist/cli/export.js.map +1 -1
- package/dist/cli/import.d.ts +2 -2
- package/dist/cli/import.d.ts.map +1 -1
- package/dist/cli/import.js +5 -26
- package/dist/cli/import.js.map +1 -1
- package/dist/cli/index.js +39 -11
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/purge.d.ts +2 -2
- package/dist/cli/purge.d.ts.map +1 -1
- package/dist/cli/purge.js +2 -23
- package/dist/cli/purge.js.map +1 -1
- package/dist/cli/rebuild.d.ts +8 -6
- package/dist/cli/rebuild.d.ts.map +1 -1
- package/dist/cli/rebuild.js +9 -31
- package/dist/cli/rebuild.js.map +1 -1
- package/dist/cli/serve.d.ts +9 -7
- package/dist/cli/serve.d.ts.map +1 -1
- package/dist/cli/serve.js +71 -41
- package/dist/cli/serve.js.map +1 -1
- package/dist/cli/status.d.ts +7 -5
- package/dist/cli/status.d.ts.map +1 -1
- package/dist/cli/status.js +10 -29
- package/dist/cli/status.js.map +1 -1
- package/dist/db/client.d.ts +4 -4
- package/dist/db/client.js +3 -3
- package/dist/db/client.js.map +1 -1
- package/dist/db/migrations/migrations/0002_rename_kg_to_pinakes.sql +49 -0
- package/dist/db/migrations/migrations/meta/0002_snapshot.json +504 -0
- package/dist/db/migrations/migrations/meta/_journal.json +7 -0
- package/dist/db/repository.js +5 -5
- package/dist/db/repository.js.map +1 -1
- package/dist/db/schema.d.ts +84 -84
- package/dist/db/schema.d.ts.map +1 -1
- package/dist/db/schema.js +49 -49
- package/dist/db/schema.js.map +1 -1
- package/dist/db/types.d.ts +2 -2
- package/dist/db/types.js +1 -1
- package/dist/gaps/detector.d.ts +5 -5
- package/dist/gaps/detector.js +15 -15
- package/dist/gaps/detector.js.map +1 -1
- package/dist/ingest/chokidar.js +1 -1
- package/dist/ingest/ingester.d.ts +9 -4
- package/dist/ingest/ingester.d.ts.map +1 -1
- package/dist/ingest/ingester.js +82 -17
- package/dist/ingest/ingester.js.map +1 -1
- package/dist/ingest/manifest.d.ts +5 -3
- package/dist/ingest/manifest.d.ts.map +1 -1
- package/dist/ingest/manifest.js +23 -14
- package/dist/ingest/manifest.js.map +1 -1
- package/dist/ingest/parse/chunk.js +1 -1
- package/dist/ingest/parse/markdown.d.ts +2 -2
- package/dist/ingest/queue.d.ts +1 -1
- package/dist/ingest/queue.js +1 -1
- package/dist/ingest/source.d.ts +2 -2
- package/dist/ingest/source.d.ts.map +1 -1
- package/dist/ingest/source.js +1 -1
- package/dist/llm/provider.d.ts +70 -0
- package/dist/llm/provider.d.ts.map +1 -0
- package/dist/llm/provider.js +235 -0
- package/dist/llm/provider.js.map +1 -0
- package/dist/mcp/envelope.d.ts +1 -1
- package/dist/mcp/tools/execute.d.ts +7 -7
- package/dist/mcp/tools/execute.d.ts.map +1 -1
- package/dist/mcp/tools/execute.js +41 -59
- package/dist/mcp/tools/execute.js.map +1 -1
- package/dist/mcp/tools/search.d.ts +13 -8
- package/dist/mcp/tools/search.d.ts.map +1 -1
- package/dist/mcp/tools/search.js +53 -28
- package/dist/mcp/tools/search.js.map +1 -1
- package/dist/observability/audit.d.ts +2 -2
- package/dist/observability/audit.js +3 -3
- package/dist/observability/audit.js.map +1 -1
- package/dist/observability/logger.js +4 -4
- package/dist/observability/logger.js.map +1 -1
- package/dist/observability/metrics.js +1 -1
- package/dist/observability/metrics.js.map +1 -1
- package/dist/paths.d.ts +61 -0
- package/dist/paths.d.ts.map +1 -0
- package/dist/paths.js +106 -0
- package/dist/paths.js.map +1 -0
- package/dist/retrieval/dedup.d.ts +25 -0
- package/dist/retrieval/dedup.d.ts.map +1 -0
- package/dist/retrieval/dedup.js +97 -0
- package/dist/retrieval/dedup.js.map +1 -0
- package/dist/retrieval/embedder.d.ts +9 -9
- package/dist/retrieval/embedder.d.ts.map +1 -1
- package/dist/retrieval/embedder.js +17 -17
- package/dist/retrieval/embedder.js.map +1 -1
- package/dist/retrieval/expand.d.ts +18 -0
- package/dist/retrieval/expand.d.ts.map +1 -0
- package/dist/retrieval/expand.js +89 -0
- package/dist/retrieval/expand.js.map +1 -0
- package/dist/retrieval/fts.d.ts +3 -3
- package/dist/retrieval/fts.js +8 -8
- package/dist/retrieval/graph.d.ts +47 -0
- package/dist/retrieval/graph.d.ts.map +1 -0
- package/dist/retrieval/graph.js +144 -0
- package/dist/retrieval/graph.js.map +1 -0
- package/dist/retrieval/hybrid.d.ts +10 -0
- package/dist/retrieval/hybrid.d.ts.map +1 -1
- package/dist/retrieval/hybrid.js +57 -2
- package/dist/retrieval/hybrid.js.map +1 -1
- package/dist/retrieval/vec.d.ts +2 -2
- package/dist/retrieval/vec.js +3 -3
- package/dist/sandbox/bindings/install.d.ts +5 -5
- package/dist/sandbox/bindings/install.d.ts.map +1 -1
- package/dist/sandbox/bindings/install.js +3 -3
- package/dist/sandbox/bindings/install.js.map +1 -1
- package/dist/sandbox/bindings/kg.d.ts.map +1 -1
- package/dist/sandbox/bindings/kg.js +10 -0
- package/dist/sandbox/bindings/kg.js.map +1 -1
- package/dist/sandbox/bindings/pinakes.d.ts +29 -0
- package/dist/sandbox/bindings/pinakes.d.ts.map +1 -0
- package/dist/sandbox/bindings/pinakes.js +333 -0
- package/dist/sandbox/bindings/pinakes.js.map +1 -0
- package/dist/sandbox/bindings/write.d.ts +1 -1
- package/dist/sandbox/bindings/write.d.ts.map +1 -1
- package/dist/sandbox/bindings/write.js +9 -9
- package/dist/sandbox/bindings/write.js.map +1 -1
- package/dist/sandbox/executor.d.ts +3 -3
- package/dist/sandbox/executor.js +6 -6
- package/dist/sandbox/executor.js.map +1 -1
- package/dist/sandbox/helpers.d.ts +1 -1
- package/dist/sandbox/helpers.d.ts.map +1 -1
- package/dist/sandbox/helpers.js +2 -2
- package/dist/sandbox/vendored-codemode.d.ts +1 -1
- package/dist/sandbox/vendored-codemode.js +1 -1
- package/dist/server.js +15 -6
- package/dist/server.js.map +1 -1
- package/package.json +22 -27
package/dist/mcp/tools/search.js
CHANGED
|
@@ -1,21 +1,23 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { hybridSearch } from '../../retrieval/hybrid.js';
|
|
2
|
+
import { hybridSearch, rrfFuseMulti } from '../../retrieval/hybrid.js';
|
|
3
|
+
import { dedupResults } from '../../retrieval/dedup.js';
|
|
4
|
+
import { expandQuery } from '../../retrieval/expand.js';
|
|
3
5
|
import { fitResults, countEnvelopeTokens } from '../../gate/budget.js';
|
|
4
6
|
import { buildEnvelope, QueryTimer } from '../envelope.js';
|
|
5
7
|
import { nextReader } from '../../db/client.js';
|
|
6
8
|
/**
|
|
7
|
-
* `
|
|
9
|
+
* `search` — fast-path hybrid search against the project knowledge base.
|
|
8
10
|
*
|
|
9
11
|
* Phase 5: supports all three scopes. For `scope='both'`, results from
|
|
10
|
-
* both
|
|
12
|
+
* both knowledge bases are merged and tagged with `source_scope`.
|
|
11
13
|
*/
|
|
12
|
-
export const
|
|
14
|
+
export const searchInputShape = {
|
|
13
15
|
query: z
|
|
14
16
|
.string()
|
|
15
17
|
.min(1)
|
|
16
|
-
.describe('
|
|
17
|
-
'
|
|
18
|
-
'Examples: "
|
|
18
|
+
.describe('Natural language or keyword query. Semantic search finds relevant ' +
|
|
19
|
+
'knowledge even with different wording. ' +
|
|
20
|
+
'Examples: "how does auth work", "database design decisions", "error handling conventions".'),
|
|
19
21
|
max_tokens: z
|
|
20
22
|
.number()
|
|
21
23
|
.int()
|
|
@@ -29,34 +31,43 @@ export const kgSearchInputShape = {
|
|
|
29
31
|
scope: z
|
|
30
32
|
.enum(['project', 'personal', 'both'])
|
|
31
33
|
.optional()
|
|
32
|
-
.describe('Which
|
|
33
|
-
'"personal"
|
|
34
|
-
|
|
34
|
+
.describe('Which knowledge base to query. "project" (default) = this project\'s knowledge, ' +
|
|
35
|
+
'"personal" = your cross-project notes, "both" = merged with source tagging.'),
|
|
36
|
+
expand: z
|
|
37
|
+
.boolean()
|
|
38
|
+
.optional()
|
|
39
|
+
.describe('Set to true to use an LLM for multi-query expansion. Generates 2 ' +
|
|
40
|
+
'alternative phrasings and merges results via RRF for better recall. ' +
|
|
41
|
+
'Requires an LLM provider (Ollama, API key, or Claude/Codex CLI). ' +
|
|
42
|
+
'Non-fatal: falls back to the original query if unavailable.'),
|
|
35
43
|
};
|
|
36
|
-
export const
|
|
37
|
-
title: 'Search
|
|
38
|
-
description: '
|
|
39
|
-
'
|
|
40
|
-
'
|
|
41
|
-
'
|
|
42
|
-
'
|
|
43
|
-
'
|
|
44
|
-
|
|
44
|
+
export const searchToolConfig = {
|
|
45
|
+
title: 'Search project knowledge base',
|
|
46
|
+
description: 'START HERE when you need to understand the project — architecture, ' +
|
|
47
|
+
'conventions, decisions, requirements, data models, deployment, or how ' +
|
|
48
|
+
'subsystems relate. Returns distilled knowledge at a fraction of the tokens ' +
|
|
49
|
+
'vs. reading raw source files. Semantic hybrid search (FTS5 + vector) finds ' +
|
|
50
|
+
'relevant context even when you don\'t know exact terms. Results ranked by ' +
|
|
51
|
+
'relevance with `title` and `section_path` for quick triage. ' +
|
|
52
|
+
'Use `execute` for advanced queries: chaining filters, browsing the full ' +
|
|
53
|
+
'index, writing new knowledge, or checking knowledge gaps.',
|
|
54
|
+
inputSchema: searchInputShape,
|
|
45
55
|
};
|
|
46
56
|
/**
|
|
47
|
-
* Build the `
|
|
57
|
+
* Build the `search` handler. Supports project, personal, and both scopes.
|
|
48
58
|
*/
|
|
49
|
-
export function
|
|
59
|
+
export function makeSearchHandler(deps) {
|
|
50
60
|
return async (args) => {
|
|
51
61
|
const timer = new QueryTimer();
|
|
52
62
|
const maxTokens = args.max_tokens ?? 5000;
|
|
53
63
|
const scope = args.scope ?? 'project';
|
|
64
|
+
const shouldExpand = args.expand === true;
|
|
54
65
|
// Check personal scope availability
|
|
55
66
|
if ((scope === 'personal' || scope === 'both') && !deps.personalBundle) {
|
|
56
67
|
const envelope = buildEnvelope({
|
|
57
68
|
result: {
|
|
58
|
-
error: 'personal scope requested but no personal
|
|
59
|
-
'
|
|
69
|
+
error: 'personal scope requested but no personal wiki is configured — ' +
|
|
70
|
+
'create a personal wiki at ~/.pinakes/wiki/ or pass --profile-path',
|
|
60
71
|
},
|
|
61
72
|
tokensBudgeted: maxTokens,
|
|
62
73
|
tokensUsed: 0,
|
|
@@ -66,11 +77,22 @@ export function makeKgSearchHandler(deps) {
|
|
|
66
77
|
});
|
|
67
78
|
return wrapText(envelope);
|
|
68
79
|
}
|
|
69
|
-
|
|
80
|
+
// Determine query variants: original + optional expansions
|
|
81
|
+
const queries = [args.query];
|
|
82
|
+
if (shouldExpand && deps.llmProvider) {
|
|
83
|
+
const expanded = await expandQuery(args.query, deps.llmProvider);
|
|
84
|
+
queries.push(...expanded.alternatives);
|
|
85
|
+
}
|
|
86
|
+
const allHits = [];
|
|
70
87
|
if (scope === 'project' || scope === 'both') {
|
|
71
88
|
const reader = nextReader(deps.bundle);
|
|
72
|
-
|
|
73
|
-
const
|
|
89
|
+
// Run hybrid search for all query variants
|
|
90
|
+
const hitLists = await Promise.all(queries.map((q) => hybridSearch(reader, 'project', q, deps.embedder, { dedup: false })));
|
|
91
|
+
// If multiple queries, merge via multi-list RRF then dedup
|
|
92
|
+
const merged = hitLists.length > 1
|
|
93
|
+
? dedupResults(rrfFuseMulti(hitLists, 60, 40)).slice(0, 20)
|
|
94
|
+
: hitLists[0];
|
|
95
|
+
const tagged = merged.map((h) => ({
|
|
74
96
|
id: h.id, text: h.text, source_uri: h.source_uri, score: h.score,
|
|
75
97
|
confidence: h.confidence, title: h.title, section_path: h.section_path,
|
|
76
98
|
...(scope === 'both' ? { source_scope: 'project' } : {}),
|
|
@@ -79,8 +101,11 @@ export function makeKgSearchHandler(deps) {
|
|
|
79
101
|
}
|
|
80
102
|
if ((scope === 'personal' || scope === 'both') && deps.personalBundle) {
|
|
81
103
|
const reader = nextReader(deps.personalBundle);
|
|
82
|
-
const
|
|
83
|
-
const
|
|
104
|
+
const hitLists = await Promise.all(queries.map((q) => hybridSearch(reader, 'personal', q, deps.embedder, { dedup: false })));
|
|
105
|
+
const merged = hitLists.length > 1
|
|
106
|
+
? dedupResults(rrfFuseMulti(hitLists, 60, 40)).slice(0, 20)
|
|
107
|
+
: hitLists[0];
|
|
108
|
+
const tagged = merged.map((h) => ({
|
|
84
109
|
id: h.id, text: h.text, source_uri: h.source_uri, score: h.score,
|
|
85
110
|
confidence: h.confidence, title: h.title, section_path: h.section_path,
|
|
86
111
|
...(scope === 'both' ? { source_scope: 'personal' } : {}),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.js","sourceRoot":"","sources":["../../../src/mcp/tools/search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../../../src/mcp/tools/search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAMxB,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AACxD,OAAO,EAAE,UAAU,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,aAAa,EAAE,UAAU,EAAc,MAAM,gBAAgB,CAAC;AACvE,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD;;;;;GAKG;AAEH,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,KAAK,EAAE,CAAC;SACL,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,QAAQ,CACP,oEAAoE;QAClE,yCAAyC;QACzC,4FAA4F,CAC/F;IACH,UAAU,EAAE,CAAC;SACV,MAAM,EAAE;SACR,GAAG,EAAE;SACL,QAAQ,EAAE;SACV,GAAG,CAAC,MAAM,CAAC;SACX,QAAQ,EAAE;SACV,QAAQ,CACP,8EAA8E;QAC5E,uEAAuE;QACvE,iEAAiE;QACjE,0CAA0C,CAC7C;IACH,KAAK,EAAE,CAAC;SACL,IAAI,CAAC,CAAC,SAAS,EAAE,UAAU,EAAE,MAAM,CAAC,CAAC;SACrC,QAAQ,EAAE;SACV,QAAQ,CACP,kFAAkF;QAChF,6EAA6E,CAChF;IACH,MAAM,EAAE,CAAC;SACN,OAAO,EAAE;SACT,QAAQ,EAAE;SACV,QAAQ,CACP,mEAAmE;QACjE,sEAAsE;QACtE,mEAAmE;QACnE,6DAA6D,CAChE;CACJ,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG;IAC9B,KAAK,EAAE,+BAA+B;IACtC,WAAW,EACT,qEAAqE;QACrE,wEAAwE;QACxE,6EAA6E;QAC7E,6EAA6E;QAC7E,4EAA4E;QAC5E,8DAA8D;QAC9D,0EAA0E;QAC1E,2DAA2D;IAC7D,WAAW,EAAE,gBAAgB;CACrB,CAAC;AAqBX;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,IAAgB;IAChD,OAAO,KAAK,EAAE,IAKb,EAA6E,EAAE;QAC9E,MAAM,KAAK,GAAG,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC;QAC1C,MAAM,KAAK,GAAU,IAAI,CAAC,KAAK,IAAI,SAAS,CAAC;QAC7C,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC;QAE1C,oCAAoC;QACpC,IAAI,CAAC,KAAK,KAAK,UAAU,IAAI,KAAK,KAAK,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YACvE,MAAM,QAAQ,GAAG,aAAa,CAAC;gBAC7B,MAAM,EAAE;oBACN,KAAK,EACH,gEAAgE;wBAChE,mEAAmE;iBACtE;gBACD,cAAc,EAAE,SAAS;gBACzB,UAAU,EAAE,CAAC;gBACb,gBAAgB,EAAE,KAAK;gBACvB,KAAK;gBACL,WAAW,EAAE,KAAK,CAAC,GAAG,EAAE;aACzB,CAAC,CAAC;YACH,OAAO,QAAQ,CAAC,QAAQ,CAAC,CAAC;QAC5B,CAAC;QAED,2DAA2D;QAC3D,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC7B,IAAI,YAAY,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC
;YACrC,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACjE,OAAO,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;QACzC,CAAC;QAED,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,KAAK,MAAM,EAAE,CAAC;YAC5C,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvC,2CAA2C;YAC3C,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,GAAG,CAChC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC,EAAE,IAAI,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,CACxF,CAAC;YACF,2DAA2D;YAC3D,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAChC,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,QAAQ,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;gBAC3D,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAChB,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAChD,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK;gBAChE,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,YAAY,EAAE,CAAC,CAAC,YAAY;gBACtE,GAAG,CAAC,KAAK,KAAK,MAAM,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,SAAkB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aAClE,CAAC,CAAC,CAAC;YACJ,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC1B,CAAC;QAED,IAAI,CAAC,KAAK,KAAK,UAAU,IAAI,KAAK,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YACtE,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAC/C,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,GAAG,CAChC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,MAAM,EAAE,UAAU,EAAE,CAAC,EAAE,IAAI,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,CACzF,CAAC;YACF,MAAM,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC;gBAChC,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,QAAQ,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;gBAC3D,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;YAChB,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAChD,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK;gBAChE,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,YAAY,EAAE,CAAC,CAAC,YAAY;gBACtE,GAA
G,CAAC,KAAK,KAAK,MAAM,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,UAAmB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACnE,CAAC,CAAC,CAAC;YACJ,OAAO,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC1B,CAAC;QAED,+DAA+D;QAC/D,IAAI,KAAK,KAAK,MAAM,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,GAAG,GAAG,UAAU,CACpB,OAAO,EACP,SAAS,EACT,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,EACxB,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,EACX,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CACpB,CAAC;QAEF,MAAM,QAAQ,GAAG,aAAa,CAAC;YAC7B,MAAM,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;gBAC5B,IAAI,WAAW,IAAI,IAAI;oBAAE,OAAO,IAAI,CAAC;gBACrC,OAAO,IAAI,CAAC;YACd,CAAC,CAAC;YACF,cAAc,EAAE,GAAG,CAAC,cAAc;YAClC,UAAU,EAAE,CAAC;YACb,gBAAgB,EAAE,GAAG,CAAC,SAAS;YAC/B,KAAK;YACL,WAAW,EAAE,KAAK,CAAC,GAAG,EAAE;SACzB,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QACtC,QAAQ,CAAC,IAAI,CAAC,WAAW,GAAG,mBAAmB,CAAC,IAAI,CAAC,CAAC;QAEtD,OAAO,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5B,CAAC,CAAC;AACJ,CAAC;AAED,SAAS,QAAQ,CAAC,QAAiB;IACjC,OAAO,EAAE,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,CAAC;AACzE,CAAC"}
|
|
@@ -3,8 +3,8 @@ import type { Database as BetterSqliteDatabase } from 'better-sqlite3';
|
|
|
3
3
|
* Audit log writer with scope-split JSONL mirror.
|
|
4
4
|
*
|
|
5
5
|
* Per CLAUDE.md §Security #7:
|
|
6
|
-
* - scope='project' →
|
|
7
|
-
* - scope='personal' or 'both' →
|
|
6
|
+
* - scope='project' → pinakes_audit table in ~/.pinakes/projects/<mangled>/pinakes.db + audit.jsonl
|
|
7
|
+
* - scope='personal' or 'both' → pinakes_audit table in ~/.pinakes/pinakes.db + ~/.pinakes/audit.jsonl
|
|
8
8
|
* - NEVER write personal query text to a path inside the project repo
|
|
9
9
|
*/
|
|
10
10
|
export interface AuditEntry {
|
|
@@ -10,15 +10,15 @@ import { logger } from './logger.js';
|
|
|
10
10
|
*/
|
|
11
11
|
export function writeAuditRow(writer, jsonlPath, entry) {
|
|
12
12
|
const ts = Date.now();
|
|
13
|
-
// Write to
|
|
13
|
+
// Write to pinakes_audit table
|
|
14
14
|
try {
|
|
15
15
|
writer
|
|
16
|
-
.prepare(`INSERT INTO
|
|
16
|
+
.prepare(`INSERT INTO pinakes_audit (ts, tool_name, scope_requested, caller_ctx, response_tokens, error)
|
|
17
17
|
VALUES (?, ?, ?, ?, ?, ?)`)
|
|
18
18
|
.run(ts, entry.toolName, entry.scopeRequested, entry.callerCtx ?? null, entry.responseTokens ?? null, entry.error ?? null);
|
|
19
19
|
}
|
|
20
20
|
catch (err) {
|
|
21
|
-
logger.warn({ err, entry }, 'failed to write
|
|
21
|
+
logger.warn({ err, entry }, 'failed to write pinakes_audit row');
|
|
22
22
|
}
|
|
23
23
|
// Mirror to JSONL
|
|
24
24
|
if (jsonlPath) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"audit.js","sourceRoot":"","sources":["../../src/observability/audit.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAChE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAmBrC;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAC3B,MAA4B,EAC5B,SAA6B,EAC7B,KAAiB;IAEjB,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEtB
|
|
1
|
+
{"version":3,"file":"audit.js","sourceRoot":"","sources":["../../src/observability/audit.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,SAAS,CAAC;AAChE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAmBrC;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAC3B,MAA4B,EAC5B,SAA6B,EAC7B,KAAiB;IAEjB,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEtB,+BAA+B;IAC/B,IAAI,CAAC;QACH,MAAM;aACH,OAAO,CACN;mCAC2B,CAC5B;aACA,GAAG,CACF,EAAE,EACF,KAAK,CAAC,QAAQ,EACd,KAAK,CAAC,cAAc,EACpB,KAAK,CAAC,SAAS,IAAI,IAAI,EACvB,KAAK,CAAC,cAAc,IAAI,IAAI,EAC5B,KAAK,CAAC,KAAK,IAAI,IAAI,CACpB,CAAC;IACN,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,mCAAmC,CAAC,CAAC;IACnE,CAAC;IAED,kBAAkB;IAClB,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;YAC/B,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACrB,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YACtC,CAAC;YACD,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,EAAE,GAAG,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC;YACrD,cAAc,CAAC,SAAS,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QAC3C,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,EAAE,8BAA8B,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;AACH,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { pino } from 'pino';
|
|
2
2
|
/**
|
|
3
|
-
* Pino logger for
|
|
3
|
+
* Pino logger for Pinakes.
|
|
4
4
|
*
|
|
5
5
|
* CRITICAL: MCP stdio transport uses stdout for the JSON-RPC protocol. Every
|
|
6
6
|
* log line must go to stderr — writing to stdout would corrupt the protocol
|
|
@@ -9,20 +9,20 @@ import { pino } from 'pino';
|
|
|
9
9
|
* destination that would route to stdout.
|
|
10
10
|
*
|
|
11
11
|
* Pretty transport is only enabled when:
|
|
12
|
-
* 1. `
|
|
12
|
+
* 1. `PINAKES_LOG_LEVEL=debug` or `trace`, AND
|
|
13
13
|
* 2. stderr is a TTY (i.e. a developer is watching in a terminal)
|
|
14
14
|
*
|
|
15
15
|
* In production (Pharos-spawned stdio child, non-TTY), logs stay as newline-
|
|
16
16
|
* delimited JSON for machine consumption.
|
|
17
17
|
*/
|
|
18
|
-
const level = process.env.
|
|
18
|
+
const level = process.env.PINAKES_LOG_LEVEL ?? 'info';
|
|
19
19
|
const isVerbose = level === 'debug' || level === 'trace';
|
|
20
20
|
const isTty = process.stderr.isTTY ?? false;
|
|
21
21
|
const usePretty = isVerbose && isTty;
|
|
22
22
|
const baseOptions = {
|
|
23
23
|
level,
|
|
24
24
|
base: {
|
|
25
|
-
service: '
|
|
25
|
+
service: 'pinakes',
|
|
26
26
|
pid: process.pid,
|
|
27
27
|
},
|
|
28
28
|
timestamp: pino.stdTimeFunctions.isoTime,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/observability/logger.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAmC,MAAM,MAAM,CAAC;AAE7D;;;;;;;;;;;;;;;GAeG;AAEH,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../../src/observability/logger.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAmC,MAAM,MAAM,CAAC;AAE7D;;;;;;;;;;;;;;;GAeG;AAEH,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,MAAM,CAAC;AACtD,MAAM,SAAS,GAAG,KAAK,KAAK,OAAO,IAAI,KAAK,KAAK,OAAO,CAAC;AACzD,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,IAAI,KAAK,CAAC;AAC5C,MAAM,SAAS,GAAG,SAAS,IAAI,KAAK,CAAC;AAErC,MAAM,WAAW,GAAkB;IACjC,KAAK;IACL,IAAI,EAAE;QACJ,OAAO,EAAE,SAAS;QAClB,GAAG,EAAE,OAAO,CAAC,GAAG;KACjB;IACD,SAAS,EAAE,IAAI,CAAC,gBAAgB,CAAC,OAAO;CACzC,CAAC;AAEF,SAAS,YAAY;IACnB,IAAI,SAAS,EAAE,CAAC;QACd,2EAA2E;QAC3E,yCAAyC;QACzC,OAAO,IAAI,CAAC;YACV,GAAG,WAAW;YACd,SAAS,EAAE;gBACT,MAAM,EAAE,aAAa;gBACrB,OAAO,EAAE;oBACP,QAAQ,EAAE,IAAI;oBACd,WAAW,EAAE,CAAC,EAAE,SAAS;oBACzB,aAAa,EAAE,YAAY;oBAC3B,MAAM,EAAE,sBAAsB;iBAC/B;aACF;SACF,CAAC,CAAC;IACL,CAAC;IACD,0EAA0E;IAC1E,+DAA+D;IAC/D,8BAA8B;IAC9B,OAAO,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;AAC3C,CAAC;AAED,MAAM,CAAC,MAAM,MAAM,GAAW,YAAY,EAAE,CAAC;AAE7C,MAAM,UAAU,KAAK,CAAC,QAAiC;IACrD,OAAO,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;AAChC,CAAC"}
|
|
@@ -57,7 +57,7 @@ export function installSighupHandler() {
|
|
|
57
57
|
process.on('SIGHUP', () => {
|
|
58
58
|
const snap = metrics.snapshot();
|
|
59
59
|
// Write directly to stderr to avoid pino's async buffering.
|
|
60
|
-
process.stderr.write(JSON.stringify({
|
|
60
|
+
process.stderr.write(JSON.stringify({ pinakes_metrics: snap }) + '\n');
|
|
61
61
|
logger.info('SIGHUP: metrics dumped to stderr');
|
|
62
62
|
});
|
|
63
63
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/observability/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAiBrC,MAAM,OAAO;IACH,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvC,WAAW,GAAG,IAAI,GAAG,EAAyB,CAAC;IAC/C,WAAW,GAAG,CAAC,CAAC;IAChB,YAAY,GAAG,CAAC,CAAC;IACR,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAExC,cAAc,CAAC,IAAY,EAAE,SAAiB,EAAE,KAAe;QAC7D,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC9D,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;QACvE,GAAG,CAAC,KAAK,EAAE,CAAC;QACZ,GAAG,CAAC,GAAG,IAAI,SAAS,CAAC;QACrB,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAClC,CAAC;IAED,YAAY,CAAC,KAAe;QAC1B,IAAI,KAAK;YAAE,IAAI,CAAC,YAAY,EAAE,CAAC;;YAC1B,IAAI,CAAC,WAAW,EAAE,CAAC;IAC1B,CAAC;IAED,QAAQ;QACN,OAAO;YACL,UAAU,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC;YAC9C,WAAW,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC;YAChD,eAAe,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC;YACrD,YAAY,EAAE,IAAI,CAAC,WAAW;YAC9B,aAAa,EAAE,IAAI,CAAC,YAAY;YAChC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC;SAC3D,CAAC;IACJ,CAAC;CACF;AAED,MAAM,CAAC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAErC;;;GAGG;AACH,IAAI,eAAe,GAAG,KAAK,CAAC;AAC5B,MAAM,UAAU,oBAAoB;IAClC,IAAI,eAAe;QAAE,OAAO;IAC5B,eAAe,GAAG,IAAI,CAAC;IACvB,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;QACxB,MAAM,IAAI,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAChC,4DAA4D;QAC5D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,
|
|
1
|
+
{"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/observability/metrics.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAiBrC,MAAM,OAAO;IACH,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvC,WAAW,GAAG,IAAI,GAAG,EAAyB,CAAC;IAC/C,WAAW,GAAG,CAAC,CAAC;IAChB,YAAY,GAAG,CAAC,CAAC;IACR,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAExC,cAAc,CAAC,IAAY,EAAE,SAAiB,EAAE,KAAe;QAC7D,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC9D,IAAI,KAAK,EAAE,CAAC;YACV,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;QACvE,GAAG,CAAC,KAAK,EAAE,CAAC;QACZ,GAAG,CAAC,GAAG,IAAI,SAAS,CAAC;QACrB,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QACvC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAClC,CAAC;IAED,YAAY,CAAC,KAAe;QAC1B,IAAI,KAAK;YAAE,IAAI,CAAC,YAAY,EAAE,CAAC;;YAC1B,IAAI,CAAC,WAAW,EAAE,CAAC;IAC1B,CAAC;IAED,QAAQ;QACN,OAAO;YACL,UAAU,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC;YAC9C,WAAW,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC;YAChD,eAAe,EAAE,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,WAAW,CAAC;YACrD,YAAY,EAAE,IAAI,CAAC,WAAW;YAC9B,aAAa,EAAE,IAAI,CAAC,YAAY;YAChC,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC;SAC3D,CAAC;IACJ,CAAC;CACF;AAED,MAAM,CAAC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAErC;;;GAGG;AACH,IAAI,eAAe,GAAG,KAAK,CAAC;AAC5B,MAAM,UAAU,oBAAoB;IAClC,IAAI,eAAe;QAAE,OAAO;IAC5B,eAAe,GAAG,IAAI,CAAC;IACvB,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,GAAG,EAAE;QACxB,MAAM,IAAI,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAChC,4DAA4D;QAC5D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC,GAAG,IAAI,CAAC,CAAC;QACvE,MAAM,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;IAClD,CAAC,
CAAC,CAAC;AACL,CAAC"}
|
package/dist/paths.d.ts
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Centralized path resolution for Pinakes.
|
|
3
|
+
*
|
|
4
|
+
* All Pinakes data lives under a single root directory (~/.pinakes by default).
|
|
5
|
+
* Project data is stored in mirrored paths under ~/.pinakes/projects/, using
|
|
6
|
+
* the Claude Code convention of replacing '/' with '-' in the absolute project
|
|
7
|
+
* path. Personal data lives directly under the root.
|
|
8
|
+
*
|
|
9
|
+
* Layout:
|
|
10
|
+
* ~/.pinakes/
|
|
11
|
+
* wiki/ # personal wiki
|
|
12
|
+
* pinakes.db # personal DB
|
|
13
|
+
* audit.jsonl # personal audit
|
|
14
|
+
* manifest.json # personal manifest
|
|
15
|
+
* projects/
|
|
16
|
+
* -Users-sebastian-dev-myproject/ # mangled project root
|
|
17
|
+
* wiki/ # project wiki
|
|
18
|
+
* pinakes.db # project DB
|
|
19
|
+
* audit.jsonl # project audit
|
|
20
|
+
* manifest.json # project manifest
|
|
21
|
+
*/
|
|
22
|
+
/** Resolve a path to absolute, using cwd if relative. */
|
|
23
|
+
export declare function resolveAbs(p: string): string;
|
|
24
|
+
/**
|
|
25
|
+
* Mangle an absolute path into a directory-safe name.
|
|
26
|
+
* Uses Claude Code's convention: replace '/' with '-'.
|
|
27
|
+
*
|
|
28
|
+
* `/Users/sebastian/dev/myproject` → `-Users-sebastian-dev-myproject`
|
|
29
|
+
*/
|
|
30
|
+
export declare function mangleProjectPath(absolutePath: string): string;
|
|
31
|
+
/**
|
|
32
|
+
* Pinakes root directory. All data lives here.
|
|
33
|
+
* Default: `~/.pinakes`
|
|
34
|
+
* Override: `PINAKES_ROOT` env var
|
|
35
|
+
*/
|
|
36
|
+
export declare function pinakesRoot(): string;
|
|
37
|
+
/**
|
|
38
|
+
* The mirrored data directory for a project.
|
|
39
|
+
* e.g. `/Users/me/dev/proj` → `~/.pinakes/projects/-Users-me-dev-proj`
|
|
40
|
+
*/
|
|
41
|
+
export declare function projectDataDir(projectRoot: string): string;
|
|
42
|
+
export declare function projectWikiPath(projectRoot: string): string;
|
|
43
|
+
export declare function projectDbPath(projectRoot: string): string;
|
|
44
|
+
export declare function projectAuditJsonlPath(projectRoot: string): string;
|
|
45
|
+
export declare function projectManifestPath(projectRoot: string): string;
|
|
46
|
+
export declare function personalWikiPath(): string;
|
|
47
|
+
export declare function personalDbPath(): string;
|
|
48
|
+
export declare function personalAuditJsonlPath(): string;
|
|
49
|
+
export declare function personalManifestPath(): string;
|
|
50
|
+
export interface CliPathOverrides {
|
|
51
|
+
projectRoot?: string;
|
|
52
|
+
dbPath?: string;
|
|
53
|
+
wikiPath?: string;
|
|
54
|
+
profileDbPath?: string;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Resolve the DB path for a given scope, honoring CLI overrides.
|
|
58
|
+
* Shared by all CLI subcommands (status, audit, export, import, purge, etc.)
|
|
59
|
+
*/
|
|
60
|
+
export declare function resolveCliDbPath(options: CliPathOverrides, scope: 'project' | 'personal'): string;
|
|
61
|
+
//# sourceMappingURL=paths.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"paths.d.ts","sourceRoot":"","sources":["../src/paths.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AASH,yDAAyD;AACzD,wBAAgB,UAAU,CAAC,CAAC,EAAE,MAAM,GAAG,MAAM,CAE5C;AAED;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,YAAY,EAAE,MAAM,GAAG,MAAM,CAE9D;AAMD;;;;GAIG;AACH,wBAAgB,WAAW,IAAI,MAAM,CAIpC;AAMD;;;GAGG;AACH,wBAAgB,cAAc,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAG1D;AAED,wBAAgB,eAAe,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAE3D;AAED,wBAAgB,aAAa,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAEzD;AAED,wBAAgB,qBAAqB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAEjE;AAED,wBAAgB,mBAAmB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAE/D;AAMD,wBAAgB,gBAAgB,IAAI,MAAM,CAEzC;AAED,wBAAgB,cAAc,IAAI,MAAM,CAEvC;AAED,wBAAgB,sBAAsB,IAAI,MAAM,CAE/C;AAED,wBAAgB,oBAAoB,IAAI,MAAM,CAE7C;AAMD,MAAM,WAAW,gBAAgB;IAC/B,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,gBAAgB,EACzB,KAAK,EAAE,SAAS,GAAG,UAAU,GAC5B,MAAM,CAOR"}
|
package/dist/paths.js
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Centralized path resolution for Pinakes.
|
|
3
|
+
*
|
|
4
|
+
* All Pinakes data lives under a single root directory (~/.pinakes by default).
|
|
5
|
+
* Project data is stored in mirrored paths under ~/.pinakes/projects/, using
|
|
6
|
+
* the Claude Code convention of replacing '/' with '-' in the absolute project
|
|
7
|
+
* path. Personal data lives directly under the root.
|
|
8
|
+
*
|
|
9
|
+
* Layout:
|
|
10
|
+
* ~/.pinakes/
|
|
11
|
+
* wiki/ # personal wiki
|
|
12
|
+
* pinakes.db # personal DB
|
|
13
|
+
* audit.jsonl # personal audit
|
|
14
|
+
* manifest.json # personal manifest
|
|
15
|
+
* projects/
|
|
16
|
+
* -Users-sebastian-dev-myproject/ # mangled project root
|
|
17
|
+
* wiki/ # project wiki
|
|
18
|
+
* pinakes.db # project DB
|
|
19
|
+
* audit.jsonl # project audit
|
|
20
|
+
* manifest.json # project manifest
|
|
21
|
+
*/
|
|
22
|
+
import { homedir } from 'node:os';
|
|
23
|
+
import { isAbsolute, resolve } from 'node:path';
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Helpers
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
/**
 * Resolve a path to a normalized absolute path.
 *
 * Relative input is resolved against `process.cwd()`. Absolute input is
 * normalized too (`.` / `..` segments collapsed, trailing separators
 * dropped), so equivalent spellings of one directory such as `/a/b` and
 * `/a/b/` produce the same string. That matters because project data
 * directories are derived from this string character by character.
 *
 * @param {string} p - Absolute or relative filesystem path.
 * @returns {string} Normalized absolute path.
 */
export function resolveAbs(p) {
    // path.resolve() falls back to cwd for relative input and normalizes
    // the result, so no separate isAbsolute() branch is required.
    return resolve(p);
}
|
|
31
|
+
/**
 * Mangle an absolute path into a single directory-safe name.
 *
 * Follows Claude Code's convention of replacing path separators with '-':
 *
 *   `/Users/sebastian/dev/myproject` → `-Users-sebastian-dev-myproject`
 *
 * Windows separators and the drive colon are replaced as well
 * (`C:\Users\me\proj` → `C--Users-me-proj`). Without that, a Windows path
 * would remain absolute after mangling and `path.resolve()` would discard
 * every segment before it when the result is joined under the data root.
 *
 * Note: the mapping is not injective (`/a/b-c` and `/a-b/c` both yield
 * `-a-b-c`).
 *
 * @param {string} absolutePath - Absolute project path.
 * @returns {string} The path with every '/', '\' and ':' replaced by '-'.
 */
export function mangleProjectPath(absolutePath) {
    return absolutePath.replace(/[\\/:]/g, '-');
}
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Root
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
/**
 * Location of the Pinakes root directory, under which all data is stored.
 *
 * Uses the `PINAKES_ROOT` environment variable when it is set to a
 * non-empty value (made absolute via `resolveAbs`); otherwise falls back to
 * `.pinakes` inside the current user's home directory.
 *
 * @returns {string} Absolute path of the root directory.
 */
export function pinakesRoot() {
    const override = process.env.PINAKES_ROOT;
    return override ? resolveAbs(override) : resolve(homedir(), '.pinakes');
}
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
// Project paths (derived from project root directory)
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
/**
 * Mirrored data directory for a project, located under
 * `<pinakes root>/projects/`.
 *
 * e.g. `/Users/me/dev/proj` → `~/.pinakes/projects/-Users-me-dev-proj`
 *
 * @param {string} projectRoot - Project root; made absolute via `resolveAbs`.
 * @returns {string} Absolute path of the project's data directory.
 */
export function projectDataDir(projectRoot) {
    const mangledName = mangleProjectPath(resolveAbs(projectRoot));
    return resolve(pinakesRoot(), 'projects', mangledName);
}
|
|
65
|
+
/**
 * Path of the `wiki` directory inside a project's data directory.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {string} Absolute path of the project wiki.
 */
export function projectWikiPath(projectRoot) {
    const dataDir = projectDataDir(projectRoot);
    return resolve(dataDir, 'wiki');
}
|
|
68
|
+
/**
 * Path of the `pinakes.db` file inside a project's data directory.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {string} Absolute path of the project DB file.
 */
export function projectDbPath(projectRoot) {
    const dataDir = projectDataDir(projectRoot);
    return resolve(dataDir, 'pinakes.db');
}
|
|
71
|
+
/**
 * Path of the `audit.jsonl` file inside a project's data directory.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {string} Absolute path of the project audit log.
 */
export function projectAuditJsonlPath(projectRoot) {
    const dataDir = projectDataDir(projectRoot);
    return resolve(dataDir, 'audit.jsonl');
}
|
|
74
|
+
/**
 * Path of the `manifest.json` file inside a project's data directory.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {string} Absolute path of the project manifest.
 */
export function projectManifestPath(projectRoot) {
    const dataDir = projectDataDir(projectRoot);
    return resolve(dataDir, 'manifest.json');
}
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
// Personal paths (directly under pinakes root)
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
/**
 * Path of the personal `wiki` directory, directly under the Pinakes root.
 *
 * @returns {string} Absolute path of the personal wiki.
 */
export function personalWikiPath() {
    const root = pinakesRoot();
    return resolve(root, 'wiki');
}
|
|
83
|
+
/**
 * Path of the personal `pinakes.db` file, directly under the Pinakes root.
 *
 * @returns {string} Absolute path of the personal DB file.
 */
export function personalDbPath() {
    const root = pinakesRoot();
    return resolve(root, 'pinakes.db');
}
|
|
86
|
+
/**
 * Path of the personal `audit.jsonl` file, directly under the Pinakes root.
 *
 * @returns {string} Absolute path of the personal audit log.
 */
export function personalAuditJsonlPath() {
    const root = pinakesRoot();
    return resolve(root, 'audit.jsonl');
}
|
|
89
|
+
/**
 * Path of the personal `manifest.json` file, directly under the Pinakes root.
 *
 * @returns {string} Absolute path of the personal manifest.
 */
export function personalManifestPath() {
    const root = pinakesRoot();
    return resolve(root, 'manifest.json');
}
|
|
92
|
+
/**
 * Pick the DB file path for a scope, letting CLI overrides win.
 * Shared by all CLI subcommands (status, audit, export, import, purge, etc.)
 *
 * - `'personal'` scope: `options.profileDbPath` if truthy, otherwise the
 *   default personal DB path.
 * - any other scope: `options.dbPath` if truthy, otherwise the project DB
 *   path derived from `options.projectRoot` (or the current working
 *   directory when that is null/undefined).
 *
 * @param {object} options - CLI path overrides (`projectRoot`, `dbPath`,
 *   `profileDbPath`).
 * @param {string} scope - `'project'` or `'personal'`.
 * @returns {string} Absolute path of the DB file to use.
 */
export function resolveCliDbPath(options, scope) {
    const isPersonal = scope === 'personal';
    // Each scope honors only its own override option.
    const override = isPersonal ? options.profileDbPath : options.dbPath;
    if (override) {
        return resolveAbs(override);
    }
    return isPersonal
        ? personalDbPath()
        : projectDbPath(options.projectRoot ?? process.cwd());
}
|
|
106
|
+
//# sourceMappingURL=paths.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"paths.js","sourceRoot":"","sources":["../src/paths.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEhD,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,yDAAyD;AACzD,MAAM,UAAU,UAAU,CAAC,CAAS;IAClC,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC;AACvD,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,YAAoB;IACpD,OAAO,YAAY,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;AAC1C,CAAC;AAED,8EAA8E;AAC9E,OAAO;AACP,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC;IACrC,IAAI,GAAG;QAAE,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC;IAChC,OAAO,OAAO,CAAC,OAAO,EAAE,EAAE,UAAU,CAAC,CAAC;AACxC,CAAC;AAED,8EAA8E;AAC9E,sDAAsD;AACtD,8EAA8E;AAE9E;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,WAAmB;IAChD,MAAM,GAAG,GAAG,UAAU,CAAC,WAAW,CAAC,CAAC;IACpC,OAAO,OAAO,CAAC,WAAW,EAAE,EAAE,UAAU,EAAE,iBAAiB,CAAC,GAAG,CAAC,CAAC,CAAC;AACpE,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,WAAmB;IACjD,OAAO,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,EAAE,MAAM,CAAC,CAAC;AACtD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,WAAmB;IAC/C,OAAO,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,EAAE,YAAY,CAAC,CAAC;AAC5D,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,WAAmB;IACvD,OAAO,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,EAAE,aAAa,CAAC,CAAC;AAC7D,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,WAAmB;IACrD,OAAO,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,EAAE,eAAe,CAAC,CAAC;AAC/D,CAAC;AAED,8EAA8E;AAC9E,+CAA+C;AAC/C,8EAA8E;AAE9E,MAAM,UAAU,gBAAgB;IAC9B,OAAO,OAAO,CAAC,WAAW,EAAE,EAAE,MAAM,CAAC,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,cAAc;IAC5B,OAAO,OAAO,CAAC,WAAW,EAAE,EAAE,YAAY,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,UAAU,sBAAsB;IACpC,OAAO,OAAO,CAAC,WAAW,EAAE,EAAE,aAAa,CAAC,CAAC;AAC/C,CAAC;AAED,MAAM,UAAU,oBAAoB;IAClC,OAAO,OAAO,CAAC,WAAW,EAAE,EAAE,eAAe,CAAC,CAAC;AACjD,CAAC;AAaD;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAC9B,OAAyB,EACzB,KAA6B;IAE7B,IAAI,KAAK,KAAK,UAAU,EAAE,CAAC;QACzB,IAAI,OAAO,CAAC,aAAa;YAAE,OAAO,UAAU,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACpE,OAAO,cAAc,EAAE,CAAC;IAC1B,CAAC;IACD,IAAI,OAAO,CA
AC,MAAM;QAAE,OAAO,UAAU,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IACtD,OAAO,aAAa,CAAC,OAAO,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC,CAAC;AAC7D,CAAC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { HybridResult } from './hybrid.js';
|
|
2
|
+
/**
|
|
3
|
+
* Post-RRF dedup pipeline (D37, GBrain-inspired).
|
|
4
|
+
*
|
|
5
|
+
* Three layers, applied in order after RRF fusion:
|
|
6
|
+
*
|
|
7
|
+
* 1. **Source-URI cap (max 3)**: no single wiki page dominates results.
|
|
8
|
+
* 2. **Jaccard bigram similarity (>0.85)**: drop near-duplicate text
|
|
9
|
+
* regardless of which file it came from.
|
|
10
|
+
* 3. **Final source-URI cap (max 2)**: tighter diversity after dedup.
|
|
11
|
+
*
|
|
12
|
+
* The input must already be sorted by descending RRF score (which
|
|
13
|
+
* `rrfFuse` guarantees). The output preserves that ordering.
|
|
14
|
+
*/
|
|
15
|
+
/**
|
|
16
|
+
* Deduplicate hybrid search results. Input MUST be sorted by descending
|
|
17
|
+
* RRF score. Returns a new array (does not mutate the input).
|
|
18
|
+
*/
|
|
19
|
+
export declare function dedupResults(results: HybridResult[]): HybridResult[];
|
|
20
|
+
/** Split text into a set of whitespace-separated bigrams. */
|
|
21
|
+
declare function toBigrams(text: string): Set<string>;
|
|
22
|
+
/** |A ∩ B| / |A ∪ B| — classic Jaccard index. */
|
|
23
|
+
declare function jaccardSimilarity(a: Set<string>, b: Set<string>): number;
|
|
24
|
+
export { toBigrams as _toBigrams, jaccardSimilarity as _jaccardSimilarity };
|
|
25
|
+
//# sourceMappingURL=dedup.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dedup.d.ts","sourceRoot":"","sources":["../../src/retrieval/dedup.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;;;;;;;;;;;GAYG;AAEH;;;GAGG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,EAAE,CAWpE;AAoDD,6DAA6D;AAC7D,iBAAS,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,CAW5C;AAED,iDAAiD;AACjD,iBAAS,iBAAiB,CAAC,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,MAAM,CAUjE;AAGD,OAAO,EAAE,SAAS,IAAI,UAAU,EAAE,iBAAiB,IAAI,kBAAkB,EAAE,CAAC"}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-RRF dedup pipeline (D37, GBrain-inspired).
|
|
3
|
+
*
|
|
4
|
+
* Three layers, applied in order after RRF fusion:
|
|
5
|
+
*
|
|
6
|
+
* 1. **Source-URI cap (max 3)**: no single wiki page dominates results.
|
|
7
|
+
* 2. **Jaccard bigram similarity (>0.85)**: drop near-duplicate text
|
|
8
|
+
* regardless of which file it came from.
|
|
9
|
+
* 3. **Final source-URI cap (max 2)**: tighter diversity after dedup.
|
|
10
|
+
*
|
|
11
|
+
* The input must already be sorted by descending RRF score (which
|
|
12
|
+
* `rrfFuse` guarantees). The output preserves that ordering.
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Run the three dedup layers over hybrid search results and return the
 * survivors as a new array; the input array is not modified.
 *
 * The input MUST already be sorted by descending RRF score: every layer
 * keeps the earliest occurrences and drops later ones.
 *
 * @param {Array} results - Ranked results, each with `source_uri` and `text`.
 * @returns {Array} Filtered results in their original relative order.
 */
export function dedupResults(results) {
    // Layer 1: at most 3 chunks per source_uri.
    const cappedAtThree = capPerSourceUri(results, 3);
    // Layer 2: drop chunks whose bigram Jaccard similarity to an
    // already-kept chunk exceeds 0.85.
    const distinct = jaccardDedup(cappedAtThree, 0.85);
    // Layer 3: final, tighter cap of 2 per source_uri.
    return capPerSourceUri(distinct, 2);
}
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Layer 1 & 3: cap per source_uri
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
/**
 * Keep at most `maxPerUri` results for each distinct `source_uri`,
 * preferring the ones that appear first. Relative order is preserved and a
 * new array is returned.
 *
 * @param {Array} results - Results carrying a `source_uri` property.
 * @param {number} maxPerUri - Maximum results retained per `source_uri`.
 * @returns {Array} The retained results.
 */
function capPerSourceUri(results, maxPerUri) {
    const seenPerUri = new Map();
    return results.filter((result) => {
        const seen = seenPerUri.get(result.source_uri) ?? 0;
        if (seen >= maxPerUri) {
            return false;
        }
        seenPerUri.set(result.source_uri, seen + 1);
        return true;
    });
}
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Layer 2: Jaccard bigram similarity
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
/**
 * Drop every result whose text is a near-duplicate of an earlier result
 * that was kept, i.e. whose bigram Jaccard similarity to it is strictly
 * greater than `threshold`. Rank order of the kept results is preserved.
 *
 * @param {Array} results - Ranked results carrying a `text` property.
 * @param {number} threshold - Similarity above which a result is dropped.
 * @returns {Array} New array with near-duplicates removed.
 */
function jaccardDedup(results, threshold) {
    const survivors = [];
    // Bigram sets of the survivors, index-aligned with `survivors`, so each
    // kept text is tokenized only once.
    const survivorBigrams = [];
    for (const candidate of results) {
        const candidateBigrams = toBigrams(candidate.text);
        const isNearDuplicate = survivorBigrams.some((prior) => jaccardSimilarity(candidateBigrams, prior) > threshold);
        if (isNearDuplicate) {
            continue;
        }
        survivors.push(candidate);
        survivorBigrams.push(candidateBigrams);
    }
    return survivors;
}
|
|
68
|
+
/**
 * Build the set of adjacent word pairs ("bigrams") of a text.
 *
 * The text is lower-cased and split on runs of whitespace; each bigram is
 * the two words joined by a single space. A text consisting of exactly one
 * word yields a set holding just that word, and a text with no words
 * yields an empty set.
 *
 * @param {string} text - Text to tokenize.
 * @returns {Set<string>} Distinct bigrams of the text.
 */
function toBigrams(text) {
    const tokens = text.toLowerCase().split(/\s+/).filter(Boolean);
    // A lone word has no neighbor to pair with; it stands in for itself.
    if (tokens.length === 1) {
        return new Set(tokens);
    }
    const pairs = new Set();
    for (let next = 1; next < tokens.length; next += 1) {
        pairs.add(`${tokens[next - 1]} ${tokens[next]}`);
    }
    return pairs;
}
|
|
81
|
+
/**
 * Jaccard index of two sets: |A ∩ B| / |A ∪ B|.
 *
 * Two empty sets are treated as identical and score 1.
 *
 * @param {Set<string>} a - First set.
 * @param {Set<string>} b - Second set.
 * @returns {number} Similarity in the range [0, 1].
 */
function jaccardSimilarity(a, b) {
    // Walk the smaller set and probe the larger one.
    const [small, large] = a.size <= b.size ? [a, b] : [b, a];
    let shared = 0;
    small.forEach((item) => {
        if (large.has(item)) {
            shared += 1;
        }
    });
    const unionSize = a.size + b.size - shared;
    // unionSize is 0 only when both sets are empty.
    return unionSize === 0 ? 1 : shared / unionSize;
}
|
|
95
|
+
// Re-export for testing
|
|
96
|
+
export { toBigrams as _toBigrams, jaccardSimilarity as _jaccardSimilarity };
|
|
97
|
+
//# sourceMappingURL=dedup.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"dedup.js","sourceRoot":"","sources":["../../src/retrieval/dedup.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;GAYG;AAEH;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,OAAuB;IAClD,uCAAuC;IACvC,IAAI,IAAI,GAAG,eAAe,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IAEvC,iEAAiE;IACjE,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAEhC,yCAAyC;IACzC,IAAI,GAAG,eAAe,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAEhC,OAAO,IAAI,CAAC;AACd,CAAC;AAED,8EAA8E;AAC9E,kCAAkC;AAClC,8EAA8E;AAE9E,SAAS,eAAe,CAAC,OAAuB,EAAE,SAAiB;IACjE,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,MAAM,GAAG,GAAmB,EAAE,CAAC;IAE/B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QAC5C,IAAI,KAAK,IAAI,SAAS;YAAE,SAAS;QACjC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;QACpC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACd,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED,8EAA8E;AAC9E,qCAAqC;AACrC,8EAA8E;AAE9E;;;GAGG;AACH,SAAS,YAAY,CAAC,OAAuB,EAAE,SAAiB;IAC9D,MAAM,IAAI,GAAmB,EAAE,CAAC;IAChC,MAAM,WAAW,GAAkB,EAAE,CAAC;IAEtC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,SAAS,GAAG,KAAK,CAAC;QAEtB,KAAK,MAAM,eAAe,IAAI,WAAW,EAAE,CAAC;YAC1C,IAAI,iBAAiB,CAAC,OAAO,EAAE,eAAe,CAAC,GAAG,SAAS,EAAE,CAAC;gBAC5D,SAAS,GAAG,IAAI,CAAC;gBACjB,MAAM;YACR,CAAC;QACH,CAAC;QAED,IAAI,CAAC,SAAS,EAAE,CAAC;YACf,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACb,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,6DAA6D;AAC7D,SAAS,SAAS,CAAC,IAAY;IAC7B,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC9D,MAAM,OAAO,GAAG,IAAI,GAAG,EAAU,CAAC;IAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;IAC7C,CAAC;IACD,yDAAyD;IACzD,IAAI,OAAO,CAAC,IAAI,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACxB,CAA
C;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,iDAAiD;AACjD,SAAS,iBAAiB,CAAC,CAAc,EAAE,CAAc;IACvD,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3C,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACxC,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,YAAY,EAAE,CAAC;IACvC,CAAC;IACD,MAAM,KAAK,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,GAAG,YAAY,CAAC;IAC7C,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,KAAK,CAAC;AAChD,CAAC;AAED,wBAAwB;AACxB,OAAO,EAAE,SAAS,IAAI,UAAU,EAAE,iBAAiB,IAAI,kBAAkB,EAAE,CAAC"}
|
|
@@ -5,15 +5,15 @@
|
|
|
5
5
|
* between the ingest path (computing chunk embeddings on insert) and the
|
|
6
6
|
* future query path in Phase 4 (computing query embeddings for vector
|
|
7
7
|
* search). Putting it under `retrieval/` keeps the code-mode bindings'
|
|
8
|
-
* mental model consistent — `
|
|
8
|
+
* mental model consistent — `pinakes.vec()` and ingest both call into the same
|
|
9
9
|
* factory.
|
|
10
10
|
*
|
|
11
11
|
* **Provider strategy** (CLAUDE.md §AI Rules #3): Phase 2 ships only the
|
|
12
12
|
* bundled MiniLM provider. Phase 4 adds the env-driven factory:
|
|
13
|
-
* - `
|
|
14
|
-
* - `
|
|
15
|
-
* - `
|
|
16
|
-
* - `
|
|
13
|
+
* - `PINAKES_EMBED_PROVIDER=transformers` (default, this file)
|
|
14
|
+
* - `PINAKES_EMBED_PROVIDER=ollama` (HTTP, user-controlled)
|
|
15
|
+
* - `PINAKES_EMBED_PROVIDER=voyage` (HTTPS, paid)
|
|
16
|
+
* - `PINAKES_EMBED_PROVIDER=openai` (HTTPS, paid)
|
|
17
17
|
*
|
|
18
18
|
* **Failure mode** (CLAUDE.md §AI Rules #4): if the embedder fails during
|
|
19
19
|
* ingest, the ingester logs a warning and inserts the node + chunks WITHOUT
|
|
@@ -32,7 +32,7 @@ export declare const EMBEDDING_DIM = 384;
|
|
|
32
32
|
* `VoyageEmbedder` / `OpenAIEmbedder`.
|
|
33
33
|
*/
|
|
34
34
|
export interface Embedder {
|
|
35
|
-
/** Output dimension — load-bearing for the `
|
|
35
|
+
/** Output dimension — load-bearing for the `pinakes_chunks_vec` schema */
|
|
36
36
|
readonly dim: number;
|
|
37
37
|
/** Eagerly load the underlying model. Optional — embed() will lazy-load on first call. */
|
|
38
38
|
warmup(): Promise<void>;
|
|
@@ -80,7 +80,7 @@ export declare class CountingEmbedder implements Embedder {
|
|
|
80
80
|
}
|
|
81
81
|
/**
|
|
82
82
|
* Ollama embedder via HTTP POST to `/api/embeddings`.
|
|
83
|
-
* Requires `
|
|
83
|
+
* Requires `PINAKES_OLLAMA_URL` and `PINAKES_OLLAMA_MODEL` env vars.
|
|
84
84
|
*/
|
|
85
85
|
export declare class OllamaEmbedder implements Embedder {
|
|
86
86
|
private readonly baseUrl;
|
|
@@ -92,7 +92,7 @@ export declare class OllamaEmbedder implements Embedder {
|
|
|
92
92
|
}
|
|
93
93
|
/**
|
|
94
94
|
* Voyage AI embedder via HTTPS POST to `https://api.voyageai.com/v1/embeddings`.
|
|
95
|
-
* Requires `
|
|
95
|
+
* Requires `PINAKES_VOYAGE_API_KEY` env var.
|
|
96
96
|
*/
|
|
97
97
|
export declare class VoyageEmbedder implements Embedder {
|
|
98
98
|
private readonly apiKey;
|
|
@@ -104,7 +104,7 @@ export declare class VoyageEmbedder implements Embedder {
|
|
|
104
104
|
}
|
|
105
105
|
/**
|
|
106
106
|
* OpenAI embedder via HTTPS POST to `https://api.openai.com/v1/embeddings`.
|
|
107
|
-
* Requires `
|
|
107
|
+
* Requires `PINAKES_OPENAI_API_KEY` env var.
|
|
108
108
|
*/
|
|
109
109
|
export declare class OpenAIEmbedder implements Embedder {
|
|
110
110
|
private readonly apiKey;
|