r2mcp 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/LICENSE +21 -0
- package/README.md +532 -0
- package/dist/breadcrumbs.d.ts +123 -0
- package/dist/breadcrumbs.js +135 -0
- package/dist/cli/classify-edges.d.ts +2 -0
- package/dist/cli/classify-edges.js +130 -0
- package/dist/cli/compile-wiki.d.ts +2 -0
- package/dist/cli/compile-wiki.js +173 -0
- package/dist/cli/dump-edges-json.d.ts +2 -0
- package/dist/cli/dump-edges-json.js +21 -0
- package/dist/cli/extract-entities.d.ts +17 -0
- package/dist/cli/extract-entities.js +166 -0
- package/dist/cli/lint-memory.d.ts +16 -0
- package/dist/cli/lint-memory.js +94 -0
- package/dist/cli/migrate.d.ts +17 -0
- package/dist/cli/migrate.js +146 -0
- package/dist/cli/setup-helpers.d.ts +7 -0
- package/dist/cli/setup-helpers.js +72 -0
- package/dist/cli/setup.d.ts +15 -0
- package/dist/cli/setup.js +95 -0
- package/dist/compiler/clustering.d.ts +29 -0
- package/dist/compiler/clustering.js +66 -0
- package/dist/compiler/frontmatter.d.ts +35 -0
- package/dist/compiler/frontmatter.js +168 -0
- package/dist/compiler/manifest.d.ts +32 -0
- package/dist/compiler/manifest.js +82 -0
- package/dist/compiler/prompts.d.ts +17 -0
- package/dist/compiler/prompts.js +82 -0
- package/dist/compiler/run.d.ts +52 -0
- package/dist/compiler/run.js +186 -0
- package/dist/compiler/tier.d.ts +10 -0
- package/dist/compiler/tier.js +85 -0
- package/dist/compiler/topic.d.ts +16 -0
- package/dist/compiler/topic.js +105 -0
- package/dist/compiler/types.d.ts +101 -0
- package/dist/compiler/types.js +4 -0
- package/dist/db.d.ts +10 -0
- package/dist/db.js +46 -0
- package/dist/edges/candidate-pairs.d.ts +24 -0
- package/dist/edges/candidate-pairs.js +35 -0
- package/dist/edges/classifier.d.ts +45 -0
- package/dist/edges/classifier.js +172 -0
- package/dist/edges/signals.d.ts +13 -0
- package/dist/edges/signals.js +45 -0
- package/dist/edges/stage1-haiku.d.ts +21 -0
- package/dist/edges/stage1-haiku.js +33 -0
- package/dist/edges/stage2-opus.d.ts +41 -0
- package/dist/edges/stage2-opus.js +101 -0
- package/dist/edges/state.d.ts +44 -0
- package/dist/edges/state.js +79 -0
- package/dist/edges/types.d.ts +20 -0
- package/dist/edges/types.js +1 -0
- package/dist/embeddings.d.ts +13 -0
- package/dist/embeddings.js +54 -0
- package/dist/entities/db.d.ts +49 -0
- package/dist/entities/db.js +109 -0
- package/dist/entities/extractor.d.ts +14 -0
- package/dist/entities/extractor.js +154 -0
- package/dist/entities/normalize.d.ts +5 -0
- package/dist/entities/normalize.js +7 -0
- package/dist/entities/prompt.d.ts +19 -0
- package/dist/entities/prompt.js +100 -0
- package/dist/entities/state.d.ts +44 -0
- package/dist/entities/state.js +99 -0
- package/dist/entities/types.d.ts +62 -0
- package/dist/entities/types.js +6 -0
- package/dist/env.d.ts +13 -0
- package/dist/env.js +32 -0
- package/dist/fingerprint.d.ts +2 -0
- package/dist/fingerprint.js +12 -0
- package/dist/graph-rebuild.d.ts +6 -0
- package/dist/graph-rebuild.js +20 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +403 -0
- package/dist/instrumentation.d.ts +10 -0
- package/dist/instrumentation.js +37 -0
- package/dist/lint/checks/contradictions.d.ts +30 -0
- package/dist/lint/checks/contradictions.js +52 -0
- package/dist/lint/checks/drift.d.ts +5 -0
- package/dist/lint/checks/drift.js +34 -0
- package/dist/lint/checks/orphans.d.ts +5 -0
- package/dist/lint/checks/orphans.js +25 -0
- package/dist/lint/checks/stale.d.ts +6 -0
- package/dist/lint/checks/stale.js +29 -0
- package/dist/lint/checks/superseded-unflagged.d.ts +5 -0
- package/dist/lint/checks/superseded-unflagged.js +47 -0
- package/dist/lint/run.d.ts +11 -0
- package/dist/lint/run.js +95 -0
- package/dist/lint/types.d.ts +60 -0
- package/dist/lint/types.js +13 -0
- package/dist/mcp-response.d.ts +7 -0
- package/dist/mcp-response.js +13 -0
- package/dist/providers/anthropic.d.ts +13 -0
- package/dist/providers/anthropic.js +56 -0
- package/dist/providers/claude-code.d.ts +35 -0
- package/dist/providers/claude-code.js +175 -0
- package/dist/providers/errors.d.ts +12 -0
- package/dist/providers/errors.js +19 -0
- package/dist/providers/index.d.ts +30 -0
- package/dist/providers/index.js +71 -0
- package/dist/providers/openrouter.d.ts +19 -0
- package/dist/providers/openrouter.js +76 -0
- package/dist/providers/semaphore.d.ts +19 -0
- package/dist/providers/semaphore.js +51 -0
- package/dist/providers/types.d.ts +27 -0
- package/dist/providers/types.js +7 -0
- package/dist/schema.sql +116 -0
- package/dist/server-instructions.d.ts +9 -0
- package/dist/server-instructions.js +20 -0
- package/dist/telemetry.d.ts +39 -0
- package/dist/telemetry.js +130 -0
- package/dist/tools/classify.d.ts +44 -0
- package/dist/tools/classify.js +121 -0
- package/dist/tools/compile.d.ts +31 -0
- package/dist/tools/compile.js +132 -0
- package/dist/tools/dump-edges-sidecar.d.ts +37 -0
- package/dist/tools/dump-edges-sidecar.js +80 -0
- package/dist/tools/extract-entities.d.ts +53 -0
- package/dist/tools/extract-entities.js +169 -0
- package/dist/tools/lint.d.ts +10 -0
- package/dist/tools/lint.js +13 -0
- package/dist/tools/meditate.d.ts +25 -0
- package/dist/tools/meditate.js +128 -0
- package/dist/tools/recall.d.ts +66 -0
- package/dist/tools/recall.js +409 -0
- package/dist/tools/reject.d.ts +10 -0
- package/dist/tools/reject.js +24 -0
- package/dist/tools/remember.d.ts +26 -0
- package/dist/tools/remember.js +140 -0
- package/dist/tools/search.d.ts +30 -0
- package/dist/tools/search.js +69 -0
- package/dist/tools/spawn-cli.d.ts +14 -0
- package/dist/tools/spawn-cli.js +41 -0
- package/dist/tools/stats.d.ts +31 -0
- package/dist/tools/stats.js +88 -0
- package/package.json +86 -0
- package/skills/remember/SKILL.md +357 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider-selection error surfaces. Kept in a dedicated module so consumers
|
|
3
|
+
* can import them without pulling in the heavy adapter modules.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Error type used by the provider-selection layer when a provider cannot be
 * chosen or constructed (no credentials, unrecognized provider name, ...).
 *
 * @param {string} message - Human-readable description of what is missing.
 * @param {{ cause?: unknown }} [options] - Standard `Error` options. Pass
 *   `{ cause }` to chain the underlying failure; omit it for the original
 *   single-argument behavior.
 */
export class ProviderUnavailableError extends Error {
    constructor(message, options) {
        // Forward options so `cause` is installed by the built-in Error
        // constructor; `undefined` options behave exactly like `super(message)`.
        super(message, options);
        this.name = 'ProviderUnavailableError';
    }
}
|
|
11
|
+
/**
|
|
12
|
+
* The exact error message thrown when no provider can be auto-selected.
|
|
13
|
+
* D.AC4 requires that all three remediation paths be named.
|
|
14
|
+
*/
|
|
15
|
+
export const NO_PROVIDER_AVAILABLE_MESSAGE = 'No LLM provider is configured. Pick one of:\n' +
|
|
16
|
+
' • Claude Code (Max plan, $0/call): run `claude /login` so the headless adapter can spawn `claude -p`.\n' +
|
|
17
|
+
' • Anthropic API: export ANTHROPIC_API_KEY=sk-... and re-run.\n' +
|
|
18
|
+
' • OpenRouter: export R2MCP_OPENROUTER_API_KEY=sk-or-... and re-run.\n' +
|
|
19
|
+
'You can also force a specific provider via --provider=<claude-code|anthropic|openrouter> or R2MCP_CLASSIFIER_PROVIDER.';
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public surface for the LLMProvider layer.
|
|
3
|
+
*
|
|
4
|
+
* Selection precedence (D.R3):
|
|
5
|
+
* 1. `flag` (e.g., from --provider=...)
|
|
6
|
+
* 2. R2MCP_CLASSIFIER_PROVIDER env var
|
|
7
|
+
* 3. Auto-fallback: claude-code (if logged in) → anthropic (if API key)
|
|
8
|
+
* → openrouter (if API key) → ProviderUnavailableError
|
|
9
|
+
*/
|
|
10
|
+
import type { LLMProvider, ProviderName } from './types.js';
|
|
11
|
+
export type { CompleteRequest, CompleteResponse, LLMProvider, LogicalModel, ProviderName, } from './types.js';
|
|
12
|
+
export { AnthropicProvider } from './anthropic.js';
|
|
13
|
+
export { ClaudeCodeProvider, probeClaudeCode } from './claude-code.js';
|
|
14
|
+
export { OpenRouterProvider } from './openrouter.js';
|
|
15
|
+
export { Semaphore } from './semaphore.js';
|
|
16
|
+
export { ProviderUnavailableError, NO_PROVIDER_AVAILABLE_MESSAGE } from './errors.js';
|
|
17
|
+
export declare function isProviderName(name: string): name is ProviderName;
|
|
18
|
+
export interface SelectProviderOptions {
|
|
19
|
+
/** Provider name from --provider=... flag. Highest precedence. */
|
|
20
|
+
flag?: ProviderName;
|
|
21
|
+
/** Process env (defaults to process.env). Tests override this. */
|
|
22
|
+
env?: NodeJS.ProcessEnv;
|
|
23
|
+
/** Probe whether Claude Code is logged in. Default uses real subprocess. */
|
|
24
|
+
probeClaudeCode?: () => Promise<boolean>;
|
|
25
|
+
/** Factory hooks (tests inject mocks; production uses defaults). */
|
|
26
|
+
makeAnthropic?: (apiKey: string) => LLMProvider;
|
|
27
|
+
makeClaudeCode?: () => LLMProvider;
|
|
28
|
+
makeOpenRouter?: (apiKey: string) => LLMProvider;
|
|
29
|
+
}
|
|
30
|
+
export declare function selectProvider(opts?: SelectProviderOptions): Promise<LLMProvider>;
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Public surface for the LLMProvider layer.
|
|
3
|
+
*
|
|
4
|
+
* Selection precedence (D.R3):
|
|
5
|
+
* 1. `flag` (e.g., from --provider=...)
|
|
6
|
+
* 2. R2MCP_CLASSIFIER_PROVIDER env var
|
|
7
|
+
* 3. Auto-fallback: claude-code (if logged in) → anthropic (if API key)
|
|
8
|
+
* → openrouter (if API key) → ProviderUnavailableError
|
|
9
|
+
*/
|
|
10
|
+
import { AnthropicProvider } from './anthropic.js';
|
|
11
|
+
import { ClaudeCodeProvider, probeClaudeCode } from './claude-code.js';
|
|
12
|
+
import { OpenRouterProvider } from './openrouter.js';
|
|
13
|
+
import { NO_PROVIDER_AVAILABLE_MESSAGE, ProviderUnavailableError } from './errors.js';
|
|
14
|
+
export { AnthropicProvider } from './anthropic.js';
|
|
15
|
+
export { ClaudeCodeProvider, probeClaudeCode } from './claude-code.js';
|
|
16
|
+
export { OpenRouterProvider } from './openrouter.js';
|
|
17
|
+
export { Semaphore } from './semaphore.js';
|
|
18
|
+
export { ProviderUnavailableError, NO_PROVIDER_AVAILABLE_MESSAGE } from './errors.js';
|
|
19
|
+
// Every provider identifier accepted from the `flag` option or the
// R2MCP_CLASSIFIER_PROVIDER environment variable.
const PROVIDER_NAMES = ['anthropic', 'claude-code', 'openrouter'];
/**
 * Reports whether `name` is one of the supported provider identifiers.
 *
 * @param {string} name - Candidate provider name.
 * @returns {boolean} `true` when `name` appears in PROVIDER_NAMES.
 */
export function isProviderName(name) {
    return PROVIDER_NAMES.includes(name);
}
/**
 * Picks and constructs an LLM provider.
 *
 * Precedence:
 *   1. `opts.flag`
 *   2. `R2MCP_CLASSIFIER_PROVIDER` in `opts.env` (defaults to `process.env`)
 *   3. Auto-fallback: claude-code (if the probe resolves truthy), then
 *      anthropic (if ANTHROPIC_API_KEY is set), then openrouter
 *      (if R2MCP_OPENROUTER_API_KEY is set).
 *
 * @param {object} [opts] - Selection inputs and injectable hooks.
 * @param {string} [opts.flag] - Explicit provider name; highest precedence.
 * @param {object} [opts.env] - Environment map to read instead of `process.env`.
 * @param {() => Promise<boolean>} [opts.probeClaudeCode] - Replaces the default probe.
 * @param {(apiKey: string) => object} [opts.makeAnthropic] - Factory override.
 * @param {() => object} [opts.makeClaudeCode] - Factory override.
 * @param {(apiKey: string) => object} [opts.makeOpenRouter] - Factory override.
 * @returns {Promise<object>} The constructed provider.
 * @throws {ProviderUnavailableError} When the requested name is not recognized,
 *   when its API key is missing, or when auto-fallback finds nothing usable.
 */
export async function selectProvider(opts = {}) {
    const env = opts.env ?? process.env;
    const probe = opts.probeClaudeCode ?? (() => probeClaudeCode());
    const makeAnthropic = opts.makeAnthropic ?? ((apiKey) => new AnthropicProvider({ apiKey }));
    const makeClaudeCode = opts.makeClaudeCode ?? (() => new ClaudeCodeProvider());
    const makeOpenRouter = opts.makeOpenRouter ?? ((apiKey) => new OpenRouterProvider({ apiKey }));
    // Precedence 1 and 2: explicit flag, then R2MCP_CLASSIFIER_PROVIDER.
    // The env var is only read (and validated) when no flag was supplied.
    const explicit = opts.flag ?? readEnvProviderName(env);
    if (explicit) {
        return instantiate(explicit, env, { makeAnthropic, makeClaudeCode, makeOpenRouter });
    }
    // Precedence 3: auto-fallback. Claude Code first (Max-covered, $0/call).
    if (await probe()) {
        return makeClaudeCode();
    }
    if (env.ANTHROPIC_API_KEY) {
        return makeAnthropic(env.ANTHROPIC_API_KEY);
    }
    if (env.R2MCP_OPENROUTER_API_KEY) {
        return makeOpenRouter(env.R2MCP_OPENROUTER_API_KEY);
    }
    throw new ProviderUnavailableError(NO_PROVIDER_AVAILABLE_MESSAGE);
}
/**
 * Reads R2MCP_CLASSIFIER_PROVIDER from `env`.
 *
 * @param {object} env - Environment map.
 * @returns {string|undefined} The provider name, or `undefined` when the
 *   variable is unset or empty.
 * @throws {ProviderUnavailableError} When the variable holds an unknown name.
 */
function readEnvProviderName(env) {
    const raw = env.R2MCP_CLASSIFIER_PROVIDER;
    if (!raw)
        return undefined;
    if (!isProviderName(raw)) {
        throw new ProviderUnavailableError(`R2MCP_CLASSIFIER_PROVIDER=${raw} is not a recognized provider. Use one of: ${PROVIDER_NAMES.join(', ')}`);
    }
    return raw;
}
/**
 * Constructs the provider called `name` via the supplied factories.
 *
 * @param {string} name - Requested provider name.
 * @param {object} env - Environment map holding the API keys.
 * @param {{ makeAnthropic: Function, makeClaudeCode: Function, makeOpenRouter: Function }} factories
 * @returns {object} The constructed provider.
 * @throws {ProviderUnavailableError} When `name` is not a supported provider
 *   or the API key that provider needs is absent from `env`.
 */
function instantiate(name, env, factories) {
    if (name === 'claude-code')
        return factories.makeClaudeCode();
    if (name === 'anthropic') {
        const apiKey = env.ANTHROPIC_API_KEY;
        if (!apiKey) {
            throw new ProviderUnavailableError('ANTHROPIC_API_KEY is required when --provider=anthropic is selected.');
        }
        return factories.makeAnthropic(apiKey);
    }
    if (name === 'openrouter') {
        const apiKey = env.R2MCP_OPENROUTER_API_KEY;
        if (!apiKey) {
            throw new ProviderUnavailableError('R2MCP_OPENROUTER_API_KEY is required when --provider=openrouter is selected.');
        }
        return factories.makeOpenRouter(apiKey);
    }
    // A flag value is not validated before reaching this point, so an
    // unknown name must fail loudly instead of being treated as openrouter.
    throw new ProviderUnavailableError(`Provider "${name}" is not a recognized provider. Use one of: ${PROVIDER_NAMES.join(', ')}`);
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { CompleteRequest, CompleteResponse, LLMProvider, LogicalModel, ProviderName } from './types.js';
|
|
2
|
+
type FetchFn = typeof fetch;
|
|
3
|
+
export interface OpenRouterOptions {
|
|
4
|
+
apiKey?: string;
|
|
5
|
+
fetchFn?: FetchFn;
|
|
6
|
+
/** Override the API endpoint (tests use this). */
|
|
7
|
+
endpoint?: string;
|
|
8
|
+
}
|
|
9
|
+
export declare class OpenRouterProvider implements LLMProvider {
|
|
10
|
+
readonly name: ProviderName;
|
|
11
|
+
readonly concurrencyLimit = 10;
|
|
12
|
+
private readonly apiKey;
|
|
13
|
+
private readonly fetchFn;
|
|
14
|
+
private readonly endpoint;
|
|
15
|
+
constructor(opts?: OpenRouterOptions);
|
|
16
|
+
static priceForTokens(model: LogicalModel, inputTokens: number, outputTokens: number): number;
|
|
17
|
+
complete(req: CompleteRequest): Promise<CompleteResponse>;
|
|
18
|
+
}
|
|
19
|
+
export {};
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
const OPENROUTER_URL = 'https://openrouter.ai/api/v1/chat/completions';
// OpenRouter routes Anthropic models under their own slugs. Logical model →
// OpenRouter slug. We pick the same family as the AnthropicProvider so cross-
// provider agreement (D.AC6) measures abstraction quality, not model swap.
const MODEL_IDS = {
    haiku: 'anthropic/claude-haiku-4.5',
    opus: 'anthropic/claude-opus-4.7',
    sonnet: 'anthropic/claude-sonnet-4.6',
};
// OpenRouter passes through underlying model pricing. We approximate using
// the same Anthropic list prices — operator can override via env var if
// OpenRouter applies a markup.
// NOTE(review): this module does not read any price-override env var itself;
// confirm where (or whether) that override is implemented.
// Values are USD per one million tokens (see priceForTokens).
const PRICES = {
    haiku: { input: 0.8, output: 4.0 },
    opus: { input: 15.0, output: 75.0 },
    sonnet: { input: 3.0, output: 15.0 },
};
// Completion budget used when the request does not specify max_tokens.
const DEFAULT_MAX_TOKENS = 256;
/**
 * LLM provider that sends chat-completion requests to OpenRouter.
 */
export class OpenRouterProvider {
    name = 'openrouter';
    concurrencyLimit = 10;
    apiKey;
    fetchFn;
    endpoint;
    /**
     * @param {object} [opts]
     * @param {string} [opts.apiKey] - Falls back to R2MCP_OPENROUTER_API_KEY in process.env.
     * @param {Function} [opts.fetchFn] - fetch-compatible function; defaults to global fetch.
     * @param {string} [opts.endpoint] - Overrides the OpenRouter endpoint URL.
     * @throws {Error} When no API key is available from either source.
     */
    constructor(opts = {}) {
        const apiKey = opts.apiKey ?? process.env.R2MCP_OPENROUTER_API_KEY;
        if (!apiKey) {
            throw new Error('R2MCP_OPENROUTER_API_KEY is required to construct OpenRouterProvider');
        }
        this.apiKey = apiKey;
        this.fetchFn = opts.fetchFn ?? fetch;
        this.endpoint = opts.endpoint ?? OPENROUTER_URL;
    }
    /**
     * Estimates the USD cost of a call from its token counts.
     *
     * @param {string} model - Logical model name: 'haiku', 'opus' or 'sonnet'.
     * @param {number} inputTokens
     * @param {number} outputTokens
     * @returns {number} Estimated cost in USD.
     * @throws {Error} When `model` has no entry in the price table.
     */
    static priceForTokens(model, inputTokens, outputTokens) {
        // hasOwn keeps inherited keys such as 'constructor' from passing as models.
        if (!Object.hasOwn(PRICES, model)) {
            throw new Error(`Unknown logical model: ${model}`);
        }
        const p = PRICES[model];
        return (inputTokens / 1_000_000) * p.input + (outputTokens / 1_000_000) * p.output;
    }
    /**
     * Sends one completion request and normalizes the reply.
     *
     * @param {{ model: string, prompt: string, system?: string, max_tokens?: number }} req
     * @returns {Promise<{ response: string, cost_usd: number, latency_ms: number,
     *   input_tokens: number, output_tokens: number, raw: unknown }>}
     * @throws {Error} On an unknown logical model, a non-2xx HTTP status, or a
     *   2xx body that carries an `error` object and no completion choice.
     */
    async complete(req) {
        // Reject unknown models before spending a network round trip;
        // otherwise JSON.stringify drops the undefined `model` field.
        if (!Object.hasOwn(MODEL_IDS, req.model)) {
            throw new Error(`Unknown logical model: ${req.model}`);
        }
        const startedAt = Date.now();
        const messages = [];
        if (req.system)
            messages.push({ role: 'system', content: req.system });
        messages.push({ role: 'user', content: req.prompt });
        const body = {
            model: MODEL_IDS[req.model],
            max_tokens: req.max_tokens ?? DEFAULT_MAX_TOKENS,
            messages,
        };
        const res = await this.fetchFn(this.endpoint, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${this.apiKey}`,
                'HTTP-Referer': 'https://github.com/DMokong/r2mcp',
                'X-Title': 'r2mcp-classifier',
            },
            body: JSON.stringify(body),
        });
        if (!res.ok) {
            const errBody = await res.text();
            throw new Error(`OpenRouter ${res.status}: ${errBody.slice(0, 400)}`);
        }
        const data = (await res.json());
        const firstChoice = data?.choices?.[0];
        // A 2xx body without `choices` used to crash with a TypeError.
        // Surface an upstream error payload explicitly instead.
        if (firstChoice === undefined && data?.error) {
            const detail = typeof data.error.message === 'string'
                ? data.error.message
                : JSON.stringify(data.error);
            throw new Error(`OpenRouter error: ${detail.slice(0, 400)}`);
        }
        const text = firstChoice?.message?.content ?? '';
        const inputTokens = data?.usage?.prompt_tokens ?? 0;
        const outputTokens = data?.usage?.completion_tokens ?? 0;
        return {
            response: text,
            cost_usd: OpenRouterProvider.priceForTokens(req.model, inputTokens, outputTokens),
            latency_ms: Date.now() - startedAt,
            input_tokens: inputTokens,
            output_tokens: outputTokens,
            raw: data,
        };
    }
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Counting semaphore for per-provider concurrency caps (D.R6, D.AC8).
|
|
3
|
+
*
|
|
4
|
+
* The driver wraps each provider call in `withPermit` so that no more than
|
|
5
|
+
* `limit` calls are in flight concurrently. `inFlight` is exposed so tests
|
|
6
|
+
* can observe peak concurrency.
|
|
7
|
+
*/
|
|
8
|
+
export declare class Semaphore {
|
|
9
|
+
readonly limit: number;
|
|
10
|
+
private _inFlight;
|
|
11
|
+
private waiters;
|
|
12
|
+
private _peak;
|
|
13
|
+
constructor(limit: number);
|
|
14
|
+
get inFlight(): number;
|
|
15
|
+
get peak(): number;
|
|
16
|
+
private acquire;
|
|
17
|
+
private release;
|
|
18
|
+
withPermit<T>(fn: () => Promise<T>): Promise<T>;
|
|
19
|
+
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Counting semaphore for per-provider concurrency caps (D.R6, D.AC8).
|
|
3
|
+
*
|
|
4
|
+
* The driver wraps each provider call in `withPermit` so that no more than
|
|
5
|
+
* `limit` calls are in flight concurrently. `inFlight` is exposed so tests
|
|
6
|
+
* can observe peak concurrency.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Counting semaphore: at most `limit` callbacks passed to `withPermit` run
 * concurrently; further callers queue in FIFO order.
 */
export class Semaphore {
    limit;
    _inFlight = 0;
    waiters = [];
    _peak = 0;
    /**
     * @param {number} limit - Maximum number of concurrent permits.
     * @throws {Error} When `limit` is not a number >= 1. NaN and undefined
     *   are rejected too, since they would make every acquire wait forever.
     */
    constructor(limit) {
        this.limit = limit;
        // `!(limit >= 1)` is true for NaN/undefined, unlike `limit < 1`.
        if (!(limit >= 1))
            throw new Error(`Semaphore limit must be >= 1, got ${limit}`);
    }
    /** Number of permits currently held. */
    get inFlight() {
        return this._inFlight;
    }
    /** Highest value `inFlight` has reached so far. */
    get peak() {
        return this._peak;
    }
    /**
     * Takes a permit, waiting in FIFO order when none is free.
     * @returns {Promise<void>}
     */
    async acquire() {
        if (this._inFlight < this.limit) {
            this._inFlight++;
            if (this._inFlight > this._peak)
                this._peak = this._inFlight;
            return;
        }
        // No counter update after waking: release() hands its permit over
        // directly, so the slot is already counted for this waiter.
        await new Promise((resolve) => this.waiters.push(resolve));
    }
    /**
     * Returns a permit. If a caller is waiting, the permit is transferred to
     * it without lowering `inFlight`. Decrementing first would let another
     * acquire() take the fast path before the woken waiter resumes, pushing
     * the in-flight count above `limit`.
     */
    release() {
        const next = this.waiters.shift();
        if (next) {
            next();
            return;
        }
        this._inFlight--;
    }
    /**
     * Runs `fn` while holding a permit and always releases it afterwards.
     *
     * @template T
     * @param {() => Promise<T>} fn - Work to run under the permit.
     * @returns {Promise<T>} Whatever `fn` resolves to; rejections propagate.
     */
    async withPermit(fn) {
        await this.acquire();
        try {
            return await fn();
        }
        finally {
            this.release();
        }
    }
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLMProvider interface — the runtime-configurable layer that lets the edge
|
|
3
|
+
* classifier and the wiki compiler call into Anthropic SDK, Claude Code
|
|
4
|
+
* headless (Max OAuth), or OpenRouter without baking a cost-mode assumption
|
|
5
|
+
* into the architecture. SPEC-044 Section D.
|
|
6
|
+
*/
|
|
7
|
+
export type LogicalModel = 'haiku' | 'opus' | 'sonnet';
|
|
8
|
+
export type ProviderName = 'anthropic' | 'claude-code' | 'openrouter';
|
|
9
|
+
export interface CompleteRequest {
|
|
10
|
+
model: LogicalModel;
|
|
11
|
+
prompt: string;
|
|
12
|
+
system?: string;
|
|
13
|
+
max_tokens?: number;
|
|
14
|
+
}
|
|
15
|
+
export interface CompleteResponse {
|
|
16
|
+
response: string;
|
|
17
|
+
cost_usd: number;
|
|
18
|
+
latency_ms: number;
|
|
19
|
+
input_tokens?: number;
|
|
20
|
+
output_tokens?: number;
|
|
21
|
+
raw?: unknown;
|
|
22
|
+
}
|
|
23
|
+
export interface LLMProvider {
|
|
24
|
+
readonly name: ProviderName;
|
|
25
|
+
readonly concurrencyLimit: number;
|
|
26
|
+
complete(req: CompleteRequest): Promise<CompleteResponse>;
|
|
27
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLMProvider interface — the runtime-configurable layer that lets the edge
|
|
3
|
+
* classifier and the wiki compiler call into Anthropic SDK, Claude Code
|
|
4
|
+
* headless (Max OAuth), or OpenRouter without baking a cost-mode assumption
|
|
5
|
+
* into the architecture. SPEC-044 Section D.
|
|
6
|
+
*/
|
|
7
|
+
export {};
|
package/dist/schema.sql
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
-- r2mcp — Database Schema
|
|
2
|
+
-- PostgreSQL + pgvector: memories table with semantic search indexes
|
|
3
|
+
|
|
4
|
+
CREATE TABLE IF NOT EXISTS memories (
|
|
5
|
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
6
|
+
content TEXT NOT NULL,
|
|
7
|
+
tier TEXT NOT NULL CHECK (tier IN ('preferences', 'project-context', 'conversations')),
|
|
8
|
+
type TEXT NOT NULL CHECK (type IN ('preference', 'decision', 'context', 'relationship', 'observation', 'rejection', 'archived')),
|
|
9
|
+
section TEXT,
|
|
10
|
+
topics TEXT[] DEFAULT '{}',
|
|
11
|
+
people TEXT[] DEFAULT '{}',
|
|
12
|
+
date DATE,
|
|
13
|
+
fingerprint TEXT NOT NULL UNIQUE,
|
|
14
|
+
embedding vector(1536),
|
|
15
|
+
source_file TEXT,
|
|
16
|
+
source_line INTEGER,
|
|
17
|
+
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
18
|
+
updated_at TIMESTAMPTZ DEFAULT NOW()
|
|
19
|
+
);
|
|
20
|
+
|
|
21
|
+
-- Full-text search index
|
|
22
|
+
ALTER TABLE memories ADD COLUMN IF NOT EXISTS tsv tsvector
|
|
23
|
+
GENERATED ALWAYS AS (to_tsvector('english', content)) STORED;
|
|
24
|
+
CREATE INDEX IF NOT EXISTS idx_memories_tsv ON memories USING gin(tsv);
|
|
25
|
+
|
|
26
|
+
-- pgvector index for semantic search (hnsw for small corpus)
|
|
27
|
+
CREATE INDEX IF NOT EXISTS idx_memories_embedding ON memories USING hnsw (embedding vector_cosine_ops);
|
|
28
|
+
|
|
29
|
+
-- Metadata indexes
|
|
30
|
+
CREATE INDEX IF NOT EXISTS idx_memories_tier ON memories (tier);
|
|
31
|
+
CREATE INDEX IF NOT EXISTS idx_memories_type ON memories (type);
|
|
32
|
+
CREATE INDEX IF NOT EXISTS idx_memories_topics ON memories USING gin (topics);
|
|
33
|
+
CREATE INDEX IF NOT EXISTS idx_memories_fingerprint ON memories (fingerprint);
|
|
34
|
+
CREATE INDEX IF NOT EXISTS idx_memories_created ON memories (created_at);
|
|
35
|
+
|
|
36
|
+
-- Migration: add 'archived' to type CHECK constraint (idempotent)
|
|
37
|
+
DO $$
|
|
38
|
+
BEGIN
|
|
39
|
+
ALTER TABLE memories DROP CONSTRAINT IF EXISTS memories_type_check;
|
|
40
|
+
ALTER TABLE memories ADD CONSTRAINT memories_type_check
|
|
41
|
+
CHECK (type IN ('preference', 'decision', 'context', 'relationship', 'observation', 'rejection', 'archived'));
|
|
42
|
+
END $$;
|
|
43
|
+
|
|
44
|
+
-- ============================================================================
|
|
45
|
+
-- SPEC-043: memory_edges — typed relations between memories (Phase 1 wiki-mode)
|
|
46
|
+
-- Mirrors OB1's thought_edges shape for future convergence.
|
|
47
|
+
-- ============================================================================
|
|
48
|
+
|
|
49
|
+
CREATE TABLE IF NOT EXISTS memory_edges (
|
|
50
|
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
51
|
+
from_memory_id UUID NOT NULL REFERENCES memories(id) ON DELETE CASCADE,
|
|
52
|
+
to_memory_id UUID NOT NULL REFERENCES memories(id) ON DELETE CASCADE,
|
|
53
|
+
relation TEXT NOT NULL CHECK (relation IN (
|
|
54
|
+
'supports', 'contradicts', 'supersedes',
|
|
55
|
+
'evolved_into', 'depends_on', 'related_to'
|
|
56
|
+
)),
|
|
57
|
+
confidence NUMERIC(3,2) NOT NULL CHECK (confidence >= 0 AND confidence <= 1),
|
|
58
|
+
rationale TEXT NOT NULL,
|
|
59
|
+
classifier_version TEXT NOT NULL,
|
|
60
|
+
valid_from TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
61
|
+
valid_until TIMESTAMPTZ,
|
|
62
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
63
|
+
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
64
|
+
CONSTRAINT memory_edges_no_self CHECK (from_memory_id <> to_memory_id),
|
|
65
|
+
CONSTRAINT memory_edges_unique UNIQUE (from_memory_id, to_memory_id, relation)
|
|
66
|
+
);
|
|
67
|
+
|
|
68
|
+
-- Outgoing-edge lookup (used by recall() signals query AND Phase 2 compile)
|
|
69
|
+
CREATE INDEX IF NOT EXISTS idx_edges_from
|
|
70
|
+
ON memory_edges (from_memory_id, relation);
|
|
71
|
+
|
|
72
|
+
-- Incoming-edge lookup (used by recall() reverse signals AND Phase 2 compile)
|
|
73
|
+
CREATE INDEX IF NOT EXISTS idx_edges_to
|
|
74
|
+
ON memory_edges (to_memory_id, relation);
|
|
75
|
+
|
|
76
|
+
-- Partial index for currently-valid edges (used by Phase 3 lint)
|
|
77
|
+
CREATE INDEX IF NOT EXISTS idx_edges_currently_valid
|
|
78
|
+
ON memory_edges (relation, from_memory_id)
|
|
79
|
+
WHERE valid_until IS NULL;
|
|
80
|
+
|
|
81
|
+
-- ============================================================================
|
|
82
|
+
-- SPEC-046: entities + memory_entities — light entity extraction (Phase 4 wiki-mode)
|
|
83
|
+
-- Four-type taxonomy; one join table. Lighter than OB1's full ontology.
|
|
84
|
+
-- ============================================================================
|
|
85
|
+
|
|
86
|
+
CREATE TABLE IF NOT EXISTS entities (
|
|
87
|
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
|
88
|
+
type TEXT NOT NULL CHECK (type IN ('project', 'person', 'tool', 'decision')),
|
|
89
|
+
canonical_name TEXT NOT NULL,
|
|
90
|
+
normalized_name TEXT NOT NULL,
|
|
91
|
+
aliases TEXT[] NOT NULL DEFAULT '{}',
|
|
92
|
+
metadata JSONB NOT NULL DEFAULT '{}',
|
|
93
|
+
first_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
94
|
+
last_seen_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
95
|
+
CONSTRAINT entities_unique UNIQUE (type, normalized_name)
|
|
96
|
+
);
|
|
97
|
+
|
|
98
|
+
CREATE INDEX IF NOT EXISTS idx_entities_normalized ON entities (normalized_name);
|
|
99
|
+
CREATE INDEX IF NOT EXISTS idx_entities_aliases ON entities USING gin (aliases);
|
|
100
|
+
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities (type);
|
|
101
|
+
|
|
102
|
+
-- Note: memory_entities.confidence is NUMERIC(3,2) to match memory_edges.confidence
|
|
103
|
+
-- from SPEC-043 (already shipped). The SPEC-046 PR review (claw-2jbo) flagged the
|
|
104
|
+
-- prior REAL type as a cross-table inconsistency. Aligning here is safe because
|
|
105
|
+
-- memory_entities ships for the first time in this PR — no shipped consumers.
|
|
106
|
+
CREATE TABLE IF NOT EXISTS memory_entities (
|
|
107
|
+
memory_id UUID NOT NULL REFERENCES memories(id) ON DELETE CASCADE,
|
|
108
|
+
entity_id UUID NOT NULL REFERENCES entities(id) ON DELETE CASCADE,
|
|
109
|
+
confidence NUMERIC(3,2) NOT NULL DEFAULT 1.0 CHECK (confidence BETWEEN 0 AND 1),
|
|
110
|
+
source TEXT NOT NULL DEFAULT 'classifier',
|
|
111
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
|
112
|
+
PRIMARY KEY (memory_id, entity_id)
|
|
113
|
+
);
|
|
114
|
+
|
|
115
|
+
CREATE INDEX IF NOT EXISTS idx_memory_entities_entity ON memory_entities (entity_id);
|
|
116
|
+
CREATE INDEX IF NOT EXISTS idx_memory_entities_memory ON memory_entities (memory_id);
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP server instructions (claw-8cjf.8) — sent in the initialize response and
|
|
3
|
+
* loaded into the agent's context by Claude Code at session start. This is the
|
|
4
|
+
* only guidance a fresh project's agent gets, so it must teach the session
|
|
5
|
+
* loop on its own. Claude Code truncates at 2,048 characters; keep critical
|
|
6
|
+
* content near the start. Lives in its own module because src/index.ts runs
|
|
7
|
+
* main() on import and cannot be imported by tests.
|
|
8
|
+
*/
|
|
9
|
+
export declare const SERVER_INSTRUCTIONS = "Persistent memory across Claude Code sessions, stored in PostgreSQL with semantic search.\n\nSession loop:\n1. At session start, call recall with a query about the current task to load relevant context (e.g. recall({query: \"<topic you are working on>\"})).\n2. During work, call remember when something durable surfaces \u2014 a decision with its rationale, a user preference or correction, project state worth carrying forward. Choose the tier: \"preferences\" (decisions, style, corrections \u2014 never expires), \"project-context\" (architecture, system state), \"conversations\" (session continuity).\n3. When the user corrects your approach and the correction would apply again in future situations, store it with operation \"REJECTION\" so it is excluded from normal recall but prevents repeating the mistake.\n\nCold start: on a new, empty database, recall returns zero results \u2014 that is expected, not an error. Begin storing memories as durable facts emerge and recall becomes useful within a session or two.\n\nDegraded mode: responses may carry a warnings[] field (e.g. embeddings disabled because R2MCP_OPENROUTER_API_KEY is unset \u2014 search falls back to full-text). Surface such warnings to the user once rather than ignoring them.\n\nThe other tools (search, stats, meditate, reject, compile, lint, classify, extract_entities, dump_edges_sidecar) are for browsing, curation, and batch maintenance \u2014 recall and remember are the everyday pair.";
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP server instructions (claw-8cjf.8) — sent in the initialize response and
|
|
3
|
+
* loaded into the agent's context by Claude Code at session start. This is the
|
|
4
|
+
* only guidance a fresh project's agent gets, so it must teach the session
|
|
5
|
+
* loop on its own. Claude Code truncates at 2,048 characters; keep critical
|
|
6
|
+
* content near the start. Lives in its own module because src/index.ts runs
|
|
7
|
+
* main() on import and cannot be imported by tests.
|
|
8
|
+
*/
|
|
9
|
+
export const SERVER_INSTRUCTIONS = `Persistent memory across Claude Code sessions, stored in PostgreSQL with semantic search.
|
|
10
|
+
|
|
11
|
+
Session loop:
|
|
12
|
+
1. At session start, call recall with a query about the current task to load relevant context (e.g. recall({query: "<topic you are working on>"})).
|
|
13
|
+
2. During work, call remember when something durable surfaces — a decision with its rationale, a user preference or correction, project state worth carrying forward. Choose the tier: "preferences" (decisions, style, corrections — never expires), "project-context" (architecture, system state), "conversations" (session continuity).
|
|
14
|
+
3. When the user corrects your approach and the correction would apply again in future situations, store it with operation "REJECTION" so it is excluded from normal recall but prevents repeating the mistake.
|
|
15
|
+
|
|
16
|
+
Cold start: on a new, empty database, recall returns zero results — that is expected, not an error. Begin storing memories as durable facts emerge and recall becomes useful within a session or two.
|
|
17
|
+
|
|
18
|
+
Degraded mode: responses may carry a warnings[] field (e.g. embeddings disabled because R2MCP_OPENROUTER_API_KEY is unset — search falls back to full-text). Surface such warnings to the user once rather than ignoring them.
|
|
19
|
+
|
|
20
|
+
The other tools (search, stats, meditate, reject, compile, lint, classify, extract_entities, dump_edges_sidecar) are for browsing, curation, and batch maintenance — recall and remember are the everyday pair.`;
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Telemetry helpers for custom spans and metrics.
|
|
3
|
+
* Uses the OTel API — returns no-ops when SDK is not initialized.
|
|
4
|
+
*/
|
|
5
|
+
import { type Span } from '@opentelemetry/api';
|
|
6
|
+
export declare const toolDuration: import("@opentelemetry/api").Histogram<import("@opentelemetry/api").Attributes>;
|
|
7
|
+
/** Counter `r2mcp.memory.tool_count`: number of MCP tool invocations. */
export declare const toolCount: import("@opentelemetry/api").Counter<import("@opentelemetry/api").Attributes>;
|
|
8
|
+
/** Counter `r2mcp.memory.tool_errors`: number of MCP tool invocations that ended in an error. */
export declare const toolErrors: import("@opentelemetry/api").Counter<import("@opentelemetry/api").Attributes>;
|
|
9
|
+
/** Histogram `r2mcp.memory.embedding_latency_ms`: duration of embedding API calls, in milliseconds. */
export declare const embeddingDuration: import("@opentelemetry/api").Histogram<import("@opentelemetry/api").Attributes>;
|
|
10
|
+
/** Counter `r2mcp.memory.embedding_count`: number of embedding API calls. */
export declare const embeddingCount: import("@opentelemetry/api").Counter<import("@opentelemetry/api").Attributes>;
|
|
11
|
+
/** Counter `r2mcp.memory.embedding_cost_usd`: estimated cost of embedding API calls, in USD. */
export declare const embeddingCost: import("@opentelemetry/api").Counter<import("@opentelemetry/api").Attributes>;
|
|
12
|
+
/**
 * Wraps an async MCP tool handler with OTel span + metrics.
 *
 * The handler runs inside an active span named `memory.<toolName>`. Each call
 * increments `toolCount`; a failing handler also increments `toolErrors` and
 * marks the span as ERROR before the error is rethrown. The elapsed time is
 * always recorded in `toolDuration` and on the span as `duration_ms`.
 *
 * @param toolName Tool name; used as the span-name suffix and as the `tool` metric attribute.
 * @param attributes Attributes set on the span before the handler runs.
 * @param fn Handler to execute; receives the active span.
 * @returns The value `fn` resolves to.
 */
export declare function withToolSpan<T>(toolName: string, attributes: Record<string, string | number | boolean>, fn: (span: Span) => Promise<T>): Promise<T>;
|
|
16
|
+
/**
 * Wraps a single LLM provider.complete() call in a child span so the
 * cross-process parent context (restored from OTEL_TRACEPARENT in scripts)
 * has a concrete operation to inherit. Attributes mirror the
 * provider.complete result: model, cost_usd, latency_ms. The provider name
 * is supplied separately (the response payload doesn't carry it).
 *
 * spanName SHOULD be `memory.<op>.call` (e.g. memory.extract_entities.call)
 * so traces group naturally with the top-level tool span.
 *
 * (claw-1ejd) — without this wrapper the OTEL_TRACEPARENT plumbing is a
 * no-op because the subprocess never opens a span to inherit the parent.
 *
 * @param spanName Name of the span to open.
 * @param attrs Provider name (set as `llm.provider`) and optional model
 *   (set as `llm.model` when provided).
 * @param fn The call to run inside the span; numeric `cost_usd` and
 *   `latency_ms` fields on its result are copied onto the span.
 * @returns The value `fn` resolves to; errors thrown by `fn` are rethrown.
 */
export declare function withLLMCallSpan<T extends {
    cost_usd?: number;
    latency_ms?: number;
}>(spanName: string, attrs: {
    provider: string;
    model?: string;
}, fn: () => Promise<T>): Promise<T>;
|
|
36
|
+
/**
 * Wraps an embedding API call with OTel span + metrics.
 *
 * The call runs inside an active span named `memory.embedding`. Each call
 * increments `embeddingCount`, and its elapsed time is recorded in
 * `embeddingDuration` and on the span as `duration_ms`. When `totalChars` is
 * provided (and non-zero), a token count is estimated as `ceil(totalChars / 4)`
 * and the resulting cost estimate is added to `embeddingCost` and set on the span.
 *
 * @param textCount Number of texts being embedded; set as `embedding.text_count`.
 * @param fn The embedding call to execute.
 * @param totalChars Optional total character count used for the token/cost estimate.
 * @returns The value `fn` resolves to; errors thrown by `fn` are rethrown.
 */
export declare function withEmbeddingSpan<T>(textCount: number, fn: () => Promise<T>, totalChars?: number): Promise<T>;
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Telemetry helpers for custom spans and metrics.
|
|
3
|
+
* Uses the OTel API — returns no-ops when SDK is not initialized.
|
|
4
|
+
*/
|
|
5
|
+
import { trace, metrics, SpanStatusCode } from '@opentelemetry/api';
|
|
6
|
+
// Instrumentation scope shared by the tracer and the meter below.
// NOTE(review): the scope version is '0.1.0' while this package is published
// as 0.2.0 — confirm whether it is meant to track the package version.
const SCOPE_NAME = 'r2mcp';
const SCOPE_VERSION = '0.1.0';
const tracer = trace.getTracer(SCOPE_NAME, SCOPE_VERSION);
const meter = metrics.getMeter(SCOPE_NAME, SCOPE_VERSION);

// ---- Metric instruments ----

/** Histogram of MCP tool operation durations (milliseconds). */
export const toolDuration = meter.createHistogram('r2mcp.memory.tool_duration_ms', {
    unit: 'ms',
    description: 'Duration of MCP tool operations in milliseconds',
});

/** Counter of MCP tool invocations. */
export const toolCount = meter.createCounter('r2mcp.memory.tool_count', {
    description: 'Number of MCP tool invocations',
});

/** Counter of MCP tool errors. */
export const toolErrors = meter.createCounter('r2mcp.memory.tool_errors', {
    description: 'Number of MCP tool errors',
});

/** Histogram of embedding API call durations (milliseconds). */
export const embeddingDuration = meter.createHistogram('r2mcp.memory.embedding_latency_ms', {
    unit: 'ms',
    description: 'Duration of embedding API calls in milliseconds',
});

/** Counter of embedding API calls. */
export const embeddingCount = meter.createCounter('r2mcp.memory.embedding_count', {
    description: 'Number of embedding API calls',
});

// Estimated per-token price for text-embedding-3-small via OpenRouter:
// $0.02 per 1M tokens, i.e. 2e-8 USD per token.
const EMBEDDING_COST_PER_TOKEN = 2e-8;

/** Counter accumulating the estimated embedding spend (USD). */
export const embeddingCost = meter.createCounter('r2mcp.memory.embedding_cost_usd', {
    unit: 'usd',
    description: 'Estimated cost of embedding API calls in USD',
});
|
|
33
|
+
/**
 * Wraps an async MCP tool handler with OTel span + metrics.
 *
 * The handler runs inside an active span named `memory.<toolName>`. Every
 * invocation increments `toolCount`. If the handler (or attribute setup)
 * throws, the exception is recorded on the span, the span is marked ERROR,
 * `toolErrors` is incremented and the original error is rethrown unchanged.
 * The elapsed wall-clock time is always recorded in `toolDuration` and on the
 * span as `duration_ms`, and the span is always ended.
 *
 * @template T
 * @param {string} toolName - Tool name; used as the span-name suffix and as the `tool` metric attribute.
 * @param {Record<string, string | number | boolean>} attributes - Attributes set on the span before the handler runs.
 * @param {(span: import('@opentelemetry/api').Span) => Promise<T>} fn - Handler to execute; receives the active span.
 * @returns {Promise<T>} The value `fn` resolves to.
 * @throws Rethrows whatever `fn` throws.
 */
export async function withToolSpan(toolName, attributes, fn) {
    return tracer.startActiveSpan(`memory.${toolName}`, async (span) => {
        const start = Date.now();
        try {
            span.setAttributes(attributes);
            toolCount.add(1, { tool: toolName });
            const result = await fn(span);
            span.setStatus({ code: SpanStatusCode.OK });
            return result;
        }
        catch (err) {
            // Record the exception as a span event; the status message alone
            // only carries the stringified error.
            span.recordException(err);
            span.setStatus({ code: SpanStatusCode.ERROR, message: String(err) });
            toolErrors.add(1, { tool: toolName });
            throw err;
        }
        finally {
            // Runs on both success and failure so every span gets a duration and is closed.
            const duration = Date.now() - start;
            toolDuration.record(duration, { tool: toolName });
            span.setAttribute('duration_ms', duration);
            span.end();
        }
    });
}
|
|
59
|
+
/**
 * Wraps a single LLM provider.complete() call in a child span so the
 * cross-process parent context (restored from OTEL_TRACEPARENT in scripts)
 * has a concrete operation to inherit. Attributes mirror the
 * provider.complete result: model, cost_usd, latency_ms. The provider name
 * is supplied separately (the response payload doesn't carry it).
 *
 * spanName SHOULD be `memory.<op>.call` (e.g. memory.extract_entities.call)
 * so traces group naturally with the top-level tool span.
 *
 * (claw-1ejd) — without this wrapper the OTEL_TRACEPARENT plumbing is a
 * no-op because the subprocess never opens a span to inherit the parent.
 *
 * On failure the exception is recorded on the span, the span is marked ERROR
 * and the original error is rethrown. `duration_ms` is always set and the
 * span is always ended.
 *
 * @template T
 * @param {string} spanName - Name of the span to open.
 * @param {{ provider: string, model?: string }} attrs - Provider name (set as
 *   `llm.provider`) and optional model (set as `llm.model` when truthy).
 * @param {() => Promise<T>} fn - The call to run inside the span.
 * @returns {Promise<T>} The value `fn` resolves to.
 * @throws Rethrows whatever `fn` throws.
 */
export async function withLLMCallSpan(spanName, attrs, fn) {
    return tracer.startActiveSpan(spanName, async (span) => {
        const start = Date.now();
        try {
            span.setAttribute('llm.provider', attrs.provider);
            if (attrs.model) {
                span.setAttribute('llm.model', attrs.model);
            }
            const result = await fn();
            // Guard against a nullish result so that reading telemetry fields
            // can never turn a successful call into a TypeError.
            if (result != null) {
                if (typeof result.cost_usd === 'number') {
                    span.setAttribute('llm.cost_usd', result.cost_usd);
                }
                if (typeof result.latency_ms === 'number') {
                    span.setAttribute('llm.latency_ms', result.latency_ms);
                }
            }
            span.setStatus({ code: SpanStatusCode.OK });
            return result;
        }
        catch (err) {
            // Record the exception as a span event; the status message alone
            // only carries the stringified error.
            span.recordException(err);
            span.setStatus({ code: SpanStatusCode.ERROR, message: String(err) });
            throw err;
        }
        finally {
            // Runs on both success and failure so every span gets a duration and is closed.
            span.setAttribute('duration_ms', Date.now() - start);
            span.end();
        }
    });
}
|
|
99
|
+
/**
 * Wraps an embedding API call with OTel span + metrics.
 *
 * The call runs inside an active span named `memory.embedding`. Every call
 * increments `embeddingCount`. When `totalChars` is truthy, the token count is
 * estimated as `ceil(totalChars / 4)`, multiplied by
 * `EMBEDDING_COST_PER_TOKEN`, added to `embeddingCost` and set on the span;
 * this happens before `fn` runs, so the estimate is also counted for calls
 * that subsequently fail. On failure the exception is recorded on the span,
 * the span is marked ERROR and the original error is rethrown. The elapsed
 * time is always recorded in `embeddingDuration` and on the span as
 * `duration_ms`, and the span is always ended.
 *
 * @template T
 * @param {number} textCount - Number of texts being embedded; set as `embedding.text_count`.
 * @param {() => Promise<T>} fn - The embedding call to execute.
 * @param {number} [totalChars] - Total character count used for the token/cost estimate.
 * @returns {Promise<T>} The value `fn` resolves to.
 * @throws Rethrows whatever `fn` throws.
 */
export async function withEmbeddingSpan(textCount, fn, totalChars) {
    return tracer.startActiveSpan('memory.embedding', async (span) => {
        const start = Date.now();
        try {
            span.setAttributes({ 'embedding.text_count': textCount });
            embeddingCount.add(1);
            if (totalChars) {
                // Rough heuristic: about four characters per token.
                const estimatedTokens = Math.ceil(totalChars / 4);
                const estimatedCostUsd = estimatedTokens * EMBEDDING_COST_PER_TOKEN;
                embeddingCost.add(estimatedCostUsd);
                span.setAttribute('embedding.estimated_tokens', estimatedTokens);
                span.setAttribute('embedding.estimated_cost_usd', estimatedCostUsd);
            }
            const result = await fn();
            span.setStatus({ code: SpanStatusCode.OK });
            return result;
        }
        catch (err) {
            // Record the exception as a span event; the status message alone
            // only carries the stringified error.
            span.recordException(err);
            span.setStatus({ code: SpanStatusCode.ERROR, message: String(err) });
            throw err;
        }
        finally {
            // Runs on both success and failure so every span gets a duration and is closed.
            const duration = Date.now() - start;
            embeddingDuration.record(duration);
            span.setAttribute('duration_ms', duration);
            span.end();
        }
    });
}
|