npm - @ottocode/server - Versions diffs - 0.1.244 → 0.1.246 - Mend

@ottocode/server 0.1.244 → 0.1.246

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/package.json +4 -3
package/src/events/types.ts +9 -9
package/src/index.ts +9 -4
package/src/openapi/paths/auth.ts +11 -11
package/src/openapi/paths/config.ts +118 -2
package/src/openapi/paths/{setu.ts → ottorouter.ts} +31 -31
package/src/openapi/paths/skills.ts +122 -0
package/src/openapi/schemas.ts +35 -3
package/src/openapi/spec.ts +3 -3
package/src/routes/auth.ts +40 -46
package/src/routes/branch.ts +3 -2
package/src/routes/config/defaults.ts +10 -3
package/src/routes/config/main.ts +3 -0
package/src/routes/config/models.ts +84 -14
package/src/routes/config/providers.ts +137 -4
package/src/routes/config/utils.ts +72 -2
package/src/routes/doctor.ts +15 -27
package/src/routes/git/commit.ts +16 -5
package/src/routes/{setu.ts → ottorouter.ts} +52 -49
package/src/routes/research.ts +3 -3
package/src/routes/session-messages.ts +14 -8
package/src/routes/sessions.ts +12 -18
package/src/routes/skills.ts +140 -59
package/src/runtime/agent/registry.ts +5 -2
package/src/runtime/agent/runner-setup.ts +123 -38
package/src/runtime/agent/runner.ts +140 -4
package/src/runtime/ask/service.ts +14 -11
package/src/runtime/message/history-builder.ts +22 -6
package/src/runtime/message/service.ts +7 -1
package/src/runtime/prompt/builder.ts +12 -0
package/src/runtime/prompt/capabilities.ts +200 -0
package/src/runtime/provider/index.ts +106 -5
package/src/runtime/provider/{setu.ts → ottorouter.ts} +22 -22
package/src/runtime/provider/reasoning.ts +73 -17
package/src/runtime/provider/selection.ts +17 -15
package/src/runtime/session/db-operations.ts +1 -1
package/src/runtime/session/manager.ts +1 -1
package/src/runtime/session/queue.ts +7 -2
package/src/runtime/stream/error-handler.ts +3 -3

package/src/runtime/agent/runner.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { hasToolCall, streamText } from 'ai';
+import { logger } from '@ottocode/sdk';
 import type { getDb } from '@ottocode/database';
 import { messageParts, sessions } from '@ottocode/database/schema';
 import { eq } from 'drizzle-orm';
@@ -80,6 +81,50 @@ function summarizeTraceValue(value: unknown, max = 160): string {
 	return fallback.length > max ? `${fallback.slice(0, max)}…` : fallback;
 }
+/**
+ * Read a millisecond timestamp for latency bookkeeping.
+ *
+ * Uses `performance.now()` when the global `performance` object provides it,
+ * otherwise falls back to `Date.now()`.
+ *
+ * @returns The current reading of whichever clock is available, in ms.
+ */
+function nowMs(): number {
+	const clock = globalThis.performance;
+	// Guard first: runtimes without a usable `performance.now` get wall-clock time.
+	if (!clock || typeof clock.now !== 'function') return Date.now();
+	return clock.now();
+}
+/**
+ * Estimate the character footprint of a message list.
+ *
+ * Each message contributes the length of its `role` plus the length of its
+ * content: the string itself for string content, or its JSON serialization for
+ * structured content.
+ *
+ * @param messages - Messages to measure.
+ * @returns The summed character count across all messages.
+ */
+function approximateMessageChars(
+	messages: Array<{ role: string; content: string | Array<unknown> }>,
+): number {
+	return messages.reduce((runningTotal, { role, content }) => {
+		let contentChars = 0;
+		if (typeof content === 'string') {
+			contentChars = content.length;
+		} else {
+			try {
+				contentChars = JSON.stringify(content).length;
+			} catch {
+				// Best effort: content that cannot be serialized counts as zero.
+			}
+		}
+		return runningTotal + role.length + contentChars;
+	}, 0);
+}
+/**
+ * Describe the size of a tool set for latency logging.
+ *
+ * @param tools - Tool record keyed by tool name.
+ * @returns `toolNames` in the record's key order, `toolSchemaCharsApprox` as
+ *   the sum of every tool's JSON-serialized length, and `largestTools` as the
+ *   (at most) eight biggest tools, largest first.
+ */
+function summarizeToolShape(tools: Record<string, unknown>) {
+	const toolNames = Object.keys(tools);
+	const sizes: Array<{ name: string; approxChars: number }> = [];
+	for (const name of toolNames) {
+		let approxChars = 0;
+		try {
+			approxChars = JSON.stringify(tools[name]).length;
+		} catch {
+			// Best effort: a tool that cannot be serialized is recorded with size 0.
+		}
+		sizes.push({ name, approxChars });
+	}
+	// Largest first, so the slice below keeps the heaviest tools.
+	sizes.sort((left, right) => right.approxChars - left.approxChars);
+	let toolSchemaCharsApprox = 0;
+	for (const { approxChars } of sizes) {
+		toolSchemaCharsApprox += approxChars;
+	}
+	return {
+		toolNames,
+		toolSchemaCharsApprox,
+		largestTools: sizes.slice(0, 8),
+	};
+}
 async function shouldPreemptivelyAutoCompact(
 	db: Awaited<ReturnType<typeof getDb>>,
 	opts: RunOpts,
@@ -119,6 +164,8 @@ export async function runSessionLoop(sessionId: string) {
 }
 async function runAssistant(opts: RunOpts) {
+	const runStartedAt = nowMs();
+	const queueWaitMs = opts.queuedAt ? runStartedAt - opts.queuedAt : 0;
 	const setup = await setupRunner(opts);
 	const {
 		cfg,
@@ -134,6 +181,7 @@ async function runAssistant(opts: RunOpts) {
 		providerOptions,
 		isOpenAIOAuth,
 		mcpToolsRecord,
+		timings,
 	} = setup;
 	let { toolset } = setup;
@@ -296,6 +344,25 @@ async function runAssistant(opts: RunOpts) {
 	const streamStartTimer = time('runner:first-delta');
 	let firstDeltaSeen = false;
+	const logFirstOutputLatency = (kind: 'text' | 'reasoning') => {
+		if (firstDeltaSeen) return;
+		firstDeltaSeen = true;
+		const firstOutputMs = nowMs() - runStartedAt;
+		streamStartTimer.end({ kind, queueWaitMs, setupMs: timings.totalMs });
+		logger.info('[latency] first output', {
+			sessionId: opts.sessionId,
+			messageId: opts.assistantMessageId,
+			agent: opts.agent,
+			provider: opts.provider,
+			model: opts.model,
+			kind,
+			queueWaitMs,
+			firstOutputMs,
+			setupMs: timings.totalMs,
+			totalSinceEnqueueMs: queueWaitMs + firstOutputMs,
+			timings,
+		});
+	};
 	let currentPartId: string | null = null;
 	let accumulated = '';
@@ -387,8 +454,39 @@ async function runAssistant(opts: RunOpts) {
 	const stopWhenCondition = isCopilotResponsesApi
 		? undefined
 		: hasToolCall('finish');
+	const toolShape = summarizeToolShape(toolset as Record<string, unknown>);
+	logger.info('[latency] stream request ready', {
+		sessionId: opts.sessionId,
+		messageId: opts.assistantMessageId,
+		agent: opts.agent,
+		provider: opts.provider,
+		model: opts.model,
+		queueWaitMs,
+		setupMs: timings.totalMs,
+		messageCount: messagesWithSystemInstructions.length,
+		toolCount: Object.keys(toolset).length,
+		toolNames: toolShape.toolNames,
+		toolSchemaCharsApprox: toolShape.toolSchemaCharsApprox,
+		largestTools: toolShape.largestTools,
+		hasPrepareStep: Boolean(prepareStep),
+		providerOptionsKeys: Object.keys(providerOptions),
+		systemPromptChars: system.length,
+		messageCharsApprox: approximateMessageChars(messagesWithSystemInstructions),
+		additionalSystemMessages: additionalSystemMessages.length,
+		historyMessages: history.length,
+	});
 	try {
+		const streamInvocationStartedAt = nowMs();
+		logger.info('[latency] streamText invoke', {
+			sessionId: opts.sessionId,
+			messageId: opts.assistantMessageId,
+			agent: opts.agent,
+			provider: opts.provider,
+			model: opts.model,
+			queueWaitMs,
+			setupMs: timings.totalMs,
+		});
 		const result = streamText({
 			model,
 			tools: toolset,
@@ -412,10 +510,34 @@ async function runAssistant(opts: RunOpts) {
 			onFinish: onFinish as any,
 			// biome-ignore lint/suspicious/noExplicitAny: AI SDK streamText options type
 		} as any);
+		logger.info('[latency] streamText returned', {
+			sessionId: opts.sessionId,
+			messageId: opts.assistantMessageId,
+			agent: opts.agent,
+			provider: opts.provider,
+			model: opts.model,
+			invokeMs: nowMs() - streamInvocationStartedAt,
+		});
 		const tracedToolInputNamesById = new Map<string, string>();
+		let firstFullStreamPartSeen = false;
+		let firstPublishedDeltaSeen = false;
 		for await (const part of result.fullStream) {
 			if (!part) continue;
+			if (!firstFullStreamPartSeen) {
+				firstFullStreamPartSeen = true;
+				logger.info('[latency] first fullStream part', {
+					sessionId: opts.sessionId,
+					messageId: opts.assistantMessageId,
+					agent: opts.agent,
+					provider: opts.provider,
+					model: opts.model,
+					partType: part.type,
+					sinceRunStartMs: nowMs() - runStartedAt,
+					queueWaitMs,
+					setupMs: timings.totalMs,
+				});
+			}
 			if (part.type === 'tool-input-start') {
 				if (shouldTraceToolInput(part.toolName)) {
@@ -482,10 +604,7 @@ async function runAssistant(opts: RunOpts) {
 					continue;
 				}
-				if (!firstDeltaSeen) {
-					firstDeltaSeen = true;
-					streamStartTimer.end();
-				}
+				logFirstOutputLatency('text');
 				if (!currentPartId) {
 					currentPartId = crypto.randomUUID();
@@ -514,6 +633,20 @@ async function runAssistant(opts: RunOpts) {
 						delta,
 					},
 				});
+				if (!firstPublishedDeltaSeen) {
+					firstPublishedDeltaSeen = true;
+					logger.info('[latency] first published delta', {
+						sessionId: opts.sessionId,
+						messageId: opts.assistantMessageId,
+						agent: opts.agent,
+						provider: opts.provider,
+						model: opts.model,
+						sinceRunStartMs: nowMs() - runStartedAt,
+						queueWaitMs,
+						setupMs: timings.totalMs,
+						deltaPreview: delta.length > 80 ? `${delta.slice(0, 80)}…` : delta,
+					});
+				}
 				await db
 					.update(messageParts)
 					.set({ content: JSON.stringify({ text: accumulated }) })
@@ -537,6 +670,9 @@ async function runAssistant(opts: RunOpts) {
 			}
 			if (part.type === 'reasoning-delta') {
+				if (part.text) {
+					logFirstOutputLatency('reasoning');
+				}
 				await handleReasoningDelta(
 					part.id,
 					part.text,

package/src/runtime/ask/service.ts CHANGED Viewed

@@ -16,7 +16,7 @@ import {
 	validateProviderModel,
 	isProviderAuthorized,
 	ensureProviderEnv,
-	isProviderId,
+	hasConfiguredProvider,
 	providerEnvVar,
 	type ProviderId,
 	type ReasoningLevel,
@@ -134,10 +134,11 @@ async function processAskRequest(
 				openai: { enabled: true },
 				anthropic: { enabled: true },
 				google: { enabled: true },
+				'ollama-cloud': { enabled: true, baseURL: 'https://ollama.com' },
 				openrouter: { enabled: true },
 				opencode: { enabled: true },
 				copilot: { enabled: true },
-				setu: { enabled: true },
+				ottorouter: { enabled: true },
 				zai: { enabled: true },
 				'zai-coding': { enabled: true },
 				moonshot: { enabled: true },
@@ -204,20 +205,22 @@ async function processAskRequest(
 				name: agentName,
 				prompt: request.agentPrompt,
 				tools: request.tools ?? ['progress_update', 'finish'],
-				provider: isProviderId(request.provider)
-					? (request.provider as ProviderId)
-					: undefined,
+				provider:
+					typeof request.provider === 'string' &&
+					hasConfiguredProvider(cfg, request.provider)
+						? request.provider
+						: undefined,
 				model: request.model,
 			}
 		: await resolveAgentConfig(cfg.projectRoot, agentName);
 	agentTimer.end({ agent: agentName });
-	const agentProviderDefault = isProviderId(agentCfg.provider)
+	const agentProviderDefault = hasConfiguredProvider(cfg, agentCfg.provider)
 		? agentCfg.provider
 		: cfg.defaults.provider;
 	const agentModelDefault = agentCfg.model ?? cfg.defaults.model;
-	const explicitProvider = isProviderId(request.provider)
-		? (request.provider as ProviderId)
+	const explicitProvider = hasConfiguredProvider(cfg, request.provider)
+		? request.provider
 		: undefined;
 	let providerSelection: ProviderSelection;
@@ -265,8 +268,8 @@ async function processAskRequest(
 		providerForMessage = providerSelection.provider;
 		modelForMessage = providerSelection.model;
 	} else if (session.provider && session.model) {
-		const sessionProvider = isProviderId(session.provider)
-			? (session.provider as ProviderId)
+		const sessionProvider = hasConfiguredProvider(cfg, session.provider)
+			? session.provider
 			: agentProviderDefault;
 		providerForMessage = sessionProvider;
 		modelForMessage = session.model;
@@ -302,7 +305,7 @@ async function processAskRequest(
 		} as SessionRow;
 	}
-	validateProviderModel(providerForMessage, modelForMessage);
+	validateProviderModel(providerForMessage, modelForMessage, cfg);
 	if (!request.skipFileConfig && !request.config && !request.credentials) {
 		await ensureProviderEnv(cfg, providerForMessage);

package/src/runtime/message/history-builder.ts CHANGED Viewed

@@ -7,7 +7,7 @@ import {
 } from 'ai';
 import type { getDb } from '@ottocode/database';
 import { messages, messageParts } from '@ottocode/database/schema';
-import { eq, asc } from 'drizzle-orm';
+import { eq, asc, inArray } from 'drizzle-orm';
 import { ToolHistoryTracker } from './tool-history-tracker.ts';
 /**
@@ -24,16 +24,32 @@ export async function buildHistoryMessages(
 		.from(messages)
 		.where(eq(messages.sessionId, sessionId))
 		.orderBy(asc(messages.createdAt));
+	const messageIds = rows.map((row) => row.id);
+	const allParts = messageIds.length
+		? await db
+				.select()
+				.from(messageParts)
+				.where(inArray(messageParts.messageId, messageIds))
+				.orderBy(asc(messageParts.messageId), asc(messageParts.index))
+		: [];
+	const partsByMessageId = new Map<
+		string,
+		(typeof messageParts.$inferSelect)[]
+	>();
+	for (const part of allParts) {
+		const existing = partsByMessageId.get(part.messageId);
+		if (existing) {
+			existing.push(part);
+			continue;
+		}
+		partsByMessageId.set(part.messageId, [part]);
+	}
 	const history: ModelMessage[] = [];
 	const toolHistory = new ToolHistoryTracker();
 	for (const m of rows) {
-		const parts = await db
-			.select()
-			.from(messageParts)
-			.where(eq(messageParts.messageId, m.id))
-			.orderBy(asc(messageParts.index));
+		const parts = partsByMessageId.get(m.id) ?? [];
 		if (
 			m.role === 'assistant' &&

package/src/runtime/message/service.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import { runSessionLoop } from '../agent/runner.ts';
 import { resolveModel } from '../provider/index.ts';
 import {
 	getFastModelForAuth,
+	getProviderDefinition,
 	logger,
 	type ProviderId,
 	type ReasoningLevel,
@@ -316,8 +317,13 @@ async function generateSessionTitle(args: {
 		const { getAuth } = await import('@ottocode/sdk');
 		const auth = await getAuth(provider, cfg.projectRoot);
 		const oauth = detectOAuth(provider, auth);
+		const providerDefinition = getProviderDefinition(cfg, provider);
-		const titleModel = getFastModelForAuth(provider, auth?.type) ?? modelName;
+		const titleModel =
+			providerDefinition?.source === 'custom' ||
+			providerDefinition?.compatibility === 'ollama'
+				? modelName
+				: (getFastModelForAuth(provider, auth?.type) ?? modelName);
 		const model = await resolveModel(provider, titleModel, cfg);
 		const promptText = String(content ?? '').slice(0, 2000);

package/src/runtime/prompt/builder.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import ANTHROPIC_SPOOF_PROMPT from '@ottocode/sdk/prompts/providers/anthropicSpo
 };
 import { getTerminalManager } from '@ottocode/sdk';
+import { buildCapabilitySummary } from './capabilities.ts';
 export type ComposedSystemPrompt = {
 	prompt: string;
@@ -29,6 +30,8 @@ export type ComposedSystemPrompt = {
 export async function composeSystemPrompt(options: {
 	provider: string;
 	model?: string;
+	promptFamily?: import('@ottocode/sdk').ProviderPromptFamily | null;
+	skillSettings?: import('@ottocode/sdk').OttoConfig['skills'];
 	projectRoot: string;
 	agentPrompt: string;
 	oneShot?: boolean;
@@ -68,6 +71,7 @@ export async function composeSystemPrompt(options: {
 			options.provider,
 			options.model,
 			options.projectRoot,
+			options.promptFamily ?? undefined,
 		);
 		const baseInstructions = (BASE_PROMPT || '').trim();
 		const agentInstructions = options.agentPrompt.trim();
@@ -126,6 +130,14 @@ export async function composeSystemPrompt(options: {
 		}
 	}
+	const capabilitySummary = buildCapabilitySummary({
+		skillSettings: options.skillSettings,
+	});
+	if (capabilitySummary.prompt) {
+		parts.push(capabilitySummary.prompt);
+		components.push(...capabilitySummary.components);
+	}
 	// Add user-provided context if present
 	if (options.userContext?.trim()) {
 		const userContextBlock = [

package/src/runtime/prompt/capabilities.ts ADDED Viewed

@@ -0,0 +1,200 @@
+import {
+	filterDiscoveredSkills,
+	getDiscoveredSkills,
+	getMCPManager,
+	summarizeDescription,
+	type DiscoveredSkill,
+	type OttoConfig,
+} from '@ottocode/sdk';
+/** Number of skills listed individually; any beyond this are collapsed into a single "N more" bullet. */
+const MAX_SKILLS = 8;
+/** Number of MCP servers listed individually; any beyond this are collapsed into a single "N more" bullet. */
+const MAX_MCP_SERVERS = 8;
+/** Number of tool names quoted in one MCP server's summary. */
+const MAX_MCP_TOOLS_PER_SERVER = 3;
+/** Minimal description of one MCP tool, as consumed by the capability summary. */
+export type CapabilitySummaryMCPTool = {
+	/** Tool name; a leading `<server>__` prefix is stripped before it is shown. */
+	name: string;
+	/** Name of the MCP server exposing the tool; tools are grouped by this value. */
+	server: string;
+	/** Optional free-text description, used as the basis of the server summary. */
+	description?: string;
+};
+/** Output of `buildCapabilitySummary`. */
+export type CapabilitySummaryResult = {
+	/** The `<optional-capabilities>` block, or an empty string when there is nothing to advertise. */
+	prompt: string;
+	/** Labels naming the sections that contributed to `prompt`; empty when `prompt` is empty. */
+	components: string[];
+};
+/**
+ * Assemble the `<optional-capabilities>` system-prompt block.
+ *
+ * The block advertises available skills and started MCP servers as short
+ * bullet lists instead of inlining their full instructions or tool catalogs.
+ *
+ * @param options - Optional inputs. `skills` and `mcpTools` are used instead of
+ *   the live lookups when supplied; `skillSettings` is forwarded to the skill
+ *   filter.
+ * @returns The prompt text and the component labels that contributed to it.
+ *   Both are empty when neither skills nor MCP servers produce any lines.
+ */
+export function buildCapabilitySummary(options?: {
+	skillSettings?: OttoConfig['skills'];
+	skills?: DiscoveredSkill[];
+	mcpTools?: CapabilitySummaryMCPTool[];
+}): CapabilitySummaryResult {
+	// Candidate sections in display order; empty ones are dropped below.
+	const candidates = [
+		{
+			heading: 'Skills:',
+			lines: buildSkillLines(options?.skills, options?.skillSettings),
+			component: 'capabilities:skills',
+		},
+		{
+			heading: 'Started MCP capabilities:',
+			lines: buildMCPLines(options?.mcpTools),
+			component: 'capabilities:mcp',
+		},
+	];
+	const populated = candidates.filter((candidate) => candidate.lines.length > 0);
+	if (populated.length === 0) {
+		return { prompt: '', components: [] };
+	}
+
+	const sectionText = populated
+		.map((candidate) => [candidate.heading, ...candidate.lines].join('\n'))
+		.join('\n\n');
+	const components = [
+		'capabilities',
+		...populated.map((candidate) => candidate.component),
+	];
+	const promptLines = [
+		'<optional-capabilities>',
+		'You have additional capabilities available when they may help with the task.',
+		'Use them proactively when relevant, but do not load or call them unnecessarily.',
+		'',
+		sectionText,
+		'',
+		'When one of these capabilities may help, prefer using it instead of ignoring it.',
+		'</optional-capabilities>',
+	];
+	return { prompt: promptLines.join('\n'), components };
+}
+/**
+ * Render the skill bullets for the capability summary.
+ *
+ * Skills are filtered through `filterDiscoveredSkills`, de-duplicated by their
+ * trimmed name (first occurrence wins), sorted by name, and capped at
+ * `MAX_SKILLS` bullets. Any overflow is reported as one trailing "N more" line.
+ *
+ * The trimmed name is used for sorting and display as well as de-duplication,
+ * so a skill whose name carries stray whitespace is neither mis-sorted nor
+ * printed with the padding.
+ *
+ * @param providedSkills - Skills to list; when omitted, `getDiscoveredSkills()`
+ *   is consulted instead.
+ * @param skillSettings - Settings forwarded to `filterDiscoveredSkills`.
+ * @returns Bullet lines, or an empty array when no skill survives filtering.
+ */
+function buildSkillLines(
+	providedSkills: DiscoveredSkill[] | undefined,
+	skillSettings: OttoConfig['skills'] | undefined,
+): string[] {
+	const skills = filterDiscoveredSkills(
+		providedSkills ?? getDiscoveredSkills(),
+		skillSettings,
+	);
+	// Key on the trimmed name so "foo" and " foo " count as the same skill.
+	const byName = new Map<string, DiscoveredSkill>();
+	for (const skill of skills) {
+		const name = skill.name.trim();
+		if (!name || byName.has(name)) continue;
+		byName.set(name, skill);
+	}
+	const unique = Array.from(byName.entries()).sort(([a], [b]) =>
+		a.localeCompare(b),
+	);
+	const visible = unique.slice(0, MAX_SKILLS).map(([name, skill]) => {
+		const summary = finalizeSentence(summarizeDescription(skill.description));
+		const description = summary || 'Task-specific instructions and guidance';
+		return `- ${name}: ${description}. Load with \`skill\` when it matches the task.`;
+	});
+	const remaining = unique.length - visible.length;
+	if (remaining > 0) {
+		visible.push(
+			`- ${remaining} more skill${remaining === 1 ? '' : 's'} available via \`skill\`.`,
+		);
+	}
+	return visible;
+}
+/**
+ * Render the MCP server bullets for the capability summary.
+ *
+ * Tools are grouped by server, servers are sorted by name, and at most
+ * `MAX_MCP_SERVERS` servers get their own bullet. Any overflow is reported as
+ * one trailing "N more" line.
+ *
+ * @param providedMCPTools - Tools to summarize; when omitted, the tools
+ *   returned by `getLiveMCPTools()` are used.
+ * @returns Bullet lines, or an empty array when there are no tools.
+ */
+function buildMCPLines(
+	providedMCPTools: CapabilitySummaryMCPTool[] | undefined,
+): string[] {
+	const tools = providedMCPTools ?? getLiveMCPTools();
+	if (tools.length === 0) return [];
+
+	const byServer = new Map<string, CapabilitySummaryMCPTool[]>();
+	for (const tool of tools) {
+		const bucket = byServer.get(tool.server);
+		if (bucket) {
+			bucket.push(tool);
+		} else {
+			byServer.set(tool.server, [tool]);
+		}
+	}
+
+	const serverNames = Array.from(byServer.keys()).sort((a, b) =>
+		a.localeCompare(b),
+	);
+	const lines: string[] = [];
+	for (const server of serverNames.slice(0, MAX_MCP_SERVERS)) {
+		const summary = summarizeMCPServer(server, byServer.get(server) ?? []);
+		lines.push(
+			`- ${server}: ${summary}. Load relevant tools with \`load_mcp_tools\` when the task may benefit from them.`,
+		);
+	}
+
+	const hidden = serverNames.length - lines.length;
+	if (hidden > 0) {
+		lines.push(
+			`- ${hidden} more started MCP server${hidden === 1 ? '' : 's'} available via \`load_mcp_tools\`.`,
+		);
+	}
+	return lines;
+}
+/**
+ * Collect the tools currently exposed by the MCP manager.
+ *
+ * @returns One entry per tool reported by `manager.getTools()`, or an empty
+ *   array when there is no manager or its `started` flag is falsy.
+ */
+function getLiveMCPTools(): CapabilitySummaryMCPTool[] {
+	const manager = getMCPManager();
+	const isRunning = Boolean(manager?.started);
+	if (!isRunning) return [];
+	return manager.getTools().map((entry) => ({
+		name: entry.name,
+		server: entry.server,
+		description: entry.tool.description,
+	}));
+}
+/**
+ * Produce a one-phrase summary of an MCP server from its tools.
+ *
+ * The first non-empty tool description (with any leading "MCP tool:" label and
+ * trailing punctuation removed) is the lead, followed by up to
+ * `MAX_MCP_TOOLS_PER_SERVER` distinct tool names. Without a usable
+ * description, a generic phrase built from the server name is returned.
+ *
+ * @param server - Server name; also the prefix stripped from tool names.
+ * @param tools - Tools belonging to that server.
+ * @returns The summary phrase, without terminal punctuation.
+ */
+function summarizeMCPServer(
+	server: string,
+	tools: CapabilitySummaryMCPTool[],
+): string {
+	const shortNames = tools
+		.map((tool) => stripServerPrefix(tool.name, server))
+		.filter((name) => name.length > 0);
+	const sampleNames = dedupeStrings(shortNames).slice(
+		0,
+		MAX_MCP_TOOLS_PER_SERVER,
+	);
+	const hasSampleNames = sampleNames.length !== 0;
+
+	const cleanedDescriptions = tools
+		.map((tool) => tool.description?.trim() ?? '')
+		.filter((description) => description.length > 0)
+		.map((description) => description.replace(/^MCP tool:\s*/i, ''));
+	// The first description that still has text after cleanup is the lead.
+	let lead: string | undefined;
+	for (const description of dedupeStrings(cleanedDescriptions)) {
+		const sentence = finalizeSentence(description);
+		if (sentence.length > 0) {
+			lead = sentence;
+			break;
+		}
+	}
+
+	if (lead) {
+		return hasSampleNames
+			? `${lead}; tools include ${sampleNames.join(', ')}`
+			: lead;
+	}
+	return hasSampleNames
+		? `external ${server} tools such as ${sampleNames.join(', ')}`
+		: `external tools exposed by the ${server} MCP server`;
+}
+/**
+ * Drop a leading `<server>__` prefix from a tool name.
+ *
+ * @param name - Tool name, possibly prefixed.
+ * @param server - Server name that forms the prefix.
+ * @returns The name without the prefix, or the name unchanged when it does not
+ *   start with the prefix.
+ */
+function stripServerPrefix(name: string, server: string): string {
+	const prefix = server + '__';
+	if (!name.startsWith(prefix)) return name;
+	return name.substring(prefix.length);
+}
+/**
+ * Normalize a phrase so it can be embedded in a sentence.
+ *
+ * Whitespace runs are collapsed to single spaces, the ends are trimmed, and
+ * any trailing run of `.`, `!`, `?`, `;`, `:`, `,` or whitespace is removed.
+ *
+ * @param value - Raw text.
+ * @returns The cleaned phrase; an empty string when nothing remains.
+ */
+function finalizeSentence(value: string): string {
+	const collapsed = value.trim().replace(/\s+/g, ' ');
+	if (collapsed.length === 0) return '';
+	// After collapsing, a plain space is the only whitespace that can remain.
+	const trailingChars = '.!?;:, ';
+	let end = collapsed.length;
+	while (end > 0 && trailingChars.includes(collapsed.charAt(end - 1))) {
+		end -= 1;
+	}
+	return collapsed.slice(0, end);
+}
+/**
+ * Trim every string, drop the empty ones, and remove duplicates.
+ *
+ * @param values - Strings to clean.
+ * @returns The distinct trimmed strings, in order of first appearance.
+ */
+function dedupeStrings(values: string[]): string[] {
+	const trimmed = values
+		.map((value) => value.trim())
+		.filter((value) => value.length > 0);
+	// A Set iterates in insertion order, so first occurrences keep their position.
+	return Array.from(new Set(trimmed));
+}