@codexstar/pi-listen 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
1
+ import { getAgentDir } from "@mariozechner/pi-coding-agent";
2
+ import * as crypto from "node:crypto";
3
+ import * as fs from "node:fs";
4
+ import * as os from "node:os";
5
+ import * as path from "node:path";
6
+
7
/** Key under which voice settings are stored inside settings.json. */
export const SETTINGS_KEY = "voice";
/** Current schema version for persisted voice configuration. */
export const VOICE_CONFIG_VERSION = 2;

/** Speech-to-text backend identifiers; "auto" defers selection to runtime. */
export type VoiceBackend = "faster-whisper" | "moonshine" | "whisper-cpp" | "deepgram" | "parakeet" | "auto";
/** Transcription mode: local process, cloud API, or automatic choice. */
export type VoiceMode = "auto" | "local" | "api";
/** Where settings are persisted: the user's global agent dir or the project. */
export type VoiceSettingsScope = "global" | "project";
/** Origin of a loaded config; "default" means no settings file provided one. */
export type VoiceConfigSource = VoiceSettingsScope | "default";

/** Tracks progress of the first-run onboarding flow. */
export interface VoiceOnboardingState {
  completed: boolean;
  schemaVersion: number;
  // ISO-8601 timestamps; optional because legacy configs may lack them.
  completedAt?: string;
  lastValidatedAt?: string;
  // How onboarding completion was established.
  source?: "first-run" | "setup-command" | "migration" | "repair";
  // Set when the user deferred onboarding (see needsOnboarding's defer window).
  skippedAt?: string;
}

/** Fully-normalized voice configuration as stored under SETTINGS_KEY. */
export interface VoiceConfig {
  version: number;
  enabled: boolean;
  language: string;
  mode: VoiceMode;
  backend: VoiceBackend;
  model: string;
  scope: VoiceSettingsScope;
  // Toggle for the "btw" feature (semantics defined by the consumer — verify).
  btwEnabled: boolean;
  onboarding: VoiceOnboardingState;
}

/** A resolved config plus where it came from and both candidate file paths. */
export interface LoadedVoiceConfig {
  config: VoiceConfig;
  source: VoiceConfigSource;
  globalSettingsPath: string;
  projectSettingsPath: string;
}

/** Options for overriding the agent directory that holds global settings. */
export interface ConfigPathOptions {
  agentDir?: string;
}

/** Inputs that uniquely identify a transcription server socket (see getSocketPath). */
export interface SocketPathOptions {
  scope: VoiceSettingsScope;
  cwd: string;
  backend: VoiceBackend;
  model: string;
}
53
+
54
/**
 * Baseline voice configuration used when no settings file provides one.
 * "auto" mode/backend defer the concrete backend choice to runtime;
 * onboarding starts incomplete so the first run triggers setup.
 */
export const DEFAULT_CONFIG: VoiceConfig = {
  version: VOICE_CONFIG_VERSION,
  enabled: true,
  language: "en",
  mode: "auto",
  backend: "auto",
  model: "small",
  scope: "global",
  btwEnabled: true,
  onboarding: {
    completed: false,
    schemaVersion: VOICE_CONFIG_VERSION,
  },
};
68
+
69
+ export function readJsonFile(filePath: string): any {
70
+ try {
71
+ if (!fs.existsSync(filePath)) return {};
72
+ return JSON.parse(fs.readFileSync(filePath, "utf8"));
73
+ } catch {
74
+ return {};
75
+ }
76
+ }
77
+
78
+ export function getGlobalSettingsPath(options: ConfigPathOptions = {}): string {
79
+ return path.join(options.agentDir ?? getAgentDir(), "settings.json");
80
+ }
81
+
82
+ export function getProjectSettingsPath(cwd: string): string {
83
+ return path.join(cwd, ".pi", "settings.json");
84
+ }
85
+
86
+ function inferMode(backend: VoiceBackend): VoiceMode {
87
+ if (backend === "deepgram") return "api";
88
+ if (backend === "auto") return "auto";
89
+ return "local";
90
+ }
91
+
92
+ function normalizeOnboarding(input: any, fallbackCompleted: boolean): VoiceOnboardingState {
93
+ const completed = typeof input?.completed === "boolean" ? input.completed : fallbackCompleted;
94
+ return {
95
+ completed,
96
+ schemaVersion: Number.isFinite(input?.schemaVersion) ? Number(input.schemaVersion) : VOICE_CONFIG_VERSION,
97
+ completedAt: typeof input?.completedAt === "string" ? input.completedAt : undefined,
98
+ lastValidatedAt: typeof input?.lastValidatedAt === "string" ? input.lastValidatedAt : undefined,
99
+ source: typeof input?.source === "string" ? input.source : fallbackCompleted ? "migration" : undefined,
100
+ skippedAt: typeof input?.skippedAt === "string" ? input.skippedAt : undefined,
101
+ };
102
+ }
103
+
104
+ function migrateConfig(rawVoice: any, source: VoiceConfigSource): VoiceConfig {
105
+ if (!rawVoice || typeof rawVoice !== "object") {
106
+ return structuredClone(DEFAULT_CONFIG);
107
+ }
108
+
109
+ const backend = (rawVoice.backend ?? DEFAULT_CONFIG.backend) as VoiceBackend;
110
+ const hasMeaningfulLegacySetup =
111
+ (typeof rawVoice.backend === "string" && typeof rawVoice.model === "string") ||
112
+ rawVoice.onboarding?.completed === true;
113
+ const fallbackCompleted = hasMeaningfulLegacySetup;
114
+
115
+ return {
116
+ version: VOICE_CONFIG_VERSION,
117
+ enabled: typeof rawVoice.enabled === "boolean" ? rawVoice.enabled : DEFAULT_CONFIG.enabled,
118
+ language: typeof rawVoice.language === "string" ? rawVoice.language : DEFAULT_CONFIG.language,
119
+ mode: (rawVoice.mode as VoiceMode | undefined) ?? inferMode(backend),
120
+ backend,
121
+ model: typeof rawVoice.model === "string" ? rawVoice.model : DEFAULT_CONFIG.model,
122
+ scope: (rawVoice.scope as VoiceSettingsScope | undefined) ?? (source === "project" ? "project" : "global"),
123
+ btwEnabled: typeof rawVoice.btwEnabled === "boolean" ? rawVoice.btwEnabled : DEFAULT_CONFIG.btwEnabled,
124
+ onboarding: normalizeOnboarding(rawVoice.onboarding, fallbackCompleted),
125
+ };
126
+ }
127
+
128
+ export function loadConfigWithSource(cwd: string, options: ConfigPathOptions = {}): LoadedVoiceConfig {
129
+ const globalSettingsPath = getGlobalSettingsPath(options);
130
+ const projectSettingsPath = getProjectSettingsPath(cwd);
131
+ const globalVoice = readJsonFile(globalSettingsPath)[SETTINGS_KEY];
132
+ const projectVoice = readJsonFile(projectSettingsPath)[SETTINGS_KEY];
133
+
134
+ if (projectVoice && typeof projectVoice === "object") {
135
+ return {
136
+ config: migrateConfig(projectVoice, "project"),
137
+ source: "project",
138
+ globalSettingsPath,
139
+ projectSettingsPath,
140
+ };
141
+ }
142
+
143
+ if (globalVoice && typeof globalVoice === "object") {
144
+ return {
145
+ config: migrateConfig(globalVoice, "global"),
146
+ source: "global",
147
+ globalSettingsPath,
148
+ projectSettingsPath,
149
+ };
150
+ }
151
+
152
+ return {
153
+ config: structuredClone(DEFAULT_CONFIG),
154
+ source: "default",
155
+ globalSettingsPath,
156
+ projectSettingsPath,
157
+ };
158
+ }
159
+
160
+ function serializeConfig(config: VoiceConfig, scope: VoiceSettingsScope): VoiceConfig {
161
+ return {
162
+ ...config,
163
+ scope,
164
+ onboarding: {
165
+ ...config.onboarding,
166
+ schemaVersion: VOICE_CONFIG_VERSION,
167
+ },
168
+ };
169
+ }
170
+
171
+ export function saveConfig(
172
+ config: VoiceConfig,
173
+ scope: VoiceSettingsScope,
174
+ cwd: string,
175
+ options: ConfigPathOptions = {},
176
+ ): string {
177
+ const settingsPath = scope === "project" ? getProjectSettingsPath(cwd) : getGlobalSettingsPath(options);
178
+ const settings = readJsonFile(settingsPath);
179
+ settings[SETTINGS_KEY] = serializeConfig(config, scope);
180
+ fs.mkdirSync(path.dirname(settingsPath), { recursive: true });
181
+ fs.writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + "\n");
182
+ return settingsPath;
183
+ }
184
+
185
+ export function needsOnboarding(config: VoiceConfig, source: VoiceConfigSource): boolean {
186
+ const skippedAt = config.onboarding.skippedAt ? Date.parse(config.onboarding.skippedAt) : Number.NaN;
187
+ const deferWindowMs = 1000 * 60 * 60 * 24;
188
+ const recentlyDeferred = Number.isFinite(skippedAt) && Date.now() - skippedAt < deferWindowMs;
189
+ if (recentlyDeferred) return false;
190
+ if (source === "default") return true;
191
+ return !config.onboarding.completed;
192
+ }
193
+
194
+ export function getSocketPath(options: SocketPathOptions): string {
195
+ const fingerprint = crypto
196
+ .createHash("sha1")
197
+ .update(JSON.stringify({
198
+ scope: options.scope,
199
+ cwd: options.scope === "project" ? path.resolve(options.cwd) : "global",
200
+ backend: options.backend,
201
+ model: options.model,
202
+ }))
203
+ .digest("hex")
204
+ .slice(0, 12);
205
+ return path.join(os.tmpdir(), `pi-voice-${fingerprint}.sock`);
206
+ }
@@ -0,0 +1,212 @@
1
+ import { spawnSync } from "node:child_process";
2
+
3
/** User priority guiding the backend recommendation heuristics. */
export type DiagnosticsPreference = "balanced" | "speed" | "privacy" | "accuracy" | "low-resource";

/** Availability report for one STT backend (parsed from the Python probe's JSON). */
export interface BackendAvailability {
  name: "faster-whisper" | "moonshine" | "whisper-cpp" | "deepgram" | "parakeet";
  available: boolean;
  type: "local" | "cloud";
  default_model: string;
  models: string[];
  // Models the probe confirmed present on disk, when it can detect them.
  installed_models?: string[];
  // Identifier of the detection mechanism used (e.g. "huggingface-cache").
  install_detection?: string;
  // Suggested install command, when known.
  install?: string | null;
}

/** Snapshot of host tooling relevant to voice transcription. */
export interface EnvironmentDiagnostics {
  hasPython: boolean;
  hasSox: boolean;
  hasHomebrew: boolean;
  hasDeepgramKey: boolean;
  backends: BackendAvailability[];
  // Human-readable problems found during the scan.
  issues: string[];
}

/** A concrete backend/model suggestion plus the reasoning behind it. */
export interface VoiceRecommendation {
  mode: "local" | "api";
  backend: BackendAvailability["name"];
  model: string;
  reason: string;
  // Environment problems the user can address, attached to every recommendation.
  fixableIssues: string[];
}

/** Whether a model is usable now, needs a download, is cloud-served, or indeterminate. */
export type ModelReadiness = "installed" | "download required" | "unknown" | "api";
34
+
35
+ function commandExists(command: string): boolean {
36
+ return spawnSync("which", [command], { stdio: "pipe", timeout: 3000 }).status === 0;
37
+ }
38
+
39
+ export function scanEnvironment(transcribeScriptPath: string): EnvironmentDiagnostics {
40
+ const hasPython = commandExists("python3");
41
+ const hasSox = commandExists("rec");
42
+ const hasHomebrew = commandExists("brew");
43
+ const hasDeepgramKey = Boolean(process.env.DEEPGRAM_API_KEY);
44
+
45
+ let backends: BackendAvailability[] = [];
46
+ if (hasPython) {
47
+ const result = spawnSync("python3", [transcribeScriptPath, "--list-backends"], {
48
+ stdio: ["pipe", "pipe", "pipe"],
49
+ timeout: 10000,
50
+ encoding: "utf8",
51
+ });
52
+ try {
53
+ backends = JSON.parse(result.stdout || "[]") as BackendAvailability[];
54
+ } catch {
55
+ backends = [];
56
+ }
57
+ }
58
+
59
+ const issues: string[] = [];
60
+ if (!hasPython) issues.push("python3 is required for all STT backends");
61
+ if (!hasSox) issues.push("Install SoX for microphone recording");
62
+ if (!backends.some((backend) => backend.available)) {
63
+ issues.push("No STT backend is currently installed or configured");
64
+ }
65
+ if (!hasDeepgramKey) {
66
+ issues.push("Deepgram API key is not configured");
67
+ }
68
+
69
+ return {
70
+ hasPython,
71
+ hasSox,
72
+ hasHomebrew,
73
+ hasDeepgramKey,
74
+ backends,
75
+ issues,
76
+ };
77
+ }
78
+
79
+ function getBackend(backends: BackendAvailability[], name: BackendAvailability["name"]): BackendAvailability | undefined {
80
+ return backends.find((backend) => backend.name === name);
81
+ }
82
+
83
+ export function getModelReadiness(backend: BackendAvailability | undefined, model: string): ModelReadiness {
84
+ if (!backend) return "unknown";
85
+ if (backend.type === "cloud") return "api";
86
+ if ((backend.installed_models ?? []).includes(model)) return "installed";
87
+ const highConfidenceDetectors = new Set(["huggingface-cache", "whisper-cpp-model-paths"]);
88
+ if (backend.available && highConfidenceDetectors.has(backend.install_detection ?? "")) {
89
+ return "download required";
90
+ }
91
+ return "unknown";
92
+ }
93
+
94
+ function getPreferredInstalledModel(backend: BackendAvailability | undefined): string | undefined {
95
+ if (!backend) return undefined;
96
+ const installed = backend.installed_models ?? [];
97
+ if (installed.includes(backend.default_model)) return backend.default_model;
98
+ return installed[0];
99
+ }
100
+
101
+ const HIGH_CONFIDENCE_DETECTORS = new Set(["huggingface-cache", "whisper-cpp-model-paths"]);
102
+
103
+ function getDetectorConfidenceKey(backend: BackendAvailability): string {
104
+ if (backend.install_detection) return backend.install_detection;
105
+ if (backend.name === "faster-whisper") return "huggingface-cache";
106
+ if (backend.name === "whisper-cpp") return "whisper-cpp-model-paths";
107
+ return "";
108
+ }
109
+
110
+ function hasHighConfidenceInstalledModel(backend: BackendAvailability): boolean {
111
+ return (backend.installed_models?.length ?? 0) > 0 && HIGH_CONFIDENCE_DETECTORS.has(getDetectorConfidenceKey(backend));
112
+ }
113
+
114
+ function hasHighConfidenceBackendAvailability(backend: BackendAvailability): boolean {
115
+ return backend.available && HIGH_CONFIDENCE_DETECTORS.has(getDetectorConfidenceKey(backend));
116
+ }
117
+
118
+ function getPreferredLocalBackend(backends: BackendAvailability[]): BackendAvailability | undefined {
119
+ const localBackends = backends.filter((backend) => backend.type === "local");
120
+ const withHighConfidenceInstalledModel = localBackends.find((backend) => hasHighConfidenceInstalledModel(backend));
121
+ if (withHighConfidenceInstalledModel) return withHighConfidenceInstalledModel;
122
+ const highConfidenceAvailable = localBackends.find((backend) => hasHighConfidenceBackendAvailability(backend));
123
+ if (highConfidenceAvailable) return highConfidenceAvailable;
124
+ const withHeuristicInstalledModel = localBackends.find((backend) => (backend.installed_models?.length ?? 0) > 0);
125
+ if (withHeuristicInstalledModel) return withHeuristicInstalledModel;
126
+ return localBackends.find((backend) => backend.available);
127
+ }
128
+
129
/**
 * Recommends a voice setup (mode + backend + model) for the scanned
 * environment, biased by the user's stated preference. Branch order matters:
 * speed, privacy, balanced, and low-resource each get an early-return path,
 * and every other case falls through to the local recommendation.
 *
 * @param diagnostics Result of scanEnvironment.
 * @param preference User priority; defaults to "balanced".
 */
export function recommendVoiceSetup(
  diagnostics: EnvironmentDiagnostics,
  preference: DiagnosticsPreference = "balanced",
): VoiceRecommendation {
  // Environment problems the user can fix regardless of which recommendation
  // wins; the same array rides along on every returned recommendation.
  const fixableIssues: string[] = [];
  if (!diagnostics.hasSox) {
    fixableIssues.push("Install SoX for microphone recording");
  }
  if (!diagnostics.hasPython) {
    fixableIssues.push("Install python3 for local transcription backends");
  }

  const fasterWhisper = getBackend(diagnostics.backends, "faster-whisper");
  const deepgram = getBackend(diagnostics.backends, "deepgram");
  const preferredLocalBackend = getPreferredLocalBackend(diagnostics.backends);
  const anyLocalAvailable = diagnostics.backends.some((backend) => backend.type === "local" && backend.available);

  if (!anyLocalAvailable) {
    fixableIssues.push("Install a local STT backend such as faster-whisper");
  }

  // Baseline local recommendation, reused by several branches below.
  // Model fallback chain: installed model > preferred backend's default >
  // faster-whisper's default > hard-coded "small".
  const installedLocalModel = getPreferredInstalledModel(preferredLocalBackend);
  const installedModelIsHighConfidence = preferredLocalBackend ? hasHighConfidenceInstalledModel(preferredLocalBackend) : false;
  const localRecommendation = {
    mode: "local" as const,
    backend: preferredLocalBackend?.name ?? "faster-whisper",
    model: installedLocalModel ?? preferredLocalBackend?.default_model ?? fasterWhisper?.default_model ?? "small",
    reason: installedLocalModel && installedModelIsHighConfidence
      ? `Recommended local default because ${installedLocalModel} is already installed and ready to configure.`
      : preference === "privacy"
        ? "Best for privacy and offline use with a strong local default."
        : preference === "accuracy"
          ? "Best balance of local quality and maturity."
          : "Recommended local default with good balance of quality and setup effort.",
    fixableIssues,
  };

  // "speed": jump straight to cloud when it is already usable.
  if (preference === "speed" && diagnostics.hasDeepgramKey && deepgram?.available) {
    return {
      mode: "api",
      backend: "deepgram",
      model: deepgram.default_model,
      reason: "The fastest path to a working setup because cloud transcription is already configured.",
      fixableIssues,
    };
  }

  // "privacy": always local, with a privacy-specific justification.
  if (preference === "privacy") {
    return {
      ...localRecommendation,
      reason: "Best for privacy and offline use without sending audio to a cloud API.",
    };
  }

  if (preference === "balanced") {
    // Prefer local when it is trustworthy, or when cloud is not ready anyway;
    // otherwise fall back to the already-configured cloud backend.
    if (preferredLocalBackend?.available && (hasHighConfidenceInstalledModel(preferredLocalBackend) || hasHighConfidenceBackendAvailability(preferredLocalBackend) || !(diagnostics.hasDeepgramKey && deepgram?.available))) {
      return localRecommendation;
    }
    if (diagnostics.hasDeepgramKey && deepgram?.available) {
      return {
        mode: "api",
        backend: "deepgram",
        model: deepgram.default_model,
        reason: "Recommended because cloud transcription is already configured and ready to use.",
        fixableIssues,
      };
    }
  }

  // "low-resource": take moonshine when available; otherwise fall through.
  if (preference === "low-resource") {
    const moonshine = getBackend(diagnostics.backends, "moonshine");
    if (moonshine?.available) {
      return {
        mode: "local",
        backend: "moonshine",
        model: moonshine.default_model,
        reason: "Lightweight local option with lower resource requirements.",
        fixableIssues,
      };
    }
  }

  // Fallthrough: "accuracy" and any branch whose conditions were not met.
  return localRecommendation;
}
@@ -0,0 +1,62 @@
1
+ import type { VoiceConfig } from "./config";
2
+ import type { EnvironmentDiagnostics } from "./diagnostics";
3
+
4
/** Result of planning what must be installed/configured before voice can run. */
export interface ProvisioningPlan {
  // True when no commands and no manual steps are required.
  ready: boolean;
  // One-line human-readable description of the plan.
  summary: string;
  // Shell commands that could be executed to provision the environment.
  commands: string[];
  // Steps the user must perform by hand.
  manualSteps: string[];
}
10
+
11
+ export function buildProvisioningPlan(config: VoiceConfig, diagnostics: EnvironmentDiagnostics): ProvisioningPlan {
12
+ const commands: string[] = [];
13
+ const manualSteps: string[] = [];
14
+
15
+ if (!diagnostics.hasSox) {
16
+ commands.push("brew install sox");
17
+ }
18
+
19
+ if (config.mode === "api") {
20
+ if (config.backend === "deepgram" && !diagnostics.hasDeepgramKey) {
21
+ manualSteps.push("Set DEEPGRAM_API_KEY before using Deepgram API mode");
22
+ }
23
+ } else {
24
+ if (!diagnostics.hasPython) {
25
+ manualSteps.push("Install python3 before enabling local STT backends");
26
+ }
27
+
28
+ const selectedBackend = diagnostics.backends.find((backend) => backend.name === config.backend);
29
+ if (!selectedBackend?.available) {
30
+ switch (config.backend) {
31
+ case "faster-whisper":
32
+ commands.push("python3 -m pip install faster-whisper");
33
+ break;
34
+ case "moonshine":
35
+ commands.push("python3 -m pip install 'useful-moonshine[onnx]'");
36
+ break;
37
+ case "whisper-cpp":
38
+ commands.push("brew install whisper-cpp");
39
+ break;
40
+ case "parakeet":
41
+ commands.push("python3 -m pip install 'nemo_toolkit[asr]'");
42
+ break;
43
+ default:
44
+ manualSteps.push(`Install the selected backend: ${config.backend}`);
45
+ }
46
+ } else if (!(selectedBackend.installed_models ?? []).includes(config.model)) {
47
+ const highConfidenceDetectors = new Set(["huggingface-cache", "whisper-cpp-model-paths"]);
48
+ if (highConfidenceDetectors.has(selectedBackend.install_detection ?? "")) {
49
+ manualSteps.push(`Selected model ${config.model} is not installed yet and may need to be downloaded on first use`);
50
+ } else {
51
+ manualSteps.push(`Selected model ${config.model} could not be confirmed locally and may still need a download on first use`);
52
+ }
53
+ }
54
+ }
55
+
56
+ return {
57
+ ready: commands.length === 0 && manualSteps.length === 0,
58
+ summary: config.mode === "api" ? "Provisioning plan for api mode" : "Provisioning plan for local mode",
59
+ commands,
60
+ manualSteps,
61
+ };
62
+ }