npm - @rajnandan1/atticus - Versions diffs - 1.0.0 - Mend

@rajnandan1/atticus 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,510 @@
+import { RealtimeItem } from '@openai/agents/realtime';
+/**
+ * Configuration for the AI agent's behavior and personality.
+ *
+ * Supplied as the required `agent` field of `AtticusConfig`.
+ * Both fields are required strings.
+ */
+interface AgentConfig {
+    /** The name of the agent (used for display/logging purposes) */
+    name: string;
+    /**
+     * System instructions that define the agent's behavior and personality.
+     *
+     * NOTE(review): the `Atticus` class declares private `buildInstructions`,
+     * `getLanguageDirective` and `getUIDirective` members, so this text is
+     * presumably extended before being sent to the model — confirm against
+     * the implementation.
+     */
+    instructions: string;
+}
+/**
+ * D2Snap options for DOM compression.
+ *
+ * Passed through the optional `UIConfig.d2SnapOptions` field.
+ * Every field is optional; the documented defaults apply when omitted.
+ */
+interface D2SnapOptions {
+    /** Maximum tokens for adaptive compression @default 4096 */
+    maxTokens?: number;
+    /** Assign unique IDs to interactive elements @default true */
+    assignUniqueIDs?: boolean;
+}
+/**
+ * Configuration for UI awareness.
+ *
+ * Supplied through the optional `AtticusConfig.ui` field. When a `ui` object
+ * is given, both `enabled` and `rootElement` are required by the type.
+ */
+interface UIConfig {
+    /**
+     * Whether to enable UI-aware interactions.
+     * When enabled, the agent can understand and interact with the DOM.
+     */
+    enabled: boolean;
+    /**
+     * The root DOM element to capture.
+     * The library will use element.innerHTML and compress it with d2snap.
+     *
+     * This field is not optional: it must be provided even when `enabled`
+     * is false.
+     *
+     * @example document.body
+     * @example document.getElementById('app')
+     */
+    rootElement: Element;
+    /**
+     * D2Snap options for DOM compression.
+     * See `D2SnapOptions` for the individual fields and their defaults.
+     */
+    d2SnapOptions?: D2SnapOptions;
+    /**
+     * Whether to automatically update DOM context periodically.
+     * The `Atticus` class also exposes `startAutoUpdate()` and
+     * `stopAutoUpdate()` for controlling this at runtime.
+     * @default false
+     */
+    autoUpdate?: boolean;
+    /**
+     * Interval in milliseconds for auto-updating DOM context.
+     * Only used if autoUpdate is true.
+     * @default 5000
+     */
+    autoUpdateInterval?: number;
+}
+/**
+ * Available voice options for the agent.
+ * These are OpenAI's text-to-speech voices.
+ *
+ * Used as the type of `AtticusConfig.voice`, which documents `'alloy'` as
+ * its default. Any string outside this union is rejected at compile time.
+ */
+type AtticusVoice = "alloy" | "ash" | "ballad" | "coral" | "echo" | "sage" | "shimmer" | "verse";
+/**
+ * Configuration options for Atticus Voice Agent.
+ *
+ * This is the single argument of the `Atticus` constructor. Only
+ * `clientSecret` and `agent` are required; every other field is optional
+ * and falls back to the default documented on it.
+ */
+interface AtticusConfig {
+    /**
+     * The OpenAI client secret (ephemeral key) for the Realtime API.
+     * Obtain this from your backend server.
+     *
+     * @example 'ek_...'
+     */
+    clientSecret: string;
+    /**
+     * Configuration for the AI agent.
+     * See `AgentConfig` for the required `name` and `instructions` fields.
+     */
+    agent: AgentConfig;
+    /**
+     * The voice to use for the agent's speech.
+     * Available voices: 'alloy', 'ash', 'ballad', 'coral', 'echo', 'sage', 'shimmer', 'verse'
+     * @default 'alloy'
+     */
+    voice?: AtticusVoice;
+    /**
+     * The language for the voice conversation.
+     * The agent will speak and understand this language.
+     *
+     * The type is a plain `string`, so the compiler does not validate the
+     * code. `isTranscriptionSupported(code)` can be used to check whether a
+     * code is supported for transcription.
+     *
+     * @example 'en' (English)
+     * @example 'hi' (Hindi)
+     * @example 'es' (Spanish)
+     * @example 'fr' (French)
+     * @default 'en'
+     */
+    language?: string;
+    /**
+     * The OpenAI model to use for realtime conversations.
+     * @default 'gpt-4o-realtime-preview'
+     */
+    model?: string;
+    /**
+     * Whether to automatically send a greeting message when connected.
+     * @default true
+     */
+    autoGreet?: boolean;
+    /**
+     * The initial message to send when connected (if autoGreet is true).
+     *
+     * NOTE(review): the module also exports `LANGUAGE_GREETINGS` and
+     * `getLanguageGreeting`; whether the default below is localized
+     * according to `language` is not visible in this declaration file —
+     * confirm against the implementation.
+     *
+     * @default 'Hello!'
+     */
+    greetingMessage?: string;
+    /**
+     * Enable debug logging.
+     * @default false
+     */
+    debug?: boolean;
+    /**
+     * If true, UI actions will not be automatically executed.
+     * The 'action' event will still be emitted for you to handle manually.
+     * `Atticus.executeAction(action)` is available for running an action's
+     * code from such a handler.
+     * @default false
+     */
+    doNotExecuteActions?: boolean;
+    /**
+     * Configuration for UI-aware interactions.
+     * When enabled, the agent can understand and interact with the page's DOM.
+     * See `UIConfig`; note that `enabled` and `rootElement` are both required
+     * once this object is provided.
+     */
+    ui?: UIConfig;
+}
+/**
+ * The connection status of the voice agent.
+ *
+ * Exposed through the `Atticus.status` getter, the `status` field of
+ * `AtticusState`, and the payload of the `statusChange` event.
+ *
+ * - `idle`: Not connected, ready to connect
+ * - `connecting`: Currently establishing connection
+ * - `connected`: Successfully connected and ready for conversation
+ * - `error`: Connection failed or encountered an error
+ */
+type AtticusStatus = "idle" | "connecting" | "connected" | "error";
+/**
+ * The current state of the conversation.
+ *
+ * Exposed through the `Atticus.conversationState` getter, the
+ * `conversationState` field of `AtticusState`, and the payload of the
+ * `conversationStateChange` event.
+ *
+ * - `idle`: No active conversation
+ * - `ai_speaking`: The AI assistant is currently speaking
+ * - `user_turn`: Waiting for the user to speak
+ * - `user_speaking`: The user is currently speaking
+ */
+type ConversationState = "idle" | "ai_speaking" | "user_turn" | "user_speaking";
+/**
+ * The complete state of the voice agent at any given moment.
+ *
+ * Returned by `Atticus.getState()` and delivered as the payload of the
+ * `stateChange` event. Each field mirrors a getter of the same name on the
+ * `Atticus` class.
+ */
+interface AtticusState {
+    /** Current connection status */
+    status: AtticusStatus;
+    /** Current conversation state */
+    conversationState: ConversationState;
+    /** Error message if status is 'error', null otherwise */
+    error: string | null;
+    /** Conversation history */
+    history: Message[];
+    /**
+     * Whether the agent is currently connected.
+     * NOTE(review): presumably equivalent to `status === 'connected'` — confirm.
+     */
+    isConnected: boolean;
+    /**
+     * Whether the AI is currently speaking.
+     * NOTE(review): presumably equivalent to
+     * `conversationState === 'ai_speaking'` — confirm.
+     */
+    isAiSpeaking: boolean;
+    /**
+     * Whether the user is currently speaking.
+     * NOTE(review): presumably equivalent to
+     * `conversationState === 'user_speaking'` — confirm.
+     */
+    isUserSpeaking: boolean;
+}
+/**
+ * Content types that can appear in a message.
+ *
+ * A discriminated union keyed on `type`:
+ * - `"text"`: carries the message body in `text` (always a string).
+ * - `"audio"`: carries `transcript`, which is typed `string | null`, so
+ *   consumers must handle the `null` case before using it as text.
+ *
+ * Narrow on `content.type` before reading `text` or `transcript`.
+ */
+type MessageContent = {
+    type: "text";
+    text: string;
+} | {
+    type: "audio";
+    transcript: string | null;
+};
+/**
+ * A parsed message from the conversation history.
+ *
+ * Delivered individually by the `message` event and as an array by the
+ * `historyChange` event, `AtticusState.history` and `Atticus.history`.
+ */
+interface Message {
+    /** Unique identifier for this message */
+    id: string;
+    /** Who sent this message; limited to the user and the assistant */
+    role: "user" | "assistant";
+    /**
+     * The content of the message.
+     * A single `MessageContent` value (text or audio), not an array.
+     */
+    content: MessageContent;
+    /**
+     * The original raw item from the API.
+     * Typed as `RealtimeItem` from `@openai/agents/realtime`.
+     */
+    raw: RealtimeItem;
+    /** Timestamp when this message was created */
+    timestamp: Date;
+}
+/**
+ * A UI action requested by the agent.
+ * Contains both the spoken response and executable code.
+ *
+ * Delivered as the payload of the `action` event and accepted by
+ * `Atticus.executeAction`. Only `id`, `outputText` and `timestamp` are
+ * guaranteed to be non-null by the type; the remaining fields may be `null`.
+ *
+ * NOTE(review): `outputCode` is JavaScript described as code to execute;
+ * how it is produced and evaluated is not visible in this declaration
+ * file — review the implementation before running it on sensitive pages.
+ */
+interface UIAction {
+    /** Unique identifier for this action */
+    id: string;
+    /** The text the agent spoke (explanation of the action) */
+    outputText: string;
+    /** JavaScript code to execute the UI interaction; may be null */
+    outputCode: string | null;
+    /** Description of what the code does; may be null */
+    actionDescription: string | null;
+    /** The element selector/identifier being targeted; may be null */
+    targetElement: string | null;
+    /** Type of action (click, type, scroll, etc.); may be null */
+    actionType: UIActionType | null;
+    /** Timestamp when this action was created */
+    timestamp: Date;
+}
+/**
+ * Types of UI actions the agent can perform.
+ *
+ * Used as the (nullable) type of `UIAction.actionType`. `"other"` is the
+ * catch-all member for actions that fit none of the named categories.
+ */
+type UIActionType = "click" | "type" | "scroll" | "focus" | "hover" | "select" | "navigate" | "read" | "other";
+/**
+ * All events emitted by Atticus.
+ *
+ * Subscribe to these events using `atticus.on(eventName, callback)`.
+ *
+ * Each property name is an event name and its value is the listener
+ * signature for that event. All listeners return `void`.
+ */
+interface AtticusEvents {
+    /**
+     * Emitted when the connection status changes.
+     * @param status - The new status
+     */
+    statusChange: (status: AtticusStatus) => void;
+    /**
+     * Emitted when the conversation state changes.
+     * @param state - The new conversation state
+     */
+    conversationStateChange: (state: ConversationState) => void;
+    /**
+     * Emitted when an error occurs.
+     * The payload is a message string, not an `Error` object.
+     * @param error - The error message
+     */
+    error: (error: string) => void;
+    /**
+     * Emitted when a new message is added to the history.
+     * @param message - The new message
+     */
+    message: (message: Message) => void;
+    /**
+     * Emitted when the conversation history is updated.
+     * @param history - The complete conversation history
+     */
+    historyChange: (history: Message[]) => void;
+    /**
+     * Emitted when the complete state changes.
+     * Useful for frameworks that want a single state object.
+     * @param state - The complete current state
+     */
+    stateChange: (state: AtticusState) => void;
+    /**
+     * Emitted when the agent starts speaking.
+     */
+    agentStart: () => void;
+    /**
+     * Emitted when the agent stops speaking (response generation ended).
+     */
+    agentEnd: () => void;
+    /**
+     * Emitted when audio playback starts.
+     */
+    audioStart: () => void;
+    /**
+     * Emitted when audio playback ends.
+     * This is when the user can start speaking.
+     */
+    audioEnd: () => void;
+    /**
+     * Emitted when user audio is detected.
+     */
+    userAudio: () => void;
+    /**
+     * Emitted when successfully connected.
+     */
+    connected: () => void;
+    /**
+     * Emitted when disconnected.
+     */
+    disconnected: () => void;
+    /**
+     * Emitted when the agent requests a UI action.
+     * Per `AtticusConfig.doNotExecuteActions`, actions are executed
+     * automatically unless that flag is true; this event is emitted in
+     * either case. With the flag set, the listener is responsible for
+     * handling the action (for example via `Atticus.executeAction`).
+     * @param action - The UI action to perform
+     */
+    action: (action: UIAction) => void;
+}
+/**
+ * Event names for Atticus.
+ *
+ * Derived with `keyof AtticusEvents`, so it always matches the set of
+ * events declared there. Used as the constraint on the `event` parameter of
+ * `Atticus.on`, `Atticus.once` and `Atticus.off`.
+ */
+type AtticusEventName = keyof AtticusEvents;
+/**
+ * Map of language codes to their full names.
+ *
+ * Typed as `Record<string, string>`, so the compiler does not flag lookups
+ * of codes that are absent from the map. `getLanguageName` documents a
+ * fallback to the code itself for that case.
+ */
+declare const LANGUAGE_NAMES: Record<string, string>;
+/**
+ * Languages officially supported by OpenAI's transcription API.
+ * Only these language codes can be passed to inputAudioTranscription.language
+ *
+ * Declared as a plain `Set<string>` (not `ReadonlySet`), so the type does
+ * not prevent callers from mutating it. `isTranscriptionSupported` offers a
+ * membership check without touching the set directly.
+ */
+declare const SUPPORTED_TRANSCRIPTION_LANGUAGES: Set<string>;
+/**
+ * Native greetings for each supported language.
+ *
+ * Typed as `Record<string, string>`; presumably keyed by the same language
+ * codes as `LANGUAGE_NAMES` — confirm against the implementation.
+ * `getLanguageGreeting` documents a fallback of "Hello!" for unknown codes.
+ */
+declare const LANGUAGE_GREETINGS: Record<string, string>;
+/**
+ * Get the full name of a language from its code.
+ *
+ * Never returns `undefined`: an unrecognized code is returned unchanged.
+ * NOTE(review): presumably backed by `LANGUAGE_NAMES` — confirm.
+ *
+ * @param code - Language code (e.g., 'en', 'hi')
+ * @returns The full language name or the code if not found
+ */
+declare function getLanguageName(code: string): string;
+/**
+ * Get the native greeting for a language.
+ *
+ * Never returns `undefined`: an unrecognized code yields "Hello!".
+ * NOTE(review): presumably backed by `LANGUAGE_GREETINGS` — confirm.
+ *
+ * @param code - Language code (e.g., 'en', 'hi')
+ * @returns The native greeting or "Hello!" if not found
+ */
+declare function getLanguageGreeting(code: string): string;
+/**
+ * Check if a language is supported for transcription.
+ *
+ * NOTE(review): presumably a membership test against
+ * `SUPPORTED_TRANSCRIPTION_LANGUAGES` — confirm against the implementation.
+ *
+ * @param code - Language code to check
+ * @returns True if the language is supported for transcription
+ */
+declare function isTranscriptionSupported(code: string): boolean;
+/**
+ * Atticus - A framework-agnostic voice agent for voice-controlled UI interactions.
+ *
+ * The public surface covers event subscription (`on`/`once`/`off`),
+ * getter-only state accessors, connection control
+ * (`connect`/`disconnect`/`toggle`/`destroy`), messaging
+ * (`sendMessage`/`interrupt`) and DOM-context management
+ * (`updateDOM`/`refreshDOM`/`startAutoUpdate`/`stopAutoUpdate`).
+ * Members marked `private` appear without types in this declaration file
+ * and are not part of the public API.
+ *
+ * @example
+ * ```ts
+ * import { Atticus } from '@rajnandan1/atticus';
+ *
+ * const agent = new Atticus({
+ *   clientSecret: 'ek_...',
+ *   agent: {
+ *     name: 'Assistant',
+ *     instructions: 'You are a helpful assistant.'
+ *   }
+ * });
+ *
+ * agent.on('connected', () => console.log('Connected!'));
+ * agent.on('message', (msg) => console.log('Message:', msg));
+ *
+ * await agent.connect();
+ * ```
+ */
+declare class Atticus {
+    private config;
+    private agent;
+    private session;
+    private listeners;
+    private _status;
+    private _conversationState;
+    private _error;
+    private _history;
+    private _messageIdCounter;
+    private _actionIdCounter;
+    private _currentDOM;
+    private _autoUpdateTimer;
+    /**
+     * Create a new Atticus instance.
+     *
+     * Connecting is a separate step: call `connect()` afterwards.
+     *
+     * @param config - Configuration options; `clientSecret` and `agent` are
+     * required, everything else is optional (see `AtticusConfig`)
+     */
+    constructor(config: AtticusConfig);
+    private createGetUIStateTool;
+    private createUIActionTool;
+    private buildInstructions;
+    private getLanguageDirective;
+    private getUIDirective;
+    /**
+     * Subscribe to an event.
+     *
+     * The callback type is taken from `AtticusEvents[T]`, so its parameters
+     * are checked against the chosen event name.
+     *
+     * @param event - The event name
+     * @param callback - The callback function
+     * @returns A function to unsubscribe
+     */
+    on<T extends AtticusEventName>(event: T, callback: AtticusEvents[T]): () => void;
+    /**
+     * Subscribe to an event for one invocation only.
+     *
+     * @param event - The event name
+     * @param callback - The callback function
+     * @returns A function to unsubscribe
+     */
+    once<T extends AtticusEventName>(event: T, callback: AtticusEvents[T]): () => void;
+    /**
+     * Unsubscribe from an event.
+     *
+     * Alternative to calling the unsubscribe function returned by `on`.
+     *
+     * @param event - The event name
+     * @param callback - The callback function to remove
+     */
+    off<T extends AtticusEventName>(event: T, callback: AtticusEvents[T]): void;
+    /**
+     * Remove all event listeners.
+     */
+    removeAllListeners(): void;
+    /**
+     * Get the current connection status.
+     */
+    get status(): AtticusStatus;
+    /**
+     * Get the current conversation state.
+     */
+    get conversationState(): ConversationState;
+    /**
+     * Get the current error message (if any).
+     * `null` when there is no error.
+     */
+    get error(): string | null;
+    /**
+     * Get the conversation history.
+     *
+     * NOTE(review): whether this returns a copy or the internal array is not
+     * visible here — avoid mutating the result until confirmed.
+     */
+    get history(): Message[];
+    /**
+     * Check if the agent is currently connected.
+     */
+    get isConnected(): boolean;
+    /**
+     * Check if the AI is currently speaking.
+     */
+    get isAiSpeaking(): boolean;
+    /**
+     * Check if the user is currently speaking.
+     */
+    get isUserSpeaking(): boolean;
+    /**
+     * Get the configured language code.
+     * Always a string; `AtticusConfig.language` documents `'en'` as the
+     * default when none is configured.
+     */
+    get language(): string;
+    /**
+     * Get the complete current state as a single object.
+     *
+     * @returns An `AtticusState` combining status, conversation state,
+     * error, history and the boolean flags
+     */
+    getState(): AtticusState;
+    /**
+     * Connect to the voice agent and start a conversation.
+     *
+     * @returns A promise with no resolved value
+     * @throws Error if connection fails
+     */
+    connect(): Promise<void>;
+    /**
+     * Disconnect from the voice agent and end the conversation.
+     */
+    disconnect(): void;
+    /**
+     * Interrupt the AI while it's speaking.
+     */
+    interrupt(): void;
+    /**
+     * Send a text message to the agent.
+     *
+     * Returns nothing; replies arrive through the `message` and
+     * `historyChange` events.
+     *
+     * @param message - The text message to send
+     */
+    sendMessage(message: string): void;
+    /**
+     * Toggle the connection state.
+     *
+     * NOTE(review): presumably connects when not connected and disconnects
+     * otherwise — confirm against the implementation.
+     *
+     * @returns A promise with no resolved value
+     */
+    toggle(): Promise<void>;
+    /**
+     * Destroy the agent instance and clean up all resources.
+     */
+    destroy(): void;
+    /**
+     * Update the DOM context manually.
+     *
+     * @param dom - The DOM Element or HTML string
+     * @returns A promise with no resolved value
+     */
+    updateDOM(dom: string | Element): Promise<void>;
+    /**
+     * Refresh the DOM context from the configured root element.
+     * The root element is the one given as `UIConfig.rootElement`.
+     *
+     * @returns A promise with no resolved value
+     */
+    refreshDOM(): Promise<void>;
+    /**
+     * Start auto-updating the DOM context.
+     * See `UIConfig.autoUpdateInterval` for the documented interval setting.
+     */
+    startAutoUpdate(): void;
+    /**
+     * Stop auto-updating the DOM context.
+     */
+    stopAutoUpdate(): void;
+    /**
+     * Get the current DOM context.
+     * Typed `string | null`; presumably `null` until a DOM has been
+     * captured — confirm against the implementation.
+     */
+    get currentDOM(): string | null;
+    /**
+     * Check if UI mode is enabled.
+     */
+    get isUIEnabled(): boolean;
+    private setupSessionListeners;
+    private handleHistoryUpdate;
+    private setStatus;
+    private setConversationState;
+    private emitStateChange;
+    private emit;
+    private log;
+    private captureDOM;
+    private sendDOMContext;
+    /**
+     * Get the text content of a message.
+     *
+     * NOTE(review): an audio message's `transcript` may be `null` while this
+     * method always returns a string; the value returned in that case is
+     * not visible here — confirm against the implementation.
+     *
+     * @param message - The message to extract text from
+     * @returns The text content or transcript
+     */
+    getMessageText(message: Message): string;
+    /**
+     * Execute a UI action's code.
+     *
+     * Can be called from an `action` listener to run an action manually,
+     * e.g. when `AtticusConfig.doNotExecuteActions` is true.
+     *
+     * NOTE(review): the result shape suggests failures are reported through
+     * `success: false` and `error` rather than a rejected promise, and the
+     * behavior when `action.outputCode` is `null` is not visible here —
+     * confirm both against the implementation.
+     *
+     * @param action - The UI action to execute
+     * @returns The result of the execution or error: `success` is always
+     * present, while `result` and `error` are optional
+     */
+    executeAction(action: UIAction): Promise<{
+        success: boolean;
+        result?: unknown;
+        error?: string;
+    }>;
+}
+export { type AgentConfig, Atticus, type AtticusConfig, type AtticusEventName, type AtticusEvents, type AtticusState, type AtticusStatus, type AtticusVoice, type ConversationState, type D2SnapOptions, LANGUAGE_GREETINGS, LANGUAGE_NAMES, type Message, type MessageContent, SUPPORTED_TRANSCRIPTION_LANGUAGES, type UIAction, type UIActionType, type UIConfig, getLanguageGreeting, getLanguageName, isTranscriptionSupported };