npm - @floe-ai/sdk - Versions diffs - 0.1.0-dev.10 - Mend

@floe-ai/sdk 0.1.0-dev.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/LICENSE +36 -0
package/README.md +146 -0
package/dist-sdk/floe-sdk.es.js +18631 -0
package/dist-sdk/floe-sdk.es.js.map +1 -0
package/dist-sdk/floe-sdk.iife.js +918 -0
package/dist-sdk/floe-sdk.iife.js.map +1 -0
package/dist-sdk/floe-sdk.umd.js +918 -0
package/dist-sdk/floe-sdk.umd.js.map +1 -0
package/dist-sdk/index.d.ts +561 -0
package/package.json +74 -0

package/dist-sdk/index.d.ts ADDED Viewed

@@ -0,0 +1,561 @@
+declare interface ContextData { // Page-context snapshot; the type that OnboardingSDK.captureContext() resolves to.
+    screenshot?: string; // Optional. NOTE(review): encoding (data URL vs. raw base64) is not visible here — confirm.
+    url: string;
+    structure: PageStructure; // Title, URL, element list and viewport size (see PageStructure).
+    timestamp: number; // NOTE(review): unit/epoch is not stated in this declaration — presumably ms since epoch; confirm.
+    userProfile?: any; // Untyped (`any`); its shape is not declared here.
+}
+declare interface ElementInfo { // One entry of PageStructure.elements.
+    selector: string; // NOTE(review): presumably a CSS selector (cf. the private getSelector member) — confirm.
+    type: string;
+    text?: string;
+    visible: boolean;
+    position: DOMRect; // Standard DOM rectangle type.
+    attributes: Record<string, string>; // String-to-string map — presumably attribute name to value (cf. private getAttributes); confirm.
+}
+declare class EventEmitter { // Event emitter base class of OnboardingSDK; declared here but not exported by name.
+    private events; // Type is omitted in this declaration file.
+    on(event: string, listener: (...args: any[]) => void): this; // Returns `this`, so calls can be chained.
+    off(event: string, listener: (...args: any[]) => void): this; // Returns `this`, so calls can be chained.
+    emit(event: string, ...args: any[]): boolean; // NOTE(review): meaning of the boolean result is not visible here — confirm.
+}
+/**
+ * Initialize Floe SDK. The instance is returned synchronously; initialization status is exposed via `ready`.
+ * @param config Configuration options (see FloeConfig; only `clientKey` is required)
+ * @returns SDK instance with a `ready` promise for initialization status
+ *
+ * @example
+ * // Fire and forget (errors logged to console)
+ * const sdk = Floe({ clientKey: 'your-key' });
+ *
+ * @example
+ * // Await initialization
+ * const sdk = Floe({ clientKey: 'your-key' });
+ * await sdk.ready;
+ * console.log('SDK initialized!');
+ *
+ * @example
+ * // Handle initialization errors
+ * const sdk = Floe({ clientKey: 'your-key' });
+ * sdk.ready.catch(err => console.error('Init failed:', err));
+ */
+export declare function Floe(config: FloeConfig): FloeSDKInstance;
+/**
+ * Floe SDK Configuration Options, as accepted by Floe(). The non-exported SDKConfig declares the same fields plus enableVideo, redactionPatterns and enableDiscoveryPopup.
+ */
+export declare interface FloeConfig {
+    /** Your Floe API client key (required) */
+    clientKey: string;
+    /** Floe API URL (defaults to production) */
+    apiUrl?: string;
+    /** Enable screen sharing (default: true) */
+    enableScreenCapture?: boolean;
+    /** Enable voice interaction (default: true) */
+    enableAudio?: boolean;
+    /** User's industry for context */
+    industry?: string;
+    /** User's use case */
+    useCase?: string;
+    /** User's company name */
+    companyName?: string;
+    /** Company size (free-form string; no fixed set of values is declared) */
+    companySize?: string;
+    /** User's role */
+    role?: string;
+    /** Enable debug logging (default is not stated in this declaration) */
+    debug?: boolean;
+    /** User identification for personalization (see FloeUserInfo; every field there is optional) */
+    userInfo?: FloeUserInfo;
+    /** Skip the welcome modal for returning users */
+    skipOnboardingModal?: boolean;
+}
+/**
+ * Extended SDK instance with ready promise; this is the return type of Floe() and inherits the public members of OnboardingSDK.
+ */
+export declare interface FloeSDKInstance extends OnboardingSDK {
+    /** Promise that resolves when SDK is fully initialized; per the Floe() examples, attach `.catch` to observe initialization failures */
+    ready: Promise<void>;
+}
+/**
+ * User information for personalization. Every field is optional; the non-exported UserInfo declares the same fields.
+ */
+export declare interface FloeUserInfo {
+    externalId?: string; // NOTE(review): presumably the host application's own user id — confirm.
+    email?: string;
+    name?: string;
+    company?: string;
+    designation?: string; // NOTE(review): presumably a job title — confirm.
+    metadata?: Record<string, any>; // String-keyed map with untyped (`any`) values.
+}
+/**
+ * Main Onboarding SDK class. Extends EventEmitter (on/off/emit) and is constructed from an SDKConfig.
+ */
+declare class OnboardingSDK extends EventEmitter {
+    private config; // Private members are emitted without types in this declaration file.
+    private sessionId;
+    private overlay;
+    private capture;
+    private actionExecutor;
+    private pipecatClient;
+    private transport;
+    private isInitialized;
+    private reactRoot;
+    private reactContainer;
+    private agentState;
+    private pausedSessionId;
+    private pendingResumeSessionId;
+    private botTranscript;
+    private userTranscript;
+    private transcriptHistory;
+    private lastUserTranscript;
+    private isConnected;
+    private connectInFlight;
+    private micEnabled;
+    private userIsSpeaking;
+    private botIsSpeaking;
+    private audioLevels;
+    private _discoveryPopupShown;
+    private _pendingDiscoveryStart;
+    private botAudioAnalyzer;
+    private userAudioAnalyzer;
+    private userMicStream;
+    private audioElements;
+    private pageTracker;
+    private screenShareEnabled;
+    private domIntrospector;
+    private accessibleSnapshot;
+    private fastExecutionMode;
+    private batchedActionExecutor;
+    private navigationCompleteDetector;
+    private _botActionDepth;
+    private endUserStatus;
+    private connectionProgress;
+    private isReturningUser;
+    private _isMinimized;
+    constructor(config: SDKConfig); // See SDKConfig; `clientKey` is its only required field.
+    /**
+     * Generate unique session ID
+     */
+    private generateSessionId;
+    /**
+     * Start screen sharing: sends the tab/window as a WebRTC video track to Pipecat.
+     */
+    private startScreenShare;
+    /**
+     * Initialize the SDK and connect to servers
+     */
+    init(): Promise<void>;
+    /**
+     * Setup audio playback for bot audio tracks with real-time level analysis.
+     * Centralizes all audio element handling, analyzer instantiation, and navigation detector setup.
+     * This is the single source of truth for audio/navigation initialization to prevent duplicates.
+     */
+    private setupAudioPlayback;
+    /**
+     * Setup user microphone analysis for waveform visualization
+     */
+    private setupUserMicAnalysis;
+    /**
+     * Initialize React UI components
+     */
+    private initReactUI;
+    /**
+     * Render React UI components
+     */
+    private renderReactUI;
+    /**
+     * Handle minimize from UI - keeps widget visible but collapsed
+     */
+    private handleMinimize;
+    /**
+     * Handle expand from UI
+     */
+    private handleExpand;
+    /**
+     * Update connection progress during connection
+     */
+    private setConnectionProgress;
+    /**
+     * Clear connection progress
+     */
+    private clearConnectionProgress;
+    /**
+     * Handle connection toggle from UI
+     * Now implements minimize behavior instead of full disconnect
+     */
+    private handleToggleConnection;
+    /**
+     * Disconnect and minimize the UI (instead of removing it)
+     */
+    private disconnectAndMinimize;
+    /**
+     * Connect with progress stages for better UX.
+     *
+     * NOTE: This method is intended ONLY for post-onboarding reconnects (e.g., when a user
+     * clicks the connect button after having already completed the initial onboarding flow).
+     * As such, it intentionally skips onboarding checks that are performed in init():
+     * - checkEndUserStatus() - not needed for reconnects
+     * - shouldSkipOnboardingModal() - onboarding already completed
+     * - welcome popup logic - user already onboarded
+     *
+     * For initial connections, use init() instead, which includes all onboarding logic.
+     */
+    private connectWithProgress;
+    /**
+     * Create Pipecat client callbacks (extracted for reuse)
+     * Contains the FULL callback logic matching init() to ensure all features work
+     */
+    private createPipecatCallbacks;
+    /**
+     * Resume a paused session
+     * Always forces a full reconnect since the bot's pipeline was cancelled during pause
+     */
+    resumeSession(sessionId: string): Promise<void>;
+    /**
+     * Stop/end a paused session (disconnect and clear paused state)
+     */
+    stopSession(): Promise<void>;
+    /**
+     * Handle screen share toggle from UI
+     */
+    private handleToggleScreenShare;
+    /**
+     * Send metadata to bot via Pipecat
+     * Data should include a 'type' field (e.g., 'click', 'page_metadata', 'form_input')
+     */
+    sendMetadata(data: any): void;
+    /**
+     * Send DOM snapshot to server for DOM Introspection Layer
+     * Server will use this for accurate element targeting via vision + DOM correlation
+     */
+    private sendDOMSnapshot;
+    /**
+     * Send accessible snapshot to server (improved LLM-friendly format)
+     *
+     * Key improvements over raw DOM snapshot:
+     * - Only interactive elements (~90% reduction in noise)
+     * - Computed accessible names (W3C accname algorithm)
+     * - Semantic roles and states (selected, checked, disabled)
+     * - Context awareness (row context, section headings)
+     * - Unique refs for easy element identification (B1, C2, T3)
+     */
+    private sendAccessibleSnapshot;
+    /**
+     * Handle user input
+     */
+    private handleUserInput;
+    /**
+     * Handle server messages from bot (via Pipecat ServerMessage events)
+     * Supports both legacy selector-based commands and new hybrid target format
+     *
+     * Message formats supported:
+     * 1. RTVIServerMessageFrame: { data: { type: 'ui_action', action: '...', target: {...} } }
+     * 2. Direct ui_action: { type: 'ui_action', action: '...', target: {...} }
+     * 3. Direct action: { action: '...', target: {...} }
+     * 4. Legacy: { type: 'highlight', selector: '...' }
+     */
+    private handleServerMessage;
+    /**
+     * Handle hybrid UI action from bot
+     * This is the core handler for the Hybrid UI Action System (HSE)
+     *
+     * Actions: highlight, click, type, focus, hover, scroll
+     */
+    private handleHybridAction;
+    /**
+     * Check a precondition and report the result to the server.
+     * Used for conditional task execution (e.g., check if an option exists before selecting it).
+     */
+    private handleCheckPrecondition;
+    /**
+     * Handle selecting multiple options from a dropdown.
+     * This handles the complex flow of:
+     * 1. Opening the dropdown
+     * 2. Clicking each value in selectedValues
+     * 3. Closing the dropdown
+     */
+    private handleMultiSelectAction;
+    /**
+     * Handle clicking a dropdown option when optionText is specified.
+     * This handles the complex flow of:
+     * 1. Opening the dropdown if not already open
+     * 2. Waiting for options to appear
+     * 3. Finding and clicking the actual option
+     */
+    private handleDropdownOptionClick;
+    /**
+     * Find a visible dropdown option by text
+     * Uses STRICT matching to avoid selecting wrong options
+     */
+    private findVisibleDropdownOption;
+    /**
+     * Try to scroll within the dropdown to find an option that might be off-screen.
+     * Returns the element if found after scrolling, null otherwise.
+     */
+    private scrollToDropdownOption;
+    /**
+     * Validate that an element is a valid dropdown element (not SDK UI, visible, clickable)
+     *
+     * @param el - The element to validate
+     * @param requireDropdownAncestor - If true, requires element to have a dropdown-like ancestor (for Strategy 5)
+     * @returns true if the element is valid for dropdown interaction
+     */
+    private isValidDropdownElement;
+    /**
+     * Find the dropdown trigger element (combobox input or select control)
+     *
+     * Priority:
+     * 1. dropdownTriggerSelector - captured CSS selector for the trigger
+     * 2. labelText - find dropdown by its form label (label, div, span, etc.)
+     * 3. Find currently open/focused dropdown on page
+     * 4. Fallback strategies
+     */
+    private findDropdownTrigger;
+    /**
+     * Find dropdown by its label text (searches label, div, span elements)
+     * Works with proper <label> elements AND improper div/span "labels"
+     */
+    private findDropdownByLabel;
+    /**
+     * Get the clickable container for a dropdown input
+     * (The input itself may not be clickable, but its container is)
+     */
+    private getClickableDropdownContainer;
+    /**
+     * Click a dropdown option with proper animation and feedback
+     */
+    private clickDropdownOption;
+    /**
+     * Find collapsed sidebar container that contains the element
+     * Returns the sidebar element if found, null otherwise
+     */
+    private findCollapsedSidebar;
+    /**
+     * Execute a single action quickly without cursor animation
+     * Used for fast mode where we want minimal visual overhead
+     *
+     * @param action The hybrid UI action to execute
+     * @returns true if action succeeded
+     */
+    private executeActionFast;
+    /**
+     * Handle batched navigation request from server
+     * Executes multiple navigation steps quickly without LLM round-trips
+     */
+    private handleBatchedNavigation;
+    /**
+     * Start navigation complete detection
+     * Call this when you want to monitor for page navigation completion
+     */
+    startNavigationDetection(onComplete: (url: string) => void): void;
+    /**
+     * Stop navigation complete detection
+     */
+    stopNavigationDetection(): void;
+    /**
+     * Check if currently in fast execution mode
+     */
+    isInFastMode(): boolean;
+    /**
+     * Extract intent from user input
+     */
+    private extractIntent;
+    /**
+     * Capture current context. Resolves to a ContextData (url, structure, timestamp, plus optional screenshot and userProfile).
+     */
+    captureContext(): Promise<ContextData>;
+    /**
+     * Extract page structure
+     */
+    private extractPageStructure;
+    /**
+     * Get unique selector for element
+     */
+    private getSelector;
+    /**
+     * Get element attributes
+     */
+    private getAttributes;
+    /**
+     * Section marker: Public API Methods. (Not the doc for sendText; note that two private members are still declared further down.)
+     */
+    /**
+     * Send text message
+     */
+    sendText(text: string): Promise<void>;
+    /**
+     * Toggle microphone mute. NOTE(review): whether the returned boolean is the new muted state is not visible here — confirm.
+     */
+    toggleMute(): boolean;
+    /**
+     * Highlight an element. `options` is untyped (`any`); its accepted keys are not declared here.
+     */
+    highlight(selector: string, options?: any): void;
+    /**
+     * Clear highlights
+     */
+    clearHighlight(): void;
+    /**
+     * Show tooltip. `options` is untyped (`any`); its accepted keys are not declared here.
+     */
+    showTooltip(text: string, options?: any): void;
+    /**
+     * Hide tooltip
+     */
+    hideTooltip(): void;
+    /**
+     * Execute an action described by a UIAction. Resolves to a boolean — presumably success, as with executeActionFast; confirm.
+     */
+    executeAction(action: UIAction): Promise<boolean>;
+    /**
+     * Take a screenshot. Resolves to a string — NOTE(review): encoding (data URL vs. raw base64) is not visible here; confirm.
+     */
+    takeScreenshot(options?: ScreenshotOptions): Promise<string>;
+    /**
+     * Get session ID
+     */
+    getSessionId(): string;
+    /**
+     * Get connection status as an object with initialized, connected, sessionId and microphoneMuted fields.
+     */
+    getStatus(): {
+        initialized: boolean;
+        connected: boolean;
+        sessionId: string;
+        microphoneMuted: boolean;
+    };
+    /**
+     * Start guided discovery mode.
+     *
+     * Sends a message to the bot to begin the discovery flow, which will
+     * ask structured questions to determine the best onboarding plan for the user.
+     *
+     * @returns true if the message was sent successfully
+     */
+    startGuidedDiscovery(): boolean;
+    /**
+     * Skip discovery and let the user state their goal directly.
+     *
+     * This closes the welcome popup and lets the bot handle the conversation
+     * in the standard greeting flow.
+     */
+    skipDiscovery(): void;
+    /**
+     * Show the welcome popup and wait for user choice.
+     * Returns a Promise that resolves to true if user wants discovery.
+     *
+     * Uses Raycast-style dark theme to match other modals.
+     */
+    showWelcomePopupAsync(): Promise<boolean>;
+    /**
+     * Show the welcome popup for new users (legacy sync version).
+     *
+     * Displays a popup with options to start guided discovery or
+     * proceed with the standard flow.
+     * @deprecated Use showWelcomePopupAsync instead
+     */
+    showWelcomePopup(): void;
+    /**
+     * Hide the welcome popup.
+     */
+    hideWelcomePopup(): void;
+    /**
+     * Check if discovery mode should be shown for this user.
+     *
+     * Can be called after connection to determine if the welcome popup
+     * should be displayed. Returns true for new users or users who haven't
+     * completed onboarding.
+     */
+    shouldShowDiscoveryPrompt(): boolean;
+    /**
+     * Check EndUser status from API to determine if onboarding should be skipped.
+     * Called during init() if userInfo is provided.
+     */
+    private checkEndUserStatus;
+    /**
+     * Get EndUser preferences (for qualification pre-fill).
+     * Returns preferences from API if available.
+     */
+    getEndUserPreferences(): Record<string, any> | null;
+    /**
+     * Determine if onboarding modal should be skipped.
+     * Priority: explicit config > API EndUser.isNewUser > localStorage fallback
+     */
+    private shouldSkipOnboardingModal;
+    /**
+     * Mark that the user has visited (used to track new vs returning users).
+     */
+    markUserVisited(): void;
+    /**
+     * Mark onboarding as complete.
+     */
+    markOnboardingComplete(): void;
+    /**
+     * Disconnect and cleanup
+     */
+    disconnect(): Promise<void>;
+}
+declare interface PageStructure { // Page description carried in ContextData.structure.
+    title: string;
+    url: string;
+    elements: ElementInfo[];
+    viewport: { // NOTE(review): units are not stated here — presumably CSS pixels; confirm.
+        width: number;
+        height: number;
+    };
+}
+declare interface ScreenshotOptions { // Optional settings for OnboardingSDK.takeScreenshot(); every field may be omitted.
+    redact?: boolean; // NOTE(review): presumably related to SDKConfig.redactionPatterns — confirm.
+    scale?: number; // NOTE(review): valid range and default are not declared here.
+    quality?: number; // NOTE(review): valid range (0-1 vs. 0-100) is not declared here — confirm.
+    format?: 'webp' | 'jpeg' | 'png';
+}
+declare interface SDKConfig { // Constructor argument of OnboardingSDK. Not exported; FloeConfig is the exported configuration type.
+    clientKey: string; // The only required field.
+    apiUrl?: string;
+    enableVideo?: boolean; // Not present on FloeConfig.
+    enableAudio?: boolean;
+    enableScreenCapture?: boolean;
+    redactionPatterns?: Record<string, RegExp>; // Not present on FloeConfig. How the patterns are applied is not visible here.
+    debug?: boolean;
+    industry?: string;
+    useCase?: string;
+    companyName?: string;
+    companySize?: string;
+    role?: string;
+    userInfo?: UserInfo; // UserInfo declares the same fields as the exported FloeUserInfo.
+    skipOnboardingModal?: boolean;
+    enableDiscoveryPopup?: boolean; // Not present on FloeConfig.
+}
+declare interface UIAction { // Argument type of OnboardingSDK.executeAction().
+    type: 'click' | 'type' | 'select' | 'hover' | 'scroll'; // Differs from the action list in the handleHybridAction doc (highlight, click, type, focus, hover, scroll).
+    selector: string; // NOTE(review): presumably a CSS selector for the target element — confirm.
+    value?: string; // NOTE(review): presumably the text or option used by 'type' and 'select' — confirm.
+    description?: string;
+}
+/**
+ * Same optional fields as the exported FloeUserInfo; referenced by SDKConfig.userInfo.
+ * NOTE(review): the original text here ("AI Onboarding Agent SDK / Voice-first onboarding platform with WebRTC connectivity to Pipecat server") describes the package, not this interface — likely a misplaced file header.
+ */
+declare interface UserInfo {
+    externalId?: string; // NOTE(review): presumably the host application's own user id — confirm.
+    email?: string;
+    name?: string;
+    company?: string;
+    designation?: string; // NOTE(review): presumably a job title — confirm.
+    metadata?: Record<string, any>; // String-keyed map with untyped (`any`) values.
+}
+export { }

package/package.json ADDED Viewed

@@ -0,0 +1,74 @@
+{
+  "name": "@floe-ai/sdk",
+  "version": "0.1.0-dev.10",
+  "description": "Floe AI Onboarding SDK for React applications",
+  "type": "module",
+  "main": "./dist-sdk/floe-sdk.es.js",
+  "module": "./dist-sdk/floe-sdk.es.js",
+  "types": "./dist-sdk/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist-sdk/index.d.ts",
+      "import": "./dist-sdk/floe-sdk.es.js"
+    }
+  },
+  "files": [
+    "dist-sdk/floe-sdk.es.js",
+    "dist-sdk/floe-sdk.es.js.map",
+    "dist-sdk/floe-sdk.umd.js",
+    "dist-sdk/floe-sdk.umd.js.map",
+    "dist-sdk/floe-sdk.iife.js",
+    "dist-sdk/floe-sdk.iife.js.map",
+    "dist-sdk/index.d.ts",
+    "README.md",
+    "LICENSE"
+  ],
+  "publishConfig": {
+    "access": "public"
+  },
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc && vite build",
+    "build:sdk": "vite build --config vite.config.sdk.ts && BUILD_TARGET=cdn vite build --config vite.config.sdk.ts",
+    "build:all": "npm run build && npm run build:sdk",
+    "preview": "vite preview",
+    "serve": "node serve-dist.js",
+    "serve:sdk": "vite preview --config vite.config.sdk.ts --port 5174",
+    "clean": "rm -rf dist dist-sdk"
+  },
+  "peerDependencies": {
+    "react": "^18.0.0 || ^19.0.0",
+    "react-dom": "^18.0.0 || ^19.0.0"
+  },
+  "dependencies": {
+    "@pipecat-ai/client-js": "^1.4.1",
+    "@pipecat-ai/client-react": "^1.1.0",
+    "@pipecat-ai/daily-transport": "^1.4.1",
+    "@pipecat-ai/small-webrtc-transport": "^1.7.0",
+    "@pipecat-ai/voice-ui-kit": "^0.4.2"
+  },
+  "devDependencies": {
+    "@types/react": "^18.2.0 || ^19.0.0",
+    "@types/react-dom": "^18.2.0 || ^19.0.0",
+    "@vitejs/plugin-react": "^4.2.0",
+    "cors": "^2.8.5",
+    "express": "^5.1.0",
+    "patch-package": "^8.0.0",
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0",
+    "typescript": "^5.3.0",
+    "vite": "^5.0.0",
+    "vite-plugin-dts": "^4.0.0"
+  },
+  "keywords": [
+    "floe",
+    "floe-ai",
+    "onboarding",
+    "sdk",
+    "react",
+    "ai",
+    "voice",
+    "pipecat"
+  ],
+  "license": "SEE LICENSE IN LICENSE"
+}