ds-agent-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/bin/ds-agent.js +451 -0
  2. package/ds_agent/__init__.py +8 -0
  3. package/package.json +28 -0
  4. package/requirements.txt +126 -0
  5. package/setup.py +35 -0
  6. package/src/__init__.py +7 -0
  7. package/src/_compress_tool_result.py +118 -0
  8. package/src/api/__init__.py +4 -0
  9. package/src/api/app.py +1626 -0
  10. package/src/cache/__init__.py +5 -0
  11. package/src/cache/cache_manager.py +561 -0
  12. package/src/cli.py +2886 -0
  13. package/src/dynamic_prompts.py +281 -0
  14. package/src/orchestrator.py +4799 -0
  15. package/src/progress_manager.py +139 -0
  16. package/src/reasoning/__init__.py +332 -0
  17. package/src/reasoning/business_summary.py +431 -0
  18. package/src/reasoning/data_understanding.py +356 -0
  19. package/src/reasoning/model_explanation.py +383 -0
  20. package/src/reasoning/reasoning_trace.py +239 -0
  21. package/src/registry/__init__.py +3 -0
  22. package/src/registry/tools_registry.py +3 -0
  23. package/src/session_memory.py +448 -0
  24. package/src/session_store.py +370 -0
  25. package/src/storage/__init__.py +19 -0
  26. package/src/storage/artifact_store.py +620 -0
  27. package/src/storage/helpers.py +116 -0
  28. package/src/storage/huggingface_storage.py +694 -0
  29. package/src/storage/r2_storage.py +0 -0
  30. package/src/storage/user_files_service.py +288 -0
  31. package/src/tools/__init__.py +335 -0
  32. package/src/tools/advanced_analysis.py +823 -0
  33. package/src/tools/advanced_feature_engineering.py +708 -0
  34. package/src/tools/advanced_insights.py +578 -0
  35. package/src/tools/advanced_preprocessing.py +549 -0
  36. package/src/tools/advanced_training.py +906 -0
  37. package/src/tools/agent_tool_mapping.py +326 -0
  38. package/src/tools/auto_pipeline.py +420 -0
  39. package/src/tools/autogluon_training.py +1480 -0
  40. package/src/tools/business_intelligence.py +860 -0
  41. package/src/tools/cloud_data_sources.py +581 -0
  42. package/src/tools/code_interpreter.py +390 -0
  43. package/src/tools/computer_vision.py +614 -0
  44. package/src/tools/data_cleaning.py +614 -0
  45. package/src/tools/data_profiling.py +593 -0
  46. package/src/tools/data_type_conversion.py +268 -0
  47. package/src/tools/data_wrangling.py +433 -0
  48. package/src/tools/eda_reports.py +284 -0
  49. package/src/tools/enhanced_feature_engineering.py +241 -0
  50. package/src/tools/feature_engineering.py +302 -0
  51. package/src/tools/matplotlib_visualizations.py +1327 -0
  52. package/src/tools/model_training.py +520 -0
  53. package/src/tools/nlp_text_analytics.py +761 -0
  54. package/src/tools/plotly_visualizations.py +497 -0
  55. package/src/tools/production_mlops.py +852 -0
  56. package/src/tools/time_series.py +507 -0
  57. package/src/tools/tools_registry.py +2133 -0
  58. package/src/tools/visualization_engine.py +559 -0
  59. package/src/utils/__init__.py +42 -0
  60. package/src/utils/error_recovery.py +313 -0
  61. package/src/utils/parallel_executor.py +402 -0
  62. package/src/utils/polars_helpers.py +248 -0
  63. package/src/utils/schema_extraction.py +132 -0
  64. package/src/utils/semantic_layer.py +392 -0
  65. package/src/utils/token_budget.py +411 -0
  66. package/src/utils/validation.py +377 -0
  67. package/src/workflow_state.py +154 -0
@@ -0,0 +1,139 @@
1
+ """
2
+ Global Progress Event Manager for Real-Time SSE Streaming
3
+
4
+ This module provides a singleton ProgressManager that captures all workflow progress
5
+ events and broadcasts them to connected SSE clients in real-time.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ from typing import Dict, List, Any, Optional
11
+ from datetime import datetime
12
+ from collections import defaultdict
13
+
14
+
15
class ProgressManager:
    """
    Manages progress events for active analysis sessions.

    Features:
    - Emit events to multiple subscribers simultaneously
    - Store event history for late-joining clients
    - Automatic cleanup of dead connections
    - Thread-safe event broadcasting
    """

    # Maximum number of events retained per session before trimming.
    MAX_HISTORY = 500

    def __init__(self):
        # session_id -> list of per-subscriber queues
        self._queues: Dict[str, List[asyncio.Queue]] = defaultdict(list)
        # session_id -> chronological, bounded event history (for late joiners)
        self._history: Dict[str, List[Dict]] = defaultdict(list)
        # NOTE(review): lock is currently unused by any method; kept for
        # interface stability / future async critical sections.
        self._lock = asyncio.Lock()

    def emit(self, session_id: str, event: Dict[str, Any]) -> None:
        """
        Emit a progress event to all subscribers.

        Note: the caller's ``event`` dict is mutated in place (a
        'timestamp' key is added).

        Args:
            session_id: Session identifier
            event: Event data (must include 'type' and 'message')
        """
        print(f"[SSE] PROGRESS_MANAGER EMIT: session={session_id}, event_type={event.get('type')}, msg={event.get('message', '')[:50]}")

        # Add timestamp
        event['timestamp'] = datetime.now().isoformat()

        # Store in history
        self._history[session_id].append(event)

        # Limit history size to prevent memory leaks
        if len(self._history[session_id]) > self.MAX_HISTORY:
            self._history[session_id] = self._history[session_id][-self.MAX_HISTORY:]

        print(f"[SSE] History stored, total events for {session_id}: {len(self._history[session_id])}")

        # Send to all active subscribers (membership test avoids creating
        # an empty defaultdict entry for sessions with no subscribers).
        if session_id in self._queues:
            print(f"[SSE] Found {len(self._queues[session_id])} subscribers for {session_id}")
            dead_queues = []
            for i, queue in enumerate(self._queues[session_id]):
                try:
                    queue.put_nowait(event)
                    print(f"[SSE] Successfully queued event to subscriber {i+1}")
                except asyncio.QueueFull:
                    print(f"[SSE] ERROR: Queue full for subscriber {i+1}")
                    dead_queues.append(queue)
                except Exception as e:
                    print(f"[SSE] ERROR: Exception queuing event to subscriber {i+1}: {type(e).__name__}: {e}")
                    dead_queues.append(queue)

            # Remove dead queues so they do not accumulate across emits.
            for dead_queue in dead_queues:
                if dead_queue in self._queues[session_id]:
                    self._queues[session_id].remove(dead_queue)

    async def subscribe(self, session_id: str):
        """
        Subscribe to progress events for a session.

        The generator terminates after yielding a 'session_cleared'
        event (sent by clear()), so clear() actually disconnects
        subscribers as its docstring promises.

        Args:
            session_id: Session identifier

        Yields:
            Progress events as they occur
        """
        queue: asyncio.Queue = asyncio.Queue(maxsize=100)
        self._queues[session_id].append(queue)

        try:
            while True:
                event = await queue.get()
                print(f"[SSE] YIELDING event to client: type={event.get('type')}, msg={event.get('message', '')[:50]}")
                yield event
                # BUGFIX: previously subscribers kept awaiting forever after
                # clear(); now the sentinel event ends the stream.
                if event.get('type') == 'session_cleared':
                    break
        except asyncio.CancelledError:
            # Client disconnected
            pass
        finally:
            # Cleanup: drop this subscriber's queue if still registered.
            if session_id in self._queues and queue in self._queues[session_id]:
                self._queues[session_id].remove(queue)

    def get_history(self, session_id: str) -> List[Dict]:
        """
        Get all past events for a session.

        Args:
            session_id: Session identifier

        Returns:
            List of past events
        """
        return self._history.get(session_id, [])

    def clear(self, session_id: str) -> None:
        """
        Clear history and disconnect all subscribers for a session.

        Args:
            session_id: Session identifier
        """
        if session_id in self._history:
            del self._history[session_id]
        if session_id in self._queues:
            # Notify subscribers so their generators can terminate.
            for queue in self._queues[session_id]:
                try:
                    queue.put_nowait({'type': 'session_cleared', 'message': 'Session ended'})
                except asyncio.QueueFull:
                    # BUGFIX: was a bare except; put_nowait only raises
                    # QueueFull here, and a full queue means the subscriber
                    # is stalled anyway — safe to skip.
                    pass
            del self._queues[session_id]

    def get_active_sessions(self) -> List[str]:
        """Get list of sessions with active subscribers."""
        return [sid for sid, queues in self._queues.items() if len(queues) > 0]

    def get_subscriber_count(self, session_id: str) -> int:
        """Get number of active subscribers for a session."""
        return len(self._queues.get(session_id, []))
136
+
137
+
138
# Global singleton instance: one shared ProgressManager for the whole
# process, imported by emitters and the SSE endpoint alike.
progress_manager = ProgressManager()
@@ -0,0 +1,332 @@
1
+ """
2
+ Reasoning Module - Core Abstraction
3
+
4
+ Provides clean separation between:
5
+ - Deterministic data processing (tools)
6
+ - Non-deterministic reasoning (LLM)
7
+
8
+ Design Principles:
9
+ - NO RAW DATA ACCESS - Only summaries/metadata
10
+ - NO TRAINING DECISIONS - Only explanations
11
+ - STRUCTURED I/O - JSON in, JSON + text out
12
+ - CACHEABLE - Deterministic enough to cache
13
+ - REASONING ONLY - No execution, no side effects
14
+
15
+ Architecture:
16
+ Tool → Generates Summary → Reasoning Module → Returns Explanation
17
+
18
+ Tool: "Here's what I found: {stats}"
19
+ Reasoning: "Based on these stats, this means..."
20
+
21
+ Usage:
22
+ from reasoning import get_reasoner
23
+
24
+ reasoner = get_reasoner()
25
+ result = reasoner.explain_data(
26
+ summary={"rows": 1000, "columns": 20, "missing": 50}
27
+ )
28
+ """
29
+
30
+ import os
31
+ from typing import Dict, Any, Optional, Union
32
+ from abc import ABC, abstractmethod
33
+
34
+
35
class ReasoningBackend(ABC):
    """Interface every concrete reasoning backend must implement.

    Concrete subclasses (Gemini-, Groq-backed, ...) supply both free-form
    text generation and schema-constrained JSON generation.
    """

    @abstractmethod
    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: int = 2048
    ) -> str:
        """Return a natural-language reasoning response for ``prompt``."""
        ...

    @abstractmethod
    def generate_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        system_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """Return a parsed JSON object that should conform to ``schema``."""
        ...
58
+
59
+
60
class GeminiBackend(ReasoningBackend):
    """Gemini reasoning backend.

    Wraps google.generativeai; requires GOOGLE_API_KEY (or an explicit
    api_key) at construction time.
    """

    def __init__(self, api_key: Optional[str] = None, model: str = "gemini-2.0-flash-exp"):
        """
        Args:
            api_key: Google API key; falls back to the GOOGLE_API_KEY env var.
            model: Gemini model name.

        Raises:
            ImportError: if google-generativeai is not installed.
            ValueError: if no API key is available.
        """
        try:
            import google.generativeai as genai
        except ImportError:
            raise ImportError(
                "google-generativeai not installed. "
                "Install with: pip install google-generativeai"
            )

        api_key = api_key or os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError(
                "Google API key required. Set GOOGLE_API_KEY env var or pass api_key"
            )

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(
            model,
            # Default config; generate() overrides temperature per call.
            generation_config={"temperature": 0.1}
        )
        self.model_name = model

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: int = 2048
    ) -> str:
        """Generate reasoning response.

        Gemini takes a single prompt here, so the system prompt (when
        given) is prepended to the user prompt.
        """
        full_prompt = prompt
        if system_prompt:
            full_prompt = f"{system_prompt}\n\n{prompt}"

        response = self.model.generate_content(
            full_prompt,
            generation_config={
                "temperature": temperature,
                "max_output_tokens": max_tokens
            }
        )

        return response.text

    @staticmethod
    def _extract_json(response_text: str) -> Dict[str, Any]:
        """Parse JSON out of an LLM response.

        Tries, in order: a direct parse, a ```json fenced block, then the
        first {...} span. Raises ValueError when nothing is found.
        (Extracted helper — this logic was previously duplicated verbatim
        in generate_structured here and in GroqBackend.)
        """
        import json
        import re

        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            json_match = re.search(r'```json\s*\n(.*?)\n```', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(1))

            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))

            raise ValueError(f"Failed to extract JSON from response: {response_text[:200]}...")

    def generate_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        system_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """Generate structured JSON response."""
        import json

        # Instruct the model to answer with JSON matching the schema.
        schema_str = json.dumps(schema, indent=2)
        structured_prompt = f"""{prompt}

Respond with valid JSON matching this schema:
{schema_str}

Your response must be valid JSON only, no other text."""

        response_text = self.generate(structured_prompt, system_prompt)
        return self._extract_json(response_text)
145
+
146
+
147
class GroqBackend(ReasoningBackend):
    """Groq reasoning backend.

    Wraps the groq chat-completions client; requires GROQ_API_KEY (or an
    explicit api_key) at construction time.
    """

    def __init__(self, api_key: Optional[str] = None, model: str = "llama-3.3-70b-versatile"):
        """
        Args:
            api_key: Groq API key; falls back to the GROQ_API_KEY env var.
            model: Groq model name.

        Raises:
            ImportError: if the groq package is not installed.
            ValueError: if no API key is available.
        """
        try:
            from groq import Groq
        except ImportError:
            raise ImportError(
                "groq not installed. "
                "Install with: pip install groq"
            )

        api_key = api_key or os.getenv("GROQ_API_KEY")
        if not api_key:
            raise ValueError(
                "Groq API key required. Set GROQ_API_KEY env var or pass api_key"
            )

        self.client = Groq(api_key=api_key)
        self.model_name = model

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.1,
        max_tokens: int = 2048
    ) -> str:
        """Generate reasoning response via the chat-completions API."""
        messages = []

        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        messages.append({"role": "user", "content": prompt})

        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens
        )

        return response.choices[0].message.content

    @staticmethod
    def _extract_json(response_text: str) -> Dict[str, Any]:
        """Parse JSON out of an LLM response.

        Tries, in order: a direct parse, a ```json fenced block, then the
        first {...} span. Raises ValueError when nothing is found.
        (Extracted helper — this logic was previously duplicated verbatim
        in generate_structured here and in GeminiBackend.)
        """
        import json
        import re

        try:
            return json.loads(response_text)
        except json.JSONDecodeError:
            json_match = re.search(r'```json\s*\n(.*?)\n```', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(1))

            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))

            raise ValueError(f"Failed to extract JSON from response: {response_text[:200]}...")

    def generate_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        system_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """Generate structured JSON response."""
        import json

        # Instruct the model to answer with JSON matching the schema.
        schema_str = json.dumps(schema, indent=2)
        structured_prompt = f"""{prompt}

Respond with valid JSON matching this schema:
{schema_str}

Your response must be valid JSON only, no other text."""

        response_text = self.generate(structured_prompt, system_prompt)
        return self._extract_json(response_text)
228
+
229
+
230
class ReasoningEngine:
    """
    Main reasoning engine.

    Delegates to appropriate backend (Gemini, Groq, etc).
    Provides high-level reasoning capabilities.
    """

    def __init__(
        self,
        backend: Optional[ReasoningBackend] = None,
        provider: Optional[str] = None
    ):
        """
        Initialize reasoning engine.

        Args:
            backend: Custom backend instance (takes precedence over provider)
            provider: 'gemini' or 'groq'; when None, falls back to the
                LLM_PROVIDER env var and finally to 'gemini'.
                BUGFIX: the old default of "gemini" made the documented
                LLM_PROVIDER fallback unreachable for direct construction.

        Raises:
            ValueError: if provider names an unsupported backend.
        """
        if backend:
            self.backend = backend
        else:
            provider = provider or os.getenv("LLM_PROVIDER", "gemini")

            if provider == "gemini":
                self.backend = GeminiBackend()
            elif provider == "groq":
                self.backend = GroqBackend()
            else:
                raise ValueError(f"Unsupported provider: {provider}")

        # May be None when a custom backend is injected without a label.
        self.provider = provider

    def reason(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: float = 0.1
    ) -> str:
        """
        General-purpose reasoning.

        Args:
            prompt: User prompt
            system_prompt: Optional system context
            temperature: Creativity (0.0 = deterministic, 1.0 = creative)

        Returns:
            Natural language response
        """
        return self.backend.generate(prompt, system_prompt, temperature)

    def reason_structured(
        self,
        prompt: str,
        schema: Dict[str, Any],
        system_prompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Structured reasoning with JSON output.

        Args:
            prompt: User prompt
            schema: Expected JSON schema
            system_prompt: Optional system context

        Returns:
            Parsed JSON response
        """
        return self.backend.generate_structured(prompt, schema, system_prompt)
301
+
302
+
303
# Module-level singleton, created lazily by get_reasoner() and cleared by
# reset_reasoner(); None until the first get_reasoner() call.
_reasoning_engine: Optional[ReasoningEngine] = None
305
+
306
+
307
def get_reasoner(
    backend: Optional[ReasoningBackend] = None,
    provider: Optional[str] = None
) -> ReasoningEngine:
    """
    Get singleton reasoning engine.

    The cached engine is rebuilt when a custom backend is supplied, or —
    BUGFIX — when a provider is requested that differs from the cached
    engine's provider (previously a mismatched provider was silently
    ignored and the stale engine returned).

    Args:
        backend: Custom backend instance
        provider: 'gemini' or 'groq'

    Returns:
        ReasoningEngine instance
    """
    global _reasoning_engine

    needs_rebuild = (
        _reasoning_engine is None
        or backend is not None
        or (provider is not None and provider != _reasoning_engine.provider)
    )
    if needs_rebuild:
        _reasoning_engine = ReasoningEngine(backend=backend, provider=provider)

    return _reasoning_engine
327
+
328
+
329
def reset_reasoner():
    """Discard the cached singleton engine (intended for test isolation)."""
    global _reasoning_engine
    _reasoning_engine = None