datascience-agent 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datascience_agent-0.3.0/.env.example +69 -0
- datascience_agent-0.3.0/.github/workflows/python-publish.yml +70 -0
- datascience_agent-0.3.0/.gitignore +63 -0
- datascience_agent-0.3.0/CONTRIBUTING.md +198 -0
- datascience_agent-0.3.0/PKG-INFO +279 -0
- datascience_agent-0.3.0/README.md +233 -0
- datascience_agent-0.3.0/examples/basic_usage.py +72 -0
- datascience_agent-0.3.0/examples/fastapi_example.py +164 -0
- datascience_agent-0.3.0/examples/streaming_example.py +101 -0
- datascience_agent-0.3.0/pyproject.toml +106 -0
- datascience_agent-0.3.0/src/dsagent/__init__.py +74 -0
- datascience_agent-0.3.0/src/dsagent/agents/__init__.py +7 -0
- datascience_agent-0.3.0/src/dsagent/agents/base.py +366 -0
- datascience_agent-0.3.0/src/dsagent/cli.py +187 -0
- datascience_agent-0.3.0/src/dsagent/core/__init__.py +11 -0
- datascience_agent-0.3.0/src/dsagent/core/context.py +136 -0
- datascience_agent-0.3.0/src/dsagent/core/engine.py +588 -0
- datascience_agent-0.3.0/src/dsagent/core/executor.py +231 -0
- datascience_agent-0.3.0/src/dsagent/core/planner.py +180 -0
- datascience_agent-0.3.0/src/dsagent/schema/__init__.py +21 -0
- datascience_agent-0.3.0/src/dsagent/schema/models.py +268 -0
- datascience_agent-0.3.0/src/dsagent/utils/__init__.py +10 -0
- datascience_agent-0.3.0/src/dsagent/utils/logger.py +245 -0
- datascience_agent-0.3.0/src/dsagent/utils/notebook.py +483 -0
- datascience_agent-0.3.0/src/dsagent/utils/run_logger.py +352 -0
- datascience_agent-0.3.0/tests/__init__.py +1 -0
- datascience_agent-0.3.0/tests/test_context.py +117 -0
- datascience_agent-0.3.0/tests/test_notebook.py +162 -0
- datascience_agent-0.3.0/tests/test_run_logger.py +293 -0
- datascience_agent-0.3.0/uv.lock +4673 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# =============================================================================
|
|
2
|
+
# DSAgent - Configuration
|
|
3
|
+
# =============================================================================
|
|
4
|
+
# Copy this file to .env and fill in your values
|
|
5
|
+
# Priority: CLI args > Environment variables > .env file > defaults
|
|
6
|
+
|
|
7
|
+
# =============================================================================
|
|
8
|
+
# MODEL CONFIGURATION (choose one provider)
|
|
9
|
+
# =============================================================================
|
|
10
|
+
|
|
11
|
+
# Default model to use (overridden by --model CLI flag)
|
|
12
|
+
LLM_MODEL=gpt-4o
|
|
13
|
+
|
|
14
|
+
# Examples:
|
|
15
|
+
# LLM_MODEL=gpt-4o # OpenAI GPT-4o
|
|
16
|
+
# LLM_MODEL=gpt-4o-mini # OpenAI GPT-4o Mini (cheaper)
|
|
17
|
+
# LLM_MODEL=claude-3-5-sonnet-20241022 # Anthropic Claude 3.5 Sonnet
|
|
18
|
+
# LLM_MODEL=claude-3-opus-20240229 # Anthropic Claude 3 Opus
|
|
19
|
+
# LLM_MODEL=gemini/gemini-1.5-pro # Google Gemini 1.5 Pro
|
|
20
|
+
# LLM_MODEL=ollama/llama3 # Ollama local model
|
|
21
|
+
# LLM_MODEL=ollama/codellama # Ollama CodeLlama
|
|
22
|
+
|
|
23
|
+
# =============================================================================
|
|
24
|
+
# API KEYS (set the one for your chosen provider)
|
|
25
|
+
# =============================================================================
|
|
26
|
+
|
|
27
|
+
# OpenAI
|
|
28
|
+
OPENAI_API_KEY=sk-your-openai-api-key-here
|
|
29
|
+
|
|
30
|
+
# Anthropic
|
|
31
|
+
ANTHROPIC_API_KEY=sk-ant-your-anthropic-api-key-here
|
|
32
|
+
|
|
33
|
+
# Google
|
|
34
|
+
GOOGLE_API_KEY=your-google-api-key-here
|
|
35
|
+
|
|
36
|
+
# =============================================================================
|
|
37
|
+
# CUSTOM API BASE (for local models or proxies)
|
|
38
|
+
# =============================================================================
|
|
39
|
+
|
|
40
|
+
# LM Studio (local)
|
|
41
|
+
# LLM_API_BASE=http://localhost:1234/v1
|
|
42
|
+
# LLM_MODEL=openai/your-model-name
|
|
43
|
+
|
|
44
|
+
# Ollama (if not using default port)
|
|
45
|
+
# OLLAMA_API_BASE=http://localhost:11434
|
|
46
|
+
|
|
47
|
+
# Azure OpenAI
|
|
48
|
+
# AZURE_API_KEY=your-azure-api-key
|
|
49
|
+
# AZURE_API_BASE=https://your-resource.openai.azure.com/
|
|
50
|
+
# AZURE_API_VERSION=2024-02-15-preview
|
|
51
|
+
|
|
52
|
+
# =============================================================================
|
|
53
|
+
# AGENT SETTINGS (optional)
|
|
54
|
+
# =============================================================================
|
|
55
|
+
|
|
56
|
+
# Maximum rounds before stopping (default: 30)
|
|
57
|
+
# DSAGENT_MAX_ROUNDS=30
|
|
58
|
+
|
|
59
|
+
# LLM temperature (default: 0.3)
|
|
60
|
+
# DSAGENT_TEMPERATURE=0.3
|
|
61
|
+
|
|
62
|
+
# Max tokens per response (default: 4096)
|
|
63
|
+
# DSAGENT_MAX_TOKENS=4096
|
|
64
|
+
|
|
65
|
+
# Code execution timeout in seconds (default: 300)
|
|
66
|
+
# DSAGENT_CODE_TIMEOUT=300
|
|
67
|
+
|
|
68
|
+
# Default workspace directory (default: ./workspace)
|
|
69
|
+
# DSAGENT_WORKSPACE=./workspace
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# This workflow will upload a Python Package to PyPI when a release is created
|
|
2
|
+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
|
|
3
|
+
|
|
4
|
+
# This workflow uses actions that are not certified by GitHub.
|
|
5
|
+
# They are provided by a third-party and are governed by
|
|
6
|
+
# separate terms of service, privacy policy, and support
|
|
7
|
+
# documentation.
|
|
8
|
+
|
|
9
|
+
name: Upload Python Package
|
|
10
|
+
|
|
11
|
+
on:
|
|
12
|
+
release:
|
|
13
|
+
types: [published]
|
|
14
|
+
|
|
15
|
+
permissions:
|
|
16
|
+
contents: read
|
|
17
|
+
|
|
18
|
+
jobs:
|
|
19
|
+
release-build:
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: "3.x"
|
|
28
|
+
|
|
29
|
+
- name: Build release distributions
|
|
30
|
+
run: |
|
|
31
|
+
# NOTE: put your own distribution build steps here.
|
|
32
|
+
python -m pip install build
|
|
33
|
+
python -m build
|
|
34
|
+
|
|
35
|
+
- name: Upload distributions
|
|
36
|
+
uses: actions/upload-artifact@v4
|
|
37
|
+
with:
|
|
38
|
+
name: release-dists
|
|
39
|
+
path: dist/
|
|
40
|
+
|
|
41
|
+
pypi-publish:
|
|
42
|
+
runs-on: ubuntu-latest
|
|
43
|
+
needs:
|
|
44
|
+
- release-build
|
|
45
|
+
permissions:
|
|
46
|
+
# IMPORTANT: this permission is mandatory for trusted publishing
|
|
47
|
+
id-token: write
|
|
48
|
+
|
|
49
|
+
# Dedicated environments with protections for publishing are strongly recommended.
|
|
50
|
+
# For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules
|
|
51
|
+
environment:
|
|
52
|
+
name: pypi
|
|
53
|
+
# OPTIONAL: uncomment and update to include your PyPI project URL in the deployment status:
|
|
54
|
+
url: https://pypi.org/project/datascience-agent/
|
|
55
|
+
#
|
|
56
|
+
# ALTERNATIVE: if your GitHub Release name is the PyPI project version string
|
|
57
|
+
# ALTERNATIVE: exactly, uncomment the following line instead:
|
|
58
|
+
# url: https://pypi.org/project/YOURPROJECT/${{ github.event.release.name }}
|
|
59
|
+
|
|
60
|
+
steps:
|
|
61
|
+
- name: Retrieve release distributions
|
|
62
|
+
uses: actions/download-artifact@v4
|
|
63
|
+
with:
|
|
64
|
+
name: release-dists
|
|
65
|
+
path: dist/
|
|
66
|
+
|
|
67
|
+
- name: Publish release distributions to PyPI
|
|
68
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
69
|
+
with:
|
|
70
|
+
packages-dir: dist/
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
*.egg-info/
|
|
20
|
+
.installed.cfg
|
|
21
|
+
*.egg
|
|
22
|
+
|
|
23
|
+
# Virtual environments
|
|
24
|
+
.venv/
|
|
25
|
+
venv/
|
|
26
|
+
ENV/
|
|
27
|
+
env/
|
|
28
|
+
|
|
29
|
+
# IDE
|
|
30
|
+
.idea/
|
|
31
|
+
.vscode/
|
|
32
|
+
*.swp
|
|
33
|
+
*.swo
|
|
34
|
+
*~
|
|
35
|
+
|
|
36
|
+
# Testing
|
|
37
|
+
.tox/
|
|
38
|
+
.nox/
|
|
39
|
+
.coverage
|
|
40
|
+
.coverage.*
|
|
41
|
+
htmlcov/
|
|
42
|
+
.pytest_cache/
|
|
43
|
+
.mypy_cache/
|
|
44
|
+
|
|
45
|
+
# Jupyter
|
|
46
|
+
.ipynb_checkpoints/
|
|
47
|
+
|
|
48
|
+
# Environment variables
|
|
49
|
+
.env
|
|
50
|
+
.env.local
|
|
51
|
+
|
|
52
|
+
# OS
|
|
53
|
+
.DS_Store
|
|
54
|
+
Thumbs.db
|
|
55
|
+
|
|
56
|
+
# Project specific
|
|
57
|
+
workspace/
|
|
58
|
+
workspaces/
|
|
59
|
+
*.ipynb
|
|
60
|
+
!examples/*.ipynb
|
|
61
|
+
|
|
62
|
+
# uv
|
|
63
|
+
.python-version
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Contributing to DSAgent
|
|
2
|
+
|
|
3
|
+
## Git Flow
|
|
4
|
+
|
|
5
|
+
This project follows Git Flow branching strategy.
|
|
6
|
+
|
|
7
|
+
### Branch Structure
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
main ← Production (stable releases only)
|
|
11
|
+
│
|
|
12
|
+
develop ← Integration (development changes)
|
|
13
|
+
│
|
|
14
|
+
├── feature/* ← New features
|
|
15
|
+
├── bugfix/* ← Bug fixes
|
|
16
|
+
├── release/* ← Release preparation
|
|
17
|
+
└── hotfix/* ← Urgent production fixes
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Branch Naming Convention
|
|
21
|
+
|
|
22
|
+
| Type | Pattern | Example |
|
|
23
|
+
|------|---------|---------|
|
|
24
|
+
| Feature | `feature/short-description` | `feature/add-streaming-api` |
|
|
25
|
+
| Bug fix | `bugfix/issue-or-description` | `bugfix/fix-notebook-export` |
|
|
26
|
+
| Release | `release/vX.Y.Z` | `release/v0.2.0` |
|
|
27
|
+
| Hotfix | `hotfix/description` | `hotfix/critical-memory-leak` |
|
|
28
|
+
|
|
29
|
+
## Development Workflow
|
|
30
|
+
|
|
31
|
+
### 1. New Feature
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
# Start from develop
|
|
35
|
+
git checkout develop
|
|
36
|
+
git pull origin develop
|
|
37
|
+
|
|
38
|
+
# Create feature branch
|
|
39
|
+
git checkout -b feature/my-new-feature
|
|
40
|
+
|
|
41
|
+
# Work on your feature...
|
|
42
|
+
# Make commits with clear messages
|
|
43
|
+
|
|
44
|
+
# Push and create PR
|
|
45
|
+
git push -u origin feature/my-new-feature
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Then create a Pull Request to `develop` on GitHub.
|
|
49
|
+
|
|
50
|
+
### 2. Bug Fix
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Start from develop
|
|
54
|
+
git checkout develop
|
|
55
|
+
git pull origin develop
|
|
56
|
+
|
|
57
|
+
# Create bugfix branch
|
|
58
|
+
git checkout -b bugfix/fix-issue-123
|
|
59
|
+
|
|
60
|
+
# Fix the bug...
|
|
61
|
+
|
|
62
|
+
# Push and create PR
|
|
63
|
+
git push -u origin bugfix/fix-issue-123
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Then create a Pull Request to `develop` on GitHub.
|
|
67
|
+
|
|
68
|
+
### 3. Release
|
|
69
|
+
|
|
70
|
+
When `develop` is ready for a new release:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# Start from develop
|
|
74
|
+
git checkout develop
|
|
75
|
+
git pull origin develop
|
|
76
|
+
|
|
77
|
+
# Create release branch
|
|
78
|
+
git checkout -b release/v0.2.0
|
|
79
|
+
|
|
80
|
+
# Update version in pyproject.toml
|
|
81
|
+
# Update CHANGELOG if exists
|
|
82
|
+
# Final testing
|
|
83
|
+
|
|
84
|
+
# Push
|
|
85
|
+
git push -u origin release/v0.2.0
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Then:
|
|
89
|
+
1. Create PR to `main`
|
|
90
|
+
2. After merge, tag the release: `git tag v0.2.0`
|
|
91
|
+
3. Merge back to `develop`
|
|
92
|
+
|
|
93
|
+
### 4. Hotfix (Urgent Production Fix)
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# Start from main
|
|
97
|
+
git checkout main
|
|
98
|
+
git pull origin main
|
|
99
|
+
|
|
100
|
+
# Create hotfix branch
|
|
101
|
+
git checkout -b hotfix/critical-fix
|
|
102
|
+
|
|
103
|
+
# Fix the issue...
|
|
104
|
+
|
|
105
|
+
# Push
|
|
106
|
+
git push -u origin hotfix/critical-fix
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Then:
|
|
110
|
+
1. Create PR to `main`
|
|
111
|
+
2. After merge, also merge to `develop`
|
|
112
|
+
|
|
113
|
+
## Commit Messages
|
|
114
|
+
|
|
115
|
+
Use clear, descriptive commit messages:
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
type: short description
|
|
119
|
+
|
|
120
|
+
Longer description if needed.
|
|
121
|
+
|
|
122
|
+
- Bullet points for multiple changes
|
|
123
|
+
- Another change
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Types:**
|
|
127
|
+
- `feat:` New feature
|
|
128
|
+
- `fix:` Bug fix
|
|
129
|
+
- `docs:` Documentation only
|
|
130
|
+
- `refactor:` Code refactoring
|
|
131
|
+
- `test:` Adding tests
|
|
132
|
+
- `chore:` Maintenance tasks
|
|
133
|
+
|
|
134
|
+
**Examples:**
|
|
135
|
+
```
|
|
136
|
+
feat: add streaming API endpoint
|
|
137
|
+
|
|
138
|
+
fix: resolve notebook cell ordering issue
|
|
139
|
+
|
|
140
|
+
docs: update README with CLI examples
|
|
141
|
+
|
|
142
|
+
refactor: simplify plan parsing logic
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Development Setup
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
# Clone repository
|
|
149
|
+
git clone https://github.com/nmlemus/dsagent.git
|
|
150
|
+
cd dsagent
|
|
151
|
+
|
|
152
|
+
# Create environment with uv
|
|
153
|
+
uv venv --python 3.11
|
|
154
|
+
source .venv/bin/activate
|
|
155
|
+
|
|
156
|
+
# Install with dev dependencies
|
|
157
|
+
uv sync --all-extras
|
|
158
|
+
|
|
159
|
+
# Run tests
|
|
160
|
+
uv run pytest
|
|
161
|
+
|
|
162
|
+
# Run linting
|
|
163
|
+
uv run ruff check .
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Code Style
|
|
167
|
+
|
|
168
|
+
- Use `ruff` for linting
|
|
169
|
+
- Use `mypy` for type checking
|
|
170
|
+
- Follow PEP 8 guidelines
|
|
171
|
+
- Add type hints to all functions
|
|
172
|
+
- Write docstrings for public APIs
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
# Check code style
|
|
176
|
+
uv run ruff check .
|
|
177
|
+
|
|
178
|
+
# Auto-fix issues
|
|
179
|
+
uv run ruff check --fix .
|
|
180
|
+
|
|
181
|
+
# Type checking
|
|
182
|
+
uv run mypy src/
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Pull Request Checklist
|
|
186
|
+
|
|
187
|
+
Before submitting a PR:
|
|
188
|
+
|
|
189
|
+
- [ ] Code follows project style guidelines
|
|
190
|
+
- [ ] Tests pass locally
|
|
191
|
+
- [ ] New features have tests
|
|
192
|
+
- [ ] Documentation updated if needed
|
|
193
|
+
- [ ] Commit messages are clear
|
|
194
|
+
- [ ] PR description explains the changes
|
|
195
|
+
|
|
196
|
+
## Questions?
|
|
197
|
+
|
|
198
|
+
Open an issue on GitHub for any questions or discussions.
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datascience-agent
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: AI Agent with dynamic planning and persistent Jupyter kernel execution for data analysis
|
|
5
|
+
Project-URL: Homepage, https://github.com/nmlemus/dsagent
|
|
6
|
+
Project-URL: Documentation, https://github.com/nmlemus/dsagent#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/nmlemus/dsagent
|
|
8
|
+
Author: DSAgent Contributors
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
Keywords: agent,ai,autonomous-agent,data-analysis,datascience-agent,dsagent,jupyter,llm,machine-learning,planner
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Requires-Dist: ipykernel>=6.0.0
|
|
23
|
+
Requires-Dist: jupyter-client>=8.0.0
|
|
24
|
+
Requires-Dist: litellm>=1.0.0
|
|
25
|
+
Requires-Dist: matplotlib>=3.7.0
|
|
26
|
+
Requires-Dist: numpy>=1.24.0
|
|
27
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
28
|
+
Requires-Dist: pandas>=2.0.0
|
|
29
|
+
Requires-Dist: pycaret>=3.0.0
|
|
30
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
31
|
+
Requires-Dist: pydantic>=2.0.0
|
|
32
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
33
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
34
|
+
Requires-Dist: seaborn>=0.12.0
|
|
35
|
+
Requires-Dist: statsmodels>=0.14.0
|
|
36
|
+
Provides-Extra: api
|
|
37
|
+
Requires-Dist: fastapi>=0.100.0; extra == 'api'
|
|
38
|
+
Requires-Dist: sse-starlette>=1.0.0; extra == 'api'
|
|
39
|
+
Requires-Dist: uvicorn>=0.20.0; extra == 'api'
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
42
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
43
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# DSAgent
|
|
48
|
+
|
|
49
|
+
An AI-powered autonomous agent for data analysis with dynamic planning and persistent Jupyter kernel execution.
|
|
50
|
+
|
|
51
|
+
## Features
|
|
52
|
+
|
|
53
|
+
- **Dynamic Planning**: Agent creates and follows plans with [x]/[ ] step tracking
|
|
54
|
+
- **Persistent Execution**: Code runs in a Jupyter kernel with variable persistence
|
|
55
|
+
- **Multi-Provider LLM**: Supports OpenAI, Anthropic, Google, Ollama via LiteLLM
|
|
56
|
+
- **Notebook Generation**: Automatically generates clean, runnable Jupyter notebooks
|
|
57
|
+
- **Event Streaming**: Real-time events for UI integration
|
|
58
|
+
- **Comprehensive Logging**: Full execution logs for debugging and ML retraining
|
|
59
|
+
- **Session Management**: State persistence for multi-user scenarios
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
Using pip:
|
|
64
|
+
```bash
|
|
65
|
+
pip install datascience-agent
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
With FastAPI support:
|
|
69
|
+
```bash
|
|
70
|
+
pip install "datascience-agent[api]"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
Using uv (recommended):
|
|
74
|
+
```bash
|
|
75
|
+
uv pip install datascience-agent
|
|
76
|
+
uv pip install "datascience-agent[api]" # with FastAPI
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
For development:
|
|
80
|
+
```bash
|
|
81
|
+
git clone https://github.com/nmlemus/dsagent
|
|
82
|
+
cd dsagent
|
|
83
|
+
uv sync --all-extras
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Quick Start
|
|
87
|
+
|
|
88
|
+
### Basic Usage
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from dsagent import PlannerAgent
|
|
92
|
+
|
|
93
|
+
# Create agent
|
|
94
|
+
with PlannerAgent(model="gpt-4o", workspace="./workspace") as agent:
|
|
95
|
+
result = agent.run("Analyze sales_data.csv and identify top performing products")
|
|
96
|
+
|
|
97
|
+
print(result.answer)
|
|
98
|
+
print(f"Notebook: {result.notebook_path}")
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### With Streaming
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from dsagent import PlannerAgent, EventType
|
|
105
|
+
|
|
106
|
+
agent = PlannerAgent(model="claude-3-sonnet-20240229")
|
|
107
|
+
agent.start()
|
|
108
|
+
|
|
109
|
+
for event in agent.run_stream("Build a predictive model for customer churn"):
|
|
110
|
+
if event.type == EventType.PLAN_UPDATED:
|
|
111
|
+
print(f"Plan: {event.plan.raw_text if event.plan else ''}")
|
|
112
|
+
elif event.type == EventType.CODE_SUCCESS:
|
|
113
|
+
print("Code executed successfully")
|
|
114
|
+
elif event.type == EventType.CODE_FAILED:
|
|
115
|
+
print("Code execution failed")
|
|
116
|
+
elif event.type == EventType.ANSWER_ACCEPTED:
|
|
117
|
+
print(f"Answer: {event.message}")
|
|
118
|
+
|
|
119
|
+
# Get result with notebook after streaming
|
|
120
|
+
result = agent.get_result()
|
|
121
|
+
print(f"Notebook: {result.notebook_path}")
|
|
122
|
+
|
|
123
|
+
agent.shutdown()
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### FastAPI Integration
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
from fastapi import FastAPI
|
|
130
|
+
from fastapi.responses import StreamingResponse
|
|
131
|
+
from uuid import uuid4
|
|
132
|
+
from dsagent import PlannerAgent, EventType
|
|
133
|
+
|
|
134
|
+
app = FastAPI()
|
|
135
|
+
|
|
136
|
+
@app.post("/analyze")
|
|
137
|
+
async def analyze(task: str):
|
|
138
|
+
async def event_stream():
|
|
139
|
+
agent = PlannerAgent(
|
|
140
|
+
model="gpt-4o",
|
|
141
|
+
session_id=str(uuid4()),
|
|
142
|
+
)
|
|
143
|
+
agent.start()
|
|
144
|
+
|
|
145
|
+
try:
|
|
146
|
+
for event in agent.run_stream(task):
|
|
147
|
+
yield f"data: {event.to_sse()}\n\n"
|
|
148
|
+
finally:
|
|
149
|
+
agent.shutdown()
|
|
150
|
+
|
|
151
|
+
return StreamingResponse(event_stream(), media_type="text/event-stream")
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Command Line Interface
|
|
155
|
+
|
|
156
|
+
The package includes a CLI for quick analysis from the terminal:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
dsagent "Analyze this dataset and create visualizations" --data ./my_data.csv
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### CLI Options
|
|
163
|
+
|
|
164
|
+
| Option | Short | Description |
|
|
165
|
+
|--------|-------|-------------|
|
|
166
|
+
| `--data` | `-d` | Path to data file or directory (required) |
|
|
167
|
+
| `--model` | `-m` | LLM model to use (default: gpt-4o) |
|
|
168
|
+
| `--workspace` | `-w` | Output directory (default: ./workspace) |
|
|
169
|
+
| `--run-id` | | Custom run ID for this execution |
|
|
170
|
+
| `--max-rounds` | `-r` | Max iterations (default: 30) |
|
|
171
|
+
| `--quiet` | `-q` | Suppress verbose output |
|
|
172
|
+
| `--no-stream` | | Disable streaming output |
|
|
173
|
+
|
|
174
|
+
### CLI Examples
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
# Basic analysis
|
|
178
|
+
dsagent "Find trends and patterns" -d ./sales.csv
|
|
179
|
+
|
|
180
|
+
# With specific model
|
|
181
|
+
dsagent "Build ML model" -d ./dataset -m claude-3-sonnet-20240229
|
|
182
|
+
|
|
183
|
+
# Custom output directory
|
|
184
|
+
dsagent "Create charts" -d ./data -w ./output
|
|
185
|
+
|
|
186
|
+
# With custom run ID
|
|
187
|
+
dsagent "Analyze" -d ./data --run-id my-analysis-001
|
|
188
|
+
|
|
189
|
+
# Quiet mode
|
|
190
|
+
dsagent "Analyze" -d ./data -q
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Output Structure
|
|
194
|
+
|
|
195
|
+
Each run creates an isolated workspace:
|
|
196
|
+
```
|
|
197
|
+
workspace/
|
|
198
|
+
└── runs/
|
|
199
|
+
└── {run_id}/
|
|
200
|
+
├── data/ # Input data (copied)
|
|
201
|
+
├── notebooks/ # Generated notebooks
|
|
202
|
+
├── artifacts/ # Images, charts, outputs
|
|
203
|
+
└── logs/
|
|
204
|
+
├── run.log # Human-readable log
|
|
205
|
+
└── events.jsonl # Structured events for ML
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Configuration
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from dsagent import PlannerAgent, RunContext
|
|
212
|
+
|
|
213
|
+
# With automatic run isolation
|
|
214
|
+
context = RunContext(workspace="./workspace")
|
|
215
|
+
agent = PlannerAgent(
|
|
216
|
+
model="gpt-4o", # Any LiteLLM-supported model
|
|
217
|
+
context=context, # Run context for isolation
|
|
218
|
+
max_rounds=30, # Max agent iterations
|
|
219
|
+
max_tokens=4096, # Max tokens per response
|
|
220
|
+
temperature=0.2, # LLM temperature
|
|
221
|
+
timeout=300, # Code execution timeout (seconds)
|
|
222
|
+
verbose=True, # Print to console
|
|
223
|
+
event_callback=None, # Callback for events
|
|
224
|
+
)
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
## Supported Models
|
|
228
|
+
|
|
229
|
+
Any model supported by [LiteLLM](https://docs.litellm.ai/docs/providers):
|
|
230
|
+
|
|
231
|
+
- OpenAI: `gpt-4o`, `gpt-4-turbo`, `gpt-3.5-turbo`
|
|
232
|
+
- Anthropic: `claude-3-opus-20240229`, `claude-3-sonnet-20240229`
|
|
233
|
+
- Google: `gemini-pro`, `gemini-1.5-pro`
|
|
234
|
+
- Ollama: `ollama/llama3`, `ollama/codellama`
|
|
235
|
+
- And many more...
|
|
236
|
+
|
|
237
|
+
## Event Types
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
from dsagent import EventType
|
|
241
|
+
|
|
242
|
+
EventType.AGENT_STARTED # Agent started processing
|
|
243
|
+
EventType.AGENT_FINISHED # Agent finished
|
|
244
|
+
EventType.AGENT_ERROR # Error occurred
|
|
245
|
+
EventType.ROUND_STARTED # New iteration round
|
|
246
|
+
EventType.ROUND_FINISHED # Round completed
|
|
247
|
+
EventType.LLM_CALL_STARTED # LLM call started
|
|
248
|
+
EventType.LLM_CALL_FINISHED # LLM response received
|
|
249
|
+
EventType.PLAN_CREATED # Plan was created
|
|
250
|
+
EventType.PLAN_UPDATED # Plan was updated
|
|
251
|
+
EventType.CODE_EXECUTING # Code execution started
|
|
252
|
+
EventType.CODE_SUCCESS # Code execution succeeded
|
|
253
|
+
EventType.CODE_FAILED # Code execution failed
|
|
254
|
+
EventType.ANSWER_ACCEPTED # Final answer generated
|
|
255
|
+
EventType.ANSWER_REJECTED # Answer rejected (plan incomplete)
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Architecture
|
|
259
|
+
|
|
260
|
+
```
|
|
261
|
+
dsagent/
|
|
262
|
+
├── agents/
|
|
263
|
+
│ └── base.py # PlannerAgent - main user interface
|
|
264
|
+
├── core/
|
|
265
|
+
│ ├── context.py # RunContext - workspace management
|
|
266
|
+
│ ├── engine.py # AgentEngine - main loop
|
|
267
|
+
│ ├── executor.py # JupyterExecutor - code execution
|
|
268
|
+
│ └── planner.py # PlanParser - response parsing
|
|
269
|
+
├── schema/
|
|
270
|
+
│ └── models.py # Pydantic models
|
|
271
|
+
└── utils/
|
|
272
|
+
├── logger.py # AgentLogger - console logging
|
|
273
|
+
├── run_logger.py # RunLogger - comprehensive logging
|
|
274
|
+
└── notebook.py # NotebookBuilder - notebook generation
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
## License
|
|
278
|
+
|
|
279
|
+
MIT
|