pynlqe 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. pynlqe-0.1.0/.coveragerc +17 -0
  2. pynlqe-0.1.0/.dockerignore +15 -0
  3. pynlqe-0.1.0/.env.example +19 -0
  4. pynlqe-0.1.0/.github/workflows/ci.yml +47 -0
  5. pynlqe-0.1.0/.github/workflows/publish.yml +43 -0
  6. pynlqe-0.1.0/.gitignore +14 -0
  7. pynlqe-0.1.0/.python-version +1 -0
  8. pynlqe-0.1.0/CHANGELOG.md +33 -0
  9. pynlqe-0.1.0/Dockerfile +33 -0
  10. pynlqe-0.1.0/EXAMPLE_QUERIES.md +322 -0
  11. pynlqe-0.1.0/FILE_INVENTORY.md +210 -0
  12. pynlqe-0.1.0/IMPLEMENTATION_SUMMARY.md +506 -0
  13. pynlqe-0.1.0/LICENSE +21 -0
  14. pynlqe-0.1.0/Makefile +42 -0
  15. pynlqe-0.1.0/PKG-INFO +194 -0
  16. pynlqe-0.1.0/README.md +162 -0
  17. pynlqe-0.1.0/create_sample_data.py +168 -0
  18. pynlqe-0.1.0/docs/API.md +715 -0
  19. pynlqe-0.1.0/docs/ARCHITECTURE.md +679 -0
  20. pynlqe-0.1.0/docs/DESIGN.md +570 -0
  21. pynlqe-0.1.0/docs/FAQ.md +630 -0
  22. pynlqe-0.1.0/docs/ROADMAP.md +695 -0
  23. pynlqe-0.1.0/docs/TESTING.md +744 -0
  24. pynlqe-0.1.0/fixtures/README.md +222 -0
  25. pynlqe-0.1.0/fixtures/customers.parquet +0 -0
  26. pynlqe-0.1.0/fixtures/example_queries.yaml +327 -0
  27. pynlqe-0.1.0/fixtures/golden_datasets.yaml +22132 -0
  28. pynlqe-0.1.0/fixtures/products.parquet +0 -0
  29. pynlqe-0.1.0/fixtures/regions.parquet +0 -0
  30. pynlqe-0.1.0/fixtures/transactions.parquet +0 -0
  31. pynlqe-0.1.0/main.py +6 -0
  32. pynlqe-0.1.0/prototype.ipynb +298 -0
  33. pynlqe-0.1.0/prototype_advanced.ipynb +504 -0
  34. pynlqe-0.1.0/pyproject.toml +62 -0
  35. pynlqe-0.1.0/src/nlqe/__init__.py +81 -0
  36. pynlqe-0.1.0/src/nlqe/config.py +83 -0
  37. pynlqe-0.1.0/src/nlqe/conversation/__init__.py +5 -0
  38. pynlqe-0.1.0/src/nlqe/conversation/manager.py +163 -0
  39. pynlqe-0.1.0/src/nlqe/datasource/__init__.py +6 -0
  40. pynlqe-0.1.0/src/nlqe/datasource/introspector.py +204 -0
  41. pynlqe-0.1.0/src/nlqe/datasource/manager.py +83 -0
  42. pynlqe-0.1.0/src/nlqe/duckdb/__init__.py +5 -0
  43. pynlqe-0.1.0/src/nlqe/duckdb/executor.py +204 -0
  44. pynlqe-0.1.0/src/nlqe/engine.py +262 -0
  45. pynlqe-0.1.0/src/nlqe/llm/__init__.py +15 -0
  46. pynlqe-0.1.0/src/nlqe/llm/client.py +354 -0
  47. pynlqe-0.1.0/src/nlqe/openai/__init__.py +10 -0
  48. pynlqe-0.1.0/src/nlqe/openai/client.py +5 -0
  49. pynlqe-0.1.0/src/nlqe/query/__init__.py +5 -0
  50. pynlqe-0.1.0/src/nlqe/query/loop.py +206 -0
  51. pynlqe-0.1.0/src/nlqe/synthesis/__init__.py +5 -0
  52. pynlqe-0.1.0/src/nlqe/synthesis/answer.py +33 -0
  53. pynlqe-0.1.0/src/nlqe/testing/__init__.py +28 -0
  54. pynlqe-0.1.0/src/nlqe/testing/cli.py +255 -0
  55. pynlqe-0.1.0/src/nlqe/testing/datasets.py +197 -0
  56. pynlqe-0.1.0/src/nlqe/testing/evaluator.py +318 -0
  57. pynlqe-0.1.0/src/nlqe/testing/metrics.py +386 -0
  58. pynlqe-0.1.0/src/nlqe/testing/reporter.py +289 -0
  59. pynlqe-0.1.0/src/nlqe/types.py +107 -0
  60. pynlqe-0.1.0/src/nlqe/utils/__init__.py +48 -0
  61. pynlqe-0.1.0/src/nlqe/utils/errors.py +115 -0
  62. pynlqe-0.1.0/src/nlqe/utils/logging.py +37 -0
  63. pynlqe-0.1.0/tests/__init__.py +0 -0
  64. pynlqe-0.1.0/tests/conftest.py +281 -0
  65. pynlqe-0.1.0/tests/integration/__init__.py +0 -0
  66. pynlqe-0.1.0/tests/integration/test_pipeline.py +222 -0
  67. pynlqe-0.1.0/tests/unit/__init__.py +0 -0
  68. pynlqe-0.1.0/tests/unit/core/__init__.py +0 -0
  69. pynlqe-0.1.0/tests/unit/core/test_config.py +49 -0
  70. pynlqe-0.1.0/tests/unit/core/test_engine.py +163 -0
  71. pynlqe-0.1.0/tests/unit/core/test_query_loop.py +167 -0
  72. pynlqe-0.1.0/tests/unit/datasource/test_introspector.py +101 -0
  73. pynlqe-0.1.0/tests/unit/duckdb/test_executor.py +140 -0
  74. pynlqe-0.1.0/tests/unit/llm/__init__.py +0 -0
  75. pynlqe-0.1.0/tests/unit/llm/test_client.py +184 -0
  76. pynlqe-0.1.0/tests/unit/synthesis/test_answer.py +23 -0
  77. pynlqe-0.1.0/tests/unit/testing/__init__.py +0 -0
  78. pynlqe-0.1.0/tests/unit/testing/test_cli.py +177 -0
  79. pynlqe-0.1.0/tests/unit/testing/test_datasets.py +193 -0
  80. pynlqe-0.1.0/tests/unit/testing/test_evaluator.py +222 -0
  81. pynlqe-0.1.0/tests/unit/testing/test_metrics.py +202 -0
  82. pynlqe-0.1.0/tests/unit/testing/test_reporter.py +181 -0
  83. pynlqe-0.1.0/uv.lock +4369 -0
@@ -0,0 +1,17 @@
1
+ [run]
2
+ omit =
3
+ src/nlqe/openai/client.py
4
+ src/nlqe/openai/__init__.py
5
+ src/nlqe/testing/__init__.py
6
+ src/nlqe/utils/logging.py
7
+
8
+ [report]
9
+ exclude_lines =
10
+ pragma: no cover
11
+ def __repr__
12
+ if self.debug:
13
+ if __name__ == .__main__.:
14
+ if 0:
15
+ class .*\bProtocol\):
16
+ @(abc\.)?abstractmethod
17
+ except Exception as e:
@@ -0,0 +1,15 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ .pytest_cache/
5
+ .ruff_cache/
6
+ .mypy_cache/
7
+ .coverage
8
+ .env
9
+ .git/
10
+ .idea/
11
+ .venv/
12
+ dist/
13
+ build/
14
+ reports/
15
+ *.duckdb
@@ -0,0 +1,19 @@
1
+ # OpenAI API Configuration
2
+ NLQE_OPENAI_API_KEY=sk-your-api-key-here
3
+
4
+ # OpenAI Model Settings (optional)
5
+ NLQE_OPENAI_MODEL=gpt-4
6
+ NLQE_OPENAI_TEMPERATURE=0.0
7
+ NLQE_OPENAI_MAX_TOKENS=2000
8
+
9
+ # Query Execution Settings (optional)
10
+ NLQE_QUERY_TIMEOUT_SECONDS=30
11
+ NLQE_MAX_DEBUG_ATTEMPTS=3
12
+
13
+ # Datasource Settings (optional)
14
+ # NLQE_DATASOURCE_PATH=./data/sales.parquet
15
+ # NLQE_DATASOURCE_TYPE=parquet
16
+
17
+ # Operational Settings (optional)
18
+ NLQE_LOG_LEVEL=INFO
19
+ NLQE_LOG_QUERIES=true
@@ -0,0 +1,47 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, master ]
6
+ pull_request:
7
+ branches: [ main, master ]
8
+
9
+ jobs:
10
+ lint-and-test:
11
+ strategy:
12
+ matrix:
13
+ python-version: ["3.11", "3.12"]
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v3
20
+ with:
21
+ enable-cache: true
22
+ cache-dependency-path: uv.lock
23
+
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+
29
+ - name: Install dependencies
30
+ run: |
31
+ uv sync --frozen --all-extras
32
+
33
+ - name: Lint with ruff
34
+ run: |
35
+ make lint
36
+
37
+ - name: Run tests with pytest
38
+ run: |
39
+ make test-cov
40
+
41
+ - name: Upload coverage to Codecov
42
+ uses: codecov/codecov-action@v4
43
+ with:
44
+ file: ./coverage.xml
45
+ fail_ci_if_error: false
46
+ env:
47
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
@@ -0,0 +1,43 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*' # Run on tags like v0.1.0, v1.0.0, etc.
7
+ workflow_dispatch: # Allow manual triggering if needed
8
+
9
+ jobs:
10
+ build-and-publish:
11
+ name: Build and Publish
12
+ runs-on: ubuntu-latest
13
+ environment:
14
+ name: pypi
15
+ url: https://pypi.org/p/pynlqe
16
+ permissions:
17
+ id-token: write # Necessary for trusted publishing
18
+ contents: read # Required for actions/checkout
19
+
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+
23
+ - name: Install uv
24
+ uses: astral-sh/setup-uv@v3
25
+ with:
26
+ enable-cache: true
27
+ cache-dependency-path: uv.lock
28
+
29
+ - name: Set up Python
30
+ uses: actions/setup-python@v5
31
+ with:
32
+ python-version: "3.11"
33
+
34
+ - name: Build package
35
+ run: |
36
+ uv build
37
+
38
+ - name: Publish to PyPI
39
+ uses: pypa/gh-action-pypi-publish@release/v1
40
+ # No password needed if you use Trusted Publishing in PyPI
41
+ # Otherwise, use secrets.PYPI_API_TOKEN:
42
+ # with:
43
+ # password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,14 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ .idea/
12
+ .jbeval
13
+ .env
14
+ /reports/evaluation_report.csv
@@ -0,0 +1 @@
1
+ 3.11
@@ -0,0 +1,33 @@
1
+ # Changelog
2
+
3
+ All notable changes to the Natural Language Query Engine (NLQE) will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [0.1.0] - 2026-03-30
9
+
10
+ This is the initial open-source release of the **Natural Language Query Engine (NLQE)**.
11
+
12
+ NLQE is a modular library designed to bridge the gap between human language and structured data. It leverages Large Language Models (LLMs) to automatically generate, validate, and execute SQL queries over in-process datasources.
13
+
14
+ ### Added
15
+ - **Core Engine API**: Programmatic `QueryEngine` for executing natural language queries against arbitrary datasets.
16
+ - **Datasource Introspection**: Automatic schema discovery for locally stored `.csv` and `.parquet` files via DuckDB.
17
+ - **LLM Integrations**:
18
+ - First-class support for OpenAI's language models (`gpt-4o`, `gpt-3.5-turbo`, etc.).
19
+ - Support for Anthropic's Claude (`claude-3-5-sonnet-20241022`, etc.) via `custom_llm_client`.
20
+ - Extensible `LLMClient` class that wraps LangChain integrations.
21
+ - **DuckDB Execution Layer**: Secure, in-memory analytical query engine utilizing `duckdb>=1.5.0` to read `parquet` and `csv` files directly.
22
+ - **Iterative Debug Loop**: Automatic error recovery system. When DuckDB encounters a SQL syntax or schema mismatch error, the LLM is provided the error context to self-correct and re-execute the query (up to 3 attempts by default).
23
+ - **Multi-turn Conversations**: Context-aware `start_conversation()` feature enabling users to ask follow-up questions referencing previous tabular results natively.
24
+ - **Evaluation Framework**: `nlqe.testing.cli` evaluation system using "golden datasets" to securely score LLM generation accuracy, completeness, and confidence calibration.
25
+ - **Safety Checks**: Built-in AST-level checks that reject dangerous statements (e.g. `DROP`, `DELETE`, `TRUNCATE`) prior to database execution.
26
+
27
+ ### Security
28
+ - Verified protection against SQL injection attacks targeting the local process.
29
+ - Strictly decoupled architecture ensuring API keys are injected at runtime exclusively via environment variables (`NLQE_OPENAI_API_KEY`) or securely supplied configuration.
30
+
31
+ ### Performance
32
+ - Fully decoupled in-process querying resulting in sub-500ms analytical execution overhead.
33
+ - Configurable timeouts limiting resource consumption by runaway queries.
@@ -0,0 +1,33 @@
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.11-slim
3
+
4
+ # Set the working directory in the container
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies
8
+ RUN apt-get update && apt-get install -y --no-install-recommends \
9
+ build-essential \
10
+ curl \
11
+ && rm -rf /var/lib/apt/lists/*
12
+
13
+ # Install uv
14
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
15
+
16
+ # Copy the project files
17
+ COPY pyproject.toml uv.lock ./
18
+ COPY README.md ./
19
+
20
+ # Install dependencies
21
+ RUN uv sync --frozen --no-dev
22
+
23
+ # Copy the rest of the application code
24
+ COPY src/ ./src/
25
+ COPY fixtures/ ./fixtures/
26
+
27
+ # Set environment variables
28
+ ENV PYTHONPATH="/app/src:${PYTHONPATH}"
29
+ ENV PYTHONUNBUFFERED=1
30
+
31
+ # Command to run the application (assuming main.py is the entry point)
32
+ # Adjust if there's a specific CLI command
33
+ CMD ["uv", "run", "python", "-m", "nlqe.testing.cli", "--help"]
@@ -0,0 +1,322 @@
1
+ # Example Queries for NLQE Testing
2
+
3
+ This document lists all 25+ example query patterns organized by complexity and SQL feature.
4
+
5
+ ## Simple Queries (Easy - Good for Testing Basics)
6
+
7
+ ### 1. Count Transactions
8
+ **Question**: "How many transactions are in the dataset?"
9
+ **Difficulty**: Easy
10
+ **SQL Feature**: COUNT(*)
11
+ **Expected Approach**: Basic aggregation without WHERE clause
12
+
13
+ ### 2. Total Revenue
14
+ **Question**: "What is the total revenue from all transactions?"
15
+ **Difficulty**: Easy
16
+ **SQL Feature**: SUM()
17
+ **Expected Approach**: Basic SUM aggregation
18
+
19
+ ### 3. Average Amount
20
+ **Question**: "What is the average transaction amount?"
21
+ **Difficulty**: Easy
22
+ **SQL Feature**: AVG()
23
+ **Expected Approach**: Simple average calculation
24
+
25
+ ### 4. Filter by Category
26
+ **Question**: "Show me all Electronics transactions"
27
+ **Difficulty**: Easy
28
+ **SQL Feature**: WHERE with string comparison
29
+ **Expected Approach**: Filter single table by category
30
+
31
+ ### 5. Filter by Date Range
32
+ **Question**: "Show transactions from February 2024"
33
+ **Difficulty**: Easy
34
+ **SQL Feature**: WHERE with date range (>=, <)
35
+ **Expected Approach**: Date range filtering
36
+
37
+ ### 6. Filter by Amount
38
+ **Question**: "Show me transactions over 1000 dollars"
39
+ **Difficulty**: Easy
40
+ **SQL Feature**: WHERE with numeric comparison
41
+ **Expected Approach**: Simple numeric filter
42
+
43
+ ---
44
+
45
+ ## Grouping Queries (Medium - Test GROUP BY)
46
+
47
+ ### 7. Revenue by Category
48
+ **Question**: "What is the total revenue by product category?"
49
+ **Difficulty**: Medium
50
+ **SQL Features**: GROUP BY, SUM(), ORDER BY
51
+ **Expected Approach**: GROUP BY with aggregation and sorting
52
+
53
+ ### 8. Transaction Count by Category
54
+ **Question**: "How many transactions are there for each category?"
55
+ **Difficulty**: Medium
56
+ **SQL Features**: GROUP BY, COUNT(), ORDER BY
57
+ **Expected Approach**: Count transactions per category
58
+
59
+ ### 9. Revenue by Region and Category
60
+ **Question**: "Show total revenue by region and category"
61
+ **Difficulty**: Medium
62
+ **SQL Features**: GROUP BY (multiple columns), SUM(), JOIN
63
+ **Expected Approach**: Multi-column GROUP BY with join
64
+
65
+ ---
66
+
67
+ ## Sorting and Limiting (Medium - Test ORDER BY and LIMIT)
68
+
69
+ ### 10. Top 10 Transactions
70
+ **Question**: "Show the 10 largest transactions"
71
+ **Difficulty**: Easy
72
+ **SQL Features**: ORDER BY DESC, LIMIT
73
+ **Expected Approach**: Sort descending and limit results
74
+
75
+ ### 11. Smallest Transactions
76
+ **Question**: "Show the 5 smallest transactions"
77
+ **Difficulty**: Easy
78
+ **SQL Features**: ORDER BY ASC, LIMIT
79
+ **Expected Approach**: Sort ascending for minimum values
80
+
81
+ ---
82
+
83
+ ## Join Queries (Medium - Test INNER and LEFT JOINs)
84
+
85
+ ### 12. Revenue by Region Name
86
+ **Question**: "Show revenue by region name"
87
+ **Difficulty**: Medium
88
+ **SQL Features**: INNER JOIN, GROUP BY, SUM()
89
+ **Expected Approach**: Join transactions with regions table
90
+
91
+ ### 13. Transactions with Details
92
+ **Question**: "Show transactions with customer name and region name"
93
+ **Difficulty**: Medium
94
+ **SQL Features**: Multiple JOINs, SELECT specific columns
95
+ **Expected Approach**: Denormalize data with two joins
96
+
97
+ ### 14. All Customers and Spending
98
+ **Question**: "Show all customers and their total spending (including inactive)"
99
+ **Difficulty**: Medium
100
+ **SQL Features**: LEFT JOIN, GROUP BY, NULLS
101
+ **Expected Approach**: Preserve unmatched customers with LEFT JOIN
102
+
103
+ ---
104
+
105
+ ## Complex Queries (Hard - Advanced SQL Features)
106
+
107
+ ### 15. Multi-Dimensional Revenue Analysis
108
+ **Question**: "Show revenue, transaction count, and average amount by region and category"
109
+ **Difficulty**: Hard
110
+ **SQL Features**: GROUP BY (multi-column), multiple aggregations
111
+ **Expected Approach**: Multiple aggregate functions with 2-level grouping
112
+
113
+ ### 16. Customer Segmentation
114
+ **Question**: "Show spending analysis for Gold tier customers who are active"
115
+ **Difficulty**: Hard
116
+ **SQL Features**: WHERE multiple conditions, LEFT JOIN, multiple aggregations
117
+ **Expected Approach**: Filter on customer attributes, join with transactions
118
+
119
+ ### 17. High-Value Customers
120
+ **Question**: "List customers with lifetime value over 10000 dollars and at least 5 transactions"
121
+ **Difficulty**: Hard
122
+ **SQL Features**: GROUP BY, HAVING clause, aggregate filtering
123
+ **Expected Approach**: HAVING clause to filter aggregated results
124
+
125
+ ### 18. Return Rate by Category
126
+ **Question**: "Show return rate by category"
127
+ **Difficulty**: Hard
128
+ **SQL Features**: CASE WHEN, conditional aggregation, percentage calculation
129
+ **Expected Approach**: Use CASE WHEN for conditional counting
130
+
131
+ ### 19. Quarterly Revenue Comparison
132
+ **Question**: "Show quarterly revenue and growth comparison"
133
+ **Difficulty**: Hard
134
+ **SQL Features**: QUARTER(), EXTRACT(), GROUP BY date components
135
+ **Expected Approach**: Date functions for temporal grouping
136
+
137
+ ### 20. Profit Margin Analysis
138
+ **Question**: "Which category has the highest profit margin and is it growing?"
139
+ **Difficulty**: Hard
140
+ **SQL Features**: SUM(), AVG(), percentage calculations
141
+ **Expected Approach**: Calculate margins and percentages
142
+
143
+ ### 21. Top Customers by Region
144
+ **Question**: "Show the top 3 spenders in each region"
145
+ **Difficulty**: Hard
146
+ **SQL Features**: Multiple JOINs, GROUP BY, LIMIT with grouping
147
+ **Expected Approach**: Top N per group pattern
148
+
149
+ ---
150
+
151
+ ## Multi-Turn Conversation Examples
152
+
153
+ ### 22. Regional Analysis (3-turn conversation)
154
+ **Turn 1**: "Which product category has the highest total revenue?"
155
+ **Turn 2**: "How many returns did we have in that category?"
156
+ **Turn 3**: "How does the return rate compare to other categories?"
157
+ **Purpose**: Test context preservation and follow-up understanding
158
+
159
+ ### 23. Customer Analysis (3-turn conversation)
160
+ **Turn 1**: "Show me our Gold tier customers"
161
+ **Turn 2**: "What is their average lifetime value?"
162
+ **Turn 3**: "How does that compare to other tiers?"
163
+ **Purpose**: Test multi-level context and comparison
164
+
165
+ ---
166
+
167
+ ## SQL Features Coverage Matrix
168
+
169
+ | Feature | Query | Difficulty |
170
+ |---------|-------|------------|
171
+ | COUNT(*) | #1 | Easy |
172
+ | SUM() | #2 | Easy |
173
+ | AVG() | #3 | Easy |
174
+ | WHERE (string) | #4 | Easy |
175
+ | WHERE (date range) | #5 | Easy |
176
+ | WHERE (numeric) | #6 | Easy |
177
+ | GROUP BY | #7-9 | Medium |
178
+ | ORDER BY | #10-11 | Easy |
179
+ | LIMIT | #10-11 | Easy |
180
+ | INNER JOIN | #12-13 | Medium |
181
+ | LEFT JOIN | #14 | Medium |
182
+ | Multiple JOINs | #13 | Medium |
183
+ | Multi-column GROUP BY | #9, #15 | Medium-Hard |
184
+ | Multiple Aggregations | #15, #20 | Hard |
185
+ | HAVING clause | #17 | Hard |
186
+ | CASE WHEN | #18 | Hard |
187
+ | Date Functions | #19 | Hard |
188
+ | Percentage Calc | #18, #20 | Hard |
189
+ | WHERE (multi-condition) | #16 | Hard |
190
+ | Top N per group | #21 | Hard |
191
+
192
+ ---
193
+
194
+ ## Query Difficulty Distribution
195
+
196
+ **Easy (8)**: Basic operations
197
+ - Simple aggregations (COUNT, SUM, AVG)
198
+ - Single table filtering
199
+ - Basic sorting and limiting
200
+
201
+ **Medium (6)**: Combine multiple concepts
202
+ - GROUP BY with aggregation
203
+ - Joining multiple tables
204
+ - Sorting with limiting
205
+ - Basic multi-column operations
206
+
207
+ **Hard (7+)**: Advanced patterns
208
+ - Multi-dimensional analysis
209
+ - Complex filtering with aggregation
210
+ - HAVING clauses
211
+ - Conditional aggregation (CASE WHEN)
212
+ - Date/time functions
213
+ - Top N per group
214
+ - Multi-turn conversation
215
+
216
+ ---
217
+
218
+ ## Running the Examples
219
+
220
+ ### Option 1: Interactive Notebooks
221
+ ```bash
222
+ # Basic examples
223
+ jupyter notebook prototype.ipynb
224
+
225
+ # Advanced examples with complex queries
226
+ jupyter notebook prototype_advanced.ipynb
227
+ ```
228
+
229
+ ### Option 2: Python API
230
+ ```python
231
+ from nlqe import QueryEngine, QueryEngineConfig
232
+
233
+ config = QueryEngineConfig.from_env()
234
+ engine = QueryEngine(config)
235
+ engine.load_datasource("fixtures/transactions.parquet")
236
+
237
+ # Try any query
238
+ response = engine.query("Show revenue by region and category")
239
+ print(response.answer)
240
+ ```
241
+
242
+ ### Option 3: From YAML
243
+ See `fixtures/example_queries.yaml` for all 25+ examples in structured format with:
244
+ - Natural language question
245
+ - Expected SQL
246
+ - Explanation
247
+ - Difficulty level
248
+ - SQL features used
249
+ - Tags for filtering
250
+
251
+ ---
252
+
253
+ ## Expected Accuracy by Category
254
+
255
+ Based on the design goals (>85% accuracy):
256
+
257
+ | Category | Expected Accuracy |
258
+ |----------|------------------|
259
+ | Simple Aggregations | >95% |
260
+ | Filtering | >90% |
261
+ | Single GROUP BY | >90% |
262
+ | Joins | 80-85% |
263
+ | Multi-column GROUP BY | 75-85% |
264
+ | Complex (HAVING, CASE) | 70-80% |
265
+ | Multi-turn Context | 75-85% |
266
+
267
+ **Note**: Accuracy improves with better examples and prompt engineering in Phase 2.
268
+
269
+ ---
270
+
271
+ ## Extending the Examples
272
+
273
+ To add more example queries to the system:
274
+
275
+ 1. **Add to `fixtures/example_queries.yaml`:**
276
+ ```yaml
277
+ - id: "unique_id"
278
+ category: "category_name"
279
+ difficulty: "easy|medium|hard"
280
+ description: "What this tests"
281
+ question: "Natural language question"
282
+ sql: "Expected SQL"
283
+ explanation: "Why this SQL works"
284
+ tags: ["tag1", "tag2"]
285
+ ```
286
+
287
+ 2. **Add test case to notebook:**
288
+ - Create cell with question
289
+ - Call `engine.query(question)`
290
+ - Verify results
291
+
292
+ 3. **Track metrics:**
293
+ - Did it generate correct SQL?
294
+ - Did it execute successfully?
295
+ - Was the answer helpful?
296
+ - Confidence score appropriate?
297
+
298
+ ---
299
+
300
+ ## Next Phase Goals
301
+
302
+ **Phase 2 (Weeks 2-3):**
303
+ - ✓ Add 50+ more example patterns
304
+ - ✓ Create golden dataset with expected results
305
+ - ✓ Implement evaluation metrics
306
+ - ✓ Measure accuracy by category
307
+ - ✓ Optimize prompts based on failures
308
+
309
+ **Phase 3 (Weeks 4-5):**
310
+ - ✓ Add domain-specific examples
311
+ - ✓ Fine-tune confidence scoring
312
+ - ✓ Improve multi-turn handling
313
+ - ✓ Reach 85%+ accuracy target
314
+
315
+ ---
316
+
317
+ ## Resources
318
+
319
+ - See `fixtures/README.md` for sample data documentation
320
+ - See `docs/TESTING.md` for accuracy evaluation methodology
321
+ - See `docs/API.md` for QueryEngine usage
322
+ - See `IMPLEMENTATION_SUMMARY.md` for architecture overview