QuerySUTRA 0.3.3-py3-none-any.whl → 0.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- querysutra-0.4.1.dist-info/METADATA +264 -0
- {querysutra-0.3.3.dist-info → querysutra-0.4.1.dist-info}/RECORD +7 -7
- sutra/__init__.py +2 -5
- sutra/sutra.py +352 -580
- querysutra-0.3.3.dist-info/METADATA +0 -285
- {querysutra-0.3.3.dist-info → querysutra-0.4.1.dist-info}/WHEEL +0 -0
- {querysutra-0.3.3.dist-info → querysutra-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {querysutra-0.3.3.dist-info → querysutra-0.4.1.dist-info}/top_level.txt +0 -0
sutra/sutra.py
CHANGED
@@ -1,35 +1,32 @@
 """
-QuerySUTRA v0.
+QuerySUTRA v0.4.0 - SIMPLE & AUTOMATIC
 SUTRA: Structured-Unstructured-Text-Retrieval-Architecture

-FIXED:
--
--
--
--
+FIXED:
+- Auto-creates MySQL database if not exists
+- One-line export to MySQL
+- Complete data extraction from large PDFs
+- No manual file transfers needed

 Author: Aditya Batta
-
-Version: 0.3.3
+Version: 0.4.0
 """

-__version__ = "0.3.3"
+__version__ = "0.4.0"
 __author__ = "Aditya Batta"
-__title__ = "QuerySUTRA: Structured-Unstructured-Text-Retrieval-Architecture"
 __all__ = ["SUTRA", "QueryResult", "quick_start"]

 import os
 import sqlite3
 import pandas as pd
 import numpy as np
-from typing import Optional, Union, Dict,
+from typing import Optional, Union, Dict, List
 from pathlib import Path
 import json
 import hashlib
 import warnings
 import shutil
 import datetime
-import re
 from io import StringIO
 from difflib import get_close_matches
 warnings.filterwarnings('ignore')
@@ -73,22 +70,13 @@ except ImportError:


 class SUTRA:
-"""
-SUTRA: Structured-Unstructured-Text-Retrieval-Architecture
+"""SUTRA: Structured-Unstructured-Text-Retrieval-Architecture"""

-
-
-
-
-
-db: str = "sutra.db",
-use_embeddings: bool = False,
-check_relevance: bool = False,
-fuzzy_match: bool = True,
-cache_queries: bool = True):
-"""Initialize SUTRA with optional features."""
-print("Initializing QuerySUTRA v0.3.3")
-print("SUTRA: Structured-Unstructured-Text-Retrieval-Architecture")
+def __init__(self, api_key: Optional[str] = None, db: str = "sutra.db",
+use_embeddings: bool = False, check_relevance: bool = False,
+fuzzy_match: bool = True, cache_queries: bool = True):
+"""Initialize."""
+print("Initializing QuerySUTRA v0.4.0")

 if api_key:
 os.environ["OPENAI_API_KEY"] = api_key
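
The new constructor collapses the old multi-line signature into a compact one. A minimal construction sketch (the API key value is a placeholder, not a real credential):

    from sutra import SUTRA

    # Feature flags keep the defaults shown in the signature above.
    sutra = SUTRA(api_key="sk-...", db="sutra.db",
                  fuzzy_match=True, cache_queries=True)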
@@ -97,454 +85,354 @@ class SUTRA:
 self.client = OpenAI(api_key=self.api_key) if self.api_key and HAS_OPENAI else None

 self.db_path = db
-self.conn = sqlite3.connect(db, check_same_thread=False)
-self.cursor = self.conn.cursor()

+try:
+self.conn = sqlite3.connect(db, timeout=30, check_same_thread=False)
+self.conn.execute("PRAGMA journal_mode=WAL")
+self.conn.execute("PRAGMA synchronous=NORMAL")
+except:
+self.conn = sqlite3.connect(db, check_same_thread=False)
+
+self.cursor = self.conn.cursor()
 self.current_table = None
 self.schema_info = {}

 self.cache_queries = cache_queries
 self.cache = {} if cache_queries else None
-
 self.use_embeddings = use_embeddings
 self.embedding_model = None
 self.query_embeddings = {}
-
 self.check_relevance = check_relevance
 self.fuzzy_match = fuzzy_match

 if use_embeddings and HAS_EMBEDDINGS:
 try:
-print("Loading embeddings model...")
 self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-print("Embeddings ready")
 except:
-print("Embeddings unavailable")
 self.use_embeddings = False

 self._refresh_schema()
-
 print(f"Ready! Database: {db}")
-if not self.api_key:
-print("No API key - use .sql() for direct queries")

 @classmethod
 def load_from_db(cls, db_path: str, api_key: Optional[str] = None, **kwargs):
-"""Load existing
+"""Load existing database."""
 if not Path(db_path).exists():
-raise FileNotFoundError(f"
-
-print(f"Loading database: {db_path}")
-instance = cls(api_key=api_key, db=db_path, **kwargs)
-
-tables = instance.tables()
-print(f"Loaded {len(tables)} tables")
-
-return instance
+raise FileNotFoundError(f"Not found: {db_path}")
+return cls(api_key=api_key, db=db_path, **kwargs)

 @classmethod
 def connect_mysql(cls, host: str, user: str, password: str, database: str,
 port: int = 3306, api_key: Optional[str] = None, **kwargs):
-"""Connect to MySQL
+"""Connect to MySQL."""
 try:
 from sqlalchemy import create_engine
+import mysql.connector
 except ImportError:
 raise ImportError("Run: pip install QuerySUTRA[mysql]")

-print(f"Connecting to MySQL
+print(f"Connecting to MySQL...")

-
+# Auto-create database if not exists
+try:
+temp_conn = mysql.connector.connect(host=host, user=user, password=password, port=port)
+temp_cursor = temp_conn.cursor()
+temp_cursor.execute(f"CREATE DATABASE IF NOT EXISTS {database}")
+temp_cursor.close()
+temp_conn.close()
+except:
+pass

+engine = create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}")
 temp_db = f"sutra_mysql_{database}.db"
 instance = cls(api_key=api_key, db=temp_db, **kwargs)

-engine = create_engine(connection_string)
-
 tables = pd.read_sql_query("SHOW TABLES", engine).iloc[:, 0].tolist()

-print(f"Found {len(tables)} tables, syncing...")
-
 for table in tables:
 df = pd.read_sql_query(f"SELECT * FROM {table}", engine)
 df.to_sql(table, instance.conn, if_exists='replace', index=False)
-print(f" {table}: {len(df)} rows")

 instance._refresh_schema()
-print(f"Connected! {len(tables)} tables
-
+print(f"Connected! {len(tables)} tables")
 return instance

 @classmethod
 def connect_postgres(cls, host: str, user: str, password: str, database: str,
 port: int = 5432, api_key: Optional[str] = None, **kwargs):
-"""Connect to PostgreSQL
+"""Connect to PostgreSQL."""
 try:
 from sqlalchemy import create_engine
 except ImportError:
 raise ImportError("Run: pip install QuerySUTRA[postgres]")

-print(f"Connecting to PostgreSQL
-
-connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
+print(f"Connecting to PostgreSQL...")

+engine = create_engine(f"postgresql://{user}:{password}@{host}:{port}/{database}")
 temp_db = f"sutra_postgres_{database}.db"
 instance = cls(api_key=api_key, db=temp_db, **kwargs)

-
-
-tables = pd.read_sql_query(
-"SELECT tablename FROM pg_tables WHERE schemaname='public'",
-engine
-)['tablename'].tolist()
-
-print(f"Found {len(tables)} tables, syncing...")
+tables = pd.read_sql_query("SELECT tablename FROM pg_tables WHERE schemaname='public'", engine)['tablename'].tolist()

 for table in tables:
 df = pd.read_sql_query(f"SELECT * FROM {table}", engine)
 df.to_sql(table, instance.conn, if_exists='replace', index=False)
-print(f" {table}: {len(df)} rows")

 instance._refresh_schema()
-print(f"Connected! {len(tables)} tables
-
+print(f"Connected! {len(tables)} tables")
 return instance

 def upload(self, data: Union[str, pd.DataFrame], name: Optional[str] = None,
-extract_entities: Optional[List[str]] = None
+extract_entities: Optional[List[str]] = None,
+auto_export_mysql: Optional[Dict[str, str]] = None) -> 'SUTRA':
 """
-Upload data with
+Upload data with OPTIONAL automatic MySQL export.

 Args:
 data: File path or DataFrame
 name: Table name
-extract_entities: Custom entities to extract
+extract_entities: Custom entities to extract
+auto_export_mysql: Auto-export to MySQL after upload
+{'host': 'localhost', 'user': 'root', 'password': 'pass', 'database': 'mydb'}
+
+Example:
+sutra.upload("data.pdf", auto_export_mysql={
+'host': 'localhost',
+'user': 'root',
+'password': '123456',
+'database': 'my_database'
+})
 """
-print(
+print("\nUploading...")

 if isinstance(data, pd.DataFrame):
 name = name or "data"
 self._store_dataframe(data, name)
-return self
-
-path = Path(data)
-if not path.exists():
-raise FileNotFoundError(f"File not found: {data}")
-
-name = name or path.stem.replace(" ", "_").replace("-", "_")
-ext = path.suffix.lower()
-
-print(f"File: {path.name}")
-
-if ext == ".csv":
-df = pd.read_csv(path)
-self._store_dataframe(df, name)
-
-elif ext in [".xlsx", ".xls"]:
-df = pd.read_excel(path)
-self._store_dataframe(df, name)
-
-elif ext == ".json":
-df = pd.read_json(path)
-self._store_dataframe(df, name)
-
-elif ext == ".sql":
-with open(path) as f:
-self.cursor.executescript(f.read())
-self.conn.commit()
-self._refresh_schema()
-print("SQL executed")
-
-elif ext == ".pdf":
-self._smart_upload_pdf(path, name, extract_entities)
-
-elif ext == ".docx":
-self._smart_upload_docx(path, name, extract_entities)
-
-elif ext == ".txt":
-self._smart_upload_txt(path, name, extract_entities)
-
 else:
-
+path = Path(data)
+if not path.exists():
+raise FileNotFoundError(f"Not found: {data}")
+
+name = name or path.stem.replace(" ", "_").replace("-", "_")
+ext = path.suffix.lower()
+
+print(f"File: {path.name}")
+
+if ext == ".csv":
+self._store_dataframe(pd.read_csv(path), name)
+elif ext in [".xlsx", ".xls"]:
+self._store_dataframe(pd.read_excel(path), name)
+elif ext == ".json":
+self._store_dataframe(pd.read_json(path), name)
+elif ext == ".sql":
+with open(path) as f:
+self.cursor.executescript(f.read())
+self.conn.commit()
+self._refresh_schema()
+elif ext == ".pdf":
+self._smart_upload_pdf(path, name, extract_entities)
+elif ext == ".docx":
+self._smart_upload_docx(path, name, extract_entities)
+elif ext == ".txt":
+self._smart_upload_txt(path, name, extract_entities)
+else:
+raise ValueError(f"Unsupported: {ext}")
+
+# AUTO-EXPORT to MySQL if requested
+if auto_export_mysql:
+print("\nAuto-exporting to MySQL...")
+self.save_to_mysql(
+host=auto_export_mysql.get('host', 'localhost'),
+user=auto_export_mysql.get('user', 'root'),
+password=auto_export_mysql['password'],
+database=auto_export_mysql['database'],
+port=auto_export_mysql.get('port', 3306)
+)

 return self

 def _smart_upload_pdf(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
-"""Parse PDF
+"""Parse PDF - extracts ALL pages."""
 if not HAS_PYPDF2:
 raise ImportError("Run: pip install PyPDF2")

-print("Extracting
+print("Extracting PDF...")

 with open(path, 'rb') as file:
 pdf_reader = PyPDF2.PdfReader(file)
-
+full_text = ""
 for page_num, page in enumerate(pdf_reader.pages, 1):
-
+full_text += page.extract_text() + "\n"
 print(f" Page {page_num}/{len(pdf_reader.pages)}")

 if self.client:
-print("AI:
-tables = self._create_tables_with_ai(text, base_name, extract_entities)
+print("AI: Extracting entities...")

-
-
-
-
-
-
+# Process in chunks for large documents
+chunk_size = 10000
+all_entities = {}
+
+for i in range(0, len(full_text), chunk_size):
+chunk = full_text[i:i+chunk_size]
+chunk_num = (i // chunk_size) + 1
+total_chunks = (len(full_text) // chunk_size) + 1
+
+if total_chunks > 1:
+print(f" Chunk {chunk_num}/{total_chunks}...")
+
+entities = self._extract_chunk(chunk, extract_entities)
+
+for entity_type, records in entities.items():
+if entity_type not in all_entities:
+all_entities[entity_type] = []
+all_entities[entity_type].extend(records)
+
+# Renumber IDs
+for entity_type, records in all_entities.items():
+for idx, record in enumerate(records, 1):
+record['id'] = idx
+
+# Create tables
+if all_entities:
+print(f"\nCreated {len(all_entities)} tables:")
+for entity_type, records in all_entities.items():
+if records:
+table_name = f"{base_name}_{entity_type}"
+df = pd.DataFrame(records)
+self._store_dataframe_safe(df, table_name)
+print(f" {entity_type}: {len(df)} records")
 return

-print("
-
-self._store_dataframe(df, base_name)
+print("Creating simple table")
+self._store_dataframe(self._parse_text_simple(full_text), base_name)

 def _smart_upload_docx(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
-"""Parse DOCX
+"""Parse DOCX."""
 if not HAS_DOCX:
 raise ImportError("Run: pip install python-docx")

-print("Extracting from DOCX...")
-
 doc = docx.Document(path)

 if doc.tables:
-print(f"Found {len(doc.tables)} table(s)")
 for i, table in enumerate(doc.tables):
 data = [[cell.text.strip() for cell in row.cells] for row in table.rows]
 if data and len(data) > 1:
 df = pd.DataFrame(data[1:], columns=data[0])
-
-self._store_dataframe(df, table_name)
+self._store_dataframe(df, f"{base_name}_table_{i+1}" if len(doc.tables) > 1 else base_name)
 return

 text = "\n".join([para.text for para in doc.paragraphs])

-if self.client:
-
-
-
-
-
-
-
-cols = len(self.schema_info.get(tbl_name, {}))
-print(f" {tbl_name}: {count} rows, {cols} columns")
-return
-
-df = self._parse_text_simple(text)
-self._store_dataframe(df, base_name)
+if self.client and len(text) > 0:
+entities = self._extract_chunk(text, extract_entities)
+for entity_type, records in entities.items():
+if records:
+df = pd.DataFrame(records)
+self._store_dataframe_safe(df, f"{base_name}_{entity_type}")
+else:
+self._store_dataframe(self._parse_text_simple(text), base_name)

 def _smart_upload_txt(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
-"""Parse TXT
-print("Reading TXT...")
-
+"""Parse TXT."""
 with open(path, 'r', encoding='utf-8') as file:
 text = file.read()

-if self.client:
-
-
-
-
-
-
-
-cols = len(self.schema_info.get(tbl_name, {}))
-print(f" {tbl_name}: {count} rows, {cols} columns")
-return
-
-df = self._parse_text_simple(text)
-self._store_dataframe(df, base_name)
+if self.client and len(text) > 0:
+entities = self._extract_chunk(text, extract_entities)
+for entity_type, records in entities.items():
+if records:
+df = pd.DataFrame(records)
+self._store_dataframe_safe(df, f"{base_name}_{entity_type}")
+else:
+self._store_dataframe(self._parse_text_simple(text), base_name)

-def
-"""
-AI extracts ALL entities with PROPER primary and foreign keys.
-
-CRITICAL: Each entity gets UNIQUE IDs, foreign keys properly link tables.
-"""
+def _extract_chunk(self, text: str, custom_entities: Optional[List[str]] = None) -> Dict:
+"""Extract entities from text chunk."""
 if not self.client:
-return
+return {}

 try:
-
-entity_instruction = f"""Extract these specific entities: {', '.join(custom_entities)}
-For each entity type, create a proper table with unique IDs."""
-else:
-entity_instruction = """Automatically identify and extract ALL structured entities.
-
-Common entities (extract ALL you find):
-- people: Personal information (id, name, email, phone, address, city, state, zip)
-- skills: Individual skills (id, person_id, skill_name, proficiency_level, years_experience)
-- technologies: Technologies/tools (id, person_id, technology_name, category, proficiency)
-- projects: Projects (id, person_id, project_name, description, start_date, end_date)
-- certifications: Certifications (id, person_id, cert_name, issuer, date_obtained)
-- education: Education records (id, person_id, degree, institution, graduation_year)
-- work_experience: Work history (id, person_id, company, title, start_date, end_date)
-- events: Events/meetings (id, host_id, description, location, date, attendee_ids)
-- organizations: Companies/departments (id, name, address, city, industry)
-- products: Products/services (id, name, description, price, category)
-- ANY other structured entities you identify
-
-Extract EVERYTHING you find in the text."""
-
-extraction_prompt = f"""Analyze this text and extract ALL structured data into proper relational database tables.
+prompt = f"""Extract ALL structured entities from this text.

 Text:
-{text[:
+{text[:8000]}

-
+Extract entities like: people, skills, technologies, projects, certifications, education, work_experience, events, organizations, or ANY other structured data.

-
+Return JSON with arrays. Use sequential IDs (1,2,3...). Foreign keys reference primary keys.

-
-- Each table MUST have unique sequential IDs starting from 1
-- Person 1 gets id=1, Person 2 gets id=2, etc.
-- NO DUPLICATE IDs within same table
-- IDs must be integers
-
-2. FOREIGN KEYS:
-- Use foreign keys to link related tables
-- Example: skills table has person_id that references people.id
-- Example: projects table has person_id that references people.id
-- Foreign keys MUST match existing primary keys
-
-3. TABLE STRUCTURE:
-- Each entity type gets its own table
-- Use clear table names (people, skills, technologies, not table1, table2)
-- Include ALL relevant attributes for each entity
-
-Return JSON with this EXACT structure:
+Example:
 {{
-"people": [
-
-{{"id": 2, "name": "Jane Smith", "email": "jane@email.com", "phone": "+1-555-0101", "city": "New York", "state": "NY"}},
-...
-],
-"skills": [
-{{"id": 1, "person_id": 1, "skill_name": "Python", "proficiency": "Expert", "years": 5}},
-{{"id": 2, "person_id": 1, "skill_name": "SQL", "proficiency": "Advanced", "years": 3}},
-{{"id": 3, "person_id": 2, "skill_name": "Java", "proficiency": "Expert", "years": 7}},
-...
-],
-"technologies": [
-{{"id": 1, "person_id": 1, "technology": "React", "category": "Frontend"}},
-{{"id": 2, "person_id": 1, "technology": "PostgreSQL", "category": "Database"}},
-{{"id": 3, "person_id": 2, "technology": "Spring Boot", "category": "Backend"}},
-...
-],
-"projects": [
-{{"id": 1, "person_id": 1, "project_name": "E-commerce Platform", "role": "Lead Developer"}},
-{{"id": 2, "person_id": 2, "project_name": "Analytics Dashboard", "role": "Backend Engineer"}},
-...
-]
+"people": [{{"id": 1, "name": "John", "email": "john@co.com", "city": "Dallas"}}, ...],
+"skills": [{{"id": 1, "person_id": 1, "skill_name": "Python"}}, ...]
 }}

-
-- Extract EVERY structured piece of data you find
-- Assign UNIQUE sequential IDs (1, 2, 3, ...) for each table
-- Foreign keys MUST reference valid primary keys
-- Create as many tables as needed (don't limit yourself)
-- Return ONLY valid JSON, no explanations
-- Be COMPREHENSIVE - extract skills, technologies, projects, certifications, education, work history, etc."""
+Return ONLY valid JSON."""

-
+resp = self.client.chat.completions.create(
 model="gpt-4o-mini",
 messages=[
-{"role": "system", "content": "
-{"role": "user", "content":
+{"role": "system", "content": "Extract ALL entities with unique IDs. Return only JSON."},
+{"role": "user", "content": prompt}
 ],
 temperature=0,
-max_tokens=
+max_tokens=8000
 )

-json_text =
-
-
-extracted_data = json.loads(json_text)
-
-created_tables = []
-
-for entity_type, records in extracted_data.items():
-if records and isinstance(records, list) and len(records) > 0:
-table_name = f"{base_name}_{entity_type}"
-
-try:
-df = pd.DataFrame(records)
-if not df.empty:
-self._store_dataframe(df, table_name, silent=True)
-created_tables.append(table_name)
-print(f" {entity_type}: {len(df)} records")
-except Exception as e:
-print(f" Failed {entity_type}: {e}")
-
-return created_tables
-
+json_text = resp.choices[0].message.content.strip().replace("```json", "").replace("```", "").strip()
+return json.loads(json_text)
 except Exception as e:
-
-
+return {}
+
+def _store_dataframe_safe(self, df: pd.DataFrame, name: str):
+"""Store with error handling."""
+try:
+df.columns = [str(c).strip().replace(" ", "_").replace("-", "_") for c in df.columns]
+df.to_sql(name, self.conn, if_exists='replace', index=False, method='multi', chunksize=500)
+self.conn.commit()
+self.current_table = name
+self._refresh_schema()
+except:
+df.to_sql(name, self.conn, if_exists='replace', index=False)
+self.conn.commit()
+self.current_table = name
+self._refresh_schema()

 def _parse_text_simple(self, text: str) -> pd.DataFrame:
-"""
+"""Simple parsing."""
 lines = [line.strip() for line in text.split('\n') if line.strip()]
-
 if not lines:
 return pd.DataFrame({'content': ['No content']})

-
-for delimiter in ['\t', ',', '|', ';']:
-if all(delimiter in line for line in sample):
-try:
-df = pd.read_csv(StringIO('\n'.join(lines)), sep=delimiter)
-if len(df.columns) > 1:
-return df
-except:
-continue
-
-return pd.DataFrame({
-'line_number': range(1, len(lines) + 1),
-'content': lines
-})
+return pd.DataFrame({'line_number': range(1, len(lines) + 1), 'content': lines})

-def _store_dataframe(self, df: pd.DataFrame, name: str
-"""Store
-
-
-self.current_table = name
-self._refresh_schema()
-
-if not silent:
-print(f"Uploaded: {name}")
-print(f" {len(df)} rows, {len(df.columns)} columns")
+def _store_dataframe(self, df: pd.DataFrame, name: str):
+"""Store."""
+self._store_dataframe_safe(df, name)
+print(f"Uploaded: {name} ({len(df)} rows)")

 def ask(self, question: str, viz: Union[bool, str] = False, table: Optional[str] = None) -> 'QueryResult':
-"""
+"""Natural language query."""
 if not self.client:
-print("No API key")
 return QueryResult(False, "", pd.DataFrame(), None, "No API key")

 print(f"\nQuestion: {question}")

-if self.check_relevance:
-
-
-
-
-return QueryResult(False, "", pd.DataFrame(), None, "Irrelevant")
+if self.check_relevance and not self._is_relevant_query(question):
+print("Warning: Irrelevant query")
+choice = input("Continue? (yes/no): ").strip().lower()
+if choice not in ['yes', 'y']:
+return QueryResult(False, "", pd.DataFrame(), None, "Irrelevant")

-tbl = table or self.current_table
+tbl = table or self.current_table or (self._get_table_names()[0] if self._get_table_names() else None)
 if not tbl:
-
-if all_tables:
-tbl = all_tables[0]
-else:
-print("No tables found")
-return QueryResult(False, "", pd.DataFrame(), None, "No table")
+return QueryResult(False, "", pd.DataFrame(), None, "No table")

 if self.use_embeddings and self.embedding_model:
-
-if
-print("
-return
+cached = self._check_embedding_cache(question, tbl)
+if cached:
+print(" Cached")
+return cached

 if self.fuzzy_match:
 question = self._apply_fuzzy_matching(question, tbl)
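
The reworked upload() above accepts an auto_export_mysql mapping and, for PDFs, extracts text in 10,000-character chunks before building entity tables. A usage sketch based on the docstring example (credentials and file name are placeholders):

    from sutra import SUTRA

    sutra = SUTRA(api_key="sk-...")
    sutra.upload(
        "data.pdf",
        auto_export_mysql={
            "host": "localhost",
            "user": "root",
            "password": "change-me",
            "database": "my_database",
        },
    )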
@@ -567,7 +455,7 @@ IMPORTANT:
 fig = None
 if viz:
 viz_type = viz if isinstance(viz, str) else "auto"
-fig = self._visualize(df, question, viz_type
+fig = self._visualize(df, question, viz_type)

 result = QueryResult(True, sql_query, df, fig)

@@ -584,199 +472,155 @@ IMPORTANT:
 if not self.client:
 return True

-tables = self._get_table_names()
-columns = []
-for tbl in tables[:3]:
-cols = list(self.schema_info.get(tbl, {}).keys())
-columns.extend(cols[:5])
-
-db_context = f"Tables: {', '.join(tables[:5])}. Columns: {', '.join(columns[:15])}"
-
 try:
-
+tables = self._get_table_names()[:3]
+cols = []
+for tbl in tables:
+cols.extend(list(self.schema_info.get(tbl, {}).keys())[:5])
+
+resp = self.client.chat.completions.create(
 model="gpt-4o-mini",
 messages=[
-{"role": "system", "content": "
-{"role": "user", "content": f"
+{"role": "system", "content": "Return 'yes' or 'no'."},
+{"role": "user", "content": f"Relevant to DB with tables {', '.join(tables)}?\n\nQ: {question}\n\nyes/no:"}
 ],
 temperature=0,
 max_tokens=5
 )
-
-return 'yes' in response.choices[0].message.content.strip().lower()
+return 'yes' in resp.choices[0].message.content.lower()
 except:
 return True

 def _apply_fuzzy_matching(self, question: str, table: str) -> str:
-"""Fuzzy
+"""Fuzzy matching."""
 if not self.schema_info.get(table):
 return question

 try:
-string_cols = [col for col, dtype in self.schema_info[table].items()
-if 'TEXT' in dtype or 'VARCHAR' in dtype]
-
+string_cols = [col for col, dtype in self.schema_info[table].items() if 'TEXT' in dtype]
 if not string_cols:
 return question

 for col in string_cols[:2]:
 df = pd.read_sql_query(f"SELECT DISTINCT {col} FROM {table} LIMIT 100", self.conn)
-
+values = [str(v) for v in df[col].dropna().tolist()]

 words = question.split()
 for i, word in enumerate(words):
-matches = get_close_matches(word,
+matches = get_close_matches(word, values, n=1, cutoff=0.6)
 if matches and word != matches[0]:
 words[i] = matches[0]
 print(f" Fuzzy: '{word}' -> '{matches[0]}'")
-
 question = " ".join(words)
-
 return question
 except:
 return question

 def _check_embedding_cache(self, question: str, table: str) -> Optional['QueryResult']:
-"""Check
+"""Check cache."""
 if not self.query_embeddings:
 return None

-
-
+q_emb = self.embedding_model.encode([question])[0]
 best_match = None
-
+best_sim = 0.85

-for cached_q,
-if
+for cached_q, data in self.query_embeddings.items():
+if data['table'] != table:
 continue

-
-
-
-
-if similarity > best_similarity:
-best_similarity = similarity
+sim = np.dot(q_emb, data['embedding']) / (np.linalg.norm(q_emb) * np.linalg.norm(data['embedding']))
+if sim > best_sim:
+best_sim = sim
 best_match = cached_q

 if best_match:
-print(f" Similar
+print(f" Similar ({best_sim:.0%})")
 return self.query_embeddings[best_match]['result']

 return None

 def _store_in_embedding_cache(self, question: str, table: str, result: 'QueryResult'):
-"""Store
-
-self.query_embeddings[question] = {
-'table': table,
-'embedding': q_embedding,
-'result': result
-}
+"""Store cache."""
+q_emb = self.embedding_model.encode([question])[0]
+self.query_embeddings[question] = {'table': table, 'embedding': q_emb, 'result': result}

 def _visualize(self, df: pd.DataFrame, title: str, viz_type: str = "auto"):
-"""
+"""Visualize."""
 if not HAS_PLOTLY and not HAS_MATPLOTLIB:
-print("Install plotly or matplotlib")
 return None

 print(f"Creating {viz_type} chart...")
-
-if HAS_PLOTLY:
-return self._plotly_viz(df, title, viz_type)
-else:
-return self._matplotlib_viz(df, title, viz_type)
+return self._plotly_viz(df, title, viz_type) if HAS_PLOTLY else self._matplotlib_viz(df, title, viz_type)

 def _plotly_viz(self, df: pd.DataFrame, title: str, viz_type: str):
-"""Plotly
+"""Plotly."""
 try:
-
-
+num = df.select_dtypes(include=[np.number]).columns.tolist()
+cat = df.select_dtypes(include=['object']).columns.tolist()

-if viz_type == "table"
-fig = go.Figure(data=[go.Table(
-
-
-
-
-
-
-
-
-
-
-fig =
-elif viz_type == "heatmap" and len(numeric) >= 2:
-corr = df[numeric].corr()
-fig = go.Figure(data=go.Heatmap(
-z=corr.values, x=corr.columns, y=corr.columns, colorscale='Viridis'
-))
+if viz_type == "table":
+fig = go.Figure(data=[go.Table(header=dict(values=list(df.columns)), cells=dict(values=[df[c] for c in df.columns]))])
+elif viz_type == "pie" and cat and num:
+fig = px.pie(df, names=cat[0], values=num[0], title=title)
+elif viz_type == "bar" and cat and num:
+fig = px.bar(df, x=cat[0], y=num[0], title=title)
+elif viz_type == "line" and num:
+fig = px.line(df, y=num[0], title=title)
+elif viz_type == "scatter" and len(num) >= 2:
+fig = px.scatter(df, x=num[0], y=num[1], title=title)
+elif viz_type == "heatmap" and len(num) >= 2:
+corr = df[num].corr()
+fig = go.Figure(data=go.Heatmap(z=corr.values, x=corr.columns, y=corr.columns))
 fig.update_layout(title=title)
-
-if
-fig = px.pie(df, names=
-elif len(numeric) >= 2:
-fig = px.line(df, y=numeric[0], title=title)
+else:
+if cat and num:
+fig = px.pie(df, names=cat[0], values=num[0], title=title) if len(df) <= 10 else px.bar(df, x=cat[0], y=num[0], title=title)
 else:
 fig = px.bar(df, y=df.columns[0], title=title)
-else:
-fig = px.bar(df, x=categorical[0] if categorical else df.index, y=numeric[0] if numeric else df.columns[0], title=title)

 fig.show()
-print("Chart displayed")
 return fig
-except
-print(f"Viz error: {e}")
+except:
 return None

 def _matplotlib_viz(self, df: pd.DataFrame, title: str, viz_type: str):
-"""Matplotlib
+"""Matplotlib."""
 try:
 plt.figure(figsize=(10, 6))
-
+num = df.select_dtypes(include=[np.number]).columns

-if viz_type == "pie"
+if viz_type == "pie":
 df[df.columns[0]].value_counts().plot(kind='pie')
-elif viz_type == "line" and len(
-df[
+elif viz_type == "line" and len(num) > 0:
+df[num[0]].plot(kind='line')
 else:
-if len(
-df[numeric[0]].plot(kind='bar')
-else:
-df.iloc[:, 0].value_counts().plot(kind='bar')
+(df[num[0]] if len(num) > 0 else df.iloc[:, 0].value_counts()).plot(kind='bar')

 plt.title(title)
 plt.tight_layout()
 plt.show()
-print("Chart displayed")
 return plt.gcf()
-except
-print(f"Viz error: {e}")
+except:
 return None

 def tables(self) -> Dict[str, dict]:
-"""List
+"""List tables."""
 print("\n" + "="*70)
-print("TABLES
+print("TABLES")
 print("="*70)

 all_tables = self._get_table_names()
-
 if not all_tables:
-print("No tables
+print("No tables")
 return {}

 result = {}
 for i, tbl in enumerate(all_tables, 1):
-
-cols = self.schema_info.get(tbl, {})
-
-
-marker = ">" if tbl == self.current_table else " "
-print(f"{marker} {i}. {tbl}")
-print(f" {count} rows, {len(col_list)} columns")
-print(f" Columns: {', '.join(col_list[:8])}")
-
-result[tbl] = {'rows': count, 'columns': col_list}
+cnt = pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl}", self.conn).iloc[0, 0]
+cols = list(self.schema_info.get(tbl, {}).keys())
+print(f" {i}. {tbl}: {cnt} rows, {len(cols)} columns")
+result[tbl] = {'rows': cnt, 'columns': cols}

 print("="*70)
 return result
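
The embedding cache above reuses a stored result when the cosine similarity between the new question and a cached one exceeds 0.85. A standalone sketch of that check (the vectors are made-up examples, not real model output):

    import numpy as np

    def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
        # Same formula as the cache check: dot product over the product of norms.
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

    cached_vec = np.array([0.10, 0.30, 0.50])
    new_vec = np.array([0.12, 0.29, 0.52])
    if cosine_similarity(cached_vec, new_vec) > 0.85:
        print("reuse cached QueryResult")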
@@ -787,77 +631,55 @@ IMPORTANT:
 self._refresh_schema()

 print("\n" + "="*70)
-print("
+print("SCHEMA")
 print("="*70)

-tables_to_show = [table] if table else self.schema_info.keys()
-
 result = {}
-for tbl in
+for tbl in ([table] if table else self.schema_info.keys()):
 if tbl in self.schema_info:
-
-print(f"\
-print(f"Records: {count}")
-print("Columns:")
-
+cnt = pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl}", self.conn).iloc[0, 0]
+print(f"\n{tbl}: {cnt} records")
 for col, dtype in self.schema_info[tbl].items():
-print(f" - {col:<30}
-
-result[tbl] = {
-'records': count,
-'columns': self.schema_info[tbl]
-}
+print(f" - {col:<30} {dtype}")
+result[tbl] = {'records': cnt, 'columns': self.schema_info[tbl]}

 print("="*70)
 return result

 def peek(self, table: Optional[str] = None, n: int = 5) -> pd.DataFrame:
-"""Preview
+"""Preview."""
 tbl = table or self.current_table
 if not tbl:
-print("No table specified")
 return pd.DataFrame()

 df = pd.read_sql_query(f"SELECT * FROM {tbl} LIMIT {n}", self.conn)
-print(f"\nSample from '{tbl}'
+print(f"\nSample from '{tbl}':")
 print(df.to_string(index=False))
 return df

 def info(self):
-"""
+"""Overview."""
 return self.tables()

 def sql(self, query: str, viz: Union[bool, str] = False) -> 'QueryResult':
 """Execute SQL."""
-print("\nExecuting SQL...")
-
 try:
 df = pd.read_sql_query(query, self.conn)
 print(f"Success! {len(df)} rows")
-
-fig = None
-if viz:
-viz_type = viz if isinstance(viz, str) else "auto"
-fig = self._visualize(df, "SQL Result", viz_type=viz_type)
-
+fig = self._visualize(df, "Result", viz if isinstance(viz, str) else "auto") if viz else None
 return QueryResult(True, query, df, fig)
 except Exception as e:
 print(f"Error: {e}")
 return QueryResult(False, query, pd.DataFrame(), None, str(e))

 def interactive(self, question: str) -> 'QueryResult':
-"""Interactive
-print(f"\nQuestion: {question}")
+"""Interactive."""
 choice = input("Visualize? (yes/no/pie/bar/line/scatter): ").strip().lower()
-
 viz = choice if choice in ['pie', 'bar', 'line', 'scatter', 'table', 'heatmap'] else (True if choice in ['yes', 'y'] else False)
-
 return self.ask(question, viz=viz)

 def export_db(self, path: str, format: str = "sqlite"):
 """Export database."""
-print(f"\nExporting to {format}...")
-
 if format == "sqlite":
 shutil.copy2(self.db_path, path)
 elif format == "sql":
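
The trimmed sql() and export_db() paths above keep the same call shapes. A usage sketch continuing from an existing SUTRA instance (the table and file names are hypothetical):

    result = sutra.sql("SELECT city, COUNT(*) AS n FROM people GROUP BY city", viz="bar")
    result.show()
    sutra.export_db("backup.json", format="json")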
@@ -865,93 +687,90 @@ IMPORTANT:
 for line in self.conn.iterdump():
 f.write(f'{line}\n')
 elif format == "json":
-data = {}
-for table in self._get_table_names():
-df = pd.read_sql_query(f"SELECT * FROM {table}", self.conn)
-data[table] = df.to_dict(orient='records')
+data = {t: pd.read_sql_query(f"SELECT * FROM {t}", self.conn).to_dict('records') for t in self._get_table_names()}
 with open(path, 'w', encoding='utf-8') as f:
 json.dump(data, f, indent=2, default=str)
 elif format == "excel":
 with pd.ExcelWriter(path, engine='openpyxl') as writer:
-for
-
-df.to_excel(writer, sheet_name=table[:31], index=False)
+for t in self._get_table_names():
+pd.read_sql_query(f"SELECT * FROM {t}", self.conn).to_excel(writer, sheet_name=t[:31], index=False)
 else:
 raise ValueError(f"Unsupported: {format}")

-print(f"Saved
+print(f"Saved: {path}")
 return self

-def save_to_mysql(self, host: str, user: str, password: str, database: str,
-port: int = 3306, tables: Optional[List[str]] = None
-
+def save_to_mysql(self, host: str, user: str, password: str, database: str,
+port: int = 3306, tables: Optional[List[str]] = None,
+auto_create: bool = True):
+"""
+Export to MySQL - AUTO-CREATES database if not exists.
+
+Args:
+host: MySQL host
+user: MySQL user
+password: MySQL password
+database: Database name (auto-created if not exists)
+port: MySQL port
+tables: Specific tables to export (None = all)
+auto_create: Auto-create database if not exists
+"""
 try:
 from sqlalchemy import create_engine
+import mysql.connector
 except ImportError:
 raise ImportError("Run: pip install QuerySUTRA[mysql]")

-print(f"
+print(f"Exporting to MySQL: {host}/{database}")

-
-
-
+# Auto-create database if requested
+if auto_create:
+try:
+temp_conn = mysql.connector.connect(host=host, user=user, password=password, port=port)
+temp_cursor = temp_conn.cursor()
+temp_cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{database}`")
+temp_cursor.close()
+temp_conn.close()
+print(f" Database '{database}' ready")
+except Exception as e:
+print(f" Warning: Could not auto-create database: {e}")

-
+engine = create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}")

-for
-df = pd.read_sql_query(f"SELECT * FROM {
-df.to_sql(
-print(f" {
+for t in (tables or self._get_table_names()):
+df = pd.read_sql_query(f"SELECT * FROM {t}", self.conn)
+df.to_sql(t, engine, if_exists='replace', index=False)
+print(f" {t}: {len(df)} rows")

 print("Complete!")
 return self

-def save_to_postgres(self, host: str, user: str, password: str, database: str,
+def save_to_postgres(self, host: str, user: str, password: str, database: str,
 port: int = 5432, tables: Optional[List[str]] = None):
 """Export to PostgreSQL."""
 try:
 from sqlalchemy import create_engine
+engine = create_engine(f"postgresql://{user}:{password}@{host}:{port}/{database}")
+
+print(f"Exporting to PostgreSQL...")
+for t in (tables or self._get_table_names()):
+df = pd.read_sql_query(f"SELECT * FROM {t}", self.conn)
+df.to_sql(t, engine, if_exists='replace', index=False)
+print(f" {t}: {len(df)} rows")
+print("Complete!")
+return self
 except ImportError:
 raise ImportError("Run: pip install QuerySUTRA[postgres]")
-
-print(f"\nConnecting to PostgreSQL: {host}:{port}...")
-
-engine = create_engine(f"postgresql://{user}:{password}@{host}:{port}/{database}")
-
-tables_to_export = tables or self._get_table_names()
-
-print(f"Exporting {len(tables_to_export)} tables...")
-
-for table in tables_to_export:
-df = pd.read_sql_query(f"SELECT * FROM {table}", self.conn)
-df.to_sql(table, engine, if_exists='replace', index=False)
-print(f" {table}: {len(df)} rows")
-
-print("Complete!")
-return self

-def backup(self,
-"""
-if
-
-
-else:
-backup_dir = Path(".")
-
-timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-
-print("\nCreating backup...")
-
-db_backup = backup_dir / f"sutra_{timestamp}.db"
-self.export_db(str(db_backup), format="sqlite")
-
-json_backup = backup_dir / f"sutra_{timestamp}.json"
-self.export_db(str(json_backup), format="json")
-
-print(f"\nBackup complete!")
-print(f" Database: {db_backup}")
-print(f" Data: {json_backup}")
+def backup(self, path: str = None):
+"""Backup."""
+dir = Path(path) if path else Path(".")
+dir.mkdir(parents=True, exist_ok=True)
+ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

+self.export_db(str(dir / f"sutra_{ts}.db"), "sqlite")
+self.export_db(str(dir / f"sutra_{ts}.json"), "json")
+print("Backup complete!")
 return self

 def export(self, data: pd.DataFrame, path: str, format: str = "csv"):
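
save_to_mysql() now creates the target database first when auto_create is left at its default. A minimal sketch (credentials are placeholders):

    sutra.save_to_mysql(
        host="localhost",
        user="root",
        password="change-me",
        database="my_database",  # issued as CREATE DATABASE IF NOT EXISTS when auto_create=True
    )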
@@ -962,17 +781,13 @@ IMPORTANT:
 data.to_excel(path, index=False)
 elif format == "json":
 data.to_json(path, orient="records", indent=2)
-
-raise ValueError(f"Unknown: {format}")
-
-print(f"Exported to {path}")
+print(f"Exported: {path}")
 return self

 def close(self):
-"""Close
+"""Close."""
 if self.conn:
 self.conn.close()
-print("Closed")

 def _get_table_names(self) -> List[str]:
 """Get tables."""
@@ -980,46 +795,28 @@ IMPORTANT:
 return [r[0] for r in self.cursor.fetchall()]

 def _refresh_schema(self):
-"""Refresh
-tables = self._get_table_names()
-
+"""Refresh."""
 self.schema_info = {}
-for tbl in
+for tbl in self._get_table_names():
 self.cursor.execute(f"PRAGMA table_info({tbl})")
 self.schema_info[tbl] = {r[1]: r[2] for r in self.cursor.fetchall()}

 def _generate_sql(self, question: str, table: str) -> str:
 """Generate SQL."""
 schema = self.schema_info.get(table, {})
-
-sample = sample_df.to_string(index=False)
-
+sample = pd.read_sql_query(f"SELECT * FROM {table} LIMIT 3", self.conn).to_string(index=False)
 schema_str = ", ".join([f"{col} ({dtype})" for col, dtype in schema.items()])

-
-
-Database: SQLite
-Table: {table}
-Columns: {schema_str}
-
-Sample:
-{sample}
-
-Question: {question}
-
-Return ONLY SQL."""
-
-response = self.client.chat.completions.create(
+resp = self.client.chat.completions.create(
 model="gpt-4o-mini",
 messages=[
-{"role": "system", "content": "SQL expert. Return only SQL
-{"role": "user", "content":
+{"role": "system", "content": "SQL expert. Return only SQL."},
+{"role": "user", "content": f"Table: {table}\nColumns: {schema_str}\nSample:\n{sample}\n\nQ: {question}\n\nSQL:"}
 ],
 temperature=0
 )

-
-return sql.replace("```sql", "").replace("```", "").strip()
+return resp.choices[0].message.content.strip().replace("```sql", "").replace("```", "").strip()

 def __enter__(self):
 return self
@@ -1028,53 +825,28 @@ Return ONLY SQL."""
 self.close()

 def __repr__(self):
-
-if self.cache_queries:
-features.append("cache")
-if self.use_embeddings:
-features.append("embeddings")
-if self.check_relevance:
-features.append("relevance")
-if self.fuzzy_match:
-features.append("fuzzy")
-
-feat_str = f", {', '.join(features)}" if features else ""
-return f"SUTRA(tables={len(self.schema_info)}{feat_str})"
+return f"SUTRA(tables={len(self.schema_info)})"


 class QueryResult:
-"""
-
+"""Result."""
 def __init__(self, success: bool, sql: str, data: pd.DataFrame, viz, error: str = None):
-self.success = success
-self.sql = sql
-self.data = data
-self.viz = viz
-self.error = error
+self.success, self.sql, self.data, self.viz, self.error = success, sql, data, viz, error

 def __repr__(self):
-return f"QueryResult(rows={len(self.data)}
+return f"QueryResult(rows={len(self.data)})" if self.success else f"QueryResult(error='{self.error}')"

 def show(self):
-print(self.data
+print(self.data if self.success else f"Error: {self.error}")
 return self


 def quick_start(api_key: str, data_path: str, question: str, viz: Union[bool, str] = False):
-"""
+"""Quick start."""
 with SUTRA(api_key=api_key) as sutra:
 sutra.upload(data_path)
 return sutra.ask(question, viz=viz)


 if __name__ == "__main__":
-print(""
-QuerySUTRA v0.3.3 - Professional Data Analysis
-SUTRA: Structured-Unstructured-Text-Retrieval-Architecture
-
-Fixed: Proper primary and foreign keys with unique IDs
-Features: Load existing DB, custom viz, fuzzy matching, embeddings
-
-Installation: pip install QuerySUTRA
-Usage: from sutra import SUTRA
-""")
+print("QuerySUTRA v0.4.0 - Simple & Automatic")