PyPI - QuerySUTRA - Versions diffs - 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

QuerySUTRA 0.3.1 (py3-none-any.whl) → 0.3.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

querysutra-0.3.3.dist-info/METADATA +285 -0
{querysutra-0.3.1.dist-info → querysutra-0.3.3.dist-info}/RECORD +7 -7
sutra/__init__.py +2 -2
sutra/sutra.py +281 -463
querysutra-0.3.1.dist-info/METADATA +0 -429
{querysutra-0.3.1.dist-info → querysutra-0.3.3.dist-info}/WHEEL +0 -0
{querysutra-0.3.1.dist-info → querysutra-0.3.3.dist-info}/licenses/LICENSE +0 -0
{querysutra-0.3.1.dist-info → querysutra-0.3.3.dist-info}/top_level.txt +0 -0

sutra/sutra.py CHANGED Viewed

@@ -1,21 +1,19 @@
 """
-QuerySUTRA v0.3.0 - ENHANCED FLEXIBLE VERSION
+QuerySUTRA v0.3.3 - PROPER RELATIONAL DATABASE EXTRACTION
 SUTRA: Structured-Unstructured-Text-Retrieval-Architecture
-NEW FEATURES:
-✅ Custom visualization types - USER CHOICE
-✅ Load existing databases - no re-upload
-✅ Smart NLP with fuzzy matching - OPTIONAL
-✅ Irrelevant query detection - OPTIONAL
-✅ Embeddings for caching - OPTIONAL (user decides)
-✅ All features are OPTIONAL - zero hard coding
+FIXED: Proper primary keys, foreign keys, and relational integrity
+- Unique IDs for each entity
+- Proper foreign key relationships
+- No duplicate keys
+- Comprehensive entity extraction (skills, technologies, projects, etc.)
 Author: Aditya Batta
 License: MIT
-Version: 0.3.0
+Version: 0.3.3
 """
-__version__ = "0.3.0"
+__version__ = "0.3.3"
 __author__ = "Aditya Batta"
 __title__ = "QuerySUTRA: Structured-Unstructured-Text-Retrieval-Architecture"
 __all__ = ["SUTRA", "QueryResult", "quick_start"]
@@ -24,7 +22,7 @@ import os
 import sqlite3
 import pandas as pd
 import numpy as np
-from typing import Optional, Union, Dict, Any, List, Literal
+from typing import Optional, Union, Dict, Any, List
 from pathlib import Path
 import json
 import hashlib
@@ -78,12 +76,7 @@ class SUTRA:
     """
     SUTRA: Structured-Unstructured-Text-Retrieval-Architecture
-    Enhanced with OPTIONAL features (user controls everything):
-    - Custom visualizations (pie, bar, line, scatter, etc.)
-    - Load existing databases (SQLite, MySQL, PostgreSQL)
-    - Smart NLP with fuzzy matching (OPTIONAL)
-    - Query relevance detection (OPTIONAL)
-    - Embeddings for caching (OPTIONAL)
+    Professional data analysis with proper relational database structure
     """
     def __init__(self,
@@ -93,19 +86,9 @@ class SUTRA:
                  check_relevance: bool = False,
                  fuzzy_match: bool = True,
                  cache_queries: bool = True):
-        """
-        Initialize SUTRA with OPTIONAL features.
-        Args:
-            api_key: OpenAI API key (optional)
-            db: Database path (SQLite file)
-            use_embeddings: Use embeddings for smart query caching (saves API calls)
-            check_relevance: Check if query is relevant to database before processing
-            fuzzy_match: Enable fuzzy matching for city names, etc. (e.g., "New York City" → "New York")
-            cache_queries: Cache SQL queries to avoid repeated API calls
-        """
-        print("🚀 Initializing QuerySUTRA v0.3.0 - ENHANCED MODE")
-        print("   SUTRA: Structured-Unstructured-Text-Retrieval-Architecture")
+        """Initialize SUTRA with optional features."""
+        print("Initializing QuerySUTRA v0.3.3")
+        print("SUTRA: Structured-Unstructured-Text-Retrieval-Architecture")
         if api_key:
             os.environ["OPENAI_API_KEY"] = api_key
@@ -120,7 +103,6 @@ class SUTRA:
         self.current_table = None
         self.schema_info = {}
-        # OPTIONAL FEATURES (user decides)
         self.cache_queries = cache_queries
         self.cache = {} if cache_queries else None
@@ -131,126 +113,77 @@ class SUTRA:
         self.check_relevance = check_relevance
         self.fuzzy_match = fuzzy_match
-        # Initialize embeddings if requested
         if use_embeddings and HAS_EMBEDDINGS:
             try:
-                print("   🧠 Loading embeddings model for smart caching...")
+                print("Loading embeddings model...")
                 self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-                print("   ✅ Embeddings ready (similar queries will use cache)")
+                print("Embeddings ready")
             except:
-                print("   ⚠️  Embeddings failed, using simple cache instead")
+                print("Embeddings unavailable")
                 self.use_embeddings = False
-        # Refresh schema
         self._refresh_schema()
-        print(f"✅ Ready! Database: {db}")
-        print(f"   Cache: {'ON' if cache_queries else 'OFF'}")
-        print(f"   Embeddings: {'ON' if use_embeddings else 'OFF'}")
-        print(f"   Relevance Check: {'ON' if check_relevance else 'OFF'}")
-        print(f"   Fuzzy Match: {'ON' if fuzzy_match else 'OFF'}")
+        print(f"Ready! Database: {db}")
         if not self.api_key:
-            print("⚠️  No API key - use .sql() for direct queries")
-    # ========================================================================
-    # NEW: LOAD EXISTING DATABASE
-    # ========================================================================
+            print("No API key - use .sql() for direct queries")
     @classmethod
     def load_from_db(cls, db_path: str, api_key: Optional[str] = None, **kwargs):
-        """
-        Load existing database WITHOUT re-uploading data.
-        Args:
-            db_path: Path to existing SQLite database
-            api_key: OpenAI API key
-            **kwargs: Other options (use_embeddings, check_relevance, etc.)
-        Returns:
-            SUTRA instance connected to existing database
-        Example:
-            sutra = SUTRA.load_from_db("sutra.db", api_key="sk-...")
-            sutra.tables()  # See existing tables
-            result = sutra.ask("Show me data")  # Query immediately!
-        """
+        """Load existing SQLite database."""
         if not Path(db_path).exists():
             raise FileNotFoundError(f"Database not found: {db_path}")
-        print(f"📂 Loading existing database: {db_path}")
+        print(f"Loading database: {db_path}")
         instance = cls(api_key=api_key, db=db_path, **kwargs)
         tables = instance.tables()
-        print(f"\n✅ Loaded {len(tables)} existing tables - ready to query!")
+        print(f"Loaded {len(tables)} tables")
         return instance
     @classmethod
     def connect_mysql(cls, host: str, user: str, password: str, database: str,
                      port: int = 3306, api_key: Optional[str] = None, **kwargs):
-        """
-        Connect to existing MySQL database WITHOUT importing data.
-        Query directly from MySQL!
-        Args:
-            host: MySQL host
-            user: MySQL user
-            password: MySQL password
-            database: Database name
-            port: MySQL port
-            api_key: OpenAI API key
-        Example:
-            sutra = SUTRA.connect_mysql("localhost", "root", "pass", "mydb", api_key="sk-...")
-            result = sutra.ask("Show me users")
-        """
+        """Connect to MySQL database."""
         try:
             from sqlalchemy import create_engine
         except ImportError:
-            raise ImportError("Run: pip install sqlalchemy mysql-connector-python")
+            raise ImportError("Run: pip install QuerySUTRA[mysql]")
-        print(f"🔄 Connecting to MySQL: {host}:{port}/{database}")
+        print(f"Connecting to MySQL: {host}:{port}/{database}")
         connection_string = f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}"
-        # Create temporary SQLite and sync tables
         temp_db = f"sutra_mysql_{database}.db"
         instance = cls(api_key=api_key, db=temp_db, **kwargs)
         engine = create_engine(connection_string)
-        # Get all tables from MySQL
         tables = pd.read_sql_query("SHOW TABLES", engine).iloc[:, 0].tolist()
-        print(f"   Found {len(tables)} tables in MySQL")
-        print(f"   Syncing to local cache...")
+        print(f"Found {len(tables)} tables, syncing...")
         for table in tables:
             df = pd.read_sql_query(f"SELECT * FROM {table}", engine)
             df.to_sql(table, instance.conn, if_exists='replace', index=False)
-            print(f"      ✅ {table}: {len(df)} rows")
+            print(f"  {table}: {len(df)} rows")
         instance._refresh_schema()
-        print(f"\n✅ Connected! You can now query {len(tables)} MySQL tables")
+        print(f"Connected! {len(tables)} tables available")
         return instance
     @classmethod
     def connect_postgres(cls, host: str, user: str, password: str, database: str,
                         port: int = 5432, api_key: Optional[str] = None, **kwargs):
-        """
-        Connect to existing PostgreSQL database WITHOUT importing data.
-        Example:
-            sutra = SUTRA.connect_postgres("localhost", "postgres", "pass", "mydb", api_key="sk-...")
-        """
+        """Connect to PostgreSQL database."""
         try:
             from sqlalchemy import create_engine
         except ImportError:
-            raise ImportError("Run: pip install sqlalchemy psycopg2-binary")
+            raise ImportError("Run: pip install QuerySUTRA[postgres]")
-        print(f"🔄 Connecting to PostgreSQL: {host}:{port}/{database}")
+        print(f"Connecting to PostgreSQL: {host}:{port}/{database}")
         connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
@@ -259,32 +192,34 @@ class SUTRA:
         engine = create_engine(connection_string)
-        # Get all tables
         tables = pd.read_sql_query(
             "SELECT tablename FROM pg_tables WHERE schemaname='public'",
             engine
         )['tablename'].tolist()
-        print(f"   Found {len(tables)} tables in PostgreSQL")
-        print(f"   Syncing to local cache...")
+        print(f"Found {len(tables)} tables, syncing...")
         for table in tables:
             df = pd.read_sql_query(f"SELECT * FROM {table}", engine)
             df.to_sql(table, instance.conn, if_exists='replace', index=False)
-            print(f"      ✅ {table}: {len(df)} rows")
+            print(f"  {table}: {len(df)} rows")
         instance._refresh_schema()
-        print(f"\n✅ Connected! You can now query {len(tables)} PostgreSQL tables")
+        print(f"Connected! {len(tables)} tables available")
         return instance
-    # ========================================================================
-    # UPLOAD - CREATES MULTIPLE TABLES
-    # ========================================================================
-    def upload(self, data: Union[str, pd.DataFrame], name: Optional[str] = None) -> 'SUTRA':
-        """Upload data and create structured tables with AI."""
-        print(f"\n📤 Uploading data...")
+    def upload(self, data: Union[str, pd.DataFrame], name: Optional[str] = None,
+               extract_entities: Optional[List[str]] = None) -> 'SUTRA':
+        """
+        Upload data with optional custom entity extraction.
+        Args:
+            data: File path or DataFrame
+            name: Table name
+            extract_entities: Custom entities to extract (e.g., ['skills', 'technologies'])
+        """
+        print(f"\nUploading data...")
         if isinstance(data, pd.DataFrame):
             name = name or "data"
@@ -298,9 +233,8 @@ class SUTRA:
         name = name or path.stem.replace(" ", "_").replace("-", "_")
         ext = path.suffix.lower()
-        print(f"   📄 File: {path.name}")
+        print(f"File: {path.name}")
-        # Load based on format
         if ext == ".csv":
             df = pd.read_csv(path)
             self._store_dataframe(df, name)
@@ -318,69 +252,63 @@ class SUTRA:
                 self.cursor.executescript(f.read())
             self.conn.commit()
             self._refresh_schema()
-            print(f"✅ SQL executed!")
+            print("SQL executed")
         elif ext == ".pdf":
-            self._smart_upload_pdf(path, name)
+            self._smart_upload_pdf(path, name, extract_entities)
         elif ext == ".docx":
-            self._smart_upload_docx(path, name)
+            self._smart_upload_docx(path, name, extract_entities)
         elif ext == ".txt":
-            self._smart_upload_txt(path, name)
+            self._smart_upload_txt(path, name, extract_entities)
         else:
             raise ValueError(f"Unsupported format: {ext}")
         return self
-    # ========================================================================
-    # SMART PARSING - CREATES MULTIPLE TABLES
-    # ========================================================================
-    def _smart_upload_pdf(self, path: Path, base_name: str):
-        """Parse PDF and create multiple tables."""
+    def _smart_upload_pdf(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
+        """Parse PDF with proper relational structure."""
         if not HAS_PYPDF2:
-            raise ImportError("PyPDF2 not installed. Run: pip install PyPDF2")
+            raise ImportError("Run: pip install PyPDF2")
-        print("   📑 Extracting text from PDF...")
+        print("Extracting text from PDF...")
         with open(path, 'rb') as file:
             pdf_reader = PyPDF2.PdfReader(file)
             text = ""
             for page_num, page in enumerate(pdf_reader.pages, 1):
                 text += page.extract_text() + "\n"
-                print(f"      Extracted page {page_num}/{len(pdf_reader.pages)}")
+                print(f"  Page {page_num}/{len(pdf_reader.pages)}")
         if self.client:
-            print("   🧠 AI: Analyzing and extracting structured entities...")
-            tables = self._create_tables_with_ai(text, base_name)
+            print("AI: Comprehensive entity extraction with proper relationships...")
+            tables = self._create_tables_with_ai(text, base_name, extract_entities)
             if tables and len(tables) > 0:
-                print(f"\n✅ Created {len(tables)} structured tables:")
+                print(f"\nCreated {len(tables)} relational tables:")
                 for tbl_name in tables:
                     count = pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl_name}", self.conn).iloc[0, 0]
                     cols = len(self.schema_info.get(tbl_name, {}))
-                    print(f"   📊 {tbl_name}: {count} rows, {cols} columns")
+                    print(f"  {tbl_name}: {count} rows, {cols} columns")
                 return
-        # Fallback: simple text table
-        print("   ⚠️  AI not available, creating simple text table")
+        print("AI unavailable, creating simple table")
         df = self._parse_text_simple(text)
         self._store_dataframe(df, base_name)
-    def _smart_upload_docx(self, path: Path, base_name: str):
-        """Parse DOCX and create multiple tables."""
+    def _smart_upload_docx(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
+        """Parse DOCX with proper structure."""
         if not HAS_DOCX:
-            raise ImportError("python-docx not installed. Run: pip install python-docx")
+            raise ImportError("Run: pip install python-docx")
-        print("   📄 Extracting content from DOCX...")
+        print("Extracting from DOCX...")
         doc = docx.Document(path)
-        # Check for tables first
         if doc.tables:
-            print(f"   📊 Found {len(doc.tables)} table(s)")
+            print(f"Found {len(doc.tables)} table(s)")
             for i, table in enumerate(doc.tables):
                 data = [[cell.text.strip() for cell in row.cells] for row in table.rows]
                 if data and len(data) > 1:
@@ -389,93 +317,144 @@ class SUTRA:
                     self._store_dataframe(df, table_name)
             return
-        # Extract text
         text = "\n".join([para.text for para in doc.paragraphs])
         if self.client:
-            print("   🧠 AI: Analyzing and extracting structured entities...")
-            tables = self._create_tables_with_ai(text, base_name)
+            print("AI: Analyzing...")
+            tables = self._create_tables_with_ai(text, base_name, extract_entities)
             if tables and len(tables) > 0:
-                print(f"\n✅ Created {len(tables)} structured tables:")
+                print(f"\nCreated {len(tables)} tables:")
                 for tbl_name in tables:
                     count = pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl_name}", self.conn).iloc[0, 0]
                     cols = len(self.schema_info.get(tbl_name, {}))
-                    print(f"   📊 {tbl_name}: {count} rows, {cols} columns")
+                    print(f"  {tbl_name}: {count} rows, {cols} columns")
                 return
         df = self._parse_text_simple(text)
         self._store_dataframe(df, base_name)
-    def _smart_upload_txt(self, path: Path, base_name: str):
-        """Parse TXT and create multiple tables."""
-        print("   📝 Reading TXT file...")
+    def _smart_upload_txt(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
+        """Parse TXT with proper structure."""
+        print("Reading TXT...")
         with open(path, 'r', encoding='utf-8') as file:
             text = file.read()
         if self.client:
-            print("   🧠 AI: Analyzing and extracting structured entities...")
-            tables = self._create_tables_with_ai(text, base_name)
+            print("AI: Analyzing...")
+            tables = self._create_tables_with_ai(text, base_name, extract_entities)
             if tables and len(tables) > 0:
-                print(f"\n✅ Created {len(tables)} structured tables:")
+                print(f"\nCreated {len(tables)} tables:")
                 for tbl_name in tables:
                     count = pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl_name}", self.conn).iloc[0, 0]
                     cols = len(self.schema_info.get(tbl_name, {}))
-                    print(f"   📊 {tbl_name}: {count} rows, {cols} columns")
+                    print(f"  {tbl_name}: {count} rows, {cols} columns")
                 return
         df = self._parse_text_simple(text)
         self._store_dataframe(df, base_name)
-    def _create_tables_with_ai(self, text: str, base_name: str) -> List[str]:
-        """Use AI to extract entities and create multiple tables."""
+    def _create_tables_with_ai(self, text: str, base_name: str, custom_entities: Optional[List[str]] = None) -> List[str]:
+        """
+        AI extracts ALL entities with PROPER primary and foreign keys.
+        CRITICAL: Each entity gets UNIQUE IDs, foreign keys properly link tables.
+        """
         if not self.client:
             return []
         try:
-            extraction_prompt = f"""Extract ALL structured data from this text into separate entity tables.
+            if custom_entities:
+                entity_instruction = f"""Extract these specific entities: {', '.join(custom_entities)}
+For each entity type, create a proper table with unique IDs."""
+            else:
+                entity_instruction = """Automatically identify and extract ALL structured entities.
+Common entities (extract ALL you find):
+- people: Personal information (id, name, email, phone, address, city, state, zip)
+- skills: Individual skills (id, person_id, skill_name, proficiency_level, years_experience)
+- technologies: Technologies/tools (id, person_id, technology_name, category, proficiency)
+- projects: Projects (id, person_id, project_name, description, start_date, end_date)
+- certifications: Certifications (id, person_id, cert_name, issuer, date_obtained)
+- education: Education records (id, person_id, degree, institution, graduation_year)
+- work_experience: Work history (id, person_id, company, title, start_date, end_date)
+- events: Events/meetings (id, host_id, description, location, date, attendee_ids)
+- organizations: Companies/departments (id, name, address, city, industry)
+- products: Products/services (id, name, description, price, category)
+- ANY other structured entities you identify
+Extract EVERYTHING you find in the text."""
+            extraction_prompt = f"""Analyze this text and extract ALL structured data into proper relational database tables.
 Text:
-{text[:4000]}
+{text[:6000]}
+{entity_instruction}
+CRITICAL REQUIREMENTS FOR PROPER DATABASE DESIGN:
+1. PRIMARY KEYS:
+   - Each table MUST have unique sequential IDs starting from 1
+   - Person 1 gets id=1, Person 2 gets id=2, etc.
+   - NO DUPLICATE IDs within same table
+   - IDs must be integers
+2. FOREIGN KEYS:
+   - Use foreign keys to link related tables
+   - Example: skills table has person_id that references people.id
+   - Example: projects table has person_id that references people.id
+   - Foreign keys MUST match existing primary keys
-Create these types of tables (if data exists):
-1. people - (id, name, address, city, state, zip, email, phone)
-2. contacts - (id, person_id, contact_type, value)
-3. events - (id, host_id, description, location, city)
-4. organizations - (id, name, address, city)
-5. Any other entities you find
+3. TABLE STRUCTURE:
+   - Each entity type gets its own table
+   - Use clear table names (people, skills, technologies, not table1, table2)
+   - Include ALL relevant attributes for each entity
-Return a JSON object with this EXACT structure:
+Return JSON with this EXACT structure:
 {{
   "people": [
-    {{"id": 1, "name": "John Doe", "address": "123 Main St", "city": "Dallas", "email": "john@email.com", "phone": "555-1234"}},
+    {{"id": 1, "name": "John Doe", "email": "john@email.com", "phone": "+1-555-0100", "city": "Dallas", "state": "TX"}},
+    {{"id": 2, "name": "Jane Smith", "email": "jane@email.com", "phone": "+1-555-0101", "city": "New York", "state": "NY"}},
     ...
   ],
-  "contacts": [
-    {{"id": 1, "person_id": 1, "email": "john@email.com", "phone": "555-1234"}},
+  "skills": [
+    {{"id": 1, "person_id": 1, "skill_name": "Python", "proficiency": "Expert", "years": 5}},
+    {{"id": 2, "person_id": 1, "skill_name": "SQL", "proficiency": "Advanced", "years": 3}},
+    {{"id": 3, "person_id": 2, "skill_name": "Java", "proficiency": "Expert", "years": 7}},
     ...
   ],
-  "events": [
-    {{"id": 1, "host_id": 1, "description": "Team meeting", "city": "Dallas"}},
+  "technologies": [
+    {{"id": 1, "person_id": 1, "technology": "React", "category": "Frontend"}},
+    {{"id": 2, "person_id": 1, "technology": "PostgreSQL", "category": "Database"}},
+    {{"id": 3, "person_id": 2, "technology": "Spring Boot", "category": "Backend"}},
+    ...
+  ],
+  "projects": [
+    {{"id": 1, "person_id": 1, "project_name": "E-commerce Platform", "role": "Lead Developer"}},
+    {{"id": 2, "person_id": 2, "project_name": "Analytics Dashboard", "role": "Backend Engineer"}},
     ...
   ]
 }}
 IMPORTANT:
-- Extract ALL people, contacts, events, organizations you find
-- Use consistent column names
+- Extract EVERY structured piece of data you find
+- Assign UNIQUE sequential IDs (1, 2, 3, ...) for each table
+- Foreign keys MUST reference valid primary keys
+- Create as many tables as needed (don't limit yourself)
 - Return ONLY valid JSON, no explanations
-- If a table type has no data, omit it from JSON"""
+- Be COMPREHENSIVE - extract skills, technologies, projects, certifications, education, work history, etc."""
             response = self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
-                    {"role": "system", "content": "You are a data extraction expert. Extract ALL entities from text into structured JSON tables. Return only valid JSON."},
+                    {"role": "system", "content": "You are a database design expert. Extract ALL entities with proper primary keys (unique sequential IDs) and foreign keys (referencing valid IDs). Be comprehensive and extract EVERYTHING. Return only valid JSON."},
                     {"role": "user", "content": extraction_prompt}
                 ],
-                temperature=0
+                temperature=0,
+                max_tokens=4096
             )
             json_text = response.choices[0].message.content.strip()
@@ -485,7 +464,6 @@ IMPORTANT:
             created_tables = []
-            # Create tables from extracted data
             for entity_type, records in extracted_data.items():
                 if records and isinstance(records, list) and len(records) > 0:
                     table_name = f"{base_name}_{entity_type}"
@@ -495,24 +473,23 @@ IMPORTANT:
                         if not df.empty:
                             self._store_dataframe(df, table_name, silent=True)
                             created_tables.append(table_name)
-                            print(f"      ✅ {entity_type}: {len(df)} records")
+                            print(f"  {entity_type}: {len(df)} records")
                     except Exception as e:
-                        print(f"      ⚠️  Failed to create {entity_type}: {e}")
+                        print(f"  Failed {entity_type}: {e}")
             return created_tables
         except Exception as e:
-            print(f"   ⚠️  AI extraction error: {e}")
+            print(f"AI extraction error: {e}")
             return []
     def _parse_text_simple(self, text: str) -> pd.DataFrame:
-        """Simple text to DataFrame (fallback)."""
+        """Fallback text parsing."""
         lines = [line.strip() for line in text.split('\n') if line.strip()]
         if not lines:
             return pd.DataFrame({'content': ['No content']})
-        # Try to detect if it's tabular
         sample = lines[:min(10, len(lines))]
         for delimiter in ['\t', ',', '|', ';']:
             if all(delimiter in line for line in sample):
@@ -529,98 +506,63 @@ IMPORTANT:
         })
     def _store_dataframe(self, df: pd.DataFrame, name: str, silent: bool = False):
-        """Store DataFrame in database."""
+        """Store DataFrame."""
         df.columns = [str(c).strip().replace(" ", "_").replace("-", "_") for c in df.columns]
         df.to_sql(name, self.conn, if_exists='replace', index=False)
         self.current_table = name
         self._refresh_schema()
         if not silent:
-            print(f"✅ Uploaded to table: {name}")
-            print(f"   📊 {len(df)} rows × {len(df.columns)} columns")
-            print(f"   🔤 Columns: {', '.join(df.columns[:10].tolist())}{' ...' if len(df.columns) > 10 else ''}")
+            print(f"Uploaded: {name}")
+            print(f"  {len(df)} rows, {len(df.columns)} columns")
-    # ========================================================================
-    # NEW: FLEXIBLE QUERY WITH CUSTOM VIZ AND RELEVANCE CHECK
-    # ========================================================================
-    def ask(self, question: str,
-            viz: Union[bool, str] = False,
-            table: Optional[str] = None) -> 'QueryResult':
-        """
-        Ask question with FLEXIBLE visualization options.
-        Args:
-            question: Natural language question
-            viz: Visualization type:
-                - False: No visualization
-                - True: Auto-detect best chart
-                - "pie": Pie chart
-                - "bar": Bar chart
-                - "line": Line chart
-                - "scatter": Scatter plot
-                - "table": Table view
-                - "heatmap": Heatmap
-            table: Specific table to query (optional)
-        Examples:
-            result = sutra.ask("How many people in each city?")
-            result = sutra.ask("Show sales by month", viz="line")
-            result = sutra.ask("Revenue breakdown", viz="pie")
-            result = sutra.ask("Compare metrics", viz="bar")
-        """
+    def ask(self, question: str, viz: Union[bool, str] = False, table: Optional[str] = None) -> 'QueryResult':
+        """Query with natural language."""
         if not self.client:
-            print("❌ No API key configured")
+            print("No API key")
             return QueryResult(False, "", pd.DataFrame(), None, "No API key")
-        print(f"\n🔍 Question: {question}")
+        print(f"\nQuestion: {question}")
-        # NEW: Check relevance if enabled
         if self.check_relevance:
             if not self._is_relevant_query(question):
-                print("⚠️  This question seems irrelevant to your database")
-                print("   Database contains tables about:", ", ".join(self._get_table_names()[:5]))
-                choice = input("   Continue anyway? (yes/no): ").strip().lower()
-                if choice not in ['yes', 'y', 'yeah', 'yep', 'sure']:
-                    return QueryResult(False, "", pd.DataFrame(), None, "Query not relevant to database")
+                print("Warning: Query may be irrelevant")
+                choice = input("Continue? (yes/no): ").strip().lower()
+                if choice not in ['yes', 'y']:
+                    return QueryResult(False, "", pd.DataFrame(), None, "Irrelevant")
-        # Select table
         tbl = table or self.current_table
         if not tbl:
             all_tables = self._get_table_names()
             if all_tables:
                 tbl = all_tables[0]
             else:
-                print("❌ No tables found")
+                print("No tables found")
                 return QueryResult(False, "", pd.DataFrame(), None, "No table")
-        # NEW: Check embeddings cache if enabled
         if self.use_embeddings and self.embedding_model:
             cached_result = self._check_embedding_cache(question, tbl)
             if cached_result:
-                print("   ⚡ Using cached result from similar query")
+                print("  Using cached result")
                 return cached_result
-        # NEW: Apply fuzzy matching to question if enabled
         if self.fuzzy_match:
             question = self._apply_fuzzy_matching(question, tbl)
-        # Check simple cache
         cache_key = hashlib.md5(f"{question}:{tbl}".encode()).hexdigest()
-        if self.cache_queries and cache_key in self.cache:
+        if self.cache_queries and self.cache and cache_key in self.cache:
             sql_query = self.cache[cache_key]
-            print("   💾 From cache")
+            print("  From cache")
         else:
             sql_query = self._generate_sql(question, tbl)
-            if self.cache_queries:
+            if self.cache_queries and self.cache is not None:
                 self.cache[cache_key] = sql_query
-        print(f"   📝 SQL: {sql_query}")
+        print(f"SQL: {sql_query}")
         try:
             df = pd.read_sql_query(sql_query, self.conn)
-            print(f"✅ Success! {len(df)} rows")
+            print(f"Success! {len(df)} rows")
             fig = None
             if viz:
@@ -629,30 +571,24 @@ IMPORTANT:
             result = QueryResult(True, sql_query, df, fig)
-            # Store in embedding cache if enabled
             if self.use_embeddings and self.embedding_model:
                 self._store_in_embedding_cache(question, tbl, result)
             return result
         except Exception as e:
-            print(f"❌ Error: {e}")
+            print(f"Error: {e}")
             return QueryResult(False, sql_query, pd.DataFrame(), None, str(e))
-    # ========================================================================
-    # NEW: RELEVANCE CHECK
-    # ========================================================================
     def _is_relevant_query(self, question: str) -> bool:
-        """Check if query is relevant to database (OPTIONAL feature)."""
+        """Check relevance."""
         if not self.client:
             return True
-        # Get database context
         tables = self._get_table_names()
         columns = []
-        for tbl in tables[:3]:  # First 3 tables
+        for tbl in tables[:3]:
             cols = list(self.schema_info.get(tbl, {}).keys())
-            columns.extend(cols[:5])  # First 5 columns
+            columns.extend(cols[:5])
         db_context = f"Tables: {', '.join(tables[:5])}. Columns: {', '.join(columns[:15])}"
@@ -660,31 +596,22 @@ IMPORTANT:
             response = self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
-                    {"role": "system", "content": "You are a query relevance checker. Return only 'yes' or 'no'."},
-                    {"role": "user", "content": f"Is this question relevant to a database with {db_context}?\n\nQuestion: {question}\n\nAnswer only 'yes' or 'no':"}
+                    {"role": "system", "content": "Relevance checker. Return only 'yes' or 'no'."},
+                    {"role": "user", "content": f"Is this relevant to database with {db_context}?\n\nQuestion: {question}\n\nyes or no:"}
                 ],
                 temperature=0,
                 max_tokens=5
             )
-            answer = response.choices[0].message.content.strip().lower()
-            return 'yes' in answer
+            return 'yes' in response.choices[0].message.content.strip().lower()
         except:
-            return True  # If check fails, allow query
-    # ========================================================================
-    # NEW: FUZZY MATCHING FOR BETTER NLP
-    # ========================================================================
+            return True
     def _apply_fuzzy_matching(self, question: str, table: str) -> str:
-        """
-        Apply fuzzy matching to improve NLP understanding.
-        Example: "New York City" → finds "New York" in database
-        """
+        """Fuzzy match query terms."""
         if not self.schema_info.get(table):
             return question
-        # Get all unique values from string columns
         try:
             string_cols = [col for col, dtype in self.schema_info[table].items()
                           if 'TEXT' in dtype or 'VARCHAR' in dtype]
@@ -692,41 +619,32 @@ IMPORTANT:
             if not string_cols:
                 return question
-            # Get unique values from first string column (usually city, name, etc.)
-            col = string_cols[0]
-            df = pd.read_sql_query(f"SELECT DISTINCT {col} FROM {table} LIMIT 100", self.conn)
-            unique_values = df[col].dropna().tolist()
-            # Find words in question that might match database values
-            words_in_question = question.split()
-            for i, word in enumerate(words_in_question):
-                # Try to find close matches
-                matches = get_close_matches(word, unique_values, n=1, cutoff=0.6)
-                if matches:
-                    # Replace with closest match
-                    words_in_question[i] = matches[0]
-                    print(f"   🔍 Fuzzy match: '{word}' → '{matches[0]}'")
+            for col in string_cols[:2]:
+                df = pd.read_sql_query(f"SELECT DISTINCT {col} FROM {table} LIMIT 100", self.conn)
+                unique_values = [str(v) for v in df[col].dropna().tolist()]
+                words = question.split()
+                for i, word in enumerate(words):
+                    matches = get_close_matches(word, unique_values, n=1, cutoff=0.6)
+                    if matches and word != matches[0]:
+                        words[i] = matches[0]
+                        print(f"  Fuzzy: '{word}' -> '{matches[0]}'")
+                question = " ".join(words)
-            return " ".join(words_in_question)
+            return question
         except:
             return question
-    # ========================================================================
-    # NEW: EMBEDDING-BASED CACHE
-    # ========================================================================
     def _check_embedding_cache(self, question: str, table: str) -> Optional['QueryResult']:
-        """Check if similar query exists in cache using embeddings."""
+        """Check embedding cache."""
         if not self.query_embeddings:
             return None
-        # Get embedding for current question
         q_embedding = self.embedding_model.encode([question])[0]
-        # Find most similar cached query
         best_match = None
-        best_similarity = 0.85  # Threshold
+        best_similarity = 0.85
         for cached_q, cached_data in self.query_embeddings.items():
             if cached_data['table'] != table:
@@ -741,39 +659,27 @@ IMPORTANT:
                 best_match = cached_q
         if best_match:
-            print(f"   🎯 Found similar query ({best_similarity:.1%} match): '{best_match}'")
+            print(f"  Similar query ({best_similarity:.0%}): '{best_match}'")
             return self.query_embeddings[best_match]['result']
         return None
     def _store_in_embedding_cache(self, question: str, table: str, result: 'QueryResult'):
-        """Store query result in embedding cache."""
+        """Store in cache."""
         q_embedding = self.embedding_model.encode([question])[0]
         self.query_embeddings[question] = {
             'table': table,
             'embedding': q_embedding,
             'result': result
         }
-    # ========================================================================
-    # NEW: FLEXIBLE VISUALIZATION
-    # ========================================================================
     def _visualize(self, df: pd.DataFrame, title: str, viz_type: str = "auto"):
-        """
-        Create flexible visualization based on user choice.
-        Args:
-            df: Data to visualize
-            title: Chart title
-            viz_type: Type of visualization (auto, pie, bar, line, scatter, table, heatmap)
-        """
+        """Create visualization."""
         if not HAS_PLOTLY and not HAS_MATPLOTLIB:
-            print("⚠️  Install plotly or matplotlib for visualizations")
+            print("Install plotly or matplotlib")
             return None
-        print(f"📊 Creating {viz_type} chart...")
+        print(f"Creating {viz_type} chart...")
         if HAS_PLOTLY:
             return self._plotly_viz(df, title, viz_type)
@@ -781,7 +687,7 @@ IMPORTANT:
             return self._matplotlib_viz(df, title, viz_type)
     def _plotly_viz(self, df: pd.DataFrame, title: str, viz_type: str):
-        """Create Plotly chart with user-specified type."""
+        """Plotly visualization."""
         try:
             numeric = df.select_dtypes(include=[np.number]).columns.tolist()
             categorical = df.select_dtypes(include=['object']).columns.tolist()
@@ -791,57 +697,39 @@ IMPORTANT:
                     header=dict(values=list(df.columns)),
                     cells=dict(values=[df[c] for c in df.columns])
                 )])
             elif viz_type == "pie" and categorical and numeric:
                 fig = px.pie(df, names=categorical[0], values=numeric[0], title=title)
             elif viz_type == "bar" and categorical and numeric:
                 fig = px.bar(df, x=categorical[0], y=numeric[0], title=title)
             elif viz_type == "line" and numeric:
                 fig = px.line(df, y=numeric[0], title=title)
             elif viz_type == "scatter" and len(numeric) >= 2:
                 fig = px.scatter(df, x=numeric[0], y=numeric[1], title=title)
             elif viz_type == "heatmap" and len(numeric) >= 2:
-                # Create correlation heatmap
                 corr = df[numeric].corr()
                 fig = go.Figure(data=go.Heatmap(
-                    z=corr.values,
-                    x=corr.columns,
-                    y=corr.columns,
-                    colorscale='Viridis'
+                    z=corr.values, x=corr.columns, y=corr.columns, colorscale='Viridis'
                 ))
                 fig.update_layout(title=title)
             elif viz_type == "auto":
-                # Auto-detect best chart
                 if categorical and numeric:
-                    if len(df) <= 10:
-                        fig = px.pie(df, names=categorical[0], values=numeric[0], title=title)
-                    else:
-                        fig = px.bar(df, x=categorical[0], y=numeric[0], title=title)
+                    fig = px.pie(df, names=categorical[0], values=numeric[0], title=title) if len(df) <= 10 else px.bar(df, x=categorical[0], y=numeric[0], title=title)
                 elif len(numeric) >= 2:
                     fig = px.line(df, y=numeric[0], title=title)
                 else:
                     fig = px.bar(df, y=df.columns[0], title=title)
             else:
-                # Default to bar
-                if categorical and numeric:
-                    fig = px.bar(df, x=categorical[0], y=numeric[0], title=title)
-                else:
-                    fig = px.bar(df, y=df.columns[0], title=title)
+                fig = px.bar(df, x=categorical[0] if categorical else df.index, y=numeric[0] if numeric else df.columns[0], title=title)
             fig.show()
-            print("✅ Chart displayed")
+            print("Chart displayed")
             return fig
         except Exception as e:
-            print(f"⚠️  Viz error: {e}")
+            print(f"Viz error: {e}")
             return None
     def _matplotlib_viz(self, df: pd.DataFrame, title: str, viz_type: str):
-        """Create Matplotlib chart."""
+        """Matplotlib visualization."""
         try:
             plt.figure(figsize=(10, 6))
             numeric = df.select_dtypes(include=[np.number]).columns
@@ -859,26 +747,22 @@ IMPORTANT:
             plt.title(title)
             plt.tight_layout()
             plt.show()
-            print("✅ Chart displayed")
+            print("Chart displayed")
             return plt.gcf()
         except Exception as e:
-            print(f"⚠️  Viz error: {e}")
+            print(f"Viz error: {e}")
             return None
-    # ========================================================================
-    # VIEW DATABASE
-    # ========================================================================
     def tables(self) -> Dict[str, dict]:
-        """Show all tables with details."""
+        """List all tables."""
         print("\n" + "="*70)
-        print("📋 TABLES IN DATABASE")
+        print("TABLES IN DATABASE")
         print("="*70)
         all_tables = self._get_table_names()
         if not all_tables:
-            print("❌ No tables found")
+            print("No tables found")
             return {}
         result = {}
@@ -887,26 +771,23 @@ IMPORTANT:
             cols = self.schema_info.get(tbl, {})
             col_list = list(cols.keys())
-            marker = "👉" if tbl == self.current_table else "  "
-            print(f"{marker} {i}. Table: {tbl}")
-            print(f"      📊 {count} rows, {len(col_list)} columns")
-            print(f"      🔤 Columns: {', '.join(col_list[:8])}{' ...' if len(col_list) > 8 else ''}")
+            marker = ">" if tbl == self.current_table else " "
+            print(f"{marker} {i}. {tbl}")
+            print(f"     {count} rows, {len(col_list)} columns")
+            print(f"     Columns: {', '.join(col_list[:8])}")
-            result[tbl] = {
-                'rows': count,
-                'columns': col_list
-            }
+            result[tbl] = {'rows': count, 'columns': col_list}
         print("="*70)
         return result
     def schema(self, table: Optional[str] = None) -> dict:
-        """Show detailed schema with data types."""
+        """Show schema."""
         if not self.schema_info:
             self._refresh_schema()
         print("\n" + "="*70)
-        print("📋 DATABASE SCHEMA")
+        print("DATABASE SCHEMA")
         print("="*70)
         tables_to_show = [table] if table else self.schema_info.keys()
@@ -915,12 +796,12 @@ IMPORTANT:
         for tbl in tables_to_show:
             if tbl in self.schema_info:
                 count = pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl}", self.conn).iloc[0, 0]
-                print(f"\n📊 Table: {tbl}")
-                print(f"   Records: {count}")
-                print(f"   Columns:")
+                print(f"\nTable: {tbl}")
+                print(f"Records: {count}")
+                print("Columns:")
                 for col, dtype in self.schema_info[tbl].items():
-                    print(f"     - {col:<30} ({dtype})")
+                    print(f"  - {col:<30} ({dtype})")
                 result[tbl] = {
                     'records': count,
@@ -931,151 +812,126 @@ IMPORTANT:
         return result
     def peek(self, table: Optional[str] = None, n: int = 5) -> pd.DataFrame:
-        """View sample data."""
+        """Preview data."""
         tbl = table or self.current_table
         if not tbl:
-            print("❌ No table specified")
+            print("No table specified")
             return pd.DataFrame()
         df = pd.read_sql_query(f"SELECT * FROM {tbl} LIMIT {n}", self.conn)
-        print(f"\n📊 Sample from '{tbl}' ({n} rows):")
+        print(f"\nSample from '{tbl}' ({n} rows):")
         print(df.to_string(index=False))
         return df
     def info(self):
-        """Show complete database overview."""
+        """Database overview."""
         return self.tables()
-    # ========================================================================
-    # QUERY METHODS
-    # ========================================================================
     def sql(self, query: str, viz: Union[bool, str] = False) -> 'QueryResult':
-        """Execute SQL directly (no API cost)."""
-        print(f"\n⚡ Executing SQL...")
+        """Execute SQL."""
+        print("\nExecuting SQL...")
         try:
             df = pd.read_sql_query(query, self.conn)
-            print(f"✅ Success! {len(df)} rows returned")
+            print(f"Success! {len(df)} rows")
             fig = None
             if viz:
                 viz_type = viz if isinstance(viz, str) else "auto"
-                fig = self._visualize(df, "SQL Query Result", viz_type=viz_type)
+                fig = self._visualize(df, "SQL Result", viz_type=viz_type)
             return QueryResult(True, query, df, fig)
         except Exception as e:
-            print(f"❌ Error: {e}")
+            print(f"Error: {e}")
             return QueryResult(False, query, pd.DataFrame(), None, str(e))
     def interactive(self, question: str) -> 'QueryResult':
-        """Ask with interactive visualization prompt."""
-        print(f"\n🔍 Question: {question}")
-        choice = input("💡 Visualize? (yes/no/pie/bar/line/scatter): ").strip().lower()
-        if choice in ['yes', 'y', 'yeah', 'yep', 'sure']:
-            viz = True
-        elif choice in ['pie', 'bar', 'line', 'scatter', 'table', 'heatmap']:
-            viz = choice
-        else:
-            viz = False
+        """Interactive query."""
+        print(f"\nQuestion: {question}")
+        choice = input("Visualize? (yes/no/pie/bar/line/scatter): ").strip().lower()
+        viz = choice if choice in ['pie', 'bar', 'line', 'scatter', 'table', 'heatmap'] else (True if choice in ['yes', 'y'] else False)
         return self.ask(question, viz=viz)
-    # ========================================================================
-    # DATABASE EXPORT
-    # ========================================================================
     def export_db(self, path: str, format: str = "sqlite"):
-        """Export entire database."""
-        print(f"\n💾 Exporting to {format}...")
-        format = format.lower()
+        """Export database."""
+        print(f"\nExporting to {format}...")
         if format == "sqlite":
             shutil.copy2(self.db_path, path)
-            print(f"✅ Saved to {path}")
         elif format == "sql":
             with open(path, 'w', encoding='utf-8') as f:
                 for line in self.conn.iterdump():
                     f.write(f'{line}\n')
-            print(f"✅ Saved to {path}")
         elif format == "json":
             data = {}
             for table in self._get_table_names():
                 df = pd.read_sql_query(f"SELECT * FROM {table}", self.conn)
                 data[table] = df.to_dict(orient='records')
             with open(path, 'w', encoding='utf-8') as f:
                 json.dump(data, f, indent=2, default=str)
-            print(f"✅ Saved to {path}")
         elif format == "excel":
             with pd.ExcelWriter(path, engine='openpyxl') as writer:
                 for table in self._get_table_names():
                     df = pd.read_sql_query(f"SELECT * FROM {table}", self.conn)
                     df.to_excel(writer, sheet_name=table[:31], index=False)
-            print(f"✅ Saved to {path}")
         else:
-            raise ValueError(f"Unsupported format: {format}")
+            raise ValueError(f"Unsupported: {format}")
+        print(f"Saved to {path}")
         return self
     def save_to_mysql(self, host: str, user: str, password: str, database: str,
                       port: int = 3306, tables: Optional[List[str]] = None):
-        """Save to MySQL (local or cloud)."""
+        """Export to MySQL."""
         try:
             from sqlalchemy import create_engine
         except ImportError:
-            raise ImportError("Run: pip install sqlalchemy mysql-connector-python")
+            raise ImportError("Run: pip install QuerySUTRA[mysql]")
-        print(f"\n🔄 Connecting to MySQL at {host}:{port}...")
+        print(f"\nConnecting to MySQL: {host}:{port}...")
-        connection_string = f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}"
-        engine = create_engine(connection_string)
+        engine = create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}")
         tables_to_export = tables or self._get_table_names()
-        print(f"📤 Exporting {len(tables_to_export)} tables...")
+        print(f"Exporting {len(tables_to_export)} tables...")
         for table in tables_to_export:
             df = pd.read_sql_query(f"SELECT * FROM {table}", self.conn)
             df.to_sql(table, engine, if_exists='replace', index=False)
-            print(f"   ✅ {table}: {len(df)} rows")
+            print(f"  {table}: {len(df)} rows")
-        print(f"✅ Complete!")
+        print("Complete!")
         return self
     def save_to_postgres(self, host: str, user: str, password: str, database: str,
                          port: int = 5432, tables: Optional[List[str]] = None):
-        """Save to PostgreSQL (local or cloud)."""
+        """Export to PostgreSQL."""
         try:
             from sqlalchemy import create_engine
         except ImportError:
-            raise ImportError("Run: pip install sqlalchemy psycopg2-binary")
+            raise ImportError("Run: pip install QuerySUTRA[postgres]")
-        print(f"\n🔄 Connecting to PostgreSQL at {host}:{port}...")
+        print(f"\nConnecting to PostgreSQL: {host}:{port}...")
-        connection_string = f"postgresql://{user}:{password}@{host}:{port}/{database}"
-        engine = create_engine(connection_string)
+        engine = create_engine(f"postgresql://{user}:{password}@{host}:{port}/{database}")
         tables_to_export = tables or self._get_table_names()
-        print(f"📤 Exporting {len(tables_to_export)} tables...")
+        print(f"Exporting {len(tables_to_export)} tables...")
         for table in tables_to_export:
             df = pd.read_sql_query(f"SELECT * FROM {table}", self.conn)
             df.to_sql(table, engine, if_exists='replace', index=False)
-            print(f"   ✅ {table}: {len(df)} rows")
+            print(f"  {table}: {len(df)} rows")
-        print(f"✅ Complete!")
+        print("Complete!")
         return self
     def backup(self, backup_path: str = None):
-        """Create complete backup."""
+        """Create backup."""
         if backup_path:
             backup_dir = Path(backup_path)
             backup_dir.mkdir(parents=True, exist_ok=True)
@@ -1084,52 +940,47 @@ IMPORTANT:
         timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        print(f"\n💾 Creating backup...")
+        print("\nCreating backup...")
-        db_backup = backup_dir / f"sutra_backup_{timestamp}.db"
+        db_backup = backup_dir / f"sutra_{timestamp}.db"
         self.export_db(str(db_backup), format="sqlite")
-        json_backup = backup_dir / f"sutra_data_{timestamp}.json"
+        json_backup = backup_dir / f"sutra_{timestamp}.json"
         self.export_db(str(json_backup), format="json")
-        print(f"\n✅ Backup complete!")
-        print(f"   📁 Database: {db_backup}")
-        print(f"   📊 Data: {json_backup}")
+        print(f"\nBackup complete!")
+        print(f"  Database: {db_backup}")
+        print(f"  Data: {json_backup}")
         return self
-    # ========================================================================
-    # UTILITIES
-    # ========================================================================
     def export(self, data: pd.DataFrame, path: str, format: str = "csv"):
         """Export results."""
-        fmt = format.lower()
-        if fmt == "csv":
+        if format == "csv":
             data.to_csv(path, index=False)
-        elif fmt in ["excel", "xlsx"]:
+        elif format in ["excel", "xlsx"]:
             data.to_excel(path, index=False)
-        elif fmt == "json":
+        elif format == "json":
             data.to_json(path, orient="records", indent=2)
         else:
-            raise ValueError(f"Unknown format: {format}")
+            raise ValueError(f"Unknown: {format}")
-        print(f"✅ Exported to {path}")
+        print(f"Exported to {path}")
         return self
     def close(self):
-        """Close database."""
+        """Close connection."""
         if self.conn:
             self.conn.close()
-            print("✅ Closed")
+            print("Closed")
     def _get_table_names(self) -> List[str]:
-        """Get list of tables."""
+        """Get tables."""
         self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
         return [r[0] for r in self.cursor.fetchall()]
     def _refresh_schema(self):
-        """Refresh schema info."""
+        """Refresh schema."""
         tables = self._get_table_names()
         self.schema_info = {}
@@ -1138,7 +989,7 @@ IMPORTANT:
             self.schema_info[tbl] = {r[1]: r[2] for r in self.cursor.fetchall()}
     def _generate_sql(self, question: str, table: str) -> str:
-        """Generate SQL with OpenAI."""
+        """Generate SQL."""
         schema = self.schema_info.get(table, {})
         sample_df = pd.read_sql_query(f"SELECT * FROM {table} LIMIT 3", self.conn)
         sample = sample_df.to_string(index=False)
@@ -1156,7 +1007,7 @@ Sample:
 Question: {question}
-Return ONLY SQL. No explanations."""
+Return ONLY SQL."""
         response = self.client.chat.completions.create(
             model="gpt-4o-mini",
@@ -1177,7 +1028,6 @@ Return ONLY SQL. No explanations."""
         self.close()
     def __repr__(self):
-        tables = len(self.schema_info)
         features = []
         if self.cache_queries:
             features.append("cache")
@@ -1188,8 +1038,8 @@ Return ONLY SQL. No explanations."""
         if self.fuzzy_match:
             features.append("fuzzy")
-        features_str = f", features=[{', '.join(features)}]" if features else ""
-        return f"SUTRA(tables={tables}, current='{self.current_table}'{features_str})"
+        feat_str = f", {', '.join(features)}" if features else ""
+        return f"SUTRA(tables={len(self.schema_info)}{feat_str})"
 class QueryResult:
@@ -1203,15 +1053,10 @@ class QueryResult:
         self.error = error
     def __repr__(self):
-        if self.success:
-            return f"QueryResult(rows={len(self.data)}, cols={len(self.data.columns)})"
-        return f"QueryResult(error='{self.error}')"
+        return f"QueryResult(rows={len(self.data)}, cols={len(self.data.columns)})" if self.success else f"QueryResult(error='{self.error}')"
     def show(self):
-        if self.success:
-            print(self.data)
-        else:
-            print(f"Error: {self.error}")
+        print(self.data) if self.success else print(f"Error: {self.error}")
         return self
@@ -1224,39 +1069,12 @@ def quick_start(api_key: str, data_path: str, question: str, viz: Union[bool, st
 if __name__ == "__main__":
     print("""
-╔══════════════════════════════════════════════════════════════╗
-║              QuerySUTRA v0.3.0 - ENHANCED                    ║
-║   Structured-Unstructured-Text-Retrieval-Architecture        ║
-╚══════════════════════════════════════════════════════════════╝
+QuerySUTRA v0.3.3 - Professional Data Analysis
+SUTRA: Structured-Unstructured-Text-Retrieval-Architecture
-NEW FEATURES:
-✅ Load existing databases (no re-upload needed!)
-✅ Custom visualizations (pie, bar, line, scatter, table)
-✅ Smart NLP with fuzzy matching (optional)
-✅ Irrelevant query detection (optional)
-✅ Embeddings for caching (optional)
-✅ All features are OPTIONAL - you control everything!
+Fixed: Proper primary and foreign keys with unique IDs
+Features: Load existing DB, custom viz, fuzzy matching, embeddings
 Installation: pip install QuerySUTRA
-Quick Start:
-    from sutra import SUTRA
-    # NEW: Load existing database
-    sutra = SUTRA.load_from_db("sutra.db", api_key="sk-...")
-    # Or create new with options
-    sutra = SUTRA(api_key="sk-...",
-                  use_embeddings=True,      # Smart caching
-                  check_relevance=True,      # Detect irrelevant queries
-                  fuzzy_match=True)          # Better NLP
-    # Upload and query
-    sutra.upload("data.pdf")
-    result = sutra.ask("Show sales by region", viz="pie")
-    # Connect to MySQL/PostgreSQL
-    sutra = SUTRA.connect_mysql("localhost", "root", "pass", "db")
-Supported: CSV, Excel, JSON, SQL, PDF, DOCX, TXT, DataFrame
+Usage: from sutra import SUTRA
 """)

QuerySUTRA 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

QuerySUTRA 0.3.1py3-none-any.whl → 0.3.3py3-none-any.whl