QuerySUTRA 0.5.3__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {querysutra-0.5.3.dist-info → querysutra-0.6.0.dist-info}/METADATA +18 -2
- querysutra-0.6.0.dist-info/RECORD +22 -0
- {querysutra-0.5.3.dist-info → querysutra-0.6.0.dist-info}/WHEEL +1 -1
- querysutra-0.6.0.dist-info/top_level.txt +1 -0
- sutra/__init__.py +6 -4
- sutra/database_manager.py +235 -195
- sutra/nlp_processor.py +175 -143
- sutra/schema_generator.py +56 -52
- sutra/sutra.py +123 -15
- querysutra-0.5.3.dist-info/RECORD +0 -28
- querysutra-0.5.3.dist-info/top_level.txt +0 -3
- tests/__init__.py +0 -0
- tests/test_modules.py +0 -0
- tests/test_sutra.py +0 -76
- utils/__init__.py +0 -0
- utils/file_utils.py +0 -0
- utils/text_utils.py +0 -0
- {querysutra-0.5.3.dist-info → querysutra-0.6.0.dist-info}/licenses/LICENSE +0 -0
{querysutra-0.5.3.dist-info → querysutra-0.6.0.dist-info}/METADATA
CHANGED

@@ -1,9 +1,25 @@
 Metadata-Version: 2.4
 Name: QuerySUTRA
-Version: 0.5.3
-Summary:
+Version: 0.6.0
+Summary: AI-powered data analysis for structured and unstructured data. Query PDF, Word, CSV, Excel with natural language.
 Author: Aditya Batta
 License: MIT
+Project-URL: Homepage, https://github.com/adityabatta/QuerySUTRA
+Project-URL: Repository, https://github.com/adityabatta/QuerySUTRA
+Project-URL: Issues, https://github.com/adityabatta/QuerySUTRA/issues
+Keywords: ai,data-analysis,nlp,sql,pdf,openai,natural-language,query,database
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Database
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
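
The new metadata can be inspected at runtime once the wheel is installed. A minimal sketch, assuming `querysutra` 0.6.0 is installed in the current environment:

```python
# Inspect the installed QuerySUTRA metadata via the standard library (Python 3.8+).
from importlib.metadata import metadata, version

print(version("QuerySUTRA"))        # -> 0.6.0

meta = metadata("QuerySUTRA")
print(meta["Summary"])              # the new one-line description
print(meta.get_all("Classifier"))   # the 12 classifiers added in this release
```
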
querysutra-0.6.0.dist-info/RECORD
ADDED

@@ -0,0 +1,22 @@
+querysutra-0.6.0.dist-info/licenses/LICENSE,sha256=F-4b93u0OVrVwGXgMwBRq6MlGyUT9zmre1oh5Gft5Ts,1066
+sutra/__init__.py,sha256=rRSfC1jjMvi8-LpP0P6dpMVx0xC_HnxsgisPr4WPkGM,200
+sutra/cache_manager.py,sha256=e0AAeUqoR-aiqzZ3fB-IDvpJ4JA6-YBFyRJxusEnIrA,3082
+sutra/clear_cache.py,sha256=rVIz29p7V11Uh6oHXeaWpFtYXXv-2OED91cHMAWWxtQ,187
+sutra/core.py,sha256=R_JbOlZTukegP92Dr-WLsdr632_otFN7o9qSvcxyBtw,10497
+sutra/data_loader.py,sha256=_yPj-DS2qYtlCgaMACQtfXZfSuAdVVd4igNP7yzXolc,5781
+sutra/database_manager.py,sha256=usnQTOnfjyFwpcaczG3eF-Pg0snIUeqzHl4rwsd_9rA,9150
+sutra/direct_query.py,sha256=X69I646zHIZlZjMmgn8O2xLS_7ww7miAkABTnJEPAAc,2724
+sutra/feedback.py,sha256=PHSffU_rfORjLkTW3-j2VSjQdw4ufROsTeBWaX6DZ00,1642
+sutra/feedback_matcher.py,sha256=WXYpGtFJnOyYQOzy-z8uBiUWH5vyJJOMS1NwEYzNfic,2865
+sutra/nlp_processor.py,sha256=cvMDvmtf3b2tTbFPItJgF_t541MQqP4SdEXECR1pa0Q,6719
+sutra/schema_embeddings.py,sha256=bVPzpJOdYTyUdG2k3ZdgYJLrX2opHBx68RIjJcMlueo,9732
+sutra/schema_generator.py,sha256=EYEOo7-ljSukTx9Mm2hXhgY-DFCgsaa7RpzDWqVx4K8,2348
+sutra/sutra.py,sha256=73A4HPZVf6jSl5T4ob1vuIbr7CXVnWHP70NfUYbCz-Y,27594
+sutra/sutra_client.py,sha256=PYYDGqVbA9pB-Zcsm52i9KarwijCIGVZOThgONZP6Vs,14203
+sutra/sutra_core.py,sha256=diaWOXUHn1wrqCQrBhLKL612tMQioaqx-ILc3y9-CqM,11708
+sutra/sutra_simple.py,sha256=rnqzG7OAt4p64XtO0peMqHS1pG5tdA8U3EYTMVsq7BE,23201
+sutra/visualizer.py,sha256=YOKTmjQcY72smmx9KsZrQTdbAiE5GQDKofMFjpLIUfI,6996
+querysutra-0.6.0.dist-info/METADATA,sha256=e1PS_Cr8aByv3OYnE2kLxuLTUdsfjSAMNioP5DjYpBk,8252
+querysutra-0.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+querysutra-0.6.0.dist-info/top_level.txt,sha256=tqRK7nxuOJvFTkUn-YahGogCSCkk1ZE90Wf3MgT9BDI,6
+querysutra-0.6.0.dist-info/RECORD,,

querysutra-0.6.0.dist-info/top_level.txt
ADDED

@@ -0,0 +1 @@
+sutra
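
Each RECORD entry has the form `path,sha256=<urlsafe-base64 digest without padding>,size`, per the wheel spec. A minimal sketch of verifying one file from the installed wheel against its RECORD entry (the path below is just an example):

```python
# Recompute a RECORD-style hash for one file and compare it to the entry above.
import base64
import hashlib
from pathlib import Path

def record_hash(path: str) -> str:
    digest = hashlib.sha256(Path(path).read_bytes()).digest()
    # RECORD uses URL-safe base64 with the trailing '=' padding stripped
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

print(record_hash("sutra/__init__.py"))
# For the 0.6.0 file this should print
# sha256=rRSfC1jjMvi8-LpP0P6dpMVx0xC_HnxsgisPr4WPkGM
```
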
sutra/__init__.py
CHANGED

@@ -1,4 +1,6 @@
-"""QuerySUTRA v0.5.3 …
-__version__="0.5.3"
-
-
+"""QuerySUTRA v0.6.0 - AI-powered data analysis for structured and unstructured data"""
+__version__ = "0.6.0"
+
+from .sutra import SUTRA, QueryResult
+
+__all__ = ["SUTRA", "QueryResult", "__version__"]
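
With these exports, the intended entry point is now a single `SUTRA` object. A hypothetical quick-start under that assumption; `SUTRA`, `QueryResult`, `upload()`, and `ask()` appear in this diff, but the constructor behavior and the result attributes are assumptions, not confirmed here:

```python
# Hypothetical quick-start for the 0.6.0 public API.
from sutra import SUTRA

s = SUTRA()                        # assumed: API key comes from the environment
s.upload("employees.csv")          # upload() accepts a file path or a DataFrame
result = s.ask("how many employees are from FL?", viz=False)
print(result)                      # a QueryResult; its fields are not shown in full in this diff
```
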
sutra/database_manager.py
CHANGED

(Lines 1-142 are identical in both versions and are omitted here; they carry over the 0.5.3 file verbatim, including a duplicated module docstring and import block inside the class body.)

@@ -143,54 +143,94 @@ class DatabaseManager:
             return columns
 
     def get_schema_context(self) -> str:
-        """Get database schema"""
+        """Get database schema with relationship information"""
         if self.db_type == 'mysql':
             tables = self.get_tables()
             schema = []
             for table in tables:
                 self.cursor.execute(f"SHOW CREATE TABLE {table}")
                 schema.append(self.cursor.fetchone()[1])
-
+            schema_text = '\n'.join(schema)
         else:
             self.cursor.execute(
                 "SELECT sql FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';"
             )
-… (old lines 158-195 were not rendered in the source diff)
+            schema_text = '\n'.join([row[0] for row in self.cursor.fetchall()])
+
+        # Add relationship summary at the beginning
+        relationships = self._extract_relationships(schema_text)
+        if relationships:
+            relationship_summary = "\n=== TABLE RELATIONSHIPS ===\n" + "\n".join(relationships) + "\n\n=== FULL SCHEMA ===\n"
+            return relationship_summary + schema_text
+        return schema_text
+
+    def _extract_relationships(self, schema_text: str) -> List[str]:
+        """Extract and format foreign key relationships from schema"""
+        import re
+        relationships = []
+
+        # Pattern to match FOREIGN KEY statements
+        fk_pattern = r'FOREIGN KEY\s*\(([^)]+)\)\s*REFERENCES\s+(\w+)\s*\(([^)]+)\)'
+
+        # Split schema into individual table definitions
+        tables = schema_text.split('CREATE TABLE')
+
+        for table_def in tables:
+            if not table_def.strip():
+                continue
+
+            # Extract table name
+            table_match = re.search(r'[`"]?(\w+)[`"]?', table_def)
+            if not table_match:
+                continue
+            table_name = table_match.group(1)
+
+            # Find all foreign keys in this table
+            for match in re.finditer(fk_pattern, table_def, re.IGNORECASE):
+                fk_column = match.group(1).strip('`" ')
+                ref_table = match.group(2).strip('`" ')
+                ref_column = match.group(3).strip('`" ')
+
+                relationships.append(
+                    f"  {table_name}.{fk_column} → {ref_table}.{ref_column}"
+                )
+
+        return relationships
+
+    def display_tables(self):  # FIX: Proper indentation - part of class
+        """Display all tables with their structure and data"""
+        tables = self.get_tables()
+        print(f"\n📋 Created {len(tables)} tables:")
+
+        for table in tables:
+            print(f"\n  Table: {table}")
+
+            # Show columns
+            columns = self.get_table_info(table)
+            for col in columns:
+                print(f"    - {col[1]} ({col[2]})")
+
+            # Show row count
+            count = self.get_row_count(table)
+            print(f"    Records: {count}")
+
+    def get_table_info(self, table_name: str) -> List[Tuple]:  # FIX: Proper indentation
+        """Get column information for a table"""
+        if self.db_type == 'mysql':
+            self.cursor.execute(f"DESCRIBE {table_name}")
+            return [(i, row[0], row[1]) for i, row in enumerate(self.cursor.fetchall())]
+        else:
+            self.cursor.execute(f"PRAGMA table_info({table_name})")
+            return self.cursor.fetchall()
+
+    def get_row_count(self, table_name: str) -> int:  # FIX: Proper indentation
+        """Get number of rows in a table"""
+        self.cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
+        return self.cursor.fetchone()[0]
+
+    def close(self):
+        """Close database connection"""
+        self.conn.close()
+        print("📂 Database connection closed")
+
 
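The new `_extract_relationships` helper drives the relationship summary that now leads the schema context. A standalone sketch of the same regex against a toy schema (the table definitions here are invented for illustration):

```python
# Demonstrates the FOREIGN KEY pattern used by _extract_relationships
# on a hypothetical two-table schema.
import re

schema_text = """
CREATE TABLE departments (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT);
CREATE TABLE employees (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT,
    department_id INTEGER,
    FOREIGN KEY (department_id) REFERENCES departments(id)
);
"""

fk_pattern = r'FOREIGN KEY\s*\(([^)]+)\)\s*REFERENCES\s+(\w+)\s*\(([^)]+)\)'
for m in re.finditer(fk_pattern, schema_text, re.IGNORECASE):
    print(f"{m.group(1).strip()} -> {m.group(2)}.{m.group(3).strip()}")
# Output: department_id -> departments.id
```
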
sutra/nlp_processor.py
CHANGED

(Lines 1-53 are identical in both versions and are omitted here.)

@@ -54,91 +54,123 @@
         # Get schema context
         schema = self.db.get_schema_context()
 
         prompt = f"""
-Convert this question to a SQLite query
+Convert this question to a SQLite query.
 
 Question: {question}
 
 Database schema:
 {schema}
 
-… (old lines 65-143 were not rendered in the source diff)
+CRITICAL INSTRUCTIONS FOR MULTI-TABLE QUERIES:
+
+**STEP 1: CHECK TABLE RELATIONSHIPS FIRST**
+Look at the "=== TABLE RELATIONSHIPS ===" section at the top of the schema.
+These show you exactly how tables are connected via foreign keys.
+Format: table1.column → table2.column means table1.column references table2.column
+
+**STEP 2: IDENTIFY REQUIRED TABLES**
+Analyze which tables contain the data needed to answer the question.
+If information is spread across multiple tables, you MUST join them.
+
+**STEP 3: USE THE RELATIONSHIPS TO JOIN**
+When you need data from multiple tables:
+- Use the foreign key relationships shown in the TABLE RELATIONSHIPS section
+- Join table1 to table2 using: JOIN table2 ON table1.fk_column = table2.pk_column
+- Use INNER JOIN when both tables must have matching data
+- Use LEFT JOIN when you need all rows from the first table regardless of matches
+
+**STEP 4: WRITE THE QUERY**
+- Use table aliases (t1, t2, etc.) for readability
+- Qualify all column names with table aliases to avoid ambiguity
+- Include all necessary columns from all joined tables in SELECT
+
+EXAMPLES:
+❌ WRONG: SELECT name FROM customers WHERE city = 'NYC'
+(if you need order information too)
+
+✅ CORRECT: SELECT c.name, o.order_date, o.total
+           FROM customers c
+           JOIN orders o ON c.customer_id = o.customer_id
+           WHERE c.city = 'NYC'
+
+Return ONLY the executable SELECT statement. No explanations, no markdown, no code blocks.
+"""
+
+        # Use openai.ChatCompletion directly for version 0.28.1
+        response = openai.ChatCompletion.create(
+            model=self.model_name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=0
+        )
+
+        sql_query = response['choices'][0]['message']['content'].strip()
+        sql_query = sql_query.replace('```sql', '').replace('```', '').strip()
+
+        # Cache the result
+        if self.cache:
+            self.cache.add_to_cache(question, sql_query)
+
+        return sql_query
+
+    def process_question(self, question: str) -> Tuple[Optional[pd.DataFrame], str]:
+        """Process a natural language question and return results"""
+
+        # ✅ NEW: Check relevancy FIRST - BEFORE any API calls
+        is_relevant, similarity, info = self.relevancy_checker.is_relevant(question)
+
+        if not is_relevant:
+            print(f"\n❌ Question not relevant to database (similarity: {similarity:.2f})")
+            for item in info:
+                print(f"   {item}")
+            return None, ""
+
+        print(f"✅ Relevant question (similarity: {similarity:.2f})")
+
+        try:
+            # Convert to SQL (only if relevant)
+            sql_query = self.nlp_to_sql(question)
+            print(f"\n🔍 Generated SQL Query:")
+            print(f"   {sql_query}")
+
+            # Track for feedback
+            self.last_question = question
+            self.last_sql = sql_query
+
+            # Execute query
+            result_df = self.db.execute_query(sql_query)
+
+            return result_df, sql_query
+
+        except Exception as e:
+            print(f"❌ Error processing question: {e}")
+            return None, ""
+
+    def display_results(self, df: pd.DataFrame, max_rows: int = 15):
+        """Display query results in a formatted table"""
+        if df is None or df.empty:
+            print("  No results found")
+            return  # Exit early if no results
+
+        # Show the table
+        display_df = df.head(max_rows) if len(df) > max_rows else df
+        print(tabulate(display_df, headers='keys', tablefmt='grid', showindex=False))
+
+        if len(df) > max_rows:
+            print(f"  ... showing first {max_rows} of {len(df)} rows")
+
+        # ✅ UPDATED: Only ask for feedback for relevant questions with results
+        # (Irrelevant questions never reach here due to early return)
+        feedback = input("\n👍 or 👎? (y/n): ").lower()
+        if feedback == 'y':
+            self.feedback.save(self.last_question, self.last_sql, True)
+            print("✅ Saved as good")
+            # Reload feedback matcher with new data
+            self.feedback_matcher.reload_feedback()
+        elif feedback == 'n':
+            correct = input("Correct SQL: ").strip()
+            self.feedback.save(self.last_question, self.last_sql, False, correct)
+            if correct:
+                print("✅ Learned correction")
+                # Reload feedback matcher with new data
                 self.feedback_matcher.reload_feedback()
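
Both `nlp_to_sql` and the schema generator strip markdown fences from model output with chained `replace` calls before executing the SQL. A quick standalone check of that post-processing (the fence string is built indirectly so this example stays self-contained):

```python
# Mirrors the fence-stripping in nlp_to_sql(): the model sometimes wraps SQL
# in a markdown code block, which must be removed before execution.
FENCE = "`" * 3  # the literal triple-backtick sequence

raw_reply = f"{FENCE}sql\nSELECT COUNT(*) FROM people WHERE state = 'FL';\n{FENCE}"
sql_query = raw_reply.replace(FENCE + "sql", "").replace(FENCE, "").strip()

print(sql_query)  # -> SELECT COUNT(*) FROM people WHERE state = 'FL';
```
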
sutra/schema_generator.py
CHANGED

(Lines 1-22 are identical in both versions and are omitted here.)

@@ -23,31 +23,35 @@
 Convert this unstructured text into a SQLite database:
 
 {unstructured_data}
 
-Requirements:
-1. …
-2. Add …
-… (old lines 30-52 were not rendered in the source diff)
+CRITICAL Requirements:
+1. Identify all entities in the text and create a table for each
+2. **MANDATORY:** Add FOREIGN KEY constraints to connect related tables
+   - If table A references table B, add: FOREIGN KEY (column_name) REFERENCES table_b(id)
+   - Example: If employees belong to departments, employees table must have:
+     department_id INTEGER, FOREIGN KEY (department_id) REFERENCES departments(id)
+3. Extract ALL data from the text - don't add anything not in the text
+4. Use INTEGER PRIMARY KEY AUTOINCREMENT for all ID columns
+5. Ensure parent tables (referenced tables) are created BEFORE child tables
+
+Return ONLY executable SQLite statements in this order:
+1. DROP TABLE IF EXISTS statements (child tables first, parent tables last)
+2. CREATE TABLE statements (parent tables first, child tables last)
+3. INSERT statements (parent tables first, child tables last)
+
+No markdown, no code blocks, no explanations - just SQL statements.
+"""
+
+        print("🔄 Generating schema via OpenAI API...")
+
+        response = openai.ChatCompletion.create(
+            model=self.model_name,
+            messages=[{"role": "user", "content": prompt}],
+            temperature=self.temperature
+        )
+
+        generated_schema = response['choices'][0]['message']['content'].strip()
+        generated_schema = generated_schema.replace('```sql', '').replace('```', '').strip()
+
+        print("✅ Schema generated!")
         return generated_schema
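
The prompt's ordering rules matter because a child table's foreign key can only reference a parent table that already exists, and with enforcement on, child rows need their parent rows first. A minimal sketch with a made-up parent/child pair:

```python
# Shows why the generated script must create and populate parent tables first.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("PRAGMA foreign_keys = ON")  # SQLite leaves FK enforcement off by default
conn.executescript("""
DROP TABLE IF EXISTS employees;   -- child first
DROP TABLE IF EXISTS departments; -- parent last

CREATE TABLE departments (id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT);
CREATE TABLE employees (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT,
    department_id INTEGER,
    FOREIGN KEY (department_id) REFERENCES departments(id)
);

INSERT INTO departments (name) VALUES ('Engineering');  -- parent row first
INSERT INTO employees (name, department_id) VALUES ('Alice', 1);
""")
print(conn.execute("SELECT COUNT(*) FROM employees").fetchone()[0])  # -> 1
```
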
sutra/sutra.py
CHANGED

@@ -1,5 +1,5 @@
-"""QuerySUTRA v0.5.3 …
-__version__ = "0.5.3"
+"""QuerySUTRA v0.6.0 - AI-powered data analysis for structured and unstructured data"""
+__version__ = "0.6.0"
 __author__ = "Aditya Batta"
 __all__ = ["SUTRA", "QueryResult"]
 
@@ -72,7 +72,7 @@ class SUTRA:
         pass
 
         self._refresh_schema()
-        print(f"QuerySUTRA …
+        print(f"QuerySUTRA v{__version__} Ready")
 
     def upload(self, data: Union[str, pd.DataFrame], name: Optional[str] = None) -> 'SUTRA':
         """Upload."""
@@ -136,6 +136,8 @@ class SUTRA:
                 rec['id'] = idx
             self._store(pd.DataFrame(recs), f"{name}_{etype}")
             print(f"  {etype}: {len(recs)} rows")
+        # After all tables are created, detect and store foreign key relationships
+        self._create_foreign_keys()
         return
 
     print("Using regex fallback...")
@@ -264,6 +266,55 @@ JSON:"""
         self._refresh_schema()
         print(f"  {name}: {len(df)} rows")
 
+    def _create_foreign_keys(self, silent=False):
+        """Detect foreign key relationships between tables by matching column naming patterns.
+        e.g., 'person_id' in work_experience -> 'id' in people table."""
+        tables = self._get_tables()
+
+        # Build a map of potential parent tables by looking for 'id' columns
+        # e.g., employee_data_people has 'id' -> can be referenced as person_id, people_id
+        parent_map = {}  # Maps potential FK column names -> (parent_table, parent_pk)
+        for t in tables:
+            self.cursor.execute(f"PRAGMA table_info({t})")
+            cols = {r[1]: r[2] for r in self.cursor.fetchall()}
+            if 'id' in cols:
+                # Generate possible FK names from table name
+                # e.g., 'employee_data_people' -> 'person_id', 'people_id'
+                parts = t.split('_')
+                for part in parts:
+                    # singular form guesses
+                    fk_name = f"{part}_id"
+                    parent_map[fk_name] = (t, 'id')
+                    # Handle plural -> singular (people -> person)
+                    if part.endswith('ies'):
+                        parent_map[f"{part[:-3]}y_id"] = (t, 'id')
+                    elif part.endswith('es'):
+                        parent_map[f"{part[:-2]}_id"] = (t, 'id')
+                    elif part.endswith('s'):
+                        parent_map[f"{part[:-1]}_id"] = (t, 'id')
+                # Also try full table name as FK
+                parent_map[f"{t}_id"] = (t, 'id')
+
+        # Now scan all tables for columns matching FK patterns
+        self.foreign_keys = {}  # table -> [(fk_col, parent_table, parent_col)]
+        for t in tables:
+            self.cursor.execute(f"PRAGMA table_info({t})")
+            cols = [r[1] for r in self.cursor.fetchall()]
+            fks = []
+            for col in cols:
+                if col in parent_map:
+                    parent_table, parent_col = parent_map[col]
+                    if parent_table != t:  # Don't self-reference
+                        fks.append((col, parent_table, parent_col))
+            if fks:
+                self.foreign_keys[t] = fks
+
+        if self.foreign_keys and not silent:
+            print(f"\n🔗 Detected relationships:")
+            for t, fks in self.foreign_keys.items():
+                for fk_col, parent_table, parent_col in fks:
+                    print(f"  {t}.{fk_col} → {parent_table}.{parent_col}")
+
     def ask(self, q: str, viz: Union[bool, str] = False, table: Optional[str] = None) -> 'QueryResult':
         """
         Query - FIXED: Considers ALL tables, picks best one or joins multiple.
@@ -273,6 +324,10 @@ JSON:"""
 
         print(f"\nQuestion: {q}")
 
+        # Ensure foreign key relationships are detected
+        if not hasattr(self, 'foreign_keys') or not self.foreign_keys:
+            self._create_foreign_keys(silent=True)
+
         # FIXED: If no table specified, let AI pick the right one(s)
         if not table:
             # Get ALL table schemas
@@ -308,6 +363,24 @@ JSON:"""
             print(f"Error: {e}")
             return QueryResult(False, sql, pd.DataFrame(), None, str(e))
 
+    def _get_relationship_context(self) -> str:
+        """Build a clear relationship context string for the AI prompt."""
+        if not hasattr(self, 'foreign_keys') or not self.foreign_keys:
+            # Try to detect relationships if not already done
+            self._create_foreign_keys(silent=True)
+
+        if not hasattr(self, 'foreign_keys') or not self.foreign_keys:
+            return ""
+
+        lines = ["\n=== TABLE RELATIONSHIPS (FOREIGN KEYS) ==="]
+        lines.append("Use these to JOIN tables when a question needs data from multiple tables:")
+        for t, fks in self.foreign_keys.items():
+            for fk_col, parent_table, parent_col in fks:
+                lines.append(f"  {t}.{fk_col} → {parent_table}.{parent_col}")
+                lines.append(f"  JOIN syntax: JOIN {parent_table} ON {t}.{fk_col} = {parent_table}.{parent_col}")
+        lines.append("=" * 50)
+        return "\n".join(lines)
+
@@ -318,35 +391,70 @@
             schema_context += f"\n{tbl} ({info['row_count']} rows):\n"
             schema_context += f"  Columns: {', '.join(info['columns'])}\n"
 
-        # Add …
+        # Add relationship context
+        relationship_context = self._get_relationship_context()
+
+        # Add sample data from ALL tables (not just first 3)
         samples = ""
-        for tbl in list(all_schemas.keys())[:3]:
+        for tbl in list(all_schemas.keys())[:6]:  # Show more tables
             try:
                 sample_df = pd.read_sql_query(f"SELECT * FROM {tbl} LIMIT 2", self.conn)
                 samples += f"\nSample from {tbl}:\n{sample_df.to_string(index=False)}\n"
             except:
                 pass
 
-        prompt = f"""You are an …
+        prompt = f"""You are an expert SQL query generator.
 
 {schema_context}
-
+{relationship_context}
 {samples}
 
 Question: {q}
 
-… (old lines 338-342 were not rendered in the source diff)
+CRITICAL INSTRUCTIONS - FOLLOW THESE STEPS:
+
+STEP 1: READ THE TABLE RELATIONSHIPS SECTION ABOVE.
+Those show you exactly how tables connect via foreign keys.
+
+STEP 2: IDENTIFY WHICH TABLES HAVE THE DATA NEEDED.
+- Person info (name, email, city, state) → look in *_people table
+- Work info (company, position, start_date) → look in *_work_experience table
+- Skills, education, etc. → look in their respective tables
+
+STEP 3: IF THE QUESTION NEEDS DATA FROM MULTIPLE TABLES, YOU MUST USE JOIN.
+Use the foreign key relationships shown above.
+Example: If work_experience has person_id and people has id:
+JOIN people ON work_experience.person_id = people.id
+
+STEP 4: WRITE THE QUERY.
+- Use table aliases for readability
+- Qualify ALL column names with table alias to avoid ambiguity
+- For "who" / "which person" questions, ALWAYS join to the people table to get names
+- For "from <state>" or "in <city>" questions, the location is in the people table, JOIN to it
+- For "count by state" or "group by state", the state column is in the people table, JOIN to it
+
+EXAMPLES:
+❌ WRONG: SELECT COUNT(*) FROM work_experience GROUP BY company
+(when asked "count by state" - state is NOT in work_experience!)
+
+✅ CORRECT: SELECT p.state, COUNT(*) as employee_count
+           FROM work_experience w
+           JOIN people p ON w.person_id = p.id
+           GROUP BY p.state
+
+❌ WRONG: SELECT * FROM work_experience WHERE company LIKE '%FL%'
+(when asked "how many from FL" - FL is a state, not a company!)
+
+✅ CORRECT: SELECT COUNT(*) as count
+           FROM people p
+           WHERE p.state = 'FL'
 
-Return ONLY the SQL query, no explanations:"""
+Return ONLY the executable SQL query. No explanations, no markdown, no code blocks:"""
 
         r = self.client.chat.completions.create(
             model="gpt-4o-mini",
             messages=[
-                {"role": "system", "content": "SQL …
+                {"role": "system", "content": "You are an expert SQL query generator. ALWAYS use JOIN when data is spread across multiple tables. ALWAYS check which table a column belongs to before using it. State, city, name are typically in people tables. Position, company are in work_experience tables. Return ONLY executable SQL."},
                 {"role": "user", "content": prompt}
             ],
             temperature=0
@@ -562,7 +670,7 @@ Return ONLY the SQL query, no explanations:"""
         return [r[0] for r in self.cursor.fetchall()]
 
     def _refresh_schema(self):
-        """Refresh."""
+        """Refresh schema info."""
         self.schema_info = {}
         for t in self._get_tables():
             self.cursor.execute(f"PRAGMA table_info({t})")
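
The FK detection in `_create_foreign_keys` is purely name-based, including a small plural-to-singular step. A standalone sketch of just that naming heuristic (the table name below is invented); note that the code comment's `people -> person` case is not actually covered, since "people" ends in none of `ies`/`es`/`s`:

```python
# Reproduces the naming heuristic _create_foreign_keys uses to guess which
# columns are foreign keys pointing at a table with an 'id' column.
def candidate_fk_names(table: str) -> set:
    names = {f"{table}_id"}            # the full table name as an FK suffix
    for part in table.split('_'):
        names.add(f"{part}_id")
        if part.endswith('ies'):       # 'companies' -> 'company_id'
            names.add(f"{part[:-3]}y_id")
        elif part.endswith('es'):      # 'addresses' -> 'address_id'
            names.add(f"{part[:-2]}_id")
        elif part.endswith('s'):       # 'departments' -> 'department_id'
            names.add(f"{part[:-1]}_id")
    return names

print(sorted(candidate_fk_names("employee_data_companies")))
# -> ['companies_id', 'company_id', 'data_id',
#     'employee_data_companies_id', 'employee_id']
```
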
querysutra-0.5.3.dist-info/RECORD
DELETED

@@ -1,28 +0,0 @@
-querysutra-0.5.3.dist-info/licenses/LICENSE,sha256=F-4b93u0OVrVwGXgMwBRq6MlGyUT9zmre1oh5Gft5Ts,1066
-sutra/__init__.py,sha256=25HUMETpmA1tlMl5j-ajdo9MRXljSZBrirSTH7w7jIc,118
-sutra/cache_manager.py,sha256=e0AAeUqoR-aiqzZ3fB-IDvpJ4JA6-YBFyRJxusEnIrA,3082
-sutra/clear_cache.py,sha256=rVIz29p7V11Uh6oHXeaWpFtYXXv-2OED91cHMAWWxtQ,187
-sutra/core.py,sha256=R_JbOlZTukegP92Dr-WLsdr632_otFN7o9qSvcxyBtw,10497
-sutra/data_loader.py,sha256=_yPj-DS2qYtlCgaMACQtfXZfSuAdVVd4igNP7yzXolc,5781
-sutra/database_manager.py,sha256=L-QC_WwR3Pnl1BRh0rnEv5MNSTr4C7ZP-hIPfCHRK88,7672
-sutra/direct_query.py,sha256=X69I646zHIZlZjMmgn8O2xLS_7ww7miAkABTnJEPAAc,2724
-sutra/feedback.py,sha256=PHSffU_rfORjLkTW3-j2VSjQdw4ufROsTeBWaX6DZ00,1642
-sutra/feedback_matcher.py,sha256=WXYpGtFJnOyYQOzy-z8uBiUWH5vyJJOMS1NwEYzNfic,2865
-sutra/nlp_processor.py,sha256=wMS1hz1aGWjSwPUD7lSNBbQapFtLgF2l65j0QKXQOd0,5461
-sutra/schema_embeddings.py,sha256=bVPzpJOdYTyUdG2k3ZdgYJLrX2opHBx68RIjJcMlueo,9732
-sutra/schema_generator.py,sha256=BX_vXmnvSGc6nCBx40WLSoNL3WIYPDahd1cEYloyY4M,1925
-sutra/sutra.py,sha256=61juV3zlMau4UZJ-5IxjaN-Bc1XBP8w2vkYfum-aXlY,21979
-sutra/sutra_client.py,sha256=PYYDGqVbA9pB-Zcsm52i9KarwijCIGVZOThgONZP6Vs,14203
-sutra/sutra_core.py,sha256=diaWOXUHn1wrqCQrBhLKL612tMQioaqx-ILc3y9-CqM,11708
-sutra/sutra_simple.py,sha256=rnqzG7OAt4p64XtO0peMqHS1pG5tdA8U3EYTMVsq7BE,23201
-sutra/visualizer.py,sha256=YOKTmjQcY72smmx9KsZrQTdbAiE5GQDKofMFjpLIUfI,6996
-tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/test_modules.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tests/test_sutra.py,sha256=6Z4SoIuBzza101304I7plkyPVkUBbjIxR8uPs9z5ntg,2383
-utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-utils/file_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-utils/text_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-querysutra-0.5.3.dist-info/METADATA,sha256=yFffBSYGfbLrYnXA7OFGHk1mO37fpUV-0iglmHXbAVQ,7258
-querysutra-0.5.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-querysutra-0.5.3.dist-info/top_level.txt,sha256=9v0buw21eo5LaUU_3Cf9b9MqRyEvtM9cHaOuEXUKVqM,18
-querysutra-0.5.3.dist-info/RECORD,,
tests/__init__.py
DELETED
File without changes

tests/test_modules.py
DELETED
File without changes

tests/test_sutra.py
DELETED

@@ -1,76 +0,0 @@
-"""
-Test suite for SUTRA library
-Run with: pytest test_sutra.py
-"""
-
-import pytest
-import pandas as pd
-import os
-from sutra import SutraClient
-
-
-class TestSutraClient:
-    """Test cases for SutraClient"""
-
-    @pytest.fixture
-    def client(self):
-        """Create a test client"""
-        # Use a test database
-        api_key = os.getenv('OPENAI_API_KEY', 'test-key')
-        client = SutraClient(api_key=api_key, db_path="test_db.db")
-        yield client
-        # Cleanup
-        client.close()
-        if os.path.exists("test_db.db"):
-            os.remove("test_db.db")
-
-    @pytest.fixture
-    def sample_data(self):
-        """Create sample DataFrame"""
-        return pd.DataFrame({
-            'name': ['Alice', 'Bob', 'Charlie'],
-            'age': [25, 30, 35],
-            'city': ['New York', 'London', 'Paris']
-        })
-
-    def test_client_initialization(self, client):
-        """Test client can be initialized"""
-        assert client is not None
-        assert client.db_path == "test_db.db"
-
-    def test_upload_dataframe(self, client, sample_data):
-        """Test uploading a DataFrame"""
-        result = client.upload_dataframe(sample_data, "test_table")
-        assert result['status'] == 'success'
-        assert result['table_name'] == 'test_table'
-        assert result['rows_inserted'] == 3
-
-    def test_list_tables(self, client, sample_data):
-        """Test listing tables"""
-        client.upload_dataframe(sample_data, "test_table")
-        tables = client.list_tables()
-        assert 'test_table' in tables
-
-    def test_execute_sql(self, client, sample_data):
-        """Test direct SQL execution"""
-        client.upload_dataframe(sample_data, "test_table")
-        result = client.execute_sql("SELECT * FROM test_table")
-        assert result['status'] == 'success'
-        assert len(result['results']) == 3
-
-    def test_get_table_info(self, client, sample_data):
-        """Test getting table information"""
-        client.upload_dataframe(sample_data, "test_table")
-        info = client.get_table_info("test_table")
-        assert info['table_name'] == 'test_table'
-        assert len(info['columns']) > 0
-
-
-def test_import():
-    """Test that the library can be imported"""
-    from sutra import SutraClient
-    assert SutraClient is not None
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
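
The removed suite targeted the old `SutraClient` API, which this release drops from the public surface. A hypothetical minimal replacement against the new `SUTRA` surface; the constructor behavior and the `upload`/`ask` signatures are assumptions based on this diff, not a spec:

```python
# Hypothetical smoke tests for the 0.6.0 API. SUTRA and QueryResult are the
# names exported by the new sutra/__init__.py; everything else is assumed.
import os

import pandas as pd
import pytest


def test_import():
    from sutra import SUTRA, QueryResult
    assert SUTRA is not None and QueryResult is not None


@pytest.mark.skipif(not os.environ.get("OPENAI_API_KEY"), reason="needs an API key")
def test_upload_and_ask():
    from sutra import SUTRA
    s = SUTRA()  # assumed: reads OPENAI_API_KEY from the environment
    s.upload(pd.DataFrame({"name": ["Alice"], "state": ["FL"]}), name="people")
    result = s.ask("how many people are from FL?")
    assert result is not None
```
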
utils/__init__.py
DELETED
File without changes

utils/file_utils.py
DELETED
File without changes

utils/text_utils.py
DELETED
File without changes

{querysutra-0.5.3.dist-info → querysutra-0.6.0.dist-info}/licenses/LICENSE
File without changes