PyPI - genshotsql - Versions diffs - 0.1.2__tar.gz - Mend

genshotsql 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

genshotsql-0.1.2/PKG-INFO +5 -0
genshotsql-0.1.2/genshotsql.egg-info/PKG-INFO +5 -0
genshotsql-0.1.2/genshotsql.egg-info/SOURCES.txt +8 -0
genshotsql-0.1.2/genshotsql.egg-info/dependency_links.txt +1 -0
genshotsql-0.1.2/genshotsql.egg-info/entry_points.txt +2 -0
genshotsql-0.1.2/genshotsql.egg-info/top_level.txt +1 -0
genshotsql-0.1.2/pyproject.toml +12 -0
genshotsql-0.1.2/setup.cfg +4 -0
genshotsql-0.1.2/templates/chat_with_database.py +468 -0
genshotsql-0.1.2/templates/config.py +143 -0

genshotsql-0.1.2/PKG-INFO ADDED Viewed

@@ -0,0 +1,5 @@
+Metadata-Version: 2.4
+Name: genshotsql
+Version: 0.1.2
+Summary: A framework for interacting with SQL databases by writing prompts
+Author: Pranav Verma

genshotsql-0.1.2/genshotsql.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,5 @@
+Metadata-Version: 2.4
+Name: genshotsql
+Version: 0.1.2
+Summary: A framework for interacting with SQL databases by writing prompts
+Author: Pranav Verma

genshotsql-0.1.2/genshotsql.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,8 @@
+pyproject.toml
+genshotsql.egg-info/PKG-INFO
+genshotsql.egg-info/SOURCES.txt
+genshotsql.egg-info/dependency_links.txt
+genshotsql.egg-info/entry_points.txt
+genshotsql.egg-info/top_level.txt
+templates/chat_with_database.py
+templates/config.py

genshotsql-0.1.2/genshotsql.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

genshotsql-0.1.2/genshotsql.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ gensql = genshotsql.cli:main

genshotsql-0.1.2/genshotsql.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ templates

genshotsql-0.1.2/pyproject.toml ADDED Viewed

@@ -0,0 +1,12 @@
+[project]
+name = "genshotsql"
+version = "0.1.2"
+description = "A framework for interacting with SQL databases by writing prompts"
+authors = [
+    { name = "Pranav Verma" }
+]
+[project.scripts]
+gensql = "genshotsql.cli:main"

genshotsql-0.1.2/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

genshotsql-0.1.2/templates/chat_with_database.py ADDED Viewed

@@ -0,0 +1,468 @@
+import json
+import re
+import urllib.request
+import urllib.error
+import faiss
+import mysql.connector
+import numpy as np
+# Import configuration from config.py
+from config import DB_CONFIG, OLLAMA_MODEL, OLLAMA_URL, OLLAMA_EMBED_MODEL, OLLAMA_EMBED_URL, SCHEMAS
+# In-memory store of schema documents; every entry is a dict with "title" and "content" keys.
+RAG_DOCUMENTS = []
+def add_schema_to_rag(schema):
+    """Render a structured schema dict as text and append it to RAG_DOCUMENTS.
+
+    ``schema`` must contain "table_name" and "columns"; each column needs
+    "name" and "type". The "description" and "usage" keys, and the per-column
+    "primary_key" and "description" keys, are optional and only rendered when
+    truthy.
+    """
+    name = schema["table_name"]
+    parts = [f"Table: {name}"]
+    purpose = schema.get("description")
+    if purpose:
+        parts.append(f"Purpose: {purpose}")
+    parts.append("Columns:")
+    for col in schema["columns"]:
+        pk_suffix = " PRIMARY KEY" if col.get("primary_key") else ""
+        note = col.get("description")
+        note_suffix = f". {note}" if note else ""
+        parts.append(f"- {col['name']} {col['type']}{pk_suffix}{note_suffix}")
+    usage = schema.get("usage")
+    if usage:
+        parts.append(usage)
+    # One line per part; surrounding whitespace is trimmed from the final text.
+    document = {"title": f"{name} table", "content": "\n".join(parts).strip()}
+    RAG_DOCUMENTS.append(document)
+def add_table_to_rag_documents(table_name, schema_text):
+    """Append an already formatted schema text for ``table_name`` to RAG_DOCUMENTS.
+
+    The text is stored with surrounding whitespace removed. Always returns True.
+    """
+    entry = dict(title=f"{table_name} table", content=schema_text.strip())
+    RAG_DOCUMENTS.append(entry)
+    return True
+def ask_ollama(prompt):
+    """Send ``prompt`` to the Ollama generate endpoint and return the reply text.
+
+    Posts a non-streaming JSON request for OLLAMA_MODEL to OLLAMA_URL with a
+    30 second timeout and returns the "response" field of the JSON reply with
+    surrounding whitespace removed.
+
+    Raises:
+        Exception: if the request fails or times out. The underlying error is
+            attached as ``__cause__`` so the original traceback is kept.
+    """
+    data = {
+        "model": OLLAMA_MODEL,
+        "prompt": prompt,
+        "stream": False,
+    }
+    request = urllib.request.Request(
+        OLLAMA_URL,
+        data=json.dumps(data).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=30) as response:
+            result = json.loads(response.read().decode("utf-8"))
+    # A timeout while reading the body is raised as TimeoutError, not URLError.
+    except (urllib.error.URLError, TimeoutError) as e:
+        raise Exception(f"Failed to connect to Ollama: {e}") from e
+    return result["response"].strip()
+def clean_json(text):
+    """Strip Markdown code fences from a model reply and extract the JSON object.
+
+    Returns the span from the first "{" to the last "}" when one exists,
+    otherwise the fence-stripped text.
+    """
+    cleaned = text.strip()
+    # Remove, in order: an opening ```json fence, a bare opening fence, a closing fence.
+    fence_patterns = (
+        (r"^```json", re.IGNORECASE),
+        (r"^```", 0),
+        (r"```$", 0),
+    )
+    for pattern, flags in fence_patterns:
+        cleaned = re.sub(pattern, "", cleaned, flags=flags).strip()
+    found = re.search(r"\{.*\}", cleaned, flags=re.DOTALL)
+    if found is None:
+        return cleaned
+    return found.group(0)
+def convert_readable_schema_to_rag_schema(readable_schema):
+    """Convert a free-text table description into ``(table_name, schema_text)``.
+
+    Asks the model for a JSON object with "table_name" and "schema", then
+    re-adds any ``*_id`` column that the user mentioned but the model left out
+    of the schema text. When the user text says the column is
+    "connected to <table> id", the column is written as a foreign key and a
+    join rule is appended.
+
+    Raises:
+        ValueError: if the reply is not valid JSON or lacks "table_name" or
+            "schema".
+    """
+    prompt = f"""
+Convert this user-written database schema into a RAG document.
+Return only valid JSON with these keys:
+- table_name
+- schema
+Rules:
+- Do not write explanation before or after the JSON.
+- Keep the schema simple and readable.
+- Include every column mentioned by the user.
+- The schema must start with: Table: <table_name>
+- Include Purpose, Columns, Use this table when, and Join rules if relationships exist.
+- If a primary key is mentioned, write PRIMARY KEY.
+- If a foreign key relationship is mentioned, write FOREIGN KEY REFERENCES table(column).
+- Do not invent columns that are not mentioned.
+User-written schema:
+{readable_schema}
+Example JSON format:
+{{
+  "table_name": "department",
+  "schema": "Table: department\\nPurpose: Stores department details.\\nColumns:\\n- id INT PRIMARY KEY. Unique department id.\\n- department_name VARCHAR(50). Department name.\\nUse this table when the question asks about departments."
+}}
+"""
+    response = clean_json(ask_ollama(prompt))
+    converted_schema = json.loads(response)
+    if "table_name" not in converted_schema or "schema" not in converted_schema:
+        raise ValueError("The model did not return table_name and schema.")
+    table_name = converted_schema["table_name"]
+    schema = converted_schema["schema"]
+    # dict.fromkeys drops repeated mentions while keeping first-mention order.
+    mentioned_id_columns = dict.fromkeys(
+        re.findall(r"\b([a-zA-Z][a-zA-Z0-9_]*_id)\b", readable_schema)
+    )
+    for column in mentioned_id_columns:
+        column_pattern = re.escape(column)
+        # Whole-word match: "dept_id" must not count as present just because
+        # "sub_dept_id" appears in the schema text.
+        if re.search(rf"\b{column_pattern}\b", schema):
+            continue
+        foreign_key_match = re.search(
+            rf"\b{column_pattern}\b.*?connected to ([a-zA-Z][a-zA-Z0-9_]*) id",
+            readable_schema,
+            flags=re.IGNORECASE,
+        )
+        if foreign_key_match:
+            referenced_table = foreign_key_match.group(1).lower()
+            column_text = (
+                f"- {column} INT FOREIGN KEY REFERENCES {referenced_table}(id). "
+                f"Connected to {referenced_table}.")
+            join_text = f"- {table_name}.{column} = {referenced_table}.id"
+        else:
+            column_text = f"- {column} INT. Mentioned by the user."
+            join_text = None
+        schema = _insert_column_line(schema, column_text)
+        if join_text:
+            if "Join rules:" not in schema:
+                schema += "\nJoin rules:"
+            schema += f"\n{join_text}"
+    return table_name, schema
+def _insert_column_line(schema, column_text):
+    """Return ``schema`` with ``column_text`` added to its column list."""
+    if "Columns:" not in schema:
+        return f"{schema}\nColumns:\n{column_text}"
+    # Put the line just before the first section that follows the column list.
+    for marker in ("Use this table when", "Join rules:"):
+        if marker in schema:
+            return schema.replace(marker, f"{column_text}\n{marker}", 1)
+    # No later section exists, so the column list is the tail of the text.
+    return f"{schema}\n{column_text}"
+def add_readable_schema_to_rag_documents(readable_schema):
+    """Convert a user-written schema and store the result in RAG_DOCUMENTS.
+
+    Returns whatever add_table_to_rag_documents returns.
+    """
+    converted = convert_readable_schema_to_rag_schema(readable_schema)
+    # converted is the (table_name, schema_text) pair produced by the converter.
+    return add_table_to_rag_documents(*converted)
+def get_embedding(text):
+    """Request an embedding for ``text`` from the Ollama embeddings endpoint.
+
+    Posts a JSON request for OLLAMA_EMBED_MODEL to OLLAMA_EMBED_URL with a
+    30 second timeout and returns the "embedding" field of the JSON reply.
+
+    Raises:
+        Exception: if the request fails or times out. The underlying error is
+            attached as ``__cause__`` so the original traceback is kept.
+    """
+    data = {
+        "model": OLLAMA_EMBED_MODEL,
+        "prompt": text,
+    }
+    request = urllib.request.Request(
+        OLLAMA_EMBED_URL,
+        data=json.dumps(data).encode("utf-8"),
+        headers={"Content-Type": "application/json"},
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=30) as response:
+            result = json.loads(response.read().decode("utf-8"))
+    # A timeout while reading the body is raised as TimeoutError, not URLError.
+    except (urllib.error.URLError, TimeoutError) as e:
+        raise Exception(f"Failed to get embedding from Ollama: {e}") from e
+    return result["embedding"]
+def normalize_vectors(vectors):
+    """Scale each row of ``vectors`` to unit L2 length.
+
+    Row norms are floored at 1e-12 so an all-zero row does not divide by zero.
+    """
+    lengths = np.linalg.norm(vectors, axis=1, keepdims=True)
+    safe_lengths = np.maximum(lengths, 1e-12)
+    return vectors / safe_lengths
+def build_faiss_index():
+    """Embed every document in RAG_DOCUMENTS and return an inner-product FAISS index.
+
+    Returns None when RAG_DOCUMENTS is empty. Embeddings are normalized first,
+    so the inner product behaves as cosine similarity.
+    """
+    if not RAG_DOCUMENTS:
+        return None
+    # Each document is embedded as its title followed by its content.
+    texts = ["\n".join((doc["title"], doc["content"])) for doc in RAG_DOCUMENTS]
+    matrix = np.asarray(list(map(get_embedding, texts)), dtype="float32")
+    matrix = normalize_vectors(matrix)
+    dimension = matrix.shape[1]
+    faiss_index = faiss.IndexFlatIP(dimension)
+    faiss_index.add(matrix)
+    return faiss_index
+# Lazily built FAISS index over RAG_DOCUMENTS; None until the first retrieval.
+FAISS_INDEX = None
+def retrieve_context(question, top_k=3):
+    """Return the content of the schema documents most similar to ``question``.
+
+    The FAISS index is built on first use and cached in FAISS_INDEX. It is
+    rebuilt whenever its vector count no longer matches RAG_DOCUMENTS, so
+    documents added after the first query are searchable.
+
+    Args:
+        question: Natural-language question to embed and search with.
+        top_k: Maximum number of documents to return; capped at the number of
+            stored documents.
+
+    Returns:
+        The selected documents' "content" joined by blank lines, or
+        "No schema information available." when nothing is indexed.
+    """
+    global FAISS_INDEX
+    if not RAG_DOCUMENTS:
+        return "No schema information available."
+    top_k = min(top_k, len(RAG_DOCUMENTS))
+    # A cached index with a different vector count is stale: rows and
+    # RAG_DOCUMENTS positions would no longer line up.
+    if FAISS_INDEX is None or FAISS_INDEX.ntotal != len(RAG_DOCUMENTS):
+        FAISS_INDEX = build_faiss_index()
+    if FAISS_INDEX is None:
+        return "No schema information available."
+    query_embedding = np.array([get_embedding(question)], dtype="float32")
+    query_embedding = normalize_vectors(query_embedding)
+    _, indexes = FAISS_INDEX.search(query_embedding, top_k)
+    # FAISS pads missing results with -1, which would otherwise select the
+    # last document through negative indexing.
+    selected_documents = [RAG_DOCUMENTS[index] for index in indexes[0] if index >= 0]
+    return "\n\n".join(document["content"] for document in selected_documents)
+def clean_sql(text):
+    """Strip Markdown code fences and trailing semicolons from a model reply."""
+    cleaned = text.strip()
+    # Remove, in order: an opening ```sql fence, a bare opening fence, a closing fence.
+    fence_patterns = (
+        (r"^```sql", re.IGNORECASE),
+        (r"^```", 0),
+        (r"```$", 0),
+    )
+    for pattern, flags in fence_patterns:
+        cleaned = re.sub(pattern, "", cleaned, flags=flags).strip()
+    return cleaned.rstrip(";")
+def question_to_sql(question):
+    """Turn a natural-language question into a single validated MySQL SELECT.
+
+    Retrieves schema context for the question, asks the model for SQL, cleans
+    the reply and checks that it is a read-only SELECT.
+
+    Raises:
+        ValueError: if the generated SQL does not start with SELECT or
+            contains a blocked keyword.
+    """
+    context = retrieve_context(question)
+    prompt = f"""
+You are a MySQL assistant.
+Convert the user's question into one MySQL SELECT query.
+Rules:
+- Return only SQL.
+- Only use SELECT queries.
+- Do not use INSERT, UPDATE, DELETE, DROP, ALTER, or CREATE.
+- Use only the retrieved database context below.
+- Always use proper JOIN syntax.
+- Use parameterized values (strings in quotes).
+Retrieved database context:
+{context}
+User question: {question}
+"""
+    return _ensure_select_only(clean_sql(ask_ollama(prompt)))
+def repair_sql(question, bad_sql, error_message):
+    """Ask the model to fix a SELECT that the database rejected.
+
+    Sends the question, the failing SQL and the database error back to the
+    model and applies the same validation as question_to_sql to the reply.
+
+    Raises:
+        ValueError: if the repaired SQL does not start with SELECT or
+            contains a blocked keyword.
+    """
+    context = retrieve_context(question)
+    prompt = f"""
+You are a MySQL assistant.
+The SQL query below failed. Fix it.
+Rules:
+- Return only corrected SQL.
+- Only use SELECT queries.
+- Do not use INSERT, UPDATE, DELETE, DROP, ALTER, or CREATE.
+- Use only the retrieved database context below.
+- Always use proper JOIN syntax.
+- Fix any syntax errors or column name issues.
+Retrieved database context:
+{context}
+User question:
+{question}
+Bad SQL:
+{bad_sql}
+Database error:
+{error_message}
+"""
+    return _ensure_select_only(clean_sql(ask_ollama(prompt)))
+# Whole-word match, so identifiers such as created_at, updated_at or
+# is_deleted are not mistaken for data-modifying statements.
+_BLOCKED_SQL_KEYWORDS = re.compile(
+    r"\b(?:insert|update|delete|drop|alter|create|truncate)\b",
+    flags=re.IGNORECASE,
+)
+def _ensure_select_only(sql):
+    """Return ``sql`` unchanged if it is a SELECT without blocked keywords."""
+    if not sql.lower().startswith("select"):
+        raise ValueError("Only SELECT queries are allowed.")
+    if _BLOCKED_SQL_KEYWORDS.search(sql):
+        raise ValueError("This query is not allowed.")
+    return sql
+def run_query(sql):
+    """Execute ``sql`` on the configured MySQL database.
+
+    Opens a new connection from DB_CONFIG for the call and always closes the
+    cursor and connection, including when the query fails.
+
+    Returns:
+        A ``(columns, rows)`` tuple: the column names taken from the cursor
+        description and the rows returned by ``fetchall()``.
+
+    Raises:
+        Exception: wrapping any mysql.connector error, with the original
+            error attached as ``__cause__``.
+    """
+    try:
+        conn = mysql.connector.connect(**DB_CONFIG)
+        try:
+            cursor = conn.cursor()
+            try:
+                cursor.execute(sql)
+                rows = cursor.fetchall()
+                columns = [column[0] for column in cursor.description]
+            finally:
+                cursor.close()
+        finally:
+            conn.close()
+        return columns, rows
+    except mysql.connector.Error as e:
+        raise Exception(f"Database error: {e}") from e
+def explain_result(question, sql, columns, rows):
+    """Ask the model for a plain-English explanation of a query result.
+
+    Only the first ten rows go into the prompt, together with the total row
+    count, to keep the prompt small.
+    """
+    preview = rows[:10]
+    shown = len(preview)
+    total = len(rows)
+    prompt = f"""
+The user asked: {question}
+SQL used:
+{sql}
+Columns:
+{columns}
+Rows (showing first {shown} of {total}):
+{preview}
+Explain the result in simple English.
+"""
+    return ask_ollama(prompt)
+def initialize_schemas():
+    """Load every schema in SCHEMAS into RAG_DOCUMENTS and report the count."""
+    for entry in SCHEMAS:
+        add_schema_to_rag(entry)
+    loaded = len(SCHEMAS)
+    print(f"Loaded {loaded} schemas into RAG")
+def test_connection():
+    """Check that the database in DB_CONFIG is reachable.
+
+    Connects, runs ``SELECT VERSION()`` and prints the outcome. The cursor and
+    connection are closed even when the probe query fails.
+
+    Returns:
+        True when the probe succeeds, False on any error.
+    """
+    try:
+        conn = mysql.connector.connect(**DB_CONFIG)
+        try:
+            cursor = conn.cursor()
+            try:
+                cursor.execute("SELECT VERSION()")
+                version = cursor.fetchone()
+            finally:
+                cursor.close()
+        finally:
+            conn.close()
+        print(f"✓ Database connected successfully (MySQL Version: {version[0]})")
+        return True
+    # Deliberately broad: this is a diagnostic that reports any failure.
+    except Exception as e:
+        print(f"✗ Database connection failed: {e}")
+        return False
+def main():
+    """Run the interactive question-answering loop.
+
+    Checks the database connection, loads the configured schemas, then reads
+    questions until the user types 'exit' or 'quit', presses Ctrl+C, or input
+    reaches end-of-file. Each question is converted to SQL, executed with one
+    repair attempt on failure, printed as a table (first 20 rows) and
+    explained by the model.
+    """
+    print("="*60)
+    print("RAG Chat with Database")
+    print("="*60)
+    print("\nType 'exit' to stop the program")
+    print("Type 'test' to test database connection")
+    print("Type 'schemas' to show loaded schemas\n")
+    # Test database connection
+    if not test_connection():
+        print("\nPlease check your database configuration in config.py")
+        return
+    # Initialize schemas
+    try:
+        initialize_schemas()
+        print(f"✓ Loaded {len(SCHEMAS)} schemas into RAG\n")
+    except Exception as e:
+        print(f"Error initializing schemas: {e}")
+        return
+    while True:
+        try:
+            question = input("\n❓ Ask: ").strip()
+            if question.lower() in ["exit", "quit"]:
+                print("\nGoodbye! 👋")
+                break
+            if question.lower() == "test":
+                test_connection()
+                continue
+            if question.lower() == "schemas":
+                print("\nLoaded Schemas:")
+                for schema in SCHEMAS:
+                    print(f"  - {schema['table_name']}: {schema.get('description', 'No description')}")
+                continue
+            if not question:
+                continue
+            print("\n🤔 Processing your question...")
+            # Generate SQL
+            sql = question_to_sql(question)
+            print(f"\n📝 Generated SQL: {sql}")
+            # Try to execute with repair attempts
+            columns = None
+            rows = None
+            for attempt in range(2):
+                try:
+                    columns, rows = run_query(sql)
+                    break
+                except Exception as db_error:
+                    # The second failure is final; the outer handler reports it.
+                    if attempt == 1:
+                        print(f"\n❌ Database error after retry: {db_error}")
+                        raise
+                    print("\n🔧 SQL error, attempting repair...")
+                    sql = repair_sql(question, sql, str(db_error))
+                    print(f"📝 Repaired SQL: {sql}")
+            # Display results
+            print("\n" + "="*60)
+            print("📊 RESULTS:")
+            print("="*60)
+            if rows:
+                # Print column headers
+                header = " | ".join(columns)
+                print(header)
+                print("-" * len(header))
+                for row in rows[:20]:  # Show first 20 rows
+                    print(" | ".join(str(value) for value in row))
+                if len(rows) > 20:
+                    print(f"\n... and {len(rows) - 20} more rows")
+                print(f"\n📈 Total rows returned: {len(rows)}")
+            else:
+                print("No rows found")
+            # Get natural language explanation
+            print("\n" + "="*60)
+            print("💡 EXPLANATION:")
+            print("="*60)
+            answer = explain_result(question, sql, columns, rows)
+            print(f"\n{answer}\n")
+        # EOFError must end the loop: input() raises it on every call once
+        # stdin is closed, so treating it as a generic error would loop forever.
+        except (KeyboardInterrupt, EOFError):
+            print("\n\nGoodbye! 👋")
+            break
+        except Exception as error:
+            print(f"\n❌ Error: {error}\n")
+            print("💡 Tips:")
+            print("  - Make sure Ollama is running: 'ollama serve'")
+            print("  - Pull required models: 'ollama pull llama3.2:3b && ollama pull nomic-embed-text'")
+            print("  - Check database connection in config.py")
+# Start the interactive chat loop only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

genshotsql-0.1.2/templates/config.py ADDED Viewed

@@ -0,0 +1,143 @@
+# config.example.py
+# Copy this file to config.py and update with your credentials
+# NOTE(review): this listing ships the file as templates/config.py, so the
+# placeholder values below must be replaced in place before running.
+# Database configuration
+# Unpacked as keyword arguments into mysql.connector.connect() by chat_with_database.py.
+DB_CONFIG = {
+    "host": "host_name",
+    "user": "user_name",
+    "password": "password",
+    "database": "database_name",
+}
+# Ollama configuration
+# OLLAMA_MODEL / OLLAMA_URL: model name and endpoint used for text generation requests.
+# OLLAMA_EMBED_MODEL / OLLAMA_EMBED_URL: model name and endpoint used for embedding requests.
+OLLAMA_MODEL = "llama3.2:3b"
+OLLAMA_URL = "http://127.0.0.1:11434/api/generate"
+OLLAMA_EMBED_MODEL = "nomic-embed-text"
+OLLAMA_EMBED_URL = "http://127.0.0.1:11434/api/embeddings"
+# Database schemas
+# Each entry is a dict with "table_name" and "columns" (a list of dicts with
+# "name" and "type"), plus optional "description" and "usage" and optional
+# per-column "primary_key" and "description". See the commented example below.
+SCHEMAS = [
+    ## Add your database schemas here
+]
+# SCHEMAS = [
+#     {
+#         "table_name": "employee",
+#         "description": "Stores employee details including personal information, department assignment, and salary.",
+#         "columns": [
+#             {
+#                 "name": "id",
+#                 "type": "INT",
+#                 "primary_key": True,
+#                 "description": "Unique employee identifier."
+#             },
+#             {
+#                 "name": "name",
+#                 "type": "VARCHAR(100)",
+#                 "primary_key": False,
+#                 "description": "Full name of the employee."
+#             },
+#             {
+#                 "name": "department_id",
+#                 "type": "INT",
+#                 "primary_key": False,
+#                 "description": "Foreign key referencing the department where the employee works."
+#             },
+#             {
+#                 "name": "salary",
+#                 "type": "INT",
+#                 "primary_key": False,
+#                 "description": "Employee's salary in numeric format."
+#             }
+#         ],
+#         "usage": "Use this table for employee information, employee names, salaries, and department assignments."
+#     },
+#     {
+#         "table_name": "department",
+#         "description": "Stores department information and office location mapping.",
+#         "columns": [
+#             {
+#                 "name": "id",
+#                 "type": "INT",
+#                 "primary_key": True,
+#                 "description": "Unique department identifier."
+#             },
+#             {
+#                 "name": "name",
+#                 "type": "VARCHAR(100)",
+#                 "primary_key": False,
+#                 "description": "Department name (e.g., IT, HR, Finance, Sales, Marketing, Operations, Support, Research, Legal, Engineering)."
+#             },
+#             {
+#                 "name": "office_id",
+#                 "type": "INT",
+#                 "primary_key": False,
+#                 "description": "Foreign key referencing the office location where the department is situated."
+#             }
+#         ],
+#         "usage": "Use this table when the question asks about departments, department names, or office locations."
+#     },
+#     {
+#         "table_name": "project",
+#         "description": "Stores project information available in the company.",
+#         "columns": [
+#             {
+#                 "name": "id",
+#                 "type": "INT",
+#                 "primary_key": True,
+#                 "description": "Unique project identifier."
+#             },
+#             {
+#                 "name": "project_name",
+#                 "type": "VARCHAR(100)",
+#                 "primary_key": False,
+#                 "description": "Name of the project (e.g., Payroll System, Inventory App, Customer Portal, CRM Upgrade, Mobile Banking, Data Warehouse, Analytics Dashboard, AI Assistant, Cloud Migration, E-Commerce Platform)."
+#             }
+#         ],
+#         "usage": "Use this table when the question asks about projects, project names, or project-related information."
+#     },
+#     {
+#         "table_name": "employee_project",
+#         "description": "Junction table mapping employees to projects they are working on (many-to-many relationship).",
+#         "columns": [
+#             {
+#                 "name": "id",
+#                 "type": "INT",
+#                 "primary_key": True,
+#                 "description": "Unique mapping identifier."
+#             },
+#             {
+#                 "name": "employee_id",
+#                 "type": "INT",
+#                 "primary_key": False,
+#                 "description": "Foreign key referencing the employee assigned to a project."
+#             },
+#             {
+#                 "name": "project_id",
+#                 "type": "INT",
+#                 "primary_key": False,
+#                 "description": "Foreign key referencing the project assigned to an employee."
+#             }
+#         ],
+#         "usage": "Use this table to find relationships between employees and projects. This table does NOT contain name columns. To get employee or project names, join with employee and project tables."
+#     },
+#     {
+#         "table_name": "office",
+#         "description": "Stores office location information.",
+#         "columns": [
+#             {
+#                 "name": "id",
+#                 "type": "INT",
+#                 "primary_key": True,
+#                 "description": "Unique office identifier."
+#             },
+#             {
+#                 "name": "city",
+#                 "type": "VARCHAR(100)",
+#                 "primary_key": False,
+#                 "description": "City where the office is located (e.g., Bangalore, Mumbai, Delhi, Pune, Hyderabad, Chennai, Kolkata, Ahmedabad, Noida, Jaipur)."
+#             }
+#         ],
+#         "usage": "Use this table when the question asks about office locations, cities, or where departments are situated."
+#     }
+# ]