querysutra-0.4.0-py3-none-any.whl → querysutra-0.4.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- querysutra-0.4.1.dist-info/METADATA +264 -0
- {querysutra-0.4.0.dist-info → querysutra-0.4.1.dist-info}/RECORD +7 -7
- sutra/__init__.py +2 -3
- sutra/sutra.py +272 -294
- querysutra-0.4.0.dist-info/METADATA +0 -438
- {querysutra-0.4.0.dist-info → querysutra-0.4.1.dist-info}/WHEEL +0 -0
- {querysutra-0.4.0.dist-info → querysutra-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {querysutra-0.4.0.dist-info → querysutra-0.4.1.dist-info}/top_level.txt +0 -0
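The headline change in this release is the automatic MySQL export path: upload() gains an auto_export_mysql argument, and save_to_mysql() gains an auto_create flag that issues CREATE DATABASE IF NOT EXISTS before writing tables. A minimal usage sketch based on the new signatures in the diff below; the import path, host, credentials, and file name are placeholder assumptions, not values taken from the package:

    from sutra import SUTRA  # assumed import path; the wheel's top-level module is `sutra`

    s = SUTRA(api_key="sk-...", db="sutra.db")

    # Upload a document and push every extracted table straight to MySQL;
    # per the new code, the target database is created first if it does not exist.
    s.upload("report.pdf", auto_export_mysql={
        "host": "localhost",
        "user": "root",
        "password": "change-me",
        "database": "my_database",
    })

    # The same export can also be run on its own.
    s.save_to_mysql(host="localhost", user="root", password="change-me",
                    database="my_database", auto_create=True)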
sutra/sutra.py
CHANGED
@@ -1,36 +1,32 @@
 """
-QuerySUTRA v0.
+QuerySUTRA v0.4.0 - SIMPLE & AUTOMATIC
 SUTRA: Structured-Unstructured-Text-Retrieval-Architecture

-FIXED:
--
--
--
--
-- Comprehensive entity extraction
+FIXED:
+- Auto-creates MySQL database if not exists
+- One-line export to MySQL
+- Complete data extraction from large PDFs
+- No manual file transfers needed

 Author: Aditya Batta
-
-Version: 0.3.5
+Version: 0.4.0
 """

-__version__ = "0.
+__version__ = "0.4.0"
 __author__ = "Aditya Batta"
-__title__ = "QuerySUTRA: Structured-Unstructured-Text-Retrieval-Architecture"
 __all__ = ["SUTRA", "QueryResult", "quick_start"]

 import os
 import sqlite3
 import pandas as pd
 import numpy as np
-from typing import Optional, Union, Dict,
+from typing import Optional, Union, Dict, List
 from pathlib import Path
 import json
 import hashlib
 import warnings
 import shutil
 import datetime
-import re
 from io import StringIO
 from difflib import get_close_matches
 warnings.filterwarnings('ignore')
@@ -74,19 +70,13 @@ except ImportError:


 class SUTRA:
-    """
-
-    ""
-
-
-
-
-                 use_embeddings: bool = False,
-                 check_relevance: bool = False,
-                 fuzzy_match: bool = True,
-                 cache_queries: bool = True):
-        """Initialize SUTRA."""
-        print("Initializing QuerySUTRA v0.3.5")
+    """SUTRA: Structured-Unstructured-Text-Retrieval-Architecture"""
+
+    def __init__(self, api_key: Optional[str] = None, db: str = "sutra.db",
+                 use_embeddings: bool = False, check_relevance: bool = False,
+                 fuzzy_match: bool = True, cache_queries: bool = True):
+        """Initialize."""
+        print("Initializing QuerySUTRA v0.4.0")

         if api_key:
             os.environ["OPENAI_API_KEY"] = api_key
@@ -96,7 +86,6 @@ class SUTRA:

         self.db_path = db

-        # FIXED: Better connection handling for Colab
         try:
             self.conn = sqlite3.connect(db, timeout=30, check_same_thread=False)
             self.conn.execute("PRAGMA journal_mode=WAL")
@@ -105,24 +94,20 @@ class SUTRA:
             self.conn = sqlite3.connect(db, check_same_thread=False)

         self.cursor = self.conn.cursor()
-
         self.current_table = None
         self.schema_info = {}

         self.cache_queries = cache_queries
         self.cache = {} if cache_queries else None
-
         self.use_embeddings = use_embeddings
         self.embedding_model = None
         self.query_embeddings = {}
-
         self.check_relevance = check_relevance
         self.fuzzy_match = fuzzy_match

         if use_embeddings and HAS_EMBEDDINGS:
             try:
                 self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-                print("Embeddings ready")
             except:
                 self.use_embeddings = False

@@ -133,12 +118,8 @@
     def load_from_db(cls, db_path: str, api_key: Optional[str] = None, **kwargs):
         """Load existing database."""
         if not Path(db_path).exists():
-            raise FileNotFoundError(f"
-
-        print(f"Loading: {db_path}")
-        instance = cls(api_key=api_key, db=db_path, **kwargs)
-        print(f"Loaded {len(instance.tables())} tables")
-        return instance
+            raise FileNotFoundError(f"Not found: {db_path}")
+        return cls(api_key=api_key, db=db_path, **kwargs)

     @classmethod
     def connect_mysql(cls, host: str, user: str, password: str, database: str,
@@ -146,26 +127,34 @@
         """Connect to MySQL."""
         try:
             from sqlalchemy import create_engine
+            import mysql.connector
         except ImportError:
             raise ImportError("Run: pip install QuerySUTRA[mysql]")

-        print(f"Connecting to MySQL
+        print(f"Connecting to MySQL...")

-
+        # Auto-create database if not exists
+        try:
+            temp_conn = mysql.connector.connect(host=host, user=user, password=password, port=port)
+            temp_cursor = temp_conn.cursor()
+            temp_cursor.execute(f"CREATE DATABASE IF NOT EXISTS {database}")
+            temp_cursor.close()
+            temp_conn.close()
+        except:
+            pass

+        engine = create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}")
         temp_db = f"sutra_mysql_{database}.db"
         instance = cls(api_key=api_key, db=temp_db, **kwargs)

         tables = pd.read_sql_query("SHOW TABLES", engine).iloc[:, 0].tolist()
-        print(f"Syncing {len(tables)} tables...")

         for table in tables:
             df = pd.read_sql_query(f"SELECT * FROM {table}", engine)
             df.to_sql(table, instance.conn, if_exists='replace', index=False)
-            print(f"  {table}: {len(df)} rows")

         instance._refresh_schema()
-        print("Connected!")
+        print(f"Connected! {len(tables)} tables")
         return instance

     @classmethod
@@ -177,286 +166,250 @@ class SUTRA:
         except ImportError:
             raise ImportError("Run: pip install QuerySUTRA[postgres]")

-        print(f"Connecting to PostgreSQL
+        print(f"Connecting to PostgreSQL...")

         engine = create_engine(f"postgresql://{user}:{password}@{host}:{port}/{database}")
-
         temp_db = f"sutra_postgres_{database}.db"
         instance = cls(api_key=api_key, db=temp_db, **kwargs)

         tables = pd.read_sql_query("SELECT tablename FROM pg_tables WHERE schemaname='public'", engine)['tablename'].tolist()
-        print(f"Syncing {len(tables)} tables...")

         for table in tables:
             df = pd.read_sql_query(f"SELECT * FROM {table}", engine)
             df.to_sql(table, instance.conn, if_exists='replace', index=False)
-            print(f"  {table}: {len(df)} rows")

         instance._refresh_schema()
-        print("Connected!")
+        print(f"Connected! {len(tables)} tables")
         return instance

     def upload(self, data: Union[str, pd.DataFrame], name: Optional[str] = None,
-               extract_entities: Optional[List[str]] = None
-
-
+               extract_entities: Optional[List[str]] = None,
+               auto_export_mysql: Optional[Dict[str, str]] = None) -> 'SUTRA':
+        """
+        Upload data with OPTIONAL automatic MySQL export.
+
+        Args:
+            data: File path or DataFrame
+            name: Table name
+            extract_entities: Custom entities to extract
+            auto_export_mysql: Auto-export to MySQL after upload
+                {'host': 'localhost', 'user': 'root', 'password': 'pass', 'database': 'mydb'}
+
+        Example:
+            sutra.upload("data.pdf", auto_export_mysql={
+                'host': 'localhost',
+                'user': 'root',
+                'password': '123456',
+                'database': 'my_database'
+            })
+        """
+        print("\nUploading...")

         if isinstance(data, pd.DataFrame):
             name = name or "data"
             self._store_dataframe(data, name)
-            return self
-
-        path = Path(data)
-        if not path.exists():
-            raise FileNotFoundError(f"File not found: {data}")
-
-        name = name or path.stem.replace(" ", "_").replace("-", "_")
-        ext = path.suffix.lower()
-
-        print(f"File: {path.name}")
-
-        if ext == ".csv":
-            df = pd.read_csv(path)
-            self._store_dataframe(df, name)
-        elif ext in [".xlsx", ".xls"]:
-            df = pd.read_excel(path)
-            self._store_dataframe(df, name)
-        elif ext == ".json":
-            df = pd.read_json(path)
-            self._store_dataframe(df, name)
-        elif ext == ".sql":
-            with open(path) as f:
-                self.cursor.executescript(f.read())
-            self.conn.commit()
-            self._refresh_schema()
-            print("SQL executed")
-        elif ext == ".pdf":
-            self._smart_upload_pdf(path, name, extract_entities)
-        elif ext == ".docx":
-            self._smart_upload_docx(path, name, extract_entities)
-        elif ext == ".txt":
-            self._smart_upload_txt(path, name, extract_entities)
         else:
-
+            path = Path(data)
+            if not path.exists():
+                raise FileNotFoundError(f"Not found: {data}")
+
+            name = name or path.stem.replace(" ", "_").replace("-", "_")
+            ext = path.suffix.lower()
+
+            print(f"File: {path.name}")
+
+            if ext == ".csv":
+                self._store_dataframe(pd.read_csv(path), name)
+            elif ext in [".xlsx", ".xls"]:
+                self._store_dataframe(pd.read_excel(path), name)
+            elif ext == ".json":
+                self._store_dataframe(pd.read_json(path), name)
+            elif ext == ".sql":
+                with open(path) as f:
+                    self.cursor.executescript(f.read())
+                self.conn.commit()
+                self._refresh_schema()
+            elif ext == ".pdf":
+                self._smart_upload_pdf(path, name, extract_entities)
+            elif ext == ".docx":
+                self._smart_upload_docx(path, name, extract_entities)
+            elif ext == ".txt":
+                self._smart_upload_txt(path, name, extract_entities)
+            else:
+                raise ValueError(f"Unsupported: {ext}")
+
+        # AUTO-EXPORT to MySQL if requested
+        if auto_export_mysql:
+            print("\nAuto-exporting to MySQL...")
+            self.save_to_mysql(
+                host=auto_export_mysql.get('host', 'localhost'),
+                user=auto_export_mysql.get('user', 'root'),
+                password=auto_export_mysql['password'],
+                database=auto_export_mysql['database'],
+                port=auto_export_mysql.get('port', 3306)
+            )

         return self

     def _smart_upload_pdf(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
-        """Parse PDF."""
+        """Parse PDF - extracts ALL pages."""
         if not HAS_PYPDF2:
             raise ImportError("Run: pip install PyPDF2")

-        print("Extracting
+        print("Extracting PDF...")

         with open(path, 'rb') as file:
             pdf_reader = PyPDF2.PdfReader(file)
-
+            full_text = ""
             for page_num, page in enumerate(pdf_reader.pages, 1):
-
+                full_text += page.extract_text() + "\n"
                 print(f"  Page {page_num}/{len(pdf_reader.pages)}")

         if self.client:
             print("AI: Extracting entities...")
-            tables = self._create_tables_with_ai(text, base_name, extract_entities)

-
-
-
-
-
-
+            # Process in chunks for large documents
+            chunk_size = 10000
+            all_entities = {}
+
+            for i in range(0, len(full_text), chunk_size):
+                chunk = full_text[i:i+chunk_size]
+                chunk_num = (i // chunk_size) + 1
+                total_chunks = (len(full_text) // chunk_size) + 1
+
+                if total_chunks > 1:
+                    print(f"  Chunk {chunk_num}/{total_chunks}...")
+
+                entities = self._extract_chunk(chunk, extract_entities)
+
+                for entity_type, records in entities.items():
+                    if entity_type not in all_entities:
+                        all_entities[entity_type] = []
+                    all_entities[entity_type].extend(records)
+
+            # Renumber IDs
+            for entity_type, records in all_entities.items():
+                for idx, record in enumerate(records, 1):
+                    record['id'] = idx
+
+            # Create tables
+            if all_entities:
+                print(f"\nCreated {len(all_entities)} tables:")
+                for entity_type, records in all_entities.items():
+                    if records:
+                        table_name = f"{base_name}_{entity_type}"
+                        df = pd.DataFrame(records)
+                        self._store_dataframe_safe(df, table_name)
+                        print(f"  {entity_type}: {len(df)} records")
             return

         print("Creating simple table")
-
-        self._store_dataframe(df, base_name)
+        self._store_dataframe(self._parse_text_simple(full_text), base_name)

     def _smart_upload_docx(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
         """Parse DOCX."""
         if not HAS_DOCX:
             raise ImportError("Run: pip install python-docx")

-        print("Extracting from DOCX...")
-
         doc = docx.Document(path)

         if doc.tables:
-            print(f"Found {len(doc.tables)} table(s)")
             for i, table in enumerate(doc.tables):
                 data = [[cell.text.strip() for cell in row.cells] for row in table.rows]
                 if data and len(data) > 1:
                     df = pd.DataFrame(data[1:], columns=data[0])
-
-                    self._store_dataframe(df, table_name)
+                    self._store_dataframe(df, f"{base_name}_table_{i+1}" if len(doc.tables) > 1 else base_name)
             return

         text = "\n".join([para.text for para in doc.paragraphs])

-        if self.client:
-
-
-
-
-
-
-
-            return
-
-        df = self._parse_text_simple(text)
-        self._store_dataframe(df, base_name)
+        if self.client and len(text) > 0:
+            entities = self._extract_chunk(text, extract_entities)
+            for entity_type, records in entities.items():
+                if records:
+                    df = pd.DataFrame(records)
+                    self._store_dataframe_safe(df, f"{base_name}_{entity_type}")
+        else:
+            self._store_dataframe(self._parse_text_simple(text), base_name)

     def _smart_upload_txt(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
         """Parse TXT."""
-        print("Reading TXT...")
-
         with open(path, 'r', encoding='utf-8') as file:
             text = file.read()

-        if self.client:
-
-
-
-
-
-
-
-            return
-
-        df = self._parse_text_simple(text)
-        self._store_dataframe(df, base_name)
+        if self.client and len(text) > 0:
+            entities = self._extract_chunk(text, extract_entities)
+            for entity_type, records in entities.items():
+                if records:
+                    df = pd.DataFrame(records)
+                    self._store_dataframe_safe(df, f"{base_name}_{entity_type}")
+        else:
+            self._store_dataframe(self._parse_text_simple(text), base_name)

-    def
-    """
+    def _extract_chunk(self, text: str, custom_entities: Optional[List[str]] = None) -> Dict:
+        """Extract entities from text chunk."""
         if not self.client:
-            return
+            return {}

         try:
-
-- people: id, name, email, phone, address, city, state, zip
-- skills: id, person_id, skill_name, proficiency, years
-- technologies: id, person_id, technology, category, proficiency
-- projects: id, person_id, project_name, description, role
-- certifications: id, person_id, cert_name, issuer, date
-- education: id, person_id, degree, institution, year
-- work_experience: id, person_id, company, title, start_date, end_date
-- events: id, host_id, description, location, date
-- organizations: id, name, address, city
-- ANY other structured data
-
-CRITICAL: Use UNIQUE sequential IDs (1,2,3...) for each table. Foreign keys MUST reference valid IDs."""
-
-            if custom_entities:
-                entity_list = f"Extract these entities: {', '.join(custom_entities)}"
-
-            extraction_prompt = f"""Extract structured data from this text.
+            prompt = f"""Extract ALL structured entities from this text.

 Text:
-{text[:
+{text[:8000]}
+
+Extract entities like: people, skills, technologies, projects, certifications, education, work_experience, events, organizations, or ANY other structured data.

-
+Return JSON with arrays. Use sequential IDs (1,2,3...). Foreign keys reference primary keys.

-
+Example:
 {{
-  "people": [{{"id": 1, "name": "John",
-  "skills": [{{"id": 1, "person_id": 1, "skill_name": "Python"
+  "people": [{{"id": 1, "name": "John", "email": "john@co.com", "city": "Dallas"}}, ...],
+  "skills": [{{"id": 1, "person_id": 1, "skill_name": "Python"}}, ...]
 }}

-
-- UNIQUE IDs: id=1,2,3,... (no duplicates)
-- Valid foreign keys: person_id must match people.id
-- Extract EVERYTHING
-- Return ONLY valid JSON"""
+Return ONLY valid JSON."""

-
+            resp = self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
-                    {"role": "system", "content": "Extract entities with unique IDs
-                    {"role": "user", "content":
+                    {"role": "system", "content": "Extract ALL entities with unique IDs. Return only JSON."},
+                    {"role": "user", "content": prompt}
                 ],
                 temperature=0,
-                max_tokens=
+                max_tokens=8000
             )

-            json_text =
-
-
-            extracted_data = json.loads(json_text)
-
-            created_tables = []
-
-            for entity_type, records in extracted_data.items():
-                if records and isinstance(records, list) and len(records) > 0:
-                    table_name = f"{base_name}_{entity_type}"
-
-                    try:
-                        df = pd.DataFrame(records)
-                        if not df.empty:
-                            # FIXED: Store with better error handling
-                            self._store_dataframe_safe(df, table_name)
-                            created_tables.append(table_name)
-                            print(f"  {entity_type}: {len(df)} records")
-                    except Exception as e:
-                        print(f"  Error {entity_type}: {e}")
-
-            return created_tables
-
+            json_text = resp.choices[0].message.content.strip().replace("```json", "").replace("```", "").strip()
+            return json.loads(json_text)
         except Exception as e:
-
-            return []
+            return {}

     def _store_dataframe_safe(self, df: pd.DataFrame, name: str):
-        """
+        """Store with error handling."""
         try:
             df.columns = [str(c).strip().replace(" ", "_").replace("-", "_") for c in df.columns]
-
-            # FIXED: Use method='multi' for better performance and if_exists='replace'
             df.to_sql(name, self.conn, if_exists='replace', index=False, method='multi', chunksize=500)
-
-            self.
+            self.conn.commit()
+            self.current_table = name
+            self._refresh_schema()
+        except:
+            df.to_sql(name, self.conn, if_exists='replace', index=False)
+            self.conn.commit()
             self.current_table = name
             self._refresh_schema()
-
-        except Exception as e:
-            # FIXED: Fallback to single-row insert if bulk fails
-            print(f"  Bulk insert failed, using row-by-row (slower but safer)")
-            try:
-                df.to_sql(name, self.conn, if_exists='replace', index=False)
-                self.conn.commit()
-                self.current_table = name
-                self._refresh_schema()
-            except Exception as e2:
-                print(f"  Storage error: {e2}")
-                raise

     def _parse_text_simple(self, text: str) -> pd.DataFrame:
         """Simple parsing."""
         lines = [line.strip() for line in text.split('\n') if line.strip()]
-
         if not lines:
             return pd.DataFrame({'content': ['No content']})

-
-
-
-
-                df = pd.read_csv(StringIO('\n'.join(lines)), sep=delimiter)
-                if len(df.columns) > 1:
-                    return df
-            except:
-                continue
-
-        return pd.DataFrame({
-            'line_number': range(1, len(lines) + 1),
-            'content': lines
-        })
-
-    def _store_dataframe(self, df: pd.DataFrame, name: str, silent: bool = False):
-        """Store DataFrame."""
+        return pd.DataFrame({'line_number': range(1, len(lines) + 1), 'content': lines})
+
+    def _store_dataframe(self, df: pd.DataFrame, name: str):
+        """Store."""
         self._store_dataframe_safe(df, name)
-
-        if not silent:
-            print(f"Uploaded: {name}")
-            print(f"  {len(df)} rows, {len(df.columns)} columns")
+        print(f"Uploaded: {name} ({len(df)} rows)")

     def ask(self, question: str, viz: Union[bool, str] = False, table: Optional[str] = None) -> 'QueryResult':
         """Natural language query."""
@@ -466,7 +419,7 @@ Requirements:
         print(f"\nQuestion: {question}")

         if self.check_relevance and not self._is_relevant_query(question):
-            print("Warning:
+            print("Warning: Irrelevant query")
             choice = input("Continue? (yes/no): ").strip().lower()
             if choice not in ['yes', 'y']:
                 return QueryResult(False, "", pd.DataFrame(), None, "Irrelevant")
@@ -478,7 +431,7 @@ Requirements:
         if self.use_embeddings and self.embedding_model:
             cached = self._check_embedding_cache(question, tbl)
             if cached:
-                print("  Cached
+                print("  Cached")
                 return cached

         if self.fuzzy_match:
@@ -519,19 +472,17 @@ Requirements:
         if not self.client:
             return True

-        tables = self._get_table_names()[:3]
-        cols = []
-        for tbl in tables:
-            cols.extend(list(self.schema_info.get(tbl, {}).keys())[:5])
-
-        context = f"Tables: {', '.join(tables)}. Columns: {', '.join(cols[:15])}"
-
         try:
+            tables = self._get_table_names()[:3]
+            cols = []
+            for tbl in tables:
+                cols.extend(list(self.schema_info.get(tbl, {}).keys())[:5])
+
             resp = self.client.chat.completions.create(
                 model="gpt-4o-mini",
                 messages=[
-                    {"role": "system", "content": "Return
-                    {"role": "user", "content": f"Relevant to {
+                    {"role": "system", "content": "Return 'yes' or 'no'."},
+                    {"role": "user", "content": f"Relevant to DB with tables {', '.join(tables)}?\n\nQ: {question}\n\nyes/no:"}
                 ],
                 temperature=0,
                 max_tokens=5
@@ -571,7 +522,6 @@ Requirements:
             return None

         q_emb = self.embedding_model.encode([question])[0]
-
         best_match = None
         best_sim = 0.85

@@ -580,13 +530,12 @@ Requirements:
                 continue

             sim = np.dot(q_emb, data['embedding']) / (np.linalg.norm(q_emb) * np.linalg.norm(data['embedding']))
-
             if sim > best_sim:
                 best_sim = sim
                 best_match = cached_q

         if best_match:
-            print(f"  Similar ({best_sim:.0%})
+            print(f"  Similar ({best_sim:.0%})")
             return self.query_embeddings[best_match]['result']

         return None
@@ -605,7 +554,7 @@ Requirements:
         return self._plotly_viz(df, title, viz_type) if HAS_PLOTLY else self._matplotlib_viz(df, title, viz_type)

     def _plotly_viz(self, df: pd.DataFrame, title: str, viz_type: str):
-        """Plotly
+        """Plotly."""
         try:
             num = df.select_dtypes(include=[np.number]).columns.tolist()
             cat = df.select_dtypes(include=['object']).columns.tolist()
@@ -631,14 +580,12 @@ Requirements:
                 fig = px.bar(df, y=df.columns[0], title=title)

             fig.show()
-            print("Chart displayed")
             return fig
-        except
-            print(f"Viz error: {e}")
+        except:
             return None

     def _matplotlib_viz(self, df: pd.DataFrame, title: str, viz_type: str):
-        """Matplotlib
+        """Matplotlib."""
         try:
             plt.figure(figsize=(10, 6))
             num = df.select_dtypes(include=[np.number]).columns
@@ -654,14 +601,13 @@ Requirements:
             plt.tight_layout()
             plt.show()
             return plt.gcf()
-        except
-            print(f"Viz error: {e}")
+        except:
             return None

     def tables(self) -> Dict[str, dict]:
         """List tables."""
         print("\n" + "="*70)
-        print("TABLES
+        print("TABLES")
         print("="*70)

         all_tables = self._get_table_names()
@@ -673,11 +619,7 @@ Requirements:
         for i, tbl in enumerate(all_tables, 1):
             cnt = pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl}", self.conn).iloc[0, 0]
             cols = list(self.schema_info.get(tbl, {}).keys())
-
-            print(f"  {i}. {tbl}")
-            print(f"     {cnt} rows, {len(cols)} columns")
-            print(f"     {', '.join(cols[:8])}")
-
+            print(f"  {i}. {tbl}: {cnt} rows, {len(cols)} columns")
             result[tbl] = {'rows': cnt, 'columns': cols}

         print("="*70)
@@ -689,16 +631,14 @@ Requirements:
         self._refresh_schema()

         print("\n" + "="*70)
-        print("
+        print("SCHEMA")
         print("="*70)

-        tables_to_show = [table] if table else self.schema_info.keys()
-
         result = {}
-        for tbl in
+        for tbl in ([table] if table else self.schema_info.keys()):
             if tbl in self.schema_info:
                 cnt = pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl}", self.conn).iloc[0, 0]
-                print(f"\
+                print(f"\n{tbl}: {cnt} records")
                 for col, dtype in self.schema_info[tbl].items():
                     print(f"  - {col:<30} {dtype}")
                 result[tbl] = {'records': cnt, 'columns': self.schema_info[tbl]}
@@ -723,12 +663,10 @@ Requirements:

     def sql(self, query: str, viz: Union[bool, str] = False) -> 'QueryResult':
         """Execute SQL."""
-        print("\nExecuting SQL...")
         try:
             df = pd.read_sql_query(query, self.conn)
             print(f"Success! {len(df)} rows")
-
-            fig = self._visualize(df, "SQL Result", viz if isinstance(viz, str) else "auto") if viz else None
+            fig = self._visualize(df, "Result", viz if isinstance(viz, str) else "auto") if viz else None
             return QueryResult(True, query, df, fig)
         except Exception as e:
             print(f"Error: {e}")
@@ -736,44 +674,79 @@ Requirements:

     def interactive(self, question: str) -> 'QueryResult':
         """Interactive."""
-        print(f"\nQuestion: {question}")
         choice = input("Visualize? (yes/no/pie/bar/line/scatter): ").strip().lower()
         viz = choice if choice in ['pie', 'bar', 'line', 'scatter', 'table', 'heatmap'] else (True if choice in ['yes', 'y'] else False)
         return self.ask(question, viz=viz)

     def export_db(self, path: str, format: str = "sqlite"):
-        """Export."""
-
-
-
-
-
-
-
-
-
-
+        """Export database."""
+        if format == "sqlite":
+            shutil.copy2(self.db_path, path)
+        elif format == "sql":
+            with open(path, 'w', encoding='utf-8') as f:
+                for line in self.conn.iterdump():
+                    f.write(f'{line}\n')
+        elif format == "json":
+            data = {t: pd.read_sql_query(f"SELECT * FROM {t}", self.conn).to_dict('records') for t in self._get_table_names()}
+            with open(path, 'w', encoding='utf-8') as f:
+                json.dump(data, f, indent=2, default=str)
+        elif format == "excel":
+            with pd.ExcelWriter(path, engine='openpyxl') as writer:
+                for t in self._get_table_names():
+                    pd.read_sql_query(f"SELECT * FROM {t}", self.conn).to_excel(writer, sheet_name=t[:31], index=False)
         else:
             raise ValueError(f"Unsupported: {format}")
+
+        print(f"Saved: {path}")
         return self

-    def save_to_mysql(self, host: str, user: str, password: str, database: str,
-
+    def save_to_mysql(self, host: str, user: str, password: str, database: str,
+                      port: int = 3306, tables: Optional[List[str]] = None,
+                      auto_create: bool = True):
+        """
+        Export to MySQL - AUTO-CREATES database if not exists.
+
+        Args:
+            host: MySQL host
+            user: MySQL user
+            password: MySQL password
+            database: Database name (auto-created if not exists)
+            port: MySQL port
+            tables: Specific tables to export (None = all)
+            auto_create: Auto-create database if not exists
+        """
         try:
             from sqlalchemy import create_engine
-
-
-            print(f"Exporting to MySQL...")
-            for t in (tables or self._get_table_names()):
-                df = pd.read_sql_query(f"SELECT * FROM {t}", self.conn)
-                df.to_sql(t, engine, if_exists='replace', index=False)
-                print(f"  {t}: {len(df)} rows")
-            print("Complete!")
-            return self
+            import mysql.connector
         except ImportError:
             raise ImportError("Run: pip install QuerySUTRA[mysql]")
+
+        print(f"Exporting to MySQL: {host}/{database}")
+
+        # Auto-create database if requested
+        if auto_create:
+            try:
+                temp_conn = mysql.connector.connect(host=host, user=user, password=password, port=port)
+                temp_cursor = temp_conn.cursor()
+                temp_cursor.execute(f"CREATE DATABASE IF NOT EXISTS `{database}`")
+                temp_cursor.close()
+                temp_conn.close()
+                print(f"  Database '{database}' ready")
+            except Exception as e:
+                print(f"  Warning: Could not auto-create database: {e}")
+
+        engine = create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}")
+
+        for t in (tables or self._get_table_names()):
+            df = pd.read_sql_query(f"SELECT * FROM {t}", self.conn)
+            df.to_sql(t, engine, if_exists='replace', index=False)
+            print(f"  {t}: {len(df)} rows")
+
+        print("Complete!")
+        return self

-    def save_to_postgres(self, host: str, user: str, password: str, database: str,
+    def save_to_postgres(self, host: str, user: str, password: str, database: str,
+                         port: int = 5432, tables: Optional[List[str]] = None):
         """Export to PostgreSQL."""
         try:
             from sqlalchemy import create_engine
@@ -795,7 +768,6 @@ Requirements:
         dir.mkdir(parents=True, exist_ok=True)
         ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

-        print("Creating backup...")
         self.export_db(str(dir / f"sutra_{ts}.db"), "sqlite")
         self.export_db(str(dir / f"sutra_{ts}.json"), "json")
         print("Backup complete!")
@@ -803,9 +775,12 @@ Requirements:

     def export(self, data: pd.DataFrame, path: str, format: str = "csv"):
         """Export results."""
-
-
-
+        if format == "csv":
+            data.to_csv(path, index=False)
+        elif format in ["excel", "xlsx"]:
+            data.to_excel(path, index=False)
+        elif format == "json":
+            data.to_json(path, orient="records", indent=2)
         print(f"Exported: {path}")
         return self

@@ -820,7 +795,7 @@ Requirements:
         return [r[0] for r in self.cursor.fetchall()]

     def _refresh_schema(self):
-        """Refresh
+        """Refresh."""
         self.schema_info = {}
         for tbl in self._get_table_names():
             self.cursor.execute(f"PRAGMA table_info({tbl})")
@@ -836,7 +811,7 @@ Requirements:
                 model="gpt-4o-mini",
                 messages=[
                     {"role": "system", "content": "SQL expert. Return only SQL."},
-                    {"role": "user", "content": f"
+                    {"role": "user", "content": f"Table: {table}\nColumns: {schema_str}\nSample:\n{sample}\n\nQ: {question}\n\nSQL:"}
                 ],
                 temperature=0
             )
@@ -850,8 +825,7 @@ Requirements:
         self.close()

     def __repr__(self):
-
-        return f"SUTRA(tables={len(self.schema_info)}, {', '.join(feat)})"
+        return f"SUTRA(tables={len(self.schema_info)})"


 class QueryResult:
@@ -860,7 +834,7 @@ class QueryResult:
         self.success, self.sql, self.data, self.viz, self.error = success, sql, data, viz, error

     def __repr__(self):
-        return f"QueryResult(rows={len(self.data)}
+        return f"QueryResult(rows={len(self.data)})" if self.success else f"QueryResult(error='{self.error}')"

     def show(self):
         print(self.data if self.success else f"Error: {self.error}")
@@ -872,3 +846,7 @@ def quick_start(api_key: str, data_path: str, question: str, viz: Union[bool, st
     with SUTRA(api_key=api_key) as sutra:
         sutra.upload(data_path)
         return sutra.ask(question, viz=viz)
+
+
+if __name__ == "__main__":
+    print("QuerySUTRA v0.4.0 - Simple & Automatic")
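For reference, the quick_start() helper that closes the module (context lines above) can be driven end to end as in this sketch; the data file and question are placeholders, and the import path is an assumption based on the sutra module shown in this diff:

    from sutra.sutra import quick_start  # assumed import path

    # Uploads the file, answers one natural-language question, and returns a QueryResult.
    result = quick_start(api_key="sk-...", data_path="team.csv",
                         question="How many people are listed per city?", viz="bar")
    result.show()        # prints the result DataFrame (or the error message)
    df = result.data     # underlying pandas DataFrame for further use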