QuerySUTRA 0.4.5-py3-none-any.whl → 0.4.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
querysutra-0.4.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: QuerySUTRA
- Version: 0.4.5
+ Version: 0.4.6
  Summary: SUTRA
  Author: Aditya Batta
  License: MIT
querysutra-0.4.6.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
- querysutra-0.4.5.dist-info/licenses/LICENSE,sha256=F-4b93u0OVrVwGXgMwBRq6MlGyUT9zmre1oh5Gft5Ts,1066
+ querysutra-0.4.6.dist-info/licenses/LICENSE,sha256=F-4b93u0OVrVwGXgMwBRq6MlGyUT9zmre1oh5Gft5Ts,1066
  sutra/__init__.py,sha256=ie1Gs0etPOrKWW6v3enD68QYLvETQWmYF0wxo9usqEU,152
  sutra/cache_manager.py,sha256=e0AAeUqoR-aiqzZ3fB-IDvpJ4JA6-YBFyRJxusEnIrA,3082
  sutra/clear_cache.py,sha256=rVIz29p7V11Uh6oHXeaWpFtYXXv-2OED91cHMAWWxtQ,187
@@ -11,7 +11,7 @@ sutra/feedback_matcher.py,sha256=WXYpGtFJnOyYQOzy-z8uBiUWH5vyJJOMS1NwEYzNfic,286
  sutra/nlp_processor.py,sha256=wMS1hz1aGWjSwPUD7lSNBbQapFtLgF2l65j0QKXQOd0,5461
  sutra/schema_embeddings.py,sha256=bVPzpJOdYTyUdG2k3ZdgYJLrX2opHBx68RIjJcMlueo,9732
  sutra/schema_generator.py,sha256=BX_vXmnvSGc6nCBx40WLSoNL3WIYPDahd1cEYloyY4M,1925
- sutra/sutra.py,sha256=GQ-3gLKoDsSDAgb57Yfc7uHFdx_ptYryxCFS8Jr0Nmc,33082
+ sutra/sutra.py,sha256=etDxiGYwCj8t6sdppYk2MsFmZlX9d2JiJv1na1GYF4Y,32320
  sutra/sutra_client.py,sha256=PYYDGqVbA9pB-Zcsm52i9KarwijCIGVZOThgONZP6Vs,14203
  sutra/sutra_core.py,sha256=diaWOXUHn1wrqCQrBhLKL612tMQioaqx-ILc3y9-CqM,11708
  sutra/sutra_simple.py,sha256=rnqzG7OAt4p64XtO0peMqHS1pG5tdA8U3EYTMVsq7BE,23201
@@ -22,7 +22,7 @@ tests/test_sutra.py,sha256=6Z4SoIuBzza101304I7plkyPVkUBbjIxR8uPs9z5ntg,2383
  utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  utils/file_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  utils/text_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- querysutra-0.4.5.dist-info/METADATA,sha256=P58pvODZzmE6gc0VpSol2fFhomOvT_xaN7n9gIlnayk,7258
- querysutra-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- querysutra-0.4.5.dist-info/top_level.txt,sha256=9v0buw21eo5LaUU_3Cf9b9MqRyEvtM9cHaOuEXUKVqM,18
- querysutra-0.4.5.dist-info/RECORD,,
+ querysutra-0.4.6.dist-info/METADATA,sha256=IdGXdU4zCEUwrj_FpOHDlS9T-sqa875zD6MLTUWwDuo,7258
+ querysutra-0.4.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ querysutra-0.4.6.dist-info/top_level.txt,sha256=9v0buw21eo5LaUU_3Cf9b9MqRyEvtM9cHaOuEXUKVqM,18
+ querysutra-0.4.6.dist-info/RECORD,,
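
Only version strings and two content hashes change in the dist-info files. For readers who want to check them: wheel RECORD entries follow PEP 376/427, `path,sha256=<digest>,<size>`, where the digest is an urlsafe-base64-encoded SHA-256 with padding stripped. A minimal verification sketch; the `verify_record_entry` helper is illustrative, not part of QuerySUTRA:

    import base64
    import hashlib
    from pathlib import Path

    def verify_record_entry(path: str, expected_hash: str, expected_size: int) -> bool:
        """Recompute a wheel RECORD entry: sha256=<urlsafe-b64 digest, padding stripped>,<size>."""
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
        return f"sha256={digest}" == expected_hash and len(data) == expected_size

    # e.g. against the new entry above, from an unpacked 0.4.6 wheel:
    # verify_record_entry("sutra/sutra.py",
    #                     "sha256=etDxiGYwCj8t6sdppYk2MsFmZlX9d2JiJv1na1GYF4Y", 32320)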
sutra/sutra.py CHANGED
@@ -1,18 +1,9 @@
  """
- QuerySUTRA v0.4.0 - SIMPLE & AUTOMATIC
- SUTRA: Structured-Unstructured-Text-Retrieval-Architecture
-
- FIXED:
- - Auto-creates MySQL database if not exists
- - One-line export to MySQL
- - Complete data extraction from large PDFs
- - No manual file transfers needed
-
- Author: Aditya Batta
- Version: 0.4.0
+ QuerySUTRA v0.4.5 - FIXED AI EXTRACTION
+ Debug mode to see why extraction fails
  """

- __version__ = "0.4.0"
+ __version__ = "0.4.5"
  __author__ = "Aditya Batta"
  __all__ = ["SUTRA", "QueryResult", "quick_start"]

@@ -74,9 +65,9 @@ class SUTRA:

      def __init__(self, api_key: Optional[str] = None, db: str = "sutra.db",
                   use_embeddings: bool = False, check_relevance: bool = False,
-                  fuzzy_match: bool = True, cache_queries: bool = True):
+                  fuzzy_match: bool = True, cache_queries: bool = True, debug: bool = False):
          """Initialize."""
-         print("Initializing QuerySUTRA v0.4.0")
+         print("Initializing QuerySUTRA v0.4.5")

          if api_key:
              os.environ["OPENAI_API_KEY"] = api_key
@@ -85,6 +76,7 @@ class SUTRA:
          self.client = OpenAI(api_key=self.api_key) if self.api_key and HAS_OPENAI else None

          self.db_path = db
+         self.debug = debug

          try:
              self.conn = sqlite3.connect(db, timeout=30, check_same_thread=False)
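
This is the release's only API change: a `debug` keyword, stored on the instance and consulted throughout the extraction paths below. A usage sketch, assuming the top-level `sutra` package re-exports `SUTRA` as its `__all__` suggests (the file name is a placeholder):

    from sutra import SUTRA

    # debug=True enables the DEBUG prints added in this version: per-chunk
    # entity counts and the raw AI response before JSON parsing.
    sutra = SUTRA(api_key="sk-...", db="sutra.db", debug=True)
    sutra.upload("resume.pdf")  # placeholder input file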
@@ -133,7 +125,6 @@ class SUTRA:

          print(f"Connecting to MySQL...")

-         # Auto-create database if not exists
          try:
              temp_conn = mysql.connector.connect(host=host, user=user, password=password, port=port)
              temp_cursor = temp_conn.cursor()
@@ -185,24 +176,7 @@ class SUTRA:
      def upload(self, data: Union[str, pd.DataFrame], name: Optional[str] = None,
                 extract_entities: Optional[List[str]] = None,
                 auto_export_mysql: Optional[Dict[str, str]] = None) -> 'SUTRA':
-         """
-         Upload data with OPTIONAL automatic MySQL export.
-
-         Args:
-             data: File path or DataFrame
-             name: Table name
-             extract_entities: Custom entities to extract
-             auto_export_mysql: Auto-export to MySQL after upload
-                 {'host': 'localhost', 'user': 'root', 'password': 'pass', 'database': 'mydb'}
-
-         Example:
-             sutra.upload("data.pdf", auto_export_mysql={
-                 'host': 'localhost',
-                 'user': 'root',
-                 'password': '123456',
-                 'database': 'my_database'
-             })
-         """
+         """Upload data."""
          print("\nUploading...")

          if isinstance(data, pd.DataFrame):
@@ -238,7 +212,6 @@ class SUTRA:
          else:
              raise ValueError(f"Unsupported: {ext}")

-         # AUTO-EXPORT to MySQL if requested
          if auto_export_mysql:
              print("\nAuto-exporting to MySQL...")
              self.save_to_mysql(
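
The `auto_export_mysql` hook itself is unchanged; only its docstring example was cut. That removed 0.4.5 example remains the clearest picture of the intended call shape (credentials are placeholders):

    # Upload a PDF and mirror the resulting tables into MySQL in one call.
    sutra.upload("data.pdf", auto_export_mysql={
        'host': 'localhost',
        'user': 'root',
        'password': 'pass',        # placeholder credentials
        'database': 'my_database'  # created automatically if missing
    })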
@@ -252,7 +225,7 @@ class SUTRA:
          return self

      def _smart_upload_pdf(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
-         """Parse PDF - extracts ALL pages."""
+         """Parse PDF."""
          if not HAS_PYPDF2:
              raise ImportError("Run: pip install PyPDF2")

@@ -268,7 +241,6 @@ class SUTRA:
          if self.client:
              print("AI: Extracting entities...")

-             # Process in chunks for large documents
              chunk_size = 10000
              all_entities = {}

@@ -282,11 +254,19 @@ class SUTRA:

                  entities = self._extract_chunk(chunk, extract_entities)

+                 if self.debug:
+                     print(f" DEBUG: Chunk {chunk_num} returned {len(entities)} entity types")
+
                  for entity_type, records in entities.items():
                      if entity_type not in all_entities:
                          all_entities[entity_type] = []
                      all_entities[entity_type].extend(records)

+             if self.debug:
+                 print(f" DEBUG: Total entities collected: {len(all_entities)}")
+                 for k, v in all_entities.items():
+                     print(f" - {k}: {len(v)} records")
+
              # Renumber IDs
              for entity_type, records in all_entities.items():
                  for idx, record in enumerate(records, 1):
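
The surrounding loop (unchanged apart from the new DEBUG prints) walks the PDF text in 10,000-character windows, merges each chunk's entities by type, then renumbers IDs. The same pattern in isolation, with a stubbed extractor; `merge_chunks` is an illustrative name, not package API:

    from typing import Dict, List

    def merge_chunks(text: str, extract, chunk_size: int = 10000) -> Dict[str, List[dict]]:
        """Split text into fixed windows, extract per window, merge records by entity type."""
        all_entities: Dict[str, List[dict]] = {}
        for start in range(0, len(text), chunk_size):
            entities = extract(text[start:start + chunk_size])  # {entity_type: [records]}
            for entity_type, records in entities.items():
                all_entities.setdefault(entity_type, []).extend(records)
        # Renumber IDs so records merged from different chunks stay unique, as above.
        for records in all_entities.values():
            for idx, record in enumerate(records, 1):
                record['id'] = idx
        return all_entities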
@@ -306,48 +286,8 @@ class SUTRA:
              print("Creating simple table")
              self._store_dataframe(self._parse_text_simple(full_text), base_name)

-     def _smart_upload_docx(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
-         """Parse DOCX."""
-         if not HAS_DOCX:
-             raise ImportError("Run: pip install python-docx")
-
-         doc = docx.Document(path)
-
-         if doc.tables:
-             for i, table in enumerate(doc.tables):
-                 data = [[cell.text.strip() for cell in row.cells] for row in table.rows]
-                 if data and len(data) > 1:
-                     df = pd.DataFrame(data[1:], columns=data[0])
-                     self._store_dataframe(df, f"{base_name}_table_{i+1}" if len(doc.tables) > 1 else base_name)
-             return
-
-         text = "\n".join([para.text for para in doc.paragraphs])
-
-         if self.client and len(text) > 0:
-             entities = self._extract_chunk(text, extract_entities)
-             for entity_type, records in entities.items():
-                 if records:
-                     df = pd.DataFrame(records)
-                     self._store_dataframe_safe(df, f"{base_name}_{entity_type}")
-         else:
-             self._store_dataframe(self._parse_text_simple(text), base_name)
-
-     def _smart_upload_txt(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
-         """Parse TXT."""
-         with open(path, 'r', encoding='utf-8') as file:
-             text = file.read()
-
-         if self.client and len(text) > 0:
-             entities = self._extract_chunk(text, extract_entities)
-             for entity_type, records in entities.items():
-                 if records:
-                     df = pd.DataFrame(records)
-                     self._store_dataframe_safe(df, f"{base_name}_{entity_type}")
-         else:
-             self._store_dataframe(self._parse_text_simple(text), base_name)
-
      def _extract_chunk(self, text: str, custom_entities: Optional[List[str]] = None) -> Dict:
-         """Extract entities from text chunk."""
+         """Extract entities - WITH BETTER ERROR HANDLING."""
          if not self.client:
              return {}

@@ -357,35 +297,98 @@ class SUTRA:
  Text:
  {text[:8000]}

- Extract entities like: people, skills, technologies, projects, certifications, education, work_experience, events, organizations, or ANY other structured data.
+ Extract: people, skills, technologies, projects, certifications, education, work_experience, events, organizations, or ANY structured data.

- Return JSON with arrays. Use sequential IDs (1,2,3...). Foreign keys reference primary keys.
+ Return JSON with arrays. Sequential IDs. Foreign keys reference primary keys.

- Example:
  {{
  "people": [{{"id": 1, "name": "John", "email": "john@co.com", "city": "Dallas"}}, ...],
  "skills": [{{"id": 1, "person_id": 1, "skill_name": "Python"}}, ...]
  }}

- Return ONLY valid JSON."""
+ ONLY valid JSON. No explanations."""

              resp = self.client.chat.completions.create(
                  model="gpt-4o-mini",
                  messages=[
-                     {"role": "system", "content": "Extract ALL entities with unique IDs. Return only JSON."},
+                     {"role": "system", "content": "Extract ALL entities with unique IDs. Return ONLY valid JSON, nothing else."},
                      {"role": "user", "content": prompt}
                  ],
                  temperature=0,
                  max_tokens=8000
              )

-             json_text = resp.choices[0].message.content.strip().replace("```json", "").replace("```", "").strip()
-             return json.loads(json_text)
+             json_text = resp.choices[0].message.content.strip()
+
+             if self.debug:
+                 print(f" DEBUG: AI response length: {len(json_text)} chars")
+                 print(f" DEBUG: First 200 chars: {json_text[:200]}")
+
+             json_text = json_text.replace("```json", "").replace("```", "").strip()
+
+             result = json.loads(json_text)
+
+             if self.debug:
+                 print(f" DEBUG: Parsed {len(result)} entity types")
+
+             return result
+
+         except json.JSONDecodeError as e:
+             if self.debug:
+                 print(f" DEBUG: JSON parse error: {e}")
+                 print(f" DEBUG: Response was: {json_text[:500]}")
+             return {}
          except Exception as e:
+             if self.debug:
+                 print(f" DEBUG: Extraction error: {e}")
              return {}

+     def _smart_upload_docx(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
+         """Parse DOCX."""
+         if not HAS_DOCX:
+             raise ImportError("Run: pip install python-docx")
+
+         doc = docx.Document(path)
+
+         if doc.tables:
+             for i, table in enumerate(doc.tables):
+                 data = [[cell.text.strip() for cell in row.cells] for row in table.rows]
+                 if data and len(data) > 1:
+                     df = pd.DataFrame(data[1:], columns=data[0])
+                     self._store_dataframe(df, f"{base_name}_table_{i+1}" if len(doc.tables) > 1 else base_name)
+             return
+
+         text = "\n".join([para.text for para in doc.paragraphs])
+
+         if self.client and len(text) > 0:
+             entities = self._extract_chunk(text, extract_entities)
+             if entities:
+                 for entity_type, records in entities.items():
+                     if records:
+                         df = pd.DataFrame(records)
+                         self._store_dataframe_safe(df, f"{base_name}_{entity_type}")
+                 return
+
+         self._store_dataframe(self._parse_text_simple(text), base_name)
+
+     def _smart_upload_txt(self, path: Path, base_name: str, extract_entities: Optional[List[str]] = None):
+         """Parse TXT."""
+         with open(path, 'r', encoding='utf-8') as file:
+             text = file.read()
+
+         if self.client and len(text) > 0:
+             entities = self._extract_chunk(text, extract_entities)
+             if entities:
+                 for entity_type, records in entities.items():
+                     if records:
+                         df = pd.DataFrame(records)
+                         self._store_dataframe_safe(df, f"{base_name}_{entity_type}")
+                 return
+
+         self._store_dataframe(self._parse_text_simple(text), base_name)
+
      def _store_dataframe_safe(self, df: pd.DataFrame, name: str):
-         """Store with error handling."""
+         """Store."""
          try:
              df.columns = [str(c).strip().replace(" ", "_").replace("-", "_") for c in df.columns]
              df.to_sql(name, self.conn, if_exists='replace', index=False, method='multi', chunksize=500)
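
This hunk is the substantive fix of the release. Previously, fence stripping and `json.loads` ran in a single expression inside a blanket `except`, so a malformed model response failed silently; now the steps are separated and `json.JSONDecodeError` gets its own branch with diagnostics. The pattern in isolation (`parse_model_json` is an illustrative name):

    import json

    def parse_model_json(raw: str, debug: bool = False) -> dict:
        """Strip Markdown code fences, then parse; report failures instead of hiding them."""
        json_text = raw.strip().replace("```json", "").replace("```", "").strip()
        try:
            return json.loads(json_text)
        except json.JSONDecodeError as e:
            if debug:
                print(f"JSON parse error: {e}")
                print(f"Response was: {json_text[:500]}")
            return {}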
@@ -403,7 +406,6 @@ Return ONLY valid JSON."""
          lines = [line.strip() for line in text.split('\n') if line.strip()]
          if not lines:
              return pd.DataFrame({'content': ['No content']})
-
          return pd.DataFrame({'line_number': range(1, len(lines) + 1), 'content': lines})

      def _store_dataframe(self, df: pd.DataFrame, name: str):
@@ -412,14 +414,14 @@ Return ONLY valid JSON."""
          print(f"Uploaded: {name} ({len(df)} rows)")

      def ask(self, question: str, viz: Union[bool, str] = False, table: Optional[str] = None) -> 'QueryResult':
-         """Natural language query."""
+         """Query."""
          if not self.client:
              return QueryResult(False, "", pd.DataFrame(), None, "No API key")

          print(f"\nQuestion: {question}")

          if self.check_relevance and not self._is_relevant_query(question):
-             print("Warning: Irrelevant query")
+             print("Warning: Irrelevant")
              choice = input("Continue? (yes/no): ").strip().lower()
              if choice not in ['yes', 'y']:
                  return QueryResult(False, "", pd.DataFrame(), None, "Irrelevant")
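
`ask` itself only loses docstring and message verbosity. Typical usage, assuming `QueryResult` exposes the fields its constructor calls here imply (a success flag, the SQL, a DataFrame, a figure, and an error string; the question text is a placeholder):

    result = sutra.ask("How many people are from Dallas?", viz="bar")
    if result.success:      # field names inferred from the QueryResult(...) calls above
        print(result.data)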
@@ -471,13 +473,8 @@ Return ONLY valid JSON."""
          """Check relevance."""
          if not self.client:
              return True
-
          try:
              tables = self._get_table_names()[:3]
-             cols = []
-             for tbl in tables:
-                 cols.extend(list(self.schema_info.get(tbl, {}).keys())[:5])
-
              resp = self.client.chat.completions.create(
                  model="gpt-4o-mini",
                  messages=[
@@ -492,7 +489,7 @@ Return ONLY valid JSON."""
              return True

      def _apply_fuzzy_matching(self, question: str, table: str) -> str:
-         """Fuzzy matching."""
+         """Fuzzy."""
          if not self.schema_info.get(table):
              return question

@@ -522,13 +519,11 @@ Return ONLY valid JSON."""
              return None

          q_emb = self.embedding_model.encode([question])[0]
-         best_match = None
-         best_sim = 0.85
+         best_match, best_sim = None, 0.85

          for cached_q, data in self.query_embeddings.items():
              if data['table'] != table:
                  continue
-
              sim = np.dot(q_emb, data['embedding']) / (np.linalg.norm(q_emb) * np.linalg.norm(data['embedding']))
              if sim > best_sim:
                  best_sim = sim
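
The cache lookup is plain cosine similarity with a 0.85 floor: a cached result is reused only when the stored question's embedding nearly matches the new one. The computation in isolation:

    import numpy as np

    def cosine_sim(a: np.ndarray, b: np.ndarray) -> float:
        """Cosine similarity, the measure behind the 0.85 cache-hit threshold above."""
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

    # hit = cosine_sim(q_emb, data['embedding']) > 0.85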
@@ -537,19 +532,17 @@ Return ONLY valid JSON."""
          if best_match:
              print(f" Similar ({best_sim:.0%})")
              return self.query_embeddings[best_match]['result']
-
          return None

      def _store_in_embedding_cache(self, question: str, table: str, result: 'QueryResult'):
-         """Store cache."""
+         """Store."""
          q_emb = self.embedding_model.encode([question])[0]
          self.query_embeddings[question] = {'table': table, 'embedding': q_emb, 'result': result}

      def _visualize(self, df: pd.DataFrame, title: str, viz_type: str = "auto"):
-         """Visualize."""
+         """Viz."""
          if not HAS_PLOTLY and not HAS_MATPLOTLIB:
              return None
-
          print(f"Creating {viz_type} chart...")
          return self._plotly_viz(df, title, viz_type) if HAS_PLOTLY else self._matplotlib_viz(df, title, viz_type)

@@ -578,7 +571,6 @@ Return ONLY valid JSON."""
                  fig = px.pie(df, names=cat[0], values=num[0], title=title) if len(df) <= 10 else px.bar(df, x=cat[0], y=num[0], title=title)
              else:
                  fig = px.bar(df, y=df.columns[0], title=title)
-
              fig.show()
              return fig
          except:
@@ -589,14 +581,12 @@ Return ONLY valid JSON."""
          try:
              plt.figure(figsize=(10, 6))
              num = df.select_dtypes(include=[np.number]).columns
-
              if viz_type == "pie":
                  df[df.columns[0]].value_counts().plot(kind='pie')
              elif viz_type == "line" and len(num) > 0:
                  df[num[0]].plot(kind='line')
              else:
                  (df[num[0]] if len(num) > 0 else df.iloc[:, 0].value_counts()).plot(kind='bar')
-
              plt.title(title)
              plt.tight_layout()
              plt.show()
@@ -605,7 +595,7 @@ Return ONLY valid JSON."""
          return None

      def tables(self) -> Dict[str, dict]:
-         """List tables."""
+         """List."""
          print("\n" + "="*70)
          print("TABLES")
          print("="*70)
@@ -626,7 +616,7 @@ Return ONLY valid JSON."""
          return result

      def schema(self, table: Optional[str] = None) -> dict:
-         """Show schema."""
+         """Schema."""
          if not self.schema_info:
              self._refresh_schema()

@@ -651,18 +641,17 @@ Return ONLY valid JSON."""
          tbl = table or self.current_table
          if not tbl:
              return pd.DataFrame()
-
          df = pd.read_sql_query(f"SELECT * FROM {tbl} LIMIT {n}", self.conn)
          print(f"\nSample from '{tbl}':")
          print(df.to_string(index=False))
          return df

      def info(self):
-         """Overview."""
+         """Info."""
          return self.tables()

      def sql(self, query: str, viz: Union[bool, str] = False) -> 'QueryResult':
-         """Execute SQL."""
+         """SQL."""
          try:
              df = pd.read_sql_query(query, self.conn)
              print(f"Success! {len(df)} rows")
@@ -679,7 +668,7 @@ Return ONLY valid JSON."""
          return self.ask(question, viz=viz)

      def export_db(self, path: str, format: str = "sqlite"):
-         """Export database."""
+         """Export."""
          if format == "sqlite":
              shutil.copy2(self.db_path, path)
          elif format == "sql":
@@ -696,25 +685,12 @@ Return ONLY valid JSON."""
                  pd.read_sql_query(f"SELECT * FROM {t}", self.conn).to_excel(writer, sheet_name=t[:31], index=False)
          else:
              raise ValueError(f"Unsupported: {format}")
-
          print(f"Saved: {path}")
          return self

      def save_to_mysql(self, host: str, user: str, password: str, database: str,
-                       port: int = 3306, tables: Optional[List[str]] = None,
-                       auto_create: bool = True):
-         """
-         Export to MySQL - AUTO-CREATES database if not exists.
-
-         Args:
-             host: MySQL host
-             user: MySQL user
-             password: MySQL password
-             database: Database name (auto-created if not exists)
-             port: MySQL port
-             tables: Specific tables to export (None = all)
-             auto_create: Auto-create database if not exists
-         """
+                       port: int = 3306, tables: Optional[List[str]] = None, auto_create: bool = True):
+         """Export to MySQL."""
          try:
              from sqlalchemy import create_engine
              import mysql.connector
@@ -723,7 +699,6 @@ Return ONLY valid JSON."""

          print(f"Exporting to MySQL: {host}/{database}")

-         # Auto-create database if requested
          if auto_create:
              try:
                  temp_conn = mysql.connector.connect(host=host, user=user, password=password, port=port)
@@ -733,7 +708,7 @@ Return ONLY valid JSON."""
                  temp_conn.close()
                  print(f" Database '{database}' ready")
              except Exception as e:
-                 print(f" Warning: Could not auto-create database: {e}")
+                 print(f" Warning: {e}")

          engine = create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}")

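The auto-create behavior described in the removed docstring works the same way as before: connect without selecting a database, create the target database if missing, then build the SQLAlchemy engine against it. A condensed sketch of that pattern under those assumptions (the intermediate CREATE statement is not shown in this hunk; `ensure_database` is an illustrative name, credentials are placeholders):

    import mysql.connector
    from sqlalchemy import create_engine

    def ensure_database(host: str, user: str, password: str, database: str, port: int = 3306):
        """Create the target database if missing, then return an engine bound to it."""
        conn = mysql.connector.connect(host=host, user=user, password=password, port=port)
        cursor = conn.cursor()
        # Interpolates the name, as the package does; only pass trusted identifiers.
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS {database}")
        conn.close()
        return create_engine(f"mysql+mysqlconnector://{user}:{password}@{host}:{port}/{database}")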
@@ -745,13 +720,11 @@ Return ONLY valid JSON."""
          print("Complete!")
          return self

-     def save_to_postgres(self, host: str, user: str, password: str, database: str,
-                          port: int = 5432, tables: Optional[List[str]] = None):
-         """Export to PostgreSQL."""
+     def save_to_postgres(self, host: str, user: str, password: str, database: str, port: int = 5432, tables: Optional[List[str]] = None):
+         """PostgreSQL."""
          try:
              from sqlalchemy import create_engine
              engine = create_engine(f"postgresql://{user}:{password}@{host}:{port}/{database}")
-
              print(f"Exporting to PostgreSQL...")
              for t in (tables or self._get_table_names()):
                  df = pd.read_sql_query(f"SELECT * FROM {t}", self.conn)
@@ -767,14 +740,13 @@ Return ONLY valid JSON."""
          dir = Path(path) if path else Path(".")
          dir.mkdir(parents=True, exist_ok=True)
          ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-
          self.export_db(str(dir / f"sutra_{ts}.db"), "sqlite")
          self.export_db(str(dir / f"sutra_{ts}.json"), "json")
          print("Backup complete!")
          return self

      def export(self, data: pd.DataFrame, path: str, format: str = "csv"):
-         """Export results."""
+         """Export."""
          if format == "csv":
              data.to_csv(path, index=False)
          elif format in ["excel", "xlsx"]:
@@ -790,7 +762,7 @@ Return ONLY valid JSON."""
          self.conn.close()

      def _get_table_names(self) -> List[str]:
-         """Get tables."""
+         """Tables."""
          self.cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
          return [r[0] for r in self.cursor.fetchall()]

@@ -802,7 +774,7 @@ Return ONLY valid JSON."""
              self.schema_info[tbl] = {r[1]: r[2] for r in self.cursor.fetchall()}

      def _generate_sql(self, question: str, table: str) -> str:
-         """Generate SQL."""
+         """SQL."""
          schema = self.schema_info.get(table, {})
          sample = pd.read_sql_query(f"SELECT * FROM {table} LIMIT 3", self.conn).to_string(index=False)
          schema_str = ", ".join([f"{col} ({dtype})" for col, dtype in schema.items()])
@@ -815,7 +787,6 @@ Return ONLY valid JSON."""
              ],
              temperature=0
          )
-
          return resp.choices[0].message.content.strip().replace("```sql", "").replace("```", "").strip()

      def __enter__(self):
@@ -842,11 +813,7 @@ class QueryResult:


  def quick_start(api_key: str, data_path: str, question: str, viz: Union[bool, str] = False):
-     """Quick start."""
+     """Quick."""
      with SUTRA(api_key=api_key) as sutra:
          sutra.upload(data_path)
          return sutra.ask(question, viz=viz)
-
-
- if __name__ == "__main__":
-     print("QuerySUTRA v0.4.0 - Simple & Automatic")