QuerySUTRA 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {querysutra-0.5.2.dist-info → querysutra-0.6.0.dist-info}/METADATA +18 -2
- querysutra-0.6.0.dist-info/RECORD +22 -0
- {querysutra-0.5.2.dist-info → querysutra-0.6.0.dist-info}/WHEEL +1 -1
- querysutra-0.6.0.dist-info/top_level.txt +1 -0
- sutra/__init__.py +6 -4
- sutra/database_manager.py +235 -195
- sutra/nlp_processor.py +175 -143
- sutra/schema_generator.py +56 -52
- sutra/sutra.py +196 -53
- querysutra-0.5.2.dist-info/RECORD +0 -28
- querysutra-0.5.2.dist-info/top_level.txt +0 -3
- tests/__init__.py +0 -0
- tests/test_modules.py +0 -0
- tests/test_sutra.py +0 -76
- utils/__init__.py +0 -0
- utils/file_utils.py +0 -0
- utils/text_utils.py +0 -0
- {querysutra-0.5.2.dist-info → querysutra-0.6.0.dist-info}/licenses/LICENSE +0 -0
sutra/sutra.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
"""QuerySUTRA v0.
|
|
2
|
-
__version__ = "0.
|
|
1
|
+
"""QuerySUTRA v0.6.0 - AI-powered data analysis for structured and unstructured data"""
|
|
2
|
+
__version__ = "0.6.0"
|
|
3
3
|
__author__ = "Aditya Batta"
|
|
4
4
|
__all__ = ["SUTRA", "QueryResult"]
|
|
5
5
|
|
|
@@ -41,7 +41,7 @@ except:
|
|
|
41
41
|
|
|
42
42
|
|
|
43
43
|
class SUTRA:
|
|
44
|
-
"""SUTRA -
|
|
44
|
+
"""SUTRA - FIXED: Considers ALL tables"""
|
|
45
45
|
|
|
46
46
|
def __init__(self, api_key: Optional[str] = None, db: str = "sutra.db",
|
|
47
47
|
use_embeddings: bool = False, fuzzy_match: bool = True,
|
|
@@ -72,7 +72,7 @@ class SUTRA:
|
|
|
72
72
|
pass
|
|
73
73
|
|
|
74
74
|
self._refresh_schema()
|
|
75
|
-
print(f"QuerySUTRA
|
|
75
|
+
print(f"QuerySUTRA v{__version__} Ready")
|
|
76
76
|
|
|
77
77
|
def upload(self, data: Union[str, pd.DataFrame], name: Optional[str] = None) -> 'SUTRA':
|
|
78
78
|
"""Upload."""
|
|
@@ -105,7 +105,7 @@ class SUTRA:
|
|
|
105
105
|
return self
|
|
106
106
|
|
|
107
107
|
def _pdf(self, path: Path, name: str):
|
|
108
|
-
"""
|
|
108
|
+
"""PDF extraction."""
|
|
109
109
|
if not HAS_PYPDF2:
|
|
110
110
|
raise ImportError("pip install PyPDF2")
|
|
111
111
|
|
|
@@ -115,12 +115,11 @@ class SUTRA:
|
|
|
115
115
|
text = "".join([p.extract_text() + "\n" for p in PyPDF2.PdfReader(f).pages])
|
|
116
116
|
|
|
117
117
|
if not self.client:
|
|
118
|
-
print("ERROR: No API key!
|
|
118
|
+
print("ERROR: No API key!")
|
|
119
119
|
return
|
|
120
120
|
|
|
121
121
|
print("AI: Extracting...")
|
|
122
122
|
|
|
123
|
-
# TRY 3 TIMES
|
|
124
123
|
entities = None
|
|
125
124
|
for attempt in [1, 2, 3]:
|
|
126
125
|
entities = self._extract(text, attempt)
|
|
@@ -129,7 +128,6 @@ class SUTRA:
|
|
|
129
128
|
if attempt < 3:
|
|
130
129
|
print(f" Retry {attempt+1}/3...")
|
|
131
130
|
|
|
132
|
-
# Create tables from entities
|
|
133
131
|
if entities and len(entities) > 0:
|
|
134
132
|
print(f"Extracted {len(entities)} entity types:")
|
|
135
133
|
for etype, recs in entities.items():
|
|
@@ -138,18 +136,16 @@ class SUTRA:
|
|
|
138
136
|
rec['id'] = idx
|
|
139
137
|
self._store(pd.DataFrame(recs), f"{name}_{etype}")
|
|
140
138
|
print(f" {etype}: {len(recs)} rows")
|
|
139
|
+
# After all tables are created, detect and store foreign key relationships
|
|
140
|
+
self._create_foreign_keys()
|
|
141
141
|
return
|
|
142
142
|
|
|
143
|
-
# REGEX FALLBACK - FIXED
|
|
144
143
|
print("Using regex fallback...")
|
|
145
144
|
people = []
|
|
146
145
|
emails = re.findall(r'[\w\.-]+@[\w\.-]+\.\w+', text)
|
|
147
|
-
|
|
148
|
-
# Extract names from common patterns
|
|
149
146
|
name_patterns = [
|
|
150
147
|
r'(?:Employee|Name|Mr\.|Mrs\.|Ms\.|Dr\.)\s*[:\-]?\s*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)',
|
|
151
148
|
r'([A-Z][a-z]+\s+[A-Z][a-z]+)\s+(?:lives|resides|works|is based)',
|
|
152
|
-
r'\*\*([A-Z][a-z]+\s+[A-Z][a-z]+)\*\*'
|
|
153
149
|
]
|
|
154
150
|
|
|
155
151
|
names = []
|
|
@@ -158,7 +154,6 @@ class SUTRA:
|
|
|
158
154
|
if len(names) >= len(emails):
|
|
159
155
|
break
|
|
160
156
|
|
|
161
|
-
# Match emails to names
|
|
162
157
|
max_people = min(len(emails), 50)
|
|
163
158
|
for i in range(max_people):
|
|
164
159
|
people.append({
|
|
@@ -169,55 +164,49 @@ class SUTRA:
|
|
|
169
164
|
|
|
170
165
|
if people:
|
|
171
166
|
self._store(pd.DataFrame(people), f"{name}_people")
|
|
172
|
-
print(f" Extracted {len(people)} people
|
|
167
|
+
print(f" Extracted {len(people)} people")
|
|
173
168
|
else:
|
|
174
|
-
# Absolute last resort
|
|
175
169
|
lines = [l.strip() for l in text.split('\n') if l.strip()][:100]
|
|
176
170
|
self._store(pd.DataFrame({'line': range(1, len(lines)+1), 'text': lines}), name)
|
|
177
171
|
|
|
178
172
|
def _extract(self, text: str, attempt: int) -> Dict:
|
|
179
|
-
"""Extract
|
|
173
|
+
"""Extract."""
|
|
180
174
|
if not self.client:
|
|
181
175
|
return {}
|
|
182
176
|
|
|
183
177
|
try:
|
|
184
178
|
if attempt == 1:
|
|
185
179
|
sys_msg = "Extract entities as JSON. Return ONLY valid JSON."
|
|
186
|
-
usr_msg = f"""Extract ALL entities
|
|
180
|
+
usr_msg = f"""Extract ALL entities.
|
|
187
181
|
|
|
188
182
|
Text:
|
|
189
183
|
{text[:15000]}
|
|
190
184
|
|
|
191
|
-
|
|
185
|
+
JSON with: people, skills, technologies, projects, certifications, education, work_experience
|
|
192
186
|
|
|
193
187
|
Example:
|
|
194
|
-
{{"people":[{{"id":1,"name":"Sarah
|
|
188
|
+
{{"people":[{{"id":1,"name":"Sarah","email":"s@co.com","city":"NYC","state":"NY"}}],"skills":[{{"id":1,"person_id":1,"skill_name":"Python"}}]}}
|
|
195
189
|
|
|
196
|
-
|
|
190
|
+
Unique IDs (1,2,3...), person_id links to people.id
|
|
197
191
|
|
|
198
192
|
JSON:"""
|
|
199
|
-
|
|
200
193
|
elif attempt == 2:
|
|
201
194
|
sys_msg = "Return JSON."
|
|
202
195
|
usr_msg = f"""Text: {text[:10000]}
|
|
203
196
|
|
|
204
|
-
Extract people
|
|
205
|
-
{{"people":[{{"id":1,"name":"...","email":"..."
|
|
197
|
+
Extract people:
|
|
198
|
+
{{"people":[{{"id":1,"name":"...","email":"..."}}]}}
|
|
206
199
|
|
|
207
200
|
JSON:"""
|
|
208
|
-
|
|
209
201
|
else:
|
|
210
|
-
sys_msg = "JSON
|
|
211
|
-
usr_msg = f"""
|
|
202
|
+
sys_msg = "JSON."
|
|
203
|
+
usr_msg = f"""Names/emails from: {text[:8000]}
|
|
212
204
|
|
|
213
|
-
{{"people":[{{"id":1,"name":"John","email":"
|
|
205
|
+
{{"people":[{{"id":1,"name":"John","email":"j@co.com"}}]}}"""
|
|
214
206
|
|
|
215
207
|
r = self.client.chat.completions.create(
|
|
216
208
|
model="gpt-4o-mini",
|
|
217
|
-
messages=[
|
|
218
|
-
{"role": "system", "content": sys_msg},
|
|
219
|
-
{"role": "user", "content": usr_msg}
|
|
220
|
-
],
|
|
209
|
+
messages=[{"role": "system", "content": sys_msg}, {"role": "user", "content": usr_msg}],
|
|
221
210
|
temperature=0,
|
|
222
211
|
max_tokens=12000
|
|
223
212
|
)
|
|
@@ -237,7 +226,6 @@ JSON:"""
|
|
|
237
226
|
has_data = any(isinstance(v, list) and len(v) > 0 for v in result.values())
|
|
238
227
|
if has_data:
|
|
239
228
|
return result
|
|
240
|
-
|
|
241
229
|
return {}
|
|
242
230
|
|
|
243
231
|
except Exception as e:
|
|
@@ -278,30 +266,86 @@ JSON:"""
|
|
|
278
266
|
self._refresh_schema()
|
|
279
267
|
print(f" {name}: {len(df)} rows")
|
|
280
268
|
|
|
269
|
+
def _create_foreign_keys(self, silent=False):
|
|
270
|
+
"""Detect foreign key relationships between tables by matching column naming patterns.
|
|
271
|
+
e.g., 'person_id' in work_experience -> 'id' in people table."""
|
|
272
|
+
tables = self._get_tables()
|
|
273
|
+
|
|
274
|
+
# Build a map of potential parent tables by looking for 'id' columns
|
|
275
|
+
# e.g., employee_data_people has 'id' -> can be referenced as person_id, people_id
|
|
276
|
+
parent_map = {} # Maps potential FK column names -> (parent_table, parent_pk)
|
|
277
|
+
for t in tables:
|
|
278
|
+
self.cursor.execute(f"PRAGMA table_info({t})")
|
|
279
|
+
cols = {r[1]: r[2] for r in self.cursor.fetchall()}
|
|
280
|
+
if 'id' in cols:
|
|
281
|
+
# Generate possible FK names from table name
|
|
282
|
+
# e.g., 'employee_data_people' -> 'person_id', 'people_id'
|
|
283
|
+
parts = t.split('_')
|
|
284
|
+
for part in parts:
|
|
285
|
+
# singular form guesses
|
|
286
|
+
fk_name = f"{part}_id"
|
|
287
|
+
parent_map[fk_name] = (t, 'id')
|
|
288
|
+
# Handle plural -> singular (people -> person)
|
|
289
|
+
if part.endswith('ies'):
|
|
290
|
+
parent_map[f"{part[:-3]}y_id"] = (t, 'id')
|
|
291
|
+
elif part.endswith('es'):
|
|
292
|
+
parent_map[f"{part[:-2]}_id"] = (t, 'id')
|
|
293
|
+
elif part.endswith('s'):
|
|
294
|
+
parent_map[f"{part[:-1]}_id"] = (t, 'id')
|
|
295
|
+
# Also try full table name as FK
|
|
296
|
+
parent_map[f"{t}_id"] = (t, 'id')
|
|
297
|
+
|
|
298
|
+
# Now scan all tables for columns matching FK patterns
|
|
299
|
+
self.foreign_keys = {} # table -> [(fk_col, parent_table, parent_col)]
|
|
300
|
+
for t in tables:
|
|
301
|
+
self.cursor.execute(f"PRAGMA table_info({t})")
|
|
302
|
+
cols = [r[1] for r in self.cursor.fetchall()]
|
|
303
|
+
fks = []
|
|
304
|
+
for col in cols:
|
|
305
|
+
if col in parent_map:
|
|
306
|
+
parent_table, parent_col = parent_map[col]
|
|
307
|
+
if parent_table != t: # Don't self-reference
|
|
308
|
+
fks.append((col, parent_table, parent_col))
|
|
309
|
+
if fks:
|
|
310
|
+
self.foreign_keys[t] = fks
|
|
311
|
+
|
|
312
|
+
if self.foreign_keys and not silent:
|
|
313
|
+
print(f"\n🔗 Detected relationships:")
|
|
314
|
+
for t, fks in self.foreign_keys.items():
|
|
315
|
+
for fk_col, parent_table, parent_col in fks:
|
|
316
|
+
print(f" {t}.{fk_col} → {parent_table}.{parent_col}")
|
|
317
|
+
|
|
281
318
|
def ask(self, q: str, viz: Union[bool, str] = False, table: Optional[str] = None) -> 'QueryResult':
|
|
282
|
-
"""
|
|
319
|
+
"""
|
|
320
|
+
Query - FIXED: Considers ALL tables, picks best one or joins multiple.
|
|
321
|
+
"""
|
|
283
322
|
if not self.client:
|
|
284
323
|
return QueryResult(False, "", pd.DataFrame(), None, "No API")
|
|
285
324
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
325
|
+
print(f"\nQuestion: {q}")
|
|
326
|
+
|
|
327
|
+
# Ensure foreign key relationships are detected
|
|
328
|
+
if not hasattr(self, 'foreign_keys') or not self.foreign_keys:
|
|
329
|
+
self._create_foreign_keys(silent=True)
|
|
330
|
+
|
|
331
|
+
# FIXED: If no table specified, let AI pick the right one(s)
|
|
332
|
+
if not table:
|
|
333
|
+
# Get ALL table schemas
|
|
334
|
+
all_schemas = {}
|
|
335
|
+
for tbl in self._get_tables():
|
|
336
|
+
all_schemas[tbl] = {
|
|
337
|
+
'columns': list(self.schema_info.get(tbl, {}).keys()),
|
|
338
|
+
'row_count': pd.read_sql_query(f"SELECT COUNT(*) FROM {tbl}", self.conn).iloc[0, 0]
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
# Let AI decide which table(s) to use
|
|
342
|
+
sql = self._gen_sql_smart(q, all_schemas)
|
|
343
|
+
else:
|
|
344
|
+
# Use specified table
|
|
345
|
+
sql = self._gen_sql(q, table)
|
|
294
346
|
|
|
295
347
|
if self.fuzzy_match:
|
|
296
|
-
q = self._fuzzy(q,
|
|
297
|
-
|
|
298
|
-
key = hashlib.md5(f"{q}:{t}".encode()).hexdigest()
|
|
299
|
-
if self.cache_queries and self.cache and key in self.cache:
|
|
300
|
-
sql = self.cache[key]
|
|
301
|
-
else:
|
|
302
|
-
sql = self._gen_sql(q, t)
|
|
303
|
-
if self.cache_queries and self.cache:
|
|
304
|
-
self.cache[key] = sql
|
|
348
|
+
q = self._fuzzy(q, table or self._get_tables()[0])
|
|
305
349
|
|
|
306
350
|
print(f"SQL: {sql}")
|
|
307
351
|
|
|
@@ -312,12 +356,111 @@ JSON:"""
|
|
|
312
356
|
r = QueryResult(True, sql, df, fig)
|
|
313
357
|
|
|
314
358
|
if self.use_embeddings and self.embedding_model:
|
|
315
|
-
self._store_cache(q,
|
|
359
|
+
self._store_cache(q, table or "all", r)
|
|
316
360
|
|
|
317
361
|
return r
|
|
318
362
|
except Exception as e:
|
|
363
|
+
print(f"Error: {e}")
|
|
319
364
|
return QueryResult(False, sql, pd.DataFrame(), None, str(e))
|
|
320
365
|
|
|
366
|
+
def _get_relationship_context(self) -> str:
|
|
367
|
+
"""Build a clear relationship context string for the AI prompt."""
|
|
368
|
+
if not hasattr(self, 'foreign_keys') or not self.foreign_keys:
|
|
369
|
+
# Try to detect relationships if not already done
|
|
370
|
+
self._create_foreign_keys(silent=True)
|
|
371
|
+
|
|
372
|
+
if not hasattr(self, 'foreign_keys') or not self.foreign_keys:
|
|
373
|
+
return ""
|
|
374
|
+
|
|
375
|
+
lines = ["\n=== TABLE RELATIONSHIPS (FOREIGN KEYS) ==="]
|
|
376
|
+
lines.append("Use these to JOIN tables when a question needs data from multiple tables:")
|
|
377
|
+
for t, fks in self.foreign_keys.items():
|
|
378
|
+
for fk_col, parent_table, parent_col in fks:
|
|
379
|
+
lines.append(f" {t}.{fk_col} → {parent_table}.{parent_col}")
|
|
380
|
+
lines.append(f" JOIN syntax: JOIN {parent_table} ON {t}.{fk_col} = {parent_table}.{parent_col}")
|
|
381
|
+
lines.append("=" * 50)
|
|
382
|
+
return "\n".join(lines)
|
|
383
|
+
|
|
384
|
+
def _gen_sql_smart(self, q: str, all_schemas: Dict) -> str:
|
|
385
|
+
"""
|
|
386
|
+
FIXED: Generate SQL considering ALL tables and their relationships.
|
|
387
|
+
"""
|
|
388
|
+
# Build context with ALL tables
|
|
389
|
+
schema_context = "Database has these tables:\n"
|
|
390
|
+
for tbl, info in all_schemas.items():
|
|
391
|
+
schema_context += f"\n{tbl} ({info['row_count']} rows):\n"
|
|
392
|
+
schema_context += f" Columns: {', '.join(info['columns'])}\n"
|
|
393
|
+
|
|
394
|
+
# Add relationship context
|
|
395
|
+
relationship_context = self._get_relationship_context()
|
|
396
|
+
|
|
397
|
+
# Add sample data from ALL tables (not just first 3)
|
|
398
|
+
samples = ""
|
|
399
|
+
for tbl in list(all_schemas.keys())[:6]: # Show more tables
|
|
400
|
+
try:
|
|
401
|
+
sample_df = pd.read_sql_query(f"SELECT * FROM {tbl} LIMIT 2", self.conn)
|
|
402
|
+
samples += f"\nSample from {tbl}:\n{sample_df.to_string(index=False)}\n"
|
|
403
|
+
except:
|
|
404
|
+
pass
|
|
405
|
+
|
|
406
|
+
prompt = f"""You are an expert SQL query generator.
|
|
407
|
+
|
|
408
|
+
{schema_context}
|
|
409
|
+
{relationship_context}
|
|
410
|
+
{samples}
|
|
411
|
+
|
|
412
|
+
Question: {q}
|
|
413
|
+
|
|
414
|
+
CRITICAL INSTRUCTIONS - FOLLOW THESE STEPS:
|
|
415
|
+
|
|
416
|
+
STEP 1: READ THE TABLE RELATIONSHIPS SECTION ABOVE.
|
|
417
|
+
Those show you exactly how tables connect via foreign keys.
|
|
418
|
+
|
|
419
|
+
STEP 2: IDENTIFY WHICH TABLES HAVE THE DATA NEEDED.
|
|
420
|
+
- Person info (name, email, city, state) → look in *_people table
|
|
421
|
+
- Work info (company, position, start_date) → look in *_work_experience table
|
|
422
|
+
- Skills, education, etc. → look in their respective tables
|
|
423
|
+
|
|
424
|
+
STEP 3: IF THE QUESTION NEEDS DATA FROM MULTIPLE TABLES, YOU MUST USE JOIN.
|
|
425
|
+
Use the foreign key relationships shown above.
|
|
426
|
+
Example: If work_experience has person_id and people has id:
|
|
427
|
+
JOIN people ON work_experience.person_id = people.id
|
|
428
|
+
|
|
429
|
+
STEP 4: WRITE THE QUERY.
|
|
430
|
+
- Use table aliases for readability
|
|
431
|
+
- Qualify ALL column names with table alias to avoid ambiguity
|
|
432
|
+
- For "who" / "which person" questions, ALWAYS join to the people table to get names
|
|
433
|
+
- For "from <state>" or "in <city>" questions, the location is in the people table, JOIN to it
|
|
434
|
+
- For "count by state" or "group by state", the state column is in the people table, JOIN to it
|
|
435
|
+
|
|
436
|
+
EXAMPLES:
|
|
437
|
+
❌ WRONG: SELECT COUNT(*) FROM work_experience GROUP BY company
|
|
438
|
+
(when asked "count by state" - state is NOT in work_experience!)
|
|
439
|
+
|
|
440
|
+
✅ CORRECT: SELECT p.state, COUNT(*) as employee_count
|
|
441
|
+
FROM work_experience w
|
|
442
|
+
JOIN people p ON w.person_id = p.id
|
|
443
|
+
GROUP BY p.state
|
|
444
|
+
|
|
445
|
+
❌ WRONG: SELECT * FROM work_experience WHERE company LIKE '%FL%'
|
|
446
|
+
(when asked "how many from FL" - FL is a state, not a company!)
|
|
447
|
+
|
|
448
|
+
✅ CORRECT: SELECT COUNT(*) as count
|
|
449
|
+
FROM people p
|
|
450
|
+
WHERE p.state = 'FL'
|
|
451
|
+
|
|
452
|
+
Return ONLY the executable SQL query. No explanations, no markdown, no code blocks:"""
|
|
453
|
+
|
|
454
|
+
r = self.client.chat.completions.create(
|
|
455
|
+
model="gpt-4o-mini",
|
|
456
|
+
messages=[
|
|
457
|
+
{"role": "system", "content": "You are an expert SQL query generator. ALWAYS use JOIN when data is spread across multiple tables. ALWAYS check which table a column belongs to before using it. State, city, name are typically in people tables. Position, company are in work_experience tables. Return ONLY executable SQL."},
|
|
458
|
+
{"role": "user", "content": prompt}
|
|
459
|
+
],
|
|
460
|
+
temperature=0
|
|
461
|
+
)
|
|
462
|
+
return r.choices[0].message.content.strip().replace("```sql", "").replace("```", "").strip()
|
|
463
|
+
|
|
321
464
|
def _fuzzy(self, q: str, t: str) -> str:
|
|
322
465
|
"""Fuzzy."""
|
|
323
466
|
try:
|
|
@@ -506,7 +649,7 @@ JSON:"""
|
|
|
506
649
|
return instance
|
|
507
650
|
|
|
508
651
|
def _gen_sql(self, q: str, t: str) -> str:
|
|
509
|
-
"""SQL."""
|
|
652
|
+
"""SQL for single table."""
|
|
510
653
|
schema = self.schema_info.get(t, {})
|
|
511
654
|
sample = pd.read_sql_query(f"SELECT * FROM {t} LIMIT 3", self.conn).to_string(index=False)
|
|
512
655
|
cols = ", ".join([f"{c} ({d})" for c, d in schema.items()])
|
|
@@ -527,7 +670,7 @@ JSON:"""
|
|
|
527
670
|
return [r[0] for r in self.cursor.fetchall()]
|
|
528
671
|
|
|
529
672
|
def _refresh_schema(self):
|
|
530
|
-
"""Refresh."""
|
|
673
|
+
"""Refresh schema info."""
|
|
531
674
|
self.schema_info = {}
|
|
532
675
|
for t in self._get_tables():
|
|
533
676
|
self.cursor.execute(f"PRAGMA table_info({t})")
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
querysutra-0.5.2.dist-info/licenses/LICENSE,sha256=F-4b93u0OVrVwGXgMwBRq6MlGyUT9zmre1oh5Gft5Ts,1066
|
|
2
|
-
sutra/__init__.py,sha256=25HUMETpmA1tlMl5j-ajdo9MRXljSZBrirSTH7w7jIc,118
|
|
3
|
-
sutra/cache_manager.py,sha256=e0AAeUqoR-aiqzZ3fB-IDvpJ4JA6-YBFyRJxusEnIrA,3082
|
|
4
|
-
sutra/clear_cache.py,sha256=rVIz29p7V11Uh6oHXeaWpFtYXXv-2OED91cHMAWWxtQ,187
|
|
5
|
-
sutra/core.py,sha256=R_JbOlZTukegP92Dr-WLsdr632_otFN7o9qSvcxyBtw,10497
|
|
6
|
-
sutra/data_loader.py,sha256=_yPj-DS2qYtlCgaMACQtfXZfSuAdVVd4igNP7yzXolc,5781
|
|
7
|
-
sutra/database_manager.py,sha256=L-QC_WwR3Pnl1BRh0rnEv5MNSTr4C7ZP-hIPfCHRK88,7672
|
|
8
|
-
sutra/direct_query.py,sha256=X69I646zHIZlZjMmgn8O2xLS_7ww7miAkABTnJEPAAc,2724
|
|
9
|
-
sutra/feedback.py,sha256=PHSffU_rfORjLkTW3-j2VSjQdw4ufROsTeBWaX6DZ00,1642
|
|
10
|
-
sutra/feedback_matcher.py,sha256=WXYpGtFJnOyYQOzy-z8uBiUWH5vyJJOMS1NwEYzNfic,2865
|
|
11
|
-
sutra/nlp_processor.py,sha256=wMS1hz1aGWjSwPUD7lSNBbQapFtLgF2l65j0QKXQOd0,5461
|
|
12
|
-
sutra/schema_embeddings.py,sha256=bVPzpJOdYTyUdG2k3ZdgYJLrX2opHBx68RIjJcMlueo,9732
|
|
13
|
-
sutra/schema_generator.py,sha256=BX_vXmnvSGc6nCBx40WLSoNL3WIYPDahd1cEYloyY4M,1925
|
|
14
|
-
sutra/sutra.py,sha256=XgNCY8QPOod0-ymt6R50JMaHJetyfTsElzyvNHpYStw,20664
|
|
15
|
-
sutra/sutra_client.py,sha256=PYYDGqVbA9pB-Zcsm52i9KarwijCIGVZOThgONZP6Vs,14203
|
|
16
|
-
sutra/sutra_core.py,sha256=diaWOXUHn1wrqCQrBhLKL612tMQioaqx-ILc3y9-CqM,11708
|
|
17
|
-
sutra/sutra_simple.py,sha256=rnqzG7OAt4p64XtO0peMqHS1pG5tdA8U3EYTMVsq7BE,23201
|
|
18
|
-
sutra/visualizer.py,sha256=YOKTmjQcY72smmx9KsZrQTdbAiE5GQDKofMFjpLIUfI,6996
|
|
19
|
-
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
-
tests/test_modules.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
-
tests/test_sutra.py,sha256=6Z4SoIuBzza101304I7plkyPVkUBbjIxR8uPs9z5ntg,2383
|
|
22
|
-
utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
-
utils/file_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
-
utils/text_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
querysutra-0.5.2.dist-info/METADATA,sha256=8brpcR8UxQwuz28hi8oUL8F5Dfug5AcFk_SdReJlWd0,7258
|
|
26
|
-
querysutra-0.5.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
27
|
-
querysutra-0.5.2.dist-info/top_level.txt,sha256=9v0buw21eo5LaUU_3Cf9b9MqRyEvtM9cHaOuEXUKVqM,18
|
|
28
|
-
querysutra-0.5.2.dist-info/RECORD,,
|
tests/__init__.py
DELETED
|
File without changes
|
tests/test_modules.py
DELETED
|
File without changes
|
tests/test_sutra.py
DELETED
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Test suite for SUTRA library
|
|
3
|
-
Run with: pytest test_sutra.py
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import pytest
|
|
7
|
-
import pandas as pd
|
|
8
|
-
import os
|
|
9
|
-
from sutra import SutraClient
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class TestSutraClient:
|
|
13
|
-
"""Test cases for SutraClient"""
|
|
14
|
-
|
|
15
|
-
@pytest.fixture
|
|
16
|
-
def client(self):
|
|
17
|
-
"""Create a test client"""
|
|
18
|
-
# Use a test database
|
|
19
|
-
api_key = os.getenv('OPENAI_API_KEY', 'test-key')
|
|
20
|
-
client = SutraClient(api_key=api_key, db_path="test_db.db")
|
|
21
|
-
yield client
|
|
22
|
-
# Cleanup
|
|
23
|
-
client.close()
|
|
24
|
-
if os.path.exists("test_db.db"):
|
|
25
|
-
os.remove("test_db.db")
|
|
26
|
-
|
|
27
|
-
@pytest.fixture
|
|
28
|
-
def sample_data(self):
|
|
29
|
-
"""Create sample DataFrame"""
|
|
30
|
-
return pd.DataFrame({
|
|
31
|
-
'name': ['Alice', 'Bob', 'Charlie'],
|
|
32
|
-
'age': [25, 30, 35],
|
|
33
|
-
'city': ['New York', 'London', 'Paris']
|
|
34
|
-
})
|
|
35
|
-
|
|
36
|
-
def test_client_initialization(self, client):
|
|
37
|
-
"""Test client can be initialized"""
|
|
38
|
-
assert client is not None
|
|
39
|
-
assert client.db_path == "test_db.db"
|
|
40
|
-
|
|
41
|
-
def test_upload_dataframe(self, client, sample_data):
|
|
42
|
-
"""Test uploading a DataFrame"""
|
|
43
|
-
result = client.upload_dataframe(sample_data, "test_table")
|
|
44
|
-
assert result['status'] == 'success'
|
|
45
|
-
assert result['table_name'] == 'test_table'
|
|
46
|
-
assert result['rows_inserted'] == 3
|
|
47
|
-
|
|
48
|
-
def test_list_tables(self, client, sample_data):
|
|
49
|
-
"""Test listing tables"""
|
|
50
|
-
client.upload_dataframe(sample_data, "test_table")
|
|
51
|
-
tables = client.list_tables()
|
|
52
|
-
assert 'test_table' in tables
|
|
53
|
-
|
|
54
|
-
def test_execute_sql(self, client, sample_data):
|
|
55
|
-
"""Test direct SQL execution"""
|
|
56
|
-
client.upload_dataframe(sample_data, "test_table")
|
|
57
|
-
result = client.execute_sql("SELECT * FROM test_table")
|
|
58
|
-
assert result['status'] == 'success'
|
|
59
|
-
assert len(result['results']) == 3
|
|
60
|
-
|
|
61
|
-
def test_get_table_info(self, client, sample_data):
|
|
62
|
-
"""Test getting table information"""
|
|
63
|
-
client.upload_dataframe(sample_data, "test_table")
|
|
64
|
-
info = client.get_table_info("test_table")
|
|
65
|
-
assert info['table_name'] == 'test_table'
|
|
66
|
-
assert len(info['columns']) > 0
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def test_import():
|
|
70
|
-
"""Test that the library can be imported"""
|
|
71
|
-
from sutra import SutraClient
|
|
72
|
-
assert SutraClient is not None
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
if __name__ == "__main__":
|
|
76
|
-
pytest.main([__file__, "-v"])
|
utils/__init__.py
DELETED
|
File without changes
|
utils/file_utils.py
DELETED
|
File without changes
|
utils/text_utils.py
DELETED
|
File without changes
|
|
File without changes
|