QuerySUTRA 0.4.4__tar.gz → 0.4.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- querysutra-0.4.6/PKG-INFO +289 -0
- querysutra-0.4.6/QuerySUTRA.egg-info/PKG-INFO +289 -0
- querysutra-0.4.6/README.md +256 -0
- querysutra-0.4.6/pyproject.toml +17 -0
- querysutra-0.4.6/setup.py +4 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/__init__.py +2 -2
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/sutra.py +108 -141
- querysutra-0.4.4/PKG-INFO +0 -441
- querysutra-0.4.4/QuerySUTRA.egg-info/PKG-INFO +0 -441
- querysutra-0.4.4/README.md +0 -408
- querysutra-0.4.4/pyproject.toml +0 -17
- querysutra-0.4.4/setup.py +0 -4
- {querysutra-0.4.4 → querysutra-0.4.6}/LICENSE +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/MANIFEST.in +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/QuerySUTRA.egg-info/SOURCES.txt +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/QuerySUTRA.egg-info/dependency_links.txt +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/QuerySUTRA.egg-info/requires.txt +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/QuerySUTRA.egg-info/top_level.txt +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/examples/quickstart.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/examples/sutra_usage_guide.ipynb +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/examples/usage_guide.ipynb +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/requirements.txt +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/setup.cfg +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/cache_manager.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/clear_cache.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/core.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/data_loader.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/database_manager.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/direct_query.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/feedback.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/feedback_matcher.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/nlp_processor.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/schema_embeddings.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/schema_generator.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/sutra_client.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/sutra_core.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/sutra_simple.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/sutra/visualizer.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/tests/__init__.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/tests/test_modules.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/tests/test_sutra.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/utils/__init__.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/utils/file_utils.py +0 -0
- {querysutra-0.4.4 → querysutra-0.4.6}/utils/text_utils.py +0 -0
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuerySUTRA
|
|
3
|
+
Version: 0.4.6
|
|
4
|
+
Summary: SUTRA
|
|
5
|
+
Author: Aditya Batta
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: pandas>=1.3.0
|
|
11
|
+
Requires-Dist: numpy>=1.21.0
|
|
12
|
+
Requires-Dist: openai>=1.0.0
|
|
13
|
+
Requires-Dist: plotly>=5.0.0
|
|
14
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
15
|
+
Requires-Dist: PyPDF2>=3.0.0
|
|
16
|
+
Requires-Dist: python-docx>=0.8.11
|
|
17
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
18
|
+
Provides-Extra: mysql
|
|
19
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
|
|
20
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
|
|
21
|
+
Provides-Extra: postgres
|
|
22
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
|
|
23
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
24
|
+
Provides-Extra: embeddings
|
|
25
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
|
|
26
|
+
Provides-Extra: all
|
|
27
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
|
|
28
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
|
|
29
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
30
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
|
|
34
|
+
# QuerySUTRA
|
|
35
|
+
|
|
36
|
+
**SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
|
|
37
|
+
|
|
38
|
+
AI-powered data analysis. Upload any data (PDF, Word, Text, CSV, Excel), query with natural language, export to MySQL.
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install QuerySUTRA
|
|
44
|
+
pip install QuerySUTRA[mysql] # MySQL support
|
|
45
|
+
pip install QuerySUTRA[embeddings] # Smart caching
|
|
46
|
+
pip install QuerySUTRA[all] # All features
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from sutra import SUTRA
|
|
53
|
+
|
|
54
|
+
sutra = SUTRA(api_key="your-openai-key")
|
|
55
|
+
sutra.upload("data.pdf") # or .docx, .txt, .csv, .xlsx, .json
|
|
56
|
+
result = sutra.ask("Show me all people")
|
|
57
|
+
print(result.data)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Supported Formats
|
|
61
|
+
|
|
62
|
+
**Structured Data:**
|
|
63
|
+
- CSV (.csv)
|
|
64
|
+
- Excel (.xlsx, .xls)
|
|
65
|
+
- JSON (.json)
|
|
66
|
+
- SQL (.sql)
|
|
67
|
+
- Pandas DataFrame
|
|
68
|
+
|
|
69
|
+
**Unstructured Documents (AI Extraction):**
|
|
70
|
+
- PDF (.pdf)
|
|
71
|
+
- Word (.docx)
|
|
72
|
+
- Text (.txt)
|
|
73
|
+
|
|
74
|
+
## Core Features
|
|
75
|
+
|
|
76
|
+
### 1. Upload Any Data Format
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
# Structured data
|
|
80
|
+
sutra.upload("sales.csv")
|
|
81
|
+
sutra.upload("report.xlsx")
|
|
82
|
+
sutra.upload("api_data.json")
|
|
83
|
+
sutra.upload("dump.sql")
|
|
84
|
+
|
|
85
|
+
# Unstructured documents (AI extracts entities)
|
|
86
|
+
sutra.upload("resume.pdf")
|
|
87
|
+
sutra.upload("meeting_notes.docx")
|
|
88
|
+
sutra.upload("transcript.txt")
|
|
89
|
+
|
|
90
|
+
# DataFrame
|
|
91
|
+
import pandas as pd
|
|
92
|
+
df = pd.DataFrame({'name': ['Alice'], 'score': [95]})
|
|
93
|
+
sutra.upload(df, name="scores")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### 2. Complete Data Extraction
|
|
97
|
+
|
|
98
|
+
Processes entire documents in chunks. No data loss.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# PDF - Extracts ALL pages
|
|
102
|
+
sutra.upload("50_page_report.pdf") # Gets all 50 pages, all employees
|
|
103
|
+
|
|
104
|
+
# Word - Extracts ALL content
|
|
105
|
+
sutra.upload("large_document.docx") # Full document processed
|
|
106
|
+
|
|
107
|
+
# Text - Processes ALL lines
|
|
108
|
+
sutra.upload("log_file.txt") # Entire file analyzed
|
|
109
|
+
|
|
110
|
+
# All create multiple related tables
|
|
111
|
+
sutra.tables()
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### 3. Automatic MySQL Export
|
|
115
|
+
|
|
116
|
+
One-line upload and export. Database auto-created.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
sutra.upload("data.pdf", auto_export_mysql={
|
|
120
|
+
'host': 'localhost',
|
|
121
|
+
'user': 'root',
|
|
122
|
+
'password': 'your_password',
|
|
123
|
+
'database': 'my_database' # Auto-creates if not exists
|
|
124
|
+
})
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### 4. Natural Language Queries
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
result = sutra.ask("Show all people from California")
|
|
131
|
+
result = sutra.ask("Who has Python skills?", table="skills")
|
|
132
|
+
result = sutra.ask("Count employees by state", viz="pie")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### 5. Custom Visualizations
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
result = sutra.ask("Sales by region", viz="pie") # Pie chart
|
|
139
|
+
result = sutra.ask("Trends over time", viz="line") # Line chart
|
|
140
|
+
result = sutra.ask("Compare values", viz="bar") # Bar chart
|
|
141
|
+
result = sutra.ask("Correlations", viz="scatter") # Scatter
|
|
142
|
+
result = sutra.ask("Show table", viz="table") # Table
|
|
143
|
+
result = sutra.ask("Heatmap", viz="heatmap") # Heatmap
|
|
144
|
+
result = sutra.ask("Auto", viz=True) # Auto-detect
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### 6. Load Existing Databases
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# Load SQLite
|
|
151
|
+
sutra = SUTRA.load_from_db("data.db", api_key="key")
|
|
152
|
+
|
|
153
|
+
# Connect to MySQL
|
|
154
|
+
sutra = SUTRA.connect_mysql("localhost", "root", "pass", "database")
|
|
155
|
+
|
|
156
|
+
# Connect to PostgreSQL
|
|
157
|
+
sutra = SUTRA.connect_postgres("localhost", "postgres", "pass", "database")
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### 7. Fuzzy Matching
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
sutra = SUTRA(api_key="key", fuzzy_match=True)
|
|
164
|
+
|
|
165
|
+
# "New York City" matches "New York" automatically
|
|
166
|
+
result = sutra.ask("Who are from New York City?")
|
|
167
|
+
# Fuzzy: 'City' -> 'New York'
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Uses `difflib.get_close_matches` with a 60% similarity cutoff.
|
|
171
|
+
|
|
172
|
+
### 8. Embeddings for Smart Caching
|
|
173
|
+
|
|
174
|
+
Save up to 90% on API costs for repeated, similar queries.
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
sutra = SUTRA(api_key="key", use_embeddings=True)
|
|
178
|
+
|
|
179
|
+
result = sutra.ask("Show sales") # API call
|
|
180
|
+
result = sutra.ask("Display sales data") # Cached (92% similar)
|
|
181
|
+
result = sutra.ask("Give me sales info") # Cached (88% similar)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**How it works:**
|
|
185
|
+
- Model: `all-MiniLM-L6-v2` (80MB, runs locally)
|
|
186
|
+
- Converts queries to 384D vectors
|
|
187
|
+
- 85% similarity threshold
|
|
188
|
+
- No external API calls
|
|
189
|
+
|
|
190
|
+
**Cost savings:**
|
|
191
|
+
- 10 similar queries: 1 API call vs 10 = 90% savings
|
|
192
|
+
|
|
193
|
+
### 9. Irrelevant Query Detection
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
sutra = SUTRA(api_key="key", check_relevance=True)
|
|
197
|
+
|
|
198
|
+
result = sutra.ask("What's the weather?")
|
|
199
|
+
# Warning: Query may be irrelevant
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### 10. Direct SQL
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
result = sutra.sql("SELECT * FROM people WHERE state='CA'")
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Complete Example
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from sutra import SUTRA
|
|
212
|
+
|
|
213
|
+
# Initialize with all features
|
|
214
|
+
sutra = SUTRA(
|
|
215
|
+
api_key="your-key",
|
|
216
|
+
use_embeddings=True,
|
|
217
|
+
fuzzy_match=True,
|
|
218
|
+
check_relevance=True
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
# Upload any format
|
|
222
|
+
sutra.upload("employees.pdf") # PDF
|
|
223
|
+
sutra.upload("skills.docx") # Word
|
|
224
|
+
sutra.upload("projects.txt") # Text
|
|
225
|
+
sutra.upload("sales.csv") # CSV
|
|
226
|
+
sutra.upload("budget.xlsx") # Excel
|
|
227
|
+
|
|
228
|
+
# View tables
|
|
229
|
+
sutra.tables()
|
|
230
|
+
|
|
231
|
+
# Query
|
|
232
|
+
result = sutra.ask("Show all people", viz="bar")
|
|
233
|
+
|
|
234
|
+
# Export to MySQL
|
|
235
|
+
sutra.save_to_mysql("localhost", "root", "pass", "my_db")
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Import to MySQL Workflow
|
|
239
|
+
|
|
240
|
+
**Colab:**
|
|
241
|
+
```python
|
|
242
|
+
sutra.upload("data.pdf")
|
|
243
|
+
sutra.export_db("data.db", "sqlite")
|
|
244
|
+
from google.colab import files
|
|
245
|
+
files.download("data.db")
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
**Windows:**
|
|
249
|
+
```python
|
|
250
|
+
sutra = SUTRA.load_from_db("data.db", api_key="key")
|
|
251
|
+
sutra.save_to_mysql("localhost", "root", "pass", "my_db")
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## Export Options
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
sutra.export_db("backup.db", "sqlite")
|
|
258
|
+
sutra.export_db("schema.sql", "sql")
|
|
259
|
+
sutra.export_db("data.json", "json")
|
|
260
|
+
sutra.export_db("data.xlsx", "excel")
|
|
261
|
+
sutra.save_to_mysql("localhost", "root", "pass", "db")
|
|
262
|
+
sutra.save_to_postgres("localhost", "postgres", "pass", "db")
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## API Reference
|
|
266
|
+
|
|
267
|
+
**Methods**
|
|
268
|
+
- `upload(data, name, auto_export_mysql)` - Upload any format
|
|
269
|
+
- `ask(question, viz, table)` - Natural language query
|
|
270
|
+
- `sql(query, viz)` - Direct SQL
|
|
271
|
+
- `tables()` - List tables
|
|
272
|
+
- `schema()` - Show schema
|
|
273
|
+
- `peek(table, n)` - Preview
|
|
274
|
+
- `save_to_mysql(...)` - Export MySQL (auto-creates DB)
|
|
275
|
+
- `export_db(path, format)` - Export database
|
|
276
|
+
- `load_from_db(path, api_key)` - Load SQLite
|
|
277
|
+
- `connect_mysql(...)` - Connect MySQL
|
|
278
|
+
|
|
279
|
+
## Requirements
|
|
280
|
+
|
|
281
|
+
Python 3.8+, OpenAI API key
|
|
282
|
+
|
|
283
|
+
## License
|
|
284
|
+
|
|
285
|
+
MIT
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
**Made by Aditya Batta**
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuerySUTRA
|
|
3
|
+
Version: 0.4.6
|
|
4
|
+
Summary: SUTRA
|
|
5
|
+
Author: Aditya Batta
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: pandas>=1.3.0
|
|
11
|
+
Requires-Dist: numpy>=1.21.0
|
|
12
|
+
Requires-Dist: openai>=1.0.0
|
|
13
|
+
Requires-Dist: plotly>=5.0.0
|
|
14
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
15
|
+
Requires-Dist: PyPDF2>=3.0.0
|
|
16
|
+
Requires-Dist: python-docx>=0.8.11
|
|
17
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
18
|
+
Provides-Extra: mysql
|
|
19
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
|
|
20
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
|
|
21
|
+
Provides-Extra: postgres
|
|
22
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
|
|
23
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
24
|
+
Provides-Extra: embeddings
|
|
25
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
|
|
26
|
+
Provides-Extra: all
|
|
27
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
|
|
28
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
|
|
29
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
30
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
|
|
34
|
+
# QuerySUTRA
|
|
35
|
+
|
|
36
|
+
**SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
|
|
37
|
+
|
|
38
|
+
AI-powered data analysis. Upload any data (PDF, Word, Text, CSV, Excel), query with natural language, export to MySQL.
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install QuerySUTRA
|
|
44
|
+
pip install QuerySUTRA[mysql] # MySQL support
|
|
45
|
+
pip install QuerySUTRA[embeddings] # Smart caching
|
|
46
|
+
pip install QuerySUTRA[all] # All features
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from sutra import SUTRA
|
|
53
|
+
|
|
54
|
+
sutra = SUTRA(api_key="your-openai-key")
|
|
55
|
+
sutra.upload("data.pdf") # or .docx, .txt, .csv, .xlsx, .json
|
|
56
|
+
result = sutra.ask("Show me all people")
|
|
57
|
+
print(result.data)
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Supported Formats
|
|
61
|
+
|
|
62
|
+
**Structured Data:**
|
|
63
|
+
- CSV (.csv)
|
|
64
|
+
- Excel (.xlsx, .xls)
|
|
65
|
+
- JSON (.json)
|
|
66
|
+
- SQL (.sql)
|
|
67
|
+
- Pandas DataFrame
|
|
68
|
+
|
|
69
|
+
**Unstructured Documents (AI Extraction):**
|
|
70
|
+
- PDF (.pdf)
|
|
71
|
+
- Word (.docx)
|
|
72
|
+
- Text (.txt)
|
|
73
|
+
|
|
74
|
+
## Core Features
|
|
75
|
+
|
|
76
|
+
### 1. Upload Any Data Format
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
# Structured data
|
|
80
|
+
sutra.upload("sales.csv")
|
|
81
|
+
sutra.upload("report.xlsx")
|
|
82
|
+
sutra.upload("api_data.json")
|
|
83
|
+
sutra.upload("dump.sql")
|
|
84
|
+
|
|
85
|
+
# Unstructured documents (AI extracts entities)
|
|
86
|
+
sutra.upload("resume.pdf")
|
|
87
|
+
sutra.upload("meeting_notes.docx")
|
|
88
|
+
sutra.upload("transcript.txt")
|
|
89
|
+
|
|
90
|
+
# DataFrame
|
|
91
|
+
import pandas as pd
|
|
92
|
+
df = pd.DataFrame({'name': ['Alice'], 'score': [95]})
|
|
93
|
+
sutra.upload(df, name="scores")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### 2. Complete Data Extraction
|
|
97
|
+
|
|
98
|
+
Processes entire documents in chunks. No data loss.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# PDF - Extracts ALL pages
|
|
102
|
+
sutra.upload("50_page_report.pdf") # Gets all 50 pages, all employees
|
|
103
|
+
|
|
104
|
+
# Word - Extracts ALL content
|
|
105
|
+
sutra.upload("large_document.docx") # Full document processed
|
|
106
|
+
|
|
107
|
+
# Text - Processes ALL lines
|
|
108
|
+
sutra.upload("log_file.txt") # Entire file analyzed
|
|
109
|
+
|
|
110
|
+
# All create multiple related tables
|
|
111
|
+
sutra.tables()
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### 3. Automatic MySQL Export
|
|
115
|
+
|
|
116
|
+
One-line upload and export. Database auto-created.
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
sutra.upload("data.pdf", auto_export_mysql={
|
|
120
|
+
'host': 'localhost',
|
|
121
|
+
'user': 'root',
|
|
122
|
+
'password': 'your_password',
|
|
123
|
+
'database': 'my_database' # Auto-creates if not exists
|
|
124
|
+
})
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### 4. Natural Language Queries
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
result = sutra.ask("Show all people from California")
|
|
131
|
+
result = sutra.ask("Who has Python skills?", table="skills")
|
|
132
|
+
result = sutra.ask("Count employees by state", viz="pie")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### 5. Custom Visualizations
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
result = sutra.ask("Sales by region", viz="pie") # Pie chart
|
|
139
|
+
result = sutra.ask("Trends over time", viz="line") # Line chart
|
|
140
|
+
result = sutra.ask("Compare values", viz="bar") # Bar chart
|
|
141
|
+
result = sutra.ask("Correlations", viz="scatter") # Scatter
|
|
142
|
+
result = sutra.ask("Show table", viz="table") # Table
|
|
143
|
+
result = sutra.ask("Heatmap", viz="heatmap") # Heatmap
|
|
144
|
+
result = sutra.ask("Auto", viz=True) # Auto-detect
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### 6. Load Existing Databases
|
|
148
|
+
|
|
149
|
+
```python
|
|
150
|
+
# Load SQLite
|
|
151
|
+
sutra = SUTRA.load_from_db("data.db", api_key="key")
|
|
152
|
+
|
|
153
|
+
# Connect to MySQL
|
|
154
|
+
sutra = SUTRA.connect_mysql("localhost", "root", "pass", "database")
|
|
155
|
+
|
|
156
|
+
# Connect to PostgreSQL
|
|
157
|
+
sutra = SUTRA.connect_postgres("localhost", "postgres", "pass", "database")
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### 7. Fuzzy Matching
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
sutra = SUTRA(api_key="key", fuzzy_match=True)
|
|
164
|
+
|
|
165
|
+
# "New York City" matches "New York" automatically
|
|
166
|
+
result = sutra.ask("Who are from New York City?")
|
|
167
|
+
# Fuzzy: 'City' -> 'New York'
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Uses `difflib.get_close_matches` with a 60% similarity cutoff.
|
|
171
|
+
|
|
172
|
+
### 8. Embeddings for Smart Caching
|
|
173
|
+
|
|
174
|
+
Save up to 90% on API costs for repeated, similar queries.
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
sutra = SUTRA(api_key="key", use_embeddings=True)
|
|
178
|
+
|
|
179
|
+
result = sutra.ask("Show sales") # API call
|
|
180
|
+
result = sutra.ask("Display sales data") # Cached (92% similar)
|
|
181
|
+
result = sutra.ask("Give me sales info") # Cached (88% similar)
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
**How it works:**
|
|
185
|
+
- Model: `all-MiniLM-L6-v2` (80MB, runs locally)
|
|
186
|
+
- Converts queries to 384D vectors
|
|
187
|
+
- 85% similarity threshold
|
|
188
|
+
- No external API calls
|
|
189
|
+
|
|
190
|
+
**Cost savings:**
|
|
191
|
+
- 10 similar queries: 1 API call vs 10 = 90% savings
|
|
192
|
+
|
|
193
|
+
### 9. Irrelevant Query Detection
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
sutra = SUTRA(api_key="key", check_relevance=True)
|
|
197
|
+
|
|
198
|
+
result = sutra.ask("What's the weather?")
|
|
199
|
+
# Warning: Query may be irrelevant
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### 10. Direct SQL
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
result = sutra.sql("SELECT * FROM people WHERE state='CA'")
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
## Complete Example
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
from sutra import SUTRA
|
|
212
|
+
|
|
213
|
+
# Initialize with all features
|
|
214
|
+
sutra = SUTRA(
|
|
215
|
+
api_key="your-key",
|
|
216
|
+
use_embeddings=True,
|
|
217
|
+
fuzzy_match=True,
|
|
218
|
+
check_relevance=True
|
|
219
|
+
)
|
|
220
|
+
|
|
221
|
+
# Upload any format
|
|
222
|
+
sutra.upload("employees.pdf") # PDF
|
|
223
|
+
sutra.upload("skills.docx") # Word
|
|
224
|
+
sutra.upload("projects.txt") # Text
|
|
225
|
+
sutra.upload("sales.csv") # CSV
|
|
226
|
+
sutra.upload("budget.xlsx") # Excel
|
|
227
|
+
|
|
228
|
+
# View tables
|
|
229
|
+
sutra.tables()
|
|
230
|
+
|
|
231
|
+
# Query
|
|
232
|
+
result = sutra.ask("Show all people", viz="bar")
|
|
233
|
+
|
|
234
|
+
# Export to MySQL
|
|
235
|
+
sutra.save_to_mysql("localhost", "root", "pass", "my_db")
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Import to MySQL Workflow
|
|
239
|
+
|
|
240
|
+
**Colab:**
|
|
241
|
+
```python
|
|
242
|
+
sutra.upload("data.pdf")
|
|
243
|
+
sutra.export_db("data.db", "sqlite")
|
|
244
|
+
from google.colab import files
|
|
245
|
+
files.download("data.db")
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
**Windows:**
|
|
249
|
+
```python
|
|
250
|
+
sutra = SUTRA.load_from_db("data.db", api_key="key")
|
|
251
|
+
sutra.save_to_mysql("localhost", "root", "pass", "my_db")
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## Export Options
|
|
255
|
+
|
|
256
|
+
```python
|
|
257
|
+
sutra.export_db("backup.db", "sqlite")
|
|
258
|
+
sutra.export_db("schema.sql", "sql")
|
|
259
|
+
sutra.export_db("data.json", "json")
|
|
260
|
+
sutra.export_db("data.xlsx", "excel")
|
|
261
|
+
sutra.save_to_mysql("localhost", "root", "pass", "db")
|
|
262
|
+
sutra.save_to_postgres("localhost", "postgres", "pass", "db")
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## API Reference
|
|
266
|
+
|
|
267
|
+
**Methods**
|
|
268
|
+
- `upload(data, name, auto_export_mysql)` - Upload any format
|
|
269
|
+
- `ask(question, viz, table)` - Natural language query
|
|
270
|
+
- `sql(query, viz)` - Direct SQL
|
|
271
|
+
- `tables()` - List tables
|
|
272
|
+
- `schema()` - Show schema
|
|
273
|
+
- `peek(table, n)` - Preview
|
|
274
|
+
- `save_to_mysql(...)` - Export MySQL (auto-creates DB)
|
|
275
|
+
- `export_db(path, format)` - Export database
|
|
276
|
+
- `load_from_db(path, api_key)` - Load SQLite
|
|
277
|
+
- `connect_mysql(...)` - Connect MySQL
|
|
278
|
+
|
|
279
|
+
## Requirements
|
|
280
|
+
|
|
281
|
+
Python 3.8+, OpenAI API key
|
|
282
|
+
|
|
283
|
+
## License
|
|
284
|
+
|
|
285
|
+
MIT
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
**Made by Aditya Batta**
|