QuerySUTRA 0.3.3__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- querysutra-0.4.1/PKG-INFO +264 -0
- querysutra-0.4.1/QuerySUTRA.egg-info/PKG-INFO +264 -0
- querysutra-0.4.1/README.md +231 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/pyproject.toml +2 -2
- querysutra-0.4.1/setup.py +31 -0
- querysutra-0.4.1/sutra/__init__.py +9 -0
- querysutra-0.4.1/sutra/sutra.py +852 -0
- querysutra-0.3.3/PKG-INFO +0 -285
- querysutra-0.3.3/QuerySUTRA.egg-info/PKG-INFO +0 -285
- querysutra-0.3.3/README.md +0 -249
- querysutra-0.3.3/setup.py +0 -50
- querysutra-0.3.3/sutra/__init__.py +0 -12
- querysutra-0.3.3/sutra/sutra.py +0 -1080
- {querysutra-0.3.3 → querysutra-0.4.1}/LICENSE +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/MANIFEST.in +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/QuerySUTRA.egg-info/SOURCES.txt +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/QuerySUTRA.egg-info/dependency_links.txt +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/QuerySUTRA.egg-info/requires.txt +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/QuerySUTRA.egg-info/top_level.txt +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/examples/quickstart.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/examples/sutra_usage_guide.ipynb +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/examples/usage_guide.ipynb +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/requirements.txt +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/setup.cfg +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/cache_manager.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/clear_cache.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/core.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/data_loader.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/database_manager.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/direct_query.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/feedback.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/feedback_matcher.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/nlp_processor.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/schema_embeddings.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/schema_generator.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/sutra_client.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/sutra_core.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/sutra_simple.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/sutra/visualizer.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/tests/__init__.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/tests/test_modules.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/tests/test_sutra.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/utils/__init__.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/utils/file_utils.py +0 -0
- {querysutra-0.3.3 → querysutra-0.4.1}/utils/text_utils.py +0 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuerySUTRA
|
|
3
|
+
Version: 0.4.1
|
|
4
|
+
Summary: SUTRA: AI-powered data analysis with automatic MySQL export
|
|
5
|
+
Author: Aditya Batta
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: pandas>=1.3.0
|
|
11
|
+
Requires-Dist: numpy>=1.21.0
|
|
12
|
+
Requires-Dist: openai>=1.0.0
|
|
13
|
+
Requires-Dist: plotly>=5.0.0
|
|
14
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
15
|
+
Requires-Dist: PyPDF2>=3.0.0
|
|
16
|
+
Requires-Dist: python-docx>=0.8.11
|
|
17
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
18
|
+
Provides-Extra: mysql
|
|
19
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
|
|
20
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
|
|
21
|
+
Provides-Extra: postgres
|
|
22
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
|
|
23
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
24
|
+
Provides-Extra: embeddings
|
|
25
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
|
|
26
|
+
Provides-Extra: all
|
|
27
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
|
|
28
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
|
|
29
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
30
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
|
|
34
|
+
# QuerySUTRA
|
|
35
|
+
|
|
36
|
+
**SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
|
|
37
|
+
|
|
38
|
+
AI-powered data analysis library. Upload PDFs, query with natural language, export to MySQL automatically.
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install QuerySUTRA
|
|
44
|
+
pip install QuerySUTRA[mysql] # For MySQL export
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from sutra import SUTRA
|
|
51
|
+
|
|
52
|
+
# Upload PDF and auto-export to MySQL in ONE step
|
|
53
|
+
sutra = SUTRA(api_key="your-openai-key")
|
|
54
|
+
|
|
55
|
+
sutra.upload("data.pdf", auto_export_mysql={
|
|
56
|
+
'host': 'localhost',
|
|
57
|
+
'user': 'root',
|
|
58
|
+
'password': 'your_password',
|
|
59
|
+
'database': 'my_database' # Auto-creates if not exists
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
# Query immediately
|
|
63
|
+
result = sutra.ask("Show me all people")
|
|
64
|
+
print(result.data)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Features
|
|
68
|
+
|
|
69
|
+
**1. Automatic MySQL Export**
|
|
70
|
+
|
|
71
|
+
The database is created automatically if it does not already exist, so the export no longer fails on a missing database.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
# Upload and export to MySQL automatically
|
|
75
|
+
sutra.upload("data.pdf", auto_export_mysql={
|
|
76
|
+
'host': 'localhost',
|
|
77
|
+
'user': 'root',
|
|
78
|
+
'password': 'your_password',
|
|
79
|
+
'database': 'my_new_database' # Creates automatically
|
|
80
|
+
})
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**2. Complete Data Extraction**
|
|
84
|
+
|
|
85
|
+
Processes the entire PDF in chunks and extracts ALL employees (not just the first 10).
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
sutra.upload("large_document.pdf") # Extracts all 50+ employees
|
|
89
|
+
sutra.tables() # Shows all extracted tables
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**3. Natural Language Queries**
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
result = sutra.ask("Show all people from California")
|
|
96
|
+
result = sutra.ask("Who has Python skills?", table="skills")
|
|
97
|
+
result = sutra.ask("Count employees by state", viz="pie")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**4. Custom Visualizations**
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
result = sutra.ask("Sales by region", viz="pie")
|
|
104
|
+
result = sutra.ask("Trends", viz="line")
|
|
105
|
+
result = sutra.ask("Compare", viz="bar")
|
|
106
|
+
result = sutra.ask("Data", viz="scatter")
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
**5. Load Existing Databases**
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
# Load SQLite
|
|
113
|
+
sutra = SUTRA.load_from_db("data.db", api_key="key")
|
|
114
|
+
|
|
115
|
+
# Connect to MySQL
|
|
116
|
+
sutra = SUTRA.connect_mysql("localhost", "root", "pass", "database")
|
|
117
|
+
|
|
118
|
+
# Connect to PostgreSQL
|
|
119
|
+
sutra = SUTRA.connect_postgres("localhost", "postgres", "pass", "database")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**6. Smart Features (Optional)**
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
sutra = SUTRA(
|
|
126
|
+
api_key="your-key",
|
|
127
|
+
use_embeddings=True, # Cache similar queries (saves API calls)
|
|
128
|
+
fuzzy_match=True, # "New York City" matches "New York"
|
|
129
|
+
check_relevance=True, # Detect irrelevant queries
|
|
130
|
+
cache_queries=True # Cache exact queries
|
|
131
|
+
)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**7. Direct SQL (Free)**
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
result = sutra.sql("SELECT * FROM people WHERE state='CA'")
|
|
138
|
+
print(result.data)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Complete Workflow
|
|
142
|
+
|
|
143
|
+
**In Colab:**
|
|
144
|
+
```python
|
|
145
|
+
from sutra import SUTRA
|
|
146
|
+
|
|
147
|
+
sutra = SUTRA(api_key="your-key")
|
|
148
|
+
sutra.upload("employee_data.pdf")
|
|
149
|
+
sutra.tables() # See extracted tables
|
|
150
|
+
|
|
151
|
+
# Export and download
|
|
152
|
+
sutra.export_db("data.db", format="sqlite")
|
|
153
|
+
from google.colab import files
|
|
154
|
+
files.download("data.db")
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**On Windows:**
|
|
158
|
+
```python
|
|
159
|
+
from sutra import SUTRA
|
|
160
|
+
|
|
161
|
+
# Load downloaded database
|
|
162
|
+
sutra = SUTRA.load_from_db("data.db", api_key="your-key")
|
|
163
|
+
|
|
164
|
+
# Export to MySQL (auto-creates database)
|
|
165
|
+
sutra.save_to_mysql("localhost", "root", "password", "my_database")
|
|
166
|
+
|
|
167
|
+
# Verify in MySQL
|
|
168
|
+
sutra_mysql = SUTRA.connect_mysql("localhost", "root", "password", "my_database")
|
|
169
|
+
sutra_mysql.tables()
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Export Options
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# SQLite
|
|
176
|
+
sutra.export_db("backup.db", format="sqlite")
|
|
177
|
+
|
|
178
|
+
# SQL dump
|
|
179
|
+
sutra.export_db("schema.sql", format="sql")
|
|
180
|
+
|
|
181
|
+
# JSON
|
|
182
|
+
sutra.export_db("data.json", format="json")
|
|
183
|
+
|
|
184
|
+
# Excel
|
|
185
|
+
sutra.export_db("data.xlsx", format="excel")
|
|
186
|
+
|
|
187
|
+
# MySQL (auto-creates database)
|
|
188
|
+
sutra.save_to_mysql("localhost", "root", "pass", "new_db")
|
|
189
|
+
|
|
190
|
+
# PostgreSQL
|
|
191
|
+
sutra.save_to_postgres("localhost", "postgres", "pass", "new_db")
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## API Reference
|
|
195
|
+
|
|
196
|
+
**Initialize**
|
|
197
|
+
```python
|
|
198
|
+
SUTRA(api_key, db, use_embeddings, check_relevance, fuzzy_match, cache_queries)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**Class Methods**
|
|
202
|
+
- `load_from_db(path, api_key)` - Load SQLite
|
|
203
|
+
- `connect_mysql(host, user, password, database)` - Connect MySQL
|
|
204
|
+
- `connect_postgres(host, user, password, database)` - Connect PostgreSQL
|
|
205
|
+
|
|
206
|
+
**Instance Methods**
|
|
207
|
+
- `upload(data, name, auto_export_mysql)` - Upload with optional auto-export
|
|
208
|
+
- `ask(question, viz, table)` - Natural language query
|
|
209
|
+
- `sql(query, viz)` - Direct SQL
|
|
210
|
+
- `tables()` - List tables
|
|
211
|
+
- `schema(table)` - Show schema
|
|
212
|
+
- `peek(table, n)` - Preview data
|
|
213
|
+
- `export_db(path, format)` - Export database
|
|
214
|
+
- `save_to_mysql(host, user, password, database)` - Export to MySQL (auto-creates DB)
|
|
215
|
+
- `save_to_postgres(...)` - Export to PostgreSQL
|
|
216
|
+
- `backup(path)` - Backup
|
|
217
|
+
- `close()` - Close
|
|
218
|
+
|
|
219
|
+
## Troubleshooting
|
|
220
|
+
|
|
221
|
+
**MySQL database doesn't exist**
|
|
222
|
+
- Fixed in v0.4.0: the database is now created automatically
|
|
223
|
+
- No need to manually create database
|
|
224
|
+
|
|
225
|
+
**Only 10 employees extracted from 50-employee PDF**
|
|
226
|
+
- Fixed in v0.4.0 - processes entire PDF in chunks
|
|
227
|
+
- Upgrade: `pip install --upgrade QuerySUTRA`
|
|
228
|
+
|
|
229
|
+
**connect_mysql() not found**
|
|
230
|
+
- Update: `pip install --upgrade QuerySUTRA`
|
|
231
|
+
- Install MySQL support: `pip install QuerySUTRA[mysql]`
|
|
232
|
+
|
|
233
|
+
## Supported Formats
|
|
234
|
+
|
|
235
|
+
CSV, Excel, JSON, SQL, PDF, Word, Text, Pandas DataFrame
|
|
236
|
+
|
|
237
|
+
## Requirements
|
|
238
|
+
|
|
239
|
+
- Python 3.8+
|
|
240
|
+
- OpenAI API key
|
|
241
|
+
- MySQL/PostgreSQL (optional)
|
|
242
|
+
|
|
243
|
+
## License
|
|
244
|
+
|
|
245
|
+
MIT License
|
|
246
|
+
|
|
247
|
+
## Changelog
|
|
248
|
+
|
|
249
|
+
**v0.4.0**
|
|
250
|
+
- AUTO-CREATES MySQL database (no more errors)
|
|
251
|
+
- Complete PDF extraction (all pages, all employees)
|
|
252
|
+
- Chunk processing for large documents
|
|
253
|
+
- One-line auto-export to MySQL
|
|
254
|
+
- Simplified everything
|
|
255
|
+
|
|
256
|
+
**v0.3.x**
|
|
257
|
+
- MySQL/PostgreSQL connectivity
|
|
258
|
+
- Embeddings caching
|
|
259
|
+
- Fuzzy matching
|
|
260
|
+
- Custom visualizations
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
**Made by Aditya Batta**
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuerySUTRA
|
|
3
|
+
Version: 0.4.1
|
|
4
|
+
Summary: SUTRA: AI-powered data analysis with automatic MySQL export
|
|
5
|
+
Author: Aditya Batta
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: pandas>=1.3.0
|
|
11
|
+
Requires-Dist: numpy>=1.21.0
|
|
12
|
+
Requires-Dist: openai>=1.0.0
|
|
13
|
+
Requires-Dist: plotly>=5.0.0
|
|
14
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
15
|
+
Requires-Dist: PyPDF2>=3.0.0
|
|
16
|
+
Requires-Dist: python-docx>=0.8.11
|
|
17
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
18
|
+
Provides-Extra: mysql
|
|
19
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
|
|
20
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
|
|
21
|
+
Provides-Extra: postgres
|
|
22
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
|
|
23
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
24
|
+
Provides-Extra: embeddings
|
|
25
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
|
|
26
|
+
Provides-Extra: all
|
|
27
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
|
|
28
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
|
|
29
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
30
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
Dynamic: requires-python
|
|
33
|
+
|
|
34
|
+
# QuerySUTRA
|
|
35
|
+
|
|
36
|
+
**SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
|
|
37
|
+
|
|
38
|
+
AI-powered data analysis library. Upload PDFs, query with natural language, export to MySQL automatically.
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install QuerySUTRA
|
|
44
|
+
pip install QuerySUTRA[mysql] # For MySQL export
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
from sutra import SUTRA
|
|
51
|
+
|
|
52
|
+
# Upload PDF and auto-export to MySQL in ONE step
|
|
53
|
+
sutra = SUTRA(api_key="your-openai-key")
|
|
54
|
+
|
|
55
|
+
sutra.upload("data.pdf", auto_export_mysql={
|
|
56
|
+
'host': 'localhost',
|
|
57
|
+
'user': 'root',
|
|
58
|
+
'password': 'your_password',
|
|
59
|
+
'database': 'my_database' # Auto-creates if not exists
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
# Query immediately
|
|
63
|
+
result = sutra.ask("Show me all people")
|
|
64
|
+
print(result.data)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Features
|
|
68
|
+
|
|
69
|
+
**1. Automatic MySQL Export**
|
|
70
|
+
|
|
71
|
+
The database is created automatically if it does not already exist, so the export no longer fails on a missing database.
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
# Upload and export to MySQL automatically
|
|
75
|
+
sutra.upload("data.pdf", auto_export_mysql={
|
|
76
|
+
'host': 'localhost',
|
|
77
|
+
'user': 'root',
|
|
78
|
+
'password': 'your_password',
|
|
79
|
+
'database': 'my_new_database' # Creates automatically
|
|
80
|
+
})
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**2. Complete Data Extraction**
|
|
84
|
+
|
|
85
|
+
Processes the entire PDF in chunks and extracts ALL employees (not just the first 10).
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
sutra.upload("large_document.pdf") # Extracts all 50+ employees
|
|
89
|
+
sutra.tables() # Shows all extracted tables
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
**3. Natural Language Queries**
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
result = sutra.ask("Show all people from California")
|
|
96
|
+
result = sutra.ask("Who has Python skills?", table="skills")
|
|
97
|
+
result = sutra.ask("Count employees by state", viz="pie")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**4. Custom Visualizations**
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
result = sutra.ask("Sales by region", viz="pie")
|
|
104
|
+
result = sutra.ask("Trends", viz="line")
|
|
105
|
+
result = sutra.ask("Compare", viz="bar")
|
|
106
|
+
result = sutra.ask("Data", viz="scatter")
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
**5. Load Existing Databases**
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
# Load SQLite
|
|
113
|
+
sutra = SUTRA.load_from_db("data.db", api_key="key")
|
|
114
|
+
|
|
115
|
+
# Connect to MySQL
|
|
116
|
+
sutra = SUTRA.connect_mysql("localhost", "root", "pass", "database")
|
|
117
|
+
|
|
118
|
+
# Connect to PostgreSQL
|
|
119
|
+
sutra = SUTRA.connect_postgres("localhost", "postgres", "pass", "database")
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**6. Smart Features (Optional)**
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
sutra = SUTRA(
|
|
126
|
+
api_key="your-key",
|
|
127
|
+
use_embeddings=True, # Cache similar queries (saves API calls)
|
|
128
|
+
fuzzy_match=True, # "New York City" matches "New York"
|
|
129
|
+
check_relevance=True, # Detect irrelevant queries
|
|
130
|
+
cache_queries=True # Cache exact queries
|
|
131
|
+
)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**7. Direct SQL (Free)**
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
result = sutra.sql("SELECT * FROM people WHERE state='CA'")
|
|
138
|
+
print(result.data)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Complete Workflow
|
|
142
|
+
|
|
143
|
+
**In Colab:**
|
|
144
|
+
```python
|
|
145
|
+
from sutra import SUTRA
|
|
146
|
+
|
|
147
|
+
sutra = SUTRA(api_key="your-key")
|
|
148
|
+
sutra.upload("employee_data.pdf")
|
|
149
|
+
sutra.tables() # See extracted tables
|
|
150
|
+
|
|
151
|
+
# Export and download
|
|
152
|
+
sutra.export_db("data.db", format="sqlite")
|
|
153
|
+
from google.colab import files
|
|
154
|
+
files.download("data.db")
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**On Windows:**
|
|
158
|
+
```python
|
|
159
|
+
from sutra import SUTRA
|
|
160
|
+
|
|
161
|
+
# Load downloaded database
|
|
162
|
+
sutra = SUTRA.load_from_db("data.db", api_key="your-key")
|
|
163
|
+
|
|
164
|
+
# Export to MySQL (auto-creates database)
|
|
165
|
+
sutra.save_to_mysql("localhost", "root", "password", "my_database")
|
|
166
|
+
|
|
167
|
+
# Verify in MySQL
|
|
168
|
+
sutra_mysql = SUTRA.connect_mysql("localhost", "root", "password", "my_database")
|
|
169
|
+
sutra_mysql.tables()
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Export Options
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
# SQLite
|
|
176
|
+
sutra.export_db("backup.db", format="sqlite")
|
|
177
|
+
|
|
178
|
+
# SQL dump
|
|
179
|
+
sutra.export_db("schema.sql", format="sql")
|
|
180
|
+
|
|
181
|
+
# JSON
|
|
182
|
+
sutra.export_db("data.json", format="json")
|
|
183
|
+
|
|
184
|
+
# Excel
|
|
185
|
+
sutra.export_db("data.xlsx", format="excel")
|
|
186
|
+
|
|
187
|
+
# MySQL (auto-creates database)
|
|
188
|
+
sutra.save_to_mysql("localhost", "root", "pass", "new_db")
|
|
189
|
+
|
|
190
|
+
# PostgreSQL
|
|
191
|
+
sutra.save_to_postgres("localhost", "postgres", "pass", "new_db")
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## API Reference
|
|
195
|
+
|
|
196
|
+
**Initialize**
|
|
197
|
+
```python
|
|
198
|
+
SUTRA(api_key, db, use_embeddings, check_relevance, fuzzy_match, cache_queries)
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
**Class Methods**
|
|
202
|
+
- `load_from_db(path, api_key)` - Load SQLite
|
|
203
|
+
- `connect_mysql(host, user, password, database)` - Connect MySQL
|
|
204
|
+
- `connect_postgres(host, user, password, database)` - Connect PostgreSQL
|
|
205
|
+
|
|
206
|
+
**Instance Methods**
|
|
207
|
+
- `upload(data, name, auto_export_mysql)` - Upload with optional auto-export
|
|
208
|
+
- `ask(question, viz, table)` - Natural language query
|
|
209
|
+
- `sql(query, viz)` - Direct SQL
|
|
210
|
+
- `tables()` - List tables
|
|
211
|
+
- `schema(table)` - Show schema
|
|
212
|
+
- `peek(table, n)` - Preview data
|
|
213
|
+
- `export_db(path, format)` - Export database
|
|
214
|
+
- `save_to_mysql(host, user, password, database)` - Export to MySQL (auto-creates DB)
|
|
215
|
+
- `save_to_postgres(...)` - Export to PostgreSQL
|
|
216
|
+
- `backup(path)` - Backup
|
|
217
|
+
- `close()` - Close
|
|
218
|
+
|
|
219
|
+
## Troubleshooting
|
|
220
|
+
|
|
221
|
+
**MySQL database doesn't exist**
|
|
222
|
+
- Fixed in v0.4.0: the database is now created automatically
|
|
223
|
+
- No need to manually create database
|
|
224
|
+
|
|
225
|
+
**Only 10 employees extracted from 50-employee PDF**
|
|
226
|
+
- Fixed in v0.4.0 - processes entire PDF in chunks
|
|
227
|
+
- Upgrade: `pip install --upgrade QuerySUTRA`
|
|
228
|
+
|
|
229
|
+
**connect_mysql() not found**
|
|
230
|
+
- Update: `pip install --upgrade QuerySUTRA`
|
|
231
|
+
- Install MySQL support: `pip install QuerySUTRA[mysql]`
|
|
232
|
+
|
|
233
|
+
## Supported Formats
|
|
234
|
+
|
|
235
|
+
CSV, Excel, JSON, SQL, PDF, Word, Text, Pandas DataFrame
|
|
236
|
+
|
|
237
|
+
## Requirements
|
|
238
|
+
|
|
239
|
+
- Python 3.8+
|
|
240
|
+
- OpenAI API key
|
|
241
|
+
- MySQL/PostgreSQL (optional)
|
|
242
|
+
|
|
243
|
+
## License
|
|
244
|
+
|
|
245
|
+
MIT License
|
|
246
|
+
|
|
247
|
+
## Changelog
|
|
248
|
+
|
|
249
|
+
**v0.4.0**
|
|
250
|
+
- AUTO-CREATES MySQL database (no more errors)
|
|
251
|
+
- Complete PDF extraction (all pages, all employees)
|
|
252
|
+
- Chunk processing for large documents
|
|
253
|
+
- One-line auto-export to MySQL
|
|
254
|
+
- Simplified everything
|
|
255
|
+
|
|
256
|
+
**v0.3.x**
|
|
257
|
+
- MySQL/PostgreSQL connectivity
|
|
258
|
+
- Embeddings caching
|
|
259
|
+
- Fuzzy matching
|
|
260
|
+
- Custom visualizations
|
|
261
|
+
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
**Made by Aditya Batta**
|