QuerySUTRA 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- querysutra-0.3.2/PKG-INFO +290 -0
- querysutra-0.3.2/QuerySUTRA.egg-info/PKG-INFO +290 -0
- querysutra-0.3.2/README.md +249 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/pyproject.toml +1 -1
- {querysutra-0.3.1 → querysutra-0.3.2}/setup.py +1 -1
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/__init__.py +2 -2
- querysutra-0.3.1/PKG-INFO +0 -429
- querysutra-0.3.1/QuerySUTRA.egg-info/PKG-INFO +0 -429
- querysutra-0.3.1/README.md +0 -388
- {querysutra-0.3.1 → querysutra-0.3.2}/.gitignore +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/LICENSE +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/MANIFEST.in +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/QuerySUTRA.egg-info/SOURCES.txt +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/QuerySUTRA.egg-info/dependency_links.txt +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/QuerySUTRA.egg-info/requires.txt +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/QuerySUTRA.egg-info/top_level.txt +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/config.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/examples/quickstart.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/examples/sutra_usage_guide.ipynb +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/examples/usage_guide.ipynb +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/main.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/requirements.txt +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/setup.cfg +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/cache_manager.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/clear_cache.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/core.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/data_loader.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/database_manager.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/direct_query.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/feedback.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/feedback_matcher.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/nlp_processor.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/schema_embeddings.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/schema_generator.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/sutra.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/sutra_client.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/sutra_core.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/sutra_simple.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/sutra/visualizer.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/test_openapi.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/tests/__init__.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/tests/test_modules.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/tests/test_sutra.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/utils/__init__.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/utils/file_utils.py +0 -0
- {querysutra-0.3.1 → querysutra-0.3.2}/utils/text_utils.py +0 -0
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuerySUTRA
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: SUTRA: Structured-Unstructured-Text-Retrieval-Architecture - AI-powered data analysis with custom visualizations, fuzzy matching, and smart caching
|
|
5
|
+
Home-page: https://github.com/yourusername/querysutra
|
|
6
|
+
Author: Aditya Batta
|
|
7
|
+
Author-email:
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Topic :: Database
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: pandas>=1.3.0
|
|
18
|
+
Requires-Dist: numpy>=1.21.0
|
|
19
|
+
Requires-Dist: openai>=1.0.0
|
|
20
|
+
Requires-Dist: plotly>=5.0.0
|
|
21
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
22
|
+
Requires-Dist: PyPDF2>=3.0.0
|
|
23
|
+
Requires-Dist: python-docx>=0.8.11
|
|
24
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
25
|
+
Provides-Extra: mysql
|
|
26
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
|
|
27
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
|
|
28
|
+
Provides-Extra: postgres
|
|
29
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
|
|
30
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
31
|
+
Provides-Extra: embeddings
|
|
32
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
|
|
33
|
+
Provides-Extra: all
|
|
34
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
|
|
35
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
|
|
36
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
37
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
|
|
38
|
+
Dynamic: home-page
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
Dynamic: requires-python
|
|
41
|
+
|
|
42
|
+
# QuerySUTRA
|
|
43
|
+
|
|
44
|
+
**SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
|
|
45
|
+
|
|
46
|
+
Professional Python library for AI-powered data analysis with automatic entity extraction, natural language querying, and intelligent caching.
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install QuerySUTRA
|
|
52
|
+
|
|
53
|
+
# Optional features
|
|
54
|
+
pip install QuerySUTRA[embeddings] # Smart caching
|
|
55
|
+
pip install QuerySUTRA[mysql] # MySQL support
|
|
56
|
+
pip install QuerySUTRA[postgres] # PostgreSQL support
|
|
57
|
+
pip install QuerySUTRA[all] # All features
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Key Features
|
|
61
|
+
|
|
62
|
+
### 1. Automatic Multi-Table Creation
|
|
63
|
+
Upload PDFs, Word documents, or text files and automatically extract structured entities.
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from sutra import SUTRA
|
|
67
|
+
|
|
68
|
+
sutra = SUTRA(api_key="your-openai-key")
|
|
69
|
+
sutra.upload("employee_data.pdf")
|
|
70
|
+
|
|
71
|
+
# Automatically creates:
|
|
72
|
+
# - employee_data_people (20 rows, 6 columns)
|
|
73
|
+
# - employee_data_contacts (20 rows, 4 columns)
|
|
74
|
+
# - employee_data_events (15 rows, 4 columns)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2. Natural Language Querying
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
result = sutra.ask("Show me all people from New York")
|
|
81
|
+
print(result.data)
|
|
82
|
+
|
|
83
|
+
# With visualization
|
|
84
|
+
result = sutra.ask("Show sales by region", viz="pie")
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 3. Load Existing Databases
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
# Load SQLite database
|
|
91
|
+
sutra = SUTRA.load_from_db("sutra.db", api_key="your-key")
|
|
92
|
+
|
|
93
|
+
# Connect to MySQL
|
|
94
|
+
sutra = SUTRA.connect_mysql("localhost", "root", "password", "database")
|
|
95
|
+
|
|
96
|
+
# Connect to PostgreSQL
|
|
97
|
+
sutra = SUTRA.connect_postgres("localhost", "postgres", "password", "database")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 4. Custom Visualizations
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
result = sutra.ask("Sales by region", viz="pie") # Pie chart
|
|
104
|
+
result = sutra.ask("Trends", viz="line") # Line chart
|
|
105
|
+
result = sutra.ask("Compare", viz="bar") # Bar chart
|
|
106
|
+
result = sutra.ask("Correlation", viz="scatter") # Scatter plot
|
|
107
|
+
result = sutra.ask("Data", viz="table") # Table view
|
|
108
|
+
result = sutra.ask("Analysis", viz="heatmap") # Heatmap
|
|
109
|
+
result = sutra.ask("Auto", viz=True) # Auto-detect
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### 5. Smart Fuzzy Matching
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
sutra = SUTRA(api_key="your-key", fuzzy_match=True)
|
|
116
|
+
|
|
117
|
+
# "New York City" matches "New York" automatically
|
|
118
|
+
result = sutra.ask("Who is from New York City?")
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### 6. Intelligent Caching with Embeddings
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
sutra = SUTRA(api_key="your-key", use_embeddings=True)
|
|
125
|
+
|
|
126
|
+
result = sutra.ask("Show sales") # Calls API
|
|
127
|
+
result = sutra.ask("Display sales data") # Uses cache (no API call)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### 7. Irrelevant Query Detection
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
sutra = SUTRA(api_key="your-key", check_relevance=True)
|
|
134
|
+
|
|
135
|
+
result = sutra.ask("What is the weather?")
|
|
136
|
+
# Warns: "This question seems irrelevant to your database"
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### 8. Direct SQL Access (No API Cost)
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
result = sutra.sql("SELECT * FROM people WHERE city='New York'")
|
|
143
|
+
print(result.data)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Complete Configuration
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
sutra = SUTRA(
|
|
150
|
+
api_key="your-openai-key",
|
|
151
|
+
db="database.db", # SQLite path
|
|
152
|
+
use_embeddings=True, # Smart caching (saves API calls)
|
|
153
|
+
check_relevance=True, # Detect irrelevant queries
|
|
154
|
+
fuzzy_match=True, # Match approximate values (e.g. "New York City" → "New York")
|
|
155
|
+
cache_queries=True # Simple caching
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Supported Formats
|
|
160
|
+
|
|
161
|
+
CSV, Excel, JSON, SQL, PDF, Word, Text, Pandas DataFrame
|
|
162
|
+
|
|
163
|
+
## Usage Examples
|
|
164
|
+
|
|
165
|
+
### Basic Workflow
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
sutra = SUTRA(api_key="your-key")
|
|
169
|
+
sutra.upload("data.pdf")
|
|
170
|
+
sutra.tables() # View tables
|
|
171
|
+
sutra.schema() # View schema
|
|
172
|
+
sutra.peek("table_name", n=10) # Preview data
|
|
173
|
+
result = sutra.ask("Your question?")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Database Export
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
sutra.export_db("backup.db", format="sqlite")
|
|
180
|
+
sutra.export_db("schema.sql", format="sql")
|
|
181
|
+
sutra.save_to_mysql("localhost", "root", "pass", "db")
|
|
182
|
+
sutra.save_to_postgres("localhost", "postgres", "pass", "db")
|
|
183
|
+
sutra.backup("./backups")
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## How It Works
|
|
187
|
+
|
|
188
|
+
### Entity Extraction Example
|
|
189
|
+
|
|
190
|
+
**Input PDF:**
|
|
191
|
+
```
|
|
192
|
+
John Doe lives at 123 Main St, Dallas. Email: john@company.com.
|
|
193
|
+
Sarah Smith lives at 456 Oak Ave, Boston. Email: sarah@company.com.
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
**Output Tables:**
|
|
197
|
+
|
|
198
|
+
**people**
|
|
199
|
+
| id | name | address | city | email |
|
|
200
|
+
|----|------|---------|------|-------|
|
|
201
|
+
| 1 | John Doe | 123 Main St | Dallas | john@company.com |
|
|
202
|
+
| 2 | Sarah Smith | 456 Oak Ave | Boston | sarah@company.com |
|
|
203
|
+
|
|
204
|
+
### Embeddings for Smart Caching
|
|
205
|
+
|
|
206
|
+
Uses the `all-MiniLM-L6-v2` model (80MB, runs locally):
|
|
207
|
+
- Query 1: "Show sales" → API call
|
|
208
|
+
- Query 2: "Display sales" → 92% similar → Cached (no API call)
|
|
209
|
+
|
|
210
|
+
### Fuzzy Matching
|
|
211
|
+
|
|
212
|
+
- Query: "New York City"
|
|
213
|
+
- Database: ["New York", "Dallas", "Boston"]
|
|
214
|
+
- Match: "New York City" → "New York" (85% similar)
|
|
215
|
+
|
|
216
|
+
## API Reference
|
|
217
|
+
|
|
218
|
+
### Class Methods
|
|
219
|
+
|
|
220
|
+
`SUTRA.load_from_db(db_path, api_key, **kwargs)` - Load existing SQLite database
|
|
221
|
+
|
|
222
|
+
`SUTRA.connect_mysql(host, user, password, database, ...)` - Connect to MySQL
|
|
223
|
+
|
|
224
|
+
`SUTRA.connect_postgres(host, user, password, database, ...)` - Connect to PostgreSQL
|
|
225
|
+
|
|
226
|
+
### Instance Methods
|
|
227
|
+
|
|
228
|
+
`upload(data, name=None)` - Upload data
|
|
229
|
+
|
|
230
|
+
`ask(question, viz=False, table=None)` - Natural language query
|
|
231
|
+
|
|
232
|
+
`sql(query, viz=False)` - Raw SQL query
|
|
233
|
+
|
|
234
|
+
`tables()` - List all tables
|
|
235
|
+
|
|
236
|
+
`schema(table=None)` - Show schema
|
|
237
|
+
|
|
238
|
+
`peek(table=None, n=5)` - Preview data
|
|
239
|
+
|
|
240
|
+
`export_db(path, format)` - Export database
|
|
241
|
+
|
|
242
|
+
`save_to_mysql(...)` - Export to MySQL
|
|
243
|
+
|
|
244
|
+
`save_to_postgres(...)` - Export to PostgreSQL
|
|
245
|
+
|
|
246
|
+
`backup(path=None)` - Create backup
|
|
247
|
+
|
|
248
|
+
`close()` - Close connection
|
|
249
|
+
|
|
250
|
+
## Performance Tips
|
|
251
|
+
|
|
252
|
+
1. Use `load_from_db()` to avoid re-uploading
|
|
253
|
+
2. Use `sql()` for complex queries (no API cost)
|
|
254
|
+
3. Enable `use_embeddings=True` for caching
|
|
255
|
+
4. Enable `cache_queries=True` for exact matches
|
|
256
|
+
|
|
257
|
+
## Troubleshooting
|
|
258
|
+
|
|
259
|
+
**No API key error:** `sutra = SUTRA(api_key="sk-...")`
|
|
260
|
+
|
|
261
|
+
**PDF fails:** `pip install PyPDF2`
|
|
262
|
+
|
|
263
|
+
**MySQL error:** `pip install QuerySUTRA[mysql]`
|
|
264
|
+
|
|
265
|
+
**Embeddings error:** `pip install QuerySUTRA[embeddings]`
|
|
266
|
+
|
|
267
|
+
## Requirements
|
|
268
|
+
|
|
269
|
+
- Python 3.8+
|
|
270
|
+
- OpenAI API key
|
|
271
|
+
- 100MB disk space (if using embeddings)
|
|
272
|
+
|
|
273
|
+
## License
|
|
274
|
+
|
|
275
|
+
MIT License
|
|
276
|
+
|
|
277
|
+
## Changelog
|
|
278
|
+
|
|
279
|
+
### v0.3.1
|
|
280
|
+
- Semantic embeddings for smart caching
|
|
281
|
+
- Fuzzy matching for better NLP
|
|
282
|
+
- Irrelevant query detection
|
|
283
|
+
- Load existing databases
|
|
284
|
+
- MySQL/PostgreSQL connectivity
|
|
285
|
+
- Custom visualizations
|
|
286
|
+
- All features optional
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
**Made by Aditya Batta**
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuerySUTRA
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: SUTRA: Structured-Unstructured-Text-Retrieval-Architecture - AI-powered data analysis with custom visualizations, fuzzy matching, and smart caching
|
|
5
|
+
Home-page: https://github.com/yourusername/querysutra
|
|
6
|
+
Author: Aditya Batta
|
|
7
|
+
Author-email:
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Topic :: Database
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: pandas>=1.3.0
|
|
18
|
+
Requires-Dist: numpy>=1.21.0
|
|
19
|
+
Requires-Dist: openai>=1.0.0
|
|
20
|
+
Requires-Dist: plotly>=5.0.0
|
|
21
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
22
|
+
Requires-Dist: PyPDF2>=3.0.0
|
|
23
|
+
Requires-Dist: python-docx>=0.8.11
|
|
24
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
25
|
+
Provides-Extra: mysql
|
|
26
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
|
|
27
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
|
|
28
|
+
Provides-Extra: postgres
|
|
29
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
|
|
30
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
31
|
+
Provides-Extra: embeddings
|
|
32
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
|
|
33
|
+
Provides-Extra: all
|
|
34
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
|
|
35
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
|
|
36
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
37
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
|
|
38
|
+
Dynamic: home-page
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
Dynamic: requires-python
|
|
41
|
+
|
|
42
|
+
# QuerySUTRA
|
|
43
|
+
|
|
44
|
+
**SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
|
|
45
|
+
|
|
46
|
+
Professional Python library for AI-powered data analysis with automatic entity extraction, natural language querying, and intelligent caching.
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install QuerySUTRA
|
|
52
|
+
|
|
53
|
+
# Optional features
|
|
54
|
+
pip install QuerySUTRA[embeddings] # Smart caching
|
|
55
|
+
pip install QuerySUTRA[mysql] # MySQL support
|
|
56
|
+
pip install QuerySUTRA[postgres] # PostgreSQL support
|
|
57
|
+
pip install QuerySUTRA[all] # All features
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Key Features
|
|
61
|
+
|
|
62
|
+
### 1. Automatic Multi-Table Creation
|
|
63
|
+
Upload PDFs, Word documents, or text files and automatically extract structured entities.
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from sutra import SUTRA
|
|
67
|
+
|
|
68
|
+
sutra = SUTRA(api_key="your-openai-key")
|
|
69
|
+
sutra.upload("employee_data.pdf")
|
|
70
|
+
|
|
71
|
+
# Automatically creates:
|
|
72
|
+
# - employee_data_people (20 rows, 6 columns)
|
|
73
|
+
# - employee_data_contacts (20 rows, 4 columns)
|
|
74
|
+
# - employee_data_events (15 rows, 4 columns)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2. Natural Language Querying
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
result = sutra.ask("Show me all people from New York")
|
|
81
|
+
print(result.data)
|
|
82
|
+
|
|
83
|
+
# With visualization
|
|
84
|
+
result = sutra.ask("Show sales by region", viz="pie")
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 3. Load Existing Databases
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
# Load SQLite database
|
|
91
|
+
sutra = SUTRA.load_from_db("sutra.db", api_key="your-key")
|
|
92
|
+
|
|
93
|
+
# Connect to MySQL
|
|
94
|
+
sutra = SUTRA.connect_mysql("localhost", "root", "password", "database")
|
|
95
|
+
|
|
96
|
+
# Connect to PostgreSQL
|
|
97
|
+
sutra = SUTRA.connect_postgres("localhost", "postgres", "password", "database")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 4. Custom Visualizations
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
result = sutra.ask("Sales by region", viz="pie") # Pie chart
|
|
104
|
+
result = sutra.ask("Trends", viz="line") # Line chart
|
|
105
|
+
result = sutra.ask("Compare", viz="bar") # Bar chart
|
|
106
|
+
result = sutra.ask("Correlation", viz="scatter") # Scatter plot
|
|
107
|
+
result = sutra.ask("Data", viz="table") # Table view
|
|
108
|
+
result = sutra.ask("Analysis", viz="heatmap") # Heatmap
|
|
109
|
+
result = sutra.ask("Auto", viz=True) # Auto-detect
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### 5. Smart Fuzzy Matching
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
sutra = SUTRA(api_key="your-key", fuzzy_match=True)
|
|
116
|
+
|
|
117
|
+
# "New York City" matches "New York" automatically
|
|
118
|
+
result = sutra.ask("Who is from New York City?")
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### 6. Intelligent Caching with Embeddings
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
sutra = SUTRA(api_key="your-key", use_embeddings=True)
|
|
125
|
+
|
|
126
|
+
result = sutra.ask("Show sales") # Calls API
|
|
127
|
+
result = sutra.ask("Display sales data") # Uses cache (no API call)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### 7. Irrelevant Query Detection
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
sutra = SUTRA(api_key="your-key", check_relevance=True)
|
|
134
|
+
|
|
135
|
+
result = sutra.ask("What is the weather?")
|
|
136
|
+
# Warns: "This question seems irrelevant to your database"
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### 8. Direct SQL Access (No API Cost)
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
result = sutra.sql("SELECT * FROM people WHERE city='New York'")
|
|
143
|
+
print(result.data)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Complete Configuration
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
sutra = SUTRA(
|
|
150
|
+
api_key="your-openai-key",
|
|
151
|
+
db="database.db", # SQLite path
|
|
152
|
+
use_embeddings=True, # Smart caching (saves API calls)
|
|
153
|
+
check_relevance=True, # Detect irrelevant queries
|
|
154
|
+
fuzzy_match=True, # Match approximate values (e.g. "New York City" → "New York")
|
|
155
|
+
cache_queries=True # Simple caching
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Supported Formats
|
|
160
|
+
|
|
161
|
+
CSV, Excel, JSON, SQL, PDF, Word, Text, Pandas DataFrame
|
|
162
|
+
|
|
163
|
+
## Usage Examples
|
|
164
|
+
|
|
165
|
+
### Basic Workflow
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
sutra = SUTRA(api_key="your-key")
|
|
169
|
+
sutra.upload("data.pdf")
|
|
170
|
+
sutra.tables() # View tables
|
|
171
|
+
sutra.schema() # View schema
|
|
172
|
+
sutra.peek("table_name", n=10) # Preview data
|
|
173
|
+
result = sutra.ask("Your question?")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Database Export
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
sutra.export_db("backup.db", format="sqlite")
|
|
180
|
+
sutra.export_db("schema.sql", format="sql")
|
|
181
|
+
sutra.save_to_mysql("localhost", "root", "pass", "db")
|
|
182
|
+
sutra.save_to_postgres("localhost", "postgres", "pass", "db")
|
|
183
|
+
sutra.backup("./backups")
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## How It Works
|
|
187
|
+
|
|
188
|
+
### Entity Extraction Example
|
|
189
|
+
|
|
190
|
+
**Input PDF:**
|
|
191
|
+
```
|
|
192
|
+
John Doe lives at 123 Main St, Dallas. Email: john@company.com.
|
|
193
|
+
Sarah Smith lives at 456 Oak Ave, Boston. Email: sarah@company.com.
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
**Output Tables:**
|
|
197
|
+
|
|
198
|
+
**people**
|
|
199
|
+
| id | name | address | city | email |
|
|
200
|
+
|----|------|---------|------|-------|
|
|
201
|
+
| 1 | John Doe | 123 Main St | Dallas | john@company.com |
|
|
202
|
+
| 2 | Sarah Smith | 456 Oak Ave | Boston | sarah@company.com |
|
|
203
|
+
|
|
204
|
+
### Embeddings for Smart Caching
|
|
205
|
+
|
|
206
|
+
Uses the `all-MiniLM-L6-v2` model (80MB, runs locally):
|
|
207
|
+
- Query 1: "Show sales" → API call
|
|
208
|
+
- Query 2: "Display sales" → 92% similar → Cached (no API call)
|
|
209
|
+
|
|
210
|
+
### Fuzzy Matching
|
|
211
|
+
|
|
212
|
+
- Query: "New York City"
|
|
213
|
+
- Database: ["New York", "Dallas", "Boston"]
|
|
214
|
+
- Match: "New York City" → "New York" (85% similar)
|
|
215
|
+
|
|
216
|
+
## API Reference
|
|
217
|
+
|
|
218
|
+
### Class Methods
|
|
219
|
+
|
|
220
|
+
`SUTRA.load_from_db(db_path, api_key, **kwargs)` - Load existing SQLite database
|
|
221
|
+
|
|
222
|
+
`SUTRA.connect_mysql(host, user, password, database, ...)` - Connect to MySQL
|
|
223
|
+
|
|
224
|
+
`SUTRA.connect_postgres(host, user, password, database, ...)` - Connect to PostgreSQL
|
|
225
|
+
|
|
226
|
+
### Instance Methods
|
|
227
|
+
|
|
228
|
+
`upload(data, name=None)` - Upload data
|
|
229
|
+
|
|
230
|
+
`ask(question, viz=False, table=None)` - Natural language query
|
|
231
|
+
|
|
232
|
+
`sql(query, viz=False)` - Raw SQL query
|
|
233
|
+
|
|
234
|
+
`tables()` - List all tables
|
|
235
|
+
|
|
236
|
+
`schema(table=None)` - Show schema
|
|
237
|
+
|
|
238
|
+
`peek(table=None, n=5)` - Preview data
|
|
239
|
+
|
|
240
|
+
`export_db(path, format)` - Export database
|
|
241
|
+
|
|
242
|
+
`save_to_mysql(...)` - Export to MySQL
|
|
243
|
+
|
|
244
|
+
`save_to_postgres(...)` - Export to PostgreSQL
|
|
245
|
+
|
|
246
|
+
`backup(path=None)` - Create backup
|
|
247
|
+
|
|
248
|
+
`close()` - Close connection
|
|
249
|
+
|
|
250
|
+
## Performance Tips
|
|
251
|
+
|
|
252
|
+
1. Use `load_from_db()` to avoid re-uploading
|
|
253
|
+
2. Use `sql()` for complex queries (no API cost)
|
|
254
|
+
3. Enable `use_embeddings=True` for caching
|
|
255
|
+
4. Enable `cache_queries=True` for exact matches
|
|
256
|
+
|
|
257
|
+
## Troubleshooting
|
|
258
|
+
|
|
259
|
+
**No API key error:** `sutra = SUTRA(api_key="sk-...")`
|
|
260
|
+
|
|
261
|
+
**PDF fails:** `pip install PyPDF2`
|
|
262
|
+
|
|
263
|
+
**MySQL error:** `pip install QuerySUTRA[mysql]`
|
|
264
|
+
|
|
265
|
+
**Embeddings error:** `pip install QuerySUTRA[embeddings]`
|
|
266
|
+
|
|
267
|
+
## Requirements
|
|
268
|
+
|
|
269
|
+
- Python 3.8+
|
|
270
|
+
- OpenAI API key
|
|
271
|
+
- 100MB disk space (if using embeddings)
|
|
272
|
+
|
|
273
|
+
## License
|
|
274
|
+
|
|
275
|
+
MIT License
|
|
276
|
+
|
|
277
|
+
## Changelog
|
|
278
|
+
|
|
279
|
+
### v0.3.1
|
|
280
|
+
- Semantic embeddings for smart caching
|
|
281
|
+
- Fuzzy matching for better NLP
|
|
282
|
+
- Irrelevant query detection
|
|
283
|
+
- Load existing databases
|
|
284
|
+
- MySQL/PostgreSQL connectivity
|
|
285
|
+
- Custom visualizations
|
|
286
|
+
- All features optional
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
**Made by Aditya Batta**
|