QuerySUTRA 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- querysutra-0.3.2.dist-info/METADATA +290 -0
- {querysutra-0.3.0.dist-info → querysutra-0.3.2.dist-info}/RECORD +6 -6
- sutra/__init__.py +2 -2
- querysutra-0.3.0.dist-info/METADATA +0 -429
- {querysutra-0.3.0.dist-info → querysutra-0.3.2.dist-info}/WHEEL +0 -0
- {querysutra-0.3.0.dist-info → querysutra-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {querysutra-0.3.0.dist-info → querysutra-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: QuerySUTRA
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: SUTRA: Structured-Unstructured-Text-Retrieval-Architecture - AI-powered data analysis with custom visualizations, fuzzy matching, and smart caching
|
|
5
|
+
Home-page: https://github.com/yourusername/querysutra
|
|
6
|
+
Author: Aditya Batta
|
|
7
|
+
Author-email:
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Topic :: Database
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: pandas>=1.3.0
|
|
18
|
+
Requires-Dist: numpy>=1.21.0
|
|
19
|
+
Requires-Dist: openai>=1.0.0
|
|
20
|
+
Requires-Dist: plotly>=5.0.0
|
|
21
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
22
|
+
Requires-Dist: PyPDF2>=3.0.0
|
|
23
|
+
Requires-Dist: python-docx>=0.8.11
|
|
24
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
25
|
+
Provides-Extra: mysql
|
|
26
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
|
|
27
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
|
|
28
|
+
Provides-Extra: postgres
|
|
29
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
|
|
30
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
31
|
+
Provides-Extra: embeddings
|
|
32
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
|
|
33
|
+
Provides-Extra: all
|
|
34
|
+
Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
|
|
35
|
+
Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
|
|
36
|
+
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
37
|
+
Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
|
|
38
|
+
Dynamic: home-page
|
|
39
|
+
Dynamic: license-file
|
|
40
|
+
Dynamic: requires-python
|
|
41
|
+
|
|
42
|
+
# QuerySUTRA
|
|
43
|
+
|
|
44
|
+
**SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
|
|
45
|
+
|
|
46
|
+
Professional Python library for AI-powered data analysis with automatic entity extraction, natural language querying, and intelligent caching.
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install QuerySUTRA
|
|
52
|
+
|
|
53
|
+
# Optional features
|
|
54
|
+
pip install QuerySUTRA[embeddings] # Smart caching
|
|
55
|
+
pip install QuerySUTRA[mysql] # MySQL support
|
|
56
|
+
pip install QuerySUTRA[postgres] # PostgreSQL support
|
|
57
|
+
pip install QuerySUTRA[all] # All features
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Key Features
|
|
61
|
+
|
|
62
|
+
### 1. Automatic Multi-Table Creation
|
|
63
|
+
Upload PDFs, Word documents, or text files and automatically extract structured entities.
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from sutra import SUTRA
|
|
67
|
+
|
|
68
|
+
sutra = SUTRA(api_key="your-openai-key")
|
|
69
|
+
sutra.upload("employee_data.pdf")
|
|
70
|
+
|
|
71
|
+
# Automatically creates:
|
|
72
|
+
# - employee_data_people (20 rows, 6 columns)
|
|
73
|
+
# - employee_data_contacts (20 rows, 4 columns)
|
|
74
|
+
# - employee_data_events (15 rows, 4 columns)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2. Natural Language Querying
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
result = sutra.ask("Show me all people from New York")
|
|
81
|
+
print(result.data)
|
|
82
|
+
|
|
83
|
+
# With visualization
|
|
84
|
+
result = sutra.ask("Show sales by region", viz="pie")
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 3. Load Existing Databases
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
# Load SQLite database
|
|
91
|
+
sutra = SUTRA.load_from_db("sutra.db", api_key="your-key")
|
|
92
|
+
|
|
93
|
+
# Connect to MySQL
|
|
94
|
+
sutra = SUTRA.connect_mysql("localhost", "root", "password", "database")
|
|
95
|
+
|
|
96
|
+
# Connect to PostgreSQL
|
|
97
|
+
sutra = SUTRA.connect_postgres("localhost", "postgres", "password", "database")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 4. Custom Visualizations
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
result = sutra.ask("Sales by region", viz="pie") # Pie chart
|
|
104
|
+
result = sutra.ask("Trends", viz="line") # Line chart
|
|
105
|
+
result = sutra.ask("Compare", viz="bar") # Bar chart
|
|
106
|
+
result = sutra.ask("Correlation", viz="scatter") # Scatter plot
|
|
107
|
+
result = sutra.ask("Data", viz="table") # Table view
|
|
108
|
+
result = sutra.ask("Analysis", viz="heatmap") # Heatmap
|
|
109
|
+
result = sutra.ask("Auto", viz=True) # Auto-detect
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### 5. Smart Fuzzy Matching
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
sutra = SUTRA(api_key="your-key", fuzzy_match=True)
|
|
116
|
+
|
|
117
|
+
# "New York City" matches "New York" automatically
|
|
118
|
+
result = sutra.ask("Who is from New York City?")
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### 6. Intelligent Caching with Embeddings
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
sutra = SUTRA(api_key="your-key", use_embeddings=True)
|
|
125
|
+
|
|
126
|
+
result = sutra.ask("Show sales") # Calls API
|
|
127
|
+
result = sutra.ask("Display sales data") # Uses cache (no API call)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### 7. Irrelevant Query Detection
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
sutra = SUTRA(api_key="your-key", check_relevance=True)
|
|
134
|
+
|
|
135
|
+
result = sutra.ask("What is the weather?")
|
|
136
|
+
# Warns: "This question seems irrelevant to your database"
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### 8. Direct SQL Access (Free)
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
result = sutra.sql("SELECT * FROM people WHERE city='New York'")
|
|
143
|
+
print(result.data)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Complete Configuration
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
sutra = SUTRA(
|
|
150
|
+
api_key="your-openai-key",
|
|
151
|
+
db="database.db", # SQLite path
|
|
152
|
+
use_embeddings=True, # Smart caching (saves API calls)
|
|
153
|
+
check_relevance=True, # Detect irrelevant queries
|
|
154
|
+
fuzzy_match=True, # Fuzzy-match query terms to stored values (e.g. "New York City" → "New York")
|
|
155
|
+
cache_queries=True # Simple caching
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Supported Formats
|
|
160
|
+
|
|
161
|
+
CSV, Excel, JSON, SQL, PDF, Word, Text, Pandas DataFrame
|
|
162
|
+
|
|
163
|
+
## Usage Examples
|
|
164
|
+
|
|
165
|
+
### Basic Workflow
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
sutra = SUTRA(api_key="your-key")
|
|
169
|
+
sutra.upload("data.pdf")
|
|
170
|
+
sutra.tables() # View tables
|
|
171
|
+
sutra.schema() # View schema
|
|
172
|
+
sutra.peek("table_name", n=10) # Preview data
|
|
173
|
+
result = sutra.ask("Your question?")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Database Export
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
sutra.export_db("backup.db", format="sqlite")
|
|
180
|
+
sutra.export_db("schema.sql", format="sql")
|
|
181
|
+
sutra.save_to_mysql("localhost", "root", "pass", "db")
|
|
182
|
+
sutra.save_to_postgres("localhost", "postgres", "pass", "db")
|
|
183
|
+
sutra.backup("./backups")
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## How It Works
|
|
187
|
+
|
|
188
|
+
### Entity Extraction Example
|
|
189
|
+
|
|
190
|
+
**Input PDF:**
|
|
191
|
+
```
|
|
192
|
+
John Doe lives at 123 Main St, Dallas. Email: john@company.com.
|
|
193
|
+
Sarah Smith lives at 456 Oak Ave, Boston. Email: sarah@company.com.
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
**Output Tables:**
|
|
197
|
+
|
|
198
|
+
**people**
|
|
199
|
+
| id | name | address | city | email |
|
|
200
|
+
|----|------|---------|------|-------|
|
|
201
|
+
| 1 | John Doe | 123 Main St | Dallas | john@company.com |
|
|
202
|
+
| 2 | Sarah Smith | 456 Oak Ave | Boston | sarah@company.com |
|
|
203
|
+
|
|
204
|
+
### Embeddings for Smart Caching
|
|
205
|
+
|
|
206
|
+
Uses the `all-MiniLM-L6-v2` sentence-transformers model (about 80 MB, runs locally) to compare each new question with previously answered ones:
|
|
207
|
+
- Query 1: "Show sales" → API call
|
|
208
|
+
- Query 2: "Display sales" → 92% similar → Cached (no API call)
|
|
209
|
+
|
|
210
|
+
### Fuzzy Matching
|
|
211
|
+
|
|
212
|
+
- Query: "New York City"
|
|
213
|
+
- Database: ["New York", "Dallas", "Boston"]
|
|
214
|
+
- Match: "New York City" → "New York" (85% similar)
|
|
215
|
+
|
|
216
|
+
## API Reference
|
|
217
|
+
|
|
218
|
+
### Class Methods
|
|
219
|
+
|
|
220
|
+
`SUTRA.load_from_db(db_path, api_key, **kwargs)` - Load existing SQLite database
|
|
221
|
+
|
|
222
|
+
`SUTRA.connect_mysql(host, user, password, database, ...)` - Connect to MySQL
|
|
223
|
+
|
|
224
|
+
`SUTRA.connect_postgres(host, user, password, database, ...)` - Connect to PostgreSQL
|
|
225
|
+
|
|
226
|
+
### Instance Methods
|
|
227
|
+
|
|
228
|
+
`upload(data, name=None)` - Upload data
|
|
229
|
+
|
|
230
|
+
`ask(question, viz=False, table=None)` - Natural language query
|
|
231
|
+
|
|
232
|
+
`sql(query, viz=False)` - Raw SQL query
|
|
233
|
+
|
|
234
|
+
`tables()` - List all tables
|
|
235
|
+
|
|
236
|
+
`schema(table=None)` - Show schema
|
|
237
|
+
|
|
238
|
+
`peek(table=None, n=5)` - Preview data
|
|
239
|
+
|
|
240
|
+
`export_db(path, format)` - Export database
|
|
241
|
+
|
|
242
|
+
`save_to_mysql(...)` - Export to MySQL
|
|
243
|
+
|
|
244
|
+
`save_to_postgres(...)` - Export to PostgreSQL
|
|
245
|
+
|
|
246
|
+
`backup(path=None)` - Create backup
|
|
247
|
+
|
|
248
|
+
`close()` - Close connection
|
|
249
|
+
|
|
250
|
+
## Performance Tips
|
|
251
|
+
|
|
252
|
+
1. Use `load_from_db()` to avoid re-uploading
|
|
253
|
+
2. Use `sql()` for complex queries (no API cost)
|
|
254
|
+
3. Enable `use_embeddings=True` for caching
|
|
255
|
+
4. Enable `cache_queries=True` for exact matches
|
|
256
|
+
|
|
257
|
+
## Troubleshooting
|
|
258
|
+
|
|
259
|
+
**No API key error:** pass your OpenAI key when creating the instance: `sutra = SUTRA(api_key="sk-...")`
|
|
260
|
+
|
|
261
|
+
**PDF upload fails:** make sure the PDF parser is installed: `pip install PyPDF2`
|
|
262
|
+
|
|
263
|
+
**MySQL error:** install the optional MySQL dependencies: `pip install QuerySUTRA[mysql]`
|
|
264
|
+
|
|
265
|
+
**Embeddings error:** install the optional embeddings dependencies: `pip install QuerySUTRA[embeddings]`
|
|
266
|
+
|
|
267
|
+
## Requirements
|
|
268
|
+
|
|
269
|
+
- Python 3.8+
|
|
270
|
+
- OpenAI API key
|
|
271
|
+
- 100MB disk space (if using embeddings)
|
|
272
|
+
|
|
273
|
+
## License
|
|
274
|
+
|
|
275
|
+
MIT License
|
|
276
|
+
|
|
277
|
+
## Changelog
|
|
278
|
+
|
|
279
|
+
### v0.3.1
|
|
280
|
+
- Semantic embeddings for smart caching
|
|
281
|
+
- Fuzzy matching of query terms against database values
|
|
282
|
+
- Irrelevant query detection
|
|
283
|
+
- Load existing databases
|
|
284
|
+
- MySQL/PostgreSQL connectivity
|
|
285
|
+
- Custom visualizations
|
|
286
|
+
- All features optional
|
|
287
|
+
|
|
288
|
+
---
|
|
289
|
+
|
|
290
|
+
**Made by Aditya Batta**
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
querysutra-0.3.
|
|
2
|
-
sutra/__init__.py,sha256=
|
|
1
|
+
querysutra-0.3.2.dist-info/licenses/LICENSE,sha256=F-4b93u0OVrVwGXgMwBRq6MlGyUT9zmre1oh5Gft5Ts,1066
|
|
2
|
+
sutra/__init__.py,sha256=FmZQ12IUfRXNZJVonPE3-hMPejB4bGdqPJXK8feKFts,318
|
|
3
3
|
sutra/cache_manager.py,sha256=e0AAeUqoR-aiqzZ3fB-IDvpJ4JA6-YBFyRJxusEnIrA,3082
|
|
4
4
|
sutra/clear_cache.py,sha256=rVIz29p7V11Uh6oHXeaWpFtYXXv-2OED91cHMAWWxtQ,187
|
|
5
5
|
sutra/core.py,sha256=R_JbOlZTukegP92Dr-WLsdr632_otFN7o9qSvcxyBtw,10497
|
|
@@ -22,7 +22,7 @@ tests/test_sutra.py,sha256=6Z4SoIuBzza101304I7plkyPVkUBbjIxR8uPs9z5ntg,2383
|
|
|
22
22
|
utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
23
|
utils/file_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
utils/text_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
querysutra-0.3.
|
|
26
|
-
querysutra-0.3.
|
|
27
|
-
querysutra-0.3.
|
|
28
|
-
querysutra-0.3.
|
|
25
|
+
querysutra-0.3.2.dist-info/METADATA,sha256=jr9L8cCDlHeu2Cf58b6uVJ2yJTs7mlAZ9wMkpXDWknE,7947
|
|
26
|
+
querysutra-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
27
|
+
querysutra-0.3.2.dist-info/top_level.txt,sha256=9v0buw21eo5LaUU_3Cf9b9MqRyEvtM9cHaOuEXUKVqM,18
|
|
28
|
+
querysutra-0.3.2.dist-info/RECORD,,
|
sutra/__init__.py
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
QuerySUTRA - Structured-Unstructured-Text-Retrieval-Architecture
|
|
3
3
|
Creates multiple structured tables from ANY data with AI
|
|
4
4
|
|
|
5
|
-
v0.3.
|
|
5
|
+
v0.3.2 - Professional release with comprehensive README
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
__version__ = "0.3.0"
|
|
8
|
+
__version__ = "0.3.2"
|
|
9
9
|
|
|
10
10
|
from sutra.sutra import SUTRA, QueryResult, quick_start
|
|
11
11
|
|
|
@@ -1,429 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: QuerySUTRA
|
|
3
|
-
Version: 0.3.0
|
|
4
|
-
Summary: SUTRA: Structured-Unstructured-Text-Retrieval-Architecture - AI-powered data analysis with custom visualizations, fuzzy matching, and smart caching
|
|
5
|
-
Home-page: https://github.com/yourusername/querysutra
|
|
6
|
-
Author: Aditya Batta
|
|
7
|
-
Author-email:
|
|
8
|
-
License: MIT
|
|
9
|
-
Classifier: Development Status :: 4 - Beta
|
|
10
|
-
Classifier: Intended Audience :: Developers
|
|
11
|
-
Classifier: Topic :: Database
|
|
12
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
-
Classifier: Programming Language :: Python :: 3
|
|
14
|
-
Requires-Python: >=3.8
|
|
15
|
-
Description-Content-Type: text/markdown
|
|
16
|
-
License-File: LICENSE
|
|
17
|
-
Requires-Dist: pandas>=1.3.0
|
|
18
|
-
Requires-Dist: numpy>=1.21.0
|
|
19
|
-
Requires-Dist: openai>=1.0.0
|
|
20
|
-
Requires-Dist: plotly>=5.0.0
|
|
21
|
-
Requires-Dist: matplotlib>=3.3.0
|
|
22
|
-
Requires-Dist: PyPDF2>=3.0.0
|
|
23
|
-
Requires-Dist: python-docx>=0.8.11
|
|
24
|
-
Requires-Dist: openpyxl>=3.0.0
|
|
25
|
-
Provides-Extra: mysql
|
|
26
|
-
Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
|
|
27
|
-
Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
|
|
28
|
-
Provides-Extra: postgres
|
|
29
|
-
Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
|
|
30
|
-
Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
|
|
31
|
-
Provides-Extra: embeddings
|
|
32
|
-
Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
|
|
33
|
-
Provides-Extra: all
|
|
34
|
-
Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
|
|
35
|
-
Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
|
|
36
|
-
Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
|
|
37
|
-
Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
|
|
38
|
-
Dynamic: home-page
|
|
39
|
-
Dynamic: license-file
|
|
40
|
-
Dynamic: requires-python
|
|
41
|
-
|
|
42
|
-
# QuerySUTRA
|
|
43
|
-
|
|
44
|
-
**SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
|
|
45
|
-
|
|
46
|
-
Transform any data into structured, queryable databases with AI-powered entity extraction.
|
|
47
|
-
|
|
48
|
-
## 🎯 Key Features
|
|
49
|
-
|
|
50
|
-
✅ **Multi-Table Creation** - Automatically extracts entities and creates multiple related tables
|
|
51
|
-
✅ **Smart Entity Extraction** - Identifies people, contacts, events, organizations from unstructured data
|
|
52
|
-
✅ **Natural Language Queries** - Ask questions in plain English
|
|
53
|
-
✅ **Multiple Data Formats** - CSV, Excel, JSON, PDF, DOCX, TXT, SQL, DataFrames
|
|
54
|
-
✅ **Direct SQL Access** - Query without API costs
|
|
55
|
-
✅ **Auto Visualization** - Built-in charts and graphs
|
|
56
|
-
✅ **Cloud Export** - Save to MySQL, PostgreSQL, or local SQLite
|
|
57
|
-
|
|
58
|
-
## 📦 Installation
|
|
59
|
-
|
|
60
|
-
```bash
|
|
61
|
-
pip install QuerySUTRA
|
|
62
|
-
|
|
63
|
-
# With MySQL support
|
|
64
|
-
pip install QuerySUTRA[mysql]
|
|
65
|
-
|
|
66
|
-
# With PostgreSQL support
|
|
67
|
-
pip install QuerySUTRA[postgres]
|
|
68
|
-
|
|
69
|
-
# With all database support
|
|
70
|
-
pip install QuerySUTRA[all]
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
## 🚀 Quick Start
|
|
74
|
-
|
|
75
|
-
```python
|
|
76
|
-
from sutra import SUTRA
|
|
77
|
-
|
|
78
|
-
# Initialize
|
|
79
|
-
sutra = SUTRA(api_key="your-openai-key")
|
|
80
|
-
|
|
81
|
-
# Upload any data - AI creates multiple structured tables!
|
|
82
|
-
sutra.upload("employee_story.pdf")
|
|
83
|
-
|
|
84
|
-
# View all created tables
|
|
85
|
-
sutra.tables()
|
|
86
|
-
# Output:
|
|
87
|
-
# 📋 TABLES IN DATABASE
|
|
88
|
-
# 1. employee_story_people (20 rows, 6 columns)
|
|
89
|
-
# Columns: id, name, address, city, email, phone
|
|
90
|
-
# 2. employee_story_contacts (20 rows, 4 columns)
|
|
91
|
-
# Columns: id, person_id, email, phone
|
|
92
|
-
# 3. employee_story_events (15 rows, 4 columns)
|
|
93
|
-
# Columns: id, host_id, description, city
|
|
94
|
-
|
|
95
|
-
# View detailed schema
|
|
96
|
-
sutra.schema()
|
|
97
|
-
|
|
98
|
-
# Query with natural language
|
|
99
|
-
result = sutra.ask("Show all people from New York")
|
|
100
|
-
print(result.data)
|
|
101
|
-
|
|
102
|
-
# With visualization
|
|
103
|
-
result = sutra.ask("Show events by city", viz=True)
|
|
104
|
-
|
|
105
|
-
# Direct SQL (no API cost!)
|
|
106
|
-
result = sutra.sql("SELECT * FROM employee_story_people WHERE city='Dallas'")
|
|
107
|
-
print(result.data)
|
|
108
|
-
```
|
|
109
|
-
|
|
110
|
-
## 📊 How It Works
|
|
111
|
-
|
|
112
|
-
### From Unstructured PDF to Structured Tables
|
|
113
|
-
|
|
114
|
-
**Input:** PDF with employee information
|
|
115
|
-
|
|
116
|
-
**AI Automatically Creates:**
|
|
117
|
-
```
|
|
118
|
-
📋 Created 3 structured tables:
|
|
119
|
-
📊 employee_story_people: 20 rows, 6 columns
|
|
120
|
-
- id, name, address, city, email, phone
|
|
121
|
-
📊 employee_story_contacts: 20 rows, 4 columns
|
|
122
|
-
- id, person_id, email, phone
|
|
123
|
-
📊 employee_story_events: 15 rows, 4 columns
|
|
124
|
-
- id, host_id, description, city
|
|
125
|
-
```
|
|
126
|
-
|
|
127
|
-
## 💡 Usage Examples
|
|
128
|
-
|
|
129
|
-
### 1. Upload Different Formats
|
|
130
|
-
|
|
131
|
-
```python
|
|
132
|
-
# CSV file
|
|
133
|
-
sutra.upload("sales_data.csv")
|
|
134
|
-
|
|
135
|
-
# Excel file
|
|
136
|
-
sutra.upload("quarterly_report.xlsx")
|
|
137
|
-
|
|
138
|
-
# PDF document (AI extracts entities!)
|
|
139
|
-
sutra.upload("company_directory.pdf")
|
|
140
|
-
|
|
141
|
-
# Word document
|
|
142
|
-
sutra.upload("meeting_notes.docx")
|
|
143
|
-
|
|
144
|
-
# Text file
|
|
145
|
-
sutra.upload("log_data.txt")
|
|
146
|
-
|
|
147
|
-
# DataFrame
|
|
148
|
-
import pandas as pd
|
|
149
|
-
df = pd.DataFrame({'name': ['Alice', 'Bob'], 'score': [95, 87]})
|
|
150
|
-
sutra.upload(df, name="test_scores")
|
|
151
|
-
```
|
|
152
|
-
|
|
153
|
-
### 2. View Your Data
|
|
154
|
-
|
|
155
|
-
```python
|
|
156
|
-
# List all tables with details
|
|
157
|
-
sutra.tables()
|
|
158
|
-
|
|
159
|
-
# Show schema with data types
|
|
160
|
-
sutra.schema()
|
|
161
|
-
|
|
162
|
-
# Show schema for specific table
|
|
163
|
-
sutra.schema("employee_story_people")
|
|
164
|
-
|
|
165
|
-
# Preview data
|
|
166
|
-
sutra.peek("employee_story_people", n=10)
|
|
167
|
-
```
|
|
168
|
-
|
|
169
|
-
### 3. Query Your Data
|
|
170
|
-
|
|
171
|
-
```python
|
|
172
|
-
# Natural language (uses OpenAI)
|
|
173
|
-
result = sutra.ask("What are the top 5 sales by region?")
|
|
174
|
-
print(result.data)
|
|
175
|
-
|
|
176
|
-
# With visualization
|
|
177
|
-
result = sutra.ask("Show sales trends by month", viz=True)
|
|
178
|
-
|
|
179
|
-
# Interactive mode (asks if you want viz)
|
|
180
|
-
result = sutra.interactive("Compare revenue across quarters")
|
|
181
|
-
|
|
182
|
-
# Direct SQL (free, no API!)
|
|
183
|
-
result = sutra.sql("SELECT city, COUNT(*) as count FROM employee_story_people GROUP BY city")
|
|
184
|
-
print(result.data)
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
### 4. Export Your Database
|
|
188
|
-
|
|
189
|
-
```python
|
|
190
|
-
# Export to MySQL (local or cloud)
|
|
191
|
-
sutra.save_to_mysql(
|
|
192
|
-
host="localhost",
|
|
193
|
-
user="root",
|
|
194
|
-
password="password",
|
|
195
|
-
database="my_database"
|
|
196
|
-
)
|
|
197
|
-
|
|
198
|
-
# Export to PostgreSQL
|
|
199
|
-
sutra.save_to_postgres(
|
|
200
|
-
host="mydb.amazonaws.com",
|
|
201
|
-
user="admin",
|
|
202
|
-
password="password",
|
|
203
|
-
database="production_db"
|
|
204
|
-
)
|
|
205
|
-
|
|
206
|
-
# Export to SQLite file
|
|
207
|
-
sutra.export_db("backup.db", format="sqlite")
|
|
208
|
-
|
|
209
|
-
# Export to SQL dump
|
|
210
|
-
sutra.export_db("schema.sql", format="sql")
|
|
211
|
-
|
|
212
|
-
# Export to JSON
|
|
213
|
-
sutra.export_db("data.json", format="json")
|
|
214
|
-
|
|
215
|
-
# Export to Excel (all tables as sheets)
|
|
216
|
-
sutra.export_db("data.xlsx", format="excel")
|
|
217
|
-
|
|
218
|
-
# Complete backup
|
|
219
|
-
sutra.backup("./backups")
|
|
220
|
-
```
|
|
221
|
-
|
|
222
|
-
## 🔥 Advanced Features
|
|
223
|
-
|
|
224
|
-
### Entity Extraction
|
|
225
|
-
|
|
226
|
-
QuerySUTRA automatically identifies and extracts:
|
|
227
|
-
|
|
228
|
-
- 👥 **People** - Names, addresses, contact info
|
|
229
|
-
- 📧 **Contacts** - Emails, phone numbers
|
|
230
|
-
- 📅 **Events** - Meetings, activities, locations
|
|
231
|
-
- 🏢 **Organizations** - Companies, departments
|
|
232
|
-
- 📍 **Locations** - Cities, addresses, coordinates
|
|
233
|
-
|
|
234
|
-
### Multiple Table Relationships
|
|
235
|
-
|
|
236
|
-
```python
|
|
237
|
-
# AI creates relational structure
|
|
238
|
-
sutra.upload("company_data.pdf")
|
|
239
|
-
|
|
240
|
-
# Result:
|
|
241
|
-
# people table with person_id
|
|
242
|
-
# contacts table with foreign key to person_id
|
|
243
|
-
# events table with host_id linking to people
|
|
244
|
-
```
|
|
245
|
-
|
|
246
|
-
### Query Across Tables
|
|
247
|
-
|
|
248
|
-
```python
|
|
249
|
-
# Natural language handles joins automatically
|
|
250
|
-
result = sutra.ask("Show all events hosted by people from Dallas")
|
|
251
|
-
|
|
252
|
-
# Or write SQL joins manually
|
|
253
|
-
result = sutra.sql("""
|
|
254
|
-
SELECT e.description, p.name, p.city
|
|
255
|
-
FROM employee_story_events e
|
|
256
|
-
JOIN employee_story_people p ON e.host_id = p.id
|
|
257
|
-
WHERE p.city = 'Dallas'
|
|
258
|
-
""")
|
|
259
|
-
```
|
|
260
|
-
|
|
261
|
-
## 📈 Visualization
|
|
262
|
-
|
|
263
|
-
```python
|
|
264
|
-
# Auto-detect best chart type
|
|
265
|
-
result = sutra.ask("Show revenue by product", viz=True)
|
|
266
|
-
|
|
267
|
-
# Interactive charts with Plotly
|
|
268
|
-
# - Bar charts for categorical data
|
|
269
|
-
# - Line charts for time series
|
|
270
|
-
# - Tables for detailed data
|
|
271
|
-
# - Pie charts for distributions
|
|
272
|
-
```
|
|
273
|
-
|
|
274
|
-
## 🌐 Cloud Database Integration
|
|
275
|
-
|
|
276
|
-
### AWS RDS MySQL
|
|
277
|
-
```python
|
|
278
|
-
sutra.save_to_mysql(
|
|
279
|
-
host="mydb.xxxx.us-east-1.rds.amazonaws.com",
|
|
280
|
-
user="admin",
|
|
281
|
-
password="password",
|
|
282
|
-
database="production",
|
|
283
|
-
port=3306
|
|
284
|
-
)
|
|
285
|
-
```
|
|
286
|
-
|
|
287
|
-
### Google Cloud SQL
|
|
288
|
-
```python
|
|
289
|
-
sutra.save_to_postgres(
|
|
290
|
-
host="35.123.456.789",
|
|
291
|
-
user="postgres",
|
|
292
|
-
password="password",
|
|
293
|
-
database="analytics"
|
|
294
|
-
)
|
|
295
|
-
```
|
|
296
|
-
|
|
297
|
-
### Heroku Postgres
|
|
298
|
-
```python
|
|
299
|
-
sutra.save_to_postgres(
|
|
300
|
-
host="ec2-xx-xxx-xxx-xxx.compute-1.amazonaws.com",
|
|
301
|
-
user="username",
|
|
302
|
-
password="password",
|
|
303
|
-
database="dbname",
|
|
304
|
-
port=5432
|
|
305
|
-
)
|
|
306
|
-
```
|
|
307
|
-
|
|
308
|
-
## ⚡ Performance Tips
|
|
309
|
-
|
|
310
|
-
```python
|
|
311
|
-
# Use direct SQL for complex queries (faster, no API cost)
|
|
312
|
-
result = sutra.sql("SELECT * FROM data WHERE status='active'")
|
|
313
|
-
|
|
314
|
-
# Cache is automatic for repeated questions
|
|
315
|
-
result1 = sutra.ask("Show total sales") # Calls API
|
|
316
|
-
result2 = sutra.ask("Show total sales") # From cache ⚡
|
|
317
|
-
|
|
318
|
-
# Export results for reuse
|
|
319
|
-
result.data.to_csv("results.csv")
|
|
320
|
-
```
|
|
321
|
-
|
|
322
|
-
## 🔒 API Key Security
|
|
323
|
-
|
|
324
|
-
```python
|
|
325
|
-
# Option 1: Pass directly (not recommended for production)
|
|
326
|
-
sutra = SUTRA(api_key="sk-...")
|
|
327
|
-
|
|
328
|
-
# Option 2: Environment variable (recommended)
|
|
329
|
-
import os
|
|
330
|
-
os.environ["OPENAI_API_KEY"] = "sk-..."
|
|
331
|
-
sutra = SUTRA()
|
|
332
|
-
|
|
333
|
-
# Option 3: .env file
|
|
334
|
-
# Create .env file with: OPENAI_API_KEY=sk-...
|
|
335
|
-
from dotenv import load_dotenv
|
|
336
|
-
load_dotenv()
|
|
337
|
-
sutra = SUTRA()
|
|
338
|
-
```
|
|
339
|
-
|
|
340
|
-
## 🎓 Complete Example
|
|
341
|
-
|
|
342
|
-
```python
|
|
343
|
-
from sutra import SUTRA
|
|
344
|
-
import pandas as pd
|
|
345
|
-
|
|
346
|
-
# Initialize
|
|
347
|
-
sutra = SUTRA(api_key="your-openai-key")
|
|
348
|
-
|
|
349
|
-
# Upload PDF - creates multiple tables
|
|
350
|
-
sutra.upload("employee_directory.pdf")
|
|
351
|
-
|
|
352
|
-
# View what was created
|
|
353
|
-
tables_info = sutra.tables()
|
|
354
|
-
print(f"Created {len(tables_info)} tables")
|
|
355
|
-
|
|
356
|
-
# View detailed schema
|
|
357
|
-
sutra.schema()
|
|
358
|
-
|
|
359
|
-
# Query specific table
|
|
360
|
-
result = sutra.ask("How many people are in each city?",
|
|
361
|
-
table="employee_directory_people")
|
|
362
|
-
print(result.data)
|
|
363
|
-
|
|
364
|
-
# Visualize
|
|
365
|
-
result = sutra.ask("Show distribution of people by city", viz=True)
|
|
366
|
-
|
|
367
|
-
# Export to MySQL
|
|
368
|
-
sutra.save_to_mysql("localhost", "root", "password", "company_db")
|
|
369
|
-
|
|
370
|
-
# Backup everything
|
|
371
|
-
sutra.backup("./backups")
|
|
372
|
-
|
|
373
|
-
# Close connection
|
|
374
|
-
sutra.close()
|
|
375
|
-
```
|
|
376
|
-
|
|
377
|
-
## 📚 Method Reference
|
|
378
|
-
|
|
379
|
-
### Core Methods
|
|
380
|
-
|
|
381
|
-
| Method | Description |
|
|
382
|
-
|--------|-------------|
|
|
383
|
-
| `upload(data, name)` | Upload any data format, creates multiple tables |
|
|
384
|
-
| `tables()` | List all tables with row/column counts |
|
|
385
|
-
| `schema(table)` | Show detailed schema with data types |
|
|
386
|
-
| `peek(table, n)` | Preview first n rows |
|
|
387
|
-
| `ask(question, viz)` | Natural language query |
|
|
388
|
-
| `sql(query, viz)` | Direct SQL query |
|
|
389
|
-
| `interactive(question)` | Query with viz prompt |
|
|
390
|
-
|
|
391
|
-
### Export Methods
|
|
392
|
-
|
|
393
|
-
| Method | Description |
|
|
394
|
-
|--------|-------------|
|
|
395
|
-
| `export_db(path, format)` | Export database (sqlite/sql/json/excel) |
|
|
396
|
-
| `save_to_mysql(...)` | Save to MySQL database |
|
|
397
|
-
| `save_to_postgres(...)` | Save to PostgreSQL database |
|
|
398
|
-
| `backup(path)` | Complete backup with timestamp |
|
|
399
|
-
|
|
400
|
-
## 🐛 Troubleshooting
|
|
401
|
-
|
|
402
|
-
**Q: Only one table created instead of multiple?**
|
|
403
|
-
A: Make sure you have OpenAI API key set. Without it, falls back to simple parsing.
|
|
404
|
-
|
|
405
|
-
**Q: "No API key" error?**
|
|
406
|
-
A: Set your OpenAI key: `sutra = SUTRA(api_key="sk-...")`
|
|
407
|
-
|
|
408
|
-
**Q: PDF extraction failed?**
|
|
409
|
-
A: Install PyPDF2: `pip install PyPDF2`
|
|
410
|
-
|
|
411
|
-
**Q: MySQL export error?**
|
|
412
|
-
A: Install extras: `pip install QuerySUTRA[mysql]`
|
|
413
|
-
|
|
414
|
-
## 📄 License
|
|
415
|
-
|
|
416
|
-
MIT License - see LICENSE file
|
|
417
|
-
|
|
418
|
-
## 🤝 Contributing
|
|
419
|
-
|
|
420
|
-
Contributions welcome! Open an issue or submit a PR.
|
|
421
|
-
|
|
422
|
-
## 📞 Support
|
|
423
|
-
|
|
424
|
-
- Issues: [GitHub Issues](https://github.com/yourusername/querysutra/issues)
|
|
425
|
-
- Email: your@email.com
|
|
426
|
-
|
|
427
|
-
---
|
|
428
|
-
|
|
429
|
-
**Made with ❤️ by Aditya Batta**
|
|
File without changes
|
|
File without changes
|
|
File without changes
|