QuerySUTRA 0.3.1__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. querysutra-0.3.2/PKG-INFO +290 -0
  2. querysutra-0.3.2/QuerySUTRA.egg-info/PKG-INFO +290 -0
  3. querysutra-0.3.2/README.md +249 -0
  4. {querysutra-0.3.1 → querysutra-0.3.2}/pyproject.toml +1 -1
  5. {querysutra-0.3.1 → querysutra-0.3.2}/setup.py +1 -1
  6. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/__init__.py +2 -2
  7. querysutra-0.3.1/PKG-INFO +0 -429
  8. querysutra-0.3.1/QuerySUTRA.egg-info/PKG-INFO +0 -429
  9. querysutra-0.3.1/README.md +0 -388
  10. {querysutra-0.3.1 → querysutra-0.3.2}/.gitignore +0 -0
  11. {querysutra-0.3.1 → querysutra-0.3.2}/LICENSE +0 -0
  12. {querysutra-0.3.1 → querysutra-0.3.2}/MANIFEST.in +0 -0
  13. {querysutra-0.3.1 → querysutra-0.3.2}/QuerySUTRA.egg-info/SOURCES.txt +0 -0
  14. {querysutra-0.3.1 → querysutra-0.3.2}/QuerySUTRA.egg-info/dependency_links.txt +0 -0
  15. {querysutra-0.3.1 → querysutra-0.3.2}/QuerySUTRA.egg-info/requires.txt +0 -0
  16. {querysutra-0.3.1 → querysutra-0.3.2}/QuerySUTRA.egg-info/top_level.txt +0 -0
  17. {querysutra-0.3.1 → querysutra-0.3.2}/config.py +0 -0
  18. {querysutra-0.3.1 → querysutra-0.3.2}/examples/quickstart.py +0 -0
  19. {querysutra-0.3.1 → querysutra-0.3.2}/examples/sutra_usage_guide.ipynb +0 -0
  20. {querysutra-0.3.1 → querysutra-0.3.2}/examples/usage_guide.ipynb +0 -0
  21. {querysutra-0.3.1 → querysutra-0.3.2}/main.py +0 -0
  22. {querysutra-0.3.1 → querysutra-0.3.2}/requirements.txt +0 -0
  23. {querysutra-0.3.1 → querysutra-0.3.2}/setup.cfg +0 -0
  24. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/cache_manager.py +0 -0
  25. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/clear_cache.py +0 -0
  26. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/core.py +0 -0
  27. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/data_loader.py +0 -0
  28. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/database_manager.py +0 -0
  29. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/direct_query.py +0 -0
  30. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/feedback.py +0 -0
  31. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/feedback_matcher.py +0 -0
  32. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/nlp_processor.py +0 -0
  33. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/schema_embeddings.py +0 -0
  34. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/schema_generator.py +0 -0
  35. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/sutra.py +0 -0
  36. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/sutra_client.py +0 -0
  37. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/sutra_core.py +0 -0
  38. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/sutra_simple.py +0 -0
  39. {querysutra-0.3.1 → querysutra-0.3.2}/sutra/visualizer.py +0 -0
  40. {querysutra-0.3.1 → querysutra-0.3.2}/test_openapi.py +0 -0
  41. {querysutra-0.3.1 → querysutra-0.3.2}/tests/__init__.py +0 -0
  42. {querysutra-0.3.1 → querysutra-0.3.2}/tests/test_modules.py +0 -0
  43. {querysutra-0.3.1 → querysutra-0.3.2}/tests/test_sutra.py +0 -0
  44. {querysutra-0.3.1 → querysutra-0.3.2}/utils/__init__.py +0 -0
  45. {querysutra-0.3.1 → querysutra-0.3.2}/utils/file_utils.py +0 -0
  46. {querysutra-0.3.1 → querysutra-0.3.2}/utils/text_utils.py +0 -0
@@ -0,0 +1,290 @@
1
+ Metadata-Version: 2.4
2
+ Name: QuerySUTRA
3
+ Version: 0.3.2
4
+ Summary: SUTRA: Structured-Unstructured-Text-Retrieval-Architecture - AI-powered data analysis with custom visualizations, fuzzy matching, and smart caching
5
+ Home-page: https://github.com/yourusername/querysutra
6
+ Author: Aditya Batta
7
+ Author-email:
8
+ License: MIT
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Topic :: Database
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Requires-Python: >=3.8
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: pandas>=1.3.0
18
+ Requires-Dist: numpy>=1.21.0
19
+ Requires-Dist: openai>=1.0.0
20
+ Requires-Dist: plotly>=5.0.0
21
+ Requires-Dist: matplotlib>=3.3.0
22
+ Requires-Dist: PyPDF2>=3.0.0
23
+ Requires-Dist: python-docx>=0.8.11
24
+ Requires-Dist: openpyxl>=3.0.0
25
+ Provides-Extra: mysql
26
+ Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
27
+ Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
28
+ Provides-Extra: postgres
29
+ Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
30
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
31
+ Provides-Extra: embeddings
32
+ Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
33
+ Provides-Extra: all
34
+ Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
35
+ Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
36
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
37
+ Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
38
+ Dynamic: home-page
39
+ Dynamic: license-file
40
+ Dynamic: requires-python
41
+
42
+ # QuerySUTRA
43
+
44
+ **SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
45
+
46
+ Professional Python library for AI-powered data analysis with automatic entity extraction, natural language querying, and intelligent caching.
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ pip install QuerySUTRA
52
+
53
+ # Optional features
54
+ pip install QuerySUTRA[embeddings] # Smart caching
55
+ pip install QuerySUTRA[mysql] # MySQL support
56
+ pip install QuerySUTRA[postgres] # PostgreSQL support
57
+ pip install QuerySUTRA[all] # All features
58
+ ```
59
+
60
+ ## Key Features
61
+
62
+ ### 1. Automatic Multi-Table Creation
63
+ Upload PDFs, Word documents, or text files and automatically extract structured entities.
64
+
65
+ ```python
66
+ from sutra import SUTRA
67
+
68
+ sutra = SUTRA(api_key="your-openai-key")
69
+ sutra.upload("employee_data.pdf")
70
+
71
+ # Automatically creates:
72
+ # - employee_data_people (20 rows, 6 columns)
73
+ # - employee_data_contacts (20 rows, 4 columns)
74
+ # - employee_data_events (15 rows, 4 columns)
75
+ ```
76
+
77
+ ### 2. Natural Language Querying
78
+
79
+ ```python
80
+ result = sutra.ask("Show me all people from New York")
81
+ print(result.data)
82
+
83
+ # With visualization
84
+ result = sutra.ask("Show sales by region", viz="pie")
85
+ ```
86
+
87
+ ### 3. Load Existing Databases
88
+
89
+ ```python
90
+ # Load SQLite database
91
+ sutra = SUTRA.load_from_db("sutra.db", api_key="your-key")
92
+
93
+ # Connect to MySQL
94
+ sutra = SUTRA.connect_mysql("localhost", "root", "password", "database")
95
+
96
+ # Connect to PostgreSQL
97
+ sutra = SUTRA.connect_postgres("localhost", "postgres", "password", "database")
98
+ ```
99
+
100
+ ### 4. Custom Visualizations
101
+
102
+ ```python
103
+ result = sutra.ask("Sales by region", viz="pie") # Pie chart
104
+ result = sutra.ask("Trends", viz="line") # Line chart
105
+ result = sutra.ask("Compare", viz="bar") # Bar chart
106
+ result = sutra.ask("Correlation", viz="scatter") # Scatter plot
107
+ result = sutra.ask("Data", viz="table") # Table view
108
+ result = sutra.ask("Analysis", viz="heatmap") # Heatmap
109
+ result = sutra.ask("Auto", viz=True) # Auto-detect
110
+ ```
111
+
112
+ ### 5. Smart Fuzzy Matching
113
+
114
+ ```python
115
+ sutra = SUTRA(api_key="your-key", fuzzy_match=True)
116
+
117
+ # "New York City" matches "New York" automatically
118
+ result = sutra.ask("Who is from New York City?")
119
+ ```
120
+
121
+ ### 6. Intelligent Caching with Embeddings
122
+
123
+ ```python
124
+ sutra = SUTRA(api_key="your-key", use_embeddings=True)
125
+
126
+ result = sutra.ask("Show sales") # Calls API
127
+ result = sutra.ask("Display sales data") # Uses cache (no API call)
128
+ ```
129
+
130
+ ### 7. Irrelevant Query Detection
131
+
132
+ ```python
133
+ sutra = SUTRA(api_key="your-key", check_relevance=True)
134
+
135
+ result = sutra.ask("What is the weather?")
136
+ # Warns: "This question seems irrelevant to your database"
137
+ ```
138
+
139
+ ### 8. Direct SQL Access (No API Cost)
140
+
141
+ ```python
142
+ result = sutra.sql("SELECT * FROM people WHERE city='New York'")
143
+ print(result.data)
144
+ ```
145
+
146
+ ## Complete Configuration
147
+
148
+ ```python
149
+ sutra = SUTRA(
150
+ api_key="your-openai-key",
151
+ db="database.db", # SQLite path
152
+ use_embeddings=True, # Smart caching (saves API calls)
153
+ check_relevance=True, # Detect irrelevant queries
154
+ fuzzy_match=True, # Fuzzy-match query values to database values (e.g. "New York City" → "New York")
155
+ cache_queries=True # Simple caching
156
+ )
157
+ ```
158
+
159
+ ## Supported Formats
160
+
161
+ CSV, Excel, JSON, SQL, PDF, Word, Text, Pandas DataFrame
162
+
163
+ ## Usage Examples
164
+
165
+ ### Basic Workflow
166
+
167
+ ```python
168
+ sutra = SUTRA(api_key="your-key")
169
+ sutra.upload("data.pdf")
170
+ sutra.tables() # View tables
171
+ sutra.schema() # View schema
172
+ sutra.peek("table_name", n=10) # Preview data
173
+ result = sutra.ask("Your question?")
174
+ ```
175
+
176
+ ### Database Export
177
+
178
+ ```python
179
+ sutra.export_db("backup.db", format="sqlite")
180
+ sutra.export_db("schema.sql", format="sql")
181
+ sutra.save_to_mysql("localhost", "root", "pass", "db")
182
+ sutra.save_to_postgres("localhost", "postgres", "pass", "db")
183
+ sutra.backup("./backups")
184
+ ```
185
+
186
+ ## How It Works
187
+
188
+ ### Entity Extraction Example
189
+
190
+ **Input PDF:**
191
+ ```
192
+ John Doe lives at 123 Main St, Dallas. Email: john@company.com.
193
+ Sarah Smith lives at 456 Oak Ave, Boston. Email: sarah@company.com.
194
+ ```
195
+
196
+ **Output Tables:**
197
+
198
+ **people**
199
+ | id | name | address | city | email |
200
+ |----|------|---------|------|-------|
201
+ | 1 | John Doe | 123 Main St | Dallas | john@company.com |
202
+ | 2 | Sarah Smith | 456 Oak Ave | Boston | sarah@company.com |
203
+
204
+ ### Embeddings for Smart Caching
205
+
206
+ Uses the `all-MiniLM-L6-v2` model (80MB, runs locally):
207
+ - Query 1: "Show sales" → API call
208
+ - Query 2: "Display sales" → 92% similar → Cached (no API call)
209
+
210
+ ### Fuzzy Matching
211
+
212
+ - Query: "New York City"
213
+ - Database: ["New York", "Dallas", "Boston"]
214
+ - Match: "New York City" → "New York" (85% similar)
215
+
216
+ ## API Reference
217
+
218
+ ### Class Methods
219
+
220
+ `SUTRA.load_from_db(db_path, api_key, **kwargs)` - Load existing SQLite database
221
+
222
+ `SUTRA.connect_mysql(host, user, password, database, ...)` - Connect to MySQL
223
+
224
+ `SUTRA.connect_postgres(host, user, password, database, ...)` - Connect to PostgreSQL
225
+
226
+ ### Instance Methods
227
+
228
+ `upload(data, name=None)` - Upload data
229
+
230
+ `ask(question, viz=False, table=None)` - Natural language query
231
+
232
+ `sql(query, viz=False)` - Raw SQL query
233
+
234
+ `tables()` - List all tables
235
+
236
+ `schema(table=None)` - Show schema
237
+
238
+ `peek(table=None, n=5)` - Preview data
239
+
240
+ `export_db(path, format)` - Export database
241
+
242
+ `save_to_mysql(...)` - Export to MySQL
243
+
244
+ `save_to_postgres(...)` - Export to PostgreSQL
245
+
246
+ `backup(path=None)` - Create backup
247
+
248
+ `close()` - Close connection
249
+
250
+ ## Performance Tips
251
+
252
+ 1. Use `load_from_db()` to avoid re-uploading
253
+ 2. Use `sql()` for complex queries (no API cost)
254
+ 3. Enable `use_embeddings=True` for caching
255
+ 4. Enable `cache_queries=True` for exact matches
256
+
257
+ ## Troubleshooting
258
+
259
+ **No API key error:** `sutra = SUTRA(api_key="sk-...")`
260
+
261
+ **PDF fails:** `pip install PyPDF2`
262
+
263
+ **MySQL error:** `pip install QuerySUTRA[mysql]`
264
+
265
+ **Embeddings error:** `pip install QuerySUTRA[embeddings]`
266
+
267
+ ## Requirements
268
+
269
+ - Python 3.8+
270
+ - OpenAI API key
271
+ - 100MB disk space (if using embeddings)
272
+
273
+ ## License
274
+
275
+ MIT License
276
+
277
+ ## Changelog
278
+
279
+ ### v0.3.1
280
+ - Semantic embeddings for smart caching
281
+ - Fuzzy matching of query values against database values
282
+ - Irrelevant query detection
283
+ - Load existing databases
284
+ - MySQL/PostgreSQL connectivity
285
+ - Custom visualizations
286
+ - All features optional
287
+
288
+ ---
289
+
290
+ **Made by Aditya Batta**
@@ -0,0 +1,290 @@
1
+ Metadata-Version: 2.4
2
+ Name: QuerySUTRA
3
+ Version: 0.3.2
4
+ Summary: SUTRA: Structured-Unstructured-Text-Retrieval-Architecture - AI-powered data analysis with custom visualizations, fuzzy matching, and smart caching
5
+ Home-page: https://github.com/yourusername/querysutra
6
+ Author: Aditya Batta
7
+ Author-email:
8
+ License: MIT
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Topic :: Database
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Requires-Python: >=3.8
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: pandas>=1.3.0
18
+ Requires-Dist: numpy>=1.21.0
19
+ Requires-Dist: openai>=1.0.0
20
+ Requires-Dist: plotly>=5.0.0
21
+ Requires-Dist: matplotlib>=3.3.0
22
+ Requires-Dist: PyPDF2>=3.0.0
23
+ Requires-Dist: python-docx>=0.8.11
24
+ Requires-Dist: openpyxl>=3.0.0
25
+ Provides-Extra: mysql
26
+ Requires-Dist: sqlalchemy>=1.4.0; extra == "mysql"
27
+ Requires-Dist: mysql-connector-python>=8.0.0; extra == "mysql"
28
+ Provides-Extra: postgres
29
+ Requires-Dist: sqlalchemy>=1.4.0; extra == "postgres"
30
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "postgres"
31
+ Provides-Extra: embeddings
32
+ Requires-Dist: sentence-transformers>=2.0.0; extra == "embeddings"
33
+ Provides-Extra: all
34
+ Requires-Dist: sqlalchemy>=1.4.0; extra == "all"
35
+ Requires-Dist: mysql-connector-python>=8.0.0; extra == "all"
36
+ Requires-Dist: psycopg2-binary>=2.9.0; extra == "all"
37
+ Requires-Dist: sentence-transformers>=2.0.0; extra == "all"
38
+ Dynamic: home-page
39
+ Dynamic: license-file
40
+ Dynamic: requires-python
41
+
42
+ # QuerySUTRA
43
+
44
+ **SUTRA: Structured-Unstructured-Text-Retrieval-Architecture**
45
+
46
+ Professional Python library for AI-powered data analysis with automatic entity extraction, natural language querying, and intelligent caching.
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ pip install QuerySUTRA
52
+
53
+ # Optional features
54
+ pip install QuerySUTRA[embeddings] # Smart caching
55
+ pip install QuerySUTRA[mysql] # MySQL support
56
+ pip install QuerySUTRA[postgres] # PostgreSQL support
57
+ pip install QuerySUTRA[all] # All features
58
+ ```
59
+
60
+ ## Key Features
61
+
62
+ ### 1. Automatic Multi-Table Creation
63
+ Upload PDFs, Word documents, or text files and automatically extract structured entities.
64
+
65
+ ```python
66
+ from sutra import SUTRA
67
+
68
+ sutra = SUTRA(api_key="your-openai-key")
69
+ sutra.upload("employee_data.pdf")
70
+
71
+ # Automatically creates:
72
+ # - employee_data_people (20 rows, 6 columns)
73
+ # - employee_data_contacts (20 rows, 4 columns)
74
+ # - employee_data_events (15 rows, 4 columns)
75
+ ```
76
+
77
+ ### 2. Natural Language Querying
78
+
79
+ ```python
80
+ result = sutra.ask("Show me all people from New York")
81
+ print(result.data)
82
+
83
+ # With visualization
84
+ result = sutra.ask("Show sales by region", viz="pie")
85
+ ```
86
+
87
+ ### 3. Load Existing Databases
88
+
89
+ ```python
90
+ # Load SQLite database
91
+ sutra = SUTRA.load_from_db("sutra.db", api_key="your-key")
92
+
93
+ # Connect to MySQL
94
+ sutra = SUTRA.connect_mysql("localhost", "root", "password", "database")
95
+
96
+ # Connect to PostgreSQL
97
+ sutra = SUTRA.connect_postgres("localhost", "postgres", "password", "database")
98
+ ```
99
+
100
+ ### 4. Custom Visualizations
101
+
102
+ ```python
103
+ result = sutra.ask("Sales by region", viz="pie") # Pie chart
104
+ result = sutra.ask("Trends", viz="line") # Line chart
105
+ result = sutra.ask("Compare", viz="bar") # Bar chart
106
+ result = sutra.ask("Correlation", viz="scatter") # Scatter plot
107
+ result = sutra.ask("Data", viz="table") # Table view
108
+ result = sutra.ask("Analysis", viz="heatmap") # Heatmap
109
+ result = sutra.ask("Auto", viz=True) # Auto-detect
110
+ ```
111
+
112
+ ### 5. Smart Fuzzy Matching
113
+
114
+ ```python
115
+ sutra = SUTRA(api_key="your-key", fuzzy_match=True)
116
+
117
+ # "New York City" matches "New York" automatically
118
+ result = sutra.ask("Who is from New York City?")
119
+ ```
120
+
121
+ ### 6. Intelligent Caching with Embeddings
122
+
123
+ ```python
124
+ sutra = SUTRA(api_key="your-key", use_embeddings=True)
125
+
126
+ result = sutra.ask("Show sales") # Calls API
127
+ result = sutra.ask("Display sales data") # Uses cache (no API call)
128
+ ```
129
+
130
+ ### 7. Irrelevant Query Detection
131
+
132
+ ```python
133
+ sutra = SUTRA(api_key="your-key", check_relevance=True)
134
+
135
+ result = sutra.ask("What is the weather?")
136
+ # Warns: "This question seems irrelevant to your database"
137
+ ```
138
+
139
+ ### 8. Direct SQL Access (No API Cost)
140
+
141
+ ```python
142
+ result = sutra.sql("SELECT * FROM people WHERE city='New York'")
143
+ print(result.data)
144
+ ```
145
+
146
+ ## Complete Configuration
147
+
148
+ ```python
149
+ sutra = SUTRA(
150
+ api_key="your-openai-key",
151
+ db="database.db", # SQLite path
152
+ use_embeddings=True, # Smart caching (saves API calls)
153
+ check_relevance=True, # Detect irrelevant queries
154
+ fuzzy_match=True, # Fuzzy-match query values to database values (e.g. "New York City" → "New York")
155
+ cache_queries=True # Simple caching
156
+ )
157
+ ```
158
+
159
+ ## Supported Formats
160
+
161
+ CSV, Excel, JSON, SQL, PDF, Word, Text, Pandas DataFrame
162
+
163
+ ## Usage Examples
164
+
165
+ ### Basic Workflow
166
+
167
+ ```python
168
+ sutra = SUTRA(api_key="your-key")
169
+ sutra.upload("data.pdf")
170
+ sutra.tables() # View tables
171
+ sutra.schema() # View schema
172
+ sutra.peek("table_name", n=10) # Preview data
173
+ result = sutra.ask("Your question?")
174
+ ```
175
+
176
+ ### Database Export
177
+
178
+ ```python
179
+ sutra.export_db("backup.db", format="sqlite")
180
+ sutra.export_db("schema.sql", format="sql")
181
+ sutra.save_to_mysql("localhost", "root", "pass", "db")
182
+ sutra.save_to_postgres("localhost", "postgres", "pass", "db")
183
+ sutra.backup("./backups")
184
+ ```
185
+
186
+ ## How It Works
187
+
188
+ ### Entity Extraction Example
189
+
190
+ **Input PDF:**
191
+ ```
192
+ John Doe lives at 123 Main St, Dallas. Email: john@company.com.
193
+ Sarah Smith lives at 456 Oak Ave, Boston. Email: sarah@company.com.
194
+ ```
195
+
196
+ **Output Tables:**
197
+
198
+ **people**
199
+ | id | name | address | city | email |
200
+ |----|------|---------|------|-------|
201
+ | 1 | John Doe | 123 Main St | Dallas | john@company.com |
202
+ | 2 | Sarah Smith | 456 Oak Ave | Boston | sarah@company.com |
203
+
204
+ ### Embeddings for Smart Caching
205
+
206
+ Uses the `all-MiniLM-L6-v2` model (80MB, runs locally):
207
+ - Query 1: "Show sales" → API call
208
+ - Query 2: "Display sales" → 92% similar → Cached (no API call)
209
+
210
+ ### Fuzzy Matching
211
+
212
+ - Query: "New York City"
213
+ - Database: ["New York", "Dallas", "Boston"]
214
+ - Match: "New York City" → "New York" (85% similar)
215
+
216
+ ## API Reference
217
+
218
+ ### Class Methods
219
+
220
+ `SUTRA.load_from_db(db_path, api_key, **kwargs)` - Load existing SQLite database
221
+
222
+ `SUTRA.connect_mysql(host, user, password, database, ...)` - Connect to MySQL
223
+
224
+ `SUTRA.connect_postgres(host, user, password, database, ...)` - Connect to PostgreSQL
225
+
226
+ ### Instance Methods
227
+
228
+ `upload(data, name=None)` - Upload data
229
+
230
+ `ask(question, viz=False, table=None)` - Natural language query
231
+
232
+ `sql(query, viz=False)` - Raw SQL query
233
+
234
+ `tables()` - List all tables
235
+
236
+ `schema(table=None)` - Show schema
237
+
238
+ `peek(table=None, n=5)` - Preview data
239
+
240
+ `export_db(path, format)` - Export database
241
+
242
+ `save_to_mysql(...)` - Export to MySQL
243
+
244
+ `save_to_postgres(...)` - Export to PostgreSQL
245
+
246
+ `backup(path=None)` - Create backup
247
+
248
+ `close()` - Close connection
249
+
250
+ ## Performance Tips
251
+
252
+ 1. Use `load_from_db()` to avoid re-uploading
253
+ 2. Use `sql()` for complex queries (no API cost)
254
+ 3. Enable `use_embeddings=True` for caching
255
+ 4. Enable `cache_queries=True` for exact matches
256
+
257
+ ## Troubleshooting
258
+
259
+ **No API key error:** `sutra = SUTRA(api_key="sk-...")`
260
+
261
+ **PDF fails:** `pip install PyPDF2`
262
+
263
+ **MySQL error:** `pip install QuerySUTRA[mysql]`
264
+
265
+ **Embeddings error:** `pip install QuerySUTRA[embeddings]`
266
+
267
+ ## Requirements
268
+
269
+ - Python 3.8+
270
+ - OpenAI API key
271
+ - 100MB disk space (if using embeddings)
272
+
273
+ ## License
274
+
275
+ MIT License
276
+
277
+ ## Changelog
278
+
279
+ ### v0.3.1
280
+ - Semantic embeddings for smart caching
281
+ - Fuzzy matching of query values against database values
282
+ - Irrelevant query detection
283
+ - Load existing databases
284
+ - MySQL/PostgreSQL connectivity
285
+ - Custom visualizations
286
+ - All features optional
287
+
288
+ ---
289
+
290
+ **Made by Aditya Batta**