querymind-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. app/agents/InterpreterAgent.py +473 -0
  2. app/agents/__init__.py +0 -0
  3. app/agents/insights_generator.py +151 -0
  4. app/agents/intent_corrector.py +59 -0
  5. app/agents/llm_intepreter.py +132 -0
  6. app/agents/narrator.py +27 -0
  7. app/agents/planner.py +77 -0
  8. app/cli/__init__.py +0 -0
  9. app/cli/main.py +346 -0
  10. app/cli/tui_app.py +98 -0
  11. app/cli/ui.py +21 -0
  12. app/core/__init__.py +0 -0
  13. app/core/context.py +10 -0
  14. app/core/logger.py +2 -0
  15. app/core/pipeline.py +379 -0
  16. app/data/__init__.py +0 -0
  17. app/data/connectors/csv_connector.py +99 -0
  18. app/data/connectors/excel_connector.py +68 -0
  19. app/data/connectors/no_sql_db_connector.py +0 -0
  20. app/data/connectors/sql_db_connector.py +0 -0
  21. app/data/schema_engine.py +18 -0
  22. app/data/type_caster.py +128 -0
  23. app/executor/__init__.py +0 -0
  24. app/executor/db_executor.py +0 -0
  25. app/executor/sheet_selector.py +120 -0
  26. app/llm/ollama_client.py +47 -0
  27. app/prompts/interpreter_prompt.txt +28 -0
  28. app/security/__init__.py +0 -0
  29. app/security/input_guard.py +133 -0
  30. app/security/schema_filter.py +20 -0
  31. app/tests/__init__.py +0 -0
  32. app/tests/llm_test.py +18 -0
  33. app/tools/__init__.py +0 -0
  34. app/tools/analyzer.py +157 -0
  35. app/tools/join_resolver.py +159 -0
  36. app/tools/sql_writer.py +37 -0
  37. app/tools/validator.py +0 -0
  38. querymind_cli-0.1.0.dist-info/METADATA +139 -0
  39. querymind_cli-0.1.0.dist-info/RECORD +43 -0
  40. querymind_cli-0.1.0.dist-info/WHEEL +5 -0
  41. querymind_cli-0.1.0.dist-info/entry_points.txt +2 -0
  42. querymind_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  43. querymind_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,159 @@
1
+ import pandas as pd
2
+
3
+
4
class JoinResolver:
    """
    Resolve queries whose metric and dimension columns live in different sheets.

    When the combined DataFrame has no row where both the metric and the
    dimension are non-null, the resolver locates the sheet(s) that own the
    two columns, picks a join key from their shared columns, and replaces
    the working DataFrame with the joined result so that later pipeline
    stages can group and aggregate normally.

    Domain-agnostic: works purely from the loaded data structure.
    No hardcoded column names or subject-matter assumptions.

    Context keys read
    -----------------
    intent            – {"metric": str, "dimension": str, ...}
    dataframe         – combined df (all sheets outer-joined); optional
    sheet_dataframes  – dict[sheet_name -> DataFrame]

    Context keys written
    --------------------
    dataframe     – replaced with the single-sheet or joined df when needed
    schema        – updated to reflect the replaced df's columns
    join_resolved – True if a join was performed
    join_info     – dict with join details (for debugging / TUI display)
    error         – set when no join key exists or the merge itself fails
    """

    # Substrings (matched case-insensitively) that mark a shared column as a
    # row counter rather than a meaningful join key.
    _EXCLUDED_KEY_HINTS = ("row_id", "index", "unnamed")

    def run(self, context: dict) -> dict:
        """
        Inspect ``context`` and, if required, swap in a joined DataFrame.

        Returns the same ``context`` dict (mutated in place). The context is
        returned untouched when fewer than two sheets are loaded, when the
        intent lacks a metric or dimension, when both columns already
        co-exist on at least one row, or when either column cannot be found
        in any sheet.
        """
        sheet_dfs = context.get("sheet_dataframes") or {}

        # Only relevant for multi-sheet Excel files
        if len(sheet_dfs) < 2:
            return context

        intent = context.get("intent") or {}
        metric = intent.get("metric")
        dimension = intent.get("dimension")

        if not metric or not dimension:
            return context

        # ── Co-existence check ────────────────────────────────────────────
        # The combined df outer-joins all sheets, so both columns may exist
        # but have non-null values on completely different rows.
        # If there are no rows where BOTH are non-null → cross-sheet join needed.
        # A missing combined df is tolerated: the per-sheet frames are enough
        # to resolve the join.
        combined = context.get("dataframe")
        if (
            combined is not None
            and metric in combined.columns
            and dimension in combined.columns
        ):
            both_valid = combined[metric].notna() & combined[dimension].notna()
            if both_valid.any():
                return context  # columns co-exist — no join needed

        # ── Prefer a single sheet that already holds both columns ─────────
        # Looking each column up independently can select two different
        # sheets even though one sheet contains both; joining in that case is
        # unnecessary and can duplicate rows (inflating the metric).
        for sheet, df in sheet_dfs.items():
            if metric in df.columns and dimension in df.columns:
                context["dataframe"] = df.copy()
                context["schema"] = self._build_schema(context["dataframe"])
                return context

        # ── Find which sheet owns each column ─────────────────────────────
        metric_sheet = self._find_sheet(metric, sheet_dfs)
        dimension_sheet = self._find_sheet(dimension, sheet_dfs)

        if not metric_sheet or not dimension_sheet:
            # let Analyzer produce a clear column-not-found error
            return context

        # ── Find join key ─────────────────────────────────────────────────
        left_df = sheet_dfs[metric_sheet]
        right_df = sheet_dfs[dimension_sheet]
        join_key = self._find_join_key(left_df, right_df)

        if join_key is None:
            context["error"] = (
                f"Cannot answer this query automatically.\n\n"
                f" '{metric}' lives in '{metric_sheet}' and "
                f"'{dimension}' lives in '{dimension_sheet}', "
                f"but these sheets share no common column to join on.\n\n"
                f" Try a query within a single sheet instead."
            )
            return context

        # ── Perform the join ──────────────────────────────────────────────
        try:
            # Only bring new columns from right (avoids _x/_y suffix collisions)
            new_cols = [join_key] + [
                c for c in right_df.columns if c not in left_df.columns
            ]

            # NOTE: duplicate join-key values on the right side repeat the
            # matching left rows, as with any many-to-many left merge.
            joined = left_df.merge(right_df[new_cols], on=join_key, how="left")

            context["dataframe"] = joined
            context["schema"] = self._build_schema(joined)
            context["join_resolved"] = True
            context["join_info"] = {
                "metric_sheet": metric_sheet,
                "dimension_sheet": dimension_sheet,
                "join_key": join_key,
                "joined_shape": joined.shape,
            }

            print(
                f"JoinResolver: '{metric_sheet}' ⋈ '{dimension_sheet}' "
                f"ON '{join_key}' → {joined.shape}"
            )

        except Exception as e:
            # Pipeline boundary: report the failure through the context
            # instead of propagating the exception.
            context["error"] = f"Join failed: {e}"

        return context

    # ── Helpers ───────────────────────────────────────────────────────────

    def _find_sheet(self, column: str, sheet_dfs: dict):
        """Return the name of the first sheet that contains ``column``, or None."""
        for sheet, df in sheet_dfs.items():
            if column in df.columns:
                return sheet
        return None

    def _find_join_key(
        self,
        left_df: pd.DataFrame,
        right_df: pd.DataFrame,
    ):
        """
        Find the best shared column to join on, or None if there is none.

        Scoring: prefer columns where the right sheet is more lookup-like
        (few unique values on right, many on left = classic FK→PK join).
        Columns whose name contains an obvious row-number hint are excluded.
        Ties are broken by column name so the choice is the same on every run.
        """
        # str() keeps non-string column labels (e.g. integer headers) from
        # raising on .lower().
        candidates = [
            c
            for c in set(left_df.columns) & set(right_df.columns)
            if not any(h in str(c).lower() for h in self._EXCLUDED_KEY_HINTS)
        ]

        if not candidates:
            return None

        def score(col):
            try:
                return right_df[col].nunique() / max(left_df[col].nunique(), 1)
            except Exception:
                # Best effort: a column that cannot be counted gets a
                # neutral score rather than aborting key selection.
                return 1.0

        # Sorting by name first makes min() deterministic when scores tie;
        # set iteration order alone depends on string hashing.
        return min(sorted(candidates, key=str), key=score)

    def _build_schema(self, df: pd.DataFrame) -> dict:
        """Describe ``df`` as {"columns": [{"name": ..., "type": <dtype str>}]}."""
        return {
            "columns": [
                {"name": col, "type": str(dtype)} for col, dtype in df.dtypes.items()
            ]
        }
@@ -0,0 +1,37 @@
1
class SQLWriter:
    """
    Build a grouped-sum SQL statement from the interpreted intent.

    Context keys read
    -----------------
    intent – {"metric": str, "dimension": str, "analysis_type": str, ...}
    schema – {"columns": [{"name": str, ...}, ...]}

    Context keys written
    --------------------
    sql_query – the generated statement (on success)
    error     – a short message when intent/schema are missing or when the
                requested columns are not part of the schema
    """

    @staticmethod
    def _quote(identifier) -> str:
        """
        Return ``identifier`` as a double-quoted SQL identifier.

        Embedded double quotes are doubled, so column names containing
        spaces, punctuation, or reserved words stay a single identifier
        instead of breaking (or altering) the statement.
        """
        return '"' + str(identifier).replace('"', '""') + '"'

    def run(self, context):
        """
        Write ``context["sql_query"]`` for the metric/dimension in the intent.

        The statement selects the dimension and ``SUM(metric)`` from the
        table ``data``, grouped by the dimension. For
        ``analysis_type == "comparison"`` it is additionally ordered by the
        total, descending. Returns the same ``context`` dict.
        """
        intent = context.get("intent")
        schema = context.get("schema")

        if not intent or not schema:
            context["error"] = "Missing intent or schema"
            return context

        # A schema without a "columns" entry is treated as having no columns,
        # which yields the "Invalid columns" error below.
        columns = [col["name"] for col in schema.get("columns") or []]

        metric = intent.get("metric")
        dimension = intent.get("dimension")
        analysis_type = intent.get("analysis_type")

        # Safety check: ensure columns exist
        if metric not in columns or dimension not in columns:
            context["error"] = "Invalid columns in intent"
            return context

        # File headers are free-form text, so every identifier is quoted.
        dimension_sql = self._quote(dimension)
        metric_sql = self._quote(metric)
        total_sql = self._quote(f"total_{metric}")

        clauses = [
            f"SELECT {dimension_sql}, SUM({metric_sql}) AS {total_sql} FROM data",
            f"GROUP BY {dimension_sql}",
        ]

        # ORDER BY (for comparison queries)
        if analysis_type == "comparison":
            clauses.append(f"ORDER BY {total_sql} DESC")

        context["sql_query"] = " ".join(clauses)
        return context
app/tools/validator.py ADDED
File without changes
@@ -0,0 +1,139 @@
1
+ Metadata-Version: 2.4
2
+ Name: querymind-cli
3
+ Version: 0.1.0
4
+ Summary: CLI AI Data Analyst — query CSV and Excel files in plain English
5
+ Author-email: Siddhesh <siddhesh.codemaster.github@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/SiddheshCodeMaster/QueryMind
8
+ Project-URL: Repository, https://github.com/SiddheshCodeMaster/QueryMind
9
+ Project-URL: Bug Tracker, https://github.com/SiddheshCodeMaster/QueryMind/issues
10
+ Keywords: data analysis,cli,natural language,csv,excel,pandas,llm,ollama,terminal,data analytics
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
22
+ Classifier: Topic :: Utilities
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: pandas>=2.0
27
+ Requires-Dist: rich>=13.0
28
+ Requires-Dist: textual>=0.47
29
+ Requires-Dist: requests>=2.31
30
+ Requires-Dist: chardet>=5.0
31
+ Requires-Dist: openpyxl>=3.1
32
+ Requires-Dist: xlrd>=2.0
33
+ Dynamic: license-file
34
+
35
+ # 🧠 QueryMind
36
+
37
+ **Ask questions about your data in plain English. No SQL. No code. Just a terminal.**
38
+
39
+ QueryMind is a CLI data analyst that lets you load a CSV or Excel file and query it conversationally — right in your terminal.
40
+
41
+ ```
42
+ >> top 5 regions by sales
43
+ >> which month had the highest profit?
44
+ >> average spend by payment method in ascending order
45
+ >> show sales in sheet Orders by customer segment
46
+ ```
47
+
48
+ ---
49
+
50
+ ## Install
51
+
52
+ ```bash
53
+ pip install querymind-cli
54
+ ```
55
+
56
+ **Requirements:**
57
+ - Python 3.10+
58
+ - [Ollama](https://ollama.ai) (optional — enables LLM fallback for complex queries)
59
+
60
+ If you want LLM support, install Ollama and pull the model:
61
+ ```bash
62
+ ollama pull phi
63
+ ```
64
+
65
+ ---
66
+
67
+ ## Quickstart
68
+
69
+ ```bash
70
+ querymind
71
+ ```
72
+
73
+ You'll be prompted to:
74
+ 1. Enter a CSV or Excel file path
75
+ 2. Select sheets (Excel only)
76
+ 3. Map your metric and dimension columns
77
+ 4. Start asking questions
78
+
79
+ ---
80
+
81
+ ## What it can do
82
+
83
+ | Query | What happens |
84
+ |---|---|
85
+ | `top 5 products by revenue` | Ranked bar chart in terminal |
86
+ | `which region had lowest sales` | Ascending comparison with insight |
87
+ | `average profit by category` | Mean aggregation per group |
88
+ | `sales trend over time monthly` | Monthly groupby on datetime column |
89
+ | `show sales in sheet Orders by region` | Sheet-scoped query |
90
+ | `which manager had the most sales` | Cross-sheet join (Orders + Users) |
91
+ | `sales by region in ascending order` | Explicit sort order |
92
+
93
+ ---
94
+
95
+ ## Supported file formats
96
+
97
+ | Format | Extension |
98
+ |---|---|
99
+ | CSV | `.csv`, `.tsv` |
100
+ | Excel | `.xlsx`, `.xls`, `.xlsm` |
101
+
102
+ Auto-detects: encoding (UTF-8 BOM, latin-1), delimiter (comma, semicolon, tab, pipe), packed integer dates (DDMMYYYY, YYYYMMDD).
103
+
104
+ ---
105
+
106
+ ## How it works
107
+
108
+ ```
109
+ Your query
110
+
111
+ InputGuard — blocks gibberish and sensitive input
112
+
113
+ InterpreterAgent — rule-based intent extraction (fast, no LLM needed)
114
+
115
+ LLMInterpreter — Ollama fallback for complex queries (optional)
116
+
117
+ JoinResolver — auto-detects and performs cross-sheet joins
118
+
119
+ Analyzer — pandas groupby / aggregation
120
+
121
+ InsightGenerator — formats result + ASCII bar chart
122
+ ```
123
+
124
+ ---
125
+
126
+ ## Beta
127
+
128
+ QueryMind is in active development. If something breaks or a query gives a wrong answer, please [open an issue](https://github.com/SiddheshCodeMaster/QueryMind/issues) with:
129
+ - Your query
130
+ - The column names in your file (no need to share actual data)
131
+ - The output you got
132
+
133
+ This feedback directly shapes what gets fixed next.
134
+
135
+ ---
136
+
137
+ ## License
138
+
139
+ MIT
@@ -0,0 +1,43 @@
1
+ app/agents/InterpreterAgent.py,sha256=VO-ibaWjFvuy-S06FMWZrHy6SLAukfQ6RHn-R_EvrzQ,17590
2
+ app/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ app/agents/insights_generator.py,sha256=NS3vInyvHWKChPbwxXaUvc7VZpVhgBPnK_2iJIYtw-Y,5935
4
+ app/agents/intent_corrector.py,sha256=hhT78H3Yg-RyRcnnmabM_wsLTP-YdNlkjxFrwFK_ras,2217
5
+ app/agents/llm_intepreter.py,sha256=NeAWFCdtagCufbmVr_4h4Ujyp9HFfcxkcBUVs-3esLU,4440
6
+ app/agents/narrator.py,sha256=6OT-XyKjkqson6GcjO-uGVTPdIiTWebpKLluRiWL-0w,707
7
+ app/agents/planner.py,sha256=ENOw9XsjTRb-te_Wv7A37O4cRMUjvKMTAqdakYvoeY8,2352
8
+ app/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ app/cli/main.py,sha256=tIkk2MaA4r2xxIGlTdB3Fqg9dHIh2A_i3QRMi8KzMbw,13718
10
+ app/cli/tui_app.py,sha256=34SfvpPuHBduvARnEJo1LqvwWlD1Gupv4doEDrERC4Y,2975
11
+ app/cli/ui.py,sha256=w066O0YJykmUNFDY0MJVEmIhMXirlvR-iFxR3gzuke8,525
12
+ app/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ app/core/context.py,sha256=i1E8bY67WpySZ_FDZdn8LrNXVbe6FbAEB_TyS6HbeHw,282
14
+ app/core/logger.py,sha256=Xy00yyxfTvJsjWkDQhPOWctWuv-xrpR5KLdyWqU7_0Y,95
15
+ app/core/pipeline.py,sha256=4e8BnGRbNJO9jVlipTxfoTjnkOMD7jG9gbkFnamAJUA,13409
16
+ app/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ app/data/schema_engine.py,sha256=QbpoADeGjKkZLWIjmuEREx1WwY1gBcMWE1eqXoJzKGo,497
18
+ app/data/type_caster.py,sha256=rXMy4spsa8EMn7__Tn95RDt22x-lfC4l-Muy2sJ_oYs,4562
19
+ app/data/connectors/csv_connector.py,sha256=tpA7bCucV_G48FBQbG_2YV3kqZsOkenQag0p9J5Z_0Y,3367
20
+ app/data/connectors/excel_connector.py,sha256=BOk5g4W-A_KKocPxSHNPR157IASmHFUXiHI6EuAhuGg,2572
21
+ app/data/connectors/no_sql_db_connector.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ app/data/connectors/sql_db_connector.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
+ app/executor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ app/executor/db_executor.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
+ app/executor/sheet_selector.py,sha256=X_4CXfNkgMzjJSIGkpKrvm4MTPGM52FZd52f7PKcoJA,4218
26
+ app/llm/ollama_client.py,sha256=pTmwoTpIwqy1JFosjD7q6GZZ7Li7vcB7GlyhPBdMBzQ,1358
27
+ app/prompts/interpreter_prompt.txt,sha256=uyg_1iXhmgvoV-MOSNaHNMWkt4SusOy5guxcys9zV3k,620
28
+ app/security/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
+ app/security/input_guard.py,sha256=_zf6FeN_4dXmtHQsPg-OUN03ysndQTciElTXZ-V5680,2961
30
+ app/security/schema_filter.py,sha256=jGPx557EsYM1fU4eqirdbreg6vEFRCHntJ4X97DJCjw,473
31
+ app/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
+ app/tests/llm_test.py,sha256=ci5tL4kTSF16e0ov6m5z_ZY_zTNDDMA6nDB5vE4xsP0,388
33
+ app/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ app/tools/analyzer.py,sha256=nbdwaUcvSGSRYx5Okb-jGuoKq46XSrvxgX0j1VfyQwU,7083
35
+ app/tools/join_resolver.py,sha256=lir29AZsPYU9_M7c_SI6HToNFsAwa8kKRV2hJS_2whM,6518
36
+ app/tools/sql_writer.py,sha256=UdSJNtF380H9UXcH1DTebTFTTPgc2Uk7X48onPbDIIk,1206
37
+ app/tools/validator.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
+ querymind_cli-0.1.0.dist-info/licenses/LICENSE,sha256=AGAfkJ3vPHAHt2b8yrK5Sz0YsbRAW7iRP4AoAiyVmLY,1094
39
+ querymind_cli-0.1.0.dist-info/METADATA,sha256=w6-OY_U2rc_ke3sLr5yY8Uj2g97gRMCEBArgkPe3Djk,4028
40
+ querymind_cli-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
41
+ querymind_cli-0.1.0.dist-info/entry_points.txt,sha256=zUoXxONuufqV4XVVZRIfNgJLbdgt4g8DVHJLR8TJdl8,48
42
+ querymind_cli-0.1.0.dist-info/top_level.txt,sha256=io9g7LCbfmTG1SFKgEOGXmCFB9uMP2H5lerm0HiHWQE,4
43
+ querymind_cli-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ querymind = app.cli.main:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Siddhesh Shankar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ app