querymind-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. app/agents/InterpreterAgent.py +473 -0
  2. app/agents/__init__.py +0 -0
  3. app/agents/insights_generator.py +151 -0
  4. app/agents/intent_corrector.py +59 -0
  5. app/agents/llm_intepreter.py +132 -0
  6. app/agents/narrator.py +27 -0
  7. app/agents/planner.py +77 -0
  8. app/cli/__init__.py +0 -0
  9. app/cli/main.py +346 -0
  10. app/cli/tui_app.py +98 -0
  11. app/cli/ui.py +21 -0
  12. app/core/__init__.py +0 -0
  13. app/core/context.py +10 -0
  14. app/core/logger.py +2 -0
  15. app/core/pipeline.py +379 -0
  16. app/data/__init__.py +0 -0
  17. app/data/connectors/csv_connector.py +99 -0
  18. app/data/connectors/excel_connector.py +68 -0
  19. app/data/connectors/no_sql_db_connector.py +0 -0
  20. app/data/connectors/sql_db_connector.py +0 -0
  21. app/data/schema_engine.py +18 -0
  22. app/data/type_caster.py +128 -0
  23. app/executor/__init__.py +0 -0
  24. app/executor/db_executor.py +0 -0
  25. app/executor/sheet_selector.py +120 -0
  26. app/llm/ollama_client.py +47 -0
  27. app/prompts/interpreter_prompt.txt +28 -0
  28. app/security/__init__.py +0 -0
  29. app/security/input_guard.py +133 -0
  30. app/security/schema_filter.py +20 -0
  31. app/tests/__init__.py +0 -0
  32. app/tests/llm_test.py +18 -0
  33. app/tools/__init__.py +0 -0
  34. app/tools/analyzer.py +157 -0
  35. app/tools/join_resolver.py +159 -0
  36. app/tools/sql_writer.py +37 -0
  37. app/tools/validator.py +0 -0
  38. querymind_cli-0.1.0.dist-info/METADATA +139 -0
  39. querymind_cli-0.1.0.dist-info/RECORD +43 -0
  40. querymind_cli-0.1.0.dist-info/WHEEL +5 -0
  41. querymind_cli-0.1.0.dist-info/entry_points.txt +2 -0
  42. querymind_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
  43. querymind_cli-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,128 @@
1
+ """
2
+ Shared column type-casting logic used by both CSVConnector and ExcelConnector.
3
+
4
+ smart_cast_df(df) processes every column:
5
+ - Already datetime → leave alone
6
+ - Already numeric → packed-date check, then whole-number downcast
7
+ - Object/string → try numeric → packed-date check → whole-number downcast
8
+ """
9
+
10
+ import pandas as pd
11
+
12
+
13
+ def _try_packed_date(int_series: pd.Series, original_series: pd.Series):
14
+ """
15
+ Try to parse an integer series as a packed date
16
+ (DDMMYYYY, MMDDYYYY, YYYYMMDD) with zero-padding for 7-digit values.
17
+
18
+ Returns (datetime_series, fmt) if successful, None otherwise.
19
+ """
20
+ digits = int_series.astype(str).str.len()
21
+ mostly_7_8 = ((digits >= 7) & (digits <= 8)).sum() / len(digits)
22
+
23
+ if mostly_7_8 <= 0.8:
24
+ return None
25
+
26
+ padded = int_series.astype(str).str.zfill(8)
27
+
28
+ for fmt in ("%d%m%Y", "%m%d%Y", "%Y%m%d"):
29
+ try:
30
+ candidate = pd.to_datetime(padded, format=fmt, errors="raise")
31
+ if not (
32
+ (candidate.dt.year >= 1900).all() and (candidate.dt.year <= 2100).all()
33
+ ):
34
+ continue
35
+
36
+ full_padded = (
37
+ pd.to_numeric(original_series, errors="coerce")
38
+ .astype("Int64")
39
+ .astype(str)
40
+ .str.zfill(8)
41
+ .replace("<NA>", pd.NaT)
42
+ )
43
+ result = pd.to_datetime(full_padded, format=fmt, errors="coerce")
44
+ return result, fmt
45
+
46
+ except Exception:
47
+ continue
48
+
49
+ return None
50
+
51
+
52
+ def _try_downcast_to_int(series: pd.Series) -> pd.Series:
53
+ """
54
+ If all non-null values in a float series are whole numbers
55
+ (e.g. 553.0, 1733.0), convert to nullable Int64 so they
56
+ display as 553, 1733 instead of 553.0, 1733.0.
57
+
58
+ Uses Int64 (nullable) rather than int64 so NaN rows are preserved.
59
+ """
60
+ if series.dtype not in ("float64", "float32"):
61
+ return series
62
+
63
+ non_null = series.dropna()
64
+ if len(non_null) == 0:
65
+ return series
66
+
67
+ if (non_null == non_null.astype("int64")).all():
68
+ return series.astype("Int64")
69
+
70
+ return series
71
+
72
+
73
def _cast_packed_date_column(df: pd.DataFrame, col, numeric: pd.Series) -> bool:
    """Replace ``df[col]`` with parsed dates when ``numeric`` looks like packed dates.

    Returns True when the column was converted, False when it was left alone.
    """
    non_null = numeric.dropna()
    if non_null.empty:
        return False

    try:
        int_series = non_null.astype("int64")
    except (ValueError, TypeError, OverflowError):
        # inf / out-of-range values cannot be cast to int, hence cannot be
        # packed dates; previously this aborted the whole cast pass.
        return False

    result = _try_packed_date(int_series, df[col])
    if result is None:
        return False

    dt_col, fmt = result
    df[col] = dt_col
    print(f"📅 '{col}' detected as packed date ({fmt})")
    return True


def smart_cast_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Cast each column of ``df`` to the most appropriate type.

    Processing order per column:
      1. Already datetime -> skipped.
      2. Numeric dtype (int64/int32/float64/float32/Int64/Int32) ->
         packed-date check, otherwise whole-number float -> Int64 downcast.
      3. Object/string -> numeric cast when more than 70% of the rows parse,
         then packed-date check, otherwise whole-number downcast.
         Columns at or below the 70% threshold are left untouched.

    The frame is modified in place and also returned.
    """
    numeric_dtype_names = ("int64", "int32", "float64", "float32", "Int64", "Int32")

    for col in df.columns:
        dtype_str = str(df[col].dtype)

        # Already datetime: nothing to do.
        if "datetime" in dtype_str:
            continue

        # Already numeric (common with Excel-loaded columns).
        if dtype_str in numeric_dtype_names:
            numeric = pd.to_numeric(df[col], errors="coerce")
            if not _cast_packed_date_column(df, col, numeric):
                df[col] = _try_downcast_to_int(df[col])
            continue

        # Object / string: only adopt the numeric view when most rows parse.
        if df[col].dtype == object or dtype_str in ("string", "str"):
            converted = pd.to_numeric(df[col], errors="coerce")
            ratio = converted.notna().sum() / max(len(df), 1)
            if ratio <= 0.7:
                continue  # Not numeric enough: leave as object/string.

            if not _cast_packed_date_column(df, col, converted):
                df[col] = _try_downcast_to_int(converted)

    return df
File without changes
File without changes
@@ -0,0 +1,120 @@
1
+ import pandas as pd
2
+ from rich.console import Console
3
+ from rich.table import Table
4
+ from rich.prompt import Prompt
5
+
6
# Shared rich console used for all output in this module.
console = Console()

# Answers (compared after strip + lower-casing) that abort the prompt.
EXIT_WORDS = {"exit", "quit", "/exit", "/quit", "bye", "q", ":q"}


def _ask(message: str) -> str:
    """Ask the user for input and return the raw answer.

    Raises UserExitError when the answer is one of EXIT_WORDS or when the
    prompt is interrupted (Ctrl+C / end of input).
    """
    # Deferred import: app.cli.main imports this module, so importing it at
    # module load time would create a circular import.
    from app.cli.main import UserExitError

    try:
        answer = Prompt.ask(message)
    except (KeyboardInterrupt, EOFError):
        raise UserExitError()

    wants_exit = answer.strip().lower() in EXIT_WORDS
    if wants_exit:
        raise UserExitError()

    return answer
23
+
24
+
25
def get_sheet_info(file_path: str) -> dict:
    """Summarise every sheet of an Excel workbook.

    Args:
        file_path: path of the workbook to inspect.

    Returns:
        ``{sheet_name: {"rows": int, "cols": int, "columns": [str]}}``.
        A sheet that cannot be parsed is still listed, with ``"?"`` for
        both counts and an empty column list.

    Raises:
        Whatever ``pd.ExcelFile`` raises when the workbook itself cannot
        be opened.
    """
    info = {}
    # The context manager closes the workbook handle even if parsing fails;
    # the handle was previously left open.
    with pd.ExcelFile(file_path) as workbook:
        for name in workbook.sheet_names:
            try:
                sheet_df = workbook.parse(name)
            except Exception:
                # Best effort: one unreadable sheet must not hide the others.
                info[name] = {"rows": "?", "cols": "?", "columns": []}
            else:
                info[name] = {
                    "rows": len(sheet_df),
                    "cols": len(sheet_df.columns),
                    "columns": sheet_df.columns.tolist(),
                }
    return info
40
+
41
+
42
def prompt_sheet_selection(file_path: str) -> list:
    """
    Interactive sheet picker.

    Prints a table of the workbook's sheets, then keeps prompting until the
    user enters ``all`` or a comma-separated list of valid sheet numbers.

    Returns:
        The selected sheet names in the order entered, without duplicates;
        an empty list when the workbook cannot be read.

    Raises:
        UserExitError: propagated from ``_ask`` when the user types an exit
        command or interrupts the prompt.
    """
    console.print("\n[bold cyan]📋 Excel Sheet Selection[/bold cyan]")

    try:
        sheet_info = get_sheet_info(file_path)
    except Exception as e:
        console.print(f"[red]❌ Could not read sheets: {e}[/red]")
        return []

    sheet_names = list(sheet_info.keys())

    table = Table(title="Available Sheets", border_style="blue", show_lines=True)
    table.add_column("#", style="bold yellow", width=4)
    table.add_column("Sheet", style="bold white")
    table.add_column("Rows", justify="right")
    table.add_column("Columns", justify="right")
    table.add_column("Sample columns", style="dim")

    for i, name in enumerate(sheet_names, 1):
        info = sheet_info[name]
        columns = info["columns"]
        # Show at most five column names, then a count of the remainder.
        sample = ", ".join(str(c) for c in columns[:5])
        if len(columns) > 5:
            sample += f" … (+{len(info['columns']) - 5} more)"
        table.add_row(str(i), name, str(info["rows"]), str(info["cols"]), sample)

    console.print(table)
    console.print(
        "\n[dim]Options:[/dim]\n"
        " [yellow]•[/yellow] Sheet number(s) separated by commas: [bold]1[/bold] or [bold]1,2[/bold]\n"
        " [yellow]•[/yellow] Type [bold]all[/bold] to load all sheets\n"
        " [yellow]•[/yellow] Type [bold]exit[/bold] to quit\n"
    )

    retry_hint = "[red]❌ Please enter numbers, 'all', or 'exit' to quit.[/red]"

    while True:
        raw = _ask("[cyan]👉 Select sheet(s)[/cyan]").strip().lower()

        if raw == "all":
            selected = sheet_names
            break

        tokens = [part.strip() for part in raw.split(",") if part.strip()]
        try:
            indices = [int(token) for token in tokens]
        except ValueError:
            console.print(retry_hint)
            continue

        # Blank input (or only commas) used to re-prompt without any
        # feedback; give the same hint as for non-numeric input.
        if not indices:
            console.print(retry_hint)
            continue

        out_of_range = next(
            (idx for idx in indices if not 1 <= idx <= len(sheet_names)), None
        )
        if out_of_range is not None:
            console.print(
                f"[red]❌ '{out_of_range}' is out of range (1–{len(sheet_names)})[/red]"
            )
            continue

        # dict.fromkeys keeps the first occurrence of each name, in order.
        selected = list(dict.fromkeys(sheet_names[idx - 1] for idx in indices))
        break

    if len(selected) == 1:
        console.print(f"\n[green]✅ Loading sheet:[/green] [bold]{selected[0]}[/bold]")
    else:
        console.print(
            f"\n[green]✅ Loading {len(selected)} sheets:[/green] "
            f"[bold]{', '.join(selected)}[/bold]\n"
            "[yellow]ℹ️ Sheets will be merged with a '_sheet' column added "
            "so you can filter per sheet in queries.[/yellow]"
        )

    return selected
@@ -0,0 +1,47 @@
1
+ import requests
2
+
3
+
4
class OllamaClient:
    """
    Thin client for the Ollama ``/api/generate`` HTTP endpoint.

    Failures are never raised to the caller: ``generate`` returns a string
    starting with ``"ERROR:"`` instead.

    Args:
        model: name of the Ollama model to query.
        url: endpoint to POST to; defaults to the local Ollama server.
        timeout: request timeout in seconds; defaults to 30.
        options: generation options merged over ``DEFAULT_OPTIONS``
            (e.g. ``{"num_predict": 256}`` when replies get truncated).
    """

    DEFAULT_URL = "http://localhost:11434/api/generate"
    DEFAULT_TIMEOUT = 30
    DEFAULT_OPTIONS = {
        "num_predict": 100,
        "temperature": 0,
        "top_p": 0.9,
    }

    def __init__(self, model="phi", url=None, timeout=None, options=None):
        self.url = url or self.DEFAULT_URL
        self.model = model
        self.timeout = self.DEFAULT_TIMEOUT if timeout is None else timeout
        # Copy so per-instance overrides never leak into the class defaults.
        self.options = {**self.DEFAULT_OPTIONS, **(options or {})}

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` to the model and return the stripped reply text.

        Returns an ``"ERROR: ..."`` string on a non-200 status, a connection
        failure, a timeout, or any other exception.
        """
        payload = {
            "model": self.model,
            "prompt": prompt,
            "stream": False,
            "options": dict(self.options),
        }

        try:
            response = requests.post(self.url, json=payload, timeout=self.timeout)

            if response.status_code != 200:
                return f"ERROR: Bad response {response.status_code}"

            data = response.json()
            return data.get("response", "").strip()

        except requests.exceptions.ConnectionError:
            return "ERROR: Could not connect to Ollama. Is it running?"

        except requests.exceptions.Timeout:
            return "ERROR: Ollama request timed out"

        except Exception as e:
            # Boundary catch-all: callers rely on a string result, never a raise.
            return f"ERROR: {str(e)}"
@@ -0,0 +1,28 @@
1
+ You are a data analyst assistant.
2
+
3
+ Your job is to convert a user query into structured intent.
4
+
5
+ Dataset schema:
6
+ {schema}
7
+
8
+ User query:
9
+ {query}
10
+
11
+ Return ONLY JSON in this format:
12
+
13
+ {
14
+ "metric": "<column_name>",
15
+ "dimension": "<column_name>",
16
+ "operation": "sum | mean | count",
17
+ "query_type": "comparison | aggregation | trend | top_n",
18
+ "limit": number or null
19
+ }
20
+
21
+ Rules:
22
+ - Use only columns from schema
23
+ - "revenue", "sales", "spending" → numeric columns
24
+ - "highest", "most" → comparison
25
+ - "top N" → top_n
26
+ - "average" → mean
27
+ - If unclear → choose best match
28
+ - Do NOT explain anything
File without changes
@@ -0,0 +1,133 @@
1
+ import re
2
+
3
# Phrases that mark a query as asking for sensitive data.
BLOCKED_KEYWORDS = ["password", "ssn", "credit card", "api key", "private key"]

# Words/phrases whose presence marks a query as a plausible data question.
ANALYTICAL_KEYWORDS = {
    # intent words
    "highest", "lowest", "top", "bottom", "most", "least", "best", "worst",
    "average", "avg", "mean", "total", "sum", "count", "max", "min",
    "trend", "over time", "monthly", "daily", "weekly", "yearly",
    "compare", "comparison", "distribution", "breakdown",
    "ascending", "descending", "asc", "desc", "increasing", "decreasing",
    "lowest to highest", "highest to lowest", "sorted", "order",
    # question words
    "show", "give", "find", "list", "get", "what", "which",
    "how many", "how much", "where", "who",
    # common data domain words
    "sales", "revenue", "profit", "spend", "spending", "spent", "cost",
    "item", "items", "product", "products", "category", "location",
    "payment", "method", "customer", "customers", "orders",
    "region", "city", "country", "store", "date", "month", "year",
    "by", "per", "across", "between", "vs", "versus",
}


class InputGuard:
    """Pipeline step that rejects empty, sensitive or non-analytical queries.

    ``run`` reads ``context["user_query"]`` and, when the query is rejected,
    stores a user-facing message in ``context["error"]``.
    """

    def __init__(self, extra_domain_words=None):
        """
        extra_domain_words: pass your semantic_map column names here so that
        queries referencing column names directly are always accepted.
        Example: InputGuard(extra_domain_words=["corrected_t_spent", "payment_method"])
        """
        # Lower-cased because they are matched against the lower-cased query;
        # blank entries are dropped because an empty keyword matches anything.
        self._extra = {
            str(word).strip().lower()
            for word in (extra_domain_words or [])
            if str(word).strip()
        }
        self._intent_pattern = self._compile_keywords(ANALYTICAL_KEYWORDS | self._extra)

    @staticmethod
    def _compile_keywords(keywords):
        """Build a regex matching any keyword that starts at a word boundary.

        Anchoring only the start keeps inflections working ("totals",
        "ordered") while ignoring hits buried inside other words
        ("stop" -> "top", "baby" -> "by").
        """
        alternatives = "|".join(
            re.escape(kw) for kw in sorted(keywords, key=len, reverse=True)
        )
        return re.compile(rf"(?<!\w)(?:{alternatives})")

    def run(self, context):
        """Validate the user query; return ``context`` (with ``error`` set on rejection)."""
        raw_query = context.get("user_query")
        query = "" if raw_query is None else str(raw_query).strip()

        # --- Empty ---
        if not query:
            context["error"] = "Please enter a question."
            return context

        query_lower = query.lower()
        # "credit_card" / "api-key" must be caught like "credit card" / "api key".
        query_spaced = re.sub(r"[\s_\-]+", " ", query_lower)

        # --- Sensitive content ---
        # Deliberately a substring test: over-blocking is the safe direction.
        if any(word in query_lower or word in query_spaced for word in BLOCKED_KEYWORDS):
            context["error"] = (
                "⛔ Sensitive query detected. Please ask about your data."
            )
            return context

        # --- Gibberish / no intent ---
        if self._intent_pattern.search(query_lower) is None:
            context["error"] = (
                "❓ I couldn't understand that as a data question.\n\n"
                "Try something like:\n"
                " • 'top 5 items by sales'\n"
                " • 'highest revenue by location'\n"
                " • 'average spend by payment method'\n"
                " • 'total sales trend over time'"
            )
            return context

        return context
@@ -0,0 +1,20 @@
1
# Canonical (lower-case, underscore-separated) names of columns that must
# never be exposed downstream.
SENSITIVE_COLUMNS = ["password", "ssn", "credit_card"]


class SchemaFilter:
    """Pipeline step that removes sensitive columns from ``context["schema"]``."""

    @staticmethod
    def _is_sensitive(col) -> bool:
        """Return True when the column's name is one of SENSITIVE_COLUMNS.

        The name is compared case-insensitively, with spaces and hyphens
        treated like underscores ("Credit Card" == "credit_card"). Non-string
        names (e.g. numeric Excel headers) are stringified first.
        """
        name = str(col.get("name", "")).strip().lower()
        canonical = "_".join(name.replace("-", " ").replace("_", " ").split())
        return canonical in SENSITIVE_COLUMNS

    def run(self, context):
        """Filter the schema's column list in place and return ``context``.

        A missing/empty schema, or a schema without columns, is left untouched.
        """
        schema = context.get("schema")
        if not schema:
            return context

        columns = schema.get("columns")
        if not columns:
            return context

        schema["columns"] = [col for col in columns if not self._is_sensitive(col)]
        context["schema"] = schema

        return context
app/tests/__init__.py ADDED
File without changes
app/tests/llm_test.py ADDED
@@ -0,0 +1,18 @@
1
+ from app.agents.llm_intepreter import LLMInterpreter
2
+
3
# Manual smoke run: feed one sample question and a three-column schema to
# LLMInterpreter and print the intent it produces.
context = {
    "user_query": "Which location has highest revenue?",
    "schema": {
        "columns": [
            {"name": column_name}
            for column_name in ("location", "payment_method", "total_spent")
        ]
    },
}

agent = LLMInterpreter()

result = agent.run(context)

print(result["intent"])
app/tools/__init__.py ADDED
File without changes
app/tools/analyzer.py ADDED
@@ -0,0 +1,157 @@
1
+ import pandas as pd
2
+
3
+
4
class Analyzer:
    """
    Executes the structured intent produced by the interpreter.

    Sheet-aware: if context["intent"]["sheet"] is set (e.g. the user said
    "in sheet Orders") and that sheet exists in context["sheet_dataframes"],
    only that sheet's rows are analysed. Otherwise context["dataframe"] is used.
    The source dataframe is copied, never modified.

    Supported query types
    ---------------------
    comparison  – groupby dimension, sum metric, sort by value
    top_n       – like comparison, limited to N rows (default 5)
    aggregation – groupby dimension; mean, count or sum (default), sort by value
    trend       – groupby time dimension, sum, sort by index

    On success ``context["analysis"]`` holds the resulting Series and
    ``context["target_sheet"]`` the sheet used; on failure ``context["error"]``
    holds a user-facing message.
    """

    # Internal/system columns that must never be used as a dimension.
    INTERNAL_COLS = frozenset({"_sheet"})
    # Substrings that mark a numeric column as an identifier, not a measure.
    _ID_HINTS = ("id", "_id", "key", "code", "num", "no", "number")
    # Raw dimension values that are folded into the "Unknown" bucket.
    _PLACEHOLDER_LABELS = ["ERROR", "UNKNOWN", "Unknown", "nan", ""]
    _DEFAULT_LIMIT = 5

    def run(self, context: dict) -> dict:
        """Run the analysis described by ``context["intent"]``; return ``context``."""
        intent = context.get("intent") or {}

        metric = intent.get("metric")
        dimension = intent.get("dimension")
        query_type = intent.get("query_type")
        target_sheet = intent.get("sheet")  # set by InterpreterAgent for sheet queries

        # ── Sheet-aware dataframe selection ──────────────────────────────
        sheet_frames = context.get("sheet_dataframes") or {}
        if target_sheet and target_sheet in sheet_frames:
            df = sheet_frames[target_sheet].copy()
        else:
            source = context.get("dataframe")
            if source is None:
                context["error"] = "No dataset is loaded to analyze."
                return context
            df = source.copy()

        # ── Guard: columns must exist ─────────────────────────────────────
        all_columns = df.columns.tolist()
        visible_cols = [c for c in all_columns if c != "_sheet"]

        if not metric or metric not in all_columns:
            context["error"] = self._missing_metric_message(
                df, metric, target_sheet, visible_cols
            )
            return context

        if dimension in self.INTERNAL_COLS:
            context["error"] = (
                f"'{dimension}' is an internal system column and cannot be "
                f"used as a dimension.\n\n"
                f" Please rephrase and specify a real dimension column.\n"
                f" Available columns: {visible_cols}"
            )
            return context

        if not dimension or dimension not in all_columns:
            context["error"] = (
                f"Dimension column '{dimension}' not found.\n"
                f" Available columns: {visible_cols}"
            )
            return context

        if not query_type:
            context["error"] = "No query type detected. Please rephrase your question."
            return context

        # ── Normalise the dimension into group labels ─────────────────────
        df[dimension] = self._dimension_labels(
            df[dimension], intent.get("time_granularity", "day")
        )

        # ── Coerce metric to numeric ──────────────────────────────────────
        df[metric] = pd.to_numeric(df[metric], errors="coerce")
        df = df.dropna(subset=[metric])

        if df.empty:
            context["error"] = f"No numeric data found in '{metric}' after cleaning."
            return context

        # ── Run analysis ──────────────────────────────────────────────────
        try:
            ascending = bool(intent.get("ascending") or False)
            grouped = df.groupby(dimension)[metric]

            if query_type == "comparison":
                result = grouped.sum().sort_values(ascending=ascending)

            elif query_type == "top_n":
                limit = self._coerce_limit(intent.get("limit"))
                result = grouped.sum().sort_values(ascending=ascending).head(limit)

            elif query_type == "aggregation":
                op = intent.get("operation", "sum")
                if op == "mean":
                    result = grouped.mean()
                elif op == "count":
                    # The interpreter prompt offers "count"; it used to be
                    # silently computed as a sum.
                    result = grouped.count()
                else:
                    result = grouped.sum()
                # Always sort by value so display and insight are consistent
                result = result.sort_values(ascending=ascending)

            elif query_type == "trend":
                result = grouped.sum().sort_index()

            else:
                context["error"] = f"Unsupported query type: '{query_type}'"
                return context

            context["analysis"] = result
            context["target_sheet"] = target_sheet  # for InsightGenerator label
            return context

        except Exception as e:
            # Boundary catch-all: the pipeline expects an error message, not a raise.
            context["error"] = f"Analysis failed: {e}"
            return context

    def _missing_metric_message(self, df, metric, target_sheet, visible_cols) -> str:
        """Build the error text for a metric that is absent from ``df``."""
        numeric_in_sheet = df.select_dtypes(include="number").columns.tolist()
        # Identifier-like numeric columns are not useful measures to suggest.
        real_numeric = [
            c
            for c in numeric_in_sheet
            if not any(h in str(c).lower() for h in self._ID_HINTS)
        ]

        if target_sheet and not real_numeric:
            return (
                f"The '{target_sheet}' sheet has no numeric columns to measure.\n"
                f" Columns in this sheet: {visible_cols}\n\n"
                f"This sheet is likely a lookup/reference table.\n"
                f"Try querying a sheet that has numeric data, like Orders."
            )
        if target_sheet and real_numeric:
            return (
                f"'{metric}' is not available in the '{target_sheet}' sheet.\n"
                f" Available numeric columns here: {real_numeric}\n"
                f" Try: 'top 5 by {real_numeric[0]} in {target_sheet}'"
            )
        return (
            f"Metric column '{metric}' not found.\n"
            f" Available columns: {visible_cols}"
        )

    def _dimension_labels(self, values: pd.Series, granularity) -> pd.Series:
        """Turn a dimension column into string group labels.

        Datetime columns are bucketed by ``granularity`` (year / month / week,
        anything else means per day) so "which month" groups by month label
        rather than by individual date. Other columns are stringified and
        stripped, with placeholders and missing values mapped to "Unknown".
        """
        if pd.api.types.is_datetime64_any_dtype(values):
            if granularity == "year":
                return values.dt.to_period("Y").astype(str)
            if granularity == "month":
                return values.dt.to_period("M").astype(str)
            if granularity == "week":
                return values.dt.to_period("W").astype(str)
            return values.dt.date.astype(str)

        labels = (
            values.astype(str).str.strip().replace(self._PLACEHOLDER_LABELS, "Unknown")
        )
        # None / NA stringify to "None" / "<NA>", which the placeholder list
        # does not cover; mask on the original nulls instead.
        return labels.mask(values.isna(), "Unknown")

    def _coerce_limit(self, value) -> int:
        """Return ``value`` as a positive int, falling back to the default of 5."""
        try:
            limit = int(value)
        except (TypeError, ValueError, OverflowError):
            return self._DEFAULT_LIMIT
        return limit if limit > 0 else self._DEFAULT_LIMIT