csv-analytics-mcp 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ Metadata-Version: 2.4
2
+ Name: csv-analytics-mcp
3
+ Version: 1.0.0
4
+ Summary: MCP server for CSV analytics: load CSV files, query data, and describe columns. From MEOK AI Labs.
5
+ Project-URL: Homepage, https://meok.ai
6
+ Project-URL: Repository, https://github.com/CSOAI-ORG/csv-analytics-mcp
7
+ Author-email: MEOK AI Labs <nicholas@meok.ai>
8
+ License: MIT License
9
+
10
+ Copyright (c) 2026 MEOK AI Labs (meok.ai)
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25
+ License-File: LICENSE
26
+ Keywords: ai,analytics,csv,mcp,mcp/,meok
27
+ Classifier: License :: OSI Approved :: MIT License
28
+ Classifier: Operating System :: OS Independent
29
+ Classifier: Programming Language :: Python :: 3
30
+ Classifier: Topic :: Software Development :: Libraries
31
+ Requires-Python: >=3.10
32
+ Requires-Dist: mcp>=1.0.0
33
+ Requires-Dist: pandas>=1.5.0
@@ -0,0 +1,6 @@
1
+ server.py,sha256=f_9vTNV1cULben5u6dBeSr55U6zm_erHTl5MCQSgCMo,16308
2
+ csv_analytics_mcp-1.0.0.dist-info/METADATA,sha256=Kq6g9qFzRv7ivJDu_Vu6ifDJUvIeV3pXcTQa95YbEjY,1625
3
+ csv_analytics_mcp-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
4
+ csv_analytics_mcp-1.0.0.dist-info/entry_points.txt,sha256=PT4RWH9pd2aKpGURTA7fnLjuKueC3Ra4jSSNWeu7RZk,50
5
+ csv_analytics_mcp-1.0.0.dist-info/licenses/LICENSE,sha256=j3ubn5qaWJ2R1iHLwwnUIwaFCGnaPWGUP4rLLcmYL9k,820
6
+ csv_analytics_mcp-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ csv_analytics_mcp = server:main
@@ -0,0 +1,17 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 MEOK AI Labs (meok.ai)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
server.py ADDED
@@ -0,0 +1,472 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ CSV Analytics MCP Server
4
+ ==========================
5
+ Spreadsheet and CSV analysis toolkit for AI agents. Load CSV files, query data
6
+ with pandas query expressions, compute statistics, create aggregations, pivot tables,
7
+ and export chart-ready data.
8
+
9
+ By MEOK AI Labs | https://meok.ai
10
+
11
+ Install: pip install mcp pandas
12
+ Run: python server.py
13
+ """
14
+
15
+
16
import os
import sys

# auth_middleware is not one of this package's declared requirements; it is
# looked up in a directory under the current user's home. Without a guard the
# whole module fails to import on any machine that lacks that directory.
sys.path.insert(0, os.path.expanduser('~/clawd/meok-labs-engine/shared'))
try:
    from auth_middleware import check_access
except ImportError:
    def check_access(api_key: str = ""):
        """Fallback used when the shared auth module is not installed.

        Returns the same ``(allowed, message, tier)`` triple the tool
        functions unpack. Access is granted; the per-day limit enforced by
        ``_check_rate_limit`` still applies to every call.

        NOTE(review): this is fail-open and the ``"free"`` tier label is an
        assumption (the tier value is not read anywhere in this file) --
        confirm this is the intended policy for standalone installs.
        """
        return True, "", "free"
19
+
20
+ import io
21
+ import json
22
+ import os
23
+ import tempfile
24
+ from datetime import datetime, timedelta
25
+ from typing import Any, Optional
26
+ from collections import defaultdict
27
+ from mcp.server.fastmcp import FastMCP
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Rate limiting
31
+ # ---------------------------------------------------------------------------
32
+ FREE_DAILY_LIMIT = 30
33
+ _usage: dict[str, list[datetime]] = defaultdict(list)
34
+
35
+
36
+ def _check_rate_limit(caller: str = "anonymous") -> Optional[str]:
37
+ now = datetime.now()
38
+ cutoff = now - timedelta(days=1)
39
+ _usage[caller] = [t for t in _usage[caller] if t > cutoff]
40
+ if len(_usage[caller]) >= FREE_DAILY_LIMIT:
41
+ return f"Free tier limit reached ({FREE_DAILY_LIMIT}/day). Upgrade to Pro: https://mcpize.com/csv-analytics-mcp/pro"
42
+ _usage[caller].append(now)
43
+ return None
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # In-memory dataset store
48
+ # ---------------------------------------------------------------------------
49
+ _datasets: dict[str, "pd.DataFrame"] = {}
50
+
51
+
52
+ def _get_dataset(name: str):
53
+ """Get a loaded dataset by name."""
54
+ if name not in _datasets:
55
+ raise KeyError(f"Dataset '{name}' not loaded. Use load_csv first. Loaded: {list(_datasets.keys())}")
56
+ return _datasets[name]
57
+
58
+
59
+ def _df_to_dict(df, limit: int = 100) -> dict:
60
+ """Convert a DataFrame to a JSON-safe dictionary with row limit."""
61
+ import pandas as pd
62
+ total = len(df)
63
+ truncated = total > limit
64
+ df_limited = df.head(limit)
65
+
66
+ # Convert to records, handling special types
67
+ records = []
68
+ for _, row in df_limited.iterrows():
69
+ record = {}
70
+ for col in df_limited.columns:
71
+ val = row[col]
72
+ if pd.isna(val):
73
+ record[col] = None
74
+ elif isinstance(val, (datetime)):
75
+ record[col] = val.isoformat()
76
+ elif hasattr(val, 'item'): # numpy types
77
+ record[col] = val.item()
78
+ else:
79
+ record[col] = val
80
+ # Ensure JSON-serializable
81
+ try:
82
+ json.dumps(record[col])
83
+ except (TypeError, ValueError):
84
+ record[col] = str(val)
85
+ records.append(record)
86
+
87
+ return {
88
+ "columns": list(df_limited.columns),
89
+ "rows": records,
90
+ "row_count": len(records),
91
+ "total_rows": total,
92
+ "truncated": truncated,
93
+ }
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Core operations
98
+ # ---------------------------------------------------------------------------
99
+
100
def _load_csv(file_path: str, name: str = "", delimiter: str = ",", encoding: str = "utf-8") -> dict:
    """Read a CSV file with pandas and keep it in the in-memory dataset store.

    Args:
        file_path: Path of the CSV file to read.
        name: Name to store the dataset under; when empty, the file's base
            name without its extension is used.
        delimiter: Column delimiter handed to ``pandas.read_csv``.
        encoding: Text encoding handed to ``pandas.read_csv``.

    Returns:
        A summary dict: status, dataset name, file path, row/column counts,
        column names, dtypes, memory usage in MB and a five-row preview.

    Raises:
        FileNotFoundError: If *file_path* is not an existing file.
    """
    import pandas as pd

    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    df = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)

    if name:
        dataset_name = name
    else:
        stem, _extension = os.path.splitext(os.path.basename(file_path))
        dataset_name = stem
    # An existing dataset with the same name is replaced.
    _datasets[dataset_name] = df

    column_names = list(df.columns)
    dtype_names = {}
    for col, dtype in df.dtypes.items():
        dtype_names[col] = str(dtype)
    memory_bytes = df.memory_usage(deep=True).sum()

    summary = {}
    summary["status"] = "loaded"
    summary["name"] = dataset_name
    summary["file"] = file_path
    summary["rows"] = len(df)
    summary["columns"] = column_names
    summary["column_count"] = len(df.columns)
    summary["dtypes"] = dtype_names
    summary["memory_mb"] = round(memory_bytes / 1024 / 1024, 2)
    summary["preview"] = _df_to_dict(df, limit=5)
    return summary
122
+
123
+
124
def _query_data(name: str, filter_expr: str = "", columns: Optional[list[str]] = None,
                sort_by: str = "", ascending: bool = True, limit: int = 100) -> dict:
    """Query a loaded dataset with filtering, sorting, and column selection.

    Args:
        name: Name of a dataset in the in-memory store.
        filter_expr: Expression passed to ``DataFrame.query``; empty means
            no filtering.
        columns: Columns to return; ``None`` or empty returns all columns.
        sort_by: Column to sort by; empty means no sorting. It does not need
            to be one of *columns*.
        ascending: Sort direction.
        limit: Maximum number of rows in the result.

    Returns:
        The row dictionary produced by ``_df_to_dict`` plus ``dataset`` and
        ``filter`` keys, or ``{"error": ...}`` for an invalid filter or an
        unknown column.

    Raises:
        KeyError: If the dataset is not loaded.
    """
    df = _get_dataset(name)

    # Apply filter
    if filter_expr:
        # NOTE(security): filter_expr is supplied by the tool caller and is
        # evaluated by DataFrame.query. Review what such expressions may
        # execute before exposing this tool to untrusted callers.
        try:
            df = df.query(filter_expr)
        except Exception as e:
            return {"error": f"Invalid filter expression: {e}. Use pandas query syntax, e.g. 'age > 30 and city == \"London\"'"}

    # Validate the requested columns
    if columns:
        missing = [c for c in columns if c not in df.columns]
        if missing:
            return {"error": f"Columns not found: {missing}. Available: {list(df.columns)}"}

    # Sort before projecting, so the sort column does not have to be among
    # the returned columns.
    if sort_by:
        if sort_by not in df.columns:
            return {"error": f"Sort column '{sort_by}' not found. Available: {list(df.columns)}"}
        df = df.sort_values(sort_by, ascending=ascending)

    # Select columns
    if columns:
        df = df[columns]

    result = _df_to_dict(df, limit=limit)
    result["dataset"] = name
    result["filter"] = filter_expr
    return result
154
+
155
+
156
def _describe_columns(name: str) -> dict:
    """Get a statistical summary of every column in a loaded dataset.

    Numeric columns get mean, std, min, max, median and quartiles. All other
    columns, including booleans, get a unique count and their five most
    frequent values. Statistics that are undefined (NaN), such as the std of
    a single row, are reported as ``None``.

    Args:
        name: Name of a dataset in the in-memory store.

    Returns:
        A dict with ``dataset``, ``total_rows``, ``total_columns`` and
        ``columns`` (per-column statistics keyed by column label).

    Raises:
        KeyError: If the dataset is not loaded.
    """
    import pandas as pd
    df = _get_dataset(name)

    def _number_or_none(value, ndigits=None):
        """Return value as a float (optionally rounded), or None for NaN."""
        if pd.isna(value):
            return None
        number = float(value)
        return number if ndigits is None else round(number, ndigits)

    stats = {}
    for col in df.columns:
        series = df[col]
        col_stats = {"dtype": str(series.dtype), "non_null": int(series.count()), "null_count": int(series.isna().sum())}

        # Bool columns count as numeric dtypes, but describe() summarises
        # them like categoricals (no mean/min/max), so the numeric branch
        # would report zeros for them.
        is_numeric = pd.api.types.is_numeric_dtype(series) and not pd.api.types.is_bool_dtype(series)

        if is_numeric:
            desc = series.describe()
            col_stats.update({
                "mean": _number_or_none(desc.get("mean", 0), 4),
                "std": _number_or_none(desc.get("std", 0), 4),
                "min": _number_or_none(desc.get("min", 0)),
                "max": _number_or_none(desc.get("max", 0)),
                "median": _number_or_none(series.median(), 4),
                "25%": _number_or_none(desc.get("25%", 0)),
                "75%": _number_or_none(desc.get("75%", 0)),
            })
        else:
            col_stats["unique"] = int(series.nunique())
            top_values = series.value_counts().head(5)
            col_stats["top_values"] = {str(k): int(v) for k, v in top_values.items()}

        stats[col] = col_stats

    return {
        "dataset": name,
        "total_rows": len(df),
        "total_columns": len(df.columns),
        "columns": stats,
    }
189
+
190
+
191
def _aggregate(name: str, group_by: list[str], metrics: dict[str, str]) -> dict:
    """Group a loaded dataset and apply one aggregate function per column.

    Args:
        name: Name of a dataset in the in-memory store.
        group_by: Columns to group by.
        metrics: Mapping ``{"column_name": "agg_function"}`` where
            agg_function is one of: sum, mean, min, max, count, median, std,
            first, last, nunique.

    Returns:
        The row dictionary produced by ``_df_to_dict`` (at most 200 rows)
        plus ``dataset``, ``group_by`` and ``metrics`` keys, or
        ``{"error": ...}`` for an unknown column or aggregate function.

    Raises:
        KeyError: If the dataset is not loaded.
    """
    import pandas as pd
    df = _get_dataset(name)

    # Every referenced column has to exist: group keys first, then metrics.
    for col in group_by:
        if col not in df.columns:
            return {"error": f"Group column '{col}' not found. Available: {list(df.columns)}"}
    for col in metrics:
        if col not in df.columns:
            return {"error": f"Metric column '{col}' not found. Available: {list(df.columns)}"}

    valid_aggs = {"sum", "mean", "min", "max", "count", "median", "std", "first", "last", "nunique"}
    for col, agg in metrics.items():
        if agg in valid_aggs:
            continue
        return {"error": f"Invalid aggregation '{agg}' for '{col}'. Use: {valid_aggs}"}

    grouped = df.groupby(group_by, as_index=False)
    result_df = grouped.agg(metrics)

    # Collapse multi-level column labels into single underscore-joined names.
    if isinstance(result_df.columns, pd.MultiIndex):
        flat_names = []
        for col in result_df.columns:
            flat_names.append('_'.join(col).strip('_'))
        result_df.columns = flat_names

    result = _df_to_dict(result_df, limit=200)
    result.update({"dataset": name, "group_by": group_by, "metrics": metrics})
    return result
224
+
225
+
226
def _export_chart_data(name: str, x_column: str, y_columns: list[str],
                       chart_type: str = "bar", limit: int = 50) -> dict:
    """Export labels and numeric series from a dataset in a chart-ready shape.

    Rows with a missing value in any requested column are dropped, then the
    first *limit* remaining rows are used.

    Args:
        name: Name of a dataset in the in-memory store.
        x_column: Column whose values (as strings) become the labels.
        y_columns: Columns that each become one data series.
        chart_type: Echoed back in the result as the suggested chart type.
        limit: Maximum number of data points; negative values are treated
            as 0.

    Returns:
        A dict with ``chart_type``, ``labels``, ``datasets`` (one
        ``{"label", "data"}`` entry per y column), ``data_points``,
        ``dataset`` and ``note``, or ``{"error": ...}`` for unknown columns.

    Raises:
        KeyError: If the dataset is not loaded.
    """
    df = _get_dataset(name)

    all_cols = [x_column] + y_columns
    missing = [c for c in all_cols if c not in df.columns]
    if missing:
        return {"error": f"Columns not found: {missing}. Available: {list(df.columns)}"}

    # Select each column once: if x_column is also a y column, a duplicated
    # selection would make chart_df[x_column] a DataFrame instead of a Series.
    unique_cols = list(dict.fromkeys(all_cols))

    # Drop incomplete rows BEFORE applying the limit; otherwise missing values
    # in the first rows shrink the result even when more complete rows exist.
    chart_df = df[unique_cols].dropna().head(max(limit, 0))

    labels = chart_df[x_column].astype(str).tolist()
    datasets = []
    for y_col in y_columns:
        clean_values = []
        for v in chart_df[y_col].tolist():
            # Best effort: values that cannot be read as numbers are plotted as 0.
            try:
                clean_values.append(float(v))
            except (TypeError, ValueError):
                clean_values.append(0)
        datasets.append({
            "label": y_col,
            "data": clean_values,
        })

    return {
        "chart_type": chart_type,
        "labels": labels,
        "datasets": datasets,
        "data_points": len(labels),
        "dataset": name,
        "note": "Compatible with Chart.js, Plotly, or any charting library",
    }
263
+
264
+
265
def _pivot_table(name: str, index: str, columns: str, values: str,
                 aggfunc: str = "mean") -> dict:
    """Build a pivot table from a loaded dataset.

    Args:
        name: Name of a dataset in the in-memory store.
        index: Column whose values become the row labels.
        columns: Column whose values become the pivot columns.
        values: Column that is aggregated.
        aggfunc: One of mean, sum, count, min, max, median, std.

    Returns:
        A dict describing the pivot (at most 50 rows and 20 columns are
        serialized; empty cells are filled with 0), or ``{"error": ...}``
        for an unknown column, an unsupported aggfunc or a pandas failure.

    Raises:
        KeyError: If the dataset is not loaded.
    """
    import pandas as pd
    df = _get_dataset(name)

    for col in (index, columns, values):
        if col not in df.columns:
            return {"error": f"Column '{col}' not found. Available: {list(df.columns)}"}

    valid_aggs = {"mean", "sum", "count", "min", "max", "median", "std"}
    if aggfunc not in valid_aggs:
        return {"error": f"Invalid aggfunc '{aggfunc}'. Use: {valid_aggs}"}

    try:
        pivot = pd.pivot_table(
            df,
            values=values,
            index=index,
            columns=columns,
            aggfunc=aggfunc,
            fill_value=0,
        )
    except Exception as e:
        return {"error": f"Pivot table error: {e}"}

    # Only a bounded window of the pivot is serialized.
    shown_rows = pivot.index[:50]
    shown_cols = pivot.columns[:20]

    def _cell(val):
        """Round a cell to 4 decimals; fall back to its string form."""
        try:
            if val != 0:
                return round(float(val), 4)
            return 0
        except (TypeError, ValueError):
            return str(val)

    pivot_dict = {
        str(idx_val): {
            str(col_val): _cell(pivot.loc[idx_val, col_val])
            for col_val in shown_cols
        }
        for idx_val in shown_rows
    }

    return {
        "dataset": name,
        "index": index,
        "columns_field": columns,
        "values_field": values,
        "aggfunc": aggfunc,
        "row_count": len(pivot_dict),
        "column_values": [str(c) for c in shown_cols],
        "pivot": pivot_dict,
    }
308
+
309
+
310
+ # ---------------------------------------------------------------------------
311
+ # MCP Server
312
+ # ---------------------------------------------------------------------------
313
# Server instance; the tool functions in this file are decorated with
# @mcp.tool() on this object, and mcp.run() is called when the file is
# executed as a script.
mcp = FastMCP(
    "CSV Analytics MCP",
    instructions="Spreadsheet and CSV analysis toolkit: load files, filter/query data, compute statistics, create aggregations, pivot tables, and export chart-ready data. By MEOK AI Labs.")
316
+
317
+
318
@mcp.tool()
def load_csv(file_path: str, name: str = "", delimiter: str = ",", encoding: str = "utf-8", api_key: str = "") -> dict:
    """Load a CSV file into memory for analysis. The dataset is stored under
    a name (defaults to filename) and can be referenced in subsequent calls.

    Args:
        file_path: Absolute path to the CSV file
        name: Optional name for the dataset (default: filename without extension)
        delimiter: Column delimiter (default: comma)
        encoding: File encoding (default: utf-8)
        api_key: Key checked by check_access before the file is loaded
    """
    access_granted, denial_reason, _tier = check_access(api_key)
    if not access_granted:
        return {"error": denial_reason, "upgrade_url": "https://meok.ai/pricing"}

    if (quota_error := _check_rate_limit()):
        return {"error": quota_error}

    # Any failure is reported to the caller as an error payload, not raised.
    try:
        summary = _load_csv(file_path, name, delimiter, encoding)
    except Exception as exc:
        return {"error": str(exc)}
    return summary
340
+
341
+
342
@mcp.tool()
def query_data(name: str, filter_expr: str = "", columns: Optional[list[str]] = None,
               sort_by: str = "", ascending: bool = True, limit: int = 100, api_key: str = "") -> dict:
    """Query a loaded dataset with filtering, column selection, and sorting.

    Uses pandas query syntax for filters:
    - 'age > 30'
    - 'city == "London" and salary > 50000'
    - 'status.isin(["active", "pending"])'

    Args:
        name: Dataset name (from load_csv)
        filter_expr: Pandas query expression for filtering rows
        columns: List of column names to return (default: all)
        sort_by: Column name to sort by
        ascending: Sort order (default: True)
        limit: Max rows to return (default: 100, capped at 500)
        api_key: Key checked by check_access before the query runs
    """
    access_granted, denial_reason, _tier = check_access(api_key)
    if not access_granted:
        return {"error": denial_reason, "upgrade_url": "https://meok.ai/pricing"}

    if (quota_error := _check_rate_limit()):
        return {"error": quota_error}

    # Never hand back more than 500 rows, whatever the caller asked for.
    capped_limit = min(limit, 500)
    try:
        rows = _query_data(name, filter_expr, columns, sort_by, ascending, capped_limit)
    except Exception as exc:
        return {"error": str(exc)}
    return rows
371
+
372
+
373
@mcp.tool()
def describe_columns(name: str, api_key: str = "") -> dict:
    """Get detailed statistics for every column in a dataset:
    - Numeric columns: mean, std, min, max, median, quartiles
    - Categorical columns: unique count, top 5 values with frequencies

    Args:
        name: Dataset name (from load_csv)
        api_key: Key checked by check_access before the summary is computed
    """
    access_granted, denial_reason, _tier = check_access(api_key)
    if not access_granted:
        return {"error": denial_reason, "upgrade_url": "https://meok.ai/pricing"}

    if (quota_error := _check_rate_limit()):
        return {"error": quota_error}

    # Any failure is reported to the caller as an error payload, not raised.
    try:
        summary = _describe_columns(name)
    except Exception as exc:
        return {"error": str(exc)}
    return summary
393
+
394
+
395
@mcp.tool()
def aggregate(name: str, group_by: list[str], metrics: dict[str, str], api_key: str = "") -> dict:
    """Aggregate data with GROUP BY and compute metrics.

    Supported aggregation functions: sum, mean, min, max, count, median, std, first, last, nunique

    Args:
        name: Dataset name (from load_csv)
        group_by: List of columns to group by (e.g. ["department", "year"])
        metrics: Dict of column -> aggregation function (e.g. {"salary": "mean", "id": "count"})
        api_key: Key checked by check_access before the aggregation runs
    """
    access_granted, denial_reason, _tier = check_access(api_key)
    if not access_granted:
        return {"error": denial_reason, "upgrade_url": "https://meok.ai/pricing"}

    if (quota_error := _check_rate_limit()):
        return {"error": quota_error}

    # Any failure is reported to the caller as an error payload, not raised.
    try:
        grouped = _aggregate(name, group_by, metrics)
    except Exception as exc:
        return {"error": str(exc)}
    return grouped
417
+
418
+
419
@mcp.tool()
def export_chart_data(name: str, x_column: str, y_columns: list[str],
                      chart_type: str = "bar", limit: int = 50, api_key: str = "") -> dict:
    """Export data in a chart-ready format. Output is compatible with Chart.js,
    Plotly, or any visualization library. Includes labels and datasets arrays.

    Args:
        name: Dataset name (from load_csv)
        x_column: Column for the X axis / labels
        y_columns: List of columns for Y axis / data series
        chart_type: Suggested chart type (bar, line, scatter, pie)
        limit: Max data points (default: 50)
        api_key: Key checked by check_access before the data is exported
    """
    access_granted, denial_reason, _tier = check_access(api_key)
    if not access_granted:
        return {"error": denial_reason, "upgrade_url": "https://meok.ai/pricing"}

    if (quota_error := _check_rate_limit()):
        return {"error": quota_error}

    # Any failure is reported to the caller as an error payload, not raised.
    try:
        chart = _export_chart_data(name, x_column, y_columns, chart_type, limit)
    except Exception as exc:
        return {"error": str(exc)}
    return chart
443
+
444
+
445
@mcp.tool()
def pivot_table(name: str, index: str, columns: str, values: str,
                aggfunc: str = "mean", api_key: str = "") -> dict:
    """Create a pivot table from a dataset. Reshapes data by grouping rows
    and spreading column values, similar to Excel pivot tables.

    Args:
        name: Dataset name (from load_csv)
        index: Column to use as row labels
        columns: Column whose unique values become new columns
        values: Column to aggregate
        aggfunc: Aggregation function (mean, sum, count, min, max, median, std)
        api_key: Key checked by check_access before the pivot is built
    """
    access_granted, denial_reason, _tier = check_access(api_key)
    if not access_granted:
        return {"error": denial_reason, "upgrade_url": "https://meok.ai/pricing"}

    if (quota_error := _check_rate_limit()):
        return {"error": quota_error}

    # Any failure is reported to the caller as an error payload, not raised.
    try:
        pivot = _pivot_table(name, index, columns, values, aggfunc)
    except Exception as exc:
        return {"error": str(exc)}
    return pivot
469
+
470
+
471
def main() -> None:
    """Start the MCP server.

    The wheel's entry_points.txt maps the ``csv_analytics_mcp`` console
    script to ``server:main``, so this function must exist for the installed
    command to work.
    """
    mcp.run()


if __name__ == "__main__":
    main()