multifunctionplotter 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multifunctionplotter/mfp.py +989 -0
- multifunctionplotter/mfp_data_manipulator.py +192 -0
- multifunctionplotter/mfp_dmanp.py +931 -0
- multifunctionplotter/mfp_dmanp_help.py +741 -0
- multifunctionplotter/mfp_help.py +396 -0
- multifunctionplotter/mfp_server.py +603 -0
- multifunctionplotter/prophet_pred.py +214 -0
- multifunctionplotter-1.0.3.dist-info/METADATA +881 -0
- multifunctionplotter-1.0.3.dist-info/RECORD +13 -0
- multifunctionplotter-1.0.3.dist-info/WHEEL +5 -0
- multifunctionplotter-1.0.3.dist-info/entry_points.txt +3 -0
- multifunctionplotter-1.0.3.dist-info/licenses/LICENSE +201 -0
- multifunctionplotter-1.0.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
mfp_server.py — MCP server for MultiFunctionPlotter (mfp)
|
|
4
|
+
|
|
5
|
+
Wraps mfp's full CLI as MCP tools so any AI agent (Claude Code, Cursor,
|
|
6
|
+
Windsurf, LangChain, etc.) can call it in plain English with no syntax
|
|
7
|
+
knowledge required.
|
|
8
|
+
|
|
9
|
+
Install dep: pip install fastmcp
|
|
10
|
+
Run directly: python mfp_server.py
|
|
11
|
+
Add to Claude Code: claude mcp add mfp -- python /path/to/mfp_server.py
|
|
12
|
+
|
|
13
|
+
Author: based on mfp by Dr. Swarnadeep Seth
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
import io
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from fastmcp import FastMCP
|
|
22
|
+
|
|
23
|
+
# ── Locate mfp.py relative to this server file ───────────────────────────────
# Two layouts are supported:
#   * repository checkout: mfp_server.py in the repo root, mfp.py in src/
#   * installed package:   mfp_server.py and mfp.py side by side
_REPO_ROOT = Path(__file__).resolve().parent
_MFP_CANDIDATES = (_REPO_ROOT / "src" / "mfp.py", _REPO_ROOT / "mfp.py")
# Use the first candidate that exists; fall back to the historical src/ path so
# a missing script is still reported with the same location as before.
_MFP_PY = next((p for p in _MFP_CANDIDATES if p.is_file()), _MFP_CANDIDATES[0])
_MFP_CMD = [sys.executable, str(_MFP_PY)]  # always calls the right Python
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _run(args: list[str], stdin_text: str | None = None) -> str:
    """Invoke mfp in a child process and summarise its outcome as text.

    Args:
        args: Command-line arguments appended after the mfp command prefix.
        stdin_text: Optional text piped to the child's standard input.

    Returns:
        On exit status 0, the stripped stdout, or "Done." when stdout is
        empty. On any other exit status, an "Error (exit N):" header followed
        by the stripped stderr and then the stripped stdout.
    """
    command = _MFP_CMD + args
    completed = subprocess.run(
        command,
        input=stdin_text,
        text=True,
        capture_output=True,
    )
    stdout_text = (completed.stdout or "").strip()

    if completed.returncode == 0:
        # mfp writes INFO logs to stderr, so stderr is ignored on success.
        return stdout_text if stdout_text else "Done."

    stderr_text = (completed.stderr or "").strip()
    report = f"Error (exit {completed.returncode}):\n{stderr_text}\n{stdout_text}"
    return report.strip()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ═════════════════════════════════════════════════════════════════════════════
|
|
47
|
+
# Server instance; the @mcp.tool-decorated functions in this file attach to it
# and main() starts it via mcp.run().
mcp = FastMCP("MultiFunctionPlotter")
|
|
48
|
+
# ═════════════════════════════════════════════════════════════════════════════
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
52
|
+
# Tool 1 — plot (single series from a data file)
|
|
53
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
@mcp.tool
def plot(
    file: str,
    x_col: int,
    y_col: int,
    style: str = "lines",
    title: str = "",
    xlabel: str = "",
    ylabel: str = "",
    legend: str = "",
    linecolor: str = "",
    linewidth: int = 2,
    xrange: str = "",
    yrange: str = "",
    save: str = "plot.png",
    xlog: bool = False,
    ylog: bool = False,
    sci_notation: str = "",
    xticks: str = "",
    yticks: str = "",
    xtick_rotation: int = 0,
    ytick_rotation: int = 0,
    date_format: str = "",
    yerr_col: int = 0,
    capsize: int = 4,
    cmap_col: int = 0,
    colormap: str = "viridis",
    cbar_label: str = "",
    levels: int = 10,
    bin: int = 0,
) -> str:
    """
    Plot a single data series from a CSV, TXT, or DAT file.

    Column indexing:
      - CSV files: 0-based (first col = 0)
      - TXT / DAT files: 1-based (first col = 1)

    Styles available:
      Line/marker : lines (l), dashed, dotted, points (p), linespoints (lp),
                    stars, d
      Error bars  : errorbars (eb) — needs yerr_col
                    errorshade (es) — needs yerr_col
      Colormap    : scatter — needs cmap_col
      2-D matrix  : heatmap, contour, contourf (no x_col/y_col needed)
      Distribution: hist, kde, box, violin

    Args:
        file: Path to data file (.csv, .txt, .dat)
        x_col: Column index for x-axis
        y_col: Column index for y-axis
        style: Plot style (see above). Default: lines
        title: Plot title (will be quoted automatically)
        xlabel: X-axis label
        ylabel: Y-axis label
        legend: Legend entry for this series (single word, no spaces)
        linecolor: Any matplotlib color: 'tab:blue', 'red', '#3a7ab3', 'steelblue'
        linewidth: Line width in points. Default: 2 (only forwarded when changed)
        xrange: X-axis limits as 'min:max', e.g. '0:100'
        yrange: Y-axis limits as 'min:max'
        save: Output file path. Supports .png .pdf .svg .eps. Default: plot.png
        xlog: Use log scale on x-axis
        ylog: Use log scale on y-axis
        sci_notation: Scientific notation axis: 'x', 'y', or 'both'
        xticks: Custom x-tick positions as '0,90,180,270'
        yticks: Custom y-tick positions as '0,1e-5,2e-5'
        xtick_rotation: Rotate x-axis labels by this many degrees
        ytick_rotation: Rotate y-axis labels by this many degrees
        date_format: Parse x-axis as dates, e.g. '%Y-%m-%d' or '%d/%m/%Y'
        yerr_col: Column index with ±σ error values (for errorbars/errorshade).
            0 means "not set", so column 0 cannot be selected here.
        capsize: Error bar cap width in points. Default: 4. Only forwarded
            together with yerr_col.
        cmap_col: Column index for scatter colormap values. 0 means "not set".
        colormap: Matplotlib colormap name. Default: viridis. Only forwarded
            together with cmap_col.
        cbar_label: Colorbar label text
        levels: Number of contour levels for contour/contourf. Default: 10
        bin: Number of histogram bins (0 = auto)

    Returns:
        "Plot saved to: <save>" when mfp exits successfully, otherwise the
        "Error (exit N): ..." text produced by the mfp run.

    Examples:
        plot("data.csv", 0, 4, style="lines", title="Close Price", save="price.png")
        plot("results.dat", 1, 2, style="errorbars", yerr_col=3, linecolor="tab:red")
        plot("samples.csv", 0, 1, style="hist", bin=30, save="dist.pdf")
        plot("matrix.dat", 0, 0, style="heatmap", colormap="inferno", save="heat.png")
        plot("data.csv", 1, 2, style="scatter", cmap_col=3, colormap="plasma")
    """
    # Build the gnuplot-style command string that mfp parses
    cmd_parts = [file, "using", f"{x_col}:{y_col}", "with", style]

    # Free-text values are wrapped in double quotes inside a single token.
    if title:
        cmd_parts.append(f'title "{title}"')
    if xlabel:
        cmd_parts.append(f'xlabel "{xlabel}"')
    if ylabel:
        cmd_parts.append(f'ylabel "{ylabel}"')
    if legend:
        cmd_parts += ["legend", legend]
    if linecolor:
        cmd_parts += ["lc", linecolor]
    if linewidth != 2:
        cmd_parts += ["lw", str(linewidth)]
    if xrange:
        cmd_parts += ["xrange", xrange]
    if yrange:
        cmd_parts += ["yrange", yrange]
    if sci_notation:
        cmd_parts += ["sci_notation", sci_notation]
    if xticks:
        cmd_parts.append(f'xticks "{xticks}"')
    if yticks:
        cmd_parts.append(f'yticks "{yticks}"')
    if xtick_rotation:
        cmd_parts += ["xtick_rotation", str(xtick_rotation)]
    if ytick_rotation:
        cmd_parts += ["ytick_rotation", str(ytick_rotation)]
    if date_format:
        cmd_parts.append(f'date_format "{date_format}"')
    if yerr_col:
        cmd_parts += ["yerr", str(yerr_col), "capsize", str(capsize)]
    if cmap_col:
        cmd_parts += ["cmap", str(cmap_col), "colormap", colormap]
    if cbar_label:
        cmd_parts.append(f'cbar_label "{cbar_label}"')
    if levels != 10:
        cmd_parts += ["levels", str(levels)]
    if bin:
        cmd_parts += ["bin", str(bin)]

    args = cmd_parts + ["--save", save]
    if xlog:
        args.append("--xlog")
    if ylog:
        args.append("--ylog")

    outcome = _run(args)
    # _run reports a non-zero exit as text starting with "Error (exit ";
    # pass that through instead of claiming the plot was written.
    if outcome.startswith("Error (exit "):
        return outcome
    return f"Plot saved to: {save}"
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
174
|
+
# Tool 2 — plot_function (math expression, no data file)
|
|
175
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
176
|
+
|
|
177
|
+
@mcp.tool
def plot_function(
    expression: str,
    xrange: str,
    save: str = "plot.png",
    title: str = "",
    xlabel: str = "",
    ylabel: str = "",
    legend: str = "",
    linecolor: str = "",
    linewidth: int = 2,
    yrange: str = "",
    ylog: bool = False,
) -> str:
    """
    Plot a mathematical function directly — no data file needed.

    Uses numpy (np.) functions. Parameters can be embedded in the expression.

    Args:
        expression: Function definition string. Format: 'f(x) = <expr>'
                    or 'f(x, param=value) = <expr>'.
                    Use np. prefix for numpy functions.
                    Examples:
                      'f(x) = np.sin(x)'
                      'f(x) = x**2 + np.cos(x)'
                      'f(x, a=2) = a * np.exp(-x)'
                      'f(x, a=1, b=2) = a * np.exp(-b * x)'
                      'f(x) = np.sin(x) / x'
        xrange: Required. X-axis range as 'min:max'. E.g. '0:10', '-5:5'
        save: Output file path (.png, .pdf, .svg, .eps). Default: plot.png
        title: Plot title
        xlabel: X-axis label
        ylabel: Y-axis label
        legend: Legend entry (single word)
        linecolor: Matplotlib color string
        linewidth: Line width. Default: 2 (only forwarded when changed)
        yrange: Y-axis limits as 'min:max'
        ylog: Use log scale on y-axis

    Returns:
        "Plot saved to: <save>" when mfp exits successfully, otherwise the
        "Error (exit N): ..." text produced by the mfp run.

    Examples:
        plot_function("f(x) = np.sin(x)", xrange="-10:10", save="sin.png")
        plot_function("f(x) = x**2", xrange="0:5", linecolor="tab:red", save="parabola.pdf")
        plot_function("f(x,a=1,b=2) = a*np.exp(-b*x)", xrange="0:5", title="Decay")
    """
    # mfp parses:  func: "f(x) = ..." xrange min:max [tokens] --save path
    cmd_parts = [f'func: "{expression}"', "xrange", xrange]

    if title:
        cmd_parts.append(f'title "{title}"')
    if xlabel:
        cmd_parts.append(f'xlabel "{xlabel}"')
    if ylabel:
        cmd_parts.append(f'ylabel "{ylabel}"')
    if legend:
        cmd_parts += ["legend", legend]
    if linecolor:
        cmd_parts += ["lc", linecolor]
    if linewidth != 2:
        cmd_parts += ["lw", str(linewidth)]
    if yrange:
        cmd_parts += ["yrange", yrange]

    args = cmd_parts + ["--save", save]
    if ylog:
        args.append("--ylog")

    outcome = _run(args)
    # _run reports a non-zero exit as text starting with "Error (exit ";
    # pass that through instead of claiming the plot was written.
    if outcome.startswith("Error (exit "):
        return outcome
    return f"Plot saved to: {save}"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
244
|
+
# Tool 3 — multi_plot (multiple series / subplots in one call)
|
|
245
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
246
|
+
|
|
247
|
+
@mcp.tool
def multi_plot(
    commands: str,
    save: str = "plot.png",
    subplot_layout: str = "",
    xlog: bool = False,
    ylog: bool = False,
) -> str:
    """
    Plot multiple series on one figure, or arrange plots in a subplot grid.

    This exposes mfp's full multi-command syntax — the most powerful tool
    for complex figures. Comma-separate individual plot commands.

    IMPORTANT: Each sub-command must start with a filename or 'func:'.
    mfp uses the first token of each comma-separated part to detect splits.

    Args:
        commands: One or more mfp commands, comma-separated.
                  Each command follows the same syntax as the plot tool.

                  Multiple series on one axes:
                    'data.csv using 0:2 with lines lc green legend Open,
                     data.csv using 0:4 with lines lc blue legend Close'

                  Error band + mean line overlay (classic combo):
                    'data.dat using 1:2 with errorshade yerr 3 lc steelblue,
                     data.dat using 1:2 with lines lc steelblue'

                  Multiple functions:
                    'func: "f(x) = np.sin(x)" xrange -10:10 lc blue,
                     func: "f(x) = np.cos(x)" xrange -10:10 lc red'

        save: Output file path. Default: plot.png

        subplot_layout: Optional layout string for subplot grids.
                  Letters = panels, '-' separates rows.
                  Each panel gets one comma-separated command (left→right, top→bottom).
                  Examples:
                    'AB'    → 1 row, 2 panels side by side
                    'AB-CD' → 2×2 grid
                    'AA-BC' → A spans full top row, B and C share bottom
                  Leave empty to overlay all series on one axes.

        xlog: Log scale on x-axis (applies to all panels)
        ylog: Log scale on y-axis (applies to all panels)

    Returns:
        "Plot saved to: <save>" when mfp exits successfully, otherwise the
        "Error (exit N): ..." text produced by the mfp run.

    Examples:
        # Two overlaid series
        multi_plot(
            "data.csv using 0:2 with lines lc green, data.csv using 0:4 with lines lc blue",
            save="comparison.png"
        )

        # 2-panel subplot grid
        multi_plot(
            "data.csv using 1:2 with lines, data.csv using 0:1 with hist bin 30",
            subplot_layout="AB",
            save="grid.png"
        )

        # Asymmetric layout: full-width top, two panels bottom
        multi_plot(
            "data.csv using 1:2 with lines title \\"Full series\\",
             data.csv using 0:1 with hist,
             data.csv using 0:2 with kde",
            subplot_layout="AA-BC",
            save="layout.png"
        )
    """
    args: list[str] = []
    if subplot_layout:
        args += ["--subplot", subplot_layout]

    # Pass the full multi-command string as a single argument
    args += [commands, "--save", save]
    if xlog:
        args.append("--xlog")
    if ylog:
        args.append("--ylog")

    outcome = _run(args)
    # _run reports a non-zero exit as text starting with "Error (exit ";
    # pass that through instead of claiming the plot was written.
    if outcome.startswith("Error (exit "):
        return outcome
    return f"Plot saved to: {save}"
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
334
|
+
# Tool 4 — inspect_data (non-mutating data exploration)
|
|
335
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
336
|
+
|
|
337
|
+
@mcp.tool
def inspect_data(
    file: str,
    action: str = "properties",
    n_rows: int = 10,
    column: str = "",
) -> str:
    """
    Inspect a data file without modifying it.

    Uses mfp's Data Manipulator (DM) in non-interactive mode by importing
    MFPDataManipulator directly (not via subprocess) so output is captured.

    Args:
        file: Path to CSV, Excel (.xlsx/.xls), or JSON file
        action: What to do. Options:
                  'properties' — column names, dtypes, NaN counts, summary stats
                  'head'       — first n_rows rows
                  'tail'       — last n_rows rows
                  'show'       — full DataFrame
                  'counts'     — value frequency for a column (requires column=)
        n_rows: Number of rows for head/tail. Default: 10
        column: Column name, required for action='counts'

    Returns:
        The text the Data Manipulator printed (or "Done." if it printed
        nothing). An "Error: ..." / "Unknown action ..." message is returned
        when the action is invalid, a required column is missing, or the
        manipulator raises.

    Examples:
        inspect_data("data.csv")
        inspect_data("data.csv", action="head", n_rows=5)
        inspect_data("data.csv", action="counts", column="Category")
    """
    import contextlib

    # Make mfp_dmanp importable from either layout (src/ in a checkout, or
    # next to this file when installed). Entries are added only once so
    # sys.path does not grow on every call; src/ ends up first, as before.
    for location in (_REPO_ROOT, _REPO_ROOT / "src"):
        entry = str(location)
        if entry not in sys.path:
            sys.path.insert(0, entry)
    from mfp_dmanp import MFPDataManipulator

    buf = io.StringIO()
    try:
        # redirect_stdout restores sys.stdout on every exit path, including
        # the early returns and exceptions below.
        with contextlib.redirect_stdout(buf):
            dm = MFPDataManipulator(file)
            if action == "properties":
                dm.properties()
            elif action == "head":
                dm.head(n_rows)
            elif action == "tail":
                dm.tail(n_rows)
            elif action == "show":
                dm.show()
            elif action == "counts":
                if not column:
                    return "Error: 'counts' action requires a column name."
                dm.counts(column)
            else:
                return f"Unknown action '{action}'. Use: properties, head, tail, show, counts"
    except Exception as exc:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error: {exc}"

    return buf.getvalue().strip() or "Done."
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
408
|
+
# Tool 5 — clean_data (mutating transformations, saves result)
|
|
409
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
410
|
+
|
|
411
|
+
@mcp.tool
def clean_data(
    file: str,
    save_as: str,
    filter_query: str = "",
    sort_col: str = "",
    sort_order: str = "asc",
    rename_pairs: str = "",
    cast_col: str = "",
    cast_dtype: str = "",
    add_col_name: str = "",
    add_col_expr: str = "",
    drop_columns: str = "",
    dedup: bool = False,
    dedup_cols: str = "",
    fillna_col: str = "",
    fillna_value: str = "",
    dropna_col: str = "",
    slice_start: int = -1,
    slice_end: int = -1,
) -> str:
    """
    Clean and transform a data file, saving the result to a new file.

    Operations run in this fixed order: slice, filter, sort, rename, cast,
    add column, drop columns, fillna, dropna, dedup, save. Only specify the
    ones you need.

    Args:
        file: Input file path (CSV, Excel, JSON)
        save_as: Output file path — format from extension (.csv, .xlsx, .json)

        filter_query: Keep rows matching a pandas query expression.
                      Examples: 'price > 100', 'city == "Roanoke"',
                                'score >= 90 and grade == "A"'
                      Column names with spaces: backtick them: '`first name` == "Alice"'

        sort_col: Column name to sort by
        sort_order: 'asc' or 'desc'. Default: asc

        rename_pairs: Rename columns as 'old:new' or multiple 'old1:new1,old2:new2'

        cast_col: Column name to change dtype
        cast_dtype: Target dtype: 'int', 'float', 'str', or 'datetime'

        add_col_name: Name for a new computed column
        add_col_expr: Expression for the new column (pandas eval syntax)
                      Examples: 'price * qty', 'revenue - cost', 'score / score.max()'

        drop_columns: Comma-separated column names to drop: 'col1,col2'

        dedup: Remove duplicate rows if True
        dedup_cols: Comma-separated columns to consider for dedup (empty = all)

        fillna_col: Column to fill NaN/empty values in
        fillna_value: Value to fill with (auto-converts to int/float if numeric)

        dropna_col: Drop rows where this column is NaN (empty string = any column).
                    NOTE: the default is the empty string, so the dropna step
                    runs on every call unless the sentinel '__skip__' is passed.

        slice_start: Keep rows from this index (0-based, -1 = disabled)
        slice_end: Keep rows up to this index exclusive (-1 = disabled).
                   Slicing only happens when slice_start >= 0 and
                   slice_end > slice_start.

    Returns:
        Summary of operations performed plus the captured detail log, or an
        "Error during cleaning: ..." message listing the steps completed
        before the failure.

    Examples:
        # Filter, sort, deduplicate, save
        clean_data(
            "raw.csv", "cleaned.csv",
            filter_query="price > 100",
            sort_col="volume", sort_order="desc",
            dedup=True
        )

        # Add computed column, rename, save as Excel
        clean_data(
            "sales.csv", "sales_with_profit.xlsx",
            add_col_name="profit", add_col_expr="revenue - cost",
            rename_pairs="cust_id:customer_id,txn_dt:date"
        )

        # Fill missing values and drop bad rows
        clean_data(
            "data.csv", "data_clean.csv",
            fillna_col="price", fillna_value="0",
            dropna_col="date"
        )
    """
    import contextlib

    # Make mfp_dmanp importable from either layout (src/ in a checkout, or
    # next to this file when installed). Entries are added only once so
    # sys.path does not grow on every call; src/ ends up first, as before.
    for location in (_REPO_ROOT, _REPO_ROOT / "src"):
        entry = str(location)
        if entry not in sys.path:
            sys.path.insert(0, entry)
    from mfp_dmanp import MFPDataManipulator

    buf = io.StringIO()
    steps_done: list[str] = []
    try:
        # redirect_stdout restores sys.stdout on every exit path.
        with contextlib.redirect_stdout(buf):
            dm = MFPDataManipulator(file)

            if slice_start >= 0 and slice_end > slice_start:
                dm.slice(slice_start, slice_end)
                steps_done.append(f"Sliced rows {slice_start}:{slice_end}")

            if filter_query:
                dm.filter(filter_query)
                steps_done.append(f"Filtered: {filter_query}")

            if sort_col:
                dm.sort(sort_col, sort_order)
                steps_done.append(f"Sorted by {sort_col} ({sort_order})")

            if rename_pairs:
                dm.rename(rename_pairs)
                steps_done.append(f"Renamed: {rename_pairs}")

            if cast_col and cast_dtype:
                dm.cast(cast_col, cast_dtype)
                steps_done.append(f"Cast {cast_col} → {cast_dtype}")

            if add_col_name and add_col_expr:
                dm.addcol(add_col_name, add_col_expr)
                steps_done.append(f"Added column '{add_col_name}' = {add_col_expr}")

            if drop_columns:
                dm.delete(drop_columns)
                steps_done.append(f"Dropped columns: {drop_columns}")

            if fillna_col and fillna_value:
                dm.fillna(fillna_col, fillna_value)
                steps_done.append(f"Filled NaN in '{fillna_col}' with {fillna_value}")

            # NOTE(review): with the default dropna_col="" this guard is always
            # true, so dropna runs on every call. Kept as-is to preserve the
            # existing interface; pass "__skip__" to disable. Confirm intent.
            if dropna_col is not None and dropna_col != "__skip__":
                dm.dropna(dropna_col if dropna_col else None)
                scope = f"'{dropna_col}'" if dropna_col else "any column"
                steps_done.append(f"Dropped NaN rows in {scope}")

            if dedup:
                dm.dedup(dedup_cols if dedup_cols else None)
                steps_done.append("Deduplicated" + (f" on {dedup_cols}" if dedup_cols else ""))

            saved_path = dm.save(save_as)
            steps_done.append(f"Saved → {saved_path}")

    except Exception as exc:
        # Tool boundary: report the failure and how far the pipeline got.
        return f"Error during cleaning: {exc}\nSteps completed: {steps_done}"

    log_output = buf.getvalue().strip()
    summary = "\n".join(steps_done)
    return f"Steps performed:\n{summary}\n\nDetail log:\n{log_output}"
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
564
|
+
# Tool 6 — replay_config (replay plot.json)
|
|
565
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
566
|
+
|
|
567
|
+
@mcp.tool
def replay_config(config_file: str = "plot.json") -> str:
    """
    Re-run a plot from a stored JSON configuration.

    mfp automatically saves the last plot's configuration to plot.json.
    Such files can also be written by hand for reproducible figures.

    Args:
        config_file: Path to the JSON config file. Default: plot.json

    Returns:
        The text produced by the mfp run: its output (or "Done.") on
        success, or the error details on failure.

    Example JSON format:
        {
          "file": "data.csv",
          "x_col": 0,
          "y_col": 4,
          "style": "lines",
          "title": "Stock Price",
          "linecolor": "tab:blue",
          "linewidth": 2
        }
    """
    # The config path is the only argument handed to mfp.
    mfp_args = [config_file]
    outcome = _run(mfp_args)
    return outcome
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
596
|
+
# Entry point
|
|
597
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
598
|
+
def main() -> None:
    """Start the MCP server by calling run() on the module-level instance."""
    mcp.run()


# Only start the server when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
603
|
+
|