@mseep/csv-editor 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/bug_report.md +53 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +38 -0
- package/.github/workflows/deploy-docs.yml +62 -0
- package/.github/workflows/publish-github.yml +52 -0
- package/.github/workflows/publish.yml +44 -0
- package/.github/workflows/test.yml +32 -0
- package/.pre-commit-config.yaml +157 -0
- package/ALTERNATIVE_PUBLISHING.md +175 -0
- package/ARCHITECTURE.md +1011 -0
- package/CHANGELOG.md +99 -0
- package/CODE_OF_CONDUCT.md +41 -0
- package/CONTRIBUTING.md +427 -0
- package/Dockerfile +22 -0
- package/LICENSE +21 -0
- package/MCP_CONFIG.md +505 -0
- package/PUBLISHING.md +210 -0
- package/README.md +400 -0
- package/SECURITY.md +61 -0
- package/docs/README.md +41 -0
- package/docs/blog/2019-05-28-first-blog-post.md +12 -0
- package/docs/blog/2019-05-29-long-blog-post.md +44 -0
- package/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
- package/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
- package/docs/blog/2021-08-26-welcome/index.md +29 -0
- package/docs/blog/authors.yml +25 -0
- package/docs/blog/tags.yml +19 -0
- package/docs/docs/api/overview.md +183 -0
- package/docs/docs/installation.md +252 -0
- package/docs/docs/intro.md +87 -0
- package/docs/docs/tutorial-basics/_category_.json +8 -0
- package/docs/docs/tutorial-basics/congratulations.md +23 -0
- package/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
- package/docs/docs/tutorial-basics/create-a-document.md +57 -0
- package/docs/docs/tutorial-basics/create-a-page.md +43 -0
- package/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
- package/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
- package/docs/docs/tutorial-extras/_category_.json +7 -0
- package/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
- package/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
- package/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
- package/docs/docs/tutorial-extras/translate-your-site.md +88 -0
- package/docs/docs/tutorials/quickstart.md +365 -0
- package/docs/docusaurus.config.ts +163 -0
- package/docs/package-lock.json +17493 -0
- package/docs/package.json +48 -0
- package/docs/sidebars.ts +33 -0
- package/docs/src/components/HomepageFeatures/index.tsx +71 -0
- package/docs/src/components/HomepageFeatures/styles.module.css +11 -0
- package/docs/src/css/custom.css +30 -0
- package/docs/src/pages/index.module.css +23 -0
- package/docs/src/pages/index.tsx +44 -0
- package/docs/src/pages/markdown-page.md +7 -0
- package/docs/static/.nojekyll +0 -0
- package/docs/static/img/docusaurus-social-card.jpg +0 -0
- package/docs/static/img/docusaurus.png +0 -0
- package/docs/static/img/favicon.ico +0 -0
- package/docs/static/img/logo.svg +1 -0
- package/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
- package/docs/static/img/undraw_docusaurus_react.svg +170 -0
- package/docs/static/img/undraw_docusaurus_tree.svg +40 -0
- package/docs/tsconfig.json +8 -0
- package/examples/README.md +48 -0
- package/examples/auto_save_demo.py +206 -0
- package/examples/auto_save_overwrite.py +201 -0
- package/examples/basic_usage.py +135 -0
- package/examples/demo.py +139 -0
- package/examples/history_demo.py +317 -0
- package/examples/test_default_autosave.py +124 -0
- package/examples/update_consignee_example.py +179 -0
- package/package.json +51 -0
- package/plans/2026-04-19-fastmcp3-migration-plan.md +1045 -0
- package/pyproject.toml +331 -0
- package/requirements-dev.txt +30 -0
- package/requirements.txt +22 -0
- package/scripts/publish.py +67 -0
- package/smithery.yaml +15 -0
- package/specs/2026-04-19-fastmcp3-migration-design.md +243 -0
- package/src/csv_editor/__init__.py +8 -0
- package/src/csv_editor/models/__init__.py +39 -0
- package/src/csv_editor/models/auto_save.py +246 -0
- package/src/csv_editor/models/csv_session.py +468 -0
- package/src/csv_editor/models/data_models.py +244 -0
- package/src/csv_editor/models/history_manager.py +456 -0
- package/src/csv_editor/prompts/__init__.py +0 -0
- package/src/csv_editor/prompts/data_prompts.py +13 -0
- package/src/csv_editor/resources/__init__.py +0 -0
- package/src/csv_editor/resources/csv_resources.py +22 -0
- package/src/csv_editor/server.py +640 -0
- package/src/csv_editor/tools/__init__.py +5 -0
- package/src/csv_editor/tools/analytics.py +700 -0
- package/src/csv_editor/tools/auto_save_operations.py +235 -0
- package/src/csv_editor/tools/data_operations.py +3 -0
- package/src/csv_editor/tools/history_operations.py +315 -0
- package/src/csv_editor/tools/io_operations.py +431 -0
- package/src/csv_editor/tools/transformations.py +663 -0
- package/src/csv_editor/tools/validation.py +822 -0
- package/src/csv_editor/utils/__init__.py +0 -0
- package/src/csv_editor/utils/validators.py +205 -0
- package/tests/README.md +65 -0
- package/tests/__init__.py +7 -0
- package/tests/conftest.py +50 -0
- package/tests/test_auto_save.py +378 -0
- package/tests/test_basic.py +103 -0
- package/tests/test_integration.py +356 -0
- package/tests/test_server_boot.py +50 -0
- package/tests/test_settings.py +184 -0
|
@@ -0,0 +1,663 @@
|
|
|
1
|
+
"""Data transformation tools for CSV manipulation."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from fastmcp import Context
|
|
8
|
+
|
|
9
|
+
from ..models.csv_session import get_session_manager
|
|
10
|
+
from ..models.data_models import OperationType
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def filter_rows(
    session_id: str, conditions: list[dict[str, Any]], mode: str = "and", ctx: Context = None
) -> dict[str, Any]:
    """
    Filter rows based on conditions.

    Args:
        session_id: Session identifier
        conditions: List of filter conditions, each with:
            - column: Column name
            - operator: One of '==', '!=', '>', '<', '>=', '<=', 'contains',
              'starts_with', 'ends_with', 'in', 'not_in', 'is_null', 'not_null'
            - value: Value to compare (not needed for is_null/not_null).
              For 'contains' the value is handed to ``Series.str.contains``
              with its default settings, so it is interpreted as a regex.
        mode: 'and' keeps rows matching every condition; any other value
            keeps rows matching at least one condition ('or')
        ctx: FastMCP context

    Returns:
        Dict with success status and filtered row count. On failure the dict
        holds ``success=False`` and an ``error`` message; the session data is
        left untouched in that case.
    """
    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df
        rows_before = len(df)
        combine_with_and = mode == "and"

        # Seed the mask with the identity element of the combining operator:
        # True for AND, False for OR (an all-True seed would make OR keep every
        # row). With no conditions at all every row is kept. The mask shares
        # df's index so boolean indexing stays aligned even when the index is
        # not a plain 0..n-1 range.
        mask = pd.Series(combine_with_and or not conditions, index=df.index, dtype=bool)

        for condition in conditions:
            column = condition.get("column")
            operator = condition.get("operator")
            value = condition.get("value")

            if column not in df.columns:
                return {"success": False, "error": f"Column '{column}' not found"}

            col_data = df[column]

            if operator == "==":
                condition_mask = col_data == value
            elif operator == "!=":
                condition_mask = col_data != value
            elif operator == ">":
                condition_mask = col_data > value
            elif operator == "<":
                condition_mask = col_data < value
            elif operator == ">=":
                condition_mask = col_data >= value
            elif operator == "<=":
                condition_mask = col_data <= value
            elif operator == "contains":
                condition_mask = col_data.astype(str).str.contains(str(value), na=False)
            elif operator == "starts_with":
                condition_mask = col_data.astype(str).str.startswith(str(value), na=False)
            elif operator == "ends_with":
                condition_mask = col_data.astype(str).str.endswith(str(value), na=False)
            elif operator == "in":
                condition_mask = col_data.isin(value if isinstance(value, list) else [value])
            elif operator == "not_in":
                condition_mask = ~col_data.isin(value if isinstance(value, list) else [value])
            elif operator == "is_null":
                condition_mask = col_data.isna()
            elif operator == "not_null":
                condition_mask = col_data.notna()
            else:
                return {"success": False, "error": f"Unknown operator: {operator}"}

            # Comparisons on nullable dtypes can yield <NA>; treat "unknown"
            # as "no match" so the final mask is a plain boolean array.
            condition_mask = condition_mask.fillna(False).astype(bool)

            if combine_with_and:
                mask = mask & condition_mask
            else:
                mask = mask | condition_mask

        session.df = df[mask].reset_index(drop=True)
        rows_after = len(session.df)
        session.record_operation(
            OperationType.FILTER,
            {
                "conditions": conditions,
                "mode": mode,
                "rows_before": rows_before,
                "rows_after": rows_after,
            },
        )

        return {
            "success": True,
            "rows_before": rows_before,
            "rows_after": rows_after,
            "rows_filtered": rows_before - rows_after,
        }

    except Exception as e:
        logger.error(f"Error filtering rows: {e!s}")
        return {"success": False, "error": str(e)}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
async def sort_data(
    session_id: str, columns: list[str | dict[str, str]], ctx: Context = None
) -> dict[str, Any]:
    """
    Reorder the session's rows by one or more columns.

    Args:
        session_id: Session identifier
        columns: Sort keys in priority order. Each entry is either a column
            name (sorted ascending) or a dict with a 'column' key and an
            optional 'ascending' key (defaults to True)
        ctx: FastMCP context

    Returns:
        Dict with success status, the column names used and their ascending
        flags; on failure, ``success=False`` plus an ``error`` message
    """
    try:
        session = get_session_manager().get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        frame = session.df

        # First pass: normalise every spec into a (name, ascending) pair.
        parsed_specs = []
        for spec in columns:
            if isinstance(spec, dict):
                parsed_specs.append((spec["column"], spec.get("ascending", True)))
            elif isinstance(spec, str):
                parsed_specs.append((spec, True))
            else:
                return {"success": False, "error": f"Invalid column specification: {spec}"}

        sort_columns = [name for name, _ in parsed_specs]
        ascending = [flag for _, flag in parsed_specs]

        # Second pass: every requested key must be an existing column.
        for name in sort_columns:
            if name not in frame.columns:
                return {"success": False, "error": f"Column '{name}' not found"}

        ordered = frame.sort_values(by=sort_columns, ascending=ascending)
        session.df = ordered.reset_index(drop=True)
        session.record_operation(
            OperationType.SORT, {"columns": sort_columns, "ascending": ascending}
        )

        return {"success": True, "sorted_by": sort_columns, "ascending": ascending}

    except Exception as exc:
        logger.error(f"Error sorting data: {exc!s}")
        return {"success": False, "error": str(exc)}
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
async def select_columns(
    session_id: str, columns: list[str], ctx: Context = None
) -> dict[str, Any]:
    """
    Keep only the listed columns, in the order given.

    Args:
        session_id: Session identifier
        columns: List of column names to keep
        ctx: FastMCP context

    Returns:
        Dict with success status, the selected columns and the columns that
        were dropped; on failure, ``success=False`` plus an ``error`` message
    """
    try:
        session = get_session_manager().get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        frame = session.df

        # Reject the request outright if any requested column is unknown.
        missing_cols = [name for name in columns if name not in frame.columns]
        if missing_cols:
            return {"success": False, "error": f"Columns not found: {missing_cols}"}

        columns_before = frame.columns.tolist()
        dropped = [name for name in frame.columns if name not in columns]

        session.df = frame[columns].copy()
        session.record_operation(
            OperationType.SELECT,
            {"columns": columns, "columns_before": columns_before, "columns_after": columns},
        )

        return {
            "success": True,
            "selected_columns": columns,
            "columns_removed": dropped,
        }

    except Exception as exc:
        logger.error(f"Error selecting columns: {exc!s}")
        return {"success": False, "error": str(exc)}
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
async def rename_columns(
    session_id: str, mapping: dict[str, str], ctx: Context = None
) -> dict[str, Any]:
    """
    Give existing columns new names.

    Args:
        session_id: Session identifier
        mapping: Dict whose keys are current column names and whose values
            are the names to replace them with
        ctx: FastMCP context

    Returns:
        Dict with success status, the applied mapping and the resulting
        column list; on failure, ``success=False`` plus an ``error`` message
    """
    try:
        session = get_session_manager().get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        frame = session.df
        existing = frame.columns

        # Every key of the mapping has to name a column that is present.
        missing_cols = [old_name for old_name in mapping if old_name not in existing]
        if missing_cols:
            return {"success": False, "error": f"Columns not found: {missing_cols}"}

        session.df = frame.rename(columns=mapping)
        session.record_operation(OperationType.RENAME, {"mapping": mapping})

        resulting_columns = session.df.columns.tolist()
        return {"success": True, "renamed": mapping, "columns": resulting_columns}

    except Exception as exc:
        logger.error(f"Error renaming columns: {exc!s}")
        return {"success": False, "error": str(exc)}
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
async def add_column(
    session_id: str, name: str, value: Any = None, formula: str | None = None, ctx: Context = None
) -> dict[str, Any]:
    """
    Append a new column to the session's dataframe.

    Args:
        session_id: Session identifier
        name: Name for the new column; must not already exist
        value: Scalar applied to every row, or a list with exactly one entry
            per row. Ignored when ``formula`` is given
        formula: Expression evaluated with ``DataFrame.eval`` against the
            current columns (e.g., "col1 + col2"). Takes precedence over
            ``value``
        ctx: FastMCP context

    Returns:
        Dict with success status, the added column name and the resulting
        column list; on failure, ``success=False`` plus an ``error`` message
    """
    try:
        session = get_session_manager().get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        frame = session.df

        if name in frame.columns:
            return {"success": False, "error": f"Column '{name}' already exists"}

        if formula:
            # NOTE(review): the formula is caller-supplied text handed to
            # DataFrame.eval; confirm callers are trusted or restrict input.
            try:
                session.df[name] = frame.eval(formula)
            except Exception as exc:
                return {"success": False, "error": f"Formula evaluation failed: {exc!s}"}
        else:
            # A list must supply exactly one value per row; scalars (and None)
            # are broadcast by pandas.
            if isinstance(value, list) and len(value) != len(frame):
                return {
                    "success": False,
                    "error": f"Value list length ({len(value)}) doesn't match row count ({len(frame)})",
                }
            session.df[name] = value

        recorded_value = None if value is None else str(value)
        session.record_operation(
            OperationType.ADD_COLUMN,
            {"name": name, "value": recorded_value, "formula": formula},
        )

        return {"success": True, "column_added": name, "columns": session.df.columns.tolist()}

    except Exception as exc:
        logger.error(f"Error adding column: {exc!s}")
        return {"success": False, "error": str(exc)}
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
async def remove_columns(
    session_id: str, columns: list[str], ctx: Context = None
) -> dict[str, Any]:
    """
    Drop the listed columns from the session's dataframe.

    Args:
        session_id: Session identifier
        columns: List of column names to remove
        ctx: FastMCP context

    Returns:
        Dict with success status, the removed columns and the columns that
        remain; on failure, ``success=False`` plus an ``error`` message
    """
    try:
        session = get_session_manager().get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        frame = session.df

        # Nothing is dropped unless every requested column exists.
        missing_cols = [name for name in columns if name not in frame.columns]
        if missing_cols:
            return {"success": False, "error": f"Columns not found: {missing_cols}"}

        session.df = frame.drop(columns=columns)
        session.record_operation(OperationType.REMOVE_COLUMN, {"columns": columns})

        remaining = session.df.columns.tolist()
        return {
            "success": True,
            "removed_columns": columns,
            "remaining_columns": remaining,
        }

    except Exception as exc:
        logger.error(f"Error removing columns: {exc!s}")
        return {"success": False, "error": str(exc)}
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
async def change_column_type(
|
|
347
|
+
session_id: str, column: str, dtype: str, errors: str = "coerce", ctx: Context = None
|
|
348
|
+
) -> dict[str, Any]:
|
|
349
|
+
"""
|
|
350
|
+
Change the data type of a column.
|
|
351
|
+
|
|
352
|
+
Args:
|
|
353
|
+
session_id: Session identifier
|
|
354
|
+
column: Column name to change
|
|
355
|
+
dtype: Target data type ('int', 'float', 'str', 'bool', 'datetime', 'category')
|
|
356
|
+
errors: How to handle conversion errors ('raise', 'coerce', 'ignore')
|
|
357
|
+
ctx: FastMCP context
|
|
358
|
+
|
|
359
|
+
Returns:
|
|
360
|
+
Dict with success status and conversion info
|
|
361
|
+
"""
|
|
362
|
+
try:
|
|
363
|
+
manager = get_session_manager()
|
|
364
|
+
session = manager.get_session(session_id)
|
|
365
|
+
|
|
366
|
+
if not session or session.df is None:
|
|
367
|
+
return {"success": False, "error": "Invalid session or no data loaded"}
|
|
368
|
+
|
|
369
|
+
df = session.df
|
|
370
|
+
|
|
371
|
+
if column not in df.columns:
|
|
372
|
+
return {"success": False, "error": f"Column '{column}' not found"}
|
|
373
|
+
|
|
374
|
+
original_dtype = str(df[column].dtype)
|
|
375
|
+
null_count_before = df[column].isna().sum()
|
|
376
|
+
|
|
377
|
+
# Convert based on target dtype
|
|
378
|
+
if dtype == "int":
|
|
379
|
+
session.df[column] = pd.to_numeric(df[column], errors=errors).astype("Int64")
|
|
380
|
+
elif dtype == "float":
|
|
381
|
+
session.df[column] = pd.to_numeric(df[column], errors=errors)
|
|
382
|
+
elif dtype == "str":
|
|
383
|
+
session.df[column] = df[column].astype(str)
|
|
384
|
+
elif dtype == "bool":
|
|
385
|
+
session.df[column] = df[column].astype(bool)
|
|
386
|
+
elif dtype == "datetime":
|
|
387
|
+
session.df[column] = pd.to_datetime(df[column], errors=errors)
|
|
388
|
+
elif dtype == "category":
|
|
389
|
+
session.df[column] = df[column].astype("category")
|
|
390
|
+
else:
|
|
391
|
+
return {"success": False, "error": f"Unsupported dtype: {dtype}"}
|
|
392
|
+
|
|
393
|
+
null_count_after = session.df[column].isna().sum()
|
|
394
|
+
|
|
395
|
+
session.record_operation(
|
|
396
|
+
OperationType.CHANGE_TYPE,
|
|
397
|
+
{"column": column, "from_type": original_dtype, "to_type": dtype, "errors": errors},
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
return {
|
|
401
|
+
"success": True,
|
|
402
|
+
"column": column,
|
|
403
|
+
"original_type": original_dtype,
|
|
404
|
+
"new_type": str(session.df[column].dtype),
|
|
405
|
+
"null_count_before": int(null_count_before),
|
|
406
|
+
"null_count_after": int(null_count_after),
|
|
407
|
+
"conversion_errors": int(null_count_after - null_count_before),
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
except Exception as e:
|
|
411
|
+
logger.error(f"Error changing column type: {e!s}")
|
|
412
|
+
return {"success": False, "error": str(e)}
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _fill_with_statistic(series: pd.Series, statistic: str) -> pd.Series:
    """Return *series* with nulls replaced by its mean or median; non-numeric columns pass through."""
    dtype = series.dtype
    # Booleans count as numeric for pandas but a mean/median fill makes no sense for them.
    is_numeric = pd.api.types.is_numeric_dtype(dtype) and not pd.api.types.is_bool_dtype(dtype)
    if not is_numeric or not series.isna().any():
        return series

    fill_value = series.mean() if statistic == "mean" else series.median()
    if pd.isna(fill_value):
        # Entirely-null column: there is no statistic to fill with.
        return series

    if pd.api.types.is_integer_dtype(dtype):
        # Only nullable integer columns can reach this point (NumPy ints cannot
        # hold nulls). They reject fractional fill values, so widen to a
        # nullable float when the statistic is not a whole number.
        if float(fill_value).is_integer():
            fill_value = int(fill_value)
        else:
            series = series.astype("Float64")

    return series.fillna(fill_value)


async def fill_missing_values(
    session_id: str,
    strategy: str = "drop",
    value: Any = None,
    columns: list[str] | None = None,
    ctx: Context = None,
) -> dict[str, Any]:
    """
    Fill or remove missing values.

    Args:
        session_id: Session identifier
        strategy: One of 'drop', 'fill', 'forward', 'backward', 'mean', 'median', 'mode'.
            'drop' (the default) removes rows with a null in any target column
            and renumbers the index; 'mean' and 'median' only touch numeric
            columns and leave the others unchanged
        value: Value to fill with (for 'fill' strategy)
        columns: Specific columns to apply to (None or empty for all)
        ctx: FastMCP context

    Returns:
        Dict with success status, row counts and per-column null counts
        before and after; on failure, ``success=False`` plus an ``error``
        message
    """
    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df
        rows_before = len(df)
        null_counts_before = df.isnull().sum().to_dict()

        if columns:
            missing_cols = [col for col in columns if col not in df.columns]
            if missing_cols:
                return {"success": False, "error": f"Columns not found: {missing_cols}"}
            target_cols = columns
        else:
            target_cols = df.columns.tolist()

        if strategy == "drop":
            # Renumber the index like the other row-removing tools do, so later
            # positional/boolean operations see a contiguous 0..n-1 index.
            session.df = df.dropna(subset=target_cols).reset_index(drop=True)
        elif strategy == "fill":
            if value is None:
                return {"success": False, "error": "Value required for 'fill' strategy"}
            session.df[target_cols] = df[target_cols].fillna(value)
        elif strategy == "forward":
            # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
            session.df[target_cols] = df[target_cols].ffill()
        elif strategy == "backward":
            session.df[target_cols] = df[target_cols].bfill()
        elif strategy in ("mean", "median"):
            for col in target_cols:
                session.df[col] = _fill_with_statistic(df[col], strategy)
        elif strategy == "mode":
            for col in target_cols:
                mode_val = df[col].mode()
                # mode() is empty when the column holds no non-null value.
                if not mode_val.empty:
                    session.df[col] = df[col].fillna(mode_val.iloc[0])
        else:
            return {"success": False, "error": f"Unknown strategy: {strategy}"}

        null_counts_after = session.df.isnull().sum().to_dict()

        session.record_operation(
            OperationType.FILL_MISSING,
            {
                "strategy": strategy,
                "value": str(value) if value is not None else None,
                "columns": target_cols,
            },
        )

        return {
            "success": True,
            "strategy": strategy,
            "rows_before": rows_before,
            "rows_after": len(session.df),
            "null_counts_before": null_counts_before,
            "null_counts_after": null_counts_after,
        }

    except Exception as e:
        logger.error(f"Error filling missing values: {e!s}")
        return {"success": False, "error": str(e)}
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def _as_text_keeping_nulls(series: pd.Series) -> pd.Series:
    """Return *series* rendered as strings, with originally-missing entries left missing."""
    # A bare astype(str) would turn NaN/None into the literal text "nan"/"None".
    return series.astype(str).where(series.notna())


async def update_column(
    session_id: str,
    column: str,
    operation: str,
    value: Any | None = None,
    pattern: str | None = None,
    replacement: str | None = None,
    ctx: Context = None,
) -> dict[str, Any]:
    """
    Update values in a specific column with simple operations.

    The text operations ('replace', 'extract', 'split', 'strip', 'upper',
    'lower') convert non-null values to strings first; missing values stay
    missing instead of becoming the text "nan".

    Args:
        session_id: Session identifier
        column: Column name to update
        operation: Operation type - 'replace', 'extract', 'split', 'strip', 'upper', 'lower', 'fill'
        value: Value for certain operations: the fill value for 'fill', or the
            integer index of the piece to keep for 'split' (defaults to 0)
        pattern: Pattern for replace/extract operations (regex supported), or
            the separator for 'split' (defaults to a single space)
        replacement: Replacement string for replace operation
        ctx: FastMCP context

    Returns:
        Dict with success status, a five-value sample of the column before
        and after, and the row count; on failure, ``success=False`` plus an
        ``error`` message
    """
    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        if column not in df.columns:
            return {"success": False, "error": f"Column '{column}' not found"}

        original_values_sample = df[column].head(5).tolist()

        if operation == "replace":
            if pattern is None or replacement is None:
                return {
                    "success": False,
                    "error": "Pattern and replacement required for replace operation",
                }
            text = _as_text_keeping_nulls(df[column])
            session.df[column] = text.str.replace(pattern, replacement, regex=True)

        elif operation == "extract":
            if pattern is None:
                return {"success": False, "error": "Pattern required for extract operation"}
            text = _as_text_keeping_nulls(df[column])
            session.df[column] = text.str.extract(pattern, expand=False)

        elif operation == "split":
            if pattern is None:
                pattern = " "
            # Keep the requested piece when an integer index is supplied,
            # otherwise the first piece.
            part_index = value if isinstance(value, int) else 0
            text = _as_text_keeping_nulls(df[column])
            session.df[column] = text.str.split(pattern).str[part_index]

        elif operation == "strip":
            session.df[column] = _as_text_keeping_nulls(df[column]).str.strip()

        elif operation == "upper":
            session.df[column] = _as_text_keeping_nulls(df[column]).str.upper()

        elif operation == "lower":
            session.df[column] = _as_text_keeping_nulls(df[column]).str.lower()

        elif operation == "fill":
            if value is None:
                return {"success": False, "error": "Value required for fill operation"}
            session.df[column] = df[column].fillna(value)

        else:
            return {"success": False, "error": f"Unknown operation: {operation}"}

        updated_values_sample = session.df[column].head(5).tolist()

        session.record_operation(
            OperationType.UPDATE_COLUMN,
            {
                "column": column,
                "operation": operation,
                "pattern": pattern,
                "replacement": replacement,
                "value": str(value) if value is not None else None,
            },
        )

        return {
            "success": True,
            "column": column,
            "operation": operation,
            "original_sample": original_values_sample,
            "updated_sample": updated_values_sample,
            "rows_updated": len(session.df),
        }

    except Exception as e:
        logger.error(f"Error updating column: {e!s}")
        return {"success": False, "error": str(e)}
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
async def remove_duplicates(
    session_id: str, subset: list[str] | None = None, keep: str = "first", ctx: Context = None
) -> dict[str, Any]:
    """
    Remove duplicate rows.

    Args:
        session_id: Session identifier
        subset: Column names to consider for duplicates (None or empty for all)
        keep: Which duplicates to keep: 'first', 'last', or 'none' to drop
            every row that has a duplicate
        ctx: FastMCP context

    Returns:
        Dict with success status and duplicate info (row counts before and
        after, number removed, and the ``subset``/``keep`` arguments as
        given); on failure, ``success=False`` plus an ``error`` message
    """
    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df
        rows_before = len(df)

        if subset:
            missing_cols = [col for col in subset if col not in df.columns]
            if missing_cols:
                return {"success": False, "error": f"Columns not found: {missing_cols}"}

        # Translate the string API onto pandas' keep argument, where dropping
        # all duplicates is spelled False. A literal False is accepted too.
        if keep == "none" or keep is False:
            keep_param = False
        elif keep in ("first", "last"):
            keep_param = keep
        else:
            return {
                "success": False,
                "error": f"Invalid keep value: {keep!r} (expected 'first', 'last' or 'none')",
            }

        # An empty list means "no restriction", same as None.
        subset_cols = subset or None

        session.df = df.drop_duplicates(subset=subset_cols, keep=keep_param).reset_index(
            drop=True
        )
        rows_after = len(session.df)

        session.record_operation(
            OperationType.REMOVE_DUPLICATES,
            {"subset": subset, "keep": keep, "rows_removed": rows_before - rows_after},
        )

        return {
            "success": True,
            "rows_before": rows_before,
            "rows_after": rows_after,
            "duplicates_removed": rows_before - rows_after,
            "subset": subset,
            "keep": keep,
        }

    except Exception as e:
        logger.error(f"Error removing duplicates: {e!s}")
        return {"success": False, "error": str(e)}
|