@mseep/csv-editor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +53 -0
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +38 -0
  3. package/.github/workflows/deploy-docs.yml +62 -0
  4. package/.github/workflows/publish-github.yml +52 -0
  5. package/.github/workflows/publish.yml +44 -0
  6. package/.github/workflows/test.yml +32 -0
  7. package/.pre-commit-config.yaml +157 -0
  8. package/ALTERNATIVE_PUBLISHING.md +175 -0
  9. package/ARCHITECTURE.md +1011 -0
  10. package/CHANGELOG.md +99 -0
  11. package/CODE_OF_CONDUCT.md +41 -0
  12. package/CONTRIBUTING.md +427 -0
  13. package/Dockerfile +22 -0
  14. package/LICENSE +21 -0
  15. package/MCP_CONFIG.md +505 -0
  16. package/PUBLISHING.md +210 -0
  17. package/README.md +400 -0
  18. package/SECURITY.md +61 -0
  19. package/docs/README.md +41 -0
  20. package/docs/blog/2019-05-28-first-blog-post.md +12 -0
  21. package/docs/blog/2019-05-29-long-blog-post.md +44 -0
  22. package/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
  23. package/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  24. package/docs/blog/2021-08-26-welcome/index.md +29 -0
  25. package/docs/blog/authors.yml +25 -0
  26. package/docs/blog/tags.yml +19 -0
  27. package/docs/docs/api/overview.md +183 -0
  28. package/docs/docs/installation.md +252 -0
  29. package/docs/docs/intro.md +87 -0
  30. package/docs/docs/tutorial-basics/_category_.json +8 -0
  31. package/docs/docs/tutorial-basics/congratulations.md +23 -0
  32. package/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
  33. package/docs/docs/tutorial-basics/create-a-document.md +57 -0
  34. package/docs/docs/tutorial-basics/create-a-page.md +43 -0
  35. package/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
  36. package/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
  37. package/docs/docs/tutorial-extras/_category_.json +7 -0
  38. package/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
  39. package/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
  40. package/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
  41. package/docs/docs/tutorial-extras/translate-your-site.md +88 -0
  42. package/docs/docs/tutorials/quickstart.md +365 -0
  43. package/docs/docusaurus.config.ts +163 -0
  44. package/docs/package-lock.json +17493 -0
  45. package/docs/package.json +48 -0
  46. package/docs/sidebars.ts +33 -0
  47. package/docs/src/components/HomepageFeatures/index.tsx +71 -0
  48. package/docs/src/components/HomepageFeatures/styles.module.css +11 -0
  49. package/docs/src/css/custom.css +30 -0
  50. package/docs/src/pages/index.module.css +23 -0
  51. package/docs/src/pages/index.tsx +44 -0
  52. package/docs/src/pages/markdown-page.md +7 -0
  53. package/docs/static/.nojekyll +0 -0
  54. package/docs/static/img/docusaurus-social-card.jpg +0 -0
  55. package/docs/static/img/docusaurus.png +0 -0
  56. package/docs/static/img/favicon.ico +0 -0
  57. package/docs/static/img/logo.svg +1 -0
  58. package/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
  59. package/docs/static/img/undraw_docusaurus_react.svg +170 -0
  60. package/docs/static/img/undraw_docusaurus_tree.svg +40 -0
  61. package/docs/tsconfig.json +8 -0
  62. package/examples/README.md +48 -0
  63. package/examples/auto_save_demo.py +206 -0
  64. package/examples/auto_save_overwrite.py +201 -0
  65. package/examples/basic_usage.py +135 -0
  66. package/examples/demo.py +139 -0
  67. package/examples/history_demo.py +317 -0
  68. package/examples/test_default_autosave.py +124 -0
  69. package/examples/update_consignee_example.py +179 -0
  70. package/package.json +51 -0
  71. package/plans/2026-04-19-fastmcp3-migration-plan.md +1045 -0
  72. package/pyproject.toml +331 -0
  73. package/requirements-dev.txt +30 -0
  74. package/requirements.txt +22 -0
  75. package/scripts/publish.py +67 -0
  76. package/smithery.yaml +15 -0
  77. package/specs/2026-04-19-fastmcp3-migration-design.md +243 -0
  78. package/src/csv_editor/__init__.py +8 -0
  79. package/src/csv_editor/models/__init__.py +39 -0
  80. package/src/csv_editor/models/auto_save.py +246 -0
  81. package/src/csv_editor/models/csv_session.py +468 -0
  82. package/src/csv_editor/models/data_models.py +244 -0
  83. package/src/csv_editor/models/history_manager.py +456 -0
  84. package/src/csv_editor/prompts/__init__.py +0 -0
  85. package/src/csv_editor/prompts/data_prompts.py +13 -0
  86. package/src/csv_editor/resources/__init__.py +0 -0
  87. package/src/csv_editor/resources/csv_resources.py +22 -0
  88. package/src/csv_editor/server.py +640 -0
  89. package/src/csv_editor/tools/__init__.py +5 -0
  90. package/src/csv_editor/tools/analytics.py +700 -0
  91. package/src/csv_editor/tools/auto_save_operations.py +235 -0
  92. package/src/csv_editor/tools/data_operations.py +3 -0
  93. package/src/csv_editor/tools/history_operations.py +315 -0
  94. package/src/csv_editor/tools/io_operations.py +431 -0
  95. package/src/csv_editor/tools/transformations.py +663 -0
  96. package/src/csv_editor/tools/validation.py +822 -0
  97. package/src/csv_editor/utils/__init__.py +0 -0
  98. package/src/csv_editor/utils/validators.py +205 -0
  99. package/tests/README.md +65 -0
  100. package/tests/__init__.py +7 -0
  101. package/tests/conftest.py +50 -0
  102. package/tests/test_auto_save.py +378 -0
  103. package/tests/test_basic.py +103 -0
  104. package/tests/test_integration.py +356 -0
  105. package/tests/test_server_boot.py +50 -0
  106. package/tests/test_settings.py +184 -0
@@ -0,0 +1,663 @@
1
+ """Data transformation tools for CSV manipulation."""
2
+
3
+ import logging
4
+ from typing import Any
5
+
6
+ import pandas as pd
7
+ from fastmcp import Context
8
+
9
+ from ..models.csv_session import get_session_manager
10
+ from ..models.data_models import OperationType
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def _condition_mask(col_data: pd.Series, operator: Any, value: Any) -> pd.Series | None:
    """Build the boolean mask for a single filter condition.

    Returns None when ``operator`` is not one of the supported operators so
    the caller can report it.
    """
    if operator == "==":
        return col_data == value
    if operator == "!=":
        return col_data != value
    if operator == ">":
        return col_data > value
    if operator == "<":
        return col_data < value
    if operator == ">=":
        return col_data >= value
    if operator == "<=":
        return col_data <= value
    if operator == "contains":
        # str.contains treats the pattern as a regular expression by default.
        return col_data.astype(str).str.contains(str(value), na=False)
    if operator == "starts_with":
        return col_data.astype(str).str.startswith(str(value), na=False)
    if operator == "ends_with":
        return col_data.astype(str).str.endswith(str(value), na=False)
    if operator in ("in", "not_in"):
        # A scalar is treated as a one-element membership list.
        members = value if isinstance(value, list) else [value]
        matched = col_data.isin(members)
        return matched if operator == "in" else ~matched
    if operator == "is_null":
        return col_data.isna()
    if operator == "not_null":
        return col_data.notna()
    return None


async def filter_rows(
    session_id: str, conditions: list[dict[str, Any]], mode: str = "and", ctx: Context = None
) -> dict[str, Any]:
    """
    Filter rows based on conditions.

    Rows matching the combined conditions are kept; the filtered frame replaces
    ``session.df`` (with a fresh 0..n-1 index) and a FILTER operation is recorded.

    Args:
        session_id: Session identifier
        conditions: List of filter conditions, each with:
            - column: Column name
            - operator: One of '==', '!=', '>', '<', '>=', '<=', 'contains', 'starts_with', 'ends_with', 'in', 'not_in', 'is_null', 'not_null'
            - value: Value to compare (not needed for is_null/not_null)
        mode: 'and' to require every condition; any other value combines the
            conditions with 'or'
        ctx: FastMCP context

    Returns:
        Dict with success status and row counts before/after filtering, or
        ``{"success": False, "error": ...}`` on an invalid session, unknown
        column, unknown operator, or any exception raised while filtering.
    """
    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df
        combine_with_and = mode == "and"

        # Start from the identity element of the combining operator: all-True
        # for AND, all-False for OR. (An all-True start made OR a no-op.)
        # An empty condition list keeps every row in either mode. The mask is
        # built on df.index so it aligns with frames whose index is not 0..n-1.
        mask = pd.Series(combine_with_and or not conditions, index=df.index, dtype=bool)

        for condition in conditions:
            column = condition.get("column")
            operator = condition.get("operator")
            value = condition.get("value")

            if column not in df.columns:
                return {"success": False, "error": f"Column '{column}' not found"}

            condition_mask = _condition_mask(df[column], operator, value)
            if condition_mask is None:
                return {"success": False, "error": f"Unknown operator: {operator}"}

            if combine_with_and:
                mask = mask & condition_mask
            else:
                mask = mask | condition_mask

        session.df = df[mask].reset_index(drop=True)
        session.record_operation(
            OperationType.FILTER,
            {
                "conditions": conditions,
                "mode": mode,
                "rows_before": len(df),
                "rows_after": len(session.df),
            },
        )

        return {
            "success": True,
            "rows_before": len(df),
            "rows_after": len(session.df),
            "rows_filtered": len(df) - len(session.df),
        }

    except Exception as e:
        logger.error(f"Error filtering rows: {e!s}")
        return {"success": False, "error": str(e)}
108
+
109
+
110
async def sort_data(
    session_id: str, columns: list[str | dict[str, str]], ctx: Context = None
) -> dict[str, Any]:
    """
    Reorder the session's rows by one or more columns.

    Each entry of ``columns`` is either a plain column name (sorted ascending)
    or a dict with a 'column' key and an optional 'ascending' flag that
    defaults to True. The sorted frame replaces ``session.df`` with a fresh
    0..n-1 index and a SORT operation is recorded.

    Args:
        session_id: Session identifier
        columns: Sort keys, in priority order
        ctx: FastMCP context

    Returns:
        Dict with success status plus the resolved column names and ascending
        flags, or ``{"success": False, "error": ...}`` on failure.
    """
    try:
        session = get_session_manager().get_session(session_id)
        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        # Normalise the mixed str/dict specification into two parallel lists.
        sort_columns = []
        ascending = []
        for col in columns:
            if isinstance(col, dict):
                sort_columns.append(col["column"])
                ascending.append(col.get("ascending", True))
                continue
            if not isinstance(col, str):
                return {"success": False, "error": f"Invalid column specification: {col}"}
            sort_columns.append(col)
            ascending.append(True)

        # Report the first requested key that is not a column of the frame.
        for col in sort_columns:
            if col in df.columns:
                continue
            return {"success": False, "error": f"Column '{col}' not found"}

        ordered = df.sort_values(by=sort_columns, ascending=ascending)
        session.df = ordered.reset_index(drop=True)

        details = {"columns": sort_columns, "ascending": ascending}
        session.record_operation(OperationType.SORT, details)

        return {"success": True, "sorted_by": sort_columns, "ascending": ascending}

    except Exception as e:
        logger.error(f"Error sorting data: {e!s}")
        return {"success": False, "error": str(e)}
162
+
163
+
164
async def select_columns(
    session_id: str, columns: list[str], ctx: Context = None
) -> dict[str, Any]:
    """
    Keep only the named columns, in the order given.

    The session frame is replaced by a copy restricted to ``columns`` and a
    SELECT operation is recorded with the column lists before and after.

    Args:
        session_id: Session identifier
        columns: Column names to keep
        ctx: FastMCP context

    Returns:
        Dict with success status, the selected columns and the columns that
        were dropped, or ``{"success": False, "error": ...}`` on failure.
    """
    try:
        session = get_session_manager().get_session(session_id)
        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        # Refuse the whole request if any requested column is absent.
        missing_cols = [col for col in columns if col not in df.columns]
        if missing_cols:
            return {"success": False, "error": f"Columns not found: {missing_cols}"}

        session.df = df[columns].copy()

        details = {
            "columns": columns,
            "columns_before": df.columns.tolist(),
            "columns_after": columns,
        }
        session.record_operation(OperationType.SELECT, details)

        # `df` still refers to the pre-selection frame here.
        dropped = [col for col in df.columns if col not in columns]
        return {"success": True, "selected_columns": columns, "columns_removed": dropped}

    except Exception as e:
        logger.error(f"Error selecting columns: {e!s}")
        return {"success": False, "error": str(e)}
207
+
208
+
209
async def rename_columns(
    session_id: str, mapping: dict[str, str], ctx: Context = None
) -> dict[str, Any]:
    """
    Rename columns of the session frame.

    Every key of ``mapping`` must be an existing column; otherwise nothing is
    renamed. On success the renamed frame replaces ``session.df`` and a RENAME
    operation is recorded.

    Args:
        session_id: Session identifier
        mapping: Old column name -> new column name
        ctx: FastMCP context

    Returns:
        Dict with success status, the applied mapping and the resulting column
        list, or ``{"success": False, "error": ...}`` on failure.
    """
    try:
        session = get_session_manager().get_session(session_id)
        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        # Only the mapping's keys (the old names) need to exist in the frame.
        missing_cols = [col for col in mapping if col not in df.columns]
        if missing_cols:
            return {"success": False, "error": f"Columns not found: {missing_cols}"}

        renamed_frame = df.rename(columns=mapping)
        session.df = renamed_frame
        session.record_operation(OperationType.RENAME, {"mapping": mapping})

        result = {"success": True, "renamed": mapping}
        result["columns"] = session.df.columns.tolist()
        return result

    except Exception as e:
        logger.error(f"Error renaming columns: {e!s}")
        return {"success": False, "error": str(e)}
245
+
246
+
247
async def add_column(
    session_id: str, name: str, value: Any = None, formula: str | None = None, ctx: Context = None
) -> dict[str, Any]:
    """
    Append a new column to the session frame.

    A non-empty ``formula`` takes precedence and is evaluated with
    ``DataFrame.eval``. Otherwise ``value`` is assigned: a list must have one
    entry per row, anything else (including None) is assigned as-is to the
    whole column. An ADD_COLUMN operation is recorded on success.

    Args:
        session_id: Session identifier
        name: Name of the column to create; must not already exist
        value: Scalar or per-row list used when no formula is given
        formula: Expression over existing columns (e.g., "col1 + col2")
        ctx: FastMCP context

    Returns:
        Dict with success status, the added column name and the resulting
        column list, or ``{"success": False, "error": ...}`` on failure.
    """
    try:
        session = get_session_manager().get_session(session_id)
        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        if name in df.columns:
            return {"success": False, "error": f"Column '{name}' already exists"}

        if formula:
            # NOTE(review): the formula string is handed to DataFrame.eval
            # unchanged — confirm this is acceptable for untrusted callers.
            try:
                session.df[name] = df.eval(formula)
            except Exception as e:
                return {"success": False, "error": f"Formula evaluation failed: {e!s}"}
        else:
            # Lists must line up with the rows; scalars and None are assigned directly.
            if isinstance(value, list) and len(value) != len(df):
                return {
                    "success": False,
                    "error": f"Value list length ({len(value)}) doesn't match row count ({len(df)})",
                }
            session.df[name] = value

        recorded_value = None if value is None else str(value)
        session.record_operation(
            OperationType.ADD_COLUMN,
            {"name": name, "value": recorded_value, "formula": formula},
        )

        return {"success": True, "column_added": name, "columns": session.df.columns.tolist()}

    except Exception as e:
        logger.error(f"Error adding column: {e!s}")
        return {"success": False, "error": str(e)}
302
+
303
+
304
async def remove_columns(
    session_id: str, columns: list[str], ctx: Context = None
) -> dict[str, Any]:
    """
    Drop the named columns from the session frame.

    All names must exist; if any is missing nothing is removed. On success the
    reduced frame replaces ``session.df`` and a REMOVE_COLUMN operation is
    recorded.

    Args:
        session_id: Session identifier
        columns: Column names to drop
        ctx: FastMCP context

    Returns:
        Dict with success status, the removed columns and the remaining
        columns, or ``{"success": False, "error": ...}`` on failure.
    """
    try:
        session = get_session_manager().get_session(session_id)
        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df

        # Refuse the whole request if any requested column is absent.
        missing_cols = [col for col in columns if col not in df.columns]
        if missing_cols:
            return {"success": False, "error": f"Columns not found: {missing_cols}"}

        trimmed = df.drop(columns=columns)
        session.df = trimmed
        session.record_operation(OperationType.REMOVE_COLUMN, {"columns": columns})

        remaining = session.df.columns.tolist()
        return {"success": True, "removed_columns": columns, "remaining_columns": remaining}

    except Exception as e:
        logger.error(f"Error removing columns: {e!s}")
        return {"success": False, "error": str(e)}
344
+
345
+
346
+ async def change_column_type(
347
+ session_id: str, column: str, dtype: str, errors: str = "coerce", ctx: Context = None
348
+ ) -> dict[str, Any]:
349
+ """
350
+ Change the data type of a column.
351
+
352
+ Args:
353
+ session_id: Session identifier
354
+ column: Column name to change
355
+ dtype: Target data type ('int', 'float', 'str', 'bool', 'datetime', 'category')
356
+ errors: How to handle conversion errors ('raise', 'coerce', 'ignore')
357
+ ctx: FastMCP context
358
+
359
+ Returns:
360
+ Dict with success status and conversion info
361
+ """
362
+ try:
363
+ manager = get_session_manager()
364
+ session = manager.get_session(session_id)
365
+
366
+ if not session or session.df is None:
367
+ return {"success": False, "error": "Invalid session or no data loaded"}
368
+
369
+ df = session.df
370
+
371
+ if column not in df.columns:
372
+ return {"success": False, "error": f"Column '{column}' not found"}
373
+
374
+ original_dtype = str(df[column].dtype)
375
+ null_count_before = df[column].isna().sum()
376
+
377
+ # Convert based on target dtype
378
+ if dtype == "int":
379
+ session.df[column] = pd.to_numeric(df[column], errors=errors).astype("Int64")
380
+ elif dtype == "float":
381
+ session.df[column] = pd.to_numeric(df[column], errors=errors)
382
+ elif dtype == "str":
383
+ session.df[column] = df[column].astype(str)
384
+ elif dtype == "bool":
385
+ session.df[column] = df[column].astype(bool)
386
+ elif dtype == "datetime":
387
+ session.df[column] = pd.to_datetime(df[column], errors=errors)
388
+ elif dtype == "category":
389
+ session.df[column] = df[column].astype("category")
390
+ else:
391
+ return {"success": False, "error": f"Unsupported dtype: {dtype}"}
392
+
393
+ null_count_after = session.df[column].isna().sum()
394
+
395
+ session.record_operation(
396
+ OperationType.CHANGE_TYPE,
397
+ {"column": column, "from_type": original_dtype, "to_type": dtype, "errors": errors},
398
+ )
399
+
400
+ return {
401
+ "success": True,
402
+ "column": column,
403
+ "original_type": original_dtype,
404
+ "new_type": str(session.df[column].dtype),
405
+ "null_count_before": int(null_count_before),
406
+ "null_count_after": int(null_count_after),
407
+ "conversion_errors": int(null_count_after - null_count_before),
408
+ }
409
+
410
+ except Exception as e:
411
+ logger.error(f"Error changing column type: {e!s}")
412
+ return {"success": False, "error": str(e)}
413
+
414
+
415
async def fill_missing_values(
    session_id: str,
    strategy: str = "drop",
    value: Any = None,
    columns: list[str] | None = None,
    ctx: Context = None,
) -> dict[str, Any]:
    """
    Fill or remove missing values.

    Strategies:
        - 'drop': remove rows that have a null in any target column
        - 'fill': replace nulls with ``value`` (required)
        - 'forward' / 'backward': propagate the previous / next valid value
        - 'mean' / 'median': fill with the column statistic; only columns whose
          dtype is int64 or float64 are touched, others are left unchanged
        - 'mode': fill with the first mode of each column, when one exists

    A FILL_MISSING operation is recorded on success.

    Args:
        session_id: Session identifier
        strategy: One of 'drop', 'fill', 'forward', 'backward', 'mean', 'median', 'mode'
        value: Value to fill with (for 'fill' strategy)
        columns: Specific columns to apply to (None or empty for all)
        ctx: FastMCP context

    Returns:
        Dict with success status, the strategy, row counts and per-column null
        counts before and after; or ``{"success": False, "error": ...}`` on an
        invalid session, unknown column, unknown strategy, missing fill value,
        or any exception raised while filling.
    """
    try:
        manager = get_session_manager()
        session = manager.get_session(session_id)

        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df
        null_counts_before = df.isnull().sum().to_dict()

        if columns:
            missing_cols = [col for col in columns if col not in df.columns]
            if missing_cols:
                return {"success": False, "error": f"Columns not found: {missing_cols}"}
            target_cols = columns
        else:
            target_cols = df.columns.tolist()

        if strategy == "drop":
            session.df = df.dropna(subset=target_cols)
        elif strategy == "fill":
            if value is None:
                return {"success": False, "error": "Value required for 'fill' strategy"}
            session.df[target_cols] = df[target_cols].fillna(value)
        elif strategy == "forward":
            # fillna(method=...) is deprecated/removed in recent pandas; ffill()
            # is the supported equivalent.
            session.df[target_cols] = df[target_cols].ffill()
        elif strategy == "backward":
            # Same as above: bfill() replaces fillna(method="bfill").
            session.df[target_cols] = df[target_cols].bfill()
        elif strategy == "mean":
            for col in target_cols:
                if df[col].dtype in ["int64", "float64"]:
                    session.df[col] = df[col].fillna(df[col].mean())
        elif strategy == "median":
            for col in target_cols:
                if df[col].dtype in ["int64", "float64"]:
                    session.df[col] = df[col].fillna(df[col].median())
        elif strategy == "mode":
            for col in target_cols:
                mode_val = df[col].mode()
                # mode() is empty when the column has no non-null values.
                if len(mode_val) > 0:
                    session.df[col] = df[col].fillna(mode_val[0])
        else:
            return {"success": False, "error": f"Unknown strategy: {strategy}"}

        null_counts_after = session.df.isnull().sum().to_dict()

        session.record_operation(
            OperationType.FILL_MISSING,
            {
                "strategy": strategy,
                "value": str(value) if value is not None else None,
                "columns": target_cols,
            },
        )

        return {
            "success": True,
            "strategy": strategy,
            "rows_before": len(df),
            "rows_after": len(session.df),
            "null_counts_before": null_counts_before,
            "null_counts_after": null_counts_after,
        }

    except Exception as e:
        logger.error(f"Error filling missing values: {e!s}")
        return {"success": False, "error": str(e)}
502
+
503
+
504
+ async def update_column(
505
+ session_id: str,
506
+ column: str,
507
+ operation: str,
508
+ value: Any | None = None,
509
+ pattern: str | None = None,
510
+ replacement: str | None = None,
511
+ ctx: Context = None,
512
+ ) -> dict[str, Any]:
513
+ """
514
+ Update values in a specific column with simple operations.
515
+
516
+ Args:
517
+ session_id: Session identifier
518
+ column: Column name to update
519
+ operation: Operation type - 'replace', 'extract', 'split', 'strip', 'upper', 'lower', 'fill'
520
+ value: Value for certain operations (e.g., fill value)
521
+ pattern: Pattern for replace/extract operations (regex supported)
522
+ replacement: Replacement string for replace operation
523
+ ctx: FastMCP context
524
+
525
+ Returns:
526
+ Dict with success status and update info
527
+ """
528
+ try:
529
+ manager = get_session_manager()
530
+ session = manager.get_session(session_id)
531
+
532
+ if not session or session.df is None:
533
+ return {"success": False, "error": "Invalid session or no data loaded"}
534
+
535
+ df = session.df
536
+
537
+ if column not in df.columns:
538
+ return {"success": False, "error": f"Column '{column}' not found"}
539
+
540
+ original_values_sample = df[column].head(5).tolist()
541
+
542
+ if operation == "replace":
543
+ if pattern is None or replacement is None:
544
+ return {
545
+ "success": False,
546
+ "error": "Pattern and replacement required for replace operation",
547
+ }
548
+ session.df[column] = (
549
+ df[column].astype(str).str.replace(pattern, replacement, regex=True)
550
+ )
551
+
552
+ elif operation == "extract":
553
+ if pattern is None:
554
+ return {"success": False, "error": "Pattern required for extract operation"}
555
+ session.df[column] = df[column].astype(str).str.extract(pattern, expand=False)
556
+
557
+ elif operation == "split":
558
+ if pattern is None:
559
+ pattern = " "
560
+ if value is not None and isinstance(value, int):
561
+ # Extract specific part after split
562
+ session.df[column] = df[column].astype(str).str.split(pattern).str[value]
563
+ else:
564
+ # Just do the split, take first part
565
+ session.df[column] = df[column].astype(str).str.split(pattern).str[0]
566
+
567
+ elif operation == "strip":
568
+ session.df[column] = df[column].astype(str).str.strip()
569
+
570
+ elif operation == "upper":
571
+ session.df[column] = df[column].astype(str).str.upper()
572
+
573
+ elif operation == "lower":
574
+ session.df[column] = df[column].astype(str).str.lower()
575
+
576
+ elif operation == "fill":
577
+ if value is None:
578
+ return {"success": False, "error": "Value required for fill operation"}
579
+ session.df[column] = df[column].fillna(value)
580
+
581
+ else:
582
+ return {"success": False, "error": f"Unknown operation: {operation}"}
583
+
584
+ updated_values_sample = session.df[column].head(5).tolist()
585
+
586
+ session.record_operation(
587
+ OperationType.UPDATE_COLUMN,
588
+ {
589
+ "column": column,
590
+ "operation": operation,
591
+ "pattern": pattern,
592
+ "replacement": replacement,
593
+ "value": str(value) if value is not None else None,
594
+ },
595
+ )
596
+
597
+ return {
598
+ "success": True,
599
+ "column": column,
600
+ "operation": operation,
601
+ "original_sample": original_values_sample,
602
+ "updated_sample": updated_values_sample,
603
+ "rows_updated": len(session.df),
604
+ }
605
+
606
+ except Exception as e:
607
+ logger.error(f"Error updating column: {e!s}")
608
+ return {"success": False, "error": str(e)}
609
+
610
+
611
async def remove_duplicates(
    session_id: str, subset: list[str] | None = None, keep: str = "first", ctx: Context = None
) -> dict[str, Any]:
    """
    Drop duplicate rows from the session frame.

    The de-duplicated frame replaces ``session.df`` with a fresh 0..n-1 index
    and a REMOVE_DUPLICATES operation is recorded.

    Args:
        session_id: Session identifier
        subset: Columns that define a duplicate (None or empty for all columns)
        keep: Passed to ``drop_duplicates``; the string "none" is translated
            to False (drop every duplicated row)
        ctx: FastMCP context

    Returns:
        Dict with success status, row counts before/after, the number of rows
        removed, and the ``subset``/``keep`` arguments as given; or
        ``{"success": False, "error": ...}`` on failure.
    """
    try:
        session = get_session_manager().get_session(session_id)
        if not session or session.df is None:
            return {"success": False, "error": "Invalid session or no data loaded"}

        df = session.df
        rows_before = len(df)

        if subset:
            missing_cols = [col for col in subset if col not in df.columns]
            if missing_cols:
                return {"success": False, "error": f"Columns not found: {missing_cols}"}

        # pandas expects the boolean False, not a string, to drop all duplicates.
        keep_param = False if keep == "none" else keep

        deduplicated = df.drop_duplicates(subset=subset, keep=keep_param)
        session.df = deduplicated.reset_index(drop=True)
        rows_after = len(session.df)
        removed = rows_before - rows_after

        session.record_operation(
            OperationType.REMOVE_DUPLICATES,
            {"subset": subset, "keep": keep, "rows_removed": removed},
        )

        return {
            "success": True,
            "rows_before": rows_before,
            "rows_after": rows_after,
            "duplicates_removed": removed,
            "subset": subset,
            "keep": keep,
        }

    except Exception as e:
        logger.error(f"Error removing duplicates: {e!s}")
        return {"success": False, "error": str(e)}