@mseep/csv-editor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +53 -0
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +38 -0
  3. package/.github/workflows/deploy-docs.yml +62 -0
  4. package/.github/workflows/publish-github.yml +52 -0
  5. package/.github/workflows/publish.yml +44 -0
  6. package/.github/workflows/test.yml +32 -0
  7. package/.pre-commit-config.yaml +157 -0
  8. package/ALTERNATIVE_PUBLISHING.md +175 -0
  9. package/ARCHITECTURE.md +1011 -0
  10. package/CHANGELOG.md +99 -0
  11. package/CODE_OF_CONDUCT.md +41 -0
  12. package/CONTRIBUTING.md +427 -0
  13. package/Dockerfile +22 -0
  14. package/LICENSE +21 -0
  15. package/MCP_CONFIG.md +505 -0
  16. package/PUBLISHING.md +210 -0
  17. package/README.md +400 -0
  18. package/SECURITY.md +61 -0
  19. package/docs/README.md +41 -0
  20. package/docs/blog/2019-05-28-first-blog-post.md +12 -0
  21. package/docs/blog/2019-05-29-long-blog-post.md +44 -0
  22. package/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
  23. package/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  24. package/docs/blog/2021-08-26-welcome/index.md +29 -0
  25. package/docs/blog/authors.yml +25 -0
  26. package/docs/blog/tags.yml +19 -0
  27. package/docs/docs/api/overview.md +183 -0
  28. package/docs/docs/installation.md +252 -0
  29. package/docs/docs/intro.md +87 -0
  30. package/docs/docs/tutorial-basics/_category_.json +8 -0
  31. package/docs/docs/tutorial-basics/congratulations.md +23 -0
  32. package/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
  33. package/docs/docs/tutorial-basics/create-a-document.md +57 -0
  34. package/docs/docs/tutorial-basics/create-a-page.md +43 -0
  35. package/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
  36. package/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
  37. package/docs/docs/tutorial-extras/_category_.json +7 -0
  38. package/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
  39. package/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
  40. package/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
  41. package/docs/docs/tutorial-extras/translate-your-site.md +88 -0
  42. package/docs/docs/tutorials/quickstart.md +365 -0
  43. package/docs/docusaurus.config.ts +163 -0
  44. package/docs/package-lock.json +17493 -0
  45. package/docs/package.json +48 -0
  46. package/docs/sidebars.ts +33 -0
  47. package/docs/src/components/HomepageFeatures/index.tsx +71 -0
  48. package/docs/src/components/HomepageFeatures/styles.module.css +11 -0
  49. package/docs/src/css/custom.css +30 -0
  50. package/docs/src/pages/index.module.css +23 -0
  51. package/docs/src/pages/index.tsx +44 -0
  52. package/docs/src/pages/markdown-page.md +7 -0
  53. package/docs/static/.nojekyll +0 -0
  54. package/docs/static/img/docusaurus-social-card.jpg +0 -0
  55. package/docs/static/img/docusaurus.png +0 -0
  56. package/docs/static/img/favicon.ico +0 -0
  57. package/docs/static/img/logo.svg +1 -0
  58. package/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
  59. package/docs/static/img/undraw_docusaurus_react.svg +170 -0
  60. package/docs/static/img/undraw_docusaurus_tree.svg +40 -0
  61. package/docs/tsconfig.json +8 -0
  62. package/examples/README.md +48 -0
  63. package/examples/auto_save_demo.py +206 -0
  64. package/examples/auto_save_overwrite.py +201 -0
  65. package/examples/basic_usage.py +135 -0
  66. package/examples/demo.py +139 -0
  67. package/examples/history_demo.py +317 -0
  68. package/examples/test_default_autosave.py +124 -0
  69. package/examples/update_consignee_example.py +179 -0
  70. package/package.json +51 -0
  71. package/plans/2026-04-19-fastmcp3-migration-plan.md +1045 -0
  72. package/pyproject.toml +331 -0
  73. package/requirements-dev.txt +30 -0
  74. package/requirements.txt +22 -0
  75. package/scripts/publish.py +67 -0
  76. package/smithery.yaml +15 -0
  77. package/specs/2026-04-19-fastmcp3-migration-design.md +243 -0
  78. package/src/csv_editor/__init__.py +8 -0
  79. package/src/csv_editor/models/__init__.py +39 -0
  80. package/src/csv_editor/models/auto_save.py +246 -0
  81. package/src/csv_editor/models/csv_session.py +468 -0
  82. package/src/csv_editor/models/data_models.py +244 -0
  83. package/src/csv_editor/models/history_manager.py +456 -0
  84. package/src/csv_editor/prompts/__init__.py +0 -0
  85. package/src/csv_editor/prompts/data_prompts.py +13 -0
  86. package/src/csv_editor/resources/__init__.py +0 -0
  87. package/src/csv_editor/resources/csv_resources.py +22 -0
  88. package/src/csv_editor/server.py +640 -0
  89. package/src/csv_editor/tools/__init__.py +5 -0
  90. package/src/csv_editor/tools/analytics.py +700 -0
  91. package/src/csv_editor/tools/auto_save_operations.py +235 -0
  92. package/src/csv_editor/tools/data_operations.py +3 -0
  93. package/src/csv_editor/tools/history_operations.py +315 -0
  94. package/src/csv_editor/tools/io_operations.py +431 -0
  95. package/src/csv_editor/tools/transformations.py +663 -0
  96. package/src/csv_editor/tools/validation.py +822 -0
  97. package/src/csv_editor/utils/__init__.py +0 -0
  98. package/src/csv_editor/utils/validators.py +205 -0
  99. package/tests/README.md +65 -0
  100. package/tests/__init__.py +7 -0
  101. package/tests/conftest.py +50 -0
  102. package/tests/test_auto_save.py +378 -0
  103. package/tests/test_basic.py +103 -0
  104. package/tests/test_integration.py +356 -0
  105. package/tests/test_server_boot.py +50 -0
  106. package/tests/test_settings.py +184 -0
@@ -0,0 +1,468 @@
1
+ """Session management for CSV Editor MCP Server."""
2
+
3
+ import logging
4
+ from datetime import datetime, timedelta
5
+ from pathlib import Path
6
+ from typing import Any
7
+ from uuid import uuid4
8
+
9
+ import pandas as pd
10
+ from pydantic_settings import BaseSettings
11
+
12
+ from .auto_save import AutoSaveConfig, AutoSaveManager
13
+ from .data_models import ExportFormat, OperationType, SessionInfo
14
+ from .history_manager import HistoryManager, HistoryStorage
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class CSVSettings(BaseSettings):
    """Environment-driven settings used when creating CSV Editor sessions.

    Values are resolved by pydantic-settings. With the prefix configured
    below, ``csv_history_dir`` is presumably overridable through the
    ``CSV_EDITOR_CSV_HISTORY_DIR`` environment variable (verify against the
    pydantic-settings version in use).
    """

    # pydantic-settings options: environment variable prefix, and
    # case-insensitive matching of variable names.
    model_config = {
        "env_prefix": "CSV_EDITOR_",
        "case_sensitive": False,
    }

    # Directory passed to HistoryManager as ``history_dir`` by CSVSession.
    csv_history_dir: str = ".csv_history"
25
+
26
+
27
# Process-wide settings singleton; stays None until first requested.
_settings: CSVSettings | None = None


def get_csv_settings() -> CSVSettings:
    """Return the shared ``CSVSettings`` instance, building it on first use."""
    global _settings
    if _settings is not None:
        return _settings

    _settings = CSVSettings()
    return _settings
37
+
38
+
39
class CSVSession:
    """A single CSV editing session.

    The session owns the working DataFrame (``df``), a copy of the data as it
    was first loaded (``original_df``), a legacy in-memory operation log
    (``operations_history``), an auto-save manager and, when history is
    enabled, a ``HistoryManager`` that backs undo/redo/restore.
    """

    def __init__(
        self,
        session_id: str | None = None,
        ttl_minutes: int = 60,
        auto_save_config: AutoSaveConfig | None = None,
        enable_history: bool = True,
        history_storage: HistoryStorage = HistoryStorage.JSON,
    ):
        """Initialize a new CSV session.

        Args:
            session_id: Identifier to use; a random UUID4 string when omitted.
            ttl_minutes: Idle time after which ``is_expired`` returns True.
            auto_save_config: Auto-save settings; a default ``AutoSaveConfig``
                is created when omitted.
            enable_history: Whether to create a ``HistoryManager``. When
                False, ``history_manager`` is None and only the legacy
                operation log is kept.
            history_storage: Storage backend handed to the history manager.
        """
        self.session_id = session_id or str(uuid4())
        self.created_at = datetime.utcnow()
        self.last_accessed = datetime.utcnow()
        self.ttl = timedelta(minutes=ttl_minutes)
        self.df: pd.DataFrame | None = None
        self.original_df: pd.DataFrame | None = None
        self.metadata: dict[str, Any] = {}
        self.operations_history: list[dict[str, Any]] = []  # Keep for backward compatibility
        self.file_path: str | None = None

        # Auto-save configuration
        self.auto_save_config = auto_save_config or AutoSaveConfig()
        self.auto_save_manager = AutoSaveManager(self.session_id, self.auto_save_config)

        # History management
        self.enable_history = enable_history
        settings = get_csv_settings()
        self.history_manager: HistoryManager | None = (
            HistoryManager(
                session_id=self.session_id,
                storage_type=history_storage,
                history_dir=settings.csv_history_dir,
                enable_snapshots=True,
                snapshot_interval=5,  # Take snapshot every 5 operations
            )
            if enable_history
            else None
        )

    def update_access_time(self):
        """Record the current UTC time as the last access time."""
        self.last_accessed = datetime.utcnow()

    def is_expired(self) -> bool:
        """Return True when the session has been idle for longer than its TTL."""
        return datetime.utcnow() - self.last_accessed > self.ttl

    def load_data(self, df: pd.DataFrame, file_path: str | None = None):
        """Load data into the session.

        Both ``df`` and ``original_df`` receive independent copies of the
        input, so later edits never touch the caller's frame. A LOAD
        operation is recorded.

        Args:
            df: Data to load.
            file_path: Source path of the data, if any; also forwarded to the
                auto-save manager as its original file path.
        """
        self.df = df.copy()
        self.original_df = df.copy()
        self.file_path = file_path
        self.update_access_time()
        self.record_operation(OperationType.LOAD, {"file_path": file_path, "shape": df.shape})

        # Update auto-save manager with original file path
        if file_path:
            self.auto_save_manager.original_file_path = file_path

    def get_info(self) -> SessionInfo:
        """Build a ``SessionInfo`` summary of the session.

        Raises:
            ValueError: If no data has been loaded.
        """
        if self.df is None:
            raise ValueError("No data loaded in session")

        memory_usage = self.df.memory_usage(deep=True).sum() / (1024 * 1024)  # Convert to MB

        return SessionInfo(
            session_id=self.session_id,
            created_at=self.created_at,
            last_accessed=self.last_accessed,
            row_count=len(self.df),
            column_count=len(self.df.columns),
            columns=self.df.columns.tolist(),
            memory_usage_mb=round(memory_usage, 2),
            operations_count=len(self.operations_history),
            file_path=self.file_path,
        )

    def record_operation(self, operation_type: OperationType, details: dict[str, Any]):
        """Record an operation in both the legacy log and the history manager.

        Also refreshes the access time and flags the session as needing an
        auto-save.

        Args:
            operation_type: Kind of operation performed.
            details: Free-form description of the operation.
        """
        # Legacy history (backward compatibility)
        self.operations_history.append(
            {
                "timestamp": datetime.utcnow().isoformat(),
                "type": operation_type.value,
                "details": details,
            }
        )
        self.update_access_time()

        # New persistent history (only meaningful once data is loaded)
        if self.history_manager and self.df is not None:
            self.history_manager.add_operation(
                operation_type=operation_type.value,
                details=details,
                current_data=self.df,
                metadata={
                    "file_path": self.file_path,
                    "shape": self.df.shape,
                },
            )

        # Mark that auto-save is needed
        self.metadata["needs_autosave"] = True

    async def trigger_auto_save_if_needed(self) -> dict[str, Any] | None:
        """Run an auto-save if save-after-operation is on and changes are pending.

        Returns:
            The save result dict, or None when no save was attempted. The
            pending flag is cleared only when the save reports success.
        """
        if self.auto_save_manager.should_save_after_operation() and self.metadata.get(
            "needs_autosave"
        ):
            result = await self.auto_save_manager.trigger_save(
                self._save_callback, "after_operation"
            )
            if result.get("success"):
                self.metadata["needs_autosave"] = False
            return result
        return None

    async def _save_callback(
        self, file_path: str, format: ExportFormat, encoding: str
    ) -> dict[str, Any]:
        """Write the current DataFrame to ``file_path``; used by the auto-save manager.

        Args:
            file_path: Destination path; missing parent directories are created.
            format: Export format to write.
            encoding: Text encoding, applied to the CSV and TSV writers only.

        Returns:
            A result dict with ``success`` and either the written path and
            shape, or an ``error`` message. Exceptions are reported in the
            result rather than raised.
        """
        try:
            if self.df is None:
                return {"success": False, "error": "No data to save"}

            # Handle different export formats
            target = Path(file_path)
            target.parent.mkdir(parents=True, exist_ok=True)

            if format == ExportFormat.CSV:
                self.df.to_csv(target, index=False, encoding=encoding)
            elif format == ExportFormat.TSV:
                self.df.to_csv(target, sep="\t", index=False, encoding=encoding)
            elif format == ExportFormat.JSON:
                self.df.to_json(target, orient="records", indent=2)
            elif format == ExportFormat.EXCEL:
                self.df.to_excel(target, index=False)
            elif format == ExportFormat.PARQUET:
                self.df.to_parquet(target, index=False)
            else:
                return {"success": False, "error": f"Unsupported format: {format}"}

            return {
                "success": True,
                "file_path": str(target),
                "rows": len(self.df),
                "columns": len(self.df.columns),
            }
        except Exception as e:
            return {"success": False, "error": str(e)}

    def rollback(self, steps: int = 1) -> bool:
        """Rollback operations by the specified number of steps.

        Only a full rollback is supported: when ``steps`` covers the whole
        legacy operation log, the data is reset to ``original_df`` and the
        log is emptied.

        Returns:
            True on a full rollback; False when no data was ever loaded or a
            partial rollback was requested.
        """
        if self.original_df is None:
            return False

        if steps >= len(self.operations_history):
            # Rollback to original state
            self.df = self.original_df.copy()
            self.operations_history = []
            return True

        # This is a simplified rollback - in production, you'd replay operations
        logger.warning("Partial rollback not fully implemented")
        return False

    async def enable_auto_save(self, config: dict[str, Any]) -> dict[str, Any]:
        """Enable or update auto-save configuration.

        Args:
            config: Mapping parsed with ``AutoSaveConfig.from_dict``.

        Returns:
            A result dict carrying the applied configuration, or an ``error``
            message if anything failed.
        """
        try:
            # Parse first so an invalid config leaves the current setup untouched.
            new_config = AutoSaveConfig.from_dict(config)

            # Stop the outgoing manager's periodic saver before replacing it;
            # otherwise it would be orphaned and could never be stopped
            # through this session again.
            await self.auto_save_manager.stop_periodic_save()

            # Update configuration
            self.auto_save_config = new_config
            self.auto_save_manager = AutoSaveManager(
                self.session_id,
                self.auto_save_config,
                self.file_path,  # Pass the original file path
            )

            # Start periodic save if needed
            if self.auto_save_config.enabled:
                await self.auto_save_manager.start_periodic_save(self._save_callback)

            return {
                "success": True,
                "message": "Auto-save configuration updated",
                "config": self.auto_save_config.to_dict(),
            }
        except Exception as e:
            return {"success": False, "error": str(e)}

    async def disable_auto_save(self) -> dict[str, Any]:
        """Stop periodic saving and mark auto-save as disabled."""
        try:
            await self.auto_save_manager.stop_periodic_save()
            self.auto_save_config.enabled = False
            return {"success": True, "message": "Auto-save disabled"}
        except Exception as e:
            return {"success": False, "error": str(e)}

    def get_auto_save_status(self) -> dict[str, Any]:
        """Return the auto-save manager's status report."""
        return self.auto_save_manager.get_status()

    async def manual_save(self) -> dict[str, Any]:
        """Trigger a save immediately, labelled as ``"manual"``."""
        return await self.auto_save_manager.trigger_save(self._save_callback, "manual")

    async def _auto_save_after(self, trigger: str) -> None:
        """Run an auto-save labelled ``trigger`` if save-after-operation is enabled."""
        if self.auto_save_manager.should_save_after_operation():
            await self.auto_save_manager.trigger_save(self._save_callback, trigger)

    async def undo(self) -> dict[str, Any]:
        """Undo the last operation.

        Returns:
            A result dict; on success it includes the undone operation and
            whether further undo/redo steps are available.
        """
        if not self.history_manager:
            return {"success": False, "error": "History is not enabled"}

        if not self.history_manager.can_undo():
            return {"success": False, "error": "No operations to undo"}

        try:
            operation, data_snapshot = self.history_manager.undo()

            if data_snapshot is None:
                return {"success": False, "error": "No snapshot available for undo"}

            self.df = data_snapshot

            # Trigger auto-save if configured
            await self._auto_save_after("undo")

            return {
                "success": True,
                "message": f"Undid operation: {operation.operation_type}",
                "operation": operation.to_dict(),
                "can_undo": self.history_manager.can_undo(),
                "can_redo": self.history_manager.can_redo(),
            }

        except Exception as e:
            logger.error(f"Error during undo: {e!s}")
            return {"success": False, "error": str(e)}

    async def redo(self) -> dict[str, Any]:
        """Redo the previously undone operation.

        Returns:
            A result dict; on success it includes the redone operation and
            whether further undo/redo steps are available.
        """
        if not self.history_manager:
            return {"success": False, "error": "History is not enabled"}

        if not self.history_manager.can_redo():
            return {"success": False, "error": "No operations to redo"}

        try:
            operation, data_snapshot = self.history_manager.redo()

            if data_snapshot is None:
                return {"success": False, "error": "No snapshot available for redo"}

            self.df = data_snapshot

            # Trigger auto-save if configured
            await self._auto_save_after("redo")

            return {
                "success": True,
                "message": f"Redid operation: {operation.operation_type}",
                "operation": operation.to_dict(),
                "can_undo": self.history_manager.can_undo(),
                "can_redo": self.history_manager.can_redo(),
            }

        except Exception as e:
            logger.error(f"Error during redo: {e!s}")
            return {"success": False, "error": str(e)}

    def get_history(self, limit: int | None = None) -> dict[str, Any]:
        """Get operation history.

        Args:
            limit: Maximum number of entries to return; a falsy value returns
                the whole legacy log when history is disabled.

        Returns:
            With history enabled: the manager's history plus statistics.
            Otherwise: the legacy log (most recent ``limit`` entries) and its
            total length.
        """
        if not self.history_manager:
            # Return legacy history if new history is not enabled
            return {
                "success": True,
                "history": self.operations_history[-limit:] if limit else self.operations_history,
                "total": len(self.operations_history),
            }

        try:
            history = self.history_manager.get_history(limit)
            stats = self.history_manager.get_statistics()

            return {"success": True, "history": history, "statistics": stats}
        except Exception as e:
            logger.error(f"Error getting history: {e!s}")
            return {"success": False, "error": str(e)}

    async def restore_to_operation(self, operation_id: str) -> dict[str, Any]:
        """Restore data to a specific operation point.

        Args:
            operation_id: Identifier of the operation to restore to.

        Returns:
            A result dict; on success it includes the restored data's shape.
        """
        if not self.history_manager:
            return {"success": False, "error": "History is not enabled"}

        try:
            data_snapshot = self.history_manager.restore_to_operation(operation_id)

            if data_snapshot is None:
                return {"success": False, "error": f"Could not restore to operation {operation_id}"}

            self.df = data_snapshot

            # Trigger auto-save if configured
            await self._auto_save_after("restore")

            return {
                "success": True,
                "message": f"Restored to operation {operation_id}",
                "shape": self.df.shape,
            }

        except Exception as e:
            logger.error(f"Error during restore: {e!s}")
            return {"success": False, "error": str(e)}

    async def clear(self):
        """Clear session data to free memory.

        Stops periodic auto-save, clears the history manager (if any) and
        drops both DataFrames, the metadata and the legacy operation log.
        """
        # Stop auto-save if running
        await self.auto_save_manager.stop_periodic_save()

        # Clear history if enabled
        if self.history_manager:
            self.history_manager.clear_history()

        self.df = None
        self.original_df = None
        self.metadata.clear()
        self.operations_history.clear()
371
+
372
+
373
class SessionManager:
    """Manages multiple CSV sessions keyed by session id.

    Expired sessions are not removed synchronously: they are collected in
    ``sessions_to_cleanup`` and actually torn down by the async
    ``cleanup_marked_sessions``.
    """

    def __init__(self, max_sessions: int = 100, ttl_minutes: int = 60):
        """Initialize the session manager.

        Args:
            max_sessions: Session count at which creating a new session
                evicts the least recently accessed one.
            ttl_minutes: Idle TTL given to every session this manager creates.
        """
        self.sessions: dict[str, CSVSession] = {}
        self.max_sessions = max_sessions
        self.ttl_minutes = ttl_minutes
        self.sessions_to_cleanup: set[str] = set()

    def create_session(self) -> str:
        """Create a new session and return its id.

        When the manager is full, the least recently accessed session is
        evicted first.
        """
        self._cleanup_expired()

        if len(self.sessions) >= self.max_sessions:
            # Remove oldest session
            # NOTE(review): the evicted session is dropped without awaiting
            # its async clear(), so any periodic auto-save it started is not
            # stopped here - confirm whether eviction should go through
            # remove_session instead.
            oldest = min(self.sessions.values(), key=lambda s: s.last_accessed)
            del self.sessions[oldest.session_id]
            # The id no longer exists, so it must not linger in the cleanup set.
            self.sessions_to_cleanup.discard(oldest.session_id)

        session = CSVSession(ttl_minutes=self.ttl_minutes)
        self.sessions[session.session_id] = session
        logger.info(f"Created new session: {session.session_id}")
        return session.session_id

    def get_session(self, session_id: str) -> CSVSession | None:
        """Get a live session by id and refresh its access time.

        Returns:
            The session, or None when the id is unknown or the session has
            expired (in which case it is marked for cleanup).
        """
        session = self.sessions.get(session_id)
        if session is None:
            return None

        if session.is_expired():
            # Mark for cleanup but don't remove synchronously
            self.sessions_to_cleanup.add(session_id)
            return None

        session.update_access_time()
        return session

    async def remove_session(self, session_id: str) -> bool:
        """Clear and remove a session.

        Returns:
            True if the session existed and was removed, False otherwise.
        """
        if session_id in self.sessions:
            await self.sessions[session_id].clear()
            del self.sessions[session_id]
            self.sessions_to_cleanup.discard(session_id)
            logger.info(f"Removed session: {session_id}")
            return True
        return False

    def list_sessions(self) -> list[SessionInfo]:
        """List all active sessions that have data loaded.

        Expired sessions are only *marked* by ``_cleanup_expired`` and still
        sit in ``self.sessions`` until the async cleanup runs, so they are
        filtered out here explicitly to stay consistent with ``get_session``.
        """
        self._cleanup_expired()
        return [
            session.get_info()
            for session in self.sessions.values()
            if session.df is not None and not session.is_expired()
        ]

    def _cleanup_expired(self):
        """Mark expired sessions for cleanup (does not remove them)."""
        expired = [sid for sid, session in self.sessions.items() if session.is_expired()]
        self.sessions_to_cleanup.update(expired)
        if expired:
            logger.info(f"Marked {len(expired)} expired sessions for cleanup")

    async def cleanup_marked_sessions(self):
        """Clean up sessions marked for removal."""
        # Iterate over a copy: the set is mutated while sessions are removed.
        for session_id in list(self.sessions_to_cleanup):
            await self.remove_session(session_id)
            self.sessions_to_cleanup.discard(session_id)

    def get_or_create_session(self, session_id: str | None = None) -> CSVSession:
        """Get the live session with ``session_id``, or create a new one.

        A new session (with a fresh id) is created when no id is given, the
        id is unknown, or the session has expired.
        """
        if session_id:
            session = self.get_session(session_id)
            if session:
                return session

        new_session_id = self.create_session()
        return self.sessions[new_session_id]

    def export_session_history(self, session_id: str) -> dict[str, Any] | None:
        """Export a session's legacy operation log as a JSON-style dict.

        Returns:
            A dict with the session id, creation time (ISO format),
            operations and metadata, or None when the session is unknown or
            expired.
        """
        session = self.get_session(session_id)
        if not session:
            return None

        return {
            "session_id": session.session_id,
            "created_at": session.created_at.isoformat(),
            "operations": session.operations_history,
            "metadata": session.metadata,
        }
457
+
458
+
459
# Process-wide SessionManager singleton; stays None until first requested.
_session_manager: SessionManager | None = None


def get_session_manager() -> SessionManager:
    """Return the shared ``SessionManager``, building it on first use."""
    global _session_manager
    if _session_manager is not None:
        return _session_manager

    _session_manager = SessionManager()
    return _session_manager