@mseep/csv-editor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +53 -0
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +38 -0
  3. package/.github/workflows/deploy-docs.yml +62 -0
  4. package/.github/workflows/publish-github.yml +52 -0
  5. package/.github/workflows/publish.yml +44 -0
  6. package/.github/workflows/test.yml +32 -0
  7. package/.pre-commit-config.yaml +157 -0
  8. package/ALTERNATIVE_PUBLISHING.md +175 -0
  9. package/ARCHITECTURE.md +1011 -0
  10. package/CHANGELOG.md +99 -0
  11. package/CODE_OF_CONDUCT.md +41 -0
  12. package/CONTRIBUTING.md +427 -0
  13. package/Dockerfile +22 -0
  14. package/LICENSE +21 -0
  15. package/MCP_CONFIG.md +505 -0
  16. package/PUBLISHING.md +210 -0
  17. package/README.md +400 -0
  18. package/SECURITY.md +61 -0
  19. package/docs/README.md +41 -0
  20. package/docs/blog/2019-05-28-first-blog-post.md +12 -0
  21. package/docs/blog/2019-05-29-long-blog-post.md +44 -0
  22. package/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
  23. package/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  24. package/docs/blog/2021-08-26-welcome/index.md +29 -0
  25. package/docs/blog/authors.yml +25 -0
  26. package/docs/blog/tags.yml +19 -0
  27. package/docs/docs/api/overview.md +183 -0
  28. package/docs/docs/installation.md +252 -0
  29. package/docs/docs/intro.md +87 -0
  30. package/docs/docs/tutorial-basics/_category_.json +8 -0
  31. package/docs/docs/tutorial-basics/congratulations.md +23 -0
  32. package/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
  33. package/docs/docs/tutorial-basics/create-a-document.md +57 -0
  34. package/docs/docs/tutorial-basics/create-a-page.md +43 -0
  35. package/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
  36. package/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
  37. package/docs/docs/tutorial-extras/_category_.json +7 -0
  38. package/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
  39. package/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
  40. package/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
  41. package/docs/docs/tutorial-extras/translate-your-site.md +88 -0
  42. package/docs/docs/tutorials/quickstart.md +365 -0
  43. package/docs/docusaurus.config.ts +163 -0
  44. package/docs/package-lock.json +17493 -0
  45. package/docs/package.json +48 -0
  46. package/docs/sidebars.ts +33 -0
  47. package/docs/src/components/HomepageFeatures/index.tsx +71 -0
  48. package/docs/src/components/HomepageFeatures/styles.module.css +11 -0
  49. package/docs/src/css/custom.css +30 -0
  50. package/docs/src/pages/index.module.css +23 -0
  51. package/docs/src/pages/index.tsx +44 -0
  52. package/docs/src/pages/markdown-page.md +7 -0
  53. package/docs/static/.nojekyll +0 -0
  54. package/docs/static/img/docusaurus-social-card.jpg +0 -0
  55. package/docs/static/img/docusaurus.png +0 -0
  56. package/docs/static/img/favicon.ico +0 -0
  57. package/docs/static/img/logo.svg +1 -0
  58. package/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
  59. package/docs/static/img/undraw_docusaurus_react.svg +170 -0
  60. package/docs/static/img/undraw_docusaurus_tree.svg +40 -0
  61. package/docs/tsconfig.json +8 -0
  62. package/examples/README.md +48 -0
  63. package/examples/auto_save_demo.py +206 -0
  64. package/examples/auto_save_overwrite.py +201 -0
  65. package/examples/basic_usage.py +135 -0
  66. package/examples/demo.py +139 -0
  67. package/examples/history_demo.py +317 -0
  68. package/examples/test_default_autosave.py +124 -0
  69. package/examples/update_consignee_example.py +179 -0
  70. package/package.json +51 -0
  71. package/plans/2026-04-19-fastmcp3-migration-plan.md +1045 -0
  72. package/pyproject.toml +331 -0
  73. package/requirements-dev.txt +30 -0
  74. package/requirements.txt +22 -0
  75. package/scripts/publish.py +67 -0
  76. package/smithery.yaml +15 -0
  77. package/specs/2026-04-19-fastmcp3-migration-design.md +243 -0
  78. package/src/csv_editor/__init__.py +8 -0
  79. package/src/csv_editor/models/__init__.py +39 -0
  80. package/src/csv_editor/models/auto_save.py +246 -0
  81. package/src/csv_editor/models/csv_session.py +468 -0
  82. package/src/csv_editor/models/data_models.py +244 -0
  83. package/src/csv_editor/models/history_manager.py +456 -0
  84. package/src/csv_editor/prompts/__init__.py +0 -0
  85. package/src/csv_editor/prompts/data_prompts.py +13 -0
  86. package/src/csv_editor/resources/__init__.py +0 -0
  87. package/src/csv_editor/resources/csv_resources.py +22 -0
  88. package/src/csv_editor/server.py +640 -0
  89. package/src/csv_editor/tools/__init__.py +5 -0
  90. package/src/csv_editor/tools/analytics.py +700 -0
  91. package/src/csv_editor/tools/auto_save_operations.py +235 -0
  92. package/src/csv_editor/tools/data_operations.py +3 -0
  93. package/src/csv_editor/tools/history_operations.py +315 -0
  94. package/src/csv_editor/tools/io_operations.py +431 -0
  95. package/src/csv_editor/tools/transformations.py +663 -0
  96. package/src/csv_editor/tools/validation.py +822 -0
  97. package/src/csv_editor/utils/__init__.py +0 -0
  98. package/src/csv_editor/utils/validators.py +205 -0
  99. package/tests/README.md +65 -0
  100. package/tests/__init__.py +7 -0
  101. package/tests/conftest.py +50 -0
  102. package/tests/test_auto_save.py +378 -0
  103. package/tests/test_basic.py +103 -0
  104. package/tests/test_integration.py +356 -0
  105. package/tests/test_server_boot.py +50 -0
  106. package/tests/test_settings.py +184 -0
@@ -0,0 +1,1011 @@
1
+ # CSV Editor - Research & Implementation Documentation
2
+
3
+ ## ✅ Implementation Status: COMPLETED with AUTO-SAVE!
4
+
5
+ **This comprehensive research document guided the successful implementation of a production-ready CSV editor MCP server using FastMCP.**
6
+
7
+ ### 🎯 Implementation Summary
8
+ - **40+ Tools Implemented**: All planned CSV operations + auto-save + complete history system
9
+ - **Auto-Save by Default**: Automatically saves after each operation (overwrite mode)
10
+ - **History & Undo/Redo**: Complete operation tracking with persistent storage
11
+ - **Modern Stack**: Using uv (ultra-fast package manager), Ruff, Black, MyPy
12
+ - **Full Type Safety**: 100% type hints with Pydantic validation
13
+ - **Production Ready**: Error handling, logging, session management, auto-save, history
14
+ - **Latest Dependencies**: FastMCP 2.11.3+, Pandas 2.2.3+, NumPy 2.1.3+
15
+
16
+ ### 📦 Technology Decisions
17
+ - **Package Manager**: **uv** chosen over pip/poetry/hatch (10-100x faster)
18
+ - **Build Backend**: Hatchling (for packaging only, not environment management)
19
+ - **Linting**: Ruff (replaces flake8, isort, pylint - all in one)
20
+ - **Formatting**: Black
21
+ - **Type Checking**: MyPy with strict mode
22
+
23
+ ---
24
+
25
+ ## Original Research & Planning
26
+
27
+ This document contains the comprehensive research that guided the implementation. The server provides CSV manipulation capabilities through a standardized Model Context Protocol interface, enabling AI assistants to perform complex data operations.
28
+
29
+ ## Documentation & Resources
30
+
31
+ ### FastMCP Documentation
32
+ - **Official Website**: [https://gofastmcp.com](https://gofastmcp.com)
33
+ - **Getting Started Guide**: [https://gofastmcp.com/getting-started](https://gofastmcp.com/getting-started)
34
+ - **Concepts Overview**: [https://gofastmcp.com/concepts](https://gofastmcp.com/concepts)
35
+ - Tools: [https://gofastmcp.com/concepts/tools](https://gofastmcp.com/concepts/tools)
36
+ - Resources: [https://gofastmcp.com/concepts/resources](https://gofastmcp.com/concepts/resources)
37
+ - Context: [https://gofastmcp.com/concepts/context](https://gofastmcp.com/concepts/context)
38
+ - Prompts: [https://gofastmcp.com/concepts/prompts](https://gofastmcp.com/concepts/prompts)
39
+ - **Tutorials**: [https://gofastmcp.com/tutorials](https://gofastmcp.com/tutorials)
40
+ - Your First Server: [https://gofastmcp.com/tutorials/your-first-server](https://gofastmcp.com/tutorials/your-first-server)
41
+ - **API Reference**: [https://gofastmcp.com/reference](https://gofastmcp.com/reference)
42
+
43
+ ### GitHub Resources
44
+ - **FastMCP Repository**: [https://github.com/jlowin/fastmcp](https://github.com/jlowin/fastmcp)
45
+ - **Examples Directory**: [https://github.com/jlowin/fastmcp/tree/main/examples](https://github.com/jlowin/fastmcp/tree/main/examples)
46
+ - **Issues & Discussions**: [https://github.com/jlowin/fastmcp/issues](https://github.com/jlowin/fastmcp/issues)
47
+
48
+ ### MCP Protocol Documentation
49
+ - **Model Context Protocol Spec**: [https://modelcontextprotocol.io](https://modelcontextprotocol.io)
50
+ - **MCP Python SDK**: [https://github.com/modelcontextprotocol/python-sdk](https://github.com/modelcontextprotocol/python-sdk)
51
+ - **MCP TypeScript SDK**: [https://github.com/modelcontextprotocol/typescript-sdk](https://github.com/modelcontextprotocol/typescript-sdk)
52
+
53
+ ### Supporting Libraries Documentation
54
+ - **Pandas Documentation**: [https://pandas.pydata.org/docs/](https://pandas.pydata.org/docs/)
55
+ - CSV Reading: [https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html)
56
+ - DataFrame Operations: [https://pandas.pydata.org/docs/reference/frame.html](https://pandas.pydata.org/docs/reference/frame.html)
57
+ - **NumPy Documentation**: [https://numpy.org/doc/stable/](https://numpy.org/doc/stable/)
58
+ - **Python Type Hints**: [https://docs.python.org/3/library/typing.html](https://docs.python.org/3/library/typing.html)
59
+ - **Pydantic for Validation**: [https://docs.pydantic.dev/](https://docs.pydantic.dev/)
60
+
61
+ ### Deployment & Infrastructure
62
+ - **Docker Documentation**: [https://docs.docker.com/](https://docs.docker.com/)
63
+ - **Redis Documentation**: [https://redis.io/documentation](https://redis.io/documentation)
64
+ - **FastAPI (for HTTP transport)**: [https://fastapi.tiangolo.com/](https://fastapi.tiangolo.com/)
65
+ - **SSE (Server-Sent Events)**: [https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events)
66
+
67
+ ### Testing Resources
68
+ - **Pytest Documentation**: [https://docs.pytest.org/](https://docs.pytest.org/)
69
+ - **Pytest-asyncio**: [https://pytest-asyncio.readthedocs.io/](https://pytest-asyncio.readthedocs.io/)
70
+ - **Coverage.py**: [https://coverage.readthedocs.io/](https://coverage.readthedocs.io/)
71
+
72
+ ### AI Integration Guides
73
+ - **Claude Desktop Integration**: [https://claude.ai/docs/desktop-integration](https://claude.ai/docs/desktop-integration)
74
+ - **OpenAI Function Calling**: [https://platform.openai.com/docs/guides/function-calling](https://platform.openai.com/docs/guides/function-calling)
75
+ - **LangChain MCP Integration**: [https://python.langchain.com/docs/integrations/tools/mcp](https://python.langchain.com/docs/integrations/tools/mcp)
76
+
77
+ ### Best Practices & Patterns
78
+ - **CSV File Best Practices**: [https://www.w3.org/TR/tabular-data-primer/](https://www.w3.org/TR/tabular-data-primer/)
79
+ - **REST API Design**: [https://restfulapi.net/](https://restfulapi.net/)
80
+ - **Python Async Best Practices**: [https://docs.python.org/3/library/asyncio-task.html](https://docs.python.org/3/library/asyncio-task.html)
81
+ - **Error Handling in Python**: [https://docs.python.org/3/tutorial/errors.html](https://docs.python.org/3/tutorial/errors.html)
82
+
83
+ ### Community & Support
84
+ - **FastMCP Discord**: Check GitHub repository for invite link
85
+ - **MCP Community Forum**: [https://community.modelcontextprotocol.io](https://community.modelcontextprotocol.io)
86
+ - **Stack Overflow Tags**: `fastmcp`, `model-context-protocol`, `mcp-server`
87
+
88
+ ### Video Tutorials & Courses
89
+ - **FastMCP YouTube Channel**: Search "FastMCP tutorials" on YouTube
90
+ - **MCP Introduction Videos**: Available on the official MCP website
91
+ - **Python Async Programming**: [Real Python Async IO Tutorial](https://realpython.com/async-io-python/)
92
+
93
+ ### Reference Implementations & Examples
94
+ - **FastMCP Examples**: [https://github.com/jlowin/fastmcp/tree/main/examples](https://github.com/jlowin/fastmcp/tree/main/examples)
95
+ - Simple Echo Server: `examples/echo.py`
96
+ - File Operations: `examples/get_file.py`
97
+ - Complex Inputs: `examples/complex_inputs.py`
98
+ - Sampling Example: `examples/sampling.py`
99
+ - **MCP Server Examples**: [https://github.com/modelcontextprotocol/servers](https://github.com/modelcontextprotocol/servers)
100
+ - **Community MCP Servers**: [https://github.com/topics/mcp-server](https://github.com/topics/mcp-server)
101
+
102
+ ### Data Processing Libraries
103
+ - **Polars (Alternative to Pandas)**: [https://pola.rs/](https://pola.rs/)
104
+ - **DuckDB for SQL on CSV**: [https://duckdb.org/docs/data/csv](https://duckdb.org/docs/data/csv)
105
+ - **PyArrow for Parquet**: [https://arrow.apache.org/docs/python/](https://arrow.apache.org/docs/python/)
106
+ - **Dask for Large Datasets**: [https://docs.dask.org/](https://docs.dask.org/)
107
+
108
+ ### Performance & Optimization
109
+ - **Memory Profiling with memory_profiler**: [https://pypi.org/project/memory-profiler/](https://pypi.org/project/memory-profiler/)
110
+ - **Line Profiler**: [https://github.com/pyutils/line_profiler](https://github.com/pyutils/line_profiler)
111
+ - **Pandas Performance Tips**: [https://pandas.pydata.org/docs/user_guide/enhancingperf.html](https://pandas.pydata.org/docs/user_guide/enhancingperf.html)
112
+
113
+ ### Security Resources
114
+ - **OWASP CSV Injection**: [https://owasp.org/www-community/attacks/CSV_Injection](https://owasp.org/www-community/attacks/CSV_Injection)
115
+ - **Python Security Best Practices**: [https://docs.python.org/3/library/security_warnings.html](https://docs.python.org/3/library/security_warnings.html)
116
+ - **Input Validation Guide**: [https://cheatsheetseries.owasp.org/cheatsheets/Input_Validation_Cheat_Sheet.html](https://cheatsheetseries.owasp.org/cheatsheets/Input_Validation_Cheat_Sheet.html)
117
+
118
+ ### Monitoring & Logging
119
+ - **Structured Logging with structlog**: [https://www.structlog.org/](https://www.structlog.org/)
120
+ - **OpenTelemetry Python**: [https://opentelemetry.io/docs/languages/python/](https://opentelemetry.io/docs/languages/python/)
121
+ - **Prometheus Python Client**: [https://github.com/prometheus/client_python](https://github.com/prometheus/client_python)
122
+
123
+ ### Related Tools & Projects
124
+ - **Jupyter Notebooks**: [https://jupyter.org/documentation](https://jupyter.org/documentation)
125
+ - **Streamlit for Data Apps**: [https://docs.streamlit.io/](https://docs.streamlit.io/)
126
+ - **Apache Superset**: [https://superset.apache.org/](https://superset.apache.org/)
127
+ - **Metabase**: [https://www.metabase.com/docs/](https://www.metabase.com/docs/)
128
+
129
+ ## 1. Architecture Overview
130
+
131
+ ### 1.1 Core Technologies
132
+ - **FastMCP v2.0**: Primary framework for MCP server implementation
133
+ - **Pandas**: Data manipulation and CSV processing
134
+ - **NumPy**: Numerical operations and statistics
135
+ - **Python 3.10+**: Runtime environment (minimum version supported by the NumPy 2.1.3+ dependency)
136
+ - **Type Hints**: Full typing for automatic validation
137
+
138
+ ### 1.2 Design Principles
139
+ - **Stateless Operations**: Each tool call is independent; any data it works on is looked up through the `session_id` it is given
140
+ - **Session Management**: Support for multi-user concurrent sessions
141
+ - **Error Recovery**: Graceful handling of malformed data
142
+ - **Performance**: Efficient processing of large CSV files (up to 1GB)
143
+ - **Extensibility**: Plugin architecture for custom operations
144
+
145
+ ## 2. Core Components
146
+
147
+ ### 2.1 MCP Server Structure
148
+ ```
149
+ csv-editor/
150
+ ├── src/
151
+ │ ├── __init__.py
152
+ │ ├── server.py # Main MCP server definition
153
+ │ ├── tools/
154
+ │ │ ├── __init__.py
155
+ │ │ ├── data_operations.py # CRUD operations
156
+ │ │ ├── transformations.py # Data transformation tools
157
+ │ │ ├── analytics.py # Statistical analysis
158
+ │ │ ├── validation.py # Data validation tools
159
+ │ │ └── io_operations.py # Import/Export tools
160
+ │ ├── resources/
161
+ │ │ ├── __init__.py
162
+ │ │ ├── csv_resources.py # CSV data resources
163
+ │ │ └── schema_resources.py # Schema definitions
164
+ │ ├── prompts/
165
+ │ │ ├── __init__.py
166
+ │ │ └── data_prompts.py # Reusable prompts
167
+ │ ├── models/
168
+ │ │ ├── __init__.py
169
+ │ │ ├── csv_session.py # Session management
170
+ │ │ └── data_models.py # Data type definitions
171
+ │ └── utils/
172
+ │ ├── __init__.py
173
+ │ ├── validators.py # Input validation
174
+ │ ├── converters.py # Type conversion
175
+ │ └── cache_manager.py # Caching logic
176
+ ├── tests/
177
+ ├── examples/
178
+ ├── requirements.txt
179
+ └── README.md
180
+ ```
181
+
182
+ ### 2.2 Session Management Architecture
183
+ - **Session Store**: Redis or in-memory dictionary
184
+ - **Session ID**: UUID-based unique identifiers
185
+ - **Session Data**: DataFrame state, metadata, history
186
+ - **TTL Management**: Configurable session expiration
187
+ - **Cleanup**: Automatic garbage collection of expired sessions
188
+
189
+ ## 3. Tool Categories and Implementation
190
+
191
+ ### 3.1 Data Loading and I/O Operations
192
+
193
+ #### 3.1.1 Core Loading Tools
194
+ ```python
195
+ @mcp.tool
196
+ async def load_csv(
197
+ file_path: str,
198
+ encoding: str = "utf-8",
199
+ delimiter: str = ",",
200
+ session_id: Optional[str] = None,
201
+ ctx: Context = None
202
+ ) -> Dict[str, Any]:
203
+ """Load CSV file with automatic type inference"""
204
+
205
+ @mcp.tool
206
+ async def load_csv_from_url(
207
+ url: str,
208
+ session_id: Optional[str] = None,
209
+ ctx: Context = None
210
+ ) -> Dict[str, Any]:
211
+ """Load CSV from HTTP/HTTPS URL"""
212
+
213
+ @mcp.tool
214
+ async def load_csv_from_content(
215
+ content: str,
216
+ delimiter: str = ",",
217
+ session_id: Optional[str] = None,
218
+ ctx: Context = None
219
+ ) -> Dict[str, Any]:
220
+ """Load CSV from string content"""
221
+ ```
222
+
223
+ #### 3.1.2 Export Tools
224
+ ```python
225
+ @mcp.tool
226
+ async def export_csv(
227
+ session_id: str,
228
+ file_path: Optional[str] = None,
229
+ format: Literal["csv", "tsv", "excel", "json", "parquet"] = "csv",
230
+ ctx: Context = None
231
+ ) -> Dict[str, Any]:
232
+ """Export data to various formats"""
233
+
234
+ @mcp.tool
235
+ async def get_download_link(
236
+ session_id: str,
237
+ format: str = "csv",
238
+ ctx: Context = None
239
+ ) -> str:
240
+ """Generate temporary download link"""
241
+ ```
242
+
243
+ ### 3.2 Data Manipulation Tools
244
+
245
+ #### 3.2.1 Filtering and Selection
246
+ ```python
247
+ @mcp.tool
248
+ async def filter_rows(
249
+ session_id: str,
250
+ conditions: List[Dict[str, Any]],
251
+ logical_operator: Literal["AND", "OR"] = "AND",
252
+ ctx: Context = None
253
+ ) -> Dict[str, Any]:
254
+ """Advanced filtering with multiple conditions"""
255
+
256
+ @mcp.tool
257
+ async def select_columns(
258
+ session_id: str,
259
+ columns: List[str],
260
+ exclude: bool = False,
261
+ ctx: Context = None
262
+ ) -> Dict[str, Any]:
263
+ """Select or exclude specific columns"""
264
+
265
+ @mcp.tool
266
+ async def query_data(
267
+ session_id: str,
268
+ sql_query: str,
269
+ ctx: Context = None
270
+ ) -> Dict[str, Any]:
271
+ """Execute SQL-like queries on CSV data"""
272
+ ```
273
+
274
+ #### 3.2.2 Sorting and Ordering
275
+ ```python
276
+ @mcp.tool
277
+ async def sort_data(
278
+ session_id: str,
279
+ columns: List[str],
280
+ ascending: List[bool] = None,
281
+ ctx: Context = None
282
+ ) -> Dict[str, Any]:
283
+ """Multi-column sorting"""
284
+
285
+ @mcp.tool
286
+ async def rank_data(
287
+ session_id: str,
288
+ column: str,
289
+ method: Literal["average", "min", "max", "first", "dense"] = "average",
290
+ ctx: Context = None
291
+ ) -> Dict[str, Any]:
292
+ """Add ranking column"""
293
+ ```
294
+
295
+ ### 3.3 Data Transformation Tools
296
+
297
+ #### 3.3.1 Column Operations
298
+ ```python
299
+ @mcp.tool
300
+ async def add_calculated_column(
301
+ session_id: str,
302
+ column_name: str,
303
+ expression: str,
304
+ ctx: Context = None
305
+ ) -> Dict[str, Any]:
306
+ """Add column with calculated values"""
307
+
308
+ @mcp.tool
309
+ async def rename_columns(
310
+ session_id: str,
311
+ column_mapping: Dict[str, str],
312
+ ctx: Context = None
313
+ ) -> Dict[str, Any]:
314
+ """Rename multiple columns"""
315
+
316
+ @mcp.tool
317
+ async def change_column_type(
318
+ session_id: str,
319
+ column: str,
320
+ dtype: Literal["int", "float", "string", "datetime", "boolean"],
321
+ ctx: Context = None
322
+ ) -> Dict[str, Any]:
323
+ """Convert column data type"""
324
+ ```
325
+
326
+ #### 3.3.2 Data Cleaning
327
+ ```python
328
+ @mcp.tool
329
+ async def handle_missing_values(
330
+ session_id: str,
331
+ strategy: Literal["drop", "fill", "interpolate", "forward_fill", "backward_fill"],
332
+ columns: Optional[List[str]] = None,
333
+ fill_value: Any = None,
334
+ ctx: Context = None
335
+ ) -> Dict[str, Any]:
336
+ """Handle missing values with various strategies"""
337
+
338
+ @mcp.tool
339
+ async def remove_duplicates(
340
+ session_id: str,
341
+ columns: Optional[List[str]] = None,
342
+ keep: Literal["first", "last", "none"] = "first",
343
+ ctx: Context = None
344
+ ) -> Dict[str, Any]:
345
+ """Remove duplicate rows"""
346
+
347
+ @mcp.tool
348
+ async def trim_whitespace(
349
+ session_id: str,
350
+ columns: Optional[List[str]] = None,
351
+ ctx: Context = None
352
+ ) -> Dict[str, Any]:
353
+ """Remove leading/trailing whitespace"""
354
+ ```
355
+
356
+ ### 3.4 Data Analysis Tools
357
+
358
+ #### 3.4.1 Statistical Analysis
359
+ ```python
360
+ @mcp.tool
361
+ async def get_statistics(
362
+ session_id: str,
363
+ columns: Optional[List[str]] = None,
364
+ include_percentiles: bool = True,
365
+ ctx: Context = None
366
+ ) -> Dict[str, Any]:
367
+ """Comprehensive statistical summary"""
368
+
369
+ @mcp.tool
370
+ async def correlation_analysis(
371
+ session_id: str,
372
+ columns: Optional[List[str]] = None,
373
+ method: Literal["pearson", "spearman", "kendall"] = "pearson",
374
+ ctx: Context = None
375
+ ) -> Dict[str, Any]:
376
+ """Calculate correlation matrix"""
377
+
378
+ @mcp.tool
379
+ async def group_statistics(
380
+ session_id: str,
381
+ group_by: List[str],
382
+ aggregations: Dict[str, List[str]],
383
+ ctx: Context = None
384
+ ) -> Dict[str, Any]:
385
+ """Group-by aggregations"""
386
+ ```
387
+
388
+ #### 3.4.2 Data Profiling
389
+ ```python
390
+ @mcp.tool
391
+ async def profile_data(
392
+ session_id: str,
393
+ ctx: Context = None
394
+ ) -> Dict[str, Any]:
395
+ """Complete data profiling report"""
396
+
397
+ @mcp.tool
398
+ async def detect_outliers(
399
+ session_id: str,
400
+ columns: List[str],
401
+ method: Literal["iqr", "zscore", "isolation_forest"] = "iqr",
402
+ ctx: Context = None
403
+ ) -> Dict[str, Any]:
404
+ """Detect statistical outliers"""
405
+ ```
406
+
407
+ ### 3.5 Data Validation Tools
408
+
409
+ ```python
410
+ @mcp.tool
411
+ async def validate_schema(
412
+ session_id: str,
413
+ schema: Dict[str, Any],
414
+ ctx: Context = None
415
+ ) -> Dict[str, Any]:
416
+ """Validate data against schema"""
417
+
418
+ @mcp.tool
419
+ async def check_data_quality(
420
+ session_id: str,
421
+ rules: List[Dict[str, Any]],
422
+ ctx: Context = None
423
+ ) -> Dict[str, Any]:
424
+ """Apply data quality rules"""
425
+
426
+ @mcp.tool
427
+ async def find_anomalies(
428
+ session_id: str,
429
+ columns: List[str],
430
+ ctx: Context = None
431
+ ) -> Dict[str, Any]:
432
+ """Detect data anomalies"""
433
+ ```
434
+
435
+ ### 3.6 Advanced Operations
436
+
437
+ ```python
438
+ @mcp.tool
439
+ async def merge_datasets(
440
+ left_session: str,
441
+ right_session: str,
442
+ on: Union[str, List[str]],
443
+ how: Literal["inner", "left", "right", "outer"] = "inner",
444
+ ctx: Context = None
445
+ ) -> Dict[str, Any]:
446
+ """Merge two CSV datasets"""
447
+
448
+ @mcp.tool
449
+ async def pivot_table(
450
+ session_id: str,
451
+ index: List[str],
452
+ columns: List[str],
453
+ values: str,
454
+ aggfunc: str = "mean",
455
+ ctx: Context = None
456
+ ) -> Dict[str, Any]:
457
+ """Create pivot table"""
458
+
459
+ @mcp.tool
460
+ async def unpivot_data(
461
+ session_id: str,
462
+ id_vars: List[str],
463
+ value_vars: Optional[List[str]] = None,
464
+ ctx: Context = None
465
+ ) -> Dict[str, Any]:
466
+ """Unpivot/melt data"""
467
+ ```
468
+
469
+ ## 4. Resource Implementation
470
+
471
+ ### 4.1 Dynamic CSV Resources
472
+ ```python
473
+ @mcp.resource("csv://{session_id}/data")
474
+ async def get_csv_data(session_id: str, ctx: Context) -> Dict[str, Any]:
475
+ """Get current CSV data as resource"""
476
+
477
+ @mcp.resource("csv://{session_id}/schema")
478
+ async def get_csv_schema(session_id: str, ctx: Context) -> Dict[str, Any]:
479
+ """Get CSV schema information"""
480
+
481
+ @mcp.resource("csv://{session_id}/preview")
482
+ async def get_data_preview(session_id: str, ctx: Context) -> Dict[str, Any]:
483
+ """Get data preview (first 100 rows)"""
484
+ ```
485
+
486
+ ### 4.2 Metadata Resources
487
+ ```python
488
+ @mcp.resource("sessions://active")
489
+ async def list_active_sessions(ctx: Context) -> List[Dict[str, Any]]:
490
+ """List all active CSV sessions"""
491
+
492
+ @mcp.resource("operations://history/{session_id}")
493
+ async def get_operation_history(session_id: str, ctx: Context) -> List[Dict[str, Any]]:
494
+ """Get operation history for session"""
495
+ ```
496
+
497
+ ## 5. Prompt Templates
498
+
499
+ ```python
500
+ @mcp.prompt
501
+ def analyze_csv_prompt(
502
+ session_id: str,
503
+ analysis_type: Literal["summary", "quality", "insights"]
504
+ ) -> str:
505
+ """Generate analysis prompt for CSV data"""
506
+
507
+ @mcp.prompt
508
+ def suggest_transformations_prompt(
509
+ session_id: str,
510
+ goal: str
511
+ ) -> str:
512
+ """Suggest data transformations based on goal"""
513
+
514
+ @mcp.prompt
515
+ def data_cleaning_prompt(
516
+ session_id: str,
517
+ issues: List[str]
518
+ ) -> str:
519
+ """Generate data cleaning recommendations"""
520
+ ```
521
+
522
+ ## 6. Context Integration
523
+
524
+ ### 6.1 Progress Reporting
525
+ ```python
526
+ async def process_large_csv(file_path: str, ctx: Context):
527
+ await ctx.report_progress(0, "Starting CSV processing...")
528
+
529
+ # Load data in chunks
530
+ total_chunks = calculate_chunks(file_path)
531
+ for i, chunk in enumerate(read_csv_chunks(file_path)):
532
+ await ctx.report_progress(
533
+ (i + 1) / total_chunks,
534
+ f"Processing chunk {i + 1} of {total_chunks}"
535
+ )
536
+ process_chunk(chunk)
537
+
538
+ await ctx.report_progress(1.0, "Processing complete!")
539
+ ```
540
+
541
+ ### 6.2 Logging Integration
542
+ ```python
543
+ async def validate_data(session_id: str, ctx: Context):
544
+ await ctx.info(f"Starting validation for session {session_id}")
545
+
546
+ try:
547
+ results = perform_validation(session_id)
548
+ await ctx.info(f"Validation complete: {len(results)} issues found")
549
+ return results
550
+ except Exception as e:
551
+ await ctx.error(f"Validation failed: {str(e)}")
552
+ raise
553
+ ```
554
+
555
+ ### 6.3 LLM Sampling for Intelligence
556
+ ```python
557
+ async def smart_data_analysis(session_id: str, ctx: Context):
558
+ data_summary = get_data_summary(session_id)
559
+
560
+ analysis = await ctx.sample(
561
+ f"Analyze this dataset and suggest improvements:\n{data_summary}",
562
+ max_tokens=500
563
+ )
564
+
565
+ return {
566
+ "analysis": analysis.text,
567
+ "suggestions": parse_suggestions(analysis.text)
568
+ }
569
+ ```
570
+
571
+ ## 7. Error Handling Strategy
572
+
573
+ ### 7.1 Error Categories
574
+ - **Input Validation Errors**: Invalid parameters, missing required fields
575
+ - **Data Format Errors**: Malformed CSV, encoding issues
576
+ - **Processing Errors**: Memory overflow, computation failures
577
+ - **Session Errors**: Invalid session ID, expired sessions
578
+ - **Resource Errors**: File not found, network issues
579
+
580
+ ### 7.2 Error Response Format
581
+ ```python
582
+ {
583
+ "success": False,
584
+ "error": {
585
+ "type": "ValidationError",
586
+ "message": "Column 'price' contains non-numeric values",
587
+ "details": {
588
+ "column": "price",
589
+ "invalid_rows": [23, 45, 67],
590
+ "suggestion": "Use change_column_type tool to convert to numeric"
591
+ }
592
+ },
593
+ "session_id": "uuid-here",
594
+ "timestamp": "2024-01-15T10:30:00Z"
595
+ }
596
+ ```
597
+
598
+ ## 8. Performance Optimization
599
+
600
+ ### 8.1 Chunked Processing
601
+ - Process large files in configurable chunks (default: 10,000 rows)
602
+ - Stream processing for files > 100MB
603
+ - Lazy loading for initial preview
604
+
605
+ ### 8.2 Caching Strategy
606
+ - Cache frequently accessed columns
607
+ - Memoize statistical computations
608
+ - Store intermediate results for complex operations
609
+
610
+ ### 8.3 Memory Management
611
+ - Automatic garbage collection for expired sessions
612
+ - Column-wise operations for memory efficiency
613
+ - Data type optimization (downcast numerics)
614
+
615
+ ## 9. Security Considerations
616
+
617
+ ### 9.1 Input Validation
618
+ - Sanitize file paths (prevent directory traversal)
619
+ - Validate SQL queries (prevent injection)
620
+ - Limit expression evaluation to safe operations
621
+
622
+ ### 9.2 Resource Limits
623
+ - Maximum file size: 1GB (configurable)
624
+ - Maximum session duration: 1 hour (configurable)
625
+ - Maximum concurrent sessions: 100 (configurable)
626
+
627
+ ### 9.3 Data Privacy
628
+ - Optional data anonymization tools
629
+ - Session isolation
630
+ - Secure temporary file handling
631
+
632
+ ## 10. Testing Strategy
633
+
634
+ ### 10.1 Unit Tests
635
+ - Test each tool in isolation
636
+ - Mock context and session management
637
+ - Validate error handling
638
+
639
+ ### 10.2 Integration Tests
640
+ - Test tool combinations
641
+ - Session lifecycle testing
642
+ - Resource access patterns
643
+
644
+ ### 10.3 Performance Tests
645
+ - Large file handling (100MB, 500MB, 1GB)
646
+ - Concurrent session stress testing
647
+ - Memory leak detection
648
+
649
+ ### 10.4 Test Data
650
+ - Various CSV formats (standard, TSV, pipe-delimited)
651
+ - Different encodings (UTF-8, Latin-1, etc.)
652
+ - Edge cases (empty files, single column, special characters)
653
+
654
+ ## 11. Deployment Configuration
655
+
656
+ ### 11.1 Transport Options
657
+ ```python
658
+ # STDIO (for local development)
659
+ mcp.run()
660
+
661
+ # HTTP Streaming
662
+ mcp.run(transport="http", host="0.0.0.0", port=8000)
663
+
664
+ # Server-Sent Events
665
+ mcp.run(transport="sse", host="0.0.0.0", port=8000)
666
+ ```
667
+
668
+ ### 11.2 Environment Variables
669
+ ```bash
670
+ CSV_MCP_MAX_FILE_SIZE=1073741824 # 1GB in bytes
671
+ CSV_MCP_SESSION_TIMEOUT=3600 # 1 hour in seconds
672
+ CSV_MCP_CACHE_SIZE=104857600 # 100MB cache
673
+ CSV_MCP_CHUNK_SIZE=10000 # Rows per chunk
674
+ CSV_MCP_LOG_LEVEL=INFO
675
+ ```
676
+
677
+ ### 11.3 MCP Servers Repository Integration
678
+ ```markdown
679
+ # For submission to https://github.com/modelcontextprotocol/servers
680
+
681
+ ## Repository Structure
682
+ csv-editor/
683
+ ├── README.md # Comprehensive documentation
684
+ ├── pyproject.toml # Python package configuration
685
+ ├── LICENSE # MIT License
686
+ ├── src/
687
+ │ └── csv_editor/
688
+ │ ├── __init__.py
689
+ │ ├── server.py # Main FastMCP server
690
+ │ └── tools/ # Tool implementations
691
+ └── tests/
692
+
693
+ ## Installation
694
+ pip install csv-editor
695
+
696
+ ## Claude Desktop Configuration
697
+ Add to ~/Library/Application Support/Claude/claude_desktop_config.json (macOS):
698
+ {
699
+ "mcpServers": {
700
+ "csv-editor": {
701
+ "command": "python",
702
+ "args": ["-m", "csv_editor.server"]
703
+ }
704
+ }
705
+ }
706
+ ```
707
+
708
+ ## 12. Client Integration Examples
709
+
710
+ ### 12.1 Python Client
711
+ ```python
712
+ from fastmcp import Client
712
+
713
+ async with Client("http://localhost:8000") as client:
715
+ # Load CSV
716
+ result = await client.call_tool("load_csv", {
717
+ "file_path": "/data/sales.csv"
718
+ })
719
+ session_id = result["session_id"]
720
+
721
+ # Filter data
722
+ await client.call_tool("filter_rows", {
723
+ "session_id": session_id,
724
+ "conditions": [{"column": "sales", "operator": ">", "value": 1000}]
725
+ })
726
+
727
+ # Get statistics
728
+ stats = await client.call_tool("get_statistics", {
729
+ "session_id": session_id
730
+ })
731
+ ```
732
+
733
+ ### 12.2 CLI Usage
734
+ ```bash
735
+ # Start server
736
+ fastmcp run src/csv_editor/server.py
737
+
738
+ # Or with custom config
739
+ fastmcp run src/csv_editor/server.py --transport http --port 8000
740
+ ```
741
+
742
+ ## 13. Monitoring and Observability
743
+
744
+ ### 13.1 Metrics
745
+ - Request latency per tool
746
+ - Session count and duration
747
+ - Memory usage per session
748
+ - Cache hit rates
749
+ - Error rates by category
750
+
751
+ ### 13.2 Logging
752
+ - Structured JSON logging
753
+ - Correlation IDs for request tracking
754
+ - Sensitive data masking
755
+ - Log aggregation support
756
+
757
+ ### 13.3 Health Checks
758
+ ```python
759
+ @mcp.tool
760
+ async def health_check(ctx: Context) -> Dict[str, Any]:
761
+ """System health check"""
762
+ return {
763
+ "status": "healthy",
764
+ "version": "1.0.0",
765
+ "active_sessions": get_session_count(),
766
+ "memory_usage_mb": get_memory_usage(),
767
+ "uptime_seconds": get_uptime()
768
+ }
769
+ ```
770
+
771
+ ## 14. Documentation Requirements
772
+
773
+ ### 14.1 API Documentation
774
+ - OpenAPI/Swagger specification
775
+ - Tool parameter descriptions
776
+ - Example requests and responses
777
+ - Error code reference
778
+
779
+ ### 14.2 User Guide
780
+ - Quick start tutorial
781
+ - Common use cases
782
+ - Best practices
783
+ - Troubleshooting guide
784
+
785
+ ### 14.3 Developer Documentation
786
+ - Architecture overview
787
+ - Extension points
788
+ - Contributing guidelines
789
+ - Plugin development guide
790
+
791
+ ## 15. Future Enhancements
792
+
793
+ ### 15.1 Phase 2 Features
794
+ - Machine learning integration (auto-ML features)
795
+ - Real-time collaboration support
796
+ - Streaming data support
797
+ - Database connectivity (direct SQL queries)
798
+ - Advanced visualizations
799
+
800
+ ### 15.2 Phase 3 Features
801
+ - Distributed processing (Dask/Ray integration)
802
+ - Custom function registration
803
+ - Webhook notifications
804
+ - Data versioning
805
+ - Automated data quality monitoring
806
+
807
+ ## 16. Implementation Timeline
808
+
809
+ ### Week 1-2: Foundation
810
+ - Set up project structure
811
+ - Implement core session management
812
+ - Basic I/O operations
813
+ - Initial testing framework
814
+
815
+ ### Week 3-4: Core Tools
816
+ - Data manipulation tools
817
+ - Transformation tools
818
+ - Basic analysis tools
819
+ - Error handling
820
+
821
+ ### Week 5-6: Advanced Features
822
+ - Advanced analysis tools
823
+ - Validation tools
824
+ - Resource implementation
825
+ - Prompt templates
826
+
827
+ ### Week 7-8: Integration & Testing
828
+ - Context integration
829
+ - Comprehensive testing
830
+ - Performance optimization
831
+ - Documentation
832
+
833
+ ### Week 9-10: Production Ready
834
+ - Deployment configurations
835
+ - Monitoring setup
836
+ - Security hardening
837
+ - Final testing and release
838
+
839
+ ## 17. Success Metrics
840
+
841
+ - **Performance**: Process a 1GB file in < 30 seconds
842
+ - **Reliability**: 99.9% uptime
843
+ - **Scalability**: Support 100 concurrent sessions
844
+ - **Usability**: 90% of operations require a single tool call
845
+ - **Compatibility**: Work with 95% of real-world CSV files
846
+
847
+ ## 18. MCP Servers Repository Integration
848
+
849
+ ### 18.1 Publishing to modelcontextprotocol/servers
850
+
851
+ #### Repository Structure Requirements
852
+ ```
853
+ csv-editor/
854
+ ├── README.md # Comprehensive documentation
855
+ ├── pyproject.toml # Python package configuration
856
+ ├── LICENSE # MIT License (required)
857
+ ├── src/
858
+ │ └── csv_editor/
859
+ │ ├── __init__.py
860
+ │ ├── server.py # Main FastMCP server
861
+ │ ├── tools.py # Tool implementations
862
+ │ ├── resources.py # Resource definitions
863
+ │ └── prompts.py # Prompt templates
864
+ ├── tests/
865
+ │ ├── __init__.py
866
+ │ ├── test_tools.py
867
+ │ └── test_integration.py
868
+ └── examples/
869
+ ├── basic_usage.py
870
+ └── sample_data.csv
871
+ ```
872
+
873
+ #### pyproject.toml Template
874
+ ```toml
875
+ [build-system]
876
+ requires = ["hatchling"]
877
+ build-backend = "hatchling.build"
878
+
879
+ [project]
880
+ name = "csv-editor"
881
+ version = "1.0.0"
882
+ description = "A Model Context Protocol server for CSV operations"
883
+ readme = "README.md"
884
+ license = "MIT"
885
+ authors = [
886
+ { name = "Your Name", email = "email@example.com" }
887
+ ]
888
+ requires-python = ">=3.10"
889
+ dependencies = [
890
+ "fastmcp>=2.0.0",
891
+ "pandas>=2.0.0",
892
+ "numpy>=1.24.0",
893
+ ]
894
+
895
+ [project.urls]
896
+ "Homepage" = "https://github.com/santoshray02/csv-editor"
897
+ "Issues" = "https://github.com/santoshray02/csv-editor/issues"
898
+ ```
899
+
900
+ #### README.md Requirements
901
+ ```markdown
902
+ # CSV Editor MCP Server
903
+
904
+ A FastMCP server providing comprehensive CSV manipulation tools.
905
+
906
+ ## Installation
907
+
908
+ ### Using pip
909
+ pip install csv-editor
910
+
911
+ ### Using uv (recommended)
912
+ uv add csv-editor
913
+
914
+ ## Configuration
915
+
916
+ ### Claude Desktop
917
+ Add to ~/Library/Application Support/Claude/claude_desktop_config.json (macOS):
918
+ {
919
+ "mcpServers": {
920
+ "csv-editor": {
921
+ "command": "python",
922
+ "args": ["-m", "csv_editor.server"]
923
+ }
924
+ }
925
+ }
926
+
927
+ ### Other MCP Clients
928
+ {
929
+ "mcp": {
930
+ "servers": {
931
+ "csv-editor": {
932
+ "command": "python",
933
+ "args": ["-m", "csv_editor.server"]
934
+ }
935
+ }
936
+ }
937
+ }
938
+
939
+ ## Available Tools
940
+
941
+ [List all tools with descriptions]
942
+
943
+ ## Usage Examples
944
+
945
+ [Provide clear examples of common use cases]
946
+
947
+ ## License
948
+ MIT
949
+ ```
950
+
951
+ ### 18.2 Submission Process
952
+
953
+ 1. **Prepare Repository**:
954
+ - Ensure all required files are present
955
+ - Run tests and verify functionality
956
+ - Test with Claude Desktop
957
+
958
+ 2. **Fork and Clone**:
959
+ ```bash
960
+ gh repo fork modelcontextprotocol/servers
961
+ git clone https://github.com/santoshray02/servers
962
+ cd servers
963
+ ```
964
+
965
+ 3. **Add Your Server**:
966
+ - Create directory: `src/csv-editor/`
967
+ - Add all server files
968
+ - Add an entry for the server to the root README.md, keeping the server list in alphabetical order
969
+
970
+ 4. **Submit PR**:
971
+ ```bash
972
+ git add .
973
+ git commit -m "Add CSV Editor MCP server"
974
+ git push origin main
975
+ gh pr create --title "Add CSV Editor MCP server" \
976
+ --body "Adds comprehensive CSV manipulation server using FastMCP"
977
+ ```
978
+
979
+ ### 18.3 Quality Checklist
980
+
981
+ - [ ] **Documentation**
982
+ - [ ] Clear README with examples
983
+ - [ ] All tools documented
984
+ - [ ] Configuration examples for multiple clients
985
+
986
+ - [ ] **Code Quality**
987
+ - [ ] Type hints on all functions
988
+ - [ ] Docstrings for tools
989
+ - [ ] Error handling
990
+ - [ ] No hardcoded paths
991
+
992
+ - [ ] **Testing**
993
+ - [ ] Unit tests for core functionality
994
+ - [ ] Integration tests
995
+ - [ ] Tested with Claude Desktop
996
+
997
+ - [ ] **Package**
998
+ - [ ] Installable via pip
999
+ - [ ] Dependencies properly specified
1000
+ - [ ] Version numbering follows semver
1001
+
1002
+ - [ ] **Security**
1003
+ - [ ] Input validation
1004
+ - [ ] Path traversal prevention
1005
+ - [ ] No credential exposure
1006
+
1007
+ ## 19. Conclusion
1008
+
1009
+ This generic CSV Editor MCP Server will provide a robust, scalable, and intelligent interface for CSV data manipulation through the Model Context Protocol. By leveraging FastMCP v2.0's capabilities and following best practices, we'll create a production-ready tool that can be integrated with any MCP-compatible AI assistant or application.
1010
+
1011
+ The modular architecture ensures easy maintenance and extension, while the comprehensive tool set covers all common CSV operations and beyond. With proper testing, documentation, and deployment strategies, this server will serve as a reliable foundation for AI-powered data processing workflows.