@mseep/csv-editor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +53 -0
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +38 -0
  3. package/.github/workflows/deploy-docs.yml +62 -0
  4. package/.github/workflows/publish-github.yml +52 -0
  5. package/.github/workflows/publish.yml +44 -0
  6. package/.github/workflows/test.yml +32 -0
  7. package/.pre-commit-config.yaml +157 -0
  8. package/ALTERNATIVE_PUBLISHING.md +175 -0
  9. package/ARCHITECTURE.md +1011 -0
  10. package/CHANGELOG.md +99 -0
  11. package/CODE_OF_CONDUCT.md +41 -0
  12. package/CONTRIBUTING.md +427 -0
  13. package/Dockerfile +22 -0
  14. package/LICENSE +21 -0
  15. package/MCP_CONFIG.md +505 -0
  16. package/PUBLISHING.md +210 -0
  17. package/README.md +400 -0
  18. package/SECURITY.md +61 -0
  19. package/docs/README.md +41 -0
  20. package/docs/blog/2019-05-28-first-blog-post.md +12 -0
  21. package/docs/blog/2019-05-29-long-blog-post.md +44 -0
  22. package/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
  23. package/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  24. package/docs/blog/2021-08-26-welcome/index.md +29 -0
  25. package/docs/blog/authors.yml +25 -0
  26. package/docs/blog/tags.yml +19 -0
  27. package/docs/docs/api/overview.md +183 -0
  28. package/docs/docs/installation.md +252 -0
  29. package/docs/docs/intro.md +87 -0
  30. package/docs/docs/tutorial-basics/_category_.json +8 -0
  31. package/docs/docs/tutorial-basics/congratulations.md +23 -0
  32. package/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
  33. package/docs/docs/tutorial-basics/create-a-document.md +57 -0
  34. package/docs/docs/tutorial-basics/create-a-page.md +43 -0
  35. package/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
  36. package/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
  37. package/docs/docs/tutorial-extras/_category_.json +7 -0
  38. package/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
  39. package/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
  40. package/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
  41. package/docs/docs/tutorial-extras/translate-your-site.md +88 -0
  42. package/docs/docs/tutorials/quickstart.md +365 -0
  43. package/docs/docusaurus.config.ts +163 -0
  44. package/docs/package-lock.json +17493 -0
  45. package/docs/package.json +48 -0
  46. package/docs/sidebars.ts +33 -0
  47. package/docs/src/components/HomepageFeatures/index.tsx +71 -0
  48. package/docs/src/components/HomepageFeatures/styles.module.css +11 -0
  49. package/docs/src/css/custom.css +30 -0
  50. package/docs/src/pages/index.module.css +23 -0
  51. package/docs/src/pages/index.tsx +44 -0
  52. package/docs/src/pages/markdown-page.md +7 -0
  53. package/docs/static/.nojekyll +0 -0
  54. package/docs/static/img/docusaurus-social-card.jpg +0 -0
  55. package/docs/static/img/docusaurus.png +0 -0
  56. package/docs/static/img/favicon.ico +0 -0
  57. package/docs/static/img/logo.svg +1 -0
  58. package/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
  59. package/docs/static/img/undraw_docusaurus_react.svg +170 -0
  60. package/docs/static/img/undraw_docusaurus_tree.svg +40 -0
  61. package/docs/tsconfig.json +8 -0
  62. package/examples/README.md +48 -0
  63. package/examples/auto_save_demo.py +206 -0
  64. package/examples/auto_save_overwrite.py +201 -0
  65. package/examples/basic_usage.py +135 -0
  66. package/examples/demo.py +139 -0
  67. package/examples/history_demo.py +317 -0
  68. package/examples/test_default_autosave.py +124 -0
  69. package/examples/update_consignee_example.py +179 -0
  70. package/package.json +51 -0
  71. package/plans/2026-04-19-fastmcp3-migration-plan.md +1045 -0
  72. package/pyproject.toml +331 -0
  73. package/requirements-dev.txt +30 -0
  74. package/requirements.txt +22 -0
  75. package/scripts/publish.py +67 -0
  76. package/smithery.yaml +15 -0
  77. package/specs/2026-04-19-fastmcp3-migration-design.md +243 -0
  78. package/src/csv_editor/__init__.py +8 -0
  79. package/src/csv_editor/models/__init__.py +39 -0
  80. package/src/csv_editor/models/auto_save.py +246 -0
  81. package/src/csv_editor/models/csv_session.py +468 -0
  82. package/src/csv_editor/models/data_models.py +244 -0
  83. package/src/csv_editor/models/history_manager.py +456 -0
  84. package/src/csv_editor/prompts/__init__.py +0 -0
  85. package/src/csv_editor/prompts/data_prompts.py +13 -0
  86. package/src/csv_editor/resources/__init__.py +0 -0
  87. package/src/csv_editor/resources/csv_resources.py +22 -0
  88. package/src/csv_editor/server.py +640 -0
  89. package/src/csv_editor/tools/__init__.py +5 -0
  90. package/src/csv_editor/tools/analytics.py +700 -0
  91. package/src/csv_editor/tools/auto_save_operations.py +235 -0
  92. package/src/csv_editor/tools/data_operations.py +3 -0
  93. package/src/csv_editor/tools/history_operations.py +315 -0
  94. package/src/csv_editor/tools/io_operations.py +431 -0
  95. package/src/csv_editor/tools/transformations.py +663 -0
  96. package/src/csv_editor/tools/validation.py +822 -0
  97. package/src/csv_editor/utils/__init__.py +0 -0
  98. package/src/csv_editor/utils/validators.py +205 -0
  99. package/tests/README.md +65 -0
  100. package/tests/__init__.py +7 -0
  101. package/tests/conftest.py +50 -0
  102. package/tests/test_auto_save.py +378 -0
  103. package/tests/test_basic.py +103 -0
  104. package/tests/test_integration.py +356 -0
  105. package/tests/test_server_boot.py +50 -0
  106. package/tests/test_settings.py +184 -0
@@ -0,0 +1,243 @@
1
+ # Sub-project 1: FastMCP 3 Migration + Dependency Bumps + Python Floor (v2.0.0)
2
+
3
+ **Status:** Draft
4
+ **Date:** 2026-04-19
5
+ **Author:** santoshray02
6
+ **Target release:** csv-editor v2.0.0
7
+
8
+ ## Context
9
+
10
+ `csv-editor` is an MCP server (pandas-based, FastMCP 2.x, ~40 tools, 22 GitHub stars as of April 2026) published to PyPI and Smithery. The last substantive work on the stack landed in August 2025. Since then:
11
+
12
+ - The MCP spec has shipped revision [2025-11-25](https://modelcontextprotocol.io/specification/2025-11-25) (async Tasks, OAuth overhaul, elicitation).
13
+ - [FastMCP 3.x](https://gofastmcp.com/getting-started/upgrading/from-fastmcp-2) is the current major line (3.2.4 as of April 14, 2026) with breaking changes vs. 2.x.
14
+ - DuckDB and Polars have matured into production-ready engines and a MotherDuck-published MCP server is now a direct competitor for large-file analytics.
15
+ - Many Python libraries have dropped 3.8/3.9 support; pandas 3.0 requires 3.11+.
16
+
17
+ The full modernization roadmap breaks into six sub-projects:
18
+ 0. **Docs migration: Docusaurus → MkDocs-Material** (parallel track, independent of 1–5)
19
+ 1. **FastMCP 3 + dep bumps + Python floor cleanup** (this spec — prerequisite for 2–5)
20
+ 2. DuckDB / Polars engine layer
21
+ 3. Async Tasks + Resource Links
22
+ 4. Remote HTTP + OAuth deployment mode
23
+ 5. Elicitation for ambiguous CSV dialects
24
+
25
+ This spec covers **only Sub-project 1**. Sub-project 0 runs in parallel and does not block it.
26
+
27
+ ## Goals
28
+
29
+ 1. Restore the project to a current, maintainable baseline so Sub-projects 2–5 can be built on top.
30
+ 2. Ship a clean **v2.0.0** with honest breaking-change signaling.
31
+ 3. Establish automated test coverage so future changes don't regress silently.
32
+
33
+ ## Non-goals
34
+
35
+ - Upgrading to pandas 3.0 (Copy-on-Write migration) — deferred to a later sub-project because the default-dtype and CoW changes will touch many of the 40 existing tools and deserve focused testing.
36
+ - Adding new tools, engines, or MCP protocol features.
37
+ - Re-platforming the Docusaurus docs site.
38
+ - Fixing the `.venv` drift on the maintainer's machine via a PR (documentation fix only).
39
+
40
+ ## Decisions (locked)
41
+
42
+ | Decision | Choice | Rationale |
43
+ |---|---|---|
44
+ | Python floor | **3.11** (tested/recommended 3.14) | Enables later pandas 3.0; still covers mainstream user base. 3.14 as hard floor would exclude too many users. |
45
+ | Dependency scope | Medium: FastMCP 3 + non-breaking deps only | Defer pandas 3.0 / numpy 2.4 to focused sub-project. |
46
+ | Release version | **2.0.0** (direct, no pre-release) | Breaking changes (Python floor, FastMCP major, SSE removal) justify a major. User explicitly opted out of pre-release. |
47
+ | Remote transport | Drop `sse`; keep `stdio` + `http` (Streamable HTTP) | Per [FastMCP 3 docs](https://gofastmcp.com/clients/transports): SSE is "backward compatibility only, shouldn't be used in new projects." Major version is the right time to drop it. |
48
+ | CI | Add pytest matrix workflow (Python 3.11/3.12/3.13/3.14, Ubuntu) | No test CI exists today. Migration PRs would be blind without it. |
49
+ | Rollout | Phased PRs to `main`, each green in CI | Isolates risk; each PR is a revertable checkpoint. |
50
+
51
+ ## Architecture / scope
52
+
53
+ ### In-scope changes
54
+
55
+ - `pyproject.toml`:
56
+ - `requires-python = ">=3.11"`
57
+ - Classifiers: drop 3.8/3.9/3.10; keep 3.11–3.13; add 3.14.
58
+ - `dependencies`:
59
+ - `fastmcp>=3.2,<4` (from `>=2.11.3`)
60
+ - `pyarrow>=23` (from `>=17.0.0`)
61
+ - `pydantic>=2.13` (from `>=2.10.4`)
62
+ - `pydantic-settings>=2.13` (from `>=2.10.1`)
63
+ - `httpx>=0.28` (from `>=0.27.0`)
64
+ - `aiofiles>=25` (from `>=24.1.0`)
65
+ - `tabulate>=0.10` (from `>=0.9.0`)
66
+ - pandas, numpy, openpyxl, python-dateutil, pytz: **unchanged**
67
+ - `[tool.black] target-version`, `[tool.ruff] target-version`, `[tool.mypy] python_version`: bump to `py311` / `3.11`.
68
+ - `version = "2.0.0"` (in the final release PR).
69
+ - `src/csv_editor/server.py`:
70
+ - `argparse --transport` choices: `["stdio", "http"]` (drop `"sse"`).
71
+ - `health_check` `version` field: `"2.0.0"`.
72
+ - No other code changes — the `FastMCP("CSV Editor")` constructor and `mcp.run(transport=..., host=..., port=...)` calls are already compatible with FastMCP 3.
73
+ - `README.md`: update Python badge, remove 3.8/3.9 claims, scrub SSE references in config examples.
74
+ - `Dockerfile`: already `python:3.11-slim`; bump to a pinned bookworm tag for reproducibility.
75
+ - `smithery.yaml`: verify Python version if specified; update.
76
+ - `MCP_CONFIG.md`: scrub SSE references.
77
+ - `CHANGELOG.md`: add a `[2.0.0]` section with `### BREAKING CHANGES`, `### Added`, `### Changed`, `### Removed`.
78
+ - `.github/workflows/test.yml`: **new file** (see PR 1).
79
+
80
+ ### Out-of-scope
81
+
82
+ - `pandas>=3.0` and `numpy>=2.4` (separate sub-project).
83
+ - Any of the four follow-on sub-projects (2–5).
84
+ - Docusaurus content changes beyond badge/README updates.
85
+ - Raising or lowering the `fail_under = 80` coverage gate.
86
+
87
+ ### Breaking-change surface for users
88
+
89
+ 1. Python < 3.11 no longer supported.
90
+ 2. `--transport sse` is no longer a valid CLI argument.
91
+ 3. `csv-editor` depends on `fastmcp>=3.2,<4`, which is a breaking change for any consumer importing FastMCP APIs transitively.
92
+
93
+ Users who pinned `csv-editor>=1,<2` are unaffected; 1.x remains on PyPI.
94
+
95
+ ## PR sequence
96
+
97
+ Four PRs to `main`, each green in CI before the next.
98
+
99
+ ### PR 1 — CI workflow baseline
100
+
101
+ **Single file:** `.github/workflows/test.yml`
102
+
103
+ ```yaml
104
+ name: test
105
+ on:
106
+ pull_request:
107
+ push:
108
+ branches: [main]
109
+ jobs:
110
+ pytest:
111
+ runs-on: ubuntu-latest
112
+ strategy:
113
+ fail-fast: false
114
+ matrix:
115
+ python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
116
+ steps:
117
+ - uses: actions/checkout@v4
118
+ - uses: actions/setup-python@v5
119
+ with:
120
+ python-version: ${{ matrix.python-version }}
121
+ - name: Install uv
122
+ uses: astral-sh/setup-uv@v4
123
+ - name: Sync deps
124
+ run: uv sync --all-extras
125
+ - name: Run tests
126
+ run: uv run pytest tests/ -v
127
+ ```
128
+
129
+ **Notes:**
130
+ - Matrix includes 3.10 so PR 1 can pass against current main (which declares `>=3.10`). PR 2 removes 3.10 from the matrix.
131
+ - No coverage gate on the initial workflow — the `pyproject.toml` `fail_under = 80` is advisory until we know the actual pass rate.
132
+ - Windows and macOS matrix axes are deferred to a follow-up issue; Ubuntu is sufficient for release gating.
133
+
134
+ **Acceptance:** runner exists, matrix runs, green or red is documented. If baseline is red, PR 1 still merges (the runner is the deliverable); a follow-up issue tracks the failures.
135
+
136
+ ### PR 2 — Python floor + non-breaking dep bumps
137
+
138
+ **Changes** (per "In-scope changes" above, excluding the FastMCP and version bumps):
139
+
140
+ - `pyproject.toml`: `requires-python`, classifiers, non-breaking dep bumps, tool target-versions.
141
+ - `.github/workflows/test.yml`: drop `"3.10"` from matrix.
142
+ - `README.md`: Python badge, remove 3.8/3.9 claims.
143
+ - `Dockerfile`: change `FROM python:3.11-slim` to `FROM python:3.11-slim-bookworm` for a pinned base image.
144
+ - `smithery.yaml`: verify any Python version or runtime declarations; update to 3.11 if present.
145
+ - `[tool.black]`, `[tool.ruff]`, `[tool.mypy]` `target-version`/`python_version`: bump to `py311`/`3.11`.
146
+
147
+ **Acceptance:** all four CI matrix rows (3.11/3.12/3.13/3.14) green; no test behavior changes.
148
+
149
+ **Risk mitigations:**
150
+ - If pydantic 2.13 union-serialization change breaks a tool return type, pin `pydantic<2.13` as a hotfix and open a follow-up issue. This is not a release blocker for v2.0.0.
151
+ - If pyarrow 23 wheels are missing for 3.14 on any platform, document in CHANGELOG.
152
+
153
+ ### PR 3 — FastMCP 3 migration + SSE removal
154
+
155
+ **Changes:**
156
+ - `pyproject.toml`: `fastmcp>=3.2,<4`.
157
+ - `src/csv_editor/server.py`:
158
+ - `--transport` argparse: `choices=["stdio", "http"]`, help text updated.
159
+ - No other code changes (verified against [FastMCP 3 upgrade guide](https://gofastmcp.com/getting-started/upgrading/from-fastmcp-2)).
160
+ - `README.md`, `MCP_CONFIG.md`: remove SSE mentions.
161
+ - `Dockerfile`: verify `CMD`/`ENTRYPOINT` doesn't reference SSE.
162
+ - **New file:** `tests/test_server_boot.py`:
163
+ - `test_server_imports_clean` — `import csv_editor.server` without errors.
164
+ - `test_tool_registry_populated` — after import, the `mcp` instance has ≥40 registered tools.
165
+ - `test_cli_rejects_sse_transport` — invoking `main` with `--transport sse` raises `SystemExit` from argparse.
166
+
167
+ **Acceptance:**
168
+ - CI matrix green on 3.11/3.12/3.13/3.14.
169
+ - Manual smoke test: `uv run csv-editor --transport stdio` responds to an MCP `tools/list` request; `--transport http --port 8765` boots and serves at `/mcp`.
170
+ - Claude Desktop smoke test (manual): the server loads end-to-end; `health_check` returns `status: "healthy"` and `load_csv` successfully loads a small fixture file.
171
+
172
+ ### PR 4 — v2.0.0 release cut
173
+
174
+ **Changes:**
175
+ - `pyproject.toml`: `version = "2.0.0"`.
176
+ - `CHANGELOG.md`: add `[2.0.0] - YYYY-MM-DD` section:
177
+ - `### BREAKING CHANGES`: Python floor 3.11; SSE transport removed; FastMCP 3 required.
178
+ - `### Added`: CI test workflow; `tests/test_server_boot.py`.
179
+ - `### Changed`: dependency bumps (list each with from → to).
180
+ - `### Removed`: `--transport sse` CLI option.
181
+ - `src/csv_editor/server.py` `health_check`: `"version": "2.0.0"`.
182
+ - Git tag `v2.0.0` pushed after merge; existing `publish.yml` handles PyPI publish.
183
+
184
+ **Acceptance:**
185
+ - Tag pushed; PyPI publish workflow green.
186
+ - `pip install csv-editor==2.0.0` on a fresh Python 3.11 venv succeeds.
187
+ - Smithery listing reflects 2.0.0 (manual refresh if needed).
188
+
189
+ ## Testing strategy
190
+
191
+ | PR | Tests |
192
+ |---|---|
193
+ | PR 1 | No new tests. Runner ships; baseline pass rate documented. |
194
+ | PR 2 | Existing tests must pass on all four Python versions. |
195
+ | PR 3 | Add `tests/test_server_boot.py` (three tests listed above). Manual smoke tests against stdio, http, and Claude Desktop. |
196
+ | PR 4 | No new tests. Full suite green on the release commit. |
197
+
198
+ **Coverage:** the `pyproject.toml` `fail_under = 80` gate is not enforced by PR 1's workflow (no `--cov` flag). Raising or lowering it is a follow-up issue.
199
+
200
+ **Local dev:** maintainer's `.venv` currently points at a removed conda interpreter. Fix instructions are added to `CONTRIBUTING.md` in PR 2:
201
+
202
+ ```bash
203
+ rm -rf .venv
204
+ uv sync --all-extras
205
+ ```
206
+
207
+ ## Risks & rollback
208
+
209
+ | Risk | Likelihood | Impact | Mitigation |
210
+ |---|---|---|---|
211
+ | pydantic 2.13 union-serialization regression | Low | Medium | CI catches in PR 2. Hotfix: pin `pydantic<2.13`. Not a v2.0.0 blocker. |
212
+ | pyarrow 23 wheel missing on 3.14 for some platform | Low | Low | Document in CHANGELOG; user can fall back to 3.13. |
213
+ | FastMCP 3 `run()` signature drift from our call site | Very Low | Medium | Verified against official docs; smoke tests catch any runtime break. |
214
+ | User on `--transport sse` hits v2.0.0 and breaks | Medium | Low | Major version signals it; CHANGELOG documents it. `csv-editor>=1,<2` users are unaffected. |
215
+ | Existing tests are red on current main | Unknown | Blocks PR 1 only if a red baseline is treated as a merge blocker (it is not) | PR 1 ships the runner regardless; broken tests get a follow-up issue. |
216
+ | Smithery/Glama listings out of sync post-release | Low | Cosmetic | Manually refresh after 2.0.0 publish. |
217
+
218
+ **Rollback per PR:**
219
+ - **PR 1:** revert the workflow file. Zero blast radius.
220
+ - **PR 2:** revert the pyproject commit. Users on current main are unaffected (no tag cut yet).
221
+ - **PR 3:** revert; independent of PR 2 because the FastMCP pin doesn't require newer pyarrow/pydantic.
222
+ - **PR 4:** PyPI is append-only. Critical bug → publish 2.0.1. Security-critical only → yank 2.0.0. `csv-editor>=1,<2` users unaffected either way.
223
+
224
+ ## Decision gates
225
+
226
+ - **After PR 1:** baseline test pass count is known and documented in an issue.
227
+ - **After PR 2:** CI matrix confirms new dep set works on 3.11/3.12/3.13/3.14.
228
+ - **After PR 3:** manual Claude Desktop smoke test confirms end-to-end functionality (highest-value manual check).
229
+ - **After PR 4:** `pip install csv-editor==2.0.0` on a clean 3.11 venv succeeds; Smithery listing refreshed.
230
+
231
+ ## Open questions
232
+
233
+ None at spec sign-off — all structural decisions locked via clarifying questions on 2026-04-19.
234
+
235
+ ## References
236
+
237
+ - [MCP Specification 2025-11-25](https://modelcontextprotocol.io/specification/2025-11-25)
238
+ - [FastMCP Upgrade Guide (2 → 3)](https://gofastmcp.com/getting-started/upgrading/from-fastmcp-2)
239
+ - [FastMCP Running Your Server](https://gofastmcp.com/deployment/running-server)
240
+ - [FastMCP Client Transports (SSE status)](https://gofastmcp.com/clients/transports)
241
+ - [FastMCP v3.2.4 release notes](https://github.com/PrefectHQ/fastmcp/releases/tag/v3.2.4)
242
+ - [pandas 3.0 What's New](https://pandas.pydata.org/docs/whatsnew/v3.0.0.html) (for the deferred pandas 3.0 sub-project)
243
+ - [PyArrow 23.0 release](https://arrow.apache.org/blog/2026/01/18/23.0.0-release/)
@@ -0,0 +1,8 @@
1
+ """CSV Editor - MCP server for comprehensive CSV operations."""
2
+
3
+ __version__ = "1.0.0"
4
+ __author__ = "Santosh Ray"
5
+
6
+ from .server import main, mcp
7
+
8
+ __all__ = ["main", "mcp"]
@@ -0,0 +1,39 @@
1
+ """Data models for CSV Editor MCP Server."""
2
+
3
+ from .csv_session import CSVSession, SessionManager, get_session_manager
4
+ from .data_models import (
5
+ AggregateFunction,
6
+ ColumnSchema,
7
+ ComparisonOperator,
8
+ DataQualityRule,
9
+ DataSchema,
10
+ DataStatistics,
11
+ DataType,
12
+ ExportFormat,
13
+ FilterCondition,
14
+ LogicalOperator,
15
+ OperationResult,
16
+ OperationType,
17
+ SessionInfo,
18
+ SortSpec,
19
+ )
20
+
21
+ __all__ = [
22
+ "AggregateFunction",
23
+ "CSVSession",
24
+ "ColumnSchema",
25
+ "ComparisonOperator",
26
+ "DataQualityRule",
27
+ "DataSchema",
28
+ "DataStatistics",
29
+ "DataType",
30
+ "ExportFormat",
31
+ "FilterCondition",
32
+ "LogicalOperator",
33
+ "OperationResult",
34
+ "OperationType",
35
+ "SessionInfo",
36
+ "SessionManager",
37
+ "SortSpec",
38
+ "get_session_manager",
39
+ ]
@@ -0,0 +1,246 @@
1
+ """Auto-save functionality for CSV sessions."""
2
+
3
+ import asyncio
4
+ import logging
5
+ import os
6
+ from datetime import datetime
7
+ from enum import Enum
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from ..models.data_models import ExportFormat
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class AutoSaveMode(str, Enum):
    """Enumerates the events that cause a session to be auto-saved.

    Members are ``str`` subclasses, so they compare equal to their plain
    string values.
    """

    # Auto-save never fires.
    DISABLED = "disabled"
    # A save is made after each operation.
    AFTER_OPERATION = "after_operation"
    # A save is made at regular intervals.
    PERIODIC = "periodic"
    # Combination of AFTER_OPERATION and PERIODIC.
    HYBRID = "hybrid"
23
+
24
+
25
class AutoSaveStrategy(str, Enum):
    """Enumerates where an auto-save writes its output.

    Members are ``str`` subclasses, so they compare equal to their plain
    string values.
    """

    # Write over the original file.
    OVERWRITE = "overwrite"
    # Write a new timestamped backup file on every save.
    BACKUP = "backup"
    # Write a new numbered version file on every save.
    VERSIONED = "versioned"
    # Write to a caller-supplied path.
    CUSTOM = "custom"
32
+
33
+
34
class AutoSaveConfig:
    """Settings describing when, where and in what form a session is auto-saved."""

    def __init__(
        self,
        enabled: bool = True,
        mode: AutoSaveMode = AutoSaveMode.AFTER_OPERATION,
        strategy: AutoSaveStrategy = AutoSaveStrategy.OVERWRITE,
        interval_seconds: int = 300,
        max_backups: int = 10,
        backup_dir: str | None = None,
        custom_path: str | None = None,
        format: ExportFormat = ExportFormat.CSV,
        encoding: str = "utf-8",
    ):
        """Store the settings and prepare the backup directory when required.

        Args:
            enabled: Master switch for auto-save (on by default).
            mode: Trigger mode; defaults to saving after each operation.
            strategy: File strategy; defaults to overwriting the original file.
            interval_seconds: Delay between periodic saves (300 s = 5 minutes).
            max_backups: Number of backup/version files kept per session.
            backup_dir: Directory for backup/version files; defaults to
                ``.csv_backups`` under the current working directory.
            custom_path: Target path used by the ``CUSTOM`` strategy.
            format: Export format handed to the save callback.
            encoding: Text encoding handed to the save callback.
        """
        self.enabled = enabled
        self.mode = mode
        self.strategy = strategy
        self.interval_seconds = interval_seconds
        self.max_backups = max_backups
        self.custom_path = custom_path
        self.format = format
        self.encoding = encoding

        # A falsy backup_dir (None or "") falls back to <cwd>/.csv_backups.
        if backup_dir:
            self.backup_dir = backup_dir
        else:
            self.backup_dir = os.path.join(os.getcwd(), ".csv_backups")

        # Only the strategies that write into backup_dir need it to exist.
        writes_to_backup_dir = self.strategy in (
            AutoSaveStrategy.BACKUP,
            AutoSaveStrategy.VERSIONED,
        )
        if self.enabled and writes_to_backup_dir:
            Path(self.backup_dir).mkdir(parents=True, exist_ok=True)

    def to_dict(self) -> dict[str, Any]:
        """Return the settings as a plain dict, with enums reduced to their values."""
        serialized: dict[str, Any] = {}
        serialized["enabled"] = self.enabled
        serialized["mode"] = self.mode.value
        serialized["strategy"] = self.strategy.value
        serialized["interval_seconds"] = self.interval_seconds
        serialized["max_backups"] = self.max_backups
        serialized["backup_dir"] = self.backup_dir
        serialized["custom_path"] = self.custom_path
        serialized["format"] = self.format.value
        serialized["encoding"] = self.encoding
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AutoSaveConfig":
        """Build a config from a dict such as the one produced by ``to_dict``.

        Missing keys fall back to the values given below.

        NOTE(review): the fallbacks for ``enabled``, ``mode`` and ``strategy``
        (False / "disabled" / "backup") differ from the ``__init__`` defaults
        (True / after_operation / overwrite) - confirm this is intentional.
        """
        lookup = data.get
        mode = AutoSaveMode(lookup("mode", "disabled"))
        strategy = AutoSaveStrategy(lookup("strategy", "backup"))
        export_format = ExportFormat(lookup("format", "csv"))
        return cls(
            enabled=lookup("enabled", False),
            mode=mode,
            strategy=strategy,
            interval_seconds=lookup("interval_seconds", 300),
            max_backups=lookup("max_backups", 10),
            backup_dir=lookup("backup_dir"),
            custom_path=lookup("custom_path"),
            format=export_format,
            encoding=lookup("encoding", "utf-8"),
        )
92
+
93
+
94
class AutoSaveManager:
    """Manages auto-save operations for a single CSV session.

    The manager decides where each save goes (based on the configured
    strategy), serialises saves with an ``asyncio.Lock``, optionally runs a
    background task that saves at a fixed interval, and trims old
    backup/version files.
    """

    def __init__(
        self, session_id: str, config: AutoSaveConfig, original_file_path: str | None = None
    ):
        """Initialize auto-save manager.

        Args:
            session_id: Identifier of the session; embedded in generated file names.
            config: Auto-save settings consulted on every save.
            original_file_path: Path the data was loaded from, if any; used as
                the target of the ``OVERWRITE`` strategy.
        """
        self.session_id = session_id
        self.config = config
        self.original_file_path = original_file_path
        self.last_save = self._utc_now()
        self.save_count = 0
        self.periodic_task: asyncio.Task | None = None
        self._lock = asyncio.Lock()

    @staticmethod
    def _utc_now() -> datetime:
        """Return the current UTC time as a naive ``datetime``.

        Replaces ``datetime.utcnow()``, which is deprecated since Python 3.12.
        The tzinfo is stripped so that ``isoformat()`` output and comparisons
        with previously stored naive timestamps stay exactly as before.
        """
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None)

    async def start_periodic_save(self, save_callback):
        """Start the periodic auto-save task.

        Does nothing unless the configured mode is ``PERIODIC`` or ``HYBRID``.
        If a periodic task already exists it is stopped first, so that at most
        one loop is ever running and ``stop_periodic_save`` can always cancel it.

        Args:
            save_callback: Awaitable callable invoked as
                ``save_callback(path, format, encoding)`` for each save.
        """
        if self.config.mode not in (AutoSaveMode.PERIODIC, AutoSaveMode.HYBRID):
            return

        # Without this, a second call would overwrite self.periodic_task and
        # leave the previous loop running with no handle left to cancel it.
        if self.periodic_task is not None:
            await self.stop_periodic_save()

        self.periodic_task = asyncio.create_task(self._periodic_save_loop(save_callback))
        logger.info(f"Started periodic auto-save for session {self.session_id}")

    async def stop_periodic_save(self):
        """Cancel the periodic auto-save task, if any, and wait for it to finish."""
        if not self.periodic_task:
            return

        self.periodic_task.cancel()
        try:
            await self.periodic_task
        except asyncio.CancelledError:
            pass
        self.periodic_task = None
        logger.info(f"Stopped periodic auto-save for session {self.session_id}")

    async def _periodic_save_loop(self, save_callback):
        """Sleep for the configured interval and save, until cancelled."""
        while True:
            try:
                # The interval is re-read each round, so config changes take
                # effect on the next sleep.
                await asyncio.sleep(self.config.interval_seconds)
                await self.trigger_save(save_callback, "periodic")
            except asyncio.CancelledError:
                break
            except Exception as e:
                # Keep the loop alive; one failed round must not end auto-save.
                logger.error(f"Error in periodic save: {e!s}")

    async def trigger_save(self, save_callback, trigger: str = "manual") -> dict[str, Any]:
        """Perform one auto-save.

        Args:
            save_callback: Awaitable callable invoked as
                ``save_callback(path, format, encoding)``; expected to return a
                dict with a ``"success"`` key and, on failure, an ``"error"`` key.
            trigger: Label describing what caused the save; echoed in the
                result and in log messages.

        Returns:
            On success: ``success``, ``save_path``, ``trigger``, ``save_count``
            and ``timestamp``. On failure: ``success`` (False), ``error`` and
            ``trigger``. Exceptions are caught and reported as failures.
        """
        # Serialise saves so concurrent triggers do not interleave.
        async with self._lock:
            try:
                # Determine save path based on strategy
                save_path = self._get_save_path()

                # Perform the save
                result = await save_callback(save_path, self.config.format, self.config.encoding)

                if not result.get("success"):
                    logger.error(
                        f"Auto-save failed for session {self.session_id}: {result.get('error')}"
                    )
                    return {"success": False, "error": result.get("error"), "trigger": trigger}

                self.last_save = self._utc_now()
                self.save_count += 1

                # Only these strategies accumulate files that need trimming.
                if self.config.strategy in (
                    AutoSaveStrategy.BACKUP,
                    AutoSaveStrategy.VERSIONED,
                ):
                    await self._cleanup_old_backups()

                logger.info(
                    f"Auto-save successful for session {self.session_id} (trigger: {trigger})"
                )

                return {
                    "success": True,
                    "save_path": save_path,
                    "trigger": trigger,
                    "save_count": self.save_count,
                    "timestamp": self.last_save.isoformat(),
                }

            except Exception as e:
                logger.error(f"Auto-save error for session {self.session_id}: {e!s}")
                return {"success": False, "error": str(e), "trigger": trigger}

    def _get_save_path(self) -> str:
        """Determine the save path based on strategy.

        Returns:
            The custom path (``CUSTOM``), the original file path
            (``OVERWRITE``), a timestamped file in ``backup_dir`` (``BACKUP``),
            or a numbered file in ``backup_dir`` (``VERSIONED``). Each strategy
            falls back to a session-named file when its preferred target is
            not available.
        """
        strategy = self.config.strategy

        if strategy == AutoSaveStrategy.CUSTOM:
            return self.config.custom_path or f"session_{self.session_id}.csv"

        if strategy == AutoSaveStrategy.OVERWRITE:
            # Use the original file path if available, otherwise fall back
            if self.original_file_path:
                return self.original_file_path
            return f"session_{self.session_id}_autosave.csv"

        if strategy == AutoSaveStrategy.BACKUP:
            timestamp = self._utc_now().strftime("%Y%m%d_%H%M%S")
            filename = f"backup_{self.session_id}_{timestamp}.{self.config.format.value}"
            return os.path.join(self.config.backup_dir, filename)

        if strategy == AutoSaveStrategy.VERSIONED:
            # Version numbers follow this manager's own save counter.
            version = self.save_count + 1
            filename = f"version_{self.session_id}_v{version:04d}.{self.config.format.value}"
            return os.path.join(self.config.backup_dir, filename)

        return f"session_{self.session_id}.{self.config.format.value}"

    async def _cleanup_old_backups(self):
        """Remove old backup files beyond max_backups limit.

        Every regular file in ``backup_dir`` whose name contains the session
        id is considered; the oldest by modification time are deleted first.
        Errors are logged and never propagated.
        """
        if not os.path.exists(self.config.backup_dir):
            return

        try:
            # List all backup files for this session, oldest first
            candidates = [
                path
                for path in Path(self.config.backup_dir).glob(f"*{self.session_id}*")
                if path.is_file()
            ]
            candidates.sort(key=lambda path: path.stat().st_mtime)

            # Remove excess backups
            excess = len(candidates) - self.config.max_backups
            for path in candidates[: max(excess, 0)]:
                path.unlink()
                logger.info(f"Removed old backup: {path}")

        except Exception as e:
            logger.error(f"Error cleaning up backups: {e!s}")

    def should_save_after_operation(self) -> bool:
        """Return True when auto-save is enabled and the mode saves after operations."""
        return self.config.enabled and self.config.mode in [
            AutoSaveMode.AFTER_OPERATION,
            AutoSaveMode.HYBRID,
        ]

    def get_status(self) -> dict[str, Any]:
        """Return a snapshot of the auto-save state.

        Returns:
            A dict with the enabled flag, mode, strategy, ISO-formatted time of
            the last save, number of saves, whether the periodic task is
            currently running, and the full configuration.
        """
        return {
            "enabled": self.config.enabled,
            "mode": self.config.mode.value,
            "strategy": self.config.strategy.value,
            "last_save": self.last_save.isoformat() if self.last_save else None,
            "save_count": self.save_count,
            "periodic_active": self.periodic_task is not None and not self.periodic_task.done(),
            "config": self.config.to_dict(),
        }