@mseep/csv-editor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +53 -0
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +38 -0
  3. package/.github/workflows/deploy-docs.yml +62 -0
  4. package/.github/workflows/publish-github.yml +52 -0
  5. package/.github/workflows/publish.yml +44 -0
  6. package/.github/workflows/test.yml +32 -0
  7. package/.pre-commit-config.yaml +157 -0
  8. package/ALTERNATIVE_PUBLISHING.md +175 -0
  9. package/ARCHITECTURE.md +1011 -0
  10. package/CHANGELOG.md +99 -0
  11. package/CODE_OF_CONDUCT.md +41 -0
  12. package/CONTRIBUTING.md +427 -0
  13. package/Dockerfile +22 -0
  14. package/LICENSE +21 -0
  15. package/MCP_CONFIG.md +505 -0
  16. package/PUBLISHING.md +210 -0
  17. package/README.md +400 -0
  18. package/SECURITY.md +61 -0
  19. package/docs/README.md +41 -0
  20. package/docs/blog/2019-05-28-first-blog-post.md +12 -0
  21. package/docs/blog/2019-05-29-long-blog-post.md +44 -0
  22. package/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
  23. package/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
  24. package/docs/blog/2021-08-26-welcome/index.md +29 -0
  25. package/docs/blog/authors.yml +25 -0
  26. package/docs/blog/tags.yml +19 -0
  27. package/docs/docs/api/overview.md +183 -0
  28. package/docs/docs/installation.md +252 -0
  29. package/docs/docs/intro.md +87 -0
  30. package/docs/docs/tutorial-basics/_category_.json +8 -0
  31. package/docs/docs/tutorial-basics/congratulations.md +23 -0
  32. package/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
  33. package/docs/docs/tutorial-basics/create-a-document.md +57 -0
  34. package/docs/docs/tutorial-basics/create-a-page.md +43 -0
  35. package/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
  36. package/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
  37. package/docs/docs/tutorial-extras/_category_.json +7 -0
  38. package/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
  39. package/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
  40. package/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
  41. package/docs/docs/tutorial-extras/translate-your-site.md +88 -0
  42. package/docs/docs/tutorials/quickstart.md +365 -0
  43. package/docs/docusaurus.config.ts +163 -0
  44. package/docs/package-lock.json +17493 -0
  45. package/docs/package.json +48 -0
  46. package/docs/sidebars.ts +33 -0
  47. package/docs/src/components/HomepageFeatures/index.tsx +71 -0
  48. package/docs/src/components/HomepageFeatures/styles.module.css +11 -0
  49. package/docs/src/css/custom.css +30 -0
  50. package/docs/src/pages/index.module.css +23 -0
  51. package/docs/src/pages/index.tsx +44 -0
  52. package/docs/src/pages/markdown-page.md +7 -0
  53. package/docs/static/.nojekyll +0 -0
  54. package/docs/static/img/docusaurus-social-card.jpg +0 -0
  55. package/docs/static/img/docusaurus.png +0 -0
  56. package/docs/static/img/favicon.ico +0 -0
  57. package/docs/static/img/logo.svg +1 -0
  58. package/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
  59. package/docs/static/img/undraw_docusaurus_react.svg +170 -0
  60. package/docs/static/img/undraw_docusaurus_tree.svg +40 -0
  61. package/docs/tsconfig.json +8 -0
  62. package/examples/README.md +48 -0
  63. package/examples/auto_save_demo.py +206 -0
  64. package/examples/auto_save_overwrite.py +201 -0
  65. package/examples/basic_usage.py +135 -0
  66. package/examples/demo.py +139 -0
  67. package/examples/history_demo.py +317 -0
  68. package/examples/test_default_autosave.py +124 -0
  69. package/examples/update_consignee_example.py +179 -0
  70. package/package.json +51 -0
  71. package/plans/2026-04-19-fastmcp3-migration-plan.md +1045 -0
  72. package/pyproject.toml +331 -0
  73. package/requirements-dev.txt +30 -0
  74. package/requirements.txt +22 -0
  75. package/scripts/publish.py +67 -0
  76. package/smithery.yaml +15 -0
  77. package/specs/2026-04-19-fastmcp3-migration-design.md +243 -0
  78. package/src/csv_editor/__init__.py +8 -0
  79. package/src/csv_editor/models/__init__.py +39 -0
  80. package/src/csv_editor/models/auto_save.py +246 -0
  81. package/src/csv_editor/models/csv_session.py +468 -0
  82. package/src/csv_editor/models/data_models.py +244 -0
  83. package/src/csv_editor/models/history_manager.py +456 -0
  84. package/src/csv_editor/prompts/__init__.py +0 -0
  85. package/src/csv_editor/prompts/data_prompts.py +13 -0
  86. package/src/csv_editor/resources/__init__.py +0 -0
  87. package/src/csv_editor/resources/csv_resources.py +22 -0
  88. package/src/csv_editor/server.py +640 -0
  89. package/src/csv_editor/tools/__init__.py +5 -0
  90. package/src/csv_editor/tools/analytics.py +700 -0
  91. package/src/csv_editor/tools/auto_save_operations.py +235 -0
  92. package/src/csv_editor/tools/data_operations.py +3 -0
  93. package/src/csv_editor/tools/history_operations.py +315 -0
  94. package/src/csv_editor/tools/io_operations.py +431 -0
  95. package/src/csv_editor/tools/transformations.py +663 -0
  96. package/src/csv_editor/tools/validation.py +822 -0
  97. package/src/csv_editor/utils/__init__.py +0 -0
  98. package/src/csv_editor/utils/validators.py +205 -0
  99. package/tests/README.md +65 -0
  100. package/tests/__init__.py +7 -0
  101. package/tests/conftest.py +50 -0
  102. package/tests/test_auto_save.py +378 -0
  103. package/tests/test_basic.py +103 -0
  104. package/tests/test_integration.py +356 -0
  105. package/tests/test_server_boot.py +50 -0
  106. package/tests/test_settings.py +184 -0
@@ -0,0 +1,244 @@
1
+ """Data models for CSV Editor MCP Server."""
2
+
3
+ from datetime import datetime
4
+ from enum import Enum
5
+ from typing import Any, Literal
6
+
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field, field_validator
9
+
10
+
11
class DataType(str, Enum):
    """Logical column types recognised by the CSV editor.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    # Numeric kinds
    INTEGER = "integer"
    FLOAT = "float"

    # Textual and temporal kinds
    STRING = "string"
    DATETIME = "datetime"

    # Logical kind
    BOOLEAN = "boolean"

    # Column whose values do not share a single type
    MIXED = "mixed"
20
+
21
+
22
class OperationType(str, Enum):
    """Names of the operations that can be recorded against a dataset.

    Members are also ``str`` instances equal to their snake_case values.
    """

    # Session-level I/O
    LOAD = "load"

    # Row-level querying and reshaping
    FILTER = "filter"
    SORT = "sort"
    TRANSFORM = "transform"
    AGGREGATE = "aggregate"

    # Output and inspection
    EXPORT = "export"
    ANALYZE = "analyze"

    # Column-level edits
    UPDATE_COLUMN = "update_column"
    ADD_COLUMN = "add_column"
    REMOVE_COLUMN = "remove_column"
    RENAME = "rename"
    SELECT = "select"
    CHANGE_TYPE = "change_type"

    # Cleaning
    FILL_MISSING = "fill_missing"
    REMOVE_DUPLICATES = "remove_duplicates"

    # Grouping
    GROUP_BY = "group_by"

    # Quality and profiling
    VALIDATE = "validate"
    PROFILE = "profile"
    QUALITY_CHECK = "quality_check"
    ANOMALY_DETECTION = "anomaly_detection"
45
+
46
+
47
class ComparisonOperator(str, Enum):
    """Operators accepted in a filter condition.

    Members are also ``str`` instances equal to the operator token.
    """

    # Equality and ordering
    EQUALS = "="
    NOT_EQUALS = "!="
    GREATER_THAN = ">"
    LESS_THAN = "<"
    GREATER_THAN_OR_EQUALS = ">="
    LESS_THAN_OR_EQUALS = "<="

    # Substring / prefix / suffix matching
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    STARTS_WITH = "starts_with"
    ENDS_WITH = "ends_with"

    # Membership in a collection of values
    IN = "in"
    NOT_IN = "not_in"

    # Null tests (take no comparison value)
    IS_NULL = "is_null"
    IS_NOT_NULL = "is_not_null"
64
+
65
+
66
class LogicalOperator(str, Enum):
    """Boolean connectives used to combine filter conditions.

    Values are the upper-case keywords themselves.
    """

    AND = "AND"  # conjunction
    OR = "OR"  # disjunction
    NOT = "NOT"  # negation
72
+
73
+
74
class AggregateFunction(str, Enum):
    """Aggregation names available for data analysis.

    Members are also ``str`` instances equal to their lowercase values.
    """

    # Central tendency and totals
    SUM = "sum"
    MEAN = "mean"
    MEDIAN = "median"

    # Extremes
    MIN = "min"
    MAX = "max"

    # Counting
    COUNT = "count"
    COUNT_DISTINCT = "count_distinct"

    # Dispersion
    STD = "std"
    VAR = "var"

    # Positional
    FIRST = "first"
    LAST = "last"
88
+
89
+
90
class ExportFormat(str, Enum):
    """File formats a dataset can be exported to.

    Members are also ``str`` instances equal to their lowercase values.
    """

    # Delimited text
    CSV = "csv"
    TSV = "tsv"

    # Structured / binary
    JSON = "json"
    EXCEL = "excel"
    PARQUET = "parquet"

    # Markup
    HTML = "html"
    MARKDOWN = "markdown"
100
+
101
+
102
class FilterCondition(BaseModel):
    """A single filter condition: ``column <operator> value``."""

    column: str = Field(..., description="Column name to filter on")
    operator: ComparisonOperator = Field(..., description="Comparison operator")
    value: Any = Field(None, description="Value to compare against")

    @field_validator("value", mode="before")
    @classmethod
    def validate_value(cls, v, info):
        """Normalise ``value`` according to the already-validated operator.

        * ``is_null`` / ``is_not_null`` take no operand, so the value is
          discarded and ``None`` is stored.
        * ``in`` / ``not_in`` always store a list: a list is kept as is, a
          tuple/set/frozenset is converted to a list, and any other value is
          wrapped in a one-element list.
        * Every other operator leaves the value untouched.

        ``operator`` is declared before ``value``, so it is normally present
        in ``info.data``; if it failed its own validation it is absent and
        the value is returned unchanged.
        """
        operator = info.data.get("operator") if hasattr(info, "data") else None

        if operator in (ComparisonOperator.IS_NULL, ComparisonOperator.IS_NOT_NULL):
            return None

        if operator in (ComparisonOperator.IN, ComparisonOperator.NOT_IN):
            if isinstance(v, list):
                return v
            # A tuple or set is already a collection of candidates; wrapping it
            # would produce a single-element list holding the whole collection.
            if isinstance(v, (tuple, set, frozenset)):
                return list(v)
            return [v]

        return v
119
+
120
+
121
class SortSpec(BaseModel):
    """Describes one sort key: which column, and in which direction."""

    # Required: name of the column used as the sort key.
    column: str = Field(description="Column to sort by")
    # Optional: defaults to ascending order.
    ascending: bool = Field(default=True, description="Sort in ascending order")
126
+
127
+
128
class ColumnSchema(BaseModel):
    """Declarative constraints for a single column of a dataset."""

    # Required identity of the column.
    name: str = Field(description="Column name")
    dtype: DataType = Field(description="Data type")

    # Presence / uniqueness constraints.
    nullable: bool = Field(default=True, description="Whether column can contain null values")
    unique: bool = Field(default=False, description="Whether values must be unique")

    # Optional value constraints; ``None`` means "not constrained".
    min_value: float | int | str | None = Field(default=None, description="Minimum value")
    max_value: float | int | str | None = Field(default=None, description="Maximum value")
    allowed_values: list[Any] | None = Field(default=None, description="List of allowed values")
    pattern: str | None = Field(default=None, description="Regex pattern for validation")
139
+
140
+
141
class DataSchema(BaseModel):
    """Complete schema for a dataset."""

    columns: list[ColumnSchema] = Field(..., description="Column definitions")
    row_count: int | None = Field(None, description="Expected number of rows")
    primary_key: list[str] | None = Field(None, description="Primary key columns")

    def validate_dataframe(self, df: pd.DataFrame) -> dict[str, Any]:
        """Validate a DataFrame against this schema.

        Checks performed:

        * columns declared in the schema but absent from ``df`` -> error;
        * columns present in ``df`` but not declared -> warning;
        * per declared column: nullability, uniqueness and allowed values.

        ``dtype``, ``min_value``, ``max_value``, ``pattern``, ``row_count``
        and ``primary_key`` are not checked by this method.

        Args:
            df: The DataFrame to check.

        Returns:
            A dict with ``valid`` (True when there are no errors), ``errors``
            and ``warnings`` (lists of messages).
        """
        errors: list[str] = []
        warnings: list[str] = []

        # Check columns
        expected_cols = {col.name for col in self.columns}
        actual_cols = set(df.columns)

        missing_cols = expected_cols - actual_cols
        extra_cols = actual_cols - expected_cols

        if missing_cols:
            errors.append(f"Missing columns: {missing_cols}")
        if extra_cols:
            warnings.append(f"Extra columns: {extra_cols}")

        # Validate each column
        for col_schema in self.columns:
            if col_schema.name not in df.columns:
                # Already reported above as a missing column.
                continue

            col_data = df[col_schema.name]

            # Check nullability
            if not col_schema.nullable and col_data.isnull().any():
                errors.append(f"Column {col_schema.name} contains null values")

            # Check uniqueness
            if col_schema.unique and col_data.duplicated().any():
                errors.append(f"Column {col_schema.name} contains duplicate values")

            # Check allowed values. Nulls are excluded here: whether they are
            # acceptable is decided by the nullability check above, so a
            # nullable column with missing entries is not reported as invalid.
            if col_schema.allowed_values:
                invalid = col_data.notna() & ~col_data.isin(col_schema.allowed_values)
                if invalid.any():
                    errors.append(f"Column {col_schema.name} contains invalid values")

        return {"valid": len(errors) == 0, "errors": errors, "warnings": warnings}
187
+
188
+
189
class DataQualityRule(BaseModel):
    """Definition of one data quality check."""

    # Required descriptive fields.
    name: str = Field(description="Rule name")
    description: str = Field(description="Rule description")

    # Optional target column.
    column: str | None = Field(default=None, description="Column to check (if applicable)")

    # Required category; restricted to the five listed literals.
    rule_type: Literal["completeness", "uniqueness", "validity", "consistency", "accuracy"] = Field(
        description="Type of quality check",
    )

    # Optional evaluation parameters.
    expression: str | None = Field(default=None, description="Expression to evaluate")
    threshold: float | None = Field(default=None, description="Threshold for pass/fail")
200
+
201
+
202
class OperationResult(BaseModel):
    """Outcome reported by a data operation."""

    # Required status and human-readable summary.
    success: bool = Field(description="Whether operation succeeded")
    message: str = Field(description="Result message")

    # Optional context; every field below defaults to ``None``.
    session_id: str | None = Field(default=None, description="Session ID")
    rows_affected: int | None = Field(default=None, description="Number of rows affected")
    columns_affected: list[str] | None = Field(default=None, description="Columns affected")
    data: dict[str, Any] | None = Field(default=None, description="Additional result data")
    error: str | None = Field(default=None, description="Error message if failed")
    warnings: list[str] | None = Field(default=None, description="Warning messages")
213
+
214
+
215
class SessionInfo(BaseModel):
    """Summary of a data session and the dataset it holds."""

    # Identity and timing.
    session_id: str = Field(description="Unique session identifier")
    created_at: datetime = Field(description="Session creation time")
    last_accessed: datetime = Field(description="Last access time")

    # Dataset dimensions.
    row_count: int = Field(description="Number of rows in dataset")
    column_count: int = Field(description="Number of columns")
    columns: list[str] = Field(description="Column names")
    memory_usage_mb: float = Field(description="Memory usage in MB")

    # Optional bookkeeping.
    operations_count: int = Field(default=0, description="Number of operations performed")
    file_path: str | None = Field(default=None, description="Source file path")
227
+
228
+
229
class DataStatistics(BaseModel):
    """Per-column statistical summary."""

    # Required for every column.
    column: str = Field(description="Column name")
    dtype: str = Field(description="Data type")
    count: int = Field(description="Non-null count")
    null_count: int = Field(description="Null count")
    unique_count: int = Field(description="Unique value count")

    # Moments; populated for numeric columns only.
    mean: float | None = Field(default=None, description="Mean (numeric only)")
    std: float | None = Field(default=None, description="Standard deviation (numeric only)")

    # Extremes; may hold any value type.
    min: Any | None = Field(default=None, description="Minimum value")
    max: Any | None = Field(default=None, description="Maximum value")

    # Quartiles; populated for numeric columns only.
    q25: float | None = Field(default=None, description="25th percentile (numeric only)")
    q50: float | None = Field(default=None, description="50th percentile (numeric only)")
    q75: float | None = Field(default=None, description="75th percentile (numeric only)")

    # Frequency table keyed by value.
    top_values: dict[str, int] | None = Field(default=None, description="Top 10 most frequent values")
@@ -0,0 +1,456 @@
1
+ """History management for CSV operations with persistence and undo/redo capabilities."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import pickle
7
+ from datetime import datetime
8
+ from enum import Enum
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import pandas as pd
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class HistoryStorage(str, Enum):
    """Where operation history is kept.

    Members are also ``str`` instances equal to their lowercase values.
    """

    # In-memory only (lost on session end)
    MEMORY = "memory"
    # Save as JSON file
    JSON = "json"
    # Save as pickle (preserves DataFrames)
    PICKLE = "pickle"
    # Save in SQLite database (future)
    SQLITE = "sqlite"
24
+
25
+
26
+ class OperationHistory:
27
+ """Represents a single operation in history."""
28
+
29
+ def __init__(
30
+ self,
31
+ operation_id: str,
32
+ operation_type: str,
33
+ timestamp: datetime,
34
+ details: dict[str, Any],
35
+ data_snapshot: pd.DataFrame | None = None,
36
+ metadata: dict[str, Any] | None = None,
37
+ ):
38
+ """Initialize operation history entry."""
39
+ self.operation_id = operation_id
40
+ self.operation_type = operation_type
41
+ self.timestamp = timestamp
42
+ self.details = details
43
+ self.data_snapshot = data_snapshot
44
+ self.metadata = metadata or {}
45
+
46
+ def to_dict(self) -> dict[str, Any]:
47
+ """Convert to dictionary for serialization."""
48
+ return {
49
+ "operation_id": self.operation_id,
50
+ "operation_type": self.operation_type,
51
+ "timestamp": self.timestamp.isoformat(),
52
+ "details": self.details,
53
+ "metadata": self.metadata,
54
+ "has_snapshot": self.data_snapshot is not None,
55
+ }
56
+
57
+ @classmethod
58
+ def from_dict(
59
+ cls, data: dict[str, Any], data_snapshot: pd.DataFrame | None = None
60
+ ) -> "OperationHistory":
61
+ """Create from dictionary."""
62
+ return cls(
63
+ operation_id=data["operation_id"],
64
+ operation_type=data["operation_type"],
65
+ timestamp=datetime.fromisoformat(data["timestamp"]),
66
+ details=data["details"],
67
+ data_snapshot=data_snapshot,
68
+ metadata=data.get("metadata", {}),
69
+ )
70
+
71
+
72
class HistoryManager:
    """Manages operation history with persistence and undo/redo capabilities.

    ``history`` is an ordered list of ``OperationHistory`` entries and
    ``current_index`` points at the entry representing the current state
    (``-1`` when nothing is applied). Undone entries stay in ``history`` and
    are additionally pushed onto ``redo_stack``; adding a new operation
    discards everything after ``current_index``.

    Only some entries carry a DataFrame snapshot. ``undo`` and
    ``restore_to_operation`` return the nearest snapshot at or before the
    target position, which is not necessarily the exact state at that
    position; no operations are replayed here.
    """

    def __init__(
        self,
        session_id: str,
        storage_type: HistoryStorage = HistoryStorage.MEMORY,
        history_dir: str | None = None,
        max_history: int = 100,
        enable_snapshots: bool = True,
        snapshot_interval: int = 5,  # Take snapshot every N operations
    ):
        """Initialize history manager.

        Args:
            session_id: Identifier used in history and snapshot file names.
            storage_type: Where history is kept. Only MEMORY, JSON and PICKLE
                are implemented.
            history_dir: Directory for persisted history; defaults to
                ``.csv_history`` under the current working directory.
            max_history: Maximum number of entries kept; oldest are dropped.
            enable_snapshots: Whether DataFrame snapshots are recorded at all.
            snapshot_interval: A snapshot is taken on every N-th operation.
        """
        self.session_id = session_id
        self.storage_type = storage_type
        self.history_dir = history_dir or os.path.join(os.getcwd(), ".csv_history")
        self.max_history = max_history
        self.enable_snapshots = enable_snapshots
        self.snapshot_interval = snapshot_interval

        # History tracking
        self.history: list[OperationHistory] = []
        self.current_index = -1  # Points to current position in history
        self.redo_stack: list[OperationHistory] = []  # For redo functionality

        # Create history directory if needed
        if storage_type != HistoryStorage.MEMORY:
            if storage_type == HistoryStorage.SQLITE:
                # No load/save branch exists for SQLITE; say so instead of
                # silently behaving like in-memory storage.
                logger.warning(
                    "SQLite history storage is not implemented; history for session %s "
                    "will not be persisted",
                    session_id,
                )
            Path(self.history_dir).mkdir(parents=True, exist_ok=True)
            self._load_history()

    def _get_history_file_path(self, extension: str = "json") -> str:
        """Get the path for history file."""
        return os.path.join(self.history_dir, f"history_{self.session_id}.{extension}")

    def _get_snapshot_file_path(self, operation_id: str, create_dir: bool = True) -> str:
        """Get the path for snapshot file.

        Args:
            operation_id: Operation whose snapshot file is addressed.
            create_dir: When True (the default) the per-session snapshot
                directory is created as a side effect. Pass False for pure
                lookups and deletions.
        """
        snapshot_dir = os.path.join(self.history_dir, "snapshots", self.session_id)
        if create_dir:
            Path(snapshot_dir).mkdir(parents=True, exist_ok=True)
        return os.path.join(snapshot_dir, f"snapshot_{operation_id}.pkl")

    def _remove_snapshot_file(self, entry: OperationHistory) -> None:
        """Best-effort removal of the on-disk snapshot belonging to ``entry``."""
        if entry.data_snapshot is None or self.storage_type == HistoryStorage.MEMORY:
            return
        snapshot_file = self._get_snapshot_file_path(entry.operation_id, create_dir=False)
        try:
            if os.path.exists(snapshot_file):
                os.remove(snapshot_file)
        except OSError as e:
            # Cleanup must not make the calling operation fail.
            logger.warning("Could not remove snapshot file %s: %s", snapshot_file, e)

    def _restore_position(self, stored_index: Any, redo_count: Any) -> None:
        """Set ``current_index`` and ``redo_stack`` from persisted values.

        The index is clamped to the loaded history so a stale or corrupt file
        cannot leave the cursor out of range. The redo stack always consists
        of the entries right after the cursor in reverse order, so it is
        rebuilt from how many of them were redoable when the file was saved.
        """
        if not isinstance(stored_index, int):
            stored_index = -1
        if not isinstance(redo_count, int) or redo_count < 0:
            redo_count = 0

        last_index = len(self.history) - 1
        self.current_index = max(-1, min(stored_index, last_index))

        first_undone = self.current_index + 1
        pending = self.history[first_undone : first_undone + redo_count]
        self.redo_stack = pending[::-1]

    def _should_snapshot(self) -> bool:
        """Decide whether the next entry is due for a snapshot.

        Counting the trailing entries without a snapshot (rather than taking
        ``len(history) % snapshot_interval``) keeps the cadence correct once
        the history is capped at ``max_history``, where the length stops
        changing. A history with no snapshot at all is always due.
        """
        since_last = 0
        for entry in reversed(self.history):
            if entry.data_snapshot is not None:
                return since_last >= self.snapshot_interval - 1
            since_last += 1
        return True

    def _load_history(self):
        """Load history from persistent storage.

        Any failure is logged and leaves the manager with whatever state it
        had before the call.
        """
        # NOTE(security): pickle.load can execute arbitrary code when the file
        # has been tampered with. The history directory must be trusted.
        try:
            if self.storage_type == HistoryStorage.JSON:
                history_file = self._get_history_file_path("json")
                if os.path.exists(history_file):
                    with open(history_file, encoding="utf-8") as f:
                        data = json.load(f)

                    loaded: list[OperationHistory] = []
                    for entry in data.get("history", []):
                        # Load snapshot if available
                        snapshot = None
                        if entry.get("has_snapshot"):
                            snapshot_file = self._get_snapshot_file_path(
                                entry["operation_id"], create_dir=False
                            )
                            if os.path.exists(snapshot_file):
                                with open(snapshot_file, "rb") as sf:
                                    snapshot = pickle.load(sf)

                        loaded.append(OperationHistory.from_dict(entry, snapshot))

                    # Assign only after every entry parsed, so a bad entry
                    # cannot leave a half-loaded history behind.
                    self.history = loaded
                    self._restore_position(
                        data.get("current_index", -1), data.get("redo_count", 0)
                    )
                    logger.info(
                        f"Loaded {len(self.history)} history entries for session {self.session_id}"
                    )

            elif self.storage_type == HistoryStorage.PICKLE:
                history_file = self._get_history_file_path("pkl")
                if os.path.exists(history_file):
                    with open(history_file, "rb") as f:
                        data = pickle.load(f)
                    self.history = data.get("history", [])
                    self._restore_position(
                        data.get("current_index", -1), data.get("redo_count", 0)
                    )
                    logger.info(
                        f"Loaded {len(self.history)} history entries for session {self.session_id}"
                    )

        except Exception as e:
            logger.error(f"Error loading history: {e!s}")

    def _save_history(self):
        """Save history to persistent storage.

        Failures are logged, not raised.
        """
        try:
            if self.storage_type == HistoryStorage.JSON:
                history_file = self._get_history_file_path("json")
                data = {
                    "session_id": self.session_id,
                    "history": [h.to_dict() for h in self.history],
                    "current_index": self.current_index,
                    "redo_count": len(self.redo_stack),
                    "timestamp": datetime.utcnow().isoformat(),
                }

                # Serialise before opening the file: a non-serialisable value
                # in ``details`` must not truncate the existing history file.
                payload = json.dumps(data, indent=2)
                with open(history_file, "w", encoding="utf-8") as f:
                    f.write(payload)

                # Save snapshots separately. A snapshot is fixed when its
                # entry is created, so files already on disk are not rewritten.
                for entry in self.history:
                    if entry.data_snapshot is None:
                        continue
                    snapshot_file = self._get_snapshot_file_path(entry.operation_id)
                    if os.path.exists(snapshot_file):
                        continue
                    with open(snapshot_file, "wb") as sf:
                        pickle.dump(entry.data_snapshot, sf)

            elif self.storage_type == HistoryStorage.PICKLE:
                history_file = self._get_history_file_path("pkl")
                data = {
                    "session_id": self.session_id,
                    "history": self.history,
                    "current_index": self.current_index,
                    "redo_count": len(self.redo_stack),
                    "timestamp": datetime.utcnow(),
                }

                # Same reasoning as for JSON: pickle first, then write.
                payload = pickle.dumps(data)
                with open(history_file, "wb") as f:
                    f.write(payload)

            logger.debug(f"Saved {len(self.history)} history entries for session {self.session_id}")

        except Exception as e:
            logger.error(f"Error saving history: {e!s}")

    def add_operation(
        self,
        operation_type: str,
        details: dict[str, Any],
        current_data: pd.DataFrame | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Add a new operation to history.

        Args:
            operation_type: Name of the operation being recorded.
            details: Operation parameters, stored with the entry.
            current_data: Current DataFrame; copied into the entry when a
                snapshot is due.
            metadata: Optional extra information stored with the entry.

        Returns:
            The generated operation id.
        """
        # Clear redo stack when new operation is added
        self.redo_stack.clear()

        # Remove operations after current index (for undo/redo consistency)
        if self.current_index < len(self.history) - 1:
            discarded = self.history[self.current_index + 1 :]
            self.history = self.history[: self.current_index + 1]
            # Their snapshot files would otherwise be orphaned on disk.
            for entry in discarded:
                self._remove_snapshot_file(entry)

        # Generate operation ID; add a suffix if two operations land on the
        # same microsecond so ids (and snapshot file names) stay unique.
        now = datetime.utcnow()
        base_id = f"{self.session_id}_{now.strftime('%Y%m%d_%H%M%S_%f')}"
        operation_id = base_id
        existing_ids = {entry.operation_id for entry in self.history}
        suffix = 0
        while operation_id in existing_ids:
            suffix += 1
            operation_id = f"{base_id}_{suffix}"

        # Determine if we should take a snapshot
        take_snapshot = (
            self.enable_snapshots and current_data is not None and self._should_snapshot()
        )

        # Create operation entry
        operation = OperationHistory(
            operation_id=operation_id,
            operation_type=operation_type,
            timestamp=now,
            details=details,
            data_snapshot=current_data.copy() if take_snapshot else None,
            metadata=metadata,
        )

        # Add to history
        self.history.append(operation)
        self.current_index += 1

        # Trim history if exceeds max (loop so an over-long loaded history is
        # brought back under the limit in one go)
        while self.history and len(self.history) > self.max_history:
            removed = self.history.pop(0)
            self.current_index -= 1
            # Clean up old snapshot file if exists
            self._remove_snapshot_file(removed)

        # Save to persistent storage
        if self.storage_type != HistoryStorage.MEMORY:
            self._save_history()

        logger.info(f"Added operation {operation_id}: {operation_type}")
        return operation_id

    def can_undo(self) -> bool:
        """Check if undo is possible."""
        return self.current_index >= 0

    def can_redo(self) -> bool:
        """Check if redo is possible."""
        return len(self.redo_stack) > 0

    def undo(self) -> tuple[OperationHistory | None, pd.DataFrame | None]:
        """Undo the last operation and return the previous state.

        Returns:
            ``(undone_operation, snapshot)``. ``snapshot`` is a copy of the
            most recent snapshot at or before the new position, or ``None``
            when there is none. ``(None, None)`` when nothing can be undone.
        """
        if not self.can_undo():
            return None, None

        # Move current operation to redo stack
        current_op = self.history[self.current_index]
        self.redo_stack.append(current_op)

        # Move index back
        self.current_index -= 1

        # Find the most recent snapshot before current position
        snapshot = None
        for i in range(self.current_index, -1, -1):
            if self.history[i].data_snapshot is not None:
                snapshot = self.history[i].data_snapshot.copy()
                break

        # Save state
        if self.storage_type != HistoryStorage.MEMORY:
            self._save_history()

        logger.info(f"Undid operation: {current_op.operation_type}")

        # Return the operation that was undone and the data to restore
        return current_op, snapshot

    def redo(self) -> tuple[OperationHistory | None, pd.DataFrame | None]:
        """Redo the previously undone operation.

        Returns:
            ``(redone_operation, snapshot)``. ``snapshot`` is a copy of the
            snapshot stored on the entry at the new position, or ``None``
            when that entry has none. ``(None, None)`` when nothing can be
            redone.
        """
        if not self.can_redo():
            return None, None

        # Get operation from redo stack
        operation = self.redo_stack.pop()

        # Move index forward
        self.current_index += 1

        # Get the snapshot at this position if available
        snapshot = None
        if self.current_index < len(self.history):
            stored = self.history[self.current_index].data_snapshot
            if stored is not None:
                snapshot = stored.copy()

        # Save state
        if self.storage_type != HistoryStorage.MEMORY:
            self._save_history()

        logger.info(f"Redid operation: {operation.operation_type}")

        return operation, snapshot

    def get_history(self, limit: int | None = None) -> list[dict[str, Any]]:
        """Get operation history.

        Args:
            limit: When given, only the last ``limit`` entries are returned.

        Returns:
            One dict per entry (``to_dict`` output plus ``index``,
            ``is_current`` and ``can_restore``), oldest first.
        """
        start = 0 if limit is None else max(0, len(self.history) - limit)

        history_list = []
        for i, entry in enumerate(self.history[start:], start=start):
            history_dict = entry.to_dict()
            history_dict["index"] = i
            history_dict["is_current"] = i == self.current_index
            history_dict["can_restore"] = entry.data_snapshot is not None
            history_list.append(history_dict)

        return history_list

    def get_operation(self, operation_id: str) -> OperationHistory | None:
        """Get a specific operation by ID."""
        for entry in self.history:
            if entry.operation_id == operation_id:
                return entry
        return None

    def restore_to_operation(self, operation_id: str) -> pd.DataFrame | None:
        """Restore data to a specific operation point.

        Moves the cursor to the given operation and returns a copy of the
        nearest snapshot at or before it. The redo stack is cleared.

        Returns:
            The snapshot copy, or ``None`` when the operation is unknown or
            no snapshot exists at or before it (the cursor is then unchanged).
        """
        # Find the operation
        target_index = None
        for i, entry in enumerate(self.history):
            if entry.operation_id == operation_id:
                target_index = i
                break

        if target_index is None:
            logger.error(f"Operation {operation_id} not found")
            return None

        # Find the nearest snapshot at or before target
        snapshot = None
        for i in range(target_index, -1, -1):
            if self.history[i].data_snapshot is not None:
                snapshot = self.history[i].data_snapshot.copy()
                break

        if snapshot is None:
            logger.error(f"No snapshot available for operation {operation_id}")
            return None

        self.current_index = target_index

        # Clear redo stack since we're jumping to a specific point
        self.redo_stack.clear()

        # Save state
        if self.storage_type != HistoryStorage.MEMORY:
            self._save_history()

        logger.info(f"Restored to operation {operation_id}")
        return snapshot

    def clear_history(self):
        """Clear all history, including persisted files for this session."""
        self.history.clear()
        self.redo_stack.clear()
        self.current_index = -1

        # Clean up files
        if self.storage_type != HistoryStorage.MEMORY:
            # Remove history file
            for ext in ["json", "pkl"]:
                history_file = self._get_history_file_path(ext)
                if os.path.exists(history_file):
                    os.remove(history_file)

            # Remove snapshot files
            snapshot_dir = os.path.join(self.history_dir, "snapshots", self.session_id)
            if os.path.exists(snapshot_dir):
                import shutil

                shutil.rmtree(snapshot_dir)

        logger.info(f"Cleared history for session {self.session_id}")

    def export_history(self, file_path: str, format: str = "json") -> bool:
        """Export history to a file.

        Args:
            file_path: Destination path.
            format: ``"json"`` or ``"csv"``.

        Returns:
            True when the file was written; False on an unsupported format or
            on any error (which is logged).
        """
        try:
            if format == "json":
                data = {
                    "session_id": self.session_id,
                    "exported_at": datetime.utcnow().isoformat(),
                    "total_operations": len(self.history),
                    "current_position": self.current_index,
                    "operations": self.get_history(),
                }

                # Serialise first so a failure does not leave a partial file.
                payload = json.dumps(data, indent=2)
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(payload)

            elif format == "csv":
                # Export as CSV with operation details
                columns = ["timestamp", "operation_type", "details", "has_snapshot"]
                history_data = [
                    {
                        "timestamp": entry.timestamp.isoformat(),
                        "operation_type": entry.operation_type,
                        "details": json.dumps(entry.details),
                        "has_snapshot": entry.data_snapshot is not None,
                    }
                    for entry in self.history
                ]

                # Explicit columns keep the header row even for empty history.
                df = pd.DataFrame(history_data, columns=columns)
                df.to_csv(file_path, index=False)

            else:
                # Nothing was written, so do not report success.
                logger.error(f"Error exporting history: unsupported format {format!r}")
                return False

            logger.info(f"Exported history to {file_path}")
            return True

        except Exception as e:
            logger.error(f"Error exporting history: {e!s}")
            return False

    def get_statistics(self) -> dict[str, Any]:
        """Get history statistics.

        The same keys are returned whether or not any history exists; with an
        empty history the counts are zero and the first/last timestamps are
        ``None``.
        """
        # Count operation types
        operation_types: dict[str, int] = {}
        snapshots_count = 0

        for entry in self.history:
            operation_types[entry.operation_type] = operation_types.get(entry.operation_type, 0) + 1
            if entry.data_snapshot is not None:
                snapshots_count += 1

        return {
            "total_operations": len(self.history),
            "current_position": self.current_index + 1,
            "can_undo": self.can_undo(),
            "can_redo": self.can_redo(),
            "redo_stack_size": len(self.redo_stack),
            "operation_types": operation_types,
            "first_operation": self.history[0].timestamp.isoformat() if self.history else None,
            "last_operation": self.history[-1].timestamp.isoformat() if self.history else None,
            "snapshots_count": snapshots_count,
            "storage_type": self.storage_type.value,
            "max_history": self.max_history,
        }
File without changes