@mseep/csv-editor 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/ISSUE_TEMPLATE/bug_report.md +53 -0
- package/.github/ISSUE_TEMPLATE/feature_request.md +38 -0
- package/.github/workflows/deploy-docs.yml +62 -0
- package/.github/workflows/publish-github.yml +52 -0
- package/.github/workflows/publish.yml +44 -0
- package/.github/workflows/test.yml +32 -0
- package/.pre-commit-config.yaml +157 -0
- package/ALTERNATIVE_PUBLISHING.md +175 -0
- package/ARCHITECTURE.md +1011 -0
- package/CHANGELOG.md +99 -0
- package/CODE_OF_CONDUCT.md +41 -0
- package/CONTRIBUTING.md +427 -0
- package/Dockerfile +22 -0
- package/LICENSE +21 -0
- package/MCP_CONFIG.md +505 -0
- package/PUBLISHING.md +210 -0
- package/README.md +400 -0
- package/SECURITY.md +61 -0
- package/docs/README.md +41 -0
- package/docs/blog/2019-05-28-first-blog-post.md +12 -0
- package/docs/blog/2019-05-29-long-blog-post.md +44 -0
- package/docs/blog/2021-08-01-mdx-blog-post.mdx +24 -0
- package/docs/blog/2021-08-26-welcome/docusaurus-plushie-banner.jpeg +0 -0
- package/docs/blog/2021-08-26-welcome/index.md +29 -0
- package/docs/blog/authors.yml +25 -0
- package/docs/blog/tags.yml +19 -0
- package/docs/docs/api/overview.md +183 -0
- package/docs/docs/installation.md +252 -0
- package/docs/docs/intro.md +87 -0
- package/docs/docs/tutorial-basics/_category_.json +8 -0
- package/docs/docs/tutorial-basics/congratulations.md +23 -0
- package/docs/docs/tutorial-basics/create-a-blog-post.md +34 -0
- package/docs/docs/tutorial-basics/create-a-document.md +57 -0
- package/docs/docs/tutorial-basics/create-a-page.md +43 -0
- package/docs/docs/tutorial-basics/deploy-your-site.md +31 -0
- package/docs/docs/tutorial-basics/markdown-features.mdx +152 -0
- package/docs/docs/tutorial-extras/_category_.json +7 -0
- package/docs/docs/tutorial-extras/img/docsVersionDropdown.png +0 -0
- package/docs/docs/tutorial-extras/img/localeDropdown.png +0 -0
- package/docs/docs/tutorial-extras/manage-docs-versions.md +55 -0
- package/docs/docs/tutorial-extras/translate-your-site.md +88 -0
- package/docs/docs/tutorials/quickstart.md +365 -0
- package/docs/docusaurus.config.ts +163 -0
- package/docs/package-lock.json +17493 -0
- package/docs/package.json +48 -0
- package/docs/sidebars.ts +33 -0
- package/docs/src/components/HomepageFeatures/index.tsx +71 -0
- package/docs/src/components/HomepageFeatures/styles.module.css +11 -0
- package/docs/src/css/custom.css +30 -0
- package/docs/src/pages/index.module.css +23 -0
- package/docs/src/pages/index.tsx +44 -0
- package/docs/src/pages/markdown-page.md +7 -0
- package/docs/static/.nojekyll +0 -0
- package/docs/static/img/docusaurus-social-card.jpg +0 -0
- package/docs/static/img/docusaurus.png +0 -0
- package/docs/static/img/favicon.ico +0 -0
- package/docs/static/img/logo.svg +1 -0
- package/docs/static/img/undraw_docusaurus_mountain.svg +171 -0
- package/docs/static/img/undraw_docusaurus_react.svg +170 -0
- package/docs/static/img/undraw_docusaurus_tree.svg +40 -0
- package/docs/tsconfig.json +8 -0
- package/examples/README.md +48 -0
- package/examples/auto_save_demo.py +206 -0
- package/examples/auto_save_overwrite.py +201 -0
- package/examples/basic_usage.py +135 -0
- package/examples/demo.py +139 -0
- package/examples/history_demo.py +317 -0
- package/examples/test_default_autosave.py +124 -0
- package/examples/update_consignee_example.py +179 -0
- package/package.json +51 -0
- package/plans/2026-04-19-fastmcp3-migration-plan.md +1045 -0
- package/pyproject.toml +331 -0
- package/requirements-dev.txt +30 -0
- package/requirements.txt +22 -0
- package/scripts/publish.py +67 -0
- package/smithery.yaml +15 -0
- package/specs/2026-04-19-fastmcp3-migration-design.md +243 -0
- package/src/csv_editor/__init__.py +8 -0
- package/src/csv_editor/models/__init__.py +39 -0
- package/src/csv_editor/models/auto_save.py +246 -0
- package/src/csv_editor/models/csv_session.py +468 -0
- package/src/csv_editor/models/data_models.py +244 -0
- package/src/csv_editor/models/history_manager.py +456 -0
- package/src/csv_editor/prompts/__init__.py +0 -0
- package/src/csv_editor/prompts/data_prompts.py +13 -0
- package/src/csv_editor/resources/__init__.py +0 -0
- package/src/csv_editor/resources/csv_resources.py +22 -0
- package/src/csv_editor/server.py +640 -0
- package/src/csv_editor/tools/__init__.py +5 -0
- package/src/csv_editor/tools/analytics.py +700 -0
- package/src/csv_editor/tools/auto_save_operations.py +235 -0
- package/src/csv_editor/tools/data_operations.py +3 -0
- package/src/csv_editor/tools/history_operations.py +315 -0
- package/src/csv_editor/tools/io_operations.py +431 -0
- package/src/csv_editor/tools/transformations.py +663 -0
- package/src/csv_editor/tools/validation.py +822 -0
- package/src/csv_editor/utils/__init__.py +0 -0
- package/src/csv_editor/utils/validators.py +205 -0
- package/tests/README.md +65 -0
- package/tests/__init__.py +7 -0
- package/tests/conftest.py +50 -0
- package/tests/test_auto_save.py +378 -0
- package/tests/test_basic.py +103 -0
- package/tests/test_integration.py +356 -0
- package/tests/test_server_boot.py +50 -0
- package/tests/test_settings.py +184 -0
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""Data models for CSV Editor MCP Server."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from pydantic import BaseModel, Field, field_validator
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DataType(str, Enum):
    """Column data types understood by the CSV editor.

    Every member also subclasses ``str``, so a member compares equal to its
    plain string value (``DataType.FLOAT == "float"``).
    """

    # Numeric types
    INTEGER = "integer"
    FLOAT = "float"

    # Text and temporal types
    STRING = "string"
    DATETIME = "datetime"

    # Remaining types
    BOOLEAN = "boolean"
    MIXED = "mixed"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class OperationType(str, Enum):
    """Identifiers for the kinds of operation a session can record.

    Members are ``str`` subclasses, so each compares equal to its lowercase
    string value.
    """

    # Whole-dataset operations
    LOAD = "load"
    FILTER = "filter"
    SORT = "sort"
    TRANSFORM = "transform"
    AGGREGATE = "aggregate"
    EXPORT = "export"
    ANALYZE = "analyze"

    # Column and row edits
    UPDATE_COLUMN = "update_column"
    ADD_COLUMN = "add_column"
    REMOVE_COLUMN = "remove_column"
    RENAME = "rename"
    SELECT = "select"
    CHANGE_TYPE = "change_type"
    FILL_MISSING = "fill_missing"
    REMOVE_DUPLICATES = "remove_duplicates"
    GROUP_BY = "group_by"

    # Validation and quality checks
    VALIDATE = "validate"
    PROFILE = "profile"
    QUALITY_CHECK = "quality_check"
    ANOMALY_DETECTION = "anomaly_detection"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class ComparisonOperator(str, Enum):
    """Operators accepted by a filter condition.

    Members are ``str`` subclasses, so each compares equal to its string
    value (a symbol for relational operators, a keyword for the rest).
    """

    # Relational operators
    EQUALS = "="
    NOT_EQUALS = "!="
    GREATER_THAN = ">"
    LESS_THAN = "<"
    GREATER_THAN_OR_EQUALS = ">="
    LESS_THAN_OR_EQUALS = "<="

    # Substring operators
    CONTAINS = "contains"
    NOT_CONTAINS = "not_contains"
    STARTS_WITH = "starts_with"
    ENDS_WITH = "ends_with"

    # Membership operators
    IN = "in"
    NOT_IN = "not_in"

    # Null checks
    IS_NULL = "is_null"
    IS_NOT_NULL = "is_not_null"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class LogicalOperator(str, Enum):
    """Boolean connectives for combining conditions.

    Values are upper-case strings; members are ``str`` subclasses and compare
    equal to those strings.
    """

    AND = "AND"
    OR = "OR"
    NOT = "NOT"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class AggregateFunction(str, Enum):
    """Names of the aggregate functions available for data analysis.

    Members are ``str`` subclasses, so each compares equal to its lowercase
    string value.
    """

    # Totals and central tendency
    SUM = "sum"
    MEAN = "mean"
    MEDIAN = "median"

    # Extremes
    MIN = "min"
    MAX = "max"

    # Counting
    COUNT = "count"
    COUNT_DISTINCT = "count_distinct"

    # Spread
    STD = "std"
    VAR = "var"

    # Positional
    FIRST = "first"
    LAST = "last"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class ExportFormat(str, Enum):
    """File formats a dataset can be exported to.

    Members are ``str`` subclasses, so each compares equal to its lowercase
    string value.
    """

    # Delimited text
    CSV = "csv"
    TSV = "tsv"

    # Structured and binary formats
    JSON = "json"
    EXCEL = "excel"
    PARQUET = "parquet"

    # Markup formats
    HTML = "html"
    MARKDOWN = "markdown"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class FilterCondition(BaseModel):
    """A single ``column <operator> value`` filter condition.

    ``value`` is normalised before validation, based on the ``operator`` that
    was validated just before it:

    * ``is_null`` / ``is_not_null``: any supplied value is discarded and
      ``None`` is stored.
    * ``in`` / ``not_in``: a list is always stored. A tuple, set or frozenset
      is converted to a list; any other value (including a string) is wrapped
      in a one-element list.
    * every other operator: the value is stored unchanged.
    """

    # ``operator`` must stay declared before ``value``: the validator below
    # reads the already-validated operator from ``info.data``.
    column: str = Field(..., description="Column name to filter on")
    operator: ComparisonOperator = Field(..., description="Comparison operator")
    value: Any = Field(None, description="Value to compare against")

    @field_validator("value", mode="before")
    @classmethod
    def validate_value(cls, v, info):
        """Normalise ``value`` according to the condition's operator.

        Args:
            v: The raw value supplied for the ``value`` field.
            info: Validation context; ``info.data`` holds the fields validated
                so far. ``operator`` is absent from it when the operator
                itself failed validation, in which case ``v`` is returned
                unchanged.

        Returns:
            ``None`` for the null-check operators, a list for the membership
            operators, otherwise ``v`` as given.
        """
        validated = getattr(info, "data", None) or {}
        operator = validated.get("operator")

        # Null checks take no operand, so drop whatever was supplied.
        if operator in (ComparisonOperator.IS_NULL, ComparisonOperator.IS_NOT_NULL):
            return None

        if operator in (ComparisonOperator.IN, ComparisonOperator.NOT_IN):
            if isinstance(v, list):
                return v
            # A tuple or set already is a collection of candidates; wrapping
            # it whole would turn ``(1, 2)`` into the single candidate
            # ``[(1, 2)]``.
            if isinstance(v, (tuple, set, frozenset)):
                return list(v)
            return [v]

        return v
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class SortSpec(BaseModel):
    """Sort instruction for one column.

    ``column`` is required; ``ascending`` defaults to ``True``.
    """

    column: str = Field(..., description="Column to sort by")
    ascending: bool = Field(default=True, description="Sort in ascending order")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class ColumnSchema(BaseModel):
    """Declarative description of one column and its constraints.

    ``name`` and ``dtype`` are required. Every constraint field is optional
    and defaults to "no constraint" (``nullable=True``, ``unique=False``,
    everything else ``None``).
    """

    # Identity
    name: str = Field(..., description="Column name")
    dtype: DataType = Field(..., description="Data type")

    # Boolean constraints
    nullable: bool = Field(default=True, description="Whether column can contain null values")
    unique: bool = Field(default=False, description="Whether values must be unique")

    # Range constraints
    min_value: float | int | str | None = Field(default=None, description="Minimum value")
    max_value: float | int | str | None = Field(default=None, description="Maximum value")

    # Value constraints
    allowed_values: list[Any] | None = Field(default=None, description="List of allowed values")
    pattern: str | None = Field(default=None, description="Regex pattern for validation")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class DataSchema(BaseModel):
    """Schema for a whole dataset: its columns plus optional table-level hints.

    ``validate_dataframe`` enforces column presence and, per column,
    ``nullable``, ``unique`` and ``allowed_values``. The remaining declared
    constraints (``dtype``, ``min_value``, ``max_value``, ``pattern``,
    ``row_count``, ``primary_key``) are stored but not checked by this method.
    """

    columns: list[ColumnSchema] = Field(..., description="Column definitions")
    row_count: int | None = Field(None, description="Expected number of rows")
    primary_key: list[str] | None = Field(None, description="Primary key columns")

    def validate_dataframe(self, df: pd.DataFrame) -> dict[str, Any]:
        """Validate a DataFrame against this schema.

        Args:
            df: The DataFrame to check.

        Returns:
            A dict with three keys: ``"valid"`` (``True`` when no errors were
            found), ``"errors"`` (list of messages) and ``"warnings"`` (list
            of messages). Missing columns are errors; columns not named in
            the schema are only warnings.
        """
        errors: list[str] = []
        warnings: list[str] = []

        # Compare the declared column set with the actual one.
        expected_cols = {col.name for col in self.columns}
        actual_cols = set(df.columns)

        missing_cols = expected_cols - actual_cols
        extra_cols = actual_cols - expected_cols

        if missing_cols:
            errors.append(f"Missing columns: {missing_cols}")
        if extra_cols:
            warnings.append(f"Extra columns: {extra_cols}")

        # Per-column constraints, skipping columns already reported missing.
        for col_schema in self.columns:
            if col_schema.name not in actual_cols:
                continue

            col_data = df[col_schema.name]

            if not col_schema.nullable and col_data.isnull().any():
                errors.append(f"Column {col_schema.name} contains null values")

            # NOTE(review): duplicated() appears to count repeated nulls as
            # duplicates, so a unique column with several nulls fails here -
            # confirm that is intended.
            if col_schema.unique and col_data.duplicated().any():
                errors.append(f"Column {col_schema.name} contains duplicate values")

            # An empty or absent allowed_values list means "no restriction".
            if col_schema.allowed_values:
                # Nulls are governed by ``nullable`` above; without dropping
                # them here a nullable column containing nulls would be
                # reported as holding invalid values.
                present = col_data.dropna()
                if not present.isin(col_schema.allowed_values).all():
                    errors.append(f"Column {col_schema.name} contains invalid values")

        return {"valid": not errors, "errors": errors, "warnings": warnings}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class DataQualityRule(BaseModel):
    """Definition of one data-quality check.

    ``name``, ``description`` and ``rule_type`` are required; ``column``,
    ``expression`` and ``threshold`` default to ``None``.
    """

    # What the rule is
    name: str = Field(..., description="Rule name")
    description: str = Field(..., description="Rule description")

    # Where it applies
    column: str | None = Field(default=None, description="Column to check (if applicable)")

    # Category of the check; restricted to the five literals below.
    rule_type: Literal[
        "completeness",
        "uniqueness",
        "validity",
        "consistency",
        "accuracy",
    ] = Field(..., description="Type of quality check")

    # How it is evaluated
    expression: str | None = Field(default=None, description="Expression to evaluate")
    threshold: float | None = Field(default=None, description="Threshold for pass/fail")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
class OperationResult(BaseModel):
    """Outcome record returned after a data operation.

    ``success`` and ``message`` are required; every other field is optional
    and defaults to ``None``.
    """

    # Outcome
    success: bool = Field(..., description="Whether operation succeeded")
    message: str = Field(..., description="Result message")

    # Scope of the operation
    session_id: str | None = Field(default=None, description="Session ID")
    rows_affected: int | None = Field(default=None, description="Number of rows affected")
    columns_affected: list[str] | None = Field(default=None, description="Columns affected")

    # Payload and diagnostics
    data: dict[str, Any] | None = Field(default=None, description="Additional result data")
    error: str | None = Field(default=None, description="Error message if failed")
    warnings: list[str] | None = Field(default=None, description="Warning messages")
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class SessionInfo(BaseModel):
    """Summary of a data session and the dataset it holds.

    All fields are required except ``operations_count`` (defaults to ``0``)
    and ``file_path`` (defaults to ``None``).
    """

    # Session identity and timing
    session_id: str = Field(..., description="Unique session identifier")
    created_at: datetime = Field(..., description="Session creation time")
    last_accessed: datetime = Field(..., description="Last access time")

    # Dataset shape
    row_count: int = Field(..., description="Number of rows in dataset")
    column_count: int = Field(..., description="Number of columns")
    columns: list[str] = Field(..., description="Column names")

    # Usage
    memory_usage_mb: float = Field(..., description="Memory usage in MB")
    operations_count: int = Field(default=0, description="Number of operations performed")
    file_path: str | None = Field(default=None, description="Source file path")
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class DataStatistics(BaseModel):
    """Per-column statistical summary.

    The identity and count fields are required. The numeric summaries,
    ``min``/``max`` and ``top_values`` are optional and default to ``None``.
    """

    # Identity
    column: str = Field(..., description="Column name")
    dtype: str = Field(..., description="Data type")

    # Counts
    count: int = Field(..., description="Non-null count")
    null_count: int = Field(..., description="Null count")
    unique_count: int = Field(..., description="Unique value count")

    # Moments
    mean: float | None = Field(default=None, description="Mean (numeric only)")
    std: float | None = Field(default=None, description="Standard deviation (numeric only)")

    # Extremes
    min: Any | None = Field(default=None, description="Minimum value")
    max: Any | None = Field(default=None, description="Maximum value")

    # Quartiles
    q25: float | None = Field(default=None, description="25th percentile (numeric only)")
    q50: float | None = Field(default=None, description="50th percentile (numeric only)")
    q75: float | None = Field(default=None, description="75th percentile (numeric only)")

    # Frequencies
    top_values: dict[str, int] | None = Field(
        default=None, description="Top 10 most frequent values"
    )
|
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
"""History management for CSV operations with persistence and undo/redo capabilities."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import pickle
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class HistoryStorage(str, Enum):
    """Where operation history is kept.

    Members are ``str`` subclasses, so each compares equal to its lowercase
    string value.
    """

    # Kept in memory only; gone once the session ends.
    MEMORY = "memory"
    # Written to a JSON file.
    JSON = "json"
    # Written as a pickle, which keeps DataFrames intact.
    PICKLE = "pickle"
    # Reserved for a future SQLite backend.
    SQLITE = "sqlite"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class OperationHistory:
|
|
27
|
+
"""Represents a single operation in history."""
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
operation_id: str,
|
|
32
|
+
operation_type: str,
|
|
33
|
+
timestamp: datetime,
|
|
34
|
+
details: dict[str, Any],
|
|
35
|
+
data_snapshot: pd.DataFrame | None = None,
|
|
36
|
+
metadata: dict[str, Any] | None = None,
|
|
37
|
+
):
|
|
38
|
+
"""Initialize operation history entry."""
|
|
39
|
+
self.operation_id = operation_id
|
|
40
|
+
self.operation_type = operation_type
|
|
41
|
+
self.timestamp = timestamp
|
|
42
|
+
self.details = details
|
|
43
|
+
self.data_snapshot = data_snapshot
|
|
44
|
+
self.metadata = metadata or {}
|
|
45
|
+
|
|
46
|
+
def to_dict(self) -> dict[str, Any]:
|
|
47
|
+
"""Convert to dictionary for serialization."""
|
|
48
|
+
return {
|
|
49
|
+
"operation_id": self.operation_id,
|
|
50
|
+
"operation_type": self.operation_type,
|
|
51
|
+
"timestamp": self.timestamp.isoformat(),
|
|
52
|
+
"details": self.details,
|
|
53
|
+
"metadata": self.metadata,
|
|
54
|
+
"has_snapshot": self.data_snapshot is not None,
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
@classmethod
|
|
58
|
+
def from_dict(
|
|
59
|
+
cls, data: dict[str, Any], data_snapshot: pd.DataFrame | None = None
|
|
60
|
+
) -> "OperationHistory":
|
|
61
|
+
"""Create from dictionary."""
|
|
62
|
+
return cls(
|
|
63
|
+
operation_id=data["operation_id"],
|
|
64
|
+
operation_type=data["operation_type"],
|
|
65
|
+
timestamp=datetime.fromisoformat(data["timestamp"]),
|
|
66
|
+
details=data["details"],
|
|
67
|
+
data_snapshot=data_snapshot,
|
|
68
|
+
metadata=data.get("metadata", {}),
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class HistoryManager:
    """Record a session's operations with optional persistence and undo/redo.

    Entries live in ``self.history``; ``self.current_index`` is the position
    of the most recently applied entry (``-1`` when none is applied). Only
    some entries carry a DataFrame snapshot, so ``undo`` and
    ``restore_to_operation`` return the nearest snapshot at or before the
    requested position - operations are never replayed.

    Persistence depends on ``storage_type``:

    * ``MEMORY``: nothing touches the disk.
    * ``JSON``: entry metadata goes to ``history_<session_id>.json`` and each
      snapshot to its own pickle under ``snapshots/<session_id>/``.
    * ``PICKLE``: the whole history list is pickled into
      ``history_<session_id>.pkl``.
    * any other value: the history directory is created, but this class
      neither loads nor saves anything.

    The redo stack is never persisted, so redo is unavailable after a reload.

    SECURITY: snapshots and PICKLE histories are read with ``pickle.load``,
    which can execute arbitrary code. Only point ``history_dir`` at a
    location that untrusted parties cannot write to.
    NOTE(review): ``session_id`` is interpolated into file paths unchanged -
    confirm callers never pass untrusted values.
    """

    def __init__(
        self,
        session_id: str,
        storage_type: HistoryStorage = HistoryStorage.MEMORY,
        history_dir: str | None = None,
        max_history: int = 100,
        enable_snapshots: bool = True,
        snapshot_interval: int = 5,  # Take snapshot every N operations
    ):
        """Initialize the manager and load persisted history when applicable.

        Args:
            session_id: Identifier used in history and snapshot file names.
            storage_type: Persistence strategy.
            history_dir: Directory for history files; defaults to
                ``.csv_history`` under the current working directory.
            max_history: Maximum number of entries kept; the oldest are
                dropped first.
            enable_snapshots: Whether DataFrame snapshots are taken at all.
            snapshot_interval: Take a snapshot once this many operations
                separate the new entry from the previous snapshot.
        """
        self.session_id = session_id
        self.storage_type = storage_type
        self.history_dir = history_dir or os.path.join(os.getcwd(), ".csv_history")
        self.max_history = max_history
        self.enable_snapshots = enable_snapshots
        self.snapshot_interval = snapshot_interval

        # History tracking
        self.history: list[OperationHistory] = []
        self.current_index = -1  # Points to current position in history
        self.redo_stack: list[OperationHistory] = []  # For redo functionality

        # Create history directory if needed
        if storage_type != HistoryStorage.MEMORY:
            Path(self.history_dir).mkdir(parents=True, exist_ok=True)
            self._load_history()

    @staticmethod
    def _utcnow() -> datetime:
        """Return the current UTC time as a naive ``datetime``.

        Replaces the deprecated ``datetime.utcnow()`` while keeping the value
        naive, so timestamps stay comparable with previously persisted ones.
        """
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None)

    def _get_history_file_path(self, extension: str = "json") -> str:
        """Return the history file path for this session and ``extension``."""
        return os.path.join(self.history_dir, f"history_{self.session_id}.{extension}")

    def _get_snapshot_file_path(self, operation_id: str) -> str:
        """Return the snapshot pickle path for ``operation_id``.

        Side effect: creates the session's snapshot directory if it is
        missing.
        """
        snapshot_dir = os.path.join(self.history_dir, "snapshots", self.session_id)
        Path(snapshot_dir).mkdir(parents=True, exist_ok=True)
        return os.path.join(snapshot_dir, f"snapshot_{operation_id}.pkl")

    def _remove_snapshot_file(self, entry: OperationHistory) -> None:
        """Delete the on-disk snapshot of ``entry`` if one may exist."""
        if entry.data_snapshot is None or self.storage_type == HistoryStorage.MEMORY:
            return
        snapshot_file = self._get_snapshot_file_path(entry.operation_id)
        if os.path.exists(snapshot_file):
            os.remove(snapshot_file)

    def _snapshot_due(self) -> bool:
        """Return whether the next entry should carry a snapshot.

        A snapshot is due when the history holds no snapshot at all, or when
        at least ``snapshot_interval - 1`` entries follow the latest one. The
        distance is counted from the latest snapshot rather than derived from
        ``len(self.history)``: the length stops changing once the history is
        capped at ``max_history``, which would end snapshotting for good.
        """
        interval = max(1, self.snapshot_interval)  # also guards against 0
        entries_since_snapshot = 0
        for entry in reversed(self.history):
            if entry.data_snapshot is not None:
                return entries_since_snapshot >= interval - 1
            entries_since_snapshot += 1
        return True

    def _load_history(self):
        """Load history from persistent storage (best effort).

        On any failure the error is logged and the in-memory state is left
        untouched, so a corrupt file never yields a half-loaded history.
        """
        try:
            if self.storage_type == HistoryStorage.JSON:
                history_file = self._get_history_file_path("json")
                if not os.path.exists(history_file):
                    return
                with open(history_file, encoding="utf-8") as f:
                    data = json.load(f)

                loaded: list[OperationHistory] = []
                for entry in data.get("history", []):
                    # Load snapshot if available
                    snapshot = None
                    if entry.get("has_snapshot"):
                        snapshot_file = self._get_snapshot_file_path(entry["operation_id"])
                        if os.path.exists(snapshot_file):
                            # SECURITY: pickle.load runs code from the file.
                            with open(snapshot_file, "rb") as sf:
                                snapshot = pickle.load(sf)
                    loaded.append(OperationHistory.from_dict(entry, snapshot))

            elif self.storage_type == HistoryStorage.PICKLE:
                history_file = self._get_history_file_path("pkl")
                if not os.path.exists(history_file):
                    return
                # SECURITY: pickle.load runs code from the file.
                with open(history_file, "rb") as f:
                    data = pickle.load(f)
                loaded = list(data.get("history", []))

            else:
                return

            # Clamp the stored position so a stale or hand-edited file cannot
            # make undo index past the end of the list.
            stored_index = int(data.get("current_index", -1))
            self.history = loaded
            self.current_index = max(-1, min(stored_index, len(loaded) - 1))
            logger.info(
                "Loaded %s history entries for session %s",
                len(self.history),
                self.session_id,
            )

        except Exception as e:
            logger.error("Error loading history: %s", e)

    def _save_history(self):
        """Save history to persistent storage (best effort).

        The payload is serialized before the target file is opened, so a
        serialization error cannot truncate an existing history file.
        Failures are logged and not raised.
        """
        try:
            if self.storage_type == HistoryStorage.JSON:
                history_file = self._get_history_file_path("json")
                data = {
                    "session_id": self.session_id,
                    "history": [h.to_dict() for h in self.history],
                    "current_index": self.current_index,
                    "timestamp": self._utcnow().isoformat(),
                }
                payload = json.dumps(data, indent=2)
                with open(history_file, "w", encoding="utf-8") as f:
                    f.write(payload)

                # Save snapshots separately. A snapshot never changes after
                # its entry is created, so files already on disk are skipped
                # instead of being re-pickled on every save.
                for entry in self.history:
                    if entry.data_snapshot is None:
                        continue
                    snapshot_file = self._get_snapshot_file_path(entry.operation_id)
                    if os.path.exists(snapshot_file):
                        continue
                    with open(snapshot_file, "wb") as sf:
                        pickle.dump(entry.data_snapshot, sf)

            elif self.storage_type == HistoryStorage.PICKLE:
                history_file = self._get_history_file_path("pkl")
                data = {
                    "session_id": self.session_id,
                    "history": self.history,
                    "current_index": self.current_index,
                    "timestamp": self._utcnow(),
                }
                blob = pickle.dumps(data)
                with open(history_file, "wb") as f:
                    f.write(blob)

            logger.debug(
                "Saved %s history entries for session %s",
                len(self.history),
                self.session_id,
            )

        except Exception as e:
            logger.error("Error saving history: %s", e)

    def add_operation(
        self,
        operation_type: str,
        details: dict[str, Any],
        current_data: pd.DataFrame | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Add a new operation to history.

        Any entries after the current position (left there by undo) are
        discarded together with their snapshot files, and the redo stack is
        cleared.

        Args:
            operation_type: Name of the operation.
            details: Operation-specific details.
            current_data: DataFrame to snapshot when a snapshot is due; it is
                copied, never stored by reference.
            metadata: Optional extra information for the entry.

        Returns:
            The generated operation ID.
        """
        # Clear redo stack when new operation is added
        self.redo_stack.clear()

        # Remove operations after current index (for undo/redo consistency)
        if self.current_index < len(self.history) - 1:
            discarded = self.history[self.current_index + 1 :]
            self.history = self.history[: self.current_index + 1]
            for stale in discarded:
                self._remove_snapshot_file(stale)

        # One timestamp serves both the ID and the entry.
        now = self._utcnow()
        operation_id = f"{self.session_id}_{now.strftime('%Y%m%d_%H%M%S_%f')}"

        # Determine if we should take a snapshot
        take_snapshot = (
            self.enable_snapshots and current_data is not None and self._snapshot_due()
        )

        operation = OperationHistory(
            operation_id=operation_id,
            operation_type=operation_type,
            timestamp=now,
            details=details,
            data_snapshot=current_data.copy() if take_snapshot else None,
            metadata=metadata,
        )

        self.history.append(operation)
        self.current_index += 1

        # Trim history if it exceeds the max (a loop, so an oversized loaded
        # history also shrinks back to the limit).
        while self.history and len(self.history) > self.max_history:
            removed = self.history.pop(0)
            self.current_index = max(-1, self.current_index - 1)
            self._remove_snapshot_file(removed)

        # Save to persistent storage
        if self.storage_type != HistoryStorage.MEMORY:
            self._save_history()

        logger.info("Added operation %s: %s", operation_id, operation_type)
        return operation_id

    def can_undo(self) -> bool:
        """Return whether an applied operation exists that can be undone."""
        return 0 <= self.current_index < len(self.history)

    def can_redo(self) -> bool:
        """Return whether the redo stack holds an operation."""
        return bool(self.redo_stack)

    def undo(self) -> tuple[OperationHistory | None, pd.DataFrame | None]:
        """Undo the last operation.

        Returns:
            ``(undone_operation, snapshot)``. ``snapshot`` is a copy of the
            most recent snapshot at or before the new position, or ``None``
            when no earlier entry has one. Because not every entry carries a
            snapshot, it may predate the exact prior state. Returns
            ``(None, None)`` when there is nothing to undo.
        """
        if not self.can_undo():
            return None, None

        # Move current operation to redo stack and step back
        current_op = self.history[self.current_index]
        self.redo_stack.append(current_op)
        self.current_index -= 1

        # Find the most recent snapshot at or before the new position
        snapshot = None
        for i in range(self.current_index, -1, -1):
            candidate = self.history[i].data_snapshot
            if candidate is not None:
                snapshot = candidate.copy()
                break

        if self.storage_type != HistoryStorage.MEMORY:
            self._save_history()

        logger.info("Undid operation: %s", current_op.operation_type)

        # Return the operation that was undone and the data to restore
        return current_op, snapshot

    def redo(self) -> tuple[OperationHistory | None, pd.DataFrame | None]:
        """Redo the previously undone operation.

        Returns:
            ``(redone_operation, snapshot)``. ``snapshot`` is a copy of the
            snapshot stored on the entry at the new position, or ``None``
            when that entry has none. Returns ``(None, None)`` when the redo
            stack is empty.
        """
        if not self.can_redo():
            return None, None

        operation = self.redo_stack.pop()
        self.current_index += 1

        # Get the snapshot at this position if available
        snapshot = None
        if self.current_index < len(self.history):
            stored = self.history[self.current_index].data_snapshot
            if stored is not None:
                snapshot = stored.copy()

        if self.storage_type != HistoryStorage.MEMORY:
            self._save_history()

        logger.info("Redid operation: %s", operation.operation_type)
        return operation, snapshot

    def get_history(self, limit: int | None = None) -> list[dict[str, Any]]:
        """Return history entries as dicts, oldest first.

        Args:
            limit: When given, only the last ``limit`` entries are returned.

        Returns:
            Each entry's ``to_dict()`` output extended with ``index`` (its
            position in the full history), ``is_current`` and ``can_restore``.
        """
        start = 0 if limit is None else max(0, len(self.history) - limit)

        history_list = []
        for i, entry in enumerate(self.history[start:], start=start):
            history_dict = entry.to_dict()
            history_dict["index"] = i
            history_dict["is_current"] = i == self.current_index
            history_dict["can_restore"] = entry.data_snapshot is not None
            history_list.append(history_dict)
        return history_list

    def get_operation(self, operation_id: str) -> OperationHistory | None:
        """Return the entry with ``operation_id``, or ``None`` if absent."""
        return next(
            (entry for entry in self.history if entry.operation_id == operation_id),
            None,
        )

    def restore_to_operation(self, operation_id: str) -> pd.DataFrame | None:
        """Restore data to a specific operation point.

        Moves the current position to the target entry and clears the redo
        stack, but only when a snapshot exists at or before the target.

        Returns:
            A copy of the nearest snapshot at or before the target, or
            ``None`` when the operation is unknown or no such snapshot
            exists. In the ``None`` cases the state is left unchanged.
        """
        target_index = next(
            (i for i, entry in enumerate(self.history) if entry.operation_id == operation_id),
            None,
        )
        if target_index is None:
            logger.error("Operation %s not found", operation_id)
            return None

        # Find the nearest snapshot at or before target
        for i in range(target_index, -1, -1):
            stored = self.history[i].data_snapshot
            if stored is None:
                continue

            snapshot = stored.copy()
            self.current_index = target_index

            # Clear redo stack since we're jumping to a specific point
            self.redo_stack.clear()

            if self.storage_type != HistoryStorage.MEMORY:
                self._save_history()

            logger.info("Restored to operation %s", operation_id)
            return snapshot

        logger.error("No snapshot available for operation %s", operation_id)
        return None

    def clear_history(self):
        """Clear all history, including persisted files for this session."""
        self.history.clear()
        self.redo_stack.clear()
        self.current_index = -1

        if self.storage_type != HistoryStorage.MEMORY:
            # Remove history files of both persisted formats
            for ext in ("json", "pkl"):
                history_file = self._get_history_file_path(ext)
                if os.path.exists(history_file):
                    os.remove(history_file)

            # Remove snapshot files
            snapshot_dir = os.path.join(self.history_dir, "snapshots", self.session_id)
            if os.path.exists(snapshot_dir):
                import shutil

                shutil.rmtree(snapshot_dir)

        logger.info("Cleared history for session %s", self.session_id)

    def export_history(self, file_path: str, format: str = "json") -> bool:
        """Export history to a file.

        Args:
            file_path: Destination path.
            format: ``"json"`` or ``"csv"``.

        Returns:
            ``True`` when the file was written. ``False`` when the format is
            unsupported or writing failed; the reason is logged.
        """
        try:
            if format == "json":
                data = {
                    "session_id": self.session_id,
                    "exported_at": self._utcnow().isoformat(),
                    "total_operations": len(self.history),
                    "current_position": self.current_index,
                    "operations": self.get_history(),
                }
                # Serialize first so a failure cannot leave a truncated file.
                payload = json.dumps(data, indent=2)
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(payload)

            elif format == "csv":
                columns = ["timestamp", "operation_type", "details", "has_snapshot"]
                history_data = [
                    {
                        "timestamp": entry.timestamp.isoformat(),
                        "operation_type": entry.operation_type,
                        "details": json.dumps(entry.details),
                        "has_snapshot": entry.data_snapshot is not None,
                    }
                    for entry in self.history
                ]
                # Explicit columns keep the header row for an empty history.
                pd.DataFrame(history_data, columns=columns).to_csv(file_path, index=False)

            else:
                # Previously this fell through and reported success without
                # writing anything.
                logger.error("Unsupported history export format: %s", format)
                return False

            logger.info("Exported history to %s", file_path)
            return True

        except Exception as e:
            logger.error("Error exporting history: %s", e)
            return False

    def get_statistics(self) -> dict[str, Any]:
        """Return summary statistics about the history.

        The same keys are returned whether or not any operations exist; for
        an empty history the counts are zero and the first/last timestamps
        are ``None``.
        """
        operation_types: dict[str, int] = {}
        snapshots_count = 0
        for entry in self.history:
            operation_types[entry.operation_type] = operation_types.get(entry.operation_type, 0) + 1
            if entry.data_snapshot is not None:
                snapshots_count += 1

        first_operation = self.history[0].timestamp.isoformat() if self.history else None
        last_operation = self.history[-1].timestamp.isoformat() if self.history else None

        return {
            "total_operations": len(self.history),
            "current_position": self.current_index + 1,
            "can_undo": self.can_undo(),
            "can_redo": self.can_redo(),
            "redo_stack_size": len(self.redo_stack),
            "operation_types": operation_types,
            "first_operation": first_operation,
            "last_operation": last_operation,
            "snapshots_count": snapshots_count,
            "storage_type": self.storage_type.value,
            "max_history": self.max_history,
        }
|
|
File without changes
|