fustor-source-mysql 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fustor_source_mysql-0.1.9/PKG-INFO +14 -0
- fustor_source_mysql-0.1.9/README.md +47 -0
- fustor_source_mysql-0.1.9/pyproject.toml +33 -0
- fustor_source_mysql-0.1.9/setup.cfg +4 -0
- fustor_source_mysql-0.1.9/src/fustor_source_mysql/__init__.py +547 -0
- fustor_source_mysql-0.1.9/src/fustor_source_mysql/py.typed +0 -0
- fustor_source_mysql-0.1.9/src/fustor_source_mysql.egg-info/PKG-INFO +14 -0
- fustor_source_mysql-0.1.9/src/fustor_source_mysql.egg-info/SOURCES.txt +13 -0
- fustor_source_mysql-0.1.9/src/fustor_source_mysql.egg-info/dependency_links.txt +1 -0
- fustor_source_mysql-0.1.9/src/fustor_source_mysql.egg-info/entry_points.txt +2 -0
- fustor_source_mysql-0.1.9/src/fustor_source_mysql.egg-info/requires.txt +9 -0
- fustor_source_mysql-0.1.9/src/fustor_source_mysql.egg-info/top_level.txt +1 -0
- fustor_source_mysql-0.1.9/tests/conftest.py +52 -0
- fustor_source_mysql-0.1.9/tests/docker-mysql.yml +25 -0
- fustor_source_mysql-0.1.9/tests/mysql-init.sql +90 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fustor-source-mysql
|
|
3
|
+
Version: 0.1.9
|
|
4
|
+
Summary: A MySQL source for Fustor Agent
|
|
5
|
+
Author-email: Huajin Wang <wanghuajin999@163.com>
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: fustor-core
|
|
8
|
+
Requires-Dist: aiomysql>=0.2.0
|
|
9
|
+
Requires-Dist: mysql-replication>=1.0.9
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
12
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
13
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
14
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# fustor-source-mysql
|
|
2
|
+
|
|
3
|
+
This package provides a `SourceDriver` implementation for the Fustor Agent service, enabling it to extract data from MySQL databases. It supports both consistent snapshot (historical) and real-time change data capture (CDC) via MySQL's binary log.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
* **Consistent Snapshot Synchronization**: Performs a consistent snapshot of tables using `START TRANSACTION WITH CONSISTENT SNAPSHOT` to capture historical data.
|
|
8
|
+
* **Real-time Change Data Capture (CDC)**: Streams real-time data changes (INSERT, UPDATE, DELETE) from MySQL's binary log using `pymysqlreplication`.
|
|
9
|
+
* **Binlog Position Tracking**: Manages and checks binlog positions for resuming streams and determining data availability.
|
|
10
|
+
* **Connection Management**: Handles connection to MySQL using username/password credentials.
|
|
11
|
+
* **Runtime Parameter Validation**: Checks essential MySQL global variables like `log_bin` and `binlog_format` to ensure proper CDC setup.
|
|
12
|
+
* **Agent User Management**: Provides functionality to create a dedicated agent user with necessary replication and select privileges.
|
|
13
|
+
* **Privilege Checking**: Verifies that the agent user has the required permissions.
|
|
14
|
+
* **Field Discovery**: Dynamically discovers available fields (columns) from MySQL schemas.
|
|
15
|
+
* **Shared Instance Model**: Optimizes resource usage by sharing MySQL client instances for identical configurations.
|
|
16
|
+
* **Wizard Definition**: Provides a comprehensive configuration wizard for UI integration, guiding users through connection, runtime checks, and agent user setup.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
This package is part of the Fustor monorepo and is typically installed in editable mode within the monorepo's development environment using `uv sync`. It is registered as a `fustor_agent.drivers.sources` entry point.
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
To use the `fustor-source-mysql` driver, configure a Source in your Fustor Agent setup with the driver type `mysql`. You will need to provide the MySQL URI (host:port) and credentials for both an administrative user (for setup and checks) and a dedicated agent user (for data extraction).
|
|
25
|
+
|
|
26
|
+
Example (conceptual configuration in Fustor Agent):
|
|
27
|
+
|
|
28
|
+
```yaml
|
|
29
|
+
# ~/.fustor/config.yaml
|
|
30
|
+
sources:
|
|
31
|
+
my-mysql-source:
|
|
32
|
+
driver_type: mysql
|
|
33
|
+
uri: localhost:3306
|
|
34
|
+
admin_creds: # Used for initial setup and checks, not saved
|
|
35
|
+
user: admin_user
|
|
36
|
+
passwd: admin_password
|
|
37
|
+
credential: # Dedicated agent user for data extraction, saved
|
|
38
|
+
user: fustor_agent_user
|
|
39
|
+
passwd: agent_password
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Dependencies
|
|
43
|
+
|
|
44
|
+
* `aiomysql`: Asynchronous MySQL client for Python.
|
|
45
|
+
* `mysql-replication`: Library for reading MySQL binary logs.
|
|
46
|
+
* `fustor-core`: Provides the `SourceDriver` abstract base class and other core components.
|
|
47
|
+
* `fustor-event-model`: Provides `EventBase` for event data structures.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "fustor-source-mysql"
|
|
3
|
+
dynamic = ["version"]
|
|
4
|
+
description = "A MySQL source for Fustor Agent"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
dependencies = [ "fustor-core", "aiomysql>=0.2.0", "mysql-replication>=1.0.9",]  # NOTE(review): the source imports fustor_event_model — should "fustor-event-model" be listed here? README names it as a dependency.
|
|
7
|
+
[[project.authors]]
|
|
8
|
+
name = "Huajin Wang"
|
|
9
|
+
email = "wanghuajin999@163.com"
|
|
10
|
+
|
|
11
|
+
[build-system]
|
|
12
|
+
requires = [ "setuptools>=61.0", "setuptools-scm>=8.0"]
|
|
13
|
+
build-backend = "setuptools.build_meta"
|
|
14
|
+
|
|
15
|
+
[tool.setuptools_scm]
|
|
16
|
+
root = "../.."
|
|
17
|
+
version_scheme = "post-release"
|
|
18
|
+
local_scheme = "dirty-tag"
|
|
19
|
+
|
|
20
|
+
[project.urls]
|
|
21
|
+
Homepage = "https://github.com/excelwang/fustor/tree/master/packages/source_mysql"
|
|
22
|
+
"Bug Tracker" = "https://github.com/excelwang/fustor/issues"
|
|
23
|
+
|
|
24
|
+
license = "MIT"  # NOTE(review): misplaced — this key belongs inside the [project] table, not after [project.urls]; as written it is not valid PEP 621 metadata.
|
|
25
|
+
|
|
26
|
+
[project.optional-dependencies]
|
|
27
|
+
dev = [ "pytest>=8.0.0", "ruff>=0.1.0", "mypy>=1.0.0", "pytest-asyncio>=0.23.0",]
|
|
28
|
+
|
|
29
|
+
[project.entry-points."fustor_agent.drivers.sources"]
|
|
30
|
+
mysql = "fustor_source_mysql:MysqlDriver"
|
|
31
|
+
|
|
32
|
+
[tool.setuptools.packages.find]
|
|
33
|
+
where = [ "src",]
|
|
@@ -0,0 +1,547 @@
|
|
|
1
|
+
"Fuagent source driver for MySQL."
|
|
2
|
+
import time
|
|
3
|
+
import pymysql
|
|
4
|
+
from pymysql.cursors import SSCursor
|
|
5
|
+
import uuid
|
|
6
|
+
from typing import Iterator, Optional, Dict, Any, Tuple, List, Set
|
|
7
|
+
from decimal import Decimal
|
|
8
|
+
from datetime import datetime, date, timedelta
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from pymysqlreplication import BinLogStreamReader
|
|
11
|
+
from pymysqlreplication.row_event import DeleteRowsEvent, UpdateRowsEvent, WriteRowsEvent
|
|
12
|
+
import logging
|
|
13
|
+
import aiomysql
|
|
14
|
+
import threading
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
|
|
18
|
+
from fustor_core.drivers import SourceDriver
|
|
19
|
+
from fustor_core.models.config import SourceConfig, PasswdCredential
|
|
20
|
+
from fustor_core.exceptions import DriverError
|
|
21
|
+
from fustor_event_model.models import EventBase, InsertEvent, UpdateEvent, DeleteEvent
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger("fustor_agent.driver.mysql")
|
|
24
|
+
|
|
25
|
+
class MysqlDriver(SourceDriver):
|
|
26
|
+
_instances: Dict[str, 'MysqlDriver'] = {}
|
|
27
|
+
_lock = threading.Lock()
|
|
28
|
+
|
|
29
|
+
def __new__(cls, id: str, config: SourceConfig):
    """Return a shared driver instance keyed by URI + credential.

    Instances are cached so that identical (uri, credential) pairs reuse
    one MySQL client; including the credential in the key preserves
    permission isolation between different users of the same server.
    """
    # Cache key: server address plus a hash of the credential string.
    key = f"{config.uri}#{hash(str(config.credential))}"

    with MysqlDriver._lock:
        cached = MysqlDriver._instances.get(key)
        if cached is None:
            cached = super().__new__(cls)
            MysqlDriver._instances[key] = cached
    return cached
|
|
38
|
+
|
|
39
|
+
def __init__(self, id: str, config: SourceConfig):
    """Initialize the driver once per shared instance.

    __new__ may hand back an already-constructed singleton for this
    uri+credential signature; in that case setup is skipped entirely.
    """
    # Prevent re-initialization of shared instances
    if hasattr(self, '_initialized'):
        return

    super().__init__(id, config)
    # "host:port" of the MySQL server.
    self.uri = self.config.uri
    # Agent credential used for snapshot and binlog connections.
    self.credential: PasswdCredential = self.config.credential
    # Per-table maps of column ordinal -> column name, filled from the
    # cached schema file; used to re-key positional binlog row values.
    self.column_maps: Dict[str, Dict[int, str]] = {}
    self._load_schema_and_build_map()

    self._initialized = True
|
|
51
|
+
|
|
52
|
+
def _load_schema_and_build_map(self):
    """Populate self.column_maps from this source's cached schema file.

    The schema file records each column's ordinal position; the resulting
    per-table maps (ordinal -> name) let binlog rows, which carry only
    positional values, be re-keyed by column name. Missing or unreadable
    schema files are logged and tolerated.
    """
    schema_file_path = os.path.join('.conf', 'schemas', f'source_{self.id}.schema.json')
    if not os.path.exists(schema_file_path):
        logger.warning(f"Schema file not found for source '{self.id}' at '{schema_file_path}'. Binlog events will use placeholder column names.")
        return

    try:
        with open(schema_file_path, 'r', encoding='utf-8') as fp:
            schema_doc = json.load(fp)

        for table_name, table_def in schema_doc.get("properties", {}).items():
            # Keep only columns that declare an ordinal position.
            self.column_maps[table_name] = {
                props["column_index"]: name
                for name, props in table_def.get("properties", {}).items()
                if props.get("column_index") is not None
            }
        logger.info(f"Successfully loaded schema and built column maps for source '{self.id}'.")
    except (json.JSONDecodeError, IOError) as e:
        logger.error(f"Failed to load or parse schema file '{schema_file_path}': {e}", exc_info=True)
|
|
72
|
+
|
|
73
|
+
def _get_row_with_column_names(self, table_key: str, values: List[Any]) -> Dict[str, Any]:
    """Re-key a positional binlog row by column name.

    Uses the column map loaded for ``table_key``; positions missing from
    the map (or a table with no map at all) fall back to
    "UNKNOWN_COL<i>" placeholder keys.
    """
    mapping = self.column_maps.get(table_key)
    if not mapping:
        # No schema information for this table: placeholder keys only.
        return {f"UNKNOWN_COL{idx}": value for idx, value in enumerate(values)}
    return {mapping.get(idx, f"UNKNOWN_COL{idx}"): value for idx, value in enumerate(values)}
|
|
83
|
+
|
|
84
|
+
def get_snapshot_iterator(self, **kwargs) -> Iterator[EventBase]:
    """Yield InsertEvent batches from a consistent snapshot of the required tables.

    Opens a consistent-snapshot transaction, records the binlog position
    it was locked at (carried as each event's index so CDC can resume
    from exactly that point), then streams every required table in
    batches via a server-side cursor.

    Keyword args:
        required_fields_tracker: object whose get_fields() returns the
            "schema.table.column" names to snapshot (optional).
        batch_size: rows per yielded event (default 100).

    Raises:
        DriverError: if the master status (snapshot position) cannot be read.
    """
    stream_id = f"snapshot-{uuid.uuid4().hex[:6]}"
    logger.info(f"[{stream_id}] Starting Consistent Snapshot.")

    snapshot_conn = None
    try:
        host, port_str = self.uri.split(':')
        snapshot_conn = pymysql.connect(
            host=host, port=int(port_str), user=self.credential.user, passwd=self.credential.passwd or ''
        )

        # SSCursor streams rows from the server instead of buffering the
        # whole result set in client memory.
        with snapshot_conn.cursor(SSCursor) as cursor:
            cursor.execute("START TRANSACTION WITH CONSISTENT SNAPSHOT")
            logger.info(f"[{stream_id}] Transaction started for consistent snapshot.")

            # NOTE(review): SHOW MASTER STATUS was removed in MySQL 8.4
            # (replaced by SHOW BINARY LOG STATUS) — confirm target server
            # versions before relying on this statement.
            cursor.execute("SHOW MASTER STATUS")
            status = cursor.fetchone()
            if not status:
                raise DriverError("Could not get master status to determine snapshot position.")

            binlog_start_pos_int = _generate_event_index(status[0], status[1])
            logger.info(f"[{stream_id}] Consistent snapshot locked at position: {binlog_start_pos_int} ({status[0]}:{status[1]})")

            # Group the required "schema.table.column" names by table.
            required_fields = kwargs.get("required_fields_tracker").get_fields() if kwargs.get("required_fields_tracker") else set()
            table_columns: Dict[Tuple[str, str], List[str]] = {}
            for full_field_name in required_fields:
                field_parts = full_field_name.split('.')
                if len(field_parts) < 3:
                    continue
                schema, table_name, column_name = field_parts[0], field_parts[1], field_parts[2]
                table_columns.setdefault((schema, table_name), []).append(column_name)

            batch_size = kwargs.get("batch_size", 100)
            for (schema, table_name), columns in table_columns.items():
                if not columns:
                    continue
                columns_csv = ', '.join([f"`{col}`" for col in columns])
                query = f"SELECT {columns_csv} FROM `{schema}`.`{table_name}`"

                logger.debug(f"[{stream_id}] Executing snapshot query: {query}")
                cursor.execute(query)

                while True:
                    batch = cursor.fetchmany(batch_size)
                    if not batch:
                        break

                    rows = [{columns[i]: _normalize_row(col) for i, col in enumerate(row)} for row in batch]
                    if rows:
                        # BUG FIX: the original referenced an undefined name
                        # `event_schema` here; the loop variable is `schema`,
                        # so every snapshot batch raised NameError.
                        event = InsertEvent(schema, table_name, rows, index=binlog_start_pos_int)
                        yield event

            snapshot_conn.commit()
            logger.info(f"[{stream_id}] Snapshot transaction committed.")

    except Exception as e:
        if snapshot_conn:
            snapshot_conn.rollback()
        logger.error(f"[{stream_id}] Snapshot phase failed, transaction rolled back: {e}", exc_info=True)
        raise
    finally:
        if snapshot_conn:
            snapshot_conn.close()
|
|
148
|
+
|
|
149
|
+
def is_position_available(self, position: int) -> bool:
    """
    Checks if the MySQL binlog position is available for resuming.
    """
    # Non-positive positions mean "start from the latest snapshot", so
    # there is nothing to resume from.
    if position <= 0:
        return False

    try:
        logger.debug(f"Checking availability of binlog position {position}")
        # Opening a short-lived streamer is the availability probe: it
        # fails if the server has already purged that binlog segment.
        with _create_binlog_streamer(self.uri, self.credential, position, "pos-check", None, connect_timeout=5):
            pass
        logger.debug(f"Binlog position {position} is available.")
        return True
    except Exception as e:
        # Broad catch: pymysqlreplication raises assorted errors for lost logs.
        logger.warning(f"Binlog position {position} is not available (Reason: {e}).")
        return False
|
|
166
|
+
|
|
167
|
+
def get_message_iterator(self, start_position: int=-1, **kwargs) -> Iterator[EventBase]:
    """
    Performs incremental data capture (CDC).

    Returns a generator that streams row events from the MySQL binlog
    starting at `start_position` (-1 means "from the beginning of the
    available logs") and yields Insert/Update/Delete events filtered to
    the currently required fields. Transient stream errors are retried up
    to config.max_retries times, sleeping config.retry_delay_sec between
    attempts; exhausting the retries raises DriverError.

    Keyword args:
        stop_event: optional threading.Event for cooperative shutdown.
        required_fields_tracker: optional tracker whose get_fields()
            restricts which columns are emitted.
    """

    def _iterator_func() -> Iterator[EventBase]:
        stream_id = f"message-stream-{uuid.uuid4().hex[:6]}"

        stop_event = kwargs.get("stop_event")
        required_fields_tracker = kwargs.get("required_fields_tracker")
        max_retries = self.config.max_retries
        retry_delay_sec = self.config.retry_delay_sec

        # Position to (re)open the stream from; updated after every event
        # so a retry resumes where the previous attempt left off.
        event_id_from = start_position if start_position != -1 else 0
        attempt = 0
        while attempt < max_retries:
            if stop_event and stop_event.is_set(): break
            try:
                with _create_binlog_streamer(self.uri, self.credential, event_id_from, stream_id, stop_event) as streamer:
                    for binlog_event in streamer:
                        if stop_event and stop_event.is_set(): break

                        # Acknowledge field-set changes so the tracker's
                        # change flag does not stay set forever.
                        if required_fields_tracker and required_fields_tracker.wait_for_change(timeout=0.1):
                            required_fields_tracker.clear_event()

                        # Skip events until the reader reports a concrete position.
                        if streamer.log_file is None or streamer.log_pos is None: continue
                        event_index = _generate_event_index(streamer.log_file, streamer.log_pos)
                        event = None
                        if hasattr(binlog_event, 'rows') and binlog_event.rows:
                            table_key = f"{binlog_event.event_schema}.{binlog_event.table}"
                            # Map each binlog row type to the corresponding
                            # fustor event; UPDATE emits the post-image only.
                            if isinstance(binlog_event, WriteRowsEvent):
                                rows = [_normalize_row(self._get_row_with_column_names(table_key, row['values'])) for row in binlog_event.rows]
                                event = InsertEvent(binlog_event.event_schema, binlog_event.table, rows, index=event_index)
                            elif isinstance(binlog_event, UpdateRowsEvent):
                                rows = [_normalize_row(self._get_row_with_column_names(table_key, row['after_values'])) for row in binlog_event.rows]
                                event = UpdateEvent(binlog_event.event_schema, binlog_event.table, rows, index=event_index)
                            elif isinstance(binlog_event, DeleteRowsEvent):
                                rows = [_normalize_row(self._get_row_with_column_names(table_key, row['values'])) for row in binlog_event.rows]
                                event = DeleteEvent(binlog_event.event_schema, binlog_event.table, rows, index=event_index)

                        if event:
                            # Drop columns (and possibly the whole event) not
                            # currently required by any downstream consumer.
                            filtered_event = _filter_event_rows(event, required_fields_tracker.get_fields() if required_fields_tracker else set())
                            if filtered_event:
                                yield filtered_event

                        event_id_from = event_index

                    if stop_event and stop_event.is_set(): break
                # Clean end of stream: leave the retry loop.
                break
            except Exception as e:
                attempt += 1
                if attempt < max_retries:
                    logger.warning(f"[{stream_id}] Transient error in binlog stream (attempt {attempt}/{max_retries}): {e}")
                    time.sleep(retry_delay_sec)
                else:
                    logger.error(f"[{stream_id}] Failed after {max_retries} retries in binlog stream: {e}", exc_info=True)
                    raise DriverError(f"Binlog streaming failed after {max_retries} retries: {e}")

        logger.info(f"[{stream_id}] Message iterator finished.")

    return _iterator_func()
|
|
228
|
+
|
|
229
|
+
@classmethod
async def test_connection(cls, **kwargs) -> Tuple[bool, str]:
    """Open (and immediately close) an admin connection to verify reachability.

    Expects 'uri' and 'admin_creds' in kwargs; returns (ok, message).
    """
    uri = kwargs.get("uri")
    admin_creds_dict = kwargs.get("admin_creds", {})
    if not uri or not admin_creds_dict:
        return (False, "缺少 'uri' 或 'admin_creds' 参数")

    connection = None
    try:
        connection = await _get_connection(uri, PasswdCredential(**admin_creds_dict))
        logger.info(f"Successfully tested connection to {uri}")
        return True, "数据库连接成功。"
    except Exception as e:
        logger.error(f"MySQL async test_connection failed: {e}", exc_info=True)
        return False, f"数据库连接失败: {e}"
    finally:
        # aiomysql's close() is synchronous; guard in case a stub lacks it.
        if connection is not None:
            closer = getattr(connection, "close", None)
            if callable(closer):
                closer()
|
|
250
|
+
|
|
251
|
+
@classmethod
async def check_runtime_params(cls, **kwargs) -> Tuple[bool, str]:
    """Verify the server's binlog configuration supports row-based CDC.

    Requires the global variables log_bin=ON and binlog_format=ROW;
    returns (ok, message).
    """
    uri = kwargs.get("uri")
    admin_creds_dict = kwargs.get("admin_creds", {})
    if not uri or not admin_creds_dict:
        return (False, "缺少 'uri' 或 'admin_creds' 参数")

    conn = None
    try:
        conn = await _get_connection(uri, PasswdCredential(**admin_creds_dict))
        async with conn.cursor() as cursor:
            # Each SHOW GLOBAL VARIABLES row is a (name, value) pair.
            await cursor.execute("SHOW GLOBAL VARIABLES LIKE 'log_bin'")
            row = await cursor.fetchone()
            if not row or row[1] != 'ON':
                return (False, "配置检查失败: 全局变量 'log_bin' 必须为 'ON'")

            await cursor.execute("SHOW GLOBAL VARIABLES LIKE 'binlog_format'")
            row = await cursor.fetchone()
            if not row or row[1] != 'ROW':
                return (False, "配置检查失败: 全局变量 'binlog_format' 必须为 'ROW'")
        logger.info("Runtime parameters check passed")
        return True, "运行时参数有效。"
    except Exception as e:
        logger.error(f"MySQL check_runtime_params failed: {e}", exc_info=True)
        return False, f"检查运行时参数失败: {e}"
    finally:
        if conn is not None:
            closer = getattr(conn, "close", None)
            if callable(closer):
                closer()
|
|
282
|
+
|
|
283
|
+
@classmethod
async def create_agent_user(cls, **kwargs) -> Tuple[bool, str]:
    """Create (if absent) the dedicated agent user and grant CDC privileges.

    Uses the admin credential to CREATE USER and GRANT REPLICATION SLAVE,
    REPLICATION CLIENT and SELECT to the agent credential; returns
    (ok, message).
    """
    uri = kwargs.get("uri")
    admin_creds_dict = kwargs.get("admin_creds", {})
    agent_user_dict = kwargs.get("credential", {})
    if not uri or not admin_creds_dict or not agent_user_dict:
        return (False, "缺少 'uri', 'admin_creds', 或 'credential' 参数")
    admin_creds = PasswdCredential(**admin_creds_dict)
    agent_user = PasswdCredential(**agent_user_dict)

    conn = None
    try:
        conn = await _get_connection(uri, admin_creds)
        async with conn.cursor() as cursor:
            # user/host/password bind as string literals, which is valid
            # syntax inside CREATE USER / GRANT statements.
            await cursor.execute(
                "CREATE USER IF NOT EXISTS %s@%s IDENTIFIED BY %s",
                (agent_user.user, '%', agent_user.passwd or '')
            )
            await cursor.execute(
                "GRANT REPLICATION SLAVE, REPLICATION CLIENT, SELECT ON *.* TO %s@%s",
                (agent_user.user, '%')
            )
            await cursor.execute("FLUSH PRIVILEGES")
        logger.info(f"User '{agent_user.user}' is ready for replication.")
        return True, f"用户 '{agent_user.user}' 已成功创建或验证。"
    except Exception as e:
        logger.error(f"Failed to create or grant privileges to user '{agent_user.user}': {e}", exc_info=True)
        return False, f"创建或授权用户 '{agent_user.user}' 失败: {e}"
    finally:
        if conn is not None:
            closer = getattr(conn, "close", None)
            if callable(closer):
                closer()
|
|
316
|
+
|
|
317
|
+
@classmethod
async def check_privileges(cls, **kwargs) -> Tuple[bool, str]:
    """Verify the agent user holds REPLICATION SLAVE/CLIENT and SELECT.

    Reads the privilege flags directly from mysql.user via the admin
    connection; returns (ok, message).
    """
    uri = kwargs.get("uri")
    admin_creds_dict = kwargs.get("admin_creds", {})
    agent_user_dict = kwargs.get("credential", {})
    if not uri or not admin_creds_dict or not agent_user_dict:
        return (False, "缺少 'uri', 'admin_creds', 或 'credential' 参数")
    admin_creds = PasswdCredential(**admin_creds_dict)
    agent_user = PasswdCredential(**agent_user_dict)

    conn = None
    try:
        conn = await _get_connection(uri, admin_creds)
        async with conn.cursor() as cursor:
            await cursor.execute(
                "SELECT Repl_slave_priv, Repl_client_priv, Select_priv FROM mysql.user WHERE User = %s AND Host = %s",
                (agent_user.user, '%')
            )
            grants = await cursor.fetchone()
        # All three flags must be 'Y'; no row means the user does not exist.
        if not grants or any(flag != 'Y' for flag in grants[:3]):
            msg = f"用户 '{agent_user.user}' 缺少必要的权限 (REPLICATION SLAVE, REPLICATION CLIENT, SELECT)。"
            logger.error(msg)
            return False, msg

        logger.info(f"User '{agent_user.user}' privileges verified")
        return True, f"用户 '{agent_user.user}' 权限充足。"
    except Exception as e:
        logger.error(f"MySQL check_user_privileges failed for user '{agent_user.user}': {e}", exc_info=True)
        return False, f"检查用户 '{agent_user.user}' 权限失败: {e}"
    finally:
        if conn is not None:
            closer = getattr(conn, "close", None)
            if callable(closer):
                closer()
|
|
351
|
+
|
|
352
|
+
@classmethod
async def get_available_fields(cls, **kwargs) -> Dict[str, Any]:
    """Discover user-table columns as flat "schema.table.column" fields.

    Queries INFORMATION_SCHEMA.COLUMNS, skipping system schemas, and
    returns {"properties": {composite_key: {"type", "column_index"}}}.

    Raises:
        DriverError: when the MySQL server rejects the connection.
    """
    uri = kwargs.get("uri")
    admin_creds_dict = kwargs.get("admin_creds")
    if not uri or not admin_creds_dict:
        raise DriverError("get_available_fields requires 'uri' and 'admin_creds'.")

    conn = None
    try:
        conn = await _get_connection(uri, PasswdCredential(**admin_creds_dict))
        system_schemas = ('information_schema', 'mysql', 'performance_schema', 'sys')
        available_fields: Dict[str, Any] = {}

        async with conn.cursor(aiomysql.DictCursor) as cursor:
            await cursor.execute("SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, ORDINAL_POSITION FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA NOT IN %s ORDER BY TABLE_SCHEMA, TABLE_NAME, ORDINAL_POSITION", (system_schemas,))
            for record in await cursor.fetchall():
                composite_key = f"{record['TABLE_SCHEMA']}.{record['TABLE_NAME']}.{record['COLUMN_NAME']}"
                # ORDINAL_POSITION is 1-based; column_index is 0-based.
                available_fields[composite_key] = {"type": "string", "column_index": record['ORDINAL_POSITION'] - 1}

        logger.info(f"Successfully retrieved {len(available_fields)} available fields from {uri}.")
        return {"properties": available_fields}
    except pymysql.err.OperationalError as e:
        error_message = f"连接到 MySQL 失败: 访问被拒绝。请检查用户名、密码和主机权限。"
        logger.debug(f"Original MySQL connection error in mysql driver: {e}", exc_info=True)
        raise DriverError(error_message) from e
    except Exception as e:
        logger.error(f"Error getting available fields: {e}", exc_info=True)
        raise
    finally:
        if conn is not None:
            conn.close()
|
|
386
|
+
|
|
387
|
+
@classmethod
async def get_wizard_steps(cls) -> Dict[str, Any]:
    """Return the UI configuration wizard definition for this driver.

    Two steps: (1) connection + discovery with one-time admin
    credentials, (2) creation/verification of the persisted agent user.
    Each step carries a JSON-schema form plus the validation actions the
    UI should run.
    """
    return {
        "steps": [
            {
                "step_id": "connection",
                "title": "连接与发现",
                "schema": {
                    "type": "object",
                    "properties": {
                        "uri": {
                            "type": "string",
                            "title": "URI",
                            "description": "MySQL服务器地址 (例如, localhost:3306)",
                            # BUG FIX: was "\\\\d" in source, which serializes
                            # to the regex `\\d` (a literal backslash followed
                            # by "d") and rejects every valid host:port value.
                            "pattern": "^[a-zA-Z0-9._-]+:\\d+$"
                        },
                        "admin_creds": {
                            "$ref": "#/components/schemas/PasswdCredential",
                            "title": "管理员凭证",
                            "description": "用于执行连接测试、环境检查和创建代理用户的一次性管理员凭证。此凭证不会被保存。"
                        }
                    },
                    "required": ["uri", "admin_creds"]
                },
                "validations": ["test_connection", "check_params", "discover_fields_no_cache"]
            },
            {
                "step_id": "agent_setup",
                "title": "代理用户与参数",
                "schema": {
                    "type": "object",
                    "properties": {
                        "credential": {
                            "$ref": "#/components/schemas/PasswdCredential",
                            "title": "代理用户凭证",
                            "description": "为FuAgent创建一个专用的、权限受限的用户,用于日常的数据拉取。此凭证将被保存。"
                        }
                    },
                    "required": ["credential"]
                },
                "validations": ["create_agent_user", "check_privileges"]
            }
        ],
        "components": {
            "schemas": {
                "PasswdCredential": {
                    "type": "object",
                    "title": "用户名/密码凭证",
                    "properties": {
                        "user": { "type": "string", "title": "用户名" },
                        "passwd": { "type": "string", "title": "密码", "format": "password" }
                    },
                    "required": ["user"]
                }
            }
        }
    }
|
|
444
|
+
|
|
445
|
+
# --- Module-level helper functions and classes ---
|
|
446
|
+
|
|
447
|
+
@contextmanager
def _create_binlog_streamer(
    uri: str, user_creds: PasswdCredential, event_id_from: int, stream_id: str, stop_event: Optional[threading.Event] = None, connect_timeout: int = 30
) -> Iterator[BinLogStreamReader]:
    """Context manager that opens (and always closes) a binlog stream reader.

    Args:
        uri: "host:port" of the MySQL server.
        user_creds: credential with replication privileges.
        event_id_from: packed position from _generate_event_index; 0 means
            start at the beginning of the available logs.
        stream_id: label used in log messages.
        stop_event: optional cooperative-shutdown flag.
        connect_timeout: seconds for the underlying connection attempt.

    Raises:
        DriverError: if stop_event is already set on entry.
    """
    streamer = None
    try:
        # BUG FIX: the original checked stop_event only AFTER constructing
        # the reader and then `return`ed without yielding, which makes
        # @contextmanager raise RuntimeError("generator didn't yield") at
        # `with`-entry. Check up front and raise a clear error instead.
        if stop_event and stop_event.is_set():
            logger.info(f"Stream {stream_id}: Stop event already set, not starting binlog stream.")
            raise DriverError(f"Stream {stream_id}: stop requested before binlog stream started.")

        host, port_str = uri.split(':')
        mysql_settings = {
            "host": host,
            "port": int(port_str),
            "user": user_creds.user,
            "passwd": user_creds.passwd or '',
            # BUG FIX: connect_timeout was accepted but never used; forward
            # it so short probes (e.g. is_position_available) really time out.
            "connect_timeout": connect_timeout,
        }

        log_file, log_pos = _parse_event_index(event_id_from)

        # Pseudo-random server_id keeps concurrent streams from colliding
        # on the primary's replica registry.
        server_id = 10086 + int(uuid.uuid4().hex[:8], 16) % 1000
        streamer = BinLogStreamReader(
            connection_settings=mysql_settings,
            server_id=server_id,
            resume_stream=True,
            log_file=log_file,
            log_pos=log_pos,
            blocking=True,
            only_events=[DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent]
        )
        logger.info(f"Stream {stream_id}: Started MySQL binlog monitoring from {log_file}:{log_pos} with server_id {server_id}")
        yield streamer
    except Exception as e:
        logger.error(f"Stream {stream_id}: Failed to create BinLogStreamReader: {e}", exc_info=True)
        raise
    finally:
        if streamer:
            streamer.close()
            logger.info(f"Stream {stream_id}: MySQL binlog stream closed.")
|
|
485
|
+
|
|
486
|
+
async def _get_connection(uri: str, creds: PasswdCredential) -> aiomysql.Connection:
    """Open an autocommit aiomysql connection to `uri` ("host:port")."""
    host, port = uri.split(':')
    return await aiomysql.connect(
        host=host,
        port=int(port),
        user=creds.user,
        password=creds.passwd or '',
        autocommit=True,
    )
|
|
492
|
+
|
|
493
|
+
def _generate_event_index(log_file: str, log_pos: int) -> int:
|
|
494
|
+
if not log_file:
|
|
495
|
+
return 0
|
|
496
|
+
try:
|
|
497
|
+
return (int(log_file.split('.')[-1]) << 32) | log_pos
|
|
498
|
+
except (ValueError, IndexError):
|
|
499
|
+
logger.warning(f"Invalid log_file format: {log_file}, returning default index 0")
|
|
500
|
+
return 0
|
|
501
|
+
|
|
502
|
+
def _parse_event_index(index: int) -> Tuple[Optional[str], int]:
|
|
503
|
+
if index == 0:
|
|
504
|
+
return None, 4
|
|
505
|
+
try:
|
|
506
|
+
return f"mysql-bin.{index >> 32:06d}", index & 0xFFFFFFFF
|
|
507
|
+
except Exception as e:
|
|
508
|
+
logger.error(f"Failed to parse event index {index}: {e}", exc_info=True)
|
|
509
|
+
return None, 4
|
|
510
|
+
|
|
511
|
+
def _normalize_row(data):
|
|
512
|
+
if isinstance(data, dict):
|
|
513
|
+
return {k: _normalize_row(v) for k, v in data.items()}
|
|
514
|
+
if isinstance(data, list):
|
|
515
|
+
return [_normalize_row(item) for item in data]
|
|
516
|
+
if isinstance(data, (datetime, date, timedelta)):
|
|
517
|
+
return str(data)
|
|
518
|
+
if isinstance(data, Decimal):
|
|
519
|
+
return float(data)
|
|
520
|
+
return data
|
|
521
|
+
|
|
522
|
+
def _filter_event_rows(event: EventBase, required_fields: Set[str]) -> Optional[EventBase]:
|
|
523
|
+
if not required_fields:
|
|
524
|
+
return event
|
|
525
|
+
|
|
526
|
+
event_prefix = f"{event.event_schema}.{event.table}."
|
|
527
|
+
if not any(f.startswith(event_prefix) for f in required_fields):
|
|
528
|
+
return None
|
|
529
|
+
|
|
530
|
+
filtered_rows = []
|
|
531
|
+
for row in event.rows:
|
|
532
|
+
filtered_row = {}
|
|
533
|
+
for field_name, field_value in row.items():
|
|
534
|
+
full_field_name = f"{event.event_schema}.{event.table}.{field_name}"
|
|
535
|
+
if full_field_name in required_fields:
|
|
536
|
+
filtered_row[field_name] = field_value
|
|
537
|
+
|
|
538
|
+
if filtered_row:
|
|
539
|
+
filtered_rows.append(filtered_row)
|
|
540
|
+
|
|
541
|
+
if filtered_rows:
|
|
542
|
+
new_event = type(event)(event.event_schema, event.table, filtered_rows)
|
|
543
|
+
new_event.fields = list(filtered_rows[0].keys())
|
|
544
|
+
new_event.index = event.index
|
|
545
|
+
return new_event
|
|
546
|
+
|
|
547
|
+
return None
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fustor-source-mysql
|
|
3
|
+
Version: 0.1.9
|
|
4
|
+
Summary: A MySQL source for Fustor Agent
|
|
5
|
+
Author-email: Huajin Wang <wanghuajin999@163.com>
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: fustor-core
|
|
8
|
+
Requires-Dist: aiomysql>=0.2.0
|
|
9
|
+
Requires-Dist: mysql-replication>=1.0.9
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
12
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
13
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
14
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
src/fustor_source_mysql/__init__.py
|
|
4
|
+
src/fustor_source_mysql/py.typed
|
|
5
|
+
src/fustor_source_mysql.egg-info/PKG-INFO
|
|
6
|
+
src/fustor_source_mysql.egg-info/SOURCES.txt
|
|
7
|
+
src/fustor_source_mysql.egg-info/dependency_links.txt
|
|
8
|
+
src/fustor_source_mysql.egg-info/entry_points.txt
|
|
9
|
+
src/fustor_source_mysql.egg-info/requires.txt
|
|
10
|
+
src/fustor_source_mysql.egg-info/top_level.txt
|
|
11
|
+
tests/conftest.py
|
|
12
|
+
tests/docker-mysql.yml
|
|
13
|
+
tests/mysql-init.sql
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fustor_source_mysql
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
import pytest_asyncio
|
|
3
|
+
import os
|
|
4
|
+
import pymysql
|
|
5
|
+
|
|
6
|
+
from fustor_agent.app import App
|
|
7
|
+
|
|
8
|
+
@pytest_asyncio.fixture(scope="function")
async def test_db_setup(test_app_instance: App):
    """Create a throwaway MySQL table for one test, yield its name, drop it after.

    Reads the connection target from the app's 'test-test' source config
    ("host:port" URI) and authenticates as root with MYSQL_ROOT_PASSWORD.
    Fails fast when the config is missing; skips when the password is unset.
    """
    source_config = test_app_instance.source_config_service.get_config('test-test')
    if not source_config:
        pytest.fail("Source config 'test-test' not found. Ensure config.yaml is correctly set up for tests.")

    mysql_root_password = os.getenv("MYSQL_ROOT_PASSWORD", "")
    if not mysql_root_password:
        pytest.skip("MYSQL_ROOT_PASSWORD environment variable not set, skipping integration test.")

    # Parse the "host:port" URI once instead of re-splitting it for every connection.
    uri_parts = source_config.uri.split(':')
    host, port = uri_parts[0], int(uri_parts[1])

    def _root_conn():
        # One fresh connection per setup/teardown phase; built here so the
        # connection parameters are not duplicated in two places.
        return pymysql.connect(
            host=host,
            port=port,
            user="root",
            password=mysql_root_password,
            database="testdb",
        )

    table_name = "test_snapshot_table"
    conn = _root_conn()
    try:
        with conn.cursor() as cursor:
            cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
            cursor.execute(f"""
            CREATE TABLE {table_name} (
                id INT AUTO_INCREMENT PRIMARY KEY,
                name VARCHAR(255) NOT NULL,
                value INT
            );
            """)
            cursor.executemany(
                f"INSERT INTO {table_name} (name, value) VALUES (%s, %s)",
                [('record_1', 100), ('record_2', 200), ('record_3', 300)],
            )
        conn.commit()
    finally:
        # Close even when table creation/seeding raises, so a failing test
        # does not leak a root connection to the test database.
        conn.close()

    yield table_name

    conn = _root_conn()
    try:
        with conn.cursor() as cursor:
            cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
        conn.commit()
    finally:
        conn.close()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
services:
|
|
2
|
+
mysql-test:
|
|
3
|
+
image: mysql:8.2.0
|
|
4
|
+
command:
|
|
5
|
+
--upgrade=FORCE
|
|
6
|
+
--log-bin=mysql-bin
|
|
7
|
+
--server-id=1
|
|
8
|
+
--binlog-format=ROW
|
|
9
|
+
--log_replica_updates=ON
|
|
10
|
+
--binlog_expire_logs_seconds=0
|
|
11
|
+
container_name: fuagent-mysql-test
|
|
12
|
+
environment:
|
|
13
|
+
MYSQL_ROOT_PASSWORD: testroot
|
|
14
|
+
MYSQL_DATABASE: testdb
|
|
15
|
+
MYSQL_USER: testuser
|
|
16
|
+
MYSQL_PASSWORD: testpass
|
|
17
|
+
ports:
|
|
18
|
+
- "3307:3306"
|
|
19
|
+
healthcheck:
|
|
20
|
+
test: ["CMD", "sh", "-c", "mysqladmin ping -uroot -ptestroot --protocol=tcp"]
|
|
21
|
+
interval: 5s
|
|
22
|
+
timeout: 10s
|
|
23
|
+
retries: 60
|
|
24
|
+
volumes:
|
|
25
|
+
- ./mysql-init.sql:/docker-entrypoint-initdb.d/init.sql
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
-- Create the test tables
-- =================================================================
-- Table structure for user (user information table)
-- =================================================================
CREATE TABLE `user` (
    `user_id` INT NOT NULL AUTO_INCREMENT COMMENT '用户主键ID',
    `email` VARCHAR(128) NULL,
    `openid_user_id` VARCHAR(50) NULL,
    PRIMARY KEY (`user_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- =================================================================
-- Table structure for dataset (dataset information table)
-- =================================================================
CREATE TABLE `dataset` (
    `dataset_id` INT NOT NULL AUTO_INCREMENT COMMENT '数据集主键ID',
    `user_id` INT NULL COMMENT '外键,关联到用户表',
    `status` TINYINT(1) NULL,
    `create_time` DATETIME NULL,
    `title` VARCHAR(300) NULL,
    `description` MEDIUMTEXT NULL,
    `is_deleted` TINYINT(1) DEFAULT 0,
    `is_checked` INT NULL,
    `is_review` INT NULL,
    `backup_status` INT NULL,
    `path` VARCHAR(60) NULL,
    PRIMARY KEY (`dataset_id`),
    CONSTRAINT `fk_dataset_user` FOREIGN KEY (`user_id`) REFERENCES `user` (`user_id`) ON DELETE SET NULL ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- =================================================================
-- Table structure for file (file information table)
-- =================================================================
CREATE TABLE `file` (
    `file_id` INT NOT NULL AUTO_INCREMENT COMMENT '文件主键ID',
    `dataset_id` INT NULL COMMENT '外键,关联到数据信息表(dataset)',
    `file_name` VARCHAR(300) NULL,
    `is_deleted` TINYINT(1) DEFAULT 0,
    `relative_path` VARCHAR(500) NULL,
    `file_size` VARCHAR(100) NULL,
    `file_suffix` VARCHAR(300) NULL,
    `file_code` VARCHAR(300) NULL,
    `md5` VARCHAR(300) NULL,
    `status` VARCHAR(30) NULL,
    `create_time` DATETIME NULL,
    PRIMARY KEY (`file_id`),
    CONSTRAINT `fk_file_dataset` FOREIGN KEY (`dataset_id`) REFERENCES `dataset` (`dataset_id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

-- Enable ROW-format binlog (also depends on the container's startup flags).
SET GLOBAL binlog_format = ROW;

-- Grant the privileges the tests require (test environment only).
GRANT REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'testuser'@'%';
GRANT SELECT, INSERT, UPDATE, DELETE ON testdb.* TO 'testuser'@'%';
FLUSH PRIVILEGES;

-- =================================================================
-- Insert test data (5 rows per table)
-- =================================================================

-- user table data
INSERT INTO `user` (`email`, `openid_user_id`) VALUES
('user1@example.com', 'openid1'),
('user2@example.com', 'openid2'),
('user3@example.com', 'openid3'),
('user4@example.com', 'openid4'),
('user5@example.com', 'openid5');

-- dataset table data
INSERT INTO `dataset` (
    `user_id`, `status`, `create_time`, `title`,
    `description`, `is_checked`, `is_review`, `backup_status`, `path`
) VALUES
(1, 1, NOW(), 'Dataset 1', 'Description 1', 1, 0, 0, '/datasets/1'),
(2, 1, NOW(), 'Dataset 2', 'Description 2', 1, 0, 0, '/datasets/2'),
(3, 1, NOW(), 'Dataset 3', 'Description 3', 1, 0, 0, '/datasets/3'),
(4, 1, NOW(), 'Dataset 4', 'Description 4', 1, 0, 0, '/datasets/4'),
(5, 1, NOW(), 'Dataset 5', 'Description 5', 1, 0, 0, '/datasets/5');

-- file table data
INSERT INTO `file` (
    `dataset_id`, `file_name`, `relative_path`, `file_size`,
    `file_suffix`, `file_code`, `md5`, `status`, `create_time`
) VALUES
(1, 'file1.txt', 'files/1', '1024', 'txt', 'F001', 'md51', 'active', NOW()),
(2, 'file2.jpg', 'files/2', '2048', 'jpg', 'F002', 'md52', 'active', NOW()),
(3, 'file3.pdf', 'files/3', '3072', 'pdf', 'F003', 'md53', 'active', NOW()),
(4, 'file4.png', 'files/4', '4096', 'png', 'F004', 'md54', 'active', NOW()),
(5, 'file5.doc', 'files/5', '5120', 'doc', 'F005', 'md55', 'active', NOW());
|