fustor-fusion 0.1.7 (fustor_fusion-0.1.7.tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fustor_fusion-0.1.7/PKG-INFO +21 -0
- fustor_fusion-0.1.7/docs/README.md +17 -0
- fustor_fusion-0.1.7/pyproject.toml +33 -0
- fustor_fusion-0.1.7/setup.cfg +4 -0
- fustor_fusion-0.1.7/src/fustor_fusion/__init__.py +0 -0
- fustor_fusion-0.1.7/src/fustor_fusion/api/ingestion.py +189 -0
- fustor_fusion-0.1.7/src/fustor_fusion/api/session.py +194 -0
- fustor_fusion-0.1.7/src/fustor_fusion/api/views.py +71 -0
- fustor_fusion-0.1.7/src/fustor_fusion/auth/__init__.py +0 -0
- fustor_fusion-0.1.7/src/fustor_fusion/auth/cache.py +21 -0
- fustor_fusion-0.1.7/src/fustor_fusion/auth/datastore_cache.py +24 -0
- fustor_fusion-0.1.7/src/fustor_fusion/auth/dependencies.py +21 -0
- fustor_fusion-0.1.7/src/fustor_fusion/cli.py +177 -0
- fustor_fusion-0.1.7/src/fustor_fusion/clients/__init__.py +0 -0
- fustor_fusion-0.1.7/src/fustor_fusion/config.py +28 -0
- fustor_fusion-0.1.7/src/fustor_fusion/core/session_manager.py +276 -0
- fustor_fusion-0.1.7/src/fustor_fusion/database/__init__.py +31 -0
- fustor_fusion-0.1.7/src/fustor_fusion/datastore_state_manager.py +178 -0
- fustor_fusion-0.1.7/src/fustor_fusion/in_memory_queue.py +271 -0
- fustor_fusion-0.1.7/src/fustor_fusion/jobs/__init__.py +0 -0
- fustor_fusion-0.1.7/src/fustor_fusion/jobs/sync_cache.py +47 -0
- fustor_fusion-0.1.7/src/fustor_fusion/main.py +155 -0
- fustor_fusion-0.1.7/src/fustor_fusion/models/__init__.py +0 -0
- fustor_fusion-0.1.7/src/fustor_fusion/models/event.py +9 -0
- fustor_fusion-0.1.7/src/fustor_fusion/parsers/__init__.py +0 -0
- fustor_fusion-0.1.7/src/fustor_fusion/parsers/background.py +179 -0
- fustor_fusion-0.1.7/src/fustor_fusion/parsers/file_directory_parser.py +353 -0
- fustor_fusion-0.1.7/src/fustor_fusion/parsers/manager.py +207 -0
- fustor_fusion-0.1.7/src/fustor_fusion/parsers/services.py +32 -0
- fustor_fusion-0.1.7/src/fustor_fusion/processing_manager.py +79 -0
- fustor_fusion-0.1.7/src/fustor_fusion/queue_integration.py +143 -0
- fustor_fusion-0.1.7/src/fustor_fusion/runtime.py +31 -0
- fustor_fusion-0.1.7/src/fustor_fusion/runtime_objects.py +9 -0
- fustor_fusion-0.1.7/src/fustor_fusion/view.html +766 -0
- fustor_fusion-0.1.7/src/fustor_fusion.egg-info/PKG-INFO +21 -0
- fustor_fusion-0.1.7/src/fustor_fusion.egg-info/SOURCES.txt +48 -0
- fustor_fusion-0.1.7/src/fustor_fusion.egg-info/dependency_links.txt +1 -0
- fustor_fusion-0.1.7/src/fustor_fusion.egg-info/entry_points.txt +2 -0
- fustor_fusion-0.1.7/src/fustor_fusion.egg-info/requires.txt +16 -0
- fustor_fusion-0.1.7/src/fustor_fusion.egg-info/top_level.txt +1 -0
- fustor_fusion-0.1.7/tests/conftest.py +20 -0
- fustor_fusion-0.1.7/tests/test_consumption_position.py +32 -0
- fustor_fusion-0.1.7/tests/test_parser_cache.py.bak +89 -0
- fustor_fusion-0.1.7/tests/test_parsers.py.bak +119 -0
- fustor_fusion-0.1.7/tests/test_parsers_api.py.bak +51 -0
- fustor_fusion-0.1.7/tests/test_parsers_integration.py.bak +112 -0
- fustor_fusion-0.1.7/tests/test_persistent_parsing.py +39 -0
- fustor_fusion-0.1.7/tests/test_session_cleanup.py +97 -0
- fustor_fusion-0.1.7/tests/test_session_concurrent.py +77 -0
- fustor_fusion-0.1.7/tests/test_session_management.py +315 -0
@@ -0,0 +1,21 @@
+Metadata-Version: 2.4
+Name: fustor-fusion
+Version: 0.1.7
+Summary: Fusion service for Fustor
+Author-email: Huajin Wang <wanghuajin999@163.com>
+Requires-Python: >=3.11
+Requires-Dist: click
+Requires-Dist: fastapi[all]
+Requires-Dist: uvicorn
+Requires-Dist: pydantic
+Requires-Dist: pydantic-settings
+Requires-Dist: greenlet==3.2.2
+Requires-Dist: sqlalchemy==2.0.41
+Requires-Dist: httpx
+Requires-Dist: fustor_registry_client
+Requires-Dist: python-json-logger>=4.0.0
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0.0; extra == "dev"
+Requires-Dist: ruff>=0.1.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
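Note: the metadata pins greenlet (3.2.2) and sqlalchemy (2.0.41) exactly while leaving the web stack (fastapi[all], uvicorn, httpx) unpinned. Installing the release with `pip install fustor-fusion==0.1.7` (or `pip install "fustor-fusion[dev]==0.1.7"` for the pytest/ruff/mypy toolchain) should resolve these, assuming `fustor_registry_client` is resolvable from your package index.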
@@ -0,0 +1,17 @@
+# Fustor Fusion Service Documentation
+
+This directory contains the documentation for the Fustor Fusion service.
+
+## Overview
+
+The Fustor Fusion service receives data from Agents and performs data ingestion, processing, and transformation. It is the core data-flow processing unit of the Fustor platform.
+
+## Modules
+
+* **API**: Exposes the data-ingestion API endpoints called by Agent services.
+* **Processors**: Contains the various data-processing and transformation logic.
+* **Models**: Defines all data models used by the Fusion service.
+
+## More Information
+
+* **API docs**: Visit `/docs` (Swagger UI) or `/redoc` (ReDoc) for the detailed API reference.
@@ -0,0 +1,33 @@
+[project]
+name = "fustor-fusion"
+dynamic = ["version"]
+description = "Fusion service for Fustor"
+license = "MIT"
+requires-python = ">=3.11"
+dependencies = [ "click", "fastapi[all]", "uvicorn", "pydantic", "pydantic-settings", "greenlet==3.2.2", "sqlalchemy==2.0.41", "httpx", "fustor_registry_client", "python-json-logger>=4.0.0",]
+
+[[project.authors]]
+name = "Huajin Wang"
+email = "wanghuajin999@163.com"
+
+[build-system]
+requires = [ "setuptools>=61.0", "setuptools-scm>=8.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools_scm]
+root = ".."
+version_scheme = "post-release"
+local_scheme = "dirty-tag"
+
+[project.urls]
+Homepage = "https://github.com/excelwang/fustor/fusion"
+"Bug Tracker" = "https://github.com/excelwang/fustor/issues"
+
+[project.optional-dependencies]
+dev = [ "pytest>=8.0.0", "ruff>=0.1.0", "mypy>=1.0.0", "pytest-asyncio>=0.23.0",]
+
+[project.scripts]
+fustor-fusion = "fustor_fusion.cli:cli"
+
+[tool.setuptools.packages.find]
+where = [ "src",]
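The `[project.scripts]` table maps the `fustor-fusion` console command to `fustor_fusion.cli:cli`. A minimal smoke test of that entry point, assuming `cli` is a click command or group (the `click` dependency suggests it, but `cli.py` itself is not shown in this diff):

```python
# Hypothetical smoke test for the console entry point declared above.
# Assumes the package is installed and fustor_fusion.cli:cli is a click object.
from click.testing import CliRunner
from fustor_fusion.cli import cli

runner = CliRunner()
result = runner.invoke(cli, ["--help"])  # equivalent to `fustor-fusion --help`
assert result.exit_code == 0
print(result.output)
```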
File without changes
@@ -0,0 +1,189 @@
+from fastapi import APIRouter, Depends, status, HTTPException, Query, Request
+from pydantic import BaseModel
+from sqlalchemy.ext.asyncio import AsyncSession
+import logging
+from typing import List, Dict, Any
+import time
+
+from ..auth.dependencies import get_datastore_id_from_api_key
+from ..runtime import datastore_event_manager
+from ..core.session_manager import session_manager
+from ..auth.datastore_cache import datastore_config_cache
+from ..datastore_state_manager import datastore_state_manager
+
+# Import the queue-based ingestion
+from ..queue_integration import queue_based_ingestor, add_events_batch_to_queue, get_position_from_queue, update_position_in_queue
+from fustor_event_model.models import EventBase, EventType  # Import EventBase and EventType
+
+from ..parsers.manager import ParserManager, get_directory_stats  # CORRECTED
+from datetime import datetime
+
+logger = logging.getLogger(__name__)
+ingestion_router = APIRouter(tags=["Ingestion"])
+
+@ingestion_router.get("/stats", summary="Get global ingestion statistics")
+async def get_global_stats():
+    """
+    Get aggregated statistics across all active datastores for the monitoring dashboard.
+    """
+    active_datastores = datastore_config_cache.get_all_active_datastores()
+
+    sources = []
+    total_volume = 0
+    min_latency_ms = None  # Use None to indicate no data
+    oldest_dir_info = {"path": "N/A", "age_days": 0}
+    max_staleness_seconds = -1
+
+    now = datetime.now().timestamp()
+
+    for ds_config in active_datastores:
+        ds_id = ds_config.id
+        sources.append({
+            "id": ds_config.name or f"Datastore {ds_id}",
+            "type": "Fusion"  # Or derive from config if available
+        })
+
+        try:
+            stats = await get_directory_stats(datastore_id=ds_id)
+
+            # 1. Volume
+            total_volume += stats.get("total_files", 0)
+
+            # 2. Latency (Freshness)
+            # We want the SMALLEST gap between now and the latest file time (i.e., most fresh)
+            latest_ts = stats.get("latest_file_timestamp")
+            if latest_ts:
+                latency = (now - latest_ts) * 1000  # ms
+                # Latency can't be negative ideally, but clocks vary
+                latency = max(0, latency)
+
+                if min_latency_ms is None or latency < min_latency_ms:
+                    min_latency_ms = latency
+
+            # 3. Staleness (Oldest Directory)
+            # We want the LARGEST gap between now and the oldest directory time
+            oldest = stats.get("oldest_directory")
+            if oldest and oldest.get("timestamp"):
+                age_seconds = now - oldest["timestamp"]
+                if age_seconds > max_staleness_seconds:
+                    max_staleness_seconds = age_seconds
+                    oldest_dir_info = {
+                        "path": f"[{ds_config.name}] {oldest['path']}",
+                        "age_days": int(age_seconds / 86400)
+                    }
+
+        except Exception as e:
+            logger.error(f"Failed to get stats for datastore {ds_id}: {e}")
+
+    return {
+        "sources": sources,
+        "metrics": {
+            "total_volume": total_volume,
+            "latency_ms": int(min_latency_ms) if min_latency_ms is not None else 0,
+            "oldest_directory": oldest_dir_info
+        }
+    }
+
+
+@ingestion_router.get("/position", summary="Get the latest checkpoint position for a sync source")
+async def get_position(
+    session_id: str = Query(..., description="Unique ID of the sync source"),
+    datastore_id=Depends(get_datastore_id_from_api_key),
+):
+    si = await session_manager.get_session_info(datastore_id, session_id)
+    if not si:
+        raise HTTPException(status_code=404, detail="Session not found")
+
+    # Get position from memory queue
+    position_index = await get_position_from_queue(datastore_id, si.task_id)
+
+    if position_index is not None:
+        return {"index": position_index}
+    else:
+        # No position found
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No checkpoint found for this sync source; consider triggering a snapshot sync")
+
+# --- Pydantic Models for Ingestion ---
+class BatchIngestPayload(BaseModel):
+    """
+    Defines the generic payload for receiving a batch of events from any client.
+    """
+    session_id: str
+    events: List[Dict[str, Any]]  # Events are received as dicts
+    source_type: str  # 'message' or 'snapshot'
+# --- End Ingestion Models ---
+
+
+@ingestion_router.post(
+    "/",
+    summary="Receive a batch of events",
+    description="This endpoint receives batches of events from clients.",
+    status_code=status.HTTP_204_NO_CONTENT
+)
+async def ingest_event_batch(
+    payload: BatchIngestPayload,
+    request: Request,
+    datastore_id=Depends(get_datastore_id_from_api_key),
+):
+    si = await session_manager.get_session_info(datastore_id, payload.session_id)
+    if not si:
+        raise HTTPException(status_code=404, detail="Session not found")
+    await session_manager.keep_session_alive(
+        datastore_id,
+        payload.session_id,
+        client_ip=request.client.host  # Assuming request is available here
+    )
+
+    # NEW: Check for outdated snapshot pushes
+    datastore_config = datastore_config_cache.get_datastore_config(datastore_id)
+    if datastore_config and datastore_config.allow_concurrent_push and payload.source_type == 'snapshot':
+        is_authoritative = await datastore_state_manager.is_authoritative_session(datastore_id, payload.session_id)
+        if not is_authoritative:
+            logger.warning(f"Received snapshot push from outdated session '{payload.session_id}' for datastore {datastore_id}. Rejecting with 419.")
+            raise HTTPException(status_code=419, detail="A newer sync session has been started. This snapshot task is now obsolete and should stop.")
+
+    try:
+        if payload.events:
+            latest_index = 0
+            event_objects_to_add: List[EventBase] = []
+            for event_dict in payload.events:
+                # Infer event_type, schema, table, index, and fields from the dict
+                # Default to UPDATE if not specified, as it's the most common for generic data
+                event_type = EventType(event_dict.get("event_type", EventType.UPDATE.value))
+                event_schema = event_dict.get("event_schema", "default_schema")  # Use event_schema
+                table = event_dict.get("table", "default_table")
+                index = event_dict.get("index", -1)
+                rows = event_dict.get("rows", [])
+                fields = event_dict.get("fields", list(rows[0].keys()) if rows else [])
+
+                # Create EventBase object
+                event_obj = EventBase(
+                    event_type=event_type,
+                    event_schema=event_schema,  # Use event_schema
+                    table=table,
+                    index=index,
+                    rows=rows,
+                    fields=fields
+                )
+                event_objects_to_add.append(event_obj)
+
+                if isinstance(index, int):
+                    latest_index = max(latest_index, index)
+
+            # Update position in memory queue
+            if latest_index > 0:
+                await update_position_in_queue(datastore_id, si.task_id, latest_index)
+
+            # Add events to the in-memory queue for high-throughput ingestion
+            # Pass task_id for position tracking
+            total_events_added = await add_events_batch_to_queue(datastore_id, event_objects_to_add, si.task_id)
+
+            # Notify the background task that there are new events
+            try:
+                await datastore_event_manager.notify(datastore_id)
+            except Exception as e:
+                logger.error(f"Failed to notify event manager for datastore {datastore_id}: {e}", exc_info=True)
+
+    except Exception as e:
+        logger.error(f"Failed to process event batch (task: {si.task_id}): {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Failed to push event batch: {str(e)}")
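Taken together, `POST /` and `GET /position` form a resumable push protocol: the client replays from the last acknowledged `index`, and stale snapshot pushers are fenced off with the non-standard 419 status once a newer authoritative session exists. A minimal client sketch with httpx (a declared dependency); the `/ingest` mount prefix and the API key are assumptions, since `main.py` is not shown in this excerpt:

```python
# Client-side sketch for the ingestion endpoints above (assumed mount prefix).
import httpx

BASE = "http://localhost:8000/ingest"      # assumption: the real prefix lives in main.py
HEADERS = {"X-API-Key": "my-agent-key"}    # resolved to a datastore_id server-side

payload = {
    "session_id": "<session-id from the session API>",
    "source_type": "message",              # or "snapshot"
    "events": [
        {
            # event_type omitted: the handler defaults it to EventType.UPDATE
            "event_schema": "default_schema",
            "table": "default_table",
            "index": 42,                   # positive ints advance the stored checkpoint
            "rows": [{"path": "/data/a.txt", "size": 123}],
            # fields omitted: inferred from rows[0].keys()
        }
    ],
}

with httpx.Client(headers=HEADERS) as client:
    client.post(f"{BASE}/", json=payload).raise_for_status()  # 204 on success
    pos = client.get(f"{BASE}/position", params={"session_id": payload["session_id"]})
    print(pos.json())                      # {"index": 42} once a checkpoint exists
```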
@@ -0,0 +1,194 @@
+from fastapi import APIRouter, Depends, status, HTTPException, Header, Query, Request
+from pydantic import BaseModel
+import logging
+from typing import List, Dict, Any, Optional
+import time
+import uuid
+
+from ..auth.dependencies import get_datastore_id_from_api_key
+from ..auth.datastore_cache import datastore_config_cache, DatastoreConfig
+from ..core.session_manager import session_manager
+from ..datastore_state_manager import datastore_state_manager
+from ..parsers.manager import reset_directory_tree
+
+logger = logging.getLogger(__name__)
+session_router = APIRouter(tags=["Session Management"])
+
+# --- Pydantic Models for Session Creation ---
+class CreateSessionPayload(BaseModel):
+    """Payload for creating a new session"""
+    task_id: str
+    client_info: Optional[Dict[str, Any]] = None
+
+# --- End Session Creation Models ---
+
+async def _should_allow_new_session(datastore_config: DatastoreConfig, datastore_id: int, task_id: str, session_id: str) -> bool:
+    """
+    Determine if a new session should be allowed based on datastore configuration and current active sessions
+    """
+    sessions = await session_manager.get_datastore_sessions(datastore_id)
+    active_session_ids = set(sessions.keys())
+
+    logger.debug(f"Checking if new session {session_id} for task {task_id} should be allowed on datastore {datastore_id}")
+    logger.debug(f"Current active sessions: {list(active_session_ids)}")
+    logger.debug(f"Datastore allows concurrent push: {datastore_config.allow_concurrent_push}")
+
+    if datastore_config.allow_concurrent_push:
+        # If concurrent pushes are allowed, we only care about sessions for the same task_id
+        current_task_sessions = [
+            s_info for s_id, s_info in sessions.items()
+            if s_info.task_id == task_id
+        ]
+        logger.debug(f"Current sessions for task {task_id}: {len(current_task_sessions)}")
+        return len(current_task_sessions) == 0
+    else:
+        # If concurrent pushes are not allowed, the datastore acts as a global lock
+        locked_session_id = await datastore_state_manager.get_locked_session_id(datastore_id)
+        logger.debug(f"Datastore {datastore_id} is locked by session: {locked_session_id}")
+
+        if not locked_session_id:
+            # Not locked, so a new session is allowed
+            logger.debug(f"Datastore {datastore_id} is not locked. Allowing new session.")
+            return True
+
+        # The datastore is locked. Check if the lock is stale.
+        if locked_session_id not in active_session_ids:
+            # The session holding the lock is no longer in the active session manager.
+            # This indicates a stale lock (e.g., from a previous crashed instance).
+            logger.warning(f"Datastore {datastore_id} is locked by a stale session {locked_session_id} that is no longer active. Unlocking automatically.")
+            await datastore_state_manager.unlock_for_session(datastore_id, locked_session_id)
+            return True  # Allow the new session to proceed
+        else:
+            # The datastore is locked by a currently active session.
+            logger.warning(f"Datastore {datastore_id} is locked by an active session {locked_session_id}. Denying new session {session_id}.")
+            return False
+
+@session_router.post("/",
+    summary="Create a new sync session",
+    description="Create a session ID for a new sync task and register the session")
+async def create_session(
+    payload: CreateSessionPayload,
+    request: Request,
+    datastore_id: int = Depends(get_datastore_id_from_api_key),
+):
+    # Get datastore configuration from cache
+    datastore_config = datastore_config_cache.get_datastore_config(datastore_id)
+
+    if not datastore_config:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Configuration for Datastore {datastore_id} not found"
+        )
+
+    session_id = str(uuid.uuid4())
+
+    should_allow_new_session = await _should_allow_new_session(
+        datastore_config, datastore_id, payload.task_id, session_id
+    )
+
+    if not should_allow_new_session:
+        raise HTTPException(
+            status_code=status.HTTP_409_CONFLICT,
+            detail="New session cannot be created due to current active sessions"
+        )
+
+    if datastore_config.allow_concurrent_push:
+        logger.info(f"Datastore {datastore_id} allows concurrent push. Setting new authoritative session {session_id} and resetting parser.")
+
+        # 1. Set the new session as authoritative
+        await datastore_state_manager.set_authoritative_session(datastore_id, session_id)
+
+        # 2. Reset the parser state (memory only)
+        try:
+            await reset_directory_tree(datastore_id)
+            logger.info(f"Successfully reset parser for datastore {datastore_id}.")
+        except Exception as e:
+            logger.error(f"Exception during parser reset for datastore {datastore_id}: {e}", exc_info=True)
+
+    client_ip = request.client.host
+
+    await session_manager.create_session_entry(
+        datastore_id,
+        session_id,
+        task_id=payload.task_id,
+        client_ip=client_ip,
+        allow_concurrent_push=datastore_config.allow_concurrent_push,
+        session_timeout_seconds=datastore_config.session_timeout_seconds
+    )
+
+    if not datastore_config.allow_concurrent_push:
+        await datastore_state_manager.lock_for_session(datastore_id, session_id)
+
+    return {
+        "session_id": session_id,
+        "suggested_heartbeat_interval_seconds": max(1, datastore_config.session_timeout_seconds // 2),
+        "session_timeout_seconds": datastore_config.session_timeout_seconds
+    }
+
+@session_router.post("/heartbeat", tags=["Session Management"], summary="Session heartbeat keep-alive")
+async def heartbeat(
+    request: Request,
+    datastore_id: int = Depends(get_datastore_id_from_api_key),
+    session_id: str = Header(..., description="Session ID"),
+):
+    si = await session_manager.get_session_info(datastore_id, session_id)
+
+    if not si:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Session {session_id} not found"
+        )
+
+    is_locked_by_session = await datastore_state_manager.is_locked_by_session(datastore_id, session_id)
+    if not is_locked_by_session:
+        await datastore_state_manager.lock_for_session(datastore_id, session_id)
+
+    await session_manager.keep_session_alive(datastore_id, session_id, client_ip=request.client.host)
+    return {
+        "status": "ok",
+        "message": f"Session {session_id} heartbeat updated successfully",
+    }
+
+@session_router.delete("/", tags=["Session Management"], summary="End a session")
+async def end_session(
+    datastore_id: int = Depends(get_datastore_id_from_api_key),
+    session_id: str = Header(..., description="Session ID"),
+):
+    success = await session_manager.terminate_session(datastore_id, session_id)
+
+    if not success:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Session {session_id} not found"
+        )
+
+    await datastore_state_manager.unlock_for_session(datastore_id, session_id)
+
+    return {
+        "status": "ok",
+        "message": f"Session {session_id} terminated successfully",
+    }
+
+@session_router.get("/", tags=["Session Management"], summary="List active sessions")
+async def list_sessions(
+    datastore_id: int = Depends(get_datastore_id_from_api_key),
+):
+    sessions = await session_manager.get_datastore_sessions(datastore_id)
+
+    session_list = []
+    for session_id, session_info in sessions.items():
+        session_list.append({
+            "session_id": session_id,
+            "task_id": session_info.task_id,
+            "client_ip": session_info.client_ip,
+            "last_activity": session_info.last_activity,
+            "created_at": session_info.created_at,
+            "allow_concurrent_push": session_info.allow_concurrent_push,
+            "session_timeout_seconds": session_info.session_timeout_seconds
+        })
+
+    return {
+        "datastore_id": datastore_id,
+        "active_sessions": session_list,
+        "count": len(session_list)
+    }
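The handlers above define the whole lifecycle: create, heartbeat (at roughly half the timeout, per `suggested_heartbeat_interval_seconds`), and delete, with the session ID carried in a `session-id` header after creation. A sketch with httpx; the `/sessions` mount prefix is an assumption, since the router is wired up in `main.py`, which is not part of this excerpt:

```python
# Session lifecycle sketch against the router above (assumed mount prefix).
import httpx

BASE = "http://localhost:8000/sessions"    # assumption: the real prefix lives in main.py
API_KEY = {"X-API-Key": "my-agent-key"}

with httpx.Client(headers=API_KEY) as client:
    # 1. Create a session; the response also carries heartbeat/timeout hints.
    created = client.post(f"{BASE}/", json={"task_id": "task-1"}).json()
    session_header = {"session-id": created["session_id"]}

    # 2. Keep it alive; FastAPI maps the session_id parameter to a "session-id" header.
    client.post(f"{BASE}/heartbeat", headers=session_header).raise_for_status()

    # 3. List active sessions for this datastore.
    print(client.get(f"{BASE}/").json()["count"])

    # 4. Terminate, which also releases any lock the session held.
    client.delete(f"{BASE}/", headers=session_header).raise_for_status()
```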
@@ -0,0 +1,71 @@
+"""
+API endpoints for the parsers module.
+Provides REST endpoints to access parsed data views.
+"""
+from fastapi import APIRouter, Query, Header, Depends, status, HTTPException
+import logging
+from typing import Dict, Any, Optional
+
+from ..parsers.manager import get_directory_tree, search_files, get_directory_stats, reset_directory_tree
+from ..auth.dependencies import get_datastore_id_from_api_key
+
+logger = logging.getLogger(__name__)
+
+parser_router = APIRouter(tags=["Parsers - Data Views"])
+
+
+@parser_router.get("/fs/tree", summary="Get directory tree structure")
+async def get_directory_tree_api(
+    path: str = Query("/", description="Directory path to retrieve (default: '/')"),
+    datastore_id: int = Depends(get_datastore_id_from_api_key)
+) -> Optional[Dict[str, Any]]:
+    """Get the directory structure tree starting from the specified path."""
+    logger.info(f"API request for directory tree: path={path}, datastore_id={datastore_id}")
+    result = await get_directory_tree(path, datastore_id=datastore_id)
+    logger.info(f"Directory tree result for path '{path}': {result}")
+    return result
+
+@parser_router.get("/fs/search", summary="Search for files by pattern")
+async def search_files_api(
+    pattern: str = Query(..., description="Search pattern to match in file paths"),
+    datastore_id: int = Depends(get_datastore_id_from_api_key)
+) -> list:
+    """Search for files matching the specified pattern."""
+    logger.info(f"API request for file search: pattern={pattern}, datastore_id={datastore_id}")
+    result = await search_files(pattern, datastore_id=datastore_id)
+    logger.info(f"File search result for pattern '{pattern}': found {len(result)} files")
+    return result
+
+
+@parser_router.get("/fs/stats", summary="Get statistics about the directory structure")
+async def get_directory_stats_api(
+    datastore_id: int = Depends(get_datastore_id_from_api_key)
+) -> Dict[str, Any]:
+    """Get statistics about the current directory structure."""
+    logger.info(f"API request for directory stats: datastore_id={datastore_id}")
+    result = await get_directory_stats(datastore_id=datastore_id)
+    logger.info(f"Directory stats result: {result}")
+    return result
+
+
+@parser_router.delete("/fs/reset",
+    summary="Reset directory tree structure",
+    description="Clear all directory entries for a specific datastore",
+    status_code=status.HTTP_204_NO_CONTENT
+)
+async def reset_directory_tree_api(
+    datastore_id: int = Depends(get_datastore_id_from_api_key)
+) -> None:
+    """
+    Reset the directory tree structure by clearing all entries for a specific datastore.
+    """
+    logger.info(f"API request to reset directory tree for datastore {datastore_id}")
+    success = await reset_directory_tree(datastore_id)
+
+    if not success:
+        logger.error(f"Failed to reset directory tree for datastore {datastore_id}")
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to reset directory tree"
+        )
+    logger.info(f"Successfully reset directory tree for datastore {datastore_id}")
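These are read-only views over state built by the parsers, so they can be exercised with plain GETs. A short sketch; the `/views` prefix is an assumption, and the exact `pattern` semantics live in `parsers.manager` (not shown here):

```python
# Querying the parser data views (assumed mount prefix).
import httpx

BASE = "http://localhost:8000/views"       # assumption: the real prefix lives in main.py
HEADERS = {"X-API-Key": "my-agent-key"}

with httpx.Client(headers=HEADERS) as client:
    tree = client.get(f"{BASE}/fs/tree", params={"path": "/"}).json()
    hits = client.get(f"{BASE}/fs/search", params={"pattern": "log"}).json()
    stats = client.get(f"{BASE}/fs/stats").json()
    print(stats.get("total_files"), "files total,", len(hits), "matches")
```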
File without changes
@@ -0,0 +1,21 @@
+from typing import Dict, Optional, List, Any
+from fustor_fusion_sdk.interfaces import ApiKeyCacheInterface  # Import the interface
+
+class ApiKeyCache(ApiKeyCacheInterface):  # Inherit from the interface
+    def __init__(self):
+        self._cache: Dict[str, int] = {}
+
+    def set_cache(self, api_keys_data: List[Dict[str, Any]]):
+        """
+        Sets the entire cache from a list of API key data.
+        Expected format: [{'key': '...', 'datastore_id': 1}, ...]
+        """
+        new_cache = {item['key']: item['datastore_id'] for item in api_keys_data if 'key' in item and 'datastore_id' in item}
+        self._cache = new_cache
+
+    def get_datastore_id(self, api_key: str) -> Optional[int]:
+        """
+        Retrieves the datastore_id for a given API key.
+        """
+        return self._cache.get(api_key)
+
+api_key_cache = ApiKeyCache()
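A usage sketch for the singleton above: a refresh job (presumably `jobs/sync_cache.py` from the file list) would call `set_cache` with rows fetched from the registry, after which lookups on the request path are plain dict reads. Note that malformed entries are silently skipped by the dict comprehension:

```python
from fustor_fusion.auth.cache import api_key_cache

api_key_cache.set_cache([
    {"key": "my-agent-key", "datastore_id": 1},
    {"key": "other-key", "datastore_id": 2},
    {"datastore_id": 3},   # ignored: missing the 'key' field
])

assert api_key_cache.get_datastore_id("my-agent-key") == 1
assert api_key_cache.get_datastore_id("unknown") is None
```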
@@ -0,0 +1,24 @@
+from typing import Dict, Optional, List, Any
+from fustor_common.models import DatastoreConfig
+from fustor_registry_client.models import ClientDatastoreConfigResponse
+from fustor_fusion_sdk.interfaces import DatastoreConfigCacheInterface  # Import the interface
+
+class DatastoreConfigCache(DatastoreConfigCacheInterface):  # Inherit from the interface
+    def __init__(self):
+        self._cache: Dict[int, DatastoreConfig] = {}
+
+    def set_cache(self, datastore_configs_data: List[ClientDatastoreConfigResponse]):  # Changed type hint
+        """
+        Sets the entire cache from a list of datastore config data.
+        """
+        new_cache = {item.datastore_id: item for item in datastore_configs_data}  # Store the full ClientDatastoreConfigResponse object
+        self._cache = new_cache
+        print(f"Datastore config cache updated. Total datastores: {len(self._cache)}")
+
+    def get_datastore_config(self, datastore_id: int) -> Optional[DatastoreConfig]:
+        """
+        Retrieves the configuration for a given datastore.
+        """
+        return self._cache.get(datastore_id)
+
+datastore_config_cache = DatastoreConfigCache()
@@ -0,0 +1,21 @@
+from fastapi import Header, HTTPException, status, Depends
+from typing import Optional
+import logging
+
+from .cache import api_key_cache
+
+logger = logging.getLogger(__name__)
+
+async def get_datastore_id_from_api_key(x_api_key: str = Header(..., alias="X-API-Key")) -> int:
+    """
+    Retrieves the datastore_id from the in-memory API key cache.
+    """
+    logger.debug(f"Received X-API-Key: {x_api_key}")
+    datastore_id = api_key_cache.get_datastore_id(x_api_key)
+    logger.debug(f"Resolved datastore_id for key '{x_api_key[:5]}...': {datastore_id}")
+    if datastore_id is None:
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail="Invalid or inactive X-API-Key"
+        )
+    return datastore_id
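A minimal sketch of the dependency in use, exercised with FastAPI's `TestClient` (httpx-backed, and httpx is a declared dependency); the `/whoami` route is illustrative, not part of the package:

```python
from fastapi import Depends, FastAPI
from fastapi.testclient import TestClient

from fustor_fusion.auth.cache import api_key_cache
from fustor_fusion.auth.dependencies import get_datastore_id_from_api_key

app = FastAPI()

@app.get("/whoami")  # hypothetical route for illustration
async def whoami(datastore_id: int = Depends(get_datastore_id_from_api_key)):
    return {"datastore_id": datastore_id}

api_key_cache.set_cache([{"key": "my-agent-key", "datastore_id": 1}])
client = TestClient(app)

assert client.get("/whoami", headers={"X-API-Key": "my-agent-key"}).json() == {"datastore_id": 1}
assert client.get("/whoami", headers={"X-API-Key": "bad"}).status_code == 401
assert client.get("/whoami").status_code == 422   # required header missing
```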