fustor-fusion 0.1.7 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. fustor_fusion-0.1.7/PKG-INFO +21 -0
  2. fustor_fusion-0.1.7/docs/README.md +17 -0
  3. fustor_fusion-0.1.7/pyproject.toml +33 -0
  4. fustor_fusion-0.1.7/setup.cfg +4 -0
  5. fustor_fusion-0.1.7/src/fustor_fusion/__init__.py +0 -0
  6. fustor_fusion-0.1.7/src/fustor_fusion/api/ingestion.py +189 -0
  7. fustor_fusion-0.1.7/src/fustor_fusion/api/session.py +194 -0
  8. fustor_fusion-0.1.7/src/fustor_fusion/api/views.py +71 -0
  9. fustor_fusion-0.1.7/src/fustor_fusion/auth/__init__.py +0 -0
  10. fustor_fusion-0.1.7/src/fustor_fusion/auth/cache.py +21 -0
  11. fustor_fusion-0.1.7/src/fustor_fusion/auth/datastore_cache.py +24 -0
  12. fustor_fusion-0.1.7/src/fustor_fusion/auth/dependencies.py +21 -0
  13. fustor_fusion-0.1.7/src/fustor_fusion/cli.py +177 -0
  14. fustor_fusion-0.1.7/src/fustor_fusion/clients/__init__.py +0 -0
  15. fustor_fusion-0.1.7/src/fustor_fusion/config.py +28 -0
  16. fustor_fusion-0.1.7/src/fustor_fusion/core/session_manager.py +276 -0
  17. fustor_fusion-0.1.7/src/fustor_fusion/database/__init__.py +31 -0
  18. fustor_fusion-0.1.7/src/fustor_fusion/datastore_state_manager.py +178 -0
  19. fustor_fusion-0.1.7/src/fustor_fusion/in_memory_queue.py +271 -0
  20. fustor_fusion-0.1.7/src/fustor_fusion/jobs/__init__.py +0 -0
  21. fustor_fusion-0.1.7/src/fustor_fusion/jobs/sync_cache.py +47 -0
  22. fustor_fusion-0.1.7/src/fustor_fusion/main.py +155 -0
  23. fustor_fusion-0.1.7/src/fustor_fusion/models/__init__.py +0 -0
  24. fustor_fusion-0.1.7/src/fustor_fusion/models/event.py +9 -0
  25. fustor_fusion-0.1.7/src/fustor_fusion/parsers/__init__.py +0 -0
  26. fustor_fusion-0.1.7/src/fustor_fusion/parsers/background.py +179 -0
  27. fustor_fusion-0.1.7/src/fustor_fusion/parsers/file_directory_parser.py +353 -0
  28. fustor_fusion-0.1.7/src/fustor_fusion/parsers/manager.py +207 -0
  29. fustor_fusion-0.1.7/src/fustor_fusion/parsers/services.py +32 -0
  30. fustor_fusion-0.1.7/src/fustor_fusion/processing_manager.py +79 -0
  31. fustor_fusion-0.1.7/src/fustor_fusion/queue_integration.py +143 -0
  32. fustor_fusion-0.1.7/src/fustor_fusion/runtime.py +31 -0
  33. fustor_fusion-0.1.7/src/fustor_fusion/runtime_objects.py +9 -0
  34. fustor_fusion-0.1.7/src/fustor_fusion/view.html +766 -0
  35. fustor_fusion-0.1.7/src/fustor_fusion.egg-info/PKG-INFO +21 -0
  36. fustor_fusion-0.1.7/src/fustor_fusion.egg-info/SOURCES.txt +48 -0
  37. fustor_fusion-0.1.7/src/fustor_fusion.egg-info/dependency_links.txt +1 -0
  38. fustor_fusion-0.1.7/src/fustor_fusion.egg-info/entry_points.txt +2 -0
  39. fustor_fusion-0.1.7/src/fustor_fusion.egg-info/requires.txt +16 -0
  40. fustor_fusion-0.1.7/src/fustor_fusion.egg-info/top_level.txt +1 -0
  41. fustor_fusion-0.1.7/tests/conftest.py +20 -0
  42. fustor_fusion-0.1.7/tests/test_consumption_position.py +32 -0
  43. fustor_fusion-0.1.7/tests/test_parser_cache.py.bak +89 -0
  44. fustor_fusion-0.1.7/tests/test_parsers.py.bak +119 -0
  45. fustor_fusion-0.1.7/tests/test_parsers_api.py.bak +51 -0
  46. fustor_fusion-0.1.7/tests/test_parsers_integration.py.bak +112 -0
  47. fustor_fusion-0.1.7/tests/test_persistent_parsing.py +39 -0
  48. fustor_fusion-0.1.7/tests/test_session_cleanup.py +97 -0
  49. fustor_fusion-0.1.7/tests/test_session_concurrent.py +77 -0
  50. fustor_fusion-0.1.7/tests/test_session_management.py +315 -0
@@ -0,0 +1,21 @@
+ Metadata-Version: 2.4
+ Name: fustor-fusion
+ Version: 0.1.7
+ Summary: Fusion service for Fustor
+ Author-email: Huajin Wang <wanghuajin999@163.com>
+ Requires-Python: >=3.11
+ Requires-Dist: click
+ Requires-Dist: fastapi[all]
+ Requires-Dist: uvicorn
+ Requires-Dist: pydantic
+ Requires-Dist: pydantic-settings
+ Requires-Dist: greenlet==3.2.2
+ Requires-Dist: sqlalchemy==2.0.41
+ Requires-Dist: httpx
+ Requires-Dist: fustor_registry_client
+ Requires-Dist: python-json-logger>=4.0.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
@@ -0,0 +1,17 @@
+ # Fustor Fusion Service Documentation
+
+ This directory contains documentation for the Fustor Fusion service.
+
+ ## Overview
+
+ The Fustor Fusion service receives data from Agents and handles data ingestion, processing, and transformation. It is the core data-flow processing unit of the Fustor platform.
+
+ ## Modules
+
+ * **API**: Exposes the data ingestion API endpoints called by the Agent service.
+ * **Processors**: Contains the data processing and transformation logic.
+ * **Models**: Defines all data models used by the Fusion service.
+
+ ## More Information
+
+ * **API documentation**: Visit `/docs` (Swagger UI) or `/redoc` (ReDoc) for detailed API reference.
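The README points at the interactive API docs; as a quick sanity check, the OpenAPI schema that FastAPI serves alongside them can be fetched directly. A minimal sketch, assuming a local deployment on port 8000 (the actual host and port are not part of this package):

```python
import httpx

# Assumed local deployment address; FastAPI serves /docs, /redoc, and the raw
# schema at /openapi.json by default.
BASE_URL = "http://localhost:8000"

schema = httpx.get(f"{BASE_URL}/openapi.json").json()
print(sorted(schema["paths"].keys()))  # lists the ingestion/session/parser routes
```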
@@ -0,0 +1,33 @@
+ [project]
+ name = "fustor-fusion"
+ dynamic = ["version"]
+ description = "Fusion service for Fustor"
+ requires-python = ">=3.11"
+ dependencies = [ "click", "fastapi[all]", "uvicorn", "pydantic", "pydantic-settings", "greenlet==3.2.2", "sqlalchemy==2.0.41", "httpx", "fustor_registry_client", "python-json-logger>=4.0.0",]
+ [[project.authors]]
+ name = "Huajin Wang"
+ email = "wanghuajin999@163.com"
+
+ [build-system]
+ requires = [ "setuptools>=61.0", "setuptools-scm>=8.0"]
+ build-backend = "setuptools.build_meta"
+
+ [tool.setuptools_scm]
+ root = ".."
+ version_scheme = "post-release"
+ local_scheme = "dirty-tag"
+
+ ["project.urls"]
+ Homepage = "https://github.com/excelwang/fustor/fusion"
+ "Bug Tracker" = "https://github.com/excelwang/fustor/issues"
+
+ license = "MIT"
+
+ [project.optional-dependencies]
+ dev = [ "pytest>=8.0.0", "ruff>=0.1.0", "mypy>=1.0.0", "pytest-asyncio>=0.23.0",]
+
+ [project.scripts]
+ fustor-fusion = "fustor_fusion.cli:cli"
+
+ [tool.setuptools.packages.find]
+ where = [ "src",]
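The `[project.scripts]` table maps the `fustor-fusion` console command to `fustor_fusion.cli:cli`. A minimal sketch of what that entry point resolves to, assuming `cli` is the click command group defined in `cli.py` (the CLI body itself is not shown in this diff):

```python
# Equivalent of running the installed `fustor-fusion` console script:
# the entry point imports and calls fustor_fusion.cli:cli.
from fustor_fusion.cli import cli

if __name__ == "__main__":
    cli()
```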
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
File without changes
@@ -0,0 +1,189 @@
+ from fastapi import APIRouter, Depends, status, HTTPException, Query, Request
+ from pydantic import BaseModel
+ from sqlalchemy.ext.asyncio import AsyncSession
+ import logging
+ from typing import List, Dict, Any
+ import time
+
+ from ..auth.dependencies import get_datastore_id_from_api_key
+ from ..runtime import datastore_event_manager
+ from ..core.session_manager import session_manager
+ from ..auth.datastore_cache import datastore_config_cache
+ from ..datastore_state_manager import datastore_state_manager
+
+ # Import the queue-based ingestion
+ from ..queue_integration import queue_based_ingestor, add_events_batch_to_queue, get_position_from_queue, update_position_in_queue
+ from fustor_event_model.models import EventBase, EventType # Import EventBase and EventType
+
+ from ..parsers.manager import ParserManager, get_directory_stats # CORRECTED
+ from datetime import datetime
+
+ logger = logging.getLogger(__name__)
+ ingestion_router = APIRouter(tags=["Ingestion"])
+
+ @ingestion_router.get("/stats", summary="Get global ingestion statistics")
+ async def get_global_stats():
+     """
+     Get aggregated statistics across all active datastores for the monitoring dashboard.
+     """
+     active_datastores = datastore_config_cache.get_all_active_datastores()
+
+     sources = []
+     total_volume = 0
+     min_latency_ms = None # Use None to indicate no data
+     oldest_dir_info = {"path": "N/A", "age_days": 0}
+     max_staleness_seconds = -1
+
+     now = datetime.now().timestamp()
+
+     for ds_config in active_datastores:
+         ds_id = ds_config.id
+         sources.append({
+             "id": ds_config.name or f"Datastore {ds_id}",
+             "type": "Fusion" # Or derive from config if available
+         })
+
+         try:
+             stats = await get_directory_stats(datastore_id=ds_id)
+
+             # 1. Volume
+             total_volume += stats.get("total_files", 0)
+
+             # 2. Latency (Freshness)
+             # We want the SMALLEST gap between now and the latest file time (i.e., most fresh)
+             latest_ts = stats.get("latest_file_timestamp")
+             if latest_ts:
+                 latency = (now - latest_ts) * 1000 # ms
+                 # Latency can't be negative ideally, but clocks vary
+                 latency = max(0, latency)
+
+                 if min_latency_ms is None or latency < min_latency_ms:
+                     min_latency_ms = latency
+
+             # 3. Staleness (Oldest Directory)
+             # We want the LARGEST gap between now and the oldest directory time
+             oldest = stats.get("oldest_directory")
+             if oldest and oldest.get("timestamp"):
+                 age_seconds = now - oldest["timestamp"]
+                 if age_seconds > max_staleness_seconds:
+                     max_staleness_seconds = age_seconds
+                     oldest_dir_info = {
+                         "path": f"[{ds_config.name}] {oldest['path']}",
+                         "age_days": int(age_seconds / 86400)
+                     }
+
+         except Exception as e:
+             logger.error(f"Failed to get stats for datastore {ds_id}: {e}")
+
+     return {
+         "sources": sources,
+         "metrics": {
+             "total_volume": total_volume,
+             "latency_ms": int(min_latency_ms) if min_latency_ms is not None else 0,
+             "oldest_directory": oldest_dir_info
+         }
+     }
+
+
+ @ingestion_router.get("/position", summary="Get the latest checkpoint position for a sync source")
+ async def get_position(
+     session_id: str = Query(..., description="Unique ID of the sync source"),
+     datastore_id=Depends(get_datastore_id_from_api_key),
+ ):
+     si = await session_manager.get_session_info(datastore_id, session_id)
+     if not si:
+         raise HTTPException(status_code=404, detail="Session not found")
+
+     # Get position from memory queue
+     position_index = await get_position_from_queue(datastore_id, si.task_id)
+
+     if position_index is not None:
+         return {"index": position_index}
+     else:
+         # No position found
+         raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="No checkpoint found for this sync source; consider triggering a snapshot sync")
+
+ # --- Pydantic Models for Ingestion ---
+ class BatchIngestPayload(BaseModel):
+     """
+     Defines the generic payload for receiving a batch of events from any client.
+     """
+     session_id: str
+     events: List[Dict[str, Any]] # Events are received as dicts
+     source_type: str # 'message' or 'snapshot'
+ # --- End Ingestion Models ---
+
+
+ @ingestion_router.post(
+     "/",
+     summary="Receive a batch of events",
+     description="This endpoint receives batches of events from clients.",
+     status_code=status.HTTP_204_NO_CONTENT
+ )
+ async def ingest_event_batch(
+     payload: BatchIngestPayload,
+     request: Request,
+     datastore_id=Depends(get_datastore_id_from_api_key),
+ ):
+     si = await session_manager.get_session_info(datastore_id, payload.session_id)
+     if not si:
+         raise HTTPException(status_code=404, detail="Session not found")
+     await session_manager.keep_session_alive(
+         datastore_id,
+         payload.session_id,
+         client_ip=request.client.host # Assuming request is available here
+     )
+
+     # NEW: Check for outdated snapshot pushes
+     datastore_config = datastore_config_cache.get_datastore_config(datastore_id)
+     if datastore_config and datastore_config.allow_concurrent_push and payload.source_type == 'snapshot':
+         is_authoritative = await datastore_state_manager.is_authoritative_session(datastore_id, payload.session_id)
+         if not is_authoritative:
+             logger.warning(f"Received snapshot push from outdated session '{payload.session_id}' for datastore {datastore_id}. Rejecting with 419.")
+             raise HTTPException(status_code=419, detail="A newer sync session has been started. This snapshot task is now obsolete and should stop.")
+
+     try:
+         if payload.events:
+             latest_index = 0
+             event_objects_to_add: List[EventBase] = []
+             for event_dict in payload.events:
+                 # Infer event_type, schema, table, index, and fields from the dict
+                 # Default to UPDATE if not specified, as it's the most common for generic data
+                 event_type = EventType(event_dict.get("event_type", EventType.UPDATE.value))
+                 event_schema = event_dict.get("event_schema", "default_schema") # Use event_schema
+                 table = event_dict.get("table", "default_table")
+                 index = event_dict.get("index", -1)
+                 rows = event_dict.get("rows", [])
+                 fields = event_dict.get("fields", list(rows[0].keys()) if rows else [])
+
+                 # Create EventBase object
+                 event_obj = EventBase(
+                     event_type=event_type,
+                     event_schema=event_schema, # Use event_schema
+                     table=table,
+                     index=index,
+                     rows=rows,
+                     fields=fields
+                 )
+                 event_objects_to_add.append(event_obj)
+
+                 if isinstance(index, int):
+                     latest_index = max(latest_index, index)
+
+             # Update position in memory queue
+             if latest_index > 0:
+                 await update_position_in_queue(datastore_id, si.task_id, latest_index)
+
+             # Add events to the in-memory queue for high-throughput ingestion
+             # Pass task_id for position tracking
+             total_events_added = await add_events_batch_to_queue(datastore_id, event_objects_to_add, si.task_id)
+
+             # Notify the background task that there are new events
+             try:
+                 await datastore_event_manager.notify(datastore_id)
+             except Exception as e:
+                 logger.error(f"Failed to notify event manager for datastore {datastore_id}: {e}", exc_info=True)
+
+     except Exception as e:
+         logger.error(f"Failed to process event batch (task: {si.task_id}): {e}", exc_info=True)
+         raise HTTPException(status_code=500, detail=f"Failed to push event batch: {str(e)}")
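For reference, a client-side sketch of the batch-ingestion call above, assuming an existing session and a locally reachable Fusion instance. The base URL, mount prefix, API key, and example field values are illustrative only; the payload shape follows `BatchIngestPayload` (`session_id`, `events`, `source_type`), and `event_type` is omitted so the server-side default of `UPDATE` applies:

```python
import httpx

FUSION_URL = "http://localhost:8000"  # assumed address; the router's mount prefix is not shown in this diff
API_KEY = "agent-key-1"               # assumed key known to the Fusion's ApiKeyCache

def push_batch(session_id: str) -> None:
    payload = {
        "session_id": session_id,
        "source_type": "snapshot",    # or "message"
        "events": [
            {
                # event_type omitted -> the server defaults to UPDATE
                "event_schema": "default_schema",
                "table": "default_table",
                "index": 42,
                "rows": [{"path": "/data/example.txt", "size": 1024}],
                "fields": ["path", "size"],
            }
        ],
    }
    resp = httpx.post(f"{FUSION_URL}/", json=payload, headers={"X-API-Key": API_KEY})
    resp.raise_for_status()           # the endpoint returns 204 No Content on success
```

Per the code above, a 419 response from this endpoint signals that a newer snapshot session has taken over and the pushing task should stop.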
@@ -0,0 +1,194 @@
+ from fastapi import APIRouter, Depends, status, HTTPException, Header, Query, Request
+ from pydantic import BaseModel
+ import logging
+ from typing import List, Dict, Any, Optional
+ import time
+ import uuid
+
+ from ..auth.dependencies import get_datastore_id_from_api_key
+ from ..auth.datastore_cache import datastore_config_cache, DatastoreConfig
+ from ..core.session_manager import session_manager
+ from ..datastore_state_manager import datastore_state_manager
+ from ..parsers.manager import reset_directory_tree
+
+ logger = logging.getLogger(__name__)
+ session_router = APIRouter(tags=["Session Management"])
+
+ # --- Pydantic Models for Session Creation ---
+ class CreateSessionPayload(BaseModel):
+     """Payload for creating a new session"""
+     task_id: str
+     client_info: Optional[Dict[str, Any]] = None
+
+ # --- End Session Creation Models ---
+
+ async def _should_allow_new_session(datastore_config: DatastoreConfig, datastore_id: int, task_id: str, session_id: str) -> bool:
+     """
+     Determine if a new session should be allowed based on datastore configuration and current active sessions
+     """
+     sessions = await session_manager.get_datastore_sessions(datastore_id)
+     active_session_ids = set(sessions.keys())
+
+     logger.debug(f"Checking if new session {session_id} for task {task_id} should be allowed on datastore {datastore_id}")
+     logger.debug(f"Current active sessions: {list(active_session_ids)}")
+     logger.debug(f"Datastore allows concurrent push: {datastore_config.allow_concurrent_push}")
+
+     if datastore_config.allow_concurrent_push:
+         # If concurrent pushes are allowed, we only care about sessions for the same task_id
+         current_task_sessions = [
+             s_info for s_id, s_info in sessions.items()
+             if s_info.task_id == task_id
+         ]
+         logger.debug(f"Current sessions for task {task_id}: {len(current_task_sessions)}")
+         return len(current_task_sessions) == 0
+     else:
+         # If concurrent pushes are not allowed, the datastore acts as a global lock
+         locked_session_id = await datastore_state_manager.get_locked_session_id(datastore_id)
+         logger.debug(f"Datastore {datastore_id} is locked by session: {locked_session_id}")
+
+         if not locked_session_id:
+             # Not locked, so a new session is allowed
+             logger.debug(f"Datastore {datastore_id} is not locked. Allowing new session.")
+             return True
+
+         # The datastore is locked. Check if the lock is stale.
+         if locked_session_id not in active_session_ids:
+             # The session holding the lock is no longer in the active session manager.
+             # This indicates a stale lock (e.g., from a previous crashed instance).
+             logger.warning(f"Datastore {datastore_id} is locked by a stale session {locked_session_id} that is no longer active. Unlocking automatically.")
+             await datastore_state_manager.unlock_for_session(datastore_id, locked_session_id)
+             return True # Allow the new session to proceed
+         else:
+             # The datastore is locked by a currently active session.
+             logger.warning(f"Datastore {datastore_id} is locked by an active session {locked_session_id}. Denying new session {session_id}.")
+             return False
+
+ @session_router.post("/",
+                      summary="Create a new sync session",
+                      description="Creates a session ID for a new sync task and registers the session")
+ async def create_session(
+     payload: CreateSessionPayload,
+     request: Request,
+     datastore_id: int = Depends(get_datastore_id_from_api_key),
+ ):
+     # Get datastore configuration from cache
+     datastore_config = datastore_config_cache.get_datastore_config(datastore_id)
+
+     if not datastore_config:
+         raise HTTPException(
+             status_code=status.HTTP_404_NOT_FOUND,
+             detail=f"Configuration for Datastore {datastore_id} not found"
+         )
+
+     session_id = str(uuid.uuid4())
+
+     should_allow_new_session = await _should_allow_new_session(
+         datastore_config, datastore_id, payload.task_id, session_id
+     )
+
+     if not should_allow_new_session:
+         raise HTTPException(
+             status_code=status.HTTP_409_CONFLICT,
+             detail="New session cannot be created due to current active sessions"
+         )
+
+     if datastore_config.allow_concurrent_push:
+         logger.info(f"Datastore {datastore_id} allows concurrent push. Setting new authoritative session {session_id} and resetting parser.")
+
+         # 1. Set the new session as authoritative
+         await datastore_state_manager.set_authoritative_session(datastore_id, session_id)
+
+         # 2. Reset the parser state (memory only)
+         try:
+             await reset_directory_tree(datastore_id)
+             logger.info(f"Successfully reset parser for datastore {datastore_id}.")
+         except Exception as e:
+             logger.error(f"Exception during parser reset for datastore {datastore_id}: {e}", exc_info=True)
+
+     client_ip = request.client.host
+
+     await session_manager.create_session_entry(
+         datastore_id,
+         session_id,
+         task_id=payload.task_id,
+         client_ip=client_ip,
+         allow_concurrent_push=datastore_config.allow_concurrent_push,
+         session_timeout_seconds=datastore_config.session_timeout_seconds
+     )
+
+     if not datastore_config.allow_concurrent_push:
+         await datastore_state_manager.lock_for_session(datastore_id, session_id)
+
+     return {
+         "session_id": session_id,
+         "suggested_heartbeat_interval_seconds": max(1, datastore_config.session_timeout_seconds // 2),
+         "session_timeout_seconds": datastore_config.session_timeout_seconds
+     }
+
+ @session_router.post("/heartbeat", tags=["Session Management"], summary="Session heartbeat keep-alive")
+ async def heartbeat(
+     request: Request,
+     datastore_id: int = Depends(get_datastore_id_from_api_key),
+     session_id: str = Header(..., description="Session ID"),
+ ):
+     si = await session_manager.get_session_info(datastore_id, session_id)
+
+     if not si:
+         raise HTTPException(
+             status_code=status.HTTP_404_NOT_FOUND,
+             detail=f"Session {session_id} not found"
+         )
+
+     is_locked_by_session = await datastore_state_manager.is_locked_by_session(datastore_id, session_id)
+     if not is_locked_by_session:
+         await datastore_state_manager.lock_for_session(datastore_id, session_id)
+
+     await session_manager.keep_session_alive(datastore_id, session_id, client_ip=request.client.host)
+     return {
+         "status": "ok",
+         "message": f"Session {session_id} heartbeat updated successfully",
+     }
+
+ @session_router.delete("/", tags=["Session Management"], summary="End a session")
+ async def end_session(
+     datastore_id: int = Depends(get_datastore_id_from_api_key),
+     session_id: str = Header(..., description="Session ID"),
+ ):
+     success = await session_manager.terminate_session(datastore_id, session_id)
+
+     if not success:
+         raise HTTPException(
+             status_code=status.HTTP_404_NOT_FOUND,
+             detail=f"Session {session_id} not found"
+         )
+
+     await datastore_state_manager.unlock_for_session(datastore_id, session_id)
+
+     return {
+         "status": "ok",
+         "message": f"Session {session_id} terminated successfully",
+     }
+
+ @session_router.get("/", tags=["Session Management"], summary="List active sessions")
+ async def list_sessions(
+     datastore_id: int = Depends(get_datastore_id_from_api_key),
+ ):
+     sessions = await session_manager.get_datastore_sessions(datastore_id)
+
+     session_list = []
+     for session_id, session_info in sessions.items():
+         session_list.append({
+             "session_id": session_id,
+             "task_id": session_info.task_id,
+             "client_ip": session_info.client_ip,
+             "last_activity": session_info.last_activity,
+             "created_at": session_info.created_at,
+             "allow_concurrent_push": session_info.allow_concurrent_push,
+             "session_timeout_seconds": session_info.session_timeout_seconds
+         })
+
+     return {
+         "datastore_id": datastore_id,
+         "active_sessions": session_list,
+         "count": len(session_list)
+     }
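Putting the three write-side endpoints together, a hedged sketch of the client-side session lifecycle (create, heartbeat, terminate). The base URL and API key are assumptions; note that FastAPI exposes the `session_id` Header parameter as the `session-id` HTTP header:

```python
import httpx

FUSION_URL = "http://localhost:8000"    # assumed; the session router's mount prefix is not shown in this diff
HEADERS = {"X-API-Key": "agent-key-1"}  # assumed key

def run_session(task_id: str) -> None:
    # 1. Create a session for the sync task; the response suggests a heartbeat interval.
    created = httpx.post(f"{FUSION_URL}/", json={"task_id": task_id}, headers=HEADERS)
    created.raise_for_status()
    body = created.json()
    session_headers = {**HEADERS, "session-id": body["session_id"]}

    # 2. Keep the session alive (normally repeated every suggested_heartbeat_interval_seconds).
    httpx.post(f"{FUSION_URL}/heartbeat", headers=session_headers).raise_for_status()

    # 3. Terminate the session, which also releases the datastore lock.
    httpx.delete(f"{FUSION_URL}/", headers=session_headers).raise_for_status()
```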
@@ -0,0 +1,71 @@
+ """
+ API endpoints for the parsers module.
+ Provides REST endpoints to access parsed data views.
+ """
+ from fastapi import APIRouter, Query, Header, Depends, status, HTTPException
+ import logging
+ from typing import Dict, Any, Optional
+
+ from ..parsers.manager import get_directory_tree, search_files, get_directory_stats, reset_directory_tree
+ from ..auth.dependencies import get_datastore_id_from_api_key
+
+ logger = logging.getLogger(__name__)
+
+ parser_router = APIRouter(tags=["Parsers - Data Views"])
+
+
+ @parser_router.get("/fs/tree", summary="Get directory tree structure")
+ async def get_directory_tree_api(
+     path: str = Query("/", description="Directory path to retrieve (default: '/')"),
+     datastore_id: int = Depends(get_datastore_id_from_api_key)
+ ) -> Optional[Dict[str, Any]]:
+     """Get the directory structure tree starting from the specified path."""
+     logger.info(f"API request for directory tree: path={path}, datastore_id={datastore_id}")
+     result = await get_directory_tree(path, datastore_id=datastore_id)
+     logger.info(f"Directory tree result for path '{path}': {result}")
+     return result
+
+ @parser_router.get("/fs/search", summary="Search for files by pattern")
+ async def search_files_api(
+     pattern: str = Query(..., description="Search pattern to match in file paths"),
+     datastore_id: int = Depends(get_datastore_id_from_api_key)
+ ) -> list:
+     """Search for files matching the specified pattern."""
+     logger.info(f"API request for file search: pattern={pattern}, datastore_id={datastore_id}")
+     result = await search_files(pattern, datastore_id=datastore_id)
+     logger.info(f"File search result for pattern '{pattern}': found {len(result)} files")
+     return result
+
+
+ @parser_router.get("/fs/stats", summary="Get statistics about the directory structure")
+ async def get_directory_stats_api(
+     datastore_id: int = Depends(get_datastore_id_from_api_key)
+ ) -> Dict[str, Any]:
+     """Get statistics about the current directory structure."""
+     logger.info(f"API request for directory stats: datastore_id={datastore_id}")
+     result = await get_directory_stats(datastore_id=datastore_id)
+     logger.info(f"Directory stats result: {result}")
+     return result
+
+
+ @parser_router.delete("/fs/reset",
+                       summary="Reset directory tree structure",
+                       description="Clear all directory entries for a specific datastore",
+                       status_code=status.HTTP_204_NO_CONTENT
+ )
+ async def reset_directory_tree_api(
+     datastore_id: int = Depends(get_datastore_id_from_api_key)
+ ) -> None:
+     """
+     Reset the directory tree structure by clearing all entries for a specific datastore.
+     """
+     logger.info(f"API request to reset directory tree for datastore {datastore_id}")
+     success = await reset_directory_tree(datastore_id)
+
+     if not success:
+         logger.error(f"Failed to reset directory tree for datastore {datastore_id}")
+         raise HTTPException(
+             status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+             detail="Failed to reset directory tree"
+         )
+     logger.info(f"Successfully reset directory tree for datastore {datastore_id}")
File without changes
@@ -0,0 +1,21 @@
+ from typing import Dict, Optional, List, Any
+ from fustor_fusion_sdk.interfaces import ApiKeyCacheInterface # Import the interface
+
+ class ApiKeyCache(ApiKeyCacheInterface): # Inherit from the interface
+     def __init__(self):
+         self._cache: Dict[str, int] = {}
+
+     def set_cache(self, api_keys_data: List[Dict[str, Any]]):
+         """
+         Sets the entire cache from a list of API key data.
+         Expected format: [{'key': '...', 'datastore_id': 1}, ...]
+         """
+         new_cache = {item['key']: item['datastore_id'] for item in api_keys_data if 'key' in item and 'datastore_id' in item}
+         self._cache = new_cache
+     def get_datastore_id(self, api_key: str) -> Optional[int]:
+         """
+         Retrieves the datastore_id for a given API key.
+         """
+         return self._cache.get(api_key)
+
+ api_key_cache = ApiKeyCache()
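A small usage sketch of the cache above; the key values are made up, and in the running service the cache is presumably refreshed from the registry by the sync job under `jobs/sync_cache.py`:

```python
from fustor_fusion.auth.cache import api_key_cache

api_key_cache.set_cache([
    {"key": "agent-key-1", "datastore_id": 1},
    {"key": "agent-key-2", "datastore_id": 7},
])

assert api_key_cache.get_datastore_id("agent-key-1") == 1
assert api_key_cache.get_datastore_id("unknown-key") is None  # unknown keys resolve to None
```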
@@ -0,0 +1,24 @@
+ from typing import Dict, Optional, List, Any
+ from fustor_common.models import DatastoreConfig
+ from fustor_registry_client.models import ClientDatastoreConfigResponse
+ from fustor_fusion_sdk.interfaces import DatastoreConfigCacheInterface # Import the interface
+
+ class DatastoreConfigCache(DatastoreConfigCacheInterface): # Inherit from the interface
+     def __init__(self):
+         self._cache: Dict[int, DatastoreConfig] = {}
+
+     def set_cache(self, datastore_configs_data: List[ClientDatastoreConfigResponse]): # Changed type hint
+         """
+         Sets the entire cache from a list of datastore config data.
+         """
+         new_cache = {item.datastore_id: item for item in datastore_configs_data} # Store the full ClientDatastoreConfigResponse object
+         self._cache = new_cache
+         print(f"Datastore config cache updated. Total datastores: {len(self._cache)}")
+
+     def get_datastore_config(self, datastore_id: int) -> Optional[DatastoreConfig]:
+         """
+         Retrieves the configuration for a given datastore.
+         """
+         return self._cache.get(datastore_id)
+
+ datastore_config_cache = DatastoreConfigCache()
@@ -0,0 +1,21 @@
+ from fastapi import Header, HTTPException, status, Depends
+ from typing import Optional
+ import logging
+
+ from .cache import api_key_cache
+
+ logger = logging.getLogger(__name__)
+
+ async def get_datastore_id_from_api_key(x_api_key: str = Header(..., alias="X-API-Key")) -> int:
+     """
+     Retrieves the datastore_id from the in-memory API key cache.
+     """
+     logger.debug(f"Received X-API-Key: {x_api_key}")
+     datastore_id = api_key_cache.get_datastore_id(x_api_key)
+     logger.debug(f"Resolved datastore_id for key '{x_api_key[:5]}...': {datastore_id}")
+     if datastore_id is None:
+         raise HTTPException(
+             status_code=status.HTTP_401_UNAUTHORIZED,
+             detail="Invalid or inactive X-API-Key"
+         )
+     return datastore_id
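A test-style sketch of how this dependency behaves end to end; the route path and key value are made up for illustration, and the cache is seeded directly rather than via the registry sync:

```python
from fastapi import Depends, FastAPI
from fastapi.testclient import TestClient

from fustor_fusion.auth.cache import api_key_cache
from fustor_fusion.auth.dependencies import get_datastore_id_from_api_key

app = FastAPI()

@app.get("/whoami")  # hypothetical route, used only to exercise the dependency
async def whoami(datastore_id: int = Depends(get_datastore_id_from_api_key)):
    return {"datastore_id": datastore_id}

client = TestClient(app)
api_key_cache.set_cache([{"key": "agent-key-1", "datastore_id": 1}])

assert client.get("/whoami", headers={"X-API-Key": "agent-key-1"}).json() == {"datastore_id": 1}
assert client.get("/whoami", headers={"X-API-Key": "bad-key"}).status_code == 401
```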