geek_cafe_saas_sdk-0.6.0-py3-none-any.whl → geek_cafe_saas_sdk-0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geek-cafe-saas-sdk might be problematic. Click here for more details.
- geek_cafe_saas_sdk/__init__.py +2 -2
- geek_cafe_saas_sdk/domains/files/handlers/README.md +446 -0
- geek_cafe_saas_sdk/domains/files/handlers/__init__.py +6 -0
- geek_cafe_saas_sdk/domains/files/handlers/files/create/app.py +121 -0
- geek_cafe_saas_sdk/domains/files/handlers/files/download/app.py +80 -0
- geek_cafe_saas_sdk/domains/files/handlers/files/get/app.py +62 -0
- geek_cafe_saas_sdk/domains/files/handlers/files/list/app.py +72 -0
- geek_cafe_saas_sdk/domains/files/handlers/lineage/create_derived/app.py +99 -0
- geek_cafe_saas_sdk/domains/files/handlers/lineage/create_main/app.py +104 -0
- geek_cafe_saas_sdk/domains/files/handlers/lineage/download_bundle/app.py +99 -0
- geek_cafe_saas_sdk/domains/files/handlers/lineage/get_lineage/app.py +68 -0
- geek_cafe_saas_sdk/domains/files/handlers/lineage/prepare_bundle/app.py +76 -0
- geek_cafe_saas_sdk/domains/files/models/__init__.py +17 -0
- geek_cafe_saas_sdk/domains/files/models/file.py +118 -12
- geek_cafe_saas_sdk/domains/files/services/__init__.py +21 -0
- geek_cafe_saas_sdk/domains/files/services/file_lineage_service.py +487 -0
- geek_cafe_saas_sdk/domains/files/services/file_system_service.py +27 -1
- geek_cafe_saas_sdk/utilities/cognito_utility.py +16 -26
- {geek_cafe_saas_sdk-0.6.0.dist-info → geek_cafe_saas_sdk-0.7.0.dist-info}/METADATA +11 -11
- {geek_cafe_saas_sdk-0.6.0.dist-info → geek_cafe_saas_sdk-0.7.0.dist-info}/RECORD +22 -10
- {geek_cafe_saas_sdk-0.6.0.dist-info → geek_cafe_saas_sdk-0.7.0.dist-info}/WHEEL +0 -0
- {geek_cafe_saas_sdk-0.6.0.dist-info → geek_cafe_saas_sdk-0.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -19,6 +19,10 @@ class File(BaseModel):
|
|
|
19
19
|
Represents a file in the system with metadata, virtual path, and S3 location.
|
|
20
20
|
Does not contain file data (stored in S3) - only metadata and references.
|
|
21
21
|
|
|
22
|
+
Multi-Tenancy:
|
|
23
|
+
- tenant_id: Organization/company (can have multiple users)
|
|
24
|
+
- owner_id: Specific user within the tenant who owns this file
|
|
25
|
+
|
|
22
26
|
Access Patterns (DynamoDB Keys):
|
|
23
27
|
- pk: FILE#{tenant_id}#{file_id}
|
|
24
28
|
- sk: METADATA
|
|
@@ -35,11 +39,9 @@ class File(BaseModel):
|
|
|
35
39
|
def __init__(self):
|
|
36
40
|
super().__init__()
|
|
37
41
|
|
|
38
|
-
#
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
# Ownership (inherited tenant_id from BaseModel)
|
|
42
|
-
self._owner_id: str | None = None # User who owns the file
|
|
42
|
+
# Identity (inherited from BaseModel: id, tenant_id)
|
|
43
|
+
# Note: tenant_id = organization/company, owner_id = specific user within tenant
|
|
44
|
+
self._owner_id: str | None = None # User ID who owns this file
|
|
43
45
|
|
|
44
46
|
# File Information
|
|
45
47
|
self._file_name: str | None = None # Display name (e.g., "report.pdf")
|
|
@@ -70,25 +72,36 @@ class File(BaseModel):
|
|
|
70
72
|
self._status: str = "active" # "active", "archived", "deleted"
|
|
71
73
|
self._is_shared: bool = False # Has active shares
|
|
72
74
|
|
|
75
|
+
# Lineage Tracking (for data processing pipelines)
|
|
76
|
+
self._file_role: str = "standalone" # "standalone", "original", "main", "derived"
|
|
77
|
+
self._original_file_id: str | None = None # Root file in lineage chain
|
|
78
|
+
self._parent_file_id: str | None = None # Immediate parent file
|
|
79
|
+
|
|
80
|
+
# Transformation Tracking
|
|
81
|
+
self._transformation_type: str | None = None # "convert", "clean", "process"
|
|
82
|
+
self._transformation_operation: str | None = None # "xls_to_csv", "data_cleaning_v2"
|
|
83
|
+
self._transformation_metadata: Dict[str, Any] | None = None # Additional operation details
|
|
84
|
+
|
|
85
|
+
# Relationship Counts
|
|
86
|
+
self._derived_file_count: int = 0 # Number of files derived from this one
|
|
87
|
+
|
|
73
88
|
# Timestamps (inherited from BaseModel)
|
|
74
89
|
# created_utc_ts, updated_utc_ts, deleted_utc_ts
|
|
75
90
|
|
|
76
|
-
# Properties - File Identity
|
|
91
|
+
# Properties - File Identity (alias for BaseModel.id)
|
|
77
92
|
@property
|
|
78
93
|
def file_id(self) -> str | None:
|
|
79
|
-
"""Unique file ID."""
|
|
80
|
-
return self.
|
|
94
|
+
"""Unique file ID (alias for id)."""
|
|
95
|
+
return self.id
|
|
81
96
|
|
|
82
97
|
@file_id.setter
|
|
83
98
|
def file_id(self, value: str | None):
|
|
84
|
-
self.
|
|
85
|
-
if value:
|
|
86
|
-
self.id = value
|
|
99
|
+
self.id = value
|
|
87
100
|
|
|
88
101
|
# Properties - Ownership
|
|
89
102
|
@property
|
|
90
103
|
def owner_id(self) -> str | None:
|
|
91
|
-
"""User who owns the file."""
|
|
104
|
+
"""User ID who owns the file (not tenant_id - that's the organization)."""
|
|
92
105
|
return self._owner_id
|
|
93
106
|
|
|
94
107
|
@owner_id.setter
|
|
@@ -258,6 +271,74 @@ class File(BaseModel):
|
|
|
258
271
|
def is_shared(self, value: bool):
|
|
259
272
|
self._is_shared = bool(value)
|
|
260
273
|
|
|
274
|
+
# Properties - Lineage Tracking
|
|
275
|
+
@property
|
|
276
|
+
def file_role(self) -> str:
|
|
277
|
+
"""File role in lineage chain: 'standalone', 'original', 'main', 'derived'."""
|
|
278
|
+
return self._file_role
|
|
279
|
+
|
|
280
|
+
@file_role.setter
|
|
281
|
+
def file_role(self, value: str | None):
|
|
282
|
+
valid_roles = ["standalone", "original", "main", "derived"]
|
|
283
|
+
if value in valid_roles:
|
|
284
|
+
self._file_role = value
|
|
285
|
+
else:
|
|
286
|
+
self._file_role = "standalone"
|
|
287
|
+
|
|
288
|
+
@property
|
|
289
|
+
def original_file_id(self) -> str | None:
|
|
290
|
+
"""Root file in lineage chain."""
|
|
291
|
+
return self._original_file_id
|
|
292
|
+
|
|
293
|
+
@original_file_id.setter
|
|
294
|
+
def original_file_id(self, value: str | None):
|
|
295
|
+
self._original_file_id = value
|
|
296
|
+
|
|
297
|
+
@property
|
|
298
|
+
def parent_file_id(self) -> str | None:
|
|
299
|
+
"""Immediate parent file."""
|
|
300
|
+
return self._parent_file_id
|
|
301
|
+
|
|
302
|
+
@parent_file_id.setter
|
|
303
|
+
def parent_file_id(self, value: str | None):
|
|
304
|
+
self._parent_file_id = value
|
|
305
|
+
|
|
306
|
+
@property
|
|
307
|
+
def transformation_type(self) -> str | None:
|
|
308
|
+
"""Type of transformation applied: 'convert', 'clean', 'process'."""
|
|
309
|
+
return self._transformation_type
|
|
310
|
+
|
|
311
|
+
@transformation_type.setter
|
|
312
|
+
def transformation_type(self, value: str | None):
|
|
313
|
+
self._transformation_type = value
|
|
314
|
+
|
|
315
|
+
@property
|
|
316
|
+
def transformation_operation(self) -> str | None:
|
|
317
|
+
"""Specific operation performed (e.g., 'xls_to_csv', 'data_cleaning_v2')."""
|
|
318
|
+
return self._transformation_operation
|
|
319
|
+
|
|
320
|
+
@transformation_operation.setter
|
|
321
|
+
def transformation_operation(self, value: str | None):
|
|
322
|
+
self._transformation_operation = value
|
|
323
|
+
|
|
324
|
+
@property
|
|
325
|
+
def transformation_metadata(self) -> Dict[str, Any] | None:
|
|
326
|
+
"""Additional transformation details."""
|
|
327
|
+
return self._transformation_metadata
|
|
328
|
+
|
|
329
|
+
@transformation_metadata.setter
|
|
330
|
+
def transformation_metadata(self, value: Dict[str, Any] | None):
|
|
331
|
+
self._transformation_metadata = value if isinstance(value, dict) else None
|
|
332
|
+
|
|
333
|
+
@property
|
|
334
|
+
def derived_file_count(self) -> int:
|
|
335
|
+
"""Number of files derived from this one."""
|
|
336
|
+
return self._derived_file_count
|
|
337
|
+
|
|
338
|
+
@derived_file_count.setter
|
|
339
|
+
def derived_file_count(self, value: int | None):
|
|
340
|
+
self._derived_file_count = value if isinstance(value, int) else 0
|
|
341
|
+
|
|
261
342
|
# Helper Methods
|
|
262
343
|
def is_active(self) -> bool:
|
|
263
344
|
"""Check if file is active."""
|
|
@@ -310,3 +391,28 @@ class File(BaseModel):
|
|
|
310
391
|
if self._s3_bucket and self._s3_key:
|
|
311
392
|
return f"s3://{self._s3_bucket}/{self._s3_key}"
|
|
312
393
|
return None
|
|
394
|
+
|
|
395
|
+
# Lineage Helper Methods
|
|
396
|
+
def has_lineage(self) -> bool:
    """Return True when the file's role is anything other than 'standalone'."""
    role = self._file_role
    return not (role == "standalone")
|
|
399
|
+
|
|
400
|
+
def is_original(self) -> bool:
    """Return True when the file's role is 'original'."""
    role = self._file_role
    return role == "original"
|
|
403
|
+
|
|
404
|
+
def is_main(self) -> bool:
    """Return True when the file's role is 'main'."""
    role = self._file_role
    return role == "main"
|
|
407
|
+
|
|
408
|
+
def is_derived(self) -> bool:
    """Return True when the file's role is 'derived'."""
    role = self._file_role
    return role == "derived"
|
|
411
|
+
|
|
412
|
+
def is_standalone(self) -> bool:
    """Return True when the file's role is 'standalone' (no lineage)."""
    role = self._file_role
    return role == "standalone"
|
|
415
|
+
|
|
416
|
+
def increment_derived_count(self):
    """Add one to the in-memory derived file count (does not persist anything)."""
    self._derived_file_count = self._derived_file_count + 1
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""File services.
|
|
2
|
+
|
|
3
|
+
Geek Cafe, LLC
|
|
4
|
+
MIT License. See Project Root for the license information.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .file_system_service import FileSystemService
|
|
8
|
+
from .directory_service import DirectoryService
|
|
9
|
+
from .file_version_service import FileVersionService
|
|
10
|
+
from .file_share_service import FileShareService
|
|
11
|
+
from .s3_file_service import S3FileService
|
|
12
|
+
from .file_lineage_service import FileLineageService
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"FileSystemService",
|
|
16
|
+
"DirectoryService",
|
|
17
|
+
"FileVersionService",
|
|
18
|
+
"FileShareService",
|
|
19
|
+
"S3FileService",
|
|
20
|
+
"FileLineageService",
|
|
21
|
+
]
|
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File Lineage Service
|
|
3
|
+
|
|
4
|
+
Helper service for managing file lineage and transformations.
|
|
5
|
+
Works on top of FileSystemService.
|
|
6
|
+
|
|
7
|
+
Geek Cafe, LLC
|
|
8
|
+
MIT License. See Project Root for the license information.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Dict, Any, List, Optional
|
|
12
|
+
from geek_cafe_saas_sdk.domains.files.services.file_system_service import FileSystemService
|
|
13
|
+
from geek_cafe_saas_sdk.domains.files.services.s3_file_service import S3FileService
|
|
14
|
+
from geek_cafe_saas_sdk.domains.files.models.file import File
|
|
15
|
+
from geek_cafe_saas_sdk.core.service_result import ServiceResult
|
|
16
|
+
from geek_cafe_saas_sdk.core.service_errors import ValidationError, NotFoundError
|
|
17
|
+
import datetime as dt
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class FileLineageService:
    """Service for managing file lineage and transformations.

    A lineage chain has the shape ``original -> main -> derived``: a "main"
    file is created from a file whose role is "original", and "derived" files
    are created from a file whose role is "main". All reads and writes go
    through the injected ``file_service``.
    """

    def __init__(self, file_service: FileSystemService, s3_service: S3FileService):
        """
        Initialize lineage service.

        Args:
            file_service: FileSystemService instance
            s3_service: S3FileService instance (stored; not used by the
                methods of this class)
        """
        self.file_service = file_service
        self.s3_service = s3_service

    def _increment_derived_count(self, file_id: str, tenant_id: str, user_id: str) -> None:
        """Best-effort: add one to the stored ``derived_file_count`` of ``file_id``.

        The file is re-fetched so the increment is based on the currently
        stored count rather than a stale in-memory copy. This is a
        read-modify-write and is NOT atomic: two concurrent callers can read
        the same count and one increment is then lost. If the re-fetch fails
        the count is left untouched; the result of the update call is ignored.
        """
        fresh = self.file_service.get_by_id(
            resource_id=file_id,
            tenant_id=tenant_id,
            user_id=user_id
        )
        if not fresh.success:
            return

        self.file_service.update(
            resource_id=file_id,
            tenant_id=tenant_id,
            user_id=user_id,
            updates={'derived_file_count': fresh.data.derived_file_count + 1}
        )

    def _download_entry(
        self,
        file_id: str,
        tenant_id: str,
        user_id: str
    ) -> Optional[Dict[str, Any]]:
        """Download one file; return ``{'file': ..., 'data': ...}`` or None on failure."""
        download = self.file_service.download_file(
            tenant_id=tenant_id,
            file_id=file_id,
            user_id=user_id
        )
        if not download.success:
            return None

        return {
            'file': download.data['file'],
            'data': download.data['data']
        }

    def create_main_file(
        self,
        tenant_id: str,
        user_id: str,
        original_file_id: str,
        file_name: str,
        file_data: bytes,
        mime_type: str,
        transformation_operation: str,
        transformation_metadata: Optional[Dict[str, Any]] = None,
        directory_id: Optional[str] = None
    ) -> ServiceResult[File]:
        """
        Create a main file from an original file.

        The new file is created with role "main", transformation type
        "convert", and both ``parent_file_id`` and ``original_file_id`` set to
        ``original_file_id``. On success the original file's
        ``derived_file_count`` is incremented (best-effort, not atomic).

        Args:
            tenant_id: Tenant ID
            user_id: User ID
            original_file_id: Original file ID
            file_name: New file name (e.g., "data.csv")
            file_data: Converted file data
            mime_type: MIME type
            transformation_operation: Operation name (e.g., "xls_to_csv")
            transformation_metadata: Additional metadata
            directory_id: Optional directory ID (inherits from original if not provided)

        Returns:
            ServiceResult with main File. If the original cannot be fetched,
            that failed result is returned as-is; if it does not have role
            "original", an INVALID_FILE_ROLE error result is returned; any
            exception becomes a CREATE_MAIN_FILE_FAILED error result.
        """
        try:
            # Get original file
            original_result = self.file_service.get_by_id(
                resource_id=original_file_id,
                tenant_id=tenant_id,
                user_id=user_id
            )

            if not original_result.success:
                return original_result

            original_file = original_result.data

            # Validate original is actually an original
            if original_file.file_role != "original":
                return ServiceResult.error_result(
                    message="Source file must have role 'original'",
                    error_code="INVALID_FILE_ROLE"
                )

            # Use original's directory if not specified
            target_directory_id = directory_id if directory_id is not None else original_file.directory_id

            # Create main file
            result = self.file_service.create(
                tenant_id=tenant_id,
                user_id=user_id,
                file_name=file_name,
                file_data=file_data,
                mime_type=mime_type,
                directory_id=target_directory_id,
                file_role="main",
                parent_file_id=original_file_id,
                original_file_id=original_file_id,
                transformation_type="convert",
                transformation_operation=transformation_operation,
                transformation_metadata=transformation_metadata or {}
            )

            if result.success:
                # Increment (rather than overwrite with 1) so an original that
                # already has files created from it keeps an accurate count.
                self._increment_derived_count(original_file_id, tenant_id, user_id)

            return result

        except Exception as e:
            return ServiceResult.error_result(
                message=f"Failed to create main file: {str(e)}",
                error_code="CREATE_MAIN_FILE_FAILED",
                error=e
            )

    def create_derived_file(
        self,
        tenant_id: str,
        user_id: str,
        main_file_id: str,
        file_name: str,
        file_data: bytes,
        transformation_operation: str,
        transformation_metadata: Optional[Dict[str, Any]] = None,
        directory_id: Optional[str] = None
    ) -> ServiceResult[File]:
        """
        Create a derived file from a main file.

        The new file is created with role "derived", transformation type
        "clean", the main file's MIME type, ``parent_file_id`` set to
        ``main_file_id`` and ``original_file_id`` copied from the main file.
        On success the main file's ``derived_file_count`` is incremented
        (best-effort, not atomic).

        Args:
            tenant_id: Tenant ID
            user_id: User ID
            main_file_id: Main file ID (parent)
            file_name: New file name (e.g., "data_clean_v1.csv")
            file_data: Processed file data
            transformation_operation: Operation name (e.g., "data_cleaning_v1")
            transformation_metadata: Additional metadata
            directory_id: Optional directory ID (inherits from main if not provided)

        Returns:
            ServiceResult with derived File. If the main file cannot be
            fetched, that failed result is returned as-is; if it does not have
            role "main", an INVALID_FILE_ROLE error result is returned; any
            exception becomes a CREATE_DERIVED_FILE_FAILED error result.
        """
        try:
            # Get main file
            main_result = self.file_service.get_by_id(
                resource_id=main_file_id,
                tenant_id=tenant_id,
                user_id=user_id
            )

            if not main_result.success:
                return main_result

            main_file = main_result.data

            # Validate main is actually a main file
            if main_file.file_role != "main":
                return ServiceResult.error_result(
                    message="Source file must have role 'main'",
                    error_code="INVALID_FILE_ROLE"
                )

            # Use main's directory if not specified
            target_directory_id = directory_id if directory_id is not None else main_file.directory_id

            # Create derived file
            result = self.file_service.create(
                tenant_id=tenant_id,
                user_id=user_id,
                file_name=file_name,
                file_data=file_data,
                mime_type=main_file.mime_type,
                directory_id=target_directory_id,
                file_role="derived",
                parent_file_id=main_file_id,
                original_file_id=main_file.original_file_id,
                transformation_type="clean",
                transformation_operation=transformation_operation,
                transformation_metadata=transformation_metadata or {}
            )

            if result.success:
                # Re-fetches the main file and writes count + 1 (not atomic).
                self._increment_derived_count(main_file_id, tenant_id, user_id)

            return result

        except Exception as e:
            return ServiceResult.error_result(
                message=f"Failed to create derived file: {str(e)}",
                error_code="CREATE_DERIVED_FILE_FAILED",
                error=e
            )

    def get_lineage(
        self,
        file_id: str,
        tenant_id: str,
        user_id: str
    ) -> ServiceResult[Dict[str, Any]]:
        """
        Get complete lineage for a file.

        Lookups of the related original/main files are best-effort: when one
        fails, the corresponding entry simply stays None.

        Returns:
            ServiceResult with lineage dict containing:
            - selected: The selected file
            - main: Main file (if exists)
            - original: Original file (if exists)
            - all_derived: List of all derived files (if viewing main)

            If the selected file itself cannot be fetched, that failed result
            is returned as-is; any exception becomes a GET_LINEAGE_FAILED
            error result.
        """
        try:
            # Get selected file
            selected_result = self.file_service.get_by_id(
                resource_id=file_id,
                tenant_id=tenant_id,
                user_id=user_id
            )

            if not selected_result.success:
                return selected_result

            selected_file = selected_result.data

            lineage = {
                'selected': selected_file,
                'main': None,
                'original': None,
                'all_derived': []
            }

            # Get original file
            # NOTE(review): when the selected file is itself the original and
            # has no original_file_id set, 'original' stays None -- confirm
            # that this is intended.
            if selected_file.original_file_id:
                original_result = self.file_service.get_by_id(
                    resource_id=selected_file.original_file_id,
                    tenant_id=tenant_id,
                    user_id=user_id
                )
                if original_result.success:
                    lineage['original'] = original_result.data

            # Get main file: the parent of a derived file, or the selected
            # file itself when it is the main file.
            if selected_file.is_derived() and selected_file.parent_file_id:
                main_result = self.file_service.get_by_id(
                    resource_id=selected_file.parent_file_id,
                    tenant_id=tenant_id,
                    user_id=user_id
                )
                if main_result.success:
                    lineage['main'] = main_result.data
            elif selected_file.is_main():
                lineage['main'] = selected_file

            # Get all derived files if viewing main
            if selected_file.is_main():
                derived = self.list_derived_files(
                    main_file_id=file_id,
                    tenant_id=tenant_id,
                    user_id=user_id
                )
                if derived.success:
                    lineage['all_derived'] = derived.data

            return ServiceResult.success_result(lineage)

        except Exception as e:
            return ServiceResult.error_result(
                message=f"Failed to get lineage: {str(e)}",
                error_code="GET_LINEAGE_FAILED",
                error=e
            )

    def list_derived_files(
        self,
        main_file_id: str,
        tenant_id: str,
        user_id: str,
        limit: int = 100
    ) -> ServiceResult[List[File]]:
        """
        List all files derived from a main file.

        The candidates are the files returned by
        ``file_service.list_files_by_owner`` for ``user_id``, filtered to
        those with role "derived" whose ``parent_file_id`` is ``main_file_id``.

        NOTE(review): ``limit`` is applied to the owner listing before the
        filter, so derived files outside the first ``limit`` owner files are
        not returned, and neither are derived files owned by another user --
        confirm whether that is intended.

        Returns:
            ServiceResult with list of derived Files, ordered by
            ``created_utc_ts`` (files without a timestamp come last). A failed
            listing result is returned as-is; any exception becomes a
            LIST_DERIVED_FAILED error result.
        """
        try:
            # Get all user's files
            all_files_result = self.file_service.list_files_by_owner(
                tenant_id=tenant_id,
                owner_id=user_id,
                user_id=user_id,
                limit=limit
            )

            if not all_files_result.success:
                return all_files_result

            # Filter to derived files from this main file
            derived_files = [
                f for f in all_files_result.data
                if f.parent_file_id == main_file_id and f.is_derived()
            ]

            # Sort by creation date. The (is None, value) key keeps a missing
            # timestamp from being compared against a real one, which would
            # raise TypeError; such files sort after all timestamped files.
            derived_files.sort(
                key=lambda f: (f.created_utc_ts is None, f.created_utc_ts)
            )

            return ServiceResult.success_result(derived_files)

        except Exception as e:
            return ServiceResult.error_result(
                message=f"Failed to list derived files: {str(e)}",
                error_code="LIST_DERIVED_FAILED",
                error=e
            )

    def prepare_lineage_bundle(
        self,
        selected_file_id: str,
        tenant_id: str,
        user_id: str
    ) -> ServiceResult[Dict[str, Any]]:
        """
        Prepare bundle of files for lineage.

        Builds on ``get_lineage`` and adds a ``transformation_chain`` list
        with fixed step numbers: 1 = original, 2 = main ("convert"),
        3 = the selected file when it is derived ("clean"). A step is omitted
        when the corresponding file is not available.

        Returns:
            ServiceResult with bundle dict containing:
            - selected_file: Selected file
            - main_file: Main file
            - original_file: Original file
            - metadata: Transformation chain info

            A failed lineage result is returned as-is; any exception becomes a
            PREPARE_BUNDLE_FAILED error result.
        """
        try:
            lineage_result = self.get_lineage(
                file_id=selected_file_id,
                tenant_id=tenant_id,
                user_id=user_id
            )

            if not lineage_result.success:
                return lineage_result

            lineage = lineage_result.data

            bundle = {
                'selected_file': lineage['selected'],
                'main_file': lineage['main'],
                'original_file': lineage['original'],
                'metadata': {
                    'selected_file_id': selected_file_id,
                    'selected_file_name': lineage['selected'].file_name,
                    'transformation_chain': []
                }
            }

            # Build transformation chain
            if lineage['original']:
                bundle['metadata']['transformation_chain'].append({
                    'step': 1,
                    'type': 'original',
                    'file_id': lineage['original'].file_id,
                    'file_name': lineage['original'].file_name
                })

            if lineage['main']:
                bundle['metadata']['transformation_chain'].append({
                    'step': 2,
                    'type': 'convert',
                    'file_id': lineage['main'].file_id,
                    'file_name': lineage['main'].file_name,
                    'operation': lineage['main'].transformation_operation
                })

            if lineage['selected'].is_derived():
                bundle['metadata']['transformation_chain'].append({
                    'step': 3,
                    'type': 'clean',
                    'file_id': lineage['selected'].file_id,
                    'file_name': lineage['selected'].file_name,
                    'operation': lineage['selected'].transformation_operation
                })

            return ServiceResult.success_result(bundle)

        except Exception as e:
            return ServiceResult.error_result(
                message=f"Failed to prepare bundle: {str(e)}",
                error_code="PREPARE_BUNDLE_FAILED",
                error=e
            )

    def download_lineage_bundle(
        self,
        selected_file_id: str,
        tenant_id: str,
        user_id: str
    ) -> ServiceResult[Dict[str, Any]]:
        """
        Download all files in lineage chain.

        Downloads are best-effort: an entry whose download fails (or whose
        file is not part of the lineage) is left as None and the overall
        result is still a success.

        Returns:
            ServiceResult with dict containing:
            - selected: {'file': File, 'data': bytes}
            - main: {'file': File, 'data': bytes}
            - original: {'file': File, 'data': bytes}
            - metadata: Transformation chain info

            A failed bundle-preparation result is returned as-is; any
            exception becomes a DOWNLOAD_BUNDLE_FAILED error result.
        """
        try:
            bundle_result = self.prepare_lineage_bundle(
                selected_file_id=selected_file_id,
                tenant_id=tenant_id,
                user_id=user_id
            )

            if not bundle_result.success:
                return bundle_result

            bundle = bundle_result.data
            download_bundle = {
                'selected': None,
                'main': None,
                'original': None,
                'metadata': bundle['metadata']
            }

            # Download selected file
            download_bundle['selected'] = self._download_entry(
                selected_file_id, tenant_id, user_id
            )

            # Download main file
            if bundle['main_file']:
                download_bundle['main'] = self._download_entry(
                    bundle['main_file'].file_id, tenant_id, user_id
                )

            # Download original file
            if bundle['original_file']:
                download_bundle['original'] = self._download_entry(
                    bundle['original_file'].file_id, tenant_id, user_id
                )

            return ServiceResult.success_result(download_bundle)

        except Exception as e:
            return ServiceResult.error_result(
                message=f"Failed to download bundle: {str(e)}",
                error_code="DOWNLOAD_BUNDLE_FAILED",
                error=e
            )
|