pltr-cli 0.10.0__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pltr/__init__.py +1 -1
- pltr/cli.py +16 -0
- pltr/commands/admin.py +553 -9
- pltr/commands/aip_agents.py +333 -0
- pltr/commands/connectivity.py +309 -1
- pltr/commands/cp.py +103 -0
- pltr/commands/dataset.py +104 -4
- pltr/commands/mediasets.py +176 -0
- pltr/commands/ontology.py +137 -13
- pltr/commands/orchestration.py +167 -11
- pltr/commands/project.py +249 -0
- pltr/commands/resource.py +452 -0
- pltr/commands/sql.py +54 -7
- pltr/commands/third_party_applications.py +82 -0
- pltr/services/admin.py +318 -1
- pltr/services/aip_agents.py +147 -0
- pltr/services/base.py +104 -1
- pltr/services/connectivity.py +139 -0
- pltr/services/copy.py +391 -0
- pltr/services/dataset.py +80 -9
- pltr/services/mediasets.py +144 -9
- pltr/services/ontology.py +119 -1
- pltr/services/orchestration.py +133 -1
- pltr/services/project.py +136 -0
- pltr/services/resource.py +227 -0
- pltr/services/sql.py +44 -20
- pltr/services/third_party_applications.py +53 -0
- pltr/utils/formatting.py +195 -1
- pltr/utils/pagination.py +325 -0
- {pltr_cli-0.10.0.dist-info → pltr_cli-0.12.0.dist-info}/METADATA +5 -3
- pltr_cli-0.12.0.dist-info/RECORD +62 -0
- {pltr_cli-0.10.0.dist-info → pltr_cli-0.12.0.dist-info}/WHEEL +1 -1
- pltr_cli-0.10.0.dist-info/RECORD +0 -55
- {pltr_cli-0.10.0.dist-info → pltr_cli-0.12.0.dist-info}/entry_points.txt +0 -0
- {pltr_cli-0.10.0.dist-info → pltr_cli-0.12.0.dist-info}/licenses/LICENSE +0 -0
pltr/services/base.py
CHANGED
@@ -2,13 +2,20 @@
 Base service class for Foundry API wrappers.
 """
 
-from typing import Any, Optional, Dict
+from typing import Any, Optional, Dict, Callable, Iterator
 from abc import ABC, abstractmethod
+import json
 import requests
 
 from ..auth.manager import AuthManager
 from ..auth.storage import CredentialStorage
 from ..config.profiles import ProfileManager
+from ..utils.pagination import (
+    PaginationConfig,
+    PaginationResult,
+    IteratorPaginationHandler,
+    ResponsePaginationHandler,
+)
 
 
 class BaseService(ABC):
@@ -127,3 +134,99 @@ class BaseService(ABC):
         response.raise_for_status()
 
         return response
+
+    def _paginate_iterator(
+        self,
+        iterator: Iterator[Any],
+        config: PaginationConfig,
+        progress_callback: Optional[Callable[[int, int], None]] = None,
+    ) -> PaginationResult:
+        """
+        Handle pagination for iterator-based SDK methods.
+
+        Args:
+            iterator: Iterator returned from SDK (e.g., ResourceIterator)
+            config: Pagination configuration
+            progress_callback: Optional progress callback
+
+        Returns:
+            PaginationResult with collected items and metadata
+
+        Example:
+            >>> iterator = self.service.Dataset.File.list(dataset_rid, page_size=20)
+            >>> result = self._paginate_iterator(iterator, config)
+        """
+        handler = IteratorPaginationHandler()
+        return handler.collect_pages(iterator, config, progress_callback)
+
+    def _paginate_response(
+        self,
+        fetch_fn: Callable[[Optional[str]], Dict[str, Any]],
+        config: PaginationConfig,
+        progress_callback: Optional[Callable[[int, int], None]] = None,
+    ) -> PaginationResult:
+        """
+        Handle pagination for response-based SDK methods.
+
+        Args:
+            fetch_fn: Function that accepts page_token and returns dict with
+                'data' and 'next_page_token' keys
+            config: Pagination configuration
+            progress_callback: Optional progress callback
+
+        Returns:
+            PaginationResult with collected items and metadata
+
+        Example:
+            >>> def fetch(token):
+            ...     response = self.service.User.list(page_token=token)
+            ...     return self._serialize_response(response)
+            >>> result = self._paginate_response(fetch, config)
+        """
+        handler = ResponsePaginationHandler()
+        return handler.collect_pages(fetch_fn, config, progress_callback)
+
+    def _serialize_response(self, response: Any) -> Dict[str, Any]:
+        """
+        Convert response object to serializable dictionary.
+
+        This handles various SDK response types including Pydantic models,
+        regular objects, and primitives.
+
+        Args:
+            response: Response object from SDK
+
+        Returns:
+            Serializable dictionary representation
+
+        Note:
+            This method was moved from AdminService to provide consistent
+            serialization across all services.
+        """
+        if response is None:
+            return {}
+
+        # Handle different response types
+        if hasattr(response, "dict"):
+            # Pydantic models
+            return response.dict()
+        elif hasattr(response, "__dict__"):
+            # Regular objects
+            result = {}
+            for key, value in response.__dict__.items():
+                if not key.startswith("_"):
+                    try:
+                        # Try to serialize the value
+                        json.dumps(value)
+                        result[key] = value
+                    except (TypeError, ValueError):
+                        # Convert non-serializable values to string
+                        result[key] = str(value)
+            return result
+        else:
+            # Primitive types or already serializable
+            try:
+                json.dumps(response)
+                return response
+            except (TypeError, ValueError):
+                return {"data": str(response)}
pltr/services/connectivity.py
CHANGED
@@ -58,6 +58,145 @@ class ConnectivityService(BaseService):
         except Exception as e:
             raise RuntimeError(f"Failed to get connection {connection_rid}: {e}")
 
+    def create_connection(
+        self,
+        display_name: str,
+        parent_folder_rid: str,
+        configuration: Dict[str, Any],
+        worker: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """
+        Create a new connection.
+
+        Args:
+            display_name: Display name for the connection
+            parent_folder_rid: Parent folder Resource Identifier
+            configuration: Connection configuration dictionary
+            worker: Worker configuration dictionary
+
+        Returns:
+            Created connection information dictionary
+        """
+        try:
+            connection = self.connections_service.Connection.create(
+                configuration=configuration,
+                display_name=display_name,
+                parent_folder_rid=parent_folder_rid,
+                worker=worker,
+            )
+            return self._format_connection_info(connection)
+        except Exception as e:
+            raise RuntimeError(f"Failed to create connection '{display_name}': {e}")
+
+    def get_connection_configuration(self, connection_rid: str) -> Dict[str, Any]:
+        """
+        Get connection configuration.
+
+        Args:
+            connection_rid: Connection Resource Identifier
+
+        Returns:
+            Connection configuration dictionary
+        """
+        try:
+            config = self.connections_service.Connection.get_configuration(
+                connection_rid
+            )
+            return {"connection_rid": connection_rid, "configuration": config}
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get configuration for connection {connection_rid}: {e}"
+            )
+
+    def update_export_settings(
+        self, connection_rid: str, export_settings: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Update connection export settings.
+
+        Args:
+            connection_rid: Connection Resource Identifier
+            export_settings: Export settings dictionary
+
+        Returns:
+            Status dictionary
+        """
+        try:
+            self.connections_service.Connection.update_export_settings(
+                connection_rid=connection_rid,
+                export_settings=export_settings,
+            )
+            return {
+                "connection_rid": connection_rid,
+                "status": "export settings updated",
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to update export settings for connection {connection_rid}: {e}"
+            )
+
+    def update_secrets(
+        self, connection_rid: str, secrets: Dict[str, str]
+    ) -> Dict[str, Any]:
+        """
+        Update connection secrets.
+
+        Args:
+            connection_rid: Connection Resource Identifier
+            secrets: Dictionary mapping secret names to values
+
+        Returns:
+            Status dictionary
+        """
+        try:
+            self.connections_service.Connection.update_secrets(
+                connection_rid=connection_rid,
+                secrets=secrets,
+            )
+            return {"connection_rid": connection_rid, "status": "secrets updated"}
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to update secrets for connection {connection_rid}: {e}"
+            )
+
+    def upload_custom_jdbc_drivers(
+        self, connection_rid: str, file_path: str
+    ) -> Dict[str, Any]:
+        """
+        Upload custom JDBC drivers to a connection.
+
+        Args:
+            connection_rid: Connection Resource Identifier
+            file_path: Path to the JAR file
+
+        Returns:
+            Updated connection information dictionary
+        """
+        from pathlib import Path
+
+        file_path_obj = Path(file_path)
+
+        if not file_path_obj.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        if not file_path_obj.suffix.lower() == ".jar":
+            raise ValueError(f"File must be a JAR file: {file_path}")
+
+        try:
+            with open(file_path_obj, "rb") as f:
+                file_content = f.read()
+
+            connection = self.connections_service.Connection.upload_custom_jdbc_drivers(
+                connection_rid=connection_rid,
+                body=file_content,
+                file_name=file_path_obj.name,
+            )
+            return self._format_connection_info(connection)
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to upload JDBC driver to connection {connection_rid}: {e}"
+            )
+
     def create_file_import(
         self,
         connection_rid: str,
pltr/services/copy.py
ADDED
@@ -0,0 +1,391 @@
"""
High-level service that copies Foundry resources between Compass folders.
"""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path, PurePosixPath
from typing import Dict, List, Optional
import tempfile
import traceback

from rich.console import Console

from .dataset import DatasetService
from .folder import FolderService
from .resource import ResourceService


# Known resource types for exact matching (avoids false positives with substring matching)
DATASET_TYPES = frozenset({"foundry_dataset", "dataset"})
FOLDER_TYPES = frozenset({"folder", "compass_folder"})


@dataclass
class CopyStats:
    """Summary of a copy operation."""

    folders: int = 0
    datasets: int = 0
    skipped: int = 0
    errors: int = 0

    def as_dict(self) -> Dict[str, int]:
        """Return summary as a dictionary."""
        return {
            "folders_copied": self.folders,
            "datasets_copied": self.datasets,
            "skipped": self.skipped,
            "errors": self.errors,
        }


class CopyService:
    """Copy datasets or folders (and their children) into another Compass folder."""

    def __init__(
        self,
        *,
        profile: Optional[str] = None,
        branch: str = "master",
        name_suffix: str = "-copy",
        copy_schema: bool = True,
        dry_run: bool = False,
        debug: bool = False,
        fail_fast: bool = False,
        console: Optional[Console] = None,
    ):
        self.profile = profile
        self.branch = branch
        self.name_suffix = name_suffix
        self.copy_schema = copy_schema
        self.dry_run = dry_run
        self.debug = debug
        self.fail_fast = fail_fast
        self.console = console or Console()

        self.dataset_service = DatasetService(profile=profile)
        self.folder_service = FolderService(profile=profile)
        self.resource_service = ResourceService(profile=profile)

        self.stats = CopyStats()
        self._skipped_messages: List[str] = []

    # ------------------------------------------------------------------ public
    def copy_resource(
        self, source_rid: str, target_folder_rid: str, recursive: bool = False
    ) -> Dict[str, int]:
        """
        Copy a resource by RID into a destination folder.

        Args:
            source_rid: Resource Identifier to copy.
            target_folder_rid: Destination Compass folder RID.
            recursive: Required when copying folders. Recursively copies contents.

        Returns:
            Dictionary summary of copied items.
        """
        self._reset_stats()

        resource = self.resource_service.get_resource(source_rid)
        resource_type = (resource.get("type") or "").lower()

        if resource_type in DATASET_TYPES:
            self._log_info(
                f"Copying dataset '{self._get_resource_name(resource)}' "
                f"({source_rid}) → folder {target_folder_rid}"
            )
            try:
                self._copy_dataset(resource, target_folder_rid)
            except Exception:
                self.stats.errors += 1
                raise
        elif resource_type in FOLDER_TYPES:
            if not recursive:
                raise RuntimeError(
                    "Source resource is a folder. Pass --recursive to copy folder contents."
                )
            try:
                self._copy_folder(resource, target_folder_rid)
            except Exception:
                self.stats.errors += 1
                raise
        else:
            raise RuntimeError(
                f"Copy is only supported for datasets and folders. Resource "
                f"{source_rid} is of type '{resource.get('type')}'."
            )

        self._print_summary()
        return self.stats.as_dict()

    # ----------------------------------------------------------------- datasets
    def _copy_dataset(
        self, dataset_info: Dict[str, str], target_folder_rid: str
    ) -> None:
        dataset_rid = dataset_info["rid"]
        dataset_name = self._get_resource_name(dataset_info)
        new_name = self._derive_name(dataset_name)

        if self.dry_run:
            self._log_warning(
                f"[DRY-RUN] Would copy dataset '{dataset_name}' ({dataset_rid}) "
                f"→ '{new_name}' in folder {target_folder_rid}"
            )
            self.stats.datasets += 1
            return

        new_dataset = self.dataset_service.create_dataset(new_name, target_folder_rid)
        new_dataset_rid = new_dataset["rid"]
        self._log_success(
            f"Created dataset '{new_name}' ({new_dataset_rid}) in {target_folder_rid}"
        )

        try:
            if self.copy_schema:
                self._copy_dataset_schema(dataset_rid, new_dataset_rid)

            self._copy_dataset_files(dataset_rid, new_dataset_rid)
            self.stats.datasets += 1
            self._log_success(f"Finished copying dataset to {new_dataset_rid}")
        except Exception as exc:
            # Clean up partially created dataset on failure
            self._log_warning(
                f" Deleting partially created dataset {new_dataset_rid} due to error"
            )
            try:
                self.dataset_service.delete_dataset(new_dataset_rid)
                self._log_info(f" Deleted dataset {new_dataset_rid}")
            except Exception as delete_exc:
                self._log_error(
                    f" Failed to delete dataset {new_dataset_rid}: {delete_exc}"
                )
            raise exc

    def _copy_dataset_schema(self, source_rid: str, target_rid: str) -> None:
        """Copy schema metadata, warning if not available."""
        try:
            schema_info = self.dataset_service.get_schema(source_rid)
            schema = schema_info.get("schema")
            if schema:
                self.dataset_service.put_schema(target_rid, schema)
                self._log_info(" Copied dataset schema")
            else:
                self._log_info(" Source dataset has no schema to copy")
        except Exception as exc:
            self._log_warning(f" Could not copy schema for {source_rid}: {exc}")
            if self.debug:
                traceback.print_exc()

    def _copy_dataset_files(self, source_rid: str, target_rid: str) -> None:
        """Download files from source and upload them into the target dataset."""
        files = self.dataset_service.list_files(source_rid, branch=self.branch)
        if not files:
            self._log_info(
                " Source dataset does not expose any files on the requested branch"
            )
            return

        # Wrap transaction creation in try block to ensure cleanup on any failure
        transaction_rid = None
        transaction_created = False
        try:
            transaction = self.dataset_service.create_transaction(
                target_rid, branch=self.branch, transaction_type="SNAPSHOT"
            )
            # Check both possible key names for the transaction RID
            transaction_rid = transaction.get("transaction_rid") or transaction.get(
                "rid"
            )
            if not transaction_rid:
                raise RuntimeError(
                    f"Transaction response missing RID: {list(transaction.keys())}"
                )
            transaction_created = True

            with tempfile.TemporaryDirectory(prefix="pltr-cp-") as tmpdir:
                temp_dir = Path(tmpdir)
                for file_info in files:
                    dataset_path = file_info.get("path")
                    if not dataset_path:
                        self._report_skip(
                            {
                                "rid": source_rid,
                                "display_name": dataset_path or "unknown file",
                            },
                            "File path missing; skipping file.",
                        )
                        continue

                    local_rel_path = self._sanitize_local_path(dataset_path)
                    download_path = temp_dir / local_rel_path
                    download_path.parent.mkdir(parents=True, exist_ok=True)

                    self.dataset_service.download_file(
                        source_rid, dataset_path, download_path, branch=self.branch
                    )
                    self._upload_dataset_file(
                        target_rid, dataset_path, download_path, transaction_rid
                    )
                    self._log_info(f" Copied dataset file: {dataset_path}")

            self.dataset_service.commit_transaction(target_rid, transaction_rid)
            self._log_info(f" Committed transaction {transaction_rid}")
        except Exception as exc:
            # Only attempt rollback if we successfully created a transaction
            if transaction_created and transaction_rid:
                try:
                    self.dataset_service.abort_transaction(target_rid, transaction_rid)
                    self._log_warning(f" Rolled back transaction {transaction_rid}")
                except Exception:
                    self._log_warning(
                        f" Failed to roll back transaction {transaction_rid}"
                    )

            if self.debug:
                traceback.print_exc()

            raise RuntimeError(
                f"{type(exc).__name__} while copying files from {source_rid} to {target_rid}: {exc}"
            ) from exc

    def _upload_dataset_file(
        self,
        dataset_rid: str,
        dataset_path: str,
        local_file: Path,
        transaction_rid: str,
    ) -> None:
        """Upload the downloaded file bytes back into a dataset."""
        with open(local_file, "rb") as handle:
            body = handle.read()

        remote_path = PurePosixPath(dataset_path.lstrip("/")).as_posix()
        upload_kwargs = {
            "dataset_rid": dataset_rid,
            "file_path": remote_path,
            "body": body,
            "transaction_rid": transaction_rid,
        }
        # API only allows either branchName or transactionRid. When using a transaction
        # we must omit branchName to avoid InvalidParameterCombination.
        if not transaction_rid:
            upload_kwargs["branch_name"] = self.branch

        self.dataset_service.service.Dataset.File.upload(**upload_kwargs)

    # ------------------------------------------------------------------ folders
    def _copy_folder(self, folder_info: Dict[str, str], target_folder_rid: str) -> None:
        folder_rid = folder_info["rid"]
        folder_name = self._get_resource_name(folder_info)
        new_name = self._derive_name(folder_name)

        if self.dry_run:
            self._log_warning(
                f"[DRY-RUN] Would copy folder '{folder_name}' ({folder_rid}) "
                f"→ '{new_name}' in folder {target_folder_rid}"
            )
            new_folder_rid = "<dry-run>"
        else:
            created = self.folder_service.create_folder(new_name, target_folder_rid)
            new_folder_rid = created["rid"]
            self._log_success(
                f"Created folder '{new_name}' ({new_folder_rid}) in {target_folder_rid}"
            )

        self.stats.folders += 1

        children = self.folder_service.list_children(folder_rid)
        if not children:
            self._log_info(" Folder has no children to copy.")
            return

        for child in children:
            resource_type_raw = child.get("type") or ""
            resource_type = resource_type_raw.lower()

            try:
                if resource_type in FOLDER_TYPES:
                    self._copy_folder(child, new_folder_rid)
                elif resource_type in DATASET_TYPES:
                    self._copy_dataset(child, new_folder_rid)
                else:
                    self._report_skip(
                        child, f"Unsupported child type '{resource_type_raw}'"
                    )
            except Exception as exc:
                self.stats.errors += 1
                self._log_error(
                    f"Failed to copy child {child.get('rid', 'unknown RID')}: {exc}"
                )
                if self.debug:
                    traceback.print_exc()
                if self.fail_fast:
                    raise RuntimeError(f"Stopping due to --fail-fast: {exc}") from exc

    # ------------------------------------------------------------------ helpers
    def _get_resource_name(self, resource: Dict[str, str]) -> str:
        """Extract display name from a resource, with fallbacks."""
        return (
            resource.get("display_name")
            or resource.get("name")
            or resource.get("rid", "unknown")
        )

    def _sanitize_local_path(self, original_path: str) -> str:
        """Return a safe relative path for storing temporary files."""
        # Check for path traversal BEFORE normalization to prevent attacks
        if ".." in original_path:
            raise ValueError(f"Path traversal detected: {original_path}")
        clean = PurePosixPath(original_path.lstrip("/"))
        # Verify no parent directory references remain after normalization
        if ".." in clean.parts:
            raise ValueError(f"Invalid path after normalization: {original_path}")
        return clean.as_posix()

    def _derive_name(self, base_name: str) -> str:
        """Append the configured suffix if provided."""
        suffix = self.name_suffix or ""
        if not suffix:
            return base_name
        if base_name.endswith(suffix):
            return base_name
        return f"{base_name}{suffix}"

    def _report_skip(self, resource: Dict[str, str], reason: str) -> None:
        """Track skipped resources."""
        rid = resource.get("rid", "unknown RID")
        display_name = resource.get("display_name") or resource.get("name") or rid
        message = f"[SKIP] {display_name} ({rid}): {reason}"
        self.console.print(message)
        self.stats.skipped += 1
        self._skipped_messages.append(message)

    def _reset_stats(self) -> None:
        self.stats = CopyStats()
        self._skipped_messages = []

    def _print_summary(self) -> None:
        """Print summary after an operation."""
        self.console.print(
            "\nSummary: "
            f"{self.stats.folders} folders copied, "
            f"{self.stats.datasets} datasets copied, "
            f"{self.stats.skipped} resources skipped, "
            f"{self.stats.errors} errors"
        )

    # ------------------------------------------------------------------ logging
    def _log_info(self, message: str) -> None:
        self.console.print(message)

    def _log_warning(self, message: str) -> None:
        self.console.print(f"[yellow]{message}[/yellow]")

    def _log_error(self, message: str) -> None:
        self.console.print(f"[red]{message}[/red]")

    def _log_success(self, message: str) -> None:
        self.console.print(f"[green]{message}[/green]")