pltr-cli 0.11.0__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- pltr/__init__.py +1 -1
- pltr/cli.py +40 -0
- pltr/commands/admin.py +565 -11
- pltr/commands/aip_agents.py +333 -0
- pltr/commands/connectivity.py +309 -1
- pltr/commands/cp.py +103 -0
- pltr/commands/dataset.py +104 -4
- pltr/commands/functions.py +503 -0
- pltr/commands/language_models.py +515 -0
- pltr/commands/mediasets.py +176 -0
- pltr/commands/models.py +362 -0
- pltr/commands/ontology.py +44 -13
- pltr/commands/orchestration.py +167 -11
- pltr/commands/project.py +231 -22
- pltr/commands/resource.py +416 -17
- pltr/commands/space.py +25 -303
- pltr/commands/sql.py +54 -7
- pltr/commands/streams.py +616 -0
- pltr/commands/third_party_applications.py +82 -0
- pltr/services/admin.py +331 -3
- pltr/services/aip_agents.py +147 -0
- pltr/services/base.py +104 -1
- pltr/services/connectivity.py +139 -0
- pltr/services/copy.py +391 -0
- pltr/services/dataset.py +77 -4
- pltr/services/folder.py +6 -1
- pltr/services/functions.py +223 -0
- pltr/services/language_models.py +281 -0
- pltr/services/mediasets.py +144 -9
- pltr/services/models.py +179 -0
- pltr/services/ontology.py +48 -1
- pltr/services/orchestration.py +133 -1
- pltr/services/project.py +213 -39
- pltr/services/resource.py +229 -60
- pltr/services/space.py +24 -175
- pltr/services/sql.py +44 -20
- pltr/services/streams.py +290 -0
- pltr/services/third_party_applications.py +53 -0
- pltr/utils/formatting.py +195 -1
- pltr/utils/pagination.py +325 -0
- {pltr_cli-0.11.0.dist-info → pltr_cli-0.13.0.dist-info}/METADATA +55 -4
- pltr_cli-0.13.0.dist-info/RECORD +70 -0
- {pltr_cli-0.11.0.dist-info → pltr_cli-0.13.0.dist-info}/WHEEL +1 -1
- pltr_cli-0.11.0.dist-info/RECORD +0 -55
- {pltr_cli-0.11.0.dist-info → pltr_cli-0.13.0.dist-info}/entry_points.txt +0 -0
- {pltr_cli-0.11.0.dist-info → pltr_cli-0.13.0.dist-info}/licenses/LICENSE +0 -0
pltr/services/connectivity.py
CHANGED
```diff
@@ -58,6 +58,145 @@ class ConnectivityService(BaseService):
         except Exception as e:
             raise RuntimeError(f"Failed to get connection {connection_rid}: {e}")
 
+    def create_connection(
+        self,
+        display_name: str,
+        parent_folder_rid: str,
+        configuration: Dict[str, Any],
+        worker: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """
+        Create a new connection.
+
+        Args:
+            display_name: Display name for the connection
+            parent_folder_rid: Parent folder Resource Identifier
+            configuration: Connection configuration dictionary
+            worker: Worker configuration dictionary
+
+        Returns:
+            Created connection information dictionary
+        """
+        try:
+            connection = self.connections_service.Connection.create(
+                configuration=configuration,
+                display_name=display_name,
+                parent_folder_rid=parent_folder_rid,
+                worker=worker,
+            )
+            return self._format_connection_info(connection)
+        except Exception as e:
+            raise RuntimeError(f"Failed to create connection '{display_name}': {e}")
+
+    def get_connection_configuration(self, connection_rid: str) -> Dict[str, Any]:
+        """
+        Get connection configuration.
+
+        Args:
+            connection_rid: Connection Resource Identifier
+
+        Returns:
+            Connection configuration dictionary
+        """
+        try:
+            config = self.connections_service.Connection.get_configuration(
+                connection_rid
+            )
+            return {"connection_rid": connection_rid, "configuration": config}
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get configuration for connection {connection_rid}: {e}"
+            )
+
+    def update_export_settings(
+        self, connection_rid: str, export_settings: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """
+        Update connection export settings.
+
+        Args:
+            connection_rid: Connection Resource Identifier
+            export_settings: Export settings dictionary
+
+        Returns:
+            Status dictionary
+        """
+        try:
+            self.connections_service.Connection.update_export_settings(
+                connection_rid=connection_rid,
+                export_settings=export_settings,
+            )
+            return {
+                "connection_rid": connection_rid,
+                "status": "export settings updated",
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to update export settings for connection {connection_rid}: {e}"
+            )
+
+    def update_secrets(
+        self, connection_rid: str, secrets: Dict[str, str]
+    ) -> Dict[str, Any]:
+        """
+        Update connection secrets.
+
+        Args:
+            connection_rid: Connection Resource Identifier
+            secrets: Dictionary mapping secret names to values
+
+        Returns:
+            Status dictionary
+        """
+        try:
+            self.connections_service.Connection.update_secrets(
+                connection_rid=connection_rid,
+                secrets=secrets,
+            )
+            return {"connection_rid": connection_rid, "status": "secrets updated"}
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to update secrets for connection {connection_rid}: {e}"
+            )
+
+    def upload_custom_jdbc_drivers(
+        self, connection_rid: str, file_path: str
+    ) -> Dict[str, Any]:
+        """
+        Upload custom JDBC drivers to a connection.
+
+        Args:
+            connection_rid: Connection Resource Identifier
+            file_path: Path to the JAR file
+
+        Returns:
+            Updated connection information dictionary
+        """
+        from pathlib import Path
+
+        file_path_obj = Path(file_path)
+
+        if not file_path_obj.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        if not file_path_obj.suffix.lower() == ".jar":
+            raise ValueError(f"File must be a JAR file: {file_path}")
+
+        try:
+            with open(file_path_obj, "rb") as f:
+                file_content = f.read()
+
+            connection = self.connections_service.Connection.upload_custom_jdbc_drivers(
+                connection_rid=connection_rid,
+                body=file_content,
+                file_name=file_path_obj.name,
+            )
+            return self._format_connection_info(connection)
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to upload JDBC driver to connection {connection_rid}: {e}"
+            )
+
     def create_file_import(
         self,
         connection_rid: str,
```
pltr/services/copy.py
ADDED
```diff
@@ -0,0 +1,391 @@
+"""
+High-level service that copies Foundry resources between Compass folders.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path, PurePosixPath
+from typing import Dict, List, Optional
+import tempfile
+import traceback
+
+from rich.console import Console
+
+from .dataset import DatasetService
+from .folder import FolderService
+from .resource import ResourceService
+
+
+# Known resource types for exact matching (avoids false positives with substring matching)
+DATASET_TYPES = frozenset({"foundry_dataset", "dataset"})
+FOLDER_TYPES = frozenset({"folder", "compass_folder"})
+
+
+@dataclass
+class CopyStats:
+    """Summary of a copy operation."""
+
+    folders: int = 0
+    datasets: int = 0
+    skipped: int = 0
+    errors: int = 0
+
+    def as_dict(self) -> Dict[str, int]:
+        """Return summary as a dictionary."""
+        return {
+            "folders_copied": self.folders,
+            "datasets_copied": self.datasets,
+            "skipped": self.skipped,
+            "errors": self.errors,
+        }
+
+
+class CopyService:
+    """Copy datasets or folders (and their children) into another Compass folder."""
+
+    def __init__(
+        self,
+        *,
+        profile: Optional[str] = None,
+        branch: str = "master",
+        name_suffix: str = "-copy",
+        copy_schema: bool = True,
+        dry_run: bool = False,
+        debug: bool = False,
+        fail_fast: bool = False,
+        console: Optional[Console] = None,
+    ):
+        self.profile = profile
+        self.branch = branch
+        self.name_suffix = name_suffix
+        self.copy_schema = copy_schema
+        self.dry_run = dry_run
+        self.debug = debug
+        self.fail_fast = fail_fast
+        self.console = console or Console()
+
+        self.dataset_service = DatasetService(profile=profile)
+        self.folder_service = FolderService(profile=profile)
+        self.resource_service = ResourceService(profile=profile)
+
+        self.stats = CopyStats()
+        self._skipped_messages: List[str] = []
+
+    # ------------------------------------------------------------------ public
+    def copy_resource(
+        self, source_rid: str, target_folder_rid: str, recursive: bool = False
+    ) -> Dict[str, int]:
+        """
+        Copy a resource by RID into a destination folder.
+
+        Args:
+            source_rid: Resource Identifier to copy.
+            target_folder_rid: Destination Compass folder RID.
+            recursive: Required when copying folders. Recursively copies contents.
+
+        Returns:
+            Dictionary summary of copied items.
+        """
+        self._reset_stats()
+
+        resource = self.resource_service.get_resource(source_rid)
+        resource_type = (resource.get("type") or "").lower()
+
+        if resource_type in DATASET_TYPES:
+            self._log_info(
+                f"Copying dataset '{self._get_resource_name(resource)}' "
+                f"({source_rid}) → folder {target_folder_rid}"
+            )
+            try:
+                self._copy_dataset(resource, target_folder_rid)
+            except Exception:
+                self.stats.errors += 1
+                raise
+        elif resource_type in FOLDER_TYPES:
+            if not recursive:
+                raise RuntimeError(
+                    "Source resource is a folder. Pass --recursive to copy folder contents."
+                )
+            try:
+                self._copy_folder(resource, target_folder_rid)
+            except Exception:
+                self.stats.errors += 1
+                raise
+        else:
+            raise RuntimeError(
+                f"Copy is only supported for datasets and folders. Resource "
+                f"{source_rid} is of type '{resource.get('type')}'."
+            )
+
+        self._print_summary()
+        return self.stats.as_dict()
+
+    # ----------------------------------------------------------------- datasets
+    def _copy_dataset(
+        self, dataset_info: Dict[str, str], target_folder_rid: str
+    ) -> None:
+        dataset_rid = dataset_info["rid"]
+        dataset_name = self._get_resource_name(dataset_info)
+        new_name = self._derive_name(dataset_name)
+
+        if self.dry_run:
+            self._log_warning(
+                f"[DRY-RUN] Would copy dataset '{dataset_name}' ({dataset_rid}) "
+                f"→ '{new_name}' in folder {target_folder_rid}"
+            )
+            self.stats.datasets += 1
+            return
+
+        new_dataset = self.dataset_service.create_dataset(new_name, target_folder_rid)
+        new_dataset_rid = new_dataset["rid"]
+        self._log_success(
+            f"Created dataset '{new_name}' ({new_dataset_rid}) in {target_folder_rid}"
+        )
+
+        try:
+            if self.copy_schema:
+                self._copy_dataset_schema(dataset_rid, new_dataset_rid)
+
+            self._copy_dataset_files(dataset_rid, new_dataset_rid)
+            self.stats.datasets += 1
+            self._log_success(f"Finished copying dataset to {new_dataset_rid}")
+        except Exception as exc:
+            # Clean up partially created dataset on failure
+            self._log_warning(
+                f" Deleting partially created dataset {new_dataset_rid} due to error"
+            )
+            try:
+                self.dataset_service.delete_dataset(new_dataset_rid)
+                self._log_info(f" Deleted dataset {new_dataset_rid}")
+            except Exception as delete_exc:
+                self._log_error(
+                    f" Failed to delete dataset {new_dataset_rid}: {delete_exc}"
+                )
+            raise exc
+
+    def _copy_dataset_schema(self, source_rid: str, target_rid: str) -> None:
+        """Copy schema metadata, warning if not available."""
+        try:
+            schema_info = self.dataset_service.get_schema(source_rid)
+            schema = schema_info.get("schema")
+            if schema:
+                self.dataset_service.put_schema(target_rid, schema)
+                self._log_info(" Copied dataset schema")
+            else:
+                self._log_info(" Source dataset has no schema to copy")
+        except Exception as exc:
+            self._log_warning(f" Could not copy schema for {source_rid}: {exc}")
+            if self.debug:
+                traceback.print_exc()
+
+    def _copy_dataset_files(self, source_rid: str, target_rid: str) -> None:
+        """Download files from source and upload them into the target dataset."""
+        files = self.dataset_service.list_files(source_rid, branch=self.branch)
+        if not files:
+            self._log_info(
+                " Source dataset does not expose any files on the requested branch"
+            )
+            return
+
+        # Wrap transaction creation in try block to ensure cleanup on any failure
+        transaction_rid = None
+        transaction_created = False
+        try:
+            transaction = self.dataset_service.create_transaction(
+                target_rid, branch=self.branch, transaction_type="SNAPSHOT"
+            )
+            # Check both possible key names for the transaction RID
+            transaction_rid = transaction.get("transaction_rid") or transaction.get(
+                "rid"
+            )
+            if not transaction_rid:
+                raise RuntimeError(
+                    f"Transaction response missing RID: {list(transaction.keys())}"
+                )
+            transaction_created = True
+
+            with tempfile.TemporaryDirectory(prefix="pltr-cp-") as tmpdir:
+                temp_dir = Path(tmpdir)
+                for file_info in files:
+                    dataset_path = file_info.get("path")
+                    if not dataset_path:
+                        self._report_skip(
+                            {
+                                "rid": source_rid,
+                                "display_name": dataset_path or "unknown file",
+                            },
+                            "File path missing; skipping file.",
+                        )
+                        continue
+
+                    local_rel_path = self._sanitize_local_path(dataset_path)
+                    download_path = temp_dir / local_rel_path
+                    download_path.parent.mkdir(parents=True, exist_ok=True)
+
+                    self.dataset_service.download_file(
+                        source_rid, dataset_path, download_path, branch=self.branch
+                    )
+                    self._upload_dataset_file(
+                        target_rid, dataset_path, download_path, transaction_rid
+                    )
+                    self._log_info(f" Copied dataset file: {dataset_path}")
+
+            self.dataset_service.commit_transaction(target_rid, transaction_rid)
+            self._log_info(f" Committed transaction {transaction_rid}")
+        except Exception as exc:
+            # Only attempt rollback if we successfully created a transaction
+            if transaction_created and transaction_rid:
+                try:
+                    self.dataset_service.abort_transaction(target_rid, transaction_rid)
+                    self._log_warning(f" Rolled back transaction {transaction_rid}")
+                except Exception:
+                    self._log_warning(
+                        f" Failed to roll back transaction {transaction_rid}"
+                    )
+
+            if self.debug:
+                traceback.print_exc()
+
+            raise RuntimeError(
+                f"{type(exc).__name__} while copying files from {source_rid} to {target_rid}: {exc}"
+            ) from exc
+
+    def _upload_dataset_file(
+        self,
+        dataset_rid: str,
+        dataset_path: str,
+        local_file: Path,
+        transaction_rid: str,
+    ) -> None:
+        """Upload the downloaded file bytes back into a dataset."""
+        with open(local_file, "rb") as handle:
+            body = handle.read()
+
+        remote_path = PurePosixPath(dataset_path.lstrip("/")).as_posix()
+        upload_kwargs = {
+            "dataset_rid": dataset_rid,
+            "file_path": remote_path,
+            "body": body,
+            "transaction_rid": transaction_rid,
+        }
+        # API only allows either branchName or transactionRid. When using a transaction
+        # we must omit branchName to avoid InvalidParameterCombination.
+        if not transaction_rid:
+            upload_kwargs["branch_name"] = self.branch
+
+        self.dataset_service.service.Dataset.File.upload(**upload_kwargs)
+
+    # ------------------------------------------------------------------ folders
+    def _copy_folder(self, folder_info: Dict[str, str], target_folder_rid: str) -> None:
+        folder_rid = folder_info["rid"]
+        folder_name = self._get_resource_name(folder_info)
+        new_name = self._derive_name(folder_name)
+
+        if self.dry_run:
+            self._log_warning(
+                f"[DRY-RUN] Would copy folder '{folder_name}' ({folder_rid}) "
+                f"→ '{new_name}' in folder {target_folder_rid}"
+            )
+            new_folder_rid = "<dry-run>"
+        else:
+            created = self.folder_service.create_folder(new_name, target_folder_rid)
+            new_folder_rid = created["rid"]
+            self._log_success(
+                f"Created folder '{new_name}' ({new_folder_rid}) in {target_folder_rid}"
+            )
+
+        self.stats.folders += 1
+
+        children = self.folder_service.list_children(folder_rid)
+        if not children:
+            self._log_info(" Folder has no children to copy.")
+            return
+
+        for child in children:
+            resource_type_raw = child.get("type") or ""
+            resource_type = resource_type_raw.lower()
+
+            try:
+                if resource_type in FOLDER_TYPES:
+                    self._copy_folder(child, new_folder_rid)
+                elif resource_type in DATASET_TYPES:
+                    self._copy_dataset(child, new_folder_rid)
+                else:
+                    self._report_skip(
+                        child, f"Unsupported child type '{resource_type_raw}'"
+                    )
+            except Exception as exc:
+                self.stats.errors += 1
+                self._log_error(
+                    f"Failed to copy child {child.get('rid', 'unknown RID')}: {exc}"
+                )
+                if self.debug:
+                    traceback.print_exc()
+                if self.fail_fast:
+                    raise RuntimeError(f"Stopping due to --fail-fast: {exc}") from exc
+
+    # ------------------------------------------------------------------ helpers
+    def _get_resource_name(self, resource: Dict[str, str]) -> str:
+        """Extract display name from a resource, with fallbacks."""
+        return (
+            resource.get("display_name")
+            or resource.get("name")
+            or resource.get("rid", "unknown")
+        )
+
+    def _sanitize_local_path(self, original_path: str) -> str:
+        """Return a safe relative path for storing temporary files."""
+        # Check for path traversal BEFORE normalization to prevent attacks
+        if ".." in original_path:
+            raise ValueError(f"Path traversal detected: {original_path}")
+        clean = PurePosixPath(original_path.lstrip("/"))
+        # Verify no parent directory references remain after normalization
+        if ".." in clean.parts:
+            raise ValueError(f"Invalid path after normalization: {original_path}")
+        return clean.as_posix()
+
+    def _derive_name(self, base_name: str) -> str:
+        """Append the configured suffix if provided."""
+        suffix = self.name_suffix or ""
+        if not suffix:
+            return base_name
+        if base_name.endswith(suffix):
+            return base_name
+        return f"{base_name}{suffix}"
+
+    def _report_skip(self, resource: Dict[str, str], reason: str) -> None:
+        """Track skipped resources."""
+        rid = resource.get("rid", "unknown RID")
+        display_name = resource.get("display_name") or resource.get("name") or rid
+        message = f"[SKIP] {display_name} ({rid}): {reason}"
+        self.console.print(message)
+        self.stats.skipped += 1
+        self._skipped_messages.append(message)
+
+    def _reset_stats(self) -> None:
+        self.stats = CopyStats()
+        self._skipped_messages = []
+
+    def _print_summary(self) -> None:
+        """Print summary after an operation."""
+        self.console.print(
+            "\nSummary: "
+            f"{self.stats.folders} folders copied, "
+            f"{self.stats.datasets} datasets copied, "
+            f"{self.stats.skipped} resources skipped, "
+            f"{self.stats.errors} errors"
+        )
+
+    # ------------------------------------------------------------------ logging
+    def _log_info(self, message: str) -> None:
+        self.console.print(message)
+
+    def _log_warning(self, message: str) -> None:
+        self.console.print(f"[yellow]{message}[/yellow]")
+
+    def _log_error(self, message: str) -> None:
+        self.console.print(f"[red]{message}[/red]")
+
+    def _log_success(self, message: str) -> None:
+        self.console.print(f"[green]{message}[/green]")
```
pltr/services/dataset.py
CHANGED
```diff
@@ -2,10 +2,12 @@
 Dataset service wrapper for Foundry SDK.
 """
 
-from typing import Any,
+from typing import Any, Callable, Dict, List, Optional, Union
 from pathlib import Path
 import csv
 
+from ..config.settings import Settings
+from ..utils.pagination import PaginationConfig, PaginationResult
 from .base import BaseService
 
 
@@ -16,8 +18,6 @@ class DatasetService(BaseService):
         """Get the Foundry datasets service."""
         return self.client.datasets
 
-    # list_datasets method removed - not supported by foundry-platform-sdk v1.27.0
-
     def get_dataset(self, dataset_rid: str) -> Dict[str, Any]:
         """
         Get information about a specific dataset.
@@ -252,8 +252,9 @@ class DatasetService(BaseService):
             # Clean column name (remove special characters for field name)
             clean_name = col.strip().replace(" ", "_").replace("-", "_")
 
+            # SDK 1.69.0 expects FieldType enum but accepts strings at runtime
             fields.append(
-                DatasetFieldSchema(name=clean_name, type=field_type, nullable=nullable)
+                DatasetFieldSchema(name=clean_name, type=field_type, nullable=nullable)  # type: ignore[arg-type]
            )
 
        return DatasetSchema(field_schema_list=fields)
@@ -312,6 +313,29 @@ class DatasetService(BaseService):
         except Exception as e:
             raise RuntimeError(f"Failed to read dataset {dataset_rid}: {e}")
 
+    def preview_data(
+        self,
+        dataset_rid: str,
+        limit: int = 10,
+    ) -> List[Dict[str, Any]]:
+        """
+        Preview dataset contents as a list of records.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            limit: Maximum number of rows to return
+
+        Returns:
+            List of dictionaries representing rows
+        """
+        try:
+            # Use read_table with pandas format for easy conversion
+            df = self.read_table(dataset_rid, format="pandas")
+            # Limit rows and convert to records
+            return df.head(limit).to_dict(orient="records")
+        except Exception as e:
+            raise RuntimeError(f"Failed to preview dataset {dataset_rid}: {e}")
+
     def delete_dataset(self, dataset_rid: str) -> bool:
         """
         Delete a dataset.
@@ -516,6 +540,8 @@ class DatasetService(BaseService):
         """
         List files in a dataset.
 
+        DEPRECATED: Use list_files_paginated() instead for better pagination support.
+
         Args:
             dataset_rid: Dataset Resource Identifier
             branch: Dataset branch name
@@ -540,6 +566,53 @@ class DatasetService(BaseService):
         except Exception as e:
             raise RuntimeError(f"Failed to list files in dataset {dataset_rid}: {e}")
 
+    def list_files_paginated(
+        self,
+        dataset_rid: str,
+        branch: str,
+        config: PaginationConfig,
+        progress_callback: Optional[Callable[[int, int], None]] = None,
+    ) -> PaginationResult:
+        """
+        List files with full pagination control.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch: Dataset branch name
+            config: Pagination configuration
+            progress_callback: Optional progress callback
+
+        Returns:
+            PaginationResult with file information and metadata
+        """
+        try:
+            settings = Settings()
+
+            # Get iterator from SDK - ResourceIterator with next_page_token support
+            iterator = self.service.Dataset.File.list(
+                dataset_rid=dataset_rid,
+                branch_name=branch,
+                page_size=config.page_size or settings.get("page_size", 20),
+            )
+
+            # Use iterator pagination handler
+            result = self._paginate_iterator(iterator, config, progress_callback)
+
+            # Format file information
+            result.data = [
+                {
+                    "path": file.path,
+                    "size_bytes": getattr(file, "size_bytes", None),
+                    "last_modified": getattr(file, "last_modified", None),
+                    "transaction_rid": getattr(file, "transaction_rid", None),
+                }
+                for file in result.data
+            ]
+
+            return result
+        except Exception as e:
+            raise RuntimeError(f"Failed to list files: {e}")
+
     def get_branches(self, dataset_rid: str) -> List[Dict[str, Any]]:
         """
         Get list of branches for a dataset.
```
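The new `list_files_paginated` defers page handling to the `_paginate_iterator` helper added to `BaseService` and falls back to a configured default page size. A minimal sketch of both new entry points, assuming `DatasetService` takes a `profile` keyword like its siblings and that `PaginationConfig` accepts `page_size` in its constructor (the diff only shows that attribute being read):

```python
from pltr.services.dataset import DatasetService
from pltr.utils.pagination import PaginationConfig

datasets = DatasetService(profile="default")  # profile kwarg is an assumption

result = datasets.list_files_paginated(
    dataset_rid="ri.foundry.main.dataset.cccc",  # placeholder RID
    branch="master",
    config=PaginationConfig(page_size=50),  # assumed constructor argument
)
for entry in result.data:
    print(entry["path"], entry["size_bytes"])

# preview_data reads the table through pandas and returns up to `limit` row dicts.
rows = datasets.preview_data("ri.foundry.main.dataset.cccc", limit=5)
```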
pltr/services/folder.py
CHANGED
```diff
@@ -4,6 +4,8 @@ Folder service wrapper for Foundry SDK filesystem API.
 
 from typing import Any, Optional, Dict, List
 
+from foundry_sdk.v2.filesystem.models import GetFoldersBatchRequestElement
+
 from .base import BaseService
 
 
@@ -95,7 +97,10 @@ class FolderService(BaseService):
             raise ValueError("Maximum batch size is 1000 folders")
 
         try:
-
+            elements = [
+                GetFoldersBatchRequestElement(folder_rid=rid) for rid in folder_rids
+            ]
+            response = self.service.Folder.get_batch(body=elements, preview=True)
             folders = []
             for folder in response.folders:
                 folders.append(self._format_folder_info(folder))
```