pltr-cli 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pltr/services/dataset.py CHANGED
@@ -4,6 +4,7 @@ Dataset service wrapper for Foundry SDK.
 
 from typing import Any, Optional, List, Dict, Union
 from pathlib import Path
+import csv
 
 from .base import BaseService
 
@@ -55,6 +56,153 @@ class DatasetService(BaseService):
         except Exception as e:
             raise RuntimeError(f"Failed to get schema for dataset {dataset_rid}: {e}")
 
+    def put_schema(
+        self,
+        dataset_rid: str,
+        schema: Any,
+        branch: str = "master",
+        transaction_rid: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Set or update dataset schema.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            schema: DatasetSchema object with field definitions
+            branch: Dataset branch name
+            transaction_rid: Optional transaction RID
+
+        Returns:
+            Schema update result
+        """
+        try:
+            from foundry_sdk.v2.core.models import DatasetSchema
+
+            # Ensure schema is a DatasetSchema object
+            if not isinstance(schema, DatasetSchema):
+                raise ValueError("Schema must be a DatasetSchema object")
+
+            result = self.service.Dataset.put_schema(
+                dataset_rid=dataset_rid,
+                schema=schema,
+                branch_name=branch,
+                end_transaction_rid=transaction_rid,
+            )
+
+            return {
+                "dataset_rid": dataset_rid,
+                "branch": branch,
+                "transaction_rid": transaction_rid,
+                "status": "Schema updated successfully",
+                "schema": result,
+            }
+        except Exception as e:
+            raise RuntimeError(f"Failed to set schema for dataset {dataset_rid}: {e}")
+
+    def infer_schema_from_csv(
+        self, csv_path: Union[str, Path], sample_rows: int = 100
+    ) -> Any:
+        """
+        Infer schema from a CSV file by analyzing headers and sample data.
+
+        Args:
+            csv_path: Path to CSV file
+            sample_rows: Number of rows to sample for type inference
+
+        Returns:
+            DatasetSchema object with inferred field types
+        """
+        from foundry_sdk.v2.core.models import DatasetSchema, DatasetFieldSchema
+
+        csv_path = Path(csv_path)
+        if not csv_path.exists():
+            raise FileNotFoundError(f"CSV file not found: {csv_path}")
+
+        def infer_type(values: List[str]) -> tuple[str, bool]:
+            """
+            Infer type from a list of values.
+            Returns (type_name, nullable)
+            """
+            # Remove empty strings and track if nullable
+            non_empty = [v for v in values if v.strip()]
+            nullable = len(non_empty) < len(values) or len(non_empty) == 0
+
+            if not non_empty:
+                return ("STRING", True)
+
+            # Check for boolean
+            bool_values = {"true", "false", "yes", "no", "1", "0"}
+            if all(v.lower() in bool_values for v in non_empty):
+                return ("BOOLEAN", nullable)
+
+            # Check for integer
+            try:
+                for v in non_empty:
+                    int(v)
+                return ("INTEGER", nullable)
+            except ValueError:
+                pass
+
+            # Check for double
+            try:
+                for v in non_empty:
+                    float(v)
+                return ("DOUBLE", nullable)
+            except ValueError:
+                pass
+
+            # Check for date patterns
+            date_patterns = [
+                r"^\d{4}-\d{2}-\d{2}$",  # YYYY-MM-DD
+                r"^\d{2}/\d{2}/\d{4}$",  # MM/DD/YYYY
+                r"^\d{2}-\d{2}-\d{4}$",  # DD-MM-YYYY
+            ]
+            import re
+
+            for pattern in date_patterns:
+                if all(re.match(pattern, v) for v in non_empty[:10]):  # Check first 10
+                    return ("DATE", nullable)
+
+            # Check for timestamp patterns
+            if all(
+                "-" in v and ":" in v and len(v) > 10 for v in non_empty[:10]
+            ):  # Basic timestamp check
+                return ("TIMESTAMP", nullable)
+
+            # Default to string
+            return ("STRING", nullable)
+
+        # Read CSV and analyze
+        with open(csv_path, "r", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            headers = reader.fieldnames
+
+            if not headers:
+                raise ValueError("CSV file has no headers")
+
+            # Collect sample values for each column
+            column_values: Dict[str, List[str]] = {col: [] for col in headers}
+            for i, row in enumerate(reader):
+                if i >= sample_rows:
+                    break
+                for col in headers:
+                    column_values[col].append(row.get(col, ""))
+
+        # Infer types for each column
+        fields = []
+        for col in headers:
+            values = column_values[col]
+            field_type, nullable = infer_type(values)
+
+            # Clean column name (remove special characters for field name)
+            clean_name = col.strip().replace(" ", "_").replace("-", "_")
+
+            fields.append(
+                DatasetFieldSchema(name=clean_name, type=field_type, nullable=nullable)
+            )
+
+        return DatasetSchema(field_schema_list=fields)
+
     def create_dataset(
         self, name: str, parent_folder_rid: Optional[str] = None
     ) -> Dict[str, Any]:
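
Taken together, the two new methods give the service a CSV-first schema workflow: infer a schema locally, then push it to Foundry. A minimal usage sketch, assuming a DatasetService instance that has already been constructed with your configured pltr credentials (the constructor call, CSV path, and dataset RID below are placeholders, not values from the package):

from pltr.services.dataset import DatasetService

# Hypothetical setup: how DatasetService is constructed depends on your
# pltr auth configuration; treat this line as a placeholder.
service = DatasetService()

# Infer field types from the header row plus up to 100 sampled rows.
schema = service.infer_schema_from_csv("orders.csv", sample_rows=100)

# Apply the inferred schema to a branch; the RID is a placeholder.
result = service.put_schema(
    "ri.foundry.main.dataset.example",
    schema,
    branch="master",
)
print(result["status"])  # "Schema updated successfully"
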
@@ -171,9 +319,93 @@ class DatasetService(BaseService):
                 "transaction_rid": getattr(result, "transaction_rid", transaction_rid),
             }
         except Exception as e:
-            raise RuntimeError(
-                f"Failed to upload file {file_path} to dataset {dataset_rid}: {e}"
-            )
+            # Try to extract more detailed error information
+            error_msg = str(e).strip()
+            error_type = type(e).__name__
+
+            # Check for common HTTP/API errors
+            if hasattr(e, "response") and hasattr(e.response, "status_code"):
+                status_code = e.response.status_code
+                if hasattr(e.response, "text"):
+                    response_text = e.response.text[:500]  # Limit to 500 chars
+                    error_details = f"HTTP {status_code}: {response_text}"
+                else:
+                    error_details = f"HTTP {status_code}"
+                error_msg = f"{error_details} ({error_type}: {error_msg})"
+            elif hasattr(e, "status_code"):
+                error_msg = f"HTTP {e.status_code}: {error_msg}"
+            elif hasattr(e, "message"):
+                error_msg = f"{error_type}: {e.message}"
+            else:
+                if error_msg:
+                    error_msg = f"{error_type}: {error_msg}"
+                else:
+                    error_msg = f"{error_type} (no additional details available)"
+
+            # Add context about what might have gone wrong
+            context_hints = []
+            error_lower = error_msg.lower()
+
+            if (
+                "permission" in error_lower
+                or "forbidden" in error_lower
+                or "401" in error_msg
+                or "403" in error_msg
+            ):
+                context_hints.append(
+                    "Check your authentication credentials and dataset permissions"
+                )
+            if "not found" in error_lower or "404" in error_msg:
+                context_hints.append(
+                    "Verify the dataset RID and transaction RID are correct"
+                )
+            if "transaction" in error_lower:
+                context_hints.append(
+                    "Check if the transaction is still open and not expired"
+                )
+            if "schema" in error_lower or "validation" in error_lower:
+                context_hints.append(
+                    "The file might not match the expected dataset schema"
+                )
+            if (
+                "invalidparametercombination" in error_lower
+                or "invalid parameter" in error_lower
+            ):
+                context_hints.append(
+                    "The combination of parameters (dataset RID, transaction RID, branch) may be invalid"
+                )
+                context_hints.append(
+                    "Try without --transaction-rid, or verify the transaction belongs to this dataset"
+                )
+            if (
+                "opentransactionalreadyexists" in error_lower
+                or "transaction already exists" in error_lower
+            ):
+                context_hints.append(
+                    "There's already an open transaction for this dataset"
+                )
+                context_hints.append(
+                    "Use the existing transaction with --transaction-rid, or commit/abort it first"
+                )
+                context_hints.append(
+                    "List transactions with: pltr dataset transactions list "
+                    + dataset_rid
+                )
+
+            # Try to get more detailed error information from the exception
+            if hasattr(e, "__dict__"):
+                for attr in ["detail", "details", "error_message", "description"]:
+                    if hasattr(e, attr):
+                        detail = getattr(e, attr)
+                        if detail and str(detail).strip():
+                            error_msg += f" - {detail}"
+                            break
+
+            full_error = f"Failed to upload file {file_path.name} to dataset {dataset_rid}: {error_msg}"
+            if context_hints:
+                full_error += f". Suggestions: {'; '.join(context_hints)}"
+
+            raise RuntimeError(full_error)
 
     def download_file(
         self,
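
The replacement block above is long but follows one generic pattern: normalize the exception into a message, then match substrings against a table of hints. A condensed, self-contained sketch of that pattern (an illustration of the idea, not the package's code verbatim):

def enrich_error(e: Exception, context: str) -> RuntimeError:
    """Normalize an exception, then attach matching troubleshooting hints."""
    msg = str(e).strip() or type(e).__name__
    # Pull an HTTP status off the exception's response object, if present.
    status = getattr(getattr(e, "response", None), "status_code", None)
    if status is not None:
        msg = f"HTTP {status}: {msg}"

    # Substring -> suggestion table; extending it never touches control flow.
    hints = {
        "403": "Check your authentication credentials and permissions",
        "not found": "Verify the dataset and transaction RIDs are correct",
        "transaction": "Check if the transaction is still open and not expired",
    }
    matched = [hint for key, hint in hints.items() if key in msg.lower()]
    if matched:
        msg += f". Suggestions: {'; '.join(matched)}"
    return RuntimeError(f"{context}: {msg}")
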
@@ -560,6 +792,395 @@ class DatasetService(BaseService):
                 f"Failed to create view '{view_name}' for dataset {dataset_rid}: {e}"
             )
 
+    def get_schedules(self, dataset_rid: str) -> List[Dict[str, Any]]:
+        """
+        Get schedules that target a specific dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+
+        Returns:
+            List of schedule information dictionaries
+        """
+        try:
+            schedules = self.service.Dataset.get_schedules(dataset_rid=dataset_rid)
+
+            return [
+                {
+                    "schedule_rid": getattr(schedule, "rid", None),
+                    "name": getattr(schedule, "name", None),
+                    "description": getattr(schedule, "description", None),
+                    "enabled": getattr(schedule, "enabled", None),
+                    "created_time": getattr(schedule, "created_time", None),
+                }
+                for schedule in schedules
+            ]
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get schedules for dataset {dataset_rid}: {e}"
+            )
+
+    def get_jobs(
+        self, dataset_rid: str, branch: str = "master"
+    ) -> List[Dict[str, Any]]:
+        """
+        Get jobs for a specific dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch: Dataset branch name
+
+        Returns:
+            List of job information dictionaries
+        """
+        try:
+            jobs = self.service.Dataset.jobs(
+                dataset_rid=dataset_rid, branch_name=branch
+            )
+
+            return [
+                {
+                    "job_rid": getattr(job, "rid", None),
+                    "name": getattr(job, "name", None),
+                    "status": getattr(job, "status", None),
+                    "created_time": getattr(job, "created_time", None),
+                    "started_time": getattr(job, "started_time", None),
+                    "completed_time": getattr(job, "completed_time", None),
+                }
+                for job in jobs
+            ]
+        except Exception as e:
+            raise RuntimeError(f"Failed to get jobs for dataset {dataset_rid}: {e}")
+
+    def delete_branch(self, dataset_rid: str, branch_name: str) -> Dict[str, Any]:
+        """
+        Delete a branch from a dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch_name: Branch name to delete
+
+        Returns:
+            Deletion result information
+        """
+        try:
+            self.service.Dataset.Branch.delete(
+                dataset_rid=dataset_rid, branch_name=branch_name
+            )
+
+            return {
+                "dataset_rid": dataset_rid,
+                "branch_name": branch_name,
+                "status": "deleted",
+                "success": True,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to delete branch '{branch_name}' from dataset {dataset_rid}: {e}"
+            )
+
+    def get_branch(self, dataset_rid: str, branch_name: str) -> Dict[str, Any]:
+        """
+        Get detailed information about a specific branch.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch_name: Branch name
+
+        Returns:
+            Branch information dictionary
+        """
+        try:
+            branch = self.service.Dataset.Branch.get(
+                dataset_rid=dataset_rid, branch_name=branch_name
+            )
+
+            return {
+                "name": branch_name,
+                "dataset_rid": dataset_rid,
+                "transaction_rid": getattr(branch, "transaction_rid", None),
+                "created_time": getattr(branch, "created_time", None),
+                "created_by": getattr(branch, "created_by", None),
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get branch '{branch_name}' from dataset {dataset_rid}: {e}"
+            )
+
+    def get_branch_transactions(
+        self, dataset_rid: str, branch_name: str
+    ) -> List[Dict[str, Any]]:
+        """
+        Get transaction history for a specific branch.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch_name: Branch name
+
+        Returns:
+            List of transaction information dictionaries
+        """
+        try:
+            transactions = self.service.Dataset.Branch.transactions(
+                dataset_rid=dataset_rid, branch_name=branch_name
+            )
+
+            return [
+                {
+                    "transaction_rid": getattr(transaction, "rid", None),
+                    "status": getattr(transaction, "status", None),
+                    "transaction_type": getattr(transaction, "transaction_type", None),
+                    "branch": branch_name,
+                    "created_time": getattr(transaction, "created_time", None),
+                    "created_by": getattr(transaction, "created_by", None),
+                    "committed_time": getattr(transaction, "committed_time", None),
+                    "aborted_time": getattr(transaction, "aborted_time", None),
+                }
+                for transaction in transactions
+            ]
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get transaction history for branch '{branch_name}' in dataset {dataset_rid}: {e}"
+            )
+
+    def delete_file(
+        self, dataset_rid: str, file_path: str, branch: str = "master"
+    ) -> Dict[str, Any]:
+        """
+        Delete a file from a dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            file_path: Path of file within dataset to delete
+            branch: Dataset branch name
+
+        Returns:
+            Deletion result information
+        """
+        try:
+            self.service.Dataset.File.delete(
+                dataset_rid=dataset_rid, file_path=file_path, branch_name=branch
+            )
+
+            return {
+                "dataset_rid": dataset_rid,
+                "file_path": file_path,
+                "branch": branch,
+                "status": "deleted",
+                "success": True,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to delete file {file_path} from dataset {dataset_rid}: {e}"
+            )
+
+    def get_file_info(
+        self, dataset_rid: str, file_path: str, branch: str = "master"
+    ) -> Dict[str, Any]:
+        """
+        Get metadata about a file in a dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            file_path: Path of file within dataset
+            branch: Dataset branch name
+
+        Returns:
+            File metadata information
+        """
+        try:
+            file_info = self.service.Dataset.File.get(
+                dataset_rid=dataset_rid, file_path=file_path, branch_name=branch
+            )
+
+            return {
+                "path": file_path,
+                "dataset_rid": dataset_rid,
+                "branch": branch,
+                "size_bytes": getattr(file_info, "size_bytes", None),
+                "last_modified": getattr(file_info, "last_modified", None),
+                "transaction_rid": getattr(file_info, "transaction_rid", None),
+                "created_time": getattr(file_info, "created_time", None),
+                "content_type": getattr(file_info, "content_type", None),
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get file info for {file_path} in dataset {dataset_rid}: {e}"
+            )
+
+    def get_transaction_build(
+        self, dataset_rid: str, transaction_rid: str
+    ) -> Dict[str, Any]:
+        """
+        Get build information for a transaction.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            transaction_rid: Transaction Resource Identifier
+
+        Returns:
+            Build information dictionary
+        """
+        try:
+            build = self.service.Dataset.Transaction.build(
+                dataset_rid=dataset_rid, transaction_rid=transaction_rid
+            )
+
+            return {
+                "transaction_rid": transaction_rid,
+                "dataset_rid": dataset_rid,
+                "build_rid": getattr(build, "rid", None),
+                "status": getattr(build, "status", None),
+                "started_time": getattr(build, "started_time", None),
+                "completed_time": getattr(build, "completed_time", None),
+                "duration_ms": getattr(build, "duration_ms", None),
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get build for transaction {transaction_rid}: {e}"
+            )
+
+    def get_view(self, view_rid: str, branch: str = "master") -> Dict[str, Any]:
+        """
+        Get detailed information about a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            branch: Branch name
+
+        Returns:
+            View information dictionary
+        """
+        try:
+            view = self.service.Dataset.View.get(
+                dataset_rid=view_rid, branch_name=branch
+            )
+
+            return {
+                "view_rid": view_rid,
+                "name": getattr(view, "name", None),
+                "description": getattr(view, "description", None),
+                "branch": branch,
+                "created_time": getattr(view, "created_time", None),
+                "created_by": getattr(view, "created_by", None),
+                "backing_datasets": getattr(view, "backing_datasets", []),
+                "primary_key": getattr(view, "primary_key", None),
+            }
+        except Exception as e:
+            raise RuntimeError(f"Failed to get view {view_rid}: {e}")
+
+    def add_backing_datasets(
+        self, view_rid: str, dataset_rids: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Add backing datasets to a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            dataset_rids: List of dataset RIDs to add as backing datasets
+
+        Returns:
+            Operation result
+        """
+        try:
+            result = self.service.Dataset.View.add_backing_datasets(
+                dataset_rid=view_rid, backing_datasets=dataset_rids
+            )
+
+            return {
+                "view_rid": view_rid,
+                "added_datasets": dataset_rids,
+                "success": True,
+                "result": result,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to add backing datasets to view {view_rid}: {e}"
+            )
+
+    def remove_backing_datasets(
+        self, view_rid: str, dataset_rids: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Remove backing datasets from a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            dataset_rids: List of dataset RIDs to remove as backing datasets
+
+        Returns:
+            Operation result
+        """
+        try:
+            result = self.service.Dataset.View.remove_backing_datasets(
+                dataset_rid=view_rid, backing_datasets=dataset_rids
+            )
+
+            return {
+                "view_rid": view_rid,
+                "removed_datasets": dataset_rids,
+                "success": True,
+                "result": result,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to remove backing datasets from view {view_rid}: {e}"
+            )
+
+    def replace_backing_datasets(
+        self, view_rid: str, dataset_rids: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Replace all backing datasets in a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            dataset_rids: List of dataset RIDs to set as backing datasets
+
+        Returns:
+            Operation result
+        """
+        try:
+            result = self.service.Dataset.View.replace_backing_datasets(
+                dataset_rid=view_rid, backing_datasets=dataset_rids
+            )
+
+            return {
+                "view_rid": view_rid,
+                "new_datasets": dataset_rids,
+                "success": True,
+                "result": result,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to replace backing datasets in view {view_rid}: {e}"
+            )
+
+    def add_primary_key(self, view_rid: str, key_fields: List[str]) -> Dict[str, Any]:
+        """
+        Add a primary key to a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            key_fields: List of field names to use as primary key
+
+        Returns:
+            Operation result
+        """
+        try:
+            result = self.service.Dataset.View.add_primary_key(
+                dataset_rid=view_rid, primary_key=key_fields
+            )
+
+            return {
+                "view_rid": view_rid,
+                "primary_key_fields": key_fields,
+                "success": True,
+                "result": result,
+            }
+        except Exception as e:
+            raise RuntimeError(f"Failed to add primary key to view {view_rid}: {e}")
+
     def _format_dataset_info(self, dataset: Any) -> Dict[str, Any]:
         """
         Format dataset information for consistent output.
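
All of the new read helpers in this hunk share one shape: call the underlying foundry_sdk endpoint, then flatten the response into a plain dict via defensive getattr calls, so a missing SDK attribute degrades to None instead of raising. A hedged usage sketch (the service setup and every RID are placeholders, as in the earlier examples):

# Walk the transaction history of a branch; keys mirror the dicts built above.
for tx in service.get_branch_transactions("ri.foundry.main.dataset.example", "master"):
    print(tx["transaction_rid"], tx["status"], tx["created_time"])

# Inspect a single file's metadata on the default branch.
info = service.get_file_info("ri.foundry.main.dataset.example", "raw/orders.csv")
print(info["size_bytes"], info["content_type"])
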
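The view helpers follow the same convention for writes: each returns a small status dict (success, the echoed inputs, and the raw SDK result) rather than the SDK object itself. Again a sketch with placeholder RIDs and a hypothetical field name:

# Swap a view's backing datasets wholesale, then declare a primary key.
service.replace_backing_datasets(
    "ri.foundry.main.view.example",
    ["ri.foundry.main.dataset.a", "ri.foundry.main.dataset.b"],
)
service.add_primary_key("ri.foundry.main.view.example", ["order_id"])
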