pltr-cli 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pltr/services/dataset.py CHANGED
@@ -4,6 +4,7 @@ Dataset service wrapper for Foundry SDK.
 
 from typing import Any, Optional, List, Dict, Union
 from pathlib import Path
+import csv
 
 from .base import BaseService
 
@@ -55,6 +56,153 @@ class DatasetService(BaseService):
         except Exception as e:
             raise RuntimeError(f"Failed to get schema for dataset {dataset_rid}: {e}")
 
+    def put_schema(
+        self,
+        dataset_rid: str,
+        schema: Any,
+        branch: str = "master",
+        transaction_rid: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Set or update dataset schema.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            schema: DatasetSchema object with field definitions
+            branch: Dataset branch name
+            transaction_rid: Optional transaction RID
+
+        Returns:
+            Schema update result
+        """
+        try:
+            from foundry_sdk.v2.core.models import DatasetSchema
+
+            # Ensure schema is a DatasetSchema object
+            if not isinstance(schema, DatasetSchema):
+                raise ValueError("Schema must be a DatasetSchema object")
+
+            result = self.service.Dataset.put_schema(
+                dataset_rid=dataset_rid,
+                schema=schema,
+                branch_name=branch,
+                end_transaction_rid=transaction_rid,
+            )
+
+            return {
+                "dataset_rid": dataset_rid,
+                "branch": branch,
+                "transaction_rid": transaction_rid,
+                "status": "Schema updated successfully",
+                "schema": result,
+            }
+        except Exception as e:
+            raise RuntimeError(f"Failed to set schema for dataset {dataset_rid}: {e}")
+
+    def infer_schema_from_csv(
+        self, csv_path: Union[str, Path], sample_rows: int = 100
+    ) -> Any:
+        """
+        Infer schema from a CSV file by analyzing headers and sample data.
+
+        Args:
+            csv_path: Path to CSV file
+            sample_rows: Number of rows to sample for type inference
+
+        Returns:
+            DatasetSchema object with inferred field types
+        """
+        from foundry_sdk.v2.core.models import DatasetSchema, DatasetFieldSchema
+
+        csv_path = Path(csv_path)
+        if not csv_path.exists():
+            raise FileNotFoundError(f"CSV file not found: {csv_path}")
+
+        def infer_type(values: List[str]) -> tuple[str, bool]:
+            """
+            Infer type from a list of values.
+            Returns (type_name, nullable)
+            """
+            # Remove empty strings and track if nullable
+            non_empty = [v for v in values if v.strip()]
+            nullable = len(non_empty) < len(values) or len(non_empty) == 0
+
+            if not non_empty:
+                return ("STRING", True)
+
+            # Check for boolean
+            bool_values = {"true", "false", "yes", "no", "1", "0"}
+            if all(v.lower() in bool_values for v in non_empty):
+                return ("BOOLEAN", nullable)
+
+            # Check for integer
+            try:
+                for v in non_empty:
+                    int(v)
+                return ("INTEGER", nullable)
+            except ValueError:
+                pass
+
+            # Check for double
+            try:
+                for v in non_empty:
+                    float(v)
+                return ("DOUBLE", nullable)
+            except ValueError:
+                pass
+
+            # Check for date patterns
+            date_patterns = [
+                r"^\d{4}-\d{2}-\d{2}$",  # YYYY-MM-DD
+                r"^\d{2}/\d{2}/\d{4}$",  # MM/DD/YYYY
+                r"^\d{2}-\d{2}-\d{4}$",  # DD-MM-YYYY
+            ]
+            import re
+
+            for pattern in date_patterns:
+                if all(re.match(pattern, v) for v in non_empty[:10]):  # Check first 10
+                    return ("DATE", nullable)
+
+            # Check for timestamp patterns
+            if all(
+                "-" in v and ":" in v and len(v) > 10 for v in non_empty[:10]
+            ):  # Basic timestamp check
+                return ("TIMESTAMP", nullable)
+
+            # Default to string
+            return ("STRING", nullable)
+
+        # Read CSV and analyze
+        with open(csv_path, "r", encoding="utf-8") as f:
+            reader = csv.DictReader(f)
+            headers = reader.fieldnames
+
+            if not headers:
+                raise ValueError("CSV file has no headers")
+
+            # Collect sample values for each column
+            column_values: Dict[str, List[str]] = {col: [] for col in headers}
+            for i, row in enumerate(reader):
+                if i >= sample_rows:
+                    break
+                for col in headers:
+                    column_values[col].append(row.get(col, ""))
+
+        # Infer types for each column
+        fields = []
+        for col in headers:
+            values = column_values[col]
+            field_type, nullable = infer_type(values)
+
+            # Clean column name (remove special characters for field name)
+            clean_name = col.strip().replace(" ", "_").replace("-", "_")
+
+            fields.append(
+                DatasetFieldSchema(name=clean_name, type=field_type, nullable=nullable)
+            )
+
+        return DatasetSchema(field_schema_list=fields)
+
     def create_dataset(
         self, name: str, parent_folder_rid: Optional[str] = None
     ) -> Dict[str, Any]:
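
The two schema additions above are designed to be used together: infer_schema_from_csv builds a DatasetSchema from a local file, and put_schema pushes it to a dataset branch. A minimal usage sketch follows; the DatasetService construction and the RID are illustrative placeholders, not taken from this diff.

from pltr.services.dataset import DatasetService

service = DatasetService()  # assumption: pltr-cli's usual profile/auth setup applies
schema = service.infer_schema_from_csv("orders.csv", sample_rows=200)
result = service.put_schema(
    dataset_rid="ri.foundry.main.dataset.<placeholder>",  # hypothetical RID
    schema=schema,
    branch="master",
)
print(result["status"])
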
@@ -560,6 +708,395 @@ class DatasetService(BaseService):
                 f"Failed to create view '{view_name}' for dataset {dataset_rid}: {e}"
             )
 
+    def get_schedules(self, dataset_rid: str) -> List[Dict[str, Any]]:
+        """
+        Get schedules that target a specific dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+
+        Returns:
+            List of schedule information dictionaries
+        """
+        try:
+            schedules = self.service.Dataset.get_schedules(dataset_rid=dataset_rid)
+
+            return [
+                {
+                    "schedule_rid": getattr(schedule, "rid", None),
+                    "name": getattr(schedule, "name", None),
+                    "description": getattr(schedule, "description", None),
+                    "enabled": getattr(schedule, "enabled", None),
+                    "created_time": getattr(schedule, "created_time", None),
+                }
+                for schedule in schedules
+            ]
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get schedules for dataset {dataset_rid}: {e}"
+            )
+
+    def get_jobs(
+        self, dataset_rid: str, branch: str = "master"
+    ) -> List[Dict[str, Any]]:
+        """
+        Get jobs for a specific dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch: Dataset branch name
+
+        Returns:
+            List of job information dictionaries
+        """
+        try:
+            jobs = self.service.Dataset.jobs(
+                dataset_rid=dataset_rid, branch_name=branch
+            )
+
+            return [
+                {
+                    "job_rid": getattr(job, "rid", None),
+                    "name": getattr(job, "name", None),
+                    "status": getattr(job, "status", None),
+                    "created_time": getattr(job, "created_time", None),
+                    "started_time": getattr(job, "started_time", None),
+                    "completed_time": getattr(job, "completed_time", None),
+                }
+                for job in jobs
+            ]
+        except Exception as e:
+            raise RuntimeError(f"Failed to get jobs for dataset {dataset_rid}: {e}")
+
+    def delete_branch(self, dataset_rid: str, branch_name: str) -> Dict[str, Any]:
+        """
+        Delete a branch from a dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch_name: Branch name to delete
+
+        Returns:
+            Deletion result information
+        """
+        try:
+            self.service.Dataset.Branch.delete(
+                dataset_rid=dataset_rid, branch_name=branch_name
+            )
+
+            return {
+                "dataset_rid": dataset_rid,
+                "branch_name": branch_name,
+                "status": "deleted",
+                "success": True,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to delete branch '{branch_name}' from dataset {dataset_rid}: {e}"
+            )
+
+    def get_branch(self, dataset_rid: str, branch_name: str) -> Dict[str, Any]:
+        """
+        Get detailed information about a specific branch.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch_name: Branch name
+
+        Returns:
+            Branch information dictionary
+        """
+        try:
+            branch = self.service.Dataset.Branch.get(
+                dataset_rid=dataset_rid, branch_name=branch_name
+            )
+
+            return {
+                "name": branch_name,
+                "dataset_rid": dataset_rid,
+                "transaction_rid": getattr(branch, "transaction_rid", None),
+                "created_time": getattr(branch, "created_time", None),
+                "created_by": getattr(branch, "created_by", None),
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get branch '{branch_name}' from dataset {dataset_rid}: {e}"
+            )
+
+    def get_branch_transactions(
+        self, dataset_rid: str, branch_name: str
+    ) -> List[Dict[str, Any]]:
+        """
+        Get transaction history for a specific branch.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            branch_name: Branch name
+
+        Returns:
+            List of transaction information dictionaries
+        """
+        try:
+            transactions = self.service.Dataset.Branch.transactions(
+                dataset_rid=dataset_rid, branch_name=branch_name
+            )
+
+            return [
+                {
+                    "transaction_rid": getattr(transaction, "rid", None),
+                    "status": getattr(transaction, "status", None),
+                    "transaction_type": getattr(transaction, "transaction_type", None),
+                    "branch": branch_name,
+                    "created_time": getattr(transaction, "created_time", None),
+                    "created_by": getattr(transaction, "created_by", None),
+                    "committed_time": getattr(transaction, "committed_time", None),
+                    "aborted_time": getattr(transaction, "aborted_time", None),
+                }
+                for transaction in transactions
+            ]
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get transaction history for branch '{branch_name}' in dataset {dataset_rid}: {e}"
+            )
+
+    def delete_file(
+        self, dataset_rid: str, file_path: str, branch: str = "master"
+    ) -> Dict[str, Any]:
+        """
+        Delete a file from a dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            file_path: Path of file within dataset to delete
+            branch: Dataset branch name
+
+        Returns:
+            Deletion result information
+        """
+        try:
+            self.service.Dataset.File.delete(
+                dataset_rid=dataset_rid, file_path=file_path, branch_name=branch
+            )
+
+            return {
+                "dataset_rid": dataset_rid,
+                "file_path": file_path,
+                "branch": branch,
+                "status": "deleted",
+                "success": True,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to delete file {file_path} from dataset {dataset_rid}: {e}"
+            )
+
+    def get_file_info(
+        self, dataset_rid: str, file_path: str, branch: str = "master"
+    ) -> Dict[str, Any]:
+        """
+        Get metadata about a file in a dataset.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            file_path: Path of file within dataset
+            branch: Dataset branch name
+
+        Returns:
+            File metadata information
+        """
+        try:
+            file_info = self.service.Dataset.File.get(
+                dataset_rid=dataset_rid, file_path=file_path, branch_name=branch
+            )
+
+            return {
+                "path": file_path,
+                "dataset_rid": dataset_rid,
+                "branch": branch,
+                "size_bytes": getattr(file_info, "size_bytes", None),
+                "last_modified": getattr(file_info, "last_modified", None),
+                "transaction_rid": getattr(file_info, "transaction_rid", None),
+                "created_time": getattr(file_info, "created_time", None),
+                "content_type": getattr(file_info, "content_type", None),
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get file info for {file_path} in dataset {dataset_rid}: {e}"
+            )
+
+    def get_transaction_build(
+        self, dataset_rid: str, transaction_rid: str
+    ) -> Dict[str, Any]:
+        """
+        Get build information for a transaction.
+
+        Args:
+            dataset_rid: Dataset Resource Identifier
+            transaction_rid: Transaction Resource Identifier
+
+        Returns:
+            Build information dictionary
+        """
+        try:
+            build = self.service.Dataset.Transaction.build(
+                dataset_rid=dataset_rid, transaction_rid=transaction_rid
+            )
+
+            return {
+                "transaction_rid": transaction_rid,
+                "dataset_rid": dataset_rid,
+                "build_rid": getattr(build, "rid", None),
+                "status": getattr(build, "status", None),
+                "started_time": getattr(build, "started_time", None),
+                "completed_time": getattr(build, "completed_time", None),
+                "duration_ms": getattr(build, "duration_ms", None),
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to get build for transaction {transaction_rid}: {e}"
+            )
+
+    def get_view(self, view_rid: str, branch: str = "master") -> Dict[str, Any]:
+        """
+        Get detailed information about a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            branch: Branch name
+
+        Returns:
+            View information dictionary
+        """
+        try:
+            view = self.service.Dataset.View.get(
+                dataset_rid=view_rid, branch_name=branch
+            )
+
+            return {
+                "view_rid": view_rid,
+                "name": getattr(view, "name", None),
+                "description": getattr(view, "description", None),
+                "branch": branch,
+                "created_time": getattr(view, "created_time", None),
+                "created_by": getattr(view, "created_by", None),
+                "backing_datasets": getattr(view, "backing_datasets", []),
+                "primary_key": getattr(view, "primary_key", None),
+            }
+        except Exception as e:
+            raise RuntimeError(f"Failed to get view {view_rid}: {e}")
+
+    def add_backing_datasets(
+        self, view_rid: str, dataset_rids: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Add backing datasets to a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            dataset_rids: List of dataset RIDs to add as backing datasets
+
+        Returns:
+            Operation result
+        """
+        try:
+            result = self.service.Dataset.View.add_backing_datasets(
+                dataset_rid=view_rid, backing_datasets=dataset_rids
+            )
+
+            return {
+                "view_rid": view_rid,
+                "added_datasets": dataset_rids,
+                "success": True,
+                "result": result,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to add backing datasets to view {view_rid}: {e}"
+            )
+
+    def remove_backing_datasets(
+        self, view_rid: str, dataset_rids: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Remove backing datasets from a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            dataset_rids: List of dataset RIDs to remove as backing datasets
+
+        Returns:
+            Operation result
+        """
+        try:
+            result = self.service.Dataset.View.remove_backing_datasets(
+                dataset_rid=view_rid, backing_datasets=dataset_rids
+            )
+
+            return {
+                "view_rid": view_rid,
+                "removed_datasets": dataset_rids,
+                "success": True,
+                "result": result,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to remove backing datasets from view {view_rid}: {e}"
+            )
+
+    def replace_backing_datasets(
+        self, view_rid: str, dataset_rids: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Replace all backing datasets in a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            dataset_rids: List of dataset RIDs to set as backing datasets
+
+        Returns:
+            Operation result
+        """
+        try:
+            result = self.service.Dataset.View.replace_backing_datasets(
+                dataset_rid=view_rid, backing_datasets=dataset_rids
+            )
+
+            return {
+                "view_rid": view_rid,
+                "new_datasets": dataset_rids,
+                "success": True,
+                "result": result,
+            }
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to replace backing datasets in view {view_rid}: {e}"
+            )
+
+    def add_primary_key(self, view_rid: str, key_fields: List[str]) -> Dict[str, Any]:
+        """
+        Add a primary key to a view.
+
+        Args:
+            view_rid: View Resource Identifier
+            key_fields: List of field names to use as primary key
+
+        Returns:
+            Operation result
+        """
+        try:
+            result = self.service.Dataset.View.add_primary_key(
+                dataset_rid=view_rid, primary_key=key_fields
+            )
+
+            return {
+                "view_rid": view_rid,
+                "primary_key_fields": key_fields,
+                "success": True,
+                "result": result,
+            }
+        except Exception as e:
+            raise RuntimeError(f"Failed to add primary key to view {view_rid}: {e}")
+
     def _format_dataset_info(self, dataset: Any) -> Dict[str, Any]:
         """
         Format dataset information for consistent output.
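
The remaining 0.7.0 additions round out branch, file, transaction, and view management on DatasetService. A hedged sketch of the new surface; the RIDs, branch name, and field name below are placeholders, and client setup is assumed as in the earlier sketch.

service = DatasetService()  # assumption: same setup as above

# Inspect a branch, review its transaction history, then remove it
info = service.get_branch("ri.foundry.main.dataset.<placeholder>", "develop")
history = service.get_branch_transactions("ri.foundry.main.dataset.<placeholder>", "develop")
service.delete_branch("ri.foundry.main.dataset.<placeholder>", "develop")

# Adjust the datasets backing a view, then declare a primary key
service.add_backing_datasets(
    view_rid="ri.foundry.main.view.<placeholder>",  # hypothetical view RID
    dataset_rids=["ri.foundry.main.dataset.<placeholder>"],
)
service.add_primary_key("ri.foundry.main.view.<placeholder>", ["order_id"])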