pyxecm 1.5__py3-none-any.whl → 1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pyxecm might be problematic.

@@ -2,30 +2,30 @@
2
2
  Payload Module to implement functions to process Terrarium payload
3
3
 
4
4
  This code processes a YAML payload file that includes various settings:
5
- * WebHooks (URLs) to call (e.g. to start-up external services or applications)
6
- * OTDS partitions and OAuth clients
7
- * OTDS trusted sites and system attributes
8
- * OTDS licenses
9
- * Extended ECM users and groups
10
- * Microsoft 365 user, groups, and teams
11
- * Salesforce users and groups
12
- * SuccessFactors users
13
- * Core Share users and groups
14
- * Extended ECM Admin Settings (LLConfig)
15
- * Extended ECM External System Connections (SAP, SuccessFactors, ...)
16
- * Extended ECM Transport Packages (scenarios and demo content)
17
- * Extended ECM CS Applications (typically based on Web Reports)
18
- * Extended ECM Web Reports to run
19
- * Extended ECM Workspaces to create (incl. members, workspace relationships)
20
- * Extended ECM User photos, user favorites and user settings
21
- * Extended ECM Items to create and permissions to apply
22
- * Extended ECM Items to rename
23
- * Extended ECM Documents to generate (from templates)
24
- * Extended ECM Assignments (used e.g. for Government scenario)
25
- * Extended ECM Records Management settings, Security Clearance, Supplemental Markings, and Holds
26
- * SAP RFCs (Remote Function Calls)
27
- * Commands to execute in Kubernetes Pods
28
- * Browser Automations (for things that cannot be automated via an API)
5
+ - WebHooks (URLs) to call (e.g. to start-up external services or applications)
6
+ - OTDS partitions and OAuth clients
7
+ - OTDS trusted sites and system attributes
8
+ - OTDS licenses
9
+ - Extended ECM users and groups
10
+ - Microsoft 365 users, groups, and teams
11
+ - Salesforce users and groups
12
+ - SuccessFactors users
13
+ - Core Share users and groups
14
+ - Extended ECM Admin Settings (LLConfig)
15
+ - Extended ECM External System Connections (SAP, SuccessFactors, ...)
16
+ - Extended ECM Transport Packages (scenarios and demo content)
17
+ - Extended ECM CS Applications (typically based on Web Reports)
18
+ - Extended ECM Web Reports to run
19
+ - Extended ECM Workspaces to create (incl. members, workspace relationships)
20
+ - Extended ECM User photos, user favorites and user settings
21
+ - Extended ECM Items to create and permissions to apply
22
+ - Extended ECM Items to rename
23
+ - Extended ECM Documents to generate (from templates)
24
+ - Extended ECM Assignments (used e.g. for Government scenario)
25
+ - Extended ECM Records Management settings, Security Clearance, Supplemental Markings, and Holds
26
+ - SAP RFCs (Remote Function Calls)
27
+ - Commands to execute in Kubernetes Pods
28
+ - Browser Automations (for things that cannot be automated via an API)
29
29
 
30
30
  This code typically runs in a container as part of the cloud automation.
31
31
 
@@ -65,7 +65,9 @@ determine_workspace_type_and_template_id: determine the IDs of type and template
65
65
 
66
66
  process_payload: process payload (main method)
67
67
  process_web_hooks: process list of web hooks
68
- process_partitions: process the OTDS partitions
68
+ process_resources: process OTDS resources in payload and create them in OTDS
69
+ process_synchronized_partition: process OTDS synchronized partitions in payload and create them in OTDS
70
+ process_partitions: process OTDS partitions in payload and create them in OTDS
69
71
  process_partition_licenses: process the licenses that should be assigned to OTDS partitions
70
72
  (this includes existing partitions)
71
73
  process_oauth_clients: process the OTDS OAuth clients
@@ -130,6 +132,7 @@ process_assignments: process assignments of workspaces / documents to users / gr
130
132
  process_user_licenses: process and apply licenses to all Extended ECM users (used for OTIV)
131
133
  process_exec_pod_commands: process Kubernetes pod commands
132
134
  process_document_generators: Generate documents for a defined workspace type based on template
135
+ process_workflows: Initiate and process workflows for a defined workspace type and folder path
133
136
  process_browser_automations: process Selenium-based browser automation payload
134
137
  init_sap: initialize SAP object for RFC communication
135
138
  process_sap_rfcs: process SAP Remote Function Calls (RFC) to trigger automation in SAP S/4HANA
@@ -146,6 +149,8 @@ process_bulk_workspaces_synonym_lookup: Use a datasource to lookup the workspace
146
149
  process_bulk_workspaces_lookup: Use a combination of workspace name, workspace type, and workspace datasource
147
150
  (using synonyms) to lookup the workspace name and ID
148
151
  process_bulk_workspace_relationships: Process workspaces in payload and bulk create them in Extended ECM (multi-threaded)
152
+ get_bulk_workspace_relationship_endpoint: Determine the node ID of the workspace that is one of the endpoints
153
+ of the workspace relationship (either 'from' or 'to')
149
154
  process_bulk_workspace_relationships_worker: This is the thread worker to create workspaces relationships in bulk.
150
155
  prepare_category_data: Prepare the category information for a new or updated item (document or workspace)
151
156
  process_bulk_documents: Process bulkDocuments in payload and bulk create them in Extended ECM (multi-threaded)
@@ -175,15 +180,20 @@ import threading
175
180
  import traceback
176
181
  import copy
177
182
  import time
183
+ from datetime import datetime, timedelta
178
184
  import fnmatch
179
-
180
185
  import base64
181
186
  import gzip
187
+ from ast import literal_eval
188
+
189
+ from dateutil.parser import parse
190
+
182
191
  import yaml
183
192
  import hcl2.api
184
193
 
194
+
185
195
  # OpenText specific modules:
186
- from pyxecm import OTAC, OTCS, OTDS, OTIV, OTMM, CoreShare
196
+ from pyxecm import OTAC, OTCS, OTDS, OTIV, OTMM, CoreShare, OTAWP, AVTS
187
197
  from pyxecm.customizer.k8s import K8s
188
198
  from pyxecm.customizer.m365 import M365
189
199
  from pyxecm.customizer.sap import SAP
@@ -241,6 +251,7 @@ class Payload:
241
251
  _servicenow: ServiceNow | None
242
252
  _browser_automation: BrowserAutomation | None
243
253
  _custom_settings_dir = ""
254
+ _otawp: OTAWP | None
244
255
 
245
256
  # _payload_source (string): This is either path + filename of the yaml payload
246
257
  # or an path + filename of the Terraform HCL payload
@@ -272,6 +283,19 @@ class Payload:
272
283
  _webhooks = []
273
284
  _webhooks_post = []
274
285
 
286
+ # _resources: List of OTDS resources. Each element
287
+ # is a dict with these keys:
288
+ # - enabled (bool, optional, default = True)
289
+ # - name (str, mandatory)
290
+ # - description (str, optional)
291
+ # - display_name (str, optional)
292
+ # - activate (bool, optional, default = True) - if a secret is provided the resource will automatically be activated
293
+ # - allow_impersonation (bool, optional, default = True)
294
+ # - resource_id (str, optional, default = None) - a predefined resource ID. If specified, a secret also has to be provided
295
+ # - secret (string, optional, default = None) - a predefined secret. Should be 24 characters long and should end with '=='
296
+ # - additional_payload (dict, optional)
297
+ _resources = []
298
+
275
299
  # _partitions: List of OTDS partitions (for users and groups). Each element
276
300
  # is a dict with these keys:
277
301
  # - enabled (bool, optional, default = True)
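
To make the new resources section more concrete, a single entry could look like the following sketch, written as a Python dict for readability (in the actual payload this is a YAML mapping, and all concrete names and values here are invented):

    resource_entry = {
        "enabled": True,                    # optional, default = True
        "name": "myresource",               # mandatory
        "description": "Example OTDS resource",
        "display_name": "My Resource",
        "activate": True,                   # ignored if a secret is given (the resource is then activated automatically)
        "allow_impersonation": True,
        # "resource_id" and "secret" are optional; if a resource_id is provided,
        # a matching secret has to be provided as well.
        "additional_payload": {},
    }
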
@@ -321,7 +345,7 @@ class Payload:
321
345
  # _trusted_sites: List of OTDS trusted sites. Each element
322
346
  # is a dict with these keys:
323
347
  # - enabled (bool, optional, default = True)
324
- # - url (str)
348
+ # - url (str, mandatory)
325
349
  _trusted_sites = []
326
350
 
327
351
  # _system_attributes: List of OTDS System Attributes. Each element
@@ -568,6 +592,16 @@ class Payload:
568
592
  # - exec_as_user (str, optional, default = "")
569
593
  _doc_generators = []
570
594
 
595
+ # _workflows: List of workflow initiations inside workspace instances of a workspace type
596
+ # Each element is a dict with these keys:
597
+ # - enabled (bool, optional, default = True)
598
+ # - worklow_nickname (str, mandatory) - the nickname of the workflow
599
+ # - initiate_as_user (str, mandatory) - user that initiates the workflow
600
+ # - workspace_type (str, mandatory) - for each instance of the given workspace type a workflow is started
601
+ # - workspace_folder_path (list, optional) - the subfolder that contains the document the workflow is started with
602
+ # - attributes (list, optional) - the list of attributes (name, value) the workflow is started with
603
+ _workflows = []
604
+
571
605
  # _browser_automations: List of browser automation for things that can only be
572
606
  # automated via the web user interface. Each element is a dict with these keys:
573
607
  # - enabled (bool, optional, default = True)
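
A hypothetical entry of the new workflows section, following the keys documented above (the key spelling worklow_nickname is kept exactly as in the comment; the nickname, user and workspace type are invented):

    workflow_entry = {
        "enabled": True,
        "worklow_nickname": "wf_contract_approval",      # nickname of the workflow to initiate
        "initiate_as_user": "jdoe",                      # user that initiates the workflow
        "workspace_type": "Contract",                    # one workflow per instance of this workspace type
        "workspace_folder_path": ["Contract Documents"], # subfolder holding the document the workflow starts with
        "attributes": [{"name": "Priority", "value": "High"}],
    }
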
@@ -643,8 +677,9 @@ class Payload:
643
677
  # - sn_password (str, optional, default = "")
644
678
  # - sn_client_id (str, optional, default = None)
645
679
  # - sn_client_secret (str, optional, default = None)
646
- # - sn_table_name (str, optional, default = "u_kb_template_technical_article_public")
647
- # - sn_query (str, optional, default = None)
680
+ # - sn_queries (list, mandatory if type = servicenow)
681
+ # * sn_table_name (str, mandatory) - name of the ServiceNow database table for the query
682
+ # * sn_query (str, mandatory) - query string
648
683
  # - sn_thread_number (int, optional, default = BULK_THREAD_NUMBER)
649
684
  # - sn_download_dir (str, optional, default = "/data/knowledgebase")
650
685
  # - otcs_hostname (str, mandatory if type = otcs)
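
The former single sn_table_name / sn_query pair becomes a list of queries. A sketch of a ServiceNow bulk datasource using the new sn_queries key (the query string is invented; the table name reuses the former default):

    servicenow_datasource = {
        "type": "servicenow",
        "sn_queries": [
            {
                "sn_table_name": "u_kb_template_technical_article_public",  # ServiceNow table to query
                "sn_query": "workflow_state=published",                     # hypothetical query string
            },
        ],
    }
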
@@ -658,8 +693,19 @@ class Payload:
658
693
  # - otcs_root_node_id (int, mandatory if type = otcs)
659
694
  # - otcs_filter_workspace_depth (int, optional, default = 0)
660
695
  # - otcs_filter_workspace_subtype (int, optional, default = 0)
661
- # - otcs_filter_workspace_category (str, optional, default = None)
696
+ # - otcs_filter_workspace_category (str, optional, default = None) - name of the category the workspace needs to have
662
697
  # - otcs_filter_workspace_attributes (dict | list, optional, default = None)
698
+ # * set (str, optional, default = None) - name of the attribute set
699
+ # * row (int, optional, default = None) - row number (starting with 1) - only required for multi-value sets
700
+ # * attribute (str, mandatory) - name of the attribute
701
+ # * value (str, mandatory) - value the attribute should have to pass the filter
702
+ # - otcs_filter_item_depth (int, optional, default = None)
703
+ # - otcs_filter_item_category (str, optional, default = None) - name of the category the workspace needs to have
704
+ # - otcs_filter_item_attributes (dict | list, optional, default = None)
705
+ # * set (str, optional, default = None) - name of the attribute set
706
+ # * row (int, optional, default = None) - row number (starting with 1) - only required for multi-value sets
707
+ # * attribute (str, mandatory) - name of the attribute
708
+ # * value (str, mandatory) - value the attribute should have to pass the filter
663
709
  # - cleansings (dict, optional, default = {}) - the keys of this dict are the field names! The values of the dict are sub-dicts with these keys:
664
710
  # * upper (bool, optional, default = False)
665
711
  # * lower (bool, optional, default = False)
@@ -676,13 +722,21 @@ class Payload:
676
722
  # * length (int, optional, default = None)
677
723
  # * group_chars (str, optional, default = None)
678
724
  # * group_separator (str, optional, default =".")
725
+ # - columns_to_add_list (list, optional, default = []): add a new column with list values. Each payload item is a dictionary with these keys:
726
+ # * source_columns (str, mandatory) - names of the columns from which row values are taken from to create the list of string values
727
+ # * name (str, mandatory) - name of the new column
728
+ # - columns_to_add_table (list, optional, default = []): add a new column with table values. Each payload item is a dictionary with these keys:
729
+ # * source_columns (str, mandatory) - names of the columns from which row values are taken from to create a list of dictionary values. It is expected that the source columns already have list items or are strings with delimiter-separated values.
730
+ # * name (str, mandatory) - name of the new column
731
+ # * list_splitter (str, optional, default = ",")
679
732
  # - conditions (list, optional, default = []) - each list item is a dict with these keys:
680
733
  # * field (str, mandatory)
681
734
  # * value (str | bool | list, optional, default = None)
682
735
  # - explosions (list, optional, default = []) - each list item is a dict with these keys:
683
- # * explode_field (str | list, mandatory)
736
+ # * explode_fields (str | list, mandatory)
684
737
  # * flatten_fields (list, optional, default = [])
685
738
  # * split_string_to_list (bool, optional, default = False)
739
+ # * list_splitter (str, optional, default = ",;") - string with characters that are used to split a string into list items.
686
740
  # - name_column (str, optional, default = None)
687
741
  # - synonyms_column (str, optional, default = None)
688
742
  _bulk_datasources = []
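
The new column operations and the renamed explode_fields key could be combined in a datasource definition roughly as sketched below (column and field names are invented; the comment above types source_columns as str, so a single column name instead of a list should also be valid):

    datasource_fragment = {
        "columns_to_add_list": [
            # build a list-valued column "phones" from two existing columns:
            {"source_columns": ["phone_1", "phone_2"], "name": "phones"},
        ],
        "columns_to_add_table": [
            # build a table-valued column from columns that already hold lists or delimited strings:
            {"source_columns": ["item_name", "item_value"], "name": "line_items", "list_splitter": ","},
        ],
        "explosions": [
            # split a delimiter-separated string column into list items, then create one row per item:
            {"explode_fields": "contact_emails", "split_string_to_list": True, "list_splitter": ",;"},
        ],
    }
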
@@ -692,8 +746,9 @@ class Payload:
692
746
  # - enabled (bool, optional, default = True)
693
747
  # - type_name (str, mandatory) - type of the workspace
694
748
  # - data_source (str, mandatory)
695
- # - force_reload (bool, optional, default = True)
696
- # - enforce_updates (bool, optional, default = False)
749
+ # - force_reload (bool, optional, default = True) - enforce a reload of the data source, e.g. useful if data source has been modified before by column operations or explosions
750
+ # - copy_data_source (bool, optional, default = False) - to avoid side effects for repetitive usage of the data source
751
+ # - operations (list, optional, default = ["create"]) - possible values: "create", "update", "delete", "recreate" (delete existing + create new)
697
752
  # - unique (list, optional, default = []) - list of fields (columns) that should be unique -> deduplication
698
753
  # - sort (list, optional, default = []) - list of fields to sort the data frame by
699
754
  # - name (str, mandatory)
@@ -706,9 +761,11 @@ class Payload:
706
761
  # * attribute (str, mandatory)
707
762
  # * value (str, optional if value_field is specified, default = None)
708
763
  # * value_field (str, optional if value is specified, default = None) - can include placeholder surrounded by {...}
709
- # * value_type (str, optional, default = "string") - values can be string or list, if list then string with comma-separated values will be converted to a list
764
+ # * value_type (str, optional, default = "string") - possible values: "string", "date", "list" and "table". If list then string with comma-separated values will be converted to a list.
765
+ # * attribute_mapping (dict, optional, default = None) - only relevant for value_type = "table" - defines a mapping from the data frame column names to the category attribute names
710
766
  # * list_splitter (str, optional, default = ";,")
711
767
  # * lookup_data_source (str, optional, default = None)
768
+ # * lookup_data_failure_drop (bool, optional, default = False) - should we clear / drop values that cannot be looked up?
712
769
  # * is_key (bool, optional, default = False) - find document with old name. For this we expect a "key" value to be defined in the bulk workspace and one of the category / attribute items to be marked with "is_key" = True
713
770
  # - workspaces (dict, dynamically built up, default = {}) - list of already generated workspaces
714
771
  # - external_create_date (str, optional, default = "")
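
For the new "table" value type, a single category item of a bulk workspace could be shaped like this sketch (attribute and column names invented); attribute_mapping translates data frame column names into the attribute names of the set that is filled from the table value:

    category_item = {
        "attribute": "Line Items",                  # attribute (set) to fill
        "value_field": "{line_items}",              # e.g. a column created via columns_to_add_table
        "value_type": "table",
        "attribute_mapping": {"item_name": "Item", "item_value": "Value"},
        "lookup_data_failure_drop": False,          # keep values even if a lookup fails
    }
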
@@ -728,17 +785,23 @@ class Payload:
728
785
  # - from_workspace_type (str, optional, default = None)
729
786
  # - from_workspace_name (str, optional, default = None)
730
787
  # - from_workspace_data_source (str, optional, default = None)
788
+ # - from_sub_workspace_name (str, optional, default = None) - if the related workspace is a sub-workspace
789
+ # - from_sub_workspace_path (list, optional, default = None) - the folder path under the main workspace where the sub-workspaces are located
731
790
  # - to_workspace (str, mandatory)
732
791
  # - to_workspace_type (str, optional, default = None)
733
792
  # - to_workspace_name (str, optional, default = None)
734
793
  # - to_workspace_data_source (str, optional, default = None)
735
- # - relationship_type (str, optional, default = "child")
794
+ # - to_sub_workspace_name (str, optional, default = None) - if the related workspace is a sub-workspace
795
+ # - to_sub_workspace_path (list, optional, default = None) - the folder path under the main workspace where the sub-workspaces are located
796
+ # - type (str, optional, default = "child") - type of the relationship (defines if the _from_ workspace is the parent or the child)
736
797
  # - data_source (str, mandatory)
798
+ # - force_reload (bool, optional, default = True) - enforce a reload of the data source, e.g. useful if data source has been modified before by column operations or explosions
737
799
  # - copy_data_source (bool, optional, default = False) - to avoid side effects for repetitive usage of the data source
738
800
  # - explosions (list, optional, default = []) - each list item is a dict with these keys:
739
- # * explode_field (str | list, mandatory)
801
+ # * explode_fields (str | list, mandatory)
740
802
  # * flatten_fields (list, optional, default = [])
741
803
  # * split_string_to_list (bool, optional, default = False)
804
+ # * list_splitter (str, optional, default = ",;") - string with characters that are used to split a string into list items.
742
805
  # - unique (list, optional, default = [])
743
806
  # - sort (list, optional, default = [])
744
807
  # - thread_number (int, optional, default = BULK_THREAD_NUMBER)
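
Putting the new sub-workspace keys together, one bulkWorkspaceRelationships entry might look like the sketch below (workspace types, column placeholders and the datasource name are invented; from_workspace is assumed to be the counterpart of the documented to_workspace key):

    relationship_entry = {
        "enabled": True,
        "from_workspace": "{material_id}",              # assumed key, resolved per data frame row
        "from_workspace_type": "Material",
        "to_workspace": "{plant_id}",
        "to_workspace_type": "Plant",
        "to_sub_workspace_name": "{storage_location}",  # new in 1.6: relate to a sub-workspace
        "to_sub_workspace_path": ["Storage Locations"], # folder path under the main workspace
        "type": "child",                                # relationship type, see parent/child note above
        "data_source": "material_master",
        "force_reload": True,
    }
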
@@ -752,10 +815,16 @@ class Payload:
752
815
  # is a dict with these keys:
753
816
  # - enabled (bool, optional, default = True)
754
817
  # - data_source (str, mandatory)
818
+ # - force_reload (bool, optional, default = True) - enforce a reload of the data source, e.g. useful if data source has been modified before by column operations or explosions
819
+ # - copy_data_source (bool, optional, default = False) - to avoid side effects for repetitive usage of the data source
755
820
  # - explosions (list of dicts, optional, default = [])
756
- # - unique (list, optional, default = []) - list of fields (columns) that should be unique -> deduplication
821
+ # * explode_fields (str | list, mandatory)
822
+ # * flatten_fields (list, optional, default = [])
823
+ # * split_string_to_list (bool, optional, default = False)
824
+ # * list_splitter (str, optional, default = ",;") - string with characters that are used to split a string into list items.
825
+ # - unique (list, optional, default = []) - list of column names which values should be unique -> deduplication
757
826
  # - sort (list, optional, default = []) - list of fields to sort the data frame by
758
- # - enforce_updates (bool, optional, default = False)
827
+ # - operations (list, optional, default = ["create"])
759
828
  # - name (str, mandatory) - can include placeholder surrounded by {...}
760
829
  # - name_alt (str, optional, default = None) - can include placeholder surrounded by {...}
761
830
  # - description (str, optional, default = None) - can include placeholder surrounded by {...}
@@ -777,9 +846,11 @@ class Payload:
777
846
  # * attribute (str, mandatory)
778
847
  # * value (str, optional if value_field is specified, default = None)
779
848
  # * value_field (str, optional if value is specified, default = None) - can include placeholder surrounded by {...}
780
- # * value_type (str, optional, default = "string") - values can be string or list, if list then string with comma-separated values will be converted to a list
849
+ # * value_type (str, optional, default = "string") - possible values: "string", "date", "list" and "table". If list then string with comma-separated values will be converted to a list.
850
+ # * attribute_mapping (dict, optional, default = None) - only relevant for value_type = "table" - defines a mapping from the data frame column names to the category attribute names
781
851
  # * list_splitter (str, optional, default = ";,")
782
852
  # * lookup_data_source (str, optional, default = None)
853
+ # * lookup_data_failure_drop (bool, optional, default = False) - should we clear / drop values that cannot be looked up?
783
854
  # * is_key (bool, optional, default = False) - find document with old name. For this we expect a "key" value to be defined for the bulk document and one of the category / attribute items to be marked with "is_key" = True
784
855
  # - thread_number (int, optional, default = BULK_THREAD_NUMBER)
785
856
  # - external_create_date (str, optional, default = "")
@@ -819,6 +890,9 @@ class Payload:
819
890
 
820
891
  _transport_extractions: list = []
821
892
  _transport_replacements: list = []
893
+ _otawpsection = []
894
+
895
+ _avts_repositories: list = []
822
896
 
823
897
  # Disable Status files
824
898
  upload_status_files: bool = True
@@ -842,6 +916,8 @@ class Payload:
842
916
  stop_on_error: bool = False,
843
917
  aviator_enabled: bool = False,
844
918
  upload_status_files: bool = True,
919
+ otawp_object: OTAWP | None = None,
920
+ avts_object: AVTS | None = None,
845
921
  ):
846
922
  """Initialize the Payload object
847
923
 
@@ -883,6 +959,7 @@ class Payload:
883
959
  self._otmm = None
884
960
  self._otcs_source = None
885
961
  self._pht = None # the OpenText product hierarchy
962
+ self._avts = avts_object
886
963
  self._browser_automation = browser_automation_object
887
964
  self._custom_settings_dir = custom_settings_dir
888
965
  self._placeholder_values = placeholder_values
@@ -891,6 +968,7 @@ class Payload:
891
968
  self._aviator_enabled = aviator_enabled
892
969
  self._http_object = HTTP()
893
970
  self.upload_status_files = upload_status_files
971
+ self._otawp = otawp_object
894
972
 
895
973
  # end method definition
896
974
 
@@ -900,7 +978,7 @@ class Payload:
900
978
  target(*args, **kwargs)
901
979
  except Exception as e:
902
980
  thread_name = threading.current_thread().name
903
- logger.error("Thread %s: failed with exception %s", thread_name, e)
981
+ logger.error("Thread '%s': failed with exception -> %s", thread_name, e)
904
982
  logger.error(traceback.format_exc())
905
983
 
906
984
  # end method definition
@@ -1036,13 +1114,18 @@ class Payload:
1036
1114
  # Retrieve all the payload sections and store them in lists:
1037
1115
  self._webhooks = self.get_payload_section("webHooks")
1038
1116
  self._webhooks_post = self.get_payload_section("webHooksPost")
1117
+ self._resources = self.get_payload_section("resources")
1039
1118
  self._partitions = self.get_payload_section("partitions")
1119
+ self._synchronized_partition = self.get_payload_section("synchronizedPartitions")
1040
1120
  self._oauth_clients = self.get_payload_section("oauthClients")
1041
1121
  self._auth_handlers = self.get_payload_section("authHandlers")
1042
1122
  self._trusted_sites = self.get_payload_section("trustedSites")
1043
1123
  self._system_attributes = self.get_payload_section("systemAttributes")
1044
1124
  self._groups = self.get_payload_section("groups")
1045
1125
  self._users = self.get_payload_section("users")
1126
+ if self._users:
1127
+ # Check if multiple user instances should be created
1128
+ self.init_payload_user_instances()
1046
1129
  self._admin_settings = self.get_payload_section("adminSettings")
1047
1130
  self._admin_settings_post = self.get_payload_section("adminSettingsPost")
1048
1131
  self._exec_pod_commands = self.get_payload_section("execPodCommands")
@@ -1085,15 +1168,53 @@ class Payload:
1085
1168
  )
1086
1169
  self._holds = self.get_payload_section("holds")
1087
1170
  self._doc_generators = self.get_payload_section("documentGenerators")
1171
+ self._workflows = self.get_payload_section("workflows")
1088
1172
  self._browser_automations = self.get_payload_section("browserAutomations")
1089
1173
  self._browser_automations_post = self.get_payload_section(
1090
1174
  "browserAutomationsPost"
1091
1175
  )
1092
-
1176
+ self._otawpsection = self.get_payload_section("platformCustomConfig")
1177
+ self._avts_repositories = self.get_payload_section("avtsRepositories")
1093
1178
  return self._payload
1094
1179
 
1095
1180
  # end method definition
1096
1181
 
1182
+ def init_payload_user_instances(self):
1183
+ """Read setting for Multiple User instances"""
1184
+
1185
+ for dic in self._payload_sections:
1186
+ if dic.get("name") == "users":
1187
+ users_payload = dic
1188
+ break
1189
+ user_instances = users_payload.get("additional_instances", 0)
1190
+
1191
+ if user_instances == 0:
1192
+ logger.info("No additional user instances configured (default = 0)")
1193
+ return
1194
+
1195
+ i = 0
1196
+
1197
+ original_users = copy.deepcopy(self._users)
1198
+ while i <= user_instances:
1199
+ for user in copy.deepcopy(original_users):
1200
+ user["name"] = user["name"] + "-" + str(i).zfill(2)
1201
+ user["lastname"] = user["lastname"] + " " + str(i).zfill(2)
1202
+ user["enable_sap"] = False
1203
+ user["enable_o365"] = False
1204
+ user["enable_core_share"] = False
1205
+ user["enable_salesforce"] = False
1206
+ user["enable_successfactors"] = False
1207
+
1208
+ logger.info("Creating additional user instance -> '%s'", user["name"])
1209
+ logger.debug("Create user instance -> %s", user)
1210
+ self._users.append(user)
1211
+
1212
+ i = i + 1
1213
+
1214
+ return
1215
+
1216
+ # end method definition
1217
+
1097
1218
  def get_payload_section(self, payload_section_name: str) -> list:
1098
1219
  """Get a defined section of the payload. The section is delivered as a list of settings.
1099
1220
  It delivers an empty list if this payload section is disabled by the corresponding
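
The init_payload_user_instances() method added in this hunk looks up an additional_instances setting on the "users" entry of payloadSections. A sketch of such an entry and its effect (both key names are taken from the code above):

    # Hypothetical payloadSections entry enabling the multi-instance feature:
    users_section = {"name": "users", "additional_instances": 2}
    # Since the loop runs while i <= additional_instances, a payload user "pgrant" gets three
    # extra copies appended: "pgrant-00", "pgrant-01" and "pgrant-02", each with the SAP, O365,
    # Core Share, Salesforce and SuccessFactors flags disabled.
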
@@ -1128,12 +1249,85 @@ class Payload:
1128
1249
 
1129
1250
  # end method definition
1130
1251
 
1252
+ def ot_awp_create_project(self) -> bool:
1253
+ """
1254
+ Initiates the configuration of AppWorks projects.
1255
+ This method is responsible for setting up the necessary configurations for AppWorks projects.
1256
+ If the payload contains a `platformCustomConfig` section, it will execute the corresponding actions
1257
+ to process and apply the custom configuration.
1258
+
1259
+ Returns:
1260
+ bool: `True` on success, `False` on failure.
1261
+ """
1262
+
1263
+ if self._otawpsection == []:
1264
+ logger.info("OTAWP configuration not enabled")
1265
+ return False
1266
+
1267
+ if self._otawpsection is not None:
1268
+ platform = self._otawpsection.get("platform", {})
1269
+ if platform is not None:
1270
+ cws = platform.get("cws", {})
1271
+ if cws is not None:
1272
+ workspaces = cws.get("workspaces", [])
1273
+ if workspaces is not None:
1274
+ ispod_running = self._k8s.verify_pod_status("appworks-0")
1275
+ logger.info(ispod_running)
1276
+ if ispod_running is False:
1277
+ return False
1278
+ self._otawp.authenticate(False)
1279
+ for workspace in workspaces:
1280
+ workspace_name = workspace.get("name")
1281
+ workspace_path = workspace.get("path")
1282
+ workspace_gui_id = workspace.get("workspaceGuiID")
1283
+ respose = self._otawp.create_workspace_with_retry(
1284
+ workspace_name, workspace_gui_id
1285
+ )
1286
+ if not self._otawp.validate_workspace_response(
1287
+ respose, workspace_name
1288
+ ):
1289
+ return False
1290
+ if not self._otawp.is_workspace_already_exists(
1291
+ respose, workspace_name
1292
+ ):
1293
+ self._otawp.sync_workspace(
1294
+ workspace_name, workspace_gui_id
1295
+ )
1296
+ self._k8s.exec_pod_command(
1297
+ "appworks-0",
1298
+ [
1299
+ "/bin/sh",
1300
+ "-c",
1301
+ f'cp -r "{workspace_path}/"* "/opt/appworks/cluster/shared/cws/sync/system/{workspace_name}"',
1302
+ ],
1303
+ )
1304
+ self._otawp.sync_workspace(workspace_name, workspace_gui_id)
1305
+ projects = workspace.get("projects", [])
1306
+ if projects is not None:
1307
+ for project in projects:
1308
+ if not self._otawp.publish_project(
1309
+ workspace_name,
1310
+ project.get("name"),
1311
+ workspace_gui_id,
1312
+ project.get("documentId"),
1313
+ ):
1314
+ return False
1315
+
1316
+ self._otawp.create_loanruntime_from_config_file(platform)
1317
+ self._otawp.create_roles_from_config_file(self._otawpsection, self._otds)
1318
+ self._otawp.create_users_from_config_file(self._otawpsection, self._otds)
1319
+
1320
+ return True
1321
+
1322
+ # end method definition
1323
+
1131
1324
  def get_all_group_names(self) -> list:
1132
1325
  """Construct a list of all group name
1133
1326
 
1134
1327
  Returns:
1135
1328
  list: list of all group names
1136
1329
  """
1330
+
1137
1331
  return [group.get("name") for group in self._groups]
1138
1332
 
1139
1333
  # end method definition
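
Based on the keys that ot_awp_create_project() reads in the hunk above, a minimal platformCustomConfig section might be shaped as follows (all names, paths and IDs are invented):

    platform_custom_config = {
        "platform": {
            "cws": {
                "workspaces": [
                    {
                        "name": "loan_workspace",
                        "path": "/payload/appworks/loan_workspace",
                        "workspaceGuiID": "00000000-0000-0000-0000-000000000001",
                        "projects": [
                            {"name": "loan_project", "documentId": "12345"},
                        ],
                    },
                ],
            },
        },
    }
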
@@ -1345,7 +1539,7 @@ class Payload:
1345
1539
  payload_specific: bool = True,
1346
1540
  prefix: str = "success_",
1347
1541
  ) -> list | None:
1348
- """Get the status file and read it into a dictionary.
1542
+ """Get the status file and read it into a list of dictionaries.
1349
1543
 
1350
1544
  Args:
1351
1545
  payload_section_name (str): name of the payload section. This
@@ -1356,7 +1550,7 @@ class Payload:
1356
1550
  payload file)
1357
1551
  prefix (str, optional): prefix of the file. Typically, either "success_" or "failure_"
1358
1552
  Returns:
1359
- dict: content of the status file as a dictionary or None in case of an error
1553
+ list: content of the status file as a list of dictionaries or None in case of an error
1360
1554
  """
1361
1555
 
1362
1556
  logger.info(
@@ -1369,7 +1563,7 @@ class Payload:
1369
1563
  ) # read from Personal Workspace of Admin
1370
1564
  source_folder_id = self._otcs.get_result_value(response, "id")
1371
1565
  if not source_folder_id:
1372
- source_folder_id = 2004 # use Personal Workspace of Admin as fallback
1566
+ source_folder_id = 2004 # use Personal Workspace ID of Admin as fallback
1373
1567
 
1374
1568
  file_name = self.get_status_file_name(
1375
1569
  payload_section_name=payload_section_name,
@@ -1384,18 +1578,8 @@ class Payload:
1384
1578
  if not status_file_id:
1385
1579
  logger.error("Cannot find status file -> '%s'", file_name)
1386
1580
  return None
1387
- content = self._otcs.get_document_content(status_file_id)
1388
1581
 
1389
- try:
1390
- json_data = json.loads(content.decode("utf-8"))
1391
- if isinstance(json_data, list):
1392
- return json_data
1393
- else:
1394
- logger.error("File content is in JSON format but not a list.")
1395
- return None
1396
- except json.JSONDecodeError as e:
1397
- logger.error("File content is not in valid JSON format; error -> %s", e)
1398
- return None
1582
+ return self._otcs.get_json_document(status_file_id)
1399
1583
 
1400
1584
  # end method definition
1401
1585
 
@@ -1404,7 +1588,7 @@ class Payload:
1404
1588
  return self._payload
1405
1589
 
1406
1590
  def get_users(self) -> list:
1407
- """Get all useres"""
1591
+ """Get all users"""
1408
1592
  return self._users
1409
1593
 
1410
1594
  def get_groups(self) -> list:
@@ -1797,7 +1981,7 @@ class Payload:
1797
1981
  response = self._otcs.get_workspace_by_type_and_name(
1798
1982
  type_name=workspace["type_name"], name=workspace["name"]
1799
1983
  )
1800
- workspace_id = self._otcs.get_result_value(response, "id")
1984
+ workspace_id = self._otcs.get_result_value(response=response, key="id")
1801
1985
  if workspace_id:
1802
1986
  # Write nodeID back into the payload
1803
1987
  workspace["nodeId"] = workspace_id
@@ -1937,6 +2121,12 @@ class Payload:
1937
2121
 
1938
2122
  for payload_section in self._payload_sections:
1939
2123
  match payload_section["name"]:
2124
+ case "avtsRepositories":
2125
+ self._log_header_callback("Process Aviator Search repositories")
2126
+ self.process_avts_repositories()
2127
+ case "platformCustomConfig":
2128
+ self._log_header_callback("Process Create AppWorks workspaces")
2129
+ self.ot_awp_create_project()
1940
2130
  case "webHooks":
1941
2131
  self._log_header_callback("Process Web Hooks")
1942
2132
  self.process_web_hooks(webhooks=self._webhooks)
@@ -1945,11 +2135,17 @@ class Payload:
1945
2135
  self.process_web_hooks(
1946
2136
  webhooks=self._webhooks_post, section_name="webHooksPost"
1947
2137
  )
2138
+ case "resources":
2139
+ self._log_header_callback("Process OTDS Resources")
2140
+ self.process_resources()
1948
2141
  case "partitions":
1949
2142
  self._log_header_callback("Process OTDS Partitions")
1950
2143
  self.process_partitions()
1951
2144
  self._log_header_callback("Assign OTCS Licenses to Partitions")
1952
2145
  self.process_partition_licenses()
2146
+ case "synchronizedPartitions":
2147
+ self._log_header_callback("Process OTDS synchronizedPartition")
2148
+ self.process_synchronized_partition()
1953
2149
  case "oauthClients":
1954
2150
  self._log_header_callback("Process OTDS OAuth Clients")
1955
2151
  self.process_oauth_clients()
@@ -2162,9 +2358,15 @@ class Payload:
2162
2358
  self._log_header_callback("Process Bulk Workspaces")
2163
2359
  self.process_bulk_workspaces()
2164
2360
  case "bulkWorkspaceRelationships":
2361
+ if not self._workspace_types:
2362
+ self._log_header_callback("Process Workspace Types")
2363
+ self.process_workspace_types()
2165
2364
  self._log_header_callback("Process Bulk Workspace Relationships")
2166
2365
  self.process_bulk_workspace_relationships()
2167
2366
  case "bulkDocuments":
2367
+ if not self._workspace_types:
2368
+ self._log_header_callback("Process Workspace Types")
2369
+ self.process_workspace_types()
2168
2370
  self._log_header_callback("Process Bulk Documents")
2169
2371
  self.process_bulk_documents()
2170
2372
  case "sapRFCs":
@@ -2252,6 +2454,17 @@ class Payload:
2252
2454
 
2253
2455
  self._log_header_callback("Process Document Generators")
2254
2456
  self.process_document_generators()
2457
+ case "workflowInitiations":
2458
+ # If a payload file (e.g. additional ones) does not have
2459
+ # transportPackages then it can happen that the
2460
+ # self._workspace_types is not yet initialized. As we need
2461
+ # this structure for workflowInitiations we initialize it here:
2462
+ if not self._workspace_types:
2463
+ self._log_header_callback("Process Workspace Types")
2464
+ self.process_workspace_types()
2465
+
2466
+ self._log_header_callback("Process Workflows")
2467
+ self.process_workflows()
2255
2468
  case "browserAutomations":
2256
2469
  self._log_header_callback("Process Browser Automations")
2257
2470
  self.process_browser_automations(
@@ -2264,7 +2477,9 @@ class Payload:
2264
2477
  section_name="browserAutomationsPost",
2265
2478
  )
2266
2479
  case "workspaceTypes":
2267
- pass
2480
+ if not self._workspace_types:
2481
+ self._log_header_callback("Process Workspace Types")
2482
+ self.process_workspace_types()
2268
2483
  case _:
2269
2484
  logger.error(
2270
2485
  "Illegal payload section name -> '%s' in payloadSections!",
@@ -2380,6 +2595,234 @@ class Payload:
2380
2595
 
2381
2596
  # end method definition
2382
2597
 
2598
+ def process_resources(self, section_name: str = "resources") -> bool:
2599
+ """Process OTDS resources in payload and create them in OTDS.
2600
+
2601
+ Args:
2602
+ section_name (str, optional): name of the section. It can be overridden
2603
+ for cases where multiple sections of same type
2604
+ are used (e.g. the "Post" sections). This
2605
+ name is also used for the "success" status
2606
+ files written to the Admin Personal Workspace
2607
+ Returns:
2608
+ bool: True if payload has been processed without errors, False otherwise
2609
+ """
2610
+
2611
+ if not self._resources:
2612
+ logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
2613
+ return True
2614
+
2615
+ # If this payload section has been processed successfully before we
2616
+ # can return True and skip processing it once more:
2617
+ if self.check_status_file(section_name):
2618
+ return True
2619
+
2620
+ success: bool = True
2621
+
2622
+ for resource in self._resources:
2623
+ resource_name = resource.get("name")
2624
+ if not resource_name:
2625
+ logger.error("OTDS Resource does not have a name. Skipping...")
2626
+ success = False
2627
+ continue
2628
+
2629
+ # Check if element has been disabled in payload (enabled = false).
2630
+ # In this case we skip the element:
2631
+ if "enabled" in resource and not resource["enabled"]:
2632
+ logger.info(
2633
+ "Payload for OTDS Resource -> '%s' is disabled. Skipping...",
2634
+ resource_name,
2635
+ )
2636
+ continue
2637
+
2638
+ resource_description = resource.get("description", "")
2639
+ display_name = resource.get("display_name", "")
2640
+ additional_payload = resource.get("additional_payload", {})
2641
+ activate_resource = resource.get("activate", True)
2642
+ resource_id = resource.get("resource_id", None)
2643
+ allow_impersonation = resource.get("allow_impersonation", True)
2644
+ secret = resource.get("secret", None)
2645
+
2646
+ # Check if Partition does already exist
2647
+ # (in an attempt to make the code idem-potent)
2648
+ logger.info(
2649
+ "Check if OTDS resource -> '%s' does already exist...", resource_name
2650
+ )
2651
+ response = self._otds.get_resource(name=resource_name, show_error=False)
2652
+ if response:
2653
+ logger.info(
2654
+ "OTDS Resource -> '%s' does already exist. Skipping...",
2655
+ resource_name,
2656
+ )
2657
+ continue
2658
+
2659
+ # Only continue if Partition does not exist already
2660
+ logger.info("Resource -> '%s' does not exist. Creating...", resource_name)
2661
+
2662
+ response = self._otds.add_resource(
2663
+ name=resource_name,
2664
+ description=resource_description,
2665
+ display_name=display_name,
2666
+ allow_impersonation=allow_impersonation,
2667
+ resource_id=resource_id,
2668
+ secret=secret,
2669
+ additional_payload=additional_payload,
2670
+ )
2671
+ if response:
2672
+ logger.info("Added OTDS resource -> '%s'", resource_name)
2673
+ else:
2674
+ logger.error("Failed to add OTDS resource -> '%s'", resource_name)
2675
+ success = False
2676
+ continue
2677
+
2678
+ # If resource_id and secret are provided then the resource will
2679
+ # automatically be activated.
2680
+ if activate_resource and not secret:
2681
+ resource_id = response["resourceID"]
2682
+ logger.info(
2683
+ "Activate OTDS resource -> '%s' with ID -> %s...",
2684
+ resource_name,
2685
+ resource_id,
2686
+ )
2687
+ response = self._otds.activate_resource(resource_id=resource_id)
2688
+
2689
+ self.write_status_file(success, section_name, self._resources)
2690
+
2691
+ return success
2692
+
2693
+ # end method definition
2694
+
2695
+ def process_synchronized_partition(self, section_name: str = "synchronizedPartitions") -> bool:
2696
+ """Process OTDS synchronizedPartitions in payload and create them in OTDS.
2697
+ Returns:
2698
+ bool: True if payload has been processed without errors, False otherwise
2699
+ """
2700
+
2701
+ # check if section present, if not return True
2702
+ if not self._synchronized_partition:
2703
+ logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
2704
+ return True
2705
+ # If this payload section has been processed successfully before we
2706
+ # can return True and skip processing it once more:
2707
+ if self.check_status_file(section_name):
2708
+ return True
2709
+
2710
+ success = True
2711
+
2712
+ for partition in self._synchronized_partition:
2713
+ partition_name = partition["spec"].get("profileName")
2714
+ if not partition_name:
2715
+ logger.error("synchronizedPartition does not have a profileName. Skipping...")
2716
+ success = False
2717
+ continue
2718
+
2719
+ # Check if element has been disabled in payload (enabled = false).
2720
+ # In this case we skip the element:
2721
+ if "enabled" in partition and not partition["enabled"]:
2722
+ logger.info(
2723
+ "Payload for synchronizedPartitions -> '%s' is disabled. Skipping...",
2724
+ partition_name,
2725
+ )
2726
+ continue
2727
+
2728
+ partition_description = partition["spec"].get("description")
2729
+
2730
+ # Check if Partition does already exist
2731
+ # (in an attempt to make the code idem-potent)
2732
+ logger.info(
2733
+ "Check if OTDS synchronizedPartition -> '%s' does already exist...", partition_name
2734
+ )
2735
+ response = self._otds.get_partition(partition_name, show_error=False)
2736
+ if response:
2737
+ logger.info(
2738
+ "synchronizedPartition -> '%s' does already exist. Skipping...", partition_name
2739
+ )
2740
+ continue
2741
+
2742
+ # Only continue if synchronized Partition does not exist already
2743
+ logger.info("synchronizedPartition -> '%s' does not exist. Creating...", partition_name)
2744
+
2745
+ response = self._otds.add_synchronized_partition(partition_name, partition_description, partition["spec"])
2746
+ if response:
2747
+ logger.info("Added synchronized partition to OTDS-> '%s'", partition_name)
2748
+ else:
2749
+ logger.error("Failed to add synchronized partition to OTDS -> '%s'", partition_name)
2750
+ success = False
2751
+ continue
2752
+
2753
+ response = self._otds.import_synchronized_partition_members(partition_name)
2754
+ if response:
2755
+ logger.info("Imported members to synchronized partition to OTDS-> '%s'", partition_name)
2756
+ else:
2757
+ logger.error("Failed to Imported members to synchronized partition to OTDS -> '%s'", partition_name)
2758
+ success = False
2759
+ continue
2760
+
2761
+ access_role = partition.get("access_role")
2762
+ if access_role:
2763
+ response = self._otds.add_partition_to_access_role(
2764
+ access_role, partition_name
2765
+ )
2766
+ if response:
2767
+ logger.info(
2768
+ "Added OTDS partition -> '%s' to access role -> '%s'",
2769
+ partition_name,
2770
+ access_role,
2771
+ )
2772
+ else:
2773
+ logger.error(
2774
+ "Failed to add OTDS partition -> '%s' to access role -> '%s'",
2775
+ partition_name,
2776
+ access_role,
2777
+ )
2778
+ success = False
2779
+ continue
2780
+
2781
+ # Partions may have an optional list of licenses in
2782
+ # the payload. Assign the partition to all these licenses:
2783
+ partition_specific_licenses = partition.get("licenses")
2784
+ if partition_specific_licenses:
2785
+ # We assume these licenses are Extended ECM licenses!
2786
+ otcs_resource_name = self._otcs.config()["resource"]
2787
+ otcs_resource = self._otds.get_resource(otcs_resource_name)
2788
+ if not otcs_resource:
2789
+ logger.error(
2790
+ "Cannot find OTCS resource -> '%s'", otcs_resource_name
2791
+ )
2792
+ success = False
2793
+ continue
2794
+ otcs_resource_id = otcs_resource["resourceID"]
2795
+ license_name = "EXTENDED_ECM"
2796
+ for license_feature in partition_specific_licenses:
2797
+ assigned_license = self._otds.assign_partition_to_license(
2798
+ partition_name,
2799
+ otcs_resource_id,
2800
+ license_feature,
2801
+ license_name,
2802
+ )
2803
+
2804
+ if not assigned_license:
2805
+ logger.error(
2806
+ "Failed to assign partition -> '%s' to license feature -> '%s' of license -> '%s'!",
2807
+ partition_name,
2808
+ license_feature,
2809
+ license_name,
2810
+ )
2811
+ success = False
2812
+ else:
2813
+ logger.info(
2814
+ "Successfully assigned partition -> '%s' to license feature -> '%s' of license -> '%s'",
2815
+ partition_name,
2816
+ license_feature,
2817
+ license_name,
2818
+ )
2819
+
2820
+ self.write_status_file(success, section_name, self._partitions)
2821
+
2822
+ return success
2823
+
2824
+ # end method definition
2825
+
2383
2826
  def process_partitions(self, section_name: str = "partitions") -> bool:
2384
2827
  """Process OTDS partitions in payload and create them in OTDS.
2385
2828
 
@@ -3038,9 +3481,9 @@ class Payload:
3038
3481
 
3039
3482
  response = self._otds.add_trusted_site(url)
3040
3483
  if response:
3041
- logger.info("Added OTDS trusted site -> %s", url)
3484
+ logger.info("Added OTDS trusted site -> '%s'", url)
3042
3485
  else:
3043
- logger.error("Failed to add trusted site -> %s", url)
3486
+ logger.error("Failed to add trusted site -> '%s'", url)
3044
3487
  success = False
3045
3488
 
3046
3489
  self.write_status_file(success, section_name, self._trusted_sites)
@@ -3099,13 +3542,13 @@ class Payload:
3099
3542
  )
3100
3543
  if response:
3101
3544
  logger.info(
3102
- "Added OTDS system attribute -> '%s' with value -> %s",
3545
+ "Added OTDS system attribute -> '%s' with value -> '%s'",
3103
3546
  attribute_name,
3104
3547
  attribute_value,
3105
3548
  )
3106
3549
  else:
3107
3550
  logger.error(
3108
- "Failed to add OTDS system attribute -> '%s' with value -> %s",
3551
+ "Failed to add OTDS system attribute -> '%s' with value -> '%s'",
3109
3552
  attribute_name,
3110
3553
  attribute_value,
3111
3554
  )
@@ -3539,7 +3982,7 @@ class Payload:
3539
3982
  )
3540
3983
  else:
3541
3984
  logger.error(
3542
- "Failed to create Salesforce group -> %s!",
3985
+ "Failed to create Salesforce group -> '%s'!",
3543
3986
  group_name,
3544
3987
  )
3545
3988
  success = False
@@ -3549,7 +3992,7 @@ class Payload:
3549
3992
  for group in self._groups:
3550
3993
  if not "salesforce_id" in group:
3551
3994
  logger.info(
3552
- "Group -> %s does not have an Salesforce ID. Skipping...",
3995
+ "Group -> '%s' does not have an Salesforce ID. Skipping...",
3553
3996
  group["name"],
3554
3997
  )
3555
3998
  # Not all groups may be enabled for Salesforce. This is not an error.
@@ -3900,6 +4343,11 @@ class Payload:
3900
4343
  success = False
3901
4344
  # for some unclear reason the user is not added to its base group in OTDS
3902
4345
  # so we do this explicitly:
4346
+ logger.info(
4347
+ "Add user -> '%s' to its base group -> '%s'",
4348
+ user["name"],
4349
+ user["base_group"],
4350
+ )
3903
4351
  response = self._otds.add_user_to_group(
3904
4352
  user["name"], user["base_group"]
3905
4353
  )
@@ -4409,7 +4857,7 @@ class Payload:
4409
4857
  # if the user does not have its own inbox) and click the
4410
4858
  # verification link...
4411
4859
 
4412
- if need_email_verification:
4860
+ if need_email_verification and user.get("enable_o365", False):
4413
4861
  logger.info(
4414
4862
  "Processing Email verification for user -> '%s' (%s). Wait a few seconds to make sure verification mail in user's inbox...",
4415
4863
  user_name,
@@ -4827,7 +5275,7 @@ class Payload:
4827
5275
  # if the user does not have its own inbox) and click the
4828
5276
  # verification link...
4829
5277
 
4830
- if need_email_verification:
5278
+ if need_email_verification and user.get("enable_o365", False):
4831
5279
  logger.info(
4832
5280
  "Processing Email verification for user -> '%s' (%s). Wait a few seconds to make sure verification mail in user's inbox...",
4833
5281
  user_name,
@@ -6140,17 +6588,18 @@ class Payload:
6140
6588
  # update the payload dict with a "reachable" key/value pair:
6141
6589
  if not self.check_external_system(external_system):
6142
6590
  logger.warning(
6143
- "External System connection -> '%s' (%s) is not reachable! Skipping to next external system...",
6144
- system_name,
6145
- system_type,
6591
+ "External System connection -> '%s' (%s) is not reachable! Skipping to next external system...",
6592
+ system_name,
6593
+ system_type,
6146
6594
  )
6147
6595
  success = False
6148
6596
  continue
6149
6597
  else:
6150
6598
  logger.info(
6151
- "skip_connection_test is %s; Skipping external system check for %s...",
6599
+ "skip_connection_test is %s; Skipping external system check for %s...",
6152
6600
  skip_connection_test,
6153
- system_name)
6601
+ system_name,
6602
+ )
6154
6603
 
6155
6604
  # Read either username/password (BASIC) or client ID / secret (OAuth)
6156
6605
  match auth_method:
@@ -6395,7 +6844,7 @@ class Payload:
6395
6844
  logger.error("Failed to deploy transport -> '%s'", name)
6396
6845
  success = False
6397
6846
  if self._stop_on_error:
6398
- break
6847
+ raise Exception("STOP_ON_ERROR enabled -> Stopping execution")
6399
6848
  else:
6400
6849
  logger.info("Successfully deployed transport -> '%s'", name)
6401
6850
  # Save the extractions for later processing, e.g. in process_business_object_types()
@@ -8687,7 +9136,7 @@ class Payload:
8687
9136
  workspace["nodeId"] = workspace_id
8688
9137
  else:
8689
9138
  logger.error(
8690
- "Failed to create workspace -> '%s' of type -> %s!",
9139
+ "Failed to create workspace -> '%s' of type -> '%s'!",
8691
9140
  name,
8692
9141
  type_name,
8693
9142
  )
@@ -8705,6 +9154,8 @@ class Payload:
8705
9154
  # This is important to make the python container idem-potent.
8706
9155
  response = self._otcs.get_workspace(workspace["nodeId"])
8707
9156
  workspace["name"] = self._otcs.get_result_value(response, "name")
9157
+ # Also update the 'name' variable accordingly, as it is used below.
9158
+ name = workspace["name"]
8708
9159
 
8709
9160
  logger.info(
8710
9161
  "Successfully created workspace with final name -> '%s' and node ID -> %s",
@@ -8858,7 +9309,22 @@ class Payload:
8858
9309
  str(index),
8859
9310
  row["name"],
8860
9311
  )
8861
- success = self.process_workspace(workspace=row.dropna().to_dict())
9312
+ # Convert the row to a dictionary - omitting any empty column:
9313
+ workspace = row.dropna().to_dict()
9314
+ # workspace is a mutable dictionary that may be updated
9315
+ # by process_workspace():
9316
+ success = self.process_workspace(workspace=workspace)
9317
+ # We need to make sure the row (and the whole data frame)
9318
+ # gets these updates back (and adds new columns such as "nodeId"):
9319
+ for key, value in workspace.items():
9320
+ row[key] = value # This will update existing keys and add new ones
9321
+ logger.debug("Final values of row %s -> %s", str(index), str(row))
9322
+
9323
+ # As iterrows() creates a copy of the data we need to
9324
+ # write the changes back into the partition
9325
+ partition.loc[index] = row
9326
+
9327
+ # row.update(pd.Series(workspace)) # this doesn't work as it is not adding new values
8862
9328
  if success:
8863
9329
  result["success_counter"] += 1
8864
9330
  else:
@@ -8887,7 +9353,9 @@ class Payload:
8887
9353
  bool: True if payload has been processed without errors, False otherwise
8888
9354
 
8889
9355
  Side Effects:
8890
- Set workspace["nodeId] to the node ID of the created workspace
9356
+ Set workspace["nodeId"] to the node ID of the created workspace and update
9357
+ the workspace["name"] to the final name of the workspaces (which may be different
9358
+ from the ones in the payload depending on workspace type configurations)
8891
9359
  """
8892
9360
 
8893
9361
  if not self._workspaces:
@@ -8896,7 +9364,20 @@ class Payload:
8896
9364
 
8897
9365
  # If this payload section has been processed successfully before we
8898
9366
  # can return True and skip processing it once more:
8899
- if self.check_status_file(section_name):
9367
+ if self.check_status_file(payload_section_name=section_name):
9368
+
9369
+ # Read the list of created workspaces from the json file in admin Home
9370
+ # This is important in case of restart / rerun of customizer pod
9371
+ # as this data structure is used later on for workspace relationship
9372
+ # processing (and other) and the workspaces dictionaries have been
9373
+ # updated with "nodeId" and "name" (the final names of the workspaces
9374
+ # that can be different from the names in the payload)
9375
+ logger.info(
9376
+ "Re-Initialize workspace list from status file -> '%s' to have final names and node IDs...",
9377
+ self.get_status_file_name(payload_section_name=section_name),
9378
+ )
9379
+ self._workspaces = self.get_status_file(section_name)
9380
+
8900
9381
  return True
8901
9382
 
8902
9383
  success: bool = True
@@ -8909,6 +9390,16 @@ class Payload:
8909
9390
 
8910
9391
  df = Data(self._workspaces)
8911
9392
 
9393
+ # Add empty column for "nodeId" so that the worker threads can properly fill it:
9394
+ df.get_data_frame()["nodeId"] = None
9395
+
9396
+ logger.info(
9397
+ "Created a data frame with -> %s rows from the workspaces list with -> %s elements.",
9398
+ str(len(df)),
9399
+ str(len(self._workspaces)),
9400
+ )
9401
+ df.print_info()
9402
+
8912
9403
  partitions = df.partitionate(THREAD_NUMBER)
8913
9404
 
8914
9405
  # Create and start a thread for each partition
@@ -8929,6 +9420,28 @@ class Payload:
8929
9420
  thread.join()
8930
9421
  logger.info("Thread -> %s has completed.", str(thread.name))
8931
9422
 
9423
+ # As we have basically created a copy of self._workspaces into the Pandas
9424
+ # data frame (see df = Data(...) above) and the workspace processing
9425
+ # updates the workspaces data with "nodeID" and the final
9426
+ # workspace names, we need to write the Pandas Data frame
9427
+ # back into the self._workspaces data structure for further processing
9428
+ # e.g. in the process_workspace_relationships. Otherwise the
9429
+ # changes to "nodeId" or "name" would be lost. We need to do it
9430
+ # in 2 steps as we want to avoid to have NaN values in the resulting dicts:
9431
+ # 1. Convert the data frame back to a list of dictionaries:
9432
+ updated_workspaces = df.get_data_frame().to_dict(orient="records")
9433
+ # 2. Remove any dictionary item that has a "NaN" scalar value
9434
+ # (pd.notna() only works on scalar values, not on lists!):
9435
+ self._workspaces = [
9436
+ # {k: v for k, v in w.items() if pd.notna(v)} for w in updated_workspaces
9437
+ {
9438
+ key: value
9439
+ for key, value in updated_workspace.items()
9440
+ if not pd.api.types.is_scalar(value) or pd.notna(value)
9441
+ }
9442
+ for updated_workspace in updated_workspaces
9443
+ ]
9444
+
8932
9445
  # Check if all threads have completed without error / failure.
8933
9446
  # If there's a single failure in one of the thread results we
8934
9447
  # set 'success' variable to False.
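
The write-back logic above explains why the data frame is converted back into self._workspaces and why pd.notna() may only be applied to scalar values. A small standalone sketch of that clean-up step:

    import pandas as pd

    df = pd.DataFrame([{"name": "Workspace 1", "members": ["u1", "u2"], "nodeId": None}])
    records = df.to_dict(orient="records")
    # pd.notna() returns an element-wise array for list values, so only scalars are tested:
    cleaned = [
        {key: value for key, value in record.items()
         if not pd.api.types.is_scalar(value) or pd.notna(value)}
        for record in records
    ]
    # cleaned == [{"name": "Workspace 1", "members": ["u1", "u2"]}]  (the empty nodeId is dropped)
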
@@ -8993,25 +9506,31 @@ class Payload:
8993
9506
  # otherwise we cannot establish the relationship:
8994
9507
  if not "id" in workspace:
8995
9508
  logger.warning(
8996
- "Workspace without ID cannot have a relationship. Skipping to next workspace...",
9509
+ "Workspace without logical ID in payload cannot have a relationship. Skipping to next workspace...",
8997
9510
  )
8998
9511
  return False
8999
9512
 
9000
9513
  workspace_id = workspace["id"]
9001
- logger.info("Workspace -> '%s' has relationships - creating...", name)
9514
+ logger.info(
9515
+ "Workspace -> '%s' (type -> '%s') has relationships - creating...",
9516
+ name,
9517
+ workspace["type_name"],
9518
+ )
9002
9519
 
9003
- # now determine the actual node IDs of the workspaces (have been created above):
9520
+ # now determine the actual node IDs of the workspaces (has been created before):
9004
9521
  workspace_node_id = self.determine_workspace_id(workspace)
9005
9522
  if not workspace_node_id:
9006
9523
  logger.warning(
9007
- "Workspace without node ID cannot have a relationship (workspace creation may have failed). Skipping to next workspace...",
9524
+ "Workspace -> '%s' (type -> '%s') has no node ID and cannot have a relationship (workspace creation may have failed or final name is different from payload). Skipping to next workspace...",
9525
+ name,
9526
+ workspace["type_name"],
9008
9527
  )
9009
9528
  return False
9010
9529
 
9011
9530
  logger.debug(
9012
9531
  "Workspace with logical ID -> %s has node ID -> %s",
9013
- workspace_id,
9014
- workspace_node_id,
9532
+ str(workspace_id),
9533
+ str(workspace_node_id),
9015
9534
  )
9016
9535
 
9017
9536
  success: bool = True
@@ -9044,7 +9563,9 @@ class Payload:
9044
9563
  related_workspace_node_id = self.determine_workspace_id(related_workspace)
9045
9564
  if not related_workspace_node_id:
9046
9565
  logger.warning(
9047
- "Related Workspace without node ID (workspaces creation may have failed). Skipping to next workspace...",
9566
+ "Related Workspace -> '%s' (type -> '%s') has no node ID (workspaces creation may have failed or name is different from payload). Skipping to next workspace...",
9567
+ related_workspace["name"],
9568
+ related_workspace["type_name"],
9048
9569
  )
9049
9570
  continue
9050
9571
 
@@ -9065,14 +9586,14 @@ class Payload:
9065
9586
  if existing_workspace_relationship:
9066
9587
  logger.info(
9067
9588
  "Workspace relationship between workspace ID -> %s and related workspace ID -> %s does already exist. Skipping...",
9068
- workspace_node_id,
9589
+ str(workspace_node_id),
9069
9590
  related_workspace_node_id,
9070
9591
  )
9071
9592
  continue
9072
9593
 
9073
9594
  logger.info(
9074
9595
  "Create Workspace Relationship between workspace node ID -> %s and workspace node ID -> %s",
9075
- workspace_node_id,
9596
+ str(workspace_node_id),
9076
9597
  related_workspace_node_id,
9077
9598
  )
9078
9599
 
@@ -9222,7 +9743,7 @@ class Payload:
9222
9743
  result = self.process_workspace_relationship(workspace=workspace)
9223
9744
  success = (
9224
9745
  success and result
9225
- ) # if a single result is False then mark this in 'success' variable.
9746
+ ) # if a single result is False then the 'success' variable becomes 'False' as well.
9226
9747
 
9227
9748
  self.write_status_file(success, section_name, self._workspaces)
9228
9749
 
@@ -10204,10 +10725,17 @@ class Payload:
10204
10725
  )
10205
10726
  is_workspace = False
10206
10727
  if favorite_item:
10207
- logger.info(
10208
- "Found favorite item (workspace) in payload -> %s",
10209
- favorite_item["name"],
10210
- )
10728
+ if favorite_item.get("enabled", True):
10729
+ logger.info(
10730
+ "Found favorite item (workspace) -> '%s' in payload and it is enabled",
10731
+ favorite_item["name"],
10732
+ )
10733
+ else:
10734
+ logger.info(
10735
+ "Found favorite item (workspace) -> '%s' in payload but it is not enabled. Skipping...",
10736
+ favorite_item["name"],
10737
+ )
10738
+ continue
10211
10739
  favorite_id = self.determine_workspace_id(favorite_item)
10212
10740
  if not favorite_id:
10213
10741
  logger.warning(
@@ -10227,7 +10755,6 @@ class Payload:
10227
10755
  if favorite_type == 848:
10228
10756
  is_workspace = True
10229
10757
 
10230
- # if favorite_item is None:
10231
10758
  if favorite_id is None:
10232
10759
  logger.warning(
10233
10760
  "Favorite -> '%s' neither found as workspace ID nor as nickname. Skipping to next favorite...",
@@ -10238,13 +10765,13 @@ class Payload:
10238
10765
  response = self._otcs.add_favorite(favorite_id)
10239
10766
  if response is None:
10240
10767
  logger.warning(
10241
- "Favorite ID -> %s couldn't be added for user -> %s!",
10768
+ "Favorite ID -> %s couldn't be added for user -> '%s'!",
10242
10769
  favorite_id,
10243
10770
  user_name,
10244
10771
  )
10245
10772
  else:
10246
10773
  logger.info(
10247
- "Added favorite for user -> %s, node ID -> %s.",
10774
+ "Added favorite for user -> '%s', node ID -> %s.",
10248
10775
  user_name,
10249
10776
  favorite_id,
10250
10777
  )
@@ -11859,6 +12386,8 @@ class Payload:
11859
12386
  continue
11860
12387
  command = exec_pod_command["command"]
11861
12388
 
12389
+ container = exec_pod_command.get("container", None)
12390
+
11862
12391
  # Check if element has been disabled in payload (enabled = false).
11863
12392
  # In this case we skip the element:
11864
12393
  if "enabled" in exec_pod_command and not exec_pod_command["enabled"]:
@@ -11884,7 +12413,9 @@ class Payload:
11884
12413
  not "interactive" in exec_pod_command
11885
12414
  or exec_pod_command["interactive"] is False
11886
12415
  ):
11887
- result = self._k8s.exec_pod_command(pod_name, command)
12416
+ result = self._k8s.exec_pod_command(
12417
+ pod_name, command, container=container
12418
+ )
11888
12419
  else:
11889
12420
  if not "timeout" in exec_pod_command:
11890
12421
  result = self._k8s.exec_pod_command_interactive(pod_name, command)
@@ -11900,14 +12431,14 @@ class Payload:
11900
12431
  # 3. result is a non-empty string - this is OK - print it to log
11901
12432
  if result is None:
11902
12433
  logger.error(
11903
- "Execution of command -> '%s' in pod -> '%s' failed",
12434
+ "Execution of command -> %s in pod -> '%s' failed",
11904
12435
  command,
11905
12436
  pod_name,
11906
12437
  )
11907
12438
  success = False
11908
12439
  elif result != "":
11909
12440
  logger.info(
11910
- "Execution of command -> '%s' in pod -> '%s' returned result -> %s",
12441
+ "Execution of command -> %s in pod -> '%s' returned result -> %s",
11911
12442
  command,
11912
12443
  pod_name,
11913
12444
  result,
@@ -11916,7 +12447,7 @@ class Payload:
11916
12447
  # It is not an error if no result is returned. It depends on the nature of the command
11917
12448
  # if a result is written to stdout or stderr.
11918
12449
  logger.info(
11919
- "Execution of command -> '%s' in pod -> '%s' did not return a result",
12450
+ "Execution of command -> %s in pod -> '%s' did not return a result",
11920
12451
  command,
11921
12452
  pod_name,
11922
12453
  )
@@ -12252,6 +12783,413 @@ class Payload:
12252
12783
 
12253
12784
  # end method definition
12254
12785
 
12786
+ def process_workflow_attributes(
12787
+ self, attributes: list, workflow_attribute_definition: dict
12788
+ ):
12789
+ """Process the attributes in the workflow steps. This method
12790
+ adds the IDs for the attributes to the payload dicts. The
12791
+ IDs are needed for the workflow REST API calls.
12792
+
12793
+ Args:
12794
+ attributes (list): the list of attributes (payload) processed in the workflow step
12795
+ workflow_attribute_definition (dict): the workflow attribute definition
12796
+
12797
+ Returns:
12798
+ bool: True when processing is complete. The mutable 'attributes' list is enriched in place with the IDs.
12799
+ """
12800
+
12801
+ # now we need to get the technical attribute IDs from
12802
+ # the workflow definition and map them
12803
+ # with the attribute names from the payload:
12804
+ for attribute in attributes:
12805
+ attribute_name = attribute["name"]
12806
+ attribute_value = attribute["value"]
12807
+ attribute_type = attribute.get("type", None)
12808
+
12809
+ # Special treatment for type user: determine the ID for the login name.
12810
+ # the ID is the actual value we have to put in the attribute:
12811
+ if attribute_type and attribute_type.lower() == "user":
12812
+ user = self._otcs.get_user(name=attribute_value, show_error=True)
12813
+ user_id = self._otcs.get_result_value(response=user, key="id")
12814
+ if not user_id:
12815
+ logger.error(
12816
+ "Cannot find user with login name -> '%s'. Skipping...",
12817
+ attribute_value,
12818
+ )
12819
+ continue
12820
+ attribute_value = user_id
12821
+ attribute["value"] = user_id
12822
+
12823
+ attribute_definition = workflow_attribute_definition.get(
12824
+ attribute_name, None
12825
+ )
12826
+ if not attribute_definition:
12827
+ logger.error(
12828
+ "Cannot find the attribute -> '%s' in the workflow definition. Skipping..."
12829
+ )
12830
+ continue
12831
+ # Enrich the attribute dictionary with the attribute ID from the workflow definition:
12832
+ attribute["id"] = attribute_definition["id"]
12833
+ # Enrich the attribute dictionary with the attribute form ID from the workflow definition:
12834
+ attribute["form_id"] = attribute_definition["form_id"]
12835
+
12836
+ if attributes:
12837
+ logger.info(
12838
+ "Updated workflow step attributes with IDs -> %s",
12839
+ str(attributes),
12840
+ )
12841
+
12842
+ return True
12843
+
12844
+ # end method definition
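For illustration, a hypothetical 'attributes' payload before and after this enrichment (all names and IDs are made up; the shape of the definition dict follows the lookups above):

# Payload attribute as it comes from the YAML file:
attributes = [{"name": "Approver", "value": "jdoe", "type": "user"}]

# Definition as returned by get_workflow_attributes(), keyed by attribute name:
workflow_attribute_definition = {"Approver": {"id": 5, "form_id": 12345}}

# After process_workflow_attributes() the same list is enriched in place, roughly:
# [{"name": "Approver", "value": 1042, "type": "user", "id": 5, "form_id": 12345}]
# (the login name "jdoe" has been resolved to its user ID, here assumed to be 1042)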
12845
+
12846
+ def process_workflow_step(
12847
+ self,
12848
+ workflow_id: int,
12849
+ workflow_step: dict,
12850
+ workflow_attribute_definition: dict,
12851
+ documents: list | None = None,
12852
+ process_id: int | None = None,
12853
+ ) -> bool:
12854
+ """Process a workflow step of a workflow.
12855
+
12856
+ Args:
12857
+ workflow_id (int): Node ID of the workflow (the workflow map)
12858
+ workflow_step (dict): Payload dictionary for a single workflow step.
+ workflow_attribute_definition (dict): the workflow attribute definition (attribute names mapped to their IDs and form IDs)
+ documents (list | None, optional): node IDs of documents to attach when the step action is "Initiate"
+ process_id (int | None, optional): ID of the already running process for steps after the initiation
+
+ Returns:
+ bool: True if the workflow step has been processed successfully, False otherwise
12859
+ """
12860
+
12861
+ if not "action" in workflow_step:
12862
+ logger.error("Missing workflow action in workflow step.")
12863
+ return False
12864
+ action = workflow_step["action"]
12865
+
12866
+ if not "exec_as_user" in workflow_step:
12867
+ logger.error("Missing workflow user in workflow step.")
12868
+ return False
12869
+ exec_as_user = workflow_step["exec_as_user"]
12870
+
12871
+ # Find the user in the users payload:
12872
+ exec_user = next(
12873
+ (item for item in self._users if item["name"] == exec_as_user),
12874
+ None,
12875
+ )
12876
+ # Have we found the user in the payload?
12877
+ if exec_user is None:
12878
+ logger.error(
12879
+ "Cannot find user with login name -> '%s' for workflow processing.",
12880
+ exec_as_user,
12881
+ )
12882
+ return False
12883
+
12884
+ logger.info("Executing workflow step as user -> '%s'", exec_as_user)
12885
+ # we change the otcs credentials to the user:
12886
+ self._otcs.set_credentials(exec_user["name"], exec_user["password"])
12887
+
12888
+ # we re-authenticate as the user:
12889
+ logger.info("Authenticate user -> '%s'...", exec_as_user)
12890
+ # True = force new login with new user
12891
+ cookie = self._otcs.authenticate(revalidate=True)
12892
+ if not cookie:
12893
+ logger.error("Couldn't authenticate user -> '%s'", exec_as_user)
12894
+ return False
12895
+
12896
+ # "attributes" is optional:
12897
+ if not "attributes" in workflow_step:
12898
+ logger.warning(
12899
+ "No workflow attributes specified in the payload for this workflow step.",
12900
+ )
12901
+ attributes = []
12902
+ workflow_step_values = None
12903
+ else:
12904
+ attributes = workflow_step["attributes"]
12905
+ logger.info(
12906
+ "Workflow step has attributes -> %s. Adding attribute IDs to the payload names...",
12907
+ str(attributes),
12908
+ )
12909
+ # Update / enrich the attributes in the workflow step with the IDs
12910
+ # from the workflow definition (this CHANGES the attributes!)
12911
+ self.process_workflow_attributes(
12912
+ attributes=attributes,
12913
+ workflow_attribute_definition=workflow_attribute_definition,
12914
+ )
12915
+ # Prepare the data for the REST call to
12916
+ # update the process:
12917
+ workflow_step_values = {
12918
+ attr["form_id"]: attr["value"]
12919
+ for attr in attributes
12920
+ if "form_id" in attr and "value" in attr
12921
+ }
12922
+
12923
+ if action == "Initiate":
12924
+ # Create a draft process in preparation for the workflow initiation:
12925
+ response = self._otcs.create_draft_process(
12926
+ workflow_id=workflow_id, documents=documents, attach_documents=True
12927
+ )
12928
+ draftprocess_id = self._otcs.get_result_value(
12929
+ response=response, key="draftprocess_id", property_name=""
12930
+ )
12931
+ if not draftprocess_id:
12932
+ logger.error(
12933
+ "Failed to create draft process for workflow ID -> %s as user -> '%s'",
12934
+ str(workflow_id),
12935
+ exec_as_user,
12936
+ )
12937
+ return False
12938
+ else:
12939
+ logger.info(
12940
+ "Successfully generated draft process with ID -> %s%s",
12941
+ str(draftprocess_id),
12942
+ " attching document IDs -> " + str(documents) if documents else "",
12943
+ )
12944
+ workflow_step["draftprocess_id"] = draftprocess_id
12945
+
12946
+ # Check if a due date is specified. The payload has
12947
+ # a relative offset in number of days that we add to
12948
+ # the current date:
12949
+ due_in_days = workflow_step.get("due_in_days", None)
12950
+ if due_in_days:
12951
+ due_date = datetime.now() + timedelta(days=int(due_in_days))
12952
+ due_date = due_date.strftime("%Y-%m-%d")
12953
+ else:
12954
+ due_date = None
12955
+ # Record the due date in the workflow step dictionary
12956
+ workflow_step["due_date"] = due_date
12957
+
12958
+ # Update the draft process with title, due date
12959
+ # and workflow attribute values from the payload:
12960
+ response = self._otcs.update_draft_process(
12961
+ draftprocess_id=draftprocess_id,
12962
+ title=workflow_step.get("title", None),
12963
+ due_date=due_date,
12964
+ values=workflow_step_values,
12965
+ )
12966
+
12967
+ # Initiate the draft process. This creates
12968
+ # the running workflow instance:
12969
+ response = self._otcs.initiate_draft_process(
12970
+ draftprocess_id=draftprocess_id,
12971
+ comment=workflow_step.get("comment", None),
12972
+ )
12973
+ process_id = self._otcs.get_result_value(
12974
+ response=response, key="process_id", property_name=""
12975
+ )
12976
+ if not process_id:
12977
+ logger.error(
12978
+ "Failed to initiate process for workflow with ID -> %s as user -> '%s'",
12979
+ str(workflow_id),
12980
+ exec_as_user,
12981
+ )
12982
+ return False
12983
+ logger.info(
12984
+ "Successfully initiated process with ID -> %s for workflow with ID -> %s as user -> '%s'",
12985
+ str(process_id),
12986
+ str(workflow_id),
12987
+ exec_as_user,
12988
+ )
12989
+ workflow_step["process_id"] = process_id
12990
+ else:
12991
+ if not process_id:
12992
+ logger.error(
12993
+ "Workflow step cannot be executed as process is not initiated (process ID not set)"
12994
+ )
12995
+ return False
12996
+ response = self._otcs.get_process_task(
12997
+ process_id=process_id,
12998
+ )
12999
+ # Are there any workflow attributes to update with new values?
13000
+ if attributes:
13001
+ response = self._otcs.update_process_task(
13002
+ process_id=process_id, values=workflow_step_values
13003
+ )
13004
+ # Execute the step action defined in the payload
13005
+ response = self._otcs.update_process_task(
13006
+ process_id=process_id, action=action
13007
+ )
13008
+
13009
+ return True
13010
+
13011
+ # end method definition
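A hypothetical payload dictionary for a single workflow step as consumed by this method (keys follow the lookups above; all values are made up):

workflow_step = {
    "action": "Initiate",            # actions other than "Initiate" are applied to the running process task
    "exec_as_user": "jdoe",          # must exist in the users payload section
    "title": "Invoice approval",
    "due_in_days": 7,                # relative due date, converted to YYYY-MM-DD
    "comment": "Started by automation",
    "attributes": [
        {"name": "Amount", "value": "1500.00"},
        {"name": "Approver", "value": "jdoe", "type": "user"},
    ],
}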
13012
+
13013
+ def process_workflows(self, section_name: str = "workflows") -> bool:
13014
+ """Initiate and process workflows for a defined workspace type and folder path
13015
+
13016
+ Args:
13017
+ section_name (str, optional): name of the section. It can be overridden
13018
+ for cases where multiple sections of same type
13019
+ are used (e.g. the "Post" sections)
13020
+ This name is also used for the "success" status
13021
+ files written to the Admin Personal Workspace
13022
+ Returns:
13023
+ bool: True if payload has been processed without errors, False otherwise
13024
+ """
13025
+
13026
+ if not self._workflows:
13027
+ logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
13028
+ return True
13029
+
13030
+ # If this payload section has been processed successfully before we
13031
+ # can return True and skip processing it once more:
13032
+ if self.check_status_file(section_name):
13033
+ return True
13034
+
13035
+ success: bool = True
13036
+
13037
+ # save admin credentials for later switch back to admin user:
13038
+ admin_credentials = self._otcs.credentials()
13039
+
13040
+ for workflow in self._workflows:
13041
+ if not "workflow_nickname" in workflow:
13042
+ logger.error(
13043
+ "To initiate and process workflows for documents in workspaces the workflow nickname needs to be specified in the payload! Skipping to next workflow initiation..."
13044
+ )
13045
+ success = False
13046
+ continue
13047
+ workflow_nickname = workflow["workflow_nickname"]
13048
+ workflow_node = self._otcs.get_node_from_nickname(
13049
+ nickname=workflow_nickname
13050
+ )
13051
+ workflow_id = self._otcs.get_result_value(response=workflow_node, key="id")
13052
+ workflow_name = self._otcs.get_result_value(
13053
+ response=workflow_node, key="name"
13054
+ )
13055
+ if not workflow_id:
13056
+ logger.error(
13057
+ "Cannot find workflow by nickname -> '%s'! Skipping to next workflow...",
13058
+ workflow_nickname,
13059
+ )
13060
+ success = False
13061
+ continue
13062
+
13063
+ if not "workspace_type" in workflow:
13064
+ logger.error(
13065
+ "To process workflow -> '%s' for documents in workspaces the workspace type needs to be specified in the payload! Skipping to next workflow...",
13066
+ workflow_name,
13067
+ )
13068
+ success = False
13069
+ continue
13070
+ workspace_type = workflow["workspace_type"]
13071
+
13072
+ # Check if element has been disabled in payload (enabled = false).
13073
+ # In this case we skip the element:
13074
+ if "enabled" in workflow and not workflow["enabled"]:
13075
+ logger.info(
13076
+ "Payload for workflow -> '%s' of workspace type -> '%s' is disabled. Skipping...",
13077
+ workflow_name,
13078
+ workspace_type,
13079
+ )
13080
+ continue
13081
+ workspace_type = workflow["workspace_type"]
13082
+ # Find the workspace type with the name given in the _workspace_types
13083
+ # datastructure that has been generated by process_workspace_types() method before:
13084
+ workspace_type_id = next(
13085
+ (
13086
+ item["id"]
13087
+ for item in self._workspace_types
13088
+ if item["name"] == workspace_type
13089
+ ),
13090
+ None,
13091
+ )
13092
+ workspace_instances = self._otcs.get_workspace_instances(
13093
+ type_name=workspace_type, type_id=workspace_type_id
13094
+ )
13095
+ if not workspace_instances or not workspace_instances["results"]:
13096
+ logger.warning(
13097
+ "No workspace instances found for workspace type -> '%s' (%s). Skipping processing of workflow -> '%s'.",
13098
+ workspace_type,
13099
+ workspace_type_id,
13100
+ workflow_name,
13101
+ )
13102
+ success = False
13103
+ continue
13104
+
13105
+ if not "workspace_folder_path" in workflow:
13106
+ logger.info(
13107
+ "No workspace folder path defined for workspaces of type -> '%s'. Workflows will be started for documents in workspace root.",
13108
+ workspace_type,
13109
+ )
13110
+ workspace_folder_path = []
13111
+ else:
13112
+ workspace_folder_path = workflow["workspace_folder_path"]
13113
+
13114
+ if not "steps" in workflow:
13115
+ logger.error(
13116
+ "To process workflow -> '%s', workflow steps ('steps') needs to be specified in the payload! Skipping to next workflow initiation...",
13117
+ workflow_name,
13118
+ )
13119
+ success = False
13120
+ continue
13121
+ workflow_steps = workflow["steps"]
13122
+
13123
+ # Get the attribute details (name, ID, type) from the workflow definition:
13124
+ workflow_attribute_definition = self._otcs.get_workflow_attributes(
13125
+ workflow_id=workflow_id
13126
+ )
13127
+
13128
+ for workspace_instance in workspace_instances["results"]:
13129
+ workspace_id = workspace_instance["data"]["properties"]["id"]
13130
+ workspace_name = workspace_instance["data"]["properties"]["name"]
13131
+ if workspace_folder_path:
13132
+ workspace_folder = self._otcs.get_node_by_workspace_and_path(
13133
+ workspace_id=workspace_id, path=workspace_folder_path
13134
+ )
13135
+ if workspace_folder:
13136
+ workspace_folder_id = self._otcs.get_result_value(
13137
+ workspace_folder, "id"
13138
+ )
13139
+ else:
13140
+ # If the workspace template is not matching
13141
+ # the path we may have an error here. Then
13142
+ # we fall back to workspace root level.
13143
+ logger.warning(
13144
+ "Folder path does not exist in workspace -> '%s'. Using workspace root level instead...",
13145
+ workspace_name,
13146
+ )
13147
+ workspace_folder_id = workspace_id
13148
+ else:
13149
+ workspace_folder_id = workspace_id
13150
+
13151
+ # Get all documents (-3 = non-container) from the defined folder:
13152
+ response = self._otcs.get_subnodes(
13153
+ parent_node_id=workspace_folder_id, filter_node_types=-3
13154
+ )
13155
+ documents = self._otcs.get_result_values(response=response, key="id")
13156
+
13157
+ process_id = None
13158
+ for workflow_step in workflow_steps:
13159
+ result = self.process_workflow_step(
13160
+ workflow_id=workflow_id,
13161
+ workflow_step=workflow_step,
13162
+ workflow_attribute_definition=workflow_attribute_definition,
13163
+ documents=documents,
13164
+ process_id=process_id,
13165
+ )
13166
+ # If the step fails we are bailing out as it doesn't make
13167
+ # sense to continue with further steps:
13168
+ if not result:
13169
+ success = False
13170
+ break
13171
+ if "process_id" in workflow_step:
13172
+ process_id = workflow_step["process_id"]
13173
+
13174
+ # end for workflow in self._workflows
13175
+
13176
+ # Set back admin credentials:
13177
+ self._otcs.set_credentials(
13178
+ admin_credentials["username"], admin_credentials["password"]
13179
+ )
13180
+ # Authenticate back as the admin user:
13181
+ logger.info(
13182
+ "Authenticate as admin user -> '%s'...", admin_credentials["username"]
13183
+ )
13184
+ # True = force new login with new user
13185
+ self._otcs.authenticate(revalidate=True)
13186
+
13187
+ self.write_status_file(success, section_name, self._workflows)
13188
+
13189
+ return success
13190
+
13191
+ # end method definition
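A hypothetical entry of the "workflows" payload section as processed by this method (nickname, workspace type, folder path and user names are made up):

workflow_payload = {
    "workflow_nickname": "nn_invoice_approval",
    "workspace_type": "Vendor",
    "workspace_folder_path": ["Invoices", "Incoming"],
    "enabled": True,
    "steps": [
        {"action": "Initiate", "exec_as_user": "jdoe"},
        {"action": "Approve", "exec_as_user": "jmiller"},
    ],
}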
13192
+
12255
13193
  def process_browser_automations(
12256
13194
  self,
12257
13195
  browser_automations: list,
@@ -12825,18 +13763,32 @@ class Payload:
12825
13763
  "otcs_filter_workspace_attributes", None
12826
13764
  )
12827
13765
 
13766
+ # Filter item by depth under the given root:
13767
+ otcs_filter_item_depth = data_source.get("otcs_filter_item_depth", None)
13768
+ # Filter items by category name (only consider items if they have the category):
13769
+ otcs_filter_item_category = data_source.get("otcs_filter_item_category", None)
13770
+ # Filter items by attribute values (only consider items if they have the attributes with the defined values):
13771
+ otcs_filter_item_attributes = data_source.get(
13772
+ "otcs_filter_item_attributes", None
13773
+ )
13774
+
12828
13775
  if not otcs_root_node_id:
12829
13776
  logger.error(
12830
13777
  "Content Server root node ID for traversal is missing in payload of bulk data source. Cannot load data!"
12831
13778
  )
12832
13779
  return None
12833
13780
 
13781
+ # Ensure otcs_root_node_id is a list of integers:
13782
+ if not isinstance(otcs_root_node_id, list):
13783
+ otcs_root_node_id = [otcs_root_node_id]
13784
+ otcs_root_node_id = [int(item) for item in otcs_root_node_id]
13785
+
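For example, both payload variants end up as a list of integers after this normalization (values are made up):

#   otcs_root_node_id: 2000            ->  [2000]
#   otcs_root_node_id: ["2000", 2001]  ->  [2000, 2001]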
12834
13786
  logger.info(
12835
13787
  "Loading data from Content Server (folder, workspaces, items) from root ID -> %s.",
12836
13788
  otcs_root_node_id,
12837
13789
  )
12838
13790
 
12839
- # 2. Creating the ServiceNow object:
13791
+ # 2. Creating the OTCS object:
12840
13792
  self._otcs_source = OTCS(
12841
13793
  protocol=otcs_protocol,
12842
13794
  hostname=otcs_hostname,
@@ -12848,19 +13800,24 @@ class Payload:
12848
13800
  download_dir=otcs_download_dir,
12849
13801
  )
12850
13802
 
12851
- # 3. Authenticate at ServiceNow
13803
+ # 3. Authenticate at OTCS
12852
13804
  self._otcs_source.authenticate()
12853
13805
 
12854
13806
  # 4. Load the Content Server data into the Data object (Pandas DataFrame):
12855
- if not self._otcs_source.load_items(
12856
- node_id=otcs_root_node_id,
12857
- filter_workspace_depth=otcs_filter_workspace_depth,
12858
- filter_workspace_subtypes=otcs_filter_workspace_subtypes,
12859
- filter_workspace_category=otcs_filter_workspace_category,
12860
- filter_workspace_attributes=otcs_filter_workspace_attributes,
12861
- ):
12862
- logger.error("Failure during load of Content Server items!")
12863
- return None
13807
+
13808
+ for root_node in otcs_root_node_id:
13809
+ if not self._otcs_source.load_items(
13810
+ node_id=root_node,
13811
+ filter_workspace_depth=otcs_filter_workspace_depth,
13812
+ filter_workspace_subtypes=otcs_filter_workspace_subtypes,
13813
+ filter_workspace_category=otcs_filter_workspace_category,
13814
+ filter_workspace_attributes=otcs_filter_workspace_attributes,
13815
+ filter_item_depth=otcs_filter_item_depth,
13816
+ filter_item_category=otcs_filter_item_category,
13817
+ filter_item_attributes=otcs_filter_item_attributes,
13818
+ ):
13819
+ logger.error("Failure during load of Content Server items!")
13820
+ return None
12864
13821
  data = self._otcs_source.get_data()
12865
13822
  if not data:
12866
13823
  logger.error("Failure during load of Content Server items! No data loaded!")
@@ -12880,7 +13837,7 @@ class Payload:
12880
13837
  Data: Data class that includes a Pandas DataFrame
12881
13838
 
12882
13839
  Side Effects:
12883
- self._servicenow is set to the PHT object created by this method
13840
+ self._servicenow is set to the ServiceNow object created by this method
12884
13841
  """
12885
13842
 
12886
13843
  # 1. Read and validate values from the data source payload:
@@ -12898,7 +13855,11 @@ class Payload:
12898
13855
  sn_table_name = data_source.get(
12899
13856
  "sn_table_name", "u_kb_template_technical_article_public"
12900
13857
  )
13858
+ sn_queries = data_source.get("sn_queries", [])
12901
13859
  sn_query = data_source.get("sn_query", None)
13860
+ if sn_query is not None:
13861
+ sn_queries.append({"table": sn_table_name, "query": sn_query})
13862
+
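A sketch of the two payload variants this supports (keys follow the reads above and in the loading loop below; table and query strings are made up):

# Classic single-query variant:
data_source = {
    "sn_table_name": "u_kb_template_technical_article_public",
    "sn_query": "workflow_state=published",
}

# New multi-query variant; each entry carries its own table and query:
data_source = {
    "sn_queries": [
        {"sn_table_name": "kb_knowledge", "sn_query": "workflow_state=published"},
        {"sn_table_name": "u_kb_template_technical_article_public", "sn_query": "active=true"},
    ],
}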
12902
13863
  sn_thread_number = data_source.get("sn_thread_number", BULK_THREAD_NUMBER)
12903
13864
  sn_download_dir = data_source.get("sn_download_dir", "/data/knowledgebase")
12904
13865
  if (
@@ -12920,11 +13881,6 @@ class Payload:
12920
13881
  )
12921
13882
  return None
12922
13883
 
12923
- logger.info(
12924
- "Loading data from ServiceNow (Knowledge Base Articles) with query -> '%s'",
12925
- sn_query,
12926
- )
12927
-
12928
13884
  # 2. Creating the ServiceNow object:
12929
13885
  self._servicenow = ServiceNow(
12930
13886
  base_url=sn_base_url,
@@ -12946,11 +13902,29 @@ class Payload:
12946
13902
  logger.info("Successfully authenticated at ServiceNow -> %s", sn_base_url)
12947
13903
 
12948
13904
  # 4. Load the ServiceNow data into the Data object (Pandas DataFrame):
12949
- if not self._servicenow.load_articles(table_name=sn_table_name, query=sn_query):
12950
- logger.error("Failure during load of ServiceNow articles!")
12951
- return None
13905
+ for item in sn_queries:
13906
+ sn_table_name = item["sn_table_name"]
13907
+ sn_query = item["sn_query"]
13908
+
13909
+ logger.info(
13910
+ "Loading data from ServiceNow table -> '%s' with query -> '%s'",
13911
+ sn_table_name,
13912
+ sn_query,
13913
+ )
13914
+
13915
+ if not self._servicenow.load_articles(
13916
+ table_name=sn_table_name, query=sn_query
13917
+ ):
13918
+ logger.error(
13919
+ "Failure during load of ServiceNow articles from table -> '%s' using query -> '%s' !",
13920
+ sn_table_name,
13921
+ sn_query,
13922
+ )
13923
+ continue
13924
+
12952
13925
  data = self._servicenow.get_data()
12953
- if not data:
13926
+
13927
+ if data is None:
12954
13928
  logger.error(
12955
13929
  "Failure during load of ServiceNow articles! No articles loaded!"
12956
13930
  )
@@ -13336,43 +14310,43 @@ class Payload:
13336
14310
  match data_source_type:
13337
14311
  case "excel":
13338
14312
  data = self.process_bulk_datasource_excel(data_source=data_source)
13339
- if not data:
14313
+ if data is None:
13340
14314
  logger.error("Failure during load of ServiceNow articles!")
13341
14315
  return None
13342
14316
  case "servicenow":
13343
14317
  data = self.process_bulk_datasource_servicenow(data_source=data_source)
13344
- if not data:
14318
+ if data is None:
13345
14319
  logger.error("Failure during load of ServiceNow articles!")
13346
14320
  return None
13347
14321
  case "otmm":
13348
14322
  data = self.process_bulk_datasource_otmm(data_source=data_source)
13349
- if not data:
14323
+ if data is None:
13350
14324
  logger.error(
13351
14325
  "Failure during load of OpenText Media Management assets!"
13352
14326
  )
13353
14327
  return None
13354
14328
  case "otcs":
13355
14329
  data = self.process_bulk_datasource_otcs(data_source=data_source)
13356
- if not data:
14330
+ if data is None:
13357
14331
  logger.error(
13358
14332
  "Failure during load of OpenText Content Server items!"
13359
14333
  )
13360
14334
  return None
13361
14335
  case "pht":
13362
14336
  data = self.process_bulk_datasource_pht(data_source=data_source)
13363
- if not data:
14337
+ if data is None:
13364
14338
  logger.error(
13365
14339
  "Failure during load of OpenText Product Hierarchy (PHT)!"
13366
14340
  )
13367
14341
  return None
13368
14342
  case "json":
13369
14343
  data = self.process_bulk_datasource_json(data_source=data_source)
13370
- if not data:
14344
+ if data is None:
13371
14345
  logger.error("Failure during load of JSON data source!")
13372
14346
  return None
13373
14347
  case "xml":
13374
14348
  data = self.process_bulk_datasource_xml(data_source=data_source)
13375
- if not data:
14349
+ if data is None:
13376
14350
  logger.error("Failure during load of XML data source!")
13377
14351
  return None
13378
14352
  case _:
@@ -13381,6 +14355,10 @@ class Payload:
13381
14355
  )
13382
14356
  return None
13383
14357
 
14358
+ if data.get_data_frame().empty:
14359
+ logger.warning("Data source is empty - nothing loaded.")
14360
+ return None
14361
+
13384
14362
  logger.info(
13385
14363
  "Data Frame for source -> '%s' has %s row(s) and %s column(s) after data loading.",
13386
14364
  data_source_name,
@@ -13392,6 +14370,8 @@ class Payload:
13392
14370
  columns_to_drop = data_source.get("columns_to_drop", [])
13393
14371
  columns_to_keep = data_source.get("columns_to_keep", [])
13394
14372
  columns_to_add = data_source.get("columns_to_add", [])
14373
+ columns_to_add_list = data_source.get("columns_to_add_list", [])
14374
+ columns_to_add_table = data_source.get("columns_to_add_table", [])
13395
14375
  conditions = data_source.get("conditions", [])
13396
14376
  explosions = data_source.get("explosions", [])
13397
14377
 
@@ -13413,12 +14393,39 @@ class Payload:
13413
14393
  group_separator=add_column.get("group_separator", "."),
13414
14394
  )
13415
14395
 
13416
- # Drop columns if specified in data_source:
13417
- if columns_to_drop:
13418
- data.drop_columns(columns_to_drop)
13419
-
13420
- # Keep only selected columns if specified in data_source:
13421
- if columns_to_keep:
14396
+ # Add columns with list values from a list of other columns
14397
+ # if specified in data_source:
14398
+ for add_column in columns_to_add_list:
14399
+ if not "source_columns" in add_column or not "name" in add_column:
14400
+ logger.error(
14401
+ "Add list columns is missing name or source columns. Column will not be added."
14402
+ )
14403
+ continue
14404
+ data.add_column_list(
14405
+ source_columns=add_column["source_columns"],
14406
+ new_column=add_column["name"],
14407
+ )
14408
+
14409
+ # Add columns with table values from a list of other columns
14410
+ # if specified in data_source:
14411
+ for add_column in columns_to_add_table:
14412
+ if not "source_columns" in add_column or not "name" in add_column:
14413
+ logger.error(
14414
+ "Add table columns is missing name or source columns. Column will not be added."
14415
+ )
14416
+ continue
14417
+ data.add_column_table(
14418
+ source_columns=add_column["source_columns"],
14419
+ new_column=add_column["name"],
14420
+ delimiter=add_column.get("list_splitter", ","),
14421
+ )
14422
+
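Hypothetical payload snippets for these two options (column names are made up):

columns_to_add_list = [
    {"name": "all_versions", "source_columns": ["version_1", "version_2"]},
]
columns_to_add_table = [
    {
        "name": "releases",
        "source_columns": ["release_name", "release_date"],
        "list_splitter": ",",
    },
]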
14423
+ # Drop columns if specified in data_source:
14424
+ if columns_to_drop:
14425
+ data.drop_columns(columns_to_drop)
14426
+
14427
+ # Keep only selected columns if specified in data_source:
14428
+ if columns_to_keep:
13422
14429
  data.keep_columns(columns_to_keep)
13423
14430
 
13424
14431
  # cleanup data if specified in data_source
@@ -13441,6 +14448,7 @@ class Payload:
13441
14448
  explode_field = explosion["explode_field"]
13442
14449
  flatten_fields = explosion.get("flatten_fields", [])
13443
14450
  split_string_to_list = explosion.get("split_string_to_list", False)
14451
+ list_splitter = explosion.get("list_splitter", None)
13444
14452
  logger.info(
13445
14453
  "Starting explosion of data source '%s' by field(s) -> '%s' (type -> '%s'). Size of data set before explosion -> %s",
13446
14454
  data_source_name,
@@ -13453,6 +14461,8 @@ class Payload:
13453
14461
  flatten_fields=flatten_fields,
13454
14462
  make_unique=False,
13455
14463
  split_string_to_list=split_string_to_list,
14464
+ separator=list_splitter,
14465
+ reset_index=True,
13456
14466
  )
13457
14467
  logger.info("Size of data set after explosion -> %s", str(len(data)))
13458
14468
 
@@ -13545,12 +14555,24 @@ class Payload:
13545
14555
  char="-",
13546
14556
  )
13547
14557
 
14558
+ copy_data_source = bulk_workspace.get("copy_data_source", False)
13548
14559
  force_reload = bulk_workspace.get("force_reload", True)
13549
14560
 
13550
14561
  # Load and prepare the data source for the bulk processing:
13551
- data = self.process_bulk_datasource(
13552
- data_source_name=data_source_name, force_reload=force_reload
13553
- )
14562
+ if copy_data_source:
14563
+ logger.info(
14564
+ "Take a copy of data source -> %s to avoid side-effects for repeative usage of the data source...",
14565
+ data_source_name,
14566
+ )
14567
+ data = Data(
14568
+ self.process_bulk_datasource(
14569
+ data_source_name=data_source_name, force_reload=force_reload
14570
+ )
14571
+ )
14572
+ else:
14573
+ data = self.process_bulk_datasource(
14574
+ data_source_name=data_source_name, force_reload=force_reload
14575
+ )
13554
14576
  if not data:
13555
14577
  logger.error(
13556
14578
  "Failed to load data source for bulk workspace type -> '%s'",
@@ -13558,6 +14580,46 @@ class Payload:
13558
14580
  )
13559
14581
  continue
13560
14582
 
14583
+ # Check if fields with list substructures should be exploded.
14584
+ # We may want to do this outside the bulkDatasource to only
14585
+ # explode for bulkDocuments and not for bulkWorkspaces or
14586
+ # bulkWorkspaceRelationships:
14587
+ explosions = bulk_workspace.get("explosions", [])
14588
+ for explosion in explosions:
14589
+ # explode field can be a string or a list
14590
+ # exploding multiple fields at once avoids
14591
+ # combinatorial explosions - this is VERY
14592
+ # different from exploding columns one after the other!
14593
+ if (
14594
+ not "explode_field" in explosion
14595
+ and not "explode_fields" in explosion
14596
+ ):
14597
+ logger.error("Missing explosion field(s)!")
14598
+ continue
14599
+ # we want to be backwards compatible...
14600
+ if "explode_field" in explosion:
14601
+ explode_fields = explosion["explode_field"]
14602
+ else:
14603
+ explode_fields = explosion["explode_fields"]
14604
+ flatten_fields = explosion.get("flatten_fields", [])
14605
+ split_string_to_list = explosion.get("split_string_to_list", False)
14606
+ list_splitter = explosion.get("list_splitter", None)
14607
+ logger.info(
14608
+ "Starting explosion of bulk workspaces by field(s) -> %s (type -> %s). Size of data set before explosion -> %s",
14609
+ explode_fields,
14610
+ type(explode_fields),
14611
+ str(len(data)),
14612
+ )
14613
+ data.explode_and_flatten(
14614
+ explode_field=explode_fields,
14615
+ flatten_fields=flatten_fields,
14616
+ make_unique=False,
14617
+ split_string_to_list=split_string_to_list,
14618
+ separator=list_splitter,
14619
+ reset_index=True,
14620
+ )
14621
+ logger.info("Size of data set after explosion -> %s", str(len(data)))
14622
+
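A hypothetical "explosions" payload for a bulkWorkspaces element (field names are made up); exploding two fields together keeps their items aligned instead of building a cross-product:

explosions = [
    {
        "explode_fields": ["product_names", "product_ids"],
        "flatten_fields": [],
        "split_string_to_list": True,
        "list_splitter": ";",
    },
]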
13561
14623
  # Check if duplicate lines for given fields should be removed:
13562
14624
  if "unique" in bulk_workspace and bulk_workspace["unique"]:
13563
14625
  unique_fields = bulk_workspace["unique"]
@@ -13627,24 +14689,29 @@ class Payload:
13627
14689
 
13628
14690
  # check if the template to be used is specified in the payload:
13629
14691
  if "template_name" in bulk_workspace:
13630
- template_name = bulk_workspace["template_name"]
14692
+ template_name_field = bulk_workspace["template_name"]
13631
14693
  workspace_template = next(
13632
14694
  (
13633
14695
  item
13634
14696
  for item in workspace_type["templates"]
13635
- if item["name"] == template_name
14697
+ if item["name"] == template_name_field
13636
14698
  ),
13637
14699
  None,
13638
14700
  )
13639
14701
  if workspace_template: # does this template exist?
13640
14702
  logger.info(
13641
14703
  "Workspace Template -> '%s' has been specified in payload and it does exist.",
13642
- template_name,
14704
+ template_name_field,
14705
+ )
14706
+ elif "{" in template_name_field and "}" in template_name_field:
14707
+ logger.info(
14708
+ "Workspace Template -> '%s' has been specified in payload and contains placeholders, validation cannot be performed.",
14709
+ template_name_field,
13643
14710
  )
13644
14711
  else:
13645
14712
  logger.error(
13646
14713
  "Workspace Template -> '%s' has been specified in payload but it doesn't exist!",
13647
- template_name,
14714
+ template_name_field,
13648
14715
  )
13649
14716
  logger.error(
13650
14717
  "Workspace Type -> '%s' has only these templates -> %s",
@@ -13656,15 +14723,12 @@ class Payload:
13656
14723
  # template to be used is NOT specified in the payload - then we just take the first one:
13657
14724
  else:
13658
14725
  workspace_template = workspace_type["templates"][0]
14726
+ template_name_field = None
13659
14727
  logger.info(
13660
14728
  "Workspace Template has not been specified in payload - we just take the first one (%s)",
13661
14729
  workspace_template,
13662
14730
  )
13663
14731
 
13664
- template_id = workspace_template["id"]
13665
- template_name = workspace_template["name"]
13666
- workspace_type_id = workspace_type["id"]
13667
-
13668
14732
  if not "categories" in bulk_workspace:
13669
14733
  logger.info(
13670
14734
  "Bulk workspace payload has no category data! Will leave category attributes empty..."
@@ -13674,15 +14738,14 @@ class Payload:
13674
14738
  categories = bulk_workspace["categories"]
13675
14739
 
13676
14740
  # Should existing workspaces be updated? No is the default.
13677
- enforce_updates = bulk_workspace.get("enforce_updates", False)
14741
+ operations = bulk_workspace.get("operations", ["create"])
13678
14742
 
13679
14743
  logger.info(
13680
- "Bulk create Workspaces (name field -> %s, type -> '%s') from workspace template -> '%s' (%s). Enforce Updates -> %s.",
14744
+ "Bulk create Workspaces (name field -> %s, type -> '%s') from workspace template -> '%s'. Operations -> %s.",
13681
14745
  workspace_name_field,
13682
14746
  type_name,
13683
- template_name,
13684
- template_id,
13685
- str(enforce_updates),
14747
+ template_name_field,
14748
+ str(operations),
13686
14749
  )
13687
14750
 
13688
14751
  # see if bulkWorkspace definition has a specific thread number
@@ -13706,12 +14769,12 @@ class Payload:
13706
14769
  self.process_bulk_workspaces_worker,
13707
14770
  bulk_workspace,
13708
14771
  partition,
13709
- template_id,
13710
- workspace_type_id,
14772
+ workspace_type,
14773
+ template_name_field,
13711
14774
  workspace_name_field,
13712
14775
  workspace_description_field,
13713
14776
  categories,
13714
- enforce_updates,
14777
+ operations,
13715
14778
  results,
13716
14779
  ),
13717
14780
  )
@@ -13731,20 +14794,24 @@ class Payload:
13731
14794
  for result in results:
13732
14795
  if not result["success"]:
13733
14796
  logger.info(
13734
- "Thread -> %s completed with %s failed, %s skipped, and %s created %s workspaces.",
14797
+ "Thread -> %s completed with %s failed, %s skipped, %s created, %s updated, and %s deleted '%s' workspaces.",
13735
14798
  str(result["thread_id"]),
13736
14799
  str(result["failure_counter"]),
13737
14800
  str(result["skipped_counter"]),
13738
- str(result["success_counter"]),
14801
+ str(result["create_counter"]),
14802
+ str(result["update_counter"]),
14803
+ str(result["delete_counter"]),
13739
14804
  type_name,
13740
14805
  )
13741
14806
  success = False
13742
14807
  else:
13743
14808
  logger.info(
13744
- "Thread -> %s completed successful with %s skipped, and %s created %s workspaces.",
14809
+ "Thread -> %s completed successful with %s skipped, %s created, %s updated, and %s deleted '%s' workspaces.",
13745
14810
  str(result["thread_id"]),
13746
14811
  str(result["skipped_counter"]),
13747
- str(result["success_counter"]),
14812
+ str(result["create_counter"]),
14813
+ str(result["update_counter"]),
14814
+ str(result["delete_counter"]),
13748
14815
  type_name,
13749
14816
  )
13750
14817
  # Record all generated workspaces. If this should allow us
@@ -13761,7 +14828,8 @@ class Payload:
13761
14828
  def process_bulk_categories(
13762
14829
  self, row: pd.Series, index: str, categories: list, replacements: list
13763
14830
  ) -> list:
13764
- """Helper method to replace the value placeholders the bulk category structures with the Pandas Series (row)
14831
+ """Helper method to replace the value placeholders the bulk category structures
14832
+ in the payload with values from the Pandas Series (row)
13765
14833
 
13766
14834
  Args:
13767
14835
  row (pd.Series): current row
@@ -13775,10 +14843,85 @@ class Payload:
13775
14843
  # list and dicts are "mutable" data structures in Python!
13776
14844
  worker_categories = copy.deepcopy(categories)
13777
14845
 
14846
+ # In this first loop we expand table-value attributes into a new
14847
+ # list of category / attribute payload items. The value of a table-value attribute
14848
+ # is a list of dictionaries (stored as a string that we evaluate into a Python
14849
+ # data structure):
14850
+ worker_categories_expanded = []
14851
+ for category_item in worker_categories:
14852
+ if "value_type" in category_item and category_item["value_type"] == "table":
14853
+ value_field = category_item["value_field"]
14854
+
14855
+ # The following method always returns a string even if the value is actually a list.
14856
+ # TODO: consider to change replace_bulk_placeholders to return "str | list".
14857
+ # But this may be difficult as we still want to support multiple placeholders in one string...
14858
+ value = self.replace_bulk_placeholders(
14859
+ input_string=value_field,
14860
+ row=row,
14861
+ index=None,
14862
+ replacements=replacements,
14863
+ )
14864
+ if not value:
14865
+ logger.warning(
14866
+ "Value table-type attribute is empty (value field -> %s). Cannot parse table. Skipping...",
14867
+ value_field,
14868
+ )
14869
+ continue
14870
+
14871
+ try:
14872
+ value_table = literal_eval(value)
14873
+ except (SyntaxError, ValueError) as e:
14874
+ logger.error(
14875
+ "Cannot parse table-type attribute; value field -> %s; error -> %s",
14876
+ value_field,
14877
+ str(e),
14878
+ )
14879
+ continue
14880
+
14881
+ if not isinstance(value_table, list):
14882
+ logger.error("Table-type value requires a list of dictionaries!")
14883
+ continue
14884
+
14885
+ # Get the mapping of the loader generated columns in the Data Frame to the
14886
+ # attribute names in the target OTCS category. If no mapping
14887
+ # is in the payload, then it is assumed that the category
14888
+ # attribute names are identical to the column names in the Data Frame
14889
+ #
14890
+ # Example mapping:
14891
+ #
14892
+ # attribute_mapping = {
14893
+ # "Application": "u_product_model",
14894
+ # "Version": "u_version_name"
14895
+ # }
14896
+
14897
+ attribute_mapping = category_item.get("attribute_mapping", None)
14898
+
14899
+ row_index = 1
14900
+ for value_row in value_table:
14901
+ for key, value in value_row.items():
14902
+ attribute = {}
14903
+ attribute["name"] = category_item.get("name", "")
14904
+ attribute["set"] = category_item.get("set", "")
14905
+ attribute["row"] = row_index
14906
+ # check if we have a mapping for this attribute in the payload:
14907
+ if attribute_mapping and key in attribute_mapping:
14908
+ attribute["attribute"] = attribute_mapping[key]
14909
+ else:
14910
+ attribute["attribute"] = key
14911
+ # For tables, values can be None if the numbers of
14912
+ # list items in the source columns are not equal.
14913
+ # To avoid the warning below we set the value to empty string
14914
+ # if it is None:
14915
+ attribute["value"] = value if value is not None else ""
14916
+ worker_categories_expanded.append(attribute)
14917
+ row_index += 1
14918
+ else:
14919
+ worker_categories_expanded.append(category_item)
14920
+
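Illustrative walk-through of the expansion for one table-type category item (all names and values are made up):

category_item = {
    "name": "Product Info",
    "set": "Versions",
    "value_type": "table",
    "value_field": "{versions}",
    "attribute_mapping": {"Application": "u_product_model", "Version": "u_version_name"},
}
# Assume the data row resolves "{versions}" to this string:
#   "[{'Application': 'App A', 'Version': '1.0'}]"
# The loop above then appends one attribute payload per key and table row, roughly:
#   {"name": "Product Info", "set": "Versions", "row": 1, "attribute": "u_product_model", "value": "App A"}
#   {"name": "Product Info", "set": "Versions", "row": 1, "attribute": "u_version_name", "value": "1.0"}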
13778
14921
  # this loop generates "value" for each
13779
14922
  # "value_field". "value_field" may also contain lists
13780
14923
  # that are either delimited by [...] or by a "value_type" with value "list"
13781
- for category_item in worker_categories:
14924
+ for category_item in worker_categories_expanded:
13782
14925
  if not "attribute" in category_item:
13783
14926
  logger.error(
13784
14927
  "Category item -> %s is missing the attribute field!",
@@ -13813,36 +14956,72 @@ class Payload:
13813
14956
 
13814
14957
  # if we don't have a value now, then there's an issue:
13815
14958
  if value is None:
13816
- logger.error(
14959
+ value = ""
14960
+ logger.warning(
13817
14961
  "Category item needs either a value or value_field! Skipping attribute -> '%s'",
13818
14962
  category_item["attribute"],
13819
14963
  )
14964
+
14965
+ # We have an empty string value (this is different from None!)
14966
+ if value == "":
14967
+ category_item["value"] = value
14968
+ # We can continue as any further processing (below) does not make sense for an empty string value:
13820
14969
  continue
13821
14970
 
13822
- # Handle this special case where we get a string that actually represents a list.
13823
- # Convert it back to a real list:
13824
- is_list = False
13825
- if category_item.get("value_type", "string") == "list":
14971
+ # This variable should only be true if we don't have
14972
+ # a native python string but a delimiter separated
14973
+ # value list in a string, e.g. "a, b, c" or "a | b | c" or "x;y;z":
14974
+ is_list_in_string = False
14975
+
14976
+ # The datasource loader may have written a real python list into the value
14977
+ # In this case the value includes square brackets [...]
14978
+ if value.startswith("[") and value.endswith("]"):
14979
+ # Remove the square brackets and declare it is a list!
14980
+ try:
14981
+ value = literal_eval(value)
14982
+ except (SyntaxError, ValueError) as e:
14983
+ logger.warning(
14984
+ "Cannot directly parse list-type attribute; value field -> %s; error -> %s. Try string processing...",
14985
+ value_field,
14986
+ str(e),
14987
+ )
14988
+ logger.warning(
14989
+ "Value string -> %s has [...] - remove brackets and interpret as delimiter separated values in a string...",
14990
+ value,
14991
+ )
14992
+ # In this failure case we try to remove the square brackets and hope the inner part
14993
+ # can be treated as a string of values delimited with a delimiter (e.g. comma or semicolon)
14994
+ value = value.strip("[]")
14995
+ is_list_in_string = True
14996
+
14997
+ # Handle this special case where we get a string that actually represents a date time format and convert it.
14998
+ if category_item.get("value_type", "string") == "datetime":
13826
14999
  # if it is explicitly declared
15000
+ old_value = value
15001
+ date_obj = parse(value)
15002
+ value = datetime.strftime(date_obj, "%Y-%m-%dT%H:%M:%SZ")
15003
+
13827
15004
  logger.debug(
13828
- "Value -> %s is declared in payload to be a list (items separated by comma or semicolon)",
15005
+ "Attribute -> %s is declared in payload to be a datetime (convert format). Converting from -> %s to -> %s",
15006
+ category_item.get("attribute"),
15007
+ old_value,
13829
15008
  value,
13830
15009
  )
13831
- is_list = True
13832
- # also values not declared as lists may include lists indicated by [...]
13833
- # also if value_type == "list" we double-check that no [...] are around the values:
13834
- if value.startswith("[") and value.endswith("]"):
13835
- # Remove the square brackets and declare it is a list!
13836
- logger.debug(
13837
- "Value string -> %s has [...] - remove brackets...",
15010
+
15011
+ # Handle special case where we get a string that actually represents a list but is
15012
+ # not yet a python list.
15013
+ # This requires that value_type == "list" we double-check that no [...] are around the values:
15014
+ # Convert it back to a real list:
15015
+ if (
15016
+ category_item.get("value_type", "string") == "list" or is_list_in_string
15017
+ ) and isinstance(value, str):
15018
+ logger.info(
15019
+ "Value -> %s is declared in payload to be a list (items separated by comma or semicolon) or the python list evaluation failed.",
13838
15020
  value,
13839
15021
  )
13840
- value = value.strip("[]")
13841
- is_list = True
13842
- if is_list:
13843
15022
  # we split the string at commas or semicolons:
13844
15023
  list_splitter = category_item.get("list_splitter", ";,")
13845
- logger.debug(
15024
+ logger.info(
13846
15025
  "Split value string -> %s after these characters -> '%s'",
13847
15026
  value,
13848
15027
  list_splitter,
@@ -13860,7 +15039,7 @@ class Payload:
13860
15039
  # Remove the quotes around each element
13861
15040
  elements = [element.strip("'") for element in elements]
13862
15041
  value = elements
13863
- logger.debug(
15042
+ logger.info(
13864
15043
  "Found list for a multi-value category attribute -> '%s' from field -> '%s' in data row -> %s. Value -> %s",
13865
15044
  category_item["attribute"],
13866
15045
  value_field,
@@ -13869,6 +15048,9 @@ class Payload:
13869
15048
  )
13870
15049
  # now we check if there's a data lookup configured in the payload:
13871
15050
  lookup_data_source = category_item.get("lookup_data_source", None)
15051
+ # Do we want to drop / clear values that fail to lookup?
15052
+ drop_value = category_item.get("lookup_data_failure_drop", False)
15053
+
13872
15054
  if lookup_data_source:
13873
15055
  logger.info(
13874
15056
  "Found lookup data source -> '%s' for attribute -> '%s'. Processing...",
@@ -13891,18 +15073,31 @@ class Payload:
13891
15073
  )
13892
15074
  value = synonym
13893
15075
  else:
13894
- logger.warning(
13895
- "Cannot lookup the value for attribute -> '%s' and value -> '%s' in data source -> '%s'. Keep existing value.",
13896
- category_item["attribute"],
13897
- value,
13898
- lookup_data_source,
13899
- )
15076
+ if drop_value:
15077
+ logger.warning(
15078
+ "Cannot lookup the value for attribute -> '%s' and value -> '%s' in data source -> '%s'. Clear existing value.",
15079
+ category_item["attribute"],
15080
+ value,
15081
+ lookup_data_source,
15082
+ )
15083
+ # Clear the value:
15084
+ value = ""
15085
+ else:
15086
+ logger.warning(
15087
+ "Cannot lookup the value for attribute -> '%s' and value -> '%s' in data source -> '%s'. Keep existing value.",
15088
+ category_item["attribute"],
15089
+ value,
15090
+ lookup_data_source,
15091
+ )
13900
15092
  else:
13901
15093
  # value is a list - so we apply the lookup to each item:
13902
- for i, s in enumerate(value):
15094
+ # Iterate backwards to avoid index issues while popping items:
15095
+ for i in range(len(value) - 1, -1, -1):
15096
+ # Make sure the value does not have leading or trailing spaces:
15097
+ value[i] = value[i].strip()
13903
15098
  (_, synonym) = self.process_bulk_workspaces_synonym_lookup(
13904
15099
  data_source_name=lookup_data_source,
13905
- workspace_name_synonym=s,
15100
+ workspace_name_synonym=value[i],
13906
15101
  workspace_type=None, # we don't need the workspace ID, just the workspace name
13907
15102
  )
13908
15103
  if synonym:
@@ -13915,12 +15110,22 @@ class Payload:
13915
15110
  )
13916
15111
  value[i] = synonym
13917
15112
  else:
13918
- logger.warning(
13919
- "Cannot lookup the value for attribute -> '%s' and value -> '%s' in data source -> '%s'. Keep existing value.",
13920
- category_item["attribute"],
13921
- value[i],
13922
- lookup_data_source,
13923
- )
15113
+ if drop_value:
15114
+ logger.warning(
15115
+ "Cannot lookup the value -> '%s' for attribute -> '%s' in data source -> '%s'. Drop existing value from list.",
15116
+ value[i],
15117
+ category_item["attribute"],
15118
+ lookup_data_source,
15119
+ )
15120
+ # Remove the list item we couldn't lookup as drop_value is True:
15121
+ value.pop(i)
15122
+ else:
15123
+ logger.warning(
15124
+ "Cannot lookup the value -> '%s' for attribute -> '%s' in data source -> '%s'. Keep existing value.",
15125
+ value[i],
15126
+ category_item["attribute"],
15127
+ lookup_data_source,
15128
+ )
13924
15129
  if value_field:
13925
15130
  logger.debug(
13926
15131
  "Reading category attribute -> '%s' from field -> '%s' in data row -> %s. Value -> %s",
@@ -13940,23 +15145,26 @@ class Payload:
13940
15145
 
13941
15146
  # cleanup categories_payload to remove empty rows of sets:
13942
15147
  rows_to_remove = {}
13943
- for attribute in worker_categories:
15148
+ for attribute in worker_categories_expanded:
13944
15149
  if attribute.get("row") is not None:
13945
15150
  set_name = attribute["set"]
13946
15151
  row_number = attribute["row"]
13947
15152
  value = attribute["value"]
13948
15153
 
13949
15154
  # If value is empty, track it for removal
13950
- if not value: # Treat empty strings or None as empty
15155
+ if (
15156
+ not value or value == [""] or value == ""
15157
+ ): # Treat empty strings or None as empty
13951
15158
  if (set_name, row_number) not in rows_to_remove:
13952
15159
  rows_to_remove[(set_name, row_number)] = True
13953
15160
  else:
13954
15161
  # If any value in the row is not empty, mark the row as not removable
13955
15162
  rows_to_remove[(set_name, row_number)] = False
15163
+
13956
15164
  logger.debug("Empty Rows to remove from sets: %s", rows_to_remove)
13957
15165
  cleaned_categories = [
13958
15166
  item
13959
- for item in worker_categories
15167
+ for item in worker_categories_expanded
13960
15168
  if "set" not in item
13961
15169
  or "row" not in item
13962
15170
  or not rows_to_remove.get((item["set"], item["row"]), False)
@@ -13970,12 +15178,12 @@ class Payload:
13970
15178
  self,
13971
15179
  bulk_workspace: dict,
13972
15180
  partition: pd.DataFrame,
13973
- template_id: int,
13974
- workspace_type_id: int,
15181
+ workspace_type: dict,
15182
+ template_name_field: str | None,
13975
15183
  workspace_name_field: str,
13976
15184
  workspace_description_field: str,
13977
15185
  categories: list | None = None,
13978
- enforce_updates: bool = False,
15186
+ operations: list | None = None,
13979
15187
  results: list | None = None,
13980
15188
  ):
13981
15189
  """This is the thread worker to create workspaces in bulk.
@@ -13989,7 +15197,7 @@ class Payload:
13989
15197
  workspace_name_field (str): Field where the workspace name is stored
13990
15198
  workspace_description_field (str): Field where the workspace description is stored
13991
15199
  categories (list): list of category dictionaries
13992
- enforce_updates (bool): should existing workspaces be updated with new metadata?
15200
+ operations (list): which operations should be applied to workspaces: "create", "update", "delete", "recreate"
13993
15201
  results (list): mutable list of thread results
13994
15202
  """
13995
15203
 
@@ -14000,18 +15208,30 @@ class Payload:
14000
15208
  str(len(partition)),
14001
15209
  )
14002
15210
 
15211
+ # Avoid linter warnings - so make parameter default None while we
15212
+ # actually want ["create"] to be the default:
15213
+ if operations is None:
15214
+ operations = ["create"]
15215
+
14003
15216
  result = {}
14004
15217
  result["thread_id"] = thread_id
14005
15218
  result["success_counter"] = 0
14006
15219
  result["failure_counter"] = 0
14007
15220
  result["skipped_counter"] = 0
15221
+ result["create_counter"] = 0
15222
+ result["update_counter"] = 0
15223
+ result["delete_counter"] = 0
14008
15224
  result["workspaces"] = {}
14009
15225
  result["success"] = True
14010
15226
 
14011
15227
  # Check if workspaces have been processed before, i.e. testing
14012
15228
  # if a "workspaces" key exists and if it is pointing to a non-empty list.
14013
15229
  # Additionally we check that workspace updates are not enforced:
14014
- if bulk_workspace.get("workspaces", None) and not enforce_updates:
15230
+ if (
15231
+ bulk_workspace.get("workspaces", None)
15232
+ and "update" not in operations
15233
+ and "delete" not in operations
15234
+ ):
14015
15235
  existing_workspaces = bulk_workspace["workspaces"]
14016
15236
  logger.info(
14017
15237
  "Found %s already processed workspaces. Try to complete the job...",
@@ -14049,14 +15269,56 @@ class Payload:
14049
15269
  # Process all datasets in the partition that was given to the thread:
14050
15270
  for index, row in partition.iterrows():
14051
15271
 
14052
- # clear variables to esure clean state
14053
- workspace_id = None
14054
-
14055
15272
  logger.info(
14056
15273
  "Processing data row -> %s for bulk workspace creation...",
14057
15274
  str(index),
14058
15275
  )
14059
15276
 
15277
+ workspace_template = None
15278
+ if template_name_field is None:
15279
+ workspace_template = workspace_type["templates"][0]
15280
+
15281
+ else:
15282
+ workspace_template_name = self.replace_bulk_placeholders(
15283
+ input_string=template_name_field,
15284
+ row=row,
15285
+ replacements=replacements,
15286
+ )
15287
+
15288
+ workspace_template = next(
15289
+ (
15290
+ item
15291
+ for item in workspace_type["templates"]
15292
+ if item["name"] == workspace_template_name
15293
+ ),
15294
+ None,
15295
+ )
15296
+
15297
+ if workspace_template is None:
15298
+ logger.error(
15299
+ "Workspace Template -> '%s' has been specified in payload but it doesn't exist!",
15300
+ workspace_template_name,
15301
+ )
15302
+ logger.error(
15303
+ "Workspace Type -> '%s' has only these templates -> %s",
15304
+ workspace_type["name"],
15305
+ workspace_type["templates"],
15306
+ )
15307
+ result["success"] = False
15308
+ result["failure_counter"] += 1
15309
+ continue
15310
+
15311
+ template_id = workspace_template["id"]
15312
+ template_name = workspace_template["name"]
15313
+ workspace_type_id = workspace_type["id"]
15314
+
15315
+ # clear variables to ensure clean state
15316
+ workspace_id = None
15317
+
15318
+ # Create a copy of the mutable operations list as we may
15319
+ # want to modify it:
15320
+ row_operations = list(operations)
15321
+
14060
15322
  # Determine the workspace name:
14061
15323
  workspace_name = self.replace_bulk_placeholders(
14062
15324
  input_string=workspace_name_field,
@@ -14073,6 +15335,8 @@ class Payload:
14073
15335
  continue
14074
15336
  # Workspace names for sure are not allowed to have ":":
14075
15337
  workspace_name = workspace_name.replace(":", "")
15338
+ # Workspace names for sure should not have leading or trailing spaces:
15339
+ workspace_name = workspace_name.strip()
14076
15340
  # Truncate the workspace name to 254 characters which is the maximum allowed length in Extended ECM
14077
15341
  if len(workspace_name) > 254:
14078
15342
  workspace_name = workspace_name[:254]
@@ -14109,12 +15373,54 @@ class Payload:
14109
15373
  )
14110
15374
  if not evaluated_condition:
14111
15375
  logger.info(
14112
- "Condition for row -> %s not met. Skipping row for workspace creation",
15376
+ "Condition for bulk workspace row -> %s not met. Skipping row for workspace creation",
14113
15377
  str(index),
14114
15378
  )
14115
15379
  result["skipped_counter"] += 1
14116
15380
  continue
14117
15381
 
15382
+ # Check if all data conditions to create or recreate the workspace are met:
15383
+ if "create" in row_operations or "recreate" in row_operations:
15384
+ conditions_create = bulk_workspace.get("conditions_create", None)
15385
+ if conditions_create:
15386
+ evaluated_conditions_create = self.evaluate_conditions(
15387
+ conditions=conditions_create, row=row, replacements=replacements
15388
+ )
15389
+ if not evaluated_conditions_create:
15390
+ logger.info(
15391
+ "Create condition for bulk workspace row -> %s not met. Excluding create operation for current row...",
15392
+ str(index),
15393
+ )
15394
+ if "create" in row_operations:
15395
+ row_operations.remove("create")
15396
+ if "recreate" in row_operations:
15397
+ row_operations.remove("recreate")
15398
+ elif (
15399
+ "recreate" in row_operations
15400
+ ): # we still create and recreate without conditions_create. But give a warning for 'recreate' without condition.
15401
+ logger.warning(
15402
+ "No create condition provided but 'recreate' operation requested. This will recreate all existing workspaces!"
15403
+ )
15404
+
15405
+ # Check if all data conditions to delete the workspace are met:
15406
+ if "delete" in row_operations:
15407
+ conditions_delete = bulk_workspace.get("conditions_delete", None)
15408
+ if conditions_delete:
15409
+ evaluated_conditions_delete = self.evaluate_conditions(
15410
+ conditions=conditions_delete, row=row, replacements=replacements
15411
+ )
15412
+ if not evaluated_conditions_delete:
15413
+ logger.info(
15414
+ "Delete condition for bulk workspace row -> %s not met. Excluding delete operation for current row...",
15415
+ str(index),
15416
+ )
15417
+ row_operations.remove("delete")
15418
+ else: # without conditions_delete we don't delete!!
15419
+ logger.warning(
15420
+ "Delete operation requested for bulk workspaces but conditions for deletion are missing! (specify 'conditions_delete')!"
15421
+ )
15422
+ row_operations.remove("delete")
15423
+
14118
15424
  # Determine the external modification field (if any):
14119
15425
  if external_modify_date_field:
14120
15426
  external_modify_date = self.replace_bulk_placeholders(
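The 'operations' list replaces the former enforce_updates boolean for bulk workspaces, and the optional 'conditions_create' / 'conditions_delete' keys gate the create/recreate and delete operations per data row. A minimal sketch of such a payload element as a parsed Python dict; only the keys 'operations', 'conditions_create', 'conditions_delete' and 'data_source' are taken from the code above, every other field name and value is an illustrative assumption:

    # Hypothetical bulk workspace payload element (illustrative values):
    bulk_workspace = {
        "data_source": "customer_master",              # assumed data source name
        "operations": ["create", "update", "delete"],  # defaults to ["create"] when omitted
        # rows must match this to be created or recreated (condition structure is illustrative):
        "conditions_create": [{"field": "status", "value": "active"}],
        # rows must match this to be deleted; without it, "delete" is ignored:
        "conditions_delete": [{"field": "status", "value": "obsolete"}],
    }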
@@ -14146,6 +15452,8 @@ class Payload:
14146
15452
  replacements=replacements,
14147
15453
  additional_regex_list=nickname_additional_regex_list,
14148
15454
  )
15455
+ # Nicknames for sure should not have leading or trailing spaces:
15456
+ nickname = nickname.strip()
14149
15457
  # Nicknames for sure are not allowed to include spaces:
14150
15458
  nickname = nickname.replace(" ", "_")
14151
15459
  # We also want to replace hyphens with underscores
@@ -14167,10 +15475,13 @@ class Payload:
14167
15475
  found_workspace_name,
14168
15476
  )
14169
15477
  else:
14170
- # Only skip if workspace update is not enforced:
14171
- if not enforce_updates:
15478
+ # Only skip if workspace update or delete is not requested:
15479
+ if (
15480
+ "update" not in row_operations
15481
+ and "delete" not in row_operations
15482
+ ):
14172
15483
  logger.info(
14173
- "Workspace -> '%s' with nickname -> '%s' does already exist (found -> %s). Skipping...",
15484
+ "Workspace -> '%s' with nickname -> '%s' does already exist (found -> %s). No update or delete operations requested or allowed. Skipping...",
14174
15485
  workspace_name,
14175
15486
  nickname,
14176
15487
  found_workspace_name,
@@ -14228,15 +15539,35 @@ class Payload:
14228
15539
  key,
14229
15540
  )
14230
15541
 
14231
- # We try to get the external modify date of the existing workspace.
14232
- # The REST API may not return these field if it was never set before.
14233
- # So we set show_error = False for this call to avoid error messages.
14234
- workspace_external_modify_date = self._otcs_frontend.get_result_value(
14235
- response, "external_modify_date", show_error=False
15542
+ # We get the modify date of the existing workspace.
15543
+ workspace_modify_date = self._otcs_frontend.get_result_value(
15544
+ response,
15545
+ "modify_date",
14236
15546
  )
14237
15547
 
14238
- # Workspace does not exists - we create a new workspace:
14239
- if not workspace_id:
15548
+ # Check if we want to recreate an existing workspace:
15549
+ if workspace_id and "recreate" in row_operations:
15550
+ response = self._otcs_frontend.delete_node(
15551
+ node_id=workspace_id, purge=True
15552
+ )
15553
+ if not response:
15554
+ logger.error(
15555
+ "Failed to bulk recreate existing workspace -> '%s' (%s) with type ID -> %s! Delete failed.",
15556
+ workspace_name,
15557
+ workspace_id,
15558
+ workspace_type_id,
15559
+ )
15560
+ result["success"] = False
15561
+ result["failure_counter"] += 1
15562
+ continue
15563
+ result["delete_counter"] += 1
15564
+ workspace_id = None
15565
+
15566
+ # Check if workspace does not exist - then we create a new workspace
15567
+ # if this is requested ("create" or "recreate" value in operations list in payload)
15568
+ if not workspace_id and (
15569
+ "create" in row_operations or "recreate" in row_operations
15570
+ ):
14240
15571
  # If category data is in payload we substitute
14241
15572
  # the values with data from the current data row:
14242
15573
  if categories:
@@ -14290,19 +15621,24 @@ class Payload:
14290
15621
  workspace_id = self._otcs_frontend.get_result_value(response, "id")
14291
15622
  if not workspace_id:
14292
15623
  logger.error(
14293
- "Failed to bulk create workspace -> '%s' with type ID -> %s!",
15624
+ "Failed to bulk create workspace -> '%s' with type ID -> %s from template -> %s (%s)!",
14294
15625
  workspace_name,
14295
15626
  workspace_type_id,
15627
+ template_name,
15628
+ template_id,
14296
15629
  )
14297
15630
  result["success"] = False
14298
15631
  result["failure_counter"] += 1
14299
15632
  continue
14300
15633
  else:
14301
15634
  logger.info(
14302
- "Successfully created bulk workspace -> '%s' with ID -> %s",
15635
+ "Successfully created bulk workspace -> '%s' with ID -> %s from template -> %s (%s)!",
14303
15636
  workspace_name,
14304
15637
  workspace_id,
15638
+ template_name,
15639
+ template_id,
14305
15640
  )
15641
+ result["create_counter"] += 1
14306
15642
  if self._aviator_enabled:
14307
15643
  if (
14308
15644
  "enable_aviator" in bulk_workspace
@@ -14317,14 +15653,38 @@ class Payload:
14317
15653
  workspace_name,
14318
15654
  workspace_id,
14319
15655
  )
14320
- # end if not workspace_id
14321
15656
 
14322
- # If updates are enforced we update the existing workspace with
15657
+ # Check if metadata embeddings need to be updated
15658
+ if bulk_workspace.get("aviator_metadata", False):
15659
+ logger.info(
15660
+ "Trigger external metadata embeding via FEME for Workspace -> %s (%s)",
15661
+ workspace_name,
15662
+ workspace_id,
15663
+ )
15664
+ self._otcs.feme_embedd_metadata(
15665
+ node_id=workspace_id,
15666
+ node_type=848,
15667
+ wait_for_completion=True,
15668
+ timeout=1.0,
15669
+ )
15670
+
15671
+ # end if not workspace_id and "create" in row_operations
15672
+
15673
+ # If updates are a requested row operation we update the existing workspace with
14323
15674
  # fresh metadata from the payload. Additionally we check the external
14324
15675
  # modify date to support incremental load for content that has really
14325
15676
  # changed.
14326
- elif enforce_updates and OTCS.date_is_newer(
14327
- date_old=workspace_external_modify_date, date_new=external_modify_date
15677
+ # In addition we check that "delete" is not requested as otherwise it will
15678
+ # never go in elif "delete" ... below (and it does not make sense to update a workspace
15679
+ # that is deleted in the next step...)
15680
+ elif (
15681
+ workspace_id
15682
+ and "update" in row_operations
15683
+ and "delete" not in row_operations # note the NOT !
15684
+ and OTCS.date_is_newer(
15685
+ date_old=workspace_modify_date,
15686
+ date_new=external_modify_date,
15687
+ )
14328
15688
  ):
14329
15689
  # If category data is in payload we substitute
14330
15690
  # the values with data from the current data row:
@@ -14378,23 +15738,58 @@ class Payload:
14378
15738
  result["success"] = False
14379
15739
  result["failure_counter"] += 1
14380
15740
  continue
15741
+ result["update_counter"] += 1
15742
+
15743
+ # Check if metadata embeddings need to be updated
15744
+ if bulk_workspace.get("aviator_metadata", False):
15745
+ logger.info(
15746
+ "Trigger external metadata embeding via FEME for Workspace -> %s (%s)",
15747
+ workspace_name,
15748
+ workspace_id,
15749
+ )
15750
+ self._otcs.feme_embedd_metadata(
15751
+ node_id=workspace_id, node_type=848, wait_for_completion=False
15752
+ )
14381
15753
 
14382
- # nickname has been calculated for existence test above
14383
- # we now assign it to the new workspace
14384
- if nickname:
14385
- response = self._otcs_frontend.set_node_nickname(
14386
- node_id=workspace_id, nickname=nickname, show_error=True
15754
+ # end elif "update" in row_operations...
15755
+ elif workspace_id and "delete" in row_operations:
15756
+ response = self._otcs_frontend.delete_node(
15757
+ node_id=workspace_id, purge=True
14387
15758
  )
14388
15759
  if not response:
14389
15760
  logger.error(
14390
- "Failed to assign nickname -> '%s' to workspace -> '%s'",
14391
- nickname,
15761
+ "Failed to bulk delete existing workspace -> '%s' with type ID -> %s!",
14392
15762
  workspace_name,
15763
+ workspace_type_id,
14393
15764
  )
14394
- result["success_counter"] += 1
14395
- # Record the workspace name and ID to allow to read it from failure file
14396
- # and speedup the process.
14397
- result["workspaces"][workspace_name] = workspace_id
15765
+ result["success"] = False
15766
+ result["failure_counter"] += 1
15767
+ continue
15768
+ result["delete_counter"] += 1
15769
+ workspace_id = None
15770
+ # end elif workspace_id and "delete" in row_operations
15771
+
15772
+ # Depending on the bulk operations (create, update, delete)
15773
+ # and the related conditions it may well be that workspace_id is None.
15774
+ # In this case we also don't want to set the nickname nor record this
15775
+ # as success:
15776
+ if workspace_id:
15777
+ if nickname:
15778
+ response = self._otcs_frontend.set_node_nickname(
15779
+ node_id=workspace_id, nickname=nickname, show_error=True
15780
+ )
15781
+ if not response:
15782
+ logger.error(
15783
+ "Failed to assign nickname -> '%s' to workspace -> '%s'",
15784
+ nickname,
15785
+ workspace_name,
15786
+ )
15787
+ result["success_counter"] += 1
15788
+ # Record the workspace name and ID to allow to read it from failure file
15789
+ # and speedup the process.
15790
+ result["workspaces"][workspace_name] = workspace_id
15791
+ else:
15792
+ result["skipped_counter"] += 1
14398
15793
 
14399
15794
  logger.info("End working...")
14400
15795
 
@@ -14409,6 +15804,9 @@ class Payload:
14409
15804
  ) -> pd.Series | None:
14410
15805
  """Lookup a value in a given data source (specified by payload dict).
14411
15806
  If the data source has not been loaded before then load the data source.
15807
+ As this runs in a multi-threading environment we need to protect
15808
+ the data source update from multiple threads doing it at the same time.
15809
+ A global data_load_lock variable acts as a mutex.
14412
15810
 
14413
15811
  Args:
14414
15812
  data_source (dict): Payload dictionary of the data source definition.
@@ -14424,27 +15822,27 @@ class Payload:
14424
15822
  logger.error("Data source has no name!")
14425
15823
  return None
14426
15824
 
14427
- # First we check if the data source has been loaded already.
14428
- # If not, we load the data source on the fly:
14429
- data_source_data: Data = data_source.get("data", None)
14430
- if not data_source_data:
14431
- logger.warning(
14432
- "Data source -> '%s' has no data. Trying to reload...",
14433
- data_source_name,
14434
- )
14435
- # We don't want multiple threads to trigger a datasource load at the same time,
14436
- # so we use a lock (mutex) to avoid this:
14437
- data_load_lock.acquire()
14438
- try:
15825
+ # We don't want multiple threads to trigger a datasource load at the same time,
15826
+ # so we use a lock (mutex) to avoid this:
15827
+ data_load_lock.acquire()
15828
+ try:
15829
+ # First we check if the data source has been loaded already.
15830
+ # If not, we load the data source on the fly:
15831
+ data_source_data: Data = data_source.get("data", None)
15832
+ if not data_source_data:
15833
+ logger.warning(
15834
+ "Data source -> '%s' has no data. Trying to reload...",
15835
+ data_source_name,
15836
+ )
14439
15837
  data_source_data = self.process_bulk_datasource(
14440
15838
  data_source_name=data_source_name,
14441
15839
  force_reload=True,
14442
15840
  )
14443
- finally:
14444
- # Ensure the lock is released even if an error occurs
14445
- data_load_lock.release()
15841
+ finally:
15842
+ # Ensure the lock is released even if an error occurs
15843
+ data_load_lock.release()
14446
15844
 
14447
- # iIf we still don't have data from this data source we bail out:
15845
+ # If we still don't have data from this data source we bail out:
14448
15846
  if not data_source_data:
14449
15847
  logger.error(
14450
15848
  "Data source -> '%s' has no data and reload did not work. Cannot lookup value -> '%s' in column -> '%s'!",
@@ -14531,6 +15929,15 @@ class Payload:
14531
15929
  lookup_value=workspace_name_synonym,
14532
15930
  )
14533
15931
 
15932
+ if lookup_row is None:
15933
+ # Handle an edge case where the actual workspace name
15934
+ # is already correct:
15935
+ lookup_row = self.lookup_data_source_value(
15936
+ data_source=workspace_data_source,
15937
+ lookup_column=workspace_data_source_name_column,
15938
+ lookup_value=workspace_name_synonym,
15939
+ )
15940
+
14534
15941
  if lookup_row is not None:
14535
15942
  # Now we determine the real workspace name be taking it from
14536
15943
  # the name column in the result row:
@@ -14567,16 +15974,18 @@ class Payload:
14567
15974
  workspace_nickname: str | None = None,
14568
15975
  workspace_name: str | None = None,
14569
15976
  workspace_type: str | None = None,
15977
+ parent_id: int | None = None,
14570
15978
  data_source_name: str | None = None,
14571
15979
  ) -> tuple[int | None, str | None]:
14572
15980
  """Use a combination of workspace name, workspace type, and workspace datasource (using synonyms)
14573
15981
  to lookup the workspace name and ID
14574
15982
 
14575
15983
  Args:
14576
- workspace_nickname (str): the nickname of the workspace
14577
- workspace_name (str): The name as input for lookup. This must be one of the synonyms.
14578
- workspace_type (str): Name of the workspace type
14579
- data_source_name (str): Workspace data source name
15984
+ workspace_nickname (str, optional): the nickname of the workspace
15985
+ workspace_name (str, optional): The name as input for lookup. This must be one of the synonyms.
15986
+ workspace_type (str, optional): Name of the workspace type
15987
+ parent_id (int, optional): ID of parent workspace (if it is a sub-workspace) or parent folder
15988
+ data_source_name (str, optional): Workspace data source name
14580
15989
 
14581
15990
  Returns:
14582
15991
  tuple[int | None, str | None]: returns the workspace ID and the looked up workspace name
@@ -14607,10 +16016,20 @@ class Payload:
14607
16016
  workspace_name = workspace_name.strip()
14608
16017
  else:
14609
16018
  logger.error(
14610
- "No workspace name specified. Cannot find the workspace by type and name or synonym.",
16019
+ "No workspace name specified. Cannot find the workspace by nickname, nor by type and name, nor by parent ID and name, nor by synonym.",
14611
16020
  )
14612
16021
  return (None, None)
14613
16022
 
16023
+ # If we have workspace name and workspace parent ID then we try this:
16024
+ if workspace_name and parent_id is not None:
16025
+ response = self._otcs_frontend.get_node_by_parent_and_name(
16026
+ parent_id=parent_id, name=workspace_name
16027
+ )
16028
+ workspace_id = self._otcs_frontend.get_result_value(response, "id")
16029
+ if workspace_id:
16030
+ return (workspace_id, workspace_name)
16031
+
16032
+ # If we have workspace name and workspace type then we try this:
14614
16033
  if workspace_name and workspace_type:
14615
16034
  response = self._otcs_frontend.get_workspace_by_type_and_name(
14616
16035
  type_name=workspace_type, name=workspace_name
@@ -14620,8 +16039,8 @@ class Payload:
14620
16039
  return (workspace_id, workspace_name)
14621
16040
 
14622
16041
  # if the code gets to here we don't have a nickname and the workspace with given name,
14623
- # and type was not found either. Now we see if we can find the workspace name
14624
- # as a synonym in the workspace data source to find the real/correct name:
16042
+ # type, or parent ID was not found either. Now we see if we can find the workspace name
16043
+ # as a synonym in the workspace data source to find the real/correct workspace name:
14625
16044
  if data_source_name:
14626
16045
  logger.info(
14627
16046
  "Try to find the workspace with the synonym -> '%s' using data source -> '%s'...",
@@ -14649,6 +16068,9 @@ class Payload:
14649
16068
  if workspace_name:
14650
16069
  message += "{}by name -> '{}'".format(concat_string, workspace_name)
14651
16070
  concat_string = ", nor "
16071
+ if parent_id:
16072
+ message += "{}by parent ID -> {}".format(concat_string, parent_id)
16073
+ concat_string = ", nor "
14652
16074
  if data_source_name:
14653
16075
  message += "{}as synonym in data source -> {}".format(
14654
16076
  concat_string, data_source_name
@@ -14727,6 +16149,9 @@ class Payload:
14727
16149
  success = False
14728
16150
  continue
14729
16151
  from_workspace = bulk_workspace_relationship["from_workspace"]
16152
+ from_sub_workspace = bulk_workspace_relationship.get(
16153
+ "from_sub_workspace_name", None
16154
+ )
14730
16155
 
14731
16156
  # Read Pattern for "To" Workspace from payload:
14732
16157
  if not "to_workspace" in bulk_workspace_relationship:
@@ -14736,6 +16161,9 @@ class Payload:
14736
16161
  success = False
14737
16162
  continue
14738
16163
  to_workspace = bulk_workspace_relationship["to_workspace"]
16164
+ to_sub_workspace = bulk_workspace_relationship.get(
16165
+ "to_sub_workspace_name", None
16166
+ )
14739
16167
 
14740
16168
  # The payload element must have a "data_source" key:
14741
16169
  if not "data_source" in bulk_workspace_relationship:
@@ -14746,7 +16174,8 @@ class Payload:
14746
16174
 
14747
16175
  self._log_header_callback(
14748
16176
  text="Process Bulk Workspace Relationships from -> '{}' to -> '{}'".format(
14749
- from_workspace, to_workspace
16177
+ from_workspace if not from_sub_workspace else from_sub_workspace,
16178
+ to_workspace if not to_sub_workspace else to_sub_workspace,
14750
16179
  ),
14751
16180
  char="-",
14752
16181
  )
@@ -14779,8 +16208,8 @@ class Payload:
14779
16208
  if not data:
14780
16209
  logger.error(
14781
16210
  "Failed to load data source for bulk workspace relationships from -> '%s' to -> '%s'",
14782
- from_workspace,
14783
- to_workspace,
16211
+ from_workspace if not from_sub_workspace else from_sub_workspace,
16212
+ to_workspace if not to_sub_workspace else to_sub_workspace,
14784
16213
  )
14785
16214
  continue
14786
16215
 
@@ -14794,23 +16223,34 @@ class Payload:
14794
16223
  # exploding multiple fields at once avoids
14795
16224
  # combinatorial explosions - this is VERY
14796
16225
  # different from exploding columns one after the other!
14797
- if not "explode_field" in explosion:
16226
+ if (
16227
+ not "explode_field" in explosion
16228
+ and not "explode_fields" in explosion
16229
+ ):
14798
16230
  logger.error("Missing explosion field(s)!")
14799
16231
  continue
14800
- explode_field = explosion["explode_field"]
16232
+ # we want to be backwards compatible...
16233
+ if "explode_field" in explosion:
16234
+ explode_fields = explosion["explode_field"]
16235
+ else:
16236
+ explode_fields = explosion["explode_fields"]
14801
16237
  flatten_fields = explosion.get("flatten_fields", [])
14802
16238
  split_string_to_list = explosion.get("split_string_to_list", False)
16239
+ list_splitter = explosion.get(
16240
+ "list_splitter", ","
16241
+ ) # don't have None as default!
14803
16242
  logger.info(
14804
16243
  "Starting explosion of bulk relationships by field(s) -> %s (type -> %s). Size of data set before explosion -> %s",
14805
- explode_field,
14806
- type(explode_field),
16244
+ explode_fields,
16245
+ type(explode_fields),
14807
16246
  str(len(data)),
14808
16247
  )
14809
16248
  data.explode_and_flatten(
14810
- explode_field=explode_field,
16249
+ explode_field=explode_fields,
14811
16250
  flatten_fields=flatten_fields,
14812
16251
  make_unique=False,
14813
16252
  split_string_to_list=split_string_to_list,
16253
+ separator=list_splitter,
14814
16254
  reset_index=True,
14815
16255
  )
14816
16256
  logger.info("Size of data set after explosion -> %s", str(len(data)))
@@ -14852,8 +16292,8 @@ class Payload:
14852
16292
 
14853
16293
  logger.info(
14854
16294
  "Bulk create Workspace Relationships (from workspace -> '%s' to workspace -> '%s')",
14855
- from_workspace,
14856
- to_workspace,
16295
+ from_workspace if not from_sub_workspace else from_sub_workspace,
16296
+ to_workspace if not to_sub_workspace else to_sub_workspace,
14857
16297
  )
14858
16298
 
14859
16299
  bulk_thread_number = int(
@@ -14875,8 +16315,6 @@ class Payload:
14875
16315
  self.process_bulk_workspace_relationships_worker,
14876
16316
  bulk_workspace_relationship,
14877
16317
  partition,
14878
- from_workspace,
14879
- to_workspace,
14880
16318
  results,
14881
16319
  ),
14882
16320
  )
@@ -14917,11 +16355,210 @@ class Payload:
14917
16355
  result["relationships"]
14918
16356
  )
14919
16357
 
14920
- self.write_status_file(
14921
- success, section_name, self._bulk_workspace_relationships
16358
+ self.write_status_file(
16359
+ success, section_name, self._bulk_workspace_relationships
16360
+ )
16361
+
16362
+ return success
16363
+
16364
+ # end method definition
16365
+
16366
+ def get_bulk_workspace_relationship_endpoint(
16367
+ self,
16368
+ bulk_workspace_relationship: dict,
16369
+ row: pd.Series,
16370
+ index: int,
16371
+ endpoint: str,
16372
+ replacements: dict | None = None,
16373
+ nickname_additional_regex_list: list | None = None,
16374
+ ) -> tuple[int | None, str | None]:
16375
+ """Determine the node ID of the workspace that is one of the endpoints
16376
+ of the workspace relationship (either 'from' or 'to')
16377
+
16378
+ Args:
16379
+ bulk_workspace_relationship (dict): The payload element of the bulk workspace relationship
16380
+ row (pd.Series): the data frame row
16381
+ index (int): the index of the data frame row
16382
+ endpoint (str): name of the endpoint - either "from" or "to"
16383
+ replacements (dict | None, optional): Replacements for placeholders. Defaults to None.
16384
+ nickname_additional_regex_list (list | None, optional): Additional regex replacements for nicknames. Defaults to None.
16385
+
16386
+ Returns:
16387
+ tuple[int | None, str | None]: returns the workspace ID and the looked up workspace name
16388
+ """
16389
+
16390
+ if endpoint not in ["from", "to"]:
16391
+ logger.error("The endpoint must be either 'from' or 'to'!")
16392
+ return (None, None)
16393
+
16394
+ # Determine the workspace nickname field:
16395
+ workspace_nickname_field = bulk_workspace_relationship.get(
16396
+ "{}_workspace".format(endpoint), None
16397
+ )
16398
+ workspace_nickname = self.replace_bulk_placeholders(
16399
+ input_string=workspace_nickname_field,
16400
+ row=row,
16401
+ replacements=replacements,
16402
+ additional_regex_list=nickname_additional_regex_list,
16403
+ )
16404
+ if not workspace_nickname:
16405
+ logger.warning(
16406
+ "Row -> %s does not have the required data to resolve -> %s for the workspace nickname (endpoint -> '%s')!",
16407
+ str(index),
16408
+ workspace_nickname_field,
16409
+ endpoint,
16410
+ )
16411
+ return (None, None)
16412
+
16413
+ # Get the workspace type if specified:
16414
+ workspace_type = bulk_workspace_relationship.get(
16415
+ "{}_workspace_type".format(endpoint), None
16416
+ )
16417
+
16418
+ # Get the workspace name if specified:
16419
+ workspace_name_field = bulk_workspace_relationship.get(
16420
+ "{}_workspace_name".format(endpoint), None
16421
+ )
16422
+ if workspace_name_field:
16423
+ workspace_name = self.replace_bulk_placeholders(
16424
+ input_string=workspace_name_field,
16425
+ row=row,
16426
+ replacements=replacements,
16427
+ )
16428
+ if not workspace_name:
16429
+ logger.warning(
16430
+ "Row -> %s does not have the required data to resolve -> %s for the workspace name (endpoint -> '%s')!",
16431
+ str(index),
16432
+ workspace_name_field,
16433
+ endpoint,
16434
+ )
16435
+ return (None, None)
16436
+ else:
16437
+ workspace_name = None
16438
+
16439
+ # Get the workspace datasource if specified:
16440
+ workspace_data_source = bulk_workspace_relationship.get(
16441
+ "{}_workspace_data_source".format(endpoint), None
16442
+ )
16443
+
16444
+ # Based on the given information, we now try to determine
16445
+ # the name and the ID of the workspace that is the endpoint
16446
+ # for the workspace relationship:
16447
+ (workspace_id, workspace_name) = self.process_bulk_workspaces_lookup(
16448
+ workspace_nickname=workspace_nickname,
16449
+ workspace_name=workspace_name,
16450
+ workspace_type=workspace_type,
16451
+ data_source_name=workspace_data_source,
16452
+ )
16453
+
16454
+ if not workspace_id:
16455
+ logger.warning(
16456
+ "Cannot find Workspace to establish relationship (endpoint -> '%s')%s%s%s%s",
16457
+ endpoint,
16458
+ (
16459
+ ", Nickname -> '{}'".format(workspace_nickname)
16460
+ if workspace_nickname
16461
+ else ""
16462
+ ),
16463
+ (
16464
+ ", Workspace Name -> '{}'".format(workspace_name)
16465
+ if workspace_name
16466
+ else ""
16467
+ ),
16468
+ (
16469
+ ", Workspace Type -> '{}'".format(workspace_type)
16470
+ if workspace_type
16471
+ else ""
16472
+ ),
16473
+ (
16474
+ ", Data Source -> '{}'".format(workspace_data_source)
16475
+ if workspace_data_source
16476
+ else ""
16477
+ ),
16478
+ )
16479
+ return (None, None)
16480
+
16481
+ # See if a sub-workspace is configured:
16482
+ sub_workspace_name_field = bulk_workspace_relationship.get(
16483
+ "{}_sub_workspace_name".format(endpoint), None
16484
+ )
16485
+ # If no sub-workspace is configured we can already
16486
+ # return the resulting workspace ID:
16487
+ if not sub_workspace_name_field:
16488
+ return (workspace_id, workspace_name)
16489
+
16490
+ # Otherwise we now process the sub-workspace to return
16491
+ # its ID instead:
16492
+ sub_workspace_name = self.replace_bulk_placeholders(
16493
+ input_string=sub_workspace_name_field,
16494
+ row=row,
16495
+ replacements=replacements,
16496
+ )
16497
+ if not sub_workspace_name:
16498
+ logger.warning(
16499
+ "Row -> %s does not have the required data to resolve -> %s for the sub-workspace name (endpoint -> '%s')!",
16500
+ str(index),
16501
+ sub_workspace_name_field,
16502
+ endpoint,
16503
+ )
16504
+ return (None, None)
16505
+
16506
+ # See if a sub-workspace is in a sub-path of the main workspace:
16507
+ sub_workspace_path = bulk_workspace_relationship.get(
16508
+ "{}_sub_workspace_path".format(endpoint), None
16509
+ )
16510
+ if sub_workspace_path:
16511
+ # sub_workspace_path is a mutable that is changed in place!
16512
+ result = self.replace_bulk_placeholders_list(
16513
+ input_list=sub_workspace_path,
16514
+ row=row,
16515
+ replacements=replacements,
16516
+ )
16517
+ if not result:
16518
+ logger.warning(
16519
+ "Row -> %s does not have the required data to resolve -> %s for the sub-workspace path (endpoint -> '%s')!",
16520
+ str(index),
16521
+ sub_workspace_path,
16522
+ endpoint,
16523
+ )
16524
+ return (None, None)
16525
+
16526
+ logger.info(
16527
+ "Endpoint has a sub-workspace -> '%s' configured. Try to find the sub-workspace in workspace path -> %s",
16528
+ sub_workspace_name,
16529
+ sub_workspace_path,
16530
+ )
16531
+
16532
+ # We now want to retrieve the folder in the main workspace that
16533
+ # includes the sub-workspace:
16534
+ response = self._otcs_frontend.get_node_by_workspace_and_path(
16535
+ workspace_id=workspace_id,
16536
+ path=sub_workspace_path,
16537
+ create_path=False, # we don't want to create the path if it doesn't exist
16538
+ show_error=True,
16539
+ )
16540
+ parent_id = self._otcs_frontend.get_result_value(response, "id")
16541
+ if not parent_id:
16542
+ logger.error(
16543
+ "Failed to find path -> %s in workspace -> '%s' (%s)...",
16544
+ str(sub_workspace_path),
16545
+ workspace_name,
16546
+ workspace_id,
16547
+ )
16548
+ return (None, None)
16549
+ # end if sub_workspace_path
16550
+ else:
16551
+ # the sub-workspace is immediately under the main workspace:
16552
+ parent_id = workspace_id
16553
+
16554
+ response = self._otcs_frontend.get_node_by_parent_and_name(
16555
+ parent_id=parent_id, name=sub_workspace_name, show_error=True
16556
+ )
16557
+ sub_workspace_id = self._otcs_frontend.get_result_value(
16558
+ response=response, key="id"
14922
16559
  )
14923
16560
 
14924
- return success
16561
+ return (sub_workspace_id, sub_workspace_name)
14925
16562
 
14926
16563
  # end method definition
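The new helper get_bulk_workspace_relationship_endpoint resolves either end of a relationship from a family of 'from_*' / 'to_*' payload keys, optionally descending into a sub-workspace. A hypothetical payload element combining those keys; the nicknames, names, path values and the placeholder syntax shown are illustrative assumptions:

    # Hypothetical bulk workspace relationship payload element (illustrative values):
    bulk_workspace_relationship = {
        "data_source": "contracts",
        "from_workspace": "ws_customer_{customer_id}",      # nickname pattern resolved per row
        "from_workspace_type": "Customer",
        "to_workspace": "ws_supplier_{supplier_id}",
        "to_sub_workspace_name": "Contract {contract_number}",
        "to_sub_workspace_path": ["Contracts", "{year}"],   # folders inside the main workspace
    }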
14927
16564
 
@@ -14929,8 +16566,6 @@ class Payload:
14929
16566
  self,
14930
16567
  bulk_workspace_relationship: dict,
14931
16568
  partition: pd.DataFrame,
14932
- from_workspace: str,
14933
- to_workspace: str,
14934
16569
  results: list | None = None,
14935
16570
  ):
14936
16571
  """This is the thread worker to create workspaces relationships in bulk.
@@ -15001,10 +16636,6 @@ class Payload:
15001
16636
  # Process all datasets in the partion that was given to the thread:
15002
16637
  for index, row in partition.iterrows():
15003
16638
 
15004
- # ensure clean variables by reset
15005
- from_workspace_id = None
15006
- to_workspace_id = None
15007
-
15008
16639
  logger.info(
15009
16640
  "Processing data row -> %s for bulk workspace relationship creation...",
15010
16641
  str(index),
@@ -15023,157 +16654,57 @@ class Payload:
15023
16654
  result["skipped_counter"] += 1
15024
16655
  continue
15025
16656
 
15026
- # Determine the workspace "from" nickname:
15027
- from_workspace_nickname = self.replace_bulk_placeholders(
15028
- input_string=from_workspace,
15029
- row=row,
15030
- replacements=replacements,
15031
- additional_regex_list=nickname_additional_regex_list,
15032
- )
15033
- if not from_workspace_nickname:
15034
- logger.warning(
15035
- "Row -> %s does not have the required data to resolve -> %s for the workspace nickname (from)!",
15036
- str(index),
15037
- from_workspace,
15038
- )
15039
- result["skipped_counter"] += 1
15040
- continue
15041
- from_workspace_type = bulk_workspace_relationship.get(
15042
- "from_workspace_type", None
15043
- )
15044
- from_workspace_name = bulk_workspace_relationship.get(
15045
- "from_workspace_name", None
15046
- )
15047
- if from_workspace_name:
15048
- from_workspace_name = self.replace_bulk_placeholders(
15049
- input_string=from_workspace_name,
16657
+ (from_workspace_id, from_workspace_name) = (
16658
+ self.get_bulk_workspace_relationship_endpoint(
16659
+ bulk_workspace_relationship=bulk_workspace_relationship,
15050
16660
  row=row,
16661
+ index=index,
16662
+ endpoint="from",
15051
16663
  replacements=replacements,
15052
- )
15053
- if not from_workspace_name:
15054
- logger.warning(
15055
- "Row -> %s does not have the required data to resolve -> %s for the workspace name (from)!",
15056
- str(index),
15057
- from_workspace,
15058
- )
15059
- result["skipped_counter"] += 1
15060
- continue
15061
- from_workspace_data_source = bulk_workspace_relationship.get(
15062
- "from_workspace_data_source", None
15063
- )
15064
-
15065
- (from_workspace_id, from_workspace_name) = (
15066
- self.process_bulk_workspaces_lookup(
15067
- workspace_nickname=from_workspace_nickname,
15068
- workspace_name=from_workspace_name,
15069
- workspace_type=from_workspace_type,
15070
- data_source_name=from_workspace_data_source,
16664
+ nickname_additional_regex_list=nickname_additional_regex_list,
15071
16665
  )
15072
16666
  )
15073
16667
 
15074
- if not from_workspace_id:
15075
- logger.warning(
15076
- "Cannot find Workspace to establish relationship (from)%s%s%s%s",
15077
- (
15078
- ", Nickname -> '{}'".format(from_workspace_nickname)
15079
- if from_workspace_nickname
15080
- else ""
15081
- ),
15082
- (
15083
- ", Workspace Name -> '{}'".format(from_workspace_name)
15084
- if from_workspace_name
15085
- else ""
15086
- ),
15087
- (
15088
- ", Workspace Type -> '{}'".format(from_workspace_type)
15089
- if from_workspace_type
15090
- else ""
15091
- ),
15092
- (
15093
- ", Data Source -> '{}'".format(from_workspace_data_source)
15094
- if from_workspace_data_source
15095
- else ""
15096
- ),
15097
- )
15098
- # Lower severity of this issue
15099
- # result["failure_counter"] += 1
15100
- # result["success"] = False
15101
- result["skipped_counter"] += 1
15102
- continue
15103
-
15104
- # Determine the workspace "to" nickname:
15105
- to_workspace_nickname = self.replace_bulk_placeholders(
15106
- input_string=to_workspace,
15107
- row=row,
15108
- replacements=replacements,
15109
- additional_regex_list=nickname_additional_regex_list,
15110
- )
15111
- if not to_workspace_nickname:
15112
- logger.warning(
15113
- "Row -> %s does not have the required data to resolve -> %s for the workspace nickname (to)!",
15114
- str(index),
15115
- to_workspace,
15116
- )
15117
- result["failure_counter"] += 1
15118
- continue
15119
- to_workspace_type = bulk_workspace_relationship.get(
15120
- "to_workspace_type", None
15121
- )
15122
- to_workspace_name = bulk_workspace_relationship.get(
15123
- "to_workspace_name", None
15124
- )
15125
- if to_workspace_name:
15126
- to_workspace_name = self.replace_bulk_placeholders(
15127
- input_string=to_workspace_name,
16668
+ (to_workspace_id, to_workspace_name) = (
16669
+ self.get_bulk_workspace_relationship_endpoint(
16670
+ bulk_workspace_relationship=bulk_workspace_relationship,
15128
16671
  row=row,
16672
+ index=index,
16673
+ endpoint="to",
15129
16674
  replacements=replacements,
16675
+ nickname_additional_regex_list=nickname_additional_regex_list,
15130
16676
  )
15131
- if not to_workspace_name:
15132
- logger.warning(
15133
- "Row -> %s does not have the required data to resolve -> %s for the workspace nickname (to)!",
15134
- str(index),
15135
- from_workspace,
15136
- )
15137
- result["skipped_counter"] += 1
15138
- continue
15139
- to_workspace_data_source = bulk_workspace_relationship.get(
15140
- "to_workspace_data_source", None
15141
- )
15142
-
15143
- (to_workspace_id, to_workspace_name) = self.process_bulk_workspaces_lookup(
15144
- workspace_nickname=to_workspace_nickname,
15145
- workspace_name=to_workspace_name,
15146
- workspace_type=to_workspace_type,
15147
- data_source_name=to_workspace_data_source,
15148
16677
  )
15149
16678
 
15150
- if not to_workspace_id:
16679
+ # Check we have both endpoints:
16680
+ if not from_workspace_id or not to_workspace_id:
15151
16681
  logger.warning(
15152
- "Cannot find Workspace to establish relationship (to)%s%s%s%s",
16682
+ "%s%s%s%s",
15153
16683
  (
15154
- ", Nickname -> '{}'".format(to_workspace_nickname)
15155
- if to_workspace_nickname
16684
+ "Failed to retrieve 'from' endpoint for bulk workspace relationship! "
16685
+ if not from_workspace_id and not from_workspace_name
15156
16686
  else ""
15157
16687
  ),
15158
16688
  (
15159
- ", Workspace Name -> '{}'".format(to_workspace_name)
15160
- if to_workspace_name
16689
+ "Failed to retrieve 'from' endpoint (workspace name -> {}) for bulk workspace relationship! ".format(
16690
+ from_workspace_name
16691
+ )
16692
+ if not from_workspace_id and from_workspace_name
15161
16693
  else ""
15162
16694
  ),
15163
16695
  (
15164
- ", Workspace Type -> '{}'".format(to_workspace_type)
15165
- if to_workspace_type
16696
+ "Failed to retrieve 'to' endpoint for bulk workspace relationship!"
16697
+ if not to_workspace_id and not to_workspace_name
15166
16698
  else ""
15167
16699
  ),
15168
16700
  (
15169
- ", Data Source -> '{}'".format(to_workspace_data_source)
15170
- if to_workspace_data_source
16701
+ "Failed to retrieve 'to' endpoint (workspace name -> {}) for bulk workspace relationship!".format(
16702
+ to_workspace_name
16703
+ )
16704
+ if not to_workspace_id and to_workspace_name
15171
16705
  else ""
15172
16706
  ),
15173
16707
  )
15174
- # Lower severity of this issue
15175
- # result["failure_counter"] += 1
15176
- # result["success"] = False
15177
16708
  result["skipped_counter"] += 1
15178
16709
  continue
15179
16710
 
@@ -15488,9 +17019,6 @@ class Payload:
15488
17019
  logger.info("Payload for Bulk Document is disabled. Skipping...")
15489
17020
  continue
15490
17021
 
15491
- copy_data_source = bulk_document.get("copy_data_source", False)
15492
- force_reload = bulk_document.get("force_reload", True)
15493
-
15494
17022
  # The payload element must have a "data_source" key:
15495
17023
  if not "data_source" in bulk_document:
15496
17024
  logger.error("No data source specified in Bulk Document!")
@@ -15498,11 +17026,14 @@ class Payload:
15498
17026
  continue
15499
17027
  data_source_name = bulk_document["data_source"]
15500
17028
 
17029
+ copy_data_source = bulk_document.get("copy_data_source", False)
17030
+ force_reload = bulk_document.get("force_reload", True)
17031
+
15501
17032
  # Load and prepare the data source for the bulk processing:
15502
17033
  if copy_data_source:
15503
17034
  logger.info(
15504
- "Take a copy of data source -> %s to avoid sideeffects for repeative usage of the data source...",
15505
- bulk_document["data_source"],
17035
+ "Take a copy of data source -> %s to avoid side-effects for repeative usage of the data source...",
17036
+ data_source_name,
15506
17037
  )
15507
17038
  data = Data(
15508
17039
  self.process_bulk_datasource(
@@ -15529,23 +17060,33 @@ class Payload:
15529
17060
  # exploding multiple fields at once avoids
15530
17061
  # combinatorial explosions - this is VERY
15531
17062
  # different from exploding columns one after the other!
15532
- if not "explode_field" in explosion:
17063
+ if (
17064
+ not "explode_field" in explosion
17065
+ and not "explode_fields" in explosion
17066
+ ):
15533
17067
  logger.error("Missing explosion field(s)!")
15534
17068
  continue
15535
- explode_field = explosion["explode_field"]
17069
+ # we want to be backwards compatible...
17070
+ if "explode_field" in explosion:
17071
+ explode_fields = explosion["explode_field"]
17072
+ else:
17073
+ explode_fields = explosion["explode_fields"]
15536
17074
  flatten_fields = explosion.get("flatten_fields", [])
15537
17075
  split_string_to_list = explosion.get("split_string_to_list", False)
17076
+ list_splitter = explosion.get("list_splitter", None)
15538
17077
  logger.info(
15539
17078
  "Starting explosion of bulk documents by field(s) -> %s (type -> %s). Size of data set before explosion -> %s",
15540
- explode_field,
15541
- str(type(explode_field)),
17079
+ explode_fields,
17080
+ str(type(explode_fields)),
15542
17081
  str(len(data)),
15543
17082
  )
15544
17083
  data.explode_and_flatten(
15545
- explode_field=explode_field,
17084
+ explode_field=explode_fields,
15546
17085
  flatten_fields=flatten_fields,
15547
17086
  make_unique=False,
15548
17087
  split_string_to_list=split_string_to_list,
17088
+ separator=list_splitter,
17089
+ reset_index=True,
15549
17090
  )
15550
17091
  logger.info("Size of data set after explosion -> %s", str(len(data)))
15551
17092
 
@@ -15607,12 +17148,12 @@ class Payload:
15607
17148
  categories = bulk_document["categories"]
15608
17149
 
15609
17150
  # Should existing documents be updated? False (= no) is the default.
15610
- enforce_updates = bulk_document.get("enforce_updates", False)
17151
+ operations = bulk_document.get("operations", ["create"])
15611
17152
 
15612
17153
  logger.info(
15613
- "Bulk create Documents (name field -> %s. Enforce Updates -> %s.)",
17154
+ "Bulk create Documents (name field -> %s. Operations -> %s.)",
15614
17155
  name_field,
15615
- str(enforce_updates),
17156
+ str(operations),
15616
17157
  )
15617
17158
 
15618
17159
  bulk_thread_number = int(
@@ -15678,7 +17219,7 @@ class Payload:
15678
17219
  name_field,
15679
17220
  description_field,
15680
17221
  categories,
15681
- enforce_updates,
17222
+ operations,
15682
17223
  results,
15683
17224
  source_otcs,
15684
17225
  ),
@@ -15699,19 +17240,23 @@ class Payload:
15699
17240
  for result in results:
15700
17241
  if not result["success"]:
15701
17242
  logger.info(
15702
- "Thread -> %s completed with %s failed, %s skipped, and %s created documents.",
17243
+ "Thread -> %s completed with %s failed, %s skipped, %s created, %s updated, and %s deleted documents.",
15703
17244
  str(result["thread_id"]),
15704
17245
  str(result["failure_counter"]),
15705
17246
  str(result["skipped_counter"]),
15706
- str(result["success_counter"]),
17247
+ str(result["create_counter"]),
17248
+ str(result["update_counter"]),
17249
+ str(result["delete_counter"]),
15707
17250
  )
15708
17251
  success = False
15709
17252
  else:
15710
17253
  logger.info(
15711
- "Thread -> %s completed successful with %s skipped, and %s created documents.",
17254
+ "Thread -> %s completed successful with %s skipped, %s created, %s updated, and %s deleted documents.",
15712
17255
  str(result["thread_id"]),
15713
17256
  str(result["skipped_counter"]),
15714
- str(result["success_counter"]),
17257
+ str(result["create_counter"]),
17258
+ str(result["update_counter"]),
17259
+ str(result["delete_counter"]),
15715
17260
  )
15716
17261
  # Record all generated documents. If this should allow us
15717
17262
  # to restart in case of failures and avoid trying to
@@ -15731,7 +17276,7 @@ class Payload:
15731
17276
  name_field: str,
15732
17277
  description_field: str,
15733
17278
  categories: list | None = None,
15734
- enforce_updates: bool = False,
17279
+ operations: list | None = None,
15735
17280
  results: list | None = None,
15736
17281
  source_otcs: OTCS | None = None,
15737
17282
  ):
@@ -15745,7 +17290,7 @@ class Payload:
15745
17290
  name_field (str): Field where the workspace name is stored
15746
17291
  description_field (str): Field where the workspace description is stored
15747
17292
  categories (list): list of category dictionaries
15748
- enforce_updates (bool): should existing documents be updated with new version and metadata?
17293
+ operations (list): which operations should be applied on documents: "create", "update", "delete"
15749
17294
  results (list): mutable list of thread results
15750
17295
  """
15751
17296
 
@@ -15756,18 +17301,30 @@ class Payload:
15756
17301
  str(len(partition)),
15757
17302
  )
15758
17303
 
17304
+ # Avoid linter warnings - so make parameter default None while we
17305
+ # actually want ["create"] to be the default:
17306
+ if operations is None:
17307
+ operations = ["create"]
17308
+
15759
17309
  result = {}
15760
17310
  result["thread_id"] = thread_id
15761
17311
  result["success_counter"] = 0
15762
17312
  result["failure_counter"] = 0
15763
17313
  result["skipped_counter"] = 0
17314
+ result["create_counter"] = 0
17315
+ result["update_counter"] = 0
17316
+ result["delete_counter"] = 0
15764
17317
  result["documents"] = {}
15765
17318
  result["success"] = True
15766
17319
 
15767
17320
  # Check if documents have been processed before, i.e. testing
15768
17321
  # if a "documents" key exists and if it is pointing to a non-empty list:
15769
17322
  # Additionally we check that workspace updates are not enforced:
15770
- if bulk_document.get("documents", None) and not enforce_updates:
17323
+ if (
17324
+ bulk_document.get("documents", None)
17325
+ and "update" not in operations
17326
+ and "delete" not in operations
17327
+ ):
15771
17328
  existing_documents = bulk_document["documents"]
15772
17329
  logger.info(
15773
17330
  "Found %s already processed documents. Try to complete the job...",
@@ -15829,7 +17386,13 @@ class Payload:
15829
17386
  str(index),
15830
17387
  )
15831
17388
 
17389
+ # clear variables to esure clean state
15832
17390
  parent_id = None
17391
+ document_id = None
17392
+
17393
+ # Create a copy of the mutable operations list as we may
17394
+ # want to modify it:
17395
+ row_operations = list(operations)
15833
17396
 
15834
17397
  # Check if all data conditions to create the document are met
15835
17398
  conditions = bulk_document.get("conditions", None)
@@ -15837,12 +17400,54 @@ class Payload:
15837
17400
  evaluated_condition = self.evaluate_conditions(conditions, row)
15838
17401
  if not evaluated_condition:
15839
17402
  logger.info(
15840
- "Document condition for row -> %s not met. Skipping row for document creation...",
17403
+ "Document condition for bulk document row -> %s not met. Skipping row for document creation...",
15841
17404
  str(index),
15842
17405
  )
15843
17406
  result["skipped_counter"] += 1
15844
17407
  continue
15845
17408
 
17409
+ # Check if all data conditions to create the document are met:
17410
+ if "create" in row_operations or "recreate" in row_operations:
17411
+ conditions_create = bulk_document.get("conditions_create", None)
17412
+ if conditions_create:
17413
+ evaluated_conditions_create = self.evaluate_conditions(
17414
+ conditions=conditions_create, row=row, replacements=replacements
17415
+ )
17416
+ if not evaluated_conditions_create:
17417
+ logger.info(
17418
+ "Create condition for bulk document row -> %s not met. Excluding create operation for current row...",
17419
+ str(index),
17420
+ )
17421
+ if "create" in row_operations:
17422
+ row_operations.remove("create")
17423
+ if "recreate" in row_operations:
17424
+ row_operations.remove("recreate")
17425
+ elif (
17426
+ "recreate" in row_operations
17427
+ ): # we still create and recreate without conditions_create. But give a warning for 'recreate' without condition.
17428
+ logger.warning(
17429
+ "No create condition provided but 'recreate' operation requested. This will recreate all existing documents!"
17430
+ )
17431
+
17432
+ # Check if all data conditions to delete the document are met:
17433
+ if "delete" in row_operations:
17434
+ conditions_delete = bulk_document.get("conditions_delete", None)
17435
+ if conditions_delete:
17436
+ evaluated_conditions_delete = self.evaluate_conditions(
17437
+ conditions=conditions_delete, row=row, replacements=replacements
17438
+ )
17439
+ if not evaluated_conditions_delete:
17440
+ logger.info(
17441
+ "Delete condition for bulk document row -> %s not met. Excluding delete operation for current row...",
17442
+ str(index),
17443
+ )
17444
+ row_operations.remove("delete")
17445
+ else: # without conditions_delete we don't delete!!
17446
+ logger.warning(
17447
+ "Delete operation requested for bulk documents but conditions for deletion are missing! (specify 'conditions_delete'!)"
17448
+ )
17449
+ row_operations.remove("delete")
17450
+
15846
17451
  document_name = self.replace_bulk_placeholders(
15847
17452
  input_string=name_field,
15848
17453
  row=row,
@@ -15870,7 +17475,7 @@ class Payload:
15870
17475
  str(index),
15871
17476
  name_field,
15872
17477
  (
15873
- "nor in alternative name field -> " + name_field_alt
17478
+ " nor in alternative name field -> " + name_field_alt
15874
17479
  if name_field_alt
15875
17480
  else ""
15876
17481
  ),
@@ -15900,7 +17505,7 @@ class Payload:
15900
17505
  document_name = document_name[:254]
15901
17506
 
15902
17507
  # This is an optimization. We check if the document was created
15903
- # in a former run. This helps if the customizer gets re-run:
17508
+ # in a former run. This helps if the customizer is re-run:
15904
17509
  if document_name and document_name in existing_documents:
15905
17510
  logger.info(
15906
17511
  "Document -> '%s' does already exist and has ID -> %s. Skipping...",
@@ -16149,9 +17754,10 @@ class Payload:
16149
17754
  for workspace in workspaces:
16150
17755
  if not "workspace_name" in workspace:
16151
17756
  logger.error(
16152
- "No workspace name field specified for document upload! Skipping document upload to this workspace...",
17757
+ "No workspace name field specified for document upload! Cannot upload document to this workspace...",
16153
17758
  )
16154
17759
  success = False
17760
+ result["failure_counter"] += 1
16155
17761
  continue
16156
17762
  workspace_name_field = workspace["workspace_name"]
16157
17763
 
@@ -16172,7 +17778,9 @@ class Payload:
16172
17778
  )
16173
17779
  # success = False - NO, DON'T DO THIS!!!
16174
17780
  document_id = None # do this to avoid fatal error after the main loop where the success counters are set
17781
+ result["skipped_counter"] += 1
16175
17782
  continue # for workspace in workspaces
17783
+
16176
17784
  # Workspace names for sure are not allowed to have ":":
16177
17785
  workspace_name = workspace_name.replace(":", "")
16178
17786
  # Truncate the workspace name to 254 characters which is the maximum allowed length in Extended ECM
@@ -16185,9 +17793,11 @@ class Payload:
16185
17793
  evaluated_condition = self.evaluate_conditions(conditions, row)
16186
17794
  if not evaluated_condition:
16187
17795
  logger.info(
16188
- "Workspace condition for row -> %s not met. Skipping row for document upload to workspace...",
17796
+ "Workspace condition for row -> %s not met. Skipping row for document upload to workspace -> '%s'...",
16189
17797
  str(index),
17798
+ workspace_name,
16190
17799
  )
17800
+ result["skipped_counter"] += 1
16191
17801
  continue # for workspace in workspaces
16192
17802
 
16193
17803
  if not "workspace_type" in workspace:
@@ -16196,6 +17806,7 @@ class Payload:
16196
17806
  workspace_name,
16197
17807
  )
16198
17808
  success = False
17809
+ result["failure_counter"] += 1
16199
17810
  continue # for workspace in workspaces
16200
17811
  workspace_type = workspace["workspace_type"]
16201
17812
  workspace_type = self.replace_bulk_placeholders(
@@ -16217,6 +17828,7 @@ class Payload:
16217
17828
  workspace_type,
16218
17829
  )
16219
17830
  success = False
17831
+ result["failure_counter"] += 1
16220
17832
  continue # for workspace in workspaces
16221
17833
 
16222
17834
  # If the workspace payload element has a "data_source" key,
@@ -16264,9 +17876,7 @@ class Payload:
16264
17876
  # "workspace_folder" can be used if the payload contains
16265
17877
  # the path as a comma-separated string (top down)
16266
17878
  workspace_folder = workspace.get("workspace_folder", "")
16267
-
16268
- # we need to do a copy as the path list is a mutable data type that we modify below!
16269
- workspace_path = list(workspace.get("workspace_path", []))
17879
+ workspace_path = workspace.get("workspace_path", None)
16270
17880
 
16271
17881
  if workspace_folder and not workspace_path:
16272
17882
  workspace_folder = self.replace_bulk_placeholders(
@@ -16282,26 +17892,56 @@ class Payload:
16282
17892
  workspace_path = [workspace_folder]
16283
17893
 
16284
17894
  if workspace_path:
16285
- # Replace placeholders in payload for the path elements:
16286
- # Note: workspace_path is a mutable data type that is changed in place!
16287
- result_placeholders = self.replace_bulk_placeholders_list(
16288
- input_list=workspace_path,
16289
- row=row,
16290
- replacements=replacements,
16291
- )
16292
- if not result_placeholders:
17895
+ if isinstance(workspace_path, str):
17896
+ # if the path is actually a list in a string
17897
+ # we need to convert it to a python list in a safe way:
17898
+ try:
17899
+ workspace_path = self.replace_bulk_placeholders(
17900
+ input_string=workspace_path,
17901
+ index=None, # None is VERY important as otherwise index=0 is the default and we only get the first element
17902
+ row=row,
17903
+ replacements=replacements,
17904
+ )
17905
+ if workspace_path:
17906
+ workspace_path = literal_eval(workspace_path)
17907
+ else:
17908
+ workspace_path = None
17909
+ except (SyntaxError, ValueError) as e:
17910
+ logger.error(
17911
+ "Cannot parse list-type folder path wrapped in string -> '%s'; error -> %s",
17912
+ workspace_path,
17913
+ str(e),
17914
+ )
17915
+ workspace_path = None
17916
+ elif isinstance(workspace_path, list):
17917
+ # We create a copy list to not modify original payload
17918
+ workspace_path = list(workspace_path)
17919
+ # Replace placeholders in payload for the path elements:
17920
+ # Note: workspace_path is a mutable data type that is changed in place!
17921
+ result_placeholders = self.replace_bulk_placeholders_list(
17922
+ input_list=workspace_path,
17923
+ row=row,
17924
+ replacements=replacements,
17925
+ )
17926
+ if not result_placeholders:
17927
+ workspace_path = None
17928
+ else:
17929
+ logger.warning("Unsupported data type for workspace path!")
17930
+ workspace_path = None
17931
+
17932
+ if not workspace_path:
17933
+ # we put the document into the root of the workspace
17934
+ # if we couldn't determine a path inside the workspace:
16293
17935
  logger.warning(
16294
17936
  "Workspace folder path for workspace -> '%s' of workspace type -> '%s' cannot be resolved (placeholder issue). Using workspace root for document upload.",
16295
17937
  workspace_name,
16296
17938
  workspace_type,
16297
17939
  )
16298
- # we put the document into the root of the workspace:
16299
17940
  parent_id = workspace_id
16300
- workspace_path = None
16301
17941
  else:
16302
17942
  # Check if the folder path does already exist and get the target folder at the end of the path:
16303
17943
  logger.info(
16304
- "Check if path -> %s does already exist in workspace -> '%s' (%s)... (otherwise create it)",
17944
+ "Check if path -> %s does already exist in workspace -> '%s' (%s) or otherwise create it...",
16305
17945
  str(workspace_path),
16306
17946
  workspace_name,
16307
17947
  workspace_id,
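The 'workspace_path' of a document upload target may now be given either as a real list or as a list serialized into a string (for example when it comes out of a data row placeholder); the string form is parsed safely with ast.literal_eval. A tiny illustration with made-up values:

    from ast import literal_eval

    resolved = "['Contracts', '2024', 'Q3']"  # placeholder already substituted for this row
    workspace_path = literal_eval(resolved)   # -> ['Contracts', '2024', 'Q3']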
@@ -16321,6 +17961,7 @@ class Payload:
16321
17961
  workspace_id,
16322
17962
  )
16323
17963
  success = False
17964
+ result["failure_counter"] += 1
16324
17965
  continue # for workspace in workspaces
16325
17966
  else:
16326
17967
  logger.info(
@@ -16394,6 +18035,7 @@ class Payload:
16394
18035
  "Coudn't dertermine workspace template ID and workspace type ID of sub-workspace!",
16395
18036
  )
16396
18037
  success = False
18038
+ result["failure_counter"] += 1
16397
18039
  continue # for workspace in workspaces
16398
18040
 
16399
18041
  # Check if we have categories for the sub-workspace:
@@ -16430,6 +18072,7 @@ class Payload:
16430
18072
  sub_workspace_name,
16431
18073
  )
16432
18074
  success = False
18075
+ result["failure_counter"] += 1
16433
18076
  continue # for workspace in workspaces
16434
18077
  # Now we create the sub-workspace:
16435
18078
  response = self._otcs_frontend.create_workspace(
@@ -16458,6 +18101,8 @@ class Payload:
16458
18101
  sub_workspace_type_id,
16459
18102
  )
16460
18103
  success = False
18104
+ parent_id = None
18105
+ result["failure_counter"] += 1
16461
18106
  continue # for workspace in workspaces
16462
18107
  else:
16463
18108
  logger.info(
@@ -16593,6 +18238,7 @@ class Payload:
16593
18238
  sub_workspace_id,
16594
18239
  )
16595
18240
  success = False
18241
+ result["failure_counter"] += 1
16596
18242
  continue # for workspace in workspaces
16597
18243
  else:
16598
18244
  logger.info(
@@ -16693,11 +18339,32 @@ class Payload:
16693
18339
  key,
16694
18340
  )
16695
18341
 
16696
- document_external_modify_date = self._otcs_frontend.get_result_value(
16697
- response, "external_modify_date"
18342
+ document_modify_date = self._otcs_frontend.get_result_value(
18343
+ response, "modify_date"
16698
18344
  )
16699
18345
 
16700
- if not document_id:
18346
+ # Check if we want to recreate an existing document:
18347
+ if document_id and "recreate" in row_operations:
18348
+ response = self._otcs_frontend.delete_node(
18349
+ node_id=document_id, purge=True
18350
+ )
18351
+ if not response:
18352
+ logger.error(
18353
+ "Failed to bulk recreate existing document -> '%s' (%s)! Delete failed.",
18354
+ document_name,
18355
+ document_id,
18356
+ )
18357
+ success = False
18358
+ result["failure_counter"] += 1
18359
+ continue
18360
+ result["delete_counter"] += 1
18361
+ document_id = None
18362
+
18363
+ # Check if the document does not exist - then we create a new document
18364
+ # if this is requested ("create" value in operations list in payload)
18365
+ if not document_id and (
18366
+ "create" in row_operations or "recreate" in row_operations
18367
+ ):
16701
18368
  # The document does not exist in Extended ECM - so we
16702
18369
  # upload it now:
16703
18370
 
@@ -16770,14 +18437,28 @@ class Payload:
16770
18437
  parent_id,
16771
18438
  )
16772
18439
  success = False
18440
+ result["failure_counter"] += 1
16773
18441
  continue
16774
- # end if not workspace_id
16775
-
16776
- # If updates are enforced we update the existing document with
16777
- # a new document version and with fresh metadata from the payload:
16778
- elif enforce_updates and OTCS.date_is_newer(
16779
- date_old=document_external_modify_date,
16780
- date_new=external_modify_date,
18442
+ else:
18443
+ result["create_counter"] += 1
18444
+
18445
+ # end if not document_id and "create" in row_operations
18446
+
18447
+ # If updates are requested we update the existing document with
18448
+ # a new document version and with fresh metadata from the payload.
18449
+ # Additionally we check the external modify date to support
18450
+ # incremental load for content that has really changed.
18451
+ # In addition we check that "delete" is not requested, as otherwise the flow would
18452
+ # never reach the elif "delete" ... branch below (and it does not make sense to update a document
18453
+ # that is deleted in the next step...)
18454
+ elif (
18455
+ document_id
18456
+ and "update" in row_operations
18457
+ and "delete" not in row_operations # note the NOT !
18458
+ and OTCS.date_is_newer(
18459
+ date_old=document_modify_date,
18460
+ date_new=external_modify_date,
18461
+ )
16781
18462
  ):
16782
18463
  # If category data is in payload we substitute
16783
18464
  # the values with data from the current data row:
@@ -16817,11 +18498,12 @@ class Payload:
16817
18498
  )
16818
18499
  if not response:
16819
18500
  logger.error(
16820
- "Failed to add new version to document -> '%s' (%s)",
18501
+ "Failed to add new version to existing document -> '%s' (%s)",
16821
18502
  document_name,
16822
18503
  document_id,
16823
18504
  )
16824
18505
  success = False
18506
+ result["failure_counter"] += 1
16825
18507
  continue
16826
18508
  response = self._otcs_frontend.update_item(
16827
18509
  node_id=document_id,
@@ -16834,16 +18516,38 @@ class Payload:
16834
18516
  )
16835
18517
  if not response:
16836
18518
  logger.error(
16837
- "Failed to update metadata of document -> '%s' (%s) with metadata -> %s",
18519
+ "Failed to update metadata of existing document -> '%s' (%s) with metadata -> %s",
16838
18520
  document_name,
16839
18521
  document_id,
16840
18522
  str(document_category_data),
16841
18523
  )
16842
18524
  success = False
18525
+ result["failure_counter"] += 1
18526
+ continue
18527
+ else:
18528
+ result["update_counter"] += 1
18529
+ # end elif document_id and "update" in row_operations
18530
+ elif document_id and "delete" in row_operations:
18531
+ # We delete with immediate purging to keep recycle bin clean
18532
+ # and to not run into issues with nicknames used in deleted items:
18533
+ response = self._otcs_frontend.delete_node(
18534
+ node_id=document_id, purge=True
18535
+ )
18536
+ if not response:
18537
+ logger.error(
18538
+ "Failed to bulk delete existing document -> '%s' (%s)!",
18539
+ document_name,
18540
+ document_id,
18541
+ )
18542
+ success = False
18543
+ result["failure_counter"] += 1
16843
18544
  continue
18545
+ result["delete_counter"] += 1
18546
+ document_id = None
18547
+
16844
18548
  # nickname has been calculated for existence test above
16845
18549
  # we now assign it to the new document
16846
- if nickname:
18550
+ if nickname and document_id:
16847
18551
  response = self._otcs_frontend.set_node_nickname(
16848
18552
  node_id=document_id, nickname=nickname, show_error=True
16849
18553
  )
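Note on the branches added above: a per-row operations list ("create", "recreate", "update", "delete") now drives what happens to each bulk document. The following is a minimal, illustrative sketch of that dispatch order, not pyxecm code: the print calls stand in for the actual frontend calls (delete_node, add_document_version, update_item, ...), the node ID 4711 is a placeholder, and the body of date_is_newer is only an assumption about the semantics of OTCS.date_is_newer().

from datetime import datetime


def date_is_newer(date_old: str | None, date_new: str | None) -> bool:
    # Assumed semantics: update only if the payload (external) date is strictly
    # newer than the stored modify date; missing dates force an update.
    if not date_old or not date_new:
        return True
    return datetime.fromisoformat(date_new) > datetime.fromisoformat(date_old)


def process_document_row(
    document_id: int | None,
    row_operations: list,
    modify_date: str | None,
    external_modify_date: str | None,
) -> int | None:
    # "recreate": purge the existing document first, then fall through to "create".
    if document_id and "recreate" in row_operations:
        print("delete (purge) document", document_id)
        document_id = None
    if not document_id and ("create" in row_operations or "recreate" in row_operations):
        document_id = 4711  # placeholder for the node ID returned by the upload
        print("create document ->", document_id)
    elif (
        document_id
        and "update" in row_operations
        and "delete" not in row_operations  # a document deleted in the next step is not updated
        and date_is_newer(date_old=modify_date, date_new=external_modify_date)
    ):
        print("add version + update metadata of document", document_id)
    elif document_id and "delete" in row_operations:
        print("delete (purge) document", document_id)
        document_id = None
    return document_id


# Example: an existing document with operations ["update"] and a newer payload
# date gets a new version; with ["delete"] it would be purged instead.
process_document_row(1234, ["update"], "2023-01-01T00:00:00", "2024-01-01T00:00:00")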
@@ -16853,43 +18557,25 @@ class Payload:
16853
18557
  nickname,
16854
18558
  document_name,
16855
18559
  )
18560
+ if document_id is not None:
18561
+ result["success_counter"] += 1
18562
+ result["documents"][document_name] = document_id
16856
18563
 
16857
18564
  # end for workspaces
16858
18565
 
16859
18566
  if not success:
16860
- # check if the parent_id is set.
16861
- if parent_id is None:
16862
- parent_id = "could not get id"
16863
-
16864
- logger.error(
16865
- "Failed to bulk upload document -> '%s' to parent folder with ID -> %s!",
16866
- document_name,
16867
- parent_id,
16868
- )
16869
18567
  result["success"] = False
16870
- result["failure_counter"] += 1
16871
- elif (
16872
- document_id is not None
16873
- ): # it can be None if the workspace name failed to resolve
16874
- logger.info(
16875
- "Successfully uploaded bulk document -> '%s' with ID -> %s",
16876
- document_name,
16877
- document_id,
16878
- )
16879
- result["success_counter"] += 1
16880
- # Record the workspace name and ID to allow to read it from failure file
16881
- # and speedup the process.
16882
- result["documents"][document_name] = document_id
16883
- else:
16884
- logger.info(
16885
- "Bulk document -> '%s' was not uploaded to any workspace.",
16886
- document_name,
16887
- )
18568
+ if document_name not in result["documents"]:
18569
+ logger.info(
18570
+ "Bulk document -> '%s' was not uploaded to any workspace.",
18571
+ document_name,
18572
+ )
16888
18573
 
16889
18574
  # Make sure no temp documents are piling up except
16890
18575
  # we want it (e.g. if using cloud document storage):
16891
18576
  if os.path.exists(file_name) and delete_download:
16892
18577
  os.remove(file_name)
18578
+ # end for index, row in partition.iterrows()
16893
18579
 
16894
18580
  logger.info("End working...")
16895
18581
 
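The counter increments added throughout this hunk (create_counter, update_counter, delete_counter, failure_counter, success_counter, documents) all write into a per-partition result dictionary whose initialization lies outside the hunks shown here. The sketch below shows the assumed shape of that dictionary plus a hypothetical (non-pyxecm) helper for aggregating the results of several partitions:

result = {
    "success": True,
    "success_counter": 0,
    "failure_counter": 0,
    "create_counter": 0,
    "update_counter": 0,
    "delete_counter": 0,
    "documents": {},  # document name -> node ID (e.g. to speed up re-runs from a failure file)
}


def merge_results(results: list) -> dict:
    # Hypothetical aggregation helper, e.g. when partitions are processed by
    # multiple worker threads; not part of pyxecm.
    counter_keys = (
        "success_counter",
        "failure_counter",
        "create_counter",
        "update_counter",
        "delete_counter",
    )
    total = {key: sum(r[key] for r in results) for key in counter_keys}
    total["success"] = all(r["success"] for r in results)
    total["documents"] = {
        name: node_id for r in results for name, node_id in r["documents"].items()
    }
    return total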
@@ -16906,7 +18592,7 @@ class Payload:
16906
18592
  index: int = 0,
16907
18593
  replacements: dict | None = None,
16908
18594
  additional_regex_list: list | None = None,
16909
- ):
18595
+ ) -> bool:
16910
18596
  """Wrapper method to process list of payload strings and replace placeholders (see next method)
16911
18597
 
16912
18598
  Args:
@@ -16941,7 +18627,7 @@ class Payload:
16941
18627
  self,
16942
18628
  input_string: str,
16943
18629
  row: pd.Series,
16944
- index: int | None = 0,
18630
+ index: int | None = 0, # don't use None here!
16945
18631
  replacements: dict | None = None,
16946
18632
  additional_regex_list: list | None = None,
16947
18633
  ) -> str:
@@ -16951,7 +18637,8 @@ class Payload:
16951
18637
  input_string (str): the string to replace placeholders in
16952
18638
  row (pd.Series): current row (DataFrame series / row)
16953
18639
  index (int): Index for use if we encounter a list value.
16954
- If index is "None" then we return the complete list as value
18640
+ If index is "None" then we return the complete list as value.
18641
+ Otherwise we return the list item with the given index (index 0, the first element, is the default).
16955
18642
  replacements (dict): Replacements to apply to given fields (dictionary key = field name)
16956
18643
  additional_regex_list (list, optional): These are not coming from the payload but dynamically
16957
18644
  added for special needs like determining the nicknames.
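The index semantics documented here are also what the index=None call earlier in this diff relies on (to get the whole folder-path list instead of just its first element). Below is a standalone sketch of that contract, not the pyxecm implementation of replace_bulk_placeholders():

import pandas as pd


def resolve_field(row: pd.Series, field_name: str, index: int | None = 0):
    # index=None returns list values unchanged; an integer index picks one element.
    value = row[field_name]
    if isinstance(value, list) and index is not None:
        return value[index]
    return value


row = pd.Series({"path": ["Legal", "Contracts", "2024"], "name": "MSA.pdf"})

print(resolve_field(row, "name"))              # -> MSA.pdf
print(resolve_field(row, "path"))              # -> Legal   (index defaults to 0)
print(resolve_field(row, "path", index=None))  # -> ['Legal', 'Contracts', '2024']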
@@ -16986,11 +18673,11 @@ class Payload:
16986
18673
  # first we access the field in the row and handle the
16987
18674
  # exception that key may not be a valid column (KeyError):
16988
18675
  try:
16989
- # read the value of the column defined by key
18676
+ # read the value of the data frame column defined by key
16990
18677
  value = value[key]
16991
18678
  except KeyError as e:
16992
18679
  logger.warning(
16993
- "KeyError: Cannot replace field -> '%s'%s as the row does not have a column called '%s': %s",
18680
+ "KeyError: Cannot replace field -> '%s'%s as the data frame row does not have a column called '%s': %s",
16994
18681
  field_name,
16995
18682
  " (sub-key -> '{}')".format(key) if key != field_name else "",
16996
18683
  field_name,
@@ -17063,7 +18750,7 @@ class Payload:
17063
18750
  upper=upper,
17064
18751
  lower=lower,
17065
18752
  )
17066
- else:
18753
+ else: # we have a list, so we need to iterate
17067
18754
  for v in value:
17068
18755
  v = self.cleanup_value(
17069
18756
  cleanup_value=v,
@@ -17228,3 +18915,137 @@ class Payload:
17228
18915
  return evaluated_condition
17229
18916
 
17230
18917
  # end method definition
18918
+
18919
+ def process_avts_repositories(self, section_name: str = "avtsRepositories") -> bool:
18920
+ """Process Aviator Search repositories.
18921
+
18922
+ Args:
18923
+ section_name (str, optional): name of the section. It can be overridden
18924
+ for cases where multiple sections of the same type
18925
+ are used (e.g. the "Post" sections). This
18926
+ name is also used for the "success" status
18927
+ files written to the Admin Personal Workspace
18928
+ Returns:
18929
+ bool: True if payload has been processed without errors, False otherwise
18930
+ """
18931
+
18932
+ if not self._avts_repositories:
18933
+ logger.info("Payload section -> '%s' is empty. Skipping...", section_name)
18934
+ return True
18935
+
18936
+ # If this payload section has been processed successfully before we
18937
+ # can return True and skip processing it once more:
18938
+ if self.check_status_file(section_name):
18939
+ return True
18940
+
18941
+ success: bool = True
18942
+
18943
+ self._avts.authenticate()
18944
+
18945
+ for payload_repo in self._avts_repositories:
18946
+
18947
+ if not payload_repo.get("enabled", True):
18948
+ continue
18949
+
18950
+ repository = self._avts.get_repo_by_name(name=payload_repo["name"])
18951
+
18952
+ if repository is None:
18953
+ logger.info(
18954
+ "Repository -> '%s' does not exist, creating it...",
18955
+ payload_repo["name"],
18956
+ )
18957
+
18958
+ if payload_repo.get("type", "Extended ECM") == "Extended ECM":
18959
+ repository = self._avts.repo_create_extended_ecm(
18960
+ name=payload_repo["name"],
18961
+ username=payload_repo["username"],
18962
+ password=payload_repo["password"],
18963
+ otcs_url=payload_repo["otcs_url"],
18964
+ otcs_api_url=payload_repo["otcs_api_url"],
18965
+ node_id=int(payload_repo["node_id"]),
18966
+ )
18967
+
18968
+ elif payload_repo["type"] == "Documentum":
18969
+ logger.warning("Not yet implemented")
18970
+ elif payload_repo["type"] == "MSTeams":
18971
+ repository = self._avts.repo_create_msteams(
18972
+ name=payload_repo["name"],
18973
+ client_id=payload_repo["client_id"],
18974
+ tenant_id=payload_repo["tenant_id"],
18975
+ certificate_file=payload_repo["certificate_file"],
18976
+ certificate_password=payload_repo["certificate_password"],
18977
+ index_attachments=payload_repo.get("index_attachments", True),
18978
+ index_call_recordings=payload_repo.get(
18979
+ "index_call_recordings", True
18980
+ ),
18981
+ index_message_replies=payload_repo.get(
18982
+ "index_message_replies", True
18983
+ ),
18984
+ index_user_chats=payload_repo.get("index_user_chats", True),
18985
+ )
18986
+ elif payload_repo["type"] == "SharePoint":
18987
+ repository = self._avts.repo_create_sharepoint(
18988
+ name=payload_repo["name"],
18989
+ client_id=payload_repo["client_id"],
18990
+ tenant_id=payload_repo["tenant_id"],
18991
+ certificate_file=payload_repo["certificate_file"],
18992
+ certificate_password=payload_repo["certificate_password"],
18993
+ sharepoint_url=payload_repo["sharepoint_url"],
18994
+ sharepoint_url_type=payload_repo["sharepoint_url_type"],
18995
+ sharepoint_mysite_url=payload_repo["sharepoint_mysite_url"],
18996
+ sharepoint_admin_url=payload_repo["sharepoint_admin_url"],
18997
+ index_user_profiles=payload_repo.get(
18998
+ "index_message_replies", False
18999
+ ),
19000
+ )
19001
+ else:
19002
+ logger.error(
19003
+ "Invalid repository type -> '%s' specified. Valid values are: Extended ECM, Documentum, MSTeams, SharePoint",
19004
+ payload_repo["type"],
19005
+ )
19006
+ success = False
19007
+ break
19008
+
19009
+ if repository is None:
19010
+ logger.error(
19011
+ "Creation of Search Aviator repository -> '%s' failed!",
19012
+ payload_repo["name"],
19013
+ )
19014
+ success = False
19015
+ else:
19016
+ logger.info(
19017
+ "Successfully created Search Aviator repository -> %s",
19018
+ payload_repo["name"],
19019
+ )
19020
+ logger.debug("%s", repository)
19021
+
19022
+ else:
19023
+ logger.info(
19024
+ "Search Aviator Repository -> '%s' already exists.",
19025
+ payload_repo["name"],
19026
+ )
19027
+
19028
+ # Start Crawling
19029
+ start_crawling = payload_repo.get("start", False) is True
19030
+
19031
+ if repository is not None and start_crawling:
19032
+ response = self._avts.start_crawling(repo_name=payload_repo["name"])
19033
+
19034
+ if response is None:
19035
+ logger.error(
19036
+ "Aviator Search start crawling on repository failed -> %s",
19037
+ payload_repo["name"],
19038
+ )
19039
+ success = False
19040
+ else:
19041
+ logger.info(
19042
+ "Aviator Search crawling started on repository -> %s",
19043
+ payload_repo["name"],
19044
+ )
19045
+ logger.debug("%s", response)
19046
+
19047
+ self.write_status_file(success, section_name, self._partitions)
19048
+
19049
+ return success
19050
+
19051
+ # end method definition
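For reference, the new process_avts_repositories() method reads the keys shown below from each entry of the avtsRepositories payload section. This is an illustrative example only, written as the Python structure the YAML section would parse to; the key names are taken from the code above, all values are made up:

avts_repositories = [
    {
        "enabled": True,
        "name": "Extended ECM Enterprise",
        "type": "Extended ECM",
        "username": "admin",
        "password": "<secret>",
        "otcs_url": "https://otcs.example.com",
        "otcs_api_url": "https://otcs.example.com/cs/api",
        "node_id": 2000,
        "start": True,  # start crawling right after the repository has been created
    },
    {
        "enabled": False,  # disabled entries are skipped
        "name": "SharePoint Online",
        "type": "SharePoint",
        "client_id": "<app id>",
        "tenant_id": "<tenant id>",
        "certificate_file": "/certs/sharepoint.pfx",
        "certificate_password": "<secret>",
        "sharepoint_url": "https://contoso.sharepoint.com",
        "sharepoint_url_type": "root site",
        "sharepoint_mysite_url": "https://contoso-my.sharepoint.com",
        "sharepoint_admin_url": "https://contoso-admin.sharepoint.com",
        "index_user_profiles": False,
    },
]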