pyxecm 2.0.2__py3-none-any.whl → 2.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


pyxecm/otcs.py CHANGED
@@ -27,10 +27,12 @@ import threading
27
27
  import time
28
28
  import urllib.parse
29
29
  import zipfile
30
+ from concurrent.futures import ThreadPoolExecutor
30
31
  from datetime import datetime, timezone
31
32
  from functools import cache
32
33
  from http import HTTPStatus
33
34
  from importlib.metadata import version
35
+ from queue import Empty, LifoQueue, Queue
34
36
 
35
37
  import requests
36
38
  import websockets
@@ -148,6 +150,38 @@ class OTCS:
148
150
  ITEM_TYPE_WORKFLOW_MAP = 128
149
151
  ITEM_TYPE_WORKFLOW_STATUS = 190
150
152
 
153
+ CONTAINER_ITEM_TYPES = [
154
+ ITEM_TYPE_FOLDER,
155
+ ITEM_TYPE_BUSINESS_WORKSPACE,
156
+ ITEM_TYPE_COMPOUND_DOCUMENT,
157
+ ITEM_TYPE_CLASSIFICATION,
158
+ VOLUME_TYPE_ENTERPRISE_WORKSPACE,
159
+ VOLUME_TYPE_CLASSIFICATION_VOLUME,
160
+ VOLUME_TYPE_CONTENT_SERVER_DOCUMENT_TEMPLATES,
161
+ ]
162
+
163
+ PERMISSION_TYPES = [
164
+ "see",
165
+ "see_contents",
166
+ "modify",
167
+ "edit_attributes",
168
+ "add_items",
169
+ "reserve",
170
+ "add_major_version",
171
+ "delete_versions",
172
+ "delete",
173
+ "edit_permissions",
174
+ ]
175
+ PERMISSION_ASSIGNEE_TYPES = [
176
+ "owner",
177
+ "group",
178
+ "public",
179
+ "custom",
180
+ ]
181
+
182
+ # The maximum length of an item name in OTCS:
183
+ MAX_ITEM_NAME_LENGTH = 248
184
+
151
185
  _config: dict
152
186
  _otcs_ticket = None
153
187
  _otds_ticket = None
@@ -165,6 +199,42 @@ class OTCS:
165
199
  ) # only 1 thread should handle the re-authentication
166
200
  _session_lock = threading.Lock()
167
201
 
202
+ @classmethod
203
+ def cleanse_item_name(cls, item_name: str, max_length: int | None = None) -> str:
204
+ """Cleanse the given name of an OTCS item.
205
+
206
+ Remove forbidden characters and enforce the maximum item name length.
207
+
208
+ Args:
209
+ item_name (str):
210
+ The item name to cleanse.
211
+ max_length (int, optional):
212
+ A specific maximum length for custom cases.
213
+ If not provided, the default OTCS.MAX_ITEM_NAME_LENGTH is used.
214
+
215
+ Returns:
216
+ str:
217
+ The cleansed item name.
218
+
219
+ """
220
+
221
+ # If no custom max length is given we use the default:
222
+ if max_length is None:
223
+ max_length = OTCS.MAX_ITEM_NAME_LENGTH
224
+
225
+ # Item names must not contain ":":
226
+ item_name = item_name.replace(":", "")
227
+ # Item names must not have leading or trailing spaces:
228
+ item_name = item_name.strip()
229
+ # Truncate the item name to the maximum allowed length
230
+ # in Content Server (max_length, 248 by default):
231
+ if len(item_name) > max_length:
232
+ item_name = item_name[:max_length]
233
+
234
+ return item_name
235
+
236
+ # end method definition
237
+
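The new `cleanse_item_name()` helper is a classmethod and can be called without an instance. A minimal usage sketch, based only on the logic shown above (colon removal, whitespace stripping, truncation); the sample names are made up:

```python
from pyxecm.otcs import OTCS  # assumption: module path as used in this package

cleaned = OTCS.cleanse_item_name("  Contract: ACME 2024  ")
# -> "Contract ACME 2024" (colon removed, surrounding spaces stripped)

truncated = OTCS.cleanse_item_name("A" * 300, max_length=100)
# -> 100 characters; without max_length the default
#    OTCS.MAX_ITEM_NAME_LENGTH (248) applies
```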
168
238
  @classmethod
169
239
  def date_is_newer(cls, date_old: str, date_new: str) -> bool:
170
240
  """Compare two dates, typically create or modification dates.
@@ -443,6 +513,7 @@ class OTCS:
443
513
  self._semaphore = threading.BoundedSemaphore(value=thread_number)
444
514
  self._last_session_renewal = 0
445
515
  self._use_numeric_category_identifier = use_numeric_category_identifier
516
+ self._executor = ThreadPoolExecutor(max_workers=thread_number)
446
517
 
447
518
  # end method definition
448
519
 
@@ -716,6 +787,21 @@ class OTCS:
716
787
 
717
788
  # end method definition
718
789
 
790
+ def clear_data(self) -> Data:
791
+ """Reset the data object to an empty data frame.
792
+
793
+ Returns:
794
+ Data:
795
+ Newly initialized data object.
796
+
797
+ """
798
+
799
+ self._data = Data(logger=self.logger)
800
+
801
+ return self._data
802
+
803
+ # end method definition
804
+
719
805
  def request_form_header(self) -> dict:
720
806
  """Deliver the request header used for the CRUD REST API calls.
721
807
 
@@ -858,6 +944,9 @@ class OTCS:
858
944
  # a cookie that is in process of being renewed
859
945
  # by another thread:
860
946
  with self._session_lock:
947
+ if not self.cookie():
948
+ self.logger.error("Cannot call -> %s - user is not authenticatd!", url)
949
+ return None
861
950
  # IMPORTANT: this needs to be a copy - dicts are mutable and
862
951
  # we need to preserve the old value to detect in reauthenticate()
863
952
  # if the cookie has been renewed already or not:
@@ -1436,7 +1525,7 @@ class OTCS:
1436
1525
  property_name: str = "properties",
1437
1526
  data_name: str = "data",
1438
1527
  ) -> list | None:
1439
- """Read an item value from the REST API response.
1528
+ """Read all values with a given key from the REST API response.
1440
1529
 
1441
1530
  This method handles the most common response structures delivered by the
1442
1531
  V2 REST API of Extended ECM. For more details, refer to the documentation at
@@ -1530,6 +1619,44 @@ class OTCS:
1530
1619
 
1531
1620
  # end method definition
1532
1621
 
1622
+ def get_result_values_iterator(
1623
+ self,
1624
+ response: dict,
1625
+ property_name: str = "properties",
1626
+ data_name: str = "data",
1627
+ ) -> iter:
1628
+ """Get an iterator object that can be used to traverse through OTCS responses.
1629
+
1630
+ This method handles the most common response structures delivered by the
1631
+ V2 REST API of Extended ECM. For more details, refer to the documentation at
1632
+ developer.opentext.com.
1633
+
1634
+ Args:
1635
+ response (dict):
1636
+ REST API response object.
1637
+ property_name (str, optional):
1638
+ Name of the sub-dictionary holding the actual values.
1639
+ Defaults to "properties".
1640
+ data_name (str, optional):
1641
+ Name of the sub-dictionary holding the data.
1642
+ Defaults to "data".
1643
+
1644
+ Returns:
1645
+ iter:
1646
+ A generator yielding the properties dictionary of one result item per iteration.
1647
+
1648
+ """
1649
+
1650
+ # First do some sanity checks:
1651
+ if not response:
1652
+ return
1653
+ if "results" not in response:
1654
+ return
1655
+
1656
+ yield from (item[data_name][property_name] for item in response["results"])
1657
+
1658
+ # end method definition
1659
+
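A brief, hypothetical usage sketch for the new generator: it yields the `data`/`properties` dictionary of each entry in `response["results"]`, so it can be combined with any call that returns such a response (`otcs_object` and `response` are placeholders):

```python
# 'response' is assumed to be a V2 REST API response dict, e.g. from get_users().
for properties in otcs_object.get_result_values_iterator(response=response):
    # Each iteration yields one 'properties' dictionary of a result item:
    print(properties.get("id"), properties.get("name"))
```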
1533
1660
  def is_configured(self) -> bool:
1534
1661
  """Check if the Content Server pod is configured to receive requests.
1535
1662
 
@@ -1671,11 +1798,8 @@ class OTCS:
1671
1798
  "Requesting OTCS ticket with existing OTDS ticket; calling -> %s",
1672
1799
  request_url,
1673
1800
  )
1674
- request_header = {
1675
- "Content-Type": "application/x-www-form-urlencoded",
1676
- "Accept": "application/json",
1677
- "OTDSTicket": self._otds_ticket,
1678
- }
1801
+ # Add the OTDS ticket to the request headers:
1802
+ request_header = REQUEST_FORM_HEADERS | {"OTDSTicket": self._otds_ticket}
1679
1803
 
1680
1804
  try:
1681
1805
  response = requests.get(
@@ -1923,7 +2047,7 @@ class OTCS:
1923
2047
  """
1924
2048
 
1925
2049
  request_url = self.config()["serverInfoUrl"]
1926
- request_header = self.request_form_header() # self.cookie()
2050
+ request_header = self.request_form_header()
1927
2051
 
1928
2052
  self.logger.debug(
1929
2053
  "Retrieve Content Server information; calling -> %s",
@@ -2018,57 +2142,141 @@ class OTCS:
2018
2142
 
2019
2143
  # end method definition
2020
2144
 
2021
- @cache
2022
- def get_user(self, name: str, show_error: bool = False) -> dict | None:
2023
- """Look up an Content Server user based on the login name.
2145
+ def get_users(
2146
+ self,
2147
+ where_type: int = 0,
2148
+ where_name: str | None = None,
2149
+ where_first_name: str | None = None,
2150
+ where_last_name: str | None = None,
2151
+ where_business_email: str | None = None,
2152
+ query_string: str | None = None,
2153
+ sort: str | None = None,
2154
+ limit: int = 20,
2155
+ page: int = 1,
2156
+ show_error: bool = False,
2157
+ ) -> dict | None:
2158
+ """Get a Content Server users based on different criterias.
2159
+
2160
+ The criteria can be combined.
2024
2161
 
2025
2162
  Args:
2026
- name (str):
2163
+ where_type (int, optional):
2164
+ Type ID of user:
2165
+ 0 - Regular User
2166
+ 17 - Service User
2167
+ Defaults to 0 -> (Regular User)
2168
+ where_name (str | None = None):
2027
2169
  Name of the user (login).
2170
+ where_first_name (str | None = None):
2171
+ First name of the user.
2172
+ where_last_name (str | None = None):
2173
+ Last name of the user.
2174
+ where_business_email (str | None = None):
2175
+ Business email address of the user.
2176
+ query_string (str | None = None):
2177
+ Filters the results, returning the users with the specified query string
2178
+ in any of the following fields: log-in name, first name, last name, email address,
2179
+ and groups with the specified query string in the group name.
2180
+ NOTE: query cannot be used together with any combination of: where_name,
2181
+ where_first_name, where_last_name, where_business_email.
2182
+ The query value will be used to perform a search within the log-in name,
2183
+ first name, last name and email address properties for users and group name
2184
+ for groups to see if that value is contained within any of those properties.
2185
+ This differs from the user search that is performed in Classic UI where it
2186
+ searches for a specific property that begins with the value provided by the user.
2187
+ sort (str | None = None):
2188
+ Order by named column (Using prefixes such as sort=asc_name or sort=desc_name).
2189
+ Format can be sort = id, sort = name, sort = first_name, sort = last_name,
2190
+ sort = group_id, sort = mailaddress. If the prefix of asc or desc is not used
2191
+ then asc will be assumed.
2192
+ Default is None.
2193
+ limit (int, optional):
2194
+ The maximum number of results per page (internal default is 10). OTCS does
2195
+ not allow values > 20 so this method adjusts values > 20 to 20.
2196
+ page (int, optional):
2197
+ The page number to retrieve.
2028
2198
  show_error (bool, optional):
2029
2199
  If True, treat as an error if the user is not found. Defaults to False.
2030
2200
 
2031
2201
  Returns:
2032
2202
  dict | None:
2033
- User information as a dictionary, or None if the user is not found.
2203
+ User information as a dictionary, or None if the user could not be found
2204
+ (e.g., because it doesn't exist).
2034
2205
 
2035
2206
  Example:
2036
2207
  ```json
2037
2208
  {
2038
2209
  'collection': {
2039
- 'paging': {...},
2040
- 'sorting': {...}
2210
+ 'paging': {
2211
+ 'limit': 10,
2212
+ 'page': 1,
2213
+ 'page_total': 1,
2214
+ 'range_max': 1,
2215
+ 'range_min': 1,
2216
+ 'total_count': 1
2217
+ },
2218
+ 'sorting': {
2219
+ 'sort': [
2220
+ {
2221
+ 'key': 'sort',
2222
+ 'value': 'asc_id'
2223
+ }
2224
+ ]
2225
+ }
2041
2226
  },
2042
2227
  'links': {
2043
- 'data': {...}
2228
+ 'data': {
2229
+ 'self': {
2230
+ 'body': '',
2231
+ 'content_type': '',
2232
+ 'href': '/api/v2/members?where_first_name=Peter',
2233
+ 'method': 'GET',
2234
+ 'name': ''
2235
+ }
2236
+ }
2044
2237
  },
2045
2238
  'results': [
2046
2239
  {
2047
2240
  'data': {
2048
- 'birth_date': None,
2049
- 'business_email': 'pramos@M365x61936377.onmicrosoft.com',
2050
- 'business_fax': None,
2051
- 'business_phone': None,
2052
- 'cell_phone': None,
2053
- 'deleted': False,
2054
- 'display_language': None,
2055
- 'first_name': 'Peter',
2056
- 'gender': None,
2057
- 'group_id': 8006,
2058
- 'home_address_1': None,
2059
- 'home_address_2': None,
2060
- 'home_fax': None,
2061
- 'home_phone': None,
2062
- 'id': 8123,
2063
- 'initials': None,
2064
- 'last_name': 'Ramos',
2065
- 'middle_name': None,
2066
- 'name': 'pramos',
2067
- 'name_formatted': 'Peter Ramos',
2068
- 'photo_id': 13981,
2069
- 'photo_url': 'api/v1/members/8123/photo?v=13981.1',
2070
- 'type': 0,
2071
- 'type_name': 'User'
2241
+ 'properties': {
2242
+ 'birth_date': None,
2243
+ 'business_email': 'pramos@M365x61936377.onmicrosoft.com',
2244
+ 'business_fax': None,
2245
+ 'business_phone': None,
2246
+ 'cell_phone': None,
2247
+ 'deleted': False,
2248
+ 'display_language': None,
2249
+ 'first_name': 'Peter',
2250
+ 'gender': None,
2251
+ 'group_id': 8006,
2252
+ 'home_address_1': None,
2253
+ 'home_address_2': None,
2254
+ 'home_fax': None,
2255
+ 'home_phone': None,
2256
+ 'id': 8123,
2257
+ 'initials': None,
2258
+ 'last_name': 'Ramos',
2259
+ 'middle_name': None,
2260
+ 'name': 'pramos',
2261
+ 'name_formatted': 'Peter Ramos',
2262
+ 'office_location': None,
2263
+ 'pager': None,
2264
+ 'personal_email': None,
2265
+ 'photo_id': 13981,
2266
+ 'photo_url': 'api/v1/members/8123/photo?v=13981.1',
2267
+ 'privilege_content_manager': False,
2268
+ 'privilege_grant_discovery': False,
2269
+ 'privilege_login': True,
2270
+ 'privilege_modify_groups': False,
2271
+ 'privilege_modify_users': False,
2272
+ 'privilege_public_access': True,
2273
+ 'privilege_system_admin_rights': False,
2274
+ 'privilege_user_admin_rights': False,
2275
+ 'time_zone': -1,
2276
+ 'title': 'Maintenance Planner',
2277
+ 'type': 0,
2278
+ 'type_name': 'User'
2279
+ }
2072
2280
  }
2073
2281
  }
2074
2282
  ]
@@ -2081,17 +2289,45 @@ class OTCS:
2081
2289
 
2082
2290
  """
2083
2291
 
2084
- # Add query parameters (these are NOT passed via JSon body!)
2085
- # type = 0 ==> User
2086
- query = {"where_type": 0, "where_name": name}
2292
+ # Add query parameters (embedded in the URL)
2293
+ # Using type = 0 for OTCS groups or type = 17 for service user:
2294
+ query = {}
2295
+ filter_string = " type -> 'service user'" if where_type == 17 else ""
2296
+ query["where_type"] = where_type
2297
+ if where_name:
2298
+ query["where_name"] = where_name
2299
+ filter_string += " login name -> '{}'".format(where_name) if where_name else ""
2300
+ if where_first_name:
2301
+ query["where_first_name"] = where_first_name
2302
+ filter_string += " first name -> '{}'".format(where_first_name) if where_first_name else ""
2303
+ if where_last_name:
2304
+ query["where_last_name"] = where_last_name
2305
+ filter_string += " last name -> '{}'".format(where_last_name) if where_last_name else ""
2306
+ if where_business_email:
2307
+ query["where_business_email"] = where_business_email
2308
+ filter_string += " business email -> '{}'".format(where_business_email) if where_business_email else ""
2309
+ if query_string:
2310
+ query["query"] = query_string
2311
+ filter_string += " query -> '{}'".format(query_string) if where_business_email else ""
2312
+ if sort:
2313
+ query["sort"] = sort
2314
+ if limit:
2315
+ if limit > 20:
2316
+ self.logger.warning(
2317
+ "Page limit for user query cannot be larger than 20. Adjusting from %d to 20.", limit
2318
+ )
2319
+ limit = 20
2320
+ query["limit"] = limit
2321
+ if page:
2322
+ query["page"] = page
2087
2323
  encoded_query = urllib.parse.urlencode(query=query, doseq=True)
2088
2324
  request_url = self.config()["membersUrlv2"] + "?{}".format(encoded_query)
2089
2325
 
2090
2326
  request_header = self.request_form_header()
2091
2327
 
2092
2328
  self.logger.debug(
2093
- "Get user with login name -> '%s'; calling -> %s",
2094
- name,
2329
+ "Get users%s; calling -> %s",
2330
+ " with{}".format(filter_string) if filter_string else "",
2095
2331
  request_url,
2096
2332
  )
2097
2333
 
@@ -2100,97 +2336,159 @@ class OTCS:
2100
2336
  method="GET",
2101
2337
  headers=request_header,
2102
2338
  timeout=None,
2103
- failure_message="Failed to get user with login -> '{}'".format(name),
2104
- warning_message="Couldn't find user with login -> '{}'".format(name),
2339
+ failure_message="Failed to get users{}".format(" with{}".format(filter_string) if filter_string else ""),
2340
+ warning_message="Couldn't find users{}".format(" with{}".format(filter_string) if filter_string else ""),
2105
2341
  show_error=show_error,
2106
2342
  )
2107
2343
 
2108
2344
  # end method definition
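A hedged example of calling the new `get_users()` with `where_*` filters and reading the first match via `get_result_value()` (the instance name is illustrative):

```python
# Look up regular users whose first name is "Peter", at most 20 per page:
response = otcs_object.get_users(where_first_name="Peter", limit=20, page=1)
if response:
    user_id = otcs_object.get_result_value(response=response, key="id")
    login_name = otcs_object.get_result_value(response=response, key="name")
```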
2109
2345
 
2110
- def add_user(
2346
+ def get_users_iterator(
2111
2347
  self,
2112
- name: str,
2113
- password: str,
2114
- first_name: str,
2115
- last_name: str,
2116
- email: str,
2117
- title: str,
2118
- base_group: int,
2119
- privileges: list | None = None,
2120
- user_type: int = 0,
2121
- ) -> dict | None:
2122
- """Add Content Server user.
2348
+ where_type: int = 0,
2349
+ where_name: str | None = None,
2350
+ where_first_name: str | None = None,
2351
+ where_last_name: str | None = None,
2352
+ where_business_email: str | None = None,
2353
+ query_string: str | None = None,
2354
+ sort: str | None = None,
2355
+ limit: int = 20,
2356
+ ) -> iter:
2357
+ """Get an iterator object that can be used to traverse OTCS users.
2358
+
2359
+ Filters can be applied that are given by the "where" and "query" parameters.
2360
+
2361
+ Using a generator avoids loading a large number of users into memory at once.
2362
+ Instead, you can iterate over the potentially large list of users.
2363
+
2364
+ Example usage:
2365
+ ```python
2366
+ users = otcs_object.get_users_iterator(where_type=0, limit=10)
2367
+ for user in users:
2368
+ logger.info(
2369
+ "Traversing user -> '%s' (%s)",
2370
+ otcs_object.get_result_value(response=user, key="name"),
2371
+ otcs_object.get_result_value(response=user, key="id"),
2372
+ )
2373
+ ```
2123
2374
 
2124
2375
  Args:
2125
- name (str): login name of the user
2126
- password (str): password of the user
2127
- first_name (str): first name of the user
2128
- last_name (str): last name of the user
2129
- email (str): email address of the user
2130
- title (str): title of the user
2131
- base_group (int): base group id of the user (e.g. department)
2132
- privileges (list, optional):
2133
- Possible values are Login, Public Access, Content Manager,
2134
- Modify Users, Modify Groups, User Admin Rights,
2135
- Grant Discovery, System Admin Rights
2136
- user_type (int, optional): id of user_type 0-User, 17-ServiceUser, ...
2376
+ where_type (int, optional):
2377
+ Type ID of user:
2378
+ 0 - Regular User
2379
+ 17 - Service User
2380
+ Defaults to 0 -> (Regular User)
2381
+ where_name (str | None = None):
2382
+ Name of the user (login).
2383
+ where_first_name (str | None = None):
2384
+ First name of the user.
2385
+ where_last_name (str | None = None):
2386
+ Last name of the user.
2387
+ where_business_email (str | None = None):
2388
+ Business email address of the user.
2389
+ query_string (str | None = None):
2390
+ Filters the results, returning the users with the specified query string
2391
+ in any of the following fields: log-in name, first name, last name, email address,
2392
+ and groups with the specified query string in the group name.
2393
+ NOTE: query cannot be used together with any combination of: where_name,
2394
+ where_first_name, where_last_name, where_business_email.
2395
+ The query value will be used to perform a search within the log-in name,
2396
+ first name, last name and email address properties for users and group name
2397
+ for groups to see if that value is contained within any of those properties.
2398
+ This differs from the user search that is performed in Classic UI where it
2399
+ searches for a specific property that begins with the value provided by the user.
2400
+ sort (str | None = None):
2401
+ Order by named column (Using prefixes such as sort=asc_name or sort=desc_name).
2402
+ Format can be sort = id, sort = name, sort = first_name, sort = last_name,
2403
+ sort = group_id, sort = mailaddress. If the prefix of asc or desc is not used
2404
+ then asc will be assumed.
2405
+ Default is None.
2406
+ limit (int, optional):
2407
+ The maximum number of results per page (internal default is 10). OTCS does
2408
+ not allow values > 20 so this method adjusts values > 20 to 20.
2137
2409
 
2138
2410
  Returns:
2139
- dict | None:
2140
- User information or None if the user couldn't be created
2141
- (e.g. because it exisits already).
2411
+ iter:
2412
+ A generator yielding one user per iteration.
2413
+ If the REST API fails, returns no value.
2142
2414
 
2143
2415
  """
2144
2416
 
2145
- if privileges is None:
2146
- privileges = ["Login", "Public Access"]
2417
+ # First we probe how many users there are:
2418
+ response = self.get_users(
2419
+ where_type=where_type,
2420
+ where_name=where_name,
2421
+ where_first_name=where_first_name,
2422
+ where_last_name=where_last_name,
2423
+ where_business_email=where_business_email,
2424
+ query_string=query_string,
2425
+ limit=1,
2426
+ page=1,
2427
+ )
2428
+ if not response or "results" not in response:
2429
+ # Don't return None! Plain return is what we need for iterators.
2430
+ # Natural Termination: If the generator does not yield, it behaves
2431
+ # like an empty iterable when used in a loop or converted to a list:
2432
+ return
2147
2433
 
2148
- user_post_body = {
2149
- "type": user_type,
2150
- "name": name,
2151
- "password": password,
2152
- "first_name": first_name,
2153
- "last_name": last_name,
2154
- "business_email": email,
2155
- "title": title,
2156
- "group_id": base_group,
2157
- "privilege_login": ("Login" in privileges),
2158
- "privilege_public_access": ("Public Access" in privileges),
2159
- "privilege_content_manager": ("Content Manager" in privileges),
2160
- "privilege_modify_users": ("Modify Users" in privileges),
2161
- "privilege_modify_groups": ("Modify Groups" in privileges),
2162
- "privilege_user_admin_rights": ("User Admin Rights" in privileges),
2163
- "privilege_grant_discovery": ("Grant Discovery" in privileges),
2164
- "privilege_system_admin_rights": ("System Admin Rights" in privileges),
2165
- }
2434
+ number_of_users = response["collection"]["paging"]["total_count"]
2435
+ if not number_of_users:
2436
+ self.logger.warning(
2437
+ "No users found! Cannot iterate over users.",
2438
+ )
2439
+ # Don't return None! Plain return is what we need for iterators.
2440
+ # Natural Termination: If the generator does not yield, it behaves
2441
+ # like an empty iterable when used in a loop or converted to a list:
2442
+ return
2166
2443
 
2167
- request_url = self.config()["membersUrlv2"]
2168
- request_header = self.request_form_header()
2444
+ # If there are many users we need to go through all pages.
2445
+ # Adding limit - 1 ensures that any remainder from the division is
2446
+ # accounted for, effectively rounding up. Integer division (//) performs floor division,
2447
+ # giving the desired number of pages:
2448
+ total_pages = (number_of_users + limit - 1) // limit
2169
2449
 
2170
- self.logger.debug("Add user -> '%s'; calling -> %s", name, request_url)
2450
+ for page in range(1, total_pages + 1):
2451
+ # Get the next page of users:
2452
+ response = self.get_users(
2453
+ where_type=where_type,
2454
+ where_name=where_name,
2455
+ where_first_name=where_first_name,
2456
+ where_last_name=where_last_name,
2457
+ where_business_email=where_business_email,
2458
+ query_string=query_string,
2459
+ sort=sort,
2460
+ limit=limit,
2461
+ page=page,
2462
+ )
2463
+ if not response or not response.get("results", None):
2464
+ self.logger.warning(
2465
+ "Failed to retrieve users (page -> %d)",
2466
+ page,
2467
+ )
2468
+ return
2171
2469
 
2172
- # Clear user cache
2173
- self.get_user.cache_clear()
2470
+ # Yield users one at a time:
2471
+ yield from response["results"]
2174
2472
 
2175
- return self.do_request(
2176
- url=request_url,
2177
- method="POST",
2178
- headers=request_header,
2179
- data=user_post_body,
2180
- timeout=None,
2181
- failure_message="Failed to add user -> '{}'".format(name),
2182
- )
2473
+ # end for page in range(1, total_pages + 1)
2183
2474
 
2184
2475
  # end method definition
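The page calculation used by the new iterators is a plain ceiling division; a small standalone sketch of the pattern with illustrative numbers:

```python
number_of_users = 45  # total_count reported by the probe request
limit = 20            # page size

# (45 + 20 - 1) // 20 == 64 // 20 == 3 -> two full pages plus one partial page
total_pages = (number_of_users + limit - 1) // limit
assert total_pages == 3
```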
2185
2476
 
2186
- def search_user(self, value: str, field: str = "where_name") -> dict | None:
2187
- """Find a user based on search criteria.
2477
+ @cache
2478
+ def get_user(self, name: str, user_type: int = 0, show_error: bool = False) -> dict | None:
2479
+ """Get a Content Server user based on the login name and type.
2188
2480
 
2189
2481
  Args:
2190
- value (str):
2191
- Field value to search for.
2192
- field (str):
2193
- User field to search with (e.g. "where_name", "where_first_name", "where_last_name").
2482
+ name (str):
2483
+ Name of the user (login).
2484
+ user_type (int, optional):
2485
+ Type ID of user:
2486
+ 0 - Regular User
2487
+ 17 - Service User
2488
+ Defaults to 0 -> (Regular User)
2489
+
2490
+ show_error (bool, optional):
2491
+ If True, treat as an error if the user is not found. Defaults to False.
2194
2492
 
2195
2493
  Returns:
2196
2494
  dict | None:
@@ -2201,11 +2499,162 @@ class OTCS:
2201
2499
  ```json
2202
2500
  {
2203
2501
  'collection': {
2204
- 'paging': {...},
2205
- 'sorting': {...}
2502
+ 'paging': {
2503
+ 'limit': 10,
2504
+ 'page': 1,
2505
+ 'page_total': 1,
2506
+ 'range_max': 1,
2507
+ 'range_min': 1,
2508
+ 'total_count': 1
2509
+ },
2510
+ 'sorting': {
2511
+ 'sort': [
2512
+ {
2513
+ 'key': 'sort',
2514
+ 'value': 'asc_id'
2515
+ }
2516
+ ]
2517
+ }
2206
2518
  },
2207
2519
  'links': {
2208
- 'data': {...}
2520
+ 'data': {
2521
+ 'self': {
2522
+ 'body': '',
2523
+ 'content_type': '',
2524
+ 'href': '/api/v2/members?where_first_name=Peter',
2525
+ 'method': 'GET',
2526
+ 'name': ''
2527
+ }
2528
+ }
2529
+ },
2530
+ 'results': [
2531
+ {
2532
+ 'data': {
2533
+ 'properties': {
2534
+ 'birth_date': None,
2535
+ 'business_email': 'pramos@M365x61936377.onmicrosoft.com',
2536
+ 'business_fax': None,
2537
+ 'business_phone': None,
2538
+ 'cell_phone': None,
2539
+ 'deleted': False,
2540
+ 'display_language': None,
2541
+ 'first_name': 'Peter',
2542
+ 'gender': None,
2543
+ 'group_id': 8006,
2544
+ 'home_address_1': None,
2545
+ 'home_address_2': None,
2546
+ 'home_fax': None,
2547
+ 'home_phone': None,
2548
+ 'id': 8123,
2549
+ 'initials': None,
2550
+ 'last_name': 'Ramos',
2551
+ 'middle_name': None,
2552
+ 'name': 'pramos',
2553
+ 'name_formatted': 'Peter Ramos',
2554
+ 'office_location': None,
2555
+ 'pager': None,
2556
+ 'personal_email': None,
2557
+ 'photo_id': 13981,
2558
+ 'photo_url': 'api/v1/members/8123/photo?v=13981.1',
2559
+ 'privilege_content_manager': False,
2560
+ 'privilege_grant_discovery': False,
2561
+ 'privilege_login': True,
2562
+ 'privilege_modify_groups': False,
2563
+ 'privilege_modify_users': False,
2564
+ 'privilege_public_access': True,
2565
+ 'privilege_system_admin_rights': False,
2566
+ 'privilege_user_admin_rights': False,
2567
+ 'time_zone': -1,
2568
+ 'title': 'Maintenance Planner',
2569
+ 'type': 0,
2570
+ 'type_name': 'User'
2571
+ }
2572
+ }
2573
+ }
2574
+ ]
2575
+ }
2576
+ ```
2577
+
2578
+ To access the (login) name of the first user found, use
2579
+ `["results"][0]["data"]["properties"]["name"]`.
2580
+ Alternatively, use the method `get_result_value(response, "name", 0)`.
2581
+
2582
+ """
2583
+
2584
+ # Add query parameters (embedded in the URL)
2585
+ # Using type = 0 for OTCS groups or type = 17 for service user:
2586
+ query = {"where_type": user_type, "where_name": name}
2587
+ encoded_query = urllib.parse.urlencode(query=query, doseq=True)
2588
+ request_url = self.config()["membersUrlv2"] + "?{}".format(encoded_query)
2589
+
2590
+ request_header = self.request_form_header()
2591
+
2592
+ self.logger.debug(
2593
+ "Get user with login name -> '%s'%s; calling -> %s",
2594
+ name,
2595
+ ", type -> 'service user'" if user_type == 17 else "",
2596
+ request_url,
2597
+ )
2598
+
2599
+ return self.do_request(
2600
+ url=request_url,
2601
+ method="GET",
2602
+ headers=request_header,
2603
+ timeout=None,
2604
+ failure_message="Failed to get user with login -> '{}' and type -> {}".format(name, user_type),
2605
+ warning_message="Couldn't find user with login -> '{}' and type -> {}".format(name, user_type),
2606
+ show_error=show_error,
2607
+ )
2608
+
2609
+ # end method definition
2610
+
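Because the reworked `get_user()` is decorated with `functools.cache`, repeated lookups for the same login name and type are served from the cache instead of the REST API (which is why `add_user()` calls `get_user.cache_clear()`). A short sketch with illustrative login names:

```python
response = otcs_object.get_user(name="pramos")  # first call hits the REST API
user_id = otcs_object.get_result_value(response=response, key="id")

# Service users (type 17) are looked up with the same method:
service_user = otcs_object.get_user(name="svc-integration", user_type=17)
```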
2611
+ def search_user(self, value: str, field: str = "where_name") -> dict | None:
2612
+ """Find a user based on search criteria.
2613
+
2614
+ Args:
2615
+ value (str):
2616
+ Field value to search for.
2617
+ field (str):
2618
+ User field to search with (e.g. "where_type", "where_name",
2619
+ "where_first_name", "where_last_name", "where_business_email", "query").
2620
+
2621
+ Returns:
2622
+ dict | None:
2623
+ User information as a dictionary, or None if the user could not be found
2624
+ (e.g., because it doesn't exist).
2625
+
2626
+ Example:
2627
+ ```json
2628
+ {
2629
+ 'collection': {
2630
+ 'paging': {
2631
+ 'limit': 10,
2632
+ 'links': {'data': {...}},
2633
+ 'page': 1,
2634
+ 'page_total': 2,
2635
+ 'range_max': 10,
2636
+ 'range_min': 1,
2637
+ 'total_count': 11
2638
+ },
2639
+ 'sorting': {
2640
+ 'sort': [
2641
+ {
2642
+ 'key': 'sort',
2643
+ 'value': 'asc_id'
2644
+ }
2645
+ ]
2646
+ }
2647
+ },
2648
+ 'links': {
2649
+ 'data': {
2650
+ 'self': {
2651
+ 'body': '',
2652
+ 'content_type': '',
2653
+ 'href': '/api/v2/members?where_first_name=Peter',
2654
+ 'method': 'GET',
2655
+ 'name': ''
2656
+ }
2657
+ }
2209
2658
  },
2210
2659
  'results': [
2211
2660
  {
@@ -2231,7 +2680,23 @@ class OTCS:
2231
2680
  'middle_name': None,
2232
2681
  'name': 'dfoxhoven',
2233
2682
  'name_formatted': 'Deke Foxhoven',
2234
- ...
2683
+ 'office_location': None,
2684
+ 'pager': None,
2685
+ 'personal_email': None,
2686
+ 'photo_id': 17467,
2687
+ 'photo_url': 'api/v1/members/8123/photo?v=17467.1',
2688
+ 'privilege_content_manager': False,
2689
+ 'privilege_grant_discovery': False,
2690
+ 'privilege_login': True,
2691
+ 'privilege_modify_groups': False,
2692
+ 'privilege_modify_users': False,
2693
+ 'privilege_public_access': True,
2694
+ 'privilege_system_admin_rights': False,
2695
+ 'privilege_user_admin_rights': False,
2696
+ 'time_zone': -1,
2697
+ 'title': 'Contract Manager',
2698
+ 'type': 0,
2699
+ 'type_name': 'User'
2235
2700
  }
2236
2701
  }
2237
2702
  }
@@ -2264,13 +2729,100 @@ class OTCS:
2264
2729
 
2265
2730
  # end method definition
2266
2731
 
2732
+ def add_user(
2733
+ self,
2734
+ name: str,
2735
+ password: str,
2736
+ first_name: str,
2737
+ last_name: str,
2738
+ email: str,
2739
+ title: str,
2740
+ base_group: int,
2741
+ privileges: list | None = None,
2742
+ user_type: int = 0,
2743
+ ) -> dict | None:
2744
+ """Add Content Server user.
2745
+
2746
+ Args:
2747
+ name (str):
2748
+ The login name of the user.
2749
+ password (str):
2750
+ The password of the user.
2751
+ first_name (str):
2752
+ The first name of the user.
2753
+ last_name (str):
2754
+ The last name of the user.
2755
+ email (str):
2756
+ The email address of the user.
2757
+ title (str):
2758
+ The title of the user.
2759
+ base_group (int):
2760
+ The base group id of the user (e.g. department)
2761
+ privileges (list, optional):
2762
+ Possible values are Login, Public Access, Content Manager,
2763
+ Modify Users, Modify Groups, User Admin Rights,
2764
+ Grant Discovery, System Admin Rights
2765
+ user_type (int, optional):
2766
+ The ID of the user type. 0 = regular user, 17 = service user.
2767
+
2768
+ Returns:
2769
+ dict | None:
2770
+ User information or None if the user couldn't be created
2771
+ (e.g. because it exists already).
2772
+
2773
+ """
2774
+
2775
+ if privileges is None:
2776
+ privileges = ["Login", "Public Access"]
2777
+
2778
+ user_post_body = {
2779
+ "type": user_type,
2780
+ "name": name,
2781
+ "password": password,
2782
+ "first_name": first_name,
2783
+ "last_name": last_name,
2784
+ "business_email": email,
2785
+ "title": title,
2786
+ "group_id": base_group,
2787
+ "privilege_login": ("Login" in privileges),
2788
+ "privilege_public_access": ("Public Access" in privileges),
2789
+ "privilege_content_manager": ("Content Manager" in privileges),
2790
+ "privilege_modify_users": ("Modify Users" in privileges),
2791
+ "privilege_modify_groups": ("Modify Groups" in privileges),
2792
+ "privilege_user_admin_rights": ("User Admin Rights" in privileges),
2793
+ "privilege_grant_discovery": ("Grant Discovery" in privileges),
2794
+ "privilege_system_admin_rights": ("System Admin Rights" in privileges),
2795
+ }
2796
+
2797
+ request_url = self.config()["membersUrlv2"]
2798
+ request_header = self.request_form_header()
2799
+
2800
+ self.logger.debug("Add user -> '%s'; calling -> %s", name, request_url)
2801
+
2802
+ # Clear user cache
2803
+ self.get_user.cache_clear()
2804
+
2805
+ return self.do_request(
2806
+ url=request_url,
2807
+ method="POST",
2808
+ headers=request_header,
2809
+ data=user_post_body,
2810
+ timeout=None,
2811
+ failure_message="Failed to add user -> '{}'".format(name),
2812
+ )
2813
+
2814
+ # end method definition
2815
+
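A hedged sketch of calling the re-added `add_user()` with a non-default privilege set; all values are placeholders taken from the docstring examples above:

```python
response = otcs_object.add_user(
    name="pramos",
    password="********",  # placeholder
    first_name="Peter",
    last_name="Ramos",
    email="pramos@example.com",
    title="Maintenance Planner",
    base_group=8006,  # ID of the base group, e.g. a department group
    privileges=["Login", "Public Access", "Content Manager"],
)
new_user_id = otcs_object.get_result_value(response=response, key="id")
```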
2267
2816
  def update_user(self, user_id: int, field: str, value: str) -> dict | None:
2268
2817
  """Update a defined field for a user.
2269
2818
 
2270
2819
  Args:
2271
- user_id (int): ID of the user
2272
- value (str): field value
2273
- field (str): user field
2820
+ user_id (int):
2821
+ The ID of the user to update.
2822
+ field (str):
2823
+ The user data field to update.
2824
+ value (str):
2825
+ The new value for the user data field.
2274
2826
 
2275
2827
  Returns:
2276
2828
  dict | None:
@@ -2656,30 +3208,252 @@ class OTCS:
2656
3208
 
2657
3209
  """
2658
3210
 
2659
- favorite_tab_post_body = {"name": tab_name, "order": str(order)}
3211
+ favorite_tab_post_body = {"name": tab_name, "order": str(order)}
3212
+
3213
+ request_url = self.config()["favoritesUrl"] + "/tabs"
3214
+ request_header = self.request_form_header()
3215
+
3216
+ self.logger.debug(
3217
+ "Adding favorite tab -> %s; calling -> %s",
3218
+ tab_name,
3219
+ request_url,
3220
+ )
3221
+
3222
+ return self.do_request(
3223
+ url=request_url,
3224
+ method="POST",
3225
+ headers=request_header,
3226
+ data=favorite_tab_post_body,
3227
+ timeout=None,
3228
+ failure_message="Failed to add favorite tab -> {}".format(tab_name),
3229
+ )
3230
+
3231
+ # end method definition
3232
+
3233
+ def get_groups(
3234
+ self,
3235
+ where_name: str | None = None,
3236
+ sort: str | None = None,
3237
+ limit: int = 20,
3238
+ page: int = 1,
3239
+ show_error: bool = False,
3240
+ ) -> dict | None:
3241
+ """Get a list of Content Server groups.
3242
+
3243
+ Args:
3244
+ where_name (str | None = None):
3245
+ The name of the group to look up.
3246
+ sort (str | None = None):
3247
+ Order by named column (Using prefixes such as sort=asc_name or sort=desc_name).
3248
+ Format can be sort = id, sort = name, sort = group_id.
3249
+ If the prefix of asc or desc is not used then asc will be assumed.
3250
+ Default is None.
3251
+ limit (int, optional):
3252
+ The maximum number of results per page (internal default is 10). OTCS does
3253
+ not allow values > 20 so this method adjusts values > 20 to 20.
3254
+ page (int, optional):
3255
+ The page number to retrieve.
3256
+ show_error (bool, optional):
3257
+ If True, treats the absence of the group as an error. Defaults to False.
3258
+
3259
+ Returns:
3260
+ dict | None:
3261
+ Group information as a dictionary, or None if the group is not found.
3262
+
3263
+ Example:
3264
+ ```json
3265
+ {
3266
+ 'collection': {
3267
+ 'paging': {
3268
+ 'limit': 10,
3269
+ 'page': 1,
3270
+ 'page_total': 1,
3271
+ 'range_max': 1,
3272
+ 'range_min': 1,
3273
+ 'total_count': 1
3274
+ },
3275
+ 'sorting': {
3276
+ 'sort': [
3277
+ {
3278
+ 'key': 'sort',
3279
+ 'value': 'asc_id'
3280
+ }
3281
+ ]
3282
+ }
3283
+ },
3284
+ 'links': {
3285
+ 'data': {
3286
+ 'self': {
3287
+ 'body': '',
3288
+ 'content_type': '',
3289
+ 'href': '/api/v2/members?where_name=Procurement&where_type=1',
3290
+ 'method': 'GET',
3291
+ 'name': ''
3292
+ }
3293
+ }
3294
+ },
3295
+ 'results': [
3296
+ {
3297
+ 'data': {
3298
+ 'properties': {
3299
+ 'deleted': False,
3300
+ 'id': 17649,
3301
+ 'initials': 'P',
3302
+ 'leader_id': None,
3303
+ 'name': 'Procurement',
3304
+ 'name_formatted': 'Procurement',
3305
+ 'type': 1,
3306
+ 'type_name': 'Group'
3307
+ }
3308
+ }
3309
+ }
3310
+ ]
3311
+ }
3312
+ ```
3313
+
3314
+ To access the ID of the first group found, use ["results"][0]["data"]["properties"]["id"].
3315
+ Or use the method get_result_value(response, key="id")
3316
+
3317
+ """
3318
+
3319
+ # Add query parameters (embedded in the URL)
3320
+ # Using type = 1 for OTCS groups:
3321
+ query = {"where_type": 1}
3322
+ if where_name:
3323
+ query["where_name"] = where_name
3324
+ if sort:
3325
+ query["sort"] = sort
3326
+ if limit:
3327
+ if limit > 20:
3328
+ self.logger.warning(
3329
+ "Page limit for group query cannot be larger than 20. Adjusting from %d to 20.", limit
3330
+ )
3331
+ limit = 20
3332
+ query["limit"] = limit
3333
+ if page:
3334
+ query["page"] = page
3335
+ encoded_query = urllib.parse.urlencode(query=query, doseq=True)
3336
+ request_url = self.config()["membersUrlv2"] + "?{}".format(encoded_query)
3337
+
3338
+ request_header = self.request_form_header()
3339
+
3340
+ self.logger.debug(
3341
+ "Get groups%s; calling -> %s",
3342
+ " with name -> '{}'".format(where_name) if where_name else "",
3343
+ request_url,
3344
+ )
3345
+
3346
+ return self.do_request(
3347
+ url=request_url,
3348
+ method="GET",
3349
+ headers=request_header,
3350
+ timeout=None,
3351
+ failure_message="Failed to get groups{}".format(
3352
+ " with name -> '{}'".format(where_name) if where_name else ""
3353
+ ),
3354
+ warning_message="Groups{} do not yet exist!".format(
3355
+ " with name -> '{}'".format(where_name) if where_name else ""
3356
+ ),
3357
+ show_error=show_error,
3358
+ )
3359
+
3360
+ # end method definition
3361
+
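A short usage sketch for `get_groups()`, mirroring the docstring example (the group name is illustrative):

```python
response = otcs_object.get_groups(where_name="Procurement")
if response:
    group_id = otcs_object.get_result_value(response=response, key="id")
    # -> e.g. 17649 in the example payload above
```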
3362
+ def get_groups_iterator(
3363
+ self,
3364
+ where_name: str | None = None,
3365
+ sort: str | None = None,
3366
+ limit: int = 20,
3367
+ ) -> iter:
3368
+ """Get an iterator object that can be used to traverse OTCS groups.
3369
+
3370
+ Filters can be applied that are given by the "where" and "query" parameters.
3371
+
3372
+ Using a generator avoids loading a large number of groups into memory at once.
3373
+ Instead, you can iterate over the potentially large list of groups.
3374
+
3375
+ Example usage:
3376
+ ```python
3377
+ groups = otcs_object.get_groups_iterator(limit=10)
3378
+ for group in groups:
3379
+ logger.info(
3380
+ "Traversing group -> '%s' (%s)",
3381
+ otcs_object.get_result_value(response=group, key="name"),
3382
+ otcs_object.get_result_value(response=group, key="id"),
3383
+ )
3384
+ ```
3385
+
3386
+ Args:
3387
+ where_name (str | None = None):
3388
+ The name of the group to filter by.
3389
+ sort (str | None = None):
3390
+ Order by named column (Using prefixes such as sort=asc_name or sort=desc_name ).
3391
+ Format can be sort = id, sort = name, sort = group_id.
3392
+ If the prefix of asc or desc is not used then asc will be assumed.
3393
+ Default is None.
3394
+ limit (int, optional):
3395
+ The maximum number of results per page (internal default is 10). OTCS does
3396
+ not allow values > 20 so this method adjusts values > 20 to 20.
3397
+
3398
+ Returns:
3399
+ iter:
3400
+ A generator yielding one group per iteration.
3401
+ If the REST API fails, returns no value.
3402
+
3403
+ """
3404
+
3405
+ # First we probe how many groups there are:
3406
+ response = self.get_groups(
3407
+ where_name=where_name,
3408
+ limit=1,
3409
+ page=1,
3410
+ )
3411
+ if not response or "results" not in response:
3412
+ # Don't return None! Plain return is what we need for iterators.
3413
+ # Natural Termination: If the generator does not yield, it behaves
3414
+ # like an empty iterable when used in a loop or converted to a list:
3415
+ return
3416
+
3417
+ number_of_groups = response["collection"]["paging"]["total_count"]
3418
+ if not number_of_groups:
3419
+ self.logger.warning(
3420
+ "No groups found! Cannot iterate over groups.",
3421
+ )
3422
+ # Don't return None! Plain return is what we need for iterators.
3423
+ # Natural Termination: If the generator does not yield, it behaves
3424
+ # like an empty iterable when used in a loop or converted to a list:
3425
+ return
3426
+
3427
+ # If there are many groups we need to go through all pages.
3428
+ # Adding limit - 1 ensures that any remainder from the division is
3429
+ # accounted for, effectively rounding up. Integer division (//) performs floor division,
3430
+ # giving the desired number of pages:
3431
+ total_pages = (number_of_groups + limit - 1) // limit
2660
3432
 
2661
- request_url = self.config()["favoritesUrl"] + "/tabs"
2662
- request_header = self.request_form_header()
3433
+ for page in range(1, total_pages + 1):
3434
+ # Get the next page of groups:
3435
+ response = self.get_groups(
3436
+ where_name=where_name,
3437
+ sort=sort,
3438
+ limit=limit,
3439
+ page=page,
3440
+ )
3441
+ if not response or not response.get("results", None):
3442
+ self.logger.warning(
3443
+ "Failed to retrieve groups (page -> %d)",
3444
+ page,
3445
+ )
3446
+ return
2663
3447
 
2664
- self.logger.debug(
2665
- "Adding favorite tab -> %s; calling -> %s",
2666
- tab_name,
2667
- request_url,
2668
- )
3448
+ # Yield groups one at a time:
3449
+ yield from response["results"]
2669
3450
 
2670
- return self.do_request(
2671
- url=request_url,
2672
- method="POST",
2673
- headers=request_header,
2674
- data=favorite_tab_post_body,
2675
- timeout=None,
2676
- failure_message="Failed to add favorite tab -> {}".format(tab_name),
2677
- )
3451
+ # end for page in range(1, total_pages + 1)
2678
3452
 
2679
3453
  # end method definition
2680
3454
 
2681
3455
  def get_group(self, name: str, show_error: bool = False) -> dict | None:
2682
- """Look up a Content Server group.
3456
+ """Get the Content Server group with a given name.
2683
3457
 
2684
3458
  Args:
2685
3459
  name (str):
@@ -2690,23 +3464,65 @@ class OTCS:
2690
3464
  Returns:
2691
3465
  dict | None:
2692
3466
  Group information as a dictionary, or None if the group is not found.
2693
- The returned information has the following structure:
3467
+
3468
+ Example:
3469
+ ```json
2694
3470
  {
2695
- "data": [
3471
+ 'collection': {
3472
+ 'paging': {
3473
+ 'limit': 10,
3474
+ 'page': 1,
3475
+ 'page_total': 1,
3476
+ 'range_max': 1,
3477
+ 'range_min': 1,
3478
+ 'total_count': 1
3479
+ },
3480
+ 'sorting': {
3481
+ 'sort': [
3482
+ {
3483
+ 'key': 'sort',
3484
+ 'value': 'asc_id'
3485
+ }
3486
+ ]
3487
+ }
3488
+ },
3489
+ 'links': {
3490
+ 'data': {
3491
+ 'self': {
3492
+ 'body': '',
3493
+ 'content_type': '',
3494
+ 'href': '/api/v2/members?where_name=Procurement&where_type=1',
3495
+ 'method': 'GET',
3496
+ 'name': ''
3497
+ }
3498
+ }
3499
+ },
3500
+ 'results': [
2696
3501
  {
2697
- "id": 0,
2698
- "name": "string",
2699
- ...
3502
+ 'data': {
3503
+ 'properties': {
3504
+ 'deleted': False,
3505
+ 'id': 17649,
3506
+ 'initials': 'P',
3507
+ 'leader_id': None,
3508
+ 'name': 'Procurement',
3509
+ 'name_formatted': 'Procurement',
3510
+ 'type': 1,
3511
+ 'type_name': 'Group'
3512
+ }
3513
+ }
2700
3514
  }
2701
3515
  ]
2702
3516
  }
3517
+ ```
2703
3518
 
2704
- To access the ID of the first group found, use ["data"][0]["id"].
3519
+ To access the ID of the first group found, use ["results"][0]["data"]["properties"]["id"].
3520
+ Or use the method get_result_value(response, key="id")
2705
3521
 
2706
3522
  """
2707
3523
 
2708
- # Add query parameters (these are NOT passed via JSon body!)
2709
- # type = 1 ==> Group
3524
+ # Add query parameters (embedded in the URL)
3525
+ # Using type = 1 for OTCS groups:
2710
3526
  query = {"where_type": 1, "where_name": name}
2711
3527
  encoded_query = urllib.parse.urlencode(query=query, doseq=True)
2712
3528
  request_url = self.config()["membersUrlv2"] + "?{}".format(encoded_query)
@@ -2804,10 +3620,6 @@ class OTCS:
2804
3620
 
2805
3621
  query = {}
2806
3622
  query["where_type"] = str(member_type)
2807
- if limit:
2808
- query["limit"] = limit
2809
- if page:
2810
- query["page"] = page
2811
3623
  if where_name:
2812
3624
  query["where_name"] = where_name
2813
3625
  if where_first_name:
@@ -2816,12 +3628,13 @@ class OTCS:
2816
3628
  query["where_last_name"] = where_last_name
2817
3629
  if where_business_email:
2818
3630
  query["where_business_email"] = where_business_email
2819
-
3631
+ if limit:
3632
+ query["limit"] = limit
3633
+ if page:
3634
+ query["page"] = page
2820
3635
  encoded_query = urllib.parse.urlencode(query=query, doseq=True)
2821
-
2822
- # default limit is 25 which may not be enough for groups with many members
2823
- # where_type = 1 makes sure we just get groups and not users
2824
3636
  request_url = self.config()["membersUrlv2"] + "/" + str(group) + "/members?{}".format(encoded_query)
3637
+
2825
3638
  request_header = self.request_form_header()
2826
3639
 
2827
3640
  self.logger.debug(
@@ -2856,8 +3669,8 @@ class OTCS:
2856
3669
 
2857
3670
  Filters can be applied that are given by the "where" parameters.
2858
3671
 
2859
- Using a generator avoids loading a large number of nodes into memory at once.
2860
- Instead you can iterate over the potential large list of related workspaces.
3672
+ Using a generator avoids loading a large number of group members into memory at once.
3673
+ Instead you can iterate over the potential large list of group members.
2861
3674
 
2862
3675
  Example usage:
2863
3676
  ```python
@@ -3669,7 +4482,8 @@ class OTCS:
3669
4482
  """Get a node based on the workspace ID (= node ID) and path (list of folder names).
3670
4483
 
3671
4484
  Args:
3672
- workspace_id (int): node ID of the workspace
4485
+ workspace_id (int):
4486
+ The node ID of the workspace.
3673
4487
  path (list):
3674
4488
  A list of container items (top down).
3675
4489
  The last item is name of to be retrieved item.
@@ -3871,8 +4685,10 @@ class OTCS:
3871
4685
  """Get a node based on the nickname.
3872
4686
 
3873
4687
  Args:
3874
- nickname (str): The nickname of the node.
3875
- show_error (bool): If True, treat as error if node is not found.
4688
+ nickname (str):
4689
+ The nickname of the node.
4690
+ show_error (bool):
4691
+ If True, treat as error if node is not found.
3876
4692
 
3877
4693
  Returns:
3878
4694
  dict | None:
@@ -4315,7 +5131,7 @@ class OTCS:
4315
5131
  The name of the attribute that includes the value to match with
4316
5132
  value (str):
4317
5133
  The lookup value that is matched against the node attribute value.
4318
- attribute_set (str, optional):
5134
+ attribute_set (str | None, optional):
4319
5135
  The name of the attribute set
4320
5136
 
4321
5137
  Returns:
@@ -4343,7 +5159,7 @@ class OTCS:
4343
5159
  )
4344
5160
  if not category_schema:
4345
5161
  self.logger.debug(
4346
- "Node -> '%s' (%s) does not have category -> '%s'. Cannot lookup -> '%s'. Skipping...",
5162
+ "Node -> '%s' (%s) does not have category -> '%s'. Cannot lookup value -> '%s'. Skipping...",
4347
5163
  node_name,
4348
5164
  node_id,
4349
5165
  category,
@@ -4365,6 +5181,8 @@ class OTCS:
4365
5181
  )
4366
5182
  continue
4367
5183
  attribute_key = attribute_schema["key"]
5184
+ # Split the attribute key once (1) at the first underscore from the right.
5185
+ # rsplit delivers a list and [-1] delivers the last list item:
4368
5186
  attribute_id = attribute_key.rsplit("_", 1)[-1]
4369
5187
 
4370
5188
  if attribute_set:
@@ -4399,6 +5217,7 @@ class OTCS:
4399
5217
  attribute_value = cat_data.get(key)
4400
5218
  if not attribute_value:
4401
5219
  break
5220
+ # Is it a multi-value attribute (i.e. a list of values)?
4402
5221
  if isinstance(attribute_value, list):
4403
5222
  if value in attribute_value:
4404
5223
  # Create a "results" dict that is compatible with normal REST calls
@@ -4437,89 +5256,6 @@ class OTCS:
4437
5256
 
4438
5257
  # end method definition
4439
5258
 
4440
- def lookup_node_old(
4441
- self,
4442
- parent_node_id: int,
4443
- category: str,
4444
- attribute: str,
4445
- value: str,
4446
- ) -> dict | None:
4447
- """Lookup the node under a parent node that has a specified value in a category attribute.
4448
-
4449
- Args:
4450
- parent_node_id (int):
4451
- The node ID of the parent (typically folder or workspace).
4452
- category (str):
4453
- The name of the category.
4454
- attribute (str):
4455
- The name of the attribute that includes the value to match with
4456
- value (str):
4457
- The lookup value that is matched agains the node attribute value.
4458
-
4459
- Returns:
4460
- dict | None:
4461
- Node wrapped in dictionary with "results" key or None if the REST API fails.
4462
-
4463
- """
4464
-
4465
- # get_subnodes_iterator() returns a python generator that we use for iterating over all nodes
4466
- # in an efficient way avoiding to retrieve all nodes at once (which could be a large number):
4467
- for node in self.get_subnodes_iterator(
4468
- parent_node_id=parent_node_id,
4469
- fields=["properties", "categories"],
4470
- metadata=True,
4471
- ):
4472
- schema = node["metadata"]["categories"]
4473
- data = node["data"]["categories"]
4474
- for cat_data, cat_schema in zip(data, schema, strict=False):
4475
- data_values = list(cat_data.values())
4476
- schema_values = list(cat_schema.values())
4477
- # Schema has one additional element (the first one) representing
4478
- # the category object itself. This includes the name. We need
4479
- # to remove (pop) it from the schema list to make sure the schema list
4480
- # and the data list have the same number of items. Otherwise
4481
- # the following for loop with zip() would not properly align the
4482
- # two lists:
4483
- category_name = schema_values.pop(0)["name"]
4484
- # Set attributes (standing for the set itself, not it's contained attributes)
4485
- # are only in the schema values, not in the data values. We need to remove
4486
- # them as well to avoid mis-alignment:
4487
- schema_values = [schema_value for schema_value in schema_values if schema_value.get("persona") != "set"]
4488
- if category_name == category:
4489
- for attr_data, attr_schema in zip(
4490
- data_values,
4491
- schema_values,
4492
- strict=False,
4493
- ):
4494
- attr_name = attr_schema["name"]
4495
- if attr_name == attribute:
4496
- if isinstance(attr_data, list):
4497
- if value in attr_data:
4498
- # Create a "results" dict that is compatible with normal REST calls
4499
- # to not break get_result_value() method that may be called on the result:
4500
- return {"results": node}
4501
- elif value == attr_data:
4502
- # Create a results dict that is compatible with normal REST calls
4503
- # to not break get_result_value() method that may be called on the result:
4504
- return {"results": node}
4505
- # we can break here and continue with the next node
4506
- # as we had the right category but did not find the matching value
4507
- break
4508
- # end for cat_data, cat_schema in zip(data, schema)
4509
- # end for node in nodes
4510
-
4511
- self.logger.debug(
4512
- "Couldn't find a node with the value -> '%s' in the attribute -> '%s' of category -> '%s' in parent with node ID -> %s.",
4513
- value,
4514
- attribute,
4515
- category,
4516
- parent_node_id,
4517
- )
4518
-
4519
- return None
4520
-
4521
- # end method definition
4522
-
4523
5259
  def lookup_node_by_regex(
4524
5260
  self,
4525
5261
  parent_node_id: int,
@@ -4861,13 +5597,18 @@ class OTCS:
4861
5597
  node_id (int):
4862
5598
  ID of the node. You can use the get_volume() function below to
4863
5599
  to the node id for a volume.
4864
- name (str): New name of the node.
4865
- description (str): New description of the node.
4866
- name_multilingual (dict, optional): multi-lingual node names
4867
- description_multilingual (dict, optional): multi-lingual description
5600
+ name (str):
5601
+ New name of the node.
5602
+ description (str):
5603
+ New description of the node.
5604
+ name_multilingual (dict | None, optional):
5605
+ The multi-lingual node names.
5606
+ description_multilingual (dict | None, optional):
5607
+ The multi-lingual descriptions.
4868
5608
 
4869
5609
  Returns:
4870
- dict | None: Request response or None if the renaming fails.
5610
+ dict | None:
5611
+ Request response or None if the renaming fails.
4871
5612
 
4872
5613
  """
4873
5614
 
@@ -5020,27 +5761,306 @@ class OTCS:
5020
5761
 
5021
5762
  """
5022
5763
 
5023
- request_url = self.config()["recycleBinUrl"] + "/nodes/restore"
5024
- request_header = self.request_form_header()
5764
+ request_url = self.config()["recycleBinUrl"] + "/nodes/restore"
5765
+ request_header = self.request_form_header()
5766
+
5767
+ restore_data = {"ids": node_id} if isinstance(node_id, list) else {"ids": [node_id]}
5768
+
5769
+ self.logger.debug(
5770
+ "Restore node(s) with ID(s) -> %s from recycle bin; calling -> %s",
5771
+ str(node_id),
5772
+ request_url,
5773
+ )
5774
+
5775
+ return self.do_request(
5776
+ url=request_url,
5777
+ method="POST",
5778
+ headers=request_header,
5779
+ data=restore_data,
5780
+ timeout=None,
5781
+ failure_message="Failed to restore node(s) with ID(s) -> {} from the recycle bin".format(
5782
+ node_id,
5783
+ ),
5784
+ )
5785
+
5786
+ # end method definition
5787
+
5788
+ def get_node_audit(
5789
+ self,
5790
+ node_id: int,
5791
+ filter_event_type: int | None = None,
5792
+ filter_user_id: int | None = None,
5793
+ filter_date_start: str | None = None,
5794
+ filter_date_end: str | None = None,
5795
+ limit: int = 100,
5796
+ page: int = 1,
5797
+ sort: str = "desc_audit_date",
5798
+ ) -> dict | None:
5799
+ """Get the audit information for a given node ID.
5800
+
5801
+ Args:
5802
+ node_id (int):
5803
+ The ID of the node to get the audit for.
5804
+ filter_event_type (int | None, optional):
5805
+ Type of audit events to filter by. Possible values:
5806
+ - 9 : Permission Changed
5807
+ - 10 : Attribute Value Changed
5808
+ - 92 : Create from Copy
5809
+ - 264 : Classification Applied
5810
+ - 301 : Deployed from Warehouse
5811
+ - 416 : XML Import
5812
+ - 6000 : Content Sharing - Shared with external system
5813
+ - 6014 : Content Sharing - Share Coordinator changed
5814
+ - ...
5815
+ filter_user_id (int, optional):
5816
+ Filter audit events by user ID. Defaults to no filter.
5817
5819
+ filter_date_start (str | None, optional):
5820
+ Filter audit events by start date. Defaults to no filter.
5821
+ The date should be provided in YYYY-MM-DD notation. Time
5822
+ is not considered (only days)
5823
+ filter_date_end (str | None, optional):
5824
+ Filter audit events by end date. Defaults to no filter.
5825
+ limit (int, optional):
5826
+ The maximum number of results to return. Defaults to 100.
5827
+ page (int, optional):
5828
+ The page of results to retrieve. Defaults to 1 (first page).
5829
+ sort (str, optional):
5830
+ Sort order of audit results. Format can be sort=desc_audit_date or sort=asc_audit_date.
5831
+ Results are sorted in descending order by default.
5832
+
5833
+ Returns:
5834
+ dict | None:
5835
+ Audit information as a dictionary, or None if the audit for the
5836
+ given node could not be retrieved.
5837
+
5838
+ Example:
5839
+ {
5840
+ 'collection': {
5841
+ 'paging': {
5842
+ 'limit': 100,
5843
+ 'page': 1,
5844
+ 'page_total': 1,
5845
+ 'range_max': 23,
5846
+ 'range_min': 1,
5847
+ 'total_count': 23
5848
+ },
5849
+ 'sorting': {
5850
+ 'sort': [
5851
+ {
5852
+ 'key': 'sort',
5853
+ 'value': 'desc_audit_date'
5854
+ }
5855
+ ]
5856
+ }
5857
+ },
5858
+ 'links': {
5859
+ 'data': {
5860
+ 'self': {
5861
+ 'body': '',
5862
+ 'content_type': '',
5863
+ 'href': '/api/v2/nodes/29572/audit?fields=properties&limit=100&sort=desc_audit_date',
5864
+ 'method': 'GET',
5865
+ 'name': ''
5866
+ }
5867
+ }
5868
+ },
5869
+ 'results': {
5870
+ 'data': {
5871
+ 'audit': [
5872
+ {
5873
+ 'id': 29572,
5874
+ 'event_type': 6000,
5875
+ 'audit_date': '2025-05-23T10:20:56Z',
5876
+ 'user_id': 8306,
5877
+ 'agent_id': None,
5878
+ 'audit_language_code': None,
5879
+ 'target_user_id': None,
5880
+ 'audit_name': 'Shared with Microsoft Teams Content Sharing Provider'
5881
+ },
5882
+ ...
5883
+ ],
5884
+ 'audit_event_types': [
5885
+ {
5886
+ 'id': 92,
5887
+ 'name': 'Create from Copy'
5888
+ },
5889
+ {
5890
+ 'id': 6014,
5891
+ 'name': 'Content Sharing - Share Coordinators Changed'
5892
+ },
5893
+ {
5894
+ 'id': 301,
5895
+ 'name': 'Deployed from Warehouse'
5896
+ },
5897
+ ...
5898
+ ]
5899
+ }
5900
+ }
5901
+ }
5902
+
5903
+ """
5904
+
5905
+ # Add query parameters (these are NOT passed via JSon body!)
5906
+ query = {"limit": limit, "sort": sort}
5907
+ if filter_event_type:
5908
+ query["where_type"] = filter_event_type
5909
+ if filter_user_id:
5910
+ query["where_user_id"] = filter_user_id
5911
+ if filter_date_start:
5912
+ query["where_audit_date_start"] = filter_date_start
5913
+ if filter_date_end:
5914
+ query["where_audit_date_end"] = filter_date_end
5915
+ if page > 1:
5916
+ query["page"] = page
5917
+
5918
+ encoded_query = urllib.parse.urlencode(query=query, doseq=True)
5919
+
5920
+ request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + "/audit" + "?{}".format(encoded_query)
5921
+
5922
+ request_header = self.request_form_header()
5923
+
5924
+ self.logger.debug(
5925
+ "Get audit of node with ID -> %s (page -> %d, item limit -> %d); calling -> %s",
5926
+ str(node_id),
5927
+ page,
5928
+ limit,
5929
+ request_url,
5930
+ )
5931
+
5932
+ return self.do_request(
5933
+ url=request_url,
5934
+ method="GET",
5935
+ headers=request_header,
5936
+ timeout=None,
5937
+ failure_message="Failed to get audit for node with ID -> {}".format(
5938
+ node_id,
5939
+ ),
5940
+ )
5941
+
5942
+ # end method definition
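A minimal usage sketch of the paged audit call, assuming an already authenticated OTCS instance named `otcs`; the node ID, filter values, and printed fields are illustrative only (the response keys follow the docstring example above):

```python
# Hypothetical usage; "otcs" is an authenticated OTCS instance, 12345 a placeholder node ID.
response = otcs.get_node_audit(
    node_id=12345,
    filter_event_type=9,             # example: only "Permission Changed" events
    filter_date_start="2025-01-01",  # dates in YYYY-MM-DD notation
    limit=50,
    page=1,
)
if response:
    total_count = response["collection"]["paging"]["total_count"]
    print("Total audit events:", total_count)
    for entry in response["results"]["data"]["audit"]:
        print(entry["audit_date"], entry["event_type"], entry["audit_name"])
```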
5943
+
5944
+ def get_node_audit_iterator(
5945
+ self,
5946
+ node_id: int,
5947
+ filter_event_type: int | None = None,
5948
+ filter_user_id: int | None = None,
5949
+ filter_date_start: str | None = None,
5950
+ filter_date_end: str | None = None,
5951
+ page_size: int = 25,
5952
+ sort: str = "desc_audit_date",
5953
+ ) -> iter:
5954
+ """Get an iterator object that can be used to traverse subnodes.
5955
+
5956
+ Filters can be applied that are given by the "filter" parameters.
5957
+
5958
+ Using a generator avoids loading a large number of audit entries into memory at once.
5959
+ Instead you can iterate over the potentially large list of audit entries.
5960
+
5961
+ Example usage:
5962
+ ```python
5963
+ audit_entries = otcs_object.get_node_audit_iterator(node_id=15838)
5964
+ for audit_entry in audit_entries:
5965
+ logger.info("Audit entry -> '%s'", ...)
5966
+ ```
5967
+
5968
+ Args:
5969
+ node_id (int):
5970
+ The ID of the node to get the audit for.
5971
+ filter_event_type (int, optional):
5972
+ Type of audit events to filter by. Possible values:
5973
+ - 9 : Permission Changed
5974
+ - 10 : Attribute Value Changed
5975
+ - 92 : Create from Copy
5976
+ - 264 : Classification Applied
5977
+ - 301 : Deployed from Warehouse
5978
+ - 416 : XML Import
5979
+ - 6000 : Content Sharing - Shared with external system
5980
+ - 6014 : Content Sharing - Share Coordinator changed
5981
+ - ...
5982
+ filter_user_id (int, optional):
5983
+ Filter audit events by user ID. Defaults to no filter.
5984
5986
+ filter_date_start (str, optional):
5987
+ Filter audit events by start date. Defaults to no filter.
5988
+ The date should be provided in YYYY-MM-DD notation. Time
5989
+ is not considered (only days)
5990
+ filter_date_end (str, optional):
5991
+ Filter audit events by end date. Defaults to no filter.
5992
5996
+ sort (str, optional):
5997
+ Sort order of audit results. Format can be sort=desc_audit_date or sort=asc_audit_date.
5998
+ Results are sorted in descending order by default.
5999
+ page_size (int, optional):
6000
+ The number of subnodes that are requested per page.
6001
+ For the iterator this is basically the chunk size.
6002
+
6003
+ Returns:
6004
+ iter:
6005
+ A generator yielding one audit entry per iteration.
6006
+ If the REST API fails, returns no value.
6007
+
6008
+ """
6009
+
6010
+ response = self.get_node_audit(
6011
+ node_id=node_id,
6012
+ filter_event_type=filter_event_type,
6013
+ filter_user_id=filter_user_id,
6014
+ filter_date_start=filter_date_start,
6015
+ filter_date_end=filter_date_end,
6016
+ )
6017
+ if (
6018
+ not response
6019
+ or "collection" not in response
6020
+ or "paging" not in response["collection"]
6021
+ or not response["collection"]["paging"].get("total_count")
6022
+ ):
6023
+ self.logger.debug(
6024
+ "Item with node ID -> %s has no audit information! Cannot iterate audit.",
6025
+ str(node_id),
6026
+ )
6027
+ # Don't return None! Plain return is what we need for iterators.
6028
+ # Natural Termination: If the generator does not yield, it behaves
6029
+ # like an empty iterable when used in a loop or converted to a list:
6030
+ return
5025
6031
 
5026
- restore_data = {"ids": node_id} if isinstance(node_id, list) else {"ids": [node_id]}
6032
+ audit_size = response["collection"]["paging"]["total_count"]
5027
6033
 
5028
- self.logger.debug(
5029
- "Restore node(s) with ID(s) -> %s from recycle bin; calling -> %s",
5030
- str(node_id),
5031
- request_url,
5032
- )
6034
+ # If there are many audit entries we need to go through all pages.
6035
+ # Adding page_size - 1 ensures that any remainder from the division is
6036
+ # accounted for, effectively rounding up. Integer division (//) performs floor division,
6037
+ # giving the desired number of pages:
6038
+ total_pages = (audit_size + page_size - 1) // page_size
5033
6039
 
5034
- return self.do_request(
5035
- url=request_url,
5036
- method="POST",
5037
- headers=request_header,
5038
- data=restore_data,
5039
- timeout=None,
5040
- failure_message="Failed to restore node(s) with ID(s) -> {} from the recycle bin".format(
5041
- node_id,
5042
- ),
5043
- )
6040
+ for page in range(1, total_pages + 1):
6041
+ # Get the next page of sub node items:
6042
+ response = self.get_node_audit(
6043
+ node_id=node_id,
6044
+ filter_event_type=filter_event_type,
6045
+ filter_user_id=filter_user_id,
6046
+ filter_date_start=filter_date_start,
6047
+ filter_date_end=filter_date_end,
6048
+ limit=page_size,
6049
+ page=page,
6050
+ sort=sort,
6051
+ )
6052
+ if not response or not response.get("results", None):
6053
+ self.logger.warning(
6054
+ "Failed to retrieve audit for node ID -> %d (page -> %d)",
6055
+ node_id,
6056
+ page,
6057
+ )
6058
+ return
6059
+
6060
+ # Yield nodes one at a time
6061
+ yield from response["results"]["data"]["audit"]
6062
+
6063
+ # end for page in range(1, total_pages + 1)
5044
6064
 
5045
6065
  # end method definition
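A short sketch of consuming the audit iterator, assuming the same illustrative `otcs` instance; the event type and page size are example values:

```python
# Iterate all "Attribute Value Changed" (10) events in ascending date order
# without loading the full audit list into memory:
for audit_entry in otcs.get_node_audit_iterator(
    node_id=12345,
    filter_event_type=10,
    sort="asc_audit_date",
    page_size=50,
):
    print(audit_entry["audit_date"], audit_entry["user_id"], audit_entry["audit_name"])
```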
5046
6066
 
@@ -5123,11 +6143,14 @@ class OTCS:
5123
6143
  """Get Volume information based on the volume type ID.
5124
6144
 
5125
6145
  Args:
5126
- volume_type (int): ID of the volume type
5127
- timeout (int, optional): timeout for the request in seconds
6146
+ volume_type (int):
6147
+ The ID of the volume type.
6148
+ timeout (int, optional):
6149
+ The timeout for the request in seconds.
5128
6150
 
5129
6151
  Returns:
5130
- dict | None: Volume Details or None if volume is not found.
6152
+ dict | None:
6153
+ Volume details or None if volume is not found.
5131
6154
 
5132
6155
  Example:
5133
6156
  ["results"]["data"]["properties"]["id"] is the node ID of the volume.
@@ -5411,7 +6434,7 @@ class OTCS:
5411
6434
  "12508_9": "MS Word", # Text drop-down
5412
6435
  }
5413
6436
  }
5414
- classifications (list):
6437
+ classifications (list | None, optional):
5415
6438
  List of classification item IDs to apply to the new item.
5416
6439
  description (str, optional):
5417
6440
  A description of the document.
@@ -5861,6 +6884,158 @@ class OTCS:
5861
6884
 
5862
6885
  # end method definition
5863
6886
 
6887
+ def get_document_versions(self, node_id: str) -> dict | None:
6888
+ """Get a list of the document versions of a document node.
6889
+
6890
+ Args:
6891
+ node_id (str):
6892
+ Node ID of the document.
6893
+
6894
+ Returns:
6895
+ dict | None:
6896
+ The response containing the document versions, or None if the request fails.
6897
+
6898
+ Example:
6899
+ {
6900
+ 'links': {'data': {...}},
6901
+ 'results': [
6902
+ {
6903
+ 'data': {
6904
+ 'versions': {
6905
+ 'create_date': '2025-06-07T05:29:22Z',
6906
+ 'description': '',
6907
+ 'external_create_date': None,
6908
+ 'external_identity': '',
6909
+ 'external_identity_type': '',
6910
+ 'external_modify_date': '2025-06-05T10:06:02',
6911
+ 'external_source': 'file_system',
6912
+ 'file_create_date': '2025-06-07T05:29:22Z',
6913
+ 'file_modify_date': '2025-06-05T10:06:02Z',
6914
+ 'file_name': 'OpenText-PPT-Presentation-FY25-LIGHT-FINAL.pptx',
6915
+ 'file_size': 4057237,
6916
+ 'file_type': 'pptx',
6917
+ 'has_generation': False,
6918
+ 'id': 107044,
6919
+ 'locked': False,
6920
+ 'locked_date': None,
6921
+ 'locked_user_id': None,
6922
+ 'mime_type': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
6923
+ 'modify_date': '2025-06-07T05:29:22Z',
6924
+ 'name': 'OpenText-PPT-Presentation-FY25-LIGHT-FINAL.pptx',
6925
+ 'owner_id': 1000,
6926
+ 'provider_id': 103563,
6927
+ 'version_id': 103564,
6928
+ 'version_number': 2,
6929
+ 'version_number_major': 0,
6930
+ 'version_number_minor': 2,
6931
+ 'version_number_name': '2'
6932
+ }
6933
+ }
6934
+ }
6935
+ ]
6936
+ }
6937
+
6938
+ """
6939
+
6940
+ request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + "/versions"
6941
+ request_header = self.request_form_header()
6942
+
6943
+ self.logger.debug(
6944
+ "Get a list of all versions of document with node ID -> %s; calling -> %s",
6945
+ str(node_id),
6946
+ request_url,
6947
+ )
6948
+
6949
+ return self.do_request(
6950
+ url=request_url,
6951
+ method="GET",
6952
+ headers=request_header,
6953
+ timeout=None,
6954
+ failure_message="Failed to get list of versions of document with node ID -> {}".format(
6955
+ str(node_id),
6956
+ ),
6957
+ )
6958
+
6959
+ # end method definition
6960
+
6961
+ def get_document_version(self, node_id: str, version_number: int) -> dict | None:
6962
+ """Get a particular version of a document based on the version number.
6963
+
6964
+ The first version (oldest) typically has the number 1.
6965
+
6966
+ Args:
6967
+ node_id (str):
6968
+ Node ID of the document.
6969
+ version_number (int):
6970
+ The version number.
6971
+
6972
+ Returns:
6973
+ dict | None:
6974
+ The version data.
6975
+
6976
+ Example:
6977
+ {
6978
+ 'links': {'data': {...}},
6979
+ 'results': {
6980
+ 'data': {
6981
+ 'versions': {
6982
+ 'create_date': '2025-06-07T05:29:22Z',
6983
+ 'description': '',
6984
+ 'external_create_date': None,
6985
+ 'external_identity': '',
6986
+ 'external_identity_type': '',
6987
+ 'external_modify_date': '2025-06-05T10:06:02',
6988
+ 'external_source': 'file_system',
6989
+ 'file_create_date': '2025-06-07T05:29:22Z',
6990
+ 'file_modify_date': '2025-06-05T10:06:02Z',
6991
+ 'file_name': 'OpenText-PPT-Presentation-FY25-LIGHT-FINAL.pptx',
6992
+ 'file_size': 4057237,
6993
+ 'file_type': 'pptx',
6994
+ 'has_generation': False,
6995
+ 'id': 107044,
6996
+ 'locked': False,
6997
+ 'locked_date': None,
6998
+ 'locked_user_id': None,
6999
+ 'mime_type': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
7000
+ 'modify_date': '2025-06-07T05:29:22Z',
7001
+ 'name': 'OpenText-PPT-Presentation-FY25-LIGHT-FINAL.pptx',
7002
+ 'owner_id': 1000,
7003
+ 'provider_id': 103563,
7004
+ 'version_id': 103564,
7005
+ 'version_number': 2,
7006
+ 'version_number_major': 0,
7007
+ 'version_number_minor': 2,
7008
+ 'version_number_name': '2'
7009
+ }
7010
+ }
7011
+ }
7012
+ }
7013
+
7014
+ """
7015
+
7016
+ request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + "/versions/" + str(version_number)
7017
+ request_header = self.request_form_header()
7018
+
7019
+ self.logger.debug(
7020
+ "Get version -> %d of document with node ID -> %s; calling -> %s",
7021
+ version_number,
7022
+ str(node_id),
7023
+ request_url,
7024
+ )
7025
+
7026
+ return self.do_request(
7027
+ url=request_url,
7028
+ method="GET",
7029
+ headers=request_header,
7030
+ timeout=None,
7031
+ failure_message="Failed to get version -> {} of document with node ID -> {}".format(
7032
+ version_number,
7033
+ str(node_id),
7034
+ ),
7035
+ )
7036
+
7037
+ # end method definition
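A hedged sketch combining the two version getters; `otcs` is an assumed authenticated instance and the node ID 107044 is taken from the docstring examples for illustration only:

```python
# List all versions of a document and then fetch the oldest one explicitly:
versions = otcs.get_document_versions(node_id=107044)
if versions:
    for version in versions["results"]:
        props = version["data"]["versions"]
        print(props["version_number"], props["file_name"], props["file_size"])

# Version numbers typically start at 1 for the oldest version:
oldest = otcs.get_document_version(node_id=107044, version_number=1)
```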
7038
+
5864
7039
  def get_latest_document_version(self, node_id: int) -> dict | None:
5865
7040
  """Get latest version of a document node based on the node ID.
5866
7041
 
@@ -5874,6 +7049,7 @@ class OTCS:
5874
7049
 
5875
7050
  """
5876
7051
 
7052
+ # This method requires V1 of the REST API!
5877
7053
  request_url = self.config()["nodesUrl"] + "/" + str(node_id) + "/versions/latest"
5878
7054
  request_header = self.request_form_header()
5879
7055
 
@@ -5895,6 +7071,63 @@ class OTCS:
5895
7071
 
5896
7072
  # end method definition
5897
7073
 
7074
+ def purge_document_versions(self, node_id: int, versions_to_keep: int = 1) -> dict | None:
7075
+ """Purge versions of a document based on the node ID of the document.
7076
+
7077
+ Args:
7078
+ node_id (int):
7079
+ The ID of the document node to purge versions for.
7080
+ versions_to_keep (int):
7081
+ Number of versions to keep (from the newest to the oldest).
7082
+ The minimum allowed number is 1. This is also the default.
7083
+ If 1 is provided it means to keep the newest version only.
7084
+
7085
+ Returns:
7086
+ dict | None:
7087
+ The result data or None if the request fails.
7088
+
7089
+ Example:
7090
+ {
7091
+ 'links': {'data': {...}},
7092
+ 'results': {}
7093
+ }
7094
+
7095
+ """
7096
+
7097
+ # Sanity check:
7098
+ if versions_to_keep < 1:
7099
+ self.logger.error("Purging to less than 1 version is not possible. The value -> %d is not valid!")
7100
+ return None
7101
+
7102
+ request_url = self.config()["nodesUrlv2"] + "/" + str(node_id) + "/versions"
7103
+ request_header = self.request_form_header()
7104
+
7105
+ purge_delete_body = {
7106
+ "number_to_keep": versions_to_keep,
7107
+ }
7108
+
7109
+ self.logger.debug(
7110
+ "Purge document versions down to the newest%s version%s of document with node ID -> %s; calling -> %s",
7111
+ " {}".format(versions_to_keep) if versions_to_keep > 1 else "",
7112
+ "s" if versions_to_keep > 1 else "",
7113
+ str(node_id),
7114
+ request_url,
7115
+ )
7116
+
7117
+ return self.do_request(
7118
+ url=request_url,
7119
+ method="DELETE",
7120
+ headers=request_header,
7121
+ data=purge_delete_body,
7122
+ timeout=None,
7123
+ failure_message="Failed to purge to {} versions of document with node ID -> {}".format(
7124
+ versions_to_keep,
7125
+ str(node_id),
7126
+ ),
7127
+ )
7128
+
7129
+ # end method definition
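A minimal usage sketch, again assuming the illustrative `otcs` instance; keeping the two newest versions is just an example value:

```python
# Keep only the two newest versions of the document; all older versions are purged:
response = otcs.purge_document_versions(node_id=107044, versions_to_keep=2)
if not response:
    print("Purge failed (note that versions_to_keep must be >= 1)")
```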
7130
+
5898
7131
  def get_document_content(
5899
7132
  self,
5900
7133
  node_id: int,
@@ -5945,7 +7178,7 @@ class OTCS:
5945
7178
  method="GET",
5946
7179
  headers=request_header,
5947
7180
  timeout=None,
5948
- failure_message="Failed to download document with node ID -> {}".format(
7181
+ failure_message="Failed to get content of document with node ID -> {}".format(
5949
7182
  node_id,
5950
7183
  ),
5951
7184
  parse_request_response=parse_request_response,
@@ -5969,16 +7202,19 @@ class OTCS:
5969
7202
  node_id: int,
5970
7203
  version_number: str = "",
5971
7204
  ) -> list | dict | None:
5972
- """Get document content from Extended ECM and read content as JSON.
7205
+ """Get document content from Content Server and parse content as JSON.
5973
7206
 
5974
7207
  Args:
5975
- node_id (int): The node ID of the document to download
5976
- version_number (str, optional): The version of the document to download.
5977
- If version = "" then download the latest
5978
- version.
7208
+ node_id (int):
7209
+ The node ID of the document to download
7210
+ version_number (str, optional):
7211
+ The version of the document to download.
7212
+ If version = "" then download the latest
7213
+ version.
5979
7214
 
5980
7215
  Returns:
5981
- list | dict | None: Content of the file or None in case of an error.
7216
+ list | dict | None:
7217
+ Content of the file or None in case of an error.
5982
7218
 
5983
7219
  """
5984
7220
 
@@ -5994,16 +7230,16 @@ class OTCS:
5994
7230
  self,
5995
7231
  node_id: int,
5996
7232
  file_path: str,
5997
- version_number: str = "",
7233
+ version_number: str | int = "",
5998
7234
  ) -> bool:
5999
- """Download a document from Extended ECM to local file system.
7235
+ """Download a document from OTCS to local file system.
6000
7236
 
6001
7237
  Args:
6002
7238
  node_id (int):
6003
7239
  The node ID of the document to download
6004
7240
  file_path (str):
6005
7241
  The local file path (directory).
6006
- version_number (str, optional):
7242
+ version_number (str | int, optional):
6007
7243
  The version of the document to download.
6008
7244
  If version = "" then download the latest version.
6009
7245
 
@@ -6525,14 +7761,17 @@ class OTCS:
6525
7761
  connection_name: str,
6526
7762
  show_error: bool = False,
6527
7763
  ) -> dict | None:
6528
- """Get Extended ECM external system connection (e.g. SAP, Salesforce, SuccessFactors).
7764
+ """Get external system connection (e.g. SAP, Salesforce, SuccessFactors).
6529
7765
 
6530
7766
  Args:
6531
- connection_name (str): Name of the connection
6532
- show_error (bool, optional): If True, treat as error if connection is not found.
7767
+ connection_name (str):
7768
+ The name of the connection to an external system.
7769
+ show_error (bool, optional):
7770
+ If True, treat as error if connection is not found.
6533
7771
 
6534
7772
  Returns:
6535
- dict | None: External system Details or None if the REST call fails.
7773
+ dict | None:
7774
+ External system Details or None if the REST call fails.
6536
7775
 
6537
7776
  """
6538
7777
  # Encode special characters in connection_name
@@ -6570,7 +7809,7 @@ class OTCS:
6570
7809
  base_url: str,
6571
7810
  username: str,
6572
7811
  password: str,
6573
- authentication_method: str = "BASIC", # either BASIC or OAUTH
7812
+ authentication_method: str = "BASIC",
6574
7813
  client_id: str | None = None,
6575
7814
  client_secret: str | None = None,
6576
7815
  ) -> dict | None:
@@ -6591,9 +7830,9 @@ class OTCS:
6591
7830
  The password (used for BASIC authentication)
6592
7831
  authentication_method (str, optional):
6593
7832
  Either BASIC (using username and password) or OAUTH.
6594
- client_id (str, optional):
7833
+ client_id (str | None, optional):
6595
7834
  The OAUTH Client ID (only required if authenticationMethod = OAUTH).
6596
- client_secret (str, optional):
7835
+ client_secret (str | None, optional):
6597
7836
  OAUTH Client Secret (only required if authenticationMethod = OAUTH).
6598
7837
 
6599
7838
  Returns:
@@ -6799,12 +8038,12 @@ class OTCS:
6799
8038
  Name of the transport package ZIP file.
6800
8039
  package_description (str, optional):
6801
8040
  Description of the transport package. Default is an empty string.
6802
- replacements (list of dicts, optional):
8041
+ replacements (list[dict] | None, optional):
6803
8042
  List of replacement values to be applied to all XML files in the transport.
6804
8043
  Each dictionary must contain:
6805
8044
  - 'placeholder': text to replace
6806
8045
  - 'value': text to replace with
6807
- extractions (list of dicts, optional):
8046
+ extractions (list[dict] | None, optional):
6808
8047
  List of XML subtrees to extract from each XML file in the transport.
6809
8048
  Each dictionary must contain:
6810
8049
  - 'xpath': defining the subtree to extract
@@ -7029,14 +8268,16 @@ class OTCS:
7029
8268
  """Search and replace strings in the XML files of the transport package.
7030
8269
 
7031
8270
  Args:
7032
- zip_file_path (str): Path to transport zip file.
7033
- replacements (list of dicts):
8271
+ zip_file_path (str):
8272
+ Path to transport zip file.
8273
+ replacements (list[dict]):
7034
8274
  List of replacement values; dict needs to have two values:
7035
8275
  - placeholder: The text to replace.
7036
8276
  - value: The replacement text.
7037
8277
 
7038
8278
  Returns:
7039
- bool: True = success, False = error.
8279
+ bool:
8280
+ True = success, False = error.
7040
8281
 
7041
8282
  """
7042
8283
 
@@ -7175,7 +8416,8 @@ class OTCS:
7175
8416
  """Search and extract XML data from the transport package.
7176
8417
 
7177
8418
  Args:
7178
- zip_file_path (str): Path to transport zip file.
8419
+ zip_file_path (str):
8420
+ Path to transport zip file.
7179
8421
  extractions (list of dicts):
7180
8422
  List of extraction values; dict needs to have two values:
7181
8423
  - xpath: structure to find
@@ -7419,9 +8661,9 @@ class OTCS:
7419
8661
  where_clauses (dict | None, optional):
7420
8662
  Filter the results based on one or multiple where clauses.
7421
8663
  TODO: NAME CONVENTION FOR THE FIELDS
7422
- limit (int, optional):
8664
+ limit (int | None, optional):
7423
8665
  The maximum number of result items.
7424
- page (int, optional):
8666
+ page (int | None, optional):
7425
8667
  The page number for a chunked result list.
7426
8668
 
7427
8669
  Returns:
@@ -7581,7 +8823,7 @@ class OTCS:
7581
8823
  """Get all workspace types configured in Extended ECM.
7582
8824
 
7583
8825
  This REST API is very limited. It does not return all workspace type properties
7584
- you can see in Extended ECM admin page.
8826
+ you can see in the OTCS business admin page.
7585
8827
 
7586
8828
  Args:
7587
8829
  expand_workspace_info (bool, optional):
@@ -8393,11 +9635,11 @@ class OTCS:
8393
9635
  Args:
8394
9636
  workspace_id (int):
8395
9637
  The ID of the workspace.
8396
- external_system_id (str, optional):
9638
+ external_system_id (str | None, optional):
8397
9639
  Identifier of the external system (None if no external system).
8398
- bo_type (str, optional):
9640
+ bo_type (str | None, optional):
8399
9641
  Business object type (None if no external system)
8400
- bo_id (str, optional):
9642
+ bo_id (str | None, optional):
8401
9643
  Business object identifier / key (None if no external system)
8402
9644
  show_error (bool, optional):
8403
9645
  Log an error if workspace creation fails. Otherwise log a warning.
@@ -9213,10 +10455,12 @@ class OTCS:
9213
10455
  """Get the Workspace roles.
9214
10456
 
9215
10457
  Args:
9216
- workspace_id (int): ID of the workspace template or workspace
10458
+ workspace_id (int):
10459
+ The ID of the workspace template or workspace.
9217
10460
 
9218
10461
  Returns:
9219
- dict | None: Workspace Roles data or None if the request fails.
10462
+ dict | None:
10463
+ Workspace Roles data or None if the request fails.
9220
10464
 
9221
10465
  """
9222
10466
 
@@ -9245,11 +10489,14 @@ class OTCS:
9245
10489
  """Get the Workspace members of a given role.
9246
10490
 
9247
10491
  Args:
9248
- workspace_id (int): ID of the workspace template
9249
- role_id (int): ID of the role
10492
+ workspace_id (int):
10493
+ The ID of the workspace.
10494
+ role_id (int):
10495
+ The ID of the workspace role.
9250
10496
 
9251
10497
  Returns:
9252
- dict | None: Workspace member data or None if the request fails.
10498
+ dict | None:
10499
+ Workspace member data or None if the request fails.
9253
10500
 
9254
10501
  """
9255
10502
 
@@ -9268,7 +10515,9 @@ class OTCS:
9268
10515
  method="GET",
9269
10516
  headers=request_header,
9270
10517
  timeout=None,
9271
- failure_message="Failed to get workspace members",
10518
+ failure_message="Failed to get workspace members for workspace with ID -> {} and role with ID -> {}".format(
10519
+ workspace_id, role_id
10520
+ ),
9272
10521
  )
9273
10522
 
9274
10523
  # end method definition
@@ -9283,13 +10532,18 @@ class OTCS:
9283
10532
  """Add member to a workspace role. Check that the user/group is not yet a member.
9284
10533
 
9285
10534
  Args:
9286
- workspace_id (int): ID of the workspace
9287
- role_id (int): ID of the role
9288
- member_id (int): User ID or Group ID
9289
- show_warning (bool, optional): If True logs a warning if member is already in role
10535
+ workspace_id (int):
10536
+ The ID of the workspace.
10537
+ role_id (int):
10538
+ The ID of the workspace role.
10539
+ member_id (int):
10540
+ The user ID or group ID.
10541
+ show_warning (bool, optional):
10542
+ If True logs a warning if member is already in role.
9290
10543
 
9291
10544
  Returns:
9292
- dict | None: Workspace Role Membership or None if the request fails.
10545
+ dict | None:
10546
+ Workspace Role Membership or None if the request fails.
9293
10547
 
9294
10548
  """
9295
10549
 
@@ -9357,13 +10611,18 @@ class OTCS:
9357
10611
  """Remove a member from a workspace role. Check that the user is currently a member.
9358
10612
 
9359
10613
  Args:
9360
- workspace_id (int): ID of the workspace
9361
- role_id (int): ID of the role
9362
- member_id (int): User or Group Id
9363
- show_warning (bool, optional): If True logs a warning if member is not in role
10614
+ workspace_id (int):
10615
+ The ID of the workspace.
10616
+ role_id (int):
10617
+ The ID of the workspace role.
10618
+ member_id (int):
10619
+ The user or Group ID.
10620
+ show_warning (bool, optional):
10621
+ If True logs a warning if member is not in role.
9364
10622
 
9365
10623
  Returns:
9366
- dict | None: Workspace Role Membership or None if the request fails.
10624
+ dict | None:
10625
+ Workspace Role Membership or None if the request fails.
9367
10626
 
9368
10627
  """
9369
10628
 
@@ -9431,12 +10690,16 @@ class OTCS:
9431
10690
  """Remove all members from a workspace role. Check that the user is currently a member.
9432
10691
 
9433
10692
  Args:
9434
- workspace_id (int): ID of the workspace
9435
- role_id (int): ID of the role
9436
- show_warning (bool, optional): If True, logs a warning if member is not in role
10693
+ workspace_id (int):
10694
+ The ID of the workspace.
10695
+ role_id (int):
10696
+ The ID of the workspace role.
10697
+ show_warning (bool, optional):
10698
+ If True, logs a warning if member is not in role.
9437
10699
 
9438
10700
  Returns:
9439
- bool: True if success or False if the request fails.
10701
+ bool:
10702
+ True if success or False if the request fails.
9440
10703
 
9441
10704
  """
9442
10705
 
@@ -9478,9 +10741,12 @@ class OTCS:
9478
10741
  specifying whether to apply these permissions to the item itself, its sub-items, or both.
9479
10742
 
9480
10743
  Args:
9481
- workspace_id (int): ID of the workspace for which the role permissions are being assigned.
9482
- role_id (int): ID of the role to which the permissions will be assigned.
9483
- permissions (list of str): List of permissions to assign to the role. Valid permissions include:
10744
+ workspace_id (int):
10745
+ The ID of the workspace for which the role permissions are being assigned.
10746
+ role_id (int):
10747
+ The ID of the role to which the permissions will be assigned.
10748
+ permissions (list):
10749
+ List of permissions to assign to the role. Valid permissions include:
9484
10750
  - "see" : View the workspace
9485
10751
  - "see_contents" : View contents of the workspace
9486
10752
  - "modify" : Modify the workspace
@@ -9491,14 +10757,16 @@ class OTCS:
9491
10757
  - "delete_versions" : Delete versions of the workspace
9492
10758
  - "delete" : Delete the workspace
9493
10759
  - "edit_permissions" : Modify permissions for the workspace
9494
- apply_to (int, optional): Specifies the scope of permission assignment. Possible values:
10760
+ apply_to (int, optional):
10761
+ Specifies the scope of permission assignment. Possible values:
9495
10762
  - 0 = Apply to this item only
9496
10763
  - 1 = Apply to sub-items only
9497
10764
  - 2 = Apply to this item and its sub-items (default)
9498
10765
  - 3 = Apply to this item and its immediate sub-items
9499
10766
 
9500
10767
  Returns:
9501
- dict | None: Updated workspace role membership details or `None` if the request fails.
10768
+ dict | None:
10769
+ Updated workspace role membership details or `None` if the request fails.
9502
10770
 
9503
10771
  Notes:
9504
10772
  - If `apply_to` is set to `2`, both the workspace and its sub-items will inherit the updated permissions.
@@ -9549,12 +10817,16 @@ class OTCS:
9549
10817
  """Update a workspace with a with a new icon (which is uploaded).
9550
10818
 
9551
10819
  Args:
9552
- workspace_id (int): ID of the workspace
9553
- file_path (str): path + filename of icon file
9554
- file_mimetype (str, optional): mimetype of the image
10820
+ workspace_id (int):
10821
+ The ID of the workspace to update the icon for.
10822
+ file_path (str):
10823
+ The path + filename of icon file.
10824
+ file_mimetype (str, optional):
10825
+ The mimetype of the image.
9555
10826
 
9556
10827
  Returns:
9557
- dict | None: Node information or None if REST call fails.
10828
+ dict | None:
10829
+ Node information or None if REST call fails.
9558
10830
 
9559
10831
  """
9560
10832
 
@@ -9609,11 +10881,14 @@ class OTCS:
9609
10881
  """Get definition information for Unique Names.
9610
10882
 
9611
10883
  Args:
9612
- names (list): list of unique names to lookup.
9613
- subtype (int): filter unique names for those pointing to a specific subtype
10884
+ names (list):
10885
+ A list of unique names to lookup.
10886
+ subtype (int):
10887
+ A subtype ID to filter unique names to those pointing to a specific subtype.
9614
10888
 
9615
10889
  Returns:
9616
- dict | None: Unique name definition information or None if REST call fails.
10890
+ dict | None:
10891
+ Unique name definition information or None if REST call fails.
9617
10892
 
9618
10893
  Example:
9619
10894
  ```json
@@ -9878,12 +11153,12 @@ class OTCS:
9878
11153
  Address of the URL item (if it is an URL item type).
9879
11154
  category_data (dict | None, optional):
9880
11155
  New category and attributes values.
9881
- classifications (list):
11156
+ classifications (list | None, optional):
9882
11157
  List of classification item IDs to apply to the new item.
9883
- body (bool):
11158
+ body (bool, optional):
9884
11159
  Should the payload be put in an body tag. Most V2 REST API methods
9885
11160
  do require this but some not (like Scheduled Bots)
9886
- **kwargs (dict):
11161
+ **kwargs (dict, optional):
9887
11162
  Add additional attributes to the body of the POST request
9888
11163
 
9889
11164
  Returns:
@@ -9971,9 +11246,9 @@ class OTCS:
9971
11246
  Args:
9972
11247
  parent_id (int):
9973
11248
  The node the category should be applied to.
9974
- subtype (int):
11249
+ subtype (int, optional):
9975
11250
  The subtype of the new node. Default is document.
9976
- category_ids (int | list[int]):
11251
+ category_ids (int | list[int], optional):
9977
11252
  The ID of the category or a list of category IDs.
9978
11253
 
9979
11254
  Returns:
@@ -10273,7 +11548,7 @@ class OTCS:
10273
11548
  description: str = "",
10274
11549
  show_error: bool = True,
10275
11550
  ) -> dict | None:
10276
- """Create an Extended ECM wiki page.
11551
+ """Create an OTCS wiki page.
10277
11552
 
10278
11553
  Args:
10279
11554
  wiki_id (int):
@@ -10327,7 +11602,7 @@ class OTCS:
10327
11602
  # end method definition
10328
11603
 
10329
11604
  def get_web_report_parameters(self, nickname: str) -> list | None:
10330
- """Retrieve parameters of a Web Report in Extended ECM.
11605
+ """Retrieve parameters of a Web Report in OTCS.
10331
11606
 
10332
11607
  These parameters are defined on the Web Report node (Properties -> Parameters).
10333
11608
 
@@ -10384,14 +11659,17 @@ class OTCS:
10384
11659
  nickname: str,
10385
11660
  web_report_parameters: dict | None = None,
10386
11661
  ) -> dict | None:
10387
- """Run a Web Report that is identified by its nick name.
11662
+ """Run a Web Report that is identified by its nickname.
10388
11663
 
10389
11664
  Args:
10390
- nickname (str): nickname of the Web Reports node.
10391
- web_report_parameters (dict, optional): Parameters of the Web Report (names + value pairs)
11665
+ nickname (str):
11666
+ The nickname of the Web Reports node.
11667
+ web_report_parameters (dict, optional):
11668
+ Parameters of the Web Report (names + value pairs)
10392
11669
 
10393
11670
  Returns:
10394
- dict | None: Response of the run Web Report request or None if the Web Report execution has failed.
11671
+ dict | None:
11672
+ Response of the run Web Report request or None if the Web Report execution has failed.
10395
11673
 
10396
11674
  """
10397
11675
 
@@ -10403,7 +11681,7 @@ class OTCS:
10403
11681
  request_header = self.request_form_header()
10404
11682
 
10405
11683
  self.logger.debug(
10406
- "Running Web Report with nickname -> %s; calling -> %s",
11684
+ "Running Web Report with nickname -> '%s'; calling -> %s",
10407
11685
  nickname,
10408
11686
  request_url,
10409
11687
  )
@@ -10467,11 +11745,11 @@ class OTCS:
10467
11745
  ) -> dict | None:
10468
11746
  """Assign an Content Server item to users and groups.
10469
11747
 
10470
- This is a function used by Extended ECM for Government.
11748
+ This is a function used by OT Content Management for Government.
10471
11749
 
10472
11750
  Args:
10473
11751
  node_id (int):
10474
- The node ID of the Extended ECM item (e.g. a workspace or a document)
11752
+ The node ID of the OTCS item (e.g. a workspace or a document)
10475
11753
  subject (str):
10476
11754
  The title / subject of the assignment.
10477
11755
  instruction (str):
@@ -10598,28 +11876,19 @@ class OTCS:
10598
11876
  def assign_permission(
10599
11877
  self,
10600
11878
  node_id: int,
10601
- assignee_type: str,
10602
- assignee: int,
10603
11879
  permissions: list,
11880
+ assignee_type: str,
11881
+ assignee: int = 0,
10604
11882
  apply_to: int = 0,
10605
11883
  ) -> dict | None:
10606
- """Assign permissions to a user or group for an Extended ECM item.
11884
+ """Assign permissions to a user or group for an Content Server item.
10607
11885
 
10608
11886
  This method allows you to assign specified permissions to a user or group for a given
10609
11887
  Content Server item (node). The permissions can be applied to the item itself, its sub-items,
10610
11888
  or both.
10611
11889
 
10612
11890
  Args:
10613
- node_id (int): The ID of the Extended ECM item (node) to which permissions are being assigned.
10614
- assignee_type (str): The type of assignee. This can be one of the following:
10615
- - "owner": Permissions are assigned to the owner.
10616
- - "group": Permissions are assigned to the owner group.
10617
- - "public": Permissions are assigned to the public (all users).
10618
- - "custom": Permissions are assigned to a specific user or group (specified by `assignee`).
10619
- assignee (int):
10620
- The ID of the user or group (referred to as "right ID").
10621
- If `assignee` is 0 and `assignee_type` is "owner" or "group",
10622
- the owner or group will not be changed.
11891
+ node_id (int): The ID of the OTCS item (node) to which permissions are being assigned.
10623
11892
  permissions (list of str): A list of permissions to assign to the assignee. Valid permissions include:
10624
11893
  - "see" : View the item
10625
11894
  - "see_contents" : View the contents of the item
@@ -10631,6 +11900,15 @@ class OTCS:
10631
11900
  - "delete_versions" : Delete versions of the item
10632
11901
  - "delete" : Delete the item
10633
11902
  - "edit_permissions" : Modify permissions for the item
11903
+ assignee_type (str): The type of assignee. This can be one of the following:
11904
+ - "owner": Permissions are assigned to the owner.
11905
+ - "group": Permissions are assigned to the owner group.
11906
+ - "public": Permissions are assigned to the public (all users).
11907
+ - "custom": Permissions are assigned to a specific user or group (specified by `assignee`).
11908
+ assignee (int):
11909
+ The ID of the user or group (referred to as "right ID").
11910
+ If `assignee` is 0 and `assignee_type` is "owner" or "group",
11911
+ the owner or group will not be changed.
10634
11912
  apply_to (int, optional): The scope of the permission assignment. Possible values:
10635
11913
  - 0 = Apply to this item only (default)
10636
11914
  - 1 = Apply to sub-items only
@@ -10647,18 +11925,24 @@ class OTCS:
10647
11925
 
10648
11926
  """
10649
11927
 
10650
- if not assignee_type or assignee_type not in [
10651
- "owner",
10652
- "group",
10653
- "public",
10654
- "custom",
10655
- ]:
11928
+ if not assignee_type or assignee_type not in OTCS.PERMISSION_ASSIGNEE_TYPES:
10656
11929
  self.logger.error(
10657
- "Missing or wrong assignee type. Needs to be owner, group, public or custom!",
11930
+ "Missing or wrong assignee type. Needs to be one of %s!", str(OTCS.PERMISSION_ASSIGNEE_TYPES)
10658
11931
  )
10659
11932
  return None
10660
11933
  if assignee_type == "custom" and not assignee:
10661
- self.logger.error("Missing permission assignee!")
11934
+ self.logger.error("Assignee type is 'custom' but permission assignee is missing!")
11935
+ return None
11936
+
11937
+ if any(permission not in OTCS.PERMISSION_TYPES for permission in permissions):
11938
+ illegal_permissions = [permission for permission in permissions if permission not in OTCS.PERMISSION_TYPES]
11939
+ self.logger.error(
11940
+ "Illegal permission%s -> %s! Allowed permissions are -> %s. Cannot assign permissions to node with ID -> %d.",
11941
+ "s" if len(illegal_permissions) > 1 else "",
11942
+ str(illegal_permissions),
11943
+ str(OTCS.PERMISSION_TYPES),
11944
+ node_id,
11945
+ )
10662
11946
  return None
10663
11947
 
10664
11948
  permission_post_data = {
@@ -10676,10 +11960,11 @@ class OTCS:
10676
11960
  request_header = self.request_form_header()
10677
11961
 
10678
11962
  self.logger.debug(
10679
- "Assign permissions -> %s to item with ID -> %s; assignee type -> '%s'; calling -> %s",
11963
+ "Assign permissions -> %s to item with ID -> %s; assignee type -> '%s'; apply to -> '%d'; calling -> %s",
10680
11964
  str(permissions),
10681
11965
  str(node_id),
10682
11966
  assignee_type,
11967
+ apply_to,
10683
11968
  request_url,
10684
11969
  )
10685
11970
 
@@ -10692,9 +11977,8 @@ class OTCS:
10692
11977
  headers=request_header,
10693
11978
  data={"body": json.dumps(permission_post_data)},
10694
11979
  timeout=None,
10695
- failure_message="Failed to assign custom permissions -> {} to item with ID -> {}".format(
10696
- permissions,
10697
- node_id,
11980
+ failure_message="Failed to assign 'custom' permissions -> {} to item with ID -> {} (apply to -> {})".format(
11981
+ permissions, node_id, apply_to
10698
11982
  ),
10699
11983
  )
10700
11984
  else:
@@ -10705,9 +11989,8 @@ class OTCS:
10705
11989
  headers=request_header,
10706
11990
  data={"body": json.dumps(permission_post_data)},
10707
11991
  timeout=None,
10708
- failure_message="Failed to assign stadard permissions -> {} to item with ID -> {}".format(
10709
- permissions,
10710
- node_id,
11992
+ failure_message="Failed to assign -> '{}' permissions -> {} to item with ID -> {} (apply to -> {})".format(
11993
+ assignee_type, permissions, node_id, apply_to
10711
11994
  ),
10712
11995
  )
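A hedged example of assigning custom permissions, based on the PERMISSION_TYPES and PERMISSION_ASSIGNEE_TYPES constants and the parameter order shown above; the node and assignee IDs are placeholders:

```python
# Grant a specific group (assignee ID 4711, a placeholder) view permissions on a
# node and propagate them to the item and all of its sub-items (apply_to=2):
response = otcs.assign_permission(
    node_id=12345,
    permissions=["see", "see_contents"],
    assignee_type="custom",
    assignee=4711,
    apply_to=2,
)
```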
10713
11996
 
@@ -11156,8 +12439,10 @@ class OTCS:
11156
12439
  throw an error.
11157
12440
 
11158
12441
  Args:
11159
- node_id (int): node ID to apply the category to
11160
- category_id (list): ID of the category definition object
12442
+ node_id (int):
12443
+ The node ID to apply the category to.
12444
+ category_id (list):
12445
+ The ID of the category definition object.
11161
12446
  inheritance (bool | None):
11162
12447
  If True, turn on inheritance for the category
11163
12448
  (this makes only sense if the node is a container like a folder or workspace).
@@ -13340,11 +14625,14 @@ class OTCS:
13340
14625
  """Get a list of available workflows for a document ID and a parent ID.
13341
14626
 
13342
14627
  Args:
13343
- node_id (int): node ID of the document
13344
- parent_id (int): node ID of the parent
14628
+ node_id (int):
14629
+ The node ID of the document.
14630
+ parent_id (int):
14631
+ The node ID of the parent.
13345
14632
 
13346
14633
  Returns:
13347
- list: list of available workflows
14634
+ list:
14635
+ The list of available workflows.
13348
14636
 
13349
14637
  Example:
13350
14638
  ```json
@@ -14375,44 +15663,295 @@ class OTCS:
14375
15663
 
14376
15664
  # end method definition
14377
15665
 
14378
- def volume_translator(
15666
+ def traverse_node(
14379
15667
  self,
14380
- current_node_id: int,
14381
- translator: object,
14382
- languages: list,
14383
- simulate: bool = False,
14384
- ) -> None:
14385
- """Experimental code to translate the item names and descriptions in a hierarchy.
15668
+ node: dict | int,
15669
+ executables: list[callable],
15670
+ current_depth: int = 0,
15671
+ **kwargs: dict,
15672
+ ) -> dict:
15673
+ """Recursively traverse the node an its subnodes.
15674
+
15675
+ This method is preferred for CPU intensive traversals.
15676
+
15677
+ Args:
15678
+ node (dict | int):
15679
+ The node datastructure (like in a V2 REST Call response)
15680
+ executables (list[callable]):
15681
+ A list of methods to call for each traversed node. The node
15682
+ and an optional dictionary of keyword arguments (kwargs)
15683
+ are passed. The executables are called BEFORE the subnodes
15684
+ are traversed. The executables should return a boolean result.
15685
+ If the result is False, then the execution of the executables
15686
+ list is stopped.
15687
+ current_depth (int, optional):
15688
+ The recursion depth - distance in hierarchy from the root node
15689
+ traverse_node() was INITIALLY called from.
15690
+ kwargs:
15691
+ Additional keyword arguments for the executables.
15692
+
15693
+ Returns:
15694
+ dict: {
15695
+ "processed": int,
15696
+ "traversed": int,
15697
+ }
15698
+
15699
+ """
15700
+
15701
+ processed = 0
15702
+ traversed = 0
15703
+
15704
+ # Initialize the traverse flag. If True, container
15705
+ # subnodes will be processed. If executables exist
15706
+ # then at least one executable has to indicate that
15707
+ # further traversal is required:
15708
+ traverse = not executables
15709
+
15710
+ if isinstance(node, dict):
15711
+ node_id = self.get_result_value(response=node, key="id")
15712
+ elif isinstance(node, int):
15713
+ node_id = node
15714
+ node = self.get_node(node_id=node_id)
15715
+ else:
15716
+ self.logger.error("Illegal type of node object. Expect 'int' or 'dict'!")
15717
+ return (False, False)
15718
+
15719
+ # Run executables:
15720
+ for executable in executables:
15721
+ result_success, result_traverse = executable(node=node, current_depth=current_depth, **kwargs)
15722
+ if result_traverse:
15723
+ traverse = True
15724
+ if not result_success:
15725
+ break
15726
+ else:
15727
+ # else case is processed only if NO break occurred in the for loop.
15728
+ # If all executables have been successful then the node counts as processed:
15729
+ processed += 1
15730
+
15731
+ node_type = self.get_result_value(response=node, key="type")
15732
+
15733
+ # We only traverse the subnodes if the current node is a container type
15734
+ # and the executables indicated that further traversal is required:
15735
+ if traverse and node_type in self.CONTAINER_ITEM_TYPES:
15736
+ # Get children nodes of the current node:
15737
+ subnodes = self.get_subnodes_iterator(parent_node_id=node_id, page_size=200)
15738
+
15739
+ # Recursive call of all subnodes:
15740
+ for subnode in subnodes:
15741
+ subnode_id = self.get_result_value(response=subnode, key="id")
15742
+ subnode_name = self.get_result_value(response=subnode, key="name")
15743
+ self.logger.info("Traversing node -> '%s' (%s)", subnode_name, str(subnode_id))
15744
+ # Recursive call for current subnode:
15745
+ result = self.traverse_node(
15746
+ node=subnode,
15747
+ executables=executables,
15748
+ current_depth=current_depth + 1,
15749
+ **kwargs,
15750
+ )
15751
+ processed += result.get("processed", 0)
15752
+ traversed += result.get("traversed", 0)
15753
+ traversed += 1
15754
+
15755
+ return {"processed": processed, "traversed": traversed}
15756
+
15757
+ # end method definition
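A sketch of an executable callable for traverse_node(), illustrating the expected (success, traverse) tuple contract. The `otcs` instance, the root node ID 2000, the subtype value 144, and the `stats` keyword argument are all assumptions for illustration:

```python
def count_documents(node: dict, current_depth: int = 0, **kwargs: dict) -> tuple[bool, bool]:
    """Example executable: count document nodes and always keep traversing."""
    stats = kwargs["stats"]  # forwarded by traverse_node(**kwargs)
    node_type = otcs.get_result_value(response=node, key="type")
    if node_type == 144:  # 144 is assumed here to be the document subtype
        stats["documents"] = stats.get("documents", 0) + 1
    # Return (success, traverse): processing succeeded, continue into subnodes.
    return (True, True)

# 2000 is a placeholder root node ID:
stats = {}
result = otcs.traverse_node(node=2000, executables=[count_documents], stats=stats)
print(result, stats)
```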
15758
+
15759
+ def traverse_node_parallel(
15760
+ self,
15761
+ node: dict | int,
15762
+ executables: list[callable],
15763
+ workers: int = 3,
15764
+ strategy: str = "BFS",
15765
+ timeout: float = 1.0,
15766
+ **kwargs: dict,
15767
+ ) -> dict:
15768
+ """Traverse nodes using a queue and thread pool (BFS-style).
15769
+
15770
+ This method is preferred for I/O or API intensive traversals.
15771
+
15772
+ Args:
15773
+ node (dict | int):
15774
+ Root node to start traversal. It can be a node or a node ID.
15775
+ executables (list[callable]):
15776
+ Callables to execute per node.
15777
+ workers (int, optional):
15778
+ Number of parallel workers.
15779
+ strategy (str, optional):
15780
+ Either "DFS" for Depth First Search, or "BFS" for Breadth First Search.
15781
+ "BFS" is the default.
15782
+ timeout (float, optional):
15783
+ Wait time in seconds for the queue to have items.
15784
+ kwargs (dict):
15785
+ Additional arguments for executables.
15786
+
15787
+ Returns:
15788
+ dict:
15789
+ Stats with processed and traversed counters.
15790
+
15791
+ """
15792
+
15793
+ results = {"processed": 0, "traversed": 0}
15794
+ lock = threading.Lock()
15795
+ if strategy == "BFS":
15796
+ task_queue = Queue()
15797
+ elif strategy == "DFS":
15798
+ task_queue = LifoQueue()
15799
+
15800
+ # Enqueue initial nodes at depth 0:
15801
+ node_id = self.get_result_value(response=node, key="id") if isinstance(node, dict) else node
15802
+ subnodes = self.get_subnodes_iterator(parent_node_id=node_id, page_size=100)
15803
+ for subnode in subnodes:
15804
+ # Each queue element needs its own copy of traversal data:
15805
+ traversal_data = {
15806
+ "folder_path": [],
15807
+ "workspace_id": None,
15808
+ "workspace_type": None,
15809
+ "workspace_name": None,
15810
+ "workspace_description": None,
15811
+ "current_depth": 0,
15812
+ }
15813
+ task_queue.put((subnode, 0, traversal_data))
15814
+
15815
+ def traverse_node_worker() -> None:
15816
+ """Work on queue.
15817
+
15818
+ Returns:
15819
+ None
15820
+
15821
+ """
15822
+
15823
+ thread_name = threading.current_thread().name
15824
+
15825
+ while True:
15826
+ # Initialize the traverse flag. If True, container
15827
+ # subnodes will be processed. If executables exist
15828
+ # then at least one executable has to return that
15829
+ # further traversal is required:
15830
+ traverse = not (executables)
15831
+
15832
+ try:
15833
+ node, current_depth, traversal_data = task_queue.get(timeout=timeout)
15834
+ except Empty:
15835
+ self.logger.info("[%s] No (more) nodes to process - finishing...", thread_name)
15836
+ return # Queue is empty - worker is done
15837
+
15838
+ try:
15839
+ # Fetch node dictionary if just an ID was passed as parameter:
15840
+ if isinstance(node, int):
15841
+ node = self.get_node(node_id=node)
15842
+
15843
+ node_id = self.get_result_value(response=node, key="id")
15844
+ node_name = self.get_result_value(response=node, key="name")
15845
+ node_type = self.get_result_value(response=node, key="type")
15846
+
15847
+ self.logger.info(
15848
+ "[%s] Traversing node -> '%s' (%s) at depth %d", thread_name, node_name, node_id, current_depth
15849
+ )
15850
+
15851
+ # Run all executables
15852
+ for executable in executables:
15853
+ try:
15854
+ result_success, result_traverse = executable(
15855
+ node=node,
15856
+ current_depth=current_depth,
15857
+ traversal_data=traversal_data,
15858
+ **kwargs,
15859
+ )
15860
+ if result_traverse:
15861
+ traverse = True
15862
+ if not result_success:
15863
+ break
15864
+ except Exception as e:
15865
+ self.logger.error("Failed to run executable on node -> '%s' (%s), error -> %s", node_name, node_id, str(e))
15866
+ else:
15867
+ with lock:
15868
+ results["processed"] += 1
15869
+
15870
+ # We only traverse the subnodes if the current node is a container type
15871
+ # and at least one executable (if there are any) indicates that further traversal is required:
15872
+ if traverse and node_type in self.CONTAINER_ITEM_TYPES:
15873
+ subnodes = self.get_subnodes_iterator(parent_node_id=node_id, page_size=100)
15874
+ for subnode in subnodes:
15875
+ sub_traversal_data = {
15876
+ **traversal_data,
15877
+ "folder_path": traversal_data["folder_path"] + [node_name],
15878
+ "current_depth": current_depth + 1,
15879
+ }
15880
+ task_queue.put((subnode, current_depth + 1, sub_traversal_data))
15881
+
15882
+ with lock:
15883
+ results["traversed"] += 1
15884
+
15885
+ finally:
15886
+ # Guarantee task_done() is called even if exceptions occur:
15887
+ task_queue.task_done()
15888
+
15889
+ # end method traverse_node_worker()
15890
+
15891
+ # Start thread pool with limited concurrency
15892
+ with ThreadPoolExecutor(max_workers=workers, thread_name_prefix="Traversal_Worker") as executor:
15893
+ for i in range(workers):
15894
+ self.logger.info("Starting worker -> %d...", i)
15895
+ executor.submit(traverse_node_worker)
15896
+
15897
+ # Wait for all tasks to complete
15898
+ task_queue.join()
15899
+
15900
+ return results
15901
+
15902
+ # end method definition
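A sketch of the parallel variant reusing the executable sketched above; worker count and strategy are example values, and shared state like the `stats` dictionary should be protected by a lock in real parallel use:

```python
# Same executable with a small worker pool; "otcs" and node 2000 remain placeholders.
stats = {}
result = otcs.traverse_node_parallel(
    node=2000,                      # placeholder root node ID
    executables=[count_documents],  # executable sketched above
    workers=5,
    strategy="DFS",                 # depth-first; "BFS" is the default
    stats=stats,
)
print(result, stats)
```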
15903
+
15904
+ def translate_node(self, node: dict | int, **kwargs: dict) -> bool:
15905
+ """Translate a node.
14386
15906
 
14387
15907
  The actual translation is done by a translator object. This recursive method just
14388
15908
  traverses the hierarchy and calls the translate() method of the translator object.
14389
15909
 
14390
15910
  Args:
14391
- current_node_id (int):
14392
- The current node ID to translate.
14393
- translator (object):
14394
- This object needs to be created based on the "Translator" class
14395
- and passed to this method.
14396
- languages (list):
14397
- A list of target languages to translate into.
14398
- simulate (bool, optional):
14399
- If True, do not really rename but just traverse and log info.
14400
- The default is False.
15911
+ node (dict | int):
15912
+ The current node to translate. This can be the node data structure or just
15913
+ the node ID. If it is just the ID the actual node will be fetched.
15914
+ kwargs (dict):
15915
+ Keyword parameters. The method expects the following keyword parameters:
15916
+ * simulate (bool):
15917
+ If True, do not really rename but just traverse and log info.
15918
+ * translator (object):
15919
+ This object needs to be created based on the "Translator" class
15920
+ and passed to this method.
15921
+ * languages (list):
15922
+ A list of target languages to translate into.
15923
+
15924
+ Returns:
15925
+ bool:
15926
+ True for success, False for error.
14401
15927
 
14402
15928
  """
14403
15929
 
14404
- # Get current node based on the ID:
14405
- current_node = self.get_node(current_node_id)
14406
- current_node_id = self.get_result_value(response=current_node, key="id")
15930
+ translator = kwargs.get("translator")
15931
+ languages = kwargs.get("languages", [])
15932
+ simulate = kwargs.get("simulate", False)
15933
+
15934
+ if not translator:
15935
+ self.logger.error("Missing 'translator' parameter (object)!")
15936
+ return False
15937
+ if not languages:
15938
+ self.logger.error("Missing or empty 'languages' parameter (list)!")
15939
+ return False
15940
+
15941
+ if isinstance(node, dict):
15942
+ current_node_id = self.get_result_value(response=node, key="id")
15943
+ else:
15944
+ current_node_id = node
15945
+ node = self.get_node(node_id=current_node_id)
14407
15946
 
14408
- name = self.get_result_value(response=current_node, key="name")
14409
- description = self.get_result_value(response=current_node, key="description")
15947
+ name = self.get_result_value(response=node, key="name")
15948
+ description = self.get_result_value(response=node, key="description")
14410
15949
  names_multilingual = self.get_result_value(
14411
- response=current_node,
15950
+ response=node,
14412
15951
  key="name_multilingual",
14413
15952
  )
14414
15953
  descriptions_multilingual = self.get_result_value(
14415
- response=current_node,
15954
+ response=node,
14416
15955
  key="description_multilingual",
14417
15956
  )
14418
15957
 
@@ -14427,7 +15966,7 @@ class OTCS:
14427
15966
  language,
14428
15967
  names_multilingual["en"],
14429
15968
  )
14430
- self.logger.debug(
15969
+ self.logger.info(
14431
15970
  "Translate name of node -> %s from -> '%s' (%s) to -> '%s' (%s)",
14432
15971
  current_node_id,
14433
15972
  name,
@@ -14445,7 +15984,7 @@ class OTCS:
14445
15984
  language,
14446
15985
  descriptions_multilingual["en"],
14447
15986
  )
14448
- self.logger.debug(
15987
+ self.logger.info(
14449
15988
  "Translate description of node -> %s from -> '%s' (%s) to -> '%s' (%s)",
14450
15989
  current_node_id,
14451
15990
  descriptions_multilingual["en"],
@@ -14456,24 +15995,17 @@ class OTCS:
14456
15995
 
14457
15996
  # Rename node multi-lingual:
14458
15997
  if not simulate:
14459
- self.rename_node(
15998
+ response = self.rename_node(
14460
15999
  node_id=current_node_id,
14461
16000
  name=name,
14462
16001
  description=description,
14463
16002
  name_multilingual=names_multilingual,
14464
16003
  description_multilingual=descriptions_multilingual,
14465
16004
  )
16005
+ if not response:
16006
+ return False
14466
16007
 
14467
- # Get children nodes of the current node:
14468
- results = self.get_subnodes(parent_node_id=current_node_id, limit=200)["results"]
14469
-
14470
- # Recursive call of all subnodes:
14471
- for result in results:
14472
- self.volume_translator(
14473
- current_node_id=result["data"]["properties"]["id"],
14474
- translator=translator,
14475
- languages=languages,
14476
- )
16008
+ return True
14477
16009
 
14478
16010
  # end method definition
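A hedged sketch of combining translate_node() with the traversal method above. Note that translate_node() returns a bool while traverse_node() expects a (success, traverse) tuple from its executables, so a small adapter is used; `otcs`, the root node ID, and the `translator` object are placeholders:

```python
# Adapter bridging translate_node()'s bool result to the (success, traverse) contract:
def translate_executable(node: dict, current_depth: int = 0, **kwargs: dict) -> tuple[bool, bool]:
    success = otcs.translate_node(node=node, **kwargs)
    return (success, True)  # keep traversing even after a successful rename

result = otcs.traverse_node(
    node=2000,                          # placeholder root node ID
    executables=[translate_executable],
    translator=translator,              # placeholder object providing a translate() method
    languages=["de", "fr"],
    simulate=True,                      # only log what would be renamed
)
```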
14479
16011
 
@@ -15311,10 +16843,12 @@ class OTCS:
15311
16843
  subnode["id"],
15312
16844
  subnode["type"],
15313
16845
  )
16846
+ # end match subnode["type"]:
15314
16847
 
15315
16848
  # Wait for all download threads to complete:
15316
16849
  for thread in download_threads:
15317
16850
  thread.join()
16851
+ # end for subnode in subnodes:
15318
16852
 
15319
16853
  # Wait for all traversal threads to complete:
15320
16854
  for thread in traversal_threads:
@@ -15324,6 +16858,481 @@ class OTCS:
15324
16858
 
15325
16859
  # end method definition
15326
16860
 
16861
+ def load_items_new(
16862
+ self,
16863
+ node_id: int,
16864
+ filter_workspace_depth: int | None = None,
16865
+ filter_workspace_subtypes: list | None = None,
16866
+ filter_workspace_category: str | None = None,
16867
+ filter_workspace_attributes: dict | list | None = None,
16868
+ filter_item_depth: int | None = None,
16869
+ filter_item_subtypes: list | None = None,
16870
+ filter_item_category: str | None = None,
16871
+ filter_item_attributes: dict | list | None = None,
16872
+ filter_item_in_workspace: bool = True,
16873
+ exclude_node_ids: list | None = None,
16874
+ workspace_metadata: bool = True,
16875
+ item_metadata: bool = True,
16876
+ download_documents: bool = True,
16877
+ skip_existing_downloads: bool = True,
16878
+ extract_zip: bool = False,
16879
+ workers: int = 3,
16880
+ ) -> dict | None:
16881
+ """Create a Pandas Data Frame by traversing a given Content Server hierarchy.
16882
+
16883
+ This method collects workspace and document items.
16884
+
16885
+ Args:
16886
+ node_id (int):
16887
+ The root Node ID the traversal should start at.
16888
+ filter_workspace_depth (int | None, optional):
16889
+ Additive filter criterion for workspace path depth.
16890
+ Defaults to None = filter not active.
16891
+ filter_workspace_subtypes (list | None, optional):
16892
+ Additive filter criterion for workspace type.
16893
+ Defaults to None = filter not active.
16894
+ filter_workspace_category (str | None, optional):
16895
+ Additive filter criterion for workspace category.
16896
+ Defaults to None = filter not active.
16897
+ filter_workspace_attributes (dict | list, optional):
16898
+ Additive filter criterion for workspace attribute values.
16899
+ Defaults to None = filter not active
16900
+ filter_item_depth (int | None, optional):
16901
+ Additive filter criterion for item path depth.
16902
+ Defaults to None = filter not active.
16903
+ filter_item_subtypes (list | None, optional):
16904
+ Additive filter criterion for item types.
16905
+ Defaults to None = filter not active.
16906
+ filter_item_category (str | None, optional):
16907
+ Additive filter criterion for item category.
16908
+ Defaults to None = filter not active.
16909
+ filter_item_attributes (dict | list, optional):
16910
+ Additive filter criterion for item attribute values.
16911
+ Defaults to None = filter not active.
16912
+ filter_item_in_workspace (bool, optional):
16913
+ Defines if item filters should be applied to
16914
+ items inside workspaces as well. If False,
16915
+ then items inside workspaces are always included.
16916
+ exclude_node_ids (list, optional):
16917
+ List of node IDs to exclude from traversal.
16918
+ workspace_metadata (bool, optional):
16919
+ If True, include workspace metadata.
16920
+ item_metadata (bool, optional):
16921
+ if True, include item metadata.
16922
+ download_documents (bool, optional):
16923
+ Whether or not documents should be downloaded.
16924
+ skip_existing_downloads (bool, optional):
16925
+ If True, reuse already existing downloads in the file system.
16926
+ extract_zip (bool, optional):
16927
+ If True, documents that are downloaded with mime-type
16928
+ "application/x-zip-compressed" will be extracted recursively.
16929
+ workers (int, optional):
16930
+ Number of worker threads to start.
16931
+
16932
+ Returns:
16933
+ dict:
16934
+ Stats with processed and traversed counters.
16935
+
16936
+ """
16937
+
16938
+ # Initialize the list of document download threads:
16939
+ download_threads = []
16940
+
16941
+ def check_node_exclusions(node: dict, **kwargs: dict) -> tuple[bool, bool]:
16942
+ """Check if the processed node is on the exclusion list.
16943
+
16944
+ Stop processing and traversing if the node is excluded.
16945
+
16946
+ Args:
16947
+ node (dict):
16948
+ The current node being processed.
16949
+ kwargs (dict):
16950
+ Additional keyword arguments that are specific for the method.
16951
+
16952
+ Returns:
16953
+ tuple[bool, bool]:
16954
+ success (bool) - if node was processed successfully
16955
+ traverse (bool) - if subnodes should be processed
16956
+
16957
+ """
16958
+
16959
+ exclude_node_ids = kwargs.get("exclude_node_ids")
16960
+ if not exclude_node_ids:
16961
+ # No exclusion list provided - nothing to exclude. Continue processing and traversal:
16962
+ return (True, True)
16963
+
16964
+ node_id = self.get_result_value(response=node, key="id")
16965
+ node_name = self.get_result_value(response=node, key="name")
16966
+
16967
+ if node_id and node_id in exclude_node_ids:
16968
+ self.logger.info(
16969
+ "Node -> '%s' (%s) is in exclusion list. Skip traversal of this node.",
16970
+ node_name,
16971
+ node_id,
16972
+ )
16973
+ return (False, False)
16974
+ return (True, True)
16975
+
16976
+ # end check_node_exclusions()
16977
+
16978
+ def check_node_workspace(node: dict, **kwargs: dict) -> tuple[bool, bool]:
16979
+ """Check if the processed node should be recorded as a workspace in the data frame.
16980
+
16981
+ Args:
16982
+ node (dict):
16983
+ The current node being processed.
16984
+ kwargs (dict):
16985
+ Additional keyword arguments that are specific for the method.
16986
+
16987
+ Returns:
16988
+ tuple[bool, bool]:
16989
+ success (bool) - if node was processed successfully
16990
+ traverse (bool) - if subnodes should be processed
16991
+
16992
+ """
16993
+
16994
+ traversal_data = kwargs.get("traversal_data")
16995
+ filter_workspace_data = kwargs.get("filter_workspace_data")
16996
+ control_flags = kwargs.get("control_flags")
16997
+
16998
+ if not traversal_data or not filter_workspace_data or not control_flags:
16999
+ self.logger.error("Missing keyword arguments for executable in node traversal!")
17000
+ return (False, False)
17001
+
17002
+ node_id = self.get_result_value(response=node, key="id")
17003
+ node_name = self.get_result_value(response=node, key="name")
17004
+ node_description = self.get_result_value(response=node, key="description")
17005
+ node_type = self.get_result_value(response=node, key="type")
17006
+
17007
+ #
17008
+ # 1. Check if the traversal is already inside a workspace. Then we can skip
17009
+ # the workspace processing. We currently don't support sub-workspaces.
17010
+ #
17011
+ workspace_id = traversal_data["workspace_id"]
17012
+ if workspace_id:
17013
+ self.logger.debug(
17014
+ "Found folder or workspace -> '%s' (%s) inside workspace with ID -> %s. So this container cannot be a workspace.",
17015
+ node_name,
17016
+ node_id,
17017
+ workspace_id,
17018
+ )
17019
+ # Success = False, Traverse = True
17020
+ return (False, True)
17021
+
17022
+ #
17023
+ # 2. Check if metadata is required (either for columns or for filters)
17024
+ #
17025
+ if (
17026
+ control_flags["workspace_metadata"]
17027
+ or filter_workspace_data["filter_workspace_category"]
17028
+ or filter_workspace_data["filter_workspace_attributes"]
17029
+ ):
17030
+ categories = self.get_node_categories(
17031
+ node_id=node_id,
17032
+ metadata=(
17033
+ filter_workspace_data["filter_workspace_category"] is not None
17034
+ or filter_workspace_data["filter_workspace_attributes"] is not None
17035
+ or not self._use_numeric_category_identifier
17036
+ ),
17037
+ )
17038
+ else:
17039
+ categories = None
17040
+
17041
+ #
17042
+ # 3. Apply the defined filters to the current node to see
17043
+ # if we want to 'interpret' it as a workspace
17044
+ #
17045
+ # See if it is a node that we want to interpret as a workspace.
17046
+ # Only "workspaces" that comply with ALL provided filters are
17047
+ # considered and written into the data frame as a workspace row:
17048
+ # Root nodes may have a "results" dict. The subnode iterators don't have it:
17049
+ node_properties = node["results"]["data"]["properties"] if "results" in node else node["data"]["properties"]
17050
+ if not self.apply_filter(
17051
+ node=node_properties,
17052
+ node_categories=categories,
17053
+ current_depth=traversal_data["current_depth"],
17054
+ filter_depth=filter_workspace_data["filter_workspace_depth"],
17055
+ filter_subtypes=filter_workspace_data["filter_workspace_subtypes"],
17056
+ filter_category=filter_workspace_data["filter_workspace_category"],
17057
+ filter_attributes=filter_workspace_data["filter_workspace_attributes"],
17058
+ ):
17059
+ # Success = False, Traverse = True
17060
+ return (False, True)
17061
+
17062
+ self.logger.debug(
17063
+ "Found workspace -> '%s' (%s) in depth -> %s.",
17064
+ node_name,
17065
+ node_id,
17066
+ traversal_data["current_depth"],
17067
+ )
17068
+
17069
+ #
17070
+ # 4. Create the data frame row from the node / traversal data:
17071
+ #
17072
+ row = {}
17073
+ row["workspace_type"] = node_type
17074
+ row["workspace_id"] = node_id
17075
+ row["workspace_name"] = node_name
17076
+ row["workspace_description"] = node_description
17077
+ row["workspace_outer_path"] = traversal_data["folder_path"]
17078
+ # If we want (and have) metadata then add it as columns:
17079
+ if control_flags["workspace_metadata"] and categories and categories.get("results", None):
17080
+ # Add columns for the workspace node categories determined above.
17081
+ self.add_attribute_columns(row=row, categories=categories, prefix="workspace_cat_")
17082
+
17083
+ # Now we add the workspace row to the Pandas Data Frame in the Data class:
17084
+ with self._data.lock():
17085
+ self._data.append(row)
17086
+
17087
+ #
17088
+ # 5. Update the traversal data:
17089
+ #
17090
+ traversal_data["workspace_id"] = node_id
17091
+ traversal_data["workspace_name"] = node_name
17092
+ traversal_data["workspace_type"] = node_type
17093
+ traversal_data["workspace_description"] = node_description
17094
+ self.logger.debug("Updated traversal data -> %s", str(traversal_data))
17095
+
17096
+ # Success = True, Traverse = True
17097
+ # We have traverse = True because we need to
17098
+ # keep traversing into the workspace folders.
17099
+ return (True, True)
17100
+
17101
+ # end check_node_workspace()
17102
+
17103
+ def check_node_item(node: dict, **kwargs: dict) -> tuple[bool, bool]:
17104
+ """Check if the processed node should be recorded as an item in the data frame.
17105
+
17106
+ Args:
17107
+ node (dict):
17108
+ The current node being processed.
17109
+ kwargs (dict):
17110
+ Additional keyword arguments that are specific for the method.
17111
+
17112
+ Returns:
17113
+ tuple[bool, bool]:
17114
+ success (bool) - if node was processed successfully
17115
+ traverse (bool) - if subnodes should be processed
17116
+
17117
+ """
17118
+
17119
+ traversal_data = kwargs.get("traversal_data")
17120
+ filter_item_data = kwargs.get("filter_item_data")
17121
+ control_flags = kwargs.get("control_flags")
17122
+
17123
+ if not traversal_data or not filter_item_data or not control_flags:
17124
+ self.logger.error("Missing keyword arguments for executable in node item traversal!")
17125
+ return (False, False)
17126
+
17127
+ node_id = self.get_result_value(response=node, key="id")
17128
+ node_name = self.get_result_value(response=node, key="name")
17129
+ node_description = self.get_result_value(response=node, key="description")
17130
+ node_type = self.get_result_value(response=node, key="type")
17131
+
17132
+ current_depth = traversal_data["current_depth"]
17133
+ folder_path = traversal_data["folder_path"]
17134
+ workspace_id = traversal_data["workspace_id"]
17135
+ workspace_name = traversal_data["workspace_name"]
17136
+ workspace_description = traversal_data["workspace_description"]
17137
+ workspace_type = traversal_data["workspace_type"]
17138
+
17139
+ #
17140
+ # 1. Check if metadata is required (either for columns or for filters)
17141
+ #
17142
+ if (
17143
+ control_flags["item_metadata"]
17144
+ or filter_item_data["filter_item_category"]
17145
+ or filter_item_data["filter_item_attributes"]
17146
+ ):
17147
+ categories = self.get_node_categories(
17148
+ node_id=node_id,
17149
+ metadata=(
17150
+ filter_item_data["filter_item_category"] is not None
17151
+ or filter_item_data["filter_item_attributes"] is not None
17152
+ or not self._use_numeric_category_identifier
17153
+ ),
17154
+ )
17155
+ else:
17156
+ categories = None
17157
+
17158
+ #
17159
+ # 2. Apply the defined filters to the current node to see
17160
+ # if we want to add it to the data frame as an item.
17161
+ #
17162
+ # If filter_item_in_workspace is false, then documents
17163
+ # inside workspaces are included in the data frame unconditionally!
17164
+ # We apply the defined filters to the current node. Only "documents"
17165
+ # that comply with ALL provided filters are considered and written into the data frame
17166
+ node_properties = node["results"]["data"]["properties"] if "results" in node else node["data"]["properties"]
17167
+ if (not workspace_id or filter_item_in_workspace) and not self.apply_filter(
17168
+ node=node_properties,
17169
+ node_categories=categories,
17170
+ current_depth=current_depth,
17171
+ filter_depth=filter_item_data["filter_item_depth"],
17172
+ filter_subtypes=filter_item_data["filter_item_subtypes"],
17173
+ filter_category=filter_item_data["filter_item_category"],
17174
+ filter_attributes=filter_item_data["filter_item_attributes"],
17175
+ ):
17176
+ # Success = False, Traverse = True
17177
+ return (False, True)
17178
+
17179
+ # We only consider documents that are inside the defined "workspaces":
17180
+ if workspace_id:
17181
+ self.logger.debug(
17182
+ "Found %s item -> '%s' (%s) in depth -> %s inside workspace -> '%s' (%s).",
17183
+ "document" if node_type == self.ITEM_TYPE_DOCUMENT else "URL",
17184
+ node_name,
17185
+ node_id,
17186
+ current_depth,
17187
+ workspace_name,
17188
+ workspace_id,
17189
+ )
17190
+ else:
17191
+ self.logger.debug(
17192
+ "Found %s item -> '%s' (%s) in depth -> %s outside of workspace.",
17193
+ "document" if node_type == self.ITEM_TYPE_DOCUMENT else "URL",
17194
+ node_name,
17195
+ node_id,
17196
+ current_depth,
17197
+ )
17198
+
17199
+ # Special handling for documents: download them if requested:
17200
+ if node_type == self.ITEM_TYPE_DOCUMENT:
17201
+ # We use the node ID as the filename to avoid any
17202
+ # issues with too long or invalid file names.
17203
+ # As the Pandas DataFrame has all information
17204
+ # this is easy to resolve at upload time.
17205
+ file_path = "{}/{}".format(self._download_dir, node_id)
17206
+
17207
+ # We download only if not downloaded before or if downloaded
17208
+ # before but forced to re-download:
17209
+ if control_flags["download_documents"] and (
17210
+ not os.path.exists(file_path) or not control_flags["skip_existing_downloads"]
17211
+ ):
17212
+ #
17213
+ # Start an asynchronous download thread:
17214
+ #
17215
+ self.logger.debug(
17216
+ "Downloading file -> '%s'...",
17217
+ file_path,
17218
+ )
17219
+
17220
+ extract_after_download = node["mime_type"] == "application/x-zip-compressed" and extract_zip
17221
+ thread = threading.Thread(
17222
+ target=self.download_document_multi_threading,
17223
+ args=(node_id, file_path, extract_after_download),
17224
+ name="download_document_node_{}".format(node_id),
17225
+ )
17226
+ thread.start()
17227
+ download_threads.append(thread)
17228
+ else:
17229
+ self.logger.debug(
17230
+ "File -> %s has been downloaded before or download is not requested. Skipping download...",
17231
+ file_path,
17232
+ )
17233
+ # end if document
17234
+
17235
+ #
17236
+ # Construct a dictionary 'row' that we will add
17237
+ # to the resulting data frame:
17238
+ #
17239
+ row = {}
17240
+ # First we include some key workspace data to associate
17241
+ # the item with the workspace:
17242
+ row["workspace_type"] = workspace_type
17243
+ row["workspace_id"] = workspace_id
17244
+ row["workspace_name"] = workspace_name
17245
+ row["workspace_description"] = workspace_description
17246
+ # Then add item specific data:
17247
+ row["item_id"] = str(node_id)
17248
+ row["item_type"] = node_type
17249
+ row["item_name"] = node_name
17250
+ row["item_description"] = node_description
17251
+ # We take the sub-path of the folder path inside the workspace
17252
+ # as the item path:
17253
+ try:
17254
+ # The item path is the list of path elements after the workspace name:
17255
+ row["item_path"] = folder_path[folder_path.index(workspace_name) + 1 :]
17256
+ except ValueError:
17257
+ self.logger.warning("Cannot access folder path while processing -> '%s' (%s)!", node_name, node_id)
17258
+ row["item_path"] = []
17259
+ row["item_download_name"] = str(node_id) if node_type == self.ITEM_TYPE_DOCUMENT else ""
17260
+ row["item_mime_type"] = (
17261
+ self.get_result_value(response=node, key="mime_type") if node_type == self.ITEM_TYPE_DOCUMENT else ""
17262
+ )
17263
+ # URL specific data:
17264
+ row["item_url"] = (
17265
+ self.get_result_value(response=node, key="mime_type") if node_type == self.ITEM_TYPE_URL else ""
17266
+ )
17267
+ if item_metadata and categories and categories["results"]:
17268
+ # Add columns for the item node categories determined above.
17269
+ self.add_attribute_columns(row=row, categories=categories, prefix="item_cat_")
17270
+
17271
+ # Now we add the row to the Pandas Data Frame in the Data class:
17272
+ self.logger.info(
17273
+ "Adding %s -> '%s' (%s) to data frame...",
17274
+ "document" if node_type == self.ITEM_TYPE_DOCUMENT else "URL",
17275
+ row["item_name"],
17276
+ row["item_id"],
17277
+ )
17278
+ with self._data.lock():
17279
+ self._data.append(row)
17280
+
17281
+ return (True, True)
17282
+
17283
+ # end check_node_item()
17284
+
17285
+ #
17286
+ # Start Main method:
17287
+ #
17288
+
17289
+ # Create folder if it does not exist
17290
+ if download_documents and not os.path.exists(self._download_dir):
17291
+ os.makedirs(self._download_dir)
17292
+
17293
+ # These won't change during processing - they stay the same for all nodes:
17294
+ filter_workspace_data = {
17295
+ "filter_workspace_depth": filter_workspace_depth,
17296
+ "filter_workspace_subtypes": filter_workspace_subtypes,
17297
+ "filter_workspace_category": filter_workspace_category,
17298
+ "filter_workspace_attributes": filter_workspace_attributes,
17299
+ }
17300
+
17301
+ # These won't change during processing - they stay the same for all nodes:
17302
+ filter_item_data = {
17303
+ "filter_item_depth": filter_item_depth,
17304
+ "filter_item_subtypes": filter_item_subtypes,
17305
+ "filter_item_category": filter_item_category,
17306
+ "filter_item_attributes": filter_item_attributes,
17307
+ "filter_item_in_workspace": filter_item_in_workspace,
17308
+ }
17309
+
17310
+ # These won't change during processing - they stay the same for all nodes:
17311
+ control_flags = {
17312
+ "workspace_metadata": workspace_metadata,
17313
+ "item_metadata": item_metadata,
17314
+ "download_documents": download_documents,
17315
+ "skip_existing_downloads": skip_existing_downloads,
17316
+ "extract_zip": extract_zip,
17317
+ }
17318
+
17319
+ #
17320
+ # Start the traversal of the nodes:
17321
+ #
17322
+ result = self.traverse_node_parallel(
17323
+ node=node_id,
17324
+ executables=[check_node_exclusions, check_node_workspace, check_node_item],
17325
+ exclude_node_ids=exclude_node_ids,
17326
+ filter_workspace_data=filter_workspace_data,
17327
+ filter_item_data=filter_item_data,
17328
+ control_flags=control_flags,
17329
+ workers=workers,
17330
+ )
17331
+
17332
+ # Wait for any document download threads that are still running:
+ for thread in download_threads:
+ thread.join()
+
+ return result
17333
+
17334
+ # end method definition
17335
+
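A minimal usage sketch of the new load_items_new() method added above, assuming an already authenticated OTCS client instance named otcs with a configured download directory. The node IDs and the category name below are placeholders, and the exact keys of the returned stats dictionary are not spelled out in this diff:

# Assumption: 'otcs' is an authenticated OTCS instance (constructor arguments omitted here).
stats = otcs.load_items_new(
    node_id=2000,  # placeholder: root container to traverse, e.g. the Enterprise Workspace volume
    filter_workspace_subtypes=[OTCS.ITEM_TYPE_BUSINESS_WORKSPACE],
    filter_workspace_category="Customer",  # placeholder category name
    filter_item_subtypes=[OTCS.ITEM_TYPE_DOCUMENT, OTCS.ITEM_TYPE_URL],
    filter_item_in_workspace=False,  # False = items inside matching workspaces are always included
    exclude_node_ids=[123456],  # placeholder node ID to skip during traversal
    workspace_metadata=True,
    item_metadata=True,
    download_documents=True,
    skip_existing_downloads=True,
    extract_zip=False,
    workers=5,
)
# Each executable passed to traverse_node_parallel() returns a (success, traverse) tuple;
# the collected workspace and item rows are appended to the instance's Data frame, while
# the method itself returns the traversal statistics (or None on failure):
if stats:
    print("Traversal finished ->", stats)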
15327
17336
  def aviator_embed_metadata(
15328
17337
  self,
15329
17338
  node_id: int,
@@ -15351,7 +17360,7 @@ class OTCS:
15351
17360
  Defines if the method waits for the completion of the embedding. Defaults to True.
15352
17361
  message_override (dict | None, optional):
15353
17362
  Overwrite specific message details. Defaults to None.
15354
- timeout (float):
17363
+ timeout (float, optional):
15355
17364
  Time in seconds to wait until the WebSocket times out. Defaults to 10.0.
15356
17365
  document_metadata (bool, optional):
15357
17366
  Defines whether or not to embed document metadata.