pyxecm 1.4__py3-none-any.whl → 1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyxecm might be problematic. Click here for more details.
- pyxecm/__init__.py +5 -0
- pyxecm/avts.py +1065 -0
- pyxecm/coreshare.py +2532 -0
- pyxecm/customizer/__init__.py +4 -0
- pyxecm/customizer/browser_automation.py +164 -54
- pyxecm/customizer/customizer.py +588 -231
- pyxecm/customizer/k8s.py +143 -29
- pyxecm/customizer/m365.py +1434 -1323
- pyxecm/customizer/payload.py +15073 -5933
- pyxecm/customizer/pht.py +926 -0
- pyxecm/customizer/salesforce.py +866 -351
- pyxecm/customizer/sap.py +4 -4
- pyxecm/customizer/servicenow.py +1467 -0
- pyxecm/customizer/successfactors.py +1056 -0
- pyxecm/helper/__init__.py +2 -0
- pyxecm/helper/assoc.py +44 -1
- pyxecm/helper/data.py +1731 -0
- pyxecm/helper/web.py +170 -46
- pyxecm/helper/xml.py +170 -34
- pyxecm/otac.py +309 -23
- pyxecm/otawp.py +1810 -0
- pyxecm/otcs.py +5308 -2985
- pyxecm/otds.py +1909 -1954
- pyxecm/otmm.py +928 -0
- pyxecm/otpd.py +13 -10
- {pyxecm-1.4.dist-info → pyxecm-1.6.dist-info}/METADATA +5 -1
- pyxecm-1.6.dist-info/RECORD +32 -0
- {pyxecm-1.4.dist-info → pyxecm-1.6.dist-info}/WHEEL +1 -1
- pyxecm-1.4.dist-info/RECORD +0 -24
- {pyxecm-1.4.dist-info → pyxecm-1.6.dist-info}/LICENSE +0 -0
- {pyxecm-1.4.dist-info → pyxecm-1.6.dist-info}/top_level.txt +0 -0
pyxecm/otmm.py
ADDED
|
@@ -0,0 +1,928 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OTMM Module to interact with the OpenText Media Management API
|
|
3
|
+
See:
|
|
4
|
+
|
|
5
|
+
Class: OTMM
|
|
6
|
+
Methods:
|
|
7
|
+
|
|
8
|
+
__init__ : class initializer
|
|
9
|
+
config : Returns config data set
|
|
10
|
+
get_data: Get the Data object that holds all processed Media Management base Assets
|
|
11
|
+
credentials: Returns the token data
|
|
12
|
+
request_header: Returns the request header for OTMM API calls
|
|
13
|
+
parse_request_response: Parse the REST API responses and convert
|
|
14
|
+
them to Python dict in a safe way
|
|
15
|
+
exist_result_item: Check if a dict item is in the response
|
|
16
|
+
of the OTMM API call
|
|
17
|
+
get_result_value: Check if a defined value (based on a key) is in the OTMM API response
|
|
18
|
+
|
|
19
|
+
authenticate : Authenticates at the OTMM API
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
__author__ = "Dr. Marc Diefenbruch"
|
|
23
|
+
__copyright__ = "Copyright 2024, OpenText"
|
|
24
|
+
__credits__ = ["Kai-Philip Gatzweiler"]
|
|
25
|
+
__maintainer__ = "Dr. Marc Diefenbruch"
|
|
26
|
+
__email__ = "mdiefenb@opentext.com"
|
|
27
|
+
|
|
28
|
+
from json import JSONDecodeError
|
|
29
|
+
import os
|
|
30
|
+
import logging
|
|
31
|
+
import urllib.parse
|
|
32
|
+
import threading
|
|
33
|
+
import traceback
|
|
34
|
+
|
|
35
|
+
import requests
|
|
36
|
+
from requests.exceptions import HTTPError, RequestException
|
|
37
|
+
|
|
38
|
+
from pyxecm.helper.data import Data
|
|
39
|
+
|
|
40
|
+
logger = logging.getLogger("pyxecm.otmm")
|
|
41
|
+
|
|
42
|
+
REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}
|
|
43
|
+
|
|
44
|
+
REQUEST_TIMEOUT = 60
|
|
45
|
+
|
|
46
|
+
ASSET_BASE_PATH = "/tmp/mediaassets"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class OTMM:
|
|
50
|
+
"""Used to retrieve and automate data extraction from OTMM."""
|
|
51
|
+
|
|
52
|
+
_config: dict
|
|
53
|
+
_access_token = None
|
|
54
|
+
_data: Data = None
|
|
55
|
+
_thread_number = 3
|
|
56
|
+
_download_dir = ""
|
|
57
|
+
_business_unit_exclusions = None
|
|
58
|
+
_product_exclusions = None
|
|
59
|
+
|
|
60
|
+
def __init__(
    self,
    base_url: str,
    username: str,
    password: str,
    client_id: str,
    client_secret: str,
    thread_number: int,
    download_dir: str,
    business_unit_exclusions: list | None = None,
    product_exclusions: list | None = None,
):
    """Initialize the OTMM object.

    Args:
        base_url (str): Base URL of the OTMM server. The REST endpoints are
                        derived from it by appending "/otmmapi/v6".
        username (str): User name used for authentication.
        password (str): Password used for authentication.
        client_id (str): OAuth client ID.
        client_secret (str): OAuth client secret.
        thread_number (int): Stored in self._thread_number.
        download_dir (str): Directory that downloaded assets are written to.
        business_unit_exclusions (list | None, optional):
            Stored in self._business_unit_exclusions. Defaults to None.
        product_exclusions (list | None, optional):
            Stored in self._product_exclusions. Defaults to None.
    """

    # All REST endpoints hang below the versioned API root:
    rest_url = base_url + "/otmmapi/v6"

    # Credentials and endpoint URLs are kept together in one config dictionary:
    self._config = {
        "baseUrl": base_url,
        "username": username,
        "password": password,
        "clientId": client_id,
        "clientSecret": client_secret,
        "restUrl": rest_url,
        "tokenUrl": rest_url + "/sessions/oauth2/token",
        "domainUrl": rest_url + "/lookupdomains",
        "assetsUrl": rest_url + "/assets",
        "searchUrl": rest_url + "/search/text",
    }

    # One HTTP session is shared by all requests of this object:
    self._session = requests.Session()

    self._data = Data()

    self._thread_number = thread_number
    self._download_dir = download_dir
    self._business_unit_exclusions = business_unit_exclusions
    self._product_exclusions = product_exclusions
|
|
101
|
+
|
|
102
|
+
# end method definition
|
|
103
|
+
|
|
104
|
+
def thread_wrapper(self, target, *args, **kwargs):
    """Run a callable and log any exception it raises instead of propagating it.

    Args:
        target: The callable to execute.
        *args: Positional arguments passed on to the callable.
        **kwargs: Keyword arguments passed on to the callable.
    """

    try:
        target(*args, **kwargs)
    except Exception as error:
        # Name the failing thread in the log, then add the full traceback.
        failed_thread = threading.current_thread()
        logger.error(
            "Thread '%s': failed with exception -> %s", failed_thread.name, error
        )
        logger.error(traceback.format_exc())
|
|
112
|
+
|
|
113
|
+
# end method definition
|
|
114
|
+
|
|
115
|
+
def config(self) -> dict:
    """Return the configuration dictionary of this OTMM object.

    Returns:
        dict: The dictionary stored in self._config (not a copy).
    """

    configuration = self._config

    return configuration
|
|
122
|
+
|
|
123
|
+
# end method definition
|
|
124
|
+
|
|
125
|
+
def get_data(self) -> Data:
    """Return the Data object held by this OTMM object.

    Returns:
        Data: The object stored in self._data (not a copy).
    """

    data = self._data

    return data
|
|
133
|
+
|
|
134
|
+
# end method definition
|
|
135
|
+
|
|
136
|
+
def authenticate(self) -> str | None:
    """Authenticate at OTMM and remember the access token.

    Posts user name, password, client ID and client secret (grant type
    "password") to the token URL. On success the token is stored in
    self._access_token and installed as "Authorization: Bearer ..." header
    on the shared session.

    Returns:
        str | None: The access token, or None if the request failed or the
                    response did not contain a token.
    """

    request_url = self.config()["tokenUrl"]
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    payload = {
        "username": self.config()["username"],
        "password": self.config()["password"],
        "grant_type": "password",
        "client_id": self.config()["clientId"],
        "client_secret": self.config()["clientSecret"],
    }

    try:
        response = self._session.post(
            request_url,
            headers=headers,
            data=urllib.parse.urlencode(payload),
            # Without a timeout an unresponsive server blocks forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        token_info = response.json().get("token_info") or {}
        access_token = (token_info.get("oauth_token") or {}).get("accessToken")

    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
        return None
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
        return None
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
        return None
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
        return None
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e)
        return None

    if not access_token:
        # Never install a "Bearer None" header on the session.
        logger.error("OTMM token response does not contain an access token!")
        return None

    self._access_token = access_token
    self._session.headers.update({"Authorization": f"Bearer {access_token}"})

    return access_token
|
|
178
|
+
|
|
179
|
+
# end method definition
|
|
180
|
+
|
|
181
|
+
def get_products(self, domain: str = "OTMM.DOMAIN.OTM_PRODUCT") -> dict:
    """Get a dictionary with product names (keys) and IDs (values).

    Args:
        domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_PRODUCT".

    Returns:
        dict: Dictionary of all known products. Empty if the domain lookup
              delivered nothing. An entry without "field_value" gets the
              value None.
    """

    # The lookup may deliver None; treat that like "no products".
    lookup_products = self.lookup_domains(domain) or []

    return {
        product.get("display_value"): (product.get("field_value") or {}).get("value")
        for product in lookup_products
    }
|
|
199
|
+
|
|
200
|
+
# end method definition
|
|
201
|
+
|
|
202
|
+
def get_business_units(
    self, domain: str = "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU"
) -> dict:
    """Get a dictionary with business unit names (keys) and IDs (values).

    Args:
        domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU".

    Returns:
        dict: Dictionary of all known business units. Empty if the domain
              lookup delivered nothing. An entry without "field_value" gets
              the value None.
    """

    # The lookup may deliver None; treat that like "no business units".
    lookup_bus = self.lookup_domains(domain) or []

    return {
        bu.get("display_value"): (bu.get("field_value") or {}).get("value")
        for bu in lookup_bus
    }
|
|
220
|
+
|
|
221
|
+
# end method definition
|
|
222
|
+
|
|
223
|
+
def lookup_domains(self, domain: str) -> list:
    """Lookup domain values in a given OTMM domain.

    Args:
        domain (str): name / identifier of the domain.

    Returns:
        list: The "domainValues" found under "lookup_domain_resource" /
              "lookup_domain" in the response. An empty list if the request
              failed or the response does not contain domain values.
    """

    request_url = self.config()["domainUrl"] + "/" + domain

    try:
        response = self._session.get(
            request_url,
            # Without a timeout an unresponsive server blocks forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        body = response.json()

    # Every failure returns right away: the response must not be evaluated
    # after an error (it may not even exist).
    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
        return []
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
        return []
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
        return []
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
        return []
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e)
        return []

    lookup_domain = (body.get("lookup_domain_resource") or {}).get("lookup_domain")
    domain_values = (lookup_domain or {}).get("domainValues")

    return domain_values if domain_values is not None else []
|
|
261
|
+
|
|
262
|
+
# end method definition
|
|
263
|
+
|
|
264
|
+
def get_asset(self, asset_id: str) -> dict | None:
    """Get an asset based on its ID.

    Args:
        asset_id (str): Asset ID

    Returns:
        dict | None: The decoded JSON response with the asset data, or None
                     if the request failed or the response could not be
                     decoded.
    """

    request_url = self.config()["assetsUrl"] + "/" + asset_id

    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    try:
        response = self._session.get(
            request_url,
            headers=headers,
            # Without a timeout an unresponsive server blocks forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        # Decoded inside the try block so that an undecodable body is
        # logged and reported as None like every other failure.
        return response.json()

    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e)

    return None
|
|
303
|
+
|
|
304
|
+
# end method definition
|
|
305
|
+
|
|
306
|
+
def get_business_unit_assets(
|
|
307
|
+
self, bu_id: int, offset: int = 0, limit: int = 200
|
|
308
|
+
) -> list | None:
|
|
309
|
+
"""Get all Media Assets for a given Business Unit (ID) that are NOT related to a product.
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
bu_id (int): Identifier of the Business Unit.
|
|
313
|
+
offset (int, optional): Result pagination. Starting ID. Defaults to 0.
|
|
314
|
+
limit (int, optional): Result pagination. Page length. Defaults to 200.
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
dict: Search Results
|
|
318
|
+
"""
|
|
319
|
+
|
|
320
|
+
payload = {
|
|
321
|
+
"load_type": ["metadata"],
|
|
322
|
+
"load_multilingual_values": ["true"],
|
|
323
|
+
"level_of_detail": ["full"],
|
|
324
|
+
"after": offset,
|
|
325
|
+
"limit": limit,
|
|
326
|
+
"multilingual_language_code": ["en_US"],
|
|
327
|
+
"search_config_id": ["3"],
|
|
328
|
+
"preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
|
|
329
|
+
"metadata_to_return": ["ARTESIA.FIELD.TAG"],
|
|
330
|
+
"facet_restriction_list": '{"facet_restriction_list":{"facet_field_restriction":[{"type":"com.artesia.search.facet.FacetSimpleFieldRestriction","facet_generation_behavior":"EXCLUDE","field_id":"PRODUCT_CHAR_ID","value_list":[null]}]}}',
|
|
331
|
+
"search_condition_list": [
|
|
332
|
+
'{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTMM.FIELD.BUSINESS_UNIT.TAB","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"OTMM.COLUMN.BUSINESS_UNIT.TAB","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
|
|
333
|
+
+ str(bu_id)
|
|
334
|
+
+ '","left_paren":"(","right_paren":")"}]}]}}'
|
|
335
|
+
],
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
flattened_data = {
|
|
339
|
+
k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()
|
|
340
|
+
}
|
|
341
|
+
|
|
342
|
+
search_result = self.search_assets(flattened_data)
|
|
343
|
+
|
|
344
|
+
if not search_result or not "search_result_resource" in search_result:
|
|
345
|
+
logger.error("No assets found via search!")
|
|
346
|
+
return None
|
|
347
|
+
search_result = search_result.get("search_result_resource")
|
|
348
|
+
|
|
349
|
+
hits = search_result["search_result"]["hit_count"]
|
|
350
|
+
hits_total = search_result["search_result"]["total_hit_count"]
|
|
351
|
+
|
|
352
|
+
asset_list = search_result.get("asset_list", None)
|
|
353
|
+
|
|
354
|
+
hits_remaining = hits_total - hits
|
|
355
|
+
|
|
356
|
+
while hits_remaining > 0:
|
|
357
|
+
flattened_data["after"] += hits
|
|
358
|
+
search_result = self.search_assets(flattened_data)
|
|
359
|
+
|
|
360
|
+
if not search_result or not "search_result_resource" in search_result:
|
|
361
|
+
break
|
|
362
|
+
|
|
363
|
+
search_result = search_result.get("search_result_resource")
|
|
364
|
+
|
|
365
|
+
hits = search_result["search_result"]["hit_count"]
|
|
366
|
+
hits_remaining = hits_remaining - hits
|
|
367
|
+
|
|
368
|
+
asset_list += search_result.get("asset_list", [])
|
|
369
|
+
|
|
370
|
+
return asset_list
|
|
371
|
+
|
|
372
|
+
# end method definition
|
|
373
|
+
|
|
374
|
+
def get_product_assets(
|
|
375
|
+
self, product_id: int, offset: int = 0, limit: int = 200
|
|
376
|
+
) -> list | None:
|
|
377
|
+
"""Get all Media Assets for a given product (ID).
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
product_id (int): Identifier of the product.
|
|
381
|
+
offset (int, optional): Result pagination. Starting ID. Defaults to 0.
|
|
382
|
+
limit (int, optional): Result pagination. Page length. Defaults to 200.
|
|
383
|
+
|
|
384
|
+
Returns:
|
|
385
|
+
dict: Search Results
|
|
386
|
+
"""
|
|
387
|
+
|
|
388
|
+
payload = {
|
|
389
|
+
"load_type": ["metadata"],
|
|
390
|
+
"load_multilingual_values": ["true"],
|
|
391
|
+
"level_of_detail": ["full"],
|
|
392
|
+
"after": offset,
|
|
393
|
+
"limit": limit,
|
|
394
|
+
"multilingual_language_code": ["en_US"],
|
|
395
|
+
"search_config_id": ["3"],
|
|
396
|
+
"preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
|
|
397
|
+
"metadata_to_return": ["ARTESIA.FIELD.TAG"],
|
|
398
|
+
"search_condition_list": [
|
|
399
|
+
'{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTM.TABLE.PRODUCT_TABLE_FIELD","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"PRODUCT_CHAR_ID","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
|
|
400
|
+
+ str(product_id)
|
|
401
|
+
+ '","left_paren":"(","right_paren":")"}]}]}}'
|
|
402
|
+
],
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
flattened_data = {
|
|
406
|
+
k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
search_result = self.search_assets(flattened_data)
|
|
410
|
+
|
|
411
|
+
if not search_result or not "search_result_resource" in search_result:
|
|
412
|
+
logger.error("No assets found via search!")
|
|
413
|
+
return None
|
|
414
|
+
search_result = search_result.get("search_result_resource")
|
|
415
|
+
|
|
416
|
+
hits = search_result["search_result"]["hit_count"]
|
|
417
|
+
hits_total = search_result["search_result"]["total_hit_count"]
|
|
418
|
+
|
|
419
|
+
asset_list = search_result.get("asset_list", None)
|
|
420
|
+
|
|
421
|
+
hits_remaining = hits_total - hits
|
|
422
|
+
|
|
423
|
+
while hits_remaining > 0:
|
|
424
|
+
flattened_data["after"] += hits
|
|
425
|
+
search_result = self.search_assets(flattened_data)
|
|
426
|
+
|
|
427
|
+
if not search_result or not "search_result_resource" in search_result:
|
|
428
|
+
break
|
|
429
|
+
|
|
430
|
+
search_result = search_result.get("search_result_resource")
|
|
431
|
+
|
|
432
|
+
hits = search_result["search_result"]["hit_count"]
|
|
433
|
+
hits_remaining = hits_remaining - hits
|
|
434
|
+
|
|
435
|
+
asset_list += search_result.get("asset_list", [])
|
|
436
|
+
|
|
437
|
+
return asset_list
|
|
438
|
+
|
|
439
|
+
# end method definition
|
|
440
|
+
|
|
441
|
+
def download_asset(
    self,
    asset_id: str,
    asset_name: str,
    download_url: str = "",
    skip_existing: bool = True,
) -> bool:
    """Download a given Media Asset into the download directory.

    The file is stored under the asset ID as file name. The content is
    first written to "<asset_id>.part" and only renamed after the download
    completed, so an interrupted download is never mistaken for an already
    downloaded asset on the next run.

    Args:
        asset_id (str): ID of the asset to download - becomes the file name.
        asset_name (str): Name of the asset - only used for logging.
        download_url (str, optional): URL to download the asset. If empty,
                                      the "contents" URL of the asset is used.
        skip_existing (bool, optional): If True, an existing file is kept
                                        and True is returned without a
                                        download. If False, an existing
                                        file is removed and downloaded again.

    Returns:
        bool: True = success, False = failure
    """

    if download_url:
        request_url = download_url
    else:
        request_url = self.config()["assetsUrl"] + "/" + asset_id + "/contents"

    file_name = os.path.join(self._download_dir, asset_id)

    if os.path.exists(file_name):
        if skip_existing:
            logger.debug(
                "OpenText Media Management asset has been downloaded before skipping download -> '%s' (%s) to -> %s...",
                asset_name,
                asset_id,
                file_name,
            )
            return True

        logger.debug(
            "OpenText Media Management asset has been downloaded before. Update download -> '%s' (%s) to -> %s...",
            asset_name,
            asset_id,
            file_name,
        )
        os.remove(file_name)

    partial_file_name = file_name + ".part"

    try:
        # exist_ok avoids a failure if another thread creates the directory
        # between an existence check and the creation.
        os.makedirs(self._download_dir, exist_ok=True)

        logger.info(
            "Downloading OpenText Media Management asset -> '%s' (%s) to -> %s...",
            asset_name,
            asset_id,
            file_name,
        )
        # The context manager releases the streamed connection in any case.
        with self._session.get(
            request_url, stream=True, timeout=REQUEST_TIMEOUT
        ) as response:
            response.raise_for_status()
            with open(partial_file_name, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Publish the file under its final name only once it is complete.
        os.replace(partial_file_name, file_name)
        return True
    except HTTPError as http_err:
        logger.error("HTTP error occurred -> %s!", str(http_err))
    except RequestException as req_err:
        logger.error("Request error occurred -> %s!", str(req_err))
    except Exception as err:
        logger.error("An error occurred -> %s!", str(err))

    # Failure: do not leave an incomplete file behind.
    try:
        if os.path.exists(partial_file_name):
            os.remove(partial_file_name)
    except OSError as os_err:
        logger.error("An error occurred -> %s!", str(os_err))

    return False
|
|
510
|
+
|
|
511
|
+
# end method definition
|
|
512
|
+
|
|
513
|
+
def search_assets(self, payload: dict) -> dict | None:
    """Search assets based on the given parameters / criteria.

    The payload is URL-encoded ("/" and ":" are left unescaped) and posted
    to the search URL.

    Args:
        payload (dict): in the format of:
                        payload = {
                            "PRODUCT_CHAR_ID": "Extended ECM for Engineering",
                            "BUSINESS_AREA_CHAR_ID": "Content",
                            "keyword_query": "*",
                            "limit": "5",
                        }

    Returns:
        dict | None: The decoded JSON search results, or None if the request
                     failed or the response could not be decoded.
    """

    request_url = self.config()["searchUrl"]

    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    encoded_payload = urllib.parse.urlencode(payload, safe="/:")

    try:
        response = self._session.post(
            request_url,
            headers=headers,
            data=encoded_payload,
            # Without a timeout an unresponsive server blocks forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()

        # Decoded inside the try block: after a failure there is no usable
        # response to decode.
        return response.json()

    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e)

    return None
|
|
556
|
+
|
|
557
|
+
# end method definition
|
|
558
|
+
|
|
559
|
+
def get_asset_metadata(self, asset_id: str) -> dict:
    """Retrieve the metadata of an asset.

    Args:
        asset_id (str): asset_id of the asset to query

    Returns:
        dict: Metadata information. The keys are the metadata field IDs
              with blanks removed and "." replaced by "_". A field that
              carries a nested "value" structure delivers that value; any
              other field delivers the list of display values of its
              elements. Empty dict if the request failed, the response could
              not be decoded or it contains no metadata.

        example (keys as produced by this method):
        {
            'OTMM_CUSTOM_FIELD_TITLE': [],
            'OTMM_CUSTOM_FIELD_DESCRIPTION': [],
            'OTMM_CUSTOM_FIELD_KEYWORDS': [],
            'CONTENT_TYPE_COMBO_CHAR_ID': [],
            'OTM_TABLE_APPROVED_USAGE_FIELD': [],
            'OTMM_FIELD_RESOURCE_LIBRARY_TAB': [],
            'LANGUAGE_COMBO_CHAR_ID': [],
            'OTMM_CUSTOM_FIELD_PART_NUMBER': [],
            'OTMM_FIELD_BUSINESS_UNIT_TAB': ['Content'],
            'OTM_TABLE_PRODUCT_TABLE_FIELD': ['Vendor Invoice Management for SAP'],
            'OTM_TABLE_INDUSTRY_TABLE_FIELD': [],
            'OTMM_CUSTOM_FIELD_URL': [],
            'OTMM_CUSTOM_FIELD_PREVIOUS_URL': [],
            'OTMM_CUSTOM_FIELD_CONTENT_OWNER': [],
            'OTMM_CUSTOM_FIELD_EMAIL': [],
            'OTMM_CUSTOM_FIELD_JOB_NUMBER': [],
            'OTM_TABLE_BUSINESS_AREA_TABLE_FIELD': [],
            'OTM_TABLE_JOURNEY_TABLE_FIELD': ['Buy', 'Try', 'Learn'],
            'OTMM_FIELD_PERSONA_TAB': [],
            'OTMM_FIELD_SERVICES_TAB': [],
            'OTMM_FIELD_REGION_TAB': [],
            'OTMM_FIELD_PURPOSE_TAB': [],
            'AODA_CHAR_ID': [],
            'REVIEW_CADENCE_CHAR_ID': [],
            'CONTENT_CREATED_DATE_ID': [],
            'ARTESIA_FIELD_EXPIRATIONDATE': [],
            'OTMM_CUSTOM_FIELD_REAL_COMMENTS': []
        }
    """

    request_url = self.config()["assetsUrl"] + f"/{asset_id}"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    params = {
        "load_type": "custom",
        "level_of_detail": "slim",
        "data_load_request": '{"data_load_request":{"load_multilingual_field_values":"true","load_subscribed_to":"true","load_asset_content_info":"true","load_metadata":"true","load_inherited_metadata":"true","load_thumbnail_info":"true","load_preview_info":"true", "load_pdf_preview_info":"true", "load_3d_preview_info" : "true","load_destination_links":"true", "load_security_policies":"true","load_path":"true","load_deep_zoom_info":"true"}}',
    }

    try:
        response = self._session.get(
            request_url,
            headers=headers,
            params=params,
            # Without a timeout an unresponsive server blocks forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()

    # Every failure returns right away: the response must not be evaluated
    # after an error (it may not even exist).
    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
        return {}
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
        return {}
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
        return {}
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
        return {}
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e)
        return {}

    # Read Metadata from nested structure
    try:
        element_groups = (
            response.json()
            .get("asset_resource", {})
            .get("asset", {})
            .get("metadata", {})
            .get("metadata_element_list", [])
        )
    except JSONDecodeError:
        logger.error("Cannot decode JSON response for assset_id -> %s", asset_id)
        return {}

    if not element_groups:
        # No metadata delivered - indexing the first group would fail.
        return {}

    metadata = element_groups[0].get("metadata_element_list", [])

    result = {}

    for data in metadata:
        # Normalize the field ID: remove blanks, replace dots by underscores.
        index = data.get("id").replace(" ", "").replace(".", "_")

        try:
            result[index] = data.get("value").get("value").get("value")
        except AttributeError:
            # No nested value structure: collect the display values of all
            # elements instead.
            result[index] = [
                value.get("value").get("display_value")
                for element in data.get("metadata_element_list", [])
                for value in element.get("values", [])
            ]

    return result
|
|
657
|
+
|
|
658
|
+
# end method definition
|
|
659
|
+
|
|
660
|
+
def load_assets(
    self,
    load_products: bool = True,
    load_business_units: bool = True,
    download_assets: bool = True,
) -> bool:
    """Load all Media Assets for Products and Business Units.

    Collects the assets of all products and business units that are not
    excluded, tags every asset with the workspace it was found in
    ("workspace_type" / "workspace_name") and then processes the combined
    list with parallel worker threads running load_assets_worker().
    Assets without a "content_size" key are not processed.

    Args:
        load_products (bool, optional): If true load assets on Product level. Defaults to True.
        load_business_units (bool, optional): If true load assets on Business Unit level. Defaults to True.
        download_assets (bool, optional): Should assets be downloaded. Defaults to True.

    Returns:
        bool: True = Success, False = Failure
              (this method currently returns True in all cases).

    Example Asset:
    {
        'access_control_descriptor': {
            'permissions_map': {...}
        },
        'asset_content_info': {
            'master_content': {...}
        },
        'asset_id': '68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
        'asset_lock_state_last_update_date': '2024-09-09T22:02:53Z',
        'asset_lock_state_user_id': '202',
        'asset_state': 'NORMAL',
        'asset_state_last_update_date': '2024-09-09T22:02:53Z',
        'asset_state_user_id': '202',
        'checked_out': False,
        'content_editable': True,
        'content_lock_state_last_update_date': '2024-08-14T00:33:27Z',
        'content_lock_state_user_id': '202',
        'content_lock_state_user_name': 'ajohnson3',
        'content_size': 18474085,
        'content_state': 'NORMAL',
        'content_state_last_update_date': '2024-08-14T00:33:27Z',
        'content_state_user_id': '202',
        'content_state_user_name': 'Amanda Johnson',
        'content_type': 'ACROBAT',
        'creator_id': '202',
        'date_imported': '2024-08-14T00:33:26Z',
        'date_last_updated': '2024-09-09T22:02:53Z',
        'deleted': False,
        'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
        'expired': False,
        'import_job_id': 7764,
        'import_user_name': 'ajohnson3',
        'latest_version': True,
        'legacy_model_id': 104,
        'locked': False,
        'master_content_info': {
            'content_checksum': '45f42d19542af5b6146cbb3927a5490f',
            'content_data': {...},
            'content_kind': 'MASTER',
            'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
            'content_path': 'data/repository/original/generative-ai-governance-essentials-wp-en_56cbbfe270593ba1a5ab6551d2c8b373469cc1a9.pdf',
            'content_size': 18474085,
            'height': -1,
            'id': '56cbbfe270593ba1a5ab6551d2c8b373469cc1a9',
            'mime_type': 'application/pdf',
            'name': 'generative-ai-governance-essentials-wp-en.pdf',
            'unit_of_size': 'BYTES',
            'url': '/otmmapi/v6/renditions/56cbbfe270593ba1a5ab6551d2c8b373469cc1a9',
            'width': -1
        },
        'metadata_lock_state_user_name': 'ajohnson3',
        'metadata_model_id': 'OTM.MARKETING.MODEL',
        'metadata_state_user_name': 'Amanda Johnson',
        'mime_type': 'application/pdf',
        'name': 'generative-ai-governance-essentials-wp-en.pdf',
        'original_asset_id': '68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
        'product_associations': False,
        'rendition_content': {
            'thumbnail_content': {...},
            'preview_content': {...},
            'pdf_preview_content': {...}
        },
        'subscribed_to': False,
        'thumbnail_content_id': '70aef1a5b5e480337bc115e47443884432c355ff',
        'version': 1
    }
    """

    asset_list = []

    if load_products:

        products = self.get_products()  # dictionary with key = name and value = ID

        if self._product_exclusions:
            logger.info("Excluding products -> %s", str(self._product_exclusions))
            for key in self._product_exclusions:
                # pop(key, None) removes the key if it exists and does nothing otherwise
                products.pop(key, None)

        for product_name, product_id in products.items():
            if "DO NOT USE" in product_name:
                continue

            logger.info("Processing product -> '%s'...", product_name)

            assets = self.get_product_assets(product_id)

            if not assets:
                logger.info("Found no assets for product -> '%s'", product_name)
                continue

            # Remember which workspace each asset was found in
            for asset in assets:
                asset["workspace_type"] = "Product"
                asset["workspace_name"] = product_name

            asset_list += [asset for asset in assets if "content_size" in asset]
        # end for product_name...
    # end if load_products

    if load_business_units:

        business_units = self.get_business_units()

        if self._business_unit_exclusions:
            logger.info(
                "Excluding business units -> %s",
                str(self._business_unit_exclusions),
            )
            for key in self._business_unit_exclusions:
                # pop(key, None) removes the key if it exists and does nothing otherwise
                business_units.pop(key, None)

        for bu_name, bu_id in business_units.items():
            logger.debug(bu_name)
            assets = self.get_business_unit_assets(bu_id)

            if not assets:
                logger.info("Found no assets for business unit -> '%s'", bu_name)
                continue

            # Remember which workspace each asset was found in
            for asset in assets:
                asset["workspace_type"] = "Business Unit"
                asset["workspace_name"] = bu_name

            asset_list += [asset for asset in assets if "content_size" in asset]
        # end for bu_name...
    # end if load_business_units

    # Deleted and expired assets are intentionally NOT filtered out of
    # asset_list here any more. This is now done in the bulk document
    # processing using conditions_delete and conditions_create.

    total_count = len(asset_list)

    number = self._thread_number
    if number < 1:
        # A thread number of 0 would raise ZeroDivisionError below and a
        # negative one would start no worker at all - use a single thread.
        logger.warning(
            "Invalid thread number -> %s, using a single thread instead.",
            str(number),
        )
        number = 1

    if total_count >= number:
        partition_size, remainder = divmod(total_count, number)
    else:
        # Fewer assets than threads: process everything in a single thread
        partition_size = total_count
        number = 1
        remainder = 0

    logger.info(
        "Processing -> %s Media Assets, thread number -> %s, partition size -> %s",
        str(total_count),
        number,
        partition_size,
    )

    threads = []

    start = 0
    for index in range(number):
        # The first 'remainder' partitions take one additional asset each
        # so that all assets are covered.
        extra = 1 if remainder > 0 else 0
        end = start + partition_size + extra
        if remainder > 0:
            remainder -= 1

        thread = threading.Thread(
            name=f"load_assets_{index + 1:02}",
            target=self.thread_wrapper,
            args=(
                self.load_assets_worker,
                asset_list,
                partition_size + extra,
                start,
                download_assets,
            ),
        )
        thread.start()
        threads.append(thread)
        start = end

    # Wait until all workers have finished
    for thread in threads:
        thread.join()

    return True
|
|
862
|
+
|
|
863
|
+
# end method definition
|
|
864
|
+
|
|
865
|
+
def load_assets_worker(
    self,
    asset_list: list,
    partition_size: int,
    offset: int = 0,
    download_assets: bool = True,
) -> None:
    """Worker Method for multi-threading.

    Processes the partition asset_list[offset : offset + partition_size].
    For each asset of the partition that is neither deleted nor expired,
    the content is optionally downloaded and the asset dictionary is
    enriched with the result of get_asset_metadata(). Deleted or expired
    assets are skipped (no download, no metadata) but stay in the
    partition. Finally the whole partition is appended to self._data
    while holding its lock.

    Args:
        asset_list (list): Complete list of assets (dictionaries). Only the
                           partition of this worker is processed.
        partition_size (int): Number of assets in the partition of this worker.
        offset (int, optional): Index of the first asset of the partition. Defaults to 0.
        download_assets (bool, optional): Should assets with a content size
                                          greater than 0 be downloaded. Defaults to True.
    """

    logger.info(
        "Processing Media Assets in range from -> %s to -> %s...",
        offset,
        offset + partition_size,
    )

    worker_asset_list = asset_list[offset : offset + partition_size]

    for asset in worker_asset_list:
        asset_id = asset.get("asset_id")
        asset_name = asset.get("name")
        # Store name as asset_name
        asset["asset_name"] = asset_name
        asset_download_url = asset.get("delivery_service_url")

        if asset.get("deleted", False) or asset.get("expired", False):
            logger.info(
                "Asset -> '%s' is deleted or expired. Skipping...",
                asset_name,
            )
            continue

        # "or 0" treats an explicit None content size like a missing one
        # instead of failing on the comparison.
        if download_assets and (asset.get("content_size") or 0) > 0:
            success = self.download_asset(
                asset_id=asset_id,
                asset_name=asset_name,
                download_url=asset_download_url,
            )
            if not success:
                logger.error(
                    "Failed to download asset -> '%s' (%s) to '%s'",
                    asset_name,
                    asset_id,
                    self._download_dir,
                )
            else:
                logger.info(
                    "Successfully downloaded asset -> '%s' (%s) to '%s'",
                    asset_name,
                    asset_id,
                    self._download_dir,
                )

        # Merge the metadata fields into the asset dictionary
        metadata = self.get_asset_metadata(asset_id)
        if metadata:
            asset.update(metadata)

    # Now we add the assets of this partition to the Data object.
    # The lock serializes the appends of the parallel workers.
    with self._data.lock():
        self._data.append(worker_asset_list)
|