brynq-sdk-task-scheduler 3.0.5__tar.gz → 3.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {brynq_sdk_task_scheduler-3.0.5 → brynq_sdk_task_scheduler-3.0.7}/PKG-INFO +1 -1
- brynq_sdk_task_scheduler-3.0.7/brynq_sdk_task_scheduler/__init__.py +1 -0
- brynq_sdk_task_scheduler-3.0.7/brynq_sdk_task_scheduler/task_scheduler.py +564 -0
- {brynq_sdk_task_scheduler-3.0.5 → brynq_sdk_task_scheduler-3.0.7}/brynq_sdk_task_scheduler.egg-info/PKG-INFO +1 -1
- {brynq_sdk_task_scheduler-3.0.5 → brynq_sdk_task_scheduler-3.0.7}/brynq_sdk_task_scheduler.egg-info/SOURCES.txt +2 -0
- brynq_sdk_task_scheduler-3.0.7/brynq_sdk_task_scheduler.egg-info/top_level.txt +1 -0
- {brynq_sdk_task_scheduler-3.0.5 → brynq_sdk_task_scheduler-3.0.7}/setup.py +1 -1
- brynq_sdk_task_scheduler-3.0.5/brynq_sdk_task_scheduler.egg-info/top_level.txt +0 -1
- {brynq_sdk_task_scheduler-3.0.5 → brynq_sdk_task_scheduler-3.0.7}/brynq_sdk_task_scheduler.egg-info/dependency_links.txt +0 -0
- {brynq_sdk_task_scheduler-3.0.5 → brynq_sdk_task_scheduler-3.0.7}/brynq_sdk_task_scheduler.egg-info/not-zip-safe +0 -0
- {brynq_sdk_task_scheduler-3.0.5 → brynq_sdk_task_scheduler-3.0.7}/brynq_sdk_task_scheduler.egg-info/requires.txt +0 -0
- {brynq_sdk_task_scheduler-3.0.5 → brynq_sdk_task_scheduler-3.0.7}/setup.cfg +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .task_scheduler import TaskScheduler
|
|
@@ -0,0 +1,564 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import datetime
|
|
4
|
+
import inspect
|
|
5
|
+
import time
|
|
6
|
+
from typing import Literal, List, Optional
|
|
7
|
+
import traceback
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import json
|
|
10
|
+
import pymysql
|
|
11
|
+
import requests
|
|
12
|
+
from brynq_sdk_mandrill import MailClient
|
|
13
|
+
from brynq_sdk_functions import Functions
|
|
14
|
+
from brynq_sdk_mysql import MySQL
|
|
15
|
+
from brynq_sdk_elastic import Elastic
|
|
16
|
+
from brynq_sdk_brynq import BrynQ
|
|
17
|
+
import warnings
|
|
18
|
+
import re
|
|
19
|
+
# Names of the log sinks that can be passed in the `logging` argument of TaskScheduler.__init__.
LOGGING_OPTIONS = Literal['MYSQL', 'ELASTIC']
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TaskScheduler(BrynQ):
|
|
23
|
+
|
|
24
|
+
def __init__(self, task_id: int = None, loglevel: str = 'INFO', email_after_errors: bool = False, logging: List[LOGGING_OPTIONS] | None = ["MYSQL", "ELASTIC"]):
    """
    The TaskScheduler is responsible for the logging to the database. Based on this logging, the next reload will
    start or not and warning will be given or not
    :param task_id: The ID from the task as saved in the task_scheduler table in the customer database. Only used when no command line arguments are given (local start); otherwise the task ID from the command line is used
    :param loglevel: Chose on which level you want to store the logs in MySQL. Default is INFO, which means that a logline with level DEBUG is not stored
    :param email_after_errors: a True or False value. When True, there will be send an email to a contactperson of the customer (as given in the database) with the number of errors
    :param logging: The log sinks to write to: 'MYSQL', 'ELASTIC' or both. Passing None (or any value that does not support `in`) disables both sinks. The default list is only read, never modified
    """
    super().__init__()
    try:
        self.mysql_enabled = 'MYSQL' in logging
        self.elastic_enabled = 'ELASTIC' in logging
    except Exception as e:
        print("Error parsing logging options, disabling logging. Error is: " + str(e))
        self.mysql_enabled = False
        self.elastic_enabled = False

    # The log payloads always contain the customer ID. It is only looked up when MySQL logging is
    # enabled, so give it a defined value up front instead of leaving the attribute missing.
    self.customer_id = None

    # Initialize MySQL. The connection is attempted regardless of the `logging` setting.
    self.mysql_unreachable = False
    try:
        self.mysql = MySQL()
        self.mysql.ping()
    except Exception as e:
        self.mysql_unreachable = True
        self.mysql = None
        print("MySQL is enabled but not reachable, logs will be saved locally if needed.")

    # Initialize ElasticSearch
    self.elastic_unreachable = False
    if self.elastic_enabled:
        try:
            self.es = Elastic()
            self.es.get_health()
        except Exception as e:
            self.elastic_unreachable = True
            self.es = Elastic(disabled=True)
            print("ElasticSearch is enabled but not reachable, logs will be saved locally if needed.")
    else:
        self.es = Elastic(disabled=True)

    # Set up local log directory
    self.local_log_dir = 'local_logs'
    os.makedirs(self.local_log_dir, exist_ok=True)

    # Process local MySQL logs if the service is now reachable. The Elastic replay happens further
    # down, because it needs the index name which is only known once the customer database is known.
    if self.mysql_enabled and not self.mysql_unreachable:
        self._process_local_mysql_logs()

    try:
        self.email_after_errors = email_after_errors
        self.customer_db = os.getenv("MYSQL_DATABASE")
        self.customer = os.getenv('BRYNQ_SUBDOMAIN').lower().replace(' ', '_')
        self.partner_id = os.getenv('PARTNER_ID').lower().replace(' ', '_') if os.getenv('PARTNER_ID') else 'brynq'
        self.loglevel = loglevel
        self.started_at = datetime.datetime.now()
        # If the task is started via the task_scheduler, the following 3 parameters will be passed by the scheduler.
        # The distinction between local and non local is made because the scheduler usually sets the scheduler_log table entry and run_id. When running locally, the tasks should do this itself.
        if len(sys.argv[1:4]) > 0:
            self.started_local = False
            self.customer_db, self.task_id, self.run_id = sys.argv[1:4]
        # If the task is started locally, the parameters should be set locally
        else:
            self.started_local = True
            self.run_id = int(round(time.time() * 100000))
            self.task_id = task_id
        # NOTE(review): the original nesting of this print could not be recovered; it is assumed to run for both start modes.
        print(self.task_id, self.run_id)
        self.error_count = 0

        # Check if the log tables exists in the customer database. If not, create them
        # Mysql throws a warning when a table already exists. We don't care so we ignore warnings. (not exceptions!)
        warnings.filterwarnings('ignore')

        # Creates Elasticsearch index and data view if not exists
        if self.elastic_enabled:
            self.es_index = f"task_execution_log_{self.customer_db}"
            self.es.create_index(index_name=self.es_index)
            self.es.create_data_view(space_name='interfaces', view_name=f'task_execution_log_{self.customer_db}', name=f'Task execution log {self.customer_db}', time_field='started_at')
            # Replay locally stored logs only now that self.es_index exists; doing it earlier made
            # every replay fail on the missing attribute.
            if not self.elastic_unreachable:
                self._process_local_elastic_logs()

        # Start the task and setup the data in the database
        if self.mysql_enabled:
            self.customer_id = self.mysql.raw_query(f'SELECT id FROM sc.customers WHERE dbname = \'{self.customer_db}\'')[0][0]
            # Check if the task is started on schedule or manual. store in a variable to use later in the script
            self.task_manual_started = self._check_if_task_manual_started()
            self._start_task()
        else:
            # Without MySQL logging there is no scheduler state to consult, so the run counts as manual
            self.task_manual_started = True
    except Exception as e:
        self.error_handling(e)
|
|
119
|
+
|
|
120
|
+
def __count_keys(self, json_obj):
|
|
121
|
+
if not isinstance(json_obj, dict):
|
|
122
|
+
return 0
|
|
123
|
+
key_count = 0
|
|
124
|
+
for key, value in json_obj.items():
|
|
125
|
+
if not isinstance(value, dict):
|
|
126
|
+
key_count += 1 # Count the current key
|
|
127
|
+
else:
|
|
128
|
+
key_count += self.__count_keys(value) # Recursively count keys in nested dictionaries
|
|
129
|
+
return key_count
|
|
130
|
+
|
|
131
|
+
def __get_caller_info(self):
|
|
132
|
+
stack = inspect.stack()
|
|
133
|
+
caller_frame = stack[2][0]
|
|
134
|
+
file_name = caller_frame.f_code.co_filename
|
|
135
|
+
line_number = caller_frame.f_lineno
|
|
136
|
+
function_name = stack[2][3]
|
|
137
|
+
return file_name, line_number, function_name
|
|
138
|
+
|
|
139
|
+
def create_task_execution_steps(self, step_details: list):
|
|
140
|
+
"""
|
|
141
|
+
Check if the given steps already exists in the task_execution_steps table. If not, update or insert the values in the table
|
|
142
|
+
:param step_details: list of dicts. Each dict must contain task details according to required_fields.
|
|
143
|
+
Example: step_details = [
|
|
144
|
+
{'nr': 1, 'description': 'test'},
|
|
145
|
+
{'nr': 2, 'description': 'test2'}
|
|
146
|
+
]
|
|
147
|
+
:return: error (str) or response of mysql
|
|
148
|
+
"""
|
|
149
|
+
warnings.warn("Execution steps are deprecated, please stop calling this method. It does nothing anymore", DeprecationWarning)
|
|
150
|
+
return
|
|
151
|
+
|
|
152
|
+
def _check_if_task_manual_started(self):
|
|
153
|
+
"""
|
|
154
|
+
Check if the task manual is started of on schedule. If it's manual started, that's important for the variables in the db_variables function.
|
|
155
|
+
In that case the dynamic variables should be used instead of the static ones
|
|
156
|
+
:return: True of False
|
|
157
|
+
"""
|
|
158
|
+
# without logging is only possible during dev, so this is always manual
|
|
159
|
+
response = self.mysql.select('task_scheduler', 'run_instant', f'WHERE id = {self.task_id}')[0][0]
|
|
160
|
+
if response == 1:
|
|
161
|
+
# Reset the 1 back to 0 before sending the result
|
|
162
|
+
self.mysql.update('task_scheduler', ['run_instant'], [0], 'WHERE `id` = {}'.format(self.task_id))
|
|
163
|
+
return True
|
|
164
|
+
else:
|
|
165
|
+
return False
|
|
166
|
+
|
|
167
|
+
def _start_task(self):
|
|
168
|
+
"""
|
|
169
|
+
Start the task and write this to the database. While the status is running, the task will not start again
|
|
170
|
+
:return: if the update to the database is successful or not
|
|
171
|
+
"""
|
|
172
|
+
# If the task is started from a local instance (not the task_scheduler), create a start log row in the task_scheduler_log
|
|
173
|
+
if self.started_local:
|
|
174
|
+
self.mysql.raw_query(f"INSERT INTO `task_scheduler_log` (reload_id, task_id, reload_status, started_at, finished_at) VALUES ({self.run_id}, {self.task_id}, 'Running', '{self.started_at}', null)", insert=True)
|
|
175
|
+
|
|
176
|
+
self.mysql.update('task_scheduler', ['status', 'step_nr'], ['RUNNING', 1], 'WHERE `id` = {}'.format(self.task_id))
|
|
177
|
+
|
|
178
|
+
def db_variable(self, variable_name: str, default_value_if_temp_is_empty: bool = False):
|
|
179
|
+
"""
|
|
180
|
+
Get a value from the task_variables table corresponding with the given name. If the task is manually started
|
|
181
|
+
(run_instant = 1), then the temp_value will be returned. This is to give the possibility for users in the frontend to run
|
|
182
|
+
a task once manual with other values then normal without overwriting the normal values.
|
|
183
|
+
:param variable_name: the name of the variable
|
|
184
|
+
:param default_value_if_temp_is_empty: bool to determine whether default value should be used if temp value is empty when manually started
|
|
185
|
+
:return: the value of the given variable.
|
|
186
|
+
"""
|
|
187
|
+
if self.mysql_enabled:
|
|
188
|
+
if self.task_manual_started or self.started_local:
|
|
189
|
+
response = self.mysql.select('task_variables', 'temp_value, value',
|
|
190
|
+
f'WHERE name = \'{variable_name}\' AND task_id = {self.task_id}')
|
|
191
|
+
else:
|
|
192
|
+
response = self.mysql.select('task_variables', 'value',
|
|
193
|
+
f'WHERE name = \'{variable_name}\' AND task_id = {self.task_id}')
|
|
194
|
+
if len(response) == 0:
|
|
195
|
+
raise Exception(f'Variable with name \'{variable_name}\' does not exist')
|
|
196
|
+
else:
|
|
197
|
+
value = response[0][0]
|
|
198
|
+
if value is None and default_value_if_temp_is_empty is True and len(response[0]) > 0:
|
|
199
|
+
value = response[0][1]
|
|
200
|
+
return value
|
|
201
|
+
else:
|
|
202
|
+
value: str = input(f'Your MYSQL connection is not defined, enter the value for the variable {variable_name}: ')
|
|
203
|
+
return value
|
|
204
|
+
|
|
205
|
+
def write_execution_log(self, message: str, data, loglevel: str = 'INFO', full_extract: bool = False):
|
|
206
|
+
"""
|
|
207
|
+
Writes messages to the database. Give the message and the level of the log
|
|
208
|
+
:param message: A string with a message for the log
|
|
209
|
+
:param loglevel: You can choose between DEBUG, INFO, ERROR or CRITICAL (DEBUG is most granulated, CRITICAL the less)
|
|
210
|
+
:param data: Uploaded data by the interface that has to be logged in ElasticSearch, if you have nothing to log, use None
|
|
211
|
+
:param full_extract: If the data is a full load, set this to True. This will prevent the payload from being logged in ElasticSearch
|
|
212
|
+
:return: If writing to the database is successful or not
|
|
213
|
+
"""
|
|
214
|
+
# Validate if the provided loglevel is valid
|
|
215
|
+
allowed_loglevels = ['DEBUG', 'INFO', 'ERROR', 'CRITICAL']
|
|
216
|
+
if loglevel not in allowed_loglevels:
|
|
217
|
+
raise Exception('You\'ve entered a not allowed loglevel. Choose one of: {}'.format(allowed_loglevels))
|
|
218
|
+
|
|
219
|
+
# Get the linenumber from where the logline is executed.
|
|
220
|
+
file_name, line_number, function_name = self.__get_caller_info()
|
|
221
|
+
|
|
222
|
+
print('{} at line: {}'.format(message, line_number))
|
|
223
|
+
|
|
224
|
+
# Put everything together in the payload for ElasticSearch and send it
|
|
225
|
+
payload = {
|
|
226
|
+
'task_id': self.task_id,
|
|
227
|
+
'reload_id': self.run_id,
|
|
228
|
+
'started_at': datetime.datetime.now().isoformat(),
|
|
229
|
+
'partner_id': self.partner_id,
|
|
230
|
+
'customer_id': self.customer_id,
|
|
231
|
+
'customer': self.customer,
|
|
232
|
+
'file_name': file_name,
|
|
233
|
+
'function_name': function_name,
|
|
234
|
+
'line_number': line_number,
|
|
235
|
+
'task_loglevel': self.loglevel,
|
|
236
|
+
'line_loglevel': loglevel,
|
|
237
|
+
'message': message
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
# Count the errors for relevant log levels
|
|
241
|
+
if loglevel == 'ERROR' or loglevel == 'CRITICAL':
|
|
242
|
+
self.error_count += 1
|
|
243
|
+
|
|
244
|
+
if self.elastic_enabled:
|
|
245
|
+
# For Elastic, we need to have the data in JSON format. Handling different data types and preparing extra payload information based on the data type
|
|
246
|
+
# If the data is just a series, count rows, columns and cells
|
|
247
|
+
if isinstance(data, pd.Series):
|
|
248
|
+
dataframe = pd.DataFrame(data).T
|
|
249
|
+
extra_payload = {
|
|
250
|
+
'rows': len(dataframe),
|
|
251
|
+
'columns': len(dataframe.columns),
|
|
252
|
+
'cells': len(dataframe) * len(dataframe.columns),
|
|
253
|
+
}
|
|
254
|
+
if not full_extract:
|
|
255
|
+
extra_payload['payload'] = dataframe.to_json(orient='records')
|
|
256
|
+
# If the data is a list, count rows, columns and cells
|
|
257
|
+
elif isinstance(data, dict):
|
|
258
|
+
records = self.__count_keys(data)
|
|
259
|
+
extra_payload = {
|
|
260
|
+
'rows': 1,
|
|
261
|
+
'columns': records,
|
|
262
|
+
'cells': records,
|
|
263
|
+
}
|
|
264
|
+
if not full_extract:
|
|
265
|
+
extra_payload['payload'] = data
|
|
266
|
+
elif isinstance(data, pd.DataFrame):
|
|
267
|
+
extra_payload = {
|
|
268
|
+
'rows': len(data),
|
|
269
|
+
'columns': len(data.columns),
|
|
270
|
+
'cells': len(data) * len(data.columns),
|
|
271
|
+
}
|
|
272
|
+
if not full_extract:
|
|
273
|
+
extra_payload['payload'] = data.to_json(orient='records')
|
|
274
|
+
# If the data is a response from an URL request, also store all the information about the URL request.
|
|
275
|
+
elif isinstance(data, requests.Response):
|
|
276
|
+
records = 1
|
|
277
|
+
if data.request.body is not None:
|
|
278
|
+
records = self.__count_keys(json.loads(data.request.body))
|
|
279
|
+
if isinstance(data.request.body, bytes):
|
|
280
|
+
data.request.body = data.request.body.decode('utf-8')
|
|
281
|
+
extra_payload = {
|
|
282
|
+
'response': data.text,
|
|
283
|
+
'status_code': data.status_code,
|
|
284
|
+
'url': data.url,
|
|
285
|
+
'method': data.request.method,
|
|
286
|
+
'rows': 1,
|
|
287
|
+
'columns': records,
|
|
288
|
+
'cells': records,
|
|
289
|
+
}
|
|
290
|
+
if not full_extract:
|
|
291
|
+
extra_payload['payload'] = data.request.body
|
|
292
|
+
elif data is None:
|
|
293
|
+
extra_payload = {}
|
|
294
|
+
else:
|
|
295
|
+
extra_payload = {
|
|
296
|
+
'data_type': str(type(data)),
|
|
297
|
+
}
|
|
298
|
+
if not full_extract:
|
|
299
|
+
extra_payload['payload'] = data
|
|
300
|
+
|
|
301
|
+
# Modify payload based on 'full_load' flag
|
|
302
|
+
if data is not None and full_extract is True:
|
|
303
|
+
extra_payload['full_load'] = True
|
|
304
|
+
elif data is not None and full_extract is False:
|
|
305
|
+
extra_payload['full_load'] = False
|
|
306
|
+
|
|
307
|
+
payload.update(extra_payload)
|
|
308
|
+
if not self.elastic_unreachable:
|
|
309
|
+
self.es.post_document(index_name=self.es_index, document=payload)
|
|
310
|
+
else:
|
|
311
|
+
self._save_log_locally(payload, 'elastic')
|
|
312
|
+
|
|
313
|
+
# Write the logline to the MYSQL database, depends on the chosen loglevel in the task
|
|
314
|
+
if self.mysql_enabled:
|
|
315
|
+
mysql_log_data = {
|
|
316
|
+
'reload_id': self.run_id,
|
|
317
|
+
'task_id': self.task_id,
|
|
318
|
+
'log_level': loglevel,
|
|
319
|
+
'created_at': datetime.datetime.now(),
|
|
320
|
+
'line_number': line_number,
|
|
321
|
+
'message': re.sub("[']", '', message)
|
|
322
|
+
}
|
|
323
|
+
if not self.mysql_unreachable:
|
|
324
|
+
try:
|
|
325
|
+
query = f"INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({mysql_log_data['reload_id']}, {mysql_log_data['task_id']}, '{mysql_log_data['log_level']}', '{mysql_log_data['created_at']}', {mysql_log_data['line_number']}, '{mysql_log_data['message']}')"
|
|
326
|
+
if self.loglevel == 'DEBUG' or (self.loglevel == 'INFO' and loglevel != 'DEBUG') or (self.loglevel == 'ERROR' and loglevel in ['ERROR', 'CRITICAL']) or (self.loglevel == 'CRITICAL' and loglevel == 'CRITICAL'):
|
|
327
|
+
self.mysql.raw_query(query, insert=True)
|
|
328
|
+
except pymysql.err.OperationalError as e:
|
|
329
|
+
print(f"MySQL connection lost during logging: {e}")
|
|
330
|
+
self.mysql_unreachable = True
|
|
331
|
+
self._save_log_locally(mysql_log_data, 'mysql')
|
|
332
|
+
except pymysql.err.InterfaceError as e:
|
|
333
|
+
print(f"MySQL connection closed: {e}")
|
|
334
|
+
self.mysql_unreachable = True
|
|
335
|
+
self._save_log_locally(mysql_log_data, 'mysql')
|
|
336
|
+
except Exception as e:
|
|
337
|
+
print(f"Error during logging to MySQL: {e}")
|
|
338
|
+
self._save_log_locally(mysql_log_data, 'mysql')
|
|
339
|
+
else:
|
|
340
|
+
self._save_log_locally(mysql_log_data, 'mysql')
|
|
341
|
+
|
|
342
|
+
def update_execution_step(self, step_number: int):
|
|
343
|
+
"""
|
|
344
|
+
Update the current step number in the task_scheduler table so that user's in the frontend of BrynQ can see where a task is at any moment
|
|
345
|
+
:param step_number: Give only a number
|
|
346
|
+
:return: nothing
|
|
347
|
+
"""
|
|
348
|
+
# Update the step number in the task_scheduler table
|
|
349
|
+
warnings.warn("Execution steps are deprecated, please stop calling this method. It does nothing anymore", DeprecationWarning)
|
|
350
|
+
return
|
|
351
|
+
|
|
352
|
+
def error_handling(self, e: Exception, breaking=True, send_to_teams=False):
|
|
353
|
+
"""
|
|
354
|
+
This function handles errors that occur in the scheduler. Logs the traceback, updates run statuses and notifies users
|
|
355
|
+
:param e: the Exception that is to be handled
|
|
356
|
+
:param task_id: The scheduler task id
|
|
357
|
+
:param mysql_con: The connection which is used to update the scheduler task status
|
|
358
|
+
:param logger: The logger that is used to write the logging status to
|
|
359
|
+
:param breaking: Determines if the error is breaking or code will continue
|
|
360
|
+
:param started_at: Give the time the task is started
|
|
361
|
+
:return: nothing
|
|
362
|
+
"""
|
|
363
|
+
# Get the linenumber from where the logline is executed.
|
|
364
|
+
file_name, line_number, function_name = self.__get_caller_info()
|
|
365
|
+
|
|
366
|
+
# Format error to a somewhat readable format
|
|
367
|
+
exc_type, exc_obj, exc_tb = sys.exc_info()
|
|
368
|
+
error = str(e)[:400].replace('\'', '').replace('\"', '') + ' | Line: {}'.format(exc_tb.tb_lineno)
|
|
369
|
+
|
|
370
|
+
if self.elastic_enabled:
|
|
371
|
+
# Preparing the primary payload with error details for upload to elastic and send it
|
|
372
|
+
payload = {
|
|
373
|
+
'task_id': self.task_id,
|
|
374
|
+
'reload_id': self.run_id,
|
|
375
|
+
'started_at': datetime.datetime.now().isoformat(),
|
|
376
|
+
'partner_id': self.partner_id,
|
|
377
|
+
'customer_id': self.customer_id,
|
|
378
|
+
'customer': self.customer,
|
|
379
|
+
'file_name': file_name,
|
|
380
|
+
'function_name': function_name,
|
|
381
|
+
'line_number': line_number,
|
|
382
|
+
'task_loglevel': self.loglevel,
|
|
383
|
+
'line_loglevel': 'CRITICAL',
|
|
384
|
+
'message': str(e),
|
|
385
|
+
'traceback': traceback.format_exc()
|
|
386
|
+
}
|
|
387
|
+
self.es.post_document(index_name=self.es_index, document=payload)
|
|
388
|
+
|
|
389
|
+
if self.mysql_enabled:
|
|
390
|
+
self.error_count += 1
|
|
391
|
+
# Get scheduler task details for logging
|
|
392
|
+
task_details = \
|
|
393
|
+
self.mysql.select('task_scheduler, data_interfaces', 'data_interfaces.docker_image, data_interfaces.runfile_path', 'WHERE task_scheduler.data_interface_id = data_interfaces.id AND task_scheduler.id = {}'.format(self.task_id))[0]
|
|
394
|
+
taskname = task_details[0]
|
|
395
|
+
customer = task_details[1].split('/')[-1].split('.')[0]
|
|
396
|
+
now = datetime.datetime.now()
|
|
397
|
+
|
|
398
|
+
# Log to log table in the database
|
|
399
|
+
if self.mysql_enabled:
|
|
400
|
+
query = "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, 'CRITICAL', '{}', {}, '{}')".format(self.run_id, self.task_id, now, exc_tb.tb_lineno, error)
|
|
401
|
+
self.mysql.raw_query(query, insert=True)
|
|
402
|
+
if send_to_teams:
|
|
403
|
+
Functions.send_error_to_teams(database=customer, task_number=self.task_id, task_title=taskname)
|
|
404
|
+
if breaking:
|
|
405
|
+
# Set scheduler status to failed
|
|
406
|
+
self.mysql.update('task_scheduler', ['status', 'last_reload', 'last_error_message', 'step_nr'],
|
|
407
|
+
['IDLE', now, 'Failed', 0],
|
|
408
|
+
'WHERE `id` = {}'.format(self.task_id))
|
|
409
|
+
|
|
410
|
+
self.mysql.update(table='task_scheduler_log',
|
|
411
|
+
columns=['reload_status', 'finished_at'],
|
|
412
|
+
values=['Failed', f'{now}'],
|
|
413
|
+
filter=f'WHERE `reload_id` = {self.run_id}')
|
|
414
|
+
if self.email_after_errors:
|
|
415
|
+
self.email_errors(failed=True)
|
|
416
|
+
# Remove the temp values from the variables table
|
|
417
|
+
self.mysql.raw_query(f'UPDATE `task_variables` SET temp_value = null WHERE task_id = {self.task_id}', insert=True)
|
|
418
|
+
|
|
419
|
+
# Start the chained tasks if it there are tasks which should start if this one is failed
|
|
420
|
+
self.start_chained_tasks(finished_task_status='FAILED')
|
|
421
|
+
|
|
422
|
+
raise Exception(error)
|
|
423
|
+
|
|
424
|
+
def finish_task(self, reload_instant=False, log_limit: Optional[int] = 10000, log_date_limit: datetime.date = None):
    """
    At the end of the script, write the outcome to the database. Write if the task is finished with or without errors, Email to a contactperson if this variable is given in the
    variables table. Also clean up the task_execution_log table. All database work is skipped when MySQL logging is disabled.
    :param reload_instant: If the task should start again after it's finished. Only applied when the task finished without errors
    :param log_limit: The maximum number of non-error log lines whose reloads are kept (default 10000); log lines of other reloads of this task are deleted. None means no limit
    :param log_date_limit: The date from which logs should be kept. If this is set, logs older than this date will be deleted.
    :return: nothing
    """
    # NOTE(review): block nesting was reconstructed from a listing without indentation; it is assumed
    # that everything up to print(resp) belongs to the mysql_enabled branch - confirm against the original file.
    if self.mysql_enabled:
        # If reload instant is true, this adds an extra field 'run_instant' to the update query, and sets the value to 1. This makes the task reload immediately after it's finished
        field = ['run_instant', 'next_reload'] if reload_instant else []
        value = ['1', datetime.datetime.now()] if reload_instant else []
        if self.error_count > 0:
            # Finished, but error lines were logged during the run (the reload_instant fields are not used here)
            self.mysql.update('task_scheduler', ['status', 'last_reload', 'last_error_message', 'step_nr'],
                              ['IDLE', datetime.datetime.now(), 'FinishedWithErrors', 0],
                              'WHERE `id` = {}'.format(self.task_id))
            self.mysql.update(table='task_scheduler_log',
                              columns=['reload_status', 'finished_at'],
                              values=['FinishedWithErrors', f'{datetime.datetime.now()}'],
                              filter=f'WHERE `reload_id` = {self.run_id}')
            # If the variable self.email_after_errors is set to True, send an email with the number of errors to the given user
            if self.email_after_errors:
                self.email_errors(failed=False)
        else:
            # NOTE(review): the status text written here ('FinishedSucces') differs in spelling from the one
            # written to task_scheduler_log below ('FinishedSuccess'); other systems may depend on either, so both are left as they are
            self.mysql.update(table='task_scheduler',
                              columns=['status', 'last_reload', 'last_error_message', 'step_nr', 'stopped_by_user'] + field,
                              values=['IDLE', datetime.datetime.now(), 'FinishedSucces', 0, 0] + value,
                              filter='WHERE `id` = {}'.format(self.task_id))

            self.mysql.update(table='task_scheduler_log',
                              columns=['reload_status', 'finished_at'],
                              values=['FinishedSuccess', f'{datetime.datetime.now()}'],
                              filter=f'WHERE `reload_id` = {self.run_id}')

        # Remove the temp values from the variables table
        self.mysql.raw_query(f'UPDATE `task_variables` SET temp_value = null WHERE task_id = {self.task_id}', insert=True)

        # Start the new task if it there is a task which should start if this one is finished
        # (the status passed is 'SUCCESS' in both branches above, also when errors were logged)
        self.start_chained_tasks(finished_task_status='SUCCESS')

        # Clean up execution log
        # set this date filter above the actual delete filter because of the many uncooperative quotation marks involved in the whole filter
        log_date_limit_filter = f"AND created_at >= \'{log_date_limit.strftime('%Y-%m-%d')}\'" if log_date_limit is not None else None
        # Delete every log line of this task whose reload is NOT among the reloads of the newest
        # non-ERROR/non-CRITICAL lines (optionally limited by date and by log_limit). The extra
        # "temp" subquery wraps the inner select that reads the table being deleted from.
        delete_filter = f"WHERE task_id = {self.task_id} " \
                        f"AND reload_id NOT IN (SELECT reload_id FROM (SELECT reload_id FROM `task_execution_log` WHERE task_id = {self.task_id} " \
                        f"AND log_level != 'CRITICAL' " \
                        f"AND log_level != 'ERROR' " \
                        f"{log_date_limit_filter if log_date_limit_filter is not None else ''} " \
                        f"ORDER BY created_at DESC {f' LIMIT {log_limit} ' if log_limit is not None else ''}) temp)"
        resp = self.mysql.delete(table="task_execution_log", filter=delete_filter)
        print(resp)
    print(f'{datetime.datetime.now()} - Task finished')
|
|
477
|
+
|
|
478
|
+
def start_chained_tasks(self, finished_task_status: str):
|
|
479
|
+
if self.mysql_enabled:
|
|
480
|
+
filter = f'WHERE start_after_task_id = \'{self.task_id}\' AND start_after_preceding_task = \'{finished_task_status}\''
|
|
481
|
+
response = self.mysql.select(table='task_scheduler', selection='id', filter=filter)
|
|
482
|
+
if len(response) > 0:
|
|
483
|
+
tasks_to_run = [str(task[0]) for task in response]
|
|
484
|
+
self.mysql.update(table='task_scheduler', columns=['run_instant'], values=['1'], filter=f'WHERE id IN({",".join(tasks_to_run)})')
|
|
485
|
+
else:
|
|
486
|
+
print("Unable to start chained tasks, MySQL is disabled")
|
|
487
|
+
|
|
488
|
+
def email_errors(self, failed):
|
|
489
|
+
# The mails to email to should be stored in the task_variables table with the variable email_errors_to
|
|
490
|
+
email_variable = self.db_variable('email_errors_to')
|
|
491
|
+
if email_variable is not None:
|
|
492
|
+
email_to = email_variable.split(',')
|
|
493
|
+
if isinstance(email_to, list):
|
|
494
|
+
# The email_errors_to variable is a simple string. Convert it to a list and add a name because mandrill is asking for it
|
|
495
|
+
email_list = []
|
|
496
|
+
for i in email_to:
|
|
497
|
+
email_list.append({'name': 'BrynQ User', 'mail': i.strip()})
|
|
498
|
+
|
|
499
|
+
# Recieve the task name and the finished_at time from the task_scheduler table joined with the data_interfaces table
|
|
500
|
+
response = self.mysql.select(
|
|
501
|
+
table='task_scheduler LEFT JOIN data_interfaces ON task_scheduler.data_interface_id = data_interfaces.id ',
|
|
502
|
+
selection="title, last_reload",
|
|
503
|
+
filter=f'WHERE task_scheduler.id = {self.task_id}'
|
|
504
|
+
)
|
|
505
|
+
task = response[0][0]
|
|
506
|
+
finished_at = response[0][1]
|
|
507
|
+
|
|
508
|
+
# Set the content of the mail and all other stuff
|
|
509
|
+
if failed:
|
|
510
|
+
subject = f'Task \'{task}\' has failed'
|
|
511
|
+
content = f'Task \'{task}\' with task ID \'{self.task_id}\' failed during its last run and was stopped at {finished_at}. ' \
|
|
512
|
+
f'The task is failed. ' \
|
|
513
|
+
f'to visit the BrynQ scheduler, click here: <a href="https://app.brynq.com/interfaces/">here</a>. Here you can find the logs and find more information on why this task had failed.'
|
|
514
|
+
else:
|
|
515
|
+
subject = f'Task \'{task}\' is finished with errors'
|
|
516
|
+
content = f'Task \'{task}\' with ID \'{self.task_id}\' has runned and is finished at {finished_at}. ' \
|
|
517
|
+
f'The task is finished with {self.error_count} errors. ' \
|
|
518
|
+
f'to visit the BrynQ scheduler, click here: <a href="https://app.brynq.com/interfaces/">here</a>. Here you can find the logs and find more information on why this task had some errors.'
|
|
519
|
+
MailClient().send_mail(email_to=email_list, subject=subject, content=content, language='EN')
|
|
520
|
+
|
|
521
|
+
def _save_log_locally(self, payload, system):
|
|
522
|
+
# system is 'mysql' or 'elastic'
|
|
523
|
+
log_file_path = os.path.join(self.local_log_dir, f'{system}_log_{self.run_id}.json')
|
|
524
|
+
try:
|
|
525
|
+
if os.path.exists(log_file_path):
|
|
526
|
+
with open(log_file_path, 'r') as f:
|
|
527
|
+
logs = json.load(f)
|
|
528
|
+
else:
|
|
529
|
+
logs = []
|
|
530
|
+
logs.append(payload)
|
|
531
|
+
with open(log_file_path, 'w') as f:
|
|
532
|
+
json.dump(logs, f)
|
|
533
|
+
except Exception as e:
|
|
534
|
+
print(f"Error saving log locally: {e}")
|
|
535
|
+
|
|
536
|
+
def _process_local_mysql_logs(self):
|
|
537
|
+
mysql_log_files = [f for f in os.listdir(self.local_log_dir) if f.startswith('mysql_log_')]
|
|
538
|
+
for log_file in mysql_log_files:
|
|
539
|
+
log_file_path = os.path.join(self.local_log_dir, log_file)
|
|
540
|
+
try:
|
|
541
|
+
with open(log_file_path, 'r') as f:
|
|
542
|
+
logs = json.load(f)
|
|
543
|
+
# Process logs
|
|
544
|
+
for log_entry in logs:
|
|
545
|
+
self._write_log_to_mysql(log_entry)
|
|
546
|
+
# Remove the log file after processing
|
|
547
|
+
os.remove(log_file_path)
|
|
548
|
+
except Exception as e:
|
|
549
|
+
print(f"Error processing MySQL log file {log_file}: {e}")
|
|
550
|
+
|
|
551
|
+
def _process_local_elastic_logs(self):
|
|
552
|
+
elastic_log_files = [f for f in os.listdir(self.local_log_dir) if f.startswith('elastic_log_')]
|
|
553
|
+
for log_file in elastic_log_files:
|
|
554
|
+
log_file_path = os.path.join(self.local_log_dir, log_file)
|
|
555
|
+
try:
|
|
556
|
+
with open(log_file_path, 'r') as f:
|
|
557
|
+
logs = json.load(f)
|
|
558
|
+
# Process logs
|
|
559
|
+
for log_entry in logs:
|
|
560
|
+
self.es.post_document(index_name=self.es_index, document=log_entry)
|
|
561
|
+
# Remove the log file after processing
|
|
562
|
+
os.remove(log_file_path)
|
|
563
|
+
except Exception as e:
|
|
564
|
+
print(f"Error processing ElasticSearch log file {log_file}: {e}")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
brynq_sdk_task_scheduler
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name='brynq_sdk_task_scheduler',
|
|
5
|
-
version='3.0.
|
|
5
|
+
version='3.0.7',
|
|
6
6
|
description='Code to execute tasks in BrynQ.com with the task scheduler',
|
|
7
7
|
long_description='Code to execute tasks in the BrynQ.com platform with the task scheduler',
|
|
8
8
|
author='BrynQ',
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|