brynq-sdk-task-scheduler 1.1.4__tar.gz → 1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 1.0
2
2
  Name: brynq_sdk_task_scheduler
3
- Version: 1.1.4
3
+ Version: 1.2.1
4
4
  Summary: Code to execute tasks in BrynQ.com with the task scheduler
5
5
  Home-page: UNKNOWN
6
6
  Author: BrynQ
@@ -29,36 +29,43 @@ class TaskScheduler(BrynQ):
29
29
  with level DEBUG not is stored
30
30
  """
31
31
  super().__init__()
32
- self.es = Elastic()
33
32
  self.mysql = MySQL()
34
- self.email_after_errors = email_after_errors
35
- self.task_id = task_id
36
- self.loglevel = loglevel
37
- self.started_at = datetime.datetime.now()
38
- # If the task is started via the task_scheduler, the following 3 parameters will be passed by the scheduler
39
- if len(sys.argv[1:4]) > 0:
40
- self.started_local = False
41
- self.customer_db, self.task_id, self.run_id = sys.argv[1:4]
42
- # If the task is started locally, the parameters should be set locally
43
- else:
44
- self.started_local = True
45
- self.customer_db = 'placeholder'
46
- self.run_id = int(round(time.time() * 100000))
47
- print(self.task_id, self.run_id)
48
- self.error_count = 0
33
+ try:
34
+ self.es = Elastic()
35
+ self.email_after_errors = email_after_errors
36
+ self.customer_db = self.mysql.database
37
+ self.customer_id = self.mysql.raw_query(f'SELECT id FROM sc.customers WHERE dbname = \'{self.customer_db}\'')[0][0]
38
+ self.partner_id = os.getenv('PARTNER_ID').lower().replace(' ', '_') if os.getenv('PARTNER_ID') else 'brynq'
39
+ self.task_id = task_id
40
+ self.loglevel = loglevel
41
+ self.started_at = datetime.datetime.now()
42
+ # If the task is started via the task_scheduler, the following 3 parameters will be passed by the scheduler
43
+ if len(sys.argv[1:4]) > 0:
44
+ self.started_local = False
45
+ self.customer_db, self.task_id, self.run_id = sys.argv[1:4]
46
+ # If the task is started locally, the parameters should be set locally
47
+ else:
48
+ self.started_local = True
49
+ self.run_id = int(round(time.time() * 100000))
50
+ print(self.task_id, self.run_id)
51
+ self.error_count = 0
49
52
 
50
- # Check if the log tables exists in the customer database. If not, create them
51
- # Mysql throws a warning when a table already exists. We don't care so we ignore warnings. (not exceptions!)
52
- warnings.filterwarnings('ignore')
53
+ # Check if the log tables exists in the customer database. If not, create them
54
+ # Mysql throws a warning when a table already exists. We don't care so we ignore warnings. (not exceptions!)
55
+ warnings.filterwarnings('ignore')
53
56
 
54
- # Check if the task is started on schedule or manual. store in a variable to use later in the script
55
- self.task_manual_started = self.check_if_task_manual_started()
57
+ # Check if the task is started on schedule or manual. store in a variable to use later in the script
58
+ self.task_manual_started = self.check_if_task_manual_started()
56
59
 
57
- # Creates Elasticsearch index and data view if not exists
58
- self.es.initialize_customer()
60
+ # Creates Elasticsearch index and data view if not exists
61
+ self.es_index = f"task_execution_log_{self.customer_db}_{self.started_at.strftime('%Y_%m')}"
62
+ self.es.create_index(index_name=self.es_index)
63
+ self.es.create_data_view(space_name='interfaces', view_name=f'task_execution_log_{self.customer_db}', name=f'Task execution log {self.customer_db}', time_field='started_at')
59
64
 
60
- # Start the task and setup the data in the database
61
- self.start_task()
65
+ # Start the task and setup the data in the database
66
+ self.start_task()
67
+ except Exception as e:
68
+ self.error_handling(e)
62
69
 
63
70
  def __count_keys(self, json_obj):
64
71
  if not isinstance(json_obj, dict):
@@ -71,6 +78,14 @@ class TaskScheduler(BrynQ):
71
78
  key_count += self.__count_keys(value) # Recursively count keys in nested dictionaries
72
79
  return key_count
73
80
 
81
+ def __get_caller_info(self):
82
+ stack = inspect.stack()
83
+ caller_frame = stack[2][0]
84
+ file_name = caller_frame.f_code.co_filename
85
+ line_number = caller_frame.f_lineno
86
+ function_name = stack[2][3]
87
+ return file_name, line_number, function_name
88
+
74
89
  def create_task_execution_steps(self, step_details: list):
75
90
  """
76
91
  Check if the given steps already exists in the task_execution_steps table. If not, update or insert the values in the table
@@ -158,7 +173,8 @@ class TaskScheduler(BrynQ):
158
173
  if loglevel not in allowed_loglevels:
159
174
  raise Exception('You\'ve entered a not allowed loglevel. Choose one of: {}'.format(allowed_loglevels))
160
175
 
161
- # Handling different data types and preparing extra payload information based on the data type
176
+ # For Elastic, we need to have the data in JSON format. Handling different data types and preparing extra payload information based on the data type
177
+ # If the data is just a series, count rows, columns and cells
162
178
  if isinstance(data, pd.Series):
163
179
  dataframe = pd.DataFrame(data).T
164
180
  extra_payload = {
@@ -168,6 +184,7 @@ class TaskScheduler(BrynQ):
168
184
  }
169
185
  if not full_extract:
170
186
  extra_payload['payload'] = dataframe.to_json(orient='records')
187
+ # If the data is a dict, count its keys (including keys of nested dicts) as the number of records
171
188
  elif isinstance(data, dict):
172
189
  records = self.__count_keys(data)
173
190
  extra_payload = {
@@ -185,6 +202,7 @@ class TaskScheduler(BrynQ):
185
202
  }
186
203
  if not full_extract:
187
204
  extra_payload['payload'] = data.to_json(orient='records')
205
+ # If the data is a response from a URL request, also store all the information about the URL request.
188
206
  elif isinstance(data, requests.Response):
189
207
  records = 1
190
208
  if data.request.body is not None:
@@ -217,41 +235,37 @@ class TaskScheduler(BrynQ):
217
235
  elif data is not None and full_extract is False:
218
236
  extra_payload['full_load'] = False
219
237
 
220
- # Preparing the primary payload with log details
238
+ # Get the file name, line number and function name of the code that called this method.
239
+ file_name, line_number, function_name = self.__get_caller_info()
240
+
241
+ # Put everything together in the payload for ElasticSearch and send it
221
242
  payload = {
222
- 'reload_id': self.run_id,
223
243
  'task_id': self.task_id,
224
- 'customer_id': os.getenv('BRYNQ_SUBDOMAIN').lower().replace(' ', '_'),
244
+ 'reload_id': self.run_id,
225
245
  'started_at': datetime.datetime.now().isoformat(),
226
- 'loglevel': loglevel,
246
+ 'partner_id': self.partner_id,
247
+ 'customer_id': self.customer_id,
248
+ 'customer': os.getenv('BRYNQ_SUBDOMAIN').lower().replace(' ', '_'),
249
+ 'file_name': file_name,
250
+ 'function_name': function_name,
251
+ 'line_number': line_number,
252
+ 'task_loglevel': self.loglevel,
253
+ 'line_loglevel': loglevel,
227
254
  'message': message
228
255
  }
229
256
  payload.update(extra_payload)
257
+ self.es.post_document(index_name=self.es_index, document=payload)
230
258
 
231
- # Sending the payload to ElasticSearch
232
- self.es.task_execution_log(payload)
233
-
234
- # Get the linenumber from where the logline is executed. Get the stacktrace of this action, jump 1 file up and pick then the linenumber (second item)
235
- linenumber = inspect.getouterframes(inspect.currentframe())[1][2]
236
- # Write the logline to the database, depends on the chosen loglevel in the task
237
- print('{} at line: {}'.format(message, linenumber))
259
+ # Write the log line to the MySQL database, depending on the log level chosen for the task
260
+ print('{} at line: {}'.format(message, line_number))
238
261
  # Remove quotes from message since these break the query
239
262
  message = re.sub("[']", '', message)
240
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
241
- information = {
242
- 'reload_id': self.run_id,
243
- 'task_id': self.task_id,
244
- 'log_level': loglevel,
245
- 'line_number': linenumber,
246
- 'message': message,
247
- 'created_at': timestamp
248
- }
249
263
  if self.loglevel == 'DEBUG':
250
264
  # Count the errors
251
265
  if loglevel == 'ERROR' or loglevel == 'CRITICAL':
252
266
  self.error_count += 1
253
267
  return self.mysql.raw_query(
254
- "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, '{}', '{}', {}, '{}')".format(self.run_id, self.task_id, loglevel, datetime.datetime.now(), linenumber, message), insert=True)
268
+ "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, '{}', '{}', {}, '{}')".format(self.run_id, self.task_id, loglevel, datetime.datetime.now(), line_number, message), insert=True)
255
269
  return self.mysql.update(table='task_execution_log',
256
270
  columns=['reload_id', 'task_id', 'log_level', 'created_at', 'line_number', 'message'],
257
271
  values=[self.run_id, self.task_id, loglevel, datetime.datetime.now(), linenumber, message])
@@ -260,15 +274,15 @@ class TaskScheduler(BrynQ):
260
274
  if loglevel == 'ERROR' or loglevel == 'CRITICAL':
261
275
  self.error_count += 1
262
276
  return self.mysql.raw_query(
263
- "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, '{}', '{}', {}, '{}')".format(self.run_id, self.task_id, loglevel, datetime.datetime.now(), linenumber, message), insert=True)
277
+ "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, '{}', '{}', {}, '{}')".format(self.run_id, self.task_id, loglevel, datetime.datetime.now(), line_number, message), insert=True)
264
278
  elif self.loglevel == 'ERROR' and (loglevel == 'ERROR' or loglevel == 'CRITICAL'):
265
279
  self.error_count += 1
266
280
  return self.mysql.raw_query(
267
- "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, '{}', '{}', {}, '{}')".format(self.run_id, self.task_id, loglevel, datetime.datetime.now(), linenumber, message), insert=True)
281
+ "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, '{}', '{}', {}, '{}')".format(self.run_id, self.task_id, loglevel, datetime.datetime.now(), line_number, message), insert=True)
268
282
  elif self.loglevel == 'CRITICAL' and loglevel == 'CRITICAL':
269
283
  self.error_count += 1
270
284
  return self.mysql.raw_query(
271
- "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, '{}', '{}', {}, '{}')".format(self.run_id, self.task_id, loglevel, datetime.datetime.now(), linenumber, message), insert=True)
285
+ "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, '{}', '{}', {}, '{}')".format(self.run_id, self.task_id, loglevel, datetime.datetime.now(), line_number, message), insert=True)
272
286
 
273
287
  def update_execution_step(self, step_number: int):
274
288
  """
@@ -290,24 +304,32 @@ class TaskScheduler(BrynQ):
290
304
  :param started_at: Give the time the task is started
291
305
  :return: nothing
292
306
  """
307
+ # Format error to a somewhat readable format
308
+ exc_type, exc_obj, exc_tb = sys.exc_info()
309
+ error = str(e)[:400].replace('\'', '').replace('\"', '') + ' | Line: {}'.format(exc_tb.tb_lineno)
310
+
311
+ # Get the file name, line number and function name of the code that called this method.
312
+ file_name, line_number, function_name = self.__get_caller_info()
293
313
 
294
- # Preparing the primary payload with error details for upload to elastic
314
+ # Preparing the primary payload with error details for upload to elastic and send it
295
315
  payload = {
296
- 'reload_id': self.run_id,
297
316
  'task_id': self.task_id,
298
- 'customer_id': os.getenv('BRYNQ_SUBDOMAIN').lower().replace(' ', '_'),
317
+ 'reload_id': self.run_id,
299
318
  'started_at': datetime.datetime.now().isoformat(),
300
- 'loglevel': 'CRITICAL',
319
+ 'partner_id': self.partner_id,
320
+ 'customer_id': self.customer_id,
321
+ 'customer': os.getenv('BRYNQ_SUBDOMAIN').lower().replace(' ', '_'),
322
+ 'file_name': file_name,
323
+ 'function_name': function_name,
324
+ 'line_number': line_number,
325
+ 'task_loglevel': self.loglevel,
326
+ 'line_loglevel': 'CRITICAL',
301
327
  'message': str(e),
302
328
  'traceback': traceback.format_exc()
303
329
  }
330
+ self.es.post_document(index_name=self.es_index, document=payload)
304
331
 
305
- # Sending the payload to ElasticSearch
306
- self.es.task_execution_log(payload)
307
332
 
308
- # Format error to a somewhat readable format
309
- exc_type, exc_obj, exc_tb = sys.exc_info()
310
- error = str(e)[:400].replace('\'', '').replace('\"', '') + ' | Line: {}'.format(exc_tb.tb_lineno)
311
333
  # Get scheduler task details for logging
312
334
  task_details = \
313
335
  self.mysql.select('task_scheduler, data_interfaces', 'data_interfaces.docker_image, data_interfaces.runfile_path', 'WHERE task_scheduler.data_interface_id = data_interfaces.id AND task_scheduler.id = {}'.format(self.task_id))[0]
@@ -320,15 +342,6 @@ class TaskScheduler(BrynQ):
320
342
  ['IDLE', datetime.datetime.now(), 'Failed', 0],
321
343
  'WHERE `id` = {}'.format(self.task_id))
322
344
  # Log to database
323
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
324
- information = {
325
- 'reload_id': self.run_id,
326
- 'task_id': self.task_id,
327
- 'log_level': 'CRITICAL',
328
- 'line_number': exc_tb.tb_lineno,
329
- 'message': error,
330
- 'created_at': timestamp
331
- }
332
345
  self.mysql.raw_query(
333
346
  "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, 'CRITICAL', '{}', {}, '{}')".format(self.run_id,
334
347
  self.task_id,
@@ -353,15 +366,6 @@ class TaskScheduler(BrynQ):
353
366
 
354
367
  raise Exception(error)
355
368
  else:
356
- timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
357
- information = {
358
- 'reload_id': self.run_id,
359
- 'task_id': self.task_id,
360
- 'log_level': 'CRITICAL',
361
- 'line_number': exc_tb.tb_lineno,
362
- 'message': error,
363
- 'created_at': timestamp
364
- }
365
369
  self.mysql.raw_query(
366
370
  "INSERT INTO `task_execution_log` (reload_id, task_id, log_level, created_at, line_number, message) VALUES ({}, {}, 'CRITICAL', '{}', {}, '{}')".format(self.run_id,
367
371
  self.task_id,
@@ -442,11 +446,9 @@ class TaskScheduler(BrynQ):
442
446
  for i in email_to:
443
447
  email_list.append({'name': 'BrynQ User', 'mail': i.strip()})
444
448
  # Set the content of the mail and all other stuff
445
- task = self.mysql.select(table='task_scheduler', selection='title', filter=f'WHERE id = {self.task_id}')[0][
446
- 0]
449
+ task = self.mysql.select(table='data_interfaces', selection='title', filter=f'WHERE id = {self.task_id}')[0][0]
447
450
  finished_at = \
448
- self.mysql.select(table='task_scheduler', selection='last_reload', filter=f'WHERE id = {self.task_id}')[0][
449
- 0]
451
+ self.mysql.select(table='task_scheduler', selection='last_reload', filter=f'WHERE data_interface_id = {self.task_id}')[0][0]
450
452
  if failed:
451
453
  subject = f'Task \'{task}\' has failed'
452
454
  content = f'Task \'{task}\' with task ID \'{self.task_id}\' failed during its last run and was stopped at {finished_at}. ' \
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 1.0
2
2
  Name: brynq-sdk-task-scheduler
3
- Version: 1.1.4
3
+ Version: 1.2.1
4
4
  Summary: Code to execute tasks in BrynQ.com with the task scheduler
5
5
  Home-page: UNKNOWN
6
6
  Author: BrynQ
@@ -3,7 +3,7 @@ from setuptools import setup
3
3
 
4
4
  setup(
5
5
  name='brynq_sdk_task_scheduler',
6
- version='1.1.4',
6
+ version='1.2.1',
7
7
  description='Code to execute tasks in BrynQ.com with the task scheduler',
8
8
  long_description='Code to execute tasks in the BrynQ.com platform with the task scheduler',
9
9
  author='BrynQ',