cornflow 1.1.5a1__py3-none-any.whl → 2.0.0a7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,830 @@
1
+ """
2
+ External endpoints to manage the executions: create new ones, list all of them, get one in particular
3
+ or check the status of an ongoing one
4
+ These endpoints have different access urls, but manage the same data entities
5
+ """
6
+
7
+ # Import from libraries
8
+ import datetime
9
+ import logging
10
+ import time
11
+ from databricks.sdk import WorkspaceClient
12
+ import databricks.sdk.service.jobs as j
13
+ from cornflow.constants import *
14
+ from cornflow.shared.const import (
15
+ AIRFLOW_BACKEND,
16
+ DATABRICKS_BACKEND,
17
+ STATUS_HEALTHY,
18
+ STATUS_UNHEALTHY,
19
+ )
20
+ # TODO AGA: Modificar import para sacarlo de cornflow_client
21
+ from cornflow.shared.databricks import Databricks
22
+ from cornflow_client.constants import INSTANCE_SCHEMA, CONFIG_SCHEMA, SOLUTION_SCHEMA
23
+ from cornflow_client.airflow.api import Airflow
24
+ # TODO AGA: Porqué el import no funcina correctamente
25
+ from flask import request, current_app
26
+ from flask_apispec import marshal_with, use_kwargs, doc
27
+
28
+ # Import from internal modules
29
+ from cornflow.endpoints.meta_resource import BaseMetaResource
30
+ from cornflow.models import InstanceModel, DeployedOrch, ExecutionModel
31
+ from cornflow.schemas.execution import (
32
+ ExecutionDetailsEndpointResponse,
33
+ ExecutionDetailsEndpointWithIndicatorsResponse,
34
+ ExecutionDataEndpointResponse,
35
+ ExecutionLogEndpointResponse,
36
+ ExecutionStatusEndpointResponse,
37
+ ExecutionStatusEndpointUpdate,
38
+ ExecutionRequest,
39
+ ExecutionEditRequest,
40
+ QueryFiltersExecution,
41
+ ReLaunchExecutionRequest,
42
+ ExecutionDetailsWithIndicatorsAndLogResponse
43
+ )
44
+ from cornflow.shared.authentication import Auth, authenticate
45
+ from cornflow.shared.compress import compressed
46
+ from cornflow.shared.const import (
47
+ EXEC_STATE_RUNNING,
48
+ EXEC_STATE_ERROR,
49
+ EXEC_STATE_ERROR_START,
50
+ EXEC_STATE_NOT_RUN,
51
+ EXEC_STATE_UNKNOWN,
52
+ EXECUTION_STATE_MESSAGE_DICT,
53
+ AIRFLOW_TO_STATE_MAP,
54
+ DATABRICKS_TO_STATE_MAP,
55
+ DATABRICKS_FINISH_TO_STATE_MAP,
56
+ EXEC_STATE_STOPPED,
57
+ EXEC_STATE_QUEUED,
58
+ )
59
+ from cornflow.shared.exceptions import AirflowError, DatabricksError, ObjectDoesNotExist, InvalidData
60
+ from cornflow.shared.validators import (
61
+ json_schema_validate_as_string,
62
+ json_schema_extend_and_validate_as_string,
63
+ )
64
+ from cornflow.orchestrator_constants import config_orchestrator
65
+
66
+
67
class ExecutionEndpoint(BaseMetaResource):
    """
    Endpoint used to create a new execution or get all the executions and their information back
    """

    def __init__(self):
        super().__init__()
        # Model used by the generic list/detail helpers of BaseMetaResource.
        self.model = ExecutionModel
        self.data_model = ExecutionModel
        # Executions reference an instance; the foreign key is checked on creation.
        self.foreign_data = {"instance_id": InstanceModel}

    @doc(description="Get all executions", tags=["Executions"])
    @authenticate(auth_class=Auth())
    @marshal_with(ExecutionDetailsWithIndicatorsAndLogResponse(many=True))
    @use_kwargs(QueryFiltersExecution, location="query")
    def get(self, **kwargs):
        """
        API method to get all the executions created by the user and its related info.
        It requires authentication to be passed in the form of a token that has to be linked to
        an existing session (login) made by a user.

        :return: A dictionary with a message (error if authentication failed or a list with all the executions
          created by the authenticated user) and a integer with the HTTP status code
        :rtype: Tuple(dict, integer)
        """
        executions = self.get_list(user=self.get_user(), **kwargs)
        current_app.logger.info(f"User {self.get_user()} gets list of executions")

        # Hide executions that were created only to run data checks.
        executions = [
            execution
            for execution in executions
            if not execution.config.get("checks_only", False)
        ]

        # Executions whose state can still change are refreshed from the orchestrator.
        running_executions = [
            execution
            for execution in executions
            if execution.state
            in [EXEC_STATE_RUNNING, EXEC_STATE_QUEUED, EXEC_STATE_UNKNOWN]
        ]

        for execution in running_executions:
            dag_run_id = execution.dag_run_id
            if not dag_run_id:
                # it's safe to say we will never get anything if we did not store the dag_run_id
                current_app.logger.warning(
                    "Error while the app tried to update the status of all running executions."
                    f"Execution {execution.id} has status {execution.state} but has no dag run associated."
                )
                continue

            # NOTE(review): this refresh path always talks to Airflow, even when the
            # configured backend is Databricks — confirm whether that is intended.
            af_client = Airflow.from_config(current_app.config)
            if not af_client.is_alive():
                current_app.logger.warning(
                    "Error while the app tried to update the status of all running executions."
                    "Airflow is not accessible."
                )
                continue

            try:
                response = af_client.get_run_status(
                    dag_name=execution.schema, dag_run_id=dag_run_id
                )
            except AirflowError as err:
                current_app.logger.warning(
                    "Error while the app tried to update the status of all running executions."
                    f"Airflow responded with an error: {err}"
                )
                continue

            # Map the orchestrator state onto the cornflow execution state.
            data = response.json()
            state = AIRFLOW_TO_STATE_MAP.get(data["state"], EXEC_STATE_UNKNOWN)
            execution.update_state(state)

        return executions

    @doc(description="Create an execution", tags=["Executions"])
    @authenticate(auth_class=Auth())
    @Auth.dag_permission_required
    @marshal_with(ExecutionDetailsEndpointResponse)
    @use_kwargs(ExecutionRequest, location="json")
    def post(self, **kwargs):
        """
        API method to create a new execution linked to an already existing instance.
        It requires authentication to be passed in the form of a token that has to be linked to
        an existing session (login) made by a user.

        :return: A dictionary with a message (error if authentication failed, error if data is not validated or
          the reference_id for the newly created execution if successful) and a integer wit the HTTP status code
        :rtype: Tuple(dict, integer)
        """
        # TODO: should validation should be done even if the execution is not going to be run?
        # TODO: should the schema field be cross validated with the instance schema field?

        # Pick the orchestrator-specific constants and the exception type to raise.
        # NOTE(review): if CORNFLOW_BACKEND is neither backend value, orq_const and
        # ORQ_ERROR stay undefined and the code below fails with a NameError —
        # confirm the configuration is validated elsewhere.
        ORQ_TYPE = current_app.config["CORNFLOW_BACKEND"]
        if ORQ_TYPE==AIRFLOW_BACKEND:
            orq_const= config_orchestrator["airflow"]
            ORQ_ERROR=AirflowError
        elif ORQ_TYPE==DATABRICKS_BACKEND:
            orq_const= config_orchestrator["databricks"]
            # TODO AGA: check whether this works correctly
            ORQ_ERROR=DatabricksError

        if "schema" not in kwargs:
            kwargs["schema"] = orq_const["def_schema"]
        # region ORCHESTRATOR-INDEPENDENT
        config = current_app.config
        # status_code is unused: the created execution is returned with 201 below.
        execution, status_code = self.post_list(data=kwargs)
        instance = InstanceModel.get_one_object(
            user=self.get_user(), idx=execution.instance_id
        )

        current_app.logger.debug(f"The request is: {request.args.get('run')}")
        # this allows testing without orchestrator interaction:
        if request.args.get("run", "1") == "0":
            current_app.logger.info(
                f"User {self.get_user_id()} creates execution {execution.id} but does not run it."
            )
            execution.update_state(EXEC_STATE_NOT_RUN)
            return execution, 201

        # We now try to launch the task in the orchestrator
        # We try to create an orch client
        # Note schema is a string with the name of the job/dag
        schema = execution.schema
        # If we are dealing with DataBricks, the schema will
        # be the job id
        orch_client, schema_info, execution= get_orch_client(schema,ORQ_TYPE,execution)
        # endregion

        # region VALIDATIONS
        # We check if the job/dag exists
        orch_client.get_orch_info(schema)
        # Validate config before running the dag
        config_schema = DeployedOrch.get_one_schema(config, schema, CONFIG_SCHEMA)
        new_config, config_errors = json_schema_extend_and_validate_as_string(
            config_schema, kwargs["config"]
        )
        if config_errors:
            execution.update_state(
                EXEC_STATE_ERROR_START,
                message="The execution could not be run because the config does not comply with the json schema. "
                "Check the log for more details",
            )
            execution.update_log_txt(f"{config_errors}")
            raise InvalidData(
                payload=dict(jsonschema_errors=config_errors),
                log_txt=f"Error while user {self.get_user()} tries to create an execution. "
                f"Configuration data does not match the jsonschema.",
            )
        elif new_config != kwargs["config"]:
            # The validation extended the config with defaults: persist the extended copy.
            execution.update_config(new_config)

        # Validate instance data before running the dag
        instance_schema = DeployedOrch.get_one_schema(config, schema, INSTANCE_SCHEMA)
        instance_errors = json_schema_validate_as_string(instance_schema, instance.data)
        if instance_errors:
            execution.update_state(
                EXEC_STATE_ERROR_START,
                message="The execution could not be run because the instance data does not "
                "comply with the json schema. Check the log for more details",
            )
            execution.update_log_txt(f"{instance_errors}")
            raise InvalidData(
                payload=dict(jsonschema_errors=instance_errors),
                log_txt=f"Error while user {self.get_user()} tries to create an execution. "
                f"Instance data does not match the jsonschema.",
            )
        # Validate solution data before running the dag (if it exists)
        if kwargs.get("data") is not None:
            solution_schema = DeployedOrch.get_one_schema(
                config, schema, SOLUTION_SCHEMA
            )
            solution_errors = json_schema_validate_as_string(
                solution_schema, kwargs["data"]
            )
            if solution_errors:
                execution.update_state(
                    EXEC_STATE_ERROR_START,
                    message="The execution could not be run because the solution data does not "
                    "comply with the json schema. Check the log for more details",
                )
                execution.update_log_txt(f"{solution_errors}")
                raise InvalidData(payload=dict(jsonschema_errors=solution_errors))
        # endregion

        if ORQ_TYPE==AIRFLOW_BACKEND:
            # A paused DAG would accept the run but never execute it.
            info = schema_info.json()
            if info["is_paused"]:
                err = "The dag exists but it is paused in airflow"
                current_app.logger.error(err)
                execution.update_state(EXEC_STATE_ERROR_START)
                raise ORQ_ERROR(
                    error=err,
                    payload=dict(
                        message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
                        state=EXEC_STATE_ERROR_START,
                    ),
                    log_txt=f"Error while user {self.get_user()} tries to create an execution. "
                    + err,
                )
        # TODO AGA: check whether any verification is needed for Databricks JOBS

        try:
            # TODO AGA: the possible removal of execution.id as a parameter must be
            #  handled, since the id cannot be chosen in Databricks; review the
            #  consequences this may have.
            response = orch_client.run_workflow(execution.id, orch_name=schema)
        except ORQ_ERROR as err:
            error = orq_const["name"] + " responded with an error: {}".format(err)
            current_app.logger.error(error)
            execution.update_state(EXEC_STATE_ERROR)
            raise ORQ_ERROR(
                error=error,
                payload=dict(
                    message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR],
                    state=EXEC_STATE_ERROR,
                ),
                log_txt=f"Error while user {self.get_user()} tries to create an execution. "
                + error,
            )

        # if we succeed, we register the dag_run_id in the execution table:
        orq_data = response.json()
        # TODO AGA: review the name of the executions model
        execution.dag_run_id = orq_data[orq_const["run_id"]]
        execution.update_state(EXEC_STATE_QUEUED)
        current_app.logger.info(
            "User {} creates execution {}".format(self.get_user_id(), execution.id)
        )
        return execution, 201
298
+
299
+
300
class ExecutionRelaunchEndpoint(BaseMetaResource):
    """
    Endpoint used to re-launch an existing execution with a (possibly updated) config.
    """

    def __init__(self):
        super().__init__()
        self.model = ExecutionModel
        self.data_model = ExecutionModel
        self.foreign_data = {"instance_id": InstanceModel}

    @doc(description="Re-launch an execution", tags=["Executions"])
    @authenticate(auth_class=Auth())
    @Auth.dag_permission_required
    @use_kwargs(ReLaunchExecutionRequest, location="json")
    def post(self, idx, **kwargs):
        """
        API method to re-launch an existing execution.
        It requires authentication to be passed in the form of a token that has to be linked to
        an existing session (login) made by a user.

        :param str idx: ID of the execution to re-launch.
        :return: A dictionary with a message (error if authentication failed, error if data is not validated or
          the reference_id for the newly created execution if successful) and a integer wit the HTTP status code
        :rtype: Tuple(dict, integer)
        """
        # Pick the orchestrator-specific constants and the exception type to raise.
        # NOTE(review): an unrecognized CORNFLOW_BACKEND leaves orq_const/ORQ_ERROR
        # undefined (NameError below) — same pattern as the creation endpoint.
        ORQ_TYPE = current_app.config["CORNFLOW_BACKEND"]
        if ORQ_TYPE == AIRFLOW_BACKEND:
            orq_const = config_orchestrator["airflow"]
            ORQ_ERROR = AirflowError
        elif ORQ_TYPE == DATABRICKS_BACKEND:
            orq_const = config_orchestrator["databricks"]
            # TODO AGA: check whether this works correctly
            ORQ_ERROR = DatabricksError

        config = current_app.config
        if "schema" not in kwargs:
            kwargs["schema"] = orq_const["def_schema"]

        # Persist the new config on the stored execution before relaunching.
        self.put_detail(
            data=dict(config=kwargs["config"]), user=self.get_user(), idx=idx
        )

        execution = ExecutionModel.get_one_object(user=self.get_user(), idx=idx)

        # If the execution does not exist, raise an error
        if execution is None:
            err = "The execution to re-solve does not exist"
            raise ObjectDoesNotExist(
                err,
                log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
                + err,
            )

        # Reset the data checks; the new run will recompute them.
        execution.update({"checks": None})

        # If the execution is still running (0) or queued (-7), refuse to relaunch.
        if execution.state == 0 or execution.state == -7:
            return {"message": "This execution is still running"}, 400

        # this allows testing without orchestrator interaction:
        if request.args.get("run", "1") == "0":
            execution.update_state(EXEC_STATE_NOT_RUN)
            return {
                "message": "The execution was set for relaunch but was not launched"
            }, 201

        # Validate config before running the dag
        config_schema = DeployedOrch.get_one_schema(
            config, kwargs["schema"], CONFIG_SCHEMA
        )
        config_errors = json_schema_validate_as_string(config_schema, kwargs["config"])
        if config_errors:
            raise InvalidData(
                payload=dict(jsonschema_errors=config_errors),
                log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
                f"Configuration data does not match the jsonschema.",
            )

        # BUG FIX: `schema` was previously read here before being assigned (the
        # assignment only happened further down), raising a NameError on every
        # relaunch. Take it from the stored execution first.
        schema = execution.schema
        orch_client, schema_info, execution = get_orch_client(
            schema, ORQ_TYPE, execution
        )

        if not orch_client.is_alive():
            err = orq_const["name"] + " is not accessible"
            current_app.logger.error(err)
            execution.update_state(EXEC_STATE_ERROR_START)
            raise ORQ_ERROR(
                error=err,
                payload=dict(
                    message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
                    state=EXEC_STATE_ERROR_START,
                ),
                log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
                + err,
            )

        # schema_info was already fetched by get_orch_client; no second round-trip needed.
        info = schema_info.json()
        if ORQ_TYPE == AIRFLOW_BACKEND:
            # A paused DAG would accept the run but never execute it.
            if info["is_paused"]:
                err = "The dag exists but it is paused in airflow"
                current_app.logger.error(err)
                execution.update_state(EXEC_STATE_ERROR_START)
                raise ORQ_ERROR(
                    error=err,
                    payload=dict(
                        message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
                        state=EXEC_STATE_ERROR_START,
                    ),
                    log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
                    + err,
                )
        # TODO AGA: check whether a similar verification is needed for Databricks
        try:
            response = orch_client.run_workflow(execution.id, orch_name=schema)
        except ORQ_ERROR as err:
            error = orq_const["name"] + " responded with an error: {}".format(err)
            current_app.logger.error(error)
            execution.update_state(EXEC_STATE_ERROR)
            raise ORQ_ERROR(
                error=error,
                payload=dict(
                    message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR],
                    state=EXEC_STATE_ERROR,
                ),
                log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
                + error,
            )

        # if we succeed, we register the dag_run_id in the execution table:
        orq_data = response.json()
        # TODO AGA: review the name of the executions model
        execution.dag_run_id = orq_data[orq_const["run_id"]]
        execution.update_state(EXEC_STATE_QUEUED)
        current_app.logger.info(
            "User {} relaunches execution {}".format(self.get_user_id(), execution.id)
        )
        return {"message": "The execution was relaunched correctly"}, 201
434
+
435
+
436
class ExecutionDetailsEndpointBase(BaseMetaResource):
    """
    Endpoint used to get the information of a certain execution. But not the data!

    Shared base that wires the execution data model and its foreign key to the
    instance model for the detail endpoints below.
    """

    # TODO AGA QUESTION: Is this used? What should it return?
    def __init__(self):
        super().__init__()
        self.foreign_data = {"instance_id": InstanceModel}
        self.data_model = ExecutionModel
445
+
446
+
447
class ExecutionDetailsEndpoint(ExecutionDetailsEndpointBase):
    """
    Endpoint to get, edit, delete or stop a single execution (not its solution data).
    """

    @doc(description="Get details of an execution", tags=["Executions"], inherit=False)
    @authenticate(auth_class=Auth())
    @marshal_with(ExecutionDetailsEndpointWithIndicatorsResponse)
    @BaseMetaResource.get_data_or_404
    def get(self, idx):
        """
        API method to get an execution created by the user and its related info.
        It requires authentication to be passed in the form of a token that has to be linked to
        an existing session (login) made by a user.

        :param str idx: ID of the execution.
        :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
          the data of the execution) and an integer with the HTTP status code.
        :rtype: Tuple(dict, integer)
        """
        current_app.logger.info(
            f"User {self.get_user()} gets details of execution {idx}"
        )
        return self.get_detail(user=self.get_user(), idx=idx)

    @doc(description="Edit an execution", tags=["Executions"], inherit=False)
    @authenticate(auth_class=Auth())
    @use_kwargs(ExecutionEditRequest, location="json")
    def put(self, idx, **data):
        """
        Edit an existing execution

        :param string idx: ID of the execution.
        :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
          a message) and an integer with the HTTP status code.
        :rtype: Tuple(dict, integer)
        """
        config = current_app.config

        # NOTE(review): if the execution does not exist, get_one_object returns
        # None and reading .schema raises AttributeError — confirm a 404 is
        # produced upstream rather than a 500.
        schema = ExecutionModel.get_one_object(user=self.get_user(), idx=idx).schema

        # If solution data is being edited, validate it against the solution schema.
        if data.get("data") is not None and schema is not None:
            data_jsonschema = DeployedOrch.get_one_schema(
                config, schema, SOLUTION_SCHEMA
            )
            validation_errors = json_schema_validate_as_string(
                data_jsonschema, data["data"]
            )

            if validation_errors:
                raise InvalidData(
                    payload=dict(jsonschema_errors=validation_errors),
                    log_txt=f"Error while user {self.get_user()} tries to edit execution {idx}. "
                    f"Solution data does not match the jsonschema.",
                )

        current_app.logger.info(f"User {self.get_user()} edits execution {idx}")
        return self.put_detail(data, user=self.get_user(), idx=idx)

    @doc(description="Delete an execution", tags=["Executions"], inherit=False)
    @authenticate(auth_class=Auth())
    def delete(self, idx):
        """
        API method to delete an execution created by the user and its related info.
        It requires authentication to be passed in the form of a token that has to be linked to
        an existing session (login) made by a user.

        :param string idx: ID of the execution.
        :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
          a message) and an integer with the HTTP status code.
        :rtype: Tuple(dict, integer)
        """
        current_app.logger.info(f"User {self.get_user()} deleted execution {idx}")
        return self.delete_detail(user=self.get_user(), idx=idx)

    @doc(description="Stop an execution", tags=["Executions"], inherit=False)
    @authenticate(auth_class=Auth())
    @Auth.dag_permission_required
    def post(self, idx):
        """
        API method to stop a running execution by failing its dag run.

        :param string idx: ID of the execution.
        :return: A dictionary with a confirmation message and an integer with the HTTP status code.
        :rtype: Tuple(dict, integer)
        """
        execution = ExecutionModel.get_one_object(user=self.get_user(), idx=idx)
        if execution is None:
            raise ObjectDoesNotExist(
                log_txt=f"Error while user {self.get_user()} tries to stop execution {idx}. "
                f"The execution does not exist."
            )
        # NOTE(review): stopping always goes through Airflow, even when the
        # configured backend is Databricks — confirm whether that is intended.
        af_client = Airflow.from_config(current_app.config)
        if not af_client.is_alive():
            err = "Airflow is not accessible"
            raise AirflowError(
                error=err,
                log_txt=f"Error while user {self.get_user()} tries to stop execution {idx}. "
                + err,
            )
        # The response of set_dag_run_to_fail is not inspected; failure raises above.
        response = af_client.set_dag_run_to_fail(
            dag_name=execution.schema, dag_run_id=execution.dag_run_id
        )
        execution.update_state(EXEC_STATE_STOPPED)
        current_app.logger.info(f"User {self.get_user()} stopped execution {idx}")
        return {"message": "The execution has been stopped"}, 200
542
+
543
+
544
class ExecutionStatusEndpoint(BaseMetaResource):
    """
    Endpoint used to get the status of a certain execution that is running in the orchestrator
    """

    def __init__(self):
        super().__init__()
        self.data_model = ExecutionModel

    @doc(description="Get status of an execution", tags=["Executions"])
    @authenticate(auth_class=Auth())
    @marshal_with(ExecutionStatusEndpointResponse)
    def get(self, idx):
        """
        API method to get the status of the execution created by the user.
        It requires authentication to be passed in the form of a token that has to be linked to
        an existing session (login) made by a user.

        :param str idx: ID of the execution
        :return: A dictionary with a message (error if the execution does not exist or status of the execution)
          and an integer with the HTTP status code.
        :rtype: Tuple(dict, integer)
        """
        # Pick the orchestrator-specific constants and the exception type to raise.
        # NOTE(review): an unrecognized CORNFLOW_BACKEND leaves orq_const/ORQ_ERROR
        # undefined — same pattern as the other endpoints in this module.
        ORQ_TYPE = current_app.config["CORNFLOW_BACKEND"]
        if ORQ_TYPE == AIRFLOW_BACKEND:
            orq_const = config_orchestrator["airflow"]
            ORQ_ERROR = AirflowError
        elif ORQ_TYPE == DATABRICKS_BACKEND:
            orq_const = config_orchestrator["databricks"]
            # TODO AGA: check whether this works correctly
            ORQ_ERROR = DatabricksError

        execution = self.data_model.get_one_object(user=self.get_user(), idx=idx)
        if execution is None:
            raise ObjectDoesNotExist(
                log_txt=f"Error while user {self.get_user()} tries to get the status of execution {idx}. "
                f"The execution does not exist."
            )
        if execution.state not in [
            EXEC_STATE_RUNNING,
            EXEC_STATE_QUEUED,
            EXEC_STATE_UNKNOWN,
        ]:
            # we only care on asking orchestrator if the status is unknown, queued or running.
            return execution, 200

        def _raise_af_error(execution, error, state=EXEC_STATE_UNKNOWN, log_txt=None):
            # Persist the failed state on the execution and raise the
            # backend-specific error type.
            if log_txt is None:
                log_txt = error
            message = EXECUTION_STATE_MESSAGE_DICT[state]
            execution.update_state(state)
            raise ORQ_ERROR(
                error=error, payload=dict(message=message, state=state), log_txt=log_txt
            )

        # FIX: debug print() statements that leaked execution/user details to
        # stdout were removed in favor of structured debug logging below.
        dag_run_id = execution.dag_run_id
        if not dag_run_id:
            # it's safe to say we will never get anything if we did not store the dag_run_id
            _raise_af_error(
                execution,
                state=EXEC_STATE_ERROR,
                error="The execution has no dag_run associated",
                log_txt=f"Error while user {self.get_user()} tries to get the status of execution {idx}. "
                f"The execution has no associated dag run id.",
            )

        schema = execution.schema
        orch_client, schema_info, execution = get_orch_client(
            schema, ORQ_TYPE, execution
        )

        if not orch_client.is_alive():
            err = orq_const["name"] + " is not accessible"
            _raise_af_error(
                execution,
                err,
                log_txt=f"Error while user {self.get_user()} tries to get the status of execution {idx}. "
                + err,
            )

        try:
            # TODO: get the dag_name from somewhere!
            state = orch_client.get_run_status(dag_run_id)
        except ORQ_ERROR as err:
            error = orq_const["name"] + f" responded with an error: {err}"
            _raise_af_error(
                execution,
                error,
                log_txt=f"Error while user {self.get_user()} tries to get the status of execution {idx}. "
                + str(err),
            )

        current_app.logger.debug(f"The state before mapping is {state}")
        state = map_run_state(state, ORQ_TYPE)
        current_app.logger.debug(f"The state prev to updating is {state}")
        execution.update_state(state)
        current_app.logger.info(
            f"User {self.get_user()} gets status of execution {idx}"
        )
        return execution, 200

    @doc(description="Change status of an execution", tags=["Executions"])
    @authenticate(auth_class=Auth())
    @use_kwargs(ExecutionStatusEndpointUpdate)
    def put(self, idx, **data):
        """
        Edit the status of an existing execution

        :param string idx: ID of the execution.
        :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
          a message) and an integer with the HTTP status code.
        :rtype: Tuple(dict, integer)
        """
        execution = self.data_model.get_one_object(user=self.get_user(), idx=idx)
        if execution is None:
            raise ObjectDoesNotExist()
        if execution.state not in [
            EXEC_STATE_RUNNING,
            EXEC_STATE_UNKNOWN,
            EXEC_STATE_QUEUED,
        ]:
            # we only care on asking the orchestrator if the status is unknown or is running.
            return {"message": f"execution {idx} updated correctly"}, 200
        state = data.get("status")
        if state is not None:
            execution.update_state(state)
            current_app.logger.info(f"User {self.get_user()} edits execution {idx}")
            return {"message": f"execution {idx} updated correctly"}, 200
        else:
            return {"error": "status code was missing"}, 400
677
+
678
+
679
class ExecutionDataEndpoint(ExecutionDetailsEndpointBase):
    """
    Endpoint used to get the solution of a certain execution.
    """

    @doc(
        description="Get solution data of an execution",
        tags=["Executions"],
        inherit=False,
    )
    @authenticate(auth_class=Auth())
    @marshal_with(ExecutionDataEndpointResponse)
    @BaseMetaResource.get_data_or_404
    @compressed
    def get(self, idx):
        """
        API method to fetch the solution data of one execution.

        :param str idx: ID of the execution.
        :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
          the data of the execution) and an integer with the HTTP status code.
        :rtype: Tuple(dict, integer)
        """
        current_app.logger.info(f"User {self.get_user()} gets data of execution {idx}")
        detail = self.get_detail(user=self.get_user(), idx=idx)
        return detail
703
+
704
+
705
class ExecutionLogEndpoint(ExecutionDetailsEndpointBase):
    """
    Endpoint used to get the log of a certain execution.
    """

    @doc(description="Get log of an execution", tags=["Executions"], inherit=False)
    @authenticate(auth_class=Auth())
    @marshal_with(ExecutionLogEndpointResponse)
    @BaseMetaResource.get_data_or_404
    @compressed
    def get(self, idx):
        """
        API method to fetch the run log of one execution.

        :param str idx: ID of the execution.
        :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
          the data of the execution) and an integer with the HTTP status code.
        :rtype: Tuple(dict, integer)
        """
        current_app.logger.info(f"User {self.get_user()} gets log of execution {idx}")
        detail = self.get_detail(user=self.get_user(), idx=idx)
        return detail
725
+
726
+ # region aux_functions
727
def submit_one_job(cid, client=None):
    """
    Trigger a one-time Databricks run on an existing cluster and block until it
    finishes (up to 15 minutes), logging intermediate task states.

    :param str cid: ID of the existing cluster to run the task on.
    :param client: optional ``WorkspaceClient``; a new one is created when omitted.
        (FIX: the original referenced an undefined module global ``w``, which
        raised a NameError whenever this helper was called.)
    :return: the run id of the submitted one-time job.
    """
    w = client if client is not None else WorkspaceClient()
    # trigger one-time-run job and get waiter object
    waiter = w.jobs.submit(
        run_name=f"cornflow-job-{time.time()}",
        tasks=[
            j.SubmitTask(
                task_key="nippon_production_scheduling",
                existing_cluster_id=cid,
                libraries=[],
                spark_python_task=j.SparkPythonTask(
                    python_file="/Workspace/Repos/nippon/nippon_production_scheduling/main.py",
                ),
                timeout_seconds=0,
            )
        ],
    )
    logging.info(f"starting to poll: {waiter.run_id}")
    # callback, that receives a polled entity between state updates
    # If you want to perform polling in a separate thread, process, or service,
    # you can use w.jobs.wait_get_run_job_terminated_or_skipped(
    #   run_id=waiter.run_id,
    #   timeout=datetime.timedelta(minutes=15),
    #   callback=print_status) to achieve the same results.
    #
    # Waiter interface allows for `w.jobs.submit(..).result()` simplicity in
    # the scenarios, where you need to block the calling thread for the job to finish.
    run = waiter.result(
        timeout=datetime.timedelta(minutes=15), callback=print_status
    )
    logging.info(f"job finished: {run.run_page_url}")
    return waiter.run_id
754
+
755
def print_status(run: j.Run):
    """Log one intermediate-status line covering every task of a Databricks run."""
    summary = ", ".join(
        f"{task.task_key}: {task.state.life_cycle_state}" for task in run.tasks
    )
    logging.info(f"workflow intermediate status: {summary}")
758
+
759
def get_orch_client(schema, orq_type, execution):
    """
    Get the orchestrator client and the schema info for the configured backend.

    :param schema: name of the dag (Airflow) or job id (Databricks).
    :param orq_type: backend selector (AIRFLOW_BACKEND or DATABRICKS_BACKEND).
    :param execution: execution model instance; its state is updated on failure.
    :return: tuple ``(client, schema_info, execution)``.
    :raises EndpointNotImplemented: when the backend is not recognized.
    """
    if orq_type == AIRFLOW_BACKEND:
        return get_airflow(schema, execution=execution)
    elif orq_type == DATABRICKS_BACKEND:
        return get_databricks(schema, execution=execution)
    else:
        # FIX: EndpointNotImplemented was never imported at module level, so this
        # error path raised a NameError instead; import it locally from the
        # exceptions module this file already uses.
        from cornflow.shared.exceptions import EndpointNotImplemented

        raise EndpointNotImplemented()
769
+
770
+
771
def get_airflow(schema, execution):
    """
    Get the Airflow client and the schema info for a dag.

    :param schema: name of the dag to look up.
    :param execution: execution model instance; set to ERROR_START when Airflow is down.
    :return: tuple ``(af_client, schema_info, execution)``.
    :raises AirflowError: when Airflow is not accessible.
    """
    af_client = Airflow.from_config(current_app.config)
    # FIX: check liveness BEFORE asking for the dag info; the original queried
    # get_orch_info first, so an unreachable Airflow surfaced as a raw connection
    # error instead of this handled "not accessible" path.
    if not af_client.is_alive():
        err = "Airflow is not accessible"
        current_app.logger.error(err)
        execution.update_state(EXEC_STATE_ERROR_START)
        raise AirflowError(
            error=err,
            payload=dict(
                message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
                state=EXEC_STATE_ERROR_START,
            ),
            # FIX: the original log_txt used self.get_user(), but `self` does not
            # exist in this module-level function (NameError while raising).
            log_txt="Error while trying to create an execution. " + err,
        )
    schema_info = af_client.get_orch_info(schema)
    # TODO AGA: review whether returning execution makes sense or whether it is
    #  just a reference to the same object.
    return af_client, schema_info, execution
793
+
794
+
795
def get_databricks(schema, execution):
    """
    Get the Databricks client and the schema info for a job.

    :param schema: job id to look up.
    :param execution: execution model instance; set to ERROR_START when Databricks is down.
    :return: tuple ``(db_client, schema_info, execution)``.
    :raises DatabricksError: when Databricks is not accessible.
    """
    db_client = Databricks.from_config(current_app.config)
    # FIX: check liveness BEFORE asking for the job info; the original queried
    # get_orch_info first, so an unreachable Databricks surfaced as a raw
    # connection error instead of this handled "not accessible" path.
    if not db_client.is_alive():
        err = "Databricks is not accessible"
        current_app.logger.error(err)
        execution.update_state(EXEC_STATE_ERROR_START)
        raise DatabricksError(
            error=err,
            payload=dict(
                message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
                state=EXEC_STATE_ERROR_START,
            ),
            # FIX: the original log_txt used self.get_user(), but `self` does not
            # exist in this module-level function (NameError while raising).
            log_txt="Error while trying to create an execution. " + err,
        )
    schema_info = db_client.get_orch_info(schema)
    return db_client, schema_info, execution
815
+ # endregion
816
+
817
def map_run_state(state, ORQ_TYPE):
    """
    Maps the state of the execution in the orchestrator to the state of the
    execution in cornflow.

    :param state: raw state value reported by the orchestrator.
    :param ORQ_TYPE: backend selector (AIRFLOW_BACKEND or DATABRICKS_BACKEND).
    :return: a cornflow EXEC_STATE_* value (EXEC_STATE_UNKNOWN for unmapped
        states); None when the backend is not recognized, as before.
    """
    # FIX: removed a debug print() that leaked raw orchestrator states to stdout.
    if ORQ_TYPE == AIRFLOW_BACKEND:
        return AIRFLOW_TO_STATE_MAP.get(state, EXEC_STATE_UNKNOWN)
    elif ORQ_TYPE == DATABRICKS_BACKEND:
        # TODO AGA QUESTION: terminated Databricks runs may need a second mapping
        #  through DATABRICKS_FINISH_TO_STATE_MAP to pick the right final state;
        #  confirm the intended default error state before enabling it.
        return DATABRICKS_TO_STATE_MAP.get(state, EXEC_STATE_UNKNOWN)