cornflow 2.0.0a11__py3-none-any.whl → 2.0.0a12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. airflow_config/airflow_local_settings.py +1 -1
  2. cornflow/app.py +8 -3
  3. cornflow/cli/migrations.py +23 -3
  4. cornflow/cli/service.py +18 -18
  5. cornflow/cli/utils.py +16 -1
  6. cornflow/commands/dag.py +1 -1
  7. cornflow/config.py +13 -8
  8. cornflow/endpoints/__init__.py +8 -2
  9. cornflow/endpoints/alarms.py +66 -2
  10. cornflow/endpoints/data_check.py +53 -26
  11. cornflow/endpoints/execution.py +387 -132
  12. cornflow/endpoints/login.py +81 -63
  13. cornflow/endpoints/meta_resource.py +11 -3
  14. cornflow/migrations/versions/999b98e24225.py +34 -0
  15. cornflow/models/base_data_model.py +4 -32
  16. cornflow/models/execution.py +2 -3
  17. cornflow/models/meta_models.py +28 -22
  18. cornflow/models/user.py +7 -10
  19. cornflow/schemas/alarms.py +8 -0
  20. cornflow/schemas/execution.py +1 -1
  21. cornflow/schemas/query.py +2 -1
  22. cornflow/schemas/user.py +5 -20
  23. cornflow/shared/authentication/auth.py +201 -264
  24. cornflow/shared/const.py +3 -14
  25. cornflow/shared/databricks.py +5 -1
  26. cornflow/tests/const.py +1 -0
  27. cornflow/tests/custom_test_case.py +77 -26
  28. cornflow/tests/unit/test_actions.py +2 -2
  29. cornflow/tests/unit/test_alarms.py +55 -1
  30. cornflow/tests/unit/test_apiview.py +108 -3
  31. cornflow/tests/unit/test_cases.py +20 -29
  32. cornflow/tests/unit/test_cli.py +6 -5
  33. cornflow/tests/unit/test_commands.py +3 -3
  34. cornflow/tests/unit/test_dags.py +5 -6
  35. cornflow/tests/unit/test_executions.py +443 -123
  36. cornflow/tests/unit/test_instances.py +14 -2
  37. cornflow/tests/unit/test_instances_file.py +1 -1
  38. cornflow/tests/unit/test_licenses.py +1 -1
  39. cornflow/tests/unit/test_log_in.py +230 -207
  40. cornflow/tests/unit/test_permissions.py +8 -8
  41. cornflow/tests/unit/test_roles.py +48 -10
  42. cornflow/tests/unit/test_schemas.py +1 -1
  43. cornflow/tests/unit/test_tables.py +7 -7
  44. cornflow/tests/unit/test_token.py +19 -5
  45. cornflow/tests/unit/test_users.py +22 -6
  46. cornflow/tests/unit/tools.py +75 -10
  47. {cornflow-2.0.0a11.dist-info → cornflow-2.0.0a12.dist-info}/METADATA +16 -15
  48. {cornflow-2.0.0a11.dist-info → cornflow-2.0.0a12.dist-info}/RECORD +51 -51
  49. {cornflow-2.0.0a11.dist-info → cornflow-2.0.0a12.dist-info}/WHEEL +1 -1
  50. cornflow/endpoints/execution_databricks.py +0 -808
  51. {cornflow-2.0.0a11.dist-info → cornflow-2.0.0a12.dist-info}/entry_points.txt +0 -0
  52. {cornflow-2.0.0a11.dist-info → cornflow-2.0.0a12.dist-info}/top_level.txt +0 -0
@@ -1,808 +0,0 @@
1
- """
2
- External endpoints to manage the executions: create new ones, list all of them, get one in particular
3
- or check the status of an ongoing one
4
- These endpoints have different access URLs, but manage the same data entities
5
- """
6
-
7
- # Import from libraries
8
- from cornflow_client.airflow.api import Airflow
9
- from cornflow_client.constants import INSTANCE_SCHEMA, CONFIG_SCHEMA, SOLUTION_SCHEMA
10
-
11
- # TODO AGA: Why does the import not work correctly?
12
- from flask import request, current_app
13
- from flask_apispec import marshal_with, use_kwargs, doc
14
-
15
- # Import from internal modules
16
- from cornflow.endpoints.meta_resource import BaseMetaResource
17
- from cornflow.models import InstanceModel, DeployedOrch, ExecutionModel
18
- from cornflow.orchestrator_constants import config_orchestrator
19
- from cornflow.schemas.execution import (
20
- ExecutionDetailsEndpointResponse,
21
- ExecutionDetailsEndpointWithIndicatorsResponse,
22
- ExecutionDataEndpointResponse,
23
- ExecutionLogEndpointResponse,
24
- ExecutionStatusEndpointResponse,
25
- ExecutionStatusEndpointUpdate,
26
- ExecutionRequest,
27
- ExecutionEditRequest,
28
- QueryFiltersExecution,
29
- ReLaunchExecutionRequest,
30
- ExecutionDetailsWithIndicatorsAndLogResponse,
31
- )
32
- from cornflow.shared.authentication import Auth, authenticate
33
- from cornflow.shared.compress import compressed
34
- from cornflow.shared.const import (
35
- AIRFLOW_BACKEND,
36
- DATABRICKS_BACKEND,
37
- )
38
- from cornflow.shared.const import (
39
- EXEC_STATE_RUNNING,
40
- EXEC_STATE_ERROR,
41
- EXEC_STATE_ERROR_START,
42
- EXEC_STATE_NOT_RUN,
43
- EXEC_STATE_UNKNOWN,
44
- EXECUTION_STATE_MESSAGE_DICT,
45
- AIRFLOW_TO_STATE_MAP,
46
- DATABRICKS_TO_STATE_MAP,
47
- EXEC_STATE_STOPPED,
48
- EXEC_STATE_QUEUED,
49
- )
50
-
51
- # TODO AGA: Change this import so that it is taken from cornflow_client
52
- from cornflow.shared.databricks import Databricks
53
- from cornflow.shared.exceptions import (
54
- AirflowError,
55
- DatabricksError,
56
- ObjectDoesNotExist,
57
- InvalidData,
58
- EndpointNotImplemented,
59
- )
60
- from cornflow.shared.validators import (
61
- json_schema_validate_as_string,
62
- json_schema_extend_and_validate_as_string,
63
- )
64
-
65
-
66
- class ExecutionEndpoint(BaseMetaResource):
67
- """
68
- Endpoint used to create a new execution or get all the executions and their information back
69
- """
70
-
71
- def __init__(self):
72
- super().__init__()
73
- self.model = ExecutionModel
74
- self.data_model = ExecutionModel
75
- self.foreign_data = {"instance_id": InstanceModel}
76
-
77
- @doc(description="Get all executions", tags=["Executions"])
78
- @authenticate(auth_class=Auth())
79
- @marshal_with(ExecutionDetailsWithIndicatorsAndLogResponse(many=True))
80
- @use_kwargs(QueryFiltersExecution, location="query")
81
- def get(self, **kwargs):
82
- """
83
- API method to get all the executions created by the user and its related info
84
- It requires authentication to be passed in the form of a token that has to be linked to
85
- an existing session (login) made by a user
86
-
87
- :return: A dictionary with a message (error if authentication failed or a list with all the executions
88
- created by the authenticated user) and a integer with the HTTP status code
89
- :rtype: Tuple(dict, integer)
90
- """
91
- executions = self.get_list(user=self.get_user(), **kwargs)
92
- current_app.logger.info(f"User {self.get_user()} gets list of executions")
93
-
94
- executions = [
95
- execution
96
- for execution in executions
97
- if not execution.config.get("checks_only", False)
98
- ]
99
-
100
- running_executions = [
101
- execution
102
- for execution in executions
103
- if execution.state
104
- in [EXEC_STATE_RUNNING, EXEC_STATE_QUEUED, EXEC_STATE_UNKNOWN]
105
- ]
106
-
107
- for execution in running_executions:
108
- dag_run_id = execution.dag_run_id
109
- if not dag_run_id:
110
- # it's safe to say we will never get anything if we did not store the dag_run_id
111
- current_app.logger.warning(
112
- "Error while the app tried to update the status of all running executions."
113
- f"Execution {execution.id} has status {execution.state} but has no dag run associated."
114
- )
115
- continue
116
-
117
- af_client = Airflow.from_config(current_app.config)
118
- if not af_client.is_alive():
119
- current_app.logger.warning(
120
- "Error while the app tried to update the status of all running executions."
121
- "Airflow is not accessible."
122
- )
123
- continue
124
-
125
- try:
126
- response = af_client.get_run_status(
127
- dag_name=execution.schema, dag_run_id=dag_run_id
128
- )
129
- except AirflowError as err:
130
- current_app.logger.warning(
131
- "Error while the app tried to update the status of all running executions."
132
- f"Airflow responded with an error: {err}"
133
- )
134
- continue
135
-
136
- data = response.json()
137
- state = AIRFLOW_TO_STATE_MAP.get(data["state"], EXEC_STATE_UNKNOWN)
138
- execution.update_state(state)
139
-
140
- return executions
141
-
142
- @doc(description="Create an execution", tags=["Executions"])
143
- @authenticate(auth_class=Auth())
144
- @Auth.dag_permission_required
145
- @marshal_with(ExecutionDetailsEndpointResponse)
146
- @use_kwargs(ExecutionRequest, location="json")
147
- def post(self, **kwargs):
148
- """
149
- API method to create a new execution linked to an already existing instance
150
- It requires authentication to be passed in the form of a token that has to be linked to
151
- an existing session (login) made by a user
152
-
153
- :return: A dictionary with a message (error if authentication failed, error if data is not validated or
154
- the reference_id for the newly created execution if successful) and a integer wit the HTTP status code
155
- :rtype: Tuple(dict, integer)
156
- """
157
- # TODO: should validation should be done even if the execution is not going to be run?
158
- # TODO: should the schema field be cross validated with the instance schema field?
159
-
160
- ORQ_TYPE = current_app.config["CORNFLOW_BACKEND"]
161
- if ORQ_TYPE == AIRFLOW_BACKEND:
162
- orq_const = config_orchestrator["airflow"]
163
- ORQ_ERROR = AirflowError
164
- elif ORQ_TYPE == DATABRICKS_BACKEND:
165
- orq_const = config_orchestrator["databricks"]
166
- # TODO AGA: Revisar si esto funcionaría correctamente
167
- ORQ_ERROR = DatabricksError
168
-
169
- if "schema" not in kwargs:
170
- kwargs["schema"] = orq_const["def_schema"]
171
- # region INDEPENDIENTE A AIRFLOW
172
- config = current_app.config
173
- execution, status_code = self.post_list(data=kwargs)
174
- instance = InstanceModel.get_one_object(
175
- user=self.get_user(), idx=execution.instance_id
176
- )
177
-
178
- current_app.logger.debug(f"The request is: {request.args.get('run')}")
179
- # this allows testing without orchestrator interaction:
180
- if request.args.get("run", "1") == "0":
181
- current_app.logger.info(
182
- f"User {self.get_user_id()} creates execution {execution.id} but does not run it."
183
- )
184
- execution.update_state(EXEC_STATE_NOT_RUN)
185
- return execution, 201
186
-
187
- # We now try to launch the task in the orchestrator
188
- # We try to create an orch client
189
- # Note schema is a string with the name of the job/dag
190
- schema = execution.schema
191
- # If we are dealing with DataBricks, the schema will
192
- # be the job id
193
- orch_client, schema_info, execution = get_orch_client(
194
- schema, ORQ_TYPE, execution
195
- )
196
- # endregion
197
-
198
- # region VALIDACIONES
199
- # We check if the job/dag exists
200
- orch_client.get_orch_info(schema)
201
- # Validate config before running the dag
202
- config_schema = DeployedOrch.get_one_schema(config, schema, CONFIG_SCHEMA)
203
- new_config, config_errors = json_schema_extend_and_validate_as_string(
204
- config_schema, kwargs["config"]
205
- )
206
- if config_errors:
207
- execution.update_state(
208
- EXEC_STATE_ERROR_START,
209
- message="The execution could not be run because the config does not comply with the json schema. "
210
- "Check the log for more details",
211
- )
212
- execution.update_log_txt(f"{config_errors}")
213
- raise InvalidData(
214
- payload=dict(jsonschema_errors=config_errors),
215
- log_txt=f"Error while user {self.get_user()} tries to create an execution. "
216
- f"Configuration data does not match the jsonschema.",
217
- )
218
- elif new_config != kwargs["config"]:
219
- execution.update_config(new_config)
220
-
221
- # Validate instance data before running the dag
222
- instance_schema = DeployedOrch.get_one_schema(config, schema, INSTANCE_SCHEMA)
223
- instance_errors = json_schema_validate_as_string(instance_schema, instance.data)
224
- if instance_errors:
225
- execution.update_state(
226
- EXEC_STATE_ERROR_START,
227
- message="The execution could not be run because the instance data does not "
228
- "comply with the json schema. Check the log for more details",
229
- )
230
- execution.update_log_txt(f"{instance_errors}")
231
- raise InvalidData(
232
- payload=dict(jsonschema_errors=instance_errors),
233
- log_txt=f"Error while user {self.get_user()} tries to create an execution. "
234
- f"Instance data does not match the jsonschema.",
235
- )
236
- # Validate solution data before running the dag (if it exists)
237
- if kwargs.get("data") is not None:
238
- solution_schema = DeployedOrch.get_one_schema(
239
- config, schema, SOLUTION_SCHEMA
240
- )
241
- solution_errors = json_schema_validate_as_string(
242
- solution_schema, kwargs["data"]
243
- )
244
- if solution_errors:
245
- execution.update_state(
246
- EXEC_STATE_ERROR_START,
247
- message="The execution could not be run because the solution data does not "
248
- "comply with the json schema. Check the log for more details",
249
- )
250
- execution.update_log_txt(f"{solution_errors}")
251
- raise InvalidData(payload=dict(jsonschema_errors=solution_errors))
252
- # endregion
253
-
254
- if ORQ_TYPE == AIRFLOW_BACKEND:
255
- info = schema_info.json()
256
- if info["is_paused"]:
257
- err = "The dag exists but it is paused in airflow"
258
- current_app.logger.error(err)
259
- execution.update_state(EXEC_STATE_ERROR_START)
260
- raise ORQ_ERROR(
261
- error=err,
262
- payload=dict(
263
- message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
264
- state=EXEC_STATE_ERROR_START,
265
- ),
266
- log_txt=f"Error while user {self.get_user()} tries to create an execution. "
267
- + err,
268
- )
269
- # TODO AGA: revisar si hay que hacer alguna verificación a los JOBS
270
-
271
- try:
272
- # TODO AGA: Hay que genestionar la posible eliminación de execution.id como
273
- # parámetro, ya que no se puede seleccionar el id en databricks
274
- # revisar las consecuencias que puede tener
275
- response = orch_client.run_workflow(execution.id, orch_name=schema)
276
- except ORQ_ERROR as err:
277
- error = orq_const["name"] + " responded with an error: {}".format(err)
278
- current_app.logger.error(error)
279
- execution.update_state(EXEC_STATE_ERROR)
280
- raise ORQ_ERROR(
281
- error=error,
282
- payload=dict(
283
- message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR],
284
- state=EXEC_STATE_ERROR,
285
- ),
286
- log_txt=f"Error while user {self.get_user()} tries to create an execution. "
287
- + error,
288
- )
289
-
290
- # if we succeed, we register the dag_run_id in the execution table:
291
- orq_data = response.json()
292
- # TODO AGA: revisar el nombre del modelo de ejecuciones
293
- execution.dag_run_id = orq_data[orq_const["run_id"]]
294
- execution.update_state(EXEC_STATE_QUEUED)
295
- current_app.logger.info(
296
- "User {} creates execution {}".format(self.get_user_id(), execution.id)
297
- )
298
- return execution, 201
299
-
300
-
301
- class ExecutionRelaunchEndpoint(BaseMetaResource):
302
- def __init__(self):
303
- super().__init__()
304
- self.model = ExecutionModel
305
- self.data_model = ExecutionModel
306
- self.foreign_data = {"instance_id": InstanceModel}
307
-
308
- @doc(description="Re-launch an execution", tags=["Executions"])
309
- @authenticate(auth_class=Auth())
310
- @Auth.dag_permission_required
311
- @use_kwargs(ReLaunchExecutionRequest, location="json")
312
- def post(self, idx, **kwargs):
313
- """
314
- API method to re-launch an existing execution
315
- It requires authentication to be passed in the form of a token that has to be linked to
316
- an existing session (login) made by a user
317
-
318
- :return: A dictionary with a message (error if authentication failed, error if data is not validated or
319
- the reference_id for the newly created execution if successful) and a integer wit the HTTP status code
320
- :rtype: Tuple(dict, integer)
321
- """
322
- ORQ_TYPE = current_app.config["CORNFLOW_BACKEND"]
323
- if ORQ_TYPE == AIRFLOW_BACKEND:
324
- orq_const = config_orchestrator["airflow"]
325
- ORQ_ERROR = AirflowError
326
- elif ORQ_TYPE == DATABRICKS_BACKEND:
327
- orq_const = config_orchestrator["databricks"]
328
- # TODO AGA: Revisar si esto funcionaría correctamente
329
- ORQ_ERROR = DatabricksError
330
-
331
- config = current_app.config
332
- if "schema" not in kwargs:
333
- kwargs["schema"] = orq_const["def_schema"]
334
-
335
- self.put_detail(
336
- data=dict(config=kwargs["config"]), user=self.get_user(), idx=idx
337
- )
338
-
339
- execution = ExecutionModel.get_one_object(user=self.get_user(), idx=idx)
340
-
341
- # If the execution does not exist, raise an error
342
- if execution is None:
343
- err = "The execution to re-solve does not exist"
344
- raise ObjectDoesNotExist(
345
- err,
346
- log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
347
- + err,
348
- )
349
-
350
- execution.update({"checks": None})
351
-
352
- # If the execution is still running or queued, raise an error
353
- if execution.state == 0 or execution.state == -7:
354
- return {"message": "This execution is still running"}, 400
355
-
356
- # this allows testing without airflow interaction:
357
- if request.args.get("run", "1") == "0":
358
- execution.update_state(EXEC_STATE_NOT_RUN)
359
- return {
360
- "message": "The execution was set for relaunch but was not launched"
361
- }, 201
362
-
363
- # Validate config before running the dag
364
- config_schema = DeployedOrch.get_one_schema(
365
- config, kwargs["schema"], CONFIG_SCHEMA
366
- )
367
- config_errors = json_schema_validate_as_string(config_schema, kwargs["config"])
368
- if config_errors:
369
- raise InvalidData(
370
- payload=dict(jsonschema_errors=config_errors),
371
- log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
372
- f"Configuration data does not match the jsonschema.",
373
- )
374
- orch_client, schema_info, execution = get_orch_client(
375
- kwargs["schema"], ORQ_TYPE, execution
376
- )
377
-
378
- if not orch_client.is_alive():
379
- err = orq_const["name"] + " is not accessible"
380
- current_app.logger.error(err)
381
- execution.update_state(EXEC_STATE_ERROR_START)
382
- raise ORQ_ERROR(
383
- error=err,
384
- payload=dict(
385
- message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
386
- state=EXEC_STATE_ERROR_START,
387
- ),
388
- log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
389
- + err,
390
- )
391
-
392
- # ask airflow if dag_name exists
393
- schema = execution.schema
394
- schema_info = orch_client.get_orch_info(schema)
395
-
396
- info = schema_info.json()
397
- if ORQ_TYPE == AIRFLOW_BACKEND:
398
- if info["is_paused"]:
399
- err = "The dag exists but it is paused in airflow"
400
- current_app.logger.error(err)
401
- execution.update_state(EXEC_STATE_ERROR_START)
402
- raise ORQ_ERROR(
403
- error=err,
404
- payload=dict(
405
- message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
406
- state=EXEC_STATE_ERROR_START,
407
- ),
408
- log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
409
- + err,
410
- )
411
- # TODO AGA: revisar si hay que hacer alguna comprobación del estilo a databricks
412
- try:
413
- response = orch_client.run_workflow(execution.id, orch_name=schema)
414
- except ORQ_ERROR as err:
415
- error = orq_const["name"] + " responded with an error: {}".format(err)
416
- current_app.logger.error(error)
417
- execution.update_state(EXEC_STATE_ERROR)
418
- raise ORQ_ERROR(
419
- error=error,
420
- payload=dict(
421
- message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR],
422
- state=EXEC_STATE_ERROR,
423
- ),
424
- log_txt=f"Error while user {self.get_user()} tries to relaunch execution {idx}. "
425
- + error,
426
- )
427
-
428
- # if we succeed, we register the dag_run_id in the execution table:
429
- orq_data = response.json()
430
- # TODO AGA: revisar el nombre del modelo de ejecuciones
431
- execution.dag_run_id = orq_data[orq_const["run_id"]]
432
- execution.update_state(EXEC_STATE_QUEUED)
433
- current_app.logger.info(
434
- "User {} relaunches execution {}".format(self.get_user_id(), execution.id)
435
- )
436
- return {"message": "The execution was relaunched correctly"}, 201
437
-
438
-
439
- class ExecutionDetailsEndpointBase(BaseMetaResource):
440
- """
441
- Endpoint used to get the information of a certain execution. But not the data!
442
- """
443
-
444
- # TODO AGA DUDA: Se usa? Qué debería devolver?
445
- def __init__(self):
446
- super().__init__()
447
- self.data_model = ExecutionModel
448
- self.foreign_data = {"instance_id": InstanceModel}
449
-
450
-
451
- class ExecutionDetailsEndpoint(ExecutionDetailsEndpointBase):
452
- @doc(description="Get details of an execution", tags=["Executions"], inherit=False)
453
- @authenticate(auth_class=Auth())
454
- @marshal_with(ExecutionDetailsEndpointWithIndicatorsResponse)
455
- @BaseMetaResource.get_data_or_404
456
- def get(self, idx):
457
- """
458
- API method to get an execution created by the user and its related info.
459
- It requires authentication to be passed in the form of a token that has to be linked to
460
- an existing session (login) made by a user.
461
-
462
- :param str idx: ID of the execution.
463
- :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
464
- the data of the execution) and an integer with the HTTP status code.
465
- :rtype: Tuple(dict, integer)
466
- """
467
- current_app.logger.info(
468
- f"User {self.get_user()} gets details of execution {idx}"
469
- )
470
- return self.get_detail(user=self.get_user(), idx=idx)
471
-
472
- @doc(description="Edit an execution", tags=["Executions"], inherit=False)
473
- @authenticate(auth_class=Auth())
474
- @use_kwargs(ExecutionEditRequest, location="json")
475
- def put(self, idx, **data):
476
- """
477
- Edit an existing execution
478
-
479
- :param string idx: ID of the execution.
480
- :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
481
- a message) and an integer with the HTTP status code.
482
- :rtype: Tuple(dict, integer)
483
- """
484
- config = current_app.config
485
-
486
- schema = ExecutionModel.get_one_object(user=self.get_user(), idx=idx).schema
487
-
488
- if data.get("data") is not None and schema is not None:
489
- data_jsonschema = DeployedOrch.get_one_schema(
490
- config, schema, SOLUTION_SCHEMA
491
- )
492
- validation_errors = json_schema_validate_as_string(
493
- data_jsonschema, data["data"]
494
- )
495
-
496
- if validation_errors:
497
- raise InvalidData(
498
- payload=dict(jsonschema_errors=validation_errors),
499
- log_txt=f"Error while user {self.get_user()} tries to edit execution {idx}. "
500
- f"Solution data does not match the jsonschema.",
501
- )
502
-
503
- current_app.logger.info(f"User {self.get_user()} edits execution {idx}")
504
- return self.put_detail(data, user=self.get_user(), idx=idx)
505
-
506
- @doc(description="Delete an execution", tags=["Executions"], inherit=False)
507
- @authenticate(auth_class=Auth())
508
- def delete(self, idx):
509
- """
510
- API method to delete an execution created by the user and its related info.
511
- It requires authentication to be passed in the form of a token that has to be linked to
512
- an existing session (login) made by a user.
513
-
514
- :param string idx: ID of the execution.
515
- :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
516
- a message) and an integer with the HTTP status code.
517
- :rtype: Tuple(dict, integer)
518
- """
519
- current_app.logger.info(f"User {self.get_user()} deleted execution {idx}")
520
- return self.delete_detail(user=self.get_user(), idx=idx)
521
-
522
- @doc(description="Stop an execution", tags=["Executions"], inherit=False)
523
- @authenticate(auth_class=Auth())
524
- @Auth.dag_permission_required
525
- def post(self, idx):
526
- execution = ExecutionModel.get_one_object(user=self.get_user(), idx=idx)
527
- if execution is None:
528
- raise ObjectDoesNotExist(
529
- log_txt=f"Error while user {self.get_user()} tries to stop execution {idx}. "
530
- f"The execution does not exist."
531
- )
532
- af_client = Airflow.from_config(current_app.config)
533
- if not af_client.is_alive():
534
- err = "Airflow is not accessible"
535
- raise AirflowError(
536
- error=err,
537
- log_txt=f"Error while user {self.get_user()} tries to stop execution {idx}. "
538
- + err,
539
- )
540
- response = af_client.set_dag_run_to_fail(
541
- dag_name=execution.schema, dag_run_id=execution.dag_run_id
542
- )
543
- execution.update_state(EXEC_STATE_STOPPED)
544
- current_app.logger.info(f"User {self.get_user()} stopped execution {idx}")
545
- return {"message": "The execution has been stopped"}, 200
546
-
547
-
548
- class ExecutionStatusEndpoint(BaseMetaResource):
549
- """
550
- Endpoint used to get the status of a certain execution that is running in the airflow webserver
551
- """
552
-
553
- def __init__(self):
554
- super().__init__()
555
- self.data_model = ExecutionModel
556
-
557
- @doc(description="Get status of an execution", tags=["Executions"])
558
- @authenticate(auth_class=Auth())
559
- @marshal_with(ExecutionStatusEndpointResponse)
560
- def get(self, idx):
561
- """
562
- API method to get the status of the execution created by the user
563
- It requires authentication to be passed in the form of a token that has to be linked to
564
- an existing session (login) made by a user.
565
-
566
- :param str idx: ID of the execution
567
- :return: A dictionary with a message (error if the execution does not exist or status of the execution)
568
- and an integer with the HTTP status code.
569
- :rtype: Tuple(dict, integer)
570
- """
571
- ORQ_TYPE = current_app.config["CORNFLOW_BACKEND"]
572
- if ORQ_TYPE == AIRFLOW_BACKEND:
573
- orq_const = config_orchestrator["airflow"]
574
- ORQ_ERROR = AirflowError
575
- elif ORQ_TYPE == DATABRICKS_BACKEND:
576
- orq_const = config_orchestrator["databricks"]
577
- # TODO AGA: Revisar si esto funcionaría correctamente
578
- ORQ_ERROR = DatabricksError
579
- execution = self.data_model.get_one_object(user=self.get_user(), idx=idx)
580
- if execution is None:
581
- raise ObjectDoesNotExist(
582
- log_txt=f"Error while user {self.get_user()} tries to get the status of execution {idx}. "
583
- f"The execution does not exist."
584
- )
585
- if execution.state not in [
586
- EXEC_STATE_RUNNING,
587
- EXEC_STATE_QUEUED,
588
- EXEC_STATE_UNKNOWN,
589
- ]:
590
- # we only care on asking orchestrator if the status is unknown, queued or running.
591
- return execution, 200
592
-
593
- def _raise_af_error(execution, error, state=EXEC_STATE_UNKNOWN, log_txt=None):
594
- if log_txt is None:
595
- log_txt = error
596
- message = EXECUTION_STATE_MESSAGE_DICT[state]
597
- execution.update_state(state)
598
- raise ORQ_ERROR(
599
- error=error, payload=dict(message=message, state=state), log_txt=log_txt
600
- )
601
-
602
- print("The execution is ", execution)
603
- print("The execution user is ", self.get_user())
604
- print("The execution id is ", idx)
605
- print("The parameter is ", execution.dag_run_id)
606
-
607
- dag_run_id = execution.dag_run_id
608
- if not dag_run_id:
609
- # it's safe to say we will never get anything if we did not store the dag_run_id
610
- _raise_af_error(
611
- execution,
612
- state=EXEC_STATE_ERROR,
613
- error="The execution has no dag_run associated",
614
- log_txt=f"Error while user {self.get_user()} tries to get the status of execution {idx}. "
615
- f"The execution has no associated dag run id.",
616
- )
617
- schema = execution.schema
618
- # TODO AGA: Revisar si merece la pena hacer una funcion que solo
619
- orch_client, schema_info, execution = get_orch_client(
620
- schema, ORQ_TYPE, execution
621
- )
622
-
623
- if not orch_client.is_alive():
624
- err = orq_const["name"] + " is not accessible"
625
- _raise_af_error(
626
- execution,
627
- err,
628
- log_txt=f"Error while user {self.get_user()} tries to get the status of execution {idx}. "
629
- + err,
630
- )
631
-
632
- try:
633
- # TODO: get the dag_name from somewhere!
634
- state = orch_client.get_run_status(schema, dag_run_id)
635
- except ORQ_ERROR as err:
636
- error = orq_const["name"] + f" responded with an error: {err}"
637
- _raise_af_error(
638
- execution,
639
- error,
640
- log_txt=f"Error while user {self.get_user()} tries to get the status of execution {idx}. "
641
- + str(err),
642
- )
643
- print("The state before mapping is ", state)
644
- state = map_run_state(state, ORQ_TYPE)
645
- print("The state prev to updating is ", state)
646
- execution.update_state(state)
647
- current_app.logger.info(
648
- f"User {self.get_user()} gets status of execution {idx}"
649
- )
650
- return execution, 200
651
-
652
- @doc(description="Change status of an execution", tags=["Executions"])
653
- @authenticate(auth_class=Auth())
654
- @use_kwargs(ExecutionStatusEndpointUpdate)
655
- def put(self, idx, **data):
656
- """
657
- Edit an existing execution
658
-
659
- :param string idx: ID of the execution.
660
- :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
661
- a message) and an integer with the HTTP status code.
662
- :rtype: Tuple(dict, integer)
663
- """
664
-
665
- execution = self.data_model.get_one_object(user=self.get_user(), idx=idx)
666
- if execution is None:
667
- raise ObjectDoesNotExist()
668
- if execution.state not in [
669
- EXEC_STATE_RUNNING,
670
- EXEC_STATE_UNKNOWN,
671
- EXEC_STATE_QUEUED,
672
- ]:
673
- # we only care on asking airflow if the status is unknown or is running.
674
- return {"message": f"execution {idx} updated correctly"}, 200
675
- state = data.get("status")
676
- if state is not None:
677
- execution.update_state(state)
678
- current_app.logger.info(f"User {self.get_user()} edits execution {idx}")
679
- return {"message": f"execution {idx} updated correctly"}, 200
680
- else:
681
- return {"error": "status code was missing"}, 400
682
-
683
-
684
- class ExecutionDataEndpoint(ExecutionDetailsEndpointBase):
685
- """
686
- Endpoint used to get the solution of a certain execution.
687
- """
688
-
689
- @doc(
690
- description="Get solution data of an execution",
691
- tags=["Executions"],
692
- inherit=False,
693
- )
694
- @authenticate(auth_class=Auth())
695
- @marshal_with(ExecutionDataEndpointResponse)
696
- @BaseMetaResource.get_data_or_404
697
- @compressed
698
- def get(self, idx):
699
- """
700
-
701
- :param str idx: ID of the execution.
702
- :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
703
- the data of the execution) and an integer with the HTTP status code.
704
- :rtype: Tuple(dict, integer)
705
- """
706
- current_app.logger.info(f"User {self.get_user()} gets data of execution {idx}")
707
- return self.get_detail(user=self.get_user(), idx=idx)
708
-
709
-
710
- class ExecutionLogEndpoint(ExecutionDetailsEndpointBase):
711
- """
712
- Endpoint used to get the log of a certain execution.
713
- """
714
-
715
- @doc(description="Get log of an execution", tags=["Executions"], inherit=False)
716
- @authenticate(auth_class=Auth())
717
- @marshal_with(ExecutionLogEndpointResponse)
718
- @BaseMetaResource.get_data_or_404
719
- @compressed
720
- def get(self, idx):
721
- """
722
-
723
- :param str idx: ID of the execution.
724
- :return: A dictionary with a message (error if authentication failed, or the execution does not exist or
725
- the data of the execution) and an integer with the HTTP status code.
726
- :rtype: Tuple(dict, integer)
727
- """
728
- current_app.logger.info(f"User {self.get_user()} gets log of execution {idx}")
729
- return self.get_detail(user=self.get_user(), idx=idx)
730
-
731
-
732
- # region aux_functions
733
-
734
-
735
- def get_orch_client(schema, orq_type, execution):
736
- """
737
- Get the orchestrator client and the schema info
738
- """
739
- if orq_type == AIRFLOW_BACKEND:
740
- return get_airflow(schema, execution=execution)
741
- elif orq_type == DATABRICKS_BACKEND:
742
- return get_databricks(schema, execution=execution)
743
- else:
744
- raise EndpointNotImplemented()
745
-
746
-
747
- def get_airflow(schema, execution):
748
- """
749
- Get the Airflow client and the schema info
750
- """
751
- af_client = Airflow.from_config(current_app.config)
752
- schema_info = af_client.get_orch_info(schema)
753
- if not af_client.is_alive():
754
- err = "Airflow is not accessible"
755
- current_app.logger.error(err)
756
- execution.update_state(EXEC_STATE_ERROR_START)
757
- raise AirflowError(
758
- error=err,
759
- payload=dict(
760
- message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
761
- state=EXEC_STATE_ERROR_START,
762
- ),
763
- log_txt=f"Error while user {execution.user_id} tries to create an execution "
764
- + err,
765
- )
766
- # TODO AGA: revisar si tiene sentido que se devuelva execution o si
767
- # es un puntero
768
- return af_client, schema_info, execution
769
-
770
-
771
- def get_databricks(schema, execution):
772
- """
773
- Get the Databricks client and the schema info
774
- """
775
- db_client = Databricks.from_config(current_app.config)
776
- schema_info = db_client.get_orch_info(schema)
777
- if not db_client.is_alive():
778
- err = "Databricks is not accessible"
779
- current_app.logger.error(err)
780
- execution.update_state(EXEC_STATE_ERROR_START)
781
- raise DatabricksError(
782
- error=err,
783
- payload=dict(
784
- message=EXECUTION_STATE_MESSAGE_DICT[EXEC_STATE_ERROR_START],
785
- state=EXEC_STATE_ERROR_START,
786
- ),
787
- log_txt=f"Error while user {execution.user_id} tries to create an execution "
788
- + err,
789
- )
790
- return db_client, schema_info, execution
791
- # endregion
792
-
793
-
794
- def map_run_state(state, ORQ_TYPE):
795
- """
796
- Maps the state of the execution in the orchestrator to the state of the execution in cornflow
797
- """
798
- if ORQ_TYPE == AIRFLOW_BACKEND:
799
- state = state.json()["state"]
800
- return AIRFLOW_TO_STATE_MAP.get(state, EXEC_STATE_UNKNOWN)
801
- elif ORQ_TYPE == DATABRICKS_BACKEND:
802
- print("The state is ", state)
803
- preliminar_state = DATABRICKS_TO_STATE_MAP.get(state, EXEC_STATE_UNKNOWN)
804
- # print("The preliminar state is ", preliminar_state)
805
- # if preliminar_state =="TERMINATED":
806
- # # TODO AGA DUDA: Revisar si es correcto el error predeterminado
807
- # return DATABRICKS_FINISH_TO_STATE_MAP.get(state,EXEC_STATE_ERROR)
808
- return preliminar_state