easylink 0.1.17__py3-none-any.whl → 0.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
easylink/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.17"
1
+ __version__ = "0.1.18"
easylink/cli.py CHANGED
@@ -91,6 +91,11 @@ SHARED_OPTIONS = [
91
91
  default=False,
92
92
  help="Do not save the results in a timestamped sub-directory of ``--output-dir``.",
93
93
  ),
94
+ click.option(
95
+ "--schema",
96
+ hidden=True,
97
+ default="main",
98
+ ),
94
99
  ]
95
100
 
96
101
  VERBOSE_WITH_DEBUGGER_OPTIONS = [
@@ -165,6 +170,7 @@ def run(
165
170
  input_data: str,
166
171
  output_dir: str | None,
167
172
  no_timestamp: bool,
173
+ schema: str,
168
174
  computing_environment: str | None,
169
175
  verbose: int,
170
176
  with_debugger: bool,
@@ -190,6 +196,7 @@ def run(
190
196
  input_data=input_data,
191
197
  computing_environment=computing_environment,
192
198
  results_dir=results_dir,
199
+ schema_name=schema,
193
200
  )
194
201
  logger.info("*** FINISHED ***")
195
202
 
@@ -201,6 +208,7 @@ def generate_dag(
201
208
  input_data: str,
202
209
  output_dir: str | None,
203
210
  no_timestamp: bool,
211
+ schema: str,
204
212
  verbose: int,
205
213
  with_debugger: bool,
206
214
  ) -> None:
@@ -223,6 +231,7 @@ def generate_dag(
223
231
  input_data=input_data,
224
232
  computing_environment=None,
225
233
  results_dir=results_dir,
234
+ schema_name=schema,
226
235
  )
227
236
  logger.info("*** DAG saved to result directory ***")
228
237
 
easylink/configuration.py CHANGED
@@ -14,7 +14,7 @@ from typing import Any
14
14
 
15
15
  from layered_config_tree import LayeredConfigTree
16
16
 
17
- from easylink.pipeline_schema import PIPELINE_SCHEMAS, PipelineSchema
17
+ from easylink.pipeline_schema import PipelineSchema
18
18
  from easylink.utilities.data_utils import load_yaml
19
19
  from easylink.utilities.general_utils import exit_with_validation_error
20
20
 
@@ -67,9 +67,8 @@ class Config(LayeredConfigTree):
67
67
  A dictionary of all specifications required to run the pipeline. This
68
68
  includes the pipeline, input data, and computing environment specifications,
69
69
  as well as the results directory.
70
- potential_schemas
71
- A list of potential schemas to validate the pipeline configuration against.
72
- This is primarily used for testing purposes. Defaults to the supported schemas.
70
+ schema_name
71
+ The name of the schema to validate the pipeline configuration against.
73
72
 
74
73
  Attributes
75
74
  ----------
@@ -82,22 +81,14 @@ class Config(LayeredConfigTree):
82
81
  input_data
83
82
  The input data filepaths.
84
83
  schema
85
- The :class:`~easylink.pipeline_schema.PipelineSchema` that successfully
86
- validated the requested pipeline.
87
-
88
- Notes
89
- -----
90
- The requested pipeline is checked against a set of supported
91
- ``PipelineSchemas``. The first schema that successfully validates is assumed
92
- to be the correct one and is attached to the ``Config`` object and its
93
- :meth:`~easylink.pipeline_schema.PipelineSchema.configure_pipeline`
94
- method is called.
84
+ The :class:`~easylink.pipeline_schema.PipelineSchema`.
85
+
95
86
  """
96
87
 
97
88
  def __init__(
98
89
  self,
99
90
  config_params: dict[str, Any],
100
- potential_schemas: PipelineSchema | list[PipelineSchema] = PIPELINE_SCHEMAS,
91
+ schema_name: str = "main",
101
92
  ) -> None:
102
93
  super().__init__(layers=["initial_data", "default", "user_configured"])
103
94
  self.update(DEFAULT_ENVIRONMENT, layer="default")
@@ -108,9 +99,7 @@ class Config(LayeredConfigTree):
108
99
  # Set slurm defaults to empty dict instead of None so that we don't get errors
109
100
  # in slurm_resources property
110
101
  self.update({"environment": {"slurm": {}}}, layer="default")
111
- if not isinstance(potential_schemas, list):
112
- potential_schemas = [potential_schemas]
113
- self.update({"schema": self._get_schema(potential_schemas)}, layer="initial_data")
102
+ self.update({"schema": self._get_schema(schema_name)}, layer="initial_data")
114
103
  self.schema.configure_pipeline(self.pipeline, self.input_data)
115
104
  self._validate()
116
105
  self.freeze()
@@ -173,22 +162,22 @@ class Config(LayeredConfigTree):
173
162
  # Setup Methods #
174
163
  #################
175
164
 
176
- def _get_schema(self, potential_schemas: list[PipelineSchema]) -> PipelineSchema:
165
+ def _get_schema(self, schema_name: str = "main") -> PipelineSchema:
177
166
  """Returns the requested :class:`~easylink.pipeline_schema.PipelineSchema` if it validates the requested pipeline.
178
167
 
179
168
  Parameters
180
169
  ----------
181
- potential_schemas
182
- ``PipelineSchemas`` to validate the pipeline configuration against.
170
+ schema_name
171
+ The name of the specific ``PipelineSchema`` to validate the pipeline configuration against.
183
172
 
184
173
  Returns
185
174
  -------
186
- The first ``PipelineSchema`` that validates the requested pipeline configuration.
175
+ The requested ``PipelineSchema`` if it validates the requested pipeline configuration.
187
176
 
188
177
  Raises
189
178
  ------
190
179
  SystemExit
191
- If the pipeline configuration is not valid for any of the ``potential_schemas``,
180
+ If the pipeline configuration is not valid for the requested schema,
192
181
  the program exits with a non-zero code and all validation errors found
193
182
  are logged.
194
183
 
@@ -197,20 +186,15 @@ class Config(LayeredConfigTree):
197
186
  This acts as the pipeline configuration file's validation method since
198
187
  we can only find a matching ``PipelineSchema`` if that file is valid.
199
188
 
200
- This method returns the *first* ``PipelineSchema`` that validates and does
201
- not attempt to check additional ones.
202
189
  """
203
190
  errors = defaultdict(dict)
204
191
  # Validate the requested pipeline against the single named schema
205
- for schema in potential_schemas:
206
- logs = schema.validate_step(self.pipeline, self.input_data)
207
- if logs:
208
- errors[PIPELINE_ERRORS_KEY][schema.name] = logs
209
- pass # try the next schema
210
- else: # schema was validated
211
- return schema
212
- # No schemas were validated
213
- exit_with_validation_error(dict(errors))
192
+ schema = PipelineSchema.get_schema(schema_name)
193
+ logs = schema.validate_step(self.pipeline, self.input_data)
194
+ if logs:
195
+ errors[PIPELINE_ERRORS_KEY][schema.name] = logs
196
+ exit_with_validation_error(dict(errors))
197
+ return schema
214
198
 
215
199
  def _validate(self) -> None:
216
200
  """Validates the ``Config``.
@@ -19,7 +19,7 @@ from typing import cast
19
19
  import yaml
20
20
  from loguru import logger
21
21
 
22
- from easylink.pipeline_schema_constants import ALLOWED_SCHEMA_PARAMS
22
+ from easylink.pipeline_schema_constants import SCHEMA_PARAMS
23
23
  from easylink.step import (
24
24
  ChoiceStep,
25
25
  EmbarrassinglyParallelStep,
@@ -244,17 +244,17 @@ class ImplementationCreator:
244
244
  @staticmethod
245
245
  def _extract_output_slot(script_path: Path, step_name: str) -> str:
246
246
  """Extracts the name of the output slot that this script is implementing."""
247
- schema = ImplementationCreator._extract_pipeline_schema(script_path)
248
- implementable_steps = ImplementationCreator._extract_implementable_steps(schema)
247
+ schema_name = ImplementationCreator._extract_pipeline_schema_name(script_path)
248
+ implementable_steps = ImplementationCreator._extract_implementable_steps(schema_name)
249
249
  step_names = [step.name for step in implementable_steps]
250
250
  if step_name not in step_names:
251
251
  raise ValueError(
252
- f"'{step_name}' does not exist as an implementable step in the '{schema}' pipeline schema. "
252
+ f"'{step_name}' does not exist as an implementable step in the '{schema_name}' pipeline schema. "
253
253
  )
254
254
  duplicates = list(set([step for step in step_names if step_names.count(step) > 1]))
255
255
  if duplicates:
256
256
  raise ValueError(
257
- f"Multiple implementable steps with the same name found in the '{schema}' "
257
+ f"Multiple implementable steps with the same name found in the '{schema_name}' "
258
258
  f"pipeline schema: {duplicates}."
259
259
  )
260
260
  implemented_step = [step for step in implementable_steps if step.name == step_name][0]
@@ -266,7 +266,7 @@ class ImplementationCreator:
266
266
  return list(implemented_step.output_slots)[0]
267
267
 
268
268
  @staticmethod
269
- def _extract_implementable_steps(schema: str) -> list[Step]:
269
+ def _extract_implementable_steps(schema_name: str) -> list[Step]:
270
270
  """Extracts all implementable steps from the pipeline schema.
271
271
 
272
272
  This method recursively traverses the pipeline schema specified in the script
@@ -296,8 +296,7 @@ class ImplementationCreator:
296
296
  implementable_steps.append(node)
297
297
  return
298
298
 
299
- schema_steps = ALLOWED_SCHEMA_PARAMS[schema][0]
300
-
299
+ schema_steps, _edges = SCHEMA_PARAMS[schema_name]
301
300
  implementable_steps: list[Step] = []
302
301
  for schema_step in schema_steps:
303
302
  _process_step(schema_step)
@@ -305,7 +304,7 @@ class ImplementationCreator:
305
304
  return implementable_steps
306
305
 
307
306
  @staticmethod
308
- def _extract_pipeline_schema(script_path: Path) -> str:
307
+ def _extract_pipeline_schema_name(script_path: Path) -> str:
309
308
  """Extracts the relevant pipeline schema name.
310
309
 
311
310
  The expectation is that the output slot's name is specified within the script
@@ -316,8 +315,11 @@ class ImplementationCreator:
316
315
 
317
316
  If no pipeline schema is specified, "main" will be used by default.
318
317
  """
319
- schema = _extract_metadata("PIPELINE_SCHEMA", script_path)
320
- return "main" if len(schema) == 0 else schema[0]
318
+ schema_name_list: list[str] = _extract_metadata("PIPELINE_SCHEMA", script_path)
319
+ schema_name = "main" if len(schema_name_list) == 0 else schema_name_list[0]
320
+ if schema_name not in SCHEMA_PARAMS:
321
+ raise ValueError(f"Pipeline schema '{schema_name}' is not supported.")
322
+ return schema_name
321
323
 
322
324
  @staticmethod
323
325
  def _write_metadata(info: dict[str, dict[str, str]]) -> None:
@@ -204,9 +204,6 @@ dummy_step_1_for_output_dir_example_default:
204
204
  - step_1_for_output_dir_example
205
205
  image_path: /mnt/team/simulation_science/priv/engineering/er_ecosystem/images/zmbc/dummy_step_1_for_output_dir_example.sif
206
206
  script_cmd: python /dummy_step_1_for_output_dir_example.py
207
- # leave outputs out for testing purposes
208
- # outputs:
209
- # step_1_main_output_directory: output_dir/
210
207
  dummy_step_2_for_output_dir_example:
211
208
  steps:
212
209
  - step_2_for_output_dir_example
@@ -14,7 +14,7 @@ from pathlib import Path
14
14
  from layered_config_tree import LayeredConfigTree
15
15
 
16
16
  from easylink.graph_components import EdgeParams, ImplementationGraph
17
- from easylink.pipeline_schema_constants import ALLOWED_SCHEMA_PARAMS
17
+ from easylink.pipeline_schema_constants import SCHEMA_PARAMS
18
18
  from easylink.step import HierarchicalStep, NonLeafConfigurationState, Step
19
19
 
20
20
 
@@ -39,7 +39,7 @@ class PipelineSchema(HierarchicalStep):
39
39
 
40
40
  Notes
41
41
  -----
42
- All ``PipelineSchema`` instances are intended to be created by the :meth:`_get_schemas`
42
+ A ``PipelineSchema`` is intended to be constructed by the :meth:`get_schema`
43
43
  class method.
44
44
 
45
45
  The ``PipelineSchema`` is a high-level abstraction; it represents the desired
@@ -159,22 +159,21 @@ class PipelineSchema(HierarchicalStep):
159
159
  )
160
160
 
161
161
  @classmethod
162
- def _get_schemas(cls) -> list["PipelineSchema"]:
162
+ def get_schema(cls, name: str = "main") -> list["PipelineSchema"]:
163
163
  """Gets the requested ``PipelineSchema`` by name.
164
164
 
165
165
  These ``PipelineSchemas`` represent the fully supported pipelines and are
166
166
  used to validate the user-requested pipeline.
167
167
 
168
+ Parameters
169
+ ----------
170
+ name
171
+ The name of the ``PipelineSchema`` to get.
172
+
168
173
  Returns
169
174
  -------
170
- All allowable ``PipelineSchemas``.
175
+ The requested ``PipelineSchema``.
171
176
  """
172
- return [
173
- cls(name, nodes=nodes, edges=edges)
174
- for name, (nodes, edges) in ALLOWED_SCHEMA_PARAMS.items()
175
- ]
176
-
177
-
178
- PIPELINE_SCHEMAS = PipelineSchema._get_schemas()
179
- """All allowable :class:`PipelineSchemas<PipelineSchema>` to validate the requested
180
- pipeline against."""
177
+ if name not in SCHEMA_PARAMS:
178
+ raise ValueError(f"Pipeline schema '{name}' is not supported.")
179
+ return cls(name, *SCHEMA_PARAMS[name])
@@ -11,11 +11,10 @@ package defines the nodes and edges required to instantiate such ``PipelineSchem
11
11
 
12
12
  from easylink.pipeline_schema_constants import development, testing
13
13
 
14
- ALLOWED_SCHEMA_PARAMS = {
14
+ SCHEMA_PARAMS = {
15
+ "main": "TODO",
16
+ # development and testing
15
17
  "development": development.SCHEMA_PARAMS,
16
- }
17
-
18
- TESTING_SCHEMA_PARAMS = {
19
18
  "integration": testing.SCHEMA_PARAMS_ONE_STEP,
20
19
  "output_dir": testing.SCHEMA_PARAMS_OUTPUT_DIR,
21
20
  "combine_bad_topology": testing.SCHEMA_PARAMS_BAD_COMBINED_TOPOLOGY,
easylink/runner.py CHANGED
@@ -19,7 +19,6 @@ from snakemake.cli import main as snake_main
19
19
 
20
20
  from easylink.configuration import Config, load_params_from_specification
21
21
  from easylink.pipeline import Pipeline
22
- from easylink.pipeline_schema import PIPELINE_SCHEMAS, PipelineSchema
23
22
  from easylink.utilities.data_utils import (
24
23
  copy_configuration_files_to_results_directory,
25
24
  create_results_directory,
@@ -35,8 +34,8 @@ def main(
35
34
  input_data: str | Path,
36
35
  computing_environment: str | Path | None,
37
36
  results_dir: str | Path,
38
- debug=False,
39
- potential_schemas: PipelineSchema | list[PipelineSchema] = PIPELINE_SCHEMAS,
37
+ schema_name: str = "main",
38
+ debug: bool = False,
40
39
  ) -> None:
41
40
  """Runs an EasyLink command.
42
41
 
@@ -60,17 +59,16 @@ def main(
60
59
  to run the pipeline on. If None, the pipeline will be run locally.
61
60
  results_dir
62
61
  The directory to write results and incidental files (logs, etc.) to.
62
+ schema_name
63
+ The name of the schema to validate the pipeline configuration against.
63
64
  debug
64
65
  If False (the default), will suppress some of the workflow output. This
65
66
  is intended to only be used for testing and development purposes.
66
- potential_schemas
67
- A list of potential schemas to validate the pipeline configuration against.
68
- This is primarily used for testing purposes. Defaults to the supported schemas.
69
67
  """
70
68
  config_params = load_params_from_specification(
71
69
  pipeline_specification, input_data, computing_environment, results_dir
72
70
  )
73
- config = Config(config_params, potential_schemas)
71
+ config = Config(config_params, schema_name)
74
72
  pipeline = Pipeline(config)
75
73
  # After validation is completed, create the results directory
76
74
  create_results_directory(Path(results_dir))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.17
3
+ Version: 0.1.18
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -1,21 +1,21 @@
1
1
  easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
2
2
  easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
3
- easylink/_version.py,sha256=BzIjnki8Bz3evNWo6bjGxxpLhy_tN9MRYhtM0MnDiWs,23
4
- easylink/cli.py,sha256=mv9l9XHojfhDK4hpDeV1E4iensgt6zx2ovkGBQ8x9xk,9745
5
- easylink/configuration.py,sha256=lfm8ViUpr1-O-EovTjKZbAlIht2EBv3RndN1mzYbmDE,12565
3
+ easylink/_version.py,sha256=6BiuMUkhwQp6bzUZSF8np8F1NwCltEtK0sPBF__tepU,23
4
+ easylink/cli.py,sha256=v8OALTAI3WlNELrHuGQumlJFdmYML4K-XX_OtqSJYZM,9925
5
+ easylink/configuration.py,sha256=rFPTZMEBZjiXYZWesUqpppOj6ONxp3sybf5g9MiDsOY,11639
6
6
  easylink/graph_components.py,sha256=zZDZXg5smReHO3ryQC4pao24wyKXzWDe6jS3C6fM2ak,13892
7
7
  easylink/implementation.py,sha256=XLSat6_IXFn-nH6X8AazmfWhDtTK4GtA7yiht9QLlQQ,11366
8
- easylink/implementation_metadata.yaml,sha256=0BQ_NIV29EtJ6G_wGD_-2OYPjYw2sNLAgCg2GbNwcuI,7662
8
+ easylink/implementation_metadata.yaml,sha256=trq5CvSSZRmqRQ979o68L2QONvlv-ncFXS-rh3-79Uk,7558
9
9
  easylink/pipeline.py,sha256=5KOYH5HyJjVlFoBRKGLs2hn5mpC3tPYG_ux3T1qSV9k,17504
10
10
  easylink/pipeline_graph.py,sha256=9ysX4wAkA-WkUoo15jSLAErncybE4tJwznVx7N_kwIA,23922
11
- easylink/pipeline_schema.py,sha256=Q2sCpsC-F2W0yxVP7ufunowDepOBrRVENXOdap9J5iY,6921
11
+ easylink/pipeline_schema.py,sha256=FieJBa3rKgaCIB9QDuQEfWJ9joNBUUp6iHT6xmns-Vk,6886
12
12
  easylink/rule.py,sha256=NusEUtBxx18L7UCcgDi3KKooFxSUgyS4eisVM5aPqFE,16770
13
- easylink/runner.py,sha256=cbCo5_NvvulmjjAaBCG6qCmbtJiHK-7NuDvbngdU_PY,6675
13
+ easylink/runner.py,sha256=GhkPGDh9UFOb38ksqXpMKZoxXs9hZaOFzZDo2jlEp-U,6458
14
14
  easylink/step.py,sha256=u1AMPrYGNVb3ZH6uB_U0dUeJvOeQ2MoVHdlC8k63AA8,85226
15
- easylink/devtools/implementation_creator.py,sha256=mkiQ9nhtQC3mhxcG8IyvejzSK0WSkwplCztPLXbpXXQ,16199
15
+ easylink/devtools/implementation_creator.py,sha256=ddzJltlzOfvzwAMuInovCbfn3IM2u_s7I_dObWV4os0,16430
16
16
  easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
17
17
  easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
18
- easylink/pipeline_schema_constants/__init__.py,sha256=FUngnh80yfpr76j18iBVKSoR8-5zxQj_mW_muUMrafw,1324
18
+ easylink/pipeline_schema_constants/__init__.py,sha256=45S-Q69CugGfBroHuGR8c7Jlq1wqAy5lRtys5C_0--M,1337
19
19
  easylink/pipeline_schema_constants/development.py,sha256=XxcYYZDZM4IADp3eFPQCchD6-OtMp99GiyZBfSswzFo,12640
20
20
  easylink/pipeline_schema_constants/testing.py,sha256=UDmVVjI1SiDktMbJ2CrSb7amHSYNwhgqNkXhl4lYxQw,20459
21
21
  easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
@@ -48,8 +48,8 @@ easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,9
48
48
  easylink/utilities/spark.smk,sha256=kGtpem7LfQc71tMh5WAYaqKnHQKFvcdhPQSdumOP70k,5799
49
49
  easylink/utilities/splitter_utils.py,sha256=UOz4hjkEPqaAz0RrDkDYYej79lLSaq0VVVSH_tF1z0o,3838
50
50
  easylink/utilities/validation_utils.py,sha256=rOIeQbbXXdsuL2hI0i2gApAWfiNJXMwYH4pmw8uLrGM,1867
51
- easylink-0.1.17.dist-info/METADATA,sha256=fl6OzaU74KHClV-dADXheUoKBMuIs8pUqLANelfaqBY,3477
52
- easylink-0.1.17.dist-info/WHEEL,sha256=A8Eltl-h0W-qZDVezsLjjslosEH_pdYC2lQ0JcbgCzs,91
53
- easylink-0.1.17.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
54
- easylink-0.1.17.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
55
- easylink-0.1.17.dist-info/RECORD,,
51
+ easylink-0.1.18.dist-info/METADATA,sha256=9RPc6nIJrkdNQxUXqVYQW26h2G3ukGuXyAmUA4razpA,3477
52
+ easylink-0.1.18.dist-info/WHEEL,sha256=QZxptf4Y1BKFRCEDxD4h2V0mBFQOVFLFEpvxHmIs52A,91
53
+ easylink-0.1.18.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
54
+ easylink-0.1.18.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
55
+ easylink-0.1.18.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.0)
2
+ Generator: setuptools (80.6.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5