easylink 0.1.20__tar.gz → 0.1.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {easylink-0.1.20 → easylink-0.1.21}/.gitignore +1 -1
- {easylink-0.1.20 → easylink-0.1.21}/CHANGELOG.rst +5 -0
- {easylink-0.1.20 → easylink-0.1.21}/PKG-INFO +1 -1
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/workarounds.rst +2 -2
- easylink-0.1.21/docs/source/user_guide/tutorials/DAG-e2e-pipeline-expanded.svg +283 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/getting_started.rst +4 -5
- easylink-0.1.21/src/easylink/_version.py +1 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/devtools/implementation_creator.py +2 -2
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/implementation.py +2 -2
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/pipeline.py +13 -15
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/pipeline_graph.py +10 -15
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/pipeline_schema_constants/__init__.py +4 -4
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/pipeline_schema_constants/development.py +4 -4
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/pipeline_schema_constants/main.py +5 -5
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/pipeline_schema_constants/testing.py +22 -16
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/rule.py +9 -10
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/step.py +34 -35
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/utilities/aggregator_utils.py +2 -2
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/utilities/splitter_utils.py +1 -1
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink.egg-info/PKG-INFO +1 -1
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink.egg-info/SOURCES.txt +5 -5
- {easylink-0.1.20 → easylink-0.1.21}/tests/e2e/test_step_types.py +5 -5
- {easylink-0.1.20 → easylink-0.1.21}/tests/integration/test_compositions.py +25 -22
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/e2e/pipeline_cascade.yaml +4 -4
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/e2e/pipeline_expanded.yaml +1 -1
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/e2e/pipeline_splink_dummy.yaml +2 -2
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/e2e/pipeline_with_fastLink.yaml +2 -2
- easylink-0.1.20/tests/specifications/integration/embarrassingly_parallel/pipeline_parallel_step.yaml → easylink-0.1.21/tests/specifications/integration/auto_parallel/pipeline_cloneable_step.yaml +1 -1
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_combine_with_parallel.yaml +1 -1
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_nested_templated_steps.yaml +3 -3
- easylink-0.1.20/tests/specifications/unit/pipeline_wrong_parallel_split_keys.yaml → easylink-0.1.21/tests/specifications/unit/pipeline_wrong_clone_keys.yaml +1 -1
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_pipeline_graph.py +28 -36
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_rule.py +8 -8
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_step.py +85 -89
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_validations.py +2 -2
- easylink-0.1.20/docs/source/user_guide/tutorials/DAG-e2e-pipeline-expanded.svg +0 -319
- easylink-0.1.20/src/easylink/_version.py +0 -1
- {easylink-0.1.20 → easylink-0.1.21}/.bandit +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/.flake8 +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/.github/CODEOWNERS +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/.github/pull_request_template.md +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/.github/workflows/deploy.yml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/.github/workflows/update_readme.yml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/.readthedocs.yml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/CONTRIBUTING.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/Jenkinsfile +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/LICENSE +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/Makefile +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/README.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/Makefile +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/nitpick-exceptions +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/_static/style.css +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/_templates/layout.html +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/cli.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/configuration.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/graph_components.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/implementation.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/index.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/pipeline.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/pipeline_graph.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/pipeline_schema.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/pipeline_schema_constants/development.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/pipeline_schema_constants/index.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/pipeline_schema_constants/testing.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/rule.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/runner.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/step.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/utilities/aggregator_utils.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/utilities/data_utils.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/utilities/general_utils.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/utilities/index.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/utilities/paths.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/utilities/splitter_utils.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/api_reference/utilities/validation_utils.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/index.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/01_step.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/02_default_implementation.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/03_slots.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/04_data_dependency.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/05_pipeline_schema.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/06_default_input.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/07_cloneable_section.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/08_cloneable_section_expanded.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/09_loopable_section.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/10_loopable_section_expanded.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/11_cloneable_section_splitter.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/12_cloneable_section_splitter_expanded.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/13_autoparallel_section.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/14_choice_section.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/15_choice_section_expanded.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/16_step_hierarchy.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/18_schema_to_pipeline.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/19_schema_to_pipeline_combined.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/clustering_sub_steps.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/easylink_pipeline_schema.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/entity_resolution_sub_steps.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/images/linking_sub_steps.drawio.png +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/concepts/pipeline_schema/index.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/conf.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/glossary.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/index.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/cli.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/index.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/DAG-common-pipeline.svg +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/DAG-e2e-pipeline.svg +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/DAG-r-pyspark.svg +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/environment_slurm.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/impl-config-pipeline.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/index.rst +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/input_data.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/input_file_1.parquet +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/input_file_2.parquet +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/input_file_3.parquet +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/docs/source/user_guide/tutorials/r_spark_pipeline.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/pyproject.toml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/python_versions.json +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/pytype.cfg +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/setup.cfg +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/setup.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/__about__.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/__init__.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/cli.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/configuration.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/graph_components.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/images/spark_cluster/Dockerfile +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/images/spark_cluster/README.md +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/implementation_metadata.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/pipeline_schema.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/runner.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/cascading/exclude_clustered.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/cascading/exclude_clustered.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/cascading/exclude_none.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/cascading/exclude_none.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/cascading/update_clusters_by_connected_components.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/cascading/update_clusters_by_connected_components.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_clusters_to_links.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_clusters_to_links.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_determining_exclusions.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_determining_exclusions.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_removing_records.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_removing_records.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_schema_alignment.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_schema_alignment.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_updating_clusters.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/default/default_updating_clusters.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/README.md +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/build-containers-local.sh +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/build-containers-remote.sh +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/input_data/create_input_files.ipynb +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/input_data/input_file_1.csv +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/input_data/input_file_1.parquet +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/input_data/input_file_2.csv +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/input_data/input_file_2.parquet +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/python_pandas/README.md +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/python_pandas/dummy_step.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/python_pandas/python_pandas.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/python_pyspark/README.md +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/python_pyspark/dummy_step.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/python_pyspark/python_pyspark.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/r/README.md +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/r/dummy_step.R +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/r/r-image.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/dev/test.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/fastLink/fastLink_evaluating_pairs.R +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/fastLink/fastLink_evaluating_pairs.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/fastLink/fastLink_links_to_clusters.R +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/fastLink/fastLink_links_to_clusters.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/output_dir/dummy_step_1_for_output_dir_example.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/output_dir/dummy_step_1_for_output_dir_example.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/output_dir/dummy_step_2_for_output_dir_example.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/output_dir/dummy_step_2_for_output_dir_example.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/rl-dummy/input_data/create_input_files.ipynb +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/rl-dummy/input_data/input_file_1.parquet +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/rl-dummy/input_data/input_file_2.parquet +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/rl-dummy/input_data/known_clusters.parquet +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/splink/splink_blocking_and_filtering.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/splink/splink_blocking_and_filtering.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/splink/splink_evaluating_pairs.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/splink/splink_evaluating_pairs.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/splink/splink_links_to_clusters.def +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/steps/splink/splink_links_to_clusters.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/utilities/__init__.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/utilities/data_utils.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/utilities/general_utils.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/utilities/paths.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/utilities/spark.smk +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink/utilities/validation_utils.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink.egg-info/dependency_links.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink.egg-info/entry_points.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink.egg-info/not-zip-safe +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink.egg-info/requires.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/src/easylink.egg-info/top_level.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/__init__.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/conftest.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/e2e/pipeline_splink_dummy_results.csv +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/e2e/test_easylink_run.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/e2e/test_pipelines_main_schema.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/integration/test_data_utils.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/integration/test_snakemake.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/integration/test_snakemake_slurm.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/integration/test_snakemake_spark.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/common/environment_local.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/common/input_data.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/common/input_data_one_file.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/common/pipeline.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/e2e/environment_slurm.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/e2e/input_data_dummy.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/e2e/pipeline.yaml +0 -0
- {easylink-0.1.20/tests/specifications/integration/embarrassingly_parallel → easylink-0.1.21/tests/specifications/integration/auto_parallel}/pipeline_hierarchical_step.yaml +0 -0
- {easylink-0.1.20/tests/specifications/integration/embarrassingly_parallel → easylink-0.1.21/tests/specifications/integration/auto_parallel}/pipeline_loop_step.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/integration/environment_spark_slurm.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/integration/pipeline.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/integration/pipeline_output_dir.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/integration/pipeline_output_dir_default.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/integration/pipeline_spark.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/environment_minimum.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/environment_spark_slurm.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_bad_combined_implementations.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_bad_implementation.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_bad_loop_formatting.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_bad_step.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_bad_type_key.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_combine_bad_implementation_names.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_combine_bad_topology.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_combine_two_steps.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_combine_with_extra_node.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_combine_with_iteration.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_combine_with_iteration_cycle.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_combine_with_missing_node.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_missing_implementation_name.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_missing_implementations.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_missing_loop_nodes.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_missing_step.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_missing_substeps.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_missing_type_key.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_out_of_order.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_spark.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/specifications/unit/pipeline_type_config_mismatch.yaml +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/__init__.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/conftest.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/recipe_strings/python_pandas.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/rule_strings/aggregation_rule.txt +0 -0
- /easylink-0.1.20/tests/unit/rule_strings/embarrassingly_parallel_rule.txt → /easylink-0.1.21/tests/unit/rule_strings/auto_parallel_rule.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/rule_strings/checkpoint_rule.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/rule_strings/implemented_rule_local.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/rule_strings/implemented_rule_slurm.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/rule_strings/pipeline_local.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/rule_strings/pipeline_slurm.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/rule_strings/target_rule.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/rule_strings/validation_rule.txt +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_cli.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_config.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_data_utils.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_general_utils.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_graph_components.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_implementation.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_implementation_creator.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_pipeline.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_pipeline_schema.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/tests/unit/test_runner.py +0 -0
- {easylink-0.1.20 → easylink-0.1.21}/update_readme.py +0 -0
@@ -1,3 +1,8 @@
|
|
1
|
+
**0.1.21 - 6/16/25**
|
2
|
+
|
3
|
+
- Update the "parallel" pipeline configuration key to "clones"
|
4
|
+
- Update "ParallelStep" and "EmbarrassinglyParallelStep" class names to "CloneableStep" and "AutoParallelStep", respectively
|
5
|
+
|
1
6
|
**0.1.20 - 6/11/25**
|
2
7
|
|
3
8
|
- Update implementation creator tool for new implementation_metadata.yaml format
|
@@ -74,7 +74,7 @@ The ``pipeline.yaml`` section for the cloneable section diagrammed above might l
|
|
74
74
|
:emphasize-lines: 7,12,17,22
|
75
75
|
|
76
76
|
determining_exclusions_and_removing_records:
|
77
|
-
parallel:
|
77
|
+
clones:
|
78
78
|
- determining_exclusions:
|
79
79
|
implementation:
|
80
80
|
name: default_determining_exclusions
|
@@ -103,7 +103,7 @@ both steps in the above diagram received the output of the splitter.
|
|
103
103
|
|
104
104
|
It is very important that the user:
|
105
105
|
|
106
|
-
* Create a number of clones (items in the list under ``parallel``) that matches the number of files
|
106
|
+
* Create a number of clones (items in the list under ``clones``) that matches the number of files
|
107
107
|
* Pass each clone the name of one of the files
|
108
108
|
* Pass each sub-step *within* each clone the same file name
|
109
109
|
|
@@ -0,0 +1,283 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
2
|
+
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
3
|
+
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
4
|
+
<!-- Generated by graphviz version 12.2.1 (0)
|
5
|
+
-->
|
6
|
+
<!-- Title: snakemake_dag Pages: 1 -->
|
7
|
+
<svg width="1106pt" height="980pt"
|
8
|
+
viewBox="0.00 0.00 1106.25 980.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
9
|
+
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 976)">
|
10
|
+
<title>snakemake_dag</title>
|
11
|
+
<polygon fill="white" stroke="none" points="-4,4 -4,-976 1102.25,-976 1102.25,4 -4,4"/>
|
12
|
+
<!-- 0 -->
|
13
|
+
<g id="node1" class="node">
|
14
|
+
<title>0</title>
|
15
|
+
<path fill="none" stroke="#d6d856" stroke-width="2" d="M630.12,-36C630.12,-36 600.12,-36 600.12,-36 594.12,-36 588.12,-30 588.12,-24 588.12,-24 588.12,-12 588.12,-12 588.12,-6 594.12,0 600.12,0 600.12,0 630.12,0 630.12,0 636.12,0 642.12,-6 642.12,-12 642.12,-12 642.12,-24 642.12,-24 642.12,-30 636.12,-36 630.12,-36"/>
|
16
|
+
<text text-anchor="middle" x="615.12" y="-14.88" font-family="sans" font-size="10.00">all</text>
|
17
|
+
</g>
|
18
|
+
<!-- 1 -->
|
19
|
+
<g id="node2" class="node">
|
20
|
+
<title>1</title>
|
21
|
+
<path fill="none" stroke="#56d8d8" stroke-width="2" d="M670,-180C670,-180 560.25,-180 560.25,-180 554.25,-180 548.25,-174 548.25,-168 548.25,-168 548.25,-156 548.25,-156 548.25,-150 554.25,-144 560.25,-144 560.25,-144 670,-144 670,-144 676,-144 682,-150 682,-156 682,-156 682,-168 682,-168 682,-174 676,-180 670,-180"/>
|
22
|
+
<text text-anchor="middle" x="615.12" y="-158.88" font-family="sans" font-size="10.00">step_4b_python_pandas</text>
|
23
|
+
</g>
|
24
|
+
<!-- 1->0 -->
|
25
|
+
<g id="edge1" class="edge">
|
26
|
+
<title>1->0</title>
|
27
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M603.38,-143.19C597.54,-133.28 591.11,-120.45 588.12,-108 584.39,-92.44 584.39,-87.56 588.12,-72 590.06,-63.93 593.45,-55.7 597.18,-48.2"/>
|
28
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="600.22,-49.93 601.93,-39.47 594.08,-46.58 600.22,-49.93"/>
|
29
|
+
</g>
|
30
|
+
<!-- 19 -->
|
31
|
+
<g id="node20" class="node">
|
32
|
+
<title>19</title>
|
33
|
+
<path fill="none" stroke="#d8ac56" stroke-width="2" d="M715.5,-108C715.5,-108 608.75,-108 608.75,-108 602.75,-108 596.75,-102 596.75,-96 596.75,-96 596.75,-84 596.75,-84 596.75,-78 602.75,-72 608.75,-72 608.75,-72 715.5,-72 715.5,-72 721.5,-72 727.5,-78 727.5,-84 727.5,-84 727.5,-96 727.5,-96 727.5,-102 721.5,-108 715.5,-108"/>
|
34
|
+
<text text-anchor="middle" x="662.12" y="-86.88" font-family="sans" font-size="10.00">results_result_validator</text>
|
35
|
+
</g>
|
36
|
+
<!-- 1->19 -->
|
37
|
+
<g id="edge25" class="edge">
|
38
|
+
<title>1->19</title>
|
39
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M626.99,-143.34C631.85,-136.1 637.6,-127.53 643.02,-119.45"/>
|
40
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="645.75,-121.67 648.42,-111.41 639.94,-117.77 645.75,-121.67"/>
|
41
|
+
</g>
|
42
|
+
<!-- 2 -->
|
43
|
+
<g id="node3" class="node">
|
44
|
+
<title>2</title>
|
45
|
+
<path fill="none" stroke="#569ad8" stroke-width="2" d="M603,-324C603,-324 493.25,-324 493.25,-324 487.25,-324 481.25,-318 481.25,-312 481.25,-312 481.25,-300 481.25,-300 481.25,-294 487.25,-288 493.25,-288 493.25,-288 603,-288 603,-288 609,-288 615,-294 615,-300 615,-300 615,-312 615,-312 615,-318 609,-324 603,-324"/>
|
46
|
+
<text text-anchor="middle" x="548.12" y="-302.88" font-family="sans" font-size="10.00">step_4a_python_pandas</text>
|
47
|
+
</g>
|
48
|
+
<!-- 2->1 -->
|
49
|
+
<g id="edge3" class="edge">
|
50
|
+
<title>2->1</title>
|
51
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M615.72,-298.8C676.7,-291.64 759.84,-277.44 780.12,-252 809.98,-214.56 751.02,-190.76 695.47,-177.28"/>
|
52
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="696.45,-173.92 685.92,-175.08 694.88,-180.74 696.45,-173.92"/>
|
53
|
+
</g>
|
54
|
+
<!-- 18 -->
|
55
|
+
<g id="node19" class="node">
|
56
|
+
<title>18</title>
|
57
|
+
<path fill="none" stroke="#d88556" stroke-width="2" d="M429.12,-252C429.12,-252 167.12,-252 167.12,-252 161.12,-252 155.12,-246 155.12,-240 155.12,-240 155.12,-228 155.12,-228 155.12,-222 161.12,-216 167.12,-216 167.12,-216 429.12,-216 429.12,-216 435.12,-216 441.12,-222 441.12,-228 441.12,-228 441.12,-240 441.12,-240 441.12,-246 435.12,-252 429.12,-252"/>
|
58
|
+
<text text-anchor="middle" x="298.12" y="-230.88" font-family="sans" font-size="10.00">step_4b_python_pandas_step_4b_main_input_validator</text>
|
59
|
+
</g>
|
60
|
+
<!-- 2->18 -->
|
61
|
+
<g id="edge24" class="edge">
|
62
|
+
<title>2->18</title>
|
63
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M484.07,-287.06C450.55,-277.68 409.29,-266.13 374.16,-256.29"/>
|
64
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="375.5,-253.03 364.93,-253.71 373.62,-259.77 375.5,-253.03"/>
|
65
|
+
</g>
|
66
|
+
<!-- 3 -->
|
67
|
+
<g id="node4" class="node">
|
68
|
+
<title>3</title>
|
69
|
+
<path fill="none" stroke="#d86e56" stroke-width="2" d="M591.5,-468C591.5,-468 370.75,-468 370.75,-468 364.75,-468 358.75,-462 358.75,-456 358.75,-456 358.75,-444 358.75,-444 358.75,-438 364.75,-432 370.75,-432 370.75,-432 591.5,-432 591.5,-432 597.5,-432 603.5,-438 603.5,-444 603.5,-444 603.5,-456 603.5,-456 603.5,-462 597.5,-468 591.5,-468"/>
|
70
|
+
<text text-anchor="middle" x="481.12" y="-446.88" font-family="sans" font-size="10.00">step_3_loop_2_aggregate_step_3_main_output</text>
|
71
|
+
</g>
|
72
|
+
<!-- 3->2 -->
|
73
|
+
<g id="edge6" class="edge">
|
74
|
+
<title>3->2</title>
|
75
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M604.44,-434.84C652.08,-426.61 698.64,-414.17 713.12,-396 742.98,-358.56 684.02,-334.76 628.47,-321.28"/>
|
76
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="629.45,-317.92 618.92,-319.08 627.88,-324.74 629.45,-317.92"/>
|
77
|
+
</g>
|
78
|
+
<!-- 16 -->
|
79
|
+
<g id="node17" class="node">
|
80
|
+
<title>16</title>
|
81
|
+
<path fill="none" stroke="#56d882" stroke-width="2" d="M362.12,-396C362.12,-396 100.12,-396 100.12,-396 94.12,-396 88.12,-390 88.12,-384 88.12,-384 88.12,-372 88.12,-372 88.12,-366 94.12,-360 100.12,-360 100.12,-360 362.12,-360 362.12,-360 368.12,-360 374.12,-366 374.12,-372 374.12,-372 374.12,-384 374.12,-384 374.12,-390 368.12,-396 362.12,-396"/>
|
82
|
+
<text text-anchor="middle" x="231.12" y="-374.88" font-family="sans" font-size="10.00">step_4a_python_pandas_step_4a_main_input_validator</text>
|
83
|
+
</g>
|
84
|
+
<!-- 3->16 -->
|
85
|
+
<g id="edge23" class="edge">
|
86
|
+
<title>3->16</title>
|
87
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M417.07,-431.06C383.55,-421.68 342.29,-410.13 307.16,-400.29"/>
|
88
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="308.5,-397.03 297.93,-397.71 306.62,-403.77 308.5,-397.03"/>
|
89
|
+
</g>
|
90
|
+
<!-- 4 -->
|
91
|
+
<g id="node5" class="node">
|
92
|
+
<title>4</title>
|
93
|
+
<path fill="none" stroke="#97d856" stroke-width="2" d="M573.88,-540C573.88,-540 388.38,-540 388.38,-540 382.38,-540 376.38,-534 376.38,-528 376.38,-528 376.38,-516 376.38,-516 376.38,-510 382.38,-504 388.38,-504 388.38,-504 573.88,-504 573.88,-504 579.88,-504 585.88,-510 585.88,-516 585.88,-516 585.88,-528 585.88,-528 585.88,-534 579.88,-540 573.88,-540"/>
|
94
|
+
<text text-anchor="middle" x="481.12" y="-518.88" font-family="sans" font-size="10.00">step_3_loop_2_step_3_main_input_split</text>
|
95
|
+
</g>
|
96
|
+
<!-- 4->3 -->
|
97
|
+
<g id="edge9" class="edge">
|
98
|
+
<title>4->3</title>
|
99
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M481.12,-503.34C481.12,-496.75 481.12,-489.08 481.12,-481.67"/>
|
100
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="484.63,-481.93 481.13,-471.93 477.63,-481.93 484.63,-481.93"/>
|
101
|
+
</g>
|
102
|
+
<!-- 5 -->
|
103
|
+
<g id="node6" class="node">
|
104
|
+
<title>5</title>
|
105
|
+
<path fill="none" stroke="#5682d8" stroke-width="2" d="M591.5,-612C591.5,-612 370.75,-612 370.75,-612 364.75,-612 358.75,-606 358.75,-600 358.75,-600 358.75,-588 358.75,-588 358.75,-582 364.75,-576 370.75,-576 370.75,-576 591.5,-576 591.5,-576 597.5,-576 603.5,-582 603.5,-588 603.5,-588 603.5,-600 603.5,-600 603.5,-606 597.5,-612 591.5,-612"/>
|
106
|
+
<text text-anchor="middle" x="481.12" y="-590.88" font-family="sans" font-size="10.00">step_3_loop_1_aggregate_step_3_main_output</text>
|
107
|
+
</g>
|
108
|
+
<!-- 5->4 -->
|
109
|
+
<g id="edge10" class="edge">
|
110
|
+
<title>5->4</title>
|
111
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M481.12,-575.34C481.12,-568.75 481.12,-561.08 481.12,-553.67"/>
|
112
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="484.63,-553.93 481.13,-543.93 477.63,-553.93 484.63,-553.93"/>
|
113
|
+
</g>
|
114
|
+
<!-- 6 -->
|
115
|
+
<g id="node7" class="node">
|
116
|
+
<title>6</title>
|
117
|
+
<path fill="none" stroke="#56a9d8" stroke-width="2" d="M573.88,-684C573.88,-684 388.38,-684 388.38,-684 382.38,-684 376.38,-678 376.38,-672 376.38,-672 376.38,-660 376.38,-660 376.38,-654 382.38,-648 388.38,-648 388.38,-648 573.88,-648 573.88,-648 579.88,-648 585.88,-654 585.88,-660 585.88,-660 585.88,-672 585.88,-672 585.88,-678 579.88,-684 573.88,-684"/>
|
118
|
+
<text text-anchor="middle" x="481.12" y="-662.88" font-family="sans" font-size="10.00">step_3_loop_1_step_3_main_input_split</text>
|
119
|
+
</g>
|
120
|
+
<!-- 6->5 -->
|
121
|
+
<g id="edge11" class="edge">
|
122
|
+
<title>6->5</title>
|
123
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M481.12,-647.34C481.12,-640.75 481.12,-633.08 481.12,-625.67"/>
|
124
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="484.63,-625.93 481.13,-615.93 477.63,-625.93 484.63,-625.93"/>
|
125
|
+
</g>
|
126
|
+
<!-- 7 -->
|
127
|
+
<g id="node8" class="node">
|
128
|
+
<title>7</title>
|
129
|
+
<path fill="none" stroke="#d89556" stroke-width="2" d="M533,-756C533,-756 429.25,-756 429.25,-756 423.25,-756 417.25,-750 417.25,-744 417.25,-744 417.25,-732 417.25,-732 417.25,-726 423.25,-720 429.25,-720 429.25,-720 533,-720 533,-720 539,-720 545,-726 545,-732 545,-732 545,-744 545,-744 545,-750 539,-756 533,-756"/>
|
130
|
+
<text text-anchor="middle" x="481.12" y="-734.88" font-family="sans" font-size="10.00">step_2_python_pandas</text>
|
131
|
+
</g>
|
132
|
+
<!-- 7->6 -->
|
133
|
+
<g id="edge12" class="edge">
|
134
|
+
<title>7->6</title>
|
135
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M481.12,-719.34C481.12,-712.75 481.12,-705.08 481.12,-697.67"/>
|
136
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="484.63,-697.93 481.13,-687.93 477.63,-697.93 484.63,-697.93"/>
|
137
|
+
</g>
|
138
|
+
<!-- 8 -->
|
139
|
+
<g id="node9" class="node">
|
140
|
+
<title>8</title>
|
141
|
+
<path fill="none" stroke="#68d856" stroke-width="2" d="M415.12,-900C415.12,-900 231.12,-900 231.12,-900 225.12,-900 219.12,-894 219.12,-888 219.12,-888 219.12,-876 219.12,-876 219.12,-870 225.12,-864 231.12,-864 231.12,-864 415.12,-864 415.12,-864 421.12,-864 427.12,-870 427.12,-876 427.12,-876 427.12,-888 427.12,-888 427.12,-894 421.12,-900 415.12,-900"/>
|
142
|
+
<text text-anchor="middle" x="323.12" y="-878.88" font-family="sans" font-size="10.00">step_1_clone_1_step_1_python_pandas</text>
|
143
|
+
</g>
|
144
|
+
<!-- 8->7 -->
|
145
|
+
<g id="edge13" class="edge">
|
146
|
+
<title>8->7</title>
|
147
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M338.55,-863.03C354.95,-844.37 382.06,-814.78 408.12,-792 419.23,-782.3 432.04,-772.56 443.81,-764.13"/>
|
148
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="445.59,-767.15 451.76,-758.53 441.57,-761.42 445.59,-767.15"/>
|
149
|
+
</g>
|
150
|
+
<!-- 14 -->
|
151
|
+
<g id="node15" class="node">
|
152
|
+
<title>14</title>
|
153
|
+
<path fill="none" stroke="#c6d856" stroke-width="2" d="M689.12,-828C689.12,-828 439.12,-828 439.12,-828 433.12,-828 427.12,-822 427.12,-816 427.12,-816 427.12,-804 427.12,-804 427.12,-798 433.12,-792 439.12,-792 439.12,-792 689.12,-792 689.12,-792 695.12,-792 701.12,-798 701.12,-804 701.12,-804 701.12,-816 701.12,-816 701.12,-822 695.12,-828 689.12,-828"/>
|
154
|
+
<text text-anchor="middle" x="564.12" y="-806.88" font-family="sans" font-size="10.00">step_2_python_pandas_step_2_main_input_validator</text>
|
155
|
+
</g>
|
156
|
+
<!-- 8->14 -->
|
157
|
+
<g id="edge20" class="edge">
|
158
|
+
<title>8->14</title>
|
159
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M384.88,-863.06C417.05,-853.72 456.61,-842.23 490.38,-832.42"/>
|
160
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="491,-835.88 499.63,-829.73 489.05,-829.16 491,-835.88"/>
|
161
|
+
</g>
|
162
|
+
<!-- 9 -->
|
163
|
+
<g id="node10" class="node">
|
164
|
+
<title>9</title>
|
165
|
+
<path fill="none" stroke="#80d856" stroke-width="2" d="M342.25,-972C342.25,-972 12,-972 12,-972 6,-972 0,-966 0,-960 0,-960 0,-948 0,-948 0,-942 6,-936 12,-936 12,-936 342.25,-936 342.25,-936 348.25,-936 354.25,-942 354.25,-948 354.25,-948 354.25,-960 354.25,-960 354.25,-966 348.25,-972 342.25,-972"/>
|
166
|
+
<text text-anchor="middle" x="177.12" y="-950.88" font-family="sans" font-size="10.00">step_1_clone_1_step_1_python_pandas_step_1_main_input_validator</text>
|
167
|
+
</g>
|
168
|
+
<!-- 9->8 -->
|
169
|
+
<g id="edge17" class="edge">
|
170
|
+
<title>9->8</title>
|
171
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M214.34,-935.15C232.47,-926.47 254.49,-915.91 273.98,-906.56"/>
|
172
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="275.49,-909.72 282.99,-902.24 272.46,-903.41 275.49,-909.72"/>
|
173
|
+
</g>
|
174
|
+
<!-- 10 -->
|
175
|
+
<g id="node11" class="node">
|
176
|
+
<title>10</title>
|
177
|
+
<path fill="none" stroke="#afd856" stroke-width="2" d="M641.12,-900C641.12,-900 457.12,-900 457.12,-900 451.12,-900 445.12,-894 445.12,-888 445.12,-888 445.12,-876 445.12,-876 445.12,-870 451.12,-864 457.12,-864 457.12,-864 641.12,-864 641.12,-864 647.12,-864 653.12,-870 653.12,-876 653.12,-876 653.12,-888 653.12,-888 653.12,-894 647.12,-900 641.12,-900"/>
|
178
|
+
<text text-anchor="middle" x="549.12" y="-878.88" font-family="sans" font-size="10.00">step_1_clone_2_step_1_python_pandas</text>
|
179
|
+
</g>
|
180
|
+
<!-- 10->7 -->
|
181
|
+
<g id="edge14" class="edge">
|
182
|
+
<title>10->7</title>
|
183
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M463.39,-863.07C445.58,-855.27 428.96,-844.07 418.12,-828 409.18,-814.73 411.37,-806.51 418.12,-792 422.99,-781.55 431.04,-772.42 439.79,-764.82"/>
|
184
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="441.77,-767.72 447.45,-758.78 437.44,-762.23 441.77,-767.72"/>
|
185
|
+
</g>
|
186
|
+
<!-- 10->14 -->
|
187
|
+
<g id="edge21" class="edge">
|
188
|
+
<title>10->14</title>
|
189
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M552.91,-863.34C554.34,-856.67 556.01,-848.89 557.61,-841.39"/>
|
190
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="560.98,-842.37 559.65,-831.86 554.14,-840.91 560.98,-842.37"/>
|
191
|
+
</g>
|
192
|
+
<!-- 11 -->
|
193
|
+
<g id="node12" class="node">
|
194
|
+
<title>11</title>
|
195
|
+
<path fill="none" stroke="#56c1d8" stroke-width="2" d="M714.25,-972C714.25,-972 384,-972 384,-972 378,-972 372,-966 372,-960 372,-960 372,-948 372,-948 372,-942 378,-936 384,-936 384,-936 714.25,-936 714.25,-936 720.25,-936 726.25,-942 726.25,-948 726.25,-948 726.25,-960 726.25,-960 726.25,-966 720.25,-972 714.25,-972"/>
|
196
|
+
<text text-anchor="middle" x="549.12" y="-950.88" font-family="sans" font-size="10.00">step_1_clone_2_step_1_python_pandas_step_1_main_input_validator</text>
|
197
|
+
</g>
|
198
|
+
<!-- 11->10 -->
|
199
|
+
<g id="edge18" class="edge">
|
200
|
+
<title>11->10</title>
|
201
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M549.12,-935.34C549.12,-928.75 549.12,-921.08 549.12,-913.67"/>
|
202
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="552.63,-913.93 549.13,-903.93 545.63,-913.93 552.63,-913.93"/>
|
203
|
+
</g>
|
204
|
+
<!-- 12 -->
|
205
|
+
<g id="node13" class="node">
|
206
|
+
<title>12</title>
|
207
|
+
<path fill="none" stroke="#566bd8" stroke-width="2" d="M867.12,-900C867.12,-900 683.12,-900 683.12,-900 677.12,-900 671.12,-894 671.12,-888 671.12,-888 671.12,-876 671.12,-876 671.12,-870 677.12,-864 683.12,-864 683.12,-864 867.12,-864 867.12,-864 873.12,-864 879.12,-870 879.12,-876 879.12,-876 879.12,-888 879.12,-888 879.12,-894 873.12,-900 867.12,-900"/>
|
208
|
+
<text text-anchor="middle" x="775.12" y="-878.88" font-family="sans" font-size="10.00">step_1_clone_3_step_1_python_pandas</text>
|
209
|
+
</g>
|
210
|
+
<!-- 12->7 -->
|
211
|
+
<g id="edge15" class="edge">
|
212
|
+
<title>12->7</title>
|
213
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M767.05,-863.32C756.79,-842.9 737.11,-809.82 710.12,-792 665.23,-762.35 605.83,-749.18 558.78,-743.38"/>
|
214
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="559.19,-739.91 548.87,-742.27 558.42,-746.86 559.19,-739.91"/>
|
215
|
+
</g>
|
216
|
+
<!-- 12->14 -->
|
217
|
+
<g id="edge22" class="edge">
|
218
|
+
<title>12->14</title>
|
219
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M721.06,-863.06C693.39,-853.88 659.47,-842.63 630.25,-832.94"/>
|
220
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="631.51,-829.67 620.92,-829.84 629.31,-836.31 631.51,-829.67"/>
|
221
|
+
</g>
|
222
|
+
<!-- 13 -->
|
223
|
+
<g id="node14" class="node">
|
224
|
+
<title>13</title>
|
225
|
+
<path fill="none" stroke="#d85656" stroke-width="2" d="M1086.25,-972C1086.25,-972 756,-972 756,-972 750,-972 744,-966 744,-960 744,-960 744,-948 744,-948 744,-942 750,-936 756,-936 756,-936 1086.25,-936 1086.25,-936 1092.25,-936 1098.25,-942 1098.25,-948 1098.25,-948 1098.25,-960 1098.25,-960 1098.25,-966 1092.25,-972 1086.25,-972"/>
|
226
|
+
<text text-anchor="middle" x="921.12" y="-950.88" font-family="sans" font-size="10.00">step_1_clone_3_step_1_python_pandas_step_1_main_input_validator</text>
|
227
|
+
</g>
|
228
|
+
<!-- 13->12 -->
|
229
|
+
<g id="edge19" class="edge">
|
230
|
+
<title>13->12</title>
|
231
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M883.91,-935.15C865.78,-926.47 843.76,-915.91 824.27,-906.56"/>
|
232
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="825.79,-903.41 815.26,-902.24 822.76,-909.72 825.79,-903.41"/>
|
233
|
+
</g>
|
234
|
+
<!-- 14->7 -->
|
235
|
+
<g id="edge16" class="edge">
|
236
|
+
<title>14->7</title>
|
237
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M543.18,-791.34C533.82,-783.44 522.58,-773.96 512.26,-765.26"/>
|
238
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="514.57,-762.62 504.67,-758.85 510.05,-767.98 514.57,-762.62"/>
|
239
|
+
</g>
|
240
|
+
<!-- 15 -->
|
241
|
+
<g id="node16" class="node">
|
242
|
+
<title>15</title>
|
243
|
+
<path fill="none" stroke="#59d856" stroke-width="2" d="M691.88,-396C691.88,-396 404.38,-396 404.38,-396 398.38,-396 392.38,-390 392.38,-384 392.38,-384 392.38,-372 392.38,-372 392.38,-366 398.38,-360 404.38,-360 404.38,-360 691.88,-360 691.88,-360 697.88,-360 703.88,-366 703.88,-372 703.88,-372 703.88,-384 703.88,-384 703.88,-390 697.88,-396 691.88,-396"/>
|
244
|
+
<text text-anchor="middle" x="548.12" y="-374.88" font-family="sans" font-size="10.00">step_4a_python_pandas_step_4a_secondary_input_validator</text>
|
245
|
+
</g>
|
246
|
+
<!-- 15->2 -->
|
247
|
+
<g id="edge7" class="edge">
|
248
|
+
<title>15->2</title>
|
249
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M548.12,-359.34C548.12,-352.75 548.12,-345.08 548.12,-337.67"/>
|
250
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="551.63,-337.93 548.13,-327.93 544.63,-337.93 551.63,-337.93"/>
|
251
|
+
</g>
|
252
|
+
<!-- 16->2 -->
|
253
|
+
<g id="edge8" class="edge">
|
254
|
+
<title>16->2</title>
|
255
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M312.35,-359.06C360.17,-348.5 420.42,-335.2 468.05,-324.68"/>
|
256
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="468.53,-328.16 477.54,-322.59 467.02,-321.33 468.53,-328.16"/>
|
257
|
+
</g>
|
258
|
+
<!-- 17 -->
|
259
|
+
<g id="node18" class="node">
|
260
|
+
<title>17</title>
|
261
|
+
<path fill="none" stroke="#56d86b" stroke-width="2" d="M758.88,-252C758.88,-252 471.38,-252 471.38,-252 465.38,-252 459.38,-246 459.38,-240 459.38,-240 459.38,-228 459.38,-228 459.38,-222 465.38,-216 471.38,-216 471.38,-216 758.88,-216 758.88,-216 764.88,-216 770.88,-222 770.88,-228 770.88,-228 770.88,-240 770.88,-240 770.88,-246 764.88,-252 758.88,-252"/>
|
262
|
+
<text text-anchor="middle" x="615.12" y="-230.88" font-family="sans" font-size="10.00">step_4b_python_pandas_step_4b_secondary_input_validator</text>
|
263
|
+
</g>
|
264
|
+
<!-- 17->1 -->
|
265
|
+
<g id="edge4" class="edge">
|
266
|
+
<title>17->1</title>
|
267
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M615.12,-215.34C615.12,-208.75 615.12,-201.08 615.12,-193.67"/>
|
268
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="618.63,-193.93 615.13,-183.93 611.63,-193.93 618.63,-193.93"/>
|
269
|
+
</g>
|
270
|
+
<!-- 18->1 -->
|
271
|
+
<g id="edge5" class="edge">
|
272
|
+
<title>18->1</title>
|
273
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M379.35,-215.06C427.17,-204.5 487.42,-191.2 535.05,-180.68"/>
|
274
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="535.53,-184.16 544.54,-178.59 534.02,-177.33 535.53,-184.16"/>
|
275
|
+
</g>
|
276
|
+
<!-- 19->0 -->
|
277
|
+
<g id="edge2" class="edge">
|
278
|
+
<title>19->0</title>
|
279
|
+
<path fill="none" stroke="grey" stroke-width="2" d="M650.26,-71.34C645.4,-64.1 639.65,-55.53 634.23,-47.45"/>
|
280
|
+
<polygon fill="grey" stroke="grey" stroke-width="2" points="637.31,-45.77 628.83,-39.41 631.5,-49.67 637.31,-45.77"/>
|
281
|
+
</g>
|
282
|
+
</g>
|
283
|
+
</svg>
|
@@ -139,11 +139,11 @@ configures the pipeline for this run, by specifying configuration details for ea
|
|
139
139
|
defined by the pipeline schema. The schema steps, and the edges between them, are defined in
|
140
140
|
``pipeline_schema_constants/development.py``. The schema steps, or nodes, define input and output slots for
|
141
141
|
data used or produced by the schema steps, as well as any logical or behavioral structure of the step,
|
142
|
-
such as defining a step as a ``LoopStep``, ``
|
142
|
+
such as defining a step as a ``LoopStep``, ``CloneableStep``, ``ChoiceStep``, or ``HierarchicalStep``. The edges
|
143
143
|
define how data moves between steps' input and output slots.
|
144
144
|
|
145
145
|
``pipeline_schema_constants/development.py`` defines that the pipeline schema requires four steps, that the
|
146
|
-
third step is ``
|
146
|
+
third step is ``AutoParallel``, that the fourth step is a ``ChoiceStep``, and that all steps have
|
147
147
|
one input except the fourth step, which has two.
|
148
148
|
The edges in the ``EDGES`` variable in that file connect the steps, so an output from one becomes an input
|
149
149
|
to another.
|
@@ -196,15 +196,14 @@ except that ``step_4`` has two inputs, as defined in
|
|
196
196
|
|
197
197
|
.. warning::
|
198
198
|
Note that this diagram doesn't show the dependencies on original (user-provided) input data, and
|
199
|
-
displays validations, and doesn't show Step 3 because it is
|
199
|
+
displays validations, and doesn't show Step 3 because it is an auto-parallel step. See
|
200
200
|
`this ticket <https://jira.ihme.washington.edu/browse/MIC-5767>`_ where we plan to fix these issues.
|
201
201
|
|
202
202
|
Now we can understand why the final output has 60k rows. For the current dummy implementation, when there are multiple input data files, the rows
|
203
203
|
in the files are concatenated. So ``step_1`` concatenates three 10k row datasets, and ``step_4`` concatenates these
|
204
204
|
30k rows with another 30k rows.
|
205
205
|
|
206
|
-
``step_3`` is aggregated and split because it is defined as
|
207
|
-
``EmbarrassinglyParallel``.
|
206
|
+
``step_3`` is aggregated and split because it is defined as ``AutoParallel``.
|
208
207
|
|
209
208
|
We've already viewed the final output, but if we want to see how the data is transformed over the course
|
210
209
|
of the pipeline, we can view intermediary outputs as well::
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.1.21"
|
@@ -21,8 +21,8 @@ from loguru import logger
|
|
21
21
|
|
22
22
|
from easylink.pipeline_schema_constants import SCHEMA_PARAMS
|
23
23
|
from easylink.step import (
|
24
|
+
AutoParallelStep,
|
24
25
|
ChoiceStep,
|
25
|
-
EmbarrassinglyParallelStep,
|
26
26
|
HierarchicalStep,
|
27
27
|
IOStep,
|
28
28
|
Step,
|
@@ -319,7 +319,7 @@ class ImplementationCreator:
|
|
319
319
|
elif isinstance(node, TemplatedStep):
|
320
320
|
_process_step(node.template_step)
|
321
321
|
return
|
322
|
-
elif isinstance(node,
|
322
|
+
elif isinstance(node, AutoParallelStep):
|
323
323
|
_process_step(node.step)
|
324
324
|
return
|
325
325
|
elif isinstance(node, ChoiceStep):
|
@@ -55,7 +55,7 @@ class Implementation:
|
|
55
55
|
implementation_config: LayeredConfigTree,
|
56
56
|
input_slots: Iterable[InputSlot] = (),
|
57
57
|
output_slots: Iterable[OutputSlot] = (),
|
58
|
-
|
58
|
+
is_auto_parallel: bool = False,
|
59
59
|
):
|
60
60
|
self.name = implementation_config.name
|
61
61
|
"""The name of this ``Implementation``."""
|
@@ -74,7 +74,7 @@ class Implementation:
|
|
74
74
|
implemented by this particular ``Implementation``."""
|
75
75
|
self.requires_spark = self._metadata.get("requires_spark", False)
|
76
76
|
"""Whether this ``Implementation`` requires a Spark environment."""
|
77
|
-
self.
|
77
|
+
self.is_auto_parallel = is_auto_parallel
|
78
78
|
|
79
79
|
def __repr__(self) -> str:
|
80
80
|
return f"Implementation.{self.name}"
|
@@ -45,9 +45,9 @@ class Pipeline:
|
|
45
45
|
The :class:`~easylink.pipeline_graph.PipelineGraph` object.
|
46
46
|
spark_is_required
|
47
47
|
A boolean indicating whether the pipeline requires Spark.
|
48
|
-
|
48
|
+
any_auto_parallel
|
49
49
|
A boolean indicating whether any implementation in the pipeline is to be
|
50
|
-
run in
|
50
|
+
automatically run in parallel.
|
51
51
|
|
52
52
|
"""
|
53
53
|
|
@@ -55,7 +55,7 @@ class Pipeline:
|
|
55
55
|
self.config = config
|
56
56
|
self.pipeline_graph = PipelineGraph(config)
|
57
57
|
self.spark_is_required = self.pipeline_graph.spark_is_required
|
58
|
-
self.
|
58
|
+
self.any_auto_parallel = self.pipeline_graph.any_auto_parallel
|
59
59
|
|
60
60
|
# TODO [MIC-4880]: refactor into validation object
|
61
61
|
self._validate()
|
@@ -179,7 +179,7 @@ class Pipeline:
|
|
179
179
|
#################################
|
180
180
|
|
181
181
|
def _write_imports(self) -> None:
|
182
|
-
if not self.
|
182
|
+
if not self.any_auto_parallel:
|
183
183
|
imports = "from easylink.utilities import validation_utils\n"
|
184
184
|
else:
|
185
185
|
imports = """import glob
|
@@ -193,7 +193,7 @@ from easylink.utilities import aggregator_utils, splitter_utils, validation_util
|
|
193
193
|
f.write(imports)
|
194
194
|
|
195
195
|
def _write_wildcard_constraints(self) -> None:
|
196
|
-
if self.
|
196
|
+
if self.any_auto_parallel:
|
197
197
|
with open(self.snakefile_path, "a") as f:
|
198
198
|
f.write(
|
199
199
|
"""
|
@@ -301,12 +301,10 @@ use rule start_spark_worker from spark_cluster with:
|
|
301
301
|
The name of the ``Implementation`` to write the rule(s) for.
|
302
302
|
"""
|
303
303
|
|
304
|
-
|
305
|
-
node_name
|
306
|
-
)
|
304
|
+
is_auto_parallel = self.pipeline_graph.get_whether_auto_parallel(node_name)
|
307
305
|
input_slots, _output_slots = self.pipeline_graph.get_io_slot_attributes(node_name)
|
308
306
|
validation_files, validation_rules = self._get_validations(
|
309
|
-
node_name, input_slots,
|
307
|
+
node_name, input_slots, is_auto_parallel
|
310
308
|
)
|
311
309
|
for validation_rule in validation_rules:
|
312
310
|
validation_rule.write_to_snakefile(self.snakefile_path)
|
@@ -334,7 +332,7 @@ use rule start_spark_worker from spark_cluster with:
|
|
334
332
|
image_path=self.config.images_dir / implementation.singularity_image_name,
|
335
333
|
script_cmd=implementation.script_cmd,
|
336
334
|
requires_spark=implementation.requires_spark,
|
337
|
-
|
335
|
+
is_auto_parallel=is_auto_parallel,
|
338
336
|
).write_to_snakefile(self.snakefile_path)
|
339
337
|
|
340
338
|
def _write_checkpoint_rule(self, node_name: str, checkpoint_filepath: str) -> None:
|
@@ -377,7 +375,7 @@ use rule start_spark_worker from spark_cluster with:
|
|
377
375
|
input_files, output_files = self.pipeline_graph.get_io_filepaths(node_name)
|
378
376
|
if len(output_slots) > 1:
|
379
377
|
raise NotImplementedError(
|
380
|
-
"FIXME [MIC-5883] Multiple output slots/files of
|
378
|
+
"FIXME [MIC-5883] Multiple output slots/files of AutoParallelSteps not yet supported"
|
381
379
|
)
|
382
380
|
if len(output_files) > 1:
|
383
381
|
raise ValueError(
|
@@ -388,7 +386,7 @@ use rule start_spark_worker from spark_cluster with:
|
|
388
386
|
output_slot_attrs = list(output_slots.values())[0]
|
389
387
|
if len(output_slot_attrs["filepaths"]) > 1:
|
390
388
|
raise NotImplementedError(
|
391
|
-
"FIXME [MIC-5883] Multiple output slots/files of
|
389
|
+
"FIXME [MIC-5883] Multiple output slots/files of AutoParallelSteps not yet supported"
|
392
390
|
)
|
393
391
|
checkpoint_rule_name = f"checkpoints.{implementation.splitter_node_name}"
|
394
392
|
AggregationRule(
|
@@ -404,7 +402,7 @@ use rule start_spark_worker from spark_cluster with:
|
|
404
402
|
def _get_validations(
|
405
403
|
node_name: str,
|
406
404
|
input_slots: dict[str, dict[str, str | list[str]]],
|
407
|
-
|
405
|
+
is_auto_parallel: bool,
|
408
406
|
) -> tuple[list[str], list[InputValidationRule]]:
|
409
407
|
"""Gets the validation rule and its output filepath for each slot for a given node.
|
410
408
|
|
@@ -423,10 +421,10 @@ use rule start_spark_worker from spark_cluster with:
|
|
423
421
|
validation_rules = []
|
424
422
|
|
425
423
|
for input_slot_name, input_slot_attrs in input_slots.items():
|
426
|
-
#
|
424
|
+
# auto-parallel implementations rely on snakemake wildcards
|
427
425
|
# TODO: [MIC-5787] - need to support multiple wildcards at once
|
428
426
|
validation_file = f"input_validations/{node_name}/{input_slot_name}_validator" + (
|
429
|
-
"-{chunk}" if
|
427
|
+
"-{chunk}" if is_auto_parallel else ""
|
430
428
|
)
|
431
429
|
validation_files.append(validation_file)
|
432
430
|
validation_rules.append(
|
@@ -72,31 +72,26 @@ class PipelineGraph(ImplementationGraph):
|
|
72
72
|
return any([implementation.requires_spark for implementation in self.implementations])
|
73
73
|
|
74
74
|
@property
|
75
|
-
def
|
75
|
+
def any_auto_parallel(self) -> bool:
|
76
76
|
"""Whether or not any :class:`~easylink.implementation.Implementation` is
|
77
|
-
to be run in
|
77
|
+
to be automatically run in parallel."""
|
78
78
|
return any(
|
79
|
-
[
|
80
|
-
self.get_whether_embarrassingly_parallel(node)
|
81
|
-
for node in self.implementation_nodes
|
82
|
-
]
|
79
|
+
[self.get_whether_auto_parallel(node) for node in self.implementation_nodes]
|
83
80
|
)
|
84
81
|
|
85
|
-
def
|
86
|
-
"""Determines whether a node is to be run in
|
82
|
+
def get_whether_auto_parallel(self, node: str) -> dict[str, bool]:
|
83
|
+
"""Determines whether a node is to be automatically run in parallel.
|
87
84
|
|
88
85
|
Parameters
|
89
86
|
----------
|
90
87
|
node
|
91
|
-
The node name to determine whether or not it is to be run in
|
92
|
-
embarrassingly parallel way.
|
88
|
+
The node name to determine whether or not it is to be automatically run in parallel.
|
93
89
|
|
94
90
|
Returns
|
95
91
|
-------
|
96
|
-
A boolean indicating whether the node is to be run in
|
97
|
-
parallel way.
|
92
|
+
A boolean indicating whether the node is to be automatically run in parallel.
|
98
93
|
"""
|
99
|
-
return self.nodes[node]["implementation"].
|
94
|
+
return self.nodes[node]["implementation"].is_auto_parallel
|
100
95
|
|
101
96
|
def get_io_filepaths(self, node: str) -> tuple[list[str], list[str]]:
|
102
97
|
"""Gets all of a node's input and output filepaths from its edges.
|
@@ -482,9 +477,9 @@ class PipelineGraph(ImplementationGraph):
|
|
482
477
|
str(
|
483
478
|
Path("intermediate")
|
484
479
|
/ node
|
485
|
-
#
|
480
|
+
# auto-parallel implementations rely on snakemake wildcards
|
486
481
|
# TODO: [MIC-5787] - need to support multiple wildcards at once
|
487
|
-
/ ("{chunk}" if implementation.
|
482
|
+
/ ("{chunk}" if implementation.is_auto_parallel else "")
|
488
483
|
/ imp_outputs[edge_attrs["output_slot"].name]
|
489
484
|
),
|
490
485
|
)
|
@@ -23,8 +23,8 @@ SCHEMA_PARAMS = {
|
|
23
23
|
"combine_with_iteration": testing.SCHEMA_PARAMS_COMBINE_WITH_ITERATION,
|
24
24
|
"combine_with_iteration_cycle": testing.SCHEMA_PARAMS_COMBINE_WITH_ITERATION,
|
25
25
|
"combine_with_extra_node": testing.SCHEMA_PARAMS_THREE_STEPS,
|
26
|
-
"
|
27
|
-
"
|
28
|
-
"
|
29
|
-
"
|
26
|
+
"looping_auto_parallel_step": testing.SCHEMA_PARAMS_LOOPING_AUTO_PARALLEL_STEP,
|
27
|
+
"auto_parallel_cloneable_step": testing.SCHEMA_PARAMS_AUTO_PARALLEL_CLONEABLE_STEP,
|
28
|
+
"auto_parallel_loop_step": testing.SCHEMA_PARAMS_AUTO_PARALLEL_LOOP_STEP,
|
29
|
+
"auto_parallel_hierarchical_step": testing.SCHEMA_PARAMS_AUTO_PARALLEL_HIERARCHICAL_STEP,
|
30
30
|
}
|
@@ -18,13 +18,13 @@ from easylink.graph_components import (
|
|
18
18
|
OutputSlotMapping,
|
19
19
|
)
|
20
20
|
from easylink.step import (
|
21
|
+
AutoParallelStep,
|
21
22
|
ChoiceStep,
|
22
|
-
|
23
|
+
CloneableStep,
|
23
24
|
HierarchicalStep,
|
24
25
|
InputStep,
|
25
26
|
LoopStep,
|
26
27
|
OutputStep,
|
27
|
-
ParallelStep,
|
28
28
|
Step,
|
29
29
|
)
|
30
30
|
from easylink.utilities.aggregator_utils import concatenate_datasets
|
@@ -33,7 +33,7 @@ from easylink.utilities.validation_utils import validate_input_file_dummy
|
|
33
33
|
|
34
34
|
NODES = [
|
35
35
|
InputStep(),
|
36
|
-
|
36
|
+
CloneableStep(
|
37
37
|
template_step=Step(
|
38
38
|
step_name="step_1",
|
39
39
|
input_slots=[
|
@@ -58,7 +58,7 @@ NODES = [
|
|
58
58
|
output_slots=[OutputSlot("step_2_main_output")],
|
59
59
|
),
|
60
60
|
LoopStep(
|
61
|
-
template_step=
|
61
|
+
template_step=AutoParallelStep(
|
62
62
|
step=Step(
|
63
63
|
step_name="step_3",
|
64
64
|
input_slots=[
|