easylink 0.1.6__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. {easylink-0.1.6 → easylink-0.1.8}/CHANGELOG.rst +10 -0
  2. {easylink-0.1.6 → easylink-0.1.8}/PKG-INFO +1 -1
  3. easylink-0.1.8/docs/source/api_reference/cli.rst +1 -0
  4. easylink-0.1.8/docs/source/api_reference/utilities/aggregator_utils.rst +2 -0
  5. easylink-0.1.8/docs/source/api_reference/utilities/splitter_utils.rst +2 -0
  6. easylink-0.1.8/pyproject.toml +58 -0
  7. easylink-0.1.8/src/easylink/_version.py +1 -0
  8. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/cli.py +18 -9
  9. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/graph_components.py +19 -5
  10. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/implementation.py +2 -0
  11. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/pipeline.py +92 -34
  12. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/pipeline_graph.py +112 -27
  13. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/pipeline_schema.py +7 -7
  14. easylink-0.1.8/src/easylink/pipeline_schema_constants/__init__.py +26 -0
  15. easylink-0.1.8/src/easylink/pipeline_schema_constants/development.py +303 -0
  16. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/pipeline_schema_constants/testing.py +142 -3
  17. easylink-0.1.8/src/easylink/rule.py +476 -0
  18. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/runner.py +1 -0
  19. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/step.py +442 -345
  20. easylink-0.1.8/src/easylink/utilities/__init__.py +9 -0
  21. easylink-0.1.8/src/easylink/utilities/aggregator_utils.py +32 -0
  22. easylink-0.1.8/src/easylink/utilities/data_utils.py +154 -0
  23. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/utilities/general_utils.py +49 -10
  24. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/utilities/paths.py +9 -3
  25. easylink-0.1.8/src/easylink/utilities/splitter_utils.py +72 -0
  26. easylink-0.1.8/src/easylink/utilities/validation_utils.py +52 -0
  27. {easylink-0.1.6 → easylink-0.1.8}/src/easylink.egg-info/PKG-INFO +1 -1
  28. {easylink-0.1.6 → easylink-0.1.8}/src/easylink.egg-info/SOURCES.txt +8 -1
  29. {easylink-0.1.6 → easylink-0.1.8}/tests/conftest.py +1 -0
  30. {easylink-0.1.6 → easylink-0.1.8}/tests/e2e/test_easylink_run.py +1 -0
  31. {easylink-0.1.6 → easylink-0.1.8}/tests/e2e/test_step_types.py +1 -0
  32. {easylink-0.1.6 → easylink-0.1.8}/tests/integration/test_snakemake.py +1 -0
  33. {easylink-0.1.6 → easylink-0.1.8}/tests/integration/test_snakemake_spark.py +1 -0
  34. easylink-0.1.8/tests/specifications/unit/pipeline_combine_two_steps.yaml +16 -0
  35. easylink-0.1.8/tests/specifications/unit/pipeline_combine_with_extra_node.yaml +10 -0
  36. easylink-0.1.8/tests/specifications/unit/pipeline_combine_with_iteration.yaml +11 -0
  37. easylink-0.1.8/tests/specifications/unit/pipeline_combine_with_iteration_cycle.yaml +12 -0
  38. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_type_config_mismatch.yaml +8 -6
  39. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/conftest.py +1 -0
  40. easylink-0.1.8/tests/unit/rule_strings/aggregation_rule.txt +23 -0
  41. easylink-0.1.8/tests/unit/rule_strings/checkpoint_rule.txt +19 -0
  42. easylink-0.1.8/tests/unit/rule_strings/embarrassingly_parallel_rule.txt +19 -0
  43. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/rule_strings/pipeline_local.txt +57 -7
  44. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/rule_strings/pipeline_slurm.txt +57 -7
  45. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/rule_strings/validation_rule.txt +1 -1
  46. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_cli.py +1 -0
  47. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_config.py +1 -0
  48. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_data_utils.py +1 -0
  49. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_general_utils.py +1 -0
  50. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_implementation.py +1 -0
  51. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_pipeline.py +1 -0
  52. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_pipeline_graph.py +133 -89
  53. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_pipeline_schema.py +6 -1
  54. easylink-0.1.8/tests/unit/test_rule.py +256 -0
  55. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_runner.py +1 -0
  56. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_step.py +427 -274
  57. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_validations.py +1 -1
  58. easylink-0.1.6/.github/workflows/build.yml +0 -60
  59. easylink-0.1.6/pyproject.toml +0 -14
  60. easylink-0.1.6/src/easylink/_version.py +0 -1
  61. easylink-0.1.6/src/easylink/pipeline_schema_constants/__init__.py +0 -12
  62. easylink-0.1.6/src/easylink/pipeline_schema_constants/development.py +0 -288
  63. easylink-0.1.6/src/easylink/rule.py +0 -216
  64. easylink-0.1.6/src/easylink/utilities/__init__.py +0 -8
  65. easylink-0.1.6/src/easylink/utilities/data_utils.py +0 -60
  66. easylink-0.1.6/src/easylink/utilities/validation_utils.py +0 -23
  67. easylink-0.1.6/tests/specifications/unit/pipeline_combine_two_steps.yaml +0 -16
  68. easylink-0.1.6/tests/specifications/unit/pipeline_combine_with_extra_node.yaml +0 -15
  69. easylink-0.1.6/tests/specifications/unit/pipeline_combine_with_iteration.yaml +0 -19
  70. easylink-0.1.6/tests/specifications/unit/pipeline_combine_with_iteration_cycle.yaml +0 -19
  71. easylink-0.1.6/tests/unit/test_rule.py +0 -115
  72. {easylink-0.1.6 → easylink-0.1.8}/.bandit +0 -0
  73. {easylink-0.1.6 → easylink-0.1.8}/.flake8 +0 -0
  74. {easylink-0.1.6 → easylink-0.1.8}/.github/CODEOWNERS +0 -0
  75. {easylink-0.1.6 → easylink-0.1.8}/.github/pull_request_template.md +0 -0
  76. {easylink-0.1.6 → easylink-0.1.8}/.github/workflows/deploy.yml +0 -0
  77. {easylink-0.1.6 → easylink-0.1.8}/.github/workflows/update_readme.yml +0 -0
  78. {easylink-0.1.6 → easylink-0.1.8}/.gitignore +0 -0
  79. {easylink-0.1.6 → easylink-0.1.8}/.readthedocs.yml +0 -0
  80. {easylink-0.1.6 → easylink-0.1.8}/Jenkinsfile +0 -0
  81. {easylink-0.1.6 → easylink-0.1.8}/Makefile +0 -0
  82. {easylink-0.1.6 → easylink-0.1.8}/README.rst +0 -0
  83. {easylink-0.1.6 → easylink-0.1.8}/docs/Makefile +0 -0
  84. {easylink-0.1.6 → easylink-0.1.8}/docs/nitpick-exceptions +0 -0
  85. {easylink-0.1.6 → easylink-0.1.8}/docs/source/_static/style.css +0 -0
  86. {easylink-0.1.6 → easylink-0.1.8}/docs/source/_templates/layout.html +0 -0
  87. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/configuration.rst +0 -0
  88. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/graph_components.rst +0 -0
  89. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/implementation.rst +0 -0
  90. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/index.rst +0 -0
  91. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/pipeline.rst +0 -0
  92. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/pipeline_graph.rst +0 -0
  93. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/pipeline_schema.rst +0 -0
  94. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/pipeline_schema_constants/development.rst +0 -0
  95. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/pipeline_schema_constants/index.rst +0 -0
  96. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/pipeline_schema_constants/testing.rst +0 -0
  97. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/rule.rst +0 -0
  98. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/runner.rst +0 -0
  99. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/step.rst +0 -0
  100. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/utilities/data_utils.rst +0 -0
  101. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/utilities/general_utils.rst +0 -0
  102. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/utilities/index.rst +0 -0
  103. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/utilities/paths.rst +0 -0
  104. {easylink-0.1.6 → easylink-0.1.8}/docs/source/api_reference/utilities/validation_utils.rst +0 -0
  105. {easylink-0.1.6 → easylink-0.1.8}/docs/source/concepts/index.rst +0 -0
  106. {easylink-0.1.6 → easylink-0.1.8}/docs/source/conf.py +0 -0
  107. {easylink-0.1.6 → easylink-0.1.8}/docs/source/glossary.rst +0 -0
  108. {easylink-0.1.6 → easylink-0.1.8}/docs/source/index.rst +0 -0
  109. {easylink-0.1.6 → easylink-0.1.8}/docs/source/user_guide/cli.rst +0 -0
  110. {easylink-0.1.6 → easylink-0.1.8}/docs/source/user_guide/index.rst +0 -0
  111. {easylink-0.1.6 → easylink-0.1.8}/docs/source/user_guide/tutorials/getting_started.rst +0 -0
  112. {easylink-0.1.6 → easylink-0.1.8}/docs/source/user_guide/tutorials/index.rst +0 -0
  113. {easylink-0.1.6 → easylink-0.1.8}/python_versions.json +0 -0
  114. {easylink-0.1.6 → easylink-0.1.8}/pytype.cfg +0 -0
  115. {easylink-0.1.6 → easylink-0.1.8}/setup.cfg +0 -0
  116. {easylink-0.1.6 → easylink-0.1.8}/setup.py +0 -0
  117. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/__about__.py +0 -0
  118. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/__init__.py +0 -0
  119. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/configuration.py +0 -0
  120. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/images/spark_cluster/Dockerfile +0 -0
  121. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/images/spark_cluster/README.md +0 -0
  122. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/implementation_metadata.yaml +0 -0
  123. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/README.md +0 -0
  124. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/build-containers-local.sh +0 -0
  125. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/build-containers-remote.sh +0 -0
  126. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/input_data/create_input_files.ipynb +0 -0
  127. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/input_data/input_file_1.csv +0 -0
  128. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/input_data/input_file_1.parquet +0 -0
  129. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/input_data/input_file_2.csv +0 -0
  130. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/input_data/input_file_2.parquet +0 -0
  131. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/python_pandas/README.md +0 -0
  132. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/python_pandas/dummy_step.py +0 -0
  133. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/python_pandas/python_pandas.def +0 -0
  134. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/python_pyspark/README.md +0 -0
  135. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/python_pyspark/dummy_step.py +0 -0
  136. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/python_pyspark/python_pyspark.def +0 -0
  137. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/r/README.md +0 -0
  138. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/r/dummy_step.R +0 -0
  139. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/r/r-image.def +0 -0
  140. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/steps/dev/test.py +0 -0
  141. {easylink-0.1.6 → easylink-0.1.8}/src/easylink/utilities/spark.smk +0 -0
  142. {easylink-0.1.6 → easylink-0.1.8}/src/easylink.egg-info/dependency_links.txt +0 -0
  143. {easylink-0.1.6 → easylink-0.1.8}/src/easylink.egg-info/entry_points.txt +0 -0
  144. {easylink-0.1.6 → easylink-0.1.8}/src/easylink.egg-info/not-zip-safe +0 -0
  145. {easylink-0.1.6 → easylink-0.1.8}/src/easylink.egg-info/requires.txt +0 -0
  146. {easylink-0.1.6 → easylink-0.1.8}/src/easylink.egg-info/top_level.txt +0 -0
  147. {easylink-0.1.6 → easylink-0.1.8}/tests/__init__.py +0 -0
  148. {easylink-0.1.6 → easylink-0.1.8}/tests/integration/test_snakemake_slurm.py +0 -0
  149. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/common/environment_local.yaml +0 -0
  150. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/common/input_data.yaml +0 -0
  151. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/common/pipeline.yaml +0 -0
  152. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/e2e/environment_slurm.yaml +0 -0
  153. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/e2e/pipeline.yaml +0 -0
  154. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/e2e/pipeline_expanded.yaml +0 -0
  155. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/integration/environment_spark_slurm.yaml +0 -0
  156. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/integration/pipeline.yaml +0 -0
  157. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/integration/pipeline_spark.yaml +0 -0
  158. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/environment_minimum.yaml +0 -0
  159. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/environment_spark_slurm.yaml +0 -0
  160. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline.yaml +0 -0
  161. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_bad_combined_implementations.yaml +0 -0
  162. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_bad_implementation.yaml +0 -0
  163. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_bad_loop_formatting.yaml +0 -0
  164. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_bad_step.yaml +0 -0
  165. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_bad_type_key.yaml +0 -0
  166. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_combine_bad_implementation_names.yaml +0 -0
  167. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_combine_bad_topology.yaml +0 -0
  168. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_combine_with_missing_node.yaml +0 -0
  169. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_combine_with_parallel.yaml +0 -0
  170. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_missing_implementation_name.yaml +0 -0
  171. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_missing_implementations.yaml +0 -0
  172. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_missing_loop_nodes.yaml +0 -0
  173. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_missing_step.yaml +0 -0
  174. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_missing_substeps.yaml +0 -0
  175. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_missing_type_key.yaml +0 -0
  176. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_nested_templated_steps.yaml +0 -0
  177. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_out_of_order.yaml +0 -0
  178. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_spark.yaml +0 -0
  179. {easylink-0.1.6 → easylink-0.1.8}/tests/specifications/unit/pipeline_wrong_parallel_split_keys.yaml +0 -0
  180. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/__init__.py +0 -0
  181. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/rule_strings/implemented_rule_local.txt +0 -0
  182. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/rule_strings/implemented_rule_slurm.txt +0 -0
  183. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/rule_strings/target_rule.txt +0 -0
  184. {easylink-0.1.6 → easylink-0.1.8}/tests/unit/test_graph_components.py +0 -0
  185. {easylink-0.1.6 → easylink-0.1.8}/update_readme.py +0 -0
@@ -1,3 +1,13 @@
+ **0.1.8 - 3/13/25**
+
+ - Refactor subgraph logic from Step to HierarchicalStep
+ - Refactor ChoiceStep so that each choice requires a single Step instead of nodes/edges
+ - Standardize the passing around of configurations to be step config instead of parent config
+
+ **0.1.7 - 2/26/25**
+
+ - Implement initial embarrassingly/auto-parallel step support
+
  **0.1.6 - 2/21/25**

  - Move test dictionaries to yaml files
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: easylink
- Version: 0.1.6
+ Version: 0.1.8
  Summary: Research repository for the EasyLink ER ecosystem project.
  Home-page: https://github.com/ihmeuw/easylink
  Author: The EasyLink developers
@@ -0,0 +1 @@
+ .. automodule:: easylink.cli
@@ -0,0 +1,2 @@
+ .. automodule:: easylink.utilities.aggregator_utils
+
@@ -0,0 +1,2 @@
+ .. automodule:: easylink.utilities.splitter_utils
+
@@ -0,0 +1,58 @@
+ [build-system]
+ requires = ["packaging", "setuptools"]
+
+ [tool.black]
+ line_length = 94
+
+ [tool.isort]
+ profile = "black"
+
+ [tool.coverage.run]
+ source = ["easylink"]
+
+ [tool.coverage.report]
+ show_missing = true
+
+ [tool.mypy]
+ # This is the global mypy configuration.
+ strict = true # See all the enabled flags `mypy --help | grep -A 10 'Strict mode'`
+ disallow_any_unimported = false
+ implicit_reexport = true
+ exclude = [
+ # Files below here should have their errors fixed and then be removed from this list
+ # You may need to remove a `# mypy: ignore-errors` comment from the file heading as well
+ 'docs/source/conf.py',
+ 'setup.py',
+ 'src/easylink/configuration.py',
+ 'src/easylink/graph_components.py',
+ 'src/easylink/implementation.py',
+ 'src/easylink/pipeline_graph.py',
+ 'src/easylink/pipeline.py',
+ 'src/easylink/pipeline_schema.py',
+ 'src/easylink/rule.py',
+ 'src/easylink/runner.py',
+ 'src/easylink/step.py',
+ 'src/easylink/steps/dev/python_pandas/dummy_step.py',
+ 'src/easylink/steps/dev/python_pyspark/dummy_step.py',
+ 'src/easylink/steps/dev/test.py',
+ 'src/easylink/utilities/aggregator_utils.py',
+ 'src/easylink/utilities/splitter_utils.py',
+ 'src/easylink/utilities/validation_utils.py',
+ 'tests/integration/test_snakemake_slurm.py',
+ 'tests/unit/test_graph_components.py',
+ 'tests/unit/test_pipeline_graph.py',
+ 'tests/unit/test_pipeline_schema.py',
+ 'tests/unit/test_step.py',
+ 'tests/unit/test_validations.py',
+ ]
+
+ # ignore specific error codes
+ disable_error_code = []
+
+ # handle mypy errors when 3rd party packages are not typed.
+ [[tool.mypy.overrides]]
+ module = [
+ # "scipy.*",
+ # "ipywidgets.*",
+ ]
+ ignore_missing_imports = true
@@ -0,0 +1 @@
+ __version__ = "0.1.8"
@@ -1,3 +1,4 @@
+ # mypy: ignore-errors
  """
  ======================
  Command Line Interface
@@ -86,6 +87,16 @@ SHARED_OPTIONS = [
  default=False,
  help="Do not save the results in a timestamped sub-directory of ``--output-dir``.",
  ),
+ click.option(
+ "-v", "--verbose", count=True, help="Increase logging verbosity.", hidden=True
+ ),
+ click.option(
+ "--pdb",
+ "with_debugger",
+ is_flag=True,
+ help="Drop into python debugger if an error occurs.",
+ hidden=True,
+ ),
  ]


@@ -128,14 +139,6 @@ def easylink():
  "the pipeline will be run locally."
  ),
  )
- @click.option("-v", "--verbose", count=True, help="Increase logging verbosity.", hidden=True)
- @click.option(
- "--pdb",
- "with_debugger",
- is_flag=True,
- help="Drop into python debugger if an error occurs.",
- hidden=True,
- )
  def run(
  pipeline_specification: str,
  input_data: str,
@@ -177,17 +180,23 @@ def generate_dag(
  input_data: str,
  output_dir: str | None,
  no_timestamp: bool,
+ verbose: int,
+ with_debugger: bool,
  ) -> None:
  """Generates an image of the proposed pipeline directed acyclic graph (DAG).

  This command only generates the DAG image of the pipeline; it does not actually
  run it. To run the pipeline, use the ``easylink run`` command.
  """
+ configure_logging_to_terminal(verbose)
  logger.info("Generating DAG")
  results_dir = get_results_directory(output_dir, no_timestamp).as_posix()
  logger.info(f"Results directory: {results_dir}")
  # TODO [MIC-4493]: Add configuration validation
- runner.main(
+ main = handle_exceptions(
+ func=runner.main, exceptions_logger=logger, with_debugger=with_debugger
+ )
+ main(
  command="generate_dag",
  pipeline_specification=pipeline_specification,
  input_data=input_data,
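These cli.py hunks move the hidden -v/--pdb options off the individual run command and into the module-level SHARED_OPTIONS list, so every subcommand (including generate_dag) picks them up. Collecting click options in a list and applying them with a small helper decorator is the usual idiom for this; the sketch below shows only the general pattern, and the helper name and command are illustrative assumptions rather than easylink's actual code.

    # Illustrative sketch of the shared-options idiom; not taken from easylink.
    import click

    SHARED_OPTIONS = [
        click.option(
            "-v", "--verbose", count=True, help="Increase logging verbosity.", hidden=True
        ),
        click.option(
            "--pdb",
            "with_debugger",
            is_flag=True,
            help="Drop into python debugger if an error occurs.",
            hidden=True,
        ),
    ]


    def _with_shared_options(command):
        # Apply each shared click option decorator to the command function.
        for option in SHARED_OPTIONS:
            command = option(command)
        return command


    @click.command()
    @_with_shared_options
    def some_command(verbose: int, with_debugger: bool) -> None:
        click.echo(f"verbosity={verbose}, debugger={with_debugger}")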
@@ -13,7 +13,7 @@ from __future__ import annotations
  from abc import ABC, abstractmethod
  from collections.abc import Callable
  from dataclasses import dataclass
- from typing import TYPE_CHECKING
+ from typing import TYPE_CHECKING, Any
 
  import networkx as nx
 
@@ -45,8 +45,13 @@ class InputSlot:
  """A function that validates the input data being passed into the pipeline via
  this ``InputSlot``. If the data is invalid, the function should raise an exception
  with a descriptive error message which will then be reported to the user.
- **Note that the function must be defined in the** :mod:`easylink.utilities.validation_utils`
+ **Note that the function *must* be defined in the** :mod:`easylink.utilities.validation_utils`
  **module!**"""
+ splitter: Callable[[list[str], str, Any], None] | None = None
+ """A function that splits the incoming data to this ``InputSlot`` into smaller
+ pieces. The primary purpose of this functionality is to run sections of the
+ pipeline in an embarrassingly parallel manner. **Note that the function *must*
+ be defined in the **:mod:`easylink.utilities.splitter_utils`** module!**"""
 
 
  @dataclass(frozen=True)
@@ -70,6 +75,11 @@ class OutputSlot:
 
  name: str
  """The name of the ``OutputSlot``."""
+ aggregator: Callable[[list[str], str], None] = None
+ """A function that aggregates all of the generated data to be passed out via this
+ ``OutputSlot``. The primary purpose of this functionality is to run sections
+ of the pipeline in an embarrassingly parallel manner. **Note that the function
+ *must* be defined in the **:py:mod:`easylink.utilities.aggregator_utils`** module!**"""
 
 
  @dataclass(frozen=True)
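The new splitter and aggregator fields only declare callables with the signatures shown above (Callable[[list[str], str, Any], None] and Callable[[list[str], str], None]). The sketch below illustrates the kind of functions those hooks expect; the function names and the parquet-based chunking are assumptions for illustration, not the actual contents of easylink.utilities.splitter_utils or aggregator_utils.

    # Hypothetical examples only -- not functions shipped with easylink.
    import math
    from pathlib import Path
    from typing import Any

    import pandas as pd


    def split_into_chunks(input_files: list[str], output_dir: str, num_chunks: Any) -> None:
        """Splitter-shaped callable: break the incoming data into per-chunk files."""
        data = pd.concat([pd.read_parquet(path) for path in input_files])
        chunk_size = math.ceil(len(data) / int(num_chunks))
        for i in range(int(num_chunks)):
            chunk_dir = Path(output_dir) / f"chunk_{i}"
            chunk_dir.mkdir(parents=True, exist_ok=True)
            data.iloc[i * chunk_size : (i + 1) * chunk_size].to_parquet(
                chunk_dir / "result.parquet"
            )


    def concatenate_chunks(chunk_files: list[str], output_file: str) -> None:
        """Aggregator-shaped callable: stitch the per-chunk outputs back together."""
        pd.concat([pd.read_parquet(path) for path in chunk_files]).to_parquet(output_file)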
@@ -249,9 +259,13 @@ class SlotMapping(ABC):
  """A mapping between a slot on a parent node and a slot on one of its child nodes.
 
  ``SlotMapping`` is an interface intended to be used by concrete :class:`InputSlotMapping`
- and :class:`OutputSlotMapping` classes. It represents a mapping between
- parent and child nodes at different levels of a potentially-nested
- :class:`~easylink.pipeline_schema.PipelineSchema`.
+ and :class:`OutputSlotMapping` classes to represent a mapping between parent
+ and child nodes at different levels of a potentially-nested graph. Specifically,
+ they are used to (1) remap edges between parent and child nodes in a
+ :class:`~easylink.pipeline_schema.PipelineSchema` and (2) map a leaf
+ :class:`Step's<easylink.step.Step>` slots to the corresponding
+ :class:`~easylink.implementation.Implementation` slots when building the
+ :class:`~easylink.graph_components.ImplementationGraph`.
 
  Notes
  -----
@@ -45,6 +45,7 @@ class Implementation:
  implementation_config: LayeredConfigTree,
  input_slots: Iterable["InputSlot"] = (),
  output_slots: Iterable["OutputSlot"] = (),
+ is_embarrassingly_parallel: bool = False,
  ):
  self.name = implementation_config.name
  """The name of this ``Implementation``."""
@@ -63,6 +64,7 @@ class Implementation:
  implemented by this particular ``Implementation``."""
  self.requires_spark = self._metadata.get("requires_spark", False)
  """Whether this ``Implementation`` requires a Spark environment."""
+ self.is_embarrassingly_parallel = is_embarrassingly_parallel
 
  def __repr__(self) -> str:
  return f"Implementation.{self.name}"
@@ -16,7 +16,13 @@ from loguru import logger
 
  from easylink.configuration import Config
  from easylink.pipeline_graph import PipelineGraph
- from easylink.rule import ImplementedRule, InputValidationRule, TargetRule
+ from easylink.rule import (
+ AggregationRule,
+ CheckpointRule,
+ ImplementedRule,
+ InputValidationRule,
+ TargetRule,
+ )
  from easylink.utilities.general_utils import exit_with_validation_error
  from easylink.utilities.paths import SPARK_SNAKEFILE
  from easylink.utilities.validation_utils import validate_input_file_dummy
@@ -40,13 +46,17 @@ class Pipeline:
  The :class:`~easylink.pipeline_graph.PipelineGraph` object.
  spark_is_required
  A boolean indicating whether the pipeline requires Spark.
+ any_embarrassingly_parallel
+ A boolean indicating whether any implementation in the pipeline is to be
+ run in an embarrassingly parallel manner.
 
  """
 
  def __init__(self, config: Config):
  self.config = config
  self.pipeline_graph = PipelineGraph(config)
- self.spark_is_required = self.pipeline_graph.spark_is_required()
+ self.spark_is_required = self.pipeline_graph.spark_is_required
+ self.any_embarrassingly_parallel = self.pipeline_graph.any_embarrassingly_parallel
 
  # TODO [MIC-4880]: refactor into validation object
  self._validate()
@@ -79,10 +89,10 @@ class Pipeline:
  logger.warning("Snakefile already exists, overwriting.")
  self.snakefile_path.unlink()
  self._write_imports()
- self._write_config()
+ self._write_wildcard_constraints()
+ self._write_spark_config()
  self._write_target_rules()
- if self.spark_is_required:
- self._write_spark_module()
+ self._write_spark_module()
  for node in self.pipeline_graph.implementation_nodes:
  self._write_implementation_rules(node)
  return self.snakefile_path
@@ -121,26 +131,35 @@ class Pipeline:
  return errors
 
  def _write_imports(self) -> None:
- """Writes the necessary imports to the Snakefile."""
- with open(self.snakefile_path, "a") as f:
- f.write("from easylink.utilities import validation_utils")
+ if not self.any_embarrassingly_parallel:
+ imports = "from easylink.utilities import validation_utils\n"
+ else:
+ imports = """import glob
+ import os
 
- def _write_config(self) -> None:
- """Writes configuration settings to the Snakefile.
+ from snakemake.exceptions import IncompleteCheckpointException
+ from snakemake.io import checkpoint_target
 
- Notes
- -----
- This is currently only applicable for spark-dependent pipelines.
- """
+ from easylink.utilities import aggregator_utils, splitter_utils, validation_utils\n"""
  with open(self.snakefile_path, "a") as f:
- if self.spark_is_required:
+ f.write(imports)
+
+ def _write_wildcard_constraints(self) -> None:
+ if self.any_embarrassingly_parallel:
+ with open(self.snakefile_path, "a") as f:
  f.write(
- f"\nscattergather:\n\tnum_workers={self.config.spark_resources['num_workers']},"
+ """
+ wildcard_constraints:
+ # never include '/' since those are reserved for filepaths
+ chunk="[^/]+",\n"""
  )
 
  def _write_target_rules(self) -> None:
- """Writes the rule for the final output and its validation."""
- ## The "input" files to the result node/the target rule are the final output themselves.
+ """Writes the rule for the final output and its validation.
+
+ The input files to the the target rule (i.e. the result node) are the final
+ output themselves.
+ """
  final_output, _ = self.pipeline_graph.get_io_filepaths("results")
  validator_file = str("input_validations/final_validator")
  # Snakemake resolves the DAG based on the first rule, so we put the target
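Assembled from the literal strings written by _write_imports and _write_wildcard_constraints in the hunk above, the preamble of a generated Snakefile for a pipeline with an embarrassingly parallel section would look roughly like the following (indentation approximate, since the extraction strips the whitespace inside the written strings):

    import glob
    import os

    from snakemake.exceptions import IncompleteCheckpointException
    from snakemake.io import checkpoint_target

    from easylink.utilities import aggregator_utils, splitter_utils, validation_utils

    wildcard_constraints:
        # never include '/' since those are reserved for filepaths
        chunk="[^/]+",

Pipelines without an embarrassingly parallel section get only the validation_utils import.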
@@ -152,7 +171,7 @@ class Pipeline:
  )
  final_validation = InputValidationRule(
  name="results",
- slot_name="main_input",
+ input_slot_name="main_input",
  input=final_output,
  output=validator_file,
  validator=validate_input_file_dummy,
@@ -160,12 +179,26 @@ class Pipeline:
  target_rule.write_to_snakefile(self.snakefile_path)
  final_validation.write_to_snakefile(self.snakefile_path)
 
+ def _write_spark_config(self) -> None:
+ """Writes configuration settings to the Snakefile.
+
+ Notes
+ -----
+ This is currently only applicable for spark-dependent pipelines.
+ """
+ if self.spark_is_required:
+ with open(self.snakefile_path, "a") as f:
+ f.write(
+ f"\nscattergather:\n\tnum_workers={self.config.spark_resources['num_workers']},"
+ )
+
  def _write_spark_module(self) -> None:
  """Inserts the ``easylink.utilities.spark.smk`` Snakemake module into the Snakefile."""
+ if not self.spark_is_required:
+ return
  slurm_resources = self.config.slurm_resources
  spark_resources = self.config.spark_resources
- with open(self.snakefile_path, "a") as f:
- module = f"""
+ module = f"""
  module spark_cluster:
  snakefile: '{SPARK_SNAKEFILE}'
  config: config
@@ -173,8 +206,8 @@ module spark_cluster:
  use rule * from spark_cluster
  use rule terminate_spark from spark_cluster with:
  input: rules.all.input.final_output"""
- if self.config.computing_environment == "slurm":
- module += f"""
+ if self.config.computing_environment == "slurm":
+ module += f"""
  use rule start_spark_master from spark_cluster with:
  resources:
  slurm_account={slurm_resources['slurm_account']},
@@ -195,21 +228,49 @@ use rule start_spark_worker from spark_cluster with:
  terminate_file_name=rules.terminate_spark.output,
  user=os.environ["USER"],
  cores={spark_resources['cpus_per_task']},
- memory={spark_resources['mem_mb']}
- """
+ memory={spark_resources['mem_mb']}"""
+
+ with open(self.snakefile_path, "a") as f:
  f.write(module)
 
  def _write_implementation_rules(self, node_name: str) -> None:
  """Writes the rules for each :class:`~easylink.implementation.Implementation`.
 
+ This method writes *all* rules required for a given ``Implementation``,
+ e.g. splitters and aggregators (if necessary), validations, and the actual
+ rule to run the container itself.
+
  Parameters
  ----------
  node_name
  The name of the ``Implementation`` to write the rule(s) for.
  """
- implementation = self.pipeline_graph.nodes[node_name]["implementation"]
+
+ input_slots, output_slots = self.pipeline_graph.get_io_slot_attributes(node_name)
+ validation_files, validation_rules = self._get_validations(node_name, input_slots)
+ for validation_rule in validation_rules:
+ validation_rule.write_to_snakefile(self.snakefile_path)
+
  _input_files, output_files = self.pipeline_graph.get_io_filepaths(node_name)
- input_slots = self.pipeline_graph.get_input_slot_attributes(node_name)
+ is_embarrassingly_parallel = self.pipeline_graph.get_whether_embarrassingly_parallel(
+ node_name
+ )
+ if is_embarrassingly_parallel:
+ CheckpointRule(
+ name=node_name,
+ input_slots=input_slots,
+ validations=validation_files,
+ output=output_files,
+ ).write_to_snakefile(self.snakefile_path)
+ for name, attrs in output_slots.items():
+ AggregationRule(
+ name=node_name,
+ input_slots=input_slots,
+ output_slot_name=name,
+ output_slot=attrs,
+ ).write_to_snakefile(self.snakefile_path)
+
+ implementation = self.pipeline_graph.nodes[node_name]["implementation"]
  diagnostics_dir = Path("diagnostics") / node_name
  diagnostics_dir.mkdir(parents=True, exist_ok=True)
  resources = (
@@ -217,8 +278,7 @@ use rule start_spark_worker from spark_cluster with:
  if self.config.computing_environment == "slurm"
  else None
  )
- validation_files, validation_rules = self._get_validations(node_name, input_slots)
- implementation_rule = ImplementedRule(
+ ImplementedRule(
  name=node_name,
  step_name=" and ".join(implementation.metadata_steps),
  implementation_name=implementation.name,
@@ -231,10 +291,8 @@ use rule start_spark_worker from spark_cluster with:
  image_path=implementation.singularity_image_path,
  script_cmd=implementation.script_cmd,
  requires_spark=implementation.requires_spark,
- )
- for validation_rule in validation_rules:
- validation_rule.write_to_snakefile(self.snakefile_path)
- implementation_rule.write_to_snakefile(self.snakefile_path)
+ is_embarrassingly_parallel=is_embarrassingly_parallel,
+ ).write_to_snakefile(self.snakefile_path)
 
  @staticmethod
  def _get_validations(
@@ -262,7 +320,7 @@ use rule start_spark_worker from spark_cluster with:
  validation_rules.append(
  InputValidationRule(
  name=node_name,
- slot_name=input_slot_name,
+ input_slot_name=input_slot_name,
  input=input_slot_attrs["filepaths"],
  output=validation_file,
  validator=input_slot_attrs["validator"],