easylink 0.1.8__tar.gz → 0.1.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. {easylink-0.1.8 → easylink-0.1.10}/CHANGELOG.rst +8 -0
  2. {easylink-0.1.8/src/easylink.egg-info → easylink-0.1.10}/PKG-INFO +2 -2
  3. {easylink-0.1.8 → easylink-0.1.10}/docs/source/concepts/index.rst +7 -1
  4. easylink-0.1.10/docs/source/concepts/pipeline_schema/index.rst +47 -0
  5. {easylink-0.1.8 → easylink-0.1.10}/pyproject.toml +2 -0
  6. easylink-0.1.10/src/easylink/_version.py +1 -0
  7. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/graph_components.py +55 -6
  8. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/pipeline_schema_constants/development.py +17 -15
  9. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/step.py +12 -6
  10. {easylink-0.1.8 → easylink-0.1.10/src/easylink.egg-info}/PKG-INFO +2 -2
  11. {easylink-0.1.8 → easylink-0.1.10}/src/easylink.egg-info/SOURCES.txt +1 -0
  12. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_graph_components.py +112 -1
  13. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_pipeline_graph.py +24 -6
  14. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_step.py +17 -13
  15. easylink-0.1.8/src/easylink/_version.py +0 -1
  16. {easylink-0.1.8 → easylink-0.1.10}/.bandit +0 -0
  17. {easylink-0.1.8 → easylink-0.1.10}/.flake8 +0 -0
  18. {easylink-0.1.8 → easylink-0.1.10}/.github/CODEOWNERS +0 -0
  19. {easylink-0.1.8 → easylink-0.1.10}/.github/pull_request_template.md +0 -0
  20. {easylink-0.1.8 → easylink-0.1.10}/.github/workflows/deploy.yml +0 -0
  21. {easylink-0.1.8 → easylink-0.1.10}/.github/workflows/update_readme.yml +0 -0
  22. {easylink-0.1.8 → easylink-0.1.10}/.gitignore +0 -0
  23. {easylink-0.1.8 → easylink-0.1.10}/.readthedocs.yml +0 -0
  24. {easylink-0.1.8 → easylink-0.1.10}/Jenkinsfile +0 -0
  25. {easylink-0.1.8 → easylink-0.1.10}/Makefile +0 -0
  26. {easylink-0.1.8 → easylink-0.1.10}/README.rst +0 -0
  27. {easylink-0.1.8 → easylink-0.1.10}/docs/Makefile +0 -0
  28. {easylink-0.1.8 → easylink-0.1.10}/docs/nitpick-exceptions +0 -0
  29. {easylink-0.1.8 → easylink-0.1.10}/docs/source/_static/style.css +0 -0
  30. {easylink-0.1.8 → easylink-0.1.10}/docs/source/_templates/layout.html +0 -0
  31. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/cli.rst +0 -0
  32. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/configuration.rst +0 -0
  33. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/graph_components.rst +0 -0
  34. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/implementation.rst +0 -0
  35. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/index.rst +0 -0
  36. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/pipeline.rst +0 -0
  37. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/pipeline_graph.rst +0 -0
  38. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/pipeline_schema.rst +0 -0
  39. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/pipeline_schema_constants/development.rst +0 -0
  40. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/pipeline_schema_constants/index.rst +0 -0
  41. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/pipeline_schema_constants/testing.rst +0 -0
  42. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/rule.rst +0 -0
  43. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/runner.rst +0 -0
  44. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/step.rst +0 -0
  45. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/utilities/aggregator_utils.rst +0 -0
  46. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/utilities/data_utils.rst +0 -0
  47. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/utilities/general_utils.rst +0 -0
  48. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/utilities/index.rst +0 -0
  49. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/utilities/paths.rst +0 -0
  50. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/utilities/splitter_utils.rst +0 -0
  51. {easylink-0.1.8 → easylink-0.1.10}/docs/source/api_reference/utilities/validation_utils.rst +0 -0
  52. {easylink-0.1.8 → easylink-0.1.10}/docs/source/conf.py +0 -0
  53. {easylink-0.1.8 → easylink-0.1.10}/docs/source/glossary.rst +0 -0
  54. {easylink-0.1.8 → easylink-0.1.10}/docs/source/index.rst +0 -0
  55. {easylink-0.1.8 → easylink-0.1.10}/docs/source/user_guide/cli.rst +0 -0
  56. {easylink-0.1.8 → easylink-0.1.10}/docs/source/user_guide/index.rst +0 -0
  57. {easylink-0.1.8 → easylink-0.1.10}/docs/source/user_guide/tutorials/getting_started.rst +0 -0
  58. {easylink-0.1.8 → easylink-0.1.10}/docs/source/user_guide/tutorials/index.rst +0 -0
  59. {easylink-0.1.8 → easylink-0.1.10}/python_versions.json +0 -0
  60. {easylink-0.1.8 → easylink-0.1.10}/pytype.cfg +0 -0
  61. {easylink-0.1.8 → easylink-0.1.10}/setup.cfg +0 -0
  62. {easylink-0.1.8 → easylink-0.1.10}/setup.py +0 -0
  63. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/__about__.py +0 -0
  64. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/__init__.py +0 -0
  65. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/cli.py +0 -0
  66. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/configuration.py +0 -0
  67. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/images/spark_cluster/Dockerfile +0 -0
  68. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/images/spark_cluster/README.md +0 -0
  69. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/implementation.py +0 -0
  70. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/implementation_metadata.yaml +0 -0
  71. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/pipeline.py +0 -0
  72. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/pipeline_graph.py +0 -0
  73. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/pipeline_schema.py +0 -0
  74. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/pipeline_schema_constants/__init__.py +0 -0
  75. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/pipeline_schema_constants/testing.py +0 -0
  76. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/rule.py +0 -0
  77. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/runner.py +0 -0
  78. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/README.md +0 -0
  79. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/build-containers-local.sh +0 -0
  80. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/build-containers-remote.sh +0 -0
  81. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/input_data/create_input_files.ipynb +0 -0
  82. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/input_data/input_file_1.csv +0 -0
  83. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/input_data/input_file_1.parquet +0 -0
  84. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/input_data/input_file_2.csv +0 -0
  85. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/input_data/input_file_2.parquet +0 -0
  86. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/python_pandas/README.md +0 -0
  87. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/python_pandas/dummy_step.py +0 -0
  88. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/python_pandas/python_pandas.def +0 -0
  89. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/python_pyspark/README.md +0 -0
  90. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/python_pyspark/dummy_step.py +0 -0
  91. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/python_pyspark/python_pyspark.def +0 -0
  92. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/r/README.md +0 -0
  93. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/r/dummy_step.R +0 -0
  94. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/r/r-image.def +0 -0
  95. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/steps/dev/test.py +0 -0
  96. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/utilities/__init__.py +0 -0
  97. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/utilities/aggregator_utils.py +0 -0
  98. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/utilities/data_utils.py +0 -0
  99. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/utilities/general_utils.py +0 -0
  100. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/utilities/paths.py +0 -0
  101. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/utilities/spark.smk +0 -0
  102. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/utilities/splitter_utils.py +0 -0
  103. {easylink-0.1.8 → easylink-0.1.10}/src/easylink/utilities/validation_utils.py +0 -0
  104. {easylink-0.1.8 → easylink-0.1.10}/src/easylink.egg-info/dependency_links.txt +0 -0
  105. {easylink-0.1.8 → easylink-0.1.10}/src/easylink.egg-info/entry_points.txt +0 -0
  106. {easylink-0.1.8 → easylink-0.1.10}/src/easylink.egg-info/not-zip-safe +0 -0
  107. {easylink-0.1.8 → easylink-0.1.10}/src/easylink.egg-info/requires.txt +0 -0
  108. {easylink-0.1.8 → easylink-0.1.10}/src/easylink.egg-info/top_level.txt +0 -0
  109. {easylink-0.1.8 → easylink-0.1.10}/tests/__init__.py +0 -0
  110. {easylink-0.1.8 → easylink-0.1.10}/tests/conftest.py +0 -0
  111. {easylink-0.1.8 → easylink-0.1.10}/tests/e2e/test_easylink_run.py +0 -0
  112. {easylink-0.1.8 → easylink-0.1.10}/tests/e2e/test_step_types.py +0 -0
  113. {easylink-0.1.8 → easylink-0.1.10}/tests/integration/test_snakemake.py +0 -0
  114. {easylink-0.1.8 → easylink-0.1.10}/tests/integration/test_snakemake_slurm.py +0 -0
  115. {easylink-0.1.8 → easylink-0.1.10}/tests/integration/test_snakemake_spark.py +0 -0
  116. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/common/environment_local.yaml +0 -0
  117. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/common/input_data.yaml +0 -0
  118. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/common/pipeline.yaml +0 -0
  119. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/e2e/environment_slurm.yaml +0 -0
  120. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/e2e/pipeline.yaml +0 -0
  121. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/e2e/pipeline_expanded.yaml +0 -0
  122. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/integration/environment_spark_slurm.yaml +0 -0
  123. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/integration/pipeline.yaml +0 -0
  124. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/integration/pipeline_spark.yaml +0 -0
  125. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/environment_minimum.yaml +0 -0
  126. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/environment_spark_slurm.yaml +0 -0
  127. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline.yaml +0 -0
  128. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_bad_combined_implementations.yaml +0 -0
  129. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_bad_implementation.yaml +0 -0
  130. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_bad_loop_formatting.yaml +0 -0
  131. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_bad_step.yaml +0 -0
  132. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_bad_type_key.yaml +0 -0
  133. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_combine_bad_implementation_names.yaml +0 -0
  134. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_combine_bad_topology.yaml +0 -0
  135. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_combine_two_steps.yaml +0 -0
  136. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_combine_with_extra_node.yaml +0 -0
  137. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_combine_with_iteration.yaml +0 -0
  138. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_combine_with_iteration_cycle.yaml +0 -0
  139. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_combine_with_missing_node.yaml +0 -0
  140. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_combine_with_parallel.yaml +0 -0
  141. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_missing_implementation_name.yaml +0 -0
  142. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_missing_implementations.yaml +0 -0
  143. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_missing_loop_nodes.yaml +0 -0
  144. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_missing_step.yaml +0 -0
  145. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_missing_substeps.yaml +0 -0
  146. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_missing_type_key.yaml +0 -0
  147. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_nested_templated_steps.yaml +0 -0
  148. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_out_of_order.yaml +0 -0
  149. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_spark.yaml +0 -0
  150. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_type_config_mismatch.yaml +0 -0
  151. {easylink-0.1.8 → easylink-0.1.10}/tests/specifications/unit/pipeline_wrong_parallel_split_keys.yaml +0 -0
  152. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/__init__.py +0 -0
  153. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/conftest.py +0 -0
  154. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/aggregation_rule.txt +0 -0
  155. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/checkpoint_rule.txt +0 -0
  156. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/embarrassingly_parallel_rule.txt +0 -0
  157. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/implemented_rule_local.txt +0 -0
  158. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/implemented_rule_slurm.txt +0 -0
  159. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/pipeline_local.txt +0 -0
  160. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/pipeline_slurm.txt +0 -0
  161. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/target_rule.txt +0 -0
  162. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/rule_strings/validation_rule.txt +0 -0
  163. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_cli.py +0 -0
  164. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_config.py +0 -0
  165. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_data_utils.py +0 -0
  166. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_general_utils.py +0 -0
  167. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_implementation.py +0 -0
  168. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_pipeline.py +0 -0
  169. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_pipeline_schema.py +0 -0
  170. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_rule.py +0 -0
  171. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_runner.py +0 -0
  172. {easylink-0.1.8 → easylink-0.1.10}/tests/unit/test_validations.py +0 -0
  173. {easylink-0.1.8 → easylink-0.1.10}/update_readme.py +0 -0
@@ -1,3 +1,11 @@
1
+ **0.1.10 - 3/25/25**
2
+
3
+ - Make InputSlots and OutputSlots mutable
4
+
5
+ **0.1.9 - 3/14/25**
6
+
7
+ - Refactor EmbarrassinglyParallelStep to require a Step during construction
8
+
1
9
  **0.1.8 - 3/13/25**
2
10
 
3
11
  - Refactor subgraph logic from Step to HierarchicalStep
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -5,6 +5,12 @@ Concepts
5
5
  ========
6
6
  Here we cover several core conceptual topics related to using EasyLink.
7
7
 
8
+ .. toctree::
9
+ :maxdepth: 1
10
+ :glob:
11
+
12
+ */index
13
+
8
14
  .. todo::
9
15
 
10
- Everything
16
+ Almost everything
@@ -0,0 +1,47 @@
1
+ .. _pipeline_schema:
2
+
3
+ Pipeline Schema
4
+ ===============
5
+
6
+ Motivation
7
+ ----------
8
+
9
+ Imagine the Census Bureau has an entity resolution pipeline that links people between datasets.
10
+ One step in this pipeline, called *blocking*, rules out comparing certain records with each other
11
+ in order to focus only on the pairs of records that might really be links.
12
+ The current pipeline uses a simple blocking mechanism,
13
+ which won't compare two records unless they match exactly on any of a few key attributes.
14
+ Census wants to explore whether using more sophisticated blocking methods would improve results,
15
+ without changing anything else in the pipeline.
16
+
17
+ Currently, software for entity resolution is mostly created by researchers.
18
+ Each researcher uses the technologies familiar to them and frames the entity resolution task
19
+ in the way that is most natural for their own examples,
20
+ making it hard to use multiple software modules together.
21
+ As a result, trying a new blocking method is too expensive for the Census Bureau
22
+ to undertake without knowing what the benefit will be.
23
+
24
+ Introduction
25
+ ------------
26
+
27
+ **EasyLink** is a tool for creating entity resolution pipelines
28
+ by chaining together existing pieces of software.
29
+
30
+ It doesn't let you build arbitrary pipelines by chaining together whatever software you want, however you want,
31
+ and this is actually the key value proposition of EasyLink.
32
+ To be used in pipelines created with EasyLink, software modules must follow standard patterns.
33
+ These standards (including standard data formats) allow a single piece of software
34
+ to be used for the same conceptual task in any entity resolution pipeline.
35
+
36
+ We define our standards via the *pipeline schema*, which is described on this page.
37
+ The design goals of the pipeline schema are to be:
38
+
39
+ - **Flexible** enough to capture current entity resolution methods and new methods that are yet to be developed.
40
+ In particular, we want to avoid a small innovation in one part of a pipeline causing that entire pipeline
41
+ to become impossible to construct with EasyLink—*"bend, don't break."*
42
+ - **Detailed/standardized** enough in capturing current entity resolution methods,
43
+ and areas of active methodological research, to allow very fine-grained experiments and interoperability.
44
+
45
+ Our pipeline schema can also be viewed as a restricted (but still very large) space of possible pipelines.
46
+ That is, there are certain pipelines EasyLink does not allow because they do not conform to our standards,
47
+ and the pipeline schema tells EasyLink how to check whether a pipeline is or isn't allowed.
@@ -29,6 +29,8 @@ exclude = [
29
29
  'src/easylink/pipeline_graph.py',
30
30
  'src/easylink/pipeline.py',
31
31
  'src/easylink/pipeline_schema.py',
32
+ 'src/easylink/pipeline_schema_constants/testing.py',
33
+ 'src/easylink/pipeline_schema_constants/development.py',
32
34
  'src/easylink/rule.py',
33
35
  'src/easylink/runner.py',
34
36
  'src/easylink/step.py',
@@ -0,0 +1 @@
1
+ __version__ = "0.1.10"
@@ -12,7 +12,8 @@ from __future__ import annotations
12
12
 
13
13
  from abc import ABC, abstractmethod
14
14
  from collections.abc import Callable
15
- from dataclasses import dataclass
15
+ from dataclasses import dataclass, field
16
+ from types import NotImplementedType
16
17
  from typing import TYPE_CHECKING, Any
17
18
 
18
19
  import networkx as nx
@@ -22,7 +23,7 @@ if TYPE_CHECKING:
22
23
  from easylink.step import Step
23
24
 
24
25
 
25
- @dataclass(frozen=True)
26
+ @dataclass()
26
27
  class InputSlot:
27
28
  """A single input slot to a specific node.
28
29
 
@@ -41,20 +42,48 @@ class InputSlot:
41
42
  env_var: str | None
42
43
  """The environment variable that is used to pass a list of data filepaths to
43
44
  an ``Implementation``."""
44
- validator: Callable[[str], None]
45
+ validator: Callable[[str], None] = field(compare=False)
45
46
  """A function that validates the input data being passed into the pipeline via
46
47
  this ``InputSlot``. If the data is invalid, the function should raise an exception
47
48
  with a descriptive error message which will then be reported to the user.
48
49
  **Note that the function *must* be defined in the** :mod:`easylink.utilities.validation_utils`
49
50
  **module!**"""
50
- splitter: Callable[[list[str], str, Any], None] | None = None
51
+ splitter: Callable[[list[str], str, Any], None] | None = field(
52
+ default=None, compare=False
53
+ )
51
54
  """A function that splits the incoming data to this ``InputSlot`` into smaller
52
55
  pieces. The primary purpose of this functionality is to run sections of the
53
56
  pipeline in an embarrassingly parallel manner. **Note that the function *must*
54
57
  be defined in the** :mod:`easylink.utilities.splitter_utils` **module!**
55
58
 
59
+ def __eq__(self, other: Any) -> bool | NotImplementedType:
60
+ """Checks if two ``InputSlots`` are equal.
56
61
 
57
- @dataclass(frozen=True)
62
+ Two ``InputSlots`` are considered equal if their names, ``env_vars``, and
63
+ names of their ``validators`` and ``splitters`` are all the same.
64
+ """
65
+ if not isinstance(other, InputSlot):
66
+ return NotImplemented
67
+ splitter_name = self.splitter.__name__ if self.splitter else None
68
+ other_splitter_name = other.splitter.__name__ if other.splitter else None
69
+ return (
70
+ self.name == other.name
71
+ and self.env_var == other.env_var
72
+ and self.validator.__name__ == other.validator.__name__
73
+ and splitter_name == other_splitter_name
74
+ )
75
+
76
+ def __hash__(self) -> int:
77
+ """Hashes an ``InputSlot``.
78
+
79
+ The hash is based on the name of the ``InputSlot``, its ``env_var``, and
80
+ the names of its ``validator`` and ``splitter``.
81
+ """
82
+ splitter_name = self.splitter.__name__ if self.splitter else None
83
+ return hash((self.name, self.env_var, self.validator.__name__, splitter_name))
84
+
85
+
86
+ @dataclass()
58
87
  class OutputSlot:
59
88
  """A single output slot from a specific node.
60
89
 
@@ -75,12 +104,32 @@ class OutputSlot:
75
104
 
76
105
  name: str
77
106
  """The name of the ``OutputSlot``."""
78
- aggregator: Callable[[list[str], str], None] = None
107
+ aggregator: Callable[[list[str], str], None] = field(default=None, compare=False)
79
108
  """A function that aggregates all of the generated data to be passed out via this
80
109
  ``OutputSlot``. The primary purpose of this functionality is to run sections
81
110
  of the pipeline in an embarrassingly parallel manner. **Note that the function
82
111
  *must* be defined in the** :py:mod:`easylink.utilities.aggregator_utils` **module!**
83
112
 
113
+ def __eq__(self, other: Any) -> bool | NotImplementedType:
114
+ """Checks if two ``OutputSlots`` are equal.
115
+
116
+ Two ``OutputSlots`` are considered equal if their names and the names of their
117
+ ``aggregators`` are the same.
118
+ """
119
+ if not isinstance(other, OutputSlot):
120
+ return NotImplemented
121
+ aggregator_name = self.aggregator.__name__ if self.aggregator else None
122
+ other_aggregator_name = other.aggregator.__name__ if other.aggregator else None
123
+ return self.name == other.name and aggregator_name == other_aggregator_name
124
+
125
+ def __hash__(self) -> int:
126
+ """Hashes an ``OutputSlot``.
127
+
128
+ The hash is based on the name of the ``OutputSlot`` and the name of its ``aggregator``.
129
+ """
130
+ aggregator_name = self.aggregator.__name__ if self.aggregator else None
131
+ return hash((self.name, aggregator_name))
132
+
84
133
 
85
134
  @dataclass(frozen=True)
86
135
  class EdgeParams:
@@ -59,21 +59,23 @@ NODES = [
59
59
  ),
60
60
  LoopStep(
61
61
  template_step=EmbarrassinglyParallelStep(
62
- step_name="step_3",
63
- input_slots=[
64
- InputSlot(
65
- name="step_3_main_input",
66
- env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
67
- validator=validate_input_file_dummy,
68
- splitter=split_data_by_size,
69
- ),
70
- ],
71
- output_slots=[
72
- OutputSlot(
73
- name="step_3_main_output",
74
- aggregator=concatenate_datasets,
75
- ),
76
- ],
62
+ step=Step(
63
+ step_name="step_3",
64
+ input_slots=[
65
+ InputSlot(
66
+ name="step_3_main_input",
67
+ env_var="DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
68
+ validator=validate_input_file_dummy,
69
+ splitter=split_data_by_size,
70
+ ),
71
+ ],
72
+ output_slots=[
73
+ OutputSlot(
74
+ name="step_3_main_output",
75
+ aggregator=concatenate_datasets,
76
+ ),
77
+ ],
78
+ ),
77
79
  ),
78
80
  self_edges=[
79
81
  EdgeParams(
@@ -1138,19 +1138,25 @@ class EmbarrassinglyParallelStep(Step):
1138
1138
 
1139
1139
  An ``EmbarrassinglyParallelStep`` is different than a :class:`ParallelStep`
1140
1140
  in that it is not configured by the user to be run in parallel - it completely
1141
- happens on the back end for performance reasons. As such, note that it inherits
1142
- from :class:`Step` instead of :class:`TemplatedStep`.
1141
+ happens on the back end for performance reasons.
1143
1142
 
1144
1143
  See :class:`Step` for inherited attributes.
1144
+
1145
+ Parameters
1146
+ ----------
1147
+ step
1148
+ The ``Step`` to be run in an embarrassingly parallel manner. To run multiple
1149
+ steps in parallel, use a :class:`HierarchicalStep`.
1150
+
1145
1151
  """
1146
1152
 
1147
1153
  def __init__(
1148
1154
  self,
1149
- step_name: str,
1150
- input_slots: Iterable[InputSlot],
1151
- output_slots: Iterable[OutputSlot],
1155
+ step: Step,
1152
1156
  ) -> None:
1153
- super().__init__(step_name, input_slots=input_slots, output_slots=output_slots)
1157
+ super().__init__(
1158
+ step.step_name, step.name, step.input_slots.values(), step.output_slots.values()
1159
+ )
1154
1160
  self._validate()
1155
1161
 
1156
1162
  def _validate(self) -> None:
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.8
3
+ Version: 0.1.10
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -44,6 +44,7 @@ docs/source/api_reference/utilities/paths.rst
44
44
  docs/source/api_reference/utilities/splitter_utils.rst
45
45
  docs/source/api_reference/utilities/validation_utils.rst
46
46
  docs/source/concepts/index.rst
47
+ docs/source/concepts/pipeline_schema/index.rst
47
48
  docs/source/user_guide/cli.rst
48
49
  docs/source/user_guide/index.rst
49
50
  docs/source/user_guide/tutorials/getting_started.rst
@@ -7,7 +7,6 @@ from easylink.graph_components import (
7
7
  InputSlotMapping,
8
8
  OutputSlot,
9
9
  OutputSlotMapping,
10
- SlotMapping,
11
10
  StepGraph,
12
11
  )
13
12
  from easylink.implementation import Implementation
@@ -29,6 +28,93 @@ def test_output_slot() -> None:
29
28
  assert output_slot.name == "file1"
30
29
 
31
30
 
31
+ def test_input_slot_hashing() -> None:
32
+ slot = InputSlot("slot", "foo", validate_input_file_dummy)
33
+ slot_dupe = InputSlot("slot", "foo", validate_input_file_dummy)
34
+ assert slot == slot_dupe
35
+ assert {slot, slot_dupe} == {slot}
36
+
37
+ slot_with_splitter = InputSlot("slot", "foo", validate_input_file_dummy, dummy_splitter)
38
+
39
+ assert slot != slot_with_splitter
40
+ assert {slot, slot_with_splitter, slot, slot_with_splitter} == {
41
+ slot,
42
+ slot_with_splitter,
43
+ }
44
+
45
+ slot_with_different_splitter = InputSlot(
46
+ "slot", "foo", validate_input_file_dummy, dummy_splitter_2
47
+ )
48
+ assert slot_with_splitter != slot_with_different_splitter
49
+ assert {
50
+ slot,
51
+ slot_dupe,
52
+ slot_with_splitter,
53
+ slot_with_different_splitter,
54
+ slot_with_splitter,
55
+ slot_with_different_splitter,
56
+ } == {
57
+ slot,
58
+ slot_with_splitter,
59
+ slot_with_different_splitter,
60
+ }
61
+
62
+ slot_with_different_validator = InputSlot("slot", "foo", dummy_validator)
63
+ assert slot != slot_with_different_validator
64
+ assert {slot, slot_dupe, slot_with_different_validator} == {
65
+ slot,
66
+ slot_with_different_validator,
67
+ }
68
+
69
+
70
+ def test_output_slot_hashing() -> None:
71
+ slot = OutputSlot("slot")
72
+ slot_dupe = OutputSlot("slot")
73
+ assert slot == slot_dupe
74
+ assert {slot, slot_dupe} == {slot}
75
+
76
+ slot_with_aggregator = OutputSlot("slot", dummy_aggregator)
77
+
78
+ assert slot != slot_with_aggregator
79
+ assert {slot, slot_with_aggregator, slot, slot_with_aggregator} == {
80
+ slot,
81
+ slot_with_aggregator,
82
+ }
83
+
84
+ slot_with_different_aggregator = OutputSlot("slot", dummy_aggregator_2)
85
+ assert slot_with_aggregator != slot_with_different_aggregator
86
+ assert {
87
+ slot,
88
+ slot_dupe,
89
+ slot_with_aggregator,
90
+ slot_with_different_aggregator,
91
+ slot_with_aggregator,
92
+ slot_with_different_aggregator,
93
+ } == {
94
+ slot,
95
+ slot_with_aggregator,
96
+ slot_with_different_aggregator,
97
+ }
98
+
99
+ input_slot = InputSlot("slot", "foo", validate_input_file_dummy)
100
+
101
+
102
+ def test_slot_mutability() -> None:
103
+ slot = InputSlot("slot", "foo", validate_input_file_dummy)
104
+ assert slot.splitter is None
105
+ slot.splitter = dummy_splitter
106
+ assert slot.splitter == dummy_splitter
107
+ slot.splitter = dummy_splitter_2
108
+ assert slot.splitter == dummy_splitter_2
109
+
110
+ slot = OutputSlot("slot")
111
+ assert slot.aggregator is None
112
+ slot.aggregator = dummy_aggregator
113
+ assert slot.aggregator == dummy_aggregator
114
+ slot.aggregator = dummy_aggregator_2
115
+ assert slot.aggregator == dummy_aggregator_2
116
+
117
+
32
118
  def test_edge() -> None:
33
119
  edge = EdgeParams(
34
120
  source_node="input_data",
@@ -136,3 +222,28 @@ def test_output_slot_mapping() -> None:
136
222
  assert new_edge.target_node == "output_data"
137
223
  assert new_edge.output_slot == "step_1a_main_input"
138
224
  assert new_edge.input_slot == "file1"
225
+
226
+
227
+ ####################
228
+ # Helper functions #
229
+ ####################
230
+
231
+
232
+ def dummy_splitter():
233
+ pass
234
+
235
+
236
+ def dummy_splitter_2():
237
+ pass
238
+
239
+
240
+ def dummy_aggregator():
241
+ pass
242
+
243
+
244
+ def dummy_aggregator_2():
245
+ pass
246
+
247
+
248
+ def dummy_validator():
249
+ pass
@@ -99,24 +99,42 @@ def test_implementations(default_config: Config) -> None:
99
99
  "input_slot",
100
100
  False,
101
101
  {
102
- ("step_a", InputSlot("foo", env_var="bar", validator=None)),
103
- ("step_b", InputSlot("baz", env_var="spam", validator=None)),
102
+ (
103
+ "step_a",
104
+ InputSlot("foo", env_var="bar", validator=validate_input_file_dummy),
105
+ ),
106
+ (
107
+ "step_b",
108
+ InputSlot("baz", env_var="spam", validator=validate_input_file_dummy),
109
+ ),
104
110
  },
105
111
  ),
106
112
  (
107
113
  "input_slot",
108
114
  True,
109
115
  {
110
- ("step_a", InputSlot("foo", env_var="bar", validator=None)),
111
- ("step_b", InputSlot("foo", env_var="spam", validator=None)),
116
+ (
117
+ "step_a",
118
+ InputSlot("foo", env_var="bar", validator=validate_input_file_dummy),
119
+ ),
120
+ (
121
+ "step_b",
122
+ InputSlot("foo", env_var="spam", validator=validate_input_file_dummy),
123
+ ),
112
124
  },
113
125
  ),
114
126
  (
115
127
  "input_slot",
116
128
  True,
117
129
  {
118
- ("step_a", InputSlot("foo", env_var="bar", validator=None)),
119
- ("step_b", InputSlot("baz", env_var="bar", validator=None)),
130
+ (
131
+ "step_a",
132
+ InputSlot("foo", env_var="bar", validator=validate_input_file_dummy),
133
+ ),
134
+ (
135
+ "step_b",
136
+ InputSlot("baz", env_var="bar", validator=validate_input_file_dummy),
137
+ ),
120
138
  },
121
139
  ),
122
140
  (
@@ -1114,16 +1114,18 @@ def test_complex_choice_step_get_implementation_graph(
1114
1114
  @pytest.fixture
1115
1115
  def embarrassingly_parallel_step_params() -> dict[str, Any]:
1116
1116
  return {
1117
- "step_name": "step_3",
1118
- "input_slots": [
1119
- InputSlot(
1120
- "step_3_main_input",
1121
- "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
1122
- validate_input_file_dummy,
1123
- split_data_by_size,
1124
- )
1125
- ],
1126
- "output_slots": [OutputSlot("step_3_main_output", concatenate_datasets)],
1117
+ "step": Step(
1118
+ step_name="step_3",
1119
+ input_slots=[
1120
+ InputSlot(
1121
+ "step_3_main_input",
1122
+ "DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS",
1123
+ validate_input_file_dummy,
1124
+ split_data_by_size,
1125
+ )
1126
+ ],
1127
+ output_slots=[OutputSlot("step_3_main_output", concatenate_datasets)],
1128
+ ),
1127
1129
  }
1128
1130
 
1129
1131
 
@@ -1261,9 +1263,11 @@ def test_embarrassingly_parallel_step__validation(
1261
1263
  expected_error_msg: str | list[str],
1262
1264
  ):
1263
1265
  step_params = {
1264
- "step_name": "step",
1265
- "input_slots": input_slots,
1266
- "output_slots": output_slots,
1266
+ "step": Step(
1267
+ step_name="step",
1268
+ input_slots=input_slots,
1269
+ output_slots=output_slots,
1270
+ ),
1267
1271
  }
1268
1272
  with pytest.raises(ValueError) as error:
1269
1273
  EmbarrassinglyParallelStep(**step_params)
@@ -1 +0,0 @@
1
- __version__ = "0.1.8"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes