easylink 0.1.14__tar.gz → 0.1.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. {easylink-0.1.14 → easylink-0.1.15}/CHANGELOG.rst +4 -0
  2. {easylink-0.1.14 → easylink-0.1.15}/PKG-INFO +1 -1
  3. {easylink-0.1.14 → easylink-0.1.15}/docs/source/user_guide/tutorials/getting_started.rst +140 -11
  4. easylink-0.1.15/docs/source/user_guide/tutorials/input_data.yaml +3 -0
  5. easylink-0.1.15/docs/source/user_guide/tutorials/input_file_1.parquet +0 -0
  6. easylink-0.1.15/docs/source/user_guide/tutorials/input_file_2.parquet +0 -0
  7. easylink-0.1.15/docs/source/user_guide/tutorials/input_file_3.parquet +0 -0
  8. easylink-0.1.15/src/easylink/_version.py +1 -0
  9. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/utilities/spark.smk +2 -2
  10. {easylink-0.1.14 → easylink-0.1.15}/src/easylink.egg-info/PKG-INFO +1 -1
  11. {easylink-0.1.14 → easylink-0.1.15}/src/easylink.egg-info/SOURCES.txt +5 -1
  12. easylink-0.1.14/src/easylink/_version.py +0 -1
  13. {easylink-0.1.14 → easylink-0.1.15}/.bandit +0 -0
  14. {easylink-0.1.14 → easylink-0.1.15}/.flake8 +0 -0
  15. {easylink-0.1.14 → easylink-0.1.15}/.github/CODEOWNERS +0 -0
  16. {easylink-0.1.14 → easylink-0.1.15}/.github/pull_request_template.md +0 -0
  17. {easylink-0.1.14 → easylink-0.1.15}/.github/workflows/deploy.yml +0 -0
  18. {easylink-0.1.14 → easylink-0.1.15}/.github/workflows/update_readme.yml +0 -0
  19. {easylink-0.1.14 → easylink-0.1.15}/.gitignore +0 -0
  20. {easylink-0.1.14 → easylink-0.1.15}/.readthedocs.yml +0 -0
  21. {easylink-0.1.14 → easylink-0.1.15}/CONTRIBUTING.rst +0 -0
  22. {easylink-0.1.14 → easylink-0.1.15}/Jenkinsfile +0 -0
  23. {easylink-0.1.14 → easylink-0.1.15}/Makefile +0 -0
  24. {easylink-0.1.14 → easylink-0.1.15}/README.rst +0 -0
  25. {easylink-0.1.14 → easylink-0.1.15}/docs/Makefile +0 -0
  26. {easylink-0.1.14 → easylink-0.1.15}/docs/nitpick-exceptions +0 -0
  27. {easylink-0.1.14 → easylink-0.1.15}/docs/source/_static/style.css +0 -0
  28. {easylink-0.1.14 → easylink-0.1.15}/docs/source/_templates/layout.html +0 -0
  29. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/cli.rst +0 -0
  30. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/configuration.rst +0 -0
  31. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/graph_components.rst +0 -0
  32. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/implementation.rst +0 -0
  33. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/index.rst +0 -0
  34. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/pipeline.rst +0 -0
  35. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/pipeline_graph.rst +0 -0
  36. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/pipeline_schema.rst +0 -0
  37. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/pipeline_schema_constants/development.rst +0 -0
  38. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/pipeline_schema_constants/index.rst +0 -0
  39. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/pipeline_schema_constants/testing.rst +0 -0
  40. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/rule.rst +0 -0
  41. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/runner.rst +0 -0
  42. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/step.rst +0 -0
  43. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/utilities/aggregator_utils.rst +0 -0
  44. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/utilities/data_utils.rst +0 -0
  45. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/utilities/general_utils.rst +0 -0
  46. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/utilities/index.rst +0 -0
  47. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/utilities/paths.rst +0 -0
  48. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/utilities/splitter_utils.rst +0 -0
  49. {easylink-0.1.14 → easylink-0.1.15}/docs/source/api_reference/utilities/validation_utils.rst +0 -0
  50. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/index.rst +0 -0
  51. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/01_step.drawio.png +0 -0
  52. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/02_default_implementation.drawio.png +0 -0
  53. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/03_slots.drawio.png +0 -0
  54. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/04_data_dependency.drawio.png +0 -0
  55. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/05_pipeline_schema.drawio.png +0 -0
  56. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/06_default_input.drawio.png +0 -0
  57. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/07_cloneable_section.drawio.png +0 -0
  58. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/08_cloneable_section_expanded.drawio.png +0 -0
  59. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/09_loopable_section.drawio.png +0 -0
  60. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/10_loopable_section_expanded.drawio.png +0 -0
  61. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/11_cloneable_section_splitter.drawio.png +0 -0
  62. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/12_cloneable_section_splitter_expanded.drawio.png +0 -0
  63. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/13_autoparallel_section.drawio.png +0 -0
  64. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/14_choice_section.drawio.png +0 -0
  65. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/15_choice_section_expanded.drawio.png +0 -0
  66. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/16_step_hierarchy.drawio.png +0 -0
  67. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/17_draws.drawio.png +0 -0
  68. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/18_schema_to_pipeline.drawio.png +0 -0
  69. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/19_schema_to_pipeline_combined.drawio.png +0 -0
  70. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/clustering_pass_sub_steps.drawio.png +0 -0
  71. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/images/entity_resolution_pipeline_schema.drawio.png +0 -0
  72. {easylink-0.1.14 → easylink-0.1.15}/docs/source/concepts/pipeline_schema/index.rst +0 -0
  73. {easylink-0.1.14 → easylink-0.1.15}/docs/source/conf.py +0 -0
  74. {easylink-0.1.14 → easylink-0.1.15}/docs/source/glossary.rst +0 -0
  75. {easylink-0.1.14 → easylink-0.1.15}/docs/source/index.rst +0 -0
  76. {easylink-0.1.14 → easylink-0.1.15}/docs/source/user_guide/cli.rst +0 -0
  77. {easylink-0.1.14 → easylink-0.1.15}/docs/source/user_guide/index.rst +0 -0
  78. {easylink-0.1.14 → easylink-0.1.15}/docs/source/user_guide/tutorials/DAG-common-pipeline.svg +0 -0
  79. {easylink-0.1.14 → easylink-0.1.15}/docs/source/user_guide/tutorials/DAG-e2e-pipeline-expanded.svg +0 -0
  80. {easylink-0.1.14 → easylink-0.1.15}/docs/source/user_guide/tutorials/DAG-e2e-pipeline.svg +0 -0
  81. {easylink-0.1.14/tests/specifications/examples → easylink-0.1.15/docs/source/user_guide/tutorials}/environment_slurm.yaml +0 -0
  82. {easylink-0.1.14 → easylink-0.1.15}/docs/source/user_guide/tutorials/index.rst +0 -0
  83. {easylink-0.1.14 → easylink-0.1.15}/pyproject.toml +0 -0
  84. {easylink-0.1.14 → easylink-0.1.15}/python_versions.json +0 -0
  85. {easylink-0.1.14 → easylink-0.1.15}/pytype.cfg +0 -0
  86. {easylink-0.1.14 → easylink-0.1.15}/setup.cfg +0 -0
  87. {easylink-0.1.14 → easylink-0.1.15}/setup.py +0 -0
  88. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/__about__.py +0 -0
  89. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/__init__.py +0 -0
  90. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/cli.py +0 -0
  91. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/configuration.py +0 -0
  92. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/graph_components.py +0 -0
  93. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/images/spark_cluster/Dockerfile +0 -0
  94. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/images/spark_cluster/README.md +0 -0
  95. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/implementation.py +0 -0
  96. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/implementation_metadata.yaml +0 -0
  97. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/pipeline.py +0 -0
  98. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/pipeline_graph.py +0 -0
  99. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/pipeline_schema.py +0 -0
  100. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/pipeline_schema_constants/__init__.py +0 -0
  101. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/pipeline_schema_constants/development.py +0 -0
  102. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/pipeline_schema_constants/testing.py +0 -0
  103. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/rule.py +0 -0
  104. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/runner.py +0 -0
  105. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/step.py +0 -0
  106. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/README.md +0 -0
  107. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/build-containers-local.sh +0 -0
  108. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/build-containers-remote.sh +0 -0
  109. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/input_data/create_input_files.ipynb +0 -0
  110. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/input_data/input_file_1.csv +0 -0
  111. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/input_data/input_file_1.parquet +0 -0
  112. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/input_data/input_file_2.csv +0 -0
  113. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/input_data/input_file_2.parquet +0 -0
  114. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/python_pandas/README.md +0 -0
  115. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/python_pandas/dummy_step.py +0 -0
  116. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/python_pandas/python_pandas.def +0 -0
  117. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/python_pyspark/README.md +0 -0
  118. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/python_pyspark/dummy_step.py +0 -0
  119. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/python_pyspark/python_pyspark.def +0 -0
  120. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/r/README.md +0 -0
  121. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/r/dummy_step.R +0 -0
  122. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/r/r-image.def +0 -0
  123. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/steps/dev/test.py +0 -0
  124. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/utilities/__init__.py +0 -0
  125. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/utilities/aggregator_utils.py +0 -0
  126. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/utilities/data_utils.py +0 -0
  127. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/utilities/general_utils.py +0 -0
  128. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/utilities/paths.py +0 -0
  129. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/utilities/splitter_utils.py +0 -0
  130. {easylink-0.1.14 → easylink-0.1.15}/src/easylink/utilities/validation_utils.py +0 -0
  131. {easylink-0.1.14 → easylink-0.1.15}/src/easylink.egg-info/dependency_links.txt +0 -0
  132. {easylink-0.1.14 → easylink-0.1.15}/src/easylink.egg-info/entry_points.txt +0 -0
  133. {easylink-0.1.14 → easylink-0.1.15}/src/easylink.egg-info/not-zip-safe +0 -0
  134. {easylink-0.1.14 → easylink-0.1.15}/src/easylink.egg-info/requires.txt +0 -0
  135. {easylink-0.1.14 → easylink-0.1.15}/src/easylink.egg-info/top_level.txt +0 -0
  136. {easylink-0.1.14 → easylink-0.1.15}/tests/__init__.py +0 -0
  137. {easylink-0.1.14 → easylink-0.1.15}/tests/conftest.py +0 -0
  138. {easylink-0.1.14 → easylink-0.1.15}/tests/e2e/test_easylink_run.py +0 -0
  139. {easylink-0.1.14 → easylink-0.1.15}/tests/e2e/test_step_types.py +0 -0
  140. {easylink-0.1.14 → easylink-0.1.15}/tests/integration/test_compositions.py +0 -0
  141. {easylink-0.1.14 → easylink-0.1.15}/tests/integration/test_snakemake.py +0 -0
  142. {easylink-0.1.14 → easylink-0.1.15}/tests/integration/test_snakemake_slurm.py +0 -0
  143. {easylink-0.1.14 → easylink-0.1.15}/tests/integration/test_snakemake_spark.py +0 -0
  144. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/common/environment_local.yaml +0 -0
  145. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/common/input_data.yaml +0 -0
  146. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/common/pipeline.yaml +0 -0
  147. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/e2e/environment_slurm.yaml +0 -0
  148. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/e2e/pipeline.yaml +0 -0
  149. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/e2e/pipeline_expanded.yaml +0 -0
  150. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/integration/embarrassingly_parallel/pipeline_hierarchical_step.yaml +0 -0
  151. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/integration/embarrassingly_parallel/pipeline_loop_step.yaml +0 -0
  152. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/integration/embarrassingly_parallel/pipeline_parallel_step.yaml +0 -0
  153. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/integration/environment_spark_slurm.yaml +0 -0
  154. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/integration/pipeline.yaml +0 -0
  155. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/integration/pipeline_spark.yaml +0 -0
  156. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/environment_minimum.yaml +0 -0
  157. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/environment_spark_slurm.yaml +0 -0
  158. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline.yaml +0 -0
  159. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_bad_combined_implementations.yaml +0 -0
  160. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_bad_implementation.yaml +0 -0
  161. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_bad_loop_formatting.yaml +0 -0
  162. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_bad_step.yaml +0 -0
  163. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_bad_type_key.yaml +0 -0
  164. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_combine_bad_implementation_names.yaml +0 -0
  165. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_combine_bad_topology.yaml +0 -0
  166. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_combine_two_steps.yaml +0 -0
  167. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_combine_with_extra_node.yaml +0 -0
  168. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_combine_with_iteration.yaml +0 -0
  169. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_combine_with_iteration_cycle.yaml +0 -0
  170. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_combine_with_missing_node.yaml +0 -0
  171. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_combine_with_parallel.yaml +0 -0
  172. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_missing_implementation_name.yaml +0 -0
  173. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_missing_implementations.yaml +0 -0
  174. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_missing_loop_nodes.yaml +0 -0
  175. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_missing_step.yaml +0 -0
  176. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_missing_substeps.yaml +0 -0
  177. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_missing_type_key.yaml +0 -0
  178. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_nested_templated_steps.yaml +0 -0
  179. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_out_of_order.yaml +0 -0
  180. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_spark.yaml +0 -0
  181. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_type_config_mismatch.yaml +0 -0
  182. {easylink-0.1.14 → easylink-0.1.15}/tests/specifications/unit/pipeline_wrong_parallel_split_keys.yaml +0 -0
  183. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/__init__.py +0 -0
  184. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/conftest.py +0 -0
  185. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/aggregation_rule.txt +0 -0
  186. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/checkpoint_rule.txt +0 -0
  187. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/embarrassingly_parallel_rule.txt +0 -0
  188. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/implemented_rule_local.txt +0 -0
  189. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/implemented_rule_slurm.txt +0 -0
  190. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/pipeline_local.txt +0 -0
  191. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/pipeline_slurm.txt +0 -0
  192. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/target_rule.txt +0 -0
  193. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/rule_strings/validation_rule.txt +0 -0
  194. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_cli.py +0 -0
  195. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_config.py +0 -0
  196. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_data_utils.py +0 -0
  197. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_general_utils.py +0 -0
  198. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_graph_components.py +0 -0
  199. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_implementation.py +0 -0
  200. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_pipeline.py +0 -0
  201. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_pipeline_graph.py +0 -0
  202. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_pipeline_schema.py +0 -0
  203. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_rule.py +0 -0
  204. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_runner.py +0 -0
  205. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_step.py +0 -0
  206. {easylink-0.1.14 → easylink-0.1.15}/tests/unit/test_validations.py +0 -0
  207. {easylink-0.1.14 → easylink-0.1.15}/update_readme.py +0 -0
@@ -1,3 +1,7 @@
1
+ **0.1.15 - 5/5/25**
2
+
3
+ - Fix SyntaxWarning for unescaped backslashes
4
+
1
5
  **0.1.14 - 5/1/25**
2
6
 
3
7
  - Add support for EmbarrassinglyParallelSteps to accept sections (i.e. non-leaf steps)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.14
3
+ Version: 0.1.15
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -227,9 +227,11 @@ of the pipeline, we can view intermediary outputs as well::
227
227
 
228
228
  Environments
229
229
  ============
230
- The ``-e`` argument to ``easylink run`` accepts a YAML file specifying information about the computing environment which will execute the steps of the
231
- pipeline. When we ran our first pipeline, ``common/pipeline.yaml``, above, we passed ``specifications/common/environment_local.yaml`` to this argument.
232
- The contents of this YAML file are shown below.
230
+ The ``--computing-environment`` (``-e``) argument to ``easylink run`` accepts a YAML file specifying
231
+ information about the computing environment which will execute the steps of the
232
+ pipeline. When we ran our first pipeline, ``tests/specifications/common/pipeline.yaml`` above, we passed
233
+ ``tests/specifications/common/environment_local.yaml``
234
+ to this argument. The contents of this YAML file are shown below.
233
235
 
234
236
  .. code-block:: yaml
235
237
 
@@ -246,8 +248,9 @@ Let's run this same pipeline with the ``slurm`` computing environment. `Slurm <h
246
248
  cluster management system which EasyLink can interface with to schedule and run the steps of a pipeline using the resources of a computing cluster. This means that instead of
247
249
  running all pipeline steps in your local computing environment, each step can be run with the additional resources of a separate compute node.
248
250
 
249
- To run the pipeline using slurm, we will pass ``specifications/examples/environment_slurm.yaml``
250
- to the ``environment`` command line parameter, which looks like this:
251
+ To run the pipeline using slurm, we will pass :download:`environment_slurm.yaml <environment_slurm.yaml>`
252
+ to the ``--computing-environment`` command line parameter. Download the file to the directory you will run ``easylink``
253
+ in -- I downloaded it to the root ``easylink`` directory. The YAML looks like this:
251
254
 
252
255
  .. code-block:: yaml
253
256
 
@@ -277,15 +280,15 @@ section, but using the ``slurm`` environment rather than ``local``.
277
280
 
278
281
  .. code-block:: console
279
282
 
280
- $ easylink run -p specifications/common/pipeline.yaml -i specifications/common/input_data.yaml -e specifications/examples/environment_slurm.yaml
281
- 2025-04-23 08:39:45.486 | 0:00:02.489631 | run:158 - Running pipeline
282
- 2025-04-23 08:39:45.486 | 0:00:02.489962 | run:160 - Results directory: /mnt/share/homes/tylerdy/easylink/tests/results/2025_04_23_08_39_45
283
- 2025-04-23 08:39:48.973 | 0:00:05.976983 | main:115 - Running Snakemake
284
- [Wed Apr 23 08:39:49 2025]
283
+ $ easylink run -p tests/specifications/common/pipeline.yaml -i tests/specifications/common/input_data.yaml -e environment_slurm.yaml
284
+ 2025-05-01 08:24:01.901 | 0:00:02.805179 | run:158 - Running pipeline
285
+ 2025-05-01 08:24:01.901 | 0:00:02.805621 | run:160 - Results directory: /mnt/share/homes/tylerdy/easylink/results/2025_05_01_08_24_01
286
+ 2025-05-01 08:24:05.205 | 0:00:06.109547 | main:115 - Running Snakemake
287
+ [Thu May 1 08:24:06 2025]
285
288
  Job 9: Validating step_4_python_pandas input slot step_4_secondary_input
286
289
  Reason: Missing output files: input_validations/step_4_python_pandas/step_4_secondary_input_validator
287
290
  ...
288
- [Wed Apr 23 08:43:00 2025]
291
+ [Thu May 1 08:26:16 2025]
289
292
  Job 0: Grabbing final output
290
293
  Reason: Missing output files: result.parquet; Input files updated by another job: input_validations/final_validator, intermediate/step_4_python_pandas/result.parquet
291
294
 
@@ -299,6 +302,132 @@ environment. However, for a real large-scale record linkage pipeline, the additi
299
302
  faster than ``local``, or make it *possible* to run the pipeline when it wouldn't be otherwise
300
303
  (in the case where the local environment doesn't have sufficient resources to run the pipeline).
301
304
 
305
+ Input data
306
+ ==========
307
+ The ``--input-data`` (``-i``) argument to ``easylink run`` accepts a YAML file specifying a list
308
+ of paths to files or directories containing input data to be used by the pipeline.
309
+ When we ran our first pipeline, ``tests/specifications/common/pipeline.yaml`` above, we passed
310
+ ``tests/specifications/common/input_data.yaml``
311
+ as this YAML file, shown below::
312
+
313
+ input_file_1: /mnt/team/simulation_science/priv/engineering/er_ecosystem/sample_data/dummy/input_file_1.parquet
314
+ input_file_2: /mnt/team/simulation_science/priv/engineering/er_ecosystem/sample_data/dummy/input_file_2.parquet
315
+ input_file_3: /mnt/team/simulation_science/priv/engineering/er_ecosystem/sample_data/dummy/input_file_3.parquet
316
+
317
+ Let's try passing a different input data specification YAML file,
318
+ :download:`input_data.yaml <input_data.yaml>`, which looks like this::
319
+
320
+ input_file_1: input_file_1.parquet
321
+ input_file_2: input_file_2.parquet
322
+ input_file_3: input_file_3.parquet
323
+
324
+ Download the file to the directory you will run easylink in, and then download the three input
325
+ Parquet files, :download:`input_file_1.parquet <input_file_1.parquet>`, :download:`input_file_2.parquet <input_file_2.parquet>`
326
+ and :download:`input_file_3.parquet <input_file_3.parquet>` to the same directory. In this case
327
+ I downloaded them to the root ``easylink`` directory.
328
+
329
+ These input files look a little different than the three input files we used in the pipelines we ran above,
330
+ where all three input files listed in the YAML specification were identical. Let's compare one of those,
331
+ ``/mnt/team/simulation_science/priv/engineering/er_ecosystem/sample_data/dummy/input_file_1.parquet``, to
332
+ the three files we will use here::
333
+
334
+ $ pqprint /mnt/team/simulation_science/priv/engineering/er_ecosystem/sample_data/dummy/input_file_1.parquet
335
+ foo bar counter
336
+ 0 0 a 0
337
+ 1 1 b 0
338
+ 2 2 c 0
339
+ 3 3 d 0
340
+ 4 4 e 0
341
+ ... ... .. ...
342
+ 9995 9995 a 0
343
+ 9996 9996 b 0
344
+ 9997 9997 c 0
345
+ 9998 9998 d 0
346
+ 9999 9999 e 0
347
+ [10000 rows x 3 columns]
348
+ $ pqprint input_file_1.parquet
349
+ foo bar counter
350
+ 0 0 l 10
351
+ 1 1 m 10
352
+ 2 2 n 10
353
+ 3 3 o 10
354
+ 4 4 p 10
355
+ .. ... .. ...
356
+ 95 95 l 10
357
+ 96 96 m 10
358
+ 97 97 n 10
359
+ 98 98 o 10
360
+ 99 99 p 10
361
+
362
+ [100 rows x 3 columns]
363
+ $ pqprint input_file_2.parquet
364
+ foo bar counter
365
+ 0 0 q 20
366
+ 1 1 r 20
367
+ 2 2 s 20
368
+ 3 3 t 20
369
+ 4 4 u 20
370
+ .. ... .. ...
371
+ 95 95 q 20
372
+ 96 96 r 20
373
+ 97 97 s 20
374
+ 98 98 t 20
375
+ 99 99 u 20
376
+ [100 rows x 3 columns]
377
+ $ pqprint input_file_3.parquet
378
+ foo bar counter
379
+ 0 0 v 30
380
+ 1 1 w 30
381
+ 2 2 x 30
382
+ 3 3 y 30
383
+ 4 4 z 30
384
+ .. ... .. ...
385
+ 95 95 v 30
386
+ 96 96 w 30
387
+ 97 97 x 30
388
+ 98 98 y 30
389
+ 99 99 z 30
390
+ [100 rows x 3 columns]
391
+
392
+ Our three new input files look different from each other and from the previous input files.
393
+ They have 100 rows each instead of 10000, the ``bar`` column has a different set of values
394
+ for each file, and the ``counter`` in each file starts at a different value.
395
+
396
+ Let's run the same pipeline as before, but with this new input data YAML.
397
+
398
+ .. code-block:: console
399
+
400
+ $ easylink run -p tests/specifications/common/pipeline.yaml -i input_data.yaml -e tests/specifications/common/environment_local.yaml
401
+ 2025-05-01 08:05:01.123 | 0:00:02.781384 | run:158 - Running pipeline
402
+ 2025-05-01 08:05:01.123 | 0:00:02.781776 | run:160 - Results directory: /mnt/share/homes/tylerdy/easylink/results/2025_05_01_08_05_01
403
+ 2025-05-01 08:05:04.498 | 0:00:06.156166 | main:115 - Running Snakemake
404
+ [Thu May 1 08:05:05 2025]
405
+ Job 9: Validating step_4_python_pandas input slot step_4_secondary_input
406
+ Reason: Missing output files: input_validations/step_4_python_pandas/step_4_secondary_input_validator
407
+ ...
408
+ [Thu May 1 08:05:32 2025]
409
+ Job 0: Grabbing final output
410
+ Reason: Missing output files: result.parquet; Input files updated by another job: intermediate/step_4_python_pandas/result.parquet, input_validations/final_validator
411
+ $ pqprint results/2025_05_01_08_05_01/result.parquet
412
+ foo bar counter added_column_0 added_column_1 added_column_2 added_column_3 added_column_4
413
+ 0 0 l 14 0.0 1.0 2.0 3.0 4
414
+ 1 1 m 14 0.0 1.0 2.0 3.0 4
415
+ 2 2 n 14 0.0 1.0 2.0 3.0 4
416
+ 3 3 o 14 0.0 1.0 2.0 3.0 4
417
+ 4 4 p 14 0.0 1.0 2.0 3.0 4
418
+ .. ... .. ... ... ... ... ... ...
419
+ 595 95 v 31 0.0 0.0 0.0 0.0 4
420
+ 596 96 w 31 0.0 0.0 0.0 0.0 4
421
+ 597 97 x 31 0.0 0.0 0.0 0.0 4
422
+ 598 98 y 31 0.0 0.0 0.0 0.0 4
423
+ 599 99 z 31 0.0 0.0 0.0 0.0 4
424
+
425
+ As expected, ``result.parquet`` has 600 rows (as opposed to 60k with the old input YAML)
426
+ and the ranges of ``bar`` and ``counter`` values are consistent
427
+ with our new input files. As before, the transformation of the data is specific to the development schema and will
428
+ change.
429
+
430
+
302
431
  More Pipeline Specifications
303
432
  ============================
304
433
  The ``tests`` folder includes several other pipeline specification files (YAML files). While some are special
@@ -0,0 +1,3 @@
1
+ input_file_1: input_file_1.parquet
2
+ input_file_2: input_file_2.parquet
3
+ input_file_3: input_file_3.parquet
@@ -0,0 +1 @@
1
+ __version__ = "0.1.15"
@@ -70,7 +70,7 @@ rule wait_for_spark_master:
70
70
  while true; do
71
71
 
72
72
  if [[ -e {params.spark_master_log_file} ]]; then
73
- found=`grep -o "\(spark://.*$\)" {params.spark_master_log_file} || true`
73
+ found=`grep -o "\\(spark://.*$\\)" {params.spark_master_log_file} || true`
74
74
 
75
75
  if [[ ! -z $found ]]; then
76
76
  echo "Spark master URL found: $found"
@@ -178,7 +178,7 @@ rule wait_for_spark_worker:
178
178
  while true; do
179
179
 
180
180
  if [[ -e {params.spark_worker_log_file} ]]; then
181
- found=`grep -o "\(Worker: Successfully registered with master $MASTER_URL\)" {params.spark_worker_log_file} || true`
181
+ found=`grep -o "\\(Worker: Successfully registered with master $MASTER_URL\\)" {params.spark_worker_log_file} || true`
182
182
 
183
183
  if [[ ! -z $found ]]; then
184
184
  echo "Spark Worker {wildcards.scatteritem} registered successfully"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.14
3
+ Version: 0.1.15
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -72,8 +72,13 @@ docs/source/user_guide/index.rst
72
72
  docs/source/user_guide/tutorials/DAG-common-pipeline.svg
73
73
  docs/source/user_guide/tutorials/DAG-e2e-pipeline-expanded.svg
74
74
  docs/source/user_guide/tutorials/DAG-e2e-pipeline.svg
75
+ docs/source/user_guide/tutorials/environment_slurm.yaml
75
76
  docs/source/user_guide/tutorials/getting_started.rst
76
77
  docs/source/user_guide/tutorials/index.rst
78
+ docs/source/user_guide/tutorials/input_data.yaml
79
+ docs/source/user_guide/tutorials/input_file_1.parquet
80
+ docs/source/user_guide/tutorials/input_file_2.parquet
81
+ docs/source/user_guide/tutorials/input_file_3.parquet
77
82
  src/easylink/__about__.py
78
83
  src/easylink/__init__.py
79
84
  src/easylink/_version.py
@@ -140,7 +145,6 @@ tests/specifications/common/pipeline.yaml
140
145
  tests/specifications/e2e/environment_slurm.yaml
141
146
  tests/specifications/e2e/pipeline.yaml
142
147
  tests/specifications/e2e/pipeline_expanded.yaml
143
- tests/specifications/examples/environment_slurm.yaml
144
148
  tests/specifications/integration/environment_spark_slurm.yaml
145
149
  tests/specifications/integration/pipeline.yaml
146
150
  tests/specifications/integration/pipeline_spark.yaml
@@ -1 +0,0 @@
1
- __version__ = "0.1.14"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes