easylink 0.1.24__tar.gz → 0.1.25__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (287)
  1. {easylink-0.1.24 → easylink-0.1.25}/CHANGELOG.rst +5 -0
  2. {easylink-0.1.24 → easylink-0.1.25}/PKG-INFO +27 -11
  3. easylink-0.1.25/README.rst +70 -0
  4. easylink-0.1.25/docs/source/user_guide/tutorials/input_data_demo.yaml +3 -0
  5. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/pipeline_demo_improved.yaml +4 -5
  6. easylink-0.1.25/docs/source/user_guide/tutorials/pipeline_demo_naive.yaml +70 -0
  7. easylink-0.1.25/src/easylink/_version.py +1 -0
  8. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/cli.py +14 -10
  9. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/implementation_metadata.yaml +70 -44
  10. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/runner.py +118 -1
  11. easylink-0.1.25/src/easylink/steps/rl-dummy/input_data/known_clusters.parquet +0 -0
  12. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_evaluating_pairs.py +2 -1
  13. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/general_utils.py +18 -8
  14. {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/PKG-INFO +27 -11
  15. {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/SOURCES.txt +2 -0
  16. easylink-0.1.25/tests/specifications/common/environment_local.yaml +2 -0
  17. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline_cascade.yaml +5 -5
  18. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline_splink_dummy.yaml +3 -3
  19. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline_with_fastLink.yaml +3 -3
  20. easylink-0.1.25/tests/unit/test_runner.py +116 -0
  21. easylink-0.1.24/README.rst +0 -54
  22. easylink-0.1.24/docs/source/user_guide/tutorials/input_data_demo.yaml +0 -3
  23. easylink-0.1.24/docs/source/user_guide/tutorials/pipeline_demo_naive.yaml +0 -71
  24. easylink-0.1.24/src/easylink/_version.py +0 -1
  25. easylink-0.1.24/tests/unit/test_runner.py +0 -50
  26. {easylink-0.1.24 → easylink-0.1.25}/.bandit +0 -0
  27. {easylink-0.1.24 → easylink-0.1.25}/.flake8 +0 -0
  28. {easylink-0.1.24 → easylink-0.1.25}/.github/CODEOWNERS +0 -0
  29. {easylink-0.1.24 → easylink-0.1.25}/.github/pull_request_template.md +0 -0
  30. {easylink-0.1.24 → easylink-0.1.25}/.github/workflows/deploy.yml +0 -0
  31. {easylink-0.1.24 → easylink-0.1.25}/.github/workflows/update_readme.yml +0 -0
  32. {easylink-0.1.24 → easylink-0.1.25}/.gitignore +0 -0
  33. {easylink-0.1.24 → easylink-0.1.25}/.readthedocs.yml +0 -0
  34. {easylink-0.1.24 → easylink-0.1.25}/CONTRIBUTING.rst +0 -0
  35. {easylink-0.1.24 → easylink-0.1.25}/Jenkinsfile +0 -0
  36. {easylink-0.1.24 → easylink-0.1.25}/LICENSE +0 -0
  37. {easylink-0.1.24 → easylink-0.1.25}/Makefile +0 -0
  38. {easylink-0.1.24 → easylink-0.1.25}/docs/Makefile +0 -0
  39. {easylink-0.1.24 → easylink-0.1.25}/docs/nitpick-exceptions +0 -0
  40. {easylink-0.1.24 → easylink-0.1.25}/docs/source/_static/style.css +0 -0
  41. {easylink-0.1.24 → easylink-0.1.25}/docs/source/_templates/layout.html +0 -0
  42. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/cli.rst +0 -0
  43. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/configuration.rst +0 -0
  44. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/graph_components.rst +0 -0
  45. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/implementation.rst +0 -0
  46. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/index.rst +0 -0
  47. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline.rst +0 -0
  48. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_graph.rst +0 -0
  49. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_schema.rst +0 -0
  50. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_schema_constants/development.rst +0 -0
  51. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_schema_constants/index.rst +0 -0
  52. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_schema_constants/testing.rst +0 -0
  53. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/rule.rst +0 -0
  54. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/runner.rst +0 -0
  55. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/step.rst +0 -0
  56. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/aggregator_utils.rst +0 -0
  57. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/data_utils.rst +0 -0
  58. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/general_utils.rst +0 -0
  59. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/index.rst +0 -0
  60. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/paths.rst +0 -0
  61. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/splitter_utils.rst +0 -0
  62. {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/validation_utils.rst +0 -0
  63. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/index.rst +0 -0
  64. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/01_step.drawio.png +0 -0
  65. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/02_default_implementation.drawio.png +0 -0
  66. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/03_slots.drawio.png +0 -0
  67. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/04_data_dependency.drawio.png +0 -0
  68. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/05_pipeline_schema.drawio.png +0 -0
  69. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/06_default_input.drawio.png +0 -0
  70. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/07_cloneable_section.drawio.png +0 -0
  71. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/08_cloneable_section_expanded.drawio.png +0 -0
  72. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/09_loopable_section.drawio.png +0 -0
  73. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/10_loopable_section_expanded.drawio.png +0 -0
  74. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/11_cloneable_section_splitter.drawio.png +0 -0
  75. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/12_cloneable_section_splitter_expanded.drawio.png +0 -0
  76. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/13_autoparallel_section.drawio.png +0 -0
  77. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/14_choice_section.drawio.png +0 -0
  78. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/15_choice_section_expanded.drawio.png +0 -0
  79. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/16_step_hierarchy.drawio.png +0 -0
  80. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/18_schema_to_pipeline.drawio.png +0 -0
  81. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/19_schema_to_pipeline_combined.drawio.png +0 -0
  82. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/clustering_sub_steps.drawio.png +0 -0
  83. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/easylink_pipeline_schema.drawio.png +0 -0
  84. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/entity_resolution_sub_steps.drawio.png +0 -0
  85. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/linking_sub_steps.drawio.png +0 -0
  86. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/index.rst +0 -0
  87. {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/workarounds.rst +0 -0
  88. {easylink-0.1.24 → easylink-0.1.25}/docs/source/conf.py +0 -0
  89. {easylink-0.1.24 → easylink-0.1.25}/docs/source/glossary.rst +0 -0
  90. {easylink-0.1.24 → easylink-0.1.25}/docs/source/index.rst +0 -0
  91. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/cli.rst +0 -0
  92. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/index.rst +0 -0
  93. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/2020/input_file_ssa.parquet +0 -0
  94. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/2020/input_file_w2.parquet +0 -0
  95. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/2030/input_file_ssa.parquet +0 -0
  96. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/2030/input_file_w2.parquet +0 -0
  97. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/DAG-common-pipeline.svg +0 -0
  98. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/DAG-e2e-pipeline-expanded.svg +0 -0
  99. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/DAG-e2e-pipeline.svg +0 -0
  100. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/DAG-r-pyspark.svg +0 -0
  101. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/create_inputs_demo.ipynb +0 -0
  102. {easylink-0.1.24/tests/specifications/common → easylink-0.1.25/docs/source/user_guide/tutorials}/environment_local.yaml +0 -0
  103. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/environment_slurm.yaml +0 -0
  104. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/getting_started.rst +0 -0
  105. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/impl-config-pipeline.yaml +0 -0
  106. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/index.rst +0 -0
  107. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_data.yaml +0 -0
  108. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_data_demo_2030.yaml +0 -0
  109. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_file_1.parquet +0 -0
  110. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_file_2.parquet +0 -0
  111. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_file_3.parquet +0 -0
  112. {easylink-0.1.24/src/easylink/steps/rl-dummy/input_data → easylink-0.1.25/docs/source/user_guide/tutorials}/known_clusters.parquet +0 -0
  113. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/print_fp_fn_w2_ssa.py +0 -0
  114. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/print_metrics_w2_ssa.py +0 -0
  115. {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/r_spark_pipeline.yaml +0 -0
  116. {easylink-0.1.24 → easylink-0.1.25}/pyproject.toml +0 -0
  117. {easylink-0.1.24 → easylink-0.1.25}/python_versions.json +0 -0
  118. {easylink-0.1.24 → easylink-0.1.25}/pytype.cfg +0 -0
  119. {easylink-0.1.24 → easylink-0.1.25}/setup.cfg +0 -0
  120. {easylink-0.1.24 → easylink-0.1.25}/setup.py +0 -0
  121. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/__about__.py +0 -0
  122. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/__init__.py +0 -0
  123. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/configuration.py +0 -0
  124. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/devtools/implementation_creator.py +0 -0
  125. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/graph_components.py +0 -0
  126. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/implementation.py +0 -0
  127. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline.py +0 -0
  128. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_graph.py +0 -0
  129. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema.py +0 -0
  130. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema_constants/__init__.py +0 -0
  131. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema_constants/development.py +0 -0
  132. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema_constants/main.py +0 -0
  133. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema_constants/testing.py +0 -0
  134. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/rule.py +0 -0
  135. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/step.py +0 -0
  136. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/exclude_clustered.def +0 -0
  137. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/exclude_clustered.py +0 -0
  138. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/exclude_none.def +0 -0
  139. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/exclude_none.py +0 -0
  140. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/update_clusters_by_connected_components.def +0 -0
  141. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/update_clusters_by_connected_components.py +0 -0
  142. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_clusters_to_links.def +0 -0
  143. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_clusters_to_links.py +0 -0
  144. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_determining_exclusions.def +0 -0
  145. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_determining_exclusions.py +0 -0
  146. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_removing_records.def +0 -0
  147. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_removing_records.py +0 -0
  148. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_schema_alignment.def +0 -0
  149. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_schema_alignment.py +0 -0
  150. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_updating_clusters.def +0 -0
  151. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_updating_clusters.py +0 -0
  152. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/README.md +0 -0
  153. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/build-containers-local.sh +0 -0
  154. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/build-containers-remote.sh +0 -0
  155. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/create_input_files.ipynb +0 -0
  156. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/input_file_1.csv +0 -0
  157. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/input_file_1.parquet +0 -0
  158. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/input_file_2.csv +0 -0
  159. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/input_file_2.parquet +0 -0
  160. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pandas/README.md +0 -0
  161. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pandas/dummy_step.py +0 -0
  162. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pandas/python_pandas.def +0 -0
  163. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pyspark/README.md +0 -0
  164. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pyspark/dummy_step.py +0 -0
  165. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pyspark/python_pyspark.def +0 -0
  166. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/r/README.md +0 -0
  167. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/r/dummy_step.R +0 -0
  168. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/r/r-image.def +0 -0
  169. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/test.py +0 -0
  170. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/example/middle_name_to_initial.def +0 -0
  171. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/example/middle_name_to_initial.py +0 -0
  172. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/fastLink/fastLink_evaluating_pairs.R +0 -0
  173. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/fastLink/fastLink_evaluating_pairs.def +0 -0
  174. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/fastLink/fastLink_links_to_clusters.R +0 -0
  175. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/fastLink/fastLink_links_to_clusters.def +0 -0
  176. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/output_dir/dummy_step_1_for_output_dir_example.def +0 -0
  177. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/output_dir/dummy_step_1_for_output_dir_example.py +0 -0
  178. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/output_dir/dummy_step_2_for_output_dir_example.def +0 -0
  179. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/output_dir/dummy_step_2_for_output_dir_example.py +0 -0
  180. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.def +0 -0
  181. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.py +0 -0
  182. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/input_data/create_input_files.ipynb +0 -0
  183. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/input_data/input_file_1.parquet +0 -0
  184. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/input_data/input_file_2.parquet +0 -0
  185. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.def +0 -0
  186. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.py +0 -0
  187. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_blocking_and_filtering.def +0 -0
  188. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_blocking_and_filtering.py +0 -0
  189. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_evaluating_pairs.def +0 -0
  190. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_links_to_clusters.def +0 -0
  191. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_links_to_clusters.py +0 -0
  192. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/__init__.py +0 -0
  193. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/aggregator_utils.py +0 -0
  194. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/data_utils.py +0 -0
  195. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/paths.py +0 -0
  196. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/spark.smk +0 -0
  197. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/splitter_utils.py +0 -0
  198. {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/validation_utils.py +0 -0
  199. {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/dependency_links.txt +0 -0
  200. {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/entry_points.txt +0 -0
  201. {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/not-zip-safe +0 -0
  202. {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/requires.txt +0 -0
  203. {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/top_level.txt +0 -0
  204. {easylink-0.1.24 → easylink-0.1.25}/tests/__init__.py +0 -0
  205. {easylink-0.1.24 → easylink-0.1.25}/tests/conftest.py +0 -0
  206. {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/pipeline_improved_results.csv +0 -0
  207. {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/pipeline_improved_results_2030.csv +0 -0
  208. {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/pipeline_naive_results.csv +0 -0
  209. {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/pipeline_splink_dummy_results.csv +0 -0
  210. {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/test_easylink_cli.py +0 -0
  211. {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/test_pipelines_main_schema.py +0 -0
  212. {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/test_step_types.py +0 -0
  213. {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_compositions.py +0 -0
  214. {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_data_utils.py +0 -0
  215. {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_snakemake.py +0 -0
  216. {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_snakemake_slurm.py +0 -0
  217. {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_snakemake_spark.py +0 -0
  218. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/common/input_data.yaml +0 -0
  219. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/common/input_data_one_file.yaml +0 -0
  220. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/common/pipeline.yaml +0 -0
  221. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/environment_slurm.yaml +0 -0
  222. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/environment_slurm_4GB.yaml +0 -0
  223. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/input_data_dummy.yaml +0 -0
  224. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline.yaml +0 -0
  225. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline_expanded.yaml +0 -0
  226. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/auto_parallel/pipeline_cloneable_step.yaml +0 -0
  227. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/auto_parallel/pipeline_hierarchical_step.yaml +0 -0
  228. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/auto_parallel/pipeline_loop_step.yaml +0 -0
  229. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/environment_spark_slurm.yaml +0 -0
  230. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/pipeline.yaml +0 -0
  231. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/pipeline_output_dir.yaml +0 -0
  232. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/pipeline_output_dir_default.yaml +0 -0
  233. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/pipeline_spark.yaml +0 -0
  234. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/environment_minimum.yaml +0 -0
  235. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/environment_spark_slurm.yaml +0 -0
  236. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline.yaml +0 -0
  237. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_combined_implementations.yaml +0 -0
  238. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_implementation.yaml +0 -0
  239. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_loop_formatting.yaml +0 -0
  240. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_step.yaml +0 -0
  241. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_type_key.yaml +0 -0
  242. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_bad_implementation_names.yaml +0 -0
  243. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_bad_topology.yaml +0 -0
  244. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_two_steps.yaml +0 -0
  245. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_extra_node.yaml +0 -0
  246. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_iteration.yaml +0 -0
  247. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_iteration_cycle.yaml +0 -0
  248. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_missing_node.yaml +0 -0
  249. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_parallel.yaml +0 -0
  250. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_default_implementations.yaml +0 -0
  251. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_implementation_name.yaml +0 -0
  252. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_implementations.yaml +0 -0
  253. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_loop_nodes.yaml +0 -0
  254. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_step.yaml +0 -0
  255. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_substeps.yaml +0 -0
  256. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_type_key.yaml +0 -0
  257. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_nested_templated_steps.yaml +0 -0
  258. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_out_of_order.yaml +0 -0
  259. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_spark.yaml +0 -0
  260. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_type_config_mismatch.yaml +0 -0
  261. {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_wrong_clone_keys.yaml +0 -0
  262. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/__init__.py +0 -0
  263. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/conftest.py +0 -0
  264. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/recipe_strings/python_pandas.txt +0 -0
  265. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/aggregation_rule.txt +0 -0
  266. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/auto_parallel_rule.txt +0 -0
  267. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/checkpoint_rule.txt +0 -0
  268. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/implemented_rule_local.txt +0 -0
  269. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/implemented_rule_slurm.txt +0 -0
  270. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/pipeline_local.txt +0 -0
  271. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/pipeline_slurm.txt +0 -0
  272. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/target_rule.txt +0 -0
  273. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/validation_rule.txt +0 -0
  274. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_cli.py +0 -0
  275. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_config.py +0 -0
  276. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_data_utils.py +0 -0
  277. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_general_utils.py +0 -0
  278. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_graph_components.py +0 -0
  279. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_implementation.py +0 -0
  280. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_implementation_creator.py +0 -0
  281. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_pipeline.py +0 -0
  282. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_pipeline_graph.py +0 -0
  283. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_pipeline_schema.py +0 -0
  284. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_rule.py +0 -0
  285. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_step.py +0 -0
  286. {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_validations.py +0 -0
  287. {easylink-0.1.24 → easylink-0.1.25}/update_readme.py +0 -0
@@ -1,3 +1,8 @@
1
+ **0.1.25 - 6/30/25**
2
+
3
+ - Release new images
4
+ - Clean up stdout logging
5
+
1
6
  **0.1.24 - 6/26/25**
2
7
 
3
8
  - Properly escape special characters in envars specified via pipeline configuration
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: easylink
3
- Version: 0.1.24
3
+ Version: 0.1.25
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -78,34 +78,50 @@ Installation
78
78
 
79
79
  .. _installation:
80
80
 
81
+ **NOTE: This package requires AMD64 CPU architecture - it is not compatible with
82
+ Apple's ARM64 architecture (e.g. M1 and newer Macs).**
83
+
81
84
  There are a few things to install in order to use this package:
82
85
 
83
- - Install singularity.
86
+ - Set up Linux.
87
+
88
+ Singularity (and thus EasyLink) requires Linux to run. If you are not already
89
+ using Linux, you will need to set up a virtual machine; refer to the
90
+ `Singularity documentation for installing on Windows or Mac <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-windows-or-mac>`_.
84
91
 
85
- You may need to request it from your system admin.
86
- Refer to https://docs.sylabs.io/guides/4.1/admin-guide/installation.html.
87
- You can check if you already have singularity installed by running the command
88
- ``singularity --version``. For an existing installation, your singularity version
92
+ - Install Singularity.
93
+
94
+ First check if you already have Singularity installed by running the command
95
+ ``singularity --version``. For an existing installation, your Singularity version
89
96
  number is printed.
90
97
 
98
+ If Singularity is not yet installed, you will need to install it;
99
+ refer to the `Singularity docs for installing on Linux <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-linux>`_.
100
+
101
+ Note that this requires administrator privileges; you may need to request installation
102
+ from your system admin if you are working in a shared computing environment.
103
+
91
104
  - Install conda.
92
105
 
93
106
  We recommend `miniforge <https://github.com/conda-forge/miniforge>`_. You can
94
107
  check if you already have conda installed by running the command ``conda --version``.
95
108
  For an existing installation, a version will be displayed.
96
109
 
97
- - Install easylink, python and graphviz in a conda environment.
110
+ - Create a conda environment with python and graphviz installed.
111
+
112
+ ::
113
+
114
+ $ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
115
+ $ conda activate easylink
116
+
117
+ - Install easylink in the environment.
98
118
 
99
119
  Option 1 - Install from PyPI with pip::
100
120
 
101
- $ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
102
- $ conda activate easylink
103
121
  $ pip install easylink
104
122
 
105
123
  Option 2 - Build from source with pip::
106
124
 
107
- $ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
108
- $ conda activate easylink
109
125
  $ pip install git+https://github.com/ihmeuw/easylink.git
110
126
 
111
127
  .. _end_installation:
@@ -0,0 +1,70 @@
1
+ ========
2
+ EasyLink
3
+ ========
4
+
5
+ EasyLink is a framework that allows users to build and run highly configurable
6
+ entity resolution (ER) pipelines.
7
+
8
+ .. _python_support:
9
+
10
+ **Supported Python versions: 3.11, 3.12**
11
+
12
+ .. _end_python_support:
13
+
14
+ Installation
15
+ ============
16
+
17
+ .. _installation:
18
+
19
+ **NOTE: This package requires an AMD64 (x86-64) CPU architecture - it is not compatible with
20
+ Apple's ARM64 architecture (e.g. M1 and newer Macs).**
21
+
22
+ There are a few things to install in order to use this package:
23
+
24
+ - Set up Linux.
25
+
26
+ Singularity (and thus EasyLink) requires Linux to run. If you are not already
27
+ using Linux, you will need to set up a virtual machine; refer to the
28
+ `Singularity documentation for installing on Windows or Mac <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-windows-or-mac>`_.
29
+
30
+ - Install Singularity.
31
+
32
+ First check if you already have Singularity installed by running the command
33
+ ``singularity --version``. For an existing installation, your Singularity version
34
+ number is printed.
35
+
36
+ If Singularity is not yet installed, you will need to install it;
37
+ refer to the `Singularity docs for installing on Linux <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-linux>`_.
38
+
39
+ Note that this requires administrator privileges; you may need to request installation
40
+ from your system admin if you are working in a shared computing environment.
41
+
42
+ - Install conda.
43
+
44
+ We recommend `miniforge <https://github.com/conda-forge/miniforge>`_. You can
45
+ check if you already have conda installed by running the command ``conda --version``.
46
+ For an existing installation, a version will be displayed.
47
+
48
+ - Create a conda environment with Python and graphviz installed.
49
+
50
+ ::
51
+
52
+ $ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
53
+ $ conda activate easylink
54
+
55
+ - Install easylink in the environment.
56
+
57
+ Option 1 - Install from PyPI with pip::
58
+
59
+ $ pip install easylink
60
+
61
+ Option 2 - Build from source with pip::
62
+
63
+ $ pip install git+https://github.com/ihmeuw/easylink.git
64
+
65
+ .. _end_installation:
66
+
67
+ Documentation
68
+ =============
69
+
70
+ You can view documentation at https://easylink.readthedocs.io/en/latest/
@@ -0,0 +1,3 @@
1
+ input_file_ssa: 2020/input_file_ssa.parquet
2
+ input_file_w2: 2020/input_file_w2.parquet
3
+ known_clusters: known_clusters.parquet
@@ -37,7 +37,7 @@ steps:
37
37
  configuration:
38
38
  INPUT_DATASET: input_file_ssa
39
39
  - implementation:
40
- name: dummy_pre-processing
40
+ name: no_pre-processing
41
41
  configuration:
42
42
  INPUT_DATASET: input_file_w2
43
43
  schema_alignment:
@@ -47,17 +47,16 @@ steps:
47
47
  implementation:
48
48
  name: splink_blocking_and_filtering
49
49
  configuration:
50
- BLOCKING_RULES: "l.first_name == r.first_name,l.last_name == r.last_name"
51
50
  LINK_ONLY: true
51
+ BLOCKING_RULES: "l.first_name == r.first_name,l.last_name == r.last_name"
52
52
  evaluating_pairs:
53
53
  implementation:
54
54
  name: splink_evaluating_pairs
55
55
  configuration:
56
+ LINK_ONLY: true
56
57
  BLOCKING_RULES_FOR_TRAINING: "l.first_name == r.first_name,l.last_name == r.last_name"
57
58
  COMPARISONS: "ssn:levenshtein,first_name:name,middle_initial:exact,last_name:name"
58
59
  PROBABILITY_TWO_RANDOM_RECORDS_MATCH: 0.0001 # == 1 / len(w2)
59
- THRESHOLD_MATCH_PROBABILITY: 0
60
- LINK_ONLY: true
61
60
  links_to_clusters:
62
61
  implementation:
63
62
  name: splink_links_to_clusters
@@ -68,4 +67,4 @@ steps:
68
67
  name: default_updating_clusters
69
68
  canonicalizing_and_downstream_analysis:
70
69
  implementation:
71
- name: dummy_canonicalizing_and_downstream_analysis
70
+ name: save_clusters
@@ -0,0 +1,70 @@
1
+ steps:
2
+ entity_resolution:
3
+ substeps:
4
+ determining_exclusions_and_removing_records:
5
+ clones:
6
+ - determining_exclusions:
7
+ implementation:
8
+ name: default_determining_exclusions
9
+ configuration:
10
+ INPUT_DATASET: input_file_ssa
11
+ removing_records:
12
+ implementation:
13
+ name: default_removing_records
14
+ configuration:
15
+ INPUT_DATASET: input_file_ssa
16
+ - determining_exclusions:
17
+ implementation:
18
+ name: default_determining_exclusions
19
+ configuration:
20
+ INPUT_DATASET: input_file_w2
21
+ removing_records:
22
+ implementation:
23
+ name: default_removing_records
24
+ configuration:
25
+ INPUT_DATASET: input_file_w2
26
+ clustering:
27
+ substeps:
28
+ clusters_to_links:
29
+ implementation:
30
+ name: default_clusters_to_links
31
+ linking:
32
+ substeps:
33
+ pre-processing:
34
+ clones:
35
+ - implementation:
36
+ name: middle_name_to_initial
37
+ configuration:
38
+ INPUT_DATASET: input_file_ssa
39
+ - implementation:
40
+ name: no_pre-processing
41
+ configuration:
42
+ INPUT_DATASET: input_file_w2
43
+ schema_alignment:
44
+ implementation:
45
+ name: default_schema_alignment
46
+ blocking_and_filtering:
47
+ implementation:
48
+ name: splink_blocking_and_filtering
49
+ configuration:
50
+ LINK_ONLY: true
51
+ BLOCKING_RULES: "l.first_name == r.first_name,l.last_name == r.last_name"
52
+ evaluating_pairs:
53
+ implementation:
54
+ name: splink_evaluating_pairs
55
+ configuration:
56
+ LINK_ONLY: true
57
+ BLOCKING_RULES_FOR_TRAINING: "l.first_name == r.first_name,l.last_name == r.last_name"
58
+ COMPARISONS: "ssn:exact,first_name:exact,middle_initial:exact,last_name:exact"
59
+ PROBABILITY_TWO_RANDOM_RECORDS_MATCH: 0.0001 # == 1 / len(w2)
60
+ links_to_clusters:
61
+ implementation:
62
+ name: splink_links_to_clusters
63
+ configuration:
64
+ THRESHOLD_MATCH_PROBABILITY: 0.996
65
+ updating_clusters:
66
+ implementation:
67
+ name: default_updating_clusters
68
+ canonicalizing_and_downstream_analysis:
69
+ implementation:
70
+ name: save_clusters
@@ -0,0 +1 @@
1
+ __version__ = "0.1.25"
@@ -201,16 +201,20 @@ def run(
201
201
  main = handle_exceptions(
202
202
  func=runner.main, exceptions_logger=logger, with_debugger=with_debugger
203
203
  )
204
- main(
205
- command="run",
206
- pipeline_specification=pipeline_specification,
207
- input_data=input_data,
208
- computing_environment=computing_environment,
209
- results_dir=results_dir,
210
- images_dir=images,
211
- schema_name=schema,
212
- )
213
- logger.info("*** FINISHED ***")
204
+ try:
205
+ main(
206
+ command="run",
207
+ pipeline_specification=pipeline_specification,
208
+ input_data=input_data,
209
+ computing_environment=computing_environment,
210
+ results_dir=results_dir,
211
+ images_dir=images,
212
+ schema_name=schema,
213
+ )
214
+ except SystemExit:
215
+ # Snakemake ends the run by raising SystemExit - log and re-raise (NOTE: the exit code is not inspected here, so this is also logged for non-zero exits)
216
+ logger.info("*** FINISHED ***")
217
+ raise
214
218
 
215
219
 
216
220
  @easylink.command()
@@ -2,7 +2,7 @@ step_1_python_pandas:
2
2
  steps:
3
3
  - step_1
4
4
  image_name: python_pandas.sif
5
- zenodo_record_id: 15733426
5
+ zenodo_record_id: 15757317
6
6
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
7
7
  script_cmd: python /dummy_step.py
8
8
  outputs:
@@ -11,7 +11,7 @@ step_1a_python_pandas:
11
11
  steps:
12
12
  - step_1a
13
13
  image_name: python_pandas.sif
14
- zenodo_record_id: 15733426
14
+ zenodo_record_id: 15757317
15
15
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
16
16
  script_cmd: python /dummy_step.py
17
17
  env:
@@ -22,7 +22,7 @@ step_1b_python_pandas:
22
22
  steps:
23
23
  - step_1b
24
24
  image_name: python_pandas.sif
25
- zenodo_record_id: 15733426
25
+ zenodo_record_id: 15757317
26
26
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
27
27
  script_cmd: python /dummy_step.py
28
28
  env:
@@ -33,7 +33,7 @@ step_2_python_pandas:
33
33
  steps:
34
34
  - step_2
35
35
  image_name: python_pandas.sif
36
- zenodo_record_id: 15733426
36
+ zenodo_record_id: 15757317
37
37
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
38
38
  script_cmd: python /dummy_step.py
39
39
  outputs:
@@ -42,7 +42,7 @@ step_3_python_pandas:
42
42
  steps:
43
43
  - step_3
44
44
  image_name: python_pandas.sif
45
- zenodo_record_id: 15733426
45
+ zenodo_record_id: 15757317
46
46
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
47
47
  script_cmd: python /dummy_step.py
48
48
  outputs:
@@ -51,7 +51,7 @@ step_4_python_pandas:
51
51
  steps:
52
52
  - step_4
53
53
  image_name: python_pandas.sif
54
- zenodo_record_id: 15733426
54
+ zenodo_record_id: 15757317
55
55
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
56
56
  script_cmd: python /dummy_step.py
57
57
  env:
@@ -62,7 +62,7 @@ step_5_python_pandas:
62
62
  steps:
63
63
  - step_5
64
64
  image_name: python_pandas.sif
65
- zenodo_record_id: 15733426
65
+ zenodo_record_id: 15757317
66
66
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
67
67
  script_cmd: python /dummy_step.py
68
68
  env:
@@ -73,7 +73,7 @@ step_6_python_pandas:
73
73
  steps:
74
74
  - step_6
75
75
  image_name: python_pandas.sif
76
- zenodo_record_id: 15733426
76
+ zenodo_record_id: 15757317
77
77
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
78
78
  script_cmd: python /dummy_step.py
79
79
  env:
@@ -84,7 +84,7 @@ step_4a_python_pandas:
84
84
  steps:
85
85
  - step_4a
86
86
  image_name: python_pandas.sif
87
- zenodo_record_id: 15733426
87
+ zenodo_record_id: 15757317
88
88
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
89
89
  script_cmd: python /dummy_step.py
90
90
  env:
@@ -95,7 +95,7 @@ step_4b_python_pandas:
95
95
  steps:
96
96
  - step_4b
97
97
  image_name: python_pandas.sif
98
- zenodo_record_id: 15733426
98
+ zenodo_record_id: 15757317
99
99
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
100
100
  script_cmd: python /dummy_step.py
101
101
  env:
@@ -106,7 +106,7 @@ step_4b_r:
106
106
  steps:
107
107
  - step_4b
108
108
  image_name: r-image.sif
109
- zenodo_record_id: 15733426
109
+ zenodo_record_id: 15757317
110
110
  md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
111
111
  script_cmd: Rscript /dummy_step.R
112
112
  env:
@@ -117,7 +117,7 @@ step_1_python_pyspark:
117
117
  steps:
118
118
  - step_1
119
119
  image_name: python_pyspark.sif
120
- zenodo_record_id: 15733426
120
+ zenodo_record_id: 15757317
121
121
  md5_checksum: c948577ab0607411dd4b640622d9ec3a
122
122
  script_cmd: python3 /code/dummy_step.py
123
123
  outputs:
@@ -127,7 +127,7 @@ step_2_python_pyspark:
127
127
  steps:
128
128
  - step_2
129
129
  image_name: python_pyspark.sif
130
- zenodo_record_id: 15733426
130
+ zenodo_record_id: 15757317
131
131
  md5_checksum: c948577ab0607411dd4b640622d9ec3a
132
132
  script_cmd: python3 /code/dummy_step.py
133
133
  outputs:
@@ -137,7 +137,7 @@ step_3_python_pyspark:
137
137
  steps:
138
138
  - step_3
139
139
  image_name: python_pyspark.sif
140
- zenodo_record_id: 15733426
140
+ zenodo_record_id: 15757317
141
141
  md5_checksum: c948577ab0607411dd4b640622d9ec3a
142
142
  script_cmd: python3 /code/dummy_step.py
143
143
  outputs:
@@ -147,7 +147,7 @@ step_4_python_pyspark:
147
147
  steps:
148
148
  - step_4
149
149
  image_name: python_pyspark.sif
150
- zenodo_record_id: 15733426
150
+ zenodo_record_id: 15757317
151
151
  md5_checksum: c948577ab0607411dd4b640622d9ec3a
152
152
  script_cmd: python3 /code/dummy_step.py
153
153
  env:
@@ -158,7 +158,7 @@ step_1_r:
158
158
  steps:
159
159
  - step_1
160
160
  image_name: r-image.sif
161
- zenodo_record_id: 15733426
161
+ zenodo_record_id: 15757317
162
162
  md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
163
163
  script_cmd: Rscript /dummy_step.R
164
164
  outputs:
@@ -168,7 +168,7 @@ step_2_r:
168
168
  steps:
169
169
  - step_2
170
170
  image_name: r-image.sif
171
- zenodo_record_id: 15733426
171
+ zenodo_record_id: 15757317
172
172
  md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
173
173
  script_cmd: Rscript /dummy_step.R
174
174
  outputs:
@@ -178,7 +178,7 @@ step_3_r:
178
178
  steps:
179
179
  - step_3
180
180
  image_name: r-image.sif
181
- zenodo_record_id: 15733426
181
+ zenodo_record_id: 15757317
182
182
  md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
183
183
  script_cmd: Rscript /dummy_step.R
184
184
  outputs:
@@ -188,7 +188,7 @@ step_4_r:
188
188
  steps:
189
189
  - step_4
190
190
  image_name: r-image.sif
191
- zenodo_record_id: 15733426
191
+ zenodo_record_id: 15757317
192
192
  md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
193
193
  script_cmd: Rscript /dummy_step.R
194
194
  env:
@@ -201,7 +201,7 @@ step_1_and_step_2_combined_python_pandas:
201
201
  - step_1
202
202
  - step_2
203
203
  image_name: python_pandas.sif
204
- zenodo_record_id: 15733426
204
+ zenodo_record_id: 15757317
205
205
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
206
206
  script_cmd: python /dummy_step.py
207
207
  outputs:
@@ -211,7 +211,7 @@ step_1_and_step_2_parallel_python_pandas:
211
211
  - step_1
212
212
  - step_2
213
213
  image_name: python_pandas.sif
214
- zenodo_record_id: 15733426
214
+ zenodo_record_id: 15757317
215
215
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
216
216
  script_cmd: python /dummy_step.py
217
217
  env:
@@ -223,7 +223,7 @@ step_3_and_step_4_combined_python_pandas:
223
223
  - step_3
224
224
  - step_4
225
225
  image_name: python_pandas.sif
226
- zenodo_record_id: 15733426
226
+ zenodo_record_id: 15757317
227
227
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
228
228
  script_cmd: python /dummy_step.py
229
229
  outputs:
@@ -233,7 +233,7 @@ step_1a_and_step_1b_combined_python_pandas:
233
233
  - step_1a
234
234
  - step_1b
235
235
  image_name: python_pandas.sif
236
- zenodo_record_id: 15733426
236
+ zenodo_record_id: 15757317
237
237
  md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
238
238
  script_cmd: python /dummy_step.py
239
239
  outputs:
@@ -241,131 +241,157 @@ step_1a_and_step_1b_combined_python_pandas:
241
241
  dummy_step_1_for_output_dir_example:
242
242
  steps:
243
243
  - step_1_for_output_dir_example
244
- image_name: main/dummy_step_1_for_output_dir_example.sif
244
+ image_name: dummy_step_1_for_output_dir_example.sif
245
245
  script_cmd: python /dummy_step_1_for_output_dir_example.py
246
246
  outputs:
247
247
  step_1_main_output_directory: output_dir/
248
248
  dummy_step_1_for_output_dir_example_default:
249
249
  steps:
250
250
  - step_1_for_output_dir_example
251
- image_name: main/dummy_step_1_for_output_dir_example.sif
251
+ image_name: dummy_step_1_for_output_dir_example.sif
252
252
  script_cmd: python /dummy_step_1_for_output_dir_example.py
253
253
  dummy_step_2_for_output_dir_example:
254
254
  steps:
255
255
  - step_2_for_output_dir_example
256
- image_name: main/dummy_step_2_for_output_dir_example.sif
256
+ image_name: dummy_step_2_for_output_dir_example.sif
257
257
  script_cmd: python /dummy_step_2_for_output_dir_example.py
258
258
  outputs:
259
259
  step_2_main_output: result.parquet
260
260
  default_removing_records:
261
261
  steps:
262
262
  - removing_records
263
- image_name: main/default_removing_records.sif
263
+ image_name: default_removing_records.sif
264
+ zenodo_record_id: 15757317
265
+ md5_checksum: 85dba6fd73c9f8f504fddb6d5c30f2de
264
266
  script_cmd: python /default_removing_records.py
265
267
  outputs:
266
268
  dataset: dataset
267
269
  default_clusters_to_links:
268
270
  steps:
269
271
  - clusters_to_links
270
- image_name: main/default_clusters_to_links.sif
272
+ image_name: default_clusters_to_links.sif
273
+ zenodo_record_id: 15757317
274
+ md5_checksum: 0d00d1272bd8193f60727791097aa065
271
275
  script_cmd: python /default_clusters_to_links.py
272
276
  outputs:
273
277
  known_links: result.parquet
274
278
  default_determining_exclusions:
275
279
  steps:
276
280
  - determining_exclusions
277
- image_name: main/default_determining_exclusions.sif
281
+ image_name: default_determining_exclusions.sif
282
+ zenodo_record_id: 15757317
283
+ md5_checksum: e61cb32ad45b79ca9a2c36db4e76ef7e
278
284
  script_cmd: python /default_determining_exclusions.py
279
285
  outputs:
280
286
  ids_to_remove: result.parquet
281
287
  default_updating_clusters:
282
288
  steps:
283
289
  - updating_clusters
284
- image_name: main/default_updating_clusters.sif
290
+ image_name: default_updating_clusters.sif
291
+ zenodo_record_id: 15757317
292
+ md5_checksum: cc6bd29e099c2523347fa04545aa35c9
285
293
  script_cmd: python /default_updating_clusters.py
286
294
  outputs:
287
295
  clusters: clusters.parquet
288
- dummy_canonicalizing_and_downstream_analysis:
296
+ # NOTE: This was made from dummy_canonicalizing_and_downstream_analysis.py;
297
+ # if rebuilding, rename that file to save_clusters.py
298
+ save_clusters:
289
299
  steps:
290
300
  - canonicalizing_and_downstream_analysis
291
- image_name: main/dummy_canonicalizing_and_downstream_analysis.sif
301
+ image_name: save_clusters.sif
302
+ zenodo_record_id: 15757317
303
+ md5_checksum: 384ab2be668cbadc45160a674f621022
292
304
  script_cmd: python /dummy_canonicalizing_and_downstream_analysis.py
293
305
  outputs:
294
306
  analysis_output: result.parquet
295
- dummy_pre-processing:
307
+ # NOTE: This was made from dummy_pre-processing.py;
308
+ # if rebuilding, rename that file to no_pre-processing.py
309
+ no_pre-processing:
296
310
  steps:
297
311
  - pre-processing
298
- image_name: main/dummy_pre-processing.sif
312
+ image_name: no_pre-processing.sif
313
+ zenodo_record_id: 15757317
314
+ md5_checksum: 9a9c080cf145078152501cf96bf61f27
299
315
  script_cmd: python /dummy_pre-processing.py
300
316
  outputs:
301
317
  dataset: dataset
302
318
  default_schema_alignment:
303
319
  steps:
304
320
  - schema_alignment
305
- image_name: main/default_schema_alignment.sif
321
+ image_name: default_schema_alignment.sif
322
+ zenodo_record_id: 15757317
323
+ md5_checksum: 3166587f9cfec478b999a17074d628f7
306
324
  script_cmd: python /default_schema_alignment.py
307
325
  outputs:
308
326
  records: result.parquet
309
327
  splink_blocking_and_filtering:
310
328
  steps:
311
329
  - blocking_and_filtering
312
- image_name: main/splink_blocking_and_filtering.sif
330
+ image_name: splink_blocking_and_filtering.sif
331
+ zenodo_record_id: 15757317
332
+ md5_checksum: 8a365b90295ef6beaad2b7f80a03d768
313
333
  script_cmd: python /splink_blocking_and_filtering.py
314
334
  outputs:
315
335
  blocks: blocks
316
336
  splink_evaluating_pairs:
317
337
  steps:
318
338
  - evaluating_pairs
319
- image_name: main/splink_evaluating_pairs.sif
339
+ image_name: splink_evaluating_pairs.sif
340
+ zenodo_record_id: 15757317
341
+ md5_checksum: b57f4bd16b7a3aa5099569078ea4c064
320
342
  script_cmd: python /splink_evaluating_pairs.py
321
343
  outputs:
322
344
  links: result.parquet
323
345
  splink_links_to_clusters:
324
346
  steps:
325
347
  - links_to_clusters
326
- image_name: main/splink_links_to_clusters.sif
348
+ image_name: splink_links_to_clusters.sif
349
+ zenodo_record_id: 15757317
350
+ md5_checksum: 645937f7bab9c2557b7aacafaf4e4765
327
351
  script_cmd: python /splink_links_to_clusters.py
328
352
  outputs:
329
353
  clusters: result.parquet
330
354
  fastLink_evaluating_pairs:
331
355
  steps:
332
356
  - evaluating_pairs
333
- image_name: main/fastLink_evaluating_pairs.sif
357
+ image_name: fastLink_evaluating_pairs.sif
334
358
  script_cmd: Rscript /fastLink_evaluating_pairs.R
335
359
  outputs:
336
360
  links: result.parquet
337
361
  fastLink_links_to_clusters:
338
362
  steps:
339
363
  - links_to_clusters
340
- image_name: main/fastLink_links_to_clusters.sif
364
+ image_name: fastLink_links_to_clusters.sif
341
365
  script_cmd: Rscript /fastLink_links_to_clusters.R
342
366
  outputs:
343
367
  clusters: result.parquet
344
368
  exclude_clustered:
345
369
  steps:
346
370
  - determining_exclusions
347
- image_name: main/exclude_clustered.sif
371
+ image_name: exclude_clustered.sif
348
372
  script_cmd: python /exclude_clustered.py
349
373
  outputs:
350
374
  ids_to_remove: result.parquet
351
375
  exclude_none:
352
376
  steps:
353
377
  - determining_exclusions
354
- image_name: main/exclude_none.sif
378
+ image_name: exclude_none.sif
355
379
  script_cmd: python /exclude_none.py
356
380
  outputs:
357
381
  ids_to_remove: result.parquet
358
382
  update_clusters_by_connected_components:
359
383
  steps:
360
384
  - updating_clusters
361
- image_name: main/update_clusters_by_connected_components.sif
385
+ image_name: update_clusters_by_connected_components.sif
362
386
  script_cmd: python /update_clusters_by_connected_components.py
363
387
  outputs:
364
388
  clusters: result.parquet
365
389
  middle_name_to_initial:
366
390
  steps:
367
391
  - pre-processing
368
- image_name: main/middle_name_to_initial.sif
392
+ image_name: middle_name_to_initial.sif
393
+ zenodo_record_id: 15757317
394
+ md5_checksum: 89db9c3318300cda9d538cde08c3c323
369
395
  script_cmd: python /middle_name_to_initial.py
370
396
  outputs:
371
397
  dataset: dataset