easylink 0.1.24__tar.gz → 0.1.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {easylink-0.1.24 → easylink-0.1.25}/CHANGELOG.rst +5 -0
- {easylink-0.1.24 → easylink-0.1.25}/PKG-INFO +27 -11
- easylink-0.1.25/README.rst +70 -0
- easylink-0.1.25/docs/source/user_guide/tutorials/input_data_demo.yaml +3 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/pipeline_demo_improved.yaml +4 -5
- easylink-0.1.25/docs/source/user_guide/tutorials/pipeline_demo_naive.yaml +70 -0
- easylink-0.1.25/src/easylink/_version.py +1 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/cli.py +14 -10
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/implementation_metadata.yaml +70 -44
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/runner.py +118 -1
- easylink-0.1.25/src/easylink/steps/rl-dummy/input_data/known_clusters.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_evaluating_pairs.py +2 -1
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/general_utils.py +18 -8
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/PKG-INFO +27 -11
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/SOURCES.txt +2 -0
- easylink-0.1.25/tests/specifications/common/environment_local.yaml +2 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline_cascade.yaml +5 -5
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline_splink_dummy.yaml +3 -3
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline_with_fastLink.yaml +3 -3
- easylink-0.1.25/tests/unit/test_runner.py +116 -0
- easylink-0.1.24/README.rst +0 -54
- easylink-0.1.24/docs/source/user_guide/tutorials/input_data_demo.yaml +0 -3
- easylink-0.1.24/docs/source/user_guide/tutorials/pipeline_demo_naive.yaml +0 -71
- easylink-0.1.24/src/easylink/_version.py +0 -1
- easylink-0.1.24/tests/unit/test_runner.py +0 -50
- {easylink-0.1.24 → easylink-0.1.25}/.bandit +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/.flake8 +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/.github/CODEOWNERS +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/.github/pull_request_template.md +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/.github/workflows/deploy.yml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/.github/workflows/update_readme.yml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/.gitignore +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/.readthedocs.yml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/CONTRIBUTING.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/Jenkinsfile +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/LICENSE +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/Makefile +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/Makefile +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/nitpick-exceptions +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/_static/style.css +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/_templates/layout.html +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/cli.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/configuration.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/graph_components.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/implementation.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_graph.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_schema.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_schema_constants/development.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_schema_constants/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/pipeline_schema_constants/testing.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/rule.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/runner.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/step.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/aggregator_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/data_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/general_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/paths.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/splitter_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/api_reference/utilities/validation_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/01_step.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/02_default_implementation.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/03_slots.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/04_data_dependency.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/05_pipeline_schema.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/06_default_input.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/07_cloneable_section.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/08_cloneable_section_expanded.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/09_loopable_section.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/10_loopable_section_expanded.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/11_cloneable_section_splitter.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/12_cloneable_section_splitter_expanded.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/13_autoparallel_section.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/14_choice_section.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/15_choice_section_expanded.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/16_step_hierarchy.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/18_schema_to_pipeline.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/19_schema_to_pipeline_combined.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/clustering_sub_steps.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/easylink_pipeline_schema.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/entity_resolution_sub_steps.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/images/linking_sub_steps.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/pipeline_schema/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/concepts/workarounds.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/conf.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/glossary.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/cli.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/2020/input_file_ssa.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/2020/input_file_w2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/2030/input_file_ssa.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/2030/input_file_w2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/DAG-common-pipeline.svg +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/DAG-e2e-pipeline-expanded.svg +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/DAG-e2e-pipeline.svg +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/DAG-r-pyspark.svg +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/create_inputs_demo.ipynb +0 -0
- {easylink-0.1.24/tests/specifications/common → easylink-0.1.25/docs/source/user_guide/tutorials}/environment_local.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/environment_slurm.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/getting_started.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/impl-config-pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_data.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_data_demo_2030.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_file_1.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_file_2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/input_file_3.parquet +0 -0
- {easylink-0.1.24/src/easylink/steps/rl-dummy/input_data → easylink-0.1.25/docs/source/user_guide/tutorials}/known_clusters.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/print_fp_fn_w2_ssa.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/print_metrics_w2_ssa.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/r_spark_pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/pyproject.toml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/python_versions.json +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/pytype.cfg +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/setup.cfg +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/setup.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/__about__.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/configuration.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/devtools/implementation_creator.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/graph_components.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/implementation.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_graph.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema_constants/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema_constants/development.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema_constants/main.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/pipeline_schema_constants/testing.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/rule.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/step.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/exclude_clustered.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/exclude_clustered.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/exclude_none.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/exclude_none.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/update_clusters_by_connected_components.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/cascading/update_clusters_by_connected_components.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_clusters_to_links.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_clusters_to_links.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_determining_exclusions.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_determining_exclusions.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_removing_records.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_removing_records.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_schema_alignment.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_schema_alignment.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_updating_clusters.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/default/default_updating_clusters.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/README.md +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/build-containers-local.sh +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/build-containers-remote.sh +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/create_input_files.ipynb +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/input_file_1.csv +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/input_file_1.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/input_file_2.csv +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/input_data/input_file_2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pandas/README.md +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pandas/dummy_step.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pandas/python_pandas.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pyspark/README.md +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pyspark/dummy_step.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/python_pyspark/python_pyspark.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/r/README.md +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/r/dummy_step.R +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/r/r-image.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/dev/test.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/example/middle_name_to_initial.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/example/middle_name_to_initial.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/fastLink/fastLink_evaluating_pairs.R +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/fastLink/fastLink_evaluating_pairs.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/fastLink/fastLink_links_to_clusters.R +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/fastLink/fastLink_links_to_clusters.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/output_dir/dummy_step_1_for_output_dir_example.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/output_dir/dummy_step_1_for_output_dir_example.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/output_dir/dummy_step_2_for_output_dir_example.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/output_dir/dummy_step_2_for_output_dir_example.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/input_data/create_input_files.ipynb +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/input_data/input_file_1.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/input_data/input_file_2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_blocking_and_filtering.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_blocking_and_filtering.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_evaluating_pairs.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_links_to_clusters.def +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/steps/splink/splink_links_to_clusters.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/aggregator_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/data_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/paths.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/spark.smk +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/splitter_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink/utilities/validation_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/dependency_links.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/entry_points.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/not-zip-safe +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/requires.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/src/easylink.egg-info/top_level.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/conftest.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/pipeline_improved_results.csv +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/pipeline_improved_results_2030.csv +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/pipeline_naive_results.csv +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/pipeline_splink_dummy_results.csv +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/test_easylink_cli.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/test_pipelines_main_schema.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/e2e/test_step_types.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_compositions.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_data_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_snakemake.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_snakemake_slurm.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/integration/test_snakemake_spark.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/common/input_data.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/common/input_data_one_file.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/common/pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/environment_slurm.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/environment_slurm_4GB.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/input_data_dummy.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/e2e/pipeline_expanded.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/auto_parallel/pipeline_cloneable_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/auto_parallel/pipeline_hierarchical_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/auto_parallel/pipeline_loop_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/environment_spark_slurm.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/pipeline_output_dir.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/pipeline_output_dir_default.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/integration/pipeline_spark.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/environment_minimum.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/environment_spark_slurm.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_combined_implementations.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_implementation.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_loop_formatting.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_bad_type_key.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_bad_implementation_names.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_bad_topology.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_two_steps.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_extra_node.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_iteration.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_iteration_cycle.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_missing_node.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_combine_with_parallel.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_default_implementations.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_implementation_name.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_implementations.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_loop_nodes.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_substeps.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_missing_type_key.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_nested_templated_steps.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_out_of_order.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_spark.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_type_config_mismatch.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/specifications/unit/pipeline_wrong_clone_keys.yaml +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/conftest.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/recipe_strings/python_pandas.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/aggregation_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/auto_parallel_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/checkpoint_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/implemented_rule_local.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/implemented_rule_slurm.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/pipeline_local.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/pipeline_slurm.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/target_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/rule_strings/validation_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_cli.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_config.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_data_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_general_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_graph_components.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_implementation.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_implementation_creator.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_pipeline.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_pipeline_graph.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_pipeline_schema.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_rule.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_step.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/tests/unit/test_validations.py +0 -0
- {easylink-0.1.24 → easylink-0.1.25}/update_readme.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: easylink
|
3
|
-
Version: 0.1.24
|
3
|
+
Version: 0.1.25
|
4
4
|
Summary: Research repository for the EasyLink ER ecosystem project.
|
5
5
|
Home-page: https://github.com/ihmeuw/easylink
|
6
6
|
Author: The EasyLink developers
|
@@ -78,34 +78,50 @@ Installation
|
|
78
78
|
|
79
79
|
.. _installation:
|
80
80
|
|
81
|
+
**NOTE: This package requires AMD64 CPU architecture - it is not compatible with
|
82
|
+
Apple's ARM64 architecture (e.g. M1 and newer Macs).**
|
83
|
+
|
81
84
|
There are a few things to install in order to use this package:
|
82
85
|
|
83
|
-
-
|
86
|
+
- Set up Linux.
|
87
|
+
|
88
|
+
Singularity (and thus EasyLink) requires Linux to run. If you are not already
|
89
|
+
using Linux, you will need to set up a virtual machine; refer to the
|
90
|
+
`Singularity documentation for installing on Windows or Mac <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-windows-or-mac>`_.
|
84
91
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
``singularity --version``. For an existing installation, your
|
92
|
+
- Install Singularity.
|
93
|
+
|
94
|
+
First check if you already have Singularity installed by running the command
|
95
|
+
``singularity --version``. For an existing installation, your Singularity version
|
89
96
|
number is printed.
|
90
97
|
|
98
|
+
If Singularity is not yet installed, you will need to install it;
|
99
|
+
refer to the `Singularity docs for installing on Linux <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-linux>`_.
|
100
|
+
|
101
|
+
Note that this requires administrator privileges; you may need to request installation
|
102
|
+
from your system admin if you are working in a shared computing environment.
|
103
|
+
|
91
104
|
- Install conda.
|
92
105
|
|
93
106
|
We recommend `miniforge <https://github.com/conda-forge/miniforge>`_. You can
|
94
107
|
check if you already have conda installed by running the command ``conda --version``.
|
95
108
|
For an existing installation, a version will be displayed.
|
96
109
|
|
97
|
-
-
|
110
|
+
- Create a conda environment with python and graphviz installed.
|
111
|
+
|
112
|
+
::
|
113
|
+
|
114
|
+
$ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
|
115
|
+
$ conda activate easylink
|
116
|
+
|
117
|
+
- Install easylink in the environment.
|
98
118
|
|
99
119
|
Option 1 - Install from PyPI with pip::
|
100
120
|
|
101
|
-
$ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
|
102
|
-
$ conda activate easylink
|
103
121
|
$ pip install easylink
|
104
122
|
|
105
123
|
Option 2 - Build from source with pip::
|
106
124
|
|
107
|
-
$ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
|
108
|
-
$ conda activate easylink
|
109
125
|
$ pip install git+https://github.com/ihmeuw/easylink.git
|
110
126
|
|
111
127
|
.. _end_installation:
|
@@ -0,0 +1,70 @@
|
|
1
|
+
========
|
2
|
+
EasyLink
|
3
|
+
========
|
4
|
+
|
5
|
+
EasyLink is a framework that allows users to build and run highly configurable
|
6
|
+
entity resolution (ER) pipelines.
|
7
|
+
|
8
|
+
.. _python_support:
|
9
|
+
|
10
|
+
**Supported Python versions: 3.11, 3.12**
|
11
|
+
|
12
|
+
.. _end_python_support:
|
13
|
+
|
14
|
+
Installation
|
15
|
+
============
|
16
|
+
|
17
|
+
.. _installation:
|
18
|
+
|
19
|
+
**NOTE: This package requires AMD64 CPU architecture - it is not compatible with
|
20
|
+
Apple's ARM64 architecture (e.g. M1 and newer Macs).**
|
21
|
+
|
22
|
+
There are a few things to install in order to use this package:
|
23
|
+
|
24
|
+
- Set up Linux.
|
25
|
+
|
26
|
+
Singularity (and thus EasyLink) requires Linux to run. If you are not already
|
27
|
+
using Linux, you will need to set up a virtual machine; refer to the
|
28
|
+
`Singularity documentation for installing on Windows or Mac <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-windows-or-mac>`_.
|
29
|
+
|
30
|
+
- Install Singularity.
|
31
|
+
|
32
|
+
First check if you already have Singularity installed by running the command
|
33
|
+
``singularity --version``. For an existing installation, your Singularity version
|
34
|
+
number is printed.
|
35
|
+
|
36
|
+
If Singularity is not yet installed, you will need to install it;
|
37
|
+
refer to the `Singularity docs for installing on Linux <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-linux>`_.
|
38
|
+
|
39
|
+
Note that this requires administrator privileges; you may need to request installation
|
40
|
+
from your system admin if you are working in a shared computing environment.
|
41
|
+
|
42
|
+
- Install conda.
|
43
|
+
|
44
|
+
We recommend `miniforge <https://github.com/conda-forge/miniforge>`_. You can
|
45
|
+
check if you already have conda installed by running the command ``conda --version``.
|
46
|
+
For an existing installation, a version will be displayed.
|
47
|
+
|
48
|
+
- Create a conda environment with python and graphviz installed.
|
49
|
+
|
50
|
+
::
|
51
|
+
|
52
|
+
$ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
|
53
|
+
$ conda activate easylink
|
54
|
+
|
55
|
+
- Install easylink in the environment.
|
56
|
+
|
57
|
+
Option 1 - Install from PyPI with pip::
|
58
|
+
|
59
|
+
$ pip install easylink
|
60
|
+
|
61
|
+
Option 2 - Build from source with pip::
|
62
|
+
|
63
|
+
$ pip install git+https://github.com/ihmeuw/easylink.git
|
64
|
+
|
65
|
+
.. _end_installation:
|
66
|
+
|
67
|
+
Documentation
|
68
|
+
=============
|
69
|
+
|
70
|
+
You can view documentation at https://easylink.readthedocs.io/en/latest/
|
{easylink-0.1.24 → easylink-0.1.25}/docs/source/user_guide/tutorials/pipeline_demo_improved.yaml
RENAMED
@@ -37,7 +37,7 @@ steps:
|
|
37
37
|
configuration:
|
38
38
|
INPUT_DATASET: input_file_ssa
|
39
39
|
- implementation:
|
40
|
-
name:
|
40
|
+
name: no_pre-processing
|
41
41
|
configuration:
|
42
42
|
INPUT_DATASET: input_file_w2
|
43
43
|
schema_alignment:
|
@@ -47,17 +47,16 @@ steps:
|
|
47
47
|
implementation:
|
48
48
|
name: splink_blocking_and_filtering
|
49
49
|
configuration:
|
50
|
-
BLOCKING_RULES: "l.first_name == r.first_name,l.last_name == r.last_name"
|
51
50
|
LINK_ONLY: true
|
51
|
+
BLOCKING_RULES: "l.first_name == r.first_name,l.last_name == r.last_name"
|
52
52
|
evaluating_pairs:
|
53
53
|
implementation:
|
54
54
|
name: splink_evaluating_pairs
|
55
55
|
configuration:
|
56
|
+
LINK_ONLY: true
|
56
57
|
BLOCKING_RULES_FOR_TRAINING: "l.first_name == r.first_name,l.last_name == r.last_name"
|
57
58
|
COMPARISONS: "ssn:levenshtein,first_name:name,middle_initial:exact,last_name:name"
|
58
59
|
PROBABILITY_TWO_RANDOM_RECORDS_MATCH: 0.0001 # == 1 / len(w2)
|
59
|
-
THRESHOLD_MATCH_PROBABILITY: 0
|
60
|
-
LINK_ONLY: true
|
61
60
|
links_to_clusters:
|
62
61
|
implementation:
|
63
62
|
name: splink_links_to_clusters
|
@@ -68,4 +67,4 @@ steps:
|
|
68
67
|
name: default_updating_clusters
|
69
68
|
canonicalizing_and_downstream_analysis:
|
70
69
|
implementation:
|
71
|
-
name:
|
70
|
+
name: save_clusters
|
@@ -0,0 +1,70 @@
|
|
1
|
+
steps:
|
2
|
+
entity_resolution:
|
3
|
+
substeps:
|
4
|
+
determining_exclusions_and_removing_records:
|
5
|
+
clones:
|
6
|
+
- determining_exclusions:
|
7
|
+
implementation:
|
8
|
+
name: default_determining_exclusions
|
9
|
+
configuration:
|
10
|
+
INPUT_DATASET: input_file_ssa
|
11
|
+
removing_records:
|
12
|
+
implementation:
|
13
|
+
name: default_removing_records
|
14
|
+
configuration:
|
15
|
+
INPUT_DATASET: input_file_ssa
|
16
|
+
- determining_exclusions:
|
17
|
+
implementation:
|
18
|
+
name: default_determining_exclusions
|
19
|
+
configuration:
|
20
|
+
INPUT_DATASET: input_file_w2
|
21
|
+
removing_records:
|
22
|
+
implementation:
|
23
|
+
name: default_removing_records
|
24
|
+
configuration:
|
25
|
+
INPUT_DATASET: input_file_w2
|
26
|
+
clustering:
|
27
|
+
substeps:
|
28
|
+
clusters_to_links:
|
29
|
+
implementation:
|
30
|
+
name: default_clusters_to_links
|
31
|
+
linking:
|
32
|
+
substeps:
|
33
|
+
pre-processing:
|
34
|
+
clones:
|
35
|
+
- implementation:
|
36
|
+
name: middle_name_to_initial
|
37
|
+
configuration:
|
38
|
+
INPUT_DATASET: input_file_ssa
|
39
|
+
- implementation:
|
40
|
+
name: no_pre-processing
|
41
|
+
configuration:
|
42
|
+
INPUT_DATASET: input_file_w2
|
43
|
+
schema_alignment:
|
44
|
+
implementation:
|
45
|
+
name: default_schema_alignment
|
46
|
+
blocking_and_filtering:
|
47
|
+
implementation:
|
48
|
+
name: splink_blocking_and_filtering
|
49
|
+
configuration:
|
50
|
+
LINK_ONLY: true
|
51
|
+
BLOCKING_RULES: "l.first_name == r.first_name,l.last_name == r.last_name"
|
52
|
+
evaluating_pairs:
|
53
|
+
implementation:
|
54
|
+
name: splink_evaluating_pairs
|
55
|
+
configuration:
|
56
|
+
LINK_ONLY: true
|
57
|
+
BLOCKING_RULES_FOR_TRAINING: "l.first_name == r.first_name,l.last_name == r.last_name"
|
58
|
+
COMPARISONS: "ssn:exact,first_name:exact,middle_initial:exact,last_name:exact"
|
59
|
+
PROBABILITY_TWO_RANDOM_RECORDS_MATCH: 0.0001 # == 1 / len(w2)
|
60
|
+
links_to_clusters:
|
61
|
+
implementation:
|
62
|
+
name: splink_links_to_clusters
|
63
|
+
configuration:
|
64
|
+
THRESHOLD_MATCH_PROBABILITY: 0.996
|
65
|
+
updating_clusters:
|
66
|
+
implementation:
|
67
|
+
name: default_updating_clusters
|
68
|
+
canonicalizing_and_downstream_analysis:
|
69
|
+
implementation:
|
70
|
+
name: save_clusters
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "0.1.25"
|
@@ -201,16 +201,20 @@ def run(
|
|
201
201
|
main = handle_exceptions(
|
202
202
|
func=runner.main, exceptions_logger=logger, with_debugger=with_debugger
|
203
203
|
)
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
204
|
+
try:
|
205
|
+
main(
|
206
|
+
command="run",
|
207
|
+
pipeline_specification=pipeline_specification,
|
208
|
+
input_data=input_data,
|
209
|
+
computing_environment=computing_environment,
|
210
|
+
results_dir=results_dir,
|
211
|
+
images_dir=images,
|
212
|
+
schema_name=schema,
|
213
|
+
)
|
214
|
+
except SystemExit:
|
215
|
+
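# Snakemake signals completion by raising SystemExit - log and re-raise. NOTE(review): this handler runs for any SystemExit regardless of exit code; confirm failures should also log FINISHED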
|
216
|
+
logger.info("*** FINISHED ***")
|
217
|
+
raise
|
214
218
|
|
215
219
|
|
216
220
|
@easylink.command()
|
@@ -2,7 +2,7 @@ step_1_python_pandas:
|
|
2
2
|
steps:
|
3
3
|
- step_1
|
4
4
|
image_name: python_pandas.sif
|
5
|
-
zenodo_record_id:
|
5
|
+
zenodo_record_id: 15757317
|
6
6
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
7
7
|
script_cmd: python /dummy_step.py
|
8
8
|
outputs:
|
@@ -11,7 +11,7 @@ step_1a_python_pandas:
|
|
11
11
|
steps:
|
12
12
|
- step_1a
|
13
13
|
image_name: python_pandas.sif
|
14
|
-
zenodo_record_id:
|
14
|
+
zenodo_record_id: 15757317
|
15
15
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
16
16
|
script_cmd: python /dummy_step.py
|
17
17
|
env:
|
@@ -22,7 +22,7 @@ step_1b_python_pandas:
|
|
22
22
|
steps:
|
23
23
|
- step_1b
|
24
24
|
image_name: python_pandas.sif
|
25
|
-
zenodo_record_id:
|
25
|
+
zenodo_record_id: 15757317
|
26
26
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
27
27
|
script_cmd: python /dummy_step.py
|
28
28
|
env:
|
@@ -33,7 +33,7 @@ step_2_python_pandas:
|
|
33
33
|
steps:
|
34
34
|
- step_2
|
35
35
|
image_name: python_pandas.sif
|
36
|
-
zenodo_record_id:
|
36
|
+
zenodo_record_id: 15757317
|
37
37
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
38
38
|
script_cmd: python /dummy_step.py
|
39
39
|
outputs:
|
@@ -42,7 +42,7 @@ step_3_python_pandas:
|
|
42
42
|
steps:
|
43
43
|
- step_3
|
44
44
|
image_name: python_pandas.sif
|
45
|
-
zenodo_record_id:
|
45
|
+
zenodo_record_id: 15757317
|
46
46
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
47
47
|
script_cmd: python /dummy_step.py
|
48
48
|
outputs:
|
@@ -51,7 +51,7 @@ step_4_python_pandas:
|
|
51
51
|
steps:
|
52
52
|
- step_4
|
53
53
|
image_name: python_pandas.sif
|
54
|
-
zenodo_record_id:
|
54
|
+
zenodo_record_id: 15757317
|
55
55
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
56
56
|
script_cmd: python /dummy_step.py
|
57
57
|
env:
|
@@ -62,7 +62,7 @@ step_5_python_pandas:
|
|
62
62
|
steps:
|
63
63
|
- step_5
|
64
64
|
image_name: python_pandas.sif
|
65
|
-
zenodo_record_id:
|
65
|
+
zenodo_record_id: 15757317
|
66
66
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
67
67
|
script_cmd: python /dummy_step.py
|
68
68
|
env:
|
@@ -73,7 +73,7 @@ step_6_python_pandas:
|
|
73
73
|
steps:
|
74
74
|
- step_6
|
75
75
|
image_name: python_pandas.sif
|
76
|
-
zenodo_record_id:
|
76
|
+
zenodo_record_id: 15757317
|
77
77
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
78
78
|
script_cmd: python /dummy_step.py
|
79
79
|
env:
|
@@ -84,7 +84,7 @@ step_4a_python_pandas:
|
|
84
84
|
steps:
|
85
85
|
- step_4a
|
86
86
|
image_name: python_pandas.sif
|
87
|
-
zenodo_record_id:
|
87
|
+
zenodo_record_id: 15757317
|
88
88
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
89
89
|
script_cmd: python /dummy_step.py
|
90
90
|
env:
|
@@ -95,7 +95,7 @@ step_4b_python_pandas:
|
|
95
95
|
steps:
|
96
96
|
- step_4b
|
97
97
|
image_name: python_pandas.sif
|
98
|
-
zenodo_record_id:
|
98
|
+
zenodo_record_id: 15757317
|
99
99
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
100
100
|
script_cmd: python /dummy_step.py
|
101
101
|
env:
|
@@ -106,7 +106,7 @@ step_4b_r:
|
|
106
106
|
steps:
|
107
107
|
- step_4b
|
108
108
|
image_name: r-image.sif
|
109
|
-
zenodo_record_id:
|
109
|
+
zenodo_record_id: 15757317
|
110
110
|
md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
|
111
111
|
script_cmd: Rscript /dummy_step.R
|
112
112
|
env:
|
@@ -117,7 +117,7 @@ step_1_python_pyspark:
|
|
117
117
|
steps:
|
118
118
|
- step_1
|
119
119
|
image_name: python_pyspark.sif
|
120
|
-
zenodo_record_id:
|
120
|
+
zenodo_record_id: 15757317
|
121
121
|
md5_checksum: c948577ab0607411dd4b640622d9ec3a
|
122
122
|
script_cmd: python3 /code/dummy_step.py
|
123
123
|
outputs:
|
@@ -127,7 +127,7 @@ step_2_python_pyspark:
|
|
127
127
|
steps:
|
128
128
|
- step_2
|
129
129
|
image_name: python_pyspark.sif
|
130
|
-
zenodo_record_id:
|
130
|
+
zenodo_record_id: 15757317
|
131
131
|
md5_checksum: c948577ab0607411dd4b640622d9ec3a
|
132
132
|
script_cmd: python3 /code/dummy_step.py
|
133
133
|
outputs:
|
@@ -137,7 +137,7 @@ step_3_python_pyspark:
|
|
137
137
|
steps:
|
138
138
|
- step_3
|
139
139
|
image_name: python_pyspark.sif
|
140
|
-
zenodo_record_id:
|
140
|
+
zenodo_record_id: 15757317
|
141
141
|
md5_checksum: c948577ab0607411dd4b640622d9ec3a
|
142
142
|
script_cmd: python3 /code/dummy_step.py
|
143
143
|
outputs:
|
@@ -147,7 +147,7 @@ step_4_python_pyspark:
|
|
147
147
|
steps:
|
148
148
|
- step_4
|
149
149
|
image_name: python_pyspark.sif
|
150
|
-
zenodo_record_id:
|
150
|
+
zenodo_record_id: 15757317
|
151
151
|
md5_checksum: c948577ab0607411dd4b640622d9ec3a
|
152
152
|
script_cmd: python3 /code/dummy_step.py
|
153
153
|
env:
|
@@ -158,7 +158,7 @@ step_1_r:
|
|
158
158
|
steps:
|
159
159
|
- step_1
|
160
160
|
image_name: r-image.sif
|
161
|
-
zenodo_record_id:
|
161
|
+
zenodo_record_id: 15757317
|
162
162
|
md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
|
163
163
|
script_cmd: Rscript /dummy_step.R
|
164
164
|
outputs:
|
@@ -168,7 +168,7 @@ step_2_r:
|
|
168
168
|
steps:
|
169
169
|
- step_2
|
170
170
|
image_name: r-image.sif
|
171
|
-
zenodo_record_id:
|
171
|
+
zenodo_record_id: 15757317
|
172
172
|
md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
|
173
173
|
script_cmd: Rscript /dummy_step.R
|
174
174
|
outputs:
|
@@ -178,7 +178,7 @@ step_3_r:
|
|
178
178
|
steps:
|
179
179
|
- step_3
|
180
180
|
image_name: r-image.sif
|
181
|
-
zenodo_record_id:
|
181
|
+
zenodo_record_id: 15757317
|
182
182
|
md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
|
183
183
|
script_cmd: Rscript /dummy_step.R
|
184
184
|
outputs:
|
@@ -188,7 +188,7 @@ step_4_r:
|
|
188
188
|
steps:
|
189
189
|
- step_4
|
190
190
|
image_name: r-image.sif
|
191
|
-
zenodo_record_id:
|
191
|
+
zenodo_record_id: 15757317
|
192
192
|
md5_checksum: a4a03b836694a2b81a1bd2852736ccc5
|
193
193
|
script_cmd: Rscript /dummy_step.R
|
194
194
|
env:
|
@@ -201,7 +201,7 @@ step_1_and_step_2_combined_python_pandas:
|
|
201
201
|
- step_1
|
202
202
|
- step_2
|
203
203
|
image_name: python_pandas.sif
|
204
|
-
zenodo_record_id:
|
204
|
+
zenodo_record_id: 15757317
|
205
205
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
206
206
|
script_cmd: python /dummy_step.py
|
207
207
|
outputs:
|
@@ -211,7 +211,7 @@ step_1_and_step_2_parallel_python_pandas:
|
|
211
211
|
- step_1
|
212
212
|
- step_2
|
213
213
|
image_name: python_pandas.sif
|
214
|
-
zenodo_record_id:
|
214
|
+
zenodo_record_id: 15757317
|
215
215
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
216
216
|
script_cmd: python /dummy_step.py
|
217
217
|
env:
|
@@ -223,7 +223,7 @@ step_3_and_step_4_combined_python_pandas:
|
|
223
223
|
- step_3
|
224
224
|
- step_4
|
225
225
|
image_name: python_pandas.sif
|
226
|
-
zenodo_record_id:
|
226
|
+
zenodo_record_id: 15757317
|
227
227
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
228
228
|
script_cmd: python /dummy_step.py
|
229
229
|
outputs:
|
@@ -233,7 +233,7 @@ step_1a_and_step_1b_combined_python_pandas:
|
|
233
233
|
- step_1a
|
234
234
|
- step_1b
|
235
235
|
image_name: python_pandas.sif
|
236
|
-
zenodo_record_id:
|
236
|
+
zenodo_record_id: 15757317
|
237
237
|
md5_checksum: 9177b8e168fcc9cae91bf61265f2185c
|
238
238
|
script_cmd: python /dummy_step.py
|
239
239
|
outputs:
|
@@ -241,131 +241,157 @@ step_1a_and_step_1b_combined_python_pandas:
|
|
241
241
|
dummy_step_1_for_output_dir_example:
|
242
242
|
steps:
|
243
243
|
- step_1_for_output_dir_example
|
244
|
-
image_name:
|
244
|
+
image_name: dummy_step_1_for_output_dir_example.sif
|
245
245
|
script_cmd: python /dummy_step_1_for_output_dir_example.py
|
246
246
|
outputs:
|
247
247
|
step_1_main_output_directory: output_dir/
|
248
248
|
dummy_step_1_for_output_dir_example_default:
|
249
249
|
steps:
|
250
250
|
- step_1_for_output_dir_example
|
251
|
-
image_name:
|
251
|
+
image_name: dummy_step_1_for_output_dir_example.sif
|
252
252
|
script_cmd: python /dummy_step_1_for_output_dir_example.py
|
253
253
|
dummy_step_2_for_output_dir_example:
|
254
254
|
steps:
|
255
255
|
- step_2_for_output_dir_example
|
256
|
-
image_name:
|
256
|
+
image_name: dummy_step_2_for_output_dir_example.sif
|
257
257
|
script_cmd: python /dummy_step_2_for_output_dir_example.py
|
258
258
|
outputs:
|
259
259
|
step_2_main_output: result.parquet
|
260
260
|
default_removing_records:
|
261
261
|
steps:
|
262
262
|
- removing_records
|
263
|
-
image_name:
|
263
|
+
image_name: default_removing_records.sif
|
264
|
+
zenodo_record_id: 15757317
|
265
|
+
md5_checksum: 85dba6fd73c9f8f504fddb6d5c30f2de
|
264
266
|
script_cmd: python /default_removing_records.py
|
265
267
|
outputs:
|
266
268
|
dataset: dataset
|
267
269
|
default_clusters_to_links:
|
268
270
|
steps:
|
269
271
|
- clusters_to_links
|
270
|
-
image_name:
|
272
|
+
image_name: default_clusters_to_links.sif
|
273
|
+
zenodo_record_id: 15757317
|
274
|
+
md5_checksum: 0d00d1272bd8193f60727791097aa065
|
271
275
|
script_cmd: python /default_clusters_to_links.py
|
272
276
|
outputs:
|
273
277
|
known_links: result.parquet
|
274
278
|
default_determining_exclusions:
|
275
279
|
steps:
|
276
280
|
- determining_exclusions
|
277
|
-
image_name:
|
281
|
+
image_name: default_determining_exclusions.sif
|
282
|
+
zenodo_record_id: 15757317
|
283
|
+
md5_checksum: e61cb32ad45b79ca9a2c36db4e76ef7e
|
278
284
|
script_cmd: python /default_determining_exclusions.py
|
279
285
|
outputs:
|
280
286
|
ids_to_remove: result.parquet
|
281
287
|
default_updating_clusters:
|
282
288
|
steps:
|
283
289
|
- updating_clusters
|
284
|
-
image_name:
|
290
|
+
image_name: default_updating_clusters.sif
|
291
|
+
zenodo_record_id: 15757317
|
292
|
+
md5_checksum: cc6bd29e099c2523347fa04545aa35c9
|
285
293
|
script_cmd: python /default_updating_clusters.py
|
286
294
|
outputs:
|
287
295
|
clusters: clusters.parquet
|
288
|
-
dummy_canonicalizing_and_downstream_analysis
|
296
|
+
# NOTE: This was made from dummy_canonicalizing_and_downstream_analysis.py,
|
297
|
+
# if rebuilding, rename that file to save_clusters.py (and presumably update script_cmd below to match)
|
298
|
+
save_clusters:
|
289
299
|
steps:
|
290
300
|
- canonicalizing_and_downstream_analysis
|
291
|
-
image_name:
|
301
|
+
image_name: save_clusters.sif
|
302
|
+
zenodo_record_id: 15757317
|
303
|
+
md5_checksum: 384ab2be668cbadc45160a674f621022
|
292
304
|
script_cmd: python /dummy_canonicalizing_and_downstream_analysis.py
|
293
305
|
outputs:
|
294
306
|
analysis_output: result.parquet
|
295
|
-
dummy_pre-processing
|
307
|
+
# NOTE: This was made from dummy_pre-processing.py,
|
308
|
+
# if rebuilding, rename that file to no_pre-processing.py (and presumably update script_cmd below to match)
|
309
|
+
no_pre-processing:
|
296
310
|
steps:
|
297
311
|
- pre-processing
|
298
|
-
image_name:
|
312
|
+
image_name: no_pre-processing.sif
|
313
|
+
zenodo_record_id: 15757317
|
314
|
+
md5_checksum: 9a9c080cf145078152501cf96bf61f27
|
299
315
|
script_cmd: python /dummy_pre-processing.py
|
300
316
|
outputs:
|
301
317
|
dataset: dataset
|
302
318
|
default_schema_alignment:
|
303
319
|
steps:
|
304
320
|
- schema_alignment
|
305
|
-
image_name:
|
321
|
+
image_name: default_schema_alignment.sif
|
322
|
+
zenodo_record_id: 15757317
|
323
|
+
md5_checksum: 3166587f9cfec478b999a17074d628f7
|
306
324
|
script_cmd: python /default_schema_alignment.py
|
307
325
|
outputs:
|
308
326
|
records: result.parquet
|
309
327
|
splink_blocking_and_filtering:
|
310
328
|
steps:
|
311
329
|
- blocking_and_filtering
|
312
|
-
image_name:
|
330
|
+
image_name: splink_blocking_and_filtering.sif
|
331
|
+
zenodo_record_id: 15757317
|
332
|
+
md5_checksum: 8a365b90295ef6beaad2b7f80a03d768
|
313
333
|
script_cmd: python /splink_blocking_and_filtering.py
|
314
334
|
outputs:
|
315
335
|
blocks: blocks
|
316
336
|
splink_evaluating_pairs:
|
317
337
|
steps:
|
318
338
|
- evaluating_pairs
|
319
|
-
image_name:
|
339
|
+
image_name: splink_evaluating_pairs.sif
|
340
|
+
zenodo_record_id: 15757317
|
341
|
+
md5_checksum: b57f4bd16b7a3aa5099569078ea4c064
|
320
342
|
script_cmd: python /splink_evaluating_pairs.py
|
321
343
|
outputs:
|
322
344
|
links: result.parquet
|
323
345
|
splink_links_to_clusters:
|
324
346
|
steps:
|
325
347
|
- links_to_clusters
|
326
|
-
image_name:
|
348
|
+
image_name: splink_links_to_clusters.sif
|
349
|
+
zenodo_record_id: 15757317
|
350
|
+
md5_checksum: 645937f7bab9c2557b7aacafaf4e4765
|
327
351
|
script_cmd: python /splink_links_to_clusters.py
|
328
352
|
outputs:
|
329
353
|
clusters: result.parquet
|
330
354
|
fastLink_evaluating_pairs:
|
331
355
|
steps:
|
332
356
|
- evaluating_pairs
|
333
|
-
image_name:
|
357
|
+
image_name: fastLink_evaluating_pairs.sif
|
334
358
|
script_cmd: Rscript /fastLink_evaluating_pairs.R
|
335
359
|
outputs:
|
336
360
|
links: result.parquet
|
337
361
|
fastLink_links_to_clusters:
|
338
362
|
steps:
|
339
363
|
- links_to_clusters
|
340
|
-
image_name:
|
364
|
+
image_name: fastLink_links_to_clusters.sif
|
341
365
|
script_cmd: Rscript /fastLink_links_to_clusters.R
|
342
366
|
outputs:
|
343
367
|
clusters: result.parquet
|
344
368
|
exclude_clustered:
|
345
369
|
steps:
|
346
370
|
- determining_exclusions
|
347
|
-
image_name:
|
371
|
+
image_name: exclude_clustered.sif
|
348
372
|
script_cmd: python /exclude_clustered.py
|
349
373
|
outputs:
|
350
374
|
ids_to_remove: result.parquet
|
351
375
|
exclude_none:
|
352
376
|
steps:
|
353
377
|
- determining_exclusions
|
354
|
-
image_name:
|
378
|
+
image_name: exclude_none.sif
|
355
379
|
script_cmd: python /exclude_none.py
|
356
380
|
outputs:
|
357
381
|
ids_to_remove: result.parquet
|
358
382
|
update_clusters_by_connected_components:
|
359
383
|
steps:
|
360
384
|
- updating_clusters
|
361
|
-
image_name:
|
385
|
+
image_name: update_clusters_by_connected_components.sif
|
362
386
|
script_cmd: python /update_clusters_by_connected_components.py
|
363
387
|
outputs:
|
364
388
|
clusters: result.parquet
|
365
389
|
middle_name_to_initial:
|
366
390
|
steps:
|
367
391
|
- pre-processing
|
368
|
-
image_name:
|
392
|
+
image_name: middle_name_to_initial.sif
|
393
|
+
zenodo_record_id: 15757317
|
394
|
+
md5_checksum: 89db9c3318300cda9d538cde08c3c323
|
369
395
|
script_cmd: python /middle_name_to_initial.py
|
370
396
|
outputs:
|
371
397
|
dataset: dataset
|