easylink 0.1.24__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {easylink-0.1.24 → easylink-0.2.0}/.gitignore +2 -0
- {easylink-0.1.24 → easylink-0.2.0}/CHANGELOG.rst +10 -0
- {easylink-0.1.24 → easylink-0.2.0}/PKG-INFO +27 -11
- easylink-0.2.0/README.rst +70 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/index.rst +2 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/index.rst +0 -4
- easylink-0.2.0/docs/source/user_guide/tutorials/DAG-naive-pipeline.svg +553 -0
- easylink-0.2.0/docs/source/user_guide/tutorials/debug_compare_fp_fn.py +176 -0
- easylink-0.1.24/docs/source/user_guide/tutorials/print_metrics_w2_ssa.py → easylink-0.2.0/docs/source/user_guide/tutorials/debug_metrics_w2_ssa.py +3 -0
- easylink-0.2.0/docs/source/user_guide/tutorials/getting_started.rst +699 -0
- easylink-0.2.0/docs/source/user_guide/tutorials/input_data_demo.yaml +3 -0
- easylink-0.2.0/docs/source/user_guide/tutorials/input_data_demo_2030.yaml +3 -0
- easylink-0.2.0/docs/source/user_guide/tutorials/naive_comparison_viewer.html +11024 -0
- easylink-0.2.0/docs/source/user_guide/tutorials/naive_match_weights.html +42 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/pipeline_demo_improved.yaml +13 -13
- easylink-0.2.0/docs/source/user_guide/tutorials/pipeline_demo_improved_cascade.yaml +134 -0
- easylink-0.2.0/docs/source/user_guide/tutorials/pipeline_demo_naive.yaml +71 -0
- easylink-0.2.0/docs/source/user_guide/tutorials/print_fp_fn_w2_ssa.py +136 -0
- easylink-0.2.0/src/easylink/_version.py +1 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/cli.py +19 -10
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/implementation_metadata.yaml +94 -44
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/runner.py +174 -1
- easylink-0.2.0/src/easylink/steps/cascading/accept_all_pairs.def +22 -0
- easylink-0.2.0/src/easylink/steps/cascading/accept_all_pairs.py +26 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/cascading/exclude_clustered.py +11 -2
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/cascading/exclude_none.py +1 -1
- easylink-0.2.0/src/easylink/steps/cascading/one_to_many_links_to_clusters.def +22 -0
- easylink-0.2.0/src/easylink/steps/cascading/one_to_many_links_to_clusters.py +109 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/cascading/update_clusters_by_connected_components.py +17 -4
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_determining_exclusions.py +1 -1
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_removing_records.py +1 -1
- easylink-0.2.0/src/easylink/steps/rl-dummy/input_data/known_clusters.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/splink/splink_blocking_and_filtering.py +12 -6
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/splink/splink_evaluating_pairs.py +2 -1
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/splink/splink_links_to_clusters.py +2 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/utilities/general_utils.py +18 -8
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/utilities/validation_utils.py +6 -6
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink.egg-info/PKG-INFO +27 -11
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink.egg-info/SOURCES.txt +13 -1
- easylink-0.2.0/tests/e2e/pipeline_improved_cascade_results.csv +15861 -0
- easylink-0.2.0/tests/e2e/pipeline_improved_results.csv +15848 -0
- easylink-0.2.0/tests/e2e/pipeline_improved_results_2030.csv +17684 -0
- easylink-0.2.0/tests/e2e/pipeline_naive_results.csv +15816 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/e2e/test_pipelines_main_schema.py +19 -4
- easylink-0.2.0/tests/specifications/common/environment_local.yaml +2 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/e2e/pipeline_cascade.yaml +12 -15
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/e2e/pipeline_splink_dummy.yaml +3 -3
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/e2e/pipeline_with_fastLink.yaml +3 -3
- easylink-0.2.0/tests/unit/test_runner.py +116 -0
- easylink-0.1.24/README.rst +0 -54
- easylink-0.1.24/docs/source/user_guide/tutorials/getting_started.rst +0 -736
- easylink-0.1.24/docs/source/user_guide/tutorials/input_data_demo.yaml +0 -3
- easylink-0.1.24/docs/source/user_guide/tutorials/input_data_demo_2030.yaml +0 -3
- easylink-0.1.24/docs/source/user_guide/tutorials/pipeline_demo_naive.yaml +0 -71
- easylink-0.1.24/docs/source/user_guide/tutorials/print_fp_fn_w2_ssa.py +0 -80
- easylink-0.1.24/src/easylink/_version.py +0 -1
- easylink-0.1.24/tests/e2e/pipeline_improved_results.csv +0 -25184
- easylink-0.1.24/tests/e2e/pipeline_improved_results_2030.csv +0 -28890
- easylink-0.1.24/tests/e2e/pipeline_naive_results.csv +0 -25184
- easylink-0.1.24/tests/unit/test_runner.py +0 -50
- {easylink-0.1.24 → easylink-0.2.0}/.bandit +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/.flake8 +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/.github/CODEOWNERS +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/.github/pull_request_template.md +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/.github/workflows/deploy.yml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/.github/workflows/update_readme.yml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/.readthedocs.yml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/CONTRIBUTING.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/Jenkinsfile +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/LICENSE +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/Makefile +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/Makefile +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/nitpick-exceptions +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/_static/style.css +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/_templates/layout.html +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/cli.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/configuration.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/graph_components.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/implementation.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/pipeline.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/pipeline_graph.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/pipeline_schema.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/pipeline_schema_constants/development.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/pipeline_schema_constants/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/pipeline_schema_constants/testing.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/rule.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/runner.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/step.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/utilities/aggregator_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/utilities/data_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/utilities/general_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/utilities/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/utilities/paths.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/utilities/splitter_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/api_reference/utilities/validation_utils.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/01_step.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/02_default_implementation.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/03_slots.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/04_data_dependency.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/05_pipeline_schema.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/06_default_input.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/07_cloneable_section.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/08_cloneable_section_expanded.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/09_loopable_section.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/10_loopable_section_expanded.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/11_cloneable_section_splitter.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/12_cloneable_section_splitter_expanded.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/13_autoparallel_section.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/14_choice_section.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/15_choice_section_expanded.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/16_step_hierarchy.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/18_schema_to_pipeline.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/19_schema_to_pipeline_combined.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/clustering_sub_steps.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/easylink_pipeline_schema.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/entity_resolution_sub_steps.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/pipeline_schema/images/linking_sub_steps.drawio.png +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/concepts/workarounds.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/conf.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/glossary.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/cli.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/2020/input_file_ssa.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/2020/input_file_w2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/2030/input_file_ssa.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/2030/input_file_w2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/DAG-common-pipeline.svg +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/DAG-e2e-pipeline-expanded.svg +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/DAG-e2e-pipeline.svg +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/DAG-r-pyspark.svg +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/create_inputs_demo.ipynb +0 -0
- {easylink-0.1.24/tests/specifications/common → easylink-0.2.0/docs/source/user_guide/tutorials}/environment_local.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/environment_slurm.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/impl-config-pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/index.rst +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/input_data.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/input_file_1.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/input_file_2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/input_file_3.parquet +0 -0
- {easylink-0.1.24/src/easylink/steps/rl-dummy/input_data → easylink-0.2.0/docs/source/user_guide/tutorials}/known_clusters.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/docs/source/user_guide/tutorials/r_spark_pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/pyproject.toml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/python_versions.json +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/pytype.cfg +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/setup.cfg +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/setup.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/__about__.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/configuration.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/devtools/implementation_creator.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/graph_components.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/implementation.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/pipeline.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/pipeline_graph.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/pipeline_schema.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/pipeline_schema_constants/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/pipeline_schema_constants/development.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/pipeline_schema_constants/main.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/pipeline_schema_constants/testing.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/rule.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/step.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/cascading/exclude_clustered.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/cascading/exclude_none.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/cascading/update_clusters_by_connected_components.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_clusters_to_links.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_clusters_to_links.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_determining_exclusions.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_removing_records.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_schema_alignment.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_schema_alignment.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_updating_clusters.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/default/default_updating_clusters.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/README.md +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/build-containers-local.sh +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/build-containers-remote.sh +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/input_data/create_input_files.ipynb +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/input_data/input_file_1.csv +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/input_data/input_file_1.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/input_data/input_file_2.csv +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/input_data/input_file_2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/python_pandas/README.md +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/python_pandas/dummy_step.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/python_pandas/python_pandas.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/python_pyspark/README.md +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/python_pyspark/dummy_step.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/python_pyspark/python_pyspark.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/r/README.md +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/r/dummy_step.R +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/r/r-image.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/dev/test.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/example/middle_name_to_initial.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/example/middle_name_to_initial.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/fastLink/fastLink_evaluating_pairs.R +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/fastLink/fastLink_evaluating_pairs.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/fastLink/fastLink_links_to_clusters.R +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/fastLink/fastLink_links_to_clusters.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/output_dir/dummy_step_1_for_output_dir_example.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/output_dir/dummy_step_1_for_output_dir_example.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/output_dir/dummy_step_2_for_output_dir_example.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/output_dir/dummy_step_2_for_output_dir_example.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/rl-dummy/canonicalizing_and_downstream_analysis/dummy_canonicalizing_and_downstream_analysis.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/rl-dummy/input_data/create_input_files.ipynb +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/rl-dummy/input_data/input_file_1.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/rl-dummy/input_data/input_file_2.parquet +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/rl-dummy/pre-processing/dummy_pre-processing.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/splink/splink_blocking_and_filtering.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/splink/splink_evaluating_pairs.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/steps/splink/splink_links_to_clusters.def +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/utilities/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/utilities/aggregator_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/utilities/data_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/utilities/paths.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/utilities/spark.smk +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink/utilities/splitter_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink.egg-info/dependency_links.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink.egg-info/entry_points.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink.egg-info/not-zip-safe +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink.egg-info/requires.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/src/easylink.egg-info/top_level.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/conftest.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/e2e/pipeline_splink_dummy_results.csv +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/e2e/test_easylink_cli.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/e2e/test_step_types.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/integration/test_compositions.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/integration/test_data_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/integration/test_snakemake.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/integration/test_snakemake_slurm.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/integration/test_snakemake_spark.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/common/input_data.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/common/input_data_one_file.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/common/pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/e2e/environment_slurm.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/e2e/environment_slurm_4GB.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/e2e/input_data_dummy.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/e2e/pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/e2e/pipeline_expanded.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/integration/auto_parallel/pipeline_cloneable_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/integration/auto_parallel/pipeline_hierarchical_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/integration/auto_parallel/pipeline_loop_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/integration/environment_spark_slurm.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/integration/pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/integration/pipeline_output_dir.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/integration/pipeline_output_dir_default.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/integration/pipeline_spark.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/environment_minimum.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/environment_spark_slurm.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_bad_combined_implementations.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_bad_implementation.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_bad_loop_formatting.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_bad_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_bad_type_key.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_combine_bad_implementation_names.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_combine_bad_topology.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_combine_two_steps.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_combine_with_extra_node.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_combine_with_iteration.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_combine_with_iteration_cycle.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_combine_with_missing_node.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_combine_with_parallel.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_default_implementations.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_missing_implementation_name.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_missing_implementations.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_missing_loop_nodes.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_missing_step.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_missing_substeps.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_missing_type_key.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_nested_templated_steps.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_out_of_order.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_spark.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_type_config_mismatch.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/specifications/unit/pipeline_wrong_clone_keys.yaml +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/__init__.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/conftest.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/recipe_strings/python_pandas.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/aggregation_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/auto_parallel_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/checkpoint_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/implemented_rule_local.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/implemented_rule_slurm.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/pipeline_local.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/pipeline_slurm.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/target_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/rule_strings/validation_rule.txt +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_cli.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_config.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_data_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_general_utils.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_graph_components.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_implementation.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_implementation_creator.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_pipeline.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_pipeline_graph.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_pipeline_schema.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_rule.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_step.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/tests/unit/test_validations.py +0 -0
- {easylink-0.1.24 → easylink-0.2.0}/update_readme.py +0 -0
@@ -1,3 +1,13 @@
|
|
1
|
+
**0.2.0 - 6/30/25**
|
2
|
+
|
3
|
+
- Official demo release
|
4
|
+
- Add progress dots to terminal output
|
5
|
+
|
6
|
+
**0.1.25 - 6/30/25**
|
7
|
+
|
8
|
+
- Release new images
|
9
|
+
- Clean up stdout logging
|
10
|
+
|
1
11
|
**0.1.24 - 6/26/25**
|
2
12
|
|
3
13
|
- Properly escape special characters in envars specified via pipeline configuration
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: easylink
|
3
|
-
Version: 0.1.24
|
3
|
+
Version: 0.2.0
|
4
4
|
Summary: Research repository for the EasyLink ER ecosystem project.
|
5
5
|
Home-page: https://github.com/ihmeuw/easylink
|
6
6
|
Author: The EasyLink developers
|
@@ -78,34 +78,50 @@ Installation
|
|
78
78
|
|
79
79
|
.. _installation:
|
80
80
|
|
81
|
+
**NOTE: This package requires AMD64 CPU architecture - it is not compatible with
|
82
|
+
Apple's ARM64 architecture (e.g. M1 and newer Macs).**
|
83
|
+
|
81
84
|
There are a few things to install in order to use this package:
|
82
85
|
|
83
|
-
-
|
86
|
+
- Set up Linux.
|
87
|
+
|
88
|
+
Singularity (and thus EasyLink) requires Linux to run. If you are not already
|
89
|
+
using Linux, you will need to set up a virtual machine; refer to the
|
90
|
+
`Singularity documentation for installing on Windows or Mac <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-windows-or-mac>`_.
|
84
91
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
``singularity --version``. For an existing installation, your
|
92
|
+
- Install Singularity.
|
93
|
+
|
94
|
+
First check if you already have Singularity installed by running the command
|
95
|
+
``singularity --version``. For an existing installation, your Singularity version
|
89
96
|
number is printed.
|
90
97
|
|
98
|
+
If Singularity is not yet installed, you will need to install it;
|
99
|
+
refer to the `Singularity docs for installing on Linux <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-linux>`_.
|
100
|
+
|
101
|
+
Note that this requires administrator privileges; you may need to request installation
|
102
|
+
from your system admin if you are working in a shared computing environment.
|
103
|
+
|
91
104
|
- Install conda.
|
92
105
|
|
93
106
|
We recommend `miniforge <https://github.com/conda-forge/miniforge>`_. You can
|
94
107
|
check if you already have conda installed by running the command ``conda --version``.
|
95
108
|
For an existing installation, a version will be displayed.
|
96
109
|
|
97
|
-
-
|
110
|
+
- Create a conda environment with python and graphviz installed.
|
111
|
+
|
112
|
+
::
|
113
|
+
|
114
|
+
$ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
|
115
|
+
$ conda activate easylink
|
116
|
+
|
117
|
+
- Install easylink in the environment.
|
98
118
|
|
99
119
|
Option 1 - Install from PyPI with pip::
|
100
120
|
|
101
|
-
$ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
|
102
|
-
$ conda activate easylink
|
103
121
|
$ pip install easylink
|
104
122
|
|
105
123
|
Option 2 - Build from source with pip::
|
106
124
|
|
107
|
-
$ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
|
108
|
-
$ conda activate easylink
|
109
125
|
$ pip install git+https://github.com/ihmeuw/easylink.git
|
110
126
|
|
111
127
|
.. _end_installation:
|
@@ -0,0 +1,70 @@
|
|
1
|
+
========
|
2
|
+
EasyLink
|
3
|
+
========
|
4
|
+
|
5
|
+
EasyLink is a framework that allows users to build and run highly configurable
|
6
|
+
entity resolution (ER) pipelines.
|
7
|
+
|
8
|
+
.. _python_support:
|
9
|
+
|
10
|
+
**Supported Python versions: 3.11, 3.12**
|
11
|
+
|
12
|
+
.. _end_python_support:
|
13
|
+
|
14
|
+
Installation
|
15
|
+
============
|
16
|
+
|
17
|
+
.. _installation:
|
18
|
+
|
19
|
+
**NOTE: This package requires AMD64 CPU architecture - it is not compatible with
|
20
|
+
Apple's ARM64 architecture (e.g. M1 and newer Macs).**
|
21
|
+
|
22
|
+
There are a few things to install in order to use this package:
|
23
|
+
|
24
|
+
- Set up Linux.
|
25
|
+
|
26
|
+
Singularity (and thus EasyLink) requires Linux to run. If you are not already
|
27
|
+
using Linux, you will need to set up a virtual machine; refer to the
|
28
|
+
`Singularity documentation for installing on Windows or Mac <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-windows-or-mac>`_.
|
29
|
+
|
30
|
+
- Install Singularity.
|
31
|
+
|
32
|
+
First check if you already have Singularity installed by running the command
|
33
|
+
``singularity --version``. For an existing installation, your Singularity version
|
34
|
+
number is printed.
|
35
|
+
|
36
|
+
If Singularity is not yet installed, you will need to install it;
|
37
|
+
refer to the `Singularity docs for installing on Linux <https://docs.sylabs.io/guides/4.1/admin-guide/installation.html#installation-on-linux>`_.
|
38
|
+
|
39
|
+
Note that this requires administrator privileges; you may need to request installation
|
40
|
+
from your system admin if you are working in a shared computing environment.
|
41
|
+
|
42
|
+
- Install conda.
|
43
|
+
|
44
|
+
We recommend `miniforge <https://github.com/conda-forge/miniforge>`_. You can
|
45
|
+
check if you already have conda installed by running the command ``conda --version``.
|
46
|
+
For an existing installation, a version will be displayed.
|
47
|
+
|
48
|
+
- Create a conda environment with python and graphviz installed.
|
49
|
+
|
50
|
+
::
|
51
|
+
|
52
|
+
$ conda create --name easylink -c conda-forge python=3.12 graphviz 'gcc<14' -y
|
53
|
+
$ conda activate easylink
|
54
|
+
|
55
|
+
- Install easylink in the environment.
|
56
|
+
|
57
|
+
Option 1 - Install from PyPI with pip::
|
58
|
+
|
59
|
+
$ pip install easylink
|
60
|
+
|
61
|
+
Option 2 - Build from source with pip::
|
62
|
+
|
63
|
+
$ pip install git+https://github.com/ihmeuw/easylink.git
|
64
|
+
|
65
|
+
.. _end_installation:
|
66
|
+
|
67
|
+
Documentation
|
68
|
+
=============
|
69
|
+
|
70
|
+
You can view documentation at https://easylink.readthedocs.io/en/latest/
|
@@ -623,6 +623,8 @@ This step :ref:`has sub-steps <entity_resolution_sub_steps>`, which may be expan
|
|
623
623
|
and similar incremental methods, the already-found clusters would be used directly
|
624
624
|
and updated with new decisions about not-yet-clustered records.
|
625
625
|
|
626
|
+
.. _canonicalizing:
|
627
|
+
|
626
628
|
Canonicalizing and downstream analysis
|
627
629
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
628
630
|
|