easylink 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
1
  """
2
2
  =========
3
- utilities
3
+ Utilities
4
4
  =========
5
5
 
6
- Utility functions.
6
+ This package contains a collection of helper modules and files. It serves as a
7
+ central repository for common operations, filepaths, and configuration data.
7
8
 
8
9
  """
@@ -9,6 +9,7 @@ embarrassingly parallel manner.
9
9
 
10
10
  Note that it is critical that all data aggregating utility functions are defined
11
11
  in this module; easylink will not be able to find them otherwise.
12
+
12
13
  """
13
14
 
14
15
  import pandas as pd
@@ -1,14 +1,43 @@
1
1
  # mypy: ignore-errors
2
+ """
3
+ ==============
4
+ Data Utilities
5
+ ==============
6
+
7
+ This module contains utility functions for handling data files and directories.
8
+
9
+ """
10
+
2
11
  import os
3
12
  import shutil
13
+ from collections.abc import Callable
4
14
  from datetime import datetime
5
15
  from pathlib import Path
6
16
 
7
17
  import yaml
8
18
 
9
19
 
10
- def modify_umask(func):
11
- """Decorator to wrap umask modification before making directories"""
20
+ def modify_umask(func: Callable) -> Callable:
21
+ """Decorates a function to modify a process's umask temporarily before calling the function.
22
+
23
+ This decorator sets the umask to 0o002, which grants write permission to the
24
+ group while preserving the umask settings for the owner and others. It ensures
25
+ that any file or directory created by the decorated function has group write
26
+ permissions. After the function executes, the decorator restores the original
27
+ umask.
28
+
29
+ Parameters
30
+ ----------
31
+ func
32
+ The function to be decorated. It can be any callable that might create files
33
+ or directories during its execution.
34
+
35
+ Returns
36
+ -------
37
+ A wrapper function that, when called, modifies the umask, calls the original
38
+ function with the provided arguments, and finally restores the umask to its
39
+ original value.
40
+ """
12
41
 
13
42
  def wrapper(*args, **kwargs):
14
43
  old_umask = os.umask(0o002)
@@ -22,11 +51,28 @@ def modify_umask(func):
22
51
 
23
52
  @modify_umask
24
53
  def create_results_directory(results_dir: Path) -> None:
54
+ """Creates a results directory.
55
+
56
+ This creates the high-level results directory to be used for storing results
57
+ (including any missing sub-directories).
58
+
59
+ Parameters
60
+ ----------
61
+ results_dir
62
+ The directory to be created.
63
+ """
25
64
  results_dir.mkdir(parents=True, exist_ok=True)
26
65
 
27
66
 
28
67
  @modify_umask
29
68
  def create_results_intermediates(results_dir: Path) -> None:
69
+ """Creates required sub-directories within a given run's results directory.
70
+
71
+ Parameters
72
+ ----------
73
+ results_dir
74
+ The results directory for the current run.
75
+ """
30
76
  (results_dir / "intermediate").mkdir(exist_ok=True)
31
77
  (results_dir / "diagnostics").mkdir(exist_ok=True)
32
78
 
@@ -37,6 +83,21 @@ def copy_configuration_files_to_results_directory(
37
83
  computing_environment: Path | None,
38
84
  results_dir: Path,
39
85
  ) -> None:
86
+ """Copies all configuration files into the results directory.
87
+
88
+ Parameters
89
+ ----------
90
+ pipeline_specification
91
+ The filepath to the pipeline specification file.
92
+ input_data
93
+ The filepath to the input data specification file (_not_ the paths to the
94
+ input data themselves).
95
+ computing_environment
96
+ The filepath to the specification file defining the computing environment
97
+ to run the pipeline on.
98
+ results_dir
99
+ The directory to write results and incidental files (logs, etc.) to.
100
+ """
40
101
  shutil.copy(pipeline_specification, results_dir)
41
102
  shutil.copy(input_data, results_dir)
42
103
  if computing_environment:
@@ -44,18 +105,50 @@ def copy_configuration_files_to_results_directory(
44
105
 
45
106
 
46
107
  def get_results_directory(output_dir: str | None, no_timestamp: bool) -> Path:
108
+ """Determines the results directory path.
109
+
110
+ This function determines the filepath for storing results by (optionally) appending
111
+ a timestamp to the specified output directory. If no output directory is provided,
112
+ it defaults to a directory named 'results' in the current working directory.
113
+
114
+ Parameters
115
+ ----------
116
+ output_dir
117
+ The directory to write results and incidental files (logs, etc.) to. If no
118
+ value is provided, results will be written to a 'results/' directory in the
119
+ current working directory.
120
+ no_timestamp
121
+ Whether or not to save the results in a timestamped sub-directory.
122
+
123
+ Returns
124
+ -------
125
+ The fully resolved path to the results directory.
126
+ """
47
127
  results_dir = Path("results" if output_dir is None else output_dir).resolve()
48
128
  if not no_timestamp:
49
- launch_time = _get_timestamp()
50
- results_dir = results_dir / launch_time
129
+ results_dir = results_dir / _get_timestamp()
51
130
  return results_dir
52
131
 
53
132
 
54
- def _get_timestamp():
133
+ def _get_timestamp() -> str:
55
134
  return datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
56
135
 
57
136
 
58
137
  def load_yaml(filepath: str | Path) -> dict:
138
+ """Loads and returns the contents of a YAML file.
139
+
140
+ This function uses `yaml.safe_load` to parse the YAML file, which is designed
141
+ to safely load a subset of YAML without executing arbitrary code.
142
+
143
+ Parameters
144
+ ----------
145
+ filepath
146
+ The path to the YAML file to be loaded.
147
+
148
+ Returns
149
+ -------
150
+ The contents of the YAML file.
151
+ """
59
152
  with open(filepath, "r") as file:
60
153
  data = yaml.safe_load(file)
61
154
  return data
@@ -1,4 +1,13 @@
1
1
  # mypy: ignore-errors
2
+ """
3
+ =================
4
+ General Utilities
5
+ =================
6
+
7
+ This module contains various utility functions.
8
+
9
+ """
10
+
2
11
  import errno
3
12
  import functools
4
13
  import shutil
@@ -14,7 +23,26 @@ from loguru import logger
14
23
  def handle_exceptions(
15
24
  func: Callable, exceptions_logger: Any, with_debugger: bool
16
25
  ) -> Callable:
17
- """Drops a user into an interactive debugger if func raises an error."""
26
+ """Wraps a function to handle exceptions by logging and optionally dropping into a debugger.
27
+
28
+ Parameters
29
+ ----------
30
+ func
31
+ The wrapped function that is executed and monitored for exceptions.
32
+ exceptions_logger
33
+ The logging object used to log exceptions that occur during function execution.
34
+ with_debugger
35
+ Whether or not to drop into an interactive debugger upon encountering an exception.
36
+
37
+ Returns
38
+ -------
39
+ A wrapped version of `func` that includes the exception handling logic.
40
+
41
+ Notes
42
+ -----
43
+ Exceptions `BdbQuit` and `KeyboardInterrupt` are re-raised _without_ logging
44
+ to allow for normal debugger and program exit behaviors.
45
+ """
18
46
 
19
47
  @functools.wraps(func)
20
48
  def wrapped(*args, **kwargs):
@@ -35,13 +63,18 @@ def handle_exceptions(
35
63
  return wrapped
36
64
 
37
65
 
38
- def configure_logging_to_terminal(verbose: int):
39
- """Sets up logging to ``sys.stdout``.
66
+ def configure_logging_to_terminal(verbose: int) -> None:
67
+ """Configures logging output to the terminal with optional verbosity levels.
40
68
 
41
69
  Parameters
42
70
  ----------
43
71
  verbose
44
- Verbosity of the logger.
72
+ An integer indicating the verbosity level of the logging output. Higher
73
+ values produce more detailed logging information.
74
+
75
+ Notes
76
+ -----
77
+ This function clears any default logging configuration before applying the new settings.
45
78
  """
46
79
  logger.remove(0) # Clear default configuration
47
80
  _add_logging_sink(sys.stdout, verbose, colorize=True)
@@ -49,21 +82,20 @@ def configure_logging_to_terminal(verbose: int):
49
82
 
50
83
  def _add_logging_sink(
51
84
  sink: TextIO, verbose: int, colorize: bool = False, serialize: bool = False
52
- ):
85
+ ) -> None:
53
86
  """Adds a logging sink to the global process logger.
54
87
 
55
88
  Parameters
56
89
  ----------
57
90
  sink
58
- Either a file or system file descriptor like ``sys.stdout``.
91
+ The output stream to which log messages will be directed, e.g. ``sys.stdout``.
59
92
  verbose
60
- Verbosity of the logger.
93
+ Verbosity of the logger. The log level is set to INFO if 0 and DEBUG otherwise.
61
94
  colorize
62
95
  Whether to use the colorization options from :mod:`loguru`.
63
96
  serialize
64
97
  Whether the logs should be converted to JSON before they're dumped
65
98
  to the logging sink.
66
-
67
99
  """
68
100
  message_format = (
69
101
  "<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | <green>{elapsed}</green> | "
@@ -104,7 +136,6 @@ def exit_with_validation_error(error_msg: dict) -> None:
104
136
  SystemExit
105
137
  Exits the program with an EINVAL (invalid argument) code due to
106
138
  previously-determined validation errors.
107
-
108
139
  """
109
140
 
110
141
  logger.error(
@@ -118,5 +149,12 @@ def exit_with_validation_error(error_msg: dict) -> None:
118
149
 
119
150
 
120
151
  def is_on_slurm() -> bool:
121
- """Returns True if the current environment is a SLURM cluster."""
152
+ """Returns True if the current environment is a SLURM cluster.
153
+
154
+ Notes
155
+ -----
156
+ This function simply checks for the presence of the `sbatch` command to _infer_
157
+ if SLURM is installed. It does _not_ check if SLURM is currently active or
158
+ managing jobs.
159
+ """
122
160
  return shutil.which("sbatch") is not None
@@ -2,15 +2,21 @@
2
2
  =========
3
3
  Filepaths
4
4
  =========
5
+
6
+ This module contains commonly-used filepaths and directories.
7
+
5
8
  """
6
9
 
7
10
  from pathlib import Path
8
11
 
9
12
  # TODO: We'll need to update this to be more generic for external users and have a way of configuring this
10
13
  CONTAINER_DIR = "/mnt/team/simulation_science/priv/engineering/er_ecosystem/images"
14
+ """Path to the directory where the container images are stored."""
11
15
  IMPLEMENTATION_METADATA = Path(__file__).parent.parent / "implementation_metadata.yaml"
12
- # Bind EasyLink temp dir to /tmp in the container.
13
- # For now, put slurm in /tmp to avoid creating a subdir with a prolog script
16
+ """Path to the implementation metadata file."""
14
17
  EASYLINK_TEMP = {"local": Path("/tmp/easylink"), "slurm": Path("/tmp")}
15
-
18
+ """Paths to the easylink tmp/ directory to get bound to the container's /tmp directory.
19
+ When running on slurm, we bind /tmp (rather than /tmp/easylink) to avoid creating
20
+ a subdir with a prolog script"""
16
21
  SPARK_SNAKEFILE = Path(__file__).parent / "spark.smk"
22
+ """Path to the Snakemake snakefile containing spark-specific rules."""
@@ -9,6 +9,7 @@ parallel manner.
9
9
 
10
10
  Note that it is critical that all data splitting utility functions are defined
11
11
  in this module; easylink will not be able to find them otherwise.
12
+
12
13
  """
13
14
 
14
15
  import math
@@ -1,3 +1,13 @@
1
+ """
2
+ =========================
3
+ Data Validation Utilities
4
+ =========================
5
+
6
+ This module contains utility functions for validating datasets, e.g. the validation
7
+ function(s) for processed data being passed out of one pipeline step and into the next.
8
+
9
+ """
10
+
1
11
  from pathlib import Path
2
12
 
3
13
  import pandas as pd
@@ -5,6 +15,25 @@ from pyarrow import parquet as pq
5
15
 
6
16
 
7
17
  def validate_input_file_dummy(filepath: str) -> None:
18
+ """Validates an input file to a dummy :class:`~easylink.step.Step`.
19
+
20
+ This function is intended to be used as the :attr:`~easylink.graph_components.InputSlot.validator`
21
+ for _all_ input data at every step in the dummy/:mod:`easylink.pipeline_schema_constants.development`
22
+ pipeline schema. It simply checks for supported file types as well as the presence
23
+ of required columns.
24
+
25
+ Parameters
26
+ ----------
27
+ filepath
28
+ The path to the input data file to be validated.
29
+
30
+ Raises
31
+ ------
32
+ NotImplementedError
33
+ If the file type is not supported.
34
+ LookupError
35
+ If the file is missing required columns.
36
+ """
8
37
  extension = Path(filepath).suffix
9
38
  if extension == ".parquet":
10
39
  output_columns = set(pq.ParquetFile(filepath).schema.names)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: easylink
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: Research repository for the EasyLink ER ecosystem project.
5
5
  Home-page: https://github.com/ihmeuw/easylink
6
6
  Author: The EasyLink developers
@@ -1,22 +1,22 @@
1
1
  easylink/__about__.py,sha256=2-oxCfu9t9yUJouLDwqYRZ0eii8kN25SxRzsawjWjho,440
2
2
  easylink/__init__.py,sha256=gGMcIVfiVnHtlDw5mZwhevcDb2wt-kuP6F64gnkFack,159
3
- easylink/_version.py,sha256=YpKDcdV7CqL8n45u267wKtyloM13FSVbOdrqgNZnSLM,22
3
+ easylink/_version.py,sha256=XIaxbMbyiP-L3kguR1GhxirFblTXiHR1lMfDVITvHUI,22
4
4
  easylink/cli.py,sha256=ARSKAljepNOEYd1VCS_QqBJQIBLzE3IgKiOb5-OROdY,6380
5
5
  easylink/configuration.py,sha256=Ire2pMZNZ6wtSwhcWnQpYa-snX4KrhXgovlQwQ2Wxf4,12530
6
- easylink/graph_components.py,sha256=U6gbKjQVTBftdOGlH-oOKS6o0dyS88IL6MCpI_mhv3s,12354
6
+ easylink/graph_components.py,sha256=6OipaUkCW2ESBW6bxwZVgpRAX8RuL15m4x_mGE7i4R8,12669
7
7
  easylink/implementation.py,sha256=AwGl5YCKCSQo91owWj-gg9_5lBz7H_4q2z7jF0BhXs4,8992
8
8
  easylink/implementation_metadata.yaml,sha256=VvlEu3Dvlmeh1MpzeYx91j22GiV-9mu3hZP5yVuW04o,6763
9
9
  easylink/pipeline.py,sha256=EyCXv5p9WzTqcndXK6ukBJE6jY_fWIP_DGZQUl1wRcY,12284
10
10
  easylink/pipeline_graph.py,sha256=vsY6nW_iEwZCNf_N_3CsixsKBUy_5JxGEi61-1Q-KAw,22842
11
- easylink/pipeline_schema.py,sha256=ckvA4deRYalY5dLLbJDrO_pKttMuWnEUvSn5fSdu4jc,5900
11
+ easylink/pipeline_schema.py,sha256=kINpvy2Fl2S3FBqgdgZCCFHEk237_36X4ltLOtk5-dE,5862
12
12
  easylink/rule.py,sha256=W97LMI-vkEPipJbnSZLn2BxfYfFtvzGTKzq6YgDVri0,19913
13
13
  easylink/runner.py,sha256=k9ICTToHj2xr6MGIuvlWf6YMeZ47UGgseaMByMgUGac,6271
14
- easylink/step.py,sha256=tTlDbhtjd7vkKmsnq622WnwQgBAdTN1dapUJqhUlPjA,65664
14
+ easylink/step.py,sha256=8EhoFOXBLWgDfb3OhmQu5g03fqElIJCWg8-Y_5azKEA,67100
15
15
  easylink/images/spark_cluster/Dockerfile,sha256=3PHotbR4jdjVYRHOJ0VQW55b5Qd4tQ1pLLQMrTKWVA0,576
16
16
  easylink/images/spark_cluster/README.md,sha256=KdgSttZRplNNWqHn4K1GTsTIab3dTOSG4V99QPLxSp8,569
17
- easylink/pipeline_schema_constants/__init__.py,sha256=RVUncdInRvafu10hbf7J9QQv7cE_pg3ylw_C0v1uIOY,684
18
- easylink/pipeline_schema_constants/development.py,sha256=-F2xaht9u66oJsXyJ8-9Mnx0PoibVZNpTC6ReRykD9w,11280
19
- easylink/pipeline_schema_constants/testing.py,sha256=icg7Vx0t8Wnic_Bx8tGkYB5wlZmrBqHYeQSRc9mR0Lo,10704
17
+ easylink/pipeline_schema_constants/__init__.py,sha256=uRVjQw7_Ff5IBQw0_Jc93Fzfa-MnbPVPKsy18CCaW7E,1021
18
+ easylink/pipeline_schema_constants/development.py,sha256=kOTEqfZD5pWqP9gu7E6r9Cubf3ILtWEUxCfJfrN8znc,11547
19
+ easylink/pipeline_schema_constants/testing.py,sha256=ohcTlT_viZYxS1GkO46mjkb8IzXo6yIOqvBbb4YrOhA,10897
20
20
  easylink/steps/dev/README.md,sha256=u9dZUggpY2Lf2qb-xkDLWWgHjcmi4osbQtzSNo4uklE,4549
21
21
  easylink/steps/dev/build-containers-local.sh,sha256=Wy3pfcyt7I-BNvHcr7ZXDe0g5Ihd00BIPqt9YuRbLeA,259
22
22
  easylink/steps/dev/build-containers-remote.sh,sha256=Hy-kaaXf-ta6n8SzOz_ahByjMY5T7J71MvzXRXDvQw8,271
@@ -35,16 +35,16 @@ easylink/steps/dev/python_pyspark/python_pyspark.def,sha256=j_RmVjspmXGOhJTr10ED
35
35
  easylink/steps/dev/r/README.md,sha256=dPjZdDTqcJsZCiwhddzlOj1ob0P7YocZUNFrLIGM1-0,1201
36
36
  easylink/steps/dev/r/dummy_step.R,sha256=1TWZY8CEkT6gavrulBxFsKbDSKJJjk0NtJrGH7TIikE,4975
37
37
  easylink/steps/dev/r/r-image.def,sha256=LrhXlt0C3k7d_VJWopRPEVARnFWSuq_oILlwo7g03bE,627
38
- easylink/utilities/__init__.py,sha256=EBk0rvRPZqwqzIqdVo8jkpSiZFFnj_fHaRB-P6EuCmk,59
39
- easylink/utilities/aggregator_utils.py,sha256=7-zg7znkUow3SuwbqOAX1H0j2GzqU1RjLs2B-hiQWls,971
40
- easylink/utilities/data_utils.py,sha256=D1Srj_2Ol5mAVt_D8k8fpm5E-GU8s6k1uKLatcQ1Oeo,1597
41
- easylink/utilities/general_utils.py,sha256=IM78EToICkmkZX1pvYsU6uZnVvXYDmS26H9Tjmg0XCM,3293
42
- easylink/utilities/paths.py,sha256=yl0cuWChJmB6YKMCQavTKw9jIl-VQhH6cnsM6D5c0Zk,599
38
+ easylink/utilities/__init__.py,sha256=0U33kbv4hoMfFQ_lh5hLwifxRPzOgkLkjKLYxmaK10g,196
39
+ easylink/utilities/aggregator_utils.py,sha256=pqBog6kEX4MXBBMjQtHFlE5gEMqRWb5VFl64u0Lr__g,972
40
+ easylink/utilities/data_utils.py,sha256=CcnM3u0_MQDQo3jMs3E4IK_rz8wAsFdJ674fZxYEFZg,4620
41
+ easylink/utilities/general_utils.py,sha256=El1W0nn4P27sRBGotNQb-9du-Gbhk9ggSuu4vmGDfwo,4591
42
+ easylink/utilities/paths.py,sha256=KM1GlnsAcKbUJrC4LZKpeJfPljxe_aXP1ZhVp43TYRA,924
43
43
  easylink/utilities/spark.smk,sha256=tQ7RArNQzhjbaBQQcRORB4IxxkuDx4gPHUBcWHDYJ_U,5795
44
- easylink/utilities/splitter_utils.py,sha256=riz3rflTrbkQ8uqMaqmXCY1BaWvgdxGzl8WN7Lb7eO8,2601
45
- easylink/utilities/validation_utils.py,sha256=qOgn1n3_m5blFN7eHJ9MbOt5DkFA6DWucAOUAjvGvco,764
46
- easylink-0.1.7.dist-info/METADATA,sha256=tNiHPs5mHZUjAYO4hHpBS8k-26MgUZZe_g6W6GXuVV8,2804
47
- easylink-0.1.7.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
48
- easylink-0.1.7.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
49
- easylink-0.1.7.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
50
- easylink-0.1.7.dist-info/RECORD,,
44
+ easylink/utilities/splitter_utils.py,sha256=y4CbbTBgRaoXFxy-9Eu5eWx4lA4ZEcbrYpxgLIzG_kc,2602
45
+ easylink/utilities/validation_utils.py,sha256=W9r_RXcivJjfpioLhONirfwdByYttxNsVY489_sbrYQ,1683
46
+ easylink-0.1.9.dist-info/METADATA,sha256=kc6QCMEr_QU7QtZNnQu-Ic20wmV2OzoN_23Ndw6ArEc,2804
47
+ easylink-0.1.9.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
48
+ easylink-0.1.9.dist-info/entry_points.txt,sha256=OGMZDFltg3yMboT7XjJt3joiPhRfV_7jnREVtrAIQNU,51
49
+ easylink-0.1.9.dist-info/top_level.txt,sha256=oHcOpcF_jDMWFiJRzfGQvuskENGDjSPC_Agu9Z_Xvik,9
50
+ easylink-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.1)
2
+ Generator: setuptools (76.0.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5