resubmit 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,223 @@
1
+ import re
2
+ from typing import Any, Dict, List, Tuple, Union, Optional, Iterable
3
+ import pandas as pd
4
+ from itertools import product
5
+ import logging
6
+
7
+
8
+ def _is_regex_spec(val: Any) -> bool:
9
+ """Return True if val looks like a regex specifier.
10
+
11
+ Accepted forms:
12
+ - compiled `re.Pattern`
13
+ - tuple (`re.Pattern`, exclude: bool)
14
+ - dict with keys `pattern` (re.Pattern) and optional `exclude` (bool)
15
+ - string starting with 're:' (e.g. 're:^foo.*') meaning include matches
16
+ - string starting with '!re:' meaning exclude matches
17
+ """
18
+ if hasattr(val, "search") and callable(val.search):
19
+ return True
20
+ if isinstance(val, tuple) and len(val) >= 1 and hasattr(val[0], "search"):
21
+ return True
22
+ if isinstance(val, dict) and "pattern" in val:
23
+ return True
24
+ if isinstance(val, str) and (val.startswith("re:") or val.startswith("!re:")):
25
+ return True
26
+ return False
27
+
28
+
29
+ def _normalize_regex_spec(val: Any) -> Tuple[re.Pattern, bool]:
30
+ """Return (compiled_pattern, exclude_flag) for a given regex spec.
31
+
32
+ Raises ValueError for unsupported types.
33
+ """
34
+ if hasattr(val, "search") and callable(val.search):
35
+ return val, False
36
+ if isinstance(val, tuple) and len(val) >= 1:
37
+ pat = val[0]
38
+ exclude = bool(val[1]) if len(val) > 1 else False
39
+ return pat, exclude
40
+ if isinstance(val, dict):
41
+ pat = val["pattern"]
42
+ exclude = bool(val.get("exclude", False))
43
+ return pat, exclude
44
+ if isinstance(val, str):
45
+ if val.startswith("!re:"):
46
+ return re.compile(val[4:]), True
47
+ elif val.startswith("re:"):
48
+ return re.compile(val[3:]), False
49
+ raise ValueError(f"Unsupported regex spec: {val!r}")
50
+
51
+
52
+ def ensure_unique_combinations(
53
+ df: pd.DataFrame, cols: Union[str, List[str]], raise_on_conflict: bool = True
54
+ ) -> Tuple[bool, Optional[pd.DataFrame]]:
55
+ """Check that combinations of columns `cols` are unique across `df`.
56
+
57
+ Returns (is_unique, duplicates_df) where `duplicates_df` is None when unique.
58
+ If `raise_on_conflict` is True, raises `ValueError` when duplicates are found.
59
+ """
60
+ if isinstance(cols, str):
61
+ cols = [cols]
62
+ # Stringify to avoid dtype mismatch effects
63
+ key_series = df[cols].astype(str).agg("||".join, axis=1)
64
+ nunique = key_series.nunique()
65
+ if nunique == len(df):
66
+ return True, None
67
+
68
+ duplicates = df[key_series.duplicated(keep=False)]
69
+ if raise_on_conflict:
70
+ raise ValueError(
71
+ f"Found {len(duplicates)} rows with non-unique combinations for cols={cols}."
72
+ )
73
+ return False, duplicates
74
+
75
+
76
def _classify_param_key(key: str) -> Tuple[str, Optional[str]]:
    """Return (base_name, marker) for a parameter key.

    marker is 'regex', 'callable' or 'unique' when `key` ends with the
    corresponding `__<marker>` (or single-underscore `_<marker>`) suffix;
    otherwise marker is None and base_name is the key unchanged.
    """
    for marker in ("regex", "callable", "unique"):
        # Check double underscore first so `name__regex` strips correctly.
        for sep in ("__", "_"):
            suffix = sep + marker
            if key.endswith(suffix):
                return key[: -len(suffix)], marker
    return key, None


def create_jobs_dataframe(params: Dict[str, Any]) -> pd.DataFrame:
    """Create a job DataFrame from a parameter map.

    Rules:
    - Parameters with iterable values (lists, tuples) contribute axes to a
      Cartesian product. Scalar values — and strings, which would otherwise
      be exploded character by character — are treated as single-element
      axes, so `{"x": 5}` behaves like `{"x": [5]}`.
    - If a parameter value is callable (or the key ends in `__callable`), it
      is evaluated AFTER the initial DataFrame is created as
      `col_values = fn(df)`; the result is used as the column values and
      must have the same length as `df`.
    - If a parameter value is a regex spec (see `_is_regex_spec`, or the key
      ends in `__regex`), it is applied LAST as an include/exclude filter on
      the stringified values of that column.
    - A key ending in `__unique` enforces uniqueness of that column; its
      value is passed as `raise_on_conflict` (truthy -> raise, falsy ->
      log a warning).

    Returns:
        The filtered DataFrame with callables and regex filters applied.

    Raises:
        ValueError: if a callable returns the wrong length, a regex filter
            targets a column missing from the DataFrame, or a uniqueness
            check fails with raise_on_conflict enabled.
    """
    # Separate static values (used for product), callables and regex specs.
    static_items: Dict[str, Any] = {}
    callables: Dict[str, Any] = {}
    regex_specs: Dict[str, Any] = {}
    unique_items: Dict[str, Any] = {}

    for k, v in params.items():
        base, marker = _classify_param_key(k)
        if marker == "regex":
            regex_specs[base] = v
        elif marker == "callable":
            callables[base] = v
        elif marker == "unique":
            unique_items[base] = v
        elif callable(v):
            callables[k] = v
        elif _is_regex_spec(v):
            # A regex spec under a plain key filters that same column.
            regex_specs[k] = v
        elif isinstance(v, str) or not isinstance(v, Iterable):
            # Scalars crash `product()`, and strings iterate per character;
            # wrap both into a single-value axis.
            static_items[k] = [v]
        else:
            static_items[k] = v

    # With no static items, start from a single-row DataFrame so callables
    # can still compute columns.
    if not static_items:
        df = pd.DataFrame([{}])
    else:
        df = pd.DataFrame(
            list(product(*static_items.values())), columns=static_items.keys()
        )

    # Apply callables (they must accept the dataframe and return a list-like).
    for k, fn in callables.items():
        vals = fn(df)
        if len(vals) != len(df):
            raise ValueError(
                f"Callable for param {k!r} returned length {len(vals)} != {len(df)}"
            )
        df[k] = vals

    # Apply regex specs last as filters.
    if regex_specs:
        mask = pd.Series(True, index=df.index)
        for k, spec in regex_specs.items():
            if k not in df.columns:
                # Fail loudly instead of leaking a KeyError from df[k].
                raise ValueError(
                    f"Regex filter targets unknown column {k!r}; "
                    f"available columns: {list(df.columns)}"
                )
            pat, exclude = _normalize_regex_spec(spec)
            matches = df[k].astype(str).apply(lambda s: bool(pat.search(s)))
            mask &= ~matches if exclude else matches
        df = df[mask].reset_index(drop=True)

    # Apply unique constraints; a falsy constraint value downgrades the
    # conflict from an exception to a warning.
    for k, unique_val in unique_items.items():
        is_unique, duplicates = ensure_unique_combinations(
            df, k, raise_on_conflict=unique_val
        )
        if not is_unique:
            # Lazy %-formatting avoids building the message when the
            # warning level is filtered out.
            logging.warning(
                "Non-unique values found for column %r:\n%s", k, duplicates
            )

    return df
168
+
169
+
170
def submit_jobs(
    jobs_args: Dict[str, Any],
    func: Any,
    *,
    timeout_min: int,
    cpus_per_task: int = 16,
    mem_gb: int = 64,
    num_gpus: int = 1,
    folder: str = "logs/%j",
    block: bool = False,
    prompt: bool = True,
    local_run: bool = False,
    slurm_additional_parameters: Optional[Dict] = None,
) -> Any:
    """Submit one job per row of the DataFrame generated from `jobs_args`.

    A DataFrame is created from the Cartesian product of parameter lists
    (see `create_jobs_dataframe`), with support for callables and regex
    filtering:

    1. use a `__unique` key suffix to enforce uniqueness of a column.
    2. use a `__callable` key suffix to compute column values from the
       DataFrame.
    3. use a `__regex` key suffix to include/exclude rows by regex.

    Args:
        jobs_args: Mapping of parameter name to a list of values (or a
            callable / regex spec / suffixed key as described above).
        func: Function to be submitted for each job.
        timeout_min: Job timeout in minutes.
        cpus_per_task: Number of CPUs per task.
        mem_gb: Memory in GB.
        num_gpus: Number of GPUs.
        folder: Folder for logs.
        block: Whether to block until jobs complete.
        prompt: Whether to prompt for confirmation before submission.
        local_run: If True, runs the function locally instead of submitting.
        slurm_additional_parameters: Additional Slurm parameters as a dict.
            If not provided, defaults to {"gpus": num_gpus}.

    Returns:
        The result of `submit_jobs` from `.__submit`.
    """
    jobs_df = create_jobs_dataframe(jobs_args)
    records = jobs_df.to_dict(orient="records")

    # Imported lazily so building the jobs DataFrame does not require the
    # submission backend (submitit) to be importable.
    from .__submit import submit_jobs as _submit_jobs

    return _submit_jobs(
        records,
        func,
        timeout_min=timeout_min,
        cpus_per_task=cpus_per_task,
        mem_gb=mem_gb,
        num_gpus=num_gpus,
        folder=folder,
        block=block,
        prompt=prompt,
        local_run=local_run,
        slurm_additional_parameters=slurm_additional_parameters,
    )
resubmit/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """resubmit: small helpers around submitit for reproducible cluster submissions."""
2
2
 
3
- from .submit import submit_jobs
4
- from .debug import maybe_attach_debugger
3
+ from .__debug import maybe_attach_debugger
4
+ from .__bookkeeping import submit_jobs
5
5
 
6
6
  __all__ = ["submit_jobs", "maybe_attach_debugger"]
@@ -1,4 +1,5 @@
1
1
  """Core submission utilities wrapping submitit."""
2
+
2
3
  from typing import Any, Callable, Iterable, List, Optional, Dict
3
4
 
4
5
 
@@ -7,17 +8,14 @@ def submit_jobs(
7
8
  func: Callable[[List[dict]], Any],
8
9
  *,
9
10
  timeout_min: int,
10
- cpus_per_task: int = 16,
11
- mem_gb: int = 64,
12
- num_gpus: int = 1,
13
- account: Optional[str] = None,
14
- folder: str = "logs/%j",
15
- block: bool = False,
16
- prompt: bool = True,
17
- local_run: bool = False,
11
+ cpus_per_task: int,
12
+ mem_gb: int,
13
+ num_gpus: int,
14
+ folder: str,
15
+ block: bool,
16
+ prompt: bool,
17
+ local_run: bool,
18
18
  slurm_additional_parameters: Optional[Dict] = None,
19
- constraint: Optional[str] = None,
20
- reservation: Optional[str] = None,
21
19
  ):
22
20
  """Submit jobs described by `jobs_args` where each entry is a dict of kwargs for `func`.
23
21
 
@@ -46,6 +44,7 @@ def submit_jobs(
46
44
  return
47
45
 
48
46
  import submitit
47
+
49
48
  print("submitting jobs")
50
49
  executor = submitit.AutoExecutor(folder=folder)
51
50
 
@@ -56,14 +55,6 @@ def submit_jobs(
56
55
  slurm_additional_parameters = dict(slurm_additional_parameters)
57
56
  slurm_additional_parameters.setdefault("gpus", num_gpus)
58
57
 
59
- # Allow explicit overrides similar to `account`.
60
- if account is not None:
61
- slurm_additional_parameters["account"] = account
62
- if reservation is not None:
63
- slurm_additional_parameters["reservation"] = reservation
64
- if constraint is not None:
65
- slurm_additional_parameters["constraint"] = constraint
66
-
67
58
  print("Slurm additional parameters:", slurm_additional_parameters)
68
59
 
69
60
  executor.update_parameters(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: resubmit
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Summary: Small wrapper around submitit to simplify cluster submissions
5
5
  Author: Amir Mehrpanah
6
6
  License: MIT
@@ -0,0 +1,9 @@
1
+ resubmit/__bookkeeping.py,sha256=FhC9WamX907uyZh7idk-1hfDvX025LOPskUE0KSKypc,8210
2
+ resubmit/__debug.py,sha256=8RINyz7eSAiT47d018wR0R3B_u4PllQJCiLy0zTSQDE,887
3
+ resubmit/__init__.py,sha256=FLKq6KZeI973gBXzdnSkvK1aEdxF--5V2T82fxyzv0U,219
4
+ resubmit/__submit.py,sha256=w3-1_SbB5u9xPpuOUxEvwvH7GXMLWlke_gb4S7RjCRQ,2385
5
+ resubmit-0.0.4.dist-info/licenses/LICENSE,sha256=v2spsd7N1pKFFh2G8wGP_45iwe5S0DYiJzG4im8Rupc,1066
6
+ resubmit-0.0.4.dist-info/METADATA,sha256=kJ07kk1jUq6zAe3btefCSNFqf9Mvp3ZszzoDYtvA-_E,2976
7
+ resubmit-0.0.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
8
+ resubmit-0.0.4.dist-info/top_level.txt,sha256=BfCexfX-VhUZuNi8sI88i0HF_e3ppausQ76hxPeXjYc,9
9
+ resubmit-0.0.4.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- resubmit/__init__.py,sha256=E9oorHt7ntQZgLeV9GtB79jHJCCC9WR5_skifBMHnGQ,210
2
- resubmit/debug.py,sha256=8RINyz7eSAiT47d018wR0R3B_u4PllQJCiLy0zTSQDE,887
3
- resubmit/submit.py,sha256=iyL-VTDmL_2YvdOAbfeJTH9m4FVb7lLXQLnOIoHqIZI,2874
4
- resubmit-0.0.3.dist-info/licenses/LICENSE,sha256=v2spsd7N1pKFFh2G8wGP_45iwe5S0DYiJzG4im8Rupc,1066
5
- resubmit-0.0.3.dist-info/METADATA,sha256=lVUYWXWkdtlco8409sxgQAzjvTp60_lM_FOrKRrj72I,2976
6
- resubmit-0.0.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
7
- resubmit-0.0.3.dist-info/top_level.txt,sha256=BfCexfX-VhUZuNi8sI88i0HF_e3ppausQ76hxPeXjYc,9
8
- resubmit-0.0.3.dist-info/RECORD,,
File without changes