fractal-task-tools 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fractal-task-tools might be problematic. Click here for more details.

@@ -0,0 +1,236 @@
1
+ import ast
2
+ import logging
3
+ import os
4
+ from importlib import import_module
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+ from docstring_parser import parse as docparse
9
+
10
+
11
+ def _sanitize_description(string: str) -> str:
12
+ """
13
+ Sanitize a description string.
14
+
15
+ This is a provisional helper function that replaces newlines with spaces
16
+ and reduces multiple contiguous whitespace characters to a single one.
17
+ Future iterations of the docstrings format/parsing may render this function
18
+ not-needed or obsolete.
19
+
20
+ Args:
21
+ string: TBD
22
+ """
23
+ # Replace newline with space
24
+ new_string = string.replace("\n", " ")
25
+ # Replace N-whitespace characters with a single one
26
+ while " " in new_string:
27
+ new_string = new_string.replace(" ", " ")
28
+ return new_string
29
+
30
+
31
+ def _get_function_docstring(
32
+ *,
33
+ package_name: Optional[str],
34
+ module_path: str,
35
+ function_name: str,
36
+ verbose: bool = False,
37
+ ) -> str:
38
+ """
39
+ Extract docstring from a function.
40
+
41
+
42
+ Args:
43
+ package_name: Example `fractal_tasks_core`.
44
+ module_path:
45
+ This must be an absolute path like `/some/module.py` (if
46
+ `package_name` is `None`) or a relative path like `something.py`
47
+ (if `package_name` is not `None`).
48
+ function_name: Example `create_ome_zarr`.
49
+ """
50
+
51
+ if not module_path.endswith(".py"):
52
+ raise ValueError(f"Module {module_path} must end with '.py'")
53
+
54
+ # Get the function ast.FunctionDef object
55
+ if package_name is not None:
56
+ if os.path.isabs(module_path):
57
+ raise ValueError(
58
+ "Error in _get_function_docstring: `package_name` is not "
59
+ "None but `module_path` is absolute."
60
+ )
61
+ package_path = Path(import_module(package_name).__file__).parent
62
+ module_path = package_path / module_path
63
+ else:
64
+ if not os.path.isabs(module_path):
65
+ raise ValueError(
66
+ "Error in _get_function_docstring: `package_name` is None "
67
+ "but `module_path` is not absolute."
68
+ )
69
+ module_path = Path(module_path)
70
+
71
+ if verbose:
72
+ logging.info(f"[_get_function_docstring] {function_name=}")
73
+ logging.info(f"[_get_function_docstring] {module_path=}")
74
+
75
+ tree = ast.parse(module_path.read_text())
76
+ _function = next(
77
+ f
78
+ for f in ast.walk(tree)
79
+ if (isinstance(f, ast.FunctionDef) and f.name == function_name)
80
+ )
81
+
82
+ # Extract docstring from ast.FunctionDef
83
+ return ast.get_docstring(_function)
84
+
85
+
86
def _get_function_args_descriptions(
    *,
    package_name: Optional[str],
    module_path: str,
    function_name: str,
    verbose: bool = False,
) -> dict[str, str]:
    """
    Extract argument descriptions from a function.

    Args:
        package_name: Example `fractal_tasks_core`.
        module_path:
            This must be an absolute path like `/some/module.py` (if
            `package_name` is `None`) or a relative path like `something.py`
            (if `package_name` is not `None`).
        function_name: Example `create_ome_zarr`.
        verbose: If `True`, log the extracted docstring.
    """

    # Obtain the raw docstring, without importing the module
    docstring = _get_function_docstring(
        package_name=package_name,
        module_path=module_path,
        function_name=function_name,
        verbose=verbose,
    )
    if verbose:
        logging.info(f"[_get_function_args_descriptions] {docstring}")

    # Parse the docstring (via docstring_parser), then map each argument
    # name onto its sanitized description
    descriptions = {}
    for param in docparse(docstring).params:
        descriptions[param.arg_name] = _sanitize_description(param.description)
    logging.info(f"[_get_function_args_descriptions] END ({function_name=})")
    return descriptions
123
+
124
+
125
def _get_class_attrs_descriptions(
    package_name: str, module_relative_path: str, class_name: str
) -> dict[str, str]:
    """
    Extract attribute descriptions from a class.

    Args:
        package_name: Example `fractal_tasks_core`.
        module_relative_path: Example `lib_channels.py`.
        class_name: Example `OmeroChannel`.

    Returns:
        Mapping from attribute name to its sanitized description, with
        `"Missing description"` for attributes lacking one in the docstring.

    Raises:
        ValueError: If `module_relative_path` does not end with `.py`.
        RuntimeError: If `class_name` is not found in the module.
    """

    if not module_relative_path.endswith(".py"):
        raise ValueError(f"Module {module_relative_path} must end with '.py'")

    # Locate the module file within the installed package, and find the
    # class `ast.ClassDef` node
    package_path = Path(import_module(package_name).__file__).parent
    module_path = package_path / module_relative_path
    tree = ast.parse(module_path.read_text())
    try:
        _class = next(
            c
            for c in ast.walk(tree)
            if (isinstance(c, ast.ClassDef) and c.name == class_name)
        )
    except StopIteration as e:
        # Chain the original exception, for easier debugging
        raise RuntimeError(
            f"Cannot find {class_name=} for {package_name=} "
            f"and {module_relative_path=}"
        ) from e
    docstring = ast.get_docstring(_class)
    parsed_docstring = docparse(docstring)
    descriptions = {
        x.arg_name: _sanitize_description(x.description)
        if x.description
        else "Missing description"
        for x in parsed_docstring.params
    }
    logging.info(f"[_get_class_attrs_descriptions] END ({class_name=})")
    return descriptions
165
+
166
+
167
+ def _insert_function_args_descriptions(
168
+ *, schema: dict, descriptions: dict, verbose: bool = False
169
+ ):
170
+ """
171
+ Merge the descriptions obtained via `_get_args_descriptions` into the
172
+ properties of an existing JSON Schema.
173
+
174
+ Args:
175
+ schema: TBD
176
+ descriptions: TBD
177
+ """
178
+ new_schema = schema.copy()
179
+ new_properties = schema["properties"].copy()
180
+ for key, value in schema["properties"].items():
181
+ if "description" in value:
182
+ raise ValueError("Property already has description")
183
+ else:
184
+ if key in descriptions:
185
+ value["description"] = descriptions[key]
186
+ else:
187
+ value["description"] = "Missing description"
188
+ new_properties[key] = value
189
+ if verbose:
190
+ logging.info(
191
+ "[_insert_function_args_descriptions] "
192
+ f"Add {key=}, {value=}"
193
+ )
194
+ new_schema["properties"] = new_properties
195
+ logging.info("[_insert_function_args_descriptions] END")
196
+ return new_schema
197
+
198
+
199
+ def _insert_class_attrs_descriptions(
200
+ *,
201
+ schema: dict,
202
+ class_name: str,
203
+ descriptions: dict,
204
+ definition_key: str,
205
+ ):
206
+ """
207
+ Merge the descriptions obtained via `_get_attributes_models_descriptions`
208
+ into the `class_name` definition, within an existing JSON Schema
209
+
210
+ Args:
211
+ schema: TBD
212
+ class_name: TBD
213
+ descriptions: TBD
214
+ definition_key: Either `"definitions"` (for Pydantic V1) or
215
+ `"$defs"` (for Pydantic V2)
216
+ """
217
+ new_schema = schema.copy()
218
+ if definition_key not in schema:
219
+ return new_schema
220
+ else:
221
+ new_definitions = schema[definition_key].copy()
222
+ # Loop over existing definitions
223
+ for name, definition in schema[definition_key].items():
224
+ if name == class_name:
225
+ for prop in definition["properties"]:
226
+ if "description" in new_definitions[name]["properties"][prop]:
227
+ raise ValueError(
228
+ f"Property {name}.{prop} already has description"
229
+ )
230
+ else:
231
+ new_definitions[name]["properties"][prop][
232
+ "description"
233
+ ] = descriptions[prop]
234
+ new_schema[definition_key] = new_definitions
235
+ logging.info("[_insert_class_attrs_descriptions] END")
236
+ return new_schema
@@ -0,0 +1,27 @@
1
+ import re
2
+
3
+
4
def normalize_package_name(pkg_name: str) -> str:
    """
    Implement both PyPA and custom package-name normalization

    1. PyPA normalization: The name should be lowercased with all runs of the
       characters `.`, `-`, or `_` replaced with a single `-` character
       (https://packaging.python.org/en/latest/specifications/name-normalization).
    2. Custom normalization: Replace `-` with `_`, to obtain the
       imported-module name.

    Args:
        pkg_name: The non-normalized package name.

    Returns:
        The normalized package name.
    """

    # Apply PyPA normalization
    pypa_normalized_package_name = re.sub(r"[-_.]+", "-", pkg_name).lower()

    # Replace `-` with `_`
    final_package_name = pypa_normalized_package_name.replace("-", "_")

    return final_package_name
@@ -0,0 +1,81 @@
1
+ """
2
+ Custom Pydantic v2 JSON Schema generation tools.
3
+
4
+ As of Pydantic V2, the JSON Schema representation of model attributes marked
5
+ as `Optional` changed, and the new behavior consists in marking the
6
+ corresponding properties as an `anyOf` of either a `null` or the actual type.
7
+ This is not always the required behavior, see e.g.
8
+ * https://github.com/pydantic/pydantic/issues/7161
9
+ * https://github.com/pydantic/pydantic/issues/8394
10
+
11
+ Here we list some alternative ways of reverting this change.
12
+ """
13
+ import logging
14
+
15
+ from pydantic.json_schema import GenerateJsonSchema
16
+ from pydantic.json_schema import JsonSchemaValue
17
+ from pydantic_core.core_schema import WithDefaultSchema
18
+
19
# Module-level logger, named after the custom schema-generator class below.
logger = logging.getLogger("CustomGenerateJsonSchema")
20
+
21
+
22
class CustomGenerateJsonSchema(GenerateJsonSchema):
    """
    JSON-Schema generator that reverts the Pydantic-v2 handling of
    `Optional` attributes (see module docstring).
    """

    def get_flattened_anyof(
        self, schemas: list[JsonSchemaValue]
    ) -> JsonSchemaValue:
        # Strip a possible `{"type": "null"}` member from `schemas` before
        # delegating to the parent implementation
        null_schema = {"type": "null"}
        if null_schema in schemas:
            logger.warning(
                "Drop `null_schema` before calling `get_flattened_anyof`"
            )
            schemas.remove(null_schema)
        return super().get_flattened_anyof(schemas)

    def default_schema(self, schema: WithDefaultSchema) -> JsonSchemaValue:
        # Drop `"default": None` entries produced by the parent class
        json_schema = super().default_schema(schema)
        if json_schema.get("default", ...) is None:
            logger.warning(f"Pop `None` default value from {json_schema=}")
            del json_schema["default"]
        return json_schema
40
+
41
+
42
+ # class GenerateJsonSchemaA(GenerateJsonSchema):
43
+ # def nullable_schema(self, schema):
44
+ # null_schema = {"type": "null"}
45
+ # inner_json_schema = self.generate_inner(schema["schema"])
46
+ # if inner_json_schema == null_schema:
47
+ # return null_schema
48
+ # else:
49
+ # logging.info("A: Skip calling `get_flattened_anyof` method")
50
+ # return inner_json_schema
51
+
52
+
53
+ # class GenerateJsonSchemaB(GenerateJsonSchemaA):
54
+ # def default_schema(self, schema: WithDefaultSchema) -> JsonSchemaValue:
55
+ # original_json_schema = super().default_schema(schema)
56
+ # new_json_schema = deepcopy(original_json_schema)
57
+ # default = new_json_schema.get("default", None)
58
+ # if default is None:
59
+ # logging.info("B: Pop None default")
60
+ # new_json_schema.pop("default")
61
+ # return new_json_schema
62
+
63
+
64
+ # class GenerateJsonSchemaC(GenerateJsonSchema):
65
+ # def get_flattened_anyof(
66
+ # self, schemas: list[JsonSchemaValue]
67
+ # ) -> JsonSchemaValue:
68
+
69
+ # original_json_schema_value = super().get_flattened_anyof(schemas)
70
+ # members = original_json_schema_value.get("anyOf")
71
+ # logging.info("C", original_json_schema_value)
72
+ # if (
73
+ # members is not None
74
+ # and len(members) == 2
75
+ # and {"type": "null"} in members
76
+ # ):
77
+ # new_json_schema_value = {"type": [t["type"] for t in members]}
78
+ # logging.info("C", new_json_schema_value)
79
+ # return new_json_schema_value
80
+ # else:
81
+ # return original_json_schema_value
@@ -0,0 +1,101 @@
1
+ import inspect
2
+ import logging
3
+ from importlib import import_module
4
+ from inspect import signature
5
+ from pathlib import Path
6
+ from typing import Callable
7
+
8
+ from pydantic.v1.decorator import ALT_V_ARGS
9
+ from pydantic.v1.decorator import ALT_V_KWARGS
10
+ from pydantic.v1.decorator import V_DUPLICATE_KWARGS
11
+ from pydantic.v1.decorator import V_POSITIONAL_ONLY_NAME
12
+
13
# Task-function argument names that are not allowed: plain `args`/`kwargs`,
# plus the reserved internal names imported from `pydantic.v1.decorator`
# (used by `_validate_function_signature` below).
FORBIDDEN_PARAM_NAMES = (
    "args",
    "kwargs",
    V_POSITIONAL_ONLY_NAME,
    V_DUPLICATE_KWARGS,
    ALT_V_ARGS,
    ALT_V_KWARGS,
)
21
+
22
+
23
+ def _extract_function(
24
+ module_relative_path: str,
25
+ function_name: str,
26
+ package_name: str,
27
+ verbose: bool = False,
28
+ ) -> Callable:
29
+ """
30
+ Extract function from a module with the same name.
31
+
32
+ Args:
33
+ package_name: Example `fractal_tasks_core`.
34
+ module_relative_path: Example `tasks/create_ome_zarr.py`.
35
+ function_name: Example `create_ome_zarr`.
36
+ verbose:
37
+ """
38
+ if not module_relative_path.endswith(".py"):
39
+ raise ValueError(f"{module_relative_path=} must end with '.py'")
40
+ module_relative_path_no_py = str(
41
+ Path(module_relative_path).with_suffix("")
42
+ )
43
+ module_relative_path_dots = module_relative_path_no_py.replace("/", ".")
44
+ if verbose:
45
+ logging.info(
46
+ f"Now calling `import_module` for "
47
+ f"{package_name}.{module_relative_path_dots}"
48
+ )
49
+ imported_module = import_module(
50
+ f"{package_name}.{module_relative_path_dots}"
51
+ )
52
+ if verbose:
53
+ logging.info(
54
+ f"Now getting attribute {function_name} from "
55
+ f"imported module {imported_module}."
56
+ )
57
+ task_function = getattr(imported_module, function_name)
58
+ return task_function
59
+
60
+
61
def _validate_function_signature(function: Callable):
    """
    Validate the function signature.

    Implement a set of checks for type hints that do not play well with the
    creation of JSON Schema, see
    https://github.com/fractal-analytics-platform/fractal-tasks-core/issues/399.

    Args:
        function: The task function to validate.

    Returns:
        The `inspect.Signature` of `function`, when all checks pass.

    Raises:
        ValueError: If any parameter uses a forbidden name, a `Union` or
            `|` type hint, or an `Optional` annotation with a non-None
            default.
    """
    sig = signature(function)
    for param in sig.parameters.values():

        # CASE 1: Check that name is not forbidden
        if param.name in FORBIDDEN_PARAM_NAMES:
            raise ValueError(
                f"Function {function} has argument with name {param.name}"
            )

        # CASE 2: Raise an error for unions
        if str(param.annotation).startswith(("typing.Union[", "Union[")):
            raise ValueError("typing.Union is not supported")

        # CASE 3: Raise an error for "|"
        if "|" in str(param.annotation):
            raise ValueError('Use of "|" in type hints is not supported')

        # CASE 4: Raise an error for optional parameter with given
        # (non-None) default, e.g. Optional[str] = "asd"
        is_annotation_optional = str(param.annotation).startswith(
            ("typing.Optional[", "Optional[")
        )
        # Use the public `inspect.Parameter.empty` sentinel (with an
        # identity check) instead of the private `inspect._empty`.
        default_given = (
            param.default is not None
            and param.default is not inspect.Parameter.empty
        )
        if default_given and is_annotation_optional:
            raise ValueError("Optional parameter has non-None default value")

    logging.info("[_validate_function_signature] END")
    return sig
@@ -0,0 +1,109 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from docstring_parser import parse as docparse
6
+
7
+ from ._descriptions import _get_function_docstring
8
+
9
+
10
def _get_function_description(
    package_name: str, module_path: str, function_name: str
) -> str:
    """
    Extract function description from its docstring.

    Args:
        package_name: Example `fractal_tasks_core`.
        module_path: Example `tasks/create_ome_zarr.py`.
        function_name: Example `create_ome_zarr`.
    """
    # Extract the raw docstring and parse it via docstring_parser
    docstring = _get_function_docstring(
        package_name=package_name,
        module_path=module_path,
        function_name=function_name,
    )
    parsed_docstring = docparse(docstring)

    # Collect whichever of the short/long descriptions are present
    items = [
        part
        for part in (
            parsed_docstring.short_description,
            parsed_docstring.long_description,
        )
        if part
    ]
    if not items:
        return ""
    # Separate with a blank line only when the docstring had one after its
    # short description
    if parsed_docstring.blank_after_short_description:
        separator = "\n\n"
    else:
        separator = "\n"
    return separator.join(items)
44
+
45
+
46
def create_docs_info(
    *,
    executable_non_parallel: Optional[str] = None,
    executable_parallel: Optional[str] = None,
    package: str,
) -> str:
    """
    Return task description based on function docstring.
    """
    logging.info("[create_docs_info] START")
    sections: list[str] = []
    for executable in (executable_non_parallel, executable_parallel):
        if executable is None:
            continue
        # Extract the function name.
        # Note: this could be made more general, but for the moment we assume
        # that the function has the same name as the module)
        function_name = Path(executable).with_suffix("").name
        logging.info(f"[create_docs_info] {function_name=}")
        # Get function description
        description = _get_function_description(
            package_name=package,
            module_path=executable,
            function_name=function_name,
        )
        sections.append(f"## {function_name}\n{description}\n")
    logging.info("[create_docs_info] END")
    return "".join(sections)
75
+
76
+
77
def read_docs_info_from_file(
    *,
    docs_info: str,
    task_list_path: str,
) -> str:
    """
    Return task description based on the content of a file.

    An example of valid argument is
    ```
    docs_info = "file:relative/path/info.md"
    ```
    where the path is relative to the folder where `task_list.py` is.

    Args:
        docs_info: A `file:`-prefixed relative path to the docs file.
        task_list_path: Path to the `task_list.py` module; the docs file is
            resolved relative to its parent folder.

    Returns:
        The content of the referenced file.

    Raises:
        ValueError: If `docs_info` lacks the `file:` prefix, or if the
            referenced path is absolute.
    """
    logging.info("[read_docs_info_from_file] START")

    # Preliminary checks (`removeprefix` avoids the magic number in a
    # `docs_info[5:]` slice)
    prefix = "file:"
    if not docs_info.startswith(prefix):
        raise ValueError(f"Invalid docs_info='{docs_info}'.")
    relative_path = Path(docs_info.removeprefix(prefix))
    if relative_path.is_absolute():
        raise ValueError(
            f"Invalid docs_info='{docs_info}' (path must be relative)."
        )

    base_path = Path(task_list_path).parent
    docs_path = (base_path / relative_path).as_posix()
    logging.info(f"[read_docs_info_from_file] Reading docs from {docs_path}")
    with open(docs_path, "r") as f:
        file_contents = f.read()
    logging.info("[read_docs_info_from_file] END")

    return file_contents
@@ -0,0 +1,100 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+
5
# Type alias for a JSON Schema object (an arbitrarily-nested dict).
_Schema = dict[str, Any]
6
+
7
+
8
+ def _include_titles_for_properties(
9
+ properties: dict[str, dict],
10
+ verbose: bool = False,
11
+ ) -> dict[str, dict]:
12
+ """
13
+ Scan through properties of a JSON Schema, and set their title when it is
14
+ missing.
15
+
16
+ The title is set to `name.title()`, where `title` is a standard string
17
+ method - see https://docs.python.org/3/library/stdtypes.html#str.title.
18
+
19
+ Args:
20
+ properties: TBD
21
+ """
22
+ if verbose:
23
+ logging.info(
24
+ f"[_include_titles_for_properties] Original properties:\n"
25
+ f"{properties}"
26
+ )
27
+
28
+ new_properties = properties.copy()
29
+ for prop_name, prop in properties.items():
30
+ if "title" not in prop.keys():
31
+ new_prop = prop.copy()
32
+ new_prop["title"] = prop_name.title()
33
+ new_properties[prop_name] = new_prop
34
+ if verbose:
35
+ logging.info(
36
+ f"[_include_titles_for_properties] New properties:\n"
37
+ f"{new_properties}"
38
+ )
39
+ return new_properties
40
+
41
+
42
def _include_titles(
    schema: _Schema,
    definitions_key: str,
    verbose: bool = False,
) -> _Schema:
    """
    Include property titles, when missing.

    This handles both:

    - first-level JSON Schema properties (corresponding to task
      arguments);
    - properties of JSON Schema definitions (corresponding to
      task-argument attributes).

    Args:
        schema: The JSON Schema to update.
        definitions_key: Either `"definitions"` (for Pydantic V1) or
            `"$defs"` (for Pydantic V2)
        verbose: If `True`, log detailed progress information.

    Returns:
        A new schema, with titles set where they were missing.
    """
    new_schema = schema.copy()

    if verbose:
        logging.info("[_include_titles] START")
        logging.info(f"[_include_titles] Input schema:\n{schema}")

    # Update first-level properties (that is, task arguments)
    new_properties = _include_titles_for_properties(
        schema["properties"], verbose=verbose
    )
    new_schema["properties"] = new_properties

    if verbose:
        logging.info("[_include_titles] Titles for properties now included.")

    # Update properties of definitions
    if definitions_key in schema.keys():
        new_definitions = schema[definitions_key].copy()
        for def_name, def_schema in new_definitions.items():
            if "properties" not in def_schema.keys():
                if verbose:
                    logging.info(
                        f"Definition schema {def_name} has no 'properties' "
                        "key. Skip."
                    )
            else:
                new_def_properties = _include_titles_for_properties(
                    def_schema["properties"], verbose=verbose
                )
                # Replace the whole definition dict (rather than assigning
                # into it), so that the definition dicts shared with the
                # input `schema` are not mutated
                new_definitions[def_name] = {
                    **def_schema,
                    "properties": new_def_properties,
                }
        new_schema[definitions_key] = new_definitions

    if verbose:
        logging.info(
            "[_include_titles] Titles for definitions properties now included."
        )
    logging.info("[_include_titles] END")
    return new_schema