aiida-pythonjob 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ """AiiDA plugin that runs Python functions on remote computers."""
2
+
3
+ __version__ = "0.1.0"
4
+
5
+ from .calculations import PythonJob
6
+ from .data import PickledData, PickledFunction
7
+ from .launch import prepare_pythonjob_inputs
8
+ from .parsers import PythonJobParser
9
+
10
+ __all__ = (
11
+ "PythonJob",
12
+ "PickledData",
13
+ "PickledFunction",
14
+ "prepare_pythonjob_inputs",
15
+ "PythonJobParser",
16
+ )
@@ -0,0 +1,3 @@
1
+ from .pythonjob import PythonJob
2
+
3
+ __all__ = ("PythonJob",)
@@ -0,0 +1,306 @@
1
+ """Calcjob to run a Python function on a remote computer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pathlib
6
+ import typing as t
7
+
8
+ from aiida.common.datastructures import CalcInfo, CodeInfo
9
+ from aiida.common.extendeddicts import AttributeDict
10
+ from aiida.common.folders import Folder
11
+ from aiida.engine import CalcJob, CalcJobProcessSpec
12
+ from aiida.orm import (
13
+ Data,
14
+ FolderData,
15
+ List,
16
+ RemoteData,
17
+ SinglefileData,
18
+ Str,
19
+ to_aiida_type,
20
+ )
21
+
22
+ from aiida_pythonjob.data.pickled_function import PickledFunction, to_pickled_function
23
+
24
+ __all__ = ("PythonJob",)
25
+
26
+
27
class PythonJob(CalcJob):
    """Calcjob to run a Python function on a remote computer.

    The function's source code is written to a script file and the function
    inputs are pickled to ``inputs.pickle``.  On the remote computer the script
    loads the inputs, calls the function, and pickles the return value to
    ``results.pickle``, which is retrieved for parsing.
    """

    _internal_retrieve_list = []
    _retrieve_singlefile_list = []
    _retrieve_temporary_list = []

    _DEFAULT_INPUT_FILE = "script.py"
    _DEFAULT_OUTPUT_FILE = "aiida.out"
    _DEFAULT_PARENT_FOLDER_NAME = "./parent_folder/"

    @classmethod
    def define(cls, spec: CalcJobProcessSpec) -> None:  # type: ignore[override]
        """Define the process specification, including its inputs, outputs and known exit codes.

        :param spec: the calculation job process spec to define.
        """
        super().define(spec)
        spec.input(
            "function",
            valid_type=PickledFunction,
            serializer=to_pickled_function,
            required=False,
        )
        spec.input(
            "function_source_code",
            valid_type=Str,
            serializer=to_aiida_type,
            required=False,
        )
        spec.input("function_name", valid_type=Str, serializer=to_aiida_type, required=False)
        spec.input("process_label", valid_type=Str, serializer=to_aiida_type, required=False)
        spec.input_namespace(
            "function_inputs", valid_type=Data, required=False
        )  # , serializer=serialize_to_aiida_nodes)
        spec.input(
            "function_outputs",
            valid_type=List,
            default=lambda: List(),
            required=False,
            serializer=to_aiida_type,
            help="The information of the output ports",
        )
        spec.input(
            "parent_folder",
            valid_type=(RemoteData, FolderData, SinglefileData),
            required=False,
            help="Use a local or remote folder as parent folder (for restarts and similar)",
        )
        spec.input(
            "parent_folder_name",
            valid_type=Str,
            required=False,
            serializer=to_aiida_type,
            help="""Default name of the subfolder that you want to create in the working directory,
            in which you want to place the files taken from parent_folder""",
        )
        spec.input(
            "parent_output_folder",
            valid_type=Str,
            default=None,
            required=False,
            serializer=to_aiida_type,
            help="Name of the subfolder inside 'parent_folder' from which you want to copy the files",
        )
        spec.input_namespace(
            "upload_files",
            valid_type=(FolderData, SinglefileData),
            required=False,
            help="The folder/files to upload",
        )
        spec.input_namespace(
            "copy_files",
            valid_type=(RemoteData,),
            required=False,
            help="The folder/files to copy from the remote computer",
        )
        spec.input(
            "additional_retrieve_list",
            valid_type=List,
            default=None,
            required=False,
            serializer=to_aiida_type,
            help="The names of the files to retrieve",
        )
        spec.outputs.dynamic = True
        # set default options (optional)
        spec.inputs["metadata"]["options"]["parser_name"].default = "pythonjob.pythonjob"
        spec.inputs["metadata"]["options"]["input_filename"].default = "script.py"
        spec.inputs["metadata"]["options"]["output_filename"].default = "aiida.out"
        spec.inputs["metadata"]["options"]["resources"].default = {
            "num_machines": 1,
            "num_mpiprocs_per_machine": 1,
        }
        # start exit codes - marker for docs
        spec.exit_code(
            310,
            "ERROR_READING_OUTPUT_FILE",
            invalidates_cache=True,
            message="The output file could not be read.",
        )
        spec.exit_code(
            320,
            "ERROR_INVALID_OUTPUT",
            invalidates_cache=True,
            message="The output file contains invalid output.",
        )
        spec.exit_code(
            321,
            "ERROR_RESULT_OUTPUT_MISMATCH",
            invalidates_cache=True,
            message="The number of results does not match the number of outputs.",
        )

    def _build_process_label(self) -> str:
        """Use the function name as the process label.

        :returns: The process label to use for ``ProcessNode`` instances.
        """
        if "process_label" in self.inputs:
            return self.inputs.process_label.value
        else:
            data = self.get_function_data()
            return f"PythonJob<{data['name']}>"

    def on_create(self) -> None:
        """Called when a Process is created."""

        super().on_create()
        self.node.label = self._build_process_label()

    def get_function_data(self) -> dict[str, t.Any]:
        """Get the function data.

        :returns: The function data, with at least the keys ``source_code``
            and ``name``.
        """
        if "function" in self.inputs:
            metadata = self.inputs.function.metadata
            # reassemble runnable source: imports first, then the bare function body
            metadata["source_code"] = metadata["import_statements"] + "\n" + metadata["source_code_without_decorator"]
            return metadata
        else:
            return {
                "source_code": self.inputs.function_source_code.value,
                "name": self.inputs.function_name.value,
            }

    def prepare_for_submission(self, folder: Folder) -> CalcInfo:
        """Prepare the calculation for submission.

        1) Write the python script to the folder.
        2) Write the inputs to a pickle file and save it to the folder.

        :param folder: A temporary folder on the local file system.
        :returns: A :class:`aiida.common.datastructures.CalcInfo` instance.
        """
        import cloudpickle as pickle

        dirpath = pathlib.Path(folder._abspath)
        inputs: dict[str, t.Any]

        # BUGFIX: the namespace is optional (required=False); accessing it when
        # absent raises AttributeError, so test for membership first.
        if "function_inputs" in self.inputs and self.inputs.function_inputs:
            inputs = dict(self.inputs.function_inputs)
        else:
            inputs = {}
        if "parent_folder_name" in self.inputs:
            parent_folder_name = self.inputs.parent_folder_name.value
        else:
            parent_folder_name = self._DEFAULT_PARENT_FOLDER_NAME
        function_data = self.get_function_data()
        # create python script to run the function
        script = f"""
import pickle

# define the function
{function_data["source_code"]}

# load the inputs from the pickle file
with open('inputs.pickle', 'rb') as handle:
    inputs = pickle.load(handle)

# run the function
result = {function_data["name"]}(**inputs)
# save the result as a pickle file
with open('results.pickle', 'wb') as handle:
    pickle.dump(result, handle)
"""
        # write the script to the folder
        with folder.open(self.options.input_filename, "w", encoding="utf8") as handle:
            handle.write(script)
        # symlink = settings.pop('PARENT_FOLDER_SYMLINK', False)
        symlink = True

        remote_copy_list = []
        local_copy_list = []
        remote_symlink_list = []
        remote_list = remote_symlink_list if symlink else remote_copy_list

        source = self.inputs.get("parent_folder", None)

        if source is not None:
            if isinstance(source, RemoteData):
                dirpath = pathlib.Path(source.get_remote_path())
                if self.inputs.parent_output_folder is not None:
                    dirpath = pathlib.Path(source.get_remote_path()) / self.inputs.parent_output_folder.value
                remote_list.append(
                    (
                        source.computer.uuid,
                        str(dirpath),
                        parent_folder_name,
                    )
                )
            elif isinstance(source, FolderData):
                dirname = self.inputs.parent_output_folder.value if self.inputs.parent_output_folder is not None else ""
                local_copy_list.append((source.uuid, dirname, parent_folder_name))
            elif isinstance(source, SinglefileData):
                local_copy_list.append((source.uuid, source.filename, source.filename))
        if "upload_files" in self.inputs:
            upload_files = self.inputs.upload_files
            for key, source in upload_files.items():
                # replace "_dot_" with "." in the key
                new_key = key.replace("_dot_", ".")
                if isinstance(source, FolderData):
                    local_copy_list.append((source.uuid, "", new_key))
                elif isinstance(source, SinglefileData):
                    local_copy_list.append((source.uuid, source.filename, source.filename))
                else:
                    raise ValueError(
                        f"""Input folder/file: {source} is not supported.
Only AiiDA SinglefileData and FolderData are allowed."""
                    )
        if "copy_files" in self.inputs:
            copy_files = self.inputs.copy_files
            for key, source in copy_files.items():
                # replace "_dot_" with "." in the key
                new_key = key.replace("_dot_", ".")
                dirpath = pathlib.Path(source.get_remote_path())
                remote_list.append((source.computer.uuid, str(dirpath), new_key))
        # create pickle file for the inputs
        input_values = {}
        for key, value in inputs.items():
            if isinstance(value, Data) and hasattr(value, "value"):
                # get the value of the pickled data
                input_values[key] = value.value
            # TODO: should check this recursively
            elif isinstance(value, (AttributeDict, dict)):
                # if the value is an AttributeDict, use recursively
                input_values[key] = {k: v.value for k, v in value.items()}
            else:
                raise ValueError(
                    f"Input data {value} is not supported. Only AiiDA data Node with a value attribute is allowed. "
                )
        # save the value as a pickle file, the path is absolute
        filename = "inputs.pickle"
        dirpath = pathlib.Path(folder._abspath)
        with folder.open(filename, "wb") as handle:
            pickle.dump(input_values, handle)
        # create a singlefiledata object for the pickled data
        # BUGFIX: point at the pickle file that was just written into ``folder``
        # (the path literal was corrupted in the published source).
        file_data = SinglefileData(file=f"{dirpath}/{filename}")
        file_data.store()
        local_copy_list.append((file_data.uuid, file_data.filename, filename))

        codeinfo = CodeInfo()
        codeinfo.stdin_name = self.options.input_filename
        codeinfo.stdout_name = self.options.output_filename
        codeinfo.code_uuid = self.inputs.code.uuid

        calcinfo = CalcInfo()
        calcinfo.codes_info = [codeinfo]
        calcinfo.local_copy_list = local_copy_list
        calcinfo.remote_copy_list = remote_copy_list
        calcinfo.remote_symlink_list = remote_symlink_list
        calcinfo.retrieve_list = ["results.pickle", self.options.output_filename]
        if self.inputs.additional_retrieve_list is not None:
            calcinfo.retrieve_list += self.inputs.additional_retrieve_list.get_list()
        calcinfo.retrieve_list += self._internal_retrieve_list

        calcinfo.retrieve_temporary_list = self._retrieve_temporary_list
        calcinfo.retrieve_singlefile_list = self._retrieve_singlefile_list

        return calcinfo
@@ -0,0 +1,14 @@
1
+ import json
2
+
3
+ from aiida.manage.configuration.settings import AIIDA_CONFIG_FOLDER
4
+
5
+
6
def load_config() -> dict:
    """Load the configuration from the config file.

    :returns: parsed configuration dictionary; empty when the file is absent.
    """
    config_file_path = AIIDA_CONFIG_FOLDER / "pythonjob.json"
    try:
        with config_file_path.open("r") as handle:
            return json.load(handle)
    except FileNotFoundError:
        # No user configuration present: behave as if it were empty.
        return {}
@@ -0,0 +1,4 @@
1
+ from .pickled_data import PickledData
2
+ from .pickled_function import PickledFunction
3
+
4
+ __all__ = ("PickledData", "PickledFunction")
@@ -0,0 +1,87 @@
1
+ """`Data` sub class to represent any data using pickle."""
2
+
3
+ import sys
4
+ from pickle import UnpicklingError
5
+
6
+ import cloudpickle
7
+ from aiida import orm
8
+
9
+
10
class Dict(orm.Dict):
    """``orm.Dict`` exposing its content through a uniform ``value`` property."""

    @property
    def value(self):
        """Return the stored dictionary."""
        return self.get_dict()
14
+
15
+
16
class List(orm.List):
    """``orm.List`` exposing its content through a uniform ``value`` property."""

    @property
    def value(self):
        """Return the stored list."""
        return self.get_list()
20
+
21
+
22
class PickledData(orm.Data):
    """Data to represent a pickled value using cloudpickle."""

    # Name of the repository object that holds the pickled payload.
    FILENAME = "value.pkl"

    def __init__(self, value=None, **kwargs):
        """Initialize a `PickledData` node instance.

        :param value: raw Python value to initialize the `PickledData` node from.
        """
        super().__init__(**kwargs)
        self.set_value(value)

    def __str__(self):
        return f"{super().__str__()} : {self.get_value()}"

    @property
    def value(self):
        """The unpickled content of this node."""
        return self.get_value()

    @value.setter
    def value(self, value):
        self.set_value(value)

    def get_value(self):
        """Return the contents of this node, unpickling the stored value.

        :return: The unpickled value.
        """
        return self._get_value_from_file()

    def _get_value_from_file(self):
        """Read the pickled bytes from the repository and deserialize them."""
        try:
            with self.base.repository.open(self.FILENAME, mode="rb") as handle:
                return cloudpickle.loads(handle.read())
        except (UnpicklingError, ValueError) as exc:
            raise ImportError(
                "Failed to load the pickled value. This may be due to an incompatible pickle protocol. "
                "Please ensure that the correct environment and cloudpickle version are being used."
            ) from exc
        except ModuleNotFoundError as exc:
            raise ImportError(
                "Failed to load the pickled value. This may be due to a missing module. "
                "Please ensure that the correct environment and cloudpickle version are being used."
            ) from exc

    def set_value(self, value):
        """Pickle *value* with cloudpickle and store it, recording serializer metadata.

        :param value: The Python value to pickle and store.
        """
        pickled_bytes = cloudpickle.dumps(value)
        self.base.repository.put_object_from_bytes(pickled_bytes, self.FILENAME)

        # Record how the payload was serialized so problems can be diagnosed on load.
        attributes = self.base.attributes
        attributes.set("python_version", f"{sys.version_info.major}.{sys.version_info.minor}")
        attributes.set("serializer_module", cloudpickle.__name__)
        attributes.set("serializer_version", cloudpickle.__version__)
        attributes.set("pickle_protocol", cloudpickle.DEFAULT_PROTOCOL)
@@ -0,0 +1,145 @@
1
+ import inspect
2
+ import textwrap
3
+ from typing import Any, Callable, Dict, _SpecialForm, get_type_hints
4
+
5
+ from .pickled_data import PickledData
6
+
7
+
8
class PickledFunction(PickledData):
    """Data class to represent a pickled Python function."""

    def __init__(self, value=None, **kwargs):
        """Initialize a PickledFunction node instance.

        :param value: a Python function
        :raises ValueError: if ``value`` is not callable.
        """
        super().__init__(**kwargs)
        if not callable(value):
            raise ValueError("value must be a callable Python function")
        self.set_value(value)
        self.set_attribute(value)

    def __str__(self):
        # BUGFIX: the function name is stored under the "name" attribute (see
        # ``set_attribute``), not "function_name"; the old key raised on access.
        return f"PickledFunction<{self.base.attributes.get('name')}> pk={self.pk}"

    @property
    def metadata(self):
        """Return a dictionary of metadata."""
        return {
            "name": self.base.attributes.get("name"),
            "import_statements": self.base.attributes.get("import_statements"),
            "source_code": self.base.attributes.get("source_code"),
            "source_code_without_decorator": self.base.attributes.get("source_code_without_decorator"),
            "type": "function",
            "is_pickle": True,
        }

    @classmethod
    def build_callable(cls, func):
        """Return the executor for this node: the pickled function plus its metadata."""
        import cloudpickle as pickle

        executor = {
            "executor": pickle.dumps(func),
            "type": "function",
            "is_pickle": True,
        }
        executor.update(cls.inspect_function(func))
        return executor

    def set_attribute(self, value):
        """Store the inspected metadata (name, source code, imports) of *value*.

        :param value: The Python function to pickle and store.
        """
        # Serialize the function and extract metadata
        serialized_data = self.inspect_function(value)

        # Store relevant metadata
        self.base.attributes.set("name", serialized_data["name"])
        self.base.attributes.set("import_statements", serialized_data["import_statements"])
        self.base.attributes.set("source_code", serialized_data["source_code"])
        self.base.attributes.set(
            "source_code_without_decorator",
            serialized_data["source_code_without_decorator"],
        )

    @classmethod
    def inspect_function(cls, func: Callable) -> Dict[str, Any]:
        """Serialize a function for storage or transmission.

        :returns: dict with keys ``name``, ``source_code``,
            ``source_code_without_decorator`` and ``import_statements``
            (all empty strings if the source cannot be inspected).
        """
        try:
            # we need save the source code explicitly, because in the case of jupyter notebook,
            # the source code is not saved in the pickle file
            source_code = inspect.getsource(func)
            source_code_lines = source_code.split("\n")
            function_source_code = "\n".join(source_code_lines)
            # Find the first line of the actual function definition (skipping decorators).
            # NOTE(review): if no line starts with "def " the last line is used
            # as the start -- TODO confirm this is the intended fallback.
            for i, line in enumerate(source_code_lines):
                if line.strip().startswith("def "):
                    break
            function_source_code_without_decorator = "\n".join(source_code_lines[i:])
            function_source_code_without_decorator = textwrap.dedent(function_source_code_without_decorator)
            # we also need to include the necessary imports for the types used in the type hints.
            try:
                required_imports = cls.get_required_imports(func)
            except Exception as e:
                required_imports = {}
                print(f"Failed to get required imports for function {func.__name__}: {e}")
            # Generate import statements
            import_statements = "\n".join(
                f"from {module} import {', '.join(types)}" for module, types in required_imports.items()
            )
        except Exception as e:
            print(f"Failed to inspect function {func.__name__}: {e}")
            function_source_code = ""
            function_source_code_without_decorator = ""
            import_statements = ""
        return {
            "name": func.__name__,
            "source_code": function_source_code,
            "source_code_without_decorator": function_source_code_without_decorator,
            "import_statements": import_statements,
        }

    @classmethod
    def get_required_imports(cls, func: Callable) -> Dict[str, set]:
        """Retrieve type hints and the corresponding modules.

        :returns: mapping of module name to the set of type names to import.
        """
        type_hints = get_type_hints(func)
        imports: Dict[str, set] = {}

        def add_imports(type_hint):
            if isinstance(type_hint, _SpecialForm):  # Handle special forms like Any, Union, Optional
                module_name = "typing"
                type_name = type_hint._name or str(type_hint)
            elif hasattr(type_hint, "__origin__"):  # This checks for higher-order types like List, Dict
                module_name = type_hint.__module__
                type_name = getattr(type_hint, "_name", None) or getattr(type_hint.__origin__, "__name__", None)
                for arg in getattr(type_hint, "__args__", []):
                    if arg is type(None):  # skip the NoneType of Optional[...]
                        continue
                    add_imports(arg)  # Recursively add imports for each argument
            elif hasattr(type_hint, "__module__"):
                module_name = type_hint.__module__
                type_name = type_hint.__name__
            else:
                return  # If no module or origin, we can't import it, e.g., for literals

            if type_name is not None:
                imports.setdefault(module_name, set()).add(type_name)

        for type_hint in type_hints.values():
            add_imports(type_hint)

        return imports
137
+
138
+
139
def to_pickled_function(value):
    """Wrap a plain Python callable in a :class:`PickledFunction` node."""
    return PickledFunction(value)
142
+
143
+
144
class PickledLocalFunction(PickledFunction):
    """Subclass of ``PickledFunction`` representing a local function."""
@@ -0,0 +1,122 @@
1
+ import sys
2
+ from importlib.metadata import entry_points
3
+ from typing import Any
4
+
5
+ from aiida import common, orm
6
+
7
+ from aiida_pythonjob.config import load_config
8
+
9
+ from .pickled_data import PickledData
10
+
11
+
12
+ def get_serializer_from_entry_points() -> dict:
13
+ """Retrieve the serializer from the entry points."""
14
+ # import time
15
+
16
+ # ts = time.time()
17
+ configs = load_config()
18
+ serializers = configs.get("serializers", {})
19
+ excludes = serializers.get("excludes", [])
20
+ # Retrieve the entry points for 'aiida.data' and store them in a dictionary
21
+ eps = entry_points()
22
+ if sys.version_info >= (3, 10):
23
+ group = eps.select(group="aiida.data")
24
+ else:
25
+ group = eps.get("aiida.data", [])
26
+ eps = {}
27
+ for ep in group:
28
+ # split the entry point name by first ".", and check the last part
29
+ key = ep.name.split(".", 1)[-1]
30
+ # skip key without "." because it is not a module name for a data type
31
+ if "." not in key or key in excludes:
32
+ continue
33
+ eps.setdefault(key, [])
34
+ eps[key].append(ep)
35
+
36
+ # print("Time to load entry points: ", time.time() - ts)
37
+ # check if there are duplicates
38
+ for key, value in eps.items():
39
+ if len(value) > 1:
40
+ if key in serializers:
41
+ [ep for ep in value if ep.name == serializers[key]]
42
+ eps[key] = [ep for ep in value if ep.name == serializers[key]]
43
+ if not eps[key]:
44
+ raise ValueError(f"Entry point {serializers[key]} not found for {key}")
45
+ else:
46
+ msg = f"Duplicate entry points for {key}: {[ep.name for ep in value]}"
47
+ raise ValueError(msg)
48
+ return eps
49
+
50
+
51
# Build the serializer registry once at import time and reuse it for every call.
eps = get_serializer_from_entry_points()
52
+
53
+
54
+ def serialize_to_aiida_nodes(inputs: dict | None = None) -> dict:
55
+ """Serialize the inputs to a dictionary of AiiDA data nodes.
56
+
57
+ Args:
58
+ inputs (dict): The inputs to be serialized.
59
+
60
+ Returns:
61
+ dict: The serialized inputs.
62
+ """
63
+ new_inputs = {}
64
+ # save all kwargs to inputs port
65
+ for key, data in inputs.items():
66
+ new_inputs[key] = general_serializer(data)
67
+ return new_inputs
68
+
69
+
70
def clean_dict_key(data):
    """Replace "." with "__dot__" in the keys of a dictionary, recursively.

    Non-dict values are returned unchanged.
    """
    if not isinstance(data, dict):
        return data
    return {key.replace(".", "__dot__"): clean_dict_key(val) for key, val in data.items()}
75
+
76
+
77
def general_serializer(data: Any, check_value=True) -> orm.Node:
    """Serialize the data to an AiiDA data node.

    Already-serialized inputs (AiiDA nodes, ``AttributeDict``, ``"{{...}}"``
    context strings) are passed through.  Anything else is serialized via the
    entry point registered for its type, falling back to ``PickledData``.

    :param data: the raw value to serialize.
    :param check_value: require that an AiiDA node exposes a ``value`` attribute.
    :raises ValueError: if the data cannot be serialized at all.
    """
    if isinstance(data, orm.Data):
        if check_value and not hasattr(data, "value"):
            raise ValueError("Only AiiDA data Node with a value attribute is allowed.")
        return data
    elif isinstance(data, common.extendeddicts.AttributeDict):
        # if the data is an AttributeDict, use it directly
        return data
    # if is string with syntax {{}}, this is a port will read data from ctx
    elif isinstance(data, str) and data.startswith("{{") and data.endswith("}}"):
        return data
    # if data is a class instance, get its __module__ and class name as a string
    # for example, an Atoms will have ase.atoms.Atoms
    else:
        data = clean_dict_key(data)
        # try to get the serializer from the entry points
        data_type = type(data)
        ep_key = f"{data_type.__module__}.{data_type.__name__}"
        # search for the key in the entry points
        if ep_key in eps:
            # BUGFIX: the original ``try/except/finally`` returned from the
            # ``finally`` block, which swallowed the ``ValueError`` raised in
            # ``except``; the effective behavior -- fall back to pickling on
            # any failure -- is now written out explicitly.
            try:
                new_node = eps[ep_key][0].load()(data)
                new_node.store()
                return new_node
            except Exception:
                pass  # fall through to the PickledData fallback below
        # try to serialize the data as a PickledData
        try:
            new_node = PickledData(data)
            new_node.store()
            return new_node
        except Exception as e:
            raise ValueError(f"Error in serializing {ep_key}: {e}")
@@ -0,0 +1,67 @@
1
+ from typing import Any, Callable
2
+
3
+ from aiida.orm import AbstractCode, Computer, FolderData, List, SinglefileData, Str
4
+
5
+ from .data.pickled_function import PickledFunction
6
+ from .data.serializer import serialize_to_aiida_nodes
7
+ from .utils import get_or_create_code
8
+
9
+
10
def prepare_pythonjob_inputs(
    function: Callable[..., Any],
    function_inputs: dict[str, Any] | None = None,
    function_outputs: dict[str, Any] | None = None,
    code: AbstractCode | None = None,
    command_info: dict[str, str] | None = None,
    computer: str | Computer = "localhost",
    metadata: dict[str, Any] | None = None,
    upload_files: dict[str, str] | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """Prepare the inputs for PythonJob.

    :param function: the Python function to run remotely.
    :param function_inputs: keyword arguments for the function; serialized to AiiDA nodes.
    :param function_outputs: specification of the output ports.
    :param code: the AiiDA code to run with; created on *computer* when ``None``.
    :param command_info: extra keyword arguments used when creating the code.
    :param computer: label (or ``Computer``) used when creating the code.
    :param metadata: metadata passed through to the CalcJob.
    :param upload_files: mapping of target name to a local path or an existing
        ``SinglefileData``/``FolderData`` node.
    :returns: dictionary of inputs ready for ``run``/``submit``.
    """
    import os

    # BUGFIX: replaced the mutable default argument ``upload_files={}`` (shared
    # between calls) with ``None`` plus an explicit fallback.
    upload_files = upload_files or {}
    # get the names kwargs for the PythonJob, which are the inputs before _wait
    executor = PickledFunction.build_callable(function)
    new_upload_files = {}
    # change the string in the upload files to SingleFileData, or FolderData
    for key, source in upload_files.items():
        # only alphanumeric and underscores are allowed in the key
        # replace all "." with "_dot_"
        new_key = key.replace(".", "_dot_")
        if isinstance(source, str):
            if os.path.isfile(source):
                new_upload_files[new_key] = SinglefileData(file=source)
            elif os.path.isdir(source):
                new_upload_files[new_key] = FolderData(tree=source)
            # NOTE(review): a string that is neither an existing file nor a
            # directory is silently skipped -- TODO confirm this is intended.
        elif isinstance(source, (SinglefileData, FolderData)):
            new_upload_files[new_key] = source
        else:
            raise ValueError(f"Invalid upload file type: {type(source)}, {source}")
    #
    if code is None:
        command_info = command_info or {}
        code = get_or_create_code(computer=computer, **command_info)
    # get the source code of the function
    function_name = executor["name"]
    if executor.get("is_pickle", False):
        function_source_code = executor["import_statements"] + "\n" + executor["source_code_without_decorator"]
    else:
        function_source_code = f"from {executor['module']} import {function_name}"

    # serialize the kwargs into AiiDA Data
    function_inputs = function_inputs or {}
    function_inputs = serialize_to_aiida_nodes(function_inputs)
    # transfer the args to kwargs
    inputs = {
        "process_label": "PythonJob<{}>".format(function_name),
        "function_source_code": Str(function_source_code),
        "function_name": Str(function_name),
        "code": code,
        "function_inputs": function_inputs,
        "upload_files": new_upload_files,
        "function_outputs": List(function_outputs),
        "metadata": metadata or {},
        **kwargs,
    }
    return inputs
@@ -0,0 +1,3 @@
1
+ from .pythonjob import PythonJobParser
2
+
3
+ __all__ = ("PythonJobParser",)
@@ -0,0 +1,111 @@
1
+ """Parser for an `PythonJob` job."""
2
+
3
+ from aiida.engine import ExitCode
4
+ from aiida.parsers.parser import Parser
5
+
6
+ from aiida_pythonjob.data.serializer import general_serializer
7
+
8
+
9
class PythonJobParser(Parser):
    """Parser for an `PythonJob` job."""

    def parse(self, **kwargs):
        """Parse the contents of the output files stored in the `retrieved` output node.

        The function_outputs could be a namespace, e.g.,
        function_outputs=[
            {"identifier": "namespace", "name": "add_multiply"},
            {"name": "add_multiply.add"},
            {"name": "add_multiply.multiply"},
            {"name": "minus"},
        ]

        :returns: ``None`` on success, otherwise an ``ExitCode``.
        """
        import pickle

        function_outputs = self.node.inputs.function_outputs.get_list()
        if len(function_outputs) == 0:
            function_outputs = [{"name": "result"}]
        self.output_list = function_outputs
        # first we remove nested outputs, e.g., "add_multiply.add"
        top_level_output_list = [output for output in self.output_list if "." not in output["name"]]
        exit_code = 0
        try:
            with self.retrieved.base.repository.open("results.pickle", "rb") as handle:
                results = pickle.load(handle)
                if isinstance(results, tuple):
                    if len(top_level_output_list) != len(results):
                        # BUGFIX: the exit code was evaluated but never returned,
                        # so the mismatch was silently ignored.
                        return self.exit_codes.ERROR_RESULT_OUTPUT_MISMATCH
                    for i in range(len(top_level_output_list)):
                        top_level_output_list[i]["value"] = self.serialize_output(results[i], top_level_output_list[i])
                elif isinstance(results, dict) and len(top_level_output_list) > 1:
                    # pop the exit code if it exists
                    exit_code = results.pop("exit_code", 0)
                    for output in top_level_output_list:
                        if output.get("required", False) and output["name"] not in results:
                            # BUGFIX: previously this exit code was evaluated but
                            # not returned.  NOTE(review): ERROR_MISSING_OUTPUT must
                            # be registered on the PythonJob spec -- TODO confirm.
                            return self.exit_codes.ERROR_MISSING_OUTPUT
                        output["value"] = self.serialize_output(results.pop(output["name"]), output)
                    # if there are any remaining results, raise an warning
                    if results:
                        self.logger.warning(
                            f"Found extra results that are not included in the output: {results.keys()}"
                        )
                elif isinstance(results, dict) and len(top_level_output_list) == 1:
                    exit_code = results.pop("exit_code", 0)
                    # if output name in results, use it
                    if top_level_output_list[0]["name"] in results:
                        top_level_output_list[0]["value"] = self.serialize_output(
                            results[top_level_output_list[0]["name"]],
                            top_level_output_list[0],
                        )
                    # otherwise, we assume the results is the output
                    else:
                        top_level_output_list[0]["value"] = self.serialize_output(results, top_level_output_list[0])
                elif len(top_level_output_list) == 1:
                    # otherwise, we assume the results is the output
                    top_level_output_list[0]["value"] = self.serialize_output(results, top_level_output_list[0])
                else:
                    raise ValueError("The number of results does not match the number of outputs.")
            for output in top_level_output_list:
                self.out(output["name"], output["value"])
            if exit_code:
                # the remote function may report its own exit code, either as an
                # integer or as a {"status": ..., "message": ...} dictionary
                if isinstance(exit_code, dict):
                    exit_code = ExitCode(exit_code["status"], exit_code["message"])
                elif isinstance(exit_code, int):
                    exit_code = ExitCode(exit_code)
                return exit_code
        except OSError:
            return self.exit_codes.ERROR_READING_OUTPUT_FILE
        except ValueError as exception:
            self.logger.error(exception)
            return self.exit_codes.ERROR_INVALID_OUTPUT

    def find_output(self, name):
        """Find the output specification with the given name, or ``None``."""
        for output in self.output_list:
            if output["name"] == name:
                return output
        return None

    def serialize_output(self, result, output):
        """Serialize outputs.

        :raises ValueError: if a namespace output is not a dictionary (mapped
            to ``ERROR_INVALID_OUTPUT`` by :meth:`parse`).
        """
        name = output["name"]
        if output.get("identifier", "Any").upper() in ["NAMESPACE", "WORKGRAPH.NAMESPACE"]:
            if not isinstance(result, dict):
                # BUGFIX: previously ``self.exit_codes.ERROR_INVALID_OUTPUT`` was
                # evaluated without effect and ``None`` was returned; raising
                # ValueError lets ``parse`` report ERROR_INVALID_OUTPUT.
                raise ValueError(f"Expected a dictionary for namespace output '{name}', got {type(result)}.")
            serialized_result = {}
            for key, value in result.items():
                full_name = f"{name}.{key}"
                full_name_output = self.find_output(full_name)
                if full_name_output and full_name_output.get("identifier", "Any").upper() in [
                    "NAMESPACE",
                    "WORKGRAPH.NAMESPACE",
                ]:
                    serialized_result[key] = self.serialize_output(value, full_name_output)
                else:
                    serialized_result[key] = general_serializer(value)
            return serialized_result
        else:
            return general_serializer(result)
@@ -0,0 +1,31 @@
1
+ from typing import Optional
2
+
3
+ from aiida.common.exceptions import NotExistent
4
+ from aiida.orm import Computer, InstalledCode, load_code, load_computer
5
+
6
+
7
def get_or_create_code(
    label: str = "python3",
    computer: Optional[str | Computer] = "localhost",
    filepath_executable: Optional[str] = None,
    prepend_text: str = "",
):
    """Load an existing code, creating and storing it if it does not exist.

    :param label: label of the code; also the default executable name.
    :param computer: label of an existing computer, or a ``Computer`` node.
    :param filepath_executable: path to the executable on the computer;
        defaults to ``label`` when not given.
    :param prepend_text: shell snippet prepended to the submission script.
    :return: the loaded or newly created (and stored) ``InstalledCode``.
    """
    try:
        return load_code(f"{label}@{computer}")
    except NotExistent:
        description = f"Code on computer: {computer}"
        # Robustness fix: the signature accepts a ``Computer`` node, but the
        # original unconditionally called ``load_computer``, which only
        # accepts an identifier. Only resolve when given a label/identifier.
        if not isinstance(computer, Computer):
            computer = load_computer(computer)
        filepath_executable = filepath_executable or label
        code = InstalledCode(
            computer=computer,
            label=label,
            description=description,
            filepath_executable=filepath_executable,
            default_calc_job_plugin="pythonjob.pythonjob",
            prepend_text=prepend_text,
        )
        code.store()
        return code
@@ -0,0 +1,82 @@
1
+ Metadata-Version: 2.3
2
+ Name: aiida-pythonjob
3
+ Version: 0.1.0
4
+ Summary: Run Python functions on a remote computer.
5
+ Project-URL: Source, https://github.com/aiidateam/aiida-pythonjob
6
+ Author-email: Xing Wang <xingwang1991@gmail.com>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2024 AiiDA team
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+ Keywords: aiida,plugin
29
+ Classifier: Development Status :: 3 - Alpha
30
+ Classifier: Framework :: AiiDA
31
+ Classifier: Intended Audience :: Science/Research
32
+ Classifier: License :: OSI Approved :: MIT License
33
+ Classifier: Natural Language :: English
34
+ Classifier: Programming Language :: Python
35
+ Requires-Python: >=3.9
36
+ Requires-Dist: aiida-core<3,>=2.3
37
+ Requires-Dist: cloudpickle
38
+ Requires-Dist: voluptuous
39
+ Provides-Extra: docs
40
+ Requires-Dist: furo; extra == 'docs'
41
+ Requires-Dist: markupsafe<2.1; extra == 'docs'
42
+ Requires-Dist: nbsphinx; extra == 'docs'
43
+ Requires-Dist: sphinx; extra == 'docs'
44
+ Requires-Dist: sphinx-gallery; extra == 'docs'
45
+ Requires-Dist: sphinx-rtd-theme; extra == 'docs'
46
+ Requires-Dist: sphinxcontrib-contentui; extra == 'docs'
47
+ Requires-Dist: sphinxcontrib-details-directive; extra == 'docs'
48
+ Provides-Extra: pre-commit
49
+ Requires-Dist: pre-commit~=3.5; extra == 'pre-commit'
50
+ Description-Content-Type: text/markdown
51
+
52
+ # AiiDA-PythonJob
53
+ [![PyPI version](https://badge.fury.io/py/aiida-pythonjob.svg)](https://badge.fury.io/py/aiida-pythonjob)
54
+ [![Unit test](https://github.com/aiidateam/aiida-pythonjob/actions/workflows/ci.yml/badge.svg)](https://github.com/aiidateam/aiida-pythonjob/actions/workflows/ci.yml)
55
+ [![codecov](https://codecov.io/gh/aiidateam/aiida-pythonjob/branch/main/graph/badge.svg)](https://codecov.io/gh/aiidateam/aiida-pythonjob)
56
+ [![Docs status](https://readthedocs.org/projects/aiida-pythonjob/badge)](http://aiida-pythonjob.readthedocs.io/)
57
+
58
+ Run Python functions on a remote computer with AiiDA, with automatic provenance tracking, checkpointing, and error handling.
59
+
60
+
61
+
62
+ ## Installation
63
+
64
+ ```console
65
+ pip install aiida-pythonjob
66
+ ```
67
+
68
+ To install the latest version from source, first clone the repository and then install using `pip`:
69
+
70
+ ```console
71
+ git clone https://github.com/aiidateam/aiida-pythonjob
72
+ cd aiida-pythonjob
73
+ pip install -e .
74
+ ```
75
+
76
+
77
+ ## Documentation
78
+ Explore the comprehensive [documentation](https://aiida-pythonjob.readthedocs.io/en/latest/) to discover all the features and capabilities of AiiDA-PythonJob.
79
+
80
+
81
+ ## License
82
+ [MIT](http://opensource.org/licenses/MIT)
@@ -0,0 +1,17 @@
1
+ aiida_pythonjob/__init__.py,sha256=8PyvN-j-1t8OpA30SINJmQrtBHcfce_V47Y1clUu1e8,383
2
+ aiida_pythonjob/config.py,sha256=ZHeS_7qRmkea4ltWpju5vsOHvYfi9mVdpklWfUu2LRs,384
3
+ aiida_pythonjob/launch.py,sha256=Ch9TtgJA6IXlvapg8sspC_QDgni0xJWj1OIISimbMs0,2709
4
+ aiida_pythonjob/utils.py,sha256=HqxmYD76xFwqwBR3jezHNM0zw1uB1WPCn6dHH3NLJDM,947
5
+ aiida_pythonjob/calculations/__init__.py,sha256=-xxZoADS038pH5T_ssINO-Ql5q7SY31nPYHlua6f0Ns,59
6
+ aiida_pythonjob/calculations/pythonjob.py,sha256=ktE67rIx0yitf7wek4XMGB8nhNAg1omgh2juNqlRMJo,11826
7
+ aiida_pythonjob/data/__init__.py,sha256=fgl_e1ZbQ4It8l1zfQUM1p3dcIWitG-A8RHpy6N7wgk,130
8
+ aiida_pythonjob/data/pickled_data.py,sha256=x4ZQOlExzdbww2_ojg5BxKGr4-didDMjXRUXLGUgUD8,2901
9
+ aiida_pythonjob/data/pickled_function.py,sha256=VNpbXKxgntXKKhfxrvdJKs9NtV6ormAwcJ5BQvlSMmM,5975
10
+ aiida_pythonjob/data/serializer.py,sha256=a0Gyx3SH6J9C7Vra1DKSRyDpBHAEn8vtu1En-mlemfs,4454
11
+ aiida_pythonjob/parsers/__init__.py,sha256=-gwCpe5K-lMR1Yonp0AKST_z3y63xsfDoFRpTnHSfHw,71
12
+ aiida_pythonjob/parsers/pythonjob.py,sha256=WM_nNKUXUcnWgoqZbbosI_AKBCphlt2ZXsV7XGnilz8,5386
13
+ aiida_pythonjob-0.1.0.dist-info/METADATA,sha256=avZe5Bw0XFo0SGsWGhIGYjrdaQHgsDwaqtj8gVJeNFc,3606
14
+ aiida_pythonjob-0.1.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
15
+ aiida_pythonjob-0.1.0.dist-info/entry_points.txt,sha256=F7WrhW9wyC4WIu5FYxozTGuzrJGcaY1HHk_WeVU9PIM,349
16
+ aiida_pythonjob-0.1.0.dist-info/licenses/LICENSE,sha256=YHoQimZwJx-5oMGat60FgIuvY6rTfVSJN_NPe66nlPc,1067
17
+ aiida_pythonjob-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.26.3
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,9 @@
1
+ [aiida.calculations]
2
+ pythonjob.pythonjob = aiida_pythonjob.calculations.pythonjob:PythonJob
3
+
4
+ [aiida.data]
5
+ pythonjob.pickled_data = aiida_pythonjob.data.pickled_data:PickledData
6
+ pythonjob.pickled_function = aiida_pythonjob.data.pickled_function:PickledFunction
7
+
8
+ [aiida.parsers]
9
+ pythonjob.pythonjob = aiida_pythonjob.parsers.pythonjob:PythonJobParser
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 AiiDA team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.