python-workflow-definition 0.0.1__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (15)
  1. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/PKG-INFO +2 -1
  2. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/pyproject.toml +6 -5
  3. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/src/python_workflow_definition/aiida.py +19 -10
  4. python_workflow_definition-0.1.0/src/python_workflow_definition/cwl/__init__.py +240 -0
  5. python_workflow_definition-0.1.0/src/python_workflow_definition/cwl/__main__.py +51 -0
  6. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/src/python_workflow_definition/executorlib.py +7 -3
  7. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/src/python_workflow_definition/jobflow.py +25 -8
  8. python_workflow_definition-0.1.0/src/python_workflow_definition/models.py +254 -0
  9. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/src/python_workflow_definition/plot.py +6 -5
  10. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/src/python_workflow_definition/purepython.py +7 -3
  11. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/src/python_workflow_definition/pyiron_base.py +28 -9
  12. python_workflow_definition-0.1.0/src/python_workflow_definition/shared.py +117 -0
  13. python_workflow_definition-0.0.1/src/python_workflow_definition/shared.py +0 -45
  14. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/.gitignore +0 -0
  15. {python_workflow_definition-0.0.1 → python_workflow_definition-0.1.0}/src/python_workflow_definition/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python_workflow_definition
3
- Version: 0.0.1
3
+ Version: 0.1.0
4
4
  Summary: Python Workflow Definition - workflow interoperability for aiida, jobflow and pyiron
5
5
  Author-email: Jan Janssen <janssen@mpie.de>, Janine George <janine.geogre@bam.de>, Julian Geiger <julian.geiger@psi.ch>, Xing Wang <xing.wang@psi.ch>, Marnik Bercx <marnik.bercx@psi.ch>, Christina Ertural <christina.ertural@bam.de>
6
6
  License: BSD 3-Clause License
@@ -35,6 +35,7 @@ License: BSD 3-Clause License
35
35
  Requires-Dist: aiida-workgraph<=0.5.2,>=0.5.1
36
36
  Requires-Dist: jobflow<=0.1.19,>=0.1.18
37
37
  Requires-Dist: numpy<2,>=1.21
38
+ Requires-Dist: pydantic<=2.11.4,>=2.7.0
38
39
  Requires-Dist: pyiron-base<=0.11.11,>=0.11.10
39
40
  Provides-Extra: plot
40
41
  Requires-Dist: ipython<=9.0.2,>=7.33.0; extra == 'plot'
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "python_workflow_definition"
7
- version = "0.0.1"
7
+ version = "0.1.0"
8
8
  description = "Python Workflow Definition - workflow interoperability for aiida, jobflow and pyiron"
9
9
  authors = [
10
10
  { name = "Jan Janssen", email = "janssen@mpie.de" },
@@ -16,10 +16,11 @@ authors = [
16
16
  ]
17
17
  license = { file = "../LICENSE" }
18
18
  dependencies = [
19
- "aiida-workgraph>=0.5.1,<=0.5.2",
20
- "numpy>=1.21,<2",
21
- "jobflow>=0.1.18,<=0.1.19",
22
- "pyiron_base>=0.11.10,<=0.11.11",
19
+ "aiida-workgraph>=0.5.1,<=0.5.2",
20
+ "numpy>=1.21,<2",
21
+ "jobflow>=0.1.18,<=0.1.19",
22
+ "pyiron_base>=0.11.10,<=0.11.11",
23
+ "pydantic>=2.7.0,<=2.11.4",
23
24
  ]
24
25
 
25
26
  [project.optional-dependencies]
@@ -1,5 +1,4 @@
1
1
  from importlib import import_module
2
- import json
3
2
  import traceback
4
3
 
5
4
  from aiida import orm
@@ -7,20 +6,29 @@ from aiida_pythonjob.data.serializer import general_serializer
7
6
  from aiida_workgraph import WorkGraph, task
8
7
  from aiida_workgraph.socket import TaskSocketNamespace
9
8
 
9
+ from python_workflow_definition.models import PythonWorkflowDefinitionWorkflow
10
10
  from python_workflow_definition.shared import (
11
11
  convert_nodes_list_to_dict,
12
+ update_node_names,
13
+ remove_result,
14
+ set_result_node,
12
15
  NODES_LABEL,
13
16
  EDGES_LABEL,
14
17
  SOURCE_LABEL,
15
18
  SOURCE_PORT_LABEL,
16
19
  TARGET_LABEL,
17
20
  TARGET_PORT_LABEL,
21
+ VERSION_NUMBER,
22
+ VERSION_LABEL,
18
23
  )
19
24
 
20
25
 
21
26
  def load_workflow_json(file_name: str) -> WorkGraph:
22
- with open(file_name) as f:
23
- data = json.load(f)
27
+ data = remove_result(
28
+ workflow_dict=PythonWorkflowDefinitionWorkflow.load_json_file(
29
+ file_name=file_name
30
+ )
31
+ )
24
32
 
25
33
  wg = WorkGraph()
26
34
  task_name_mapping = {}
@@ -88,7 +96,7 @@ def write_workflow_json(wg: WorkGraph, file_name: str) -> dict:
88
96
 
89
97
  callable_name = executor["callable_name"]
90
98
  callable_name = f"{executor['module_path']}.{callable_name}"
91
- data[NODES_LABEL].append({"id": i, "function": callable_name})
99
+ data[NODES_LABEL].append({"id": i, "type": "function", "value": callable_name})
92
100
  i += 1
93
101
 
94
102
  for link in wg.links:
@@ -117,7 +125,9 @@ def write_workflow_json(wg: WorkGraph, file_name: str) -> dict:
117
125
  raw_value.pop("node_type", None)
118
126
  else:
119
127
  raw_value = input.value.value
120
- data[NODES_LABEL].append({"id": i, "value": raw_value})
128
+ data[NODES_LABEL].append(
129
+ {"id": i, "type": "input", "value": raw_value}
130
+ )
121
131
  input_node_name = i
122
132
  data_node_name_mapping[input.value.uuid] = input_node_name
123
133
  i += 1
@@ -131,8 +141,7 @@ def write_workflow_json(wg: WorkGraph, file_name: str) -> dict:
131
141
  SOURCE_PORT_LABEL: None,
132
142
  }
133
143
  )
134
- with open(file_name, "w") as f:
135
- # json.dump({"nodes": data[], "edges": edges_new_lst}, f)
136
- json.dump(data, f, indent=2)
137
-
138
- return data
144
+ data[VERSION_LABEL] = VERSION_NUMBER
145
+ PythonWorkflowDefinitionWorkflow(
146
+ **set_result_node(workflow_dict=update_node_names(workflow_dict=data))
147
+ ).dump_json_file(file_name=file_name, indent=2)
@@ -0,0 +1,240 @@
1
+ import json
2
+ import pickle
3
+ from yaml import CDumper as Dumper, dump
4
+
5
+
6
+ from python_workflow_definition.purepython import (
7
+ group_edges,
8
+ resort_total_lst,
9
+ )
10
+ from python_workflow_definition.shared import (
11
+ convert_nodes_list_to_dict,
12
+ remove_result,
13
+ EDGES_LABEL,
14
+ NODES_LABEL,
15
+ TARGET_LABEL,
16
+ TARGET_PORT_LABEL,
17
+ SOURCE_LABEL,
18
+ SOURCE_PORT_LABEL,
19
+ )
20
+
21
+
22
+ def _get_function_argument(argument: str, position: int = 3) -> dict:
23
+ return {
24
+ argument
25
+ + "_file": {
26
+ "type": "File",
27
+ "inputBinding": {
28
+ "prefix": "--arg_" + argument + "=",
29
+ "separate": False,
30
+ "position": position,
31
+ },
32
+ },
33
+ }
34
+
35
+
36
+ def _get_function_template(function_name: str) -> dict:
37
+ return {
38
+ "function": {
39
+ "default": function_name,
40
+ "inputBinding": {"position": 3, "prefix": "--function=", "separate": False},
41
+ "type": "string",
42
+ },
43
+ }
44
+
45
+
46
+ def _get_output_name(output_name: str) -> dict:
47
+ return {
48
+ output_name
49
+ + "_file": {"type": "File", "outputBinding": {"glob": output_name + ".pickle"}}
50
+ }
51
+
52
+
53
+ def _get_function(workflow):
54
+ function_nodes_dict = {
55
+ n["id"]: n["value"] for n in workflow[NODES_LABEL] if n["type"] == "function"
56
+ }
57
+ funct_dict = {}
58
+ for funct_id in function_nodes_dict.keys():
59
+ target_ports = list(
60
+ set(
61
+ [
62
+ e[TARGET_PORT_LABEL]
63
+ for e in workflow[EDGES_LABEL]
64
+ if e["target"] == funct_id
65
+ ]
66
+ )
67
+ )
68
+ source_ports = list(
69
+ set(
70
+ [
71
+ e[SOURCE_PORT_LABEL]
72
+ for e in workflow[EDGES_LABEL]
73
+ if e["source"] == funct_id
74
+ ]
75
+ )
76
+ )
77
+ funct_dict[funct_id] = {
78
+ "targetPorts": target_ports,
79
+ "sourcePorts": source_ports,
80
+ }
81
+ return function_nodes_dict, funct_dict
82
+
83
+
84
+ def _write_function_cwl(workflow):
85
+ function_nodes_dict, funct_dict = _get_function(workflow)
86
+
87
+ for i in function_nodes_dict.keys():
88
+ template = {
89
+ "cwlVersion": "v1.2",
90
+ "class": "CommandLineTool",
91
+ "baseCommand": "python",
92
+ "inputs": {
93
+ "wrapper": {
94
+ "type": "string",
95
+ "inputBinding": {"position": 1, "prefix": "-m"},
96
+ "default": "python_workflow_definition.cwl",
97
+ },
98
+ "workflowfile": {
99
+ "type": "File",
100
+ "inputBinding": {
101
+ "position": 2,
102
+ "prefix": "--workflowfile=",
103
+ "separate": False,
104
+ },
105
+ "default": {"class": "File", "location": "workflow.py"},
106
+ },
107
+ },
108
+ "outputs": {},
109
+ }
110
+ file_name = function_nodes_dict[i].split(".")[-1] + "_" + str(i) + ".cwl"
111
+ if function_nodes_dict[i].split(".")[0] != "python_workflow_definition":
112
+ template["inputs"]["workflowfile"]["default"]["location"] = (
113
+ function_nodes_dict[i].split(".")[0] + ".py"
114
+ )
115
+ else:
116
+ del template["inputs"]["workflowfile"]
117
+ template["inputs"].update(
118
+ _get_function_template(function_name=function_nodes_dict[i])
119
+ )
120
+ for j, arg in enumerate(funct_dict[i]["targetPorts"]):
121
+ template["inputs"].update(
122
+ _get_function_argument(argument=arg, position=4 + j)
123
+ )
124
+ for out in funct_dict[i]["sourcePorts"]:
125
+ if out is None:
126
+ template["outputs"].update(_get_output_name(output_name="result"))
127
+ else:
128
+ template["outputs"].update(_get_output_name(output_name=out))
129
+ with open(file_name, "w") as f:
130
+ dump(template, f, Dumper=Dumper)
131
+
132
+
133
+ def _write_workflow_config(workflow):
134
+ input_dict = {
135
+ n["name"]: n["value"] for n in workflow[NODES_LABEL] if n["type"] == "input"
136
+ }
137
+ with open("workflow.yml", "w") as f:
138
+ dump(
139
+ {
140
+ k + "_file": {"class": "File", "path": k + ".pickle"}
141
+ for k in input_dict.keys()
142
+ },
143
+ f,
144
+ Dumper=Dumper,
145
+ )
146
+ for k, v in input_dict.items():
147
+ with open(k + ".pickle", "wb") as f:
148
+ pickle.dump(v, f)
149
+
150
+
151
+ def _write_workflow(workflow):
152
+ workflow_template = {
153
+ "cwlVersion": "v1.2",
154
+ "class": "Workflow",
155
+ "inputs": {},
156
+ "steps": {},
157
+ "outputs": {},
158
+ }
159
+ input_dict = {
160
+ n["name"]: n["value"] for n in workflow[NODES_LABEL] if n["type"] == "input"
161
+ }
162
+ function_nodes_dict, funct_dict = _get_function(workflow)
163
+ result_id = [n["id"] for n in workflow[NODES_LABEL] if n["type"] == "output"][0]
164
+ last_compute_id = [
165
+ e[SOURCE_LABEL] for e in workflow[EDGES_LABEL] if e[TARGET_LABEL] == result_id
166
+ ][0]
167
+ workflow_template["inputs"].update({k + "_file": "File" for k in input_dict.keys()})
168
+ if funct_dict[last_compute_id]["sourcePorts"] == [None]:
169
+ workflow_template["outputs"] = {
170
+ "result_file": {
171
+ "type": "File",
172
+ "outputSource": function_nodes_dict[last_compute_id].split(".")[-1]
173
+ + "_"
174
+ + str(last_compute_id)
175
+ + "/result_file",
176
+ },
177
+ }
178
+ else:
179
+ raise ValueError()
180
+
181
+ content = remove_result(workflow_dict=workflow)
182
+ edges_new_lst = content[EDGES_LABEL]
183
+ total_lst = group_edges(edges_new_lst)
184
+ nodes_new_dict = {
185
+ int(k): v
186
+ for k, v in convert_nodes_list_to_dict(nodes_list=content[NODES_LABEL]).items()
187
+ }
188
+ total_new_lst = resort_total_lst(total_lst=total_lst, nodes_dict=nodes_new_dict)
189
+ step_name_lst = {
190
+ t[0]: function_nodes_dict[t[0]].split(".")[-1] for t in total_new_lst
191
+ }
192
+ input_id_dict = {
193
+ n["id"]: n["name"] for n in workflow[NODES_LABEL] if n["type"] == "input"
194
+ }
195
+ for t in total_new_lst:
196
+ ind = t[0]
197
+ node_script = step_name_lst[ind] + "_" + str(ind) + ".cwl"
198
+ output = [
199
+ o + "_file" if o is not None else "result_file"
200
+ for o in funct_dict[ind]["sourcePorts"]
201
+ ]
202
+ in_dict = {}
203
+ for k, v in t[1].items():
204
+ if v[SOURCE_LABEL] in input_id_dict:
205
+ in_dict[k + "_file"] = input_id_dict[v[SOURCE_LABEL]] + "_file"
206
+ else:
207
+ if v["sourcePort"] is None:
208
+ in_dict[k + "_file"] = (
209
+ step_name_lst[v[SOURCE_LABEL]]
210
+ + "_"
211
+ + str(v[SOURCE_LABEL])
212
+ + "/result_file"
213
+ )
214
+ else:
215
+ in_dict[k + "_file"] = (
216
+ step_name_lst[v[SOURCE_LABEL]]
217
+ + "_"
218
+ + str(v[SOURCE_LABEL])
219
+ + "/"
220
+ + v[SOURCE_PORT_LABEL]
221
+ + "_file"
222
+ )
223
+ workflow_template["steps"].update(
224
+ {
225
+ step_name_lst[ind]
226
+ + "_"
227
+ + str(ind): {"run": node_script, "in": in_dict, "out": output}
228
+ }
229
+ )
230
+ with open("workflow.cwl", "w") as f:
231
+ dump(workflow_template, f, Dumper=Dumper)
232
+
233
+
234
+ def write_workflow(file_name: str):
235
+ with open(file_name, "r") as f:
236
+ workflow = json.load(f)
237
+
238
+ _write_function_cwl(workflow=workflow)
239
+ _write_workflow_config(workflow=workflow)
240
+ _write_workflow(workflow=workflow)
@@ -0,0 +1,51 @@
1
+ import sys
2
+ import pickle
3
+ from ast import literal_eval
4
+ import importlib.util
5
+
6
+
7
+ def load_function(file_name, funct):
8
+ spec = importlib.util.spec_from_file_location("workflow", file_name)
9
+ module = importlib.util.module_from_spec(spec)
10
+ sys.modules["workflow"] = module
11
+ spec.loader.exec_module(module)
12
+ return getattr(module, funct.split(".")[-1])
13
+
14
+
15
+ def convert_argument(arg):
16
+ if ".pickle" in arg:
17
+ with open(arg, "rb") as f:
18
+ return pickle.load(f)
19
+ else:
20
+ return literal_eval(arg)
21
+
22
+
23
+ if __name__ == "__main__":
24
+ # load input
25
+ argument_lst = sys.argv[1:]
26
+ funct_lst = [arg.split("=")[-1] for arg in argument_lst if "--function=" in arg]
27
+ file_lst = [arg.split("=")[-1] for arg in argument_lst if "--workflowfile=" in arg]
28
+ if len(file_lst) > 0:
29
+ workflow_function = load_function(file_name=file_lst[0], funct=funct_lst[0])
30
+ internal_function = False
31
+ else:
32
+ m, p = funct_lst[0].rsplit(".", 1)
33
+ workflow_function = getattr(importlib.import_module(m), p)
34
+ internal_function = True
35
+ kwargs = {
36
+ arg.split("=")[0][6:]: convert_argument(arg=arg.split("=")[-1])
37
+ for arg in argument_lst
38
+ if "--arg_" in arg
39
+ }
40
+
41
+ # evaluate function
42
+ result = workflow_function(**kwargs)
43
+
44
+ # store output
45
+ if isinstance(result, dict) and not internal_function:
46
+ for k, v in result.items():
47
+ with open(k + ".pickle", "wb") as f:
48
+ pickle.dump(v, f)
49
+ else:
50
+ with open("result.pickle", "wb") as f:
51
+ pickle.dump(result, f)
@@ -1,15 +1,16 @@
1
1
  from concurrent.futures import Executor
2
2
  from importlib import import_module
3
3
  from inspect import isfunction
4
- import json
5
4
 
6
5
 
6
+ from python_workflow_definition.models import PythonWorkflowDefinitionWorkflow
7
7
  from python_workflow_definition.shared import (
8
8
  get_dict,
9
9
  get_list,
10
10
  get_kwargs,
11
11
  get_source_handles,
12
12
  convert_nodes_list_to_dict,
13
+ remove_result,
13
14
  NODES_LABEL,
14
15
  EDGES_LABEL,
15
16
  SOURCE_LABEL,
@@ -37,8 +38,11 @@ def _get_value(result_dict: dict, nodes_new_dict: dict, link_dict: dict, exe: Ex
37
38
 
38
39
 
39
40
  def load_workflow_json(file_name: str, exe: Executor):
40
- with open(file_name, "r") as f:
41
- content = json.load(f)
41
+ content = remove_result(
42
+ workflow_dict=PythonWorkflowDefinitionWorkflow.load_json_file(
43
+ file_name=file_name
44
+ )
45
+ )
42
46
 
43
47
  edges_new_lst = content[EDGES_LABEL]
44
48
  nodes_new_dict = {}
@@ -1,22 +1,27 @@
1
- import json
2
1
  from importlib import import_module
3
2
  from inspect import isfunction
4
3
 
5
4
  import numpy as np
6
5
  from jobflow import job, Flow
7
6
 
7
+ from python_workflow_definition.models import PythonWorkflowDefinitionWorkflow
8
8
  from python_workflow_definition.shared import (
9
9
  get_dict,
10
10
  get_list,
11
11
  get_kwargs,
12
12
  get_source_handles,
13
+ update_node_names,
13
14
  convert_nodes_list_to_dict,
15
+ remove_result,
16
+ set_result_node,
14
17
  NODES_LABEL,
15
18
  EDGES_LABEL,
16
19
  SOURCE_LABEL,
17
20
  SOURCE_PORT_LABEL,
18
21
  TARGET_LABEL,
19
22
  TARGET_PORT_LABEL,
23
+ VERSION_NUMBER,
24
+ VERSION_LABEL,
20
25
  )
21
26
 
22
27
 
@@ -269,8 +274,11 @@ def _get_item_from_tuple(input_obj, index, index_lst):
269
274
 
270
275
 
271
276
  def load_workflow_json(file_name: str) -> Flow:
272
- with open(file_name, "r") as f:
273
- content = json.load(f)
277
+ content = remove_result(
278
+ workflow_dict=PythonWorkflowDefinitionWorkflow.load_json_file(
279
+ file_name=file_name
280
+ )
281
+ )
274
282
 
275
283
  edges_new_lst = []
276
284
  for edge in content[EDGES_LABEL]:
@@ -322,12 +330,21 @@ def write_workflow_json(flow: Flow, file_name: str = "workflow.json"):
322
330
  for k, v in nodes_dict.items():
323
331
  if isfunction(v):
324
332
  nodes_store_lst.append(
325
- {"id": k, "function": v.__module__ + "." + v.__name__}
333
+ {"id": k, "type": "function", "value": v.__module__ + "." + v.__name__}
326
334
  )
327
335
  elif isinstance(v, np.ndarray):
328
- nodes_store_lst.append({"id": k, "value": v.tolist()})
336
+ nodes_store_lst.append({"id": k, "type": "input", "value": v.tolist()})
329
337
  else:
330
- nodes_store_lst.append({"id": k, "value": v})
338
+ nodes_store_lst.append({"id": k, "type": "input", "value": v})
331
339
 
332
- with open(file_name, "w") as f:
333
- json.dump({NODES_LABEL: nodes_store_lst, EDGES_LABEL: edges_lst}, f)
340
+ PythonWorkflowDefinitionWorkflow(
341
+ **set_result_node(
342
+ workflow_dict=update_node_names(
343
+ workflow_dict={
344
+ VERSION_LABEL: VERSION_NUMBER,
345
+ NODES_LABEL: nodes_store_lst,
346
+ EDGES_LABEL: edges_lst,
347
+ }
348
+ )
349
+ )
350
+ ).dump_json_file(file_name=file_name, indent=2)
@@ -0,0 +1,254 @@
1
+ from pathlib import Path
2
+ from typing import List, Union, Optional, Literal, Any, Annotated, Type, TypeVar
3
+ from pydantic import BaseModel, Field, field_validator, field_serializer
4
+ from pydantic import ValidationError
5
+ import json
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ INTERNAL_DEFAULT_HANDLE = "__result__"
11
+ T = TypeVar("T", bound="PythonWorkflowDefinitionWorkflow")
12
+
13
+ __all__ = (
14
+ "PythonWorkflowDefinitionInputNode",
15
+ "PythonWorkflowDefinitionOutputNode",
16
+ "PythonWorkflowDefinitionFunctionNode",
17
+ "PythonWorkflowDefinitionEdge",
18
+ "PythonWorkflowDefinitionWorkflow",
19
+ )
20
+
21
+
22
+ class PythonWorkflowDefinitionBaseNode(BaseModel):
23
+ """Base model for all node types, containing common fields."""
24
+
25
+ id: int
26
+ # The 'type' field will be overridden in subclasses with Literal types
27
+ # to enable discriminated unions.
28
+ type: str
29
+
30
+
31
+ class PythonWorkflowDefinitionInputNode(PythonWorkflowDefinitionBaseNode):
32
+ """Model for input nodes."""
33
+
34
+ type: Literal["input"]
35
+ name: str
36
+ value: Optional[Any] = None
37
+
38
+
39
+ class PythonWorkflowDefinitionOutputNode(PythonWorkflowDefinitionBaseNode):
40
+ """Model for output nodes."""
41
+
42
+ type: Literal["output"]
43
+ name: str
44
+
45
+
46
+ class PythonWorkflowDefinitionFunctionNode(PythonWorkflowDefinitionBaseNode):
47
+ """
48
+ Model for function execution nodes.
49
+ 'value' holds the dotted 'module.function' path; no 'name' field is defined on this model.
50
+ """
51
+
52
+ type: Literal["function"]
53
+ value: str # Expected format: 'module.function'
54
+
55
+ @field_validator("value")
56
+ @classmethod
57
+ def check_value_format(cls, v: str):
58
+ if not v or "." not in v or v.startswith(".") or v.endswith("."):
59
+ msg = (
60
+ "FunctionNode 'value' must be a non-empty string ",
61
+ "in 'module.function' format with at least one period.",
62
+ )
63
+ raise ValueError(msg)
64
+ return v
65
+
66
+
67
+ # Discriminated Union for Nodes
68
+ PythonWorkflowDefinitionNode = Annotated[
69
+ Union[
70
+ PythonWorkflowDefinitionInputNode,
71
+ PythonWorkflowDefinitionOutputNode,
72
+ PythonWorkflowDefinitionFunctionNode,
73
+ ],
74
+ Field(discriminator="type"),
75
+ ]
76
+
77
+
78
+ class PythonWorkflowDefinitionEdge(BaseModel):
79
+ """Model for edges connecting nodes."""
80
+
81
+ target: int
82
+ targetPort: Optional[str] = None
83
+ source: int
84
+ sourcePort: Optional[str] = None
85
+
86
+ @field_validator("sourcePort", mode="before")
87
+ @classmethod
88
+ def handle_default_source(cls, v: Any) -> Optional[str]:
89
+ """
90
+ Transforms incoming None/null for sourcePort to INTERNAL_DEFAULT_HANDLE.
91
+ Runs before standard validation.
92
+ """
93
+ # Allow not specifying the sourcePort -> null gets resolved to __result__
94
+ if v is None:
95
+ return INTERNAL_DEFAULT_HANDLE
96
+ elif v == INTERNAL_DEFAULT_HANDLE:
97
+ # Disallow explicit use of the internal reserved handle name
98
+ msg = (
99
+ f"Explicit use of reserved sourcePort '{INTERNAL_DEFAULT_HANDLE}' "
100
+ f"is not allowed. Use null/None for default output."
101
+ )
102
+ raise ValueError(msg)
103
+ return v
104
+
105
+ @field_serializer("sourcePort")
106
+ def serialize_source_handle(self, v: Optional[str]) -> Optional[str]:
107
+ """
108
+ SERIALIZATION (Output): Converts internal INTERNAL_DEFAULT_HANDLE ("__result__")
109
+ back to None.
110
+ """
111
+ if v == INTERNAL_DEFAULT_HANDLE:
112
+ return None # Map "__result__" back to None for JSON output
113
+ return v # Keep other handle names as they are
114
+
115
+
116
+ class PythonWorkflowDefinitionWorkflow(BaseModel):
117
+ """The main workflow model."""
118
+
119
+ version: str
120
+ nodes: List[PythonWorkflowDefinitionNode]
121
+ edges: List[PythonWorkflowDefinitionEdge]
122
+
123
+ def dump_json(
124
+ self,
125
+ *,
126
+ indent: Optional[int] = 2,
127
+ **kwargs,
128
+ ) -> str:
129
+ """
130
+ Dumps the workflow model to a JSON string.
131
+
132
+ Args:
133
+ indent: JSON indentation level.
134
+ exclude_computed_function_names: NOTE: not a parameter of this method's
135
+ signature; to omit fields, pass Pydantic's
136
+ 'exclude' through **kwargs instead.
137
+ **kwargs: Additional keyword arguments passed to Pydantic's model_dump.
138
+
139
+ Returns:
140
+ JSON string representation of the workflow.
141
+ """
142
+
143
+ # Dump the model to a dictionary first, using mode='json' for compatible types
144
+ # Pass any extra kwargs (like custom 'exclude' rules for other fields)
145
+ workflow_dict = self.model_dump(mode="json", **kwargs)
146
+
147
+ # Dump the dictionary to a JSON string
148
+ try:
149
+ json_string = json.dumps(workflow_dict, indent=indent)
150
+ logger.info("Successfully dumped workflow model to JSON string.")
151
+ return json_string
152
+ except TypeError as e:
153
+ logger.error(
154
+ f"Error serializing workflow dictionary to JSON: {e}", exc_info=True
155
+ )
156
+ raise # Re-raise after logging
157
+
158
+ def dump_json_file(
159
+ self,
160
+ file_name: Union[str, Path],
161
+ *,
162
+ indent: Optional[int] = 2,
163
+ **kwargs,
164
+ ) -> None:
165
+ """
166
+ Dumps the workflow model to a JSON file.
167
+
168
+ Args:
169
+ file_name: Path to the output JSON file.
170
+ indent: JSON indentation level.
171
+ exclude_computed_function_names: NOTE: not a parameter of this method's
172
+ signature; to omit fields, pass Pydantic's 'exclude' through **kwargs.
173
+ **kwargs: Additional keyword arguments passed to Pydantic's model_dump.
174
+ """
175
+ logger.info(f"Dumping workflow model to JSON file: {file_name}")
176
+ # Pass kwargs to dump_json, which passes them to model_dump
177
+ json_string = self.dump_json(
178
+ indent=indent,
179
+ **kwargs,
180
+ )
181
+ try:
182
+ with open(file_name, "w", encoding="utf-8") as f:
183
+ f.write(json_string)
184
+ logger.info(f"Successfully wrote workflow model to {file_name}.")
185
+ except IOError as e:
186
+ logger.error(
187
+ f"Error writing workflow model to file {file_name}: {e}", exc_info=True
188
+ )
189
+ raise
190
+
191
+ @classmethod
192
+ def load_json_str(cls: Type[T], json_data: Union[str, bytes]) -> dict:
193
+ """
194
+ Loads and validates workflow data from a JSON string or bytes.
195
+
196
+ Args:
197
+ json_data: The JSON data as a string or bytes.
198
+
199
+ Returns:
200
+ The validated workflow as a plain dictionary (the result of model_dump()).
201
+
202
+ Raises:
203
+ pydantic.ValidationError: If validation fails.
204
+ json.JSONDecodeError: If json_data is not valid JSON.
205
+ """
206
+ logger.info("Loading workflow model from JSON data...")
207
+ try:
208
+ # Pydantic v2 method handles bytes or str directly
209
+ instance = cls.model_validate_json(json_data)
210
+ # Pydantic v1 equivalent: instance = cls.parse_raw(json_data)
211
+ logger.info(
212
+ "Successfully loaded and validated workflow model from JSON data."
213
+ )
214
+ return instance.model_dump()
215
+ except ValidationError: # Catch validation errors specifically
216
+ logger.error("Workflow model validation failed.", exc_info=True)
217
+ raise
218
+ except json.JSONDecodeError: # Catch JSON parsing errors specifically
219
+ logger.error("Invalid JSON format encountered.", exc_info=True)
220
+ raise
221
+ except Exception as e: # Catch any other unexpected errors
222
+ logger.error(
223
+ f"An unexpected error occurred during JSON loading: {e}", exc_info=True
224
+ )
225
+ raise
226
+
227
+ @classmethod
228
+ def load_json_file(cls: Type[T], file_name: Union[str, Path]) -> dict:
229
+ """
230
+ Loads and validates workflow data from a JSON file.
231
+
232
+ Args:
233
+ file_name: The path to the JSON file.
234
+
235
+ Returns:
236
+ The validated workflow as a plain dictionary (as returned by load_json_str).
237
+
238
+ Raises:
239
+ FileNotFoundError: If the file is not found.
240
+ pydantic.ValidationError: If validation fails.
241
+ json.JSONDecodeError: If the file is not valid JSON.
242
+ IOError: If there are other file reading issues.
243
+ """
244
+ logger.info(f"Loading workflow model from JSON file: {file_name}")
245
+ try:
246
+ file_content = Path(file_name).read_text(encoding="utf-8")
247
+ # Delegate validation to the string loading method
248
+ return cls.load_json_str(file_content)
249
+ except FileNotFoundError:
250
+ logger.error(f"JSON file not found: {file_name}", exc_info=True)
251
+ raise
252
+ except IOError as e:
253
+ logger.error(f"Error reading JSON file {file_name}: {e}", exc_info=True)
254
+ raise
@@ -1,9 +1,8 @@
1
- import json
2
-
3
1
  from IPython.display import SVG, display
4
2
  import networkx as nx
5
3
 
6
4
 
5
+ from python_workflow_definition.models import PythonWorkflowDefinitionWorkflow
7
6
  from python_workflow_definition.purepython import group_edges
8
7
  from python_workflow_definition.shared import (
9
8
  get_kwargs,
@@ -16,8 +15,7 @@ from python_workflow_definition.shared import (
16
15
 
17
16
 
18
17
  def plot(file_name: str):
19
- with open(file_name, "r") as f:
20
- content = json.load(f)
18
+ content = PythonWorkflowDefinitionWorkflow.load_json_file(file_name=file_name)
21
19
 
22
20
  graph = nx.DiGraph()
23
21
  node_dict = convert_nodes_list_to_dict(nodes_list=content[NODES_LABEL])
@@ -39,7 +37,10 @@ def plot(file_name: str):
39
37
  k + "=result[" + v[SOURCE_PORT_LABEL] + "]"
40
38
  )
41
39
  for k, v in edge_label_dict.items():
42
- graph.add_edge(str(k), str(target_node), label=", ".join(v))
40
+ if len(v) == 1 and v[0] is not None:
41
+ graph.add_edge(str(k), str(target_node), label=", ".join(v))
42
+ else:
43
+ graph.add_edge(str(k), str(target_node))
43
44
 
44
45
  svg = nx.nx_agraph.to_agraph(graph).draw(prog="dot", format="svg")
45
46
  display(SVG(svg))
@@ -1,14 +1,15 @@
1
- import json
2
1
  from importlib import import_module
3
2
  from inspect import isfunction
4
3
 
5
4
 
5
+ from python_workflow_definition.models import PythonWorkflowDefinitionWorkflow
6
6
  from python_workflow_definition.shared import (
7
7
  get_dict,
8
8
  get_list,
9
9
  get_kwargs,
10
10
  get_source_handles,
11
11
  convert_nodes_list_to_dict,
12
+ remove_result,
12
13
  NODES_LABEL,
13
14
  EDGES_LABEL,
14
15
  SOURCE_LABEL,
@@ -66,8 +67,11 @@ def _get_value(result_dict: dict, nodes_new_dict: dict, link_dict: dict):
66
67
 
67
68
 
68
69
  def load_workflow_json(file_name: str):
69
- with open(file_name, "r") as f:
70
- content = json.load(f)
70
+ content = remove_result(
71
+ workflow_dict=PythonWorkflowDefinitionWorkflow.load_json_file(
72
+ file_name=file_name
73
+ )
74
+ )
71
75
 
72
76
  edges_new_lst = content[EDGES_LABEL]
73
77
  nodes_new_dict = {}
@@ -1,22 +1,27 @@
1
1
  from importlib import import_module
2
2
  from inspect import isfunction
3
- import json
4
3
  from typing import Optional
5
4
 
6
5
  import numpy as np
7
6
  from pyiron_base import job, Project
8
7
  from pyiron_base.project.delayed import DelayedObject
9
8
 
9
+ from python_workflow_definition.models import PythonWorkflowDefinitionWorkflow
10
10
  from python_workflow_definition.shared import (
11
11
  get_kwargs,
12
12
  get_source_handles,
13
13
  convert_nodes_list_to_dict,
14
+ update_node_names,
15
+ remove_result,
16
+ set_result_node,
14
17
  NODES_LABEL,
15
18
  EDGES_LABEL,
16
19
  SOURCE_LABEL,
17
20
  SOURCE_PORT_LABEL,
18
21
  TARGET_LABEL,
19
22
  TARGET_PORT_LABEL,
23
+ VERSION_NUMBER,
24
+ VERSION_LABEL,
20
25
  )
21
26
 
22
27
 
@@ -227,8 +232,11 @@ def load_workflow_json(file_name: str, project: Optional[Project] = None):
227
232
  if project is None:
228
233
  project = Project(".")
229
234
 
230
- with open(file_name, "r") as f:
231
- content = json.load(f)
235
+ content = remove_result(
236
+ workflow_dict=PythonWorkflowDefinitionWorkflow.load_json_file(
237
+ file_name=file_name
238
+ )
239
+ )
232
240
 
233
241
  edges_new_lst = content[EDGES_LABEL]
234
242
  nodes_new_dict = {}
@@ -282,11 +290,22 @@ def write_workflow_json(
282
290
  mod = v.__module__
283
291
  if mod == "python_workflow_definition.pyiron_base":
284
292
  mod = "python_workflow_definition.shared"
285
- nodes_store_lst.append({"id": k, "function": mod + "." + v.__name__})
293
+ nodes_store_lst.append(
294
+ {"id": k, "type": "function", "value": mod + "." + v.__name__}
295
+ )
286
296
  elif isinstance(v, np.ndarray):
287
- nodes_store_lst.append({"id": k, "value": v.tolist()})
297
+ nodes_store_lst.append({"id": k, "type": "input", "value": v.tolist()})
288
298
  else:
289
- nodes_store_lst.append({"id": k, "value": v})
290
-
291
- with open(file_name, "w") as f:
292
- json.dump({NODES_LABEL: nodes_store_lst, EDGES_LABEL: edges_new_lst}, f)
299
+ nodes_store_lst.append({"id": k, "type": "input", "value": v})
300
+
301
+ PythonWorkflowDefinitionWorkflow(
302
+ **set_result_node(
303
+ workflow_dict=update_node_names(
304
+ workflow_dict={
305
+ VERSION_LABEL: VERSION_NUMBER,
306
+ NODES_LABEL: nodes_store_lst,
307
+ EDGES_LABEL: edges_new_lst,
308
+ }
309
+ )
310
+ )
311
+ ).dump_json_file(file_name=file_name, indent=2)
@@ -0,0 +1,117 @@
1
+ from collections import Counter
2
+
3
+ NODES_LABEL = "nodes"
4
+ EDGES_LABEL = "edges"
5
+ SOURCE_LABEL = "source"
6
+ SOURCE_PORT_LABEL = "sourcePort"
7
+ TARGET_LABEL = "target"
8
+ TARGET_PORT_LABEL = "targetPort"
9
+ VERSION_NUMBER = "0.1.0"
10
+ VERSION_LABEL = "version"
11
+
12
+
13
+ def get_dict(**kwargs) -> dict:
14
+ # NOTE: In WG, this will automatically be wrapped in a dict with the `result` key
15
+ return {k: v for k, v in kwargs.items()}
16
+ # return {'dict': {k: v for k, v in kwargs.items()}}
17
+
18
+
19
+ def get_list(**kwargs) -> list:
20
+ return list(kwargs.values())
21
+
22
+
23
+ def get_kwargs(lst: list) -> dict:
24
+ return {
25
+ t[TARGET_PORT_LABEL]: {
26
+ SOURCE_LABEL: t[SOURCE_LABEL],
27
+ SOURCE_PORT_LABEL: t[SOURCE_PORT_LABEL],
28
+ }
29
+ for t in lst
30
+ }
31
+
32
+
33
+ def get_source_handles(edges_lst: list) -> dict:
34
+ source_handle_dict = {}
35
+ for ed in edges_lst:
36
+ if ed[SOURCE_LABEL] not in source_handle_dict.keys():
37
+ source_handle_dict[ed[SOURCE_LABEL]] = []
38
+ source_handle_dict[ed[SOURCE_LABEL]].append(ed[SOURCE_PORT_LABEL])
39
+ return {
40
+ k: list(range(len(v))) if len(v) > 1 and all([el is None for el in v]) else v
41
+ for k, v in source_handle_dict.items()
42
+ }
43
+
44
+
45
+ def convert_nodes_list_to_dict(nodes_list: list) -> dict:
46
+ return {
47
+ str(el["id"]): el["value"] if "value" in el else el["name"]
48
+ for el in sorted(nodes_list, key=lambda d: d["id"])
49
+ }
50
+
51
+
52
+ def update_node_names(workflow_dict: dict) -> dict:
53
+ node_names_final_dict = {}
54
+ input_nodes = [n for n in workflow_dict[NODES_LABEL] if n["type"] == "input"]
55
+ node_names_dict = {
56
+ n["id"]: list(
57
+ set(
58
+ [
59
+ e[TARGET_PORT_LABEL]
60
+ for e in workflow_dict[EDGES_LABEL]
61
+ if e[SOURCE_LABEL] == n["id"]
62
+ ]
63
+ )
64
+ )[0]
65
+ for n in input_nodes
66
+ }
67
+
68
+ counter_dict = Counter(node_names_dict.values())
69
+ node_names_useage_dict = {k: -1 for k in counter_dict.keys()}
70
+ for k, v in node_names_dict.items():
71
+ node_names_useage_dict[v] += 1
72
+ if counter_dict[v] > 1:
73
+ node_names_final_dict[k] = v + "_" + str(node_names_useage_dict[v])
74
+ else:
75
+ node_names_final_dict[k] = v
76
+
77
+ for n in workflow_dict[NODES_LABEL]:
78
+ if n["type"] == "input":
79
+ n["name"] = node_names_final_dict[n["id"]]
80
+ return workflow_dict
81
+
82
+
83
+ def set_result_node(workflow_dict):
84
+ node_id_lst = [n["id"] for n in workflow_dict[NODES_LABEL]]
85
+ source_lst = list(set([e[SOURCE_LABEL] for e in workflow_dict[EDGES_LABEL]]))
86
+
87
+ end_node_lst = []
88
+ for ni in node_id_lst:
89
+ if ni not in source_lst:
90
+ end_node_lst.append(ni)
91
+
92
+ node_id = len(workflow_dict[NODES_LABEL])
93
+ workflow_dict[NODES_LABEL].append(
94
+ {"id": node_id, "type": "output", "name": "result"}
95
+ )
96
+ workflow_dict[EDGES_LABEL].append(
97
+ {
98
+ TARGET_LABEL: node_id,
99
+ TARGET_PORT_LABEL: None,
100
+ SOURCE_LABEL: end_node_lst[0],
101
+ SOURCE_PORT_LABEL: None,
102
+ }
103
+ )
104
+
105
+ return workflow_dict
106
+
107
+
108
+ def remove_result(workflow_dict):
109
+ node_output_id = [
110
+ n["id"] for n in workflow_dict[NODES_LABEL] if n["type"] == "output"
111
+ ][0]
112
+ return {
113
+ NODES_LABEL: [n for n in workflow_dict[NODES_LABEL] if n["type"] != "output"],
114
+ EDGES_LABEL: [
115
+ e for e in workflow_dict[EDGES_LABEL] if e[TARGET_LABEL] != node_output_id
116
+ ],
117
+ }
@@ -1,45 +0,0 @@
1
- NODES_LABEL = "nodes"
2
- EDGES_LABEL = "edges"
3
- SOURCE_LABEL = "source"
4
- SOURCE_PORT_LABEL = "sourcePort"
5
- TARGET_LABEL = "target"
6
- TARGET_PORT_LABEL = "targetPort"
7
-
8
-
9
- def get_dict(**kwargs) -> dict:
10
- # NOTE: In WG, this will automatically be wrapped in a dict with the `result` key
11
- return {k: v for k, v in kwargs.items()}
12
- # return {'dict': {k: v for k, v in kwargs.items()}}
13
-
14
-
15
- def get_list(**kwargs) -> list:
16
- return list(kwargs.values())
17
-
18
-
19
- def get_kwargs(lst: list) -> dict:
20
- return {
21
- t[TARGET_PORT_LABEL]: {
22
- SOURCE_LABEL: t[SOURCE_LABEL],
23
- SOURCE_PORT_LABEL: t[SOURCE_PORT_LABEL],
24
- }
25
- for t in lst
26
- }
27
-
28
-
29
- def get_source_handles(edges_lst: list) -> dict:
30
- source_handle_dict = {}
31
- for ed in edges_lst:
32
- if ed[SOURCE_LABEL] not in source_handle_dict.keys():
33
- source_handle_dict[ed[SOURCE_LABEL]] = []
34
- source_handle_dict[ed[SOURCE_LABEL]].append(ed[SOURCE_PORT_LABEL])
35
- return {
36
- k: list(range(len(v))) if len(v) > 1 and all([el is None for el in v]) else v
37
- for k, v in source_handle_dict.items()
38
- }
39
-
40
-
41
- def convert_nodes_list_to_dict(nodes_list: list) -> dict:
42
- return {
43
- str(el["id"]): el["value"] if "value" in el else el["function"]
44
- for el in sorted(nodes_list, key=lambda d: d["id"])
45
- }