python-workflow-definition 0.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,138 @@
1
+ from importlib import import_module
2
+ import json
3
+ import traceback
4
+
5
+ from aiida import orm
6
+ from aiida_pythonjob.data.serializer import general_serializer
7
+ from aiida_workgraph import WorkGraph, task
8
+ from aiida_workgraph.socket import TaskSocketNamespace
9
+
10
+ from python_workflow_definition.shared import (
11
+ convert_nodes_list_to_dict,
12
+ NODES_LABEL,
13
+ EDGES_LABEL,
14
+ SOURCE_LABEL,
15
+ SOURCE_PORT_LABEL,
16
+ TARGET_LABEL,
17
+ TARGET_PORT_LABEL,
18
+ )
19
+
20
+
21
def load_workflow_json(file_name: str) -> WorkGraph:
    """Build an aiida-workgraph WorkGraph from a workflow-definition JSON file.

    Function nodes (string "module.name") become WorkGraph tasks; all other
    nodes are serialized to AiiDA data nodes and wired into task inputs.

    :param file_name: path of the workflow JSON file to read.
    :return: the assembled WorkGraph.
    """
    with open(file_name) as f:
        data = json.load(f)

    wg = WorkGraph()
    # Maps node id (str) -> WorkGraph task or AiiDA data node.
    task_name_mapping = {}

    for id, identifier in convert_nodes_list_to_dict(
        nodes_list=data[NODES_LABEL]
    ).items():
        if isinstance(identifier, str) and "." in identifier:
            # Dotted string: import the referenced function and add it as a task.
            p, m = identifier.rsplit(".", 1)
            mod = import_module(p)
            func = getattr(mod, m)
            wg.add_task(func)
            # Remove the default "result" output; actual outputs are added
            # later from the edge data.
            del wg.tasks[-1].outputs["result"]
            task_name_mapping[id] = wg.tasks[-1]
        else:
            # Plain value: serialize it into an AiiDA data node.
            data_node = general_serializer(identifier)
            task_name_mapping[id] = data_node

    # Wire up the edges between tasks / data nodes.
    for link in data[EDGES_LABEL]:
        to_task = task_name_mapping[str(link[TARGET_LABEL])]
        # If the target port does not exist yet, the value is passed through
        # kwargs, so add a generic input socket for it.
        if link[TARGET_PORT_LABEL] not in to_task.inputs:
            to_socket = to_task.add_input("workgraph.any", name=link[TARGET_PORT_LABEL])
        else:
            to_socket = to_task.inputs[link[TARGET_PORT_LABEL]]
        from_task = task_name_mapping[str(link[SOURCE_LABEL])]
        if isinstance(from_task, orm.Data):
            # Static data: assign directly instead of creating a graph link.
            to_socket.value = from_task
        else:
            try:
                # An anonymous source port means the task's whole result.
                if link[SOURCE_PORT_LABEL] is None:
                    link[SOURCE_PORT_LABEL] = "result"
                # Outputs were not declared at task creation, so create the
                # output socket on demand and assume it exists at runtime.
                if link[SOURCE_PORT_LABEL] not in from_task.outputs:
                    from_socket = from_task.add_output(
                        "workgraph.any",
                        name=link[SOURCE_PORT_LABEL],
                        metadata={"is_function_output": True},
                    )
                else:
                    from_socket = from_task.outputs[link[SOURCE_PORT_LABEL]]

                wg.add_link(from_socket, to_socket)
            except Exception as e:
                # Best-effort linking: report and continue with remaining edges.
                traceback.print_exc()
                print("Failed to link", link, "with error:", e)
    return wg
78
+
79
+
80
def write_workflow_json(wg: WorkGraph, file_name: str) -> dict:
    """Serialize a WorkGraph to the shared workflow-definition JSON format.

    Tasks become function nodes, task links become edges, and plain AiiDA
    data inputs become value nodes connected by anonymous edges.

    :param wg: the WorkGraph to export.
    :param file_name: destination path of the JSON file.
    :return: the serialized data dict that was written.
    """
    data = {NODES_LABEL: [], EDGES_LABEL: []}
    # task name -> integer node id
    node_name_mapping = {}
    # data-node uuid -> integer node id (to reuse shared inputs)
    data_node_name_mapping = {}
    i = 0
    for node in wg.tasks:
        executor = node.get_executor()
        node_name_mapping[node.name] = i

        # Store the fully qualified "module.function" path.
        callable_name = executor["callable_name"]
        callable_name = f"{executor['module_path']}.{callable_name}"
        data[NODES_LABEL].append({"id": i, "function": callable_name})
        i += 1

    for link in wg.links:
        link_data = link.to_dict()
        # The default "result" socket is stored as the anonymous port None.
        if link_data["from_socket"] == "result":
            link_data["from_socket"] = None
        # Rename workgraph link fields to the shared edge schema.
        link_data[TARGET_LABEL] = node_name_mapping[link_data.pop("to_node")]
        link_data[TARGET_PORT_LABEL] = link_data.pop("to_socket")
        link_data[SOURCE_LABEL] = node_name_mapping[link_data.pop("from_node")]
        link_data[SOURCE_PORT_LABEL] = link_data.pop("from_socket")
        data[EDGES_LABEL].append(link_data)

    for node in wg.tasks:
        for input in node.inputs:
            # Namespaces are assumed never to be used as direct inputs.
            if isinstance(input, TaskSocketNamespace):
                continue
            if isinstance(input.value, orm.Data):
                if input.value.uuid not in data_node_name_mapping:
                    # Unwrap the AiiDA node back into a plain JSON value.
                    if isinstance(input.value, orm.List):
                        raw_value = input.value.get_list()
                    elif isinstance(input.value, orm.Dict):
                        raw_value = input.value.get_dict()
                        # get_dict() injects a spurious "node_type" key; drop it.
                        raw_value.pop("node_type", None)
                    else:
                        raw_value = input.value.value
                    data[NODES_LABEL].append({"id": i, "value": raw_value})
                    input_node_name = i
                    data_node_name_mapping[input.value.uuid] = input_node_name
                    i += 1
                else:
                    # Reuse the node id of an already-exported data node.
                    input_node_name = data_node_name_mapping[input.value.uuid]
                data[EDGES_LABEL].append(
                    {
                        TARGET_LABEL: node_name_mapping[node.name],
                        TARGET_PORT_LABEL: input._name,
                        SOURCE_LABEL: input_node_name,
                        SOURCE_PORT_LABEL: None,
                    }
                )
    with open(file_name, "w") as f:
        json.dump(data, f, indent=2)

    return data
@@ -0,0 +1,74 @@
1
+ from concurrent.futures import Executor
2
+ from importlib import import_module
3
+ from inspect import isfunction
4
+ import json
5
+
6
+
7
+ from python_workflow_definition.shared import (
8
+ get_dict,
9
+ get_list,
10
+ get_kwargs,
11
+ get_source_handles,
12
+ convert_nodes_list_to_dict,
13
+ NODES_LABEL,
14
+ EDGES_LABEL,
15
+ SOURCE_LABEL,
16
+ SOURCE_PORT_LABEL,
17
+ )
18
+ from python_workflow_definition.purepython import resort_total_lst, group_edges
19
+
20
+
21
def get_item(obj, key):
    """Subscript helper (``obj[key]``) that can be shipped to an Executor."""
    value = obj[key]
    return value
23
+
24
+
25
def _get_value(result_dict: dict, nodes_new_dict: dict, link_dict: dict, exe: Executor):
    """Resolve an edge source to a value or a future.

    Looks the source node up first among already-submitted futures, then among
    static node values. With an anonymous source port the object itself is
    returned; otherwise a ``get_item`` subscript is scheduled on the executor.

    :param result_dict: node id -> Future of an already-submitted task.
    :param nodes_new_dict: node id -> static value or imported function.
    :param link_dict: one edge's {source, sourcePort} mapping.
    :param exe: executor used to defer the subscript.
    :raises KeyError: if the source node id is unknown.
    """
    source, source_handle = link_dict[SOURCE_LABEL], link_dict[SOURCE_PORT_LABEL]
    if source in result_dict.keys():
        result = result_dict[source]
    elif source in nodes_new_dict.keys():
        result = nodes_new_dict[source]
    else:
        raise KeyError()
    if source_handle is None:
        return result
    else:
        # Bug fix: Executor.submit(fn, /, *args, **kwargs) takes the callable
        # positionally only (since Python 3.9); submit(fn=...) raises TypeError.
        return exe.submit(get_item, obj=result, key=source_handle)
37
+
38
+
39
def load_workflow_json(file_name: str, exe: Executor):
    """Execute a workflow JSON on a concurrent.futures Executor.

    Nodes are imported (dotted strings) or kept as static values, edges are
    grouped and topologically sorted, then each function node is submitted in
    dependency order.

    :param file_name: path of the workflow JSON file.
    :param exe: executor that runs the individual tasks.
    :return: the Future of the last submitted function node.
    """
    with open(file_name, "r") as f:
        content = json.load(f)

    edges_new_lst = content[EDGES_LABEL]
    nodes_new_dict = {}

    for k, v in convert_nodes_list_to_dict(nodes_list=content[NODES_LABEL]).items():
        if isinstance(v, str) and "." in v:
            # Dotted string: resolve to the actual python function.
            p, m = v.rsplit(".", 1)
            mod = import_module(p)
            nodes_new_dict[int(k)] = getattr(mod, m)
        else:
            nodes_new_dict[int(k)] = v

    total_lst = group_edges(edges_new_lst)
    # Reorder so every node's sources are scheduled before the node itself.
    total_new_lst = resort_total_lst(total_lst=total_lst, nodes_dict=nodes_new_dict)

    result_dict = {}
    last_key = None
    for lst in total_new_lst:
        node = nodes_new_dict[lst[0]]
        if isfunction(node):
            kwargs = {
                k: _get_value(
                    result_dict=result_dict,
                    nodes_new_dict=nodes_new_dict,
                    link_dict=v,
                    exe=exe,
                )
                for k, v in lst[1].items()
            }
            result_dict[lst[0]] = exe.submit(node, **kwargs)
            last_key = lst[0]

    # The final node in topological order is taken as the workflow result.
    return result_dict[last_key]
@@ -0,0 +1,333 @@
1
+ import json
2
+ from importlib import import_module
3
+ from inspect import isfunction
4
+
5
+ import numpy as np
6
+ from jobflow import job, Flow
7
+
8
+ from python_workflow_definition.shared import (
9
+ get_dict,
10
+ get_list,
11
+ get_kwargs,
12
+ get_source_handles,
13
+ convert_nodes_list_to_dict,
14
+ NODES_LABEL,
15
+ EDGES_LABEL,
16
+ SOURCE_LABEL,
17
+ SOURCE_PORT_LABEL,
18
+ TARGET_LABEL,
19
+ TARGET_PORT_LABEL,
20
+ )
21
+
22
+
23
def _get_function_dict(flow: Flow):
    """Map each job's uuid to the python function it wraps."""
    mapping = {}
    for flow_job in flow.jobs:
        mapping[flow_job.uuid] = flow_job.function
    return mapping
25
+
26
+
27
+ def _get_nodes_dict(function_dict: dict):
28
+ nodes_dict, nodes_mapping_dict = {}, {}
29
+ for i, [k, v] in enumerate(function_dict.items()):
30
+ nodes_dict[i] = v
31
+ nodes_mapping_dict[k] = i
32
+
33
+ return nodes_dict, nodes_mapping_dict
34
+
35
+
36
def _get_edge_from_dict(
    target: str, key: str, value_dict: dict, nodes_mapping_dict: dict
) -> dict:
    """Translate a jobflow output reference into a shared-format edge.

    A single referenced attribute identifies one named output port of the
    source job; any other attribute count means the whole job output is
    consumed (anonymous port ``None``).

    :param target: integer id of the consuming node.
    :param key: name of the target input port.
    :param value_dict: serialized jobflow OutputReference (uuid + attributes).
    :param nodes_mapping_dict: job uuid -> integer node id.
    """
    # DRY: compute the only differing field once instead of duplicating the
    # whole edge dict in both branches.
    if len(value_dict["attributes"]) == 1:
        source_port = value_dict["attributes"][0][1]
    else:
        source_port = None
    return {
        TARGET_LABEL: target,
        TARGET_PORT_LABEL: key,
        SOURCE_LABEL: nodes_mapping_dict[value_dict["uuid"]],
        SOURCE_PORT_LABEL: source_port,
    }
53
+
54
+
55
def _get_edges_and_extend_nodes(
    flow_dict: dict, nodes_mapping_dict: dict, nodes_dict: dict
):
    """Extract edges from a serialized jobflow Flow, extending nodes as needed.

    For every job kwarg, one of four cases applies:
    1. the value is a serialized OutputReference (has @module/@class/@version)
       -> direct edge from the referenced job;
    2. a dict containing at least one such reference -> insert a synthetic
       ``get_dict`` node that assembles the dict, plus edges for each entry;
    3. a list containing at least one such reference -> insert a synthetic
       ``get_list`` node analogously;
    4. a plain value -> insert (or reuse) a value node with an anonymous edge.

    NOTE: ``nodes_dict`` is mutated in place (synthetic and value nodes are
    appended) and also returned.

    :return: (edges list in shared format, extended nodes dict)
    """
    edges_lst = []
    for job in flow_dict["jobs"]:
        for k, v in job["function_kwargs"].items():
            if (
                isinstance(v, dict)
                and "@module" in v
                and "@class" in v
                and "@version" in v
            ):
                # Case 1: direct reference to another job's output.
                edges_lst.append(
                    _get_edge_from_dict(
                        target=nodes_mapping_dict[job["uuid"]],
                        key=k,
                        value_dict=v,
                        nodes_mapping_dict=nodes_mapping_dict,
                    )
                )
            elif isinstance(v, dict) and any(
                [
                    isinstance(el, dict)
                    and "@module" in el
                    and "@class" in el
                    and "@version" in el
                    for el in v.values()
                ]
            ):
                # Case 2: dict mixing references and plain values -> get_dict node.
                node_dict_index = len(nodes_dict)
                nodes_dict[node_dict_index] = get_dict
                for kt, vt in v.items():
                    if (
                        isinstance(vt, dict)
                        and "@module" in vt
                        and "@class" in vt
                        and "@version" in vt
                    ):
                        edges_lst.append(
                            _get_edge_from_dict(
                                target=node_dict_index,
                                key=kt,
                                value_dict=vt,
                                nodes_mapping_dict=nodes_mapping_dict,
                            )
                        )
                    else:
                        # Reuse an existing value node when an equal value
                        # (compared via str) was already stored.
                        if vt not in nodes_dict.values():
                            node_index = len(nodes_dict)
                            nodes_dict[node_index] = vt
                        else:
                            node_index = {str(tv): tk for tk, tv in nodes_dict.items()}[
                                str(vt)
                            ]
                        edges_lst.append(
                            {
                                TARGET_LABEL: node_dict_index,
                                TARGET_PORT_LABEL: kt,
                                SOURCE_LABEL: node_index,
                                SOURCE_PORT_LABEL: None,
                            }
                        )
                # Connect the synthetic dict node to the consuming job.
                edges_lst.append(
                    {
                        TARGET_LABEL: nodes_mapping_dict[job["uuid"]],
                        TARGET_PORT_LABEL: k,
                        SOURCE_LABEL: node_dict_index,
                        SOURCE_PORT_LABEL: None,
                    }
                )
            elif isinstance(v, list) and any(
                [
                    isinstance(el, dict)
                    and "@module" in el
                    and "@class" in el
                    and "@version" in el
                    for el in v
                ]
            ):
                # Case 3: list mixing references and plain values -> get_list node.
                node_list_index = len(nodes_dict)
                nodes_dict[node_list_index] = get_list
                for kt, vt in enumerate(v):
                    if (
                        isinstance(vt, dict)
                        and "@module" in vt
                        and "@class" in vt
                        and "@version" in vt
                    ):
                        edges_lst.append(
                            _get_edge_from_dict(
                                target=node_list_index,
                                key=str(kt),
                                value_dict=vt,
                                nodes_mapping_dict=nodes_mapping_dict,
                            )
                        )
                    else:
                        if vt not in nodes_dict.values():
                            node_index = len(nodes_dict)
                            nodes_dict[node_index] = vt
                        else:
                            node_index = {str(tv): tk for tk, tv in nodes_dict.items()}[
                                str(vt)
                            ]
                        edges_lst.append(
                            {
                                TARGET_LABEL: node_list_index,
                                TARGET_PORT_LABEL: kt,
                                SOURCE_LABEL: node_index,
                                SOURCE_PORT_LABEL: None,
                            }
                        )
                # Connect the synthetic list node to the consuming job.
                edges_lst.append(
                    {
                        TARGET_LABEL: nodes_mapping_dict[job["uuid"]],
                        TARGET_PORT_LABEL: k,
                        SOURCE_LABEL: node_list_index,
                        SOURCE_PORT_LABEL: None,
                    }
                )
            else:
                # Case 4: plain value -> value node with anonymous edge.
                if v not in nodes_dict.values():
                    node_index = len(nodes_dict)
                    nodes_dict[node_index] = v
                else:
                    node_index = {tv: tk for tk, tv in nodes_dict.items()}[v]
                edges_lst.append(
                    {
                        TARGET_LABEL: nodes_mapping_dict[job["uuid"]],
                        TARGET_PORT_LABEL: k,
                        SOURCE_LABEL: node_index,
                        SOURCE_PORT_LABEL: None,
                    }
                )
    return edges_lst, nodes_dict
190
+
191
+
192
def _resort_total_lst(total_dict: dict, nodes_dict: dict) -> dict:
    """Topologically order the grouped edges so sources precede their targets.

    Nodes absent from ``total_dict`` have no dependencies and are always
    considered resolved.
    """
    dependent_nodes = sorted(total_dict)
    independent_nodes = [n for n in nodes_dict if n not in dependent_nodes]
    resolved = []
    ordered = {}
    # Repeated sweeps: each pass admits every node whose sources are resolved.
    while len(ordered) < len(total_dict):
        for node_id in dependent_nodes:
            if node_id in resolved:
                continue
            links = total_dict[node_id]
            sources = [link[SOURCE_LABEL] for link in links.values()]
            if all(s in resolved or s in independent_nodes for s in sources):
                resolved.append(node_id)
                ordered[node_id] = links
    return ordered
210
+
211
+
212
def _group_edges(edges_lst: list) -> dict:
    """Group edges by target node id, in order of first appearance.

    Each entry maps a target id to the kwargs mapping built by
    :func:`get_kwargs` from all edges pointing at that target.

    Performance: the original rescanned the full edge list for every edge
    (O(n^2)); a single grouping pass gives the same result in O(n).
    """
    grouped = {}
    for edge in edges_lst:
        grouped.setdefault(edge[TARGET_LABEL], []).append(edge)
    return {
        target_id: get_kwargs(lst=edge_group)
        for target_id, edge_group in grouped.items()
    }
223
+
224
+
225
+ def _get_input_dict(nodes_dict: dict) -> dict:
226
+ return {k: v for k, v in nodes_dict.items() if not isfunction(v)}
227
+
228
+
229
def _get_workflow(
    nodes_dict: dict, input_dict: dict, total_dict: dict, source_handles_dict: dict
) -> list:
    """Instantiate jobflow jobs for all function nodes, in topological order.

    :param nodes_dict: node id -> function or static value.
    :param input_dict: node id -> static value (non-function nodes).
    :param total_dict: topologically ordered node id -> kwargs edge mapping.
    :param source_handles_dict: node id -> list of named output ports used.
    :return: list of instantiated jobs (one per function node).
    """
    # Navigate from a job object to its (possibly named) output attribute.
    def get_attr_helper(obj, source_handle):
        if source_handle is None:
            return getattr(obj, "output")
        else:
            return getattr(getattr(obj, "output"), source_handle)

    memory_dict = {}
    for k in total_dict.keys():
        v = nodes_dict[k]
        if isfunction(v):
            # Declare named outputs via `data=` when downstream edges use them.
            if k in source_handles_dict.keys():
                fn = job(
                    method=v,
                    data=[el for el in source_handles_dict[k] if el is not None],
                )
            else:
                fn = job(method=v)
            # Each kwarg is either a static value or an upstream job output.
            kwargs = {
                kw: (
                    input_dict[vw[SOURCE_LABEL]]
                    if vw[SOURCE_LABEL] in input_dict
                    else get_attr_helper(
                        obj=memory_dict[vw[SOURCE_LABEL]],
                        source_handle=vw[SOURCE_PORT_LABEL],
                    )
                )
                for kw, vw in total_dict[k].items()
            }
            memory_dict[k] = fn(**kwargs)
    return list(memory_dict.values())
262
+
263
+
264
+ def _get_item_from_tuple(input_obj, index, index_lst):
265
+ if isinstance(input_obj, dict):
266
+ return input_obj[index]
267
+ else:
268
+ return list(input_obj)[index_lst.index(index)]
269
+
270
+
271
def load_workflow_json(file_name: str) -> Flow:
    """Reconstruct a jobflow Flow from a workflow-definition JSON file.

    :param file_name: path of the workflow JSON file to read.
    :return: a Flow containing one job per function node, wired per the edges.
    """
    with open(file_name, "r") as f:
        content = json.load(f)

    # Named source ports must be strings for attribute access on job outputs.
    edges_new_lst = []
    for edge in content[EDGES_LABEL]:
        if edge[SOURCE_PORT_LABEL] is None:
            edges_new_lst.append(edge)
        else:
            edges_new_lst.append(
                {
                    TARGET_LABEL: edge[TARGET_LABEL],
                    TARGET_PORT_LABEL: edge[TARGET_PORT_LABEL],
                    SOURCE_LABEL: edge[SOURCE_LABEL],
                    SOURCE_PORT_LABEL: str(edge[SOURCE_PORT_LABEL]),
                }
            )

    # Resolve dotted function strings; keep plain values as-is.
    nodes_new_dict = {}
    for k, v in convert_nodes_list_to_dict(nodes_list=content[NODES_LABEL]).items():
        if isinstance(v, str) and "." in v:
            p, m = v.rsplit(".", 1)
            mod = import_module(p)
            nodes_new_dict[int(k)] = getattr(mod, m)
        else:
            nodes_new_dict[int(k)] = v

    source_handles_dict = get_source_handles(edges_lst=edges_new_lst)
    total_dict = _group_edges(edges_lst=edges_new_lst)
    input_dict = _get_input_dict(nodes_dict=nodes_new_dict)
    # Order nodes so every source job exists before its consumers.
    new_total_dict = _resort_total_lst(total_dict=total_dict, nodes_dict=nodes_new_dict)
    task_lst = _get_workflow(
        nodes_dict=nodes_new_dict,
        input_dict=input_dict,
        total_dict=new_total_dict,
        source_handles_dict=source_handles_dict,
    )
    return Flow(task_lst)
309
+
310
+
311
def write_workflow_json(flow: Flow, file_name: str = "workflow.json"):
    """Serialize a jobflow Flow to the shared workflow-definition JSON format.

    :param flow: the Flow to export.
    :param file_name: destination path of the JSON file.
    """
    flow_dict = flow.as_dict()
    function_dict = _get_function_dict(flow=flow)
    nodes_dict, nodes_mapping_dict = _get_nodes_dict(function_dict=function_dict)
    # nodes_dict is extended in place with synthetic/value nodes.
    edges_lst, nodes_dict = _get_edges_and_extend_nodes(
        flow_dict=flow_dict,
        nodes_mapping_dict=nodes_mapping_dict,
        nodes_dict=nodes_dict,
    )

    nodes_store_lst = []
    for k, v in nodes_dict.items():
        if isfunction(v):
            nodes_store_lst.append(
                {"id": k, "function": v.__module__ + "." + v.__name__}
            )
        elif isinstance(v, np.ndarray):
            # numpy arrays are not JSON serializable; store as nested lists.
            nodes_store_lst.append({"id": k, "value": v.tolist()})
        else:
            nodes_store_lst.append({"id": k, "value": v})

    with open(file_name, "w") as f:
        json.dump({NODES_LABEL: nodes_store_lst, EDGES_LABEL: edges_lst}, f)
@@ -0,0 +1,45 @@
1
+ import json
2
+
3
+ from IPython.display import SVG, display
4
+ import networkx as nx
5
+
6
+
7
+ from python_workflow_definition.purepython import group_edges
8
+ from python_workflow_definition.shared import (
9
+ get_kwargs,
10
+ convert_nodes_list_to_dict,
11
+ NODES_LABEL,
12
+ EDGES_LABEL,
13
+ SOURCE_LABEL,
14
+ SOURCE_PORT_LABEL,
15
+ )
16
+
17
+
18
def plot(file_name: str):
    """Render a workflow JSON file as an SVG graph in a Jupyter notebook.

    Uses networkx + graphviz ("dot") for layout and IPython display for
    output; edges between the same pair of nodes are merged into one edge
    whose label lists all transferred ports.

    :param file_name: path of the workflow JSON file to visualize.
    """
    with open(file_name, "r") as f:
        content = json.load(f)

    graph = nx.DiGraph()
    node_dict = convert_nodes_list_to_dict(nodes_list=content[NODES_LABEL])
    total_lst = group_edges(edges_lst=content[EDGES_LABEL])

    for node_id, node_name in node_dict.items():
        graph.add_node(node_id, name=str(node_name), label=str(node_name))

    for edge_tuple in total_lst:
        target_node, edge_dict = edge_tuple
        # Collect one label per source node: plain kwarg name for anonymous
        # ports, "kwarg=result[port]" for named output ports.
        edge_label_dict = {}
        for k, v in edge_dict.items():
            if v[SOURCE_LABEL] not in edge_label_dict:
                edge_label_dict[v[SOURCE_LABEL]] = []
            if v[SOURCE_PORT_LABEL] is None:
                edge_label_dict[v[SOURCE_LABEL]].append(k)
            else:
                edge_label_dict[v[SOURCE_LABEL]].append(
                    k + "=result[" + v[SOURCE_PORT_LABEL] + "]"
                )
        for k, v in edge_label_dict.items():
            graph.add_edge(str(k), str(target_node), label=", ".join(v))

    svg = nx.nx_agraph.to_agraph(graph).draw(prog="dot", format="svg")
    display(SVG(svg))
@@ -0,0 +1,99 @@
1
+ import json
2
+ from importlib import import_module
3
+ from inspect import isfunction
4
+
5
+
6
+ from python_workflow_definition.shared import (
7
+ get_dict,
8
+ get_list,
9
+ get_kwargs,
10
+ get_source_handles,
11
+ convert_nodes_list_to_dict,
12
+ NODES_LABEL,
13
+ EDGES_LABEL,
14
+ SOURCE_LABEL,
15
+ SOURCE_PORT_LABEL,
16
+ TARGET_LABEL,
17
+ TARGET_PORT_LABEL,
18
+ )
19
+
20
+
21
def resort_total_lst(total_lst: list, nodes_dict: dict) -> list:
    """Topologically sort (node id, kwargs) pairs so sources come first.

    Nodes that never appear as a target have no dependencies and count as
    always available.
    """
    targets = sorted(entry[0] for entry in total_lst)
    dependency_free = [node for node in nodes_dict if node not in targets]
    done, reordered = [], []
    # Sweep repeatedly, admitting any node whose sources are all available.
    while len(reordered) < len(total_lst):
        for node_id, links in total_lst:
            if node_id in done:
                continue
            needed = [link[SOURCE_LABEL] for link in links.values()]
            if all(src in done or src in dependency_free for src in needed):
                done.append(node_id)
                reordered.append([node_id, links])
    return reordered
37
+
38
+
39
def group_edges(edges_lst: list) -> list:
    """Group edges by target id (descending) into (target, kwargs) tuples."""
    ordered_edges = sorted(edges_lst, key=lambda e: e[TARGET_LABEL], reverse=True)
    grouped = []
    current_target = ordered_edges[0][TARGET_LABEL]
    run = []
    for edge in ordered_edges:
        if edge[TARGET_LABEL] == current_target:
            run.append(edge)
        else:
            # Target changed: close the finished run and start the next one.
            grouped.append((current_target, get_kwargs(lst=run)))
            current_target = edge[TARGET_LABEL]
            run = [edge]
    grouped.append((current_target, get_kwargs(lst=run)))
    return grouped
52
+
53
+
54
def _get_value(result_dict: dict, nodes_new_dict: dict, link_dict: dict):
    """Resolve an edge source to its concrete value.

    The source node is looked up first among computed results, then among
    static node values; an anonymous port returns the whole object, a named
    port subscripts it.

    :param result_dict: node id -> already-computed result.
    :param nodes_new_dict: node id -> static value or function.
    :param link_dict: one edge's {source, sourcePort} mapping.
    :raises KeyError: if the source node id is unknown.
    """
    source, source_handle = link_dict[SOURCE_LABEL], link_dict[SOURCE_PORT_LABEL]
    if source in result_dict.keys():
        result = result_dict[source]
    elif source in nodes_new_dict.keys():
        result = nodes_new_dict[source]
    else:
        # Include the offending id: a bare KeyError() gave no diagnostic.
        raise KeyError(source)
    if source_handle is None:
        return result
    else:
        return result[source_handle]
66
+
67
+
68
def load_workflow_json(file_name: str):
    """Execute a workflow JSON synchronously with plain python calls.

    :param file_name: path of the workflow JSON file.
    :return: the result of the last function node in topological order.
    """
    with open(file_name, "r") as f:
        content = json.load(f)

    edges_new_lst = content[EDGES_LABEL]
    nodes_new_dict = {}
    for k, v in convert_nodes_list_to_dict(nodes_list=content[NODES_LABEL]).items():
        if isinstance(v, str) and "." in v:
            # Dotted string: resolve to the actual python function.
            p, m = v.rsplit(".", 1)
            mod = import_module(p)
            nodes_new_dict[int(k)] = getattr(mod, m)
        else:
            nodes_new_dict[int(k)] = v

    total_lst = group_edges(edges_new_lst)
    # Order nodes so every dependency is evaluated before its consumers.
    total_new_lst = resort_total_lst(total_lst=total_lst, nodes_dict=nodes_new_dict)

    result_dict = {}
    last_key = None
    for lst in total_new_lst:
        node = nodes_new_dict[lst[0]]
        if isfunction(node):
            kwargs = {
                k: _get_value(
                    result_dict=result_dict, nodes_new_dict=nodes_new_dict, link_dict=v
                )
                for k, v in lst[1].items()
            }
            result_dict[lst[0]] = node(**kwargs)
            last_key = lst[0]

    return result_dict[last_key]
@@ -0,0 +1,292 @@
1
+ from importlib import import_module
2
+ from inspect import isfunction
3
+ import json
4
+ from typing import Optional
5
+
6
+ import numpy as np
7
+ from pyiron_base import job, Project
8
+ from pyiron_base.project.delayed import DelayedObject
9
+
10
+ from python_workflow_definition.shared import (
11
+ get_kwargs,
12
+ get_source_handles,
13
+ convert_nodes_list_to_dict,
14
+ NODES_LABEL,
15
+ EDGES_LABEL,
16
+ SOURCE_LABEL,
17
+ SOURCE_PORT_LABEL,
18
+ TARGET_LABEL,
19
+ TARGET_PORT_LABEL,
20
+ )
21
+
22
+
23
def _resort_total_lst(total_lst: list, nodes_dict: dict) -> list:
    """Topologically order (node id, kwargs) pairs; sources before consumers."""
    target_ids = sorted(pair[0] for pair in total_lst)
    free_nodes = [node_id for node_id in nodes_dict if node_id not in target_ids]
    scheduled = []
    result = []
    # Keep sweeping until every pair has been placed after its dependencies.
    while len(result) < len(total_lst):
        for node_id, connections in total_lst:
            if node_id in scheduled:
                continue
            sources = [c[SOURCE_LABEL] for c in connections.values()]
            ready = all(s in scheduled or s in free_nodes for s in sources)
            if ready:
                scheduled.append(node_id)
                result.append([node_id, connections])
    return result
39
+
40
+
41
def _group_edges(edges_lst: list) -> list:
    """Group edges into (target id, kwargs) tuples, targets in descending order."""
    by_target_desc = sorted(edges_lst, key=lambda e: e[TARGET_LABEL], reverse=True)
    grouped = []
    current = by_target_desc[0][TARGET_LABEL]
    batch = []
    for edge in by_target_desc:
        if edge[TARGET_LABEL] != current:
            # Flush the completed batch before starting the next target.
            grouped.append((current, get_kwargs(lst=batch)))
            current = edge[TARGET_LABEL]
            batch = [edge]
        else:
            batch.append(edge)
    grouped.append((current, get_kwargs(lst=batch)))
    return grouped
54
+
55
+
56
def _get_source(
    nodes_dict: dict, delayed_object_dict: dict, source: str, source_handle: str
):
    """Resolve an edge source to a delayed-object output or a raw node value.

    :param nodes_dict: node id -> static value or function.
    :param delayed_object_dict: node id -> already-created DelayedObject.
    :param source: id of the source node.
    :param source_handle: named output port, or None for the whole output.
    """
    if source in delayed_object_dict.keys() and source_handle is not None:
        # Use getattr() instead of calling __getattr__ directly: the builtin
        # follows the full attribute lookup chain (instance dict, class,
        # descriptors) before falling back to __getattr__.
        return getattr(getattr(delayed_object_dict[source], "output"), source_handle)
    elif source in delayed_object_dict.keys():
        return delayed_object_dict[source]
    else:
        return nodes_dict[source]
67
+
68
+
69
def _get_delayed_object_dict(
    total_lst: list, nodes_dict: dict, source_handle_dict: dict, pyiron_project: Project
) -> dict:
    """Create pyiron delayed objects for all grouped nodes, in given order.

    :param total_lst: topologically ordered (node id, kwargs edges) pairs.
    :param nodes_dict: node id -> function or static value.
    :param source_handle_dict: node id -> named output ports used downstream.
    :param pyiron_project: project the delayed jobs are attached to.
    :return: node id -> DelayedObject, in creation order.
    """
    delayed_object_dict = {}
    for item in total_lst:
        key, input_dict = item
        # Resolve every kwarg to a static value or an upstream delayed output.
        kwargs = {
            k: _get_source(
                nodes_dict=nodes_dict,
                delayed_object_dict=delayed_object_dict,
                source=v[SOURCE_LABEL],
                source_handle=v[SOURCE_PORT_LABEL],
            )
            for k, v in input_dict.items()
        }
        delayed_object_dict[key] = job(
            funct=nodes_dict[key],
            output_key_lst=source_handle_dict.get(key, []),
        )(**kwargs, pyiron_project=pyiron_project)
    return delayed_object_dict
89
+
90
+
91
def get_dict(**kwargs) -> dict:
    """Return a shallow copy of the dict passed via the ``kwargs`` keyword."""
    return dict(kwargs["kwargs"])
93
+
94
+
95
def get_list(**kwargs) -> list:
    """Return the values of the dict passed via the ``kwargs`` keyword, in order."""
    return [value for value in kwargs["kwargs"].values()]
97
+
98
+
99
+ def _remove_server_obj(nodes_dict: dict, edges_lst: list):
100
+ server_lst = [k for k in nodes_dict.keys() if k.startswith("server_obj_")]
101
+ for s in server_lst:
102
+ del nodes_dict[s]
103
+ edges_lst = [ep for ep in edges_lst if s not in ep]
104
+ return nodes_dict, edges_lst
105
+
106
+
107
def _get_nodes(connection_dict: dict, delayed_object_updated_dict: dict):
    """Re-key nodes by integer id, unwrapping DelayedObjects to their functions."""
    renumbered = {}
    for key, value in delayed_object_updated_dict.items():
        if isinstance(value, DelayedObject):
            renumbered[connection_dict[key]] = value._python_function
        else:
            renumbered[connection_dict[key]] = value
    return renumbered
112
+
113
+
114
def _get_unique_objects(nodes_dict: dict):
    """Deduplicate delayed objects in a pyiron node graph.

    Lists/dicts containing DelayedObjects are first wrapped in synthetic
    get_list/get_dict DelayedObjects; then objects with the same wrapped
    function and the same inputs are merged, recording the duplicates in
    ``match_dict`` (duplicate key -> canonical key). Plain values are carried
    over unchanged.

    :return: (key -> unique object/value, duplicate key -> canonical key)
    """
    delayed_object_dict = {}
    for k, v in nodes_dict.items():
        if isinstance(v, DelayedObject):
            delayed_object_dict[k] = v
        elif isinstance(v, list) and any([isinstance(el, DelayedObject) for el in v]):
            # Wrap lists of delayed objects in a synthetic get_list node.
            delayed_object_dict[k] = DelayedObject(function=get_list)
            delayed_object_dict[k]._input = {i: el for i, el in enumerate(v)}
            delayed_object_dict[k]._python_function = get_list
        elif isinstance(v, dict) and any(
            [isinstance(el, DelayedObject) for el in v.values()]
        ):
            # Wrap dicts of delayed objects in a synthetic get_dict node.
            delayed_object_dict[k] = DelayedObject(
                function=get_dict,
                **v,
            )
            delayed_object_dict[k]._python_function = get_dict
            delayed_object_dict[k]._input = v
    unique_lst = []
    delayed_object_updated_dict, match_dict = {}, {}
    for dobj in delayed_object_dict.keys():
        match = False
        for obj in unique_lst:
            # Two delayed objects are duplicates when both the wrapped
            # function and the full input mapping compare equal.
            if (
                delayed_object_updated_dict[obj]._python_function
                == delayed_object_dict[dobj]._python_function
                and delayed_object_dict[dobj]._input == delayed_object_dict[obj]._input
            ):
                delayed_object_updated_dict[obj] = delayed_object_dict[obj]
                match_dict[dobj] = obj
                match = True
                break
        if not match:
            unique_lst.append(dobj)
            delayed_object_updated_dict[dobj] = delayed_object_dict[dobj]
    # Carry over plain values (nothing delayed inside them) unchanged.
    update_dict = {}
    for k, v in nodes_dict.items():
        if not (
            isinstance(v, DelayedObject)
            or (
                isinstance(v, list) and any([isinstance(el, DelayedObject) for el in v])
            )
            or (
                isinstance(v, dict)
                and any([isinstance(el, DelayedObject) for el in v.values()])
            )
        ):
            update_dict[k] = v
    delayed_object_updated_dict.update(update_dict)
    return delayed_object_updated_dict, match_dict
164
+
165
+
166
+ def _get_connection_dict(delayed_object_updated_dict: dict, match_dict: dict):
167
+ new_obj_dict = {}
168
+ connection_dict = {}
169
+ lookup_dict = {}
170
+ for i, [k, v] in enumerate(delayed_object_updated_dict.items()):
171
+ new_obj_dict[i] = v
172
+ connection_dict[k] = i
173
+ lookup_dict[i] = k
174
+
175
+ for k, v in match_dict.items():
176
+ if v in connection_dict.keys():
177
+ connection_dict[k] = connection_dict[v]
178
+
179
+ return connection_dict, lookup_dict
180
+
181
+
182
def _get_edges_dict(
    edges_lst: list, nodes_dict: dict, connection_dict: dict, lookup_dict: dict
):
    """Translate pyiron graph edges into the shared edge schema.

    :param edges_lst: (input name, output name) pairs from the pyiron graph.
    :param nodes_dict: original node name -> object/value mapping.
    :param connection_dict: node name -> integer node id.
    :param lookup_dict: integer node id -> canonical node name.
    :return: list of edge dicts; duplicate (target, port) connections are
        emitted only once.
    """
    edges_dict_lst = []
    existing_connection_lst = []
    for ep in edges_lst:
        input_name, output_name = ep
        target = connection_dict[input_name]
        # The port name is the output name minus its trailing "_<suffix>".
        target_handle = "_".join(output_name.split("_")[:-1])
        connection_name = lookup_dict[target] + "_" + target_handle
        if connection_name not in existing_connection_lst:
            output = nodes_dict[output_name]
            if isinstance(output, DelayedObject):
                if output._list_index is not None:
                    # Output consumed through a list index -> synthetic port.
                    edges_dict_lst.append(
                        {
                            TARGET_LABEL: target,
                            TARGET_PORT_LABEL: target_handle,
                            SOURCE_LABEL: connection_dict[output_name],
                            SOURCE_PORT_LABEL: f"s_{output._list_index}",
                        }
                    )
                else:
                    # Named output key of the delayed object.
                    edges_dict_lst.append(
                        {
                            TARGET_LABEL: target,
                            TARGET_PORT_LABEL: target_handle,
                            SOURCE_LABEL: connection_dict[output_name],
                            SOURCE_PORT_LABEL: output._output_key,
                        }
                    )
            else:
                # Plain value source: anonymous port.
                edges_dict_lst.append(
                    {
                        TARGET_LABEL: target,
                        TARGET_PORT_LABEL: target_handle,
                        SOURCE_LABEL: connection_dict[output_name],
                        SOURCE_PORT_LABEL: None,
                    }
                )
            existing_connection_lst.append(connection_name)
    return edges_dict_lst
224
+
225
+
226
def load_workflow_json(file_name: str, project: Optional[Project] = None):
    """Reconstruct pyiron delayed objects from a workflow-definition JSON file.

    :param file_name: path of the workflow JSON file.
    :param project: pyiron Project; defaults to one in the current directory.
    :return: list of DelayedObject instances in topological order.
    """
    if project is None:
        project = Project(".")

    with open(file_name, "r") as f:
        content = json.load(f)

    edges_new_lst = content[EDGES_LABEL]
    nodes_new_dict = {}
    for k, v in convert_nodes_list_to_dict(nodes_list=content[NODES_LABEL]).items():
        if isinstance(v, str) and "." in v:
            p, m = v.rsplit(".", 1)
            # Swap in the pyiron-specific get_dict/get_list variants, which
            # expect their inputs under the "kwargs" keyword.
            if p == "python_workflow_definition.shared":
                p = "python_workflow_definition.pyiron_base"
            mod = import_module(p)
            nodes_new_dict[int(k)] = getattr(mod, m)
        else:
            nodes_new_dict[int(k)] = v

    total_lst = _group_edges(edges_new_lst)
    total_new_lst = _resort_total_lst(total_lst=total_lst, nodes_dict=nodes_new_dict)
    source_handle_dict = get_source_handles(edges_new_lst)
    delayed_object_dict = _get_delayed_object_dict(
        total_lst=total_new_lst,
        nodes_dict=nodes_new_dict,
        source_handle_dict=source_handle_dict,
        pyiron_project=project,
    )
    return list(delayed_object_dict.values())
255
+
256
+
257
def write_workflow_json(
    delayed_object: DelayedObject, file_name: str = "workflow.json"
):
    """Serialize a pyiron delayed-object graph to the shared JSON format.

    :param delayed_object: the terminal DelayedObject whose graph is exported.
    :param file_name: destination path of the JSON file.
    """
    nodes_dict, edges_lst = delayed_object.get_graph()
    # Server objects are pyiron-internal and must not appear in the export.
    nodes_dict, edges_lst = _remove_server_obj(
        nodes_dict=nodes_dict, edges_lst=edges_lst
    )
    delayed_object_updated_dict, match_dict = _get_unique_objects(nodes_dict=nodes_dict)
    connection_dict, lookup_dict = _get_connection_dict(
        delayed_object_updated_dict=delayed_object_updated_dict, match_dict=match_dict
    )
    nodes_new_dict = _get_nodes(
        connection_dict=connection_dict,
        delayed_object_updated_dict=delayed_object_updated_dict,
    )
    edges_new_lst = _get_edges_dict(
        edges_lst=edges_lst,
        nodes_dict=nodes_dict,
        connection_dict=connection_dict,
        lookup_dict=lookup_dict,
    )

    nodes_store_lst = []
    for k, v in nodes_new_dict.items():
        if isfunction(v):
            mod = v.__module__
            # Export the pyiron-specific helpers under their shared names so
            # other backends can re-import them.
            if mod == "python_workflow_definition.pyiron_base":
                mod = "python_workflow_definition.shared"
            nodes_store_lst.append({"id": k, "function": mod + "." + v.__name__})
        elif isinstance(v, np.ndarray):
            # numpy arrays are not JSON serializable; store as nested lists.
            nodes_store_lst.append({"id": k, "value": v.tolist()})
        else:
            nodes_store_lst.append({"id": k, "value": v})

    with open(file_name, "w") as f:
        json.dump({NODES_LABEL: nodes_store_lst, EDGES_LABEL: edges_new_lst}, f)
@@ -0,0 +1,45 @@
1
# JSON keys of the shared workflow-definition file format.
NODES_LABEL = "nodes"
EDGES_LABEL = "edges"
# Edge fields: an edge transfers source[sourcePort] -> target[targetPort];
# a port of None denotes the whole (anonymous) result.
SOURCE_LABEL = "source"
SOURCE_PORT_LABEL = "sourcePort"
TARGET_LABEL = "target"
TARGET_PORT_LABEL = "targetPort"
7
+
8
+
9
def get_dict(**kwargs) -> dict:
    """Bundle the received keyword arguments into a plain dict.

    NOTE: In WG, this will automatically be wrapped in a dict with the
    ``result`` key.
    """
    # dict(kwargs) copies the keyword mapping directly; the previous
    # comprehension rebuilt it item by item.
    return dict(kwargs)
13
+
14
+
15
def get_list(**kwargs) -> list:
    """Collect the received keyword-argument values into a list, in order."""
    return [*kwargs.values()]
17
+
18
+
19
def get_kwargs(lst: list) -> dict:
    """Index a list of edges by target port.

    Each target port maps to the {source, sourcePort} pair it receives; when
    several edges share a target port, the last one wins.
    """
    kwargs = {}
    for edge in lst:
        kwargs[edge[TARGET_PORT_LABEL]] = {
            SOURCE_LABEL: edge[SOURCE_LABEL],
            SOURCE_PORT_LABEL: edge[SOURCE_PORT_LABEL],
        }
    return kwargs
27
+
28
+
29
def get_source_handles(edges_lst: list) -> dict:
    """Collect, per source node, the source ports used by its outgoing edges.

    When a source has several edges that all use the anonymous port (None),
    the ports are replaced by positional indices 0..n-1.
    """
    source_handle_dict = {}
    for ed in edges_lst:
        # setdefault replaces the original membership-check-then-append pair.
        source_handle_dict.setdefault(ed[SOURCE_LABEL], []).append(
            ed[SOURCE_PORT_LABEL]
        )
    return {
        k: list(range(len(v))) if len(v) > 1 and all(el is None for el in v) else v
        for k, v in source_handle_dict.items()
    }
39
+
40
+
41
def convert_nodes_list_to_dict(nodes_list: list) -> dict:
    """Turn a list of node records into {str(id): payload}, sorted by id.

    The payload is the record's "value" when present, otherwise its
    "function" entry.
    """
    result = {}
    for entry in sorted(nodes_list, key=lambda d: d["id"]):
        payload = entry["value"] if "value" in entry else entry["function"]
        result[str(entry["id"])] = payload
    return result
@@ -0,0 +1,42 @@
1
+ Metadata-Version: 2.4
2
+ Name: python_workflow_definition
3
+ Version: 0.0.1
4
+ Summary: Python Workflow Definition - workflow interoperability for aiida, jobflow and pyiron
5
+ Author-email: Jan Janssen <janssen@mpie.de>, Janine George <janine.geogre@bam.de>, Julian Geiger <julian.geiger@psi.ch>, Xing Wang <xing.wang@psi.ch>, Marnik Bercx <marnik.bercx@psi.ch>, Christina Ertural <christina.ertural@bam.de>
6
+ License: BSD 3-Clause License
7
+
8
+ Copyright (c) 2025, Jan Janssen
9
+ All rights reserved.
10
+
11
+ Redistribution and use in source and binary forms, with or without
12
+ modification, are permitted provided that the following conditions are met:
13
+
14
+ * Redistributions of source code must retain the above copyright notice, this
15
+ list of conditions and the following disclaimer.
16
+
17
+ * Redistributions in binary form must reproduce the above copyright notice,
18
+ this list of conditions and the following disclaimer in the documentation
19
+ and/or other materials provided with the distribution.
20
+
21
+ * Neither the name of the copyright holder nor the names of its
22
+ contributors may be used to endorse or promote products derived from
23
+ this software without specific prior written permission.
24
+
25
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
28
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
29
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
34
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35
+ Requires-Dist: aiida-workgraph<=0.5.2,>=0.5.1
36
+ Requires-Dist: jobflow<=0.1.19,>=0.1.18
37
+ Requires-Dist: numpy<2,>=1.21
38
+ Requires-Dist: pyiron-base<=0.11.11,>=0.11.10
39
+ Provides-Extra: plot
40
+ Requires-Dist: ipython<=9.0.2,>=7.33.0; extra == 'plot'
41
+ Requires-Dist: networkx<=3.4.2,>=2.8.8; extra == 'plot'
42
+ Requires-Dist: pygraphviz<=1.14,>=1.10; extra == 'plot'
@@ -0,0 +1,11 @@
1
+ python_workflow_definition/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ python_workflow_definition/aiida.py,sha256=WvDE_2Bhv2IhqYdv-PZ3eGxHRw8k35PsSV4vk5qkOhg,5550
3
+ python_workflow_definition/executorlib.py,sha256=x6Nw01s3WsH7MGnw8-jvkdD5Yy_V4O0LTsDO_OKpUHs,2145
4
+ python_workflow_definition/jobflow.py,sha256=hkmwCl-xTOmHPd-9peawJolFrpwd6nvWwWu9K3L3Hxc,11809
5
+ python_workflow_definition/plot.py,sha256=L_FOSLp1kyNSkj3_owJpxIFe2raKCF0KBqRXa69xacE,1423
6
+ python_workflow_definition/purepython.py,sha256=YgJQaBP60GjOCAAhISf-Alc2DVrAs6U71hSBbcnSxlk,3154
7
+ python_workflow_definition/pyiron_base.py,sha256=ehFMKaZE2U5hhLx9KGwLkAZv1LqKsw7fsB0qZ-UfV3c,10562
8
+ python_workflow_definition/shared.py,sha256=ZjcxB0BdXspsMIjiZakqWBGvrwzxleM_7t4XsQ9topg,1320
9
+ python_workflow_definition-0.0.1.dist-info/METADATA,sha256=XBAKbtfMI54OXAPsfhlxCKA8RBXLsI5e0-ubBfsC49M,2491
10
+ python_workflow_definition-0.0.1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
11
+ python_workflow_definition-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py2-none-any
5
+ Tag: py3-none-any