PyPI - sera-2 - Versions diffs - 1.21.0__tar.gz → 1.21.2__tar.gz - Mend

sera-2: 1.21.0 (tar.gz) → 1.21.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{sera_2-1.21.0 → sera_2-1.21.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: sera-2
-Version: 1.21.0
+Version: 1.21.2
 Summary:
 Author: Binh Vu
 Author-email: bvu687@gmail.com

{sera_2-1.21.0 → sera_2-1.21.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sera-2"
-version = "1.21.0"
+version = "1.21.2"
 description = ""
 authors = ["Binh Vu <bvu687@gmail.com>"]
 readme = "README.md"

{sera_2-1.21.0 → sera_2-1.21.2}/sera/libs/directed_computing_graph/_dcg.py RENAMED Viewed

@@ -1,18 +1,6 @@
 from __future__ import annotations
-import asyncio
-import inspect
-from dataclasses import dataclass
-from enum import Enum
-from typing import (
-    Annotated,
-    Any,
-    Awaitable,
-    Callable,
-    MutableSequence,
-    Optional,
-    Sequence,
-)
+from typing import Annotated, Any, Callable, MutableSequence, Optional, Sequence
 from graph.retworkx import RetworkXStrDiGraph
@@ -53,6 +41,11 @@ class DirectedComputingGraph:
     ):
         self.graph = graph
         self.type_service = type_service
+        self.node2descendants: dict[str, list[DCGNode]] = {}
+        for u in graph.iter_nodes():
+            self.node2descendants[u.id] = graph.descendants(u.id)
+            self.node2descendants[u.id].append(u)
     @staticmethod
     def from_flows(
@@ -191,7 +184,7 @@ class DirectedComputingGraph:
     def execute(
         self,
         input: dict[ComputeFnId, tuple],
-        output: set[str],
+        output: Optional[set[str]] = None,
         context: Optional[
             dict[str, Callable | Any] | Callable[[], dict[str, Any]]
         ] = None,
@@ -215,6 +208,9 @@ class DirectedComputingGraph:
         else:
             context = {k: v() if callable(v) else v for k, v in context.items()}
+        if output is None:
+            output = set()
         # This is a quick reactive algorithm, we may be able to do it better.
         # The idea is when all inputs of a function is available, we can execute a function.
         # We assume that the memory is large enough to hold all the functions and their inputs
@@ -223,25 +219,25 @@ class DirectedComputingGraph:
         # we execute the computing nodes
         # when it's finished, we put the outgoing edges into a stack.
         runtimes: dict[NodeId, NodeRuntime] = {}
+        for id in input.keys():
+            for u in self.node2descendants[id]:
+                if u.id in input:
+                    # user provided input should supersede the context
+                    n_provided_args = len(input[u.id])
+                    n_consumed_context = n_provided_args - len(u.required_args)
+                else:
+                    n_consumed_context = 0
-        for u in self.graph.iter_nodes():
-            if u.id in input:
-                # user provided input should supersede the context
-                n_provided_args = len(input[u.id])
-                n_consumed_context = n_provided_args - len(u.required_args)
-            else:
-                n_consumed_context = 0
-            node_context = tuple(
-                (
-                    context[name]
-                    if name in context
-                    else u.required_context_default_args[name]
+                node_context = tuple(
+                    (
+                        context[name]
+                        if name in context
+                        else u.required_context_default_args[name]
+                    )
+                    for name in u.required_context[n_consumed_context:]
                 )
-                for name in u.required_context[n_consumed_context:]
-            )
-            runtimes[u.id] = NodeRuntime.from_node(self.graph, u, node_context)
+                runtimes[u.id] = NodeRuntime.from_node(self.graph, u, node_context)
         stack: list[NodeId] = []
         for id, args in input.items():
@@ -296,7 +292,7 @@ class DirectedComputingGraph:
     async def execute_async(
         self,
         input: dict[ComputeFnId, tuple],
-        output: set[str],
+        output: Optional[set[str]] = None,
         context: Optional[
             dict[str, Callable | Any] | Callable[[], dict[str, Any]]
         ] = None,
@@ -321,6 +317,9 @@ class DirectedComputingGraph:
         else:
             context = {k: v() if callable(v) else v for k, v in context.items()}
+        if output is None:
+            output = set()
         # This is a quick reactive algorithm, we may be able to do it better.
         # The idea is when all inputs of a function is available, we can execute a function.
         # We assume that the memory is large enough to hold all the functions and their inputs
@@ -330,24 +329,25 @@ class DirectedComputingGraph:
         # when it's finished, we put the outgoing edges into a stack.
         runtimes: dict[NodeId, NodeRuntime] = {}
-        for u in self.graph.iter_nodes():
-            if u.id in input:
-                # user provided input should supersede the context
-                n_provided_args = len(input[u.id])
-                n_consumed_context = n_provided_args - len(u.required_args)
-            else:
-                n_consumed_context = 0
+        for id in input.keys():
+            for u in self.node2descendants[id]:
+                if u.id in input:
+                    # user provided input should supersede the context
+                    n_provided_args = len(input[u.id])
+                    n_consumed_context = n_provided_args - len(u.required_args)
+                else:
+                    n_consumed_context = 0
-            node_context = tuple(
-                (
-                    context[name]
-                    if name in context
-                    else u.required_context_default_args[name]
+                node_context = tuple(
+                    (
+                        context[name]
+                        if name in context
+                        else u.required_context_default_args[name]
+                    )
+                    for name in u.required_context[n_consumed_context:]
                 )
-                for name in u.required_context[n_consumed_context:]
-            )
+                runtimes[u.id] = NodeRuntime.from_node(self.graph, u, node_context)
-            runtimes[u.id] = NodeRuntime.from_node(self.graph, u, node_context)
         stack: list[NodeId] = []
         for id, args in input.items():

{sera_2-1.21.0 → sera_2-1.21.2}/sera/misc/__init__.py RENAMED Viewed

@@ -4,6 +4,7 @@ from sera.misc._utils import (
     RelTableIndex,
     assert_isinstance,
     assert_not_null,
+    auto_import,
     filter_duplication,
     get_classpath,
     identity,
@@ -32,4 +33,5 @@ __all__ = [
     "RelTableIndex",
     "load_data_from_dir",
     "replay_events",
+    "auto_import",
 ]

{sera_2-1.21.0 → sera_2-1.21.2}/sera/misc/_utils.py RENAMED Viewed

@@ -1,9 +1,9 @@
 from __future__ import annotations
-import importlib
 import inspect
 import re
 from collections import defaultdict
+from importlib import import_module
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
@@ -23,7 +23,8 @@ import orjson
 import serde.csv
 import serde.json
 from loguru import logger
-from sqlalchemy import Engine, text
+from sqlalchemy import Engine, select, text
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession
 from sqlalchemy.orm import Session
 from tqdm import tqdm
@@ -77,7 +78,7 @@ reserved_keywords = {
 def import_attr(attr_ident: str):
     lst = attr_ident.rsplit(".", 1)
     module, cls = lst
-    module = importlib.import_module(module)
+    module = import_module(module)
     return getattr(module, cls)
@@ -147,7 +148,7 @@ def identity(x: T) -> T:
 def get_classpath(type: Type | Callable) -> str:
-    if type.__module__ == "builtins":
+    if hasattr(type, "__module__") and type.__module__ == "builtins":
         return type.__qualname__
     if hasattr(type, "__qualname__"):
@@ -174,7 +175,7 @@ def get_dbclass_deser_func(type: type[T]) -> Callable[[dict], T]:
         .replace(".models.db.", ".models.data.")
         .rsplit(".", maxsplit=1)
     )
-    StructType = getattr(importlib.import_module(module), f"Create{clsname}")
+    StructType = getattr(import_module(module), f"Create{clsname}")
     def deser_func(obj: dict):
         record = msgspec.json.decode(orjson.dumps(obj), type=StructType)
@@ -187,6 +188,29 @@ def get_dbclass_deser_func(type: type[T]) -> Callable[[dict], T]:
     return deser_func
+def auto_import(module: type):
+    """Auto-import all submodules of a given module."""
+    mdir = Path(module.__path__[0])
+    for py_file in mdir.rglob("*.py"):
+        if py_file.name == "__init__.py":
+            continue
+        # Get the path of the submodule relative to the parent module's directory
+        relative_path = py_file.relative_to(mdir)
+        # Create the module import string from the file path
+        # e.g., for a file like `sub/module.py`, this creates `sub.module`
+        module_parts = list(relative_path.parts)
+        module_parts[-1] = relative_path.stem  # remove .py extension
+        relative_module_name = ".".join(module_parts)
+        # Construct the full module path
+        full_module_path = f"{module.__name__}.{relative_module_name}"
+        # Dynamically import the module
+        import_module(full_module_path)
 class LoadTableDataArgs(TypedDict, total=False):
     table: type
     tables: Sequence[type]
@@ -374,7 +398,10 @@ def load_data_from_dir(
 async def replay_events(
-    engine: AsyncEngine, dcg: DirectedComputingGraph, tables: Sequence[type]
+    engine: AsyncEngine,
+    dcg: DirectedComputingGraph,
+    tables: Sequence[type],
+    verbose: bool = False,
 ):
     """Replay the events in the DirectedComputingGraph. This is useful to re-run the workflows
     that computes derived data after initial data loading.
@@ -383,7 +410,12 @@ async def replay_events(
         for tbl in tables:
             innode = f"{tbl.__tablename__}.create"
             for record in tqdm(
-                session.execute(select(tbl)).scalars(),
+                (await session.execute(select(tbl))).scalars(),
                 desc=f"Replaying events for {tbl.__tablename__}",
+                disable=not verbose,
             ):
-                await dcg.execute_async(input={innode: record})
+                await dcg.execute_async(
+                    input={innode: (record,)}, context={"session": session}
+                )
+        await session.commit()