edges 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of edges might be problematic; see the registry's release page for more details.

Files changed (66)
  1. edges/__init__.py +9 -2
  2. edges/data/AWARE 2.0_Country_all_yearly.json +8 -1
  3. edges/data/AWARE 2.0_Country_irri_yearly.json +8 -1
  4. edges/data/AWARE 2.0_Country_non_irri_yearly.json +8 -1
  5. edges/data/AWARE 2.0_Country_unspecified_yearly.json +8 -1
  6. edges/data/GeoPolRisk_paired_2024.json +7 -0
  7. edges/data/ImpactWorld+ 2.1_Freshwater acidification_damage.json +8 -1
  8. edges/data/ImpactWorld+ 2.1_Freshwater acidification_midpoint.json +8 -1
  9. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, long term_damage.json +8 -1
  10. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, short term_damage.json +8 -1
  11. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity_midpoint.json +8 -1
  12. edges/data/ImpactWorld+ 2.1_Freshwater eutrophication_damage.json +8 -1
  13. edges/data/ImpactWorld+ 2.1_Freshwater eutrophication_midpoint.json +8 -1
  14. edges/data/ImpactWorld+ 2.1_Land occupation, biodiversity_damage.json +8 -1
  15. edges/data/ImpactWorld+ 2.1_Land occupation, biodiversity_midpoint.json +8 -1
  16. edges/data/ImpactWorld+ 2.1_Land transformation, biodiversity_damage.json +8 -1
  17. edges/data/ImpactWorld+ 2.1_Land transformation, biodiversity_midpoint.json +8 -1
  18. edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, long term_damage.json +8 -1
  19. edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, short term_damage.json +8 -1
  20. edges/data/ImpactWorld+ 2.1_Marine eutrophication_damage.json +8 -1
  21. edges/data/ImpactWorld+ 2.1_Marine eutrophication_midpoint.json +8 -1
  22. edges/data/ImpactWorld+ 2.1_Particulate matter formation_damage.json +8 -1
  23. edges/data/ImpactWorld+ 2.1_Particulate matter formation_midpoint.json +8 -1
  24. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, ecosystem quality_damage.json +8 -1
  25. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, human health_damage.json +8 -1
  26. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation_midpoint.json +8 -1
  27. edges/data/ImpactWorld+ 2.1_Terrestrial acidification_damage.json +8 -1
  28. edges/data/ImpactWorld+ 2.1_Terrestrial acidification_midpoint.json +8 -1
  29. edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, long term_damage.json +8 -1
  30. edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, short term_damage.json +8 -1
  31. edges/data/ImpactWorld+ 2.1_Thermally polluted water_damage.json +8 -1
  32. edges/data/ImpactWorld+ 2.1_Water availability, freshwater ecosystem_damage.json +8 -1
  33. edges/data/ImpactWorld+ 2.1_Water availability, human health_damage.json +8 -1
  34. edges/data/ImpactWorld+ 2.1_Water availability, terrestrial ecosystem_damage.json +8 -1
  35. edges/data/ImpactWorld+ 2.1_Water scarcity_midpoint.json +8 -1
  36. edges/data/LCC 1.0_2023.json +8 -1
  37. edges/data/RELICS_copper_primary.json +44 -0
  38. edges/data/RELICS_copper_secondary.json +42 -0
  39. edges/data/SCP_1.0.json +4 -1
  40. edges/edgelcia.py +2113 -816
  41. edges/flow_matching.py +344 -130
  42. edges/georesolver.py +61 -2
  43. edges/supply_chain.py +2052 -0
  44. edges/uncertainty.py +37 -8
  45. {edges-1.0.2.dist-info → edges-1.0.3.dist-info}/METADATA +5 -2
  46. edges-1.0.3.dist-info/RECORD +57 -0
  47. edges/data/GeoPolRisk_elementary flows_2024.json +0 -877
  48. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, long term_midpoint.json +0 -5
  49. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity, short term_midpoint.json +0 -5
  50. edges/data/ImpactWorld+ 2.1_Freshwater ecotoxicity_damage.json +0 -0
  51. edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, long term_midpoint.json +0 -5
  52. edges/data/ImpactWorld+ 2.1_Marine ecotoxicity, short term_midpoint.json +0 -5
  53. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, ecosystem quality_midpoint.json +0 -5
  54. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation, human health_midpoint.json +0 -5
  55. edges/data/ImpactWorld+ 2.1_Photochemical ozone formation_damage.json +0 -5
  56. edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, long term_midpoint.json +0 -5
  57. edges/data/ImpactWorld+ 2.1_Terrestrial ecotoxicity, short term_midpoint.json +0 -5
  58. edges/data/ImpactWorld+ 2.1_Thermally polluted water_midpoint.json +0 -5
  59. edges/data/ImpactWorld+ 2.1_Water availability, freshwater ecosystem_midpoint.json +0 -5
  60. edges/data/ImpactWorld+ 2.1_Water availability, human health_midpoint.json +0 -5
  61. edges/data/ImpactWorld+ 2.1_Water availability, terrestrial ecosystem_midpoint.json +0 -5
  62. edges/data/ImpactWorld+ 2.1_Water scarcity_damage.json +0 -5
  63. edges/data/RELICS_copper.json +0 -22
  64. edges-1.0.2.dist-info/RECORD +0 -71
  65. {edges-1.0.2.dist-info → edges-1.0.3.dist-info}/WHEEL +0 -0
  66. {edges-1.0.2.dist-info → edges-1.0.3.dist-info}/top_level.txt +0 -0
edges/flow_matching.py CHANGED
@@ -5,7 +5,7 @@ from copy import deepcopy
5
5
  import json, time
6
6
  from typing import NamedTuple, List, Optional
7
7
 
8
- from .utils import make_hashable, _short_cf, _head
8
+ from edges.utils import make_hashable, _short_cf, _head
9
9
 
10
10
 
11
11
  import logging
@@ -56,6 +56,14 @@ def process_cf_list(
56
56
  filtered_supplier: dict,
57
57
  filtered_consumer: dict,
58
58
  ) -> list:
59
+ """
60
+ Select the best-matching CF from a candidate list given supplier/consumer filters.
61
+
62
+ :param cf_list: List of candidate CF dictionaries.
63
+ :param filtered_supplier: Supplier-side fields to match against.
64
+ :param filtered_consumer: Consumer-side fields to match against.
65
+ :return: List with the single best CF (or empty if none matched).
66
+ """
59
67
  results = []
60
68
  best_score = -1
61
69
  best_cf = None
@@ -69,7 +77,7 @@ def process_cf_list(
69
77
  criteria=supplier_cf,
70
78
  )
71
79
 
72
- if supplier_match is False:
80
+ if not supplier_match:
73
81
  continue
74
82
 
75
83
  consumer_match = match_flow(
@@ -77,7 +85,7 @@ def process_cf_list(
77
85
  criteria=consumer_cf,
78
86
  )
79
87
 
80
- if consumer_match is False:
88
+ if not consumer_match:
81
89
  continue
82
90
 
83
91
  match_score = 0
@@ -98,9 +106,10 @@ def process_cf_list(
98
106
  if match_score > best_score:
99
107
  best_score = match_score
100
108
  best_cf = cf
109
+ if best_score == 2:
110
+ break
101
111
 
102
112
  if best_cf:
103
- logger.debug("Best matching CF selected with score %d: %s", best_score, best_cf)
104
113
  results.append(best_cf)
105
114
  else:
106
115
  logger.debug(
@@ -113,7 +122,13 @@ def process_cf_list(
113
122
 
114
123
 
115
124
  def matches_classifications(cf_classifications, dataset_classifications):
116
- """Match CF classification codes to dataset classifications."""
125
+ """
126
+ Check if CF classification codes match dataset classifications (prefix logic).
127
+
128
+ :param cf_classifications: CF-side classifications (dict or list/tuple).
129
+ :param dataset_classifications: Dataset classifications as list/tuple pairs.
130
+ :return: True if at least one scheme/code pair matches by prefix, else False.
131
+ """
117
132
 
118
133
  if isinstance(cf_classifications, dict):
119
134
  cf_classifications = [
@@ -148,6 +163,14 @@ def matches_classifications(cf_classifications, dataset_classifications):
148
163
 
149
164
 
150
165
  def match_flow(flow: dict, criteria: dict) -> bool:
166
+ """
167
+ Match a flow dictionary against criteria with operator and exclude support.
168
+
169
+ :param flow: Flow metadata to test.
170
+ :param criteria: Matching criteria (fields, operator, excludes, classifications).
171
+ :return: True if all non-special fields match, else False.
172
+ """
173
+
151
174
  operator = criteria.get("operator", "equals")
152
175
  excludes = criteria.get("excludes", [])
153
176
 
@@ -215,7 +238,12 @@ def match_operator(value: str, target: str, operator: str) -> bool:
215
238
 
216
239
 
217
240
  def normalize_classification_entries(cf_list: list[dict]) -> list[dict]:
241
+ """
242
+ Normalize supplier-side 'classifications' to a flat tuple of (scheme, code).
218
243
 
244
+ :param cf_list: List of CF dictionaries to normalize in-place.
245
+ :return: The same list with normalized supplier classifications.
246
+ """
219
247
  for cf in cf_list:
220
248
  supplier = cf.get("supplier", {})
221
249
  classifications = supplier.get("classifications")
@@ -244,8 +272,10 @@ def normalize_classification_entries(cf_list: list[dict]) -> list[dict]:
244
272
 
245
273
  def build_cf_index(raw_cfs: list[dict]) -> dict:
246
274
  """
247
- Build a nested CF index:
248
- cf_index[(supplier_loc, consumer_loc)] → list of CFs
275
+ Build a CF index keyed by (supplier_location, consumer_location).
276
+
277
+ :param raw_cfs: List of CF dictionaries.
278
+ :return: Dict mapping (supplier_loc, consumer_loc) -> list of CFs.
249
279
  """
250
280
  index = defaultdict(list)
251
281
 
@@ -262,6 +292,7 @@ def build_cf_index(raw_cfs: list[dict]) -> dict:
262
292
  def cached_match_with_index(flow_to_match_hashable, required_fields_tuple):
263
293
  flow_to_match = dict(flow_to_match_hashable)
264
294
  required_fields = set(required_fields_tuple)
295
+ # the contexts live on the function as attributes
265
296
  return match_with_index(
266
297
  flow_to_match,
267
298
  cached_match_with_index.index,
@@ -273,9 +304,11 @@ def cached_match_with_index(flow_to_match_hashable, required_fields_tuple):
273
304
 
274
305
  def preprocess_flows(flows_list: list, mandatory_fields: set) -> dict:
275
306
  """
276
- Preprocess flows into a lookup dictionary.
277
- Each flow is keyed by a tuple of selected metadata fields.
278
- If no fields are present, falls back to a single universal key ().
307
+ Preprocess flows into a lookup dict keyed by selected metadata fields.
308
+
309
+ :param flows_list: Iterable of flow dicts with at least a 'position' key.
310
+ :param mandatory_fields: Set of fields to include in the key (may be empty).
311
+ :return: Dict where key is a tuple of (field, value) and value is list of positions.
279
312
  """
280
313
  lookup = {}
281
314
 
@@ -328,6 +361,12 @@ def build_index(lookup: dict, required_fields: set) -> dict:
328
361
 
329
362
 
330
363
  class MatchResult(NamedTuple):
364
+ """Result container for indexed matching.
365
+
366
+ :var matches: List of matched positions.
367
+ :var location_only_rejects: Map of position -> reason ("location").
368
+ """
369
+
331
370
  matches: List[int]
332
371
  location_only_rejects: dict[int, str]
333
372
 
@@ -339,6 +378,9 @@ def match_with_index(
339
378
  required_fields: set,
340
379
  reversed_lookup: dict,
341
380
  ) -> MatchResult:
381
+ """
382
+ Match a flow to positions using a per-field inverted index and full criteria.
383
+ """
342
384
  SPECIAL = {"excludes", "operator", "matrix"}
343
385
  nonloc_fields = [f for f in required_fields if f not in SPECIAL and f != "location"]
344
386
  has_location_constraint = ("location" in required_fields) and (
@@ -382,14 +424,46 @@ def match_with_index(
382
424
  if not keys:
383
425
  return []
384
426
  out = []
427
+ # Fast path: no excludes -> everything in these keys already matches
428
+ excludes = ft_for_matchflow.get("excludes")
429
+ if not excludes:
430
+ for key in keys:
431
+ # lookup_mapping[key] is the list of positions for this composite key
432
+ bucket = lookup_mapping.get(key)
433
+ if bucket:
434
+ out.extend(bucket)
435
+ return out
436
+
437
+ # Slow path: excludes present -> filter per-record once
438
+ # Normalize excludes for faster checks
439
+ ex = tuple(e.lower() for e in (excludes or ()))
385
440
  for key in keys:
386
- for pos in lookup_mapping.get(key, []):
441
+ bucket = lookup_mapping.get(key)
442
+ if not bucket:
443
+ continue
444
+ for pos in bucket:
387
445
  raw = reversed_lookup[pos]
388
446
  flow = dict(raw) if isinstance(raw, tuple) else raw
389
- if flow and match_flow(flow, ft_for_matchflow):
390
- out.append(pos)
447
+ # Only scan string fields; short-circuit early
448
+ if any(
449
+ isinstance(v, str) and any(e in v.lower() for e in ex)
450
+ for v in flow.values()
451
+ ):
452
+ continue
453
+ out.append(pos)
391
454
  return out
392
455
 
456
+ def intersect_smallest_first(sets_iterable):
457
+ sets_list = [s for s in sets_iterable if s is not None]
458
+ if not sets_list:
459
+ return set()
460
+ acc = min(sets_list, key=len).copy()
461
+ for s in sorted((x for x in sets_list if x is not acc), key=len):
462
+ acc &= s
463
+ if not acc:
464
+ break
465
+ return acc
466
+
393
467
  # --- SPECIAL CASE: only 'location' is required ---
394
468
  if not nonloc_fields and has_location_constraint:
395
469
  all_keys = set(lookup_mapping.keys())
@@ -413,22 +487,28 @@ def match_with_index(
413
487
 
414
488
  # --- NORMAL PATH: there are non-location required fields ---
415
489
  if nonloc_fields:
416
- pre_location_keys = None
490
+ # Build candidate key sets per non-location field
491
+ per_field_sets = []
417
492
  for field in nonloc_fields:
418
493
  cand = field_candidates(field, flow_to_match.get(field), op)
419
- pre_location_keys = (
420
- cand if pre_location_keys is None else (pre_location_keys & cand)
421
- )
422
- if not pre_location_keys:
494
+ if not cand:
495
+ # Any empty set means no matches possible
423
496
  return MatchResult(matches=[], location_only_rejects={})
497
+ per_field_sets.append(cand)
498
+
499
+ # Intersect smallest-first for speed
500
+ pre_location_keys = intersect_smallest_first(per_field_sets)
501
+ if not pre_location_keys:
502
+ return MatchResult(matches=[], location_only_rejects={})
424
503
  else:
425
- # no required fields at all
504
+ # no required fields at all → start from all keys
426
505
  pre_location_keys = set(lookup_mapping.keys())
427
506
 
428
- # apply location last
507
+ # Apply location as an extra filter (kept separate to preserve location-only diagnostics)
429
508
  candidate_keys = pre_location_keys
430
509
  if has_location_constraint:
431
510
  loc_cand = field_candidates("location", flow_to_match.get("location"), op)
511
+ # Intersect with location last (fast set op on already reduced key-space)
432
512
  candidate_keys = pre_location_keys & loc_cand
433
513
 
434
514
  # noloc matches (for diagnosing location-only)
@@ -440,7 +520,7 @@ def match_with_index(
440
520
  full_matches = gather_positions(candidate_keys, flow_to_match)
441
521
 
442
522
  loc_only = (
443
- set(noloc_matches) - set(full_matches) if has_location_constraint else set()
523
+ (set(noloc_matches) - set(full_matches)) if has_location_constraint else set()
444
524
  )
445
525
 
446
526
  return MatchResult(
@@ -450,8 +530,17 @@ def match_with_index(
450
530
 
451
531
 
452
532
  def compute_cf_memoized_factory(
453
- cf_index, required_supplier_fields, required_consumer_fields, weights
533
+ cf_index, required_supplier_fields, required_consumer_fields
454
534
  ):
535
+ """
536
+ Factory for a memoized compute_average_cf over signature/location candidates.
537
+
538
+ :param cf_index: CF index keyed by (supplier_loc, consumer_loc).
539
+ :param required_supplier_fields: Required fields for supplier signature.
540
+ :param required_consumer_fields: Required fields for consumer signature.
541
+ :return: Cached function(s_key, c_key, supplier_candidates, consumer_candidates) -> tuple.
542
+ """
543
+
455
544
  @lru_cache(maxsize=None)
456
545
  def compute_cf(s_key, c_key, supplier_candidates, consumer_candidates):
457
546
  return compute_average_cf(
@@ -468,6 +557,14 @@ def compute_cf_memoized_factory(
468
557
 
469
558
 
470
559
  def normalize_signature_data(info_dict, required_fields):
560
+ """
561
+ Filter and normalize a dict to required fields for signature hashing.
562
+
563
+ :param info_dict: Original supplier/consumer info dict.
564
+ :param required_fields: Required field names to keep.
565
+ :return: Filtered dict with normalized 'classifications' if present.
566
+ """
567
+
471
568
  filtered = {k: info_dict[k] for k in required_fields if k in info_dict}
472
569
 
473
570
  # Normalize classifications
@@ -501,49 +598,76 @@ def normalize_signature_data(info_dict, required_fields):
501
598
  return filtered
502
599
 
503
600
 
504
- @lru_cache(maxsize=None)
601
+ @lru_cache(maxsize=4096)
602
+ def _available_locs_from_weights(weights_key_tuple: tuple, supplier: bool) -> tuple:
603
+ """
604
+ Project available locations from a stable weights key.
605
+ weights_key_tuple is a tuple of (supplier_loc, consumer_loc) pairs.
606
+ Returns a sorted, de-duplicated tuple of allowed codes for the given side.
607
+ """
608
+ if supplier:
609
+ vals = {w[0] for w in weights_key_tuple}
610
+ else:
611
+ vals = {w[1] for w in weights_key_tuple}
612
+ # Keep deterministic order; don't special-case __ANY__ here
613
+ return tuple(sorted(vals))
614
+
615
+
616
+ @lru_cache(maxsize=200_000)
505
617
  def resolve_candidate_locations(
506
618
  *,
507
619
  geo,
508
620
  location: str,
509
- weights: frozenset,
621
+ weights: tuple,
510
622
  containing: bool = False,
511
- exceptions: set = None,
623
+ exceptions: tuple | None = None,  # <- changed: tuple for caching
512
624
  supplier: bool = True,
513
- ) -> list:
625
+ ) -> tuple:
514
626
  """
515
- Resolve candidate consumer locations from a base location.
516
-
517
- Parameters:
518
- - geo: GeoResolver instance
519
- - location: base location string (e.g., "GLO", "CH")
520
- - weights: valid weight region codes
521
- - containing: if True, return regions containing the location;
522
- if False, return regions contained by the location
523
- - exceptions: list of regions to exclude (used with GLO fallback)
524
-
525
- Returns:
526
- - list of valid candidate location codes
627
+ Cached candidate resolver:
628
+ - derives available locations once per weights_key_tuple + side
629
+ - filters inside (including dropping 'GLO' when expanding GLO) to avoid extra list comps in hot loops
630
+ - returns a tuple (hashable, deterministic)
527
631
  """
528
632
  try:
633
+ exceptions = list(exceptions) if exceptions else []
529
634
  candidates = geo.resolve(
530
- location=location,
531
- containing=containing,
532
- exceptions=exceptions or [],
635
+ location=location, containing=containing, exceptions=exceptions
533
636
  )
534
637
  except KeyError:
535
- return []
638
+ return tuple()
639
+
640
+ # When expanding GLO to its contained regions, drop 'GLO' itself here
641
+ if containing and isinstance(location, str) and location == "GLO":
642
+ candidates = [c for c in candidates if c != "GLO"]
536
643
 
537
- if supplier is True:
538
- available_locs = [loc[0] for loc in weights]
644
+ avail = _available_locs_from_weights(weights, supplier=supplier)
645
+
646
+ # If wildcard is allowed on this side, we don't filter candidates by availability
647
+ if "__ANY__" in avail:
648
+ pool = candidates
539
649
  else:
540
- available_locs = [loc[1] for loc in weights]
541
- return [loc for loc in candidates if loc in available_locs]
650
+ # avail is small; convert to set once for O(1) membership
651
+ a = set(avail)
652
+ pool = [loc for loc in candidates if loc in a]
653
+
654
+ # Deterministic ordering across platforms
655
+ # If you still want 'GLO' first (we dropped it above for GLO-expansion),
656
+ # keep the same policy for non-GLO locations
657
+ return tuple(sorted(set(pool)))
542
658
 
543
659
 
544
660
  def group_edges_by_signature(
545
661
  edge_list, required_supplier_fields, required_consumer_fields
546
662
  ):
663
+ """
664
+ Group edges by (supplier signature, consumer signature, candidate locations).
665
+
666
+ :param edge_list: Iterable of (s_idx, c_idx, s_info, c_info, s_cands, c_cands).
667
+ :param required_supplier_fields: Supplier fields required for signature.
668
+ :param required_consumer_fields: Consumer fields required for signature.
669
+ :return: Dict[(s_key, c_key, (s_cands, c_cands))] -> list of (s_idx, c_idx).
670
+ """
547
671
  grouped = defaultdict(list)
548
672
 
549
673
  for (
@@ -567,12 +691,15 @@ def group_edges_by_signature(
567
691
 
568
692
  grouped[(s_key, c_key, loc_key)].append((supplier_idx, consumer_idx))
569
693
 
694
+ for _k in grouped:
695
+ grouped[_k].sort()
696
+
570
697
  return grouped
571
698
 
572
699
 
573
700
  def compute_average_cf(
574
- candidate_suppliers: list,
575
- candidate_consumers: list,
701
+ candidate_suppliers: list | tuple,
702
+ candidate_consumers: list | tuple,
576
703
  supplier_info: dict,
577
704
  consumer_info: dict,
578
705
  cf_index: dict,
@@ -580,12 +707,62 @@ def compute_average_cf(
580
707
  required_consumer_fields: set = None,
581
708
  ) -> tuple[str | float, Optional[dict], Optional[dict]]:
582
709
  """
583
- Compute weighted CF and a canonical aggregated uncertainty for composite regions.
584
- Returns: (expr_or_value, matched_cf_obj|None, agg_uncertainty|None)
710
+ Compute a weighted CF expression and aggregated uncertainty for composite regions.
711
+ Deterministic across platforms without deep freezing: we sort by (s_loc, c_loc, cf_signature),
712
+ where cf_signature is a compact, shallow tuple of stable fields.
585
713
  """
586
- # Optional timing (only if DEBUG)
587
714
  _t0 = time.perf_counter() if logger.isEnabledFor(logging.DEBUG) else None
588
715
 
716
+ # ---- compact, shallow signatures (no deep recursion) ----
717
+ # Keep only a few stable fields that define semantics; fall back to repr for odd types.
718
+ def _cf_signature(cf: dict) -> tuple:
719
+ # Pull once to locals (avoid many dict.get calls)
720
+ # Choose a small set of fields that make equal CFs sort adjacent/stably
721
+ v = cf.get("value")
722
+ w = cf.get("weight")
723
+ u = cf.get("unit")
724
+ sym = cf.get("symbolic") # expression or None
725
+ # If there is an explicit identifier, prefer it for stability
726
+ cfid = cf.get("id") or cf.get("code") or None
727
+ # Normalize numerics; avoid touching nested dicts/lists
728
+ try:
729
+ v_norm = float(v) if isinstance(v, (int, float)) else repr(v)
730
+ except Exception:
731
+ v_norm = repr(v)
732
+ try:
733
+ w_norm = (
734
+ float(w)
735
+ if isinstance(w, (int, float))
736
+ else (0.0 if w in (None, "", False) else 0.0)
737
+ )
738
+ except Exception:
739
+ w_norm = 0.0
740
+ return (cfid, v_norm, u or "", bool(sym))
741
+
742
+ def _unc_signature(unc: dict | None) -> tuple:
743
+ if not unc:
744
+ return ("",)
745
+ dist = unc.get("distribution", "")
746
+ neg = unc.get("negative", None)
747
+ # Shallow, order-stable snapshot of top-level parameters only
748
+ params = unc.get("parameters")
749
+ if isinstance(params, dict):
750
+ # Only sort top-level keys; values kept as-is (repr) to avoid deep cost
751
+ par_sig = tuple(sorted((k, repr(params[k])) for k in params.keys()))
752
+ else:
753
+ par_sig = repr(params)
754
+ return (
755
+ dist,
756
+ 1 if neg in (1, True) else 0 if neg in (0, False) else -1,
757
+ par_sig,
758
+ )
759
+
760
+ # ---------- 1) Canonicalize candidate pools (once) ----------
761
+ if not isinstance(candidate_suppliers, tuple):
762
+ candidate_suppliers = tuple(set(candidate_suppliers))
763
+ if not isinstance(candidate_consumers, tuple):
764
+ candidate_consumers = tuple(set(candidate_consumers))
765
+
589
766
  if not candidate_suppliers and not candidate_consumers:
590
767
  logger.warning(
591
768
  "CF-AVG: no candidate locations provided | supplier_cands=%s | consumer_cands=%s",
@@ -594,59 +771,60 @@ def compute_average_cf(
594
771
  )
595
772
  return 0, None, None
596
773
 
597
- # -------- Gate 1: location-key presence in cf_index --------
598
- valid_location_pairs = [
599
- (s, c)
600
- for s in candidate_suppliers
601
- for c in candidate_consumers
602
- if cf_index.get((s, c))
603
- ]
774
+ S = candidate_suppliers
775
+ C = candidate_consumers
776
+ setS, setC = set(S), set(C)
777
+
778
+ # ---------- 2) Efficient valid (s,c) pair discovery ----------
779
+ idx_keys = cf_index.keys()
780
+ prod_size = len(S) * len(C)
781
+ if prod_size and prod_size <= len(idx_keys):
782
+ valid_location_pairs = [(s, c) for s in S for c in C if (s, c) in cf_index]
783
+ # S and C are already sorted; this is lexicographically ordered
784
+ else:
785
+ valid_location_pairs = [k for k in idx_keys if k[0] in setS and k[1] in setC]
786
+ valid_location_pairs.sort()
604
787
 
605
788
  if not valid_location_pairs:
606
789
  if logger.isEnabledFor(logging.DEBUG):
607
- # show small sample of what keys do exist for quick diagnosis
608
- some_keys = _head(cf_index.keys(), 10)
790
+ some_keys = _head(idx_keys, 10)
609
791
  logger.debug(
610
792
  "CF-AVG: no (supplier,consumer) keys in cf_index for candidates "
611
793
  "| suppliers=%s | consumers=%s | sample_index_keys=%s",
612
- _head(candidate_suppliers),
613
- _head(candidate_consumers),
794
+ _head(S),
795
+ _head(C),
614
796
  some_keys,
615
797
  )
616
798
  return 0, None, None
617
- else:
618
- if logger.isEnabledFor(logging.DEBUG):
619
- logger.debug(
620
- "CF-AVG: %d valid (s,c) keys found (showing up to 10): %s",
621
- len(valid_location_pairs),
622
- _head(valid_location_pairs, 10),
623
- )
624
799
 
625
- # Build field-filtered views (exclude location; added per-loop)
626
- filtered_supplier = {
800
+ # ---------- 3) Base, field-filtered views (exclude 'location' here) ----------
801
+ required_supplier_fields = required_supplier_fields or set()
802
+ required_consumer_fields = required_consumer_fields or set()
803
+
804
+ base_supplier = {
627
805
  k: supplier_info[k]
628
- for k in (required_supplier_fields or ())
806
+ for k in required_supplier_fields
629
807
  if k in supplier_info and k != "location"
630
808
  }
631
- filtered_consumer = {
809
+ base_consumer = {
632
810
  k: consumer_info[k]
633
- for k in (required_consumer_fields or ())
811
+ for k in required_consumer_fields
634
812
  if k in consumer_info and k != "location"
635
813
  }
636
814
 
637
- # -------- Gate 2: field/operator/classification match --------
638
- matched = []
815
+ # ---------- 4) Field/operator/classification match ----------
816
+ matched: list[tuple[str, str, dict]] = []
639
817
  total_candidates_seen = 0
640
818
 
641
819
  for s_loc, c_loc in valid_location_pairs:
642
820
  cands = cf_index.get((s_loc, c_loc)) or []
643
821
  total_candidates_seen += len(cands)
644
822
 
645
- filtered_supplier["location"] = s_loc
646
- filtered_consumer["location"] = c_loc
823
+ fs = {**base_supplier, "location": s_loc}
824
+ fc = {**base_consumer, "location": c_loc}
647
825
 
648
- got = process_cf_list(cands, filtered_supplier, filtered_consumer)
649
- if logger.isEnabledFor(logging.DEBUG) and got:
826
+ got = process_cf_list(cands, fs, fc)
827
+ if got and logger.isEnabledFor(logging.DEBUG):
650
828
  logger.debug(
651
829
  "CF-AVG: matched %d/%d CFs @ (%s,%s); example=%s",
652
830
  len(got),
@@ -655,7 +833,8 @@ def compute_average_cf(
655
833
  c_loc,
656
834
  _short_cf(got[0]),
657
835
  )
658
- matched.extend(got)
836
+ for cf in got:
837
+ matched.append((s_loc, c_loc, cf))
659
838
 
660
839
  if not matched:
661
840
  if logger.isEnabledFor(logging.DEBUG):
@@ -669,37 +848,54 @@ def compute_average_cf(
669
848
  )
670
849
  return 0, None, None
671
850
 
672
- # Weights
673
- total_w = sum(cf.get("weight", 0.0) for cf in matched)
674
- if total_w == 0:
675
- logger.warning(
676
- "CF-AVG: weights all zero/missing using equal shares | matched=%d | example=%s",
677
- len(matched),
678
- _short_cf(matched[0]) if matched else None,
679
- )
680
- matched_cfs = [(cf, 1.0 / len(matched)) for cf in matched]
851
+ # ---------- 5) Deterministic ordering without deep freezing ----------
852
+ matched.sort(key=lambda t: (t[0], t[1], _cf_signature(t[2])))
853
+
854
+ # ---------- 6) Build and normalize weights ----------
855
+ # Pull weights once; avoid repeated cf.get in loops
856
+ weights = []
857
+ for _s, _c, cf in matched:
858
+ w = cf.get("weight", 0.0)
859
+ try:
860
+ w = float(w)
861
+ except Exception:
862
+ w = 0.0
863
+ if not np.isfinite(w) or w < 0.0:
864
+ w = 0.0
865
+ weights.append(w)
866
+
867
+ w_arr = np.asarray(weights, dtype=np.float64)
868
+ w_sum = float(w_arr.sum(dtype=np.float64))
869
+ n_m = len(matched)
870
+
871
+ if w_sum <= 0.0:
872
+ shares = np.full(n_m, 1.0 / n_m, dtype=np.float64)
873
+ if logger.isEnabledFor(logging.DEBUG):
874
+ logger.debug(
875
+ "CF-AVG: weights all zero/missing → using equal shares | matched=%d | example=%s",
876
+ n_m,
877
+ _short_cf(matched[0][2]) if matched else None,
878
+ )
681
879
  else:
682
- matched_cfs = [(cf, cf.get("weight", 0.0) / total_w) for cf in matched]
683
-
684
- # Safety check on weights; log before assert explodes
685
- share_sum = sum(s for _, s in matched_cfs)
686
- if logger.isEnabledFor(logging.DEBUG):
687
- logger.debug(
688
- "CF-AVG: matched=%d | sum_shares=%.6f | example=%s",
689
- len(matched_cfs),
690
- share_sum,
691
- _short_cf(matched_cfs[0][0]) if matched_cfs else None,
880
+ shares = w_arr / w_sum
881
+ # prune tiny contributions to stabilize representation
882
+ shares = np.where(shares < 1e-4, 0.0, shares)
883
+ ssum = float(shares.sum(dtype=np.float64))
884
+ shares = (
885
+ (shares / ssum) if ssum > 0.0 else np.full(n_m, 1.0 / n_m, dtype=np.float64)
692
886
  )
693
887
 
694
- assert np.isclose(share_sum, 1.0), f"Total shares must equal 1. Got: {share_sum}"
695
-
696
- # Build deterministic expression (string)
697
- expressions = [f"({share:.3f} * ({cf['value']}))" for cf, share in matched_cfs]
888
+ # ---------- 7) Expression assembly (uses matched order) ----------
889
+ # Use shallow value access (no deep repr/formatting)
890
+ expressions = []
891
+ for (_s, _c, cf), sh in zip(matched, shares):
892
+ if sh > 0.0:
893
+ expressions.append(f"({sh:.4f} * ({cf.get('value')}))")
698
894
  expr = " + ".join(expressions)
699
895
 
700
- # Single CF shortcut (pass-through uncertainty)
701
- if len(matched_cfs) == 1:
702
- single_cf = matched_cfs[0][0]
896
+ # ---------- 8) Single CF shortcut ----------
897
+ if len(matched) == 1:
898
+ single_cf = matched[0][2]
703
899
  agg_uncertainty = single_cf.get("uncertainty")
704
900
  if logger.isEnabledFor(logging.DEBUG):
705
901
  dt = (time.perf_counter() - _t0) if _t0 else None
@@ -711,9 +907,10 @@ def compute_average_cf(
711
907
  )
712
908
  return (expr, single_cf, agg_uncertainty)
713
909
 
714
- # Multi-CF aggregated uncertainty
910
+ # ---------- 9) Aggregate uncertainty (deterministic, shallow) ----------
715
911
  def _cf_sign(cf_obj) -> int | None:
716
- neg = (cf_obj.get("uncertainty") or {}).get("negative", None)
912
+ unc = cf_obj.get("uncertainty")
913
+ neg = None if unc is None else unc.get("negative", None)
717
914
  if neg in (0, 1):
718
915
  return -1 if neg == 1 else +1
719
916
  v = cf_obj.get("value")
@@ -721,54 +918,70 @@ def compute_average_cf(
721
918
  return -1 if v < 0 else (+1 if v > 0 else None)
722
919
  return None
723
920
 
724
- cf_signs = [s for (cf, _sh) in matched_cfs if (s := _cf_sign(cf)) is not None]
921
+ cf_signs = [_cf_sign(cf) for (_s, _c, cf) in matched]
922
+ cf_signs = [s for s in cf_signs if s is not None]
725
923
  agg_sign = (
726
924
  cf_signs[0] if (cf_signs and all(s == cf_signs[0] for s in cf_signs)) else None
727
925
  )
728
926
 
729
927
  child_values, child_weights = [], []
730
- for cf, share in matched_cfs:
731
- if share <= 0:
928
+ for (_s, _c, cf), sh in zip(matched, shares):
929
+ if sh <= 0.0:
732
930
  continue
733
- if cf.get("uncertainty") is not None:
734
- u = deepcopy(cf["uncertainty"])
735
- u["negative"] = 0
736
- child_unc = u
931
+ unc = cf.get("uncertainty")
932
+ if unc is not None:
933
+ # Shallow copy of top-level only (keeps nested as-is)
934
+ child_unc = {
935
+ k: (dict(v) if isinstance(v, dict) else v) for k, v in unc.items()
936
+ }
937
+ child_unc["negative"] = 0
737
938
  else:
738
939
  v = cf.get("value")
739
940
  if isinstance(v, (int, float)):
740
941
  child_unc = {
741
942
  "distribution": "discrete_empirical",
742
- "parameters": {"values": [abs(v)], "weights": [1.0]},
943
+ "parameters": {"values": [abs(float(v))], "weights": [1.0]},
743
944
  "negative": 0,
744
945
  }
745
946
  else:
947
+ # symbolic without uncertainty: cannot aggregate deterministically
746
948
  if logger.isEnabledFor(logging.DEBUG):
747
949
  logger.debug(
748
950
  "CF-AVG: skip agg-unc (symbolic child without unc) | child=%s",
749
951
  _short_cf(cf),
750
952
  )
751
- return (expr, None, None)
953
+ return expr, None, None
752
954
  child_values.append(child_unc)
753
- child_weights.append(float(share))
955
+ child_weights.append(float(sh))
754
956
 
755
- wsum = sum(child_weights) or 1.0
756
- child_weights = [w / wsum for w in child_weights]
957
+ if not child_values:
958
+ if logger.isEnabledFor(logging.DEBUG):
959
+ logger.debug("CF-AVG: filtered children empty after cleanup.")
960
+ return 0, None, None
961
+
962
+ w = np.asarray(child_weights, dtype=np.float64)
963
+ w = np.clip(w, 0.0, None)
964
+ wsum = float(w.sum(dtype=np.float64))
965
+ w = (w / wsum) if wsum > 0.0 else np.full_like(w, 1.0 / len(w), dtype=np.float64)
757
966
 
758
- ordering = sorted(
759
- range(len(child_values)),
760
- key=lambda i: json.dumps(child_values[i], sort_keys=True),
967
+ # Deterministic order of child uncertainties via shallow signature only
968
+ order = sorted(
969
+ range(len(child_values)), key=lambda i: _unc_signature(child_values[i])
761
970
  )
762
- child_values = [child_values[i] for i in ordering]
763
- child_weights = [child_weights[i] for i in ordering]
971
+ child_values = [child_values[i] for i in order]
972
+ child_weights = [float(w[i]) for i in order]
764
973
 
974
+ # Final cleanup
765
975
  filtered = [
766
- (v, w) for v, w in zip(child_values, child_weights) if w > 0 and v is not None
976
+ (v, wt)
977
+ for v, wt in zip(child_values, child_weights)
978
+ if wt > 0.0 and v is not None
767
979
  ]
768
980
  if not filtered:
769
981
  if logger.isEnabledFor(logging.DEBUG):
770
- logger.debug("CF-AVG: filtered children empty after cleanup.")
982
+ logger.debug("CF-AVG: filtered children empty after cleanup (post-sort).")
771
983
  return 0, None, None
984
+
772
985
  child_values, child_weights = zip(*filtered)
773
986
 
774
987
  agg_uncertainty = {
@@ -781,11 +994,12 @@ def compute_average_cf(
781
994
  if logger.isEnabledFor(logging.DEBUG):
782
995
  dt = (time.perf_counter() - _t0) if _t0 else None
783
996
  logger.debug(
784
- "CF-AVG: success | children=%d | expr_len=%d | agg_sign=%s | dt=%.3f ms",
997
+ "CF-AVG: success | children=%d | expr_len=%d | agg_sign=%s | dt=%.3f ms | expr=%s",
785
998
  len(child_values),
786
999
  len(expr),
787
1000
  agg_sign,
788
1001
  (dt * 1000.0) if dt else -1.0,
1002
+ expr,
789
1003
  )
790
1004
 
791
- return (expr, None, agg_uncertainty)
1005
+ return expr, None, agg_uncertainty