kele 0.0.1a1__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kele/__init__.py +38 -0
- kele/_version.py +1 -0
- kele/config.py +243 -0
- kele/control/README_metrics.md +102 -0
- kele/control/__init__.py +20 -0
- kele/control/callback.py +255 -0
- kele/control/grounding_selector/__init__.py +5 -0
- kele/control/grounding_selector/_rule_strategies/README.md +13 -0
- kele/control/grounding_selector/_rule_strategies/__init__.py +24 -0
- kele/control/grounding_selector/_rule_strategies/_sequential_strategy.py +42 -0
- kele/control/grounding_selector/_rule_strategies/strategy_protocol.py +51 -0
- kele/control/grounding_selector/_selector_utils.py +123 -0
- kele/control/grounding_selector/_term_strategies/__init__.py +24 -0
- kele/control/grounding_selector/_term_strategies/_exhausted_strategy.py +34 -0
- kele/control/grounding_selector/_term_strategies/strategy_protocol.py +50 -0
- kele/control/grounding_selector/rule_selector.py +98 -0
- kele/control/grounding_selector/term_selector.py +89 -0
- kele/control/infer_path.py +306 -0
- kele/control/metrics.py +357 -0
- kele/control/status.py +286 -0
- kele/egg_equiv.pyi +11 -0
- kele/egg_equiv.so +0 -0
- kele/equality/README.md +8 -0
- kele/equality/__init__.py +4 -0
- kele/equality/_egg_equiv/src/lib.rs +267 -0
- kele/equality/_equiv_elem.py +67 -0
- kele/equality/_utils.py +36 -0
- kele/equality/equivalence.py +141 -0
- kele/executer/__init__.py +4 -0
- kele/executer/executing.py +139 -0
- kele/grounder/README.md +83 -0
- kele/grounder/__init__.py +17 -0
- kele/grounder/grounded_rule_ds/__init__.py +6 -0
- kele/grounder/grounded_rule_ds/_nodes/__init__.py +24 -0
- kele/grounder/grounded_rule_ds/_nodes/_assertion.py +353 -0
- kele/grounder/grounded_rule_ds/_nodes/_conn.py +116 -0
- kele/grounder/grounded_rule_ds/_nodes/_op.py +57 -0
- kele/grounder/grounded_rule_ds/_nodes/_root.py +71 -0
- kele/grounder/grounded_rule_ds/_nodes/_rule.py +119 -0
- kele/grounder/grounded_rule_ds/_nodes/_term.py +390 -0
- kele/grounder/grounded_rule_ds/_nodes/_tftable.py +15 -0
- kele/grounder/grounded_rule_ds/_nodes/_tupletable.py +444 -0
- kele/grounder/grounded_rule_ds/_nodes/_typing_polars.py +26 -0
- kele/grounder/grounded_rule_ds/grounded_class.py +461 -0
- kele/grounder/grounded_rule_ds/grounded_ds_utils.py +91 -0
- kele/grounder/grounded_rule_ds/rule_check.py +373 -0
- kele/grounder/grounding.py +118 -0
- kele/knowledge_bases/README.md +112 -0
- kele/knowledge_bases/__init__.py +6 -0
- kele/knowledge_bases/builtin_base/__init__.py +1 -0
- kele/knowledge_bases/builtin_base/builtin_concepts.py +13 -0
- kele/knowledge_bases/builtin_base/builtin_facts.py +43 -0
- kele/knowledge_bases/builtin_base/builtin_operators.py +105 -0
- kele/knowledge_bases/builtin_base/builtin_rules.py +14 -0
- kele/knowledge_bases/fact_base.py +158 -0
- kele/knowledge_bases/ontology_base.py +67 -0
- kele/knowledge_bases/rule_base.py +194 -0
- kele/main.py +464 -0
- kele/py.typed +0 -0
- kele/syntax/CONCEPT_README.md +117 -0
- kele/syntax/__init__.py +40 -0
- kele/syntax/_cnf_converter.py +161 -0
- kele/syntax/_sat_solver.py +116 -0
- kele/syntax/base_classes.py +1482 -0
- kele/syntax/connectives.py +20 -0
- kele/syntax/dnf_converter.py +145 -0
- kele/syntax/external.py +17 -0
- kele/syntax/sub_concept.py +87 -0
- kele/syntax/syntacticsugar.py +201 -0
- kele-0.0.1a1.dist-info/METADATA +165 -0
- kele-0.0.1a1.dist-info/RECORD +73 -0
- kele-0.0.1a1.dist-info/WHEEL +6 -0
- kele-0.0.1a1.dist-info/licenses/LICENSE +28 -0
kele/control/infer_path.py
ADDED
@@ -0,0 +1,306 @@
from __future__ import annotations

import warnings
from pyvis.network import Network
from collections import deque

from typing import TYPE_CHECKING, Literal
from kele.syntax import Assertion, Formula
import logging
from kele.syntax import FACT_TYPE

if TYPE_CHECKING:
    from kele.config import RunControlConfig
    from collections.abc import Sequence
    from kele.syntax import Rule
    from kele.equality import Equivalence

logger = logging.getLogger(__name__)


# A single inference step: records which rule produced a fact and how that fact connects to its upstream/downstream facts
class FactStep:
    """Wraps a fact's links to upstream/downstream facts; used to record where the fact came from."""
    def __init__(self, content: FACT_TYPE, infer_step: Rule | tuple[Assertion, ...] | None,
                 fact_type: Literal['premise', 'equivalence', 'rule_infer']) -> None:
        # For now we only record *that* a fact was derived from an equivalence class, not the concrete equivalence chain
        # TODO: could be extended to record inference depth or a provenance explanation
        self.fact_type: Literal['premise', 'equivalence', 'rule_infer'] = fact_type  # kind of fact
        self.content: FACT_TYPE = content  # the instantiated fact
        self.infer_step: Rule | tuple[Assertion, ...] | None = infer_step  # the rule that derived this fact; a tuple if derived via equivalence/congruence closure,
        # None if this is a premise fact
        self._next_facts: list[FactStep] = []  # downstream facts derived from this fact
        self._prev_facts: list[FactStep] = []  # upstream facts supporting this fact

    def add_next(self, fact: FactStep) -> None:
        """Link this fact to a downstream fact it helped derive."""
        self._next_facts.append(fact)

    def add_prev(self, fact: FactStep) -> None:
        """Link this fact to an upstream fact that supports it."""
        self._prev_facts.append(fact)

    @property
    def next(self) -> tuple[FactStep, ...]:
        """Downstream facts."""
        return tuple(self._next_facts)

    @property
    def prev(self) -> tuple[FactStep, ...]:
        """Upstream facts."""
        return tuple(self._prev_facts)

    @property
    def step_name(self) -> str:
        """
        Display name of this FactStep, used for printing.
        """
        if self.fact_type == 'premise':
            return f"Premise fact (no antecedents): {self.content !s}"
        if self.fact_type == 'equivalence':
            return f"Fact derived by equivalence: {self.content !s}"
        return f"Rule inference: {self.infer_step !s} new fact ({self.content !s})"

    def __repr__(self) -> str:  # pragma: no cover
        rule_name = getattr(self.infer_step, "name", None)
        return f"FactStep({self.content}, rule={rule_name})"

    def __hash__(self) -> int:
        return hash((self.content, self.infer_step))

    def __eq__(self, other: object) -> bool:
        return isinstance(other, FactStep) and self.content == other.content and self.infer_step == other.infer_step


class InferencePath:
    """
    Stores the inference graph:
    1. forward : (antecedent_fact, rule) -> [consequent_facts]
    2. reverse : (consequent_fact, rule) -> [antecedent_facts]
    """
    def __init__(self, args: RunControlConfig, equivalence: Equivalence) -> None:
        self._args = args

        self.fact_factstep_pool: dict[FACT_TYPE, FactStep] = {}
        self.terminal_step: FactStep | None = None  # the final terminal fact, usually the fact corresponding to the question
        self.equivalence: Equivalence = equivalence
        self.initial_facts: set[FACT_TYPE] = set()

        self._fact_counter = 1
        self._step_counter = 1
        self.fact_factid_map: dict[FACT_TYPE, str] = {}
        self.step_stepid_map: dict[str, str] = {}

    def _add_initial_facts(self, facts: Sequence[FACT_TYPE] | FACT_TYPE) -> None:
        """
        Add initial facts.
        """
        if isinstance(facts, FACT_TYPE):
            facts = [facts]

        for fact in facts:
            if isinstance(fact, Assertion):
                self.initial_facts.add(fact)
                self.initial_facts.add(self._reverse_fact(fact))
            else:
                self.initial_facts.add(fact)

    def _is_validate_none_premise_assertion(self, fact: Assertion) -> bool:
        """
        Check whether a fact is a valid Assertion whose premise is None.
        The premise is None in the following two cases:
        1. the fact appears in initial_facts
        2. its left- and right-hand sides are trivially equal
        """
        return fact in self.initial_facts or fact.lhs == fact.rhs or fact.is_action_assertion

    def _query_equiv_step(self, fact: FACT_TYPE) -> FactStep:
        """
        Get the inference path of a fact. There are three possibilities in total:
        1. it was derived via the equivalence relation
        2. it was derived by a rule
        3. it is a premise fact
        In every case a FactStep is returned.

        :param fact: the fact to examine
        :type fact: FACT_TYPE
        :raises RuntimeError: if the equivalence handler is not set
        :return: the FactStep for this fact (an equivalence FactStep if it was derived via the equivalence relation)
        :rtype: FactStep
        """  # noqa: DOC501
        if fact in self.fact_factstep_pool:
            # the fact's inference path has already been recorded, so reuse it
            return self.fact_factstep_pool[fact]
        if isinstance(fact, Assertion) and self._is_validate_none_premise_assertion(fact):
            # the fact is a premise fact
            return FactStep(fact, None, 'premise')
        # The FactStep of an Assertion must consider whether it was derived via equivalence; a Formula (in practice only NOT Assertion) does not
        if isinstance(fact, Formula) and isinstance(fact.formula_left, Assertion) and fact.connective == 'NOT':
            # a NOT-Assertion fact does not need equivalence handling
            # under normal circumstances it is simply a premise fact that holds, otherwise no result could have been inferred
            return FactStep(fact, None, 'premise')
        if isinstance(fact, Formula):
            raise TypeError(
                "Rule premises cannot contain connectives other than AND and NOT. "
                "This error may come from CNF_convert."
            )
        if self.equivalence is None:
            raise RuntimeError(
                "Equivalence handler is not set; cannot properly record inference paths for equivalence facts."
            )
        if self.equivalence.query_equivalence(fact):
            fact_step = FactStep(fact, None, 'equivalence')  # HACK: facts derived via equivalence are not handled in detail for now;
            # an explanation of the equivalence should be retrieved later
            self.fact_factstep_pool[fact] = fact_step
            self.fact_factstep_pool[self._reverse_fact(fact)] = fact_step  # record the symmetric fact as well
            return fact_step

        raise ValueError(f"Fact {fact!s} is not true; cannot record inference path.")

    @staticmethod
    def _reverse_fact(fact: FACT_TYPE) -> FACT_TYPE:
        if isinstance(fact, Assertion):
            return Assertion.from_parts(fact.rhs, fact.lhs)
        return fact

    def add_infer_edge(self,
                       consequent: FACT_TYPE,  # FIXME: should be narrowed to Assertion
                       antecedents: list[FACT_TYPE] | None = None,
                       grounded_rule: Rule | None = None,
                       ) -> None:
        """
        Record one inference edge: multiple antecedents -> one consequent.
        :param antecedents: the premises of some rule, already instantiated
        :param consequent: the instantiated result of the rule's consequent
        :param grounded_rule: the rule that triggered the inference
        :return: None
        """
        if antecedents is None:
            return self._add_initial_facts(consequent)

        if consequent in self.fact_factstep_pool:
            # the fact already exists; by default only one inference path is kept
            # TODO: optionally keep multiple inference paths
            return None
        # record the consequent's inference path
        conse_step = FactStep(consequent, grounded_rule, 'rule_infer')

        self.fact_factstep_pool[consequent] = conse_step
        self.fact_factstep_pool[self._reverse_fact(consequent)] = conse_step  # record the symmetric fact as well
        for fact in antecedents:
            factstep = self._query_equiv_step(fact)
            self.fact_factstep_pool[fact] = factstep
            self.fact_factstep_pool[self._reverse_fact(fact)] = factstep  # record the symmetric fact as well

            factstep.add_next(conse_step)
            conse_step.add_prev(factstep)
        return None

    def add_terminal_status(self, termnial_fact: FACT_TYPE) -> None:
        """Record the terminal fact."""
        try:
            self.terminal_step = self._query_equiv_step(termnial_fact)  # the terminal step may also have been derived via the equivalence relation
        except ValueError:
            warnings.warn(f"Terminal fact {termnial_fact!s} is trivially true.", stacklevel=1)
            self.terminal_step = None

    @staticmethod
    def _print_log_info(prev_fact_steps: list[FactStep], infer_path: deque[FactStep], terminal_fact: FACT_TYPE) -> None:
        logger.info("================Premise facts:=================")
        for prev_fact_counter, fact_step in enumerate(prev_fact_steps):
            logger.info("%d. %s", prev_fact_counter + 1, fact_step.step_name)  # FIXME: this log line is a bit odd: a number
            # followed by a name. Boyang, take a look when you get to this part; your infer code is due to change soon anyway, so I won't dig into it now.
        logger.info("================Inference path:=================")
        for infer_fact_counter, fact_step in enumerate(infer_path):
            logger.info("step %d: %s", infer_fact_counter + 1, fact_step.step_name)
        logger.info("================Terminal fact:=================")
        logger.info("Terminal fact: %s", terminal_fact)

    def get_infer_graph(self, terminal_fact: FACT_TYPE | None = None) -> tuple[list[FactStep], FACT_TYPE | None]:
        """
        Get the inference path.
        :param terminal_fact: the terminal fact of the inference; defaults to the fact corresponding to the question
        :return: the inference path and the terminal fact
        """
        if not self._args.trace:
            warnings.warn("Inference path tracing is disabled; cannot print inference path.", stacklevel=5)
            return [], None
        terminal_step = self.terminal_step if terminal_fact is None else self._query_equiv_step(terminal_fact)

        infered: set[FactStep] = set()
        if terminal_step is not None:
            infer_path: deque[FactStep] = deque()
            prev_fact_steps: list[FactStep] = []
            cur_fact_queue = deque([terminal_step])
            while cur_fact_queue:
                cur_fact_step = cur_fact_queue.popleft()
                if cur_fact_step in infered:  # facts already traced are not traced again
                    continue
                infered.add(cur_fact_step)

                if cur_fact_step.fact_type != 'premise':
                    infer_path.appendleft(cur_fact_step)
                else:
                    # all premise facts are shown in the first step
                    # "premise facts" here means the facts from the original premises that were actually used in the inference
                    prev_fact_steps.append(cur_fact_step)
                if cur_fact_step.infer_step is not None:
                    cur_fact_queue.extend(cur_fact_step.prev)

            self._print_log_info(prev_fact_steps, infer_path, terminal_step.content)
            prev_fact_steps.extend(infer_path)  # merge the two separate collections for the return value; their order is already fixed at this point
            return prev_fact_steps, terminal_step.content
        warnings.warn("Inference engine could not derive a result, or the terminal fact is trivially true.", stacklevel=1)
        return [], None

    def _get_fact_id(self, fact: FACT_TYPE, net: Network) -> str:
        if fact not in self.fact_factid_map:
            self.fact_factid_map[fact] = f"fact{self._fact_counter}"
            self._fact_counter += 1
            net.add_node(self.fact_factid_map[fact], label=self.fact_factid_map[fact], title=str(fact))
        return self.fact_factid_map[fact]

    def _get_step_id(self, step_name: str, net: Network) -> str:
        if step_name not in self.step_stepid_map:
            self.step_stepid_map[step_name] = f"step{self._step_counter}"
            self._step_counter += 1
            net.add_node(self.step_stepid_map[step_name], label=self.step_stepid_map[step_name], title=str(step_name), shape="square", color="red")
        return self.step_stepid_map[step_name]

    def gennerate_infer_path_graph(self, infer_path: list[FactStep], terminal_fact: FACT_TYPE | None = None) -> None:
        """
        Generate a graph of the inference path.
        :param infer_path: the inference path
        :param terminal_fact: the terminal fact of the inference; defaults to the fact corresponding to the question
        :return: None
        """
        net = Network(height="600px", width="100%", bgcolor="#ffffff", font_color="black")
        self._fact_counter = 1
        self._step_counter = 1
        self.fact_factid_map.clear()
        self.step_stepid_map.clear()
        if terminal_fact is not None:
            self.fact_factid_map[terminal_fact] = "terminal_fact"
            net.add_node("terminal_fact", label="terminal fact", shape="star", color="red")

        for fact_step in infer_path:
            cur_fact_id = self._get_fact_id(fact_step.content, net)
            cur_step_id = self._get_step_id(fact_step.step_name, net) if fact_step.infer_step is not None else None
            if cur_step_id is not None:
                for fact in fact_step.prev:
                    prev_fact_id = self._get_fact_id(fact.content, net)
                    net.add_edge(prev_fact_id, cur_step_id, label="premise", color="blue")
                net.add_edge(cur_step_id, cur_fact_id, label="conclusion", color="red", arrows="to")
            else:
                for nodes in net.nodes:
                    if nodes["id"] == cur_fact_id:
                        nodes["label"] = "premise fact (no antecedents)"
                        nodes["shape"] = "triangle"
                        nodes["color"] = "green"
        net.save_graph("infer_path.html")

    def reset(self) -> None:
        """Reset the inference path."""
        self.fact_factstep_pool.clear()
        self.terminal_step = None
        self.initial_facts.clear()
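Editor's note: the file above is essentially a provenance graph. Every derived fact keeps a FactStep pointing back at the steps that supported it, and get_infer_graph walks backwards from the terminal fact, collecting premises separately from rule-derived steps before stitching them into one ordered path. The sketch below illustrates only that backward-traversal pattern in isolation; it uses plain strings as facts and a hypothetical Step class standing in for FactStep, so it is an illustrative approximation, not kele's actual API.

from collections import deque


class Step:
    """Hypothetical stand-in for FactStep: a fact plus links to the steps that support it."""
    def __init__(self, fact: str, rule: str | None) -> None:
        self.fact = fact
        self.rule = rule              # None marks a premise fact
        self.prev: list["Step"] = []  # upstream supporting steps


def trace_back(terminal: Step) -> tuple[list[Step], list[Step]]:
    """Walk backwards from the terminal step; premises and derived steps are collected separately."""
    premises: list[Step] = []
    derived: deque[Step] = deque()
    seen: set[int] = set()
    queue = deque([terminal])
    while queue:
        step = queue.popleft()
        if id(step) in seen:          # do not trace the same step twice
            continue
        seen.add(id(step))
        if step.rule is None:
            premises.append(step)     # shown first, like prev_fact_steps above
        else:
            derived.appendleft(step)  # appendleft keeps derivation order, like infer_path above
            queue.extend(step.prev)
    return premises, list(derived)


# Tiny example: Mortal(Socrates) derived by a rule R1 from two premise facts.
p1 = Step("Human(Socrates)", None)
p2 = Step("forall x: Human(x) -> Mortal(x)", None)
goal = Step("Mortal(Socrates)", "R1")
goal.prev.extend([p1, p2])

premises, path = trace_back(goal)
print([s.fact for s in premises])         # premises actually used
print([(s.rule, s.fact) for s in path])   # ordered derivation steps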
kele/control/metrics.py
ADDED
@@ -0,0 +1,357 @@
# metrics_typed.py
# pip install prometheus-client psutil
from __future__ import annotations

import functools
import json
import os
import time
import uuid
import warnings
from dataclasses import dataclass, field
from datetime import datetime, UTC
from pathlib import Path
from typing import Any, ParamSpec, TypeVar, TYPE_CHECKING, Self

import psutil
from prometheus_client import (
    CollectorRegistry,
    Counter,
    Gauge,
    Histogram,
    push_to_gateway,
    start_http_server,
)
import logging

if TYPE_CHECKING:
    from collections.abc import Callable
    from collections.abc import Mapping

logger = logging.getLogger(__name__)

# Main metrics involved: cpu_percent: float; rss_mib: float; count: int; module: str; phase: str; duration_seconds: float


P = ParamSpec("P")
R = TypeVar("R")

type JSONValue = bool | int | float | str | list[JSONValue] | dict[str, JSONValue] | None
type JSONObject = dict[str, JSONValue]

__all__ = [
    "PhaseTimer",
    "RunRecorder",
    "end_run",
    "inc_iter",
    "init_metrics",
    "maybe_push",
    "measure",
    "observe_counts",
    "sample_process_gauges",
    "start_run",
]


def _now_iso() -> str:
    """The current time."""
    return datetime.now(UTC).astimezone().isoformat(timespec="seconds")


def _bytes_to_mib(n: int) -> float:
    return n / (1024 * 1024)


class RunRecorder:
    """
    Writes the key events and resource usage of one inference run to JSON (metrics_logs/<run_id>.json).

    - Use event to record arbitrary events (uniformly keyed by timestamp and event).
    - Use observe_cpu_mem to record CPU/memory sample points (cpu_percent, rss_mib).
    - Call end to finish up and record the summary (started_at / ended_at, peaks/means, etc.).
    """

    def __init__(self, log_dir: str = "metrics_logs", run_id: str | None = None) -> None:
        """
        Initialize a run recorder.
        """
        Path(log_dir).mkdir(parents=True, exist_ok=True)
        self.run_id: str = run_id or (time.strftime("%Y%m%d-%H%M%S-") + uuid.uuid1().hex[:6])
        self.log_dir: str = log_dir
        self.meta: dict[str, Any] = {
            "run_id": self.run_id,
            "started_at": _now_iso(),  # ISO 8601
        }
        self.events: list[JSONObject] = []
        self._cpu_percent_peaks: list[float] = []
        self._rss_mib_peaks: list[float] = []

        self._phase_totals: dict[tuple[str, str], float] = {}

        self._func_totals: dict[tuple[str, str], float] = {}

    def event(self, kind: str, /, **kwargs: Any) -> None:  # noqa: ANN401
        """Record a generic event."""
        self.events.append({"timestamp": _now_iso(), "event": kind, **kwargs})

    def observe_cpu_mem(self, cpu_pct: float, rss_bytes: int) -> None:
        """Record one CPU (%) and RSS (bytes) sample and store it as an event (converted to MiB)."""
        cpu_percent = cpu_pct
        rss_mib = _bytes_to_mib(rss_bytes)
        self._cpu_percent_peaks.append(cpu_percent)
        self._rss_mib_peaks.append(rss_mib)
        self.event("process_sample", cpu_percent=cpu_percent, rss_mib=rss_mib)

    def end(self, extra_meta: dict[str, Any] | None = None) -> str:
        """
        End the current run, aggregate the peak metrics, and save the record.
        """
        self.meta["ended_at"] = _now_iso()
        if extra_meta:
            self.meta.update(extra_meta)
        if self._cpu_percent_peaks:
            self.meta["cpu_percent_max"] = max(self._cpu_percent_peaks)
            self.meta["cpu_percent_mean"] = sum(self._cpu_percent_peaks) / len(self._cpu_percent_peaks)
        if self._rss_mib_peaks:
            self.meta["rss_max_mib"] = max(self._rss_mib_peaks)

        if self._phase_totals:
            self.meta["phase_durations_seconds_total"] = [
                {"module": m, "phase": p, "duration_seconds_total": t}
                for (m, p), t in sorted(self._phase_totals.items())
            ]
            self.meta["all_phases_duration_seconds_total"] = sum(self._phase_totals.values())
        else:
            self.meta["all_phases_duration_seconds_total"] = "no running time"

        if self._func_totals:
            self.meta["function_durations_seconds_total"] = [
                {"module": m, "name": n, "duration_seconds_total": t}
                for (m, n), t in sorted(self._func_totals.items())
            ]
            self.meta["all_functions_duration_seconds_total"] = sum(self._func_totals.values())

        path = str(Path(self.log_dir) / f"{self.run_id}.json")
        with open(path, "w", encoding="utf-8") as f:
            json.dump({"meta": self.meta, "events": self.events}, f, ensure_ascii=False, indent=4)

        logger.result("Elapsed time: %ss", self.meta['all_phases_duration_seconds_total'])  # type: ignore[attr-defined]

        return path

    def add_phase_duration(self, module: str, phase: str, seconds: float) -> None:
        """Record the duration of one phase."""
        key = (module, phase)
        self._phase_totals[key] = self._phase_totals.get(key, 0.0) + seconds

    def add_func_duration(self, module: str, name: str, seconds: float) -> None:
        """Record the duration of one function call (for the JSON summary)."""
        key = (module, name)
        self._func_totals[key] = self._func_totals.get(key, 0.0) + seconds


# --------- Fold all "global variables" into a single State object to avoid global assignments (PLW0603) ---------
@dataclass
class _State:
    registry: CollectorRegistry | None = None
    pushgateway: str | None = None
    job: str = "al_inference"
    grouping: dict[str, str] = field(default_factory=dict)
    proc: psutil.Process = field(default_factory=lambda: psutil.Process(os.getpid()))

    # metrics
    h_func_lat: Histogram | None = None
    h_phase_lat: Histogram | None = None
    g_rss: Gauge | None = None
    g_cpu_pct: Gauge | None = None
    c_iter: Counter | None = None
    h_grounded_rules: Histogram | None = None
    h_facts_count: Histogram | None = None

    # run recorder
    run: RunRecorder | None = None


STATE = _State()


def _new_hist(
    name: str,
    help_: str,
    buckets: tuple[float, ...] | None = None,
    labels: tuple[str, ...] = (),
) -> Histogram:
    """Construct a histogram metric (init_metrics must be called first)."""
    if buckets is None:
        buckets = (0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10, float("inf"))

    return Histogram(name, help_, labels, buckets=buckets, registry=STATE.registry)


# ---- Functions intended to be called from outside ----
def start_run(log_dir: str = "metrics_logs", run_id: str | None = None) -> str:
    """Call before a full inference run; initializes the RunRecorder and returns the run_id."""
    STATE.run = RunRecorder(log_dir=log_dir, run_id=run_id)
    return STATE.run.run_id


def end_run(extra_meta: Mapping[str, JSONValue] | None = None) -> str | None:
    """Call after a full inference run; writes the JSON file and returns its path. Returns None if start_run was never called."""
    if STATE.run:
        return STATE.run.end(dict(extra_meta) if extra_meta is not None else None)
    return None


def init_metrics(
    port: int | None = None,
    pushgateway: str | None = None,
    job: str = "al_inference",
    grouping: Mapping[str, str] | None = None,
) -> None:
    """
    Initialize the Prometheus metrics.

    - Batch jobs: do not specify port; push via a Pushgateway.
    - Local development: specify a port (e.g. 8000) to be scraped directly by Prometheus.

    These are the two common modes, but since everything is stored to JSON by default anyway, neither has much impact.
    """
    STATE.registry = CollectorRegistry()
    STATE.pushgateway = pushgateway
    STATE.job = job
    STATE.grouping = dict(grouping or {})

    STATE.h_func_lat = _new_hist("func_latency_seconds", "Function/phase latency", labels=("module", "name"))
    STATE.h_phase_lat = _new_hist("phase_latency_seconds", "Inference phase latency", labels=("module", "phase"))
    STATE.g_rss = Gauge("process_rss_bytes", "Process RSS bytes", registry=STATE.registry)
    STATE.g_cpu_pct = Gauge("process_cpu_percent", "Process CPU percent", registry=STATE.registry)
    STATE.c_iter = Counter("inference_iterations_total", "Total inference iterations", ["module"], registry=STATE.registry)
    STATE.h_grounded_rules = _new_hist("grounded_rules_count", "Grounded rules per iteration")
    STATE.h_facts_count = _new_hist("facts_count_snapshot", "Facts count snapshot")

    # Removed because of its excessive time overhead. If memory ever becomes a concern, consider gating it behind a parameter or using an alternative: tracemalloc.start()
    if port:
        start_http_server(port, registry=STATE.registry)


def maybe_push() -> None:
    """Push the metrics in the current registry if a Pushgateway is configured. Note: since I personally prefer JSON recording, this function is not used by the engine repository, but it does no harm to keep it."""
    if STATE.pushgateway and STATE.registry:
        push_to_gateway(
            STATE.pushgateway,
            job=STATE.job,
            registry=STATE.registry,
            grouping_key=STATE.grouping,
        )


def sample_process_gauges() -> None:
    """
    Sample the process RSS/CPU once, write the values to the corresponding Gauges, and record them in the run log (if any).
    init_metrics must be called first to initialize the Gauges.
    """
    if STATE.g_rss is None or STATE.g_cpu_pct is None:
        warnings.warn("Gauges not initialized, skipping sample_process_gauges", stacklevel=2)
        return
    rss: int = STATE.proc.memory_info().rss
    STATE.g_rss.set(rss)
    cpu: float = STATE.proc.cpu_percent(interval=None)
    STATE.g_cpu_pct.set(cpu)
    if STATE.run:
        STATE.run.observe_cpu_mem(cpu_pct=cpu, rss_bytes=rss)


def measure(name: str, module: str | None = None, *, skip_process_gauges: bool = True,
            skip_envent_record: bool = True) -> Callable[[Callable[P, R]], Callable[P, R]]:
    """
    Decorator: records the function's elapsed time and samples the process metrics once.

    Examples
    --------
    >>> @measure("step", module="pipeline")
    ... def work(x: int) -> int:
    ...     return x * 2
    """
    resolved_module = module or __name__

    def _decor(f: Callable[P, R]) -> Callable[P, R]:
        @functools.wraps(f)
        def _wrap(*a: P.args, **k: P.kwargs) -> R:
            t0 = time.perf_counter()
            try:
                return f(*a, **k)
            finally:
                dt = time.perf_counter() - t0

                if STATE.h_func_lat is not None:
                    STATE.h_func_lat.labels(module=resolved_module, name=name).observe(dt)
                if not skip_process_gauges:
                    sample_process_gauges()
                if STATE.run:
                    STATE.run.add_func_duration(resolved_module, name, dt)
                    if not skip_envent_record:
                        STATE.run.event("func_timing", module=resolved_module, name=name, duration_seconds=dt)

        return _wrap

    return _decor


class PhaseTimer:
    """
    Context manager: manually time a phase and sample the process metrics.

    Usage example::

        with PhaseTimer("retrieve", module="pipeline"):
            do_retrieve()
    """

    def __init__(self, phase: str, module: str | None = None, *, skip_process_gauges: bool = True,
                 skip_envent_record: bool = True, skip_count_record: bool = True) -> None:
        self.phase: str = phase
        self.module: str = module or __name__
        self.t0: float | None = None
        self.skip_process_gauges: bool = skip_process_gauges
        self.skip_envent_record: bool = skip_envent_record

    def __enter__(self) -> Self:
        self.t0 = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc, tb) -> None:  # type: ignore[no-untyped-def] # noqa: ANN001
        if self.t0 is None:
            return
        dt = time.perf_counter() - self.t0

        if STATE.h_phase_lat is not None:
            STATE.h_phase_lat.labels(module=self.module, phase=self.phase).observe(dt)
        if not self.skip_process_gauges:
            sample_process_gauges()
        if STATE.run:
            STATE.run.add_phase_duration(self.module, self.phase, dt)
            if not self.skip_envent_record:
                STATE.run.event("phase_timing", module=self.module, phase=self.phase, duration_seconds=dt)
        return


def observe_counts(grounded_rules: int | None = None, facts_count: int | None = None) -> None:
    """
    Record discrete count metrics (e.g. the number of grounded rules per grounding pass, the size of a facts snapshot).
    If the corresponding histograms have not been initialized, the call is ignored.
    """
    if grounded_rules is not None and STATE.h_grounded_rules is not None:
        STATE.h_grounded_rules.observe(float(grounded_rules))
        if STATE.run:
            STATE.run.event("grounded_rules", count=grounded_rules)
    if facts_count is not None and STATE.h_facts_count is not None:
        STATE.h_facts_count.observe(float(facts_count))
        if STATE.run:
            STATE.run.event("facts_count", count=facts_count)


def inc_iter(module: str) -> None:
    """Increment the inference-iteration counter for the given module by 1. Ignored if metrics are not initialized."""
    if STATE.c_iter is None:
        return
    STATE.c_iter.labels(module=module).inc()
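Editor's note: taken together, the module is used bracket-style: initialize the registry and the run recorder before inference, instrument the hot paths with measure / PhaseTimer / observe_counts / inc_iter while it runs, and flush everything to JSON with end_run afterwards. A minimal usage sketch follows; the import path kele.control.metrics, the phase/module names, and the counts are illustrative assumptions, and end_run relies on kele's own logging setup, which supplies the custom logger.result level used in RunRecorder.end.

# Minimal usage sketch; import path and phase/module names are illustrative assumptions.
from kele.control.metrics import (
    PhaseTimer, end_run, inc_iter, init_metrics, measure, observe_counts, start_run,
)

init_metrics()                                # no port, no pushgateway: JSON-only recording
run_id = start_run(log_dir="metrics_logs")    # creates metrics_logs/<run_id>.json at end_run


@measure("ground_rules", module="demo")       # per-call latency goes into func_latency_seconds
def ground_rules() -> int:
    return 42                                 # placeholder for real grounding work


with PhaseTimer("iteration", module="demo"):  # wall-clock time of one inference iteration
    n_rules = ground_rules()
    observe_counts(grounded_rules=n_rules, facts_count=100)
    inc_iter("demo")

json_path = end_run({"note": "demo run"})     # path to the written JSON summary
print(run_id, json_path)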