kele 0.0.1a1__cp313-cp313-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kele/__init__.py +38 -0
- kele/_version.py +1 -0
- kele/config.py +243 -0
- kele/control/README_metrics.md +102 -0
- kele/control/__init__.py +20 -0
- kele/control/callback.py +255 -0
- kele/control/grounding_selector/__init__.py +5 -0
- kele/control/grounding_selector/_rule_strategies/README.md +13 -0
- kele/control/grounding_selector/_rule_strategies/__init__.py +24 -0
- kele/control/grounding_selector/_rule_strategies/_sequential_strategy.py +42 -0
- kele/control/grounding_selector/_rule_strategies/strategy_protocol.py +51 -0
- kele/control/grounding_selector/_selector_utils.py +123 -0
- kele/control/grounding_selector/_term_strategies/__init__.py +24 -0
- kele/control/grounding_selector/_term_strategies/_exhausted_strategy.py +34 -0
- kele/control/grounding_selector/_term_strategies/strategy_protocol.py +50 -0
- kele/control/grounding_selector/rule_selector.py +98 -0
- kele/control/grounding_selector/term_selector.py +89 -0
- kele/control/infer_path.py +306 -0
- kele/control/metrics.py +357 -0
- kele/control/status.py +286 -0
- kele/egg_equiv.pyd +0 -0
- kele/egg_equiv.pyi +11 -0
- kele/equality/README.md +8 -0
- kele/equality/__init__.py +4 -0
- kele/equality/_egg_equiv/src/lib.rs +267 -0
- kele/equality/_equiv_elem.py +67 -0
- kele/equality/_utils.py +36 -0
- kele/equality/equivalence.py +141 -0
- kele/executer/__init__.py +4 -0
- kele/executer/executing.py +139 -0
- kele/grounder/README.md +83 -0
- kele/grounder/__init__.py +17 -0
- kele/grounder/grounded_rule_ds/__init__.py +6 -0
- kele/grounder/grounded_rule_ds/_nodes/__init__.py +24 -0
- kele/grounder/grounded_rule_ds/_nodes/_assertion.py +353 -0
- kele/grounder/grounded_rule_ds/_nodes/_conn.py +116 -0
- kele/grounder/grounded_rule_ds/_nodes/_op.py +57 -0
- kele/grounder/grounded_rule_ds/_nodes/_root.py +71 -0
- kele/grounder/grounded_rule_ds/_nodes/_rule.py +119 -0
- kele/grounder/grounded_rule_ds/_nodes/_term.py +390 -0
- kele/grounder/grounded_rule_ds/_nodes/_tftable.py +15 -0
- kele/grounder/grounded_rule_ds/_nodes/_tupletable.py +444 -0
- kele/grounder/grounded_rule_ds/_nodes/_typing_polars.py +26 -0
- kele/grounder/grounded_rule_ds/grounded_class.py +461 -0
- kele/grounder/grounded_rule_ds/grounded_ds_utils.py +91 -0
- kele/grounder/grounded_rule_ds/rule_check.py +373 -0
- kele/grounder/grounding.py +118 -0
- kele/knowledge_bases/README.md +112 -0
- kele/knowledge_bases/__init__.py +6 -0
- kele/knowledge_bases/builtin_base/__init__.py +1 -0
- kele/knowledge_bases/builtin_base/builtin_concepts.py +13 -0
- kele/knowledge_bases/builtin_base/builtin_facts.py +43 -0
- kele/knowledge_bases/builtin_base/builtin_operators.py +105 -0
- kele/knowledge_bases/builtin_base/builtin_rules.py +14 -0
- kele/knowledge_bases/fact_base.py +158 -0
- kele/knowledge_bases/ontology_base.py +67 -0
- kele/knowledge_bases/rule_base.py +194 -0
- kele/main.py +464 -0
- kele/py.typed +0 -0
- kele/syntax/CONCEPT_README.md +117 -0
- kele/syntax/__init__.py +40 -0
- kele/syntax/_cnf_converter.py +161 -0
- kele/syntax/_sat_solver.py +116 -0
- kele/syntax/base_classes.py +1482 -0
- kele/syntax/connectives.py +20 -0
- kele/syntax/dnf_converter.py +145 -0
- kele/syntax/external.py +17 -0
- kele/syntax/sub_concept.py +87 -0
- kele/syntax/syntacticsugar.py +201 -0
- kele-0.0.1a1.dist-info/METADATA +166 -0
- kele-0.0.1a1.dist-info/RECORD +74 -0
- kele-0.0.1a1.dist-info/WHEEL +4 -0
- kele-0.0.1a1.dist-info/licenses/LICENSE +28 -0
- kele-0.0.1a1.dist-info/licenses/licensecheck.json +20 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
from typing import TypeGuard
|
|
2
|
+
|
|
3
|
+
from kele.syntax import Operator, Constant, Variable, FlatCompoundTerm
|
|
4
|
+
from kele.knowledge_bases.builtin_base.builtin_concepts import COMPLEX_NUMBER_CONCEPT, EQUATION_CONCEPT
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _is_constant(x: Constant | Variable) -> TypeGuard[Constant]:
    """Type guard: tell whether ``x`` is a ``Constant`` (as opposed to a ``Variable``)."""
    return isinstance(x, Constant)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _unpack2_numbers(term: FlatCompoundTerm):  # type: ignore[no-untyped-def]  # noqa: ANN202
    """
    Extract the raw symbols of the two arguments of a binary term.

    :param term: term whose ``arguments`` are unpacked into exactly two items
    :return: the pair ``(a0.symbol, a1.symbol)``
    :raise TypeError: when either argument is not a ``Constant``
    """
    # Tuple unpacking enforces the arity: any other argument count raises ValueError here.
    a0, a1 = term.arguments
    if not _is_constant(a0) or not _is_constant(a1):
        raise TypeError(f"This operator expects Constant arguments, got {[str(a0), str(a1)]}.")
    return a0.symbol, a1.symbol
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _unpack1_number(term: FlatCompoundTerm):  # type: ignore[no-untyped-def]  # noqa: ANN202
    """
    Extract the raw symbol of the first argument of a term.

    :param term: term whose first entry of ``arguments`` is read
    :return: ``symbol`` of that first argument
    :raise TypeError: when the first argument is not a ``Constant``
    """
    operand = term.arguments[0]
    if _is_constant(operand):
        return operand.symbol
    raise TypeError("This operator expects a Constant argument, not a Variable.")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _plus(term: FlatCompoundTerm) -> Constant:
    """Evaluate ``arithmetic_plus_op``: add the symbols of the term's two constant arguments."""
    lhs, rhs = _unpack2_numbers(term)
    total = lhs + rhs
    return Constant(total, COMPLEX_NUMBER_CONCEPT)


# Binary operator over COMPLEX_NUMBER_CONCEPT whose evaluation is delegated to ``_plus``.
arithmetic_plus_op = Operator(
    name="arithmetic_plus_op",
    input_concepts=[COMPLEX_NUMBER_CONCEPT, COMPLEX_NUMBER_CONCEPT],
    output_concept=COMPLEX_NUMBER_CONCEPT,
    implement_func=_plus,
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _minus(term: FlatCompoundTerm) -> Constant:
    """Evaluate ``arithmetic_minus_op``: subtract the second argument's symbol from the first's."""
    lhs, rhs = _unpack2_numbers(term)
    difference = lhs - rhs
    return Constant(difference, COMPLEX_NUMBER_CONCEPT)


# Binary operator over COMPLEX_NUMBER_CONCEPT whose evaluation is delegated to ``_minus``.
arithmetic_minus_op = Operator(
    name="arithmetic_minus_op",
    input_concepts=[COMPLEX_NUMBER_CONCEPT, COMPLEX_NUMBER_CONCEPT],
    output_concept=COMPLEX_NUMBER_CONCEPT,
    implement_func=_minus,
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _times(term: FlatCompoundTerm) -> Constant:
    """Evaluate ``arithmetic_times_op``: multiply the symbols of the term's two constant arguments."""
    lhs, rhs = _unpack2_numbers(term)
    product = lhs * rhs
    return Constant(product, COMPLEX_NUMBER_CONCEPT)


# Binary operator over COMPLEX_NUMBER_CONCEPT whose evaluation is delegated to ``_times``.
arithmetic_times_op = Operator(
    name="arithmetic_times_op",
    input_concepts=[COMPLEX_NUMBER_CONCEPT, COMPLEX_NUMBER_CONCEPT],
    output_concept=COMPLEX_NUMBER_CONCEPT,
    implement_func=_times,
)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _divide(term: FlatCompoundTerm) -> Constant:
    """
    Evaluate ``arithmetic_divide_op``: divide the first argument's symbol by the second's.

    :raise ZeroDivisionError: when the divisor compares equal to zero
    """
    numerator, denominator = _unpack2_numbers(term)
    # Checked explicitly so the error message names the operator.
    if denominator == 0:
        raise ZeroDivisionError("Division by zero in arithmetic_divide_op.")
    quotient = numerator / denominator
    return Constant(quotient, COMPLEX_NUMBER_CONCEPT)


# Binary operator over COMPLEX_NUMBER_CONCEPT whose evaluation is delegated to ``_divide``.
arithmetic_divide_op = Operator(
    name="arithmetic_divide_op",
    input_concepts=[COMPLEX_NUMBER_CONCEPT, COMPLEX_NUMBER_CONCEPT],
    output_concept=COMPLEX_NUMBER_CONCEPT,
    implement_func=_divide,
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _negate(term: FlatCompoundTerm) -> Constant:
    """Evaluate ``arithmetic_negate_op``: negate the symbol of the term's first constant argument."""
    operand = _unpack1_number(term)
    negated = -operand
    return Constant(negated, COMPLEX_NUMBER_CONCEPT)


# Unary operator over COMPLEX_NUMBER_CONCEPT whose evaluation is delegated to ``_negate``.
arithmetic_negate_op = Operator(
    name="arithmetic_negate_op",
    input_concepts=[COMPLEX_NUMBER_CONCEPT],
    output_concept=COMPLEX_NUMBER_CONCEPT,
    implement_func=_negate,
)
|
|
90
|
+
|
|
91
|
+
# Operator taking two COMPLEX_NUMBER_CONCEPT inputs and producing an EQUATION_CONCEPT;
# it is declared without an implementation function.
get_arithmetic_equation_op = Operator(
    "get_arithmetic_equation_op",
    [COMPLEX_NUMBER_CONCEPT, COMPLEX_NUMBER_CONCEPT],
    EQUATION_CONCEPT,
)

# Example Operators
example_operator_1 = Operator(
    name="parent_example",
    # The example concepts are defined in builtin_concepts.py.
    # Here we simulate the scenario in which users declare their own ontology, with concepts
    # and operators kept in two separate files. In that situation the system cannot directly
    # locate the parent directory of builtin_concepts.py, so either the user refers to concepts
    # by their string names, or the user has to manage sys.path (or the location of the concept
    # declaration files) so that the import succeeds.
    input_concepts=["Person_Example", "Person_Example"],
    output_concept="Person_Example",
)
example_operator_2 = Operator(
    name="color_of_example",
    input_concepts=["Object_Example"],
    output_concept="Color_Example",
)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from kele.syntax import Rule
|
|
2
|
+
from .builtin_facts import example_assertion_1, example_formula_1
|
|
3
|
+
|
|
4
|
+
# Example Rules
|
|
5
|
+
# Example rule whose head and body are the same assertion.
example_rule_1 = Rule(head=example_assertion_1, body=example_assertion_1, priority=0.1)

# Example rule deriving the assertion from a formula body.
example_rule_2 = Rule(head=example_assertion_1, body=example_formula_1, priority=0.5)
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import warnings
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from kele.syntax.base_classes import Assertion
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from kele.config import KBConfig
|
|
10
|
+
from kele.syntax.base_classes import FACT_TYPE, Question
|
|
11
|
+
from collections.abc import Sequence
|
|
12
|
+
from kele.equality import Equivalence
|
|
13
|
+
from kele.syntax.external import SankuManagementSystem
|
|
14
|
+
from kele.control.grounding_selector import GroundingFlatTermWithWildCardSelector
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
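# FIXME: Everything stored in the fact base and the rule base should satisfy the "safe" condition. In #132 we will make CompoundTerm generic and introduce the safety constraint, which will allow finer-grained type annotations.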
|
|
18
|
+
class FactBase:
    """Top-level store holding every fact known to the engine."""

    def __init__(self,
                 initial_facts_or_dir_or_path: Sequence[FACT_TYPE] | str,
                 equivalence_handler: Equivalence,
                 term_selector: GroundingFlatTermWithWildCardSelector,
                 sk_system_handler: SankuManagementSystem,
                 args: KBConfig,
                 ):
        """
        Build the fact base from a sequence of facts or from a path.

        :param initial_facts_or_dir_or_path: the facts to store, or a directory/file path passed to ``_read_facts``
        :param equivalence_handler: handler whose equivalence classes are updated when facts become active
        :param term_selector: selector notified (``update_terms``) when facts are added through ``add_facts``
        :param sk_system_handler: Sanku system queried once per question in ``initial_fact_base``
        :param args: configuration; ``fact_cache_size`` gives the capacity (``-1`` disables the limit)
        :raise TypeError: when an initial fact is not an ``Assertion``
        :raise ValueError: when an initial fact contains free variables
        """
        self.facts: set[FACT_TYPE] = (
            self._read_facts(initial_facts_or_dir_or_path)
            if isinstance(initial_facts_or_dir_or_path, str)
            else set(initial_facts_or_dir_or_path)
        )

        # Initial facts are always checked for free variables.
        for stored_fact in self.facts:
            self._validate_fact_for_storage(stored_fact, check_free_variables=True)

        self.equivalence_handler = equivalence_handler
        self.term_selector = term_selector
        # HACK: before solving, the Sanku system runs initial_by_question once to update the fact
        # base. Because initialising the fact base is a self-contained stage, that call lives inside
        # FactBase rather than in the outermost InferenceEngine, even though FactBase and the Sanku
        # system are separate systems from a modularity point of view and wiring them together in
        # main might be the recommended design.
        self.sk_system_handler = sk_system_handler
        self._args = args
        self.max_facts = self._args.fact_cache_size

        # The engine first picks a subset of the stored facts to initialise with, so initialisation
        # counts as unfinished during __init__. Other operations should only happen once it is done
        # and this flag is True.
        self.initial_sign = False
        self.cur_facts: set[FACT_TYPE] = set()

    def _read_facts(self, path: str) -> set[FACT_TYPE]:
        """
        Read facts from a directory (all files in it) or from a single file.

        TODO: a textual format has to be agreed on first (there is no string parser yet, so this is
        not implemented).
        """
        raise NotImplementedError

    def _add_or_not(self, fact: FACT_TYPE) -> bool:
        """Decide whether a fact should be added to the fact base."""
        # 1. A fact that is already stored is not added again.
        if fact in self.facts:
            return False

        # 2. Respect the capacity limit; -1 means "no limit".
        #    SIM103 is silenced on purpose: this function is expected to grow further checks, so it
        #    should not be collapsed into a single return expression.
        limited = self.max_facts != -1
        if limited and len(self.facts) >= self.max_facts:  # noqa: SIM103
            # TODO: when the fact base is full, a heuristic should decide whether to drop the new
            # fact or to evict a stored one and keep the new fact.
            return False

        # 3. Facts that do not serve the goal need not be kept (to be decided by a heuristic / NN).

        return True

    def add_facts(
        self,
        facts: Sequence[FACT_TYPE],
        *,
        force_add: bool = False,
        check_free_variables: bool = False,
    ) -> list[FACT_TYPE]:
        """
        Add facts; facts added this way also update the equivalence classes.

        TODO: for efficiency add_facts and add_fact could be split; keep it in mind when porting to C.

        :param facts: the facts to add
        :param force_add: add unconditionally, skipping ``_add_or_not``; defaults to False
        :param check_free_variables: check facts for free variables; only enabled for user-supplied facts
        :return: the facts that were actually added
        :raise TypeError: when a fact is not an ``Assertion``
        :raise ValueError: when a fact contains free variables and the check is enabled
        """
        # Validate everything up front so nothing is stored if any fact is rejected.
        for candidate in facts:
            self._validate_fact_for_storage(candidate, check_free_variables=check_free_variables)

        accepted: list[FACT_TYPE] = []
        if force_add:
            # Separate branch so the flag is not re-tested for every fact.
            for candidate in facts:
                self.facts.add(candidate)
                accepted.append(candidate)
        else:
            # _add_or_not depends on the facts stored so far, so this stays an explicit loop.
            for candidate in facts:
                if not self._add_or_not(candidate):
                    continue
                self.facts.add(candidate)
                accepted.append(candidate)

        self.cur_facts.update(accepted)
        self.equivalence_handler.update_equiv_class(accepted)
        if accepted:
            self.term_selector.update_terms(facts=accepted)

        return accepted

    @staticmethod
    def _validate_fact_for_storage(fact: FACT_TYPE, *, check_free_variables: bool) -> None:
        """
        Reject anything that may not be stored in the fact base.

        :raise TypeError: when ``fact`` is not an ``Assertion``
        :raise ValueError: when ``check_free_variables`` is set and ``fact.free_variables`` is non-empty
        """
        if not isinstance(fact, Assertion):
            raise TypeError(f"Fact {fact} is not an Assertion, which is not allowed in the fact base.")
        if check_free_variables and fact.free_variables:
            raise ValueError(f"Fact {fact} contains free variables, which is not allowed.")

    def _select_initial_facts(self, question: Question, topn: int | None = None) -> list[FACT_TYPE]:
        """
        Pick the ``topn`` facts most likely to be useful for the question.

        Without a limit (``None`` or ``-1``) the number of returned facts is not restricted.
        ``question`` is not consulted yet.
        """
        candidates = list(self.facts)

        wants_prefix = topn is not None and topn != -1 and topn < len(candidates)
        if wants_prefix:
            # TODO: this is a plain prefix of list(self.facts); since self.facts is a set, the
            # order is whatever set iteration yields. Room for improvement.
            return candidates[:topn]

        return candidates

    def initial_fact_base(self, question: Question, topn: int | None = None) -> None:
        """
        One-off selection performed before the solving flow starts.

        Enough facts must be selected, otherwise solving may fail. This step also fetches facts from
        the Sanku system and initialises the equivalence classes.
        """
        chosen = self._select_initial_facts(question, topn)
        self.equivalence_handler.update_equiv_class(chosen)
        self.cur_facts.update(chosen)

        sk_facts = self.sk_system_handler.initial_by_question(question, topn)
        self.add_facts(sk_facts)

        # NOTE(review): add_facts already merges the facts it accepted into cur_facts; this line
        # additionally merges every Sanku fact, including any that _add_or_not rejected. Confirm
        # that this is intended.
        self.cur_facts.update(sk_facts)
        self.initial_sign = True

    def reset_fact_base(self) -> None:
        """
        Put the fact base back into its un-initialised state (``initial_sign`` False).

        It is not yet decided whether equivalence-class and Sanku information must be removed too.
        """
        self.initial_sign = False
        self.cur_facts.clear()
        # In theory this line is unnecessary if only main calls this function. Since the function is
        # externally callable, one redundant clear reduces the risk should other code call it.
        self.equivalence_handler.clear()

    def get_facts(self) -> list[FACT_TYPE]:
        """
        Tentative: return every fact currently in use.

        Mainly meant for log tracing, in particular printing all facts once solving has finished.
        Before initialisation a warning is emitted and all stored facts are returned instead.
        """
        if not self.initial_sign:
            warnings.warn("Fact base has not been initialized yet.", stacklevel=2)
            return list(self.facts)

        return list(self.cur_facts)

    def __str__(self) -> str:
        total = len(self.facts)
        show_topn = 5

        # Only the first five facts are shown.
        shown = [str(fact) for fact in list(self.facts)[:show_topn]]
        # With more than five facts, hint that the listing continues.
        tail = "..." if total > show_topn else ""
        preview = ", ".join(shown) + tail

        return f"FactBase with {total} facts. First 5 facts: [{preview}]"
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""Loader for ontology modules (concepts and operators)."""
|
|
2
|
+
|
|
3
|
+
import importlib.util
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from types import ModuleType
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _py_auto_load(filename: Path) -> ModuleType:
|
|
9
|
+
"""
|
|
10
|
+
从给定的文件路径动态加载一个 Python 模块。
|
|
11
|
+
|
|
12
|
+
:param filename: Python 文件路径。
|
|
13
|
+
:type filename: Path
|
|
14
|
+
:return: 加载的模块对象。
|
|
15
|
+
:rtype: types.ModuleType
|
|
16
|
+
:raises ImportError: 如果无法加载模块。
|
|
17
|
+
""" # noqa: DOC501
|
|
18
|
+
module_name = filename.stem # 获取不带扩展名的文件名作为模块名
|
|
19
|
+
spec = importlib.util.spec_from_file_location(module_name, str(filename))
|
|
20
|
+
if spec and spec.loader:
|
|
21
|
+
module = importlib.util.module_from_spec(spec)
|
|
22
|
+
spec.loader.exec_module(module)
|
|
23
|
+
return module
|
|
24
|
+
|
|
25
|
+
raise ImportError(f"Unable to import module from {filename}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def load_ontologies(
|
|
29
|
+
concept_dir_or_path: str | Path,
|
|
30
|
+
operator_dir_or_path: str | Path,
|
|
31
|
+
) -> None:
|
|
32
|
+
"""
|
|
33
|
+
从概念目录和算子目录(或文件)加载所有 Python 本体模块。
|
|
34
|
+
|
|
35
|
+
:param: concept_dir_or_path: 概念模块的目录或文件路径或yaml文件。
|
|
36
|
+
:param: operator_dir_or_path: 算子模块的目录或文件路径或yaml文件。
|
|
37
|
+
"""
|
|
38
|
+
def _collect_files(path: str | Path, suffix: str = '.py') -> list[Path]:
|
|
39
|
+
"""
|
|
40
|
+
收集指定路径下所有 Python 文件。
|
|
41
|
+
|
|
42
|
+
参数:
|
|
43
|
+
path (str | Path): 文件路径或目录路径。
|
|
44
|
+
|
|
45
|
+
返回:
|
|
46
|
+
List[Path]: 所有符合条件的 Python 文件路径。
|
|
47
|
+
"""
|
|
48
|
+
path = Path(path)
|
|
49
|
+
if path.is_dir():
|
|
50
|
+
# 返回目录下所有 .py 文件(不包括子目录)
|
|
51
|
+
return sorted(path.glob(f"*{suffix}"))
|
|
52
|
+
|
|
53
|
+
if path.is_file() and path.suffix == ".py":
|
|
54
|
+
return [path]
|
|
55
|
+
|
|
56
|
+
return []
|
|
57
|
+
|
|
58
|
+
# 收集概念和算子模块的 Python 文件
|
|
59
|
+
concept_py_files = _collect_files(concept_dir_or_path)
|
|
60
|
+
operator_py_files = _collect_files(operator_dir_or_path)
|
|
61
|
+
|
|
62
|
+
all_modules = []
|
|
63
|
+
for file in concept_py_files + operator_py_files:
|
|
64
|
+
module = _py_auto_load(file)
|
|
65
|
+
all_modules.append(module)
|
|
66
|
+
|
|
67
|
+
# TODO: 收集概念和算子模块的 yaml 文件
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from typing import overload
|
|
2
|
+
import warnings
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
|
|
5
|
+
from kele.config import KBConfig
|
|
6
|
+
from kele.syntax import Variable, Constant
|
|
7
|
+
from kele.syntax.base_classes import Rule, Question, Formula, Assertion, CompoundTerm, _QuestionRule
|
|
8
|
+
from kele.syntax.dnf_converter import RuleSafetyProcesser
|
|
9
|
+
from kele.knowledge_bases.builtin_base.builtin_facts import true_const
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RuleBase:
|
|
13
|
+
"""存储所有abstract rule的总结构"""
|
|
14
|
+
|
|
15
|
+
def __init__(self, rules_or_dir_or_path: Sequence[Rule] | str, args: KBConfig):
|
|
16
|
+
if isinstance(rules_or_dir_or_path, str):
|
|
17
|
+
self.rules = list(dict.fromkeys(self._read_rules(rules_or_dir_or_path)))
|
|
18
|
+
# 使用list代替set,以保持输入的规则顺序、因为set后的随机顺序会引入
|
|
19
|
+
# 不必要的不确定性,随意的排序也对于推理无益
|
|
20
|
+
else:
|
|
21
|
+
self.rules = list(dict.fromkeys(rules_or_dir_or_path)) # 加入去重操作
|
|
22
|
+
|
|
23
|
+
self.rule_suffix: int = 1
|
|
24
|
+
|
|
25
|
+
new_rules: list[Rule] = []
|
|
26
|
+
for single_rule in self.rules:
|
|
27
|
+
new_rules.extend(self._split_and_rename_rule(single_rule))
|
|
28
|
+
self.rules = new_rules
|
|
29
|
+
|
|
30
|
+
self.initial_sign: bool = False # initial_rule_base后为True,和Fact base一样
|
|
31
|
+
self.cur_rules: list[Rule] = [] # FactBase由于经常需要更新而用了set,而abstract rule不变,为了便捷用了list
|
|
32
|
+
|
|
33
|
+
self.question_rule: list[_QuestionRule] = [] # 用于存储question转化成的rule,不干涉正常流程,不参与select等。
|
|
34
|
+
|
|
35
|
+
self._var_counter = 0
|
|
36
|
+
|
|
37
|
+
self._args = args
|
|
38
|
+
|
|
39
|
+
def _read_rules(self, path: str) -> list[Rule]:
|
|
40
|
+
"""传入一个dir或文件,读取整个文件夹下面所有的文件或单个文件,文件格式需要py或yaml。TODO: 到时候需要约定字符串书写格式"""
|
|
41
|
+
raise NotImplementedError
|
|
42
|
+
|
|
43
|
+
def add_rule(self, rule: Rule) -> None:
|
|
44
|
+
"""可能用不到的函数,添加rule"""
|
|
45
|
+
warnings.warn(
|
|
46
|
+
"Not supported yet. When runtime rule additions are enabled, SelectionStrategy must add add_rule and "
|
|
47
|
+
"adjust RuleCheckRule behavior as needed.",
|
|
48
|
+
stacklevel=2,
|
|
49
|
+
)
|
|
50
|
+
if rule not in self.rules:
|
|
51
|
+
splited_rules = self._split_and_rename_rule(rule)
|
|
52
|
+
splited_rules = [self._preprocess_rule(rule) for rule in splited_rules]
|
|
53
|
+
|
|
54
|
+
self.rules.extend(splited_rules)
|
|
55
|
+
|
|
56
|
+
def get_rules(self) -> list[Rule]:
|
|
57
|
+
"""取出正在使用的所有rule,可能用于一些日志追踪等"""
|
|
58
|
+
if self.initial_sign:
|
|
59
|
+
return list(self.cur_rules)
|
|
60
|
+
|
|
61
|
+
warnings.warn("Rule base has not been initialized yet.", stacklevel=2)
|
|
62
|
+
return [r for r in self.rules if not isinstance(r, _QuestionRule)]
|
|
63
|
+
|
|
64
|
+
def get_question_rules(self) -> list[_QuestionRule]:
|
|
65
|
+
"""取出所有由question转化来的规则"""
|
|
66
|
+
return self.question_rule
|
|
67
|
+
|
|
68
|
+
def _select_rules_from_current_question(self, question: Question, topn: int | None = None) -> list[Rule]:
|
|
69
|
+
"""
|
|
70
|
+
根据当前问题的前提或目标问题选择一组规则。
|
|
71
|
+
可选地,通过 'topn' 参数限制选择的规则数量。
|
|
72
|
+
"""
|
|
73
|
+
# 由于选前k个而需要list或tuple一下 TODO: 另外:暂时将所有规则视为可能相关的规则,以后可以添加过滤逻辑或排序什么的。
|
|
74
|
+
all_rules = list(self.rules)
|
|
75
|
+
|
|
76
|
+
# 如果没有指定限制则返回所有规则。
|
|
77
|
+
if topn is None or topn == -1 or topn >= len(all_rules):
|
|
78
|
+
return all_rules
|
|
79
|
+
|
|
80
|
+
return all_rules[:topn]
|
|
81
|
+
|
|
82
|
+
def initial_rule_base(self, question: Question, topn: int | None = None) -> None:
|
|
83
|
+
"""作为整个解题流程开始前的一次筛选,需要选择充足的规则以免无法成功。可能Rule中本身带有和领域相关的标签"""
|
|
84
|
+
if question.question:
|
|
85
|
+
question_r = _QuestionRule.from_parts(head=Assertion.from_parts(_QuestionRule.QUESTION_SOLVED_FLAG, true_const),
|
|
86
|
+
body=question.question, name=_QuestionRule.QUESTIONRULE_NAME)
|
|
87
|
+
splited = self._split_and_rename_rule(question_r)
|
|
88
|
+
self.question_rule.extend(splited)
|
|
89
|
+
|
|
90
|
+
selected_rules = self._select_rules_from_current_question(question, topn) + self.question_rule
|
|
91
|
+
self.cur_rules = [self._preprocess_rule(r) for r in selected_rules]
|
|
92
|
+
self.initial_sign = True
|
|
93
|
+
|
|
94
|
+
def reset_rule_base(self) -> None:
|
|
95
|
+
"""
|
|
96
|
+
将RuleBase重置为其预初始化状态。清除所有活动规则并将initial_sign设置为False
|
|
97
|
+
"""
|
|
98
|
+
self.cur_rules.clear()
|
|
99
|
+
self.initial_sign = False
|
|
100
|
+
|
|
101
|
+
def _preprocess_rule(self, rule: Rule) -> Rule:
|
|
102
|
+
return self._rename_rule_vars(rule)
|
|
103
|
+
|
|
104
|
+
def _rename_rule_vars(self, rule: Rule) -> Rule:
|
|
105
|
+
"""调用_random_variable_name对规则中的变量重命名"""
|
|
106
|
+
var_map: dict[str, Variable] = {} # 仅作为临时工作区
|
|
107
|
+
new_head = self._generate_renamed_item(rule.head, var_map)
|
|
108
|
+
new_body = self._generate_renamed_item(rule.body, var_map)
|
|
109
|
+
|
|
110
|
+
return rule.replace(head=new_head, body=new_body)
|
|
111
|
+
|
|
112
|
+
def _split_and_rename_rule[T1: Rule](self, rule: T1) -> Sequence[T1]:
|
|
113
|
+
"""给没有命名的规则一个代称,并处理 DNF 拆分"""
|
|
114
|
+
origin_name = rule.name or f"rule_{self.rule_suffix}"
|
|
115
|
+
self.rule_suffix += 1
|
|
116
|
+
|
|
117
|
+
rule_spliter = RuleSafetyProcesser()
|
|
118
|
+
splited_rules = rule_spliter.split_rule_and_process_safety(rule)
|
|
119
|
+
|
|
120
|
+
if len(splited_rules) == 1:
|
|
121
|
+
return [splited_rules[0].replace(name=origin_name)]
|
|
122
|
+
|
|
123
|
+
result = []
|
|
124
|
+
for idx, single_rule in enumerate(splited_rules):
|
|
125
|
+
new_name = f"{origin_name}_{idx + 1}"
|
|
126
|
+
result.append(single_rule.replace(name=new_name))
|
|
127
|
+
|
|
128
|
+
return result
|
|
129
|
+
|
|
130
|
+
@overload
|
|
131
|
+
def _generate_renamed_item(self, item: Variable, var_map: dict[str, Variable]) -> Variable: ...
|
|
132
|
+
@overload
|
|
133
|
+
def _generate_renamed_item(self, item: Constant, var_map: dict[str, Variable]) -> Constant: ...
|
|
134
|
+
@overload
|
|
135
|
+
def _generate_renamed_item(self, item: CompoundTerm, var_map: dict[str, Variable]) -> CompoundTerm: ...
|
|
136
|
+
@overload
|
|
137
|
+
def _generate_renamed_item(self, item: Assertion, var_map: dict[str, Variable]) -> Assertion: ...
|
|
138
|
+
@overload
|
|
139
|
+
def _generate_renamed_item(self, item: Formula, var_map: dict[str, Variable]) -> Formula: ...
|
|
140
|
+
@overload
|
|
141
|
+
def _generate_renamed_item(self, item: None, var_map: dict[str, Variable]) -> None: ...
|
|
142
|
+
|
|
143
|
+
def _generate_renamed_item(self,
|
|
144
|
+
item: Formula | Assertion | CompoundTerm | Variable | Constant | None,
|
|
145
|
+
var_map: dict[str, Variable]) -> (
|
|
146
|
+
Formula | Assertion | CompoundTerm | Variable | Constant | None):
|
|
147
|
+
if isinstance(item, Variable):
|
|
148
|
+
if item.symbol not in var_map:
|
|
149
|
+
new_name = f"_v{self._var_counter}"
|
|
150
|
+
self._var_counter += 1
|
|
151
|
+
var_map[item.symbol] = item.create_renamed_variable(new_name)
|
|
152
|
+
# risk: 两条规则里如果都用到了x,然后使用者使用了同一个x的instance,
|
|
153
|
+
# 我们是否有理由认为它俩是一个x?(我认为不合理,所以我们应当不管instance,而只是取它的value本身。但这样会不会给使用者带来困惑?
|
|
154
|
+
# 他可能刻意用了同一个instance,甚至在里面存了一些自定义的信息。但我们可能是直接丢弃然后换个了新名字新地址)
|
|
155
|
+
return var_map[item.symbol]
|
|
156
|
+
|
|
157
|
+
if isinstance(item, Constant):
|
|
158
|
+
return item
|
|
159
|
+
|
|
160
|
+
if isinstance(item, CompoundTerm):
|
|
161
|
+
return CompoundTerm.from_parts(
|
|
162
|
+
item.operator,
|
|
163
|
+
tuple(self._generate_renamed_item(arg, var_map) for arg in item.arguments),
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
if isinstance(item, Assertion):
|
|
167
|
+
return Assertion.from_parts(
|
|
168
|
+
self._generate_renamed_item(item.lhs, var_map),
|
|
169
|
+
self._generate_renamed_item(item.rhs, var_map),
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
if isinstance(item, Formula):
|
|
173
|
+
return Formula(
|
|
174
|
+
self._generate_renamed_item(item.formula_left, var_map),
|
|
175
|
+
item.connective,
|
|
176
|
+
self._generate_renamed_item(item.formula_right, var_map),
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
if item is None:
|
|
180
|
+
return None
|
|
181
|
+
|
|
182
|
+
raise ValueError(f"Unknown item type: {type(item)}")
|
|
183
|
+
|
|
184
|
+
def __str__(self) -> str:
|
|
185
|
+
rule_count = len(self.rules)
|
|
186
|
+
|
|
187
|
+
show_topn = 5
|
|
188
|
+
|
|
189
|
+
# 展示前5条规则
|
|
190
|
+
preview_rules = ', '.join(str(rule) for rule in list(self.rules)[:5])
|
|
191
|
+
if rule_count > show_topn:
|
|
192
|
+
preview_rules += "..."
|
|
193
|
+
|
|
194
|
+
return f"RuleBase with {rule_count} rules. First 5 rules: [{preview_rules}]"
|