kele 0.0.1a1__cp313-cp313-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. kele/__init__.py +38 -0
  2. kele/_version.py +1 -0
  3. kele/config.py +243 -0
  4. kele/control/README_metrics.md +102 -0
  5. kele/control/__init__.py +20 -0
  6. kele/control/callback.py +255 -0
  7. kele/control/grounding_selector/__init__.py +5 -0
  8. kele/control/grounding_selector/_rule_strategies/README.md +13 -0
  9. kele/control/grounding_selector/_rule_strategies/__init__.py +24 -0
  10. kele/control/grounding_selector/_rule_strategies/_sequential_strategy.py +42 -0
  11. kele/control/grounding_selector/_rule_strategies/strategy_protocol.py +51 -0
  12. kele/control/grounding_selector/_selector_utils.py +123 -0
  13. kele/control/grounding_selector/_term_strategies/__init__.py +24 -0
  14. kele/control/grounding_selector/_term_strategies/_exhausted_strategy.py +34 -0
  15. kele/control/grounding_selector/_term_strategies/strategy_protocol.py +50 -0
  16. kele/control/grounding_selector/rule_selector.py +98 -0
  17. kele/control/grounding_selector/term_selector.py +89 -0
  18. kele/control/infer_path.py +306 -0
  19. kele/control/metrics.py +357 -0
  20. kele/control/status.py +286 -0
  21. kele/egg_equiv.pyd +0 -0
  22. kele/egg_equiv.pyi +11 -0
  23. kele/equality/README.md +8 -0
  24. kele/equality/__init__.py +4 -0
  25. kele/equality/_egg_equiv/src/lib.rs +267 -0
  26. kele/equality/_equiv_elem.py +67 -0
  27. kele/equality/_utils.py +36 -0
  28. kele/equality/equivalence.py +141 -0
  29. kele/executer/__init__.py +4 -0
  30. kele/executer/executing.py +139 -0
  31. kele/grounder/README.md +83 -0
  32. kele/grounder/__init__.py +17 -0
  33. kele/grounder/grounded_rule_ds/__init__.py +6 -0
  34. kele/grounder/grounded_rule_ds/_nodes/__init__.py +24 -0
  35. kele/grounder/grounded_rule_ds/_nodes/_assertion.py +353 -0
  36. kele/grounder/grounded_rule_ds/_nodes/_conn.py +116 -0
  37. kele/grounder/grounded_rule_ds/_nodes/_op.py +57 -0
  38. kele/grounder/grounded_rule_ds/_nodes/_root.py +71 -0
  39. kele/grounder/grounded_rule_ds/_nodes/_rule.py +119 -0
  40. kele/grounder/grounded_rule_ds/_nodes/_term.py +390 -0
  41. kele/grounder/grounded_rule_ds/_nodes/_tftable.py +15 -0
  42. kele/grounder/grounded_rule_ds/_nodes/_tupletable.py +444 -0
  43. kele/grounder/grounded_rule_ds/_nodes/_typing_polars.py +26 -0
  44. kele/grounder/grounded_rule_ds/grounded_class.py +461 -0
  45. kele/grounder/grounded_rule_ds/grounded_ds_utils.py +91 -0
  46. kele/grounder/grounded_rule_ds/rule_check.py +373 -0
  47. kele/grounder/grounding.py +118 -0
  48. kele/knowledge_bases/README.md +112 -0
  49. kele/knowledge_bases/__init__.py +6 -0
  50. kele/knowledge_bases/builtin_base/__init__.py +1 -0
  51. kele/knowledge_bases/builtin_base/builtin_concepts.py +13 -0
  52. kele/knowledge_bases/builtin_base/builtin_facts.py +43 -0
  53. kele/knowledge_bases/builtin_base/builtin_operators.py +105 -0
  54. kele/knowledge_bases/builtin_base/builtin_rules.py +14 -0
  55. kele/knowledge_bases/fact_base.py +158 -0
  56. kele/knowledge_bases/ontology_base.py +67 -0
  57. kele/knowledge_bases/rule_base.py +194 -0
  58. kele/main.py +464 -0
  59. kele/py.typed +0 -0
  60. kele/syntax/CONCEPT_README.md +117 -0
  61. kele/syntax/__init__.py +40 -0
  62. kele/syntax/_cnf_converter.py +161 -0
  63. kele/syntax/_sat_solver.py +116 -0
  64. kele/syntax/base_classes.py +1482 -0
  65. kele/syntax/connectives.py +20 -0
  66. kele/syntax/dnf_converter.py +145 -0
  67. kele/syntax/external.py +17 -0
  68. kele/syntax/sub_concept.py +87 -0
  69. kele/syntax/syntacticsugar.py +201 -0
  70. kele-0.0.1a1.dist-info/METADATA +166 -0
  71. kele-0.0.1a1.dist-info/RECORD +74 -0
  72. kele-0.0.1a1.dist-info/WHEEL +4 -0
  73. kele-0.0.1a1.dist-info/licenses/LICENSE +28 -0
  74. kele-0.0.1a1.dist-info/licenses/licensecheck.json +20 -0
@@ -0,0 +1,373 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import defaultdict, deque
4
+ from graphlib import TopologicalSorter
5
+ from typing import TYPE_CHECKING, cast, Any
6
+ from graphviz import Digraph
7
+
8
+ from ._nodes import _OperatorNode, _AssertionNode, _ConnectiveNode, _BuildTerm, _RuleNode, _FlatCompoundTermNode, _RootNode, _QuestionRuleNode
9
+ from kele.syntax import _QuestionRule
10
+ from kele.syntax import Assertion, Formula, Variable, FlatCompoundTerm
11
+ import operator
12
+
13
+
14
+ if TYPE_CHECKING:
15
+ from kele.config import Config
16
+ from kele.syntax import FACT_TYPE, TERM_TYPE
17
+ from .grounded_class import GroundedRule
18
+
19
+
20
+ class RuleCheckGraph:
21
+ """将Assertion/Formula级别的规则处理为易于处理的图结构"""
22
+
23
def __init__(self, cur_rule: GroundedRule, args: Config) -> None:
    """
    Build the check graph for one grounded rule.

    :param cur_rule: grounded rule whose abstract rule body (the antecedent) is converted into a graph
    :param args: configuration object forwarded to the nodes created for this graph
    """
    self.cur_rule = cur_rule
    self.args = args

    abstract_rule = cur_rule.rule
    self.abstract_rule = abstract_rule
    self.antecedent = abstract_rule.body

    # Every AssertionNode of the graph; execution starts from these nodes.
    self.execute_nodes: list[_AssertionNode] = []

    # The RuleNode is where execution ends; question rules use a dedicated node type.
    rule_node_cls = _QuestionRuleNode if isinstance(abstract_rule, _QuestionRule) else _RuleNode
    self.rule_node: _RuleNode = rule_node_cls(abstract_rule, cur_rule, args)

    # HACK: AssertionNodes that take part in grounding. Once the total table is pushed down to
    # TermNode this annotation should probably become list[TermNode].
    # HACK: action nodes are kept at the tail of grounding_nodes so that mid_table is easy to record.
    self.grounding_nodes: list[_AssertionNode] = []
    # HACK: action assertions; these nodes need exec_action to be run separately and its result recorded.
    self.action_nodes: list[_AssertionNode] = []
    # HACK: AssertionNodes that skip grounding and only perform substitution. Once the total table is
    # pushed down to TermNode this annotation should probably become list[TermNode].
    # With a DNF antecedent no Assertion should be able to take both True and False.
    self.substitution_nodes: list[_AssertionNode] = []

    # Fills execute_nodes, grounding_nodes, action_nodes and substitution_nodes as a side effect.
    self.graph_root = self._convert_formula_into_graph(self.antecedent)

    # NOTE(review): entry[0] / entry[1] appear to flag whether the assertion may be True / False
    # in some model (inferred from how _influenced_by_not reads index 1) -- confirm.
    sat_result = self.cur_rule.rule.get_models
    for assertion_node in self.execute_nodes:
        entry = sat_result[assertion_node.content]
        if entry[0]:
            # Both flags set -> None, only the first -> True.
            assertion_node.keep_table = None if entry[1] else True
        elif entry[1]:
            assertion_node.keep_table = False
        # Neither flag set: keep_table keeps whatever the node was created with.
55
+
56
def _build_assertion_structure(self, cur_assertion: Assertion, root_node: _RootNode) -> _AssertionNode:
    """
    Create the AssertionNode for ``cur_assertion`` together with the term sub-graphs of both sides.

    The new node is registered as a child of both term nodes and appended to ``self.execute_nodes``.

    :param cur_assertion: assertion to convert
    :param root_node: root of the graph under construction
    :return: the AssertionNode created for ``cur_assertion``
    """
    # TODO: the packaging here is still open for discussion, e.g. such assertions might not show up
    # in the graph or in query_children. Being affected by NOT is one of the reasons an
    # AssertionNode does not enter the join flow.
    grounded_rule = self.cur_rule
    builder = _BuildTerm(grounded_rule, root_node, self.args)

    assertion_node = _AssertionNode(
        content=cur_assertion,
        grounded_rule=grounded_rule,
        negated_assertion=self._influenced_by_not(cur_assertion),
    )

    # A "dropped" side is built in substitution-only mode.
    drop_left, drop_right = self._drop_assertion_term(assertion_node)
    side_nodes = (
        builder.build_term_structure(cur_term=cur_assertion.lhs, root_node=root_node, only_substitution=drop_left),
        builder.build_term_structure(cur_term=cur_assertion.rhs, root_node=root_node, only_substitution=drop_right),
    )

    # Both sides feed the same assertion node.
    for term_node in side_nodes:
        term_node.add_child(assertion_node)

    self.execute_nodes.append(assertion_node)
    return assertion_node
79
+
80
def _drop_assertion_term(self, assertion_node: _AssertionNode) -> tuple[bool, bool]:
    """
    Decide, for each side of an assertion, whether its term only performs substitution.

    :param assertion_node: node whose ``content`` (lhs / rhs) and ``negated_assertion`` flag are inspected
    :return: ``(drop_left, drop_right)``; ``True`` means that side is dropped from grounding
    """
    assertion = assertion_node.content
    lhs, rhs = assertion.lhs, assertion.rhs

    # A negated assertion drops both sides.
    if assertion_node.negated_assertion:
        return True, True

    # Default: a side is kept only when it has free variables.
    drop_left = not lhs.free_variables
    drop_right = not rhs.free_variables
    if drop_left and drop_right:
        # Nothing left to decide once both sides are dropped.
        return True, True

    # Action terms are always dropped, and an action assertion must not enter the
    # optimisation below, hence the early return.
    if assertion.is_action_assertion:
        return drop_left or bool(lhs.is_action_term), drop_right or bool(rhs.is_action_term)

    # Optimisation: a side that is a bare Variable contained in the other side can be dropped.
    # The elif guarantees that for ``x = x`` (mutual inclusion) only the left side is dropped.
    if self._variable_included(lhs, rhs):
        drop_left = True
    elif self._variable_included(rhs, lhs):
        drop_right = True

    return drop_left, drop_right
119
+
120
def _build_formula_structure(self, cur_formula: Formula, root_node: _RootNode) -> _ConnectiveNode:
    """
    Decompose a Formula and create the ConnectiveNode that joins its operands.

    Each operand (assertion or nested formula) is built first; the node returned for it gets the
    new ConnectiveNode as a child.

    :param cur_formula: formula to decompose; ``formula_right`` may be ``None``
    :param root_node: root of the graph under construction
    :return: the ConnectiveNode created for ``cur_formula``
    """
    right_operand = cur_formula.formula_right
    connective_node = _ConnectiveNode(formula=cur_formula)

    # Whether an assertion is affected by NOT is not tracked here; _build_assertion_structure
    # obtains it from _influenced_by_not.
    operands = [cur_formula.formula_left]
    if right_operand is not None:
        operands.append(right_operand)

    for operand in operands:
        # The last node of each operand's sub-graph needs the ConnectiveNode as its child.
        parent_node = self._call_assertion_or_formula_builder(operand, root_node)
        parent_node.add_child(connective_node)

    return connective_node
137
+
138
def _call_assertion_or_formula_builder(self, input_formula: FACT_TYPE, root_node: _RootNode) \
        -> _AssertionNode | _ConnectiveNode:
    """
    Dispatch to the assertion builder or the formula builder depending on the input type.

    :param input_formula: an Assertion, or anything else (handled as a Formula)
    :param root_node: root of the graph under construction
    :return: the node produced by the selected builder
    """
    build = self._build_assertion_structure if isinstance(input_formula, Assertion) \
        else self._build_formula_structure
    return build(input_formula, root_node)
143
+
144
def _convert_formula_into_graph(self, formula: FACT_TYPE) -> _RootNode:
    """
    Convert the rule premise into the graph structure (see grounder/README.md).

    Besides building the graph, this classifies ``self.execute_nodes`` into
    ``self.grounding_nodes``, ``self.action_nodes`` and ``self.substitution_nodes``.

    :param formula: the rule antecedent (an Assertion or a Formula)
    :return: the root node of the created graph
    """
    root_node = _RootNode()

    # The bottom-most node of the premise is connected to the RuleNode.
    if isinstance(formula, Assertion):
        self._build_assertion_structure(formula, root_node).add_child(self.rule_node)
    elif isinstance(formula, Formula):
        # Not yet a basic unit: _build_formula_structure decomposes it recursively.
        self._build_formula_structure(formula, root_node).add_child(self.rule_node)

    # Partition the assertion nodes collected while building the graph.
    plain_nodes: list[_AssertionNode] = []
    action_nodes: list[_AssertionNode] = []
    substitution_nodes: list[_AssertionNode] = []
    for node in self.execute_nodes:
        if node.only_substitution:
            bucket = substitution_nodes
        elif node.action_assertion:
            bucket = action_nodes
        else:
            bucket = plain_nodes
        bucket.append(node)

    # Action nodes go after the ordinary grounding nodes; each group is ordered independently.
    merge_order = self._grounding_nodes_merge_optimization
    self.grounding_nodes = merge_order(plain_nodes) + merge_order(action_nodes)
    self.substitution_nodes = self._sort_substitution_nodes(substitution_nodes)
    # Kept in discovery order, not in the merge order used inside grounding_nodes.
    self.action_nodes = action_nodes

    return root_node
182
+
183
@staticmethod
def _grounding_nodes_merge_optimization(assertion_list: list[_AssertionNode]) -> list[_AssertionNode]:
    """
    Greedily choose the order in which AssertionNode tables are merged.

    The first node is the one with the fewest variables that occur in no other node. After that,
    the node sharing the most variables with the columns merged so far is taken next; ties are
    broken by the larger number of free variables, then by position in ``assertion_list``.

    :param assertion_list: nodes to order; an empty list is returned unchanged (same object)
    :return: a new list containing the same nodes in merge order
    """
    if not assertion_list:
        return assertion_list

    # How many nodes mention each variable.
    occurrences: defaultdict[Variable, int] = defaultdict(int)
    for node in assertion_list:
        for var in node.content.free_variables:
            occurrences[var] += 1
    total_vars = len(occurrences)

    def _shared_score(node: _AssertionNode) -> int:
        # Fewer variables private to this node -> higher score.
        private = sum((occurrences[var] == 1) for var in node.content.free_variables)
        return total_vars - private

    # Start from the node that overlaps most with the other tables.
    seed = max(assertion_list, key=_shared_score)
    ordered: list[_AssertionNode] = [seed]

    pending: list[_AssertionNode] = assertion_list.copy()
    pending.remove(seed)
    merged_columns: set[Variable] = set(seed.content.free_variables)

    while pending:
        # Fold the most recently merged table into the known columns.
        merged_columns |= set(ordered[-1].content.free_variables)

        # Highest overlap first; for equal overlap prefer the table with more columns.
        best = max(
            pending,
            key=lambda candidate: (
                len(set(candidate.content.free_variables) & merged_columns),
                len(candidate.content.free_variables),
            ),
        )
        pending.remove(best)
        ordered.append(best)

    return ordered
221
+
222
def _influenced_by_not(self, assertion: Assertion) -> bool:
    """
    Tell whether an assertion is affected by NOT.

    The answer is read from index 1 of the assertion's entry in the rule's ``get_models`` mapping:
    an assertion that takes False is treated as affected by NOT.

    :param assertion: assertion to look up
    :return: the value stored at index 1 of the assertion's model entry
    """
    # TODO: this overlaps with drop_true_table / drop_false_table and could probably be simplified.
    return self.cur_rule.rule.get_models[assertion][1]
235
+
236
@staticmethod
def _variable_included(left: TERM_TYPE, right: TERM_TYPE) -> bool:
    """
    Return True when ``left`` is a Variable whose free variables all occur among those of ``right``.

    The containment is not strict (``<=``), so ``x`` is considered included in ``x``.

    :param left: candidate term; anything that is not a Variable yields False
    :param right: term whose free variables are searched
    """
    if not isinstance(left, Variable):
        return False
    return set(left.free_variables) <= set(right.free_variables)
244
+
245
def _sort_substitution_nodes(self, substitution_nodes: list[_AssertionNode]) -> list[_AssertionNode]:
    """
    Order substitution nodes by the dependencies between their variables.

    Each node is expected to have a Variable on one side and a term on the other. The variable
    depends on every entry of the term's ``arguments``; nodes whose variable is needed by others
    come first. The dependency graph is also stored on ``self._substitution_graph``.

    :param substitution_nodes: nodes to order
    :return: a new list with the same nodes in execution order
    """
    # Adjacency list ``arg -> [var]``: ``arg`` has to be handled before ``var``.
    self._substitution_graph: dict[Variable, list[Variable]] = defaultdict(list)
    graph = self._substitution_graph

    node_var_pairs: list[tuple[_AssertionNode, Variable]] = []

    for node in substitution_nodes:
        lhs = node.content.lhs
        rhs = node.content.rhs

        if TYPE_CHECKING:
            lhs, rhs = cast("tuple[Variable, FlatCompoundTerm] | tuple[FlatCompoundTerm, Variable]", (lhs, rhs))

        if isinstance(lhs, Variable):  # kept as separate statements: the one-line form does not pass mypy
            var = lhs
            term = rhs
        else:
            if TYPE_CHECKING:  # One side is always a Variable (a restriction on how action ops are written).
                # TODO: enforce this through the type system; for now this is the minimal change.
                rhs = cast("Variable", rhs)

            term = lhs
            var = rhs

        # Make sure ``var`` is a graph node even without outgoing edges, then add an edge from
        # every argument of the term to ``var``.
        graph.setdefault(var, [])
        for arg in getattr(term, "arguments", ()):
            graph[arg].append(var)

        node_var_pairs.append((node, var))

    # TopologicalSorter reads a mapping as node -> predecessors. With ``arg -> [var]`` edges its
    # static order therefore lists ``var`` before ``arg``, i.e. dependents first.
    position = {v: i for i, v in enumerate(TopologicalSorter(graph).static_order())}

    # Sorting by position in descending order undoes that inversion: arguments end up before the
    # variables that depend on them. The sort is stable for nodes sharing a position.
    ranked = sorted(
        node_var_pairs,
        key=lambda pair: position.get(pair[1], float("inf")),
        reverse=True,
    )

    return [node for node, _ in ranked]
290
+
291
def reset(self) -> None:
    """
    Reset the state of the graph for a new inference iteration.

    Nodes are visited breadth-first from ``self.graph_root`` and ``reset()`` is called on each
    reachable node exactly once, even when a node is reachable through several parents.
    Merely clearing the nodes is a stop-gap and costs efficiency.
    """
    root = self.graph_root
    # Track visited nodes by identity: a node shared by several parents would otherwise be reset
    # (and its whole sub-graph re-traversed) once per path leading to it.
    seen: set[int] = {id(root)}
    node_queue: deque[_RootNode | _OperatorNode | _AssertionNode |
                      _ConnectiveNode | _RuleNode | _FlatCompoundTermNode] = deque([root])
    while node_queue:  # TODO: a shared traversal helper would be a better fit here
        node = node_queue.popleft()
        node.reset()
        for child in node.get_all_children():
            if child is None or id(child) in seen:
                continue
            seen.add(id(child))
            node_queue.append(child)
299
+
300
def generate_graphviz(self, show_mode: str = "default", filename: str = 'rule_graph', *,  # noqa: C901
                      view: bool = True) -> None:
    """
    Render the graph structure with Graphviz.

    :param show_mode: "default" or "free_var"; "free_var" additionally shows the free variables of
        FlatCompoundTerm nodes. With any other value those nodes get no explicit label.
    :param filename: output file name (without extension)
    :param view: forwarded to ``Digraph.render``; the default ``True`` keeps the previous behaviour
        of opening the rendered file, pass ``False`` to only write it
    """

    dot = Digraph(comment='Rule Check Graph')
    visited: set[str] = set()

    def _add_nodes(node: _RootNode | _OperatorNode | _AssertionNode |  # noqa: C901
                   _ConnectiveNode | _RuleNode | _FlatCompoundTermNode | None) -> None:
        if node is None:
            return

        # Nodes are identified by object identity so that shared nodes are drawn once.
        node_id = str(id(node))
        if node_id in visited:
            return
        visited.add(node_id)

        # Style depends on the node type.
        if isinstance(node, _RootNode):
            dot.node(node_id, 'Root', shape='ellipse', color='green')
        elif isinstance(node, _OperatorNode):
            dot.node(node_id, f'Operator:\n{node.operator.name}', shape='box', color='blue')
        elif isinstance(node, _AssertionNode):
            dot.node(node_id, 'Assertion', shape='diamond', color='orange')
        elif isinstance(node, _ConnectiveNode):
            dot.node(node_id, f'Connective:\n{node.content.connective}', shape='hexagon', color='purple')
        elif isinstance(node, _RuleNode):
            dot.node(node_id, 'Rule Endpoint', shape='doubleoctagon', color='red')
        elif isinstance(node, _FlatCompoundTermNode):
            if show_mode == "free_var":
                dot.node(node_id,
                         f'FlatCompoundTerm:\n{node.node_representative}\nfree_vars:\n{node.get_free_var_name()}',
                         shape='ellipse')
            elif show_mode == "default":
                dot.node(node_id, f'FlatCompoundTerm:\n {node.node_representative!s}', shape='ellipse')

        # Recurse into the children. Missing (None) children are skipped before the edge is added,
        # otherwise an edge to a phantom node would be drawn.
        for child in node.get_all_children():
            if child is None:
                continue
            dot.edge(node_id, str(id(child)))
            _add_nodes(child)

    # Traverse from the root.
    _add_nodes(self.graph_root)
    dot.render(filename, view=view)
348
+
349
def generate_graph_represent(self) -> dict[str, list[Any]]:
    """
    Produce a dictionary describing the graph structure; used by the pytest suite.

    Nodes are visited breadth-first from ``self.graph_root``. Each key is ``str(node)``; only the
    first node seen for a key is recorded.

    :return: mapping from node key to ``[child_strings, table]``, where ``table`` is the node's
        ``freevar_table.table_represent`` for FlatCompoundTerm nodes and ``[{}]`` otherwise
    :rtype: dict[str, list[Any]]
    """
    graph_represent: dict[str, list[Any]] = {}
    node_queue: deque[_RootNode | _OperatorNode | _AssertionNode |
                      _ConnectiveNode | _RuleNode | _FlatCompoundTermNode] = deque([self.graph_root])
    while node_queue:
        node = node_queue.popleft()
        children = list(node.get_all_children())

        key = str(node)
        if isinstance(node, _RuleNode):
            # hack: Rule.__str__ includes a non-deterministic name, which is trimmed here. A
            # dedicated graph-oriented string would be cleaner but was judged not worth the
            # extra complexity.
            # NOTE(review): the slice starts at the *number* of ':' characters, not at the
            # position of the first ':' -- confirm this is intended.
            key = key[key.count(':'):].strip()

        if key not in graph_represent:
            if isinstance(node, (_FlatCompoundTermNode)):
                table = node.freevar_table.table_represent
            else:
                table = [{}]
            graph_represent[key] = [[str(child) for child in children], table]

        node_queue.extend(child for child in children if child is not None)
    return graph_represent
@@ -0,0 +1,118 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import time
5
+
6
+ from .grounded_rule_ds import GroundedRuleDS, GroundedProcess, GroundedRule
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ from kele.syntax import _QuestionRule
11
+
12
+ if TYPE_CHECKING:
13
+ from kele.syntax import Rule, Question, GROUNDED_TYPE_FOR_UNIFICATION
14
+ from kele.knowledge_bases import FactBase, RuleBase
15
+ from collections.abc import Sequence
16
+ from kele.control.grounding_selector import GroundingRuleSelector, GroundingFlatTermWithWildCardSelector
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class Grounder:
22
+ """Grounding过程最外层的数据结构,执行完整的grounding过程并与Executor对接"""
23
def __init__(self,  # noqa: PLR0913
             *,
             fact_base: FactBase,
             rule_base: RuleBase,
             rule_selector: GroundingRuleSelector,
             term_selector: GroundingFlatTermWithWildCardSelector,
             grounded_structure: GroundedRuleDS,
             rules_num_every_step: int = 5,
             facts_num_for_each_rule: int = 10):  # these limits could probably move into the selectors
    """
    Store the collaborators used by the grounding process.

    :param fact_base: fact base handle, used by the selectors and by grounding
    :param rule_base: rule base handle (to be taken over by the selector later)
    :param rule_selector: selector that yields the abstract rules to ground
    :param term_selector: selector that yields the terms used to instantiate a rule
    :param grounded_structure: data structure in which grounding is executed
    :param rules_num_every_step: upper bound on the rules selected per grounding round
    :param facts_num_for_each_rule: upper bound on the facts usable by one rule per round
    """
    # Selectors drive what gets grounded; the most recent rule selection is remembered.
    self._rule_selector = rule_selector
    self._term_selector = term_selector
    self._last_selected_rules: Sequence[Rule] = []

    self.grounded_structure = grounded_structure

    # TODO: both bases will be phased out in favour of the rule/term selectors.
    self.facts_base = fact_base
    self.rule_base = rule_base

    self.m = rules_num_every_step
    self.n = facts_num_for_each_rule  # TODO: let the selector control this limit
48
+
49
def __select_abstract_rule(self, question: Question, m: int = 5) -> Sequence[Rule]:
    """
    Select the abstract rules that enter grounding.

    The rules come from the rule selector's ``next_rules()``; ``question`` and ``m`` are not
    used by the current implementation.

    :param question: the question being answered
    :param m: unused
    :return: the selected abstract rules
    """
    selected: Sequence[Rule] = self._rule_selector.next_rules()
    if not logger.isEnabledFor(logging.DEBUG):
        return selected

    # Only query the fact base when the debug message will actually be emitted.
    logger.debug("Selected %s abstract rules from facts_base with %s active facts.",
                 len(selected),
                 len(self.facts_base.get_facts()))
    return selected
57
+
58
def __select_facts_for_abstract_rules(self, question: Question, abstract_rules: Sequence[Rule]) \
        -> Sequence[tuple[Rule, list[GROUNDED_TYPE_FOR_UNIFICATION]]]:
    """
    Pair every abstract rule with the terms selected to instantiate it.

    Also records the rules in ``self._last_selected_rules``. ``question`` is not used by the
    current implementation.

    :param question: the question being answered
    :param abstract_rules: rules to pair with terms
    :return: one ``(rule, terms)`` tuple per rule, in input order
    """
    pick_terms = self._term_selector.next_terms
    rule_terms_pairs: list[tuple[Rule, list[GROUNDED_TYPE_FOR_UNIFICATION]]] = [
        (rule, pick_terms(rule)) for rule in abstract_rules
    ]
    self._last_selected_rules = [pair[0] for pair in rule_terms_pairs]

    # Names are kept as they were; the type is GROUNDED_TYPE for now.
    if not logger.isEnabledFor(logging.DEBUG):
        return rule_terms_pairs

    logger.debug("Selected %s rule-fact pairs from rule base with %s active rules",
                 len(rule_terms_pairs),
                 len(self.rule_base.get_rules()))
    return rule_terms_pairs
69
+
70
def _select_rule_terms_pair(self, question: Question) -> Sequence[tuple[Rule, list[GROUNDED_TYPE_FOR_UNIFICATION]]]:
    """
    Select rules first, then the terms each selected rule is instantiated with.

    :param question: the question being answered
    :return: ``(rule, terms)`` pairs for the selected rules
    """
    abstract_rules = self.__select_abstract_rule(question)
    return self.__select_facts_for_abstract_rules(question=question, abstract_rules=abstract_rules)
73
+
74
def select_facts_rules_pair(self, question: Question) -> list[tuple[Rule, list[GROUNDED_TYPE_FOR_UNIFICATION]]]:
    """
    Symmetric variant that would select facts first and rules second; not enabled yet.

    :param question: the question being answered
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
77
+
78
def _select_grounded_rule(self, grounded_rules: Sequence[GroundedRule]) -> Sequence[GroundedRule]:
    """
    Hook for filtering grounded rules; currently returns all of them as a new list.

    Meant to drop grounded rules without candidate values once finer-grained selection exists.

    :param grounded_rules: grounded rules produced by the current round
    :return: a list copy of ``grounded_rules``
    """
    # Declaration only (no value is assigned): once the TODO above is done, this parameter is
    # expected to control the selection algorithm or the number of kept rules.
    self.max_grounded_series: int
    return [*grounded_rules]
82
+
83
def grounding_process(self, question: Question) -> Sequence[GroundedRule]:
    """
    Run one complete grounding pass and return the resulting GroundedRule collection.

    :param question: the question being answered, together with its premises
    :returns: list of GroundedRule
    """
    logger.info("Starting grounding process for question: %s", question.description)
    # perf_counter is monotonic, so the measured duration cannot be skewed (or turn negative)
    # when the system clock is adjusted during grounding.
    start_time = time.perf_counter()

    selected_rule_terms_pair = self._select_rule_terms_pair(question)
    with GroundedProcess(grounded_structure=self.grounded_structure, cur_rules_terms=selected_rule_terms_pair) as grounded_structure:
        grounded_structure.exec_grounding()

    # Look up the grounded rules that correspond to the abstract rules selected above.
    grounded_rules = self.grounded_structure.get_corresponding_grounded_rules(
        abstract_rules=[r[0] for r in selected_rule_terms_pair])

    elapsed = time.perf_counter() - start_time
    logger.info("Grounding completed in %.2f seconds. Grounded rules: %s", elapsed, len(grounded_rules))

    return self._select_grounded_rule(grounded_rules)
104
+
105
def reset(self) -> None:
    """Reset the Grounder state before reasoning about a new question."""
    resettable = (
        # Each GroundedRule currently maintains its own RuleCheckGraph, so clearing the pool also
        # discards the graphs. If graphs are merged later, an extra reset step is needed here.
        self.grounded_structure,
        # Does not drop the rules, only returns the selector to its initial state.
        # 1. If a new question replaces the whole rule base, a new InferenceEngine should be created.
        # 2. If the rule base (or another component) changes the rules used for inference, that
        #    component applies the change itself through set rules.
        self._rule_selector,
        # Clears the candidate term table, because facts are derived step by step from the
        # initial stage.
        self._term_selector,
    )
    for component in resettable:
        component.reset()
113
+
114
def selected_only_question_rules(self) -> bool:
    """
    Tell whether the most recent selection consisted solely of question rules.

    :return: False when nothing was selected, otherwise whether every selected rule is a
        ``_QuestionRule``
    """
    last_rules = self._last_selected_rules
    if not last_rules:
        return False
    return all(isinstance(rule, _QuestionRule) for rule in last_rules)
@@ -0,0 +1,112 @@
1
+ # 文字录入事实/规则库的设计文档
2
+
3
+ ## FactBase
4
+
5
+ ### 一、概述
6
+
7
+ 本设计针对知识推理系统中“事实库”(FactBase)提供一种**基于 YAML** 的文本存储格式。每条事实(Fact)以字符串形式录入,通过parser解析,
8
+ 辅以元数据(Metadata)扩展。**Concepts** 与 **Operators** 在 Facts 部分声明前定义,确保所有使用到的概念与运算符均已定义、符合约束。
9
+
10
+ ---
11
+
12
+ ### 二、文档结构
13
+
14
+ ```text
15
+ ├─ Concepts: # 列表,每项为一个概念定义
16
+ │ ├─ id # 概念唯一标识,如 C001
17
+ │ ├─ name # 概念名称,如 Person # TODO: 后面可能会增加概念从属关系的录入
18
+ │ └─ description # 概念描述
19
+
20
+ ├─ Operators: # 列表,每项为一个运算符定义
21
+ │ ├─ id # 运算符 ID,如 OP001
22
+ │ ├─ symbol # 运算符符号或名称,如 born_in
23
+ │ ├─ input_type # 输入约束,指定 subject/object 所属的 concept ID,列表
24
+ │ ├─ output_type # 输出约束,指定 subject/object 所属的 concept ID,仅有一个
25
+ │ └─ description # 运算符描述
26
+
27
+ └─ Facts: # 列表,事实条目
28
+ ├─ FactID # 唯一 ID,如 F001
29
+ ├─ content # 事实内容,以字符串形式录入并由 parser 解析
30
+ ├─ Metadata # 可选,具体信息自由调整,如存放来源、时间、可信度等信息,可能不会被读入或需要自行在引擎上二次开发
31
+ └─ description # 事实描述
32
+ ```
33
+
34
+ ---
35
+ ### 三、字段详解
36
+
37
+ 1. **Concepts**
38
+
39
+ * **id**:建议`^C\d{3,}$` 格式,唯一且不可重复。
40
+ * **name**:人类可读名称。
41
+ * **description**:可选,对该概念的补充说明。
42
+
43
+ 2. **Operators**
44
+
45
+ * **id**:建议`^OP\d{3,}$` 格式(如 OP001),唯一。
46
+ * **symbol**:运算符或关系名。
47
+ * **input_type**:可接受的概念 ID 列表或单值。
48
+ * **output_type**:输出所属的概念 ID,仅允许单值。
49
+ * **description**:可选。
50
+
51
+ 3. **Facts**
52
+ * **FactID**:格式 `^F\d{4,}$`。
53
+ * **content**:字符串,其中使用到的operator必须是已声明的 Operator。
54
+ * **Metadata**:可选。
55
+ * **description**:可选。
56
+
57
+ ---
58
+ ### 四、示例
59
+
60
+ ```yaml
61
+ # ───────────────────────────────────────────────────────────────────────────
62
+ # 顶层定义:先声明 Concepts(事实用到的实体/类型),再声明 Operators(可用的关系/运算符),最后才是具体的 Fact
63
+ # ───────────────────────────────────────────────────────────────────────────
64
+
65
+ # ───────────────────────────────────────────────────────────────────────────
66
+ # 下面是概念(concept)条目,概念必须提供概念名称
67
+ # ───────────────────────────────────────────────────────────────────────────
68
+ Concepts:
69
+ # 每个 concept 有唯一 ID、名称及可选说明
70
+ - id: C001
71
+ name: "Person"
72
+ description: "表示人类个体"
73
+ - id: C002
74
+ name: "Location"
75
+ description: "地理位置"
76
+ - id: C003
77
+ name: "Organization"
78
+ - id: C004
79
+ name: "Bool"
80
+ # ───────────────────────────────────────────────────────────────────────────
81
+ # 下面是算子(operator)条目,仅允许使用上面定义过的concept
82
+ # ───────────────────────────────────────────────────────────────────────────
83
+ Operators:
84
+ # 每个 operator 有唯一 ID、符号/名称、输入输出类型约束
85
+ - id: OP001
86
+ symbol: "born_in"
87
+ input_type:
88
+ - C001
89
+ output_type: C002
90
+ description: "某人出生于何地"
91
+ - id: OP002
92
+ symbol: "membership"
93
+ input_type:
94
+ - C001
95
+ - C003
96
+ output_type: C004
97
+ description: "某人是否为组织成员"
98
+ # ───────────────────────────────────────────────────────────────────────────
99
+ # 下面是事实条目,仅允许使用上面定义过的operator
100
+ # ───────────────────────────────────────────────────────────────────────────
101
+ Facts:
102
+ - FactID: F0001
103
+ content: "born_in (Albert_Einstein) = German Empire"
104
+ Metadata:
105
+ source: "wikipedia.org/Albert_Einstein"
106
+ created_at: "2025-07-10"
107
+ description: "爱因斯坦出生于德国"
108
+ ```
109
+
110
+
111
+ ### 五、具体的读取、解析、校验流程介绍
112
+ work in progress
@@ -0,0 +1,6 @@
1
+ """断言逻辑和推理引擎所需要的知识库相关结构"""
2
+ from .fact_base import FactBase
3
+ from .rule_base import RuleBase
4
+ from .ontology_base import load_ontologies
5
+
6
+ __all__ = ["FactBase", "RuleBase", 'load_ontologies']
@@ -0,0 +1 @@
1
+ """暂时提供一些声明的示例,使用时用于下游用户增设自己领域特定的常用信息,如数学的Int,金融的Company等等"""
@@ -0,0 +1,13 @@
1
+ from kele.syntax import Concept
2
+
3
# Built-in concepts.
FREEVARANY_CONCEPT: Concept = Concept("FREEVARANY")
BOOL_CONCEPT: Concept = Concept("Bool")  # boolean
COMPLEX_NUMBER_CONCEPT: Concept = Concept("ComplexNumber")
EQUATION_CONCEPT: Concept = Concept("Equation", "仅用于表示算术方程")


# Example concepts.
example_concept_1: Concept = Concept("Person_Example")
example_concept_2: Concept = Concept("Color_Example")
example_concept_3: Concept = Concept("Location_Example")
example_concept_4: Concept = Concept("Object_Example")
@@ -0,0 +1,43 @@
1
+ from kele.syntax import Constant, Variable, CompoundTerm, FlatCompoundTerm, Assertion, Formula
2
+ from .builtin_concepts import BOOL_CONCEPT, example_concept_1, example_concept_2, example_concept_4
3
+ # 不同于builtin_operators.py中的注释,本文件模拟正常的builtin base的导入,由于引擎内部是直接导入py文件的,我们可以自然地使用相对导入。
4
+ # 同时两个文件也用于说明本体名可以用变量或字符串任一来调用
5
+ from .builtin_operators import example_operator_1, example_operator_2
6
+
7
# Boolean truth constants.
true_const = Constant("TrueConst", BOOL_CONCEPT)
false_const = Constant("FalseConst", BOOL_CONCEPT)

# Example constants.
example_constant_1 = Constant("Alice", example_concept_1)
example_constant_2 = Constant("Bob", example_concept_1)
example_constant_3 = Constant("Red", example_concept_2)
example_constant_4 = Constant("desk", example_concept_4)


# Example variables.
example_variable_1 = Variable("x")
example_variable_2 = Variable("y")

# Example terms.
example_term_1 = CompoundTerm(example_operator_1, [example_constant_1, example_variable_1])
example_term_2 = CompoundTerm(example_operator_2, [example_constant_4])

# Example flat compound term.
example_flat_compound_term_1 = FlatCompoundTerm(example_operator_1, [example_constant_2, example_constant_1])

# Example assertions.
example_assertion_1 = Assertion(example_term_1, example_constant_1)
example_assertion_2 = Assertion(example_flat_compound_term_1, example_constant_1)

# Example formulas: a binary AND and a unary NOT (no right-hand operand).
example_formula_1 = Formula(example_assertion_1, "AND", example_assertion_2)
example_formula_2 = Formula(example_assertion_2, "NOT", None)