adaptive-memory-engine 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adaptive_memory_engine-0.1.6.dist-info/METADATA +228 -0
- adaptive_memory_engine-0.1.6.dist-info/RECORD +72 -0
- adaptive_memory_engine-0.1.6.dist-info/WHEEL +4 -0
- adaptive_memory_engine-0.1.6.dist-info/entry_points.txt +3 -0
- adaptive_memory_engine-0.1.6.dist-info/licenses/LICENSE +21 -0
- ame/__init__.py +1 -0
- ame/agent/__init__.py +1 -0
- ame/agent/mcp.py +474 -0
- ame/agent/memory_api.py +141 -0
- ame/agent/results.py +30 -0
- ame/bronze/schema.py +17 -0
- ame/bronze/store.py +38 -0
- ame/cli/__init__.py +1 -0
- ame/cli/main.py +903 -0
- ame/connectors/base.py +30 -0
- ame/connectors/contract.py +199 -0
- ame/connectors/github.py +66 -0
- ame/connectors/google.py +464 -0
- ame/connectors/google_oauth.py +156 -0
- ame/connectors/jira.py +66 -0
- ame/connectors/json_helpers.py +43 -0
- ame/connectors/markdown.py +116 -0
- ame/connectors/notion.py +59 -0
- ame/connectors/oauth_callback.py +102 -0
- ame/connectors/oauth_provider.py +250 -0
- ame/connectors/obsidian.py +19 -0
- ame/connectors/router.py +155 -0
- ame/connectors/slack.py +66 -0
- ame/connectors/slack_oauth.py +417 -0
- ame/connectors/sync_history.py +73 -0
- ame/context_budget.py +106 -0
- ame/core/config.py +77 -0
- ame/core/corpus.py +17 -0
- ame/core/errors.py +18 -0
- ame/core/paths.py +111 -0
- ame/core/state.py +57 -0
- ame/export/obsidian.py +123 -0
- ame/gold/builder.py +300 -0
- ame/gold/ontology.py +80 -0
- ame/gold/resolver.py +91 -0
- ame/gold/schema.py +40 -0
- ame/gold/store.py +45 -0
- ame/hardware/profiler.py +85 -0
- ame/hardware/tier.py +27 -0
- ame/hermes/__init__.py +3 -0
- ame/hermes/memory.py +209 -0
- ame/models/download.py +243 -0
- ame/models/ollama.py +60 -0
- ame/models/registry.py +101 -0
- ame/models/router.py +22 -0
- ame/pipeline.py +155 -0
- ame/query/diff.py +40 -0
- ame/query/engine.py +919 -0
- ame/query/memory_os.py +313 -0
- ame/query/mql.py +84 -0
- ame/query/multihop.py +264 -0
- ame/query/result.py +20 -0
- ame/sdk.py +52 -0
- ame/security.py +145 -0
- ame/silver/extractor.py +414 -0
- ame/silver/llm_extractor.py +181 -0
- ame/silver/prompts.py +56 -0
- ame/silver/rationale.py +140 -0
- ame/silver/schema.py +51 -0
- ame/silver/store.py +59 -0
- ame/storage/custom_kg.py +33 -0
- ame/storage/lightrag_adapter.py +362 -0
- ame/validation/confidence.py +5 -0
- ame/validation/grounding.py +10 -0
- ame/validation/type_gate.py +22 -0
- ame/writeback.py +173 -0
- memory/__init__.py +3 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from typing import Literal, Protocol
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field, ValidationError
|
|
7
|
+
|
|
8
|
+
from ame.bronze.schema import BronzeDocument
|
|
9
|
+
from ame.core.errors import LlmClientError
|
|
10
|
+
from ame.silver.extractor import DeterministicExtractor
|
|
11
|
+
from ame.silver.prompts import MEMORY_EXTRACTION_PROMPT
|
|
12
|
+
from ame.silver.schema import SilverDecision, SilverEntity, SilverRelation
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LlmClient(Protocol):
    """Structural interface expected from an LLM client used by the extractor."""

    def complete_json(self, prompt: str, payload: dict) -> dict:
        """Return the model's reply as a dict for the given prompt and payload."""
        ...
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RawLlmEntity(BaseModel):
    """One entity row as returned by the LLM, before conversion to ``SilverEntity``."""

    # Closed set of entity kinds; any other value fails validation.
    type: Literal[
        "Person",
        "Project",
        "Tool",
        "Concept",
        "Decision",
        "Issue",
        "Action",
    ]
    name: str
    # Optional supporting text; the extractor falls back to ``name`` when absent.
    span: str | None = None
    # Defaults to 0.75 and must lie within [0.0, 1.0].
    confidence: float = Field(default=0.75, ge=0.0, le=1.0)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RawLlmRelation(BaseModel):
    """One subject/predicate/object row as returned by the LLM."""

    subject: str
    # Closed set of relation labels; any other value fails validation.
    predicate: Literal[
        "USES",
        "MADE_IN",
        "RELATED_TO",
        "SUPERSEDES",
        "MENTIONS",
    ]
    object: str
    # Defaults to 0.7 and must lie within [0.0, 1.0].
    confidence: float = Field(default=0.7, ge=0.0, le=1.0)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class RawLlmDecision(BaseModel):
    """One decision row as returned by the LLM; only ``title`` is required."""

    title: str
    # Lifecycle state; a reply that omits it is treated as "proposed".
    status: Literal[
        "proposed",
        "accepted",
        "rejected",
        "superseded",
    ] = "proposed"
    project: str | None = None
    rationale: str | None = None
    # Kept as a plain string; no date parsing happens in this model.
    decision_date: str | None = None
    participants: list[str] = Field(default_factory=list)
    # Defaults to 0.7 and must lie within [0.0, 1.0].
    confidence: float = Field(default=0.7, ge=0.0, le=1.0)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class RawLlmExtraction(BaseModel):
    """Top-level shape of an LLM reply; every section defaults to an empty list."""

    entities: list[RawLlmEntity] = Field(default_factory=list)
    relations: list[RawLlmRelation] = Field(default_factory=list)
    decisions: list[RawLlmDecision] = Field(default_factory=list)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class LlmExtractor:
    """LLM extraction with deterministic prefill for offline-stable metadata.

    The deterministic extractor always runs first. When a client is configured,
    rows validated from the LLM reply are appended after the deterministic rows
    and duplicates are dropped, so a deterministic row wins over an LLM row that
    shares its key.
    """

    def __init__(self, client: LlmClient | None = None, max_retries: int = 1):
        """Store the optional LLM client and the retry budget.

        Args:
            client: Object providing ``complete_json``; ``None`` disables the
                LLM step so only the deterministic extractor is used.
            max_retries: Extra attempts allowed after a reply fails schema
                validation.
        """
        self.client = client
        self.max_retries = max_retries
        self.fallback = DeterministicExtractor()

    def extract(self, doc: BronzeDocument) -> tuple[list[SilverEntity], list[SilverRelation], list[SilverDecision]]:
        """Extract entities, relations and decisions from one document.

        Returns:
            The deterministic result alone when no client is configured,
            otherwise the deterministic rows followed by the LLM rows with
            duplicates removed.

        Raises:
            LlmClientError: When the LLM reply never passes schema validation.
        """
        if self.client is None:
            return self.fallback.extract(doc)
        deterministic_entities, deterministic_relations, deterministic_decisions = self.fallback.extract(doc)
        payload = {
            "source_id": doc.source_id,
            "metadata": doc.metadata,
            "content": doc.content,
        }
        raw = self._complete_extraction(payload)
        entities = deterministic_entities + self._entities(doc, raw.entities)
        relations = deterministic_relations + self._relations(doc, raw.relations)
        decisions = deterministic_decisions + self._decisions(doc, raw.decisions)
        return self._dedupe_entities(entities), self._dedupe_relations(relations), self._dedupe_decisions(decisions)

    def _complete_extraction(self, payload: dict) -> RawLlmExtraction:
        """Call the client until its reply validates or the retry budget runs out.

        After a failed attempt the validation error (truncated to 500
        characters) is sent back to the model under ``retry_feedback``.

        Raises:
            LlmClientError: When no client is configured or every attempt fails
                schema validation.
        """
        # Explicit check instead of ``assert`` so the guard survives ``python -O``.
        if self.client is None:
            raise LlmClientError("LLM extraction requested without a configured client.")
        # Copy so the caller's payload never gains the retry_feedback key.
        retry_payload = dict(payload)
        last_error: str | None = None
        for attempt in range(self.max_retries + 1):
            if last_error:
                retry_payload["retry_feedback"] = (
                    "Previous response failed schema validation. Return only the requested JSON object. "
                    f"Validation error: {last_error[:500]}"
                )
            raw = self.client.complete_json(MEMORY_EXTRACTION_PROMPT, retry_payload)
            try:
                return RawLlmExtraction.model_validate(raw)
            except ValidationError as exc:
                last_error = str(exc)
                if attempt >= self.max_retries:
                    raise LlmClientError(f"LLM extraction schema validation failed: {last_error}") from exc
        # Only reachable when max_retries is negative and the loop never runs.
        raise LlmClientError("LLM extraction schema validation failed.")

    def _entities(self, doc: BronzeDocument, rows: list[RawLlmEntity]) -> list[SilverEntity]:
        """Convert validated LLM entity rows into ``SilverEntity`` records for ``doc``."""
        entities: list[SilverEntity] = []
        for row in rows:
            name = row.name.strip()
            entity_type = row.type
            # A missing or empty span falls back to the entity name itself.
            span = (row.span or name).strip()
            entities.append(
                SilverEntity(
                    id=self._id("entity", doc.id, entity_type, name),
                    corpus_id=doc.corpus_id,
                    type=entity_type,
                    name=name,
                    span=span,
                    source_ids=[doc.id],
                    confidence=row.confidence,
                )
            )
        return entities

    def _relations(self, doc: BronzeDocument, rows: list[RawLlmRelation]) -> list[SilverRelation]:
        """Convert validated LLM relation rows into ``SilverRelation`` records for ``doc``."""
        relations: list[SilverRelation] = []
        for row in rows:
            # Normalise before hashing so the id matches the stored fields,
            # mirroring how _entities derives its ids.
            subject = row.subject.strip()
            obj = row.object.strip()
            relations.append(
                SilverRelation(
                    id=self._id("relation", doc.id, subject, row.predicate, obj),
                    corpus_id=doc.corpus_id,
                    subject=subject,
                    predicate=row.predicate,
                    object=obj,
                    source_ids=[doc.id],
                    confidence=row.confidence,
                )
            )
        return relations

    def _decisions(self, doc: BronzeDocument, rows: list[RawLlmDecision]) -> list[SilverDecision]:
        """Convert validated LLM decision rows into ``SilverDecision`` records for ``doc``."""
        # The frontmatter key may be absent or explicitly None; neither may crash.
        frontmatter = doc.metadata.get("frontmatter") or {}
        fallback_date = frontmatter.get("date") if isinstance(frontmatter, dict) else None
        decisions: list[SilverDecision] = []
        for row in rows:
            # Normalise before hashing so the id matches the stored title.
            title = row.title.strip()
            decisions.append(
                SilverDecision(
                    id=self._id("decision", doc.id, title),
                    corpus_id=doc.corpus_id,
                    title=title,
                    status=row.status,
                    project=row.project,
                    rationale=row.rationale,
                    decision_date=row.decision_date or fallback_date,
                    participants=row.participants,
                    source_ids=[doc.id],
                    confidence=row.confidence,
                )
            )
        return decisions

    @staticmethod
    def _unique_by(items: list, key) -> list:
        """Return ``items`` in order, keeping only the first item for each ``key(item)``."""
        seen: set = set()
        unique: list = []
        for item in items:
            marker = key(item)
            if marker in seen:
                continue
            seen.add(marker)
            unique.append(item)
        return unique

    def _dedupe_entities(self, entities: list[SilverEntity]) -> list[SilverEntity]:
        """Drop later entities sharing a (type, case-folded name) pair with an earlier one."""
        return self._unique_by(entities, lambda entity: (entity.type, entity.name.casefold()))

    def _dedupe_relations(self, relations: list[SilverRelation]) -> list[SilverRelation]:
        """Drop later relations repeating a case-folded subject/predicate/object triple."""
        return self._unique_by(
            relations,
            lambda relation: (relation.subject.casefold(), relation.predicate, relation.object.casefold()),
        )

    def _dedupe_decisions(self, decisions: list[SilverDecision]) -> list[SilverDecision]:
        """Drop later decisions whose case-folded title was already seen."""
        return self._unique_by(decisions, lambda decision: decision.title.casefold())

    def _id(self, prefix: str, *parts: str) -> str:
        """Build ``<prefix>_<16 hex chars>`` from the SHA-256 of the parts joined by ``|``."""
        digest = hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest()[:16]
        return f"{prefix}_{digest}"
|
ame/silver/prompts.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
ENTITY_EXTRACTION_PROMPT = """너는 정보 추출기다.
|
|
2
|
+
|
|
3
|
+
허용 타입:
|
|
4
|
+
Person | Project | Tool | Concept | Decision | Issue | Action
|
|
5
|
+
|
|
6
|
+
규칙:
|
|
7
|
+
- 실제 원문에 등장한 것만 추출한다.
|
|
8
|
+
- 반드시 원문 span을 포함한다.
|
|
9
|
+
- JSON만 출력한다.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
RELATION_EXTRACTION_PROMPT = """너는 관계 추출기다.
|
|
13
|
+
|
|
14
|
+
허용 관계:
|
|
15
|
+
USES | MADE_IN | RELATED_TO | SUPERSEDES | MENTIONS
|
|
16
|
+
|
|
17
|
+
규칙:
|
|
18
|
+
- 제공된 엔티티 사이의 관계만 추출한다.
|
|
19
|
+
- 근거가 없으면 생성하지 않는다.
|
|
20
|
+
- JSON만 출력한다.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
MEMORY_EXTRACTION_PROMPT = """너는 Adaptive Memory Engine의 구조화 추출기다.
|
|
24
|
+
|
|
25
|
+
허용 엔티티 타입:
|
|
26
|
+
Person | Project | Tool | Concept | Decision | Issue | Action
|
|
27
|
+
|
|
28
|
+
허용 관계:
|
|
29
|
+
USES | MADE_IN | RELATED_TO | SUPERSEDES | MENTIONS
|
|
30
|
+
|
|
31
|
+
규칙:
|
|
32
|
+
- 실제 원문에 등장한 것만 추출한다.
|
|
33
|
+
- entity는 반드시 원문 span을 포함한다.
|
|
34
|
+
- relation은 추출된 entity 사이에서만 만든다.
|
|
35
|
+
- 근거가 약하면 confidence를 낮게 준다.
|
|
36
|
+
- JSON만 출력한다.
|
|
37
|
+
|
|
38
|
+
출력 형식:
|
|
39
|
+
{
|
|
40
|
+
"entities": [
|
|
41
|
+
{"type": "Project", "name": "OpenClaw", "span": "OpenClaw", "confidence": 0.9}
|
|
42
|
+
],
|
|
43
|
+
"relations": [
|
|
44
|
+
{"subject": "OpenClaw", "predicate": "USES", "object": "LightRAG", "confidence": 0.8}
|
|
45
|
+
],
|
|
46
|
+
"decisions": [
|
|
47
|
+
{
|
|
48
|
+
"title": "LightRAG 도입",
|
|
49
|
+
"status": "accepted",
|
|
50
|
+
"project": "OpenClaw",
|
|
51
|
+
"rationale": "로컬 검색 코어로 사용",
|
|
52
|
+
"confidence": 0.8
|
|
53
|
+
}
|
|
54
|
+
]
|
|
55
|
+
}
|
|
56
|
+
"""
|
ame/silver/rationale.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from ame.bronze.schema import BronzeDocument
|
|
7
|
+
from ame.silver.schema import SilverDecision, SilverRationale
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class RationaleSpec:
    """Immutable rule that ties one canned rationale to a decision title."""

    # Title of the decision the rationale is attached to.
    decision_title: str
    # Rationale sentence emitted when the rule applies.
    text: str
    # Short label for the rationale, e.g. "privacy" or "tradeoff".
    category: str
    # Substrings that must all occur in a document for it to support the rule.
    source_markers: tuple[str, ...]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Static rule table consumed by RationaleExtractor. Each entry is
# (decision title, rationale text, category, source markers); a document
# supports an entry only when every marker occurs in it (case-insensitively).
RATIONALE_SPECS = [
    # Decision: adopting LightRAG.
    RationaleSpec("LightRAG 도입 결정", "16GB 환경 대응", "constraint", ("LightRAG", "16GB")),
    RationaleSpec("LightRAG 도입 결정", "검증된 OSS 활용", "trust", ("LightRAG", "OSS")),
    RationaleSpec("LightRAG 도입 결정", "GraphRAG 직접 구현 대비 구현 범위 축소", "tradeoff", ("GraphRAG", "구현 범위")),
    RationaleSpec("LightRAG 도입 결정", "빠른 MVP 검증", "speed", ("MVP", "LightRAG")),
    RationaleSpec("LightRAG 도입 결정", "저장·검색은 LightRAG에 맡기고 추출·검증 계층에 집중", "focus", ("저장", "검색", "검증")),
    # Decision: Local First.
    RationaleSpec("Local First", "개인 데이터 보호", "privacy", ("Local First", "개인 데이터")),
    RationaleSpec("Local First", "회사 데이터 보호", "privacy", ("Local First", "회사 데이터")),
    RationaleSpec("Local First", "프로젝트 히스토리 보호", "privacy", ("Local First", "프로젝트 히스토리")),
    RationaleSpec("Local First", "민감 데이터가 클라우드에 기본 저장되지 않음", "privacy", ("Local First", "클라우드")),
    RationaleSpec("Local First", "사용자 데이터 통제권 유지", "control", ("Local First", "로컬")),
    # Decision: Validation First.
    RationaleSpec("Validation First", "LLM 환각 방지", "quality", ("Validation First", "LLM")),
    RationaleSpec("Validation First", "원문 span 기반 검증", "grounding", ("Validation First", "span")),
    RationaleSpec("Validation First", "Type Gate 기반 관계 검증", "type_gate", ("Validation Gate", "Type Gate")),
    RationaleSpec("Validation First", "Confidence 기반 품질 관리", "confidence", ("Validation Gate", "confidence")),
    RationaleSpec("Validation First", "검증된 사실만 Memory에 저장", "quality", ("Validation First", "검증")),
    # Decision: Obsidian export is needed.
    RationaleSpec("Obsidian Export 필요", "사용자는 Graph DB를 직접 보지 않음", "view_layer", ("Obsidian", "그래프DB")),
    RationaleSpec("Obsidian Export 필요", "Markdown 노트가 사람이 이해하기 쉬움", "view_layer", ("Obsidian", "Markdown")),
    RationaleSpec("Obsidian Export 필요", "Gold Memory를 사람이 검토 가능해야 함", "reviewability", ("Gold Memory", "Obsidian")),
    RationaleSpec("Obsidian Export 필요", "Obsidian Vault로 지식 소비 가능", "view_layer", ("Obsidian", "Vault")),
    # Decision: Slack connector excluded from the MVP.
    RationaleSpec("Slack Connector MVP 제외", "MVP 범위 최소화", "scope", ("MVP", "Slack")),
    RationaleSpec("Slack Connector MVP 제외", "Memory Pipeline 검증이 우선", "validation", ("MVP", "Bronze")),
    RationaleSpec("Slack Connector MVP 제외", "Markdown/Obsidian 기반 수직 슬라이스 검증이 먼저", "validation", ("Markdown", "Obsidian")),
    RationaleSpec("Slack Connector MVP 제외", "Connector Expansion은 이후 단계", "roadmap", ("Connector Expansion", "Slack")),
]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class RationaleExtractor:
    """Derive ``SilverRationale`` rows from decisions and from ``RATIONALE_SPECS``."""

    def extract(self, documents: list[BronzeDocument], decisions: list[SilverDecision]) -> list[SilverRationale]:
        """Collect rationales from decision text and from the spec table, then merge duplicates."""
        rationales: list[SilverRationale] = []
        rationales.extend(self._from_decision_text(decisions, documents))
        rationales.extend(self._from_specs(documents))
        return self._dedupe(rationales)

    def _from_decision_text(self, decisions: list[SilverDecision], documents: list[BronzeDocument]) -> list[SilverRationale]:
        """Build rationales for every decision that carries rationale text.

        A decision whose title matches one or more specs yields one rationale
        per matching spec (using the spec's text and the default confidence).
        Otherwise the decision's own rationale text is used, with ``span`` set
        only when that text occurs verbatim in one of its source documents.
        """
        docs_by_id = {doc.id: doc for doc in documents}
        rationales: list[SilverRationale] = []
        for decision in decisions:
            if not decision.rationale:
                continue
            matched_specs = [spec for spec in RATIONALE_SPECS if self._same_decision(spec.decision_title, decision.title)]
            if matched_specs:
                # NOTE(review): span is set to the spec text here without checking
                # that it occurs in a source document, unlike _from_specs.
                rationales.extend(
                    self._rationale(decision.corpus_id, spec.decision_title, spec.text, spec.category, spec.text, decision.source_ids)
                    for spec in matched_specs
                )
                continue
            grounded = any(
                decision.rationale in docs_by_id[source_id].content
                for source_id in decision.source_ids
                if source_id in docs_by_id
            )
            rationales.append(
                self._rationale(
                    decision.corpus_id,
                    decision.title,
                    decision.rationale,
                    "rationale",
                    decision.rationale if grounded else None,
                    decision.source_ids,
                    decision.confidence,
                )
            )
        return rationales

    def _from_specs(self, documents: list[BronzeDocument]) -> list[SilverRationale]:
        """Emit one rationale per spec that at least one document supports."""
        rationales: list[SilverRationale] = []
        for spec in RATIONALE_SPECS:
            # Keep the supporting documents themselves so the span check below
            # does not rescan an id list for every document.
            supporting = [doc for doc in documents if self._supports(doc, spec)]
            if not supporting:
                continue
            span = spec.text if any(spec.text in doc.content for doc in supporting) else None
            source_ids = [doc.id for doc in supporting]
            # The corpus id is taken from the first input document.
            rationales.append(self._rationale(documents[0].corpus_id, spec.decision_title, spec.text, spec.category, span, source_ids))
        return rationales

    def _supports(self, doc: BronzeDocument, spec: RationaleSpec) -> bool:
        """Return True when every marker of ``spec`` occurs in the document.

        The search text is the content plus the ``title``, ``section_title``
        and ``section_path`` metadata values, compared case-insensitively.
        """
        # ``or []`` tolerates a section_path key that is present but None.
        section_path = doc.metadata.get("section_path") or []
        haystack = " ".join(
            [
                doc.content,
                str(doc.metadata.get("title", "")),
                str(doc.metadata.get("section_title", "")),
                " ".join(str(part) for part in section_path),
            ]
        ).casefold()
        return all(marker.casefold() in haystack for marker in spec.source_markers)

    def _same_decision(self, expected: str, actual: str) -> bool:
        """Return True when two decision titles loosely refer to the same decision.

        Titles match when one contains the other (case-insensitively) or when
        both mention "lightrag". Blank titles never match: an empty string is a
        substring of everything and would otherwise match every spec.
        """
        expected_cf = expected.strip().casefold()
        actual_cf = actual.strip().casefold()
        if not expected_cf or not actual_cf:
            return False
        return expected_cf in actual_cf or actual_cf in expected_cf or ("lightrag" in expected_cf and "lightrag" in actual_cf)

    def _rationale(
        self,
        corpus_id: str,
        decision_title: str,
        text: str,
        category: str,
        span: str | None,
        source_ids: list[str],
        confidence: float = 0.92,
    ) -> SilverRationale:
        """Create a ``SilverRationale`` with a derived id and sorted, unique source ids."""
        return SilverRationale(
            id=self._id("rationale", corpus_id, decision_title, text),
            corpus_id=corpus_id,
            decision_title=decision_title,
            rationale_text=text,
            category=category,
            span=span,
            source_ids=sorted(set(source_ids)),
            confidence=confidence,
        )

    def _dedupe(self, rationales: list[SilverRationale]) -> list[SilverRationale]:
        """Merge rationales sharing a case-folded (decision title, text) pair.

        The first occurrence is kept and updated in place: source ids are
        unioned, the higher confidence wins, and a missing span is filled from
        the duplicate.
        """
        by_key: dict[tuple[str, str], SilverRationale] = {}
        for rationale in rationales:
            key = (rationale.decision_title.casefold(), rationale.rationale_text.casefold())
            existing = by_key.get(key)
            if existing is None:
                by_key[key] = rationale
                continue
            existing.source_ids = sorted(set(existing.source_ids + rationale.source_ids))
            existing.confidence = max(existing.confidence, rationale.confidence)
            if existing.span is None:
                existing.span = rationale.span
        return list(by_key.values())

    def _id(self, prefix: str, *parts: str) -> str:
        """Build ``<prefix>_<16 hex chars>`` from the SHA-256 of the parts joined by ``|``."""
        digest = hashlib.sha256("|".join(parts).encode("utf-8")).hexdigest()[:16]
        return f"{prefix}_{digest}"
|
ame/silver/schema.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SilverEntity(BaseModel):
    """Entity record of the silver layer."""

    id: str
    corpus_id: str
    # Free-form string here; no closed set of kinds is enforced by this model.
    type: str
    name: str
    span: str | None = Field(default=None)
    aliases: list[str] = Field(default_factory=list)
    # Identifiers of the documents the entity was taken from.
    source_ids: list[str]
    confidence: float = Field(default=1.0)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SilverRelation(BaseModel):
    """Subject/predicate/object record of the silver layer; every field is required."""

    id: str
    corpus_id: str
    subject: str
    # Free-form string here; no closed set of predicates is enforced by this model.
    predicate: str
    object: str
    # Identifiers of the documents the relation was taken from.
    source_ids: list[str]
    confidence: float
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SilverDecision(BaseModel):
    """Decision record of the silver layer."""

    id: str
    corpus_id: str
    title: str
    # Required lifecycle state; this model supplies no default.
    status: Literal[
        "proposed",
        "accepted",
        "rejected",
        "superseded",
    ]
    project: str | None = Field(default=None)
    rationale: str | None = Field(default=None)
    # Kept as a plain string; no date parsing happens in this model.
    decision_date: str | None = Field(default=None)
    supersedes: list[str] = Field(default_factory=list)
    participants: list[str] = Field(default_factory=list)
    # Identifiers of the documents the decision was taken from.
    source_ids: list[str]
    confidence: float
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class SilverRationale(BaseModel):
    """Rationale record of the silver layer, linked to a decision by its title."""

    id: str
    corpus_id: str
    decision_title: str
    rationale_text: str
    category: str
    span: str | None = Field(default=None)
    # Identifiers of the documents that back the rationale.
    source_ids: list[str]
    confidence: float = Field(default=1.0)
|
ame/silver/store.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import TypeVar
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from ame.silver.schema import SilverDecision, SilverEntity, SilverRationale, SilverRelation
|
|
10
|
+
|
|
11
|
+
# Type variable for the pydantic model class passed to SilverStore._read, so
# the returned list is typed with that same model.
T = TypeVar("T", bound=BaseModel)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SilverStore:
    """JSONL-backed storage for silver-layer records under ``<corpus_root>/silver``."""

    def __init__(self, corpus_root: Path):
        """Point the store at ``corpus_root / "silver"`` and create that directory if needed."""
        self.root = corpus_root / "silver"
        self.root.mkdir(parents=True, exist_ok=True)

    def replace(
        self,
        entities: list[SilverEntity],
        relations: list[SilverRelation],
        decisions: list[SilverDecision],
        rejected: list[dict],
        rationales: list[SilverRationale] | None = None,
    ) -> None:
        """Overwrite every silver file with the given rows.

        A missing ``rationales`` argument writes an empty ``rationales.jsonl``.
        The files are rewritten one after another, not as a single atomic step.
        """
        self._write("entities.jsonl", entities)
        self._write("relations.jsonl", relations)
        self._write("decisions.jsonl", decisions)
        self._write("rationales.jsonl", rationales or [])
        self._write_dicts("rejected.jsonl", rejected)

    def entities(self) -> list[SilverEntity]:
        """Load the stored entities, or an empty list when the file is absent."""
        return self._read("entities.jsonl", SilverEntity)

    def relations(self) -> list[SilverRelation]:
        """Load the stored relations, or an empty list when the file is absent."""
        return self._read("relations.jsonl", SilverRelation)

    def decisions(self) -> list[SilverDecision]:
        """Load the stored decisions, or an empty list when the file is absent."""
        return self._read("decisions.jsonl", SilverDecision)

    def rationales(self) -> list[SilverRationale]:
        """Load the stored rationales, or an empty list when the file is absent."""
        return self._read("rationales.jsonl", SilverRationale)

    def _write(self, name: str, rows: list[BaseModel]) -> None:
        """Write one JSON document per line for each model in ``rows``, truncating the file first."""
        with (self.root / name).open("w", encoding="utf-8") as fh:
            for row in rows:
                fh.write(row.model_dump_json() + "\n")

    def _write_dicts(self, name: str, rows: list[dict]) -> None:
        """Write one JSON document per line for each dict, keeping non-ASCII text unescaped."""
        with (self.root / name).open("w", encoding="utf-8") as fh:
            for row in rows:
                fh.write(json.dumps(row, ensure_ascii=False) + "\n")

    def _read(self, name: str, model: type[T]) -> list[T]:
        """Parse every non-blank line of ``name`` with ``model.model_validate_json``.

        Returns an empty list when the file does not exist.
        """
        path = self.root / name
        if not path.exists():
            return []
        # Iterate the file rather than calling str.splitlines(): splitlines also
        # breaks on U+2028, U+2029 and U+0085, which can sit unescaped inside a
        # JSON string and would cut one record into invalid fragments.
        with path.open("r", encoding="utf-8") as fh:
            return [model.model_validate_json(line.rstrip("\r\n")) for line in fh if line.strip()]
|
ame/storage/custom_kg.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from ame.bronze.schema import BronzeDocument
|
|
4
|
+
from ame.gold.schema import GoldEdge, GoldNode
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def to_custom_kg(nodes: list[GoldNode], edges: list[GoldEdge], chunks: list[BronzeDocument] | None = None) -> dict:
    """Convert gold nodes, edges and optional chunks into a plain dict.

    The result has three keys, ``chunks``, ``entities`` and ``relationships``,
    each holding a list of dicts. Nodes and edges report only their first
    source id, or an empty string when they have none.
    """

    def _first_source(source_ids) -> str:
        # Only the first source id is exported.
        if source_ids:
            return source_ids[0]
        return ""

    chunk_rows = []
    for chunk in chunks or []:
        chunk_rows.append({"content": chunk.content, "source_id": chunk.id})

    entity_rows = []
    for node in nodes:
        entity_rows.append(
            {
                "entity_name": node.name,
                "entity_type": node.type,
                # The node name doubles as its description.
                "description": node.name,
                "source_id": _first_source(node.source_ids),
            }
        )

    relationship_rows = []
    for edge in edges:
        relationship_rows.append(
            {
                "src_id": edge.source,
                "tgt_id": edge.target,
                # The relation label is used for both description and keywords.
                "description": edge.relation,
                "keywords": edge.relation,
                "weight": edge.weight,
                "source_id": _first_source(edge.source_ids),
            }
        )

    return {
        "chunks": chunk_rows,
        "entities": entity_rows,
        "relationships": relationship_rows,
    }
|