grounded-memory 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gmem/__init__.py +35 -0
- gmem/service.py +5 -0
- grounded_memory/__init__.py +114 -0
- grounded_memory/adapters/__init__.py +46 -0
- grounded_memory/adapters/base_agent.py +55 -0
- grounded_memory/adapters/discovery.py +251 -0
- grounded_memory/adapters/generic_agent.py +783 -0
- grounded_memory/adapters/healthcare/__init__.py +95 -0
- grounded_memory/adapters/healthcare/agent.py +121 -0
- grounded_memory/adapters/healthcare/constraints.py +471 -0
- grounded_memory/adapters/healthcare/extractor.py +424 -0
- grounded_memory/adapters/healthcare/kb_manager.py +207 -0
- grounded_memory/adapters/healthcare/knowledge.py +403 -0
- grounded_memory/adapters/healthcare/lifecycle.py +138 -0
- grounded_memory/adapters/healthcare/loaders/__init__.py +16 -0
- grounded_memory/adapters/healthcare/loaders/cache.py +219 -0
- grounded_memory/adapters/healthcare/loaders/openfda.py +172 -0
- grounded_memory/adapters/healthcare/loaders/rxnorm.py +206 -0
- grounded_memory/adapters/healthcare/models.py +129 -0
- grounded_memory/adapters/healthcare/retrieval.py +788 -0
- grounded_memory/adapters/identity_service.py +103 -0
- grounded_memory/adapters/registry.py +215 -0
- grounded_memory/adapters/result.py +20 -0
- grounded_memory/adapters/seeds.py +322 -0
- grounded_memory/configs/engineering_constraints.yaml +187 -0
- grounded_memory/configs/finance_constraints.yaml +175 -0
- grounded_memory/configs/generic_constraints.yaml +174 -0
- grounded_memory/configs/healthcare_constraints.yaml +282 -0
- grounded_memory/configs/healthcare_kb.yaml +126 -0
- grounded_memory/configs/legal_constraints.yaml +182 -0
- grounded_memory/configs/llm_config.yaml +50 -0
- grounded_memory/configs/neo4j_config.yaml +44 -0
- grounded_memory/core/__init__.py +122 -0
- grounded_memory/core/conflict_resolution.py +323 -0
- grounded_memory/core/constraints.py +1022 -0
- grounded_memory/core/entity_identity.py +62 -0
- grounded_memory/core/grounding.py +722 -0
- grounded_memory/core/hybrid_store.py +342 -0
- grounded_memory/core/intent.py +342 -0
- grounded_memory/core/models.py +474 -0
- grounded_memory/core/neo4j_store.py +1124 -0
- grounded_memory/core/postgres_hybrid_store.py +264 -0
- grounded_memory/core/postgres_store.py +1505 -0
- grounded_memory/core/store.py +548 -0
- grounded_memory/core/system.py +133 -0
- grounded_memory/core/tuple_normalization.py +176 -0
- grounded_memory/llm/__init__.py +52 -0
- grounded_memory/llm/client.py +558 -0
- grounded_memory/llm/extractor.py +72 -0
- grounded_memory/llm/prompts.py +437 -0
- grounded_memory/logging_utils.py +60 -0
- grounded_memory/memory.py +2267 -0
- grounded_memory/retrieval/__init__.py +17 -0
- grounded_memory/retrieval/graph.py +1474 -0
- grounded_memory/service/__init__.py +5 -0
- grounded_memory/service/app.py +331 -0
- grounded_memory/service/models.py +130 -0
- grounded_memory/system.py +106 -0
- grounded_memory-0.1.0.dist-info/METADATA +305 -0
- grounded_memory-0.1.0.dist-info/RECORD +63 -0
- grounded_memory-0.1.0.dist-info/WHEEL +5 -0
- grounded_memory-0.1.0.dist-info/licenses/LICENSE +21 -0
- grounded_memory-0.1.0.dist-info/top_level.txt +2 -0
gmem/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""gmem public facade package.
|
|
2
|
+
|
|
3
|
+
This package provides a compact import surface while reusing the
|
|
4
|
+
`grounded_memory` implementation.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from grounded_memory import (
|
|
8
|
+
ConstraintValidator,
|
|
9
|
+
GroundedMemorySystem,
|
|
10
|
+
GroundingOperator,
|
|
11
|
+
LLMConfig,
|
|
12
|
+
Memory,
|
|
13
|
+
MemoryStore,
|
|
14
|
+
__version__,
|
|
15
|
+
configure_logging,
|
|
16
|
+
create_app,
|
|
17
|
+
list_registered_adapters,
|
|
18
|
+
register_adapter,
|
|
19
|
+
unregister_adapter,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"__version__",
|
|
24
|
+
"Memory",
|
|
25
|
+
"GroundedMemorySystem",
|
|
26
|
+
"ConstraintValidator",
|
|
27
|
+
"GroundingOperator",
|
|
28
|
+
"MemoryStore",
|
|
29
|
+
"LLMConfig",
|
|
30
|
+
"create_app",
|
|
31
|
+
"configure_logging",
|
|
32
|
+
"list_registered_adapters",
|
|
33
|
+
"register_adapter",
|
|
34
|
+
"unregister_adapter",
|
|
35
|
+
]
|
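gmem/service.py
ADDED
|
(The 5 added lines of gmem/service.py are missing from this capture; the 114-line hunk below matches grounded_memory/__init__.py in the file list above.)
grounded_memory/__init__.py
ADDED
|
@@ -0,0 +1,114 @@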
|
|
|
1
|
+
"""
|
|
2
|
+
Grounded Memory System - Core Module
|
|
3
|
+
|
|
4
|
+
A correctness-first memory architecture for LLM agents.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from grounded_memory.adapters.discovery import (
|
|
8
|
+
ConstraintSeedDiscoverer,
|
|
9
|
+
DiscoveredConstraintSeed,
|
|
10
|
+
)
|
|
11
|
+
from grounded_memory.adapters.generic_agent import (
|
|
12
|
+
GenericMemoryAgent,
|
|
13
|
+
GenericProcessingResult,
|
|
14
|
+
)
|
|
15
|
+
from grounded_memory.adapters.registry import (
|
|
16
|
+
get_adapter_spec,
|
|
17
|
+
get_adapter_spec_by_key,
|
|
18
|
+
list_registered_adapters,
|
|
19
|
+
list_supported_profiles,
|
|
20
|
+
register_adapter,
|
|
21
|
+
register_adapter_spec,
|
|
22
|
+
unregister_adapter,
|
|
23
|
+
unregister_adapter_spec,
|
|
24
|
+
)
|
|
25
|
+
from grounded_memory.adapters.seeds import (
|
|
26
|
+
CardinalitySeedConstraintEvaluator,
|
|
27
|
+
SeedConstraintEvaluator,
|
|
28
|
+
TemporalCardinalitySeedConstraintEvaluator,
|
|
29
|
+
)
|
|
30
|
+
from grounded_memory.core.constraints import (
|
|
31
|
+
ConstraintValidator,
|
|
32
|
+
ConstraintViolation,
|
|
33
|
+
ValidationResult,
|
|
34
|
+
)
|
|
35
|
+
from grounded_memory.core.grounding import GroundingOperator, GroundingResult
|
|
36
|
+
from grounded_memory.core.models import (
|
|
37
|
+
CandidateFact,
|
|
38
|
+
Constraint,
|
|
39
|
+
Entity,
|
|
40
|
+
Interaction,
|
|
41
|
+
MemoryDisposition,
|
|
42
|
+
RelationType,
|
|
43
|
+
ValidatedFact,
|
|
44
|
+
)
|
|
45
|
+
from grounded_memory.core.store import MemoryStore
|
|
46
|
+
from grounded_memory.core.system import GroundedMemorySystem as CoreGroundedMemorySystem
|
|
47
|
+
from grounded_memory.llm.client import LLMClient, LLMConfig, SyncLLMClient
|
|
48
|
+
from grounded_memory.llm.extractor import LLMFactExtractor
|
|
49
|
+
from grounded_memory.logging_utils import configure_logging
|
|
50
|
+
from grounded_memory.memory import (
|
|
51
|
+
Memory,
|
|
52
|
+
OptimizationProfile,
|
|
53
|
+
OptimizationSettings,
|
|
54
|
+
SearchResult,
|
|
55
|
+
)
|
|
56
|
+
from grounded_memory.service import app as service_app
|
|
57
|
+
from grounded_memory.service import create_app
|
|
58
|
+
from grounded_memory.system import GroundedMemorySystem
|
|
59
|
+
|
|
60
|
+
LLM_AVAILABLE = True
|
|
61
|
+
|
|
62
|
+
__version__ = "0.1.0"
|
|
63
|
+
|
|
64
|
+
__all__ = [
|
|
65
|
+
# Core Models
|
|
66
|
+
"Interaction",
|
|
67
|
+
"Entity",
|
|
68
|
+
"CandidateFact",
|
|
69
|
+
"ValidatedFact",
|
|
70
|
+
"Constraint",
|
|
71
|
+
"MemoryDisposition",
|
|
72
|
+
"RelationType",
|
|
73
|
+
# Grounding
|
|
74
|
+
"GroundingOperator",
|
|
75
|
+
"GroundingResult",
|
|
76
|
+
# Store
|
|
77
|
+
"MemoryStore",
|
|
78
|
+
"GroundedMemorySystem",
|
|
79
|
+
"CoreGroundedMemorySystem",
|
|
80
|
+
# Constraints
|
|
81
|
+
"ConstraintValidator",
|
|
82
|
+
"ValidationResult",
|
|
83
|
+
"ConstraintViolation",
|
|
84
|
+
# LLM (if available)
|
|
85
|
+
"LLM_AVAILABLE",
|
|
86
|
+
"LLMConfig",
|
|
87
|
+
"SyncLLMClient",
|
|
88
|
+
"LLMClient",
|
|
89
|
+
"LLMFactExtractor",
|
|
90
|
+
# SDK facade
|
|
91
|
+
"Memory",
|
|
92
|
+
"SearchResult",
|
|
93
|
+
"OptimizationProfile",
|
|
94
|
+
"OptimizationSettings",
|
|
95
|
+
# Adapter registry APIs
|
|
96
|
+
"SeedConstraintEvaluator",
|
|
97
|
+
"CardinalitySeedConstraintEvaluator",
|
|
98
|
+
"TemporalCardinalitySeedConstraintEvaluator",
|
|
99
|
+
"DiscoveredConstraintSeed",
|
|
100
|
+
"ConstraintSeedDiscoverer",
|
|
101
|
+
"GenericMemoryAgent",
|
|
102
|
+
"GenericProcessingResult",
|
|
103
|
+
"configure_logging",
|
|
104
|
+
"create_app",
|
|
105
|
+
"service_app",
|
|
106
|
+
"list_registered_adapters",
|
|
107
|
+
"get_adapter_spec_by_key",
|
|
108
|
+
"register_adapter",
|
|
109
|
+
"unregister_adapter",
|
|
110
|
+
"list_supported_profiles",
|
|
111
|
+
"get_adapter_spec",
|
|
112
|
+
"register_adapter_spec",
|
|
113
|
+
"unregister_adapter_spec",
|
|
114
|
+
]
|
|
grounded_memory/adapters/__init__.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Adapter packages for memory governance and agent wiring."""
|
|
2
|
+
|
|
3
|
+
from grounded_memory.adapters.discovery import (
|
|
4
|
+
ConstraintSeedDiscoverer,
|
|
5
|
+
DiscoveredConstraintSeed,
|
|
6
|
+
)
|
|
7
|
+
from grounded_memory.adapters.generic_agent import (
|
|
8
|
+
GenericMemoryAgent,
|
|
9
|
+
GenericProcessingResult,
|
|
10
|
+
)
|
|
11
|
+
from grounded_memory.adapters.registry import (
|
|
12
|
+
AdapterSpec,
|
|
13
|
+
get_adapter_spec,
|
|
14
|
+
get_adapter_spec_by_key,
|
|
15
|
+
list_registered_adapters,
|
|
16
|
+
# Backward-compatible exports
|
|
17
|
+
list_supported_profiles,
|
|
18
|
+
register_adapter,
|
|
19
|
+
register_adapter_spec,
|
|
20
|
+
unregister_adapter,
|
|
21
|
+
unregister_adapter_spec,
|
|
22
|
+
)
|
|
23
|
+
from grounded_memory.adapters.seeds import (
|
|
24
|
+
CardinalitySeedConstraintEvaluator,
|
|
25
|
+
SeedConstraintEvaluator,
|
|
26
|
+
TemporalCardinalitySeedConstraintEvaluator,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"SeedConstraintEvaluator",
|
|
31
|
+
"CardinalitySeedConstraintEvaluator",
|
|
32
|
+
"TemporalCardinalitySeedConstraintEvaluator",
|
|
33
|
+
"DiscoveredConstraintSeed",
|
|
34
|
+
"ConstraintSeedDiscoverer",
|
|
35
|
+
"AdapterSpec",
|
|
36
|
+
"GenericMemoryAgent",
|
|
37
|
+
"GenericProcessingResult",
|
|
38
|
+
"list_registered_adapters",
|
|
39
|
+
"get_adapter_spec_by_key",
|
|
40
|
+
"register_adapter",
|
|
41
|
+
"unregister_adapter",
|
|
42
|
+
"list_supported_profiles",
|
|
43
|
+
"get_adapter_spec",
|
|
44
|
+
"register_adapter_spec",
|
|
45
|
+
"unregister_adapter_spec",
|
|
46
|
+
]
|
|
grounded_memory/adapters/base_agent.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Shared adapter agent primitives."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BaseAsyncAdapterAgent(ABC):
    """Base class that provides a sync wrapper for async adapter agents.

    Subclasses implement :meth:`process_interaction`. :meth:`process` runs that
    coroutine to completion with ``asyncio.run`` for callers that are not
    already inside an event loop.
    """

    @abstractmethod
    async def process_interaction(
        self,
        raw_text: str,
        user_id: str | None = None,
        session_id: str | None = None,
        actor: str = "user",
        metadata: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> Any:
        """Process an interaction asynchronously."""

    def process(self, input_text: str, source: str = "user", **kwargs: Any) -> Any:
        """Backward-compatible synchronous wrapper used by demos and SDK calls.

        Args:
            input_text: Forwarded to ``process_interaction`` as ``raw_text``.
            source: Forwarded as ``actor`` after stripping and lower-casing.
                Non-string or blank values fall back to ``"user"``.
            **kwargs: ``metadata``, ``user_id`` and ``session_id`` are forwarded
                under their own names. Every other keyword whose value is not
                ``None`` is folded into ``metadata``.

        Returns:
            Whatever ``process_interaction`` returns.

        Raises:
            RuntimeError: If called while an event loop is already running.
        """
        metadata = kwargs.pop("metadata", None)
        user_id = kwargs.pop("user_id", None)
        session_id = kwargs.pop("session_id", None)

        # Remaining keywords travel as metadata; None values are dropped.
        extra_metadata = {k: v for k, v in kwargs.items() if v is not None}
        if metadata is None:
            metadata = extra_metadata
        elif isinstance(metadata, dict):
            # Explicit keywords win over keys already present in ``metadata``.
            metadata = {**metadata, **extra_metadata}
        # A non-dict ``metadata`` is forwarded untouched (extras are not merged).

        actor = source.strip().lower() if isinstance(source, str) else ""
        if not actor:
            # Blank input must not produce an empty actor; use the default.
            actor = "user"

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so asyncio.run() is safe.
            pass
        else:
            raise RuntimeError(
                f"{self.__class__.__name__}.process() cannot be used while an event loop is running. "
                "Await process_interaction(...) instead."
            )

        return asyncio.run(
            self.process_interaction(
                raw_text=input_text,
                user_id=user_id,
                session_id=session_id,
                actor=actor,
                metadata=metadata,
            )
        )
|
|
grounded_memory/adapters/discovery.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Autonomous mining and synthesis of dynamic constraint seeds."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class DiscoveredConstraintSeed:
    """Synthesized seed proposal mined from governance signals."""

    constraint_id: str
    name: str
    description: str
    relation_types: list[str]
    required_attribute_keys: list[str]
    require_value: bool
    confidence: float
    evidence_count: int
    mining_rule: str

    def as_dict(self) -> dict[str, Any]:
        """Return the seed as a plain dict, copying the two list fields."""
        payload: dict[str, Any] = dict(
            constraint_id=self.constraint_id,
            name=self.name,
            description=self.description,
            # Fresh lists so callers cannot mutate the seed through the dict.
            relation_types=[*self.relation_types],
            required_attribute_keys=[*self.required_attribute_keys],
            require_value=self.require_value,
            confidence=self.confidence,
            evidence_count=self.evidence_count,
            mining_rule=self.mining_rule,
        )
        return payload
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ConstraintSeedDiscoverer:
    """Mine validation/rejection signals and synthesize candidate seed constraints.

    Each signal is a mapping. The keys read by this class are ``relation``,
    ``is_valid`` (treated as True when absent), ``has_value`` (treated as False
    when absent) and ``candidate_attributes`` (ignored unless it is a dict).
    """

    def __init__(
        self,
        *,
        min_samples_per_relation: int = 20,
        min_rejections_per_relation: int = 6,
        min_gap: float = 0.35,
        min_gap_mode: str = "fixed",
        min_gap_floor: float = 0.15,
        min_gap_ceiling: float = 0.60,
        target_false_block_rate: float = 0.10,
        max_suggestions: int = 20,
    ) -> None:
        """Store the mining thresholds.

        Args:
            min_samples_per_relation: Relations with fewer signals are skipped.
            min_rejections_per_relation: Relations with fewer rejected signals
                are skipped.
            min_gap: Gap threshold used when ``min_gap_mode`` is ``"fixed"``.
            min_gap_mode: ``"fixed"`` or ``"adaptive"`` (stripped, lower-cased).
                Other values raise ``ValueError`` when the gap is resolved.
            min_gap_floor: Lower clamp for the adaptive gap.
            min_gap_ceiling: Upper clamp for the adaptive gap.
            target_false_block_rate: Base term of the adaptive gap.
            max_suggestions: Maximum number of seeds returned by ``discover``.
        """
        self.min_samples_per_relation = min_samples_per_relation
        self.min_rejections_per_relation = min_rejections_per_relation
        self.min_gap = min_gap
        self.min_gap_mode = min_gap_mode.strip().lower()
        self.min_gap_floor = min_gap_floor
        self.min_gap_ceiling = min_gap_ceiling
        self.target_false_block_rate = target_false_block_rate
        self.max_suggestions = max_suggestions

    def discover(
        self,
        *,
        validation_signals: list[dict[str, Any]],
        existing_constraint_ids: set[str] | None = None,
    ) -> list[DiscoveredConstraintSeed]:
        """Return seed suggestions mined from ``validation_signals``.

        Signals are grouped by their stripped ``relation`` string; signals with
        no relation are ignored. A relation is mined only when it has at least
        ``min_samples_per_relation`` signals and at least
        ``min_rejections_per_relation`` rejected ones.

        Args:
            validation_signals: Signal dicts to mine.
            existing_constraint_ids: Constraint ids that must not be suggested
                again. The collection is copied and never mutated.

        Returns:
            Suggestions sorted by ``(confidence, evidence_count)`` descending,
            truncated to ``max_suggestions``.

        Raises:
            ValueError: If ``min_gap_mode`` is neither ``"fixed"`` nor
                ``"adaptive"`` and at least one relation passes the thresholds.
        """
        # Private copy: ids of new suggestions are tracked here without
        # leaking into the caller's collection.
        known_ids: set[str] = set(existing_constraint_ids or ())

        grouped: dict[str, list[dict[str, Any]]] = {}
        for signal in validation_signals:
            relation = str(signal.get("relation") or "").strip()
            if not relation:
                continue
            grouped.setdefault(relation, []).append(signal)

        suggestions: list[DiscoveredConstraintSeed] = []

        for relation, rows in grouped.items():
            if len(rows) < self.min_samples_per_relation:
                continue

            # Single pass split; a missing ``is_valid`` counts as accepted.
            rejected: list[dict[str, Any]] = []
            accepted: list[dict[str, Any]] = []
            for row in rows:
                bucket = accepted if bool(row.get("is_valid", True)) else rejected
                bucket.append(row)
            if len(rejected) < self.min_rejections_per_relation:
                continue

            effective_min_gap = self._resolve_min_gap(
                total_samples=len(rows),
                rejected_samples=len(rejected),
            )

            value_seed = self._synthesize_require_value_seed(
                relation=relation,
                rejected=rejected,
                accepted=accepted,
                existing_ids=known_ids,
                min_gap=effective_min_gap,
            )
            if value_seed is not None:
                suggestions.append(value_seed)
                known_ids.add(value_seed.constraint_id)

            attribute_seeds = self._synthesize_required_attribute_key_seeds(
                relation=relation,
                rejected=rejected,
                accepted=accepted,
                existing_ids=known_ids,
                min_gap=effective_min_gap,
            )
            suggestions.extend(attribute_seeds)
            # Only the seeds added for this relation need registering.
            known_ids.update(seed.constraint_id for seed in attribute_seeds)

        suggestions.sort(
            key=lambda seed: (seed.confidence, seed.evidence_count),
            reverse=True,
        )
        return suggestions[: self.max_suggestions]

    @staticmethod
    def _confidence_from_gap(gap: float) -> float:
        """Map a rate gap to a confidence clamped to the range [0.5, 0.99]."""
        return min(0.99, max(0.5, gap * 0.8 + 0.2))

    @staticmethod
    def _count_missing_attribute(rows: list[dict[str, Any]], key: str) -> int:
        """Count rows whose ``candidate_attributes[key]`` is absent, None or ''."""
        missing = 0
        for row in rows:
            attrs = row.get("candidate_attributes") or {}
            value = attrs.get(key) if isinstance(attrs, dict) else None
            if value in (None, ""):
                missing += 1
        return missing

    def _synthesize_require_value_seed(
        self,
        *,
        relation: str,
        rejected: list[dict[str, Any]],
        accepted: list[dict[str, Any]],
        existing_ids: set[str],
        min_gap: float,
    ) -> DiscoveredConstraintSeed | None:
        """Suggest a "value required" seed for ``relation`` when warranted.

        The seed is produced when the share of rejected rows lacking a value
        exceeds the share of accepted rows lacking one by at least ``min_gap``.

        Returns:
            The seed, or ``None`` when no rejected row lacks a value, the gap
            is too small, or the constraint id is already in ``existing_ids``.
        """
        rejected_missing_value = sum(
            1 for row in rejected if not bool(row.get("has_value", False))
        )
        accepted_missing_value = sum(
            1 for row in accepted if not bool(row.get("has_value", False))
        )

        # max(..., 1) keeps the division defined when a bucket is empty.
        rejected_rate = rejected_missing_value / max(len(rejected), 1)
        accepted_rate = accepted_missing_value / max(len(accepted), 1)
        gap = rejected_rate - accepted_rate

        if rejected_missing_value == 0 or gap < min_gap:
            return None

        constraint_id = f"seed_auto_require_value_{relation.lower()}"
        if constraint_id in existing_ids:
            return None

        confidence = self._confidence_from_gap(gap)
        return DiscoveredConstraintSeed(
            constraint_id=constraint_id,
            name=f"Auto-discovered value requirement ({relation})",
            description=(
                f"Mined from governance signals: relation {relation} has elevated rejection "
                "rate when candidate value is missing"
            ),
            relation_types=[relation],
            required_attribute_keys=[],
            require_value=True,
            confidence=round(confidence, 4),
            evidence_count=rejected_missing_value,
            mining_rule=f"missing_value_gap(min_gap={min_gap:.3f})",
        )

    def _synthesize_required_attribute_key_seeds(
        self,
        *,
        relation: str,
        rejected: list[dict[str, Any]],
        accepted: list[dict[str, Any]],
        existing_ids: set[str],
        min_gap: float,
    ) -> list[DiscoveredConstraintSeed]:
        """Suggest "attribute required" seeds for ``relation``.

        Every key seen in any row's ``candidate_attributes`` dict is tested. A
        seed is produced when the share of rejected rows missing the key
        exceeds the share of accepted rows missing it by at least ``min_gap``.

        Returns:
            Seeds sorted by ``(confidence, evidence_count)`` descending, with
            ids that are unique and absent from ``existing_ids``.
        """
        attribute_keys: set[str] = set()
        for row in rejected + accepted:
            attrs = row.get("candidate_attributes") or {}
            if isinstance(attrs, dict):
                attribute_keys.update(str(key) for key in attrs.keys())

        seeds: list[DiscoveredConstraintSeed] = []
        # Keys differing only by case or spaces normalise to the same id;
        # remember ids emitted in this call so each appears at most once.
        emitted_ids: set[str] = set()

        for key in sorted(attribute_keys):
            rejected_missing = self._count_missing_attribute(rejected, key)
            accepted_missing = self._count_missing_attribute(accepted, key)

            # max(..., 1) keeps the division defined when a bucket is empty.
            rejected_rate = rejected_missing / max(len(rejected), 1)
            accepted_rate = accepted_missing / max(len(accepted), 1)
            gap = rejected_rate - accepted_rate

            if rejected_missing == 0 or gap < min_gap:
                continue

            constraint_id = (
                f"seed_auto_require_attr_{relation.lower()}_{key.lower().replace(' ', '_')}"
            )
            if constraint_id in existing_ids or constraint_id in emitted_ids:
                continue
            emitted_ids.add(constraint_id)

            confidence = self._confidence_from_gap(gap)
            seeds.append(
                DiscoveredConstraintSeed(
                    constraint_id=constraint_id,
                    name=f"Auto-discovered required attribute '{key}' ({relation})",
                    description=(
                        f"Mined from governance signals: relation {relation} has elevated rejection "
                        f"rate when attribute '{key}' is missing"
                    ),
                    relation_types=[relation],
                    required_attribute_keys=[key],
                    require_value=False,
                    confidence=round(confidence, 4),
                    evidence_count=rejected_missing,
                    mining_rule=f"missing_attribute_gap(min_gap={min_gap:.3f})",
                )
            )

        seeds.sort(key=lambda seed: (seed.confidence, seed.evidence_count), reverse=True)
        return seeds

    def _resolve_min_gap(self, *, total_samples: int, rejected_samples: int) -> float:
        """Return the gap threshold to apply to one relation.

        In ``"fixed"`` mode this is ``min_gap``. In ``"adaptive"`` mode it is
        ``target_false_block_rate + 0.15 + 1/sqrt(n)``, plus ``0.10`` when at
        least half of the samples were rejected, clamped to
        ``[min_gap_floor, min_gap_ceiling]``.

        Raises:
            ValueError: If ``min_gap_mode`` is neither ``"fixed"`` nor
                ``"adaptive"``.
        """
        if self.min_gap_mode == "fixed":
            return float(self.min_gap)

        if self.min_gap_mode != "adaptive":
            raise ValueError("min_gap_mode must be 'fixed' or 'adaptive'")

        n = max(total_samples, 1)
        rejection_rate = rejected_samples / n
        # Fewer samples means a larger penalty, hence a stricter threshold.
        uncertainty_penalty = 1.0 / (n ** 0.5)
        strictness_boost = 0.10 if rejection_rate >= 0.50 else 0.0

        adaptive_gap = (
            self.target_false_block_rate
            + 0.15
            + uncertainty_penalty
            + strictness_boost
        )
        return max(self.min_gap_floor, min(self.min_gap_ceiling, adaptive_gap))
|