abstractsemantics 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: abstractsemantics
3
+ Version: 0.0.1
4
+ Summary: Semantics registry (predicates/types) for AbstractFramework.
5
+ Author: Laurent-Philippe Albou
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: PyYAML>=6.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
12
+
13
+ # AbstractSemantics
14
+
15
+ Central, editable semantics registry for AbstractFramework.
16
+
17
+ This package intentionally contains **definitions**, not storage:
18
+ - prefix mappings (CURIE namespaces)
19
+ - predicate allowlists (and optional inverses/constraints)
20
+ - entity-type allowlists (optional in v0)
21
+
22
+ It is designed to be consumed by:
23
+ - AbstractRuntime (validation at ingestion boundary)
24
+ - AbstractFlow (UI dropdowns + authoring support)
25
+ - AbstractMemory (storage/query on top of validated semantics)
26
+
@@ -0,0 +1,14 @@
1
+ # AbstractSemantics
2
+
3
+ Central, editable semantics registry for AbstractFramework.
4
+
5
+ This package intentionally contains **definitions**, not storage:
6
+ - prefix mappings (CURIE namespaces)
7
+ - predicate allowlists (and optional inverses/constraints)
8
+ - entity-type allowlists (optional in v0)
9
+
10
+ It is designed to be consumed by:
11
+ - AbstractRuntime (validation at ingestion boundary)
12
+ - AbstractFlow (UI dropdowns + authoring support)
13
+ - AbstractMemory (storage/query on top of validated semantics)
14
+
@@ -0,0 +1,26 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "abstractsemantics"
7
+ version = "0.0.1"
8
+ description = "Semantics registry (predicates/types) for AbstractFramework."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "Laurent-Philippe Albou" }]
13
+ dependencies = [
14
+ "PyYAML>=6.0",
15
+ ]
16
+
17
+ [project.optional-dependencies]
18
+ dev = ["pytest>=7.0.0"]
19
+
20
+ [tool.setuptools.packages.find]
21
+ where = ["src"]
22
+ include = ["abstractsemantics*"]
23
+ exclude = ["tests*"]
24
+
25
+ [tool.setuptools.package-data]
26
+ abstractsemantics = ["*.yaml", "*.yml"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,19 @@
1
+ from .registry import (
2
+ SemanticsRegistry,
3
+ load_semantics_registry,
4
+ resolve_semantics_registry_path,
5
+ )
6
+ from .schema import (
7
+ KG_ASSERTION_SCHEMA_REF_V0,
8
+ build_kg_assertion_schema_v0,
9
+ resolve_schema_ref,
10
+ )
11
+
12
+ __all__ = [
13
+ "SemanticsRegistry",
14
+ "load_semantics_registry",
15
+ "resolve_semantics_registry_path",
16
+ "KG_ASSERTION_SCHEMA_REF_V0",
17
+ "build_kg_assertion_schema_v0",
18
+ "resolve_schema_ref",
19
+ ]
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional, Sequence
7
+
8
+ import yaml
9
+
10
+
11
@dataclass(frozen=True)
class PredicateDef:
    """Immutable description of a single predicate entry in the registry."""

    # Identifier of the predicate (mandatory, the only field without a default).
    id: str
    # Optional human-readable label.
    label: str | None = None
    # Optional identifier of the inverse predicate.
    inverse: str | None = None
    # Optional free-text description.
    description: str | None = None
17
+
18
+
19
@dataclass(frozen=True)
class EntityTypeDef:
    """Immutable description of a single entity-type entry in the registry."""

    # Identifier of the entity type (mandatory, the only field without a default).
    id: str
    # Optional human-readable label.
    label: str | None = None
    # Optional identifier of the parent entity type.
    parent: str | None = None
    # Optional free-text description.
    description: str | None = None
25
+
26
+
27
@dataclass(frozen=True)
class SemanticsRegistry:
    """Immutable in-memory form of a semantics registry.

    Holds the registry version, the prefix mapping, and the predicate and
    entity-type definitions.
    """

    version: int
    prefixes: Dict[str, str]
    predicates: List[PredicateDef]
    entity_types: List[EntityTypeDef]

    def predicate_ids(self) -> set[str]:
        """Return the distinct predicate ids that are non-blank strings."""
        ids: set[str] = set()
        for predicate in self.predicates:
            if isinstance(predicate.id, str) and predicate.id.strip():
                ids.add(predicate.id)
        return ids

    def entity_type_ids(self) -> set[str]:
        """Return the distinct entity-type ids that are non-blank strings."""
        ids: set[str] = set()
        for entity_type in self.entity_types:
            if isinstance(entity_type.id, str) and entity_type.id.strip():
                ids.add(entity_type.id)
        return ids
39
+
40
+
41
def resolve_semantics_registry_path() -> Path:
    """Resolve the registry YAML path.

    Env override:
    - ABSTRACTSEMANTICS_REGISTRY_PATH

    Returns:
        The expanded, resolved override path when the environment variable
        holds a non-blank value; otherwise ``semantics.yaml`` located next to
        this module.

    Raises:
        FileNotFoundError: If the override is set but the path does not exist.
    """
    override = (os.getenv("ABSTRACTSEMANTICS_REGISTRY_PATH") or "").strip()
    if not override:
        return Path(__file__).with_name("semantics.yaml")

    # Build the path from the *stripped* value: surrounding whitespace or a
    # trailing newline in the variable would otherwise make an existing file
    # look missing and would prevent "~" expansion.
    candidate = Path(override).expanduser().resolve()
    if not candidate.exists():
        raise FileNotFoundError(f"ABSTRACTSEMANTICS_REGISTRY_PATH does not exist: {candidate}")
    return candidate
54
+
55
+
56
+ def _as_list(value: Any) -> list:
57
+ return list(value) if isinstance(value, list) else []
58
+
59
+
60
def _load_yaml(path: Path) -> Dict[str, Any]:
    """Read *path* as UTF-8 YAML and return its top-level mapping.

    Any document whose top level is not a mapping (including an empty file)
    yields an empty dict.
    """
    parsed = yaml.safe_load(path.read_text(encoding="utf-8"))
    if not isinstance(parsed, dict):
        return {}
    return parsed
64
+
65
+
66
def _optional_str(item: Dict[str, Any], key: str) -> Optional[str]:
    """Return ``item[key]`` when it is a string, otherwise ``None``."""
    value = item.get(key)
    return value if isinstance(value, str) else None


def _entry_id(item: Any) -> Optional[str]:
    """Return the stripped ``id`` of a registry entry, or ``None`` if unusable."""
    if not isinstance(item, dict):
        return None
    raw_id = item.get("id")
    if not isinstance(raw_id, str):
        return None
    return raw_id.strip() or None


def load_semantics_registry(path: Path | str | None = None) -> SemanticsRegistry:
    """Load and normalise the semantics registry from a YAML file.

    Args:
        path: Location of the registry YAML. When omitted (or falsy), the path
            comes from ``resolve_semantics_registry_path()``. Plain string
            paths are accepted as well as ``Path`` objects.

    Returns:
        A ``SemanticsRegistry`` built from the file. Malformed entries
        (non-mapping items, missing or blank ids, non-string prefix keys or
        values) are skipped; a ``version`` that cannot be converted to an
        integer falls back to ``0``.

    Raises:
        ValueError: If no usable predicate is found in the file.
    """
    p = Path(path) if path else resolve_semantics_registry_path()
    data = _load_yaml(p)

    # Only the failures int() itself can raise are tolerated here; anything
    # else should surface instead of being silently mapped to version 0.
    try:
        version = int(data.get("version", 0))
    except (TypeError, ValueError, OverflowError):
        version = 0

    prefixes: Dict[str, str] = {}
    prefixes_raw = data.get("prefixes")
    if isinstance(prefixes_raw, dict):
        for key, uri in prefixes_raw.items():
            if isinstance(key, str) and isinstance(uri, str) and key.strip() and uri.strip():
                prefixes[key.strip()] = uri.strip()

    predicates: list[PredicateDef] = []
    for item in _as_list(data.get("predicates")):
        pid = _entry_id(item)
        if pid is None:
            continue
        predicates.append(
            PredicateDef(
                id=pid,
                label=_optional_str(item, "label"),
                inverse=_optional_str(item, "inverse"),
                description=_optional_str(item, "description"),
            )
        )

    entity_types: list[EntityTypeDef] = []
    for item in _as_list(data.get("entity_types")):
        tid = _entry_id(item)
        if tid is None:
            continue
        entity_types.append(
            EntityTypeDef(
                id=tid,
                label=_optional_str(item, "label"),
                parent=_optional_str(item, "parent"),
                description=_optional_str(item, "description"),
            )
        )

    # Entity types may be empty, but a registry without predicates is unusable.
    if not predicates:
        raise ValueError(f"Semantics registry has no predicates: {p}")

    return SemanticsRegistry(
        version=version,
        prefixes=prefixes,
        predicates=predicates,
        entity_types=entity_types,
    )
124
+
125
+
@@ -0,0 +1,136 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List, Optional, Sequence
4
+
5
+ from .registry import SemanticsRegistry, load_semantics_registry
6
+
7
# Stable reference string that flow-authored schemas can point at.
#
# A visual flow can carry a `json_schema` literal such as:
#   {"$ref": "abstractsemantics:kg_assertion_schema_v0"}
# and the runtime swaps it for a concrete JSON Schema dict at execution time.
KG_ASSERTION_SCHEMA_REF_V0 = "abstractsemantics:kg_assertion_schema_v0"


# Short, deterministic list of predicate aliases that LLMs tend to emit by
# default.
#
# These aliases are *not* part of the canonical semantics registry. Keeping
# structured-output enums canonical is preferred (the model must then choose
# from the agreed semantics); alias handling belongs at the ingestion boundary.
#
# The list is kept small on purpose, to protect model context and reduce
# confusion.
KG_PREDICATE_ALIASES_V0: Sequence[str] = (
    "schema:description",
    "schema:creator",
    "schema:hasParent",
    "schema:hasMember",
    "schema:recognizedAs",
    "schema:hasMemorySource",
    "schema:hasPart",
    "schema:isPartOf",
    "dcterms:has_part",
    "dcterms:is_part_of",
)
34
+
35
+
36
+ def _dedup_preserve_order(values: Sequence[str]) -> list[str]:
37
+ seen: set[str] = set()
38
+ out: list[str] = []
39
+ for v in values:
40
+ if not isinstance(v, str):
41
+ continue
42
+ v2 = v.strip()
43
+ if not v2 or v2 in seen:
44
+ continue
45
+ seen.add(v2)
46
+ out.append(v2)
47
+ return out
48
+
49
+
50
def build_kg_assertion_schema_v0(
    registry: Optional[SemanticsRegistry] = None,
    *,
    include_predicate_aliases: bool = False,
    max_assertions: int = 12,
    min_assertions_when_nonempty: int = 3,
    max_evidence_quote_len: int = 160,
    max_original_context_len: int = 280,
) -> Dict[str, Any]:
    """Build the structured-output JSON Schema for KG assertion extraction.

    The schema is intentionally compact:
    - `predicate` must be one of the registry predicate ids, optionally
      extended with `KG_PREDICATE_ALIASES_V0`.
    - `subject_type` / `object_type` must be registry entity-type ids.
    - The evidence fields carry a `maxLength` bound.

    Args:
        registry: Registry to draw ids from; loaded via
            `load_semantics_registry()` when omitted.
        include_predicate_aliases: Append the alias predicates to the enum.
        max_assertions: Upper bound on the number of assertions; a value of
            zero or below leaves the array unbounded.
        min_assertions_when_nonempty: Minimum size of a non-empty assertion
            list (capped at `max_assertions`); zero or below disables the rule.
        max_evidence_quote_len: `maxLength` for `evidence_quote`.
        max_original_context_len: `maxLength` for `original_context`.

    Returns:
        A JSON Schema dict describing an object with an `assertions` array.

    Raises:
        ValueError: If the registry yields no predicate ids or no entity-type ids.
    """
    reg = registry if registry else load_semantics_registry()

    predicate_candidates: List[str] = []
    for predicate in reg.predicates:
        if isinstance(predicate.id, str) and predicate.id.strip():
            predicate_candidates.append(predicate.id)
    if include_predicate_aliases:
        predicate_candidates.extend(KG_PREDICATE_ALIASES_V0)
    predicate_enum = _dedup_preserve_order(predicate_candidates)

    type_candidates: List[str] = []
    for entity_type in reg.entity_types:
        if isinstance(entity_type.id, str) and entity_type.id.strip():
            type_candidates.append(entity_type.id)
    type_enum = _dedup_preserve_order(type_candidates)

    if not predicate_enum:
        raise ValueError("Semantics registry provided no predicate ids")
    if not type_enum:
        raise ValueError("Semantics registry provided no entity type ids")

    # Negative bounds collapse to zero, which means "no bound" below.
    upper_bound = max(0, int(max_assertions))
    lower_bound = max(0, int(min_assertions_when_nonempty))
    if 0 < upper_bound < lower_bound:
        # The minimum can never exceed the maximum.
        lower_bound = upper_bound

    attributes_schema: Dict[str, Any] = {
        "type": "object",
        "properties": {
            "subject_type": {"type": "string", "enum": type_enum},
            "object_type": {"type": "string", "enum": type_enum},
            "evidence_quote": {"type": "string", "maxLength": int(max_evidence_quote_len)},
            "original_context": {"type": "string", "maxLength": int(max_original_context_len)},
        },
        "required": ["evidence_quote"],
    }

    assertion_schema: Dict[str, Any] = {
        "type": "object",
        "properties": {
            "subject": {"type": "string"},
            "predicate": {"type": "string", "enum": predicate_enum},
            "object": {"type": "string"},
            "confidence": {"type": ["number", "null"], "minimum": 0, "maximum": 1},
            "valid_from": {"type": ["string", "null"]},
            "valid_until": {"type": ["string", "null"]},
            "provenance": {"type": ["object", "null"]},
            "attributes": attributes_schema,
        },
        "required": ["subject", "predicate", "object", "attributes"],
    }

    assertions_schema: Dict[str, Any] = {"type": "array", "items": assertion_schema}
    if upper_bound:
        assertions_schema["maxItems"] = upper_bound
    if lower_bound:
        # Accept either an empty list (no facts found) or at least
        # `lower_bound` assertions, ruling out low-signal short lists.
        assertions_schema["anyOf"] = [{"maxItems": 0}, {"minItems": lower_bound}]

    return {
        "type": "object",
        "properties": {"assertions": assertions_schema},
        "required": ["assertions"],
    }
128
+
129
+
130
def resolve_schema_ref(schema: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Resolve a schema reference dict to a concrete JSON Schema (if supported).

    Args:
        schema: A mapping that may carry a ``"$ref"`` entry.

    Returns:
        The schema built by ``build_kg_assertion_schema_v0()`` when ``"$ref"``
        (ignoring surrounding whitespace) equals
        ``KG_ASSERTION_SCHEMA_REF_V0``; otherwise ``None``. Inputs that are
        not mapping-like (e.g. ``None`` or a list) also yield ``None`` instead
        of raising.
    """
    try:
        ref = schema.get("$ref")
    except AttributeError:
        # Not a mapping-like object, so there is nothing to resolve.
        return None

    if not isinstance(ref, str):
        return None
    if ref.strip() == KG_ASSERTION_SCHEMA_REF_V0:
        return build_kg_assertion_schema_v0()
    return None
@@ -0,0 +1,164 @@
1
+ version: 0
2
+
3
+ prefixes:
4
+ dcterms: "http://purl.org/dc/terms/"
5
+ schema: "https://schema.org/"
6
+ skos: "http://www.w3.org/2004/02/skos/core#"
7
+ cito: "http://purl.org/spar/cito/"
8
+ rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
9
+ ex: "http://example.org/"
10
+
11
+ # v0 policy: predicate allowlist for ingestion validation + UI dropdowns.
12
+ # Source: docs/guide/semantics/semantic-triple-prompt-v4-optimized.md (preferred predicates)
13
+ predicates:
14
+ - id: "rdf:type"
15
+ label: "type"
16
+ description: "Class membership (entity -> class)."
17
+
18
+ # Structure
19
+ - id: "dcterms:hasPart"
20
+ label: "has part"
21
+ inverse: "dcterms:isPartOf"
22
+ - id: "dcterms:isPartOf"
23
+ label: "is part of"
24
+ inverse: "dcterms:hasPart"
25
+ - id: "schema:nextItem"
26
+ label: "next item"
27
+ inverse: "schema:previousItem"
28
+ - id: "schema:previousItem"
29
+ label: "previous item"
30
+ inverse: "schema:nextItem"
31
+
32
+ # Metadata
33
+ - id: "dcterms:identifier"
34
+ label: "identifier"
35
+ - id: "dcterms:title"
36
+ label: "title"
37
+ - id: "schema:name"
38
+ label: "name"
39
+ description: "Primary label for non-document entities (agents, events, places, etc.)."
40
+ - id: "dcterms:description"
41
+ label: "description"
42
+ - id: "dcterms:abstract"
43
+ label: "abstract"
44
+ - id: "dcterms:created"
45
+ label: "created"
46
+ - id: "dcterms:modified"
47
+ label: "modified"
48
+ - id: "dcterms:creator"
49
+ label: "creator"
50
+ - id: "dcterms:publisher"
51
+ label: "publisher"
52
+ - id: "dcterms:subject"
53
+ label: "subject"
54
+ - id: "schema:genre"
55
+ label: "genre"
56
+
57
+ # Context & anchors
58
+ - id: "schema:about"
59
+ label: "about"
60
+ - id: "schema:mentions"
61
+ label: "mentions"
62
+ - id: "schema:knowsAbout"
63
+ label: "knows about"
64
+ description: "An agent's knowledge/awareness about a topic or entity (stronger than incidental mentions)."
65
+ - id: "dcterms:references"
66
+ label: "references"
67
+ - id: "schema:sameAs"
68
+ label: "same as"
69
+ - id: "schema:location"
70
+ label: "location"
71
+ - id: "schema:temporalCoverage"
72
+ label: "temporal coverage"
73
+
74
+ # Action & participants
75
+ - id: "schema:participant"
76
+ label: "participant"
77
+ - id: "schema:organizer"
78
+ label: "organizer"
79
+ - id: "schema:result"
80
+ label: "result"
81
+ - id: "schema:instrument"
82
+ label: "instrument"
83
+ - id: "schema:startDate"
84
+ label: "start date"
85
+ - id: "schema:endDate"
86
+ label: "end date"
87
+
88
+ # Concepts
89
+ - id: "skos:definition"
90
+ label: "definition"
91
+ - id: "skos:prefLabel"
92
+ label: "preferred label"
93
+ - id: "skos:altLabel"
94
+ label: "alternative label"
95
+ - id: "skos:broader"
96
+ label: "broader"
97
+ - id: "skos:narrower"
98
+ label: "narrower"
99
+ - id: "skos:related"
100
+ label: "related"
101
+ - id: "skos:exactMatch"
102
+ label: "exact match"
103
+ - id: "skos:closeMatch"
104
+ label: "close match"
105
+
106
+ # Evidence
107
+ - id: "cito:supports"
108
+ label: "supports"
109
+ - id: "cito:disagreesWith"
110
+ label: "disagrees with"
111
+ - id: "cito:usesDataFrom"
112
+ label: "uses data from"
113
+ inverse: "cito:providesDataFor"
114
+ - id: "cito:providesDataFor"
115
+ label: "provides data for"
116
+ inverse: "cito:usesDataFrom"
117
+ - id: "cito:extends"
118
+ label: "extends"
119
+ - id: "cito:discusses"
120
+ label: "discusses"
121
+ - id: "cito:confirms"
122
+ label: "confirms"
123
+
124
+ entity_types:
125
+ # Generic "real-world thing" catch-all (includes physical objects).
126
+ - id: "schema:Thing"
127
+ label: "Thing"
128
+
129
+ - id: "dcterms:Text"
130
+ label: "Text"
131
+ - id: "dcterms:Collection"
132
+ label: "Collection"
133
+ - id: "skos:Concept"
134
+ label: "Concept"
135
+ - id: "cito:Claim"
136
+ label: "Claim"
137
+ - id: "schema:Person"
138
+ label: "Person"
139
+ - id: "schema:Organization"
140
+ label: "Organization"
141
+ - id: "schema:ItemList"
142
+ label: "List"
143
+ - id: "schema:SoftwareApplication"
144
+ label: "Software Application"
145
+ - id: "schema:Event"
146
+ label: "Event"
147
+ - id: "schema:Place"
148
+ label: "Place"
149
+ - id: "schema:Country"
150
+ label: "Country"
151
+ parent: "schema:Place"
152
+ - id: "schema:Product"
153
+ label: "Product"
154
+ parent: "schema:Thing"
155
+ - id: "schema:Dataset"
156
+ label: "Dataset"
157
+ - id: "schema:ImageObject"
158
+ label: "Image"
159
+ - id: "schema:VideoObject"
160
+ label: "Video"
161
+ - id: "schema:Table"
162
+ label: "Table"
163
+ - id: "schema:SoftwareSourceCode"
164
+ label: "Code"
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: abstractsemantics
3
+ Version: 0.0.1
4
+ Summary: Semantics registry (predicates/types) for AbstractFramework.
5
+ Author: Laurent-Philippe Albou
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: PyYAML>=6.0
10
+ Provides-Extra: dev
11
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
12
+
13
+ # AbstractSemantics
14
+
15
+ Central, editable semantics registry for AbstractFramework.
16
+
17
+ This package intentionally contains **definitions**, not storage:
18
+ - prefix mappings (CURIE namespaces)
19
+ - predicate allowlists (and optional inverses/constraints)
20
+ - entity-type allowlists (optional in v0)
21
+
22
+ It is designed to be consumed by:
23
+ - AbstractRuntime (validation at ingestion boundary)
24
+ - AbstractFlow (UI dropdowns + authoring support)
25
+ - AbstractMemory (storage/query on top of validated semantics)
26
+
@@ -0,0 +1,13 @@
1
+ README.md
2
+ pyproject.toml
3
+ src/abstractsemantics/__init__.py
4
+ src/abstractsemantics/registry.py
5
+ src/abstractsemantics/schema.py
6
+ src/abstractsemantics/semantics.yaml
7
+ src/abstractsemantics.egg-info/PKG-INFO
8
+ src/abstractsemantics.egg-info/SOURCES.txt
9
+ src/abstractsemantics.egg-info/dependency_links.txt
10
+ src/abstractsemantics.egg-info/requires.txt
11
+ src/abstractsemantics.egg-info/top_level.txt
12
+ tests/test_registry.py
13
+ tests/test_schema.py
@@ -0,0 +1,4 @@
1
+ PyYAML>=6.0
2
+
3
+ [dev]
4
+ pytest>=7.0.0
@@ -0,0 +1 @@
1
+ abstractsemantics
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from abstractsemantics import load_semantics_registry
4
+
5
+
6
def test_load_semantics_registry_has_predicates():
    """The default registry exposes the expected core predicate ids."""
    predicate_ids = load_semantics_registry().predicate_ids()
    for expected in ("rdf:type", "dcterms:isPartOf"):
        assert expected in predicate_ids
11
+
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ from abstractsemantics import (
4
+ KG_ASSERTION_SCHEMA_REF_V0,
5
+ build_kg_assertion_schema_v0,
6
+ load_semantics_registry,
7
+ resolve_schema_ref,
8
+ )
9
+
10
+
11
def test_build_kg_assertion_schema_v0_tracks_registry_predicates_and_types() -> None:
    """Every registry predicate and entity type shows up in the schema enums."""
    registry = load_semantics_registry()
    schema = build_kg_assertion_schema_v0(registry=registry, include_predicate_aliases=True)
    item_properties = schema["properties"]["assertions"]["items"]["properties"]

    predicate_enum = item_properties["predicate"]["enum"]
    assert isinstance(predicate_enum, list) and predicate_enum
    assert all(pid in predicate_enum for pid in registry.predicate_ids())

    subject_type_enum = item_properties["attributes"]["properties"]["subject_type"]["enum"]
    assert isinstance(subject_type_enum, list) and subject_type_enum
    assert all(tid in subject_type_enum for tid in registry.entity_type_ids())
24
+
25
+
26
def test_build_kg_assertion_schema_v0_can_disable_aliases() -> None:
    """With aliases disabled, alias-only predicates stay out of the enum."""
    schema = build_kg_assertion_schema_v0(
        registry=load_semantics_registry(),
        include_predicate_aliases=False,
    )
    item_properties = schema["properties"]["assertions"]["items"]["properties"]
    predicate_enum = item_properties["predicate"]["enum"]
    for alias in ("schema:creator", "schema:description"):
        assert alias not in predicate_enum
32
+
33
+
34
def test_resolve_schema_ref_returns_concrete_schema() -> None:
    """The stable reference string resolves to an object schema with `assertions`."""
    reference = {"$ref": KG_ASSERTION_SCHEMA_REF_V0}
    concrete = resolve_schema_ref(reference)
    assert isinstance(concrete, dict)
    assert concrete.get("type") == "object"
    properties = concrete.get("properties", {})
    assert "assertions" in properties
39
+