metaobjects 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaobjects/__init__.py +75 -0
- metaobjects/agent_context/__init__.py +55 -0
- metaobjects/agent_context/_content/README.md +14 -0
- metaobjects/agent_context/_content/servers/csharp.meta.json +5 -0
- metaobjects/agent_context/_content/servers/java.meta.json +5 -0
- metaobjects/agent_context/_content/servers/kotlin.meta.json +5 -0
- metaobjects/agent_context/_content/servers/python.meta.json +5 -0
- metaobjects/agent_context/_content/servers/typescript.meta.json +5 -0
- metaobjects/agent_context/_content/skills/metaobjects-authoring/SKILL.md +301 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/SKILL.md +99 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/csharp.md +87 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/java.md +94 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/kotlin.md +110 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/typescript.md +135 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/SKILL.md +148 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/csharp.md +110 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/java.md +108 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/kotlin.md +130 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/python.md +116 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/typescript.md +150 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/SKILL.md +130 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/java.md +96 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/kotlin.md +99 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/react.md +86 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/tanstack.md +119 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/typescript.md +92 -0
- metaobjects/agent_context/_content/skills/metaobjects-verify/SKILL.md +107 -0
- metaobjects/agent_context/_content/skills/metaobjects-verify/references/migration.md +72 -0
- metaobjects/agent_context/_content/templates/always-on.md.mustache +27 -0
- metaobjects/agent_context/assemble.py +133 -0
- metaobjects/agent_context/content_root.py +54 -0
- metaobjects/agent_context/scaffold.py +191 -0
- metaobjects/agent_context/types.py +44 -0
- metaobjects/attr_class_map.py +23 -0
- metaobjects/cli.py +696 -0
- metaobjects/codegen/__init__.py +0 -0
- metaobjects/codegen/config.py +11 -0
- metaobjects/codegen/constants.py +13 -0
- metaobjects/codegen/extract_delegate_emitter.py +384 -0
- metaobjects/codegen/extract_schema_emitter.py +139 -0
- metaobjects/codegen/format.py +31 -0
- metaobjects/codegen/fr010_field_mapping.py +220 -0
- metaobjects/codegen/generator.py +62 -0
- metaobjects/codegen/generator_registry.py +163 -0
- metaobjects/codegen/generators/__init__.py +0 -0
- metaobjects/codegen/generators/entity_model.py +263 -0
- metaobjects/codegen/generators/extractor_generator.py +317 -0
- metaobjects/codegen/generators/filter_allowlist_generator.py +309 -0
- metaobjects/codegen/generators/m2m_codegen.py +192 -0
- metaobjects/codegen/generators/output_parser_generator.py +272 -0
- metaobjects/codegen/generators/output_prompt_generator.py +192 -0
- metaobjects/codegen/generators/payload_vo_generator.py +672 -0
- metaobjects/codegen/generators/render_helper_generator.py +451 -0
- metaobjects/codegen/generators/router_generator.py +635 -0
- metaobjects/codegen/generators/template_generator.py +70 -0
- metaobjects/codegen/generators/tph_plan.py +120 -0
- metaobjects/codegen/generators/trace_helper_generator.py +336 -0
- metaobjects/codegen/instance_artifacts.py +15 -0
- metaobjects/codegen/output_format_spec_emitter.py +79 -0
- metaobjects/codegen/overwrite_policy.py +27 -0
- metaobjects/codegen/runner.py +110 -0
- metaobjects/codegen/runtime/__init__.py +6 -0
- metaobjects/codegen/runtime/filter_parser.py +193 -0
- metaobjects/codegen/type_map.py +84 -0
- metaobjects/core_types.py +809 -0
- metaobjects/datatype.py +19 -0
- metaobjects/documentation/__init__.py +28 -0
- metaobjects/documentation/doc_constants.py +20 -0
- metaobjects/documentation/doc_provider.py +20 -0
- metaobjects/documentation/doc_schema.py +24 -0
- metaobjects/errors.py +124 -0
- metaobjects/loader/__init__.py +0 -0
- metaobjects/loader/merge.py +287 -0
- metaobjects/loader/meta_data_loader.py +245 -0
- metaobjects/loader/sources/__init__.py +24 -0
- metaobjects/loader/sources/directory_source.py +50 -0
- metaobjects/loader/sources/file_source.py +41 -0
- metaobjects/loader/sources/meta_data_source.py +67 -0
- metaobjects/loader/sources/uri_source.py +56 -0
- metaobjects/loader/validate_discriminator.py +181 -0
- metaobjects/loader/validate_field_readonly.py +146 -0
- metaobjects/loader/validate_source_parameter_ref.py +159 -0
- metaobjects/loader/validate_source_physical_names.py +140 -0
- metaobjects/loader/validation_passes.py +1513 -0
- metaobjects/meta/__init__.py +1 -0
- metaobjects/meta/core/__init__.py +0 -0
- metaobjects/meta/core/attr/__init__.py +0 -0
- metaobjects/meta/core/attr/attr_constants.py +31 -0
- metaobjects/meta/core/attr/meta_attr.py +136 -0
- metaobjects/meta/core/field/__init__.py +0 -0
- metaobjects/meta/core/field/field_constants.py +105 -0
- metaobjects/meta/core/field/meta_field.py +76 -0
- metaobjects/meta/core/identity/__init__.py +0 -0
- metaobjects/meta/core/identity/identity_constants.py +19 -0
- metaobjects/meta/core/identity/meta_identity.py +8 -0
- metaobjects/meta/core/object/__init__.py +0 -0
- metaobjects/meta/core/object/meta_object.py +65 -0
- metaobjects/meta/core/object/meta_object_aware.py +43 -0
- metaobjects/meta/core/object/object_class_registry.py +56 -0
- metaobjects/meta/core/object/object_constants.py +13 -0
- metaobjects/meta/core/object/object_extract.py +400 -0
- metaobjects/meta/core/object/value_object.py +70 -0
- metaobjects/meta/core/relationship/__init__.py +0 -0
- metaobjects/meta/core/relationship/derive_m2m_fields.py +180 -0
- metaobjects/meta/core/relationship/meta_relationship.py +54 -0
- metaobjects/meta/core/relationship/relationship_constants.py +51 -0
- metaobjects/meta/core/validator/__init__.py +0 -0
- metaobjects/meta/core/validator/validator_constants.py +18 -0
- metaobjects/meta/meta_data.py +206 -0
- metaobjects/meta/meta_root.py +8 -0
- metaobjects/meta/persistence/__init__.py +0 -0
- metaobjects/meta/persistence/db/__init__.py +1 -0
- metaobjects/meta/persistence/db/db_constants.py +41 -0
- metaobjects/meta/persistence/db/db_provider.py +60 -0
- metaobjects/meta/persistence/origin/__init__.py +0 -0
- metaobjects/meta/persistence/origin/meta_origin.py +8 -0
- metaobjects/meta/persistence/origin/origin_constants.py +20 -0
- metaobjects/meta/persistence/source/__init__.py +0 -0
- metaobjects/meta/persistence/source/meta_source.py +137 -0
- metaobjects/meta/persistence/source/source_constants.py +115 -0
- metaobjects/meta/presentation/__init__.py +0 -0
- metaobjects/meta/presentation/layout/__init__.py +0 -0
- metaobjects/meta/presentation/layout/layout_constants.py +13 -0
- metaobjects/meta/presentation/layout/meta_layout.py +8 -0
- metaobjects/meta/presentation/view/__init__.py +0 -0
- metaobjects/meta/presentation/view/meta_view.py +8 -0
- metaobjects/meta/presentation/view/view_constants.py +22 -0
- metaobjects/meta/template/__init__.py +0 -0
- metaobjects/meta/template/meta_template.py +46 -0
- metaobjects/meta/template/template_constants.py +112 -0
- metaobjects/meta/template/template_provider.py +43 -0
- metaobjects/parser.py +380 -0
- metaobjects/parser_yaml.py +82 -0
- metaobjects/provider.py +111 -0
- metaobjects/py.typed +0 -0
- metaobjects/registry.py +210 -0
- metaobjects/registry_manifest.py +223 -0
- metaobjects/render/__init__.py +74 -0
- metaobjects/render/email_document.py +14 -0
- metaobjects/render/escapers.py +109 -0
- metaobjects/render/extract/__init__.py +59 -0
- metaobjects/render/extract/coerce.py +279 -0
- metaobjects/render/extract/extract.py +211 -0
- metaobjects/render/extract/extract_map.py +61 -0
- metaobjects/render/extract/json_forgiving_reader.py +203 -0
- metaobjects/render/extract/locate.py +65 -0
- metaobjects/render/extract/normalize.py +96 -0
- metaobjects/render/extract/strip.py +20 -0
- metaobjects/render/extract/types.py +332 -0
- metaobjects/render/extract/xml_forgiving_reader.py +162 -0
- metaobjects/render/filesystem_provider.py +51 -0
- metaobjects/render/prompt/__init__.py +32 -0
- metaobjects/render/prompt/output_format_renderer.py +340 -0
- metaobjects/render/prompt/output_format_spec.py +28 -0
- metaobjects/render/prompt/prompt_field.py +29 -0
- metaobjects/render/prompt/prompt_overrides.py +29 -0
- metaobjects/render/prompt/prompt_style.py +38 -0
- metaobjects/render/renderer.py +358 -0
- metaobjects/render/verify.py +266 -0
- metaobjects/runtime/__init__.py +39 -0
- metaobjects/runtime/llm_recorder.py +210 -0
- metaobjects/runtime/n2m_resolver.py +155 -0
- metaobjects/runtime/object_manager.py +715 -0
- metaobjects/runtime/tph.py +50 -0
- metaobjects/serializer_json.py +172 -0
- metaobjects/shared/__init__.py +0 -0
- metaobjects/shared/base_types.py +16 -0
- metaobjects/shared/separators.py +4 -0
- metaobjects/shared/structural.py +9 -0
- metaobjects/source/__init__.py +79 -0
- metaobjects/source/error_source.py +266 -0
- metaobjects/source/json_path.py +106 -0
- metaobjects/source/semantic_diff.py +98 -0
- metaobjects/source/yaml_positions.py +174 -0
- metaobjects/super_resolve.py +128 -0
- metaobjects/yaml_desugar.py +481 -0
- metaobjects-0.9.0.dist-info/METADATA +97 -0
- metaobjects-0.9.0.dist-info/RECORD +181 -0
- metaobjects-0.9.0.dist-info/WHEEL +4 -0
- metaobjects-0.9.0.dist-info/entry_points.txt +2 -0
- metaobjects-0.9.0.dist-info/licenses/LICENSE +189 -0
metaobjects/registry.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""The type registry: (type, subType) -> TypeDefinition. Populated by providers."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Callable
|
|
6
|
+
|
|
7
|
+
from .errors import ErrorCode, ParseError
|
|
8
|
+
from .shared.base_types import SUBTYPE_BASE
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class AttrSchema:
    """Immutable declaration of one attribute that a metatype accepts."""

    # The attribute's name as it is looked up on a type.
    name: str
    # Name of an attr subtype such as "string" or "boolean". ``None`` marks the
    # attr as "known but untyped": the YAML coercion guard skips it. This is
    # used for polymorphic attrs like @default, whose value type follows the
    # subtype of the field that owns the attr.
    value_type: str | None
    # Whether the attr is declared as required.
    required: bool = False
    # Optional closed set of permitted values; ``None`` when not declared.
    allowed_values: tuple[str, ...] | None = None
    # Optional default value; ``None`` when not declared.
    default: object | None = None
    # Array axis, orthogonal to ``value_type``: True means the attr holds a
    # list of the scalar ``value_type``. It replaced the retired "stringarray"
    # subtype and mirrors Java's StringAttribute + @isArray. The loader routes
    # an array-flagged attr through the array string-attr coercion, which turns
    # a bare string into a one-element list.
    is_array: bool = False
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class ChildRule:
    """Immutable rule naming a child ``(type, subType)`` pair."""

    # The child's type name.
    child_type: str
    # The child's subtype name; the wildcard "*" matches any subtype.
    child_sub_type: str
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Signature of a node factory: called as factory(type, sub_type, name) and
# returns a node instance.
NodeFactory = Callable[[str, str, str], object]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class TypeDefinition:
    """A registered ``(type, sub_type)`` with its factory, attrs and child rules."""

    type: str
    sub_type: str
    # Callable that builds a node instance for this type.
    factory: NodeFactory
    # Each instance gets its own fresh lists (default_factory), never a shared one.
    attrs: list[AttrSchema] = field(default_factory=list)
    child_rules: list[ChildRule] = field(default_factory=list)

    @property
    def key(self) -> tuple[str, str]:
        """The ``(type, sub_type)`` pair that identifies this definition."""
        return self.type, self.sub_type
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TypeRegistry:
    """Registry mapping ``(type, subType)`` keys to their ``TypeDefinition``.

    Besides the per-type definitions it tracks the attrs accepted on every
    metatype ("common attrs") and the designated default subType per type.
    After ``seal()`` every mutating registration method raises ``ParseError``
    with ``ERR_REGISTRY_SEALED`` (ADR-0023); read methods keep working.
    """

    def __init__(self) -> None:
        self._defs: dict[tuple[str, str], TypeDefinition] = {}
        self._common_attrs: list[AttrSchema] = []
        # Per-type designated default subType (queried by the YAML desugar to
        # resolve a bare `metadata:` / `object:` key to e.g. `metadata.root` /
        # `object.entity`). Mirrors TypeRegistry._defaultSubTypes in TS.
        self._default_sub_types: dict[str, str] = {}
        # ADR-0023 Decision 2 — sealed state. Once sealed, every mutating
        # registration method raises ERR_REGISTRY_SEALED. Python composes from an
        # explicit immutable provider set (compose_registry(core_providers)), so
        # sealing here is the guard + negative test (no polluted singleton to
        # pivot off). The library seals after the metamodel bootstrap; a
        # downstream app composes its own (unsealed) registry.
        self._sealed = False

    def seal(self) -> None:
        """Seal the registry: every subsequent mutating registration raises
        ERR_REGISTRY_SEALED. Idempotent. Reads are unaffected."""
        self._sealed = True

    def is_sealed(self) -> bool:
        """Whether this registry has been sealed (ADR-0023)."""
        return self._sealed

    def _check_not_sealed(self, operation: str) -> None:
        """Raise ``ParseError`` (``ERR_REGISTRY_SEALED``) if the registry is sealed.

        :param operation: label of the attempted mutation, embedded in the
            error message.
        """
        if self._sealed:
            raise ParseError(
                f"TypeRegistry is sealed (ADR-0023): {operation} is not permitted after "
                "metamodel bootstrap. Made-up metamodel attributes/types are structurally "
                "disallowed — a new metamodel attribute requires a registered provider + "
                "human agreement. Downstream apps that need extra vocabulary must compose "
                "their own (unsealed) registry.",
                ErrorCode.ERR_REGISTRY_SEALED,
            )

    def register(self, definition: TypeDefinition) -> None:
        """Register *definition* under its ``(type, sub_type)`` key.

        An existing entry with the same key is replaced.

        :raises ParseError: ``ERR_REGISTRY_SEALED`` if the registry is sealed.
        """
        # Label the operation as "type.sub_type", the same form extend() uses.
        # Formatting ``definition.key`` directly would embed the tuple's repr.
        self._check_not_sealed(f'register("{definition.type}.{definition.sub_type}")')
        # Store a per-registry COPY of the definition's mutable lists. Providers
        # hold their TypeDefinition objects as long-lived singletons (re-used across
        # every compose_registry call); a later provider's extend() appends to
        # definition.attrs. Without copying here, that append would mutate the
        # provider's SHARED list and accumulate duplicates across composes. Copying
        # makes extend() scoped to the registry being composed. The factory is shared
        # (a type's identity belongs to whoever registered it — see extend()).
        self._defs[definition.key] = TypeDefinition(
            type=definition.type,
            sub_type=definition.sub_type,
            factory=definition.factory,
            attrs=list(definition.attrs),
            child_rules=list(definition.child_rules),
        )

    def find(self, type_: str, sub_type: str) -> TypeDefinition | None:
        """Return the definition registered for ``(type_, sub_type)``, or None."""
        return self._defs.get((type_, sub_type))

    def has_type(self, type_: str) -> bool:
        """Whether any subtype of *type_* is registered."""
        return any(t == type_ for (t, _s) in self._defs)

    def set_default_sub_type(self, type_: str, sub_type: str) -> None:
        """Designate the default subType for a bare `type` YAML key (ADR-0006 Rule 1).

        Mirrors TypeRegistry.setDefaultSubType in TS. Used by the YAML desugar
        when resolving sugared `metadata:` / `object:` keys.

        :raises ParseError: ``ERR_REGISTRY_SEALED`` if the registry is sealed.
        """
        self._check_not_sealed(f'set_default_sub_type("{type_}")')
        self._default_sub_types[type_] = sub_type

    def default_sub_type_of(self, type_: str) -> str | None:
        """Return the designated default subType for *type_*, or None if none.

        Mirrors TypeRegistry.defaultSubTypeOf in TS.
        """
        return self._default_sub_types.get(type_)

    def register_common_attrs(self, attrs: list[AttrSchema]) -> None:
        """Register attrs accepted on every metatype. First-wins dedupe by name.

        Conflict with per-type attrs is detected at validation time, not here.

        :raises ParseError: ``ERR_REGISTRY_SEALED`` if the registry is sealed.
        """
        self._check_not_sealed("register_common_attrs")
        for attr in attrs:
            if any(existing.name == attr.name for existing in self._common_attrs):
                continue  # first registration wins
            self._common_attrs.append(attr)

    def get_common_attrs(self) -> list[AttrSchema]:
        """Return a defensive copy of the registered common attrs."""
        return list(self._common_attrs)

    def attrs_of(self, type_: str, sub_type: str) -> list[AttrSchema]:
        """The declared attribute schemas for a (type, subType), or [] if unregistered.
        Returned as a copy. Mirrors the TS registry's attrsOf()."""
        definition = self.find(type_, sub_type)
        return list(definition.attrs) if definition is not None else []

    def attr_schema(self, type_: str, sub_type: str, attr_name: str) -> AttrSchema | None:
        """Look up a per-type attr schema by name, then fall back to common attrs.

        Returns None when neither the type nor the common attrs declare it.
        """
        for attr in self.attrs_of(type_, sub_type):
            if attr.name == attr_name:
                return attr
        for attr in self._common_attrs:
            if attr.name == attr_name:
                return attr
        return None

    def extend(
        self,
        type_: str,
        sub_type: str,
        *,
        attributes: list[AttrSchema] | None = None,
        child_rules: list[ChildRule] | None = None,
    ) -> None:
        """Additively enrich an already-registered ``(type_, sub_type)``.

        Append attributes and/or child rules to the existing TypeDefinition.
        Does NOT touch the factory — a type's identity belongs to whoever
        registered it. Used by providers to extend types another provider
        defined (mirrors the TS ``TypeRegistry.extend`` and C#
        ``TypeRegistry.Extend``).

        The call is all-or-nothing: every attribute is validated before any
        is appended, so a rejected call leaves the definition unchanged.

        :raises ParseError: ``ERR_REGISTRY_SEALED`` if the registry is sealed.
        :raises ParseError: ``ERR_UNKNOWN_SUBTYPE`` if ``(type_, sub_type)``
            is not registered.
        :raises ValueError: if an attribute declares the ``SUBTYPE_BASE``
            value_type, which is not valid for attrs.
        :raises ParseError: ``ERR_PROVIDER_ATTR_CONFLICT`` if an attribute
            name already exists on the type or repeats within *attributes*
            (own-only check — common-attr collisions are still surfaced
            separately at validation time).

        Note: providers calling ``extend`` MUST declare a dependency on the
        provider that originally registered the ``(type_, sub_type)`` so
        ``compose_registry``'s topological ordering puts the registering
        provider before the extending one.
        """
        self._check_not_sealed(f'extend("{type_}.{sub_type}")')
        definition = self.find(type_, sub_type)
        if definition is None:
            raise ParseError(
                f'TypeRegistry.extend: no registered type "{type_}.{sub_type}" to extend',
                ErrorCode.ERR_UNKNOWN_SUBTYPE,
            )

        # Validate the whole batch first. Appending while validating would
        # leave earlier attrs of this call applied when a later one is rejected.
        new_attrs = list(attributes or [])
        taken_names = {existing.name for existing in definition.attrs}
        for attr in new_attrs:
            if attr.value_type == SUBTYPE_BASE:
                raise ValueError(
                    f'TypeRegistry.extend: attr "{attr.name}" being added to '
                    f'"{type_}.{sub_type}" declares value_type "{SUBTYPE_BASE}", '
                    "which is not valid for attrs. Use None for a polymorphic/untyped attr."
                )
            if attr.name in taken_names:
                raise ParseError(
                    f'TypeRegistry.extend: attribute "{attr.name}" is already declared '
                    f'on "{type_}.{sub_type}"',
                    ErrorCode.ERR_PROVIDER_ATTR_CONFLICT,
                )
            # Track names added by this call so in-batch repeats also conflict.
            taken_names.add(attr.name)

        definition.attrs.extend(new_attrs)
        definition.child_rules.extend(child_rules or [])
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
"""SP-G Registry Conformance — the Python registry-manifest emitter.
|
|
2
|
+
|
|
3
|
+
Walks an assembled core ``TypeRegistry`` and serializes the LOGICAL metamodel
|
|
4
|
+
vocabulary as a canonical, fully-sorted, byte-stable JSON manifest. This is the
|
|
5
|
+
single-source contract the other four ports (TS / C# / Java / Kotlin) must
|
|
6
|
+
byte-match — a structural gate against the SP-C class of silent vocabulary
|
|
7
|
+
drift (a port's registry diverging — wrong attr names, missing subtypes,
|
|
8
|
+
different required-ness — with every behavioral corpus still green).
|
|
9
|
+
|
|
10
|
+
The IN/OUT boundary (the v1 logical subset emittable byte-identically by all
|
|
11
|
+
five ports) is documented in ``fixtures/registry-conformance/README.md``. In
|
|
12
|
+
short: ``type.subType`` + ``attrs[{name, valueType, isArray, required}]`` + ``commonAttrs``
|
|
13
|
+
+ ``defaultSubTypes``. EXCLUDED from v1 (per-port-physical or
|
|
14
|
+
not-universally-tracked-on-the-registry): factories/native bindings;
|
|
15
|
+
``AttrSchema.default`` and ``allowed_values`` (Java's attr model carries
|
|
16
|
+
neither); ``inheritsFrom``; ``child_rules``.
|
|
17
|
+
|
|
18
|
+
The TS emitter (``server/typescript/packages/metadata/src/registry-manifest.ts``,
|
|
19
|
+
``emitRegistryManifest``) is the reference implementation; the canonical bytes
|
|
20
|
+
live in ``fixtures/registry-conformance/expected-registry.json``.
|
|
21
|
+
"""
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import json
|
|
25
|
+
from enum import Enum
|
|
26
|
+
|
|
27
|
+
from .documentation.doc_constants import DOC_ATTR_DESCRIPTION
|
|
28
|
+
from .meta.core.attr.attr_constants import (
|
|
29
|
+
ATTR_SUBTYPE_STRING,
|
|
30
|
+
ATTR_SUBTYPE_STRINGARRAY,
|
|
31
|
+
)
|
|
32
|
+
from .meta.presentation.view.view_constants import VIEW_SUBTYPE_CURRENCY
|
|
33
|
+
from .registry import AttrSchema, TypeRegistry
|
|
34
|
+
from .shared.base_types import SUBTYPE_BASE, TYPE_METADATA, TYPE_VIEW
|
|
35
|
+
from .shared.structural import KEY_IS_ARRAY
|
|
36
|
+
|
|
37
|
+
# Wave 3b — the in/out boundary is an EXPLICIT CLASSIFICATION (a reason category
|
|
38
|
+
# per carve-out), not a bare name-match. The negative branch of a name-list
|
|
39
|
+
# silently meant "logical"; now ``classify_per_type_attr`` returns either an
|
|
40
|
+
# ``ExclusionReason`` (carved out, with a documented category) or ``INCLUDED``
|
|
41
|
+
# (logical cross-port vocabulary). Inclusion-by-classification is sound because
|
|
42
|
+
# ADR-0023 seals the agreed-vocabulary registry. The axis is
|
|
43
|
+
# cross-port-CONTRACT vs port-PRIVATE-mechanism (NOT abstract-vs-physical — the
|
|
44
|
+
# physical-DB attrs column/dbColumnType/db.indexed/precision/scale/maxLength/
|
|
45
|
+
# unique ARE logical here, the agreed persistence vocabulary). See
|
|
46
|
+
# fixtures/registry-conformance/README.md.
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class ExclusionReason(str, Enum):
    """Why a per-type attr or a (type, subType) row is left out of the agreed
    vocabulary — or the ``INCLUDED`` sentinel when it is not left out."""

    #: Not excluded: the item is logical cross-port vocabulary.
    INCLUDED = "included"
    #: A native type-binding / factory (covers ADR-0001 ``object`` and
    #: ADR-0005 ``objectAdapter``).
    NATIVE_BINDING = "native-binding"
    #: A bare structural / OO-shape keyword: isArray, isAbstract, extends,
    #: implements, isInterface.
    STRUCTURAL_KEYWORD = "structural-keyword"
    #: A commonAttr (``description``) registered again per-type; it belongs in
    #: the commonAttrs block instead.
    COMMON_ATTR_DUP = "common-attr-dup"
    #: The per-port ``metadata.base`` inheritance anchor (the inheritsFrom
    #: facet is deferred).
    INHERITANCE_ANCHOR = "inheritance-anchor"
    #: A facet used only by the TS web presentation (the generic ``view.*``
    #: controls).
    PRESENTATION_ONLY = "presentation-only"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Structural / OO-shape keyword attr names.
_ATTR_NAME_IS_ABSTRACT = "isAbstract"
_ATTR_NAME_EXTENDS = "extends"
_ATTR_NAME_IMPLEMENTS = "implements"
_ATTR_NAME_IS_INTERFACE = "isInterface"
# Native bindings: the ADR-0001 class-FQN type binding and the ADR-0005 hybrid
# value-access seam.
_ATTR_NAME_OBJECT = "object"
_ATTR_NAME_OBJECT_ADAPTER = "objectAdapter"

# Carve-out table: per-type attr name -> the reason it is excluded from the
# agreed vocabulary. Any name absent from this table is logical (INCLUDED)
# under the ADR-0023 sealed-vocabulary contract. ``description`` is excluded
# ONLY as a per-type attr; it stays in the commonAttrs block.
_EXCLUDED_PER_TYPE_ATTRS: dict[str, ExclusionReason] = {
    **dict.fromkeys(
        (
            KEY_IS_ARRAY,
            _ATTR_NAME_IS_ABSTRACT,
            _ATTR_NAME_EXTENDS,
            _ATTR_NAME_IMPLEMENTS,
            _ATTR_NAME_IS_INTERFACE,
        ),
        ExclusionReason.STRUCTURAL_KEYWORD,
    ),
    **dict.fromkeys(
        (_ATTR_NAME_OBJECT, _ATTR_NAME_OBJECT_ADAPTER),
        ExclusionReason.NATIVE_BINDING,
    ),
    DOC_ATTR_DESCRIPTION: ExclusionReason.COMMON_ATTR_DUP,
}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def classify_per_type_attr(name: str) -> ExclusionReason:
    """Return the classification of the per-type attr called *name*.

    The result is the attr's carve-out reason when *name* is listed in
    ``_EXCLUDED_PER_TYPE_ATTRS``, and ``ExclusionReason.INCLUDED`` (logical
    vocabulary) for every other name, so the function always returns a member.
    """
    reason = _EXCLUDED_PER_TYPE_ATTRS.get(name)
    if reason is None:
        return ExclusionReason.INCLUDED
    return reason
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def classify_type_subtype(type_name: str, sub_type: str) -> ExclusionReason:
    """Return the classification of a ``(type, subType)`` row.

    - ``metadata.base`` is the inheritance anchor (C-5).
    - Any ``view`` subtype other than base and currency is a generic
      presentation control (B-2).
    - Everything else is ``INCLUDED``.
    """
    verdict = ExclusionReason.INCLUDED
    if (type_name, sub_type) == (TYPE_METADATA, SUBTYPE_BASE):
        # C-5 — Java's internal inheritance anchor
        verdict = ExclusionReason.INHERITANCE_ANCHOR
    elif (
        type_name == TYPE_VIEW
        and sub_type != SUBTYPE_BASE
        and sub_type != VIEW_SUBTYPE_CURRENCY
    ):
        # B-2 — TS-web-presentation generic view controls
        verdict = ExclusionReason.PRESENTATION_ONLY
    return verdict
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _is_excluded_type_subtype(type_name: str, sub_type: str) -> bool:
    """Whether a ``(type, subType)`` row is left out of the manifest, whatever
    the reason."""
    reason = classify_type_subtype(type_name, sub_type)
    return reason is not ExclusionReason.INCLUDED
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _to_manifest_attr(attr: AttrSchema) -> dict[str, object]:
    """Convert one AttrSchema into the manifest's logical attr entry.

    The entry is ``{name, valueType, isArray, required}``: array-ness is split
    into a scalar ``valueType`` plus a separate ``isArray`` flag.
    ``allowed_values`` and ``default`` are deliberately left out (deferred per
    the v1 boundary).

    A ``None`` value_type (polymorphic/untyped attrs such as ``@default``) is
    passed through and serializes as JSON ``null``. A legacy ``stringarray``
    value_type is rewritten to ``{valueType: "string", isArray: true}``, so
    that token never appears in the manifest.
    """
    legacy_string_array = attr.value_type == ATTR_SUBTYPE_STRINGARRAY
    # Keys are inserted in the contract's fixed order:
    # name, valueType, isArray, required.
    entry: dict[str, object] = {"name": attr.name}
    entry["valueType"] = ATTR_SUBTYPE_STRING if legacy_string_array else attr.value_type
    entry["isArray"] = attr.is_array or legacy_string_array
    entry["required"] = attr.required
    return entry
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _sorted_attrs(attrs: list[AttrSchema]) -> list[dict[str, object]]:
    """Return manifest entries for *attrs*, ordered by attr name ascending
    (plain string comparison)."""
    ordered = sorted(attrs, key=lambda schema: schema.name)
    return list(map(_to_manifest_attr, ordered))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _sorted_per_type_attrs(attrs: list[AttrSchema]) -> list[dict[str, object]]:
    """Like ``_sorted_attrs``, restricted to attrs classified ``INCLUDED``.

    Attrs that ``classify_per_type_attr`` carves out (structural keywords,
    native bindings, a per-type ``description`` duplicate) are dropped before
    sorting. Intended for per-type attrs only: ``description`` remains in the
    commonAttrs block.
    """
    logical: list[AttrSchema] = []
    for schema in attrs:
        if classify_per_type_attr(schema.name) is ExclusionReason.INCLUDED:
            logical.append(schema)
    return _sorted_attrs(logical)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def build_registry_manifest(registry: TypeRegistry) -> dict[str, object]:
    """Build the canonical registry-manifest object from an assembled registry.

    *registry* must already be composed (e.g.
    ``compose_registry([core_provider, doc_provider])``) so that every
    provider — core types, the DB-domain attrs Python keeps on its field defs,
    and the common doc attrs — has run.

    Every collection is sorted explicitly rather than trusting dict insertion
    order, which keeps the serialization byte-stable and port-independent.

    :returns: a dict with the keys ``types``, ``commonAttrs`` and
        ``defaultSubTypes``, inserted in that order.
    """
    # Walk every registered (type, subType), skipping carved-out rows: the
    # metadata.base anchor (C-5) and the generic view.* controls (B-2). The
    # rows are sorted right after, so the walk order does not matter.
    rows: list[dict[str, object]] = [
        {
            "type": definition.type,
            "subType": definition.sub_type,
            "attrs": _sorted_per_type_attrs(definition.attrs),
        }
        for definition in registry._defs.values()  # noqa: SLF001 (no public iterator)
        if not _is_excluded_type_subtype(definition.type, definition.sub_type)
    ]
    rows.sort(key=lambda row: f"{row['type']}.{row['subType']}")

    shared_attrs = _sorted_attrs(registry.get_common_attrs())

    # defaultSubTypes: probe the type name of each emitted row, in sorted
    # order, and keep only the types that have a designated default.
    defaults: dict[str, str] = {}
    for name in sorted({row["type"] for row in rows}):  # type: ignore[misc]
        chosen = registry.default_sub_type_of(name)  # type: ignore[arg-type]
        if chosen is not None:
            defaults[name] = chosen

    # Fixed top-level key order: types, commonAttrs, defaultSubTypes.
    manifest: dict[str, object] = {"types": rows}
    manifest["commonAttrs"] = shared_attrs
    manifest["defaultSubTypes"] = defaults
    return manifest
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def emit_registry_manifest(registry: TypeRegistry) -> str:
    """Serialize the canonical registry manifest to byte-stable JSON text.

    The manifest is obtained from ``build_registry_manifest(registry)`` and
    dumped under the cross-port serialization contract, which every port MUST
    reproduce exactly:

    - Indentation is two spaces.
    - Object keys keep a fixed order: ``types`` / ``commonAttrs`` /
      ``defaultSubTypes`` at the top level; ``type`` / ``subType`` / ``attrs``
      for each type; ``name`` / ``valueType`` / ``isArray`` / ``required`` for
      each attr.
    - Every array is sorted: ``types`` by ``"type.subType"``; each ``attrs``
      and ``commonAttrs`` by name; ``defaultSubTypes`` keys sorted.
    - Polymorphic/untyped attrs carry a literal ``valueType: null``.
    - The text ends with exactly one trailing newline.

    ``json.dumps(indent=2, separators=(",", ": "))`` matches JS
    ``JSON.stringify(obj, null, 2)`` byte-for-byte for ASCII content (no
    trailing whitespace on container lines, ``": "`` after keys, ``,`` between
    lines). ``ensure_ascii=False`` leaves non-ASCII characters verbatim, which
    is what the JS contract does; the core vocabulary has none.
    """
    serialized = json.dumps(
        build_registry_manifest(registry),
        indent=2,
        ensure_ascii=False,
        separators=(",", ": "),
    )
    # The contract calls for a single newline after the closing brace.
    return f"{serialized}\n"
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Render-tier engine: the build-time template drift-check ``verify`` (FR-004), the
|
|
2
|
+
FR-010 tolerant ``extract`` parser, and the FR-010 output-format prompt renderer."""
|
|
3
|
+
|
|
4
|
+
from metaobjects.render.email_document import EmailDocument
|
|
5
|
+
from metaobjects.render.filesystem_provider import FilesystemProvider
|
|
6
|
+
from metaobjects.render.prompt import (
|
|
7
|
+
PROMPT_OVERRIDES_NONE,
|
|
8
|
+
OutputFormatSpec,
|
|
9
|
+
PromptField,
|
|
10
|
+
PromptOverrides,
|
|
11
|
+
PromptStyle,
|
|
12
|
+
no_overrides,
|
|
13
|
+
prompt_style_from,
|
|
14
|
+
render_output_format,
|
|
15
|
+
)
|
|
16
|
+
from metaobjects.render.extract import (
|
|
17
|
+
FieldKind,
|
|
18
|
+
FieldExtraction,
|
|
19
|
+
FieldSpec,
|
|
20
|
+
Format,
|
|
21
|
+
ExtractOptions,
|
|
22
|
+
ExtractionOutcome,
|
|
23
|
+
ExtractSchema,
|
|
24
|
+
ExtractionReport,
|
|
25
|
+
ExtractionResult,
|
|
26
|
+
Tolerance,
|
|
27
|
+
extract,
|
|
28
|
+
extract_map,
|
|
29
|
+
)
|
|
30
|
+
from metaobjects.render.verify import (
|
|
31
|
+
ERR_OUTPUT_TAG_MISSING,
|
|
32
|
+
ERR_PARTIAL_UNRESOLVED,
|
|
33
|
+
ERR_REQUIRED_SLOT_UNUSED,
|
|
34
|
+
ERR_VAR_NOT_ON_PAYLOAD,
|
|
35
|
+
InMemoryProvider,
|
|
36
|
+
PayloadField,
|
|
37
|
+
Provider,
|
|
38
|
+
VerifyError,
|
|
39
|
+
verify,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
__all__ = [
|
|
43
|
+
"ERR_OUTPUT_TAG_MISSING",
|
|
44
|
+
"ERR_PARTIAL_UNRESOLVED",
|
|
45
|
+
"ERR_REQUIRED_SLOT_UNUSED",
|
|
46
|
+
"ERR_VAR_NOT_ON_PAYLOAD",
|
|
47
|
+
"PROMPT_OVERRIDES_NONE",
|
|
48
|
+
"EmailDocument",
|
|
49
|
+
"FieldKind",
|
|
50
|
+
"FieldExtraction",
|
|
51
|
+
"FieldSpec",
|
|
52
|
+
"Format",
|
|
53
|
+
"InMemoryProvider",
|
|
54
|
+
"OutputFormatSpec",
|
|
55
|
+
"PayloadField",
|
|
56
|
+
"PromptField",
|
|
57
|
+
"PromptOverrides",
|
|
58
|
+
"PromptStyle",
|
|
59
|
+
"Provider",
|
|
60
|
+
"ExtractOptions",
|
|
61
|
+
"ExtractionOutcome",
|
|
62
|
+
"ExtractSchema",
|
|
63
|
+
"ExtractionReport",
|
|
64
|
+
"ExtractionResult",
|
|
65
|
+
"FilesystemProvider",
|
|
66
|
+
"Tolerance",
|
|
67
|
+
"VerifyError",
|
|
68
|
+
"no_overrides",
|
|
69
|
+
"prompt_style_from",
|
|
70
|
+
"extract",
|
|
71
|
+
"extract_map",
|
|
72
|
+
"render_output_format",
|
|
73
|
+
"verify",
|
|
74
|
+
]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""A rendered email value type (mirrors the TS/Java/C# ``EmailDocument``)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True, slots=True)
|
|
9
|
+
class EmailDocument:
|
|
10
|
+
"""A rendered email: subject + HTML body + optional plain-text alternative (MIME multipart/alternative)."""
|
|
11
|
+
|
|
12
|
+
subject: str
|
|
13
|
+
html_body: str
|
|
14
|
+
text_body: str | None = None
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Format-keyed escapers for the render engine (FR-004).
|
|
2
|
+
|
|
3
|
+
Tier-1 invariant: per-format escaping behavior is character-by-character
|
|
4
|
+
identical to TS (``packages/render/src/escapers.ts``), C#
|
|
5
|
+
(``MetaObjects.Render/Escapers.cs``), and Java
|
|
6
|
+
(``server/java/render/.../Escapers.java``).
|
|
7
|
+
|
|
8
|
+
``{{var}}`` substitutions in a Mustache template are escaped per the
|
|
9
|
+
configured format; ``{{{var}}}`` bypasses escaping (Mustache's
|
|
10
|
+
"triple-stache" raw form is preserved by the renderer).
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
# Names of the render formats an escaper can be looked up by.
# Pass-through formats (no escaping is applied to substituted values).
FORMAT_TEXT = "text"
# Markup formats.
FORMAT_HTML = "html"
FORMAT_XML = "xml"
# Delimited / structured data formats.
FORMAT_CSV = "csv"
FORMAT_JSON = "json"
FORMAT_MARKDOWN = "markdown"
FORMAT_SPREADSHEET = "spreadsheet"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _escape_xml(s: str) -> str:
|
|
24
|
+
out: list[str] = []
|
|
25
|
+
for c in s:
|
|
26
|
+
if c == "&":
|
|
27
|
+
out.append("&")
|
|
28
|
+
elif c == "<":
|
|
29
|
+
out.append("<")
|
|
30
|
+
elif c == ">":
|
|
31
|
+
out.append(">")
|
|
32
|
+
elif c == '"':
|
|
33
|
+
out.append(""")
|
|
34
|
+
elif c == "'":
|
|
35
|
+
out.append("'")
|
|
36
|
+
else:
|
|
37
|
+
out.append(c)
|
|
38
|
+
return "".join(out)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _injection_guard(s: str) -> str:
|
|
42
|
+
"""OWASP CSV/Excel formula-injection guard: prefix a literal apostrophe when
|
|
43
|
+
the cell starts with an active char (``=``, ``+``, ``-``, ``@``, tab, CR)."""
|
|
44
|
+
if not s:
|
|
45
|
+
return s
|
|
46
|
+
first = s[0]
|
|
47
|
+
if first in ("=", "+", "-", "@", "\t", "\r"):
|
|
48
|
+
return "'" + s
|
|
49
|
+
return s
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _escape_csv(s: str) -> str:
    """Escape *s* for use as a single CSV cell.

    The formula-injection guard is applied first. The guarded cell is then
    wrapped in double quotes, with embedded double quotes doubled, only when
    it contains a comma, a double quote, LF or CR; otherwise it is returned
    as is.
    """
    cell = _injection_guard(s)
    # No delimiter, quote or line break present: the cell needs no quoting.
    if {",", '"', "\n", "\r"}.isdisjoint(cell):
        return cell
    return '"' + cell.replace('"', '""') + '"'
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _escape_json(s: str) -> str:
|
|
61
|
+
"""Mirrors JS ``JSON.stringify(s).slice(1, -1)``: escape ``"``, ``\\``, and
|
|
62
|
+
ASCII control chars; nothing else (no HTML-safety escaping of ``< > &``)."""
|
|
63
|
+
out: list[str] = []
|
|
64
|
+
for c in s:
|
|
65
|
+
if c == '"':
|
|
66
|
+
out.append('\\"')
|
|
67
|
+
elif c == "\\":
|
|
68
|
+
out.append("\\\\")
|
|
69
|
+
elif c == "\b":
|
|
70
|
+
out.append("\\b")
|
|
71
|
+
elif c == "\f":
|
|
72
|
+
out.append("\\f")
|
|
73
|
+
elif c == "\n":
|
|
74
|
+
out.append("\\n")
|
|
75
|
+
elif c == "\r":
|
|
76
|
+
out.append("\\r")
|
|
77
|
+
elif c == "\t":
|
|
78
|
+
out.append("\\t")
|
|
79
|
+
elif ord(c) < 0x20:
|
|
80
|
+
out.append(f"\\u{ord(c):04x}")
|
|
81
|
+
else:
|
|
82
|
+
out.append(c)
|
|
83
|
+
return "".join(out)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _escape_spreadsheet(s: str) -> str:
    """Escape *s* for a spreadsheet cell: XML-escape first, then guard.

    The order matters: because the injection guard runs after XML escaping,
    the apostrophe it prepends stays a literal apostrophe (which tells Excel
    "treat as text") instead of being turned into an entity.
    """
    xml_safe = _escape_xml(s)
    return _injection_guard(xml_safe)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
# Render format name -> escaper function for values rendered in that format.
_REGISTRY = {
    # Text and Markdown values pass through unchanged.
    FORMAT_TEXT: lambda s: s,
    FORMAT_MARKDOWN: lambda s: s,
    # HTML uses XML entity set per FR-004 spec, so both share one escaper.
    FORMAT_HTML: _escape_xml,
    FORMAT_XML: _escape_xml,
    # Cell-oriented formats also apply the formula-injection guard.
    FORMAT_CSV: _escape_csv,
    FORMAT_SPREADSHEET: _escape_spreadsheet,
    FORMAT_JSON: _escape_json,
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def escape(format_: str, value: str) -> str:
    """Return *value* escaped for the render format named *format_*.

    Args:
        format_: A render format name registered in ``_REGISTRY``.
        value: The raw text to escape.

    Returns:
        The result of the escaper registered for *format_*.

    Raises:
        ValueError: If *format_* has no registered escaper.
    """
    if format_ not in _REGISTRY:
        raise ValueError(f'unknown render format "{format_}"')
    return _REGISTRY[format_](value)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""FR-010 tolerant ``extract`` engine (Tier 2).
|
|
2
|
+
|
|
3
|
+
A forgiving parser that takes dirty LLM output (fenced / preamble / prose-wrapped /
|
|
4
|
+
truncated / trailing-comma JSON, unclosed-tag XML) and extracts it into a typed
|
|
5
|
+
``dict``, classifying each field. It NEVER raises — the forgiving tier beyond
|
|
6
|
+
FR-006's strict Pydantic parser.
|
|
7
|
+
|
|
8
|
+
Public entry point: :func:`extract`.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from metaobjects.render.extract import extract_map
|
|
13
|
+
from metaobjects.render.extract.coerce import MALFORMED, scalar_coerce
|
|
14
|
+
from metaobjects.render.extract.json_forgiving_reader import (
|
|
15
|
+
TRUNCATED,
|
|
16
|
+
JsonForgivingReader,
|
|
17
|
+
)
|
|
18
|
+
from metaobjects.render.extract.normalize import normalize_enum
|
|
19
|
+
from metaobjects.render.extract.extract import extract
|
|
20
|
+
from metaobjects.render.extract.types import (
|
|
21
|
+
Coercion,
|
|
22
|
+
FieldKind,
|
|
23
|
+
FieldExtraction,
|
|
24
|
+
FieldSpec,
|
|
25
|
+
Format,
|
|
26
|
+
Normalizer,
|
|
27
|
+
OnField,
|
|
28
|
+
ExtractOptions,
|
|
29
|
+
ExtractionOutcome,
|
|
30
|
+
ExtractSchema,
|
|
31
|
+
ExtractionReport,
|
|
32
|
+
ExtractionResult,
|
|
33
|
+
Tolerance,
|
|
34
|
+
)
|
|
35
|
+
from metaobjects.render.extract.xml_forgiving_reader import XmlForgivingReader
|
|
36
|
+
|
|
37
|
+
__all__ = [
|
|
38
|
+
"MALFORMED",
|
|
39
|
+
"TRUNCATED",
|
|
40
|
+
"Coercion",
|
|
41
|
+
"FieldKind",
|
|
42
|
+
"FieldExtraction",
|
|
43
|
+
"FieldSpec",
|
|
44
|
+
"Format",
|
|
45
|
+
"JsonForgivingReader",
|
|
46
|
+
"Normalizer",
|
|
47
|
+
"OnField",
|
|
48
|
+
"ExtractOptions",
|
|
49
|
+
"ExtractionOutcome",
|
|
50
|
+
"ExtractSchema",
|
|
51
|
+
"ExtractionReport",
|
|
52
|
+
"ExtractionResult",
|
|
53
|
+
"Tolerance",
|
|
54
|
+
"XmlForgivingReader",
|
|
55
|
+
"normalize_enum",
|
|
56
|
+
"extract",
|
|
57
|
+
"extract_map",
|
|
58
|
+
"scalar_coerce",
|
|
59
|
+
]
|