metaobjects 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaobjects/__init__.py +75 -0
- metaobjects/agent_context/__init__.py +55 -0
- metaobjects/agent_context/_content/README.md +14 -0
- metaobjects/agent_context/_content/servers/csharp.meta.json +5 -0
- metaobjects/agent_context/_content/servers/java.meta.json +5 -0
- metaobjects/agent_context/_content/servers/kotlin.meta.json +5 -0
- metaobjects/agent_context/_content/servers/python.meta.json +5 -0
- metaobjects/agent_context/_content/servers/typescript.meta.json +5 -0
- metaobjects/agent_context/_content/skills/metaobjects-authoring/SKILL.md +301 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/SKILL.md +99 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/csharp.md +87 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/java.md +94 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/kotlin.md +110 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/typescript.md +135 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/SKILL.md +148 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/csharp.md +110 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/java.md +108 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/kotlin.md +130 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/python.md +116 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/typescript.md +150 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/SKILL.md +130 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/java.md +96 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/kotlin.md +99 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/react.md +86 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/tanstack.md +119 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/typescript.md +92 -0
- metaobjects/agent_context/_content/skills/metaobjects-verify/SKILL.md +107 -0
- metaobjects/agent_context/_content/skills/metaobjects-verify/references/migration.md +72 -0
- metaobjects/agent_context/_content/templates/always-on.md.mustache +27 -0
- metaobjects/agent_context/assemble.py +133 -0
- metaobjects/agent_context/content_root.py +54 -0
- metaobjects/agent_context/scaffold.py +191 -0
- metaobjects/agent_context/types.py +44 -0
- metaobjects/attr_class_map.py +23 -0
- metaobjects/cli.py +696 -0
- metaobjects/codegen/__init__.py +0 -0
- metaobjects/codegen/config.py +11 -0
- metaobjects/codegen/constants.py +13 -0
- metaobjects/codegen/extract_delegate_emitter.py +384 -0
- metaobjects/codegen/extract_schema_emitter.py +139 -0
- metaobjects/codegen/format.py +31 -0
- metaobjects/codegen/fr010_field_mapping.py +220 -0
- metaobjects/codegen/generator.py +62 -0
- metaobjects/codegen/generator_registry.py +163 -0
- metaobjects/codegen/generators/__init__.py +0 -0
- metaobjects/codegen/generators/entity_model.py +263 -0
- metaobjects/codegen/generators/extractor_generator.py +317 -0
- metaobjects/codegen/generators/filter_allowlist_generator.py +309 -0
- metaobjects/codegen/generators/m2m_codegen.py +192 -0
- metaobjects/codegen/generators/output_parser_generator.py +272 -0
- metaobjects/codegen/generators/output_prompt_generator.py +192 -0
- metaobjects/codegen/generators/payload_vo_generator.py +672 -0
- metaobjects/codegen/generators/render_helper_generator.py +451 -0
- metaobjects/codegen/generators/router_generator.py +635 -0
- metaobjects/codegen/generators/template_generator.py +70 -0
- metaobjects/codegen/generators/tph_plan.py +120 -0
- metaobjects/codegen/generators/trace_helper_generator.py +336 -0
- metaobjects/codegen/instance_artifacts.py +15 -0
- metaobjects/codegen/output_format_spec_emitter.py +79 -0
- metaobjects/codegen/overwrite_policy.py +27 -0
- metaobjects/codegen/runner.py +110 -0
- metaobjects/codegen/runtime/__init__.py +6 -0
- metaobjects/codegen/runtime/filter_parser.py +193 -0
- metaobjects/codegen/type_map.py +84 -0
- metaobjects/core_types.py +809 -0
- metaobjects/datatype.py +19 -0
- metaobjects/documentation/__init__.py +28 -0
- metaobjects/documentation/doc_constants.py +20 -0
- metaobjects/documentation/doc_provider.py +20 -0
- metaobjects/documentation/doc_schema.py +24 -0
- metaobjects/errors.py +124 -0
- metaobjects/loader/__init__.py +0 -0
- metaobjects/loader/merge.py +287 -0
- metaobjects/loader/meta_data_loader.py +245 -0
- metaobjects/loader/sources/__init__.py +24 -0
- metaobjects/loader/sources/directory_source.py +50 -0
- metaobjects/loader/sources/file_source.py +41 -0
- metaobjects/loader/sources/meta_data_source.py +67 -0
- metaobjects/loader/sources/uri_source.py +56 -0
- metaobjects/loader/validate_discriminator.py +181 -0
- metaobjects/loader/validate_field_readonly.py +146 -0
- metaobjects/loader/validate_source_parameter_ref.py +159 -0
- metaobjects/loader/validate_source_physical_names.py +140 -0
- metaobjects/loader/validation_passes.py +1513 -0
- metaobjects/meta/__init__.py +1 -0
- metaobjects/meta/core/__init__.py +0 -0
- metaobjects/meta/core/attr/__init__.py +0 -0
- metaobjects/meta/core/attr/attr_constants.py +31 -0
- metaobjects/meta/core/attr/meta_attr.py +136 -0
- metaobjects/meta/core/field/__init__.py +0 -0
- metaobjects/meta/core/field/field_constants.py +105 -0
- metaobjects/meta/core/field/meta_field.py +76 -0
- metaobjects/meta/core/identity/__init__.py +0 -0
- metaobjects/meta/core/identity/identity_constants.py +19 -0
- metaobjects/meta/core/identity/meta_identity.py +8 -0
- metaobjects/meta/core/object/__init__.py +0 -0
- metaobjects/meta/core/object/meta_object.py +65 -0
- metaobjects/meta/core/object/meta_object_aware.py +43 -0
- metaobjects/meta/core/object/object_class_registry.py +56 -0
- metaobjects/meta/core/object/object_constants.py +13 -0
- metaobjects/meta/core/object/object_extract.py +400 -0
- metaobjects/meta/core/object/value_object.py +70 -0
- metaobjects/meta/core/relationship/__init__.py +0 -0
- metaobjects/meta/core/relationship/derive_m2m_fields.py +180 -0
- metaobjects/meta/core/relationship/meta_relationship.py +54 -0
- metaobjects/meta/core/relationship/relationship_constants.py +51 -0
- metaobjects/meta/core/validator/__init__.py +0 -0
- metaobjects/meta/core/validator/validator_constants.py +18 -0
- metaobjects/meta/meta_data.py +206 -0
- metaobjects/meta/meta_root.py +8 -0
- metaobjects/meta/persistence/__init__.py +0 -0
- metaobjects/meta/persistence/db/__init__.py +1 -0
- metaobjects/meta/persistence/db/db_constants.py +41 -0
- metaobjects/meta/persistence/db/db_provider.py +60 -0
- metaobjects/meta/persistence/origin/__init__.py +0 -0
- metaobjects/meta/persistence/origin/meta_origin.py +8 -0
- metaobjects/meta/persistence/origin/origin_constants.py +20 -0
- metaobjects/meta/persistence/source/__init__.py +0 -0
- metaobjects/meta/persistence/source/meta_source.py +137 -0
- metaobjects/meta/persistence/source/source_constants.py +115 -0
- metaobjects/meta/presentation/__init__.py +0 -0
- metaobjects/meta/presentation/layout/__init__.py +0 -0
- metaobjects/meta/presentation/layout/layout_constants.py +13 -0
- metaobjects/meta/presentation/layout/meta_layout.py +8 -0
- metaobjects/meta/presentation/view/__init__.py +0 -0
- metaobjects/meta/presentation/view/meta_view.py +8 -0
- metaobjects/meta/presentation/view/view_constants.py +22 -0
- metaobjects/meta/template/__init__.py +0 -0
- metaobjects/meta/template/meta_template.py +46 -0
- metaobjects/meta/template/template_constants.py +112 -0
- metaobjects/meta/template/template_provider.py +43 -0
- metaobjects/parser.py +380 -0
- metaobjects/parser_yaml.py +82 -0
- metaobjects/provider.py +111 -0
- metaobjects/py.typed +0 -0
- metaobjects/registry.py +210 -0
- metaobjects/registry_manifest.py +223 -0
- metaobjects/render/__init__.py +74 -0
- metaobjects/render/email_document.py +14 -0
- metaobjects/render/escapers.py +109 -0
- metaobjects/render/extract/__init__.py +59 -0
- metaobjects/render/extract/coerce.py +279 -0
- metaobjects/render/extract/extract.py +211 -0
- metaobjects/render/extract/extract_map.py +61 -0
- metaobjects/render/extract/json_forgiving_reader.py +203 -0
- metaobjects/render/extract/locate.py +65 -0
- metaobjects/render/extract/normalize.py +96 -0
- metaobjects/render/extract/strip.py +20 -0
- metaobjects/render/extract/types.py +332 -0
- metaobjects/render/extract/xml_forgiving_reader.py +162 -0
- metaobjects/render/filesystem_provider.py +51 -0
- metaobjects/render/prompt/__init__.py +32 -0
- metaobjects/render/prompt/output_format_renderer.py +340 -0
- metaobjects/render/prompt/output_format_spec.py +28 -0
- metaobjects/render/prompt/prompt_field.py +29 -0
- metaobjects/render/prompt/prompt_overrides.py +29 -0
- metaobjects/render/prompt/prompt_style.py +38 -0
- metaobjects/render/renderer.py +358 -0
- metaobjects/render/verify.py +266 -0
- metaobjects/runtime/__init__.py +39 -0
- metaobjects/runtime/llm_recorder.py +210 -0
- metaobjects/runtime/n2m_resolver.py +155 -0
- metaobjects/runtime/object_manager.py +715 -0
- metaobjects/runtime/tph.py +50 -0
- metaobjects/serializer_json.py +172 -0
- metaobjects/shared/__init__.py +0 -0
- metaobjects/shared/base_types.py +16 -0
- metaobjects/shared/separators.py +4 -0
- metaobjects/shared/structural.py +9 -0
- metaobjects/source/__init__.py +79 -0
- metaobjects/source/error_source.py +266 -0
- metaobjects/source/json_path.py +106 -0
- metaobjects/source/semantic_diff.py +98 -0
- metaobjects/source/yaml_positions.py +174 -0
- metaobjects/super_resolve.py +128 -0
- metaobjects/yaml_desugar.py +481 -0
- metaobjects-0.9.0.dist-info/METADATA +97 -0
- metaobjects-0.9.0.dist-info/RECORD +181 -0
- metaobjects-0.9.0.dist-info/WHEEL +4 -0
- metaobjects-0.9.0.dist-info/entry_points.txt +2 -0
- metaobjects-0.9.0.dist-info/licenses/LICENSE +189 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""FR-010 extract model + report.
|
|
2
|
+
|
|
3
|
+
Frozen cross-port vocabularies (``FieldExtraction``, ``FieldKind``, ``Tolerance``,
|
|
4
|
+
``Format``) plus the immutable schema/option/outcome dataclasses and the mutable
|
|
5
|
+
``ExtractionReport`` accumulator.
|
|
6
|
+
|
|
7
|
+
The corpus serializes ``FieldExtraction`` with SCREAMING_SNAKE values
|
|
8
|
+
(``EXTRACTED`` / ``DEFAULTED`` / ``LOST_OPTIONAL`` / ``LOST_REQUIRED`` /
|
|
9
|
+
``MALFORMED``) and ``Format`` / ``FieldKind`` as UPPER tokens; the conformance
|
|
10
|
+
runner maps the schema-json tokens onto these enums.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from typing import Callable, Generic, TypeVar
|
|
17
|
+
|
|
18
|
+
from metaobjects.render.extract.normalize import DEFAULT as _NORMALIZE_DEFAULT
|
|
19
|
+
|
|
20
|
+
T = TypeVar("T")
|
|
21
|
+
|
|
22
|
+
# A bespoke per-field coercion hook: (field_path, raw_value, spec) -> coerced | None.
# Returning ``None`` falls through to the default coercion. ``FieldSpec`` is named as a
# string in the alias below because the class is defined further down in this module.
|
|
25
|
+
OnField = Callable[[str, str, "FieldSpec"], object | None]
|
|
26
|
+
Normalizer = Callable[[str], object | None]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Format(Enum):
    """Document format that the extract pipeline is asked to read."""

    # Each member's value is its own upper-case token.
    JSON = "JSON"
    XML = "XML"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class FieldKind(Enum):
    """Coercion targets understood by the engine.

    ``OBJECT`` marks a field whose value is described by a nested schema.
    """

    # Scalar kinds.
    STRING = "STRING"
    INT = "INT"
    LONG = "LONG"
    DOUBLE = "DOUBLE"
    BOOLEAN = "BOOLEAN"
    ENUM = "ENUM"
    # Nested schema.
    OBJECT = "OBJECT"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class FieldExtraction(Enum):
    """Per-field extraction classification. FROZEN across ports.

    Members must not be reordered or extended without an ADR. The values are
    the strings used by the corpus serialization.
    """

    EXTRACTED = "EXTRACTED"
    # FR-011: the value came from ``@coerceDefault`` (fallback for a present but
    # uncoercible value) or from ``@default`` (fill for an absent value), which is
    # kept distinct from a cleanly EXTRACTED value.
    DEFAULTED = "DEFAULTED"
    LOST_OPTIONAL = "LOST_OPTIONAL"
    LOST_REQUIRED = "LOST_REQUIRED"
    MALFORMED = "MALFORMED"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Tolerance(Enum):
    """How forgiving the extract engine is.

    * ``STRICT`` - case-sensitive, minimal repair.
    * ``NORMAL`` - case-insensitive (the default).
    * ``LOOSE``  - maximal repair.

    NOTE: ``LOOSE`` currently behaves identically to ``NORMAL``
    (case-insensitive); it is reserved for future maximal-repair behavior.
    """

    STRICT = "STRICT"
    NORMAL = "NORMAL"
    LOOSE = "LOOSE"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass(frozen=True, slots=True)
|
|
76
|
+
class Coercion:
|
|
77
|
+
"""A recorded normalization/coercion.
|
|
78
|
+
|
|
79
|
+
``kind`` e.g. ``"normalize"``, ``"alias"``, ``"runtime-alias-override"``,
|
|
80
|
+
``"clamp"``, ``"coerceDefault"``, ``"default"``.
|
|
81
|
+
"""
|
|
82
|
+
|
|
83
|
+
field_path: str
|
|
84
|
+
from_: str
|
|
85
|
+
to: str
|
|
86
|
+
kind: str
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass(frozen=True, slots=True)
class FieldSpec:
    """Immutable extract descriptor for a single field.

    ``enum_values``/``enum_alias`` are non-None only for ENUM fields,
    ``min``/``max`` are non-None only for numeric range constraints, and
    ``nested`` is non-None only for OBJECT fields.
    """

    name: str
    kind: FieldKind
    required: bool = False
    array: bool = False
    enum_values: list[str] | None = None
    enum_alias: dict[str, str] | None = None
    min: float | None = None
    max: float | None = None
    nested: "ExtractSchema | None" = None
    # FR-011 (``@coerceDefault``): fallback member used when a value is present
    # but cannot be coerced. ENUM-only; None means "no fallback".
    coerce_default: str | None = None
    # FR-011 (``@default``): member used to fill an absent value. None means
    # "no default".
    default_value: str | None = None
    # FR-011 (``@normalize``): resolved enum normalization mode; the default
    # is ``"strip"``.
    normalize: str = _NORMALIZE_DEFAULT
    # ``@xmlText``: the field takes its element's TEXT CONTENT (comparable to
    # JAXB ``@XmlValue`` / Jackson ``@JacksonXmlText`` / .NET ``[XmlText]``).
    # The extract engine reads it from the ``#text`` sentinel that the lenient
    # XML reader carries when an element has both attributes and a text body,
    # rather than from a same-named child. False for normal/JSON fields.
    text_content: bool = False

    @staticmethod
    def scalar(
        name: str,
        kind: FieldKind,
        required: bool,
        default_value: str | None = None,
    ) -> "FieldSpec":
        """Build a scalar field, optionally with an absent-fill ``@default``.

        Phase B (generalized ``@default``): when the field is ABSENT, tolerant
        extract coerces ``default_value`` to ``kind`` (via the pure
        ``scalar_coerce``) and classifies the field DEFAULTED, which satisfies
        ``required``. ``default_value is None`` is the back-compatible
        no-default case.
        """
        return FieldSpec(name, kind, required, default_value=default_value)

    @staticmethod
    def text_content_field(name: str, kind: FieldKind, required: bool) -> "FieldSpec":
        """Build an ``@xmlText`` field: a scalar with ``text_content`` set.

        The field receives its element's TEXT CONTENT (see ``text_content``)
        and is coerced to ``kind``.
        """
        return FieldSpec(name, kind, required, text_content=True)

    @staticmethod
    def scalar_array(name: str, kind: FieldKind, required: bool) -> "FieldSpec":
        """Build a scalar-array field (``array == True``).

        Each element is coerced via the scalar pipeline; there is no
        per-element default fill.
        """
        return FieldSpec(name, kind, required, array=True)

    @staticmethod
    def enum_array(
        name: str,
        required: bool,
        values: list[str] | None,
        aliases: dict[str, str] | None,
        coerce_default: str | None = None,
        normalize: str = _NORMALIZE_DEFAULT,
        default_value: str | None = None,
    ) -> "FieldSpec":
        """Build an array-of-enum field (``list[enum]``, ``array == True``).

        Phase B: every element goes through the SAME pipeline a scalar enum
        uses (exact → normalize → ``@enumAlias`` → ``@coerceDefault`` →
        MALFORMED) and is classified independently under its indexed path
        (``tags[0]``, ``tags[1]``, …). Mirrors :meth:`enum_field` with
        ``array = True``.
        """
        # Defensive copies; a missing alias map becomes an empty dict.
        members = list(values) if values is not None else None
        alias_map = dict(aliases) if aliases is not None else {}
        return FieldSpec(
            name=name,
            kind=FieldKind.ENUM,
            required=required,
            array=True,
            enum_values=members,
            enum_alias=alias_map,
            coerce_default=coerce_default,
            default_value=default_value,
            normalize=normalize,
        )

    @staticmethod
    def enum_field(
        name: str,
        required: bool,
        values: list[str] | None,
        aliases: dict[str, str] | None,
        coerce_default: str | None = None,
        normalize: str = _NORMALIZE_DEFAULT,
        default_value: str | None = None,
    ) -> "FieldSpec":
        """Build a single-valued ENUM field.

        ``values`` is copied into a fresh list (``None`` stays ``None``) and
        ``aliases`` into a fresh dict (``None`` becomes an empty dict).
        """
        members = list(values) if values is not None else None
        alias_map = dict(aliases) if aliases is not None else {}
        return FieldSpec(
            name=name,
            kind=FieldKind.ENUM,
            required=required,
            enum_values=members,
            enum_alias=alias_map,
            coerce_default=coerce_default,
            default_value=default_value,
            normalize=normalize,
        )

    @staticmethod
    def range_(
        name: str,
        kind: FieldKind,
        required: bool,
        min: float | None,
        max: float | None,
    ) -> "FieldSpec":
        """Build a field of ``kind`` carrying the given ``min``/``max`` bounds."""
        return FieldSpec(name, kind, required, min=min, max=max)

    @staticmethod
    def object_(
        name: str,
        required: bool,
        array: bool,
        nested: "ExtractSchema | None",
    ) -> "FieldSpec":
        """Build an OBJECT field (optionally an array) described by ``nested``."""
        return FieldSpec(
            name,
            FieldKind.OBJECT,
            required,
            array=array,
            nested=nested,
        )
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
@dataclass(frozen=True, slots=True)
|
|
222
|
+
class ExtractSchema:
|
|
223
|
+
"""Top-level extract descriptor.
|
|
224
|
+
|
|
225
|
+
``root_name`` = the XML root tag / logical JSON root name.
|
|
226
|
+
"""
|
|
227
|
+
|
|
228
|
+
format: Format
|
|
229
|
+
root_name: str
|
|
230
|
+
fields: list[FieldSpec] = field(default_factory=list)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
@dataclass(frozen=True, slots=True)
class ExtractOptions:
    """Bounded runtime override surface (the "20%").

    ``aliases``/``normalizers`` are MERGED with the schema's; on a key conflict
    the runtime entry wins. ``on_field`` is the single bespoke-coercion hook.

    ``rootless`` (XML only): ``True`` means the input has NO enclosing root
    element, i.e. the payload's fields ARE the top-level elements (a flat
    sequence such as ``<a>..</a><b>..</b>``). Mirrors Java
    ``ExtractOptions.rootless``.
    """

    tolerance: Tolerance = Tolerance.NORMAL
    aliases: dict[str, str] = field(default_factory=dict)
    normalizers: dict[str, Normalizer] = field(default_factory=dict)
    on_field: OnField | None = None
    rootless: bool = False

    @staticmethod
    def defaults() -> "ExtractOptions":
        """Return an instance with every option at its default."""
        return ExtractOptions()

    def with_tolerance(self, t: Tolerance) -> "ExtractOptions":
        """Return a copy of these options with ``tolerance`` replaced by ``t``."""
        return self._rebuilt(t, self.rootless)

    def with_rootless(self, r: bool) -> "ExtractOptions":
        """Return a copy of these options with ``rootless`` replaced by ``r``.

        XML only: parse a rootless flat element sequence directly (no wrapper
        root). Mirrors Java ``withRootless``.
        """
        return self._rebuilt(self.tolerance, r)

    def _rebuilt(self, tolerance: Tolerance, rootless: bool) -> "ExtractOptions":
        """Copy this instance with the given tolerance/rootless values.

        The alias and normalizer maps are shallow-copied so the new instance
        does not share dict objects with this one.
        """
        return ExtractOptions(
            tolerance=tolerance,
            aliases=dict(self.aliases),
            normalizers=dict(self.normalizers),
            on_field=self.on_field,
            rootless=rootless,
        )
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@dataclass(frozen=True, slots=True)
|
|
277
|
+
class ExtractionOutcome:
|
|
278
|
+
"""Engine return.
|
|
279
|
+
|
|
280
|
+
``data`` is a forgiving ``dict[str, object]``; Plan 2 wraps it into a typed
|
|
281
|
+
``ExtractionResult``.
|
|
282
|
+
"""
|
|
283
|
+
|
|
284
|
+
data: dict[str, object]
|
|
285
|
+
report: "ExtractionReport"
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
@dataclass(frozen=True, slots=True)
class ExtractionResult(Generic[T]):
    """Typed result of a generated ``extract(...)`` call.

    Pairs the best-effort value with its extraction report.
    """

    # Best-effort typed value; the annotation allows ``None``.
    data: T | None
    # Report accompanying the value.
    report: "ExtractionReport"
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
class ExtractionReport:
    """Mutable accumulator for one extraction run.

    Collects the per-field classification, the "empty" flag and the list of
    coercion notes. The accessors hand out copies, so callers cannot mutate
    the report's internal containers through them.
    """

    def __init__(self) -> None:
        self._states: dict[str, FieldExtraction] = {}
        self._coercions: list[Coercion] = []
        self._empty: bool = False

    def set(self, field_path: str, state: FieldExtraction) -> None:
        """Record ``state`` for ``field_path``, replacing any earlier state."""
        self._states[field_path] = state

    def add_coercion(self, c: Coercion) -> None:
        """Append one coercion note."""
        self._coercions.append(c)

    def mark_empty(self) -> None:
        """Raise the empty flag; nothing in this class lowers it again."""
        self._empty = True

    def is_empty(self) -> bool:
        """Return whether :meth:`mark_empty` has been called."""
        return self._empty

    def states(self) -> dict[str, FieldExtraction]:
        """Return a shallow copy of the path → state mapping."""
        return {**self._states}

    def coercions(self) -> list[Coercion]:
        """Return a shallow copy of the recorded coercions, in insertion order."""
        return [*self._coercions]

    def lost_required(self) -> list[str]:
        """Return the paths classified ``LOST_REQUIRED``."""
        return self._by_state(FieldExtraction.LOST_REQUIRED)

    def malformed(self) -> list[str]:
        """Return the paths classified ``MALFORMED``."""
        return self._by_state(FieldExtraction.MALFORMED)

    def has_lost_required(self) -> bool:
        """Return ``True`` when at least one path is ``LOST_REQUIRED``."""
        return bool(self.lost_required())

    def _by_state(self, s: FieldExtraction) -> list[str]:
        """Return, in insertion order, every path whose state equals ``s``."""
        matching: list[str] = []
        for path, state in self._states.items():
            if state == s:
                matching.append(path)
        return matching
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Stage-4 tolerant XML reader for the bounded corpus malformation set. Never throws.
|
|
2
|
+
|
|
3
|
+
Mirrors Java XmlForgivingReader: maps an element's child elements, text, AND attributes
|
|
4
|
+
into the field map, and handles self-closing tags (``<x a="1"/>``).
|
|
5
|
+
|
|
6
|
+
Representation:
|
|
7
|
+
|
|
8
|
+
- text-only element, no attributes → its trimmed text (``str``) — unchanged
|
|
9
|
+
- self-closing / attributes-only element → a dict of attribute name→value ("" when none)
|
|
10
|
+
- element with child elements (± attrs) → a dict merging attributes + child entries
|
|
11
|
+
(a child element wins a name collision)
|
|
12
|
+
- element with text AND attributes → a dict of the attributes plus the body text under
|
|
13
|
+
:data:`TEXT_KEY` (a scalar consumer unwraps it)
|
|
14
|
+
- repeated sibling tags → a list
|
|
15
|
+
|
|
16
|
+
Carries the FR-010 fixed-behavior edge cases:
|
|
17
|
+
|
|
18
|
+
- No-throw on a leading ``</x>``.
|
|
19
|
+
- Unclosed tags extract their text up to the next sibling open tag.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import re
|
|
24
|
+
|
|
25
|
+
#: Key under which an element's own text is stored when the element has to be
#: represented as a dict (because it also carries attributes). ``#`` is not a
#: legal XML name character, so the key cannot collide with a real attribute or
#: child-element name.
TEXT_KEY = "#text"

# Open tag: the tag name, then everything up to the closing '>' (attributes and/or
# the trailing '/' of a self-closing tag). Non-greedy, so the first '>' ends the tag.
_OPEN_TAG = re.compile(r"<([A-Za-z_][A-Za-z0-9_]*)([^>]*?)>")
_OPEN_TAG_CI = re.compile(r"<([A-Za-z_][A-Za-z0-9_]*)([^>]*?)>", re.IGNORECASE)
# A single attribute: name = "double-quoted" | 'single-quoted' | bareword.
_ATTR = re.compile(r"""([A-Za-z_:][A-Za-z0-9_:.\-]*)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s/>]+))""")


class XmlForgivingReader:
    """Lenient, regex-driven XML reader that produces a plain field map.

    Element values are ``str`` (text only), ``dict`` (attributes and/or child
    elements, with body text under :data:`TEXT_KEY` when attributes are also
    present) or ``list`` (repeated sibling tags).
    """

    def read(self, span: str | None, case_insensitive: bool) -> dict[str, object]:
        """Parse the children of the root element contained in ``span``.

        The root's open tag is taken to end at the first ``>``; its close tag is
        taken to start at the last ``</`` when that lies after the open tag,
        otherwise the body runs to the end of ``span``. ``None``, blank input or
        input without any ``>`` yields an empty dict.
        """
        result: dict[str, object] = {}
        if span is None or not span.strip():
            return result
        open_end = span.find(">")
        if open_end < 0:
            return result
        last_close = span.rfind("</")
        # A "</" at or before the first '>' cannot be the root's close tag.
        body_end = last_close if last_close > open_end else len(span)
        self._parse_children(span[open_end + 1 : body_end], case_insensitive, result)
        return result

    def read_rootless(self, text: str | None, case_insensitive: bool) -> dict[str, object]:
        """Parse the top-level elements of ``text`` with no root to strip.

        Handles a flat sequence like ``<a>..</a><b>..</b>``; used for
        :attr:`ExtractOptions.rootless` responses. Text before or after the
        elements is ignored. Mirrors Java ``readRootless``.
        """
        result: dict[str, object] = {}
        if text is not None and text.strip():
            self._parse_children(text, case_insensitive, result)
        return result

    def _parse_children(self, inner: str, ci: bool, out: dict[str, object]) -> None:
        """Scan ``inner`` for sibling elements and accumulate them into ``out``.

        With ``ci`` set, tag names are matched case-insensitively and keys are
        lower-cased.
        """
        opener = _OPEN_TAG_CI if ci else _OPEN_TAG
        close_flags = re.IGNORECASE if ci else 0
        total = len(inner)
        cursor = 0
        found = opener.search(inner, cursor)
        while found is not None:
            tag = found.group(1)
            key = tag.lower() if ci else tag
            attr_text = (found.group(2) or "").strip()
            body_start = found.end()

            if attr_text.endswith("/"):
                # Self-closing: the value is the attribute dict, or "" when bare.
                attrs = self._parse_attrs(attr_text[:-1].strip(), ci)
                self._accumulate(out, key, attrs if attrs else "")
                cursor = body_start
            else:
                attrs = self._parse_attrs(attr_text, ci)
                closer = re.compile(
                    "</" + re.escape(tag) + r"\s*>", close_flags
                ).search(inner, body_start)
                if closer is not None:
                    body_end = closer.start()
                    cursor = closer.end()
                else:
                    # Unclosed tag. If text precedes the next open tag (e.g.
                    # <t>hi<c>..), that text is this element's body and the next
                    # tag is a sibling. If the body starts IMMEDIATELY with an
                    # open tag (e.g. <check ...><payoff>text), the parent's close
                    # tag was most likely dropped (a common LLM malformation), so
                    # the rest of the span is absorbed and the child nests under
                    # it. With no further open tag at all, the rest of the span
                    # is the body. Mirrors Java XmlForgivingReader.
                    sibling = opener.search(inner, body_start)
                    if sibling is not None and inner[body_start : sibling.start()].strip():
                        body_end = cursor = sibling.start()
                    else:
                        body_end = cursor = total
                body = inner[body_start:body_end]
                self._accumulate(out, key, self._combine(attrs, body, ci))

            found = opener.search(inner, cursor)

    def _combine(self, attrs: dict[str, object], content: str, ci: bool) -> object:
        """Merge an element's attributes with its body (children or plain text)."""
        if "<" in content:
            children: dict[str, object] = {}
            self._parse_children(content, ci, children)
            if children:
                # Attributes go in first, so a child element wins a name collision.
                return {**attrs, **children}
        return self._text_value(attrs, content)

    def _text_value(self, attrs: dict[str, object], content: str) -> object:
        """Return the trimmed text, wrapped in a dict when attributes are present."""
        stripped = content.strip()
        if attrs:
            return {**attrs, TEXT_KEY: stripped}
        return stripped

    def _parse_attrs(self, raw_attrs: str, ci: bool) -> dict[str, object]:
        """Parse the attribute section of an open tag into a name → value dict.

        Names are lower-cased when ``ci`` is set; the first occurrence of a
        repeated name wins.
        """
        parsed: dict[str, object] = {}
        if not raw_attrs:
            return parsed
        for hit in _ATTR.finditer(raw_attrs):
            raw_name, double_quoted, single_quoted, bare = hit.group(1, 2, 3, 4)
            attr_name = raw_name.lower() if ci else raw_name
            if double_quoted is not None:
                value = double_quoted
            elif single_quoted is not None:
                value = single_quoted
            elif bare is not None:
                value = bare
            else:
                value = ""
            parsed.setdefault(attr_name, value)
        return parsed

    def _accumulate(self, out: dict[str, object], key: str, value: object) -> None:
        """Store ``value`` under ``key``, promoting repeated keys to a list."""
        try:
            current = out[key]
        except KeyError:
            out[key] = value
            return
        if isinstance(current, list):
            current.append(value)
        else:
            out[key] = [current, value]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Filesystem-backed :class:`~metaobjects.render.verify.Provider`.
|
|
2
|
+
|
|
3
|
+
Resolves a ``group/source`` reference to ``<root>/group/source.mustache``. The render
|
|
4
|
+
engine + verify delegate all I/O to a provider, so this is the only file-touching piece
|
|
5
|
+
of the render tier. Mirrors the C# ``MetaObjects.Render.FilesystemProvider`` and the Java
|
|
6
|
+
``com.metaobjects.render.FilesystemProvider`` semantics: read the file if present, return
|
|
7
|
+
``None`` when absent/unreadable, and reject refs that escape the root (via ``..``).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FilesystemProvider:
    """Resolve ``group/source`` references to text files under a root directory.

    ``resolve("npc/turn")`` reads ``<root>/npc/turn.mustache``. The result is
    ``None`` when the file is absent or unreadable, and refs that escape the
    root (via ``..``) are rejected. The method does not raise for any
    string ref.

    Implements the :class:`~metaobjects.render.verify.Provider` protocol.
    """

    def __init__(self, root: str | Path, extension: str = ".mustache") -> None:
        """Remember the resolved ``root`` and the file ``extension`` to append.

        Args:
            root: Directory under which all refs are looked up.
            extension: Suffix appended to the last ref segment.
        """
        self._root = Path(root).resolve()
        self._extension = extension

    def resolve(self, ref: str | None) -> str | None:
        """Return the UTF-8 text behind ``ref``, or ``None`` if unavailable.

        Args:
            ref: Slash-separated reference such as ``"npc/turn"``. Empty
                segments (leading, trailing or doubled slashes) are dropped.

        Returns:
            The file's contents, or ``None`` when ``ref`` is empty, contains a
            ``..`` segment, cannot be turned into a valid path, resolves
            outside the root, does not name a regular file, or cannot be read
            or decoded as UTF-8.
        """
        if not ref:
            return None
        segments = [s for s in ref.split("/") if s]
        if not segments or ".." in segments:
            return None

        try:
            base = self._root.joinpath(*segments)
            resolved = base.with_name(base.name + self._extension).resolve()
        except (OSError, ValueError, RuntimeError):
            # ValueError: embedded NUL in the ref, or an empty final name.
            # RuntimeError: symlink loop on older Python versions.
            # Such refs simply name no usable file.
            return None

        # Path-traversal guard: the resolved file must stay under the root.
        if resolved != self._root and self._root not in resolved.parents:
            return None

        if not resolved.is_file():
            return None
        try:
            return resolved.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # A file that is not valid UTF-8 counts as unreadable.
            return None
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""FR-010 artifact 1 — output-format prompt renderer ("produce your answer like this").
|
|
2
|
+
|
|
3
|
+
Renders an :class:`OutputFormatSpec` into a prompt fragment that teaches an LLM how
|
|
4
|
+
to shape its answer. Three comment-free styles (guide / inline / exampleOnly) × two
|
|
5
|
+
formats (json / xml). Guidance is carried in prose / inline placeholders / a filled
|
|
6
|
+
skeleton — NEVER in comments (models ignore them).
|
|
7
|
+
|
|
8
|
+
Cross-port INVARIANT: the rendered text is byte-identical to the Java/C#/Kotlin/TS
|
|
9
|
+
reference (``com.metaobjects.render.prompt.OutputFormatRenderer``).
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from metaobjects.render.prompt.output_format_renderer import render_output_format
|
|
14
|
+
from metaobjects.render.prompt.output_format_spec import OutputFormatSpec
|
|
15
|
+
from metaobjects.render.prompt.prompt_field import PromptField
|
|
16
|
+
from metaobjects.render.prompt.prompt_overrides import (
|
|
17
|
+
PROMPT_OVERRIDES_NONE,
|
|
18
|
+
PromptOverrides,
|
|
19
|
+
no_overrides,
|
|
20
|
+
)
|
|
21
|
+
from metaobjects.render.prompt.prompt_style import PromptStyle, prompt_style_from
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"PROMPT_OVERRIDES_NONE",
|
|
25
|
+
"OutputFormatSpec",
|
|
26
|
+
"PromptField",
|
|
27
|
+
"PromptOverrides",
|
|
28
|
+
"PromptStyle",
|
|
29
|
+
"no_overrides",
|
|
30
|
+
"prompt_style_from",
|
|
31
|
+
"render_output_format",
|
|
32
|
+
]
|