metaobjects 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. metaobjects/__init__.py +75 -0
  2. metaobjects/agent_context/__init__.py +55 -0
  3. metaobjects/agent_context/_content/README.md +14 -0
  4. metaobjects/agent_context/_content/servers/csharp.meta.json +5 -0
  5. metaobjects/agent_context/_content/servers/java.meta.json +5 -0
  6. metaobjects/agent_context/_content/servers/kotlin.meta.json +5 -0
  7. metaobjects/agent_context/_content/servers/python.meta.json +5 -0
  8. metaobjects/agent_context/_content/servers/typescript.meta.json +5 -0
  9. metaobjects/agent_context/_content/skills/metaobjects-authoring/SKILL.md +301 -0
  10. metaobjects/agent_context/_content/skills/metaobjects-codegen/SKILL.md +99 -0
  11. metaobjects/agent_context/_content/skills/metaobjects-codegen/references/csharp.md +87 -0
  12. metaobjects/agent_context/_content/skills/metaobjects-codegen/references/java.md +94 -0
  13. metaobjects/agent_context/_content/skills/metaobjects-codegen/references/kotlin.md +110 -0
  14. metaobjects/agent_context/_content/skills/metaobjects-codegen/references/typescript.md +135 -0
  15. metaobjects/agent_context/_content/skills/metaobjects-prompts/SKILL.md +148 -0
  16. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/csharp.md +110 -0
  17. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/java.md +108 -0
  18. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/kotlin.md +130 -0
  19. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/python.md +116 -0
  20. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/typescript.md +150 -0
  21. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/SKILL.md +130 -0
  22. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/java.md +96 -0
  23. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/kotlin.md +99 -0
  24. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/react.md +86 -0
  25. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/tanstack.md +119 -0
  26. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/typescript.md +92 -0
  27. metaobjects/agent_context/_content/skills/metaobjects-verify/SKILL.md +107 -0
  28. metaobjects/agent_context/_content/skills/metaobjects-verify/references/migration.md +72 -0
  29. metaobjects/agent_context/_content/templates/always-on.md.mustache +27 -0
  30. metaobjects/agent_context/assemble.py +133 -0
  31. metaobjects/agent_context/content_root.py +54 -0
  32. metaobjects/agent_context/scaffold.py +191 -0
  33. metaobjects/agent_context/types.py +44 -0
  34. metaobjects/attr_class_map.py +23 -0
  35. metaobjects/cli.py +696 -0
  36. metaobjects/codegen/__init__.py +0 -0
  37. metaobjects/codegen/config.py +11 -0
  38. metaobjects/codegen/constants.py +13 -0
  39. metaobjects/codegen/extract_delegate_emitter.py +384 -0
  40. metaobjects/codegen/extract_schema_emitter.py +139 -0
  41. metaobjects/codegen/format.py +31 -0
  42. metaobjects/codegen/fr010_field_mapping.py +220 -0
  43. metaobjects/codegen/generator.py +62 -0
  44. metaobjects/codegen/generator_registry.py +163 -0
  45. metaobjects/codegen/generators/__init__.py +0 -0
  46. metaobjects/codegen/generators/entity_model.py +263 -0
  47. metaobjects/codegen/generators/extractor_generator.py +317 -0
  48. metaobjects/codegen/generators/filter_allowlist_generator.py +309 -0
  49. metaobjects/codegen/generators/m2m_codegen.py +192 -0
  50. metaobjects/codegen/generators/output_parser_generator.py +272 -0
  51. metaobjects/codegen/generators/output_prompt_generator.py +192 -0
  52. metaobjects/codegen/generators/payload_vo_generator.py +672 -0
  53. metaobjects/codegen/generators/render_helper_generator.py +451 -0
  54. metaobjects/codegen/generators/router_generator.py +635 -0
  55. metaobjects/codegen/generators/template_generator.py +70 -0
  56. metaobjects/codegen/generators/tph_plan.py +120 -0
  57. metaobjects/codegen/generators/trace_helper_generator.py +336 -0
  58. metaobjects/codegen/instance_artifacts.py +15 -0
  59. metaobjects/codegen/output_format_spec_emitter.py +79 -0
  60. metaobjects/codegen/overwrite_policy.py +27 -0
  61. metaobjects/codegen/runner.py +110 -0
  62. metaobjects/codegen/runtime/__init__.py +6 -0
  63. metaobjects/codegen/runtime/filter_parser.py +193 -0
  64. metaobjects/codegen/type_map.py +84 -0
  65. metaobjects/core_types.py +809 -0
  66. metaobjects/datatype.py +19 -0
  67. metaobjects/documentation/__init__.py +28 -0
  68. metaobjects/documentation/doc_constants.py +20 -0
  69. metaobjects/documentation/doc_provider.py +20 -0
  70. metaobjects/documentation/doc_schema.py +24 -0
  71. metaobjects/errors.py +124 -0
  72. metaobjects/loader/__init__.py +0 -0
  73. metaobjects/loader/merge.py +287 -0
  74. metaobjects/loader/meta_data_loader.py +245 -0
  75. metaobjects/loader/sources/__init__.py +24 -0
  76. metaobjects/loader/sources/directory_source.py +50 -0
  77. metaobjects/loader/sources/file_source.py +41 -0
  78. metaobjects/loader/sources/meta_data_source.py +67 -0
  79. metaobjects/loader/sources/uri_source.py +56 -0
  80. metaobjects/loader/validate_discriminator.py +181 -0
  81. metaobjects/loader/validate_field_readonly.py +146 -0
  82. metaobjects/loader/validate_source_parameter_ref.py +159 -0
  83. metaobjects/loader/validate_source_physical_names.py +140 -0
  84. metaobjects/loader/validation_passes.py +1513 -0
  85. metaobjects/meta/__init__.py +1 -0
  86. metaobjects/meta/core/__init__.py +0 -0
  87. metaobjects/meta/core/attr/__init__.py +0 -0
  88. metaobjects/meta/core/attr/attr_constants.py +31 -0
  89. metaobjects/meta/core/attr/meta_attr.py +136 -0
  90. metaobjects/meta/core/field/__init__.py +0 -0
  91. metaobjects/meta/core/field/field_constants.py +105 -0
  92. metaobjects/meta/core/field/meta_field.py +76 -0
  93. metaobjects/meta/core/identity/__init__.py +0 -0
  94. metaobjects/meta/core/identity/identity_constants.py +19 -0
  95. metaobjects/meta/core/identity/meta_identity.py +8 -0
  96. metaobjects/meta/core/object/__init__.py +0 -0
  97. metaobjects/meta/core/object/meta_object.py +65 -0
  98. metaobjects/meta/core/object/meta_object_aware.py +43 -0
  99. metaobjects/meta/core/object/object_class_registry.py +56 -0
  100. metaobjects/meta/core/object/object_constants.py +13 -0
  101. metaobjects/meta/core/object/object_extract.py +400 -0
  102. metaobjects/meta/core/object/value_object.py +70 -0
  103. metaobjects/meta/core/relationship/__init__.py +0 -0
  104. metaobjects/meta/core/relationship/derive_m2m_fields.py +180 -0
  105. metaobjects/meta/core/relationship/meta_relationship.py +54 -0
  106. metaobjects/meta/core/relationship/relationship_constants.py +51 -0
  107. metaobjects/meta/core/validator/__init__.py +0 -0
  108. metaobjects/meta/core/validator/validator_constants.py +18 -0
  109. metaobjects/meta/meta_data.py +206 -0
  110. metaobjects/meta/meta_root.py +8 -0
  111. metaobjects/meta/persistence/__init__.py +0 -0
  112. metaobjects/meta/persistence/db/__init__.py +1 -0
  113. metaobjects/meta/persistence/db/db_constants.py +41 -0
  114. metaobjects/meta/persistence/db/db_provider.py +60 -0
  115. metaobjects/meta/persistence/origin/__init__.py +0 -0
  116. metaobjects/meta/persistence/origin/meta_origin.py +8 -0
  117. metaobjects/meta/persistence/origin/origin_constants.py +20 -0
  118. metaobjects/meta/persistence/source/__init__.py +0 -0
  119. metaobjects/meta/persistence/source/meta_source.py +137 -0
  120. metaobjects/meta/persistence/source/source_constants.py +115 -0
  121. metaobjects/meta/presentation/__init__.py +0 -0
  122. metaobjects/meta/presentation/layout/__init__.py +0 -0
  123. metaobjects/meta/presentation/layout/layout_constants.py +13 -0
  124. metaobjects/meta/presentation/layout/meta_layout.py +8 -0
  125. metaobjects/meta/presentation/view/__init__.py +0 -0
  126. metaobjects/meta/presentation/view/meta_view.py +8 -0
  127. metaobjects/meta/presentation/view/view_constants.py +22 -0
  128. metaobjects/meta/template/__init__.py +0 -0
  129. metaobjects/meta/template/meta_template.py +46 -0
  130. metaobjects/meta/template/template_constants.py +112 -0
  131. metaobjects/meta/template/template_provider.py +43 -0
  132. metaobjects/parser.py +380 -0
  133. metaobjects/parser_yaml.py +82 -0
  134. metaobjects/provider.py +111 -0
  135. metaobjects/py.typed +0 -0
  136. metaobjects/registry.py +210 -0
  137. metaobjects/registry_manifest.py +223 -0
  138. metaobjects/render/__init__.py +74 -0
  139. metaobjects/render/email_document.py +14 -0
  140. metaobjects/render/escapers.py +109 -0
  141. metaobjects/render/extract/__init__.py +59 -0
  142. metaobjects/render/extract/coerce.py +279 -0
  143. metaobjects/render/extract/extract.py +211 -0
  144. metaobjects/render/extract/extract_map.py +61 -0
  145. metaobjects/render/extract/json_forgiving_reader.py +203 -0
  146. metaobjects/render/extract/locate.py +65 -0
  147. metaobjects/render/extract/normalize.py +96 -0
  148. metaobjects/render/extract/strip.py +20 -0
  149. metaobjects/render/extract/types.py +332 -0
  150. metaobjects/render/extract/xml_forgiving_reader.py +162 -0
  151. metaobjects/render/filesystem_provider.py +51 -0
  152. metaobjects/render/prompt/__init__.py +32 -0
  153. metaobjects/render/prompt/output_format_renderer.py +340 -0
  154. metaobjects/render/prompt/output_format_spec.py +28 -0
  155. metaobjects/render/prompt/prompt_field.py +29 -0
  156. metaobjects/render/prompt/prompt_overrides.py +29 -0
  157. metaobjects/render/prompt/prompt_style.py +38 -0
  158. metaobjects/render/renderer.py +358 -0
  159. metaobjects/render/verify.py +266 -0
  160. metaobjects/runtime/__init__.py +39 -0
  161. metaobjects/runtime/llm_recorder.py +210 -0
  162. metaobjects/runtime/n2m_resolver.py +155 -0
  163. metaobjects/runtime/object_manager.py +715 -0
  164. metaobjects/runtime/tph.py +50 -0
  165. metaobjects/serializer_json.py +172 -0
  166. metaobjects/shared/__init__.py +0 -0
  167. metaobjects/shared/base_types.py +16 -0
  168. metaobjects/shared/separators.py +4 -0
  169. metaobjects/shared/structural.py +9 -0
  170. metaobjects/source/__init__.py +79 -0
  171. metaobjects/source/error_source.py +266 -0
  172. metaobjects/source/json_path.py +106 -0
  173. metaobjects/source/semantic_diff.py +98 -0
  174. metaobjects/source/yaml_positions.py +174 -0
  175. metaobjects/super_resolve.py +128 -0
  176. metaobjects/yaml_desugar.py +481 -0
  177. metaobjects-0.9.0.dist-info/METADATA +97 -0
  178. metaobjects-0.9.0.dist-info/RECORD +181 -0
  179. metaobjects-0.9.0.dist-info/WHEEL +4 -0
  180. metaobjects-0.9.0.dist-info/entry_points.txt +2 -0
  181. metaobjects-0.9.0.dist-info/licenses/LICENSE +189 -0
@@ -0,0 +1,332 @@
1
+ """FR-010 extract model + report.
2
+
3
+ Frozen cross-port vocabularies (``FieldExtraction``, ``FieldKind``, ``Tolerance``,
4
+ ``Format``) plus the immutable schema/option/outcome dataclasses and the mutable
5
+ ``ExtractionReport`` accumulator.
6
+
7
+ The corpus serializes ``FieldExtraction`` with SCREAMING_SNAKE values
8
+ (``EXTRACTED`` / ``DEFAULTED`` / ``LOST_OPTIONAL`` / ``LOST_REQUIRED`` /
9
+ ``MALFORMED``) and ``Format`` / ``FieldKind`` as UPPER tokens; the conformance
10
+ runner maps the schema-json tokens onto these enums.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from dataclasses import dataclass, field
15
+ from enum import Enum
16
+ from typing import Callable, Generic, TypeVar
17
+
18
+ from metaobjects.render.extract.normalize import DEFAULT as _NORMALIZE_DEFAULT
19
+
20
+ T = TypeVar("T")
21
+
22
+ # A bespoke per-field coercion hook: (field_path, raw_value, spec) -> coerced | None.
23
+ # Returning ``None`` falls through to the default coercion. Forward-referenced as a
24
+ # string in ExtractOptions to avoid a forward-declaration cycle with FieldSpec.
25
+ OnField = Callable[[str, str, "FieldSpec"], object | None]
26
+ Normalizer = Callable[[str], object | None]
27
+
28
+
29
class Format(Enum):
    """Wire format of the document handed to the extract pipeline.

    Each member's value is the UPPER token with the same spelling as its name.
    """

    JSON = "JSON"  # JSON documents
    XML = "XML"  # XML documents
34
+
35
+
36
class FieldKind(Enum):
    """Coercion targets known to the extract engine.

    ``OBJECT`` denotes a nested schema; every other member is a scalar kind.
    """

    # Scalar kinds.
    STRING = "STRING"
    INT = "INT"
    LONG = "LONG"
    DOUBLE = "DOUBLE"
    BOOLEAN = "BOOLEAN"
    ENUM = "ENUM"
    # Nested schema.
    OBJECT = "OBJECT"
46
+
47
+
48
class FieldExtraction(Enum):
    """Per-field extraction classification — FROZEN across ports.

    Members must not be reordered or extended without an ADR. The values are the
    SCREAMING_SNAKE tokens used by the corpus serialization.
    """

    EXTRACTED = "EXTRACTED"
    # FR-011: the value came from @coerceDefault (present-but-uncoercible fallback)
    # or from @default (absent-fill), as opposed to a cleanly EXTRACTED value.
    DEFAULTED = "DEFAULTED"
    LOST_OPTIONAL = "LOST_OPTIONAL"
    LOST_REQUIRED = "LOST_REQUIRED"
    MALFORMED = "MALFORMED"
61
+
62
+
63
class Tolerance(Enum):
    """How forgiving the extract pipeline is.

    - ``STRICT``: case-sensitive, minimal repair.
    - ``NORMAL``: case-insensitive (the default).
    - ``LOOSE``: maximal repair.

    NOTE: ``LOOSE`` currently behaves identically to ``NORMAL`` (case-insensitive);
    it is reserved for future maximal-repair behavior.
    """

    STRICT = "STRICT"
    NORMAL = "NORMAL"
    LOOSE = "LOOSE"
73
+
74
+
75
+ @dataclass(frozen=True, slots=True)
76
+ class Coercion:
77
+ """A recorded normalization/coercion.
78
+
79
+ ``kind`` e.g. ``"normalize"``, ``"alias"``, ``"runtime-alias-override"``,
80
+ ``"clamp"``, ``"coerceDefault"``, ``"default"``.
81
+ """
82
+
83
+ field_path: str
84
+ from_: str
85
+ to: str
86
+ kind: str
87
+
88
+
89
@dataclass(frozen=True, slots=True)
class FieldSpec:
    """Immutable extract descriptor for a single field.

    ``enum_values``/``enum_alias`` are non-None only for ENUM fields, ``min``/``max``
    only for numeric range constraints, and ``nested`` only for OBJECT fields.
    Instances are normally built through the static factories below.
    """

    name: str
    kind: FieldKind
    required: bool = False
    array: bool = False
    enum_values: list[str] | None = None
    enum_alias: dict[str, str] | None = None
    min: float | None = None
    max: float | None = None
    nested: "ExtractSchema | None" = None
    # FR-011: fallback member used when a value is present but uncoercible
    # (from ``@coerceDefault``). ENUM-only; None means no fallback.
    coerce_default: str | None = None
    # FR-011: absent-fill value (from ``@default``); None means no default.
    # Set by the enum factories and, since Phase B, by :meth:`scalar` as well.
    default_value: str | None = None
    # FR-011: resolved enum normalization mode (from ``@normalize``; default ``"strip"``).
    normalize: str = _NORMALIZE_DEFAULT
    # ``@xmlText``: the field receives its element's TEXT CONTENT (analogous to JAXB
    # ``@XmlValue`` / Jackson ``@JacksonXmlText`` / .NET ``[XmlText]``). The extract
    # engine reads it from the ``#text`` sentinel the lenient XML reader carries when an
    # element has both attributes and a text body, instead of from a same-named child.
    # False for normal/JSON fields.
    text_content: bool = False

    @staticmethod
    def scalar(
        name: str,
        kind: FieldKind,
        required: bool,
        default_value: str | None = None,
    ) -> "FieldSpec":
        """Build a scalar field, optionally carrying an absent-fill ``@default``.

        Phase B (generalized ``@default``): when the field is ABSENT, tolerant
        extract coerces ``default_value`` to ``kind`` (via the pure
        ``scalar_coerce``) and classifies the field DEFAULTED, which satisfies
        ``required``. ``default_value is None`` is the no-default, back-compat case.
        """
        return FieldSpec(name, kind, required, default_value=default_value)

    @staticmethod
    def text_content_field(name: str, kind: FieldKind, required: bool) -> "FieldSpec":
        """Build a scalar that receives its element's TEXT CONTENT (``@xmlText``).

        Same as a plain scalar but with ``text_content`` set; the text is coerced
        to ``kind``.
        """
        return FieldSpec(name, kind, required, text_content=True)

    @staticmethod
    def scalar_array(name: str, kind: FieldKind, required: bool) -> "FieldSpec":
        """Build a scalar-array field (``array == True``).

        Each element is coerced via the scalar pipeline; there is no per-element
        default fill.
        """
        return FieldSpec(name, kind, required, array=True)

    @staticmethod
    def _enum_spec(
        name: str,
        required: bool,
        array: bool,
        values: list[str] | None,
        aliases: dict[str, str] | None,
        coerce_default: str | None,
        normalize: str,
        default_value: str | None,
    ) -> "FieldSpec":
        """Shared constructor behind :meth:`enum_field` and :meth:`enum_array`."""
        # Defensive copies: the spec never shares mutable state with the caller.
        # A missing alias map becomes an empty dict; missing values stay None.
        copied_values = list(values) if values is not None else None
        copied_aliases = dict(aliases) if aliases is not None else {}
        return FieldSpec(
            name=name,
            kind=FieldKind.ENUM,
            required=required,
            array=array,
            enum_values=copied_values,
            enum_alias=copied_aliases,
            coerce_default=coerce_default,
            default_value=default_value,
            normalize=normalize,
        )

    @staticmethod
    def enum_array(
        name: str,
        required: bool,
        values: list[str] | None,
        aliases: dict[str, str] | None,
        coerce_default: str | None = None,
        normalize: str = _NORMALIZE_DEFAULT,
        default_value: str | None = None,
    ) -> "FieldSpec":
        """Build an array-of-enum field (``array == True``).

        Phase B: each element flows through the SAME enum pipeline a scalar enum
        uses (exact → normalize → ``@enumAlias`` → ``@coerceDefault`` → MALFORMED)
        and is classified independently by indexed path (``tags[0]``, ``tags[1]``,
        …). Mirrors :meth:`enum_field` but with ``array = True``.
        """
        return FieldSpec._enum_spec(
            name, required, True, values, aliases, coerce_default, normalize, default_value
        )

    @staticmethod
    def enum_field(
        name: str,
        required: bool,
        values: list[str] | None,
        aliases: dict[str, str] | None,
        coerce_default: str | None = None,
        normalize: str = _NORMALIZE_DEFAULT,
        default_value: str | None = None,
    ) -> "FieldSpec":
        """Build a scalar ENUM field (``array`` stays ``False``).

        ``values`` and ``aliases`` are copied; a ``None`` alias map is stored as
        an empty dict.
        """
        return FieldSpec._enum_spec(
            name, required, False, values, aliases, coerce_default, normalize, default_value
        )

    @staticmethod
    def range_(
        name: str,
        kind: FieldKind,
        required: bool,
        min: float | None,
        max: float | None,
    ) -> "FieldSpec":
        """Build a field of ``kind`` carrying ``min``/``max`` range constraints."""
        return FieldSpec(name, kind, required, min=min, max=max)

    @staticmethod
    def object_(
        name: str,
        required: bool,
        array: bool,
        nested: "ExtractSchema | None",
    ) -> "FieldSpec":
        """Build an OBJECT field (optionally an array) described by ``nested``."""
        return FieldSpec(name, FieldKind.OBJECT, required, array, nested=nested)
219
+
220
+
221
+ @dataclass(frozen=True, slots=True)
222
+ class ExtractSchema:
223
+ """Top-level extract descriptor.
224
+
225
+ ``root_name`` = the XML root tag / logical JSON root name.
226
+ """
227
+
228
+ format: Format
229
+ root_name: str
230
+ fields: list[FieldSpec] = field(default_factory=list)
231
+
232
+
233
@dataclass(frozen=True, slots=True)
class ExtractOptions:
    """The bounded runtime override surface (the "20%").

    ``aliases`` and ``normalizers`` are MERGED with the schema's; on a key
    conflict the runtime entry wins. ``on_field`` is the single bespoke-coercion
    hook.

    ``rootless`` (XML only): when ``True`` the input has NO enclosing root
    element — the payload's fields ARE the top-level elements (a flat sequence
    such as ``<a>..</a><b>..</b>``). Mirrors Java ``ExtractOptions.rootless``.
    """

    tolerance: Tolerance = Tolerance.NORMAL
    aliases: dict[str, str] = field(default_factory=dict)
    normalizers: dict[str, Normalizer] = field(default_factory=dict)
    on_field: OnField | None = None
    rootless: bool = False

    @staticmethod
    def defaults() -> "ExtractOptions":
        """Return options with every setting at its default."""
        return ExtractOptions()

    def with_tolerance(self, t: Tolerance) -> "ExtractOptions":
        """Return a copy with ``tolerance`` replaced by ``t``.

        The alias and normalizer maps are copied, so the new instance does not
        share them with this one.
        """
        alias_copy = dict(self.aliases)
        normalizer_copy = dict(self.normalizers)
        return ExtractOptions(t, alias_copy, normalizer_copy, self.on_field, self.rootless)

    def with_rootless(self, r: bool) -> "ExtractOptions":
        """Return a copy with ``rootless`` set to ``r``.

        XML only: parse a rootless flat element sequence directly (no wrapper
        root). Mirrors Java ``withRootless``.
        """
        alias_copy = dict(self.aliases)
        normalizer_copy = dict(self.normalizers)
        return ExtractOptions(self.tolerance, alias_copy, normalizer_copy, self.on_field, r)
274
+
275
+
276
+ @dataclass(frozen=True, slots=True)
277
+ class ExtractionOutcome:
278
+ """Engine return.
279
+
280
+ ``data`` is a forgiving ``dict[str, object]``; Plan 2 wraps it into a typed
281
+ ``ExtractionResult``.
282
+ """
283
+
284
+ data: dict[str, object]
285
+ report: "ExtractionReport"
286
+
287
+
288
@dataclass(frozen=True, slots=True)
class ExtractionResult(Generic[T]):
    """Typed result of a generated ``extract(...)`` call.

    Pairs the best-effort value with the report describing how it was obtained.
    """

    # Best-effort typed value; the annotation allows ``None``.
    data: T | None
    # Report that accompanies ``data``.
    report: "ExtractionReport"
294
+
295
+
296
class ExtractionReport:
    """Mutable accumulator for an extraction run.

    Tracks the per-field classification, the "empty" flag, and the list of
    recorded coercions. Accessors hand out copies, so callers cannot mutate the
    report's internal state through them.
    """

    def __init__(self) -> None:
        self._empty: bool = False
        self._coercions: list[Coercion] = []
        self._states: dict[str, FieldExtraction] = {}

    def set(self, field_path: str, state: FieldExtraction) -> None:
        """Record ``state`` for ``field_path``, replacing any earlier entry."""
        self._states[field_path] = state

    def add_coercion(self, c: Coercion) -> None:
        """Append a coercion note."""
        self._coercions.append(c)

    def mark_empty(self) -> None:
        """Set the empty flag; nothing in this class clears it again."""
        self._empty = True

    def is_empty(self) -> bool:
        """Return whether :meth:`mark_empty` has been called."""
        return self._empty

    def states(self) -> dict[str, FieldExtraction]:
        """Return a shallow copy of the field-path → classification map."""
        return self._states.copy()

    def coercions(self) -> list[Coercion]:
        """Return a shallow copy of the recorded coercions, in insertion order."""
        return self._coercions.copy()

    def lost_required(self) -> list[str]:
        """Return the paths classified ``LOST_REQUIRED``."""
        return self._by_state(FieldExtraction.LOST_REQUIRED)

    def malformed(self) -> list[str]:
        """Return the paths classified ``MALFORMED``."""
        return self._by_state(FieldExtraction.MALFORMED)

    def has_lost_required(self) -> bool:
        """Return ``True`` when at least one path is ``LOST_REQUIRED``."""
        return bool(self.lost_required())

    def _by_state(self, s: FieldExtraction) -> list[str]:
        """Return the paths whose recorded state equals ``s``, in insertion order."""
        return [path for path, recorded in self._states.items() if recorded == s]
@@ -0,0 +1,162 @@
1
+ """Stage-4 tolerant XML reader for the bounded corpus malformation set. Never throws.
2
+
3
+ Mirrors Java XmlForgivingReader: maps an element's child elements, text, AND attributes
4
+ into the field map, and handles self-closing tags (``<x a="1"/>``).
5
+
6
+ Representation:
7
+
8
+ - text-only element, no attributes → its trimmed text (``str``) — unchanged
9
+ - self-closing / attributes-only element → a dict of attribute name→value ("" when none)
10
+ - element with child elements (± attrs) → a dict merging attributes + child entries
11
+ (a child element wins a name collision)
12
+ - element with text AND attributes → a dict of the attributes plus the body text under
13
+ :data:`TEXT_KEY` (a scalar consumer unwraps it)
14
+ - repeated sibling tags → a list
15
+
16
+ Carries the FR-010 fixed-behavior edge cases:
17
+
18
+ - No-throw on a leading ``</x>``.
19
+ - Unclosed tags extract their text up to the next sibling open tag.
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import re
24
+
25
#: Reserved key holding an element's own text content when the element is represented as a
#: dict (because it also carries attributes). ``#`` is not a legal XML name char, so it never
#: collides with a real attribute or child-element name.
TEXT_KEY = "#text"

# Open tag: group 1 is the tag name, group 2 is everything up to the closing '>'
# (attributes and/or a trailing '/' for a self-closing tag). Non-greedy so the first
# '>' closes the open tag.
_OPEN_TAG = re.compile(r"<([A-Za-z_][A-Za-z0-9_]*)([^>]*?)>")
_OPEN_TAG_CI = re.compile(r"<([A-Za-z_][A-Za-z0-9_]*)([^>]*?)>", re.IGNORECASE)
# One attribute: name = "double" | 'single' | bareword (groups 2, 3, 4 respectively).
_ATTR = re.compile(r"""([A-Za-z_:][A-Za-z0-9_:.\-]*)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s/>]+))""")


class XmlForgivingReader:
    """Tolerant, regex-driven XML reader that maps elements into a plain field map.

    Values in the returned map are: a ``str`` (text-only element, or ``""`` for a
    self-closing element without attributes), a ``dict`` (element with attributes
    and/or children; body text next to attributes is stored under ``TEXT_KEY``), or
    a ``list`` of those (repeated sibling tags).
    """

    def read(self, span: str | None, case_insensitive: bool) -> dict[str, object]:
        """Parse the children of the root element contained in ``span``.

        The root's open tag is taken to end at the first ``>`` and its close tag to
        start at the last ``</``; everything in between is parsed as children. The
        root's own attributes are not returned.

        Args:
            span: Text holding one root element; ``None`` or blank yields ``{}``.
            case_insensitive: When true, tag and attribute names are lower-cased
                and close tags are matched ignoring case.

        Returns:
            The field map; ``{}`` when ``span`` has no ``>`` at all.
        """
        out: dict[str, object] = {}
        if span is None or span.strip() == "":
            return out
        gt = span.find(">")
        if gt < 0:
            return out
        root_end = span.rfind("</")
        # No usable close tag after the open tag (missing, or it IS the leading
        # "</x>"): treat the rest of the span as the root's content.
        inner_end = len(span) if (root_end < 0 or root_end <= gt) else root_end
        inner = span[gt + 1 : inner_end]
        self._parse_guarded(inner, case_insensitive, out)
        return out

    def read_rootless(self, text: str | None, case_insensitive: bool) -> dict[str, object]:
        """Rootless read: parse the WHOLE text's top-level elements directly, with no
        enclosing root element to strip (a flat sequence like ``<a>..</a><b>..</b>``).
        Used for :attr:`ExtractOptions.rootless` responses. Leading/trailing non-element
        text is ignored. Never throws. Mirrors Java ``readRootless``."""
        out: dict[str, object] = {}
        if text is None or text.strip() == "":
            return out
        self._parse_guarded(text, case_insensitive, out)
        return out

    def _parse_guarded(self, text: str, ci: bool, out: dict[str, object]) -> None:
        """Run :meth:`_parse_children`, honoring the reader's never-throws contract.

        Nested elements are parsed recursively, so pathologically deep input (for
        example a long run of unclosed ``<a><a><a>…`` tags) can exhaust the
        interpreter's recursion limit. In that case the top-level entries already
        accumulated in ``out`` are kept and the offending element is dropped.
        """
        try:
            self._parse_children(text, ci, out)
        except RecursionError:
            # Deliberate best effort: ``out`` is only mutated by completed
            # top-level accumulations, so it is still consistent here.
            return

    def _parse_children(self, inner: str, ci: bool, out: dict[str, object]) -> None:
        """Scan ``inner`` for sibling elements and accumulate each into ``out``."""
        open_tag = _OPEN_TAG_CI if ci else _OPEN_TAG
        pos = 0
        while True:
            m = open_tag.search(inner, pos)
            if m is None:
                return
            tag = m.group(1)
            key = tag.lower() if ci else tag

            raw_attrs = (m.group(2) or "").strip()
            self_closing = raw_attrs.endswith("/")
            if self_closing:
                raw_attrs = raw_attrs[:-1].strip()
            attrs = self._parse_attrs(raw_attrs, ci)

            if self_closing:
                # No body: the attribute dict, or "" when there are no attributes.
                self._accumulate(out, key, "" if not attrs else attrs)
                pos = m.end()
                continue

            content_start = m.end()
            close_re = re.compile(
                "</" + re.escape(tag) + r"\s*>", re.IGNORECASE if ci else 0
            )
            close_m = close_re.search(inner, content_start)
            if close_m is not None:
                content_end = close_m.start()
                nxt = close_m.end()
            else:
                # unclosed tag: extract content up to the next sibling open tag.
                sib = open_tag.search(inner, content_start)
                if sib is not None:
                    # When the unclosed element's content begins IMMEDIATELY with a child
                    # open tag (no leading text), that child was almost certainly meant to
                    # be NESTED, not a sibling — a common LLM malformation is dropping the
                    # parent's close tag while still emitting a real child element
                    # (e.g. <check ...><payoff>text). Absorb the remainder of this span as
                    # the unclosed element's content so the child nests under it. When there
                    # IS leading text before the first child tag (e.g. <t>hi<c>..), keep the
                    # sibling split — the leading text is the unclosed element's body and the
                    # following tag is its sibling. Mirrors Java XmlForgivingReader.
                    no_leading_text = inner[content_start : sib.start()].strip() == ""
                    if no_leading_text:
                        content_end = len(inner)
                        nxt = len(inner)
                    else:
                        content_end = sib.start()
                        nxt = content_end
                else:
                    content_end = len(inner)
                    nxt = len(inner)
            content = inner[content_start:content_end]
            self._accumulate(out, key, self._combine(attrs, content, ci))
            pos = nxt

    def _combine(self, attrs: dict[str, object], content: str, ci: bool) -> object:
        """Combine an element's attributes with its body (nested children or plain text)."""
        if "<" in content:
            nested: dict[str, object] = {}
            self._parse_children(content, ci, nested)
            if nested:
                # attributes first; a child element wins a name collision
                merged: dict[str, object] = dict(attrs)
                merged.update(nested)
                return merged
        # No parsable child element: the body is plain text.
        return self._text_value(attrs, content)

    def _text_value(self, attrs: dict[str, object], content: str) -> object:
        """Return the trimmed text, wrapped with the attributes under ``TEXT_KEY`` if any."""
        text = content.strip()
        if not attrs:
            return text
        m: dict[str, object] = dict(attrs)
        m[TEXT_KEY] = text
        return m

    def _parse_attrs(self, raw_attrs: str, ci: bool) -> dict[str, object]:
        """Parse ``name=value`` pairs; the first occurrence of a name wins."""
        attrs: dict[str, object] = {}
        if not raw_attrs:
            return attrs
        for a in _ATTR.finditer(raw_attrs):
            name = a.group(1).lower() if ci else a.group(1)
            # Exactly one of the three value alternatives (double-quoted,
            # single-quoted, bareword) participates in a match.
            val = next((g for g in a.groups()[1:] if g is not None), "")
            attrs.setdefault(name, val)
        return attrs

    def _accumulate(self, out: dict[str, object], key: str, value: object) -> None:
        """Store ``value`` under ``key``; repeated keys are collected into a list."""
        if key not in out:
            out[key] = value
            return
        existing = out[key]
        if isinstance(existing, list):
            existing.append(value)
        else:
            out[key] = [existing, value]
@@ -0,0 +1,51 @@
1
+ """Filesystem-backed :class:`~metaobjects.render.verify.Provider`.
2
+
3
+ Resolves a ``group/source`` reference to ``<root>/group/source.mustache``. The render
4
+ engine + verify delegate all I/O to a provider, so this is the only file-touching piece
5
+ of the render tier. Mirrors the C# ``MetaObjects.Render.FilesystemProvider`` and the Java
6
+ ``com.metaobjects.render.FilesystemProvider`` semantics: read the file if present, return
7
+ ``None`` when absent/unreadable, and reject refs that escape the root (via ``..``).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path
13
+
14
+
15
class FilesystemProvider:
    """Resolves ``group/source`` references to text files under a root directory:
    ``resolve("npc/turn")`` → ``<root>/npc/turn.mustache``. Returns ``None`` when the file
    is absent. Refs that escape the root (via ``..``) are rejected.

    Implements the :class:`~metaobjects.render.verify.Provider` protocol.
    """

    def __init__(self, root: str | Path, extension: str = ".mustache") -> None:
        """Remember the resolved root directory and the file extension to append.

        Args:
            root: Directory that every reference is resolved against.
            extension: Suffix appended to the last ref segment (default ``.mustache``).
        """
        self._root = Path(root).resolve()
        self._extension = extension

    def resolve(self, ref: str | None) -> str | None:
        """Return the UTF-8 text of the file ``ref`` points at, or ``None``.

        Args:
            ref: Slash-separated reference such as ``"npc/turn"``; empty segments
                are dropped.

        Returns:
            The file content, or ``None`` when the ref is empty, contains a ``..``
            segment, cannot be turned into a path, resolves outside the root, is
            not a regular file, or cannot be read (``OSError``).
        """
        # Build <root>/<seg>/<seg> from the slash-separated ref, dropping empties.
        if not ref:
            return None
        segments = [s for s in ref.split("/") if s]
        if not segments or ".." in segments:
            return None

        try:
            base = self._root.joinpath(*segments)
            candidate = base.with_name(base.name + self._extension)
            resolved = candidate.resolve()
        except (OSError, ValueError):
            # ValueError covers refs that cannot form a path at all (an embedded
            # NUL byte on interpreters where resolve() rejects it, or an empty
            # base name); such a ref is treated like an absent file.
            return None

        # Path-traversal guard: the resolved file must stay under the root.
        if resolved != self._root and self._root not in resolved.parents:
            return None

        if not resolved.is_file():
            return None
        try:
            return resolved.read_text(encoding="utf-8")
        except OSError:
            return None
@@ -0,0 +1,32 @@
1
+ """FR-010 artifact 1 — output-format prompt renderer ("produce your answer like this").
2
+
3
+ Renders an :class:`OutputFormatSpec` into a prompt fragment that teaches an LLM how
4
+ to shape its answer. Three comment-free styles (guide / inline / exampleOnly) × two
5
+ formats (json / xml). Guidance is carried in prose / inline placeholders / a filled
6
+ skeleton — NEVER in comments (models ignore them).
7
+
8
+ Cross-port INVARIANT: the rendered text is byte-identical to the Java/C#/Kotlin/TS
9
+ reference (``com.metaobjects.render.prompt.OutputFormatRenderer``).
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from metaobjects.render.prompt.output_format_renderer import render_output_format
14
+ from metaobjects.render.prompt.output_format_spec import OutputFormatSpec
15
+ from metaobjects.render.prompt.prompt_field import PromptField
16
+ from metaobjects.render.prompt.prompt_overrides import (
17
+ PROMPT_OVERRIDES_NONE,
18
+ PromptOverrides,
19
+ no_overrides,
20
+ )
21
+ from metaobjects.render.prompt.prompt_style import PromptStyle, prompt_style_from
22
+
23
+ __all__ = [
24
+ "PROMPT_OVERRIDES_NONE",
25
+ "OutputFormatSpec",
26
+ "PromptField",
27
+ "PromptOverrides",
28
+ "PromptStyle",
29
+ "no_overrides",
30
+ "prompt_style_from",
31
+ "render_output_format",
32
+ ]