metaobjects 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. metaobjects/__init__.py +75 -0
  2. metaobjects/agent_context/__init__.py +55 -0
  3. metaobjects/agent_context/_content/README.md +14 -0
  4. metaobjects/agent_context/_content/servers/csharp.meta.json +5 -0
  5. metaobjects/agent_context/_content/servers/java.meta.json +5 -0
  6. metaobjects/agent_context/_content/servers/kotlin.meta.json +5 -0
  7. metaobjects/agent_context/_content/servers/python.meta.json +5 -0
  8. metaobjects/agent_context/_content/servers/typescript.meta.json +5 -0
  9. metaobjects/agent_context/_content/skills/metaobjects-authoring/SKILL.md +301 -0
  10. metaobjects/agent_context/_content/skills/metaobjects-codegen/SKILL.md +99 -0
  11. metaobjects/agent_context/_content/skills/metaobjects-codegen/references/csharp.md +87 -0
  12. metaobjects/agent_context/_content/skills/metaobjects-codegen/references/java.md +94 -0
  13. metaobjects/agent_context/_content/skills/metaobjects-codegen/references/kotlin.md +110 -0
  14. metaobjects/agent_context/_content/skills/metaobjects-codegen/references/typescript.md +135 -0
  15. metaobjects/agent_context/_content/skills/metaobjects-prompts/SKILL.md +148 -0
  16. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/csharp.md +110 -0
  17. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/java.md +108 -0
  18. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/kotlin.md +130 -0
  19. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/python.md +116 -0
  20. metaobjects/agent_context/_content/skills/metaobjects-prompts/references/typescript.md +150 -0
  21. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/SKILL.md +130 -0
  22. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/java.md +96 -0
  23. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/kotlin.md +99 -0
  24. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/react.md +86 -0
  25. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/tanstack.md +119 -0
  26. metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/typescript.md +92 -0
  27. metaobjects/agent_context/_content/skills/metaobjects-verify/SKILL.md +107 -0
  28. metaobjects/agent_context/_content/skills/metaobjects-verify/references/migration.md +72 -0
  29. metaobjects/agent_context/_content/templates/always-on.md.mustache +27 -0
  30. metaobjects/agent_context/assemble.py +133 -0
  31. metaobjects/agent_context/content_root.py +54 -0
  32. metaobjects/agent_context/scaffold.py +191 -0
  33. metaobjects/agent_context/types.py +44 -0
  34. metaobjects/attr_class_map.py +23 -0
  35. metaobjects/cli.py +696 -0
  36. metaobjects/codegen/__init__.py +0 -0
  37. metaobjects/codegen/config.py +11 -0
  38. metaobjects/codegen/constants.py +13 -0
  39. metaobjects/codegen/extract_delegate_emitter.py +384 -0
  40. metaobjects/codegen/extract_schema_emitter.py +139 -0
  41. metaobjects/codegen/format.py +31 -0
  42. metaobjects/codegen/fr010_field_mapping.py +220 -0
  43. metaobjects/codegen/generator.py +62 -0
  44. metaobjects/codegen/generator_registry.py +163 -0
  45. metaobjects/codegen/generators/__init__.py +0 -0
  46. metaobjects/codegen/generators/entity_model.py +263 -0
  47. metaobjects/codegen/generators/extractor_generator.py +317 -0
  48. metaobjects/codegen/generators/filter_allowlist_generator.py +309 -0
  49. metaobjects/codegen/generators/m2m_codegen.py +192 -0
  50. metaobjects/codegen/generators/output_parser_generator.py +272 -0
  51. metaobjects/codegen/generators/output_prompt_generator.py +192 -0
  52. metaobjects/codegen/generators/payload_vo_generator.py +672 -0
  53. metaobjects/codegen/generators/render_helper_generator.py +451 -0
  54. metaobjects/codegen/generators/router_generator.py +635 -0
  55. metaobjects/codegen/generators/template_generator.py +70 -0
  56. metaobjects/codegen/generators/tph_plan.py +120 -0
  57. metaobjects/codegen/generators/trace_helper_generator.py +336 -0
  58. metaobjects/codegen/instance_artifacts.py +15 -0
  59. metaobjects/codegen/output_format_spec_emitter.py +79 -0
  60. metaobjects/codegen/overwrite_policy.py +27 -0
  61. metaobjects/codegen/runner.py +110 -0
  62. metaobjects/codegen/runtime/__init__.py +6 -0
  63. metaobjects/codegen/runtime/filter_parser.py +193 -0
  64. metaobjects/codegen/type_map.py +84 -0
  65. metaobjects/core_types.py +809 -0
  66. metaobjects/datatype.py +19 -0
  67. metaobjects/documentation/__init__.py +28 -0
  68. metaobjects/documentation/doc_constants.py +20 -0
  69. metaobjects/documentation/doc_provider.py +20 -0
  70. metaobjects/documentation/doc_schema.py +24 -0
  71. metaobjects/errors.py +124 -0
  72. metaobjects/loader/__init__.py +0 -0
  73. metaobjects/loader/merge.py +287 -0
  74. metaobjects/loader/meta_data_loader.py +245 -0
  75. metaobjects/loader/sources/__init__.py +24 -0
  76. metaobjects/loader/sources/directory_source.py +50 -0
  77. metaobjects/loader/sources/file_source.py +41 -0
  78. metaobjects/loader/sources/meta_data_source.py +67 -0
  79. metaobjects/loader/sources/uri_source.py +56 -0
  80. metaobjects/loader/validate_discriminator.py +181 -0
  81. metaobjects/loader/validate_field_readonly.py +146 -0
  82. metaobjects/loader/validate_source_parameter_ref.py +159 -0
  83. metaobjects/loader/validate_source_physical_names.py +140 -0
  84. metaobjects/loader/validation_passes.py +1513 -0
  85. metaobjects/meta/__init__.py +1 -0
  86. metaobjects/meta/core/__init__.py +0 -0
  87. metaobjects/meta/core/attr/__init__.py +0 -0
  88. metaobjects/meta/core/attr/attr_constants.py +31 -0
  89. metaobjects/meta/core/attr/meta_attr.py +136 -0
  90. metaobjects/meta/core/field/__init__.py +0 -0
  91. metaobjects/meta/core/field/field_constants.py +105 -0
  92. metaobjects/meta/core/field/meta_field.py +76 -0
  93. metaobjects/meta/core/identity/__init__.py +0 -0
  94. metaobjects/meta/core/identity/identity_constants.py +19 -0
  95. metaobjects/meta/core/identity/meta_identity.py +8 -0
  96. metaobjects/meta/core/object/__init__.py +0 -0
  97. metaobjects/meta/core/object/meta_object.py +65 -0
  98. metaobjects/meta/core/object/meta_object_aware.py +43 -0
  99. metaobjects/meta/core/object/object_class_registry.py +56 -0
  100. metaobjects/meta/core/object/object_constants.py +13 -0
  101. metaobjects/meta/core/object/object_extract.py +400 -0
  102. metaobjects/meta/core/object/value_object.py +70 -0
  103. metaobjects/meta/core/relationship/__init__.py +0 -0
  104. metaobjects/meta/core/relationship/derive_m2m_fields.py +180 -0
  105. metaobjects/meta/core/relationship/meta_relationship.py +54 -0
  106. metaobjects/meta/core/relationship/relationship_constants.py +51 -0
  107. metaobjects/meta/core/validator/__init__.py +0 -0
  108. metaobjects/meta/core/validator/validator_constants.py +18 -0
  109. metaobjects/meta/meta_data.py +206 -0
  110. metaobjects/meta/meta_root.py +8 -0
  111. metaobjects/meta/persistence/__init__.py +0 -0
  112. metaobjects/meta/persistence/db/__init__.py +1 -0
  113. metaobjects/meta/persistence/db/db_constants.py +41 -0
  114. metaobjects/meta/persistence/db/db_provider.py +60 -0
  115. metaobjects/meta/persistence/origin/__init__.py +0 -0
  116. metaobjects/meta/persistence/origin/meta_origin.py +8 -0
  117. metaobjects/meta/persistence/origin/origin_constants.py +20 -0
  118. metaobjects/meta/persistence/source/__init__.py +0 -0
  119. metaobjects/meta/persistence/source/meta_source.py +137 -0
  120. metaobjects/meta/persistence/source/source_constants.py +115 -0
  121. metaobjects/meta/presentation/__init__.py +0 -0
  122. metaobjects/meta/presentation/layout/__init__.py +0 -0
  123. metaobjects/meta/presentation/layout/layout_constants.py +13 -0
  124. metaobjects/meta/presentation/layout/meta_layout.py +8 -0
  125. metaobjects/meta/presentation/view/__init__.py +0 -0
  126. metaobjects/meta/presentation/view/meta_view.py +8 -0
  127. metaobjects/meta/presentation/view/view_constants.py +22 -0
  128. metaobjects/meta/template/__init__.py +0 -0
  129. metaobjects/meta/template/meta_template.py +46 -0
  130. metaobjects/meta/template/template_constants.py +112 -0
  131. metaobjects/meta/template/template_provider.py +43 -0
  132. metaobjects/parser.py +380 -0
  133. metaobjects/parser_yaml.py +82 -0
  134. metaobjects/provider.py +111 -0
  135. metaobjects/py.typed +0 -0
  136. metaobjects/registry.py +210 -0
  137. metaobjects/registry_manifest.py +223 -0
  138. metaobjects/render/__init__.py +74 -0
  139. metaobjects/render/email_document.py +14 -0
  140. metaobjects/render/escapers.py +109 -0
  141. metaobjects/render/extract/__init__.py +59 -0
  142. metaobjects/render/extract/coerce.py +279 -0
  143. metaobjects/render/extract/extract.py +211 -0
  144. metaobjects/render/extract/extract_map.py +61 -0
  145. metaobjects/render/extract/json_forgiving_reader.py +203 -0
  146. metaobjects/render/extract/locate.py +65 -0
  147. metaobjects/render/extract/normalize.py +96 -0
  148. metaobjects/render/extract/strip.py +20 -0
  149. metaobjects/render/extract/types.py +332 -0
  150. metaobjects/render/extract/xml_forgiving_reader.py +162 -0
  151. metaobjects/render/filesystem_provider.py +51 -0
  152. metaobjects/render/prompt/__init__.py +32 -0
  153. metaobjects/render/prompt/output_format_renderer.py +340 -0
  154. metaobjects/render/prompt/output_format_spec.py +28 -0
  155. metaobjects/render/prompt/prompt_field.py +29 -0
  156. metaobjects/render/prompt/prompt_overrides.py +29 -0
  157. metaobjects/render/prompt/prompt_style.py +38 -0
  158. metaobjects/render/renderer.py +358 -0
  159. metaobjects/render/verify.py +266 -0
  160. metaobjects/runtime/__init__.py +39 -0
  161. metaobjects/runtime/llm_recorder.py +210 -0
  162. metaobjects/runtime/n2m_resolver.py +155 -0
  163. metaobjects/runtime/object_manager.py +715 -0
  164. metaobjects/runtime/tph.py +50 -0
  165. metaobjects/serializer_json.py +172 -0
  166. metaobjects/shared/__init__.py +0 -0
  167. metaobjects/shared/base_types.py +16 -0
  168. metaobjects/shared/separators.py +4 -0
  169. metaobjects/shared/structural.py +9 -0
  170. metaobjects/source/__init__.py +79 -0
  171. metaobjects/source/error_source.py +266 -0
  172. metaobjects/source/json_path.py +106 -0
  173. metaobjects/source/semantic_diff.py +98 -0
  174. metaobjects/source/yaml_positions.py +174 -0
  175. metaobjects/super_resolve.py +128 -0
  176. metaobjects/yaml_desugar.py +481 -0
  177. metaobjects-0.9.0.dist-info/METADATA +97 -0
  178. metaobjects-0.9.0.dist-info/RECORD +181 -0
  179. metaobjects-0.9.0.dist-info/WHEEL +4 -0
  180. metaobjects-0.9.0.dist-info/entry_points.txt +2 -0
  181. metaobjects-0.9.0.dist-info/licenses/LICENSE +189 -0
@@ -0,0 +1,279 @@
1
+ """Stage 7: canonicalize a raw scalar string per its FieldSpec.
2
+
3
+ Returns the ``MALFORMED`` sentinel when the value is present but uncoercible.
4
+
5
+ Cross-port number rules (parity with C#/TS, an accepted divergence from Java):
6
+
7
+ - Non-finite (NaN / ±Infinity) → MALFORMED.
8
+ - Radix-prefixed strings (``0x..`` / ``0b..`` / ``0o..``) are REJECTED. Python's
9
+ ``int(s, 0)`` would accept them but Java's ``Long.parseLong`` / C#'s
10
+ ``long.TryParse`` reject them — so the C# and TS ports added this guard; we match.
11
+ - We do NOT replicate Java's ``Double.parseDouble`` suffix tolerance
12
+ (``"42d"`` / hex-float) — documented accepted divergence (same as C#/TS).
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import math
17
+ import re
18
+ from typing import Final
19
+
20
+ from metaobjects.render.extract.normalize import NONE as _NORMALIZE_NONE
21
+ from metaobjects.render.extract.normalize import normalize_enum
22
+ from metaobjects.render.extract.types import (
23
+ Coercion,
24
+ FieldKind,
25
+ FieldSpec,
26
+ ExtractOptions,
27
+ ExtractionReport,
28
+ Tolerance,
29
+ )
30
+
31
+ # Sentinel: the value was present but could not be coerced to the declared kind/vocabulary.
32
+ MALFORMED: Final = object()
33
+
34
+ # A canonical ASCII numeric literal (int / decimal / scientific). Python's int()/float()
35
+ # are far more permissive than Java/C# numeric parsing — they accept underscore digit
36
+ # grouping ("1_000", PEP 515), non-ASCII Unicode digits (e.g. fullwidth "１２３"), and radix prefixes ("0x10"). Gating
37
+ # on this ASCII-only pattern rejects all of those → MALFORMED, matching the strict cross-port
38
+ # behavior (C#'s TryParse). `[0-9]` (not `\d`) keeps it ASCII-only.
39
+ _ASCII_NUMERIC = re.compile(r"^[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?$")
40
+
41
+
42
def value(
    raw: str | None,
    spec: FieldSpec,
    opts: ExtractOptions,
    field_path: str,
    report: ExtractionReport,
) -> object:
    """Coerce ``raw`` into the native value ``spec`` describes, or return MALFORMED.

    Resolution order: the ``on_field`` hook, then a per-field normalizer (looked
    up by full path first, then by the field's simple name), then the built-in
    coercion for ``spec.kind``. A hook or normalizer that returns ``None`` is
    treated as "no opinion" and the next stage runs.
    """
    if raw is None:
        return MALFORMED

    # Caller-supplied hook wins over everything else.
    hook = opts.on_field
    if hook is not None:
        from_hook = hook(field_path, raw, spec)
        if from_hook is not None:
            report.add_coercion(Coercion(field_path, raw, str(from_hook), "onField"))
            return from_hook

    # Runtime normalizer: the path-keyed entry takes precedence over the name-keyed one.
    normalizer = opts.normalizers.get(field_path)
    if normalizer is None:
        normalizer = opts.normalizers.get(spec.name)
    if normalizer is not None:
        from_normalizer = normalizer(raw)
        if from_normalizer is not None:
            report.add_coercion(
                Coercion(field_path, raw, str(from_normalizer), "normalizer")
            )
            return from_normalizer

    # Anything other than STRICT tolerance enables the lenient / case-insensitive paths.
    lenient = opts.tolerance != Tolerance.STRICT
    kind = spec.kind
    if kind == FieldKind.ENUM:
        return _coerce_enum(raw, spec, opts, field_path, report, lenient)
    if kind == FieldKind.INT or kind == FieldKind.LONG:
        return _coerce_int(raw, spec, field_path, report, lenient)
    if kind == FieldKind.DOUBLE:
        return _coerce_double(raw, spec, field_path, report, lenient)
    if kind == FieldKind.BOOLEAN:
        return _coerce_bool(raw, lenient)
    # Every remaining kind passes the raw string through untouched.
    return raw
82
+
83
+
84
def scalar_coerce(raw: str | None, spec: FieldSpec) -> object:
    """Side-effect-free coercion of a metadata ``@default`` string to ``spec.kind``.

    No hooks run and nothing is written to a report. Returns the coerced value,
    or the ``MALFORMED`` sentinel when ``raw`` is ``None`` or cannot be coerced.

    * INT / LONG: an integer literal, or a finite number truncated toward zero.
    * DOUBLE: any finite number.
    * BOOLEAN: ``true|false|yes|no|1|0`` (always case-insensitive here).
    * Any other kind: ``raw`` is returned verbatim.
    """
    if raw is None:
        return MALFORMED

    kind = spec.kind
    integral = kind == FieldKind.INT or kind == FieldKind.LONG

    if integral or kind == FieldKind.DOUBLE:
        text = raw.strip()
        # ASCII-only gate: rejects underscores, Unicode digits and radix prefixes.
        if _ASCII_NUMERIC.match(text) is None:
            return MALFORMED
        if integral:
            try:
                return int(text)
            except ValueError:
                pass  # decimal / scientific literal: fall through to the float parse
        try:
            number = float(text)
        except ValueError:
            return MALFORMED
        if not math.isfinite(number):
            return MALFORMED
        return math.trunc(number) if integral else number

    if kind == FieldKind.BOOLEAN:
        token = raw.strip().lower()
        if token in ("true", "yes", "1"):
            return True
        if token in ("false", "no", "0"):
            return False
        return MALFORMED

    return raw  # STRING / ENUM / OBJECT — verbatim
129
+
130
+
131
def _coerce_enum(
    raw: str,
    spec: FieldSpec,
    opts: ExtractOptions,
    path: str,
    report: ExtractionReport,
    ci: bool,
) -> object:
    """Resolve ``raw`` to an enum member, or return MALFORMED.

    Stages, first hit wins: exact member match → match under the normalization
    mode → alias lookup (runtime aliases beat schema aliases) → the field's
    ``coerce_default`` when it is itself a declared member → MALFORMED.

    When ``ci`` is False the normalization mode is forced to "none", so only
    exact member / exact alias-key matches can succeed. Every non-exact
    resolution is recorded on ``report`` as a coercion.
    """
    mode = spec.normalize if ci else _NORMALIZE_NONE
    members = spec.enum_values

    if members is not None:
        # 1. exact match — returns the declared member, logs nothing.
        for member in members:
            if member == raw:
                return member

        # 2. normalized match (skipped when mode == none).
        if mode != _NORMALIZE_NONE:
            wanted = normalize_enum(raw, mode)
            for member in members:
                if normalize_enum(member, mode) == wanted:
                    report.add_coercion(Coercion(path, raw, member, "normalize"))
                    return member

    # 3. aliases — the runtime map is consulted first and wins over the schema map.
    runtime_target = _lookup_alias_in(raw, opts.aliases, mode)
    schema_target = _lookup_alias_in(raw, spec.enum_alias, mode)
    if runtime_target is not None:
        overrides_schema = schema_target is not None and schema_target != runtime_target
        label = "runtime-alias-override" if overrides_schema else "alias"
        report.add_coercion(Coercion(path, raw, runtime_target, label))
        return runtime_target
    if schema_target is not None:
        report.add_coercion(Coercion(path, raw, schema_target, "alias"))
        return schema_target

    # 4. reserved fuzzy slot — intentionally not implemented.

    # 5. coerce_default — only honoured when it names a declared member.
    fallback = spec.coerce_default
    if fallback is not None and members is not None and fallback in members:
        report.add_coercion(Coercion(path, raw, fallback, "coerceDefault"))
        return fallback

    # 6. nothing matched.
    return MALFORMED
192
+
193
+
194
+ def _lookup_alias_in(raw: str, aliases: dict[str, str] | None, mode: str) -> str | None:
195
+ """Find ``raw`` in an alias map, matching keys exactly first then under ``mode``
196
+ normalization. Returns the target member, or ``None`` when no key matches."""
197
+ if not aliases:
198
+ return None
199
+ exact = aliases.get(raw)
200
+ if exact is not None:
201
+ return exact
202
+ if mode == _NORMALIZE_NONE:
203
+ return None
204
+ norm_raw = normalize_enum(raw, mode)
205
+ for key, target in aliases.items():
206
+ if normalize_enum(key, mode) == norm_raw:
207
+ return target
208
+ return None
209
+
210
+
211
def _coerce_int(
    raw: str, spec: FieldSpec, path: str, report: ExtractionReport, lenient: bool
) -> object:
    """Coerce ``raw`` to an integer (INT/LONG kinds), range-checked via ``_clamp``.

    An integer literal is parsed exactly; a decimal / scientific literal falls
    back to a float parse and is truncated toward zero by ``_clamp``. Returns
    MALFORMED when ``raw`` is not a canonical ASCII numeric literal, when the
    value is not representable as a finite double, or when ``_clamp`` rejects it.

    The exact ``int`` is handed to ``_clamp`` rather than ``float(int(...))``:
    the float round-trip silently changed integers above 2**53 and raised an
    uncaught ``OverflowError`` for literals beyond double range.
    """
    trimmed = raw.strip()
    if not _ASCII_NUMERIC.match(trimmed):
        return MALFORMED

    # Integer parse first (matches Java Long.parseLong / C# long.TryParse).
    try:
        exact: int | None = int(trimmed)
    except ValueError:
        # Not an integer literal (e.g. "1.5", "1e3"), or longer than the
        # interpreter's int-from-string digit limit: use the float fallback.
        exact = None

    if exact is not None:
        try:
            float(exact)
        except OverflowError:
            # Magnitude beyond double range: classify like the float path does
            # for an overflowing literal such as "1e999" (non-finite → MALFORMED).
            return MALFORMED
        return _clamp(exact, spec, path, report, as_long=True, lenient=lenient)

    # Float fallback (matches Java's Double.parseDouble fallback).
    try:
        d = float(trimmed)
    except ValueError:
        return MALFORMED
    return _clamp(d, spec, path, report, as_long=True, lenient=lenient)
228
+
229
+
230
def _coerce_double(
    raw: str, spec: FieldSpec, path: str, report: ExtractionReport, lenient: bool
) -> object:
    """Coerce ``raw`` to a float (DOUBLE kind), range-checked via ``_clamp``.

    Returns MALFORMED when the trimmed text is not a canonical ASCII numeric
    literal or cannot be parsed; otherwise returns whatever ``_clamp`` yields.
    """
    text = raw.strip()
    if _ASCII_NUMERIC.match(text) is None:
        return MALFORMED
    try:
        parsed = float(text)
    except ValueError:
        return MALFORMED
    else:
        return _clamp(parsed, spec, path, report, as_long=False, lenient=lenient)
241
+
242
+
243
+ def _clamp(
244
+ n: float, spec: FieldSpec, path: str, report: ExtractionReport, as_long: bool, lenient: bool
245
+ ) -> object:
246
+ """Apply the field's @min/@max range (sourced from its numeric validator). Under LENIENT
247
+ tolerance an out-of-range value is CLAMPED to the bound (recorded as a "clamp" coercion);
248
+ under STRICT tolerance it is MALFORMED (the validator's "value out of range" contract).
249
+ Cross-port: ports must match the lenient-clamp / strict-reject split."""
250
+ # Non-finite (NaN, ±Infinity) → MALFORMED (cross-port classification parity).
251
+ if not math.isfinite(n):
252
+ return MALFORMED
253
+ c = n
254
+ if spec.min is not None and c < spec.min:
255
+ c = spec.min
256
+ if spec.max is not None and c > spec.max:
257
+ c = spec.max
258
+ if c != n:
259
+ if not lenient: # STRICT: out-of-range is invalid, not silently clamped
260
+ return MALFORMED
261
+ report.add_coercion(Coercion(path, _num_str(n), _num_str(c), "clamp"))
262
+ # Truncate toward zero for integer kinds (math.trunc / int()).
263
+ return math.trunc(c) if as_long else c
264
+
265
+
266
+ def _num_str(n: float) -> str:
267
+ # Render integral floats without a trailing ".0" to read like Java/C# longs in notes.
268
+ if n == math.trunc(n) and math.isfinite(n):
269
+ return str(int(n))
270
+ return str(n)
271
+
272
+
273
+ def _coerce_bool(raw: str, ci: bool) -> object:
274
+ t = raw.strip().lower() if ci else raw.strip()
275
+ if t in ("true", "yes", "1"):
276
+ return True
277
+ if t in ("false", "no", "0"):
278
+ return False
279
+ return MALFORMED
@@ -0,0 +1,211 @@
1
+ """Public entry point. Runs the extract pipeline; never throws."""
2
+ from __future__ import annotations
3
+
4
+ from metaobjects.render.extract import coerce as _coerce
5
+ from metaobjects.render.extract import locate as _locate
6
+ from metaobjects.render.extract import strip as _strip
7
+ from metaobjects.render.extract.coerce import MALFORMED
8
+ from metaobjects.render.extract.json_forgiving_reader import (
9
+ NULL_LITERAL,
10
+ TRUNCATED,
11
+ JsonForgivingReader,
12
+ )
13
+ from metaobjects.render.extract.types import (
14
+ Coercion,
15
+ FieldKind,
16
+ FieldExtraction,
17
+ FieldSpec,
18
+ Format,
19
+ ExtractOptions,
20
+ ExtractionOutcome,
21
+ ExtractSchema,
22
+ ExtractionReport,
23
+ Tolerance,
24
+ )
25
+ from metaobjects.render.extract.xml_forgiving_reader import TEXT_KEY, XmlForgivingReader
26
+
27
+
28
def extract(
    text: str | None,
    schema: ExtractSchema,
    opts: ExtractOptions | None = None,
) -> ExtractionOutcome:
    """Run the extract pipeline over ``text`` and return the data plus its report.

    ``opts`` falls back to ``ExtractOptions.defaults()``. The payload is located
    and parsed according to ``schema.format``:

    * JSON: the located JSON span is read with ``JsonForgivingReader``.
    * XML with ``opts.rootless``: the whole stripped text is read as top-level
      elements (there is no enclosing root to locate).
    * XML otherwise: the ``schema.root_name`` span is located, then read.

    The report is marked empty when nothing was parsed and there was no text or
    no payload span.
    """
    options = opts if opts is not None else ExtractOptions.defaults()
    report = ExtractionReport()

    cleaned = _strip.strip(text)
    lenient = options.tolerance != Tolerance.STRICT

    span: str | None
    parsed: dict[str, object]
    if schema.format == Format.JSON:
        span = _locate.json(cleaned)
        parsed = JsonForgivingReader().read(span) if span is not None else {}
    elif options.rootless:
        # Rootless XML: the payload's fields are the top-level elements themselves.
        span = cleaned if cleaned != "" else None
        parsed = (
            XmlForgivingReader().read_rootless(cleaned, lenient) if span is not None else {}
        )
    else:
        span = _locate.xml(cleaned, schema.root_name, lenient)
        parsed = XmlForgivingReader().read(span, lenient) if span is not None else {}

    nothing_to_parse = cleaned == "" or span is None
    if not parsed and nothing_to_parse:
        report.mark_empty()

    data: dict[str, object] = {}
    _extract(schema.fields, parsed, "", data, report, options, lenient)
    return ExtractionOutcome(data=data, report=report)
62
+
63
+
64
def _extract(
    fields: list[FieldSpec],
    raw: dict[str, object],
    prefix: str,
    data: dict[str, object],
    report: ExtractionReport,
    o: ExtractOptions,
    ci: bool,
) -> None:
    """Fill ``data`` from ``raw`` for every spec in ``fields``, classifying each on ``report``.

    ``prefix`` is the dotted path of the enclosing object ("" at the top level);
    each field's report path is ``prefix + "." + name`` (or just ``name``).
    Values are stored in ``data`` under the field's simple name. ``ci`` enables
    the case-insensitive key lookup in ``_lookup``.

    Per field, in order: absent (a ``default_value`` is applied when it coerces,
    else LOST_REQUIRED / LOST_OPTIONAL) → TRUNCATED sentinel (MALFORMED) →
    NULL_LITERAL sentinel (LOST_*, no default applied) → array handling →
    a list where a singular value was expected (MALFORMED) → single-value coercion.
    """
    for f in fields:
        path = f.name if prefix == "" else prefix + "." + f.name
        # A @xmlText field reads the element's text body (carried under the #text sentinel when
        # the element also has attributes), not a same-named child element.
        present = raw.get(TEXT_KEY) if f.text_content else _lookup(raw, f.name, ci)
        if present is None:
            # FR-011 / Phase B: an absent field with a declared @default fills the
            # value → DEFAULTED (which satisfies a @required field). Generalized to
            # all field kinds: an enum default is its member string verbatim; a
            # non-enum default is coerced to the field's kind via the PURE
            # scalar_coerce (so @default "0" on field.int yields integer 0). A
            # non-coercible non-enum default is treated as no default.
            if f.default_value is not None:
                coerced = (
                    f.default_value
                    if f.kind == FieldKind.ENUM
                    else _coerce.scalar_coerce(f.default_value, f)
                )
                if coerced is not MALFORMED:
                    data[f.name] = coerced
                    report.add_coercion(Coercion(path, "", f.default_value, "default"))
                    report.set(path, FieldExtraction.DEFAULTED)
                    continue
            report.set(
                path,
                FieldExtraction.LOST_REQUIRED if f.required else FieldExtraction.LOST_OPTIONAL,
            )
            continue
        if present is TRUNCATED:  # present-but-garbled (empty/cut-off value)
            report.set(path, FieldExtraction.MALFORMED)
            continue
        if present is NULL_LITERAL:
            # The JSON null literal is the caller's explicit "no value": leave the field null
            # (do NOT apply @default — an explicit null is a value, not an omission), matching a
            # standard JSON bind. Without this the bare ``null`` token leaks as the string "null".
            report.set(
                path,
                FieldExtraction.LOST_REQUIRED if f.required else FieldExtraction.LOST_OPTIONAL,
            )
            continue
        if f.array:
            # A single non-list value is treated as a one-element array (e.g. a single
            # repeated-XML tag). Each element is coerced/recursed independently.
            elements = present if isinstance(present, list) else [present]
            out: list[object] = []
            any_malformed = False
            # Phase B (array-of-enum): an enum element flows through the SAME enum
            # coercion pipeline a scalar enum uses (_extract_value → coerce.value →
            # _coerce_enum) and is CLASSIFIED per element by indexed path (tags[0],
            # tags[1], …) exactly as a scalar enum: EXTRACTED / DEFAULTED (via
            # @coerceDefault) / MALFORMED. Non-enum scalar arrays keep their existing
            # behavior (coerced element list, no per-element states).
            enum_elements = f.kind == FieldKind.ENUM
            for idx, el in enumerate(elements):
                elem_path = f"{path}[{idx}]"
                v = _extract_value(f, el, elem_path, report, o, ci)
                if v is MALFORMED:
                    any_malformed = True
                    if enum_elements:
                        report.set(elem_path, FieldExtraction.MALFORMED)
                else:
                    out.append(v)
                    if enum_elements:
                        report.set(elem_path, _classify_coerced(elem_path, report))
            # Cross-port contract: a MALFORMED array still places its successfully-coerced
            # elements into data (partial extraction), UNLIKE a MALFORMED scalar which is
            # absent from data.
            data[f.name] = out
            report.set(
                path, FieldExtraction.MALFORMED if any_malformed else FieldExtraction.EXTRACTED
            )
            continue
        if isinstance(present, list):  # a list where a singular value was expected
            report.set(path, FieldExtraction.MALFORMED)
            continue
        v = _extract_value(f, present, path, report, o, ci)
        if v is MALFORMED:
            report.set(path, FieldExtraction.MALFORMED)
        else:
            data[f.name] = v
            # FR-011: a value reached via @coerceDefault (or @default) is DEFAULTED,
            # not EXTRACTED.
            report.set(path, _classify_coerced(path, report))
156
+
157
+
158
def _classify_coerced(path: str, report: ExtractionReport) -> FieldExtraction:
    """Classify a successfully coerced field as DEFAULTED or EXTRACTED.

    Only the most recently logged coercion for ``path`` counts: when its kind is
    ``coerceDefault`` or ``default`` the field is DEFAULTED. Any other kind, or
    no coercion logged for ``path`` at all, is EXTRACTED.
    """
    kinds_for_path = [c.kind for c in report.coercions() if c.field_path == path]
    last_kind: str | None = kinds_for_path[-1] if kinds_for_path else None
    if last_kind in ("coerceDefault", "default"):
        return FieldExtraction.DEFAULTED
    return FieldExtraction.EXTRACTED
172
+
173
+
174
def _extract_value(
    f: FieldSpec,
    present: object,
    path: str,
    report: ExtractionReport,
    o: ExtractOptions,
    ci: bool,
) -> object:
    """Coerce a single (non-array) element, returning the value or MALFORMED.

    OBJECT fields recurse into ``f.nested`` and return a fresh dict. Every other
    kind is stringified if needed and passed to ``coerce.value``.
    """
    # A JSON null element (e.g. inside [1, null, 3]) carries no value; reject it
    # here so the sentinel is never stringified.
    if present is NULL_LITERAL:
        return MALFORMED

    if f.kind == FieldKind.OBJECT:
        if f.nested is None or not isinstance(present, dict):
            return MALFORMED  # object expected but scalar/non-map present
        child: dict[str, object] = {}
        _extract(f.nested.fields, present, path, child, report, o, ci)
        return child

    # An XML text element that also carried attributes arrives as a dict with
    # its body under TEXT_KEY; scalar fields read that body and ignore the attributes.
    body = present
    if isinstance(body, dict) and TEXT_KEY in body:
        body = body[TEXT_KEY]
    text = body if isinstance(body, str) else str(body)
    return _coerce.value(text, f, o, path, report)
200
+
201
+
202
+ def _lookup(raw: dict[str, object], name: str, ci: bool) -> object | None:
203
+ """Case-folding lookup honoring tolerance."""
204
+ if name in raw:
205
+ return raw[name]
206
+ if ci:
207
+ lower = name.lower()
208
+ for k, v in raw.items():
209
+ if k.lower() == lower:
210
+ return v
211
+ return None
@@ -0,0 +1,61 @@
1
+ """Null-safe coercions from an ExtractionOutcome data map onto typed values.
2
+
3
+ Generated ``extract(...)`` code calls these helpers. Python has a single ``int``
4
+ type, so ``as_int`` and ``as_long`` are intentionally identical (both
5
+ ``Optional[int]``, truncating toward zero). ``bool`` is excluded from the numeric
6
+ helpers (it is an ``int`` subclass in Python, but a boolean is never a number here).
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import math
11
+ from collections.abc import Mapping
12
+
13
+
14
def as_string(d: Mapping[str, object], k: str) -> str | None:
    """Return ``d[k]`` as a string, or ``None`` when the key is absent or maps to ``None``.

    A ``str`` value is returned unchanged; any other value goes through ``str()``.
    """
    found = d.get(k)
    if found is None or isinstance(found, str):
        return found
    return str(found)
21
+
22
+
23
def as_int(d: Mapping[str, object], k: str) -> int | None:
    """Return ``d[k]`` as an ``int`` via ``_as_int``; ``None`` when it is not numeric."""
    result = _as_int(d, k)
    return result
25
+
26
+
27
def as_long(d: Mapping[str, object], k: str) -> int | None:
    """Return ``d[k]`` as an ``int`` via ``_as_int``; ``None`` when it is not numeric.

    Python has a single ``int`` type, so this is the same operation as ``as_int``.
    """
    result = _as_int(d, k)
    return result
30
+
31
+
32
+ def _as_int(d: Mapping[str, object], k: str) -> int | None:
33
+ v = d.get(k)
34
+ if isinstance(v, bool):
35
+ return None
36
+ if isinstance(v, int):
37
+ return v
38
+ if isinstance(v, float):
39
+ return math.trunc(v)
40
+ return None
41
+
42
+
43
def as_double(d: Mapping[str, object], k: str) -> float | None:
    """Return ``d[k]`` as a ``float``, or ``None`` when it is missing or not numeric.

    ``bool`` values are rejected even though ``bool`` is an ``int`` subclass.
    """
    found = d.get(k)
    is_number = isinstance(found, (int, float)) and not isinstance(found, bool)
    return float(found) if is_number else None
50
+
51
+
52
def as_bool(d: Mapping[str, object], k: str) -> bool | None:
    """Return ``d[k]`` when it is a real ``bool``; otherwise (or when missing) ``None``."""
    found = d.get(k)
    if isinstance(found, bool):
        return found
    return None
55
+
56
+
57
def as_string_list(d: Mapping[str, object], k: str) -> list[str | None] | None:
    """Return ``d[k]`` as a list of strings, or ``None`` when it is not a ``list``.

    ``None`` elements are kept as ``None``, ``str`` elements are kept unchanged,
    and every other element goes through ``str()``.
    """
    items = d.get(k)
    if not isinstance(items, list):
        return None
    converted: list[str | None] = []
    for item in items:
        if item is None or isinstance(item, str):
            converted.append(item)
        else:
            converted.append(str(item))
    return converted