metaobjects 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaobjects/__init__.py +75 -0
- metaobjects/agent_context/__init__.py +55 -0
- metaobjects/agent_context/_content/README.md +14 -0
- metaobjects/agent_context/_content/servers/csharp.meta.json +5 -0
- metaobjects/agent_context/_content/servers/java.meta.json +5 -0
- metaobjects/agent_context/_content/servers/kotlin.meta.json +5 -0
- metaobjects/agent_context/_content/servers/python.meta.json +5 -0
- metaobjects/agent_context/_content/servers/typescript.meta.json +5 -0
- metaobjects/agent_context/_content/skills/metaobjects-authoring/SKILL.md +301 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/SKILL.md +99 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/csharp.md +87 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/java.md +94 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/kotlin.md +110 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/typescript.md +135 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/SKILL.md +148 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/csharp.md +110 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/java.md +108 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/kotlin.md +130 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/python.md +116 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/typescript.md +150 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/SKILL.md +130 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/java.md +96 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/kotlin.md +99 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/react.md +86 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/tanstack.md +119 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/typescript.md +92 -0
- metaobjects/agent_context/_content/skills/metaobjects-verify/SKILL.md +107 -0
- metaobjects/agent_context/_content/skills/metaobjects-verify/references/migration.md +72 -0
- metaobjects/agent_context/_content/templates/always-on.md.mustache +27 -0
- metaobjects/agent_context/assemble.py +133 -0
- metaobjects/agent_context/content_root.py +54 -0
- metaobjects/agent_context/scaffold.py +191 -0
- metaobjects/agent_context/types.py +44 -0
- metaobjects/attr_class_map.py +23 -0
- metaobjects/cli.py +696 -0
- metaobjects/codegen/__init__.py +0 -0
- metaobjects/codegen/config.py +11 -0
- metaobjects/codegen/constants.py +13 -0
- metaobjects/codegen/extract_delegate_emitter.py +384 -0
- metaobjects/codegen/extract_schema_emitter.py +139 -0
- metaobjects/codegen/format.py +31 -0
- metaobjects/codegen/fr010_field_mapping.py +220 -0
- metaobjects/codegen/generator.py +62 -0
- metaobjects/codegen/generator_registry.py +163 -0
- metaobjects/codegen/generators/__init__.py +0 -0
- metaobjects/codegen/generators/entity_model.py +263 -0
- metaobjects/codegen/generators/extractor_generator.py +317 -0
- metaobjects/codegen/generators/filter_allowlist_generator.py +309 -0
- metaobjects/codegen/generators/m2m_codegen.py +192 -0
- metaobjects/codegen/generators/output_parser_generator.py +272 -0
- metaobjects/codegen/generators/output_prompt_generator.py +192 -0
- metaobjects/codegen/generators/payload_vo_generator.py +672 -0
- metaobjects/codegen/generators/render_helper_generator.py +451 -0
- metaobjects/codegen/generators/router_generator.py +635 -0
- metaobjects/codegen/generators/template_generator.py +70 -0
- metaobjects/codegen/generators/tph_plan.py +120 -0
- metaobjects/codegen/generators/trace_helper_generator.py +336 -0
- metaobjects/codegen/instance_artifacts.py +15 -0
- metaobjects/codegen/output_format_spec_emitter.py +79 -0
- metaobjects/codegen/overwrite_policy.py +27 -0
- metaobjects/codegen/runner.py +110 -0
- metaobjects/codegen/runtime/__init__.py +6 -0
- metaobjects/codegen/runtime/filter_parser.py +193 -0
- metaobjects/codegen/type_map.py +84 -0
- metaobjects/core_types.py +809 -0
- metaobjects/datatype.py +19 -0
- metaobjects/documentation/__init__.py +28 -0
- metaobjects/documentation/doc_constants.py +20 -0
- metaobjects/documentation/doc_provider.py +20 -0
- metaobjects/documentation/doc_schema.py +24 -0
- metaobjects/errors.py +124 -0
- metaobjects/loader/__init__.py +0 -0
- metaobjects/loader/merge.py +287 -0
- metaobjects/loader/meta_data_loader.py +245 -0
- metaobjects/loader/sources/__init__.py +24 -0
- metaobjects/loader/sources/directory_source.py +50 -0
- metaobjects/loader/sources/file_source.py +41 -0
- metaobjects/loader/sources/meta_data_source.py +67 -0
- metaobjects/loader/sources/uri_source.py +56 -0
- metaobjects/loader/validate_discriminator.py +181 -0
- metaobjects/loader/validate_field_readonly.py +146 -0
- metaobjects/loader/validate_source_parameter_ref.py +159 -0
- metaobjects/loader/validate_source_physical_names.py +140 -0
- metaobjects/loader/validation_passes.py +1513 -0
- metaobjects/meta/__init__.py +1 -0
- metaobjects/meta/core/__init__.py +0 -0
- metaobjects/meta/core/attr/__init__.py +0 -0
- metaobjects/meta/core/attr/attr_constants.py +31 -0
- metaobjects/meta/core/attr/meta_attr.py +136 -0
- metaobjects/meta/core/field/__init__.py +0 -0
- metaobjects/meta/core/field/field_constants.py +105 -0
- metaobjects/meta/core/field/meta_field.py +76 -0
- metaobjects/meta/core/identity/__init__.py +0 -0
- metaobjects/meta/core/identity/identity_constants.py +19 -0
- metaobjects/meta/core/identity/meta_identity.py +8 -0
- metaobjects/meta/core/object/__init__.py +0 -0
- metaobjects/meta/core/object/meta_object.py +65 -0
- metaobjects/meta/core/object/meta_object_aware.py +43 -0
- metaobjects/meta/core/object/object_class_registry.py +56 -0
- metaobjects/meta/core/object/object_constants.py +13 -0
- metaobjects/meta/core/object/object_extract.py +400 -0
- metaobjects/meta/core/object/value_object.py +70 -0
- metaobjects/meta/core/relationship/__init__.py +0 -0
- metaobjects/meta/core/relationship/derive_m2m_fields.py +180 -0
- metaobjects/meta/core/relationship/meta_relationship.py +54 -0
- metaobjects/meta/core/relationship/relationship_constants.py +51 -0
- metaobjects/meta/core/validator/__init__.py +0 -0
- metaobjects/meta/core/validator/validator_constants.py +18 -0
- metaobjects/meta/meta_data.py +206 -0
- metaobjects/meta/meta_root.py +8 -0
- metaobjects/meta/persistence/__init__.py +0 -0
- metaobjects/meta/persistence/db/__init__.py +1 -0
- metaobjects/meta/persistence/db/db_constants.py +41 -0
- metaobjects/meta/persistence/db/db_provider.py +60 -0
- metaobjects/meta/persistence/origin/__init__.py +0 -0
- metaobjects/meta/persistence/origin/meta_origin.py +8 -0
- metaobjects/meta/persistence/origin/origin_constants.py +20 -0
- metaobjects/meta/persistence/source/__init__.py +0 -0
- metaobjects/meta/persistence/source/meta_source.py +137 -0
- metaobjects/meta/persistence/source/source_constants.py +115 -0
- metaobjects/meta/presentation/__init__.py +0 -0
- metaobjects/meta/presentation/layout/__init__.py +0 -0
- metaobjects/meta/presentation/layout/layout_constants.py +13 -0
- metaobjects/meta/presentation/layout/meta_layout.py +8 -0
- metaobjects/meta/presentation/view/__init__.py +0 -0
- metaobjects/meta/presentation/view/meta_view.py +8 -0
- metaobjects/meta/presentation/view/view_constants.py +22 -0
- metaobjects/meta/template/__init__.py +0 -0
- metaobjects/meta/template/meta_template.py +46 -0
- metaobjects/meta/template/template_constants.py +112 -0
- metaobjects/meta/template/template_provider.py +43 -0
- metaobjects/parser.py +380 -0
- metaobjects/parser_yaml.py +82 -0
- metaobjects/provider.py +111 -0
- metaobjects/py.typed +0 -0
- metaobjects/registry.py +210 -0
- metaobjects/registry_manifest.py +223 -0
- metaobjects/render/__init__.py +74 -0
- metaobjects/render/email_document.py +14 -0
- metaobjects/render/escapers.py +109 -0
- metaobjects/render/extract/__init__.py +59 -0
- metaobjects/render/extract/coerce.py +279 -0
- metaobjects/render/extract/extract.py +211 -0
- metaobjects/render/extract/extract_map.py +61 -0
- metaobjects/render/extract/json_forgiving_reader.py +203 -0
- metaobjects/render/extract/locate.py +65 -0
- metaobjects/render/extract/normalize.py +96 -0
- metaobjects/render/extract/strip.py +20 -0
- metaobjects/render/extract/types.py +332 -0
- metaobjects/render/extract/xml_forgiving_reader.py +162 -0
- metaobjects/render/filesystem_provider.py +51 -0
- metaobjects/render/prompt/__init__.py +32 -0
- metaobjects/render/prompt/output_format_renderer.py +340 -0
- metaobjects/render/prompt/output_format_spec.py +28 -0
- metaobjects/render/prompt/prompt_field.py +29 -0
- metaobjects/render/prompt/prompt_overrides.py +29 -0
- metaobjects/render/prompt/prompt_style.py +38 -0
- metaobjects/render/renderer.py +358 -0
- metaobjects/render/verify.py +266 -0
- metaobjects/runtime/__init__.py +39 -0
- metaobjects/runtime/llm_recorder.py +210 -0
- metaobjects/runtime/n2m_resolver.py +155 -0
- metaobjects/runtime/object_manager.py +715 -0
- metaobjects/runtime/tph.py +50 -0
- metaobjects/serializer_json.py +172 -0
- metaobjects/shared/__init__.py +0 -0
- metaobjects/shared/base_types.py +16 -0
- metaobjects/shared/separators.py +4 -0
- metaobjects/shared/structural.py +9 -0
- metaobjects/source/__init__.py +79 -0
- metaobjects/source/error_source.py +266 -0
- metaobjects/source/json_path.py +106 -0
- metaobjects/source/semantic_diff.py +98 -0
- metaobjects/source/yaml_positions.py +174 -0
- metaobjects/super_resolve.py +128 -0
- metaobjects/yaml_desugar.py +481 -0
- metaobjects-0.9.0.dist-info/METADATA +97 -0
- metaobjects-0.9.0.dist-info/RECORD +181 -0
- metaobjects-0.9.0.dist-info/WHEEL +4 -0
- metaobjects-0.9.0.dist-info/entry_points.txt +2 -0
- metaobjects-0.9.0.dist-info/licenses/LICENSE +189 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""Stage 7: canonicalize a raw scalar string per its FieldSpec.
|
|
2
|
+
|
|
3
|
+
Returns the ``MALFORMED`` sentinel when the value is present but uncoercible.
|
|
4
|
+
|
|
5
|
+
Cross-port number rules (parity with C#/TS, an accepted divergence from Java):
|
|
6
|
+
|
|
7
|
+
- Non-finite (NaN / ±Infinity) → MALFORMED.
|
|
8
|
+
- Radix-prefixed strings (``0x..`` / ``0b..`` / ``0o..``) are REJECTED. Python's
|
|
9
|
+
``int(s, 0)`` would accept them but Java's ``Long.parseLong`` / C#'s
|
|
10
|
+
``long.TryParse`` reject them — so the C# and TS ports added this guard; we match.
|
|
11
|
+
- We do NOT replicate Java's ``Double.parseDouble`` suffix tolerance
|
|
12
|
+
(``"42d"`` / hex-float) — documented accepted divergence (same as C#/TS).
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import math
|
|
17
|
+
import re
|
|
18
|
+
from typing import Final
|
|
19
|
+
|
|
20
|
+
from metaobjects.render.extract.normalize import NONE as _NORMALIZE_NONE
|
|
21
|
+
from metaobjects.render.extract.normalize import normalize_enum
|
|
22
|
+
from metaobjects.render.extract.types import (
|
|
23
|
+
Coercion,
|
|
24
|
+
FieldKind,
|
|
25
|
+
FieldSpec,
|
|
26
|
+
ExtractOptions,
|
|
27
|
+
ExtractionReport,
|
|
28
|
+
Tolerance,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Sentinel: the value was present but could not be coerced to the declared kind/vocabulary.
|
|
32
|
+
MALFORMED: Final = object()
|
|
33
|
+
|
|
34
|
+
# A canonical ASCII numeric literal (int / decimal / scientific). Python's int()/float()
|
|
35
|
+
# are far more permissive than Java/C# numeric parsing — they accept underscore digit
|
|
36
|
+
# grouping ("1_000", PEP 515), Unicode digits (e.g. fullwidth "１２３"), and radix prefixes ("0x10"). Gating
|
|
37
|
+
# on this ASCII-only pattern rejects all of those → MALFORMED, matching the strict cross-port
|
|
38
|
+
# behavior (C#'s TryParse). `[0-9]` (not `\d`) keeps it ASCII-only.
|
|
39
|
+
_ASCII_NUMERIC = re.compile(r"^[+-]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][+-]?[0-9]+)?$")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def value(
    raw: str | None,
    spec: FieldSpec,
    opts: ExtractOptions,
    field_path: str,
    report: ExtractionReport,
) -> object:
    """Coerce ``raw`` to the native value described by ``spec``.

    Resolution order:

    1. A ``None`` input is MALFORMED.
    2. The ``opts.on_field`` hook: a non-``None`` result is returned as-is and
       logged as an ``"onField"`` coercion.
    3. A normalizer from ``opts.normalizers``, looked up by ``field_path`` and
       then by ``spec.name``: a non-``None`` result is returned as-is and logged
       as a ``"normalizer"`` coercion.
    4. Kind-specific coercion (enum / int / long / double / boolean); any other
       kind returns ``raw`` unchanged.

    Returns the coerced value, or the ``MALFORMED`` sentinel.
    """
    if raw is None:
        return MALFORMED

    # The caller-supplied hook gets the first chance to produce the value.
    hook = opts.on_field
    if hook is not None:
        override = hook(field_path, raw, spec)
        if override is not None:
            report.add_coercion(Coercion(field_path, raw, str(override), "onField"))
            return override

    # Per-field normalizer: the full path key wins over the simple field name.
    registry = opts.normalizers
    normalizer = registry.get(field_path)
    if normalizer is None:
        normalizer = registry.get(spec.name)
    if normalizer is not None:
        rewritten = normalizer(raw)
        if rewritten is not None:
            report.add_coercion(Coercion(field_path, raw, str(rewritten), "normalizer"))
            return rewritten

    # Anything other than STRICT tolerance enables the lenient behaviours.
    lenient = opts.tolerance != Tolerance.STRICT
    kind = spec.kind
    if kind == FieldKind.ENUM:
        return _coerce_enum(raw, spec, opts, field_path, report, lenient)
    if kind == FieldKind.INT or kind == FieldKind.LONG:
        return _coerce_int(raw, spec, field_path, report, lenient)
    if kind == FieldKind.DOUBLE:
        return _coerce_double(raw, spec, field_path, report, lenient)
    if kind == FieldKind.BOOLEAN:
        return _coerce_bool(raw, lenient)
    return raw
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def scalar_coerce(raw: str | None, spec: FieldSpec) -> object:
    """Side-effect-free coercion of a metadata ``@default`` string to ``spec.kind``.

    Unlike :func:`value`, this consults no hooks or normalizers, applies no
    range clamp and writes nothing to a report.

    - INT / LONG: an ASCII numeric literal; parsed as an integer first, else as
      a finite float truncated toward zero.
    - DOUBLE: an ASCII numeric literal that parses to a finite float.
    - BOOLEAN: ``true|yes|1`` / ``false|no|0`` after trimming and lower-casing.
    - Any other kind: ``raw`` is returned verbatim.

    Returns the coerced value, or ``MALFORMED`` when ``raw`` is ``None`` or
    cannot be coerced.
    """
    if raw is None:
        return MALFORMED

    kind = spec.kind
    if kind == FieldKind.INT or kind == FieldKind.LONG:
        text = raw.strip()
        if _ASCII_NUMERIC.match(text) is None:
            return MALFORMED
        try:
            return int(text)
        except ValueError:
            pass  # not a plain integer literal; try the float form below
        try:
            number = float(text)
        except ValueError:
            return MALFORMED
        if math.isfinite(number):
            return math.trunc(number)
        return MALFORMED

    if kind == FieldKind.DOUBLE:
        text = raw.strip()
        if _ASCII_NUMERIC.match(text) is None:
            return MALFORMED
        try:
            number = float(text)
        except ValueError:
            return MALFORMED
        if math.isfinite(number):
            return number
        return MALFORMED

    if kind == FieldKind.BOOLEAN:
        token = raw.strip().lower()
        if token in ("true", "yes", "1"):
            return True
        if token in ("false", "no", "0"):
            return False
        return MALFORMED

    # STRING / ENUM / OBJECT — verbatim
    return raw
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _coerce_enum(
    raw: str,
    spec: FieldSpec,
    opts: ExtractOptions,
    path: str,
    report: ExtractionReport,
    ci: bool,
) -> object:
    """Resolve ``raw`` to a member of ``spec.enum_values``.

    Steps, in order; the first one that succeeds wins:

    1. Exact match against a member (no coercion logged).
    2. Match under ``spec.normalize`` (logged as ``"normalize"``). Skipped when
       ``ci`` is False, which forces the mode to "none".
    3. Alias lookup: ``opts.aliases`` first, then ``spec.enum_alias``. A runtime
       alias that disagrees with a schema alias for the same input is logged as
       ``"runtime-alias-override"``, otherwise ``"alias"``.
    4. ``spec.coerce_default`` when it is itself a declared member (logged as
       ``"coerceDefault"``).
    5. ``MALFORMED``.
    """
    members = spec.enum_values
    mode = spec.normalize if ci else _NORMALIZE_NONE

    if members is not None:
        # 1. exact match.
        for member in members:
            if member == raw:
                return member

        # 2. normalized match (skipped when mode == none).
        if mode != _NORMALIZE_NONE:
            wanted = normalize_enum(raw, mode)
            for member in members:
                if normalize_enum(member, mode) == wanted:
                    report.add_coercion(Coercion(path, raw, member, "normalize"))
                    return member

    # 3. @enumAlias — runtime aliases win over schema aliases.
    from_runtime = _lookup_alias_in(raw, opts.aliases, mode)
    from_schema = _lookup_alias_in(raw, spec.enum_alias, mode)
    if from_runtime is not None:
        overrides_schema = from_schema is not None and from_schema != from_runtime
        label = "runtime-alias-override" if overrides_schema else "alias"
        report.add_coercion(Coercion(path, raw, from_runtime, label))
        return from_runtime
    if from_schema is not None:
        report.add_coercion(Coercion(path, raw, from_schema, "alias"))
        return from_schema

    # 4. reserved fuzzy slot — NOT implemented (FR-011 spec "Out of scope").

    # 5. @coerceDefault — only honoured when it names a declared member.
    fallback = spec.coerce_default
    if fallback is not None and members is not None and fallback in members:
        report.add_coercion(Coercion(path, raw, fallback, "coerceDefault"))
        return fallback

    # 6. MALFORMED.
    return MALFORMED
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _lookup_alias_in(raw: str, aliases: dict[str, str] | None, mode: str) -> str | None:
|
|
195
|
+
"""Find ``raw`` in an alias map, matching keys exactly first then under ``mode``
|
|
196
|
+
normalization. Returns the target member, or ``None`` when no key matches."""
|
|
197
|
+
if not aliases:
|
|
198
|
+
return None
|
|
199
|
+
exact = aliases.get(raw)
|
|
200
|
+
if exact is not None:
|
|
201
|
+
return exact
|
|
202
|
+
if mode == _NORMALIZE_NONE:
|
|
203
|
+
return None
|
|
204
|
+
norm_raw = normalize_enum(raw, mode)
|
|
205
|
+
for key, target in aliases.items():
|
|
206
|
+
if normalize_enum(key, mode) == norm_raw:
|
|
207
|
+
return target
|
|
208
|
+
return None
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _coerce_int(
    raw: str, spec: FieldSpec, path: str, report: ExtractionReport, lenient: bool
) -> object:
    """Coerce ``raw`` to an integer for INT / LONG fields, or return MALFORMED.

    The trimmed text must match the ASCII numeric pattern. It is parsed as an
    integer first and as a float second; the result is handed to :func:`_clamp`
    with ``as_long=True``, which range-checks it and truncates toward zero.

    Args:
        raw: The raw scalar text.
        spec: The field spec, passed through to ``_clamp``.
        path: The field path, passed through to ``_clamp``.
        report: The extraction report, passed through to ``_clamp``.
        lenient: Passed through to ``_clamp`` as its ``lenient`` flag.
    """
    trimmed = raw.strip()
    if not _ASCII_NUMERIC.match(trimmed):
        return MALFORMED

    # Integer parse first (matches Java Long.parseLong / C# long.TryParse).
    # ``float(int(...))`` raises OverflowError — not ValueError — once the
    # integer exceeds the float range, so both are caught and routed to the
    # float fallback instead of escaping to the caller. Only the parse is
    # guarded; errors raised inside ``_clamp`` are no longer swallowed here.
    try:
        parsed: float | None = float(int(trimmed))
    except (ValueError, OverflowError):
        parsed = None

    if parsed is None:
        # Float fallback (matches Java's Double.parseDouble fallback). An
        # over-large literal parses to infinity, which ``_clamp`` rejects.
        try:
            parsed = float(trimmed)
        except ValueError:
            return MALFORMED

    return _clamp(parsed, spec, path, report, as_long=True, lenient=lenient)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _coerce_double(
    raw: str, spec: FieldSpec, path: str, report: ExtractionReport, lenient: bool
) -> object:
    """Coerce ``raw`` to a float for DOUBLE fields, or return MALFORMED.

    The trimmed text must match the ASCII numeric pattern and parse with
    ``float``; the parsed number is then handed to :func:`_clamp` with
    ``as_long=False``.
    """
    text = raw.strip()
    if _ASCII_NUMERIC.match(text) is None:
        return MALFORMED
    try:
        number = float(text)
    except ValueError:
        return MALFORMED
    return _clamp(number, spec, path, report, as_long=False, lenient=lenient)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _clamp(
    n: float, spec: FieldSpec, path: str, report: ExtractionReport, as_long: bool, lenient: bool
) -> object:
    """Range-check ``n`` against ``spec.min`` / ``spec.max``.

    - A non-finite ``n`` (NaN, ±Infinity) is MALFORMED.
    - An out-of-range ``n`` is pulled to the violated bound when ``lenient`` is
      true (logged as a ``"clamp"`` coercion) and is MALFORMED otherwise.
    - When ``as_long`` is true the result is truncated toward zero; otherwise
      it is returned unchanged.

    Cross-port: ports must match the lenient-clamp / strict-reject split.
    """
    if not math.isfinite(n):
        return MALFORMED

    bounded = n
    lower = spec.min
    if lower is not None and bounded < lower:
        bounded = lower
    upper = spec.max
    if upper is not None and bounded > upper:
        bounded = upper

    if bounded != n:
        if not lenient:
            # STRICT: out-of-range is invalid, not silently clamped.
            return MALFORMED
        report.add_coercion(Coercion(path, _num_str(n), _num_str(bounded), "clamp"))

    if as_long:
        # Integer kinds truncate toward zero.
        return math.trunc(bounded)
    return bounded
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _num_str(n: float) -> str:
    """Render ``n`` for report notes.

    Finite integral values are printed without a trailing ``.0`` so they read
    like Java/C# longs; every other value uses ``str(n)``.
    """
    # Test finiteness first: math.trunc raises ValueError for NaN and
    # OverflowError for ±Infinity, so it must only run on finite input.
    if math.isfinite(n) and n == math.trunc(n):
        return str(int(n))
    return str(n)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _coerce_bool(raw: str, ci: bool) -> object:
    """Map a boolean token to ``True`` / ``False``, or return MALFORMED.

    Accepted tokens after trimming are ``true|yes|1`` and ``false|no|0``. They
    are compared case-insensitively when ``ci`` is true and exactly otherwise.
    """
    token = raw.strip()
    if ci:
        token = token.lower()
    if token in {"true", "yes", "1"}:
        return True
    if token in {"false", "no", "0"}:
        return False
    return MALFORMED
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
"""Public entry point. Runs the extract pipeline; never throws."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from metaobjects.render.extract import coerce as _coerce
|
|
5
|
+
from metaobjects.render.extract import locate as _locate
|
|
6
|
+
from metaobjects.render.extract import strip as _strip
|
|
7
|
+
from metaobjects.render.extract.coerce import MALFORMED
|
|
8
|
+
from metaobjects.render.extract.json_forgiving_reader import (
|
|
9
|
+
NULL_LITERAL,
|
|
10
|
+
TRUNCATED,
|
|
11
|
+
JsonForgivingReader,
|
|
12
|
+
)
|
|
13
|
+
from metaobjects.render.extract.types import (
|
|
14
|
+
Coercion,
|
|
15
|
+
FieldKind,
|
|
16
|
+
FieldExtraction,
|
|
17
|
+
FieldSpec,
|
|
18
|
+
Format,
|
|
19
|
+
ExtractOptions,
|
|
20
|
+
ExtractionOutcome,
|
|
21
|
+
ExtractSchema,
|
|
22
|
+
ExtractionReport,
|
|
23
|
+
Tolerance,
|
|
24
|
+
)
|
|
25
|
+
from metaobjects.render.extract.xml_forgiving_reader import TEXT_KEY, XmlForgivingReader
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def extract(
    text: str | None,
    schema: ExtractSchema,
    opts: ExtractOptions | None = None,
) -> ExtractionOutcome:
    """Extract structured data from dirty ``text`` per ``schema``. Never raises.

    The text is stripped, the payload is located and read according to
    ``schema.format`` and ``opts.rootless``, and each schema field is then
    resolved into the outcome's data map and report. ``opts`` defaults to
    ``ExtractOptions.defaults()``.
    """
    o = opts if opts is not None else ExtractOptions.defaults()
    report = ExtractionReport()
    data: dict[str, object] = {}

    stripped = _strip.strip(text)
    ci = o.tolerance != Tolerance.STRICT

    # JSON: locate the JSON span. Rootless XML: the fields ARE the top-level
    # elements, so the whole stripped text is the span. Rooted XML: locate the
    # <root_name> span. Mirrors Java Extract.extract.
    span: str | None
    raw: dict[str, object] = {}
    if schema.format == Format.JSON:
        span = _locate.json(stripped)
        if span is not None:
            raw = JsonForgivingReader().read(span)
    elif o.rootless:
        span = stripped if stripped != "" else None
        if span is not None:
            raw = XmlForgivingReader().read_rootless(stripped, ci)
    else:
        span = _locate.xml(stripped, schema.root_name, ci)
        if span is not None:
            raw = XmlForgivingReader().read(span, ci)

    # Nothing parsed AND nothing to parse from: flag the outcome as empty.
    nothing_located = stripped == "" or span is None
    if nothing_located and not raw:
        report.mark_empty()

    _extract(schema.fields, raw, "", data, report, o, ci)
    return ExtractionOutcome(data=data, report=report)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _extract(
    fields: list[FieldSpec],
    raw: dict[str, object],
    prefix: str,
    data: dict[str, object],
    report: ExtractionReport,
    o: ExtractOptions,
    ci: bool,
) -> None:
    """Resolve every spec in ``fields`` against ``raw``, filling ``data`` and ``report``.

    Per field, keyed in ``report`` by its dotted path under ``prefix``:

    - absent: a coercible ``@default`` fills the value (DEFAULTED); otherwise
      the field is LOST_REQUIRED / LOST_OPTIONAL;
    - ``TRUNCATED`` sentinel: MALFORMED;
    - ``NULL_LITERAL`` sentinel: LOST_REQUIRED / LOST_OPTIONAL, no default applied;
    - array field: each element is coerced independently; the coerced elements
      are always stored, and the field is MALFORMED if any element failed;
    - a list where a singular value was expected: MALFORMED;
    - otherwise: the coerced scalar / nested object is stored and classified.
    """
    for f in fields:
        path = ".".join((prefix, f.name)) if prefix != "" else f.name

        # A @xmlText field reads the element's text body (carried under the
        # #text sentinel key), not a same-named child element.
        if f.text_content:
            present = raw.get(TEXT_KEY)
        else:
            present = _lookup(raw, f.name, ci)

        if present is None:
            # FR-011 / Phase B: an absent field with a declared @default takes
            # that value. An enum default is used verbatim; any other kind goes
            # through the pure scalar_coerce. An uncoercible default counts as
            # no default at all.
            fallback = f.default_value
            if fallback is not None:
                if f.kind == FieldKind.ENUM:
                    filled = fallback
                else:
                    filled = _coerce.scalar_coerce(fallback, f)
                if filled is not MALFORMED:
                    data[f.name] = filled
                    report.add_coercion(Coercion(path, "", fallback, "default"))
                    report.set(path, FieldExtraction.DEFAULTED)
                    continue
            report.set(
                path,
                FieldExtraction.LOST_REQUIRED if f.required else FieldExtraction.LOST_OPTIONAL,
            )
        elif present is TRUNCATED:
            # present-but-garbled (empty/cut-off value)
            report.set(path, FieldExtraction.MALFORMED)
        elif present is NULL_LITERAL:
            # An explicit JSON null is a value, not an omission: leave the
            # field unset and do NOT apply @default.
            report.set(
                path,
                FieldExtraction.LOST_REQUIRED if f.required else FieldExtraction.LOST_OPTIONAL,
            )
        elif f.array:
            # A single non-list value is treated as a one-element array.
            items = present if isinstance(present, list) else [present]
            kept: list[object] = []
            saw_malformed = False
            # Only enum arrays record a per-element state under an indexed
            # path (name[0], name[1], …); other arrays record the field only.
            per_element_states = f.kind == FieldKind.ENUM
            for position, item in enumerate(items):
                item_path = f"{path}[{position}]"
                coerced = _extract_value(f, item, item_path, report, o, ci)
                failed = coerced is MALFORMED
                if failed:
                    saw_malformed = True
                else:
                    kept.append(coerced)
                if per_element_states:
                    report.set(
                        item_path,
                        FieldExtraction.MALFORMED
                        if failed
                        else _classify_coerced(item_path, report),
                    )
            # Cross-port contract: a MALFORMED array still exposes its good
            # elements in data, unlike a MALFORMED scalar.
            data[f.name] = kept
            report.set(
                path, FieldExtraction.MALFORMED if saw_malformed else FieldExtraction.EXTRACTED
            )
        elif isinstance(present, list):
            # a list where a singular value was expected
            report.set(path, FieldExtraction.MALFORMED)
        else:
            coerced = _extract_value(f, present, path, report, o, ci)
            if coerced is MALFORMED:
                report.set(path, FieldExtraction.MALFORMED)
            else:
                data[f.name] = coerced
                # FR-011: a value reached via a default-class coercion is
                # DEFAULTED, not EXTRACTED.
                report.set(path, _classify_coerced(path, report))
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _classify_coerced(path: str, report: ExtractionReport) -> FieldExtraction:
    """Classify a successfully coerced field as DEFAULTED or EXTRACTED.

    The field is DEFAULTED when the last coercion logged for ``path`` has kind
    ``"coerceDefault"`` or ``"default"``. It is EXTRACTED otherwise, including
    when no coercion was logged for ``path`` at all.
    """
    kinds_for_path = [entry.kind for entry in report.coercions() if entry.field_path == path]
    last_kind = kinds_for_path[-1] if kinds_for_path else None
    if last_kind in ("coerceDefault", "default"):
        return FieldExtraction.DEFAULTED
    return FieldExtraction.EXTRACTED
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _extract_value(
    f: FieldSpec,
    present: object,
    path: str,
    report: ExtractionReport,
    o: ExtractOptions,
    ci: bool,
) -> object:
    """Coerce a single (non-array) element of field ``f``.

    - The ``NULL_LITERAL`` sentinel is MALFORMED.
    - OBJECT fields recurse into ``f.nested`` when ``present`` is a dict and
      return the nested data map; anything else is MALFORMED.
    - Scalar fields unwrap a dict carrying ``TEXT_KEY`` to its text body,
      stringify non-string input, and delegate to ``coerce.value``.
    """
    if present is NULL_LITERAL:
        # A JSON null element (e.g. [1, null, 3]) carries no value; reject it
        # rather than letting the sentinel stringify.
        return MALFORMED

    if f.kind == FieldKind.OBJECT:
        if f.nested is None or not isinstance(present, dict):
            return MALFORMED  # object expected but scalar/non-map present
        child: dict[str, object] = {}
        _extract(f.nested.fields, present, path, child, report, o, ci)
        return child

    # A text element that also carried XML attributes arrives as a dict with
    # its body under TEXT_KEY; a scalar field reads that body and ignores the
    # attributes.
    if isinstance(present, dict) and TEXT_KEY in present:
        present = present[TEXT_KEY]

    text = present if isinstance(present, str) else str(present)
    return _coerce.value(text, f, o, path, report)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _lookup(raw: dict[str, object], name: str, ci: bool) -> object | None:
|
|
203
|
+
"""Case-folding lookup honoring tolerance."""
|
|
204
|
+
if name in raw:
|
|
205
|
+
return raw[name]
|
|
206
|
+
if ci:
|
|
207
|
+
lower = name.lower()
|
|
208
|
+
for k, v in raw.items():
|
|
209
|
+
if k.lower() == lower:
|
|
210
|
+
return v
|
|
211
|
+
return None
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Null-safe coercions from an ExtractionOutcome data map onto typed values.
|
|
2
|
+
|
|
3
|
+
Generated ``extract(...)`` code calls these helpers. Python has a single ``int``
|
|
4
|
+
type, so ``as_int`` and ``as_long`` are intentionally identical (both
|
|
5
|
+
``Optional[int]``, truncating toward zero). ``bool`` is excluded from the numeric
|
|
6
|
+
helpers (it is an ``int`` subclass in Python, but a boolean is never a number here).
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import math
|
|
11
|
+
from collections.abc import Mapping
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def as_string(d: Mapping[str, object], k: str) -> str | None:
|
|
15
|
+
if k not in d:
|
|
16
|
+
return None
|
|
17
|
+
v = d[k]
|
|
18
|
+
if v is None:
|
|
19
|
+
return None
|
|
20
|
+
return v if isinstance(v, str) else str(v)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def as_int(d: Mapping[str, object], k: str) -> int | None:
|
|
24
|
+
return _as_int(d, k)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def as_long(d: Mapping[str, object], k: str) -> int | None:
|
|
28
|
+
# Python has one int type; as_int and as_long are the same (truncate toward zero).
|
|
29
|
+
return _as_int(d, k)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _as_int(d: Mapping[str, object], k: str) -> int | None:
|
|
33
|
+
v = d.get(k)
|
|
34
|
+
if isinstance(v, bool):
|
|
35
|
+
return None
|
|
36
|
+
if isinstance(v, int):
|
|
37
|
+
return v
|
|
38
|
+
if isinstance(v, float):
|
|
39
|
+
return math.trunc(v)
|
|
40
|
+
return None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def as_double(d: Mapping[str, object], k: str) -> float | None:
|
|
44
|
+
v = d.get(k)
|
|
45
|
+
if isinstance(v, bool):
|
|
46
|
+
return None
|
|
47
|
+
if isinstance(v, (int, float)):
|
|
48
|
+
return float(v)
|
|
49
|
+
return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def as_bool(d: Mapping[str, object], k: str) -> bool | None:
|
|
53
|
+
v = d.get(k)
|
|
54
|
+
return v if isinstance(v, bool) else None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def as_string_list(d: Mapping[str, object], k: str) -> list[str | None] | None:
|
|
58
|
+
v = d.get(k)
|
|
59
|
+
if not isinstance(v, list):
|
|
60
|
+
return None
|
|
61
|
+
return [None if e is None else (e if isinstance(e, str) else str(e)) for e in v]
|