metaobjects 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaobjects/__init__.py +75 -0
- metaobjects/agent_context/__init__.py +55 -0
- metaobjects/agent_context/_content/README.md +14 -0
- metaobjects/agent_context/_content/servers/csharp.meta.json +5 -0
- metaobjects/agent_context/_content/servers/java.meta.json +5 -0
- metaobjects/agent_context/_content/servers/kotlin.meta.json +5 -0
- metaobjects/agent_context/_content/servers/python.meta.json +5 -0
- metaobjects/agent_context/_content/servers/typescript.meta.json +5 -0
- metaobjects/agent_context/_content/skills/metaobjects-authoring/SKILL.md +301 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/SKILL.md +99 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/csharp.md +87 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/java.md +94 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/kotlin.md +110 -0
- metaobjects/agent_context/_content/skills/metaobjects-codegen/references/typescript.md +135 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/SKILL.md +148 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/csharp.md +110 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/java.md +108 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/kotlin.md +130 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/python.md +116 -0
- metaobjects/agent_context/_content/skills/metaobjects-prompts/references/typescript.md +150 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/SKILL.md +130 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/java.md +96 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/kotlin.md +99 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/react.md +86 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/tanstack.md +119 -0
- metaobjects/agent_context/_content/skills/metaobjects-runtime-ui/references/typescript.md +92 -0
- metaobjects/agent_context/_content/skills/metaobjects-verify/SKILL.md +107 -0
- metaobjects/agent_context/_content/skills/metaobjects-verify/references/migration.md +72 -0
- metaobjects/agent_context/_content/templates/always-on.md.mustache +27 -0
- metaobjects/agent_context/assemble.py +133 -0
- metaobjects/agent_context/content_root.py +54 -0
- metaobjects/agent_context/scaffold.py +191 -0
- metaobjects/agent_context/types.py +44 -0
- metaobjects/attr_class_map.py +23 -0
- metaobjects/cli.py +696 -0
- metaobjects/codegen/__init__.py +0 -0
- metaobjects/codegen/config.py +11 -0
- metaobjects/codegen/constants.py +13 -0
- metaobjects/codegen/extract_delegate_emitter.py +384 -0
- metaobjects/codegen/extract_schema_emitter.py +139 -0
- metaobjects/codegen/format.py +31 -0
- metaobjects/codegen/fr010_field_mapping.py +220 -0
- metaobjects/codegen/generator.py +62 -0
- metaobjects/codegen/generator_registry.py +163 -0
- metaobjects/codegen/generators/__init__.py +0 -0
- metaobjects/codegen/generators/entity_model.py +263 -0
- metaobjects/codegen/generators/extractor_generator.py +317 -0
- metaobjects/codegen/generators/filter_allowlist_generator.py +309 -0
- metaobjects/codegen/generators/m2m_codegen.py +192 -0
- metaobjects/codegen/generators/output_parser_generator.py +272 -0
- metaobjects/codegen/generators/output_prompt_generator.py +192 -0
- metaobjects/codegen/generators/payload_vo_generator.py +672 -0
- metaobjects/codegen/generators/render_helper_generator.py +451 -0
- metaobjects/codegen/generators/router_generator.py +635 -0
- metaobjects/codegen/generators/template_generator.py +70 -0
- metaobjects/codegen/generators/tph_plan.py +120 -0
- metaobjects/codegen/generators/trace_helper_generator.py +336 -0
- metaobjects/codegen/instance_artifacts.py +15 -0
- metaobjects/codegen/output_format_spec_emitter.py +79 -0
- metaobjects/codegen/overwrite_policy.py +27 -0
- metaobjects/codegen/runner.py +110 -0
- metaobjects/codegen/runtime/__init__.py +6 -0
- metaobjects/codegen/runtime/filter_parser.py +193 -0
- metaobjects/codegen/type_map.py +84 -0
- metaobjects/core_types.py +809 -0
- metaobjects/datatype.py +19 -0
- metaobjects/documentation/__init__.py +28 -0
- metaobjects/documentation/doc_constants.py +20 -0
- metaobjects/documentation/doc_provider.py +20 -0
- metaobjects/documentation/doc_schema.py +24 -0
- metaobjects/errors.py +124 -0
- metaobjects/loader/__init__.py +0 -0
- metaobjects/loader/merge.py +287 -0
- metaobjects/loader/meta_data_loader.py +245 -0
- metaobjects/loader/sources/__init__.py +24 -0
- metaobjects/loader/sources/directory_source.py +50 -0
- metaobjects/loader/sources/file_source.py +41 -0
- metaobjects/loader/sources/meta_data_source.py +67 -0
- metaobjects/loader/sources/uri_source.py +56 -0
- metaobjects/loader/validate_discriminator.py +181 -0
- metaobjects/loader/validate_field_readonly.py +146 -0
- metaobjects/loader/validate_source_parameter_ref.py +159 -0
- metaobjects/loader/validate_source_physical_names.py +140 -0
- metaobjects/loader/validation_passes.py +1513 -0
- metaobjects/meta/__init__.py +1 -0
- metaobjects/meta/core/__init__.py +0 -0
- metaobjects/meta/core/attr/__init__.py +0 -0
- metaobjects/meta/core/attr/attr_constants.py +31 -0
- metaobjects/meta/core/attr/meta_attr.py +136 -0
- metaobjects/meta/core/field/__init__.py +0 -0
- metaobjects/meta/core/field/field_constants.py +105 -0
- metaobjects/meta/core/field/meta_field.py +76 -0
- metaobjects/meta/core/identity/__init__.py +0 -0
- metaobjects/meta/core/identity/identity_constants.py +19 -0
- metaobjects/meta/core/identity/meta_identity.py +8 -0
- metaobjects/meta/core/object/__init__.py +0 -0
- metaobjects/meta/core/object/meta_object.py +65 -0
- metaobjects/meta/core/object/meta_object_aware.py +43 -0
- metaobjects/meta/core/object/object_class_registry.py +56 -0
- metaobjects/meta/core/object/object_constants.py +13 -0
- metaobjects/meta/core/object/object_extract.py +400 -0
- metaobjects/meta/core/object/value_object.py +70 -0
- metaobjects/meta/core/relationship/__init__.py +0 -0
- metaobjects/meta/core/relationship/derive_m2m_fields.py +180 -0
- metaobjects/meta/core/relationship/meta_relationship.py +54 -0
- metaobjects/meta/core/relationship/relationship_constants.py +51 -0
- metaobjects/meta/core/validator/__init__.py +0 -0
- metaobjects/meta/core/validator/validator_constants.py +18 -0
- metaobjects/meta/meta_data.py +206 -0
- metaobjects/meta/meta_root.py +8 -0
- metaobjects/meta/persistence/__init__.py +0 -0
- metaobjects/meta/persistence/db/__init__.py +1 -0
- metaobjects/meta/persistence/db/db_constants.py +41 -0
- metaobjects/meta/persistence/db/db_provider.py +60 -0
- metaobjects/meta/persistence/origin/__init__.py +0 -0
- metaobjects/meta/persistence/origin/meta_origin.py +8 -0
- metaobjects/meta/persistence/origin/origin_constants.py +20 -0
- metaobjects/meta/persistence/source/__init__.py +0 -0
- metaobjects/meta/persistence/source/meta_source.py +137 -0
- metaobjects/meta/persistence/source/source_constants.py +115 -0
- metaobjects/meta/presentation/__init__.py +0 -0
- metaobjects/meta/presentation/layout/__init__.py +0 -0
- metaobjects/meta/presentation/layout/layout_constants.py +13 -0
- metaobjects/meta/presentation/layout/meta_layout.py +8 -0
- metaobjects/meta/presentation/view/__init__.py +0 -0
- metaobjects/meta/presentation/view/meta_view.py +8 -0
- metaobjects/meta/presentation/view/view_constants.py +22 -0
- metaobjects/meta/template/__init__.py +0 -0
- metaobjects/meta/template/meta_template.py +46 -0
- metaobjects/meta/template/template_constants.py +112 -0
- metaobjects/meta/template/template_provider.py +43 -0
- metaobjects/parser.py +380 -0
- metaobjects/parser_yaml.py +82 -0
- metaobjects/provider.py +111 -0
- metaobjects/py.typed +0 -0
- metaobjects/registry.py +210 -0
- metaobjects/registry_manifest.py +223 -0
- metaobjects/render/__init__.py +74 -0
- metaobjects/render/email_document.py +14 -0
- metaobjects/render/escapers.py +109 -0
- metaobjects/render/extract/__init__.py +59 -0
- metaobjects/render/extract/coerce.py +279 -0
- metaobjects/render/extract/extract.py +211 -0
- metaobjects/render/extract/extract_map.py +61 -0
- metaobjects/render/extract/json_forgiving_reader.py +203 -0
- metaobjects/render/extract/locate.py +65 -0
- metaobjects/render/extract/normalize.py +96 -0
- metaobjects/render/extract/strip.py +20 -0
- metaobjects/render/extract/types.py +332 -0
- metaobjects/render/extract/xml_forgiving_reader.py +162 -0
- metaobjects/render/filesystem_provider.py +51 -0
- metaobjects/render/prompt/__init__.py +32 -0
- metaobjects/render/prompt/output_format_renderer.py +340 -0
- metaobjects/render/prompt/output_format_spec.py +28 -0
- metaobjects/render/prompt/prompt_field.py +29 -0
- metaobjects/render/prompt/prompt_overrides.py +29 -0
- metaobjects/render/prompt/prompt_style.py +38 -0
- metaobjects/render/renderer.py +358 -0
- metaobjects/render/verify.py +266 -0
- metaobjects/runtime/__init__.py +39 -0
- metaobjects/runtime/llm_recorder.py +210 -0
- metaobjects/runtime/n2m_resolver.py +155 -0
- metaobjects/runtime/object_manager.py +715 -0
- metaobjects/runtime/tph.py +50 -0
- metaobjects/serializer_json.py +172 -0
- metaobjects/shared/__init__.py +0 -0
- metaobjects/shared/base_types.py +16 -0
- metaobjects/shared/separators.py +4 -0
- metaobjects/shared/structural.py +9 -0
- metaobjects/source/__init__.py +79 -0
- metaobjects/source/error_source.py +266 -0
- metaobjects/source/json_path.py +106 -0
- metaobjects/source/semantic_diff.py +98 -0
- metaobjects/source/yaml_positions.py +174 -0
- metaobjects/super_resolve.py +128 -0
- metaobjects/yaml_desugar.py +481 -0
- metaobjects-0.9.0.dist-info/METADATA +97 -0
- metaobjects-0.9.0.dist-info/RECORD +181 -0
- metaobjects-0.9.0.dist-info/WHEEL +4 -0
- metaobjects-0.9.0.dist-info/entry_points.txt +2 -0
- metaobjects-0.9.0.dist-info/licenses/LICENSE +189 -0
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Stage-4 tolerant JSON reader for the bounded corpus malformation set. Never throws.
|
|
2
|
+
|
|
3
|
+
Carries the FR-010 fixed-behavior edge cases:
|
|
4
|
+
|
|
5
|
+
- No-hang: ``{"xs":[}`` / ``{"xs":[1,`` terminate (no infinite loop).
|
|
6
|
+
- TRUNCATED sentinel: a present-but-cut-off/empty value is recorded as ``TRUNCATED``
|
|
7
|
+
so the extract stage classifies it as MALFORMED (not LOST_REQUIRED).
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Final
|
|
12
|
+
|
|
13
|
+
# Sentinel: a key appeared in the text but its value was empty/cut-off (present-but-garbled).
TRUNCATED: Final = object()

# Sentinel: the JSON ``null`` literal. Distinct from a Python ``None`` return (which this reader
# uses internally for "no token / garbled") and from the 4-char string ``"null"``. The extract
# phase maps this to an actual null field value (JSON null -> None), instead of letting the bare
# ``null`` literal leak through as the text ``"null"``.
NULL_LITERAL: Final = object()

# Max container nesting before the reader stops recursing. Python's recursion limit is far
# lower than the JVM/.NET stack, so a pathologically deep input (hundreds of nested brackets
# in adversarial LLM output) would raise RecursionError — violating the never-throws contract.
# Past this depth the container is skipped (string-aware, non-recursive) and recorded as
# garbled. Far above any realistic payload nesting.
_MAX_DEPTH: Final = 100

# Single-character escapes that denote a control character (RFC 8259 section 7). Every other
# escaped character (quote, backslash, slash, unknown letters) stands for itself.
_SIMPLE_ESCAPES: Final = {"n": "\n", "t": "\t", "r": "\r", "b": "\b", "f": "\f"}

# Explicit ASCII hex alphabet: ``int(x, 16)`` alone would also accept signs, spaces and ``_``.
_HEX_DIGITS: Final = frozenset("0123456789abcdefABCDEF")


class JsonForgivingReader:
    """Tolerant recursive-descent reader for a JSON-like object span.

    Accepts single- or double-quoted strings, bare (unquoted) keys, bare scalars and
    trailing commas, and stops quietly on truncation instead of raising. Scalars that
    are not quoted are returned as their trimmed source text (``"12"``, ``"true"``);
    only the bare ``null`` literal is mapped, to ``NULL_LITERAL``.

    The cursor state lives on the instance and is reset by every :meth:`read` call.
    """

    def __init__(self) -> None:
        self._s: str = ""  # text being parsed
        self._i: int = 0  # cursor into ``_s``
        self._depth: int = 0  # current container nesting (guards recursion)

    def read(self, span: str | None) -> dict[str, object]:
        """Parse ``span`` as a forgiving JSON object; empty dict for garbage/non-object root."""
        self._s = span if span is not None else ""
        self._i = 0
        self._depth = 0
        self._ws()
        if self._i >= len(self._s) or self._s[self._i] != "{":
            return {}
        o = self._read_value()
        return o if isinstance(o, dict) else {}

    def _read_value(self) -> object | None:
        """Read one value at the cursor; ``None`` means "no token / garbled"."""
        self._ws()
        if self._i >= len(self._s):
            return None
        c = self._s[self._i]
        if c == "{" or c == "[":
            if self._depth >= _MAX_DEPTH:
                self._skip_balanced()  # too deep: consume it, record as garbled (→ MALFORMED)
                return None
            self._depth += 1
            try:
                return self._read_object() if c == "{" else self._read_array()
            finally:
                self._depth -= 1
        if c in ('"', "'"):
            return self._read_string(c)
        return self._read_bare_scalar()

    def _skip_balanced(self) -> None:
        """Consume a balanced ``{...}``/``[...]`` (or to EOF) without recursing, string-aware."""
        depth = 0
        n = len(self._s)
        while self._i < n:
            ch = self._s[self._i]
            if ch in ('"', "'"):
                self._read_string(ch)  # advances past the quoted string (handles escapes)
                continue
            self._i += 1
            if ch == "{" or ch == "[":
                depth += 1
            elif ch == "}" or ch == "]":
                depth -= 1
                if depth <= 0:
                    return

    def _read_object(self) -> dict[str, object]:
        """Read ``{...}`` starting at the ``{`` under the cursor.

        Returns whatever was collected when the text is cut off. A key whose value is
        missing, empty or skipped is always recorded as ``TRUNCATED``; the internal
        ``None`` marker is never stored in the result.
        """
        m: dict[str, object] = {}
        self._i += 1  # consume '{'
        while True:
            self._ws()
            if self._i >= len(self._s):
                return m  # truncation
            if self._s[self._i] == "}":
                self._i += 1
                return m
            key = self._read_key()
            if key is None:
                return m  # truncation mid-key (also guarantees forward progress)
            self._ws()
            if self._i >= len(self._s) or self._s[self._i] != ":":
                return m  # truncation before value
            self._i += 1  # consume ':'
            self._ws()
            if self._i >= len(self._s):
                m[key] = TRUNCATED  # value cut off at EOF → present-but-garbled
                return m
            v = self._read_value()
            # Present key with an empty/zero-width/skipped value → present-but-garbled.
            # Recorded once here so no later branch can overwrite it with a bare None.
            m[key] = TRUNCATED if v is None else v
            self._ws()
            if self._i < len(self._s) and self._s[self._i] == ",":
                self._i += 1  # optional/trailing comma
            # A closing '}' (or EOF, or junk) is handled at the top of the loop.

    def _read_array(self) -> list[object]:
        """Read ``[...]`` starting at the ``[`` under the cursor; stops at the first non-value."""
        xs: list[object] = []
        self._i += 1  # consume '['
        while True:
            self._ws()
            if self._i >= len(self._s):
                return xs
            if self._s[self._i] == "]":
                self._i += 1
                return xs
            if self._s[self._i] == "}":
                self._i += 1
                return xs  # malformed brace-close terminates array
            v = self._read_value()
            if v is None:
                # zero-width / no value → stop (no spin)
                self._ws()
                if self._i < len(self._s) and self._s[self._i] in ("]", "}"):
                    self._i += 1
                return xs
            xs.append(v)
            self._ws()
            if self._i < len(self._s) and self._s[self._i] == ",":
                self._i += 1
            elif self._i < len(self._s) and self._s[self._i] == "]":
                self._i += 1
                return xs
            else:
                # EOF or any other non-separator char → stop
                return xs

    def _read_key(self) -> str | None:
        """Read a quoted key, or a bare run of alphanumerics/underscores; ``None`` if absent."""
        self._ws()
        if self._i >= len(self._s):
            return None
        c = self._s[self._i]
        if c in ('"', "'"):
            return self._read_string(c)
        start = self._i
        while self._i < len(self._s) and (self._s[self._i].isalnum() or self._s[self._i] == "_"):
            self._i += 1
        return self._s[start : self._i] if self._i > start else None

    def _read_string(self, quote: str) -> str:
        """Read a string delimited by ``quote``, decoding backslash escapes.

        ``\\uXXXX`` is decoded when followed by four hex digits; any other escaped
        character goes through ``_unescape``. An unterminated string yields the text
        read so far, and a lone backslash at EOF is dropped.
        """
        text = self._s
        end = len(text)
        self._i += 1  # opening quote
        out: list[str] = []
        while self._i < end:
            c = text[self._i]
            self._i += 1
            if c == quote:
                return "".join(out)
            if c != "\\":
                out.append(c)
                continue
            if self._i >= end:
                break  # dangling backslash at EOF
            c = text[self._i]
            self._i += 1
            if c == "u":
                decoded = self._read_unicode_escape()
                if decoded is not None:
                    out.append(decoded)
                    continue
                # malformed \u sequence: fall through and keep the literal 'u'
            out.append(_unescape(c))
        return "".join(out)  # unterminated string → return what we have

    def _read_unicode_escape(self) -> str | None:
        """Decode the four hex digits after ``\\u``; ``None`` (cursor untouched) if malformed.

        A high surrogate immediately followed by an escaped low surrogate is combined
        into one code point. An unpaired surrogate becomes U+FFFD so the result is
        always an encodable string.
        """
        code = self._hex4(self._i)
        if code is None:
            return None
        self._i += 4
        if 0xD800 <= code <= 0xDFFF:
            if code <= 0xDBFF and self._s.startswith("\\u", self._i):
                low = self._hex4(self._i + 2)
                if low is not None and 0xDC00 <= low <= 0xDFFF:
                    self._i += 6  # the second backslash-u plus its four digits
                    return chr(0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00))
            return "\ufffd"
        return chr(code)

    def _hex4(self, at: int) -> int | None:
        """Return the value of the four hex digits at ``at``, or ``None`` if they are not hex."""
        digits = self._s[at : at + 4]
        if len(digits) == 4 and all(d in _HEX_DIGITS for d in digits):
            return int(digits, 16)
        return None

    def _read_bare_scalar(self) -> object | None:
        """Read an unquoted token up to ``,``, ``}``, ``]`` or EOF, returned as trimmed text."""
        start = self._i
        while self._i < len(self._s) and self._s[self._i] not in ",}]":
            self._i += 1
        result = self._s[start : self._i].strip()
        if not result:
            return None  # no token read (zero-width)
        if result == "null":
            return NULL_LITERAL  # JSON null literal -> explicit null, NOT the string "null"
        return result

    def _ws(self) -> None:
        """Advance the cursor past any whitespace."""
        while self._i < len(self._s) and self._s[self._i].isspace():
            self._i += 1


def _unescape(c: str) -> str:
    """Return the character denoted by a backslash followed by ``c``.

    ``n``, ``t``, ``r``, ``b`` and ``f`` map to their control characters; anything
    else (quotes, backslash, slash, unknown letters) is returned unchanged.
    """
    return _SIMPLE_ESCAPES.get(c, c)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Stages 2-3: isolate and select the payload root span.
|
|
2
|
+
|
|
3
|
+
Selection rule: first-closed-else-first-open.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def json(text: str | None) -> str | None:
    """Select the JSON payload span using the first-closed-else-first-open rule.

    Every ``{`` in ``text`` is tried, in order, as a candidate opener. The first
    candidate whose matching ``}`` is found wins and its balanced ``{...}`` slice is
    returned. If no candidate closes, the text from the first ``{`` to the end is
    returned. ``None`` is returned for a ``None`` input or when there is no ``{``.
    """
    if text is None:
        return None
    fallback_start: int | None = None
    for pos, char in enumerate(text):
        if char != "{":
            continue
        if fallback_start is None:
            fallback_start = pos  # remembered for the "nothing closes" fallback
        # Each candidate triggers its own forward scan from ``pos``.
        close_at = _scan_balanced(text, pos)
        if close_at >= 0:
            return text[pos : close_at + 1]
    if fallback_start is None:
        return None
    return text[fallback_start:]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _scan_balanced(s: str, open_idx: int) -> int:
|
|
26
|
+
"""Return index of the matching ``}``, or -1 if unterminated. String-aware."""
|
|
27
|
+
depth = 0
|
|
28
|
+
in_str = False
|
|
29
|
+
esc = False
|
|
30
|
+
for i in range(open_idx, len(s)):
|
|
31
|
+
c = s[i]
|
|
32
|
+
if in_str:
|
|
33
|
+
if esc:
|
|
34
|
+
esc = False
|
|
35
|
+
elif c == "\\":
|
|
36
|
+
esc = True
|
|
37
|
+
elif c == '"':
|
|
38
|
+
in_str = False
|
|
39
|
+
continue
|
|
40
|
+
if c == '"':
|
|
41
|
+
in_str = True
|
|
42
|
+
elif c == "{":
|
|
43
|
+
depth += 1
|
|
44
|
+
elif c == "}":
|
|
45
|
+
depth -= 1
|
|
46
|
+
if depth == 0:
|
|
47
|
+
return i
|
|
48
|
+
return -1
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def xml(text: str | None, root_name: str | None, case_insensitive: bool) -> str | None:
    """Select the XML payload span for the element named ``root_name``.

    The span runs from the first opening tag (``<root>`` or ``<root ...attrs>``) to the
    end of the first ``</root>`` that follows it. When no closing tag follows, the span
    runs to the end of ``text``. Returns ``None`` if either argument is ``None`` or no
    opening tag is found. ``case_insensitive`` makes both tag matches ignore case.
    """
    if text is None or root_name is None:
        return None
    tag = re.escape(root_name)  # the name is matched literally, not as a pattern
    flags = re.IGNORECASE if case_insensitive else 0
    opener = re.search("<" + tag + r"(\s[^>]*)?>", text, flags)
    if opener is None:
        return None
    # Only closing tags located after the opener are considered.
    closer = re.compile("</" + tag + r"\s*>", flags).search(text, opener.end())
    stop = len(text) if closer is None else closer.end()
    return text[opener.start() : stop]
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""FR-011: enum-variant normalization for the Coerce stage.
|
|
2
|
+
|
|
3
|
+
ASCII-only by design: enum members are ASCII identifiers, so a pure ``[A-Za-z0-9]``
|
|
4
|
+
transform is byte-identical across ports and sidesteps locale case-folding
|
|
5
|
+
(Turkish-I). The mode comes from the ``@normalize`` attr (``none|collapse|strip``;
|
|
6
|
+
default ``strip``). Mirrors the TS ``normalizeEnum``, the C# ``Normalize.Enum``, and
|
|
7
|
+
the Java ``Normalize.enumValue``.
|
|
8
|
+
|
|
9
|
+
**Uppercasing is a MANUAL a-z -> A-Z fold**, NOT :py:meth:`str.upper` (which is
|
|
10
|
+
Unicode-aware and would diverge on non-ASCII). Only the 26 ASCII lowercase letters
|
|
11
|
+
are folded; every other code point passes through unchanged.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Final
|
|
16
|
+
|
|
17
|
+
# FR-011 @normalize modes — the closed cross-port vocabulary.
|
|
18
|
+
NONE: Final = "none" # exact match only (no normalization)
|
|
19
|
+
COLLAPSE: Final = "collapse" # ASCII-upper + trim + collapse runs of [\s_-]+ to "_"
|
|
20
|
+
STRIP: Final = "strip" # ASCII-upper + keep only [A-Z0-9]
|
|
21
|
+
DEFAULT: Final = STRIP # default when absent on both field and owning object
|
|
22
|
+
|
|
23
|
+
# The separator set for COLLAPSE: the JS `\s` whitespace class PLUS `_` and `-`.
|
|
24
|
+
# The corpus only exercises ASCII space, but the full set keeps byte-identical
|
|
25
|
+
# cross-port parity with the Java/C#/TS ports' isSeparator.
|
|
26
|
+
_SEPARATORS: Final = frozenset(
|
|
27
|
+
{
|
|
28
|
+
"_",
|
|
29
|
+
"-",
|
|
30
|
+
" ", # space
|
|
31
|
+
"\t", # tab
|
|
32
|
+
"\n", # line feed
|
|
33
|
+
"\v", # vertical tab (U+000B)
|
|
34
|
+
"\f", # form feed
|
|
35
|
+
"\r", # carriage return
|
|
36
|
+
" ", # no-break space
|
|
37
|
+
" ", # ogham space mark
|
|
38
|
+
"
", # line separator
|
|
39
|
+
"
", # paragraph separator
|
|
40
|
+
" ", # narrow no-break space
|
|
41
|
+
" ", # medium mathematical space
|
|
42
|
+
" ", # ideographic space
|
|
43
|
+
"", # zero-width no-break space (BOM)
|
|
44
|
+
}
|
|
45
|
+
# U+2000..U+200A (en/em/thin spaces etc.) — the rest of the JS \s range.
|
|
46
|
+
| {chr(c) for c in range(0x2000, 0x200B)}
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def normalize_enum(s: str, mode: str) -> str:
    """Normalize an enum variant string according to the ``@normalize`` mode.

    * ``none`` — return ``s`` untouched.
    * ``collapse`` — trim, ASCII-uppercase, then turn each run of ``[\\s_-]+`` into one ``_``.
    * ``strip`` — trim, ASCII-uppercase, then keep only ``[A-Z0-9]``.

    Any mode other than ``none`` or ``collapse`` (including unknown values) takes the
    ``strip`` path, which is also the default mode.

    NOTE(review): the trim uses ``str.strip()``, i.e. Python's own whitespace
    definition. That set is not identical to JS ``String.prototype.trim`` (Python also
    trims U+001C..U+001F and U+0085 but does not trim U+FEFF). This can only show up in
    ``collapse`` mode — confirm against the other ports if exact parity on such input
    matters.
    """
    if mode == NONE:
        return s
    folded = _ascii_upper(s.strip())
    if mode == COLLAPSE:
        return _collapse_separators(folded)
    return _strip_non_alnum(folded)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _ascii_upper(s: str) -> str:
|
|
66
|
+
"""ASCII-only uppercasing (a-z -> A-Z); all other code points pass through unchanged."""
|
|
67
|
+
out: list[str] = []
|
|
68
|
+
for ch in s:
|
|
69
|
+
c = ord(ch)
|
|
70
|
+
out.append(chr(c - 32) if 0x61 <= c <= 0x7A else ch)
|
|
71
|
+
return "".join(out)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _collapse_separators(s: str) -> str:
    """Replace each run of separator characters with one ``_`` (mirrors ``/[\\s_-]+/g -> "_"``).

    A character counts as a separator when it is a member of ``_SEPARATORS``; all other
    characters are copied through unchanged.
    """
    pieces: list[str] = []
    prev_was_separator = False
    for ch in s:
        is_separator = ch in _SEPARATORS
        if not is_separator:
            pieces.append(ch)
        elif not prev_was_separator:
            pieces.append("_")  # only the first separator of a run emits output
        prev_was_separator = is_separator
    return "".join(pieces)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _strip_non_alnum(s: str) -> str:
|
|
90
|
+
"""Keep only ``[A-Z0-9]`` (mirrors ``/[^A-Z0-9]/g -> ""`` on an already-uppercased string)."""
|
|
91
|
+
out: list[str] = []
|
|
92
|
+
for ch in s:
|
|
93
|
+
c = ord(ch)
|
|
94
|
+
if (0x41 <= c <= 0x5A) or (0x30 <= c <= 0x39):
|
|
95
|
+
out.append(ch)
|
|
96
|
+
return "".join(out)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Stage 1: remove markdown code-fence markers.
|
|
2
|
+
|
|
3
|
+
Prose around the payload is left for ``locate`` to handle.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
|
|
9
|
+
# Matches one fenced block: opening backticks, an optional language tag (json/xml/etc)
# that is discarded, a newline, the lazily captured body, and the closing backticks.
_FENCE = re.compile(r"```[a-zA-Z0-9_-]*[ \t]*\r?\n(.*?)\r?\n?```", re.DOTALL)


def strip(raw: str | None) -> str:
    """Remove the markers of the first markdown code fence and trim the result.

    The fence body is spliced back between the text before and after the fence, so
    surrounding prose is preserved. Only the first fence is unwrapped. Without a fence
    the input is simply trimmed; ``None`` yields the empty string.
    """
    if raw is None:
        return ""
    fence = _FENCE.search(raw)
    if fence is None:
        return raw.strip()
    before = raw[: fence.start()]
    body = fence.group(1)
    after = raw[fence.end() :]
    return f"{before}{body}{after}".strip()
|