generaltranslation-icu-messageformat-parser 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ # Functional Source License, Version 1.1, ALv2 Future License
2
+
3
+ ## Abbreviation
4
+
5
+ FSL-1.1-ALv2
6
+
7
+ ## Notice
8
+
9
+ Copyright 2026 General Translation, Inc.
10
+
11
+ ## Terms and Conditions
12
+
13
+ ### Licensor ("We")
14
+
15
+ The party offering the Software under these Terms and Conditions.
16
+
17
+ ### The Software
18
+
19
+ The "Software" is each version of the software that we make available under
20
+ these Terms and Conditions, as indicated by our inclusion of these Terms and
21
+ Conditions with the Software.
22
+
23
+ ### License Grant
24
+
25
+ Subject to your compliance with this License Grant and the Patents,
26
+ Redistribution and Trademark clauses below, we hereby grant you the right to
27
+ use, copy, modify, create derivative works, publicly perform, publicly display
28
+ and redistribute the Software for any Permitted Purpose identified below.
29
+
30
+ ### Permitted Purpose
31
+
32
+ A Permitted Purpose is any purpose other than a Competing Use. A Competing Use
33
+ means making the Software available to others in a commercial product or
34
+ service that:
35
+
36
+ 1. substitutes for the Software;
37
+
38
+ 2. substitutes for any other product or service we offer using the Software
39
+ that exists as of the date we make the Software available; or
40
+
41
+ 3. offers the same or substantially similar functionality as the Software.
42
+
43
+ Permitted Purposes specifically include using the Software:
44
+
45
+ 1. for your internal use and access;
46
+
47
+ 2. for non-commercial education;
48
+
49
+ 3. for non-commercial research; and
50
+
51
+ 4. in connection with professional services that you provide to a licensee
52
+ using the Software in accordance with these Terms and Conditions.
53
+
54
+ ### Patents
55
+
56
+ To the extent your use for a Permitted Purpose would necessarily infringe our
57
+ patents, the license grant above includes a license under our patents. If you
58
+ make a claim against any party that the Software infringes or contributes to
59
+ the infringement of any patent, then your patent license to the Software ends
60
+ immediately.
61
+
62
+ ### Redistribution
63
+
64
+ The Terms and Conditions apply to all copies, modifications and derivatives of
65
+ the Software.
66
+
67
+ If you redistribute any copies, modifications or derivatives of the Software,
68
+ you must include a copy of or a link to these Terms and Conditions and not
69
+ remove any copyright notices provided in or with the Software.
70
+
71
+ ### Disclaimer
72
+
73
+ THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR
74
+ IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR
75
+ PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT.
76
+
77
+ IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE
78
+ SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES,
79
+ EVEN IF WE HAVE BEEN INFORMED OF THEIR POSSIBILITY IN ADVANCE.
80
+
81
+ ### Trademarks
82
+
83
+ Except for displaying the License Details and identifying us as the origin of
84
+ the Software, you have no right under these Terms and Conditions to use our
85
+ trademarks, trade names, service marks or product names.
86
+
87
+ ## Grant of Future License
88
+
89
+ We hereby irrevocably grant you an additional license to use the Software under
90
+ the Apache License, Version 2.0 that is effective on the second anniversary of
91
+ the date we make the Software available. On or after that date, you may use the
92
+ Software under the Apache License, Version 2.0, in which case the following
93
+ will apply:
94
+
95
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not use
96
+ this file except in compliance with the License.
97
+
98
+ You may obtain a copy of the License at
99
+
100
+ http://www.apache.org/licenses/LICENSE-2.0
101
+
102
+ Unless required by applicable law or agreed to in writing, software distributed
103
+ under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
104
+ CONDITIONS OF ANY KIND, either express or implied. See the License for the
105
+ specific language governing permissions and limitations under the License.
106
+
107
+ ---
108
+
109
+ ## Third-Party Attribution
110
+
111
+ This software includes code derived from
112
+ [pyicumessageformat](https://github.com/SirStendec/pyicumessageformat),
113
+ Copyright (c) 2021 Mike deBeaubien, licensed under the MIT License:
114
+
115
+ > Permission is hereby granted, free of charge, to any person obtaining a copy
116
+ > of this software and associated documentation files (the "Software"), to deal
117
+ > in the Software without restriction, including without limitation the rights
118
+ > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
119
+ > copies of the Software, and to permit persons to whom the Software is
120
+ > furnished to do so, subject to the following conditions:
121
+ >
122
+ > The above copyright notice and this permission notice shall be included in all
123
+ > copies or substantial portions of the Software.
@@ -0,0 +1,155 @@
1
+ Metadata-Version: 2.4
2
+ Name: generaltranslation-icu-messageformat-parser
3
+ Version: 0.0.0
4
+ Summary: ICU MessageFormat parser with whitespace-preserving AST and string reconstruction
5
+ Author: General Translation, Inc.
6
+ Author-email: General Translation, Inc. <support@generaltranslation.com>
7
+ License-Expression: FSL-1.1-ALv2
8
+ License-File: LICENSE.md
9
+ Requires-Python: >=3.10
10
+ Description-Content-Type: text/markdown
11
+
12
+ # generaltranslation-icu-messageformat-parser
13
+
14
+ A pure-Python ICU MessageFormat parser with whitespace-preserving AST and string reconstruction. Python equivalent of [`@formatjs/icu-messageformat-parser`](https://www.npmjs.com/package/@formatjs/icu-messageformat-parser).
15
+
16
+ Derived from [pyicumessageformat](https://github.com/SirStendec/pyicumessageformat) by Mike deBeaubien (MIT license).
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ pip install generaltranslation-icu-messageformat-parser
22
+ ```
23
+
24
+ No dependencies. Pure Python. Requires Python 3.10+.
25
+
26
+ ## Quick Start
27
+
28
+ ```python
29
+ from generaltranslation_icu_messageformat_parser import Parser, print_ast
30
+
31
+ parser = Parser()
32
+ ast = parser.parse("{count, plural, one {# item} other {# items}}")
33
+ # [{'name': 'count', 'type': 'plural', 'offset': 0, 'options': {'one': [{'type': 'number', 'name': 'count', 'hash': True}, ' item'], 'other': [{'type': 'number', 'name': 'count', 'hash': True}, ' items']}}]
34
+ ```
35
+
36
+ ## API
37
+
38
+ ### `Parser(options=None)`
39
+
40
+ Create a parser instance with optional configuration.
41
+
42
+ **Options dict keys:**
43
+
44
+ | Option | Type | Default | Description |
45
+ |---|---|---|---|
46
+ | `subnumeric_types` | `list[str]` | `['plural', 'selectordinal']` | Types that support `#` hash replacement |
47
+ | `submessage_types` | `list[str]` | `['plural', 'selectordinal', 'select']` | Types with sub-message branches |
48
+ | `maximum_depth` | `int` | `50` | Maximum nesting depth |
49
+ | `allow_tags` | `bool` | `False` | Enable XML-style `<tag>` parsing |
50
+ | `strict_tags` | `bool` | `False` | Strict tag parsing mode |
51
+ | `tag_prefix` | `str \| None` | `None` | Required tag name prefix |
52
+ | `tag_type` | `str` | `'tag'` | AST node type string for tags |
53
+ | `include_indices` | `bool` | `False` | Include `start`/`end` positions in AST nodes |
54
+ | `loose_submessages` | `bool` | `False` | Allow loose submessage parsing |
55
+ | `allow_format_spaces` | `bool` | `True` | Allow spaces in format strings |
56
+ | `require_other` | `bool` | `True` | Require `other` branch in plural/select |
57
+ | `preserve_whitespace` | `bool` | `False` | Store whitespace in `_ws` dict on AST nodes for lossless round-trips |
58
+
59
+ ### `Parser.parse(input, tokens=None)`
60
+
61
+ Parse an ICU MessageFormat string into an AST.
62
+
63
+ **Args:**
64
+ - `input` (`str`): The ICU MessageFormat string to parse.
65
+ - `tokens` (`list | None`): Optional list to populate with token objects for low-level analysis.
66
+
67
+ **Returns:** `list` — A list of AST nodes (strings and dicts).
68
+
69
+ **Raises:** `SyntaxError` on malformed input, `TypeError` if input is not a string.
70
+
71
+ ### `print_ast(ast)`
72
+
73
+ Reconstruct an ICU MessageFormat string from an AST.
74
+
75
+ **Args:**
76
+ - `ast` (`list`): The AST as returned by `Parser.parse()`.
77
+
78
+ **Returns:** `str` — The reconstructed ICU MessageFormat string.
79
+
80
+ When the AST contains `_ws` whitespace metadata (from `preserve_whitespace=True`), reconstruction is lossless — the output exactly matches the original input. Without whitespace metadata, normalized spacing is used.
81
+
82
+ ## AST Node Types
83
+
84
+ ### String literal
85
+ Plain strings appear directly in the AST list:
86
+ ```python
87
+ parser.parse("Hello world")
88
+ # ["Hello world"]
89
+ ```
90
+
91
+ ### Simple variable `{name}`
92
+ ```python
93
+ {"name": "username"}
94
+ ```
95
+
96
+ ### Typed placeholder `{name, type, style}`
97
+ ```python
98
+ {"name": "amount", "type": "number", "format": "::currency/USD"}
99
+ ```
100
+
101
+ ### Plural / selectordinal `{n, plural, ...}`
102
+ ```python
103
+ {
104
+ "name": "count",
105
+ "type": "plural", # or "selectordinal"
106
+ "offset": 0, # offset value (0 if none)
107
+ "options": {
108
+ "one": [{"type": "number", "name": "count", "hash": True}, " item"],
109
+ "other": [{"type": "number", "name": "count", "hash": True}, " items"],
110
+ "=0": ["no items"], # exact match keys
111
+ }
112
+ }
113
+ ```
114
+
115
+ ### Select `{gender, select, ...}`
116
+ ```python
117
+ {
118
+ "name": "gender",
119
+ "type": "select",
120
+ "options": {
121
+ "male": ["He"],
122
+ "female": ["She"],
123
+ "other": ["They"],
124
+ }
125
+ }
126
+ ```
127
+
128
+ ### Hash `#` (inside plural/selectordinal)
129
+ ```python
130
+ {"type": "number", "name": "count", "hash": True}
131
+ ```
132
+
133
+ ### With `include_indices=True`
134
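+ All dict nodes gain `start` and `end` integer fields giving character (code point) offsets into the original string: `start` is the index of the node's first character and `end` is the index just past its last character. These are Python `str` indices, not byte offsets.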
135
+
136
+ ### With `preserve_whitespace=True`
137
+ Dict nodes gain a `_ws` dict storing whitespace at each structural position, enabling lossless `print_ast()` round-trips.
138
+
139
+ ## Supported ICU Features
140
+
141
+ - Simple variable interpolation: `{name}`
142
+ - Plural with CLDR categories: `{n, plural, one {...} other {...}}`
143
+ - Exact match: `{n, plural, =0 {...} =1 {...} other {...}}`
144
+ - Plural offset: `{n, plural, offset:1 ...}`
145
+ - Selectordinal: `{n, selectordinal, one {#st} two {#nd} few {#rd} other {#th}}`
146
+ - Select: `{gender, select, male {...} female {...} other {...}}`
147
+ - Nested expressions: plural inside select, select inside plural, etc.
148
+ - Typed placeholders: `{amount, number}`, `{d, date, short}`
149
+ - ICU escape sequences: `''` for literal quote, `'{...}'` for literal braces
150
+ - Hash `#` replacement inside plural/selectordinal branches
151
+ - XML-style tags (opt-in): `<bold>text</bold>`
152
+
153
+ ## Known Limitations
154
+
155
+ - **Escape sequences are consumed during parsing.** `''` becomes `'` and `'{...}'` becomes `{...}` in the AST. These cannot be reconstructed by `print_ast()`. This matches the behavior of `@formatjs/icu-messageformat-parser`.
@@ -0,0 +1,144 @@
1
+ # generaltranslation-icu-messageformat-parser
2
+
3
+ A pure-Python ICU MessageFormat parser with whitespace-preserving AST and string reconstruction. Python equivalent of [`@formatjs/icu-messageformat-parser`](https://www.npmjs.com/package/@formatjs/icu-messageformat-parser).
4
+
5
+ Derived from [pyicumessageformat](https://github.com/SirStendec/pyicumessageformat) by Mike deBeaubien (MIT license).
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install generaltranslation-icu-messageformat-parser
11
+ ```
12
+
13
+ No dependencies. Pure Python. Requires Python 3.10+.
14
+
15
+ ## Quick Start
16
+
17
+ ```python
18
+ from generaltranslation_icu_messageformat_parser import Parser, print_ast
19
+
20
+ parser = Parser()
21
+ ast = parser.parse("{count, plural, one {# item} other {# items}}")
22
+ # [{'name': 'count', 'type': 'plural', 'offset': 0, 'options': {'one': [{'type': 'number', 'name': 'count', 'hash': True}, ' item'], 'other': [{'type': 'number', 'name': 'count', 'hash': True}, ' items']}}]
23
+ ```
24
+
25
+ ## API
26
+
27
+ ### `Parser(options=None)`
28
+
29
+ Create a parser instance with optional configuration.
30
+
31
+ **Options dict keys:**
32
+
33
+ | Option | Type | Default | Description |
34
+ |---|---|---|---|
35
+ | `subnumeric_types` | `list[str]` | `['plural', 'selectordinal']` | Types that support `#` hash replacement |
36
+ | `submessage_types` | `list[str]` | `['plural', 'selectordinal', 'select']` | Types with sub-message branches |
37
+ | `maximum_depth` | `int` | `50` | Maximum nesting depth |
38
+ | `allow_tags` | `bool` | `False` | Enable XML-style `<tag>` parsing |
39
+ | `strict_tags` | `bool` | `False` | Strict tag parsing mode |
40
+ | `tag_prefix` | `str \| None` | `None` | Required tag name prefix |
41
+ | `tag_type` | `str` | `'tag'` | AST node type string for tags |
42
+ | `include_indices` | `bool` | `False` | Include `start`/`end` positions in AST nodes |
43
+ | `loose_submessages` | `bool` | `False` | Allow loose submessage parsing |
44
+ | `allow_format_spaces` | `bool` | `True` | Allow spaces in format strings |
45
+ | `require_other` | `bool` | `True` | Require `other` branch in plural/select |
46
+ | `preserve_whitespace` | `bool` | `False` | Store whitespace in `_ws` dict on AST nodes for lossless round-trips |
47
+
48
+ ### `Parser.parse(input, tokens=None)`
49
+
50
+ Parse an ICU MessageFormat string into an AST.
51
+
52
+ **Args:**
53
+ - `input` (`str`): The ICU MessageFormat string to parse.
54
+ - `tokens` (`list | None`): Optional list to populate with token objects for low-level analysis.
55
+
56
+ **Returns:** `list` — A list of AST nodes (strings and dicts).
57
+
58
+ **Raises:** `SyntaxError` on malformed input, `TypeError` if input is not a string.
59
+
60
+ ### `print_ast(ast)`
61
+
62
+ Reconstruct an ICU MessageFormat string from an AST.
63
+
64
+ **Args:**
65
+ - `ast` (`list`): The AST as returned by `Parser.parse()`.
66
+
67
+ **Returns:** `str` — The reconstructed ICU MessageFormat string.
68
+
69
+ When the AST contains `_ws` whitespace metadata (from `preserve_whitespace=True`), reconstruction is lossless — the output exactly matches the original input. Without whitespace metadata, normalized spacing is used.
70
+
71
+ ## AST Node Types
72
+
73
+ ### String literal
74
+ Plain strings appear directly in the AST list:
75
+ ```python
76
+ parser.parse("Hello world")
77
+ # ["Hello world"]
78
+ ```
79
+
80
+ ### Simple variable `{name}`
81
+ ```python
82
+ {"name": "username"}
83
+ ```
84
+
85
+ ### Typed placeholder `{name, type, style}`
86
+ ```python
87
+ {"name": "amount", "type": "number", "format": "::currency/USD"}
88
+ ```
89
+
90
+ ### Plural / selectordinal `{n, plural, ...}`
91
+ ```python
92
+ {
93
+ "name": "count",
94
+ "type": "plural", # or "selectordinal"
95
+ "offset": 0, # offset value (0 if none)
96
+ "options": {
97
+ "one": [{"type": "number", "name": "count", "hash": True}, " item"],
98
+ "other": [{"type": "number", "name": "count", "hash": True}, " items"],
99
+ "=0": ["no items"], # exact match keys
100
+ }
101
+ }
102
+ ```
103
+
104
+ ### Select `{gender, select, ...}`
105
+ ```python
106
+ {
107
+ "name": "gender",
108
+ "type": "select",
109
+ "options": {
110
+ "male": ["He"],
111
+ "female": ["She"],
112
+ "other": ["They"],
113
+ }
114
+ }
115
+ ```
116
+
117
+ ### Hash `#` (inside plural/selectordinal)
118
+ ```python
119
+ {"type": "number", "name": "count", "hash": True}
120
+ ```
121
+
122
+ ### With `include_indices=True`
123
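+ All dict nodes gain `start` and `end` integer fields giving character (code point) offsets into the original string: `start` is the index of the node's first character and `end` is the index just past its last character. These are Python `str` indices, not byte offsets.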
124
+
125
+ ### With `preserve_whitespace=True`
126
+ Dict nodes gain a `_ws` dict storing whitespace at each structural position, enabling lossless `print_ast()` round-trips.
127
+
128
+ ## Supported ICU Features
129
+
130
+ - Simple variable interpolation: `{name}`
131
+ - Plural with CLDR categories: `{n, plural, one {...} other {...}}`
132
+ - Exact match: `{n, plural, =0 {...} =1 {...} other {...}}`
133
+ - Plural offset: `{n, plural, offset:1 ...}`
134
+ - Selectordinal: `{n, selectordinal, one {#st} two {#nd} few {#rd} other {#th}}`
135
+ - Select: `{gender, select, male {...} female {...} other {...}}`
136
+ - Nested expressions: plural inside select, select inside plural, etc.
137
+ - Typed placeholders: `{amount, number}`, `{d, date, short}`
138
+ - ICU escape sequences: `''` for literal quote, `'{...}'` for literal braces
139
+ - Hash `#` replacement inside plural/selectordinal branches
140
+ - XML-style tags (opt-in): `<bold>text</bold>`
141
+
142
+ ## Known Limitations
143
+
144
+ - **Escape sequences are consumed during parsing.** `''` becomes `'` and `'{...}'` becomes `{...}` in the AST. These cannot be reconstructed by `print_ast()`. This matches the behavior of `@formatjs/icu-messageformat-parser`.
@@ -0,0 +1,16 @@
1
+ [project]
2
+ name = "generaltranslation-icu-messageformat-parser"
3
+ version = "0.0.0"
4
+ description = "ICU MessageFormat parser with whitespace-preserving AST and string reconstruction"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "General Translation, Inc.", email = "support@generaltranslation.com" }
8
+ ]
9
+ requires-python = ">=3.10"
10
+ license = "FSL-1.1-ALv2"
11
+ license-files = ["LICENSE.md"]
12
+ dependencies = []
13
+
14
+ [build-system]
15
+ requires = ["uv_build>=0.10.8,<0.11.0"]
16
+ build-backend = "uv_build"
@@ -0,0 +1,13 @@
1
+ """ICU MessageFormat parser with whitespace-preserving AST and string reconstruction.
2
+
3
+ A Python equivalent of ``@formatjs/icu-messageformat-parser``. Parses ICU
4
+ MessageFormat strings into ASTs and reconstructs strings from ASTs.
5
+
6
+ Derived from `pyicumessageformat <https://github.com/SirStendec/pyicumessageformat>`_
7
+ by Mike deBeaubien (MIT license).
8
+ """
9
+
10
+ from generaltranslation_icu_messageformat_parser._parser import Parser
11
+ from generaltranslation_icu_messageformat_parser._printer import print_ast
12
+
13
+ __all__ = ["Parser", "print_ast"]
@@ -0,0 +1,38 @@
1
+ """ICU MessageFormat parser constants.
2
+
3
+ Derived from pyicumessageformat by Mike deBeaubien (MIT).
4
+ """
5
+
6
# Single characters that carry syntactic meaning in a message.
CHAR_OPEN = "{"  # opens a placeholder or a sub-message
CHAR_CLOSE = "}"  # closes a placeholder or a sub-message
CHAR_TAG_OPEN = "<"  # first character of a tag
CHAR_TAG_CLOSE = "/"  # slash marking a closing or self-closing tag
CHAR_TAG_END = ">"  # last character of a tag
CHAR_SEP = ","  # separates name, type and style inside a placeholder
CHAR_HASH = "#"  # number shorthand inside sub-numeric types
CHAR_ESCAPE = "'"  # apostrophe used for quoting/escaping

# Literal keyword that introduces a plural offset.
OFFSET = "offset:"

# Characters that delimit placeholders.
VAR_CHARS = [CHAR_OPEN, CHAR_CLOSE]

# Characters that make up tag syntax.
TAG_CHARS = [CHAR_TAG_OPEN, CHAR_TAG_CLOSE, CHAR_TAG_END]

TAG_CLOSING = CHAR_TAG_CLOSE + CHAR_TAG_END  # "/>"
TAG_END = CHAR_TAG_OPEN + CHAR_TAG_CLOSE  # "</"

# Individual code points treated as whitespace; the parser's space test
# checks membership in this list in addition to two code point ranges.
SPACE_CHARS = [
    0x20,  # SPACE
    0x85,  # NEXT LINE
    0xA0,  # NO-BREAK SPACE
    0x180E,  # MONGOLIAN VOWEL SEPARATOR
    0x2028,  # LINE SEPARATOR
    0x2029,  # PARAGRAPH SEPARATOR
    0x202F,  # NARROW NO-BREAK SPACE
    0x205F,  # MEDIUM MATHEMATICAL SPACE
    0x2060,  # WORD JOINER
    0x3000,  # IDEOGRAPHIC SPACE
    0xFEFF,  # ZERO WIDTH NO-BREAK SPACE (byte order mark)
]

# NOTE(review): no use of CLOSE_TAG is visible in this chunk; presumably a
# sentinel object for closing tags -- confirm against the tag parser.
CLOSE_TAG = {}
@@ -0,0 +1,686 @@
1
+ """ICU MessageFormat parser.
2
+
3
+ Derived from pyicumessageformat by Mike deBeaubien (MIT).
4
+ Enhanced with whitespace-preserving AST support.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from . import _constants as constants
10
+
11
+ SEP_OR_CLOSE = f"{constants.CHAR_SEP} or {constants.CHAR_CLOSE}"
12
+
13
+
14
+ def _append_token(context: dict, type: str, text: str) -> None:
15
+ if "tokens" in context:
16
+ context["tokens"].append({"type": type, "text": text})
17
+
18
+
19
+ def _is_alpha(char: str) -> bool:
20
+ if not char:
21
+ return False
22
+ code = ord(char)
23
+ return (97 <= code <= 122) or (65 <= code <= 90)
24
+
25
+
26
+ def _is_digit(char: str) -> bool:
27
+ if not char:
28
+ return False
29
+ code = ord(char)
30
+ return 0x30 <= code <= 0x39
31
+
32
+
33
+ def _is_space(char: str) -> bool:
34
+ if not char:
35
+ return False
36
+ code = ord(char)
37
+ return (
38
+ code in constants.SPACE_CHARS
39
+ or (0x09 <= code <= 0x0D)
40
+ or (0x2000 <= code <= 0x200D)
41
+ )
42
+
43
+
44
+ def _skip_space(context: dict, ret: bool = False) -> str:
45
+ msg = context["msg"]
46
+ length = context["length"]
47
+ start = context["i"]
48
+ if start >= length:
49
+ return ""
50
+
51
+ while context["i"] < length and _is_space(msg[context["i"]]):
52
+ context["i"] += 1
53
+
54
+ captured = msg[start : context["i"]]
55
+ if ret:
56
+ return captured
57
+ elif start < context["i"]:
58
+ _append_token(context, "space", captured)
59
+ return captured
60
+
61
+
62
+ def _recursion(context: dict) -> SyntaxError:
63
+ return SyntaxError(f"Too much recursion at position {context['i']}")
64
+
65
+
66
+ def _unexpected(char, index=None) -> SyntaxError:
67
+ if isinstance(char, dict):
68
+ index = char["i"]
69
+ c = char["msg"][index] if index < char["length"] else "<EOF>"
70
+ return _unexpected(c, index)
71
+ return SyntaxError(f'Unexpected "{char}" at position {index}')
72
+
73
+
74
+ def _expected(char, found, index=None) -> SyntaxError:
75
+ if isinstance(found, dict):
76
+ index = found["i"]
77
+ f = found["msg"][index] if index < found["length"] else "<EOF>"
78
+ return _expected(char, f, index)
79
+ return SyntaxError(
80
+ f'Expected {char} at position {index} but found "{found if found else "<EOF>"}"'
81
+ )
82
+
83
+
84
+ class Parser:
85
+ """ICU MessageFormat parser.
86
+
87
+ Parses ICU MessageFormat strings into ASTs (list of dicts/strings).
88
+
89
+ Args:
90
+ options: Parser configuration dict. Supported keys:
91
+
92
+ - ``subnumeric_types``: Types that support ``#`` (default: ``['plural', 'selectordinal']``)
93
+ - ``submessage_types``: Types with sub-messages (default: ``['plural', 'selectordinal', 'select']``)
94
+ - ``maximum_depth``: Max nesting depth (default: ``50``)
95
+ - ``allow_tags``: Enable XML-style tag parsing (default: ``False``)
96
+ - ``strict_tags``: Strict tag parsing mode (default: ``False``)
97
+ - ``tag_prefix``: Required tag name prefix (default: ``None``)
98
+ - ``tag_type``: AST node type for tags (default: ``'tag'``)
99
+ - ``include_indices``: Include ``start``/``end`` in AST nodes (default: ``False``)
100
+ - ``loose_submessages``: Allow loose submessage parsing (default: ``False``)
101
+ - ``allow_format_spaces``: Allow spaces in format strings (default: ``True``)
102
+ - ``require_other``: Require ``other`` branch (default: ``True``)
103
+ - ``preserve_whitespace``: Store whitespace in ``_ws`` dict on AST nodes (default: ``False``)
104
+ """
105
+
106
+ def __init__(self, options: dict | None = None) -> None:
107
+ self.options = {
108
+ "subnumeric_types": ["plural", "selectordinal"],
109
+ "submessage_types": ["plural", "selectordinal", "select"],
110
+ "maximum_depth": 50,
111
+ "allow_tags": False,
112
+ "strict_tags": False,
113
+ "tag_prefix": None,
114
+ "tag_type": "tag",
115
+ "include_indices": False,
116
+ "loose_submessages": False,
117
+ "allow_format_spaces": True,
118
+ "require_other": True,
119
+ "preserve_whitespace": False,
120
+ }
121
+ if isinstance(options, dict):
122
+ self.options.update(options)
123
+
124
+ def parse(self, input: str, tokens: list | None = None) -> list:
125
+ """Parse an ICU MessageFormat string into an AST.
126
+
127
+ Args:
128
+ input: The ICU MessageFormat string to parse.
129
+ tokens: Optional list to populate with token objects.
130
+
131
+ Returns:
132
+ A list of AST nodes (strings and dicts).
133
+ """
134
+ if not isinstance(input, str):
135
+ raise TypeError("input must be string")
136
+
137
+ context = {
138
+ "msg": input,
139
+ "length": len(input),
140
+ "i": 0,
141
+ "depth": 0,
142
+ }
143
+
144
+ if tokens is not None:
145
+ if not isinstance(tokens, list):
146
+ raise TypeError("tokens must be list or None")
147
+ context["tokens"] = tokens
148
+
149
+ try:
150
+ return self._parse_ast(context, None)
151
+ except RecursionError:
152
+ raise _recursion(context)
153
+ except IndexError:
154
+ raise SyntaxError
155
+
156
+ def _parse_ast(self, context: dict, parent: dict | None) -> list:
157
+ msg = context["msg"]
158
+ length = context["length"]
159
+ start = context["i"]
160
+ out: list = []
161
+
162
+ text = self._parse_text(context, parent)
163
+ if text:
164
+ out.append(text)
165
+ _append_token(context, "text", msg[start : context["i"]])
166
+
167
+ while context["i"] < length:
168
+ i = context["i"]
169
+ char = msg[i]
170
+ if char == constants.CHAR_CLOSE:
171
+ if not parent:
172
+ raise _unexpected(context)
173
+ break
174
+
175
+ if (
176
+ parent
177
+ and self.options["allow_tags"]
178
+ and msg[i : i + len(constants.TAG_END)] == constants.TAG_END
179
+ and self._can_read_tag(context, parent, True)
180
+ ):
181
+ break
182
+
183
+ out.append(self._parse_placeholder(context, parent))
184
+ start = context["i"]
185
+ text = self._parse_text(context, parent)
186
+ if text:
187
+ out.append(text)
188
+ _append_token(context, "text", msg[start : context["i"]])
189
+
190
+ return out
191
+
192
+ def _can_read_tag(
193
+ self, context: dict, parent: dict | None, require_closing: bool = False
194
+ ) -> bool:
195
+ msg = context["msg"]
196
+ length = context["length"]
197
+ current = context["i"]
198
+ if not self.options["allow_tags"]:
199
+ return False
200
+
201
+ char = msg[current] if current < length else None
202
+ if char != constants.CHAR_TAG_OPEN:
203
+ return False
204
+
205
+ if self.options["strict_tags"] and not require_closing:
206
+ return True
207
+
208
+ current += 1
209
+ char = msg[current] if current < length else None
210
+
211
+ if char == constants.CHAR_TAG_CLOSE:
212
+ if self.options["strict_tags"]:
213
+ return True
214
+ current += 1
215
+ char = msg[current] if current < length else None
216
+ elif require_closing:
217
+ return False
218
+
219
+ if self.options["tag_prefix"]:
220
+ prefix = self.options["tag_prefix"]
221
+ return prefix == msg[current : current + len(prefix)]
222
+ elif _is_alpha(char):
223
+ return True
224
+
225
+ return False
226
+
227
    def _parse_text(
        self,
        context: dict,
        parent: dict | None,
        is_arg_style: bool = False,
    ) -> str:
        """Consume literal text from the cursor and return it unescaped.

        Reading stops, without consuming the stop character, at ``{`` or
        ``}``, at ``#`` when ``parent`` is a sub-numeric type, at a ``<``
        that can start a tag when tags are enabled, or -- for argument
        styles with ``allow_format_spaces`` off -- at whitespace.

        Args:
            context: Parser state; ``context["i"]`` is advanced.
            parent: Enclosing node, used to decide whether ``#`` is special.
            is_arg_style: True when reading a placeholder style/format. In
                that mode an apostrophe always opens a quoted run, and
                trailing whitespace is trimmed from the result and handed
                back to the caller by rewinding the cursor.

        Returns:
            The text with apostrophe escapes resolved; "" when nothing was
            read (or, in argument-style mode, only whitespace was read).
        """
        msg = context["msg"]
        length = context["length"]
        start = context["i"]
        is_hash_special = (
            parent and parent["type"] in self.options["subnumeric_types"]
        )
        is_tag_special = self.options["allow_tags"]
        allow_arg_spaces = self.options["allow_format_spaces"]

        text = ""
        # Number of unquoted whitespace characters at the end of ``text``.
        trailing_space = 0

        while context["i"] < length:
            char = msg[context["i"]]
            is_sp = _is_space(char)

            # Stop on any character that has to be handled by the caller.
            if (
                char in constants.VAR_CHARS
                or (is_hash_special and char == constants.CHAR_HASH)
                or (
                    is_tag_special
                    and char == constants.CHAR_TAG_OPEN
                    and self._can_read_tag(context, parent)
                )
                or (is_arg_style and not allow_arg_spaces and is_sp)
            ):
                break

            if is_sp:
                trailing_space += 1
            else:
                trailing_space = 0

            if char == constants.CHAR_ESCAPE:
                context["i"] += 1
                if context["i"] < length:
                    char = msg[context["i"]]
                    if char == constants.CHAR_ESCAPE:
                        # '' is a literal apostrophe.
                        text += char
                        context["i"] += 1
                    elif (
                        char in constants.VAR_CHARS
                        or (is_hash_special and char == constants.CHAR_HASH)
                        or char == constants.CHAR_TAG_OPEN
                        or char == constants.CHAR_TAG_END
                        or is_arg_style
                    ):
                        # Apostrophe before a syntax character (or anywhere
                        # in an argument style) opens a quoted run that is
                        # copied verbatim up to the next unpaired apostrophe.
                        text += char
                        context["i"] += 1
                        while context["i"] < length:
                            nxt = msg[context["i"]]
                            if nxt == constants.CHAR_ESCAPE:
                                context["i"] += 1
                                if (
                                    context["i"] < length
                                    and msg[context["i"]] == constants.CHAR_ESCAPE
                                ):
                                    # '' inside the quoted run is still a
                                    # literal apostrophe.
                                    text += nxt
                                else:
                                    # Closing apostrophe: already consumed.
                                    break
                            else:
                                text += nxt
                            context["i"] += 1
                    else:
                        # Apostrophe before an ordinary character is kept
                        # literally, together with that character.
                        context["i"] += 1
                        text += constants.CHAR_ESCAPE + char
                else:
                    # Apostrophe was the last character of the message.
                    text += char
            else:
                text += char
                context["i"] += 1

        if is_arg_style and trailing_space:
            trimmed = len(text) - trailing_space
            if trimmed <= 0:
                # Only whitespace was read: undo everything.
                context["i"] = start
                return ""
            else:
                # Give the trailing whitespace back to the caller.
                context["i"] -= trailing_space
                return text[:trimmed]

        return text
315
+
316
+ def _token_indices(self, token: dict, start: int, end: int) -> dict:
317
+ if self.options["include_indices"]:
318
+ token["start"] = start
319
+ token["end"] = end
320
+ return token
321
+
322
+ def _parse_placeholder(self, context: dict, parent: dict | None) -> dict:
323
+ msg = context["msg"]
324
+ length = context["length"]
325
+ preserve_ws = self.options["preserve_whitespace"]
326
+ is_hash_special = (
327
+ parent and parent["type"] in self.options["subnumeric_types"]
328
+ )
329
+
330
+ start_idx = context["i"]
331
+ char = msg[start_idx] if start_idx < length else None
332
+ if is_hash_special and char == constants.CHAR_HASH:
333
+ _append_token(context, "hash", char)
334
+ context["i"] += 1
335
+ return self._token_indices(
336
+ {"type": "number", "name": parent["name"], "hash": True},
337
+ start_idx,
338
+ context["i"],
339
+ )
340
+
341
+ tag = self._parse_tag(context, parent)
342
+ if tag:
343
+ return tag
344
+
345
+ if char != constants.CHAR_OPEN:
346
+ raise _expected(constants.CHAR_OPEN, context)
347
+
348
+ _append_token(context, "syntax", char)
349
+ context["i"] += 1
350
+
351
+ ws: dict = {}
352
+ ws_before_name = _skip_space(context, ret=preserve_ws)
353
+ if preserve_ws:
354
+ ws["before_name"] = ws_before_name
355
+
356
+ name = self._parse_name(context)
357
+ if not name:
358
+ raise _expected("placeholder name", context)
359
+
360
+ _append_token(context, "name", name)
361
+ token: dict = {"name": name}
362
+
363
+ ws_after_name = _skip_space(context, ret=preserve_ws)
364
+ if preserve_ws:
365
+ ws["after_name"] = ws_after_name
366
+
367
+ char = msg[context["i"]] if context["i"] < length else None
368
+
369
+ if char == constants.CHAR_CLOSE:
370
+ _append_token(context, "syntax", char)
371
+ context["i"] += 1
372
+ if preserve_ws and ws:
373
+ token["_ws"] = ws
374
+ return self._token_indices(token, start_idx, context["i"])
375
+
376
+ if char != constants.CHAR_SEP:
377
+ raise _expected(SEP_OR_CLOSE, context)
378
+
379
+ _append_token(context, "syntax", char)
380
+ context["i"] += 1
381
+
382
+ ws_after_type_sep = _skip_space(context, ret=preserve_ws)
383
+ if preserve_ws:
384
+ ws["after_type_sep"] = ws_after_type_sep
385
+
386
+ ttype = self._parse_name(context)
387
+ if not ttype:
388
+ raise _expected("placeholder type", context)
389
+
390
+ _append_token(context, "type", ttype)
391
+ token["type"] = ttype
392
+
393
+ ws_after_type = _skip_space(context, ret=preserve_ws)
394
+ if preserve_ws:
395
+ ws["after_type"] = ws_after_type
396
+
397
+ char = msg[context["i"]] if context["i"] < length else None
398
+ if char == constants.CHAR_CLOSE:
399
+ _append_token(context, "syntax", char)
400
+ if ttype in self.options["submessage_types"]:
401
+ raise _expected(f"{ttype} sub-messages", context)
402
+ context["i"] += 1
403
+ if preserve_ws and ws:
404
+ token["_ws"] = ws
405
+ return self._token_indices(token, start_idx, context["i"])
406
+
407
+ if char != constants.CHAR_SEP:
408
+ raise _expected(SEP_OR_CLOSE, context)
409
+
410
+ _append_token(context, "syntax", char)
411
+ context["i"] += 1
412
+
413
+ ws_after_style_sep = _skip_space(context, ret=preserve_ws)
414
+ if preserve_ws:
415
+ ws["after_style_sep"] = ws_after_style_sep
416
+
417
+ if ttype in self.options["subnumeric_types"]:
418
+ offset = self._parse_offset(context)
419
+ token["offset"] = offset if offset else 0
420
+ if offset:
421
+ _skip_space(context)
422
+
423
+ if ttype in self.options["submessage_types"]:
424
+ messages = self._parse_submessages(context, token)
425
+ if not messages:
426
+ raise _expected(f"{ttype} sub-messages", context)
427
+ token["options"] = messages
428
+ else:
429
+ start = context["i"]
430
+ fmt = self._parse_text(context, token, True)
431
+ if not fmt:
432
+ raise _expected("placeholder style", context)
433
+
434
+ end = context["i"]
435
+ spaces = _skip_space(context, True)
436
+
437
+ if (
438
+ self.options["loose_submessages"]
439
+ and context["i"] < length
440
+ and msg[context["i"]] == constants.CHAR_OPEN
441
+ ):
442
+ context["i"] = start
443
+ messages = self._parse_submessages(context, token)
444
+ if not messages:
445
+ raise _expected(f"{ttype} sub-messages", context)
446
+ token["options"] = messages
447
+ else:
448
+ token["format"] = fmt
449
+ _append_token(context, "style", msg[start:end])
450
+ if spaces:
451
+ _append_token(context, "space", spaces)
452
+
453
+ ws_before_close = _skip_space(context, ret=preserve_ws)
454
+ if preserve_ws:
455
+ ws["before_close"] = ws_before_close
456
+
457
+ char = msg[context["i"]] if context["i"] < length else None
458
+ if char != constants.CHAR_CLOSE:
459
+ raise _expected(constants.CHAR_CLOSE, context)
460
+
461
+ _append_token(context, "syntax", char)
462
+ context["i"] += 1
463
+
464
+ if preserve_ws and ws:
465
+ token["_ws"] = ws
466
+
467
+ return self._token_indices(token, start_idx, context["i"])
468
+
469
def _parse_tag(self, context: dict, parent: dict | None) -> dict | None:
    """Try to parse a tag element starting at the cursor ``context["i"]``.

    Args:
        context: Mutable parse state; this method reads ``msg``, ``length``
            and ``i`` and advances ``i`` as input is consumed.
        parent: Enclosing token, forwarded to ``_can_read_tag``.

    Returns:
        The tag token (``type`` taken from the ``tag_type`` option, ``name``,
        and ``contents`` when the body is non-empty) after it has been passed
        through ``_token_indices``. ``None`` when tags are disabled, when
        ``_can_read_tag`` declines, when the cursor is not on
        ``constants.CHAR_TAG_OPEN``, or when no tag name follows and
        ``strict_tags`` is off (the cursor is restored in that case).

    Raises:
        The error built by ``_unexpected`` when the cursor sits on
        ``constants.TAG_END``, and the error built by ``_expected`` for a
        missing name (strict mode), a missing tag terminator, or a missing
        or mismatched closing tag.
    """
    opts = self.options
    if not opts["allow_tags"] or not self._can_read_tag(context, parent):
        return None

    msg = context["msg"]
    length = context["length"]
    start_idx = context["i"]

    opener = msg[start_idx] if start_idx < length else None
    if opener != constants.CHAR_TAG_OPEN:
        return None

    # A closing sequence at this point has no opening tag to pair with.
    if msg.startswith(constants.TAG_END, start_idx):
        raise _unexpected(constants.TAG_END, start_idx)

    context["i"] += 1
    name = self._parse_name(context, True)
    if not name:
        if opts["strict_tags"]:
            raise _expected("tag name", context)
        # Lenient mode: rewind so the caller can treat the character as text.
        context["i"] = start_idx
        return None

    _append_token(context, "syntax", opener)
    token: dict = {"type": opts["tag_type"], "name": name}
    _append_token(context, "name", name)
    _skip_space(context)

    pos = context["i"]

    # Self-closing form: the token is complete and carries no contents.
    if pos < length and msg.startswith(constants.TAG_CLOSING, pos):
        _append_token(context, "syntax", constants.TAG_CLOSING)
        context["i"] += len(constants.TAG_CLOSING)
        return self._token_indices(token, start_idx, context["i"])

    if pos >= length or msg[pos] != constants.CHAR_TAG_END:
        raise _expected(
            constants.CHAR_TAG_END + " or " + constants.TAG_CLOSING, context
        )
    _append_token(context, "syntax", msg[pos])
    context["i"] += 1

    # The tag token becomes the parent of everything parsed inside it.
    children = self._parse_ast(context, token)
    if children:
        token["contents"] = children

    end = context["i"]
    if end < length and not msg.startswith(constants.TAG_END, end):
        raise _expected(constants.TAG_END, context)
    _append_token(context, "syntax", constants.TAG_END)
    context["i"] += len(constants.TAG_END)

    close_name = self._parse_name(context, True)
    if close_name:
        _append_token(context, "name", close_name)
    if close_name != name:
        # Report the mismatch at the position where the closing tag began.
        raise _expected(
            constants.TAG_END + name + constants.CHAR_TAG_END,
            msg[end] if end < length else "<EOF>",
            end,
        )

    _skip_space(context)
    closer = msg[context["i"]] if context["i"] < length else None
    if closer != constants.CHAR_TAG_END:
        raise _expected(constants.CHAR_TAG_END, context)
    _append_token(context, "syntax", closer)
    context["i"] += 1

    return self._token_indices(token, start_idx, context["i"])
551
+
552
def _parse_name(self, context: dict, is_tag: bool = False) -> str:
    """Consume and return the run of name characters at the cursor.

    Scanning stops at the end of the message or at the first character that
    is in ``constants.VAR_CHARS``, equals the separator, hash or escape
    character, is whitespace according to ``_is_space``, or (only when
    ``is_tag`` is true) is in ``constants.TAG_CHARS``.

    Args:
        context: Mutable parse state; ``context["i"]`` is advanced past the
            returned name.
        is_tag: Whether tag delimiter characters also terminate the name.

    Returns:
        The consumed text; an empty string when nothing could be consumed.
    """
    msg = context["msg"]
    length = context["length"]
    begin = context["i"]

    def ends_name(char: str) -> bool:
        if char in constants.VAR_CHARS:
            return True
        if char in (
            constants.CHAR_SEP,
            constants.CHAR_HASH,
            constants.CHAR_ESCAPE,
        ):
            return True
        if _is_space(char):
            return True
        return is_tag and char in constants.TAG_CHARS

    while context["i"] < length and not ends_name(msg[context["i"]]):
        context["i"] += 1

    # The name is exactly the span the cursor moved over.
    return msg[begin : context["i"]]
572
+
573
def _parse_offset(self, context: dict) -> int:
    """Parse an optional offset clause (``constants.OFFSET`` plus an integer).

    Args:
        context: Mutable parse state; ``context["i"]`` is advanced past the
            offset keyword, any following whitespace and the number.

    Returns:
        The parsed integer, or ``0`` (with nothing consumed) when the input
        at the cursor does not start with ``constants.OFFSET``.

    Raises:
        The error built by ``_expected("offset number", ...)`` when the
        keyword is not followed by an optionally negative run of digits.
        This includes a lone ``-`` with no digits, which previously reached
        ``int()`` and surfaced as an unrelated ``ValueError``.
    """
    msg = context["msg"]
    length = context["length"]
    start = context["i"]

    if (
        start >= length
        or msg[start : start + len(constants.OFFSET)] != constants.OFFSET
    ):
        return 0

    _append_token(context, "offset", constants.OFFSET)
    context["i"] += len(constants.OFFSET)
    _skip_space(context)

    start = context["i"]

    # An optional sign is only meaningful as the very first character.
    if start < length and msg[start] == "-":
        context["i"] += 1

    digits_start = context["i"]
    while context["i"] < length and _is_digit(msg[context["i"]]):
        context["i"] += 1

    if context["i"] == digits_start:
        # No digits at all (possibly just "-"): point the error at the spot
        # where the number should have started.
        context["i"] = start
        raise _expected("offset number", context)

    offset = msg[start : context["i"]]
    _append_token(context, "number", offset)
    return int(offset, 10)
598
+
599
def _parse_submessages(self, context: dict, parent: dict) -> dict | None:
    """Parse the ``selector {message}`` pairs of a sub-message placeholder.

    Args:
        context: Mutable parse state; ``i`` is advanced and ``depth`` is
            incremented for the duration of the loop.
        parent: The placeholder token owning these sub-messages; its
            ``type`` drives the ``require_other`` check.

    Returns:
        A dict mapping each selector to its parsed sub-message. With the
        ``preserve_whitespace`` option on, the dict also carries a ``"_ws"``
        entry mapping each selector to its surrounding whitespace. ``None``
        when no selector was parsed.

    Raises:
        The error built by ``_expected`` when a selector is missing or when
        an ``other`` sub-message is required but absent.
    """
    msg = context["msg"]
    length = context["length"]
    preserve_ws = self.options["preserve_whitespace"]
    options: dict = {}

    def at_end() -> bool:
        return (
            context["i"] >= length
            or msg[context["i"]] == constants.CHAR_CLOSE
        )

    def skip_space():
        # Only ask for the skipped text when it is going to be stored.
        if preserve_ws:
            return _skip_space(context, ret=preserve_ws)
        return _skip_space(context)

    context["depth"] += 1

    while not at_end():
        rewind_to = context["i"]
        ws_before = skip_space()

        if at_end():
            # Trailing whitespace is handed back so the enclosing
            # placeholder can record it as its own "before_close".
            if preserve_ws:
                context["i"] = rewind_to
            break

        selector = self._parse_name(context)
        if not selector:
            raise _expected("sub-message selector", context)
        _append_token(context, "selector", selector)

        ws_after = skip_space()
        options[selector] = self._parse_submessage(context, parent)

        if preserve_ws:
            # NOTE(review): a selector literally named "_ws" would collide
            # with this metadata key — confirm whether that can occur.
            options.setdefault("_ws", {})[selector] = {
                "before_selector": ws_before or "",
                "after_selector": ws_after or "",
            }
        else:
            _skip_space(context)

    context["depth"] -= 1

    # Nothing but (at most) whitespace metadata means nothing was parsed.
    if all(key == "_ws" for key in options):
        return None

    ttype = parent["type"] if parent else None
    required = self.options["require_other"]
    if required == "all":
        required = True
    elif required == "subnumeric":
        required = self.options["subnumeric_types"]
    elif required and not isinstance(required, list):
        required = self.options["submessage_types"]
    if isinstance(required, list):
        required = ttype in required

    if required and "other" not in options:
        raise _expected(f"{ttype} sub-message other", context)

    return options
664
+
665
def _parse_submessage(self, context: dict, parent: dict) -> list:
    """Parse one brace-delimited sub-message body at the cursor.

    Args:
        context: Mutable parse state; ``context["i"]`` ends up just past the
            closing delimiter.
        parent: Token passed through to ``_parse_ast`` as the parent of the
            nested content.

    Returns:
        Whatever ``_parse_ast`` produced for the content between the
        delimiters.

    Raises:
        The error built by ``_recursion`` when ``context["depth"]`` has
        reached the ``maximum_depth`` option, and the error built by
        ``_expected`` when the opening or closing delimiter is missing.
    """
    if context["depth"] >= self.options["maximum_depth"]:
        raise _recursion(context)

    msg = context["msg"]
    length = context["length"]

    def consume(symbol: str) -> None:
        # Require `symbol` at the cursor, record it, and step over it.
        pos = context["i"]
        if pos >= length or msg[pos] != symbol:
            raise _expected(symbol, context)
        _append_token(context, "syntax", symbol)
        context["i"] += 1

    consume(constants.CHAR_OPEN)
    message = self._parse_ast(context, parent)
    consume(constants.CHAR_CLOSE)
    return message
@@ -0,0 +1,168 @@
1
+ """AST to ICU MessageFormat string reconstruction.
2
+
3
+ Reconstructs an ICU MessageFormat string from an AST produced by
4
+ :class:`Parser`. When the AST contains ``_ws`` whitespace metadata
5
+ (from ``preserve_whitespace=True``), the reconstruction is lossless.
6
+ Otherwise, normalized whitespace (single spaces) is used.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+
13
+ from . import _constants as constants
14
+
15
+ _BRACE_RE = re.compile(r"([{}](?:[\s\S]*[{}])?)")
16
+
17
+
18
def print_ast(ast: list) -> str:
    """Turn a parsed AST back into an ICU MessageFormat string.

    Args:
        ast: The top-level node list, as returned by :meth:`Parser.parse`.

    Returns:
        The message text rebuilt from the nodes.
    """
    rendered = _print_nodes(ast)
    return rendered
28
+
29
+
30
+ def _escape_message(message: str) -> str:
31
+ """Wrap the first region containing ``{`` or ``}`` in single quotes.
32
+
33
+ Port of JS ``printEscapedMessage``.
34
+ """
35
+ return _BRACE_RE.sub(r"'\1'", message, count=1)
36
+
37
+
38
def _print_literal(
    value: str, is_in_plural: bool, is_first: bool, is_last: bool
) -> str:
    """Escape a literal text node so it can be emitted as message source.

    Port of JS ``printLiteralElement``.

    Args:
        value: The literal text.
        is_in_plural: When true, every ``#`` is emitted as ``'#'``.
        is_first: Whether this node is the first of its sibling list; a
            leading apostrophe is doubled only when it is not.
        is_last: Whether this node is the last of its sibling list; a
            trailing apostrophe is doubled only when it is not.

    Returns:
        The escaped text.
    """
    text = value

    # An apostrophe touching a neighbouring node is doubled at that edge.
    if not is_first and text.startswith("'"):
        text = "''" + text[1:]
    if not is_last and text.endswith("'"):
        text = text[:-1] + "''"

    text = _escape_message(text)

    return text.replace("#", "'#'") if is_in_plural else text
63
+
64
+
65
def _print_nodes(nodes: list, is_in_plural: bool = False) -> str:
    """Render a list of AST nodes and concatenate the results.

    String nodes go through ``_print_literal`` (with their first/last
    position in the list), dict nodes through ``_print_node``; nodes of any
    other type contribute nothing.

    Args:
        nodes: Sibling nodes to render, in order.
        is_in_plural: Forwarded to ``_print_literal`` for string nodes.

    Returns:
        The concatenated text.
    """

    def render(position: int, node: object) -> str:
        if isinstance(node, str):
            return _print_literal(
                node, is_in_plural, position == 0, position == len(nodes) - 1
            )
        if isinstance(node, dict):
            return _print_node(node)
        return ""

    return "".join(render(position, node) for position, node in enumerate(nodes))
75
+
76
+
77
def _print_node(node: dict) -> str:
    """Render a single dict node (hash, tag, or placeholder) as source text.

    Whitespace recorded under the node's ``"_ws"`` key is re-emitted where
    present. Without it, a single space follows each separator of a
    placeholder that has no ``options`` and nothing follows the separators
    of one that does; later selectors are separated by one space.

    Args:
        node: The AST node to render.

    Returns:
        The text for the node, including its nested content.
    """
    # Hash replacement node (#)
    if node.get("hash"):
        return constants.CHAR_HASH

    ws = node.get("_ws", {})
    name = node.get("name", "")
    node_type = node.get("type")

    # Tag node: typed "tag", or any typed node that carries contents.
    if node_type == "tag" or (node_type and node.get("contents") is not None):
        return "".join(
            (
                constants.CHAR_TAG_OPEN,
                name,
                constants.CHAR_TAG_END,
                _print_nodes(node.get("contents", [])),
                constants.TAG_END,
                name,
                constants.CHAR_TAG_END,
            )
        )

    has_options = "options" in node
    has_format = "format" in node

    # Simple variable: {name}
    if node_type is None and not has_options and not has_format:
        return "".join(
            (
                constants.CHAR_OPEN,
                ws.get("before_name", ""),
                name,
                ws.get("after_name", ""),
                constants.CHAR_CLOSE,
            )
        )

    # Typed placeholder. JS printAST puts no space after the commas of
    # select/plural placeholders but one space for simple format types.
    default_sep = "" if has_options else " "
    after_style_sep = ws.get("after_style_sep", default_sep)

    pieces = [
        constants.CHAR_OPEN,
        ws.get("before_name", ""),
        name,
        ws.get("after_name", ""),
    ]

    if node_type:
        pieces += [
            constants.CHAR_SEP,
            ws.get("after_type_sep", default_sep),
            node_type,
            ws.get("after_type", ""),
        ]

    if has_format:
        # Format string (e.g. {n, number, ::currency/USD}); wins over options.
        pieces += [constants.CHAR_SEP, after_style_sep, node["format"]]
    elif has_options:
        # Submessages (plural, select, selectordinal)
        pieces += [constants.CHAR_SEP, after_style_sep]

        offset = node.get("offset", 0)
        if offset:
            pieces.append(f"offset:{offset} ")

        options = node["options"]
        raw_ws = options.get("_ws")
        options_ws = raw_ws if isinstance(raw_ws, dict) else {}
        child_in_plural = node_type in ("plural", "selectordinal")

        seen_selector = False
        for key, value in options.items():
            if key == "_ws":
                continue

            selector_ws = options_ws.get(key, {})
            # Nothing is emitted ahead of the first selector.
            if seen_selector:
                pieces.append(selector_ws.get("before_selector", " "))
            seen_selector = True

            pieces += [
                key,
                selector_ws.get("after_selector", ""),
                constants.CHAR_OPEN,
            ]
            if isinstance(value, list):
                pieces.append(_print_nodes(value, is_in_plural=child_in_plural))
            pieces.append(constants.CHAR_CLOSE)

    pieces += [ws.get("before_close", ""), constants.CHAR_CLOSE]
    return "".join(pieces)