pomlight 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pomlight-0.1.0/.gitignore +3 -0
- pomlight-0.1.0/PKG-INFO +28 -0
- pomlight-0.1.0/README.md +17 -0
- pomlight-0.1.0/pomlight/__init__.py +101 -0
- pomlight-0.1.0/pomlight/components.py +416 -0
- pomlight-0.1.0/pomlight/directives.py +162 -0
- pomlight-0.1.0/pomlight/expr.py +398 -0
- pomlight-0.1.0/pomlight/inline.py +88 -0
- pomlight-0.1.0/pomlight/read.py +863 -0
- pomlight-0.1.0/pomlight/serialize.py +187 -0
- pomlight-0.1.0/pomlight/style.py +70 -0
- pomlight-0.1.0/pomlight/tags.py +76 -0
- pomlight-0.1.0/pomlight/types.py +283 -0
- pomlight-0.1.0/pomlight/write.py +371 -0
- pomlight-0.1.0/pomlight/xml_parser.py +78 -0
- pomlight-0.1.0/pyproject.toml +18 -0
- pomlight-0.1.0/tests/test_integration.py +77 -0
- pomlight-0.1.0/tests/test_unit.py +187 -0
- pomlight-0.1.0/uv.lock +225 -0
pomlight-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pomlight
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight Python library for parsing and rendering POML prompts
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: pyyaml>=6.0
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# Pomlight (Python)
|
|
13
|
+
|
|
14
|
+
Lightweight Python library for parsing and rendering POML prompts.
|
|
15
|
+
|
|
16
|
+
## Install
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install pomlight
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
from pomlight import poml
|
|
26
|
+
|
|
27
|
+
result = poml("<poml><p>Hello, world!</p></poml>")
|
|
28
|
+
```
|
pomlight-0.1.0/README.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Pomlight (Python)
|
|
2
|
+
|
|
3
|
+
Lightweight Python library for parsing and rendering POML prompts.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install pomlight
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from pomlight import poml
|
|
15
|
+
|
|
16
|
+
result = poml("<poml><p>Hello, world!</p></poml>")
|
|
17
|
+
```
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Pomlight — a lightweight POML parser."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from .read import read, read_full
|
|
8
|
+
from .write import write, format_messages, render_content, render_messages, FormatSideband
|
|
9
|
+
from .types import (
|
|
10
|
+
Block,
|
|
11
|
+
ContentMultiMedia,
|
|
12
|
+
ContentMultiMediaBinary,
|
|
13
|
+
ContentMultiMediaJson,
|
|
14
|
+
Heading,
|
|
15
|
+
ListBlock,
|
|
16
|
+
ListItem,
|
|
17
|
+
Message,
|
|
18
|
+
MultiMediaBlock,
|
|
19
|
+
OutputFormat,
|
|
20
|
+
Paragraph,
|
|
21
|
+
ReadOptions,
|
|
22
|
+
ReadResult,
|
|
23
|
+
RichContent,
|
|
24
|
+
SerializedNode,
|
|
25
|
+
Speaker,
|
|
26
|
+
StyleSheet,
|
|
27
|
+
ToolDefinition,
|
|
28
|
+
WriteOptions,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"read",
|
|
33
|
+
"read_full",
|
|
34
|
+
"write",
|
|
35
|
+
"format_messages",
|
|
36
|
+
"render_content",
|
|
37
|
+
"render_messages",
|
|
38
|
+
"poml",
|
|
39
|
+
"PomlOptions",
|
|
40
|
+
# types
|
|
41
|
+
"Block",
|
|
42
|
+
"ContentMultiMedia",
|
|
43
|
+
"ContentMultiMediaBinary",
|
|
44
|
+
"ContentMultiMediaJson",
|
|
45
|
+
"Heading",
|
|
46
|
+
"ListBlock",
|
|
47
|
+
"ListItem",
|
|
48
|
+
"Message",
|
|
49
|
+
"MultiMediaBlock",
|
|
50
|
+
"OutputFormat",
|
|
51
|
+
"Paragraph",
|
|
52
|
+
"ReadOptions",
|
|
53
|
+
"ReadResult",
|
|
54
|
+
"RichContent",
|
|
55
|
+
"SerializedNode",
|
|
56
|
+
"Speaker",
|
|
57
|
+
"StyleSheet",
|
|
58
|
+
"ToolDefinition",
|
|
59
|
+
"WriteOptions",
|
|
60
|
+
"FormatSideband",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class PomlOptions:
    """Optional settings for the top-level poml() convenience function.

    All settings default to None ("unset"). The first four are forwarded
    verbatim to read_full(); `format` selects the output shape passed to
    format_messages() (poml() falls back to "message_dict" when unset).
    """

    def __init__(
        self,
        context: dict[str, Any] | None = None,
        read_options: ReadOptions | None = None,
        stylesheet: StyleSheet | None = None,
        source_path: str | None = None,
        format: OutputFormat | None = None,
    ):
        # Plain attribute storage; no validation is performed here.
        self.context = context
        self.read_options = read_options
        self.stylesheet = stylesheet
        self.source_path = source_path
        self.format = format
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def poml(element: str, options: PomlOptions | None = None) -> Any:
    """Convenience: read + write in one call, matching the official SDK's poml() API.

    Parses the POML markup in *element* with read_full(), renders the
    resulting blocks to messages, and formats them according to
    options.format (defaulting to "message_dict"). Sideband data produced
    by the parse (tools, schema, runtime) is forwarded to format_messages()
    when present.
    """
    opts = options or PomlOptions()
    result = read_full(
        element,
        opts.read_options,
        opts.context,
        opts.stylesheet,
        opts.source_path,
    )
    fmt: OutputFormat = opts.format or "message_dict"
    # With speaker=True, write() is expected to return a list of messages
    # (the assert below pins that invariant).
    messages = write(result.blocks, {"speaker": True})
    assert isinstance(messages, list)

    # Sideband = any non-message output of the parse (tools/schema/runtime).
    has_sideband = result.schema or (result.tools and len(result.tools) > 0) or result.runtime
    if not has_sideband:
        return format_messages(messages, fmt)
    return format_messages(messages, fmt, FormatSideband(
        tools=result.tools,
        schema=result.schema,
        runtime=result.runtime,
    ))
|
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
9
|
+
from .xml_parser import XmlElement
|
|
10
|
+
from .types import Block, Paragraph, State
|
|
11
|
+
from .expr import eval_expr, interpolate
|
|
12
|
+
from .style import get_style_prop
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class _IndentedListDumper(yaml.SafeDumper):
    """Custom YAML dumper that indents list items inside mappings (matching Deno @std/yaml)."""

    def increase_indent(self, flow: bool = False, indentless: bool = False) -> None:
        # Force indentless=False so block-sequence dashes are indented under
        # their parent mapping key; PyYAML's default emits them flush with it.
        return super().increase_indent(flow, False)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Document component
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
def process_document(node: XmlElement, state: State) -> list[Block]:
    """Inline the text of an external document referenced by the node's ``src``.

    Returns a single paragraph holding the file's content, or [] when no
    source is given, there is no base file to resolve against, or the file
    cannot be read.
    """
    src_attr = node.attrs.get("src")
    if not src_attr or not state.file_path:
        return []
    resolved_src = interpolate(src_attr, state.ctx)

    # Resolve the reference relative to the directory of the current file.
    base_dir = state.file_path[: state.file_path.rfind("/") + 1]
    try:
        text = Path(base_dir + resolved_src).read_text()
    except OSError:
        # Best-effort: a missing/unreadable file yields no output.
        return []
    return [Paragraph(text=text)]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Table component
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
|
|
45
|
+
def process_table(node: XmlElement, state: State) -> list[Block]:
    """Render a <table> component as markdown/csv/tsv text.

    Records come either from a `records` expression attribute or from a
    `src` file (csv/json/jsonl). The `columns`, `selectedRecords`,
    `selectedColumns`, and `maxRecords` attributes then filter/reshape the
    data before it is serialized in the chosen `syntax`.
    """
    # Syntax priority: explicit attribute > stylesheet > "markdown".
    syntax = node.attrs.get("syntax") or get_style_prop("table", "syntax", state) or "markdown"

    records: list[dict[str, Any]] = []
    columns: list[str] = []

    records_expr = node.attrs.get("records")
    if records_expr:
        # Inline expression source, e.g. records="{{ rows }}".
        raw = records_expr.strip()
        if raw.startswith("{{") and raw.endswith("}}"):
            raw = raw[2:-2].strip()
        data = eval_expr(raw, state.ctx)
        if isinstance(data, list) and len(data) > 0:
            first = data[0]
            if isinstance(first, list):
                # List-of-lists: synthesize "Column N" headers.
                records = [
                    {f"Column {i}": val for i, val in enumerate(row)}
                    for row in data
                ]
                columns = list(records[0].keys())
            elif isinstance(first, dict):
                # List-of-dicts: headers come from the first record's keys.
                columns = list(first.keys())
                records = data
    else:
        # File source: resolve `src` relative to the current file.
        raw_src = node.attrs.get("src")
        if not raw_src or not state.file_path:
            return []
        src = interpolate(raw_src, state.ctx)

        dir_path = state.file_path[: state.file_path.rfind("/") + 1]
        table_path = dir_path + src
        ext = src.rsplit(".", 1)[-1].lower() if "." in src else ""

        if ext == "csv":
            try:
                content = Path(table_path).read_text()
            except OSError:
                return []
            parsed = _parse_csv_table(content)
            records = parsed["records"]
            columns = parsed["columns"]
        elif ext == "json":
            try:
                content = Path(table_path).read_text()
            except OSError:
                return []
            data = json.loads(content)
            if isinstance(data, list) and len(data) > 0:
                columns = list(data[0].keys())
                records = data
        elif ext == "jsonl" or node.attrs.get("parser") == "jsonl":
            try:
                content = Path(table_path).read_text()
            except OSError:
                return []
            parsed = _parse_jsonl_table(content)
            records = parsed["records"]
            columns = parsed["columns"]
        else:
            # Unknown extension and no jsonl override: nothing to render.
            return []

    # NOTE(review): placement relative to the expr/file branches inferred
    # from layout — confirm inference is meant to run for expr records too.
    records = _infer_column_types(records, columns)

    # Apply columns attribute
    columns_attr = node.attrs.get("columns")
    header_map: dict[str, str] = {}
    if columns_attr:
        raw = columns_attr.strip()
        if raw.startswith("{{") and raw.endswith("}}"):
            raw = raw[2:-2].strip()
        col_defs = eval_expr(raw, state.ctx)
        if isinstance(col_defs, list):
            # Each definition selects a field and may rename its header.
            fields = [d["field"] for d in col_defs]
            columns = fields
            for d in col_defs:
                if d.get("header"):
                    header_map[d["field"]] = d["header"]

    # Apply selectedRecords slice
    selected_records = node.attrs.get("selectedRecords")
    if selected_records:
        start, end = parse_python_style_slice(selected_records, len(records))
        records = records[start:end]

    # Apply selectedColumns (file-sourced tables only)
    selected_columns = node.attrs.get("selectedColumns")
    if selected_columns and not records_expr:
        cols = [c.strip() for c in selected_columns.split(",")]
        columns = [c for c in cols if c in columns]

    # Apply maxRecords: keep the first maxRecords-1 rows plus the last row,
    # with a "..." ellipsis row in between.
    max_records_str = node.attrs.get("maxRecords", "")
    try:
        max_records = int(max_records_str)
    except (ValueError, TypeError):
        max_records = 0
    if max_records > 0 and len(records) > max_records:
        first = records[: max_records - 1]
        last = records[-1]
        ellipsis_row: dict[str, Any] = {c: "..." for c in columns}
        records = [*first, ellipsis_row, last]

    if not columns or not records:
        return []

    def fmt_cell(v: Any) -> str:
        # NOTE(review): booleans render as "" here — confirm this matches
        # the reference renderer rather than "true"/"false".
        if isinstance(v, bool):
            return ""
        if v is None:
            return ""
        return str(v)

    # Writer options: attribute first, then the "table" stylesheet entry.
    writer_options: dict[str, Any] = {}
    raw_wo = node.attrs.get("writerOptions")
    if raw_wo is None and state.styles and "table" in state.styles:
        table_style = state.styles["table"]
        if isinstance(table_style, dict):
            raw_wo = table_style.get("writerOptions")
    if raw_wo:
        if isinstance(raw_wo, str):
            try:
                writer_options = json.loads(raw_wo)
            except (json.JSONDecodeError, TypeError):
                pass
        elif isinstance(raw_wo, dict):
            writer_options = raw_wo

    if syntax == "csv":
        separator = writer_options.get("csvSeparator", ",")
        # csvHeader=False suppresses the header row; anything else keeps it.
        show_header = writer_options.get("csvHeader", True) is not False
        lines: list[str] = []
        if show_header:
            lines.append(separator.join(columns))
        for rec in records:
            lines.append(separator.join(fmt_cell(rec.get(c)) for c in columns))
        return [Paragraph(text="\n".join(lines))]

    if syntax == "tsv":
        header_line = "\t".join(columns)
        body_lines = ["\t".join(fmt_cell(rec.get(c)) for c in columns) for rec in records]
        table = "\n".join([header_line, *body_lines])
        return [Paragraph(text=table)]

    # Default: markdown table
    display_headers = [header_map.get(c, c) for c in columns]

    # Column widths: widest of header/cells, minimum 3 (the "---" separator).
    col_widths: list[int] = []
    for idx, col in enumerate(columns):
        max_w = len(display_headers[idx])
        for rec in records:
            val = fmt_cell(rec.get(col))
            if len(val) > max_w:
                max_w = len(val)
        col_widths.append(max(max_w, 3))

    def pad(s: str, w: int) -> str:
        # Right-pad to the column width for aligned markdown output.
        return s + " " * max(0, w - len(s))

    header_line = "| " + " | ".join(pad(h, col_widths[i]) for i, h in enumerate(display_headers)) + " |"
    sep_line = "| " + " | ".join("-" * w for w in col_widths) + " |"
    body_lines = [
        "| " + " | ".join(pad(fmt_cell(rec.get(c)), col_widths[i]) for i, c in enumerate(columns)) + " |"
        for rec in records
    ]
    table = "\n".join([header_line, sep_line, *body_lines])
    return [Paragraph(text=table)]
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# ---------------------------------------------------------------------------
|
|
215
|
+
# Object component
|
|
216
|
+
# ---------------------------------------------------------------------------
|
|
217
|
+
|
|
218
|
+
def process_object(node: XmlElement, state: State) -> list[Block]:
    """Render an <object> component's `data` expression as a fenced code block.

    The `syntax` attribute selects yaml, xml, or (default) json output.
    Returns [] when there is no `data` attribute or it evaluates to None.
    """
    data_expr = node.attrs.get("data")
    if not data_expr:
        return []
    # Strip an optional {{ ... }} wrapper before evaluating the expression.
    raw = data_expr.strip()
    if raw.startswith("{{") and raw.endswith("}}"):
        raw = raw[2:-2].strip()
    data = eval_expr(raw, state.ctx)
    if data is None:
        return []

    syntax = node.attrs.get("syntax", "json")

    if syntax == "yaml":
        # Huge width disables line wrapping; sort_keys=False preserves
        # insertion order; the custom dumper indents nested list items.
        yaml_str = yaml.dump(data, default_flow_style=False, width=1000000, sort_keys=False, Dumper=_IndentedListDumper).rstrip()
        return [Paragraph(text="```yaml\n" + yaml_str + "\n```")]

    if syntax == "xml":
        xml_str = _object_to_xml(data)
        return [Paragraph(text="```xml\n" + xml_str + "\n```")]

    # Default: JSON
    json_str = json.dumps(data, indent=2)
    return [Paragraph(text="```json\n" + json_str + "\n```")]
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# ---------------------------------------------------------------------------
|
|
245
|
+
# Tree component
|
|
246
|
+
# ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
def process_tree(node: XmlElement, state: State) -> list[Block]:
    """Render a <tree> component as nested headings (one per node).

    `items` is an expression yielding a list of {name, value?, children?}
    dicts. Each node becomes a Heading whose text is its slash-joined path;
    with showContent enabled, a node's `value` is emitted as a fenced code
    block whose language tag is taken from the name's file extension.
    """
    from .types import Heading

    items_expr = node.attrs.get("items")
    if not items_expr:
        return []
    # Strip an optional {{ ... }} wrapper before evaluating the expression.
    raw = items_expr.strip()
    if raw.startswith("{{") and raw.endswith("}}"):
        raw = raw[2:-2].strip()
    items = eval_expr(raw, state.ctx)
    if not isinstance(items, list):
        return []

    # Attribute may arrive as the string "true" or a native bool.
    show_content = node.attrs.get("showContent") in ("true", True)
    blocks: list[Block] = []

    def walk_tree(nodes: list[dict[str, Any]], depth: int, path_prefix: str) -> None:
        # Depth-first traversal; appends to `blocks` in document order.
        for item in nodes:
            name = item.get("name", "")
            full_path = f"{path_prefix}/{name}" if path_prefix else name
            blocks.append(Heading(depth=depth, text=full_path))

            if show_content and "value" in item:
                ext = name.rsplit(".", 1)[-1] if "." in name else ""
                fence = f"```{ext}\n{item['value']}\n```"
                blocks.append(Paragraph(text=fence))

            if "children" in item:
                walk_tree(item["children"], depth + 1, full_path)

    # Root headings start at the current nesting depth of the document.
    walk_tree(items, state.depth, "")
    return blocks
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
# ---------------------------------------------------------------------------
|
|
283
|
+
# Shared helpers
|
|
284
|
+
# ---------------------------------------------------------------------------
|
|
285
|
+
|
|
286
|
+
def parse_python_style_slice(slice_str: str, total_length: int) -> tuple[int, int]:
    """Translate a Python-style slice string into a (start, end) pair.

    Supported forms: ":", "N:", ":N", "M:N", and a bare index "N" (which
    selects a single element). A negative end bound is resolved against
    *total_length*; raises ValueError on non-numeric input.
    """
    if slice_str == ":":
        return 0, total_length
    if slice_str.endswith(":"):
        # Open-ended slice: everything from the start bound onwards.
        return int(slice_str[:-1]), total_length
    if slice_str.startswith(":"):
        stop = int(slice_str[1:])
        return 0, stop + total_length if stop < 0 else stop
    if ":" not in slice_str:
        # Bare index selects exactly one element.
        idx = int(slice_str)
        return idx, idx + 1
    start_txt, stop_txt = slice_str.split(":")[0], slice_str.split(":")[1]
    start, stop = int(start_txt), int(stop_txt)
    return start, stop + total_length if stop < 0 else stop
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# ---------------------------------------------------------------------------
|
|
303
|
+
# Internal helpers
|
|
304
|
+
# ---------------------------------------------------------------------------
|
|
305
|
+
|
|
306
|
+
def _parse_csv_table(content: str) -> dict[str, Any]:
|
|
307
|
+
lines = content.strip().split("\n")
|
|
308
|
+
if not lines:
|
|
309
|
+
return {"records": [], "columns": []}
|
|
310
|
+
headers = [h.strip() for h in lines[0].split(",")]
|
|
311
|
+
records = []
|
|
312
|
+
for line in lines[1:]:
|
|
313
|
+
vals = [v.strip() for v in line.split(",")]
|
|
314
|
+
obj: dict[str, str] = {}
|
|
315
|
+
for i, h in enumerate(headers):
|
|
316
|
+
obj[h] = vals[i] if i < len(vals) else ""
|
|
317
|
+
records.append(obj)
|
|
318
|
+
return {"records": records, "columns": headers}
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _parse_jsonl_table(content: str) -> dict[str, Any]:
|
|
322
|
+
lines = [l for l in content.strip().split("\n") if l.strip()]
|
|
323
|
+
records = [json.loads(line) for line in lines]
|
|
324
|
+
columns = list(records[0].keys()) if records else []
|
|
325
|
+
return {"records": records, "columns": columns}
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _infer_column_types(
|
|
329
|
+
records: list[dict[str, Any]], columns: list[str]
|
|
330
|
+
) -> list[dict[str, Any]]:
|
|
331
|
+
if not records:
|
|
332
|
+
return records
|
|
333
|
+
|
|
334
|
+
col_types: dict[str, str] = {}
|
|
335
|
+
for col in columns:
|
|
336
|
+
col_type = "string"
|
|
337
|
+
all_empty = True
|
|
338
|
+
for rec in records:
|
|
339
|
+
raw_val = rec.get(col, "")
|
|
340
|
+
# Normalize to lowercase string (matches JS String() behavior)
|
|
341
|
+
val = str(raw_val).strip() if not isinstance(raw_val, bool) else str(raw_val).lower()
|
|
342
|
+
if val == "":
|
|
343
|
+
continue
|
|
344
|
+
all_empty = False
|
|
345
|
+
if val in ("true", "false"):
|
|
346
|
+
if col_type == "string":
|
|
347
|
+
col_type = "boolean"
|
|
348
|
+
else:
|
|
349
|
+
try:
|
|
350
|
+
num = float(val)
|
|
351
|
+
if col_type == "string":
|
|
352
|
+
col_type = "integer" if num == int(num) and "." not in val else "float"
|
|
353
|
+
except ValueError:
|
|
354
|
+
col_type = "string"
|
|
355
|
+
break
|
|
356
|
+
if all_empty:
|
|
357
|
+
col_type = "string"
|
|
358
|
+
col_types[col] = col_type
|
|
359
|
+
|
|
360
|
+
result = []
|
|
361
|
+
for rec in records:
|
|
362
|
+
out: dict[str, Any] = {}
|
|
363
|
+
for col in columns:
|
|
364
|
+
raw_val = rec.get(col, "")
|
|
365
|
+
# For booleans, use the native value directly
|
|
366
|
+
if isinstance(raw_val, bool) and col_types[col] == "boolean":
|
|
367
|
+
out[col] = raw_val
|
|
368
|
+
continue
|
|
369
|
+
raw = str(raw_val).strip() if not isinstance(raw_val, bool) else str(raw_val).lower()
|
|
370
|
+
ct = col_types[col]
|
|
371
|
+
if ct == "boolean":
|
|
372
|
+
out[col] = raw == "true"
|
|
373
|
+
elif ct == "integer":
|
|
374
|
+
out[col] = int(raw) if raw else ""
|
|
375
|
+
elif ct == "float":
|
|
376
|
+
out[col] = float(raw) if raw else ""
|
|
377
|
+
else:
|
|
378
|
+
out[col] = raw
|
|
379
|
+
result.append(out)
|
|
380
|
+
return result
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _object_to_xml(data: Any, indent: str = "", wrap_in_item: bool = False) -> str:
|
|
384
|
+
if isinstance(data, list):
|
|
385
|
+
return "\n".join(_object_to_xml(item, indent, True) for item in data)
|
|
386
|
+
if isinstance(data, dict):
|
|
387
|
+
child_indent = indent + " " if wrap_in_item else indent
|
|
388
|
+
parts = []
|
|
389
|
+
for k, val in data.items():
|
|
390
|
+
if val is not None and isinstance(val, (dict, list)):
|
|
391
|
+
inner = _object_to_xml(val, child_indent + " ")
|
|
392
|
+
parts.append(f"{child_indent}<{k}>\n{inner}\n{child_indent}</{k}>")
|
|
393
|
+
else:
|
|
394
|
+
str_val = _js_string(val)
|
|
395
|
+
if str_val == "":
|
|
396
|
+
parts.append(f"{child_indent}<{k}/>")
|
|
397
|
+
else:
|
|
398
|
+
parts.append(f"{child_indent}<{k}>{_escape_xml_content(str_val)}</{k}>")
|
|
399
|
+
inner = "\n".join(parts)
|
|
400
|
+
if wrap_in_item:
|
|
401
|
+
return f"{indent}<item>\n{inner}\n{indent}</item>"
|
|
402
|
+
return inner
|
|
403
|
+
return indent + _escape_xml_content(_js_string(data))
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _escape_xml_content(s: str) -> str:
|
|
407
|
+
return s.replace("&", "&").replace("<", "<").replace(">", ">")
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _js_string(val: Any) -> str:
|
|
411
|
+
"""Convert a value to string matching JavaScript's String() behavior."""
|
|
412
|
+
if val is None:
|
|
413
|
+
return ""
|
|
414
|
+
if isinstance(val, bool):
|
|
415
|
+
return "true" if val else "false"
|
|
416
|
+
return str(val)
|