pomlight 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.4
2
+ Name: pomlight
3
+ Version: 0.1.0
4
+ Summary: Lightweight Python library for parsing and rendering POML prompts
5
+ License: MIT
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: pyyaml>=6.0
8
+ Provides-Extra: dev
9
+ Requires-Dist: pytest>=7.0; extra == 'dev'
10
+ Description-Content-Type: text/markdown
11
+
12
+ # Pomlight (Python)
13
+
14
+ Lightweight Python library for parsing and rendering POML prompts.
15
+
16
+ ## Install
17
+
18
+ ```bash
19
+ pip install pomlight
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ```python
25
+ from pomlight import poml
26
+
27
+ result = poml("<poml><p>Hello, world!</p></poml>")
28
+ ```
@@ -0,0 +1,17 @@
1
+ # Pomlight (Python)
2
+
3
+ Lightweight Python library for parsing and rendering POML prompts.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install pomlight
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ```python
14
+ from pomlight import poml
15
+
16
+ result = poml("<poml><p>Hello, world!</p></poml>")
17
+ ```
@@ -0,0 +1,101 @@
1
+ """Pomlight — a lightweight POML parser."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from .read import read, read_full
8
+ from .write import write, format_messages, render_content, render_messages, FormatSideband
9
+ from .types import (
10
+ Block,
11
+ ContentMultiMedia,
12
+ ContentMultiMediaBinary,
13
+ ContentMultiMediaJson,
14
+ Heading,
15
+ ListBlock,
16
+ ListItem,
17
+ Message,
18
+ MultiMediaBlock,
19
+ OutputFormat,
20
+ Paragraph,
21
+ ReadOptions,
22
+ ReadResult,
23
+ RichContent,
24
+ SerializedNode,
25
+ Speaker,
26
+ StyleSheet,
27
+ ToolDefinition,
28
+ WriteOptions,
29
+ )
30
+
31
+ __all__ = [
32
+ "read",
33
+ "read_full",
34
+ "write",
35
+ "format_messages",
36
+ "render_content",
37
+ "render_messages",
38
+ "poml",
39
+ "PomlOptions",
40
+ # types
41
+ "Block",
42
+ "ContentMultiMedia",
43
+ "ContentMultiMediaBinary",
44
+ "ContentMultiMediaJson",
45
+ "Heading",
46
+ "ListBlock",
47
+ "ListItem",
48
+ "Message",
49
+ "MultiMediaBlock",
50
+ "OutputFormat",
51
+ "Paragraph",
52
+ "ReadOptions",
53
+ "ReadResult",
54
+ "RichContent",
55
+ "SerializedNode",
56
+ "Speaker",
57
+ "StyleSheet",
58
+ "ToolDefinition",
59
+ "WriteOptions",
60
+ "FormatSideband",
61
+ ]
62
+
63
+
64
class PomlOptions:
    """Bundled keyword options for poml().

    Attributes mirror the constructor arguments one-to-one; every one
    defaults to None and is interpreted later by poml() (a None format
    falls back to "message_dict" there).
    """

    def __init__(
        self,
        context: dict[str, Any] | None = None,
        read_options: ReadOptions | None = None,
        stylesheet: StyleSheet | None = None,
        source_path: str | None = None,
        format: OutputFormat | None = None,
    ):
        # Plain value holders; no validation or copying is performed.
        self.context = context
        self.read_options = read_options
        self.stylesheet = stylesheet
        self.source_path = source_path
        self.format = format
78
+
79
+
80
def poml(element: str, options: PomlOptions | None = None) -> Any:
    """Convenience: read + write in one call, matching the official SDK's poml() API."""
    cfg = options if options is not None else PomlOptions()

    # Parse the POML markup into blocks (plus optional schema/tools/runtime).
    parsed = read_full(
        element,
        cfg.read_options,
        cfg.context,
        cfg.stylesheet,
        cfg.source_path,
    )

    out_format: OutputFormat = cfg.format or "message_dict"
    rendered = write(parsed.blocks, {"speaker": True})
    assert isinstance(rendered, list)

    # Sideband data (tools / schema / runtime) is only attached when present.
    sideband_present = parsed.schema or (parsed.tools and len(parsed.tools) > 0) or parsed.runtime
    if sideband_present:
        return format_messages(rendered, out_format, FormatSideband(
            tools=parsed.tools,
            schema=parsed.schema,
            runtime=parsed.runtime,
        ))
    return format_messages(rendered, out_format)
@@ -0,0 +1,416 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ import yaml
8
+
9
+ from .xml_parser import XmlElement
10
+ from .types import Block, Paragraph, State
11
+ from .expr import eval_expr, interpolate
12
+ from .style import get_style_prop
13
+
14
+
15
class _IndentedListDumper(yaml.SafeDumper):
    """Custom YAML dumper that indents list items inside mappings (matching Deno @std/yaml)."""
    # PyYAML's default emits block-sequence dashes at the parent mapping's
    # column ("indentless" sequences). Forcing indentless=False here makes
    # every nested sequence indented one level deeper than its key.
    def increase_indent(self, flow: bool = False, indentless: bool = False) -> None:
        return super().increase_indent(flow, False)
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Document component
23
+ # ---------------------------------------------------------------------------
24
+
25
def process_document(node: XmlElement, state: State) -> list[Block]:
    """Render a <document> node: read the file named by its `src` attribute
    (resolved relative to the directory of state.file_path) and emit the
    file's text as a single paragraph. Returns [] when src is missing,
    no file path is known, or the file cannot be read.
    """
    src_attr = node.attrs.get("src")
    if not src_attr or not state.file_path:
        return []

    # src may contain {{...}} placeholders — resolve them first.
    resolved_src = interpolate(src_attr, state.ctx)

    # Everything up to (and including) the last "/" of the current file.
    base_dir = state.file_path[: state.file_path.rfind("/") + 1]
    full_path = base_dir + resolved_src

    try:
        text = Path(full_path).read_text()
    except OSError:
        # Missing/unreadable documents are silently skipped.
        return []

    return [Paragraph(text=text)]
39
+
40
+
41
+ # ---------------------------------------------------------------------------
42
+ # Table component
43
+ # ---------------------------------------------------------------------------
44
+
45
def process_table(node: XmlElement, state: State) -> list[Block]:
    """Render a <table> node to a single text block.

    Records come either from the `records` attribute (an expression
    evaluated against state.ctx) or from a file named by `src`
    (csv / json / jsonl, resolved relative to state.file_path).
    Optional attributes `columns`, `selectedRecords`, `selectedColumns`
    and `maxRecords` reshape the data; `syntax` (attribute, then table
    style, then "markdown") selects csv / tsv / markdown output.
    Returns [] whenever nothing can be rendered.
    """
    syntax = node.attrs.get("syntax") or get_style_prop("table", "syntax", state) or "markdown"

    records: list[dict[str, Any]] = []
    columns: list[str] = []

    records_expr = node.attrs.get("records")
    if records_expr:
        # Inline data: evaluate the (optionally {{...}}-wrapped) expression.
        raw = records_expr.strip()
        if raw.startswith("{{") and raw.endswith("}}"):
            raw = raw[2:-2].strip()
        data = eval_expr(raw, state.ctx)
        if isinstance(data, list) and len(data) > 0:
            first = data[0]
            if isinstance(first, list):
                # List-of-lists: synthesize "Column N" headers.
                records = [
                    {f"Column {i}": val for i, val in enumerate(row)}
                    for row in data
                ]
                columns = list(records[0].keys())
            elif isinstance(first, dict):
                columns = list(first.keys())
                records = data
    else:
        # File-backed data: resolve `src` relative to the current file.
        raw_src = node.attrs.get("src")
        if not raw_src or not state.file_path:
            return []
        src = interpolate(raw_src, state.ctx)

        dir_path = state.file_path[: state.file_path.rfind("/") + 1]
        table_path = dir_path + src
        ext = src.rsplit(".", 1)[-1].lower() if "." in src else ""

        if ext == "csv":
            try:
                content = Path(table_path).read_text()
            except OSError:
                return []
            parsed = _parse_csv_table(content)
            records = parsed["records"]
            columns = parsed["columns"]
        elif ext == "json":
            try:
                content = Path(table_path).read_text()
            except OSError:
                return []
            data = json.loads(content)
            # Only a non-empty list of objects is usable as a table.
            if isinstance(data, list) and len(data) > 0:
                columns = list(data[0].keys())
                records = data
        elif ext == "jsonl" or node.attrs.get("parser") == "jsonl":
            try:
                content = Path(table_path).read_text()
            except OSError:
                return []
            parsed = _parse_jsonl_table(content)
            records = parsed["records"]
            columns = parsed["columns"]
        else:
            # Unknown extension and no explicit jsonl parser override.
            return []

        # Coerce file-loaded cells into bool/int/float columns where every
        # non-empty value agrees on the type.
        # NOTE(review): placement assumes inference applies only to
        # file-backed records, not to `records`-expression data — confirm.
        records = _infer_column_types(records, columns)

    # Apply columns attribute
    columns_attr = node.attrs.get("columns")
    header_map: dict[str, str] = {}
    if columns_attr:
        raw = columns_attr.strip()
        if raw.startswith("{{") and raw.endswith("}}"):
            raw = raw[2:-2].strip()
        col_defs = eval_expr(raw, state.ctx)
        if isinstance(col_defs, list):
            # Each def is expected to be {"field": ..., "header"?: ...}.
            fields = [d["field"] for d in col_defs]
            columns = fields
            for d in col_defs:
                if d.get("header"):
                    header_map[d["field"]] = d["header"]

    # Apply selectedRecords slice
    selected_records = node.attrs.get("selectedRecords")
    if selected_records:
        start, end = parse_python_style_slice(selected_records, len(records))
        records = records[start:end]

    # Apply selectedColumns (skipped for inline `records` expressions)
    selected_columns = node.attrs.get("selectedColumns")
    if selected_columns and not records_expr:
        cols = [c.strip() for c in selected_columns.split(",")]
        # Preserve only columns that actually exist, in the requested order.
        columns = [c for c in cols if c in columns]

    # Apply maxRecords: keep the first maxRecords-1 rows, an ellipsis row,
    # and the final row.
    max_records_str = node.attrs.get("maxRecords", "")
    try:
        max_records = int(max_records_str)
    except (ValueError, TypeError):
        max_records = 0
    if max_records > 0 and len(records) > max_records:
        first = records[: max_records - 1]
        last = records[-1]
        ellipsis_row: dict[str, Any] = {c: "..." for c in columns}
        records = [*first, ellipsis_row, last]

    if not columns or not records:
        return []

    def fmt_cell(v: Any) -> str:
        # NOTE(review): booleans and None both render as empty cells —
        # confirm this matches the reference renderer (booleans might be
        # expected to emit "true"/"false").
        if isinstance(v, bool):
            return ""
        if v is None:
            return ""
        return str(v)

    # Writer options: attribute first, then the table style sheet.
    writer_options: dict[str, Any] = {}
    raw_wo = node.attrs.get("writerOptions")
    if raw_wo is None and state.styles and "table" in state.styles:
        table_style = state.styles["table"]
        if isinstance(table_style, dict):
            raw_wo = table_style.get("writerOptions")
    if raw_wo:
        if isinstance(raw_wo, str):
            try:
                writer_options = json.loads(raw_wo)
            except (json.JSONDecodeError, TypeError):
                # Malformed writerOptions JSON is ignored (best-effort).
                pass
        elif isinstance(raw_wo, dict):
            writer_options = raw_wo

    if syntax == "csv":
        separator = writer_options.get("csvSeparator", ",")
        # csvHeader defaults to True; only an explicit False suppresses it.
        show_header = writer_options.get("csvHeader", True) is not False
        lines: list[str] = []
        if show_header:
            lines.append(separator.join(columns))
        for rec in records:
            lines.append(separator.join(fmt_cell(rec.get(c)) for c in columns))
        return [Paragraph(text="\n".join(lines))]

    if syntax == "tsv":
        header_line = "\t".join(columns)
        body_lines = ["\t".join(fmt_cell(rec.get(c)) for c in columns) for rec in records]
        table = "\n".join([header_line, *body_lines])
        return [Paragraph(text=table)]

    # Default: markdown table
    display_headers = [header_map.get(c, c) for c in columns]

    # Column widths: widest of header and all cell values, minimum 3
    # (so the "---" separator is always valid markdown).
    col_widths: list[int] = []
    for idx, col in enumerate(columns):
        max_w = len(display_headers[idx])
        for rec in records:
            val = fmt_cell(rec.get(col))
            if len(val) > max_w:
                max_w = len(val)
        col_widths.append(max(max_w, 3))

    def pad(s: str, w: int) -> str:
        # Right-pad with spaces to width w (never truncates).
        return s + " " * max(0, w - len(s))

    header_line = "| " + " | ".join(pad(h, col_widths[i]) for i, h in enumerate(display_headers)) + " |"
    sep_line = "| " + " | ".join("-" * w for w in col_widths) + " |"
    body_lines = [
        "| " + " | ".join(pad(fmt_cell(rec.get(c)), col_widths[i]) for i, c in enumerate(columns)) + " |"
        for rec in records
    ]
    table = "\n".join([header_line, sep_line, *body_lines])
    return [Paragraph(text=table)]
212
+
213
+
214
+ # ---------------------------------------------------------------------------
215
+ # Object component
216
+ # ---------------------------------------------------------------------------
217
+
218
def process_object(node: XmlElement, state: State) -> list[Block]:
    """Render an <object> node: evaluate its `data` expression and emit the
    value as a fenced code block in json (default), yaml, or xml syntax.
    Returns [] when there is no data attribute or it evaluates to None.
    """
    expr = node.attrs.get("data")
    if not expr:
        return []

    # Strip an optional {{...}} wrapper before evaluation.
    stripped = expr.strip()
    if stripped.startswith("{{") and stripped.endswith("}}"):
        stripped = stripped[2:-2].strip()
    value = eval_expr(stripped, state.ctx)
    if value is None:
        return []

    syntax = node.attrs.get("syntax", "json")

    if syntax == "yaml":
        # Block style, effectively unlimited width, insertion order kept.
        dumped = yaml.dump(
            value,
            default_flow_style=False,
            width=1000000,
            sort_keys=False,
            Dumper=_IndentedListDumper,
        ).rstrip()
        body = "```yaml\n" + dumped + "\n```"
    elif syntax == "xml":
        body = "```xml\n" + _object_to_xml(value) + "\n```"
    else:
        # Default: JSON
        body = "```json\n" + json.dumps(value, indent=2) + "\n```"

    return [Paragraph(text=body)]
242
+
243
+
244
+ # ---------------------------------------------------------------------------
245
+ # Tree component
246
+ # ---------------------------------------------------------------------------
247
+
248
def process_tree(node: XmlElement, state: State) -> list[Block]:
    """Render a <tree> node: one heading per item (slash-joined path),
    nesting children one heading level deeper. When showContent is set,
    each item's `value` is emitted as a fenced block whose language tag
    is taken from the item name's extension.
    """
    from .types import Heading

    expr = node.attrs.get("items")
    if not expr:
        return []

    # Strip an optional {{...}} wrapper before evaluation.
    stripped = expr.strip()
    if stripped.startswith("{{") and stripped.endswith("}}"):
        stripped = stripped[2:-2].strip()
    tree_items = eval_expr(stripped, state.ctx)
    if not isinstance(tree_items, list):
        return []

    include_content = node.attrs.get("showContent") in ("true", True)
    out: list[Block] = []

    def emit(children: list[dict[str, Any]], level: int, prefix: str) -> None:
        # Depth-first, pre-order: heading, then content, then children.
        for entry in children:
            entry_name = entry.get("name", "")
            path = f"{prefix}/{entry_name}" if prefix else entry_name
            out.append(Heading(depth=level, text=path))

            if include_content and "value" in entry:
                suffix = entry_name.rsplit(".", 1)[-1] if "." in entry_name else ""
                out.append(Paragraph(text=f"```{suffix}\n{entry['value']}\n```"))

            if "children" in entry:
                emit(entry["children"], level + 1, path)

    emit(tree_items, state.depth, "")
    return out
280
+
281
+
282
+ # ---------------------------------------------------------------------------
283
+ # Shared helpers
284
+ # ---------------------------------------------------------------------------
285
+
286
def parse_python_style_slice(slice_str: str, total_length: int) -> tuple[int, int]:
    """Parse a Python-style slice or index string into a (start, end) pair.

    Supports ":", "N:", ":N", "N:M" and a bare index "N"; negative ends
    and bare negative indices are normalized against total_length.
    Steps are not supported. Raises ValueError on non-numeric input
    (propagated from int()).
    """
    if slice_str == ":":
        return (0, total_length)
    if slice_str.endswith(":"):
        # "N:" — open-ended; a negative N is valid as-is for list slicing.
        return (int(slice_str[:-1]), total_length)
    if slice_str.startswith(":"):
        end = int(slice_str[1:])
        return (0, total_length + end if end < 0 else end)
    if ":" in slice_str:
        parts = slice_str.split(":")
        s, e = int(parts[0]), int(parts[1])
        return (s, total_length + e if e < 0 else e)
    # Bare index: normalize negatives so "-1" selects the last element
    # instead of producing the empty slice (-1, 0).
    index = int(slice_str)
    if index < 0:
        index += total_length
    return (index, index + 1)
300
+
301
+
302
+ # ---------------------------------------------------------------------------
303
+ # Internal helpers
304
+ # ---------------------------------------------------------------------------
305
+
306
+ def _parse_csv_table(content: str) -> dict[str, Any]:
307
+ lines = content.strip().split("\n")
308
+ if not lines:
309
+ return {"records": [], "columns": []}
310
+ headers = [h.strip() for h in lines[0].split(",")]
311
+ records = []
312
+ for line in lines[1:]:
313
+ vals = [v.strip() for v in line.split(",")]
314
+ obj: dict[str, str] = {}
315
+ for i, h in enumerate(headers):
316
+ obj[h] = vals[i] if i < len(vals) else ""
317
+ records.append(obj)
318
+ return {"records": records, "columns": headers}
319
+
320
+
321
+ def _parse_jsonl_table(content: str) -> dict[str, Any]:
322
+ lines = [l for l in content.strip().split("\n") if l.strip()]
323
+ records = [json.loads(line) for line in lines]
324
+ columns = list(records[0].keys()) if records else []
325
+ return {"records": records, "columns": columns}
326
+
327
+
328
+ def _infer_column_types(
329
+ records: list[dict[str, Any]], columns: list[str]
330
+ ) -> list[dict[str, Any]]:
331
+ if not records:
332
+ return records
333
+
334
+ col_types: dict[str, str] = {}
335
+ for col in columns:
336
+ col_type = "string"
337
+ all_empty = True
338
+ for rec in records:
339
+ raw_val = rec.get(col, "")
340
+ # Normalize to lowercase string (matches JS String() behavior)
341
+ val = str(raw_val).strip() if not isinstance(raw_val, bool) else str(raw_val).lower()
342
+ if val == "":
343
+ continue
344
+ all_empty = False
345
+ if val in ("true", "false"):
346
+ if col_type == "string":
347
+ col_type = "boolean"
348
+ else:
349
+ try:
350
+ num = float(val)
351
+ if col_type == "string":
352
+ col_type = "integer" if num == int(num) and "." not in val else "float"
353
+ except ValueError:
354
+ col_type = "string"
355
+ break
356
+ if all_empty:
357
+ col_type = "string"
358
+ col_types[col] = col_type
359
+
360
+ result = []
361
+ for rec in records:
362
+ out: dict[str, Any] = {}
363
+ for col in columns:
364
+ raw_val = rec.get(col, "")
365
+ # For booleans, use the native value directly
366
+ if isinstance(raw_val, bool) and col_types[col] == "boolean":
367
+ out[col] = raw_val
368
+ continue
369
+ raw = str(raw_val).strip() if not isinstance(raw_val, bool) else str(raw_val).lower()
370
+ ct = col_types[col]
371
+ if ct == "boolean":
372
+ out[col] = raw == "true"
373
+ elif ct == "integer":
374
+ out[col] = int(raw) if raw else ""
375
+ elif ct == "float":
376
+ out[col] = float(raw) if raw else ""
377
+ else:
378
+ out[col] = raw
379
+ result.append(out)
380
+ return result
381
+
382
+
383
def _object_to_xml(data: Any, indent: str = "", wrap_in_item: bool = False) -> str:
    """Serialize plain data to indented XML text.

    Lists emit one <item> wrapper per element, dicts emit one tag per
    key (empty values become self-closing tags), and scalars become
    escaped text at the current indent.
    """
    if isinstance(data, list):
        return "\n".join(_object_to_xml(entry, indent, True) for entry in data)

    if not isinstance(data, dict):
        # Scalar leaf: escaped, JS-style stringified.
        return indent + _escape_xml_content(_js_string(data))

    # Children of an <item> wrapper sit one space deeper.
    body_indent = indent + " " if wrap_in_item else indent
    rendered = []
    for key, value in data.items():
        if isinstance(value, (dict, list)):
            nested = _object_to_xml(value, body_indent + " ")
            rendered.append(f"{body_indent}<{key}>\n{nested}\n{body_indent}</{key}>")
            continue
        text = _js_string(value)
        if text == "":
            rendered.append(f"{body_indent}<{key}/>")
        else:
            rendered.append(f"{body_indent}<{key}>{_escape_xml_content(text)}</{key}>")
    joined = "\n".join(rendered)

    if wrap_in_item:
        return f"{indent}<item>\n{joined}\n{indent}</item>"
    return joined
404
+
405
+
406
+ def _escape_xml_content(s: str) -> str:
407
+ return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
408
+
409
+
410
+ def _js_string(val: Any) -> str:
411
+ """Convert a value to string matching JavaScript's String() behavior."""
412
+ if val is None:
413
+ return ""
414
+ if isinstance(val, bool):
415
+ return "true" if val else "false"
416
+ return str(val)