poml 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +239 -0
  4. data/TUTORIAL.md +987 -0
  5. data/bin/poml +80 -0
  6. data/examples/101_explain_character.poml +30 -0
  7. data/examples/102_render_xml.poml +40 -0
  8. data/examples/103_word_todos.poml +27 -0
  9. data/examples/104_financial_analysis.poml +33 -0
  10. data/examples/105_write_blog_post.poml +48 -0
  11. data/examples/106_research.poml +36 -0
  12. data/examples/107_read_report_pdf.poml +4 -0
  13. data/examples/201_orders_qa.poml +50 -0
  14. data/examples/202_arc_agi.poml +36 -0
  15. data/examples/301_generate_poml.poml +46 -0
  16. data/examples/README.md +50 -0
  17. data/examples/_generate_expects.py +35 -0
  18. data/examples/assets/101_jerry_mouse.jpg +0 -0
  19. data/examples/assets/101_tom_and_jerry.docx +0 -0
  20. data/examples/assets/101_tom_cat.jpg +0 -0
  21. data/examples/assets/101_tom_introduction.txt +9 -0
  22. data/examples/assets/103_prompt_wizard.docx +0 -0
  23. data/examples/assets/104_chart_normalized_price.png +0 -0
  24. data/examples/assets/104_chart_price.png +0 -0
  25. data/examples/assets/104_mag7.xlsx +0 -0
  26. data/examples/assets/107_usenix_paper.pdf +0 -0
  27. data/examples/assets/201_order_instructions.json +7 -0
  28. data/examples/assets/201_orderlines.csv +2 -0
  29. data/examples/assets/201_orders.csv +3 -0
  30. data/examples/assets/202_arc_agi_data.json +1 -0
  31. data/examples/expects/101_explain_character.txt +117 -0
  32. data/examples/expects/102_render_xml.txt +28 -0
  33. data/examples/expects/103_word_todos.txt +121 -0
  34. data/examples/expects/104_financial_analysis.txt +86 -0
  35. data/examples/expects/105_write_blog_post.txt +41 -0
  36. data/examples/expects/106_research.txt +29 -0
  37. data/examples/expects/107_read_report_pdf.txt +151 -0
  38. data/examples/expects/201_orders_qa.txt +44 -0
  39. data/examples/expects/202_arc_agi.txt +64 -0
  40. data/examples/expects/301_generate_poml.txt +153 -0
  41. data/examples/ruby_expects/101_explain_character.txt +17 -0
  42. data/examples/ruby_expects/102_render_xml.txt +28 -0
  43. data/examples/ruby_expects/103_word_todos.txt +14 -0
  44. data/examples/ruby_expects/104_financial_analysis.txt +0 -0
  45. data/examples/ruby_expects/105_write_blog_post.txt +57 -0
  46. data/examples/ruby_expects/106_research.txt +5 -0
  47. data/examples/ruby_expects/107_read_report_pdf.txt +403 -0
  48. data/examples/ruby_expects/201_orders_qa.txt +41 -0
  49. data/examples/ruby_expects/202_arc_agi.txt +17 -0
  50. data/examples/ruby_expects/301_generate_poml.txt +17 -0
  51. data/lib/poml/components/base.rb +132 -0
  52. data/lib/poml/components/content.rb +156 -0
  53. data/lib/poml/components/data.rb +346 -0
  54. data/lib/poml/components/examples.rb +55 -0
  55. data/lib/poml/components/instructions.rb +93 -0
  56. data/lib/poml/components/layout.rb +50 -0
  57. data/lib/poml/components/lists.rb +82 -0
  58. data/lib/poml/components/styling.rb +36 -0
  59. data/lib/poml/components/text.rb +8 -0
  60. data/lib/poml/components/workflow.rb +63 -0
  61. data/lib/poml/components.rb +47 -0
  62. data/lib/poml/components_new.rb +297 -0
  63. data/lib/poml/components_old.rb +1096 -0
  64. data/lib/poml/context.rb +53 -0
  65. data/lib/poml/parser.rb +153 -0
  66. data/lib/poml/renderer.rb +147 -0
  67. data/lib/poml/template_engine.rb +66 -0
  68. data/lib/poml/version.rb +5 -0
  69. data/lib/poml.rb +53 -0
  70. data/media/logo-16-purple.png +0 -0
  71. data/media/logo-64-white.png +0 -0
  72. metadata +149 -0
@@ -0,0 +1,1096 @@
1
+ module Poml
2
# Base class for all POML components.
#
# Wraps one parsed element together with the render context and supplies the
# helpers shared by concrete components: stylesheet application, XML
# serialization, attribute lookup, child rendering and caption transforms.
class Component
  attr_reader :element, :context

  # @param element the parsed node; this class reads #tag_name, #attributes
  #   and #children (whose members answer #text? and #component?)
  # @param context the render context; this class reads #stylesheet and
  #   calls #determine_syntax
  def initialize(element, context)
    @element = element
    @context = context
  end

  # Subclasses must override this and return the rendered String.
  #
  # @raise [NotImplementedError] always, in the base class
  def render
    raise NotImplementedError, "Components must implement render method"
  end

  protected

  # Copies stylesheet rules onto the element without overwriting attributes
  # that are already set: first the rules keyed by tag name, then the rules
  # keyed by ".<class name>" when the element carries a class name.
  def apply_stylesheet
    merge_style_rules(@context.stylesheet[@element.tag_name.to_s])

    class_name = @element.attributes['classname'] || @element.attributes['className']
    merge_style_rules(@context.stylesheet[".#{class_name}"]) if class_name
  end

  # @return [Boolean] true when the context selects 'xml' syntax for this element
  def xml_mode?
    @context.determine_syntax(@element) == 'xml'
  end

  # Serializes content as an XML element.
  #
  # @param tag_name [String] element name to emit
  # @param content [String, nil] body; the rendered children when nil
  # @param attributes [Hash] attribute name => value pairs (written verbatim)
  # @return [String] a self-closing tag for blank content, a multi-line
  #   indented element when the content contains <item> tags, otherwise a
  #   single-line element; always newline-terminated
  def render_as_xml(tag_name, content = nil, attributes = {})
    content ||= render_children
    attrs_str = attributes.map { |k, v| " #{k}=\"#{v}\"" }.join('')

    return "<#{tag_name}#{attrs_str}/>\n" if content.strip.empty?
    return "<#{tag_name}#{attrs_str}>#{content}</#{tag_name}>\n" unless content.include?('<item>')

    # Nested items: put every non-blank line on its own indented row.
    indented_content = content.split("\n").map { |line|
      line.strip.empty? ? "" : " #{line}"
    }.join("\n").strip
    "<#{tag_name}#{attrs_str}>\n #{indented_content}\n</#{tag_name}>\n"
  end

  # Reads an attribute as a String.
  #
  # The lower-cased key is tried first, so behaviour for attributes stored
  # lower-cased is unchanged. The key is then tried exactly as given, because
  # apply_stylesheet stores rules under the stylesheet's own spelling
  # (e.g. 'captionStyle'); previously those values were never found.
  #
  # @param name [String, Symbol] attribute name
  # @param default value returned when the attribute is missing or is neither
  #   a String nor a REXML::Attribute
  # @return [String, Object] the attribute text or +default+
  def get_attribute(name, default = nil)
    key = name.to_s
    attributes = @element.attributes
    value = attributes[key.downcase]
    value = attributes[key] if value.nil?

    if value.is_a?(String)
      value
    elsif defined?(REXML::Attribute) && value.is_a?(REXML::Attribute)
      # Only consult REXML when it is loaded, so plain-Hash attributes work
      # without the library.
      value.value
    else
      default
    end
  end

  # Renders every child through Components.render_element and concatenates
  # the results, inserting a blank line wherever a text child is directly
  # followed by a component child.
  #
  # @return [String] '' when the element has no children
  def render_children
    children = @element.children
    return '' if children.empty?

    rendered = children.map { |child| Components.render_element(child, @context) }

    pieces = []
    rendered.each_with_index do |text, index|
      pieces << text
      following = children[index + 1]
      pieces << "\n\n" if following && children[index].text? && following.component?
    end
    pieces.join('')
  end

  # Applies the stylesheet's 'captionTextTransform' to a caption. The rule is
  # looked up under the component's own name (class name minus "Component",
  # lower-cased) and then under 'cp'.
  #
  # @param text [String, nil]
  # @return [String, nil] transformed text; nil/empty input is returned as is
  def apply_text_transform(text)
    return text if text.nil? || text.empty?

    component_name = self.class.name.split('::').last.gsub('Component', '').downcase
    transform = @context.stylesheet.dig(component_name, 'captionTextTransform') ||
                @context.stylesheet.dig('cp', 'captionTextTransform')

    case transform
    when 'upper' then text.upcase
    when 'lower' then text.downcase
    when 'capitalize' then text.split(' ').map(&:capitalize).join(' ')
    else text
    end
  end

  private

  # Sets each rule as an element attribute unless one is already present.
  def merge_style_rules(rules)
    (rules || {}).each do |attr, value|
      @element.attributes[attr] ||= value
    end
  end
end
120
+
121
# Leaf component for plain text nodes.
class TextComponent < Component
  # @return the element's content, passed through untouched
  def render
    element.content
  end
end
127
+
128
# Renders a <role> element, either as XML or as a captioned section.
class RoleComponent < Component
  # The body is the element's own content, or its rendered children when the
  # content is empty. Outside XML mode the caption (default "Role") is laid
  # out according to captionStyle; unknown styles fall back to a header.
  #
  # @return [String]
  def render
    apply_stylesheet

    raw = @element.content
    body = raw.empty? ? render_children : raw
    return render_as_xml('role', body) if xml_mode?

    title = apply_text_transform(get_attribute('caption', 'Role'))
    case get_attribute('captionStyle', 'header')
    when 'bold'   then "**#{title}:** #{body}\n\n"
    when 'plain'  then "#{title}: #{body}\n\n"
    when 'hidden' then "#{body}\n\n"
    else "# #{title}\n\n#{body}\n\n" # 'header' and anything unrecognised
    end
  end
end
156
+
157
# Renders a <task> element, either as XML or as a captioned section.
class TaskComponent < Component
  # With no children the stripped element content is used; otherwise the
  # children are rendered unstripped so spacing between text and nested
  # elements survives. The header and hidden styles avoid appending a second
  # blank line when the body already ends with one.
  #
  # @return [String]
  def render
    apply_stylesheet

    body = @element.children.empty? ? @element.content.strip : render_children
    return render_as_xml('task', body) if xml_mode?

    title = apply_text_transform(get_attribute('caption', 'Task'))
    style = get_attribute('captionStyle', 'header')

    return "**#{title}:** #{body}\n\n" if style == 'bold'
    return "#{title}: #{body}\n\n" if style == 'plain'

    tail = body.end_with?("\n\n") ? "" : "\n\n"
    if style == 'hidden'
      "#{body}#{tail}"
    else
      # 'header' and anything unrecognised
      "# #{title}\n\n#{body}#{tail}"
    end
  end
end
195
+
196
# Renders a <hint> element, either as XML or as a captioned section.
class HintComponent < Component
  # The body is the element's own content, or its rendered children when the
  # content is empty.
  #
  # In XML mode the hint is emitted as a <hint> element, and the caption goes
  # through the stylesheet's captionTextTransform, matching how the Role and
  # Task components in this file behave. Previously both were skipped, so a
  # hint inside an XML prompt was still rendered as markdown.
  #
  # @return [String]
  def render
    apply_stylesheet

    content = @element.content.empty? ? render_children : @element.content
    return render_as_xml('hint', content) if xml_mode?

    caption = apply_text_transform(get_attribute('caption', 'Hint'))

    case get_attribute('captionStyle', 'header')
    when 'bold'
      "**#{caption}:** #{content}\n\n"
    when 'plain'
      "#{caption}: #{content}\n\n"
    when 'hidden'
      "#{content}\n\n"
    else
      # 'header' and anything unrecognised
      "# #{caption}\n\n#{content}\n\n"
    end
  end
end
219
+
220
# Document component: reads an external file and returns its text.
# PDF files are converted with the external `pdftotext` tool; every other
# file is read verbatim.
class DocumentComponent < Component
  # Page count assumed when `pdfinfo` is unavailable or yields no page count.
  FALLBACK_PAGE_COUNT = 100

  # Captures the src, selectedPages (either spelling) and syntax attributes.
  # @syntax is stored but not read anywhere in this class.
  def initialize(element, context)
    super
    @src = element.attributes['src']
    @selected_pages = element.attributes['selectedpages'] || element.attributes['selectedPages']
    @syntax = element.attributes['syntax'] || 'text'
  end

  # @param context unused; kept so existing callers passing an argument still work
  # @return [String] the document text, or a bracketed "[Document: ...]"
  #   placeholder when src is missing, the file is not found, or reading fails
  def render(context = nil)
    return "[Document: no src specified]" unless @src

    file_path = candidate_paths.find { |path| File.exist?(path) }
    return "[Document: #{@src} (not found)]" unless file_path

    if file_path.downcase.end_with?('.pdf')
      read_pdf_content(file_path)
    else
      File.read(file_path)
    end
  rescue => e
    "[Document: #{@src} (error reading: #{e.message})]"
  end

  private

  # Locations tried in order: src as given (relative to the working
  # directory), next to the POML source file (as the Table and Let components
  # resolve their src), ./examples, and the project-root examples directory.
  def candidate_paths
    paths = [@src]

    source = @context.respond_to?(:source_path) ? @context.source_path : nil
    paths << File.join(File.dirname(source), @src) if source

    paths << File.join(File.expand_path('examples'), @src)
    project_root = File.expand_path('..', File.dirname(__dir__))
    paths << File.join(project_root, 'examples', @src)
    paths
  end

  # Extracts text from a PDF, honouring selectedPages when present.
  #
  # selectedPages uses Python slice notation over 0-indexed pages with an
  # exclusive end, while pdftotext's -f/-l are 1-indexed and inclusive: the
  # slice [start, stop) therefore maps to -f start+1 -l stop.
  def read_pdf_content(file_path)
    argv = ['pdftotext']

    if @selected_pages
      start_page, end_page = parse_python_style_slice(@selected_pages, get_pdf_page_count(file_path))
      # An empty slice selects no pages (previously one page was extracted).
      return '' if end_page <= start_page

      argv.push('-f', (start_page + 1).to_s, '-l', end_page.to_s)
    end

    argv.push(file_path, '-')
    output, ok = run_command(argv)
    ok ? output : "[Document: #{@src} (error extracting PDF)]"
  end

  # Parses "1:3", ":3", "3:", "3" or ":" into [start, stop) page indices.
  # Negative indices count back from total_length, as in Python.
  #
  # @param slice [String] the slice expression
  # @param total_length [Integer] number of pages used for open ends and negatives
  # @return [Array(Integer, Integer)] start (inclusive) and stop (exclusive)
  def parse_python_style_slice(slice, total_length)
    spec = slice.to_s.strip
    start_text, colon, stop_text = spec.partition(':')

    if colon.empty?
      index = resolve_index(spec.to_i, total_length)
      return [index, index + 1]
    end

    start = start_text.strip.empty? ? 0 : resolve_index(start_text.to_i, total_length)
    stop = stop_text.strip.empty? ? total_length : resolve_index(stop_text.to_i, total_length)
    [start, stop]
  end

  # Maps a negative index onto the page range, never going below zero.
  def resolve_index(index, total_length)
    index.negative? ? [index + total_length, 0].max : index
  end

  # Asks `pdfinfo` for the page count.
  #
  # @return [Integer] the reported count, or FALLBACK_PAGE_COUNT when pdfinfo
  #   is missing, fails, or prints no "Pages:" line
  def get_pdf_page_count(file_path)
    output, ok = run_command(['pdfinfo', file_path], quiet: true)
    pages = ok && output && output[/^Pages:\s+(\d+)/, 1]
    pages ? pages.to_i : FALLBACK_PAGE_COUNT
  end

  # Runs an external command without a shell, so file names containing
  # quotes, `$(...)` or other metacharacters are passed as plain arguments.
  #
  # @param argv [Array<String>] program name followed by its arguments
  # @param quiet [Boolean] discard the command's stderr when true
  # @return [Array(String, Boolean)] captured stdout and success flag;
  #   [nil, false] when the program is not installed
  def run_command(argv, quiet: false)
    options = quiet ? { err: File::NULL } : {}
    output = IO.popen(argv, **options, &:read)
    [output, $?.success?]
  rescue Errno::ENOENT
    [nil, false]
  end
end
327
+
328
# Table component for displaying tabular data.
#
# Rows come either from a file (`src`: CSV, TSV, JSON or JSONL) or from the
# `records` attribute (JSON). The rows can be narrowed with selectedColumns,
# selectedRecords, maxRecords and maxColumns, and are rendered as raw CSV/TSV,
# XML, or a markdown table.
#
# Internally a table is a Hash { records: [Hash, ...], columns: [{ field:, header: }, ...] }.
class TableComponent < Component
  require 'csv'
  require 'json'

  # File extension => parser used when parser="auto".
  PARSER_BY_EXTENSION = {
    '.csv' => 'csv',
    '.tsv' => 'tsv',
    '.json' => 'json',
    '.jsonl' => 'jsonl'
  }.freeze

  # @return [String] the rendered table; '' when there is nothing to show
  def render
    apply_stylesheet

    src = get_attribute('src')
    records_attr = get_attribute('records')
    parser = get_attribute('parser', 'auto')
    syntax = get_attribute('syntax')

    data = if src
             load_table_data(src, parser)
           elsif records_attr
             parse_records_attribute(records_attr)
           else
             empty_table
           end

    data = apply_selection(
      data,
      get_attribute('selectedColumns'),
      get_attribute('selectedRecords'),
      get_attribute('maxRecords'),
      get_attribute('maxColumns')
    )

    if syntax == 'tsv' || syntax == 'csv'
      render_table_raw(data, syntax)
    elsif xml_mode?
      render_table_xml(data)
    else
      render_table_markdown(data)
    end
  end

  private

  def empty_table
    { records: [], columns: [] }
  end

  # Loads a table from a file. Relative paths are resolved against the
  # directory of the context's source_path, or the working directory when
  # there is none. Best-effort: a missing file, unknown parser or any read or
  # parse error yields an empty table.
  def load_table_data(src, parser)
    file_path = if src.start_with?('/')
                  src
                else
                  base_path = @context.source_path ? File.dirname(@context.source_path) : Dir.pwd
                  File.join(base_path, src)
                end
    return empty_table unless File.exist?(file_path)

    # Unknown extensions are treated as CSV.
    parser = PARSER_BY_EXTENSION.fetch(File.extname(file_path).downcase, 'csv') if parser == 'auto'

    case parser
    when 'csv' then parse_csv_file(file_path)
    when 'tsv' then parse_tsv_file(file_path)
    when 'json' then parse_json_file(file_path)
    when 'jsonl' then parse_jsonl_file(file_path)
    else empty_table
    end
  rescue StandardError
    empty_table
  end

  def parse_csv_file(file_path)
    parse_delimited_file(file_path, ',')
  end

  def parse_tsv_file(file_path)
    parse_delimited_file(file_path, "\t")
  end

  # Reads a header-bearing delimited file; every header becomes a column.
  def parse_delimited_file(file_path, col_sep)
    data = CSV.read(file_path, headers: true, col_sep: col_sep)
    columns = data.headers.map { |header| { field: header, header: header } }
    { records: data.map(&:to_h), columns: columns }
  end

  def parse_json_file(file_path)
    table_from_parsed(JSON.parse(File.read(file_path)))
  end

  # One JSON document per non-blank line.
  def parse_jsonl_file(file_path)
    records = File.readlines(file_path).map(&:strip).reject(&:empty?).map { |line| JSON.parse(line) }
    table_from_parsed(records)
  end

  # Accepts a JSON string or an already-parsed value. Malformed JSON yields an
  # empty table, matching load_table_data's best-effort behaviour, instead of
  # raising out of #render.
  def parse_records_attribute(records_attr)
    parsed = records_attr.is_a?(String) ? JSON.parse(records_attr) : records_attr
    table_from_parsed(parsed)
  rescue JSON::ParserError
    empty_table
  end

  # Wraps a single object in an array and derives columns from the first record.
  def table_from_parsed(parsed)
    records = parsed.is_a?(Array) ? parsed : [parsed]
    { records: records, columns: columns_from(records) }
  end

  def columns_from(records)
    first = records.first
    first.is_a?(Hash) ? first.keys.map { |key| { field: key, header: key } } : []
  end

  # Narrows the table.
  #
  # @param selected_columns [Array, String, nil] column names (Array or JSON
  #   array string) or a Python-style slice such as "1:3"
  # @param selected_records [Array, String, nil] row indices (Array or JSON
  #   array string) or a Python-style slice
  # @param max_records [Integer, String, nil] row cap; excess rows are replaced
  #   by a top half, an ellipsis row and a bottom half
  # @param max_columns [Integer, String, nil] column cap
  # @return [Hash] { records:, columns: }
  def apply_selection(data, selected_columns, selected_records, max_records, max_columns)
    records = data[:records]
    columns = data[:columns]

    columns, records = select_columns(columns, records, selected_columns) if selected_columns && columns
    records = select_records(records, selected_records) if selected_records
    records = truncate_records(records, columns, parse_limit(max_records))

    column_limit = parse_limit(max_columns)
    if column_limit && columns && columns.length > column_limit
      columns = columns.first(column_limit)
      records = project(records, columns)
    end

    { records: records, columns: columns }
  end

  def select_columns(columns, records, selection)
    names = parse_list(selection)
    chosen = if names
               names.map do |name|
                 columns.find { |col| col[:field] == name } || { field: name, header: name }
               end
             elsif selection.is_a?(String) && selection.include?(':')
               start_idx, end_idx = parse_slice(selection, columns.length)
               # A start past the end makes Array#[] return nil.
               columns[start_idx...end_idx] || []
             end
    return [columns, records] unless chosen

    [chosen, project(records, chosen)]
  end

  def select_records(records, selection)
    indices = parse_list(selection)
    if indices
      indices.map { |idx| idx.is_a?(Integer) ? records[idx] : nil }.compact
    elsif selection.is_a?(String) && selection.include?(':')
      start_idx, end_idx = parse_slice(selection, records.length)
      records[start_idx...end_idx] || []
    else
      records
    end
  end

  # Keeps the first ceil(limit/2) and last floor(limit/2) rows with an
  # ellipsis row between them. Array#last(0) is [], whereas the former
  # records[-0..-1] returned every row when the bottom half was empty.
  def truncate_records(records, columns, limit)
    return records unless limit && records.length > limit

    top_rows = (limit / 2.0).ceil
    bottom_rows = limit - top_rows
    fields = columns.empty? ? columns_from(records).map { |col| col[:field] } : columns.map { |col| col[:field] }
    ellipsis_record = fields.each_with_object({}) { |field, record| record[field] = '...' }
    records.first(top_rows) + [ellipsis_record] + records.last(bottom_rows)
  end

  # Reduces every Hash record to the given columns; other records pass through.
  def project(records, columns)
    fields = columns.map { |col| col[:field] }
    records.map do |record|
      next record unless record.is_a?(Hash)

      fields.each_with_object({}) { |field, slim| slim[field] = record[field] }
    end
  end

  # Attribute values arrive as Strings, so "5" must become 5 before it is
  # compared with a length. Anything that is not a non-negative integer means
  # "no limit".
  def parse_limit(value)
    number = Integer(value, exception: false)
    number if number && number >= 0
  end

  # Returns an Array for an Array or a JSON-array string, otherwise nil.
  def parse_list(value)
    return value if value.is_a?(Array)
    return nil unless value.is_a?(String) && value.strip.start_with?('[')

    parsed = JSON.parse(value)
    parsed.is_a?(Array) ? parsed : nil
  rescue JSON::ParserError
    nil
  end

  # Parses Python-style slice notation like "1:3"; a missing end means
  # total_length and a missing start means 0.
  def parse_slice(slice_str, total_length)
    parts = slice_str.split(':')
    start_idx = parts[0].to_i
    end_idx = parts[1] ? parts[1].to_i : total_length
    [start_idx, end_idx]
  end

  # Returns [records, columns] ready for rendering, inferring columns from the
  # first record when none are known; nil when there is nothing to render.
  def renderable(data)
    records = data[:records]
    columns = data[:columns]
    return nil if records.empty?

    columns = columns_from(records) if columns.empty?
    columns.empty? ? nil : [records, columns]
  end

  def header_texts(columns)
    columns.map { |col| col[:header] || col[:field] }
  end

  # Cell values as Strings; nil (or a non-Hash record) becomes ''.
  def row_texts(record, columns)
    columns.map { |col| record.is_a?(Hash) ? record[col[:field]].to_s : '' }
  end

  def render_table_markdown(data)
    records, columns = renderable(data)
    return '' unless records

    headers = header_texts(columns)
    lines = ["| #{headers.join(' | ')} |", "| #{headers.map { '---' }.join(' | ')} |"]
    records.each { |record| lines << "| #{row_texts(record, columns).join(' | ')} |" }
    lines.join("\n")
  end

  # Joins values with a tab (tsv) or comma (csv); values are not quoted.
  def render_table_raw(data, syntax)
    records, columns = renderable(data)
    return '' unless records

    separator = syntax == 'tsv' ? "\t" : ","
    lines = [header_texts(columns).join(separator)]
    records.each { |record| lines << row_texts(record, columns).join(separator) }
    lines.join("\n")
  end

  def render_table_xml(data)
    records, columns = renderable(data)
    return '' unless records

    result = []
    result << '<table>'
    result << ' <thead>'
    result << ' <trow>'
    header_texts(columns).each { |header| result << " <tcell>#{escape_xml(header)}</tcell>" }
    result << ' </trow>'
    result << ' </thead>'
    result << ' <tbody>'

    records.each do |record|
      result << ' <trow>'
      row_texts(record, columns).each { |text| result << " <tcell>#{escape_xml(text)}</tcell>" }
      result << ' </trow>'
    end

    result << ' </tbody>'
    result << '</table>'
    result.join("\n")
  end

  # Escapes &, < and > for use as XML text.
  def escape_xml(text)
    text.to_s.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
  end
end
655
+
656
# Renders an <img> element as a textual placeholder.
class ImageComponent < Component
  # With syntax="multimedia" the result is "[Image: src]" followed by the alt
  # text in parentheses when there is any. With any other syntax the alt text
  # is returned on its own, or "[Image: src]" when alt is empty.
  #
  # @return [String]
  def render
    apply_stylesheet

    src = get_attribute('src')
    alt = get_attribute('alt', '')
    placeholder = "[Image: #{src}]"

    return placeholder if alt.empty?

    get_attribute('syntax', 'text') == 'multimedia' ? "#{placeholder} (#{alt})" : alt
  end
end
672
+
673
# Paragraph component (basic variant).
# NOTE: ParagraphComponent is reopened further down in this file and #render
# is defined again there, so the later definition replaces this one at load time.
class ParagraphComponent < Component
  # @return [String] the element content (or rendered children when the
  #   content is empty) followed by a blank line
  def render
    apply_stylesheet

    text = @element.content
    text = render_children if text.empty?
    "#{text}\n\n"
  end
end
682
+
683
# Renders an <example> element.
class ExampleComponent < Component
  # @return [String] the rendered children as they are when the context is in
  #   chat mode, otherwise the children under an "## Example" heading
  def render
    apply_stylesheet

    body = render_children
    return body if @context.chat

    "## Example\n\n#{body}\n\n"
  end
end
696
+
697
# Renders an <input> element.
class InputComponent < Component
  # @return [String] the bare body in chat mode, otherwise the body prefixed
  #   with a bold "Input:" label
  def render
    apply_stylesheet

    raw = @element.content
    body = raw.empty? ? render_children : raw
    return body if @context.chat

    "**Input:** #{body}\n\n"
  end
end
710
+
711
# Renders an <output> element.
class OutputComponent < Component
  # @return [String] the bare body in chat mode, otherwise the body prefixed
  #   with a bold "Output:" label
  def render
    apply_stylesheet

    raw = @element.content
    body = raw.empty? ? render_children : raw
    return body if @context.chat

    "**Output:** #{body}\n\n"
  end
end
724
+
725
# Renders an output-format element, either as XML or as a captioned section.
class OutputFormatComponent < Component
  # The body is the element's own content, or its rendered children when the
  # content is empty. Outside XML mode the caption (default "Output Format")
  # is laid out according to captionStyle; unknown styles fall back to a header.
  #
  # @return [String]
  def render
    apply_stylesheet

    raw = @element.content
    body = raw.empty? ? render_children : raw
    return render_as_xml('outputFormat', body) if xml_mode?

    title = get_attribute('caption', 'Output Format')
    case get_attribute('captionStyle', 'header')
    when 'bold'   then "**#{title}:** #{body}\n\n"
    when 'plain'  then "#{title}: #{body}\n\n"
    when 'hidden' then "#{body}\n\n"
    else "# #{title}\n\n#{body}\n\n" # 'header' and anything unrecognised
    end
  end
end
753
+
754
# Renders a <list> element. Only children tagged :item are considered.
class ListComponent < Component
  # In XML mode the items are rendered back to back with no list wrapper.
  # Otherwise each item becomes one bulleted or numbered entry, the entries
  # are joined with newlines, and a blank line is appended.
  #
  # @return [String]
  def render
    apply_stylesheet

    list_items = @element.children.select { |child| child.tag_name == :item }

    if xml_mode?
      return list_items.map { |child| Components.render_element(child, @context) }.compact.join('')
    end

    list_style = get_attribute('listStyle', 'dash')
    entries = list_items.each_with_index.map do |child, position|
      format_entry(child, marker_for(list_style, position + 1))
    end

    return "\n\n" if entries.empty?

    entries.join("\n") + "\n\n"
  end

  private

  # Marker for the entry at the given 1-based position.
  def marker_for(list_style, number)
    case list_style
    when 'decimal', 'number', 'numbered' then "#{number}. "
    when 'star' then "* "
    when 'plus' then "+ "
    else "- " # 'dash', 'bullet', 'unordered' and anything unrecognised
    end
  end

  # Builds one entry: the item's stripped text, followed (after a blank line)
  # by its rendered non-text children with every non-blank line indented.
  def format_entry(item, marker)
    text_content = item.content.strip
    nested_elements = item.children.reject { |node| node.tag_name == :text }
    return "#{marker}#{text_content}" unless nested_elements.any?

    nested_content = nested_elements.map { |node| Components.render_element(node, @context) }.join('').strip
    indented_nested = nested_content.split("\n").map { |line|
      line.strip.empty? ? "" : " #{line}"
    }.join("\n").strip

    if text_content.empty?
      "#{marker}#{indented_nested}"
    else
      "#{marker}#{text_content} \n\n#{indented_nested}"
    end
  end
end
820
+
821
# Item component (for list items).
class ItemComponent < Component
  # The body is the stripped element content, or the rendered children when
  # the content is empty.
  #
  # @return [String] the body wrapped in <item>...</item> plus a newline in
  #   XML mode, otherwise the bare body
  def render
    apply_stylesheet

    raw = @element.content
    body = raw.empty? ? render_children : raw.strip
    xml_mode? ? "<item>#{body}</item>\n" : body
  end
end
834
+
835
# CP component (custom component with caption).
class CPComponent < Component
  # Children are rendered inside @context.with_increased_header_level so that
  # nested CPs get deeper headers. The label is captionSerialized when it is
  # non-empty, otherwise caption. In XML mode the label becomes the tag name;
  # in markdown mode it is laid out according to captionStyle, with header
  # depth taken from @context.header_level. Without a label only the body and
  # a blank line are returned.
  #
  # @return [String]
  def render
    apply_stylesheet

    caption = get_attribute('caption', '')
    caption_serialized = get_attribute('captionSerialized', caption)

    raw = @element.content
    body = raw.empty? ? @context.with_increased_header_level { render_children } : raw

    if xml_mode?
      tag_name = caption_serialized.empty? ? caption : caption_serialized
      return "#{body}\n\n" if tag_name.empty?

      return render_as_xml(tag_name, body)
    end

    caption_style = get_attribute('captionStyle', 'header')
    label = apply_text_transform(caption_serialized.empty? ? caption : caption_serialized)
    return body + "\n\n" if label.empty?

    case caption_style
    when 'bold'   then "**#{label}:** #{body}\n\n"
    when 'plain'  then "#{label}: #{body}\n\n"
    when 'hidden' then "#{body}\n\n"
    else
      # 'header' and anything unrecognised
      hashes = '#' * @context.header_level
      "#{hashes} #{label}\n\n#{body}\n\n"
    end
  end
end
883
+
884
# Renders a stepwise-instructions element.
class StepwiseInstructionsComponent < Component
  # @return [String] a <stepwise-instructions> element in XML mode, otherwise
  #   the rendered children under a level-one header (default caption
  #   "Stepwise Instructions")
  def render
    apply_stylesheet

    steps = render_children
    return render_as_xml('stepwise-instructions', steps) if xml_mode?

    heading = apply_text_transform(get_attribute('caption', 'Stepwise Instructions'))
    "# #{heading}\n\n#{steps}\n\n"
  end
end
899
+
900
# Renders a human-message element.
class HumanMessageComponent < Component
  # @return [String] a <human-message> element in XML mode, otherwise the
  #   rendered children unchanged
  def render
    apply_stylesheet

    message = render_children
    xml_mode? ? render_as_xml('human-message', message) : message
  end
end
914
+
915
# Renders a question/answer prompt.
class QAComponent < Component
  # @return [String] a <qa> element in XML mode, otherwise the question
  #   followed by an empty "Answer:" slot
  def render
    apply_stylesheet

    raw = @element.content
    question = raw.empty? ? render_children : raw
    return render_as_xml('qa', question) if xml_mode?

    "**QUESTION:** #{question}\n\n**Answer:**\n\n"
  end
end
929
+
930
# Let component (for template variables).
class LetComponent < Component
  # When both src and name are given, parses the JSON file at src and stores
  # the result in @context.variables[name]. A missing file or a read/parse
  # failure is ignored.
  #
  # @return [String] always '' - a let element produces no output
  def render
    apply_stylesheet

    src = get_attribute('src')
    name = get_attribute('name')
    bind_from_file(src, name) if src && name

    ''
  end

  private

  # Relative paths are resolved against the directory of the context's
  # source_path, or the working directory when there is none.
  def bind_from_file(src, name)
    file_path = src
    unless src.start_with?('/')
      base_path = @context.source_path ? File.dirname(@context.source_path) : Dir.pwd
      file_path = File.join(base_path, src)
    end
    return unless File.exist?(file_path)

    begin
      @context.variables[name] = JSON.parse(File.read(file_path))
    rescue StandardError
      # Silently fail for now
    end
  end
end
966
+
967
# Enhanced Paragraph component with template support.
class ParagraphComponent < Component
  # Matches a loop expression such as "ins in instructions".
  FOR_EXPRESSION = /(?<item>\w+)\s+in\s+(?<collection>\w+)/.freeze

  # With a `for` attribute the paragraph is rendered once per collection
  # element. Otherwise it renders as a <p> element in XML mode or as text
  # followed by a blank line.
  #
  # @return [String]
  def render
    apply_stylesheet

    for_attr = get_attribute('for')
    return render_template_loop(for_attr) if for_attr

    content = @element.content.empty? ? render_children : @element.content
    xml_mode? ? render_as_xml('p', content) : "#{content}\n\n"
  end

  private

  # Renders the paragraph once per element of the named collection, binding
  # the item variable and loop = { 'index' => i } for each pass, and joins the
  # passes with newlines.
  #
  # @param for_expr [String] expression of the form "<item> in <collection>"
  # @return [String] '' when the expression does not match or the collection
  #   variable is not an Array
  def render_template_loop(for_expr)
    match = FOR_EXPRESSION.match(for_expr)
    return '' unless match

    collection = @context.variables[match[:collection]]
    return '' unless collection.is_a?(Array)

    results = collection.each_with_index.map do |item, index|
      with_bindings(match[:item] => item, 'loop' => { 'index' => index }) do
        content = @element.content.empty? ? render_children : @element.content
        TemplateEngine.new(@context).substitute(content)
      end
    end

    results.join("\n")
  end

  # Temporarily sets the given variables while the block runs.
  #
  # Restoration is decided by whether the key existed beforehand, so a
  # previous value of false or nil is put back rather than deleted. It runs in
  # `ensure`, so the bindings do not leak when rendering raises.
  def with_bindings(bindings)
    variables = @context.variables
    saved = bindings.keys.each_with_object({}) do |key, memo|
      memo[key] = variables[key] if variables.key?(key)
    end
    bindings.each { |key, value| variables[key] = value }

    yield
  ensure
    if variables && saved
      bindings.each_key do |key|
        if saved.key?(key)
          variables[key] = saved[key]
        else
          variables.delete(key)
        end
      end
    end
  end
end
1035
+
1036
# Stylesheet component: merges an inline JSON stylesheet into the context.
class StylesheetComponent < Component
  # @return [String] always '' - a stylesheet element produces no output
  def render
    merge_inline_rules
    ''
  end

  private

  # Parses the element content when it looks like a JSON object (starts with
  # "{" and ends with "}") and merges a Hash result into @context.stylesheet.
  # Any error raised along the way is swallowed.
  def merge_inline_rules
    raw = @element.content.strip
    return unless raw.start_with?('{') && raw.end_with?('}')

    rules = JSON.parse(raw)
    @context.stylesheet.merge!(rules) if rules.is_a?(Hash)
  rescue StandardError
    # Silently fail JSON parsing errors
    nil
  end
end
1054
+
1055
# Component registry and factory.
module Components
  # Tag name (Symbol) => component class. Built from class/alias groups so
  # that every spelling of a tag sits next to its class; the insertion order
  # is the same as listing each pair individually.
  COMPONENT_MAPPING = [
    [TextComponent, %i[text]],
    [RoleComponent, %i[role]],
    [TaskComponent, %i[task]],
    [HintComponent, %i[hint]],
    [DocumentComponent, %i[document Document]],
    [TableComponent, %i[table Table]],
    [ImageComponent, %i[img]],
    [ParagraphComponent, %i[p]],
    [ExampleComponent, %i[example]],
    [InputComponent, %i[input]],
    [OutputComponent, %i[output]],
    [OutputFormatComponent, %i[output-format outputformat]],
    [ListComponent, %i[list]],
    [ItemComponent, %i[item]],
    [CPComponent, %i[cp]],
    [StepwiseInstructionsComponent, %i[stepwise-instructions stepwiseinstructions StepwiseInstructions]],
    [HumanMessageComponent, %i[human-message humanmessage HumanMessage]],
    [QAComponent, %i[qa QA]],
    [LetComponent, %i[let Let]],
    [StylesheetComponent, %i[stylesheet Stylesheet]]
  ].each_with_object({}) { |(klass, tags), mapping|
    tags.each { |tag| mapping[tag] = klass }
  }.freeze

  # Instantiates the component registered for the element's tag name
  # (TextComponent when the tag is not registered) and renders it.
  #
  # @param element the parsed node; its #tag_name selects the class
  # @param context the render context handed to the component
  # @return the component's #render result
  def self.render_element(element, context)
    COMPONENT_MAPPING.fetch(element.tag_name, TextComponent).new(element, context).render
  end
end
1096
+ end