prosereflect 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +63 -0
  3. data/.github/workflows/links.yml +97 -0
  4. data/.gitignore +4 -0
  5. data/.rubocop_todo.yml +61 -75
  6. data/README.adoc +2 -0
  7. data/docs/Gemfile +10 -0
  8. data/docs/INDEX.adoc +45 -0
  9. data/docs/_advanced/index.adoc +15 -0
  10. data/docs/_advanced/schema.adoc +112 -0
  11. data/docs/_advanced/step-map.adoc +66 -0
  12. data/docs/_advanced/steps.adoc +88 -0
  13. data/docs/_advanced/test-builder.adoc +61 -0
  14. data/docs/_advanced/transform.adoc +92 -0
  15. data/docs/_config.yml +174 -0
  16. data/docs/_features/html-input.adoc +69 -0
  17. data/docs/_features/html-output.adoc +45 -0
  18. data/docs/_features/index.adoc +15 -0
  19. data/docs/_features/marks.adoc +86 -0
  20. data/docs/_features/node-types.adoc +124 -0
  21. data/docs/_features/user-mentions.adoc +47 -0
  22. data/docs/_guides/custom-nodes.adoc +107 -0
  23. data/docs/_guides/index.adoc +13 -0
  24. data/docs/_guides/round-trip-html.adoc +91 -0
  25. data/docs/_guides/serialization.adoc +109 -0
  26. data/docs/_pages/index.adoc +67 -0
  27. data/docs/_reference/document-api.adoc +49 -0
  28. data/docs/_reference/index.adoc +14 -0
  29. data/docs/_reference/node-api.adoc +79 -0
  30. data/docs/_reference/schema-api.adoc +95 -0
  31. data/docs/_reference/transform-api.adoc +77 -0
  32. data/docs/_understanding/document-model.adoc +65 -0
  33. data/docs/_understanding/fragment.adoc +52 -0
  34. data/docs/_understanding/index.adoc +14 -0
  35. data/docs/_understanding/resolved-position.adoc +53 -0
  36. data/docs/_understanding/slice.adoc +54 -0
  37. data/docs/lychee.toml +63 -0
  38. data/lib/prosereflect/blockquote.rb +9 -0
  39. data/lib/prosereflect/bullet_list.rb +25 -19
  40. data/lib/prosereflect/code_block.rb +1 -5
  41. data/lib/prosereflect/fragment.rb +249 -0
  42. data/lib/prosereflect/horizontal_rule.rb +9 -0
  43. data/lib/prosereflect/image.rb +9 -0
  44. data/lib/prosereflect/input/html.rb +96 -0
  45. data/lib/prosereflect/node.rb +141 -3
  46. data/lib/prosereflect/ordered_list.rb +2 -0
  47. data/lib/prosereflect/output/html.rb +227 -0
  48. data/lib/prosereflect/parser.rb +9 -0
  49. data/lib/prosereflect/resolved_pos.rb +256 -0
  50. data/lib/prosereflect/schema/attribute.rb +57 -0
  51. data/lib/prosereflect/schema/content_match.rb +656 -0
  52. data/lib/prosereflect/schema/fragment.rb +166 -0
  53. data/lib/prosereflect/schema/mark.rb +121 -0
  54. data/lib/prosereflect/schema/mark_type.rb +130 -0
  55. data/lib/prosereflect/schema/node.rb +236 -0
  56. data/lib/prosereflect/schema/node_type.rb +274 -0
  57. data/lib/prosereflect/schema/schema_main.rb +190 -0
  58. data/lib/prosereflect/schema/spec.rb +92 -0
  59. data/lib/prosereflect/schema.rb +39 -0
  60. data/lib/prosereflect/text.rb +24 -0
  61. data/lib/prosereflect/transform/attr_step.rb +157 -0
  62. data/lib/prosereflect/transform/insert_step.rb +115 -0
  63. data/lib/prosereflect/transform/mapping.rb +82 -0
  64. data/lib/prosereflect/transform/mark_step.rb +269 -0
  65. data/lib/prosereflect/transform/replace_around_step.rb +181 -0
  66. data/lib/prosereflect/transform/replace_step.rb +157 -0
  67. data/lib/prosereflect/transform/slice.rb +91 -0
  68. data/lib/prosereflect/transform/step.rb +89 -0
  69. data/lib/prosereflect/transform/step_map.rb +126 -0
  70. data/lib/prosereflect/transform/structure.rb +120 -0
  71. data/lib/prosereflect/transform/transform.rb +341 -0
  72. data/lib/prosereflect/transform.rb +26 -0
  73. data/lib/prosereflect/version.rb +1 -1
  74. data/lib/prosereflect.rb +3 -0
  75. data/spec/fixtures/documents/formatted_text.yaml +14 -0
  76. data/spec/fixtures/documents/heading_paragraph.yaml +16 -0
  77. data/spec/fixtures/documents/lists_doc.yaml +32 -0
  78. data/spec/fixtures/documents/mixed_content.yaml +40 -0
  79. data/spec/fixtures/documents/nested_doc.yaml +20 -0
  80. data/spec/fixtures/documents/simple_doc.yaml +6 -0
  81. data/spec/fixtures/documents/table_doc.yaml +32 -0
  82. data/spec/fixtures/documents/transform_test.yaml +14 -0
  83. data/spec/fixtures/schema/custom_schema.rb +37 -0
  84. data/spec/fixtures/schema/test_schema.rb +46 -0
  85. data/spec/fixtures/test_builder/helpers.rb +212 -0
  86. data/spec/prosereflect/document_spec.rb +1 -1
  87. data/spec/prosereflect/fragment_spec.rb +273 -0
  88. data/spec/prosereflect/input/html_spec.rb +197 -1
  89. data/spec/prosereflect/node_spec.rb +128 -0
  90. data/spec/prosereflect/output/whitespace_spec.rb +248 -0
  91. data/spec/prosereflect/parser/round_trip_spec.rb +472 -0
  92. data/spec/prosereflect/resolved_pos_spec.rb +74 -0
  93. data/spec/prosereflect/schema/conftest.rb +68 -0
  94. data/spec/prosereflect/schema/content_match_spec.rb +237 -0
  95. data/spec/prosereflect/schema/mark_spec.rb +274 -0
  96. data/spec/prosereflect/schema/mark_type_spec.rb +86 -0
  97. data/spec/prosereflect/schema/node_type_spec.rb +142 -0
  98. data/spec/prosereflect/schema/schema_spec.rb +194 -0
  99. data/spec/prosereflect/test_builder/marks_spec.rb +127 -0
  100. data/spec/prosereflect/transform/equivalence_spec.rb +487 -0
  101. data/spec/prosereflect/transform/mapping_spec.rb +226 -0
  102. data/spec/prosereflect/transform/replace_spec.rb +832 -0
  103. data/spec/prosereflect/transform/replace_step_spec.rb +157 -0
  104. data/spec/prosereflect/transform/slice_spec.rb +48 -0
  105. data/spec/prosereflect/transform/step_map_spec.rb +70 -0
  106. data/spec/prosereflect/transform/step_spec.rb +211 -0
  107. data/spec/prosereflect/transform/structure_spec.rb +98 -0
  108. data/spec/prosereflect/transform/transform_spec.rb +238 -0
  109. data/spec/spec_helper.rb +1 -0
  110. metadata +90 -2
@@ -35,6 +35,31 @@ module Prosereflect
35
35
  html
36
36
  end
37
37
 
38
# Serialize a whole document to HTML via DOMSerializer.
#
# Caller-supplied options are layered over pass-through defaults:
# `document: true` plus identity callbacks for :text, :mark and :node.
# The serializer is built with the document's own schema.
def render(document, options = {})
  defaults = {
    document: true,
    text: ->(text, _marks) { text },
    mark: ->(_mark, content) { content },
    node: ->(_node, content) { content },
  }
  effective_options = defaults.merge(options)

  DOMSerializer.new(document.schema, effective_options).serialize(document)
end
50
+
51
# Render a single node through a serializer created without a schema.
# `options` is handed to the serializer unchanged.
def render_node(node, options = {})
  DOMSerializer.new(nil, options).render_node(node)
end
56
+
57
# Render a text string with the given marks through a serializer created
# without a schema. `options` is handed to the serializer unchanged.
def render_text(text, marks, options = {})
  DOMSerializer.new(nil, options).render_text(text, marks)
end
62
+
38
63
  private
39
64
 
40
65
  # Process a node and its children
@@ -372,5 +397,207 @@ module Prosereflect
372
397
  end
373
398
  end
374
399
  end
400
+
401
# DOMSerializer provides configurable document serialization to HTML.
#
# A serializer is created with an optional schema and an options hash.
# The options are stored and exposed through #options; nothing in this
# class reads them. When a schema is given, only the mark types it
# declares are rendered. Without a schema every built-in mark type is
# rendered.
class DOMSerializer
  # HTML element emitted for each simple (attribute-less) mark type.
  MARK_TAGS = {
    "bold" => "strong",
    "italic" => "em",
    "code" => "code",
    "strike" => "del",
    "underline" => "u",
    "subscript" => "sub",
    "superscript" => "sup",
  }.freeze

  # HTML element emitted for each node type. Headings are computed
  # separately because their tag depends on the level attribute. Node
  # types that are absent here ("doc", "text", "user", unknown types)
  # are rendered without a wrapper element.
  NODE_TAGS = {
    "paragraph" => "p",
    "table" => "table",
    "table_row" => "tr",
    "table_cell" => "td",
    "table_header" => "th",
    "bullet_list" => "ul",
    "ordered_list" => "ol",
    "list_item" => "li",
    "blockquote" => "blockquote",
    "hard_break" => "br",
    "horizontal_rule" => "hr",
    "code_block_wrapper" => "pre",
    "code_block" => "code",
    "image" => "img",
  }.freeze

  # Node types whose text keeps its whitespace untouched.
  WHITESPACE_PRESERVING_TYPES = %w[code_block code_block_wrapper pre].freeze

  # Characters that must not appear raw inside a quoted attribute value.
  ATTR_ESCAPES = {
    "&" => "&amp;",
    "<" => "&lt;",
    ">" => "&gt;",
    '"' => "&quot;",
    "'" => "&#39;",
  }.freeze

  attr_reader :schema, :options, :marks

  # @param schema [#marks, nil] schema whose #marks hash lists the mark
  #   types to render; nil renders every built-in mark type
  # @param options [Hash] stored as-is
  def initialize(schema, options = {})
    @schema = schema
    @options = options
    @marks = build_mark_serializers
  end

  # Serialize a document; same as rendering it as a node.
  def serialize(document)
    render_node(document)
  end

  # Serialize a single node; same as #render_node.
  def serialize_node(node)
    render_node(node)
  end

  # Render a node to an HTML string. Text nodes are rendered directly
  # with their marks. Other nodes are built through a Nokogiri HTML
  # builder and the HTML of the builder root's children is returned.
  def render_node(node)
    return render_text(node.text, node.marks) if node.text?

    builder = Nokogiri::HTML::Builder.new
    render_node_to_builder(node, builder)
    builder.doc.root.children.to_html
  end

  # Render the node's content and append it, wrapped in the node's tag
  # when it has one, to the given builder.
  def render_node_to_builder(node, builder)
    content = render_node_content(node)
    wrap_node(node, content, builder)
  end

  # Wrap text in each mark in order, so the first mark ends up innermost.
  #
  # @param text [String]
  # @param node_marks [Array, nil] nil is treated as no marks
  # @return [String]
  def render_text(text, node_marks = nil)
    (node_marks || []).reduce(text) { |rendered, mark| apply_mark(mark, rendered) }
  end

  # Wrap content in the HTML element for the given mark.
  #
  # With a schema, marks the schema does not declare leave the content
  # untouched. Without a schema every built-in mark type is rendered;
  # previously the schema-less helpers silently dropped all marks.
  # Unknown mark types always leave the content untouched.
  #
  # @return [String]
  def apply_mark(mark, content)
    return content if @schema && !@marks.key?(mark.type)

    if mark.type == "link"
      # The href is document data; escape it so it cannot break out of
      # the quoted attribute.
      href = escape_attr(extract_mark_attr(mark, "href"))
      return "<a href=\"#{href}\">#{content}</a>"
    end

    tag = MARK_TAGS[mark.type]
    tag ? "<#{tag}>#{content}</#{tag}>" : content
  end

  private

  # One renderer lambda per mark type declared by the schema; empty when
  # there is no schema. Only the keys are consulted by #apply_mark.
  def build_mark_serializers
    return {} unless @schema

    @schema.marks.transform_values do |_mark_type|
      ->(mark, content) { apply_mark(mark, content) }
    end
  end

  # Read a mark attribute, or nil when the mark has no Hash attrs.
  def extract_mark_attr(mark, attr_name)
    return nil unless mark.respond_to?(:attrs)

    attrs = mark.attrs
    return nil unless attrs.is_a?(Hash)

    attrs[attr_name]
  end

  # Escape a value for use inside a quoted HTML attribute; nil becomes "".
  def escape_attr(value)
    value.to_s.gsub(/[&<>"']/, ATTR_ESCAPES)
  end

  # Render a node's children (or its text) and apply the node's marks.
  def render_node_content(node)
    return render_text(node.text, node.marks) if node.text?

    children = node.content.map { |child| render_node(child) }.join
    apply_node_marks(node, children)
  end

  # Wrap content in the node's marks, last mark first.
  def apply_node_marks(node, content)
    return content unless node.marks && !node.marks.empty?

    node.marks.reverse_each do |mark|
      content = apply_mark(mark, content)
    end
    content
  end

  # Append content to the builder, inside the node's element if any.
  def wrap_node(node, content, builder)
    tag_name = node_tag_name(node)
    return builder << content unless tag_name

    builder.tag(tag_name, wrap_attrs(node)) do
      builder << content
    end
  end

  # HTML tag for a node type, or nil when the node has no wrapper.
  def node_tag_name(node)
    return "h#{heading_level(node)}" if node.type == "heading"

    NODE_TAGS[node.type]
  end

  # Heading level from the node attrs, defaulting to 1. String keys are
  # what the rest of this class reads; the symbol key is still accepted
  # for backward compatibility.
  def heading_level(node)
    attrs = node.respond_to?(:attrs) ? node.attrs : nil
    return 1 unless attrs.is_a?(Hash)

    attrs["level"] || attrs[:level] || 1
  end

  # Element attributes for image and ordered_list nodes; nil when the
  # node has no Hash attrs or nothing applies.
  def wrap_attrs(node)
    return nil unless node.respond_to?(:attrs) && node.attrs.is_a?(Hash)

    attrs = {}
    case node.type
    when "image"
      attrs[:src] = node.attrs["src"]
      attrs[:alt] = node.attrs["alt"] if node.attrs["alt"]
      attrs[:title] = node.attrs["title"] if node.attrs["title"]
    when "ordered_list"
      attrs[:start] = node.attrs["start"] if node.attrs["start"]
    end
    attrs.empty? ? nil : attrs
  end

  # Check if a node should preserve whitespace: code block / pre types,
  # or a "style" attr containing "white-space: pre".
  def preserve_whitespace?(node)
    return false unless node.respond_to?(:type)
    return true if WHITESPACE_PRESERVING_TYPES.include?(node.type)
    return false unless node.respond_to?(:attrs) && node.attrs.is_a?(Hash)

    style = node.attrs["style"]
    style.is_a?(String) && style.include?("white-space: pre")
  end

  # Determine how whitespace should be handled for a node.
  # Returns :preserve or :collapse.
  def whitespace_mode(node)
    preserve_whitespace?(node) ? :preserve : :collapse
  end

  # Collapse runs of spaces and tabs into one space.
  def collapse_whitespace(text)
    text.gsub(/[ \t]+/, " ")
  end

  # Replace runs of tabs, spaces and line breaks with one space.
  def normalize_whitespace(text)
    text.gsub(/[\t \n\r]+/, " ")
  end

  # Process text content with the whitespace handling chosen for node.
  def process_text_whitespace(text, node)
    case whitespace_mode(node)
    when :preserve
      text
    when :normalize
      normalize_whitespace(text)
    else
      collapse_whitespace(text)
    end
  end
end
375
602
  end
376
603
  end
@@ -48,6 +48,10 @@ module Prosereflect
48
48
  HorizontalRule
49
49
  when "image"
50
50
  Image
51
+ when "code_block"
52
+ CodeBlock
53
+ when "code_block_wrapper"
54
+ CodeBlockWrapper
51
55
  when "user"
52
56
  User
53
57
  else
@@ -95,6 +99,11 @@ module Prosereflect
95
99
  node.abbr = attrs["abbr"] if attrs["abbr"]
96
100
  node.colspan = attrs["colspan"] if attrs["colspan"]
97
101
  end
102
+ when "code_block"
103
+ if attrs
104
+ node.language = attrs["language"] if attrs["language"]
105
+ node.line_numbers = attrs["line_numbers"] if attrs["line_numbers"]
106
+ end
98
107
  end
99
108
 
100
109
  node.marks = marks_data if marks_data && !marks_data.empty?
@@ -0,0 +1,256 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prosereflect
4
# ResolvedPos represents a document position that has been resolved
# to a specific location in the document tree.
#
# The path array holds three entries per depth level:
#   [node, index, start, node, index, start, ...]
# so the node at depth N is path[N * 3], its index within its parent is
# path[N * 3 + 1] and its start position is path[N * 3 + 2].
# Depth 0 is the first path entry.
class ResolvedPos
  attr_reader :pos, :path, :depth

  # @param pos [Integer] the position that was resolved
  # @param path [Array] flat [node, index, start, ...] triples
  # @param depth [Integer] depth of the innermost path entry
  def initialize(pos, path, depth)
    @pos = pos
    @path = path
    @depth = depth
    @parent_offset = nil
  end

  # The parent node at current depth
  def parent
    node(@depth)
  end

  # Index recorded for the node at the given depth
  def index(depth = @depth)
    @path[(depth * 3) + 1]
  end

  # Start position recorded for the node at the given depth
  def start(depth = @depth)
    @path[(depth * 3) + 2]
  end

  # End of the node at the given depth: its start plus the size of its
  # content. The node at the requested depth is used, not always the
  # innermost parent, so end_(d) is consistent with start(d).
  def end_(depth = @depth)
    start(depth) + node(depth).content.size
  end

  # The node at a given depth
  def node(depth = @depth)
    @path[depth * 3]
  end

  # Position within the parent node (memoized)
  def parent_offset
    @parent_offset ||= @pos - start
  end

  # Marks at this position: none at depth 0, otherwise the parent's
  # marks (empty when the parent has none).
  def marks
    return [] if depth.zero?

    (parent.respond_to?(:marks) ? parent.marks : nil) || []
  end

  # Union of the given marks with the marks of every node yielded
  # between the two positions. The argument array is not modified.
  def marks_between(from, to, marks)
    result = marks.dup
    nodes_between(from, to) do |node|
      result |= node.marks if node.respond_to?(:marks) && node.marks
    end
    result
  end

  # Deepest depth at which this position and other_pos are still inside
  # the same chain of ancestors, i.e. the largest level up to which the
  # recorded indices agree at every level. Returns 0 when they diverge
  # directly below depth 0.
  #
  # The previous implementation walked down from the common depth while
  # the indices matched, which reported 0 for identical positions.
  def shared_depth(other_pos)
    deepest_common = [depth, other_pos.depth].min
    shared = 0
    (1..deepest_common).each do |level|
      break unless index(level) == other_pos.index(level)

      shared = level
    end
    shared
  end

  # Get block range to another position (defaults to this position)
  def block_range(other_pos = nil)
    other_pos ||= self
    NodeRange.new(self, other_pos)
  end

  # Check if the parent is a block node
  def block?
    parent.respond_to?(:is_block?) && parent.is_block?
  end

  # Check if the parent is not a block node
  def inline?
    !block?
  end

  # Check if in text block
  def text_block?
    parent.respond_to?(:is_textblock?) && parent.is_textblock?
  end

  # Check if at start of parent
  def start_of_parent?
    parent_offset.zero?
  end

  # Check if at end of parent
  # NOTE(review): compares against content.size - 1; confirm whether the
  # last offset or one past it is intended.
  def end_of_parent?
    parent_offset >= parent.content.size - 1
  end

  # True when at position 0 (depth 0) or when the index is 0 (deeper)
  def before?
    if depth.zero?
      @pos.zero?
    else
      index.zero?
    end
  end

  # True when the index is at or past the parent's content size.
  # NOTE(review): at depth 0 this is true for every non-negative
  # position; confirm the intended root-level condition.
  def after?
    if depth.zero?
      @pos >= 0
    else
      index >= parent.content.size
    end
  end

  # Two resolved positions are equal when position and depth match.
  def eq?(other)
    return false unless other.is_a?(ResolvedPos)

    @pos == other.pos && @depth == other.depth
  end

  alias == eq?
  # Hash lookups use eql? together with #hash, so keep them in agreement.
  alias eql? eq?

  def hash
    [@pos, @depth].hash
  end

  def to_s
    "<ResolvedPos #{@pos}:#{depth}>"
  end

  def inspect
    to_s
  end

  private

  # Yield nodes between from and to for every ancestor above the current
  # depth that supports nodes_between. Does nothing unless to > from.
  def nodes_between(from, to, &block)
    return unless to > from

    depth.times do |level|
      ancestor = node(level)
      ancestor.nodes_between(from, to, &block) if ancestor.respond_to?(:nodes_between)
    end
  end
end
174
+
175
# NodeRange pairs two resolved positions: where a range starts and
# where it ends.
class NodeRange
  attr_reader :start, :end_

  # `end` is exposed as a friendlier name for the end_ reader.
  alias_method :end, :end_

  def initialize(start_resolved, end_resolved)
    @start = start_resolved
    @end_ = end_resolved
  end

  # Content fragment between start and end.
  # Placeholder: always returns a Fragment built from an empty array.
  def content
    Fragment.new([])
  end

  # Every node yielded by the start position's node between the two
  # positions, collected into an array.
  def nodes
    [].tap do |collected|
      start.node.nodes_between(start.pos, end_.pos) { |found| collected << found }
    end
  end

  def to_s
    format("<NodeRange %s:%s>", start.pos, end_.pos)
  end

  def inspect
    to_s
  end
end
207
+
208
# Extension to Node for position resolution
class Node
  # Resolve a position to a ResolvedPos.
  #
  # Builds the flat [node, index, start, ...] path from this node down
  # to the node containing pos. The resulting depth is the number of
  # path triples minus one.
  #
  # @param pos [Integer] position relative to this node
  # @return [ResolvedPos]
  def resolve(pos)
    path = []
    build_path_for_pos(pos, path)
    depth = [(path.length / 3) - 1, 0].max
    ResolvedPos.new(pos, path, depth)
  end

  private

  # Walk up from common_depth until a block node is found.
  # NOTE(review): this calls node(depth), which is not defined in the
  # visible part of this class; it looks like it belongs on ResolvedPos.
  # Confirm before relying on it.
  def find_block_depth(common_depth)
    block_depth = common_depth
    while block_depth.positive?
      current_node = node(block_depth)
      break if current_node.respond_to?(:is_block?) && current_node.is_block?

      block_depth -= 1
    end
    block_depth
  end

  # Append this node's [node, index, start] triple and descend into the
  # child containing pos. pos is relative to this node; start_offset is
  # this node's start measured from the node resolve was called on.
  def build_path_for_pos(pos, path, index = 0, start_offset = 0)
    path << self << index << start_offset
    return if pos.zero?

    traverse_children_for_resolve(pos, path, start_offset)
  end

  # Find the child whose span contains pos and continue building the
  # path inside it. Child offsets start at 1 and advance by node_size.
  #
  # base_offset is this node's own start. It is added to the child's
  # local offset so that every start recorded in the path is measured
  # from the resolve root, which is what ResolvedPos#parent_offset
  # (pos - start) needs. Previously nested levels recorded offsets
  # relative to their immediate parent.
  def traverse_children_for_resolve(pos, path, base_offset = 0)
    return unless content

    content_offset = 1
    content.each_with_index do |child, child_index|
      child_end = content_offset + child.node_size
      if pos < child_end
        child.send(:build_path_for_pos, pos - content_offset, path, child_index,
                   base_offset + content_offset)
        return
      end

      content_offset = child_end
    end
  end
end
256
+ end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prosereflect
4
+ class Schema
5
# Describes one attribute of a schema type: its name, an optional
# default value and an optional validator.
class Attribute
  attr_reader :name, :default

  # @param name [String, Symbol] attribute name (used in error messages)
  # @param default [Object, nil] default value; nil means "no default"
  # @param validate [#call, String, Symbol, nil] either a callable that
  #   receives the value, or a type spec such as "string", "number" or
  #   "string|null"
  def initialize(name:, default: nil, validate: nil)
    @name = name
    @default = default
    @validate = validate
  end

  # True when a non-nil default was given.
  def has_default?
    !@default.nil?
  end

  # An attribute without a default must be supplied.
  def required?
    !has_default?
  end

  # Validate a value against the configured validator.
  #
  # @return [Object] true when there is no validator or the type spec
  #   matches; otherwise whatever the callable validator returns
  # @raise [Prosereflect::SchemaErrors::ValidationError] when a type
  #   spec does not match
  def validate_value(value)
    return true if @validate.nil?
    return @validate.call(value) if @validate.respond_to?(:call)

    # Handle string-based type validation like "string", "number", "string|null"
    validate_type(value, @validate.to_s)
  end

  private

  # Check value against a "|"-separated list of type names. Whitespace
  # around each name is ignored so "string | null" works as well.
  def validate_type(value, type_str)
    types = type_str.split("|").map(&:strip)
    actual_type = get_type_name(value)
    return true if types.include?(actual_type)

    raise ::Prosereflect::SchemaErrors::ValidationError,
          "Expected value of type #{types.join('|')} for attribute #{@name}, got #{actual_type}"
  end

  # Type name used by type specs: "null", "string", "number", "boolean",
  # "object" (Hash and Array), otherwise the downcased class name.
  def get_type_name(value)
    case value
    when nil then "null"
    when String then "string"
    when Integer, Float then "number"
    when TrueClass, FalseClass then "boolean"
    when Hash, Array then "object"
    else
      # Anonymous classes have no name; to_s avoids calling downcase on nil.
      value.class.name.to_s.downcase
    end
  end
end
56
+ end
57
+ end