prosereflect 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +63 -0
  3. data/.github/workflows/links.yml +97 -0
  4. data/.github/workflows/rake.yml +4 -0
  5. data/.github/workflows/release.yml +5 -0
  6. data/.gitignore +4 -0
  7. data/.rubocop.yml +19 -1
  8. data/.rubocop_todo.yml +119 -183
  9. data/CLAUDE.md +78 -0
  10. data/Gemfile +8 -4
  11. data/README.adoc +2 -0
  12. data/Rakefile +3 -3
  13. data/docs/Gemfile +10 -0
  14. data/docs/INDEX.adoc +45 -0
  15. data/docs/_advanced/index.adoc +15 -0
  16. data/docs/_advanced/schema.adoc +112 -0
  17. data/docs/_advanced/step-map.adoc +66 -0
  18. data/docs/_advanced/steps.adoc +88 -0
  19. data/docs/_advanced/test-builder.adoc +61 -0
  20. data/docs/_advanced/transform.adoc +92 -0
  21. data/docs/_config.yml +174 -0
  22. data/docs/_features/html-input.adoc +69 -0
  23. data/docs/_features/html-output.adoc +45 -0
  24. data/docs/_features/index.adoc +15 -0
  25. data/docs/_features/marks.adoc +86 -0
  26. data/docs/_features/node-types.adoc +124 -0
  27. data/docs/_features/user-mentions.adoc +47 -0
  28. data/docs/_guides/custom-nodes.adoc +107 -0
  29. data/docs/_guides/index.adoc +13 -0
  30. data/docs/_guides/round-trip-html.adoc +91 -0
  31. data/docs/_guides/serialization.adoc +109 -0
  32. data/docs/_pages/index.adoc +67 -0
  33. data/docs/_reference/document-api.adoc +49 -0
  34. data/docs/_reference/index.adoc +14 -0
  35. data/docs/_reference/node-api.adoc +79 -0
  36. data/docs/_reference/schema-api.adoc +95 -0
  37. data/docs/_reference/transform-api.adoc +77 -0
  38. data/docs/_understanding/document-model.adoc +65 -0
  39. data/docs/_understanding/fragment.adoc +52 -0
  40. data/docs/_understanding/index.adoc +14 -0
  41. data/docs/_understanding/resolved-position.adoc +53 -0
  42. data/docs/_understanding/slice.adoc +54 -0
  43. data/docs/lychee.toml +63 -0
  44. data/lib/prosereflect/attribute/base.rb +4 -6
  45. data/lib/prosereflect/attribute/bold.rb +2 -4
  46. data/lib/prosereflect/attribute/href.rb +1 -3
  47. data/lib/prosereflect/attribute/id.rb +7 -7
  48. data/lib/prosereflect/attribute.rb +4 -7
  49. data/lib/prosereflect/blockquote.rb +19 -11
  50. data/lib/prosereflect/bullet_list.rb +36 -29
  51. data/lib/prosereflect/code_block.rb +23 -27
  52. data/lib/prosereflect/code_block_wrapper.rb +12 -13
  53. data/lib/prosereflect/document.rb +14 -22
  54. data/lib/prosereflect/fragment.rb +249 -0
  55. data/lib/prosereflect/hard_break.rb +6 -6
  56. data/lib/prosereflect/heading.rb +14 -15
  57. data/lib/prosereflect/horizontal_rule.rb +23 -14
  58. data/lib/prosereflect/image.rb +32 -23
  59. data/lib/prosereflect/input/html.rb +179 -104
  60. data/lib/prosereflect/input.rb +7 -0
  61. data/lib/prosereflect/list_item.rb +11 -12
  62. data/lib/prosereflect/mark/base.rb +9 -11
  63. data/lib/prosereflect/mark/bold.rb +1 -3
  64. data/lib/prosereflect/mark/code.rb +1 -3
  65. data/lib/prosereflect/mark/italic.rb +1 -3
  66. data/lib/prosereflect/mark/link.rb +1 -3
  67. data/lib/prosereflect/mark/strike.rb +1 -3
  68. data/lib/prosereflect/mark/subscript.rb +1 -3
  69. data/lib/prosereflect/mark/superscript.rb +1 -3
  70. data/lib/prosereflect/mark/underline.rb +1 -3
  71. data/lib/prosereflect/mark.rb +9 -5
  72. data/lib/prosereflect/node.rb +171 -33
  73. data/lib/prosereflect/ordered_list.rb +17 -14
  74. data/lib/prosereflect/output/html.rb +279 -50
  75. data/lib/prosereflect/output.rb +7 -0
  76. data/lib/prosereflect/paragraph.rb +11 -13
  77. data/lib/prosereflect/parser.rb +56 -66
  78. data/lib/prosereflect/resolved_pos.rb +256 -0
  79. data/lib/prosereflect/schema/attribute.rb +57 -0
  80. data/lib/prosereflect/schema/content_match.rb +656 -0
  81. data/lib/prosereflect/schema/fragment.rb +166 -0
  82. data/lib/prosereflect/schema/mark.rb +121 -0
  83. data/lib/prosereflect/schema/mark_type.rb +130 -0
  84. data/lib/prosereflect/schema/node.rb +236 -0
  85. data/lib/prosereflect/schema/node_type.rb +274 -0
  86. data/lib/prosereflect/schema/schema_main.rb +190 -0
  87. data/lib/prosereflect/schema/spec.rb +92 -0
  88. data/lib/prosereflect/schema.rb +39 -0
  89. data/lib/prosereflect/table.rb +12 -13
  90. data/lib/prosereflect/table_cell.rb +13 -13
  91. data/lib/prosereflect/table_header.rb +17 -17
  92. data/lib/prosereflect/table_row.rb +12 -12
  93. data/lib/prosereflect/text.rb +35 -11
  94. data/lib/prosereflect/transform/attr_step.rb +157 -0
  95. data/lib/prosereflect/transform/insert_step.rb +115 -0
  96. data/lib/prosereflect/transform/mapping.rb +82 -0
  97. data/lib/prosereflect/transform/mark_step.rb +269 -0
  98. data/lib/prosereflect/transform/replace_around_step.rb +181 -0
  99. data/lib/prosereflect/transform/replace_step.rb +157 -0
  100. data/lib/prosereflect/transform/slice.rb +91 -0
  101. data/lib/prosereflect/transform/step.rb +89 -0
  102. data/lib/prosereflect/transform/step_map.rb +126 -0
  103. data/lib/prosereflect/transform/structure.rb +120 -0
  104. data/lib/prosereflect/transform/transform.rb +341 -0
  105. data/lib/prosereflect/transform.rb +26 -0
  106. data/lib/prosereflect/user.rb +15 -15
  107. data/lib/prosereflect/version.rb +1 -1
  108. data/lib/prosereflect.rb +30 -17
  109. data/prosereflect.gemspec +17 -16
  110. data/spec/fixtures/documents/formatted_text.yaml +14 -0
  111. data/spec/fixtures/documents/heading_paragraph.yaml +16 -0
  112. data/spec/fixtures/documents/lists_doc.yaml +32 -0
  113. data/spec/fixtures/documents/mixed_content.yaml +40 -0
  114. data/spec/fixtures/documents/nested_doc.yaml +20 -0
  115. data/spec/fixtures/documents/simple_doc.yaml +6 -0
  116. data/spec/fixtures/documents/table_doc.yaml +32 -0
  117. data/spec/fixtures/documents/transform_test.yaml +14 -0
  118. data/spec/fixtures/schema/custom_schema.rb +37 -0
  119. data/spec/fixtures/schema/test_schema.rb +46 -0
  120. data/spec/fixtures/test_builder/helpers.rb +212 -0
  121. data/spec/prosereflect/document_spec.rb +332 -330
  122. data/spec/prosereflect/fragment_spec.rb +273 -0
  123. data/spec/prosereflect/hard_break_spec.rb +125 -125
  124. data/spec/prosereflect/input/html_spec.rb +718 -522
  125. data/spec/prosereflect/node_spec.rb +311 -182
  126. data/spec/prosereflect/output/html_spec.rb +105 -105
  127. data/spec/prosereflect/output/whitespace_spec.rb +248 -0
  128. data/spec/prosereflect/paragraph_spec.rb +275 -274
  129. data/spec/prosereflect/parser/round_trip_spec.rb +472 -0
  130. data/spec/prosereflect/parser_spec.rb +185 -180
  131. data/spec/prosereflect/resolved_pos_spec.rb +74 -0
  132. data/spec/prosereflect/schema/conftest.rb +68 -0
  133. data/spec/prosereflect/schema/content_match_spec.rb +237 -0
  134. data/spec/prosereflect/schema/mark_spec.rb +274 -0
  135. data/spec/prosereflect/schema/mark_type_spec.rb +86 -0
  136. data/spec/prosereflect/schema/node_type_spec.rb +142 -0
  137. data/spec/prosereflect/schema/schema_spec.rb +194 -0
  138. data/spec/prosereflect/table_cell_spec.rb +183 -183
  139. data/spec/prosereflect/table_row_spec.rb +149 -149
  140. data/spec/prosereflect/table_spec.rb +320 -318
  141. data/spec/prosereflect/test_builder/marks_spec.rb +127 -0
  142. data/spec/prosereflect/text_spec.rb +133 -132
  143. data/spec/prosereflect/transform/equivalence_spec.rb +487 -0
  144. data/spec/prosereflect/transform/mapping_spec.rb +226 -0
  145. data/spec/prosereflect/transform/replace_spec.rb +832 -0
  146. data/spec/prosereflect/transform/replace_step_spec.rb +157 -0
  147. data/spec/prosereflect/transform/slice_spec.rb +48 -0
  148. data/spec/prosereflect/transform/step_map_spec.rb +70 -0
  149. data/spec/prosereflect/transform/step_spec.rb +211 -0
  150. data/spec/prosereflect/transform/structure_spec.rb +98 -0
  151. data/spec/prosereflect/transform/transform_spec.rb +238 -0
  152. data/spec/prosereflect/user_spec.rb +31 -28
  153. data/spec/prosereflect_spec.rb +28 -26
  154. data/spec/spec_helper.rb +7 -6
  155. data/spec/support/matchers.rb +6 -6
  156. data/spec/support/shared_examples.rb +49 -49
  157. metadata +96 -5
  158. data/spec/prosereflect/version_spec.rb +0 -11
@@ -1,33 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'nokogiri'
4
- require_relative '../document'
5
- require_relative '../paragraph'
6
- require_relative '../text'
7
- require_relative '../table'
8
- require_relative '../table_row'
9
- require_relative '../table_cell'
10
- require_relative '../table_header'
11
- require_relative '../hard_break'
12
- require_relative '../mark/bold'
13
- require_relative '../mark/italic'
14
- require_relative '../mark/code'
15
- require_relative '../mark/link'
16
- require_relative '../mark/strike'
17
- require_relative '../mark/subscript'
18
- require_relative '../mark/superscript'
19
- require_relative '../mark/underline'
20
- require_relative '../attribute/href'
21
- require_relative '../ordered_list'
22
- require_relative '../bullet_list'
23
- require_relative '../list_item'
24
- require_relative '../blockquote'
25
- require_relative '../horizontal_rule'
26
- require_relative '../image'
27
- require_relative '../code_block_wrapper'
28
- require_relative '../code_block'
29
- require_relative '../heading'
30
- require_relative '../user'
3
+ require "nokogiri"
31
4
 
32
5
  module Prosereflect
33
6
  module Input
@@ -38,7 +11,7 @@ module Prosereflect
38
11
  html_doc = Nokogiri::HTML(html)
39
12
  document = Document.create # Use create instead of new to initialize content array
40
13
 
41
- content_node = html_doc.at_css('body') || html_doc.root
14
+ content_node = html_doc.at_css("body") || html_doc.root
42
15
 
43
16
  # Process all child nodes
44
17
  process_node_children(content_node, document)
@@ -65,43 +38,43 @@ module Prosereflect
65
38
 
66
39
  # Convert an HTML node to a ProseMirror node
67
40
  def convert_node(html_node)
68
- return nil if html_node.comment? || html_node.text? && html_node.text.strip.empty?
41
+ return nil if html_node.comment? || (html_node.text? && html_node.text.strip.empty?)
69
42
 
70
43
  case html_node.name
71
- when 'text', '#text'
44
+ when "text", "#text"
72
45
  create_text_node(html_node)
73
- when 'p'
46
+ when "p"
74
47
  create_paragraph_node(html_node)
75
48
  when /^h([1-6])$/
76
49
  create_heading_node(html_node, Regexp.last_match(1).to_i)
77
- when 'br'
50
+ when "br"
78
51
  HardBreak.new
79
- when 'table'
52
+ when "table"
80
53
  create_table_node(html_node)
81
- when 'tr'
54
+ when "tr"
82
55
  create_table_row_node(html_node)
83
- when 'th', 'td'
56
+ when "th", "td"
84
57
  create_table_cell_node(html_node)
85
- when 'ol'
58
+ when "ol"
86
59
  create_ordered_list_node(html_node)
87
- when 'ul'
60
+ when "ul"
88
61
  create_bullet_list_node(html_node)
89
- when 'li'
62
+ when "li"
90
63
  create_list_item_node(html_node)
91
- when 'blockquote'
64
+ when "blockquote"
92
65
  create_blockquote_node(html_node)
93
- when 'hr'
66
+ when "hr"
94
67
  create_horizontal_rule_node(html_node)
95
- when 'img'
68
+ when "img"
96
69
  create_image_node(html_node)
97
- when 'user-mention'
70
+ when "user-mention"
98
71
  create_user_node(html_node)
99
- when 'div', 'span'
72
+ when "div", "span"
100
73
  # For containers, we process their children
101
74
  handle_container_node(html_node)
102
- when 'pre'
75
+ when "pre"
103
76
  create_code_block_wrapper(html_node)
104
- when 'strong', 'b', 'em', 'i', 'code', 'a', 'strike', 's', 'del', 'sub', 'sup', 'u'
77
+ when "strong", "b", "em", "i", "code", "a", "strike", "s", "del", "sub", "sup", "u"
105
78
  # For inline elements with text styling, we handle differently
106
79
  handle_styled_text(html_node)
107
80
  else
@@ -126,13 +99,13 @@ module Prosereflect
126
99
  def create_table_node(html_node)
127
100
  table = Table.new
128
101
 
129
- thead = html_node.at_css('thead')
130
- thead&.css('tr')&.each do |tr|
102
+ thead = html_node.at_css("thead")
103
+ thead&.css("tr")&.each do |tr|
131
104
  process_table_row(tr, table, true)
132
105
  end
133
106
 
134
- tbody = html_node.at_css('tbody') || html_node
135
- tbody.css('tr').each do |tr|
107
+ tbody = html_node.at_css("tbody") || html_node
108
+ tbody.css("tr").each do |tr|
136
109
  process_table_row(tr, table, false)
137
110
  end
138
111
 
@@ -142,7 +115,7 @@ module Prosereflect
142
115
  # Process a table row
143
116
  def create_table_row_node(html_node)
144
117
  row = TableRow.new
145
- html_node.css('th, td').each do |cell|
118
+ html_node.css("th, td").each do |cell|
146
119
  row.add_child(create_table_cell_node(cell))
147
120
  end
148
121
  row
@@ -157,13 +130,13 @@ module Prosereflect
157
130
  # Create a table cell node from HTML cell
158
131
  def create_table_cell_node(html_node)
159
132
  # Create either a TableHeader or TableCell based on the tag name
160
- cell = if html_node.name == 'th'
133
+ cell = if html_node.name == "th"
161
134
  header = TableHeader.create
162
135
 
163
136
  # Handle header-specific attributes
164
- header.scope = html_node['scope'] if html_node['scope']
165
- header.abbr = html_node['abbr'] if html_node['abbr']
166
- header.colspan = html_node['colspan'] if html_node['colspan']
137
+ header.scope = html_node["scope"] if html_node["scope"]
138
+ header.abbr = html_node["abbr"] if html_node["abbr"]
139
+ header.colspan = html_node["colspan"] if html_node["colspan"]
167
140
 
168
141
  header
169
142
  else
@@ -184,7 +157,7 @@ module Prosereflect
184
157
  # Handle a container-like node (div, span, etc.)
185
158
  def handle_container_node(html_node)
186
159
  # For top-level divs, process children directly
187
- if html_node.name == 'div'
160
+ if html_node.name == "div"
188
161
  results = []
189
162
  html_node.children.each do |child|
190
163
  next if child.text? && child.text.strip.empty?
@@ -224,38 +197,38 @@ module Prosereflect
224
197
  def handle_styled_text(html_node)
225
198
  # Create mark based on the current node
226
199
  mark = case html_node.name
227
- when 'strong', 'b'
200
+ when "strong", "b"
228
201
  mark = Mark::Bold.new
229
- mark.type = 'bold'
202
+ mark.type = "bold"
230
203
  mark
231
- when 'em', 'i'
204
+ when "em", "i"
232
205
  mark = Mark::Italic.new
233
- mark.type = 'italic'
206
+ mark.type = "italic"
234
207
  mark
235
- when 'code'
208
+ when "code"
236
209
  mark = Mark::Code.new
237
- mark.type = 'code'
210
+ mark.type = "code"
238
211
  mark
239
- when 'a'
212
+ when "a"
240
213
  mark = Mark::Link.new
241
- mark.type = 'link'
242
- mark.attrs = { 'href' => html_node['href'] } if html_node['href']
214
+ mark.type = "link"
215
+ mark.attrs = { "href" => html_node["href"] } if html_node["href"]
243
216
  mark
244
- when 'strike', 's', 'del'
217
+ when "strike", "s", "del"
245
218
  mark = Mark::Strike.new
246
- mark.type = 'strike'
219
+ mark.type = "strike"
247
220
  mark
248
- when 'sub'
221
+ when "sub"
249
222
  mark = Mark::Subscript.new
250
- mark.type = 'subscript'
223
+ mark.type = "subscript"
251
224
  mark
252
- when 'sup'
225
+ when "sup"
253
226
  mark = Mark::Superscript.new
254
- mark.type = 'superscript'
227
+ mark.type = "superscript"
255
228
  mark
256
- when 'u'
229
+ when "u"
257
230
  mark = Mark::Underline.new
258
- mark.type = 'underline'
231
+ mark.type = "underline"
259
232
  mark
260
233
  end
261
234
 
@@ -292,7 +265,8 @@ module Prosereflect
292
265
  def contains_only_text_or_inline(node)
293
266
  node.children.all? do |child|
294
267
  child.text? ||
295
- %w[strong b em i code a br span strike s del sub sup u].include?(child.name) ||
268
+ %w[strong b em i code a br span strike s del sub sup
269
+ u].include?(child.name) ||
296
270
  (child.element? && contains_only_text_or_inline(child))
297
271
  end
298
272
  end
@@ -302,11 +276,11 @@ module Prosereflect
302
276
  list = OrderedList.new
303
277
 
304
278
  # Handle start attribute
305
- start_val = (html_node['start'] || '1').to_i
279
+ start_val = (html_node["start"] || "1").to_i
306
280
  list.start = start_val
307
281
 
308
282
  # Process list items
309
- html_node.css('> li').each do |li|
283
+ html_node.css("> li").each do |li|
310
284
  list.add_child(create_list_item_node(li))
311
285
  end
312
286
 
@@ -319,11 +293,11 @@ module Prosereflect
319
293
  list.bullet_style = nil
320
294
 
321
295
  # Handle style attribute if present
322
- if html_node['style']&.include?('list-style-type')
323
- style = case html_node['style']
324
- when /disc/ then 'disc'
325
- when /circle/ then 'circle'
326
- when /square/ then 'square'
296
+ if html_node["style"]&.include?("list-style-type")
297
+ style = case html_node["style"]
298
+ when /disc/ then "disc"
299
+ when /circle/ then "circle"
300
+ when /square/ then "square"
327
301
  end
328
302
  list.bullet_style = style
329
303
  end
@@ -337,7 +311,9 @@ module Prosereflect
337
311
  item = ListItem.new
338
312
 
339
313
  # Handle text content first
340
- text_content = html_node.children.select { |child| child.text? || inline_element?(child) }
314
+ text_content = html_node.children.select do |child|
315
+ child.text? || inline_element?(child)
316
+ end
341
317
  if text_content.any?
342
318
  paragraph = Paragraph.new
343
319
  text_content.each do |child|
@@ -348,7 +324,9 @@ module Prosereflect
348
324
  end
349
325
 
350
326
  # Handle nested content
351
- html_node.children.reject { |child| child.text? || inline_element?(child) }.each do |child|
327
+ html_node.children.reject do |child|
328
+ child.text? || inline_element?(child)
329
+ end.each do |child|
352
330
  node = convert_node(child)
353
331
  if node.is_a?(Array)
354
332
  node.each { |n| item.add_content(n) }
@@ -364,7 +342,8 @@ module Prosereflect
364
342
  def inline_element?(node)
365
343
  return false unless node.element?
366
344
 
367
- %w[strong b em i code a br span strike s del sub sup u].include?(node.name)
345
+ %w[strong b em i code a br span strike s del sub sup
346
+ u].include?(node.name)
368
347
  end
369
348
 
370
349
  # Create a blockquote node from HTML blockquote
@@ -372,7 +351,7 @@ module Prosereflect
372
351
  quote = Blockquote.new
373
352
 
374
353
  # Handle cite attribute if present
375
- quote.citation = html_node['cite'] if html_node['cite']
354
+ quote.citation = html_node["cite"] if html_node["cite"]
376
355
 
377
356
  # Process each child separately to maintain block structure
378
357
  html_node.children.each do |child|
@@ -401,8 +380,8 @@ module Prosereflect
401
380
  hr = HorizontalRule.new
402
381
 
403
382
  # Handle style attributes if present
404
- if html_node['style']
405
- style = html_node['style']
383
+ if html_node["style"]
384
+ style = html_node["style"]
406
385
 
407
386
  # Parse border-style
408
387
  hr.style = Regexp.last_match(1) if style =~ /border-style:\s*(solid|dashed|dotted)/
@@ -420,20 +399,20 @@ module Prosereflect
420
399
  # Create an image node from HTML img
421
400
  def create_image_node(html_node)
422
401
  # Skip images without src
423
- return nil unless html_node['src']
402
+ return nil unless html_node["src"]
424
403
 
425
404
  image = Image.new
426
405
 
427
406
  # Handle required src attribute
428
- image.src = html_node['src']
407
+ image.src = html_node["src"]
429
408
 
430
409
  # Handle optional attributes
431
- image.alt = html_node['alt'] if html_node['alt']
432
- image.title = html_node['title'] if html_node['title']
410
+ image.alt = html_node["alt"] if html_node["alt"]
411
+ image.title = html_node["title"] if html_node["title"]
433
412
 
434
413
  # Handle dimensions
435
- width = html_node['width']&.to_i
436
- height = html_node['height']&.to_i
414
+ width = html_node["width"]&.to_i
415
+ height = html_node["height"]&.to_i
437
416
  image.dimensions = [width, height] if width || height
438
417
 
439
418
  image
@@ -443,17 +422,17 @@ module Prosereflect
443
422
  def create_code_block_wrapper(html_node)
444
423
  wrapper = CodeBlockWrapper.new
445
424
  wrapper.attrs = {
446
- 'line_numbers' => false
425
+ "line_numbers" => false,
447
426
  }
448
427
 
449
- code_node = html_node.at_css('code')
428
+ code_node = html_node.at_css("code")
450
429
  if code_node
451
430
  block = create_code_block(code_node)
452
431
  wrapper.add_child(block)
453
432
  end
454
433
 
455
- wrapper.to_h['attrs'] = {
456
- 'line_numbers' => false
434
+ wrapper.to_h["attrs"] = {
435
+ "line_numbers" => false,
457
436
  }
458
437
  wrapper
459
438
  end
@@ -465,9 +444,9 @@ module Prosereflect
465
444
  language = extract_language(html_node)
466
445
 
467
446
  block.attrs = {
468
- 'content' => content,
469
- 'language' => language,
470
- 'line_numbers' => nil
447
+ "content" => content,
448
+ "language" => language,
449
+ "line_numbers" => nil,
471
450
  }
472
451
  block.content = content
473
452
 
@@ -475,9 +454,9 @@ module Prosereflect
475
454
  end
476
455
 
477
456
  def extract_language(html_node)
478
- return nil unless html_node['class']
457
+ return nil unless html_node["class"]
479
458
 
480
- return unless html_node['class'] =~ /language-(\w+)/
459
+ return unless html_node["class"] =~ /language-(\w+)/
481
460
 
482
461
  Regexp.last_match(1)
483
462
  end
@@ -493,13 +472,109 @@ module Prosereflect
493
472
  # Create a user mention node from HTML user-mention element
494
473
  def create_user_node(html_node)
495
474
  # Skip user mentions without data-id
496
- return nil unless html_node['data-id']
475
+ return nil unless html_node["data-id"]
497
476
 
498
477
  user = User.new
499
- user.id = html_node['data-id']
478
+ user.id = html_node["data-id"]
500
479
  user
501
480
  end
481
+
482
+ # Parse HTML with full schema validation
483
+ def parse_with_schema(html, schema, _rules = {})
484
+ document = parse(html)
485
+ validate_against_schema(document, schema)
486
+ document
487
+ rescue ValidationError
488
+ # Fall back to basic parsing if validation fails
489
+ document
490
+ end
491
+
492
+ # Parse HTML with custom parse rules
493
+ def parse_with_rules(html, rules:)
494
+ options = {
495
+ keep_empty: rules[:keep_empty] || false,
496
+ find_wrapping: rules[:find_wrapping],
497
+ top_node: rules[:top_node],
498
+ top_start: rules[:top_start],
499
+ }.merge(rules)
500
+
501
+ document = parse(html)
502
+ apply_parse_rules(document, options)
503
+ end
504
+
505
+ # Parse a single node with context
506
+ def parse_node(html_node, options = {})
507
+ parent_node = options[:node]
508
+ saved_styles = options[:saved_styles] || []
509
+ top_node = options[:top_node] || false
510
+ clear_null = options.fetch(:clear_null, true)
511
+
512
+ node = convert_node(html_node)
513
+ return nil if clear_null && node.nil?
514
+
515
+ apply_node_options(node, parent_node, saved_styles, top_node)
516
+ end
517
+
518
+ # Check if whitespace should be preserved in node
519
+ def preserve_whitespace?(node)
520
+ return true if node.name == "pre"
521
+ return true if node.name == "textarea"
522
+
523
+ style = node["style"]
524
+ return false unless style
525
+
526
+ style.include?("white-space") && style.include?("pre")
527
+ end
528
+
529
+ # Determine space collapsing behavior
530
+ def collapsed_spaces(node)
531
+ return :preserve if preserve_whitespace?(node)
532
+ return :collapse if node.name == "br"
533
+
534
+ :collapse
535
+ end
536
+
537
+ # Normalize whitespace in text
538
+ def normalize_whitespace(text)
539
+ text.gsub(/\s+/, " ").strip
540
+ end
541
+
542
+ def validate_against_schema(document, schema)
543
+ # Basic schema validation
544
+ document.nodes.each do |node|
545
+ validate_node_against_schema(node, schema)
546
+ end
547
+ end
548
+
549
+ def validate_node_against_schema(node, schema)
550
+ node_type = schema.node_type(node.type)
551
+ return unless node_type
552
+
553
+ # Check required content
554
+ return unless node_type.required_content.any?
555
+
556
+ missing = node_type.required_content - (node.content.map(&:type) & node_type.required_content)
557
+ raise ValidationError, "Missing required content: #{missing.join(', ')}" unless missing.empty?
558
+ end
559
+
560
+ def apply_parse_rules(document, options)
561
+ return document unless options[:keep_empty]
562
+
563
+ document
564
+ end
565
+
566
+ def apply_node_options(node, parent_node, saved_styles, top_node)
567
+ return node unless node.respond_to?(:marks=)
568
+
569
+ if top_node && parent_node
570
+ # Apply parent context marks
571
+ node.marks = saved_styles.dup
572
+ end
573
+ node
574
+ end
502
575
  end
576
+
577
+ class ValidationError < StandardError; end
503
578
  end
504
579
  end
505
580
  end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prosereflect
4
+ module Input
5
+ autoload :Html, "#{__dir__}/input/html"
6
+ end
7
+ end
@@ -1,22 +1,19 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'node'
4
- require_relative 'paragraph'
5
- require_relative 'text'
6
- require_relative 'hard_break'
7
-
8
3
  module Prosereflect
9
4
  # ListItem class represents a list item in ProseMirror.
10
5
  class ListItem < Node
11
- PM_TYPE = 'list_item'
6
+ PM_TYPE = "list_item"
12
7
 
13
- attribute :type, :string, default: -> { send('const_get', 'PM_TYPE') }
8
+ attribute :type, :string, default: -> {
9
+ self.class.send(:const_get, "PM_TYPE")
10
+ }
14
11
  attribute :attrs, :hash
15
12
 
16
13
  key_value do
17
- map 'type', to: :type, render_default: true
18
- map 'attrs', to: :attrs
19
- map 'content', to: :content
14
+ map "type", to: :type, render_default: true
15
+ map "attrs", to: :attrs
16
+ map "content", to: :content
20
17
  end
21
18
 
22
19
  def initialize(attributes = {})
@@ -57,9 +54,11 @@ module Prosereflect
57
54
 
58
55
  # Get plain text content from all nodes
59
56
  def text_content
60
- return '' unless content
57
+ return "" unless content
61
58
 
62
- content.map { |node| node.respond_to?(:text_content) ? node.text_content : '' }.join("\n").strip
59
+ content.map do |node|
60
+ node.respond_to?(:text_content) ? node.text_content : ""
61
+ end.join("\n").strip
63
62
  end
64
63
  end
65
64
  end
@@ -1,47 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'lutaml/model'
4
-
5
3
  module Prosereflect
6
4
  module Mark
7
5
  class Base < Lutaml::Model::Serializable
8
- PM_TYPE = 'mark'
6
+ PM_TYPE = "mark"
9
7
 
10
8
  attribute :type, :string, default: lambda {
11
9
  begin
12
10
  self.class.const_get(:PM_TYPE)
13
11
  rescue StandardError
14
- 'mark'
12
+ "mark"
15
13
  end
16
14
  }
17
15
  attribute :attrs, :hash
18
16
 
19
17
  key_value do
20
- map 'type', to: :type, render_default: true
21
- map 'attrs', to: :attrs
18
+ map "type", to: :type, render_default: true
19
+ map "attrs", to: :attrs
22
20
  end
23
21
 
24
22
  def self.create(attrs = nil)
25
23
  new(type: const_get(:PM_TYPE), attrs: attrs)
26
24
  rescue NameError
27
- new(type: 'mark', attrs: attrs)
25
+ new(type: "mark", attrs: attrs)
28
26
  end
29
27
 
30
28
  # Convert to hash for serialization
31
29
  def to_h
32
- result = { 'type' => type }
33
- result['attrs'] = attrs if attrs && !attrs.empty?
30
+ result = { "type" => type }
31
+ result["attrs"] = attrs if attrs && !attrs.empty?
34
32
  result
35
33
  end
36
34
 
37
35
  # Override initialize to ensure the type is set correctly
38
36
  def initialize(options = {})
39
- super(options)
37
+ super
40
38
  # Only set the type to PM_TYPE if no type was provided in options
41
39
  self.type = begin
42
40
  options[:type] || self.class.const_get(:PM_TYPE)
43
41
  rescue StandardError
44
- 'mark'
42
+ "mark"
45
43
  end
46
44
  end
47
45
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
4
-
5
3
  # {
6
4
  # type: "bold"
7
5
  # }
@@ -9,7 +7,7 @@ require_relative 'base'
9
7
  module Prosereflect
10
8
  module Mark
11
9
  class Bold < Base
12
- PM_TYPE = 'bold'
10
+ PM_TYPE = "bold"
13
11
  end
14
12
  end
15
13
  end
@@ -1,14 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
4
-
5
3
  # {
6
4
  # type: "code"
7
5
  # }
8
6
  module Prosereflect
9
7
  module Mark
10
8
  class Code < Base
11
- PM_TYPE = 'code'
9
+ PM_TYPE = "code"
12
10
  end
13
11
  end
14
12
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
4
-
5
3
  # {
6
4
  # type: "italic"
7
5
  # }
@@ -9,7 +7,7 @@ require_relative 'base'
9
7
  module Prosereflect
10
8
  module Mark
11
9
  class Italic < Base
12
- PM_TYPE = 'italic'
10
+ PM_TYPE = "italic"
13
11
  end
14
12
  end
15
13
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'base'
4
-
5
3
  # {
6
4
  # type: "link",
7
5
  # attrs: {
@@ -12,7 +10,7 @@ require_relative 'base'
12
10
  module Prosereflect
13
11
  module Mark
14
12
  class Link < Base
15
- PM_TYPE = 'link'
13
+ PM_TYPE = "link"
16
14
  end
17
15
  end
18
16
  end