prosereflect 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +63 -0
  3. data/.github/workflows/links.yml +97 -0
  4. data/.github/workflows/rake.yml +4 -0
  5. data/.github/workflows/release.yml +5 -0
  6. data/.gitignore +4 -0
  7. data/.rubocop.yml +19 -1
  8. data/.rubocop_todo.yml +119 -183
  9. data/CLAUDE.md +78 -0
  10. data/Gemfile +8 -4
  11. data/README.adoc +2 -0
  12. data/Rakefile +3 -3
  13. data/docs/Gemfile +10 -0
  14. data/docs/INDEX.adoc +45 -0
  15. data/docs/_advanced/index.adoc +15 -0
  16. data/docs/_advanced/schema.adoc +112 -0
  17. data/docs/_advanced/step-map.adoc +66 -0
  18. data/docs/_advanced/steps.adoc +88 -0
  19. data/docs/_advanced/test-builder.adoc +61 -0
  20. data/docs/_advanced/transform.adoc +92 -0
  21. data/docs/_config.yml +174 -0
  22. data/docs/_features/html-input.adoc +69 -0
  23. data/docs/_features/html-output.adoc +45 -0
  24. data/docs/_features/index.adoc +15 -0
  25. data/docs/_features/marks.adoc +86 -0
  26. data/docs/_features/node-types.adoc +124 -0
  27. data/docs/_features/user-mentions.adoc +47 -0
  28. data/docs/_guides/custom-nodes.adoc +107 -0
  29. data/docs/_guides/index.adoc +13 -0
  30. data/docs/_guides/round-trip-html.adoc +91 -0
  31. data/docs/_guides/serialization.adoc +109 -0
  32. data/docs/_pages/index.adoc +67 -0
  33. data/docs/_reference/document-api.adoc +49 -0
  34. data/docs/_reference/index.adoc +14 -0
  35. data/docs/_reference/node-api.adoc +79 -0
  36. data/docs/_reference/schema-api.adoc +95 -0
  37. data/docs/_reference/transform-api.adoc +77 -0
  38. data/docs/_understanding/document-model.adoc +65 -0
  39. data/docs/_understanding/fragment.adoc +52 -0
  40. data/docs/_understanding/index.adoc +14 -0
  41. data/docs/_understanding/resolved-position.adoc +53 -0
  42. data/docs/_understanding/slice.adoc +54 -0
  43. data/docs/lychee.toml +63 -0
  44. data/lib/prosereflect/attribute/base.rb +4 -6
  45. data/lib/prosereflect/attribute/bold.rb +2 -4
  46. data/lib/prosereflect/attribute/href.rb +1 -3
  47. data/lib/prosereflect/attribute/id.rb +7 -7
  48. data/lib/prosereflect/attribute.rb +4 -7
  49. data/lib/prosereflect/blockquote.rb +19 -11
  50. data/lib/prosereflect/bullet_list.rb +36 -29
  51. data/lib/prosereflect/code_block.rb +23 -27
  52. data/lib/prosereflect/code_block_wrapper.rb +12 -13
  53. data/lib/prosereflect/document.rb +14 -22
  54. data/lib/prosereflect/fragment.rb +249 -0
  55. data/lib/prosereflect/hard_break.rb +6 -6
  56. data/lib/prosereflect/heading.rb +14 -15
  57. data/lib/prosereflect/horizontal_rule.rb +23 -14
  58. data/lib/prosereflect/image.rb +32 -23
  59. data/lib/prosereflect/input/html.rb +179 -104
  60. data/lib/prosereflect/input.rb +7 -0
  61. data/lib/prosereflect/list_item.rb +11 -12
  62. data/lib/prosereflect/mark/base.rb +9 -11
  63. data/lib/prosereflect/mark/bold.rb +1 -3
  64. data/lib/prosereflect/mark/code.rb +1 -3
  65. data/lib/prosereflect/mark/italic.rb +1 -3
  66. data/lib/prosereflect/mark/link.rb +1 -3
  67. data/lib/prosereflect/mark/strike.rb +1 -3
  68. data/lib/prosereflect/mark/subscript.rb +1 -3
  69. data/lib/prosereflect/mark/superscript.rb +1 -3
  70. data/lib/prosereflect/mark/underline.rb +1 -3
  71. data/lib/prosereflect/mark.rb +9 -5
  72. data/lib/prosereflect/node.rb +171 -33
  73. data/lib/prosereflect/ordered_list.rb +17 -14
  74. data/lib/prosereflect/output/html.rb +279 -50
  75. data/lib/prosereflect/output.rb +7 -0
  76. data/lib/prosereflect/paragraph.rb +11 -13
  77. data/lib/prosereflect/parser.rb +56 -66
  78. data/lib/prosereflect/resolved_pos.rb +256 -0
  79. data/lib/prosereflect/schema/attribute.rb +57 -0
  80. data/lib/prosereflect/schema/content_match.rb +656 -0
  81. data/lib/prosereflect/schema/fragment.rb +166 -0
  82. data/lib/prosereflect/schema/mark.rb +121 -0
  83. data/lib/prosereflect/schema/mark_type.rb +130 -0
  84. data/lib/prosereflect/schema/node.rb +236 -0
  85. data/lib/prosereflect/schema/node_type.rb +274 -0
  86. data/lib/prosereflect/schema/schema_main.rb +190 -0
  87. data/lib/prosereflect/schema/spec.rb +92 -0
  88. data/lib/prosereflect/schema.rb +39 -0
  89. data/lib/prosereflect/table.rb +12 -13
  90. data/lib/prosereflect/table_cell.rb +13 -13
  91. data/lib/prosereflect/table_header.rb +17 -17
  92. data/lib/prosereflect/table_row.rb +12 -12
  93. data/lib/prosereflect/text.rb +35 -11
  94. data/lib/prosereflect/transform/attr_step.rb +157 -0
  95. data/lib/prosereflect/transform/insert_step.rb +115 -0
  96. data/lib/prosereflect/transform/mapping.rb +82 -0
  97. data/lib/prosereflect/transform/mark_step.rb +269 -0
  98. data/lib/prosereflect/transform/replace_around_step.rb +181 -0
  99. data/lib/prosereflect/transform/replace_step.rb +157 -0
  100. data/lib/prosereflect/transform/slice.rb +91 -0
  101. data/lib/prosereflect/transform/step.rb +89 -0
  102. data/lib/prosereflect/transform/step_map.rb +126 -0
  103. data/lib/prosereflect/transform/structure.rb +120 -0
  104. data/lib/prosereflect/transform/transform.rb +341 -0
  105. data/lib/prosereflect/transform.rb +26 -0
  106. data/lib/prosereflect/user.rb +15 -15
  107. data/lib/prosereflect/version.rb +1 -1
  108. data/lib/prosereflect.rb +30 -17
  109. data/prosereflect.gemspec +17 -16
  110. data/spec/fixtures/documents/formatted_text.yaml +14 -0
  111. data/spec/fixtures/documents/heading_paragraph.yaml +16 -0
  112. data/spec/fixtures/documents/lists_doc.yaml +32 -0
  113. data/spec/fixtures/documents/mixed_content.yaml +40 -0
  114. data/spec/fixtures/documents/nested_doc.yaml +20 -0
  115. data/spec/fixtures/documents/simple_doc.yaml +6 -0
  116. data/spec/fixtures/documents/table_doc.yaml +32 -0
  117. data/spec/fixtures/documents/transform_test.yaml +14 -0
  118. data/spec/fixtures/schema/custom_schema.rb +37 -0
  119. data/spec/fixtures/schema/test_schema.rb +46 -0
  120. data/spec/fixtures/test_builder/helpers.rb +212 -0
  121. data/spec/prosereflect/document_spec.rb +332 -330
  122. data/spec/prosereflect/fragment_spec.rb +273 -0
  123. data/spec/prosereflect/hard_break_spec.rb +125 -125
  124. data/spec/prosereflect/input/html_spec.rb +718 -522
  125. data/spec/prosereflect/node_spec.rb +311 -182
  126. data/spec/prosereflect/output/html_spec.rb +105 -105
  127. data/spec/prosereflect/output/whitespace_spec.rb +248 -0
  128. data/spec/prosereflect/paragraph_spec.rb +275 -274
  129. data/spec/prosereflect/parser/round_trip_spec.rb +472 -0
  130. data/spec/prosereflect/parser_spec.rb +185 -180
  131. data/spec/prosereflect/resolved_pos_spec.rb +74 -0
  132. data/spec/prosereflect/schema/conftest.rb +68 -0
  133. data/spec/prosereflect/schema/content_match_spec.rb +237 -0
  134. data/spec/prosereflect/schema/mark_spec.rb +274 -0
  135. data/spec/prosereflect/schema/mark_type_spec.rb +86 -0
  136. data/spec/prosereflect/schema/node_type_spec.rb +142 -0
  137. data/spec/prosereflect/schema/schema_spec.rb +194 -0
  138. data/spec/prosereflect/table_cell_spec.rb +183 -183
  139. data/spec/prosereflect/table_row_spec.rb +149 -149
  140. data/spec/prosereflect/table_spec.rb +320 -318
  141. data/spec/prosereflect/test_builder/marks_spec.rb +127 -0
  142. data/spec/prosereflect/text_spec.rb +133 -132
  143. data/spec/prosereflect/transform/equivalence_spec.rb +487 -0
  144. data/spec/prosereflect/transform/mapping_spec.rb +226 -0
  145. data/spec/prosereflect/transform/replace_spec.rb +832 -0
  146. data/spec/prosereflect/transform/replace_step_spec.rb +157 -0
  147. data/spec/prosereflect/transform/slice_spec.rb +48 -0
  148. data/spec/prosereflect/transform/step_map_spec.rb +70 -0
  149. data/spec/prosereflect/transform/step_spec.rb +211 -0
  150. data/spec/prosereflect/transform/structure_spec.rb +98 -0
  151. data/spec/prosereflect/transform/transform_spec.rb +238 -0
  152. data/spec/prosereflect/user_spec.rb +31 -28
  153. data/spec/prosereflect_spec.rb +28 -26
  154. data/spec/spec_helper.rb +7 -6
  155. data/spec/support/matchers.rb +6 -6
  156. data/spec/support/shared_examples.rb +49 -49
  157. metadata +96 -5
  158. data/spec/prosereflect/version_spec.rb +0 -11
@@ -1,303 +1,303 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'spec_helper'
3
+ require "spec_helper"
4
4
 
5
5
  RSpec.describe Prosereflect::Input::Html do
6
- describe '.parse' do
7
- it 'parses simple HTML into a document' do
8
- html = '<p>This is a test paragraph.</p>'
6
+ describe ".parse" do
7
+ it "parses simple HTML into a document" do
8
+ html = "<p>This is a test paragraph.</p>"
9
9
  document = described_class.parse(html)
10
10
 
11
11
  expected = {
12
- 'type' => 'doc',
13
- 'content' => [{
14
- 'type' => 'paragraph',
15
- 'content' => [{
16
- 'type' => 'text',
17
- 'text' => 'This is a test paragraph.'
18
- }]
19
- }]
12
+ "type" => "doc",
13
+ "content" => [{
14
+ "type" => "paragraph",
15
+ "content" => [{
16
+ "type" => "text",
17
+ "text" => "This is a test paragraph.",
18
+ }],
19
+ }],
20
20
  }
21
21
 
22
22
  expect(document.to_h).to eq(expected)
23
23
  end
24
24
 
25
- it 'renders basic styled text correctly' do
26
- html = '<p>This is <strong>bold</strong> and <em>italic</em> text.</p>'
25
+ it "renders basic styled text correctly" do
26
+ html = "<p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
27
27
  document = described_class.parse(html)
28
28
 
29
29
  expected = {
30
- 'type' => 'doc',
31
- 'content' => [{
32
- 'type' => 'paragraph',
33
- 'content' => [{
34
- 'type' => 'text',
35
- 'text' => 'This is '
36
- }, {
37
- 'type' => 'text',
38
- 'text' => 'bold',
39
- 'marks' => [{
40
- 'type' => 'bold'
41
- }]
42
- }, {
43
- 'type' => 'text',
44
- 'text' => ' and '
45
- }, {
46
- 'type' => 'text',
47
- 'text' => 'italic',
48
- 'marks' => [{
49
- 'type' => 'italic'
50
- }]
51
- }, {
52
- 'type' => 'text',
53
- 'text' => ' text.'
54
- }]
55
- }]
30
+ "type" => "doc",
31
+ "content" => [{
32
+ "type" => "paragraph",
33
+ "content" => [{
34
+ "type" => "text",
35
+ "text" => "This is ",
36
+ }, {
37
+ "type" => "text",
38
+ "text" => "bold",
39
+ "marks" => [{
40
+ "type" => "bold",
41
+ }],
42
+ }, {
43
+ "type" => "text",
44
+ "text" => " and ",
45
+ }, {
46
+ "type" => "text",
47
+ "text" => "italic",
48
+ "marks" => [{
49
+ "type" => "italic",
50
+ }],
51
+ }, {
52
+ "type" => "text",
53
+ "text" => " text.",
54
+ }],
55
+ }],
56
56
  }
57
57
 
58
58
  expect(document.to_h).to eq(expected)
59
59
  end
60
60
 
61
- it 'parses strike text correctly' do
62
- html = '<p>This is <strike>struck through</strike> text and <s>this too</s> and <del>deleted</del>.</p>'
61
+ it "parses strike text correctly" do
62
+ html = "<p>This is <strike>struck through</strike> text and <s>this too</s> and <del>deleted</del>.</p>"
63
63
  document = described_class.parse(html)
64
64
 
65
65
  expected = {
66
- 'type' => 'doc',
67
- 'content' => [{
68
- 'type' => 'paragraph',
69
- 'content' => [{
70
- 'type' => 'text',
71
- 'text' => 'This is '
72
- }, {
73
- 'type' => 'text',
74
- 'text' => 'struck through',
75
- 'marks' => [{
76
- 'type' => 'strike'
77
- }]
78
- }, {
79
- 'type' => 'text',
80
- 'text' => ' text and '
81
- }, {
82
- 'type' => 'text',
83
- 'text' => 'this too',
84
- 'marks' => [{
85
- 'type' => 'strike'
86
- }]
87
- }, {
88
- 'type' => 'text',
89
- 'text' => ' and '
90
- }, {
91
- 'type' => 'text',
92
- 'text' => 'deleted',
93
- 'marks' => [{
94
- 'type' => 'strike'
95
- }]
96
- }, {
97
- 'type' => 'text',
98
- 'text' => '.'
99
- }]
100
- }]
66
+ "type" => "doc",
67
+ "content" => [{
68
+ "type" => "paragraph",
69
+ "content" => [{
70
+ "type" => "text",
71
+ "text" => "This is ",
72
+ }, {
73
+ "type" => "text",
74
+ "text" => "struck through",
75
+ "marks" => [{
76
+ "type" => "strike",
77
+ }],
78
+ }, {
79
+ "type" => "text",
80
+ "text" => " text and ",
81
+ }, {
82
+ "type" => "text",
83
+ "text" => "this too",
84
+ "marks" => [{
85
+ "type" => "strike",
86
+ }],
87
+ }, {
88
+ "type" => "text",
89
+ "text" => " and ",
90
+ }, {
91
+ "type" => "text",
92
+ "text" => "deleted",
93
+ "marks" => [{
94
+ "type" => "strike",
95
+ }],
96
+ }, {
97
+ "type" => "text",
98
+ "text" => ".",
99
+ }],
100
+ }],
101
101
  }
102
102
 
103
103
  expect(document.to_h).to eq(expected)
104
104
  end
105
105
 
106
- it 'parses subscript text correctly' do
107
- html = '<p>H<sub>2</sub>O and E = mc<sub>2</sub></p>'
106
+ it "parses subscript text correctly" do
107
+ html = "<p>H<sub>2</sub>O and E = mc<sub>2</sub></p>"
108
108
  document = described_class.parse(html)
109
109
 
110
110
  expected = {
111
- 'type' => 'doc',
112
- 'content' => [{
113
- 'type' => 'paragraph',
114
- 'content' => [{
115
- 'type' => 'text',
116
- 'text' => 'H'
117
- }, {
118
- 'type' => 'text',
119
- 'text' => '2',
120
- 'marks' => [{
121
- 'type' => 'subscript'
122
- }]
123
- }, {
124
- 'type' => 'text',
125
- 'text' => 'O and E = mc'
126
- }, {
127
- 'type' => 'text',
128
- 'text' => '2',
129
- 'marks' => [{
130
- 'type' => 'subscript'
131
- }]
132
- }]
133
- }]
111
+ "type" => "doc",
112
+ "content" => [{
113
+ "type" => "paragraph",
114
+ "content" => [{
115
+ "type" => "text",
116
+ "text" => "H",
117
+ }, {
118
+ "type" => "text",
119
+ "text" => "2",
120
+ "marks" => [{
121
+ "type" => "subscript",
122
+ }],
123
+ }, {
124
+ "type" => "text",
125
+ "text" => "O and E = mc",
126
+ }, {
127
+ "type" => "text",
128
+ "text" => "2",
129
+ "marks" => [{
130
+ "type" => "subscript",
131
+ }],
132
+ }],
133
+ }],
134
134
  }
135
135
 
136
136
  expect(document.to_h).to eq(expected)
137
137
  end
138
138
 
139
- it 'parses superscript text correctly' do
140
- html = '<p>x<sup>2</sup> + y<sup>2</sup> = z<sup>2</sup></p>'
139
+ it "parses superscript text correctly" do
140
+ html = "<p>x<sup>2</sup> + y<sup>2</sup> = z<sup>2</sup></p>"
141
141
  document = described_class.parse(html)
142
142
 
143
143
  expected = {
144
- 'type' => 'doc',
145
- 'content' => [{
146
- 'type' => 'paragraph',
147
- 'content' => [{
148
- 'type' => 'text',
149
- 'text' => 'x'
150
- }, {
151
- 'type' => 'text',
152
- 'text' => '2',
153
- 'marks' => [{
154
- 'type' => 'superscript'
155
- }]
156
- }, {
157
- 'type' => 'text',
158
- 'text' => ' + y'
159
- }, {
160
- 'type' => 'text',
161
- 'text' => '2',
162
- 'marks' => [{
163
- 'type' => 'superscript'
164
- }]
165
- }, {
166
- 'type' => 'text',
167
- 'text' => ' = z'
168
- }, {
169
- 'type' => 'text',
170
- 'text' => '2',
171
- 'marks' => [{
172
- 'type' => 'superscript'
173
- }]
174
- }]
175
- }]
144
+ "type" => "doc",
145
+ "content" => [{
146
+ "type" => "paragraph",
147
+ "content" => [{
148
+ "type" => "text",
149
+ "text" => "x",
150
+ }, {
151
+ "type" => "text",
152
+ "text" => "2",
153
+ "marks" => [{
154
+ "type" => "superscript",
155
+ }],
156
+ }, {
157
+ "type" => "text",
158
+ "text" => " + y",
159
+ }, {
160
+ "type" => "text",
161
+ "text" => "2",
162
+ "marks" => [{
163
+ "type" => "superscript",
164
+ }],
165
+ }, {
166
+ "type" => "text",
167
+ "text" => " = z",
168
+ }, {
169
+ "type" => "text",
170
+ "text" => "2",
171
+ "marks" => [{
172
+ "type" => "superscript",
173
+ }],
174
+ }],
175
+ }],
176
176
  }
177
177
 
178
178
  expect(document.to_h).to eq(expected)
179
179
  end
180
180
 
181
- it 'parses underlined text correctly' do
182
- html = '<p>This is <u>underlined</u> text.</p>'
181
+ it "parses underlined text correctly" do
182
+ html = "<p>This is <u>underlined</u> text.</p>"
183
183
  document = described_class.parse(html)
184
184
 
185
185
  expected = {
186
- 'type' => 'doc',
187
- 'content' => [{
188
- 'type' => 'paragraph',
189
- 'content' => [{
190
- 'type' => 'text',
191
- 'text' => 'This is '
192
- }, {
193
- 'type' => 'text',
194
- 'text' => 'underlined',
195
- 'marks' => [{
196
- 'type' => 'underline'
197
- }]
198
- }, {
199
- 'type' => 'text',
200
- 'text' => ' text.'
201
- }]
202
- }]
186
+ "type" => "doc",
187
+ "content" => [{
188
+ "type" => "paragraph",
189
+ "content" => [{
190
+ "type" => "text",
191
+ "text" => "This is ",
192
+ }, {
193
+ "type" => "text",
194
+ "text" => "underlined",
195
+ "marks" => [{
196
+ "type" => "underline",
197
+ }],
198
+ }, {
199
+ "type" => "text",
200
+ "text" => " text.",
201
+ }],
202
+ }],
203
203
  }
204
204
 
205
205
  expect(document.to_h).to eq(expected)
206
206
  end
207
207
 
208
- it 'handles mixed text styles correctly' do
209
- html = '<p><strong><u>Bold and underlined</u></strong> and <em><strike>italic struck</strike></em></p>'
208
+ it "handles mixed text styles correctly" do
209
+ html = "<p><strong><u>Bold and underlined</u></strong> and <em><strike>italic struck</strike></em></p>"
210
210
  document = described_class.parse(html)
211
211
 
212
212
  expected = {
213
- 'type' => 'doc',
214
- 'content' => [{
215
- 'type' => 'paragraph',
216
- 'content' => [{
217
- 'type' => 'text',
218
- 'text' => 'Bold and underlined',
219
- 'marks' => [{
220
- 'type' => 'underline'
213
+ "type" => "doc",
214
+ "content" => [{
215
+ "type" => "paragraph",
216
+ "content" => [{
217
+ "type" => "text",
218
+ "text" => "Bold and underlined",
219
+ "marks" => [{
220
+ "type" => "underline",
221
221
  }, {
222
- 'type' => 'bold'
223
- }]
222
+ "type" => "bold",
223
+ }],
224
224
  }, {
225
- 'type' => 'text',
226
- 'text' => ' and '
225
+ "type" => "text",
226
+ "text" => " and ",
227
227
  }, {
228
- 'type' => 'text',
229
- 'text' => 'italic struck',
230
- 'marks' => [{
231
- 'type' => 'strike'
228
+ "type" => "text",
229
+ "text" => "italic struck",
230
+ "marks" => [{
231
+ "type" => "strike",
232
232
  }, {
233
- 'type' => 'italic'
234
- }]
235
- }]
236
- }]
233
+ "type" => "italic",
234
+ }],
235
+ }],
236
+ }],
237
237
  }
238
238
 
239
239
  expect(document.to_h).to eq(expected)
240
240
  end
241
241
 
242
- it 'handles complex mixed text styles correctly' do
243
- html = '<p>x<sup>2</sup> + <u>y<sub>1</sub></u> = <strike>z<sup>n</sup></strike></p>'
242
+ it "handles complex mixed text styles correctly" do
243
+ html = "<p>x<sup>2</sup> + <u>y<sub>1</sub></u> = <strike>z<sup>n</sup></strike></p>"
244
244
  document = described_class.parse(html)
245
245
 
246
246
  expected = {
247
- 'type' => 'doc',
248
- 'content' => [{
249
- 'type' => 'paragraph',
250
- 'content' => [{
251
- 'type' => 'text',
252
- 'text' => 'x'
253
- }, {
254
- 'type' => 'text',
255
- 'text' => '2',
256
- 'marks' => [{
257
- 'type' => 'superscript'
258
- }]
259
- }, {
260
- 'type' => 'text',
261
- 'text' => ' + '
262
- }, {
263
- 'type' => 'text',
264
- 'text' => 'y',
265
- 'marks' => [{
266
- 'type' => 'underline'
267
- }]
268
- }, {
269
- 'type' => 'text',
270
- 'text' => '1',
271
- 'marks' => [{
272
- 'type' => 'subscript'
247
+ "type" => "doc",
248
+ "content" => [{
249
+ "type" => "paragraph",
250
+ "content" => [{
251
+ "type" => "text",
252
+ "text" => "x",
253
+ }, {
254
+ "type" => "text",
255
+ "text" => "2",
256
+ "marks" => [{
257
+ "type" => "superscript",
258
+ }],
259
+ }, {
260
+ "type" => "text",
261
+ "text" => " + ",
262
+ }, {
263
+ "type" => "text",
264
+ "text" => "y",
265
+ "marks" => [{
266
+ "type" => "underline",
267
+ }],
268
+ }, {
269
+ "type" => "text",
270
+ "text" => "1",
271
+ "marks" => [{
272
+ "type" => "subscript",
273
273
  }, {
274
- 'type' => 'underline'
275
- }]
276
- }, {
277
- 'type' => 'text',
278
- 'text' => ' = '
279
- }, {
280
- 'type' => 'text',
281
- 'text' => 'z',
282
- 'marks' => [{
283
- 'type' => 'strike'
284
- }]
285
- }, {
286
- 'type' => 'text',
287
- 'text' => 'n',
288
- 'marks' => [{
289
- 'type' => 'superscript'
274
+ "type" => "underline",
275
+ }],
276
+ }, {
277
+ "type" => "text",
278
+ "text" => " = ",
279
+ }, {
280
+ "type" => "text",
281
+ "text" => "z",
282
+ "marks" => [{
283
+ "type" => "strike",
284
+ }],
285
+ }, {
286
+ "type" => "text",
287
+ "text" => "n",
288
+ "marks" => [{
289
+ "type" => "superscript",
290
290
  }, {
291
- 'type' => 'strike'
292
- }]
293
- }]
294
- }]
291
+ "type" => "strike",
292
+ }],
293
+ }],
294
+ }],
295
295
  }
296
296
 
297
297
  expect(document.to_h).to eq(expected)
298
298
  end
299
299
 
300
- it 'parses tables correctly' do
300
+ it "parses tables correctly" do
301
301
  html = <<~HTML
302
302
  <table>
303
303
  <tr>
@@ -314,109 +314,109 @@ RSpec.describe Prosereflect::Input::Html do
314
314
  document = described_class.parse(html)
315
315
 
316
316
  expected = {
317
- 'type' => 'doc',
318
- 'content' => [{
319
- 'type' => 'table',
320
- 'content' => [{
321
- 'type' => 'table_row',
322
- 'content' => [{
323
- 'type' => 'table_cell',
324
- 'content' => [{
325
- 'type' => 'paragraph',
326
- 'content' => [{
327
- 'type' => 'text',
328
- 'text' => 'Row 1, Cell 1'
329
- }]
330
- }]
317
+ "type" => "doc",
318
+ "content" => [{
319
+ "type" => "table",
320
+ "content" => [{
321
+ "type" => "table_row",
322
+ "content" => [{
323
+ "type" => "table_cell",
324
+ "content" => [{
325
+ "type" => "paragraph",
326
+ "content" => [{
327
+ "type" => "text",
328
+ "text" => "Row 1, Cell 1",
329
+ }],
330
+ }],
331
331
  }, {
332
- 'type' => 'table_cell',
333
- 'content' => [{
334
- 'type' => 'paragraph',
335
- 'content' => [{
336
- 'type' => 'text',
337
- 'text' => 'Row 1, Cell 2'
338
- }]
339
- }]
340
- }]
341
- }, {
342
- 'type' => 'table_row',
343
- 'content' => [{
344
- 'type' => 'table_cell',
345
- 'content' => [{
346
- 'type' => 'paragraph',
347
- 'content' => [{
348
- 'type' => 'text',
349
- 'text' => 'Row 2, Cell 1'
350
- }]
351
- }]
332
+ "type" => "table_cell",
333
+ "content" => [{
334
+ "type" => "paragraph",
335
+ "content" => [{
336
+ "type" => "text",
337
+ "text" => "Row 1, Cell 2",
338
+ }],
339
+ }],
340
+ }],
341
+ }, {
342
+ "type" => "table_row",
343
+ "content" => [{
344
+ "type" => "table_cell",
345
+ "content" => [{
346
+ "type" => "paragraph",
347
+ "content" => [{
348
+ "type" => "text",
349
+ "text" => "Row 2, Cell 1",
350
+ }],
351
+ }],
352
352
  }, {
353
- 'type' => 'table_cell',
354
- 'content' => [{
355
- 'type' => 'paragraph',
356
- 'content' => [{
357
- 'type' => 'text',
358
- 'text' => 'Row 2, Cell 2'
359
- }]
360
- }]
361
- }]
362
- }]
363
- }]
353
+ "type" => "table_cell",
354
+ "content" => [{
355
+ "type" => "paragraph",
356
+ "content" => [{
357
+ "type" => "text",
358
+ "text" => "Row 2, Cell 2",
359
+ }],
360
+ }],
361
+ }],
362
+ }],
363
+ }],
364
364
  }
365
365
 
366
366
  expect(document.to_h).to eq(expected)
367
367
  end
368
368
 
369
- it 'parses links correctly' do
369
+ it "parses links correctly" do
370
370
  html = '<p>This is a <a href="https://example.com">link</a></p>'
371
371
  document = described_class.parse(html)
372
372
 
373
373
  expected = {
374
- 'type' => 'doc',
375
- 'content' => [{
376
- 'type' => 'paragraph',
377
- 'content' => [{
378
- 'type' => 'text',
379
- 'text' => 'This is a '
380
- }, {
381
- 'type' => 'text',
382
- 'text' => 'link',
383
- 'marks' => [{
384
- 'type' => 'link',
385
- 'attrs' => {
386
- 'href' => 'https://example.com'
387
- }
388
- }]
389
- }]
390
- }]
374
+ "type" => "doc",
375
+ "content" => [{
376
+ "type" => "paragraph",
377
+ "content" => [{
378
+ "type" => "text",
379
+ "text" => "This is a ",
380
+ }, {
381
+ "type" => "text",
382
+ "text" => "link",
383
+ "marks" => [{
384
+ "type" => "link",
385
+ "attrs" => {
386
+ "href" => "https://example.com",
387
+ },
388
+ }],
389
+ }],
390
+ }],
391
391
  }
392
392
 
393
393
  expect(document.to_h).to eq(expected)
394
394
  end
395
395
 
396
- it 'handles line breaks correctly' do
397
- html = '<p>Line 1<br>Line 2</p>'
396
+ it "handles line breaks correctly" do
397
+ html = "<p>Line 1<br>Line 2</p>"
398
398
  document = described_class.parse(html)
399
399
 
400
400
  expected = {
401
- 'type' => 'doc',
402
- 'content' => [{
403
- 'type' => 'paragraph',
404
- 'content' => [{
405
- 'type' => 'text',
406
- 'text' => 'Line 1'
407
- }, {
408
- 'type' => 'hard_break'
409
- }, {
410
- 'type' => 'text',
411
- 'text' => 'Line 2'
412
- }]
413
- }]
401
+ "type" => "doc",
402
+ "content" => [{
403
+ "type" => "paragraph",
404
+ "content" => [{
405
+ "type" => "text",
406
+ "text" => "Line 1",
407
+ }, {
408
+ "type" => "hard_break",
409
+ }, {
410
+ "type" => "text",
411
+ "text" => "Line 2",
412
+ }],
413
+ }],
414
414
  }
415
415
 
416
416
  expect(document.to_h).to eq(expected)
417
417
  end
418
418
 
419
- it 'parses ordered lists with start attribute correctly' do
419
+ it "parses ordered lists with start attribute correctly" do
420
420
  html = <<~HTML
421
421
  <ol start="3">
422
422
  <li>Third item</li>
@@ -427,38 +427,38 @@ RSpec.describe Prosereflect::Input::Html do
427
427
  document = described_class.parse(html)
428
428
 
429
429
  expected = {
430
- 'type' => 'doc',
431
- 'content' => [{
432
- 'type' => 'ordered_list',
433
- 'attrs' => {
434
- 'start' => 3
430
+ "type" => "doc",
431
+ "content" => [{
432
+ "type" => "ordered_list",
433
+ "attrs" => {
434
+ "start" => 3,
435
435
  },
436
- 'content' => [{
437
- 'type' => 'list_item',
438
- 'content' => [{
439
- 'type' => 'paragraph',
440
- 'content' => [{
441
- 'type' => 'text',
442
- 'text' => 'Third item'
443
- }]
444
- }]
445
- }, {
446
- 'type' => 'list_item',
447
- 'content' => [{
448
- 'type' => 'paragraph',
449
- 'content' => [{
450
- 'type' => 'text',
451
- 'text' => 'Fourth item'
452
- }]
453
- }]
454
- }]
455
- }]
436
+ "content" => [{
437
+ "type" => "list_item",
438
+ "content" => [{
439
+ "type" => "paragraph",
440
+ "content" => [{
441
+ "type" => "text",
442
+ "text" => "Third item",
443
+ }],
444
+ }],
445
+ }, {
446
+ "type" => "list_item",
447
+ "content" => [{
448
+ "type" => "paragraph",
449
+ "content" => [{
450
+ "type" => "text",
451
+ "text" => "Fourth item",
452
+ }],
453
+ }],
454
+ }],
455
+ }],
456
456
  }
457
457
 
458
458
  expect(document.to_h).to eq(expected)
459
459
  end
460
460
 
461
- it 'parses bullet lists with styles correctly' do
461
+ it "parses bullet lists with styles correctly" do
462
462
  html = <<~HTML
463
463
  <ul style="list-style-type: square">
464
464
  <li>First bullet</li>
@@ -469,79 +469,79 @@ RSpec.describe Prosereflect::Input::Html do
469
469
  document = described_class.parse(html)
470
470
 
471
471
  expected = {
472
- 'type' => 'doc',
473
- 'content' => [{
474
- 'type' => 'bullet_list',
475
- 'attrs' => {
476
- 'bullet_style' => 'square'
472
+ "type" => "doc",
473
+ "content" => [{
474
+ "type" => "bullet_list",
475
+ "attrs" => {
476
+ "bullet_style" => "square",
477
477
  },
478
- 'content' => [{
479
- 'type' => 'list_item',
480
- 'content' => [{
481
- 'type' => 'paragraph',
482
- 'content' => [{
483
- 'type' => 'text',
484
- 'text' => 'First bullet'
485
- }]
486
- }]
487
- }, {
488
- 'type' => 'list_item',
489
- 'content' => [{
490
- 'type' => 'paragraph',
491
- 'content' => [{
492
- 'type' => 'text',
493
- 'text' => 'Second bullet'
494
- }]
495
- }]
496
- }]
497
- }]
478
+ "content" => [{
479
+ "type" => "list_item",
480
+ "content" => [{
481
+ "type" => "paragraph",
482
+ "content" => [{
483
+ "type" => "text",
484
+ "text" => "First bullet",
485
+ }],
486
+ }],
487
+ }, {
488
+ "type" => "list_item",
489
+ "content" => [{
490
+ "type" => "paragraph",
491
+ "content" => [{
492
+ "type" => "text",
493
+ "text" => "Second bullet",
494
+ }],
495
+ }],
496
+ }],
497
+ }],
498
498
  }
499
499
 
500
500
  expect(document.to_h).to eq(expected)
501
501
  end
502
502
 
503
- it 'renders headings with mixed content correctly' do
503
+ it "renders headings with mixed content correctly" do
504
504
  html = <<~HTML
505
505
  <h1>Title with <strong>bold</strong> and <a href="https://example.com">link</a></h1>
506
506
  HTML
507
507
 
508
508
  expected = {
509
- 'type' => 'doc',
510
- 'content' => [{
511
- 'type' => 'heading',
512
- 'attrs' => {
513
- 'level' => 1
509
+ "type" => "doc",
510
+ "content" => [{
511
+ "type" => "heading",
512
+ "attrs" => {
513
+ "level" => 1,
514
514
  },
515
- 'content' => [{
516
- 'type' => 'text',
517
- 'text' => 'Title with '
518
- }, {
519
- 'type' => 'text',
520
- 'text' => 'bold',
521
- 'marks' => [{
522
- 'type' => 'bold'
523
- }]
524
- }, {
525
- 'type' => 'text',
526
- 'text' => ' and '
527
- }, {
528
- 'type' => 'text',
529
- 'text' => 'link',
530
- 'marks' => [{
531
- 'type' => 'link',
532
- 'attrs' => {
533
- 'href' => 'https://example.com'
534
- }
535
- }]
536
- }]
537
- }]
515
+ "content" => [{
516
+ "type" => "text",
517
+ "text" => "Title with ",
518
+ }, {
519
+ "type" => "text",
520
+ "text" => "bold",
521
+ "marks" => [{
522
+ "type" => "bold",
523
+ }],
524
+ }, {
525
+ "type" => "text",
526
+ "text" => " and ",
527
+ }, {
528
+ "type" => "text",
529
+ "text" => "link",
530
+ "marks" => [{
531
+ "type" => "link",
532
+ "attrs" => {
533
+ "href" => "https://example.com",
534
+ },
535
+ }],
536
+ }],
537
+ }],
538
538
  }
539
539
 
540
540
  document = described_class.parse(html)
541
541
  expect(document.to_h).to eq(expected)
542
542
  end
543
543
 
544
- it 'renders lists with nested content correctly' do
544
+ it "renders lists with nested content correctly" do
545
545
  html = <<~HTML
546
546
  <ul>
547
547
  <li>First item with <em>emphasis</em></li>
@@ -550,51 +550,51 @@ RSpec.describe Prosereflect::Input::Html do
550
550
  HTML
551
551
 
552
552
  expected = {
553
- 'type' => 'doc',
554
- 'content' => [{
555
- 'type' => 'bullet_list',
556
- 'attrs' => {
557
- 'bullet_style' => nil
553
+ "type" => "doc",
554
+ "content" => [{
555
+ "type" => "bullet_list",
556
+ "attrs" => {
557
+ "bullet_style" => nil,
558
558
  },
559
- 'content' => [{
560
- 'type' => 'list_item',
561
- 'content' => [{
562
- 'type' => 'paragraph',
563
- 'content' => [{
564
- 'type' => 'text',
565
- 'text' => 'First item with '
559
+ "content" => [{
560
+ "type" => "list_item",
561
+ "content" => [{
562
+ "type" => "paragraph",
563
+ "content" => [{
564
+ "type" => "text",
565
+ "text" => "First item with ",
566
566
  }, {
567
- 'type' => 'text',
568
- 'text' => 'emphasis',
569
- 'marks' => [{
570
- 'type' => 'italic'
571
- }]
572
- }]
573
- }]
574
- }, {
575
- 'type' => 'list_item',
576
- 'content' => [{
577
- 'type' => 'paragraph',
578
- 'content' => [{
579
- 'type' => 'text',
580
- 'text' => 'Second item with '
567
+ "type" => "text",
568
+ "text" => "emphasis",
569
+ "marks" => [{
570
+ "type" => "italic",
571
+ }],
572
+ }],
573
+ }],
574
+ }, {
575
+ "type" => "list_item",
576
+ "content" => [{
577
+ "type" => "paragraph",
578
+ "content" => [{
579
+ "type" => "text",
580
+ "text" => "Second item with ",
581
581
  }, {
582
- 'type' => 'text',
583
- 'text' => 'code',
584
- 'marks' => [{
585
- 'type' => 'code'
586
- }]
587
- }]
588
- }]
589
- }]
590
- }]
582
+ "type" => "text",
583
+ "text" => "code",
584
+ "marks" => [{
585
+ "type" => "code",
586
+ }],
587
+ }],
588
+ }],
589
+ }],
590
+ }],
591
591
  }
592
592
 
593
593
  document = described_class.parse(html)
594
594
  expect(document.to_h).to eq(expected)
595
595
  end
596
596
 
597
- it 'renders blockquotes with citations correctly' do
597
+ it "renders blockquotes with citations correctly" do
598
598
  html = <<~HTML
599
599
  <blockquote cite="https://example.com">
600
600
  <p>A quote with <strong>bold</strong> text</p>
@@ -602,36 +602,36 @@ RSpec.describe Prosereflect::Input::Html do
602
602
  HTML
603
603
 
604
604
  expected = {
605
- 'type' => 'doc',
606
- 'content' => [{
607
- 'type' => 'blockquote',
608
- 'attrs' => {
609
- 'citation' => 'https://example.com'
605
+ "type" => "doc",
606
+ "content" => [{
607
+ "type" => "blockquote",
608
+ "attrs" => {
609
+ "citation" => "https://example.com",
610
610
  },
611
- 'content' => [{
612
- 'type' => 'paragraph',
613
- 'content' => [{
614
- 'type' => 'text',
615
- 'text' => 'A quote with '
611
+ "content" => [{
612
+ "type" => "paragraph",
613
+ "content" => [{
614
+ "type" => "text",
615
+ "text" => "A quote with ",
616
616
  }, {
617
- 'type' => 'text',
618
- 'text' => 'bold',
619
- 'marks' => [{
620
- 'type' => 'bold'
621
- }]
617
+ "type" => "text",
618
+ "text" => "bold",
619
+ "marks" => [{
620
+ "type" => "bold",
621
+ }],
622
622
  }, {
623
- 'type' => 'text',
624
- 'text' => ' text'
625
- }]
626
- }]
627
- }]
623
+ "type" => "text",
624
+ "text" => " text",
625
+ }],
626
+ }],
627
+ }],
628
628
  }
629
629
 
630
630
  document = described_class.parse(html)
631
631
  expect(document.to_h).to eq(expected)
632
632
  end
633
633
 
634
- it 'renders code blocks with language correctly' do
634
+ it "renders code blocks with language correctly" do
635
635
  html = <<~HTML
636
636
  <pre><code class="language-ruby">def example
637
637
  puts "Hello"
@@ -639,159 +639,355 @@ RSpec.describe Prosereflect::Input::Html do
639
639
  HTML
640
640
 
641
641
  expected = {
642
- 'type' => 'doc',
643
- 'content' => [{
644
- 'type' => 'code_block_wrapper',
645
- 'attrs' => {
646
- 'line_numbers' => false
642
+ "type" => "doc",
643
+ "content" => [{
644
+ "type" => "code_block_wrapper",
645
+ "attrs" => {
646
+ "line_numbers" => false,
647
647
  },
648
- 'content' => [{
649
- 'type' => 'code_block',
650
- 'attrs' => {
651
- 'content' => "def example\n puts \"Hello\"\nend",
652
- 'language' => 'ruby'
653
- }
654
- }]
655
- }]
648
+ "content" => [{
649
+ "type" => "code_block",
650
+ "attrs" => {
651
+ "language" => "ruby",
652
+ },
653
+ "content" => ["def example\n puts \"Hello\"\nend"],
654
+ }],
655
+ }],
656
656
  }
657
657
 
658
658
  document = described_class.parse(html)
659
659
  expect(document.to_h).to eq(expected)
660
660
  end
661
661
 
662
- it 'renders images with attributes correctly' do
662
+ it "renders images with attributes correctly" do
663
663
  html = '<img src="test.jpg" alt="Test image" title="Test title" width="800" height="600">'
664
664
 
665
665
  expected = {
666
- 'type' => 'doc',
667
- 'content' => [{
668
- 'type' => 'image',
669
- 'attrs' => {
670
- 'src' => 'test.jpg',
671
- 'alt' => 'Test image',
672
- 'title' => 'Test title',
673
- 'width' => 800,
674
- 'height' => 600
675
- }
676
- }]
666
+ "type" => "doc",
667
+ "content" => [{
668
+ "type" => "image",
669
+ "attrs" => {
670
+ "src" => "test.jpg",
671
+ "alt" => "Test image",
672
+ "title" => "Test title",
673
+ "width" => 800,
674
+ "height" => 600,
675
+ },
676
+ }],
677
677
  }
678
678
 
679
679
  document = described_class.parse(html)
680
680
  expect(document.to_h).to eq(expected)
681
681
  end
682
682
 
683
- it 'renders horizontal rules with styles correctly' do
683
+ it "renders horizontal rules with styles correctly" do
684
684
  html = '<hr style="border-style: dashed; width: 80%; border-width: 2px">'
685
685
 
686
686
  expected = {
687
- 'type' => 'doc',
688
- 'content' => [{
689
- 'type' => 'horizontal_rule',
690
- 'attrs' => {
691
- 'style' => 'dashed',
692
- 'width' => '80%',
693
- 'thickness' => 2
694
- }
695
- }]
687
+ "type" => "doc",
688
+ "content" => [{
689
+ "type" => "horizontal_rule",
690
+ "attrs" => {
691
+ "style" => "dashed",
692
+ "width" => "80%",
693
+ "thickness" => 2,
694
+ },
695
+ }],
696
696
  }
697
697
 
698
698
  document = described_class.parse(html)
699
699
  expect(document.to_h).to eq(expected)
700
700
  end
701
701
 
702
- it 'parses user mentions correctly' do
702
+ it "parses user mentions correctly" do
703
703
  html = '<user-mention data-id="123"></user-mention>'
704
704
 
705
705
  expected = {
706
- 'type' => 'doc',
707
- 'content' => [{
708
- 'type' => 'user',
709
- 'attrs' => {
710
- 'id' => '123'
706
+ "type" => "doc",
707
+ "content" => [{
708
+ "type" => "user",
709
+ "attrs" => {
710
+ "id" => "123",
711
711
  },
712
- 'content' => []
713
- }]
712
+ "content" => [],
713
+ }],
714
714
  }
715
715
 
716
716
  document = described_class.parse(html)
717
717
  expect(document.to_h).to eq(expected)
718
718
  end
719
719
 
720
- it 'parses user mentions in paragraphs' do
720
+ it "parses user mentions in paragraphs" do
721
721
  html = '<p>Hello <user-mention data-id="123"></user-mention>!</p>'
722
722
 
723
723
  expected = {
724
- 'type' => 'doc',
725
- 'content' => [{
726
- 'type' => 'paragraph',
727
- 'content' => [
724
+ "type" => "doc",
725
+ "content" => [{
726
+ "type" => "paragraph",
727
+ "content" => [
728
728
  {
729
- 'type' => 'text',
730
- 'text' => 'Hello '
729
+ "type" => "text",
730
+ "text" => "Hello ",
731
731
  },
732
732
  {
733
- 'type' => 'user',
734
- 'attrs' => {
735
- 'id' => '123'
733
+ "type" => "user",
734
+ "attrs" => {
735
+ "id" => "123",
736
736
  },
737
- 'content' => []
737
+ "content" => [],
738
738
  },
739
739
  {
740
- 'type' => 'text',
741
- 'text' => '!'
742
- }
743
- ]
744
- }]
740
+ "type" => "text",
741
+ "text" => "!",
742
+ },
743
+ ],
744
+ }],
745
745
  }
746
746
 
747
747
  document = described_class.parse(html)
748
748
  expect(document.to_h).to eq(expected)
749
749
  end
750
750
 
751
- it 'ignores user mentions without data-id' do
752
- html = '<user-mention></user-mention>'
751
+ it "ignores user mentions without data-id" do
752
+ html = "<user-mention></user-mention>"
753
753
 
754
754
  expected = {
755
- 'type' => 'doc'
755
+ "type" => "doc",
756
756
  }
757
757
 
758
758
  document = described_class.parse(html)
759
759
  expect(document.to_h).to eq(expected)
760
760
  end
761
761
 
762
- it 'parses multiple user mentions' do
762
+ it "parses multiple user mentions" do
763
763
  html = '<div>Mentioned: <user-mention data-id="123"></user-mention> and <user-mention data-id="456"></user-mention></div>'
764
764
 
765
765
  expected = {
766
- 'type' => 'doc',
767
- 'content' => [
766
+ "type" => "doc",
767
+ "content" => [
768
768
  {
769
- 'type' => 'text',
770
- 'text' => 'Mentioned: '
769
+ "type" => "text",
770
+ "text" => "Mentioned: ",
771
771
  },
772
772
  {
773
- 'type' => 'user',
774
- 'attrs' => {
775
- 'id' => '123'
773
+ "type" => "user",
774
+ "attrs" => {
775
+ "id" => "123",
776
776
  },
777
- 'content' => []
777
+ "content" => [],
778
778
  },
779
779
  {
780
- 'type' => 'text',
781
- 'text' => ' and '
780
+ "type" => "text",
781
+ "text" => " and ",
782
782
  },
783
783
  {
784
- 'type' => 'user',
785
- 'attrs' => {
786
- 'id' => '456'
784
+ "type" => "user",
785
+ "attrs" => {
786
+ "id" => "456",
787
787
  },
788
- 'content' => []
789
- }
790
- ]
788
+ "content" => [],
789
+ },
790
+ ],
791
791
  }
792
792
 
793
793
  document = described_class.parse(html)
794
794
  expect(document.to_h).to eq(expected)
795
795
  end
796
796
  end
797
+
798
+ describe ".parse_with_schema" do
799
+ it "parses HTML and returns a document when validation is bypassed" do
800
+ html = "<p>Hello world</p>"
801
+ allow(described_class).to receive(:validate_against_schema)
802
+ document = described_class.send(:parse_with_schema, html, nil)
803
+ expect(document).to be_a(Prosereflect::Document)
804
+ expect(document.to_h["content"].first["type"]).to eq("paragraph")
805
+ end
806
+
807
+ it "preserves document content when validation is bypassed" do
808
+ html = "<p>Schema test</p>"
809
+ allow(described_class).to receive(:validate_against_schema)
810
+ document = described_class.send(:parse_with_schema, html, nil)
811
+ para = document.to_h["content"].first
812
+ text_node = para["content"].first
813
+ expect(text_node["text"]).to eq("Schema test")
814
+ end
815
+
816
+ it "parses complex HTML with validation bypassed" do
817
+ html = "<h1>Title</h1><p>Paragraph with <strong>bold</strong> text</p>"
818
+ allow(described_class).to receive(:validate_against_schema)
819
+ document = described_class.send(:parse_with_schema, html, nil)
820
+ content = document.to_h["content"]
821
+ expect(content.length).to eq(2)
822
+ expect(content[0]["type"]).to eq("heading")
823
+ expect(content[1]["type"]).to eq("paragraph")
824
+ end
825
+
826
+ it "rescues ValidationError and returns the document" do
827
+ html = "<p>Validation error test</p>"
828
+ allow(described_class).to receive(:validate_against_schema).and_raise(
829
+ Prosereflect::Input::Html::ValidationError, "Missing required content"
830
+ )
831
+ document = described_class.send(:parse_with_schema, html, nil)
832
+ expect(document).to be_a(Prosereflect::Document)
833
+ expect(document.to_h["content"].first["type"]).to eq("paragraph")
834
+ end
835
+ end
836
+
837
+ describe ".parse_with_rules" do
838
+ it "parses HTML with keep_empty option" do
839
+ html = "<p>Keep empty test</p>"
840
+ document = described_class.send(:parse_with_rules, html, rules: { keep_empty: true })
841
+ expect(document).to be_a(Prosereflect::Document)
842
+ expect(document.to_h["content"].first["type"]).to eq("paragraph")
843
+ end
844
+
845
+ it "parses HTML with empty rules" do
846
+ html = "<p>Empty rules test</p>"
847
+ document = described_class.send(:parse_with_rules, html, rules: {})
848
+ expect(document).to be_a(Prosereflect::Document)
849
+ end
850
+
851
+ it "preserves content with keep_empty false" do
852
+ html = "<p>Keep empty false</p>"
853
+ document = described_class.send(:parse_with_rules, html, rules: { keep_empty: false })
854
+ para = document.to_h["content"].first
855
+ text_node = para["content"].first
856
+ expect(text_node["text"]).to eq("Keep empty false")
857
+ end
858
+
859
+ it "accepts top_node option" do
860
+ html = "<p>Top node test</p>"
861
+ document = described_class.send(:parse_with_rules, html, rules: { top_node: "doc" })
862
+ expect(document.to_h["type"]).to eq("doc")
863
+ end
864
+ end
865
+
866
+ describe ".parse_node" do
867
+ it "parses a single HTML paragraph node" do
868
+ doc = Nokogiri::HTML("<p>Single node</p>")
869
+ html_node = doc.at_css("p")
870
+ result = described_class.send(:parse_node, html_node)
871
+ expect(result).to be_a(Prosereflect::Paragraph)
872
+ end
873
+
874
+ it "parses a text node" do
875
+ doc = Nokogiri::HTML("<p>text content</p>")
876
+ html_node = doc.at_css("p").children.first
877
+ result = described_class.send(:parse_node, html_node)
878
+ expect(result).to be_a(Prosereflect::Text)
879
+ expect(result.text).to eq("text content")
880
+ end
881
+
882
+ it "returns nil for empty text nodes with clear_null" do
883
+ doc = Nokogiri::HTML("<p> </p>")
884
+ html_node = doc.at_css("p").children.first
885
+ result = described_class.send(:parse_node, html_node, clear_null: true)
886
+ expect(result).to be_nil
887
+ end
888
+
889
+ it "returns nil for empty text nodes by default" do
890
+ doc = Nokogiri::HTML("<p> </p>")
891
+ html_node = doc.at_css("p").children.first
892
+ result = described_class.send(:parse_node, html_node)
893
+ expect(result).to be_nil
894
+ end
895
+
896
+ it "accepts node option for parent context" do
897
+ doc = Nokogiri::HTML("<p>parent context</p>")
898
+ html_node = doc.at_css("p")
899
+ parent_node = Prosereflect::Document.new
900
+ result = described_class.send(:parse_node, html_node, node: parent_node)
901
+ expect(result).to be_a(Prosereflect::Paragraph)
902
+ end
903
+
904
+ it "accepts saved_styles option" do
905
+ doc = Nokogiri::HTML("<p>styled</p>")
906
+ html_node = doc.at_css("p")
907
+ result = described_class.send(:parse_node, html_node, saved_styles: [])
908
+ expect(result).to be_a(Prosereflect::Paragraph)
909
+ end
910
+ end
911
+
912
+ describe ".preserve_whitespace?" do
913
+ it "returns true for pre elements" do
914
+ doc = Nokogiri::HTML("<pre>code</pre>")
915
+ pre_node = doc.at_css("pre")
916
+ expect(described_class.send(:preserve_whitespace?, pre_node)).to be true
917
+ end
918
+
919
+ it "returns true for textarea elements" do
920
+ doc = Nokogiri::HTML("<textarea>text</textarea>")
921
+ textarea_node = doc.at_css("textarea")
922
+ expect(described_class.send(:preserve_whitespace?, textarea_node)).to be true
923
+ end
924
+
925
+ it "returns true for elements with white-space: pre style" do
926
+ doc = Nokogiri::HTML('<div style="white-space: pre">text</div>')
927
+ div_node = doc.at_css("div")
928
+ expect(described_class.send(:preserve_whitespace?, div_node)).to be true
929
+ end
930
+
931
+ it "returns false for paragraph elements" do
932
+ doc = Nokogiri::HTML("<p>text</p>")
933
+ p_node = doc.at_css("p")
934
+ expect(described_class.send(:preserve_whitespace?, p_node)).to be false
935
+ end
936
+
937
+ it "returns false for elements without white-space style" do
938
+ doc = Nokogiri::HTML('<div style="color: red">text</div>')
939
+ div_node = doc.at_css("div")
940
+ expect(described_class.send(:preserve_whitespace?, div_node)).to be false
941
+ end
942
+
943
+ it "returns false for elements without style attribute" do
944
+ doc = Nokogiri::HTML("<div>text</div>")
945
+ div_node = doc.at_css("div")
946
+ expect(described_class.send(:preserve_whitespace?, div_node)).to be false
947
+ end
948
+
949
+ it "returns false for elements with white-space but not pre" do
950
+ doc = Nokogiri::HTML('<div style="white-space: nowrap">text</div>')
951
+ div_node = doc.at_css("div")
952
+ expect(described_class.send(:preserve_whitespace?, div_node)).to be false
953
+ end
954
+ end
955
+
956
+ describe ".normalize_whitespace" do
957
+ it "replaces multiple spaces with a single space" do
958
+ expect(described_class.send(:normalize_whitespace, "hello world")).to eq("hello world")
959
+ end
960
+
961
+ it "replaces tabs with spaces" do
962
+ expect(described_class.send(:normalize_whitespace, "hello\tworld")).to eq("hello world")
963
+ end
964
+
965
+ it "replaces newlines with spaces" do
966
+ expect(described_class.send(:normalize_whitespace, "hello\nworld")).to eq("hello world")
967
+ end
968
+
969
+ it "replaces carriage returns with spaces" do
970
+ expect(described_class.send(:normalize_whitespace, "hello\rworld")).to eq("hello world")
971
+ end
972
+
973
+ it "strips leading and trailing whitespace" do
974
+ expect(described_class.send(:normalize_whitespace, " hello ")).to eq("hello")
975
+ end
976
+
977
+ it "handles mixed whitespace" do
978
+ expect(described_class.send(:normalize_whitespace, " hello \t\n world ")).to eq("hello world")
979
+ end
980
+
981
+ it "returns empty string for whitespace-only input" do
982
+ expect(described_class.send(:normalize_whitespace, " ")).to eq("")
983
+ end
984
+
985
+ it "handles an empty string" do
986
+ expect(described_class.send(:normalize_whitespace, "")).to eq("")
987
+ end
988
+
989
+ it "does not modify a clean string" do
990
+ expect(described_class.send(:normalize_whitespace, "hello world")).to eq("hello world")
991
+ end
992
+ end
797
993
  end