prosereflect 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/docs.yml +63 -0
- data/.github/workflows/links.yml +97 -0
- data/.github/workflows/rake.yml +4 -0
- data/.github/workflows/release.yml +5 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +19 -1
- data/.rubocop_todo.yml +119 -183
- data/CLAUDE.md +78 -0
- data/Gemfile +8 -4
- data/README.adoc +2 -0
- data/Rakefile +3 -3
- data/docs/Gemfile +10 -0
- data/docs/INDEX.adoc +45 -0
- data/docs/_advanced/index.adoc +15 -0
- data/docs/_advanced/schema.adoc +112 -0
- data/docs/_advanced/step-map.adoc +66 -0
- data/docs/_advanced/steps.adoc +88 -0
- data/docs/_advanced/test-builder.adoc +61 -0
- data/docs/_advanced/transform.adoc +92 -0
- data/docs/_config.yml +174 -0
- data/docs/_features/html-input.adoc +69 -0
- data/docs/_features/html-output.adoc +45 -0
- data/docs/_features/index.adoc +15 -0
- data/docs/_features/marks.adoc +86 -0
- data/docs/_features/node-types.adoc +124 -0
- data/docs/_features/user-mentions.adoc +47 -0
- data/docs/_guides/custom-nodes.adoc +107 -0
- data/docs/_guides/index.adoc +13 -0
- data/docs/_guides/round-trip-html.adoc +91 -0
- data/docs/_guides/serialization.adoc +109 -0
- data/docs/_pages/index.adoc +67 -0
- data/docs/_reference/document-api.adoc +49 -0
- data/docs/_reference/index.adoc +14 -0
- data/docs/_reference/node-api.adoc +79 -0
- data/docs/_reference/schema-api.adoc +95 -0
- data/docs/_reference/transform-api.adoc +77 -0
- data/docs/_understanding/document-model.adoc +65 -0
- data/docs/_understanding/fragment.adoc +52 -0
- data/docs/_understanding/index.adoc +14 -0
- data/docs/_understanding/resolved-position.adoc +53 -0
- data/docs/_understanding/slice.adoc +54 -0
- data/docs/lychee.toml +63 -0
- data/lib/prosereflect/attribute/base.rb +4 -6
- data/lib/prosereflect/attribute/bold.rb +2 -4
- data/lib/prosereflect/attribute/href.rb +1 -3
- data/lib/prosereflect/attribute/id.rb +7 -7
- data/lib/prosereflect/attribute.rb +4 -7
- data/lib/prosereflect/blockquote.rb +19 -11
- data/lib/prosereflect/bullet_list.rb +36 -29
- data/lib/prosereflect/code_block.rb +23 -27
- data/lib/prosereflect/code_block_wrapper.rb +12 -13
- data/lib/prosereflect/document.rb +14 -22
- data/lib/prosereflect/fragment.rb +249 -0
- data/lib/prosereflect/hard_break.rb +6 -6
- data/lib/prosereflect/heading.rb +14 -15
- data/lib/prosereflect/horizontal_rule.rb +23 -14
- data/lib/prosereflect/image.rb +32 -23
- data/lib/prosereflect/input/html.rb +179 -104
- data/lib/prosereflect/input.rb +7 -0
- data/lib/prosereflect/list_item.rb +11 -12
- data/lib/prosereflect/mark/base.rb +9 -11
- data/lib/prosereflect/mark/bold.rb +1 -3
- data/lib/prosereflect/mark/code.rb +1 -3
- data/lib/prosereflect/mark/italic.rb +1 -3
- data/lib/prosereflect/mark/link.rb +1 -3
- data/lib/prosereflect/mark/strike.rb +1 -3
- data/lib/prosereflect/mark/subscript.rb +1 -3
- data/lib/prosereflect/mark/superscript.rb +1 -3
- data/lib/prosereflect/mark/underline.rb +1 -3
- data/lib/prosereflect/mark.rb +9 -5
- data/lib/prosereflect/node.rb +171 -33
- data/lib/prosereflect/ordered_list.rb +17 -14
- data/lib/prosereflect/output/html.rb +279 -50
- data/lib/prosereflect/output.rb +7 -0
- data/lib/prosereflect/paragraph.rb +11 -13
- data/lib/prosereflect/parser.rb +56 -66
- data/lib/prosereflect/resolved_pos.rb +256 -0
- data/lib/prosereflect/schema/attribute.rb +57 -0
- data/lib/prosereflect/schema/content_match.rb +656 -0
- data/lib/prosereflect/schema/fragment.rb +166 -0
- data/lib/prosereflect/schema/mark.rb +121 -0
- data/lib/prosereflect/schema/mark_type.rb +130 -0
- data/lib/prosereflect/schema/node.rb +236 -0
- data/lib/prosereflect/schema/node_type.rb +274 -0
- data/lib/prosereflect/schema/schema_main.rb +190 -0
- data/lib/prosereflect/schema/spec.rb +92 -0
- data/lib/prosereflect/schema.rb +39 -0
- data/lib/prosereflect/table.rb +12 -13
- data/lib/prosereflect/table_cell.rb +13 -13
- data/lib/prosereflect/table_header.rb +17 -17
- data/lib/prosereflect/table_row.rb +12 -12
- data/lib/prosereflect/text.rb +35 -11
- data/lib/prosereflect/transform/attr_step.rb +157 -0
- data/lib/prosereflect/transform/insert_step.rb +115 -0
- data/lib/prosereflect/transform/mapping.rb +82 -0
- data/lib/prosereflect/transform/mark_step.rb +269 -0
- data/lib/prosereflect/transform/replace_around_step.rb +181 -0
- data/lib/prosereflect/transform/replace_step.rb +157 -0
- data/lib/prosereflect/transform/slice.rb +91 -0
- data/lib/prosereflect/transform/step.rb +89 -0
- data/lib/prosereflect/transform/step_map.rb +126 -0
- data/lib/prosereflect/transform/structure.rb +120 -0
- data/lib/prosereflect/transform/transform.rb +341 -0
- data/lib/prosereflect/transform.rb +26 -0
- data/lib/prosereflect/user.rb +15 -15
- data/lib/prosereflect/version.rb +1 -1
- data/lib/prosereflect.rb +30 -17
- data/prosereflect.gemspec +17 -16
- data/spec/fixtures/documents/formatted_text.yaml +14 -0
- data/spec/fixtures/documents/heading_paragraph.yaml +16 -0
- data/spec/fixtures/documents/lists_doc.yaml +32 -0
- data/spec/fixtures/documents/mixed_content.yaml +40 -0
- data/spec/fixtures/documents/nested_doc.yaml +20 -0
- data/spec/fixtures/documents/simple_doc.yaml +6 -0
- data/spec/fixtures/documents/table_doc.yaml +32 -0
- data/spec/fixtures/documents/transform_test.yaml +14 -0
- data/spec/fixtures/schema/custom_schema.rb +37 -0
- data/spec/fixtures/schema/test_schema.rb +46 -0
- data/spec/fixtures/test_builder/helpers.rb +212 -0
- data/spec/prosereflect/document_spec.rb +332 -330
- data/spec/prosereflect/fragment_spec.rb +273 -0
- data/spec/prosereflect/hard_break_spec.rb +125 -125
- data/spec/prosereflect/input/html_spec.rb +718 -522
- data/spec/prosereflect/node_spec.rb +311 -182
- data/spec/prosereflect/output/html_spec.rb +105 -105
- data/spec/prosereflect/output/whitespace_spec.rb +248 -0
- data/spec/prosereflect/paragraph_spec.rb +275 -274
- data/spec/prosereflect/parser/round_trip_spec.rb +472 -0
- data/spec/prosereflect/parser_spec.rb +185 -180
- data/spec/prosereflect/resolved_pos_spec.rb +74 -0
- data/spec/prosereflect/schema/conftest.rb +68 -0
- data/spec/prosereflect/schema/content_match_spec.rb +237 -0
- data/spec/prosereflect/schema/mark_spec.rb +274 -0
- data/spec/prosereflect/schema/mark_type_spec.rb +86 -0
- data/spec/prosereflect/schema/node_type_spec.rb +142 -0
- data/spec/prosereflect/schema/schema_spec.rb +194 -0
- data/spec/prosereflect/table_cell_spec.rb +183 -183
- data/spec/prosereflect/table_row_spec.rb +149 -149
- data/spec/prosereflect/table_spec.rb +320 -318
- data/spec/prosereflect/test_builder/marks_spec.rb +127 -0
- data/spec/prosereflect/text_spec.rb +133 -132
- data/spec/prosereflect/transform/equivalence_spec.rb +487 -0
- data/spec/prosereflect/transform/mapping_spec.rb +226 -0
- data/spec/prosereflect/transform/replace_spec.rb +832 -0
- data/spec/prosereflect/transform/replace_step_spec.rb +157 -0
- data/spec/prosereflect/transform/slice_spec.rb +48 -0
- data/spec/prosereflect/transform/step_map_spec.rb +70 -0
- data/spec/prosereflect/transform/step_spec.rb +211 -0
- data/spec/prosereflect/transform/structure_spec.rb +98 -0
- data/spec/prosereflect/transform/transform_spec.rb +238 -0
- data/spec/prosereflect/user_spec.rb +31 -28
- data/spec/prosereflect_spec.rb +28 -26
- data/spec/spec_helper.rb +7 -6
- data/spec/support/matchers.rb +6 -6
- data/spec/support/shared_examples.rb +49 -49
- metadata +96 -5
- data/spec/prosereflect/version_spec.rb +0 -11
|
@@ -1,303 +1,303 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require
|
|
3
|
+
require "spec_helper"
|
|
4
4
|
|
|
5
5
|
RSpec.describe Prosereflect::Input::Html do
|
|
6
|
-
describe
|
|
7
|
-
it
|
|
8
|
-
html =
|
|
6
|
+
describe ".parse" do
|
|
7
|
+
it "parses simple HTML into a document" do
|
|
8
|
+
html = "<p>This is a test paragraph.</p>"
|
|
9
9
|
document = described_class.parse(html)
|
|
10
10
|
|
|
11
11
|
expected = {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
}]
|
|
19
|
-
}]
|
|
12
|
+
"type" => "doc",
|
|
13
|
+
"content" => [{
|
|
14
|
+
"type" => "paragraph",
|
|
15
|
+
"content" => [{
|
|
16
|
+
"type" => "text",
|
|
17
|
+
"text" => "This is a test paragraph.",
|
|
18
|
+
}],
|
|
19
|
+
}],
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
expect(document.to_h).to eq(expected)
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
-
it
|
|
26
|
-
html =
|
|
25
|
+
it "renders basic styled text correctly" do
|
|
26
|
+
html = "<p>This is <strong>bold</strong> and <em>italic</em> text.</p>"
|
|
27
27
|
document = described_class.parse(html)
|
|
28
28
|
|
|
29
29
|
expected = {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
}, {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
}]
|
|
42
|
-
}, {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
}, {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
}]
|
|
51
|
-
}, {
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
}]
|
|
55
|
-
}]
|
|
30
|
+
"type" => "doc",
|
|
31
|
+
"content" => [{
|
|
32
|
+
"type" => "paragraph",
|
|
33
|
+
"content" => [{
|
|
34
|
+
"type" => "text",
|
|
35
|
+
"text" => "This is ",
|
|
36
|
+
}, {
|
|
37
|
+
"type" => "text",
|
|
38
|
+
"text" => "bold",
|
|
39
|
+
"marks" => [{
|
|
40
|
+
"type" => "bold",
|
|
41
|
+
}],
|
|
42
|
+
}, {
|
|
43
|
+
"type" => "text",
|
|
44
|
+
"text" => " and ",
|
|
45
|
+
}, {
|
|
46
|
+
"type" => "text",
|
|
47
|
+
"text" => "italic",
|
|
48
|
+
"marks" => [{
|
|
49
|
+
"type" => "italic",
|
|
50
|
+
}],
|
|
51
|
+
}, {
|
|
52
|
+
"type" => "text",
|
|
53
|
+
"text" => " text.",
|
|
54
|
+
}],
|
|
55
|
+
}],
|
|
56
56
|
}
|
|
57
57
|
|
|
58
58
|
expect(document.to_h).to eq(expected)
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
-
it
|
|
62
|
-
html =
|
|
61
|
+
it "parses strike text correctly" do
|
|
62
|
+
html = "<p>This is <strike>struck through</strike> text and <s>this too</s> and <del>deleted</del>.</p>"
|
|
63
63
|
document = described_class.parse(html)
|
|
64
64
|
|
|
65
65
|
expected = {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
}, {
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
}]
|
|
78
|
-
}, {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
}, {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
}]
|
|
87
|
-
}, {
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
}, {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
}]
|
|
96
|
-
}, {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
}]
|
|
100
|
-
}]
|
|
66
|
+
"type" => "doc",
|
|
67
|
+
"content" => [{
|
|
68
|
+
"type" => "paragraph",
|
|
69
|
+
"content" => [{
|
|
70
|
+
"type" => "text",
|
|
71
|
+
"text" => "This is ",
|
|
72
|
+
}, {
|
|
73
|
+
"type" => "text",
|
|
74
|
+
"text" => "struck through",
|
|
75
|
+
"marks" => [{
|
|
76
|
+
"type" => "strike",
|
|
77
|
+
}],
|
|
78
|
+
}, {
|
|
79
|
+
"type" => "text",
|
|
80
|
+
"text" => " text and ",
|
|
81
|
+
}, {
|
|
82
|
+
"type" => "text",
|
|
83
|
+
"text" => "this too",
|
|
84
|
+
"marks" => [{
|
|
85
|
+
"type" => "strike",
|
|
86
|
+
}],
|
|
87
|
+
}, {
|
|
88
|
+
"type" => "text",
|
|
89
|
+
"text" => " and ",
|
|
90
|
+
}, {
|
|
91
|
+
"type" => "text",
|
|
92
|
+
"text" => "deleted",
|
|
93
|
+
"marks" => [{
|
|
94
|
+
"type" => "strike",
|
|
95
|
+
}],
|
|
96
|
+
}, {
|
|
97
|
+
"type" => "text",
|
|
98
|
+
"text" => ".",
|
|
99
|
+
}],
|
|
100
|
+
}],
|
|
101
101
|
}
|
|
102
102
|
|
|
103
103
|
expect(document.to_h).to eq(expected)
|
|
104
104
|
end
|
|
105
105
|
|
|
106
|
-
it
|
|
107
|
-
html =
|
|
106
|
+
it "parses subscript text correctly" do
|
|
107
|
+
html = "<p>H<sub>2</sub>O and E = mc<sub>2</sub></p>"
|
|
108
108
|
document = described_class.parse(html)
|
|
109
109
|
|
|
110
110
|
expected = {
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
}, {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
}]
|
|
123
|
-
}, {
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
}, {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
}]
|
|
132
|
-
}]
|
|
133
|
-
}]
|
|
111
|
+
"type" => "doc",
|
|
112
|
+
"content" => [{
|
|
113
|
+
"type" => "paragraph",
|
|
114
|
+
"content" => [{
|
|
115
|
+
"type" => "text",
|
|
116
|
+
"text" => "H",
|
|
117
|
+
}, {
|
|
118
|
+
"type" => "text",
|
|
119
|
+
"text" => "2",
|
|
120
|
+
"marks" => [{
|
|
121
|
+
"type" => "subscript",
|
|
122
|
+
}],
|
|
123
|
+
}, {
|
|
124
|
+
"type" => "text",
|
|
125
|
+
"text" => "O and E = mc",
|
|
126
|
+
}, {
|
|
127
|
+
"type" => "text",
|
|
128
|
+
"text" => "2",
|
|
129
|
+
"marks" => [{
|
|
130
|
+
"type" => "subscript",
|
|
131
|
+
}],
|
|
132
|
+
}],
|
|
133
|
+
}],
|
|
134
134
|
}
|
|
135
135
|
|
|
136
136
|
expect(document.to_h).to eq(expected)
|
|
137
137
|
end
|
|
138
138
|
|
|
139
|
-
it
|
|
140
|
-
html =
|
|
139
|
+
it "parses superscript text correctly" do
|
|
140
|
+
html = "<p>x<sup>2</sup> + y<sup>2</sup> = z<sup>2</sup></p>"
|
|
141
141
|
document = described_class.parse(html)
|
|
142
142
|
|
|
143
143
|
expected = {
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
}, {
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
}]
|
|
156
|
-
}, {
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
}, {
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
}]
|
|
165
|
-
}, {
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
}, {
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
}]
|
|
174
|
-
}]
|
|
175
|
-
}]
|
|
144
|
+
"type" => "doc",
|
|
145
|
+
"content" => [{
|
|
146
|
+
"type" => "paragraph",
|
|
147
|
+
"content" => [{
|
|
148
|
+
"type" => "text",
|
|
149
|
+
"text" => "x",
|
|
150
|
+
}, {
|
|
151
|
+
"type" => "text",
|
|
152
|
+
"text" => "2",
|
|
153
|
+
"marks" => [{
|
|
154
|
+
"type" => "superscript",
|
|
155
|
+
}],
|
|
156
|
+
}, {
|
|
157
|
+
"type" => "text",
|
|
158
|
+
"text" => " + y",
|
|
159
|
+
}, {
|
|
160
|
+
"type" => "text",
|
|
161
|
+
"text" => "2",
|
|
162
|
+
"marks" => [{
|
|
163
|
+
"type" => "superscript",
|
|
164
|
+
}],
|
|
165
|
+
}, {
|
|
166
|
+
"type" => "text",
|
|
167
|
+
"text" => " = z",
|
|
168
|
+
}, {
|
|
169
|
+
"type" => "text",
|
|
170
|
+
"text" => "2",
|
|
171
|
+
"marks" => [{
|
|
172
|
+
"type" => "superscript",
|
|
173
|
+
}],
|
|
174
|
+
}],
|
|
175
|
+
}],
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
expect(document.to_h).to eq(expected)
|
|
179
179
|
end
|
|
180
180
|
|
|
181
|
-
it
|
|
182
|
-
html =
|
|
181
|
+
it "parses underlined text correctly" do
|
|
182
|
+
html = "<p>This is <u>underlined</u> text.</p>"
|
|
183
183
|
document = described_class.parse(html)
|
|
184
184
|
|
|
185
185
|
expected = {
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
}, {
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
}]
|
|
198
|
-
}, {
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
}]
|
|
202
|
-
}]
|
|
186
|
+
"type" => "doc",
|
|
187
|
+
"content" => [{
|
|
188
|
+
"type" => "paragraph",
|
|
189
|
+
"content" => [{
|
|
190
|
+
"type" => "text",
|
|
191
|
+
"text" => "This is ",
|
|
192
|
+
}, {
|
|
193
|
+
"type" => "text",
|
|
194
|
+
"text" => "underlined",
|
|
195
|
+
"marks" => [{
|
|
196
|
+
"type" => "underline",
|
|
197
|
+
}],
|
|
198
|
+
}, {
|
|
199
|
+
"type" => "text",
|
|
200
|
+
"text" => " text.",
|
|
201
|
+
}],
|
|
202
|
+
}],
|
|
203
203
|
}
|
|
204
204
|
|
|
205
205
|
expect(document.to_h).to eq(expected)
|
|
206
206
|
end
|
|
207
207
|
|
|
208
|
-
it
|
|
209
|
-
html =
|
|
208
|
+
it "handles mixed text styles correctly" do
|
|
209
|
+
html = "<p><strong><u>Bold and underlined</u></strong> and <em><strike>italic struck</strike></em></p>"
|
|
210
210
|
document = described_class.parse(html)
|
|
211
211
|
|
|
212
212
|
expected = {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
213
|
+
"type" => "doc",
|
|
214
|
+
"content" => [{
|
|
215
|
+
"type" => "paragraph",
|
|
216
|
+
"content" => [{
|
|
217
|
+
"type" => "text",
|
|
218
|
+
"text" => "Bold and underlined",
|
|
219
|
+
"marks" => [{
|
|
220
|
+
"type" => "underline",
|
|
221
221
|
}, {
|
|
222
|
-
|
|
223
|
-
}]
|
|
222
|
+
"type" => "bold",
|
|
223
|
+
}],
|
|
224
224
|
}, {
|
|
225
|
-
|
|
226
|
-
|
|
225
|
+
"type" => "text",
|
|
226
|
+
"text" => " and ",
|
|
227
227
|
}, {
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
228
|
+
"type" => "text",
|
|
229
|
+
"text" => "italic struck",
|
|
230
|
+
"marks" => [{
|
|
231
|
+
"type" => "strike",
|
|
232
232
|
}, {
|
|
233
|
-
|
|
234
|
-
}]
|
|
235
|
-
}]
|
|
236
|
-
}]
|
|
233
|
+
"type" => "italic",
|
|
234
|
+
}],
|
|
235
|
+
}],
|
|
236
|
+
}],
|
|
237
237
|
}
|
|
238
238
|
|
|
239
239
|
expect(document.to_h).to eq(expected)
|
|
240
240
|
end
|
|
241
241
|
|
|
242
|
-
it
|
|
243
|
-
html =
|
|
242
|
+
it "handles complex mixed text styles correctly" do
|
|
243
|
+
html = "<p>x<sup>2</sup> + <u>y<sub>1</sub></u> = <strike>z<sup>n</sup></strike></p>"
|
|
244
244
|
document = described_class.parse(html)
|
|
245
245
|
|
|
246
246
|
expected = {
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
}, {
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
}]
|
|
259
|
-
}, {
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
}, {
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
}]
|
|
268
|
-
}, {
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
247
|
+
"type" => "doc",
|
|
248
|
+
"content" => [{
|
|
249
|
+
"type" => "paragraph",
|
|
250
|
+
"content" => [{
|
|
251
|
+
"type" => "text",
|
|
252
|
+
"text" => "x",
|
|
253
|
+
}, {
|
|
254
|
+
"type" => "text",
|
|
255
|
+
"text" => "2",
|
|
256
|
+
"marks" => [{
|
|
257
|
+
"type" => "superscript",
|
|
258
|
+
}],
|
|
259
|
+
}, {
|
|
260
|
+
"type" => "text",
|
|
261
|
+
"text" => " + ",
|
|
262
|
+
}, {
|
|
263
|
+
"type" => "text",
|
|
264
|
+
"text" => "y",
|
|
265
|
+
"marks" => [{
|
|
266
|
+
"type" => "underline",
|
|
267
|
+
}],
|
|
268
|
+
}, {
|
|
269
|
+
"type" => "text",
|
|
270
|
+
"text" => "1",
|
|
271
|
+
"marks" => [{
|
|
272
|
+
"type" => "subscript",
|
|
273
273
|
}, {
|
|
274
|
-
|
|
275
|
-
}]
|
|
276
|
-
}, {
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
}, {
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
}]
|
|
285
|
-
}, {
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
274
|
+
"type" => "underline",
|
|
275
|
+
}],
|
|
276
|
+
}, {
|
|
277
|
+
"type" => "text",
|
|
278
|
+
"text" => " = ",
|
|
279
|
+
}, {
|
|
280
|
+
"type" => "text",
|
|
281
|
+
"text" => "z",
|
|
282
|
+
"marks" => [{
|
|
283
|
+
"type" => "strike",
|
|
284
|
+
}],
|
|
285
|
+
}, {
|
|
286
|
+
"type" => "text",
|
|
287
|
+
"text" => "n",
|
|
288
|
+
"marks" => [{
|
|
289
|
+
"type" => "superscript",
|
|
290
290
|
}, {
|
|
291
|
-
|
|
292
|
-
}]
|
|
293
|
-
}]
|
|
294
|
-
}]
|
|
291
|
+
"type" => "strike",
|
|
292
|
+
}],
|
|
293
|
+
}],
|
|
294
|
+
}],
|
|
295
295
|
}
|
|
296
296
|
|
|
297
297
|
expect(document.to_h).to eq(expected)
|
|
298
298
|
end
|
|
299
299
|
|
|
300
|
-
it
|
|
300
|
+
it "parses tables correctly" do
|
|
301
301
|
html = <<~HTML
|
|
302
302
|
<table>
|
|
303
303
|
<tr>
|
|
@@ -314,109 +314,109 @@ RSpec.describe Prosereflect::Input::Html do
|
|
|
314
314
|
document = described_class.parse(html)
|
|
315
315
|
|
|
316
316
|
expected = {
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
}]
|
|
330
|
-
}]
|
|
317
|
+
"type" => "doc",
|
|
318
|
+
"content" => [{
|
|
319
|
+
"type" => "table",
|
|
320
|
+
"content" => [{
|
|
321
|
+
"type" => "table_row",
|
|
322
|
+
"content" => [{
|
|
323
|
+
"type" => "table_cell",
|
|
324
|
+
"content" => [{
|
|
325
|
+
"type" => "paragraph",
|
|
326
|
+
"content" => [{
|
|
327
|
+
"type" => "text",
|
|
328
|
+
"text" => "Row 1, Cell 1",
|
|
329
|
+
}],
|
|
330
|
+
}],
|
|
331
331
|
}, {
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
}]
|
|
339
|
-
}]
|
|
340
|
-
}]
|
|
341
|
-
}, {
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
}]
|
|
351
|
-
}]
|
|
332
|
+
"type" => "table_cell",
|
|
333
|
+
"content" => [{
|
|
334
|
+
"type" => "paragraph",
|
|
335
|
+
"content" => [{
|
|
336
|
+
"type" => "text",
|
|
337
|
+
"text" => "Row 1, Cell 2",
|
|
338
|
+
}],
|
|
339
|
+
}],
|
|
340
|
+
}],
|
|
341
|
+
}, {
|
|
342
|
+
"type" => "table_row",
|
|
343
|
+
"content" => [{
|
|
344
|
+
"type" => "table_cell",
|
|
345
|
+
"content" => [{
|
|
346
|
+
"type" => "paragraph",
|
|
347
|
+
"content" => [{
|
|
348
|
+
"type" => "text",
|
|
349
|
+
"text" => "Row 2, Cell 1",
|
|
350
|
+
}],
|
|
351
|
+
}],
|
|
352
352
|
}, {
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
}]
|
|
360
|
-
}]
|
|
361
|
-
}]
|
|
362
|
-
}]
|
|
363
|
-
}]
|
|
353
|
+
"type" => "table_cell",
|
|
354
|
+
"content" => [{
|
|
355
|
+
"type" => "paragraph",
|
|
356
|
+
"content" => [{
|
|
357
|
+
"type" => "text",
|
|
358
|
+
"text" => "Row 2, Cell 2",
|
|
359
|
+
}],
|
|
360
|
+
}],
|
|
361
|
+
}],
|
|
362
|
+
}],
|
|
363
|
+
}],
|
|
364
364
|
}
|
|
365
365
|
|
|
366
366
|
expect(document.to_h).to eq(expected)
|
|
367
367
|
end
|
|
368
368
|
|
|
369
|
-
it
|
|
369
|
+
it "parses links correctly" do
|
|
370
370
|
html = '<p>This is a <a href="https://example.com">link</a></p>'
|
|
371
371
|
document = described_class.parse(html)
|
|
372
372
|
|
|
373
373
|
expected = {
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
}, {
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
}
|
|
388
|
-
}]
|
|
389
|
-
}]
|
|
390
|
-
}]
|
|
374
|
+
"type" => "doc",
|
|
375
|
+
"content" => [{
|
|
376
|
+
"type" => "paragraph",
|
|
377
|
+
"content" => [{
|
|
378
|
+
"type" => "text",
|
|
379
|
+
"text" => "This is a ",
|
|
380
|
+
}, {
|
|
381
|
+
"type" => "text",
|
|
382
|
+
"text" => "link",
|
|
383
|
+
"marks" => [{
|
|
384
|
+
"type" => "link",
|
|
385
|
+
"attrs" => {
|
|
386
|
+
"href" => "https://example.com",
|
|
387
|
+
},
|
|
388
|
+
}],
|
|
389
|
+
}],
|
|
390
|
+
}],
|
|
391
391
|
}
|
|
392
392
|
|
|
393
393
|
expect(document.to_h).to eq(expected)
|
|
394
394
|
end
|
|
395
395
|
|
|
396
|
-
it
|
|
397
|
-
html =
|
|
396
|
+
it "handles line breaks correctly" do
|
|
397
|
+
html = "<p>Line 1<br>Line 2</p>"
|
|
398
398
|
document = described_class.parse(html)
|
|
399
399
|
|
|
400
400
|
expected = {
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
}, {
|
|
408
|
-
|
|
409
|
-
}, {
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
}]
|
|
413
|
-
}]
|
|
401
|
+
"type" => "doc",
|
|
402
|
+
"content" => [{
|
|
403
|
+
"type" => "paragraph",
|
|
404
|
+
"content" => [{
|
|
405
|
+
"type" => "text",
|
|
406
|
+
"text" => "Line 1",
|
|
407
|
+
}, {
|
|
408
|
+
"type" => "hard_break",
|
|
409
|
+
}, {
|
|
410
|
+
"type" => "text",
|
|
411
|
+
"text" => "Line 2",
|
|
412
|
+
}],
|
|
413
|
+
}],
|
|
414
414
|
}
|
|
415
415
|
|
|
416
416
|
expect(document.to_h).to eq(expected)
|
|
417
417
|
end
|
|
418
418
|
|
|
419
|
-
it
|
|
419
|
+
it "parses ordered lists with start attribute correctly" do
|
|
420
420
|
html = <<~HTML
|
|
421
421
|
<ol start="3">
|
|
422
422
|
<li>Third item</li>
|
|
@@ -427,38 +427,38 @@ RSpec.describe Prosereflect::Input::Html do
|
|
|
427
427
|
document = described_class.parse(html)
|
|
428
428
|
|
|
429
429
|
expected = {
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
430
|
+
"type" => "doc",
|
|
431
|
+
"content" => [{
|
|
432
|
+
"type" => "ordered_list",
|
|
433
|
+
"attrs" => {
|
|
434
|
+
"start" => 3,
|
|
435
435
|
},
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
}]
|
|
444
|
-
}]
|
|
445
|
-
}, {
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
}]
|
|
453
|
-
}]
|
|
454
|
-
}]
|
|
455
|
-
}]
|
|
436
|
+
"content" => [{
|
|
437
|
+
"type" => "list_item",
|
|
438
|
+
"content" => [{
|
|
439
|
+
"type" => "paragraph",
|
|
440
|
+
"content" => [{
|
|
441
|
+
"type" => "text",
|
|
442
|
+
"text" => "Third item",
|
|
443
|
+
}],
|
|
444
|
+
}],
|
|
445
|
+
}, {
|
|
446
|
+
"type" => "list_item",
|
|
447
|
+
"content" => [{
|
|
448
|
+
"type" => "paragraph",
|
|
449
|
+
"content" => [{
|
|
450
|
+
"type" => "text",
|
|
451
|
+
"text" => "Fourth item",
|
|
452
|
+
}],
|
|
453
|
+
}],
|
|
454
|
+
}],
|
|
455
|
+
}],
|
|
456
456
|
}
|
|
457
457
|
|
|
458
458
|
expect(document.to_h).to eq(expected)
|
|
459
459
|
end
|
|
460
460
|
|
|
461
|
-
it
|
|
461
|
+
it "parses bullet lists with styles correctly" do
|
|
462
462
|
html = <<~HTML
|
|
463
463
|
<ul style="list-style-type: square">
|
|
464
464
|
<li>First bullet</li>
|
|
@@ -469,79 +469,79 @@ RSpec.describe Prosereflect::Input::Html do
|
|
|
469
469
|
document = described_class.parse(html)
|
|
470
470
|
|
|
471
471
|
expected = {
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
472
|
+
"type" => "doc",
|
|
473
|
+
"content" => [{
|
|
474
|
+
"type" => "bullet_list",
|
|
475
|
+
"attrs" => {
|
|
476
|
+
"bullet_style" => "square",
|
|
477
477
|
},
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
}]
|
|
486
|
-
}]
|
|
487
|
-
}, {
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
}]
|
|
495
|
-
}]
|
|
496
|
-
}]
|
|
497
|
-
}]
|
|
478
|
+
"content" => [{
|
|
479
|
+
"type" => "list_item",
|
|
480
|
+
"content" => [{
|
|
481
|
+
"type" => "paragraph",
|
|
482
|
+
"content" => [{
|
|
483
|
+
"type" => "text",
|
|
484
|
+
"text" => "First bullet",
|
|
485
|
+
}],
|
|
486
|
+
}],
|
|
487
|
+
}, {
|
|
488
|
+
"type" => "list_item",
|
|
489
|
+
"content" => [{
|
|
490
|
+
"type" => "paragraph",
|
|
491
|
+
"content" => [{
|
|
492
|
+
"type" => "text",
|
|
493
|
+
"text" => "Second bullet",
|
|
494
|
+
}],
|
|
495
|
+
}],
|
|
496
|
+
}],
|
|
497
|
+
}],
|
|
498
498
|
}
|
|
499
499
|
|
|
500
500
|
expect(document.to_h).to eq(expected)
|
|
501
501
|
end
|
|
502
502
|
|
|
503
|
-
it
|
|
503
|
+
it "renders headings with mixed content correctly" do
|
|
504
504
|
html = <<~HTML
|
|
505
505
|
<h1>Title with <strong>bold</strong> and <a href="https://example.com">link</a></h1>
|
|
506
506
|
HTML
|
|
507
507
|
|
|
508
508
|
expected = {
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
509
|
+
"type" => "doc",
|
|
510
|
+
"content" => [{
|
|
511
|
+
"type" => "heading",
|
|
512
|
+
"attrs" => {
|
|
513
|
+
"level" => 1,
|
|
514
514
|
},
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
}, {
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
}]
|
|
524
|
-
}, {
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
}, {
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
}
|
|
535
|
-
}]
|
|
536
|
-
}]
|
|
537
|
-
}]
|
|
515
|
+
"content" => [{
|
|
516
|
+
"type" => "text",
|
|
517
|
+
"text" => "Title with ",
|
|
518
|
+
}, {
|
|
519
|
+
"type" => "text",
|
|
520
|
+
"text" => "bold",
|
|
521
|
+
"marks" => [{
|
|
522
|
+
"type" => "bold",
|
|
523
|
+
}],
|
|
524
|
+
}, {
|
|
525
|
+
"type" => "text",
|
|
526
|
+
"text" => " and ",
|
|
527
|
+
}, {
|
|
528
|
+
"type" => "text",
|
|
529
|
+
"text" => "link",
|
|
530
|
+
"marks" => [{
|
|
531
|
+
"type" => "link",
|
|
532
|
+
"attrs" => {
|
|
533
|
+
"href" => "https://example.com",
|
|
534
|
+
},
|
|
535
|
+
}],
|
|
536
|
+
}],
|
|
537
|
+
}],
|
|
538
538
|
}
|
|
539
539
|
|
|
540
540
|
document = described_class.parse(html)
|
|
541
541
|
expect(document.to_h).to eq(expected)
|
|
542
542
|
end
|
|
543
543
|
|
|
544
|
-
it
|
|
544
|
+
it "renders lists with nested content correctly" do
|
|
545
545
|
html = <<~HTML
|
|
546
546
|
<ul>
|
|
547
547
|
<li>First item with <em>emphasis</em></li>
|
|
@@ -550,51 +550,51 @@ RSpec.describe Prosereflect::Input::Html do
|
|
|
550
550
|
HTML
|
|
551
551
|
|
|
552
552
|
expected = {
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
553
|
+
"type" => "doc",
|
|
554
|
+
"content" => [{
|
|
555
|
+
"type" => "bullet_list",
|
|
556
|
+
"attrs" => {
|
|
557
|
+
"bullet_style" => nil,
|
|
558
558
|
},
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
559
|
+
"content" => [{
|
|
560
|
+
"type" => "list_item",
|
|
561
|
+
"content" => [{
|
|
562
|
+
"type" => "paragraph",
|
|
563
|
+
"content" => [{
|
|
564
|
+
"type" => "text",
|
|
565
|
+
"text" => "First item with ",
|
|
566
566
|
}, {
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
}]
|
|
572
|
-
}]
|
|
573
|
-
}]
|
|
574
|
-
}, {
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
567
|
+
"type" => "text",
|
|
568
|
+
"text" => "emphasis",
|
|
569
|
+
"marks" => [{
|
|
570
|
+
"type" => "italic",
|
|
571
|
+
}],
|
|
572
|
+
}],
|
|
573
|
+
}],
|
|
574
|
+
}, {
|
|
575
|
+
"type" => "list_item",
|
|
576
|
+
"content" => [{
|
|
577
|
+
"type" => "paragraph",
|
|
578
|
+
"content" => [{
|
|
579
|
+
"type" => "text",
|
|
580
|
+
"text" => "Second item with ",
|
|
581
581
|
}, {
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
}]
|
|
587
|
-
}]
|
|
588
|
-
}]
|
|
589
|
-
}]
|
|
590
|
-
}]
|
|
582
|
+
"type" => "text",
|
|
583
|
+
"text" => "code",
|
|
584
|
+
"marks" => [{
|
|
585
|
+
"type" => "code",
|
|
586
|
+
}],
|
|
587
|
+
}],
|
|
588
|
+
}],
|
|
589
|
+
}],
|
|
590
|
+
}],
|
|
591
591
|
}
|
|
592
592
|
|
|
593
593
|
document = described_class.parse(html)
|
|
594
594
|
expect(document.to_h).to eq(expected)
|
|
595
595
|
end
|
|
596
596
|
|
|
597
|
-
it
|
|
597
|
+
it "renders blockquotes with citations correctly" do
|
|
598
598
|
html = <<~HTML
|
|
599
599
|
<blockquote cite="https://example.com">
|
|
600
600
|
<p>A quote with <strong>bold</strong> text</p>
|
|
@@ -602,36 +602,36 @@ RSpec.describe Prosereflect::Input::Html do
|
|
|
602
602
|
HTML
|
|
603
603
|
|
|
604
604
|
expected = {
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
605
|
+
"type" => "doc",
|
|
606
|
+
"content" => [{
|
|
607
|
+
"type" => "blockquote",
|
|
608
|
+
"attrs" => {
|
|
609
|
+
"citation" => "https://example.com",
|
|
610
610
|
},
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
611
|
+
"content" => [{
|
|
612
|
+
"type" => "paragraph",
|
|
613
|
+
"content" => [{
|
|
614
|
+
"type" => "text",
|
|
615
|
+
"text" => "A quote with ",
|
|
616
616
|
}, {
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
}]
|
|
617
|
+
"type" => "text",
|
|
618
|
+
"text" => "bold",
|
|
619
|
+
"marks" => [{
|
|
620
|
+
"type" => "bold",
|
|
621
|
+
}],
|
|
622
622
|
}, {
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
}]
|
|
626
|
-
}]
|
|
627
|
-
}]
|
|
623
|
+
"type" => "text",
|
|
624
|
+
"text" => " text",
|
|
625
|
+
}],
|
|
626
|
+
}],
|
|
627
|
+
}],
|
|
628
628
|
}
|
|
629
629
|
|
|
630
630
|
document = described_class.parse(html)
|
|
631
631
|
expect(document.to_h).to eq(expected)
|
|
632
632
|
end
|
|
633
633
|
|
|
634
|
-
it
|
|
634
|
+
it "renders code blocks with language correctly" do
|
|
635
635
|
html = <<~HTML
|
|
636
636
|
<pre><code class="language-ruby">def example
|
|
637
637
|
puts "Hello"
|
|
@@ -639,159 +639,355 @@ RSpec.describe Prosereflect::Input::Html do
|
|
|
639
639
|
HTML
|
|
640
640
|
|
|
641
641
|
expected = {
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
642
|
+
"type" => "doc",
|
|
643
|
+
"content" => [{
|
|
644
|
+
"type" => "code_block_wrapper",
|
|
645
|
+
"attrs" => {
|
|
646
|
+
"line_numbers" => false,
|
|
647
647
|
},
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
}]
|
|
655
|
-
}]
|
|
648
|
+
"content" => [{
|
|
649
|
+
"type" => "code_block",
|
|
650
|
+
"attrs" => {
|
|
651
|
+
"language" => "ruby",
|
|
652
|
+
},
|
|
653
|
+
"content" => ["def example\n puts \"Hello\"\nend"],
|
|
654
|
+
}],
|
|
655
|
+
}],
|
|
656
656
|
}
|
|
657
657
|
|
|
658
658
|
document = described_class.parse(html)
|
|
659
659
|
expect(document.to_h).to eq(expected)
|
|
660
660
|
end
|
|
661
661
|
|
|
662
|
-
it
|
|
662
|
+
it "renders images with attributes correctly" do
|
|
663
663
|
html = '<img src="test.jpg" alt="Test image" title="Test title" width="800" height="600">'
|
|
664
664
|
|
|
665
665
|
expected = {
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
}
|
|
676
|
-
}]
|
|
666
|
+
"type" => "doc",
|
|
667
|
+
"content" => [{
|
|
668
|
+
"type" => "image",
|
|
669
|
+
"attrs" => {
|
|
670
|
+
"src" => "test.jpg",
|
|
671
|
+
"alt" => "Test image",
|
|
672
|
+
"title" => "Test title",
|
|
673
|
+
"width" => 800,
|
|
674
|
+
"height" => 600,
|
|
675
|
+
},
|
|
676
|
+
}],
|
|
677
677
|
}
|
|
678
678
|
|
|
679
679
|
document = described_class.parse(html)
|
|
680
680
|
expect(document.to_h).to eq(expected)
|
|
681
681
|
end
|
|
682
682
|
|
|
683
|
-
it
|
|
683
|
+
it "renders horizontal rules with styles correctly" do
|
|
684
684
|
html = '<hr style="border-style: dashed; width: 80%; border-width: 2px">'
|
|
685
685
|
|
|
686
686
|
expected = {
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
}
|
|
695
|
-
}]
|
|
687
|
+
"type" => "doc",
|
|
688
|
+
"content" => [{
|
|
689
|
+
"type" => "horizontal_rule",
|
|
690
|
+
"attrs" => {
|
|
691
|
+
"style" => "dashed",
|
|
692
|
+
"width" => "80%",
|
|
693
|
+
"thickness" => 2,
|
|
694
|
+
},
|
|
695
|
+
}],
|
|
696
696
|
}
|
|
697
697
|
|
|
698
698
|
document = described_class.parse(html)
|
|
699
699
|
expect(document.to_h).to eq(expected)
|
|
700
700
|
end
|
|
701
701
|
|
|
702
|
-
it
|
|
702
|
+
it "parses user mentions correctly" do
|
|
703
703
|
html = '<user-mention data-id="123"></user-mention>'
|
|
704
704
|
|
|
705
705
|
expected = {
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
706
|
+
"type" => "doc",
|
|
707
|
+
"content" => [{
|
|
708
|
+
"type" => "user",
|
|
709
|
+
"attrs" => {
|
|
710
|
+
"id" => "123",
|
|
711
711
|
},
|
|
712
|
-
|
|
713
|
-
}]
|
|
712
|
+
"content" => [],
|
|
713
|
+
}],
|
|
714
714
|
}
|
|
715
715
|
|
|
716
716
|
document = described_class.parse(html)
|
|
717
717
|
expect(document.to_h).to eq(expected)
|
|
718
718
|
end
|
|
719
719
|
|
|
720
|
-
it
|
|
720
|
+
it "parses user mentions in paragraphs" do
|
|
721
721
|
html = '<p>Hello <user-mention data-id="123"></user-mention>!</p>'
|
|
722
722
|
|
|
723
723
|
expected = {
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
724
|
+
"type" => "doc",
|
|
725
|
+
"content" => [{
|
|
726
|
+
"type" => "paragraph",
|
|
727
|
+
"content" => [
|
|
728
728
|
{
|
|
729
|
-
|
|
730
|
-
|
|
729
|
+
"type" => "text",
|
|
730
|
+
"text" => "Hello ",
|
|
731
731
|
},
|
|
732
732
|
{
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
733
|
+
"type" => "user",
|
|
734
|
+
"attrs" => {
|
|
735
|
+
"id" => "123",
|
|
736
736
|
},
|
|
737
|
-
|
|
737
|
+
"content" => [],
|
|
738
738
|
},
|
|
739
739
|
{
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
}
|
|
743
|
-
]
|
|
744
|
-
}]
|
|
740
|
+
"type" => "text",
|
|
741
|
+
"text" => "!",
|
|
742
|
+
},
|
|
743
|
+
],
|
|
744
|
+
}],
|
|
745
745
|
}
|
|
746
746
|
|
|
747
747
|
document = described_class.parse(html)
|
|
748
748
|
expect(document.to_h).to eq(expected)
|
|
749
749
|
end
|
|
750
750
|
|
|
751
|
-
it
|
|
752
|
-
html =
|
|
751
|
+
it "ignores user mentions without data-id" do
|
|
752
|
+
html = "<user-mention></user-mention>"
|
|
753
753
|
|
|
754
754
|
expected = {
|
|
755
|
-
|
|
755
|
+
"type" => "doc",
|
|
756
756
|
}
|
|
757
757
|
|
|
758
758
|
document = described_class.parse(html)
|
|
759
759
|
expect(document.to_h).to eq(expected)
|
|
760
760
|
end
|
|
761
761
|
|
|
762
|
-
it
|
|
762
|
+
it "parses multiple user mentions" do
|
|
763
763
|
html = '<div>Mentioned: <user-mention data-id="123"></user-mention> and <user-mention data-id="456"></user-mention></div>'
|
|
764
764
|
|
|
765
765
|
expected = {
|
|
766
|
-
|
|
767
|
-
|
|
766
|
+
"type" => "doc",
|
|
767
|
+
"content" => [
|
|
768
768
|
{
|
|
769
|
-
|
|
770
|
-
|
|
769
|
+
"type" => "text",
|
|
770
|
+
"text" => "Mentioned: ",
|
|
771
771
|
},
|
|
772
772
|
{
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
773
|
+
"type" => "user",
|
|
774
|
+
"attrs" => {
|
|
775
|
+
"id" => "123",
|
|
776
776
|
},
|
|
777
|
-
|
|
777
|
+
"content" => [],
|
|
778
778
|
},
|
|
779
779
|
{
|
|
780
|
-
|
|
781
|
-
|
|
780
|
+
"type" => "text",
|
|
781
|
+
"text" => " and ",
|
|
782
782
|
},
|
|
783
783
|
{
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
784
|
+
"type" => "user",
|
|
785
|
+
"attrs" => {
|
|
786
|
+
"id" => "456",
|
|
787
787
|
},
|
|
788
|
-
|
|
789
|
-
}
|
|
790
|
-
]
|
|
788
|
+
"content" => [],
|
|
789
|
+
},
|
|
790
|
+
],
|
|
791
791
|
}
|
|
792
792
|
|
|
793
793
|
document = described_class.parse(html)
|
|
794
794
|
expect(document.to_h).to eq(expected)
|
|
795
795
|
end
|
|
796
796
|
end
|
|
797
|
+
|
|
798
|
+
describe ".parse_with_schema" do
|
|
799
|
+
it "parses HTML and returns a document when validation is bypassed" do
|
|
800
|
+
html = "<p>Hello world</p>"
|
|
801
|
+
allow(described_class).to receive(:validate_against_schema)
|
|
802
|
+
document = described_class.send(:parse_with_schema, html, nil)
|
|
803
|
+
expect(document).to be_a(Prosereflect::Document)
|
|
804
|
+
expect(document.to_h["content"].first["type"]).to eq("paragraph")
|
|
805
|
+
end
|
|
806
|
+
|
|
807
|
+
it "preserves document content when validation is bypassed" do
|
|
808
|
+
html = "<p>Schema test</p>"
|
|
809
|
+
allow(described_class).to receive(:validate_against_schema)
|
|
810
|
+
document = described_class.send(:parse_with_schema, html, nil)
|
|
811
|
+
para = document.to_h["content"].first
|
|
812
|
+
text_node = para["content"].first
|
|
813
|
+
expect(text_node["text"]).to eq("Schema test")
|
|
814
|
+
end
|
|
815
|
+
|
|
816
|
+
it "parses complex HTML with validation bypassed" do
|
|
817
|
+
html = "<h1>Title</h1><p>Paragraph with <strong>bold</strong> text</p>"
|
|
818
|
+
allow(described_class).to receive(:validate_against_schema)
|
|
819
|
+
document = described_class.send(:parse_with_schema, html, nil)
|
|
820
|
+
content = document.to_h["content"]
|
|
821
|
+
expect(content.length).to eq(2)
|
|
822
|
+
expect(content[0]["type"]).to eq("heading")
|
|
823
|
+
expect(content[1]["type"]).to eq("paragraph")
|
|
824
|
+
end
|
|
825
|
+
|
|
826
|
+
it "rescues ValidationError and returns the document" do
|
|
827
|
+
html = "<p>Validation error test</p>"
|
|
828
|
+
allow(described_class).to receive(:validate_against_schema).and_raise(
|
|
829
|
+
Prosereflect::Input::Html::ValidationError, "Missing required content"
|
|
830
|
+
)
|
|
831
|
+
document = described_class.send(:parse_with_schema, html, nil)
|
|
832
|
+
expect(document).to be_a(Prosereflect::Document)
|
|
833
|
+
expect(document.to_h["content"].first["type"]).to eq("paragraph")
|
|
834
|
+
end
|
|
835
|
+
end
|
|
836
|
+
|
|
837
|
+
describe ".parse_with_rules" do
|
|
838
|
+
it "parses HTML with keep_empty option" do
|
|
839
|
+
html = "<p>Keep empty test</p>"
|
|
840
|
+
document = described_class.send(:parse_with_rules, html, rules: { keep_empty: true })
|
|
841
|
+
expect(document).to be_a(Prosereflect::Document)
|
|
842
|
+
expect(document.to_h["content"].first["type"]).to eq("paragraph")
|
|
843
|
+
end
|
|
844
|
+
|
|
845
|
+
it "parses HTML with empty rules" do
|
|
846
|
+
html = "<p>Empty rules test</p>"
|
|
847
|
+
document = described_class.send(:parse_with_rules, html, rules: {})
|
|
848
|
+
expect(document).to be_a(Prosereflect::Document)
|
|
849
|
+
end
|
|
850
|
+
|
|
851
|
+
it "preserves content with keep_empty false" do
|
|
852
|
+
html = "<p>Keep empty false</p>"
|
|
853
|
+
document = described_class.send(:parse_with_rules, html, rules: { keep_empty: false })
|
|
854
|
+
para = document.to_h["content"].first
|
|
855
|
+
text_node = para["content"].first
|
|
856
|
+
expect(text_node["text"]).to eq("Keep empty false")
|
|
857
|
+
end
|
|
858
|
+
|
|
859
|
+
it "accepts top_node option" do
|
|
860
|
+
html = "<p>Top node test</p>"
|
|
861
|
+
document = described_class.send(:parse_with_rules, html, rules: { top_node: "doc" })
|
|
862
|
+
expect(document.to_h["type"]).to eq("doc")
|
|
863
|
+
end
|
|
864
|
+
end
|
|
865
|
+
|
|
866
|
+
describe ".parse_node" do
|
|
867
|
+
it "parses a single HTML paragraph node" do
|
|
868
|
+
doc = Nokogiri::HTML("<p>Single node</p>")
|
|
869
|
+
html_node = doc.at_css("p")
|
|
870
|
+
result = described_class.send(:parse_node, html_node)
|
|
871
|
+
expect(result).to be_a(Prosereflect::Paragraph)
|
|
872
|
+
end
|
|
873
|
+
|
|
874
|
+
it "parses a text node" do
|
|
875
|
+
doc = Nokogiri::HTML("<p>text content</p>")
|
|
876
|
+
html_node = doc.at_css("p").children.first
|
|
877
|
+
result = described_class.send(:parse_node, html_node)
|
|
878
|
+
expect(result).to be_a(Prosereflect::Text)
|
|
879
|
+
expect(result.text).to eq("text content")
|
|
880
|
+
end
|
|
881
|
+
|
|
882
|
+
it "returns nil for empty text nodes with clear_null" do
|
|
883
|
+
doc = Nokogiri::HTML("<p> </p>")
|
|
884
|
+
html_node = doc.at_css("p").children.first
|
|
885
|
+
result = described_class.send(:parse_node, html_node, clear_null: true)
|
|
886
|
+
expect(result).to be_nil
|
|
887
|
+
end
|
|
888
|
+
|
|
889
|
+
it "returns nil for empty text nodes by default" do
|
|
890
|
+
doc = Nokogiri::HTML("<p> </p>")
|
|
891
|
+
html_node = doc.at_css("p").children.first
|
|
892
|
+
result = described_class.send(:parse_node, html_node)
|
|
893
|
+
expect(result).to be_nil
|
|
894
|
+
end
|
|
895
|
+
|
|
896
|
+
it "accepts node option for parent context" do
|
|
897
|
+
doc = Nokogiri::HTML("<p>parent context</p>")
|
|
898
|
+
html_node = doc.at_css("p")
|
|
899
|
+
parent_node = Prosereflect::Document.new
|
|
900
|
+
result = described_class.send(:parse_node, html_node, node: parent_node)
|
|
901
|
+
expect(result).to be_a(Prosereflect::Paragraph)
|
|
902
|
+
end
|
|
903
|
+
|
|
904
|
+
it "accepts saved_styles option" do
|
|
905
|
+
doc = Nokogiri::HTML("<p>styled</p>")
|
|
906
|
+
html_node = doc.at_css("p")
|
|
907
|
+
result = described_class.send(:parse_node, html_node, saved_styles: [])
|
|
908
|
+
expect(result).to be_a(Prosereflect::Paragraph)
|
|
909
|
+
end
|
|
910
|
+
end
|
|
911
|
+
|
|
912
|
+
describe ".preserve_whitespace?" do
|
|
913
|
+
it "returns true for pre elements" do
|
|
914
|
+
doc = Nokogiri::HTML("<pre>code</pre>")
|
|
915
|
+
pre_node = doc.at_css("pre")
|
|
916
|
+
expect(described_class.send(:preserve_whitespace?, pre_node)).to be true
|
|
917
|
+
end
|
|
918
|
+
|
|
919
|
+
it "returns true for textarea elements" do
|
|
920
|
+
doc = Nokogiri::HTML("<textarea>text</textarea>")
|
|
921
|
+
textarea_node = doc.at_css("textarea")
|
|
922
|
+
expect(described_class.send(:preserve_whitespace?, textarea_node)).to be true
|
|
923
|
+
end
|
|
924
|
+
|
|
925
|
+
it "returns true for elements with white-space: pre style" do
|
|
926
|
+
doc = Nokogiri::HTML('<div style="white-space: pre">text</div>')
|
|
927
|
+
div_node = doc.at_css("div")
|
|
928
|
+
expect(described_class.send(:preserve_whitespace?, div_node)).to be true
|
|
929
|
+
end
|
|
930
|
+
|
|
931
|
+
it "returns false for paragraph elements" do
|
|
932
|
+
doc = Nokogiri::HTML("<p>text</p>")
|
|
933
|
+
p_node = doc.at_css("p")
|
|
934
|
+
expect(described_class.send(:preserve_whitespace?, p_node)).to be false
|
|
935
|
+
end
|
|
936
|
+
|
|
937
|
+
it "returns false for elements without white-space style" do
|
|
938
|
+
doc = Nokogiri::HTML('<div style="color: red">text</div>')
|
|
939
|
+
div_node = doc.at_css("div")
|
|
940
|
+
expect(described_class.send(:preserve_whitespace?, div_node)).to be false
|
|
941
|
+
end
|
|
942
|
+
|
|
943
|
+
it "returns false for elements without style attribute" do
|
|
944
|
+
doc = Nokogiri::HTML("<div>text</div>")
|
|
945
|
+
div_node = doc.at_css("div")
|
|
946
|
+
expect(described_class.send(:preserve_whitespace?, div_node)).to be false
|
|
947
|
+
end
|
|
948
|
+
|
|
949
|
+
it "returns false for elements with white-space but not pre" do
|
|
950
|
+
doc = Nokogiri::HTML('<div style="white-space: nowrap">text</div>')
|
|
951
|
+
div_node = doc.at_css("div")
|
|
952
|
+
expect(described_class.send(:preserve_whitespace?, div_node)).to be false
|
|
953
|
+
end
|
|
954
|
+
end
|
|
955
|
+
|
|
956
|
+
describe ".normalize_whitespace" do
|
|
957
|
+
it "replaces multiple spaces with a single space" do
|
|
958
|
+
expect(described_class.send(:normalize_whitespace, "hello world")).to eq("hello world")
|
|
959
|
+
end
|
|
960
|
+
|
|
961
|
+
it "replaces tabs with spaces" do
|
|
962
|
+
expect(described_class.send(:normalize_whitespace, "hello\tworld")).to eq("hello world")
|
|
963
|
+
end
|
|
964
|
+
|
|
965
|
+
it "replaces newlines with spaces" do
|
|
966
|
+
expect(described_class.send(:normalize_whitespace, "hello\nworld")).to eq("hello world")
|
|
967
|
+
end
|
|
968
|
+
|
|
969
|
+
it "replaces carriage returns with spaces" do
|
|
970
|
+
expect(described_class.send(:normalize_whitespace, "hello\rworld")).to eq("hello world")
|
|
971
|
+
end
|
|
972
|
+
|
|
973
|
+
it "strips leading and trailing whitespace" do
|
|
974
|
+
expect(described_class.send(:normalize_whitespace, " hello ")).to eq("hello")
|
|
975
|
+
end
|
|
976
|
+
|
|
977
|
+
it "handles mixed whitespace" do
|
|
978
|
+
expect(described_class.send(:normalize_whitespace, " hello \t\n world ")).to eq("hello world")
|
|
979
|
+
end
|
|
980
|
+
|
|
981
|
+
it "returns empty string for whitespace-only input" do
|
|
982
|
+
expect(described_class.send(:normalize_whitespace, " ")).to eq("")
|
|
983
|
+
end
|
|
984
|
+
|
|
985
|
+
it "handles an empty string" do
|
|
986
|
+
expect(described_class.send(:normalize_whitespace, "")).to eq("")
|
|
987
|
+
end
|
|
988
|
+
|
|
989
|
+
it "does not modify a clean string" do
|
|
990
|
+
expect(described_class.send(:normalize_whitespace, "hello world")).to eq("hello world")
|
|
991
|
+
end
|
|
992
|
+
end
|
|
797
993
|
end
|