coradoc-markdown 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/lib/coradoc/markdown/errors.rb +28 -0
- data/lib/coradoc/markdown/model/abbreviation.rb +27 -0
- data/lib/coradoc/markdown/model/attribute_list.rb +98 -0
- data/lib/coradoc/markdown/model/base.rb +86 -0
- data/lib/coradoc/markdown/model/blockquote.rb +21 -0
- data/lib/coradoc/markdown/model/code.rb +11 -0
- data/lib/coradoc/markdown/model/code_block.rb +24 -0
- data/lib/coradoc/markdown/model/definition_item.rb +24 -0
- data/lib/coradoc/markdown/model/definition_list.rb +47 -0
- data/lib/coradoc/markdown/model/definition_term.rb +21 -0
- data/lib/coradoc/markdown/model/document.rb +39 -0
- data/lib/coradoc/markdown/model/emphasis.rb +11 -0
- data/lib/coradoc/markdown/model/extension.rb +92 -0
- data/lib/coradoc/markdown/model/footnote.rb +31 -0
- data/lib/coradoc/markdown/model/footnote_reference.rb +22 -0
- data/lib/coradoc/markdown/model/heading.rb +44 -0
- data/lib/coradoc/markdown/model/highlight.rb +18 -0
- data/lib/coradoc/markdown/model/horizontal_rule.rb +16 -0
- data/lib/coradoc/markdown/model/image.rb +19 -0
- data/lib/coradoc/markdown/model/link.rb +19 -0
- data/lib/coradoc/markdown/model/list.rb +22 -0
- data/lib/coradoc/markdown/model/list_item.rb +29 -0
- data/lib/coradoc/markdown/model/math.rb +50 -0
- data/lib/coradoc/markdown/model/paragraph.rb +28 -0
- data/lib/coradoc/markdown/model/strikethrough.rb +18 -0
- data/lib/coradoc/markdown/model/strong.rb +11 -0
- data/lib/coradoc/markdown/model/table.rb +13 -0
- data/lib/coradoc/markdown/model/text.rb +15 -0
- data/lib/coradoc/markdown/parser/ast_processor.rb +543 -0
- data/lib/coradoc/markdown/parser/block_parser.rb +745 -0
- data/lib/coradoc/markdown/parser/html_entities.rb +2149 -0
- data/lib/coradoc/markdown/parser/inline_parser.rb +274 -0
- data/lib/coradoc/markdown/parser/parslet_extras.rb +215 -0
- data/lib/coradoc/markdown/parser.rb +11 -0
- data/lib/coradoc/markdown/parser_util.rb +90 -0
- data/lib/coradoc/markdown/serializer.rb +199 -0
- data/lib/coradoc/markdown/toc_generator.rb +215 -0
- data/lib/coradoc/markdown/transform/from_core_model.rb +325 -0
- data/lib/coradoc/markdown/transform/text_extraction.rb +19 -0
- data/lib/coradoc/markdown/transform/to_core_model.rb +287 -0
- data/lib/coradoc/markdown/transformer.rb +463 -0
- data/lib/coradoc/markdown/version.rb +7 -0
- data/lib/coradoc/markdown.rb +190 -0
- metadata +173 -0
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Markdown
|
|
5
|
+
# Serializer for Markdown Document models.
|
|
6
|
+
#
|
|
7
|
+
# This serializer converts Document model objects back into
|
|
8
|
+
# Markdown text format.
|
|
9
|
+
#
|
|
10
|
+
class Serializer
|
|
11
|
+
# Class-level convenience wrapper: creates a fresh serializer instance and
# hands the element and options straight to the instance method #serialize.
#
# @param document [Coradoc::Markdown::Base] The document or element to serialize
# @param options [Hash] Serialization options, forwarded unchanged
# @return [String] The Markdown output
def self.serialize(document, options = {})
  serializer = new
  serializer.serialize(document, options)
end
|
|
19
|
+
|
|
20
|
+
# Render one model element (or a whole document) as Markdown text.
#
# The element's class selects the handler; branches are tried top to bottom.
# Strikethrough, Highlight, AttributeList, Math and Extension elements render
# themselves through their own #to_md, and a plain String is returned as-is.
#
# @param element [Coradoc::Markdown::Base, String] The element to serialize
# @param _options [Hash] Serialization options (accepted, not consulted here)
# @return [String] The Markdown output
# @raise [ArgumentError] when the element matches none of the listed types
def serialize(element, _options = {})
  case element
  when Document then serialize_document(element)
  when Heading then serialize_heading(element)
  when Paragraph then serialize_paragraph(element)
  when List then serialize_list(element)
  when CodeBlock then serialize_code_block(element)
  when Blockquote then serialize_blockquote(element)
  when Link then serialize_link(element)
  when Image then serialize_image(element)
  when HorizontalRule then serialize_horizontal_rule(element)
  when Table then serialize_table(element)
  when Emphasis then serialize_emphasis(element)
  when Strong then serialize_strong(element)
  when Code then serialize_code(element)
  when DefinitionList then serialize_definition_list(element)
  when Footnote then serialize_footnote(element)
  when FootnoteReference then serialize_footnote_reference(element)
  when Abbreviation then serialize_abbreviation(element)
  when Strikethrough, Highlight, AttributeList, Math, Extension then element.to_md
  when String then element
  else
    raise ArgumentError,
          "Unknown element type for serialization: #{element.class}. " \
          'Expected a known Markdown model type.'
  end
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
# Serialize each block of the document and separate the results with a
# blank line.
def serialize_document(doc)
  rendered_blocks = doc.blocks.map { |block| serialize(block) }
  rendered_blocks.join("\n\n")
end
|
|
85
|
+
|
|
86
|
+
# ATX-style heading: one '#' per heading level, a space, then the text.
def serialize_heading(heading)
  hashes = '#' * heading.level
  "#{hashes} #{heading.text}"
end
|
|
89
|
+
|
|
90
|
+
# A paragraph with children is the concatenation of its serialized inline
# children; otherwise its plain text (stringified) is used.
def serialize_paragraph(para)
  return para.text.to_s unless para.children.any?

  inline_parts = para.children.map { |child| serialize_inline_content(child) }
  inline_parts.join
end
|
|
97
|
+
|
|
98
|
+
# Serialize one inline child of a paragraph or list item.
#
# Strings are returned unchanged, the listed inline model types go through
# #serialize, and any other Base instance renders itself via #to_md.
#
# @raise [ArgumentError] for anything that is neither a String nor a Base
def serialize_inline_content(element)
  case element
  when String
    element
  when Emphasis, Strong, Code, Link, Image,
       FootnoteReference, Math, Extension, Strikethrough, Highlight
    serialize(element)
  when Base
    element.to_md
  else
    raise ArgumentError,
          "Cannot serialize inline content of type #{element.class}. " \
          'Expected String, known inline model, or Base subclass.'
  end
end
|
|
114
|
+
|
|
115
|
+
# Serialize a list, one line per item, joined with newlines.
#
# Ordered lists use the marker "1." for every item and unordered lists use
# "-". Items whose `checked` is true/false are written as task items
# ("- [x] " / "- [ ] "), whatever the list's ordering; any task marker
# already present at the very start of the item text is removed first so it
# is not emitted twice.
#
# @param list [#ordered, #items] the list model
# @return [String] the Markdown list
def serialize_list(list)
  marker = list.ordered ? '1.' : '-'
  # \A rather than ^: only a marker at the start of the whole text is a
  # duplicate; ^ would also strip one from a later line of a multi-line item.
  leading_task_marker = /\A- \[[ x]\] /

  list.items.map do |item|
    text = if item.children.any?
             item.children.map { |child| serialize_inline_content(child) }.join
           else
             item.text.to_s
           end

    case item.checked
    when true then "- [x] #{text.sub(leading_task_marker, '')}"
    when false then "- [ ] #{text.sub(leading_task_marker, '')}"
    else "#{marker} #{text}"
    end
  end.join("\n")
end
|
|
132
|
+
|
|
133
|
+
# Serialize a fenced code block with the language as the info string.
#
# The fence is three backticks unless the code itself contains a run of three
# or more backticks; then the fence is made one backtick longer than the
# longest run, so the content can never close the block early.
#
# @param block [#language, #code] the code block model
# @return [String] the fenced block
def serialize_code_block(block)
  code = block.code.to_s
  longest_run = code.scan(/`+/).map(&:length).max || 0
  fence = '`' * [3, longest_run + 1].max
  "#{fence}#{block.language}\n#{code}\n#{fence}"
end
|
|
136
|
+
|
|
137
|
+
# Prefix every line of the quote's content with "> ". Line endings are kept
# as they appear in the content; nil content yields an empty string.
def serialize_blockquote(quote)
  quoted_lines = quote.content.to_s.each_line.map { |line| '> ' + line }
  quoted_lines.join
end
|
|
140
|
+
|
|
141
|
+
# Inline link: [text](url), with the title appended in double quotes after
# the URL when the link has one.
def serialize_link(link)
  title_part = link.title ? " \"#{link.title}\"" : ''
  "[#{link.text}](#{link.url}#{title_part})"
end
|
|
144
|
+
|
|
145
|
+
# Serialize an image element.
#
# NOTE(review): as written, the body is an empty string literal and `img` is
# never read, so every image serializes to nothing. Presumably the Markdown
# image markup was meant to be built here from the image's attributes (the
# literal may have been lost in extraction) -- confirm against the Image model
# and restore the intended output.
def serialize_image(img)
|
|
146
|
+
""
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
# Emit the rule's own style string when it has one, otherwise '---'.
def serialize_horizontal_rule(rule)
  custom_style = rule.style
  return custom_style if custom_style

  '---'
end
|
|
152
|
+
|
|
153
|
+
# Serialize a pipe table: header row, a '---' separator with one cell per
# header, then one line per row. A table without headers yields ''.
# Each row is passed through Array(), so a non-array row becomes a single
# cell.
def serialize_table(table)
  headers = table.headers
  return '' if headers.empty?

  as_row = ->(cells) { "| #{cells.join(' | ')} |" }
  lines = [as_row.call(headers), as_row.call(headers.map { '---' })]
  table.rows.each { |row| lines << as_row.call(Array(row)) }
  lines.join("\n")
end
|
|
162
|
+
|
|
163
|
+
# Emphasis: the text wrapped in single asterisks.
def serialize_emphasis(em)
  format('*%s*', em.text)
end
|
|
166
|
+
|
|
167
|
+
# Strong emphasis: the text wrapped in double asterisks.
def serialize_strong(strong)
  format('**%s**', strong.text)
end
|
|
170
|
+
|
|
171
|
+
# Serialize an inline code span.
#
# Text without backticks is wrapped in single backticks. When the text
# contains backticks, the delimiter is made one backtick longer than the
# longest run inside it, and text that itself starts or ends with a backtick
# is padded with one space on each side so the delimiter stays separate.
#
# @param code [#text] the inline code model
# @return [String] the code span
def serialize_code(code)
  text = code.text.to_s
  longest_run = text.scan(/`+/).map(&:length).max || 0
  return "`#{text}`" if longest_run.zero?

  delimiter = '`' * (longest_run + 1)
  touches_edge = text.start_with?('`') || text.end_with?('`')
  padded = touches_edge ? " #{text} " : text
  "#{delimiter}#{padded}#{delimiter}"
end
|
|
174
|
+
|
|
175
|
+
# Serialize a definition list: each item is its term text followed by one
# ": definition" line per definition; items are separated by a blank line.
def serialize_definition_list(dl)
  entries = dl.items.map do |term|
    definition_lines = term.definitions.map { |defn| ": #{defn.content}" }
    [term.text.to_s, *definition_lines].join("\n")
  end
  entries.join("\n\n")
end
|
|
184
|
+
|
|
185
|
+
# Footnote definition: "[^id]: content", with the content stringified.
def serialize_footnote(fn)
  body = fn.content.to_s
  label = "[^#{fn.id}]"
  "#{label}: #{body}"
end
|
|
189
|
+
|
|
190
|
+
# Inline footnote reference: "[^id]".
def serialize_footnote_reference(ref)
  format('[^%s]', ref.id)
end
|
|
193
|
+
|
|
194
|
+
# Abbreviation definition: "*[term]: definition".
def serialize_abbreviation(abbr)
  format('*[%s]: %s', abbr.term, abbr.definition)
end
|
|
197
|
+
end
|
|
198
|
+
end
|
|
199
|
+
end
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Coradoc
|
|
4
|
+
module Markdown
|
|
5
|
+
module Model
  # Register lazy loading for each model constant; the file for a constant
  # lives under the model/ directory next to this file.
  { Base: 'base', Heading: 'heading', Document: 'document' }.each do |const_name, file_name|
    autoload const_name, "#{__dir__}/model/#{file_name}"
  end
end
|
|
10
|
+
|
|
11
|
+
# Table of Contents Generator
|
|
12
|
+
#
|
|
13
|
+
# Generates a table of contents from document headings.
|
|
14
|
+
# Supports Kramdown-style TOC with options for levels, depth, etc.
|
|
15
|
+
#
|
|
16
|
+
# @example Basic usage
|
|
17
|
+
# doc = Coradoc::Markdown.parse(markdown_text)
|
|
18
|
+
# toc = Coradoc::Markdown::TocGenerator.generate(doc)
|
|
19
|
+
# puts toc.to_markdown
|
|
20
|
+
#
|
|
21
|
+
# @example With options
|
|
22
|
+
# toc = Coradoc::Markdown::TocGenerator.generate(doc,
|
|
23
|
+
# min_level: 2,
|
|
24
|
+
# max_level: 4,
|
|
25
|
+
# numbered: true
|
|
26
|
+
# )
|
|
27
|
+
#
|
|
28
|
+
class TocGenerator
|
|
29
|
+
include Transform::TextExtraction
|
|
30
|
+
|
|
31
|
+
# Default options for TOC generation; caller-supplied options are merged over
# these.
#   min_level / max_level - inclusive range of heading levels to include
#   numbered              - whether to add section numbers
#   styled                - whether to add styling classes
#   link_headings         - whether to link to headings
DEFAULT_OPTIONS = { min_level: 1, max_level: 6, numbered: false, styled: false, link_headings: true }.freeze
|
|
39
|
+
|
|
40
|
+
# Represents a single TOC entry: a heading's id, display text, level and
# optional section number, plus the entries nested beneath it.
class Entry
  attr_accessor :id, :text, :level, :children, :number

  # Indentation added per nesting level. A child of a "* " item has to be
  # indented to that item's content column (two spaces) to be parsed as a
  # nested list; a single space leaves it a sibling.
  INDENT_UNIT = '  '.freeze

  # @param id [String, nil] anchor id of the heading (nil for no link)
  # @param text [String] display text
  # @param level [Integer] heading level
  # @param number [String, nil] section number, if numbering is enabled
  def initialize(id:, text:, level:, number: nil)
    @id = id
    @text = text
    @level = level
    @number = number
    @children = []
  end

  # Render this entry and, recursively, its children as a Markdown bullet
  # list. The entry is linked to "#id" when it has an id that differs from
  # its text; a section number, when present, precedes the label.
  #
  # @param indent [Integer] nesting depth of this entry
  # @return [String] one line per entry, each terminated by a newline
  def to_markdown(indent: 0)
    label = @id && @id != @text ? "[#{@text}](##{@id})" : @text
    numbering = @number ? "#{@number} " : ''
    own_line = "#{INDENT_UNIT * indent}* #{numbering}#{label}\n"
    # Build a new string rather than appending to the interpolated one.
    own_line + @children.map { |child| child.to_markdown(indent: indent + 1) }.join
  end

  # Convert entry to hash representation
  # @return [Hash] Hash with id, text, level, number, and optional children
  def to_h
    result = { id: @id, text: @text, level: @level, number: @number }
    result[:children] = @children.map(&:to_h) unless @children.empty?
    result
  end
end
|
|
71
|
+
|
|
72
|
+
# Generate a TOC from a document.
#
# Builds a generator configured with the given options and runs it on the
# document.
#
# @param document [Coradoc::Markdown::Document] The document to process
# @param options [Hash] Generation options
# @option options [Integer] :min_level (1) Minimum heading level to include
# @option options [Integer] :max_level (6) Maximum heading level to include
# @option options [Boolean] :numbered (false) Whether to add section numbers
# @option options [Boolean] :styled (false) Whether to add styling classes
# @option options [Boolean] :link_headings (true) Whether to link to headings
# @return [Entry, nil] The root TOC entry or nil if no headings
def self.generate(document, options = {})
  generator = new(options)
  generator.generate(document)
end
|
|
85
|
+
|
|
86
|
+
# Generate the TOC and render it as a Markdown string.
#
# @param document [Coradoc::Markdown::Document] The document to process
# @param options [Hash] Generation options
# @return [String] Markdown-formatted TOC, or '' when no TOC was generated
def self.generate_markdown(document, options = {})
  root = generate(document, options)
  return '' unless root

  root.to_markdown
end
|
|
95
|
+
|
|
96
|
+
# Generate the TOC and return its top-level entries as hashes (the root
# entry itself is not included).
#
# @param document [Coradoc::Markdown::Document] The document to process
# @param options [Hash] Generation options
# @return [Array<Hash>] Array of TOC entries, empty when no TOC was generated
def self.generate_array(document, options = {})
  root = generate(document, options)
  root ? root.children.map(&:to_h) : []
end
|
|
107
|
+
|
|
108
|
+
# Merge the caller's options over DEFAULT_OPTIONS and cache the ones the
# generator reads directly.
#
# NOTE(review): :styled and :link_headings are kept in @options but nothing
# visible in this class reads them -- confirm whether they are meant to have
# an effect.
#
# @param options [Hash] Generation options
def initialize(options = {})
  @options = DEFAULT_OPTIONS.merge(options)
  @min_level, @max_level, @numbered = @options.values_at(:min_level, :max_level, :numbered)
end
|
|
114
|
+
|
|
115
|
+
# Generate TOC from document.
#
# Collects the document's headings and hangs them under a synthetic root
# entry titled 'Table of Contents' (level 0, no id).
#
# @param document [Coradoc::Markdown::Document] The document
# @return [Entry, nil] Root TOC entry, or nil when no headings were found
def generate(document)
  headings = extract_headings(document)
  return if headings.empty?

  Entry.new(id: nil, text: 'Table of Contents', level: 0).tap do |root|
    build_toc_tree(root, headings)
  end
end
|
|
127
|
+
|
|
128
|
+
private
|
|
129
|
+
|
|
130
|
+
# Extract headings from document blocks, in document order.
#
# Headings inside the configured level range are kept. Any other Base block
# whose class declares a :blocks attribute is searched recursively. Anything
# that is not a Document yields an empty list.
def extract_headings(document)
  return [] unless document.is_a?(Coradoc::Markdown::Document)

  Array(document.blocks).each_with_object([]) do |block, found|
    case block
    when Coradoc::Markdown::Heading
      found << block if within_level_range?(block.level)
    when Coradoc::Markdown::Base
      found.concat(extract_headings_from_nested(block)) if block.class.attributes.key?(:blocks)
    end
  end
end
|
|
145
|
+
|
|
146
|
+
# Recursively collect in-range headings from a container block, i.e. a Base
# instance whose class declares a :blocks attribute. Returns [] for anything
# else.
def extract_headings_from_nested(block)
  container = block.is_a?(Coradoc::Markdown::Base) && block.class.attributes.key?(:blocks)
  return [] unless container

  Array(block.blocks).flat_map do |nested|
    case nested
    when Coradoc::Markdown::Heading
      within_level_range?(nested.level) ? [nested] : []
    when Coradoc::Markdown::Base
      nested.class.attributes.key?(:blocks) ? extract_headings_from_nested(nested) : []
    else
      []
    end
  end
end
|
|
160
|
+
|
|
161
|
+
# True when the level lies between @min_level and @max_level, inclusive.
def within_level_range?(level)
  level.between?(@min_level, @max_level)
end
|
|
164
|
+
|
|
165
|
+
# Build the hierarchical TOC tree under +root+ from a flat, ordered list of
# headings.
#
# Each heading becomes an Entry attached to the nearest preceding entry with
# a strictly smaller level (or to the root when there is none). The parent is
# chosen by comparing levels rather than by stack depth, so documents that
# skip levels (e.g. 1, 3, 3) still keep same-level headings as siblings.
#
# @param root [Entry] the root entry that receives the top-level entries
# @param headings [Array] heading objects with level, text and heading_id
def build_toc_tree(root, headings)
  return if headings.empty?

  # Running section counter per heading level
  counters = {}

  # Chain of open ancestors, root first
  stack = [root]

  headings.each do |heading|
    level = heading.level

    # Count this heading and restart numbering of everything deeper
    counters[level] = (counters[level] || 0) + 1
    ((level + 1)..6).each { |deeper| counters[deeper] = 0 }

    number = (generate_section_number(counters, level) if @numbered)

    entry = Entry.new(
      id: heading.heading_id,
      text: extract_text(heading.text),
      level: level,
      number: number
    )

    # Close every open entry at the same or a deeper level; the root is
    # never popped.
    stack.pop while stack.length > 1 && stack.last.level >= level

    stack.last.children << entry
    stack.push(entry)
  end
end
|
|
205
|
+
|
|
206
|
+
# Build a dotted section number from the counters of every level from
# @min_level up to +level+, leaving out levels whose counter is missing or
# not positive.
def generate_section_number(counters, level)
  (@min_level..level)
    .map { |lvl| counters[lvl] }
    .select { |count| count&.positive? }
    .join('.')
end
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
end
|