coradoc-markdown 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/lib/coradoc/markdown/errors.rb +28 -0
  4. data/lib/coradoc/markdown/model/abbreviation.rb +27 -0
  5. data/lib/coradoc/markdown/model/attribute_list.rb +98 -0
  6. data/lib/coradoc/markdown/model/base.rb +86 -0
  7. data/lib/coradoc/markdown/model/blockquote.rb +21 -0
  8. data/lib/coradoc/markdown/model/code.rb +11 -0
  9. data/lib/coradoc/markdown/model/code_block.rb +24 -0
  10. data/lib/coradoc/markdown/model/definition_item.rb +24 -0
  11. data/lib/coradoc/markdown/model/definition_list.rb +47 -0
  12. data/lib/coradoc/markdown/model/definition_term.rb +21 -0
  13. data/lib/coradoc/markdown/model/document.rb +39 -0
  14. data/lib/coradoc/markdown/model/emphasis.rb +11 -0
  15. data/lib/coradoc/markdown/model/extension.rb +92 -0
  16. data/lib/coradoc/markdown/model/footnote.rb +31 -0
  17. data/lib/coradoc/markdown/model/footnote_reference.rb +22 -0
  18. data/lib/coradoc/markdown/model/heading.rb +44 -0
  19. data/lib/coradoc/markdown/model/highlight.rb +18 -0
  20. data/lib/coradoc/markdown/model/horizontal_rule.rb +16 -0
  21. data/lib/coradoc/markdown/model/image.rb +19 -0
  22. data/lib/coradoc/markdown/model/link.rb +19 -0
  23. data/lib/coradoc/markdown/model/list.rb +22 -0
  24. data/lib/coradoc/markdown/model/list_item.rb +29 -0
  25. data/lib/coradoc/markdown/model/math.rb +50 -0
  26. data/lib/coradoc/markdown/model/paragraph.rb +28 -0
  27. data/lib/coradoc/markdown/model/strikethrough.rb +18 -0
  28. data/lib/coradoc/markdown/model/strong.rb +11 -0
  29. data/lib/coradoc/markdown/model/table.rb +13 -0
  30. data/lib/coradoc/markdown/model/text.rb +15 -0
  31. data/lib/coradoc/markdown/parser/ast_processor.rb +543 -0
  32. data/lib/coradoc/markdown/parser/block_parser.rb +745 -0
  33. data/lib/coradoc/markdown/parser/html_entities.rb +2149 -0
  34. data/lib/coradoc/markdown/parser/inline_parser.rb +274 -0
  35. data/lib/coradoc/markdown/parser/parslet_extras.rb +215 -0
  36. data/lib/coradoc/markdown/parser.rb +11 -0
  37. data/lib/coradoc/markdown/parser_util.rb +90 -0
  38. data/lib/coradoc/markdown/serializer.rb +199 -0
  39. data/lib/coradoc/markdown/toc_generator.rb +215 -0
  40. data/lib/coradoc/markdown/transform/from_core_model.rb +325 -0
  41. data/lib/coradoc/markdown/transform/text_extraction.rb +19 -0
  42. data/lib/coradoc/markdown/transform/to_core_model.rb +287 -0
  43. data/lib/coradoc/markdown/transformer.rb +463 -0
  44. data/lib/coradoc/markdown/version.rb +7 -0
  45. data/lib/coradoc/markdown.rb +190 -0
  46. metadata +173 -0
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Markdown
5
+ # Serializer for Markdown Document models.
6
+ #
7
+ # This serializer converts Document model objects back into
8
+ # Markdown text format.
9
+ #
10
class Serializer
  # Serialize a document model to a Markdown string using a fresh serializer.
  #
  # @param document [Coradoc::Markdown::Base] The document or element to serialize
  # @param options [Hash] Serialization options (forwarded to #serialize)
  # @return [String] The Markdown output
  def self.serialize(document, options = {})
    new.serialize(document, options)
  end

  # Serialize a single model element (or a plain String) to Markdown.
  #
  # Dispatches on the element's class. Strings are returned unchanged.
  #
  # @param element [Coradoc::Markdown::Base, String] The element to serialize
  # @param _options [Hash] Serialization options (currently unused)
  # @return [String] The Markdown output
  # @raise [ArgumentError] if the element is not a known model type
  def serialize(element, _options = {})
    case element
    when Document          then serialize_document(element)
    when Heading           then serialize_heading(element)
    when Paragraph         then serialize_paragraph(element)
    when List              then serialize_list(element)
    when CodeBlock         then serialize_code_block(element)
    when Blockquote        then serialize_blockquote(element)
    when Link              then serialize_link(element)
    when Image             then serialize_image(element)
    when HorizontalRule    then serialize_horizontal_rule(element)
    when Table             then serialize_table(element)
    when Emphasis          then serialize_emphasis(element)
    when Strong            then serialize_strong(element)
    when Code              then serialize_code(element)
    when DefinitionList    then serialize_definition_list(element)
    when Footnote          then serialize_footnote(element)
    when FootnoteReference then serialize_footnote_reference(element)
    when Abbreviation      then serialize_abbreviation(element)
    when Strikethrough, Highlight, AttributeList, Math, Extension
      # These models render themselves.
      element.to_md
    when String
      element
    else
      raise ArgumentError,
            "Unknown element type for serialization: #{element.class}. " \
            'Expected a known Markdown model type.'
    end
  end

  private

  # Join all top-level blocks with a blank line between them.
  # A document without blocks serializes to an empty string.
  def serialize_document(doc)
    Array(doc.blocks).map { |block| serialize(block) }.join("\n\n")
  end

  # ATX-style heading: one '#' per level, then the heading text.
  def serialize_heading(heading)
    "#{'#' * heading.level} #{heading.text}"
  end

  def serialize_paragraph(para)
    inline_text(para)
  end

  # Render a node's inline children when it has any, otherwise its
  # plain +text+. Shared by paragraphs and list items.
  def inline_text(node)
    children = Array(node.children)
    return node.text.to_s unless children.any?

    children.map { |child| serialize_inline_content(child) }.join
  end

  # Serialize one inline child.
  #
  # @raise [ArgumentError] if the child is neither a String nor a model object
  def serialize_inline_content(element)
    case element
    when String
      element
    when Emphasis, Strong, Code, Link, Image, FootnoteReference, Math, Extension, Strikethrough, Highlight
      serialize(element)
    when Base
      element.to_md
    else
      raise ArgumentError,
            "Cannot serialize inline content of type #{element.class}. " \
            'Expected String, known inline model, or Base subclass.'
    end
  end

  # One line per item. Items whose +checked+ is exactly true/false are
  # written as task-list items; everything else uses the list marker.
  def serialize_list(list)
    marker = list.ordered ? '1.' : '-'
    lines = Array(list.items).map do |item|
      text = inline_text(item)
      case item.checked
      when true  then "- [x] #{strip_task_prefix(text)}"
      when false then "- [ ] #{strip_task_prefix(text)}"
      else "#{marker} #{text}"
      end
    end
    lines.join("\n")
  end

  # Drop a task marker the item text may already carry. Anchored with \A
  # so only the very start of the text is touched, never a later line.
  def strip_task_prefix(text)
    text.sub(/\A- \[[ x]\] /, '')
  end

  # Fenced code block. The fence is always longer than any backtick run
  # inside the code, so embedded ``` cannot close the block early.
  def serialize_code_block(block)
    code = block.code.to_s
    fence = '`' * [3, longest_backtick_run(code) + 1].max
    "#{fence}#{block.language}\n#{code}\n#{fence}"
  end

  # Prefix every line of the quote content with "> ".
  def serialize_blockquote(quote)
    quote.content.to_s.lines.map { |line| "> #{line}" }.join
  end

  def serialize_link(link)
    "[#{link.text}](#{link_destination(link.url, link.title)})"
  end

  def serialize_image(img)
    "![#{img.alt}](#{link_destination(img.src, img.title)})"
  end

  # Build the "url" or "url \"title\"" part of a link/image.
  # Unescaped double quotes in the title are backslash-escaped so they
  # cannot terminate the title early; a nil or empty title is omitted.
  def link_destination(url, title)
    title_text = title.to_s
    return url.to_s if title_text.empty?

    "#{url} \"#{title_text.gsub(/(?<!\\)"/) { '\"' }}\""
  end

  def serialize_horizontal_rule(rule)
    rule.style || '---'
  end

  # Pipe table: header row, separator row, then one line per data row.
  # Returns an empty string when the table has no headers.
  def serialize_table(table)
    headers = Array(table.headers)
    return '' if headers.empty?

    separator = "| #{headers.map { |_| '---' }.join(' | ')} |"
    body = Array(table.rows).map { |row| table_row(row) }

    [table_row(headers), separator, *body].join("\n")
  end

  # Format one table row, escaping pipes that are not already escaped so
  # cell content cannot be mistaken for a column boundary.
  def table_row(cells)
    escaped = Array(cells).flatten.map { |cell| cell.to_s.gsub(/(?<!\\)\|/) { '\|' } }
    "| #{escaped.join(' | ')} |"
  end

  def serialize_emphasis(em)
    "*#{em.text}*"
  end

  def serialize_strong(strong)
    "**#{strong.text}**"
  end

  # Inline code span. Text containing backticks is wrapped in a longer
  # delimiter and padded with one space on each side so the span stays intact.
  def serialize_code(code)
    text = code.text.to_s
    run = longest_backtick_run(text)
    return "`#{text}`" if run.zero?

    delimiter = '`' * (run + 1)
    "#{delimiter} #{text} #{delimiter}"
  end

  # Length of the longest consecutive run of backticks in +text+ (0 if none).
  def longest_backtick_run(text)
    text.scan(/`+/).map(&:length).max || 0
  end

  # Each term on its own line followed by one ": definition" line per
  # definition; terms are separated by a blank line.
  def serialize_definition_list(dl)
    Array(dl.items).map do |term|
      definitions = Array(term.definitions).map { |defn| ": #{defn.content}" }
      [term.text.to_s, *definitions].join("\n")
    end.join("\n\n")
  end

  def serialize_footnote(fn)
    "[^#{fn.id}]: #{fn.content}"
  end

  def serialize_footnote_reference(ref)
    "[^#{ref.id}]"
  end

  def serialize_abbreviation(abbr)
    "*[#{abbr.term}]: #{abbr.definition}"
  end
end
198
+ end
199
+ end
@@ -0,0 +1,215 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Coradoc
4
+ module Markdown
5
module Model
  # Lazily load the model classes the TOC generator relies on. Each
  # constant is registered for autoload from the sibling model/ directory.
  # NOTE(review): the generator below refers to these classes as
  # Coradoc::Markdown::Heading etc., not Model::Heading — confirm that
  # this namespace is the one the model files actually define.
  { Base: 'base', Heading: 'heading', Document: 'document' }.each do |const_name, file_name|
    autoload const_name, "#{__dir__}/model/#{file_name}"
  end
end
10
+
11
+ # Table of Contents Generator
12
+ #
13
+ # Generates a table of contents from document headings.
14
+ # Supports Kramdown-style TOC with options for levels, depth, etc.
15
+ #
16
+ # @example Basic usage
17
+ # doc = Coradoc::Markdown.parse(markdown_text)
18
+ # toc = Coradoc::Markdown::TocGenerator.generate(doc)
19
+ # puts toc.to_markdown
20
+ #
21
+ # @example With options
22
+ # toc = Coradoc::Markdown::TocGenerator.generate(doc,
23
+ # min_level: 2,
24
+ # max_level: 4,
25
+ # numbered: true
26
+ # )
27
+ #
28
class TocGenerator
  include Transform::TextExtraction

  # Default options for TOC generation.
  # NOTE(review): :styled is accepted but nothing in this class reads it.
  DEFAULT_OPTIONS = {
    min_level: 1,
    max_level: 6,
    numbered: false,
    styled: false,
    link_headings: true
  }.freeze

  # Represents a single TOC entry (one heading) and its nested entries.
  class Entry
    attr_accessor :id, :text, :level, :children, :number, :link

    # @param id [String, nil] Heading id used as the link anchor
    # @param text [String] Display text
    # @param level [Integer] Heading level (0 for the synthetic root)
    # @param number [String, nil] Section number such as "1.2"
    # @param link [Boolean] Whether #to_markdown may render a link
    def initialize(id:, text:, level:, number: nil, link: true)
      @id = id
      @text = text
      @level = level
      @number = number
      @link = link
      @children = []
    end

    # Render this entry and its descendants as a nested bullet list.
    #
    # Children are indented two spaces per depth so that they nest under
    # the parent's "* " marker. The result is built by concatenation
    # rather than by appending to a string literal, which may be frozen.
    #
    # @param indent [Integer] Nesting depth of this entry
    # @return [String] Markdown list lines, each terminated by a newline
    def to_markdown(indent: 0)
      # An id identical to the text is rendered as plain text, not a link.
      label = @link && @id && @id != @text ? "[#{@text}](##{@id})" : @text
      numbering = @number ? "#{@number} " : ''
      own_line = "#{'  ' * indent}* #{numbering}#{label}\n"

      @children.reduce(own_line) do |markdown, child|
        markdown + child.to_markdown(indent: indent + 1)
      end
    end

    # Convert entry to hash representation
    # @return [Hash] Hash with id, text, level, number, and optional children
    def to_h
      result = { id: @id, text: @text, level: @level, number: @number }
      result[:children] = @children.map(&:to_h) unless @children.empty?
      result
    end
  end

  # Generate a TOC from a document
  #
  # @param document [Coradoc::Markdown::Document] The document to process
  # @param options [Hash] Generation options
  # @option options [Integer] :min_level (1) Minimum heading level to include
  # @option options [Integer] :max_level (6) Maximum heading level to include
  # @option options [Boolean] :numbered (false) Whether to add section numbers
  # @option options [Boolean] :styled (false) Whether to add styling classes
  # @option options [Boolean] :link_headings (true) Whether to link to headings
  # @return [Entry, nil] The root TOC entry or nil if no headings
  def self.generate(document, options = {})
    new(options).generate(document)
  end

  # Generate TOC as Markdown string
  #
  # @param document [Coradoc::Markdown::Document] The document to process
  # @param options [Hash] Generation options
  # @return [String] Markdown-formatted TOC ('' when there are no headings)
  def self.generate_markdown(document, options = {})
    toc = generate(document, options)
    toc ? toc.to_markdown : ''
  end

  # Generate TOC as array structure
  #
  # @param document [Coradoc::Markdown::Document] The document to process
  # @param options [Hash] Generation options
  # @return [Array<Hash>] Array of top-level TOC entries (root excluded)
  def self.generate_array(document, options = {})
    toc = generate(document, options)
    return [] unless toc

    toc.children.map(&:to_h)
  end

  # @param options [Hash] Overrides merged over DEFAULT_OPTIONS
  def initialize(options = {})
    @options = DEFAULT_OPTIONS.merge(options)
    @min_level = @options[:min_level]
    @max_level = @options[:max_level]
    @numbered = @options[:numbered]
    @link_headings = @options[:link_headings]
  end

  # Generate TOC from document
  #
  # @param document [Coradoc::Markdown::Document] The document
  # @return [Entry, nil] Root TOC entry, or nil when no heading qualifies
  def generate(document)
    headings = extract_headings(document)
    return nil if headings.empty?

    root = Entry.new(id: nil, text: 'Table of Contents', level: 0)
    build_toc_tree(root, headings)
    root
  end

  private

  # Collect in-range headings from a document, in document order.
  # Anything that is not a Document yields no headings.
  def extract_headings(document)
    return [] unless document.is_a?(Coradoc::Markdown::Document)

    collect_headings(document.blocks)
  end

  # Collect in-range headings from a nested block that exposes +blocks+.
  def extract_headings_from_nested(block)
    return [] unless block_container?(block)

    collect_headings(block.blocks)
  end

  # Walk a list of blocks depth-first, keeping headings inside the
  # configured level range and descending into block containers.
  def collect_headings(blocks)
    Array(blocks).flat_map do |block|
      if block.is_a?(Coradoc::Markdown::Heading)
        within_level_range?(block.level) ? [block] : []
      else
        extract_headings_from_nested(block)
      end
    end
  end

  # True for model objects whose class declares a +blocks+ attribute.
  def block_container?(node)
    node.is_a?(Coradoc::Markdown::Base) && node.class.attributes.key?(:blocks)
  end

  def within_level_range?(level)
    level.between?(@min_level, @max_level)
  end

  # Build the hierarchical TOC tree under +root+.
  #
  # The parent of each heading is the nearest preceding entry with a
  # strictly smaller level. Deciding by level (not by stack depth) keeps
  # same-level headings as siblings even when levels are skipped or the
  # first heading is deeper than the minimum level.
  def build_toc_tree(root, headings)
    return if headings.empty?

    # Running section counter per heading level
    counters = {}
    # Chain of open ancestors, root first
    ancestors = [root]

    headings.each do |heading|
      level = heading.level

      counters[level] = counters.fetch(level, 0) + 1
      # A new heading restarts numbering of every deeper level.
      counters.delete_if { |deeper, _| deeper > level }

      entry = Entry.new(
        id: heading.heading_id,
        text: extract_text(heading.text),
        level: level,
        number: (generate_section_number(counters, level) if @numbered),
        link: @link_headings
      )

      # Close every open entry that cannot be a parent of this heading.
      ancestors.pop while ancestors.length > 1 && ancestors.last.level >= level

      ancestors.last.children << entry
      ancestors.push(entry)
    end
  end

  # Join the positive counters from the minimum level down to +level+
  # with dots, e.g. "2.1". Levels that have not been seen are skipped.
  def generate_section_number(counters, level)
    (@min_level..level)
      .map { |l| counters[l] }
      .select { |count| count&.positive? }
      .join('.')
  end
end
214
+ end
215
+ end