markdownator 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +41 -5
- data/Gemfile +0 -2
- data/README.md +5 -7
- data/lib/markdownator/converters/html.rb +7 -3
- data/lib/markdownator/converters/xlsx.rb +87 -20
- data/lib/markdownator/renderers/html_renderer.rb +190 -0
- data/lib/markdownator/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ef75f03777f577049069b98fee3104ee31f7ad3d4cece96f3cc78052db648f6e
|
|
4
|
+
data.tar.gz: '047778687f87ca470f627d082e7653c17999c8981f7c579512eed0c8a31cb9fa'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e3ce45e8c1f63c4f374aa475a27c3a567846423005caafa8999e136531139a36695fd180d806dcd5e49d1a5414b7a372d6a448f3c1bccb49d5868c13ae2e3870
|
|
7
|
+
data.tar.gz: ef73a6ccac7125b16fd8838a88f56be01c8fa555b39887963e8a79170c34428c21f20ac7fea867af06c6aa3410114ad01619f804ed3f1333669596bd656aed2c
|
data/CHANGELOG.md
CHANGED
|
@@ -1,9 +1,45 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project are documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
1
8
|
## [Unreleased]
|
|
2
9
|
|
|
10
|
+
## [0.1.2] - 2026-06-13
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- XLSX conversion now reads the workbook directly with rubyzip and Nokogiri
|
|
15
|
+
instead of `roo`, so every Office format (DOCX, XLSX, PPTX, EPUB) shares one
|
|
16
|
+
approach and the `roo` dependency is dropped.
|
|
17
|
+
- Moved the HTML renderer to `Markdownator::Renderers::HtmlRenderer` (it renders
|
|
18
|
+
Markdown, it does not convert a source file).
|
|
19
|
+
|
|
20
|
+
## [0.1.1] - 2026-06-13
|
|
21
|
+
|
|
22
|
+
### Changed
|
|
23
|
+
|
|
24
|
+
- HTML (and EPUB) conversion now renders Markdown directly from the Nokogiri
|
|
25
|
+
node tree, dropping the `reverse_markdown` dependency (it was only a thin
|
|
26
|
+
layer over Nokogiri).
|
|
27
|
+
|
|
3
28
|
## [0.1.0] - 2026-06-12
|
|
4
29
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
-
|
|
8
|
-
|
|
9
|
-
|
|
30
|
+
### Added
|
|
31
|
+
|
|
32
|
+
- Converter-registry engine (`Markdownator.convert`, `.convert_local`,
|
|
33
|
+
`.convert_stream`) dispatching local paths, URLs, and IO streams to the first
|
|
34
|
+
converter that accepts the stream.
|
|
35
|
+
- Converters for plain text, HTML, CSV, JSON, XML, DOCX, XLSX, PPTX, PDF, EPUB,
|
|
36
|
+
ZIP (recursive), and image metadata.
|
|
37
|
+
- Optional, lazily loaded format dependencies with a helpful
|
|
38
|
+
`MissingDependencyError` when a required gem is absent; zero hard runtime
|
|
39
|
+
dependencies.
|
|
40
|
+
- Pluggable LLM image-captioner hook (off by default).
|
|
41
|
+
|
|
42
|
+
[Unreleased]: https://github.com/alexrupom/markdownator/compare/v0.1.2...HEAD
|
|
43
|
+
[0.1.2]: https://github.com/alexrupom/markdownator/compare/v0.1.1...v0.1.2
|
|
44
|
+
[0.1.1]: https://github.com/alexrupom/markdownator/compare/v0.1.0...v0.1.1
|
|
45
|
+
[0.1.0]: https://github.com/alexrupom/markdownator/releases/tag/v0.1.0
|
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -13,13 +13,13 @@ libraries **lazily**, so you only install the gems for the formats you actually
|
|
|
13
13
|
| Plain text / Markdown | `.txt`, `.md` | — (built in) |
|
|
14
14
|
| CSV | `.csv` | — (built in) |
|
|
15
15
|
| JSON | `.json` | — (built in) |
|
|
16
|
-
| HTML | `.html`, `.htm` | `
|
|
16
|
+
| HTML | `.html`, `.htm` | `nokogiri` |
|
|
17
17
|
| XML | `.xml` | `nokogiri` |
|
|
18
18
|
| Word | `.docx` | `rubyzip`, `nokogiri` |
|
|
19
|
-
| Excel | `.xlsx` | `
|
|
19
|
+
| Excel | `.xlsx` | `rubyzip`, `nokogiri` |
|
|
20
20
|
| PowerPoint | `.pptx` | `rubyzip`, `nokogiri` |
|
|
21
21
|
| PDF | `.pdf` | `pdf-reader` |
|
|
22
|
-
| EPUB | `.epub` | `rubyzip`, `nokogiri
|
|
22
|
+
| EPUB | `.epub` | `rubyzip`, `nokogiri` |
|
|
23
23
|
| ZIP (recurses) | `.zip` | `rubyzip` |
|
|
24
24
|
| Images (metadata) | `.jpg`, `.png`, `.tiff`, … | `exifr` (for EXIF) |
|
|
25
25
|
|
|
@@ -36,10 +36,8 @@ Then add the gems for the formats you need, e.g.:
|
|
|
36
36
|
|
|
37
37
|
```ruby
|
|
38
38
|
gem "pdf-reader" # PDF
|
|
39
|
-
gem "
|
|
40
|
-
gem "
|
|
41
|
-
gem "nokogiri" # HTML, XML, DOCX, PPTX, EPUB
|
|
42
|
-
gem "reverse_markdown" # HTML, EPUB
|
|
39
|
+
gem "rubyzip" # DOCX, XLSX, PPTX, EPUB, ZIP
|
|
40
|
+
gem "nokogiri" # HTML, XML, DOCX, XLSX, PPTX, EPUB
|
|
43
41
|
gem "exifr" # image EXIF
|
|
44
42
|
```
|
|
45
43
|
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../renderers/html_renderer"
|
|
4
|
+
|
|
3
5
|
module Markdownator
|
|
4
6
|
module Converters
|
|
5
|
-
# Converts HTML into Markdown
|
|
7
|
+
# Converts HTML into Markdown by walking the Nokogiri node tree.
|
|
6
8
|
class Html < Base
|
|
7
9
|
def accepts?(_io, stream_info)
|
|
8
10
|
matches?(stream_info, extensions: %w[html htm], mimetypes: %w[text/html application/xhtml+xml])
|
|
@@ -15,8 +17,10 @@ module Markdownator
|
|
|
15
17
|
|
|
16
18
|
# Shared so other container converters (EPUB) can reuse HTML conversion.
|
|
17
19
|
# Renders an HTML string as Markdown.
#
# Loads Nokogiri lazily through Markdownator.require_optional, parses the
# markup, and hands the starting node to Renderers::HtmlRenderer.
def self.html_to_markdown(html)
  Markdownator.require_optional("nokogiri", feature: "HTML conversion")

  document = Nokogiri::HTML(html)
  # Prefer <body>, then the root element, then the document node itself.
  start_node = [document.at_css("body"), document.root].compact.first || document
  Renderers::HtmlRenderer.new.render(start_node)
end
|
|
21
25
|
|
|
22
26
|
def self.extract_title(html)
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "tempfile"
|
|
4
|
-
|
|
5
3
|
module Markdownator
|
|
6
4
|
module Converters
|
|
7
5
|
# Converts an Excel .xlsx workbook into Markdown: one `## SheetName` heading
|
|
8
|
-
# and a Markdown table per sheet
|
|
6
|
+
# and a Markdown table per sheet.
|
|
7
|
+
#
|
|
8
|
+
# Reads the OOXML zip directly with rubyzip and Nokogiri, the same approach
|
|
9
|
+
# used by the DOCX, PPTX, and EPUB converters.
|
|
9
10
|
class Xlsx < Base
|
|
10
11
|
def accepts?(_io, stream_info)
|
|
11
12
|
matches?(
|
|
@@ -16,12 +17,16 @@ module Markdownator
|
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
def convert(io, _stream_info, **_options)
|
|
19
|
-
Markdownator.require_optional("
|
|
20
|
+
Markdownator.require_optional("zip", feature: "XLSX conversion")
|
|
21
|
+
Markdownator.require_optional("nokogiri", feature: "XLSX conversion")
|
|
20
22
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
sections =
|
|
24
|
-
|
|
23
|
+
::Zip::File.open_buffer(io) do |zip|
|
|
24
|
+
shared = shared_strings(zip)
|
|
25
|
+
sections = sheets(zip).filter_map do |name, path|
|
|
26
|
+
table = markdown_table(parse_sheet(read(zip, path), shared))
|
|
27
|
+
"## #{name}\n\n#{table}" unless table.empty?
|
|
28
|
+
end
|
|
29
|
+
return Result.new(markdown: sections.join("\n\n"))
|
|
25
30
|
end
|
|
26
31
|
rescue StandardError => e
|
|
27
32
|
raise FileConversionError, "Could not read XLSX: #{e.message}"
|
|
@@ -29,23 +34,85 @@ module Markdownator
|
|
|
29
34
|
|
|
30
35
|
private
|
|
31
36
|
|
|
32
|
-
def
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
37
|
+
# Returns the contents of the archive entry at +path+, or nil when the entry
# lookup (or its input stream) yields nil.
def read(zip, path)
  entry = zip.find_entry(path)
  stream = entry&.get_input_stream
  stream&.read
end
|
|
40
|
+
|
|
41
|
+
# Lists the workbook's sheets as [[sheet_name, worksheet_path], ...] in the
# order they appear in xl/workbook.xml. Each sheet's id is looked up in the
# workbook relationships; sheets whose target cannot be resolved are omitted.
# Returns [] when the workbook part cannot be read.
def sheets(zip)
  workbook = parse(read(zip, "xl/workbook.xml"))
  return [] if workbook.nil?

  targets = relationships(zip)
  workbook.xpath("//sheets/sheet").each_with_object([]) do |sheet, found|
    path = resolve_target(targets[sheet["id"]])
    found << [sheet["name"].to_s, path] if path
  end
end
|
|
40
53
|
|
|
41
|
-
def
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
54
|
+
# Maps relationship Id => Target for every Relationship element found in
# xl/_rels/workbook.xml.rels. Returns {} when that part cannot be read.
def relationships(zip)
  doc = parse(read(zip, "xl/_rels/workbook.xml.rels"))
  return {} if doc.nil?

  doc.xpath("//Relationship").each_with_object({}) do |rel, targets|
    targets[rel["Id"]] = rel["Target"]
  end
end
|
|
60
|
+
|
|
61
|
+
# Turns a workbook relationship Target into a zip entry path.
#
# A target that starts with "/" is absolute from the package root, so it is
# returned as-is minus the leading slash (previously such targets outside
# xl/ were wrongly prefixed with "xl/"). Any other target is relative to the
# workbook part's directory, xl/; a leading "./" is dropped. Relative targets
# that already start with "xl/" are kept unchanged for leniency.
#
# Returns nil for a nil or empty target.
def resolve_target(target)
  return nil if target.nil? || target.empty?
  return target.delete_prefix("/") if target.start_with?("/")

  relative = target.delete_prefix("./")
  relative.start_with?("xl/") ? relative : "xl/#{relative}"
end
|
|
67
|
+
|
|
68
|
+
# The shared string table: index -> text.
#
# Each <si> item is either a single <t> or a sequence of rich-text runs
# (<r><t>). Only those are concatenated; text inside phonetic runs (<rPh>)
# is a reading aid, not cell content, so it is deliberately excluded.
# Returns [] when xl/sharedStrings.xml cannot be read.
def shared_strings(zip)
  doc = parse(read(zip, "xl/sharedStrings.xml"))
  return [] if doc.nil?

  doc.xpath("//si").map { |si| si.xpath("./t | ./r/t").map(&:text).join }
end
|
|
75
|
+
|
|
76
|
+
# Parses worksheet XML into an array of rows, each an array of cell strings.
#
# Cells are placed by the column letters of their "r" reference; gaps are
# filled with "". A cell without a usable reference is placed in the column
# right after the previous cell (not at its ordinal position), so it can no
# longer overwrite an earlier cell in a sparse row.
#
# xml    - worksheet XML source, or nil.
# shared - shared string table passed through to cell_value.
#
# Returns [] when the XML cannot be parsed into a document.
def parse_sheet(xml, shared)
  doc = parse(xml)
  return [] if doc.nil?

  doc.xpath("//sheetData/row").map do |row|
    values = {}
    last_column = 0
    row.xpath("./c").each do |cell|
      column = column_index(cell["r"]) || (last_column + 1)
      values[column] = cell_value(cell, shared)
      last_column = column
    end
    width = values.keys.max || 0
    (1..width).map { |i| values.fetch(i, "") }
  end
end
|
|
91
|
+
|
|
92
|
+
# Returns the display text of a worksheet cell, chosen by its "t" attribute:
#
#   "s"         - index into the shared string table; a missing, non-numeric
#                 or out-of-range index yields "" (previously a missing <v>
#                 returned shared[0] and a negative index wrapped around).
#   "inlineStr" - text of the inline string, excluding phonetic runs.
#   "b"         - "TRUE" when <v> is "1", otherwise "FALSE".
#   otherwise   - the text of <v>, or "" when absent.
#
# cell   - cell node responding to [], at_xpath and xpath.
# shared - array of shared strings.
def cell_value(cell, shared)
  raw = cell.at_xpath("./v")&.text

  case cell["t"]
  when "s"
    index = Integer(raw.to_s, 10, exception: false)
    index.nil? || index.negative? ? "" : shared[index].to_s
  when "inlineStr" then cell.xpath("./is/t | ./is/r/t").map(&:text).join
  when "b" then raw == "1" ? "TRUE" : "FALSE"
  else raw.to_s
  end
end
|
|
100
|
+
|
|
101
|
+
# Converts a cell reference like "B2" into a 1-based column index (2).
#
# Only one to three leading ASCII letters are accepted and the result is
# capped at 16_384 (column XFD, the last column a worksheet can have).
# Anything else returns nil so the caller falls back to positional placement;
# without the cap a crafted reference such as "ZZZZZZZZ1" would make the
# caller build a row billions of cells wide.
#
# Returns nil when +ref+ has no valid column letters.
def column_index(ref)
  letters = ref.to_s[/\A[A-Za-z]{1,3}(?![A-Za-z])/]
  return nil if letters.nil?

  index = letters.upcase.each_char.reduce(0) { |acc, char| (acc * 26) + (char.ord - 64) }
  index <= 16_384 ? index : nil
end
|
|
108
|
+
|
|
109
|
+
# Parses +xml+ with Nokogiri and removes namespaces from the resulting
# document so callers can use unprefixed XPath queries.
# Returns nil when +xml+ is nil.
def parse(xml)
  return nil if xml.nil?

  Nokogiri::XML(xml).tap(&:remove_namespaces!)
end
|
|
49
116
|
end
|
|
50
117
|
end
|
|
51
118
|
end
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Markdownator
  module Renderers
    # Walks a Nokogiri HTML node tree and renders Markdown. A focused,
    # dependency-free replacement for reverse_markdown: HTML conversion needs
    # only Nokogiri (which reverse_markdown depended on anyway).
    #
    # The renderer only relies on a small duck-typed node interface
    # (children, element_children, name, text, text?, comment?, element?,
    # [], css, at_css).
    class HtmlRenderer
      # Elements that introduce their own block (line-separated) content.
      BLOCK_TAGS = %w[
        address article aside blockquote details div dl figcaption figure
        footer form h1 h2 h3 h4 h5 h6 header hr main nav ol p pre section
        table ul
      ].freeze

      # Elements whose contents are dropped entirely.
      SKIP_TAGS = %w[script style head title noscript template].freeze

      # Renders the children of +node+ as a Markdown string, with blocks
      # separated by blank lines.
      def render(node)
        blocks_to_string(render_blocks(node))
      end

      private

      # Renders the children of +node+ into an array of block strings, grouping
      # consecutive inline content into paragraphs.
      def render_blocks(node)
        blocks = []
        buffer = +""

        node.children.each do |child|
          if block?(child)
            # A block element ends the paragraph collected so far.
            push_paragraph(blocks, buffer)
            buffer = +""
            block = render_block(child)
            blocks << block unless block.nil? || block.empty?
          elsif !skip?(child)
            buffer << render_inline(child)
          end
        end
        push_paragraph(blocks, buffer)
        blocks
      end

      # Renders one block-level element to a Markdown block string.
      def render_block(node)
        case node.name
        when /\Ah([1-6])\z/ then render_heading(node, Regexp.last_match(1).to_i)
        when "ul" then render_list(node, ordered: false)
        when "ol" then render_list(node, ordered: true)
        when "pre" then render_pre(node)
        when "blockquote" then render_blockquote(node)
        when "table" then render_table(node)
        when "dl" then render_definition_list(node)
        when "hr" then "---"
        else blocks_to_string(render_blocks(node)) # div, section, p, unknown blocks
        end
      end

      # ATX heading. A heading must fit on one line, so <br> newlines become
      # spaces, and an empty heading renders as nothing instead of a bare "##".
      def render_heading(node, level)
        text = single_line(inline_of(node))
        text.empty? ? "" : "#{"#" * level} #{text}"
      end

      # Renders a node in inline context. Whitespace at the edges of the
      # result is kept so adjacent inline elements stay separated; the
      # enclosing block collapses and trims it.
      def render_inline(node)
        return normalize(node.text) if node.text?
        return "" if node.comment? || skip?(node)

        case node.name
        when "strong", "b" then emphasis(node, "**")
        when "em", "i" then emphasis(node, "_")
        when "del", "s", "strike" then emphasis(node, "~~")
        when "code" then inline_code(node)
        when "a" then render_link(node)
        when "img" then render_image(node)
        when "br" then "\n"
        else raw_inline(node) # span and other wrappers: pass content through
        end
      end

      # Renders <a> as [text](href). Falls back to the plain text when there
      # is no href, and to the href itself when there is no text.
      def render_link(node)
        href = node["href"].to_s.strip
        raw = raw_inline(node)
        return raw if href.empty?

        text = clean_inline(raw)
        destination = link_destination(href)
        return "[#{href}](#{destination})" if text.empty?

        # Keep edge whitespace outside the brackets ("[foo](u) bar").
        "#{raw[/\A\s*/]}[#{text}](#{destination})#{raw[/\s*\z/]}"
      end

      # Renders <img> as ![alt](src).
      def render_image(node)
        "![#{node["alt"].to_s.strip}](#{link_destination(node["src"].to_s.strip)})"
      end

      # Renders the direct <li> children of a list. Continuation lines of an
      # item are indented to align under the item's first line.
      def render_list(node, ordered:)
        list_items(node).each_with_index.map do |li, position|
          marker = ordered ? "#{position + 1}." : "-"
          indent = " " * (marker.length + 1)
          lines = blocks_to_string(render_blocks(li)).split("\n")
          first = lines.shift.to_s
          rest = lines.map { |line| line.empty? ? "" : "#{indent}#{line}" }
          (["#{marker} #{first}"] + rest).join("\n")
        end.join("\n")
      end

      # Renders <pre> as a fenced code block. The language comes from a
      # "language-x" / "lang-x" class on the <code> (or <pre>) element. The
      # fence is made longer than any backtick run in the code so the content
      # cannot close the block early.
      def render_pre(node)
        code = node.at_css("code") || node
        language = code["class"].to_s[/(?:language|lang)-(\w+)/, 1].to_s
        body = code.text.chomp
        fence = "`" * [3, longest_backtick_run(body) + 1].max
        "#{fence}#{language}\n#{body}\n#{fence}"
      end

      # Prefixes every line of the quoted content with ">".
      def render_blockquote(node)
        blocks_to_string(render_blocks(node)).split("\n").map do |line|
          line.empty? ? ">" : "> #{line}"
        end.join("\n")
      end

      # Renders a table as a pipe table, using the first row as the header.
      # Cell text is forced onto one line (a newline would end the row) and
      # pipes are escaped. Short rows are padded to the widest row.
      def render_table(node)
        rows = node.css("tr").map do |tr|
          tr.css("th, td").map { |cell| single_line(inline_of(cell)).gsub("|", "\\|") }
        end
        rows.reject!(&:empty?)
        return "" if rows.empty?

        width = rows.map(&:length).max
        rows.each { |row| row.fill("", row.length...width) }
        header, *body = rows
        lines = ["| #{header.join(" | ")} |", "| #{Array.new(width, "---").join(" | ")} |"]
        body.each { |row| lines << "| #{row.join(" | ")} |" }
        lines.join("\n")
      end

      # Renders <dt> children as bold terms and every other element child as
      # a ": definition" line; children without text are skipped.
      def render_definition_list(node)
        node.element_children.filter_map do |child|
          text = inline_of(child)
          next if text.empty?

          child.name == "dt" ? "**#{text}**" : ": #{text}"
        end.join("\n")
      end

      # --- helpers ---------------------------------------------------------

      # Renders <code> as a code span. The delimiter is one backtick longer
      # than the longest backtick run in the text, and padded with spaces
      # whenever the text contains backticks.
      def inline_code(node)
        text = node.text
        run = longest_backtick_run(text)
        return "`#{text}`" if run.zero?

        fence = "`" * (run + 1)
        "#{fence} #{text} #{fence}"
      end

      # Wraps the node's inline content in +marker+. Edge whitespace is moved
      # outside the markers ("**foo** bar", not "**foo**bar" or "**foo **bar");
      # content that is only whitespace is passed through unmarked.
      def emphasis(node, marker)
        raw = raw_inline(node)
        inner = clean_inline(raw)
        return raw if inner.empty?

        "#{raw[/\A\s*/]}#{marker}#{inner}#{marker}#{raw[/\s*\z/]}"
      end

      # Inline content of a node, with surrounding whitespace collapsed.
      def inline_of(node)
        clean_inline(raw_inline(node))
      end

      # Inline content of a node exactly as rendered, edge whitespace included.
      def raw_inline(node)
        node.children.map { |child| render_inline(child) }.join
      end

      def list_items(node)
        node.element_children.select { |child| child.name == "li" }
      end

      def push_paragraph(blocks, buffer)
        text = clean_block(buffer)
        blocks << text unless text.empty?
      end

      def blocks_to_string(blocks)
        blocks.reject(&:empty?).join("\n\n")
      end

      # Collapses source whitespace (including newlines) to single spaces, so
      # only explicit <br> newlines survive.
      def normalize(text)
        text.gsub(/\s+/, " ")
      end

      def clean_inline(text)
        text.gsub(/[ \t]{2,}/, " ").strip
      end

      def clean_block(text)
        text.gsub(/ *\n */, "\n").gsub(/[ \t]{2,}/, " ").gsub(/\n{3,}/, "\n\n").strip
      end

      # Replaces newlines (and the whitespace around them) with single spaces
      # for contexts that must stay on one line.
      def single_line(text)
        text.gsub(/\s*\n\s*/, " ").strip
      end

      # Length of the longest run of consecutive backticks in +text+.
      def longest_backtick_run(text)
        text.scan(/`+/).map(&:length).max || 0
      end

      # Makes a URL usable as a Markdown link destination: tabs and newlines
      # are removed and spaces percent-encoded, since raw whitespace would end
      # the destination.
      def link_destination(url)
        url.delete("\t\r\n").gsub(" ", "%20")
      end

      def block?(node)
        node.element? && BLOCK_TAGS.include?(node.name)
      end

      def skip?(node)
        node.element? && SKIP_TAGS.include?(node.name)
      end
    end
  end
end
|
data/lib/markdownator/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: markdownator
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- alexrupom
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-06-
|
|
11
|
+
date: 2026-06-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: Markdownator converts PDF, Word, Excel, PowerPoint, EPUB, HTML, CSV,
|
|
14
14
|
JSON, XML, ZIP archives and images into clean Markdown suitable for large language
|
|
@@ -43,6 +43,7 @@ files:
|
|
|
43
43
|
- lib/markdownator/converters/zip.rb
|
|
44
44
|
- lib/markdownator/engine.rb
|
|
45
45
|
- lib/markdownator/errors.rb
|
|
46
|
+
- lib/markdownator/renderers/html_renderer.rb
|
|
46
47
|
- lib/markdownator/result.rb
|
|
47
48
|
- lib/markdownator/stream_info.rb
|
|
48
49
|
- lib/markdownator/version.rb
|