reverse_markdown 3.0.1 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 71599ffe8eb3e568f71c10ac33d6f6e8010c340075659ebb759026dcb5e55993
4
- data.tar.gz: eb6b3608de31ab57229ddc202ac9b941824442b03eedc02dc8a8e12edd59c08d
3
+ metadata.gz: d374ce991c236ccd344ef88d17531e3dc845bc4eb76015f5237f2646be1b3b66
4
+ data.tar.gz: 3bb3e85c76d512647fe7c70f983b22113775c2ab02bc7410dec85cfd06c46cb8
5
5
  SHA512:
6
- metadata.gz: 254da531c9557092c906bf885a8672b5ff6762a81a99a0acca628eea44013364d9fb5ebc34360af000ff3754253951454c93a61f89aecc2112e2c1931a349069
7
- data.tar.gz: 60c65f05fb636ba4a574f5fcca1fd8700e187830e94729ca19ef66be4ee7dacf6d3ee7bee2d8d2d32c215fd7aa3ecbd05a030abdf68534e91b92e9172e452366
6
+ metadata.gz: 1c860d94f4b07ae28cd2553e86c6538ebcaecd861a25bb695879ed2f69b6da6459c2ff9884d7c86127c18dac6217ddd3b41ee8e069eaa012e04b9d6d280cebd4
7
+ data.tar.gz: 1025d7a735a38f59d2e921d5e7fb29e41b007a238eca7052bba16b319316695a50dc15f1729b47f2c27887244000b0c5a2b3d386be1765ee0c37578f5be2978d
@@ -13,7 +13,7 @@ jobs:
13
13
  runs-on: ubuntu-latest
14
14
  strategy:
15
15
  matrix:
16
- ruby-version: [ '2.7', '3.0', '3.1', '3.2', '3.3', 'jruby-9.4' ]
16
+ ruby-version: [ '2.7', '3.0', '3.1', '3.2', '3.3', '4.0', 'jruby-9.4' ]
17
17
 
18
18
  steps:
19
19
  - name: Checkout code
@@ -31,18 +31,5 @@ jobs:
31
31
  - name: Run tests
32
32
  run: bundle exec rspec
33
33
 
34
- - name: Run Code Climate Test Reporter
35
- run: |
36
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
37
- chmod +x ./cc-test-reporter
38
- ./cc-test-reporter before-build
39
-
40
34
  - name: Run tests
41
35
  run: bundle exec rspec
42
-
43
- - name: Upload Code Climate Coverage Report
44
- if: matrix.ruby-version == '3.3'
45
- env:
46
- CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
47
- run: |
48
- ./cc-test-reporter after-build --exit-code $?
data/CHANGELOG.md CHANGED
@@ -1,6 +1,13 @@
1
1
  # Change Log
2
2
  All notable changes to this project will be documented in this file.
3
3
 
4
+ ## 3.0.2 - January 2026
5
+ - Add support for Ruby 4.0
6
+ - Some cleanup of test cases, thanks @joelhawksley, see #107
7
+ - Fix whitespace collapsing between inline elements, see #34
8
+ - Fix whitespace around links, see #91
9
+ - Split emphasis markers at paragraph breaks, merge heading lines, see #95
10
+
4
11
  ## 3.0.1 - December 2025
5
12
  - Use https instead of http
6
13
 
data/README.md CHANGED
@@ -8,6 +8,12 @@ Transform html into markdown. Useful for example if you want to import html into
8
8
 
9
9
  See [Change Log](CHANGELOG.md)
10
10
 
11
+ ## Limitations
12
+
13
+ A perfect HTML to Markdown conversion is not possible. HTML is far more expressive than Markdown - it supports tables with merged cells, arbitrary nesting, inline styles, and countless other features that have no Markdown equivalent.
14
+
15
+ This gem aims to provide good enough defaults for most common cases. It handles standard content well but does not attempt to solve every edge case. If you have highly specific conversion needs, you can [write custom converters](https://github.com/xijo/reverse_markdown/wiki/Write-your-own-converter) to handle them.
16
+
11
17
  ## Requirements
12
18
 
13
19
  1. [Nokogiri](http://nokogiri.org/)
@@ -15,6 +15,26 @@ module ReverseMarkdown
15
15
  string.gsub(/(?<!\\)[*_]/, '*' => '\*', '_' => '\_')
16
16
  end
17
17
 
18
+ # Wrap content with markers (e.g., ** or _), splitting at paragraph breaks
19
+ # so markers don't span across breaks (which breaks markdown rendering)
20
+ def wrap_with_markers(content, marker)
21
+ # Split on paragraph breaks, preserving the breaks
22
+ segments = content.split(/(\s*\n\s*\n\s*)/)
23
+
24
+ segments.map.with_index do |segment, i|
25
+ if i.odd? # This is a break segment (captured delimiter)
26
+ segment
27
+ elsif segment.strip.empty?
28
+ segment
29
+ else
30
+ # Wrap with markers, preserving border whitespace
31
+ leading = segment[/\A\s*/]
32
+ trailing = segment[/\s*\z/]
33
+ "#{leading}#{marker}#{segment.strip}#{marker}#{trailing}"
34
+ end
35
+ end.join
36
+ end
37
+
18
38
  def extract_title(node)
19
39
  title = escape_keychars(node['title'].to_s)
20
40
  title.empty? ? '' : %[ "#{title}"]
@@ -6,7 +6,7 @@ module ReverseMarkdown
6
6
  if content.strip.empty? || state[:already_italic]
7
7
  content
8
8
  else
9
- "#{content[/^\s*/]}_#{content.strip}_#{content[/\s*$/]}"
9
+ wrap_with_markers(content, '_')
10
10
  end
11
11
  end
12
12
  end
@@ -3,7 +3,10 @@ module ReverseMarkdown
3
3
  class H < Base
4
4
  def convert(node, state = {})
5
5
  prefix = '#' * node.name[/\d/].to_i
6
- ["\n", prefix, ' ', treat_children(node, state), "\n"].join
6
+ content = treat_children(node, state).strip
7
+ # Merge lines into one (markdown headings can't span multiple lines)
8
+ content = content.split(/\s*\n\s*/).join(' ')
9
+ "\n#{prefix} #{content}\n"
7
10
  end
8
11
  end
9
12
 
@@ -6,7 +6,7 @@ module ReverseMarkdown
6
6
  if content.strip.empty? || state[:already_strong]
7
7
  content
8
8
  else
9
- "#{content[/^\s*/]}**#{content.strip}**#{content[/\s*$/]}"
9
+ wrap_with_markers(content, '**')
10
10
  end
11
11
  end
12
12
  end
@@ -11,12 +11,20 @@ module ReverseMarkdown
11
11
 
12
12
  private
13
13
 
14
+ INLINE_ELEMENTS = [:a, :abbr, :b, :bdi, :bdo, :cite, :code, :data, :del,
15
+ :dfn, :em, :i, :ins, :kbd, :mark, :q, :rp, :rt, :ruby,
16
+ :s, :samp, :small, :span, :strong, :sub, :sup, :time,
17
+ :u, :var, :wbr, :font, :tt].freeze
18
+
14
19
  def treat_empty(node)
15
20
  parent = node.parent.name.to_sym
16
21
  if [:ol, :ul].include?(parent) # Otherwise the indentation is broken
17
22
  ''
18
23
  elsif node.text == ' ' # Regular whitespace text node
19
24
  ' '
25
+ elsif INLINE_ELEMENTS.include?(parent) && node.text =~ /\n/
26
+ # Preserve newlines between inline elements as space (HTML whitespace collapsing)
27
+ ' '
20
28
  else
21
29
  ''
22
30
  end
@@ -25,7 +33,7 @@ module ReverseMarkdown
25
33
  def treat_text(node)
26
34
  text = node.text
27
35
  text = preserve_nbsp(text)
28
- text = remove_border_newlines(text)
36
+ text = remove_border_newlines(text, node)
29
37
  text = remove_inner_newlines(text)
30
38
  text = escape_keychars(text)
31
39
 
@@ -43,8 +51,39 @@ module ReverseMarkdown
43
51
  text.gsub(/[<>]/, '>' => '\>', '<' => '\<')
44
52
  end
45
53
 
46
- def remove_border_newlines(text)
47
- text.gsub(/\A\n+/, '').gsub(/\n+\z/, '')
54
+ def remove_border_newlines(text, node)
55
+ # Convert leading newlines to space if there's preceding inline content
56
+ result = if has_adjacent_inline_content?(node, :previous)
57
+ text.gsub(/\A\n+/, ' ')
58
+ else
59
+ text.gsub(/\A\n+/, '')
60
+ end
61
+
62
+ # Convert trailing newlines to space if there's following inline content
63
+ if has_adjacent_inline_content?(node, :next)
64
+ result.gsub(/\n+\z/, ' ')
65
+ else
66
+ result.gsub(/\n+\z/, '')
67
+ end
68
+ end
69
+
70
+ def has_adjacent_inline_content?(node, direction)
71
+ sibling = direction == :next ? node.next_sibling : node.previous_sibling
72
+ while sibling
73
+ if sibling.text?
74
+ return true unless sibling.text.strip.empty?
75
+ elsif INLINE_ELEMENTS.include?(sibling.name.to_sym)
76
+ return true
77
+ else
78
+ return false
79
+ end
80
+ sibling = direction == :next ? sibling.next_sibling : sibling.previous_sibling
81
+ end
82
+
83
+ parent = node.parent
84
+ return false unless INLINE_ELEMENTS.include?(parent.name.to_sym)
85
+
86
+ has_adjacent_inline_content?(parent, direction)
48
87
  end
49
88
 
50
89
  def remove_inner_newlines(text)
@@ -1,3 +1,3 @@
1
1
  module ReverseMarkdown
2
- VERSION = '3.0.1'
2
+ VERSION = '3.0.2'
3
3
  end
@@ -24,5 +24,4 @@ Gem::Specification.new do |s|
24
24
  s.add_development_dependency 'rake'
25
25
  s.add_development_dependency 'kramdown'
26
26
  s.add_development_dependency 'debug' unless RUBY_ENGINE == 'jruby'
27
- s.add_development_dependency 'codeclimate-test-reporter'
28
27
  end
@@ -9,7 +9,7 @@
9
9
  Even with stripped elements inbetween: !<span><a href="http://still.not.an.image.foobar.com">there</a></span> should be an extra space.
10
10
 
11
11
  ignore <a href="foo.html"> </a> anchor tags with no link text
12
- not ignore <a href="foo.html"><img src="image.png" alt="An Image" /></a> anchor tags with images
12
+ not ignore <a href="foo.html"><img src="image.png" alt="An Image"></a> anchor tags with images
13
13
  pass through the text of <a href="#content">internal jumplinks</a> without treating them as links
14
14
  pass through the text of <a id="content">anchor tags with no href</a> without treating them as links
15
15
 
@@ -11,7 +11,7 @@
11
11
  <em>em tag content</em>
12
12
  before <em></em> and after empty em tags
13
13
  before <em> </em> and after em tags containing whitespace
14
- before <em> <em> <br /> </em> </em> and after em tags containing whitespace
14
+ before <em> <em> <br> </em> </em> and after em tags containing whitespace
15
15
  <em><em>double em tags</em></em>
16
16
  <p><em><em>double em tags in p tag</em></em></p>
17
17
  a<em> em with leading and trailing </em>whitespace
@@ -22,7 +22,7 @@
22
22
  <strong>strong tag content</strong>
23
23
  before <strong></strong> and after empty strong tags
24
24
  before <strong> </strong> and after strong tags containing whitespace
25
- before <strong> <strong> <br /> </strong> </strong> and after strong tags containing whitespace
25
+ before <strong> <strong> <br> </strong> </strong> and after strong tags containing whitespace
26
26
  <strong><strong>double strong tags</strong></strong>
27
27
  <p><strong><strong>double strong tags in p tag</strong></strong></p>
28
28
  before
@@ -39,10 +39,10 @@
39
39
  <b>b tag content</b>
40
40
  <i>i tag content</i>
41
41
 
42
- br tags become double space followed by newline<br/>
42
+ br tags become double space followed by newline<br>
43
43
 
44
44
  before hr
45
- <hr/>
45
+ <hr>
46
46
  after hr
47
47
 
48
48
  <div>section 1</div>
@@ -19,4 +19,3 @@ end tell
19
19
 
20
20
  </body>
21
21
  </html>
22
-
@@ -1,14 +1,14 @@
1
1
  <p>
2
2
  <strong>
3
3
  <strong>
4
- .<br />
4
+ .<br>
5
5
  </strong>
6
6
  *** intentcast
7
7
  </strong>
8
8
  : logo design
9
9
  <strong>
10
10
  <strong>
11
- <br />
11
+ <br>
12
12
  </strong>
13
13
  </strong>
14
14
  <strong>
@@ -1,3 +1,3 @@
1
1
  naked text 1
2
2
  <p>paragraph text</p>
3
- naked text 2
3
+ naked text 2
@@ -35,7 +35,7 @@
35
35
  <ul>
36
36
  <li class="toclevel-1 tocsection-1"><a href="Basic_concepts"><span class="tocnumber">1</span> <span class="toctext">Basic concepts</span></a></li>
37
37
  <li class="toclevel-1 tocsection-2"><a href="History_of_the_idea"><span class="tocnumber">2</span> <span class="toctext">History of the idea</span></a></li>
38
- <li class="toclevel-1 tocsection-3"><a href="Intelligence_explosion"><span class="tocnumber">3</span> <span class="toctext">Intelligence explosion</span></a>
38
+ <li class="toclevel-1 tocsection-3"><a href="Intelligence_explosion"><span class="tocnumber">3</span> <span class="toctext">Intelligence explosion</span></a></li>
39
39
  </ul>
40
40
 
41
41
  <ul>
@@ -86,7 +86,7 @@
86
86
  <li>bravo alpha</li>
87
87
  <li>bravo bravo
88
88
  <ul>
89
- <li>bravo bravo alpha</i>
89
+ <li>bravo bravo alpha</li>
90
90
  </ul>
91
91
  </li>
92
92
  </ul>
@@ -21,4 +21,4 @@
21
21
  <strong><em>Combination:&nbsp;</em></strong>
22
22
  </p>
23
23
  </body>
24
- </html>
24
+ </html>
@@ -40,4 +40,32 @@ describe ReverseMarkdown do
40
40
  it { is_expected.to match /before hr \n\* \* \*\n after hr/ }
41
41
 
42
42
  it { is_expected.to match /section 1\n ?\nsection 2/ }
43
+
44
+ describe 'whitespace handling between inline elements' do
45
+ it 'preserves whitespace (including newlines) between spans' do
46
+ input = "<span>Hello\n</span><span>World</span>"
47
+ result = ReverseMarkdown.convert(input)
48
+ expect(result).to eq "Hello World"
49
+ end
50
+
51
+ it 'preserves whitespace between inline elements in paragraphs' do
52
+ input = "<p><span>Hello\n</span><span>World</span></p>"
53
+ result = ReverseMarkdown.convert(input)
54
+ expect(result).to eq "Hello World\n\n"
55
+ end
56
+
57
+ it 'preserves whitespace between nested inline elements' do
58
+ # The text "A" is nested inside <span> inside <em>, but <em> has a following sibling
59
+ # This requires traversing up through parent nodes to find following content
60
+ input = "<p><em><span>A\n</span></em><span>B</span></p>"
61
+ result = ReverseMarkdown.convert(input)
62
+ expect(result).to eq "_A_ B\n\n"
63
+ end
64
+
65
+ it 'preserves whitespace surrounding links' do
66
+ # Issue #91: newlines around inline elements should become spaces
67
+ result = ReverseMarkdown.convert("a\n<a href='1'>link</a>\nis good")
68
+ expect(result.strip).to eq "a [link](1) is good"
69
+ end
70
+ end
43
71
  end
@@ -6,7 +6,7 @@ describe ReverseMarkdown do
6
6
  subject { ReverseMarkdown.convert(input) }
7
7
 
8
8
  it "should make sense of strong-crazy markup (as seen in the wild)" do
9
- expect(subject).to include "**. \n \\*\\*\\* intentcast** : logo design \n **.**\n\n"
9
+ expect(subject).to include "**. \n \\*\\*\\* intentcast** : logo design \n **.**\n\n"
10
10
  end
11
11
 
12
12
  it "should not over escape * or _" do
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown::Converters::Em do
4
+ let(:converter) { ReverseMarkdown::Converters::Em.new }
5
+
6
+ it 'returns an empty string if the node is empty' do
7
+ input = node_for('<em></em>')
8
+ expect(converter.convert(input)).to eq ''
9
+ end
10
+
11
+ it 'returns just the content if the em tag is nested in another em' do
12
+ input = node_for('<em><em>foo</em></em>')
13
+ expect(converter.convert(input.children.first, already_italic: true)).to eq 'foo'
14
+ end
15
+
16
+ it 'moves border whitespaces outside of the delimiters tag' do
17
+ input = node_for("<em> \n foo </em>")
18
+ expect(converter.convert(input)).to eq " _foo_ "
19
+ end
20
+
21
+ it 'splits markers at paragraph breaks' do
22
+ # Issue #95: <br><br> inside em creates a paragraph break
23
+ # Markers must be split so markdown renders correctly
24
+ result = ReverseMarkdown.convert('<em>hello<br><br>world</em>')
25
+ expect(result).to include('_hello_')
26
+ expect(result).to include('_world_')
27
+ end
28
+ end
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown::Converters::H do
4
+ let(:converter) { ReverseMarkdown::Converters::H.new }
5
+
6
+ it 'merges line breaks into single line' do
7
+ # Markdown headings can't span multiple lines, so merge them
8
+ result = ReverseMarkdown.convert('<h1>foo<br>bar</h1>')
9
+ expect(result.strip).to eq '# foo bar'
10
+ end
11
+
12
+ it 'handles multiple line breaks' do
13
+ result = ReverseMarkdown.convert('<h2>a<br>b<br>c</h2>')
14
+ expect(result.strip).to eq '## a b c'
15
+ end
16
+ end
@@ -17,4 +17,12 @@ describe ReverseMarkdown::Converters::Strong do
17
17
  input = node_for("<strong> \n foo </strong>")
18
18
  expect(converter.convert(input)).to eq " **foo** "
19
19
  end
20
+
21
+ it 'splits markers at paragraph breaks' do
22
+ # Issue #95: <br><br> inside strong creates a paragraph break
23
+ # Markers must be split so markdown renders correctly
24
+ result = ReverseMarkdown.convert('<strong>hello<br><br>world</strong>')
25
+ expect(result).to include('**hello**')
26
+ expect(result).to include('**world**')
27
+ end
20
28
  end
@@ -22,7 +22,7 @@ describe ReverseMarkdown::Converters::Text do
22
22
  expect(result).to eq 'foo bar'
23
23
  end
24
24
 
25
- it 'removes trailing newlines' do
25
+ it 'removes trailing newlines when no following content' do
26
26
  input = node_for("<p>foo bar\n\n</p>")
27
27
  result = converter.convert(input)
28
28
  expect(result).to eq 'foo bar'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reverse_markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 3.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Johannes Opper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-12-05 00:00:00.000000000 Z
11
+ date: 2026-01-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -94,20 +94,6 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: codeclimate-test-reporter
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - ">="
102
- - !ruby/object:Gem::Version
103
- version: '0'
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - ">="
109
- - !ruby/object:Gem::Version
110
- version: '0'
111
97
  description: Map simple html back into markdown, e.g. if you want to import existing
112
98
  html data in your application.
113
99
  email:
@@ -195,7 +181,9 @@ files:
195
181
  - spec/lib/reverse_markdown/converters/br_spec.rb
196
182
  - spec/lib/reverse_markdown/converters/del_spec.rb
197
183
  - spec/lib/reverse_markdown/converters/details_spec.rb
184
+ - spec/lib/reverse_markdown/converters/em_spec.rb
198
185
  - spec/lib/reverse_markdown/converters/figure_spec.rb
186
+ - spec/lib/reverse_markdown/converters/h_spec.rb
199
187
  - spec/lib/reverse_markdown/converters/li_spec.rb
200
188
  - spec/lib/reverse_markdown/converters/pre_spec.rb
201
189
  - spec/lib/reverse_markdown/converters/strong_spec.rb
@@ -260,7 +248,9 @@ test_files:
260
248
  - spec/lib/reverse_markdown/converters/br_spec.rb
261
249
  - spec/lib/reverse_markdown/converters/del_spec.rb
262
250
  - spec/lib/reverse_markdown/converters/details_spec.rb
251
+ - spec/lib/reverse_markdown/converters/em_spec.rb
263
252
  - spec/lib/reverse_markdown/converters/figure_spec.rb
253
+ - spec/lib/reverse_markdown/converters/h_spec.rb
264
254
  - spec/lib/reverse_markdown/converters/li_spec.rb
265
255
  - spec/lib/reverse_markdown/converters/pre_spec.rb
266
256
  - spec/lib/reverse_markdown/converters/strong_spec.rb