reverse_markdown 2.1.1 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yml +35 -0
  3. data/.tool-versions +1 -0
  4. data/CHANGELOG.md +16 -0
  5. data/Gemfile +1 -1
  6. data/README.md +8 -2
  7. data/lib/reverse_markdown/converters/base.rb +25 -1
  8. data/lib/reverse_markdown/converters/blockquote.rb +1 -1
  9. data/lib/reverse_markdown/converters/div.rb +1 -1
  10. data/lib/reverse_markdown/converters/em.rb +1 -1
  11. data/lib/reverse_markdown/converters/figcaption.rb +1 -1
  12. data/lib/reverse_markdown/converters/h.rb +4 -1
  13. data/lib/reverse_markdown/converters/iframe.rb +11 -0
  14. data/lib/reverse_markdown/converters/ol.rb +1 -1
  15. data/lib/reverse_markdown/converters/p.rb +1 -1
  16. data/lib/reverse_markdown/converters/pre.rb +2 -2
  17. data/lib/reverse_markdown/converters/strong.rb +1 -1
  18. data/lib/reverse_markdown/converters/table.rb +1 -1
  19. data/lib/reverse_markdown/converters/text.rb +42 -3
  20. data/lib/reverse_markdown/version.rb +1 -1
  21. data/lib/reverse_markdown.rb +1 -0
  22. data/reverse_markdown.gemspec +1 -2
  23. data/spec/assets/anchors.html +1 -1
  24. data/spec/assets/basic.html +4 -4
  25. data/spec/assets/code.html +0 -1
  26. data/spec/assets/from_the_wild.html +2 -2
  27. data/spec/assets/html_fragment.html +1 -1
  28. data/spec/assets/iframe.html +4 -0
  29. data/spec/assets/lists.html +7 -2
  30. data/spec/assets/paragraphs.html +1 -1
  31. data/spec/components/basic_spec.rb +28 -0
  32. data/spec/components/from_the_wild_spec.rb +1 -3
  33. data/spec/components/html_fragment_spec.rb +0 -2
  34. data/spec/components/iframe_spec.rb +22 -0
  35. data/spec/components/lists_spec.rb +3 -0
  36. data/spec/lib/reverse_markdown/converters/em_spec.rb +28 -0
  37. data/spec/lib/reverse_markdown/converters/h_spec.rb +16 -0
  38. data/spec/lib/reverse_markdown/converters/strong_spec.rb +8 -0
  39. data/spec/lib/reverse_markdown/converters/text_spec.rb +1 -1
  40. data/spec/lib/reverse_markdown_spec.rb +2 -1
  41. data/spec/spec_helper.rb +1 -1
  42. metadata +18 -22
  43. data/.travis.yml +0 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cffa2a16f5ad4187cb23a2e6a1015121b943f1c83a688308cf0a480ec15d3c49
4
- data.tar.gz: 6941ad7c07005ff4d5c115fd894ee2a0cf40a09367833268f4c55c018f30263a
3
+ metadata.gz: d374ce991c236ccd344ef88d17531e3dc845bc4eb76015f5237f2646be1b3b66
4
+ data.tar.gz: 3bb3e85c76d512647fe7c70f983b22113775c2ab02bc7410dec85cfd06c46cb8
5
5
  SHA512:
6
- metadata.gz: 620ce03beae7238accdc6836e025dc7b19fe308a7bcbcd7143dc7472d09e7d25758a3f6c2080d5d75e60fee662bc87ab200f177b5b6f41fa5958bde4b569a49e
7
- data.tar.gz: ea69f20a1e70e4f41d6024502af362351ccaf6ec8936b4dcadc2414202e994cdf8ddf08ebbd829c0722c373bbe7e87a223c3c913bf6d845be9308ef418f3be75
6
+ metadata.gz: 1c860d94f4b07ae28cd2553e86c6538ebcaecd861a25bb695879ed2f69b6da6459c2ff9884d7c86127c18dac6217ddd3b41ee8e069eaa012e04b9d6d280cebd4
7
+ data.tar.gz: 1025d7a735a38f59d2e921d5e7fb29e41b007a238eca7052bba16b319316695a50dc15f1729b47f2c27887244000b0c5a2b3d386be1765ee0c37578f5be2978d
data/.github/workflows/ci.yml ADDED
@@ -0,0 +1,35 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ pull_request:
8
+ branches:
9
+ - master
10
+
11
+ jobs:
12
+ test:
13
+ runs-on: ubuntu-latest
14
+ strategy:
15
+ matrix:
16
+ ruby-version: [ '2.7', '3.0', '3.1', '3.2', '3.3', '4.0', 'jruby-9.4' ]
17
+
18
+ steps:
19
+ - name: Checkout code
20
+ uses: actions/checkout@v3
21
+
22
+ - name: Set up Ruby
23
+ uses: ruby/setup-ruby@v1
24
+ with:
25
+ ruby-version: ${{ matrix.ruby-version }}
26
+ bundler-cache: true
27
+
28
+ - name: Install dependencies
29
+ run: bundle install
30
+
31
+ - name: Run tests
32
+ run: bundle exec rspec
33
+
34
+ - name: Run tests
35
+ run: bundle exec rspec
data/.tool-versions ADDED
@@ -0,0 +1 @@
1
+ ruby 3.3.5
data/CHANGELOG.md CHANGED
@@ -1,6 +1,22 @@
1
1
  # Change Log
2
2
  All notable changes to this project will be documented in this file.
3
3
 
4
+ ## 3.0.2 - January 2026
5
+ - Add support for ruby 4.0
6
+ - Some cleanup of test cases, thanks @joelhawksley, see #107
7
+ - Fix whitespace collapsing between inline elements, see #34
8
+ - Fix whitespace around links, see #91
9
+ - Split emphasis markers at paragraph breaks, merge heading lines, see #95
10
+
11
+ ## 3.0.1 - December 2025
12
+ - Use https instead of http
13
+
14
+ ## 3.0.0 - October 2024
15
+ - BREAKING: Dropped support for ruby 2.6.0 and lower
16
+ - Bugfix for missing newline behind `ol`, thanks @Kevinrob, see #104
17
+ - Add support for `iframe` tags, thanks @gagandeepsinghj, see #102
18
+ - Support for frozen string literals, thanks @pat, see #105
19
+
4
20
  ## 2.1.1 - October 2021
5
21
  - Fixes unintentional newline characters within lists with paragraphs, thanks @diogoosorio, see #93
6
22
  - Lets \n to be present in <pre> tag. solves #77 #78, thanks @shivabhusal
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source "http://rubygems.org"
1
+ source "https://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in reverse_markdown.gemspec
4
4
  gemspec
data/README.md CHANGED
@@ -2,16 +2,22 @@
2
2
 
3
3
  Transform html into markdown. Useful for example if you want to import html into your markdown based application.
4
4
 
5
- [![Build Status](https://secure.travis-ci.org/xijo/reverse_markdown.svg?branch=master)](https://travis-ci.org/xijo/reverse_markdown) [![Gem Version](https://badge.fury.io/rb/reverse_markdown.svg)](http://badge.fury.io/rb/reverse_markdown) [![Code Climate](https://codeclimate.com/github/xijo/reverse_markdown.svg)](https://codeclimate.com/github/xijo/reverse_markdown) [![Code Climate](https://codeclimate.com/github/xijo/reverse_markdown/coverage.png)](https://codeclimate.com/github/xijo/reverse_markdown)
5
+ ![Build Status](https://github.com/xijo/reverse_markdown/actions/workflows/ci.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/reverse_markdown.svg)](http://badge.fury.io/rb/reverse_markdown) [![Code Climate](https://codeclimate.com/github/xijo/reverse_markdown.svg)](https://codeclimate.com/github/xijo/reverse_markdown) [![Test Coverage](https://api.codeclimate.com/v1/badges/6ce481ba7ae6f57dc4d3/test_coverage)](https://codeclimate.com/github/xijo/reverse_markdown/test_coverage)
6
6
 
7
7
  ## Changelog
8
8
 
9
9
  See [Change Log](CHANGELOG.md)
10
10
 
11
+ ## Limitations
12
+
13
+ A perfect HTML to Markdown conversion is not possible. HTML is far more expressive than Markdown - it supports tables with merged cells, arbitrary nesting, inline styles, and countless other features that have no Markdown equivalent.
14
+
15
+ This gem aims to provide good enough defaults for most common cases. It handles standard content well but does not attempt to solve every edge case. If you have highly specific conversion needs, you can [write custom converters](https://github.com/xijo/reverse_markdown/wiki/Write-your-own-converter) to handle them.
16
+
11
17
  ## Requirements
12
18
 
13
19
  1. [Nokogiri](http://nokogiri.org/)
14
- 2. Ruby 2.0.0 or higher
20
+ 2. Ruby 2.7.0 or higher
15
21
 
16
22
  ## Installation
17
23
 
data/lib/reverse_markdown/converters/base.rb CHANGED
@@ -2,7 +2,7 @@ module ReverseMarkdown
2
2
  module Converters
3
3
  class Base
4
4
  def treat_children(node, state)
5
- node.children.inject('') do |memo, child|
5
+ node.children.inject(+'') do |memo, child|
6
6
  memo << treat(child, state)
7
7
  end
8
8
  end
@@ -15,10 +15,34 @@ module ReverseMarkdown
15
15
  string.gsub(/(?<!\\)[*_]/, '*' => '\*', '_' => '\_')
16
16
  end
17
17
 
18
+ # Wrap content with markers (e.g., ** or _), splitting at paragraph breaks
19
+ # so markers don't span across breaks (which breaks markdown rendering)
20
+ def wrap_with_markers(content, marker)
21
+ # Split on paragraph breaks, preserving the breaks
22
+ segments = content.split(/(\s*\n\s*\n\s*)/)
23
+
24
+ segments.map.with_index do |segment, i|
25
+ if i.odd? # This is a break segment (captured delimiter)
26
+ segment
27
+ elsif segment.strip.empty?
28
+ segment
29
+ else
30
+ # Wrap with markers, preserving border whitespace
31
+ leading = segment[/\A\s*/]
32
+ trailing = segment[/\s*\z/]
33
+ "#{leading}#{marker}#{segment.strip}#{marker}#{trailing}"
34
+ end
35
+ end.join
36
+ end
37
+
18
38
  def extract_title(node)
19
39
  title = escape_keychars(node['title'].to_s)
20
40
  title.empty? ? '' : %[ "#{title}"]
21
41
  end
42
+
43
+ def extract_src(node)
44
+ node['src'].to_s.empty? ? '' : node['src'].to_s
45
+ end
22
46
  end
23
47
  end
24
48
  end
data/lib/reverse_markdown/converters/blockquote.rb CHANGED
@@ -4,7 +4,7 @@ module ReverseMarkdown
4
4
  def convert(node, state = {})
5
5
  content = treat_children(node, state).strip
6
6
  content = ReverseMarkdown.cleaner.remove_newlines(content)
7
- "\n\n> " << content.lines.to_a.join('> ') << "\n\n"
7
+ +"\n\n> " << content.lines.to_a.join('> ') << "\n\n"
8
8
  end
9
9
  end
10
10
 
data/lib/reverse_markdown/converters/div.rb CHANGED
@@ -2,7 +2,7 @@ module ReverseMarkdown
2
2
  module Converters
3
3
  class Div < Base
4
4
  def convert(node, state = {})
5
- "\n" << treat_children(node, state) << "\n"
5
+ +"\n" << treat_children(node, state) << "\n"
6
6
  end
7
7
  end
8
8
 
data/lib/reverse_markdown/converters/em.rb CHANGED
@@ -6,7 +6,7 @@ module ReverseMarkdown
6
6
  if content.strip.empty? || state[:already_italic]
7
7
  content
8
8
  else
9
- "#{content[/^\s*/]}_#{content.strip}_#{content[/\s*$/]}"
9
+ wrap_with_markers(content, '_')
10
10
  end
11
11
  end
12
12
  end
data/lib/reverse_markdown/converters/figcaption.rb CHANGED
@@ -5,7 +5,7 @@ module ReverseMarkdown
5
5
  if node.text.strip.empty?
6
6
  ""
7
7
  else
8
- "\n" << "_#{node.text.strip}_" << "\n"
8
+ +"\n" << "_#{node.text.strip}_" << "\n"
9
9
  end
10
10
  end
11
11
  end
data/lib/reverse_markdown/converters/h.rb CHANGED
@@ -3,7 +3,10 @@ module ReverseMarkdown
3
3
  class H < Base
4
4
  def convert(node, state = {})
5
5
  prefix = '#' * node.name[/\d/].to_i
6
- ["\n", prefix, ' ', treat_children(node, state), "\n"].join
6
+ content = treat_children(node, state).strip
7
+ # Merge lines into one (markdown headings can't span multiple lines)
8
+ content = content.split(/\s*\n\s*/).join(' ')
9
+ "\n#{prefix} #{content}\n"
7
10
  end
8
11
  end
9
12
 
data/lib/reverse_markdown/converters/iframe.rb ADDED
@@ -0,0 +1,11 @@
1
+ module ReverseMarkdown
2
+ module Converters
3
+ class Iframe < Base
4
+ def convert(node, state = {})
5
+ extract_src(node)
6
+ end
7
+ end
8
+
9
+ register :iframe, Iframe.new
10
+ end
11
+ end
data/lib/reverse_markdown/converters/ol.rb CHANGED
@@ -3,7 +3,7 @@ module ReverseMarkdown
3
3
  class Ol < Base
4
4
  def convert(node, state = {})
5
5
  ol_count = state.fetch(:ol_count, 0) + 1
6
- "\n" << treat_children(node, state.merge(ol_count: ol_count))
6
+ +"\n" << treat_children(node, state.merge(ol_count: ol_count)) << "\n"
7
7
  end
8
8
  end
9
9
 
data/lib/reverse_markdown/converters/p.rb CHANGED
@@ -2,7 +2,7 @@ module ReverseMarkdown
2
2
  module Converters
3
3
  class P < Base
4
4
  def convert(node, state = {})
5
- "\n\n" << treat_children(node, state).strip << "\n\n"
5
+ +"\n\n" << treat_children(node, state).strip << "\n\n"
6
6
  end
7
7
  end
8
8
 
data/lib/reverse_markdown/converters/pre.rb CHANGED
@@ -4,9 +4,9 @@ module ReverseMarkdown
4
4
  def convert(node, state = {})
5
5
  content = treat_children(node, state)
6
6
  if ReverseMarkdown.config.github_flavored
7
- "\n```#{language(node)}\n" << content.strip << "\n```\n"
7
+ +"\n```#{language(node)}\n" << content.strip << "\n```\n"
8
8
  else
9
- "\n\n " << content.lines.to_a.join(" ") << "\n\n"
9
+ +"\n\n " << content.lines.to_a.join(" ") << "\n\n"
10
10
  end
11
11
  end
12
12
 
data/lib/reverse_markdown/converters/strong.rb CHANGED
@@ -6,7 +6,7 @@ module ReverseMarkdown
6
6
  if content.strip.empty? || state[:already_strong]
7
7
  content
8
8
  else
9
- "#{content[/^\s*/]}**#{content.strip}**#{content[/\s*$/]}"
9
+ wrap_with_markers(content, '**')
10
10
  end
11
11
  end
12
12
  end
data/lib/reverse_markdown/converters/table.rb CHANGED
@@ -2,7 +2,7 @@ module ReverseMarkdown
2
2
  module Converters
3
3
  class Table < Base
4
4
  def convert(node, state = {})
5
- "\n\n" << treat_children(node, state) << "\n"
5
+ +"\n\n" << treat_children(node, state) << "\n"
6
6
  end
7
7
  end
8
8
 
data/lib/reverse_markdown/converters/text.rb CHANGED
@@ -11,12 +11,20 @@ module ReverseMarkdown
11
11
 
12
12
  private
13
13
 
14
+ INLINE_ELEMENTS = [:a, :abbr, :b, :bdi, :bdo, :cite, :code, :data, :del,
15
+ :dfn, :em, :i, :ins, :kbd, :mark, :q, :rp, :rt, :ruby,
16
+ :s, :samp, :small, :span, :strong, :sub, :sup, :time,
17
+ :u, :var, :wbr, :font, :tt].freeze
18
+
14
19
  def treat_empty(node)
15
20
  parent = node.parent.name.to_sym
16
21
  if [:ol, :ul].include?(parent) # Otherwise the identation is broken
17
22
  ''
18
23
  elsif node.text == ' ' # Regular whitespace text node
19
24
  ' '
25
+ elsif INLINE_ELEMENTS.include?(parent) && node.text =~ /\n/
26
+ # Preserve newlines between inline elements as space (HTML whitespace collapsing)
27
+ ' '
20
28
  else
21
29
  ''
22
30
  end
@@ -25,7 +33,7 @@ module ReverseMarkdown
25
33
  def treat_text(node)
26
34
  text = node.text
27
35
  text = preserve_nbsp(text)
28
- text = remove_border_newlines(text)
36
+ text = remove_border_newlines(text, node)
29
37
  text = remove_inner_newlines(text)
30
38
  text = escape_keychars(text)
31
39
 
@@ -43,8 +51,39 @@ module ReverseMarkdown
43
51
  text.gsub(/[<>]/, '>' => '\>', '<' => '\<')
44
52
  end
45
53
 
46
- def remove_border_newlines(text)
47
- text.gsub(/\A\n+/, '').gsub(/\n+\z/, '')
54
+ def remove_border_newlines(text, node)
55
+ # Convert leading newlines to space if there's preceding inline content
56
+ result = if has_adjacent_inline_content?(node, :previous)
57
+ text.gsub(/\A\n+/, ' ')
58
+ else
59
+ text.gsub(/\A\n+/, '')
60
+ end
61
+
62
+ # Convert trailing newlines to space if there's following inline content
63
+ if has_adjacent_inline_content?(node, :next)
64
+ result.gsub(/\n+\z/, ' ')
65
+ else
66
+ result.gsub(/\n+\z/, '')
67
+ end
68
+ end
69
+
70
+ def has_adjacent_inline_content?(node, direction)
71
+ sibling = direction == :next ? node.next_sibling : node.previous_sibling
72
+ while sibling
73
+ if sibling.text?
74
+ return true unless sibling.text.strip.empty?
75
+ elsif INLINE_ELEMENTS.include?(sibling.name.to_sym)
76
+ return true
77
+ else
78
+ return false
79
+ end
80
+ sibling = direction == :next ? sibling.next_sibling : sibling.previous_sibling
81
+ end
82
+
83
+ parent = node.parent
84
+ return false unless INLINE_ELEMENTS.include?(parent.name.to_sym)
85
+
86
+ has_adjacent_inline_content?(parent, direction)
48
87
  end
49
88
 
50
89
  def remove_inner_newlines(text)
data/lib/reverse_markdown/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module ReverseMarkdown
2
- VERSION = '2.1.1'
2
+ VERSION = '3.0.2'
3
3
  end
data/lib/reverse_markdown.rb CHANGED
@@ -31,6 +31,7 @@ require 'reverse_markdown/converters/table'
31
31
  require 'reverse_markdown/converters/td'
32
32
  require 'reverse_markdown/converters/text'
33
33
  require 'reverse_markdown/converters/tr'
34
+ require 'reverse_markdown/converters/iframe'
34
35
 
35
36
  module ReverseMarkdown
36
37
 
data/reverse_markdown.gemspec CHANGED
@@ -23,6 +23,5 @@ Gem::Specification.new do |s|
23
23
  s.add_development_dependency 'simplecov'
24
24
  s.add_development_dependency 'rake'
25
25
  s.add_development_dependency 'kramdown'
26
- s.add_development_dependency 'byebug'
27
- s.add_development_dependency 'codeclimate-test-reporter'
26
+ s.add_development_dependency 'debug' unless RUBY_ENGINE == 'jruby'
28
27
  end
data/spec/assets/anchors.html CHANGED
@@ -9,7 +9,7 @@
9
9
  Even with stripped elements inbetween: !<span><a href="http://still.not.an.image.foobar.com">there</a></span> should be an extra space.
10
10
 
11
11
  ignore <a href="foo.html"> </a> anchor tags with no link text
12
- not ignore <a href="foo.html"><img src="image.png" alt="An Image" /></a> anchor tags with images
12
+ not ignore <a href="foo.html"><img src="image.png" alt="An Image"></a> anchor tags with images
13
13
  pass through the text of <a href="#content">internal jumplinks</a> without treating them as links
14
14
  pass through the text of <a id="content">anchor tags with no href</a> without treating them as links
15
15
 
data/spec/assets/basic.html CHANGED
@@ -11,7 +11,7 @@
11
11
  <em>em tag content</em>
12
12
  before <em></em> and after empty em tags
13
13
  before <em> </em> and after em tags containing whitespace
14
- before <em> <em> <br /> </em> </em> and after em tags containing whitespace
14
+ before <em> <em> <br> </em> </em> and after em tags containing whitespace
15
15
  <em><em>double em tags</em></em>
16
16
  <p><em><em>double em tags in p tag</em></em></p>
17
17
  a<em> em with leading and trailing </em>whitespace
@@ -22,7 +22,7 @@
22
22
  <strong>strong tag content</strong>
23
23
  before <strong></strong> and after empty strong tags
24
24
  before <strong> </strong> and after strong tags containing whitespace
25
- before <strong> <strong> <br /> </strong> </strong> and after strong tags containing whitespace
25
+ before <strong> <strong> <br> </strong> </strong> and after strong tags containing whitespace
26
26
  <strong><strong>double strong tags</strong></strong>
27
27
  <p><strong><strong>double strong tags in p tag</strong></strong></p>
28
28
  before
@@ -39,10 +39,10 @@
39
39
  <b>b tag content</b>
40
40
  <i>i tag content</i>
41
41
 
42
- br tags become double space followed by newline<br/>
42
+ br tags become double space followed by newline<br>
43
43
 
44
44
  before hr
45
- <hr/>
45
+ <hr>
46
46
  after hr
47
47
 
48
48
  <div>section 1</div>
data/spec/assets/code.html CHANGED
@@ -19,4 +19,3 @@ end tell
19
19
 
20
20
  </body>
21
21
  </html>
22
-
data/spec/assets/from_the_wild.html CHANGED
@@ -1,14 +1,14 @@
1
1
  <p>
2
2
  <strong>
3
3
  <strong>
4
- .<br />
4
+ .<br>
5
5
  </strong>
6
6
  *** intentcast
7
7
  </strong>
8
8
  : logo design
9
9
  <strong>
10
10
  <strong>
11
- <br />
11
+ <br>
12
12
  </strong>
13
13
  </strong>
14
14
  <strong>
data/spec/assets/html_fragment.html CHANGED
@@ -1,3 +1,3 @@
1
1
  naked text 1
2
2
  <p>paragraph text</p>
3
- naked text 2
3
+ naked text 2
data/spec/assets/iframe.html ADDED
@@ -0,0 +1,4 @@
1
+ <h1>Welcome to My Page</h1>
2
+ <p>This is a sample paragraph before the iframe.</p>
3
+ <iframe src="https://www.example.com" width="600" height="400"></iframe>
4
+ <p>This is a sample paragraph after the iframe.</p>
data/spec/assets/lists.html CHANGED
@@ -35,7 +35,7 @@
35
35
  <ul>
36
36
  <li class="toclevel-1 tocsection-1"><a href="Basic_concepts"><span class="tocnumber">1</span> <span class="toctext">Basic concepts</span></a></li>
37
37
  <li class="toclevel-1 tocsection-2"><a href="History_of_the_idea"><span class="tocnumber">2</span> <span class="toctext">History of the idea</span></a></li>
38
- <li class="toclevel-1 tocsection-3"><a href="Intelligence_explosion"><span class="tocnumber">3</span> <span class="toctext">Intelligence explosion</span></a>
38
+ <li class="toclevel-1 tocsection-3"><a href="Intelligence_explosion"><span class="tocnumber">3</span> <span class="toctext">Intelligence explosion</span></a></li>
39
39
  </ul>
40
40
 
41
41
  <ul>
@@ -86,7 +86,7 @@
86
86
  <li>bravo alpha</li>
87
87
  <li>bravo bravo
88
88
  <ul>
89
- <li>bravo bravo alpha</i>
89
+ <li>bravo bravo alpha</li>
90
90
  </ul>
91
91
  </li>
92
92
  </ul>
@@ -95,5 +95,10 @@
95
95
  <li>delta</li>
96
96
  </ul>
97
97
 
98
+ <ul>
99
+ <li>item followed with a text</li>
100
+ </ul>
101
+ text after the list
102
+
98
103
  </body>
99
104
  </html>
data/spec/assets/paragraphs.html CHANGED
@@ -21,4 +21,4 @@
21
21
  <strong><em>Combination:&nbsp;</em></strong>
22
22
  </p>
23
23
  </body>
24
- </html>
24
+ </html>
data/spec/components/basic_spec.rb CHANGED
@@ -40,4 +40,32 @@ describe ReverseMarkdown do
40
40
  it { is_expected.to match /before hr \n\* \* \*\n after hr/ }
41
41
 
42
42
  it { is_expected.to match /section 1\n ?\nsection 2/ }
43
+
44
+ describe 'whitespace handling between inline elements' do
45
+ it 'preserves whitespace (including newlines) between spans' do
46
+ input = "<span>Hello\n</span><span>World</span>"
47
+ result = ReverseMarkdown.convert(input)
48
+ expect(result).to eq "Hello World"
49
+ end
50
+
51
+ it 'preserves whitespace between inline elements in paragraphs' do
52
+ input = "<p><span>Hello\n</span><span>World</span></p>"
53
+ result = ReverseMarkdown.convert(input)
54
+ expect(result).to eq "Hello World\n\n"
55
+ end
56
+
57
+ it 'preserves whitespace between nested inline elements' do
58
+ # The text "A" is nested inside <span> inside <em>, but <em> has a following sibling
59
+ # This requires traversing up through parent nodes to find following content
60
+ input = "<p><em><span>A\n</span></em><span>B</span></p>"
61
+ result = ReverseMarkdown.convert(input)
62
+ expect(result).to eq "_A_ B\n\n"
63
+ end
64
+
65
+ it 'preserves whitespace surrounding links' do
66
+ # Issue #91: newlines around inline elements should become spaces
67
+ result = ReverseMarkdown.convert("a\n<a href='1'>link</a>\nis good")
68
+ expect(result.strip).to eq "a [link](1) is good"
69
+ end
70
+ end
43
71
  end
data/spec/components/from_the_wild_spec.rb CHANGED
@@ -1,17 +1,15 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe ReverseMarkdown do
4
-
5
4
  let(:input) { File.read('spec/assets/from_the_wild.html') }
6
5
  let(:document) { Nokogiri::HTML(input) }
7
6
  subject { ReverseMarkdown.convert(input) }
8
7
 
9
8
  it "should make sense of strong-crazy markup (as seen in the wild)" do
10
- expect(subject).to include "**. \n \\*\\*\\* intentcast** : logo design \n **.**\n\n"
9
+ expect(subject).to include "**. \n \\*\\*\\* intentcast** : logo design \n **.**\n\n"
11
10
  end
12
11
 
13
12
  it "should not over escape * or _" do
14
13
  expect(subject).to include '[![](example.com/foo_bar.png) I\_AM\_HELPFUL](example.com/foo_bar)'
15
14
  end
16
-
17
15
  end
data/spec/components/html_fragment_spec.rb CHANGED
@@ -1,11 +1,9 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe ReverseMarkdown do
4
-
5
4
  let(:input) { File.read('spec/assets/html_fragment.html') }
6
5
  let(:document) { Nokogiri::HTML(input) }
7
6
  subject { ReverseMarkdown.convert(input) }
8
7
 
9
8
  it { is_expected.to eq("naked text 1\n\nparagraph text\n\nnaked text 2") }
10
9
  end
11
-
data/spec/components/iframe_spec.rb ADDED
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown do
4
+ let(:input) { File.read('spec/assets/iframe.html') }
5
+ let(:document) { Nokogiri::HTML(input) }
6
+ subject { ReverseMarkdown.convert(input) }
7
+
8
+ it do
9
+ expected = <<~MD
10
+ # Welcome to My Page
11
+
12
+ This is a sample paragraph before the iframe.
13
+
14
+ https://www.example.com
15
+
16
+ This is a sample paragraph after the iframe.
17
+
18
+ MD
19
+
20
+ expect(subject).to eq expected
21
+ end
22
+ end
data/spec/components/lists_spec.rb CHANGED
@@ -65,4 +65,7 @@ describe ReverseMarkdown do
65
65
  it { is_expected.to match /\n- delta\n/ }
66
66
  end
67
67
 
68
+ context "text following list should have a new line separator" do
69
+ it { is_expected.to match /\n- item followed with a text\n\n text after the list/ }
70
+ end
68
71
  end
data/spec/lib/reverse_markdown/converters/em_spec.rb ADDED
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown::Converters::Em do
4
+ let(:converter) { ReverseMarkdown::Converters::Em.new }
5
+
6
+ it 'returns an empty string if the node is empty' do
7
+ input = node_for('<em></em>')
8
+ expect(converter.convert(input)).to eq ''
9
+ end
10
+
11
+ it 'returns just the content if the em tag is nested in another em' do
12
+ input = node_for('<em><em>foo</em></em>')
13
+ expect(converter.convert(input.children.first, already_italic: true)).to eq 'foo'
14
+ end
15
+
16
+ it 'moves border whitespaces outside of the delimiters tag' do
17
+ input = node_for("<em> \n foo </em>")
18
+ expect(converter.convert(input)).to eq " _foo_ "
19
+ end
20
+
21
+ it 'splits markers at paragraph breaks' do
22
+ # Issue #95: <br><br> inside em creates a paragraph break
23
+ # Markers must be split so markdown renders correctly
24
+ result = ReverseMarkdown.convert('<em>hello<br><br>world</em>')
25
+ expect(result).to include('_hello_')
26
+ expect(result).to include('_world_')
27
+ end
28
+ end
data/spec/lib/reverse_markdown/converters/h_spec.rb ADDED
@@ -0,0 +1,16 @@
1
+ require 'spec_helper'
2
+
3
+ describe ReverseMarkdown::Converters::H do
4
+ let(:converter) { ReverseMarkdown::Converters::H.new }
5
+
6
+ it 'merges line breaks into single line' do
7
+ # Markdown headings can't span multiple lines, so merge them
8
+ result = ReverseMarkdown.convert('<h1>foo<br>bar</h1>')
9
+ expect(result.strip).to eq '# foo bar'
10
+ end
11
+
12
+ it 'handles multiple line breaks' do
13
+ result = ReverseMarkdown.convert('<h2>a<br>b<br>c</h2>')
14
+ expect(result.strip).to eq '## a b c'
15
+ end
16
+ end
data/spec/lib/reverse_markdown/converters/strong_spec.rb CHANGED
@@ -17,4 +17,12 @@ describe ReverseMarkdown::Converters::Strong do
17
17
  input = node_for("<strong> \n foo </strong>")
18
18
  expect(converter.convert(input)).to eq " **foo** "
19
19
  end
20
+
21
+ it 'splits markers at paragraph breaks' do
22
+ # Issue #95: <br><br> inside strong creates a paragraph break
23
+ # Markers must be split so markdown renders correctly
24
+ result = ReverseMarkdown.convert('<strong>hello<br><br>world</strong>')
25
+ expect(result).to include('**hello**')
26
+ expect(result).to include('**world**')
27
+ end
20
28
  end
data/spec/lib/reverse_markdown/converters/text_spec.rb CHANGED
@@ -22,7 +22,7 @@ describe ReverseMarkdown::Converters::Text do
22
22
  expect(result).to eq 'foo bar'
23
23
  end
24
24
 
25
- it 'removes trailing newlines' do
25
+ it 'removes trailing newlines when no following content' do
26
26
  input = node_for("<p>foo bar\n\n</p>")
27
27
  result = converter.convert(input)
28
28
  expect(result).to eq 'foo bar'
data/spec/lib/reverse_markdown_spec.rb CHANGED
@@ -36,7 +36,8 @@ describe ReverseMarkdown do
36
36
 
37
37
  describe 'force_encoding option', jruby: :exclude do
38
38
  it 'raises invalid byte sequence in UTF-8 exception' do
39
- expect { ReverseMarkdown.convert("hi \255") }.to raise_error(ArgumentError)
39
+ # Older versions of ruby used to raise ArgumentError here. Remove when we drop support for 3.1.
40
+ expect { ReverseMarkdown.convert("hi \255") }.to raise_error { [Encoding::CompatibilityError, ArgumentError].include?(_1.class) }
40
41
  end
41
42
 
42
43
  it 'handles invalid byte sequence if option is set' do
data/spec/spec_helper.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  require 'simplecov'
2
- # require 'byebug'
2
+ require 'debug' unless RUBY_ENGINE == 'jruby'
3
3
 
4
4
  SimpleCov.profiles.define 'gem' do
5
5
  add_filter '/spec/'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reverse_markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 3.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Johannes Opper
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-31 00:00:00.000000000 Z
11
+ date: 2026-01-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -81,21 +81,7 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: byebug
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- - !ruby/object:Gem::Dependency
98
- name: codeclimate-test-reporter
84
+ name: debug
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
87
  - - ">="
@@ -117,9 +103,10 @@ executables:
117
103
  extensions: []
118
104
  extra_rdoc_files: []
119
105
  files:
106
+ - ".github/workflows/ci.yml"
120
107
  - ".gitignore"
121
108
  - ".rspec"
122
- - ".travis.yml"
109
+ - ".tool-versions"
123
110
  - CHANGELOG.md
124
111
  - Gemfile
125
112
  - LICENSE
@@ -145,6 +132,7 @@ files:
145
132
  - lib/reverse_markdown/converters/figure.rb
146
133
  - lib/reverse_markdown/converters/h.rb
147
134
  - lib/reverse_markdown/converters/hr.rb
135
+ - lib/reverse_markdown/converters/iframe.rb
148
136
  - lib/reverse_markdown/converters/ignore.rb
149
137
  - lib/reverse_markdown/converters/img.rb
150
138
  - lib/reverse_markdown/converters/li.rb
@@ -167,6 +155,7 @@ files:
167
155
  - spec/assets/from_the_wild.html
168
156
  - spec/assets/full_example.html
169
157
  - spec/assets/html_fragment.html
158
+ - spec/assets/iframe.html
170
159
  - spec/assets/lists.html
171
160
  - spec/assets/minimum.html
172
161
  - spec/assets/paragraphs.html
@@ -179,6 +168,7 @@ files:
179
168
  - spec/components/escapables_spec.rb
180
169
  - spec/components/from_the_wild_spec.rb
181
170
  - spec/components/html_fragment_spec.rb
171
+ - spec/components/iframe_spec.rb
182
172
  - spec/components/lists_spec.rb
183
173
  - spec/components/paragraphs_spec.rb
184
174
  - spec/components/quotation_spec.rb
@@ -191,7 +181,9 @@ files:
191
181
  - spec/lib/reverse_markdown/converters/br_spec.rb
192
182
  - spec/lib/reverse_markdown/converters/del_spec.rb
193
183
  - spec/lib/reverse_markdown/converters/details_spec.rb
184
+ - spec/lib/reverse_markdown/converters/em_spec.rb
194
185
  - spec/lib/reverse_markdown/converters/figure_spec.rb
186
+ - spec/lib/reverse_markdown/converters/h_spec.rb
195
187
  - spec/lib/reverse_markdown/converters/li_spec.rb
196
188
  - spec/lib/reverse_markdown/converters/pre_spec.rb
197
189
  - spec/lib/reverse_markdown/converters/strong_spec.rb
@@ -203,7 +195,7 @@ homepage: http://github.com/xijo/reverse_markdown
203
195
  licenses:
204
196
  - WTFPL
205
197
  metadata: {}
206
- post_install_message:
198
+ post_install_message:
207
199
  rdoc_options: []
208
200
  require_paths:
209
201
  - lib
@@ -218,8 +210,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
218
210
  - !ruby/object:Gem::Version
219
211
  version: '0'
220
212
  requirements: []
221
- rubygems_version: 3.1.4
222
- signing_key:
213
+ rubygems_version: 3.5.16
214
+ signing_key:
223
215
  specification_version: 4
224
216
  summary: Convert html code into markdown.
225
217
  test_files:
@@ -230,6 +222,7 @@ test_files:
230
222
  - spec/assets/from_the_wild.html
231
223
  - spec/assets/full_example.html
232
224
  - spec/assets/html_fragment.html
225
+ - spec/assets/iframe.html
233
226
  - spec/assets/lists.html
234
227
  - spec/assets/minimum.html
235
228
  - spec/assets/paragraphs.html
@@ -242,6 +235,7 @@ test_files:
242
235
  - spec/components/escapables_spec.rb
243
236
  - spec/components/from_the_wild_spec.rb
244
237
  - spec/components/html_fragment_spec.rb
238
+ - spec/components/iframe_spec.rb
245
239
  - spec/components/lists_spec.rb
246
240
  - spec/components/paragraphs_spec.rb
247
241
  - spec/components/quotation_spec.rb
@@ -254,7 +248,9 @@ test_files:
254
248
  - spec/lib/reverse_markdown/converters/br_spec.rb
255
249
  - spec/lib/reverse_markdown/converters/del_spec.rb
256
250
  - spec/lib/reverse_markdown/converters/details_spec.rb
251
+ - spec/lib/reverse_markdown/converters/em_spec.rb
257
252
  - spec/lib/reverse_markdown/converters/figure_spec.rb
253
+ - spec/lib/reverse_markdown/converters/h_spec.rb
258
254
  - spec/lib/reverse_markdown/converters/li_spec.rb
259
255
  - spec/lib/reverse_markdown/converters/pre_spec.rb
260
256
  - spec/lib/reverse_markdown/converters/strong_spec.rb
data/.travis.yml DELETED
@@ -1,18 +0,0 @@
1
- language: ruby
2
- cache: bundler
3
-
4
- rvm:
5
- - 2.0
6
- - 2.1
7
- - 2.2
8
- - 2.3
9
- - 2.4
10
- - 2.5
11
- - 2.6
12
- - 2.7
13
- - jruby-9.2.8.0
14
-
15
- notifications:
16
- disabled: false
17
- recipients:
18
- - xijo@pm.me