upmark 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e6e70467bec74fa706947ab9b0aea3c6cf626c6cf201a07aa918b5fba980a1a2
4
- data.tar.gz: a237b227e1c64e116dad035548ac4d364f9fca96956b26ad7b90f0b5919b29fc
3
+ metadata.gz: 30ee9a20ac459c48e9a74d6b2ce2df2328366421c2193b0b81b7d60504dbb171
4
+ data.tar.gz: 5cd1167cf96a88018f9a34cd1ea7b330b21c7732ffaf1525f110694f83f90edc
5
5
  SHA512:
6
- metadata.gz: 8c132b5f1a0fdc78cd7c5a4e3c7b3a9bad1a7aacfc7ad3e59f9d2a88bdb03bdd610bd5a907bddf565e2183c0c70dc0558834b80e7fddac8ad9421fe245f2e99c
7
- data.tar.gz: 7a4997aea6c74b5a2ee3adcf377e963663ca1ce6e4aec1cb48dc7d3066f0f8ec46710235885df300e6bd0fe15923825b130f0ab1de22bfc15d649bacbaae98b8
6
+ metadata.gz: d3e08e299cc8405a1c2833637b85ffd04aa078f7af031f8a52745fd742d316aae118efbfa0c32db8a6411ea94be684c488902a9864b9bc664eab0f2c39a84247
7
+ data.tar.gz: 858361e80fa96cd4001c317b3af32dfc36bf30d6ca70375c1fac81f24b3d96cd9c02f0ad1ee17f9059c4141a6e9b2916a587f456bb91004cc6e8c44c15e9d0c3
data/README.md CHANGED
@@ -42,7 +42,7 @@ It will also pass through block and span-level HTML elements (e.g. `table`, `div
42
42
 
43
43
  ## How it works
44
44
 
45
- Upmark defines a parsing expression grammar (PEG) using the very awesome [Parslet](http://kschiess.github.com/parslet/) gem. This PEG is then used to convert HTML into Markdown in 4 steps:
45
+ Upmark defines a parsing expression grammar (PEG) using the very awesome [Parslet](https://github.com/kschiess/parslet/) gem. This PEG is then used to convert HTML into Markdown in 4 steps:
46
46
 
47
47
  1. Parse the XHTML into an abstract syntax tree (AST).
48
48
  2. Normalize the AST into a nested hash of HTML elements.
@@ -48,12 +48,12 @@ module Upmark
48
48
  element(:li) {|element| "#{text(element)}" }
49
49
 
50
50
  element(:ul) do |element|
51
- children = element[:children].map {|value| value.strip != "" ? value : nil }.compact
51
+ children = element[:children].flatten.map {|value| value.strip != "" ? value : nil }.compact
52
52
  children.map {|value| "* #{value.gsub(/^\s*•\s*/,'')}\n" }
53
53
  end
54
54
 
55
55
  element(:ol) do |element|
56
- children = element[:children].map {|value| value.strip != "" ? value : nil }.compact
56
+ children = element[:children].flatten.map {|value| value.strip != "" ? value : nil }.compact
57
57
  children.map.with_index {|value, i| "#{i + 1}. #{value}\n" }
58
58
  end
59
59
 
@@ -71,7 +71,7 @@ module Upmark
71
71
 
72
72
  element(:img) do |element|
73
73
  attributes = map_attributes_subtree(element[:attributes])
74
- href = attributes[:src]
74
+ href = attributes[:src].to_s
75
75
  title = attributes[:title]
76
76
  alt_text = attributes[:alt]
77
77
 
data/lib/upmark.rb CHANGED
@@ -14,7 +14,11 @@ module Upmark
14
14
  preprocess = Transform::Preprocess.new
15
15
  markdown = Transform::Markdown.new
16
16
 
17
- ast = xml.parse(html.strip)
17
+ # Remove base64 data URLs that cause parser issues
18
+ html = html.gsub(/(data:image\/[^;]*;base64,)[A-Za-z0-9+\/=]+/, '').strip
19
+
20
+ ast = xml.parse(html)
21
+
18
22
  ast = normalise.apply(ast)
19
23
  ast = preprocess.apply(ast)
20
24
  ast = markdown.apply(ast)
@@ -86,6 +86,17 @@ Labor MP calls to end dogs
86
86
  ![messenger bag skateboard](http://helvetica.com/image.gif "art party organic")
87
87
  MD
88
88
  end
89
+
90
+ specify "removes base64 data URLs" do
91
+ expect(<<~HTML).to convert_to("")
92
+ <img src="data:image/png;base64,abc" />
93
+ HTML
94
+
95
+ src = "abc" * 10000
96
+ expect(<<~HTML).to convert_to("")
97
+ <img src="data:image/png;base64,#{src}" />
98
+ HTML
99
+ end
89
100
  end
90
101
 
91
102
  context "<p>" do
@@ -341,4 +352,36 @@ messenger **bag** skateboard
341
352
  expect(html).to convert_to("Hi\nthere")
342
353
  end
343
354
  end
344
- end
355
+
356
+ context "nested unordered lists" do
357
+ let(:html) do
358
+ <<-HTML
359
+ <ul>
360
+ <ul>
361
+ <li>List item</li>
362
+ </ul>
363
+ </ul>
364
+ HTML
365
+ end
366
+
367
+ it "generates readable output" do
368
+ expect(html).to convert_to("* * List item")
369
+ end
370
+ end
371
+
372
+ context "nested ordered lists" do
373
+ let(:html) do
374
+ <<-HTML
375
+ <ol>
376
+ <ol>
377
+ <li>List item</li>
378
+ </ol>
379
+ </ol>
380
+ HTML
381
+ end
382
+
383
+ it "generates readable output" do
384
+ expect(html).to convert_to("1. 1. List item")
385
+ end
386
+ end
387
+ end
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: upmark
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Josh Bassett
8
8
  - Gus Gollings
9
9
  - James Healy
10
- autorequire:
10
+ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2018-03-26 00:00:00.000000000 Z
13
+ date: 2025-09-25 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rspec
@@ -95,7 +95,7 @@ files:
95
95
  homepage: http://github.com/conversation/upmark
96
96
  licenses: []
97
97
  metadata: {}
98
- post_install_message:
98
+ post_install_message:
99
99
  rdoc_options: []
100
100
  require_paths:
101
101
  - lib
@@ -110,14 +110,13 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
110
  - !ruby/object:Gem::Version
111
111
  version: '0'
112
112
  requirements: []
113
- rubyforge_project: upmark
114
- rubygems_version: 2.7.0
115
- signing_key:
113
+ rubygems_version: 3.4.10
114
+ signing_key:
116
115
  specification_version: 4
117
116
  summary: A HTML to Markdown converter.
118
117
  test_files:
118
+ - spec/acceptance/upmark_spec.rb
119
+ - spec/errors_spec.rb
119
120
  - spec/spec_helper.rb
120
121
  - spec/unit/lib/upmark/parser/xml_spec.rb
121
122
  - spec/unit/lib/upmark/transform/markdown_spec.rb
122
- - spec/acceptance/upmark_spec.rb
123
- - spec/errors_spec.rb