upmark 0.10.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/{LICENSE → LICENSE.md} +1 -1
- data/README.md +8 -10
- data/lib/upmark/errors.rb +3 -3
- data/lib/upmark/transform/markdown.rb +3 -3
- data/lib/upmark.rb +0 -2
- data/spec/acceptance/upmark_spec.rb +55 -1
- data/spec/errors_spec.rb +12 -0
- metadata +16 -16
- data/lib/core_ext/array.rb +0 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 6b50ae60e763766a5a5b4276591f488e17668ef3d85d2f78079d2a259e81fcdc
|
|
4
|
+
data.tar.gz: 3d5722b9cee80ab15a9d9f975b0b6f5d662a53d325a54849ec47bbe6c17ee4d6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: bed12a83c45c01bc2033d4c41bd3a5af61aa2777925f51b19322a0655756f8d4aab7f76b1db1a633b08bd9d43a1f30001b0489ca4bb8ad7105666d3e5f3f7d50
|
|
7
|
+
data.tar.gz: b2ffc99286769acc223f9065bbdacf82c3c3ca2c6132120b7af036ec8a71721950bd03125f61767c6411fb93c5f19ab10ea78f780ad534449ab670c4410e312c
|
data/{LICENSE → LICENSE.md}
RENAMED
data/README.md
CHANGED
|
@@ -4,16 +4,18 @@ A HTML to Markdown converter.
|
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
7
|
-
gem install upmark
|
|
7
|
+
> gem install upmark
|
|
8
8
|
|
|
9
9
|
## Usage
|
|
10
10
|
|
|
11
11
|
In ruby:
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
```ruby
|
|
14
|
+
require "upmark"
|
|
15
|
+
html = "<p>messenger <strong>bag</strong> skateboard</p>"
|
|
16
|
+
markdown = Upmark.convert(html)
|
|
17
|
+
puts markdown
|
|
18
|
+
```
|
|
17
19
|
|
|
18
20
|
From the command-line:
|
|
19
21
|
|
|
@@ -40,13 +42,9 @@ It will also pass through block and span-level HTML elements (e.g. `table`, `div
|
|
|
40
42
|
|
|
41
43
|
## How it works
|
|
42
44
|
|
|
43
|
-
Upmark defines a parsing expression grammar (PEG) using the very awesome [Parslet](
|
|
45
|
+
Upmark defines a parsing expression grammar (PEG) using the very awesome [Parslet](https://github.com/kschiess/parslet/) gem. This PEG is then used to convert HTML into Markdown in 4 steps:
|
|
44
46
|
|
|
45
47
|
1. Parse the XHTML into an abstract syntax tree (AST).
|
|
46
48
|
2. Normalize the AST into a nested hash of HTML elements.
|
|
47
49
|
3. Mark the block and span-level subtrees which should be ignored (`table`, `div`, `span`, etc).
|
|
48
50
|
4. Convert the AST leaves into Markdown.
|
|
49
|
-
|
|
50
|
-
## License
|
|
51
|
-
|
|
52
|
-
Upmark is Copyright (c) 2014 The Conversation Media Group and distributed under the MIT license.
|
data/lib/upmark/transform/markdown.rb
CHANGED
|
@@ -48,13 +48,13 @@ module Upmark
|
|
|
48
48
|
element(:li) {|element| "#{text(element)}" }
|
|
49
49
|
|
|
50
50
|
element(:ul) do |element|
|
|
51
|
-
children = element[:children].map {|value| value.strip != "" ? value : nil }.compact
|
|
51
|
+
children = element[:children].flatten.map {|value| value.strip != "" ? value : nil }.compact
|
|
52
52
|
children.map {|value| "* #{value.gsub(/^\s*•\s*/,'')}\n" }
|
|
53
53
|
end
|
|
54
54
|
|
|
55
55
|
element(:ol) do |element|
|
|
56
|
-
children = element[:children].map {|value| value.strip != "" ? value : nil }.compact
|
|
57
|
-
children.
|
|
56
|
+
children = element[:children].flatten.map {|value| value.strip != "" ? value : nil }.compact
|
|
57
|
+
children.map.with_index {|value, i| "#{i + 1}. #{value}\n" }
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
element(:a) do |element|
|
data/spec/acceptance/upmark_spec.rb
CHANGED
|
@@ -55,6 +55,28 @@ RSpec.describe Upmark, ".convert" do
|
|
|
55
55
|
end
|
|
56
56
|
end
|
|
57
57
|
|
|
58
|
+
context "<a> with inline elements, no href" do
|
|
59
|
+
specify 'converts as plain text' do
|
|
60
|
+
expect(<<-HTML.strip
|
|
61
|
+
<a>How Australia can respond to the security challenges posed by climate change in the Asian Century</a>
|
|
62
|
+
HTML
|
|
63
|
+
).to convert_to <<-MD.strip
|
|
64
|
+
How Australia can respond to the security challenges posed by climate change in the Asian Century
|
|
65
|
+
MD
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
context "<a> with id href" do
|
|
70
|
+
specify 'converts as plain text' do
|
|
71
|
+
expect(<<-HTML.strip
|
|
72
|
+
<a href=\"#sdfootnote3anc\">Labor MP calls to end dogs</a>
|
|
73
|
+
HTML
|
|
74
|
+
).to convert_to <<-MD.strip
|
|
75
|
+
Labor MP calls to end dogs
|
|
76
|
+
MD
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
58
80
|
context "<img>" do
|
|
59
81
|
specify 'converts as ![]()' do
|
|
60
82
|
expect(<<-HTML.strip
|
|
@@ -319,4 +341,36 @@ messenger **bag** skateboard
|
|
|
319
341
|
expect(html).to convert_to("Hi\nthere")
|
|
320
342
|
end
|
|
321
343
|
end
|
|
322
|
-
|
|
344
|
+
|
|
345
|
+
context "nested unordered lists" do
|
|
346
|
+
let(:html) do
|
|
347
|
+
<<-HTML
|
|
348
|
+
<ul>
|
|
349
|
+
<ul>
|
|
350
|
+
<li>List item</li>
|
|
351
|
+
</ul>
|
|
352
|
+
</ul>
|
|
353
|
+
HTML
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
it "generates readable output" do
|
|
357
|
+
expect(html).to convert_to("* * List item")
|
|
358
|
+
end
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
context "nested ordered lists" do
|
|
362
|
+
let(:html) do
|
|
363
|
+
<<-HTML
|
|
364
|
+
<ol>
|
|
365
|
+
<ol>
|
|
366
|
+
<li>List item</li>
|
|
367
|
+
</ol>
|
|
368
|
+
</ol>
|
|
369
|
+
HTML
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
it "generates readable output" do
|
|
373
|
+
expect(html).to convert_to("1. 1. List item")
|
|
374
|
+
end
|
|
375
|
+
end
|
|
376
|
+
end
|
data/spec/errors_spec.rb
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
RSpec.describe Upmark::ParseFailed, ".ascii_tree" do
|
|
2
|
+
it "delegates to a cause object" do
|
|
3
|
+
cause = double(ascii_tree: double)
|
|
4
|
+
error = Upmark::ParseFailed.new("oh noes", cause)
|
|
5
|
+
expect(error.ascii_tree).to be(cause.ascii_tree)
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
it "returns nil when there is no cause" do
|
|
9
|
+
error = Upmark::ParseFailed.new("oh noes", nil)
|
|
10
|
+
expect(error.ascii_tree).to be_nil
|
|
11
|
+
end
|
|
12
|
+
end
|
metadata
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: upmark
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.10.0
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Josh Bassett
|
|
8
8
|
- Gus Gollings
|
|
9
9
|
- James Healy
|
|
10
|
-
autorequire:
|
|
10
|
+
autorequire:
|
|
11
11
|
bindir: bin
|
|
12
12
|
cert_chain: []
|
|
13
|
-
date:
|
|
13
|
+
date: 2024-04-22 00:00:00.000000000 Z
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: rspec
|
|
@@ -18,14 +18,14 @@ dependencies:
|
|
|
18
18
|
requirements:
|
|
19
19
|
- - "~>"
|
|
20
20
|
- !ruby/object:Gem::Version
|
|
21
|
-
version: '3.
|
|
21
|
+
version: '3.7'
|
|
22
22
|
type: :development
|
|
23
23
|
prerelease: false
|
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
|
25
25
|
requirements:
|
|
26
26
|
- - "~>"
|
|
27
27
|
- !ruby/object:Gem::Version
|
|
28
|
-
version: '3.
|
|
28
|
+
version: '3.7'
|
|
29
29
|
- !ruby/object:Gem::Dependency
|
|
30
30
|
name: rake
|
|
31
31
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -60,14 +60,14 @@ dependencies:
|
|
|
60
60
|
requirements:
|
|
61
61
|
- - "~>"
|
|
62
62
|
- !ruby/object:Gem::Version
|
|
63
|
-
version: 1.
|
|
63
|
+
version: 1.8.2
|
|
64
64
|
type: :runtime
|
|
65
65
|
prerelease: false
|
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
|
67
67
|
requirements:
|
|
68
68
|
- - "~>"
|
|
69
69
|
- !ruby/object:Gem::Version
|
|
70
|
-
version: 1.
|
|
70
|
+
version: 1.8.2
|
|
71
71
|
description: Upmark has the skills to convert your HTML to Markdown.
|
|
72
72
|
email: dev@theconversation.edu.au
|
|
73
73
|
executables:
|
|
@@ -75,11 +75,10 @@ executables:
|
|
|
75
75
|
extensions: []
|
|
76
76
|
extra_rdoc_files: []
|
|
77
77
|
files:
|
|
78
|
-
- LICENSE
|
|
78
|
+
- LICENSE.md
|
|
79
79
|
- README.md
|
|
80
80
|
- Rakefile
|
|
81
81
|
- bin/upmark
|
|
82
|
-
- lib/core_ext/array.rb
|
|
83
82
|
- lib/upmark.rb
|
|
84
83
|
- lib/upmark/errors.rb
|
|
85
84
|
- lib/upmark/parser/xml.rb
|
|
@@ -89,13 +88,14 @@ files:
|
|
|
89
88
|
- lib/upmark/transform/preprocess.rb
|
|
90
89
|
- lib/upmark/transform_helpers.rb
|
|
91
90
|
- spec/acceptance/upmark_spec.rb
|
|
91
|
+
- spec/errors_spec.rb
|
|
92
92
|
- spec/spec_helper.rb
|
|
93
93
|
- spec/unit/lib/upmark/parser/xml_spec.rb
|
|
94
94
|
- spec/unit/lib/upmark/transform/markdown_spec.rb
|
|
95
95
|
homepage: http://github.com/conversation/upmark
|
|
96
96
|
licenses: []
|
|
97
97
|
metadata: {}
|
|
98
|
-
post_install_message:
|
|
98
|
+
post_install_message:
|
|
99
99
|
rdoc_options: []
|
|
100
100
|
require_paths:
|
|
101
101
|
- lib
|
|
@@ -103,20 +103,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
103
103
|
requirements:
|
|
104
104
|
- - ">="
|
|
105
105
|
- !ruby/object:Gem::Version
|
|
106
|
-
version:
|
|
106
|
+
version: 1.9.3
|
|
107
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
108
|
requirements:
|
|
109
109
|
- - ">="
|
|
110
110
|
- !ruby/object:Gem::Version
|
|
111
111
|
version: '0'
|
|
112
112
|
requirements: []
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
signing_key:
|
|
113
|
+
rubygems_version: 3.4.10
|
|
114
|
+
signing_key:
|
|
116
115
|
specification_version: 4
|
|
117
116
|
summary: A HTML to Markdown converter.
|
|
118
117
|
test_files:
|
|
118
|
+
- spec/acceptance/upmark_spec.rb
|
|
119
|
+
- spec/errors_spec.rb
|
|
120
|
+
- spec/spec_helper.rb
|
|
119
121
|
- spec/unit/lib/upmark/parser/xml_spec.rb
|
|
120
122
|
- spec/unit/lib/upmark/transform/markdown_spec.rb
|
|
121
|
-
- spec/spec_helper.rb
|
|
122
|
-
- spec/acceptance/upmark_spec.rb
|