markitdown 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/markitdown.rb +17 -1
- data/lib/markitdown/version.rb +1 -1
- data/spec/doc.html +9 -1
- data/spec/doc.markdown +17 -8
- data/spec/tag_spec.rb +24 -0
- metadata +4 -4
data/lib/markitdown.rb
CHANGED
@@ -97,6 +97,18 @@ module Markitdown
|
|
97
97
|
when "li"
|
98
98
|
results << "\n"
|
99
99
|
results << pre
|
100
|
+
when "dl"
|
101
|
+
unless self.nested_list?(states)
|
102
|
+
results << self.newline(pre, nil)
|
103
|
+
after = "\n\n"
|
104
|
+
end
|
105
|
+
when "dt"
|
106
|
+
results << "\n"
|
107
|
+
results << pre
|
108
|
+
when "dd"
|
109
|
+
results << "\n"
|
110
|
+
results << pre
|
111
|
+
results << " : "
|
100
112
|
when "a"
|
101
113
|
results << " ["
|
102
114
|
after = ["](#{node.attributes["href"].value if node.attributes["href"]}) "]
|
@@ -110,7 +122,11 @@ module Markitdown
|
|
110
122
|
when "text"
|
111
123
|
results << node.text.strip.gsub("\n","").gsub(/ {2,}/," ")
|
112
124
|
when "code"
|
113
|
-
|
125
|
+
if node.text.include?("\n")
|
126
|
+
results << "\n\n #{node.text.gsub("\n","\n ")}\n\n"
|
127
|
+
else
|
128
|
+
results << " `#{node.text}` "
|
129
|
+
end
|
114
130
|
recurse = false
|
115
131
|
end
|
116
132
|
if recurse
|
data/lib/markitdown/version.rb
CHANGED
data/spec/doc.html
CHANGED
@@ -38,5 +38,13 @@
|
|
38
38
|
of
|
39
39
|
code
|
40
40
|
</code>
|
41
|
+
This is a definition list
|
42
|
+
<dl>
|
43
|
+
<dt>simple</dt>
|
44
|
+
<dd>This is a simple term and a simple definition</dd>
|
45
|
+
<dt>complicated term</dt>
|
46
|
+
<dd>This is a complicated term with a multi-line definition</dd>
|
47
|
+
<dd>The <b>second definition</b> has a some <b>bold</b> terms</dd>
|
48
|
+
</dl>
|
41
49
|
</body>
|
42
|
-
</html>
|
50
|
+
</html>
|
data/spec/doc.markdown
CHANGED
@@ -23,11 +23,20 @@ This is some free text
|
|
23
23
|
|
24
24
|
> And here's a blockquote, right at the end.
|
25
25
|
|
26
|
-
This is some more free text
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
This is some more free text
|
27
|
+
|
28
|
+
This
|
29
|
+
is
|
30
|
+
a
|
31
|
+
block
|
32
|
+
of
|
33
|
+
code
|
34
|
+
|
35
|
+
This is a definition list
|
36
|
+
|
37
|
+
simple
|
38
|
+
: This is a simple term and a simple definition
|
39
|
+
complicated term
|
40
|
+
: This is a complicated term with a multi-line definition
|
41
|
+
: The **second definition** has a some **bold** terms
|
42
|
+
|
data/spec/tag_spec.rb
CHANGED
@@ -155,6 +155,30 @@ describe Markitdown do
|
|
155
155
|
end
|
156
156
|
end
|
157
157
|
|
158
|
+
context "When parsing a DL" do
|
159
|
+
let(:html) { "<dl>
|
160
|
+
<dt>first term</dt>
|
161
|
+
<dd>first definition</dd>
|
162
|
+
<dt>second term</dt>
|
163
|
+
<dd>second definition</dd>
|
164
|
+
<dt>third term</dt>
|
165
|
+
<dd>third definition</dd>
|
166
|
+
</ol>"
|
167
|
+
}
|
168
|
+
it "should return valid markdown" do
|
169
|
+
Markitdown.from_html(html).should == "
|
170
|
+
|
171
|
+
first term
|
172
|
+
: first definition
|
173
|
+
second term
|
174
|
+
: second definition
|
175
|
+
third term
|
176
|
+
: third definition
|
177
|
+
|
178
|
+
"
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
158
182
|
context "When parsing a link" do
|
159
183
|
let(:html) { "<a href='http://www.google.com'>this is a link</a>" }
|
160
184
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: markitdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-05-
|
12
|
+
date: 2013-05-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -96,7 +96,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
96
96
|
version: '0'
|
97
97
|
segments:
|
98
98
|
- 0
|
99
|
-
hash:
|
99
|
+
hash: -2154247864884821099
|
100
100
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
101
|
none: false
|
102
102
|
requirements:
|
@@ -105,7 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
105
105
|
version: '0'
|
106
106
|
segments:
|
107
107
|
- 0
|
108
|
-
hash:
|
108
|
+
hash: -2154247864884821099
|
109
109
|
requirements: []
|
110
110
|
rubyforge_project:
|
111
111
|
rubygems_version: 1.8.24
|