markitdown 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/markitdown.rb +1 -1
- data/lib/markitdown/version.rb +1 -1
- data/spec/doc.markdown +24 -0
- data/spec/doc_spec.rb +7 -23
- data/spec/evernote.markdown +7 -0
- data/spec/evernote.xml +5 -0
- metadata +9 -3
data/lib/markitdown.rb
CHANGED
@@ -11,7 +11,7 @@ module Markitdown
|
|
11
11
|
# gsub(/\n{2,}/,"\n\n") - collapse any series of more than 2 new lines down to 2
|
12
12
|
# gsub(/\t+/," ") - collapse consecutive tabs down to a single space. I use tabs to pad divs and spans; this causes multiple nested spans and divs to ultimately be surrounded by a single space.
|
13
13
|
# gsub(/ ([\.\?])/,'\1') - removes a space before a period or question mark. Things like links get surrounded by spaces. If they appear at the end of a sentence, this makes sure the punctuation isn't off.
|
14
|
-
self.parse_node(node).flatten.compact.join.gsub(/\n\s+\n/,"\n\n").gsub(/\n{2,}/,"\n\n").gsub(/\t+/," ").gsub(/ ([\.\?])/,'\1')
|
14
|
+
self.parse_node(node).flatten.compact.join.gsub(/\n\s+\n/,"\n\n").gsub(/\n{2,}/,"\n\n").gsub(/( > \n){2,}/,"\n > ").gsub(/\t+/," ").gsub(/ ([\.\?])/,'\1')
|
15
15
|
end
|
16
16
|
|
17
17
|
private
|
data/lib/markitdown/version.rb
CHANGED
data/spec/doc.markdown
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
# Main Header
|
4
|
+
|
5
|
+
This *is* a **test**. It includes a [link](http://www.google.com) as well as an image ![Google Logo](https://www.google.com/images/srpr/logo3w.png)
|
6
|
+
|
7
|
+
* bullet 1
|
8
|
+
* bullet 2
|
9
|
+
* bullet 3
|
10
|
+
|
11
|
+
***
|
12
|
+
|
13
|
+
## Subheader
|
14
|
+
|
15
|
+
This is paragraph two.
|
16
|
+
|
17
|
+
1. bullet 1
|
18
|
+
* Sub-bullet 1 [Nested link](http://github.com).
|
19
|
+
1. bullet 2
|
20
|
+
1. bullet 3
|
21
|
+
|
22
|
+
This is some free text
|
23
|
+
|
24
|
+
> And here's a blockquote, right at the end.
|
data/spec/doc_spec.rb
CHANGED
@@ -5,31 +5,15 @@ describe Markitdown do
|
|
5
5
|
let(:html) { File.read("spec/doc.html") }
|
6
6
|
|
7
7
|
it "should produce valid markdown" do
|
8
|
-
Markitdown.from_html(html).should == "
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
This *is* a **test**. It includes a [link](http://www.google.com) as well as an image ![Google Logo](https://www.google.com/images/srpr/logo3w.png)
|
13
|
-
|
14
|
-
* bullet 1
|
15
|
-
* bullet 2
|
16
|
-
* bullet 3
|
17
|
-
|
18
|
-
***
|
19
|
-
|
20
|
-
## Subheader
|
21
|
-
|
22
|
-
This is paragraph two.
|
23
|
-
|
24
|
-
1. bullet 1
|
25
|
-
* Sub-bullet 1 [Nested link](http://github.com).
|
26
|
-
1. bullet 2
|
27
|
-
1. bullet 3
|
8
|
+
Markitdown.from_html(html).should == File.read("spec/doc.markdown")
|
9
|
+
end
|
10
|
+
end
|
28
11
|
|
29
|
-
|
12
|
+
context "When parsing an evernote document" do
|
13
|
+
let(:xml) { File.read("spec/evernote.xml") }
|
30
14
|
|
31
|
-
|
32
|
-
"
|
15
|
+
it "should produce valid markdown" do
|
16
|
+
Markitdown.from_html(xml).should == File.read("spec/evernote.markdown")
|
33
17
|
end
|
34
18
|
end
|
35
19
|
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
Overnight success rarely happens overnight.
|
2
|
+
|
3
|
+
>
|
4
|
+
> “I remember saying that I was just waiting for hockeystick growth. That doesn’t happen. You don’t just sit there, and all of the sudden there’s a lot of growth,” he said at PandoMonthly in San Francisco today.
|
5
|
+
> Hockey stick user adoption, like what he experienced with his next startup, Instagram, came from making a major change. Burbn was intially a check-in and communications app in a sea of a million check-in and communications apps. He realized that the thing he liked most about Burbn was the photos that the app’s 80 other active users shared. He decided to throw all his eggs in that basket, and boom — hockey stick growth.
|
6
|
+
> “We took in data about what our users were doing and focused in on that. The second we focused on what people were doing, it became a phenomenon,” he said.
|
7
|
+
>
|
data/spec/evernote.xml
ADDED
@@ -0,0 +1,5 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
|
3
|
+
<en-note>Overnight success rarely happens overnight.<blockquote><p style="color:rgb(84, 84, 84);font-family:ars-maquette-web, 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size:15px;font-style:normal;font-variant:normal;font-weight:normal;text-align:-webkit-auto;text-decoration:none;text-indent:0px;text-rendering:auto;text-shadow:none;text-overflow:clip;text-transform:none;color-interpolation:srgb;color-interpolation-filters:linearrgb;color-rendering:auto;text-anchor:start;">“I remember saying that I was just waiting for hockeystick growth. That doesn’t happen. You don’t just sit there, and all of the sudden there’s a lot of growth,” he said at PandoMonthly in San Francisco today.</p>
|
4
|
+
<p style="color:rgb(84, 84, 84);font-family:ars-maquette-web, 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size:15px;font-style:normal;font-variant:normal;font-weight:normal;text-align:-webkit-auto;text-decoration:none;text-indent:0px;text-rendering:auto;text-shadow:none;text-overflow:clip;text-transform:none;color-interpolation:srgb;color-interpolation-filters:linearrgb;color-rendering:auto;text-anchor:start;">Hockey stick user adoption, like what he experienced with his next startup, Instagram, came from making a major change. Burbn was intially a check-in and communications app in a sea of a million check-in and communications apps. He realized that the thing he liked most about Burbn was the photos that the app’s 80 other active users shared. He decided to throw all his eggs in that basket, and boom — hockey stick growth.</p>
|
5
|
+
<p style="color:rgb(84, 84, 84);font-family:ars-maquette-web, 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size:15px;font-style:normal;font-variant:normal;font-weight:normal;text-align:-webkit-auto;text-decoration:none;text-indent:0px;text-rendering:auto;text-shadow:none;text-overflow:clip;text-transform:none;color-interpolation:srgb;color-interpolation-filters:linearrgb;color-rendering:auto;text-anchor:start;">“We took in data about what our users were doing and focused in on that. The second we focused on what people were doing, it became a phenomenon,” he said.</p></blockquote></en-note>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: markitdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -76,7 +76,10 @@ files:
|
|
76
76
|
- lib/markitdown/version.rb
|
77
77
|
- markitdown.gemspec
|
78
78
|
- spec/doc.html
|
79
|
+
- spec/doc.markdown
|
79
80
|
- spec/doc_spec.rb
|
81
|
+
- spec/evernote.markdown
|
82
|
+
- spec/evernote.xml
|
80
83
|
- spec/nesting_spec.rb
|
81
84
|
- spec/tag_spec.rb
|
82
85
|
homepage: https://github.com/cpetersen/markitdown
|
@@ -93,7 +96,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
93
96
|
version: '0'
|
94
97
|
segments:
|
95
98
|
- 0
|
96
|
-
hash:
|
99
|
+
hash: -4537345185868269635
|
97
100
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
101
|
none: false
|
99
102
|
requirements:
|
@@ -102,7 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
102
105
|
version: '0'
|
103
106
|
segments:
|
104
107
|
- 0
|
105
|
-
hash:
|
108
|
+
hash: -4537345185868269635
|
106
109
|
requirements: []
|
107
110
|
rubyforge_project:
|
108
111
|
rubygems_version: 1.8.24
|
@@ -111,6 +114,9 @@ specification_version: 3
|
|
111
114
|
summary: Converts HTML to Markdown
|
112
115
|
test_files:
|
113
116
|
- spec/doc.html
|
117
|
+
- spec/doc.markdown
|
114
118
|
- spec/doc_spec.rb
|
119
|
+
- spec/evernote.markdown
|
120
|
+
- spec/evernote.xml
|
115
121
|
- spec/nesting_spec.rb
|
116
122
|
- spec/tag_spec.rb
|