markitdown 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,7 +11,7 @@ module Markitdown
11
11
  # gsub(/\n{2,}/,"\n\n") - collapse any series of more than 2 new lines down to 2
12
12
  # gsub(/\t+/," ") - collapse consecutive tabs down to a single space. I use tabs to pad divs and spans; this causes multiple nested spans and divs to ultimately be surrounded by a single space.
13
13
  # gsub(/ ([\.\?])/,'\1') - removes a space before a period or question mark. Things like links get surrounded by spaces. If they appear at the end of a sentence, this makes sure the punctuation isn't off.
14
- self.parse_node(node).flatten.compact.join.gsub(/\n\s+\n/,"\n\n").gsub(/\n{2,}/,"\n\n").gsub(/\t+/," ").gsub(/ ([\.\?])/,'\1')
14
+ self.parse_node(node).flatten.compact.join.gsub(/\n\s+\n/,"\n\n").gsub(/\n{2,}/,"\n\n").gsub(/( > \n){2,}/,"\n > ").gsub(/\t+/," ").gsub(/ ([\.\?])/,'\1')
15
15
  end
16
16
 
17
17
  private
@@ -1,3 +1,3 @@
1
1
  module Markitdown
2
- VERSION = "0.0.4"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -0,0 +1,24 @@
1
+
2
+
3
+ # Main Header
4
+
5
+ This *is* a **test**. It includes a [link](http://www.google.com) as well as an image ![Google Logo](https://www.google.com/images/srpr/logo3w.png)
6
+
7
+ * bullet 1
8
+ * bullet 2
9
+ * bullet 3
10
+
11
+ ***
12
+
13
+ ## Subheader
14
+
15
+ This is paragraph two.
16
+
17
+ 1. bullet 1
18
+ * Sub-bullet 1 [Nested link](http://github.com).
19
+ 1. bullet 2
20
+ 1. bullet 3
21
+
22
+ This is some free text
23
+
24
+ > And here's a blockquote, right at the end.
@@ -5,31 +5,15 @@ describe Markitdown do
5
5
  let(:html) { File.read("spec/doc.html") }
6
6
 
7
7
  it "should produce valid markdown" do
8
- Markitdown.from_html(html).should == "
9
-
10
- # Main Header
11
-
12
- This *is* a **test**. It includes a [link](http://www.google.com) as well as an image ![Google Logo](https://www.google.com/images/srpr/logo3w.png)
13
-
14
- * bullet 1
15
- * bullet 2
16
- * bullet 3
17
-
18
- ***
19
-
20
- ## Subheader
21
-
22
- This is paragraph two.
23
-
24
- 1. bullet 1
25
- * Sub-bullet 1 [Nested link](http://github.com).
26
- 1. bullet 2
27
- 1. bullet 3
8
+ Markitdown.from_html(html).should == File.read("spec/doc.markdown")
9
+ end
10
+ end
28
11
 
29
- This is some free text
12
+ context "When parsing an evernote document" do
13
+ let(:xml) { File.read("spec/evernote.xml") }
30
14
 
31
- > And here's a blockquote, right at the end.
32
- "
15
+ it "should produce valid markdown" do
16
+ Markitdown.from_html(xml).should == File.read("spec/evernote.markdown")
33
17
  end
34
18
  end
35
19
  end
@@ -0,0 +1,7 @@
1
+ Overnight success rarely happens overnight.
2
+
3
+ >
4
+ > “I remember saying that I was just waiting for hockeystick growth. That doesn’t happen. You don’t just sit there, and all of the sudden there’s a lot of growth,” he said at PandoMonthly in San Francisco today.
5
+ > Hockey stick user adoption, like what he experienced with his next startup, Instagram, came from making a major change. Burbn was intially a check-in and communications app in a sea of a million check-in and communications apps. He realized that the thing he liked most about Burbn was the photos that the app’s 80 other active users shared. He decided to throw all his eggs in that basket, and boom — hockey stick growth.
6
+ > “We took in data about what our users were doing and focused in on that. The second we focused on what people were doing, it became a phenomenon,” he said.
7
+ >
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
3
+ <en-note>Overnight success rarely happens overnight.<blockquote><p style="color:rgb(84, 84, 84);font-family:ars-maquette-web, 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size:15px;font-style:normal;font-variant:normal;font-weight:normal;text-align:-webkit-auto;text-decoration:none;text-indent:0px;text-rendering:auto;text-shadow:none;text-overflow:clip;text-transform:none;color-interpolation:srgb;color-interpolation-filters:linearrgb;color-rendering:auto;text-anchor:start;">“I remember saying that I was just waiting for hockeystick growth. That doesn’t happen. You don’t just sit there, and all of the sudden there’s a lot of growth,” he said at PandoMonthly in San Francisco today.</p>
4
+ <p style="color:rgb(84, 84, 84);font-family:ars-maquette-web, 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size:15px;font-style:normal;font-variant:normal;font-weight:normal;text-align:-webkit-auto;text-decoration:none;text-indent:0px;text-rendering:auto;text-shadow:none;text-overflow:clip;text-transform:none;color-interpolation:srgb;color-interpolation-filters:linearrgb;color-rendering:auto;text-anchor:start;">Hockey stick user adoption, like what he experienced with his next startup, Instagram, came from making a major change. Burbn was intially a check-in and communications app in a sea of a million check-in and communications apps. He realized that the thing he liked most about Burbn was the photos that the app’s 80 other active users shared. He decided to throw all his eggs in that basket, and boom — hockey stick growth.</p>
5
+ <p style="color:rgb(84, 84, 84);font-family:ars-maquette-web, 'Helvetica Neue', Helvetica, Arial, sans-serif;font-size:15px;font-style:normal;font-variant:normal;font-weight:normal;text-align:-webkit-auto;text-decoration:none;text-indent:0px;text-rendering:auto;text-shadow:none;text-overflow:clip;text-transform:none;color-interpolation:srgb;color-interpolation-filters:linearrgb;color-rendering:auto;text-anchor:start;">“We took in data about what our users were doing and focused in on that. The second we focused on what people were doing, it became a phenomenon,” he said.</p></blockquote></en-note>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markitdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -76,7 +76,10 @@ files:
76
76
  - lib/markitdown/version.rb
77
77
  - markitdown.gemspec
78
78
  - spec/doc.html
79
+ - spec/doc.markdown
79
80
  - spec/doc_spec.rb
81
+ - spec/evernote.markdown
82
+ - spec/evernote.xml
80
83
  - spec/nesting_spec.rb
81
84
  - spec/tag_spec.rb
82
85
  homepage: https://github.com/cpetersen/markitdown
@@ -93,7 +96,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
93
96
  version: '0'
94
97
  segments:
95
98
  - 0
96
- hash: 73619145748567662
99
+ hash: -4537345185868269635
97
100
  required_rubygems_version: !ruby/object:Gem::Requirement
98
101
  none: false
99
102
  requirements:
@@ -102,7 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
102
105
  version: '0'
103
106
  segments:
104
107
  - 0
105
- hash: 73619145748567662
108
+ hash: -4537345185868269635
106
109
  requirements: []
107
110
  rubyforge_project:
108
111
  rubygems_version: 1.8.24
@@ -111,6 +114,9 @@ specification_version: 3
111
114
  summary: Converts HTML to Markdown
112
115
  test_files:
113
116
  - spec/doc.html
117
+ - spec/doc.markdown
114
118
  - spec/doc_spec.rb
119
+ - spec/evernote.markdown
120
+ - spec/evernote.xml
115
121
  - spec/nesting_spec.rb
116
122
  - spec/tag_spec.rb