markitdown 0.0.10 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -10,6 +10,7 @@ Markitdown is a Ruby library that converts HTML to Markdown. It's powered by Nok
10
10
  * Links
11
11
  * Code (inline and blocks)
12
12
  * Definition lists
13
+ * Tables
13
14
 
14
15
  As well as other tags.
15
16
 
data/lib/markitdown.rb CHANGED
@@ -130,7 +130,25 @@ module Markitdown
130
130
  results << " `#{node.text}` "
131
131
  end
132
132
  recurse = false
133
+ when "table"
134
+ results << "\n\n"
135
+ after = "\n\n"
136
+ when "th"
137
+ results << "|"
138
+ when "td"
139
+ results << "|"
140
+ when "tr"
141
+ after = "|\n"
142
+ table = find_parent(node.parent, "table")
143
+ if table
144
+ first_row = table.xpath("//tr").first
145
+ if first_row == node
146
+ cell_count = node.xpath("//th|td").count
147
+ after << ("|---"*cell_count) + "|\n"
148
+ end
149
+ end
133
150
  end
151
+
134
152
  if recurse
135
153
  node.children.each do |child|
136
154
  contents = self.parse_node(child, states)
@@ -138,6 +156,7 @@ module Markitdown
138
156
  results << contents
139
157
  end
140
158
  end
159
+
141
160
  if strip_content
142
161
  last_tags = results.pop
143
162
  after = after.flatten.compact.join if after.is_a?(Array)
@@ -195,4 +214,10 @@ module Markitdown
195
214
  end
196
215
  result
197
216
  end
217
+
218
+ def self.find_parent(node, tag_name)
219
+ return nil unless node
220
+ return node if node.name == tag_name
221
+ find_parent(node.parent, tag_name)
222
+ end
198
223
  end
@@ -1,3 +1,3 @@
1
1
  module Markitdown
2
- VERSION = "0.0.10"
2
+ VERSION = "0.1.0"
3
3
  end
data/spec/table.html ADDED
@@ -0,0 +1,59 @@
1
+ <html>
2
+ <body>
3
+ <h1>This is a test</h1>
4
+
5
+ <p>This is only a test</p>
6
+
7
+ <h2>Had this been a real test</h2>
8
+
9
+ <p>This <a href="http://www.google.com">announcement</a> would be followed by instructions.</p>
10
+
11
+ <table>
12
+ <thead>
13
+ <tr>
14
+ <th>This</th>
15
+ <th>is</th>
16
+ <th>a</th>
17
+ <th>table</th>
18
+ </tr>
19
+ </thead>
20
+ <tbody>
21
+ <tr>
22
+ <td>This</td>
23
+ <td>is</td>
24
+ <td>a</td>
25
+ </tr>
26
+ <tr>
27
+ <td>This</td>
28
+ <td>is</td>
29
+ <td>a</td>
30
+ <td><a href="http://www.google.com">announcement</a></td>
31
+ </tr>
32
+ <tr>
33
+ <td>This</td>
34
+ <td>is</td>
35
+ <td>a</td>
36
+ <td><em>table</em></td>
37
+ </tr>
38
+ <tr>
39
+ <td>This</td>
40
+ <td>is</td>
41
+ <td>a</td>
42
+ <td><strong>table</strong></td>
43
+ </tr>
44
+ <tr>
45
+ <td>This</td>
46
+ <td>is</td>
47
+ <td>a</td>
48
+ <td>table</td>
49
+ </tr>
50
+ <tr>
51
+ <td>This</td>
52
+ <td>is</td>
53
+ <td>a</td>
54
+ <td>table</td>
55
+ </tr>
56
+ </tbody>
57
+ </table>
58
+ </body>
59
+ </html>
data/spec/table.markdown ADDED
@@ -0,0 +1,19 @@
1
+
2
+
3
+ # This is a test
4
+
5
+ This is only a test
6
+
7
+ ## Had this been a real test
8
+
9
+ This [announcement](http://www.google.com) would be followed by instructions.
10
+
11
+ |This|is|a|table|
12
+ |---|---|---|---|
13
+ |This|is|a|
14
+ |This|is|a| [announcement](http://www.google.com) |
15
+ |This|is|a| _table_ |
16
+ |This|is|a| **table** |
17
+ |This|is|a|table|
18
+ |This|is|a|table|
19
+
data/spec/table2.html ADDED
@@ -0,0 +1,54 @@
1
+ <html>
2
+ <body>
3
+ <h1>This is a test</h1>
4
+
5
+ <p>This is only a test</p>
6
+
7
+ <h2>Had this been a real test</h2>
8
+
9
+ <p>This <a href="http://www.google.com">announcement</a> would be followed by instructions.</p>
10
+
11
+ <table>
12
+ <tr>
13
+ <th>This</th>
14
+ <th>is</th>
15
+ <th>a</th>
16
+ <th>table</th>
17
+ </tr>
18
+ <tr>
19
+ <td>This</td>
20
+ <td>is</td>
21
+ <td>a</td>
22
+ </tr>
23
+ <tr>
24
+ <td>This</td>
25
+ <td>is</td>
26
+ <td>a</td>
27
+ <td><a href="http://www.google.com">announcement</a></td>
28
+ </tr>
29
+ <tr>
30
+ <td>This</td>
31
+ <td>is</td>
32
+ <td>a</td>
33
+ <td><em>table</em></td>
34
+ </tr>
35
+ <tr>
36
+ <td>This</td>
37
+ <td>is</td>
38
+ <td>a</td>
39
+ <td><strong>table</strong></td>
40
+ </tr>
41
+ <tr>
42
+ <td>This</td>
43
+ <td>is</td>
44
+ <td>a</td>
45
+ <td>table</td>
46
+ </tr>
47
+ <tr>
48
+ <td>This</td>
49
+ <td>is</td>
50
+ <td>a</td>
51
+ <td>table</td>
52
+ </tr>
53
+ </body>
54
+ </html>
data/spec/table2.markdown ADDED
@@ -0,0 +1,19 @@
1
+
2
+
3
+ # This is a test
4
+
5
+ This is only a test
6
+
7
+ ## Had this been a real test
8
+
9
+ This [announcement](http://www.google.com) would be followed by instructions.
10
+
11
+ |This|is|a|table|
12
+ |---|---|---|---|
13
+ |This|is|a|
14
+ |This|is|a| [announcement](http://www.google.com) |
15
+ |This|is|a| _table_ |
16
+ |This|is|a| **table** |
17
+ |This|is|a|table|
18
+ |This|is|a|table|
19
+
data/spec/table_spec.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'markitdown'
2
+
3
+ describe Markitdown do
4
+ context "When parsing a table with a thead and tbody" do
5
+ let(:html) { File.read("spec/table.html") }
6
+
7
+ it "should produce valid markdown" do
8
+ Markitdown.from_html(html).should == File.read("spec/table.markdown")
9
+ end
10
+ end
11
+
12
+ context "When parsing a table without a thead and tbody" do
13
+ let(:html) { File.read("spec/table2.html") }
14
+
15
+ it "should produce valid markdown" do
16
+ Markitdown.from_html(html).should == File.read("spec/table2.markdown")
17
+ end
18
+ end
19
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markitdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-05 00:00:00.000000000 Z
12
+ date: 2013-07-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -83,6 +83,11 @@ files:
83
83
  - spec/evernote.markdown
84
84
  - spec/evernote.xml
85
85
  - spec/nesting_spec.rb
86
+ - spec/table.html
87
+ - spec/table.markdown
88
+ - spec/table2.html
89
+ - spec/table2.markdown
90
+ - spec/table_spec.rb
86
91
  - spec/tag_spec.rb
87
92
  homepage: https://github.com/cpetersen/markitdown
88
93
  licenses: []
@@ -98,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
98
103
  version: '0'
99
104
  segments:
100
105
  - 0
101
- hash: 73152326707210871
106
+ hash: 3407872921255543985
102
107
  required_rubygems_version: !ruby/object:Gem::Requirement
103
108
  none: false
104
109
  requirements:
@@ -107,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
107
112
  version: '0'
108
113
  segments:
109
114
  - 0
110
- hash: 73152326707210871
115
+ hash: 3407872921255543985
111
116
  requirements: []
112
117
  rubyforge_project:
113
118
  rubygems_version: 1.8.23
@@ -123,4 +128,9 @@ test_files:
123
128
  - spec/evernote.markdown
124
129
  - spec/evernote.xml
125
130
  - spec/nesting_spec.rb
131
+ - spec/table.html
132
+ - spec/table.markdown
133
+ - spec/table2.html
134
+ - spec/table2.markdown
135
+ - spec/table_spec.rb
126
136
  - spec/tag_spec.rb