markitdown 0.0.10 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -10,6 +10,7 @@ Markitdown is a Ruby library that converts HTML to Markdown. It's powered by Nok
10
10
  * Links
11
11
  * Code (inline and blocks)
12
12
  * Definition lists
13
+ * Tables
13
14
 
14
15
  As well as other tags.
15
16
 
data/lib/markitdown.rb CHANGED
@@ -130,7 +130,25 @@ module Markitdown
130
130
  results << " `#{node.text}` "
131
131
  end
132
132
  recurse = false
133
+ when "table"
134
+ results << "\n\n"
135
+ after = "\n\n"
136
+ when "th"
137
+ results << "|"
138
+ when "td"
139
+ results << "|"
140
+ when "tr"
141
+ after = "|\n"
142
+ table = find_parent(node.parent, "table")
143
+ if table
144
+ first_row = table.xpath("//tr").first
145
+ if first_row == node
146
+ cell_count = node.xpath("//th|td").count
147
+ after << ("|---"*cell_count) + "|\n"
148
+ end
149
+ end
133
150
  end
151
+
134
152
  if recurse
135
153
  node.children.each do |child|
136
154
  contents = self.parse_node(child, states)
@@ -138,6 +156,7 @@ module Markitdown
138
156
  results << contents
139
157
  end
140
158
  end
159
+
141
160
  if strip_content
142
161
  last_tags = results.pop
143
162
  after = after.flatten.compact.join if after.is_a?(Array)
@@ -195,4 +214,10 @@ module Markitdown
195
214
  end
196
215
  result
197
216
  end
217
+
218
+ def self.find_parent(node, tag_name) # Returns node itself, or its nearest ancestor, whose name equals tag_name; nil when there is none.
219
+ return nil unless node # nil input, or the parent chain ran out without a match
220
+ return node if node.name == tag_name # the starting node itself counts as a match
221
+ find_parent(node.parent, tag_name) # otherwise retry one level up
222
+ end
198
223
  end
@@ -1,3 +1,3 @@
1
1
  module Markitdown
2
- VERSION = "0.0.10"
2
+ VERSION = "0.1.0"
3
3
  end
data/spec/table.html ADDED
@@ -0,0 +1,59 @@
1
+ <html>
2
+ <body>
3
+ <h1>This is a test</h1>
4
+
5
+ <p>This is only a test</p>
6
+
7
+ <h2>Had this been a real test</h2>
8
+
9
+ <p>This <a href="http://www.google.com">announcement</a> would be followed by instructions.</p>
10
+
11
+ <table>
12
+ <thead>
13
+ <tr>
14
+ <th>This</th>
15
+ <th>is</th>
16
+ <th>a</th>
17
+ <th>table</th>
18
+ </tr>
19
+ </thead>
20
+ <tbody>
21
+ <tr>
22
+ <td>This</td>
23
+ <td>is</td>
24
+ <td>a</td>
25
+ </tr>
26
+ <tr>
27
+ <td>This</td>
28
+ <td>is</td>
29
+ <td>a</td>
30
+ <td><a href="http://www.google.com">announcement</a></td>
31
+ </tr>
32
+ <tr>
33
+ <td>This</td>
34
+ <td>is</td>
35
+ <td>a</td>
36
+ <td><em>table</em></td>
37
+ </tr>
38
+ <tr>
39
+ <td>This</td>
40
+ <td>is</td>
41
+ <td>a</td>
42
+ <td><strong>table</strong></td>
43
+ </tr>
44
+ <tr>
45
+ <td>This</td>
46
+ <td>is</td>
47
+ <td>a</td>
48
+ <td>table</td>
49
+ </tr>
50
+ <tr>
51
+ <td>This</td>
52
+ <td>is</td>
53
+ <td>a</td>
54
+ <td>table</td>
55
+ </tr>
56
+ </tbody>
57
+ </table>
58
+ </body>
59
+ </html>
@@ -0,0 +1,19 @@
1
+
2
+
3
+ # This is a test
4
+
5
+ This is only a test
6
+
7
+ ## Had this been a real test
8
+
9
+ This [announcement](http://www.google.com) would be followed by instructions.
10
+
11
+ |This|is|a|table|
12
+ |---|---|---|---|
13
+ |This|is|a|
14
+ |This|is|a| [announcement](http://www.google.com) |
15
+ |This|is|a| _table_ |
16
+ |This|is|a| **table** |
17
+ |This|is|a|table|
18
+ |This|is|a|table|
19
+
data/spec/table2.html ADDED
@@ -0,0 +1,54 @@
1
+ <html>
2
+ <body>
3
+ <h1>This is a test</h1>
4
+
5
+ <p>This is only a test</p>
6
+
7
+ <h2>Had this been a real test</h2>
8
+
9
+ <p>This <a href="http://www.google.com">announcement</a> would be followed by instructions.</p>
10
+
11
+ <table>
12
+ <tr>
13
+ <th>This</th>
14
+ <th>is</th>
15
+ <th>a</th>
16
+ <th>table</th>
17
+ </tr>
18
+ <tr>
19
+ <td>This</td>
20
+ <td>is</td>
21
+ <td>a</td>
22
+ </tr>
23
+ <tr>
24
+ <td>This</td>
25
+ <td>is</td>
26
+ <td>a</td>
27
+ <td><a href="http://www.google.com">announcement</a></td>
28
+ </tr>
29
+ <tr>
30
+ <td>This</td>
31
+ <td>is</td>
32
+ <td>a</td>
33
+ <td><em>table</em></td>
34
+ </tr>
35
+ <tr>
36
+ <td>This</td>
37
+ <td>is</td>
38
+ <td>a</td>
39
+ <td><strong>table</strong></td>
40
+ </tr>
41
+ <tr>
42
+ <td>This</td>
43
+ <td>is</td>
44
+ <td>a</td>
45
+ <td>table</td>
46
+ </tr>
47
+ <tr>
48
+ <td>This</td>
49
+ <td>is</td>
50
+ <td>a</td>
51
+ <td>table</td>
52
+ </tr>
53
+ </body>
54
+ </html>
@@ -0,0 +1,19 @@
1
+
2
+
3
+ # This is a test
4
+
5
+ This is only a test
6
+
7
+ ## Had this been a real test
8
+
9
+ This [announcement](http://www.google.com) would be followed by instructions.
10
+
11
+ |This|is|a|table|
12
+ |---|---|---|---|
13
+ |This|is|a|
14
+ |This|is|a| [announcement](http://www.google.com) |
15
+ |This|is|a| _table_ |
16
+ |This|is|a| **table** |
17
+ |This|is|a|table|
18
+ |This|is|a|table|
19
+
@@ -0,0 +1,19 @@
1
+ require 'markitdown'
2
+
3
+ describe Markitdown do # table conversion: from_html output must equal the stored .markdown fixture
4
+ context "When parsing a table with a thead and tbody" do
5
+ let(:html) { File.read("spec/table.html") } # relative path, resolved against the working directory
6
+
7
+ it "should produce valid markdown" do
8
+ Markitdown.from_html(html).should == File.read("spec/table.markdown") # exact string equality with the fixture file
9
+ end
10
+ end
11
+
12
+ context "When parsing a table without a thead and tbody" do
13
+ let(:html) { File.read("spec/table2.html") } # same table content with rows placed directly under <table>
14
+
15
+ it "should produce valid markdown" do
16
+ Markitdown.from_html(html).should == File.read("spec/table2.markdown") # exact string equality with the fixture file
17
+ end
18
+ end
19
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markitdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-05 00:00:00.000000000 Z
12
+ date: 2013-07-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -83,6 +83,11 @@ files:
83
83
  - spec/evernote.markdown
84
84
  - spec/evernote.xml
85
85
  - spec/nesting_spec.rb
86
+ - spec/table.html
87
+ - spec/table.markdown
88
+ - spec/table2.html
89
+ - spec/table2.markdown
90
+ - spec/table_spec.rb
86
91
  - spec/tag_spec.rb
87
92
  homepage: https://github.com/cpetersen/markitdown
88
93
  licenses: []
@@ -98,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
98
103
  version: '0'
99
104
  segments:
100
105
  - 0
101
- hash: 73152326707210871
106
+ hash: 3407872921255543985
102
107
  required_rubygems_version: !ruby/object:Gem::Requirement
103
108
  none: false
104
109
  requirements:
@@ -107,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
107
112
  version: '0'
108
113
  segments:
109
114
  - 0
110
- hash: 73152326707210871
115
+ hash: 3407872921255543985
111
116
  requirements: []
112
117
  rubyforge_project:
113
118
  rubygems_version: 1.8.23
@@ -123,4 +128,9 @@ test_files:
123
128
  - spec/evernote.markdown
124
129
  - spec/evernote.xml
125
130
  - spec/nesting_spec.rb
131
+ - spec/table.html
132
+ - spec/table.markdown
133
+ - spec/table2.html
134
+ - spec/table2.markdown
135
+ - spec/table_spec.rb
126
136
  - spec/tag_spec.rb