markitdown 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -136,17 +136,19 @@ module Markitdown
136
136
  after = "\n\n"
137
137
  when "th"
138
138
  results << "|"
139
+ strip_content = true
139
140
  flatten_content = true
140
141
  when "td"
141
142
  results << "|"
143
+ strip_content = true
142
144
  flatten_content = true
143
145
  when "tr"
144
146
  after = "|\n"
145
147
  table = find_parent(node.parent, "table")
146
148
  if table
147
- first_row = table.xpath("//tr").first
149
+ first_row = table.xpath(".//tr").first
148
150
  if first_row == node
149
- cell_count = node.xpath("//th|td").count
151
+ cell_count = node.xpath(".//th|td").count
150
152
  after << ("|---"*cell_count) + "|\n"
151
153
  end
152
154
  end
@@ -156,7 +158,7 @@ module Markitdown
156
158
  node.children.each do |child|
157
159
  contents = self.parse_node(child, states)
158
160
  contents = contents.flatten.compact.join.strip if strip_content
159
- contents = contents.flatten.compact.join.gsub("\n", " ") if flatten_content
161
+ contents = [contents].flatten.compact.join.gsub("\n", " ") if flatten_content
160
162
  results << contents
161
163
  end
162
164
  end
@@ -1,3 +1,3 @@
1
1
  module Markitdown
2
- VERSION = "0.1.1"
2
+ VERSION = "0.1.2"
3
3
  end
@@ -11,9 +11,9 @@ This [announcement](http://www.google.com) would be followed by instructions.
11
11
  |This|is|a|table|
12
12
  |---|---|---|---|
13
13
  |This|is|a|
14
- |This|is|a| [announcement](http://www.google.com) |
15
- |This|is|a| _table_ |
16
- |This|is|a| **table** |
14
+ |This|is|a|[announcement](http://www.google.com)|
15
+ |This|is|a|_table_|
16
+ |This|is|a|**table**|
17
17
  |This|is|a|table|
18
18
  |This|is|a|table|
19
19
 
@@ -9,46 +9,73 @@
9
9
  <p>This <a href="http://www.google.com">announcement</a> would be followed by instructions.</p>
10
10
 
11
11
  <table>
12
- <tr>
13
- <th>This <br/></th>
14
- <th><p>is</p></th>
15
- <th>a</th>
16
- <th>table</th>
17
- </tr>
18
- <tr>
19
- <td>This</td>
20
- <td>is</td>
21
- <td>a</td>
22
- </tr>
23
- <tr>
24
- <td>This</td>
25
- <td>is</td>
26
- <td>a</td>
27
- <td><a href="http://www.google.com">announcement</a></td>
28
- </tr>
29
- <tr>
30
- <td>This</td>
31
- <td>is</td>
32
- <td>a</td>
33
- <td><em>table</em></td>
34
- </tr>
35
- <tr>
36
- <td>This</td>
37
- <td>is</td>
38
- <td>a</td>
39
- <td><strong>table</strong></td>
40
- </tr>
41
- <tr>
42
- <td>This</td>
43
- <td>is</td>
44
- <td>a</td>
45
- <td>table <br/></td>
46
- </tr>
47
- <tr>
48
- <td>This</td>
49
- <td><p>is</p></td>
50
- <td><p>a<br/></p><br/></td>
51
- <td>table</td>
52
- </tr>
12
+ <tr>
13
+ <th>This <br/></th>
14
+ <th><p>is</p></th>
15
+ <th>a</th>
16
+ <th>table</th>
17
+ </tr>
18
+ <tr>
19
+ <td>This</td>
20
+ <td>is</td>
21
+ <td>a</td>
22
+ </tr>
23
+ <tr>
24
+ <td>This</td>
25
+ <td>is</td>
26
+ <td>a</td>
27
+ <td><a href="http://www.google.com">announcement</a></td>
28
+ </tr>
29
+ <tr>
30
+ <td>This</td>
31
+ <td>is</td>
32
+ <td>a</td>
33
+ <td><em>table</em></td>
34
+ </tr>
35
+ <tr>
36
+ <td>This</td>
37
+ <td>is</td>
38
+ <td>a</td>
39
+ <td><strong>table</strong></td>
40
+ </tr>
41
+ <tr>
42
+ <td>This</td>
43
+ <td>is</td>
44
+ <td>a</td>
45
+ <td>table <br/></td>
46
+ </tr>
47
+ <tr>
48
+ <td>This</td>
49
+ <td><p>is</p></td>
50
+ <td><p>a<br/></p><br/></td>
51
+ <td>table</td>
52
+ </tr>
53
+ </table>
54
+
55
+ <p>This paragraph is just to break up the tables</p>
56
+
57
+ <table>
58
+ <tr>
59
+ <th>This</th>
60
+ <th>is</th>
61
+ <th>a</th>
62
+ <th>second</th>
63
+ <th>table</th>
64
+ </tr>
65
+ <tr>
66
+ <td>This</td>
67
+ <td>is</td>
68
+ <td>a</td>
69
+ <th>second</th>
70
+ <th>table</th>
71
+ </tr>
72
+ <tr>
73
+ <td>This</td>
74
+ <td>is</td>
75
+ <td>a</td>
76
+ <td><a href="http://www.google.com">announcement</a></td>
77
+ <th>second</th>
78
+ </tr>
79
+ </table>
53
80
  </body>
54
81
  </html>
@@ -8,12 +8,19 @@ This is only a test
8
8
 
9
9
  This [announcement](http://www.google.com) would be followed by instructions.
10
10
 
11
- |This | is |a|table|
11
+ |This|is|a|table|
12
12
  |---|---|---|---|
13
13
  |This|is|a|
14
- |This|is|a| [announcement](http://www.google.com) |
15
- |This|is|a| _table_ |
16
- |This|is|a| **table** |
17
- |This|is|a|table |
18
- |This| is | a |table|
14
+ |This|is|a|[announcement](http://www.google.com)|
15
+ |This|is|a|_table_|
16
+ |This|is|a|**table**|
17
+ |This|is|a|table|
18
+ |This|is|a|table|
19
+
20
+ This paragraph is just to break up the tables
21
+
22
+ |This|is|a|second|table|
23
+ |---|---|---|---|---|
24
+ |This|is|a|second|table|
25
+ |This|is|a|[announcement](http://www.google.com)|second|
19
26
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markitdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -103,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
103
  version: '0'
104
104
  segments:
105
105
  - 0
106
- hash: -667411799113990729
106
+ hash: 3593533597031584594
107
107
  required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  none: false
109
109
  requirements:
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  version: '0'
113
113
  segments:
114
114
  - 0
115
- hash: -667411799113990729
115
+ hash: 3593533597031584594
116
116
  requirements: []
117
117
  rubyforge_project:
118
118
  rubygems_version: 1.8.23