markitdown 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,7 +23,8 @@ module Markitdown
23
23
  states.unshift node.name.downcase
24
24
  pre = prefix(states)
25
25
  recurse = true
26
- strip_contents = false
26
+ strip_content = false
27
+ flatten_content = false
27
28
  case node.name
28
29
  when "head"
29
30
  return []
@@ -135,8 +136,10 @@ module Markitdown
135
136
  after = "\n\n"
136
137
  when "th"
137
138
  results << "|"
139
+ flatten_content = true
138
140
  when "td"
139
141
  results << "|"
142
+ flatten_content = true
140
143
  when "tr"
141
144
  after = "|\n"
142
145
  table = find_parent(node.parent, "table")
@@ -153,6 +156,7 @@ module Markitdown
153
156
  node.children.each do |child|
154
157
  contents = self.parse_node(child, states)
155
158
  contents = contents.flatten.compact.join.strip if strip_content
159
+ contents = contents.flatten.compact.join.gsub("\n", " ") if flatten_content
156
160
  results << contents
157
161
  end
158
162
  end
@@ -1,3 +1,3 @@
1
1
  module Markitdown
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -10,8 +10,8 @@
10
10
 
11
11
  <table>
12
12
  <tr>
13
- <th>This</th>
14
- <th>is</th>
13
+ <th>This <br/></th>
14
+ <th><p>is</p></th>
15
15
  <th>a</th>
16
16
  <th>table</th>
17
17
  </tr>
@@ -42,12 +42,12 @@
42
42
  <td>This</td>
43
43
  <td>is</td>
44
44
  <td>a</td>
45
- <td>table</td>
45
+ <td>table <br/></td>
46
46
  </tr>
47
47
  <tr>
48
48
  <td>This</td>
49
- <td>is</td>
50
- <td>a</td>
49
+ <td><p>is</p></td>
50
+ <td><p>a<br/></p><br/></td>
51
51
  <td>table</td>
52
52
  </tr>
53
53
  </body>
@@ -8,12 +8,12 @@ This is only a test
8
8
 
9
9
  This [announcement](http://www.google.com) would be followed by instructions.
10
10
 
11
- |This|is|a|table|
11
+ |This | is |a|table|
12
12
  |---|---|---|---|
13
13
  |This|is|a|
14
14
  |This|is|a| [announcement](http://www.google.com) |
15
15
  |This|is|a| _table_ |
16
16
  |This|is|a| **table** |
17
- |This|is|a|table|
18
- |This|is|a|table|
17
+ |This|is|a|table |
18
+ |This| is | a |table|
19
19
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markitdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -103,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
103
  version: '0'
104
104
  segments:
105
105
  - 0
106
- hash: 3407872921255543985
106
+ hash: -667411799113990729
107
107
  required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  none: false
109
109
  requirements:
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  version: '0'
113
113
  segments:
114
114
  - 0
115
- hash: 3407872921255543985
115
+ hash: -667411799113990729
116
116
  requirements: []
117
117
  rubyforge_project:
118
118
  rubygems_version: 1.8.23