markitdown 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -23,7 +23,8 @@ module Markitdown
23
23
  states.unshift node.name.downcase
24
24
  pre = prefix(states)
25
25
  recurse = true
26
- strip_contents = false
26
+ strip_content = false
27
+ flatten_content = false
27
28
  case node.name
28
29
  when "head"
29
30
  return []
@@ -135,8 +136,10 @@ module Markitdown
135
136
  after = "\n\n"
136
137
  when "th"
137
138
  results << "|"
139
+ flatten_content = true
138
140
  when "td"
139
141
  results << "|"
142
+ flatten_content = true
140
143
  when "tr"
141
144
  after = "|\n"
142
145
  table = find_parent(node.parent, "table")
@@ -153,6 +156,7 @@ module Markitdown
153
156
  node.children.each do |child|
154
157
  contents = self.parse_node(child, states)
155
158
  contents = contents.flatten.compact.join.strip if strip_content
159
+ contents = contents.flatten.compact.join.gsub("\n", " ") if flatten_content
156
160
  results << contents
157
161
  end
158
162
  end
@@ -1,3 +1,3 @@
1
1
  module Markitdown
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -10,8 +10,8 @@
10
10
 
11
11
  <table>
12
12
  <tr>
13
- <th>This</th>
14
- <th>is</th>
13
+ <th>This <br/></th>
14
+ <th><p>is</p></th>
15
15
  <th>a</th>
16
16
  <th>table</th>
17
17
  </tr>
@@ -42,12 +42,12 @@
42
42
  <td>This</td>
43
43
  <td>is</td>
44
44
  <td>a</td>
45
- <td>table</td>
45
+ <td>table <br/></td>
46
46
  </tr>
47
47
  <tr>
48
48
  <td>This</td>
49
- <td>is</td>
50
- <td>a</td>
49
+ <td><p>is</p></td>
50
+ <td><p>a<br/></p><br/></td>
51
51
  <td>table</td>
52
52
  </tr>
53
53
  </body>
@@ -8,12 +8,12 @@ This is only a test
8
8
 
9
9
  This [announcement](http://www.google.com) would be followed by instructions.
10
10
 
11
- |This|is|a|table|
11
+ |This | is |a|table|
12
12
  |---|---|---|---|
13
13
  |This|is|a|
14
14
  |This|is|a| [announcement](http://www.google.com) |
15
15
  |This|is|a| _table_ |
16
16
  |This|is|a| **table** |
17
- |This|is|a|table|
18
- |This|is|a|table|
17
+ |This|is|a|table |
18
+ |This| is | a |table|
19
19
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markitdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -103,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
103
103
  version: '0'
104
104
  segments:
105
105
  - 0
106
- hash: 3407872921255543985
106
+ hash: -667411799113990729
107
107
  required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  none: false
109
109
  requirements:
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
112
112
  version: '0'
113
113
  segments:
114
114
  - 0
115
- hash: 3407872921255543985
115
+ hash: -667411799113990729
116
116
  requirements: []
117
117
  rubyforge_project:
118
118
  rubygems_version: 1.8.23