markitdown 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/markitdown.rb +5 -1
- data/lib/markitdown/version.rb +1 -1
- data/spec/table2.html +5 -5
- data/spec/table2.markdown +3 -3
- metadata +3 -3
data/lib/markitdown.rb
CHANGED
@@ -23,7 +23,8 @@ module Markitdown
|
|
23
23
|
states.unshift node.name.downcase
|
24
24
|
pre = prefix(states)
|
25
25
|
recurse = true
|
26
|
-
|
26
|
+
strip_content = false
|
27
|
+
flatten_content = false
|
27
28
|
case node.name
|
28
29
|
when "head"
|
29
30
|
return []
|
@@ -135,8 +136,10 @@ module Markitdown
|
|
135
136
|
after = "\n\n"
|
136
137
|
when "th"
|
137
138
|
results << "|"
|
139
|
+
flatten_content = true
|
138
140
|
when "td"
|
139
141
|
results << "|"
|
142
|
+
flatten_content = true
|
140
143
|
when "tr"
|
141
144
|
after = "|\n"
|
142
145
|
table = find_parent(node.parent, "table")
|
@@ -153,6 +156,7 @@ module Markitdown
|
|
153
156
|
node.children.each do |child|
|
154
157
|
contents = self.parse_node(child, states)
|
155
158
|
contents = contents.flatten.compact.join.strip if strip_content
|
159
|
+
contents = contents.flatten.compact.join.gsub("\n", " ") if flatten_content
|
156
160
|
results << contents
|
157
161
|
end
|
158
162
|
end
|
data/lib/markitdown/version.rb
CHANGED
data/spec/table2.html
CHANGED
@@ -10,8 +10,8 @@
|
|
10
10
|
|
11
11
|
<table>
|
12
12
|
<tr>
|
13
|
-
<th>This
|
14
|
-
<th>is</th>
|
13
|
+
<th>This <br/></th>
|
14
|
+
<th><p>is</p></th>
|
15
15
|
<th>a</th>
|
16
16
|
<th>table</th>
|
17
17
|
</tr>
|
@@ -42,12 +42,12 @@
|
|
42
42
|
<td>This</td>
|
43
43
|
<td>is</td>
|
44
44
|
<td>a</td>
|
45
|
-
<td>table
|
45
|
+
<td>table <br/></td>
|
46
46
|
</tr>
|
47
47
|
<tr>
|
48
48
|
<td>This</td>
|
49
|
-
<td>is</td>
|
50
|
-
<td>a
|
49
|
+
<td><p>is</p></td>
|
50
|
+
<td><p>a<br/></p><br/></td>
|
51
51
|
<td>table</td>
|
52
52
|
</tr>
|
53
53
|
</body>
|
data/spec/table2.markdown
CHANGED
@@ -8,12 +8,12 @@ This is only a test
|
|
8
8
|
|
9
9
|
This [announcement](http://www.google.com) would be followed by instructions.
|
10
10
|
|
11
|
-
|This|is|a|table|
|
11
|
+
|This | is |a|table|
|
12
12
|
|---|---|---|---|
|
13
13
|
|This|is|a|
|
14
14
|
|This|is|a| [announcement](http://www.google.com) |
|
15
15
|
|This|is|a| _table_ |
|
16
16
|
|This|is|a| **table** |
|
17
|
-
|This|is|a|table|
|
18
|
-
|This|is|a|table|
|
17
|
+
|This|is|a|table |
|
18
|
+
|This| is | a |table|
|
19
19
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: markitdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -103,7 +103,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
103
103
|
version: '0'
|
104
104
|
segments:
|
105
105
|
- 0
|
106
|
-
hash:
|
106
|
+
hash: -667411799113990729
|
107
107
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
108
|
none: false
|
109
109
|
requirements:
|
@@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
112
|
version: '0'
|
113
113
|
segments:
|
114
114
|
- 0
|
115
|
-
hash:
|
115
|
+
hash: -667411799113990729
|
116
116
|
requirements: []
|
117
117
|
rubyforge_project:
|
118
118
|
rubygems_version: 1.8.23
|