doc2text 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/doc2text/markdown_odt_parser.rb +8 -16
- data/lib/doc2text/odt_xml_namespaces.rb +3 -31
- data/lib/doc2text/odt_xml_node.rb +5 -13
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 143e076cf52e7beda9fc90bf38f80b0695dac194
|
4
|
+
data.tar.gz: bc1c78cda02cd9ab1c5536a83b50ea89711b8b3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ebfe477ca9ca9da0b1784f929ead6f7729d8e626690562043d474debda3fd9209abf6862498569eca736d0ab8e21535b0a642cd47ed9e84e37b247b3cdacd0a
|
7
|
+
data.tar.gz: 5e826d5a707999eb81f1486d60b67cd7098fb1653f324f646bb4a83b4d8069c31f8395b38fbe5d500729ebc22871575e5684e2c991d176723e396ea970adb6e3
|
@@ -19,24 +19,16 @@ module Doc2Text
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def close_node(prefix, name)
|
22
|
-
if Odt::XmlNodes::Node.create_node(prefix, name, nil, [], self).eql? @current_node
|
23
|
-
|
24
|
-
|
25
|
-
# if @current_node.parent
|
26
|
-
# @output << @current_node.parent.expand
|
27
|
-
# @current_node.parent.un_delete
|
28
|
-
# else
|
29
|
-
@output << @current_node.expand
|
30
|
-
# end
|
31
|
-
end
|
32
|
-
@current_node = @current_node.parent
|
33
|
-
if @current_node && @current_node.delete_on_close?
|
22
|
+
# if Odt::XmlNodes::Node.create_node(prefix, name, nil, [], self).eql? @current_node
|
23
|
+
if @current_node.parent and @current_node.parent.office_text?
|
24
|
+
@output << @current_node.expand
|
34
25
|
@current_node.delete
|
35
26
|
end
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
27
|
+
@current_node = @current_node.parent
|
28
|
+
# else
|
29
|
+
# # TODO remove this redundant(tree build algorithm) checks
|
30
|
+
# raise Doc2Text::XmlError, "!Close node child #{prefix} #{name} IS NOT correct, CURRENT_ELEM #{@current_node}"
|
31
|
+
# end
|
40
32
|
end
|
41
33
|
|
42
34
|
def text(string)
|
@@ -24,28 +24,16 @@ module Doc2Text
|
|
24
24
|
module Office
|
25
25
|
class AutomaticStyles
|
26
26
|
include Node
|
27
|
-
|
28
|
-
def visit
|
29
|
-
:automatic_styles
|
30
|
-
end
|
31
|
-
|
32
|
-
def delete_on_close?
|
33
|
-
false
|
34
|
-
end
|
35
27
|
end
|
36
28
|
|
37
29
|
class DocumentContent
|
38
30
|
include Node
|
39
|
-
|
40
|
-
def delete_on_close?
|
41
|
-
true
|
42
|
-
end
|
43
31
|
end
|
44
32
|
|
45
33
|
class Text
|
46
34
|
include Node
|
47
35
|
|
48
|
-
def
|
36
|
+
def office_text?
|
49
37
|
true
|
50
38
|
end
|
51
39
|
end
|
@@ -69,7 +57,7 @@ module Doc2Text
|
|
69
57
|
|
70
58
|
def expand
|
71
59
|
header_delimiter = parent.children.count >= 2 && parent.children[1] == self ? "\n|---|---|" : ''
|
72
|
-
result = "\n#{@children.
|
60
|
+
result = "\n#{@children.map(&:expand).join.strip.gsub "\n", ''} |#{header_delimiter}"
|
73
61
|
delete
|
74
62
|
result
|
75
63
|
end
|
@@ -86,18 +74,10 @@ module Doc2Text
|
|
86
74
|
module Style
|
87
75
|
class Style
|
88
76
|
include Node
|
89
|
-
|
90
|
-
def delete_on_close?
|
91
|
-
false
|
92
|
-
end
|
93
77
|
end
|
94
78
|
|
95
79
|
class TextProperties
|
96
80
|
include Node
|
97
|
-
|
98
|
-
def delete_on_close?
|
99
|
-
false
|
100
|
-
end
|
101
81
|
end
|
102
82
|
end
|
103
83
|
module XslFoCompatible; end
|
@@ -197,7 +177,7 @@ module Doc2Text
|
|
197
177
|
include Text
|
198
178
|
|
199
179
|
def expand
|
200
|
-
result = "* #{@children.
|
180
|
+
result = "* #{@children.map(&:expand).join.strip.gsub /\n{2,}/, "\n"}\n"
|
201
181
|
delete
|
202
182
|
result.clone
|
203
183
|
end
|
@@ -205,10 +185,6 @@ module Doc2Text
|
|
205
185
|
def fetch_style?
|
206
186
|
false
|
207
187
|
end
|
208
|
-
|
209
|
-
def delete_on_close?
|
210
|
-
false
|
211
|
-
end
|
212
188
|
end
|
213
189
|
|
214
190
|
class List
|
@@ -232,10 +208,6 @@ module Doc2Text
|
|
232
208
|
}
|
233
209
|
end
|
234
210
|
end
|
235
|
-
|
236
|
-
def delete_on_close?
|
237
|
-
false
|
238
|
-
end
|
239
211
|
end
|
240
212
|
end
|
241
213
|
end
|
@@ -42,22 +42,14 @@ module Doc2Text
|
|
42
42
|
''
|
43
43
|
end
|
44
44
|
|
45
|
-
def
|
45
|
+
def office_text?
|
46
46
|
false
|
47
47
|
end
|
48
48
|
|
49
|
-
def not_deleted?
|
50
|
-
!@deleted
|
51
|
-
end
|
52
|
-
|
53
49
|
def delete
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
def un_delete
|
60
|
-
@deleted = false
|
50
|
+
return true unless @children
|
51
|
+
@children.each { |child| child.delete }
|
52
|
+
@children = []
|
61
53
|
end
|
62
54
|
|
63
55
|
def eql?(object)
|
@@ -78,7 +70,7 @@ module Doc2Text
|
|
78
70
|
end
|
79
71
|
|
80
72
|
def expand
|
81
|
-
expanded = "#{open}#{@children.
|
73
|
+
expanded = "#{open}#{@children.map(&:expand).join}#{close}"
|
82
74
|
delete
|
83
75
|
expanded.clone
|
84
76
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doc2text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: '0.3'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Valentin Aitken
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parses odt to markdown
|
14
14
|
email: bostko@gmail.com
|