doc2text 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/doc2text/markdown_odt_parser.rb +8 -16
- data/lib/doc2text/odt_xml_namespaces.rb +3 -31
- data/lib/doc2text/odt_xml_node.rb +5 -13
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 143e076cf52e7beda9fc90bf38f80b0695dac194
|
4
|
+
data.tar.gz: bc1c78cda02cd9ab1c5536a83b50ea89711b8b3d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ebfe477ca9ca9da0b1784f929ead6f7729d8e626690562043d474debda3fd9209abf6862498569eca736d0ab8e21535b0a642cd47ed9e84e37b247b3cdacd0a
|
7
|
+
data.tar.gz: 5e826d5a707999eb81f1486d60b67cd7098fb1653f324f646bb4a83b4d8069c31f8395b38fbe5d500729ebc22871575e5684e2c991d176723e396ea970adb6e3
|
@@ -19,24 +19,16 @@ module Doc2Text
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def close_node(prefix, name)
|
22
|
-
if Odt::XmlNodes::Node.create_node(prefix, name, nil, [], self).eql? @current_node
|
23
|
-
|
24
|
-
|
25
|
-
# if @current_node.parent
|
26
|
-
# @output << @current_node.parent.expand
|
27
|
-
# @current_node.parent.un_delete
|
28
|
-
# else
|
29
|
-
@output << @current_node.expand
|
30
|
-
# end
|
31
|
-
end
|
32
|
-
@current_node = @current_node.parent
|
33
|
-
if @current_node && @current_node.delete_on_close?
|
22
|
+
# if Odt::XmlNodes::Node.create_node(prefix, name, nil, [], self).eql? @current_node
|
23
|
+
if @current_node.parent and @current_node.parent.office_text?
|
24
|
+
@output << @current_node.expand
|
34
25
|
@current_node.delete
|
35
26
|
end
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
27
|
+
@current_node = @current_node.parent
|
28
|
+
# else
|
29
|
+
# # TODO remove this redundant(tree build algorithm) checks
|
30
|
+
# raise Doc2Text::XmlError, "!Close node child #{prefix} #{name} IS NOT correct, CURRENT_ELEM #{@current_node}"
|
31
|
+
# end
|
40
32
|
end
|
41
33
|
|
42
34
|
def text(string)
|
@@ -24,28 +24,16 @@ module Doc2Text
|
|
24
24
|
module Office
|
25
25
|
class AutomaticStyles
|
26
26
|
include Node
|
27
|
-
|
28
|
-
def visit
|
29
|
-
:automatic_styles
|
30
|
-
end
|
31
|
-
|
32
|
-
def delete_on_close?
|
33
|
-
false
|
34
|
-
end
|
35
27
|
end
|
36
28
|
|
37
29
|
class DocumentContent
|
38
30
|
include Node
|
39
|
-
|
40
|
-
def delete_on_close?
|
41
|
-
true
|
42
|
-
end
|
43
31
|
end
|
44
32
|
|
45
33
|
class Text
|
46
34
|
include Node
|
47
35
|
|
48
|
-
def
|
36
|
+
def office_text?
|
49
37
|
true
|
50
38
|
end
|
51
39
|
end
|
@@ -69,7 +57,7 @@ module Doc2Text
|
|
69
57
|
|
70
58
|
def expand
|
71
59
|
header_delimiter = parent.children.count >= 2 && parent.children[1] == self ? "\n|---|---|" : ''
|
72
|
-
result = "\n#{@children.
|
60
|
+
result = "\n#{@children.map(&:expand).join.strip.gsub "\n", ''} |#{header_delimiter}"
|
73
61
|
delete
|
74
62
|
result
|
75
63
|
end
|
@@ -86,18 +74,10 @@ module Doc2Text
|
|
86
74
|
module Style
|
87
75
|
class Style
|
88
76
|
include Node
|
89
|
-
|
90
|
-
def delete_on_close?
|
91
|
-
false
|
92
|
-
end
|
93
77
|
end
|
94
78
|
|
95
79
|
class TextProperties
|
96
80
|
include Node
|
97
|
-
|
98
|
-
def delete_on_close?
|
99
|
-
false
|
100
|
-
end
|
101
81
|
end
|
102
82
|
end
|
103
83
|
module XslFoCompatible; end
|
@@ -197,7 +177,7 @@ module Doc2Text
|
|
197
177
|
include Text
|
198
178
|
|
199
179
|
def expand
|
200
|
-
result = "* #{@children.
|
180
|
+
result = "* #{@children.map(&:expand).join.strip.gsub /\n{2,}/, "\n"}\n"
|
201
181
|
delete
|
202
182
|
result.clone
|
203
183
|
end
|
@@ -205,10 +185,6 @@ module Doc2Text
|
|
205
185
|
def fetch_style?
|
206
186
|
false
|
207
187
|
end
|
208
|
-
|
209
|
-
def delete_on_close?
|
210
|
-
false
|
211
|
-
end
|
212
188
|
end
|
213
189
|
|
214
190
|
class List
|
@@ -232,10 +208,6 @@ module Doc2Text
|
|
232
208
|
}
|
233
209
|
end
|
234
210
|
end
|
235
|
-
|
236
|
-
def delete_on_close?
|
237
|
-
false
|
238
|
-
end
|
239
211
|
end
|
240
212
|
end
|
241
213
|
end
|
@@ -42,22 +42,14 @@ module Doc2Text
|
|
42
42
|
''
|
43
43
|
end
|
44
44
|
|
45
|
-
def
|
45
|
+
def office_text?
|
46
46
|
false
|
47
47
|
end
|
48
48
|
|
49
|
-
def not_deleted?
|
50
|
-
!@deleted
|
51
|
-
end
|
52
|
-
|
53
49
|
def delete
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
def un_delete
|
60
|
-
@deleted = false
|
50
|
+
return true unless @children
|
51
|
+
@children.each { |child| child.delete }
|
52
|
+
@children = []
|
61
53
|
end
|
62
54
|
|
63
55
|
def eql?(object)
|
@@ -78,7 +70,7 @@ module Doc2Text
|
|
78
70
|
end
|
79
71
|
|
80
72
|
def expand
|
81
|
-
expanded = "#{open}#{@children.
|
73
|
+
expanded = "#{open}#{@children.map(&:expand).join}#{close}"
|
82
74
|
delete
|
83
75
|
expanded.clone
|
84
76
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doc2text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: '0.3'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Valentin Aitken
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Parses odt to markdown
|
14
14
|
email: bostko@gmail.com
|