doc2text 0.4.2 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/doc2text.rb +4 -0
- data/lib/doc2text/pptx/markdown_pptx_parser.rb +55 -0
- data/lib/doc2text/pptx/pptx.rb +30 -0
- data/lib/doc2text/pptx/pptx_xml_namespaces.rb +55 -0
- data/lib/doc2text/resolution.rb +2 -0
- metadata +11 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e803f834de30ec59e70080fa881b47e4647e467f1d443cca097aaa726d80d48a
|
4
|
+
data.tar.gz: 5ff4569486e0a8f59a089918abd5f3025e46bf3970b7e7c5d50e6d2bd38e9718
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 29766bf3c446cd231277da1d8f41f6d3e2c8c8b46e01f58acefe9b62f123646de7757680cd94687637ef06a439bb69a8066a623fc25235a97e70970d311886dd
|
7
|
+
data.tar.gz: 585d4d505d4ffa9c9885e813debe512edefda10943fdf1693388838c858830e0a4802f12716fbc956ac8b8de4c0899892d9b35a9a8fe32e1e3707ff3c8fd7e00
|
data/lib/doc2text.rb
CHANGED
@@ -15,4 +15,8 @@ require 'doc2text/docx/docx'
|
|
15
15
|
require 'doc2text/docx/markdown_docx_parser'
|
16
16
|
require 'doc2text/docx/docx_xml_namespaces'
|
17
17
|
|
18
|
+
require 'doc2text/pptx/pptx'
|
19
|
+
require 'doc2text/pptx/markdown_pptx_parser'
|
20
|
+
require 'doc2text/pptx/pptx_xml_namespaces'
|
21
|
+
|
18
22
|
require 'doc2text/styles_parser'
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Doc2Text
|
2
|
+
module Pptx
|
3
|
+
module XmlNodes
|
4
|
+
class Node < XmlBasedDocument::XmlNodes::Node
|
5
|
+
def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
|
6
|
+
begin
|
7
|
+
clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
|
8
|
+
rescue NameError => e
|
9
|
+
# markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
|
10
|
+
Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
|
11
|
+
else
|
12
|
+
clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def body?
|
17
|
+
false
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
class PlainText < XmlBasedDocument::XmlNodes::PlainText
|
22
|
+
def body?
|
23
|
+
false
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
class Generic < Node
|
28
|
+
end
|
29
|
+
|
30
|
+
module W
|
31
|
+
class Wbody < Node
|
32
|
+
def body?
|
33
|
+
true
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
class Wbr < Node
|
38
|
+
def open
|
39
|
+
'<br/>'
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
class Wp < Node
|
44
|
+
def open
|
45
|
+
"\n"
|
46
|
+
end
|
47
|
+
|
48
|
+
def close
|
49
|
+
"\n"
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Doc2Text
|
2
|
+
module Pptx
|
3
|
+
class Document < XmlBasedDocument::DocumentFile
|
4
|
+
|
5
|
+
def self.parse_and_save(input, output_filename)
|
6
|
+
docx = new input
|
7
|
+
begin
|
8
|
+
docx.unpack
|
9
|
+
output = File.open output_filename, 'w'
|
10
|
+
markdown = Markdown::DocxParser.new output, nil
|
11
|
+
begin
|
12
|
+
docx.parse markdown
|
13
|
+
ensure
|
14
|
+
markdown.close
|
15
|
+
end
|
16
|
+
ensure
|
17
|
+
docx.clean
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def contains_extracted_files?
|
22
|
+
File.exist? File.join(extract_path, '[Content_Types].xml')
|
23
|
+
end
|
24
|
+
|
25
|
+
def extract_extension
|
26
|
+
'unpacked_pptx'
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Doc2Text
|
2
|
+
module Pptx
|
3
|
+
module XmlNodes
|
4
|
+
class Node < XmlBasedDocument::XmlNodes::Node
|
5
|
+
def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
|
6
|
+
begin
|
7
|
+
clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
|
8
|
+
rescue NameError => e
|
9
|
+
# markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
|
10
|
+
Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
|
11
|
+
else
|
12
|
+
clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def body?
|
17
|
+
false
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
class PlainText < XmlBasedDocument::XmlNodes::PlainText
|
22
|
+
def body?
|
23
|
+
false
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
class Generic < Node
|
28
|
+
end
|
29
|
+
|
30
|
+
module W
|
31
|
+
class Wbody < Node
|
32
|
+
def body?
|
33
|
+
true
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
class Wbr < Node
|
38
|
+
def open
|
39
|
+
'<br/>'
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
class Wp < Node
|
44
|
+
def open
|
45
|
+
"\n"
|
46
|
+
end
|
47
|
+
|
48
|
+
def close
|
49
|
+
"\n"
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/doc2text/resolution.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doc2text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.2
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
- Valentin
|
7
|
+
- Valentin A.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.
|
19
|
+
version: 1.11.1
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.
|
26
|
+
version: 1.11.1
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rubyzip
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.
|
33
|
+
version: 2.3.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 2.
|
40
|
+
version: 2.3.0
|
41
41
|
description: Parses odt to markdown
|
42
42
|
email: valentin@nalisbg.com
|
43
43
|
executables:
|
@@ -55,6 +55,9 @@ files:
|
|
55
55
|
- lib/doc2text/odt/markdown_odt_parser.rb
|
56
56
|
- lib/doc2text/odt/odt.rb
|
57
57
|
- lib/doc2text/odt/odt_xml_namespaces.rb
|
58
|
+
- lib/doc2text/pptx/markdown_pptx_parser.rb
|
59
|
+
- lib/doc2text/pptx/pptx.rb
|
60
|
+
- lib/doc2text/pptx/pptx_xml_namespaces.rb
|
58
61
|
- lib/doc2text/resolution.rb
|
59
62
|
- lib/doc2text/styles_parser.rb
|
60
63
|
- lib/doc2text/xml_based_document_file.rb
|
@@ -77,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
80
|
- !ruby/object:Gem::Version
|
78
81
|
version: '0'
|
79
82
|
requirements: []
|
80
|
-
rubygems_version: 3.
|
83
|
+
rubygems_version: 3.1.2
|
81
84
|
signing_key:
|
82
85
|
specification_version: 4
|
83
86
|
summary: Translates odt to markdown
|