doc2text 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: edc97441d869c8306f70a7bd3b9cd655fae7e846a872371cc72b71a541b1c743
4
- data.tar.gz: 56c2e44d3b9badcf11b4d8f86c765bfdacfd75da34f415513722e29cff980cff
3
+ metadata.gz: e803f834de30ec59e70080fa881b47e4647e467f1d443cca097aaa726d80d48a
4
+ data.tar.gz: 5ff4569486e0a8f59a089918abd5f3025e46bf3970b7e7c5d50e6d2bd38e9718
5
5
  SHA512:
6
- metadata.gz: 011a5934269c344d24251112eaee264080d7f4e7b3bec2b2cf03cd1e92c644be019d23670e5cd4e5a166ef2ecb5dc5c4a0afeed5b8ff4c2b1d2848425cb9eda6
7
- data.tar.gz: 97fb8ea67e2593b69d609ccf7aacde3590c9d16e0c7909b360fcff13d88ee8e2480e7aad14bf4dc073807ea363b4844d0c03e9617a8a1a783fffd81317e6d5c0
6
+ metadata.gz: 29766bf3c446cd231277da1d8f41f6d3e2c8c8b46e01f58acefe9b62f123646de7757680cd94687637ef06a439bb69a8066a623fc25235a97e70970d311886dd
7
+ data.tar.gz: 585d4d505d4ffa9c9885e813debe512edefda10943fdf1693388838c858830e0a4802f12716fbc956ac8b8de4c0899892d9b35a9a8fe32e1e3707ff3c8fd7e00
@@ -15,4 +15,8 @@ require 'doc2text/docx/docx'
15
15
  require 'doc2text/docx/markdown_docx_parser'
16
16
  require 'doc2text/docx/docx_xml_namespaces'
17
17
 
18
+ require 'doc2text/pptx/pptx'
19
+ require 'doc2text/pptx/markdown_pptx_parser'
20
+ require 'doc2text/pptx/pptx_xml_namespaces'
21
+
18
22
  require 'doc2text/styles_parser'
@@ -0,0 +1,55 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ module XmlNodes
4
+ class Node < XmlBasedDocument::XmlNodes::Node
5
+ def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
6
+ begin
7
+ clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
8
+ rescue NameError => e
9
+ # markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
10
+ Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
11
+ else
12
+ clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
13
+ end
14
+ end
15
+
16
+ def body?
17
+ false
18
+ end
19
+ end
20
+
21
+ class PlainText < XmlBasedDocument::XmlNodes::PlainText
22
+ def body?
23
+ false
24
+ end
25
+ end
26
+
27
+ class Generic < Node
28
+ end
29
+
30
+ module W
31
+ class Wbody < Node
32
+ def body?
33
+ true
34
+ end
35
+ end
36
+
37
+ class Wbr < Node
38
+ def open
39
+ '<br/>'
40
+ end
41
+ end
42
+
43
+ class Wp < Node
44
+ def open
45
+ "\n"
46
+ end
47
+
48
+ def close
49
+ "\n"
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,30 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ class Document < XmlBasedDocument::DocumentFile
4
+
5
+ def self.parse_and_save(input, output_filename)
6
+ docx = new input
7
+ begin
8
+ docx.unpack
9
+ output = File.open output_filename, 'w'
10
+ markdown = Markdown::DocxParser.new output, nil
11
+ begin
12
+ docx.parse markdown
13
+ ensure
14
+ markdown.close
15
+ end
16
+ ensure
17
+ docx.clean
18
+ end
19
+ end
20
+
21
+ def contains_extracted_files?
22
+ File.exist? File.join(extract_path, '[Content_Types].xml')
23
+ end
24
+
25
+ def extract_extension
26
+ 'unpacked_pptx'
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,55 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ module XmlNodes
4
+ class Node < XmlBasedDocument::XmlNodes::Node
5
+ def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
6
+ begin
7
+ clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
8
+ rescue NameError => e
9
+ # markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
10
+ Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
11
+ else
12
+ clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
13
+ end
14
+ end
15
+
16
+ def body?
17
+ false
18
+ end
19
+ end
20
+
21
+ class PlainText < XmlBasedDocument::XmlNodes::PlainText
22
+ def body?
23
+ false
24
+ end
25
+ end
26
+
27
+ class Generic < Node
28
+ end
29
+
30
+ module W
31
+ class Wbody < Node
32
+ def body?
33
+ true
34
+ end
35
+ end
36
+
37
+ class Wbr < Node
38
+ def open
39
+ '<br/>'
40
+ end
41
+ end
42
+
43
+ class Wp < Node
44
+ def open
45
+ "\n"
46
+ end
47
+
48
+ def close
49
+ "\n"
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -4,6 +4,8 @@ module Doc2Text
4
4
  case File.extname source
5
5
  when '.docx'
6
6
  Doc2Text::Docx::Document.parse_and_save source, output
7
+ when '.pptx'
8
+ Doc2Text::Pptx::Document.parse_and_save source, output
7
9
  else
8
10
  Doc2Text::Odt::Document.parse_and_save source, output
9
11
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc2text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
- - Valentin Aitken
7
+ - Valentin A.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-10-05 00:00:00.000000000 Z
11
+ date: 2021-01-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.10.0
19
+ version: 1.11.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.10.0
26
+ version: 1.11.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rubyzip
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.0.0
33
+ version: 2.3.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.0.0
40
+ version: 2.3.0
41
41
  description: Parses odt to markdown
42
42
  email: valentin@nalisbg.com
43
43
  executables:
@@ -55,6 +55,9 @@ files:
55
55
  - lib/doc2text/odt/markdown_odt_parser.rb
56
56
  - lib/doc2text/odt/odt.rb
57
57
  - lib/doc2text/odt/odt_xml_namespaces.rb
58
+ - lib/doc2text/pptx/markdown_pptx_parser.rb
59
+ - lib/doc2text/pptx/pptx.rb
60
+ - lib/doc2text/pptx/pptx_xml_namespaces.rb
58
61
  - lib/doc2text/resolution.rb
59
62
  - lib/doc2text/styles_parser.rb
60
63
  - lib/doc2text/xml_based_document_file.rb
@@ -77,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
80
  - !ruby/object:Gem::Version
78
81
  version: '0'
79
82
  requirements: []
80
- rubygems_version: 3.0.3
83
+ rubygems_version: 3.1.2
81
84
  signing_key:
82
85
  specification_version: 4
83
86
  summary: Translates odt to markdown