doc2text 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: edc97441d869c8306f70a7bd3b9cd655fae7e846a872371cc72b71a541b1c743
4
- data.tar.gz: 56c2e44d3b9badcf11b4d8f86c765bfdacfd75da34f415513722e29cff980cff
3
+ metadata.gz: e803f834de30ec59e70080fa881b47e4647e467f1d443cca097aaa726d80d48a
4
+ data.tar.gz: 5ff4569486e0a8f59a089918abd5f3025e46bf3970b7e7c5d50e6d2bd38e9718
5
5
  SHA512:
6
- metadata.gz: 011a5934269c344d24251112eaee264080d7f4e7b3bec2b2cf03cd1e92c644be019d23670e5cd4e5a166ef2ecb5dc5c4a0afeed5b8ff4c2b1d2848425cb9eda6
7
- data.tar.gz: 97fb8ea67e2593b69d609ccf7aacde3590c9d16e0c7909b360fcff13d88ee8e2480e7aad14bf4dc073807ea363b4844d0c03e9617a8a1a783fffd81317e6d5c0
6
+ metadata.gz: 29766bf3c446cd231277da1d8f41f6d3e2c8c8b46e01f58acefe9b62f123646de7757680cd94687637ef06a439bb69a8066a623fc25235a97e70970d311886dd
7
+ data.tar.gz: 585d4d505d4ffa9c9885e813debe512edefda10943fdf1693388838c858830e0a4802f12716fbc956ac8b8de4c0899892d9b35a9a8fe32e1e3707ff3c8fd7e00
@@ -15,4 +15,8 @@ require 'doc2text/docx/docx'
15
15
  require 'doc2text/docx/markdown_docx_parser'
16
16
  require 'doc2text/docx/docx_xml_namespaces'
17
17
 
18
+ require 'doc2text/pptx/pptx'
19
+ require 'doc2text/pptx/markdown_pptx_parser'
20
+ require 'doc2text/pptx/pptx_xml_namespaces'
21
+
18
22
  require 'doc2text/styles_parser'
@@ -0,0 +1,55 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ module XmlNodes
4
+ class Node < XmlBasedDocument::XmlNodes::Node
5
+ def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
6
+ begin
7
+ clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
8
+ rescue NameError => e
9
+ # markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
10
+ Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
11
+ else
12
+ clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
13
+ end
14
+ end
15
+
16
+ def body?
17
+ false
18
+ end
19
+ end
20
+
21
+ class PlainText < XmlBasedDocument::XmlNodes::PlainText
22
+ def body?
23
+ false
24
+ end
25
+ end
26
+
27
+ class Generic < Node
28
+ end
29
+
30
+ module W
31
+ class Wbody < Node
32
+ def body?
33
+ true
34
+ end
35
+ end
36
+
37
+ class Wbr < Node
38
+ def open
39
+ '<br/>'
40
+ end
41
+ end
42
+
43
+ class Wp < Node
44
+ def open
45
+ "\n"
46
+ end
47
+
48
+ def close
49
+ "\n"
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,30 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ class Document < XmlBasedDocument::DocumentFile
4
+
5
+ def self.parse_and_save(input, output_filename)
6
+ docx = new input
7
+ begin
8
+ docx.unpack
9
+ output = File.open output_filename, 'w'
10
+ markdown = Markdown::DocxParser.new output, nil
11
+ begin
12
+ docx.parse markdown
13
+ ensure
14
+ markdown.close
15
+ end
16
+ ensure
17
+ docx.clean
18
+ end
19
+ end
20
+
21
+ def contains_extracted_files?
22
+ File.exist? File.join(extract_path, '[Content_Types].xml')
23
+ end
24
+
25
+ def extract_extension
26
+ 'unpacked_pptx'
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,55 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ module XmlNodes
4
+ class Node < XmlBasedDocument::XmlNodes::Node
5
+ def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
6
+ begin
7
+ clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
8
+ rescue NameError => e
9
+ # markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
10
+ Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
11
+ else
12
+ clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
13
+ end
14
+ end
15
+
16
+ def body?
17
+ false
18
+ end
19
+ end
20
+
21
+ class PlainText < XmlBasedDocument::XmlNodes::PlainText
22
+ def body?
23
+ false
24
+ end
25
+ end
26
+
27
+ class Generic < Node
28
+ end
29
+
30
+ module W
31
+ class Wbody < Node
32
+ def body?
33
+ true
34
+ end
35
+ end
36
+
37
+ class Wbr < Node
38
+ def open
39
+ '<br/>'
40
+ end
41
+ end
42
+
43
+ class Wp < Node
44
+ def open
45
+ "\n"
46
+ end
47
+
48
+ def close
49
+ "\n"
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -4,6 +4,8 @@ module Doc2Text
4
4
  case File.extname source
5
5
  when '.docx'
6
6
  Doc2Text::Docx::Document.parse_and_save source, output
7
+ when '.pptx'
8
+ Doc2Text::Pptx::Document.parse_and_save source, output
7
9
  else
8
10
  Doc2Text::Odt::Document.parse_and_save source, output
9
11
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc2text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
- - Valentin Aitken
7
+ - Valentin A.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-10-05 00:00:00.000000000 Z
11
+ date: 2021-01-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -16,28 +16,28 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.10.0
19
+ version: 1.11.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.10.0
26
+ version: 1.11.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rubyzip
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: 2.0.0
33
+ version: 2.3.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: 2.0.0
40
+ version: 2.3.0
41
41
  description: Parses odt to markdown
42
42
  email: valentin@nalisbg.com
43
43
  executables:
@@ -55,6 +55,9 @@ files:
55
55
  - lib/doc2text/odt/markdown_odt_parser.rb
56
56
  - lib/doc2text/odt/odt.rb
57
57
  - lib/doc2text/odt/odt_xml_namespaces.rb
58
+ - lib/doc2text/pptx/markdown_pptx_parser.rb
59
+ - lib/doc2text/pptx/pptx.rb
60
+ - lib/doc2text/pptx/pptx_xml_namespaces.rb
58
61
  - lib/doc2text/resolution.rb
59
62
  - lib/doc2text/styles_parser.rb
60
63
  - lib/doc2text/xml_based_document_file.rb
@@ -77,7 +80,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
80
  - !ruby/object:Gem::Version
78
81
  version: '0'
79
82
  requirements: []
80
- rubygems_version: 3.0.3
83
+ rubygems_version: 3.1.2
81
84
  signing_key:
82
85
  specification_version: 4
83
86
  summary: Translates odt to markdown