doc2text 0.4.0 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b9b5aeaa63d276696f0f4f716242b181d8d3aef2e47861053c03f9623cdf498
4
- data.tar.gz: a9ac2a3e0314334dda782f8ce8ef0d5a0691015ae70da0cc7a7fd79b2d6d7cd2
3
+ metadata.gz: dcded1a71b2126a042cde415956262082bf8bb256102aaf8fcfe731ed125161b
4
+ data.tar.gz: af197aac8bd0bb9b40a9b9f78358e54c788f3a582acb8cb77c0e227200cfe5f4
5
5
  SHA512:
6
- metadata.gz: 88fcdc3ade712a061c62641dd7713274c676f41d37c7020348ad401e0e7af3a86b07a3726a057870396ee68a290867fccf635d3191c8376b45850507e2f566e9
7
- data.tar.gz: a96c1f4cbfbb42079f5e5d6eea757531d7d7e852e01724c40cc58f94ee5ebd27e700bdffc221cc7c7202a3686a6f3c40d8a5e153f07dcdca7aa4fd542b13eac9
6
+ metadata.gz: 4c0aed71728b8be859273e6d8b502f9007e80ab6de17666d78095f9877d4035241656c127ef705e3c7878b33cf6d26fdd00f615f8c95e4bd2998f816f584c162
7
+ data.tar.gz: 5ad185c29658a5e98537221415f77dd5efc1f9b43d28144b57be9cedd998bd5c7cf808438ae4b23f9d712f0e2bb4af6d1546897e1d5951ed9ab4e54ff4263630
@@ -21,7 +21,7 @@ module Doc2Text
21
21
 
22
22
  def end_element_namespace(name, prefix = nil, uri = nil)
23
23
  if @current_node.parent and @current_node.parent.office_text?
24
- @output << @current_node.expand
24
+ @output.write @current_node.expand
25
25
  @current_node.delete
26
26
  end
27
27
  @current_node = @current_node.parent
@@ -0,0 +1,55 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ module XmlNodes
4
+ class Node < XmlBasedDocument::XmlNodes::Node
5
+ def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
6
+ begin
7
+ clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
8
+ rescue NameError => e
9
+ # markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
10
+ Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
11
+ else
12
+ clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
13
+ end
14
+ end
15
+
16
+ def body?
17
+ false
18
+ end
19
+ end
20
+
21
+ class PlainText < XmlBasedDocument::XmlNodes::PlainText
22
+ def body?
23
+ false
24
+ end
25
+ end
26
+
27
+ class Generic < Node
28
+ end
29
+
30
+ module W
31
+ class Wbody < Node
32
+ def body?
33
+ true
34
+ end
35
+ end
36
+
37
+ class Wbr < Node
38
+ def open
39
+ '<br/>'
40
+ end
41
+ end
42
+
43
+ class Wp < Node
44
+ def open
45
+ "\n"
46
+ end
47
+
48
+ def close
49
+ "\n"
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,30 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ class Document < XmlBasedDocument::DocumentFile
4
+
5
+ def self.parse_and_save(input, output_filename)
6
+ docx = new input
7
+ begin
8
+ docx.unpack
9
+ output = File.open output_filename, 'w'
10
+ markdown = Markdown::DocxParser.new output, nil
11
+ begin
12
+ docx.parse markdown
13
+ ensure
14
+ markdown.close
15
+ end
16
+ ensure
17
+ docx.clean
18
+ end
19
+ end
20
+
21
+ def contains_extracted_files?
22
+ File.exist? File.join(extract_path, '[Content_Types].xml')
23
+ end
24
+
25
+ def extract_extension
26
+ 'unpacked_pptx'
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,55 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ module XmlNodes
4
+ class Node < XmlBasedDocument::XmlNodes::Node
5
+ def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
6
+ begin
7
+ clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
8
+ rescue NameError => e
9
+ # markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
10
+ Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
11
+ else
12
+ clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
13
+ end
14
+ end
15
+
16
+ def body?
17
+ false
18
+ end
19
+ end
20
+
21
+ class PlainText < XmlBasedDocument::XmlNodes::PlainText
22
+ def body?
23
+ false
24
+ end
25
+ end
26
+
27
+ class Generic < Node
28
+ end
29
+
30
+ module W
31
+ class Wbody < Node
32
+ def body?
33
+ true
34
+ end
35
+ end
36
+
37
+ class Wbr < Node
38
+ def open
39
+ '<br/>'
40
+ end
41
+ end
42
+
43
+ class Wp < Node
44
+ def open
45
+ "\n"
46
+ end
47
+
48
+ def close
49
+ "\n"
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -4,6 +4,8 @@ module Doc2Text
4
4
  case File.extname source
5
5
  when '.docx'
6
6
  Doc2Text::Docx::Document.parse_and_save source, output
7
+ when '.pptx'
8
+ Doc2Text::Pptx::Document.parse_and_save source, output
7
9
  else
8
10
  Doc2Text::Odt::Document.parse_and_save source, output
9
11
  end
data/lib/doc2text.rb CHANGED
@@ -15,4 +15,8 @@ require 'doc2text/docx/docx'
15
15
  require 'doc2text/docx/markdown_docx_parser'
16
16
  require 'doc2text/docx/docx_xml_namespaces'
17
17
 
18
+ require 'doc2text/pptx/pptx'
19
+ require 'doc2text/pptx/markdown_pptx_parser'
20
+ require 'doc2text/pptx/pptx_xml_namespaces'
21
+
18
22
  require 'doc2text/styles_parser'
metadata CHANGED
@@ -1,58 +1,53 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc2text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
- - Valentin Aitken
7
+ - Valentin A.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-01-14 00:00:00.000000000 Z
11
+ date: 2021-11-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.8'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
- version: 1.8.2
19
+ version: 1.12.5
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: 1.13.0
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '1.8'
30
27
  - - ">="
31
28
  - !ruby/object:Gem::Version
32
- version: 1.8.2
29
+ version: 1.12.5
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: 1.13.0
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: rubyzip
35
35
  requirement: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '1.2'
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- version: 1.2.2
39
+ version: 2.3.0
43
40
  type: :runtime
44
41
  prerelease: false
45
42
  version_requirements: !ruby/object:Gem::Requirement
46
43
  requirements:
47
44
  - - "~>"
48
45
  - !ruby/object:Gem::Version
49
- version: '1.2'
50
- - - ">="
51
- - !ruby/object:Gem::Version
52
- version: 1.2.2
46
+ version: 2.3.0
53
47
  description: Parses odt to markdown
54
48
  email: valentin@nalisbg.com
55
- executables: []
49
+ executables:
50
+ - doc2text
56
51
  extensions: []
57
52
  extra_rdoc_files: []
58
53
  files:
@@ -66,6 +61,9 @@ files:
66
61
  - lib/doc2text/odt/markdown_odt_parser.rb
67
62
  - lib/doc2text/odt/odt.rb
68
63
  - lib/doc2text/odt/odt_xml_namespaces.rb
64
+ - lib/doc2text/pptx/markdown_pptx_parser.rb
65
+ - lib/doc2text/pptx/pptx.rb
66
+ - lib/doc2text/pptx/pptx_xml_namespaces.rb
69
67
  - lib/doc2text/resolution.rb
70
68
  - lib/doc2text/styles_parser.rb
71
69
  - lib/doc2text/xml_based_document_file.rb
@@ -88,8 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
86
  - !ruby/object:Gem::Version
89
87
  version: '0'
90
88
  requirements: []
91
- rubyforge_project:
92
- rubygems_version: 2.7.8
89
+ rubygems_version: 3.1.2
93
90
  signing_key:
94
91
  specification_version: 4
95
92
  summary: Translates odt to markdown