doc2text 0.4.0 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7b9b5aeaa63d276696f0f4f716242b181d8d3aef2e47861053c03f9623cdf498
4
- data.tar.gz: a9ac2a3e0314334dda782f8ce8ef0d5a0691015ae70da0cc7a7fd79b2d6d7cd2
3
+ metadata.gz: dcded1a71b2126a042cde415956262082bf8bb256102aaf8fcfe731ed125161b
4
+ data.tar.gz: af197aac8bd0bb9b40a9b9f78358e54c788f3a582acb8cb77c0e227200cfe5f4
5
5
  SHA512:
6
- metadata.gz: 88fcdc3ade712a061c62641dd7713274c676f41d37c7020348ad401e0e7af3a86b07a3726a057870396ee68a290867fccf635d3191c8376b45850507e2f566e9
7
- data.tar.gz: a96c1f4cbfbb42079f5e5d6eea757531d7d7e852e01724c40cc58f94ee5ebd27e700bdffc221cc7c7202a3686a6f3c40d8a5e153f07dcdca7aa4fd542b13eac9
6
+ metadata.gz: 4c0aed71728b8be859273e6d8b502f9007e80ab6de17666d78095f9877d4035241656c127ef705e3c7878b33cf6d26fdd00f615f8c95e4bd2998f816f584c162
7
+ data.tar.gz: 5ad185c29658a5e98537221415f77dd5efc1f9b43d28144b57be9cedd998bd5c7cf808438ae4b23f9d712f0e2bb4af6d1546897e1d5951ed9ab4e54ff4263630
@@ -21,7 +21,7 @@ module Doc2Text
21
21
 
22
22
  def end_element_namespace(name, prefix = nil, uri = nil)
23
23
  if @current_node.parent and @current_node.parent.office_text?
24
- @output << @current_node.expand
24
+ @output.write @current_node.expand
25
25
  @current_node.delete
26
26
  end
27
27
  @current_node = @current_node.parent
@@ -0,0 +1,55 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ module XmlNodes
4
+ class Node < XmlBasedDocument::XmlNodes::Node
5
+ def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
6
+ begin
7
+ clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
8
+ rescue NameError => e
9
+ # markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
10
+ Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
11
+ else
12
+ clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
13
+ end
14
+ end
15
+
16
+ def body?
17
+ false
18
+ end
19
+ end
20
+
21
+ class PlainText < XmlBasedDocument::XmlNodes::PlainText
22
+ def body?
23
+ false
24
+ end
25
+ end
26
+
27
+ class Generic < Node
28
+ end
29
+
30
+ module W
31
+ class Wbody < Node
32
+ def body?
33
+ true
34
+ end
35
+ end
36
+
37
+ class Wbr < Node
38
+ def open
39
+ '<br/>'
40
+ end
41
+ end
42
+
43
+ class Wp < Node
44
+ def open
45
+ "\n"
46
+ end
47
+
48
+ def close
49
+ "\n"
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,30 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ class Document < XmlBasedDocument::DocumentFile
4
+
5
+ def self.parse_and_save(input, output_filename)
6
+ docx = new input
7
+ begin
8
+ docx.unpack
9
+ output = File.open output_filename, 'w'
10
+ markdown = Markdown::DocxParser.new output, nil
11
+ begin
12
+ docx.parse markdown
13
+ ensure
14
+ markdown.close
15
+ end
16
+ ensure
17
+ docx.clean
18
+ end
19
+ end
20
+
21
+ def contains_extracted_files?
22
+ File.exist? File.join(extract_path, '[Content_Types].xml')
23
+ end
24
+
25
+ def extract_extension
26
+ 'unpacked_pptx'
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,55 @@
1
+ module Doc2Text
2
+ module Pptx
3
+ module XmlNodes
4
+ class Node < XmlBasedDocument::XmlNodes::Node
5
+ def self.create_node(prefix, name, parent = nil, attrs = [], markdown_odt_parser = nil)
6
+ begin
7
+ clazz = XmlNodes.const_get "#{prefix.capitalize}::W#{name}"
8
+ rescue NameError => e
9
+ # markdown_odt_parser.logger.warn "No such <#{prefix}:#{name}> found"
10
+ Generic.new(parent, attrs, prefix, name, markdown_odt_parser)
11
+ else
12
+ clazz.new(parent, attrs, prefix, name, markdown_odt_parser)
13
+ end
14
+ end
15
+
16
+ def body?
17
+ false
18
+ end
19
+ end
20
+
21
+ class PlainText < XmlBasedDocument::XmlNodes::PlainText
22
+ def body?
23
+ false
24
+ end
25
+ end
26
+
27
+ class Generic < Node
28
+ end
29
+
30
+ module W
31
+ class Wbody < Node
32
+ def body?
33
+ true
34
+ end
35
+ end
36
+
37
+ class Wbr < Node
38
+ def open
39
+ '<br/>'
40
+ end
41
+ end
42
+
43
+ class Wp < Node
44
+ def open
45
+ "\n"
46
+ end
47
+
48
+ def close
49
+ "\n"
50
+ end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -4,6 +4,8 @@ module Doc2Text
4
4
  case File.extname source
5
5
  when '.docx'
6
6
  Doc2Text::Docx::Document.parse_and_save source, output
7
+ when '.pptx'
8
+ Doc2Text::Pptx::Document.parse_and_save source, output
7
9
  else
8
10
  Doc2Text::Odt::Document.parse_and_save source, output
9
11
  end
data/lib/doc2text.rb CHANGED
@@ -15,4 +15,8 @@ require 'doc2text/docx/docx'
15
15
  require 'doc2text/docx/markdown_docx_parser'
16
16
  require 'doc2text/docx/docx_xml_namespaces'
17
17
 
18
+ require 'doc2text/pptx/pptx'
19
+ require 'doc2text/pptx/markdown_pptx_parser'
20
+ require 'doc2text/pptx/pptx_xml_namespaces'
21
+
18
22
  require 'doc2text/styles_parser'
metadata CHANGED
@@ -1,58 +1,53 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc2text
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
- - Valentin Aitken
7
+ - Valentin A.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-01-14 00:00:00.000000000 Z
11
+ date: 2021-11-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.8'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
- version: 1.8.2
19
+ version: 1.12.5
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: 1.13.0
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '1.8'
30
27
  - - ">="
31
28
  - !ruby/object:Gem::Version
32
- version: 1.8.2
29
+ version: 1.12.5
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: 1.13.0
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: rubyzip
35
35
  requirement: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '1.2'
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- version: 1.2.2
39
+ version: 2.3.0
43
40
  type: :runtime
44
41
  prerelease: false
45
42
  version_requirements: !ruby/object:Gem::Requirement
46
43
  requirements:
47
44
  - - "~>"
48
45
  - !ruby/object:Gem::Version
49
- version: '1.2'
50
- - - ">="
51
- - !ruby/object:Gem::Version
52
- version: 1.2.2
46
+ version: 2.3.0
53
47
  description: Parses odt to markdown
54
48
  email: valentin@nalisbg.com
55
- executables: []
49
+ executables:
50
+ - doc2text
56
51
  extensions: []
57
52
  extra_rdoc_files: []
58
53
  files:
@@ -66,6 +61,9 @@ files:
66
61
  - lib/doc2text/odt/markdown_odt_parser.rb
67
62
  - lib/doc2text/odt/odt.rb
68
63
  - lib/doc2text/odt/odt_xml_namespaces.rb
64
+ - lib/doc2text/pptx/markdown_pptx_parser.rb
65
+ - lib/doc2text/pptx/pptx.rb
66
+ - lib/doc2text/pptx/pptx_xml_namespaces.rb
69
67
  - lib/doc2text/resolution.rb
70
68
  - lib/doc2text/styles_parser.rb
71
69
  - lib/doc2text/xml_based_document_file.rb
@@ -88,8 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
88
86
  - !ruby/object:Gem::Version
89
87
  version: '0'
90
88
  requirements: []
91
- rubyforge_project:
92
- rubygems_version: 2.7.8
89
+ rubygems_version: 3.1.2
93
90
  signing_key:
94
91
  specification_version: 4
95
92
  summary: Translates odt to markdown