ruby_powerpoint 1.4.2 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/ruby_powerpoint.rb +1 -0
- data/lib/ruby_powerpoint/paragraph.rb +19 -0
- data/lib/ruby_powerpoint/slide.rb +29 -9
- data/lib/ruby_powerpoint/version.rb +1 -1
- data/spec/fixtures/sample.pptx +0 -0
- data/spec/test_spec.rb +27 -7
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDQ5OTcyMTU2OTg4M2JjN2JkYzI4MjZjZGM0NGFhMzk0MTMzOGRkNA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
M2I2ZDgxZGM3OWRhNDBkYmZjYTgyNWQwNTA0MTI5ZmUwMzQ5YTE3Nw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2I4ZWZlOWFhMDRjN2E0ZDEzYjExMDY0ZTc4NjM1NzYyNGFiMWY3MmNlN2Ix
|
10
|
+
MDJlMDZjYmRhNDNiYWNkYTcxNmM4NDM3ZDU0OTExMzBjOWE1NDQ2YmRhNDU4
|
11
|
+
NGUxNWYxOWQ4MDQ5MDI1NWY1MjM4YTM3M2JkZTg0MWVkNzBiNTE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZWI2ZjA1YTIyZTllODBjYmQ2MGRmZTMzMjJjMDgxOTJiODQ2MGNiOTgzMWQz
|
14
|
+
YzgwODk1NzgyMWI3NmYxN2RjYTZkOTdkMTIwYmRhOGNkODY1ODlmYWIxZTAw
|
15
|
+
Mjk4ZGNmYmM2NmJiZDJjNTQ0ZjI5YzE1ZmIzNzMxOTY2ZGIzN2Q=
|
data/lib/ruby_powerpoint.rb
CHANGED
@@ -0,0 +1,19 @@
|
|
1
|
+
module RubyPowerpoint
|
2
|
+
class RubyPowerpoint::Paragraph
|
3
|
+
def initialize slide, paragraph_xml
|
4
|
+
@slide = slide
|
5
|
+
@presentation = slide.presentation
|
6
|
+
@paragraph_xml = paragraph_xml
|
7
|
+
end
|
8
|
+
|
9
|
+
def content
|
10
|
+
content_element @paragraph_xml
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def content_element(xml)
|
16
|
+
xml.xpath('.//a:t').collect{ |node| node.text }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -13,16 +13,24 @@ module RubyPowerpoint
|
|
13
13
|
@presentation = presentation
|
14
14
|
@slide_xml_path = slide_xml_path
|
15
15
|
@slide_number = extract_slide_number_from_path slide_xml_path
|
16
|
+
@slide_notes_xml_path = "ppt/notesSlides/notesSlide#{@slide_number}.xml"
|
16
17
|
@slide_file_name = extract_slide_file_name_from_path slide_xml_path
|
18
|
+
|
17
19
|
parse_slide
|
20
|
+
parse_slide_notes
|
18
21
|
parse_relation
|
19
22
|
end
|
20
23
|
|
21
|
-
def parse_slide
|
24
|
+
def parse_slide
|
22
25
|
slide_doc = @presentation.files.file.open @slide_xml_path
|
23
26
|
@slide_xml = Nokogiri::XML::Document.parse slide_doc
|
24
27
|
end
|
25
28
|
|
29
|
+
def parse_slide_notes
|
30
|
+
slide_notes_doc = @presentation.files.file.open @slide_notes_xml_path rescue nil
|
31
|
+
@slide_notes_xml = Nokogiri::XML::Document.parse(slide_notes_doc) if slide_notes_doc
|
32
|
+
end
|
33
|
+
|
26
34
|
def parse_relation
|
27
35
|
@relation_xml_path = "ppt/slides/_rels/#{@slide_file_name}.rels"
|
28
36
|
if @presentation.files.file.exist? @relation_xml_path
|
@@ -34,7 +42,11 @@ module RubyPowerpoint
|
|
34
42
|
def content
|
35
43
|
content_elements @slide_xml
|
36
44
|
end
|
37
|
-
|
45
|
+
|
46
|
+
def notes_content
|
47
|
+
content_elements @slide_notes_xml
|
48
|
+
end
|
49
|
+
|
38
50
|
def title
|
39
51
|
title_elements = title_elements(@slide_xml)
|
40
52
|
title_elements.join(" ") if title_elements.length > 0
|
@@ -47,11 +59,15 @@ module RubyPowerpoint
|
|
47
59
|
node['Target'].gsub('..', 'ppt'))
|
48
60
|
end
|
49
61
|
end
|
50
|
-
|
62
|
+
|
51
63
|
def slide_num
|
52
64
|
@slide_xml_path.match(/slide([0-9]*)\.xml$/)[1].to_i
|
53
65
|
end
|
54
|
-
|
66
|
+
|
67
|
+
def paragraphs
|
68
|
+
paragraph_element @slide_xml
|
69
|
+
end
|
70
|
+
|
55
71
|
private
|
56
72
|
|
57
73
|
def extract_slide_number_from_path path
|
@@ -65,25 +81,29 @@ module RubyPowerpoint
|
|
65
81
|
def title_elements(xml)
|
66
82
|
shape_elements(xml).select{ |shape| element_is_title(shape) }
|
67
83
|
end
|
68
|
-
|
84
|
+
|
69
85
|
def content_elements(xml)
|
70
86
|
xml.xpath('//a:t').collect{ |node| node.text }
|
71
87
|
end
|
72
88
|
|
73
89
|
def image_elements(xml)
|
74
90
|
xml.css('Relationship').select{ |node| element_is_image(node) }
|
75
|
-
end
|
91
|
+
end
|
76
92
|
|
77
93
|
def shape_elements(xml)
|
78
94
|
xml.xpath('//p:sp')
|
79
|
-
end
|
80
|
-
|
95
|
+
end
|
96
|
+
|
97
|
+
def paragraph_element(xml)
|
98
|
+
xml.xpath('//a:p').collect{ |node| RubyPowerpoint::Paragraph.new(self, node) }
|
99
|
+
end
|
100
|
+
|
81
101
|
def element_is_title(shape)
|
82
102
|
shape.xpath('.//p:nvSpPr/p:nvPr/p:ph').select{ |prop| prop['type'] == 'title' || prop['type'] == 'ctrTitle' }.length > 0
|
83
103
|
end
|
84
104
|
|
85
105
|
def element_is_image(node)
|
86
|
-
node['Type'].include? 'image'
|
106
|
+
node['Type'].include? 'image'
|
87
107
|
end
|
88
108
|
end
|
89
109
|
end
|
data/spec/fixtures/sample.pptx
CHANGED
Binary file
|
data/spec/test_spec.rb
CHANGED
@@ -24,11 +24,17 @@ describe 'RubyPowerpoint parsing a sample PPTX file' do
|
|
24
24
|
File.open('temp_1.jpg', 'w'){|f| f.puts image_byte_stream_1}
|
25
25
|
|
26
26
|
@deck.slides.first.images.first.should_not eql nil #"ppt/media/image1.jpeg"
|
27
|
-
@deck.slides.last.title.should eql "Some title here"
|
27
|
+
@deck.slides.last.title.should eql "Some title here"
|
28
28
|
@deck.slides.last.content.should eql ["Some title here", "Some txt here", "Some ", "more text here."]
|
29
29
|
image_byte_stream_2 = @deck.slides.last.images.first.read
|
30
30
|
File.open('temp_2.jpg', 'w'){|f| f.puts image_byte_stream_2}
|
31
31
|
end
|
32
|
+
|
33
|
+
it "it parses Slide Notes of a PPTX slides" do
|
34
|
+
notes_content = @deck.slides[0].notes_content
|
35
|
+
notes_content.should eql ["Testing", " Multiline Notes.", "To be extracted here.", "Multiline notes extracted.", "1"]
|
36
|
+
end
|
37
|
+
|
32
38
|
end
|
33
39
|
|
34
40
|
describe 'open rime.pptx file' do
|
@@ -39,20 +45,20 @@ describe 'open rime.pptx file' do
|
|
39
45
|
after(:all) do
|
40
46
|
@deck.close
|
41
47
|
end
|
42
|
-
|
48
|
+
|
43
49
|
it 'opened rime.pptx successfully' do
|
44
50
|
@deck.should_not be_nil
|
45
51
|
@deck.slides.should_not eql []
|
46
52
|
end
|
47
|
-
|
53
|
+
|
48
54
|
it 'should have the right number of slides' do
|
49
55
|
@deck.slides.length.should eql 12
|
50
56
|
end
|
51
|
-
|
57
|
+
|
52
58
|
it 'the old content method should work the same way' do
|
53
59
|
@deck.slides[0].content.should eql ["The Rime of the Ancient Mariner", "(text of 1834)", "http://rpo.library.utoronto.ca/poems/rime-ancient-mariner-text-1834"]
|
54
60
|
end
|
55
|
-
|
61
|
+
|
56
62
|
context 'the titles should be right' do
|
57
63
|
it 'should be able to get a main slide (usually centered)' do
|
58
64
|
@deck.slides[0].title.should eql "The Rime of the Ancient Mariner"
|
@@ -68,9 +74,23 @@ describe 'open rime.pptx file' do
|
|
68
74
|
@deck.slides[5].title.should be_nil
|
69
75
|
@deck.slides[6].title.should be_nil
|
70
76
|
end
|
71
|
-
|
77
|
+
|
72
78
|
it 'should only get one title even if there are two things that visually look like titles' do
|
73
79
|
@deck.slides[7].title.should eql "What if we have two"
|
74
80
|
end
|
81
|
+
|
82
|
+
context 'when slide contains paragraph' do
|
83
|
+
before(:all) do
|
84
|
+
@slide = @deck.slides[1]
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'should return the list of paragraphs' do
|
88
|
+
@slide.paragraphs.count.should eql 2
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'should return the content of the paragraph' do
|
92
|
+
@slide.paragraphs[0].content.should eq ['Argument']
|
93
|
+
end
|
94
|
+
end
|
75
95
|
end
|
76
|
-
end
|
96
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_powerpoint
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.2
|
4
|
+
version: 1.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pythonicrubyist
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- README.rdoc
|
94
94
|
- Rakefile
|
95
95
|
- lib/ruby_powerpoint.rb
|
96
|
+
- lib/ruby_powerpoint/paragraph.rb
|
96
97
|
- lib/ruby_powerpoint/presentation.rb
|
97
98
|
- lib/ruby_powerpoint/slide.rb
|
98
99
|
- lib/ruby_powerpoint/version.rb
|
@@ -121,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
122
|
version: '0'
|
122
123
|
requirements: []
|
123
124
|
rubyforge_project:
|
124
|
-
rubygems_version: 2.4.
|
125
|
+
rubygems_version: 2.4.3
|
125
126
|
signing_key:
|
126
127
|
specification_version: 4
|
127
128
|
summary: ruby_powerpoint is a Ruby gem that can extract title, content and images
|