ruby_powerpoint 1.4.2 → 1.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/ruby_powerpoint.rb +1 -0
- data/lib/ruby_powerpoint/paragraph.rb +19 -0
- data/lib/ruby_powerpoint/slide.rb +29 -9
- data/lib/ruby_powerpoint/version.rb +1 -1
- data/spec/fixtures/sample.pptx +0 -0
- data/spec/test_spec.rb +27 -7
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDQ5OTcyMTU2OTg4M2JjN2JkYzI4MjZjZGM0NGFhMzk0MTMzOGRkNA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
M2I2ZDgxZGM3OWRhNDBkYmZjYTgyNWQwNTA0MTI5ZmUwMzQ5YTE3Nw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
M2I4ZWZlOWFhMDRjN2E0ZDEzYjExMDY0ZTc4NjM1NzYyNGFiMWY3MmNlN2Ix
|
10
|
+
MDJlMDZjYmRhNDNiYWNkYTcxNmM4NDM3ZDU0OTExMzBjOWE1NDQ2YmRhNDU4
|
11
|
+
NGUxNWYxOWQ4MDQ5MDI1NWY1MjM4YTM3M2JkZTg0MWVkNzBiNTE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZWI2ZjA1YTIyZTllODBjYmQ2MGRmZTMzMjJjMDgxOTJiODQ2MGNiOTgzMWQz
|
14
|
+
YzgwODk1NzgyMWI3NmYxN2RjYTZkOTdkMTIwYmRhOGNkODY1ODlmYWIxZTAw
|
15
|
+
Mjk4ZGNmYmM2NmJiZDJjNTQ0ZjI5YzE1ZmIzNzMxOTY2ZGIzN2Q=
|
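data/lib/ruby_powerpoint.rb
CHANGED
(the +1 -0 hunk for this file was not captured in this extract)
|
data/lib/ruby_powerpoint/paragraph.rb
ADDED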
@@ -0,0 +1,19 @@
|
|
1
|
+
module RubyPowerpoint
|
2
|
+
class RubyPowerpoint::Paragraph
|
3
|
+
def initialize slide, paragraph_xml
|
4
|
+
@slide = slide
|
5
|
+
@presentation = slide.presentation
|
6
|
+
@paragraph_xml = paragraph_xml
|
7
|
+
end
|
8
|
+
|
9
|
+
def content
|
10
|
+
content_element @paragraph_xml
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def content_element(xml)
|
16
|
+
xml.xpath('.//a:t').collect{ |node| node.text }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/ruby_powerpoint/slide.rb
CHANGED
@@ -13,16 +13,24 @@ module RubyPowerpoint
|
|
13
13
|
@presentation = presentation
|
14
14
|
@slide_xml_path = slide_xml_path
|
15
15
|
@slide_number = extract_slide_number_from_path slide_xml_path
|
16
|
+
@slide_notes_xml_path = "ppt/notesSlides/notesSlide#{@slide_number}.xml"
|
16
17
|
@slide_file_name = extract_slide_file_name_from_path slide_xml_path
|
18
|
+
|
17
19
|
parse_slide
|
20
|
+
parse_slide_notes
|
18
21
|
parse_relation
|
19
22
|
end
|
20
23
|
|
21
|
-
def parse_slide
|
24
|
+
def parse_slide
|
22
25
|
slide_doc = @presentation.files.file.open @slide_xml_path
|
23
26
|
@slide_xml = Nokogiri::XML::Document.parse slide_doc
|
24
27
|
end
|
25
28
|
|
29
|
+
def parse_slide_notes
|
30
|
+
slide_notes_doc = @presentation.files.file.open @slide_notes_xml_path rescue nil
|
31
|
+
@slide_notes_xml = Nokogiri::XML::Document.parse(slide_notes_doc) if slide_notes_doc
|
32
|
+
end
|
33
|
+
|
26
34
|
def parse_relation
|
27
35
|
@relation_xml_path = "ppt/slides/_rels/#{@slide_file_name}.rels"
|
28
36
|
if @presentation.files.file.exist? @relation_xml_path
|
@@ -34,7 +42,11 @@ module RubyPowerpoint
|
|
34
42
|
def content
|
35
43
|
content_elements @slide_xml
|
36
44
|
end
|
37
|
-
|
45
|
+
|
46
|
+
def notes_content
|
47
|
+
content_elements @slide_notes_xml
|
48
|
+
end
|
49
|
+
|
38
50
|
def title
|
39
51
|
title_elements = title_elements(@slide_xml)
|
40
52
|
title_elements.join(" ") if title_elements.length > 0
|
@@ -47,11 +59,15 @@ module RubyPowerpoint
|
|
47
59
|
node['Target'].gsub('..', 'ppt'))
|
48
60
|
end
|
49
61
|
end
|
50
|
-
|
62
|
+
|
51
63
|
def slide_num
|
52
64
|
@slide_xml_path.match(/slide([0-9]*)\.xml$/)[1].to_i
|
53
65
|
end
|
54
|
-
|
66
|
+
|
67
|
+
def paragraphs
|
68
|
+
paragraph_element @slide_xml
|
69
|
+
end
|
70
|
+
|
55
71
|
private
|
56
72
|
|
57
73
|
def extract_slide_number_from_path path
|
@@ -65,25 +81,29 @@ module RubyPowerpoint
|
|
65
81
|
def title_elements(xml)
|
66
82
|
shape_elements(xml).select{ |shape| element_is_title(shape) }
|
67
83
|
end
|
68
|
-
|
84
|
+
|
69
85
|
def content_elements(xml)
|
70
86
|
xml.xpath('//a:t').collect{ |node| node.text }
|
71
87
|
end
|
72
88
|
|
73
89
|
def image_elements(xml)
|
74
90
|
xml.css('Relationship').select{ |node| element_is_image(node) }
|
75
|
-
end
|
91
|
+
end
|
76
92
|
|
77
93
|
def shape_elements(xml)
|
78
94
|
xml.xpath('//p:sp')
|
79
|
-
end
|
80
|
-
|
95
|
+
end
|
96
|
+
|
97
|
+
def paragraph_element(xml)
|
98
|
+
xml.xpath('//a:p').collect{ |node| RubyPowerpoint::Paragraph.new(self, node) }
|
99
|
+
end
|
100
|
+
|
81
101
|
def element_is_title(shape)
|
82
102
|
shape.xpath('.//p:nvSpPr/p:nvPr/p:ph').select{ |prop| prop['type'] == 'title' || prop['type'] == 'ctrTitle' }.length > 0
|
83
103
|
end
|
84
104
|
|
85
105
|
def element_is_image(node)
|
86
|
-
node['Type'].include? 'image'
|
106
|
+
node['Type'].include? 'image'
|
87
107
|
end
|
88
108
|
end
|
89
109
|
end
|
data/spec/fixtures/sample.pptx
CHANGED
Binary file
|
data/spec/test_spec.rb
CHANGED
@@ -24,11 +24,17 @@ describe 'RubyPowerpoint parsing a sample PPTX file' do
|
|
24
24
|
File.open('temp_1.jpg', 'w'){|f| f.puts image_byte_stream_1}
|
25
25
|
|
26
26
|
@deck.slides.first.images.first.should_not eql nil #"ppt/media/image1.jpeg"
|
27
|
-
@deck.slides.last.title.should eql "Some title here"
|
27
|
+
@deck.slides.last.title.should eql "Some title here"
|
28
28
|
@deck.slides.last.content.should eql ["Some title here", "Some txt here", "Some ", "more text here."]
|
29
29
|
image_byte_stream_2 = @deck.slides.last.images.first.read
|
30
30
|
File.open('temp_2.jpg', 'w'){|f| f.puts image_byte_stream_2}
|
31
31
|
end
|
32
|
+
|
33
|
+
it "it parses Slide Notes of a PPTX slides" do
|
34
|
+
notes_content = @deck.slides[0].notes_content
|
35
|
+
notes_content.should eql ["Testing", " Multiline Notes.", "To be extracted here.", "Multiline notes extracted.", "1"]
|
36
|
+
end
|
37
|
+
|
32
38
|
end
|
33
39
|
|
34
40
|
describe 'open rime.pptx file' do
|
@@ -39,20 +45,20 @@ describe 'open rime.pptx file' do
|
|
39
45
|
after(:all) do
|
40
46
|
@deck.close
|
41
47
|
end
|
42
|
-
|
48
|
+
|
43
49
|
it 'opened rime.pptx successfully' do
|
44
50
|
@deck.should_not be_nil
|
45
51
|
@deck.slides.should_not eql []
|
46
52
|
end
|
47
|
-
|
53
|
+
|
48
54
|
it 'should have the right number of slides' do
|
49
55
|
@deck.slides.length.should eql 12
|
50
56
|
end
|
51
|
-
|
57
|
+
|
52
58
|
it 'the old content method should work the same way' do
|
53
59
|
@deck.slides[0].content.should eql ["The Rime of the Ancient Mariner", "(text of 1834)", "http://rpo.library.utoronto.ca/poems/rime-ancient-mariner-text-1834"]
|
54
60
|
end
|
55
|
-
|
61
|
+
|
56
62
|
context 'the titles should be right' do
|
57
63
|
it 'should be able to get a main slide (usually centered)' do
|
58
64
|
@deck.slides[0].title.should eql "The Rime of the Ancient Mariner"
|
@@ -68,9 +74,23 @@ describe 'open rime.pptx file' do
|
|
68
74
|
@deck.slides[5].title.should be_nil
|
69
75
|
@deck.slides[6].title.should be_nil
|
70
76
|
end
|
71
|
-
|
77
|
+
|
72
78
|
it 'should only get one title even if there are two things that visually look like titles' do
|
73
79
|
@deck.slides[7].title.should eql "What if we have two"
|
74
80
|
end
|
81
|
+
|
82
|
+
context 'when slide contains paragraph' do
|
83
|
+
before(:all) do
|
84
|
+
@slide = @deck.slides[1]
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'should return the list of paragraphs' do
|
88
|
+
@slide.paragraphs.count.should eql 2
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'should return the content of the paragraph' do
|
92
|
+
@slide.paragraphs[0].content.should eq ['Argument']
|
93
|
+
end
|
94
|
+
end
|
75
95
|
end
|
76
|
-
end
|
96
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_powerpoint
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.2
|
4
|
+
version: 1.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- pythonicrubyist
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -93,6 +93,7 @@ files:
|
|
93
93
|
- README.rdoc
|
94
94
|
- Rakefile
|
95
95
|
- lib/ruby_powerpoint.rb
|
96
|
+
- lib/ruby_powerpoint/paragraph.rb
|
96
97
|
- lib/ruby_powerpoint/presentation.rb
|
97
98
|
- lib/ruby_powerpoint/slide.rb
|
98
99
|
- lib/ruby_powerpoint/version.rb
|
@@ -121,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
121
122
|
version: '0'
|
122
123
|
requirements: []
|
123
124
|
rubyforge_project:
|
124
|
-
rubygems_version: 2.4.
|
125
|
+
rubygems_version: 2.4.3
|
125
126
|
signing_key:
|
126
127
|
specification_version: 4
|
127
128
|
summary: ruby_powerpoint is a Ruby gem that can extract title, content and images
|