officex2str 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -2
- data/fixtures/sample.txt +1 -0
- data/lib/officex2str/version.rb +1 -1
- data/lib/officex2str.rb +38 -15
- data/spec/officex2str_spec.rb +31 -23
- metadata +4 -3
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Officex2str
|
2
2
|
|
3
|
-
Convert office 2010 files to string.
|
3
|
+
Convert Microsoft Office 2007/2010 files (docx/xlsx/pptx) to a string.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -18,5 +18,5 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
|
21
|
+
Officex2str.convert("file_path")
|
22
22
|
|
data/fixtures/sample.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
sample
|
data/lib/officex2str/version.rb
CHANGED
data/lib/officex2str.rb
CHANGED
@@ -1,37 +1,60 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'zipruby'
|
3
3
|
require 'mime/types'
|
4
|
-
#require "officex2str/version"
|
5
4
|
|
6
|
-
|
5
|
+
class Officex2str
|
6
|
+
DOCX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
7
|
+
XLSX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
8
|
+
PPTX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
9
|
+
VALID_CONTENT_TYPE = [DOCX_CONTENT_TYPE, XLSX_CONTENT_TYPE, PPTX_CONTENT_TYPE].freeze
|
10
|
+
|
11
|
+
attr_accessor :path, :content_type
|
12
|
+
|
7
13
|
def self.convert(file_path)
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
14
|
+
self.new(file_path).convert
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(file_path)
|
18
|
+
@path = file_path
|
19
|
+
@content_type = MIME::Types.type_for(path).first.content_type
|
20
|
+
end
|
21
|
+
|
22
|
+
def convert
|
23
|
+
if valid_file?
|
24
|
+
archives = Zip::Archive.open(path) { |archive| archive.map(&:name) }
|
25
|
+
pages = pickup_pages(archives)
|
26
|
+
xmls = extract_xmls(pages)
|
27
|
+
xml_to_str(xmls)
|
28
|
+
else
|
29
|
+
raise InvaildFileTypeError, "Not recognized file type"
|
30
|
+
end
|
12
31
|
end
|
13
32
|
|
14
33
|
private
|
15
|
-
def
|
16
|
-
|
17
|
-
|
34
|
+
def valid_file?
|
35
|
+
!!VALID_CONTENT_TYPE.include?(content_type)
|
36
|
+
end
|
37
|
+
|
38
|
+
def pickup_pages archives
|
39
|
+
case content_type
|
40
|
+
when DOCX_CONTENT_TYPE
|
18
41
|
archives.select{|a| /^word\/document/ =~ a}
|
19
|
-
when
|
42
|
+
when XLSX_CONTENT_TYPE
|
20
43
|
archives.select{|a| /^xl\/worksheets\/sheet/ =~ a or /^xl\/sharedStrings/ =~ a or /^xl\/comments/ =~ a }
|
21
|
-
when
|
44
|
+
when PPTX_CONTENT_TYPE
|
22
45
|
archives.select{|a| /^ppt\/slides\/slide/ =~ a}
|
23
46
|
else
|
24
|
-
|
47
|
+
raise InvalidContentTypeError, "Not recognized content type"
|
25
48
|
end
|
26
49
|
end
|
27
50
|
|
28
|
-
def
|
51
|
+
def extract_xmls pages
|
29
52
|
xml_text = []
|
30
|
-
Zip::Archive.open(
|
53
|
+
Zip::Archive.open(path) { |archive| pages.each{ |page| archive.fopen(page) do |f| xml_text << f.read end; } }
|
31
54
|
xml_text
|
32
55
|
end
|
33
56
|
|
34
|
-
def
|
57
|
+
def xml_to_str xml_text
|
35
58
|
text = ""
|
36
59
|
xml_text.each{|xml_t| text << Nokogiri.XML(xml_t.toutf8, nil, 'utf8').to_str } unless xml_text.empty?
|
37
60
|
text
|
data/spec/officex2str_spec.rb
CHANGED
@@ -2,44 +2,56 @@
|
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
4
|
describe Officex2str do
|
5
|
+
context "#valid_file?" do
|
6
|
+
subject do
|
7
|
+
Officex2str.new(@file_path).send(:valid_file?)
|
8
|
+
end
|
9
|
+
context "extname is docx" do
|
10
|
+
before { @file_path = "fixtures/sample.docx" }
|
11
|
+
it { subject.should be_true }
|
12
|
+
end
|
13
|
+
context "extname is xlsx" do
|
14
|
+
before { @file_path = "fixtures/sample.xlsx" }
|
15
|
+
it { subject.should be_true }
|
16
|
+
end
|
17
|
+
context "extname is pptx" do
|
18
|
+
before { @file_path = "fixtures/sample.pptx" }
|
19
|
+
it { subject.should be_true }
|
20
|
+
end
|
21
|
+
context "extname is txt" do
|
22
|
+
before { @file_path = "fixtures/sample.txt" }
|
23
|
+
it { subject.should be_false }
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
5
27
|
context "#pickup_pages" do
|
6
28
|
subject do
|
7
29
|
archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
|
8
|
-
Officex2str.send(:pickup_pages,
|
30
|
+
Officex2str.new(@file_path).send(:pickup_pages, archives).sort
|
9
31
|
end
|
10
32
|
context "extname is docx" do
|
11
|
-
before
|
12
|
-
@file_path = "fixtures/sample.docx"
|
13
|
-
end
|
33
|
+
before { @file_path = "fixtures/sample.docx" }
|
14
34
|
it { subject.should == ["word/document.xml"] }
|
15
35
|
end
|
16
36
|
|
17
37
|
context "extname is xlsx" do
|
18
|
-
before
|
19
|
-
@file_path = "fixtures/sample.xlsx"
|
20
|
-
end
|
38
|
+
before { @file_path = "fixtures/sample.xlsx" }
|
21
39
|
it { subject.should == ["xl/comments1.xml", "xl/sharedStrings.xml", "xl/worksheets/sheet1.xml", "xl/worksheets/sheet2.xml"] }
|
22
40
|
end
|
23
41
|
|
24
42
|
context "extname is pptx" do
|
25
|
-
before
|
26
|
-
@file_path = "fixtures/sample.pptx"
|
27
|
-
end
|
43
|
+
before { @file_path = "fixtures/sample.pptx" }
|
28
44
|
it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
|
29
45
|
end
|
46
|
+
|
30
47
|
end
|
31
48
|
|
32
49
|
context "#convert" do
|
33
50
|
subject do
|
34
|
-
archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
|
35
|
-
pages = Officex2str.send(:pickup_pages, @file_path, archives)
|
36
|
-
xmls = Officex2str.send(:extract_xmls, @file_path, pages)
|
37
51
|
Officex2str.convert(@file_path)
|
38
52
|
end
|
39
53
|
context "extname is xlsx" do
|
40
|
-
before
|
41
|
-
@file_path = "fixtures/sample.xlsx"
|
42
|
-
end
|
54
|
+
before { @file_path = "fixtures/sample.xlsx" }
|
43
55
|
it do
|
44
56
|
subject.should include("複数シート対応")
|
45
57
|
subject.should include("ソニックガーデン")
|
@@ -52,9 +64,7 @@ describe Officex2str do
|
|
52
64
|
end
|
53
65
|
|
54
66
|
context "extname is docx" do
|
55
|
-
before
|
56
|
-
@file_path = "fixtures/sample.docx"
|
57
|
-
end
|
67
|
+
before { @file_path = "fixtures/sample.docx" }
|
58
68
|
it do
|
59
69
|
subject.should include("複数ページ対応")
|
60
70
|
subject.should include("ソニックガーデン")
|
@@ -65,9 +75,7 @@ describe Officex2str do
|
|
65
75
|
end
|
66
76
|
|
67
77
|
context "extname is pptx" do
|
68
|
-
before
|
69
|
-
@file_path = "fixtures/sample.pptx"
|
70
|
-
end
|
78
|
+
before { @file_path = "fixtures/sample.pptx" }
|
71
79
|
it do
|
72
80
|
subject.should include("Aタイトル")
|
73
81
|
subject.should include("Aサブタイトル")
|
@@ -78,6 +86,6 @@ describe Officex2str do
|
|
78
86
|
subject.should_not include("sheet")
|
79
87
|
end
|
80
88
|
end
|
81
|
-
|
82
89
|
end
|
90
|
+
|
83
91
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- interu
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2012-07-
|
17
|
+
date: 2012-07-19 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -94,6 +94,7 @@ files:
|
|
94
94
|
- Rakefile
|
95
95
|
- fixtures/sample.docx
|
96
96
|
- fixtures/sample.pptx
|
97
|
+
- fixtures/sample.txt
|
97
98
|
- fixtures/sample.xlsx
|
98
99
|
- lib/officex2str.rb
|
99
100
|
- lib/officex2str/version.rb
|