officex2str 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/fixtures/sample.txt +1 -0
- data/lib/officex2str/version.rb +1 -1
- data/lib/officex2str.rb +38 -15
- data/spec/officex2str_spec.rb +31 -23
- metadata +4 -3
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Officex2str
|
2
2
|
|
3
|
-
Convert office 2010 files to string.
|
3
|
+
Convert Microsoft Office 2007/2010 files (docx/xlsx/pptx) to a string.
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -18,5 +18,5 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
|
21
|
+
Officex2str.convert("file_path")
|
22
22
|
|
data/fixtures/sample.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
sample
|
data/lib/officex2str/version.rb
CHANGED
data/lib/officex2str.rb
CHANGED
@@ -1,37 +1,60 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'zipruby'
|
3
3
|
require 'mime/types'
|
4
|
-
#require "officex2str/version"
|
5
4
|
|
6
|
-
|
5
|
+
class Officex2str
|
6
|
+
DOCX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
7
|
+
XLSX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
8
|
+
PPTX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
9
|
+
VALID_CONTENT_TYPE = [DOCX_CONTENT_TYPE, XLSX_CONTENT_TYPE, PPTX_CONTENT_TYPE].freeze
|
10
|
+
|
11
|
+
attr_accessor :path, :content_type
|
12
|
+
|
7
13
|
def self.convert(file_path)
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
14
|
+
self.new(file_path).convert
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize(file_path)
|
18
|
+
@path = file_path
|
19
|
+
@content_type = MIME::Types.type_for(path).first.content_type
|
20
|
+
end
|
21
|
+
|
22
|
+
def convert
|
23
|
+
if valid_file?
|
24
|
+
archives = Zip::Archive.open(path) { |archive| archive.map(&:name) }
|
25
|
+
pages = pickup_pages(archives)
|
26
|
+
xmls = extract_xmls(pages)
|
27
|
+
xml_to_str(xmls)
|
28
|
+
else
|
29
|
+
raise InvaildFileTypeError, "Not recognized file type"
|
30
|
+
end
|
12
31
|
end
|
13
32
|
|
14
33
|
private
|
15
|
-
def
|
16
|
-
|
17
|
-
|
34
|
+
def valid_file?
|
35
|
+
!!VALID_CONTENT_TYPE.include?(content_type)
|
36
|
+
end
|
37
|
+
|
38
|
+
def pickup_pages archives
|
39
|
+
case content_type
|
40
|
+
when DOCX_CONTENT_TYPE
|
18
41
|
archives.select{|a| /^word\/document/ =~ a}
|
19
|
-
when
|
42
|
+
when XLSX_CONTENT_TYPE
|
20
43
|
archives.select{|a| /^xl\/worksheets\/sheet/ =~ a or /^xl\/sharedStrings/ =~ a or /^xl\/comments/ =~ a }
|
21
|
-
when
|
44
|
+
when PPTX_CONTENT_TYPE
|
22
45
|
archives.select{|a| /^ppt\/slides\/slide/ =~ a}
|
23
46
|
else
|
24
|
-
|
47
|
+
raise InvalidContentTypeError, "Not recognized content type"
|
25
48
|
end
|
26
49
|
end
|
27
50
|
|
28
|
-
def
|
51
|
+
def extract_xmls pages
|
29
52
|
xml_text = []
|
30
|
-
Zip::Archive.open(
|
53
|
+
Zip::Archive.open(path) { |archive| pages.each{ |page| archive.fopen(page) do |f| xml_text << f.read end; } }
|
31
54
|
xml_text
|
32
55
|
end
|
33
56
|
|
34
|
-
def
|
57
|
+
def xml_to_str xml_text
|
35
58
|
text = ""
|
36
59
|
xml_text.each{|xml_t| text << Nokogiri.XML(xml_t.toutf8, nil, 'utf8').to_str } unless xml_text.empty?
|
37
60
|
text
|
data/spec/officex2str_spec.rb
CHANGED
@@ -2,44 +2,56 @@
|
|
2
2
|
require 'spec_helper'
|
3
3
|
|
4
4
|
describe Officex2str do
|
5
|
+
context "#valid_file?" do
|
6
|
+
subject do
|
7
|
+
Officex2str.new(@file_path).send(:valid_file?)
|
8
|
+
end
|
9
|
+
context "extname is docx" do
|
10
|
+
before { @file_path = "fixtures/sample.docx" }
|
11
|
+
it { subject.should be_true }
|
12
|
+
end
|
13
|
+
context "extname is xlsx" do
|
14
|
+
before { @file_path = "fixtures/sample.xlsx" }
|
15
|
+
it { subject.should be_true }
|
16
|
+
end
|
17
|
+
context "extname is pptx" do
|
18
|
+
before { @file_path = "fixtures/sample.pptx" }
|
19
|
+
it { subject.should be_true }
|
20
|
+
end
|
21
|
+
context "extname is txt" do
|
22
|
+
before { @file_path = "fixtures/sample.txt" }
|
23
|
+
it { subject.should be_false }
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
5
27
|
context "#pickup_pages" do
|
6
28
|
subject do
|
7
29
|
archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
|
8
|
-
Officex2str.send(:pickup_pages,
|
30
|
+
Officex2str.new(@file_path).send(:pickup_pages, archives).sort
|
9
31
|
end
|
10
32
|
context "extname is docx" do
|
11
|
-
before
|
12
|
-
@file_path = "fixtures/sample.docx"
|
13
|
-
end
|
33
|
+
before { @file_path = "fixtures/sample.docx" }
|
14
34
|
it { subject.should == ["word/document.xml"] }
|
15
35
|
end
|
16
36
|
|
17
37
|
context "extname is xlsx" do
|
18
|
-
before
|
19
|
-
@file_path = "fixtures/sample.xlsx"
|
20
|
-
end
|
38
|
+
before { @file_path = "fixtures/sample.xlsx" }
|
21
39
|
it { subject.should == ["xl/comments1.xml", "xl/sharedStrings.xml", "xl/worksheets/sheet1.xml", "xl/worksheets/sheet2.xml"] }
|
22
40
|
end
|
23
41
|
|
24
42
|
context "extname is pptx" do
|
25
|
-
before
|
26
|
-
@file_path = "fixtures/sample.pptx"
|
27
|
-
end
|
43
|
+
before { @file_path = "fixtures/sample.pptx" }
|
28
44
|
it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
|
29
45
|
end
|
46
|
+
|
30
47
|
end
|
31
48
|
|
32
49
|
context "#convert" do
|
33
50
|
subject do
|
34
|
-
archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
|
35
|
-
pages = Officex2str.send(:pickup_pages, @file_path, archives)
|
36
|
-
xmls = Officex2str.send(:extract_xmls, @file_path, pages)
|
37
51
|
Officex2str.convert(@file_path)
|
38
52
|
end
|
39
53
|
context "extname is xlsx" do
|
40
|
-
before
|
41
|
-
@file_path = "fixtures/sample.xlsx"
|
42
|
-
end
|
54
|
+
before { @file_path = "fixtures/sample.xlsx" }
|
43
55
|
it do
|
44
56
|
subject.should include("複数シート対応")
|
45
57
|
subject.should include("ソニックガーデン")
|
@@ -52,9 +64,7 @@ describe Officex2str do
|
|
52
64
|
end
|
53
65
|
|
54
66
|
context "extname is docx" do
|
55
|
-
before
|
56
|
-
@file_path = "fixtures/sample.docx"
|
57
|
-
end
|
67
|
+
before { @file_path = "fixtures/sample.docx" }
|
58
68
|
it do
|
59
69
|
subject.should include("複数ページ対応")
|
60
70
|
subject.should include("ソニックガーデン")
|
@@ -65,9 +75,7 @@ describe Officex2str do
|
|
65
75
|
end
|
66
76
|
|
67
77
|
context "extname is pptx" do
|
68
|
-
before
|
69
|
-
@file_path = "fixtures/sample.pptx"
|
70
|
-
end
|
78
|
+
before { @file_path = "fixtures/sample.pptx" }
|
71
79
|
it do
|
72
80
|
subject.should include("Aタイトル")
|
73
81
|
subject.should include("Aサブタイトル")
|
@@ -78,6 +86,6 @@ describe Officex2str do
|
|
78
86
|
subject.should_not include("sheet")
|
79
87
|
end
|
80
88
|
end
|
81
|
-
|
82
89
|
end
|
90
|
+
|
83
91
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- interu
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2012-07-
|
17
|
+
date: 2012-07-19 00:00:00 +09:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -94,6 +94,7 @@ files:
|
|
94
94
|
- Rakefile
|
95
95
|
- fixtures/sample.docx
|
96
96
|
- fixtures/sample.pptx
|
97
|
+
- fixtures/sample.txt
|
97
98
|
- fixtures/sample.xlsx
|
98
99
|
- lib/officex2str.rb
|
99
100
|
- lib/officex2str/version.rb
|