officex2str 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Officex2str
2
2
 
3
- Convert office 2010 files to string.
3
+ Convert Microsoft Office 2007/2010 files (docx/xlsx/pptx) to a string.
4
4
 
5
5
  ## Installation
6
6
 
@@ -18,5 +18,5 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
- Officex2str.convert("file_path")
21
+ Officex2str.convert("file_path")
22
22
 
@@ -0,0 +1 @@
1
+ sample
@@ -1,3 +1,3 @@
1
1
  module Officex2str
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/officex2str.rb CHANGED
@@ -1,37 +1,60 @@
1
1
  require 'nokogiri'
2
2
  require 'zipruby'
3
3
  require 'mime/types'
4
- #require "officex2str/version"
5
4
 
6
- module Officex2str
5
+ class Officex2str
6
+ DOCX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
7
+ XLSX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
8
+ PPTX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
9
+ VALID_CONTENT_TYPE = [DOCX_CONTENT_TYPE, XLSX_CONTENT_TYPE, PPTX_CONTENT_TYPE].freeze
10
+
11
+ attr_accessor :path, :content_type
12
+
7
13
  def self.convert(file_path)
8
- archives = Zip::Archive.open(file_path) { |archive| archive.map(&:name) }
9
- pages = self.pickup_pages(file_path, archives)
10
- xmls = self.extract_xmls(file_path, pages)
11
- self.xml_to_str(xmls)
14
+ self.new(file_path).convert
15
+ end
16
+
17
+ def initialize(file_path)
18
+ @path = file_path
19
+ @content_type = MIME::Types.type_for(path).first.content_type
20
+ end
21
+
22
+ def convert
23
+ if valid_file?
24
+ archives = Zip::Archive.open(path) { |archive| archive.map(&:name) }
25
+ pages = pickup_pages(archives)
26
+ xmls = extract_xmls(pages)
27
+ xml_to_str(xmls)
28
+ else
29
+ raise InvaildFileTypeError, "Not recognized file type"
30
+ end
12
31
  end
13
32
 
14
33
  private
15
- def self.pickup_pages file_path, archives
16
- case content_type = MIME::Types.type_for(file_path).first.content_type
17
- when "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
34
+ def valid_file?
35
+ !!VALID_CONTENT_TYPE.include?(content_type)
36
+ end
37
+
38
+ def pickup_pages archives
39
+ case content_type
40
+ when DOCX_CONTENT_TYPE
18
41
  archives.select{|a| /^word\/document/ =~ a}
19
- when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
42
+ when XLSX_CONTENT_TYPE
20
43
  archives.select{|a| /^xl\/worksheets\/sheet/ =~ a or /^xl\/sharedStrings/ =~ a or /^xl\/comments/ =~ a }
21
- when "application/vnd.openxmlformats-officedocument.presentationml.presentation"
44
+ when PPTX_CONTENT_TYPE
22
45
  archives.select{|a| /^ppt\/slides\/slide/ =~ a}
23
46
  else
24
- nil
47
+ raise InvalidContentTypeError, "Not recognized content type"
25
48
  end
26
49
  end
27
50
 
28
- def self.extract_xmls file_path, pages
51
+ def extract_xmls pages
29
52
  xml_text = []
30
- Zip::Archive.open(file_path) { |archive| pages.each{ |page| archive.fopen(page) do |f| xml_text << f.read end; } }
53
+ Zip::Archive.open(path) { |archive| pages.each{ |page| archive.fopen(page) do |f| xml_text << f.read end; } }
31
54
  xml_text
32
55
  end
33
56
 
34
- def self.xml_to_str xml_text
57
+ def xml_to_str xml_text
35
58
  text = ""
36
59
  xml_text.each{|xml_t| text << Nokogiri.XML(xml_t.toutf8, nil, 'utf8').to_str } unless xml_text.empty?
37
60
  text
@@ -2,44 +2,56 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe Officex2str do
5
+ context "#valid_file?" do
6
+ subject do
7
+ Officex2str.new(@file_path).send(:valid_file?)
8
+ end
9
+ context "extname is docx" do
10
+ before { @file_path = "fixtures/sample.docx" }
11
+ it { subject.should be_true }
12
+ end
13
+ context "extname is xlsx" do
14
+ before { @file_path = "fixtures/sample.xlsx" }
15
+ it { subject.should be_true }
16
+ end
17
+ context "extname is pptx" do
18
+ before { @file_path = "fixtures/sample.pptx" }
19
+ it { subject.should be_true }
20
+ end
21
+ context "extname is txt" do
22
+ before { @file_path = "fixtures/sample.txt" }
23
+ it { subject.should be_false }
24
+ end
25
+ end
26
+
5
27
  context "#pickup_pages" do
6
28
  subject do
7
29
  archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
8
- Officex2str.send(:pickup_pages, @file_path, archives).sort
30
+ Officex2str.new(@file_path).send(:pickup_pages, archives).sort
9
31
  end
10
32
  context "extname is docx" do
11
- before do
12
- @file_path = "fixtures/sample.docx"
13
- end
33
+ before { @file_path = "fixtures/sample.docx" }
14
34
  it { subject.should == ["word/document.xml"] }
15
35
  end
16
36
 
17
37
  context "extname is xlsx" do
18
- before do
19
- @file_path = "fixtures/sample.xlsx"
20
- end
38
+ before { @file_path = "fixtures/sample.xlsx" }
21
39
  it { subject.should == ["xl/comments1.xml", "xl/sharedStrings.xml", "xl/worksheets/sheet1.xml", "xl/worksheets/sheet2.xml"] }
22
40
  end
23
41
 
24
42
  context "extname is pptx" do
25
- before do
26
- @file_path = "fixtures/sample.pptx"
27
- end
43
+ before { @file_path = "fixtures/sample.pptx" }
28
44
  it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
29
45
  end
46
+
30
47
  end
31
48
 
32
49
  context "#convert" do
33
50
  subject do
34
- archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
35
- pages = Officex2str.send(:pickup_pages, @file_path, archives)
36
- xmls = Officex2str.send(:extract_xmls, @file_path, pages)
37
51
  Officex2str.convert(@file_path)
38
52
  end
39
53
  context "extname is xlsx" do
40
- before do
41
- @file_path = "fixtures/sample.xlsx"
42
- end
54
+ before { @file_path = "fixtures/sample.xlsx" }
43
55
  it do
44
56
  subject.should include("複数シート対応")
45
57
  subject.should include("ソニックガーデン")
@@ -52,9 +64,7 @@ describe Officex2str do
52
64
  end
53
65
 
54
66
  context "extname is docx" do
55
- before do
56
- @file_path = "fixtures/sample.docx"
57
- end
67
+ before { @file_path = "fixtures/sample.docx" }
58
68
  it do
59
69
  subject.should include("複数ページ対応")
60
70
  subject.should include("ソニックガーデン")
@@ -65,9 +75,7 @@ describe Officex2str do
65
75
  end
66
76
 
67
77
  context "extname is pptx" do
68
- before do
69
- @file_path = "fixtures/sample.pptx"
70
- end
78
+ before { @file_path = "fixtures/sample.pptx" }
71
79
  it do
72
80
  subject.should include("Aタイトル")
73
81
  subject.should include("Aサブタイトル")
@@ -78,6 +86,6 @@ describe Officex2str do
78
86
  subject.should_not include("sheet")
79
87
  end
80
88
  end
81
-
82
89
  end
90
+
83
91
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - interu
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2012-07-18 00:00:00 +09:00
17
+ date: 2012-07-19 00:00:00 +09:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -94,6 +94,7 @@ files:
94
94
  - Rakefile
95
95
  - fixtures/sample.docx
96
96
  - fixtures/sample.pptx
97
+ - fixtures/sample.txt
97
98
  - fixtures/sample.xlsx
98
99
  - lib/officex2str.rb
99
100
  - lib/officex2str/version.rb