officex2str 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Officex2str
2
2
 
3
- Convert office 2010 files to string.
3
+ Convert Microsoft Office 2007/2010 files (docx/xlsx/pptx) to a string.
4
4
 
5
5
  ## Installation
6
6
 
@@ -18,5 +18,5 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
- Officex2str.convert("file_path")
21
+ Officex2str.convert("file_path")
22
22
 
@@ -0,0 +1 @@
1
+ sample
@@ -1,3 +1,3 @@
1
1
  module Officex2str
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/officex2str.rb CHANGED
@@ -1,37 +1,60 @@
1
1
  require 'nokogiri'
2
2
  require 'zipruby'
3
3
  require 'mime/types'
4
- #require "officex2str/version"
5
4
 
6
- module Officex2str
5
+ class Officex2str
6
+ DOCX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
7
+ XLSX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
8
+ PPTX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
9
+ VALID_CONTENT_TYPE = [DOCX_CONTENT_TYPE, XLSX_CONTENT_TYPE, PPTX_CONTENT_TYPE].freeze
10
+
11
+ attr_accessor :path, :content_type
12
+
7
13
  def self.convert(file_path)
8
- archives = Zip::Archive.open(file_path) { |archive| archive.map(&:name) }
9
- pages = self.pickup_pages(file_path, archives)
10
- xmls = self.extract_xmls(file_path, pages)
11
- self.xml_to_str(xmls)
14
+ self.new(file_path).convert
15
+ end
16
+
17
+ def initialize(file_path)
18
+ @path = file_path
19
+ @content_type = MIME::Types.type_for(path).first.content_type
20
+ end
21
+
22
+ def convert
23
+ if valid_file?
24
+ archives = Zip::Archive.open(path) { |archive| archive.map(&:name) }
25
+ pages = pickup_pages(archives)
26
+ xmls = extract_xmls(pages)
27
+ xml_to_str(xmls)
28
+ else
29
+ raise InvaildFileTypeError, "Not recognized file type"
30
+ end
12
31
  end
13
32
 
14
33
  private
15
- def self.pickup_pages file_path, archives
16
- case content_type = MIME::Types.type_for(file_path).first.content_type
17
- when "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
34
+ def valid_file?
35
+ !!VALID_CONTENT_TYPE.include?(content_type)
36
+ end
37
+
38
+ def pickup_pages archives
39
+ case content_type
40
+ when DOCX_CONTENT_TYPE
18
41
  archives.select{|a| /^word\/document/ =~ a}
19
- when "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
42
+ when XLSX_CONTENT_TYPE
20
43
  archives.select{|a| /^xl\/worksheets\/sheet/ =~ a or /^xl\/sharedStrings/ =~ a or /^xl\/comments/ =~ a }
21
- when "application/vnd.openxmlformats-officedocument.presentationml.presentation"
44
+ when PPTX_CONTENT_TYPE
22
45
  archives.select{|a| /^ppt\/slides\/slide/ =~ a}
23
46
  else
24
- nil
47
+ raise InvalidContentTypeError, "Not recognized content type"
25
48
  end
26
49
  end
27
50
 
28
- def self.extract_xmls file_path, pages
51
+ def extract_xmls pages
29
52
  xml_text = []
30
- Zip::Archive.open(file_path) { |archive| pages.each{ |page| archive.fopen(page) do |f| xml_text << f.read end; } }
53
+ Zip::Archive.open(path) { |archive| pages.each{ |page| archive.fopen(page) do |f| xml_text << f.read end; } }
31
54
  xml_text
32
55
  end
33
56
 
34
- def self.xml_to_str xml_text
57
+ def xml_to_str xml_text
35
58
  text = ""
36
59
  xml_text.each{|xml_t| text << Nokogiri.XML(xml_t.toutf8, nil, 'utf8').to_str } unless xml_text.empty?
37
60
  text
@@ -2,44 +2,56 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe Officex2str do
5
+ context "#valid_file?" do
6
+ subject do
7
+ Officex2str.new(@file_path).send(:valid_file?)
8
+ end
9
+ context "extname is docx" do
10
+ before { @file_path = "fixtures/sample.docx" }
11
+ it { subject.should be_true }
12
+ end
13
+ context "extname is xlsx" do
14
+ before { @file_path = "fixtures/sample.xlsx" }
15
+ it { subject.should be_true }
16
+ end
17
+ context "extname is pptx" do
18
+ before { @file_path = "fixtures/sample.pptx" }
19
+ it { subject.should be_true }
20
+ end
21
+ context "extname is txt" do
22
+ before { @file_path = "fixtures/sample.txt" }
23
+ it { subject.should be_false }
24
+ end
25
+ end
26
+
5
27
  context "#pickup_pages" do
6
28
  subject do
7
29
  archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
8
- Officex2str.send(:pickup_pages, @file_path, archives).sort
30
+ Officex2str.new(@file_path).send(:pickup_pages, archives).sort
9
31
  end
10
32
  context "extname is docx" do
11
- before do
12
- @file_path = "fixtures/sample.docx"
13
- end
33
+ before { @file_path = "fixtures/sample.docx" }
14
34
  it { subject.should == ["word/document.xml"] }
15
35
  end
16
36
 
17
37
  context "extname is xlsx" do
18
- before do
19
- @file_path = "fixtures/sample.xlsx"
20
- end
38
+ before { @file_path = "fixtures/sample.xlsx" }
21
39
  it { subject.should == ["xl/comments1.xml", "xl/sharedStrings.xml", "xl/worksheets/sheet1.xml", "xl/worksheets/sheet2.xml"] }
22
40
  end
23
41
 
24
42
  context "extname is pptx" do
25
- before do
26
- @file_path = "fixtures/sample.pptx"
27
- end
43
+ before { @file_path = "fixtures/sample.pptx" }
28
44
  it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
29
45
  end
46
+
30
47
  end
31
48
 
32
49
  context "#convert" do
33
50
  subject do
34
- archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
35
- pages = Officex2str.send(:pickup_pages, @file_path, archives)
36
- xmls = Officex2str.send(:extract_xmls, @file_path, pages)
37
51
  Officex2str.convert(@file_path)
38
52
  end
39
53
  context "extname is xlsx" do
40
- before do
41
- @file_path = "fixtures/sample.xlsx"
42
- end
54
+ before { @file_path = "fixtures/sample.xlsx" }
43
55
  it do
44
56
  subject.should include("複数シート対応")
45
57
  subject.should include("ソニックガーデン")
@@ -52,9 +64,7 @@ describe Officex2str do
52
64
  end
53
65
 
54
66
  context "extname is docx" do
55
- before do
56
- @file_path = "fixtures/sample.docx"
57
- end
67
+ before { @file_path = "fixtures/sample.docx" }
58
68
  it do
59
69
  subject.should include("複数ページ対応")
60
70
  subject.should include("ソニックガーデン")
@@ -65,9 +75,7 @@ describe Officex2str do
65
75
  end
66
76
 
67
77
  context "extname is pptx" do
68
- before do
69
- @file_path = "fixtures/sample.pptx"
70
- end
78
+ before { @file_path = "fixtures/sample.pptx" }
71
79
  it do
72
80
  subject.should include("Aタイトル")
73
81
  subject.should include("Aサブタイトル")
@@ -78,6 +86,6 @@ describe Officex2str do
78
86
  subject.should_not include("sheet")
79
87
  end
80
88
  end
81
-
82
89
  end
90
+
83
91
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - interu
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2012-07-18 00:00:00 +09:00
17
+ date: 2012-07-19 00:00:00 +09:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -94,6 +94,7 @@ files:
94
94
  - Rakefile
95
95
  - fixtures/sample.docx
96
96
  - fixtures/sample.pptx
97
+ - fixtures/sample.txt
97
98
  - fixtures/sample.xlsx
98
99
  - lib/officex2str.rb
99
100
  - lib/officex2str/version.rb