officex2str 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/lib/officex2str.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'nokogiri'
2
- require 'zipruby'
3
2
  require 'mime/types'
3
+ require 'zip'
4
4
 
5
5
  class Officex2str
6
6
  class InvalidFileTypeError < Exception; end
@@ -10,7 +10,7 @@ class Officex2str
10
10
  PPTX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
11
11
  VALID_CONTENT_TYPE = [DOCX_CONTENT_TYPE, XLSX_CONTENT_TYPE, PPTX_CONTENT_TYPE].freeze
12
12
 
13
- attr_accessor :path, :content_type
13
+ attr_accessor :path, :content_type, :entries, :xmls
14
14
 
15
15
  def self.convert(file_path)
16
16
  self.new(file_path).convert
@@ -19,14 +19,14 @@ class Officex2str
19
19
  def initialize(file_path)
20
20
  @path = file_path
21
21
  @content_type = MIME::Types.type_for(path).first.content_type
22
+ @entries = valid_file? ? Zip::File.open(path).entries : []
23
+ @xmls = []
22
24
  end
23
25
 
24
26
  def convert
25
27
  if valid_file?
26
- archives = Zip::Archive.open(path) { |archive| archive.map(&:name) }
27
- pages = pickup_pages(archives)
28
- xmls = extract_xmls(pages)
29
- xml_to_str(xmls)
28
+ extract_xmls
29
+ xml_to_str
30
30
  else
31
31
  raise InvalidFileTypeError, "Not recognized file type"
32
32
  end
@@ -37,37 +37,25 @@ private
37
37
  !!VALID_CONTENT_TYPE.include?(content_type)
38
38
  end
39
39
 
40
- def pickup_pages archives
40
+ def select_target_entries
41
41
  case content_type
42
42
  when DOCX_CONTENT_TYPE
43
- archives.select{|a| /^word\/document/ =~ a}
43
+ entries.select{|a| /^word\/document/ =~ a.to_s}
44
44
  when XLSX_CONTENT_TYPE
45
- archives.select{|a| /^xl\/worksheets\/sheet/ =~ a or /^xl\/sharedStrings/ =~ a or /^xl\/comments/ =~ a }
45
+ entries.select{|a| /^xl\/worksheets\/sheet/ =~ a.to_s or /^xl\/sharedStrings/ =~ a.to_s or /^xl\/comments/ =~ a.to_s }
46
46
  when PPTX_CONTENT_TYPE
47
- archives.select{|a| /^ppt\/slides\/slide/ =~ a}
47
+ entries.select{|a| /^ppt\/slides\/slide/ =~ a.to_s}
48
48
  else
49
49
  raise InvalidContentTypeError, "Not recognized content type"
50
50
  end
51
51
  end
52
52
 
53
- def extract_xmls pages
54
- xml_text = []
55
- Zip::Archive.open(path) { |archive| pages.each{ |page| archive.fopen(page) { |f| xml_text << f.read } } }
56
- xml_text
53
+ def extract_xmls
54
+ select_target_entries.map{|entry| xmls << entry.get_input_stream.read }
57
55
  end
58
56
 
59
- def xml_to_str xmls
60
- text = ""
61
- unless xmls.empty?
62
- if content_type == XLSX_CONTENT_TYPE
63
- xmls.map do |xml|
64
- doc = Nokogiri.XML(xml.toutf8)
65
- text += doc.search("t").map{|node| node.children.to_s}.join(' ')
66
- end
67
- else
68
- xmls.each{|xml| text << Nokogiri.XML(xml.toutf8, nil, 'utf8').to_str }
69
- end
70
- end
71
- text
57
+ def xml_to_str
58
+ return '' if xmls.empty?
59
+ xmls.inject(""){|result, xml| result << Nokogiri.XML(xml, nil, 'utf8').to_str }
72
60
  end
73
61
  end
@@ -1,3 +1,3 @@
1
1
  class Officex2str
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.9"
3
3
  end
data/officex2str.gemspec CHANGED
@@ -18,6 +18,6 @@ Gem::Specification.new do |gem|
18
18
  gem.add_development_dependency "rake", ["= 0.9.2"]
19
19
  gem.add_development_dependency "rspec", ["= 2.10.0"]
20
20
  gem.add_runtime_dependency "nokogiri", [">= 1.4.7"]
21
- gem.add_runtime_dependency "zipruby", ["= 0.3.6"]
22
- gem.add_runtime_dependency "mime-types", ["= 2.1"]
21
+ gem.add_runtime_dependency "rubyzip"
22
+ gem.add_runtime_dependency "mime-types"
23
23
  end
@@ -24,10 +24,9 @@ describe Officex2str do
24
24
  end
25
25
  end
26
26
 
27
- context "#pickup_pages" do
27
+ context "#select_target_entries" do
28
28
  subject do
29
- archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
30
- Officex2str.new(@file_path).send(:pickup_pages, archives).sort
29
+ Officex2str.new(@file_path).send(:select_target_entries).map{|entry| entry.to_s}.sort
31
30
  end
32
31
  context "extname is docx" do
33
32
  before { @file_path = "fixtures/sample.docx" }
@@ -43,7 +42,6 @@ describe Officex2str do
43
42
  before { @file_path = "fixtures/sample.pptx" }
44
43
  it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
45
44
  end
46
-
47
45
  end
48
46
 
49
47
  context "#convert" do
@@ -54,7 +52,7 @@ describe Officex2str do
54
52
  before { @file_path = "fixtures/sample.xlsx" }
55
53
  it do
56
54
  subject.should include("複数シート対応")
57
- subject.should include("ソニックガーデン")
55
+ subject.should include("ソニックガーデン")
58
56
  subject.should include("SONICGARDEN")
59
57
  subject.should include("株式会社")
60
58
  subject.should include("コメント")
data/spec/spec_helper.rb CHANGED
@@ -5,7 +5,7 @@ require 'officex2str'
5
5
  require "rspec"
6
6
  require "rspec/core"
7
7
  require 'nokogiri'
8
- require 'zipruby'
8
+ require 'zip'
9
9
  require 'kconv'
10
10
 
11
11
  # Requires supporting files with custom matchers and macros, etc,
metadata CHANGED
@@ -1,18 +1,20 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: officex2str
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - interu
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2014-03-10 00:00:00.000000000 Z
12
+ date: 2014-07-14 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: rake
15
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
16
18
  requirements:
17
19
  - - '='
18
20
  - !ruby/object:Gem::Version
@@ -20,6 +22,7 @@ dependencies:
20
22
  type: :development
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
23
26
  requirements:
24
27
  - - '='
25
28
  - !ruby/object:Gem::Version
@@ -27,6 +30,7 @@ dependencies:
27
30
  - !ruby/object:Gem::Dependency
28
31
  name: rspec
29
32
  requirement: !ruby/object:Gem::Requirement
33
+ none: false
30
34
  requirements:
31
35
  - - '='
32
36
  - !ruby/object:Gem::Version
@@ -34,6 +38,7 @@ dependencies:
34
38
  type: :development
35
39
  prerelease: false
36
40
  version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
37
42
  requirements:
38
43
  - - '='
39
44
  - !ruby/object:Gem::Version
@@ -41,45 +46,51 @@ dependencies:
41
46
  - !ruby/object:Gem::Dependency
42
47
  name: nokogiri
43
48
  requirement: !ruby/object:Gem::Requirement
49
+ none: false
44
50
  requirements:
45
- - - '>='
51
+ - - ! '>='
46
52
  - !ruby/object:Gem::Version
47
53
  version: 1.4.7
48
54
  type: :runtime
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
51
58
  requirements:
52
- - - '>='
59
+ - - ! '>='
53
60
  - !ruby/object:Gem::Version
54
61
  version: 1.4.7
55
62
  - !ruby/object:Gem::Dependency
56
- name: zipruby
63
+ name: rubyzip
57
64
  requirement: !ruby/object:Gem::Requirement
65
+ none: false
58
66
  requirements:
59
- - - '='
67
+ - - ! '>='
60
68
  - !ruby/object:Gem::Version
61
- version: 0.3.6
69
+ version: '0'
62
70
  type: :runtime
63
71
  prerelease: false
64
72
  version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
65
74
  requirements:
66
- - - '='
75
+ - - ! '>='
67
76
  - !ruby/object:Gem::Version
68
- version: 0.3.6
77
+ version: '0'
69
78
  - !ruby/object:Gem::Dependency
70
79
  name: mime-types
71
80
  requirement: !ruby/object:Gem::Requirement
81
+ none: false
72
82
  requirements:
73
- - - '='
83
+ - - ! '>='
74
84
  - !ruby/object:Gem::Version
75
- version: '2.1'
85
+ version: '0'
76
86
  type: :runtime
77
87
  prerelease: false
78
88
  version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
79
90
  requirements:
80
- - - '='
91
+ - - ! '>='
81
92
  - !ruby/object:Gem::Version
82
- version: '2.1'
93
+ version: '0'
83
94
  description: convert office 2010 files to str
84
95
  email:
85
96
  - interu@sonicgarden.jp
@@ -103,26 +114,33 @@ files:
103
114
  - spec/spec_helper.rb
104
115
  homepage: ''
105
116
  licenses: []
106
- metadata: {}
107
117
  post_install_message:
108
118
  rdoc_options: []
109
119
  require_paths:
110
120
  - lib
111
121
  required_ruby_version: !ruby/object:Gem::Requirement
122
+ none: false
112
123
  requirements:
113
- - - '>='
124
+ - - ! '>='
114
125
  - !ruby/object:Gem::Version
115
126
  version: '0'
127
+ segments:
128
+ - 0
129
+ hash: -2505515257273957175
116
130
  required_rubygems_version: !ruby/object:Gem::Requirement
131
+ none: false
117
132
  requirements:
118
- - - '>='
133
+ - - ! '>='
119
134
  - !ruby/object:Gem::Version
120
135
  version: '0'
136
+ segments:
137
+ - 0
138
+ hash: -2505515257273957175
121
139
  requirements: []
122
140
  rubyforge_project:
123
- rubygems_version: 2.0.14
141
+ rubygems_version: 1.8.23
124
142
  signing_key:
125
- specification_version: 4
143
+ specification_version: 3
126
144
  summary: convert office 2010 files(docx,xlsx,pptx) to str
127
145
  test_files:
128
146
  - spec/officex2str_spec.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 994b2f27242952785bc69daaa22df68747df9f61
4
- data.tar.gz: efd05647dd536e2fe7babb358c57b45bed84bbe7
5
- SHA512:
6
- metadata.gz: 850e295a22e0cc7c83e3d7c93fac038df76b4955f8d6141f51b80bf9ec3f0ed2af22c74889c336db923b95fc728910d8ad8ed6a86a9abef7f567a4838ca508b9
7
- data.tar.gz: 6e2c8d08613ffa67389615326ef8ff40656f2e645f93c2a7db76f6c41332241c1dd8167b44ce4decba7c6a43b8cfe920ce1849b9a903dbc2bebfb2e69d1afb9a