officex2str 0.0.8 → 0.0.9

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/lib/officex2str.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'nokogiri'
2
- require 'zipruby'
3
2
  require 'mime/types'
3
+ require 'zip'
4
4
 
5
5
  class Officex2str
6
6
  class InvalidFileTypeError < Exception; end
@@ -10,7 +10,7 @@ class Officex2str
10
10
  PPTX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
11
11
  VALID_CONTENT_TYPE = [DOCX_CONTENT_TYPE, XLSX_CONTENT_TYPE, PPTX_CONTENT_TYPE].freeze
12
12
 
13
- attr_accessor :path, :content_type
13
+ attr_accessor :path, :content_type, :entries, :xmls
14
14
 
15
15
  def self.convert(file_path)
16
16
  self.new(file_path).convert
@@ -19,14 +19,14 @@ class Officex2str
19
19
  def initialize(file_path)
20
20
  @path = file_path
21
21
  @content_type = MIME::Types.type_for(path).first.content_type
22
+ @entries = valid_file? ? Zip::File.open(path).entries : []
23
+ @xmls = []
22
24
  end
23
25
 
24
26
  def convert
25
27
  if valid_file?
26
- archives = Zip::Archive.open(path) { |archive| archive.map(&:name) }
27
- pages = pickup_pages(archives)
28
- xmls = extract_xmls(pages)
29
- xml_to_str(xmls)
28
+ extract_xmls
29
+ xml_to_str
30
30
  else
31
31
  raise InvalidFileTypeError, "Not recognized file type"
32
32
  end
@@ -37,37 +37,25 @@ private
37
37
  !!VALID_CONTENT_TYPE.include?(content_type)
38
38
  end
39
39
 
40
- def pickup_pages archives
40
+ def select_target_entries
41
41
  case content_type
42
42
  when DOCX_CONTENT_TYPE
43
- archives.select{|a| /^word\/document/ =~ a}
43
+ entries.select{|a| /^word\/document/ =~ a.to_s}
44
44
  when XLSX_CONTENT_TYPE
45
- archives.select{|a| /^xl\/worksheets\/sheet/ =~ a or /^xl\/sharedStrings/ =~ a or /^xl\/comments/ =~ a }
45
+ entries.select{|a| /^xl\/worksheets\/sheet/ =~ a.to_s or /^xl\/sharedStrings/ =~ a.to_s or /^xl\/comments/ =~ a.to_s }
46
46
  when PPTX_CONTENT_TYPE
47
- archives.select{|a| /^ppt\/slides\/slide/ =~ a}
47
+ entries.select{|a| /^ppt\/slides\/slide/ =~ a.to_s}
48
48
  else
49
49
  raise InvalidContentTypeError, "Not recognized content type"
50
50
  end
51
51
  end
52
52
 
53
- def extract_xmls pages
54
- xml_text = []
55
- Zip::Archive.open(path) { |archive| pages.each{ |page| archive.fopen(page) { |f| xml_text << f.read } } }
56
- xml_text
53
+ def extract_xmls
54
+ select_target_entries.map{|entry| xmls << entry.get_input_stream.read }
57
55
  end
58
56
 
59
- def xml_to_str xmls
60
- text = ""
61
- unless xmls.empty?
62
- if content_type == XLSX_CONTENT_TYPE
63
- xmls.map do |xml|
64
- doc = Nokogiri.XML(xml.toutf8)
65
- text += doc.search("t").map{|node| node.children.to_s}.join(' ')
66
- end
67
- else
68
- xmls.each{|xml| text << Nokogiri.XML(xml.toutf8, nil, 'utf8').to_str }
69
- end
70
- end
71
- text
57
+ def xml_to_str
58
+ return '' if xmls.empty?
59
+ xmls.inject(""){|result, xml| result << Nokogiri.XML(xml, nil, 'utf8').to_str }
72
60
  end
73
61
  end
@@ -1,3 +1,3 @@
1
1
  class Officex2str
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.9"
3
3
  end
data/officex2str.gemspec CHANGED
@@ -18,6 +18,6 @@ Gem::Specification.new do |gem|
18
18
  gem.add_development_dependency "rake", ["= 0.9.2"]
19
19
  gem.add_development_dependency "rspec", ["= 2.10.0"]
20
20
  gem.add_runtime_dependency "nokogiri", [">= 1.4.7"]
21
- gem.add_runtime_dependency "zipruby", ["= 0.3.6"]
22
- gem.add_runtime_dependency "mime-types", ["= 2.1"]
21
+ gem.add_runtime_dependency "rubyzip"
22
+ gem.add_runtime_dependency "mime-types"
23
23
  end
@@ -24,10 +24,9 @@ describe Officex2str do
24
24
  end
25
25
  end
26
26
 
27
- context "#pickup_pages" do
27
+ context "#select_target_entries" do
28
28
  subject do
29
- archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
30
- Officex2str.new(@file_path).send(:pickup_pages, archives).sort
29
+ Officex2str.new(@file_path).send(:select_target_entries).map{|entry| entry.to_s}.sort
31
30
  end
32
31
  context "extname is docx" do
33
32
  before { @file_path = "fixtures/sample.docx" }
@@ -43,7 +42,6 @@ describe Officex2str do
43
42
  before { @file_path = "fixtures/sample.pptx" }
44
43
  it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
45
44
  end
46
-
47
45
  end
48
46
 
49
47
  context "#convert" do
@@ -54,7 +52,7 @@ describe Officex2str do
54
52
  before { @file_path = "fixtures/sample.xlsx" }
55
53
  it do
56
54
  subject.should include("複数シート対応")
57
- subject.should include("ソニックガーデン")
55
+ subject.should include("ソニックガーデン")
58
56
  subject.should include("SONICGARDEN")
59
57
  subject.should include("株式会社")
60
58
  subject.should include("コメント")
data/spec/spec_helper.rb CHANGED
@@ -5,7 +5,7 @@ require 'officex2str'
5
5
  require "rspec"
6
6
  require "rspec/core"
7
7
  require 'nokogiri'
8
- require 'zipruby'
8
+ require 'zip'
9
9
  require 'kconv'
10
10
 
11
11
  # Requires supporting files with custom matchers and macros, etc,
metadata CHANGED
@@ -1,18 +1,20 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: officex2str
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - interu
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2014-03-10 00:00:00.000000000 Z
12
+ date: 2014-07-14 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: rake
15
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
16
18
  requirements:
17
19
  - - '='
18
20
  - !ruby/object:Gem::Version
@@ -20,6 +22,7 @@ dependencies:
20
22
  type: :development
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
23
26
  requirements:
24
27
  - - '='
25
28
  - !ruby/object:Gem::Version
@@ -27,6 +30,7 @@ dependencies:
27
30
  - !ruby/object:Gem::Dependency
28
31
  name: rspec
29
32
  requirement: !ruby/object:Gem::Requirement
33
+ none: false
30
34
  requirements:
31
35
  - - '='
32
36
  - !ruby/object:Gem::Version
@@ -34,6 +38,7 @@ dependencies:
34
38
  type: :development
35
39
  prerelease: false
36
40
  version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
37
42
  requirements:
38
43
  - - '='
39
44
  - !ruby/object:Gem::Version
@@ -41,45 +46,51 @@ dependencies:
41
46
  - !ruby/object:Gem::Dependency
42
47
  name: nokogiri
43
48
  requirement: !ruby/object:Gem::Requirement
49
+ none: false
44
50
  requirements:
45
- - - '>='
51
+ - - ! '>='
46
52
  - !ruby/object:Gem::Version
47
53
  version: 1.4.7
48
54
  type: :runtime
49
55
  prerelease: false
50
56
  version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
51
58
  requirements:
52
- - - '>='
59
+ - - ! '>='
53
60
  - !ruby/object:Gem::Version
54
61
  version: 1.4.7
55
62
  - !ruby/object:Gem::Dependency
56
- name: zipruby
63
+ name: rubyzip
57
64
  requirement: !ruby/object:Gem::Requirement
65
+ none: false
58
66
  requirements:
59
- - - '='
67
+ - - ! '>='
60
68
  - !ruby/object:Gem::Version
61
- version: 0.3.6
69
+ version: '0'
62
70
  type: :runtime
63
71
  prerelease: false
64
72
  version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
65
74
  requirements:
66
- - - '='
75
+ - - ! '>='
67
76
  - !ruby/object:Gem::Version
68
- version: 0.3.6
77
+ version: '0'
69
78
  - !ruby/object:Gem::Dependency
70
79
  name: mime-types
71
80
  requirement: !ruby/object:Gem::Requirement
81
+ none: false
72
82
  requirements:
73
- - - '='
83
+ - - ! '>='
74
84
  - !ruby/object:Gem::Version
75
- version: '2.1'
85
+ version: '0'
76
86
  type: :runtime
77
87
  prerelease: false
78
88
  version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
79
90
  requirements:
80
- - - '='
91
+ - - ! '>='
81
92
  - !ruby/object:Gem::Version
82
- version: '2.1'
93
+ version: '0'
83
94
  description: convert office 2010 files to str
84
95
  email:
85
96
  - interu@sonicgarden.jp
@@ -103,26 +114,33 @@ files:
103
114
  - spec/spec_helper.rb
104
115
  homepage: ''
105
116
  licenses: []
106
- metadata: {}
107
117
  post_install_message:
108
118
  rdoc_options: []
109
119
  require_paths:
110
120
  - lib
111
121
  required_ruby_version: !ruby/object:Gem::Requirement
122
+ none: false
112
123
  requirements:
113
- - - '>='
124
+ - - ! '>='
114
125
  - !ruby/object:Gem::Version
115
126
  version: '0'
127
+ segments:
128
+ - 0
129
+ hash: -2505515257273957175
116
130
  required_rubygems_version: !ruby/object:Gem::Requirement
131
+ none: false
117
132
  requirements:
118
- - - '>='
133
+ - - ! '>='
119
134
  - !ruby/object:Gem::Version
120
135
  version: '0'
136
+ segments:
137
+ - 0
138
+ hash: -2505515257273957175
121
139
  requirements: []
122
140
  rubyforge_project:
123
- rubygems_version: 2.0.14
141
+ rubygems_version: 1.8.23
124
142
  signing_key:
125
- specification_version: 4
143
+ specification_version: 3
126
144
  summary: convert office 2010 files(docx,xlsx,pptx) to str
127
145
  test_files:
128
146
  - spec/officex2str_spec.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 994b2f27242952785bc69daaa22df68747df9f61
4
- data.tar.gz: efd05647dd536e2fe7babb358c57b45bed84bbe7
5
- SHA512:
6
- metadata.gz: 850e295a22e0cc7c83e3d7c93fac038df76b4955f8d6141f51b80bf9ec3f0ed2af22c74889c336db923b95fc728910d8ad8ed6a86a9abef7f567a4838ca508b9
7
- data.tar.gz: 6e2c8d08613ffa67389615326ef8ff40656f2e645f93c2a7db76f6c41332241c1dd8167b44ce4decba7c6a43b8cfe920ce1849b9a903dbc2bebfb2e69d1afb9a