officex2str 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/officex2str.rb +15 -27
- data/lib/officex2str/version.rb +1 -1
- data/officex2str.gemspec +2 -2
- data/spec/officex2str_spec.rb +3 -5
- data/spec/spec_helper.rb +1 -1
- metadata +36 -18
- checksums.yaml +0 -7
data/lib/officex2str.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
-
require 'zipruby'
|
3
2
|
require 'mime/types'
|
3
|
+
require 'zip'
|
4
4
|
|
5
5
|
class Officex2str
|
6
6
|
class InvalidFileTypeError < Exception; end
|
@@ -10,7 +10,7 @@ class Officex2str
|
|
10
10
|
PPTX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
11
11
|
VALID_CONTENT_TYPE = [DOCX_CONTENT_TYPE, XLSX_CONTENT_TYPE, PPTX_CONTENT_TYPE].freeze
|
12
12
|
|
13
|
-
attr_accessor :path, :content_type
|
13
|
+
attr_accessor :path, :content_type, :entries, :xmls
|
14
14
|
|
15
15
|
def self.convert(file_path)
|
16
16
|
self.new(file_path).convert
|
@@ -19,14 +19,14 @@ class Officex2str
|
|
19
19
|
def initialize(file_path)
|
20
20
|
@path = file_path
|
21
21
|
@content_type = MIME::Types.type_for(path).first.content_type
|
22
|
+
@entries = valid_file? ? Zip::File.open(path).entries : []
|
23
|
+
@xmls = []
|
22
24
|
end
|
23
25
|
|
24
26
|
def convert
|
25
27
|
if valid_file?
|
26
|
-
|
27
|
-
|
28
|
-
xmls = extract_xmls(pages)
|
29
|
-
xml_to_str(xmls)
|
28
|
+
extract_xmls
|
29
|
+
xml_to_str
|
30
30
|
else
|
31
31
|
raise InvalidFileTypeError, "Not recognized file type"
|
32
32
|
end
|
@@ -37,37 +37,25 @@ private
|
|
37
37
|
!!VALID_CONTENT_TYPE.include?(content_type)
|
38
38
|
end
|
39
39
|
|
40
|
-
def pickup_pages(archives)
|
40
|
+
def select_target_entries
|
41
41
|
case content_type
|
42
42
|
when DOCX_CONTENT_TYPE
|
43
|
-
|
43
|
+
entries.select{|a| /^word\/document/ =~ a.to_s}
|
44
44
|
when XLSX_CONTENT_TYPE
|
45
|
-
|
45
|
+
entries.select{|a| /^xl\/worksheets\/sheet/ =~ a.to_s or /^xl\/sharedStrings/ =~ a.to_s or /^xl\/comments/ =~ a.to_s }
|
46
46
|
when PPTX_CONTENT_TYPE
|
47
|
-
|
47
|
+
entries.select{|a| /^ppt\/slides\/slide/ =~ a.to_s}
|
48
48
|
else
|
49
49
|
raise InvalidContentTypeError, "Not recognized content type"
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
-
def extract_xmls(pages)
|
54
|
-
|
55
|
-
Zip::Archive.open(path) { |archive| pages.each{ |page| archive.fopen(page) { |f| xml_text << f.read } } }
|
56
|
-
xml_text
|
53
|
+
def extract_xmls
|
54
|
+
select_target_entries.map{|entry| xmls << entry.get_input_stream.read }
|
57
55
|
end
|
58
56
|
|
59
|
-
def xml_to_str(xmls)
|
60
|
-
|
61
|
-
|
62
|
-
if content_type == XLSX_CONTENT_TYPE
|
63
|
-
xmls.map do |xml|
|
64
|
-
doc = Nokogiri.XML(xml.toutf8)
|
65
|
-
text += doc.search("t").map{|node| node.children.to_s}.join(' ')
|
66
|
-
end
|
67
|
-
else
|
68
|
-
xmls.each{|xml| text << Nokogiri.XML(xml.toutf8, nil, 'utf8').to_str }
|
69
|
-
end
|
70
|
-
end
|
71
|
-
text
|
57
|
+
def xml_to_str
|
58
|
+
return '' if xmls.empty?
|
59
|
+
xmls.inject(""){|result, xml| result << Nokogiri.XML(xml, nil, 'utf8').to_str }
|
72
60
|
end
|
73
61
|
end
|
data/lib/officex2str/version.rb
CHANGED
data/officex2str.gemspec
CHANGED
@@ -18,6 +18,6 @@ Gem::Specification.new do |gem|
|
|
18
18
|
gem.add_development_dependency "rake", ["= 0.9.2"]
|
19
19
|
gem.add_development_dependency "rspec", ["= 2.10.0"]
|
20
20
|
gem.add_runtime_dependency "nokogiri", [">= 1.4.7"]
|
21
|
-
gem.add_runtime_dependency "
|
22
|
-
gem.add_runtime_dependency "mime-types"
|
21
|
+
gem.add_runtime_dependency "rubyzip"
|
22
|
+
gem.add_runtime_dependency "mime-types"
|
23
23
|
end
|
data/spec/officex2str_spec.rb
CHANGED
@@ -24,10 +24,9 @@ describe Officex2str do
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
context "#pickup_pages" do
|
27
|
+
context "#select_target_entries" do
|
28
28
|
subject do
|
29
|
-
|
30
|
-
Officex2str.new(@file_path).send(:pickup_pages, archives).sort
|
29
|
+
Officex2str.new(@file_path).send(:select_target_entries).map{|entry| entry.to_s}.sort
|
31
30
|
end
|
32
31
|
context "extname is docx" do
|
33
32
|
before { @file_path = "fixtures/sample.docx" }
|
@@ -43,7 +42,6 @@ describe Officex2str do
|
|
43
42
|
before { @file_path = "fixtures/sample.pptx" }
|
44
43
|
it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
|
45
44
|
end
|
46
|
-
|
47
45
|
end
|
48
46
|
|
49
47
|
context "#convert" do
|
@@ -54,7 +52,7 @@ describe Officex2str do
|
|
54
52
|
before { @file_path = "fixtures/sample.xlsx" }
|
55
53
|
it do
|
56
54
|
subject.should include("複数シート対応")
|
57
|
-
subject.should include("
|
55
|
+
subject.should include("ソニックガーデン")
|
58
56
|
subject.should include("SONICGARDEN")
|
59
57
|
subject.should include("株式会社")
|
60
58
|
subject.should include("コメント")
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: officex2str
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- interu
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2014-
|
12
|
+
date: 2014-07-14 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: rake
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
16
18
|
requirements:
|
17
19
|
- - '='
|
18
20
|
- !ruby/object:Gem::Version
|
@@ -20,6 +22,7 @@ dependencies:
|
|
20
22
|
type: :development
|
21
23
|
prerelease: false
|
22
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
23
26
|
requirements:
|
24
27
|
- - '='
|
25
28
|
- !ruby/object:Gem::Version
|
@@ -27,6 +30,7 @@ dependencies:
|
|
27
30
|
- !ruby/object:Gem::Dependency
|
28
31
|
name: rspec
|
29
32
|
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
30
34
|
requirements:
|
31
35
|
- - '='
|
32
36
|
- !ruby/object:Gem::Version
|
@@ -34,6 +38,7 @@ dependencies:
|
|
34
38
|
type: :development
|
35
39
|
prerelease: false
|
36
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
37
42
|
requirements:
|
38
43
|
- - '='
|
39
44
|
- !ruby/object:Gem::Version
|
@@ -41,45 +46,51 @@ dependencies:
|
|
41
46
|
- !ruby/object:Gem::Dependency
|
42
47
|
name: nokogiri
|
43
48
|
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
44
50
|
requirements:
|
45
|
-
- - '>='
|
51
|
+
- - ! '>='
|
46
52
|
- !ruby/object:Gem::Version
|
47
53
|
version: 1.4.7
|
48
54
|
type: :runtime
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
51
58
|
requirements:
|
52
|
-
- - '>='
|
59
|
+
- - ! '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: 1.4.7
|
55
62
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: zipruby
|
63
|
+
name: rubyzip
|
57
64
|
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
58
66
|
requirements:
|
59
|
-
- - '
|
67
|
+
- - ! '>='
|
60
68
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0
|
69
|
+
version: '0'
|
62
70
|
type: :runtime
|
63
71
|
prerelease: false
|
64
72
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
65
74
|
requirements:
|
66
|
-
- - '
|
75
|
+
- - ! '>='
|
67
76
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0
|
77
|
+
version: '0'
|
69
78
|
- !ruby/object:Gem::Dependency
|
70
79
|
name: mime-types
|
71
80
|
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
72
82
|
requirements:
|
73
|
-
- - '
|
83
|
+
- - ! '>='
|
74
84
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
85
|
+
version: '0'
|
76
86
|
type: :runtime
|
77
87
|
prerelease: false
|
78
88
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
79
90
|
requirements:
|
80
|
-
- - '
|
91
|
+
- - ! '>='
|
81
92
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
93
|
+
version: '0'
|
83
94
|
description: convert office 2010 files to str
|
84
95
|
email:
|
85
96
|
- interu@sonicgarden.jp
|
@@ -103,26 +114,33 @@ files:
|
|
103
114
|
- spec/spec_helper.rb
|
104
115
|
homepage: ''
|
105
116
|
licenses: []
|
106
|
-
metadata: {}
|
107
117
|
post_install_message:
|
108
118
|
rdoc_options: []
|
109
119
|
require_paths:
|
110
120
|
- lib
|
111
121
|
required_ruby_version: !ruby/object:Gem::Requirement
|
122
|
+
none: false
|
112
123
|
requirements:
|
113
|
-
- - '>='
|
124
|
+
- - ! '>='
|
114
125
|
- !ruby/object:Gem::Version
|
115
126
|
version: '0'
|
127
|
+
segments:
|
128
|
+
- 0
|
129
|
+
hash: -2505515257273957175
|
116
130
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
|
+
none: false
|
117
132
|
requirements:
|
118
|
-
- - '>='
|
133
|
+
- - ! '>='
|
119
134
|
- !ruby/object:Gem::Version
|
120
135
|
version: '0'
|
136
|
+
segments:
|
137
|
+
- 0
|
138
|
+
hash: -2505515257273957175
|
121
139
|
requirements: []
|
122
140
|
rubyforge_project:
|
123
|
-
rubygems_version:
|
141
|
+
rubygems_version: 1.8.23
|
124
142
|
signing_key:
|
125
|
-
specification_version:
|
143
|
+
specification_version: 3
|
126
144
|
summary: convert office 2010 files(docx,xlsx,pptx) to str
|
127
145
|
test_files:
|
128
146
|
- spec/officex2str_spec.rb
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 994b2f27242952785bc69daaa22df68747df9f61
|
4
|
-
data.tar.gz: efd05647dd536e2fe7babb358c57b45bed84bbe7
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 850e295a22e0cc7c83e3d7c93fac038df76b4955f8d6141f51b80bf9ec3f0ed2af22c74889c336db923b95fc728910d8ad8ed6a86a9abef7f567a4838ca508b9
|
7
|
-
data.tar.gz: 6e2c8d08613ffa67389615326ef8ff40656f2e645f93c2a7db76f6c41332241c1dd8167b44ce4decba7c6a43b8cfe920ce1849b9a903dbc2bebfb2e69d1afb9a
|