officex2str 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in officex2str.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Teruo Adachi
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,22 @@
1
+ # Officex2str
2
+
3
+ Convert Office 2010 files (.docx, .xlsx, .pptx) to a plain-text string.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'officex2str'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install officex2str
18
+
19
+ ## Usage
20
+
21
+ Officex2str.convert("file_path")
22
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
Binary file
Binary file
Binary file
@@ -0,0 +1,3 @@
1
# Namespace of the officex2str gem. This part only carries the release number.
module Officex2str
  # Release number of the gem; the gemspec assigns it to `gem.version`.
  VERSION = '0.0.1'
end
@@ -0,0 +1,38 @@
1
require 'kconv'
require 'nokogiri'
require 'zipruby'
#require "officex2str/version"
4
+
5
# Extracts the text content of Office Open XML files (.docx, .xlsx, .pptx).
#
# A document is a zip archive of XML parts. The parts that carry text are
# selected by archive-entry name, read out of the archive and reduced to
# their text nodes with Nokogiri.
module Officex2str
  # Converts the document at +file_path+ to a single string.
  #
  # @param file_path [String] path to a .docx, .xlsx or .pptx file
  # @return [String] text of all selected XML parts, concatenated in archive
  #   order; empty when the file extension is not one of the supported ones
  def self.convert(file_path)
    entry_names = Zip::Archive.open(file_path) { |archive| archive.map(&:name) }
    pages = pickup_pages(File.extname(file_path), entry_names)
    xml_to_str(extract_xmls(file_path, pages))
  end

  # NOTE(review): the helpers below are internal. A bare `private` does not
  # affect methods defined with `def self.`, so they have always been callable
  # from outside; that visibility is kept for backward compatibility.

  # Selects the archive entries that hold document text.
  #
  # @param extname [String] file extension including the dot; matched
  #   case-insensitively so that "SAMPLE.DOCX" is handled like "sample.docx"
  # @param archives [Array<String>] names of all entries in the archive
  # @return [Array<String>, nil] matching entry names, or nil when the
  #   extension is not supported
  def self.pickup_pages(extname, archives)
    case extname.to_s.downcase
    when ".docx"
      archives.select { |a| /^word\/document/ =~ a }
    when ".xlsx"
      # Cell strings live in sharedStrings, cell comments in commentsN.
      archives.select { |a| /^xl\/worksheets\/sheet/ =~ a || /^xl\/sharedStrings/ =~ a || /^xl\/comments/ =~ a }
    when ".pptx"
      archives.select { |a| /^ppt\/slides\/slide/ =~ a }
    else
      nil
    end
  end

  # Reads the raw XML of each requested entry.
  #
  # @param file_path [String] path to the archive
  # @param pages [Array<String>, nil] entry names to read; nil (unsupported
  #   extension) is treated as "nothing to read"
  # @return [Array<String>] contents of the entries, in the order given
  def self.extract_xmls(file_path, pages)
    pages = Array(pages)
    xml_text = []
    # Nothing to read: skip re-opening the archive.
    return xml_text if pages.empty?

    Zip::Archive.open(file_path) do |archive|
      pages.each do |page|
        archive.fopen(page) { |f| xml_text << f.read }
      end
    end
    xml_text
  end

  # Strips the markup from each XML string and joins the remaining text.
  #
  # String#toutf8 is provided by the standard library's kconv, which has to
  # be loaded before this method is called.
  #
  # @param xml_text [Array<String>] raw XML documents
  # @return [String] concatenated text content; "" for an empty array
  def self.xml_to_str(xml_text)
    xml_text.map { |xml_t| Nokogiri.XML(xml_t.toutf8, nil, 'utf8').to_str }.join
  end
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/officex2str/version', __FILE__)
3
+
4
# Gem specification for officex2str.
Gem::Specification.new do |gem|
  gem.authors       = ["interu"]
  gem.email         = ["interu@sonicgarden.jp"]
  gem.description   = %q{convert office 2010 files to str}
  gem.summary       = %q{convert office 2010 files(docx,xlsx,pptx) to str}
  gem.homepage      = ""

  # Split on newlines. `$\` (the output record separator) is nil by default,
  # which makes String#split break on any whitespace and so mangles file names
  # that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "officex2str"
  gem.require_paths = ["lib"]
  gem.version       = Officex2str::VERSION

  # The library requires nokogiri and zipruby when it is loaded, so they have
  # to be installed together with the gem, not only for development.
  gem.add_dependency "nokogiri", [">= 1.4.7"]
  gem.add_dependency "zipruby", ["= 0.3.6"]

  gem.add_development_dependency "rake", ["= 0.9.2"]
  # The spec helper requires rspec.
  gem.add_development_dependency "rspec"
end
@@ -0,0 +1,84 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for Officex2str, run against the sample documents in fixtures/.
describe Officex2str do
  # Absolute path of a sample document in the gem's fixtures/ directory, so
  # that the examples do not depend on the current working directory.
  def fixture(name)
    File.expand_path(File.join("..", "..", "fixtures", name), __FILE__)
  end

  context "#pickup_pages" do
    # Entry names selected for the fixture, sorted so that the expectation
    # does not depend on the order of entries inside the archive.
    subject do
      archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
      Officex2str.send(:pickup_pages, File.extname(@file_path), archives).sort
    end

    context "extname is docx" do
      before { @file_path = fixture("sample.docx") }

      it { subject.should == ["word/document.xml"] }
    end

    context "extname is xlsx" do
      before { @file_path = fixture("sample.xlsx") }

      it { subject.should == ["xl/comments1.xml", "xl/sharedStrings.xml", "xl/worksheets/sheet1.xml", "xl/worksheets/sheet2.xml"] }
    end

    context "extname is pptx" do
      before { @file_path = fixture("sample.pptx") }

      it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
    end
  end

  context "#convert" do
    # Only the public entry point is exercised here; it runs the helper
    # steps itself.
    subject { Officex2str.convert(@file_path) }

    context "extname is xlsx" do
      before { @file_path = fixture("sample.xlsx") }

      it do
        subject.should include("複数シート対応")
        subject.should include("ソニックガーデン")
        subject.should include("SONICGARDEN")
        subject.should include("株式会社")
        subject.should include("コメント")
        subject.should_not include("sonicgarden")
        subject.should_not include("sheet")
      end
    end

    context "extname is docx" do
      before { @file_path = fixture("sample.docx") }

      it do
        subject.should include("複数ページ対応")
        subject.should include("ソニックガーデン")
        subject.should include("テキストボックス")
        subject.should_not include("sonicgarden")
        subject.should_not include("sheet")
      end
    end

    context "extname is pptx" do
      before { @file_path = fixture("sample.pptx") }

      it do
        subject.should include("Aタイトル")
        subject.should include("Aサブタイトル")
        subject.should include("タイトルB")
        subject.should include("テキストB")
        subject.should include("テキストボックスB")
        subject.should_not include("sonicgarden")
        subject.should_not include("sheet")
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+
4
+ require "rubygems"
5
+ require 'officex2str'
6
+ require "rspec"
7
+ require 'nokogiri'
8
+ require 'zipruby'
9
+ require 'kconv'
10
+
11
+ # Requires supporting files with custom matchers and macros, etc,
12
+ # in ./support/ and its subdirectories.
13
+ #Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
14
+
15
+ RSpec.configure do |config|
16
+
17
+ end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: officex2str
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - interu
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-07-18 00:00:00 +09:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rake
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - "="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 9
31
+ - 2
32
+ version: 0.9.2
33
+ type: :development
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: nokogiri
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ segments:
44
+ - 1
45
+ - 4
46
+ - 7
47
+ version: 1.4.7
48
+ type: :development
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: zipruby
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - "="
57
+ - !ruby/object:Gem::Version
58
+ segments:
59
+ - 0
60
+ - 3
61
+ - 6
62
+ version: 0.3.6
63
+ type: :development
64
+ version_requirements: *id003
65
+ description: convert office 2010 files to str
66
+ email:
67
+ - interu@sonicgarden.jp
68
+ executables: []
69
+
70
+ extensions: []
71
+
72
+ extra_rdoc_files: []
73
+
74
+ files:
75
+ - .gitignore
76
+ - Gemfile
77
+ - LICENSE
78
+ - README.md
79
+ - Rakefile
80
+ - fixtures/sample.docx
81
+ - fixtures/sample.pptx
82
+ - fixtures/sample.xlsx
83
+ - lib/officex2str.rb
84
+ - lib/officex2str/version.rb
85
+ - officex2str.gemspec
86
+ - spec/officex2str_spec.rb
87
+ - spec/spec_helper.rb
88
+ has_rdoc: true
89
+ homepage: ""
90
+ licenses: []
91
+
92
+ post_install_message:
93
+ rdoc_options: []
94
+
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ segments:
111
+ - 0
112
+ version: "0"
113
+ requirements: []
114
+
115
+ rubyforge_project:
116
+ rubygems_version: 1.3.7
117
+ signing_key:
118
+ specification_version: 3
119
+ summary: convert office 2010 files(docx,xlsx,pptx) to str
120
+ test_files:
121
+ - spec/officex2str_spec.rb
122
+ - spec/spec_helper.rb