officex2str 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in officex2str.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Teruo Adachi
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,22 @@
1
+ # Officex2str
2
+
3
+ Convert Office 2010 files (.docx, .xlsx, .pptx) to a plain-text string.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'officex2str'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install officex2str
18
+
19
+ ## Usage
20
+
21
+ Officex2str.convert("file_path")
22
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
Binary file
Binary file
Binary file
@@ -0,0 +1,3 @@
1
module Officex2str
  # Release version of the gem; officex2str.gemspec assigns it to gem.version.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,38 @@
1
require 'kconv'
require 'nokogiri'
require 'zipruby'
#require "officex2str/version"
4
+
5
# Extracts the text content of Office 2010 documents (.docx, .xlsx, .pptx).
#
# The documents are opened as zip archives; the XML entries that hold the
# document text are read and reduced to their text content with Nokogiri.
module Officex2str
  # Patterns selecting the archive entries that carry text, keyed by the
  # lower-case file extension. Anchored with \A so a pattern can only match
  # at the very start of an entry name.
  PAGE_PATTERNS = {
    ".docx" => [%r{\Aword/document}],
    ".xlsx" => [%r{\Axl/worksheets/sheet}, %r{\Axl/sharedStrings}, %r{\Axl/comments}],
    ".pptx" => [%r{\Appt/slides/slide}]
  }.freeze

  # Converts the document at +file_path+ into a single string.
  #
  # @param file_path [String] path of a .docx, .xlsx or .pptx file
  # @return [String] concatenated text of every selected XML entry
  # @raise [ArgumentError] when the file extension is not supported
  def self.convert(file_path)
    entry_names = Zip::Archive.open(file_path) { |archive| archive.map(&:name) }
    pages = pickup_pages(File.extname(file_path), entry_names)
    # Fail with a clear message instead of a NoMethodError on nil further down.
    unless pages
      raise ArgumentError, "unsupported file extension: #{File.extname(file_path).inspect}"
    end

    xml_to_str(extract_xmls(file_path, pages))
  end

  # The methods below are internal helpers. They were never actually private
  # (a bare `private` does not apply to `def self.` methods), so they remain
  # callable to keep existing callers working.

  # Selects the archive entries that contain document text.
  #
  # @param extname [String] file extension including the dot; case-insensitive
  # @param archives [Array<String>] entry names found in the archive
  # @return [Array<String>, nil] matching entry names in archive order, or
  #   nil when the extension is not supported
  def self.pickup_pages(extname, archives)
    patterns = PAGE_PATTERNS[extname.to_s.downcase]
    return unless patterns

    archives.select { |name| patterns.any? { |pattern| pattern =~ name } }
  end

  # Reads the raw content of the given archive entries.
  #
  # @param file_path [String] path of the archive
  # @param pages [Array<String>, nil] entry names to read; nil reads nothing
  # @return [Array<String>] entry contents, in the order of +pages+
  def self.extract_xmls(file_path, pages)
    xml_text = []
    Zip::Archive.open(file_path) do |archive|
      Array(pages).each do |page|
        archive.fopen(page) { |entry| xml_text << entry.read }
      end
    end
    xml_text
  end

  # Reduces XML documents to their text content and concatenates the results.
  #
  # @param xml_text [Array<String>] XML documents
  # @return [String] concatenated text; empty when +xml_text+ is empty
  def self.xml_to_str(xml_text)
    text = ""
    # String#toutf8 is provided by the stdlib kconv library.
    xml_text.each { |xml| text << Nokogiri.XML(xml.toutf8, nil, 'utf8').to_str }
    text
  end
end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/officex2str/version', __FILE__)
3
+
4
# Gem specification for officex2str.
Gem::Specification.new do |gem|
  gem.authors       = ["interu"]
  gem.email         = ["interu@sonicgarden.jp"]
  gem.description   = %q{convert office 2010 files to str}
  gem.summary       = %q{convert office 2010 files(docx,xlsx,pptx) to str}
  gem.homepage      = ""

  # Split on newlines only: `$\` is nil by default, which makes String#split
  # break on any whitespace and mangles file names that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "officex2str"
  gem.require_paths = ["lib"]
  gem.version       = Officex2str::VERSION

  # lib/officex2str.rb requires nokogiri and zipruby at load time, so they
  # must be installed together with the gem, not only for development.
  gem.add_runtime_dependency "nokogiri", [">= 1.4.7"]
  gem.add_runtime_dependency "zipruby", ["= 0.3.6"]

  gem.add_development_dependency "rake", ["= 0.9.2"]
end
@@ -0,0 +1,84 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for Officex2str, run against the sample documents in fixtures/.
describe Officex2str do
  # Absolute path of a sample document in the gem's fixtures directory, so the
  # specs do not depend on the directory rspec is started from.
  def fixture(name)
    File.expand_path("../../fixtures/#{name}", __FILE__)
  end

  context "#pickup_pages" do
    # Entry names selected for @file_path, sorted for a stable comparison.
    subject do
      archives = Zip::Archive.open(@file_path) { |archive| archive.map(&:name) }
      Officex2str.send(:pickup_pages, File.extname(@file_path), archives).sort
    end

    context "extname is docx" do
      before do
        @file_path = fixture("sample.docx")
      end
      it { subject.should == ["word/document.xml"] }
    end

    context "extname is xlsx" do
      before do
        @file_path = fixture("sample.xlsx")
      end
      it { subject.should == ["xl/comments1.xml", "xl/sharedStrings.xml", "xl/worksheets/sheet1.xml", "xl/worksheets/sheet2.xml"] }
    end

    context "extname is pptx" do
      before do
        @file_path = fixture("sample.pptx")
      end
      it { subject.should == ["ppt/slides/slide1.xml", "ppt/slides/slide2.xml"] }
    end
  end

  context "#convert" do
    # Only the public entry point is exercised; the intermediate helper calls
    # that used to precede it were computed and then thrown away.
    subject { Officex2str.convert(@file_path) }

    context "extname is xlsx" do
      before do
        @file_path = fixture("sample.xlsx")
      end
      it do
        subject.should include("複数シート対応")
        subject.should include("ソニックガーデン")
        subject.should include("SONICGARDEN")
        subject.should include("株式会社")
        subject.should include("コメント")
        subject.should_not include("sonicgarden")
        subject.should_not include("sheet")
      end
    end

    context "extname is docx" do
      before do
        @file_path = fixture("sample.docx")
      end
      it do
        subject.should include("複数ページ対応")
        subject.should include("ソニックガーデン")
        subject.should include("テキストボックス")
        subject.should_not include("sonicgarden")
        subject.should_not include("sheet")
      end
    end

    context "extname is pptx" do
      before do
        @file_path = fixture("sample.pptx")
      end
      it do
        subject.should include("Aタイトル")
        subject.should include("Aサブタイトル")
        subject.should include("タイトルB")
        subject.should include("テキストB")
        subject.should include("テキストボックスB")
        subject.should_not include("sonicgarden")
        subject.should_not include("sheet")
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
# Put the gem's lib/ directory and this spec directory on the load path;
# the spec directory is added last, so it ends up first in $LOAD_PATH.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)

require "rubygems"
require 'officex2str'
require "rspec"
require 'nokogiri'
require 'zipruby'
require 'kconv'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
#Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}

# No custom RSpec configuration is set.
RSpec.configure do |config|
end
+ end
metadata ADDED
@@ -0,0 +1,122 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: officex2str
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 1
9
+ version: 0.0.1
10
+ platform: ruby
11
+ authors:
12
+ - interu
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2012-07-18 00:00:00 +09:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rake
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - "="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 9
31
+ - 2
32
+ version: 0.9.2
33
+ type: :development
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: nokogiri
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ segments:
44
+ - 1
45
+ - 4
46
+ - 7
47
+ version: 1.4.7
48
+ type: :development
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: zipruby
52
+ prerelease: false
53
+ requirement: &id003 !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - "="
57
+ - !ruby/object:Gem::Version
58
+ segments:
59
+ - 0
60
+ - 3
61
+ - 6
62
+ version: 0.3.6
63
+ type: :development
64
+ version_requirements: *id003
65
+ description: convert office 2010 files to str
66
+ email:
67
+ - interu@sonicgarden.jp
68
+ executables: []
69
+
70
+ extensions: []
71
+
72
+ extra_rdoc_files: []
73
+
74
+ files:
75
+ - .gitignore
76
+ - Gemfile
77
+ - LICENSE
78
+ - README.md
79
+ - Rakefile
80
+ - fixtures/sample.docx
81
+ - fixtures/sample.pptx
82
+ - fixtures/sample.xlsx
83
+ - lib/officex2str.rb
84
+ - lib/officex2str/version.rb
85
+ - officex2str.gemspec
86
+ - spec/officex2str_spec.rb
87
+ - spec/spec_helper.rb
88
+ has_rdoc: true
89
+ homepage: ""
90
+ licenses: []
91
+
92
+ post_install_message:
93
+ rdoc_options: []
94
+
95
+ require_paths:
96
+ - lib
97
+ required_ruby_version: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ segments:
103
+ - 0
104
+ version: "0"
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ segments:
111
+ - 0
112
+ version: "0"
113
+ requirements: []
114
+
115
+ rubyforge_project:
116
+ rubygems_version: 1.3.7
117
+ signing_key:
118
+ specification_version: 3
119
+ summary: convert office 2010 files(docx,xlsx,pptx) to str
120
+ test_files:
121
+ - spec/officex2str_spec.rb
122
+ - spec/spec_helper.rb