doc_image_extract 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/doc_image_extract.gemspec +1 -2
- data/lib/doc_image_extract.rb +2 -1
- data/lib/doc_image_extract/document.rb +12 -10
- data/lib/doc_image_extract/exporter.rb +4 -2
- data/lib/doc_image_extract/picture.rb +10 -0
- data/lib/doc_image_extract/reader.rb +2 -0
- data/lib/doc_image_extract/reader/hwpf.rb +26 -0
- data/lib/doc_image_extract/reader/xwpf.rb +26 -0
- data/lib/jars/ooxml-lib/dom4j-1.6.1.jar +0 -0
- data/lib/jars/ooxml-lib/stax-api-1.0.1.jar +0 -0
- data/lib/jars/ooxml-lib/xmlbeans-2.3.0.jar +0 -0
- data/lib/jars/{poi-3.9-20121203.jar → poi-3.10-FINAL-20140208.jar} +0 -0
- data/lib/jars/poi-ooxml-3.10-FINAL-20140208.jar +0 -0
- data/lib/jars/poi-ooxml-schemas-3.10-FINAL-20140208.jar +0 -0
- data/lib/jars/{poi-scratchpad-3.9-20121203.jar → poi-scratchpad-3.10-FINAL-20140208.jar} +0 -0
- data/lib/poi.rb +8 -2
- metadata +33 -25
- data/lib/doc_image_extract/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24f47e46b7193367b1bc79efbdffb62f212b13a0
|
4
|
+
data.tar.gz: 35ae2d5a3b4ef9f7592ceabb358f53406f651376
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 84db9d82b9ac6a3fefdcc2fa080c71d9175e8e36788db03279f5bca6b6f77bef65a3ad23d923e7c2849a7a348079ed28381690dc2dd4741af284b32e4ffed7cb
|
7
|
+
data.tar.gz: 8378fbfb6a5e586c9d119e5a107e5c03aa66efff1c0ec8a704239b3731bbc9b32c37ae49b9207b5fa19773a355b71dcf9d3b64531aab2357d40f06dbe85cbabe
|
data/doc_image_extract.gemspec
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
lib = File.expand_path('../lib', __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'doc_image_extract/version'
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
6
|
spec.name = "doc_image_extract"
|
8
|
-
spec.version = DocImageExtract::VERSION
|
7
|
+
spec.version = "0.0.2"
|
9
8
|
spec.authors = ["Eric Barendt"]
|
10
9
|
spec.email = ["ebarendt@gmail.com"]
|
11
10
|
spec.summary = %q{Extracts all images from a given Word document.}
|
data/lib/doc_image_extract/document.rb
CHANGED
@@ -1,21 +1,23 @@
|
|
1
|
-
require "poi"
|
2
|
-
|
3
1
|
module DocImageExtract
|
4
2
|
class Document
|
5
|
-
include Java
|
6
|
-
java_import "org.apache.poi.hwpf.HWPFDocument"
|
7
|
-
java_import "java.io.FileInputStream"
|
8
|
-
|
9
3
|
attr_reader :file
|
10
4
|
|
11
5
|
def initialize file
|
12
6
|
@file = file
|
13
7
|
end
|
14
8
|
|
9
|
+
def reader
|
10
|
+
@reader ||= begin
|
11
|
+
if file =~ /docx\Z/
|
12
|
+
Reader::Xwpf.new file
|
13
|
+
else
|
14
|
+
Reader::Hwpf.new file
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
15
19
|
def pictures
|
16
|
-
|
17
|
-
pictures_table = document.get_pictures_table
|
18
|
-
pictures_table.get_all_pictures
|
20
|
+
reader.pictures
|
19
21
|
end
|
20
22
|
end
|
21
|
-
end
|
23
|
+
end
|
data/lib/doc_image_extract/reader/hwpf.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require "poi"
|
2
|
+
|
3
|
+
module DocImageExtract
|
4
|
+
module Reader
|
5
|
+
class Hwpf
|
6
|
+
java_import java.io.FileInputStream
|
7
|
+
java_import org.apache.poi.hwpf.HWPFDocument
|
8
|
+
|
9
|
+
attr_reader :file
|
10
|
+
|
11
|
+
def initialize(file)
|
12
|
+
@file = file
|
13
|
+
end
|
14
|
+
|
15
|
+
def pictures
|
16
|
+
document = HWPFDocument.new FileInputStream.new file
|
17
|
+
pictures_table = document.get_pictures_table
|
18
|
+
pictures_table.get_all_pictures.map do |picture|
|
19
|
+
filename = picture.suggest_full_file_name
|
20
|
+
data = picture.get_content
|
21
|
+
DocImageExtract::Picture.new filename, data
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/doc_image_extract/reader/xwpf.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require "poi"
|
2
|
+
|
3
|
+
module DocImageExtract
|
4
|
+
module Reader
|
5
|
+
class Xwpf
|
6
|
+
java_import java.io.FileInputStream
|
7
|
+
java_import org.apache.poi.xwpf.usermodel.XWPFDocument
|
8
|
+
|
9
|
+
attr_reader :file
|
10
|
+
|
11
|
+
def initialize(file)
|
12
|
+
@file = file
|
13
|
+
end
|
14
|
+
|
15
|
+
def pictures
|
16
|
+
document = XWPFDocument.new FileInputStream.new file
|
17
|
+
pictures = document.get_all_pictures
|
18
|
+
pictures.map do |picture|
|
19
|
+
filename = picture.get_file_name
|
20
|
+
data = picture.get_data
|
21
|
+
Picture.new filename, data
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/poi.rb
CHANGED
@@ -1,2 +1,8 @@
|
|
1
|
-
require "
|
2
|
-
require "jars/poi-
|
1
|
+
require "java"
|
2
|
+
require "jars/poi-3.10-FINAL-20140208.jar"
|
3
|
+
require "jars/poi-scratchpad-3.10-FINAL-20140208.jar"
|
4
|
+
require "jars/poi-ooxml-3.10-FINAL-20140208.jar"
|
5
|
+
require "jars/poi-ooxml-schemas-3.10-FINAL-20140208.jar"
|
6
|
+
require "jars/ooxml-lib/dom4j-1.6.1.jar"
|
7
|
+
require "jars/ooxml-lib/stax-api-1.0.1.jar"
|
8
|
+
require "jars/ooxml-lib/xmlbeans-2.3.0.jar"
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doc_image_extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Barendt
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
|
-
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - '>='
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
|
-
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
23
21
|
requirements:
|
24
|
-
- -
|
22
|
+
- - '>='
|
25
23
|
- !ruby/object:Gem::Version
|
26
24
|
version: '0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
|
-
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - '>='
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
|
-
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
37
35
|
requirements:
|
38
|
-
- -
|
36
|
+
- - '>='
|
39
37
|
- !ruby/object:Gem::Version
|
40
38
|
version: '0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
41
|
description: Extracts all images from a given Word document.
|
42
42
|
email:
|
43
43
|
- ebarendt@gmail.com
|
@@ -46,7 +46,7 @@ executables:
|
|
46
46
|
extensions: []
|
47
47
|
extra_rdoc_files: []
|
48
48
|
files:
|
49
|
-
-
|
49
|
+
- .gitignore
|
50
50
|
- Gemfile
|
51
51
|
- LICENSE.txt
|
52
52
|
- README.md
|
@@ -56,32 +56,40 @@ files:
|
|
56
56
|
- lib/doc_image_extract.rb
|
57
57
|
- lib/doc_image_extract/document.rb
|
58
58
|
- lib/doc_image_extract/exporter.rb
|
59
|
-
- lib/doc_image_extract/version.rb
|
60
|
-
- lib/jars/poi-3.9-20121203.jar
|
61
|
-
- lib/jars/poi-scratchpad-3.9-20121203.jar
|
59
|
+
- lib/doc_image_extract/picture.rb
|
60
|
+
- lib/doc_image_extract/reader.rb
|
61
|
+
- lib/doc_image_extract/reader/hwpf.rb
|
62
|
+
- lib/doc_image_extract/reader/xwpf.rb
|
63
|
+
- lib/jars/ooxml-lib/dom4j-1.6.1.jar
|
64
|
+
- lib/jars/ooxml-lib/stax-api-1.0.1.jar
|
65
|
+
- lib/jars/ooxml-lib/xmlbeans-2.3.0.jar
|
66
|
+
- lib/jars/poi-3.10-FINAL-20140208.jar
|
67
|
+
- lib/jars/poi-ooxml-3.10-FINAL-20140208.jar
|
68
|
+
- lib/jars/poi-ooxml-schemas-3.10-FINAL-20140208.jar
|
69
|
+
- lib/jars/poi-scratchpad-3.10-FINAL-20140208.jar
|
62
70
|
- lib/poi.rb
|
63
71
|
homepage: ''
|
64
72
|
licenses:
|
65
73
|
- MIT
|
66
74
|
metadata: {}
|
67
|
-
post_install_message:
|
75
|
+
post_install_message:
|
68
76
|
rdoc_options: []
|
69
77
|
require_paths:
|
70
78
|
- lib
|
71
79
|
required_ruby_version: !ruby/object:Gem::Requirement
|
72
80
|
requirements:
|
73
|
-
- -
|
81
|
+
- - '>='
|
74
82
|
- !ruby/object:Gem::Version
|
75
83
|
version: '0'
|
76
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
85
|
requirements:
|
78
|
-
- -
|
86
|
+
- - '>='
|
79
87
|
- !ruby/object:Gem::Version
|
80
88
|
version: '0'
|
81
89
|
requirements: []
|
82
|
-
rubyforge_project:
|
83
|
-
rubygems_version: 2.
|
84
|
-
signing_key:
|
90
|
+
rubyforge_project:
|
91
|
+
rubygems_version: 2.1.9
|
92
|
+
signing_key:
|
85
93
|
specification_version: 4
|
86
94
|
summary: Extracts all images from a given Word document.
|
87
95
|
test_files: []
|