doc_image_extract 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f209d30d130de8731ee02197a75cab6f5b3de928
4
- data.tar.gz: 7134763b289f13abba156eeb2c514c03abdfa909
3
+ metadata.gz: 24f47e46b7193367b1bc79efbdffb62f212b13a0
4
+ data.tar.gz: 35ae2d5a3b4ef9f7592ceabb358f53406f651376
5
5
  SHA512:
6
- metadata.gz: 1b979d2af4f5ee96ee7f9e6cd0970b7d09cfa6e64d2ad59999c03c24f33cf36a8d4b7e702f3d0ee07a13f60392cc04558f167e0c80a428410f8dec805133daff
7
- data.tar.gz: ad499558865070e2f2342499caff1df8b4cb5c656eb90dcbb9da0c884382cd038ae831a5d5d73fcb49926fe8c5698598da0073cc6a9c8ab02f037c13b2718cf2
6
+ metadata.gz: 84db9d82b9ac6a3fefdcc2fa080c71d9175e8e36788db03279f5bca6b6f77bef65a3ad23d923e7c2849a7a348079ed28381690dc2dd4741af284b32e4ffed7cb
7
+ data.tar.gz: 8378fbfb6a5e586c9d119e5a107e5c03aa66efff1c0ec8a704239b3731bbc9b32c37ae49b9207b5fa19773a355b71dcf9d3b64531aab2357d40f06dbe85cbabe
@@ -1,11 +1,10 @@
1
1
  # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'doc_image_extract/version'
5
4
 
6
5
  Gem::Specification.new do |spec|
7
6
  spec.name = "doc_image_extract"
8
- spec.version = DocImageExtract::VERSION
7
+ spec.version = "0.0.2"
9
8
  spec.authors = ["Eric Barendt"]
10
9
  spec.email = ["ebarendt@gmail.com"]
11
10
  spec.summary = %q{Extracts all images from a given Word document.}
@@ -1,6 +1,7 @@
1
- require "doc_image_extract/version"
2
1
  require "doc_image_extract/document"
3
2
  require "doc_image_extract/exporter"
3
+ require "doc_image_extract/reader"
4
+ require "doc_image_extract/picture"
4
5
 
5
6
  module DocImageExtract
6
7
  end
@@ -1,21 +1,23 @@
1
- require "poi"
2
-
3
1
  module DocImageExtract
4
2
  class Document
5
- include Java
6
- java_import "org.apache.poi.hwpf.HWPFDocument"
7
- java_import "java.io.FileInputStream"
8
-
9
3
  attr_reader :file
10
4
 
11
5
  def initialize file
12
6
  @file = file
13
7
  end
14
8
 
9
+ def reader
10
+ @reader ||= begin
11
+ if file =~ /docx\Z/
12
+ Reader::Xwpf.new file
13
+ else
14
+ Reader::Hwpf.new file
15
+ end
16
+ end
17
+ end
18
+
15
19
  def pictures
16
- document = HWPFDocument.new FileInputStream.new file
17
- pictures_table = document.get_pictures_table
18
- pictures_table.get_all_pictures
20
+ reader.pictures
19
21
  end
20
22
  end
21
- end
23
+ end
@@ -1,7 +1,9 @@
1
1
  module DocImageExtract
2
2
  class Exporter
3
3
  def save picture
4
- File.write picture.suggest_full_file_name, picture.get_content
4
+ if picture.file_name
5
+ File.write picture.file_name, picture.data
6
+ end
5
7
  end
6
8
  end
7
- end
9
+ end
@@ -0,0 +1,10 @@
1
+ module DocImageExtract
2
+ class Picture
3
+ attr_reader :file_name, :data
4
+
5
+ def initialize(file_name, data)
6
+ @file_name = file_name
7
+ @data = data
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,2 @@
1
+ require_relative 'reader/hwpf'
2
+ require_relative 'reader/xwpf'
@@ -0,0 +1,26 @@
1
+ require "poi"
2
+
3
+ module DocImageExtract
4
+ module Reader
5
+ class Hwpf
6
+ java_import java.io.FileInputStream
7
+ java_import org.apache.poi.hwpf.HWPFDocument
8
+
9
+ attr_reader :file
10
+
11
+ def initialize(file)
12
+ @file = file
13
+ end
14
+
15
+ def pictures
16
+ document = HWPFDocument.new FileInputStream.new file
17
+ pictures_table = document.get_pictures_table
18
+ pictures_table.get_all_pictures.map do |picture|
19
+ filename = picture.suggest_full_file_name
20
+ data = picture.get_content
21
+ DocImageExtract::Picture.new filename, data
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,26 @@
1
+ require "poi"
2
+
3
+ module DocImageExtract
4
+ module Reader
5
+ class Xwpf
6
+ java_import java.io.FileInputStream
7
+ java_import org.apache.poi.xwpf.usermodel.XWPFDocument
8
+
9
+ attr_reader :file
10
+
11
+ def initialize(file)
12
+ @file = file
13
+ end
14
+
15
+ def pictures
16
+ document = XWPFDocument.new FileInputStream.new file
17
+ pictures = document.get_all_pictures
18
+ pictures.map do |picture|
19
+ filename = picture.get_file_name
20
+ data = picture.get_data
21
+ Picture.new filename, data
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
Binary file
data/lib/poi.rb CHANGED
@@ -1,2 +1,8 @@
1
- require "jars/poi-3.9-20121203.jar"
2
- require "jars/poi-scratchpad-3.9-20121203.jar"
1
+ require "java"
2
+ require "jars/poi-3.10-FINAL-20140208.jar"
3
+ require "jars/poi-scratchpad-3.10-FINAL-20140208.jar"
4
+ require "jars/poi-ooxml-3.10-FINAL-20140208.jar"
5
+ require "jars/poi-ooxml-schemas-3.10-FINAL-20140208.jar"
6
+ require "jars/ooxml-lib/dom4j-1.6.1.jar"
7
+ require "jars/ooxml-lib/stax-api-1.0.1.jar"
8
+ require "jars/ooxml-lib/xmlbeans-2.3.0.jar"
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc_image_extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Barendt
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-27 00:00:00.000000000 Z
11
+ date: 2014-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
- requirement: !ruby/object:Gem::Requirement
15
+ version_requirements: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
20
+ requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
- - - ">="
22
+ - - '>='
25
23
  - !ruby/object:Gem::Version
26
24
  version: '0'
25
+ prerelease: false
26
+ type: :development
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
- requirement: !ruby/object:Gem::Requirement
29
+ version_requirements: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
37
35
  requirements:
38
- - - ">="
36
+ - - '>='
39
37
  - !ruby/object:Gem::Version
40
38
  version: '0'
39
+ prerelease: false
40
+ type: :development
41
41
  description: Extracts all images from a given Word document.
42
42
  email:
43
43
  - ebarendt@gmail.com
@@ -46,7 +46,7 @@ executables:
46
46
  extensions: []
47
47
  extra_rdoc_files: []
48
48
  files:
49
- - ".gitignore"
49
+ - .gitignore
50
50
  - Gemfile
51
51
  - LICENSE.txt
52
52
  - README.md
@@ -56,32 +56,40 @@ files:
56
56
  - lib/doc_image_extract.rb
57
57
  - lib/doc_image_extract/document.rb
58
58
  - lib/doc_image_extract/exporter.rb
59
- - lib/doc_image_extract/version.rb
60
- - lib/jars/poi-3.9-20121203.jar
61
- - lib/jars/poi-scratchpad-3.9-20121203.jar
59
+ - lib/doc_image_extract/picture.rb
60
+ - lib/doc_image_extract/reader.rb
61
+ - lib/doc_image_extract/reader/hwpf.rb
62
+ - lib/doc_image_extract/reader/xwpf.rb
63
+ - lib/jars/ooxml-lib/dom4j-1.6.1.jar
64
+ - lib/jars/ooxml-lib/stax-api-1.0.1.jar
65
+ - lib/jars/ooxml-lib/xmlbeans-2.3.0.jar
66
+ - lib/jars/poi-3.10-FINAL-20140208.jar
67
+ - lib/jars/poi-ooxml-3.10-FINAL-20140208.jar
68
+ - lib/jars/poi-ooxml-schemas-3.10-FINAL-20140208.jar
69
+ - lib/jars/poi-scratchpad-3.10-FINAL-20140208.jar
62
70
  - lib/poi.rb
63
71
  homepage: ''
64
72
  licenses:
65
73
  - MIT
66
74
  metadata: {}
67
- post_install_message:
75
+ post_install_message:
68
76
  rdoc_options: []
69
77
  require_paths:
70
78
  - lib
71
79
  required_ruby_version: !ruby/object:Gem::Requirement
72
80
  requirements:
73
- - - ">="
81
+ - - '>='
74
82
  - !ruby/object:Gem::Version
75
83
  version: '0'
76
84
  required_rubygems_version: !ruby/object:Gem::Requirement
77
85
  requirements:
78
- - - ">="
86
+ - - '>='
79
87
  - !ruby/object:Gem::Version
80
88
  version: '0'
81
89
  requirements: []
82
- rubyforge_project:
83
- rubygems_version: 2.2.2
84
- signing_key:
90
+ rubyforge_project:
91
+ rubygems_version: 2.1.9
92
+ signing_key:
85
93
  specification_version: 4
86
94
  summary: Extracts all images from a given Word document.
87
95
  test_files: []
@@ -1,3 +0,0 @@
1
- module DocImageExtract
2
- VERSION = "0.0.1"
3
- end