doc_image_extract 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f209d30d130de8731ee02197a75cab6f5b3de928
4
- data.tar.gz: 7134763b289f13abba156eeb2c514c03abdfa909
3
+ metadata.gz: 24f47e46b7193367b1bc79efbdffb62f212b13a0
4
+ data.tar.gz: 35ae2d5a3b4ef9f7592ceabb358f53406f651376
5
5
  SHA512:
6
- metadata.gz: 1b979d2af4f5ee96ee7f9e6cd0970b7d09cfa6e64d2ad59999c03c24f33cf36a8d4b7e702f3d0ee07a13f60392cc04558f167e0c80a428410f8dec805133daff
7
- data.tar.gz: ad499558865070e2f2342499caff1df8b4cb5c656eb90dcbb9da0c884382cd038ae831a5d5d73fcb49926fe8c5698598da0073cc6a9c8ab02f037c13b2718cf2
6
+ metadata.gz: 84db9d82b9ac6a3fefdcc2fa080c71d9175e8e36788db03279f5bca6b6f77bef65a3ad23d923e7c2849a7a348079ed28381690dc2dd4741af284b32e4ffed7cb
7
+ data.tar.gz: 8378fbfb6a5e586c9d119e5a107e5c03aa66efff1c0ec8a704239b3731bbc9b32c37ae49b9207b5fa19773a355b71dcf9d3b64531aab2357d40f06dbe85cbabe
@@ -1,11 +1,10 @@
1
1
  # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'doc_image_extract/version'
5
4
 
6
5
  Gem::Specification.new do |spec|
7
6
  spec.name = "doc_image_extract"
8
- spec.version = DocImageExtract::VERSION
7
+ spec.version = "0.0.2"
9
8
  spec.authors = ["Eric Barendt"]
10
9
  spec.email = ["ebarendt@gmail.com"]
11
10
  spec.summary = %q{Extracts all images from a given Word document.}
@@ -1,6 +1,7 @@
1
- require "doc_image_extract/version"
2
1
  require "doc_image_extract/document"
3
2
  require "doc_image_extract/exporter"
3
+ require "doc_image_extract/reader"
4
+ require "doc_image_extract/picture"
4
5
 
5
6
  module DocImageExtract
6
7
  end
@@ -1,21 +1,23 @@
1
- require "poi"
2
-
3
1
  module DocImageExtract
4
2
  class Document
5
- include Java
6
- java_import "org.apache.poi.hwpf.HWPFDocument"
7
- java_import "java.io.FileInputStream"
8
-
9
3
  attr_reader :file
10
4
 
11
5
  def initialize file
12
6
  @file = file
13
7
  end
14
8
 
9
+ def reader
10
+ @reader ||= begin
11
+ if file =~ /docx\Z/
12
+ Reader::Xwpf.new file
13
+ else
14
+ Reader::Hwpf.new file
15
+ end
16
+ end
17
+ end
18
+
15
19
  def pictures
16
- document = HWPFDocument.new FileInputStream.new file
17
- pictures_table = document.get_pictures_table
18
- pictures_table.get_all_pictures
20
+ reader.pictures
19
21
  end
20
22
  end
21
- end
23
+ end
@@ -1,7 +1,9 @@
1
1
  module DocImageExtract
2
2
  class Exporter
3
3
  def save picture
4
- File.write picture.suggest_full_file_name, picture.get_content
4
+ if picture.file_name
5
+ File.write picture.file_name, picture.data
6
+ end
5
7
  end
6
8
  end
7
- end
9
+ end
@@ -0,0 +1,10 @@
1
+ module DocImageExtract
2
+ class Picture
3
+ attr_reader :file_name, :data
4
+
5
+ def initialize(file_name, data)
6
+ @file_name = file_name
7
+ @data = data
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,2 @@
1
+ require_relative 'reader/hwpf'
2
+ require_relative 'reader/xwpf'
@@ -0,0 +1,26 @@
1
+ require "poi"
2
+
3
+ module DocImageExtract
4
+ module Reader
5
+ class Hwpf
6
+ java_import java.io.FileInputStream
7
+ java_import org.apache.poi.hwpf.HWPFDocument
8
+
9
+ attr_reader :file
10
+
11
+ def initialize(file)
12
+ @file = file
13
+ end
14
+
15
+ def pictures
16
+ document = HWPFDocument.new FileInputStream.new file
17
+ pictures_table = document.get_pictures_table
18
+ pictures_table.get_all_pictures.map do |picture|
19
+ filename = picture.suggest_full_file_name
20
+ data = picture.get_content
21
+ DocImageExtract::Picture.new filename, data
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,26 @@
1
+ require "poi"
2
+
3
+ module DocImageExtract
4
+ module Reader
5
+ class Xwpf
6
+ java_import java.io.FileInputStream
7
+ java_import org.apache.poi.xwpf.usermodel.XWPFDocument
8
+
9
+ attr_reader :file
10
+
11
+ def initialize(file)
12
+ @file = file
13
+ end
14
+
15
+ def pictures
16
+ document = XWPFDocument.new FileInputStream.new file
17
+ pictures = document.get_all_pictures
18
+ pictures.map do |picture|
19
+ filename = picture.get_file_name
20
+ data = picture.get_data
21
+ Picture.new filename, data
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
Binary file
data/lib/poi.rb CHANGED
@@ -1,2 +1,8 @@
1
- require "jars/poi-3.9-20121203.jar"
2
- require "jars/poi-scratchpad-3.9-20121203.jar"
1
+ require "java"
2
+ require "jars/poi-3.10-FINAL-20140208.jar"
3
+ require "jars/poi-scratchpad-3.10-FINAL-20140208.jar"
4
+ require "jars/poi-ooxml-3.10-FINAL-20140208.jar"
5
+ require "jars/poi-ooxml-schemas-3.10-FINAL-20140208.jar"
6
+ require "jars/ooxml-lib/dom4j-1.6.1.jar"
7
+ require "jars/ooxml-lib/stax-api-1.0.1.jar"
8
+ require "jars/ooxml-lib/xmlbeans-2.3.0.jar"
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc_image_extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eric Barendt
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-27 00:00:00.000000000 Z
11
+ date: 2014-05-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
- requirement: !ruby/object:Gem::Requirement
15
+ version_requirements: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
20
+ requirement: !ruby/object:Gem::Requirement
23
21
  requirements:
24
- - - ">="
22
+ - - '>='
25
23
  - !ruby/object:Gem::Version
26
24
  version: '0'
25
+ prerelease: false
26
+ type: :development
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
- requirement: !ruby/object:Gem::Requirement
29
+ version_requirements: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
34
+ requirement: !ruby/object:Gem::Requirement
37
35
  requirements:
38
- - - ">="
36
+ - - '>='
39
37
  - !ruby/object:Gem::Version
40
38
  version: '0'
39
+ prerelease: false
40
+ type: :development
41
41
  description: Extracts all images from a given Word document.
42
42
  email:
43
43
  - ebarendt@gmail.com
@@ -46,7 +46,7 @@ executables:
46
46
  extensions: []
47
47
  extra_rdoc_files: []
48
48
  files:
49
- - ".gitignore"
49
+ - .gitignore
50
50
  - Gemfile
51
51
  - LICENSE.txt
52
52
  - README.md
@@ -56,32 +56,40 @@ files:
56
56
  - lib/doc_image_extract.rb
57
57
  - lib/doc_image_extract/document.rb
58
58
  - lib/doc_image_extract/exporter.rb
59
- - lib/doc_image_extract/version.rb
60
- - lib/jars/poi-3.9-20121203.jar
61
- - lib/jars/poi-scratchpad-3.9-20121203.jar
59
+ - lib/doc_image_extract/picture.rb
60
+ - lib/doc_image_extract/reader.rb
61
+ - lib/doc_image_extract/reader/hwpf.rb
62
+ - lib/doc_image_extract/reader/xwpf.rb
63
+ - lib/jars/ooxml-lib/dom4j-1.6.1.jar
64
+ - lib/jars/ooxml-lib/stax-api-1.0.1.jar
65
+ - lib/jars/ooxml-lib/xmlbeans-2.3.0.jar
66
+ - lib/jars/poi-3.10-FINAL-20140208.jar
67
+ - lib/jars/poi-ooxml-3.10-FINAL-20140208.jar
68
+ - lib/jars/poi-ooxml-schemas-3.10-FINAL-20140208.jar
69
+ - lib/jars/poi-scratchpad-3.10-FINAL-20140208.jar
62
70
  - lib/poi.rb
63
71
  homepage: ''
64
72
  licenses:
65
73
  - MIT
66
74
  metadata: {}
67
- post_install_message:
75
+ post_install_message:
68
76
  rdoc_options: []
69
77
  require_paths:
70
78
  - lib
71
79
  required_ruby_version: !ruby/object:Gem::Requirement
72
80
  requirements:
73
- - - ">="
81
+ - - '>='
74
82
  - !ruby/object:Gem::Version
75
83
  version: '0'
76
84
  required_rubygems_version: !ruby/object:Gem::Requirement
77
85
  requirements:
78
- - - ">="
86
+ - - '>='
79
87
  - !ruby/object:Gem::Version
80
88
  version: '0'
81
89
  requirements: []
82
- rubyforge_project:
83
- rubygems_version: 2.2.2
84
- signing_key:
90
+ rubyforge_project:
91
+ rubygems_version: 2.1.9
92
+ signing_key:
85
93
  specification_version: 4
86
94
  summary: Extracts all images from a given Word document.
87
95
  test_files: []
@@ -1,3 +0,0 @@
1
- module DocImageExtract
2
- VERSION = "0.0.1"
3
- end