yomu 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -15,13 +15,14 @@ For the complete list of supported formats, please visit the Apache Tika
15
15
 
16
16
  ## Usage
17
17
 
18
- Text and metadata can be extracted by calling `Yomu.read` directly:
18
+ Text, metadata and MIME type information can be extracted by calling `Yomu.read` directly:
19
19
 
20
20
  require 'yomu'
21
21
 
22
22
  data = File.read 'sample.pages'
23
23
  text = Yomu.read :text, data
24
24
  metadata = Yomu.read :metadata, data
25
+ mimetype = Yomu.read :mimetype, data
25
26
 
26
27
  ### Reading text from a given filename
27
28
 
@@ -53,6 +54,14 @@ Metadata is returned as a hash.
53
54
  yomu = Yomu.new 'sample.pages'
54
55
  yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
55
56
 
57
+ ### Reading MIME types
58
+
59
+ The MIME type is returned as a `MIME::Type` object.
60
+
61
+ yomu = Yomu.new 'sample.docx'
62
+ yomu.mimetype.content_type #=> "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
63
+ yomu.mimetype.extensions #=> ['docx']
64
+
56
65
  ## Installation and Dependencies
57
66
 
58
67
  Add this line to your application's Gemfile:
data/lib/yomu/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Yomu
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
data/lib/yomu.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'yomu/version'
2
2
 
3
3
  require 'net/http'
4
+ require 'mime/types'
4
5
  require 'yaml'
5
6
 
6
7
  class Yomu
@@ -19,6 +20,8 @@ class Yomu
19
20
  '-t'
20
21
  when :metadata
21
22
  '-m'
23
+ when :mimetype
24
+ '-m'
22
25
  end
23
26
 
24
27
  result = IO.popen "#{java} -Djava.awt.headless=true -jar #{Yomu::JARPATH} #{switch}", 'r+' do |io|
@@ -27,7 +30,14 @@ class Yomu
27
30
  io.read
28
31
  end
29
32
 
30
- type == :metadata ? YAML.load(result) : result
33
+ case type
34
+ when :text
35
+ result
36
+ when :metadata
37
+ YAML.load enclose_metadata_fields(result)
38
+ when :mimetype
39
+ MIME::Types[YAML.load(enclose_metadata_fields(result))['Content-Type']].first
40
+ end
31
41
  end
32
42
 
33
43
  # Create a new instance of Yomu with a given document.
@@ -82,6 +92,18 @@ class Yomu
82
92
  @metadata = Yomu.read :metadata, data
83
93
  end
84
94
 
95
+ # Returns the MIME type of the Yomu document as a MIME::Type object, looked up from the metadata's Content-Type.
96
+ #
97
+ # yomu = Yomu.new 'sample.docx'
98
+ # yomu.mimetype.content_type #=> 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
99
+ # yomu.mimetype.extensions #=> ['docx']
100
+
101
+ def mimetype
102
+ return @mimetype if defined? @mimetype
103
+
104
+ @mimetype = MIME::Types[metadata['Content-Type']].first
105
+ end
106
+
85
107
  # Returns +true+ if the Yomu document was specified using a file path.
86
108
  #
87
109
  # yomu = Yomu.new 'sample.pages'
@@ -129,6 +151,12 @@ class Yomu
129
151
  @data
130
152
  end
131
153
 
154
+ def self.enclose_metadata_fields metadata
155
+ metadata.each_line.map do |l|
156
+ l.gsub(/: (.*)/,': "\1"')
157
+ end.join
158
+ end
159
+
132
160
  def self.java
133
161
  ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
134
162
  end
Binary file
Binary file
data/test/specs/yomu.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require_relative '../helper.rb'
2
2
 
3
3
  describe Yomu do
4
- let(:data) { File.read 'test/samples/sample.pages' }
4
+ let(:data) { File.read 'test/samples/sample.docx' }
5
5
 
6
6
  before do
7
7
  ENV['JAVA_HOME'] = nil
@@ -17,7 +17,21 @@ describe Yomu do
17
17
  it 'reads metadata' do
18
18
  metadata = Yomu.read :metadata, data
19
19
 
20
- assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
20
+ assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', metadata['Content-Type']
21
+ end
22
+
23
+ it 'accepts metadata with colon' do
24
+ doc = File.read 'test/samples/enclosure_problem.doc'
25
+ metadata = Yomu.read :metadata, doc
26
+
27
+ assert_equal 'problem: test', metadata['dc:title']
28
+ end
29
+
30
+ it 'reads mimetype' do
31
+ mimetype = Yomu.read :mimetype, data
32
+
33
+ assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', mimetype.content_type
34
+ assert_includes mimetype.extensions, 'docx'
21
35
  end
22
36
  end
23
37
 
data/yomu.gemspec CHANGED
@@ -15,5 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = Yomu::VERSION
17
17
 
18
+ gem.add_runtime_dependency 'mime-types', '~> 1.23'
19
+
18
20
  gem.add_development_dependency 'minitest'
19
21
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yomu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,8 +9,24 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-10 00:00:00.000000000 Z
12
+ date: 2013-05-01 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mime-types
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.23'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.23'
14
30
  - !ruby/object:Gem::Dependency
15
31
  name: minitest
16
32
  requirement: !ruby/object:Gem::Requirement
@@ -45,7 +61,9 @@ files:
45
61
  - lib/yomu.rb
46
62
  - lib/yomu/version.rb
47
63
  - test/helper.rb
64
+ - test/samples/enclosure_problem.doc
48
65
  - test/samples/sample filename with spaces.pages
66
+ - test/samples/sample.docx
49
67
  - test/samples/sample.pages
50
68
  - test/specs/yomu.rb
51
69
  - yomu.gemspec
@@ -76,6 +94,8 @@ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .
76
94
  .rtf, .pdf)
77
95
  test_files:
78
96
  - test/helper.rb
97
+ - test/samples/enclosure_problem.doc
79
98
  - test/samples/sample filename with spaces.pages
99
+ - test/samples/sample.docx
80
100
  - test/samples/sample.pages
81
101
  - test/specs/yomu.rb