yomu 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -15,13 +15,14 @@ For the complete list of supported formats, please visit the Apache Tika
15
15
 
16
16
  ## Usage
17
17
 
18
- Text and metadata can be extracted by calling `Yomu.read` directly:
18
+ Text, metadata and MIME type information can be extracted by calling `Yomu.read` directly:
19
19
 
20
20
  require 'yomu'
21
21
 
22
22
  data = File.read 'sample.pages'
23
23
  text = Yomu.read :text, data
24
24
  metadata = Yomu.read :metadata, data
25
+ mimetype = Yomu.read :mimetype, data
25
26
 
26
27
  ### Reading text from a given filename
27
28
 
@@ -53,6 +54,14 @@ Metadata is returned as a hash.
53
54
  yomu = Yomu.new 'sample.pages'
54
55
  yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
55
56
 
57
+ ### Reading MIME types
58
+
59
+ The MIME type is returned as a `MIME::Type` object.
60
+
61
+ yomu = Yomu.new 'sample.docx'
62
+ yomu.mimetype.content_type #=> "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
63
+ yomu.mimetype.extensions #=> ['docx']
64
+
56
65
  ## Installation and Dependencies
57
66
 
58
67
  Add this line to your application's Gemfile:
data/lib/yomu/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Yomu
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
data/lib/yomu.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'yomu/version'
2
2
 
3
3
  require 'net/http'
4
+ require 'mime/types'
4
5
  require 'yaml'
5
6
 
6
7
  class Yomu
@@ -19,6 +20,8 @@ class Yomu
19
20
  '-t'
20
21
  when :metadata
21
22
  '-m'
23
+ when :mimetype
24
+ '-m'
22
25
  end
23
26
 
24
27
  result = IO.popen "#{java} -Djava.awt.headless=true -jar #{Yomu::JARPATH} #{switch}", 'r+' do |io|
@@ -27,7 +30,14 @@ class Yomu
27
30
  io.read
28
31
  end
29
32
 
30
- type == :metadata ? YAML.load(result) : result
33
+ case type
34
+ when :text
35
+ result
36
+ when :metadata
37
+ YAML.load enclose_metadata_fields(result)
38
+ when :mimetype
39
+ MIME::Types[YAML.load(enclose_metadata_fields(result))['Content-Type']].first
40
+ end
31
41
  end
32
42
 
33
43
  # Create a new instance of Yomu with a given document.
@@ -82,6 +92,18 @@ class Yomu
82
92
  @metadata = Yomu.read :metadata, data
83
93
  end
84
94
 
95
+ # Returns the MIME type of the Yomu document as a MIME::Type object, looked up from the metadata 'Content-Type'.
96
+ #
97
+ # yomu = Yomu.new 'sample.docx'
98
+ # yomu.mimetype.content_type #=> 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
99
+ # yomu.mimetype.extensions #=> ['docx']
100
+
101
+ def mimetype
102
+ return @mimetype if defined? @mimetype
103
+
104
+ @mimetype = MIME::Types[metadata['Content-Type']].first
105
+ end
106
+
85
107
  # Returns +true+ if the Yomu document was specified using a file path.
86
108
  #
87
109
  # yomu = Yomu.new 'sample.pages'
@@ -129,6 +151,12 @@ class Yomu
129
151
  @data
130
152
  end
131
153
 
154
+ def self.enclose_metadata_fields metadata
155
+ metadata.each_line.map do |l|
156
+ l.gsub(/: (.*)/,': "\1"')
157
+ end.join
158
+ end
159
+
132
160
  def self.java
133
161
  ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
134
162
  end
Binary file
Binary file
data/test/specs/yomu.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require_relative '../helper.rb'
2
2
 
3
3
  describe Yomu do
4
- let(:data) { File.read 'test/samples/sample.pages' }
4
+ let(:data) { File.read 'test/samples/sample.docx' }
5
5
 
6
6
  before do
7
7
  ENV['JAVA_HOME'] = nil
@@ -17,7 +17,21 @@ describe Yomu do
17
17
  it 'reads metadata' do
18
18
  metadata = Yomu.read :metadata, data
19
19
 
20
- assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
20
+ assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', metadata['Content-Type']
21
+ end
22
+
23
+ it 'accepts metadata with colon' do
24
+ doc = File.read 'test/samples/enclosure_problem.doc'
25
+ metadata = Yomu.read :metadata, doc
26
+
27
+ assert_equal 'problem: test', metadata['dc:title']
28
+ end
29
+
30
+ it 'reads mimetype' do
31
+ mimetype = Yomu.read :mimetype, data
32
+
33
+ assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', mimetype.content_type
34
+ assert_includes mimetype.extensions, 'docx'
21
35
  end
22
36
  end
23
37
 
data/yomu.gemspec CHANGED
@@ -15,5 +15,7 @@ Gem::Specification.new do |gem|
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = Yomu::VERSION
17
17
 
18
+ gem.add_runtime_dependency 'mime-types', '~> 1.23'
19
+
18
20
  gem.add_development_dependency 'minitest'
19
21
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yomu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,8 +9,24 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-10 00:00:00.000000000 Z
12
+ date: 2013-05-01 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mime-types
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.23'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.23'
14
30
  - !ruby/object:Gem::Dependency
15
31
  name: minitest
16
32
  requirement: !ruby/object:Gem::Requirement
@@ -45,7 +61,9 @@ files:
45
61
  - lib/yomu.rb
46
62
  - lib/yomu/version.rb
47
63
  - test/helper.rb
64
+ - test/samples/enclosure_problem.doc
48
65
  - test/samples/sample filename with spaces.pages
66
+ - test/samples/sample.docx
49
67
  - test/samples/sample.pages
50
68
  - test/specs/yomu.rb
51
69
  - yomu.gemspec
@@ -76,6 +94,8 @@ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .
76
94
  .rtf, .pdf)
77
95
  test_files:
78
96
  - test/helper.rb
97
+ - test/samples/enclosure_problem.doc
79
98
  - test/samples/sample filename with spaces.pages
99
+ - test/samples/sample.docx
80
100
  - test/samples/sample.pages
81
101
  - test/specs/yomu.rb