yomu 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +10 -1
- data/lib/yomu/version.rb +1 -1
- data/lib/yomu.rb +29 -1
- data/test/samples/enclosure_problem.doc +0 -0
- data/test/samples/sample.docx +0 -0
- data/test/specs/yomu.rb +16 -2
- data/yomu.gemspec +2 -0
- metadata +22 -2
data/README.md
CHANGED
@@ -15,13 +15,14 @@ For the complete list of supported formats, please visit the Apache Tika
|
|
15
15
|
|
16
16
|
## Usage
|
17
17
|
|
18
|
-
Text and metadata can be extracted by calling `Yomu.read` directly:
|
18
|
+
Text, metadata and MIME type information can be extracted by calling `Yomu.read` directly:
|
19
19
|
|
20
20
|
require 'yomu'
|
21
21
|
|
22
22
|
data = File.read 'sample.pages'
|
23
23
|
text = Yomu.read :text, data
|
24
24
|
metadata = Yomu.read :metadata, data
|
25
|
+
mimetype = Yomu.read :mimetype, data
|
25
26
|
|
26
27
|
### Reading text from a given filename
|
27
28
|
|
@@ -53,6 +54,14 @@ Metadata is returned as a hash.
|
|
53
54
|
yomu = Yomu.new 'sample.pages'
|
54
55
|
yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
|
55
56
|
|
57
|
+
### Reading MIME types
|
58
|
+
|
59
|
+
MIME type is returned as a MIME::Type object.
|
60
|
+
|
61
|
+
yomu = Yomu.new 'sample.docx'
|
62
|
+
yomu.mimetype.content_type #=> "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
63
|
+
yomu.mimetype.extensions #=> ['docx']
|
64
|
+
|
56
65
|
## Installation and Dependencies
|
57
66
|
|
58
67
|
Add this line to your application's Gemfile:
|
data/lib/yomu/version.rb
CHANGED
data/lib/yomu.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'yomu/version'
|
2
2
|
|
3
3
|
require 'net/http'
|
4
|
+
require 'mime/types'
|
4
5
|
require 'yaml'
|
5
6
|
|
6
7
|
class Yomu
|
@@ -19,6 +20,8 @@ class Yomu
|
|
19
20
|
'-t'
|
20
21
|
when :metadata
|
21
22
|
'-m'
|
23
|
+
when :mimetype
|
24
|
+
'-m'
|
22
25
|
end
|
23
26
|
|
24
27
|
result = IO.popen "#{java} -Djava.awt.headless=true -jar #{Yomu::JARPATH} #{switch}", 'r+' do |io|
|
@@ -27,7 +30,14 @@ class Yomu
|
|
27
30
|
io.read
|
28
31
|
end
|
29
32
|
|
30
|
-
type
|
33
|
+
case type
|
34
|
+
when :text
|
35
|
+
result
|
36
|
+
when :metadata
|
37
|
+
YAML.load enclose_metadata_fields(result)
|
38
|
+
when :mimetype
|
39
|
+
MIME::Types[YAML.load(enclose_metadata_fields(result))['Content-Type']].first
|
40
|
+
end
|
31
41
|
end
|
32
42
|
|
33
43
|
# Create a new instance of Yomu with a given document.
|
@@ -82,6 +92,18 @@ class Yomu
|
|
82
92
|
@metadata = Yomu.read :metadata, data
|
83
93
|
end
|
84
94
|
|
95
|
+
# Returns the mimetype object of the Yomu document.
|
96
|
+
#
|
97
|
+
# yomu = Yomu.new 'sample.docx'
|
98
|
+
# yomu.mimetype.content_type #=> 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
99
|
+
# yomu.mimetype.extensions #=> ['docx']
|
100
|
+
|
101
|
+
def mimetype
|
102
|
+
return @mimetype if defined? @mimetype
|
103
|
+
|
104
|
+
@mimetype = MIME::Types[metadata['Content-Type']].first
|
105
|
+
end
|
106
|
+
|
85
107
|
# Returns +true+ if the Yomu document was specified using a file path.
|
86
108
|
#
|
87
109
|
# yomu = Yomu.new 'sample.pages'
|
@@ -129,6 +151,12 @@ class Yomu
|
|
129
151
|
@data
|
130
152
|
end
|
131
153
|
|
154
|
+
def self.enclose_metadata_fields metadata
|
155
|
+
metadata.each_line.map do |l|
|
156
|
+
l.gsub(/: (.*)/,': "\1"')
|
157
|
+
end.join
|
158
|
+
end
|
159
|
+
|
132
160
|
def self.java
|
133
161
|
ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
|
134
162
|
end
|
Binary file
|
Binary file
|
data/test/specs/yomu.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative '../helper.rb'
|
2
2
|
|
3
3
|
describe Yomu do
|
4
|
-
let(:data) { File.read 'test/samples/sample.
|
4
|
+
let(:data) { File.read 'test/samples/sample.docx' }
|
5
5
|
|
6
6
|
before do
|
7
7
|
ENV['JAVA_HOME'] = nil
|
@@ -17,7 +17,21 @@ describe Yomu do
|
|
17
17
|
it 'reads metadata' do
|
18
18
|
metadata = Yomu.read :metadata, data
|
19
19
|
|
20
|
-
assert_equal 'application/vnd.
|
20
|
+
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', metadata['Content-Type']
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'accepts metadata with colon' do
|
24
|
+
doc = File.read 'test/samples/enclosure_problem.doc'
|
25
|
+
metadata = Yomu.read :metadata, doc
|
26
|
+
|
27
|
+
assert_equal 'problem: test', metadata['dc:title']
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'reads mimetype' do
|
31
|
+
mimetype = Yomu.read :mimetype, data
|
32
|
+
|
33
|
+
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', mimetype.content_type
|
34
|
+
assert_includes mimetype.extensions, 'docx'
|
21
35
|
end
|
22
36
|
end
|
23
37
|
|
data/yomu.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yomu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,8 +9,24 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-05-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: mime-types
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.23'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.23'
|
14
30
|
- !ruby/object:Gem::Dependency
|
15
31
|
name: minitest
|
16
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,7 +61,9 @@ files:
|
|
45
61
|
- lib/yomu.rb
|
46
62
|
- lib/yomu/version.rb
|
47
63
|
- test/helper.rb
|
64
|
+
- test/samples/enclosure_problem.doc
|
48
65
|
- test/samples/sample filename with spaces.pages
|
66
|
+
- test/samples/sample.docx
|
49
67
|
- test/samples/sample.pages
|
50
68
|
- test/specs/yomu.rb
|
51
69
|
- yomu.gemspec
|
@@ -76,6 +94,8 @@ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .
|
|
76
94
|
.rtf, .pdf)
|
77
95
|
test_files:
|
78
96
|
- test/helper.rb
|
97
|
+
- test/samples/enclosure_problem.doc
|
79
98
|
- test/samples/sample filename with spaces.pages
|
99
|
+
- test/samples/sample.docx
|
80
100
|
- test/samples/sample.pages
|
81
101
|
- test/specs/yomu.rb
|