yomu 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +10 -1
- data/lib/yomu/version.rb +1 -1
- data/lib/yomu.rb +29 -1
- data/test/samples/enclosure_problem.doc +0 -0
- data/test/samples/sample.docx +0 -0
- data/test/specs/yomu.rb +16 -2
- data/yomu.gemspec +2 -0
- metadata +22 -2
data/README.md
CHANGED
@@ -15,13 +15,14 @@ For the complete list of supported formats, please visit the Apache Tika
|
|
15
15
|
|
16
16
|
## Usage
|
17
17
|
|
18
|
-
Text and metadata can be extracted by calling `Yomu.read` directly:
|
18
|
+
Text, metadata and MIME type information can be extracted by calling `Yomu.read` directly:
|
19
19
|
|
20
20
|
require 'yomu'
|
21
21
|
|
22
22
|
data = File.read 'sample.pages'
|
23
23
|
text = Yomu.read :text, data
|
24
24
|
metadata = Yomu.read :metadata, data
|
25
|
+
mimetype = Yomu.read :mimetype, data
|
25
26
|
|
26
27
|
### Reading text from a given filename
|
27
28
|
|
@@ -53,6 +54,14 @@ Metadata is returned as a hash.
|
|
53
54
|
yomu = Yomu.new 'sample.pages'
|
54
55
|
yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
|
55
56
|
|
57
|
+
### Reading MIME types
|
58
|
+
|
59
|
+
MIME type is returned as a MIME::Type object.
|
60
|
+
|
61
|
+
yomu = Yomu.new 'sample.docx'
|
62
|
+
yomu.mimetype.content_type #=> "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
63
|
+
yomu.mimetype.extensions #=> ['docx']
|
64
|
+
|
56
65
|
## Installation and Dependencies
|
57
66
|
|
58
67
|
Add this line to your application's Gemfile:
|
data/lib/yomu/version.rb
CHANGED
data/lib/yomu.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'yomu/version'
|
2
2
|
|
3
3
|
require 'net/http'
|
4
|
+
require 'mime/types'
|
4
5
|
require 'yaml'
|
5
6
|
|
6
7
|
class Yomu
|
@@ -19,6 +20,8 @@ class Yomu
|
|
19
20
|
'-t'
|
20
21
|
when :metadata
|
21
22
|
'-m'
|
23
|
+
when :mimetype
|
24
|
+
'-m'
|
22
25
|
end
|
23
26
|
|
24
27
|
result = IO.popen "#{java} -Djava.awt.headless=true -jar #{Yomu::JARPATH} #{switch}", 'r+' do |io|
|
@@ -27,7 +30,14 @@ class Yomu
|
|
27
30
|
io.read
|
28
31
|
end
|
29
32
|
|
30
|
-
type
|
33
|
+
case type
|
34
|
+
when :text
|
35
|
+
result
|
36
|
+
when :metadata
|
37
|
+
YAML.load enclose_metadata_fields(result)
|
38
|
+
when :mimetype
|
39
|
+
MIME::Types[YAML.load(enclose_metadata_fields(result))['Content-Type']].first
|
40
|
+
end
|
31
41
|
end
|
32
42
|
|
33
43
|
# Create a new instance of Yomu with a given document.
|
@@ -82,6 +92,18 @@ class Yomu
|
|
82
92
|
@metadata = Yomu.read :metadata, data
|
83
93
|
end
|
84
94
|
|
95
|
+
# Returns the mimetype object of the Yomu document.
|
96
|
+
#
|
97
|
+
# yomu = Yomu.new 'sample.docx'
|
98
|
+
# yomu.mimetype.content_type #=> 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
99
|
+
# yomu.mimetype.extensions #=> ['docx']
|
100
|
+
|
101
|
+
def mimetype
|
102
|
+
return @mimetype if defined? @mimetype
|
103
|
+
|
104
|
+
@mimetype = MIME::Types[metadata['Content-Type']].first
|
105
|
+
end
|
106
|
+
|
85
107
|
# Returns +true+ if the Yomu document was specified using a file path.
|
86
108
|
#
|
87
109
|
# yomu = Yomu.new 'sample.pages'
|
@@ -129,6 +151,12 @@ class Yomu
|
|
129
151
|
@data
|
130
152
|
end
|
131
153
|
|
154
|
+
def self.enclose_metadata_fields metadata
|
155
|
+
metadata.each_line.map do |l|
|
156
|
+
l.gsub(/: (.*)/,': "\1"')
|
157
|
+
end.join
|
158
|
+
end
|
159
|
+
|
132
160
|
def self.java
|
133
161
|
ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
|
134
162
|
end
|
Binary file
|
Binary file
|
data/test/specs/yomu.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require_relative '../helper.rb'
|
2
2
|
|
3
3
|
describe Yomu do
|
4
|
-
let(:data) { File.read 'test/samples/sample.pages' }
|
4
|
+
let(:data) { File.read 'test/samples/sample.docx' }
|
5
5
|
|
6
6
|
before do
|
7
7
|
ENV['JAVA_HOME'] = nil
|
@@ -17,7 +17,21 @@ describe Yomu do
|
|
17
17
|
it 'reads metadata' do
|
18
18
|
metadata = Yomu.read :metadata, data
|
19
19
|
|
20
|
-
assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
|
20
|
+
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', metadata['Content-Type']
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'accepts metadata with colon' do
|
24
|
+
doc = File.read 'test/samples/enclosure_problem.doc'
|
25
|
+
metadata = Yomu.read :metadata, doc
|
26
|
+
|
27
|
+
assert_equal 'problem: test', metadata['dc:title']
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'reads mimetype' do
|
31
|
+
mimetype = Yomu.read :mimetype, data
|
32
|
+
|
33
|
+
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', mimetype.content_type
|
34
|
+
assert_includes mimetype.extensions, 'docx'
|
21
35
|
end
|
22
36
|
end
|
23
37
|
|
data/yomu.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yomu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,8 +9,24 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-05-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: mime-types
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '1.23'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.23'
|
14
30
|
- !ruby/object:Gem::Dependency
|
15
31
|
name: minitest
|
16
32
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,7 +61,9 @@ files:
|
|
45
61
|
- lib/yomu.rb
|
46
62
|
- lib/yomu/version.rb
|
47
63
|
- test/helper.rb
|
64
|
+
- test/samples/enclosure_problem.doc
|
48
65
|
- test/samples/sample filename with spaces.pages
|
66
|
+
- test/samples/sample.docx
|
49
67
|
- test/samples/sample.pages
|
50
68
|
- test/specs/yomu.rb
|
51
69
|
- yomu.gemspec
|
@@ -76,6 +94,8 @@ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .
|
|
76
94
|
.rtf, .pdf)
|
77
95
|
test_files:
|
78
96
|
- test/helper.rb
|
97
|
+
- test/samples/enclosure_problem.doc
|
79
98
|
- test/samples/sample filename with spaces.pages
|
99
|
+
- test/samples/sample.docx
|
80
100
|
- test/samples/sample.pages
|
81
101
|
- test/specs/yomu.rb
|