yomu 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/Rakefile +4 -6
- data/jar/{tika-app-1.3.jar → tika-app-1.4.jar} +0 -0
- data/lib/yomu/version.rb +1 -1
- data/lib/yomu.rb +5 -6
- data/spec/helper.rb +6 -0
- data/{test → spec}/samples/sample filename with spaces.pages +0 -0
- data/{test/samples/enclosure_problem.doc → spec/samples/sample-metadata-values-with-colons.doc} +0 -0
- data/{test → spec}/samples/sample.docx +0 -0
- data/{test → spec}/samples/sample.pages +0 -0
- data/spec/yomu_spec.rb +150 -0
- data/yomu.gemspec +1 -1
- metadata +18 -15
- data/test/specs/yomu.rb +0 -169
data/.gitignore
CHANGED
data/.rspec
ADDED
data/Rakefile
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
#!/usr/bin/env rake
|
2
2
|
|
3
3
|
require 'bundler/gem_tasks'
|
4
|
-
require '
|
4
|
+
require 'rspec/core/rake_task'
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
t.verbose = true
|
10
|
-
end
|
6
|
+
RSpec::Core::RakeTask.new 'spec'
|
7
|
+
|
8
|
+
task :default => :spec
|
Binary file
|
data/lib/yomu/version.rb
CHANGED
data/lib/yomu.rb
CHANGED
@@ -34,9 +34,9 @@ class Yomu
|
|
34
34
|
when :text
|
35
35
|
result
|
36
36
|
when :metadata
|
37
|
-
YAML.load
|
37
|
+
YAML.load quote(result)
|
38
38
|
when :mimetype
|
39
|
-
MIME::Types[YAML.load(
|
39
|
+
MIME::Types[YAML.load(quote(result))['Content-Type']].first
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -151,11 +151,10 @@ class Yomu
|
|
151
151
|
@data
|
152
152
|
end
|
153
153
|
|
154
|
-
def self.
|
155
|
-
metadata.
|
156
|
-
l.gsub(/: (.*)/,': "\1"')
|
157
|
-
end.join
|
154
|
+
def self.quote(metadata)
|
155
|
+
metadata.gsub(/: (.*: .*)$/, ': "\1"')
|
158
156
|
end
|
157
|
+
private_class_method :quote
|
159
158
|
|
160
159
|
def self.java
|
161
160
|
ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
|
data/spec/helper.rb
ADDED
File without changes
|
data/{test/samples/enclosure_problem.doc → spec/samples/sample-metadata-values-with-colons.doc}
RENAMED
File without changes
|
File without changes
|
File without changes
|
data/spec/yomu_spec.rb
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'helper.rb'
|
2
|
+
require 'yomu'
|
3
|
+
|
4
|
+
describe Yomu do
|
5
|
+
let(:data) { File.read 'spec/samples/sample.docx' }
|
6
|
+
|
7
|
+
before do
|
8
|
+
ENV['JAVA_HOME'] = nil
|
9
|
+
end
|
10
|
+
|
11
|
+
describe '.read' do
|
12
|
+
it 'reads text' do
|
13
|
+
text = Yomu.read :text, data
|
14
|
+
|
15
|
+
expect( text ).to include 'The quick brown fox jumped over the lazy cat.'
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'reads metadata' do
|
19
|
+
metadata = Yomu.read :metadata, data
|
20
|
+
|
21
|
+
expect( metadata['Content-Type'] ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'reads metadata values with colons as strings' do
|
25
|
+
data = File.read 'spec/samples/sample-metadata-values-with-colons.doc'
|
26
|
+
metadata = Yomu.read :metadata, data
|
27
|
+
|
28
|
+
expect( metadata['dc:title'] ).to eql 'problem: test'
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'reads metadata time values as time values' do
|
32
|
+
metadata = Yomu.read :metadata, data
|
33
|
+
|
34
|
+
expect( metadata['Creation-Date'] ).to be_a Time
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'reads mimetype' do
|
38
|
+
mimetype = Yomu.read :mimetype, data
|
39
|
+
|
40
|
+
expect( mimetype.content_type ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
41
|
+
expect( mimetype.extensions ).to include 'docx'
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe '.new' do
|
46
|
+
it 'requires parameters' do
|
47
|
+
expect { Yomu.new }.to raise_error ArgumentError
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'accepts a root path' do
|
51
|
+
yomu = Yomu.new 'spec/samples/sample.pages'
|
52
|
+
|
53
|
+
expect( yomu ).to be_path
|
54
|
+
expect( yomu ).not_to be_uri
|
55
|
+
expect( yomu ).not_to be_stream
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'accepts a relative path' do
|
59
|
+
yomu = Yomu.new 'spec/samples/sample.pages'
|
60
|
+
|
61
|
+
expect( yomu ).to be_path
|
62
|
+
expect( yomu ).not_to be_uri
|
63
|
+
expect( yomu ).not_to be_stream
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'accepts a path with spaces' do
|
67
|
+
yomu = Yomu.new 'spec/samples/sample filename with spaces.pages'
|
68
|
+
|
69
|
+
expect( yomu ).to be_path
|
70
|
+
expect( yomu ).not_to be_uri
|
71
|
+
expect( yomu ).not_to be_stream
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'accepts a URI' do
|
75
|
+
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
76
|
+
|
77
|
+
expect( yomu ).to be_uri
|
78
|
+
expect( yomu ).not_to be_path
|
79
|
+
expect( yomu ).not_to be_stream
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'accepts a stream or object that can be read' do
|
83
|
+
File.open 'spec/samples/sample.pages', 'r' do |file|
|
84
|
+
yomu = Yomu.new file
|
85
|
+
|
86
|
+
expect( yomu ).to be_stream
|
87
|
+
expect( yomu ).not_to be_path
|
88
|
+
expect( yomu ).not_to be_uri
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'refuses a path to a missing file' do
|
93
|
+
expect { Yomu.new 'test/sample/missing.pages'}.to raise_error Errno::ENOENT
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'refuses other objects' do
|
97
|
+
[nil, 1, 1.1].each do |object|
|
98
|
+
expect { Yomu.new object }.to raise_error TypeError
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe '.java' do
|
104
|
+
specify 'with no specified JAVA_HOME' do
|
105
|
+
expect( Yomu.send(:java) ).to eql 'java'
|
106
|
+
end
|
107
|
+
|
108
|
+
specify 'with a specified JAVA_HOME' do
|
109
|
+
ENV['JAVA_HOME'] = '/path/to/java/home'
|
110
|
+
|
111
|
+
expect( Yomu.send(:java) ).to eql '/path/to/java/home/bin/java'
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
context 'initialized with a given path' do
|
116
|
+
let(:yomu) { Yomu.new 'spec/samples/sample.pages' }
|
117
|
+
|
118
|
+
specify '#text reads text' do
|
119
|
+
expect( yomu.text).to include 'The quick brown fox jumped over the lazy cat.'
|
120
|
+
end
|
121
|
+
|
122
|
+
specify '#metadata reads metadata' do
|
123
|
+
expect( yomu.metadata['Content-Type'] ).to eql 'application/vnd.apple.pages'
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
context 'initialized with a given URI' do
|
128
|
+
let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
|
129
|
+
|
130
|
+
specify '#text reads text' do
|
131
|
+
expect( yomu.text ).to include 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
132
|
+
end
|
133
|
+
|
134
|
+
specify '#metadata reads metadata' do
|
135
|
+
expect( yomu.metadata['Content-Type'] ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
context 'initialized with a given stream' do
|
140
|
+
let(:yomu) { Yomu.new File.open('spec/samples/sample.pages', 'rb') }
|
141
|
+
|
142
|
+
specify '#text reads text' do
|
143
|
+
expect( yomu.text ).to include 'The quick brown fox jumped over the lazy cat.'
|
144
|
+
end
|
145
|
+
|
146
|
+
specify '#metadata reads metadata' do
|
147
|
+
expect( yomu.metadata['Content-Type'] ).to eql 'application/vnd.apple.pages'
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
data/yomu.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yomu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-07-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mime-types
|
@@ -28,7 +28,7 @@ dependencies:
|
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '1.23'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
|
-
name:
|
31
|
+
name: rspec
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
@@ -52,20 +52,22 @@ extensions: []
|
|
52
52
|
extra_rdoc_files: []
|
53
53
|
files:
|
54
54
|
- .gitignore
|
55
|
+
- .rspec
|
55
56
|
- Gemfile
|
56
57
|
- LICENSE
|
57
58
|
- NOTICE.txt
|
58
59
|
- README.md
|
59
60
|
- Rakefile
|
60
|
-
- jar/tika-app-1.3.jar
|
61
|
+
- jar/tika-app-1.4.jar
|
61
62
|
- lib/yomu.rb
|
62
63
|
- lib/yomu/version.rb
|
64
|
+
- spec/helper.rb
|
65
|
+
- spec/samples/sample filename with spaces.pages
|
66
|
+
- spec/samples/sample-metadata-values-with-colons.doc
|
67
|
+
- spec/samples/sample.docx
|
68
|
+
- spec/samples/sample.pages
|
69
|
+
- spec/yomu_spec.rb
|
63
70
|
- test/helper.rb
|
64
|
-
- test/samples/enclosure_problem.doc
|
65
|
-
- test/samples/sample filename with spaces.pages
|
66
|
-
- test/samples/sample.docx
|
67
|
-
- test/samples/sample.pages
|
68
|
-
- test/specs/yomu.rb
|
69
71
|
- yomu.gemspec
|
70
72
|
homepage: http://erol.github.com/yomu
|
71
73
|
licenses: []
|
@@ -87,15 +89,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
89
|
version: '0'
|
88
90
|
requirements: []
|
89
91
|
rubyforge_project:
|
90
|
-
rubygems_version: 1.8.
|
92
|
+
rubygems_version: 1.8.23
|
91
93
|
signing_key:
|
92
94
|
specification_version: 3
|
93
95
|
summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
|
94
96
|
.rtf, .pdf)
|
95
97
|
test_files:
|
98
|
+
- spec/helper.rb
|
99
|
+
- spec/samples/sample filename with spaces.pages
|
100
|
+
- spec/samples/sample-metadata-values-with-colons.doc
|
101
|
+
- spec/samples/sample.docx
|
102
|
+
- spec/samples/sample.pages
|
103
|
+
- spec/yomu_spec.rb
|
96
104
|
- test/helper.rb
|
97
|
-
- test/samples/enclosure_problem.doc
|
98
|
-
- test/samples/sample filename with spaces.pages
|
99
|
-
- test/samples/sample.docx
|
100
|
-
- test/samples/sample.pages
|
101
|
-
- test/specs/yomu.rb
|
data/test/specs/yomu.rb
DELETED
@@ -1,169 +0,0 @@
|
|
1
|
-
require_relative '../helper.rb'
|
2
|
-
|
3
|
-
describe Yomu do
|
4
|
-
let(:data) { File.read 'test/samples/sample.docx' }
|
5
|
-
|
6
|
-
before do
|
7
|
-
ENV['JAVA_HOME'] = nil
|
8
|
-
end
|
9
|
-
|
10
|
-
describe '.read' do
|
11
|
-
it 'reads text' do
|
12
|
-
text = Yomu.read :text, data
|
13
|
-
|
14
|
-
assert_includes text, 'The quick brown fox jumped over the lazy cat.'
|
15
|
-
end
|
16
|
-
|
17
|
-
it 'reads metadata' do
|
18
|
-
metadata = Yomu.read :metadata, data
|
19
|
-
|
20
|
-
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', metadata['Content-Type']
|
21
|
-
end
|
22
|
-
|
23
|
-
it 'accepts metadata with colon' do
|
24
|
-
doc = File.read 'test/samples/enclosure_problem.doc'
|
25
|
-
metadata = Yomu.read :metadata, doc
|
26
|
-
|
27
|
-
assert_equal 'problem: test', metadata['dc:title']
|
28
|
-
end
|
29
|
-
|
30
|
-
it 'reads mimetype' do
|
31
|
-
mimetype = Yomu.read :mimetype, data
|
32
|
-
|
33
|
-
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', mimetype.content_type
|
34
|
-
assert_includes mimetype.extensions, 'docx'
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
describe '.new' do
|
39
|
-
it 'requires parameters' do
|
40
|
-
assert_raises ArgumentError do
|
41
|
-
Yomu.new
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
it 'accepts a root path' do
|
46
|
-
yomu = nil
|
47
|
-
|
48
|
-
assert_silent do
|
49
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
50
|
-
end
|
51
|
-
|
52
|
-
assert yomu.path?
|
53
|
-
refute yomu.uri?
|
54
|
-
refute yomu.stream?
|
55
|
-
end
|
56
|
-
|
57
|
-
it 'accepts a relative path' do
|
58
|
-
yomu = nil
|
59
|
-
|
60
|
-
assert_silent do
|
61
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
62
|
-
end
|
63
|
-
|
64
|
-
assert yomu.path?
|
65
|
-
refute yomu.uri?
|
66
|
-
refute yomu.stream?
|
67
|
-
end
|
68
|
-
|
69
|
-
it 'accepts a path with spaces' do
|
70
|
-
yomu = nil
|
71
|
-
|
72
|
-
assert_silent do
|
73
|
-
yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
|
74
|
-
end
|
75
|
-
|
76
|
-
assert yomu.path?
|
77
|
-
refute yomu.uri?
|
78
|
-
refute yomu.stream?
|
79
|
-
end
|
80
|
-
|
81
|
-
it 'accepts a URI' do
|
82
|
-
yomu = nil
|
83
|
-
|
84
|
-
assert_silent do
|
85
|
-
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
86
|
-
end
|
87
|
-
|
88
|
-
assert yomu.uri?
|
89
|
-
refute yomu.path?
|
90
|
-
refute yomu.stream?
|
91
|
-
end
|
92
|
-
|
93
|
-
it 'accepts a stream or object that can be read' do
|
94
|
-
yomu = nil
|
95
|
-
|
96
|
-
assert_silent do
|
97
|
-
File.open 'test/samples/sample.pages', 'r' do |file|
|
98
|
-
yomu = Yomu.new file
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
assert yomu.stream?
|
103
|
-
refute yomu.path?
|
104
|
-
refute yomu.uri?
|
105
|
-
end
|
106
|
-
|
107
|
-
it 'does not accept a path to a missing file' do
|
108
|
-
assert_raises Errno::ENOENT do
|
109
|
-
Yomu.new 'test/sample/missing.pages'
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
it 'does not accept other objects' do
|
114
|
-
[nil, 1, 1.1].each do |object|
|
115
|
-
assert_raises TypeError do
|
116
|
-
Yomu.new object
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
describe '.java' do
|
123
|
-
specify 'with no specified JAVA_HOME' do
|
124
|
-
assert_equal 'java', Yomu.send(:java)
|
125
|
-
end
|
126
|
-
|
127
|
-
specify 'with a specified JAVA_HOME' do
|
128
|
-
ENV['JAVA_HOME'] = '/path/to/java/home'
|
129
|
-
|
130
|
-
assert_equal '/path/to/java/home/bin/java', Yomu.send(:java)
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
describe 'initialized with a given path' do
|
135
|
-
let(:yomu) { Yomu.new 'test/samples/sample.pages' }
|
136
|
-
|
137
|
-
specify '#text reads text' do
|
138
|
-
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
139
|
-
end
|
140
|
-
|
141
|
-
specify '#metada reads metadata' do
|
142
|
-
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
describe 'initialized with a given URI' do
|
147
|
-
let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
|
148
|
-
|
149
|
-
specify '#text reads text' do
|
150
|
-
assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
151
|
-
end
|
152
|
-
|
153
|
-
specify '#metadata reads metadata' do
|
154
|
-
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
describe 'initialized with a given stream' do
|
159
|
-
let(:yomu) { Yomu.new File.open('test/samples/sample.pages', 'rb') }
|
160
|
-
|
161
|
-
specify '#text reads text' do
|
162
|
-
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
163
|
-
end
|
164
|
-
|
165
|
-
specify '#metadata reads metadata' do
|
166
|
-
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|