yomu 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/Rakefile +4 -6
- data/jar/{tika-app-1.3.jar → tika-app-1.4.jar} +0 -0
- data/lib/yomu/version.rb +1 -1
- data/lib/yomu.rb +5 -6
- data/spec/helper.rb +6 -0
- data/{test → spec}/samples/sample filename with spaces.pages +0 -0
- data/{test/samples/enclosure_problem.doc → spec/samples/sample-metadata-values-with-colons.doc} +0 -0
- data/{test → spec}/samples/sample.docx +0 -0
- data/{test → spec}/samples/sample.pages +0 -0
- data/spec/yomu_spec.rb +150 -0
- data/yomu.gemspec +1 -1
- metadata +18 -15
- data/test/specs/yomu.rb +0 -169
data/.gitignore
CHANGED
data/.rspec
ADDED
data/Rakefile
CHANGED
@@ -1,10 +1,8 @@
|
|
1
1
|
#!/usr/bin/env rake
|
2
2
|
|
3
3
|
require 'bundler/gem_tasks'
|
4
|
-
require '
|
4
|
+
require 'rspec/core/rake_task'
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
t.verbose = true
|
10
|
-
end
|
6
|
+
RSpec::Core::RakeTask.new 'spec'
|
7
|
+
|
8
|
+
task :default => :spec
|
Binary file
|
data/lib/yomu/version.rb
CHANGED
data/lib/yomu.rb
CHANGED
@@ -34,9 +34,9 @@ class Yomu
|
|
34
34
|
when :text
|
35
35
|
result
|
36
36
|
when :metadata
|
37
|
-
YAML.load
|
37
|
+
YAML.load quote(result)
|
38
38
|
when :mimetype
|
39
|
-
MIME::Types[YAML.load(
|
39
|
+
MIME::Types[YAML.load(quote(result))['Content-Type']].first
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -151,11 +151,10 @@ class Yomu
|
|
151
151
|
@data
|
152
152
|
end
|
153
153
|
|
154
|
-
def self.
|
155
|
-
metadata.
|
156
|
-
l.gsub(/: (.*)/,': "\1"')
|
157
|
-
end.join
|
154
|
+
def self.quote(metadata)
|
155
|
+
metadata.gsub(/: (.*: .*)$/, ': "\1"')
|
158
156
|
end
|
157
|
+
private_class_method :quote
|
159
158
|
|
160
159
|
def self.java
|
161
160
|
ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
|
data/spec/helper.rb
ADDED
File without changes
|
data/{test/samples/enclosure_problem.doc → spec/samples/sample-metadata-values-with-colons.doc}
RENAMED
File without changes
|
File without changes
|
File without changes
|
data/spec/yomu_spec.rb
ADDED
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'helper.rb'
|
2
|
+
require 'yomu'
|
3
|
+
|
4
|
+
describe Yomu do
|
5
|
+
let(:data) { File.read 'spec/samples/sample.docx' }
|
6
|
+
|
7
|
+
before do
|
8
|
+
ENV['JAVA_HOME'] = nil
|
9
|
+
end
|
10
|
+
|
11
|
+
describe '.read' do
|
12
|
+
it 'reads text' do
|
13
|
+
text = Yomu.read :text, data
|
14
|
+
|
15
|
+
expect( text ).to include 'The quick brown fox jumped over the lazy cat.'
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'reads metadata' do
|
19
|
+
metadata = Yomu.read :metadata, data
|
20
|
+
|
21
|
+
expect( metadata['Content-Type'] ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'reads metadata values with colons as strings' do
|
25
|
+
data = File.read 'spec/samples/sample-metadata-values-with-colons.doc'
|
26
|
+
metadata = Yomu.read :metadata, data
|
27
|
+
|
28
|
+
expect( metadata['dc:title'] ).to eql 'problem: test'
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'reads metadata time values as time values' do
|
32
|
+
metadata = Yomu.read :metadata, data
|
33
|
+
|
34
|
+
expect( metadata['Creation-Date'] ).to be_a Time
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'reads mimetype' do
|
38
|
+
mimetype = Yomu.read :mimetype, data
|
39
|
+
|
40
|
+
expect( mimetype.content_type ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
41
|
+
expect( mimetype.extensions ).to include 'docx'
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
describe '.new' do
|
46
|
+
it 'requires parameters' do
|
47
|
+
expect { Yomu.new }.to raise_error ArgumentError
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'accepts a root path' do
|
51
|
+
yomu = Yomu.new 'spec/samples/sample.pages'
|
52
|
+
|
53
|
+
expect( yomu ).to be_path
|
54
|
+
expect( yomu ).not_to be_uri
|
55
|
+
expect( yomu ).not_to be_stream
|
56
|
+
end
|
57
|
+
|
58
|
+
it 'accepts a relative path' do
|
59
|
+
yomu = Yomu.new 'spec/samples/sample.pages'
|
60
|
+
|
61
|
+
expect( yomu ).to be_path
|
62
|
+
expect( yomu ).not_to be_uri
|
63
|
+
expect( yomu ).not_to be_stream
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'accepts a path with spaces' do
|
67
|
+
yomu = Yomu.new 'spec/samples/sample filename with spaces.pages'
|
68
|
+
|
69
|
+
expect( yomu ).to be_path
|
70
|
+
expect( yomu ).not_to be_uri
|
71
|
+
expect( yomu ).not_to be_stream
|
72
|
+
end
|
73
|
+
|
74
|
+
it 'accepts a URI' do
|
75
|
+
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
76
|
+
|
77
|
+
expect( yomu ).to be_uri
|
78
|
+
expect( yomu ).not_to be_path
|
79
|
+
expect( yomu ).not_to be_stream
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'accepts a stream or object that can be read' do
|
83
|
+
File.open 'spec/samples/sample.pages', 'r' do |file|
|
84
|
+
yomu = Yomu.new file
|
85
|
+
|
86
|
+
expect( yomu ).to be_stream
|
87
|
+
expect( yomu ).not_to be_path
|
88
|
+
expect( yomu ).not_to be_uri
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
it 'refuses a path to a missing file' do
|
93
|
+
expect { Yomu.new 'test/sample/missing.pages'}.to raise_error Errno::ENOENT
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'refuses other objects' do
|
97
|
+
[nil, 1, 1.1].each do |object|
|
98
|
+
expect { Yomu.new object }.to raise_error TypeError
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe '.java' do
|
104
|
+
specify 'with no specified JAVA_HOME' do
|
105
|
+
expect( Yomu.send(:java) ).to eql 'java'
|
106
|
+
end
|
107
|
+
|
108
|
+
specify 'with a specified JAVA_HOME' do
|
109
|
+
ENV['JAVA_HOME'] = '/path/to/java/home'
|
110
|
+
|
111
|
+
expect( Yomu.send(:java) ).to eql '/path/to/java/home/bin/java'
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
context 'initialized with a given path' do
|
116
|
+
let(:yomu) { Yomu.new 'spec/samples/sample.pages' }
|
117
|
+
|
118
|
+
specify '#text reads text' do
|
119
|
+
expect( yomu.text).to include 'The quick brown fox jumped over the lazy cat.'
|
120
|
+
end
|
121
|
+
|
122
|
+
specify '#metadata reads metadata' do
|
123
|
+
expect( yomu.metadata['Content-Type'] ).to eql 'application/vnd.apple.pages'
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
context 'initialized with a given URI' do
|
128
|
+
let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
|
129
|
+
|
130
|
+
specify '#text reads text' do
|
131
|
+
expect( yomu.text ).to include 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
132
|
+
end
|
133
|
+
|
134
|
+
specify '#metadata reads metadata' do
|
135
|
+
expect( yomu.metadata['Content-Type'] ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
context 'initialized with a given stream' do
|
140
|
+
let(:yomu) { Yomu.new File.open('spec/samples/sample.pages', 'rb') }
|
141
|
+
|
142
|
+
specify '#text reads text' do
|
143
|
+
expect( yomu.text ).to include 'The quick brown fox jumped over the lazy cat.'
|
144
|
+
end
|
145
|
+
|
146
|
+
specify '#metadata reads metadata' do
|
147
|
+
expect( yomu.metadata['Content-Type'] ).to eql 'application/vnd.apple.pages'
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
data/yomu.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yomu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6
|
4
|
+
version: 0.1.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-07-16 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mime-types
|
@@ -28,7 +28,7 @@ dependencies:
|
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '1.23'
|
30
30
|
- !ruby/object:Gem::Dependency
|
31
|
-
name:
|
31
|
+
name: rspec
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
@@ -52,20 +52,22 @@ extensions: []
|
|
52
52
|
extra_rdoc_files: []
|
53
53
|
files:
|
54
54
|
- .gitignore
|
55
|
+
- .rspec
|
55
56
|
- Gemfile
|
56
57
|
- LICENSE
|
57
58
|
- NOTICE.txt
|
58
59
|
- README.md
|
59
60
|
- Rakefile
|
60
|
-
- jar/tika-app-1.3.jar
|
61
|
+
- jar/tika-app-1.4.jar
|
61
62
|
- lib/yomu.rb
|
62
63
|
- lib/yomu/version.rb
|
64
|
+
- spec/helper.rb
|
65
|
+
- spec/samples/sample filename with spaces.pages
|
66
|
+
- spec/samples/sample-metadata-values-with-colons.doc
|
67
|
+
- spec/samples/sample.docx
|
68
|
+
- spec/samples/sample.pages
|
69
|
+
- spec/yomu_spec.rb
|
63
70
|
- test/helper.rb
|
64
|
-
- test/samples/enclosure_problem.doc
|
65
|
-
- test/samples/sample filename with spaces.pages
|
66
|
-
- test/samples/sample.docx
|
67
|
-
- test/samples/sample.pages
|
68
|
-
- test/specs/yomu.rb
|
69
71
|
- yomu.gemspec
|
70
72
|
homepage: http://erol.github.com/yomu
|
71
73
|
licenses: []
|
@@ -87,15 +89,16 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
89
|
version: '0'
|
88
90
|
requirements: []
|
89
91
|
rubyforge_project:
|
90
|
-
rubygems_version: 1.8.
|
92
|
+
rubygems_version: 1.8.23
|
91
93
|
signing_key:
|
92
94
|
specification_version: 3
|
93
95
|
summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
|
94
96
|
.rtf, .pdf)
|
95
97
|
test_files:
|
98
|
+
- spec/helper.rb
|
99
|
+
- spec/samples/sample filename with spaces.pages
|
100
|
+
- spec/samples/sample-metadata-values-with-colons.doc
|
101
|
+
- spec/samples/sample.docx
|
102
|
+
- spec/samples/sample.pages
|
103
|
+
- spec/yomu_spec.rb
|
96
104
|
- test/helper.rb
|
97
|
-
- test/samples/enclosure_problem.doc
|
98
|
-
- test/samples/sample filename with spaces.pages
|
99
|
-
- test/samples/sample.docx
|
100
|
-
- test/samples/sample.pages
|
101
|
-
- test/specs/yomu.rb
|
data/test/specs/yomu.rb
DELETED
@@ -1,169 +0,0 @@
|
|
1
|
-
require_relative '../helper.rb'
|
2
|
-
|
3
|
-
describe Yomu do
|
4
|
-
let(:data) { File.read 'test/samples/sample.docx' }
|
5
|
-
|
6
|
-
before do
|
7
|
-
ENV['JAVA_HOME'] = nil
|
8
|
-
end
|
9
|
-
|
10
|
-
describe '.read' do
|
11
|
-
it 'reads text' do
|
12
|
-
text = Yomu.read :text, data
|
13
|
-
|
14
|
-
assert_includes text, 'The quick brown fox jumped over the lazy cat.'
|
15
|
-
end
|
16
|
-
|
17
|
-
it 'reads metadata' do
|
18
|
-
metadata = Yomu.read :metadata, data
|
19
|
-
|
20
|
-
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', metadata['Content-Type']
|
21
|
-
end
|
22
|
-
|
23
|
-
it 'accepts metadata with colon' do
|
24
|
-
doc = File.read 'test/samples/enclosure_problem.doc'
|
25
|
-
metadata = Yomu.read :metadata, doc
|
26
|
-
|
27
|
-
assert_equal 'problem: test', metadata['dc:title']
|
28
|
-
end
|
29
|
-
|
30
|
-
it 'reads mimetype' do
|
31
|
-
mimetype = Yomu.read :mimetype, data
|
32
|
-
|
33
|
-
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', mimetype.content_type
|
34
|
-
assert_includes mimetype.extensions, 'docx'
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
describe '.new' do
|
39
|
-
it 'requires parameters' do
|
40
|
-
assert_raises ArgumentError do
|
41
|
-
Yomu.new
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
it 'accepts a root path' do
|
46
|
-
yomu = nil
|
47
|
-
|
48
|
-
assert_silent do
|
49
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
50
|
-
end
|
51
|
-
|
52
|
-
assert yomu.path?
|
53
|
-
refute yomu.uri?
|
54
|
-
refute yomu.stream?
|
55
|
-
end
|
56
|
-
|
57
|
-
it 'accepts a relative path' do
|
58
|
-
yomu = nil
|
59
|
-
|
60
|
-
assert_silent do
|
61
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
62
|
-
end
|
63
|
-
|
64
|
-
assert yomu.path?
|
65
|
-
refute yomu.uri?
|
66
|
-
refute yomu.stream?
|
67
|
-
end
|
68
|
-
|
69
|
-
it 'accepts a path with spaces' do
|
70
|
-
yomu = nil
|
71
|
-
|
72
|
-
assert_silent do
|
73
|
-
yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
|
74
|
-
end
|
75
|
-
|
76
|
-
assert yomu.path?
|
77
|
-
refute yomu.uri?
|
78
|
-
refute yomu.stream?
|
79
|
-
end
|
80
|
-
|
81
|
-
it 'accepts a URI' do
|
82
|
-
yomu = nil
|
83
|
-
|
84
|
-
assert_silent do
|
85
|
-
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
86
|
-
end
|
87
|
-
|
88
|
-
assert yomu.uri?
|
89
|
-
refute yomu.path?
|
90
|
-
refute yomu.stream?
|
91
|
-
end
|
92
|
-
|
93
|
-
it 'accepts a stream or object that can be read' do
|
94
|
-
yomu = nil
|
95
|
-
|
96
|
-
assert_silent do
|
97
|
-
File.open 'test/samples/sample.pages', 'r' do |file|
|
98
|
-
yomu = Yomu.new file
|
99
|
-
end
|
100
|
-
end
|
101
|
-
|
102
|
-
assert yomu.stream?
|
103
|
-
refute yomu.path?
|
104
|
-
refute yomu.uri?
|
105
|
-
end
|
106
|
-
|
107
|
-
it 'does not accept a path to a missing file' do
|
108
|
-
assert_raises Errno::ENOENT do
|
109
|
-
Yomu.new 'test/sample/missing.pages'
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
it 'does not accept other objects' do
|
114
|
-
[nil, 1, 1.1].each do |object|
|
115
|
-
assert_raises TypeError do
|
116
|
-
Yomu.new object
|
117
|
-
end
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|
121
|
-
|
122
|
-
describe '.java' do
|
123
|
-
specify 'with no specified JAVA_HOME' do
|
124
|
-
assert_equal 'java', Yomu.send(:java)
|
125
|
-
end
|
126
|
-
|
127
|
-
specify 'with a specified JAVA_HOME' do
|
128
|
-
ENV['JAVA_HOME'] = '/path/to/java/home'
|
129
|
-
|
130
|
-
assert_equal '/path/to/java/home/bin/java', Yomu.send(:java)
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
describe 'initialized with a given path' do
|
135
|
-
let(:yomu) { Yomu.new 'test/samples/sample.pages' }
|
136
|
-
|
137
|
-
specify '#text reads text' do
|
138
|
-
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
139
|
-
end
|
140
|
-
|
141
|
-
specify '#metada reads metadata' do
|
142
|
-
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
describe 'initialized with a given URI' do
|
147
|
-
let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
|
148
|
-
|
149
|
-
specify '#text reads text' do
|
150
|
-
assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
151
|
-
end
|
152
|
-
|
153
|
-
specify '#metadata reads metadata' do
|
154
|
-
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
describe 'initialized with a given stream' do
|
159
|
-
let(:yomu) { Yomu.new File.open('test/samples/sample.pages', 'rb') }
|
160
|
-
|
161
|
-
specify '#text reads text' do
|
162
|
-
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
163
|
-
end
|
164
|
-
|
165
|
-
specify '#metadata reads metadata' do
|
166
|
-
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|