yomu 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +28 -22
- data/Rakefile +2 -2
- data/lib/yomu.rb +8 -8
- data/lib/yomu/version.rb +1 -1
- data/test/helper.rb +3 -0
- data/test/specs/yomu.rb +141 -0
- data/yomu.gemspec +4 -4
- metadata +11 -11
- data/test/test_helper.rb +0 -2
- data/test/yomu_test.rb +0 -131
data/README.md
CHANGED
@@ -13,36 +13,19 @@ Here are some of the formats supported:
|
|
13
13
|
For the complete list of supported formats, please visit the Apache Tika
|
14
14
|
[Supported Document Formats](http://tika.apache.org/0.9/formats.html) page.
|
15
15
|
|
16
|
-
## Installation and Dependencies
|
17
|
-
|
18
|
-
Add this line to your application's Gemfile:
|
19
|
-
|
20
|
-
gem 'yomu'
|
21
|
-
|
22
|
-
And then execute:
|
23
|
-
|
24
|
-
$ bundle
|
25
|
-
|
26
|
-
Or install it yourself as:
|
27
|
-
|
28
|
-
$ gem install yomu
|
29
|
-
|
30
|
-
**Yomu packages the Apache Tika application jar and requires a working JRE for it to work.**
|
31
|
-
|
32
16
|
## Usage
|
33
17
|
|
34
|
-
|
18
|
+
Text and metadata can be extracted by calling `Yomu.read` directly:
|
35
19
|
|
36
20
|
require 'yomu'
|
37
21
|
|
38
|
-
You can extract text by calling `Yomu.read` directly:
|
39
|
-
|
40
22
|
data = File.read 'sample.pages'
|
41
23
|
text = Yomu.read :text, data
|
24
|
+
metadata = Yomu.read :metadata, data
|
42
25
|
|
43
26
|
### Reading text from a given filename
|
44
27
|
|
45
|
-
|
28
|
+
Create a new instance of Yomu and pass a filename.
|
46
29
|
|
47
30
|
yomu = Yomu.new 'sample.pages'
|
48
31
|
text = yomu.text
|
@@ -56,13 +39,36 @@ This is useful for reading remote files, like documents hosted on Amazon S3.
|
|
56
39
|
|
57
40
|
### Reading text from a stream
|
58
41
|
|
59
|
-
Yomu can also read from a stream or any object that responds to `read`, including Ruby on Rails
|
42
|
+
Yomu can also read from a stream or any object that responds to `read`, including file uploads from Ruby on Rails or Sinatra.
|
60
43
|
|
61
44
|
post '/:name/:filename' do
|
62
|
-
yomu = Yomu.new params[:data]
|
45
|
+
yomu = Yomu.new params[:data][:tempfile]
|
63
46
|
yomu.text
|
64
47
|
end
|
65
48
|
|
49
|
+
### Reading metadata
|
50
|
+
|
51
|
+
Metadata is returned as a hash.
|
52
|
+
|
53
|
+
yomu = Yomu.new 'sample.pages'
|
54
|
+
yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
|
55
|
+
|
56
|
+
## Installation and Dependencies
|
57
|
+
|
58
|
+
Add this line to your application's Gemfile:
|
59
|
+
|
60
|
+
gem 'yomu'
|
61
|
+
|
62
|
+
And then execute:
|
63
|
+
|
64
|
+
$ bundle
|
65
|
+
|
66
|
+
Or install it yourself as:
|
67
|
+
|
68
|
+
$ gem install yomu
|
69
|
+
|
70
|
+
**Yomu packages the Apache Tika application jar and requires a working JRE for it to work.**
|
71
|
+
|
66
72
|
## Contributing
|
67
73
|
|
68
74
|
1. Fork it
|
data/Rakefile
CHANGED
data/lib/yomu.rb
CHANGED
@@ -12,7 +12,7 @@ class Yomu
|
|
12
12
|
# data = File.read 'sample.pages'
|
13
13
|
# text = Yomu.read :text, data
|
14
14
|
# metadata = Yomu.read :metadata, data
|
15
|
-
|
15
|
+
#
|
16
16
|
def self.read(type, data)
|
17
17
|
switch = case type
|
18
18
|
when :text
|
@@ -43,7 +43,7 @@ class Yomu
|
|
43
43
|
# From a stream or an object which responds to +read+
|
44
44
|
#
|
45
45
|
# Yomu.new File.open('sample.pages')
|
46
|
-
|
46
|
+
#
|
47
47
|
def initialize(input)
|
48
48
|
if input.is_a? String
|
49
49
|
if input =~ URI::regexp
|
@@ -64,7 +64,7 @@ class Yomu
|
|
64
64
|
#
|
65
65
|
# yomu = Yomu.new 'sample.pages'
|
66
66
|
# yomu.text
|
67
|
-
|
67
|
+
#
|
68
68
|
def text
|
69
69
|
return @text if defined? @text
|
70
70
|
|
@@ -75,7 +75,7 @@ class Yomu
|
|
75
75
|
#
|
76
76
|
# yomu = Yomu.new 'sample.pages'
|
77
77
|
# yomu.metadata['Content-Type']
|
78
|
-
|
78
|
+
#
|
79
79
|
def metadata
|
80
80
|
return @metadata if defined? @metadata
|
81
81
|
|
@@ -86,7 +86,7 @@ class Yomu
|
|
86
86
|
#
|
87
87
|
# yomu = Yomu.new 'sample.pages'
|
88
88
|
# yomu.path? #=> true
|
89
|
-
|
89
|
+
#
|
90
90
|
def path?
|
91
91
|
defined? @path
|
92
92
|
end
|
@@ -95,7 +95,7 @@ class Yomu
|
|
95
95
|
#
|
96
96
|
# yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
97
97
|
# yomu.uri? #=> true
|
98
|
-
|
98
|
+
#
|
99
99
|
def uri?
|
100
100
|
defined? @uri
|
101
101
|
end
|
@@ -105,7 +105,7 @@ class Yomu
|
|
105
105
|
# file = File.open('sample.pages')
|
106
106
|
# yomu = Yomu.new file
|
107
107
|
# yomu.stream? #=> true
|
108
|
-
|
108
|
+
#
|
109
109
|
def stream?
|
110
110
|
defined? @stream
|
111
111
|
end
|
@@ -114,7 +114,7 @@ class Yomu
|
|
114
114
|
#
|
115
115
|
# yomu = Yomu.new 'sample.pages'
|
116
116
|
# yomu.data
|
117
|
-
|
117
|
+
#
|
118
118
|
def data
|
119
119
|
return @data if defined? @data
|
120
120
|
|
data/lib/yomu/version.rb
CHANGED
data/test/helper.rb
ADDED
data/test/specs/yomu.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
require_relative '../helper.rb'
|
2
|
+
|
3
|
+
describe Yomu do
|
4
|
+
let(:data) { File.read 'test/samples/sample.pages' }
|
5
|
+
|
6
|
+
describe '.read' do
|
7
|
+
it 'reads text' do
|
8
|
+
text = Yomu.read :text, data
|
9
|
+
|
10
|
+
assert_includes text, 'The quick brown fox jumped over the lazy cat.'
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'reads metadata' do
|
14
|
+
metadata = Yomu.read :metadata, data
|
15
|
+
|
16
|
+
assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '.new' do
|
21
|
+
it 'requires parameters' do
|
22
|
+
assert_raises ArgumentError do
|
23
|
+
Yomu.new
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'accepts a root path' do
|
28
|
+
assert_silent do
|
29
|
+
yomu = Yomu.new 'test/samples/sample.pages'
|
30
|
+
|
31
|
+
assert_block { yomu.path? }
|
32
|
+
assert_block { !yomu.uri? }
|
33
|
+
assert_block { !yomu.stream? }
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'accepts a relative path' do
|
38
|
+
assert_silent do
|
39
|
+
yomu = Yomu.new 'test/samples/sample.pages'
|
40
|
+
|
41
|
+
assert_block { yomu.path? }
|
42
|
+
assert_block { !yomu.uri? }
|
43
|
+
assert_block { !yomu.stream? }
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'accepts a path with spaces' do
|
48
|
+
assert_silent do
|
49
|
+
yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
|
50
|
+
|
51
|
+
assert_block { yomu.path? }
|
52
|
+
assert_block { !yomu.uri? }
|
53
|
+
assert_block { !yomu.stream? }
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'accepts a URI' do
|
58
|
+
assert_silent do
|
59
|
+
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
60
|
+
|
61
|
+
assert_block { yomu.uri? }
|
62
|
+
assert_block { !yomu.path? }
|
63
|
+
assert_block { !yomu.stream? }
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'accepts a stream or object that can be read' do
|
68
|
+
assert_silent do
|
69
|
+
File.open 'test/samples/sample.pages', 'r' do |file|
|
70
|
+
yomu = Yomu.new file
|
71
|
+
|
72
|
+
assert_block { yomu.stream? }
|
73
|
+
assert_block { !yomu.path? }
|
74
|
+
assert_block { !yomu.uri? }
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
it 'does not accept a path to a missing file' do
|
80
|
+
assert_raises Errno::ENOENT do
|
81
|
+
Yomu.new 'test/sample/missing.pages'
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'does not accept other objects' do
|
86
|
+
[nil, 1, 1.1].each do |object|
|
87
|
+
assert_raises TypeError do
|
88
|
+
Yomu.new object
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe 'initialized with a given path' do
|
95
|
+
let(:yomu) { Yomu.new 'test/samples/sample.pages' }
|
96
|
+
|
97
|
+
describe '#text' do
|
98
|
+
it 'reads text' do
|
99
|
+
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe '#metadata' do
|
104
|
+
it 'reads metadata' do
|
105
|
+
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
describe 'initialized with a given URI' do
|
111
|
+
let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
|
112
|
+
|
113
|
+
describe '#text' do
|
114
|
+
it 'reads text' do
|
115
|
+
assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
describe '#metadata' do
|
120
|
+
it 'reads metadata' do
|
121
|
+
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
describe 'initialized with a given stream' do
|
127
|
+
let(:yomu) { Yomu.new File.open('test/samples/sample.pages', 'rb') }
|
128
|
+
|
129
|
+
describe '#text' do
|
130
|
+
it 'reads text' do
|
131
|
+
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
describe '#metadata' do
|
136
|
+
it 'reads metadata' do
|
137
|
+
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
data/yomu.gemspec
CHANGED
@@ -4,9 +4,9 @@ require File.expand_path('../lib/yomu/version', __FILE__)
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["Erol Fornoles"]
|
6
6
|
gem.email = ["erol.fornoles@gmail.com"]
|
7
|
-
gem.description = %q{
|
8
|
-
gem.summary = %q{
|
9
|
-
gem.homepage = "http://github.com/
|
7
|
+
gem.description = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)}
|
8
|
+
gem.summary = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)}
|
9
|
+
gem.homepage = "http://erol.github.com/yomu"
|
10
10
|
|
11
11
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
12
12
|
gem.files = `git ls-files`.split("\n")
|
@@ -14,4 +14,4 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.name = "yomu"
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
gem.version = Yomu::VERSION
|
17
|
-
end
|
17
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yomu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,10 +9,10 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-10-22 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description:
|
15
|
-
|
14
|
+
description: Read text and metadata from files and documents (.doc, .docx, .pages,
|
15
|
+
.odt, .rtf, .pdf)
|
16
16
|
email:
|
17
17
|
- erol.fornoles@gmail.com
|
18
18
|
executables: []
|
@@ -28,12 +28,12 @@ files:
|
|
28
28
|
- jar/tika-app-1.2.jar
|
29
29
|
- lib/yomu.rb
|
30
30
|
- lib/yomu/version.rb
|
31
|
+
- test/helper.rb
|
31
32
|
- test/samples/sample filename with spaces.pages
|
32
33
|
- test/samples/sample.pages
|
33
|
-
- test/test_helper.rb
|
34
|
-
- test/yomu_test.rb
|
34
|
+
- test/specs/yomu.rb
|
35
35
|
- yomu.gemspec
|
36
|
-
homepage: http://github.com/
|
36
|
+
homepage: http://erol.github.com/yomu
|
37
37
|
licenses: []
|
38
38
|
post_install_message:
|
39
39
|
rdoc_options: []
|
@@ -56,10 +56,10 @@ rubyforge_project:
|
|
56
56
|
rubygems_version: 1.8.24
|
57
57
|
signing_key:
|
58
58
|
specification_version: 3
|
59
|
-
summary:
|
60
|
-
|
59
|
+
summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
|
60
|
+
.rtf, .pdf)
|
61
61
|
test_files:
|
62
|
+
- test/helper.rb
|
62
63
|
- test/samples/sample filename with spaces.pages
|
63
64
|
- test/samples/sample.pages
|
64
|
-
- test/test_helper.rb
|
65
|
-
- test/yomu_test.rb
|
65
|
+
- test/specs/yomu.rb
|
data/test/test_helper.rb
DELETED
data/test/yomu_test.rb
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
require_relative 'test_helper.rb'
|
2
|
-
|
3
|
-
require 'yomu.rb'
|
4
|
-
|
5
|
-
class YomuTest < MiniTest::Unit::TestCase
|
6
|
-
def test_yomu_can_read_text
|
7
|
-
data = File.read 'test/samples/sample.pages'
|
8
|
-
text = Yomu.read :text, data
|
9
|
-
|
10
|
-
assert_includes text, 'The quick brown fox jumped over the lazy cat.'
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_yomu_can_read_metadata
|
14
|
-
data = File.read 'test/samples/sample.pages'
|
15
|
-
metadata = Yomu.read :metadata, data
|
16
|
-
|
17
|
-
assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_yomu_cannot_be_initialized_without_parameters
|
21
|
-
assert_raises ArgumentError do
|
22
|
-
Yomu.new
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_yomu_can_be_initialized_with_a_root_path
|
27
|
-
assert_silent do
|
28
|
-
yomu = Yomu.new File.join(File.dirname(__FILE__), 'samples/sample.pages')
|
29
|
-
|
30
|
-
assert_block { yomu.path? }
|
31
|
-
assert_block { !yomu.uri? }
|
32
|
-
assert_block { !yomu.stream? }
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_yomu_can_be_initialized_with_a_relative_path
|
37
|
-
assert_silent do
|
38
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
39
|
-
|
40
|
-
assert_block { yomu.path? }
|
41
|
-
assert_block { !yomu.uri? }
|
42
|
-
assert_block { !yomu.stream? }
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_yomu_can_be_initialized_with_a_path_with_spaces
|
47
|
-
assert_silent do
|
48
|
-
yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
|
49
|
-
|
50
|
-
assert_block { yomu.path? }
|
51
|
-
assert_block { !yomu.uri? }
|
52
|
-
assert_block { !yomu.stream? }
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_yomu_can_be_initialized_with_a_uri
|
57
|
-
assert_silent do
|
58
|
-
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
59
|
-
|
60
|
-
assert_block { yomu.uri? }
|
61
|
-
assert_block { !yomu.path? }
|
62
|
-
assert_block { !yomu.stream? }
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_yomu_can_be_initialized_with_a_stream_or_object_that_can_be_read
|
67
|
-
assert_silent do
|
68
|
-
File.open 'test/samples/sample.pages', 'r' do |file|
|
69
|
-
yomu = Yomu.new file
|
70
|
-
|
71
|
-
assert_block { yomu.stream? }
|
72
|
-
assert_block { !yomu.path? }
|
73
|
-
assert_block { !yomu.uri? }
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def test_yomu_cannot_be_initialized_with_a_path_to_a_missing_file
|
79
|
-
assert_raises Errno::ENOENT do
|
80
|
-
Yomu.new 'test/sample/missing.pages'
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def test_yomu_cannot_be_initialized_with_other_objects
|
85
|
-
[nil, 1, 1.1].each do |object|
|
86
|
-
assert_raises TypeError do
|
87
|
-
Yomu.new object
|
88
|
-
end
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_yomu_initialized_with_a_path_can_read_text
|
93
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
94
|
-
|
95
|
-
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
96
|
-
end
|
97
|
-
|
98
|
-
def test_yomu_initialized_with_a_path_can_read_metadata
|
99
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
100
|
-
|
101
|
-
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
102
|
-
end
|
103
|
-
|
104
|
-
def test_yomu_initialized_with_a_url_can_read_text
|
105
|
-
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
106
|
-
|
107
|
-
assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
108
|
-
end
|
109
|
-
|
110
|
-
def test_yomu_initialized_with_a_url_can_read_metadata
|
111
|
-
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
112
|
-
|
113
|
-
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
|
114
|
-
end
|
115
|
-
|
116
|
-
def test_yomu_initialized_with_a_stream_can_read_text
|
117
|
-
File.open 'test/samples/sample.pages', 'rb' do |file|
|
118
|
-
yomu = Yomu.new file
|
119
|
-
|
120
|
-
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
def test_yomu_initialized_with_a_stream_can_read_metadata
|
125
|
-
File.open 'test/samples/sample.pages', 'rb' do |file|
|
126
|
-
yomu = Yomu.new file
|
127
|
-
|
128
|
-
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|