yomu 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +28 -22
- data/Rakefile +2 -2
- data/lib/yomu.rb +8 -8
- data/lib/yomu/version.rb +1 -1
- data/test/helper.rb +3 -0
- data/test/specs/yomu.rb +141 -0
- data/yomu.gemspec +4 -4
- metadata +11 -11
- data/test/test_helper.rb +0 -2
- data/test/yomu_test.rb +0 -131
data/README.md
CHANGED
@@ -13,36 +13,19 @@ Here are some of the formats supported:
|
|
13
13
|
For the complete list of supported formats, please visit the Apache Tika
|
14
14
|
[Supported Document Formats](http://tika.apache.org/0.9/formats.html) page.
|
15
15
|
|
16
|
-
## Installation and Dependencies
|
17
|
-
|
18
|
-
Add this line to your application's Gemfile:
|
19
|
-
|
20
|
-
gem 'yomu'
|
21
|
-
|
22
|
-
And then execute:
|
23
|
-
|
24
|
-
$ bundle
|
25
|
-
|
26
|
-
Or install it yourself as:
|
27
|
-
|
28
|
-
$ gem install yomu
|
29
|
-
|
30
|
-
**Yomu packages the Apache Tika application jar and requires a working JRE for it to work.**
|
31
|
-
|
32
16
|
## Usage
|
33
17
|
|
34
|
-
|
18
|
+
Text and metadata can be extracted by calling `Yomu.read` directly:
|
35
19
|
|
36
20
|
require 'yomu'
|
37
21
|
|
38
|
-
You can extract text by calling `Yomu.read` directly:
|
39
|
-
|
40
22
|
data = File.read 'sample.pages'
|
41
23
|
text = Yomu.read :text, data
|
24
|
+
metadata = Yomu.read :metadata, data
|
42
25
|
|
43
26
|
### Reading text from a given filename
|
44
27
|
|
45
|
-
|
28
|
+
Create a new instance of Yomu and pass a filename.
|
46
29
|
|
47
30
|
yomu = Yomu.new 'sample.pages'
|
48
31
|
text = yomu.text
|
@@ -56,13 +39,36 @@ This is useful for reading remote files, like documents hosted on Amazon S3.
|
|
56
39
|
|
57
40
|
### Reading text from a stream
|
58
41
|
|
59
|
-
Yomu can also read from a stream or any object that responds to `read`, including Ruby on Rails
|
42
|
+
Yomu can also read from a stream or any object that responds to `read`, including file uploads from Ruby on Rails or Sinatra.
|
60
43
|
|
61
44
|
post '/:name/:filename' do
|
62
|
-
yomu = Yomu.new params[:data]
|
45
|
+
yomu = Yomu.new params[:data][:tempfile]
|
63
46
|
yomu.text
|
64
47
|
end
|
65
48
|
|
49
|
+
### Reading metadata
|
50
|
+
|
51
|
+
Metadata is returned as a hash.
|
52
|
+
|
53
|
+
yomu = Yomu.new 'sample.pages'
|
54
|
+
yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
|
55
|
+
|
56
|
+
## Installation and Dependencies
|
57
|
+
|
58
|
+
Add this line to your application's Gemfile:
|
59
|
+
|
60
|
+
gem 'yomu'
|
61
|
+
|
62
|
+
And then execute:
|
63
|
+
|
64
|
+
$ bundle
|
65
|
+
|
66
|
+
Or install it yourself as:
|
67
|
+
|
68
|
+
$ gem install yomu
|
69
|
+
|
70
|
+
**Yomu packages the Apache Tika application jar and requires a working JRE for it to work.**
|
71
|
+
|
66
72
|
## Contributing
|
67
73
|
|
68
74
|
1. Fork it
|
data/Rakefile
CHANGED
data/lib/yomu.rb
CHANGED
@@ -12,7 +12,7 @@ class Yomu
|
|
12
12
|
# data = File.read 'sample.pages'
|
13
13
|
# text = Yomu.read :text, data
|
14
14
|
# metadata = Yomu.read :metadata, data
|
15
|
-
|
15
|
+
#
|
16
16
|
def self.read(type, data)
|
17
17
|
switch = case type
|
18
18
|
when :text
|
@@ -43,7 +43,7 @@ class Yomu
|
|
43
43
|
# From a stream or an object which responds to +read+
|
44
44
|
#
|
45
45
|
# Yomu.new File.open('sample.pages')
|
46
|
-
|
46
|
+
#
|
47
47
|
def initialize(input)
|
48
48
|
if input.is_a? String
|
49
49
|
if input =~ URI::regexp
|
@@ -64,7 +64,7 @@ class Yomu
|
|
64
64
|
#
|
65
65
|
# yomu = Yomu.new 'sample.pages'
|
66
66
|
# yomu.text
|
67
|
-
|
67
|
+
#
|
68
68
|
def text
|
69
69
|
return @text if defined? @text
|
70
70
|
|
@@ -75,7 +75,7 @@ class Yomu
|
|
75
75
|
#
|
76
76
|
# yomu = Yomu.new 'sample.pages'
|
77
77
|
# yomu.metadata['Content-Type']
|
78
|
-
|
78
|
+
#
|
79
79
|
def metadata
|
80
80
|
return @metadata if defined? @metadata
|
81
81
|
|
@@ -86,7 +86,7 @@ class Yomu
|
|
86
86
|
#
|
87
87
|
# yomu = Yomu.new 'sample.pages'
|
88
88
|
# yomu.path? #=> true
|
89
|
-
|
89
|
+
#
|
90
90
|
def path?
|
91
91
|
defined? @path
|
92
92
|
end
|
@@ -95,7 +95,7 @@ class Yomu
|
|
95
95
|
#
|
96
96
|
# yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
97
97
|
# yomu.uri? #=> true
|
98
|
-
|
98
|
+
#
|
99
99
|
def uri?
|
100
100
|
defined? @uri
|
101
101
|
end
|
@@ -105,7 +105,7 @@ class Yomu
|
|
105
105
|
# file = File.open('sample.pages')
|
106
106
|
# yomu = Yomu.new file
|
107
107
|
# yomu.stream? #=> true
|
108
|
-
|
108
|
+
#
|
109
109
|
def stream?
|
110
110
|
defined? @stream
|
111
111
|
end
|
@@ -114,7 +114,7 @@ class Yomu
|
|
114
114
|
#
|
115
115
|
# yomu = Yomu.new 'sample.pages'
|
116
116
|
# yomu.data
|
117
|
-
|
117
|
+
#
|
118
118
|
def data
|
119
119
|
return @data if defined? @data
|
120
120
|
|
data/lib/yomu/version.rb
CHANGED
data/test/helper.rb
ADDED
data/test/specs/yomu.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
require_relative '../helper.rb'
|
2
|
+
|
3
|
+
describe Yomu do
|
4
|
+
let(:data) { File.read 'test/samples/sample.pages' }
|
5
|
+
|
6
|
+
describe '.read' do
|
7
|
+
it 'reads text' do
|
8
|
+
text = Yomu.read :text, data
|
9
|
+
|
10
|
+
assert_includes text, 'The quick brown fox jumped over the lazy cat.'
|
11
|
+
end
|
12
|
+
|
13
|
+
it 'reads metadata' do
|
14
|
+
metadata = Yomu.read :metadata, data
|
15
|
+
|
16
|
+
assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe '.new' do
|
21
|
+
it 'requires parameters' do
|
22
|
+
assert_raises ArgumentError do
|
23
|
+
Yomu.new
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'accepts a root path' do
|
28
|
+
assert_silent do
|
29
|
+
yomu = Yomu.new 'test/samples/sample.pages'
|
30
|
+
|
31
|
+
assert_block { yomu.path? }
|
32
|
+
assert_block { !yomu.uri? }
|
33
|
+
assert_block { !yomu.stream? }
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'accepts a relative path' do
|
38
|
+
assert_silent do
|
39
|
+
yomu = Yomu.new 'test/samples/sample.pages'
|
40
|
+
|
41
|
+
assert_block { yomu.path? }
|
42
|
+
assert_block { !yomu.uri? }
|
43
|
+
assert_block { !yomu.stream? }
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'accepts a path with spaces' do
|
48
|
+
assert_silent do
|
49
|
+
yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
|
50
|
+
|
51
|
+
assert_block { yomu.path? }
|
52
|
+
assert_block { !yomu.uri? }
|
53
|
+
assert_block { !yomu.stream? }
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'accepts a URI' do
|
58
|
+
assert_silent do
|
59
|
+
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
60
|
+
|
61
|
+
assert_block { yomu.uri? }
|
62
|
+
assert_block { !yomu.path? }
|
63
|
+
assert_block { !yomu.stream? }
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'accepts a stream or object that can be read' do
|
68
|
+
assert_silent do
|
69
|
+
File.open 'test/samples/sample.pages', 'r' do |file|
|
70
|
+
yomu = Yomu.new file
|
71
|
+
|
72
|
+
assert_block { yomu.stream? }
|
73
|
+
assert_block { !yomu.path? }
|
74
|
+
assert_block { !yomu.uri? }
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
it 'does not accept a path to a missing file' do
|
80
|
+
assert_raises Errno::ENOENT do
|
81
|
+
Yomu.new 'test/sample/missing.pages'
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
it 'does not accept other objects' do
|
86
|
+
[nil, 1, 1.1].each do |object|
|
87
|
+
assert_raises TypeError do
|
88
|
+
Yomu.new object
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
describe 'initialized with a given path' do
|
95
|
+
let(:yomu) { Yomu.new 'test/samples/sample.pages' }
|
96
|
+
|
97
|
+
describe '#text' do
|
98
|
+
it 'reads text' do
|
99
|
+
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe '#metadata' do
|
104
|
+
it 'reads metadata' do
|
105
|
+
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
describe 'initialized with a given URI' do
|
111
|
+
let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
|
112
|
+
|
113
|
+
describe '#text' do
|
114
|
+
it 'reads text' do
|
115
|
+
assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
describe '#metadata' do
|
120
|
+
it 'reads metadata' do
|
121
|
+
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
describe 'initialized with a given stream' do
|
127
|
+
let(:yomu) { Yomu.new File.open('test/samples/sample.pages', 'rb') }
|
128
|
+
|
129
|
+
describe '#text' do
|
130
|
+
it 'reads text' do
|
131
|
+
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
describe '#metadata' do
|
136
|
+
it 'reads metadata' do
|
137
|
+
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
data/yomu.gemspec
CHANGED
@@ -4,9 +4,9 @@ require File.expand_path('../lib/yomu/version', __FILE__)
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["Erol Fornoles"]
|
6
6
|
gem.email = ["erol.fornoles@gmail.com"]
|
7
|
-
gem.description = %q{
|
8
|
-
gem.summary = %q{
|
9
|
-
gem.homepage = "http://github.com/
|
7
|
+
gem.description = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)}
|
8
|
+
gem.summary = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)}
|
9
|
+
gem.homepage = "http://erol.github.com/yomu"
|
10
10
|
|
11
11
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
12
12
|
gem.files = `git ls-files`.split("\n")
|
@@ -14,4 +14,4 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.name = "yomu"
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
gem.version = Yomu::VERSION
|
17
|
-
end
|
17
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yomu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,10 +9,10 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-10-22 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description:
|
15
|
-
|
14
|
+
description: Read text and metadata from files and documents (.doc, .docx, .pages,
|
15
|
+
.odt, .rtf, .pdf)
|
16
16
|
email:
|
17
17
|
- erol.fornoles@gmail.com
|
18
18
|
executables: []
|
@@ -28,12 +28,12 @@ files:
|
|
28
28
|
- jar/tika-app-1.2.jar
|
29
29
|
- lib/yomu.rb
|
30
30
|
- lib/yomu/version.rb
|
31
|
+
- test/helper.rb
|
31
32
|
- test/samples/sample filename with spaces.pages
|
32
33
|
- test/samples/sample.pages
|
33
|
-
- test/test_helper.rb
|
34
|
-
- test/yomu_test.rb
|
34
|
+
- test/specs/yomu.rb
|
35
35
|
- yomu.gemspec
|
36
|
-
homepage: http://github.com/
|
36
|
+
homepage: http://erol.github.com/yomu
|
37
37
|
licenses: []
|
38
38
|
post_install_message:
|
39
39
|
rdoc_options: []
|
@@ -56,10 +56,10 @@ rubyforge_project:
|
|
56
56
|
rubygems_version: 1.8.24
|
57
57
|
signing_key:
|
58
58
|
specification_version: 3
|
59
|
-
summary:
|
60
|
-
|
59
|
+
summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
|
60
|
+
.rtf, .pdf)
|
61
61
|
test_files:
|
62
|
+
- test/helper.rb
|
62
63
|
- test/samples/sample filename with spaces.pages
|
63
64
|
- test/samples/sample.pages
|
64
|
-
- test/test_helper.rb
|
65
|
-
- test/yomu_test.rb
|
65
|
+
- test/specs/yomu.rb
|
data/test/test_helper.rb
DELETED
data/test/yomu_test.rb
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
require_relative 'test_helper.rb'
|
2
|
-
|
3
|
-
require 'yomu.rb'
|
4
|
-
|
5
|
-
class YomuTest < MiniTest::Unit::TestCase
|
6
|
-
def test_yomu_can_read_text
|
7
|
-
data = File.read 'test/samples/sample.pages'
|
8
|
-
text = Yomu.read :text, data
|
9
|
-
|
10
|
-
assert_includes text, 'The quick brown fox jumped over the lazy cat.'
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_yomu_can_read_metadata
|
14
|
-
data = File.read 'test/samples/sample.pages'
|
15
|
-
metadata = Yomu.read :metadata, data
|
16
|
-
|
17
|
-
assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_yomu_cannot_be_initialized_without_parameters
|
21
|
-
assert_raises ArgumentError do
|
22
|
-
Yomu.new
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_yomu_can_be_initialized_with_a_root_path
|
27
|
-
assert_silent do
|
28
|
-
yomu = Yomu.new File.join(File.dirname(__FILE__), 'samples/sample.pages')
|
29
|
-
|
30
|
-
assert_block { yomu.path? }
|
31
|
-
assert_block { !yomu.uri? }
|
32
|
-
assert_block { !yomu.stream? }
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_yomu_can_be_initialized_with_a_relative_path
|
37
|
-
assert_silent do
|
38
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
39
|
-
|
40
|
-
assert_block { yomu.path? }
|
41
|
-
assert_block { !yomu.uri? }
|
42
|
-
assert_block { !yomu.stream? }
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_yomu_can_be_initialized_with_a_path_with_spaces
|
47
|
-
assert_silent do
|
48
|
-
yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
|
49
|
-
|
50
|
-
assert_block { yomu.path? }
|
51
|
-
assert_block { !yomu.uri? }
|
52
|
-
assert_block { !yomu.stream? }
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_yomu_can_be_initialized_with_a_uri
|
57
|
-
assert_silent do
|
58
|
-
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
59
|
-
|
60
|
-
assert_block { yomu.uri? }
|
61
|
-
assert_block { !yomu.path? }
|
62
|
-
assert_block { !yomu.stream? }
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_yomu_can_be_initialized_with_a_stream_or_object_that_can_be_read
|
67
|
-
assert_silent do
|
68
|
-
File.open 'test/samples/sample.pages', 'r' do |file|
|
69
|
-
yomu = Yomu.new file
|
70
|
-
|
71
|
-
assert_block { yomu.stream? }
|
72
|
-
assert_block { !yomu.path? }
|
73
|
-
assert_block { !yomu.uri? }
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def test_yomu_cannot_be_initialized_with_a_path_to_a_missing_file
|
79
|
-
assert_raises Errno::ENOENT do
|
80
|
-
Yomu.new 'test/sample/missing.pages'
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def test_yomu_cannot_be_initialized_with_other_objects
|
85
|
-
[nil, 1, 1.1].each do |object|
|
86
|
-
assert_raises TypeError do
|
87
|
-
Yomu.new object
|
88
|
-
end
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_yomu_initialized_with_a_path_can_read_text
|
93
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
94
|
-
|
95
|
-
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
96
|
-
end
|
97
|
-
|
98
|
-
def test_yomu_initialized_with_a_path_can_read_metadata
|
99
|
-
yomu = Yomu.new 'test/samples/sample.pages'
|
100
|
-
|
101
|
-
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
102
|
-
end
|
103
|
-
|
104
|
-
def test_yomu_initialized_with_a_url_can_read_text
|
105
|
-
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
106
|
-
|
107
|
-
assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
108
|
-
end
|
109
|
-
|
110
|
-
def test_yomu_initialized_with_a_url_can_read_metadata
|
111
|
-
yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
112
|
-
|
113
|
-
assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
|
114
|
-
end
|
115
|
-
|
116
|
-
def test_yomu_initialized_with_a_stream_can_read_text
|
117
|
-
File.open 'test/samples/sample.pages', 'rb' do |file|
|
118
|
-
yomu = Yomu.new file
|
119
|
-
|
120
|
-
assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
def test_yomu_initialized_with_a_stream_can_read_metadata
|
125
|
-
File.open 'test/samples/sample.pages', 'rb' do |file|
|
126
|
-
yomu = Yomu.new file
|
127
|
-
|
128
|
-
assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|