yomu 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -13,36 +13,19 @@ Here are some of the formats supported:
13
13
  For the complete list of supported formats, please visit the Apache Tika
14
14
  [Supported Document Formats](http://tika.apache.org/0.9/formats.html) page.
15
15
 
16
- ## Installation and Dependencies
17
-
18
- Add this line to your application's Gemfile:
19
-
20
- gem 'yomu'
21
-
22
- And then execute:
23
-
24
- $ bundle
25
-
26
- Or install it yourself as:
27
-
28
- $ gem install yomu
29
-
30
- **Yomu packages the Apache Tika application jar and requires a working JRE for it to work.**
31
-
32
16
  ## Usage
33
17
 
34
- If you're not using Bundler, you will need to require Yomu in your application:
18
+ Text and metadata can be extracted by calling `Yomu.read` directly:
35
19
 
36
20
  require 'yomu'
37
21
 
38
- You can extract text by calling `Yomu.read` directly:
39
-
40
22
  data = File.read 'sample.pages'
41
23
  text = Yomu.read :text, data
24
+ metadata = Yomu.read :metadata, data
42
25
 
43
26
  ### Reading text from a given filename
44
27
 
45
- You can also make a new instance of Yomu and pass a filename.
28
+ Create a new instance of Yomu by passing it a filename.
46
29
 
47
30
  yomu = Yomu.new 'sample.pages'
48
31
  text = yomu.text
@@ -56,13 +39,36 @@ This is useful for reading remote files, like documents hosted on Amazon S3.
56
39
 
57
40
  ### Reading text from a stream
58
41
 
59
- Yomu can also read from a stream or any object that responds to `read`, including Ruby on Rails' and Sinatra's file uploads:
42
+ Yomu can also read from a stream or any object that responds to `read`, including file uploads from Ruby on Rails or Sinatra.
60
43
 
61
44
  post '/:name/:filename' do
62
- yomu = Yomu.new params[:data]
45
+ yomu = Yomu.new params[:data][:tempfile]
63
46
  yomu.text
64
47
  end
65
48
 
49
+ ### Reading metadata
50
+
51
+ Metadata is returned as a hash.
52
+
53
+ yomu = Yomu.new 'sample.pages'
54
+ yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
55
+
56
+ ## Installation and Dependencies
57
+
58
+ Add this line to your application's Gemfile:
59
+
60
+ gem 'yomu'
61
+
62
+ And then execute:
63
+
64
+ $ bundle
65
+
66
+ Or install it yourself as:
67
+
68
+ $ gem install yomu
69
+
70
+ **Yomu packages the Apache Tika application jar and requires a working JRE to run.**
71
+
66
72
  ## Contributing
67
73
 
68
74
  1. Fork it
data/Rakefile CHANGED
@@ -5,6 +5,6 @@ require 'rake/testtask'
5
5
 
6
6
  Rake::TestTask.new do |t|
7
7
  t.libs << 'test'
8
- t.test_files = FileList['test/*test.rb']
8
+ t.test_files = FileList['test/specs/*.rb']
9
9
  t.verbose = true
10
- end
10
+ end
data/lib/yomu.rb CHANGED
@@ -12,7 +12,7 @@ class Yomu
12
12
  # data = File.read 'sample.pages'
13
13
  # text = Yomu.read :text, data
14
14
  # metadata = Yomu.read :metadata, data
15
-
15
+ #
16
16
  def self.read(type, data)
17
17
  switch = case type
18
18
  when :text
@@ -43,7 +43,7 @@ class Yomu
43
43
  # From a stream or an object which responds to +read+
44
44
  #
45
45
  # Yomu.new File.open('sample.pages')
46
-
46
+ #
47
47
  def initialize(input)
48
48
  if input.is_a? String
49
49
  if input =~ URI::regexp
@@ -64,7 +64,7 @@ class Yomu
64
64
  #
65
65
  # yomu = Yomu.new 'sample.pages'
66
66
  # yomu.text
67
-
67
+ #
68
68
  def text
69
69
  return @text if defined? @text
70
70
 
@@ -75,7 +75,7 @@ class Yomu
75
75
  #
76
76
  # yomu = Yomu.new 'sample.pages'
77
77
  # yomu.metadata['Content-Type']
78
-
78
+ #
79
79
  def metadata
80
80
  return @metadata if defined? @metadata
81
81
 
@@ -86,7 +86,7 @@ class Yomu
86
86
  #
87
87
  # yomu = Yomu.new 'sample.pages'
88
88
  # yomu.path? #=> true
89
-
89
+ #
90
90
  def path?
91
91
  defined? @path
92
92
  end
@@ -95,7 +95,7 @@ class Yomu
95
95
  #
96
96
  # yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
97
97
  # yomu.uri? #=> true
98
-
98
+ #
99
99
  def uri?
100
100
  defined? @uri
101
101
  end
@@ -105,7 +105,7 @@ class Yomu
105
105
  # file = File.open('sample.pages')
106
106
  # yomu = Yomu.new file
107
107
  # yomu.stream? #=> true
108
-
108
+ #
109
109
  def stream?
110
110
  defined? @stream
111
111
  end
@@ -114,7 +114,7 @@ class Yomu
114
114
  #
115
115
  # yomu = Yomu.new 'sample.pages'
116
116
  # yomu.data
117
-
117
+ #
118
118
  def data
119
119
  return @data if defined? @data
120
120
 
data/lib/yomu/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Yomu
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
data/test/helper.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'bundler/setup'
2
+ require 'yomu'
3
+ require 'minitest/autorun'
@@ -0,0 +1,141 @@
1
+ require_relative '../helper.rb'
2
+
3
+ describe Yomu do
4
+ let(:data) { File.read 'test/samples/sample.pages' }
5
+
6
+ describe '.read' do
7
+ it 'reads text' do
8
+ text = Yomu.read :text, data
9
+
10
+ assert_includes text, 'The quick brown fox jumped over the lazy cat.'
11
+ end
12
+
13
+ it 'reads metadata' do
14
+ metadata = Yomu.read :metadata, data
15
+
16
+ assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
17
+ end
18
+ end
19
+
20
+ describe '.new' do
21
+ it 'requires parameters' do
22
+ assert_raises ArgumentError do
23
+ Yomu.new
24
+ end
25
+ end
26
+
27
+ it 'accepts a root path' do
28
+ assert_silent do
29
+ yomu = Yomu.new 'test/samples/sample.pages'
30
+
31
+ assert_block { yomu.path? }
32
+ assert_block { !yomu.uri? }
33
+ assert_block { !yomu.stream? }
34
+ end
35
+ end
36
+
37
+ it 'accepts a relative path' do
38
+ assert_silent do
39
+ yomu = Yomu.new 'test/samples/sample.pages'
40
+
41
+ assert_block { yomu.path? }
42
+ assert_block { !yomu.uri? }
43
+ assert_block { !yomu.stream? }
44
+ end
45
+ end
46
+
47
+ it 'accepts a path with spaces' do
48
+ assert_silent do
49
+ yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
50
+
51
+ assert_block { yomu.path? }
52
+ assert_block { !yomu.uri? }
53
+ assert_block { !yomu.stream? }
54
+ end
55
+ end
56
+
57
+ it 'accepts a URI' do
58
+ assert_silent do
59
+ yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
60
+
61
+ assert_block { yomu.uri? }
62
+ assert_block { !yomu.path? }
63
+ assert_block { !yomu.stream? }
64
+ end
65
+ end
66
+
67
+ it 'accepts a stream or object that can be read' do
68
+ assert_silent do
69
+ File.open 'test/samples/sample.pages', 'r' do |file|
70
+ yomu = Yomu.new file
71
+
72
+ assert_block { yomu.stream? }
73
+ assert_block { !yomu.path? }
74
+ assert_block { !yomu.uri? }
75
+ end
76
+ end
77
+ end
78
+
79
+ it 'does not accept a path to a missing file' do
80
+ assert_raises Errno::ENOENT do
81
+ Yomu.new 'test/sample/missing.pages'
82
+ end
83
+ end
84
+
85
+ it 'does not accept other objects' do
86
+ [nil, 1, 1.1].each do |object|
87
+ assert_raises TypeError do
88
+ Yomu.new object
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ describe 'initialized with a given path' do
95
+ let(:yomu) { Yomu.new 'test/samples/sample.pages' }
96
+
97
+ describe '#text' do
98
+ it 'reads text' do
99
+ assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
100
+ end
101
+ end
102
+
103
+ describe '#metadata' do
104
+ it 'reads metadata' do
105
+ assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
106
+ end
107
+ end
108
+ end
109
+
110
+ describe 'initialized with a given URI' do
111
+ let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
112
+
113
+ describe '#text' do
114
+ it 'reads text' do
115
+ assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
116
+ end
117
+ end
118
+
119
+ describe '#metadata' do
120
+ it 'reads metadata' do
121
+ assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
122
+ end
123
+ end
124
+ end
125
+
126
+ describe 'initialized with a given stream' do
127
+ let(:yomu) { Yomu.new File.open('test/samples/sample.pages', 'rb') }
128
+
129
+ describe '#text' do
130
+ it 'reads text' do
131
+ assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
132
+ end
133
+ end
134
+
135
+ describe '#metadata' do
136
+ it 'reads metadata' do
137
+ assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
138
+ end
139
+ end
140
+ end
141
+ end
data/yomu.gemspec CHANGED
@@ -4,9 +4,9 @@ require File.expand_path('../lib/yomu/version', __FILE__)
4
4
  Gem::Specification.new do |gem|
5
5
  gem.authors = ["Erol Fornoles"]
6
6
  gem.email = ["erol.fornoles@gmail.com"]
7
- gem.description = %q{Yomu is a library for extracting text and metadata using the Apache TIKA content analysis toolkit.}
8
- gem.summary = %q{Yomu is a library for extracting text and metadata using the Apache TIKA content analysis toolkit.}
9
- gem.homepage = "http://github.com/Erol/yomu"
7
+ gem.description = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)}
8
+ gem.summary = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)}
9
+ gem.homepage = "http://erol.github.com/yomu"
10
10
 
11
11
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
12
12
  gem.files = `git ls-files`.split("\n")
@@ -14,4 +14,4 @@ Gem::Specification.new do |gem|
14
14
  gem.name = "yomu"
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = Yomu::VERSION
17
- end
17
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yomu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,10 +9,10 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-09 00:00:00.000000000 Z
12
+ date: 2012-10-22 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: Yomu is a library for extracting text and metadata using the Apache TIKA
15
- content analysis toolkit.
14
+ description: Read text and metadata from files and documents (.doc, .docx, .pages,
15
+ .odt, .rtf, .pdf)
16
16
  email:
17
17
  - erol.fornoles@gmail.com
18
18
  executables: []
@@ -28,12 +28,12 @@ files:
28
28
  - jar/tika-app-1.2.jar
29
29
  - lib/yomu.rb
30
30
  - lib/yomu/version.rb
31
+ - test/helper.rb
31
32
  - test/samples/sample filename with spaces.pages
32
33
  - test/samples/sample.pages
33
- - test/test_helper.rb
34
- - test/yomu_test.rb
34
+ - test/specs/yomu.rb
35
35
  - yomu.gemspec
36
- homepage: http://github.com/Erol/yomu
36
+ homepage: http://erol.github.com/yomu
37
37
  licenses: []
38
38
  post_install_message:
39
39
  rdoc_options: []
@@ -56,10 +56,10 @@ rubyforge_project:
56
56
  rubygems_version: 1.8.24
57
57
  signing_key:
58
58
  specification_version: 3
59
- summary: Yomu is a library for extracting text and metadata using the Apache TIKA
60
- content analysis toolkit.
59
+ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
60
+ .rtf, .pdf)
61
61
  test_files:
62
+ - test/helper.rb
62
63
  - test/samples/sample filename with spaces.pages
63
64
  - test/samples/sample.pages
64
- - test/test_helper.rb
65
- - test/yomu_test.rb
65
+ - test/specs/yomu.rb
data/test/test_helper.rb DELETED
@@ -1,2 +0,0 @@
1
- require 'bundler/setup'
2
- require 'minitest/autorun'
data/test/yomu_test.rb DELETED
@@ -1,131 +0,0 @@
1
- require_relative 'test_helper.rb'
2
-
3
- require 'yomu.rb'
4
-
5
- class YomuTest < MiniTest::Unit::TestCase
6
- def test_yomu_can_read_text
7
- data = File.read 'test/samples/sample.pages'
8
- text = Yomu.read :text, data
9
-
10
- assert_includes text, 'The quick brown fox jumped over the lazy cat.'
11
- end
12
-
13
- def test_yomu_can_read_metadata
14
- data = File.read 'test/samples/sample.pages'
15
- metadata = Yomu.read :metadata, data
16
-
17
- assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
18
- end
19
-
20
- def test_yomu_cannot_be_initialized_without_parameters
21
- assert_raises ArgumentError do
22
- Yomu.new
23
- end
24
- end
25
-
26
- def test_yomu_can_be_initialized_with_a_root_path
27
- assert_silent do
28
- yomu = Yomu.new File.join(File.dirname(__FILE__), 'samples/sample.pages')
29
-
30
- assert_block { yomu.path? }
31
- assert_block { !yomu.uri? }
32
- assert_block { !yomu.stream? }
33
- end
34
- end
35
-
36
- def test_yomu_can_be_initialized_with_a_relative_path
37
- assert_silent do
38
- yomu = Yomu.new 'test/samples/sample.pages'
39
-
40
- assert_block { yomu.path? }
41
- assert_block { !yomu.uri? }
42
- assert_block { !yomu.stream? }
43
- end
44
- end
45
-
46
- def test_yomu_can_be_initialized_with_a_path_with_spaces
47
- assert_silent do
48
- yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
49
-
50
- assert_block { yomu.path? }
51
- assert_block { !yomu.uri? }
52
- assert_block { !yomu.stream? }
53
- end
54
- end
55
-
56
- def test_yomu_can_be_initialized_with_a_uri
57
- assert_silent do
58
- yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
59
-
60
- assert_block { yomu.uri? }
61
- assert_block { !yomu.path? }
62
- assert_block { !yomu.stream? }
63
- end
64
- end
65
-
66
- def test_yomu_can_be_initialized_with_a_stream_or_object_that_can_be_read
67
- assert_silent do
68
- File.open 'test/samples/sample.pages', 'r' do |file|
69
- yomu = Yomu.new file
70
-
71
- assert_block { yomu.stream? }
72
- assert_block { !yomu.path? }
73
- assert_block { !yomu.uri? }
74
- end
75
- end
76
- end
77
-
78
- def test_yomu_cannot_be_initialized_with_a_path_to_a_missing_file
79
- assert_raises Errno::ENOENT do
80
- Yomu.new 'test/sample/missing.pages'
81
- end
82
- end
83
-
84
- def test_yomu_cannot_be_initialized_with_other_objects
85
- [nil, 1, 1.1].each do |object|
86
- assert_raises TypeError do
87
- Yomu.new object
88
- end
89
- end
90
- end
91
-
92
- def test_yomu_initialized_with_a_path_can_read_text
93
- yomu = Yomu.new 'test/samples/sample.pages'
94
-
95
- assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
96
- end
97
-
98
- def test_yomu_initialized_with_a_path_can_read_metadata
99
- yomu = Yomu.new 'test/samples/sample.pages'
100
-
101
- assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
102
- end
103
-
104
- def test_yomu_initialized_with_a_url_can_read_text
105
- yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
106
-
107
- assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
108
- end
109
-
110
- def test_yomu_initialized_with_a_url_can_read_metadata
111
- yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
112
-
113
- assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
114
- end
115
-
116
- def test_yomu_initialized_with_a_stream_can_read_text
117
- File.open 'test/samples/sample.pages', 'rb' do |file|
118
- yomu = Yomu.new file
119
-
120
- assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
121
- end
122
- end
123
-
124
- def test_yomu_initialized_with_a_stream_can_read_metadata
125
- File.open 'test/samples/sample.pages', 'rb' do |file|
126
- yomu = Yomu.new file
127
-
128
- assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
129
- end
130
- end
131
- end