yomu 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -13,36 +13,19 @@ Here are some of the formats supported:
13
13
  For the complete list of supported formats, please visit the Apache Tika
14
14
  [Supported Document Formats](http://tika.apache.org/0.9/formats.html) page.
15
15
 
16
- ## Installation and Dependencies
17
-
18
- Add this line to your application's Gemfile:
19
-
20
- gem 'yomu'
21
-
22
- And then execute:
23
-
24
- $ bundle
25
-
26
- Or install it yourself as:
27
-
28
- $ gem install yomu
29
-
30
- **Yomu packages the Apache Tika application jar and requires a working JRE for it to work.**
31
-
32
16
  ## Usage
33
17
 
34
- If you're not using Bundler, you will need to require Yomu in your application:
18
+ Text and metadata can be extracted by calling `Yomu.read` directly:
35
19
 
36
20
  require 'yomu'
37
21
 
38
- You can extract text by calling `Yomu.read` directly:
39
-
40
22
  data = File.read 'sample.pages'
41
23
  text = Yomu.read :text, data
24
+ metadata = Yomu.read :metadata, data
42
25
 
43
26
  ### Reading text from a given filename
44
27
 
45
- You can also make a new instance of Yomu and pass a filename.
28
+ Create a new instance of Yomu by passing it a filename.
46
29
 
47
30
  yomu = Yomu.new 'sample.pages'
48
31
  text = yomu.text
@@ -56,13 +39,36 @@ This is useful for reading remote files, like documents hosted on Amazon S3.
56
39
 
57
40
  ### Reading text from a stream
58
41
 
59
- Yomu can also read from a stream or any object that responds to `read`, including Ruby on Rails' and Sinatra's file uploads:
42
+ Yomu can also read from a stream or any object that responds to `read`, including file uploads from Ruby on Rails or Sinatra.
60
43
 
61
44
  post '/:name/:filename' do
62
- yomu = Yomu.new params[:data]
45
+ yomu = Yomu.new params[:data][:tempfile]
63
46
  yomu.text
64
47
  end
65
48
 
49
+ ### Reading metadata
50
+
51
+ Metadata is returned as a hash.
52
+
53
+ yomu = Yomu.new 'sample.pages'
54
+ yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
55
+
56
+ ## Installation and Dependencies
57
+
58
+ Add this line to your application's Gemfile:
59
+
60
+ gem 'yomu'
61
+
62
+ And then execute:
63
+
64
+ $ bundle
65
+
66
+ Or install it yourself as:
67
+
68
+ $ gem install yomu
69
+
70
+ **Yomu packages the Apache Tika application jar and requires a working JRE to run.**
71
+
66
72
  ## Contributing
67
73
 
68
74
  1. Fork it
data/Rakefile CHANGED
@@ -5,6 +5,6 @@ require 'rake/testtask'
5
5
 
6
6
  Rake::TestTask.new do |t|
7
7
  t.libs << 'test'
8
- t.test_files = FileList['test/*test.rb']
8
+ t.test_files = FileList['test/specs/*.rb']
9
9
  t.verbose = true
10
- end
10
+ end
data/lib/yomu.rb CHANGED
@@ -12,7 +12,7 @@ class Yomu
12
12
  # data = File.read 'sample.pages'
13
13
  # text = Yomu.read :text, data
14
14
  # metadata = Yomu.read :metadata, data
15
-
15
+ #
16
16
  def self.read(type, data)
17
17
  switch = case type
18
18
  when :text
@@ -43,7 +43,7 @@ class Yomu
43
43
  # From a stream or an object which responds to +read+
44
44
  #
45
45
  # Yomu.new File.open('sample.pages')
46
-
46
+ #
47
47
  def initialize(input)
48
48
  if input.is_a? String
49
49
  if input =~ URI::regexp
@@ -64,7 +64,7 @@ class Yomu
64
64
  #
65
65
  # yomu = Yomu.new 'sample.pages'
66
66
  # yomu.text
67
-
67
+ #
68
68
  def text
69
69
  return @text if defined? @text
70
70
 
@@ -75,7 +75,7 @@ class Yomu
75
75
  #
76
76
  # yomu = Yomu.new 'sample.pages'
77
77
  # yomu.metadata['Content-Type']
78
-
78
+ #
79
79
  def metadata
80
80
  return @metadata if defined? @metadata
81
81
 
@@ -86,7 +86,7 @@ class Yomu
86
86
  #
87
87
  # yomu = Yomu.new 'sample.pages'
88
88
  # yomu.path? #=> true
89
-
89
+ #
90
90
  def path?
91
91
  defined? @path
92
92
  end
@@ -95,7 +95,7 @@ class Yomu
95
95
  #
96
96
  # yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
97
97
  # yomu.uri? #=> true
98
-
98
+ #
99
99
  def uri?
100
100
  defined? @uri
101
101
  end
@@ -105,7 +105,7 @@ class Yomu
105
105
  # file = File.open('sample.pages')
106
106
  # yomu = Yomu.new file
107
107
  # yomu.stream? #=> true
108
-
108
+ #
109
109
  def stream?
110
110
  defined? @stream
111
111
  end
@@ -114,7 +114,7 @@ class Yomu
114
114
  #
115
115
  # yomu = Yomu.new 'sample.pages'
116
116
  # yomu.data
117
-
117
+ #
118
118
  def data
119
119
  return @data if defined? @data
120
120
 
data/lib/yomu/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Yomu
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3"
3
3
  end
data/test/helper.rb ADDED
@@ -0,0 +1,3 @@
1
+ require 'bundler/setup'
2
+ require 'yomu'
3
+ require 'minitest/autorun'
@@ -0,0 +1,141 @@
1
+ require_relative '../helper.rb'
2
+
3
+ describe Yomu do
4
+ let(:data) { File.read 'test/samples/sample.pages' }
5
+
6
+ describe '.read' do
7
+ it 'reads text' do
8
+ text = Yomu.read :text, data
9
+
10
+ assert_includes text, 'The quick brown fox jumped over the lazy cat.'
11
+ end
12
+
13
+ it 'reads metadata' do
14
+ metadata = Yomu.read :metadata, data
15
+
16
+ assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
17
+ end
18
+ end
19
+
20
+ describe '.new' do
21
+ it 'requires parameters' do
22
+ assert_raises ArgumentError do
23
+ Yomu.new
24
+ end
25
+ end
26
+
27
+ it 'accepts a root path' do
28
+ assert_silent do
29
+ yomu = Yomu.new 'test/samples/sample.pages'
30
+
31
+ assert_block { yomu.path? }
32
+ assert_block { !yomu.uri? }
33
+ assert_block { !yomu.stream? }
34
+ end
35
+ end
36
+
37
+ it 'accepts a relative path' do
38
+ assert_silent do
39
+ yomu = Yomu.new 'test/samples/sample.pages'
40
+
41
+ assert_block { yomu.path? }
42
+ assert_block { !yomu.uri? }
43
+ assert_block { !yomu.stream? }
44
+ end
45
+ end
46
+
47
+ it 'accepts a path with spaces' do
48
+ assert_silent do
49
+ yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
50
+
51
+ assert_block { yomu.path? }
52
+ assert_block { !yomu.uri? }
53
+ assert_block { !yomu.stream? }
54
+ end
55
+ end
56
+
57
+ it 'accepts a URI' do
58
+ assert_silent do
59
+ yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
60
+
61
+ assert_block { yomu.uri? }
62
+ assert_block { !yomu.path? }
63
+ assert_block { !yomu.stream? }
64
+ end
65
+ end
66
+
67
+ it 'accepts a stream or object that can be read' do
68
+ assert_silent do
69
+ File.open 'test/samples/sample.pages', 'r' do |file|
70
+ yomu = Yomu.new file
71
+
72
+ assert_block { yomu.stream? }
73
+ assert_block { !yomu.path? }
74
+ assert_block { !yomu.uri? }
75
+ end
76
+ end
77
+ end
78
+
79
+ it 'does not accept a path to a missing file' do
80
+ assert_raises Errno::ENOENT do
81
+ Yomu.new 'test/sample/missing.pages'
82
+ end
83
+ end
84
+
85
+ it 'does not accept other objects' do
86
+ [nil, 1, 1.1].each do |object|
87
+ assert_raises TypeError do
88
+ Yomu.new object
89
+ end
90
+ end
91
+ end
92
+ end
93
+
94
+ describe 'initialized with a given path' do
95
+ let(:yomu) { Yomu.new 'test/samples/sample.pages' }
96
+
97
+ describe '#text' do
98
+ it 'reads text' do
99
+ assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
100
+ end
101
+ end
102
+
103
+ describe '#metadata' do
104
+ it 'reads metadata' do
105
+ assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
106
+ end
107
+ end
108
+ end
109
+
110
+ describe 'initialized with a given URI' do
111
+ let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
112
+
113
+ describe '#text' do
114
+ it 'reads text' do
115
+ assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
116
+ end
117
+ end
118
+
119
+ describe '#metadata' do
120
+ it 'reads metadata' do
121
+ assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
122
+ end
123
+ end
124
+ end
125
+
126
+ describe 'initialized with a given stream' do
127
+ let(:yomu) { Yomu.new File.open('test/samples/sample.pages', 'rb') }
128
+
129
+ describe '#text' do
130
+ it 'reads text' do
131
+ assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
132
+ end
133
+ end
134
+
135
+ describe '#metadata' do
136
+ it 'reads metadata' do
137
+ assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
138
+ end
139
+ end
140
+ end
141
+ end
data/yomu.gemspec CHANGED
@@ -4,9 +4,9 @@ require File.expand_path('../lib/yomu/version', __FILE__)
4
4
  Gem::Specification.new do |gem|
5
5
  gem.authors = ["Erol Fornoles"]
6
6
  gem.email = ["erol.fornoles@gmail.com"]
7
- gem.description = %q{Yomu is a library for extracting text and metadata using the Apache TIKA content analysis toolkit.}
8
- gem.summary = %q{Yomu is a library for extracting text and metadata using the Apache TIKA content analysis toolkit.}
9
- gem.homepage = "http://github.com/Erol/yomu"
7
+ gem.description = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)}
8
+ gem.summary = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)}
9
+ gem.homepage = "http://erol.github.com/yomu"
10
10
 
11
11
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
12
12
  gem.files = `git ls-files`.split("\n")
@@ -14,4 +14,4 @@ Gem::Specification.new do |gem|
14
14
  gem.name = "yomu"
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = Yomu::VERSION
17
- end
17
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yomu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,10 +9,10 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-09 00:00:00.000000000 Z
12
+ date: 2012-10-22 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: Yomu is a library for extracting text and metadata using the Apache TIKA
15
- content analysis toolkit.
14
+ description: Read text and metadata from files and documents (.doc, .docx, .pages,
15
+ .odt, .rtf, .pdf)
16
16
  email:
17
17
  - erol.fornoles@gmail.com
18
18
  executables: []
@@ -28,12 +28,12 @@ files:
28
28
  - jar/tika-app-1.2.jar
29
29
  - lib/yomu.rb
30
30
  - lib/yomu/version.rb
31
+ - test/helper.rb
31
32
  - test/samples/sample filename with spaces.pages
32
33
  - test/samples/sample.pages
33
- - test/test_helper.rb
34
- - test/yomu_test.rb
34
+ - test/specs/yomu.rb
35
35
  - yomu.gemspec
36
- homepage: http://github.com/Erol/yomu
36
+ homepage: http://erol.github.com/yomu
37
37
  licenses: []
38
38
  post_install_message:
39
39
  rdoc_options: []
@@ -56,10 +56,10 @@ rubyforge_project:
56
56
  rubygems_version: 1.8.24
57
57
  signing_key:
58
58
  specification_version: 3
59
- summary: Yomu is a library for extracting text and metadata using the Apache TIKA
60
- content analysis toolkit.
59
+ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
60
+ .rtf, .pdf)
61
61
  test_files:
62
+ - test/helper.rb
62
63
  - test/samples/sample filename with spaces.pages
63
64
  - test/samples/sample.pages
64
- - test/test_helper.rb
65
- - test/yomu_test.rb
65
+ - test/specs/yomu.rb
data/test/test_helper.rb DELETED
@@ -1,2 +0,0 @@
1
- require 'bundler/setup'
2
- require 'minitest/autorun'
data/test/yomu_test.rb DELETED
@@ -1,131 +0,0 @@
1
- require_relative 'test_helper.rb'
2
-
3
- require 'yomu.rb'
4
-
5
- class YomuTest < MiniTest::Unit::TestCase
6
- def test_yomu_can_read_text
7
- data = File.read 'test/samples/sample.pages'
8
- text = Yomu.read :text, data
9
-
10
- assert_includes text, 'The quick brown fox jumped over the lazy cat.'
11
- end
12
-
13
- def test_yomu_can_read_metadata
14
- data = File.read 'test/samples/sample.pages'
15
- metadata = Yomu.read :metadata, data
16
-
17
- assert_equal 'application/vnd.apple.pages', metadata['Content-Type']
18
- end
19
-
20
- def test_yomu_cannot_be_initialized_without_parameters
21
- assert_raises ArgumentError do
22
- Yomu.new
23
- end
24
- end
25
-
26
- def test_yomu_can_be_initialized_with_a_root_path
27
- assert_silent do
28
- yomu = Yomu.new File.join(File.dirname(__FILE__), 'samples/sample.pages')
29
-
30
- assert_block { yomu.path? }
31
- assert_block { !yomu.uri? }
32
- assert_block { !yomu.stream? }
33
- end
34
- end
35
-
36
- def test_yomu_can_be_initialized_with_a_relative_path
37
- assert_silent do
38
- yomu = Yomu.new 'test/samples/sample.pages'
39
-
40
- assert_block { yomu.path? }
41
- assert_block { !yomu.uri? }
42
- assert_block { !yomu.stream? }
43
- end
44
- end
45
-
46
- def test_yomu_can_be_initialized_with_a_path_with_spaces
47
- assert_silent do
48
- yomu = Yomu.new 'test/samples/sample filename with spaces.pages'
49
-
50
- assert_block { yomu.path? }
51
- assert_block { !yomu.uri? }
52
- assert_block { !yomu.stream? }
53
- end
54
- end
55
-
56
- def test_yomu_can_be_initialized_with_a_uri
57
- assert_silent do
58
- yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
59
-
60
- assert_block { yomu.uri? }
61
- assert_block { !yomu.path? }
62
- assert_block { !yomu.stream? }
63
- end
64
- end
65
-
66
- def test_yomu_can_be_initialized_with_a_stream_or_object_that_can_be_read
67
- assert_silent do
68
- File.open 'test/samples/sample.pages', 'r' do |file|
69
- yomu = Yomu.new file
70
-
71
- assert_block { yomu.stream? }
72
- assert_block { !yomu.path? }
73
- assert_block { !yomu.uri? }
74
- end
75
- end
76
- end
77
-
78
- def test_yomu_cannot_be_initialized_with_a_path_to_a_missing_file
79
- assert_raises Errno::ENOENT do
80
- Yomu.new 'test/sample/missing.pages'
81
- end
82
- end
83
-
84
- def test_yomu_cannot_be_initialized_with_other_objects
85
- [nil, 1, 1.1].each do |object|
86
- assert_raises TypeError do
87
- Yomu.new object
88
- end
89
- end
90
- end
91
-
92
- def test_yomu_initialized_with_a_path_can_read_text
93
- yomu = Yomu.new 'test/samples/sample.pages'
94
-
95
- assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
96
- end
97
-
98
- def test_yomu_initialized_with_a_path_can_read_metadata
99
- yomu = Yomu.new 'test/samples/sample.pages'
100
-
101
- assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
102
- end
103
-
104
- def test_yomu_initialized_with_a_url_can_read_text
105
- yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
106
-
107
- assert_includes yomu.text, 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
108
- end
109
-
110
- def test_yomu_initialized_with_a_url_can_read_metadata
111
- yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
112
-
113
- assert_equal 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', yomu.metadata['Content-Type']
114
- end
115
-
116
- def test_yomu_initialized_with_a_stream_can_read_text
117
- File.open 'test/samples/sample.pages', 'rb' do |file|
118
- yomu = Yomu.new file
119
-
120
- assert_includes yomu.text, 'The quick brown fox jumped over the lazy cat.'
121
- end
122
- end
123
-
124
- def test_yomu_initialized_with_a_stream_can_read_metadata
125
- File.open 'test/samples/sample.pages', 'rb' do |file|
126
- yomu = Yomu.new file
127
-
128
- assert_equal 'application/vnd.apple.pages', yomu.metadata['Content-Type']
129
- end
130
- end
131
- end