yomu2 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6b01b6c47c318f92b499b25df734fad15f37a79b
4
+ data.tar.gz: 71430d5a68f0db97d3a80242d9a86f67aa6116da
5
+ SHA512:
6
+ metadata.gz: 0d29eeb6f7026c32e9bb7bf9c0d6a8f56eeede3af8eb6a4126b61753e343f97d37cd7b7bccd4a46e1ca9fe38c7569f840a9ea6a51c5f34d36641fc3fd232834c
7
+ data.tar.gz: af9252b76bd976c3e643fda9345f0232eca67a8e7924a6ece11f34fe0f2589236003deadf3f616eaa24d8ee9717e533e2b6b58d6055a5bc198c06589cc1cb729
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .gs
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - 2.1.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in yomu2.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Erol Fornoles
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/NOTICE.txt ADDED
@@ -0,0 +1,8 @@
1
+ Textract
2
+ Copyright 2011 Erol Fornoles, released under the MIT license
3
+
4
+ Apache Tika
5
+ Copyright 2011 The Apache Software Foundation
6
+
7
+ This product includes software developed at
8
+ The Apache Software Foundation (http://www.apache.org/).
data/README.md ADDED
@@ -0,0 +1,111 @@
1
+ [![Travis Build Status](https://travis-ci.org/AlphaExchange/yomu2.svg?branch=master)](https://travis-ci.org/AlphaExchange/yomu2)
2
+ [![Code Climate](https://codeclimate.com/github/AlphaExchange/yomu2/badges/gpa.svg)](https://codeclimate.com/github/AlphaExchange/yomu2)
3
+ [![Gem Version](http://img.shields.io/gem/v/yomu2.svg?style=flat)](#)
4
+
5
+ # Yomu2 読む2
6
+
7
+ [Yomu2](http://github.com/AlphaExchange/yomu2) is a library for extracting text and metadata from files and documents using the [Apache Tika](http://tika.apache.org/) content analysis toolkit.
8
+
9
+ This is an up-to-date, maintained fork of the Yomu gem.
10
+
11
+ Here are some of the formats supported:
12
+
13
+ - Microsoft Office OLE 2 and Office Open XML Formats (.doc, .docx, .xls, .xlsx,
14
+ .ppt, .pptx)
15
+ - OpenOffice.org OpenDocument Formats (.odt, .ods, .odp)
16
+ - Apple iWork Formats (.pages, .numbers, .key)
17
+ - Rich Text Format (.rtf)
18
+ - Portable Document Format (.pdf)
19
+
20
+ For the complete list of supported formats, please visit the Apache Tika
21
+ [Supported Document Formats](http://tika.apache.org/1.11/formats.html) page.
22
+
23
+ ## Usage
24
+
25
+ Text, metadata and MIME type information can be extracted by calling `Yomu.read` directly:
26
+
27
+ ```ruby
28
+ require 'yomu'
29
+
30
+ data = File.read 'sample.pages'
31
+ text = Yomu.read :text, data
32
+ metadata = Yomu.read :metadata, data
33
+ mimetype = Yomu.read :mimetype, data
34
+ ```
35
+
36
+ ### Reading text from a given filename
37
+
38
+ Create a new instance of Yomu and pass a filename.
39
+
40
+ ```ruby
41
+ yomu = Yomu.new 'sample.pages'
42
+ text = yomu.text
43
+ ```
44
+
45
+ ### Reading text from a given URL
46
+
47
+ This is useful for reading remote files, like documents hosted on Amazon S3.
48
+
49
+ ```ruby
50
+ yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
51
+ text = yomu.text
52
+ ```
53
+
54
+ ### Reading text from a stream
55
+
56
+ Yomu can also read from a stream or any object that responds to `read`, including file uploads from Ruby on Rails or Sinatra.
57
+
58
+ ```ruby
59
+ post '/:name/:filename' do
60
+ yomu = Yomu.new params[:data][:tempfile]
61
+ yomu.text
62
+ end
63
+ ```
64
+
65
+ ### Reading metadata
66
+
67
+ Metadata is returned as a hash.
68
+
69
+ ```ruby
70
+ yomu = Yomu.new 'sample.pages'
71
+ yomu.metadata['Content-Type'] #=> "application/vnd.apple.pages"
72
+ ```
73
+
74
+ ### Reading MIME types
75
+
76
+ MIME type is returned as a MIME::Type object.
77
+
78
+ ```ruby
79
+ yomu = Yomu.new 'sample.docx'
80
+ yomu.mimetype.content_type #=> "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
81
+ yomu.mimetype.extensions #=> ['docx']
82
+ ```
83
+
84
+ ## Installation and Dependencies
85
+
86
+ ### Java Runtime
87
+
88
+ Yomu packages the Apache Tika application jar and requires a working JRE for it to work.
89
+
90
+ ### Gem
91
+
92
+ Add this line to your application's Gemfile:
93
+
94
+ gem 'yomu2', require: 'yomu'
95
+
96
+ And then execute:
97
+
98
+ $ bundle
99
+
100
+ Or install it yourself as:
101
+
102
+ $ gem install yomu2
103
+
104
+ ## Contributing
105
+
106
+ 1. Fork it
107
+ 2. Create your feature branch ( `git checkout -b my-new-feature` )
108
+ 3. Create tests and make them pass ( `rake` )
109
+ 4. Commit your changes ( `git commit -am 'Added some feature'` )
110
+ 5. Push to the branch ( `git push origin my-new-feature` )
111
+ 6. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env rake
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
# Define the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new('spec')

# A bare `rake` runs the specs.
task(:default => [:spec])
Binary file
data/lib/yomu.rb ADDED
@@ -0,0 +1,274 @@
1
require 'yomu/version'

require 'json'
require 'mime/types'
require 'net/http'
require 'socket'
require 'stringio'
require 'time'
require 'uri'
9
+
10
# Extracts text, HTML, metadata and MIME type information from documents by
# piping them through the bundled Apache Tika application jar, either as a
# one-off +java+ process per call or through a Tika server socket started
# with Yomu.server.
class Yomu
  GEMPATH = File.dirname(File.dirname(__FILE__))
  JARPATH = File.join(Yomu::GEMPATH, 'jar', 'tika-app-1.11.jar')
  DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port

  # Tika command-line switches for each supported extraction type. Metadata
  # and MIME type are both derived from Tika's JSON metadata output.
  TIKA_SWITCHES = {
    :text     => ['-t'].freeze,
    :html     => ['-h'].freeze,
    :metadata => ['-m', '-j'].freeze,
    :mimetype => ['-m', '-j'].freeze
  }.freeze
  private_constant :TIKA_SWITCHES

  @@server_port = nil
  @@server_pid = nil

  # Read text or metadata from a data buffer.
  #
  #   data = File.read 'sample.pages'
  #   text = Yomu.read :text, data
  #   metadata = Yomu.read :metadata, data
  #
  # +type+ is one of :text, :html, :metadata or :mimetype; any other value
  # returns +nil+. When a server port has been set by Yomu.server the data is
  # sent to that server, otherwise a java process is launched for this call.

  def self.read(type, data)
    result = @@server_port ? _server_read(type, data) : _client_read(type, data)

    case type
    when :text, :html
      result
    when :metadata
      JSON.parse(result)
    when :mimetype
      # Tika may report Content-Type as an array of values; use the first one.
      content_type = JSON.parse(result)['Content-Type']
      MIME::Types[Array(content_type).first].first
    end
  end

  # Runs the Tika jar as a child process, writes +data+ to its stdin and
  # returns everything it prints on stdout.
  #
  # The command is passed as an argument array so that no shell is involved
  # and paths containing spaces (JAVA_HOME, the gem directory) are kept intact.

  def self._client_read(type, data)
    command = [java, '-Djava.awt.headless=true', '-jar', Yomu::JARPATH, *TIKA_SWITCHES.fetch(type, [])]

    IO.popen command, 'r+' do |io|
      io.write data
      io.close_write
      io.read
    end
  end

  # Sends +data+ to the Tika server listening on the configured port and
  # returns the raw response.
  #
  # The first argument (the requested type) is ignored: the output format was
  # fixed by the switch given to the server when Yomu.server started it.

  def self._server_read(_, data)
    socket = TCPSocket.new('localhost', @@server_port)
    socket.write(data)

    # tell Tika that we're done sending data
    socket.shutdown(Socket::SHUT_WR)

    # read until the server closes the connection
    socket.read
  ensure
    socket.close if socket
  end

  # Create a new instance of Yomu with a given document.
  #
  # Using a file path:
  #
  #   Yomu.new 'sample.pages'
  #
  # Using a URL:
  #
  #   Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
  #
  # From a stream or an object which responds to +read+
  #
  #   Yomu.new File.open('sample.pages')
  #
  # Raises Errno::ENOENT for a string that is neither an existing file nor a
  # URI, and TypeError for any other object that does not respond to +read+.

  def initialize(input)
    if input.is_a? String
      if File.exist? input
        @path = input
      elsif input =~ URI.regexp
        @uri = URI.parse input
      else
        raise Errno::ENOENT, "missing file or invalid URI - #{input}"
      end
    elsif input.respond_to? :read
      @stream = input
    else
      raise TypeError, "can't read from #{input.class.name}"
    end
  end

  # Returns the text content of the Yomu document.
  #
  #   yomu = Yomu.new 'sample.pages'
  #   yomu.text

  def text
    return @text if defined? @text

    @text = Yomu.read :text, data
  end

  # Returns the text content of the Yomu document in HTML.
  #
  #   yomu = Yomu.new 'sample.pages'
  #   yomu.html

  def html
    return @html if defined? @html

    @html = Yomu.read :html, data
  end

  # Returns the metadata hash of the Yomu document.
  #
  #   yomu = Yomu.new 'sample.pages'
  #   yomu.metadata['Content-Type']

  def metadata
    return @metadata if defined? @metadata

    @metadata = Yomu.read :metadata, data
  end

  # Returns the mimetype object of the Yomu document.
  #
  #   yomu = Yomu.new 'sample.docx'
  #   yomu.mimetype.content_type #=> 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
  #   yomu.mimetype.extensions #=> ['docx']

  def mimetype
    return @mimetype if defined? @mimetype

    # Content-Type may be a single string or an array of values.
    type = Array(metadata['Content-Type']).first

    @mimetype = MIME::Types[type].first
  end

  # Returns the document's creation date as a Time, parsed from the
  # 'Creation-Date' metadata entry, or +nil+ when that entry is absent.
  #
  #   yomu = Yomu.new 'sample.pages'
  #   yomu.creation_date #=> a Time

  def creation_date
    return @creation_date if defined? @creation_date

    # Accept an array value the same way #mimetype does for Content-Type.
    raw = Array(metadata['Creation-Date']).first

    @creation_date = raw ? Time.parse(raw) : nil
  end

  # Returns +true+ if the Yomu document was specified using a file path.
  #
  #   yomu = Yomu.new 'sample.pages'
  #   yomu.path? #=> true

  def path?
    instance_variable_defined? :@path
  end

  # Returns +true+ if the Yomu document was specified using a URI.
  #
  #   yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
  #   yomu.uri? #=> true

  def uri?
    instance_variable_defined? :@uri
  end

  # Returns +true+ if the Yomu document was specified from a stream or an object which responds to +read+.
  #
  #   file = File.open('sample.pages')
  #   yomu = Yomu.new file
  #   yomu.stream? #=> true

  def stream?
    instance_variable_defined? :@stream
  end

  # Returns the raw/unparsed content of the Yomu document.
  #
  #   yomu = Yomu.new 'sample.pages'
  #   yomu.data
  #
  # The content is read once (from the file, over HTTP, or from the stream)
  # and cached for subsequent calls.

  def data
    return @data if defined? @data

    if path?
      @data = File.read @path
    elsif uri?
      @data = Net::HTTP.get @uri
    elsif stream?
      @data = @stream.read
    end

    @data
  end

  # Returns pid of Tika server, started as a new spawned process.
  #
  #  type :html, :text or :metadata
  #  custom_port e.g. 9293
  #
  #  Yomu.server(:text, 9294)
  #
  # If something already accepts connections on the port, no process is
  # spawned and +nil+ is returned; subsequent reads still use that port.
  #
  def self.server(type, custom_port=nil)
    @@server_port = custom_port || DEFAULT_SERVER_PORT

    begin
      # Probe the port: a refused connection means no server is running yet.
      TCPSocket.new('localhost', @@server_port).close
    rescue Errno::ECONNREFUSED
      @@server_pid = Process.spawn(
        java, '-Djava.awt.headless=true', '-jar', Yomu::JARPATH,
        '--server', '--port', @@server_port.to_s, *TIKA_SWITCHES.fetch(type, [])
      )
      sleep(2) # Give the server 2 seconds to spin up.
      @@server_pid
    end
  end

  # Kills server started by Yomu.server
  #
  #  Always run this when you're done, or else Tika might run until you kill it manually
  #  You might try putting your extraction in a begin..rescue...ensure...end block and
  #  putting this method in the ensure block.
  #
  #  Yomu.server(:text)
  #  reports = ["report1.docx", "report2.doc", "report3.pdf"]
  #  begin
  #    my_texts = reports.map{|report_path| Yomu.new(report_path).text }
  #  rescue
  #  ensure
  #    Yomu.kill_server!
  #  end
  def self.kill_server!
    return unless @@server_pid

    begin
      Process.kill('INT', @@server_pid)
    rescue Errno::ESRCH
      # The server process is already gone; just clear the stale state below.
    end

    @@server_pid = nil
    @@server_port = nil
  end

  # Path of the java executable: $JAVA_HOME/bin/java when JAVA_HOME is set,
  # otherwise plain 'java' resolved through PATH.
  def self.java
    ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
  end
  private_class_method :java
end
@@ -0,0 +1,3 @@
1
class Yomu
  # Gem version string, read by the gemspec. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = '0.3.0'.freeze
end
data/spec/helper.rb ADDED
@@ -0,0 +1,6 @@
1
# Shared RSpec settings for the suite.
#
# `treat_symbols_as_metadata_keys_with_true_values` is intentionally not set:
# under the RSpec 3.x required by the gemspec it is always on, and assigning
# it only produces a deprecation warning.
RSpec.configure do |config|
  # With no example tagged :focus, run the whole suite instead of nothing.
  config.run_all_when_everything_filtered = true
  config.filter_run :focus

  # Randomise example order to surface order dependencies.
  config.order = 'random'
end
Binary file
Binary file
data/spec/yomu_spec.rb ADDED
@@ -0,0 +1,182 @@
1
+ require 'helper.rb'
2
+ require 'yomu'
3
+
4
describe Yomu do
  let(:data) { File.read 'spec/samples/sample.docx' }

  # Run every example with JAVA_HOME unset, then restore the previous value so
  # the suite does not leak environment changes into the calling process.
  around do |example|
    original_java_home = ENV['JAVA_HOME']
    ENV['JAVA_HOME'] = nil

    begin
      example.run
    ensure
      ENV['JAVA_HOME'] = original_java_home
    end
  end

  describe '.read' do
    it 'reads text' do
      text = Yomu.read :text, data

      expect( text ).to include 'The quick brown fox jumped over the lazy cat.'
    end

    it 'reads metadata' do
      metadata = Yomu.read :metadata, data

      expect( metadata['Content-Type'] ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    end

    it 'reads metadata values with colons as strings' do
      data = File.read 'spec/samples/sample-metadata-values-with-colons.doc'
      metadata = Yomu.read :metadata, data

      expect( metadata['dc:title'] ).to eql 'problem: test'
    end

    it 'reads mimetype' do
      mimetype = Yomu.read :mimetype, data

      expect( mimetype.content_type ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
      expect( mimetype.extensions ).to include 'docx'
    end
  end

  describe '.new' do
    it 'requires parameters' do
      expect { Yomu.new }.to raise_error ArgumentError
    end

    it 'accepts a root path' do
      # Expand to an absolute path so this differs from the relative-path example.
      yomu = Yomu.new File.expand_path('spec/samples/sample.pages')

      expect( yomu ).to be_path
      expect( yomu ).not_to be_uri
      expect( yomu ).not_to be_stream
    end

    it 'accepts a relative path' do
      yomu = Yomu.new 'spec/samples/sample.pages'

      expect( yomu ).to be_path
      expect( yomu ).not_to be_uri
      expect( yomu ).not_to be_stream
    end

    it 'accepts a path with spaces' do
      yomu = Yomu.new 'spec/samples/sample filename with spaces.pages'

      expect( yomu ).to be_path
      expect( yomu ).not_to be_uri
      expect( yomu ).not_to be_stream
    end

    it 'accepts a URI' do
      yomu = Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'

      expect( yomu ).to be_uri
      expect( yomu ).not_to be_path
      expect( yomu ).not_to be_stream
    end

    it 'accepts a stream or object that can be read' do
      File.open 'spec/samples/sample.pages', 'r' do |file|
        yomu = Yomu.new file

        expect( yomu ).to be_stream
        expect( yomu ).not_to be_path
        expect( yomu ).not_to be_uri
      end
    end

    it 'refuses a path to a missing file' do
      expect { Yomu.new 'test/sample/missing.pages' }.to raise_error Errno::ENOENT
    end

    it 'refuses other objects' do
      [nil, 1, 1.1].each do |object|
        expect { Yomu.new object }.to raise_error TypeError
      end
    end
  end

  describe '#creation_date' do
    let(:yomu) { Yomu.new 'spec/samples/sample.pages' }

    it 'returns a Time' do
      expect( yomu.creation_date ).to be_a Time
    end
  end

  describe '.java' do
    specify 'with no specified JAVA_HOME' do
      expect( Yomu.send(:java) ).to eql 'java'
    end

    specify 'with a specified JAVA_HOME' do
      ENV['JAVA_HOME'] = '/path/to/java/home'

      expect( Yomu.send(:java) ).to eql '/path/to/java/home/bin/java'
    end
  end

  context 'initialized with a given path' do
    let(:yomu) { Yomu.new 'spec/samples/sample.pages' }

    specify '#text reads text' do
      expect( yomu.text ).to include 'The quick brown fox jumped over the lazy cat.'
    end

    specify '#metadata reads metadata' do
      expect( yomu.metadata['Content-Type'] ).to eql ["application/vnd.apple.pages", "application/vnd.apple.pages"]
    end
  end

  context 'initialized with a given URI' do
    let(:yomu) { Yomu.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }

    specify '#text reads text' do
      expect( yomu.text ).to include 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
    end

    specify '#metadata reads metadata' do
      expect( yomu.metadata['Content-Type'] ).to eql 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    end
  end

  context 'initialized with a given stream' do
    let(:yomu) { Yomu.new File.open('spec/samples/sample.pages', 'rb') }

    specify '#text reads text' do
      expect( yomu.text ).to include 'The quick brown fox jumped over the lazy cat.'
    end

    specify '#metadata reads metadata' do
      expect( yomu.metadata['Content-Type'] ).to eql ["application/vnd.apple.pages", "application/vnd.apple.pages"]
    end
  end

  context 'working as server mode' do
    specify '#starts and kills server' do
      port = nil

      begin
        Yomu.server(:text)
        port = Yomu.class_variable_get(:@@server_port)

        expect(Yomu.class_variable_get(:@@server_pid)).not_to be_nil
        expect(port).not_to be_nil

        s = TCPSocket.new('localhost', port)
        expect(s).to be_a TCPSocket
        s.close
      ensure
        # Always stop the server, even when an expectation above fails.
        Yomu.kill_server!
      end

      sleep 2 # give the server time to shut down before probing the port
      expect { TCPSocket.new('localhost', port) }.to raise_error Errno::ECONNREFUSED
    end

    specify '#runs samples through server mode' do
      begin
        Yomu.server(:text)
        expect(Yomu.new('spec/samples/sample.pages').text).to include 'The quick brown fox jumped over the lazy cat.'
        expect(Yomu.new('spec/samples/sample filename with spaces.pages').text).to include 'The quick brown fox jumped over the lazy cat.'
        expect(Yomu.new('spec/samples/sample.docx').text).to include 'The quick brown fox jumped over the lazy cat.'
      ensure
        Yomu.kill_server!
      end
    end
  end
end
data/yomu2.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
# Put the gem's own lib/ directory on the load path so the version constant
# can be required while the gem is not installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'yomu/version'

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name          = 'yomu2'
  gem.version       = Yomu::VERSION
  gem.authors       = ['Erol Fornoles', 'Diego Silva']
  gem.email         = ['erol.fornoles@gmail.com', 'diego@alpha-exchange.com']
  gem.description   = 'Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)'
  gem.summary       = 'Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf)'
  gem.homepage      = 'http://erol.github.com/yomu'
  gem.license       = 'MIT'

  # Package every file tracked by git; executables and test files are picked
  # out of that list by path prefix.
  tracked_files      = `git ls-files`.split($/)
  gem.files          = tracked_files
  gem.executables    = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files     = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths  = ['lib']

  # Runtime dependencies.
  gem.add_runtime_dependency('mime-types', '~> 1.23')
  gem.add_runtime_dependency('json', '~> 1.8')

  # Development-only dependencies.
  gem.add_development_dependency('bundler', '~> 1.3')
  gem.add_development_dependency('rake')
  gem.add_development_dependency('rspec', '~> 3.4')
end
metadata ADDED
@@ -0,0 +1,142 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yomu2
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Erol Fornoles
8
+ - Diego Silva
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2017-02-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: mime-types
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.23'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '1.23'
28
+ - !ruby/object:Gem::Dependency
29
+ name: json
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '1.8'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1.8'
42
+ - !ruby/object:Gem::Dependency
43
+ name: bundler
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '1.3'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '1.3'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rake
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ - !ruby/object:Gem::Dependency
71
+ name: rspec
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '3.4'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '3.4'
84
+ description: Read text and metadata from files and documents (.doc, .docx, .pages,
85
+ .odt, .rtf, .pdf)
86
+ email:
87
+ - erol.fornoles@gmail.com
88
+ - diego@alpha-exchange.com
89
+ executables: []
90
+ extensions: []
91
+ extra_rdoc_files: []
92
+ files:
93
+ - ".gitignore"
94
+ - ".rspec"
95
+ - ".travis.yml"
96
+ - Gemfile
97
+ - LICENSE
98
+ - NOTICE.txt
99
+ - README.md
100
+ - Rakefile
101
+ - jar/tika-app-1.11.jar
102
+ - lib/yomu.rb
103
+ - lib/yomu/version.rb
104
+ - spec/helper.rb
105
+ - spec/samples/sample filename with spaces.pages
106
+ - spec/samples/sample-metadata-values-with-colons.doc
107
+ - spec/samples/sample.docx
108
+ - spec/samples/sample.pages
109
+ - spec/yomu_spec.rb
110
+ - yomu2.gemspec
111
+ homepage: http://erol.github.com/yomu
112
+ licenses:
113
+ - MIT
114
+ metadata: {}
115
+ post_install_message:
116
+ rdoc_options: []
117
+ require_paths:
118
+ - lib
119
+ required_ruby_version: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
124
+ required_rubygems_version: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ version: '0'
129
+ requirements: []
130
+ rubyforge_project:
131
+ rubygems_version: 2.5.1
132
+ signing_key:
133
+ specification_version: 4
134
+ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
135
+ .rtf, .pdf)
136
+ test_files:
137
+ - spec/helper.rb
138
+ - spec/samples/sample filename with spaces.pages
139
+ - spec/samples/sample-metadata-values-with-colons.doc
140
+ - spec/samples/sample.docx
141
+ - spec/samples/sample.pages
142
+ - spec/yomu_spec.rb