tika_wrapper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
File without changes
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ sudo: false
3
+ rvm:
4
+ - 2.2.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tika_wrapper.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Chris Beer
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,15 @@
1
+ # tika_wrapper
2
+
3
+ Wrap any task with a running tika server:
4
+
5
+ ```ruby
6
+ TikaWrapper.wrap do |tika|
7
+ # Something that requires tika
8
+ end
9
+ ```
10
+
11
+ ## Basic Options
12
+
13
+ ```ruby
14
+ TikaWrapper.wrap(port: '9998') { |tika| puts tika.url }
15
+ ```
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rspec/core/rake_task'
4
+
5
# Register the `spec` task through RSpec's rake integration.
RSpec::Core::RakeTask.new :spec

# A bare `rake` invocation depends on (and therefore runs) the `spec` task.
task default: %w[spec]
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'tika_wrapper'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require 'irb'
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1 @@
1
+ service_name: travis-ci
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'tika_wrapper'
4
+ require 'optparse'
5
+
6
# Options collected from the command line and handed to TikaWrapper.wrap.
options = {}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: tika_wrapper [options]"

  opts.on("-v", "--[no-]verbose", "Run verbosely") { |flag| options[:verbose] = flag }

  opts.on("--version VERSION", "Specify a tika version to download (default: #{TikaWrapper.default_tika_version})") { |version| options[:version] = version }

  opts.on("-pPORT", "--port PORT", "Specify the port tika should run at (default: 9998)") { |port| options[:port] = port }
end
parser.parse!

# The command-line tool is verbose unless the user explicitly said otherwise
# (the parser only ever stores true or false under :verbose).
options[:verbose] = true unless options.key?(:verbose)

# Keep the process in the foreground for as long as tika keeps answering;
# TikaWrapper.wrap stops the server when this block exits.
TikaWrapper.wrap(options) do |tika|
  sleep 1 while tika.status
end
@@ -0,0 +1,18 @@
1
+ require 'tika_wrapper/version'
2
+ require 'tika_wrapper/instance'
3
+
4
module TikaWrapper
  ##
  # The tika-server version used when no :version option is given
  # @return [String]
  def self.default_tika_version
    "1.8"
  end

  ##
  # The shared TikaWrapper::Instance.
  #
  # The instance is memoized. Calling this without options (or with the same
  # options the memoized instance was built with) returns the existing
  # instance. Passing different, non-empty options builds a new instance and
  # makes it the default, so options given on later calls are no longer
  # silently discarded.
  #
  # @param [Hash] options passed through to TikaWrapper::Instance.new
  # @return [TikaWrapper::Instance]
  def self.default_instance(options = {})
    reusable = @default_instance &&
               (options.empty? || options == @default_instance.options)
    @default_instance = TikaWrapper::Instance.new options unless reusable
    @default_instance
  end

  ##
  # Ensures a tika service is running before executing the block
  #
  # @param [Hash] options passed through to .default_instance
  # @yield [TikaWrapper::Instance] the instance wrapping the block
  def self.wrap(options = {}, &block)
    default_instance(options).wrap(&block)
  end
end
@@ -0,0 +1,234 @@
1
+ require 'digest'
2
+ require 'fileutils'
3
+ require 'json'
4
+ require 'open-uri'
5
+ require 'ruby-progressbar'
6
+ require 'securerandom'
7
+ require 'stringio'
8
+ require 'tmpdir'
9
+
10
module TikaWrapper
  ##
  # Downloads the tika-server jar, checks it against an MD5 checksum, launches
  # it with java and shuts it down again.
  class Instance
    attr_reader :options, :pid

    ##
    # @param [Hash] options
    # @option options [String] :url
    # @option options [String] :version
    # @option options [String] :port
    # @option options [String] :version_file
    # @option options [String] :instance_dir
    # @option options [String] :download_path
    # @option options [String] :md5sum
    # @option options [String] :tika_xml
    # @option options [Boolean] :verbose
    # @option options [Boolean] :managed
    # @option options [Boolean] :ignore_md5sum
    # @option options [Hash] :tika_options
    # @option options [Hash] :env
    # @option options [Integer] :startup_timeout seconds to wait for a managed
    #   tika to answer before #start raises (default: 60)
    def initialize(options = {})
      @options = options
    end

    ##
    # Start tika, yield this instance to the block, and stop tika again
    # afterwards (even when the block raises).
    #
    # @yield [TikaWrapper::Instance] self
    def wrap(&_block)
      start
      yield self
    ensure
      stop
    end

    ##
    # Start tika and wait for it to become available
    #
    # @raise [RuntimeError] when a managed tika does not answer within the
    #   :startup_timeout
    def start
      download
      return unless managed?

      exec(p: port)
      wait_until_started
    end

    ##
    # Stop tika and wait for it to finish exiting
    #
    # Only a process launched by this instance is signalled. Without a pid
    # nothing is killed: Process.kill with pid 0 would signal the caller's own
    # process group.
    def stop
      if managed? && pid
        begin
          Process.kill("KILL", pid.to_i)

          # Wait for tika to stop
          sleep 1 while status
        rescue Errno::ESRCH
          # The process has already exited; there is nothing left to wait for.
          nil
        end
      end

      @pid = nil
    end

    ##
    # Check the status of a managed tika service
    #
    # @return [Boolean] always true for an unmanaged instance; otherwise
    #   whether a request to the "version" path under #url succeeded
    def status
      return true unless managed?

      begin
        # URI#open (from open-uri) is used instead of Kernel#open, which no
        # longer accepts URLs on Ruby 3.0+. The block form closes the response.
        URI.parse(url + "version").open { true }
      rescue
        false
      end
    end

    ##
    # Is tika running?
    def started?
      !!status
    end

    ##
    # Get the port this tika instance is running at
    #
    # @return [String]
    def port
      options.fetch(:port, "9998").to_s
    end

    ##
    # Clean up any files tika_wrapper may have downloaded
    def clean!
      stop
      [download_path, md5sum_path].each do |path|
        FileUtils.remove_entry(path) if File.exist? path
      end
    end

    ##
    # Get a (likely) URL to the tika instance
    #
    # @return [String]
    def url
      "http://127.0.0.1:#{port}/"
    end

    protected

    ##
    # Fetch the tika jar unless a copy with the expected checksum is already
    # present at the download path.
    #
    # @return [String] the download path
    def download
      unless File.exist?(download_path) && validate?(download_path)
        fetch_with_progressbar download_url, download_path
        validate! download_path
      end

      download_path
    end

    ##
    # @param [String] file path of the file to check
    # @return [Boolean] whether the file's MD5 digest equals the expected one
    def validate?(file)
      Digest::MD5.file(file).hexdigest == expected_md5sum
    end

    ##
    # @param [String] file path of the file to check
    # @raise [RuntimeError] on a checksum mismatch, unless :ignore_md5sum is set
    def validate!(file)
      # Skip the comparison entirely when told to ignore it, so the expected
      # checksum does not have to be looked up at all.
      return if options[:ignore_md5sum]

      raise "MD5 mismatch" unless validate? file
    end

    ##
    # Run the tika server
    #
    # The child is spawned directly rather than through IO.popen, so there is
    # no pipe that fills up when nobody reads it. When verbose, tika's stdout
    # is sent to this process's stderr; otherwise its output is discarded.
    #
    # @param [Hash] options extra command-line flags, merged over :tika_options
    # @return [Integer] pid of the spawned process
    def exec(options = {})
      redirects = verbose? ? { out: :err } : { out: File::NULL, err: File::NULL }
      @pid = Process.spawn(env, *tika_command(options), redirects)
      # Reap the child once it exits so it does not linger as a zombie.
      Process.detach(@pid)
      @pid
    end

    private

    ##
    # Build the argument vector used to launch tika. No shell is involved, so
    # every element reaches java as a literal argument.
    #
    # @param [Hash] overrides flags merged over :tika_options
    # @return [Array<String>]
    def tika_command(overrides = {})
      flags = tika_options.merge(overrides).map { |k, v| ["-#{k}", v.to_s] }.flatten
      ["java", "-jar", tika_binary] + flags
    end

    ##
    # Poll #status once a second until tika answers or the timeout elapses.
    def wait_until_started
      startup_timeout.times do
        return if status
        sleep 1
      end

      raise "Timed out waiting for tika to start at #{url}" unless status
    end

    def startup_timeout
      Integer(options.fetch(:startup_timeout, 60))
    end

    # The block form of fetch is used below so that defaults which need the
    # network are only computed when the option is actually missing.

    def download_url
      @download_url ||= options.fetch(:url) { default_download_url }
    end

    ##
    # Ask the Apache mirror service for a download location for this version.
    def default_download_url
      @default_url ||= begin
        mirror_url = "http://www.apache.org/dyn/closer.cgi/tika/tika-server-#{version}.jar?asjson=true"
        doc = JSON.parse(URI.parse(mirror_url).open(&:read))
        doc['preferred'] + doc['path_info']
      end
    end

    def md5url
      "http://archive.apache.org/dist/tika/tika-server-#{version}.jar.md5"
    end

    def version
      @version ||= options.fetch(:version) { default_tika_version }
    end

    def tika_options
      options.fetch(:tika_options, {})
    end

    def env
      options.fetch(:env, {})
    end

    def default_tika_version
      TikaWrapper.default_tika_version
    end

    def download_path
      @download_path ||= options.fetch(:download_path) { default_download_path }
    end

    def default_download_path
      File.join(Dir.tmpdir, File.basename(download_url))
    end

    def tika_dir
      @tika_dir ||= options.fetch(:instance_dir) { File.join(Dir.tmpdir, File.basename(download_url, ".jar")) }
    end

    def verbose?
      !!options.fetch(:verbose, false)
    end

    def managed?
      !!options.fetch(:managed, true)
    end

    def version_file
      options.fetch(:version_file) { File.join(tika_dir, "VERSION") }
    end

    ##
    # The checksum to compare downloads against: the :md5sum option, or the
    # first whitespace-separated token of the downloaded .md5 file.
    def expected_md5sum
      @md5sum ||= options.fetch(:md5sum) { File.read(md5file).split(" ").first }
    end

    def tika_binary
      download_path
    end

    def md5sum_path
      File.join(Dir.tmpdir, File.basename(md5url))
    end

    def tmp_save_dir
      @tmp_save_dir ||= Dir.mktmpdir
    end

    ##
    # Download url to the output path while driving a progress bar.
    #
    # @param [String] url
    # @param [String] output path to write to
    def fetch_with_progressbar(url, output)
      pbar = ProgressBar.create(title: File.basename(url), total: nil, format: "%t: |%B| %p%% (%e )")
      URI.parse(url).open(
        content_length_proc: lambda { |total| pbar.total = total if total && 0 < total },
        progress_proc: lambda { |size| pbar.progress = size }
      ) do |io|
        IO.copy_stream(io, output)
      end
    end

    ##
    # Download the .md5 file unless it is already present.
    #
    # @return [String] path of the local .md5 file
    def md5file
      fetch_with_progressbar md5url, md5sum_path unless File.exist? md5sum_path

      md5sum_path
    end
  end
end
@@ -0,0 +1,3 @@
1
module TikaWrapper
  # Version string of the tika_wrapper gem (read by the gemspec).
  VERSION = '0.0.1'
end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
describe TikaWrapper do
  describe ".wrap" do
    # While the wrap block runs, a TCP connection to tika's port must be
    # accepted within 15 seconds.
    it "should launch tika" do
      TikaWrapper.wrap do |instance|
        expect {
          Timeout.timeout(15) { TCPSocket.new('127.0.0.1', instance.port).close }
        }.not_to raise_exception
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
4
+ require 'tika_wrapper'
5
+
6
+ require 'rspec'
7
+
8
+ RSpec.configure do |_config|
9
+ end
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'tika_wrapper/version'
5
+
6
Gem::Specification.new do |gem|
  # Identity
  gem.name     = "tika_wrapper"
  gem.version  = TikaWrapper::VERSION
  gem.authors  = ["Chris Beer"]
  gem.email    = ["chris@cbeer.info"]
  gem.summary  = "Tika service wrapper"
  gem.homepage = "https://github.com/cbeer/tika_wrapper"
  gem.license  = "MIT"

  # Package every file tracked by git; executables live under exe/.
  tracked = `git ls-files -z`.split("\x0")
  gem.files         = tracked
  gem.bindir        = 'exe'
  gem.executables   = tracked.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.test_files    = tracked.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Runtime dependency
  gem.add_dependency "ruby-progressbar"

  # Development dependencies (version-constrained first, then unconstrained)
  [["bundler", "~> 1.7"], ["rake", "~> 10.0"]].each do |name, requirement|
    gem.add_development_dependency name, requirement
  end

  ["rspec", "coveralls"].each do |name|
    gem.add_development_dependency name
  end
end
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tika_wrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Beer
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-06-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ruby-progressbar
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.7'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: coveralls
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email:
85
+ - chris@cbeer.info
86
+ executables:
87
+ - tika_wrapper
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".rubocop.yml"
93
+ - ".rubocop_hound.yml"
94
+ - ".rubocop_todo.yml"
95
+ - ".travis.yml"
96
+ - Gemfile
97
+ - LICENSE
98
+ - README.md
99
+ - Rakefile
100
+ - bin/console
101
+ - bin/setup
102
+ - coveralls.yml
103
+ - exe/tika_wrapper
104
+ - lib/tika_wrapper.rb
105
+ - lib/tika_wrapper/instance.rb
106
+ - lib/tika_wrapper/version.rb
107
+ - spec/lib/tika_wrapper_spec.rb
108
+ - spec/spec_helper.rb
109
+ - tika_wrapper.gemspec
110
+ homepage: https://github.com/cbeer/tika_wrapper
111
+ licenses:
112
+ - MIT
113
+ metadata: {}
114
+ post_install_message:
115
+ rdoc_options: []
116
+ require_paths:
117
+ - lib
118
+ required_ruby_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ required_rubygems_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ requirements: []
129
+ rubyforge_project:
130
+ rubygems_version: 2.4.5
131
+ signing_key:
132
+ specification_version: 4
133
+ summary: Tika service wrapper
134
+ test_files:
135
+ - spec/lib/tika_wrapper_spec.rb
136
+ - spec/spec_helper.rb