tika_wrapper 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ sudo: false
3
+ rvm:
4
+ - 2.2.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tika_wrapper.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Chris Beer
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
@@ -0,0 +1,15 @@
1
+ # tika_wrapper
2
+
3
+ Wrap any task with a running tika server:
4
+
5
+ ```ruby
6
+ TikaWrapper.wrap do |tika|
7
+ # Something that requires tika
8
+ end
9
+ ```
10
+
11
+ ## Basic Options
12
+
13
+ ```ruby
14
+ TikaWrapper.wrap(port: '9998') { |tika| puts tika.url }
15
+ ```
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task default: ['spec']
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'tika_wrapper'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require 'irb'
14
+ IRB.start
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1 @@
1
+ service_name: travis-ci
@@ -0,0 +1,30 @@
1
#!/usr/bin/env ruby

require 'tika_wrapper'
require 'optparse'

# Command-line launcher: parses the flags below into an options hash, starts
# tika through TikaWrapper.wrap and keeps the process in the foreground for as
# long as the server keeps reporting a truthy status.
options = {}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: tika_wrapper [options]"

  opts.on("-v", "--[no-]verbose", "Run verbosely") { |flag| options[:verbose] = flag }

  opts.on("--version VERSION", "Specify a tika version to download (default: #{TikaWrapper.default_tika_version})") { |value| options[:version] = value }

  opts.on("-pPORT", "--port PORT", "Specify the port tika should run at (default: 9998)") { |value| options[:port] = value }
end
parser.parse!

# Verbose output is the default; only an explicit --no-verbose turns it off.
options[:verbose] = true unless options.key?(:verbose)

TikaWrapper.wrap(options) do |tika|
  # Block here until the status check fails, then let wrap shut things down.
  sleep 1 while tika.status
end
@@ -0,0 +1,18 @@
1
+ require 'tika_wrapper/version'
2
+ require 'tika_wrapper/instance'
3
+
4
# Entry points for running a block of code against a tika server.
module TikaWrapper
  class << self
    # Tika release used when the caller does not pass a :version option.
    def default_tika_version
      "1.8"
    end

    # Returns the shared TikaWrapper::Instance, building it on first use.
    #
    # The instance is cached on the module, so +options+ only take effect on
    # the first call; later calls return the same object regardless of what
    # is passed.
    #
    # @param [Hash] options forwarded to TikaWrapper::Instance.new
    def default_instance(options = {})
      @default_instance ||= TikaWrapper::Instance.new(options)
    end

    ##
    # Ensures a tika service is running before executing the block
    #
    # @param [Hash] options forwarded to default_instance
    def wrap(options = {}, &block)
      default_instance(options).wrap(&block)
    end
  end
end
@@ -0,0 +1,234 @@
1
+ require 'digest'
2
+ require 'fileutils'
3
+ require 'json'
4
+ require 'open-uri'
5
+ require 'ruby-progressbar'
6
+ require 'securerandom'
7
+ require 'stringio'
8
+ require 'tmpdir'
9
+
10
module TikaWrapper
  ##
  # Manages a single tika-server process: downloads the server jar (checking
  # its MD5), launches it with java, reports whether it is answering HTTP
  # requests and shuts it down again.
  class Instance
    attr_reader :options, :pid

    ##
    # @param [Hash] options
    # @option options [String] :url
    # @option options [String] :version
    # @option options [String] :port
    # @option options [String] :version_file
    # @option options [String] :instance_dir
    # @option options [String] :download_path
    # @option options [String] :md5sum
    # @option options [String] :tika_xml
    # @option options [Boolean] :verbose
    # @option options [Boolean] :managed
    # @option options [Boolean] :ignore_md5sum
    # @option options [Hash] :tika_options
    # @option options [Hash] :env
    def initialize(options = {})
      @options = options
    end

    ##
    # Start tika, yield this instance to the block, and stop tika afterwards
    # (also when the block, or the startup itself, raises).
    def wrap(&_block)
      start
      yield self
    ensure
      stop
    end

    ##
    # Start tika and wait for it to become available
    #
    # @raise [RuntimeError] if the java process exits before it answers
    def start
      download
      return unless managed?

      exec(p: port)

      # Keep polling: the JVM needs more than a single check to come up.
      until status
        if exited?
          # Forget the pid so a later #stop cannot signal a recycled pid.
          @pid = nil
          raise "tika exited before it started accepting connections"
        end
        sleep 1
      end
    end

    ##
    # Stop tika and wait for it to finish exiting
    #
    # Only a process launched by this instance is signalled. When no pid is
    # recorded nothing is killed (a nil pid would otherwise become pid 0,
    # i.e. the caller's whole process group).
    def stop
      if managed? && pid
        begin
          Process.kill("KILL", pid)
          # Reap the child so it does not linger as a zombie.
          Process.wait(pid)
        rescue Errno::ESRCH, Errno::ECHILD
          # The process is already gone; nothing left to clean up.
        end
      end

      @pid = nil
    end

    ##
    # Check the status of a managed tika service
    #
    # @return [Boolean] true for unmanaged instances, otherwise whether a GET
    #   of "<url>version" succeeds
    def status
      return true unless managed?

      # URI#open (from open-uri) works on every Ruby version, unlike
      # Kernel#open with a URL; the block form closes the response.
      URI.parse(url + "version").open { true }
    rescue StandardError
      false
    end

    ##
    # Is tika running?
    def started?
      !!status
    end

    ##
    # Get the port this tika instance is running at
    def port
      options.fetch(:port, "9998").to_s
    end

    ##
    # Clean up any files tika_wrapper may have downloaded
    def clean!
      stop
      [download_path, md5sum_path].each do |path|
        FileUtils.remove_entry(path) if File.exist?(path)
      end
    end

    ##
    # Get a (likely) URL to the tika instance
    def url
      "http://127.0.0.1:#{port}/"
    end

    protected

    ##
    # Fetch the server jar unless a copy with the expected checksum is
    # already present at download_path.
    #
    # @return [String] download_path
    def download
      unless File.exist?(download_path) && validate?(download_path)
        fetch_with_progressbar download_url, download_path
        validate! download_path
      end

      download_path
    end

    ##
    # Does the file's MD5 digest match the expected checksum?
    def validate?(file)
      Digest::MD5.file(file).hexdigest == expected_md5sum
    end

    ##
    # @raise [RuntimeError] "MD5 mismatch" when the checksum differs and
    #   :ignore_md5sum is not set
    def validate!(file)
      return if options[:ignore_md5sum] || validate?(file)

      raise "MD5 mismatch"
    end

    ##
    # Run the tika server
    #
    # Every key/value pair of :tika_options merged with +options+ becomes a
    # "-key value" pair on the java command line. The server's stdout goes to
    # this process's stderr when :verbose is set and is discarded (together
    # with its stderr) otherwise, so the child never blocks on an unread pipe.
    #
    # @param [Hash] options extra command line flags
    # @return [Integer] pid of the spawned process
    def exec(options = {})
      flags = tika_options.merge(options).flat_map { |key, value| ["-#{key}", value.to_s] }
      command = ["java", "-jar", tika_binary] + flags
      redirect = verbose? ? { out: :err } : { [:out, :err] => File::NULL }

      @pid = Process.spawn(env, *command, redirect)
    end

    private

    # Has the spawned java process already terminated?
    def exited?
      return false unless pid

      !Process.waitpid(pid, Process::WNOHANG).nil?
    rescue Errno::ECHILD
      true
    end

    # The defaults below are given as blocks so that they are only computed
    # (several of them need a network request) when the option is missing.

    def download_url
      @download_url ||= options.fetch(:url) { default_download_url }
    end

    # Asks the Apache mirror service for the preferred mirror of this version.
    def default_download_url
      @default_url ||= begin
        mirror_url = "http://www.apache.org/dyn/closer.cgi/tika/tika-server-#{version}.jar?asjson=true"
        doc = JSON.parse(URI.parse(mirror_url).open(&:read))
        doc['preferred'] + doc['path_info']
      end
    end

    def md5url
      "http://archive.apache.org/dist/tika/tika-server-#{version}.jar.md5"
    end

    def version
      @version ||= options.fetch(:version) { default_tika_version }
    end

    def tika_options
      options.fetch(:tika_options, {})
    end

    def env
      options.fetch(:env, {})
    end

    def default_tika_version
      TikaWrapper.default_tika_version
    end

    def download_path
      @download_path ||= options.fetch(:download_path) { default_download_path }
    end

    def default_download_path
      File.join(Dir.tmpdir, File.basename(download_url))
    end

    def tika_dir
      @tika_dir ||= options.fetch(:instance_dir) { File.join(Dir.tmpdir, File.basename(download_url, ".jar")) }
    end

    def verbose?
      !!options.fetch(:verbose, false)
    end

    def managed?
      !!options.fetch(:managed, true)
    end

    def version_file
      options.fetch(:version_file) { File.join(tika_dir, "VERSION") }
    end

    # Checksum from the :md5sum option, or the first whitespace-separated
    # token of the downloaded .md5 file.
    def expected_md5sum
      @md5sum ||= options.fetch(:md5sum) { File.read(md5file).split(" ").first }
    end

    def tika_binary
      download_path
    end

    def md5sum_path
      File.join(Dir.tmpdir, File.basename(md5url))
    end

    def tmp_save_dir
      @tmp_save_dir ||= Dir.mktmpdir
    end

    # Download +url+ to the path +output+, reporting progress on a ProgressBar.
    def fetch_with_progressbar(url, output)
      pbar = ProgressBar.create(title: File.basename(url), total: nil, format: "%t: |%B| %p%% (%e )")
      URI.parse(url).open(
        content_length_proc: ->(total) { pbar.total = total if total && 0 < total },
        progress_proc: ->(size) { pbar.progress = size }
      ) do |io|
        IO.copy_stream(io, output)
      end
    end

    # Path of the local .md5 file, downloading it first if necessary.
    def md5file
      fetch_with_progressbar md5url, md5sum_path unless File.exist?(md5sum_path)

      md5sum_path
    end
  end
end
@@ -0,0 +1,3 @@
1
module TikaWrapper
  # Gem version; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,15 @@
1
+ require 'spec_helper'
2
+
3
+ describe TikaWrapper do
4
+ describe ".wrap" do
5
+ it "should launch tika" do
6
+ TikaWrapper.wrap do |tika|
7
+ expect do
8
+ Timeout::timeout(15) do
9
+ TCPSocket.new('127.0.0.1', tika.port).close
10
+ end
11
+ end.not_to raise_exception
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,9 @@
1
+ require 'coveralls'
2
+ Coveralls.wear!
3
+
4
+ require 'tika_wrapper'
5
+
6
+ require 'rspec'
7
+
8
+ RSpec.configure do |_config|
9
+ end
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'tika_wrapper/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "tika_wrapper"
8
+ spec.version = TikaWrapper::VERSION
9
+ spec.authors = ["Chris Beer"]
10
+ spec.email = ["chris@cbeer.info"]
11
+ spec.summary = %q{Tika service wrapper}
12
+ spec.homepage = "https://github.com/cbeer/tika_wrapper"
13
+ spec.license = "MIT"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0")
16
+ spec.bindir = 'exe'
17
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "ruby-progressbar"
22
+
23
+ spec.add_development_dependency "bundler", "~> 1.7"
24
+ spec.add_development_dependency "rake", "~> 10.0"
25
+
26
+ spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "coveralls"
28
+ end
metadata ADDED
@@ -0,0 +1,136 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tika_wrapper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Beer
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-06-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: ruby-progressbar
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.7'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: coveralls
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description:
84
+ email:
85
+ - chris@cbeer.info
86
+ executables:
87
+ - tika_wrapper
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".rubocop.yml"
93
+ - ".rubocop_hound.yml"
94
+ - ".rubocop_todo.yml"
95
+ - ".travis.yml"
96
+ - Gemfile
97
+ - LICENSE
98
+ - README.md
99
+ - Rakefile
100
+ - bin/console
101
+ - bin/setup
102
+ - coveralls.yml
103
+ - exe/tika_wrapper
104
+ - lib/tika_wrapper.rb
105
+ - lib/tika_wrapper/instance.rb
106
+ - lib/tika_wrapper/version.rb
107
+ - spec/lib/tika_wrapper_spec.rb
108
+ - spec/spec_helper.rb
109
+ - tika_wrapper.gemspec
110
+ homepage: https://github.com/cbeer/tika_wrapper
111
+ licenses:
112
+ - MIT
113
+ metadata: {}
114
+ post_install_message:
115
+ rdoc_options: []
116
+ require_paths:
117
+ - lib
118
+ required_ruby_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ required_rubygems_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ requirements: []
129
+ rubyforge_project:
130
+ rubygems_version: 2.4.5
131
+ signing_key:
132
+ specification_version: 4
133
+ summary: Tika service wrapper
134
+ test_files:
135
+ - spec/lib/tika_wrapper_spec.rb
136
+ - spec/spec_helper.rb