tika_wrapper 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +35 -0
 - data/.rubocop.yml +15 -0
 - data/.rubocop_hound.yml +1063 -0
 - data/.rubocop_todo.yml +0 -0
 - data/.travis.yml +4 -0
 - data/Gemfile +4 -0
 - data/LICENSE +22 -0
 - data/README.md +15 -0
 - data/Rakefile +7 -0
 - data/bin/console +14 -0
 - data/bin/setup +7 -0
 - data/coveralls.yml +1 -0
 - data/exe/tika_wrapper +30 -0
 - data/lib/tika_wrapper.rb +18 -0
 - data/lib/tika_wrapper/instance.rb +234 -0
 - data/lib/tika_wrapper/version.rb +3 -0
 - data/spec/lib/tika_wrapper_spec.rb +15 -0
 - data/spec/spec_helper.rb +9 -0
 - data/tika_wrapper.gemspec +28 -0
 - metadata +136 -0
 
    
        data/.rubocop_todo.yml
    ADDED
    
    | 
         
            File without changes
         
     | 
    
        data/.travis.yml
    ADDED
    
    
    
        data/Gemfile
    ADDED
    
    
    
        data/LICENSE
    ADDED
    
    | 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            The MIT License (MIT)
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            Copyright (c) 2015 Chris Beer
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            Permission is hereby granted, free of charge, to any person obtaining a copy
         
     | 
| 
      
 6 
     | 
    
         
            +
            of this software and associated documentation files (the "Software"), to deal
         
     | 
| 
      
 7 
     | 
    
         
            +
            in the Software without restriction, including without limitation the rights
         
     | 
| 
      
 8 
     | 
    
         
            +
            to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         
     | 
| 
      
 9 
     | 
    
         
            +
            copies of the Software, and to permit persons to whom the Software is
         
     | 
| 
      
 10 
     | 
    
         
            +
            furnished to do so, subject to the following conditions:
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            The above copyright notice and this permission notice shall be included in all
         
     | 
| 
      
 13 
     | 
    
         
            +
            copies or substantial portions of the Software.
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         
     | 
| 
      
 16 
     | 
    
         
            +
            IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         
     | 
| 
      
 17 
     | 
    
         
            +
            FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         
     | 
| 
      
 18 
     | 
    
         
            +
            AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         
     | 
| 
      
 19 
     | 
    
         
            +
            LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         
     | 
| 
      
 20 
     | 
    
         
            +
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         
     | 
| 
      
 21 
     | 
    
         
            +
            SOFTWARE.
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
    
        data/README.md
    ADDED
    
    
    
        data/Rakefile
    ADDED
    
    
    
        data/bin/console
    ADDED
    
    | 
         @@ -0,0 +1,14 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'bundler/setup'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'tika_wrapper'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            # You can add fixtures and/or initialization code here to make experimenting
         
     | 
| 
      
 7 
     | 
    
         
            +
            # with your gem easier. You can also use a different console, if you like.
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            # (If you use this, don't forget to add pry to your Gemfile!)
         
     | 
| 
      
 10 
     | 
    
         
            +
            # require "pry"
         
     | 
| 
      
 11 
     | 
    
         
            +
            # Pry.start
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            require 'irb'
         
     | 
| 
      
 14 
     | 
    
         
            +
            IRB.start
         
     | 
    
        data/bin/setup
    ADDED
    
    
    
        data/coveralls.yml
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            service_name: travis-ci
         
     | 
    
        data/exe/tika_wrapper
    ADDED
    
    | 
         @@ -0,0 +1,30 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'tika_wrapper'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'optparse'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            # Command-line switches are collected into a plain hash that is handed
            # to TikaWrapper.wrap unchanged.
            options = {}

            parser = OptionParser.new
            parser.banner = "Usage: tika_wrapper [options]"

            parser.on("-v", "--[no-]verbose", "Run verbosely") { |flag| options[:verbose] = flag }

            parser.on("--version VERSION", "Specify a tika version to download (default: #{TikaWrapper.default_tika_version})") { |requested| options[:version] = requested }

            parser.on("-pPORT", "--port PORT", "Specify the port tika should run at (default: 9998)") { |number| options[:port] = number }

            parser.parse!

            # default to verbose
            options[:verbose] = true if options[:verbose].nil?

            # Keep the wrapper (and therefore the managed server) alive for as long
            # as the instance reports a running service.
            TikaWrapper.wrap(options) do |conn|
              sleep 1 while conn.status
            end
         
     | 
    
        data/lib/tika_wrapper.rb
    ADDED
    
    | 
         @@ -0,0 +1,18 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'tika_wrapper/version'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'tika_wrapper/instance'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            module TikaWrapper
              class << self
                # Tika release used when the caller does not ask for a specific one.
                def default_tika_version
                  "1.8"
                end

                # Process-wide instance, built on first use. The options passed on the
                # first call are the ones that stick; later calls return the same
                # object and their options argument is not consulted.
                def default_instance(options = {})
                  @default_instance ||= TikaWrapper::Instance.new(options)
                end

                ##
                # Ensures a tika service is running before executing the block
                def wrap(options = {}, &block)
                  default_instance(options).wrap(&block)
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,234 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'digest'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'fileutils'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'json'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'open-uri'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'ruby-progressbar'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'securerandom'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require 'stringio'
         
     | 
| 
      
 8 
     | 
    
         
            +
            require 'tmpdir'
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            module TikaWrapper
         
     | 
| 
      
 11 
     | 
    
         
            +
              class Instance
         
     | 
| 
      
 12 
     | 
    
         
            +
                attr_reader :options, :pid
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
                ##
         
     | 
| 
      
 15 
     | 
    
         
            +
                # @param [Hash] options
         
     | 
| 
      
 16 
     | 
    
         
            +
                # @option options [String] :url
         
     | 
| 
      
 17 
     | 
    
         
            +
                # @option options [String] :version
         
     | 
| 
      
 18 
     | 
    
         
            +
                # @option options [String] :port
         
     | 
| 
      
 19 
     | 
    
         
            +
                # @option options [String] :version_file
         
     | 
| 
      
 20 
     | 
    
         
            +
                # @option options [String] :instance_dir
         
     | 
| 
      
 21 
     | 
    
         
            +
                # @option options [String] :download_path
         
     | 
| 
      
 22 
     | 
    
         
            +
                # @option options [String] :md5sum
         
     | 
| 
      
 23 
     | 
    
         
            +
                # @option options [String] :tika_xml
         
     | 
| 
      
 24 
     | 
    
         
            +
                # @option options [Boolean] :verbose
         
     | 
| 
      
 25 
     | 
    
         
            +
                # @option options [Boolean] :managed
         
     | 
| 
      
 26 
     | 
    
         
            +
                # @option options [Boolean] :ignore_md5sum
         
     | 
| 
      
 27 
     | 
    
         
            +
                # @option options [Hash] :tika_options
         
     | 
| 
      
 28 
     | 
    
         
            +
                # @option options [Hash] :env
         
     | 
| 
      
 29 
     | 
    
         
            +
                def initialize(options = {})
                  # The caller's hash is stored as given (not copied, no defaults merged);
                  # it is exposed through the `options` reader.
                  @options = options
                end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                def wrap(&_block)
                  # Start the service, hand this instance to the block, and always call
                  # stop afterwards, whether the block (or start itself) raised or not.
                  begin
                    start
                    yield self
                  ensure
                    stop
                  end
                end
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                ##
         
     | 
| 
      
 41 
     | 
    
         
            +
                # Start tika and wait for it to become available
         
     | 
| 
      
 42 
     | 
    
         
            +
                def start
                  # Make sure the server jar is present (download reuses a copy that
                  # already exists and passes the checksum test).
                  download
                  return unless managed?

                  exec(p: port)

                  # Wait for tika to start: keep polling until status reports success.
                  # A single `unless status; sleep 1; end` only paused once and then
                  # returned even if the server was still booting.
                  # NOTE: there is no timeout; this blocks for as long as status is false.
                  sleep 1 until status
                end
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
                ##
         
     | 
| 
      
 55 
     | 
    
         
            +
                # Stop tika and wait for it to finish exiting
         
     | 
| 
      
 56 
     | 
    
         
            +
                def stop
                  # Only signal a process this instance recorded a pid for. Without one,
                  # `pid.to_i` is 0 and Process.kill("KILL", 0) would be delivered to the
                  # calling process's own process group. Checking pid first also skips
                  # the status probe when there is nothing to kill.
                  if managed? && pid && started?
                    Process.kill("KILL", pid.to_i)

                    # Wait for tika to stop
                    sleep 1 while status
                  end

                  @pid = nil
                end
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
                ##
         
     | 
| 
      
 70 
     | 
    
         
            +
                # Check the status of a managed tika service
         
     | 
| 
      
 71 
     | 
    
         
            +
                def status
                  # Unmanaged instances are always reported as running; no probe is made.
                  return true unless managed?

                  begin
                    # Probe the "version" resource under url. URI#open (provided by
                    # open-uri) is used because Kernel#open no longer opens URLs on
                    # Ruby 3.0+, where the call would raise and be reported as
                    # "not running" forever. The block form closes the response.
                    URI.parse(url + "version").open { |_response| }
                    true
                  rescue
                    # Any StandardError (refused connection, HTTP error, ...) counts as
                    # "not running".
                    false
                  end
                end
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
                ##
         
     | 
| 
      
 83 
     | 
    
         
            +
                # Is tika running?
         
     | 
| 
      
 84 
     | 
    
         
            +
                def started?
                  # Collapse whatever status returns into a strict true/false.
                  status ? true : false
                end
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
                ##
         
     | 
| 
      
 89 
     | 
    
         
            +
                # Get the port this tika instance is running at
         
     | 
| 
      
 90 
     | 
    
         
            +
                def port
                  # Always handed back as a String, whatever type the :port option had;
                  # "9998" is used when the option is absent.
                  configured = options.fetch(:port, "9998")
                  configured.to_s
                end
         
     | 
| 
      
 93 
     | 
    
         
            +
             
     | 
| 
      
 94 
     | 
    
         
            +
                ##
         
     | 
| 
      
 95 
     | 
    
         
            +
                # Clean up any files tika_wrapper may have downloaded
         
     | 
| 
      
 96 
     | 
    
         
            +
                def clean!
                  # Stop first, then remove the downloaded jar and the checksum file
                  # when they are present.
                  stop
                  # File.exist? instead of File.exists?: the plural alias was removed in
                  # Ruby 3.2, the singular form is available on every version.
                  FileUtils.remove_entry(download_path) if File.exist?(download_path)
                  FileUtils.remove_entry(md5sum_path) if File.exist?(md5sum_path)
                end
         
     | 
| 
      
 101 
     | 
    
         
            +
             
     | 
| 
      
 102 
     | 
    
         
            +
                ##
         
     | 
| 
      
 103 
     | 
    
         
            +
                # Get a (likely) URL to the tika instance
         
     | 
| 
      
 104 
     | 
    
         
            +
                def url
                  # Loopback address plus the configured port, with a trailing slash.
                  listening_port = port
                  "http://127.0.0.1:#{listening_port}/"
                end
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
                protected
         
     | 
| 
      
 109 
     | 
    
         
            +
             
     | 
| 
      
 110 
     | 
    
         
            +
                # Make sure the jar exists at download_path and return that path.
                # An existing file that passes validate? is reused; otherwise the jar
                # is fetched from download_url and checked with validate!.
                def download
                  # File.exist? instead of File.exists?: the plural alias was removed in
                  # Ruby 3.2.
                  cached = File.exist?(download_path) && validate?(download_path)

                  unless cached
                    fetch_with_progressbar download_url, download_path
                    validate! download_path
                  end

                  download_path
                end
         
     | 
| 
      
 118 
     | 
    
         
            +
             
     | 
| 
      
 119 
     | 
    
         
            +
                def validate?(file)
                  # Compare the file's MD5 hex digest with the expected checksum.
                  actual = Digest::MD5.file(file).hexdigest
                  actual == expected_md5sum
                end
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
                def validate!(file)
                  # Nothing to do for a file that passes the checksum test.
                  return if validate?(file)
                  # A mismatch is tolerated when the caller opted out of MD5 checking.
                  return if options[:ignore_md5sum]

                  raise "MD5 mismatch"
                end
         
     | 
| 
      
 128 
     | 
    
         
            +
             
     | 
| 
      
 129 
     | 
    
         
            +
                ##
         
     | 
| 
      
 130 
     | 
    
         
            +
                # Run the tika server
         
     | 
| 
      
 131 
     | 
    
         
            +
                def exec(options = {})
                  # Build the argv: java -jar <jar>, then one "-key value" pair per option.
                  # Options passed to this call win over the configured :tika_options.
                  command = ["java", "-jar", tika_binary]
                  command += tika_options.merge(options).flat_map { |flag, value| ["-#{flag}", value.to_s] }
                  # NOTE(review): IO.popen receives an array here, so no shell interprets
                  # ">&2"; it is passed to the java process as a literal argument. Confirm
                  # whether a redirect was intended.
                  command << ">&2"

                  # The child's stderr is merged into its stdout, which is the pipe
                  # returned by popen. Only the child's pid is kept.
                  child = IO.popen(env, command + [err: [:child, :out]])
                  @pid = child.pid
                end
         
     | 
| 
      
 136 
     | 
    
         
            +
             
     | 
| 
      
 137 
     | 
    
         
            +
                private
         
     | 
| 
      
 138 
     | 
    
         
            +
             
     | 
| 
      
 139 
     | 
    
         
            +
                # URL the server jar is fetched from: the :url option when given,
                # otherwise the result of default_download_url. Memoized.
                def download_url
                  # Block form of fetch: default_download_url performs an HTTP request to
                  # pick a mirror, so it must only run when no :url was supplied.
                  @download_url ||= options.fetch(:url) { default_download_url }
                end
         
     | 
| 
      
 142 
     | 
    
         
            +
             
     | 
| 
      
 143 
     | 
    
         
            +
                # Ask the Apache mirror selector (JSON mode) where the jar for the
                # configured version lives; the answer is its "preferred" mirror joined
                # with "path_info". Memoized in @default_url.
                def default_download_url
                  @default_url ||= begin
                    mirror_url = "http://www.apache.org/dyn/closer.cgi/tika/tika-server-#{version}.jar?asjson=true"
                    # URI#open (open-uri) instead of Kernel#open, which no longer opens
                    # URLs on Ruby 3.0+; the block form also closes the response.
                    json = URI.parse(mirror_url).open(&:read)
                    doc = JSON.parse(json)
                    doc['preferred'] + doc['path_info']
                  end
                end
         
     | 
| 
      
 151 
     | 
    
         
            +
             
     | 
| 
      
 152 
     | 
    
         
            +
                def md5url
                  # Checksum file for the configured version on archive.apache.org.
                  release = version
                  "http://archive.apache.org/dist/tika/tika-server-#{release}.jar.md5"
                end
         
     | 
| 
      
 155 
     | 
    
         
            +
             
     | 
| 
      
 156 
     | 
    
         
            +
                def version
                  # The :version option, or the gem-wide default; memoized.
                  return @version if @version

                  @version = options.fetch(:version, default_tika_version)
                end
         
     | 
| 
      
 159 
     | 
    
         
            +
             
     | 
| 
      
 160 
     | 
    
         
            +
                def tika_options
                  # Extra server switches from the :tika_options option; empty hash when unset.
                  options.fetch(:tika_options) { {} }
                end
         
     | 
| 
      
 163 
     | 
    
         
            +
             
     | 
| 
      
 164 
     | 
    
         
            +
                def env
                  # Environment for the spawned process from the :env option; empty hash when unset.
                  options.fetch(:env) { {} }
                end
         
     | 
| 
      
 167 
     | 
    
         
            +
             
     | 
| 
      
 168 
     | 
    
         
            +
                def default_tika_version
                  # Delegates to the module-level default.
                  ::TikaWrapper.default_tika_version
                end
         
     | 
| 
      
 171 
     | 
    
         
            +
             
     | 
| 
      
 172 
     | 
    
         
            +
                # Local path of the server jar: the :download_path option when given,
                # otherwise default_download_path. Memoized.
                def download_path
                  # Block form of fetch: default_download_path calls download_url, which
                  # may perform a network lookup, so only evaluate it when it is needed.
                  @download_path ||= options.fetch(:download_path) { default_download_path }
                end
         
     | 
| 
      
 175 
     | 
    
         
            +
             
     | 
| 
      
 176 
     | 
    
         
            +
                def default_download_path
                  # System temp directory plus the file name taken from the download URL.
                  jar_name = File.basename(download_url)
                  File.join(Dir.tmpdir, jar_name)
                end
         
     | 
| 
      
 179 
     | 
    
         
            +
             
     | 
| 
      
 180 
     | 
    
         
            +
                # Instance directory: the :instance_dir option when given, otherwise a
                # directory under the system temp dir named after the jar (without
                # ".jar"). Memoized.
                def tika_dir
                  # Block form of fetch: the fallback needs download_url, which may
                  # perform a network lookup, so only compute it when it is needed.
                  @tika_dir ||= options.fetch(:instance_dir) do
                    File.join(Dir.tmpdir, File.basename(download_url, ".jar"))
                  end
                end
         
     | 
| 
      
 183 
     | 
    
         
            +
             
     | 
| 
      
 184 
     | 
    
         
            +
                def verbose?
                  # Strict boolean view of the :verbose option; off when unset.
                  options.fetch(:verbose, false) ? true : false
                end
         
     | 
| 
      
 187 
     | 
    
         
            +
             
     | 
| 
      
 188 
     | 
    
         
            +
                def managed?
                  # Strict boolean view of the :managed option; on when unset.
                  options.fetch(:managed, true) ? true : false
                end
         
     | 
| 
      
 191 
     | 
    
         
            +
             
     | 
| 
      
 192 
     | 
    
         
            +
                # Path of the VERSION file: the :version_file option when given,
                # otherwise "VERSION" inside tika_dir.
                def version_file
                  # Block form of fetch so tika_dir (which may need download_url) is only
                  # resolved when no explicit path was configured.
                  options.fetch(:version_file) { File.join(tika_dir, "VERSION") }
                end
         
     | 
| 
      
 195 
     | 
    
         
            +
             
     | 
| 
      
 196 
     | 
    
         
            +
                # MD5 hex digest the jar is expected to have: the :md5sum option when
                # given, otherwise the first whitespace-separated token read from
                # md5file. Memoized in @md5sum.
                def expected_md5sum
                  @md5sum ||= options.fetch(:md5sum) do
                    # Block forms: md5file is only consulted when no :md5sum was
                    # configured, and the handle is closed as soon as it has been read.
                    # NOTE(review): md5file is defined outside this view; presumably it
                    # returns a local path. Confirm.
                    open(md5file, &:read).split(" ").first
                  end
                end
         
     | 
| 
      
 199 
     | 
    
         
            +
             
     | 
| 
      
 200 
     | 
    
         
            +
                def tika_binary
                  # The jar handed to `java -jar` is the downloaded file itself.
                  jar = download_path
                  jar
                end
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
                def md5sum_path
                  # System temp directory plus the file name taken from the checksum URL.
                  checksum_name = File.basename(md5url)
                  File.join(Dir.tmpdir, checksum_name)
                end
         
     | 
| 
      
 207 
     | 
    
         
            +
             
     | 
| 
      
 208 
     | 
    
         
            +
                # A scratch directory, created with Dir.mktmpdir on first use
                # and remembered in @tmp_save_dir for later calls.
                #
                # @return [String] path of the temporary directory
                def tmp_save_dir
                  return @tmp_save_dir if @tmp_save_dir

                  @tmp_save_dir = Dir.mktmpdir
                end
         
     | 
| 
      
 211 
     | 
    
         
            +
             
     | 
| 
      
 212 
     | 
    
         
            +
                # Download +url+ into +output+ while drawing a progress bar.
                #
                # The request goes through URI.open (open-uri). Passing a URL to
                # the bare Kernel#open was deprecated in Ruby 2.7 and no longer
                # works from Ruby 3.0 on, where it is treated as a local file name.
                #
                # @param url [String] remote location to fetch; its basename is
                #   used as the progress bar title
                # @param output [String, IO] destination handed to IO.copy_stream
                def fetch_with_progressbar(url, output)
                  pbar = ProgressBar.create(title: File.basename(url), total: nil, format: "%t: |%B| %p%% (%e )")

                  # The total stays unset unless a positive content length is reported.
                  on_length = lambda do |total|
                    pbar.total = total if total && total > 0
                  end
                  on_progress = lambda do |transferred|
                    pbar.progress = transferred
                  end

                  URI.open(url, content_length_proc: on_length, progress_proc: on_progress) do |io|
                    IO.copy_stream(io, output)
                  end
                end
         
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
                # Make sure the checksum file is present locally and return its path.
                #
                # The file is fetched from md5url only when nothing exists at
                # md5sum_path yet; an existing file is reused as-is.
                #
                # Uses File.exist? — the File.exists? alias was deprecated and
                # has been removed in Ruby 3.2.
                #
                # @return [String] md5sum_path
                def md5file
                  fetch_with_progressbar md5url, md5sum_path unless File.exist? md5sum_path

                  md5sum_path
                end
         
     | 
| 
      
 233 
     | 
    
         
            +
              end
         
     | 
| 
      
 234 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'spec_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            describe TikaWrapper do
              describe ".wrap" do
                it "should launch tika" do
                  TikaWrapper.wrap do |tika|
                    # Opening (and immediately closing) a TCP connection to the
                    # wrapped instance's port must succeed within 15 seconds.
                    expect {
                      Timeout.timeout(15) { TCPSocket.new('127.0.0.1', tika.port).close }
                    }.not_to raise_exception
                  end
                end
              end
            end
         
     | 
    
        data/spec/spec_helper.rb
    ADDED
    
    
| 
         @@ -0,0 +1,28 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # coding: utf-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Put the gem's own lib/ directory on the load path so the version
            # constant can be required below.
            lib_dir = File.expand_path('../lib', __FILE__)
            $LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
            require 'tika_wrapper/version'

            Gem::Specification.new do |gem|
              # Identity
              gem.name     = "tika_wrapper"
              gem.version  = TikaWrapper::VERSION
              gem.authors  = ["Chris Beer"]
              gem.email    = ["chris@cbeer.info"]
              gem.summary  = "Tika service wrapper"
              gem.homepage = "https://github.com/cbeer/tika_wrapper"
              gem.license  = "MIT"

              # Packaged files: everything tracked by git; executables are the
              # basenames of whatever lives under exe/.
              gem.files         = `git ls-files -z`.split("\x0")
              gem.bindir        = 'exe'
              gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
              gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
              gem.require_paths = ["lib"]

              # Runtime dependencies
              gem.add_dependency "ruby-progressbar"

              # Development dependencies
              gem.add_development_dependency "bundler", "~> 1.7"
              gem.add_development_dependency "rake", "~> 10.0"

              gem.add_development_dependency "rspec"
              gem.add_development_dependency "coveralls"
            end
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,136 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: tika_wrapper
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.1
         
     | 
| 
      
 5 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 6 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 7 
     | 
    
         
            +
            - Chris Beer
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 9 
     | 
    
         
            +
            bindir: exe
         
     | 
| 
      
 10 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2015-06-03 00:00:00.000000000 Z
         
     | 
| 
      
 12 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 13 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 14 
     | 
    
         
            +
              name: ruby-progressbar
         
     | 
| 
      
 15 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 16 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 17 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 18 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 19 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 20 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 21 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 22 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 23 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 24 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 25 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 26 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 27 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 28 
     | 
    
         
            +
              name: bundler
         
     | 
| 
      
 29 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 30 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 31 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 32 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 33 
     | 
    
         
            +
                    version: '1.7'
         
     | 
| 
      
 34 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 35 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 36 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 37 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 38 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 39 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 40 
     | 
    
         
            +
                    version: '1.7'
         
     | 
| 
      
 41 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 42 
     | 
    
         
            +
              name: rake
         
     | 
| 
      
 43 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 44 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 45 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 46 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 47 
     | 
    
         
            +
                    version: '10.0'
         
     | 
| 
      
 48 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 49 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 50 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 51 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 52 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 53 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 54 
     | 
    
         
            +
                    version: '10.0'
         
     | 
| 
      
 55 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 56 
     | 
    
         
            +
              name: rspec
         
     | 
| 
      
 57 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 58 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 59 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 60 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 61 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 62 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 63 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 64 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 65 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 66 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 67 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 68 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 69 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 70 
     | 
    
         
            +
              name: coveralls
         
     | 
| 
      
 71 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 72 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 73 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 74 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 75 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 76 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 77 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 78 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 79 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 80 
     | 
    
         
            +
                - - ">="
         
     | 
| 
      
 81 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 82 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 83 
     | 
    
         
            +
            description: 
         
     | 
| 
      
 84 
     | 
    
         
            +
            email:
         
     | 
| 
      
 85 
     | 
    
         
            +
            - chris@cbeer.info
         
     | 
| 
      
 86 
     | 
    
         
            +
            executables:
         
     | 
| 
      
 87 
     | 
    
         
            +
            - tika_wrapper
         
     | 
| 
      
 88 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 89 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 90 
     | 
    
         
            +
            files:
         
     | 
| 
      
 91 
     | 
    
         
            +
            - ".gitignore"
         
     | 
| 
      
 92 
     | 
    
         
            +
            - ".rubocop.yml"
         
     | 
| 
      
 93 
     | 
    
         
            +
            - ".rubocop_hound.yml"
         
     | 
| 
      
 94 
     | 
    
         
            +
            - ".rubocop_todo.yml"
         
     | 
| 
      
 95 
     | 
    
         
            +
            - ".travis.yml"
         
     | 
| 
      
 96 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 97 
     | 
    
         
            +
            - LICENSE
         
     | 
| 
      
 98 
     | 
    
         
            +
            - README.md
         
     | 
| 
      
 99 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 100 
     | 
    
         
            +
            - bin/console
         
     | 
| 
      
 101 
     | 
    
         
            +
            - bin/setup
         
     | 
| 
      
 102 
     | 
    
         
            +
            - coveralls.yml
         
     | 
| 
      
 103 
     | 
    
         
            +
            - exe/tika_wrapper
         
     | 
| 
      
 104 
     | 
    
         
            +
            - lib/tika_wrapper.rb
         
     | 
| 
      
 105 
     | 
    
         
            +
            - lib/tika_wrapper/instance.rb
         
     | 
| 
      
 106 
     | 
    
         
            +
            - lib/tika_wrapper/version.rb
         
     | 
| 
      
 107 
     | 
    
         
            +
            - spec/lib/tika_wrapper_spec.rb
         
     | 
| 
      
 108 
     | 
    
         
            +
            - spec/spec_helper.rb
         
     | 
| 
      
 109 
     | 
    
         
            +
            - tika_wrapper.gemspec
         
     | 
| 
      
 110 
     | 
    
         
            +
            homepage: https://github.com/cbeer/tika_wrapper
         
     | 
| 
      
 111 
     | 
    
         
            +
            licenses:
         
     | 
| 
      
 112 
     | 
    
         
            +
            - MIT
         
     | 
| 
      
 113 
     | 
    
         
            +
            metadata: {}
         
     | 
| 
      
 114 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 115 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 116 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 117 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 118 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 119 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 120 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 121 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 122 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 123 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 124 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 125 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 126 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 127 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 128 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 129 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 130 
     | 
    
         
            +
            rubygems_version: 2.4.5
         
     | 
| 
      
 131 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 132 
     | 
    
         
            +
            specification_version: 4
         
     | 
| 
      
 133 
     | 
    
         
            +
            summary: Tika service wrapper
         
     | 
| 
      
 134 
     | 
    
         
            +
            test_files:
         
     | 
| 
      
 135 
     | 
    
         
            +
            - spec/lib/tika_wrapper_spec.rb
         
     | 
| 
      
 136 
     | 
    
         
            +
            - spec/spec_helper.rb
         
     |