ruby_tika_app 1.8.0 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bbd4069c575a64c475bf64daa5529fb6d2d552ddd5282791708de6344bbe2755
4
- data.tar.gz: b4254a9b725fa8a320fbe6a90d1c41077d972a181fb1f3a90447061fa43be9f6
3
+ metadata.gz: f0427b0e689b3e45dfb50a6e97819438c306606f7b889e60da80752c21d72b22
4
+ data.tar.gz: c8cd7c7eee7e1159f550873351cbfb89ce35f342d4a81b21831c76baecb3e64a
5
5
  SHA512:
6
- metadata.gz: f87262cb29711262b8ceef0ec55a4dd6db9a964999190734242efa00f92e4847a7ab72e1fae3e53318f93fb08b82f40dbd345f2a79021e4839a861f59878a970
7
- data.tar.gz: cc425ee26c3a8f7879ae58b2872704e15bf589961b22d40c73bd43caea0b1720a589670c5897dee03acb707dbe067a7b97587bb0ce48818c541282f604b3b8da
6
+ metadata.gz: 1c2fd6d9c927085051b5bda116d337869289a1a8616a0ce445c0914c4c223ca54f38726144dbad522fe7eb2e43d6eb3c7c520a6324b014c37eac41ecb91b4973
7
+ data.tar.gz: bcf71992f5372c8594a452fedce88f0d22f435104f7b63477ec49beaa62e9b0ec50bb73ee6939942cb84a963add608f64a80bbc294b17cc4f024232d3b24d901
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'http://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in ruby_tika_app.gemspec
data/HISTORY CHANGED
@@ -1,3 +1,6 @@
1
+ 1.9.0 - February 4, 2020
2
+ * Bumped tika to 1.23
3
+
1
4
  1.0.1 - May 8, 2013
2
5
  * Fixed issue where URLs were not being parsed.
3
6
 
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011-2018 Chris Parker
1
+ Copyright (c) 2011-2020 Chris Parker
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -6,7 +6,7 @@ This is a simple frontend to the Java Tika parser command line jar / app.
6
6
 
7
7
  It is the same as running:
8
8
 
9
- java -server -Djava.awt.headless=true -jar tika-app-0.10.jar FileToParse.pdf
9
+ java -server -Djava.awt.headless=true -Dfile.encoding=UTF-8 -jar tika-app-1.23.jar FileToParse.pdf
10
10
 
11
11
  with options like --xml, --text, etc.
12
12
 
data/Rakefile CHANGED
@@ -1 +1,3 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bundler/gem_tasks'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Based on the rake remote task code
2
4
 
3
5
  require 'rubygems'
@@ -5,6 +7,8 @@ require 'stringio'
5
7
  require 'open4'
6
8
 
7
9
  class RubyTikaApp
10
+ TIKA_APP_VERSION = '1.23'
11
+
8
12
  class Error < RuntimeError; end
9
13
 
10
14
  class CommandFailedError < Error
@@ -22,9 +26,9 @@ class RubyTikaApp
22
26
  end
23
27
 
24
28
  java_cmd = 'java'
25
- java_args = '-server -Djava.awt.headless=true'
29
+ java_args = '-server -Djava.awt.headless=true -Dfile.encoding=UTF-8'
26
30
  ext_dir = File.join(File.dirname(__FILE__))
27
- tika_path = "#{ext_dir}/../ext/tika-app-1.19.1.jar"
31
+ tika_path = "#{ext_dir}/../ext/tika-app-#{TIKA_APP_VERSION}.jar"
28
32
  tika_config_path = "#{ext_dir}/../ext/tika-config.xml"
29
33
 
30
34
  @tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}' --config='#{tika_config_path}'"
@@ -38,7 +42,7 @@ class RubyTikaApp
38
42
  run_tika('--html')
39
43
  end
40
44
 
41
- def to_json
45
+ def to_json(*_args)
42
46
  run_tika('--json')
43
47
  end
44
48
 
@@ -64,7 +68,7 @@ class RubyTikaApp
64
68
  stdout_result = stdout.read.strip
65
69
  stderr_result = stderr.read.strip
66
70
 
67
- unless strip_stderr(stderr_result).empty?
71
+ if stdout_result.empty? && !stderr_result.empty?
68
72
  raise(CommandFailedError.new(stderr_result),
69
73
  "execution failed with status #{stderr_result}: #{final_cmd}")
70
74
  end
@@ -75,12 +79,4 @@ class RubyTikaApp
75
79
  stdout.close
76
80
  stderr.close
77
81
  end
78
-
79
- def strip_stderr(err)
80
- err
81
- .gsub(/^(info|warn) - .*$/i, '')
82
- .strip
83
- .gsub(/Picked up JAVA_TOOL_OPTIONS: .+ -Dfile.encoding=UTF-8/i, '')
84
- .strip
85
- end
86
82
  end
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  $LOAD_PATH.push File.expand_path('lib', __dir__)
2
4
 
3
5
  Gem::Specification.new do |s|
4
6
  s.name = 'ruby_tika_app'
5
- s.version = '1.8.0'
7
+ s.version = '1.9.0'
6
8
  s.platform = Gem::Platform::RUBY
7
9
  s.authors = ['Chris Parker']
8
10
  s.email = %w[mrcsparker@gmail.com]
@@ -10,8 +12,6 @@ Gem::Specification.new do |s|
10
12
  s.summary = 'Wrapper around the tika-app jar'
11
13
  s.description = 'Wrapper around the tika-app jar'
12
14
 
13
- s.rubyforge_project = 'ruby_tika_app'
14
-
15
15
  s.files = `git ls-files`.split("\n") +
16
16
  %w[LICENSE README.md HISTORY]
17
17
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
@@ -25,7 +25,7 @@ Gem::Specification.new do |s|
25
25
  s.add_development_dependency('pry')
26
26
  s.add_development_dependency('rack')
27
27
  s.add_development_dependency('rake')
28
- s.add_development_dependency('rspec', '~> 3.8.0')
28
+ s.add_development_dependency('rspec', '~> 3.9.0')
29
29
  s.add_development_dependency('simplecov')
30
30
  s.add_development_dependency('thin')
31
31
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe RubyTikaApp do
@@ -19,6 +21,15 @@ describe RubyTikaApp do
19
21
  end
20
22
  end
21
23
 
24
+ describe 'CommandFailedError' do
25
+ it 'is raised correctly' do
26
+ expect do
27
+ rta = RubyTikaApp.new('/file_not_found.pdf')
28
+ rta.to_text
29
+ end.to raise_error(RubyTikaApp::CommandFailedError)
30
+ end
31
+ end
32
+
22
33
  describe '#to_xml' do
23
34
  it 'header' do
24
35
  rta = RubyTikaApp.new(@test_file)
@@ -31,7 +42,7 @@ describe RubyTikaApp do
31
42
 
32
43
  xml_size = xml.size / 2
33
44
 
34
- expect(xml[xml_size..(xml_size + 100)]).to eq("ph\nG. This methodology is also used in Frontier Sampling (FS).\nSince this is the only difference betw")
45
+ expect(xml[xml_size..(xml_size + 100)]).to eq("dology is also used in Frontier Sampling (FS).\nSince this is the only difference between MHRW and USD")
35
46
  end
36
47
  end
37
48
 
@@ -43,7 +54,7 @@ describe RubyTikaApp do
43
54
 
44
55
  it 'middle' do
45
56
  rta = RubyTikaApp.new(@test_file)
46
- expect(rta.to_html[1000...1100]).to eq("Z\"/>\n<meta name=\"meta:save-date\" content=\"2011-03-29T13:00:16Z\"/>\n<meta name=\"pdf:encrypted\" content")
57
+ expect(rta.to_html[1000...1100]).to eq("nfo:modified\" content=\"2011-03-29T13:00:16Z\"/>\n<meta name=\"meta:save-date\" content=\"2011-03-29T13:00")
47
58
  end
48
59
  end
49
60
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'simplecov'
2
4
  SimpleCov.start
3
5
 
@@ -8,11 +10,11 @@ require 'ruby_tika_app'
8
10
  require 'rspec'
9
11
 
10
12
  # Include all files under spec/support
11
- Dir['./spec/support/**/*.rb'].each { |f| require f }
13
+ Dir['./spec/support/**/*.rb'].sort.each { |f| require f }
12
14
 
13
15
  # Start a local rack server to serve up test pages.
14
16
  @server_thread = Thread.new do
15
- Rack::Handler::Thin.run MyApp::Test::Server.new, Port: 9299
17
+ Rack::Handler::Thin.run(MyApp::Test::Server.new, Port: 9299, Host: '127.0.0.1')
16
18
  end
17
19
 
18
20
  sleep(1) # wait a sec for the server to be booted
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'rack'
3
5
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_tika_app
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Parker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-11 00:00:00.000000000 Z
11
+ date: 2020-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: open4
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 3.8.0
103
+ version: 3.9.0
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 3.8.0
110
+ version: 3.9.0
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: simplecov
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -150,7 +150,7 @@ files:
150
150
  - LICENSE
151
151
  - README.md
152
152
  - Rakefile
153
- - ext/tika-app-1.19.1.jar
153
+ - ext/tika-app-1.23.jar
154
154
  - ext/tika-config.xml
155
155
  - lib/ruby_tika_app.rb
156
156
  - ruby_tika_app.gemspec
@@ -178,15 +178,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
178
  - !ruby/object:Gem::Version
179
179
  version: '0'
180
180
  requirements: []
181
- rubyforge_project: ruby_tika_app
182
- rubygems_version: 2.7.7
181
+ rubygems_version: 3.1.2
183
182
  signing_key:
184
183
  specification_version: 4
185
184
  summary: Wrapper around the tika-app jar
186
185
  test_files:
187
- - spec/ruby_tika_app_spec.rb
188
- - spec/docs/graph sampling simplex - 11.pdf
186
+ - spec/spec_helper.rb
189
187
  - spec/docs/cnn.com
190
188
  - spec/docs/news.ycombinator.com
189
+ - spec/docs/graph sampling simplex - 11.pdf
191
190
  - spec/support/test_server.rb
192
- - spec/spec_helper.rb
191
+ - spec/ruby_tika_app_spec.rb