ruby_tika_app 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bbd4069c575a64c475bf64daa5529fb6d2d552ddd5282791708de6344bbe2755
4
- data.tar.gz: b4254a9b725fa8a320fbe6a90d1c41077d972a181fb1f3a90447061fa43be9f6
3
+ metadata.gz: f0427b0e689b3e45dfb50a6e97819438c306606f7b889e60da80752c21d72b22
4
+ data.tar.gz: c8cd7c7eee7e1159f550873351cbfb89ce35f342d4a81b21831c76baecb3e64a
5
5
  SHA512:
6
- metadata.gz: f87262cb29711262b8ceef0ec55a4dd6db9a964999190734242efa00f92e4847a7ab72e1fae3e53318f93fb08b82f40dbd345f2a79021e4839a861f59878a970
7
- data.tar.gz: cc425ee26c3a8f7879ae58b2872704e15bf589961b22d40c73bd43caea0b1720a589670c5897dee03acb707dbe067a7b97587bb0ce48818c541282f604b3b8da
6
+ metadata.gz: 1c2fd6d9c927085051b5bda116d337869289a1a8616a0ce445c0914c4c223ca54f38726144dbad522fe7eb2e43d6eb3c7c520a6324b014c37eac41ecb91b4973
7
+ data.tar.gz: bcf71992f5372c8594a452fedce88f0d22f435104f7b63477ec49beaa62e9b0ec50bb73ee6939942cb84a963add608f64a80bbc294b17cc4f024232d3b24d901
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'http://rubygems.org'
2
4
 
3
5
  # Specify your gem's dependencies in ruby_tika_app.gemspec
data/HISTORY CHANGED
@@ -1,3 +1,6 @@
1
+ 1.9.0 - February 4, 2020
2
+ * Bumped tika to 1.23
3
+
1
4
  1.0.1 - May 8, 2013
2
5
  * Fixed issue where URLs were not being parsed.
3
6
 
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2011-2018 Chris Parker
1
+ Copyright (c) 2011-2020 Chris Parker
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining
4
4
  a copy of this software and associated documentation files (the
data/README.md CHANGED
@@ -6,7 +6,7 @@ This is a simple frontend to the Java Tika parser command line jar / app.
6
6
 
7
7
  It is the same as running:
8
8
 
9
- java -server -Djava.awt.headless=true -jar tika-app-0.10.jar FileToParse.pdf
9
+ java -server -Djava.awt.headless=true -Dfile.encoding=UTF-8 -jar tika-app-1.23.jar FileToParse.pdf
10
10
 
11
11
  with options like --xml, --text, etc.
12
12
 
data/Rakefile CHANGED
@@ -1 +1,3 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'bundler/gem_tasks'
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Based on the rake remote task code
2
4
 
3
5
  require 'rubygems'
@@ -5,6 +7,8 @@ require 'stringio'
5
7
  require 'open4'
6
8
 
7
9
  class RubyTikaApp
10
+ TIKA_APP_VERSION = '1.23'
11
+
8
12
  class Error < RuntimeError; end
9
13
 
10
14
  class CommandFailedError < Error
@@ -22,9 +26,9 @@ class RubyTikaApp
22
26
  end
23
27
 
24
28
  java_cmd = 'java'
25
- java_args = '-server -Djava.awt.headless=true'
29
+ java_args = '-server -Djava.awt.headless=true -Dfile.encoding=UTF-8'
26
30
  ext_dir = File.join(File.dirname(__FILE__))
27
- tika_path = "#{ext_dir}/../ext/tika-app-1.19.1.jar"
31
+ tika_path = "#{ext_dir}/../ext/tika-app-#{TIKA_APP_VERSION}.jar"
28
32
  tika_config_path = "#{ext_dir}/../ext/tika-config.xml"
29
33
 
30
34
  @tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}' --config='#{tika_config_path}'"
@@ -38,7 +42,7 @@ class RubyTikaApp
38
42
  run_tika('--html')
39
43
  end
40
44
 
41
- def to_json
45
+ def to_json(*_args)
42
46
  run_tika('--json')
43
47
  end
44
48
 
@@ -64,7 +68,7 @@ class RubyTikaApp
64
68
  stdout_result = stdout.read.strip
65
69
  stderr_result = stderr.read.strip
66
70
 
67
- unless strip_stderr(stderr_result).empty?
71
+ if stdout_result.empty? && !stderr_result.empty?
68
72
  raise(CommandFailedError.new(stderr_result),
69
73
  "execution failed with status #{stderr_result}: #{final_cmd}")
70
74
  end
@@ -75,12 +79,4 @@ class RubyTikaApp
75
79
  stdout.close
76
80
  stderr.close
77
81
  end
78
-
79
- def strip_stderr(err)
80
- err
81
- .gsub(/^(info|warn) - .*$/i, '')
82
- .strip
83
- .gsub(/Picked up JAVA_TOOL_OPTIONS: .+ -Dfile.encoding=UTF-8/i, '')
84
- .strip
85
- end
86
82
  end
@@ -1,8 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  $LOAD_PATH.push File.expand_path('lib', __dir__)
2
4
 
3
5
  Gem::Specification.new do |s|
4
6
  s.name = 'ruby_tika_app'
5
- s.version = '1.8.0'
7
+ s.version = '1.9.0'
6
8
  s.platform = Gem::Platform::RUBY
7
9
  s.authors = ['Chris Parker']
8
10
  s.email = %w[mrcsparker@gmail.com]
@@ -10,8 +12,6 @@ Gem::Specification.new do |s|
10
12
  s.summary = 'Wrapper around the tika-app jar'
11
13
  s.description = 'Wrapper around the tika-app jar'
12
14
 
13
- s.rubyforge_project = 'ruby_tika_app'
14
-
15
15
  s.files = `git ls-files`.split("\n") +
16
16
  %w[LICENSE README.md HISTORY]
17
17
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
@@ -25,7 +25,7 @@ Gem::Specification.new do |s|
25
25
  s.add_development_dependency('pry')
26
26
  s.add_development_dependency('rack')
27
27
  s.add_development_dependency('rake')
28
- s.add_development_dependency('rspec', '~> 3.8.0')
28
+ s.add_development_dependency('rspec', '~> 3.9.0')
29
29
  s.add_development_dependency('simplecov')
30
30
  s.add_development_dependency('thin')
31
31
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'spec_helper'
2
4
 
3
5
  describe RubyTikaApp do
@@ -19,6 +21,15 @@ describe RubyTikaApp do
19
21
  end
20
22
  end
21
23
 
24
+ describe 'CommandFailedError' do
25
+ it 'is raised correctly' do
26
+ expect do
27
+ rta = RubyTikaApp.new('/file_not_found.pdf')
28
+ rta.to_text
29
+ end.to raise_error(RubyTikaApp::CommandFailedError)
30
+ end
31
+ end
32
+
22
33
  describe '#to_xml' do
23
34
  it 'header' do
24
35
  rta = RubyTikaApp.new(@test_file)
@@ -31,7 +42,7 @@ describe RubyTikaApp do
31
42
 
32
43
  xml_size = xml.size / 2
33
44
 
34
- expect(xml[xml_size..(xml_size + 100)]).to eq("ph\nG. This methodology is also used in Frontier Sampling (FS).\nSince this is the only difference betw")
45
+ expect(xml[xml_size..(xml_size + 100)]).to eq("dology is also used in Frontier Sampling (FS).\nSince this is the only difference between MHRW and USD")
35
46
  end
36
47
  end
37
48
 
@@ -43,7 +54,7 @@ describe RubyTikaApp do
43
54
 
44
55
  it 'middle' do
45
56
  rta = RubyTikaApp.new(@test_file)
46
- expect(rta.to_html[1000...1100]).to eq("Z\"/>\n<meta name=\"meta:save-date\" content=\"2011-03-29T13:00:16Z\"/>\n<meta name=\"pdf:encrypted\" content")
57
+ expect(rta.to_html[1000...1100]).to eq("nfo:modified\" content=\"2011-03-29T13:00:16Z\"/>\n<meta name=\"meta:save-date\" content=\"2011-03-29T13:00")
47
58
  end
48
59
  end
49
60
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'simplecov'
2
4
  SimpleCov.start
3
5
 
@@ -8,11 +10,11 @@ require 'ruby_tika_app'
8
10
  require 'rspec'
9
11
 
10
12
  # Include all files under spec/support
11
- Dir['./spec/support/**/*.rb'].each { |f| require f }
13
+ Dir['./spec/support/**/*.rb'].sort.each { |f| require f }
12
14
 
13
15
  # Start a local rack server to serve up test pages.
14
16
  @server_thread = Thread.new do
15
- Rack::Handler::Thin.run MyApp::Test::Server.new, Port: 9299
17
+ Rack::Handler::Thin.run(MyApp::Test::Server.new, Port: 9299, Host: '127.0.0.1')
16
18
  end
17
19
 
18
20
  sleep(1) # wait a sec for the server to be booted
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'rack'
3
5
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_tika_app
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.8.0
4
+ version: 1.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Parker
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-11 00:00:00.000000000 Z
11
+ date: 2020-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: open4
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 3.8.0
103
+ version: 3.9.0
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 3.8.0
110
+ version: 3.9.0
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: simplecov
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -150,7 +150,7 @@ files:
150
150
  - LICENSE
151
151
  - README.md
152
152
  - Rakefile
153
- - ext/tika-app-1.19.1.jar
153
+ - ext/tika-app-1.23.jar
154
154
  - ext/tika-config.xml
155
155
  - lib/ruby_tika_app.rb
156
156
  - ruby_tika_app.gemspec
@@ -178,15 +178,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
178
  - !ruby/object:Gem::Version
179
179
  version: '0'
180
180
  requirements: []
181
- rubyforge_project: ruby_tika_app
182
- rubygems_version: 2.7.7
181
+ rubygems_version: 3.1.2
183
182
  signing_key:
184
183
  specification_version: 4
185
184
  summary: Wrapper around the tika-app jar
186
185
  test_files:
187
- - spec/ruby_tika_app_spec.rb
188
- - spec/docs/graph sampling simplex - 11.pdf
186
+ - spec/spec_helper.rb
189
187
  - spec/docs/cnn.com
190
188
  - spec/docs/news.ycombinator.com
189
+ - spec/docs/graph sampling simplex - 11.pdf
191
190
  - spec/support/test_server.rb
192
- - spec/spec_helper.rb
191
+ - spec/ruby_tika_app_spec.rb