ruby_tika_app 0.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/.rspec +2 -1
- data/Gemfile +1 -1
- data/Rakefile +1 -1
- data/lib/ruby_tika_app.rb +9 -9
- data/ruby_tika_app.gemspec +13 -11
- data/spec/docs/graph_sampling_simplex11.pdf → data/spec/docs/graph sampling simplex - 11.pdf +0 -0
- data/spec/ruby_tika_app_spec.rb +31 -23
- data/spec/spec_helper.rb +3 -0
- metadata +39 -7
data/.gitignore
CHANGED
data/.rspec
CHANGED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1 +1 @@
|
|
1
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
data/lib/ruby_tika_app.rb
CHANGED
@@ -10,20 +10,19 @@ class RubyTikaApp
|
|
10
10
|
|
11
11
|
class CommandFailedError < Error
|
12
12
|
attr_reader :status
|
13
|
-
def initialize
|
13
|
+
def initialize(status)
|
14
14
|
@status = status
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
18
|
def initialize(document)
|
19
|
-
|
20
|
-
@document = document
|
19
|
+
@document = "file://#{document}"
|
21
20
|
|
22
21
|
java_cmd = 'java'
|
23
22
|
java_args = '-server -Djava.awt.headless=true'
|
24
23
|
tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.2.jar"
|
25
24
|
|
26
|
-
@tika_cmd = "#{java_cmd} #{java_args} -jar #{tika_path}"
|
25
|
+
@tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}'"
|
27
26
|
end
|
28
27
|
|
29
28
|
def to_xml
|
@@ -53,17 +52,14 @@ class RubyTikaApp
|
|
53
52
|
private
|
54
53
|
|
55
54
|
def run_tika(option)
|
56
|
-
|
57
|
-
final_cmd = "#{@tika_cmd} #{option} #{@document}"
|
58
|
-
result = []
|
59
|
-
|
55
|
+
final_cmd = "#{@tika_cmd} #{option} '#{@document}'"
|
60
56
|
|
61
57
|
pid, stdin, stdout, stderr = Open4::popen4(final_cmd)
|
62
58
|
|
63
59
|
stdout_result = stdout.read.strip
|
64
60
|
stderr_result = stderr.read.strip
|
65
61
|
|
66
|
-
unless stderr_result.
|
62
|
+
unless strip_stderr(stderr_result).empty?
|
67
63
|
raise(CommandFailedError.new(stderr_result),
|
68
64
|
"execution failed with status #{stderr_result}: #{final_cmd}")
|
69
65
|
end
|
@@ -75,4 +71,8 @@ class RubyTikaApp
|
|
75
71
|
stderr.close
|
76
72
|
end
|
77
73
|
|
74
|
+
def strip_stderr(s)
|
75
|
+
s.gsub(/^(info|warn) - .*$/i, '').strip
|
76
|
+
end
|
77
|
+
|
78
78
|
end
|
data/ruby_tika_app.gemspec
CHANGED
@@ -1,27 +1,29 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
$:.push File.expand_path(
|
2
|
+
$:.push File.expand_path('../lib', __FILE__)
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
|
-
s.name =
|
6
|
-
s.version =
|
5
|
+
s.name = 'ruby_tika_app'
|
6
|
+
s.version = '1.0.0'
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
|
-
s.authors = [
|
9
|
-
s.email =
|
10
|
-
s.homepage =
|
8
|
+
s.authors = ['Chris Parker']
|
9
|
+
s.email = %w(mrcsparker@gmail.com)
|
10
|
+
s.homepage = 'https://github.com/mrcsparker/ruby_tika_app'
|
11
11
|
s.summary = %q{Wrapper around the tika-app jar}
|
12
12
|
s.description = %q{Wrapper around the tika-app jar}
|
13
13
|
|
14
|
-
s.rubyforge_project =
|
14
|
+
s.rubyforge_project = 'ruby_tika_app'
|
15
15
|
|
16
16
|
s.files = `git ls-files`.split("\n") +
|
17
17
|
%w(LICENSE README.textile HISTORY)
|
18
18
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
19
19
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
|
-
s.require_paths =
|
20
|
+
s.require_paths = %w(lib)
|
21
21
|
s.test_files = Dir.glob('spec/**/*')
|
22
22
|
|
23
|
-
s.add_runtime_dependency(
|
23
|
+
s.add_runtime_dependency('open4')
|
24
24
|
|
25
|
-
s.add_development_dependency(
|
26
|
-
s.add_development_dependency(
|
25
|
+
s.add_development_dependency('rake')
|
26
|
+
s.add_development_dependency('rspec', '~> 2.13.0')
|
27
|
+
s.add_development_dependency('bundler', '>= 1.0.15')
|
28
|
+
s.add_development_dependency('simplecov')
|
27
29
|
end
|
data/spec/docs/graph_sampling_simplex11.pdf → data/spec/docs/graph sampling simplex - 11.pdf
RENAMED
File without changes
|
data/spec/ruby_tika_app_spec.rb
CHANGED
@@ -3,84 +3,92 @@ require 'spec_helper'
|
|
3
3
|
describe RubyTikaApp do
|
4
4
|
|
5
5
|
before(:each) do
|
6
|
-
@test_file = "#{File.join(File.dirname(__FILE__))}/docs/
|
6
|
+
@test_file = "#{File.join(File.dirname(__FILE__))}/docs/graph sampling simplex - 11.pdf"
|
7
7
|
end
|
8
8
|
|
9
|
-
describe
|
10
|
-
it
|
9
|
+
describe 'Error' do
|
10
|
+
it 'has an error' do
|
11
|
+
expect {
|
12
|
+
rta = RubyTikaApp.new('No file')
|
13
|
+
rta.to_xml
|
14
|
+
}.to raise_error
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe '#to_xml' do
|
19
|
+
it 'header' do
|
11
20
|
rta = RubyTikaApp.new(@test_file)
|
12
21
|
rta.to_xml[0..37].should == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
|
13
22
|
end
|
14
23
|
|
15
|
-
it
|
24
|
+
it 'middle' do
|
16
25
|
rta = RubyTikaApp.new(@test_file)
|
17
26
|
xml = rta.to_xml
|
18
27
|
|
19
28
|
xml_size = xml.size / 2
|
20
29
|
|
21
|
-
xml[xml_size..(xml_size + 100)].should == "(Section IV). Besides,\nMHRW performs better in well connected graphs than in\nloosely connected
|
30
|
+
xml[xml_size..(xml_size + 100)].should == "S (Section IV). Besides,\nMHRW performs better in well connected graphs than in\nloosely connected grap"
|
22
31
|
end
|
23
32
|
end
|
24
33
|
|
25
|
-
describe
|
26
|
-
it
|
34
|
+
describe '#to_html' do
|
35
|
+
it 'header' do
|
27
36
|
rta = RubyTikaApp.new(@test_file)
|
28
37
|
rta.to_html[0..42].should == "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
|
29
38
|
end
|
30
39
|
|
31
|
-
it
|
40
|
+
it 'middle' do
|
32
41
|
rta = RubyTikaApp.new(@test_file)
|
33
|
-
rta.to_html[1000 ... 1100].should == "rceName\" content=\"
|
42
|
+
rta.to_html[1000 ... 1100].should == "rceName\" content=\"graph sampling simplex - 11.pdf\"/>\n<meta name=\"Last-Save-Date\" content=\"2011-03-29"
|
34
43
|
end
|
35
44
|
end
|
36
45
|
|
37
|
-
describe
|
38
|
-
it
|
46
|
+
describe '#to_json' do
|
47
|
+
it 'header' do
|
39
48
|
rta = RubyTikaApp.new(@test_file)
|
40
49
|
rta.to_json[0..42].should == "{ \"Application\":\"\\u0027Certified by IEEE PD"
|
41
50
|
end
|
42
51
|
|
43
|
-
it
|
52
|
+
it 'middle' do
|
44
53
|
rta = RubyTikaApp.new(@test_file)
|
45
54
|
rta.to_json[100 ... 150].should == "h\":171510, \n\"Content-Type\":\"application/pdf\", \n\"Cr"
|
46
55
|
end
|
47
56
|
end
|
48
57
|
|
49
|
-
describe
|
50
|
-
it
|
58
|
+
describe '#to_text' do
|
59
|
+
it 'header' do
|
51
60
|
rta = RubyTikaApp.new(@test_file)
|
52
61
|
rta.to_text[0..42].should == "Understanding Graph Sampling Algorithms\nfor"
|
53
62
|
end
|
54
63
|
|
55
|
-
it
|
64
|
+
it 'middle' do
|
56
65
|
rta = RubyTikaApp.new(@test_file)
|
57
66
|
rta.to_text[100 ... 150].should == "n Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixin"
|
58
67
|
end
|
59
68
|
end
|
60
69
|
|
61
|
-
describe
|
62
|
-
it
|
70
|
+
describe '#to_text_main' do
|
71
|
+
it 'header' do
|
63
72
|
rta = RubyTikaApp.new(@test_file)
|
64
|
-
rta.to_text_main[0..42].should ==
|
73
|
+
rta.to_text_main[0..42].should == 'Understanding Graph Sampling Algorithms for'
|
65
74
|
end
|
66
75
|
|
67
|
-
it
|
76
|
+
it 'middle' do
|
68
77
|
rta = RubyTikaApp.new(@test_file)
|
69
78
|
rta.to_text_main[100 ... 150].should == "n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing"
|
70
79
|
end
|
71
80
|
end
|
72
81
|
|
73
|
-
describe
|
74
|
-
it
|
82
|
+
describe '#to_metadata' do
|
83
|
+
it 'header' do
|
75
84
|
rta = RubyTikaApp.new(@test_file)
|
76
85
|
rta.to_metadata[0..42].should == "Application: 'Certified by IEEE PDFeXpress "
|
77
86
|
end
|
78
87
|
|
79
|
-
it
|
88
|
+
it 'middle' do
|
80
89
|
rta = RubyTikaApp.new(@test_file)
|
81
90
|
rta.to_metadata[100 ... 150].should == "Type: application/pdf\nCreation-Date: 2011-03-29T12"
|
82
91
|
end
|
83
|
-
|
84
92
|
end
|
85
93
|
|
86
94
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_tika_app
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-03-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: open4
|
@@ -27,6 +27,22 @@ dependencies:
|
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rake
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
30
46
|
- !ruby/object:Gem::Dependency
|
31
47
|
name: rspec
|
32
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -34,7 +50,7 @@ dependencies:
|
|
34
50
|
requirements:
|
35
51
|
- - ~>
|
36
52
|
- !ruby/object:Gem::Version
|
37
|
-
version: 2.
|
53
|
+
version: 2.13.0
|
38
54
|
type: :development
|
39
55
|
prerelease: false
|
40
56
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -42,7 +58,7 @@ dependencies:
|
|
42
58
|
requirements:
|
43
59
|
- - ~>
|
44
60
|
- !ruby/object:Gem::Version
|
45
|
-
version: 2.
|
61
|
+
version: 2.13.0
|
46
62
|
- !ruby/object:Gem::Dependency
|
47
63
|
name: bundler
|
48
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -59,6 +75,22 @@ dependencies:
|
|
59
75
|
- - ! '>='
|
60
76
|
- !ruby/object:Gem::Version
|
61
77
|
version: 1.0.15
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: simplecov
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
62
94
|
description: Wrapper around the tika-app jar
|
63
95
|
email:
|
64
96
|
- mrcsparker@gmail.com
|
@@ -76,7 +108,7 @@ files:
|
|
76
108
|
- ext/tika-app-1.2.jar
|
77
109
|
- lib/ruby_tika_app.rb
|
78
110
|
- ruby_tika_app.gemspec
|
79
|
-
- spec/docs/
|
111
|
+
- spec/docs/graph sampling simplex - 11.pdf
|
80
112
|
- spec/ruby_tika_app_spec.rb
|
81
113
|
- spec/spec_helper.rb
|
82
114
|
homepage: https://github.com/mrcsparker/ruby_tika_app
|
@@ -99,11 +131,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
131
|
version: '0'
|
100
132
|
requirements: []
|
101
133
|
rubyforge_project: ruby_tika_app
|
102
|
-
rubygems_version: 1.8.
|
134
|
+
rubygems_version: 1.8.25
|
103
135
|
signing_key:
|
104
136
|
specification_version: 3
|
105
137
|
summary: Wrapper around the tika-app jar
|
106
138
|
test_files:
|
107
|
-
- spec/docs/
|
139
|
+
- spec/docs/graph sampling simplex - 11.pdf
|
108
140
|
- spec/ruby_tika_app_spec.rb
|
109
141
|
- spec/spec_helper.rb
|