ruby_tika_app 0.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -5,3 +5,5 @@ pkg/*
5
5
  .sublime-project
6
6
  .sublime-project.sublime-workspace
7
7
  .rbenv-version
8
+ .idea
9
+ coverage/*
data/.rspec CHANGED
@@ -1,2 +1,3 @@
1
- --color
1
+ --colour --profile
2
2
  --format documentation
3
+ --format progress
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source "http://rubygems.org"
1
+ source 'http://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in ruby_tika_app.gemspec
4
4
  gemspec
data/Rakefile CHANGED
@@ -1 +1 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
data/lib/ruby_tika_app.rb CHANGED
@@ -10,20 +10,19 @@ class RubyTikaApp
10
10
 
11
11
  class CommandFailedError < Error
12
12
  attr_reader :status
13
- def initialize status
13
+ def initialize(status)
14
14
  @status = status
15
15
  end
16
16
  end
17
17
 
18
18
  def initialize(document)
19
-
20
- @document = document
19
+ @document = "file://#{document}"
21
20
 
22
21
  java_cmd = 'java'
23
22
  java_args = '-server -Djava.awt.headless=true'
24
23
  tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.2.jar"
25
24
 
26
- @tika_cmd = "#{java_cmd} #{java_args} -jar #{tika_path}"
25
+ @tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}'"
27
26
  end
28
27
 
29
28
  def to_xml
@@ -53,17 +52,14 @@ class RubyTikaApp
53
52
  private
54
53
 
55
54
  def run_tika(option)
56
-
57
- final_cmd = "#{@tika_cmd} #{option} #{@document}"
58
- result = []
59
-
55
+ final_cmd = "#{@tika_cmd} #{option} '#{@document}'"
60
56
 
61
57
  pid, stdin, stdout, stderr = Open4::popen4(final_cmd)
62
58
 
63
59
  stdout_result = stdout.read.strip
64
60
  stderr_result = stderr.read.strip
65
61
 
66
- unless stderr_result.strip == "" then
62
+ unless strip_stderr(stderr_result).empty?
67
63
  raise(CommandFailedError.new(stderr_result),
68
64
  "execution failed with status #{stderr_result}: #{final_cmd}")
69
65
  end
@@ -75,4 +71,8 @@ class RubyTikaApp
75
71
  stderr.close
76
72
  end
77
73
 
74
+ def strip_stderr(s)
75
+ s.gsub(/^(info|warn) - .*$/i, '').strip
76
+ end
77
+
78
78
  end
data/ruby_tika_app.gemspec CHANGED
@@ -1,27 +1,29 @@
1
1
  # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
2
+ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
- s.name = "ruby_tika_app"
6
- s.version = "0.3"
5
+ s.name = 'ruby_tika_app'
6
+ s.version = '1.0.0'
7
7
  s.platform = Gem::Platform::RUBY
8
- s.authors = ["Chris Parker"]
9
- s.email = ["mrcsparker@gmail.com"]
10
- s.homepage = "https://github.com/mrcsparker/ruby_tika_app"
8
+ s.authors = ['Chris Parker']
9
+ s.email = %w(mrcsparker@gmail.com)
10
+ s.homepage = 'https://github.com/mrcsparker/ruby_tika_app'
11
11
  s.summary = %q{Wrapper around the tika-app jar}
12
12
  s.description = %q{Wrapper around the tika-app jar}
13
13
 
14
- s.rubyforge_project = "ruby_tika_app"
14
+ s.rubyforge_project = 'ruby_tika_app'
15
15
 
16
16
  s.files = `git ls-files`.split("\n") +
17
17
  %w(LICENSE README.textile HISTORY)
18
18
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
- s.require_paths = ["lib"]
20
+ s.require_paths = %w(lib)
21
21
  s.test_files = Dir.glob('spec/**/*')
22
22
 
23
- s.add_runtime_dependency("open4")
23
+ s.add_runtime_dependency('open4')
24
24
 
25
- s.add_development_dependency("rspec", "~> 2.7.0")
26
- s.add_development_dependency("bundler", ">= 1.0.15")
25
+ s.add_development_dependency('rake')
26
+ s.add_development_dependency('rspec', '~> 2.13.0')
27
+ s.add_development_dependency('bundler', '>= 1.0.15')
28
+ s.add_development_dependency('simplecov')
27
29
  end
data/spec/ruby_tika_app_spec.rb CHANGED
@@ -3,84 +3,92 @@ require 'spec_helper'
3
3
  describe RubyTikaApp do
4
4
 
5
5
  before(:each) do
6
- @test_file = "#{File.join(File.dirname(__FILE__))}/docs/graph_sampling_simplex11.pdf"
6
+ @test_file = "#{File.join(File.dirname(__FILE__))}/docs/graph sampling simplex - 11.pdf"
7
7
  end
8
8
 
9
- describe "#to_xml" do
10
- it "header" do
9
+ describe 'Error' do
10
+ it 'has an error' do
11
+ expect {
12
+ rta = RubyTikaApp.new('No file')
13
+ rta.to_xml
14
+ }.to raise_error
15
+ end
16
+ end
17
+
18
+ describe '#to_xml' do
19
+ it 'header' do
11
20
  rta = RubyTikaApp.new(@test_file)
12
21
  rta.to_xml[0..37].should == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
13
22
  end
14
23
 
15
- it "middle" do
24
+ it 'middle' do
16
25
  rta = RubyTikaApp.new(@test_file)
17
26
  xml = rta.to_xml
18
27
 
19
28
  xml_size = xml.size / 2
20
29
 
21
- xml[xml_size..(xml_size + 100)].should == "(Section IV). Besides,\nMHRW performs better in well connected graphs than in\nloosely connected graphs"
30
+ xml[xml_size..(xml_size + 100)].should == "S (Section IV). Besides,\nMHRW performs better in well connected graphs than in\nloosely connected grap"
22
31
  end
23
32
  end
24
33
 
25
- describe "#to_html" do
26
- it "header" do
34
+ describe '#to_html' do
35
+ it 'header' do
27
36
  rta = RubyTikaApp.new(@test_file)
28
37
  rta.to_html[0..42].should == "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
29
38
  end
30
39
 
31
- it "middle" do
40
+ it 'middle' do
32
41
  rta = RubyTikaApp.new(@test_file)
33
- rta.to_html[1000 ... 1100].should == "rceName\" content=\"graph_sampling_simplex11.pdf\"/>\n<meta name=\"Last-Save-Date\" content=\"2011-03-29T13"
42
+ rta.to_html[1000 ... 1100].should == "rceName\" content=\"graph sampling simplex - 11.pdf\"/>\n<meta name=\"Last-Save-Date\" content=\"2011-03-29"
34
43
  end
35
44
  end
36
45
 
37
- describe "#to_json" do
38
- it "header" do
46
+ describe '#to_json' do
47
+ it 'header' do
39
48
  rta = RubyTikaApp.new(@test_file)
40
49
  rta.to_json[0..42].should == "{ \"Application\":\"\\u0027Certified by IEEE PD"
41
50
  end
42
51
 
43
- it "middle" do
52
+ it 'middle' do
44
53
  rta = RubyTikaApp.new(@test_file)
45
54
  rta.to_json[100 ... 150].should == "h\":171510, \n\"Content-Type\":\"application/pdf\", \n\"Cr"
46
55
  end
47
56
  end
48
57
 
49
- describe "#to_text" do
50
- it "header" do
58
+ describe '#to_text' do
59
+ it 'header' do
51
60
  rta = RubyTikaApp.new(@test_file)
52
61
  rta.to_text[0..42].should == "Understanding Graph Sampling Algorithms\nfor"
53
62
  end
54
63
 
55
- it "middle" do
64
+ it 'middle' do
56
65
  rta = RubyTikaApp.new(@test_file)
57
66
  rta.to_text[100 ... 150].should == "n Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixin"
58
67
  end
59
68
  end
60
69
 
61
- describe "#to_text_main" do
62
- it "header" do
70
+ describe '#to_text_main' do
71
+ it 'header' do
63
72
  rta = RubyTikaApp.new(@test_file)
64
- rta.to_text_main[0..42].should == "Understanding Graph Sampling Algorithms for"
73
+ rta.to_text_main[0..42].should == 'Understanding Graph Sampling Algorithms for'
65
74
  end
66
75
 
67
- it "middle" do
76
+ it 'middle' do
68
77
  rta = RubyTikaApp.new(@test_file)
69
78
  rta.to_text_main[100 ... 150].should == "n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing"
70
79
  end
71
80
  end
72
81
 
73
- describe "#to_metadata" do
74
- it "header" do
82
+ describe '#to_metadata' do
83
+ it 'header' do
75
84
  rta = RubyTikaApp.new(@test_file)
76
85
  rta.to_metadata[0..42].should == "Application: 'Certified by IEEE PDFeXpress "
77
86
  end
78
87
 
79
- it "middle" do
88
+ it 'middle' do
80
89
  rta = RubyTikaApp.new(@test_file)
81
90
  rta.to_metadata[100 ... 150].should == "Type: application/pdf\nCreation-Date: 2011-03-29T12"
82
91
  end
83
-
84
92
  end
85
93
 
86
94
  end
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,6 @@
1
+ require 'simplecov'
2
+ SimpleCov.start
3
+
1
4
  require 'rubygems'
2
5
  require 'bundler/setup'
3
6
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_tika_app
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-29 00:00:00.000000000 Z
12
+ date: 2013-03-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: open4
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: rspec
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -34,7 +50,7 @@ dependencies:
34
50
  requirements:
35
51
  - - ~>
36
52
  - !ruby/object:Gem::Version
37
- version: 2.7.0
53
+ version: 2.13.0
38
54
  type: :development
39
55
  prerelease: false
40
56
  version_requirements: !ruby/object:Gem::Requirement
@@ -42,7 +58,7 @@ dependencies:
42
58
  requirements:
43
59
  - - ~>
44
60
  - !ruby/object:Gem::Version
45
- version: 2.7.0
61
+ version: 2.13.0
46
62
  - !ruby/object:Gem::Dependency
47
63
  name: bundler
48
64
  requirement: !ruby/object:Gem::Requirement
@@ -59,6 +75,22 @@ dependencies:
59
75
  - - ! '>='
60
76
  - !ruby/object:Gem::Version
61
77
  version: 1.0.15
78
+ - !ruby/object:Gem::Dependency
79
+ name: simplecov
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
62
94
  description: Wrapper around the tika-app jar
63
95
  email:
64
96
  - mrcsparker@gmail.com
@@ -76,7 +108,7 @@ files:
76
108
  - ext/tika-app-1.2.jar
77
109
  - lib/ruby_tika_app.rb
78
110
  - ruby_tika_app.gemspec
79
- - spec/docs/graph_sampling_simplex11.pdf
111
+ - spec/docs/graph sampling simplex - 11.pdf
80
112
  - spec/ruby_tika_app_spec.rb
81
113
  - spec/spec_helper.rb
82
114
  homepage: https://github.com/mrcsparker/ruby_tika_app
@@ -99,11 +131,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
131
  version: '0'
100
132
  requirements: []
101
133
  rubyforge_project: ruby_tika_app
102
- rubygems_version: 1.8.24
134
+ rubygems_version: 1.8.25
103
135
  signing_key:
104
136
  specification_version: 3
105
137
  summary: Wrapper around the tika-app jar
106
138
  test_files:
107
- - spec/docs/graph_sampling_simplex11.pdf
139
+ - spec/docs/graph sampling simplex - 11.pdf
108
140
  - spec/ruby_tika_app_spec.rb
109
141
  - spec/spec_helper.rb