ruby_tika_app 0.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -5,3 +5,5 @@ pkg/*
5
5
  .sublime-project
6
6
  .sublime-project.sublime-workspace
7
7
  .rbenv-version
8
+ .idea
9
+ coverage/*
data/.rspec CHANGED
@@ -1,2 +1,3 @@
1
- --color
1
+ --colour --profile
2
2
  --format documentation
3
+ --format progress
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source "http://rubygems.org"
1
+ source 'http://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in ruby_tika_app.gemspec
4
4
  gemspec
data/Rakefile CHANGED
@@ -1 +1 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
@@ -10,20 +10,19 @@ class RubyTikaApp
10
10
 
11
11
  class CommandFailedError < Error
12
12
  attr_reader :status
13
- def initialize status
13
+ def initialize(status)
14
14
  @status = status
15
15
  end
16
16
  end
17
17
 
18
18
  def initialize(document)
19
-
20
- @document = document
19
+ @document = "file://#{document}"
21
20
 
22
21
  java_cmd = 'java'
23
22
  java_args = '-server -Djava.awt.headless=true'
24
23
  tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.2.jar"
25
24
 
26
- @tika_cmd = "#{java_cmd} #{java_args} -jar #{tika_path}"
25
+ @tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}'"
27
26
  end
28
27
 
29
28
  def to_xml
@@ -53,17 +52,14 @@ class RubyTikaApp
53
52
  private
54
53
 
55
54
  def run_tika(option)
56
-
57
- final_cmd = "#{@tika_cmd} #{option} #{@document}"
58
- result = []
59
-
55
+ final_cmd = "#{@tika_cmd} #{option} '#{@document}'"
60
56
 
61
57
  pid, stdin, stdout, stderr = Open4::popen4(final_cmd)
62
58
 
63
59
  stdout_result = stdout.read.strip
64
60
  stderr_result = stderr.read.strip
65
61
 
66
- unless stderr_result.strip == "" then
62
+ unless strip_stderr(stderr_result).empty?
67
63
  raise(CommandFailedError.new(stderr_result),
68
64
  "execution failed with status #{stderr_result}: #{final_cmd}")
69
65
  end
@@ -75,4 +71,8 @@ class RubyTikaApp
75
71
  stderr.close
76
72
  end
77
73
 
74
+ def strip_stderr(s)
75
+ s.gsub(/^(info|warn) - .*$/i, '').strip
76
+ end
77
+
78
78
  end
@@ -1,27 +1,29 @@
1
1
  # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
2
+ $:.push File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
- s.name = "ruby_tika_app"
6
- s.version = "0.3"
5
+ s.name = 'ruby_tika_app'
6
+ s.version = '1.0.0'
7
7
  s.platform = Gem::Platform::RUBY
8
- s.authors = ["Chris Parker"]
9
- s.email = ["mrcsparker@gmail.com"]
10
- s.homepage = "https://github.com/mrcsparker/ruby_tika_app"
8
+ s.authors = ['Chris Parker']
9
+ s.email = %w(mrcsparker@gmail.com)
10
+ s.homepage = 'https://github.com/mrcsparker/ruby_tika_app'
11
11
  s.summary = %q{Wrapper around the tika-app jar}
12
12
  s.description = %q{Wrapper around the tika-app jar}
13
13
 
14
- s.rubyforge_project = "ruby_tika_app"
14
+ s.rubyforge_project = 'ruby_tika_app'
15
15
 
16
16
  s.files = `git ls-files`.split("\n") +
17
17
  %w(LICENSE README.textile HISTORY)
18
18
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
19
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
20
- s.require_paths = ["lib"]
20
+ s.require_paths = %w(lib)
21
21
  s.test_files = Dir.glob('spec/**/*')
22
22
 
23
- s.add_runtime_dependency("open4")
23
+ s.add_runtime_dependency('open4')
24
24
 
25
- s.add_development_dependency("rspec", "~> 2.7.0")
26
- s.add_development_dependency("bundler", ">= 1.0.15")
25
+ s.add_development_dependency('rake')
26
+ s.add_development_dependency('rspec', '~> 2.13.0')
27
+ s.add_development_dependency('bundler', '>= 1.0.15')
28
+ s.add_development_dependency('simplecov')
27
29
  end
@@ -3,84 +3,92 @@ require 'spec_helper'
3
3
  describe RubyTikaApp do
4
4
 
5
5
  before(:each) do
6
- @test_file = "#{File.join(File.dirname(__FILE__))}/docs/graph_sampling_simplex11.pdf"
6
+ @test_file = "#{File.join(File.dirname(__FILE__))}/docs/graph sampling simplex - 11.pdf"
7
7
  end
8
8
 
9
- describe "#to_xml" do
10
- it "header" do
9
+ describe 'Error' do
10
+ it 'has an error' do
11
+ expect {
12
+ rta = RubyTikaApp.new('No file')
13
+ rta.to_xml
14
+ }.to raise_error
15
+ end
16
+ end
17
+
18
+ describe '#to_xml' do
19
+ it 'header' do
11
20
  rta = RubyTikaApp.new(@test_file)
12
21
  rta.to_xml[0..37].should == "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
13
22
  end
14
23
 
15
- it "middle" do
24
+ it 'middle' do
16
25
  rta = RubyTikaApp.new(@test_file)
17
26
  xml = rta.to_xml
18
27
 
19
28
  xml_size = xml.size / 2
20
29
 
21
- xml[xml_size..(xml_size + 100)].should == "(Section IV). Besides,\nMHRW performs better in well connected graphs than in\nloosely connected graphs"
30
+ xml[xml_size..(xml_size + 100)].should == "S (Section IV). Besides,\nMHRW performs better in well connected graphs than in\nloosely connected grap"
22
31
  end
23
32
  end
24
33
 
25
- describe "#to_html" do
26
- it "header" do
34
+ describe '#to_html' do
35
+ it 'header' do
27
36
  rta = RubyTikaApp.new(@test_file)
28
37
  rta.to_html[0..42].should == "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
29
38
  end
30
39
 
31
- it "middle" do
40
+ it 'middle' do
32
41
  rta = RubyTikaApp.new(@test_file)
33
- rta.to_html[1000 ... 1100].should == "rceName\" content=\"graph_sampling_simplex11.pdf\"/>\n<meta name=\"Last-Save-Date\" content=\"2011-03-29T13"
42
+ rta.to_html[1000 ... 1100].should == "rceName\" content=\"graph sampling simplex - 11.pdf\"/>\n<meta name=\"Last-Save-Date\" content=\"2011-03-29"
34
43
  end
35
44
  end
36
45
 
37
- describe "#to_json" do
38
- it "header" do
46
+ describe '#to_json' do
47
+ it 'header' do
39
48
  rta = RubyTikaApp.new(@test_file)
40
49
  rta.to_json[0..42].should == "{ \"Application\":\"\\u0027Certified by IEEE PD"
41
50
  end
42
51
 
43
- it "middle" do
52
+ it 'middle' do
44
53
  rta = RubyTikaApp.new(@test_file)
45
54
  rta.to_json[100 ... 150].should == "h\":171510, \n\"Content-Type\":\"application/pdf\", \n\"Cr"
46
55
  end
47
56
  end
48
57
 
49
- describe "#to_text" do
50
- it "header" do
58
+ describe '#to_text' do
59
+ it 'header' do
51
60
  rta = RubyTikaApp.new(@test_file)
52
61
  rta.to_text[0..42].should == "Understanding Graph Sampling Algorithms\nfor"
53
62
  end
54
63
 
55
- it "middle" do
64
+ it 'middle' do
56
65
  rta = RubyTikaApp.new(@test_file)
57
66
  rta.to_text[100 ... 150].should == "n Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixin"
58
67
  end
59
68
  end
60
69
 
61
- describe "#to_text_main" do
62
- it "header" do
70
+ describe '#to_text_main' do
71
+ it 'header' do
63
72
  rta = RubyTikaApp.new(@test_file)
64
- rta.to_text_main[0..42].should == "Understanding Graph Sampling Algorithms for"
73
+ rta.to_text_main[0..42].should == 'Understanding Graph Sampling Algorithms for'
65
74
  end
66
75
 
67
- it "middle" do
76
+ it 'middle' do
68
77
  rta = RubyTikaApp.new(@test_file)
69
78
  rta.to_text_main[100 ... 150].should == "n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing"
70
79
  end
71
80
  end
72
81
 
73
- describe "#to_metadata" do
74
- it "header" do
82
+ describe '#to_metadata' do
83
+ it 'header' do
75
84
  rta = RubyTikaApp.new(@test_file)
76
85
  rta.to_metadata[0..42].should == "Application: 'Certified by IEEE PDFeXpress "
77
86
  end
78
87
 
79
- it "middle" do
88
+ it 'middle' do
80
89
  rta = RubyTikaApp.new(@test_file)
81
90
  rta.to_metadata[100 ... 150].should == "Type: application/pdf\nCreation-Date: 2011-03-29T12"
82
91
  end
83
-
84
92
  end
85
93
 
86
94
  end
@@ -1,3 +1,6 @@
1
+ require 'simplecov'
2
+ SimpleCov.start
3
+
1
4
  require 'rubygems'
2
5
  require 'bundler/setup'
3
6
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby_tika_app
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-29 00:00:00.000000000 Z
12
+ date: 2013-03-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: open4
@@ -27,6 +27,22 @@ dependencies:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
29
  version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
30
46
  - !ruby/object:Gem::Dependency
31
47
  name: rspec
32
48
  requirement: !ruby/object:Gem::Requirement
@@ -34,7 +50,7 @@ dependencies:
34
50
  requirements:
35
51
  - - ~>
36
52
  - !ruby/object:Gem::Version
37
- version: 2.7.0
53
+ version: 2.13.0
38
54
  type: :development
39
55
  prerelease: false
40
56
  version_requirements: !ruby/object:Gem::Requirement
@@ -42,7 +58,7 @@ dependencies:
42
58
  requirements:
43
59
  - - ~>
44
60
  - !ruby/object:Gem::Version
45
- version: 2.7.0
61
+ version: 2.13.0
46
62
  - !ruby/object:Gem::Dependency
47
63
  name: bundler
48
64
  requirement: !ruby/object:Gem::Requirement
@@ -59,6 +75,22 @@ dependencies:
59
75
  - - ! '>='
60
76
  - !ruby/object:Gem::Version
61
77
  version: 1.0.15
78
+ - !ruby/object:Gem::Dependency
79
+ name: simplecov
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
62
94
  description: Wrapper around the tika-app jar
63
95
  email:
64
96
  - mrcsparker@gmail.com
@@ -76,7 +108,7 @@ files:
76
108
  - ext/tika-app-1.2.jar
77
109
  - lib/ruby_tika_app.rb
78
110
  - ruby_tika_app.gemspec
79
- - spec/docs/graph_sampling_simplex11.pdf
111
+ - spec/docs/graph sampling simplex - 11.pdf
80
112
  - spec/ruby_tika_app_spec.rb
81
113
  - spec/spec_helper.rb
82
114
  homepage: https://github.com/mrcsparker/ruby_tika_app
@@ -99,11 +131,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
99
131
  version: '0'
100
132
  requirements: []
101
133
  rubyforge_project: ruby_tika_app
102
- rubygems_version: 1.8.24
134
+ rubygems_version: 1.8.25
103
135
  signing_key:
104
136
  specification_version: 3
105
137
  summary: Wrapper around the tika-app jar
106
138
  test_files:
107
- - spec/docs/graph_sampling_simplex11.pdf
139
+ - spec/docs/graph sampling simplex - 11.pdf
108
140
  - spec/ruby_tika_app_spec.rb
109
141
  - spec/spec_helper.rb