ruby_tika_app 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/{tika-app-1.4.jar → tika-app-1.9.jar} +0 -0
- data/lib/ruby_tika_app.rb +1 -1
- data/ruby_tika_app.gemspec +2 -2
- data/spec/ruby_tika_app_spec.rb +17 -17
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 23bb9e8faef2930749c2a92c52bca6991c6f4cfd
|
4
|
+
data.tar.gz: aff9a6f879ed8f100e3e72cc13077c3e81093eb0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: afc3e589189d68e809f5a7f56742676ad3b9daa520c2fa412f7af84671c416b31e18b90cf89aa713dcdbb8752e605cf511d854c939ea29b4bfbf509d59e742e2
|
7
|
+
data.tar.gz: fc3a95fd56ff9cc646ae01f8ddbfb8942038af77fc8ae7dc6c7de7297eec182b6fad334b09f96ebf065a95c4967da109e6b207b3e4daebd660255753c50ce909
|
Binary file
|
data/lib/ruby_tika_app.rb
CHANGED
@@ -24,7 +24,7 @@ class RubyTikaApp
|
|
24
24
|
|
25
25
|
java_cmd = 'java'
|
26
26
|
java_args = '-server -Djava.awt.headless=true'
|
27
|
-
tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.4.jar"
|
27
|
+
tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.9.jar"
|
28
28
|
|
29
29
|
@tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}'"
|
30
30
|
end
|
data/ruby_tika_app.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'ruby_tika_app'
|
6
|
-
s.version = '1.4.0'
|
6
|
+
s.version = '1.5.0'
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ['Chris Parker']
|
9
9
|
s.email = %w(mrcsparker@gmail.com)
|
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.add_runtime_dependency('open4')
|
24
24
|
|
25
25
|
s.add_development_dependency('rake')
|
26
|
-
s.add_development_dependency('rspec', '~>
|
26
|
+
s.add_development_dependency('rspec', '~> 3.3.0')
|
27
27
|
s.add_development_dependency('bundler', '>= 1.0.15')
|
28
28
|
s.add_development_dependency('simplecov')
|
29
29
|
s.add_development_dependency('json')
|
data/spec/ruby_tika_app_spec.rb
CHANGED
@@ -16,14 +16,14 @@ describe RubyTikaApp do
|
|
16
16
|
expect {
|
17
17
|
rta = RubyTikaApp.new('No file')
|
18
18
|
rta.to_xml
|
19
|
-
}.to raise_error
|
19
|
+
}.to raise_error(RuntimeError)
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
23
|
describe '#to_xml' do
|
24
24
|
it 'header' do
|
25
25
|
rta = RubyTikaApp.new(@test_file)
|
26
|
-
rta.to_xml[0..37].
|
26
|
+
expect(rta.to_xml[0..37]).to eq("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'middle' do
|
@@ -32,81 +32,81 @@ describe RubyTikaApp do
|
|
32
32
|
|
33
33
|
xml_size = xml.size / 2
|
34
34
|
|
35
|
-
xml[xml_size..(xml_size + 100)].
|
35
|
+
expect(xml[xml_size..(xml_size + 100)]).to eq("plicated nodes make the node distribution converge\nto uniform distribution. We do not need to conside")
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
39
|
describe '#to_html' do
|
40
40
|
it 'header' do
|
41
41
|
rta = RubyTikaApp.new(@test_file)
|
42
|
-
rta.to_html[0..42].
|
42
|
+
expect(rta.to_html[0..42]).to eq("<html xmlns=\"http://www.w3.org/1999/xhtml\">")
|
43
43
|
end
|
44
44
|
|
45
45
|
it 'middle' do
|
46
46
|
rta = RubyTikaApp.new(@test_file)
|
47
|
-
rta.to_html[1000 ... 1100].
|
47
|
+
expect(rta.to_html[1000 ... 1100]).to eq("on/pdf\"/>\n<meta name=\"X-Parsed-By\" content=\"org.apache.tika.parser.DefaultParser\"/>\n<meta name=\"X-Pa")
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
51
|
describe '#to_json' do
|
52
52
|
it 'header' do
|
53
53
|
rta = RubyTikaApp.new(@test_file)
|
54
|
-
rta.to_json[0..42].
|
54
|
+
expect(rta.to_json[0..42]).to eq("{\"Application\":\"\\u0027Certified by IEEE PDF")
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'middle' do
|
58
58
|
rta = RubyTikaApp.new(@test_file)
|
59
|
-
rta.to_json[100 ... 150].
|
59
|
+
expect(rta.to_json[100 ... 150]).to eq("\"171510\",\"Content-Type\":\"application/pdf\",\"Creatio")
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
63
63
|
describe '#to_text' do
|
64
64
|
it 'header' do
|
65
65
|
rta = RubyTikaApp.new(@test_file)
|
66
|
-
rta.to_text[0..42].
|
66
|
+
expect(rta.to_text[0..42]).to eq("Understanding Graph Sampling Algorithms\nfor")
|
67
67
|
end
|
68
68
|
|
69
69
|
it 'middle' do
|
70
70
|
rta = RubyTikaApp.new(@test_file)
|
71
|
-
rta.to_text[100 ... 150].
|
71
|
+
expect(rta.to_text[100 ... 150]).to eq("n Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixin")
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
75
|
describe '#to_text_main' do
|
76
76
|
it 'header' do
|
77
77
|
rta = RubyTikaApp.new(@test_file)
|
78
|
-
rta.to_text_main[0..42].
|
78
|
+
expect(rta.to_text_main[0..42]).to eq('Understanding Graph Sampling Algorithms for')
|
79
79
|
end
|
80
80
|
|
81
81
|
it 'middle' do
|
82
82
|
rta = RubyTikaApp.new(@test_file)
|
83
|
-
rta.to_text_main[100 ... 150].
|
83
|
+
expect(rta.to_text_main[100 ... 150]).to eq("n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing")
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
87
87
|
describe '#to_metadata' do
|
88
88
|
it 'header' do
|
89
89
|
rta = RubyTikaApp.new(@test_file)
|
90
|
-
rta.to_metadata[0..42].
|
90
|
+
expect(rta.to_metadata[0..42]).to eq("Application: 'Certified by IEEE PDFeXpress ")
|
91
91
|
end
|
92
92
|
|
93
93
|
it 'middle' do
|
94
94
|
rta = RubyTikaApp.new(@test_file)
|
95
|
-
rta.to_metadata[100 ... 150].
|
95
|
+
expect(rta.to_metadata[100 ... 150]).to eq("Type: application/pdf\nCreation-Date: 2011-03-29T12")
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
99
99
|
describe 'external URLs' do
|
100
100
|
it 'should be able to parse an http url' do
|
101
101
|
rta = RubyTikaApp.new('http://localhost:9299/cnn.com')
|
102
|
-
rta.to_text.
|
103
|
-
rta.to_text.
|
102
|
+
expect(rta.to_text).to_not be_nil
|
103
|
+
expect(rta.to_text).to eq(RubyTikaApp.new(@cnn_com_file).to_text)
|
104
104
|
end
|
105
105
|
|
106
106
|
it 'should be able to parse another http url' do
|
107
107
|
rta = RubyTikaApp.new('http://localhost:9299/news.ycombinator.com')
|
108
|
-
rta.to_text.
|
109
|
-
rta.to_text.
|
108
|
+
expect(rta.to_text).to_not be_nil
|
109
|
+
expect(rta.to_text).to eq(RubyTikaApp.new(@news_ycombinator_com_file).to_text)
|
110
110
|
end
|
111
111
|
end
|
112
112
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_tika_app
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Parker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: open4
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 3.3.0
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 3.3.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: bundler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,7 +136,7 @@ files:
|
|
136
136
|
- LICENSE
|
137
137
|
- README.md
|
138
138
|
- Rakefile
|
139
|
-
- ext/tika-app-1.4.jar
|
139
|
+
- ext/tika-app-1.9.jar
|
140
140
|
- lib/ruby_tika_app.rb
|
141
141
|
- ruby_tika_app.gemspec
|
142
142
|
- spec/docs/cnn.com
|
@@ -164,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
164
|
version: '0'
|
165
165
|
requirements: []
|
166
166
|
rubyforge_project: ruby_tika_app
|
167
|
-
rubygems_version: 2.
|
167
|
+
rubygems_version: 2.4.5
|
168
168
|
signing_key:
|
169
169
|
specification_version: 4
|
170
170
|
summary: Wrapper around the tika-app jar
|