ruby_tika_app 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/{tika-app-1.4.jar → tika-app-1.9.jar} +0 -0
- data/lib/ruby_tika_app.rb +1 -1
- data/ruby_tika_app.gemspec +2 -2
- data/spec/ruby_tika_app_spec.rb +17 -17
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 23bb9e8faef2930749c2a92c52bca6991c6f4cfd
|
4
|
+
data.tar.gz: aff9a6f879ed8f100e3e72cc13077c3e81093eb0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: afc3e589189d68e809f5a7f56742676ad3b9daa520c2fa412f7af84671c416b31e18b90cf89aa713dcdbb8752e605cf511d854c939ea29b4bfbf509d59e742e2
|
7
|
+
data.tar.gz: fc3a95fd56ff9cc646ae01f8ddbfb8942038af77fc8ae7dc6c7de7297eec182b6fad334b09f96ebf065a95c4967da109e6b207b3e4daebd660255753c50ce909
|
Binary file
|
data/lib/ruby_tika_app.rb
CHANGED
@@ -24,7 +24,7 @@ class RubyTikaApp
|
|
24
24
|
|
25
25
|
java_cmd = 'java'
|
26
26
|
java_args = '-server -Djava.awt.headless=true'
|
27
|
-
tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.4.jar"
|
27
|
+
tika_path = "#{File.join(File.dirname(__FILE__))}/../ext/tika-app-1.9.jar"
|
28
28
|
|
29
29
|
@tika_cmd = "#{java_cmd} #{java_args} -jar '#{tika_path}'"
|
30
30
|
end
|
data/ruby_tika_app.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path('../lib', __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'ruby_tika_app'
|
6
|
-
s.version = '1.4.0'
|
6
|
+
s.version = '1.5.0'
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ['Chris Parker']
|
9
9
|
s.email = %w(mrcsparker@gmail.com)
|
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.add_runtime_dependency('open4')
|
24
24
|
|
25
25
|
s.add_development_dependency('rake')
|
26
|
-
s.add_development_dependency('rspec', '~>
|
26
|
+
s.add_development_dependency('rspec', '~> 3.3.0')
|
27
27
|
s.add_development_dependency('bundler', '>= 1.0.15')
|
28
28
|
s.add_development_dependency('simplecov')
|
29
29
|
s.add_development_dependency('json')
|
data/spec/ruby_tika_app_spec.rb
CHANGED
@@ -16,14 +16,14 @@ describe RubyTikaApp do
|
|
16
16
|
expect {
|
17
17
|
rta = RubyTikaApp.new('No file')
|
18
18
|
rta.to_xml
|
19
|
-
}.to raise_error
|
19
|
+
}.to raise_error(RuntimeError)
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
23
|
describe '#to_xml' do
|
24
24
|
it 'header' do
|
25
25
|
rta = RubyTikaApp.new(@test_file)
|
26
|
-
rta.to_xml[0..37].
|
26
|
+
expect(rta.to_xml[0..37]).to eq("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'middle' do
|
@@ -32,81 +32,81 @@ describe RubyTikaApp do
|
|
32
32
|
|
33
33
|
xml_size = xml.size / 2
|
34
34
|
|
35
|
-
xml[xml_size..(xml_size + 100)].
|
35
|
+
expect(xml[xml_size..(xml_size + 100)]).to eq("plicated nodes make the node distribution converge\nto uniform distribution. We do not need to conside")
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
39
|
describe '#to_html' do
|
40
40
|
it 'header' do
|
41
41
|
rta = RubyTikaApp.new(@test_file)
|
42
|
-
rta.to_html[0..42].
|
42
|
+
expect(rta.to_html[0..42]).to eq("<html xmlns=\"http://www.w3.org/1999/xhtml\">")
|
43
43
|
end
|
44
44
|
|
45
45
|
it 'middle' do
|
46
46
|
rta = RubyTikaApp.new(@test_file)
|
47
|
-
rta.to_html[1000 ... 1100].
|
47
|
+
expect(rta.to_html[1000 ... 1100]).to eq("on/pdf\"/>\n<meta name=\"X-Parsed-By\" content=\"org.apache.tika.parser.DefaultParser\"/>\n<meta name=\"X-Pa")
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
51
|
describe '#to_json' do
|
52
52
|
it 'header' do
|
53
53
|
rta = RubyTikaApp.new(@test_file)
|
54
|
-
rta.to_json[0..42].
|
54
|
+
expect(rta.to_json[0..42]).to eq("{\"Application\":\"\\u0027Certified by IEEE PDF")
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'middle' do
|
58
58
|
rta = RubyTikaApp.new(@test_file)
|
59
|
-
rta.to_json[100 ... 150].
|
59
|
+
expect(rta.to_json[100 ... 150]).to eq("\"171510\",\"Content-Type\":\"application/pdf\",\"Creatio")
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
63
63
|
describe '#to_text' do
|
64
64
|
it 'header' do
|
65
65
|
rta = RubyTikaApp.new(@test_file)
|
66
|
-
rta.to_text[0..42].
|
66
|
+
expect(rta.to_text[0..42]).to eq("Understanding Graph Sampling Algorithms\nfor")
|
67
67
|
end
|
68
68
|
|
69
69
|
it 'middle' do
|
70
70
|
rta = RubyTikaApp.new(@test_file)
|
71
|
-
rta.to_text[100 ... 150].
|
71
|
+
expect(rta.to_text[100 ... 150]).to eq("n Zhang3, Tianyin Xu2\n\nLong Jin1, Pan Hui4, Beixin")
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
75
|
describe '#to_text_main' do
|
76
76
|
it 'header' do
|
77
77
|
rta = RubyTikaApp.new(@test_file)
|
78
|
-
rta.to_text_main[0..42].
|
78
|
+
expect(rta.to_text_main[0..42]).to eq('Understanding Graph Sampling Algorithms for')
|
79
79
|
end
|
80
80
|
|
81
81
|
it 'middle' do
|
82
82
|
rta = RubyTikaApp.new(@test_file)
|
83
|
-
rta.to_text_main[100 ... 150].
|
83
|
+
expect(rta.to_text_main[100 ... 150]).to eq("n Zhang3, Tianyin Xu2\nLong Jin1, Pan Hui4, Beixing")
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
87
87
|
describe '#to_metadata' do
|
88
88
|
it 'header' do
|
89
89
|
rta = RubyTikaApp.new(@test_file)
|
90
|
-
rta.to_metadata[0..42].
|
90
|
+
expect(rta.to_metadata[0..42]).to eq("Application: 'Certified by IEEE PDFeXpress ")
|
91
91
|
end
|
92
92
|
|
93
93
|
it 'middle' do
|
94
94
|
rta = RubyTikaApp.new(@test_file)
|
95
|
-
rta.to_metadata[100 ... 150].
|
95
|
+
expect(rta.to_metadata[100 ... 150]).to eq("Type: application/pdf\nCreation-Date: 2011-03-29T12")
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
99
99
|
describe 'external URLs' do
|
100
100
|
it 'should be able to parse an http url' do
|
101
101
|
rta = RubyTikaApp.new('http://localhost:9299/cnn.com')
|
102
|
-
rta.to_text.
|
103
|
-
rta.to_text.
|
102
|
+
expect(rta.to_text).to_not be_nil
|
103
|
+
expect(rta.to_text).to eq(RubyTikaApp.new(@cnn_com_file).to_text)
|
104
104
|
end
|
105
105
|
|
106
106
|
it 'should be able to parse another http url' do
|
107
107
|
rta = RubyTikaApp.new('http://localhost:9299/news.ycombinator.com')
|
108
|
-
rta.to_text.
|
109
|
-
rta.to_text.
|
108
|
+
expect(rta.to_text).to_not be_nil
|
109
|
+
expect(rta.to_text).to eq(RubyTikaApp.new(@news_ycombinator_com_file).to_text)
|
110
110
|
end
|
111
111
|
end
|
112
112
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby_tika_app
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Parker
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-06-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: open4
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 3.3.0
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 3.3.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: bundler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,7 +136,7 @@ files:
|
|
136
136
|
- LICENSE
|
137
137
|
- README.md
|
138
138
|
- Rakefile
|
139
|
-
- ext/tika-app-1.4.jar
|
139
|
+
- ext/tika-app-1.9.jar
|
140
140
|
- lib/ruby_tika_app.rb
|
141
141
|
- ruby_tika_app.gemspec
|
142
142
|
- spec/docs/cnn.com
|
@@ -164,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
164
|
version: '0'
|
165
165
|
requirements: []
|
166
166
|
rubyforge_project: ruby_tika_app
|
167
|
-
rubygems_version: 2.
|
167
|
+
rubygems_version: 2.4.5
|
168
168
|
signing_key:
|
169
169
|
specification_version: 4
|
170
170
|
summary: Wrapper around the tika-app jar
|