rika 1.1.1-java → 1.2.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rika.rb +8 -4
- data/lib/rika/version.rb +1 -1
- data/spec/rika_spec.rb +9 -0
- metadata +2 -2
data/lib/rika.rb
CHANGED
@@ -16,6 +16,7 @@ module Rika
|
|
16
16
|
import org.apache.tika.metadata.Metadata
|
17
17
|
import org.apache.tika.Tika
|
18
18
|
import org.apache.tika.language.LanguageIdentifier
|
19
|
+
import org.apache.tika.detect.DefaultDetector
|
19
20
|
import java.io.FileInputStream
|
20
21
|
import java.net.URL
|
21
22
|
|
@@ -34,12 +35,11 @@ module Rika
|
|
34
35
|
parser.metadata
|
35
36
|
end
|
36
37
|
|
37
|
-
|
38
38
|
class Parser
|
39
39
|
|
40
|
-
def initialize(file_location, max_content_length = -1)
|
40
|
+
def initialize(file_location, max_content_length = -1, detector = DefaultDetector.new)
|
41
41
|
@uri = file_location
|
42
|
-
@tika = Tika.new
|
42
|
+
@tika = Tika.new(detector)
|
43
43
|
@tika.set_max_string_length(max_content_length)
|
44
44
|
@metadata_java = Metadata.new
|
45
45
|
@metadata_ruby = nil
|
@@ -64,7 +64,11 @@ module Rika
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def media_type
|
67
|
-
|
67
|
+
if file?
|
68
|
+
@media_type ||= @tika.detect(java.io.File.new(@uri))
|
69
|
+
else
|
70
|
+
@media_type ||= @tika.detect(input_stream)
|
71
|
+
end
|
68
72
|
end
|
69
73
|
|
70
74
|
def available_metadata
|
data/lib/rika/version.rb
CHANGED
data/spec/rika_spec.rb
CHANGED
@@ -9,6 +9,7 @@ describe Rika::Parser do
|
|
9
9
|
before(:all) do
|
10
10
|
@txt_parser = Rika::Parser.new(file_path("text_file.txt"))
|
11
11
|
@docx_parser = Rika::Parser.new(file_path("document.docx"))
|
12
|
+
@doc_parser = Rika::Parser.new(file_path("document.doc"))
|
12
13
|
@pdf_parser = Rika::Parser.new(file_path("document.pdf"))
|
13
14
|
@image_parser = Rika::Parser.new(file_path("image.jpg"))
|
14
15
|
@unknown_parser = Rika::Parser.new(file_path("unknown.bin"))
|
@@ -151,6 +152,14 @@ describe Rika::Parser do
|
|
151
152
|
it "should return application/octet-stream for unknown file" do
|
152
153
|
@unknown_parser.media_type.should == "application/octet-stream"
|
153
154
|
end
|
155
|
+
|
156
|
+
it "should return msword for a doc file" do
|
157
|
+
@doc_parser.media_type.should == "application/msword"
|
158
|
+
end
|
159
|
+
|
160
|
+
it "should return wordprocessingml for a docx file" do
|
161
|
+
@docx_parser.media_type.should == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
162
|
+
end
|
154
163
|
end
|
155
164
|
|
156
165
|
describe '#language' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rika
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: java
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|