rika 1.1.1-java → 1.2.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rika.rb +8 -4
- data/lib/rika/version.rb +1 -1
- data/spec/rika_spec.rb +9 -0
- metadata +2 -2
data/lib/rika.rb
CHANGED
@@ -16,6 +16,7 @@ module Rika
|
|
16
16
|
import org.apache.tika.metadata.Metadata
|
17
17
|
import org.apache.tika.Tika
|
18
18
|
import org.apache.tika.language.LanguageIdentifier
|
19
|
+
import org.apache.tika.detect.DefaultDetector
|
19
20
|
import java.io.FileInputStream
|
20
21
|
import java.net.URL
|
21
22
|
|
@@ -34,12 +35,11 @@ module Rika
|
|
34
35
|
parser.metadata
|
35
36
|
end
|
36
37
|
|
37
|
-
|
38
38
|
class Parser
|
39
39
|
|
40
|
-
def initialize(file_location, max_content_length = -1)
|
40
|
+
def initialize(file_location, max_content_length = -1, detector = DefaultDetector.new)
|
41
41
|
@uri = file_location
|
42
|
-
@tika = Tika.new
|
42
|
+
@tika = Tika.new(detector)
|
43
43
|
@tika.set_max_string_length(max_content_length)
|
44
44
|
@metadata_java = Metadata.new
|
45
45
|
@metadata_ruby = nil
|
@@ -64,7 +64,11 @@ module Rika
|
|
64
64
|
end
|
65
65
|
|
66
66
|
def media_type
|
67
|
-
|
67
|
+
if file?
|
68
|
+
@media_type ||= @tika.detect(java.io.File.new(@uri))
|
69
|
+
else
|
70
|
+
@media_type ||= @tika.detect(input_stream)
|
71
|
+
end
|
68
72
|
end
|
69
73
|
|
70
74
|
def available_metadata
|
data/lib/rika/version.rb
CHANGED
data/spec/rika_spec.rb
CHANGED
@@ -9,6 +9,7 @@ describe Rika::Parser do
|
|
9
9
|
before(:all) do
|
10
10
|
@txt_parser = Rika::Parser.new(file_path("text_file.txt"))
|
11
11
|
@docx_parser = Rika::Parser.new(file_path("document.docx"))
|
12
|
+
@doc_parser = Rika::Parser.new(file_path("document.doc"))
|
12
13
|
@pdf_parser = Rika::Parser.new(file_path("document.pdf"))
|
13
14
|
@image_parser = Rika::Parser.new(file_path("image.jpg"))
|
14
15
|
@unknown_parser = Rika::Parser.new(file_path("unknown.bin"))
|
@@ -151,6 +152,14 @@ describe Rika::Parser do
|
|
151
152
|
it "should return application/octet-stream for unknown file" do
|
152
153
|
@unknown_parser.media_type.should == "application/octet-stream"
|
153
154
|
end
|
155
|
+
|
156
|
+
it "should return msword for a doc file" do
|
157
|
+
@doc_parser.media_type.should == "application/msword"
|
158
|
+
end
|
159
|
+
|
160
|
+
it "should return wordprocessingml for a docx file" do
|
161
|
+
@docx_parser.media_type.should == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
162
|
+
end
|
154
163
|
end
|
155
164
|
|
156
165
|
describe '#language' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rika
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1
|
4
|
+
version: 1.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: java
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-04-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|