rika 1.1.1-java → 1.2.0-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,6 +16,7 @@ module Rika
16
16
  import org.apache.tika.metadata.Metadata
17
17
  import org.apache.tika.Tika
18
18
  import org.apache.tika.language.LanguageIdentifier
19
+ import org.apache.tika.detect.DefaultDetector
19
20
  import java.io.FileInputStream
20
21
  import java.net.URL
21
22
 
@@ -34,12 +35,11 @@ module Rika
34
35
  parser.metadata
35
36
  end
36
37
 
37
-
38
38
  class Parser
39
39
 
40
- def initialize(file_location, max_content_length = -1)
40
+ def initialize(file_location, max_content_length = -1, detector = DefaultDetector.new)
41
41
  @uri = file_location
42
- @tika = Tika.new
42
+ @tika = Tika.new(detector)
43
43
  @tika.set_max_string_length(max_content_length)
44
44
  @metadata_java = Metadata.new
45
45
  @metadata_ruby = nil
@@ -64,7 +64,11 @@ module Rika
64
64
  end
65
65
 
66
66
  def media_type
67
- @media_type ||= @tika.detect(input_stream)
67
+ if file?
68
+ @media_type ||= @tika.detect(java.io.File.new(@uri))
69
+ else
70
+ @media_type ||= @tika.detect(input_stream)
71
+ end
68
72
  end
69
73
 
70
74
  def available_metadata
@@ -1,3 +1,3 @@
1
1
  module Rika
2
- VERSION = "1.1.1"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -9,6 +9,7 @@ describe Rika::Parser do
9
9
  before(:all) do
10
10
  @txt_parser = Rika::Parser.new(file_path("text_file.txt"))
11
11
  @docx_parser = Rika::Parser.new(file_path("document.docx"))
12
+ @doc_parser = Rika::Parser.new(file_path("document.doc"))
12
13
  @pdf_parser = Rika::Parser.new(file_path("document.pdf"))
13
14
  @image_parser = Rika::Parser.new(file_path("image.jpg"))
14
15
  @unknown_parser = Rika::Parser.new(file_path("unknown.bin"))
@@ -151,6 +152,14 @@ describe Rika::Parser do
151
152
  it "should return application/octet-stream for unknown file" do
152
153
  @unknown_parser.media_type.should == "application/octet-stream"
153
154
  end
155
+
156
+ it "should return msword for a doc file" do
157
+ @doc_parser.media_type.should == "application/msword"
158
+ end
159
+
160
+ it "should return wordprocessingml for a docx file" do
161
+ @docx_parser.media_type.should == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
162
+ end
154
163
  end
155
164
 
156
165
  describe '#language' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  prerelease:
6
6
  platform: java
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-26 00:00:00.000000000 Z
12
+ date: 2013-04-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec