rika 1.1.1-java → 1.2.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,7 @@ module Rika
16
16
  import org.apache.tika.metadata.Metadata
17
17
  import org.apache.tika.Tika
18
18
  import org.apache.tika.language.LanguageIdentifier
19
+ import org.apache.tika.detect.DefaultDetector
19
20
  import java.io.FileInputStream
20
21
  import java.net.URL
21
22
 
@@ -34,12 +35,11 @@ module Rika
34
35
  parser.metadata
35
36
  end
36
37
 
37
-
38
38
  class Parser
39
39
 
40
- def initialize(file_location, max_content_length = -1)
40
+ def initialize(file_location, max_content_length = -1, detector = DefaultDetector.new)
41
41
  @uri = file_location
42
- @tika = Tika.new
42
+ @tika = Tika.new(detector)
43
43
  @tika.set_max_string_length(max_content_length)
44
44
  @metadata_java = Metadata.new
45
45
  @metadata_ruby = nil
@@ -64,7 +64,11 @@ module Rika
64
64
  end
65
65
 
66
66
  def media_type
67
- @media_type ||= @tika.detect(input_stream)
67
+ if file?
68
+ @media_type ||= @tika.detect(java.io.File.new(@uri))
69
+ else
70
+ @media_type ||= @tika.detect(input_stream)
71
+ end
68
72
  end
69
73
 
70
74
  def available_metadata
@@ -1,3 +1,3 @@
1
1
  module Rika
2
- VERSION = "1.1.1"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -9,6 +9,7 @@ describe Rika::Parser do
9
9
  before(:all) do
10
10
  @txt_parser = Rika::Parser.new(file_path("text_file.txt"))
11
11
  @docx_parser = Rika::Parser.new(file_path("document.docx"))
12
+ @doc_parser = Rika::Parser.new(file_path("document.doc"))
12
13
  @pdf_parser = Rika::Parser.new(file_path("document.pdf"))
13
14
  @image_parser = Rika::Parser.new(file_path("image.jpg"))
14
15
  @unknown_parser = Rika::Parser.new(file_path("unknown.bin"))
@@ -151,6 +152,14 @@ describe Rika::Parser do
151
152
  it "should return application/octet-stream for unknown file" do
152
153
  @unknown_parser.media_type.should == "application/octet-stream"
153
154
  end
155
+
156
+ it "should return msword for a doc file" do
157
+ @doc_parser.media_type.should == "application/msword"
158
+ end
159
+
160
+ it "should return wordprocessingml for a docx file" do
161
+ @docx_parser.media_type.should == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
162
+ end
154
163
  end
155
164
 
156
165
  describe '#language' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  prerelease:
6
6
  platform: java
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-26 00:00:00.000000000 Z
12
+ date: 2013-04-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec