rika 0.9.7-java → 0.9.8-java

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rika.rb CHANGED
@@ -11,7 +11,7 @@ Dir[File.join(File.dirname(__FILE__), "../target/dependency/*.jar")].each do |ja
11
11
  require jar
12
12
  end
13
13
 
14
- # Heavily based on the Apache Tika API: http://tika.apache.org/1.2/api/org/apache/tika/Tika.html
14
+ # Heavily based on the Apache Tika API: http://tika.apache.org/1.3/api/org/apache/tika/Tika.html
15
15
  module Rika
16
16
  import org.apache.tika.metadata.Metadata
17
17
  import org.apache.tika.Tika
@@ -24,14 +24,9 @@ module Rika
24
24
  @uri = file_location
25
25
  @tika = Tika.new
26
26
  @tika.set_max_string_length(max_content_length)
27
- @metadata = Metadata.new
28
-
29
- @is_file = File.exists?(@uri) && File.directory?(@uri) == false
30
- is_http = URI(@uri).scheme == "http" && Net::HTTP.get_response(URI(@uri)).is_a?(Net::HTTPSuccess) if !@is_file
31
-
32
- if !@is_file && !is_http
33
- raise IOError, "File does not exist or can't be reached."
34
- end
27
+ @metadata_java = Metadata.new
28
+ @metadata_ruby = nil
29
+ @input_type = get_input_type
35
30
  end
36
31
 
37
32
  def content
@@ -40,14 +35,15 @@ module Rika
40
35
  end
41
36
 
42
37
  def metadata
43
- self.parse
44
- metadata_hash = {}
38
+ unless @metadata_ruby
39
+ self.parse
40
+ @metadata_ruby = {}
45
41
 
46
- @metadata.names.each do |name|
47
- metadata_hash[name] = @metadata.get(name)
42
+ @metadata_java.names.each do |name|
43
+ @metadata_ruby[name] = @metadata_java.get(name)
44
+ end
48
45
  end
49
-
50
- metadata_hash
46
+ @metadata_ruby
51
47
  end
52
48
 
53
49
  def media_type
@@ -55,25 +51,37 @@ module Rika
55
51
  end
56
52
 
57
53
  def available_metadata
58
- self.parse
59
- @metadata.names.to_a
54
+ metadata.keys
60
55
  end
61
56
 
62
57
  def metadata_exists?(name)
63
- self.parse
64
- @metadata.get(name) != nil
58
+ metadata[name] != nil
59
+ end
60
+
61
+ def file?
62
+ @input_type == :file
65
63
  end
66
64
 
67
65
  protected
68
66
 
69
67
  def parse
70
- @content ||= @tika.parse_to_string(input_stream, @metadata).to_s.strip
68
+ @content ||= @tika.parse_to_string(input_stream, @metadata_java).to_s.strip
69
+ end
70
+
71
+ def get_input_type
72
+ if File.exists?(@uri) && File.directory?(@uri) == false
73
+ :file
74
+ elsif URI(@uri).scheme == "http" && Net::HTTP.get_response(URI(@uri)).is_a?(Net::HTTPSuccess)
75
+ :http
76
+ else
77
+ raise IOError, "Input (#{@uri}) is neither file nor http."
78
+ end
71
79
  end
72
80
 
73
81
  def input_stream
74
- if @is_file
82
+ if file?
75
83
  FileInputStream.new(java.io.File.new(@uri))
76
- else
84
+ else # :http
77
85
  URL.new(@uri).open_stream
78
86
  end
79
87
  end
data/lib/rika/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rika
2
- VERSION = "0.9.7"
2
+ VERSION = "0.9.8"
3
3
  end
data/spec/rika_spec.rb CHANGED
@@ -15,7 +15,7 @@ describe Rika::Parser do
15
15
  @dir = File.expand_path(File.join(File.dirname(__FILE__), 'fixtures'))
16
16
  port = 50505
17
17
  @url = "http://#{Socket.gethostname}:#{port}"
18
- @qoute = "First they ignore you, then they ridicule you, then they fight you, then you win."
18
+ @quote = "First they ignore you, then they ridicule you, then they fight you, then you win."
19
19
  @t1 = Thread.new do
20
20
  @server = HTTPServer.new(:Port => port, :DocumentRoot => @dir,
21
21
  :AccessLog => [], :Logger => WEBrick::Log::new("/dev/null", 7))
@@ -28,11 +28,11 @@ describe Rika::Parser do
28
28
  end
29
29
 
30
30
  it "should raise error if file does not exists" do
31
- lambda { Rika::Parser.new(file_path("nonsense.txt")) }.should raise_error(IOError, "File does not exist or can't be reached.")
31
+ lambda { Rika::Parser.new(file_path("nonsense.txt")) }.should raise_error(IOError)
32
32
  end
33
33
 
34
34
  it "should raise error if URL does not exists" do
35
- lambda { Rika::Parser.new("http://nonsense.com/whatever.pdf") }.should raise_error(IOError, "File does not exist or can't be reached.")
35
+ lambda { Rika::Parser.new("http://nonsense.com/whatever.pdf") }.should raise_error(IOError)
36
36
  end
37
37
 
38
38
  it "should detect file type without a file extension" do
@@ -41,20 +41,20 @@ describe Rika::Parser do
41
41
  end
42
42
 
43
43
  it "should not be possible to trick the parser to read a folder with an extension" do
44
- lambda { Rika::Parser.new(file_path("folder.js")).content }.should raise_error(IOError, "File does not exist or can't be reached.")
44
+ lambda { Rika::Parser.new(file_path("folder.js")).content }.should raise_error(IOError)
45
45
  end
46
46
 
47
47
  describe '#content' do
48
48
  it "should return the content in a text file" do
49
- @txt_parser.content.strip.should == @qoute
49
+ @txt_parser.content.strip.should == @quote
50
50
  end
51
51
 
52
52
  it "should return the content in a docx file" do
53
- @docx_parser.content.should == @qoute
53
+ @docx_parser.content.should == @quote
54
54
  end
55
55
 
56
56
  it "should return the content in a pdf file" do
57
- @pdf_parser.content.should == @qoute
57
+ @pdf_parser.content.should == @quote
58
58
  end
59
59
 
60
60
  it "should return no content for an image" do
@@ -78,7 +78,7 @@ describe Rika::Parser do
78
78
 
79
79
  it "should return the content from a file over http" do
80
80
  parser = Rika::Parser.new(@url + "/document.pdf")
81
- parser.content.should == @qoute
81
+ parser.content.should == @quote
82
82
  end
83
83
 
84
84
  it "should return empty string for unknown file" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.7
4
+ version: 0.9.8
5
5
  prerelease:
6
6
  platform: java
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-22 00:00:00.000000000 Z
12
+ date: 2013-01-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec