rika 0.9.4-java → 0.9.5-java

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rika.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  raise "You need to run JRuby to use Rika" unless RUBY_PLATFORM =~ /java/
2
4
 
3
5
  require "rika/version"
@@ -13,30 +15,32 @@ end
13
15
  module Rika
14
16
  import org.apache.tika.metadata.Metadata
15
17
  import org.apache.tika.Tika
18
+ import java.io.FileInputStream
19
+ import java.net.URL
16
20
 
17
21
  class Parser
18
22
 
19
- def initialize(uri, max_content_length = -1)
20
- p = URI::Parser.new
21
- @uri = uri
23
+ def initialize(file_location, max_content_length = -1)
24
+ @uri = file_location
22
25
  @tika = Tika.new
23
26
  @tika.set_max_string_length(max_content_length)
24
27
  @metadata = Metadata.new
28
+
29
+ @is_file = File.exists?(@uri)
30
+ is_http = URI(@uri).scheme == "http" && Net::HTTP.get_response(URI(@uri)).is_a?(Net::HTTPSuccess) if !@is_file
25
31
 
26
- if File.exists?(@uri)
27
- self.parse_file
28
- elsif p.parse(@uri).scheme == 'http' || p.parse(@uri).scheme == 'https'
29
- self.parse_url
30
- else
32
+ if !@is_file && !is_http
31
33
  raise IOError, "File does not exist or can't be reached."
32
34
  end
33
35
  end
34
36
 
35
37
  def content
36
- @content.to_s.strip
38
+ self.parse
39
+ @content
37
40
  end
38
41
 
39
42
  def metadata
43
+ self.parse
40
44
  metadata_hash = {}
41
45
 
42
46
  @metadata.names.each do |name|
@@ -47,33 +51,31 @@ module Rika
47
51
  end
48
52
 
49
53
  def media_type
50
- @media_type
54
+ @media_type ||= @tika.detect(input_stream)
51
55
  end
52
56
 
53
57
  def available_metadata
58
+ self.parse
54
59
  @metadata.names.to_a
55
60
  end
56
61
 
57
62
  def metadata_exists?(name)
63
+ self.parse
58
64
  @metadata.get(name) != nil
59
65
  end
60
66
 
61
67
  protected
62
68
 
63
- def parse_file
64
- input_stream = java.io.FileInputStream.new(java.io.File.new(@uri))
65
- @metadata.set("filename", File.basename(@uri))
66
- @media_type = @tika.detect(java.io.FileInputStream.new(java.io.File.new(@uri)))
67
- @content = @tika.parse_to_string(input_stream, @metadata)
69
+ def parse
70
+ @content ||= @tika.parse_to_string(input_stream, @metadata).to_s.strip
68
71
  end
69
72
 
70
- def parse_url
71
- raise IOError, "File does not exist or can't be reached." if not Net::HTTP.get_response(URI(@uri)).is_a?(Net::HTTPSuccess)
72
- url = java.net.URL.new(@uri)
73
- input_stream = url.open_stream
74
- @media_type = @tika.detect(url.open_stream)
75
- @metadata.set("url", @uri)
76
- @content = @tika.parse_to_string(input_stream, @metadata)
73
+ def input_stream
74
+ if @is_file
75
+ FileInputStream.new(java.io.File.new(@uri))
76
+ else
77
+ URL.new(@uri).open_stream
78
+ end
77
79
  end
78
80
  end
79
81
  end
data/lib/rika/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rika
2
- VERSION = "0.9.4"
2
+ VERSION = "0.9.5"
3
3
  end
data/spec/rika_spec.rb CHANGED
@@ -90,10 +90,6 @@ describe Rika::Parser do
90
90
  @txt_parser.metadata["nonsense"].should be_nil
91
91
  end
92
92
 
93
- it "should return metadata from a text file" do
94
- @txt_parser.metadata["filename"].should == "text_file.txt"
95
- end
96
-
97
93
  it "should return metadata from a docx file" do
98
94
  @docx_parser.metadata["Page-Count"].should == "1"
99
95
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.4
4
+ version: 0.9.5
5
5
  prerelease:
6
6
  platform: java
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-09-25 00:00:00.000000000 Z
12
+ date: 2012-09-29 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec