rika 0.9.4-java → 0.9.5-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rika.rb +24 -22
- data/lib/rika/version.rb +1 -1
- data/spec/rika_spec.rb +0 -4
- metadata +2 -2
data/lib/rika.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
raise "You need to run JRuby to use Rika" unless RUBY_PLATFORM =~ /java/
|
2
4
|
|
3
5
|
require "rika/version"
|
@@ -13,30 +15,32 @@ end
|
|
13
15
|
module Rika
|
14
16
|
import org.apache.tika.metadata.Metadata
|
15
17
|
import org.apache.tika.Tika
|
18
|
+
import java.io.FileInputStream
|
19
|
+
import java.net.URL
|
16
20
|
|
17
21
|
class Parser
|
18
22
|
|
19
|
-
def initialize(
|
20
|
-
|
21
|
-
@uri = uri
|
23
|
+
def initialize(file_location, max_content_length = -1)
|
24
|
+
@uri = file_location
|
22
25
|
@tika = Tika.new
|
23
26
|
@tika.set_max_string_length(max_content_length)
|
24
27
|
@metadata = Metadata.new
|
28
|
+
|
29
|
+
@is_file = File.exists?(@uri)
|
30
|
+
is_http = URI(@uri).scheme == "http" && Net::HTTP.get_response(URI(@uri)).is_a?(Net::HTTPSuccess) if !@is_file
|
25
31
|
|
26
|
-
if
|
27
|
-
self.parse_file
|
28
|
-
elsif p.parse(@uri).scheme == 'http' || p.parse(@uri).scheme == 'https'
|
29
|
-
self.parse_url
|
30
|
-
else
|
32
|
+
if !@is_file && !is_http
|
31
33
|
raise IOError, "File does not exist or can't be reached."
|
32
34
|
end
|
33
35
|
end
|
34
36
|
|
35
37
|
def content
|
36
|
-
|
38
|
+
self.parse
|
39
|
+
@content
|
37
40
|
end
|
38
41
|
|
39
42
|
def metadata
|
43
|
+
self.parse
|
40
44
|
metadata_hash = {}
|
41
45
|
|
42
46
|
@metadata.names.each do |name|
|
@@ -47,33 +51,31 @@ module Rika
|
|
47
51
|
end
|
48
52
|
|
49
53
|
def media_type
|
50
|
-
@media_type
|
54
|
+
@media_type ||= @tika.detect(input_stream)
|
51
55
|
end
|
52
56
|
|
53
57
|
def available_metadata
|
58
|
+
self.parse
|
54
59
|
@metadata.names.to_a
|
55
60
|
end
|
56
61
|
|
57
62
|
def metadata_exists?(name)
|
63
|
+
self.parse
|
58
64
|
@metadata.get(name) != nil
|
59
65
|
end
|
60
66
|
|
61
67
|
protected
|
62
68
|
|
63
|
-
def parse_file
|
64
|
-
|
65
|
-
@metadata.set("filename", File.basename(@uri))
|
66
|
-
@media_type = @tika.detect(java.io.FileInputStream.new(java.io.File.new(@uri)))
|
67
|
-
@content = @tika.parse_to_string(input_stream, @metadata)
|
69
|
+
def parse
|
70
|
+
@content ||= @tika.parse_to_string(input_stream, @metadata).to_s.strip
|
68
71
|
end
|
69
72
|
|
70
|
-
def parse_url
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
@content = @tika.parse_to_string(input_stream, @metadata)
|
73
|
+
def input_stream
|
74
|
+
if @is_file
|
75
|
+
FileInputStream.new(java.io.File.new(@uri))
|
76
|
+
else
|
77
|
+
URL.new(@uri).open_stream
|
78
|
+
end
|
77
79
|
end
|
78
80
|
end
|
79
81
|
end
|
data/lib/rika/version.rb
CHANGED
data/spec/rika_spec.rb
CHANGED
@@ -90,10 +90,6 @@ describe Rika::Parser do
|
|
90
90
|
@txt_parser.metadata["nonsense"].should be_nil
|
91
91
|
end
|
92
92
|
|
93
|
-
it "should return metadata from a text file" do
|
94
|
-
@txt_parser.metadata["filename"].should == "text_file.txt"
|
95
|
-
end
|
96
|
-
|
97
93
|
it "should return metadata from a docx file" do
|
98
94
|
@docx_parser.metadata["Page-Count"].should == "1"
|
99
95
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rika
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.4
|
4
|
+
version: 0.9.5
|
5
5
|
prerelease:
|
6
6
|
platform: java
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|