rika-stevedore 1.7.4-java → 1.9.0-java
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4beab0e9663dba1c8b2de4cec723ca7e00714b07
|
4
|
+
data.tar.gz: 55df5eaaf20cbfaea198cdd9d4d71b244408b3fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c66312fb3a670fac33751b29a2d389f73908810384c1d84d42c4a56202e627507318cc5c0c3d2969492732bded3691f07b6681587d9ba5d3f8e25567999feae5
|
7
|
+
data.tar.gz: 1741ce3aa81b696036c6110e0ecac7302cf1db7ae905b6ee3f43d094ed62390a26e2e1010ca60b224ba04ee6c232f65370eabd46f360d0887bf22618f92f5dd2
|
data/lib/rika.rb
CHANGED
@@ -21,6 +21,15 @@ module Rika
|
|
21
21
|
import org.apache.tika.detect.DefaultDetector
|
22
22
|
import java.io.FileInputStream
|
23
23
|
import java.net.URL
|
24
|
+
import org.apache.tika.sax.BodyContentHandler;
|
25
|
+
import org.apache.tika.parser.AutoDetectParser;
|
26
|
+
import org.apache.tika.parser.ParseContext;
|
27
|
+
import org.apache.tika.parser.html.BoilerpipeContentHandler;
|
28
|
+
|
29
|
+
import org.apache.tika.language.translate.GoogleTranslator
|
30
|
+
|
31
|
+
|
32
|
+
|
24
33
|
|
25
34
|
def self.parse_content_and_metadata(file_location, max_content_length = -1)
|
26
35
|
parser = Parser.new(file_location, max_content_length)
|
@@ -32,6 +41,11 @@ module Rika
|
|
32
41
|
parser.content
|
33
42
|
end
|
34
43
|
|
44
|
+
def self.parse_main_content(file_location, max_content_length = -1)
|
45
|
+
parser = Parser.new(file_location, max_content_length)
|
46
|
+
parser.main_content
|
47
|
+
end
|
48
|
+
|
35
49
|
def self.parse_metadata(file_location)
|
36
50
|
parser = Parser.new(file_location, 0)
|
37
51
|
parser.metadata
|
@@ -54,13 +68,18 @@ module Rika
|
|
54
68
|
end
|
55
69
|
|
56
70
|
def content
|
57
|
-
self.parse
|
71
|
+
self.parse!
|
58
72
|
@content
|
59
73
|
end
|
60
74
|
|
75
|
+
def main_content
|
76
|
+
self.parse_main_content!
|
77
|
+
@main_content
|
78
|
+
end
|
79
|
+
|
61
80
|
def metadata
|
62
81
|
unless @metadata_ruby
|
63
|
-
self.parse
|
82
|
+
self.parse!
|
64
83
|
@metadata_ruby = {}
|
65
84
|
|
66
85
|
@metadata_java.names.each do |name|
|
@@ -104,10 +123,18 @@ module Rika
|
|
104
123
|
|
105
124
|
protected
|
106
125
|
|
107
|
-
def parse
|
126
|
+
def parse!
|
108
127
|
@content ||= @tika.parse_to_string(input_stream, @metadata_java).to_s.strip
|
109
128
|
end
|
110
129
|
|
130
|
+
def parse_main_content!
|
131
|
+
text_handler = BodyContentHandler.new
|
132
|
+
auto_detect_parser = AutoDetectParser.new
|
133
|
+
context = ParseContext.new
|
134
|
+
auto_detect_parser.parse(input_stream, BoilerpipeContentHandler.new(text_handler), @metadata_java, context);
|
135
|
+
@main_content = text_handler.to_s
|
136
|
+
end
|
137
|
+
|
111
138
|
def get_input_type
|
112
139
|
if File.exists?(@uri) && File.directory?(@uri) == false
|
113
140
|
:file
|
@@ -126,4 +153,19 @@ module Rika
|
|
126
153
|
end
|
127
154
|
end
|
128
155
|
end
|
156
|
+
|
157
|
+
class Translator
|
158
|
+
def initialize
|
159
|
+
@translator = GoogleTranslator.new
|
160
|
+
end
|
161
|
+
|
162
|
+
def translate(inputtext, source='ru', target='en')
|
163
|
+
# begin
|
164
|
+
puts "translating #{inputtext.size} chars to #{target} at a cost of $#{(inputtext.size / 50000.0).round(2)}"
|
165
|
+
return @translator.translate(inputtext, source, target);
|
166
|
+
# rescue StandardError
|
167
|
+
# return "Error while translating.";
|
168
|
+
# end
|
169
|
+
end
|
170
|
+
end
|
129
171
|
end
|
data/lib/rika/version.rb
CHANGED
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rika-stevedore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.4
|
4
|
+
version: 1.9.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Richard Nyström
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-03-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -120,7 +120,9 @@ files:
|
|
120
120
|
- target/dependency/jVinci-2.4.0.jar
|
121
121
|
- target/dependency/jackcess-2.1.4.jar
|
122
122
|
- target/dependency/jackcess-encrypt-2.1.1.jar
|
123
|
+
- target/dependency/jackson-annotations-2.8.1.jar
|
123
124
|
- target/dependency/jackson-core-2.8.1.jar
|
125
|
+
- target/dependency/jackson-databind-2.8.1.jar
|
124
126
|
- target/dependency/jai-imageio-core-1.3.1.jar
|
125
127
|
- target/dependency/jakarta-regexp-1.4.jar
|
126
128
|
- target/dependency/java-libpst-0.8.1.jar
|
@@ -195,6 +197,7 @@ files:
|
|
195
197
|
- target/dependency/tagsoup-1.2.1.jar
|
196
198
|
- target/dependency/tika-core-1.15-SNAPSHOT.jar
|
197
199
|
- target/dependency/tika-parsers-1.15-SNAPSHOT.jar
|
200
|
+
- target/dependency/tika-translate-1.15-SNAPSHOT.jar
|
198
201
|
- target/dependency/udunits-4.5.5.jar
|
199
202
|
- target/dependency/uimafit-core-2.1.0.jar
|
200
203
|
- target/dependency/uimaj-adapter-vinci-2.4.0.jar
|