rika-stevedore 1.7.4-java → 1.9.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4beab0e9663dba1c8b2de4cec723ca7e00714b07
|
4
|
+
data.tar.gz: 55df5eaaf20cbfaea198cdd9d4d71b244408b3fd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c66312fb3a670fac33751b29a2d389f73908810384c1d84d42c4a56202e627507318cc5c0c3d2969492732bded3691f07b6681587d9ba5d3f8e25567999feae5
|
7
|
+
data.tar.gz: 1741ce3aa81b696036c6110e0ecac7302cf1db7ae905b6ee3f43d094ed62390a26e2e1010ca60b224ba04ee6c232f65370eabd46f360d0887bf22618f92f5dd2
|
data/lib/rika.rb
CHANGED
@@ -21,6 +21,15 @@ module Rika
|
|
21
21
|
import org.apache.tika.detect.DefaultDetector
|
22
22
|
import java.io.FileInputStream
|
23
23
|
import java.net.URL
|
24
|
+
import org.apache.tika.sax.BodyContentHandler;
|
25
|
+
import org.apache.tika.parser.AutoDetectParser;
|
26
|
+
import org.apache.tika.parser.ParseContext;
|
27
|
+
import org.apache.tika.parser.html.BoilerpipeContentHandler;
|
28
|
+
|
29
|
+
import org.apache.tika.language.translate.GoogleTranslator
|
30
|
+
|
31
|
+
|
32
|
+
|
24
33
|
|
25
34
|
def self.parse_content_and_metadata(file_location, max_content_length = -1)
|
26
35
|
parser = Parser.new(file_location, max_content_length)
|
@@ -32,6 +41,11 @@ module Rika
|
|
32
41
|
parser.content
|
33
42
|
end
|
34
43
|
|
44
|
+
def self.parse_main_content(file_location, max_content_length = -1)
|
45
|
+
parser = Parser.new(file_location, max_content_length)
|
46
|
+
parser.main_content
|
47
|
+
end
|
48
|
+
|
35
49
|
def self.parse_metadata(file_location)
|
36
50
|
parser = Parser.new(file_location, 0)
|
37
51
|
parser.metadata
|
@@ -54,13 +68,18 @@ module Rika
|
|
54
68
|
end
|
55
69
|
|
56
70
|
def content
|
57
|
-
self.parse
|
71
|
+
self.parse!
|
58
72
|
@content
|
59
73
|
end
|
60
74
|
|
75
|
+
def main_content
|
76
|
+
self.parse_main_content!
|
77
|
+
@main_content
|
78
|
+
end
|
79
|
+
|
61
80
|
def metadata
|
62
81
|
unless @metadata_ruby
|
63
|
-
self.parse
|
82
|
+
self.parse!
|
64
83
|
@metadata_ruby = {}
|
65
84
|
|
66
85
|
@metadata_java.names.each do |name|
|
@@ -104,10 +123,18 @@ module Rika
|
|
104
123
|
|
105
124
|
protected
|
106
125
|
|
107
|
-
def parse
|
126
|
+
def parse!
|
108
127
|
@content ||= @tika.parse_to_string(input_stream, @metadata_java).to_s.strip
|
109
128
|
end
|
110
129
|
|
130
|
+
def parse_main_content!
|
131
|
+
text_handler = BodyContentHandler.new
|
132
|
+
auto_detect_parser = AutoDetectParser.new
|
133
|
+
context = ParseContext.new
|
134
|
+
auto_detect_parser.parse(input_stream, BoilerpipeContentHandler.new(text_handler), @metadata_java, context);
|
135
|
+
@main_content = text_handler.to_s
|
136
|
+
end
|
137
|
+
|
111
138
|
def get_input_type
|
112
139
|
if File.exists?(@uri) && File.directory?(@uri) == false
|
113
140
|
:file
|
@@ -126,4 +153,19 @@ module Rika
|
|
126
153
|
end
|
127
154
|
end
|
128
155
|
end
|
156
|
+
|
157
|
+
class Translator
|
158
|
+
def initialize
|
159
|
+
@translator = GoogleTranslator.new
|
160
|
+
end
|
161
|
+
|
162
|
+
def translate(inputtext, source='ru', target='en')
|
163
|
+
# begin
|
164
|
+
puts "translating #{inputtext.size} chars to #{target} at a cost of $#{(inputtext.size / 50000.0).round(2)}"
|
165
|
+
return @translator.translate(inputtext, source, target);
|
166
|
+
# rescue StandardError
|
167
|
+
# return "Error while translating.";
|
168
|
+
# end
|
169
|
+
end
|
170
|
+
end
|
129
171
|
end
|
data/lib/rika/version.rb
CHANGED
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rika-stevedore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.4
|
4
|
+
version: 1.9.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Richard Nyström
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-03-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -120,7 +120,9 @@ files:
|
|
120
120
|
- target/dependency/jVinci-2.4.0.jar
|
121
121
|
- target/dependency/jackcess-2.1.4.jar
|
122
122
|
- target/dependency/jackcess-encrypt-2.1.1.jar
|
123
|
+
- target/dependency/jackson-annotations-2.8.1.jar
|
123
124
|
- target/dependency/jackson-core-2.8.1.jar
|
125
|
+
- target/dependency/jackson-databind-2.8.1.jar
|
124
126
|
- target/dependency/jai-imageio-core-1.3.1.jar
|
125
127
|
- target/dependency/jakarta-regexp-1.4.jar
|
126
128
|
- target/dependency/java-libpst-0.8.1.jar
|
@@ -195,6 +197,7 @@ files:
|
|
195
197
|
- target/dependency/tagsoup-1.2.1.jar
|
196
198
|
- target/dependency/tika-core-1.15-SNAPSHOT.jar
|
197
199
|
- target/dependency/tika-parsers-1.15-SNAPSHOT.jar
|
200
|
+
- target/dependency/tika-translate-1.15-SNAPSHOT.jar
|
198
201
|
- target/dependency/udunits-4.5.5.jar
|
199
202
|
- target/dependency/uimafit-core-2.1.0.jar
|
200
203
|
- target/dependency/uimaj-adapter-vinci-2.4.0.jar
|