rika-stevedore 1.7.4-java → 1.9.0-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 83d7d6e8e35afe690ea816221fdbe8dde41f7cc0
4
- data.tar.gz: ff47f562f035a944c32eb56998b3412133111c23
3
+ metadata.gz: 4beab0e9663dba1c8b2de4cec723ca7e00714b07
4
+ data.tar.gz: 55df5eaaf20cbfaea198cdd9d4d71b244408b3fd
5
5
  SHA512:
6
- metadata.gz: 55cdd0d5b8c24f10ba759a6c78c92d84a018a8345c6c279af1c25bca5653c5a362a731a491c6bb297a57220438e4561f75acb96492568b6e1656b27a18b10a41
7
- data.tar.gz: 63de6db66b835f9a71fbc543aaf8d7f6ad590feb72e3fc79c253c3b063adbfbd66632ec6d36389d9ba6f3c36dfbb99adbfc8e49e6e9df9ed0fc97c20e637fdfe
6
+ metadata.gz: c66312fb3a670fac33751b29a2d389f73908810384c1d84d42c4a56202e627507318cc5c0c3d2969492732bded3691f07b6681587d9ba5d3f8e25567999feae5
7
+ data.tar.gz: 1741ce3aa81b696036c6110e0ecac7302cf1db7ae905b6ee3f43d094ed62390a26e2e1010ca60b224ba04ee6c232f65370eabd46f360d0887bf22618f92f5dd2
data/lib/rika.rb CHANGED
@@ -21,6 +21,15 @@ module Rika
21
21
  import org.apache.tika.detect.DefaultDetector
22
22
  import java.io.FileInputStream
23
23
  import java.net.URL
24
+ import org.apache.tika.sax.BodyContentHandler;
25
+ import org.apache.tika.parser.AutoDetectParser;
26
+ import org.apache.tika.parser.ParseContext;
27
+ import org.apache.tika.parser.html.BoilerpipeContentHandler;
28
+
29
+ import org.apache.tika.language.translate.GoogleTranslator
30
+
31
+
32
+
24
33
 
25
34
  def self.parse_content_and_metadata(file_location, max_content_length = -1)
26
35
  parser = Parser.new(file_location, max_content_length)
@@ -32,6 +41,11 @@ module Rika
32
41
  parser.content
33
42
  end
34
43
 
44
+ def self.parse_main_content(file_location, max_content_length = -1)
45
+ parser = Parser.new(file_location, max_content_length)
46
+ parser.main_content
47
+ end
48
+
35
49
  def self.parse_metadata(file_location)
36
50
  parser = Parser.new(file_location, 0)
37
51
  parser.metadata
@@ -54,13 +68,18 @@ module Rika
54
68
  end
55
69
 
56
70
  def content
57
- self.parse
71
+ self.parse!
58
72
  @content
59
73
  end
60
74
 
75
+ def main_content
76
+ self.parse_main_content!
77
+ @main_content
78
+ end
79
+
61
80
  def metadata
62
81
  unless @metadata_ruby
63
- self.parse
82
+ self.parse!
64
83
  @metadata_ruby = {}
65
84
 
66
85
  @metadata_java.names.each do |name|
@@ -104,10 +123,18 @@ module Rika
104
123
 
105
124
  protected
106
125
 
107
- def parse
126
+ def parse!
108
127
  @content ||= @tika.parse_to_string(input_stream, @metadata_java).to_s.strip
109
128
  end
110
129
 
130
+ def parse_main_content!
131
+ text_handler = BodyContentHandler.new
132
+ auto_detect_parser = AutoDetectParser.new
133
+ context = ParseContext.new
134
+ auto_detect_parser.parse(input_stream, BoilerpipeContentHandler.new(text_handler), @metadata_java, context);
135
+ @main_content = text_handler.to_s
136
+ end
137
+
111
138
  def get_input_type
112
139
  if File.exists?(@uri) && File.directory?(@uri) == false
113
140
  :file
@@ -126,4 +153,19 @@ module Rika
126
153
  end
127
154
  end
128
155
  end
156
+
157
+ class Translator
158
+ def initialize
159
+ @translator = GoogleTranslator.new
160
+ end
161
+
162
+ def translate(inputtext, source='ru', target='en')
163
+ # begin
164
+ puts "translating #{inputtext.size} chars to #{target} at a cost of $#{(inputtext.size / 50000.0).round(2)}"
165
+ return @translator.translate(inputtext, source, target);
166
+ # rescue StandardError
167
+ # return "Error while translating.";
168
+ # end
169
+ end
170
+ end
129
171
  end
data/lib/rika/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rika
2
- VERSION = "1.7.4"
2
+ VERSION = "1.9.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika-stevedore
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.4
4
+ version: 1.9.0
5
5
  platform: java
6
6
  authors:
7
7
  - Richard Nyström
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-02-16 00:00:00.000000000 Z
12
+ date: 2017-03-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -120,7 +120,9 @@ files:
120
120
  - target/dependency/jVinci-2.4.0.jar
121
121
  - target/dependency/jackcess-2.1.4.jar
122
122
  - target/dependency/jackcess-encrypt-2.1.1.jar
123
+ - target/dependency/jackson-annotations-2.8.1.jar
123
124
  - target/dependency/jackson-core-2.8.1.jar
125
+ - target/dependency/jackson-databind-2.8.1.jar
124
126
  - target/dependency/jai-imageio-core-1.3.1.jar
125
127
  - target/dependency/jakarta-regexp-1.4.jar
126
128
  - target/dependency/java-libpst-0.8.1.jar
@@ -195,6 +197,7 @@ files:
195
197
  - target/dependency/tagsoup-1.2.1.jar
196
198
  - target/dependency/tika-core-1.15-SNAPSHOT.jar
197
199
  - target/dependency/tika-parsers-1.15-SNAPSHOT.jar
200
+ - target/dependency/tika-translate-1.15-SNAPSHOT.jar
198
201
  - target/dependency/udunits-4.5.5.jar
199
202
  - target/dependency/uimafit-core-2.1.0.jar
200
203
  - target/dependency/uimaj-adapter-vinci-2.4.0.jar