rika-stevedore 1.7.4-java → 1.9.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 83d7d6e8e35afe690ea816221fdbe8dde41f7cc0
4
- data.tar.gz: ff47f562f035a944c32eb56998b3412133111c23
3
+ metadata.gz: 4beab0e9663dba1c8b2de4cec723ca7e00714b07
4
+ data.tar.gz: 55df5eaaf20cbfaea198cdd9d4d71b244408b3fd
5
5
  SHA512:
6
- metadata.gz: 55cdd0d5b8c24f10ba759a6c78c92d84a018a8345c6c279af1c25bca5653c5a362a731a491c6bb297a57220438e4561f75acb96492568b6e1656b27a18b10a41
7
- data.tar.gz: 63de6db66b835f9a71fbc543aaf8d7f6ad590feb72e3fc79c253c3b063adbfbd66632ec6d36389d9ba6f3c36dfbb99adbfc8e49e6e9df9ed0fc97c20e637fdfe
6
+ metadata.gz: c66312fb3a670fac33751b29a2d389f73908810384c1d84d42c4a56202e627507318cc5c0c3d2969492732bded3691f07b6681587d9ba5d3f8e25567999feae5
7
+ data.tar.gz: 1741ce3aa81b696036c6110e0ecac7302cf1db7ae905b6ee3f43d094ed62390a26e2e1010ca60b224ba04ee6c232f65370eabd46f360d0887bf22618f92f5dd2
data/lib/rika.rb CHANGED
@@ -21,6 +21,15 @@ module Rika
21
21
  import org.apache.tika.detect.DefaultDetector
22
22
  import java.io.FileInputStream
23
23
  import java.net.URL
24
+ import org.apache.tika.sax.BodyContentHandler;
25
+ import org.apache.tika.parser.AutoDetectParser;
26
+ import org.apache.tika.parser.ParseContext;
27
+ import org.apache.tika.parser.html.BoilerpipeContentHandler;
28
+
29
+ import org.apache.tika.language.translate.GoogleTranslator
30
+
31
+
32
+
24
33
 
25
34
  def self.parse_content_and_metadata(file_location, max_content_length = -1)
26
35
  parser = Parser.new(file_location, max_content_length)
@@ -32,6 +41,11 @@ module Rika
32
41
  parser.content
33
42
  end
34
43
 
44
+ def self.parse_main_content(file_location, max_content_length = -1)
45
+ parser = Parser.new(file_location, max_content_length)
46
+ parser.main_content
47
+ end
48
+
35
49
  def self.parse_metadata(file_location)
36
50
  parser = Parser.new(file_location, 0)
37
51
  parser.metadata
@@ -54,13 +68,18 @@ module Rika
54
68
  end
55
69
 
56
70
  def content
57
- self.parse
71
+ self.parse!
58
72
  @content
59
73
  end
60
74
 
75
+ def main_content
76
+ self.parse_main_content!
77
+ @main_content
78
+ end
79
+
61
80
  def metadata
62
81
  unless @metadata_ruby
63
- self.parse
82
+ self.parse!
64
83
  @metadata_ruby = {}
65
84
 
66
85
  @metadata_java.names.each do |name|
@@ -104,10 +123,18 @@ module Rika
104
123
 
105
124
  protected
106
125
 
107
- def parse
126
+ def parse!
108
127
  @content ||= @tika.parse_to_string(input_stream, @metadata_java).to_s.strip
109
128
  end
110
129
 
130
+ def parse_main_content!
131
+ text_handler = BodyContentHandler.new
132
+ auto_detect_parser = AutoDetectParser.new
133
+ context = ParseContext.new
134
+ auto_detect_parser.parse(input_stream, BoilerpipeContentHandler.new(text_handler), @metadata_java, context);
135
+ @main_content = text_handler.to_s
136
+ end
137
+
111
138
  def get_input_type
112
139
  if File.exists?(@uri) && File.directory?(@uri) == false
113
140
  :file
@@ -126,4 +153,19 @@ module Rika
126
153
  end
127
154
  end
128
155
  end
156
+
157
+ class Translator
158
+ def initialize
159
+ @translator = GoogleTranslator.new
160
+ end
161
+
162
+ def translate(inputtext, source='ru', target='en')
163
+ # begin
164
+ puts "translating #{inputtext.size} chars to #{target} at a cost of $#{(inputtext.size / 50000.0).round(2)}"
165
+ return @translator.translate(inputtext, source, target);
166
+ # rescue StandardError
167
+ # return "Error while translating.";
168
+ # end
169
+ end
170
+ end
129
171
  end
data/lib/rika/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Rika
2
- VERSION = "1.7.4"
2
+ VERSION = "1.9.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rika-stevedore
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.4
4
+ version: 1.9.0
5
5
  platform: java
6
6
  authors:
7
7
  - Richard Nyström
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-02-16 00:00:00.000000000 Z
12
+ date: 2017-03-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  requirement: !ruby/object:Gem::Requirement
@@ -120,7 +120,9 @@ files:
120
120
  - target/dependency/jVinci-2.4.0.jar
121
121
  - target/dependency/jackcess-2.1.4.jar
122
122
  - target/dependency/jackcess-encrypt-2.1.1.jar
123
+ - target/dependency/jackson-annotations-2.8.1.jar
123
124
  - target/dependency/jackson-core-2.8.1.jar
125
+ - target/dependency/jackson-databind-2.8.1.jar
124
126
  - target/dependency/jai-imageio-core-1.3.1.jar
125
127
  - target/dependency/jakarta-regexp-1.4.jar
126
128
  - target/dependency/java-libpst-0.8.1.jar
@@ -195,6 +197,7 @@ files:
195
197
  - target/dependency/tagsoup-1.2.1.jar
196
198
  - target/dependency/tika-core-1.15-SNAPSHOT.jar
197
199
  - target/dependency/tika-parsers-1.15-SNAPSHOT.jar
200
+ - target/dependency/tika-translate-1.15-SNAPSHOT.jar
198
201
  - target/dependency/udunits-4.5.5.jar
199
202
  - target/dependency/uimafit-core-2.1.0.jar
200
203
  - target/dependency/uimaj-adapter-vinci-2.4.0.jar