treat 2.0.6 → 2.0.7
Sign up to get free protection for your applications and to get access to all the features.
@@ -47,8 +47,8 @@ module Treat::Entities::Entity::Buildable
|
|
47
47
|
elsif file_or_value.is_a?(Hash)
|
48
48
|
from_db(file_or_value)
|
49
49
|
elsif self == Treat::Entities::Document ||
|
50
|
-
(fv.index('yml') || fv.index('yaml') ||
|
51
|
-
fv.index('xml')
|
50
|
+
(fv.index('.yml') || fv.index('.yaml') ||
|
51
|
+
fv.index('.xml'))
|
52
52
|
if fv =~ UriRegexp
|
53
53
|
from_url(fv)
|
54
54
|
else
|
data/lib/treat/version.rb
CHANGED
@@ -7,8 +7,8 @@
|
|
7
7
|
# statistical natural language modeling, and multi-
|
8
8
|
# lingual capabilities."
|
9
9
|
#
|
10
|
-
# Original paper: Google Ocropus Engine: Breuel,
|
11
|
-
# Thomas M. The Ocropus Open Source OCR System.
|
10
|
+
# Original paper: Google Ocropus Engine: Breuel,
|
11
|
+
# Thomas M. The Ocropus Open Source OCR System.
|
12
12
|
# DFKI and U. Kaiserslautern, Germany.
|
13
13
|
class Treat::Workers::Formatters::Readers::Image
|
14
14
|
|
@@ -18,27 +18,27 @@ class Treat::Workers::Formatters::Readers::Image
|
|
18
18
|
#
|
19
19
|
# - (Boolean) :silent => whether to silence Ocropus.
|
20
20
|
def self.read(document, options = {})
|
21
|
-
|
21
|
+
|
22
22
|
read = lambda do |doc|
|
23
23
|
self.create_temp_dir do |tmp|
|
24
|
-
`ocropus
|
25
|
-
`ocropus
|
26
|
-
`ocropus
|
27
|
-
`ocropus
|
28
|
-
doc.set :file, "#{tmp}/
|
24
|
+
`ocropus-nlbin -o #{tmp}/out #{doc.file}`
|
25
|
+
`ocropus-gpageseg #{tmp}/out/????.bin.png --minscale 2`
|
26
|
+
`ocropus-rpred #{tmp}/out/????/??????.bin.png`
|
27
|
+
`ocropus-hocr #{tmp}/out/????.bin.png -o #{tmp}/book.html`
|
28
|
+
doc.set :file, "#{tmp}/book.html"
|
29
29
|
doc.set :format, :html
|
30
|
+
|
30
31
|
doc = doc.read(:html)
|
31
32
|
end
|
32
33
|
end
|
33
|
-
|
34
|
-
Treat.core.verbosity.silence ? silence_stdout {
|
34
|
+
|
35
|
+
Treat.core.verbosity.silence ? silence_stdout {
|
35
36
|
read.call(document) } : read.call(document)
|
36
|
-
|
37
|
+
|
37
38
|
document
|
38
|
-
|
39
39
|
end
|
40
|
-
|
41
|
-
# Create a
|
40
|
+
|
41
|
+
# Create a dir that gets deleted after execution of the block.
|
42
42
|
def self.create_temp_dir(&block)
|
43
43
|
if not FileTest.directory?(Treat.paths.tmp)
|
44
44
|
FileUtils.mkdir(Treat.paths.tmp)
|
@@ -50,5 +50,5 @@ class Treat::Workers::Formatters::Readers::Image
|
|
50
50
|
ensure
|
51
51
|
FileUtils.rm_rf(dname)
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: schiphol
|