treat 2.0.6 → 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -47,8 +47,8 @@ module Treat::Entities::Entity::Buildable
|
|
47
47
|
elsif file_or_value.is_a?(Hash)
|
48
48
|
from_db(file_or_value)
|
49
49
|
elsif self == Treat::Entities::Document ||
|
50
|
-
(fv.index('yml') || fv.index('yaml') ||
|
51
|
-
fv.index('xml')
|
50
|
+
(fv.index('.yml') || fv.index('.yaml') ||
|
51
|
+
fv.index('.xml'))
|
52
52
|
if fv =~ UriRegexp
|
53
53
|
from_url(fv)
|
54
54
|
else
|
data/lib/treat/version.rb
CHANGED
@@ -7,8 +7,8 @@
|
|
7
7
|
# statistical natural language modeling, and multi-
|
8
8
|
# lingual capabilities."
|
9
9
|
#
|
10
|
-
# Original paper: Google Ocropus Engine: Breuel,
|
11
|
-
# Thomas M. The Ocropus Open Source OCR System.
|
10
|
+
# Original paper: Google Ocropus Engine: Breuel,
|
11
|
+
# Thomas M. The Ocropus Open Source OCR System.
|
12
12
|
# DFKI and U. Kaiserslautern, Germany.
|
13
13
|
class Treat::Workers::Formatters::Readers::Image
|
14
14
|
|
@@ -18,27 +18,27 @@ class Treat::Workers::Formatters::Readers::Image
|
|
18
18
|
#
|
19
19
|
# - (Boolean) :silent => whether to silence Ocropus.
|
20
20
|
def self.read(document, options = {})
|
21
|
-
|
21
|
+
|
22
22
|
read = lambda do |doc|
|
23
23
|
self.create_temp_dir do |tmp|
|
24
|
-
`ocropus
|
25
|
-
`ocropus
|
26
|
-
`ocropus
|
27
|
-
`ocropus
|
28
|
-
doc.set :file, "#{tmp}/
|
24
|
+
`ocropus-nlbin -o #{tmp}/out #{doc.file}`
|
25
|
+
`ocropus-gpageseg #{tmp}/out/????.bin.png --minscale 2`
|
26
|
+
`ocropus-rpred #{tmp}/out/????/??????.bin.png`
|
27
|
+
`ocropus-hocr #{tmp}/out/????.bin.png -o #{tmp}/book.html`
|
28
|
+
doc.set :file, "#{tmp}/book.html"
|
29
29
|
doc.set :format, :html
|
30
|
+
|
30
31
|
doc = doc.read(:html)
|
31
32
|
end
|
32
33
|
end
|
33
|
-
|
34
|
-
Treat.core.verbosity.silence ? silence_stdout {
|
34
|
+
|
35
|
+
Treat.core.verbosity.silence ? silence_stdout {
|
35
36
|
read.call(document) } : read.call(document)
|
36
|
-
|
37
|
+
|
37
38
|
document
|
38
|
-
|
39
39
|
end
|
40
|
-
|
41
|
-
# Create a
|
40
|
+
|
41
|
+
# Create a dir that gets deleted after execution of the block.
|
42
42
|
def self.create_temp_dir(&block)
|
43
43
|
if not FileTest.directory?(Treat.paths.tmp)
|
44
44
|
FileUtils.mkdir(Treat.paths.tmp)
|
@@ -50,5 +50,5 @@ class Treat::Workers::Formatters::Readers::Image
|
|
50
50
|
ensure
|
51
51
|
FileUtils.rm_rf(dname)
|
52
52
|
end
|
53
|
-
|
53
|
+
|
54
54
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: treat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.6
|
4
|
+
version: 2.0.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: schiphol
|