libera 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/lib/libera/tei.rb +35 -1
- data/lib/libera/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a48b962889918bed974055b7c00258ab1d546826
|
|
4
|
+
data.tar.gz: f70e99edf393c7fb1f534ce878f1e79279f6ab56
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 00aa349e53c83c32b84c901d48d37ace7472f095fd65c0b63fae43a68c7c700bbc45141e7b3617cc51c54db493a109daf694f15e5ef2b6e2d6e78299fe65ff76
|
|
7
|
+
data.tar.gz: c336a4d25119edcf30c5eba034b7c875656b1d626445ab354e125e289b1e532ea3e202cefc638c76aac77bcfbcca46a52603d6c571ee1d929eab8716b1dc4734
|
data/README.md
CHANGED
|
@@ -6,6 +6,15 @@ Its purpose is to take PDF files as input, and split them apart into individual
|
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
9
|
+
There are some programs that are required for Libera to work:
|
|
10
|
+
|
|
11
|
+
* Tesseract - https://github.com/tesseract-ocr/tesseract
|
|
12
|
+
* ImageMagick - https://www.imagemagick.org
|
|
13
|
+
|
|
14
|
+
Tesseract 3.03 and ImageMagick 6.7.7-10 were the versions used in the development of this gem.
|
|
15
|
+
|
|
16
|
+
Both should be available through package managers such as APT, Yum, Homebrew, etc.
|
|
17
|
+
|
|
9
18
|
Add this line to your application's Gemfile:
|
|
10
19
|
|
|
11
20
|
```ruby
|
data/lib/libera/tei.rb
CHANGED
|
@@ -5,7 +5,7 @@ module Libera
|
|
|
5
5
|
include OM::XML::Document
|
|
6
6
|
|
|
7
7
|
set_terminology do |t|
|
|
8
|
-
t.root(:path => '
|
|
8
|
+
t.root(:path => 'TEI', :xmlns => 'http://www.tei-c.org/ns/1.0', :namespace_prefix => nil)
|
|
9
9
|
t.text(path: 'text'){
|
|
10
10
|
t.body(path: 'body'){
|
|
11
11
|
t.page_break(path: 'pb')
|
|
@@ -14,6 +14,14 @@ module Libera
|
|
|
14
14
|
}
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
+
define_template :text do |xml|
|
|
18
|
+
xml.text_
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
define_template :body do |xml|
|
|
22
|
+
xml.body
|
|
23
|
+
end
|
|
24
|
+
|
|
17
25
|
define_template :page_break do |xml, img_src|
|
|
18
26
|
xml.pb(:facs => img_src)
|
|
19
27
|
end
|
|
@@ -51,7 +59,33 @@ module Libera
|
|
|
51
59
|
return builder.doc
|
|
52
60
|
end
|
|
53
61
|
|
|
62
|
+
def add_text
|
|
63
|
+
begin
|
|
64
|
+
self.template_registry.add_child(self.ng_xml.root, :text)
|
|
65
|
+
rescue NoMethodError
|
|
66
|
+
raise "Unable to add XML node to base template"
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def add_body
|
|
71
|
+
begin
|
|
72
|
+
self.template_registry.add_child(self.find_by_terms(:text => 0), :body)
|
|
73
|
+
rescue NoMethodError
|
|
74
|
+
raise "Unable to add XML node to base template"
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
54
78
|
def add_page_break(page_img)
|
|
79
|
+
# any text?
|
|
80
|
+
if self.find_by_terms(:text => 0).blank?
|
|
81
|
+
self.add_text
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# any body?
|
|
85
|
+
if self.find_by_terms(:text, :body => 0).blank?
|
|
86
|
+
self.add_body
|
|
87
|
+
end
|
|
88
|
+
|
|
55
89
|
# any anon breaks?
|
|
56
90
|
ab_count = self.find_by_terms(:text, :body, :anon_block).count
|
|
57
91
|
|
data/lib/libera/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: libera
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.0
|
|
4
|
+
version: 1.0.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David Cliff
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-06-
|
|
11
|
+
date: 2018-06-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|