libera 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cc099d4ec4b7e794ee9fcd8dbd057a1ccd04467c
4
- data.tar.gz: 42e414654cab1579bdaad82e5fe6d70f97b0fd7a
3
+ metadata.gz: a48b962889918bed974055b7c00258ab1d546826
4
+ data.tar.gz: f70e99edf393c7fb1f534ce878f1e79279f6ab56
5
5
  SHA512:
6
- metadata.gz: 5e4e80f572b7e2bc9ac1febedd14693a956d818aa0ec999d486e11f74813df1029a2eaeed846c36d4fb7951d57c2c0981e50b6a9393c1daaa86b3114a504dd3f
7
- data.tar.gz: d7bb316456adfd50f29e19c28eee08051c6bc2439ac41b98a6f550ee37031528ad89724225e20f6b5a424ea12c208dd9f0f9ada4ec035aff6e4999dc13c986c4
6
+ metadata.gz: 00aa349e53c83c32b84c901d48d37ace7472f095fd65c0b63fae43a68c7c700bbc45141e7b3617cc51c54db493a109daf694f15e5ef2b6e2d6e78299fe65ff76
7
+ data.tar.gz: c336a4d25119edcf30c5eba034b7c875656b1d626445ab354e125e289b1e532ea3e202cefc638c76aac77bcfbcca46a52603d6c571ee1d929eab8716b1dc4734
data/README.md CHANGED
@@ -6,6 +6,15 @@ Its purpose is to take PDF files as input, and split them apart into individual
6
6
 
7
7
  ## Installation
8
8
 
9
+ There are some programs that are required for Libera to work:
10
+
11
+ * Tesseract - https://github.com/tesseract-ocr/tesseract
12
+ * ImageMagick - https://www.imagemagick.org
13
+
14
+ Tesseract 3.03 and ImageMagick 6.7.7-10 were the versions used in the development of this gem.
15
+
16
+ Both should be available through package managers such as APT, Yum, or Homebrew.
17
+
9
18
  Add this line to your application's Gemfile:
10
19
 
11
20
  ```ruby
data/lib/libera/tei.rb CHANGED
@@ -5,7 +5,7 @@ module Libera
5
5
  include OM::XML::Document
6
6
 
7
7
  set_terminology do |t|
8
- t.root(:path => 'tei', :xmlns => 'http://www.tei-c.org/ns/1.0', :namespace_prefix => nil)
8
+ t.root(:path => 'TEI', :xmlns => 'http://www.tei-c.org/ns/1.0', :namespace_prefix => nil)
9
9
  t.text(path: 'text'){
10
10
  t.body(path: 'body'){
11
11
  t.page_break(path: 'pb')
@@ -14,6 +14,14 @@ module Libera
14
14
  }
15
15
  end
16
16
 
17
+ define_template :text do |xml|
18
+ xml.text_
19
+ end
20
+
21
+ define_template :body do |xml|
22
+ xml.body
23
+ end
24
+
17
25
  define_template :page_break do |xml, img_src|
18
26
  xml.pb(:facs => img_src)
19
27
  end
@@ -51,7 +59,33 @@ module Libera
51
59
  return builder.doc
52
60
  end
53
61
 
62
+ def add_text
63
+ begin
64
+ self.template_registry.add_child(self.ng_xml.root, :text)
65
+ rescue NoMethodError
66
+ raise "Unable to add XML node to base template"
67
+ end
68
+ end
69
+
70
+ def add_body
71
+ begin
72
+ self.template_registry.add_child(self.find_by_terms(:text => 0), :body)
73
+ rescue NoMethodError
74
+ raise "Unable to add XML node to base template"
75
+ end
76
+ end
77
+
54
78
  def add_page_break(page_img)
79
+ # any text?
80
+ if self.find_by_terms(:text => 0).blank?
81
+ self.add_text
82
+ end
83
+
84
+ # any body?
85
+ if self.find_by_terms(:text, :body => 0).blank?
86
+ self.add_body
87
+ end
88
+
55
89
  # any anon breaks?
56
90
  ab_count = self.find_by_terms(:text, :body, :anon_block).count
57
91
 
@@ -1,3 +1,3 @@
1
1
  module Libera
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libera
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Cliff
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-06 00:00:00.000000000 Z
11
+ date: 2018-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler