libera 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cc099d4ec4b7e794ee9fcd8dbd057a1ccd04467c
4
- data.tar.gz: 42e414654cab1579bdaad82e5fe6d70f97b0fd7a
3
+ metadata.gz: a48b962889918bed974055b7c00258ab1d546826
4
+ data.tar.gz: f70e99edf393c7fb1f534ce878f1e79279f6ab56
5
5
  SHA512:
6
- metadata.gz: 5e4e80f572b7e2bc9ac1febedd14693a956d818aa0ec999d486e11f74813df1029a2eaeed846c36d4fb7951d57c2c0981e50b6a9393c1daaa86b3114a504dd3f
7
- data.tar.gz: d7bb316456adfd50f29e19c28eee08051c6bc2439ac41b98a6f550ee37031528ad89724225e20f6b5a424ea12c208dd9f0f9ada4ec035aff6e4999dc13c986c4
6
+ metadata.gz: 00aa349e53c83c32b84c901d48d37ace7472f095fd65c0b63fae43a68c7c700bbc45141e7b3617cc51c54db493a109daf694f15e5ef2b6e2d6e78299fe65ff76
7
+ data.tar.gz: c336a4d25119edcf30c5eba034b7c875656b1d626445ab354e125e289b1e532ea3e202cefc638c76aac77bcfbcca46a52603d6c571ee1d929eab8716b1dc4734
data/README.md CHANGED
@@ -6,6 +6,15 @@ It's purpose is to take PDF files as input, and split them apart into individual
6
6
 
7
7
  ## Installation
8
8
 
9
+ Libera requires the following programs to be installed:
10
+
11
+ * Tesseract - https://github.com/tesseract-ocr/tesseract
12
+ * ImageMagick - https://www.imagemagick.org
13
+
14
+ Tesseract 3.03 and ImageMagick 6.7.7-10 were the versions used in the development of this gem.
15
+
16
+ Both should be available through package managers such as APT, Yum, or Homebrew.
17
+
9
18
  Add this line to your application's Gemfile:
10
19
 
11
20
  ```ruby
data/lib/libera/tei.rb CHANGED
@@ -5,7 +5,7 @@ module Libera
5
5
  include OM::XML::Document
6
6
 
7
7
  set_terminology do |t|
8
- t.root(:path => 'tei', :xmlns => 'http://www.tei-c.org/ns/1.0', :namespace_prefix => nil)
8
+ t.root(:path => 'TEI', :xmlns => 'http://www.tei-c.org/ns/1.0', :namespace_prefix => nil)
9
9
  t.text(path: 'text'){
10
10
  t.body(path: 'body'){
11
11
  t.page_break(path: 'pb')
@@ -14,6 +14,14 @@ module Libera
14
14
  }
15
15
  end
16
16
 
17
+ define_template :text do |xml|
18
+ xml.text_
19
+ end
20
+
21
+ define_template :body do |xml|
22
+ xml.body
23
+ end
24
+
17
25
  define_template :page_break do |xml, img_src|
18
26
  xml.pb(:facs => img_src)
19
27
  end
@@ -51,7 +59,33 @@ module Libera
51
59
  return builder.doc
52
60
  end
53
61
 
62
+ def add_text
63
+ begin
64
+ self.template_registry.add_child(self.ng_xml.root, :text)
65
+ rescue NoMethodError
66
+ raise "Unable to add XML node to base template"
67
+ end
68
+ end
69
+
70
+ def add_body
71
+ begin
72
+ self.template_registry.add_child(self.find_by_terms(:text => 0), :body)
73
+ rescue NoMethodError
74
+ raise "Unable to add XML node to base template"
75
+ end
76
+ end
77
+
54
78
  def add_page_break(page_img)
79
+ # any text?
80
+ if self.find_by_terms(:text => 0).blank?
81
+ self.add_text
82
+ end
83
+
84
+ # any body?
85
+ if self.find_by_terms(:text, :body => 0).blank?
86
+ self.add_body
87
+ end
88
+
55
89
  # any anon breaks?
56
90
  ab_count = self.find_by_terms(:text, :body, :anon_block).count
57
91
 
@@ -1,3 +1,3 @@
1
1
  module Libera
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libera
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Cliff
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-06 00:00:00.000000000 Z
11
+ date: 2018-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler