libis-format 0.9.48 → 0.9.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/libis/format/config.rb +1 -0
- data/lib/libis/format/converter/fop_pdf_converter.rb +42 -0
- data/lib/libis/format/converter/xslt_converter.rb +93 -0
- data/lib/libis/format/tool/fop_pdf.rb +39 -0
- data/lib/libis/format/tool.rb +1 -0
- data/lib/libis/format/version.rb +1 -1
- data/libis-format.gemspec +1 -0
- data/spec/converter_fop_spec.rb +38 -0
- data/spec/converter_xsl_spec.rb +56 -0
- data/spec/data/xml/134476_ead.XML +1 -0
- data/spec/data/xml/134476_ead.pdf +0 -0
- data/spec/data/xml/134476_fo.XML +383 -0
- data/spec/data/xml/134476_raw.XML +141 -0
- data/spec/data/xml/ead2fo_pdf.xsl +1453 -0
- data/spec/data/xml/header_nolink_pdf.xsl +120 -0
- data/spec/data/xml/lookupLists.xsl +488 -0
- data/spec/data/xml/scope_eadToHTML.xsl +2376 -0
- data/spec/data/xml/scope_xmlToEAD_dom.xsl +950 -0
- metadata +41 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 47b210d27058d7ee640e2435f952d8dc58537deb
|
4
|
+
data.tar.gz: 531d8103bea3b028c4d51f10aa793e8488b03df9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2288f2bc2962c96fdaf361c9db9da137c98acbe084962a0fa47f503df8fdc030dc8600f5c62d98bac6ad8e2a6f89f002e22a1a5c4f33a3c2af189a137d583299
|
7
|
+
data.tar.gz: 48a6446cc762623fac7403c70a6c80b4fedab6b409973653bb6a2038180d2dd7e85c1ca0e73b17a1fb7a0ea52c50c4c5371528b432161c8a5b4b6b7718109c7d
|
data/lib/libis/format/config.rb
CHANGED
@@ -15,6 +15,7 @@ module Libis
|
|
15
15
|
Config[:ghostscript_path] = 'gs'
|
16
16
|
Config[:droid_path] = '/opt/droid/droid.sh'
|
17
17
|
Config[:fido_path] = '/usr/local/bin/fido'
|
18
|
+
Config[:fop_jar] = '/opt/fop/current/fop/build/fop.jar'
|
18
19
|
Config[:ffmpeg_path] = 'ffmpeg'
|
19
20
|
Config[:fido_formats] = [(File.join(Libis::Format::DATA_DIR, 'lias_formats.xml'))]
|
20
21
|
Config[:pdf_tool] = File.join(Libis::Format::TOOL_DIR, 'PdfTool.jar')
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
require_relative 'base'
|
4
|
+
|
5
|
+
require 'libis/format/tool/fop_pdf'
|
6
|
+
|
7
|
+
module Libis
|
8
|
+
module Format
|
9
|
+
module Converter
|
10
|
+
|
11
|
+
# Converter that turns an XML (XSL-FO) input file into a PDF by delegating
# the actual rendering to Libis::Format::Tool::FopPdf.
class FopPdfConverter < Libis::Format::Converter::Base

  # @return [Array<Symbol>] the input formats accepted by this converter
  def self.input_types
    [:XML]
  end

  # @param format [Symbol, nil] optional input format to check against
  # @return [Array<Symbol>] [:PDF]; empty when a format is given that is
  #   not one of the accepted input types
  def self.output_types(format = nil)
    return [:PDF] unless format
    input_types.include?(format) ? [:PDF] : []
  end

  # Runs the FOP tool on +source+ and writes the result to +target+.
  #
  # @param source [String] path of the input file
  # @param target [String] path of the PDF file to create
  # @param _format [Symbol] requested target format (not used here)
  # @param opts [Hash] extra options (only forwarded to the superclass)
  # @return [String, nil] +target+, or nil when the source file is unusable
  def convert(source, target, _format, opts = {})
    super

    source_usable = File.file?(source) && File.exist?(source) && File.readable?(source)
    if !source_usable
      error "File '#{source}' does not exist or is not readable"
      return nil
    end

    # Make sure the directory that will hold the PDF exists.
    target_dir = File.dirname(target)
    FileUtils.mkpath(target_dir)

    # NOTE: the outcome of the tool run is not inspected; the target path
    # is returned regardless.
    Libis::Format::Tool::FopPdf.run(source, target)

    target
  end

end
|
39
|
+
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
require_relative 'base'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Format
|
7
|
+
module Converter
|
8
|
+
|
9
|
+
# Converter that applies an XSLT stylesheet to an XML input file and writes
# the transformation result to the target file.
#
# The stylesheet must be configured with #xsl_file before #convert is called.
class XsltConverter < Libis::Format::Converter::Base

  # @return [Array<Symbol>] the input formats accepted by this converter
  def self.input_types
    [:XML]
  end

  # @param format [Symbol, nil] optional input format to check against
  # @return [Array<Symbol>] the possible output formats; empty when a format
  #   is given that is not one of the accepted input types
  def self.output_types(format = nil)
    return [] if format && !input_types.include?(format)
    [:XML, :HTML, :TXT]
  end

  # Registers the stylesheet to use for the transformation.
  # (@options is not set up in this class; presumably the base class
  # initialises it.)
  #
  # @param file_path [String] path of the XSLT stylesheet
  def xsl_file(file_path)
    @options[:xsl_file] = file_path
  end

  # Transforms +source+ with the configured stylesheet and writes the result
  # to +target+.
  #
  # @param source [String] path of the XML input file
  # @param target [String] path of the file to create
  # @param _format [Symbol] requested target format (not used here)
  # @param opts [Hash] extra options (only forwarded to the superclass)
  # @return [String, nil] +target+ on success; nil (after reporting through
  #   +error+) when an input is missing/unreadable, cannot be parsed, or the
  #   transformation fails
  def convert(source, target, _format, opts = {})
    super

    unless usable_file?(source)
      error "File '#{source}' does not exist or is not readable"
      return nil
    end

    # The source is parsed before the stylesheet is checked so that problems
    # are reported in the same order as before.
    doc = parse_source_document(source)
    return nil unless doc

    file = @options[:xsl_file]
    unless file
      error 'No xsl_file supplied'
      return nil
    end

    unless usable_file?(file)
      error "XSL file '#{file}' does not exist or is not readable"
      return nil
    end

    FileUtils.mkpath(File.dirname(target))

    xsl = load_stylesheet(file)
    return nil unless xsl

    begin
      target_xml = xsl.transform(doc)
      # Block form closes the handle even when writing fails; the target is
      # only opened once the transformation itself has succeeded.
      File.open(target, 'w') { |out| out.write(target_xml) }
    rescue StandardError => e
      # StandardError rather than Exception: signals and SystemExit must
      # not be swallowed here.
      error "Error transforming '#{source}' with '#{file}': #{e.message} @ #{Array(e.backtrace).first}"
      return nil
    end

    target
  end

  private

  # True when +path+ is an existing, readable regular file.
  def usable_file?(path)
    File.file?(path) && File.exist?(path) && File.readable?(path)
  end

  # Parses the XML source file with strict parsing.
  # Any syntax error means no document was produced, so it is reported and
  # nil is returned whatever its severity.
  def parse_source_document(source)
    Nokogiri::XML(File.read(source)) do |config|
      config.options = Nokogiri::XML::ParseOptions::STRICT | Nokogiri::XML::ParseOptions::NOBLANKS
    end
  rescue Nokogiri::XML::SyntaxError => e
    error "Error parsing XML input '#{source}': #{e.message} @ #{Array(e.backtrace).first}"
    nil
  end

  # Loads the stylesheet from +file+. The open File object (not its content)
  # is handed to Nokogiri, exactly as before; the handle is closed by the
  # block form of File.open.
  # Returns nil after reporting when the stylesheet is not well-formed.
  def load_stylesheet(file)
    File.open(file, 'r') do |fp|
      Nokogiri::XSLT(fp) do |config|
        config.options = Nokogiri::XML::ParseOptions::STRICT | Nokogiri::XML::ParseOptions::NOBLANKS
      end
    end
  rescue Nokogiri::XML::SyntaxError => e
    error "Error parsing XSL input '#{file}': #{e.message} @ #{Array(e.backtrace).first}"
    nil
  end

end
|
90
|
+
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
|
10
|
+
module Format
|
11
|
+
module Tool
|
12
|
+
|
13
|
+
# Thin wrapper that launches the FOP jar in an external Java process to
# render an XSL-FO file to PDF.
class FopPdf
  include ::Libis::Tools::Logger

  # Convenience entry point: creates an instance and calls #run on it.
  #
  # @param xml [String] path of the XSL-FO input file
  # @param target [String] path of the PDF file to create
  # @param options [Array] accepted and passed on to #run
  # @return whatever #run returns
  def self.run(xml, target, options = [])
    new.run(xml, target, options)
  end

  # Builds the java command line from the configured :java_path and
  # :fop_jar settings and executes it.
  #
  # @param xml [String] path of the XSL-FO input file
  # @param target [String] path of the PDF file to create
  # @param options [Array] currently not used by this method
  # @return the result of Libis::Tools::Command.run
  def run(xml, target, options = [])

    if OS.java?
      # TODO: import library and execute in current VM. For now do exactly as in MRI.
    end

    java = Libis::Format::Config[:java_path]
    fop_jar = Libis::Format::Config[:fop_jar]

    # fop.home is set to the directory that contains the jar.
    command_line = [
      java,
      "-Dfop.home=#{File.dirname(fop_jar)}",
      '-jar', fop_jar,
      '-fo', xml,
      '-pdf', target
    ]

    Libis::Tools::Command.run(*command_line)

  end
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/libis/format/tool.rb
CHANGED
@@ -11,6 +11,7 @@ module Libis
|
|
11
11
|
|
12
12
|
autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
|
13
13
|
autoload :FFMpeg, 'libis/format/tool/ffmpeg'
|
14
|
+
autoload :FopPdf, 'libis/format/tool/fop_pdf'
|
14
15
|
autoload :PdfCopy, 'libis/format/tool/pdf_copy'
|
15
16
|
autoload :PdfMerge, 'libis/format/tool/pdf_merge'
|
16
17
|
autoload :PdfOptimizer, 'libis/format/tool/pdf_optimizer'
|
data/lib/libis/format/version.rb
CHANGED
data/libis-format.gemspec
CHANGED
@@ -26,6 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_development_dependency 'rake', '~> 10.3'
|
27
27
|
spec.add_development_dependency 'rspec', '~> 3.1'
|
28
28
|
spec.add_development_dependency 'awesome_print'
|
29
|
+
spec.add_development_dependency 'equivalent-xml', '~> 0.5'
|
29
30
|
|
30
31
|
spec.add_runtime_dependency 'libis-tools', '~> 0.9.57'
|
31
32
|
spec.add_runtime_dependency 'os', '= 0.9.6'
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
require 'fileutils'
|
5
|
+
require 'rspec/matchers'
|
6
|
+
|
7
|
+
require 'libis/format/converter/fop_pdf_converter'
|
8
|
+
|
9
|
+
describe 'Converters' do

  let(:repository) { Libis::Format::Converter::Repository }
  let(:file_dir) { File.dirname(__FILE__) }

  # Keep the log output quiet while the specs run.
  before(:all) do
    Libis::Tools::Config.logger.level = 'error'
  end

  context 'Apache FOP-PDF Converter' do

    let(:converter) { Libis::Format::Converter::FopPdfConverter.new }
    let(:data_dir) { File.join(file_dir, 'data', 'xml') }

    it 'converts XML-FO to PDF' do
      src_file = File.join data_dir, '134476_fo.XML'
      tgt_file = File.join '', 'tmp', '134476_ead.pdf'

      # Start from a clean slate so a stale file cannot satisfy the checks.
      FileUtils.remove tgt_file, force: true
      FileUtils.mkdir_p File.dirname(tgt_file)

      result = converter.convert src_file, tgt_file, :PDF
      expect(result).to eq tgt_file

      # The converter hands back the target path without inspecting the tool
      # outcome, so the return value alone proves nothing: check that a file
      # was really written and that it starts with the PDF signature.
      expect(File.file?(tgt_file)).to be true
      expect(File.binread(tgt_file, 4)).to eq '%PDF'

      # No comparison against the reference PDF in the data directory is made
      # here; the former (disabled) check parsed both files as XML, which
      # does not apply to PDF output.
    end

  end

end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
require 'fileutils'
|
5
|
+
require 'rspec/matchers'
|
6
|
+
require 'equivalent-xml'
|
7
|
+
|
8
|
+
require 'libis/format/converter/xslt_converter'
|
9
|
+
|
10
|
+
describe 'Converters' do

  let(:repository) { Libis::Format::Converter::Repository }
  let(:file_dir) { File.dirname(__FILE__) }

  # Keep the log output quiet while the specs run.
  before(:all) do
    Libis::Tools::Config.logger.level = 'error'
  end

  context 'XSLT Converter' do

    let(:converter) { Libis::Format::Converter::XsltConverter.new }
    let(:data_dir) { File.join(file_dir, 'data', 'xml') }

    # Transforms data_dir/<source_name> with data_dir/<stylesheet_name> into
    # /tmp/<expected_name> and checks the outcome against the reference file
    # data_dir/<expected_name>, respecting element order.
    def transform_and_compare(source_name, stylesheet_name, expected_name)
      src_file = File.join(data_dir, source_name)
      tgt_file = File.join('', 'tmp', expected_name)
      cmp_file = File.join(data_dir, expected_name)
      xsl_file = File.join(data_dir, stylesheet_name)

      converter.xsl_file(xsl_file)

      # Remove any output left behind by an earlier run.
      FileUtils.remove(tgt_file, force: true)
      FileUtils.mkdir_p(File.dirname(tgt_file))

      result = converter.convert(src_file, tgt_file, :XML)
      expect(result).to eq tgt_file

      tgt = Nokogiri::XML(File.read(tgt_file))
      cmp = Nokogiri::XML(File.read(cmp_file))
      expect(tgt.root).to be_equivalent_to(cmp.root).respecting_element_order
    end

    it 'converts XML to EAD' do
      transform_and_compare '134476_raw.XML', 'scope_xmlToEAD_dom.xsl', '134476_ead.XML'
    end

    it 'converts EAD to XML-FO' do
      transform_and_compare '134476_ead.XML', 'ead2fo_pdf.xsl', '134476_fo.XML'
    end

  end

end
|
@@ -0,0 +1 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?><ead><eadheader audience="external"><eadid identifier="134476" countrycode="be" mainagencycode="BE/942855">ead_BE/942855/1572</eadid><filedesc><titlestmt><titleproper label="Titel">Plaatsingslijst van het archief Katholieke Arbeiders Vrouwenbeweging (KAV) afdeling Sint-Elisabeth Borgerhout</titleproper></titlestmt></filedesc><profiledesc><creation>Deze inventaris werd automatisch gegenereerd en gecodeerd.De EAD-export werd gecontroleerd en gevalideerd door de werkgroep archivarissen van LIAS K.U.Leuven (Leuvens Integraal Archiveringssysteem).<date>23 januari 2014</date></creation><langusage><language label="Taal" langcode="du">Nederlands</language></langusage></profiledesc></eadheader><archdesc level="archief"><descgrp><head>1. IDENTIFICATIE</head><repository label="Archiefinstelling"><corpname>KADOC KU Leuven, Leuven</corpname></repository><unitid countrycode="be" repositorycode="BE/942855/1572"><head>1.1. Referentie(s)</head><p>BE/942855/1572</p><p/></unitid><unittitle><head>1.2. Titel</head><p>Archief Katholieke Arbeiders Vrouwenbeweging (KAV) afdeling Sint-Elisabeth Borgerhout</p></unittitle><unitdate><head>1.3. Datering</head><p>1917-1970; 1990</p></unitdate><physdesc><head>1.4. Omvang en medium</head><extent label="Omvang"><p>0,4 strekkende meter</p></extent></physdesc></descgrp><descgrp><head>2. CONTEXT</head><origination label="Archiefvormer"><head>2.1. Naam van de archiefvormer(s)</head><extref entityref="ODIS" show="new" href="http://www.odis.be/lnk/ORG_30709"><corpname normal="KAV-afdeling Parochie van Sint-Jan, Borgerhout. (1917-heden)">KAV-afdeling Parochie van Sint-Jan, Borgerhout. (1917-heden)</corpname></extref></origination><acqinfo><head>2.4. 
Verwerving</head><p>In het archief van het KAV-verbond Antwerpen zoals het in 2008 werd overgemaakt aan KADOC bevond zich ook een afzonderlijke en zeer boeiend afdelingsarchief, namelijk dit van de Sint-Elisabethgilde, de KAV-afdeling van de Parochie van Sint-Jan, Borgerhout. (1917-1980). Dit archief werd in KADOC uit het archief van de KAV-verbond Antwerpen gelicht en als een afzonderlijk archief behandeld.</p></acqinfo></descgrp><descgrp><head>3. INHOUD EN STRUCTUUR</head><scopecontent><head>3.1. Bereik en inhoud</head><scopecontent label="Bereik en inhoud"><head>Bereik en inhoud</head><p>Het archief van de de Sint-Elisabethgilde bevat 4 handgeschreven verslagboeken van deze KAV-afdeling betrefende de periode van 1917-1927 en 1937-1980. Verder is er een album met foto's van de jaren 1926 tot 1944. Tenslotte zijn er ook een 11-tal gebedenboeken uit de periode 1928 tot 1934. "Deze stukken belichten nauwgezet de werking van de gilde in al haar aspecten: inspiratie en bestuur, leden en activiteiten, propaganda en verwevenheid met het lokale parochiale, sociale en politieke weefsel. Ze tonen een vooroorlogs (katholiek) Borgerhout van processies, gilde- en koffiefeesten, bedevaarten, religieuze vieringen, studiekringen, spaarkassen en vakbonden voor vrouwen. Uit de documenten blijkt de verbazingwekkende schaal (600 tot 700 leden) en intensiteit van de werking van de Sint-Elisabethgilde. De archivalia wachten op een onderzoeker die zijn/haar tanden zet in dit merkwaardige stukje vrouwengeschiedenis op microniveau." Citaat uit "Voor de 'stoffelijke, zedelijke en god</p><p>sdienstige belangen der vrouw'. Archief katholieke vrouwengilde Sint-Elisabeth Borgerhout" KADOC-e-nieuwsbrief, 5de jg., nr. 11, november 2008.</p></scopecontent></scopecontent></descgrp><descgrp><head>4. VOORWAARDEN VOOR RAADPLEGING EN GEBRUIK</head><accessrestrict><head>4.1. 
Voorwaarden voor raadpleging</head><p>Raadpleegbaar na toestemming van de archivaris</p></accessrestrict><otherfindaid><head>4.5. Toegangen</head><p><link dest="http://aleph08.libis.kuleuven.be:8881/R/?func=search-advanced-go&find_code1=WAZ&request1=ID=134476" target="_blank">Plaatsingslijst van het archief Katholieke Arbeiders Vrouwenbeweging (KAV) afdeling Sint-Elisabeth Borgerhout</link></p></otherfindaid></descgrp><descgrp><head>5. VERWANT MATERIAAL</head><relatedmaterial><head>5.3. Verwante beschrijvingseenheden</head><p>Bij het archief bevond zich een fotoalbum die werd overgedragen aan de Audiovisuele Afdeling.</p></relatedmaterial></descgrp><dsc type="combined"><head>LIJST VAN DE DOSSIERS</head><c level="Bestanddeel" id="209275"><did><unitid label="Ref.code" identifier="BE/942855/1572/1">1</unitid><unittitle label="Titel"><p>KAV-afdeling Parochie van Sint- Jan, Borgerhout. Verslagboeken</p></unittitle><unitdate label="Datum"><p>1917-1927</p></unitdate><physdesc><extent label="Omvang"><p>1 stuk</p></extent></physdesc></did></c><c level="Bestanddeel" id="209276"><did><unitid label="Ref.code" identifier="BE/942855/1572/2">2</unitid><unittitle label="Titel"><p>KAV-afdeling Parochie van Sint- Jan, Borgerhout. Verslagboeken</p></unittitle><unitdate label="Datum"><p>1937-1980</p></unitdate><physdesc><extent label="Omvang"><p>3 stukken</p></extent></physdesc></did></c><c level="Bestanddeel" id="209277"><did><unitid label="Ref.code" identifier="BE/942855/1572/3">3</unitid><unittitle label="Titel"><p>KAV-afdeling Parochie van Sint- Jan, Borgerhout. 
Fotoalbum</p></unittitle><unitdate label="Datum"><p>1926-1944</p></unitdate><physdesc><extent label="Omvang"><p>1 stuk</p></extent></physdesc><relatedmaterial label="Verwant audiovisueel materiaal"><p>Overgedragen aan de Afdeling Audiovisuele Documentatie</p></relatedmaterial></did></c><c level="Bestanddeel" id="209278"><did><unitid label="Ref.code" identifier="BE/942855/1572/4">4</unitid><unittitle label="Titel"><p>KAV-afdeling Parochie van Sint- Jan, Borgerhout. Gebedenboeken</p></unittitle><unitdate label="Datum"><p>1928-1934</p></unitdate><physdesc><extent label="Omvang"><p>11 stukken</p></extent></physdesc><bibliography label="Publicaties"><p>Overgedragen aan Afdeling Bibibliotheek</p></bibliography></did></c></dsc></archdesc></ead>
|
Binary file
|