relaton-cli 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +13 -0
- data/.hound.yml +3 -0
- data/.rubocop.yml +2 -7
- data/.travis.yml +5 -0
- data/Gemfile.lock +31 -24
- data/README.adoc +77 -36
- data/bin/rspec +29 -0
- data/exe/relaton +21 -0
- data/lib/relaton/bibcollection.rb +27 -38
- data/lib/relaton/bibdata.rb +81 -18
- data/lib/relaton/cli/_document.liquid +2 -2
- data/lib/relaton/cli/base_convertor.rb +94 -0
- data/lib/relaton/cli/command.rb +83 -0
- data/lib/relaton/cli/relaton_file.rb +153 -0
- data/lib/relaton/cli/version.rb +1 -1
- data/lib/relaton/cli/xml_convertor.rb +41 -0
- data/lib/relaton/cli/xml_to_html_renderer.rb +4 -14
- data/lib/relaton/cli/yaml_convertor.rb +59 -0
- data/lib/relaton/cli.rb +21 -3
- data/relaton-cli.gemspec +9 -7
- data/templates/_document.liquid +7 -2
- data/templates/_index.liquid +53 -0
- metadata +31 -18
- data/.rubocop.ribose.yml +0 -66
- data/.rubocop.tb.yml +0 -650
- data/exe/relaton-concatenate +0 -24
- data/exe/relaton-fetch +0 -41
- data/exe/relaton-metanorma-extract +0 -20
- data/exe/relaton-xml-html +0 -28
- data/exe/relaton-yaml-xml +0 -68
data/lib/relaton/bibdata.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
+
require "date"
|
1
2
|
|
2
3
|
module Relaton
|
3
4
|
class Bibdata
|
4
5
|
ATTRIBS = %i[
|
5
|
-
|
6
|
+
docidentifier
|
6
7
|
doctype
|
7
8
|
title
|
8
9
|
stage
|
@@ -16,6 +17,16 @@ module Relaton
|
|
16
17
|
revdate
|
17
18
|
abstract
|
18
19
|
technical_committee
|
20
|
+
copyright_from
|
21
|
+
copyright_owner
|
22
|
+
contributor_author_role
|
23
|
+
contributor_author_organization
|
24
|
+
contributor_publisher_role
|
25
|
+
contributor_publisher_organization
|
26
|
+
language
|
27
|
+
script
|
28
|
+
edition
|
29
|
+
datetype
|
19
30
|
]
|
20
31
|
|
21
32
|
attr_accessor *ATTRIBS
|
@@ -31,68 +42,120 @@ module Relaton
|
|
31
42
|
options.each_pair do |k,v|
|
32
43
|
send("#{k.to_s}=", v)
|
33
44
|
end
|
45
|
+
self
|
46
|
+
end
|
34
47
|
|
35
|
-
|
36
|
-
|
48
|
+
# Characters that must not appear in a generated file name.
# From http://gavinmiller.io/2016/creating-a-secure-sanitization-function/
FILENAME_BAD_CHARS = ["/", "\\", "?", "%", "*", ":", "|", '"', "<", ">", ".", " "].freeze

# File-name-safe form of the document identifier.
#
# The identifier is downcased and every character listed in
# FILENAME_BAD_CHARS is replaced by "-".
#
# @return [String] the sanitized identifier, or "" when no identifier is set
def docidentifier_code
  return "" if docidentifier.nil?

  # Regexp.union escapes each entry, so all bad characters are replaced
  # literally and in a single pass.
  docidentifier.downcase.gsub(Regexp.union(FILENAME_BAD_CHARS), "-")
end
|
40
57
|
|
41
|
-
|
42
|
-
|
58
|
+
# Matches a word/slash prefix, whitespace, a run of digits (capture 2) and
# optional further digits after an optional colon, e.g. "ISO 1234:2018".
DOC_NUMBER_REGEX = /([\w\/]+)\s+(\d+):?(\d*)/

# Numeric part of the document identifier.
#
# @return [Integer] the first digit run that follows a prefix and whitespace,
#   or 999999 when the identifier is missing or does not match
def doc_number
  found = docidentifier&.match(DOC_NUMBER_REGEX)
  found ? found[2].to_i : 999999
end
|
44
62
|
|
45
63
|
# Builds an instance from a parsed bibdata element.
#
# Every XPath is passed through the `ns` helper (defined elsewhere in this
# class) before being handed to `source.at`.
#
# @param source [#at] bibdata element; presumably a Nokogiri node — it only
#   needs `at`, and the nodes it returns need `text`, `[]` and `parent`
# @return [Bibdata] instance populated from the element; attributes whose
#   element is absent are nil
# @raise [ArgumentError] from Date.parse when the selected date text is not
#   a parseable date
def self.from_xml(source)
  text_at = ->(xpath) { source.at(ns(xpath))&.text }

  # Date preference: published, then circulated, then the first date found.
  revdate = source.at(ns("./date[@type = 'published']")) ||
    source.at(ns("./date[@type = 'circulated']")) ||
    source.at(ns("./date"))
  # Without any date element the type defaults to "circulated".
  datetype = revdate ? revdate["type"] : "circulated"

  # Each role node is looked up once; the organization name is read from the
  # role's parent element.
  author_role = source.at(ns("./contributor/role[@type='author']"))
  publisher_role = source.at(ns("./contributor/role[@type='publisher']"))

  new({
    uri: text_at.call("./uri[not(@type)]"),
    xml: text_at.call("./uri[@type='xml']"),
    pdf: text_at.call("./uri[@type='pdf']"),
    html: text_at.call("./uri[@type='html']"),
    relaton: text_at.call("./uri[@type='relaton']"),
    doc: text_at.call("./uri[@type='doc']"),
    docidentifier: text_at.call("./docidentifier"),
    title: text_at.call("./title"),
    doctype: text_at.call("./@type"),
    stage: text_at.call("./status"),
    technical_committee: text_at.call("./editorialgroup/technical-committee"),
    abstract: text_at.call("./abstract"),
    revdate: revdate ? Date.parse(revdate.text) : nil,
    language: text_at.call("./language"),
    script: text_at.call("./script"),
    edition: text_at.call("./edition"),
    copyright_from: text_at.call("./copyright/from"),
    copyright_owner: text_at.call("./copyright/owner/organization/name"),
    contributor_author_role: author_role&.text,
    contributor_author_organization: author_role&.parent&.at(ns("./organization/name"))&.text,
    contributor_publisher_role: publisher_role&.text,
    contributor_publisher_organization: publisher_role&.parent&.at(ns("./organization/name"))&.text,
    datetype: datetype
  })
end
|
69
97
|
|
70
98
|
# Serializes this record as a <bibdata> XML fragment.
#
# <fetched> is always written with today's date and <title> is always
# written; every other element is written only when its attribute is set.
# <language> and <script> are emitted once each (after <edition>) and only
# when set.
#
# NOTE(review): attribute values are interpolated verbatim, without XML
# escaping — confirm that callers never pass "&" or "<" in these values.
#
# @return [String] the XML fragment, terminated by a newline
def to_xml
  ret = "<bibdata type='#{doctype}'>\n"
  ret << "<fetched>#{Date.today.to_s}</fetched>\n"
  ret << "<title>#{title}</title>\n"
  ret << "<docidentifier>#{docidentifier}</docidentifier>\n" if docidentifier
  ret << "<uri>#{uri}</uri>\n" if uri
  ret << "<uri type='xml'>#{xml}</uri>\n" if xml
  ret << "<uri type='html'>#{html}</uri>\n" if html
  ret << "<uri type='pdf'>#{pdf}</uri>\n" if pdf
  ret << "<uri type='doc'>#{doc}</uri>\n" if doc
  ret << "<uri type='relaton'>#{relaton}</uri>\n" if relaton

  # The copyright element is only written when a start year is known; the
  # owner is optional inside it.
  if copyright_from
    ret << "<copyright>"
    ret << "<from>#{copyright_from}</from>\n"
    ret << "<owner><organization><name>#{copyright_owner}</name></organization></owner>\n" if copyright_owner
    ret << "</copyright>"
  end

  if contributor_author_role
    ret << "<contributor>\n"
    ret << "<role type='author'/>\n"
    ret << "<organization><name>#{contributor_author_organization}</name></organization>\n"
    ret << "</contributor>\n"
  end

  if contributor_publisher_role
    ret << "<contributor>\n"
    ret << "<role type='publisher'/>\n"
    ret << "<organization><name>#{contributor_publisher_organization}</name></organization>\n"
    ret << "</contributor>\n"
  end

  ret << "<date type='#{datetype}'><on>#{revdate}</on></date>\n" if revdate
  ret << "<edition>#{edition}</edition>\n" if edition
  ret << "<language>#{language}</language>\n" if language
  ret << "<script>#{script}</script>\n" if script
  ret << "<abstract>#{abstract}</abstract>\n" if abstract
  ret << "<status>#{stage}</status>\n" if stage
  ret << "<editorialgroup><technical-committee>#{technical_committee}</technical-committee></editorialgroup>\n" if technical_committee
  ret << "</bibdata>\n"
end
|
88
147
|
|
89
148
|
# Attributes of this record as a Hash.
#
# Walks ATTRIBS in order and reads each attribute through its reader.
#
# @return [Hash{String => Object}] attribute name (as a String) mapped to its
#   value; attributes whose value is nil are left out
def to_h
  ATTRIBS.each_with_object({}) do |attrib, result|
    value = send(attrib)
    result[attrib.to_s] = value unless value.nil?
  end
end
|
96
155
|
|
156
|
+
def to_yaml
|
157
|
+
to_h.to_yaml
|
158
|
+
end
|
159
|
+
|
97
160
|
end
|
98
161
|
end
|
@@ -8,9 +8,9 @@
|
|
8
8
|
<div class="doc-identifier">
|
9
9
|
<h{{ depth }}>
|
10
10
|
{% if document.html == "" %}
|
11
|
-
{{ document.
|
11
|
+
{{ document.docidentifier }}
|
12
12
|
{% else %}
|
13
|
-
<a href="{{ document.html }}">{{ document.
|
13
|
+
<a href="{{ document.html }}">{{ document.docidentifier }}</a>
|
14
14
|
{% endif %}
|
15
15
|
</h{{ depth }}>
|
16
16
|
</div>
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require "fileutils"
|
2
|
+
require "relaton/bibdata"
|
3
|
+
require "relaton/bibcollection"
|
4
|
+
require "relaton/cli/xml_to_html_renderer"
|
5
|
+
|
6
|
+
module Relaton
|
7
|
+
module Cli
|
8
|
+
class BaseConvertor
|
9
|
+
def initialize(file, options = {})
|
10
|
+
@file = file
|
11
|
+
@options = options
|
12
|
+
@outdir = options.fetch(:outdir, nil)
|
13
|
+
@writable = options.fetch(:write, true)
|
14
|
+
|
15
|
+
install_dependencies(options[:require] || [])
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_html
|
19
|
+
content = convert_to_html
|
20
|
+
write_to_a_file(content)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Convert to HTML
|
24
|
+
#
|
25
|
+
# This interface expect us to provide Relaton collection XML
|
26
|
+
# as XML/RXL, and necessary styels / templates then it will be
|
27
|
+
# used convert that collection to HTML.
|
28
|
+
#
|
29
|
+
# @param file [String] Relaton collection file path
|
30
|
+
# @param style [String] Stylesheet file path for styles
|
31
|
+
# @param template [String] The liquid tempalte directory
|
32
|
+
#
|
33
|
+
def self.to_html(file, style, template)
|
34
|
+
new(file, style: style, template: template, extension: "html").to_html
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
attr_reader :file, :outdir, :options, :writable
|
40
|
+
|
41
|
+
def default_ext
|
42
|
+
raise "Override this method"
|
43
|
+
end
|
44
|
+
|
45
|
+
def convert_to_html
|
46
|
+
Relaton::Cli::XmlToHtmlRenderer.render(
|
47
|
+
xml_content(file),
|
48
|
+
stylesheet: options[:style],
|
49
|
+
liquid_dir: options[:template],
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
53
|
+
def xml_content(file)
|
54
|
+
File.read(file, encoding: "utf-8")
|
55
|
+
end
|
56
|
+
|
57
|
+
def install_dependencies(dependencies)
|
58
|
+
dependencies.each { |dependency| require(dependency) }
|
59
|
+
end
|
60
|
+
|
61
|
+
def convert_and_write(content, format)
|
62
|
+
content = convert_content(content)
|
63
|
+
write_to_a_file(content.send(format.to_sym))
|
64
|
+
write_to_file_collection(content, format.to_sym)
|
65
|
+
end
|
66
|
+
|
67
|
+
def write_to_a_file(content, outfile = nil)
|
68
|
+
outfile ||= Pathname.new(file).sub_ext(extension).to_s
|
69
|
+
File.open(outfile, "w:utf-8") { |file| file.write(content) }
|
70
|
+
end
|
71
|
+
|
72
|
+
def write_to_file_collection(content, format)
|
73
|
+
if outdir && content.is_a?(Relaton::Bibcollection)
|
74
|
+
FileUtils.mkdir_p(outdir)
|
75
|
+
|
76
|
+
content.items_flattened.each do |item|
|
77
|
+
collection = collection_filename(item.docidentifier_code)
|
78
|
+
write_to_a_file(item.send(format.to_sym), collection)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def extension
|
84
|
+
@extension ||= [".", options.fetch(:extension, default_ext)].join
|
85
|
+
end
|
86
|
+
|
87
|
+
def collection_filename(identifier)
|
88
|
+
File.join(
|
89
|
+
outdir, [@options[:prefix], identifier, extension].compact.join("")
|
90
|
+
)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "relaton/cli/relaton_file"
|
2
|
+
require "relaton/cli/xml_convertor"
|
3
|
+
require "relaton/cli/yaml_convertor"
|
4
|
+
|
5
|
+
module Relaton
  module Cli
    # Thor command set of the `relaton` executable. Each public method is a
    # sub-command that delegates to RelatonFile or to one of the convertors.
    class Command < Thor
      desc "fetch CODE", "Fetch Relaton XML for Standard identifier CODE"
      option :type, aliases: :t, required: true, desc: "Type of standard to get bibliographic entry for"
      option :year, aliases: :y, type: :numeric, desc: "Year the standard was published"

      # Prints the fetched entry as XML, a "not found" message, or the list
      # of recognised types when the given type is not registered.
      def fetch(code)
        # Called before the lookup; presumably initialises the shared Relaton
        # instance and its registry — confirm.
        Relaton::Cli.relaton
        say(fetch_document(code, options) || supported_type_message)
      end

      desc "extract Metanorma-XML-Directory Relaton-XML-Directory", "Extract Relaton XML from folder of Metanorma XML"
      option :extension, aliases: :x, desc: "File extension of Relaton XML files, defaults to 'rxl'"

      def extract(source_dir, outdir)
        Relaton::Cli::RelatonFile.extract(source_dir, outdir, options)
      end

      desc "concatenate SOURCE-DIR COLLECTION-FILE", "Concatenate entries in DIRECTORY (containing Relaton-XML or YAML) into a Relaton Collection"
      option :title, aliases: :t, desc: "Title of resulting Relaton collection"
      option :organization, aliases: :g, desc: "Organization owner of Relaton collection"

      def concatenate(source_dir, outfile)
        Relaton::Cli::RelatonFile.concatenate(source_dir, outfile, options)
      end

      desc "yaml2xml YAML", "Convert Relaton YAML into Relaton Collection XML or separate files"
      option :extension, aliases: :x, desc: "File extension of Relaton XML files, defaults to 'rxl'"
      option :prefix, aliases: :p, desc: "Filename prefix of individual Relaton XML files, defaults to empty"
      option :outdir, aliases: :o, desc: "Output to the specified directory with individual Relaton Bibdata XML files"
      option :require, aliases: :r, type: :array, desc: "Require LIBRARY prior to execution"

      def yaml2xml(filename)
        Relaton::Cli::YAMLConvertor.to_xml(filename, options)
      end

      # The input of this command is XML; the description previously said
      # "Relaton YAML".
      desc "xml2yaml XML", "Convert Relaton XML into Relaton Bibcollection YAML (and separate files)"
      option :extension, aliases: :x, desc: "File extension of Relaton YAML files, defaults to 'yaml'"
      option :prefix, aliases: :p, desc: "Filename prefix of Relaton XML files, defaults to empty"
      option :outdir, aliases: :o, desc: "Output to the specified directory with individual Relaton Bibdata YAML files"
      option :require, aliases: :r, type: :array, desc: "Require LIBRARY prior to execution"

      def xml2yaml(filename)
        Relaton::Cli::XMLConvertor.to_yaml(filename, options)
      end

      desc "xml2html RELATON-INDEX-XML STYLESHEET LIQUID-TEMPLATE-DIR", "Convert Relaton Collection XML into HTML"

      def xml2html(file, style, template)
        Relaton::Cli::XMLConvertor.to_html(file, style, template)
      end

      desc "yaml2html YAML STYLESHEET LIQUID-TEMPLATE-DIR", "Concatenate Relaton YAML into HTML"

      def yaml2html(file, style, template)
        Relaton::Cli::YAMLConvertor.to_html(file, style, template)
      end

      private

      # @return [String, nil] the entry as XML, a "not found" message, or nil
      #   when the requested type is not among the registered types
      def fetch_document(code, options)
        return unless registered_types.include?(options[:type])

        doc = Cli.relaton.fetch_std(code, options[:year], options[:type])
        doc ? doc.to_xml : "No matching bibliographic entry found"
      end

      # @return [String] message listing the registered types, sorted
      def supported_type_message
        ["Recognised types:", registered_types.sort.join(", ")].join(" ")
      end

      # Prefixes of all processors in the Relaton registry (memoized).
      def registered_types
        @registered_types ||=
          Relaton::Registry.instance.processors.map { |_n, pr| pr.prefix }
      end
    end
  end
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "pathname"
|
3
|
+
|
4
|
+
module Relaton
  module Cli
    # Directory-level operations: extracting bibdata elements out of a folder
    # of XML files, and concatenating a folder of RXL/YAML files into one
    # Relaton collection.
    class RelatonFile
      # @param source [String] the source directory
      # @param options [Hash] :outdir and :outfile are read here; :title,
      #   :doctype, :organization and :extension are read by the helpers
      def initialize(source, options = {})
        @source = source
        @options = options
        @outdir = options.fetch(:outdir, nil)
        @outfile = options.fetch(:outfile, nil)
      end

      def extract
        extract_and_write_to_files
      end

      def concatenate
        write_to_file(bibcollection.to_xml)
      end

      # Extract files
      #
      # This interface expects a source directory, an output directory and
      # custom configuration options. It extracts Relaton XML files from the
      # source directory into the output directory, using the custom options
      # when available.
      #
      # @param source [Dir] The source directory for files
      # @param outdir [Dir] The output directory for files
      # @param options [Hash] Options as hash key value pair
      #
      def self.extract(source, outdir, options = {})
        new(source, options.merge(outdir: outdir)).extract
      end

      # Concatenate files
      #
      # This interface expects a source directory, an output file and custom
      # configuration options. Normally the source directory contains RXL
      # files, but any YAML files are also converted to RXL and then all of
      # them are combined.
      #
      # It also accepts options such as title and organization and uses
      # those details to generate the collection file.
      #
      # @param source [Dir] The source directory for files
      # @param outfile [String] The collection output file
      # @param options [Hash] Options as hash key value pair
      #
      def self.concatenate(source, outfile, options = {})
        new(source, options.merge(outfile: outfile)).concatenate
      end

      private

      attr_reader :source, :options, :outdir, :outfile

      # Collection built from every bibdata document found in the source
      # directory, labelled with the title/doctype/organization options.
      def bibcollection
        ::Relaton::Bibcollection.new(
          title: options[:title],
          items: concatenate_files,
          doctype: options[:doctype],
          author: options[:organization],
        )
      end

      # Parses XML. When `document` is nil the content is read from `file`
      # (as UTF-8) instead.
      def nokogiri_document(document, file = nil)
        document ||= File.read(file, encoding: "utf-8")
        Nokogiri.XML(document)
      end

      # Writes the first bibdata element of every *.xml file under the source
      # directory into the output directory; files without one are skipped.
      def extract_and_write_to_files
        select_files_with("xml").each do |file|
          xml = nokogiri_document(nil, file)
          xml.remove_namespaces!

          bib = xml.at("//bibdata")
          next unless bib

          bib.add_namespace(nil, "")

          target = [outdir, build_filename(file, bib)].join("/")
          write_to_file(bib.to_xml, target)
        end
      end

      # @return [Array] one Bibdata per input whose root element is
      #   <bibdata>; inputs with another root, or with no root element at
      #   all (empty or unparseable content), are skipped
      def concatenate_files
        xml_files = [convert_rxl_to_xml, convert_yamls_to_xml]

        xml_files.flatten.map do |xml|
          doc = nokogiri_document(xml[:content])
          bibdata_instance(doc, xml[:file]) if doc.root&.name == "bibdata"
        end.compact
      end

      # Builds a Bibdata from the document and attaches sibling files.
      def bibdata_instance(document, file)
        document = clean_nokogiri_document(document)
        bibdata = Relaton::Bibdata.from_xml(document.root)
        build_bibdata_relaton(bibdata, file)

        bibdata
      end

      # For each of xml/pdf/doc/html, sets the matching bibdata attribute to
      # the path of the sibling file (same name, that extension) when such a
      # file exists. The path is stored as a String.
      def build_bibdata_relaton(bibdata, file)
        ["xml", "pdf", "doc", "html"].each do |type|
          sibling = Pathname.new(file).sub_ext(".#{type}").to_s
          bibdata.send("#{type}=", sibling) if File.file?(sibling)
        end
      end

      # Force a namespace otherwise Nokogiri won't parse.
      # The reason is we use Bibcollection's from_xml, but that one
      # has an xmlns. We don't want to change the code for bibdata
      # hence this hack #bibdata_doc.root['xmlns'] = "xmlns"
      #
      def clean_nokogiri_document(document)
        document.remove_namespaces!
        document.root.add_namespace(nil, "xmlns")
        nokogiri_document(document.to_xml)
      end

      # @return [Array<Hash>] { file:, content: } for every RXL file
      def convert_rxl_to_xml
        select_files_with("{rxl}").map do |file|
          { file: file, content: File.read(file, encoding: "utf-8") }
        end
      end

      # @return [Array<Hash>] { file:, content: } for every YAML file, with
      #   the content produced by YAMLConvertor.to_xml
      def convert_yamls_to_xml
        select_files_with("yaml").map do |file|
          { file: file, content: YAMLConvertor.to_xml(file, write: false) }
        end
      end

      # @param extension [String] extension (or glob fragment) to match
      # @return [Array<String>] matching non-directory paths found anywhere
      #   under the source directory
      def select_files_with(extension)
        files = File.join(source, "**", "*.#{extension}")
        Dir[files].reject { |file| File.directory?(file) }
      end

      # Writes `content` as UTF-8 to `output_file`, or to the configured
      # output file when none is given.
      def write_to_file(content, output_file = nil)
        output_file ||= outfile
        File.open(output_file, "w:utf-8") { |output| output.write(content) }
      end

      # Output file name for an extracted document: the docidentifier text
      # (or the source file's base name without ".xml"), trimmed, with
      # whitespace runs turned into "-", plus the :extension option or "rxl".
      #
      # NOTE(review): other characters, including "/", are kept as-is —
      # confirm identifiers such as "ISO/IEC 1" are handled by the caller.
      def build_filename(file, document)
        identifier = document&.at("./docidentifier")&.text ||
          Pathname.new(File.basename(file, ".xml")).to_s

        filename = identifier.sub(/^\s+/, "").sub(/\s+$/, "").gsub(/\s+/, "-")
        [filename, options[:extension] || "rxl"].join(".")
      end
    end
  end
end
|