relaton-cli 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +13 -0
- data/.hound.yml +3 -0
- data/.rubocop.yml +2 -7
- data/.travis.yml +5 -0
- data/Gemfile.lock +31 -24
- data/README.adoc +77 -36
- data/bin/rspec +29 -0
- data/exe/relaton +21 -0
- data/lib/relaton/bibcollection.rb +27 -38
- data/lib/relaton/bibdata.rb +81 -18
- data/lib/relaton/cli/_document.liquid +2 -2
- data/lib/relaton/cli/base_convertor.rb +94 -0
- data/lib/relaton/cli/command.rb +83 -0
- data/lib/relaton/cli/relaton_file.rb +153 -0
- data/lib/relaton/cli/version.rb +1 -1
- data/lib/relaton/cli/xml_convertor.rb +41 -0
- data/lib/relaton/cli/xml_to_html_renderer.rb +4 -14
- data/lib/relaton/cli/yaml_convertor.rb +59 -0
- data/lib/relaton/cli.rb +21 -3
- data/relaton-cli.gemspec +9 -7
- data/templates/_document.liquid +7 -2
- data/templates/_index.liquid +53 -0
- metadata +31 -18
- data/.rubocop.ribose.yml +0 -66
- data/.rubocop.tb.yml +0 -650
- data/exe/relaton-concatenate +0 -24
- data/exe/relaton-fetch +0 -41
- data/exe/relaton-metanorma-extract +0 -20
- data/exe/relaton-xml-html +0 -28
- data/exe/relaton-yaml-xml +0 -68
data/lib/relaton/bibdata.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
+
require "date"
|
1
2
|
|
2
3
|
module Relaton
|
3
4
|
class Bibdata
|
4
5
|
ATTRIBS = %i[
|
5
|
-
|
6
|
+
docidentifier
|
6
7
|
doctype
|
7
8
|
title
|
8
9
|
stage
|
@@ -16,6 +17,16 @@ module Relaton
|
|
16
17
|
revdate
|
17
18
|
abstract
|
18
19
|
technical_committee
|
20
|
+
copyright_from
|
21
|
+
copyright_owner
|
22
|
+
contributor_author_role
|
23
|
+
contributor_author_organization
|
24
|
+
contributor_publisher_role
|
25
|
+
contributor_publisher_organization
|
26
|
+
language
|
27
|
+
script
|
28
|
+
edition
|
29
|
+
datetype
|
19
30
|
]
|
20
31
|
|
21
32
|
attr_accessor *ATTRIBS
|
@@ -31,68 +42,120 @@ module Relaton
|
|
31
42
|
options.each_pair do |k,v|
|
32
43
|
send("#{k.to_s}=", v)
|
33
44
|
end
|
45
|
+
self
|
46
|
+
end
|
34
47
|
|
35
|
-
|
36
|
-
|
48
|
+
# Characters replaced with "-" when a document identifier is turned into a
# file name.
# From http://gavinmiller.io/2016/creating-a-secure-sanitization-function/
FILENAME_BAD_CHARS = [ '/', '\\', '?', '%', '*', ':', '|', '"', '<', '>', '.', ' ' ].freeze

# Builds a filename-safe code from the document identifier.
#
# @return [String] the downcased identifier with every character listed in
#   FILENAME_BAD_CHARS replaced by "-"; "" when docidentifier is nil
def docidentifier_code
  return "" if docidentifier.nil?

  # One pass over the string; Regexp.union escapes each literal character.
  docidentifier.downcase.gsub(Regexp.union(FILENAME_BAD_CHARS), "-")
end
|
40
57
|
|
41
|
-
|
42
|
-
|
58
|
+
# Matches "<prefix> <number>[:<digits>]", e.g. "ISO 19115:2003".
# Capture group 2 is the document number.
DOC_NUMBER_REGEX = /([\w\/]+)\s+(\d+):?(\d*)/

# Extracts the numeric part of the document identifier.
#
# @return [Integer] the number captured by DOC_NUMBER_REGEX, or 999999 when
#   docidentifier is nil or does not match
def doc_number
  # Use the returned MatchData rather than the global $2.
  match = docidentifier&.match(DOC_NUMBER_REGEX)
  match ? match[2].to_i : 999999
end
|
44
62
|
|
45
63
|
def self.from_xml(source)
|
46
64
|
|
47
65
|
# bib.relaton_xml_path = URI.escape("#{relaton_root}/#{id_code}.xml")
|
48
|
-
|
49
|
-
|
50
|
-
|
66
|
+
revdate = source.at(ns("./date[@type = 'published']")) ||
|
67
|
+
source.at(ns("./date[@type = 'circulated']")) || source.at(ns("./date"))
|
68
|
+
datetype = "circulated"
|
69
|
+
datetype = revdate["type"] if revdate
|
51
70
|
|
52
71
|
new({
|
53
|
-
uri: source.at(ns("./uri"))&.text,
|
72
|
+
uri: source.at(ns("./uri[not(@type)]"))&.text,
|
54
73
|
xml: source.at(ns("./uri[@type='xml']"))&.text,
|
55
74
|
pdf: source.at(ns("./uri[@type='pdf']"))&.text,
|
56
75
|
html: source.at(ns("./uri[@type='html']"))&.text,
|
57
76
|
relaton: source.at(ns("./uri[@type='relaton']"))&.text,
|
58
77
|
doc: source.at(ns("./uri[@type='doc']"))&.text,
|
59
|
-
|
78
|
+
docidentifier: source.at(ns("./docidentifier"))&.text,
|
60
79
|
title: source.at(ns("./title"))&.text,
|
61
80
|
doctype: source.at(ns("./@type"))&.text,
|
62
81
|
stage: source.at(ns("./status"))&.text,
|
63
|
-
technical_committee: source.at(ns("./technical-committee"))&.text,
|
82
|
+
technical_committee: source.at(ns("./editorialgroup/technical-committee"))&.text,
|
64
83
|
abstract: source.at(ns("./abstract"))&.text,
|
65
|
-
revdate: Date.parse(revdate)
|
66
|
-
|
84
|
+
revdate: revdate ? Date.parse(revdate.text) : nil,
|
85
|
+
language: source.at(ns("./language"))&.text,
|
86
|
+
script: source.at(ns("./script"))&.text,
|
87
|
+
edition: source.at(ns("./edition"))&.text,
|
88
|
+
copyright_from: source.at(ns("./copyright/from"))&.text,
|
89
|
+
copyright_owner: source.at(ns("./copyright/owner/organization/name"))&.text,
|
90
|
+
contributor_author_role: source.at(ns("./contributor/role[@type='author']"))&.text,
|
91
|
+
contributor_author_organization: source.at(ns("./contributor/role[@type='author']"))&.parent&.at(ns("./organization/name"))&.text,
|
92
|
+
contributor_publisher_role: source.at(ns("./contributor/role[@type='publisher']"))&.text,
|
93
|
+
contributor_publisher_organization: source.at(ns("./contributor/role[@type='publisher']"))&.parent&.at(ns("./organization/name"))&.text,
|
94
|
+
datetype: datetype
|
67
95
|
})
|
68
96
|
end
|
69
97
|
|
70
98
|
# Serializes this record as a <bibdata> XML fragment.
#
# The root element, <fetched> (today's date) and <title> are always written;
# every other element is written only when its attribute is set. <language>
# and <script> are written once each, after <edition>. Attribute values are
# interpolated as-is; no XML escaping is applied here.
#
# @return [String] the serialized <bibdata> element
def to_xml
  ret = "<bibdata type='#{doctype}'>\n"
  ret += "<fetched>#{Date.today.to_s}</fetched>\n"
  ret += "<title>#{title}</title>\n"
  ret += "<docidentifier>#{docidentifier}</docidentifier>\n" if docidentifier
  ret += "<uri>#{uri}</uri>\n" if uri
  ret += "<uri type='xml'>#{xml}</uri>\n" if xml
  ret += "<uri type='html'>#{html}</uri>\n" if html
  ret += "<uri type='pdf'>#{pdf}</uri>\n" if pdf
  ret += "<uri type='doc'>#{doc}</uri>\n" if doc
  ret += "<uri type='relaton'>#{relaton}</uri>\n" if relaton

  if copyright_from
    ret += "<copyright>"
    ret += "<from>#{copyright_from}</from>\n"
    ret += "<owner><organization><name>#{copyright_owner}</name></organization></owner>\n" if copyright_owner
    ret += "</copyright>"
  end

  if contributor_author_role
    ret += "<contributor>\n"
    ret += "<role type='author'/>\n"
    ret += "<organization><name>#{contributor_author_organization}</name></organization>\n"
    ret += "</contributor>\n"
  end

  if contributor_publisher_role
    ret += "<contributor>\n"
    ret += "<role type='publisher'/>\n"
    ret += "<organization><name>#{contributor_publisher_organization}</name></organization>\n"
    ret += "</contributor>\n"
  end

  ret += "<date type='#{datetype}'><on>#{revdate}</on></date>\n" if revdate
  ret += "<edition>#{edition}</edition>\n" if edition
  # Written here only: a second, unconditional copy used to be emitted
  # earlier, which duplicated both elements (or produced empty ones).
  ret += "<language>#{language}</language>\n" if language
  ret += "<script>#{script}</script>\n" if script
  ret += "<abstract>#{abstract}</abstract>\n" if abstract
  ret += "<status>#{stage}</status>\n" if stage
  ret += "<editorialgroup><technical-committee>#{technical_committee}</technical-committee></editorialgroup>\n" if technical_committee
  ret += "</bibdata>\n"
end
|
88
147
|
|
89
148
|
def to_h
|
90
149
|
ATTRIBS.inject({}) do |acc, k|
|
91
150
|
value = send(k)
|
92
|
-
acc[k] = value unless value.nil?
|
151
|
+
acc[k.to_s] = value unless value.nil?
|
93
152
|
acc
|
94
153
|
end
|
95
154
|
end
|
96
155
|
|
156
|
+
def to_yaml
|
157
|
+
to_h.to_yaml
|
158
|
+
end
|
159
|
+
|
97
160
|
end
|
98
161
|
end
|
@@ -8,9 +8,9 @@
|
|
8
8
|
<div class="doc-identifier">
|
9
9
|
<h{{ depth }}>
|
10
10
|
{% if document.html == "" %}
|
11
|
-
{{ document.
|
11
|
+
{{ document.docidentifier }}
|
12
12
|
{% else %}
|
13
|
-
<a href="{{ document.html }}">{{ document.
|
13
|
+
<a href="{{ document.html }}">{{ document.docidentifier }}</a>
|
14
14
|
{% endif %}
|
15
15
|
</h{{ depth }}>
|
16
16
|
</div>
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require "fileutils"
|
2
|
+
require "relaton/bibdata"
|
3
|
+
require "relaton/bibcollection"
|
4
|
+
require "relaton/cli/xml_to_html_renderer"
|
5
|
+
|
6
|
+
module Relaton
|
7
|
+
module Cli
|
8
|
+
class BaseConvertor
|
9
|
+
def initialize(file, options = {})
|
10
|
+
@file = file
|
11
|
+
@options = options
|
12
|
+
@outdir = options.fetch(:outdir, nil)
|
13
|
+
@writable = options.fetch(:write, true)
|
14
|
+
|
15
|
+
install_dependencies(options[:require] || [])
|
16
|
+
end
|
17
|
+
|
18
|
+
def to_html
|
19
|
+
content = convert_to_html
|
20
|
+
write_to_a_file(content)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Convert to HTML
|
24
|
+
#
|
25
|
+
# This interface expects us to provide Relaton collection XML
|
26
|
+
# as XML/RXL, and necessary styles / templates then it will be
|
27
|
+
# used to convert that collection to HTML.
|
28
|
+
#
|
29
|
+
# @param file [String] Relaton collection file path
|
30
|
+
# @param style [String] Stylesheet file path for styles
|
31
|
+
# @param template [String] The liquid template directory
|
32
|
+
#
|
33
|
+
def self.to_html(file, style, template)
|
34
|
+
new(file, style: style, template: template, extension: "html").to_html
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
attr_reader :file, :outdir, :options, :writable
|
40
|
+
|
41
|
+
def default_ext
|
42
|
+
raise "Override this method"
|
43
|
+
end
|
44
|
+
|
45
|
+
def convert_to_html
|
46
|
+
Relaton::Cli::XmlToHtmlRenderer.render(
|
47
|
+
xml_content(file),
|
48
|
+
stylesheet: options[:style],
|
49
|
+
liquid_dir: options[:template],
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
53
|
+
def xml_content(file)
|
54
|
+
File.read(file, encoding: "utf-8")
|
55
|
+
end
|
56
|
+
|
57
|
+
def install_dependencies(dependencies)
|
58
|
+
dependencies.each { |dependency| require(dependency) }
|
59
|
+
end
|
60
|
+
|
61
|
+
def convert_and_write(content, format)
|
62
|
+
content = convert_content(content)
|
63
|
+
write_to_a_file(content.send(format.to_sym))
|
64
|
+
write_to_file_collection(content, format.to_sym)
|
65
|
+
end
|
66
|
+
|
67
|
+
def write_to_a_file(content, outfile = nil)
|
68
|
+
outfile ||= Pathname.new(file).sub_ext(extension).to_s
|
69
|
+
File.open(outfile, "w:utf-8") { |file| file.write(content) }
|
70
|
+
end
|
71
|
+
|
72
|
+
def write_to_file_collection(content, format)
|
73
|
+
if outdir && content.is_a?(Relaton::Bibcollection)
|
74
|
+
FileUtils.mkdir_p(outdir)
|
75
|
+
|
76
|
+
content.items_flattened.each do |item|
|
77
|
+
collection = collection_filename(item.docidentifier_code)
|
78
|
+
write_to_a_file(item.send(format.to_sym), collection)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
def extension
|
84
|
+
@extension ||= [".", options.fetch(:extension, default_ext)].join
|
85
|
+
end
|
86
|
+
|
87
|
+
def collection_filename(identifier)
|
88
|
+
File.join(
|
89
|
+
outdir, [@options[:prefix], identifier, extension].compact.join("")
|
90
|
+
)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "relaton/cli/relaton_file"
|
2
|
+
require "relaton/cli/xml_convertor"
|
3
|
+
require "relaton/cli/yaml_convertor"
|
4
|
+
|
5
|
+
module Relaton
|
6
|
+
module Cli
|
7
|
+
class Command < Thor
|
8
|
+
desc "fetch CODE", "Fetch Relaton XML for Standard identifier CODE"
|
9
|
+
option :type, aliases: :t, required: true, desc: "Type of standard to get bibliographic entry for"
|
10
|
+
option :year, aliases: :y, type: :numeric, desc: "Year the standard was published"
|
11
|
+
|
12
|
+
def fetch(code)
|
13
|
+
Relaton::Cli.relaton
|
14
|
+
say(fetch_document(code, options) || supported_type_message)
|
15
|
+
end
|
16
|
+
|
17
|
+
desc "extract Metanorma-XML-Directory Relaton-XML-Directory", "Extract Relaton XML from folder of Metanorma XML"
|
18
|
+
option :extension, aliases: :x, desc: "File extension of Relaton XML files, defaults to 'rxl'"
|
19
|
+
|
20
|
+
def extract(source_dir, outdir)
|
21
|
+
Relaton::Cli::RelatonFile.extract(source_dir, outdir, options)
|
22
|
+
end
|
23
|
+
|
24
|
+
desc "concatenate SOURCE-DIR COLLECTION-FILE", "Concatenate entries in DIRECTORY (containing Relaton-XML or YAML) into a Relaton Collection"
|
25
|
+
option :title, aliases: :t, desc: "Title of resulting Relaton collection"
|
26
|
+
option :organization, aliases: :g, desc: "Organization owner of Relaton collection"
|
27
|
+
|
28
|
+
def concatenate(source_dir, outfile)
|
29
|
+
Relaton::Cli::RelatonFile.concatenate(source_dir, outfile, options)
|
30
|
+
end
|
31
|
+
|
32
|
+
desc "yaml2xml YAML", "Convert Relaton YAML into Relaton Collection XML or separate files"
|
33
|
+
option :extension, aliases: :x, desc: "File extension of Relaton XML files, defaults to 'rxl'"
|
34
|
+
option :prefix, aliases: :p, desc: "Filename prefix of individual Relaton XML files, defaults to empty"
|
35
|
+
option :outdir, aliases: :o, desc: "Output to the specified directory with individual Relaton Bibdata XML files"
|
36
|
+
option :require, aliases: :r, type: :array, desc: "Require LIBRARY prior to execution"
|
37
|
+
|
38
|
+
def yaml2xml(filename)
|
39
|
+
Relaton::Cli::YAMLConvertor.to_xml(filename, options)
|
40
|
+
end
|
41
|
+
|
42
|
+
# `xml2yaml` command: hands the given XML file and the parsed command-line
# options to XMLConvertor.to_yaml.
desc "xml2yaml XML", "Convert Relaton XML into Relaton Bibcollection YAML (and separate files)"
option :extension, aliases: :x, desc: "File extension of Relaton YAML files, defaults to 'yaml'"
option :prefix, aliases: :p, desc: "Filename prefix of individual Relaton YAML files, defaults to empty"
option :outdir, aliases: :o, desc: "Output to the specified directory with individual Relaton Bibdata YAML files"
option :require, aliases: :r, type: :array, desc: "Require LIBRARY prior to execution"

def xml2yaml(filename)
  Relaton::Cli::XMLConvertor.to_yaml(filename, options)
end
|
51
|
+
|
52
|
+
desc "xml2html RELATON-INDEX-XML STYLESHEET LIQUID-TEMPLATE-DIR", "Convert Relaton Collection XML into HTML"
|
53
|
+
|
54
|
+
def xml2html(file, style, template)
|
55
|
+
Relaton::Cli::XMLConvertor.to_html(file, style, template)
|
56
|
+
end
|
57
|
+
|
58
|
+
desc "yaml2html YAML STYLESHEET LIQUID-TEMPLATE-DIR", "Concatenate Relaton YAML into HTML"
|
59
|
+
|
60
|
+
def yaml2html(file, style, template)
|
61
|
+
Relaton::Cli::YAMLConvertor.to_html(file, style, template)
|
62
|
+
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
def fetch_document(code, options)
|
67
|
+
if registered_types.include?(options[:type])
|
68
|
+
doc = Cli.relaton.fetch_std(code, options[:year], options[:type])
|
69
|
+
doc ? doc.to_xml : "No matching bibliographic entry found"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def supported_type_message
|
74
|
+
["Recognised types:", registered_types.sort.join(", ")].join(" ")
|
75
|
+
end
|
76
|
+
|
77
|
+
def registered_types
|
78
|
+
@registered_types ||=
|
79
|
+
Relaton::Registry.instance.processors.each.map { |_n, pr| pr.prefix }
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "pathname"
|
3
|
+
|
4
|
+
module Relaton
|
5
|
+
module Cli
|
6
|
+
class RelatonFile
|
7
|
+
def initialize(source, options = {})
|
8
|
+
@source = source
|
9
|
+
@options = options
|
10
|
+
@outdir = options.fetch(:outdir, nil)
|
11
|
+
@outfile = options.fetch(:outfile, nil)
|
12
|
+
end
|
13
|
+
|
14
|
+
def extract
|
15
|
+
extract_and_write_to_files
|
16
|
+
end
|
17
|
+
|
18
|
+
def concatenate
|
19
|
+
write_to_file(bibcollection.to_xml)
|
20
|
+
end
|
21
|
+
|
22
|
+
# Extract files
|
23
|
+
#
|
24
|
+
# This interface expects us to provide a source directory, output
|
25
|
+
# directory and custom configuration options. Then it will extract
|
26
|
+
# Relaton XML files to output directory from the source directory
|
27
|
+
# During this process it will use custom options when available.
|
28
|
+
#
|
29
|
+
# @param source [Dir] The source directory for files
|
30
|
+
# @param outdir [Dir] The output directory for files
|
31
|
+
# @param options [Hash] Options as hash key value pair
|
32
|
+
#
|
33
|
+
def self.extract(source, outdir, options = {})
|
34
|
+
new(source, options.merge(outdir: outdir)).extract
|
35
|
+
end
|
36
|
+
|
37
|
+
# Concatenate files
|
38
|
+
#
|
39
|
+
# This interface expects us to provide a source directory, output
|
40
|
+
# file and custom configuration options. Normally, this expects the
|
41
|
+
# source directory to contain RXL files, but it also converts any
|
42
|
+
# YAML files to RXL and then finally combines those together.
|
43
|
+
#
|
44
|
+
# This interface also allows us to provide options like title and
|
45
|
+
# organization and then it uses those details to generate the
|
46
|
+
# collection file.
|
47
|
+
#
|
48
|
+
# @param source [Dir] The source directory for files
|
49
|
+
# @param outfile [String] The collection output file
|
50
|
+
# @param options [Hash] Options as hash key value pair
|
51
|
+
#
|
52
|
+
def self.concatenate(source, outfile, options = {})
|
53
|
+
new(source, options.merge(outfile: outfile)).concatenate
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
attr_reader :source, :options, :outdir, :outfile
|
59
|
+
|
60
|
+
def bibcollection
|
61
|
+
::Relaton::Bibcollection.new(
|
62
|
+
title: options[:title],
|
63
|
+
items: concatenate_files,
|
64
|
+
doctype: options[:doctype],
|
65
|
+
author: options[:organization],
|
66
|
+
)
|
67
|
+
end
|
68
|
+
|
69
|
+
def nokogiri_document(document, file = nil)
|
70
|
+
document ||= File.read(file, encoding: "utf-8")
|
71
|
+
Nokogiri.XML(document)
|
72
|
+
end
|
73
|
+
|
74
|
+
def extract_and_write_to_files
|
75
|
+
select_files_with("xml").each do |file|
|
76
|
+
xml = nokogiri_document(nil, file)
|
77
|
+
xml.remove_namespaces!
|
78
|
+
|
79
|
+
bib = xml.at("//bibdata") || next
|
80
|
+
bib.add_namespace(nil, "")
|
81
|
+
|
82
|
+
outfile = [outdir, build_filename(file, bib)].join("/")
|
83
|
+
write_to_file(bib.to_xml, outfile)
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
# Collects the bibdata entries to be placed in the collection.
#
# Gathers the RXL and YAML-derived XML contents, parses each one and keeps
# only documents whose root element is named "bibdata". Documents without a
# root element (for example an empty file) are skipped instead of raising.
#
# @return [Array] one bibdata_instance result per qualifying document
def concatenate_files
  xml_files = [convert_rxl_to_xml, convert_yamls_to_xml].flatten

  xml_files.map do |xml|
    doc = nokogiri_document(xml[:content])
    # root is nil when nothing could be parsed; treat that as "not bibdata".
    bibdata_instance(doc, xml[:file]) if doc.root&.name == "bibdata"
  end.compact
end
|
95
|
+
|
96
|
+
def bibdata_instance(document, file)
|
97
|
+
document = clean_nokogiri_document(document)
|
98
|
+
bibdata = Relaton::Bibdata.from_xml(document.root)
|
99
|
+
build_bibdata_relaton(bibdata, file)
|
100
|
+
|
101
|
+
bibdata
|
102
|
+
end
|
103
|
+
|
104
|
+
# Links a bibdata record to rendered files that sit next to its source file.
#
# For each of "xml", "pdf", "doc" and "html", the source path's extension is
# swapped for that type; when such a file exists, its path is assigned (as a
# String) to the matching attribute on bibdata.
#
# @param bibdata [#xml=, #pdf=, #doc=, #html=] record to update in place
# @param file [String, Pathname] path of the source file
def build_bibdata_relaton(bibdata, file)
  ["xml", "pdf", "doc", "html"].each do |type|
    # Keep the parameter untouched and store a plain String, not a Pathname.
    sibling = Pathname.new(file).sub_ext(".#{type}").to_s
    bibdata.public_send("#{type}=", sibling) if File.file?(sibling)
  end
end
|
110
|
+
|
111
|
+
# Force a namespace otherwise Nokogiri won't parse.
|
112
|
+
# The reason is we use Bibcollection's from_xml, but that one
|
113
|
+
# has an xmlns. We don't want to change the code for bibdata
|
114
|
+
# hence this hack #bibdata_doc.root['xmlns'] = "xmlns"
|
115
|
+
#
|
116
|
+
def clean_nokogiri_document(document)
|
117
|
+
document.remove_namespaces!
|
118
|
+
document.root.add_namespace(nil, "xmlns")
|
119
|
+
nokogiri_document(document.to_xml)
|
120
|
+
end
|
121
|
+
|
122
|
+
def convert_rxl_to_xml
|
123
|
+
select_files_with("{rxl}").map do |file|
|
124
|
+
{ file: file, content: File.read(file, encoding: "utf-8") }
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def convert_yamls_to_xml
|
129
|
+
select_files_with("yaml").map do |file|
|
130
|
+
{ file: file, content: YAMLConvertor.to_xml(file, write: false) }
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def select_files_with(extension)
|
135
|
+
files = File.join(source, "**", "*.#{extension}")
|
136
|
+
Dir[files].reject { |file| File.directory?(file) }
|
137
|
+
end
|
138
|
+
|
139
|
+
def write_to_file(content, output_file = nil)
|
140
|
+
output_file ||= outfile
|
141
|
+
File.open(output_file, "w:utf-8") { |file| file.write(content) }
|
142
|
+
end
|
143
|
+
|
144
|
+
# Builds the output file name for an extracted bibdata element.
#
# The name comes from the element's <docidentifier> text, falling back to the
# source file's base name (without ".xml") when that text is missing or
# blank. Surrounding whitespace is dropped, and runs of whitespace or path
# separators become "-" so the result is always a single path component
# (an identifier such as "ISO/IEC 123" must not point into a subdirectory).
#
# @param file [String] path of the source XML file
# @param document [#at, nil] bibdata node, queried with at("./docidentifier")
# @return [String] "<name>.<ext>", ext being options[:extension] or "rxl"
def build_filename(file, document)
  identifier = (document&.at("./docidentifier")&.text).to_s.strip
  identifier = File.basename(file, ".xml").strip if identifier.empty?

  filename = identifier.gsub(%r{[\s/\\]+}, "-")
  [filename, options[:extension] || "rxl"].join(".")
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|