metanorma 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +14 -0
- data/lib/metanorma/collection/collection.rb +46 -105
- data/lib/metanorma/collection/config/config.rb +5 -0
- data/lib/metanorma/collection/config/converters.rb +1 -0
- data/lib/metanorma/collection/config/manifest.rb +23 -0
- data/lib/metanorma/collection/document/document.rb +32 -10
- data/lib/metanorma/collection/filelookup/base.rb +43 -0
- data/lib/metanorma/collection/filelookup/filelookup.rb +168 -69
- data/lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb +49 -10
- data/lib/metanorma/collection/filelookup/utils.rb +93 -0
- data/lib/metanorma/collection/helpers.rb +82 -0
- data/lib/metanorma/collection/manifest/manifest.rb +14 -3
- data/lib/metanorma/collection/multilingual/multilingual.rb +1 -1
- data/lib/metanorma/collection/renderer/filelocation.rb +162 -0
- data/lib/metanorma/collection/renderer/fileparse.rb +9 -6
- data/lib/metanorma/collection/renderer/fileprocess.rb +56 -42
- data/lib/metanorma/collection/renderer/navigation.rb +15 -1
- data/lib/metanorma/collection/renderer/render_word.rb +8 -4
- data/lib/metanorma/collection/renderer/renderer.rb +104 -10
- data/lib/metanorma/collection/renderer/svg.rb +54 -7
- data/lib/metanorma/collection/renderer/utils.rb +58 -22
- data/lib/metanorma/collection/sectionsplit/collection.rb +14 -5
- data/lib/metanorma/collection/sectionsplit/sectionsplit.rb +20 -7
- data/lib/metanorma/collection/util/disambig_files.rb +4 -5
- data/lib/metanorma/collection/util/util.rb +106 -6
- data/lib/metanorma/collection/xrefprocess/xrefprocess.rb +2 -2
- data/lib/metanorma/compile/compile_options.rb +3 -2
- data/lib/metanorma/compile/flavor.rb +11 -4
- data/lib/metanorma/compile/render.rb +1 -0
- data/lib/metanorma/version.rb +1 -1
- data/metanorma.gemspec +4 -14
- metadata +28 -150
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
require "isodoc"
|
|
2
2
|
require "htmlentities"
|
|
3
3
|
require "metanorma-utils"
|
|
4
|
+
require "marcel"
|
|
4
5
|
require_relative "filelookup_sectionsplit"
|
|
6
|
+
require_relative "base"
|
|
7
|
+
require_relative "utils"
|
|
5
8
|
|
|
6
9
|
module Metanorma
|
|
7
10
|
class Collection
|
|
@@ -28,12 +31,16 @@ module Metanorma
|
|
|
28
31
|
read_files(@manifest.entry, parent.manifest)
|
|
29
32
|
end
|
|
30
33
|
|
|
31
|
-
def read_files(entries, parent)
|
|
34
|
+
def read_files(entries, parent, idx = 0)
|
|
32
35
|
Array(entries).each do |e|
|
|
33
36
|
derive_format(e, parent)
|
|
34
|
-
e.file
|
|
35
|
-
|
|
37
|
+
if e.file
|
|
38
|
+
read_file(e, idx)
|
|
39
|
+
idx += 1
|
|
40
|
+
end
|
|
41
|
+
idx = read_files(e.entry, e, idx)
|
|
36
42
|
end
|
|
43
|
+
idx
|
|
37
44
|
end
|
|
38
45
|
|
|
39
46
|
def derive_format(entry, parent)
|
|
@@ -42,9 +49,9 @@ module Metanorma
|
|
|
42
49
|
entry.format |= ["xml", "presentation"]
|
|
43
50
|
end
|
|
44
51
|
|
|
45
|
-
def read_file(manifest)
|
|
52
|
+
def read_file(manifest, idx)
|
|
46
53
|
i, k = read_file_idents(manifest)
|
|
47
|
-
entry = file_entry(manifest, k) or return
|
|
54
|
+
entry = file_entry(manifest, k, idx) or return
|
|
48
55
|
bibdata_process(entry, i)
|
|
49
56
|
bibitem_process(entry)
|
|
50
57
|
@files[key(i)] = entry
|
|
@@ -74,12 +81,6 @@ module Metanorma
|
|
|
74
81
|
end
|
|
75
82
|
end
|
|
76
83
|
|
|
77
|
-
def anchors_lookup(anchors)
|
|
78
|
-
anchors.values.each_with_object({}) do |v, m|
|
|
79
|
-
v.each_value { |v1| m[v1] = true }
|
|
80
|
-
end
|
|
81
|
-
end
|
|
82
|
-
|
|
83
84
|
def bibdata_extract(xml)
|
|
84
85
|
anchors = read_anchors(xml)
|
|
85
86
|
{ anchors: anchors, anchors_lookup: anchors_lookup(anchors),
|
|
@@ -100,30 +101,131 @@ module Metanorma
|
|
|
100
101
|
# out_path is the destination file address, with any references outside
|
|
101
102
|
# the working directory (../../...) truncated, and based on relative path
|
|
102
103
|
# identifier is the id with only spaces, no nbsp
|
|
103
|
-
|
|
104
|
+
# idx is the index of the document in the manifest
|
|
105
|
+
def file_entry(ref, identifier, idx)
|
|
104
106
|
ref.file or return
|
|
105
107
|
abs = @documents[Util::key identifier].file
|
|
108
|
+
# For sectionsplit outputs from YAML manifest, we need to compute the full path
|
|
109
|
+
# by combining sectionsplit_filename directory with ref.file basename
|
|
110
|
+
sso = ref.respond_to?(:sectionsplit_output) && ref.sectionsplit_output
|
|
111
|
+
out_path, rel_path = file_entry_paths(ref, idx, sso)
|
|
106
112
|
ret = if ref.file
|
|
107
|
-
{ type: "fileref", ref: abs, rel_path:
|
|
108
|
-
out_path:
|
|
109
|
-
|
|
113
|
+
{ type: "fileref", ref: abs, rel_path: rel_path, url: ref.url,
|
|
114
|
+
out_path: out_path, idx: idx,
|
|
115
|
+
output_filename: ref.output_filename,
|
|
116
|
+
sectionsplit_filename: ref.sectionsplit_filename,
|
|
117
|
+
pdffile: ref.pdffile, format: ref.format&.map(&:to_sym) }
|
|
118
|
+
.compact
|
|
110
119
|
else { type: "id", ref: ref.id }
|
|
111
120
|
end
|
|
112
121
|
file_entry_copy(ref, ret)
|
|
113
122
|
ret.compact
|
|
114
123
|
end
|
|
115
124
|
|
|
125
|
+
|
|
126
|
+
# ref is the absolute source file address
|
|
127
|
+
# rel_path is the relative source file address, relative to the YAML location
|
|
128
|
+
# out_path is the destination file address, with any references outside
|
|
129
|
+
# the working directory (../../...) truncated, and based on relative path
|
|
130
|
+
# identifier is the id with only spaces, no nbsp
|
|
131
|
+
# extract_opts are the compilation options extracted as document attributes
|
|
132
|
+
def file_entry_struct(ref, abs)
|
|
133
|
+
adoc = abs.sub(/\.xml$/, ".adoc")
|
|
134
|
+
if adoc.end_with?(".adoc") && File.exist?(adoc)
|
|
135
|
+
opts = Metanorma::Input::Asciidoc.new.extract_options(File.read(adoc))
|
|
136
|
+
end
|
|
137
|
+
{ type: "fileref", ref: abs, rel_path: ref.file, url: ref.url,
|
|
138
|
+
out_path: output_file_path(ref), pdffile: ref.pdffile,
|
|
139
|
+
format: ref.format&.map(&:to_sym), extract_opts: opts }.compact
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
def file_entry_paths(ref, idx, sso)
|
|
143
|
+
base = File.basename(ref.file, ".xml")
|
|
144
|
+
if sso && ref.respond_to?(:sectionsplit_filename) &&
|
|
145
|
+
ref.sectionsplit_filename
|
|
146
|
+
# Extract directory from sectionsplit_filename
|
|
147
|
+
dir = File.dirname(ref.sectionsplit_filename)
|
|
148
|
+
if dir == "." # No directory in pattern
|
|
149
|
+
[output_file_path(ref, idx), ref.file]
|
|
150
|
+
else # Pattern has directory, prepend it
|
|
151
|
+
full_path = File.join(dir, base)
|
|
152
|
+
[full_path, "#{full_path}.xml"]
|
|
153
|
+
end
|
|
154
|
+
else [output_file_path(ref, idx), ref.file]
|
|
155
|
+
end
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
# Substitute special strings in filename patterns
|
|
159
|
+
# @param pattern [String] filename pattern with placeholders
|
|
160
|
+
# @param options [Hash] substitution values
|
|
161
|
+
# @option options [Integer] :document_num document index
|
|
162
|
+
# @option options [String] :basename filename without extension
|
|
163
|
+
# @option options [String] :basename_legacy full filename with extension
|
|
164
|
+
# @option options [Integer] :sectionsplit_num sectionsplit index
|
|
165
|
+
def substitute_filename_pattern(pattern, options = {})
|
|
166
|
+
pattern or return pattern
|
|
167
|
+
result = pattern.dup
|
|
168
|
+
options[:document_num] and
|
|
169
|
+
result.gsub!(/\{document-num\}/, options[:document_num].to_s)
|
|
170
|
+
result.gsub!(/\{basename\}/, options[:basename]) if options[:basename]
|
|
171
|
+
options[:basename_legacy] and
|
|
172
|
+
result.gsub!(/\{basename_legacy\}/, options[:basename_legacy])
|
|
173
|
+
options[:sectionsplit_num] and
|
|
174
|
+
result.gsub!(/\{sectionsplit-num\}/, options[:sectionsplit_num].to_s)
|
|
175
|
+
result
|
|
176
|
+
end
|
|
177
|
+
|
|
116
178
|
# TODO make the output file location reflect source location universally,
|
|
117
179
|
# not just for attachments: no File.basename
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
180
|
+
#
|
|
181
|
+
# For files with custom directory structure, construct path with directory
|
|
182
|
+
# For files with output_filename, use that (with substitutions)
|
|
183
|
+
# For others, use basename of ref.file
|
|
184
|
+
def output_file_path(ref, idx)
|
|
185
|
+
has_custom_dir, file_has_dir, params = output_file_path_prep(ref, idx)
|
|
186
|
+
# Apply sectionsplit_filename directory structure if:
|
|
187
|
+
# 1. File has sectionsplit enabled (parent document being split), OR
|
|
188
|
+
# 2. File is a sectionsplit output (from collection or single-file sectionsplit)
|
|
189
|
+
# Regular files that inherit sectionsplit_filename from collection level
|
|
190
|
+
# but are not sectionsplit outputs should NOT use it
|
|
191
|
+
is_sectionsplit_output = ref.respond_to?(:sectionsplit_output) && ref.sectionsplit_output
|
|
192
|
+
use_sectionsplit_dir = ref.sectionsplit_filename && has_custom_dir &&
|
|
193
|
+
(ref.sectionsplit || is_sectionsplit_output || file_has_dir)
|
|
194
|
+
f = if use_sectionsplit_dir
|
|
195
|
+
# For sectionsplit outputs, return just the basename
|
|
196
|
+
# The directory will be applied during file_compile_format
|
|
197
|
+
# via preserve_directory_structure?
|
|
198
|
+
File.basename(ref.file)
|
|
199
|
+
elsif ref.output_filename
|
|
200
|
+
substitute_filename_pattern(ref.output_filename, **params)
|
|
201
|
+
elsif file_has_dir
|
|
202
|
+
ref.file # Preserve directory structure already in ref.file
|
|
203
|
+
elsif ref.attachment
|
|
204
|
+
ref.file
|
|
205
|
+
else File.basename(ref.file)
|
|
206
|
+
end
|
|
207
|
+
ret = @disambig.source2dest_filename(f, preserve_dirs: ref.attachment)
|
|
208
|
+
warn ret
|
|
209
|
+
ret
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
def output_file_path_prep(ref, idx)
|
|
213
|
+
b = File.basename(ref.file)
|
|
214
|
+
b_no_ext = File.basename(ref.file, ".*")
|
|
215
|
+
# Check for sectionsplit_filename (for both parent and split output files)
|
|
216
|
+
# or output_filename
|
|
217
|
+
custom_filename = ref.sectionsplit_filename || ref.output_filename
|
|
218
|
+
has_custom_dir = custom_filename && File.dirname(custom_filename) != "."
|
|
219
|
+
# Also check if ref.file itself contains a directory
|
|
220
|
+
file_has_dir = File.dirname(ref.file) != "."
|
|
221
|
+
params = { document_num: idx, basename: b_no_ext, basename_legacy: b }
|
|
222
|
+
[has_custom_dir, file_has_dir, params]
|
|
122
223
|
end
|
|
123
224
|
|
|
124
225
|
def file_entry_copy(ref, ret)
|
|
125
226
|
%w(attachment sectionsplit index presentation-xml url
|
|
126
|
-
bare-after-first
|
|
227
|
+
bare-after-first output_filename sectionsplit_filename
|
|
228
|
+
sectionsplit_output).each do |s|
|
|
127
229
|
ref.respond_to?(s.to_sym) and
|
|
128
230
|
ret[s.delete("-").to_sym] = ref.send(s)
|
|
129
231
|
end
|
|
@@ -131,23 +233,16 @@ module Metanorma
|
|
|
131
233
|
|
|
132
234
|
def add_document_suffix(identifier, doc)
|
|
133
235
|
document_suffix = Metanorma::Utils::to_ncname(identifier)
|
|
236
|
+
ids = doc.xpath("./@id | .//@id").map(&:value)
|
|
134
237
|
Util::anchor_id_attributes.each do |(tag_name, attr_name)|
|
|
135
238
|
Util::add_suffix_to_attrs(doc, document_suffix, tag_name, attr_name,
|
|
136
239
|
@isodoc)
|
|
137
240
|
end
|
|
138
|
-
url_in_css_styles(doc, document_suffix)
|
|
241
|
+
Util::url_in_css_styles(doc, ids, document_suffix)
|
|
139
242
|
doc.root["document_suffix"] ||= ""
|
|
140
243
|
doc.root["document_suffix"] += document_suffix
|
|
141
244
|
end
|
|
142
245
|
|
|
143
|
-
# update relative URLs, url(#...), in CSS in @style attrs (including SVG)
|
|
144
|
-
def url_in_css_styles(doc, document_suffix)
|
|
145
|
-
doc.xpath("//*[@style]").each do |s|
|
|
146
|
-
s["style"] = s["style"]
|
|
147
|
-
.gsub(%r{url\(#([^()]+)\)}, "url(#\\1_#{document_suffix})")
|
|
148
|
-
end
|
|
149
|
-
end
|
|
150
|
-
|
|
151
246
|
# return citation url for file
|
|
152
247
|
# @param doc [Boolean] I am a Metanorma document,
|
|
153
248
|
# so my URL should end with html or pdf or whatever
|
|
@@ -156,13 +251,6 @@ module Metanorma
|
|
|
156
251
|
data[:url] || targetfile(data, options)[1]
|
|
157
252
|
end
|
|
158
253
|
|
|
159
|
-
# are references to the file to be linked to a file in the collection,
|
|
160
|
-
# or externally? Determines whether file suffix anchors are to be used
|
|
161
|
-
def url?(ident)
|
|
162
|
-
data = get(ident) or return false
|
|
163
|
-
data[:url]
|
|
164
|
-
end
|
|
165
|
-
|
|
166
254
|
# return file contents + output filename for each file in the collection,
|
|
167
255
|
# given a docref entry
|
|
168
256
|
# @param data [Hash] docref entry
|
|
@@ -176,7 +264,7 @@ module Metanorma
|
|
|
176
264
|
options = { read: false, doc: true, relative: false }.merge(options)
|
|
177
265
|
path = options[:relative] ? data[:rel_path] : data[:ref]
|
|
178
266
|
if data[:type] == "fileref"
|
|
179
|
-
ref_file path, data
|
|
267
|
+
ref_file path, data, options[:read], options[:doc]
|
|
180
268
|
else
|
|
181
269
|
xml_file data[:id], options[:read]
|
|
182
270
|
end
|
|
@@ -186,13 +274,40 @@ module Metanorma
|
|
|
186
274
|
targetfile(get(ident), options)
|
|
187
275
|
end
|
|
188
276
|
|
|
189
|
-
def ref_file(ref,
|
|
277
|
+
def ref_file(ref, data, read, doc)
|
|
190
278
|
file = File.read(ref, encoding: "utf-8") if read
|
|
191
|
-
|
|
192
|
-
|
|
279
|
+
# Use the actual output path from :outputs if available (set after compilation)
|
|
280
|
+
# Otherwise fall back to :out_path (set at initialization)
|
|
281
|
+
filename = if doc && data[:outputs] && data[:outputs][:html]
|
|
282
|
+
data[:outputs][:html].sub(
|
|
283
|
+
%r{^#{Regexp.escape(@parent.outdir)}/}, ""
|
|
284
|
+
)
|
|
285
|
+
else
|
|
286
|
+
data[:out_path].dup
|
|
287
|
+
end
|
|
288
|
+
if doc && !data[:outputs]
|
|
289
|
+
filename = ref_file_xml2html(filename)
|
|
290
|
+
end
|
|
193
291
|
[file, filename]
|
|
194
292
|
end
|
|
195
293
|
|
|
294
|
+
# Check if file has a recognized MIME type (other than XML)
|
|
295
|
+
# If so, don't append .html (e.g., .svg, .png, .jpg, etc.)
|
|
296
|
+
# Only process if it doesn't have a recognized non-XML extension
|
|
297
|
+
# If filename ends in .xml, replace with .html
|
|
298
|
+
# Otherwise (including sectionsplit files like "file.xml.0" or
|
|
299
|
+
# custom titles), append .html
|
|
300
|
+
def ref_file_xml2html(filename)
|
|
301
|
+
unless Util::mime_file_recognised?(filename) &&
|
|
302
|
+
!filename.end_with?(".xml")
|
|
303
|
+
filename = if filename.end_with?(".xml")
|
|
304
|
+
filename.sub(/\.xml$/, ".html")
|
|
305
|
+
else "#{filename}.html"
|
|
306
|
+
end
|
|
307
|
+
end
|
|
308
|
+
filename
|
|
309
|
+
end
|
|
310
|
+
|
|
196
311
|
def xml_file(id, read)
|
|
197
312
|
file = @xml.at(ns("//doc-container[@id = '#{id}']")).to_xml if read
|
|
198
313
|
filename = "#{id}.html"
|
|
@@ -235,35 +350,19 @@ module Metanorma
|
|
|
235
350
|
ret
|
|
236
351
|
end
|
|
237
352
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
end
|
|
252
|
-
|
|
253
|
-
def set(ident, attr, value)
|
|
254
|
-
@files[key(ident)][attr] = value
|
|
255
|
-
end
|
|
256
|
-
|
|
257
|
-
def each
|
|
258
|
-
@files.each
|
|
259
|
-
end
|
|
260
|
-
|
|
261
|
-
def each_with_index
|
|
262
|
-
@files.each_with_index
|
|
263
|
-
end
|
|
264
|
-
|
|
265
|
-
def ns(xpath)
|
|
266
|
-
@isodoc.ns(xpath)
|
|
353
|
+
# Check if we should preserve directory structure for an identifier
|
|
354
|
+
# Returns the custom filename if directory structure should be preserved,
|
|
355
|
+
# nil otherwise
|
|
356
|
+
def preserve_directory_structure?(ident)
|
|
357
|
+
ret = if get(ident, :sectionsplit_output)
|
|
358
|
+
# For sectionsplit outputs, use rel_path which has the directory
|
|
359
|
+
get(ident, :rel_path) || get(ident, :out_path)
|
|
360
|
+
elsif get(ident, :sectionsplit)
|
|
361
|
+
get(ident, :sectionsplit_filename)
|
|
362
|
+
else get(ident, :output_filename)
|
|
363
|
+
end
|
|
364
|
+
# Return the custom filename only if it contains a directory
|
|
365
|
+
ret && File.dirname(ret) != "." ? ret : nil
|
|
267
366
|
end
|
|
268
367
|
end
|
|
269
368
|
end
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
require_relative "../sectionsplit/sectionsplit"
|
|
2
|
-
# require "concurrent-ruby"
|
|
3
2
|
|
|
4
3
|
module Metanorma
|
|
5
4
|
class Collection
|
|
@@ -7,8 +6,8 @@ module Metanorma
|
|
|
7
6
|
def add_section_split
|
|
8
7
|
ret = @files.keys.each_with_object({}) do |k, m|
|
|
9
8
|
if @files[k][:sectionsplit] && !@files[k][:attachment]
|
|
10
|
-
process_section_split_instance(k, m)
|
|
11
|
-
cleanup_section_split_instance(k, m)
|
|
9
|
+
original_out_path = process_section_split_instance(k, m)
|
|
10
|
+
cleanup_section_split_instance(k, m, original_out_path)
|
|
12
11
|
end
|
|
13
12
|
m[k] = @files[k]
|
|
14
13
|
end
|
|
@@ -16,6 +15,8 @@ module Metanorma
|
|
|
16
15
|
end
|
|
17
16
|
|
|
18
17
|
def process_section_split_instance(key, manifest)
|
|
18
|
+
# Save the original out_path before it gets modified
|
|
19
|
+
original_out_path = @files[key][:out_path]
|
|
19
20
|
s, sectionsplit_manifest = sectionsplit(key)
|
|
20
21
|
# section_split_instance_threads(s, manifest, key)
|
|
21
22
|
s.each_with_index do |f1, i|
|
|
@@ -24,6 +25,8 @@ module Metanorma
|
|
|
24
25
|
a = add_section_split_attachments(sectionsplit_manifest, key) and
|
|
25
26
|
manifest["#{key}:attachments"] = a
|
|
26
27
|
add_section_split_cover(manifest, sectionsplit_manifest, key)
|
|
28
|
+
# Return the original path for cleanup
|
|
29
|
+
original_out_path
|
|
27
30
|
end
|
|
28
31
|
|
|
29
32
|
def section_split_instance_threads(s, manifest, key)
|
|
@@ -38,8 +41,17 @@ module Metanorma
|
|
|
38
41
|
pool.wait_for_termination
|
|
39
42
|
end
|
|
40
43
|
|
|
41
|
-
def cleanup_section_split_instance(key, manifest)
|
|
44
|
+
def cleanup_section_split_instance(key, manifest, original_out_path)
|
|
45
|
+
# Delete the sectionsplit index.html from source directory after it's copied to output
|
|
42
46
|
@files_to_delete << manifest["#{key}:index.html"][:ref]
|
|
47
|
+
# Delete the original files when sectionsplit happens (all formats: html, xml, presentation.xml)
|
|
48
|
+
# Use the saved original out_path (before it was changed to index.html)
|
|
49
|
+
if original_out_path
|
|
50
|
+
base = File.join(@parent.outdir, original_out_path.sub(/\.xml$/, ""))
|
|
51
|
+
@files_to_delete << "#{base}.html"
|
|
52
|
+
@files_to_delete << "#{base}.xml"
|
|
53
|
+
@files_to_delete << "#{base}.presentation.xml"
|
|
54
|
+
end
|
|
43
55
|
# @files[key].delete(:ids).delete(:anchors)
|
|
44
56
|
@files[key][:indirect_key] = @sectionsplit.key
|
|
45
57
|
end
|
|
@@ -86,20 +98,42 @@ module Metanorma
|
|
|
86
98
|
def add_section_split_instance(file, manifest, key, idx)
|
|
87
99
|
presfile, newkey, xml = add_section_split_instance_prep(file, key)
|
|
88
100
|
anchors = read_anchors(xml)
|
|
101
|
+
# Preserve directory structure in out_path if parent has custom sectionsplit_filename with directory
|
|
102
|
+
sectionsplit_fname = @files[key][:sectionsplit_filename]
|
|
103
|
+
|
|
104
|
+
# file[:url] from sectionsplit.rb already has placeholders substituted and includes full path
|
|
105
|
+
# Use it directly for out_path (without .xml extension)
|
|
106
|
+
base_filename = File.basename(file[:url], ".xml")
|
|
107
|
+
|
|
108
|
+
# Get the directory from file[:url] which already has placeholders substituted
|
|
109
|
+
file_dir = File.dirname(file[:url])
|
|
110
|
+
|
|
111
|
+
# If file[:url] has a directory (i.e., placeholders were substituted), use it
|
|
112
|
+
out_path_value = if file_dir == "."
|
|
113
|
+
base_filename
|
|
114
|
+
else
|
|
115
|
+
File.join(file_dir, base_filename)
|
|
116
|
+
end
|
|
117
|
+
|
|
89
118
|
m = { parentid: key, presentationxml: true, type: "fileref",
|
|
90
|
-
rel_path:
|
|
119
|
+
rel_path: out_path_value, out_path: out_path_value,
|
|
91
120
|
anchors: anchors, anchors_lookup: anchors_lookup(anchors),
|
|
92
121
|
ids: read_ids(xml), format: @files[key][:format],
|
|
93
122
|
sectionsplit_output: true, indirect_key: @sectionsplit.key,
|
|
94
|
-
bibdata: @files[key][:bibdata], ref: presfile
|
|
123
|
+
bibdata: @files[key][:bibdata], ref: presfile,
|
|
124
|
+
sectionsplit_filename: sectionsplit_fname,
|
|
125
|
+
idx: @files[key][:idx] }
|
|
95
126
|
m[:bare] = true unless idx.zero?
|
|
96
127
|
manifest[newkey] = m
|
|
97
|
-
|
|
128
|
+
# Don't delete split output files - we want to keep them!
|
|
129
|
+
# The original parent HTML file is deleted in cleanup_section_split_instance
|
|
98
130
|
end
|
|
99
131
|
|
|
100
132
|
def add_section_split_instance_prep(file, key)
|
|
101
|
-
|
|
102
|
-
|
|
133
|
+
# XML files are always stored flat in the _files directory (no subdirectories)
|
|
134
|
+
# file[:url] contains full path with directory for HTML output, but XML is basename only
|
|
135
|
+
xml_basename = File.basename(file[:url])
|
|
136
|
+
presfile = File.join(File.dirname(@files[key][:ref]), xml_basename)
|
|
103
137
|
newkey = key("#{key.strip} #{file[:title]}")
|
|
104
138
|
xml = Nokogiri::XML(File.read(presfile), &:huge)
|
|
105
139
|
[presfile, newkey, xml]
|
|
@@ -107,11 +141,16 @@ module Metanorma
|
|
|
107
141
|
|
|
108
142
|
def sectionsplit(ident)
|
|
109
143
|
file = @files[ident][:ref]
|
|
144
|
+
# @base must always be just basename, never contain directory components
|
|
145
|
+
# Directory structure comes from sectionsplit_filename pattern only
|
|
146
|
+
base = File.basename(@files[ident][:out_path] || file)
|
|
110
147
|
@sectionsplit = ::Metanorma::Collection::Sectionsplit
|
|
111
|
-
.new(input: file, base:
|
|
148
|
+
.new(input: file, base: base,
|
|
112
149
|
dir: File.dirname(file), output: @files[ident][:out_path],
|
|
113
150
|
compile_opts: @parent.compile_options, ident: ident,
|
|
114
151
|
fileslookup: self, isodoc: @isodoc,
|
|
152
|
+
parent_idx: @files[ident][:idx],
|
|
153
|
+
sectionsplit_filename: @files[ident][:sectionsplit_filename],
|
|
115
154
|
isodoc_presxml: @isodoc_presxml,
|
|
116
155
|
document_suffix: @files[ident][:document_suffix])
|
|
117
156
|
coll = @sectionsplit.sectionsplit.sort_by { |f| f[:order] }
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
module Metanorma
|
|
2
|
+
class Collection
|
|
3
|
+
class FileLookup
|
|
4
|
+
# Also parse all ids in doc (including ones which won't be xref targets)
|
|
5
|
+
def read_ids(xml)
|
|
6
|
+
ret = {}
|
|
7
|
+
xml.traverse do |x|
|
|
8
|
+
x.text? and next
|
|
9
|
+
x["id"] and ret[x["id"]] = true
|
|
10
|
+
end
|
|
11
|
+
ret
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# map locality type and label (e.g. "clause" "1") to id = anchor for
|
|
15
|
+
# a document
|
|
16
|
+
# Note: will only key clauses, which have unambiguous reference label in
|
|
17
|
+
# locality. Notes, examples etc with containers are just plunked against
|
|
18
|
+
# UUIDs, so that their IDs can at least be registered to be tracked
|
|
19
|
+
# as existing.
|
|
20
|
+
def read_anchors(xml)
|
|
21
|
+
xrefs = @isodoc.xref_init(@lang, @script, @isodoc, @isodoc.i18n,
|
|
22
|
+
{ locale: @locale })
|
|
23
|
+
xrefs.parse xml
|
|
24
|
+
xrefs.get.each_with_object({}) do |(k, v), ret|
|
|
25
|
+
read_anchors1(k, v, ret)
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
def read_anchors1(key, val, ret)
|
|
30
|
+
val[:type] ||= "clause"
|
|
31
|
+
ret[val[:type]] ||= {}
|
|
32
|
+
index = if val[:container] || val[:label].nil? || val[:label].empty?
|
|
33
|
+
UUIDTools::UUID.random_create.to_s
|
|
34
|
+
else val[:label].gsub(%r{<[^<>]+>}, "")
|
|
35
|
+
end
|
|
36
|
+
ret[val[:type]][index] = key
|
|
37
|
+
v = val[:value] and ret[val[:type]][v.gsub(%r{<[^<>]+>}, "")] = key
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def anchors_lookup(anchors)
|
|
41
|
+
anchors.values.each_with_object({}) do |v, m|
|
|
42
|
+
v.each_value { |v1| m[v1] = true }
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# return citation url for file
|
|
47
|
+
# @param doc [Boolean] I am a Metanorma document,
|
|
48
|
+
# so my URL should end with html or pdf or whatever
|
|
49
|
+
def url(ident, options)
|
|
50
|
+
data = get(ident)
|
|
51
|
+
data[:url] || targetfile(data, options)[1]
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# are references to the file to be linked to a file in the collection,
|
|
55
|
+
# or externally? Determines whether file suffix anchors are to be used
|
|
56
|
+
def url?(ident)
|
|
57
|
+
data = get(ident) or return false
|
|
58
|
+
data[:url]
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def key(ident)
|
|
62
|
+
@c.decode(ident).gsub(/(\p{Zs})+/, " ")
|
|
63
|
+
.sub(/^metanorma-collection /, "")
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
def keys
|
|
67
|
+
@files.keys
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def get(ident, attr = nil)
|
|
71
|
+
if attr then @files[key(ident)][attr]
|
|
72
|
+
else @files[key(ident)]
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def set(ident, attr, value)
|
|
77
|
+
@files[key(ident)][attr] = value
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def each
|
|
81
|
+
@files.each
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def each_with_index
|
|
85
|
+
@files.each_with_index
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def ns(xpath)
|
|
89
|
+
@isodoc.ns(xpath)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
module Metanorma
|
|
2
|
+
class Collection
|
|
3
|
+
class << self
|
|
4
|
+
# @param Block [Proc]
|
|
5
|
+
# @note allow user-specific function to run in pre-parse model stage
|
|
6
|
+
def set_pre_parse_model(&block)
|
|
7
|
+
@pre_parse_model_proc = block
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
# @param Block [Proc]
|
|
11
|
+
# @note allow user-specific function to resolve identifier
|
|
12
|
+
def set_identifier_resolver(&block)
|
|
13
|
+
@identifier_resolver = block
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# @param Block [Proc]
|
|
17
|
+
# @note allow user-specific function to resolve fileref
|
|
18
|
+
# NOTE: MUST ALWAYS RETURN PATH relative to working directory
|
|
19
|
+
# (initial YAML file location). @fileref_resolver.call(ref_folder, fileref)
|
|
20
|
+
# fileref is not what is in the YAML, but the resolved path
|
|
21
|
+
# relative to the working directory
|
|
22
|
+
def set_fileref_resolver(&block)
|
|
23
|
+
@fileref_resolver = block
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def unset_fileref_resolver
|
|
27
|
+
@fileref_resolver = nil
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# @param collection_model [Hash{String=>String}]
|
|
31
|
+
def pre_parse_model(collection_model)
|
|
32
|
+
@pre_parse_model_proc or return
|
|
33
|
+
@pre_parse_model_proc.call(collection_model)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# @param identifier [String]
|
|
37
|
+
# @return [String]
|
|
38
|
+
def resolve_identifier(identifier)
|
|
39
|
+
@identifier_resolver or return identifier
|
|
40
|
+
@identifier_resolver.call(identifier)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# @param fileref [String]
|
|
44
|
+
# @return [String]
|
|
45
|
+
def resolve_fileref(ref_folder, fileref)
|
|
46
|
+
warn ref_folder
|
|
47
|
+
warn fileref
|
|
48
|
+
unless @fileref_resolver
|
|
49
|
+
(Pathname.new fileref).absolute? or
|
|
50
|
+
fileref = File.join(ref_folder, fileref)
|
|
51
|
+
return fileref
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
@fileref_resolver.call(ref_folder, fileref)
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# @param filepath
|
|
58
|
+
# @raise [FileNotFoundException]
|
|
59
|
+
def check_file_existence(filepath)
|
|
60
|
+
unless File.exist?(filepath)
|
|
61
|
+
error_message = "#{filepath} not found!"
|
|
62
|
+
::Metanorma::Util.log("[metanorma] Error: #{error_message}", :error)
|
|
63
|
+
raise FileNotFoundException.new error_message.to_s
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def parse(file)
|
|
68
|
+
# need @dirname initialised before collection object initialisation
|
|
69
|
+
@dirname = File.expand_path(File.dirname(file))
|
|
70
|
+
config = case file
|
|
71
|
+
when /\.xml$/
|
|
72
|
+
::Metanorma::Collection::Config::Config.from_xml(File.read(file))
|
|
73
|
+
when /.ya?ml$/
|
|
74
|
+
y = YAML.safe_load(File.read(file))
|
|
75
|
+
pre_parse_model(y)
|
|
76
|
+
::Metanorma::Collection::Config::Config.from_yaml(y.to_yaml)
|
|
77
|
+
end
|
|
78
|
+
new(file: file, config: config)
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
@@ -14,7 +14,6 @@ module Metanorma
|
|
|
14
14
|
# @param docref [Array<Hash{String=>String}>]
|
|
15
15
|
# @param manifest [Array<Metanorma::Collection::Manifest>]
|
|
16
16
|
def initialize(config, collection, dir)
|
|
17
|
-
#require "debug"; binding.b
|
|
18
17
|
@collection = collection
|
|
19
18
|
@dir = dir
|
|
20
19
|
@disambig = ::Metanorma::Collection::Util::DisambigFiles.new
|
|
@@ -22,9 +21,9 @@ module Metanorma
|
|
|
22
21
|
end
|
|
23
22
|
|
|
24
23
|
def manifest_postprocess(config)
|
|
25
|
-
#require "debug"; binding.b
|
|
26
24
|
manifest_bibdata(config)
|
|
27
25
|
manifest_expand_yaml(config, @dir)
|
|
26
|
+
manifest_output_filenames(config)
|
|
28
27
|
manifest_compile_adoc(config)
|
|
29
28
|
manifest_filexist(config)
|
|
30
29
|
manifest_sectionsplit(config)
|
|
@@ -40,6 +39,17 @@ module Metanorma
|
|
|
40
39
|
|
|
41
40
|
GUID = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
|
|
42
41
|
|
|
42
|
+
def manifest_output_filenames(config, parent = nil)
|
|
43
|
+
config.output_filename ||=
|
|
44
|
+
parent&.output_filename # || "{basename}.{document-num}"
|
|
45
|
+
config.sectionsplit_filename ||= parent&.sectionsplit_filename ||
|
|
46
|
+
"{basename_legacy}.{sectionsplit-num}"
|
|
47
|
+
# "{basename}-{document-num}.{sectionsplit-num}"
|
|
48
|
+
Array(config.entry).each do |f|
|
|
49
|
+
manifest_output_filenames(f, config)
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
43
53
|
def manifest_identifier(config)
|
|
44
54
|
no_id = populate_id_from_doc(config)
|
|
45
55
|
config.identifier =
|
|
@@ -169,7 +179,8 @@ module Metanorma
|
|
|
169
179
|
::Metanorma::Compile.new
|
|
170
180
|
.compile(f, agree_to_terms: true, install_fonts: false,
|
|
171
181
|
extension_keys: [:xml])
|
|
172
|
-
::Metanorma::Util.log("[metanorma] Info: Compiling #{f}...done!",
|
|
182
|
+
::Metanorma::Util.log("[metanorma] Info: Compiling #{f}...done!",
|
|
183
|
+
:warning)
|
|
173
184
|
end
|
|
174
185
|
|
|
175
186
|
def compile_adoc_file?(file)
|