metanorma 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +14 -0
  3. data/lib/metanorma/collection/collection.rb +46 -105
  4. data/lib/metanorma/collection/config/config.rb +5 -0
  5. data/lib/metanorma/collection/config/converters.rb +1 -0
  6. data/lib/metanorma/collection/config/manifest.rb +23 -0
  7. data/lib/metanorma/collection/document/document.rb +32 -10
  8. data/lib/metanorma/collection/filelookup/base.rb +43 -0
  9. data/lib/metanorma/collection/filelookup/filelookup.rb +168 -69
  10. data/lib/metanorma/collection/filelookup/filelookup_sectionsplit.rb +49 -10
  11. data/lib/metanorma/collection/filelookup/utils.rb +93 -0
  12. data/lib/metanorma/collection/helpers.rb +82 -0
  13. data/lib/metanorma/collection/manifest/manifest.rb +14 -3
  14. data/lib/metanorma/collection/multilingual/multilingual.rb +1 -1
  15. data/lib/metanorma/collection/renderer/filelocation.rb +162 -0
  16. data/lib/metanorma/collection/renderer/fileparse.rb +9 -6
  17. data/lib/metanorma/collection/renderer/fileprocess.rb +56 -42
  18. data/lib/metanorma/collection/renderer/navigation.rb +15 -1
  19. data/lib/metanorma/collection/renderer/render_word.rb +8 -4
  20. data/lib/metanorma/collection/renderer/renderer.rb +104 -10
  21. data/lib/metanorma/collection/renderer/svg.rb +54 -7
  22. data/lib/metanorma/collection/renderer/utils.rb +58 -22
  23. data/lib/metanorma/collection/sectionsplit/collection.rb +14 -5
  24. data/lib/metanorma/collection/sectionsplit/sectionsplit.rb +20 -7
  25. data/lib/metanorma/collection/util/disambig_files.rb +4 -5
  26. data/lib/metanorma/collection/util/util.rb +106 -6
  27. data/lib/metanorma/collection/xrefprocess/xrefprocess.rb +2 -2
  28. data/lib/metanorma/compile/compile_options.rb +3 -2
  29. data/lib/metanorma/compile/flavor.rb +11 -4
  30. data/lib/metanorma/compile/render.rb +1 -0
  31. data/lib/metanorma/version.rb +1 -1
  32. data/metanorma.gemspec +4 -14
  33. metadata +28 -150
@@ -1,7 +1,10 @@
1
1
  require "isodoc"
2
2
  require "htmlentities"
3
3
  require "metanorma-utils"
4
+ require "marcel"
4
5
  require_relative "filelookup_sectionsplit"
6
+ require_relative "base"
7
+ require_relative "utils"
5
8
 
6
9
  module Metanorma
7
10
  class Collection
@@ -28,12 +31,16 @@ module Metanorma
28
31
  read_files(@manifest.entry, parent.manifest)
29
32
  end
30
33
 
31
- def read_files(entries, parent)
34
+ def read_files(entries, parent, idx = 0)
32
35
  Array(entries).each do |e|
33
36
  derive_format(e, parent)
34
- e.file and read_file(e)
35
- read_files(e.entry, e)
37
+ if e.file
38
+ read_file(e, idx)
39
+ idx += 1
40
+ end
41
+ idx = read_files(e.entry, e, idx)
36
42
  end
43
+ idx
37
44
  end
38
45
 
39
46
  def derive_format(entry, parent)
@@ -42,9 +49,9 @@ module Metanorma
42
49
  entry.format |= ["xml", "presentation"]
43
50
  end
44
51
 
45
- def read_file(manifest)
52
+ def read_file(manifest, idx)
46
53
  i, k = read_file_idents(manifest)
47
- entry = file_entry(manifest, k) or return
54
+ entry = file_entry(manifest, k, idx) or return
48
55
  bibdata_process(entry, i)
49
56
  bibitem_process(entry)
50
57
  @files[key(i)] = entry
@@ -74,12 +81,6 @@ module Metanorma
74
81
  end
75
82
  end
76
83
 
77
- def anchors_lookup(anchors)
78
- anchors.values.each_with_object({}) do |v, m|
79
- v.each_value { |v1| m[v1] = true }
80
- end
81
- end
82
-
83
84
  def bibdata_extract(xml)
84
85
  anchors = read_anchors(xml)
85
86
  { anchors: anchors, anchors_lookup: anchors_lookup(anchors),
@@ -100,30 +101,131 @@ module Metanorma
100
101
  # out_path is the destination file address, with any references outside
101
102
  # the working directory (../../...) truncated, and based on relative path
102
103
  # identifier is the id with only spaces, no nbsp
103
- def file_entry(ref, identifier)
104
+ # idx is the index of the document in the manifest
105
+ def file_entry(ref, identifier, idx)
104
106
  ref.file or return
105
107
  abs = @documents[Util::key identifier].file
108
+ # For sectionsplit outputs from YAML manifest, we need to compute the full path
109
+ # by combining sectionsplit_filename directory with ref.file basename
110
+ sso = ref.respond_to?(:sectionsplit_output) && ref.sectionsplit_output
111
+ out_path, rel_path = file_entry_paths(ref, idx, sso)
106
112
  ret = if ref.file
107
- { type: "fileref", ref: abs, rel_path: ref.file, url: ref.url,
108
- out_path: output_file_path(ref),
109
- format: ref.format&.map(&:to_sym) }.compact
113
+ { type: "fileref", ref: abs, rel_path: rel_path, url: ref.url,
114
+ out_path: out_path, idx: idx,
115
+ output_filename: ref.output_filename,
116
+ sectionsplit_filename: ref.sectionsplit_filename,
117
+ pdffile: ref.pdffile, format: ref.format&.map(&:to_sym) }
118
+ .compact
110
119
  else { type: "id", ref: ref.id }
111
120
  end
112
121
  file_entry_copy(ref, ret)
113
122
  ret.compact
114
123
  end
115
124
 
125
+
126
+ # ref is the absolute source file address
127
+ # rel_path is the relative source file address, relative to the YAML location
128
+ # out_path is the destination file address, with any references outside
129
+ # the working directory (../../...) truncated, and based on relative path
130
+ # identifier is the id with only spaces, no nbsp
131
+ # extract_opts are the compilation options extracted as document attributes
132
+ def file_entry_struct(ref, abs)
133
+ adoc = abs.sub(/\.xml$/, ".adoc")
134
+ if adoc.end_with?(".adoc") && File.exist?(adoc)
135
+ opts = Metanorma::Input::Asciidoc.new.extract_options(File.read(adoc))
136
+ end
137
+ { type: "fileref", ref: abs, rel_path: ref.file, url: ref.url,
138
+ out_path: output_file_path(ref), pdffile: ref.pdffile,
139
+ format: ref.format&.map(&:to_sym), extract_opts: opts }.compact
140
+ end
141
+
142
+ def file_entry_paths(ref, idx, sso)
143
+ base = File.basename(ref.file, ".xml")
144
+ if sso && ref.respond_to?(:sectionsplit_filename) &&
145
+ ref.sectionsplit_filename
146
+ # Extract directory from sectionsplit_filename
147
+ dir = File.dirname(ref.sectionsplit_filename)
148
+ if dir == "." # No directory in pattern
149
+ [output_file_path(ref, idx), ref.file]
150
+ else # Pattern has directory, prepend it
151
+ full_path = File.join(dir, base)
152
+ [full_path, "#{full_path}.xml"]
153
+ end
154
+ else [output_file_path(ref, idx), ref.file]
155
+ end
156
+ end
157
+
158
+ # Substitute special strings in filename patterns
159
+ # @param pattern [String] filename pattern with placeholders
160
+ # @param options [Hash] substitution values
161
+ # @option options [Integer] :document_num document index
162
+ # @option options [String] :basename filename without extension
163
+ # @option options [String] :basename_legacy full filename with extension
164
+ # @option options [Integer] :sectionsplit_num sectionsplit index
165
+ def substitute_filename_pattern(pattern, options = {})
166
+ pattern or return pattern
167
+ result = pattern.dup
168
+ options[:document_num] and
169
+ result.gsub!(/\{document-num\}/, options[:document_num].to_s)
170
+ result.gsub!(/\{basename\}/, options[:basename]) if options[:basename]
171
+ options[:basename_legacy] and
172
+ result.gsub!(/\{basename_legacy\}/, options[:basename_legacy])
173
+ options[:sectionsplit_num] and
174
+ result.gsub!(/\{sectionsplit-num\}/, options[:sectionsplit_num].to_s)
175
+ result
176
+ end
177
+
116
178
  # TODO make the output file location reflect source location universally,
117
179
  # not just for attachments: no File.basename
118
- def output_file_path(ref)
119
- f = File.basename(ref.file)
120
- ref.attachment and f = ref.file
121
- @disambig.source2dest_filename(f)
180
+ #
181
+ # For files with custom directory structure, construct path with directory
182
+ # For files with output_filename, use that (with substitutions)
183
+ # For others, use basename of ref.file
184
+ def output_file_path(ref, idx)
185
+ has_custom_dir, file_has_dir, params = output_file_path_prep(ref, idx)
186
+ # Apply sectionsplit_filename directory structure if:
187
+ # 1. File has sectionsplit enabled (parent document being split), OR
188
+ # 2. File is a sectionsplit output (from collection or single-file sectionsplit)
189
+ # Regular files that inherit sectionsplit_filename from collection level
190
+ # but are not sectionsplit outputs should NOT use it
191
+ is_sectionsplit_output = ref.respond_to?(:sectionsplit_output) && ref.sectionsplit_output
192
+ use_sectionsplit_dir = ref.sectionsplit_filename && has_custom_dir &&
193
+ (ref.sectionsplit || is_sectionsplit_output || file_has_dir)
194
+ f = if use_sectionsplit_dir
195
+ # For sectionsplit outputs, return just the basename
196
+ # The directory will be applied during file_compile_format
197
+ # via preserve_directory_structure?
198
+ File.basename(ref.file)
199
+ elsif ref.output_filename
200
+ substitute_filename_pattern(ref.output_filename, **params)
201
+ elsif file_has_dir
202
+ ref.file # Preserve directory structure already in ref.file
203
+ elsif ref.attachment
204
+ ref.file
205
+ else File.basename(ref.file)
206
+ end
207
+ ret = @disambig.source2dest_filename(f, preserve_dirs: ref.attachment)
208
+ warn ret
209
+ ret
210
+ end
211
+
212
+ def output_file_path_prep(ref, idx)
213
+ b = File.basename(ref.file)
214
+ b_no_ext = File.basename(ref.file, ".*")
215
+ # Check for sectionsplit_filename (for both parent and split output files)
216
+ # or output_filename
217
+ custom_filename = ref.sectionsplit_filename || ref.output_filename
218
+ has_custom_dir = custom_filename && File.dirname(custom_filename) != "."
219
+ # Also check if ref.file itself contains a directory
220
+ file_has_dir = File.dirname(ref.file) != "."
221
+ params = { document_num: idx, basename: b_no_ext, basename_legacy: b }
222
+ [has_custom_dir, file_has_dir, params]
122
223
  end
123
224
 
124
225
  def file_entry_copy(ref, ret)
125
226
  %w(attachment sectionsplit index presentation-xml url
126
- bare-after-first).each do |s|
227
+ bare-after-first output_filename sectionsplit_filename
228
+ sectionsplit_output).each do |s|
127
229
  ref.respond_to?(s.to_sym) and
128
230
  ret[s.delete("-").to_sym] = ref.send(s)
129
231
  end
@@ -131,23 +233,16 @@ module Metanorma
131
233
 
132
234
  def add_document_suffix(identifier, doc)
133
235
  document_suffix = Metanorma::Utils::to_ncname(identifier)
236
+ ids = doc.xpath("./@id | .//@id").map(&:value)
134
237
  Util::anchor_id_attributes.each do |(tag_name, attr_name)|
135
238
  Util::add_suffix_to_attrs(doc, document_suffix, tag_name, attr_name,
136
239
  @isodoc)
137
240
  end
138
- url_in_css_styles(doc, document_suffix)
241
+ Util::url_in_css_styles(doc, ids, document_suffix)
139
242
  doc.root["document_suffix"] ||= ""
140
243
  doc.root["document_suffix"] += document_suffix
141
244
  end
142
245
 
143
- # update relative URLs, url(#...), in CSS in @style attrs (including SVG)
144
- def url_in_css_styles(doc, document_suffix)
145
- doc.xpath("//*[@style]").each do |s|
146
- s["style"] = s["style"]
147
- .gsub(%r{url\(#([^()]+)\)}, "url(#\\1_#{document_suffix})")
148
- end
149
- end
150
-
151
246
  # return citation url for file
152
247
  # @param doc [Boolean] I am a Metanorma document,
153
248
  # so my URL should end with html or pdf or whatever
@@ -156,13 +251,6 @@ module Metanorma
156
251
  data[:url] || targetfile(data, options)[1]
157
252
  end
158
253
 
159
- # are references to the file to be linked to a file in the collection,
160
- # or externally? Determines whether file suffix anchors are to be used
161
- def url?(ident)
162
- data = get(ident) or return false
163
- data[:url]
164
- end
165
-
166
254
  # return file contents + output filename for each file in the collection,
167
255
  # given a docref entry
168
256
  # @param data [Hash] docref entry
@@ -176,7 +264,7 @@ module Metanorma
176
264
  options = { read: false, doc: true, relative: false }.merge(options)
177
265
  path = options[:relative] ? data[:rel_path] : data[:ref]
178
266
  if data[:type] == "fileref"
179
- ref_file path, data[:out_path], options[:read], options[:doc]
267
+ ref_file path, data, options[:read], options[:doc]
180
268
  else
181
269
  xml_file data[:id], options[:read]
182
270
  end
@@ -186,13 +274,40 @@ module Metanorma
186
274
  targetfile(get(ident), options)
187
275
  end
188
276
 
189
- def ref_file(ref, out, read, doc)
277
+ def ref_file(ref, data, read, doc)
190
278
  file = File.read(ref, encoding: "utf-8") if read
191
- filename = out.dup
192
- filename.sub!(/\.xml$/, ".html") if doc
279
+ # Use the actual output path from :outputs if available (set after compilation)
280
+ # Otherwise fall back to :out_path (set at initialization)
281
+ filename = if doc && data[:outputs] && data[:outputs][:html]
282
+ data[:outputs][:html].sub(
283
+ %r{^#{Regexp.escape(@parent.outdir)}/}, ""
284
+ )
285
+ else
286
+ data[:out_path].dup
287
+ end
288
+ if doc && !data[:outputs]
289
+ filename = ref_file_xml2html(filename)
290
+ end
193
291
  [file, filename]
194
292
  end
195
293
 
294
+ # Check if file has a recognized MIME type (other than XML)
295
+ # If so, don't append .html (e.g., .svg, .png, .jpg, etc.)
296
+ # Only process if it doesn't have a recognized non-XML extension
297
+ # If filename ends in .xml, replace with .html
298
+ # Otherwise (including sectionsplit files like "file.xml.0" or
299
+ # custom titles), append .html
300
+ def ref_file_xml2html(filename)
301
+ unless Util::mime_file_recognised?(filename) &&
302
+ !filename.end_with?(".xml")
303
+ filename = if filename.end_with?(".xml")
304
+ filename.sub(/\.xml$/, ".html")
305
+ else "#{filename}.html"
306
+ end
307
+ end
308
+ filename
309
+ end
310
+
196
311
  def xml_file(id, read)
197
312
  file = @xml.at(ns("//doc-container[@id = '#{id}']")).to_xml if read
198
313
  filename = "#{id}.html"
@@ -235,35 +350,19 @@ module Metanorma
235
350
  ret
236
351
  end
237
352
 
238
- def key(ident)
239
- @c.decode(ident).gsub(/(\p{Zs})+/, " ")
240
- .sub(/^metanorma-collection /, "")
241
- end
242
-
243
- def keys
244
- @files.keys
245
- end
246
-
247
- def get(ident, attr = nil)
248
- if attr then @files[key(ident)][attr]
249
- else @files[key(ident)]
250
- end
251
- end
252
-
253
- def set(ident, attr, value)
254
- @files[key(ident)][attr] = value
255
- end
256
-
257
- def each
258
- @files.each
259
- end
260
-
261
- def each_with_index
262
- @files.each_with_index
263
- end
264
-
265
- def ns(xpath)
266
- @isodoc.ns(xpath)
353
+ # Check if we should preserve directory structure for an identifier
354
+ # Returns the custom filename if directory structure should be preserved,
355
+ # nil otherwise
356
+ def preserve_directory_structure?(ident)
357
+ ret = if get(ident, :sectionsplit_output)
358
+ # For sectionsplit outputs, use rel_path which has the directory
359
+ get(ident, :rel_path) || get(ident, :out_path)
360
+ elsif get(ident, :sectionsplit)
361
+ get(ident, :sectionsplit_filename)
362
+ else get(ident, :output_filename)
363
+ end
364
+ # Return the custom filename only if it contains a directory
365
+ ret && File.dirname(ret) != "." ? ret : nil
267
366
  end
268
367
  end
269
368
  end
@@ -1,5 +1,4 @@
1
1
  require_relative "../sectionsplit/sectionsplit"
2
- # require "concurrent-ruby"
3
2
 
4
3
  module Metanorma
5
4
  class Collection
@@ -7,8 +6,8 @@ module Metanorma
7
6
  def add_section_split
8
7
  ret = @files.keys.each_with_object({}) do |k, m|
9
8
  if @files[k][:sectionsplit] && !@files[k][:attachment]
10
- process_section_split_instance(k, m)
11
- cleanup_section_split_instance(k, m)
9
+ original_out_path = process_section_split_instance(k, m)
10
+ cleanup_section_split_instance(k, m, original_out_path)
12
11
  end
13
12
  m[k] = @files[k]
14
13
  end
@@ -16,6 +15,8 @@ module Metanorma
16
15
  end
17
16
 
18
17
  def process_section_split_instance(key, manifest)
18
+ # Save the original out_path before it gets modified
19
+ original_out_path = @files[key][:out_path]
19
20
  s, sectionsplit_manifest = sectionsplit(key)
20
21
  # section_split_instance_threads(s, manifest, key)
21
22
  s.each_with_index do |f1, i|
@@ -24,6 +25,8 @@ module Metanorma
24
25
  a = add_section_split_attachments(sectionsplit_manifest, key) and
25
26
  manifest["#{key}:attachments"] = a
26
27
  add_section_split_cover(manifest, sectionsplit_manifest, key)
28
+ # Return the original path for cleanup
29
+ original_out_path
27
30
  end
28
31
 
29
32
  def section_split_instance_threads(s, manifest, key)
@@ -38,8 +41,17 @@ module Metanorma
38
41
  pool.wait_for_termination
39
42
  end
40
43
 
41
- def cleanup_section_split_instance(key, manifest)
44
+ def cleanup_section_split_instance(key, manifest, original_out_path)
45
+ # Delete the sectionsplit index.html from source directory after it's copied to output
42
46
  @files_to_delete << manifest["#{key}:index.html"][:ref]
47
+ # Delete the original files when sectionsplit happens (all formats: html, xml, presentation.xml)
48
+ # Use the saved original out_path (before it was changed to index.html)
49
+ if original_out_path
50
+ base = File.join(@parent.outdir, original_out_path.sub(/\.xml$/, ""))
51
+ @files_to_delete << "#{base}.html"
52
+ @files_to_delete << "#{base}.xml"
53
+ @files_to_delete << "#{base}.presentation.xml"
54
+ end
43
55
  # @files[key].delete(:ids).delete(:anchors)
44
56
  @files[key][:indirect_key] = @sectionsplit.key
45
57
  end
@@ -86,20 +98,42 @@ module Metanorma
86
98
  def add_section_split_instance(file, manifest, key, idx)
87
99
  presfile, newkey, xml = add_section_split_instance_prep(file, key)
88
100
  anchors = read_anchors(xml)
101
+ # Preserve directory structure in out_path if parent has custom sectionsplit_filename with directory
102
+ sectionsplit_fname = @files[key][:sectionsplit_filename]
103
+
104
+ # file[:url] from sectionsplit.rb already has placeholders substituted and includes full path
105
+ # Use it directly for out_path (without .xml extension)
106
+ base_filename = File.basename(file[:url], ".xml")
107
+
108
+ # Get the directory from file[:url] which already has placeholders substituted
109
+ file_dir = File.dirname(file[:url])
110
+
111
+ # If file[:url] has a directory (i.e., placeholders were substituted), use it
112
+ out_path_value = if file_dir == "."
113
+ base_filename
114
+ else
115
+ File.join(file_dir, base_filename)
116
+ end
117
+
89
118
  m = { parentid: key, presentationxml: true, type: "fileref",
90
- rel_path: file[:url], out_path: File.basename(file[:url]),
119
+ rel_path: out_path_value, out_path: out_path_value,
91
120
  anchors: anchors, anchors_lookup: anchors_lookup(anchors),
92
121
  ids: read_ids(xml), format: @files[key][:format],
93
122
  sectionsplit_output: true, indirect_key: @sectionsplit.key,
94
- bibdata: @files[key][:bibdata], ref: presfile }
123
+ bibdata: @files[key][:bibdata], ref: presfile,
124
+ sectionsplit_filename: sectionsplit_fname,
125
+ idx: @files[key][:idx] }
95
126
  m[:bare] = true unless idx.zero?
96
127
  manifest[newkey] = m
97
- @files_to_delete << file[:url]
128
+ # Don't delete split output files - we want to keep them!
129
+ # The original parent HTML file is deleted in cleanup_section_split_instance
98
130
  end
99
131
 
100
132
  def add_section_split_instance_prep(file, key)
101
- presfile = File.join(File.dirname(@files[key][:ref]),
102
- File.basename(file[:url]))
133
+ # XML files are always stored flat in the _files directory (no subdirectories)
134
+ # file[:url] contains full path with directory for HTML output, but XML is basename only
135
+ xml_basename = File.basename(file[:url])
136
+ presfile = File.join(File.dirname(@files[key][:ref]), xml_basename)
103
137
  newkey = key("#{key.strip} #{file[:title]}")
104
138
  xml = Nokogiri::XML(File.read(presfile), &:huge)
105
139
  [presfile, newkey, xml]
@@ -107,11 +141,16 @@ module Metanorma
107
141
 
108
142
  def sectionsplit(ident)
109
143
  file = @files[ident][:ref]
144
+ # @base must always be just basename, never contain directory components
145
+ # Directory structure comes from sectionsplit_filename pattern only
146
+ base = File.basename(@files[ident][:out_path] || file)
110
147
  @sectionsplit = ::Metanorma::Collection::Sectionsplit
111
- .new(input: file, base: @files[ident][:out_path],
148
+ .new(input: file, base: base,
112
149
  dir: File.dirname(file), output: @files[ident][:out_path],
113
150
  compile_opts: @parent.compile_options, ident: ident,
114
151
  fileslookup: self, isodoc: @isodoc,
152
+ parent_idx: @files[ident][:idx],
153
+ sectionsplit_filename: @files[ident][:sectionsplit_filename],
115
154
  isodoc_presxml: @isodoc_presxml,
116
155
  document_suffix: @files[ident][:document_suffix])
117
156
  coll = @sectionsplit.sectionsplit.sort_by { |f| f[:order] }
@@ -0,0 +1,93 @@
1
+ module Metanorma
2
+ class Collection
3
+ class FileLookup
4
+ # Also parse all ids in doc (including ones which won't be xref targets)
5
+ def read_ids(xml)
6
+ ret = {}
7
+ xml.traverse do |x|
8
+ x.text? and next
9
+ x["id"] and ret[x["id"]] = true
10
+ end
11
+ ret
12
+ end
13
+
14
+ # map locality type and label (e.g. "clause" "1") to id = anchor for
15
+ # a document
16
+ # Note: will only key clauses, which have unambiguous reference label in
17
+ # locality. Notes, examples etc with containers are just plunked against
18
+ # UUIDs, so that their IDs can at least be registered to be tracked
19
+ # as existing.
20
+ def read_anchors(xml)
21
+ xrefs = @isodoc.xref_init(@lang, @script, @isodoc, @isodoc.i18n,
22
+ { locale: @locale })
23
+ xrefs.parse xml
24
+ xrefs.get.each_with_object({}) do |(k, v), ret|
25
+ read_anchors1(k, v, ret)
26
+ end
27
+ end
28
+
29
+ def read_anchors1(key, val, ret)
30
+ val[:type] ||= "clause"
31
+ ret[val[:type]] ||= {}
32
+ index = if val[:container] || val[:label].nil? || val[:label].empty?
33
+ UUIDTools::UUID.random_create.to_s
34
+ else val[:label].gsub(%r{<[^<>]+>}, "")
35
+ end
36
+ ret[val[:type]][index] = key
37
+ v = val[:value] and ret[val[:type]][v.gsub(%r{<[^<>]+>}, "")] = key
38
+ end
39
+
40
+ def anchors_lookup(anchors)
41
+ anchors.values.each_with_object({}) do |v, m|
42
+ v.each_value { |v1| m[v1] = true }
43
+ end
44
+ end
45
+
46
+ # return citation url for file
47
+ # @param doc [Boolean] I am a Metanorma document,
48
+ # so my URL should end with html or pdf or whatever
49
+ def url(ident, options)
50
+ data = get(ident)
51
+ data[:url] || targetfile(data, options)[1]
52
+ end
53
+
54
+ # are references to the file to be linked to a file in the collection,
55
+ # or externally? Determines whether file suffix anchors are to be used
56
+ def url?(ident)
57
+ data = get(ident) or return false
58
+ data[:url]
59
+ end
60
+
61
+ def key(ident)
62
+ @c.decode(ident).gsub(/(\p{Zs})+/, " ")
63
+ .sub(/^metanorma-collection /, "")
64
+ end
65
+
66
+ def keys
67
+ @files.keys
68
+ end
69
+
70
+ def get(ident, attr = nil)
71
+ if attr then @files[key(ident)][attr]
72
+ else @files[key(ident)]
73
+ end
74
+ end
75
+
76
+ def set(ident, attr, value)
77
+ @files[key(ident)][attr] = value
78
+ end
79
+
80
+ def each
81
+ @files.each
82
+ end
83
+
84
+ def each_with_index
85
+ @files.each_with_index
86
+ end
87
+
88
+ def ns(xpath)
89
+ @isodoc.ns(xpath)
90
+ end
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,82 @@
1
+ module Metanorma
2
+ class Collection
3
+ class << self
4
+ # @param Block [Proc]
5
+ # @note allow user-specific function to run in pre-parse model stage
6
+ def set_pre_parse_model(&block)
7
+ @pre_parse_model_proc = block
8
+ end
9
+
10
+ # @param Block [Proc]
11
+ # @note allow user-specific function to resolve identifier
12
+ def set_identifier_resolver(&block)
13
+ @identifier_resolver = block
14
+ end
15
+
16
+ # @param Block [Proc]
17
+ # @note allow user-specific function to resolve fileref
18
+ # NOTE: MUST ALWAYS RETURN PATH relative to working directory
19
+ # (initial YAML file location). @fileref_resolver.call(ref_folder, fileref)
20
+ # fileref is not what is in the YAML, but the resolved path
21
+ # relative to the working directory
22
+ def set_fileref_resolver(&block)
23
+ @fileref_resolver = block
24
+ end
25
+
26
+ def unset_fileref_resolver
27
+ @fileref_resolver = nil
28
+ end
29
+
30
+ # @param collection_model [Hash{String=>String}]
31
+ def pre_parse_model(collection_model)
32
+ @pre_parse_model_proc or return
33
+ @pre_parse_model_proc.call(collection_model)
34
+ end
35
+
36
+ # @param identifier [String]
37
+ # @return [String]
38
+ def resolve_identifier(identifier)
39
+ @identifier_resolver or return identifier
40
+ @identifier_resolver.call(identifier)
41
+ end
42
+
43
+ # @param fileref [String]
44
+ # @return [String]
45
+ def resolve_fileref(ref_folder, fileref)
46
+ warn ref_folder
47
+ warn fileref
48
+ unless @fileref_resolver
49
+ (Pathname.new fileref).absolute? or
50
+ fileref = File.join(ref_folder, fileref)
51
+ return fileref
52
+ end
53
+
54
+ @fileref_resolver.call(ref_folder, fileref)
55
+ end
56
+
57
+ # @param filepath
58
+ # @raise [FileNotFoundException]
59
+ def check_file_existence(filepath)
60
+ unless File.exist?(filepath)
61
+ error_message = "#{filepath} not found!"
62
+ ::Metanorma::Util.log("[metanorma] Error: #{error_message}", :error)
63
+ raise FileNotFoundException.new error_message.to_s
64
+ end
65
+ end
66
+
67
+ def parse(file)
68
+ # need @dirname initialised before collection object initialisation
69
+ @dirname = File.expand_path(File.dirname(file))
70
+ config = case file
71
+ when /\.xml$/
72
+ ::Metanorma::Collection::Config::Config.from_xml(File.read(file))
73
+ when /.ya?ml$/
74
+ y = YAML.safe_load(File.read(file))
75
+ pre_parse_model(y)
76
+ ::Metanorma::Collection::Config::Config.from_yaml(y.to_yaml)
77
+ end
78
+ new(file: file, config: config)
79
+ end
80
+ end
81
+ end
82
+ end
@@ -14,7 +14,6 @@ module Metanorma
14
14
  # @param docref [Array<Hash{String=>String}>]
15
15
  # @param manifest [Array<Metanorma::Collection::Manifest>]
16
16
  def initialize(config, collection, dir)
17
- #require "debug"; binding.b
18
17
  @collection = collection
19
18
  @dir = dir
20
19
  @disambig = ::Metanorma::Collection::Util::DisambigFiles.new
@@ -22,9 +21,9 @@ module Metanorma
22
21
  end
23
22
 
24
23
  def manifest_postprocess(config)
25
- #require "debug"; binding.b
26
24
  manifest_bibdata(config)
27
25
  manifest_expand_yaml(config, @dir)
26
+ manifest_output_filenames(config)
28
27
  manifest_compile_adoc(config)
29
28
  manifest_filexist(config)
30
29
  manifest_sectionsplit(config)
@@ -40,6 +39,17 @@ module Metanorma
40
39
 
41
40
  GUID = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"
42
41
 
42
+ def manifest_output_filenames(config, parent = nil)
43
+ config.output_filename ||=
44
+ parent&.output_filename # || "{basename}.{document-num}"
45
+ config.sectionsplit_filename ||= parent&.sectionsplit_filename ||
46
+ "{basename_legacy}.{sectionsplit-num}"
47
+ # "{basename}-{document-num}.{sectionsplit-num}"
48
+ Array(config.entry).each do |f|
49
+ manifest_output_filenames(f, config)
50
+ end
51
+ end
52
+
43
53
  def manifest_identifier(config)
44
54
  no_id = populate_id_from_doc(config)
45
55
  config.identifier =
@@ -169,7 +179,8 @@ module Metanorma
169
179
  ::Metanorma::Compile.new
170
180
  .compile(f, agree_to_terms: true, install_fonts: false,
171
181
  extension_keys: [:xml])
172
- ::Metanorma::Util.log("[metanorma] Info: Compiling #{f}...done!", :warning)
182
+ ::Metanorma::Util.log("[metanorma] Info: Compiling #{f}...done!",
183
+ :warning)
173
184
  end
174
185
 
175
186
  def compile_adoc_file?(file)
@@ -617,7 +617,7 @@ module Metanorma
617
617
  end
618
618
 
619
619
  def htmlconv
620
- x = Asciidoctor.load nil, backend: @flavor
620
+ x = Asciidoctor.load nil, backend: Util::taste2flavor(@flavor)
621
621
  x.converter.html_converter(@converter_opt)
622
622
  end
623
623