asciisourcerer 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -208,7 +208,8 @@ module Sourcerer
208
208
  source_path,
209
209
  conversion_source_text,
210
210
  backend: selected_backend,
211
- header_footer: options[:header_footer])
211
+ header_footer: options[:header_footer],
212
+ attributes: options[:attributes])
212
213
 
213
214
  frontmatter = options[:include_frontmatter] ? extract_frontmatter(source_text, document.attributes) : {}
214
215
  html_body = document.convert
@@ -239,7 +240,7 @@ module Sourcerer
239
240
 
240
241
  # @api private
241
242
  def self.normalize_mark_down_grade_options options
242
- supported_option_keys = %i[html_output_path backend header_footer include_frontmatter markdown_options]
243
+ supported_option_keys = %i[html_output_path backend header_footer include_frontmatter markdown_options attributes]
243
244
  unknown_option_keys = options.keys - supported_option_keys
244
245
  raise ArgumentError, "unknown option(s): #{unknown_option_keys.join(', ')}" unless unknown_option_keys.empty?
245
246
 
@@ -248,7 +249,8 @@ module Sourcerer
248
249
  backend: options.fetch(:backend, 'asciidoctor-html5s'),
249
250
  header_footer: options.fetch(:header_footer, false),
250
251
  include_frontmatter: options.fetch(:include_frontmatter, true),
251
- markdown_options: options.fetch(:markdown_options, { github_flavored: true })
252
+ markdown_options: options.fetch(:markdown_options, { github_flavored: true }),
253
+ attributes: options.fetch(:attributes, {})
252
254
  }
253
255
  end
254
256
 
@@ -375,8 +377,9 @@ module Sourcerer
375
377
  # @param source_text [String]
376
378
  # @param backend [String]
377
379
  # @param header_footer [Boolean]
380
+ # @param attributes [Hash]
378
381
  # @return [Asciidoctor::Document]
379
- def self.load_document_for_markdown_grade source_path, source_text, backend:, header_footer:
382
+ def self.load_document_for_markdown_grade source_path, source_text, backend:, header_footer:, attributes: {}
380
383
  expanded_source_path = File.expand_path(source_path)
381
384
  Asciidoctor.load(
382
385
  source_text,
@@ -385,11 +388,12 @@ module Sourcerer
385
388
  backend: backend,
386
389
  header_footer: header_footer,
387
390
  base_dir: File.dirname(source_path),
388
- attributes: {
389
- 'docfile' => expanded_source_path,
390
- 'docdir' => File.dirname(expanded_source_path),
391
- 'docname' => File.basename(source_path, File.extname(source_path))
392
- })
391
+ attributes: attributes.merge(
392
+ {
393
+ 'docfile' => expanded_source_path,
394
+ 'docdir' => File.dirname(expanded_source_path),
395
+ 'docname' => File.basename(source_path, File.extname(source_path))
396
+ }))
393
397
  end
394
398
 
395
399
  # Extracts commands from listing and literal blocks with a specific role.
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'asciidoctor'
4
+
5
module Sourcerer
  # Helpers that split an Asciidoctor document's attribute hash into two
  # groups: keys listed as built-in below, and everything else ("custom").
  #
  # A skim consumer typically wants only the attributes an author set in the
  # source, so the built-in keys need to be filtered out.
  #
  # Further attribute helpers may be added to this module over time.
  #
  # @example
  #   custom  = Sourcerer::AttributesFilter.user_attributes(doc)
  #   builtin = Sourcerer::AttributesFilter.builtin_attributes(doc)
  module AttributesFilter
    # Exact attribute names treated as built-in: every key of
    # Asciidoctor::DEFAULT_ATTRIBUTES plus the names listed here.
    BUILTIN_ATTR_KEYS = (Asciidoctor::DEFAULT_ATTRIBUTES.keys + %w[
      asciidoctor asciidoctor-version
      attribute-missing attribute-undefined
      authorcount
      docdate docdatetime docdir docfile docfilesuffix docname doctime doctitle doctype docyear
      embedded
      htmlsyntax
      iconsdir
      localdate localdatetime localtime localyear
      max-include-depth
      notitle
      outfilesuffix
      stylesdir
      toc-position
      user-home
    ]).freeze

    # Name patterns treated as built-in (families such as +backend-*+ and
    # +safe-mode-*+ whose full names are not fixed in advance).
    BUILTIN_ATTR_PATTERNS = [
      /^backend(-|$)/,
      /^basebackend(-|$)/,
      /^doctype-/,
      /^filetype(-|$)/,
      /^safe-mode-/
    ].freeze

    module_function

    # Returns the document attributes that are NOT built-in, i.e. the full
    # attribute hash minus every key reported by {.builtin_attributes}.
    #
    # @param doc [Asciidoctor::Document]
    # @return [Hash{String => String}]
    def user_attributes doc
      doc.attributes.except(*builtin_attributes(doc).keys)
    end

    # Returns the document attributes whose key is listed in
    # BUILTIN_ATTR_KEYS or matches one of BUILTIN_ATTR_PATTERNS.
    #
    # @param doc [Asciidoctor::Document]
    # @return [Hash{String => String}]
    def builtin_attributes doc
      doc.attributes.select do |name, _value|
        next true if BUILTIN_ATTR_KEYS.include?(name)

        BUILTIN_ATTR_PATTERNS.any? { |pattern| pattern.match?(name) }
      end
    end
  end
end
@@ -178,6 +178,35 @@ module Sourcerer
178
178
  }
179
179
  template.render(context, options)
180
180
  end
181
+
182
+ # Render a Liquid template string directly with a data hash.
183
+ #
184
+ # Unlike {.render_template}, this method accepts an in-memory string and
185
+ # a plain Ruby Hash rather than paths to data files. Suitable for
186
+ # rendering individual block content (e.g. in Sync/Cast) without setting
187
+ # up a full template pipeline.
188
+ #
189
+ # Keys in `data` are stringified to satisfy Liquid's string-key contract.
190
+ # Nested key stringification is shallow; the caller is responsible for deeper
191
+ # transformations if required.
192
+ #
193
+ # The Jekyll/Liquid runtime is initialized before rendering so that any
194
+ # custom filters or tags registered elsewhere in Sourcerer are available.
195
+ #
196
+ # @param content [String] Liquid template source.
197
+ # @param data [Hash] Variables available to the template.
198
+ # @return [String] Rendered output.
199
def self.render_liquid_string content, data
  # Loaded lazily, on first use, rather than at file load time.
  require_relative 'jekyll'
  require_relative 'jekyll/liquid/filters'
  require_relative 'jekyll/liquid/tags'
  require 'liquid' unless defined?(Liquid::Template)
  Sourcerer::Jekyll.initialize_liquid_runtime

  # Parse first, then hand Liquid a copy of +data+ whose top-level keys are
  # strings (nested hashes are passed through untouched).
  parsed = Liquid::Template.parse(content)
  parsed.render(data.transform_keys { |key| key.to_s })
end
209
+
181
210
  private_class_method :load_render_data,
182
211
  :resolve_converter,
183
212
  :render_erb,
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module Sourcerer
  module SourceSkim
    # All recognized element categories. Sections are shape-controlled via +forms+
    # rather than listed here.
    ALL_CATEGORIES = %i[
      attributes_custom
      attributes_builtin
      definition_lists
      code_blocks
      literal_blocks
      examples
      sidebars
      tables
      admonitions
      quotes
      images
    ].freeze

    # Categories included when a caller passes +categories: nil+ (the default).
    # +attributes_builtin+, +admonitions+, and +quotes+ are opt-in only.
    DEFAULT_CATEGORIES = (ALL_CATEGORIES - %i[attributes_builtin admonitions quotes]).freeze

    # Configuration profile for a single SourceSkim pass.
    #
    # Controls which section shapes and element categories are emitted.
    # Callers pass a +Config+ instance to {Skimmer#process}; it is not part of
    # the public-facing module API and should be constructed via the keyword
    # arguments on {Sourcerer::SourceSkim.skim_file} and friends.
    # @api private
    class Config
      # Section shapes used when +forms+ is omitted or nil.
      DEFAULT_FORMS = %i[tree].freeze

      # @return [Array<Symbol>] requested section shapes
      # @return [Array<Symbol>] requested element categories
      attr_reader :forms, :categories

      # @param forms [Symbol, String, Array<Symbol, String>, nil] section
      #   shape(s); a scalar is wrapped in an array and entries are symbolized.
      #   nil falls back to {DEFAULT_FORMS}, mirroring how +categories: nil+
      #   falls back to {DEFAULT_CATEGORIES} (previously nil became an empty
      #   list, which silently suppressed every section output).
      # @param categories [Symbol, String, Array<Symbol, String>, nil] element
      #   categories; nil selects {DEFAULT_CATEGORIES}. An explicit empty array
      #   is honored and selects nothing.
      def initialize forms: DEFAULT_FORMS, categories: nil
        @forms = forms.nil? ? DEFAULT_FORMS.dup : Array(forms).map(&:to_sym)
        @categories = categories ? Array(categories).map(&:to_sym) : DEFAULT_CATEGORIES.dup
      end

      # @param category [Symbol, String]
      # @return [Boolean] whether +category+ was requested
      def include? category
        @categories.include?(category.to_sym)
      end

      # @return [Boolean] whether the nested section shape was requested
      def tree?
        @forms.include?(:tree)
      end

      # @return [Boolean] whether the flat section shape was requested
      def flat?
        @forms.include?(:flat)
      end
    end
  end
end
@@ -0,0 +1,298 @@
1
+ # frozen_string_literal: true
2
+
3
module Sourcerer
  module SourceSkim
    # Traverses a parsed Asciidoctor document and produces a JSON-ready skim hash.
    #
    # Use one instance per document: instance variables hold the state of a
    # single +process+ pass and are reset at the start of each call.
    #
    # This class is an internal implementation detail. External callers should
    # use the {Sourcerer::SourceSkim} module-level methods rather than
    # instantiating +Skimmer+ directly.
    # @api private
    class Skimmer
      # Scan listing/literal block source for include directives that remain as
      # raw text (i.e., were not resolved by the parser).
      INCLUDE_DIRECTIVE_PATTERN = /include::[^\[]+\[[^\]]*\]/

      # Element collections gathered while walking blocks, in the order they
      # are written to the result hash.
      ELEMENT_CATEGORIES = %i[
        definition_lists code_blocks literal_blocks examples sidebars
        tables admonitions quotes images
      ].freeze

      # Builds the skim for +document+.
      #
      # @param document [Asciidoctor::Document] parsed document; line numbers
      #   are only available when it was loaded with source locations
      # @param config [Config] selects section shapes and element categories
      # @return [Hash] +:title+ and +:lines+ always; the attribute, section and
      #   element keys only when +config+ asks for them
      def process document, config: Config.new
        @config = config
        @main_file = document.attr('docfile')
        @collected = ELEMENT_CATEGORIES.to_h { |category| [category, []] }

        process_blocks(document.blocks, 0, nil)

        tree = build_sections_tree(document.sections)
        doc_end = line_count_for(@main_file)
        assign_line_ends(tree, doc_end)

        result = { title: document.doctitle, lines: doc_end }
        if @config.include?(:attributes_custom)
          result[:attributes_custom] = Sourcerer::AttributesFilter.user_attributes(document)
        end
        if @config.include?(:attributes_builtin)
          result[:attributes_builtin] = Sourcerer::AttributesFilter.builtin_attributes(document)
        end

        result[:sections_tree] = tree if @config.tree?
        result[:sections_flat] = flatten_sections(tree) if @config.flat?

        ELEMENT_CATEGORIES.each do |category|
          result[category] = @collected[category] if @config.include?(category)
        end
        result
      end

      private

      # Number of lines in +file_path+, or nil when there is no such file
      # (e.g. the document was loaded from a string).
      def line_count_for file_path
        return nil unless file_path && File.exist?(file_path)

        File.foreach(file_path).count
      end

      # Returns the relative filename when +loc+ originates from an included file,
      # nil otherwise (meaning the block came from the main document).
      def file_for loc
        return nil unless loc

        source_file = loc.file
        return nil unless source_file
        return nil if @main_file && source_file == @main_file

        loc.path || File.basename(source_file)
      end

      # Converts a section list into nested records. +level+ is the depth of
      # the enclosing scope, so top-level sections are reported as level 1.
      def build_sections_tree sections, level = 0
        sections.map do |section|
          loc = section.source_location
          origin = file_for(loc)
          record = {}
          record[:file] = origin if origin
          record.merge!(
            id: section.id,
            text: section.title,
            level: level + 1,
            starts_at: loc&.lineno,
            sections: build_sections_tree(section.sections, level + 1))
          record
        end
      end

      # Assigns +ends_near+ to each section in-place. +parent_end_line+ is the
      # last line of the enclosing scope (document total or parent section end).
      #
      # The end bound is one line before the next sibling that lives in the
      # main file. Siblings that come from an included file are skipped because
      # their +starts_at+ counts lines of that other file; using it would give
      # nonsense such as +ends_near: 0+. When no such sibling follows, the
      # enclosing scope's end is used.
      def assign_line_ends sections, parent_end_line
        sections.each_with_index do |rec, index|
          following = sections[(index + 1)..].find { |sib| !sib.key?(:file) && sib[:starts_at] }
          end_line = following ? following[:starts_at] - 1 : parent_end_line
          # Omit ends_near from include-sourced nodes: their starts_at is a line
          # number in the included file, so mixing it with a main-file end bound
          # would produce a misleading range.
          rec[:ends_near] = end_line unless rec.key?(:file)
          assign_line_ends(rec[:sections], end_line)
        end
      end

      # Returns a pre-order flat array from the annotated tree. Each record
      # carries +parent_id+ (nil for root) and +sections+ as an array of child IDs.
      def flatten_sections sections, acc = [], parent_id = nil
        sections.each do |rec|
          children = rec[:sections]
          flat_rec = rec.except(:sections)
          flat_rec[:parent_id] = parent_id
          flat_rec[:sections] = children.map { |child| child[:id] }
          acc << flat_rec
          flatten_sections(children, acc, rec[:id])
        end
        acc
      end

      # Returns every raw include directive found in +source+ ([] for nil).
      def detect_includes source
        return [] unless source

        source.scan(INCLUDE_DIRECTIVE_PATTERN)
      end

      # Walks +blocks+, recording each element whose category is enabled.
      #
      # Recording a container and walking its children are independent steps:
      # children of examples, sidebars, admonitions, quotes and definition-list
      # descriptions are visited even when the container itself is skipped
      # (untitled, or its category is disabled), so nested elements are not lost.
      #
      # @param level [Integer] section depth; only threaded through recursion
      # @param section_id [String, nil] id of the nearest enclosing section
      def process_blocks blocks, level, section_id
        blocks.each do |block|
          case block.context
          when :section
            process_blocks(block.blocks, level + 1, block.id)
          when :dlist
            record_definition_list(block, section_id)
            process_description_blocks(block, level, section_id)
          when :listing
            record_verbatim(:code_blocks, block, section_id)
          when :literal
            record_verbatim(:literal_blocks, block, section_id)
          when :example
            record_titled_container(:examples, block, section_id)
            process_child_blocks(block, level, section_id)
          when :sidebar
            record_titled_container(:sidebars, block, section_id)
            process_child_blocks(block, level, section_id)
          when :table
            record_table(block, section_id)
          when :admonition
            record_admonition(block, section_id)
            process_child_blocks(block, level, section_id)
          when :quote, :verse
            record_quote(block, section_id)
            process_child_blocks(block, level, section_id)
          when :image
            record_image(block, section_id)
          else
            process_child_blocks(block, level, section_id)
          end
        end
      end

      # Recurses into +block+'s children when it has any.
      def process_child_blocks block, level, section_id
        return unless block.respond_to?(:blocks)

        children = block.blocks
        process_blocks(children, level, section_id) if children.any?
      end

      # Recurses into the description part of every definition-list item.
      # Items are (terms, description) pairs; the description may be absent.
      def process_description_blocks dlist, level, section_id
        dlist.items.each do |_terms, description|
          process_child_blocks(description, level, section_id) if description
        end
      end

      # Starts an entry with the fields shared by every element (+id+, and
      # +file+ for include-sourced blocks). Returns [entry, line number or nil].
      def start_entry block
        loc = block.source_location
        entry = { id: block.id }
        origin = file_for(loc)
        entry[:file] = origin if origin
        [entry, loc&.lineno]
      end

      # Definition lists are recorded with or without a title.
      def record_definition_list block, section_id
        return unless @config.include?(:definition_lists)

        entry, line = start_entry(block)
        entry[:title] = block.title
        entry[:role] = block.style if block.style && !block.style.empty?
        terms = block.items.flat_map do |item_terms, _description|
          item_terms.map { |term| { text: term.text, starts_at: term.source_location&.lineno } }
        end
        entry.merge!(starts_at: line, section_id: section_id, definition_terms: terms)
        @collected[:definition_lists] << entry
      end

      # Records a titled listing (+:code_blocks+) or literal (+:literal_blocks+)
      # block; untitled ones are ignored. +language+ is only reported for
      # listings whose style is +source+.
      def record_verbatim category, block, section_id
        return unless @config.include?(category) && block.title

        entry, line = start_entry(block)
        entry.merge!(title: block.title, starts_at: line)
        entry[:language] = block.attr('language') if category == :code_blocks && block.style == 'source'
        entry[:section_id] = section_id
        entry[:includes] = detect_includes(block.source)
        @collected[category] << entry
      end

      # Records a titled example or sidebar; untitled ones are ignored.
      def record_titled_container category, block, section_id
        return unless @config.include?(category) && block.title

        entry, line = start_entry(block)
        entry.merge!(title: block.title, starts_at: line, section_id: section_id, includes: [])
        @collected[category] << entry
      end

      # Records a table that has a title and/or a non-empty header row.
      def record_table block, section_id
        return unless @config.include?(:tables)

        header_row = block.rows.head.first
        headers = header_row.map(&:text) if header_row && !header_row.empty?
        return unless block.title || headers

        entry, line = start_entry(block)
        entry[:title] = block.title
        entry[:headers] = headers if headers
        entry.merge!(starts_at: line, section_id: section_id)
        @collected[:tables] << entry
      end

      # Records a titled admonition; +type+ is the block style.
      def record_admonition block, section_id
        return unless @config.include?(:admonitions) && block.title

        entry, line = start_entry(block)
        entry.merge!(type: block.style, title: block.title, starts_at: line, section_id: section_id)
        @collected[:admonitions] << entry
      end

      # Records a quote or verse that has a title and/or an attribution.
      def record_quote block, section_id
        return unless @config.include?(:quotes)

        attribution = block.attr('attribution')
        return unless block.title || attribution

        entry, line = start_entry(block)
        entry[:title] = block.title
        entry[:attribution] = attribution if attribution
        entry.merge!(starts_at: line, section_id: section_id)
        @collected[:quotes] << entry
      end

      # Records every block image; title, width and height only when present.
      def record_image block, section_id
        return unless @config.include?(:images)

        entry, line = start_entry(block)
        entry[:title] = block.title if block.title
        entry.merge!(
          target: block.attr('target'),
          alt: block.attr('alt'),
          starts_at: line,
          section_id: section_id)
        width = block.attr('width')
        height = block.attr('height')
        entry[:width] = width if width
        entry[:height] = height if height
        @collected[:images] << entry
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'asciidoctor'
4
+ require 'logger'
5
+ require_relative 'attributes_filter'
6
+ require_relative 'source_skim/config'
7
+ require_relative 'source_skim/skimmer'
8
+
9
module Sourcerer
  # SourceSkim produces machine-oriented skims of AsciiDoc source documents.
  #
  # A skim is a structured, JSON-ready Hash describing selected source
  # elements. It is meant for automated tooling that inspects documentation
  # source to find areas of interest when related product code changes.
  #
  # @example Skim a file with default tree output
  #   skim = Sourcerer::SourceSkim.skim_file('docs/install.adoc')
  #
  # @example Skim with both tree and flat section shapes
  #   skim = Sourcerer::SourceSkim.skim_file('docs/install.adoc', forms: [:tree, :flat])
  #
  # @example Skim a content string
  #   skim = Sourcerer::SourceSkim.skim_string(adoc_content, forms: [:flat])
  #
  # @example Skim with caller-supplied attribute overrides
  #   skim = Sourcerer::SourceSkim.skim_file('docs/ref.adoc', attributes: { 'env' => 'prod' })
  module SourceSkim
    # Logger that writes to the null device, so parser messages are discarded.
    NULL_LOGGER = Logger.new(IO::NULL)
    # Options passed to every Asciidoctor load performed by this module.
    LOAD_OPTS = { safe: :safe, sourcemap: true, logger: NULL_LOGGER }.freeze

    # Skim the AsciiDoc file at +file_path+.
    #
    # @param file_path [String] path to the .adoc source file
    # @param forms [Array<Symbol>] section shape(s) to emit: +:tree+, +:flat+, or both
    # @param categories [Array<Symbol>, nil] element categories to include;
    #   nil uses {DEFAULT_CATEGORIES} (everything except +attributes_builtin+,
    #   +admonitions+ and +quotes+)
    # @param attributes [Hash{String => String}] arbitrary Asciidoctor attribute
    #   overrides applied at parse time, e.g. <tt>'env' => 'test'</tt>.
    #   Useful for toggling conditionals or injecting values that affect which
    #   blocks are visible to the parser.
    # @return [Hash] JSON-ready skim
    def self.skim_file file_path, forms: [:tree], categories: nil, attributes: {}
      document = Asciidoctor.load_file(file_path, **LOAD_OPTS, attributes: attributes)
      skim_doc(document, forms: forms, categories: categories)
    end

    # Skim AsciiDoc source from a +content+ string.
    #
    # @param content [String] raw AsciiDoc markup
    # @param forms [Array<Symbol>] section shape(s) to emit
    # @param categories [Array<Symbol>, nil] element categories to include;
    #   nil uses {DEFAULT_CATEGORIES}
    # @param attributes [Hash{String => String}] arbitrary Asciidoctor attribute
    #   overrides applied at parse time
    # @return [Hash] JSON-ready skim
    def self.skim_string content, forms: [:tree], categories: nil, attributes: {}
      document = Asciidoctor.load(content, **LOAD_OPTS, attributes: attributes)
      skim_doc(document, forms: forms, categories: categories)
    end

    # Skim an already-parsed Asciidoctor +document+.
    #
    # Use this entry point when the document was loaded by other means, such
    # as inside an Asciidoctor extension callback.
    #
    # @param doc [Asciidoctor::Document] parsed document object
    # @param forms [Array<Symbol>] section shape(s) to emit
    # @param categories [Array<Symbol>, nil] element categories to include;
    #   nil uses {DEFAULT_CATEGORIES}
    # @return [Hash] JSON-ready skim
    def self.skim_doc doc, forms: [:tree], categories: nil
      profile = Config.new(forms: forms, categories: categories)
      Skimmer.new.process(doc, config: profile)
    end
  end
end