asciisourcerer 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +95 -31
- data/lib/sourcerer/asciidoc.rb +64 -10
- data/lib/sourcerer/source_skim/markdown_skimmer.rb +108 -0
- data/lib/sourcerer/source_skim/skimmer.rb +2 -2
- data/lib/sourcerer/source_skim.rb +77 -31
- data/lib/sourcerer/sync/cast.rb +88 -22
- data/lib/sourcerer/version.rb +1 -1
- data/lib/sourcerer/yaml_frontmatter.rb +46 -0
- data/lib/sourcerer.rb +11 -4
- data/specs/docs/frontmatter-reader_prd.adoc +47 -0
- metadata +9 -4
- data/lib/sourcerer/attributes_filter.rb +0 -72
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 758bb9f8f554228a7d67b894471d79d966b0e6495b7b2da2854dcdfb18544e40
|
|
4
|
+
data.tar.gz: c82bb9ba5cf1391d78feb56682c470c6ded9d49f0d469678eb27801e1e30e9af
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d64e160a0e2ddc95e8114781c88e8f1a25cfaad689c43fd121e4bcae2c68b3e6bd8d5bc332464e496938b5d1e8cd769ce90ea74943c3d29edea593c560793d9b
|
|
7
|
+
data.tar.gz: 3a3a656576871fe4fdb3b4927d8cd3ed5b094f8f959ecbd2ccaf2ab4db16551ec6a136948526d137ff43b66ca043b808d935c8a22babd4bbd39db38811c2565e
|
data/README.adoc
CHANGED
|
@@ -37,11 +37,11 @@ endif::[]
|
|
|
37
37
|
:this_prod_name: {this_proj_name}
|
|
38
38
|
// end::universal-settings[]
|
|
39
39
|
:this_prod_vrsn_major: 0
|
|
40
|
-
:this_prod_vrsn_minor:
|
|
40
|
+
:this_prod_vrsn_minor: 3
|
|
41
41
|
:this_prod_vrsn_majmin: {this_prod_vrsn_major}.{this_prod_vrsn_minor}
|
|
42
|
-
:this_prod_vrsn_patch:
|
|
42
|
+
:this_prod_vrsn_patch: 0
|
|
43
43
|
:this_prod_vrsn: {this_prod_vrsn_majmin}.{this_prod_vrsn_patch}
|
|
44
|
-
:next_prod_vrsn: 0.
|
|
44
|
+
:next_prod_vrsn: 0.4.0
|
|
45
45
|
// end::global-settings[]
|
|
46
46
|
:toc: macro
|
|
47
47
|
:toclevels: 4
|
|
@@ -50,6 +50,7 @@ endif::[]
|
|
|
50
50
|
|
|
51
51
|
AsciiSourcerer is a Ruby library for radical single-sourcing of documentation and product data, primarily from AsciiDoc, YAML, and Liquid templating operations.
|
|
52
52
|
|
|
53
|
+
|
|
53
54
|
[[intro]]
|
|
54
55
|
== Introduction
|
|
55
56
|
|
|
@@ -98,7 +99,7 @@ AsciiDoc-to-Markdown conversion::
|
|
|
98
99
|
Convert AsciiDoc documents to Markdown for agentic consumption, with a focus on preserving semantic structure and document frontmatter.
|
|
99
100
|
See <<markdowngrade>>.
|
|
100
101
|
|
|
101
|
-
AsciiDoc source inspection::
|
|
102
|
+
AsciiDoc and Markdown source inspection::
|
|
102
103
|
Skim AsciiDoc documents to produce machine-oriented outlines of sections, code blocks, tables, and other semantic elements for tooling and agent consumption.
|
|
103
104
|
See <<source-skim>>.
|
|
104
105
|
|
|
@@ -299,20 +300,21 @@ Override it per call with the `canonical_prefix:` keyword argument.
|
|
|
299
300
|
Canonical blocks use AsciiDoc-style `tag::`/`end::` markers embedded inside comments.
|
|
300
301
|
Three comment styles are recognized so the same prime can manage files of any type:
|
|
301
302
|
|
|
302
|
-
[cols="
|
|
303
|
+
[cols="3,5m",options="header"]
|
|
303
304
|
|===
|
|
304
305
|
| Style | Example
|
|
305
|
-
| HTML / Markdown
|
|
306
|
-
| <!-- tag::universal-intro[]
|
|
307
|
-
| AsciiDoc
|
|
306
|
+
| HTML / Markdown
|
|
307
|
+
| <!-- tag::universal-intro[] +++-->+++
|
|
308
|
+
| AsciiDoc / JavaScript
|
|
308
309
|
| // tag::universal-intro[]
|
|
309
|
-
| Shell / Ruby / YAML
|
|
310
|
+
| Shell / Ruby / YAML / INI
|
|
310
311
|
| # tag::universal-intro[]
|
|
311
312
|
|===
|
|
312
313
|
|
|
313
|
-
|
|
314
|
+
A block with tag names beginning with the configured prefix (default `universal-`) is treated as canonical and managed by Sync/Cast.
|
|
315
|
+
|
|
316
|
+
Any block wrapped in `tag::_skip[]` and `end::_skip[]` will _not_ be passed from prime to target even during the init operation.
|
|
314
317
|
|
|
315
|
-
A tag whose name begins with the configured prefix (default `universal-`) is treated as canonical and managed by Sync/Cast.
|
|
316
318
|
All other tagged regions in a target file are left entirely alone, and their presence beside a canonical block never triggers a warning.
|
|
317
319
|
|
|
318
320
|
[[sync-cast-operations]]
|
|
@@ -321,6 +323,8 @@ All other tagged regions in a target file are left entirely alone, and their pre
|
|
|
321
323
|
init::
|
|
322
324
|
One-time operation that renders the prime template (the whole file, not just canonical blocks) through Liquid and writes the result as a new target file.
|
|
323
325
|
Use this to bootstrap a repo-local copy of a project template.
|
|
326
|
+
+
|
|
327
|
+
NOTE: Only blocks marked `_skip` are not carried over.
|
|
324
328
|
|
|
325
329
|
sync::
|
|
326
330
|
Ongoing operation that scans for canonical blocks, replaces their content with the prime version (after optional Liquid rendering), and leaves everything else verbatim.
|
|
@@ -332,7 +336,8 @@ The `result.diff` file contains the rendered content (`init`) or a unified diff
|
|
|
332
336
|
[NOTE]
|
|
333
337
|
====
|
|
334
338
|
If a canonical block is absent from a target file, `sync` emits a warning.
|
|
335
|
-
That warning is suppressed when the target contains a non-canonical tag sharing the same suffix
|
|
339
|
+
That warning is suppressed when the target contains a non-canonical tag sharing the same suffix, indicating a deliberate project-local override.
|
|
340
|
+
Example: `local-intro` alongside canonical `universal-intro`.
|
|
336
341
|
====
|
|
337
342
|
|
|
338
343
|
[[sync-cast-liquid]]
|
|
@@ -352,6 +357,18 @@ This is {{ data.variables.name }} version {{ data.variables.version }}.
|
|
|
352
357
|
<!-- end::universal-intro[] -->
|
|
353
358
|
----
|
|
354
359
|
|
|
360
|
+
Since each block is rendered independently, variables cannot reference content from other blocks.
|
|
361
|
+
Use a segment wrapped in `_liquid` tags to set variables inline that will be used across all subsequent blocks.
|
|
362
|
+
This `_liquid` segment only has access to variables passed into the main render call, not to any content blocks.
|
|
363
|
+
|
|
364
|
+
[source,markdown]
|
|
365
|
+
----
|
|
366
|
+
<!-- tag::_liquid -->
|
|
367
|
+
{%- assign name = data.variables.this_proj_name | default: 'Unknown Project' %}
|
|
368
|
+
{%- assign version = data.variables.this_prod_vrsn | default: '0.0.0' %}
|
|
369
|
+
<!-- end::_liquid -->
|
|
370
|
+
----
|
|
371
|
+
|
|
355
372
|
[[sync-cast-bootstrap]]
|
|
356
373
|
==== Bootstrap example
|
|
357
374
|
|
|
@@ -360,7 +377,7 @@ This is {{ data.variables.name }} version {{ data.variables.version }}.
|
|
|
360
377
|
Sourcerer::Sync.init(
|
|
361
378
|
'templates/AGENTS.markdown',
|
|
362
379
|
'AGENTS.md',
|
|
363
|
-
data: { 'project' => 'my-gem', 'org' => '
|
|
380
|
+
data: { 'project' => 'my-gem', 'org' => 'ACME Co' }
|
|
364
381
|
)
|
|
365
382
|
----
|
|
366
383
|
|
|
@@ -370,7 +387,7 @@ Pass `dry_run: true` to return the rendered content in `result.diff` without wri
|
|
|
370
387
|
[[templating-liquid-runtime]]
|
|
371
388
|
=== Templating and Liquid Runtime
|
|
372
389
|
|
|
373
|
-
Sourcerer supports Liquid and ERB, but its Liquid support is intentionally aligned with Jekyll
|
|
390
|
+
Sourcerer supports Liquid and ERB, but its Liquid support is intentionally aligned with Jekyll's runtime.
|
|
374
391
|
This keeps template behavior consistent with Jekyll projects while allowing DocOps Lab to register filters and tags.
|
|
375
392
|
|
|
376
393
|
If you are building a Jekyll-compatible templating pipeline, prefer Liquid.
|
|
@@ -379,7 +396,7 @@ If you want a low-friction Ruby template for internal tooling, ERB is available.
|
|
|
379
396
|
[[pipelines]]
|
|
380
397
|
=== Prebuild and Rendering Pipelines
|
|
381
398
|
|
|
382
|
-
Sourcerer
|
|
399
|
+
Sourcerer's rendering pipeline is optimized for build tooling and prebuild steps.
|
|
383
400
|
A typical prebuild might load attributes from `README.adoc`, extract tagged snippets into `build/snippets/`, and render YAML plus Liquid templates into `build/docs/`.
|
|
384
401
|
|
|
385
402
|
The API is intentionally small.
|
|
@@ -419,10 +436,14 @@ Schema-aware filters (including SGYML-specific classification filters) should be
|
|
|
419
436
|
[[source-skim]]
|
|
420
437
|
=== SourceSkim
|
|
421
438
|
|
|
422
|
-
`Sourcerer::SourceSkim` generates machine-oriented _skims_ of AsciiDoc source documents.
|
|
439
|
+
`Sourcerer::SourceSkim` generates machine-oriented _skims_ of AsciiDoc and Markdown source documents.
|
|
423
440
|
|
|
424
|
-
A skim is a structured, JSON/YAML-ready outline of selected source elements
|
|
441
|
+
A skim is a structured, JSON/YAML-ready outline of selected source elements.
|
|
442
|
+
AsciiDoc skims include:
|
|
425
443
|
sections, code blocks, definition lists, tables, images, and more.
|
|
444
|
+
|
|
445
|
+
Markdown skims include only sections and frontmatter, since other semantic elements are not reliably parseable in Markdown's freeform syntax.
|
|
446
|
+
|
|
426
447
|
Skims are intended to help tooling and agents inspect documentation source without ingesting full file contents.
|
|
427
448
|
|
|
428
449
|
.Example: Skim a file for sections and code blocks
|
|
@@ -439,6 +460,9 @@ skim = Sourcerer::SourceSkim.skim_file('docs/install.adoc', categories: [:code_b
|
|
|
439
460
|
|
|
440
461
|
# Skim inline content
|
|
441
462
|
skim = Sourcerer::SourceSkim.skim_string(raw_adoc_content)
|
|
463
|
+
|
|
464
|
+
# Skim Markdown file with flat sections only
|
|
465
|
+
skim = Sourcerer::SourceSkim.skim_file('README.md', forms: [:flat])
|
|
442
466
|
----
|
|
443
467
|
|
|
444
468
|
Section shapes::
|
|
@@ -446,6 +470,7 @@ Pass `forms: [:tree]` (default), `forms: [:flat]`, or both.
|
|
|
446
470
|
Tree shape preserves nesting; flat shape adds `parent_id` and expresses child section IDs as an array.
|
|
447
471
|
|
|
448
472
|
Categories::
|
|
473
|
+
(AsciiDoc skims only.)
|
|
449
474
|
Default output includes `attributes_custom`, `definition_lists`, `code_blocks`, `literal_blocks`, `examples`, `sidebars`, `tables`, and `images`.
|
|
450
475
|
Opt-in only (excluded by default): `attributes_builtin`, `admonitions`, `quotes`.
|
|
451
476
|
Pass `categories:` with an explicit array of symbols to restrict or expand output.
|
|
@@ -455,6 +480,7 @@ The skim output schema is at `specs/data/asciidoc-source-skim.schema.json` and a
|
|
|
455
480
|
|
|
456
481
|
[[skim-asciidoctor-extension]]
|
|
457
482
|
==== Asciidoctor extension
|
|
483
|
+
|
|
458
484
|
`Sourcerer::SourceSkim::TreeProcessorExtension` integrates SourceSkim into Asciidoctor parsing pipelines.
|
|
459
485
|
It stores the result as the `source-skim-result` document attribute and reads `source-skim-forms` and `source-skim-categories` document attributes for per-document configuration.
|
|
460
486
|
|
|
@@ -584,7 +610,7 @@ r.enum.to_a # => []
|
|
|
584
610
|
== Integrations (Implemented and Planned)
|
|
585
611
|
|
|
586
612
|
These notes describe how AsciiSourcerer relates to existing and future downstream tools.
|
|
587
|
-
For most users, these tools are the recommended way to access AsciiSourcerer
|
|
613
|
+
For most users, these tools are the recommended way to access AsciiSourcerer's capabilities, since they provide a richer context for configuration, input/output management, and workflow orchestration.
|
|
588
614
|
|
|
589
615
|
https://github.com/DocOps/lab/tree/main/gems/docopslab-dev[docopslab-dev]::
|
|
590
616
|
A harness for DocOps Lab developer tasks and operations.
|
|
@@ -608,24 +634,61 @@ All the benefits of extended YAML ingest and Liquid processing are available via
|
|
|
608
634
|
=== Alpha Scripts/CLIs
|
|
609
635
|
|
|
610
636
|
The AsciiSourcerer repo does host some CLI utilities and scripts that make direct usage of APIs hosted in the `asciisourcerer` gem.
|
|
611
|
-
|
|
637
|
+
|
|
638
|
+
These are largely prototype utilities that may eventually make it into official DocOps Lab applications with proper I/O surfaces.
|
|
612
639
|
|
|
613
640
|
[WARNING]
|
|
614
641
|
These scripts are *neither* rigorously tested *nor* officially supported.
|
|
615
642
|
They may be altered, deprecated, or dropped in backward-incompatible ways.
|
|
616
|
-
Use them as examples but do not rely on them in production.
|
|
643
|
+
Use them as examples or utilities but do not rely on them in production.
|
|
617
644
|
|
|
618
|
-
|
|
645
|
+
Current scripts include: ::
|
|
619
646
|
|
|
620
|
-
|
|
621
|
-
`skim_asciidoc.rb
|
|
622
|
-
For AsciiDoc source inspection.
|
|
623
|
-
Generates YAML or JSON versions of `Sourcerer::SourceSkim` output for a given AsciiDoc document or collection of
|
|
647
|
+
link:https://github.com/DocOps/asciisourcerer/blob/main/scripts/skim_markup.rb[`skim_markup.rb`]:::
|
|
648
|
+
(Formerly `skim_asciidoc.rb`.)
|
|
649
|
+
For AsciiDoc/Markdown source inspection.
|
|
650
|
+
Generates YAML or JSON versions of `Sourcerer::SourceSkim` output for a given AsciiDoc document, Markdown file, or collection of such.
|
|
624
651
|
|
|
625
|
-
`mark_down_grade.rb
|
|
652
|
+
link:https://github.com/DocOps/asciisourcerer/blob/main/scripts/mark_down_grade.rb[`mark_down_grade.rb`]:::
|
|
626
653
|
For AsciiDoc-to-Markdown conversion.
|
|
627
654
|
This script orchestrates `Sourcerer::AsciiDoc.mark_down_grade` and is a recommended starting point for users needing this functionality without building their own Ruby integration.
|
|
628
655
|
|
|
656
|
+
To use one of these ad hoc CLIs, copy it from the repo's `scripts/` directory into your project, then run it with `ruby` or `bundle exec ruby` as appropriate.
|
|
657
|
+
|
|
658
|
+
[[usage-example]]
|
|
659
|
+
==== Setup / Usage Example
|
|
660
|
+
|
|
661
|
+
For these scripts to work, the `asciisourcerer` gem must be available.
|
|
662
|
+
|
|
663
|
+
. Add `asciisourcerer` to (or create) `Gemfile` in the root of a project repo.
|
|
664
|
+
+
|
|
665
|
+
[source,ruby]
|
|
666
|
+
----
|
|
667
|
+
source 'https://rubygems.org'
|
|
668
|
+
gem 'asciisourcerer'
|
|
669
|
+
----
|
|
670
|
+
|
|
671
|
+
. Install the library and dependencies.
|
|
672
|
+
+
|
|
673
|
+
[.prompt]
|
|
674
|
+
bundle install
|
|
675
|
+
|
|
676
|
+
For each script you wish to use:
|
|
677
|
+
|
|
678
|
+
[start=3]
|
|
679
|
+
. Download the script you want.
|
|
680
|
+
+
|
|
681
|
+
.Example: writes current GH copy to a local `scripts/` path.
|
|
682
|
+
[.prompt]
|
|
683
|
+
curl -o scripts/skim_markup.rb https://raw.githubusercontent.com/DocOps/asciisourcerer/refs/heads/main/scripts/skim_markup.rb
|
|
684
|
+
+
|
|
685
|
+
Change `-o scripts/` to wherever you wish to save the script locally.
|
|
686
|
+
|
|
687
|
+
. Run the script.
|
|
688
|
+
+
|
|
689
|
+
[.prompt]
|
|
690
|
+
bundle exec ruby scripts/skim_markup.rb --help
|
|
691
|
+
|
|
629
692
|
|
|
630
693
|
[[development]]
|
|
631
694
|
== Development
|
|
@@ -729,14 +792,15 @@ Standard release flow:
|
|
|
729
792
|
|
|
730
793
|
. Run `./scripts/build.sh` to validate the environment, run tests, and build the gem file to `pkg/`.
|
|
731
794
|
|
|
732
|
-
. Publish to RubyGems.
|
|
733
|
-
+
|
|
734
|
-
RUBYGEMS_API_KEY=<rubygems.org key> ./scripts/publish.sh
|
|
735
|
-
|
|
736
795
|
. Tag the release in Git:
|
|
737
796
|
+
|
|
738
797
|
[.prompt,subs=+attributes]
|
|
739
|
-
git tag v{this_prod_vrsn}
|
|
798
|
+
git tag v{this_prod_vrsn}
|
|
799
|
+
git push origin v{this_prod_vrsn}
|
|
800
|
+
|
|
801
|
+
. Publish to RubyGems.
|
|
802
|
+
+
|
|
803
|
+
RUBYGEMS_API_KEY=<rubygems.org key> ./scripts/publish.sh
|
|
740
804
|
|
|
741
805
|
[NOTE]
|
|
742
806
|
AsciiSourcerer does not yet publish a release history document.
|
data/lib/sourcerer/asciidoc.rb
CHANGED
|
@@ -4,6 +4,7 @@ require 'asciidoctor'
|
|
|
4
4
|
require 'fileutils'
|
|
5
5
|
require 'yaml'
|
|
6
6
|
require 'cgi'
|
|
7
|
+
require_relative 'yaml_frontmatter'
|
|
7
8
|
|
|
8
9
|
module Sourcerer
|
|
9
10
|
# AsciiDoc-focused primitives for attribute loading, region extraction,
|
|
@@ -35,7 +36,7 @@ module Sourcerer
|
|
|
35
36
|
#
|
|
36
37
|
# @see https://asciidoctor.org/ Asciidoctor Documentation
|
|
37
38
|
module AsciiDoc
|
|
38
|
-
YAML_FRONTMATTER_REGEXP =
|
|
39
|
+
YAML_FRONTMATTER_REGEXP = Sourcerer::YamlFrontmatter::REGEXP
|
|
39
40
|
YAML_FRONT_MATTER_REGEXP = YAML_FRONTMATTER_REGEXP
|
|
40
41
|
PAGE_ATTRIBUTE_PREFIX = 'page-'
|
|
41
42
|
|
|
@@ -327,14 +328,7 @@ module Sourcerer
|
|
|
327
328
|
# @param source_text [String]
|
|
328
329
|
# @return [Hash]
|
|
329
330
|
def self.extract_yaml_frontmatter source_text
|
|
330
|
-
|
|
331
|
-
return {} unless match
|
|
332
|
-
|
|
333
|
-
frontmatter_payload = match[1].sub(/\A---\s*\n/, '')
|
|
334
|
-
parsed = YAML.safe_load(frontmatter_payload, aliases: true)
|
|
335
|
-
parsed.is_a?(Hash) ? parsed : {}
|
|
336
|
-
rescue Psych::SyntaxError
|
|
337
|
-
{}
|
|
331
|
+
Sourcerer::YamlFrontmatter.extract(source_text)
|
|
338
332
|
end
|
|
339
333
|
|
|
340
334
|
# Remove leading YAML front matter fence block from AsciiDoc source.
|
|
@@ -342,7 +336,7 @@ module Sourcerer
|
|
|
342
336
|
# @param source_text [String]
|
|
343
337
|
# @return [String]
|
|
344
338
|
def self.strip_yaml_frontmatter source_text
|
|
345
|
-
|
|
339
|
+
Sourcerer::YamlFrontmatter.strip(source_text)
|
|
346
340
|
end
|
|
347
341
|
|
|
348
342
|
# Compatibility alias.
|
|
@@ -483,5 +477,65 @@ module Sourcerer
|
|
|
483
477
|
:normalize_extract_tags,
|
|
484
478
|
:collect_tagged_content,
|
|
485
479
|
:normalize_mark_down_grade_options
|
|
480
|
+
|
|
481
|
+
# Utilities for filtering and partitioning Asciidoctor document attributes.
|
|
482
|
+
#
|
|
483
|
+
# Separates user-defined ("custom") attributes from those injected by
|
|
484
|
+
# Asciidoctor at parse time ("built-in").
|
|
485
|
+
#
|
|
486
|
+
# @example
|
|
487
|
+
# custom = Sourcerer::AsciiDoc::AttributesFilter.user_attributes(doc)
|
|
488
|
+
# builtin = Sourcerer::AsciiDoc::AttributesFilter.builtin_attributes(doc)
|
|
489
|
+
module AttributesFilter
|
|
490
|
+
# Attribute keys injected by Asciidoctor at parse time.
|
|
491
|
+
BUILTIN_ATTR_KEYS = (Asciidoctor::DEFAULT_ATTRIBUTES.keys + %w[
|
|
492
|
+
asciidoctor asciidoctor-version
|
|
493
|
+
attribute-missing attribute-undefined
|
|
494
|
+
authorcount
|
|
495
|
+
docdate docdatetime docdir docfile docfilesuffix docname doctime doctitle doctype docyear
|
|
496
|
+
embedded
|
|
497
|
+
htmlsyntax
|
|
498
|
+
iconsdir
|
|
499
|
+
localdate localdatetime localtime localyear
|
|
500
|
+
max-include-depth
|
|
501
|
+
notitle
|
|
502
|
+
outfilesuffix
|
|
503
|
+
stylesdir
|
|
504
|
+
toc-position
|
|
505
|
+
user-home
|
|
506
|
+
]).freeze
|
|
507
|
+
|
|
508
|
+
BUILTIN_ATTR_PATTERNS = [
|
|
509
|
+
/^backend(-|$)/,
|
|
510
|
+
/^basebackend(-|$)/,
|
|
511
|
+
/^doctype-/,
|
|
512
|
+
/^filetype(-|$)/,
|
|
513
|
+
/^safe-mode-/
|
|
514
|
+
].freeze
|
|
515
|
+
|
|
516
|
+
module_function
|
|
517
|
+
|
|
518
|
+
# Returns user-defined attributes, excluding Asciidoctor built-ins.
|
|
519
|
+
#
|
|
520
|
+
# @param doc [Asciidoctor::Document]
|
|
521
|
+
# @return [Hash{String => String}]
|
|
522
|
+
def user_attributes doc
|
|
523
|
+
doc.attributes.reject do |k, _|
|
|
524
|
+
BUILTIN_ATTR_KEYS.include?(k) ||
|
|
525
|
+
BUILTIN_ATTR_PATTERNS.any? { |pat| pat.match?(k) }
|
|
526
|
+
end
|
|
527
|
+
end
|
|
528
|
+
|
|
529
|
+
# Returns built-in Asciidoctor attributes injected at parse time.
|
|
530
|
+
#
|
|
531
|
+
# @param doc [Asciidoctor::Document]
|
|
532
|
+
# @return [Hash{String => String}]
|
|
533
|
+
def builtin_attributes doc
|
|
534
|
+
doc.attributes.select do |k, _|
|
|
535
|
+
BUILTIN_ATTR_KEYS.include?(k) ||
|
|
536
|
+
BUILTIN_ATTR_PATTERNS.any? { |pat| pat.match?(k) }
|
|
537
|
+
end
|
|
538
|
+
end
|
|
539
|
+
end
|
|
486
540
|
end
|
|
487
541
|
end
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Sourcerer
|
|
4
|
+
module SourceSkim
|
|
5
|
+
# Parses Markdown content and produces a JSON-ready skim hash.
|
|
6
|
+
#
|
|
7
|
+
# Heading levels are mapped to mirror AsciiDoc document structure:
|
|
8
|
+
# a single +#+ heading becomes the document title (level 0); all subsequent
|
|
9
|
+
# +#++#+ headings become sections starting at level 1. This keeps Markdown
|
|
10
|
+
# and AsciiDoc skim output shapes consistent.
|
|
11
|
+
#
|
|
12
|
+
# A new instance should be created per-document call. External callers should
|
|
13
|
+
# use {Sourcerer::SourceSkim.skim_file} or {Sourcerer::SourceSkim.skim_string}
|
|
14
|
+
# with a Markdown file or +format: :markdown+ rather than instantiating this
|
|
15
|
+
# class directly.
|
|
16
|
+
# @api private
|
|
17
|
+
class MarkdownSkimmer
|
|
18
|
+
# Matches ATX-style Markdown headings: one to six leading # characters.
|
|
19
|
+
MD_HEADING_RE = /^(\#{1,6})\s+(.+?)\s*$/
|
|
20
|
+
|
|
21
|
+
# @param content [String] raw Markdown text
|
|
22
|
+
# @param config [Config]
|
|
23
|
+
# @return [Hash] JSON-ready skim
|
|
24
|
+
def process content, config: Config.new(forms: [:flat])
|
|
25
|
+
@config = config
|
|
26
|
+
|
|
27
|
+
fm = Sourcerer::YamlFrontmatter.extract(content)
|
|
28
|
+
body = Sourcerer::YamlFrontmatter.strip(content)
|
|
29
|
+
offset = content.lines.length - body.lines.length
|
|
30
|
+
title, sections = extract_title_and_sections(body, offset)
|
|
31
|
+
|
|
32
|
+
result = {
|
|
33
|
+
title: title || fm['title'].to_s,
|
|
34
|
+
frontmatter: fm
|
|
35
|
+
}
|
|
36
|
+
result[:sections_flat] = sections if @config.flat?
|
|
37
|
+
result[:sections_tree] = build_tree(sections) if @config.tree?
|
|
38
|
+
result
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
# Scan body content for ATX headings.
|
|
44
|
+
#
|
|
45
|
+
# The first +#+ heading is treated as the document title (level 0) and
|
|
46
|
+
# returned separately. All remaining headings are mapped to section level
|
|
47
|
+
# +hashes - 1+ so that +##+ becomes level 1, +###+ becomes level 2, etc.
|
|
48
|
+
#
|
|
49
|
+
# Lines inside fenced code blocks (delimited by +```+ or +~~~+) are skipped
|
|
50
|
+
# so that comment lines such as +# rubocop comment+ are not mistaken for headings.
|
|
51
|
+
def extract_title_and_sections content, offset
|
|
52
|
+
title = nil
|
|
53
|
+
sections = []
|
|
54
|
+
in_fence = nil
|
|
55
|
+
|
|
56
|
+
content.each_line.with_index(1) do |line, lineno|
|
|
57
|
+
stripped = line.chomp
|
|
58
|
+
in_fence, fence_line = update_fence(stripped, in_fence)
|
|
59
|
+
next if fence_line || in_fence
|
|
60
|
+
|
|
61
|
+
m = stripped.match(MD_HEADING_RE)
|
|
62
|
+
next unless m
|
|
63
|
+
|
|
64
|
+
hashes = m[1].length
|
|
65
|
+
if hashes == 1 && title.nil?
|
|
66
|
+
title = m[2]
|
|
67
|
+
else
|
|
68
|
+
sections << { text: m[2], level: hashes - 1, starts_at: lineno + offset }
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
[title, sections]
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Returns +[new_fence_state, is_fence_line]+ for the given stripped line.
|
|
76
|
+
#
|
|
77
|
+
# A fence line (the opening or closing +```+/+~~~+ marker) should always
|
|
78
|
+
# be skipped by the caller regardless of the new fence state.
|
|
79
|
+
def update_fence stripped, in_fence
|
|
80
|
+
m = stripped.match(/\A(`{3,}|~{3,})/)
|
|
81
|
+
return [in_fence, false] unless m
|
|
82
|
+
return [m[1], true] if in_fence.nil?
|
|
83
|
+
return [nil, true] if stripped.start_with?(in_fence)
|
|
84
|
+
|
|
85
|
+
[in_fence, false]
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Build a nested section tree (Array) from a flat section list.
|
|
89
|
+
#
|
|
90
|
+
# Returns an Array of top-level (level 1) section nodes, each with a
|
|
91
|
+
# +:sections+ array of children, mirroring the shape produced by
|
|
92
|
+
# {Skimmer} for AsciiDoc documents.
|
|
93
|
+
def build_tree sections
|
|
94
|
+
roots = []
|
|
95
|
+
stack = [{ level: 0, sections: roots }]
|
|
96
|
+
|
|
97
|
+
sections.each do |h|
|
|
98
|
+
node = h.merge(sections: [])
|
|
99
|
+
stack.pop while stack.size > 1 && stack.last[:level] >= h[:level]
|
|
100
|
+
stack.last[:sections] << node
|
|
101
|
+
stack << node
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
roots
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
@@ -42,11 +42,11 @@ module Sourcerer
|
|
|
42
42
|
|
|
43
43
|
if @config.include?(:attributes_custom)
|
|
44
44
|
result[:attributes_custom] =
|
|
45
|
-
Sourcerer::AttributesFilter.user_attributes(document)
|
|
45
|
+
Sourcerer::AsciiDoc::AttributesFilter.user_attributes(document)
|
|
46
46
|
end
|
|
47
47
|
if @config.include?(:attributes_builtin)
|
|
48
48
|
result[:attributes_builtin] =
|
|
49
|
-
Sourcerer::AttributesFilter.builtin_attributes(document)
|
|
49
|
+
Sourcerer::AsciiDoc::AttributesFilter.builtin_attributes(document)
|
|
50
50
|
end
|
|
51
51
|
|
|
52
52
|
result[:sections_tree] = tree if @config.tree?
|
|
@@ -2,61 +2,96 @@
|
|
|
2
2
|
|
|
3
3
|
require 'asciidoctor'
|
|
4
4
|
require 'logger'
|
|
5
|
-
require_relative '
|
|
5
|
+
require_relative 'yaml_frontmatter'
|
|
6
6
|
require_relative 'source_skim/config'
|
|
7
7
|
require_relative 'source_skim/skimmer'
|
|
8
|
+
require_relative 'source_skim/markdown_skimmer'
|
|
8
9
|
|
|
9
10
|
module Sourcerer
|
|
10
|
-
# SourceSkim produces machine-oriented skims of
|
|
11
|
+
# SourceSkim produces machine-oriented skims of markup source documents.
|
|
11
12
|
#
|
|
12
13
|
# A skim is a structured, JSON-ready representation of selected source elements
|
|
13
14
|
# intended to help automated tooling inspect documentation source and identify
|
|
14
15
|
# likely areas of interest when related product code changes.
|
|
15
16
|
#
|
|
16
|
-
#
|
|
17
|
+
# AsciiDoc files are fully parsed by Asciidoctor and yield rich semantic
|
|
18
|
+
# output (sections, attributes, code blocks, tables, etc.). Markdown files
|
|
19
|
+
# yield frontmatter and section headings only, since Markdown has no
|
|
20
|
+
# standardised semantic block model.
|
|
21
|
+
#
|
|
22
|
+
# The format is auto-detected from the file extension when using +skim_file+.
|
|
23
|
+
# Pass +format: :markdown+ or +format: :asciidoc+ to +skim_string+ to
|
|
24
|
+
# disambiguate when there is no path to inspect.
|
|
25
|
+
#
|
|
26
|
+
# @example Skim an AsciiDoc file (auto-detected)
|
|
17
27
|
# skim = Sourcerer::SourceSkim.skim_file('docs/install.adoc')
|
|
18
28
|
#
|
|
29
|
+
# @example Skim a Markdown file (auto-detected)
|
|
30
|
+
# skim = Sourcerer::SourceSkim.skim_file('docs/guide.md')
|
|
31
|
+
#
|
|
19
32
|
# @example Skim with both tree and flat section shapes
|
|
20
33
|
# skim = Sourcerer::SourceSkim.skim_file('docs/install.adoc', forms: [:tree, :flat])
|
|
21
34
|
#
|
|
22
|
-
# @example Skim a
|
|
23
|
-
# skim = Sourcerer::SourceSkim.skim_string(
|
|
35
|
+
# @example Skim a Markdown string explicitly
|
|
36
|
+
# skim = Sourcerer::SourceSkim.skim_string(content, format: :markdown)
|
|
24
37
|
#
|
|
25
|
-
# @example Skim with caller-supplied attribute overrides
|
|
38
|
+
# @example Skim with caller-supplied Asciidoctor attribute overrides
|
|
26
39
|
# skim = Sourcerer::SourceSkim.skim_file('docs/ref.adoc', attributes: { 'env' => 'prod' })
|
|
27
40
|
module SourceSkim
|
|
28
|
-
NULL_LOGGER
|
|
29
|
-
LOAD_OPTS
|
|
41
|
+
NULL_LOGGER = Logger.new(IO::NULL)
|
|
42
|
+
LOAD_OPTS = { safe: :safe, sourcemap: true, logger: NULL_LOGGER,
|
|
43
|
+
attributes: { 'skip-front-matter' => '' } }.freeze
|
|
30
44
|
|
|
31
|
-
# Skim the
|
|
45
|
+
# Skim the markup file at +file_path+.
|
|
32
46
|
#
|
|
33
|
-
#
|
|
34
|
-
#
|
|
35
|
-
#
|
|
36
|
-
#
|
|
37
|
-
# @param
|
|
38
|
-
#
|
|
39
|
-
#
|
|
40
|
-
#
|
|
47
|
+
# Format is auto-detected from the file extension (.adoc → AsciiDoc;
|
|
48
|
+
# .md / .markdown → Markdown). Override with +format: :asciidoc+ or
|
|
49
|
+
# +format: :markdown+.
|
|
50
|
+
#
|
|
51
|
+
# @param file_path [String] path to the source file
|
|
52
|
+
# @param forms [Array<Symbol>, nil] section shape(s) to emit: +:tree+, +:flat+,
|
|
53
|
+
# or both. Defaults to +[:tree]+ for AsciiDoc and +[:flat]+ for Markdown.
|
|
54
|
+
# @param format [Symbol, nil] +:asciidoc+ or +:markdown+; nil auto-detects
|
|
55
|
+
# @param categories [Array<Symbol>, nil] AsciiDoc only. Element categories to
|
|
56
|
+
# include; nil uses {DEFAULT_CATEGORIES}. Silently ignored for Markdown.
|
|
57
|
+
# @param attributes [Hash{String => String}] AsciiDoc only. Asciidoctor
|
|
58
|
+
# attribute overrides. Silently ignored for Markdown.
|
|
41
59
|
# @return [Hash] JSON-ready skim
|
|
42
|
-
def self.skim_file file_path, forms:
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
60
|
+
def self.skim_file file_path, forms: nil, format: nil, categories: nil, attributes: {}
|
|
61
|
+
fmt = format || detect_format(file_path)
|
|
62
|
+
if fmt == :markdown
|
|
63
|
+
config = Config.new(forms: forms || [:flat])
|
|
64
|
+
MarkdownSkimmer.new.process(File.read(file_path), config: config)
|
|
65
|
+
else
|
|
66
|
+
attrs = LOAD_OPTS[:attributes].merge(attributes)
|
|
67
|
+
opts = LOAD_OPTS.merge(attributes: attrs)
|
|
68
|
+
doc = Asciidoctor.load_file(file_path, **opts)
|
|
69
|
+
skim_doc(doc, forms: forms || [:tree], categories: categories)
|
|
70
|
+
end
|
|
46
71
|
end
|
|
47
72
|
|
|
48
|
-
# Skim
|
|
73
|
+
# Skim markup source from a +content+ string.
|
|
49
74
|
#
|
|
50
|
-
#
|
|
51
|
-
#
|
|
52
|
-
#
|
|
53
|
-
#
|
|
54
|
-
#
|
|
75
|
+
# +format:+ must be provided when the content is Markdown, since there is
|
|
76
|
+
# no file extension to inspect. Defaults to +:asciidoc+ for backward
|
|
77
|
+
# compatibility.
|
|
78
|
+
#
|
|
79
|
+
# @param content [String] raw markup text
|
|
80
|
+
# @param format [Symbol] +:asciidoc+ (default) or +:markdown+
|
|
81
|
+
# @param forms [Array<Symbol>, nil] section shape(s) to emit
|
|
82
|
+
# @param categories [Array<Symbol>, nil] AsciiDoc only
|
|
83
|
+
# @param attributes [Hash{String => String}] AsciiDoc only
|
|
55
84
|
# @return [Hash] JSON-ready skim
|
|
56
|
-
def self.skim_string content,
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
85
|
+
def self.skim_string content, format: :asciidoc, forms: nil, categories: nil, attributes: {}
|
|
86
|
+
if format == :markdown
|
|
87
|
+
config = Config.new(forms: forms || [:flat])
|
|
88
|
+
MarkdownSkimmer.new.process(content, config: config)
|
|
89
|
+
else
|
|
90
|
+
attrs = LOAD_OPTS[:attributes].merge(attributes)
|
|
91
|
+
opts = LOAD_OPTS.merge(attributes: attrs)
|
|
92
|
+
doc = Asciidoctor.load(content, **opts)
|
|
93
|
+
skim_doc(doc, forms: forms || [:tree], categories: categories)
|
|
94
|
+
end
|
|
60
95
|
end
|
|
61
96
|
|
|
62
97
|
# Skim an already-parsed Asciidoctor +document+.
|
|
@@ -72,5 +107,16 @@ module Sourcerer
|
|
|
72
107
|
config = Config.new(forms: forms, categories: categories)
|
|
73
108
|
Skimmer.new.process(doc, config: config)
|
|
74
109
|
end
|
|
110
|
+
|
|
111
|
+
# @api private
|
|
112
|
+
def self.detect_format file_path
|
|
113
|
+
ext = File.extname(file_path).downcase
|
|
114
|
+
if Sourcerer::MARKDOWN_EXTS.include?(ext)
|
|
115
|
+
:markdown
|
|
116
|
+
else
|
|
117
|
+
:asciidoc
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
private_class_method :detect_format
|
|
75
121
|
end
|
|
76
122
|
end
|
data/lib/sourcerer/sync/cast.rb
CHANGED
|
@@ -72,7 +72,8 @@ module Sourcerer
|
|
|
72
72
|
# @return [CastResult]
|
|
73
73
|
def self.init prime_path, target_path, data: {}, dry_run: false
|
|
74
74
|
prime_text = File.read(prime_path)
|
|
75
|
-
|
|
75
|
+
clean_text = strip_meta_blocks(prime_text)
|
|
76
|
+
rendered = data.empty? ? clean_text : render_liquid_string(clean_text, data)
|
|
76
77
|
|
|
77
78
|
unless dry_run
|
|
78
79
|
FileUtils.mkdir_p(File.dirname(File.expand_path(target_path)))
|
|
@@ -107,15 +108,23 @@ module Sourcerer
|
|
|
107
108
|
prime_text = File.read(@prime_path)
|
|
108
109
|
target_text = File.read(@target_path)
|
|
109
110
|
|
|
111
|
+
# Parse with canonical_prefix: '' so that ALL tagged regions -- including
|
|
112
|
+
# the non-canonical _liquid preamble block -- surface as Block objects
|
|
113
|
+
# rather than being swallowed into TextSegments.
|
|
110
114
|
prime_segments = BlockParser.parse(
|
|
111
115
|
prime_text,
|
|
112
|
-
canonical_prefix:
|
|
116
|
+
canonical_prefix: '',
|
|
113
117
|
tag_patterns: @tag_patterns)
|
|
114
118
|
target_segments = BlockParser.parse(
|
|
115
119
|
target_text,
|
|
116
|
-
canonical_prefix:
|
|
120
|
+
canonical_prefix: '',
|
|
117
121
|
tag_patterns: @tag_patterns)
|
|
118
122
|
|
|
123
|
+
# Extract the _liquid preamble from the prime (non-canonical; not synced as a
|
|
124
|
+
# canonical block but used to carry Liquid variable context to all rendered content).
|
|
125
|
+
prime_liquid_block = prime_segments.find { |s| s.is_a?(BlockParser::Block) && s.tag == '_liquid' }
|
|
126
|
+
liquid_preamble = prime_liquid_block&.content.to_s
|
|
127
|
+
|
|
119
128
|
prime_blocks = BlockParser.extract_canonical(prime_segments, canonical_prefix: @canonical_prefix)
|
|
120
129
|
target_blocks, errors = validate_target_canonical(target_segments)
|
|
121
130
|
|
|
@@ -129,7 +138,10 @@ module Sourcerer
|
|
|
129
138
|
end
|
|
130
139
|
|
|
131
140
|
warnings = collect_warnings(prime_blocks, target_blocks, target_text)
|
|
132
|
-
new_segments, applied_changes = apply_prime_blocks(
|
|
141
|
+
new_segments, applied_changes = apply_prime_blocks(
|
|
142
|
+
target_segments, prime_blocks,
|
|
143
|
+
prime_liquid_block: prime_liquid_block,
|
|
144
|
+
liquid_preamble: liquid_preamble)
|
|
133
145
|
|
|
134
146
|
new_text = reconstruct(new_segments)
|
|
135
147
|
diff = generate_diff(target_text, new_text) if applied_changes.any? || @dry_run
|
|
@@ -156,6 +168,23 @@ module Sourcerer
|
|
|
156
168
|
template.render(data.transform_keys(&:to_s))
|
|
157
169
|
end
|
|
158
170
|
|
|
171
|
+
# Remove every underscore-prefixed meta block (+_skip+, +_liquid+, etc.) from
|
|
172
|
+
# a prime text before it is written to a target during {.init}.
|
|
173
|
+
# These blocks carry template instructions or Liquid context that are only
|
|
174
|
+
# meaningful during the prime→target rendering pass, not in the output file.
|
|
175
|
+
# @api private
|
|
176
|
+
def self.strip_meta_blocks text
|
|
177
|
+
tag_patterns = BlockParser.build_tag_patterns(
|
|
178
|
+
BlockParser::DEFAULT_TAG_SYNTAX_START,
|
|
179
|
+
BlockParser::DEFAULT_TAG_SYNTAX_END,
|
|
180
|
+
BlockParser::DEFAULT_COMMENT_SYNTAX_PATTERNS)
|
|
181
|
+
segments = BlockParser.parse(text, canonical_prefix: '', tag_patterns: tag_patterns)
|
|
182
|
+
segments
|
|
183
|
+
.reject { |s| s.is_a?(BlockParser::Block) && s.tag.start_with?('_') }
|
|
184
|
+
.map { |s| s.is_a?(BlockParser::Block) ? "#{s.open_line}#{s.content}#{s.close_line}" : s.content }
|
|
185
|
+
.join
|
|
186
|
+
end
|
|
187
|
+
|
|
159
188
|
private
|
|
160
189
|
|
|
161
190
|
# Collect canonical blocks from target, raising errors for duplicates.
|
|
@@ -192,29 +221,65 @@ module Sourcerer
|
|
|
192
221
|
warnings
|
|
193
222
|
end
|
|
194
223
|
|
|
195
|
-
def apply_prime_blocks target_segments, prime_blocks
|
|
224
|
+
def apply_prime_blocks target_segments, prime_blocks,
|
|
225
|
+
prime_liquid_block: nil, liquid_preamble: ''
|
|
196
226
|
applied_changes = []
|
|
227
|
+
has_preamble = !liquid_preamble.empty?
|
|
228
|
+
liquid_seen = false
|
|
197
229
|
|
|
198
230
|
new_segments = target_segments.map do |segment|
|
|
199
|
-
|
|
200
|
-
|
|
231
|
+
if segment.is_a?(BlockParser::Block)
|
|
232
|
+
if segment.tag == '_liquid'
|
|
233
|
+
# Sync the _liquid block content from prime to target
|
|
234
|
+
liquid_seen = true
|
|
235
|
+
next segment unless prime_liquid_block
|
|
236
|
+
next segment if prime_liquid_block.content == segment.content
|
|
237
|
+
|
|
238
|
+
applied_changes << '_liquid'
|
|
239
|
+
BlockParser::Block.new(
|
|
240
|
+
tag: '_liquid',
|
|
241
|
+
open_line: segment.open_line,
|
|
242
|
+
content: prime_liquid_block.content,
|
|
243
|
+
close_line: segment.close_line)
|
|
244
|
+
|
|
245
|
+
elsif canonical?(segment.tag)
|
|
246
|
+
next segment unless prime_blocks.key?(segment.tag)
|
|
247
|
+
|
|
248
|
+
prime_content = prime_blocks[segment.tag].content
|
|
249
|
+
rendered_content = render_content(prime_content, preamble: liquid_preamble)
|
|
250
|
+
|
|
251
|
+
if rendered_content == segment.content
|
|
252
|
+
segment
|
|
253
|
+
else
|
|
254
|
+
applied_changes << segment.tag
|
|
255
|
+
BlockParser::Block.new(
|
|
256
|
+
tag: segment.tag,
|
|
257
|
+
open_line: segment.open_line,
|
|
258
|
+
content: rendered_content,
|
|
259
|
+
close_line: segment.close_line)
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
else
|
|
263
|
+
segment
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
elsif segment.is_a?(BlockParser::TextSegment) && has_preamble && liquid_seen
|
|
267
|
+
# Render in-between text with the preamble context, but only after the
|
|
268
|
+
# _liquid block has been encountered so all variables are in scope.
|
|
269
|
+
rendered_text = render_content(segment.content, preamble: liquid_preamble)
|
|
270
|
+
if rendered_text == segment.content
|
|
271
|
+
segment
|
|
272
|
+
else
|
|
273
|
+
applied_changes << 'document-text'
|
|
274
|
+
BlockParser::TextSegment.new(content: rendered_text)
|
|
275
|
+
end
|
|
201
276
|
|
|
202
|
-
prime_content = prime_blocks[segment.tag].content
|
|
203
|
-
rendered_content = render_content(prime_content)
|
|
204
|
-
|
|
205
|
-
if rendered_content == segment.content
|
|
206
|
-
segment
|
|
207
277
|
else
|
|
208
|
-
|
|
209
|
-
BlockParser::Block.new(
|
|
210
|
-
tag: segment.tag,
|
|
211
|
-
open_line: segment.open_line,
|
|
212
|
-
content: rendered_content,
|
|
213
|
-
close_line: segment.close_line)
|
|
278
|
+
segment
|
|
214
279
|
end
|
|
215
280
|
end
|
|
216
281
|
|
|
217
|
-
[new_segments, applied_changes]
|
|
282
|
+
[new_segments, applied_changes.uniq]
|
|
218
283
|
end
|
|
219
284
|
|
|
220
285
|
def reconstruct segments
|
|
@@ -244,10 +309,11 @@ module Sourcerer
|
|
|
244
309
|
end
|
|
245
310
|
end
|
|
246
311
|
|
|
247
|
-
def render_content content
|
|
248
|
-
return content if @data.empty?
|
|
312
|
+
def render_content content, preamble: ''
|
|
313
|
+
return content if @data.empty? && preamble.empty?
|
|
249
314
|
|
|
250
|
-
|
|
315
|
+
full = preamble.empty? ? content : "#{preamble}#{content}"
|
|
316
|
+
self.class.render_liquid_string(full, @data)
|
|
251
317
|
end
|
|
252
318
|
|
|
253
319
|
def generate_diff old_text, new_text
|
data/lib/sourcerer/version.rb
CHANGED
data/lib/sourcerer/yaml_frontmatter.rb
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
|
|
5
|
+
module Sourcerer
|
|
6
|
+
# Single owner of YAML frontmatter parsing across Sourcerer.
|
|
7
|
+
#
|
|
8
|
+
# Both AsciiDoc pages (the Jekyll convention of embedding +---+-fenced YAML
|
|
9
|
+
# at the top of a +.adoc+ file) and Markdown pages use the same syntax.
|
|
10
|
+
# All Sourcerer code that needs to detect, extract, or remove a frontmatter
|
|
11
|
+
# block delegates here instead of duplicating logic or constants.
|
|
12
|
+
module YamlFrontmatter
|
|
13
|
+
# Matches a leading +---+-fenced YAML block at the start of a file.
|
|
14
|
+
# Content between the fences must be non-empty (+.+?+, lazy).
|
|
15
|
+
# The closing fence must be followed by a newline.
|
|
16
|
+
REGEXP = /\A(---\s*\n.+?\n)(---\s*\n)/m
|
|
17
|
+
|
|
18
|
+
module_function
|
|
19
|
+
|
|
20
|
+
# Parse the YAML frontmatter from +source_text+ and return it as a Hash.
|
|
21
|
+
#
|
|
22
|
+
# Returns an empty Hash when no frontmatter is present or when the YAML
|
|
23
|
+
# is malformed.
|
|
24
|
+
#
|
|
25
|
+
# @param source_text [String]
|
|
26
|
+
# @return [Hash]
|
|
27
|
+
def extract source_text
|
|
28
|
+
match = source_text.match(REGEXP)
|
|
29
|
+
return {} unless match
|
|
30
|
+
|
|
31
|
+
frontmatter_payload = match[1].sub(/\A---\s*\n/, '')
|
|
32
|
+
parsed = YAML.safe_load(frontmatter_payload, aliases: true)
|
|
33
|
+
parsed.is_a?(Hash) ? parsed : {}
|
|
34
|
+
rescue Psych::SyntaxError
|
|
35
|
+
{}
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Return +source_text+ with the leading YAML frontmatter block removed.
|
|
39
|
+
#
|
|
40
|
+
# @param source_text [String]
|
|
41
|
+
# @return [String]
|
|
42
|
+
def strip source_text
|
|
43
|
+
source_text.sub(REGEXP, '')
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
data/lib/sourcerer.rb
CHANGED
|
@@ -17,11 +17,18 @@ require_relative 'sourcerer/yaml'
|
|
|
17
17
|
# Requiring `sourcerer` also makes adjacent public constants (for example,
|
|
18
18
|
# `Sourcerer::Builder`) available to downstream callers.
|
|
19
19
|
module Sourcerer
|
|
20
|
+
# File extensions recognised as Markdown source files.
|
|
21
|
+
MARKDOWN_EXTS = %w[.md .markdown].freeze
|
|
22
|
+
|
|
23
|
+
# File extensions recognised as AsciiDoc source files.
|
|
24
|
+
ASCIIDOC_EXTS = %w[.adoc .asciidoc .asc .ad].freeze
|
|
25
|
+
|
|
20
26
|
autoload :AttributesFilter, 'sourcerer/attributes_filter'
|
|
21
|
-
autoload :
|
|
22
|
-
autoload :
|
|
23
|
-
autoload :
|
|
24
|
-
autoload :
|
|
27
|
+
autoload :YamlFrontmatter, 'sourcerer/yaml_frontmatter'
|
|
28
|
+
autoload :Jekyll, 'sourcerer/jekyll'
|
|
29
|
+
autoload :MarkDownGrade, 'sourcerer/mark_down_grade'
|
|
30
|
+
autoload :SourceSkim, 'sourcerer/source_skim'
|
|
31
|
+
autoload :Sync, 'sourcerer/sync'
|
|
25
32
|
|
|
26
33
|
DEPRECATED_FACADE_METHODS = {
|
|
27
34
|
# DO NOT add new public methods to this surface
|
|
data/specs/docs/frontmatter-reader_prd.adoc
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
= FrontmatterReader: PRD
|
|
2
|
+
:status: implemented
|
|
3
|
+
:version: 0.3.0
|
|
4
|
+
|
|
5
|
+
== Overview
|
|
6
|
+
|
|
7
|
+
`Sourcerer::SourceSkim` needs to incorporate a simpler Skim object that derives data/content from frontmatter YAML and section headings.
|
|
8
|
+
|
|
9
|
+
`SourceSkim` currently generates structured, semantic metadata and a content outline for AsciiDoc source files.
|
|
10
|
+
But many use cases (Jekyll site pipelines, documentation indexers, cross-repo search tools) also need to skim Markdown files for:
|
|
11
|
+
|
|
12
|
+
* Frontmatter data (layout, title, navigation hints, etc.)
|
|
13
|
+
* Section headings and their levels:
|
|
14
|
+
** a flat list of section headings and their line numbers, and/or
|
|
15
|
+
** a nested hierarchy of sections and subsections.
|
|
16
|
+
|
|
17
|
+
== Output Shape
|
|
18
|
+
|
|
19
|
+
The core operation yields a skim in one of the following configurations:
|
|
20
|
+
|
|
21
|
+
[source,ruby]
|
|
22
|
+
----
|
|
23
|
+
{
|
|
24
|
+
title: 'Introduction', # String — from first # heading
|
|
25
|
+
frontmatter: { 'layout' => 'docs', 'title' => 'Guide', ... }, # Hash
|
|
26
|
+
sections_flat: [ # Array
|
|
27
|
+
{ text: 'Getting Started', level: 1, starts_at: 13 },
|
|
28
|
+
{ text: 'Prerequisites', level: 2, starts_at: 17 },
|
|
29
|
+
...
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
----
|
|
33
|
+
|
|
34
|
+
[source,ruby]
|
|
35
|
+
----
|
|
36
|
+
{
|
|
37
|
+
title: 'Introduction', # String
|
|
38
|
+
frontmatter: { 'layout' => 'docs', 'title' => 'Guide', ... }, # Hash
|
|
39
|
+
sections_tree: [ # Array (same shape as AsciiDoc)
|
|
40
|
+
{ text: 'Getting Started', level: 1, starts_at: 13, sections: [
|
|
41
|
+
{ text: 'Prerequisites', level: 2, starts_at: 17, sections: [] }
|
|
42
|
+
] },
|
|
43
|
+
{ text: 'Advanced Usage', level: 1, starts_at: 22, sections: [] },
|
|
44
|
+
...
|
|
45
|
+
]
|
|
46
|
+
}
|
|
47
|
+
----
|
metadata
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: asciisourcerer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.1
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- DocOps Lab
|
|
8
|
+
autorequire:
|
|
8
9
|
bindir: bin
|
|
9
10
|
cert_chain: []
|
|
10
|
-
date:
|
|
11
|
+
date: 2026-04-01 00:00:00.000000000 Z
|
|
11
12
|
dependencies:
|
|
12
13
|
- !ruby/object:Gem::Dependency
|
|
13
14
|
name: asciidoctor
|
|
@@ -122,7 +123,6 @@ files:
|
|
|
122
123
|
- lib/asciisourcerer.rb
|
|
123
124
|
- lib/sourcerer.rb
|
|
124
125
|
- lib/sourcerer/asciidoc.rb
|
|
125
|
-
- lib/sourcerer/attributes_filter.rb
|
|
126
126
|
- lib/sourcerer/builder.rb
|
|
127
127
|
- lib/sourcerer/jekyll.rb
|
|
128
128
|
- lib/sourcerer/jekyll/bootstrapper.rb
|
|
@@ -135,6 +135,7 @@ files:
|
|
|
135
135
|
- lib/sourcerer/rendering.rb
|
|
136
136
|
- lib/sourcerer/source_skim.rb
|
|
137
137
|
- lib/sourcerer/source_skim/config.rb
|
|
138
|
+
- lib/sourcerer/source_skim/markdown_skimmer.rb
|
|
138
139
|
- lib/sourcerer/source_skim/skimmer.rb
|
|
139
140
|
- lib/sourcerer/sync.rb
|
|
140
141
|
- lib/sourcerer/sync/block_parser.rb
|
|
@@ -144,12 +145,15 @@ files:
|
|
|
144
145
|
- lib/sourcerer/util/pathifier.rb
|
|
145
146
|
- lib/sourcerer/version.rb
|
|
146
147
|
- lib/sourcerer/yaml.rb
|
|
148
|
+
- lib/sourcerer/yaml_frontmatter.rb
|
|
149
|
+
- specs/docs/frontmatter-reader_prd.adoc
|
|
147
150
|
homepage: https://github.com/DocOps/asciisourcerer
|
|
148
151
|
licenses:
|
|
149
152
|
- MIT
|
|
150
153
|
metadata:
|
|
151
154
|
allowed_push_host: https://rubygems.org
|
|
152
155
|
rubygems_mfa_required: 'true'
|
|
156
|
+
post_install_message:
|
|
153
157
|
rdoc_options: []
|
|
154
158
|
require_paths:
|
|
155
159
|
- lib
|
|
@@ -164,7 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
164
168
|
- !ruby/object:Gem::Version
|
|
165
169
|
version: '0'
|
|
166
170
|
requirements: []
|
|
167
|
-
rubygems_version: 3.
|
|
171
|
+
rubygems_version: 3.4.19
|
|
172
|
+
signing_key:
|
|
168
173
|
specification_version: 4
|
|
169
174
|
summary: APIs for specialized handling of AsciiDoc, YAML, and Liquid documents.
|
|
170
175
|
test_files: []
|
|
data/lib/sourcerer/attributes_filter.rb
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'asciidoctor'
|
|
4
|
-
|
|
5
|
-
module Sourcerer
|
|
6
|
-
# Utilities for filtering and partitioning Asciidoctor document attributes.
|
|
7
|
-
#
|
|
8
|
-
# The primary use case is separating user-defined ("custom") attributes from
|
|
9
|
-
# those injected by Asciidoctor at parse time ("built-in"). This distinction
|
|
10
|
-
# matters when a skim consumer needs to inspect only the attributes an author
|
|
11
|
-
# explicitly set in their source.
|
|
12
|
-
#
|
|
13
|
-
# Additional attribute manipulation helpers may be added here over time.
|
|
14
|
-
#
|
|
15
|
-
# @example
|
|
16
|
-
# custom = Sourcerer::AttributesFilter.user_attributes(doc)
|
|
17
|
-
# builtin = Sourcerer::AttributesFilter.builtin_attributes(doc)
|
|
18
|
-
module AttributesFilter
|
|
19
|
-
# Attribute keys injected by Asciidoctor at parse time rather than defined
|
|
20
|
-
# by the document author.
|
|
21
|
-
BUILTIN_ATTR_KEYS = (Asciidoctor::DEFAULT_ATTRIBUTES.keys + %w[
|
|
22
|
-
asciidoctor asciidoctor-version
|
|
23
|
-
attribute-missing attribute-undefined
|
|
24
|
-
authorcount
|
|
25
|
-
docdate docdatetime docdir docfile docfilesuffix docname doctime doctitle doctype docyear
|
|
26
|
-
embedded
|
|
27
|
-
htmlsyntax
|
|
28
|
-
iconsdir
|
|
29
|
-
localdate localdatetime localtime localyear
|
|
30
|
-
max-include-depth
|
|
31
|
-
notitle
|
|
32
|
-
outfilesuffix
|
|
33
|
-
stylesdir
|
|
34
|
-
toc-position
|
|
35
|
-
user-home
|
|
36
|
-
]).freeze
|
|
37
|
-
|
|
38
|
-
BUILTIN_ATTR_PATTERNS = [
|
|
39
|
-
/^backend(-|$)/,
|
|
40
|
-
/^basebackend(-|$)/,
|
|
41
|
-
/^doctype-/,
|
|
42
|
-
/^filetype(-|$)/,
|
|
43
|
-
/^safe-mode-/
|
|
44
|
-
].freeze
|
|
45
|
-
|
|
46
|
-
module_function
|
|
47
|
-
|
|
48
|
-
# Returns a hash of user-defined attributes, excluding any key that belongs
|
|
49
|
-
# to Asciidoctor's built-in set.
|
|
50
|
-
#
|
|
51
|
-
# @param doc [Asciidoctor::Document]
|
|
52
|
-
# @return [Hash{String => String}]
|
|
53
|
-
def user_attributes doc
|
|
54
|
-
doc.attributes.reject do |k, _|
|
|
55
|
-
BUILTIN_ATTR_KEYS.include?(k) ||
|
|
56
|
-
BUILTIN_ATTR_PATTERNS.any? { |pat| pat.match?(k) }
|
|
57
|
-
end
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
# Returns a hash of built-in Asciidoctor attributes, i.e., those injected at
|
|
61
|
-
# parse time rather than authored in the document.
|
|
62
|
-
#
|
|
63
|
-
# @param doc [Asciidoctor::Document]
|
|
64
|
-
# @return [Hash{String => String}]
|
|
65
|
-
def builtin_attributes doc
|
|
66
|
-
doc.attributes.select do |k, _|
|
|
67
|
-
BUILTIN_ATTR_KEYS.include?(k) ||
|
|
68
|
-
BUILTIN_ATTR_PATTERNS.any? { |pat| pat.match?(k) }
|
|
69
|
-
end
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
end
|