jekyll-aeo 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/jekyll-aeo/commands/validate.rb +128 -0
- data/lib/jekyll-aeo/config.rb +86 -0
- data/lib/jekyll-aeo/generators/domain_profile.rb +77 -0
- data/lib/jekyll-aeo/generators/llms_full_txt.rb +68 -0
- data/lib/jekyll-aeo/generators/llms_txt.rb +165 -0
- data/lib/jekyll-aeo/generators/markdown_page.rb +148 -0
- data/lib/jekyll-aeo/generators/robots_txt.rb +67 -0
- data/lib/jekyll-aeo/generators/url_map.rb +170 -0
- data/lib/jekyll-aeo/hooks.rb +32 -0
- data/lib/jekyll-aeo/link_tag.rb +27 -0
- data/lib/jekyll-aeo/schema/article.rb +56 -0
- data/lib/jekyll-aeo/schema/breadcrumb_list.rb +46 -0
- data/lib/jekyll-aeo/schema/faq_page.rb +33 -0
- data/lib/jekyll-aeo/schema/how_to.rb +40 -0
- data/lib/jekyll-aeo/schema/organization.rb +34 -0
- data/lib/jekyll-aeo/schema/speakable.rb +31 -0
- data/lib/jekyll-aeo/tags/aeo_json_ld.rb +33 -0
- data/lib/jekyll-aeo/utils/content_stripper.rb +122 -0
- data/lib/jekyll-aeo/utils/html_converter.rb +35 -0
- data/lib/jekyll-aeo/utils/md_url.rb +32 -0
- data/lib/jekyll-aeo/utils/skip_logic.rb +66 -0
- data/lib/jekyll-aeo/version.rb +5 -0
- data/lib/jekyll-aeo.rb +25 -0
- metadata +153 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 2f7c4d9f203c1e2bc36c2f3c1547d4f6056ed77cd07d250a25ee186a46bd7891
|
|
4
|
+
data.tar.gz: 88f985556de2aae0ce6a2d0c57fb0be9be36c7a2b8f275910f7ff06e9f04bd3a
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 0110becea058f84e0c28790655d695f95d3f4394003795a7c1dafef7ca0c56fcecef12b3e1dfdbcc7a1bd4f194cc5bf5db3b60b0623e98642123265b2e9436b1
|
|
7
|
+
data.tar.gz: ffaa6971e9bd344a1bb30775e00653dfe0fff53cf8ff4ec8adfcf32899c88df87003fa0b8e3bbd5e9e77507b8cae9b6a932f81adc880a3e6fcb2a6b4685094be
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module JekyllAeo
|
|
6
|
+
module Commands
|
|
7
|
+
class Validate < Jekyll::Command
|
|
8
|
+
class << self
|
|
9
|
+
# Registers the `aeo:validate` subcommand on the given program object.
#
# The command accepts destination, source and config options and hands the
# parsed options to Validate.process when it is invoked.
def init_with_program(prog)
  prog.command(:"aeo:validate") do |cmd|
    cmd.syntax("aeo:validate [options]")
    cmd.description("Validate AEO output files (llms.txt, llms-full.txt, and referenced .md files)")

    cmd.option("destination", "-d", "--destination DESTINATION", "The destination directory to validate")
    cmd.option("source", "-s", "--source SOURCE", "Custom source directory")
    cmd.option("config", "--config CONFIG_FILE[,CONFIG_FILE2,...]", Array, "Custom configuration file")

    cmd.action { |_args, options| JekyllAeo::Commands::Validate.process(options) }
  end
end
|
|
25
|
+
|
|
26
|
+
# Entry point for the command: resolves the effective configuration, runs
# every check against the destination directory and reports the outcome.
#
# NOTE(review): configuration_from_options is not defined in this file;
# presumably it is inherited from Jekyll::Command — confirm.
def process(options)
  resolved = configuration_from_options(options)
  base = resolved["baseurl"].to_s.chomp("/")
  errors, warnings = validate(resolved["destination"], base)
  report(errors, warnings)
end
|
|
33
|
+
|
|
34
|
+
# Runs all validators against +dest+ and returns the collected messages.
#
# dest    - directory containing the generated site output
# baseurl - site baseurl (without trailing slash) used to resolve .md links
#
# Returns a two-element array: [errors, warnings].
def validate(dest, baseurl = "")
  errors, warnings = [], []

  validate_llms_txt(dest, errors)
  validate_llms_full_txt(dest, errors)
  validate_md_references(dest, baseurl, errors)
  validate_domain_profile(dest, errors, warnings)

  [errors, warnings]
end
|
|
43
|
+
|
|
44
|
+
private
|
|
45
|
+
|
|
46
|
+
# Checks that llms.txt exists in +dest+ and begins with an H1 heading.
# Appends a message to +errors+ for a missing file or a missing heading.
def validate_llms_txt(dest, errors)
  llms_file = File.join(dest, "llms.txt")

  if File.exist?(llms_file)
    has_heading = File.read(llms_file, encoding: "utf-8").start_with?("# ")
    errors << "llms.txt does not start with an H1 heading (# )" unless has_heading
  else
    errors << "llms.txt not found at #{llms_file}"
  end
end
|
|
58
|
+
|
|
59
|
+
# llms-full.txt is optional; when it is present it must not be a
# zero-length file. Appends a message to +errors+ in that case.
def validate_llms_full_txt(dest, errors)
  full_file = File.join(dest, "llms-full.txt")
  errors << "llms-full.txt is empty" if File.exist?(full_file) && File.empty?(full_file)
end
|
|
67
|
+
|
|
68
|
+
# Verifies that every Markdown link in llms.txt whose target ends in ".md"
# points at a file that exists under +dest+.
#
# dest    - directory containing the generated site output
# baseurl - site baseurl; stripped from the front of a link before it is
#           mapped onto the filesystem
# errors  - array that receives one message per missing file
#
# The baseurl is only stripped when it is followed by a path separator, so
# a baseurl of "/docs" does not mangle a link such as "/docs-extra/a.md".
def validate_md_references(dest, baseurl, errors)
  llms_path = File.join(dest, "llms.txt")
  return unless File.exist?(llms_path)

  prefix = baseurl.to_s.chomp("/")
  content = File.read(llms_path, encoding: "utf-8")

  content.scan(/\[.*?\]\(([^)]*\.md)\)/).flatten.each do |url|
    under_baseurl = !prefix.empty? && url.start_with?("#{prefix}/")
    relative_url = under_baseurl ? url.delete_prefix(prefix) : url
    file_path = File.join(dest, relative_url)
    next if File.exist?(file_path)

    errors << "Referenced file not found: #{url} (expected at #{file_path})"
  end
end
|
|
85
|
+
|
|
86
|
+
# Validates the optional .well-known/domain-profile.json file under +dest+.
#
# errors   - receives messages for unparseable JSON, a non-object document,
#            and missing or empty required fields
# warnings - receives a message when entity_type is present but not one of
#            JekyllAeo::Generators::DomainProfile::VALID_ENTITY_TYPES
#
# Does nothing when the file does not exist.
def validate_domain_profile(dest, errors, warnings)
  path = File.join(dest, ".well-known", "domain-profile.json")
  return unless File.exist?(path)

  begin
    data = JSON.parse(File.read(path, encoding: "utf-8"))
  rescue JSON::ParserError => e
    errors << "domain-profile.json is not valid JSON: #{e.message}"
    return
  end

  # Valid JSON may still be an array or scalar; field lookups below need a Hash.
  unless data.is_a?(Hash)
    errors << "domain-profile.json must contain a JSON object at the top level"
    return
  end

  %w[spec name description website contact].each do |field|
    value = data[field]
    blank = value.nil? || (value.is_a?(String) && value.empty?)
    errors << "domain-profile.json missing required field: #{field}" if blank
  end

  entity_type = data["entity_type"]
  return unless entity_type
  return if JekyllAeo::Generators::DomainProfile::VALID_ENTITY_TYPES.include?(entity_type)

  warnings << "domain-profile.json has invalid entity_type: #{entity_type}"
end
|
|
109
|
+
|
|
110
|
+
# Logs the validation outcome through Jekyll.logger.
#
# With no errors and no warnings a single success line is logged. Otherwise
# each warning and error is logged, followed by a summary of the counts, and
# the process is aborted when at least one error was recorded.
def report(errors, warnings)
  logger = Jekyll.logger

  if errors.empty? && warnings.empty?
    logger.info("AEO Validate:", "All checks passed!")
    return
  end

  warnings.each { |message| logger.warn("AEO Warning:", message) }
  errors.each { |message| logger.error("AEO Error:", message) }

  summary = [[errors, "error(s)"], [warnings, "warning(s)"]]
            .reject { |messages, _label| messages.empty? }
            .map { |messages, label| "#{messages.size} #{label}" }
  logger.info("AEO Validate:", summary.join(", "))

  abort if errors.any?
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAeo
|
|
4
|
+
module Config
|
|
5
|
+
# Default plugin configuration. Config.from_site merges the user's
# `jekyll_aeo` settings over this hash. Only the top-level hash is frozen.
DEFAULTS = {
  # Global switches
  "enabled" => true,
  "exclude" => [],

  # Per-page .md output
  "dotmd" => {
    "link_tag" => "auto",
    "include_last_modified" => true,
    "dotmd_metadata" => false,
    "md2dotmd" => { "strip_block_tags" => true, "protect_indented_code" => false },
    "html2dotmd" => { "enabled" => false, "selector" => nil }
  },

  # llms.txt index
  "llms_txt" => {
    "enabled" => true,
    "description" => nil,
    "sections" => nil,
    "front_matter_keys" => [],
    "show_lastmod" => false,
    "include_descriptions" => true
  },

  # llms-full.txt
  "llms_full_txt" => {
    "enabled" => true,
    "description" => nil,
    "full_txt_mode" => "all"
  },

  # URL map document
  "url_map" => {
    "enabled" => false,
    "output_filepath" => "docs/Url-Map.md",
    "columns" => %w[page_id url lang layout path redirects markdown_copy skipped],
    "show_created_at" => true
  },

  # robots.txt
  "robots_txt" => {
    "enabled" => false,
    "allow" => %w[
      Googlebot Bingbot OAI-SearchBot ChatGPT-User
      Claude-SearchBot Claude-User PerplexityBot Applebot-Extended
    ],
    "disallow" => %w[GPTBot ClaudeBot Google-Extended Meta-ExternalAgent Amazonbot],
    "include_sitemap" => true,
    "include_llms_txt" => true,
    "custom_rules" => []
  },

  # .well-known/domain-profile.json
  "domain_profile" => {
    "enabled" => false,
    "name" => nil,
    "description" => nil,
    "website" => nil,
    "contact" => nil,
    "logo" => nil,
    "entity_type" => nil,
    "jsonld" => nil
  }
}.freeze
|
|
60
|
+
|
|
61
|
+
# Builds the effective plugin configuration for +site+ by merging the
# `jekyll_aeo` entry of the site config (or an empty hash when it is unset)
# over DEFAULTS.
def self.from_site(site)
  overrides = site.config["jekyll_aeo"]
  deep_merge(DEFAULTS, overrides || {})
end
|
|
65
|
+
|
|
66
|
+
# Strict-schema merge: only keys present in +defaults+ appear in the result,
# so user-supplied keys outside the known schema are dropped.
#
# Rules per key:
# * Hash-valued defaults are always merged recursively. An override that is
#   not a Hash (for example an empty YAML key, which loads as nil) is
#   ignored for that section, so consumers that index into the section
#   never receive nil.
# * For any other default, an explicitly provided override wins, including
#   an explicit nil.
# * Array defaults are copied, and nested hashes are rebuilt, so mutating
#   the merged result never mutates +defaults+.
def self.deep_merge(defaults, overrides)
  overrides = {} unless overrides.is_a?(Hash)

  defaults.each_with_object({}) do |(key, default_val), result|
    result[key] =
      if default_val.is_a?(Hash)
        deep_merge(default_val, overrides[key])
      elsif overrides.key?(key)
        overrides[key]
      elsif default_val.is_a?(Array)
        default_val.dup
      else
        default_val
      end
  end
end
|
|
83
|
+
|
|
84
|
+
private_class_method :deep_merge
|
|
85
|
+
end
|
|
86
|
+
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "fileutils"
|
|
5
|
+
|
|
6
|
+
module JekyllAeo
|
|
7
|
+
module Generators
|
|
8
|
+
module DomainProfile
|
|
9
|
+
# Value written to the "spec" field of the generated profile.
SPEC_URL = "https://ai-domain-data.org/spec/v0.1"

# entity_type values accepted for the profile; anything else is ignored
# with a warning when the profile is built.
VALID_ENTITY_TYPES = %w[Organization Person Blog NGO Community Project CreativeWork SoftwareApplication Thing].freeze
|
|
15
|
+
|
|
16
|
+
# Writes .well-known/domain-profile.json into the site destination.
#
# Nothing is written when the plugin or the domain_profile section is
# disabled, or when build_profile returns nil because required fields are
# missing.
def self.generate(site)
  aeo_config = JekyllAeo::Config.from_site(site)
  return if aeo_config["enabled"] == false

  settings = aeo_config["domain_profile"] || {}
  return if settings["enabled"] == false

  profile = build_profile(settings, site)
  return unless profile

  target = File.join(site.dest, ".well-known", "domain-profile.json")
  FileUtils.mkdir_p(File.dirname(target))
  File.write(target, "#{JSON.pretty_generate(profile)}\n")
end
|
|
30
|
+
|
|
31
|
+
# Assembles the profile hash from the domain_profile settings, falling back
# to the site's title/name, description and url where a setting is unset.
# "contact" has no fallback.
#
# Returns the profile Hash, or nil (after logging a warning) when any of
# contact, name, description or website is nil or empty.
def self.build_profile(dp_config, site)
  site_config = site.config
  required = {
    "contact" => dp_config["contact"],
    "name" => dp_config["name"] || site_config["title"] || site_config["name"],
    "description" => dp_config["description"] || site_config["description"],
    "website" => dp_config["website"] || site_config["url"]
  }

  absent = required.keys.select { |field| required[field].nil? || required[field].to_s.empty? }
  if absent.any?
    Jekyll.logger.warn "AEO Domain Profile:",
                       "Skipped — required field(s) missing: #{absent.join(', ')}"
    return nil
  end

  profile = { "spec" => SPEC_URL }
  %w[name description website contact].each { |field| profile[field] = required[field].to_s }

  add_optional_fields(profile, dp_config)
  profile
end
|
|
57
|
+
|
|
58
|
+
# Adds the optional entity_type, logo and jsonld entries to +profile+.
#
# entity_type is copied only when it is one of VALID_ENTITY_TYPES; any other
# value is ignored with a warning. logo is copied when truthy, jsonld only
# when it is a Hash.
def self.add_optional_fields(profile, dp_config)
  entity_type = dp_config["entity_type"]
  if entity_type && VALID_ENTITY_TYPES.include?(entity_type)
    profile["entity_type"] = entity_type
  elsif entity_type
    Jekyll.logger.warn "AEO Domain Profile:",
                       "Invalid entity_type '#{entity_type}' — ignored. " \
                       "Valid: #{VALID_ENTITY_TYPES.join(', ')}"
  end

  logo = dp_config["logo"]
  profile["logo"] = logo if logo

  jsonld = dp_config["jsonld"]
  profile["jsonld"] = jsonld if jsonld.is_a?(Hash)
end
|
|
73
|
+
|
|
74
|
+
private_class_method :build_profile, :add_optional_fields
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module JekyllAeo
|
|
6
|
+
module Generators
|
|
7
|
+
module LlmsFullTxt
|
|
8
|
+
# Writes llms-full.txt into the site destination unless the plugin or the
# llms_full_txt section is disabled.
#
# The eligible items and their sections come from LlmsTxt, using the
# llms_txt settings, so both files are built from the same item set.
def self.generate(site)
  aeo_config = JekyllAeo::Config.from_site(site)
  return if aeo_config["enabled"] == false

  full_settings = aeo_config["llms_full_txt"] || {}
  return if full_settings["enabled"] == false

  warn_deprecated_full_txt_mode(site.config["jekyll_aeo"])

  index_settings = aeo_config["llms_txt"] || {}
  candidates = LlmsTxt.collect_eligible(site, aeo_config)
  grouped = LlmsTxt.build_sections(candidates, index_settings)

  output = build_content(site, grouped, candidates, full_settings)
  File.write(File.join(site.dest, "llms-full.txt"), output)
end
|
|
24
|
+
|
|
25
|
+
# Builds the text of llms-full.txt: an H1 with the site title, an optional
# blockquote description, then the stripped contents of each item's
# generated .md file, each preceded by a "---" separator.
#
# In "linked" mode only items that appear in +sections+ are included;
# in any other mode every entry of +eligible+ is. Items whose .md file does
# not exist are skipped.
def self.build_content(site, sections, eligible, full_config)
  selected =
    if full_config.fetch("full_txt_mode", "all") == "linked"
      sections.flat_map { |section| section[:items] }
    else
      eligible
    end

  out = ["# #{site.config['title']}", ""]

  blurb = full_config["description"] || site.config["description"]
  out.push("> #{blurb}", "") if blurb && !blurb.to_s.empty?

  selected.each do |entry|
    md_file = entry[:dest_md]
    next unless File.exist?(md_file)

    out.push("---", "", File.read(md_file, encoding: "utf-8").strip, "")
  end

  "#{out.join("\n").rstrip}\n"
end
|
|
56
|
+
|
|
57
|
+
# Logs a warning when the user's raw config still sets full_txt_mode under
# llms_txt instead of llms_full_txt. Accepts nil for an unset config.
def self.warn_deprecated_full_txt_mode(user_aeo_config)
  legacy_section = (user_aeo_config || {})["llms_txt"] || {}
  return unless legacy_section.key?("full_txt_mode")

  Jekyll.logger.warn "AEO:", "'llms_txt.full_txt_mode' has moved to 'llms_full_txt.full_txt_mode'"
end
|
|
64
|
+
|
|
65
|
+
private_class_method :build_content, :warn_deprecated_full_txt_mode
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module JekyllAeo
|
|
6
|
+
module Generators
|
|
7
|
+
module LlmsTxt
|
|
8
|
+
# Writes llms.txt into the site destination unless the plugin or the
# llms_txt section is disabled.
def self.generate(site)
  aeo_config = JekyllAeo::Config.from_site(site)
  return if aeo_config["enabled"] == false

  index_settings = aeo_config["llms_txt"] || {}
  return if index_settings["enabled"] == false

  candidates = collect_eligible(site, aeo_config)
  grouped = build_sections(candidates, index_settings)

  File.write(File.join(site.dest, "llms.txt"), build_llms_txt(site, grouped, index_settings, aeo_config))
end
|
|
21
|
+
|
|
22
|
+
# Collects every document and page that SkipLogic does not skip.
#
# Returns an array of hashes with :obj, :title, :description, :url,
# :collection and :dest_md. Documents come first, then pages. :collection
# is the document's collection label and always nil for pages.
def self.collect_eligible(site, config)
  describe = lambda do |obj, collection_label|
    {
      obj: obj,
      title: obj.data["title"] || "",
      description: obj.data["description"] || "",
      url: obj.url,
      collection: collection_label,
      dest_md: md_dest_path(obj, site)
    }
  end

  found = []
  site.documents.each do |doc|
    found << describe.call(doc, doc.collection&.label) unless JekyllAeo::Utils::SkipLogic.skip?(doc, site, config)
  end
  site.pages.each do |page|
    found << describe.call(page, nil) unless JekyllAeo::Utils::SkipLogic.skip?(page, site, config)
  end
  found
end
|
|
53
|
+
|
|
54
|
+
# Groups eligible items into sections: the user's `sections` definitions
# when that setting is truthy, automatic per-collection grouping otherwise.
def self.build_sections(eligible, llms_config)
  definitions = llms_config["sections"]
  return build_auto_sections(eligible) unless definitions

  build_custom_sections(eligible, definitions)
end
|
|
63
|
+
|
|
64
|
+
# Destination path of the generated .md file for +obj+.
# Thin public wrapper around Utils::MdUrl.dest_path.
def self.md_dest_path(obj, site)
  resolver = JekyllAeo::Utils::MdUrl
  resolver.dest_path(obj, site)
end
|
|
67
|
+
|
|
68
|
+
# URL of the .md counterpart of +url+, given the site +baseurl+.
# Thin public wrapper around Utils::MdUrl.for.
def self.md_url(url, baseurl = "")
  resolver = JekyllAeo::Utils::MdUrl
  resolver.for(url, baseurl)
end
|
|
71
|
+
|
|
72
|
+
# Builds one section per user definition. Each definition contributes its
# "title" and the eligible items whose :collection equals its "collection";
# a definition without a collection selects the items that have none.
def self.build_custom_sections(eligible, section_defs)
  section_defs.map do |definition|
    wanted = definition["collection"]
    members = eligible.select { |entry| entry[:collection] == wanted }

    { title: definition["title"], items: members }
  end
end
|
|
88
|
+
|
|
89
|
+
# Groups eligible items by collection into sections.
#
# Items without a collection come first under "Pages". The remaining
# collections follow in alphabetical order of their label, except that the
# "optional" collection is always placed last. The sort key is an
# [is_optional, label] pair, so no other label can sort after "optional".
def self.build_auto_sections(eligible)
  grouped = eligible.group_by { |item| item[:collection] }
  sections = []

  # Standalone pages first
  standalone = grouped.delete(nil)
  sections << { title: "Pages", items: standalone } if standalone

  # Collections alphabetically, "optional" last
  labels = grouped.keys.sort_by { |label| [label == "optional" ? 1 : 0, label] }
  labels.each { |label| sections << { title: titleize(label), items: grouped[label] } }

  sections
end
|
|
107
|
+
|
|
108
|
+
# Turns a collection label into a section heading: "posts" becomes
# "Blog Posts"; any other label is split on "_" and "-" and each word is
# capitalized.
def self.titleize(label)
  return "Blog Posts" if label == "posts"

  words = label.split(/[_-]/)
  words.map(&:capitalize).join(" ")
end
|
|
116
|
+
|
|
117
|
+
# Builds the text of llms.txt: an H1 with the site title, an optional
# blockquote description (llms_txt description, else the site description),
# the llms-full.txt link and the item sections. The result always ends in
# exactly one newline.
def self.build_llms_txt(site, sections, llms_config, config)
  out = ["# #{site.config['title']}", ""]

  blurb = llms_config["description"] || site.config["description"]
  if blurb && !blurb.to_s.empty?
    out << "> #{blurb}"
    out << ""
  end

  append_full_txt_link(out, site, config)
  append_sections(out, sections, llms_config, site)

  "#{out.join("\n").rstrip}\n"
end
|
|
130
|
+
|
|
131
|
+
# Appends the link to llms-full.txt (prefixed with the site baseurl) and a
# blank line to +lines+, unless the llms_full_txt section is disabled.
def self.append_full_txt_link(lines, site, config)
  return if (config["llms_full_txt"] || {})["enabled"] == false

  prefix = site.config["baseurl"].to_s.chomp("/")
  lines.push("- [llms-full.txt](#{prefix}/llms-full.txt): Complete contents of all pages", "")
end
|
|
139
|
+
|
|
140
|
+
# Appends each non-empty section to +lines+ as an H2 heading followed by one
# Markdown list entry per item: "- [title](md-url)" plus ": description"
# when descriptions are enabled and the item has one.
#
# Descriptions are reduced to a single line (runs of whitespace, including
# newlines, collapse to one space). Front matter block scalars carry a
# trailing newline and may span several lines, which would otherwise split
# the list entry across lines once the output is joined.
def self.append_sections(lines, sections, llms_config, site)
  with_descriptions = llms_config["include_descriptions"] != false

  sections.each do |section|
    next if section[:items].empty?

    lines << "## #{section[:title]}"
    lines << ""

    section[:items].each do |item|
      entry = "- [#{item[:title]}](#{md_url(item[:url], site.config['baseurl'])})"
      if with_descriptions
        description = item[:description].to_s.gsub(/\s+/, " ").strip
        entry = "#{entry}: #{description}" unless description.empty?
      end
      lines << entry
    end

    lines << ""
  end
end
|
|
159
|
+
|
|
160
|
+
private_class_method :build_custom_sections, :build_auto_sections,
|
|
161
|
+
:titleize, :build_llms_txt, :append_full_txt_link,
|
|
162
|
+
:append_sections
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module JekyllAeo
|
|
6
|
+
module Generators
|
|
7
|
+
module MarkdownPage
|
|
8
|
+
# Matches a leading YAML front matter block (opening and closing "---").
YAML_FRONT_MATTER_REGEXP = /\A(---\s*\n.*?\n?)^(---\s*$\n?)/m

# Writes the .md counterpart of +obj+ into the site destination.
#
# The output is an optional metadata block, a header (title, description,
# last-modified line) and the body. When the metadata block is enabled the
# last-modified value is carried there and left out of the header. Runs of
# three or more newlines collapse to one blank line, and the file ends with
# a single newline. A root-level index.md is also written as index.html.md.
def self.process(obj, site)
  config = JekyllAeo::Config.from_site(site)
  dotmd_config = config["dotmd"]
  source_path = JekyllAeo::Utils::SkipLogic.resolve_source_path(obj, site)
  return if JekyllAeo::Utils::SkipLogic.skip?(obj, site, config)

  dest_path = md_dest_path(obj, site)
  body = extract_body(source_path, obj, dotmd_config)

  last_modified = nil
  if dotmd_config["include_last_modified"] && File.exist?(source_path)
    last_modified = resolve_last_modified(obj, source_path)
  end

  prefix =
    if dotmd_config["dotmd_metadata"]
      metadata = build_metadata_block(obj, site, config, last_modified)
      metadata + build_header(obj, body, config, last_modified: nil)
    else
      build_header(obj, body, config, last_modified: last_modified)
    end

  markdown = (prefix + body.lstrip).gsub(/\n{3,}/, "\n\n")
  markdown = "#{markdown.rstrip}\n"

  FileUtils.mkdir_p(File.dirname(dest_path))
  File.write(dest_path, markdown)

  at_site_root = File.dirname(dest_path) == site.dest
  return unless File.basename(dest_path) == "index.md" && at_site_root

  File.write(File.join(site.dest, "index.html.md"), markdown)
end
|
|
41
|
+
|
|
42
|
+
# Produces the Markdown body for +obj+.
#
# When the source file exists, its front matter is removed and the rest is
# passed through ContentStripper with the md2dotmd settings. Otherwise the
# rendered output of +obj+ is passed to HtmlConverter with the html2dotmd
# settings.
def self.extract_body(source_path, obj, dotmd_config)
  unless File.exist?(source_path)
    return JekyllAeo::Utils::HtmlConverter.convert(obj.output, dotmd_config["html2dotmd"])
  end

  without_front_matter = File.read(source_path, encoding: "utf-8").sub(YAML_FRONT_MATTER_REGEXP, "")
  JekyllAeo::Utils::ContentStripper.strip(without_front_matter, dotmd_config["md2dotmd"])
end
|
|
51
|
+
|
|
52
|
+
# Builds the header placed above the body of a generated .md file.
#
# Parts, in order, each followed by a blank line:
# * "# title", unless the body already starts with an H1
# * the description as a blockquote (every line prefixed when multi-line)
# * "> Last updated: ..." when +last_modified+ is given
#
# The positional +_config+ argument is accepted but not used.
def self.build_header(obj, body, _config = nil, last_modified: nil)
  parts = []

  title = obj.data["title"]
  body_has_h1 = body.lstrip.start_with?("# ")
  parts << "# #{title}\n\n" if title && !title.to_s.empty? && !body_has_h1

  description = obj.data["description"]
  if description && !description.to_s.empty?
    desc_lines = description.to_s.split("\n")
    quote = desc_lines.length > 1 ? desc_lines.map { |line| "> #{line}" }.join("\n") : "> #{description}"
    parts << "#{quote}\n\n"
  end

  parts << "> Last updated: #{last_modified}\n\n" if last_modified

  parts.join
end
|
|
72
|
+
|
|
73
|
+
# Picks the last-modified date string for +obj+: the formatted
# last_modified_at front matter value, else the formatted date value, else
# the source file's mtime as YYYY-MM-DD. Returns nil when none is available.
def self.resolve_last_modified(obj, source_path)
  declared = obj.data["last_modified_at"] || obj.data["date"]
  return format_date(declared) if declared

  File.mtime(source_path).strftime("%Y-%m-%d") if File.exist?(source_path)
end
|
|
84
|
+
|
|
85
|
+
# Normalizes a front matter date value to a string.
#
# Time and DateTime values are formatted as YYYY-MM-DD. Strings are parsed
# with Date.parse and returned unchanged when parsing fails. Anything else
# is converted with to_s.
def self.format_date(value)
  return value.strftime("%Y-%m-%d") if value.is_a?(Time) || value.is_a?(DateTime)
  return value.to_s unless value.is_a?(String)

  begin
    Date.parse(value).to_s
  rescue StandardError
    value
  end
end
|
|
99
|
+
|
|
100
|
+
# Characters that cannot safely appear in an unquoted scalar emitted here.
YAML_NEEDS_QUOTING = /[:\#"'{}\[\],&*?|<>=!%@`\n\r]/

# Leading characters that make a plain scalar ambiguous: sequence and
# document markers ("-"), null ("~"), and anything that may load as a number.
YAML_AMBIGUOUS_START = /\A[-~+.\d]/

# Words a YAML loader may turn into a boolean or null instead of a string.
YAML_RESERVED_WORD = /\A(?:null|true|false|yes|no|on|off)\z/i

# Escape sequences used inside the double-quoted form.
YAML_ESCAPES = { "\\" => "\\\\", '"' => '\\"', "\n" => '\\n', "\r" => '\\r' }.freeze

# Renders +value+ as a YAML scalar that loads back as the same string.
#
# The value is returned unquoted only when that is unambiguous. It is
# wrapped in double quotes, with backslash, quote, newline and carriage
# return escaped, when it is empty, has surrounding whitespace, contains a
# YAML_NEEDS_QUOTING character, starts with an ambiguous character, or is a
# reserved word. Over-quoting is harmless; under-quoting produces invalid
# YAML (e.g. "title: - item") or changes the loaded type (e.g. "true").
def self.yaml_safe_scalar(value)
  str = value.to_s
  plain_ok = !str.empty? &&
             str.strip == str &&
             !str.match?(YAML_NEEDS_QUOTING) &&
             !str.match?(YAML_AMBIGUOUS_START) &&
             !str.match?(YAML_RESERVED_WORD)
  return str if plain_ok

  # The hash form of gsub inserts replacements literally, in a single pass.
  "\"#{str.gsub(/[\\"\n\r]/, YAML_ESCAPES)}\""
end
|
|
109
|
+
|
|
110
|
+
# Metadata keys whose values are passed through yaml_safe_scalar.
SCALAR_FIELDS = %w[title description author lang].freeze

# Builds the "---"-delimited metadata block for a generated .md file.
# Fields whose value is nil or empty are left out; the block ends with a
# newline after the closing delimiter. +_config+ is accepted but not used.
def self.build_metadata_block(obj, site, _config, last_modified)
  present = metadata_fields(obj, site, last_modified).reject do |_key, value|
    value.nil? || value.to_s.empty?
  end

  rows = present.map do |key, value|
    rendered = SCALAR_FIELDS.include?(key) ? yaml_safe_scalar(value) : value
    "#{key}: #{rendered}"
  end

  ["---", *rows, "---", ""].join("\n")
end
|
|
123
|
+
|
|
124
|
+
# Returns the ordered [key, value] pairs for the metadata block.
#
# canonical is the canonical_url front matter value when set, otherwise the
# site url + baseurl (without trailing slash) + page url. lang falls back
# to the "language" front matter key. Values may be nil.
def self.metadata_fields(obj, site, last_modified)
  data = obj.data
  canonical = data["canonical_url"] ||
              [site.config["url"], site.config["baseurl"].to_s.chomp("/"), obj.url].join
  published = data["date"]

  {
    "title" => data["title"],
    "url" => obj.url,
    "canonical" => canonical,
    "description" => data["description"],
    "author" => data["author"],
    "date" => (published ? format_date(published) : nil),
    "last_modified" => last_modified,
    "lang" => data["lang"] || data["language"]
  }.to_a
end
|
|
138
|
+
|
|
139
|
+
# Destination path of the generated .md file for +obj+.
# Thin wrapper around Utils::MdUrl.dest_path.
def self.md_dest_path(obj, site)
  resolver = JekyllAeo::Utils::MdUrl
  resolver.dest_path(obj, site)
end
|
|
142
|
+
|
|
143
|
+
private_class_method :build_header, :md_dest_path, :resolve_last_modified,
|
|
144
|
+
:format_date, :build_metadata_block,
|
|
145
|
+
:yaml_safe_scalar, :extract_body, :metadata_fields
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
end
|