jekyll-ai-visible-content 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +39 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +227 -0
- data/LICENSE.txt +21 -0
- data/README.md +352 -0
- data/Rakefile +9 -0
- data/jekyll-ai-visible-content.gemspec +29 -0
- data/lib/jekyll-ai-visible-content.rb +47 -0
- data/lib/jekyll_ai_visible_content/configuration.rb +154 -0
- data/lib/jekyll_ai_visible_content/entity/organization.rb +68 -0
- data/lib/jekyll_ai_visible_content/entity/person.rb +114 -0
- data/lib/jekyll_ai_visible_content/entity/registry.rb +94 -0
- data/lib/jekyll_ai_visible_content/filters/entity_filter.rb +29 -0
- data/lib/jekyll_ai_visible_content/filters/naming_filter.rb +27 -0
- data/lib/jekyll_ai_visible_content/generators/content_graph_generator.rb +69 -0
- data/lib/jekyll_ai_visible_content/generators/entity_map_generator.rb +65 -0
- data/lib/jekyll_ai_visible_content/generators/llms_txt_generator.rb +170 -0
- data/lib/jekyll_ai_visible_content/generators/robots_txt_generator.rb +57 -0
- data/lib/jekyll_ai_visible_content/hooks/post_render_hook.rb +82 -0
- data/lib/jekyll_ai_visible_content/hooks/validate_hook.rb +49 -0
- data/lib/jekyll_ai_visible_content/json_ld/blog_posting_schema.rb +104 -0
- data/lib/jekyll_ai_visible_content/json_ld/breadcrumb_schema.rb +69 -0
- data/lib/jekyll_ai_visible_content/json_ld/builder.rb +64 -0
- data/lib/jekyll_ai_visible_content/json_ld/collection_schema.rb +47 -0
- data/lib/jekyll_ai_visible_content/json_ld/faq_schema.rb +37 -0
- data/lib/jekyll_ai_visible_content/json_ld/how_to_schema.rb +42 -0
- data/lib/jekyll_ai_visible_content/json_ld/person_schema.rb +18 -0
- data/lib/jekyll_ai_visible_content/json_ld/website_schema.rb +39 -0
- data/lib/jekyll_ai_visible_content/tags/ai_author_tag.rb +26 -0
- data/lib/jekyll_ai_visible_content/tags/ai_breadcrumb_tag.rb +50 -0
- data/lib/jekyll_ai_visible_content/tags/ai_entity_link_tag.rb +40 -0
- data/lib/jekyll_ai_visible_content/tags/ai_json_ld_tag.rb +54 -0
- data/lib/jekyll_ai_visible_content/tags/ai_related_posts_tag.rb +91 -0
- data/lib/jekyll_ai_visible_content/validators/entity_consistency_validator.rb +94 -0
- data/lib/jekyll_ai_visible_content/validators/json_ld_validator.rb +58 -0
- data/lib/jekyll_ai_visible_content/validators/link_validator.rb +27 -0
- data/lib/jekyll_ai_visible_content/version.rb +5 -0
- metadata +107 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
|
|
4
|
+
module Generators
|
|
5
|
+
class EntityMapGenerator < Jekyll::Generator
|
|
6
|
+
safe true
|
|
7
|
+
priority :lowest
|
|
8
|
+
|
|
9
|
+
# Generator entry point: scans the site for entity mentions and registers an
# `entity-map.json` page holding the result.
#
# @param site [Jekyll::Site] the site being generated
# @return [Array, nil] the site's page list, or nil when the plugin is disabled
def generate(site)
  settings = JekyllAiVisibleContent.config(site)
  return unless settings.enabled?

  mentions = Entity::Registry.new(settings)
  scan_content(site, mentions, settings)
  payload = build_entity_map(settings, mentions)

  entity_map = Jekyll::PageWithoutAFile.new(site, site.source, '', 'entity-map.json')
  entity_map.content = payload
  entity_map.data['layout'] = nil
  entity_map.data['sitemap'] = false
  site.pages << entity_map
end
|
|
23
|
+
|
|
24
|
+
private
|
|
25
|
+
|
|
26
|
+
# Records, for every post and page, which configured entity names occur in
# its content. Matching is a case-insensitive substring test.
#
# @param site [#posts, #pages] site whose posts and pages are scanned
# @param registry [#record_mention] receives (name, url) for every hit
# @param config [#entity] plugin configuration; reads 'knows_about' and 'name'
# @return [void]
def scan_content(site, registry, config)
  names = Array(config.entity['knows_about']) + [config.entity['name']].compact

  # Lower-case every needle once instead of once per document, and drop blank
  # names: an empty string is a substring of any text, so it would be
  # recorded as "mentioned" on every single page.
  needles = names.map do |name|
    needle = name.to_s.downcase
    [name, needle] unless needle.strip.empty?
  end.compact

  (site.posts.docs + site.pages).each do |doc|
    haystack = (doc.content || '').downcase
    needles.each do |name, needle|
      registry.record_mention(name, doc.url) if haystack.include?(needle)
    end
  end
end
|
|
39
|
+
|
|
40
|
+
# Serialises the primary entity and every 'knows_about' topic, together with
# the mention data held by the registry, as pretty-printed JSON.
#
# @param config [#entity, #entity_id, #entity_type] plugin configuration
# @param registry [#pages_for, #mention_count] mention lookup
# @return [String] JSON document with 'primary_entity', 'entities' and
#   'generated_at' (current time, ISO 8601)
def build_entity_map(config, registry)
  primary_entity = {
    'id' => config.entity_id,
    'type' => config.entity_type,
    'name' => config.entity['name'],
    'source_pages' => registry.pages_for(config.entity['name'] || ''),
    'total_references' => registry.mention_count(config.entity['name'] || '')
  }

  topic_entries = []
  (config.entity['knows_about'] || []).each do |topic|
    entry = { 'name' => topic }
    entry['mentions'] = registry.mention_count(topic)
    entry['linked_posts'] = registry.pages_for(topic)
    topic_entries << entry
  end

  document = {
    'primary_entity' => primary_entity,
    'entities' => topic_entries,
    'generated_at' => Time.now.iso8601
  }
  JSON.pretty_generate(document)
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
|
|
4
|
+
module Generators
|
|
5
|
+
class LlmsTxtGenerator < Jekyll::Generator
|
|
6
|
+
safe true
|
|
7
|
+
priority :low
|
|
8
|
+
|
|
9
|
+
# Generator entry point: adds `llms.txt` to the site and, when
# `include_full_text` is set, `llms-full.txt` as well.
#
# @param site [Jekyll::Site] the site being generated
# @return [Array, nil] the site's page list after the last page was added,
#   nil when nothing (or only llms.txt) was generated
def generate(site)
  settings = JekyllAiVisibleContent.config(site)
  return unless settings.enabled? && settings.llms_txt['enabled']

  entities = Entity::Registry.new(settings)
  site.pages << build_llms_txt(site, settings, entities)
  return unless settings.llms_txt['include_full_text']

  site.pages << build_llms_full_txt(site, settings, entities)
end
|
|
18
|
+
|
|
19
|
+
private
|
|
20
|
+
|
|
21
|
+
# Builds the summary variant (`llms.txt`), rendered with `full: false`.
#
# @return [Object] the page returned by #make_page
def build_llms_txt(site, config, registry)
  make_page(site, 'llms.txt', render_llms_txt(config, registry, site, full: false))
end
|
|
25
|
+
|
|
26
|
+
# Builds the full-text variant (`llms-full.txt`), rendered with `full: true`.
#
# @return [Object] the page returned by #make_page
def build_llms_full_txt(site, config, registry)
  make_page(site, 'llms-full.txt', render_llms_txt(config, registry, site, full: true))
end
|
|
30
|
+
|
|
31
|
+
# Renders the llms.txt document: title, optional description blockquote,
# then the entity, topics, custom, posts and links sections in that order.
#
# @param config [Object] plugin configuration
# @param registry [Object] entity registry, passed on to the entity section
# @param site [Object] site whose posts are listed
# @param full [Boolean] passed through to the posts section
# @return [String] the document, lines joined with "\n"
def render_llms_txt(config, registry, site, full:)
  heading = config.llms_txt['title'] || config.site_title
  summary = config.llms_txt['description'] || config.site_description

  out = ["# #{heading}", '']
  out << "> #{summary.strip}" unless !summary || summary.strip.empty?
  out << ''

  append_entity_section(out, config, registry)
  append_topics_section(out, config)
  append_custom_sections(out, config)
  append_posts_section(out, config, site, full: full)
  append_links_section(out, config)

  out.join("\n")
end
|
|
49
|
+
|
|
50
|
+
# Appends the "## About" section describing the configured entity.
# Nothing is appended when the entity has no name.
#
# @param lines [Array<String>] output buffer, mutated in place
# @param config [#entity] plugin configuration
# @param _registry [Object] unused
# @return [Array<String>, nil] the buffer, or nil when no name is configured
def append_entity_section(lines, config, _registry)
  entity = config.entity
  return unless entity['name']

  lines << '## About'
  lines << ''
  # A blank description would otherwise render as a dangling "<name> is ".
  description = entity['description'].to_s.strip
  lines << "#{entity['name']} is #{description}" unless description.empty?
  lines << ''

  lines << "- Role: #{entity['job_title']}" if entity['job_title']

  # The location may be a mapping (locality/country) or a plain string. Only
  # emit the bullet when there is something to print: a mapping holding only
  # other keys used to yield an empty "- Location: " line, and a string
  # value used to raise NoMethodError on `values`.
  location = entity['location']
  parts = location.is_a?(Hash) ? [location['locality'], location['country']] : [location]
  parts = parts.compact.map { |part| part.to_s.strip }.reject(&:empty?)
  lines << "- Location: #{parts.join(', ')}" unless parts.empty?
  lines << ''
end
|
|
64
|
+
|
|
65
|
+
# Appends a "## Key Topics" bullet list built from the entity's
# 'knows_about' entries. Nothing is appended when the list is missing or empty.
#
# @param lines [Array<String>] output buffer, mutated in place
# @param config [#entity] plugin configuration
# @return [Array<String>, nil]
def append_topics_section(lines, config)
  topics = config.entity['knows_about']
  return if topics.nil? || !topics.any?

  lines.push('## Key Topics', '')
  lines.concat(topics.map { |topic| "- #{topic}" })
  lines << ''
end
|
|
74
|
+
|
|
75
|
+
# Appends every user-defined section from `llms_txt['sections']`.
# Entries without a 'heading' are skipped; 'content' is optional.
#
# @param lines [Array<String>] output buffer, mutated in place
# @param config [#llms_txt] plugin configuration
# @return [Array] the configured sections
def append_custom_sections(lines, config)
  (config.llms_txt['sections'] || []).each do |entry|
    next unless entry['heading']

    lines.push("## #{entry['heading']}", '')
    body = entry['content']
    lines << body.to_s.strip if body
    lines << ''
  end
end
|
|
86
|
+
|
|
87
|
+
# Appends the "## Posts" section, newest post first.
#
# With `full: false` each post is one markdown bullet (link plus optional
# description). With `full: true` each post gets its own "###" block with
# URL, date, the stripped post content and a "---" separator.
#
# @param lines [Array<String>] output buffer, mutated in place
# @param config [#site_url] plugin configuration
# @param site [Object] site handed to #sorted_posts
# @param full [Boolean] selects the full-text layout
# @return [Array<String>, nil] the buffer, or nil when there are no posts
def append_posts_section(lines, config, site, full:)
  posts = sorted_posts(site)
  return if posts.empty?

  lines.push('## Posts', '')

  posts.each do |post|
    meta = post.data
    link = "#{config.site_url}#{post.url}"

    unless full
      summary = meta['description']&.to_s&.strip
      bullet = "- [#{meta['title']}](#{link})"
      bullet += ": #{summary}" if summary && !summary.empty?
      lines << bullet
      next
    end

    lines.push("### #{meta['title']}", '', "URL: #{link}")
    lines << "Date: #{meta['date']&.strftime('%Y-%m-%d')}" if meta['date']
    lines << ''
    lines << strip_html_and_liquid(post.content) if post.content
    lines.push('', '---', '')
  end

  lines << ''
end
|
|
117
|
+
|
|
118
|
+
# Appends the "## Links" section: the site's /about/ URL followed by one
# bullet per 'same_as' link, labelled via #extract_platform.
#
# @param lines [Array<String>] output buffer, mutated in place
# @param config [#site_url, #entity] plugin configuration
# @return [Array<String>] the buffer
def append_links_section(lines, config)
  lines.push('## Links', '', "- About: #{config.site_url}/about/")

  profile_links = config.entity['same_as'] || []
  profile_links.each do |href|
    label = extract_platform(href)
    lines << "- #{label}: #{href}"
  end

  lines << ''
end
|
|
130
|
+
|
|
131
|
+
# Returns the site's posts ordered newest first. Posts without a 'date'
# sort as if dated at the Unix epoch, i.e. last.
#
# @param site [#posts] site whose `posts.docs` are sorted
# @return [Array] posts in descending date order
def sorted_posts(site)
  oldest_first = site.posts.docs.sort_by do |post|
    post.data['date'] || Time.at(0)
  end
  oldest_first.reverse
end
|
|
134
|
+
|
|
135
|
+
# Removes Liquid tags (`{% … %}`), Liquid output (`{{ … }}`) and HTML tags
# from the text, collapses runs of three or more newlines to a blank line,
# and trims the result.
#
# @param text [#to_s] raw content
# @return [String] the cleaned text
def strip_html_and_liquid(text)
  substitutions = [
    [/\{%.*?%\}/m, ''],
    [/\{\{.*?\}\}/m, ''],
    [/<[^>]+>/, ''],
    [/\n{3,}/, "\n\n"]
  ]

  cleaned = substitutions.reduce(text.to_s) do |current, (pattern, replacement)|
    current.gsub(pattern, replacement)
  end
  cleaned.strip
end
|
|
143
|
+
|
|
144
|
+
# Derives a human-readable platform label for a profile link.
#
# Known platforms are recognised by the URL's host, so a path segment such
# as "/github-tips" or a host such as "dropbox.com" (which contains
# "x.com") is no longer mistaken for GitHub or Twitter. When the string has
# no parseable host (for example a scheme-less "linkedin.com/in/jane"), the
# whole string is matched instead, as before.
#
# @param url [#to_s] profile URL
# @return [String] platform name, the capitalised second-level domain label
#   for unknown hosts, or 'Link' when nothing can be derived
def extract_platform(url)
  text = url.to_s
  host = URI.parse(text).host
  subject = host || text

  case subject
  when /linkedin/i then 'LinkedIn'
  when /github/i then 'GitHub'
  # x.com must be a whole domain, not the tail of another name.
  when /twitter/i, %r{(?:\A|[./])x\.com(?:\z|[/:?#])}i then 'Twitter'
  when /mastodon/i then 'Mastodon'
  when /youtube/i, %r{(?:\A|[./])youtu\.be(?:\z|[/:?#])}i then 'YouTube'
  else
    labels = host&.split('.')
    name = labels && labels.length >= 2 ? labels[-2] : nil
    name&.capitalize || 'Link'
  end
rescue URI::InvalidURIError
  'Link'
end
|
|
160
|
+
|
|
161
|
+
# Creates a file-less page at the site root with the given name and content,
# with its layout set to nil and its 'sitemap' flag set to false.
#
# @param site [Jekyll::Site] owning site
# @param name [String] output file name
# @param content [String] page body
# @return [Jekyll::PageWithoutAFile] the configured page (not yet added to the site)
def make_page(site, name, content)
  Jekyll::PageWithoutAFile.new(site, site.source, '', name).tap do |generated|
    generated.content = content
    generated.data['layout'] = nil
    generated.data['sitemap'] = false
  end
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
|
|
4
|
+
module Generators
|
|
5
|
+
class RobotsTxtGenerator < Jekyll::Generator
|
|
6
|
+
safe true
|
|
7
|
+
priority :low
|
|
8
|
+
|
|
9
|
+
# Maps each `crawlers` configuration flag to the user-agent token that is
# written to robots.txt when the flag is truthy.
CRAWLER_MAP = {
  'allow_gptbot' => 'GPTBot',
  'allow_perplexitybot' => 'PerplexityBot',
  'allow_claudebot' => 'ClaudeBot',
  'allow_googlebot' => 'Googlebot',
  'allow_bingbot' => 'Bingbot'
}.freeze
|
|
16
|
+
|
|
17
|
+
# Generator entry point: registers a generated `robots.txt` page when the
# plugin is enabled and `crawlers['generate_robots_txt']` is set.
#
# @param site [Jekyll::Site] the site being generated
# @return [Array, nil] the site's page list, or nil when generation is skipped
def generate(site)
  settings = JekyllAiVisibleContent.config(site)
  return unless settings.enabled? && settings.crawlers['generate_robots_txt']

  robots = Jekyll::PageWithoutAFile.new(site, site.source, '', 'robots.txt')
  robots.content = render_robots_txt(settings)
  robots.data['layout'] = nil
  robots.data['sitemap'] = false
  site.pages << robots
end
|
|
28
|
+
|
|
29
|
+
private
|
|
30
|
+
|
|
31
|
+
# Renders the robots.txt body: a catch-all allow group, one allow group per
# enabled crawler flag, the user's custom rules and the sitemap location.
#
# NOTE(review): a crawler flag set to false only omits that crawler's own
# group; the catch-all "Allow: /" still applies to it. Confirm whether a
# false flag is meant to emit "Disallow: /" instead.
#
# @param config [#crawlers, #site_url] plugin configuration
# @return [String] file content, lines joined with "\n"
def render_robots_txt(config)
  lines = ['User-agent: *', 'Allow: /', '']

  CRAWLER_MAP.each do |key, agent|
    next unless config.crawlers[key]

    lines.push("User-agent: #{agent}", 'Allow: /', '')
  end

  (config.crawlers['custom_rules'] || []).each do |rule|
    # A group needs a user-agent token; without one the rule used to be
    # written as a malformed "User-agent: " line, so skip it.
    agent = rule['user_agent'].to_s.strip
    next if agent.empty?

    lines << "User-agent: #{agent}"
    lines << "#{rule['directive']}: #{rule['path']}" if rule['directive'] && rule['path']
    lines << ''
  end

  lines << "Sitemap: #{config.site_url}/sitemap.xml"
  lines << ''
  lines.join("\n")
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
|
|
4
|
+
module Hooks
|
|
5
|
+
module PostRenderHook
|
|
6
|
+
class << self
|
|
7
|
+
# Subscribes #process to the :post_render event of both pages and documents.
def register!
  Jekyll::Hooks.register(:pages, :post_render) do |rendered_page|
    process(rendered_page)
  end
  Jekyll::Hooks.register(:documents, :post_render) do |rendered_doc|
    process(rendered_doc)
  end
end
|
|
11
|
+
|
|
12
|
+
private
|
|
13
|
+
|
|
14
|
+
# Post-processes one rendered item. Only items whose output extension is
# '.html' and that have output are touched, and only when the plugin is
# enabled. JSON-LD injection and entity auto-linking each depend on their
# own configuration switch.
#
# @param doc [Object] rendered page or document
# @return [Object, nil]
def process(doc)
  return if doc.output_ext != '.html' || !doc.output

  settings = JekyllAiVisibleContent.config(doc.site)
  return unless settings.enabled?

  inject_json_ld(doc, settings) if settings.json_ld['auto_inject']
  auto_link_entities(doc, settings) if settings.linking['enable_entity_links']
end
|
|
25
|
+
|
|
26
|
+
# Inserts the page's JSON-LD `<script>` block right before the closing head
# tag. Output that already contains 'application/ld+json' is left alone, as
# is output without a closing head tag.
#
# @param doc [#output, #output=] rendered page or document
# @param config [Object] plugin configuration
# @return [String, nil] the new output, or nil when nothing was injected
def inject_json_ld(doc, config)
  return if doc.output.include?('application/ld+json')

  registry = Entity::Registry.new(config)
  builder = JsonLd::Builder.new(config, registry)

  nodes = builder.build_for_page(doc)
  return if nodes.empty?

  if config.seo_tag_present?
    nodes.reject! { |node| node['@type'] == 'WebSite' }
    return if nodes.empty?
  end

  script_tag = builder.to_script_tag(nodes)
  # Use the block form of `sub`: with a replacement *string*, backslash
  # sequences are interpreted (`\\` collapses to `\`, `\0` inserts the
  # match), which corrupts JSON escapes inside the script tag. The pattern
  # is case-insensitive so `</HEAD>` is found too; the matched closing tag
  # is written back unchanged.
  doc.output = doc.output.sub(%r{</head\s*>}i) { |closing| "#{script_tag}\n#{closing}" }
end
|
|
43
|
+
|
|
44
|
+
# Turns plain-text mentions of known entities into schema.org-annotated
# links, at most `max_links_per_entity_per_post` (default 1) per entity.
#
# A mention is linked only when it is not inside a tag's angle brackets and
# not inside an element where a link would be invalid or would surface as
# literal markup: `<a>` (no nested anchors, including links added for an
# earlier entity), `<script>`, `<style>`, `<title>` and `<textarea>`.
#
# @param doc [#output, #output=] rendered page or document
# @param config [#linking] plugin configuration
# @return [Object, nil]
def auto_link_entities(doc, config)
  return unless doc.output

  registry = Entity::Registry.new(config)
  definitions = registry.entity_definitions
  max_per = config.linking['max_links_per_entity_per_post'] || 1
  protected_element = %r{<(a|script|style|title|textarea)(?=[\s/>])[^>]*>.*?</\1\s*>}mi

  definitions.each_value do |defn|
    name = defn['name']
    url = defn['url']
    next unless name && url
    # A blank name would compile to a pattern that matches between any two
    # delimiters.
    next if name.to_s.strip.empty?

    link_html = %(<a href="#{url}" itemprop="about" itemscope ) +
                %(itemtype="https://schema.org/Thing"><span itemprop="name">#{name}</span></a>)

    # Recomputed per entity because the previous iteration may have added
    # new <a> elements to the output.
    html = doc.output
    off_limits = []
    html.scan(protected_element) do
      found = Regexp.last_match
      off_limits << (found.begin(0)...found.end(0))
    end

    count = 0
    doc.output = html.gsub(/(?<=\s|>)#{Regexp.escape(name.to_s)}(?=[\s,.<])/i) do |match|
      position = Regexp.last_match.begin(0)
      linkable = count < max_per &&
                 !inside_tag?(html, position) &&
                 off_limits.none? { |range| range.cover?(position) }

      if linkable
        count += 1
        link_html
      else
        match
      end
    end
  end
end

# Tells whether `position` lies between a `<` and its closing `>`, i.e.
# inside a tag rather than in text content.
#
# @param html [String] document being scanned
# @param position [Integer] character offset into `html`
# @return [Boolean]
def inside_tag?(html, position)
  return false unless position.positive?

  # Search backwards in place instead of copying the whole prefix per match.
  last_open = html.rindex('<', position - 1) || -1
  last_close = html.rindex('>', position - 1) || -1
  last_open > last_close
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
JekyllAiVisibleContent::Hooks::PostRenderHook.register!
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
|
|
4
|
+
module Hooks
|
|
5
|
+
module ValidateHook
|
|
6
|
+
class << self
|
|
7
|
+
# Subscribes #validate to the site's :post_write event.
def register!
  Jekyll::Hooks.register(:site, :post_write) do |written_site|
    validate(written_site)
  end
end
|
|
10
|
+
|
|
11
|
+
private
|
|
12
|
+
|
|
13
|
+
# Runs the entity-consistency, JSON-LD and link validators, logs their
# findings, and fails the build when `validation['fail_build_on_error']` is
# set and any error was reported. Entity and link findings are warnings;
# JSON-LD findings are errors.
#
# @param site [Jekyll::Site] the written site
# @return [nil]
# @raise [JekyllAiVisibleContent::Error] via #abort_if_needed
def validate(site)
  config = JekyllAiVisibleContent.config(site)
  return unless config.enabled?

  entity_findings = Validators::EntityConsistencyValidator.new(config, site).validate
  errors = [].concat(Validators::JsonLdValidator.new(config).validate)
  link_findings = Validators::LinkValidator.new(config, site).validate
  warnings = [].concat(entity_findings).concat(link_findings)

  print_results(warnings, errors)
  return unless config.validation['fail_build_on_error'] && errors.any?

  abort_if_needed(errors, config)
end
|
|
32
|
+
|
|
33
|
+
# Logs a validation report through Jekyll's logger: one info header, then
# every warning at warn level and every error at error level. Logs nothing
# when both lists are empty.
#
# @param warnings [Array<String>]
# @param errors [Array<String>]
# @return [Array, nil]
def print_results(warnings, errors)
  return if warnings.empty? && errors.empty?

  topic = 'AI Visible Content:'
  Jekyll.logger.info topic, 'Validation report'
  warnings.each { |message| Jekyll.logger.warn topic, message }
  errors.each { |message| Jekyll.logger.error topic, message }
end
|
|
40
|
+
|
|
41
|
+
# Fails the build when at least one validation error was collected.
#
# @param errors [Array] collected validation errors
# @param _config [Object] unused
# @return [nil] when there are no errors
# @raise [JekyllAiVisibleContent::Error] when `errors` is not empty
def abort_if_needed(errors, _config)
  return unless errors.any?

  raise JekyllAiVisibleContent::Error, "Build failed: #{errors.size} validation error(s)"
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
JekyllAiVisibleContent::Hooks::ValidateHook.register!
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
|
|
4
|
+
module JsonLd
|
|
5
|
+
class BlogPostingSchema
|
|
6
|
+
attr_reader :config, :registry, :page
|
|
7
|
+
|
|
8
|
+
# @param config [Object] plugin configuration
# @param registry [Object] entity registry used for author/publisher references
# @param page [Object] the post the schema is built for
def initialize(config, registry, page)
  @config, @registry, @page = config, registry, page
end
|
|
13
|
+
|
|
14
|
+
# Builds the BlogPosting node for the page: identifiers, headline,
# description, dates, author/publisher references and the website link,
# then the optional image, keywords, about, word count and article body.
#
# @return [Hash] schema.org BlogPosting node with nil-valued keys removed
def build
  front_matter = page.data
  canonical_url = absolute_url(page.url)

  schema = { '@type' => 'BlogPosting', '@id' => "#{canonical_url}#article" }
  schema['mainEntityOfPage'] = { '@type' => 'WebPage', '@id' => canonical_url }
  schema['headline'] = front_matter['title']
  schema['description'] = front_matter['description']&.to_s&.strip
  schema['datePublished'] = format_date(front_matter['date'])
  schema['dateModified'] = format_date(front_matter['last_modified_at'] || front_matter['date'])
  schema['author'] = registry.primary_entity_ref
  schema['publisher'] = registry.primary_entity_ref
  schema['isPartOf'] = { '@id' => "#{config.site_url}/#website" }

  append_image(schema, front_matter)
  append_keywords(schema, front_matter)
  append_about(schema, front_matter)
  append_word_count(schema)
  append_article_body(schema)

  schema.compact
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
|
|
45
|
+
# Adds the post image as an absolute URL.
#
# The front matter value may be a plain path/URL or a mapping with a 'path'
# key (the form jekyll-seo-tag accepts). A mapping used to be passed
# straight to #absolute_url, which raised NoMethodError.
#
# @param posting [Hash] schema node, mutated in place
# @param data [Hash] page front matter
# @return [String, nil] the image URL, or nil when no image is set
def append_image(posting, data)
  image = data['image']
  image = image['path'] if image.is_a?(Hash)
  return unless image

  location = image.to_s.strip
  return if location.empty?

  posting['image'] = absolute_url(location)
end
|
|
50
|
+
|
|
51
|
+
# Adds 'keywords' from the page's tags, falling back to its 'keywords'
# front matter when there are no tags.
#
# The fallback is chosen by emptiness, not by nil-ness: an empty 'tags'
# array is truthy and used to hide 'keywords' entirely. A comma-separated
# String is accepted as well (it used to raise NoMethodError on `any?`).
#
# @param posting [Hash] schema node, mutated in place
# @param data [Hash] page front matter
# @return [Array<String>, nil] the keywords, or nil when none are present
def append_keywords(posting, data)
  candidates = [data['tags'], data['keywords']].map do |value|
    items = value.is_a?(String) ? value.split(',') : Array(value)
    items.map { |item| item.to_s.strip }.reject(&:empty?)
  end

  keywords = candidates.find(&:any?)
  posting['keywords'] = keywords if keywords
end
|
|
55
|
+
|
|
56
|
+
# Adds 'about' as a list of schema.org Thing nodes, taken from the page's
# 'topics' front matter or, when that is missing or empty, its 'categories'.
#
# An empty 'topics' array used to block the 'categories' fallback, and a
# single String value used to raise NoMethodError on `any?`.
#
# @param posting [Hash] schema node, mutated in place
# @param data [Hash] page front matter
# @return [Array<Hash>, nil] the Thing nodes, or nil when there are no topics
def append_about(posting, data)
  candidates = [data['topics'], data['categories']].map do |value|
    Array(value).map { |item| item.to_s.strip }.reject(&:empty?)
  end

  topics = candidates.find(&:any?)
  return unless topics

  posting['about'] = topics.map { |topic| { '@type' => 'Thing', 'name' => topic } }
end
|
|
62
|
+
|
|
63
|
+
# Adds 'wordCount' for the page content.
#
# HTML tags are replaced by spaces before counting so markup does not count
# as words and adjacent blocks are not glued together. Argument-less `split`
# ignores leading whitespace; `split(/\s+/)` returned a leading empty
# string there, which inflated the count by one.
#
# @param posting [Hash] schema node, mutated in place
# @return [Integer, nil] the word count, or nil when the page has no content
def append_word_count(posting)
  content = page.content
  return unless content

  posting['wordCount'] = content.to_s.gsub(/<[^>]+>/, ' ').split.size
end
|
|
69
|
+
|
|
70
|
+
# Adds 'articleBody' according to `json_ld['article_body']`:
# 'full' uses the tag-stripped page content, 'excerpt' uses the page's
# description (or else its excerpt); any other value adds nothing.
#
# @param posting [Hash] schema node, mutated in place
# @return [String, nil]
def append_article_body(posting)
  mode = config.json_ld['article_body']

  if mode == 'full'
    posting['articleBody'] = strip_html(page.content) if page.content
  elsif mode == 'excerpt'
    summary = page.data['description'] || page.data['excerpt']&.to_s
    posting['articleBody'] = strip_html(summary.to_s).strip if summary
  end
end
|
|
80
|
+
|
|
81
|
+
# Formats a date-like value as an ISO 8601 string.
# Prefers `#iso8601`, then `#strftime`, and finally falls back to `#to_s`.
#
# @param date [Object, nil] date-like value
# @return [String, nil] nil when `date` is nil or false
def format_date(date)
  return nil unless date
  return date.iso8601 if date.respond_to?(:iso8601)
  return date.strftime('%Y-%m-%dT%H:%M:%S%:z') if date.respond_to?(:strftime)

  date.to_s
end
|
|
92
|
+
|
|
93
|
+
# Resolves a site-relative path against the configured site URL.
#
# Values that already carry an http(s) scheme, or are protocol-relative
# ("//host/…"), are returned unchanged. The previous `start_with?('http')`
# test also caught relative paths such as "http-basics.png". A relative
# path without a leading slash gets one, so it is not glued onto the host.
#
# @param path [String, nil] URL or site-relative path
# @return [String] absolute URL; the bare site URL when `path` is nil or empty
def absolute_url(path)
  target = path.to_s
  return target if target.match?(%r{\A(?:https?:)?//}i)

  target = "/#{target}" unless target.empty? || target.start_with?('/')
  "#{config.site_url}#{target}"
end
|
|
98
|
+
|
|
99
|
+
# Removes HTML tags, collapses every run of whitespace to a single space and
# trims the result.
#
# @param text [#to_s]
# @return [String]
def strip_html(text)
  without_tags = text.to_s.gsub(/<[^>]+>/, '')
  single_spaced = without_tags.gsub(/\s+/, ' ')
  single_spaced.strip
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
|
|
4
|
+
module JsonLd
|
|
5
|
+
class BreadcrumbSchema
|
|
6
|
+
attr_reader :config, :page
|
|
7
|
+
|
|
8
|
+
# @param config [Object] plugin configuration
# @param page [Object] the page the breadcrumb trail is built for
def initialize(config, page)
  @config, @page = config, page
end
|
|
12
|
+
|
|
13
|
+
# Builds the BreadcrumbList node for the page.
#
# @return [Hash, nil] the node, or nil when the trail has fewer than two
#   items (the page sits at the site root)
def build
  trail = build_items
  return nil unless trail.size >= 2

  { '@type' => 'BreadcrumbList', 'itemListElement' => trail }
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
# Derives the breadcrumb trail from the page URL: a "Home" item, then one
# item per path segment. Intermediate segments link to their cumulative
# path; the last segment carries no URL and is named after the page title
# when one is set.
#
# @return [Array<Hash>] ListItem nodes, positions starting at 1
def build_items
  url = page.respond_to?(:url) ? page.url : '/'
  segments = url.to_s.split('/').reject(&:empty?)
  final_position = segments.size + 1

  crumbs = [list_item(1, 'Home', "#{config.site_url}/")]
  prefix = ''

  segments.each.with_index(2) do |segment, position|
    prefix = "#{prefix}/#{segment}"
    label = humanize(segment)

    crumb = if position == final_position
              list_item_no_url(position, page.data['title'] || label)
            else
              list_item(position, label, "#{config.site_url}#{prefix}/")
            end
    crumbs << crumb
  end

  crumbs
end
|
|
46
|
+
|
|
47
|
+
# Builds a ListItem node that links to `url`.
#
# @param position [Integer] position in the trail
# @param name [String] display name
# @param url [String] target URL, stored under 'item'
# @return [Hash]
def list_item(position, name, url)
  entry = { '@type' => 'ListItem' }
  entry['position'] = position
  entry['name'] = name
  entry['item'] = url
  entry
end
|
|
55
|
+
|
|
56
|
+
# Builds a ListItem node without an 'item' URL.
#
# @param position [Integer] position in the trail
# @param name [String] display name
# @return [Hash]
def list_item_no_url(position, name)
  entry = { '@type' => 'ListItem' }
  entry['position'] = position
  entry['name'] = name
  entry
end
|
|
63
|
+
|
|
64
|
+
# Turns a URL slug into a display name: hyphens and underscores become
# spaces and the first character of every word is upper-cased.
#
# @param slug [String]
# @return [String]
def humanize(slug)
  spaced = slug.tr('-_', ' ')
  spaced.gsub(/\b\w/) { |initial| initial.upcase }
end
|
|
67
|
+
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
|
|
4
|
+
module JsonLd
|
|
5
|
+
class Builder
|
|
6
|
+
attr_reader :config, :registry
|
|
7
|
+
|
|
8
|
+
# @param config [Object] plugin configuration
# @param registry [Object] entity registry handed to the schema classes
def initialize(config, registry)
  @config, @registry = config, registry
end
|
|
12
|
+
|
|
13
|
+
# Collects the JSON-LD nodes that apply to a page.
#
# A person page gets the Person node (plus the WebSite node when enabled);
# otherwise a post gets a BlogPosting node when enabled. Breadcrumb, FAQ
# and HowTo nodes are added independently when enabled and applicable.
#
# @param page [Object] rendered page or document
# @return [Array<Hash>] nodes with nil entries removed
def build_for_page(page)
  front_matter = page.data
  nodes = []

  if person_page?(page)
    nodes << PersonSchema.new(config, registry).build
    nodes << WebsiteSchema.new(config, registry).build if config.json_ld['include_website_schema']
  elsif post_page?(page) && config.json_ld['include_blog_posting']
    nodes << BlogPostingSchema.new(config, registry, page).build
  end

  if config.json_ld['include_breadcrumbs']
    trail = BreadcrumbSchema.new(config, page).build
    nodes << trail if trail
  end

  wants_faq = config.json_ld['include_faq'] && front_matter['faq']&.any?
  nodes << FaqSchema.new(config, page).build if wants_faq

  wants_how_to = config.json_ld['include_how_to'] && front_matter['how_to']
  nodes << HowToSchema.new(config, page).build if wants_how_to

  nodes.compact
end
|
|
35
|
+
|
|
36
|
+
# Collects the JSON-LD nodes for the homepage: the Person node when the
# registry has a primary entity, and the WebSite node when enabled.
#
# @return [Array<Hash>] nodes with nil entries removed
def build_for_homepage
  person = registry.primary_entity ? PersonSchema.new(config, registry).build : nil
  website = config.json_ld['include_website_schema'] ? WebsiteSchema.new(config, registry).build : nil

  [person, website].compact
end
|
|
42
|
+
|
|
43
|
+
# Wraps the nodes in a schema.org `@graph` and serialises them into a
# `<script type="application/ld+json">` element.
#
# Every `<` in the JSON is written as the JSON escape `\u003c`. JSON
# generation does not escape it, so a value containing "</script>" (or
# "<!--") would otherwise terminate or corrupt the script element. Parsers
# decode the escape back to `<`, so the data is unchanged.
#
# @param nodes [Array<Hash>] JSON-LD nodes
# @param compact [Boolean, nil] single-line output when true; nil defers to
#   `json_ld['compact']` from the configuration
# @return [String] the script element, or '' when there are no nodes
def to_script_tag(nodes, compact: nil)
  return '' if nodes.empty?

  use_compact = compact.nil? ? config.json_ld['compact'] : compact
  graph = { '@context' => 'https://schema.org', '@graph' => nodes }
  json = use_compact ? JSON.generate(graph) : JSON.pretty_generate(graph)
  # Block form: a replacement *string* would have its backslash interpreted.
  safe_json = json.gsub('<') { '\u003c' }

  %(<script type="application/ld+json">\n#{safe_json}\n</script>)
end
|
|
51
|
+
|
|
52
|
+
private
|
|
53
|
+
|
|
54
|
+
# Whether the page represents the person entity: either its front matter
# sets `entity_type: Person`, or its URL ends in "/about" or "/about/".
#
# @param page [#data] page or document
# @return [Boolean, nil] nil when the page's URL is nil
def person_page?(page)
  return true if page.data['entity_type'] == 'Person'
  return false unless page.respond_to?(:url)

  page.url&.match?(%r{/about/?$})
end
|
|
58
|
+
|
|
59
|
+
# Whether the page belongs to the collection labelled 'posts'.
#
# @param page [Object] page or document; objects without `#collection`
#   are never posts
# @return [Boolean]
def post_page?(page)
  return false unless page.respond_to?(:collection)

  page.collection&.label == 'posts'
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|