jekyll-ai-visible-content 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +39 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +227 -0
- data/LICENSE.txt +21 -0
- data/README.md +352 -0
- data/Rakefile +9 -0
- data/jekyll-ai-visible-content.gemspec +29 -0
- data/lib/jekyll-ai-visible-content.rb +47 -0
- data/lib/jekyll_ai_visible_content/configuration.rb +154 -0
- data/lib/jekyll_ai_visible_content/entity/organization.rb +68 -0
- data/lib/jekyll_ai_visible_content/entity/person.rb +114 -0
- data/lib/jekyll_ai_visible_content/entity/registry.rb +94 -0
- data/lib/jekyll_ai_visible_content/filters/entity_filter.rb +29 -0
- data/lib/jekyll_ai_visible_content/filters/naming_filter.rb +27 -0
- data/lib/jekyll_ai_visible_content/generators/content_graph_generator.rb +69 -0
- data/lib/jekyll_ai_visible_content/generators/entity_map_generator.rb +65 -0
- data/lib/jekyll_ai_visible_content/generators/llms_txt_generator.rb +170 -0
- data/lib/jekyll_ai_visible_content/generators/robots_txt_generator.rb +57 -0
- data/lib/jekyll_ai_visible_content/hooks/post_render_hook.rb +82 -0
- data/lib/jekyll_ai_visible_content/hooks/validate_hook.rb +49 -0
- data/lib/jekyll_ai_visible_content/json_ld/blog_posting_schema.rb +104 -0
- data/lib/jekyll_ai_visible_content/json_ld/breadcrumb_schema.rb +69 -0
- data/lib/jekyll_ai_visible_content/json_ld/builder.rb +64 -0
- data/lib/jekyll_ai_visible_content/json_ld/collection_schema.rb +47 -0
- data/lib/jekyll_ai_visible_content/json_ld/faq_schema.rb +37 -0
- data/lib/jekyll_ai_visible_content/json_ld/how_to_schema.rb +42 -0
- data/lib/jekyll_ai_visible_content/json_ld/person_schema.rb +18 -0
- data/lib/jekyll_ai_visible_content/json_ld/website_schema.rb +39 -0
- data/lib/jekyll_ai_visible_content/tags/ai_author_tag.rb +26 -0
- data/lib/jekyll_ai_visible_content/tags/ai_breadcrumb_tag.rb +50 -0
- data/lib/jekyll_ai_visible_content/tags/ai_entity_link_tag.rb +40 -0
- data/lib/jekyll_ai_visible_content/tags/ai_json_ld_tag.rb +54 -0
- data/lib/jekyll_ai_visible_content/tags/ai_related_posts_tag.rb +91 -0
- data/lib/jekyll_ai_visible_content/validators/entity_consistency_validator.rb +94 -0
- data/lib/jekyll_ai_visible_content/validators/json_ld_validator.rb +58 -0
- data/lib/jekyll_ai_visible_content/validators/link_validator.rb +27 -0
- data/lib/jekyll_ai_visible_content/version.rb +5 -0
- metadata +107 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module JsonLd
    # Produces a schema.org CollectionPage hash for a page, with the supplied
    # items listed as an ItemList under `mainEntity`.
    class CollectionSchema
      attr_reader :config, :page, :items

      # @param config [#site_url] source of the site URL used to absolutize paths
      # @param page [#data, #url] the page the CollectionPage node describes
      # @param items [Enumerable, nil] entries; each must respond to #url and #data
      def initialize(config, page, items)
        @items = items
        @page = page
        @config = config
      end

      # @return [Hash, nil] the CollectionPage hash with nil-valued top-level
      #   keys dropped, or nil when +items+ is nil or has no truthy element
      def build
        return nil if items.nil? || items.none?

        front_matter = page.data
        entries = items.map.with_index { |entry, idx| list_item(entry, idx) }

        node = {
          '@type' => 'CollectionPage',
          'name' => front_matter['title'],
          'description' => front_matter['description'],
          'url' => absolute_url(page.url),
          'mainEntity' => { '@type' => 'ItemList', 'itemListElement' => entries }
        }
        node.compact
      end

      private

      # Builds one ListItem; +index+ is zero-based, the emitted position is one-based.
      def list_item(item, index)
        entry = {
          '@type' => 'ListItem',
          'position' => index.succ,
          'url' => absolute_url(item.url),
          'name' => item.data['title']
        }
        entry.compact
      end

      # Returns +path+ untouched when it already starts with "http"; otherwise
      # prefixes it with the configured site URL.
      def absolute_url(path)
        already_absolute = path&.start_with?('http')
        already_absolute ? path : "#{config.site_url}#{path}"
      end
    end
  end
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module JsonLd
    # Builds a schema.org FAQPage node from the `faq` entry of a page's data.
    class FaqSchema
      attr_reader :config, :page

      # @param config [Object] plugin configuration (stored; not read in this class)
      # @param page [#data] page whose data hash may contain a 'faq' list
      def initialize(config, page)
        @config = config
        @page = page
      end

      # Only well-formed entries are emitted: an entry must be a Hash with a
      # non-blank 'question' and a non-blank 'answer'. Malformed entries are
      # skipped so they neither raise nor produce Question nodes with null text.
      #
      # @return [Hash, nil] the FAQPage hash, or nil when 'faq' is absent, is
      #   not an Array, or contains no well-formed entry
      def build
        questions = usable_items.map { |item| build_question(item) }
        return nil if questions.empty?

        {
          '@type' => 'FAQPage',
          'mainEntity' => questions
        }
      end

      private

      # Returns the FAQ entries that can be rendered as complete Question nodes.
      def usable_items
        faq_items = page.data['faq']
        return [] unless faq_items.is_a?(Array)

        faq_items.select do |item|
          item.is_a?(Hash) && present?(item['question']) && present?(item['answer'])
        end
      end

      # True when +value+ is non-nil and not whitespace-only once stringified.
      def present?(value)
        !value.nil? && !value.to_s.strip.empty?
      end

      # Maps one FAQ entry to a Question node with its accepted Answer.
      def build_question(item)
        {
          '@type' => 'Question',
          'name' => item['question'],
          'acceptedAnswer' => {
            '@type' => 'Answer',
            'text' => item['answer']
          }
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module JsonLd
    # Produces a schema.org HowTo hash from the `how_to` entry of a page's data.
    class HowToSchema
      attr_reader :config, :page

      # @param config [Object] plugin configuration (stored; not read in this class)
      # @param page [#data] page whose data hash may contain a 'how_to' mapping
      def initialize(config, page)
        @page = page
        @config = config
      end

      # @return [Hash, nil] the HowTo hash with nil values dropped, or nil when
      #   the page has no 'how_to' data
      def build
        how_to = page.data['how_to']
        return nil unless how_to

        duration = how_to['total_time']
        steps = how_to['steps']

        node = { '@type' => 'HowTo', 'name' => how_to['name'] }.tap do |hash|
          hash['totalTime'] = duration if duration
          hash['step'] = build_steps(steps) if steps&.any?
        end
        node.compact
      end

      private

      # Converts the raw step list into HowToStep hashes numbered from 1.
      def build_steps(steps)
        steps.map.with_index(1) do |step, position|
          entry = {
            '@type' => 'HowToStep',
            'position' => position,
            'name' => step['name'],
            'text' => step['text']
          }
          entry.compact
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module JsonLd
    # Thin schema wrapper: the Person node is whatever the entity registry
    # reports as its primary entity hash.
    class PersonSchema
      attr_reader :config, :registry

      # @param config [Object] plugin configuration (stored; not read in this class)
      # @param registry [#primary_entity_hash] entity registry to delegate to
      def initialize(config, registry)
        @registry = registry
        @config = config
      end

      # @return [Object] the value of +registry.primary_entity_hash+, unchanged
      def build
        primary = registry.primary_entity_hash
        primary
      end
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module JsonLd
    # Produces the schema.org WebSite hash for the site, including a
    # SearchAction and a publisher reference taken from the entity registry.
    class WebsiteSchema
      attr_reader :config, :registry

      # @param config [#site_url, #site_title, #site_description] site settings
      # @param registry [#primary_entity_ref] supplies the publisher reference
      def initialize(config, registry)
        @registry = registry
        @config = config
      end

      # @return [Hash] the WebSite hash with nil-valued top-level keys dropped
      def build
        base = config.site_url

        node = {
          '@type' => 'WebSite',
          '@id' => "#{base}/#website",
          'url' => base,
          'name' => config.site_title,
          'description' => config.site_description&.strip,
          'publisher' => registry.primary_entity_ref,
          'potentialAction' => search_action
        }
        node.compact
      end

      private

      # SearchAction pointing at "<site_url>/search".
      # NOTE(review): emitted unconditionally; nothing here checks that the
      # site actually serves a /search endpoint — confirm.
      def search_action
        {
          '@type' => 'SearchAction',
          'target' => "#{config.site_url}/search?q={search_term_string}",
          'query-input' => 'required name=search_term_string'
        }
      end
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'

module JekyllAiVisibleContent
  module Tags
    # Liquid tag that renders the configured entity as schema.org Person
    # microdata, linked to "<site_url>/about/".
    class AiAuthorTag < Liquid::Tag
      # @param context [Liquid::Context] the site is read from registers[:site]
      # @return [String] the author markup, or '' when the plugin is disabled
      #   or the entity has no (non-blank) name
      def render(context)
        config = JekyllAiVisibleContent.config(context.registers[:site])
        return '' unless config.enabled?

        name = config.entity['name'].to_s
        # A whitespace-only name would render an empty, meaningless author span.
        return '' if name.strip.empty?

        # Escape both values: a name such as "Smith & Sons" or a URL with a
        # query string would otherwise produce invalid markup.
        about_url = CGI.escapeHTML("#{config.site_url}/about/")
        [
          %(<span itemprop="author" itemscope itemtype="https://schema.org/Person">),
          %( <a itemprop="url" href="#{about_url}">),
          %( <span itemprop="name">#{CGI.escapeHTML(name)}</span>),
          %( </a>),
          %(</span>)
        ].join("\n")
      end
    end
  end
end
|
|
25
|
+
|
|
26
|
+
# Registers AiAuthorTag with Liquid under the tag name `ai_author`.
Liquid::Template.register_tag('ai_author', JekyllAiVisibleContent::Tags::AiAuthorTag)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'

module JekyllAiVisibleContent
  module Tags
    # Liquid tag that renders a breadcrumb <nav> derived from the current
    # page's URL segments.
    class AiBreadcrumbTag < Liquid::Tag
      # Every URL segment except the last becomes a link; the last segment is
      # rendered as the current page, labelled with the page title when one is
      # set and otherwise with a humanized form of the segment.
      #
      # @param context [Liquid::Context] site and page are read from its registers
      # @return [String] breadcrumb markup, or '' when the plugin or the
      #   breadcrumb option is disabled, or the URL has no segments
      def render(context)
        site = context.registers[:site]
        page = context.registers[:page]
        config = JekyllAiVisibleContent.config(site)
        return '' unless config.enabled?
        return '' unless config.json_ld['include_breadcrumbs']

        segments = (page['url'] || '/').to_s.split('/').reject(&:empty?)
        return '' if segments.empty?

        *ancestors, leaf = segments
        base = config.site_url
        items = [breadcrumb_item('Home', "#{base}/")]

        path = ''
        ancestors.each do |segment|
          path = "#{path}/#{segment}"
          items << breadcrumb_item(humanize(segment), "#{base}#{path}/")
        end
        items << current_item(leaf_label(page['title'], leaf))

        %(<nav aria-label="Breadcrumb"><ol>#{items.join}</ol></nav>)
      end

      private

      # Label for the current page: the title when it is non-blank, otherwise
      # the humanized segment with a trailing .html/.htm removed so that
      # "index.html" does not become "Index.Html".
      def leaf_label(title, segment)
        return title unless title.to_s.strip.empty?

        humanize(segment.sub(/\.html?\z/i, ''))
      end

      # Linked crumb. Name and URL are HTML-escaped because titles and paths
      # may contain characters such as & or <.
      def breadcrumb_item(name, url)
        %(<li><a href="#{CGI.escapeHTML(url.to_s)}">#{CGI.escapeHTML(name.to_s)}</a></li>)
      end

      # Unlinked crumb marking the current page.
      def current_item(name)
        %(<li><span aria-current="page">#{CGI.escapeHTML(name.to_s)}</span></li>)
      end

      # Replaces hyphens/underscores with spaces and upcases the first
      # character of each word.
      def humanize(slug)
        slug.tr('-_', ' ').gsub(/\b\w/, &:upcase)
      end
    end
  end
end
|
|
49
|
+
|
|
50
|
+
# Registers AiBreadcrumbTag with Liquid under the tag name `ai_breadcrumbs`.
Liquid::Template.register_tag('ai_breadcrumbs', JekyllAiVisibleContent::Tags::AiBreadcrumbTag)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'

module JekyllAiVisibleContent
  module Tags
    # Liquid tag that turns an entity name into a microdata-annotated link
    # when the entity registry knows a URL for it.
    class AiEntityLinkTag < Liquid::Tag
      # Matches markup consisting solely of one double-quoted name.
      SYNTAX = /\A\s*"([^"]+)"\s*\z/

      # Extracts the entity name from the tag markup. When the markup is not a
      # single quoted string, the stripped markup with all double quotes
      # removed is used instead.
      def initialize(tag_name, markup, tokens)
        super
        match = markup.match(SYNTAX)
        @entity_name = match ? match[1] : markup.strip.delete('"')
      end

      # @param context [Liquid::Context] the site is read from registers[:site]
      # @return [String] a link to the entity, or the bare entity name when the
      #   plugin is disabled, the entity is unknown, or it has no URL
      def render(context)
        config = JekyllAiVisibleContent.config(context.registers[:site])
        return @entity_name unless config.enabled?

        definition = Entity::Registry.new(config).find_entity_by_name(@entity_name)
        url = definition && definition['url']
        # Without a URL the anchor would be href="" (a link to the current
        # page), so fall back to plain text instead.
        return @entity_name if url.to_s.strip.empty?

        build_link(url, @entity_name)
      end

      private

      # Builds the anchor; both the attribute value and the visible text are
      # HTML-escaped.
      def build_link(url, name)
        href = CGI.escapeHTML(url.to_s)
        label = CGI.escapeHTML(name.to_s)
        %(<a href="#{href}" itemprop="about" itemscope itemtype="https://schema.org/Thing">) +
          %(<span itemprop="name">#{label}</span></a>)
      end
    end
  end
end
|
|
39
|
+
|
|
40
|
+
# Registers AiEntityLinkTag with Liquid under the tag name `ai_entity_link`.
Liquid::Template.register_tag('ai_entity_link', JekyllAiVisibleContent::Tags::AiEntityLinkTag)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Tags
    # Liquid tag that emits the JSON-LD script tag for the current page.
    class AiJsonLdTag < Liquid::Tag
      # @param context [Liquid::Context] site and page are read from its registers
      # @return [String] the script tag produced by the builder, or '' when the
      #   plugin is disabled, the page cannot be resolved to a site object, or
      #   no nodes remain after filtering
      def render(context)
        site = context.registers[:site]
        page = context.registers[:page]
        config = JekyllAiVisibleContent.config(site)
        return '' unless config.enabled?

        page_obj = find_page_object(site, page)
        return '' unless page_obj

        # Built only once the page is resolved; neither is needed before.
        registry = Entity::Registry.new(config)
        builder = JsonLd::Builder.new(config, registry)

        nodes = homepage?(page) ? builder.build_for_homepage : builder.build_for_page(page_obj)

        # Filter first, then test for emptiness: checking before filtering let
        # a fully filtered list reach to_script_tag. A non-mutating reject is
        # used so the array returned by the builder is left untouched.
        skip_types = seo_tag_types(config)
        nodes = nodes.reject { |node| skip_types.include?(node['@type']) }
        return '' if nodes.empty?

        builder.to_script_tag(nodes)
      end

      private

      # True when the page's url (or, failing that, its permalink) is the
      # site root.
      def homepage?(page)
        url = page['url'] || page['permalink']
        ['/', '/index.html'].include?(url)
      end

      # Resolves the Liquid page hash to the matching post or page object by
      # URL. Only site.posts.docs and site.pages are searched.
      def find_page_object(site, page_hash)
        url = page_hash['url'] || page_hash['permalink']
        site.posts.docs.find { |p| p.url == url } ||
          site.pages.find { |p| p.url == url }
      end

      # Node types to omit when config.seo_tag_present? is true.
      def seo_tag_types(config)
        return [] unless config.seo_tag_present?

        %w[WebSite]
      end
    end
  end
end
|
|
53
|
+
|
|
54
|
+
# Registers AiJsonLdTag with Liquid under the tag name `ai_json_ld`.
Liquid::Template.register_tag('ai_json_ld', JekyllAiVisibleContent::Tags::AiJsonLdTag)
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'

module JekyllAiVisibleContent
  module Tags
    # Liquid tag that renders a list of posts related to the current page,
    # either from explicit `related_slugs` or from tag/category/topic overlap.
    class AiRelatedPostsTag < Liquid::Tag
      # Optional `limit: N` argument in the tag markup.
      LIMIT_SYNTAX = /limit\s*:\s*(\d+)/

      # Parses the optional limit from the markup; nil when not given.
      def initialize(tag_name, markup, tokens)
        super
        match = markup.match(LIMIT_SYNTAX)
        @limit = match ? match[1].to_i : nil
      end

      # @param context [Liquid::Context] site and page are read from its registers
      # @return [String] the related-posts markup, or '' when the feature is
      #   disabled, the page cannot be resolved, or nothing related is found
      def render(context)
        site = context.registers[:site]
        page = context.registers[:page]
        config = JekyllAiVisibleContent.config(site)
        return '' unless config.enabled?
        return '' unless config.linking['enable_related_posts']

        # Precedence: tag argument, then configuration, then 3.
        limit = @limit || config.linking['related_posts_limit'] || 3
        page_obj = find_page_object(site, page)
        return '' unless page_obj

        related = find_related(site, page_obj, limit)
        return '' if related.empty?

        render_html(related)
      end

      private

      # Resolves the Liquid page hash to the matching post or page object by URL.
      def find_page_object(site, page_hash)
        url = page_hash['url']
        site.posts.docs.find { |p| p.url == url } || site.pages.find { |p| p.url == url }
      end

      # Returns up to +limit+ related posts. Explicit `related_slugs` win when
      # they resolve to at least one post; otherwise posts are ranked by
      # weighted overlap (tags x3, categories x2, topics x1) and only posts
      # with a positive score are kept. The current document is never returned.
      def find_related(site, current, limit)
        candidates = site.posts.docs.reject { |post| post.url == current.url }

        explicit = explicit_matches(candidates, current.data['related_slugs'])
        return explicit.first(limit) if explicit.any?

        candidates
          .map { |post| [post, relatedness(current, post)] }
          .select { |_, score| score.positive? }
          .sort_by { |_, score| -score }
          .first(limit)
          .map(&:first)
      end

      # Resolves slugs to posts, in the order listed and without duplicates.
      # An exact `slug` match is preferred; a URL substring match is only a
      # fallback, so slug "ruby" no longer picks "/ruby-on-rails/" over a post
      # whose slug is exactly "ruby". Blank slugs are ignored because an empty
      # substring would match every URL.
      def explicit_matches(candidates, slugs)
        matches = Array(slugs).filter_map do |slug|
          wanted = slug.to_s
          next if wanted.empty?

          candidates.find { |post| post.data['slug'] == wanted } ||
            candidates.find { |post| post.url.include?(wanted) }
        end
        matches.uniq
      end

      # Weighted similarity between two documents' front matter.
      def relatedness(current, post)
        (jaccard_similarity(current.data['tags'], post.data['tags']) * 3) +
          (jaccard_similarity(current.data['categories'], post.data['categories']) * 2) +
          jaccard_similarity(current.data['topics'], post.data['topics'])
      end

      # Case-insensitive Jaccard index of two value lists. Array() lets nil
      # and scalar front-matter values (e.g. `topics: ruby`) be compared
      # instead of raising.
      def jaccard_similarity(set_a, set_b)
        a = Array(set_a).map { |value| value.to_s.downcase }
        b = Array(set_b).map { |value| value.to_s.downcase }
        union = (a | b).size
        return 0.0 if union.zero?

        (a & b).size.to_f / union
      end

      # Renders the posts as a <nav> list with BlogPosting microdata. Titles
      # and URLs are HTML-escaped since titles commonly contain & or <.
      def render_html(posts)
        lines = []
        lines << '<nav aria-label="Related posts">'
        lines << ' <h2>Related Posts</h2>'
        lines << ' <ul>'

        posts.each do |post|
          lines << ' <li itemscope itemtype="https://schema.org/BlogPosting">'
          lines << " <a itemprop=\"url\" href=\"#{CGI.escapeHTML(post.url.to_s)}\">"
          lines << " <span itemprop=\"headline\">#{CGI.escapeHTML(post.data['title'].to_s)}</span>"
          lines << ' </a>'
          date = post.data['date']
          # Only values that can be formatted are rendered; anything else
          # (e.g. a raw String) is skipped instead of raising.
          if date.respond_to?(:strftime)
            dt = date.strftime('%Y-%m-%d')
            published = date.strftime('%b %d, %Y')
            lines << " <time itemprop=\"datePublished\" datetime=\"#{dt}\">#{published}</time>"
          end
          lines << ' </li>'
        end

        lines << ' </ul>'
        lines << '</nav>'
        lines.join("\n")
      end
    end
  end
end
|
|
90
|
+
|
|
91
|
+
# Registers AiRelatedPostsTag with Liquid under the tag name `ai_related_posts`.
Liquid::Template.register_tag('ai_related_posts', JekyllAiVisibleContent::Tags::AiRelatedPostsTag)
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Validators
    # Collects human-readable warnings about entity naming and missing
    # metadata across the site's posts and pages.
    class EntityConsistencyValidator
      attr_reader :config, :site

      # @param config [#validation, #entity] plugin configuration
      # @param site [#config, #posts, #pages] the site being checked
      def initialize(config, site)
        @site = site
        @config = config
      end

      # Runs each check whose validation flag is truthy, then the
      # generic-title check, which has no flag.
      #
      # @return [Array<String>] warnings in check order
      def validate
        flagged_checks = {
          'warn_name_inconsistency' => :check_name_consistency,
          'warn_missing_same_as' => :check_missing_same_as,
          'warn_missing_dates' => :check_missing_dates,
          'warn_missing_descriptions' => :check_missing_descriptions
        }

        enabled = flagged_checks.select { |flag, _check| config.validation[flag] }
        enabled.values.flat_map { |check| send(check) } + check_generic_titles
      end

      private

      # Warns when the site-level author or a post's string author differs
      # from the configured entity name. Returns [] when no entity name is set.
      def check_name_consistency
        canonical = config.entity['name']
        return [] unless canonical

        found = []

        declared = site.config['author']
        declared = declared['name'] if declared.is_a?(Hash)
        if declared.is_a?(String) && declared != canonical
          found << "Name inconsistency: site.author='#{declared}' differs from entity.name='#{canonical}'"
        end

        mismatched = site.posts.docs.select do |post|
          byline = post.data['author']
          byline.is_a?(String) && byline != canonical
        end

        found + mismatched.map do |post|
          "Name inconsistency in #{post.relative_path}: author='#{post.data['author']}' differs from '#{canonical}'"
        end
      end

      # Warns when the entity's 'same_as' is nil or has no truthy element.
      def check_missing_same_as
        linked = config.entity['same_as']&.any?
        linked ? [] : ['Entity has no sameAs links to external profiles (LinkedIn, GitHub, etc.)']
      end

      # Warns for every post without a truthy 'last_modified_at'.
      def check_missing_dates
        undated = site.posts.docs.reject { |post| post.data['last_modified_at'] }
        undated.map { |post| "Missing last_modified_at in #{post.relative_path} (freshness scoring disabled)" }
      end

      # Warns for every post or page whose description is falsy or blank.
      def check_missing_descriptions
        lacking = all_docs.reject do |doc|
          text = doc.data['description']
          text && !text.to_s.strip.empty?
        end
        lacking.map { |doc| "Missing description in #{location_of(doc)}" }
      end

      # Warns for every post or page whose title, stripped and downcased, is
      # one of the generic words below.
      def check_generic_titles
        generic = %w[about blog home page post]

        offenders = all_docs.select { |doc| generic.include?(doc.data['title'].to_s.strip.downcase) }
        offenders.map do |doc|
          "Generic title '#{doc.data['title']}' in #{location_of(doc)} (include entity name for discoverability)"
        end
      end

      # relative_path when the document offers one, otherwise its url.
      def location_of(doc)
        doc.respond_to?(:relative_path) ? doc.relative_path : doc.url
      end

      # Posts followed by pages.
      def all_docs
        site.posts.docs + site.pages
      end
    end
  end
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Validators
    # Validates the entity configuration and individual JSON-LD nodes for
    # required fields.
    class JsonLdValidator
      REQUIRED_PERSON_FIELDS = %w[@type @id name].freeze
      REQUIRED_POSTING_FIELDS = %w[@type headline author].freeze

      attr_reader :config

      # @param config [#entity] plugin configuration; +entity+ is read as a hash
      def initialize(config)
        @config = config
      end

      # @return [Array<String>] configuration errors; empty when valid
      def validate
        validate_entity_config
      end

      # Checks a single JSON-LD node. Only 'Person' and 'BlogPosting' nodes
      # have required fields; any other type, or a non-Hash, yields no errors.
      #
      # @param node [Object] candidate JSON-LD node
      # @return [Array<String>] one message per missing required field
      def validate_node(node)
        return [] unless node.is_a?(Hash)

        case node['@type']
        when 'Person'
          validate_fields(node, REQUIRED_PERSON_FIELDS, 'Person')
        when 'BlogPosting'
          validate_fields(node, REQUIRED_POSTING_FIELDS, 'BlogPosting')
        else
          []
        end
      end

      private

      # Requires a non-blank entity name, and either an id_slug or a name from
      # which an @id can be generated.
      def validate_entity_config
        errors = []
        entity = config.entity
        name = entity['name']

        # to_s before strip: a non-String name (e.g. a YAML integer) must not
        # crash the validator with NoMethodError. This mirrors validate_fields.
        errors << 'Entity name is required in ai_visible_content.entity.name' if !name || blank?(name)
        errors << 'Entity requires either id_slug or name to generate @id' if entity['id_slug'].nil? && name.nil?

        errors
      end

      # Lists the required fields that are nil or blank in +node+.
      def validate_fields(node, required, type_name)
        missing = required.select { |field| blank?(node[field]) }
        missing.map { |field| "#{type_name} JSON-LD missing required field: #{field}" }
      end

      # True when +value+ is nil or whitespace-only once stringified.
      def blank?(value)
        value.nil? || value.to_s.strip.empty?
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Validators
    # Reports link-structure warnings based on data stored on the site.
    class LinkValidator
      attr_reader :config, :site

      # @param config [#validation] plugin configuration
      # @param site [#data] site whose data hash may hold 'ai_orphan_pages'
      def initialize(config, site)
        @site = site
        @config = config
      end

      # @return [Array<String>] orphan-page warnings when the
      #   'warn_orphan_pages' validation flag is truthy, otherwise []
      def validate
        return [] unless config.validation['warn_orphan_pages']

        check_orphan_pages
      end

      private

      # One warning per URL listed under site.data['ai_orphan_pages'].
      def check_orphan_pages
        orphan_urls = site.data['ai_orphan_pages']
        return [] unless orphan_urls

        orphan_urls.map { |orphan| "Orphan page (no inbound links): #{orphan}" }
      end
    end
  end
end
|