jekyll-ai-visible-content 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +39 -0
  4. data/CHANGELOG.md +12 -0
  5. data/Gemfile +11 -0
  6. data/Gemfile.lock +227 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +352 -0
  9. data/Rakefile +9 -0
  10. data/jekyll-ai-visible-content.gemspec +29 -0
  11. data/lib/jekyll-ai-visible-content.rb +47 -0
  12. data/lib/jekyll_ai_visible_content/configuration.rb +154 -0
  13. data/lib/jekyll_ai_visible_content/entity/organization.rb +68 -0
  14. data/lib/jekyll_ai_visible_content/entity/person.rb +114 -0
  15. data/lib/jekyll_ai_visible_content/entity/registry.rb +94 -0
  16. data/lib/jekyll_ai_visible_content/filters/entity_filter.rb +29 -0
  17. data/lib/jekyll_ai_visible_content/filters/naming_filter.rb +27 -0
  18. data/lib/jekyll_ai_visible_content/generators/content_graph_generator.rb +69 -0
  19. data/lib/jekyll_ai_visible_content/generators/entity_map_generator.rb +65 -0
  20. data/lib/jekyll_ai_visible_content/generators/llms_txt_generator.rb +170 -0
  21. data/lib/jekyll_ai_visible_content/generators/robots_txt_generator.rb +57 -0
  22. data/lib/jekyll_ai_visible_content/hooks/post_render_hook.rb +82 -0
  23. data/lib/jekyll_ai_visible_content/hooks/validate_hook.rb +49 -0
  24. data/lib/jekyll_ai_visible_content/json_ld/blog_posting_schema.rb +104 -0
  25. data/lib/jekyll_ai_visible_content/json_ld/breadcrumb_schema.rb +69 -0
  26. data/lib/jekyll_ai_visible_content/json_ld/builder.rb +64 -0
  27. data/lib/jekyll_ai_visible_content/json_ld/collection_schema.rb +47 -0
  28. data/lib/jekyll_ai_visible_content/json_ld/faq_schema.rb +37 -0
  29. data/lib/jekyll_ai_visible_content/json_ld/how_to_schema.rb +42 -0
  30. data/lib/jekyll_ai_visible_content/json_ld/person_schema.rb +18 -0
  31. data/lib/jekyll_ai_visible_content/json_ld/website_schema.rb +39 -0
  32. data/lib/jekyll_ai_visible_content/tags/ai_author_tag.rb +26 -0
  33. data/lib/jekyll_ai_visible_content/tags/ai_breadcrumb_tag.rb +50 -0
  34. data/lib/jekyll_ai_visible_content/tags/ai_entity_link_tag.rb +40 -0
  35. data/lib/jekyll_ai_visible_content/tags/ai_json_ld_tag.rb +54 -0
  36. data/lib/jekyll_ai_visible_content/tags/ai_related_posts_tag.rb +91 -0
  37. data/lib/jekyll_ai_visible_content/validators/entity_consistency_validator.rb +94 -0
  38. data/lib/jekyll_ai_visible_content/validators/json_ld_validator.rb +58 -0
  39. data/lib/jekyll_ai_visible_content/validators/link_validator.rb +27 -0
  40. data/lib/jekyll_ai_visible_content/version.rb +5 -0
  41. metadata +107 -0
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module JsonLd
    # Produces a schema.org `CollectionPage` node for a page, with an
    # `ItemList` of the supplied items as its main entity.
    class CollectionSchema
      attr_reader :config, :page, :items

      # @param config [#site_url] object supplying the site base URL
      # @param page [#data, #url] page the collection node describes
      # @param items [Array<#data, #url>, nil] entries listed on the page
      def initialize(config, page, items)
        @config = config
        @page = page
        @items = items
      end

      # @return [Hash, nil] the node with nil-valued keys removed, or nil
      #   when there are no items to list
      def build
        return nil if items.nil? || !items.any?

        node = { '@type' => 'CollectionPage' }
        node['name'] = page.data['title']
        node['description'] = page.data['description']
        node['url'] = absolute_url(page.url)
        node['mainEntity'] = { '@type' => 'ItemList', 'itemListElement' => list_entries }
        node.compact
      end

      private

      # One `ListItem` per item, numbered from 1 in iteration order.
      def list_entries
        items.each.with_index(1).map do |entry, position|
          {
            '@type' => 'ListItem',
            'position' => position,
            'url' => absolute_url(entry.url),
            'name' => entry.data['title']
          }.compact
        end
      end

      # Paths that already start with "http" pass through untouched; anything
      # else (including nil) is appended to the configured site URL.
      def absolute_url(path)
        already_absolute = path&.start_with?('http')
        already_absolute ? path : "#{config.site_url}#{path}"
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module JsonLd
    # Converts the `faq` entry of a page's data into a schema.org `FAQPage`
    # node containing one `Question` per entry.
    class FaqSchema
      attr_reader :config, :page

      # @param config [Object] plugin configuration (stored, not read here)
      # @param page [#data] page whose data may contain a `faq` list
      def initialize(config, page)
        @config = config
        @page = page
      end

      # @return [Hash, nil] the FAQPage node, or nil when the page has no
      #   `faq` entries
      def build
        entries = page.data['faq']
        return nil if entries.nil? || !entries.any?

        questions = entries.map do |entry|
          {
            '@type' => 'Question',
            'name' => entry['question'],
            'acceptedAnswer' => { '@type' => 'Answer', 'text' => entry['answer'] }
          }
        end

        { '@type' => 'FAQPage', 'mainEntity' => questions }
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module JsonLd
    # Converts the `how_to` entry of a page's data into a schema.org `HowTo`
    # node with optional total time and numbered steps.
    class HowToSchema
      attr_reader :config, :page

      # @param config [Object] plugin configuration (stored, not read here)
      # @param page [#data] page whose data may contain a `how_to` hash
      def initialize(config, page)
        @config = config
        @page = page
      end

      # @return [Hash, nil] the HowTo node with nil-valued keys removed, or
      #   nil when the page has no `how_to` data
      def build
        spec = page.data['how_to']
        return nil unless spec

        node = { '@type' => 'HowTo', 'name' => spec['name'] }

        duration = spec['total_time']
        node['totalTime'] = duration if duration

        steps = spec['steps']
        node['step'] = numbered_steps(steps) if steps&.any?

        node.compact
      end

      private

      # One `HowToStep` per step, positions counted from 1; nil name/text are
      # dropped from each step.
      def numbered_steps(steps)
        steps.each.with_index(1).map do |step, position|
          {
            '@type' => 'HowToStep',
            'position' => position,
            'name' => step['name'],
            'text' => step['text']
          }.compact
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module JsonLd
    # Thin wrapper that exposes the registry's primary entity as a JSON-LD
    # node through the same `build` interface as the other schema classes.
    class PersonSchema
      attr_reader :config, :registry

      # @param config [Object] plugin configuration (stored, not read here)
      # @param registry [#primary_entity_hash] entity registry
      def initialize(config, registry)
        @config = config
        @registry = registry
      end

      # @return [Object] whatever `registry.primary_entity_hash` returns
      def build
        @registry.primary_entity_hash
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module JsonLd
    # Produces the site-wide schema.org `WebSite` node, including a
    # `SearchAction` that targets `<site_url>/search?q=...`.
    class WebsiteSchema
      attr_reader :config, :registry

      # @param config [#site_url, #site_title, #site_description]
      # @param registry [#primary_entity_ref] supplies the publisher reference
      def initialize(config, registry)
        @config = config
        @registry = registry
      end

      # @return [Hash] the WebSite node with nil-valued keys removed
      def build
        {
          '@type' => 'WebSite',
          '@id' => "#{config.site_url}/#website",
          'url' => config.site_url,
          'name' => config.site_title,
          'description' => config.site_description&.strip,
          'publisher' => registry.primary_entity_ref,
          'potentialAction' => search_action
        }.compact
      end

      private

      # SearchAction using the `{search_term_string}` URL template
      # placeholder.
      def search_action
        {
          '@type' => 'SearchAction',
          'target' => "#{config.site_url}/search?q={search_term_string}",
          'query-input' => 'required name=search_term_string'
        }
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
require 'cgi'

module JekyllAiVisibleContent
  module Tags
    # Liquid tag that renders schema.org `Person` microdata for the
    # configured entity, linking its name to `<site_url>/about/`.
    #
    # Renders an empty string when the plugin is disabled or the entity has
    # no name.
    class AiAuthorTag < Liquid::Tag
      # @param context [Liquid::Context] render context; the Jekyll site is
      #   read from `context.registers[:site]`
      # @return [String] HTML fragment, or '' when nothing should be rendered
      def render(context)
        site = context.registers[:site]
        config = JekyllAiVisibleContent.config(site)
        return '' unless config.enabled?

        name = config.entity['name']
        return '' unless name

        # Both values come from site configuration and land in HTML
        # attribute/text positions, so they must be escaped: a name such as
        # `Smith & Sons` or one containing `<` would otherwise yield invalid
        # markup.
        about_url = CGI.escapeHTML("#{config.site_url}/about/")
        display_name = CGI.escapeHTML(name.to_s)

        [
          %(<span itemprop="author" itemscope itemtype="https://schema.org/Person">),
          %( <a itemprop="url" href="#{about_url}">),
          %( <span itemprop="name">#{display_name}</span>),
          %( </a>),
          %(</span>)
        ].join("\n")
      end
    end
  end
end
25
+
26
+ Liquid::Template.register_tag('ai_author', JekyllAiVisibleContent::Tags::AiAuthorTag) # Registers AiAuthorTag with Liquid under the tag name `ai_author`.
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
require 'cgi'

module JekyllAiVisibleContent
  module Tags
    # Liquid tag that renders a breadcrumb `<nav>` derived from the path
    # segments of the current page's URL.
    #
    # The first crumb is always "Home"; each intermediate segment becomes a
    # link with a humanized label; the last segment is rendered as the
    # current page (unlinked) using the page title when present.
    #
    # Renders an empty string when the plugin is disabled, when
    # `json_ld['include_breadcrumbs']` is falsy, or when the URL has no
    # non-empty segments.
    class AiBreadcrumbTag < Liquid::Tag
      # @param context [Liquid::Context] render context; site and page are
      #   read from `context.registers`
      # @return [String] breadcrumb HTML, or ''
      def render(context)
        site = context.registers[:site]
        page = context.registers[:page]
        config = JekyllAiVisibleContent.config(site)
        return '' unless config.enabled? && config.json_ld['include_breadcrumbs']

        segments = (page['url'] || '/').to_s.split('/').reject(&:empty?)
        return '' if segments.empty?

        # Split once instead of re-testing "is this the last segment?" twice
        # on every iteration.
        *ancestors, leaf = segments

        items = [breadcrumb_item('Home', "#{config.site_url}/")]

        path = ''
        ancestors.each do |segment|
          path = "#{path}/#{segment}"
          items << breadcrumb_item(humanize(segment), "#{config.site_url}#{path}/")
        end

        items << current_item(page['title'] || humanize(leaf))

        %(<nav aria-label="Breadcrumb"><ol>#{items.join}</ol></nav>)
      end

      private

      # Linked crumb. Label and href are HTML-escaped because they come from
      # page URLs and front matter.
      def breadcrumb_item(name, url)
        %(<li><a href="#{escape(url)}">#{escape(name)}</a></li>)
      end

      # Final, unlinked crumb marking the current page.
      def current_item(name)
        %(<li><span aria-current="page">#{escape(name)}</span></li>)
      end

      # "my-first_post" -> "My First Post"
      def humanize(slug)
        slug.gsub(/[-_]/, ' ').gsub(/\b\w/, &:upcase)
      end

      def escape(value)
        CGI.escapeHTML(value.to_s)
      end
    end
  end
end
49
+
50
+ Liquid::Template.register_tag('ai_breadcrumbs', JekyllAiVisibleContent::Tags::AiBreadcrumbTag) # Registers AiBreadcrumbTag with Liquid under the tag name `ai_breadcrumbs`.
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
require 'cgi'

module JekyllAiVisibleContent
  module Tags
    # Liquid tag taking a quoted entity name. When the entity registry has a
    # definition with a URL for that name, renders a microdata-annotated
    # link; otherwise renders the name as given.
    class AiEntityLinkTag < Liquid::Tag
      # Whole-markup match for a single double-quoted name.
      SYNTAX = /\A\s*"([^"]+)"\s*\z/

      # @param tag_name [String]
      # @param markup [String] raw tag arguments; expected `"Entity Name"`.
      #   Markup that does not match SYNTAX is used with whitespace trimmed
      #   and double quotes removed.
      # @param tokens [Object] passed through to Liquid::Tag
      def initialize(tag_name, markup, tokens)
        super
        match = markup.match(SYNTAX)
        @entity_name = match ? match[1] : markup.strip.delete('"')
      end

      # @param context [Liquid::Context]
      # @return [String] link markup, or the bare entity name when the plugin
      #   is disabled, the entity is unknown, or it has no URL
      def render(context)
        site = context.registers[:site]
        config = JekyllAiVisibleContent.config(site)
        return @entity_name unless config.enabled?

        definition = Entity::Registry.new(config).find_entity_by_name(@entity_name)
        url = definition && definition['url']

        # A definition without a URL used to produce `<a href="">`, i.e. a
        # link back to the current page. Fall back to plain text instead.
        return @entity_name if url.nil? || url.to_s.strip.empty?

        build_link(url, @entity_name)
      end

      private

      # URL and name are HTML-escaped since they are interpolated into an
      # attribute and element text respectively.
      def build_link(url, name)
        %(<a href="#{CGI.escapeHTML(url.to_s)}" itemprop="about" itemscope itemtype="https://schema.org/Thing">) +
          %(<span itemprop="name">#{CGI.escapeHTML(name.to_s)}</span></a>)
      end
    end
  end
end
39
+
40
+ Liquid::Template.register_tag('ai_entity_link', JekyllAiVisibleContent::Tags::AiEntityLinkTag) # Registers AiEntityLinkTag with Liquid under the tag name `ai_entity_link`.
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module Tags
    # Liquid tag that emits the JSON-LD script tag for the current page.
    #
    # Nodes come from `JsonLd::Builder` (homepage vs. regular page). When the
    # configuration reports that jekyll-seo-tag is present, node types listed
    # in `seo_tag_types` are dropped from this tag's output.
    class AiJsonLdTag < Liquid::Tag
      # URLs treated as the site homepage.
      HOMEPAGE_URLS = ['/', '/index.html'].freeze

      # @param context [Liquid::Context] site and page are read from
      #   `context.registers`
      # @return [String] script tag markup, or '' when the plugin is
      #   disabled, the page object cannot be found, or no nodes remain
      def render(context)
        site = context.registers[:site]
        page = context.registers[:page]
        config = JekyllAiVisibleContent.config(site)
        return '' unless config.enabled?

        registry = Entity::Registry.new(config)
        builder = JsonLd::Builder.new(config, registry)

        page_obj = find_page_object(site, page)
        return '' unless page_obj

        nodes = homepage?(page) ? builder.build_for_homepage : builder.build_for_page(page_obj)

        # Filter first, then test for emptiness: previously the empty check
        # ran before filtering, so a list consisting only of skipped types
        # still reached `to_script_tag`. Non-destructive `reject` also leaves
        # the builder's array untouched.
        skip_types = seo_tag_types(config)
        nodes = nodes.reject { |node| skip_types.include?(node['@type']) }
        return '' if nodes.empty?

        builder.to_script_tag(nodes)
      end

      private

      def homepage?(page)
        HOMEPAGE_URLS.include?(page_url(page))
      end

      # Looks the page up among posts first, then regular pages, by URL.
      def find_page_object(site, page_hash)
        url = page_url(page_hash)
        site.posts.docs.find { |doc| doc.url == url } ||
          site.pages.find { |doc| doc.url == url }
      end

      def page_url(page_hash)
        page_hash['url'] || page_hash['permalink']
      end

      # Node types left out of this tag's output when
      # `config.seo_tag_present?` is true.
      def seo_tag_types(config)
        config.seo_tag_present? ? %w[WebSite] : []
      end
    end
  end
end
53
+
54
+ Liquid::Template.register_tag('ai_json_ld', JekyllAiVisibleContent::Tags::AiJsonLdTag) # Registers AiJsonLdTag with Liquid under the tag name `ai_json_ld`.
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
require 'cgi'

module JekyllAiVisibleContent
  module Tags
    # Liquid tag that renders a "Related Posts" `<nav>` for the current page.
    #
    # Related posts are taken from the page's `related_slugs` front matter
    # when any of them resolve; otherwise posts are ranked by weighted
    # Jaccard similarity of tags, categories and topics.
    #
    # Accepts an optional `limit: N` argument in the tag markup.
    class AiRelatedPostsTag < Liquid::Tag
      LIMIT_SYNTAX = /limit\s*:\s*(\d+)/

      # Front-matter keys compared when scoring, with their weights.
      SIMILARITY_WEIGHTS = { 'tags' => 3, 'categories' => 2, 'topics' => 1 }.freeze

      def initialize(tag_name, markup, tokens)
        super
        match = markup.match(LIMIT_SYNTAX)
        @limit = match ? match[1].to_i : nil
      end

      # @param context [Liquid::Context] site and page are read from
      #   `context.registers`
      # @return [String] HTML, or '' when the feature is disabled, the page
      #   object is not found, or nothing is related
      def render(context)
        site = context.registers[:site]
        page = context.registers[:page]
        config = JekyllAiVisibleContent.config(site)
        return '' unless config.enabled? && config.linking['enable_related_posts']

        # Precedence: tag argument, then configuration, then 3.
        limit = @limit || config.linking['related_posts_limit'] || 3
        page_obj = find_page_object(site, page)
        return '' unless page_obj

        related = find_related(site, page_obj, limit)
        return '' if related.empty?

        render_html(related)
      end

      private

      def find_page_object(site, page_hash)
        url = page_hash['url']
        site.posts.docs.find { |doc| doc.url == url } || site.pages.find { |doc| doc.url == url }
      end

      def find_related(site, current, limit)
        explicit = explicit_related(site, current)
        return explicit.first(limit) if explicit.any?

        candidates = site.posts.docs.reject { |post| post.url == current.url }
        scored = candidates.each_with_index.map { |post, idx| [post, similarity(current, post), idx] }

        # The original index is a secondary sort key so that equally scored
        # posts keep site order (`sort_by` alone is not stable).
        scored.select { |_, score, _| score.positive? }
              .sort_by { |_, score, idx| [-score, idx] }
              .first(limit)
              .map(&:first)
      end

      # Posts named by `related_slugs`, matched on `slug` front matter or as a
      # substring of the post URL. Accepts a single slug as well as a list,
      # and ignores blank entries (an empty string would otherwise match every
      # URL via `include?`).
      def explicit_related(site, current)
        slugs = Array(current.data['related_slugs']).map(&:to_s).reject { |slug| slug.strip.empty? }
        slugs.filter_map do |slug|
          site.posts.docs.find { |post| post.data['slug'].to_s == slug || post.url.include?(slug) }
        end
      end

      # Weighted sum of per-key Jaccard similarities.
      def similarity(current, post)
        SIMILARITY_WEIGHTS.sum do |key, weight|
          jaccard_similarity(current.data[key], post.data[key]) * weight
        end
      end

      # Case-insensitive |A ∩ B| / |A ∪ B|; 0.0 when both sides are empty.
      # `Array()` lets nil and scalar front-matter values through instead of
      # raising NoMethodError.
      def jaccard_similarity(set_a, set_b)
        a = Array(set_a).map { |value| value.to_s.downcase }
        b = Array(set_b).map { |value| value.to_s.downcase }
        union = (a | b).size
        union.zero? ? 0.0 : (a & b).size.to_f / union
      end

      # Post URL and title are HTML-escaped; titles routinely contain `&`,
      # quotes or angle brackets.
      def render_html(posts)
        lines = []
        lines << '<nav aria-label="Related posts">'
        lines << ' <h2>Related Posts</h2>'
        lines << ' <ul>'

        posts.each do |post|
          lines << ' <li itemscope itemtype="https://schema.org/BlogPosting">'
          lines << " <a itemprop=\"url\" href=\"#{escape(post.url)}\">"
          lines << " <span itemprop=\"headline\">#{escape(post.data['title'])}</span>"
          lines << ' </a>'
          date = post.data['date']
          # Only date-like values can be formatted; skip strings and nil.
          if date.respond_to?(:strftime)
            dt = date.strftime('%Y-%m-%d')
            published = date.strftime('%b %d, %Y')
            lines << " <time itemprop=\"datePublished\" datetime=\"#{dt}\">#{published}</time>"
          end
          lines << ' </li>'
        end

        lines << ' </ul>'
        lines << '</nav>'
        lines.join("\n")
      end

      def escape(value)
        CGI.escapeHTML(value.to_s)
      end
    end
  end
end
90
+
91
+ Liquid::Template.register_tag('ai_related_posts', JekyllAiVisibleContent::Tags::AiRelatedPostsTag) # Registers AiRelatedPostsTag with Liquid under the tag name `ai_related_posts`.
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module Validators
    # Collects human-readable warnings about how consistently the configured
    # entity is represented across site configuration, posts and pages.
    class EntityConsistencyValidator
      # Validation flag => check method, in reporting order.
      OPTIONAL_CHECKS = {
        'warn_name_inconsistency' => :check_name_consistency,
        'warn_missing_same_as' => :check_missing_same_as,
        'warn_missing_dates' => :check_missing_dates,
        'warn_missing_descriptions' => :check_missing_descriptions
      }.freeze

      GENERIC_TITLES = %w[about blog home page post].freeze

      attr_reader :config, :site

      # @param config [#validation, #entity] plugin configuration
      # @param site [#config, #posts, #pages] Jekyll site
      def initialize(config, site)
        @config = config
        @site = site
      end

      # Runs every check enabled in `config.validation`, then the
      # generic-title check (which has no flag).
      #
      # @return [Array<String>] warning messages
      def validate
        enabled = OPTIONAL_CHECKS.flat_map do |flag, check|
          config.validation[flag] ? send(check) : []
        end
        enabled.concat(check_generic_titles)
      end

      private

      # Flags a site-level author and per-post string authors that differ
      # from the entity's canonical name.
      def check_name_consistency
        canonical = config.entity['name']
        return [] unless canonical

        found = []

        author_config = site.config['author']
        site_author = author_config.is_a?(Hash) ? author_config['name'] : author_config
        if site_author.is_a?(String) && site_author != canonical
          found << "Name inconsistency: site.author='#{site_author}' differs from entity.name='#{canonical}'"
        end

        site.posts.docs.each do |post|
          author = post.data['author']
          if author.is_a?(String) && author != canonical
            found << "Name inconsistency in #{post.relative_path}: author='#{author}' differs from '#{canonical}'"
          end
        end

        found
      end

      def check_missing_same_as
        if config.entity['same_as']&.any?
          []
        else
          ['Entity has no sameAs links to external profiles (LinkedIn, GitHub, etc.)']
        end
      end

      def check_missing_dates
        undated = site.posts.docs.reject { |post| post.data['last_modified_at'] }
        undated.map do |post|
          "Missing last_modified_at in #{post.relative_path} (freshness scoring disabled)"
        end
      end

      def check_missing_descriptions
        all_docs.reject { |doc| described?(doc) }
                .map { |doc| "Missing description in #{location(doc)}" }
      end

      def check_generic_titles
        flagged = all_docs.select do |doc|
          GENERIC_TITLES.include?(doc.data['title'].to_s.strip.downcase)
        end
        flagged.map do |doc|
          "Generic title '#{doc.data['title']}' in #{location(doc)} (include entity name for discoverability)"
        end
      end

      # True when the description is set and not blank.
      def described?(doc)
        description = doc.data['description']
        description && !description.to_s.strip.empty?
      end

      # Source path when the document exposes one, otherwise its URL.
      def location(doc)
        doc.respond_to?(:relative_path) ? doc.relative_path : doc.url
      end

      def all_docs
        site.posts.docs + site.pages
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module Validators
    # Validates the entity configuration and individual JSON-LD nodes,
    # returning human-readable error messages.
    class JsonLdValidator
      REQUIRED_PERSON_FIELDS = %w[@type @id name].freeze
      REQUIRED_POSTING_FIELDS = %w[@type headline author].freeze

      # `@type` value => fields that must be present on such a node.
      REQUIRED_FIELDS_BY_TYPE = {
        'Person' => REQUIRED_PERSON_FIELDS,
        'BlogPosting' => REQUIRED_POSTING_FIELDS
      }.freeze

      attr_reader :config

      # @param config [#entity] plugin configuration
      def initialize(config)
        @config = config
      end

      # @return [Array<String>] configuration-level errors (empty when valid)
      def validate
        validate_entity_config
      end

      # Checks a single JSON-LD node for the fields required by its `@type`.
      # Non-hash input and unrecognised types produce no errors.
      #
      # @param node [Object]
      # @return [Array<String>]
      def validate_node(node)
        return [] unless node.is_a?(Hash)

        type_name = node['@type']
        required = REQUIRED_FIELDS_BY_TYPE[type_name]
        required ? validate_fields(node, required, type_name) : []
      end

      private

      def validate_entity_config
        errors = []
        entity = config.entity || {}

        # Uses the same blank test as `validate_fields`. The previous
        # `entity['name'].strip` raised NoMethodError for non-String names
        # (for example a YAML `name: 42`).
        name_missing = blank?(entity['name'])
        errors << 'Entity name is required in ai_visible_content.entity.name' if name_missing

        if name_missing && blank?(entity['id_slug'])
          errors << 'Entity requires either id_slug or name to generate @id'
        end

        errors
      end

      def validate_fields(node, required, type_name)
        required.select { |field| blank?(node[field]) }
                .map { |field| "#{type_name} JSON-LD missing required field: #{field}" }
      end

      # nil, empty and whitespace-only values all count as missing.
      def blank?(value)
        value.nil? || value.to_s.strip.empty?
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
module JekyllAiVisibleContent
  module Validators
    # Reports pages recorded under `site.data['ai_orphan_pages']` as
    # warnings when `validation['warn_orphan_pages']` is enabled.
    class LinkValidator
      attr_reader :config, :site

      # @param config [#validation] plugin configuration
      # @param site [#data] Jekyll site
      def initialize(config, site)
        @config = config
        @site = site
      end

      # @return [Array<String>] one warning per orphan URL; empty when the
      #   check is disabled or nothing is recorded
      def validate
        return [] unless config.validation['warn_orphan_pages']

        check_orphan_pages
      end

      private

      def check_orphan_pages
        urls = site.data['ai_orphan_pages']
        return [] unless urls

        urls.map { |url| format('Orphan page (no inbound links): %s', url) }
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JekyllAiVisibleContent
4
+ VERSION = '0.1.0' # Version string of this gem release.
5
+ end