jekyll-ai-visible-content 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +39 -0
- data/CHANGELOG.md +12 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +227 -0
- data/LICENSE.txt +21 -0
- data/README.md +352 -0
- data/Rakefile +9 -0
- data/jekyll-ai-visible-content.gemspec +29 -0
- data/lib/jekyll-ai-visible-content.rb +47 -0
- data/lib/jekyll_ai_visible_content/configuration.rb +154 -0
- data/lib/jekyll_ai_visible_content/entity/organization.rb +68 -0
- data/lib/jekyll_ai_visible_content/entity/person.rb +114 -0
- data/lib/jekyll_ai_visible_content/entity/registry.rb +94 -0
- data/lib/jekyll_ai_visible_content/filters/entity_filter.rb +29 -0
- data/lib/jekyll_ai_visible_content/filters/naming_filter.rb +27 -0
- data/lib/jekyll_ai_visible_content/generators/content_graph_generator.rb +69 -0
- data/lib/jekyll_ai_visible_content/generators/entity_map_generator.rb +65 -0
- data/lib/jekyll_ai_visible_content/generators/llms_txt_generator.rb +170 -0
- data/lib/jekyll_ai_visible_content/generators/robots_txt_generator.rb +57 -0
- data/lib/jekyll_ai_visible_content/hooks/post_render_hook.rb +82 -0
- data/lib/jekyll_ai_visible_content/hooks/validate_hook.rb +49 -0
- data/lib/jekyll_ai_visible_content/json_ld/blog_posting_schema.rb +104 -0
- data/lib/jekyll_ai_visible_content/json_ld/breadcrumb_schema.rb +69 -0
- data/lib/jekyll_ai_visible_content/json_ld/builder.rb +64 -0
- data/lib/jekyll_ai_visible_content/json_ld/collection_schema.rb +47 -0
- data/lib/jekyll_ai_visible_content/json_ld/faq_schema.rb +37 -0
- data/lib/jekyll_ai_visible_content/json_ld/how_to_schema.rb +42 -0
- data/lib/jekyll_ai_visible_content/json_ld/person_schema.rb +18 -0
- data/lib/jekyll_ai_visible_content/json_ld/website_schema.rb +39 -0
- data/lib/jekyll_ai_visible_content/tags/ai_author_tag.rb +26 -0
- data/lib/jekyll_ai_visible_content/tags/ai_breadcrumb_tag.rb +50 -0
- data/lib/jekyll_ai_visible_content/tags/ai_entity_link_tag.rb +40 -0
- data/lib/jekyll_ai_visible_content/tags/ai_json_ld_tag.rb +54 -0
- data/lib/jekyll_ai_visible_content/tags/ai_related_posts_tag.rb +91 -0
- data/lib/jekyll_ai_visible_content/validators/entity_consistency_validator.rb +94 -0
- data/lib/jekyll_ai_visible_content/validators/json_ld_validator.rb +58 -0
- data/lib/jekyll_ai_visible_content/validators/link_validator.rb +27 -0
- data/lib/jekyll_ai_visible_content/version.rb +5 -0
- metadata +107 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/jekyll_ai_visible_content/version'
|
|
4
|
+
|
|
5
|
+
# Gem specification: identity, registry metadata, packaged files and the
# runtime dependency on Jekyll.
Gem::Specification.new do |gem|
  gem.name = 'jekyll-ai-visible-content'
  gem.version = JekyllAiVisibleContent::VERSION
  gem.authors = ['madmatvey']
  gem.email = ['potehin@gmail.com']

  gem.summary = 'Jekyll plugin that maximizes AI search discoverability'
  gem.description = 'Adds rich JSON-LD structured data, llms.txt, semantic HTML helpers, ' \
                    'entity identity management, and AI crawler policies to Jekyll sites.'
  gem.homepage = 'https://github.com/madmatvey/jekyll-ai-visible-content'
  gem.license = 'MIT'

  gem.required_ruby_version = '>= 3.2'
  gem.metadata['homepage_uri'] = gem.homepage
  gem.metadata['source_code_uri'] = gem.homepage
  gem.metadata['changelog_uri'] = "#{gem.homepage}/blob/master/CHANGELOG.md"
  gem.metadata['rubygems_mfa_required'] = 'true'

  # Package every git-tracked file except the test suite and CI configuration.
  gem.files = Dir.chdir(__dir__) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.start_with?('spec/', '.github/') }
  end
  gem.require_paths = ['lib']

  gem.add_dependency 'jekyll', '>= 4.0', '< 5.0'
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'jekyll'
|
|
4
|
+
require 'json'
|
|
5
|
+
|
|
6
|
+
require_relative 'jekyll_ai_visible_content/version'
|
|
7
|
+
require_relative 'jekyll_ai_visible_content/configuration'
|
|
8
|
+
require_relative 'jekyll_ai_visible_content/entity/person'
|
|
9
|
+
require_relative 'jekyll_ai_visible_content/entity/organization'
|
|
10
|
+
require_relative 'jekyll_ai_visible_content/entity/registry'
|
|
11
|
+
require_relative 'jekyll_ai_visible_content/json_ld/builder'
|
|
12
|
+
require_relative 'jekyll_ai_visible_content/json_ld/person_schema'
|
|
13
|
+
require_relative 'jekyll_ai_visible_content/json_ld/blog_posting_schema'
|
|
14
|
+
require_relative 'jekyll_ai_visible_content/json_ld/website_schema'
|
|
15
|
+
require_relative 'jekyll_ai_visible_content/json_ld/breadcrumb_schema'
|
|
16
|
+
require_relative 'jekyll_ai_visible_content/json_ld/faq_schema'
|
|
17
|
+
require_relative 'jekyll_ai_visible_content/json_ld/how_to_schema'
|
|
18
|
+
require_relative 'jekyll_ai_visible_content/json_ld/collection_schema'
|
|
19
|
+
require_relative 'jekyll_ai_visible_content/generators/llms_txt_generator'
|
|
20
|
+
require_relative 'jekyll_ai_visible_content/generators/robots_txt_generator'
|
|
21
|
+
require_relative 'jekyll_ai_visible_content/generators/entity_map_generator'
|
|
22
|
+
require_relative 'jekyll_ai_visible_content/generators/content_graph_generator'
|
|
23
|
+
require_relative 'jekyll_ai_visible_content/tags/ai_json_ld_tag'
|
|
24
|
+
require_relative 'jekyll_ai_visible_content/tags/ai_author_tag'
|
|
25
|
+
require_relative 'jekyll_ai_visible_content/tags/ai_entity_link_tag'
|
|
26
|
+
require_relative 'jekyll_ai_visible_content/tags/ai_related_posts_tag'
|
|
27
|
+
require_relative 'jekyll_ai_visible_content/tags/ai_breadcrumb_tag'
|
|
28
|
+
require_relative 'jekyll_ai_visible_content/filters/naming_filter'
|
|
29
|
+
require_relative 'jekyll_ai_visible_content/filters/entity_filter'
|
|
30
|
+
require_relative 'jekyll_ai_visible_content/hooks/post_render_hook'
|
|
31
|
+
require_relative 'jekyll_ai_visible_content/hooks/validate_hook'
|
|
32
|
+
require_relative 'jekyll_ai_visible_content/validators/entity_consistency_validator'
|
|
33
|
+
require_relative 'jekyll_ai_visible_content/validators/json_ld_validator'
|
|
34
|
+
require_relative 'jekyll_ai_visible_content/validators/link_validator'
|
|
35
|
+
|
|
36
|
+
# Top-level namespace of the plugin; also hands out per-site Configuration objects.
module JekyllAiVisibleContent
  # Plugin-specific error type (a StandardError subclass).
  class Error < StandardError
  end

  class << self
    # Returns the Configuration for +site+, creating it on the first request.
    # The cache is keyed by object identity, so two sites that compare equal
    # still get separate configurations.
    def config(site)
      cache = (@configs ||= {}.compare_by_identity)
      cache[site] ||= Configuration.new(site)
    end

    # Drops every cached Configuration so the next +config+ call rebuilds it.
    def reset!
      @configs = {}.compare_by_identity
    end
  end
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  # Resolved plugin settings for one Jekyll site: the `ai_visible_content`
  # section of the site configuration layered over DEFAULTS.
  class Configuration
    # Key of the plugin section inside the site configuration.
    CONFIG_KEY = 'ai_visible_content'

    # Built-in settings used for everything the site does not override.
    DEFAULTS = {
      'enabled' => true,
      'entity' => {
        'type' => 'Person',
        'id_slug' => nil,
        'name' => nil,
        'alternate_names' => [],
        'job_title' => nil,
        'description' => nil,
        'image' => nil,
        'email' => nil,
        'location' => { 'locality' => nil, 'country' => nil },
        'knows_about' => [],
        'same_as' => [],
        'works_for' => nil,
        'occupation' => nil
      },
      'json_ld' => {
        'auto_inject' => true,
        'include_website_schema' => true,
        'include_breadcrumbs' => true,
        'include_blog_posting' => true,
        'include_faq' => true,
        'include_how_to' => true,
        'article_body' => 'excerpt',
        'compact' => false
      },
      'crawlers' => {
        'allow_gptbot' => true,
        'allow_perplexitybot' => true,
        'allow_claudebot' => true,
        'allow_googlebot' => true,
        'allow_bingbot' => true,
        'custom_rules' => [],
        'generate_robots_txt' => true
      },
      'llms_txt' => {
        'enabled' => true,
        'title' => nil,
        'description' => nil,
        'sections' => [],
        'include_full_text' => true
      },
      'linking' => {
        'enable_entity_links' => true,
        'entity_definitions' => {},
        'max_links_per_entity_per_post' => 1,
        'enable_related_posts' => true,
        'related_posts_limit' => 3
      },
      'validation' => {
        'warn_name_inconsistency' => true,
        'warn_missing_same_as' => true,
        'warn_missing_dates' => true,
        'warn_orphan_pages' => true,
        'warn_missing_descriptions' => true,
        'fail_build_on_error' => false
      }
    }.freeze

    attr_reader :site

    # @param site [#config] object whose +config+ is the site configuration Hash
    def initialize(site)
      @site = site
      overrides = site.config[CONFIG_KEY]
      # An empty `ai_visible_content:` key parses to nil; treat anything that
      # is not a Hash as "no overrides" instead of crashing during the merge.
      @raw = deep_merge(DEFAULTS, overrides.is_a?(Hash) ? overrides : {})
    end

    # @return [Boolean] true only when 'enabled' is literally +true+
    def enabled?
      @raw['enabled'] == true
    end

    # @return [Hash] the merged 'entity' section
    def entity
      @raw['entity']
    end

    # Identifier of the primary entity: "<site url>/#<slug>". The slug is
    # 'id_slug' when set, otherwise derived from the entity name.
    #
    # @return [String, nil] nil when no non-empty slug can be produced
    def entity_id
      slug = entity['id_slug'].to_s
      slug = normalize_slug(entity['name']).to_s if slug.strip.empty?
      "#{site_url}/##{slug}" unless slug.empty?
    end

    # @return [String] configured entity type, 'Person' when unset
    def entity_type
      entity['type'] || 'Person'
    end

    # @return [Hash] the merged 'json_ld' section
    def json_ld
      @raw['json_ld']
    end

    # @return [Hash] the merged 'crawlers' section
    def crawlers
      @raw['crawlers']
    end

    # @return [Hash] the merged 'llms_txt' section
    def llms_txt
      @raw['llms_txt']
    end

    # @return [Hash] the merged 'linking' section
    def linking
      @raw['linking']
    end

    # @return [Hash] the merged 'validation' section
    def validation
      @raw['validation']
    end

    # @return [String] site 'url' setting, or '' when unset
    def site_url
      @site.config['url'] || ''
    end

    # @return [String] site title, falling back to the entity name, then ''
    def site_title
      @site.config['title'] || entity['name'] || ''
    end

    # @return [String] site description, falling back to the entity description, then ''
    def site_description
      @site.config['description'] || entity['description'] || ''
    end

    # Whether jekyll-seo-tag is listed in the site's plugins or is a loaded gem.
    # Any error raised while checking is treated as "not present".
    def seo_tag_present?
      @site.config['plugins']&.include?('jekyll-seo-tag') ||
        Gem.loaded_specs.key?('jekyll-seo-tag')
    rescue StandardError
      false
    end

    # Raw access to a top-level key of the merged settings.
    def [](key)
      @raw[key]
    end

    private

    # Lower-cases +value+ and collapses every run of characters outside
    # a-z / 0-9 into a single dash, trimming dashes at both ends.
    #
    # @return [String, nil] nil when +value+ is nil
    def normalize_slug(value)
      return nil unless value

      value.to_s.downcase.gsub(/[^a-z0-9]+/, '-').gsub(/\A-|-\z/, '')
    end

    # Recursively layers +override+ on top of +base+ and returns a new Hash.
    #
    # * A Hash default always stays a Hash: a nil or otherwise non-Hash
    #   override for it is ignored so section accessors never return nil.
    # * Defaults that are not overridden are copied, so mutating the result
    #   cannot leak into DEFAULTS or into another Configuration.
    # * Keys present only in +override+ are appended unchanged.
    def deep_merge(base, override)
      merged = base.each_with_object({}) do |(key, default), result|
        result[key] =
          if default.is_a?(Hash)
            nested = override[key]
            deep_merge(default, nested.is_a?(Hash) ? nested : {})
          elsif override.key?(key)
            override[key]
          else
            deep_dup(default)
          end
      end

      override.each { |key, value| merged[key] = value unless base.key?(key) }
      merged
    end

    # Copies nested Hash / Array / String defaults; other values are returned as-is.
    def deep_dup(value)
      case value
      when Hash then value.to_h { |key, item| [key, deep_dup(item)] }
      when Array then value.map { |item| deep_dup(item) }
      when String then value.dup
      else value
      end
    end
  end
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Entity
    # Builds the Organization hash for the site's primary entity from the
    # plugin configuration.
    class Organization
      # Matches values that are already absolute: "scheme:..." or "//host/...".
      ABSOLUTE_URL = %r{\A(?:[a-z][a-z0-9+.-]*:|//)}i

      attr_reader :config

      # @param config [#entity, #entity_id, #site_url] resolved plugin configuration
      def initialize(config)
        @config = config
      end

      # @return [Hash] Organization data; keys whose value is nil are omitted
      def to_hash
        entity = config.entity
        data = {
          '@type' => 'Organization',
          '@id' => config.entity_id,
          'name' => entity['name'],
          'url' => config.site_url
        }

        append_image(data, entity)
        append_description(data, entity)
        append_address(data, entity)
        append_same_as(data, entity)

        data.compact
      end

      private

      # Adds the configured image as an ImageObject under 'logo'.
      def append_image(data, entity)
        return unless entity['image']

        data['logo'] = {
          '@type' => 'ImageObject',
          'url' => absolute_url(entity['image'])
        }
      end

      # Adds the description with surrounding whitespace removed.
      def append_description(data, entity)
        description = entity['description']
        data['description'] = description.to_s.strip if description
      end

      # Adds a PostalAddress when a locality or country is configured.
      def append_address(data, entity)
        loc = entity['location']
        # Only a Hash carries locality/country; a String would be treated as
        # a substring lookup by String#[].
        return unless loc.is_a?(Hash) && (loc['locality'] || loc['country'])

        data['address'] = {
          '@type' => 'PostalAddress',
          'addressLocality' => loc['locality'],
          'addressCountry' => loc['country']
        }.compact
      end

      # Adds 'sameAs'; a single String is accepted as a one-element list.
      def append_same_as(data, entity)
        links = entity['same_as']
        links = [links] if links.is_a?(String)
        data['sameAs'] = links if links.respond_to?(:any?) && links.any?
      end

      # Resolves +path+ against the site URL with exactly one slash between
      # them. Absolute values are returned untouched, and so is +path+ when
      # no site URL is configured.
      def absolute_url(path)
        path = path.to_s
        return path if path.match?(ABSOLUTE_URL)

        base = config.site_url.to_s
        return path if base.empty?

        "#{base.chomp('/')}/#{path.delete_prefix('/')}"
      end
    end
  end
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Entity
    # Builds the Person hash for the site's primary entity from the plugin
    # configuration.
    class Person
      # Matches values that are already absolute: "scheme:..." or "//host/...".
      ABSOLUTE_URL = %r{\A(?:[a-z][a-z0-9+.-]*:|//)}i

      attr_reader :config

      # @param config [#entity, #entity_id, #site_url] resolved plugin configuration
      def initialize(config)
        @config = config
      end

      # @return [Hash] Person data; keys whose value is nil are omitted
      def to_hash
        entity = config.entity
        data = {
          '@type' => 'Person',
          '@id' => config.entity_id,
          'name' => entity['name'],
          'url' => config.site_url
        }

        append_alternate_names(data, entity)
        append_image(data, entity)
        append_simple_fields(data, entity)
        append_address(data, entity)
        append_knows_about(data, entity)
        append_same_as(data, entity)
        append_works_for(data, entity)
        append_occupation(data, entity)

        data.compact
      end

      private

      def append_alternate_names(data, entity)
        append_list(data, 'alternateName', entity['alternate_names'])
      end

      # Adds the configured image as an ImageObject under 'image'.
      def append_image(data, entity)
        return unless entity['image']

        data['image'] = {
          '@type' => 'ImageObject',
          'url' => absolute_url(entity['image'])
        }
      end

      # Copies job title, description (whitespace-trimmed) and email when set.
      def append_simple_fields(data, entity)
        data['jobTitle'] = entity['job_title'] if entity['job_title']
        data['description'] = entity['description'].to_s.strip if entity['description']
        data['email'] = entity['email'] if entity['email']
      end

      # Adds a PostalAddress when a locality or country is configured.
      def append_address(data, entity)
        loc = entity['location']
        # Only a Hash carries locality/country; a String would be treated as
        # a substring lookup by String#[].
        return unless loc.is_a?(Hash) && (loc['locality'] || loc['country'])

        data['address'] = {
          '@type' => 'PostalAddress',
          'addressLocality' => loc['locality'],
          'addressCountry' => loc['country']
        }.compact
      end

      def append_knows_about(data, entity)
        append_list(data, 'knowsAbout', entity['knows_about'])
      end

      def append_same_as(data, entity)
        append_list(data, 'sameAs', entity['same_as'])
      end

      # Adds 'worksFor'. Accepts either a Hash with 'name' / optional 'type',
      # or a plain String that is used as the organization name.
      def append_works_for(data, entity)
        wf = entity['works_for']
        wf = { 'name' => wf } if wf.is_a?(String)
        return unless wf.is_a?(Hash)

        data['worksFor'] = {
          '@type' => wf['type'] || 'Organization',
          'name' => wf['name']
        }.compact
      end

      # Adds 'hasOccupation'. Accepts either a Hash with 'name' and optional
      # 'location_country' / 'skills', or a plain String used as the name.
      def append_occupation(data, entity)
        occ = entity['occupation']
        occ = { 'name' => occ } if occ.is_a?(String)
        return unless occ.is_a?(Hash)

        occupation = {
          '@type' => 'Occupation',
          'name' => occ['name']
        }

        if occ['location_country']
          occupation['occupationLocation'] = {
            '@type' => 'Country',
            'name' => occ['location_country']
          }
        end

        occupation['skills'] = occ['skills'] if occ['skills']
        data['hasOccupation'] = occupation.compact
      end

      # Stores +value+ under +key+ when it holds at least one truthy element.
      # A single String is accepted as a one-element list.
      def append_list(data, key, value)
        value = [value] if value.is_a?(String)
        data[key] = value if value.respond_to?(:any?) && value.any?
      end

      # Resolves +path+ against the site URL with exactly one slash between
      # them. Absolute values are returned untouched, and so is +path+ when
      # no site URL is configured.
      def absolute_url(path)
        path = path.to_s
        return path if path.match?(ABSOLUTE_URL)

        base = config.site_url.to_s
        return path if base.empty?

        "#{base.chomp('/')}/#{path.delete_prefix('/')}"
      end
    end
  end
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Entity
    # Holds the site's primary entity and topic entities, and tracks how often
    # and on which pages entity names are mentioned.
    class Registry
      attr_reader :config, :primary_entity, :topic_entities

      # @param config [#entity, #entity_id, #entity_type, #linking] resolved plugin configuration
      def initialize(config)
        @config = config
        @primary_entity = build_primary_entity
        @topic_entities = build_topic_entities
        @mention_counts = Hash.new(0)
        @entity_pages = Hash.new { |pages, name| pages[name] = [] }
      end

      # @return [Hash, nil] memoized hash form of the primary entity
      def primary_entity_hash
        @primary_entity_hash ||= primary_entity&.to_hash
      end

      # @return [Hash, nil] "@type"/"@id" reference, nil when the entity has no id
      def primary_entity_ref
        return nil unless config.entity_id

        { '@type' => config.entity_type, '@id' => config.entity_id }
      end

      # Counts one mention of +entity_name+ and remembers +page_url+ once.
      # Names are matched case-insensitively, ignoring surrounding whitespace.
      def record_mention(entity_name, page_url)
        normalized = normalize_name(entity_name)
        @mention_counts[normalized] += 1
        @entity_pages[normalized] << page_url unless @entity_pages[normalized].include?(page_url)
      end

      # @return [Integer] number of recorded mentions (0 when never mentioned)
      def mention_count(entity_name)
        @mention_counts[normalize_name(entity_name)]
      end

      # Pages on which +entity_name+ was recorded.
      #
      # @return [Array] a copy: looking up a name never registers it, and
      #   changing the returned array does not alter the registry
      def pages_for(entity_name)
        @entity_pages.fetch(normalize_name(entity_name), []).dup
      end

      # @return [Hash] copy of the normalized-name => count table
      def all_mentions
        @mention_counts.dup
      end

      # @return [String] "/topics/<slug>/" for +topic_name+
      def topic_url(topic_name)
        "/topics/#{slugify(topic_name)}/"
      end

      # Configured entity definitions plus one generated definition for every
      # 'knows_about' topic whose slug is not already a key.
      #
      # @return [Hash] slug => definition
      def entity_definitions
        defs = config.linking['entity_definitions']
        defs = {} unless defs.is_a?(Hash)

        known_topics.each_with_object(defs.dup) do |topic, result|
          slug = slugify(topic)
          next if result.key?(slug)

          result[slug] = {
            'name' => topic,
            'url' => topic_url(topic),
            'description' => nil
          }
        end
      end

      # First definition whose 'name' equals +name+ after normalization.
      # Entries that are not Hashes (e.g. an empty YAML key) are skipped.
      #
      # @return [Hash, nil]
      def find_entity_by_name(name)
        normalized = normalize_name(name)
        entity_definitions.each_value.find do |definition|
          definition.is_a?(Hash) && normalize_name(definition['name']) == normalized
        end
      end

      private

      # Person or Organization wrapper for the configured entity; nil when the
      # entity has no name or its type is neither of the two.
      def build_primary_entity
        return nil unless config.entity['name']

        case config.entity_type
        when 'Person'
          Person.new(config)
        when 'Organization'
          Organization.new(config)
        end
      end

      def build_topic_entities
        known_topics.map { |topic| { 'name' => topic, 'slug' => slugify(topic) } }
      end

      # 'knows_about' as an Array; a single String counts as one topic and any
      # other non-Array value as none.
      def known_topics
        topics = config.entity['knows_about']
        topics = [topics] if topics.is_a?(String)
        topics.is_a?(Array) ? topics : []
      end

      # Normalized name with every whitespace run replaced by one dash.
      def slugify(name)
        normalize_name(name).gsub(/\s+/, '-')
      end

      def normalize_name(name)
        name.to_s.strip.downcase
      end
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Filters
    # Liquid filters exposing the configured primary entity and entity URLs.
    # Each filter reads the site from the Liquid context registers.
    module EntityFilter
      # @param input [Object] fallback value
      # @return [Object] the configured entity name, or +input+ when none is set
      def ai_entity_name(input)
        ai_visible_config.entity['name'] || input
      end

      # @param input [Object] fallback value
      # @return [Object] the configured entity id, or +input+ when none is available
      def ai_entity_id(input)
        ai_visible_config.entity_id || input
      end

      # @param entity_name [String] name to look up among the entity definitions
      # @return [String] the definition's 'url'; '#' when the entity is unknown
      #   or its definition carries no URL
      def ai_entity_url(entity_name)
        definition = Entity::Registry.new(ai_visible_config).find_entity_by_name(entity_name)
        (definition && definition['url']) || '#'
      end

      private

      # Plugin configuration for the site being rendered.
      def ai_visible_config
        JekyllAiVisibleContent.config(@context.registers[:site])
      end
    end
  end
end
|
|
28
|
+
|
|
29
|
+
# Register the EntityFilter module with Liquid::Template when this file is loaded.
Liquid::Template.register_filter(JekyllAiVisibleContent::Filters::EntityFilter)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Filters
    # Liquid filters for building slugs and trailing-slash URLs.
    module NamingFilter
      # Turns +input+ into a lower-case, dash-separated slug. Only a-z, 0-9,
      # whitespace and dashes survive; every other character is dropped.
      #
      # @param input [Object] converted with +to_s+
      # @return [String] e.g. "  Hello, World! " => "hello-world"
      def ai_slugify(input)
        input.to_s.strip.downcase
             .gsub(/[^a-z0-9\s-]/, '')
             .gsub(/\s+/, '-')
             .gsub(/-+/, '-')
             .gsub(/\A-|-\z/, '')
      end

      # Same transformation as +ai_slugify+.
      def ai_entity_slug(input)
        ai_slugify(input)
      end

      # Ensures the path part of +input+ ends with "/" unless it already does
      # or ends in what looks like a file extension. A query string or
      # fragment is left in place, so the slash goes before it rather than
      # after it ("/about?x=1" => "/about/?x=1").
      #
      # @param input [Object] converted with +to_s+ and stripped
      # @return [String]
      def ai_canonical_url(input)
        url = input.to_s.strip
        cut = url.index(/[?#]/) || url.length
        path = url[0...cut]
        suffix = url[cut..]

        path = "#{path}/" unless path.end_with?('/') || path.match?(/\.\w+\z/)
        "#{path}#{suffix}"
      end
    end
  end
end
|
|
26
|
+
|
|
27
|
+
# Register the NamingFilter module with Liquid::Template when this file is loaded.
Liquid::Template.register_filter(JekyllAiVisibleContent::Filters::NamingFilter)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module JekyllAiVisibleContent
  module Generators
    # Builds a graph of internal links between posts and pages and stores it,
    # together with the list of pages nothing else links to, in site.data
    # ('ai_content_graph' and 'ai_orphan_pages').
    class ContentGraphGenerator < Jekyll::Generator
      safe true
      priority :lowest

      # Captures the value of every quoted href attribute. Links written in
      # any other syntax (for example Markdown) are not matched.
      HREF_PATTERN = /href=["']([^"']+)["']/

      # Prefixes that never point at a page of this site; '//' is a
      # protocol-relative URL naming another host.
      NON_PAGE_PREFIXES = ['#', 'mailto:', 'tel:', 'javascript:', '//'].freeze

      # @param site [Jekyll::Site]
      def generate(site)
        config = JekyllAiVisibleContent.config(site)
        return unless config.enabled?

        graph = build_link_graph(site, config)
        orphans = find_orphans(graph, site)

        site.data['ai_content_graph'] = graph
        site.data['ai_orphan_pages'] = orphans
      end

      private

      # @return [Hash] url => { 'outbound' => [urls], 'inbound' => [urls] }
      def build_link_graph(site, config)
        graph = Hash.new { |nodes, url| nodes[url] = { 'outbound' => [], 'inbound' => [] } }

        all_docs(site).each do |doc|
          source_url = doc.url

          extract_internal_links(doc.content.to_s, config.site_url).each do |target_url|
            add_unique(graph[source_url]['outbound'], target_url)
            add_unique(graph[target_url]['inbound'], source_url)
          end
        end

        graph
      end

      # URLs (other than '/') that no other document links to. A document's
      # link to itself does not count as an inbound link.
      def find_orphans(graph, site)
        all_docs(site).map(&:url).select do |url|
          # graph[url] is read first so every document gets a node in the graph.
          (graph[url]['inbound'] - [url]).empty? && url != '/'
        end
      end

      # @return [Array<String>] unique, normalized site-internal paths linked from +content+
      def extract_internal_links(content, site_url)
        base = site_url.to_s.chomp('/')

        content.scan(HREF_PATTERN).filter_map do |(href)|
          path = internal_path(href, base)
          normalize_url(path) if path
        end.uniq
      end

      # Site-relative form of +href+, or nil when it does not point into this site.
      def internal_path(href, base)
        return nil if href.start_with?(*NON_PAGE_PREFIXES)
        return href if href.start_with?('/')
        # With no site URL configured nothing absolute can be recognised as
        # internal (every string "starts with" the empty string).
        return nil if base.empty? || !href.start_with?(base)

        rest = href.delete_prefix(base)
        return '/' if rest.empty?

        # Require a path/query/fragment boundary so that a longer host name
        # sharing the same prefix is not mistaken for this site.
        rest.start_with?('/', '?', '#') ? rest : nil
      end

      # Drops query string and fragment and appends '/' unless the path already
      # ends with one or ends in what looks like a file extension. An empty
      # path is the site root.
      def normalize_url(url)
        path = url.to_s[/\A[^?#]*/]
        path = '/' if path.empty?
        path = "#{path}/" unless path.end_with?('/') || path.match?(/\.\w+\z/)
        path
      end

      def add_unique(list, value)
        list << value unless list.include?(value)
      end

      def all_docs(site)
        site.posts.docs + site.pages
      end
    end
  end
end
|