jekyll-ai-visible-content 0.1.0 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d6df8740bf4a545461ef47c5e1e40f2c957d0653f35980d5bd4debd69c3a2f2c
4
- data.tar.gz: 1efb32d2987403177eb39b901d3c082090020a77fe328b156e92c19ac188b095
3
+ metadata.gz: 70c2a75f924f17bb54ccd80796dc0da7f6ee26aeed7e7dc8555be88ad95c2497
4
+ data.tar.gz: c13846d123a0e9a81e3d4cea3bd31a5e81b9af24885e704cf5a8c586fd4220f9
5
5
  SHA512:
6
- metadata.gz: 46ca4cd8957244c6664fdf68af90f4cf1eb37b29015672b75ecc21dd2158ef09eea295028bafe52dbce10bcc48fd0449f9c33c8cb7b8306926229efc3e71b1b8
7
- data.tar.gz: 01e325c0f40da46e82c33c677fe5e910de2e5db9f43160398a95aaf223efab1bec1591042be869bf95badd11adb834c8a3a18c1024789b75e3f609537e40906c
6
+ metadata.gz: 388b85f05e6b0e3ac72f7c09f3e278c6505c0a640c6e307882c57e7de9716eb6671dd9534102f4a77aa32c41cc54a71d9578f52158ce5263a0af00f23a21a11a
7
+ data.tar.gz: fc73eef2bf48aa2867e6c146f56644f6962c104f079cbd4bf02c512f8ba047809823bcacab390b9f77b1e278ecad5b9a1aec56040c2bae21c5629be236991f10
data/.gitignore CHANGED
@@ -1 +1,3 @@
1
- .cursor
1
+ .cursor
2
+ *.gem
3
+ .playwright-mcp/
data/CHANGELOG.md CHANGED
@@ -1,6 +1,71 @@
1
1
  # Changelog
2
2
 
3
- ## 0.1.0 (Unreleased)
3
+ ## 0.4.6 (2026-04-07)
4
+
5
+ - Fix entity auto-linking to avoid nested `<a>` tags by skipping replacements inside existing anchor blocks
6
+ - Add integration regression coverage for homepage nested-anchor prevention
7
+ - Resolve remaining RuboCop style offense in `EntityClassifier`
8
+
9
+ ## 0.4.5 (2026-04-07)
10
+
11
+ - Apply safe layout fix by moving `link[rel="ai:*"]` injection into `<head>` while keeping AI instruction block before `</body>`
12
+ - Avoid appending raw `<link>` elements at the end of `<body>` to prevent theme/script edge-case rendering issues
13
+ - Keep AI resource discovery behavior unchanged for JSON/YAML/Markdown links
14
+
15
+ ## 0.4.4 (2026-04-07)
16
+
17
+ - Refine AI page markdown output to exclude full Jekyll front matter and keep only AI-relevant intro metadata
18
+ - Build structured AI-readable markdown preface from `title`, `subtitle`, and `description`
19
+ - Keep body content markdown while stripping Liquid/Jekyll template directives for cleaner LLM ingestion
20
+
21
+ ## 0.4.3 (2026-04-07)
22
+
23
+ - Serve `/ai/page/*.md` as raw markdown output (not HTML-rendered) by generating text-backed pages with `.md` permalinks
24
+ - Strip Liquid/Jekyll service tags (`{% ... %}`, `{{ ... }}`, comment blocks) from AI markdown content for cleaner machine-readable text
25
+ - Read markdown content from source files to avoid leaking internal Jekyll runtime objects into AI resources
26
+
27
+ ## 0.4.2 (2026-04-07)
28
+
29
+ - Ensure AI link and instruction injection also works reliably on home and about pages via URL normalization/fallback lookup
30
+ - Generate page-level markdown resources under `/ai/page/<slug>.md` using real source front matter/content instead of entity summary markdown
31
+ - Improve markdown resource slug normalization and add coverage for home-page injection and page-markdown outputs
32
+
33
+ ## 0.4.1 (2026-04-07)
34
+
35
+ - Add fallback entity classification for general articles (derive stable topic slug from page URL/title when explicit entities are absent)
36
+ - Ensure AI resources are generated for ordinary posts/pages so AI link injection can still occur
37
+ - Add regression test coverage for general-article fallback classification
38
+
39
+ ## 0.4.0 (2026-04-07)
40
+
41
+ - Add automatic AI resource generation per content page with deterministic `/ai/<type>/<slug>.{json,yml,md}` outputs
42
+ - Add content-aware entity classification heuristics (person/entity/topic) from front matter and page content
43
+ - Inject `<link rel="ai:*">` tags and AI parsing instruction block before `</body>` in rendered HTML
44
+ - Add `{% ai_resource_links %}` Liquid fallback for manual layout integration
45
+ - Exclude generated `/ai/` resources from content filtering/orphan detection
46
+ - Add unit and integration coverage for AI resource generation and HTML injection flow
47
+
48
+ ## 0.3.0 (2026-04-07)
49
+
50
+ - Fix false positives for orphan-page detection by analyzing rendered HTML instead of raw source content
51
+ - Build inbound-link graph from final `<a href>` values produced by Liquid/layout rendering
52
+ - Add canonical URL normalization for orphan analysis:
53
+ - strip query strings and hash fragments
54
+ - normalize `index.html` to directory URLs
55
+ - normalize trailing slashes for non-file paths
56
+ - resolve absolute internal URLs and handle `baseurl`
57
+ - Add regression tests for Liquid-generated links and URL normalization in content graph
58
+
59
+ ## 0.2.0 (2026-04-07)
60
+
61
+ - Add shared content filtering module to reduce validator noise on assets/generated pages
62
+ - Improve entity consistency checks with `entity.author_aliases` and `_data/authors.yml` resolution
63
+ - Add robots.txt conflict detection to skip generation when a site already provides `robots.txt`
64
+ - Add grouped validation output with counts and configurable examples (`validation.max_examples`)
65
+ - Add new validation config defaults: `content_only`, `exclude_paths`, `verbose`, `max_examples`
66
+ - Filter entity-map mention scanning to authored content pages only
67
+
68
+ ## 0.1.0
4
69
 
5
70
  - Initial release
6
71
  - JSON-LD generation: Person, BlogPosting, WebSite, BreadcrumbList, FAQPage, HowTo
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- jekyll-ai-visible-content (0.1.0)
4
+ jekyll-ai-visible-content (0.4.7)
5
5
  jekyll (>= 4.0, < 5.0)
6
6
 
7
7
  GEM
data/README.md CHANGED
@@ -90,6 +90,8 @@ ai_visible_content:
90
90
  same_as: # Links to authoritative profiles
91
91
  - https://linkedin.com/in/handle
92
92
  - https://github.com/handle
93
+ author_aliases: # Slugs that map to the canonical name
94
+ - your-slug # e.g., from _data/authors.yml keys
93
95
  works_for:
94
96
  type: Organization
95
97
  name: "Company Name"
@@ -130,6 +132,7 @@ ai_visible_content:
130
132
  # --- Internal Linking ---
131
133
  linking:
132
134
  enable_entity_links: true # Auto-link known entities in post body
135
+ apply_to_metadata: false # Safe default: never inject <a> into head/SEO/JSON-LD/feed fields
133
136
  entity_definitions: {} # Custom: slug -> {name, url, description}
134
137
  max_links_per_entity_per_post: 1
135
138
  enable_related_posts: true
@@ -143,8 +146,18 @@ ai_visible_content:
143
146
  warn_orphan_pages: true
144
147
  warn_missing_descriptions: true
145
148
  fail_build_on_error: false # true = exit 1 on validation failure
149
+ content_only: true # Only validate authored HTML content pages
150
+ exclude_paths: [] # Glob patterns to skip: ["/custom/*", "/drafts/*"]
151
+ verbose: false # true = show every warning; false = grouped summary
152
+ max_examples: 3 # Max examples per warning group in summary mode
146
153
  ```
147
154
 
155
+ ### Entity Linking Safety
156
+
157
+ `linking.apply_to_metadata` defaults to `false` to keep metadata as plain text. With this default, entity auto-linking is applied to article body content only and is not applied to `<head>` meta tags, JSON-LD descriptions, or feed summaries.
158
+
159
+ Set `linking.apply_to_metadata: true` only if you explicitly want legacy full-document linking behavior.
160
+
148
161
  ## Layout Integration
149
162
 
150
163
  ### Automatic Mode (Recommended)
@@ -320,14 +333,81 @@ Normalized, lowercase, hyphenated. Each tag can serve as a topic hub page.
320
333
 
321
334
  ## Build Validation
322
335
 
323
- During `jekyll build`, the plugin checks for:
336
+ During `jekyll build`, the plugin validates your site and prints a grouped summary:
337
+
338
+ ```
339
+ AI Visible Content: === Validation Report ===
340
+ AI Visible Content: 1 posts missing last_modified_at (freshness scoring disabled)
341
+ AI Visible Content: Missing last_modified_at in _posts/2026-03-04-redis.md
342
+ AI Visible Content: 1 content pages missing description
343
+ AI Visible Content: Missing description in _posts/2018-01-18-first-post.md
344
+ ```
345
+
346
+ ### What Gets Checked
347
+
348
+ | Check | Description | Config key |
349
+ |-------|-------------|------------|
350
+ | Name inconsistency | `site.author` or post `author:` differs from `entity.name` | `warn_name_inconsistency` |
351
+ | Missing sameAs | No links to LinkedIn, GitHub, etc. | `warn_missing_same_as` |
352
+ | Missing dateModified | Posts without `last_modified_at` | `warn_missing_dates` |
353
+ | Missing description | Content pages without `description` in front matter | `warn_missing_descriptions` |
354
+ | Orphan pages | Content pages with zero inbound internal links | `warn_orphan_pages` |
355
+ | Generic titles | Titles like "About" without entity name | always on |
356
+
357
+ ### Content-Only Filtering
358
+
359
+ By default (`content_only: true`), validation only checks authored HTML content pages. It automatically skips:
360
+
361
+ - **Generated files**: `robots.txt`, `llms.txt`, `llms-full.txt`, `entity-map.json`, `sitemap.xml`, `feed.xml`, `atom.xml`, `redirects.json`
362
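+ - **Asset files**: `.js`, `.css`, `.scss`, `.map`, `.json`, `.xml`, `.txt`, `.webmanifest`, plus image and font files (`.ico`, `.svg`, `.png`, `.jpg`, `.jpeg`, `.gif`, `.woff`, `.woff2`, `.ttf`, `.eot`)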
363
+ - **Tag/category pages**: `/tags/*`, `/categories/*`
364
+ - **Utility pages**: `404.html`, `redirect.html`, pagination pages (`/page2/`, etc.)
365
+ - **Assets directory**: anything under `/assets/`
366
+
367
+ Set `content_only: false` to validate all pages (not recommended for most sites).
368
+
369
+ ### Author Alias Resolution
370
+
371
+ Jekyll themes like Chirpy use `_data/authors.yml` to map author slugs to names. The plugin resolves author names through two mechanisms:
372
+
373
+ 1. **Explicit aliases** via `entity.author_aliases`:
374
+
375
+ ```yaml
376
+ ai_visible_content:
377
+ entity:
378
+ name: "Eugene Leontev"
379
+ author_aliases:
380
+ - eugene
381
+ - nasuta
382
+ ```
383
+
384
+ 2. **Automatic resolution** via `_data/authors.yml`: if a post's `author:` value is a key in `_data/authors.yml` whose `name` matches `entity.name`, no warning is emitted.
385
+
386
+ ### robots.txt Conflict Detection
387
+
388
+ If your site already has a `robots.txt` (as a source file or static file), the plugin skips generation and logs a warning. Either:
389
+ - Set `crawlers.generate_robots_txt: false` to silence the warning
390
+ - Remove your existing `robots.txt` to use the generated one with AI crawler rules
391
+
392
+ ### Excluding Paths from Validation
393
+
394
+ Use `validation.exclude_paths` to skip specific paths:
395
+
396
+ ```yaml
397
+ ai_visible_content:
398
+ validation:
399
+ exclude_paths:
400
+ - "/drafts/*"
401
+ - "/archive/*"
402
+ ```
403
+
404
+ ### Verbose Mode
405
+
406
+ Set `validation.verbose: true` to see every individual warning instead of grouped summaries. Useful for debugging but noisy on large sites.
407
+
408
+ ### Orphan Detection Limitation
324
409
 
325
- - **Name inconsistency**: `site.author` differs from `entity.name`
326
- - **Missing sameAs**: No links to LinkedIn, GitHub, etc.
327
- - **Missing dateModified**: Posts without `last_modified_at` (hurts freshness scoring)
328
- - **Missing description**: Pages without `description` in front matter
329
- - **Orphan pages**: Pages with zero inbound internal links
330
- - **Generic titles**: Titles like "About" without entity name
410
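+ Orphan detection analyzes the rendered HTML of each page and builds the inbound-link graph from the final `<a href>` values, so links generated by Liquid templates or layouts (e.g., `{{ post.url }}` in `{% for post in site.posts %}`) are counted. Versions before 0.3.0 scanned raw Markdown/HTML content for `href=` links and could not detect such links, so posts linked only through theme-generated navigation appeared as orphans; upgrade if you still see those false positives.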
331
411
 
332
412
  Set `validation.fail_build_on_error: true` to make errors break the build in CI.
333
413
 
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
16
16
 
17
17
  spec.required_ruby_version = '>= 3.2'
18
18
  spec.metadata['homepage_uri'] = spec.homepage
19
- spec.metadata['source_code_uri'] = spec.homepage
19
+ spec.metadata['source_code_uri'] = "#{spec.homepage}.git"
20
20
  spec.metadata['changelog_uri'] = "#{spec.homepage}/blob/master/CHANGELOG.md"
21
21
  spec.metadata['rubygems_mfa_required'] = 'true'
22
22
 
@@ -5,6 +5,7 @@ require 'json'
5
5
 
6
6
  require_relative 'jekyll_ai_visible_content/version'
7
7
  require_relative 'jekyll_ai_visible_content/configuration'
8
+ require_relative 'jekyll_ai_visible_content/content_filter'
8
9
  require_relative 'jekyll_ai_visible_content/entity/person'
9
10
  require_relative 'jekyll_ai_visible_content/entity/organization'
10
11
  require_relative 'jekyll_ai_visible_content/entity/registry'
@@ -16,15 +17,18 @@ require_relative 'jekyll_ai_visible_content/json_ld/breadcrumb_schema'
16
17
  require_relative 'jekyll_ai_visible_content/json_ld/faq_schema'
17
18
  require_relative 'jekyll_ai_visible_content/json_ld/how_to_schema'
18
19
  require_relative 'jekyll_ai_visible_content/json_ld/collection_schema'
20
+ require_relative 'jekyll_ai_visible_content/entity_classifier'
19
21
  require_relative 'jekyll_ai_visible_content/generators/llms_txt_generator'
20
22
  require_relative 'jekyll_ai_visible_content/generators/robots_txt_generator'
21
23
  require_relative 'jekyll_ai_visible_content/generators/entity_map_generator'
22
24
  require_relative 'jekyll_ai_visible_content/generators/content_graph_generator'
25
+ require_relative 'jekyll_ai_visible_content/generators/ai_resource_generator'
23
26
  require_relative 'jekyll_ai_visible_content/tags/ai_json_ld_tag'
24
27
  require_relative 'jekyll_ai_visible_content/tags/ai_author_tag'
25
28
  require_relative 'jekyll_ai_visible_content/tags/ai_entity_link_tag'
26
29
  require_relative 'jekyll_ai_visible_content/tags/ai_related_posts_tag'
27
30
  require_relative 'jekyll_ai_visible_content/tags/ai_breadcrumb_tag'
31
+ require_relative 'jekyll_ai_visible_content/tags/ai_resource_links_tag'
28
32
  require_relative 'jekyll_ai_visible_content/filters/naming_filter'
29
33
  require_relative 'jekyll_ai_visible_content/filters/entity_filter'
30
34
  require_relative 'jekyll_ai_visible_content/hooks/post_render_hook'
@@ -19,7 +19,8 @@ module JekyllAiVisibleContent
19
19
  'knows_about' => [],
20
20
  'same_as' => [],
21
21
  'works_for' => nil,
22
- 'occupation' => nil
22
+ 'occupation' => nil,
23
+ 'author_aliases' => []
23
24
  },
24
25
  'json_ld' => {
25
26
  'auto_inject' => true,
@@ -49,6 +50,7 @@ module JekyllAiVisibleContent
49
50
  },
50
51
  'linking' => {
51
52
  'enable_entity_links' => true,
53
+ 'apply_to_metadata' => false,
52
54
  'entity_definitions' => {},
53
55
  'max_links_per_entity_per_post' => 1,
54
56
  'enable_related_posts' => true,
@@ -60,7 +62,19 @@ module JekyllAiVisibleContent
60
62
  'warn_missing_dates' => true,
61
63
  'warn_orphan_pages' => true,
62
64
  'warn_missing_descriptions' => true,
63
- 'fail_build_on_error' => false
65
+ 'fail_build_on_error' => false,
66
+ 'exclude_paths' => [],
67
+ 'content_only' => true,
68
+ 'verbose' => false,
69
+ 'max_examples' => 3
70
+ },
71
+ 'ai_resources' => {
72
+ 'enabled' => true,
73
+ 'formats' => %w[json yaml markdown],
74
+ 'max_links_per_page' => 5,
75
+ 'auto_inject' => true,
76
+ 'inject_instruction_block' => true,
77
+ 'base_path' => '/ai'
64
78
  }
65
79
  }.freeze
66
80
 
@@ -108,6 +122,10 @@ module JekyllAiVisibleContent
108
122
  @raw['validation']
109
123
  end
110
124
 
125
+ def ai_resources
126
+ @raw['ai_resources']
127
+ end
128
+
111
129
  def site_url
112
130
  @site.config['url'] || ''
113
131
  end
@@ -0,0 +1,91 @@
# frozen_string_literal: true

module JekyllAiVisibleContent
  # Decides which Jekyll pages count as authored HTML content, as opposed to
  # generated files, assets, redirects, utility pages, or paths the user has
  # excluded through `validation.exclude_paths`.
  #
  # Every predicate duck-types its `doc` argument with `respond_to?`, so any
  # object exposing some of `url`, `name`, `output_ext`, `data` and
  # `relative_path` can be classified.
  module ContentFilter
    # File names that are never treated as authored content.
    GENERATED_NAMES = %w[
      robots.txt llms.txt llms-full.txt entity-map.json
      sitemap.xml feed.xml atom.xml redirects.json
    ].freeze

    # URL extensions that mark a document as an asset.
    ASSET_EXTENSIONS = %w[
      .js .css .scss .map .json .xml .txt .webmanifest .ico .svg .png .jpg .jpeg .gif .woff .woff2 .ttf .eot
    ].freeze

    # Extensions accepted as HTML output (or as sources that render to HTML).
    HTML_EXTENSIONS = %w[.html .htm .md .markdown].freeze

    # URL patterns for utility pages that are skipped.
    UTILITY_PATH_PATTERNS = [
      %r{^/404\.html$},
      %r{^/tags/},
      %r{^/categories/},
      %r{^/assets/},
      %r{^/page\d+/},
      %r{^/norobots/},
      %r{^/ai/}
    ].freeze

    class << self
      # Returns true when `doc` is an authored HTML content page.
      #
      # @param doc [Object] page-like object (see module comment)
      # @param config [Object, nil] object whose `validation` returns a Hash;
      #   when nil, the `exclude_paths` check is skipped
      # @return [Boolean]
      def content_page?(doc, config = nil)
        return false unless html_output?(doc)
        return false if generated_file?(doc) || asset_path?(doc)
        return false if redirect_page?(doc) || utility_page?(doc)

        !excluded_path?(doc, config)
      end

      # Returns every post plus the site pages that pass `content_page?`.
      #
      # NOTE(review): posts are included unfiltered, so `exclude_paths` and
      # redirect detection do not apply to them -- confirm this is intended.
      #
      # @param site [Object] responds to `posts.docs` and `pages`
      # @param config [Object, nil] forwarded to `content_page?`
      # @return [Array]
      def content_pages(site, config = nil)
        site.posts.docs + site.pages.select { |page| content_page?(page, config) }
      end

      private

      # URL of `doc` as a String; empty when `doc` has no `url`.
      def url_of(doc)
        doc.respond_to?(:url) ? doc.url.to_s : ''
      end

      # File name of `doc`, falling back to the last URL segment.
      def name_of(doc)
        doc.respond_to?(:name) ? doc.name : File.basename(url_of(doc))
      end

      # True when the output extension (or, failing that, the name/URL
      # extension) is HTML-like, or when the URL is a directory-style URL.
      def html_output?(doc)
        ext = doc.output_ext if doc.respond_to?(:output_ext)
        ext ||= File.extname(doc.respond_to?(:name) ? doc.name.to_s : url_of(doc))
        return true if HTML_EXTENSIONS.include?(ext)

        # A URL ending in "/" cannot also end in a file extension, so the
        # trailing-slash test alone identifies directory-style URLs.
        url_of(doc).end_with?('/')
      end

      # True when the file name is one of GENERATED_NAMES.
      def generated_file?(doc)
        GENERATED_NAMES.include?(name_of(doc))
      end

      # True for anything under /assets/ or with an asset extension in its URL.
      def asset_path?(doc)
        url = url_of(doc)
        url.start_with?('/assets/') || ASSET_EXTENSIONS.include?(File.extname(url))
      end

      # True for pages carrying `redirect_to` data or named redirect.html.
      def redirect_page?(doc)
        return true if doc.respond_to?(:data) && doc.data['redirect_to']

        name_of(doc) == 'redirect.html'
      end

      # True when the URL matches one of UTILITY_PATH_PATTERNS.
      def utility_page?(doc)
        url = url_of(doc)
        UTILITY_PATH_PATTERNS.any? { |pattern| url.match?(pattern) }
      end

      # True when the URL or relative path matches a `validation.exclude_paths`
      # glob. The setting is coerced with Array() so a single string written in
      # YAML (`exclude_paths: "/drafts/*"`) works like a one-element list, and
      # nil entries are dropped instead of raising inside File.fnmatch?.
      def excluded_path?(doc, config)
        return false unless config

        patterns = Array((config.validation || {})['exclude_paths']).compact.map(&:to_s)
        return false if patterns.empty?

        url = url_of(doc)
        path = doc.respond_to?(:relative_path) ? doc.relative_path.to_s : url

        patterns.any? do |pattern|
          File.fnmatch?(pattern, url) || File.fnmatch?(pattern, path)
        end
      end
    end
  end
end
@@ -0,0 +1,122 @@
# frozen_string_literal: true

module JekyllAiVisibleContent
  # Heuristically derives the entities (person / entity / topic) that a page is
  # about, from the page's front matter, URL and content plus the configured
  # site entity.
  #
  # Each entity is a Hash with the keys :type, :slug, :name and :relevance.
  module EntityClassifier
    RELEVANCE_FRONT_MATTER = 3
    RELEVANCE_TITLE = 2
    RELEVANCE_BODY = 1

    class << self
      # Classifies one page.
      #
      # @param doc [Object] responds to `data` (Hash), `url` and `content`
      # @param config [Object] responds to `ai_resources` (Hash), `entity`
      #   (Hash) and `entity_type`
      # @return [Array<Hash>] at most `ai_resources['max_links_per_page']`
      #   entities (5 when unset), unique by slug, highest relevance first;
      #   entities of equal relevance keep the order they were detected in
      def classify_page(doc, config)
        max = config.ai_resources['max_links_per_page'] || 5
        entities = []

        add_primary_entity(entities, doc, config)
        add_front_matter_topics(entities, doc, config)
        add_detected_topics(entities, doc, config)
        add_organization(entities, doc, config)
        add_general_topic_fallback(entities, doc)

        # sort_by is not stable, so the detection index is used as a
        # tie-breaker; otherwise `first(max)` could keep a different subset of
        # equally relevant entities from one run to the next.
        entities
          .uniq { |entity| entity[:slug] }
          .each_with_index
          .sort_by { |entity, index| [-entity[:relevance], index] }
          .first(max)
          .map(&:first)
      end

      # Lower-cases `name`, collapses every run of characters outside
      # `a-z0-9` into a single "-", and trims a leading/trailing "-".
      #
      # @param name [#to_s]
      # @return [String] possibly empty
      def slugify(name)
        name.to_s.downcase.gsub(/[^a-z0-9]+/, '-').delete_prefix('-').delete_suffix('-')
      end

      private

      # Adds the configured site entity when the page declares
      # `entity_type: person` or its URL ends in /about.
      def add_primary_entity(entities, doc, config)
        name = config.entity['name']
        return unless name

        # to_s keeps non-String front matter values from raising on downcase.
        person_page = doc.data['entity_type'].to_s.downcase == 'person' ||
                      doc.url.to_s.match?(%r{/about/?$})
        return unless person_page

        entities << {
          type: config.entity_type.to_s.downcase == 'organization' ? 'entity' : 'person',
          slug: slugify(config.entity['id_slug'] || name),
          name: name,
          relevance: RELEVANCE_FRONT_MATTER + 1
        }
      end

      # Adds one topic per entry of the page's `topics` front matter array.
      def add_front_matter_topics(entities, doc, _config)
        topics = doc.data['topics']
        return unless topics.is_a?(Array)

        topics.each do |topic|
          slug = slugify(topic)
          # Blank or punctuation-only topics have no usable slug.
          next if slug.empty?

          entities << { type: 'topic', slug: slug, name: topic, relevance: RELEVANCE_FRONT_MATTER }
        end
      end

      # Adds configured `knows_about` topics that occur (case-insensitively, as
      # a substring) in the title/description or in the body.
      def add_detected_topics(entities, doc, config)
        known_topics = Array(config.entity['knows_about'])
        return if known_topics.empty?

        # YAML may yield Integer/Date values (e.g. `title: 2024`), hence to_s.
        title = doc.data['title'].to_s.downcase
        description = doc.data['description'].to_s.downcase
        body = doc.content.to_s.downcase

        known_topics.each do |topic|
          needle = topic.to_s.downcase
          slug = slugify(topic)
          # An empty needle is a substring of everything; skip it.
          next if needle.empty? || slug.empty?

          relevance = if title.include?(needle) || description.include?(needle)
                        RELEVANCE_TITLE
                      elsif body.include?(needle)
                        RELEVANCE_BODY
                      end
          next unless relevance

          entities << { type: 'topic', slug: slug, name: topic, relevance: relevance }
        end
      end

      # Adds the configured `works_for` organization when its name occurs in
      # the page title, description or content.
      def add_organization(entities, doc, config)
        works_for = config.entity['works_for']
        return unless works_for.is_a?(Hash) && works_for['name']

        org_name = works_for['name']
        text = "#{doc.data['title']} #{doc.data['description']} #{doc.content}".downcase
        return unless text.include?(org_name.to_s.downcase)

        entities << {
          type: 'entity',
          slug: slugify(org_name),
          name: org_name,
          relevance: RELEVANCE_BODY
        }
      end

      # When nothing else matched, derives a single topic from the last URL
      # segment, falling back to the title when the URL yields no slug.
      def add_general_topic_fallback(entities, doc)
        return unless entities.empty?

        title = doc.data['title'].to_s.strip
        slug = slugify(doc.url.to_s.split('/').reject(&:empty?).last)
        slug = slugify(title) if slug.empty?
        return if slug.empty?

        name = title.empty? ? slug.tr('-', ' ').split.map(&:capitalize).join(' ') : title

        entities << { type: 'topic', slug: slug, name: name, relevance: RELEVANCE_BODY }
      end
    end
  end
end