poops 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,261 @@
1
+ import fs from 'node:fs'
2
+ import { globSync } from 'glob'
3
+ import { Marked } from 'marked'
4
+ import nunjucks from 'nunjucks'
5
+ import path from 'node:path'
6
+ import { discoverImageVariants, parseFrontMatter } from '../helpers.js'
7
+ import { highlightRenderer, highlightCode } from '../highlight.js'
8
+ import { slugify } from 'book-of-spells'
9
+ import dayjs from 'dayjs'
10
+ import log from '../../utils/log.js'
11
+
12
+ const marked = new Marked({ renderer: highlightRenderer })
13
+
14
/**
 * Nunjucks loader that finds templates by name inside `templatesDir`, strips
 * front matter, renders Markdown sources to HTML and wraps sources that
 * declare a `layout` in an `extends` block.
 */
class RelativeLoader extends nunjucks.Loader {
  /**
   * @param {string} templatesDir - Directory the glob lookup is rooted at.
   * @param {string[]} [includePaths] - Directory globs to search; `_*` is
   *   always appended.
   */
  constructor(templatesDir, includePaths) {
    super()
    this.templatesDir = templatesDir
    // Build a fresh array: pushing onto the caller's array would add another
    // `_*` entry to it every time a loader is constructed.
    this.includePaths = [].concat(includePaths || [], '_*')
  }

  /**
   * Loads a template source.
   *
   * @param {string} name - Existing file path, or a name to look up with glob.
   * @returns {{ src: string, path: string|undefined, noCache: boolean }}
   *   An empty `src` when the template cannot be found.
   */
  getSource(name) {
    let fullPath = name
    if (!fs.existsSync(name)) {
      // A brace group with a single alternative ("{_*}") is not expanded by
      // glob, so braces are only used when there are several directories.
      const dirs = this.includePaths.length > 1
        ? `{${this.includePaths.join(',')}}`
        : this.includePaths[0]
      fullPath = globSync(path.join(this.templatesDir, `${dirs}/**/${name}`))[0]
    }
    // `fullPath` is undefined when the glob matched nothing.
    if (!fullPath || !fs.existsSync(fullPath)) {
      log({ tag: 'markup', error: true, text: 'Template not found:', link: name })
      return { src: '', path: fullPath, noCache: true }
    }

    let source = ''
    let frontMatter = {}

    try {
      const frontMatterResult = parseFrontMatter(fullPath)
      frontMatter = frontMatterResult.frontMatter
      source = frontMatterResult.content
    } catch (err) {
      // Best effort: report the problem and continue with an empty source.
      log({ tag: 'error', text: 'Failed parsing front matter:', link: fullPath })
      console.error(err)
    }

    if (path.extname(fullPath) === '.md') {
      source = marked.parse(source)
    }

    if (frontMatter.layout) {
      source = `{% extends '${frontMatter.layout}.html' %}\n{% block content %}\n${source}\n{% endblock %}`
    }

    return { src: source, path: fullPath, noCache: true }
  }

  /** Resolves `to` against the directory of the template `from`. */
  resolve(from, to) {
    return path.resolve(path.dirname(from), to)
  }
}
63
+
64
/**
 * Markup engine built on a Nunjucks environment whose templates are loaded
 * through `RelativeLoader`.
 */
export default class NunjucksEngine {
  /**
   * @param {string} templatesDir - Forwarded to the loader.
   * @param {string[]} [includePaths] - Forwarded to the loader.
   * @param {{ autoescape?: boolean }} [options] - `autoescape` defaults to false.
   */
  constructor(templatesDir, includePaths, options) {
    const autoescape = (options && options.autoescape) || false

    this.env = new nunjucks.Environment(
      new RelativeLoader(templatesDir, includePaths),
      { autoescape, watch: false, noCache: true }
    )
  }

  get fileExtension() { return '.njk' }
  get indexableExtensions() { return new Set(['.html', '.md', '.njk']) }
  get markupExtensions() { return 'html|xml|rss|atom|json|njk|md' }

  /**
   * Registers the template filters on the environment.
   *
   * @param {object} settings
   * @param {string} [settings.timeDateFormat] - Default format of the `date` filter.
   * @param {string} settings.markupOut - Output directory (relative to the
   *   working directory) searched by the `srcset` filter.
   */
  registerFilters({ timeDateFormat, markupOut }) {
    const env = this.env
    env.addFilter('slugify', slugify)
    env.addFilter('jsonify', (obj) => JSON.stringify(obj))
    env.addFilter('markdown', (str) => marked.parse(str))
    env.addFilter('concat', (arr, value) => {
      if (!Array.isArray(arr)) return [value]
      return arr.concat(value)
    })
    // Unlike `concat`, `push` appends to the given array in place.
    env.addFilter('push', (arr, value) => {
      if (!Array.isArray(arr)) return [value]
      arr.push(value)
      return arr
    })
    // Inlines a file only when its content starts like an SVG document.
    env.addFilter('svg', (filePath) => {
      const fullPath = path.resolve(process.cwd(), filePath)
      if (!fs.existsSync(fullPath)) return ''
      const content = fs.readFileSync(fullPath, 'utf-8').trim()
      if (!/^(<\?xml[^?]*\?>\s*)?<svg[\s>]/i.test(content)) return ''
      return new nunjucks.runtime.SafeString(content)
    })
    env.addFilter('date', (str, template) => {
      const fmt = template || timeDateFormat
      if (!fmt) return str
      // Only strings can be blank; Date objects and timestamps have no
      // `trim()` and used to throw here.
      const isBlank = !str || (typeof str === 'string' && str.trim() === '')
      const date = isBlank ? new Date() : new Date(str)
      return dayjs(date).format(fmt)
    })
    env.addFilter('srcset', (imagePath) => {
      const outputDir = path.join(process.cwd(), markupOut)
      const { variants } = discoverImageVariants(imagePath, outputDir)
      if (variants.length === 0) return ''
      return variants.map(v => `${v.path} ${v.width}w`).join(', ')
    })
    env.addFilter('highlight', (code, lang) => {
      const highlighted = highlightCode(code, lang)
      const langClass = lang ? ` language-${lang}` : ''
      return new nunjucks.runtime.SafeString(`<pre><code class="hljs${langClass}">${highlighted}</code></pre>`)
    })
  }

  /**
   * Registers the custom tags (`googleFonts`, `image`, `highlight`).
   *
   * @param {Function} getOutputDir - Handed to the image tag, which calls it
   *   to obtain the directory searched for image variants.
   */
  registerTags(getOutputDir) {
    this.env.addExtension('GoogleFontsExtension', new GoogleFontsExtension())
    this.env.addExtension('ImageExtension', new ImageExtension(getOutputDir))
    this.env.addExtension('HighlightExtension', new HighlightExtension())
  }

  /** Adds a global variable to the environment. */
  setGlobal(key, value) {
    this.env.addGlobal(key, value)
  }

  /**
   * Renders a template by name.
   *
   * @returns {Promise<string>} Resolves with the output, rejects with the
   *   render error.
   */
  render(templateName, context) {
    return new Promise((resolve, reject) => {
      this.env.getTemplate(templateName).render(context, (error, result) => {
        if (!error) {
          resolve(result)
        } else {
          reject(error)
        }
      })
    })
  }

  /**
   * Renders a template given as a string.
   *
   * @returns {Promise<string>} Resolves with the output, rejects with the
   *   render error.
   */
  renderString(source, context) {
    return new Promise((resolve, reject) => {
      this.env.renderString(source, context, (error, result) => {
        if (!error) {
          resolve(result)
        } else {
          reject(error)
        }
      })
    })
  }
}
152
+
153
+ // --- Nunjucks Extensions ---
154
+
155
/**
 * `{% googleFonts fonts, display="swap" %}` tag: emits the preconnect and
 * stylesheet `<link>` elements for a Google Fonts CSS2 request.
 */
class GoogleFontsExtension {
  constructor() { this.tags = ['googleFonts'] }

  parse(parser, nodes) {
    const tok = parser.nextToken()
    const args = parser.parseSignature(null, true)
    parser.advanceAfterBlockEnd(tok.value)
    return new nodes.CallExtension(this, 'run', args)
  }

  /**
   * @param {object} context - Render context (unused).
   * @param {string|object|Array<string|object>} fonts - Family names, or
   *   objects with `name`/`family`, optional `weights`/`wght` and `ital`.
   * @param {object} [kwargs] - `display` selects the font-display value
   *   (default `swap`).
   * @returns {string|object} `''` when no fonts are given, otherwise a
   *   SafeString with the `<link>` markup.
   */
  run(context, fonts, kwargs) {
    if (!fonts || (Array.isArray(fonts) && fonts.length === 0)) return ''
    const fontList = Array.isArray(fonts) ? fonts : [fonts]
    const display = (kwargs && kwargs.display) || 'swap'

    // The CSS2 endpoint expects axis values in ascending order.
    const ascending = (values) => [...values].sort((a, b) => parseFloat(a) - parseFloat(b))

    const families = fontList.map(font => {
      if (typeof font === 'string') return `family=${font.replace(/\s+/g, '+')}`
      const name = font.name || font.family || ''
      const weights = font.weights || font.wght
      const family = `family=${name.replace(/\s+/g, '+')}`
      const wList = weights ? ascending(Array.isArray(weights) ? weights : [weights]) : []

      if (font.ital) {
        // Italic without explicit weights: request both styles. This case
        // used to crash on a null regex match.
        if (wList.length === 0) return `${family}:ital@0;1`
        // All upright tuples first, then all italic ones, so the tuple list
        // stays sorted (0,400;0,700;1,400;1,700).
        const tuples = [...wList.map(w => `0,${w}`), ...wList.map(w => `1,${w}`)]
        return `${family}:ital,wght@${tuples.join(';')}`
      }
      return wList.length > 0 ? `${family}:wght@${wList.join(';')}` : family
    })

    const url = `https://fonts.googleapis.com/css2?${families.join('&')}&display=${display}`
    return new nunjucks.runtime.SafeString([
      '<link rel="preconnect" href="https://fonts.googleapis.com">',
      '<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>',
      `<link href="${url}" rel="stylesheet">`
    ].join('\n'))
  }
}
197
+
198
/**
 * `{% image path, alt="...", sizes="...", loading="..." %}` tag: renders an
 * `<img>` element, adding `srcset`/`sizes` when resized variants are found.
 */
class ImageExtension {
  /**
   * @param {Function} getOutputDir - Called on each render to obtain the
   *   directory handed to `discoverImageVariants`.
   */
  constructor(getOutputDir) { this.tags = ['image']; this.getOutputDir = getOutputDir }

  parse(parser, nodes) {
    const tok = parser.nextToken()
    const args = parser.parseSignature(null, true)
    parser.advanceAfterBlockEnd(tok.value)
    return new nodes.CallExtension(this, 'run', args)
  }

  /**
   * @param {object} context - Render context; `relativePathPrefix` is looked
   *   up and prepended to every emitted path.
   * @param {string} imagePath - Image path as written in the template.
   * @param {object} [kwargs] - `alt`, `sizes` and `loading` are handled
   *   specially; other keys not starting with `__` become attributes.
   * @returns {object} SafeString containing the `<img>` element.
   */
  run(context, imagePath, kwargs) {
    // A raw double quote would end the attribute early. Only the quote is
    // encoded so values that already contain entities are not double-escaped.
    const quote = (value) => String(value).replace(/"/g, '&quot;')

    const prefix = context.lookup('relativePathPrefix') || ''
    const alt = (kwargs && kwargs.alt) || ''
    const loading = (kwargs && kwargs.loading) || 'lazy'
    const isSvg = imagePath.endsWith('.svg')
    const attrs = [`alt="${quote(alt)}"`]

    if (isSvg) {
      // SVGs are used as-is; no variant lookup.
      attrs.unshift(`src="${quote(prefix + imagePath)}"`)
    } else {
      const outputDir = this.getOutputDir()
      const { src, variants } = discoverImageVariants(imagePath, outputDir)
      const sizes = (kwargs && kwargs.sizes) || '100vw'
      attrs.unshift(`src="${quote(prefix + src)}"`)
      if (variants.length > 0) {
        const srcsetVal = variants.map(v => `${prefix}${v.path} ${v.width}w`).join(', ')
        attrs.push(`srcset="${quote(srcsetVal)}"`)
        attrs.push(`sizes="${quote(sizes)}"`)
      }
    }

    attrs.push(`loading="${quote(loading)}"`)
    if (kwargs) {
      const skip = new Set(['alt', 'sizes', 'loading'])
      for (const [key, val] of Object.entries(kwargs)) {
        // Keys starting with `__` are skipped rather than emitted as attributes.
        if (key.startsWith('__') || skip.has(key)) continue
        attrs.push(`${key}="${quote(val)}"`)
      }
    }
    return new nunjucks.runtime.SafeString(`<img ${attrs.join(' ')}>`)
  }
}
240
+
241
/**
 * `{% highlight lang %}…{% endhighlight %}` block tag: renders the enclosed
 * body as a highlighted `<pre><code>` block.
 */
class HighlightExtension {
  constructor() { this.tags = ['highlight'] }

  parse(parser, nodes) {
    const tok = parser.nextToken()
    const args = parser.parseSignature(null, true)
    parser.advanceAfterBlockEnd(tok.value)
    const body = parser.parseUntilBlocks('endhighlight')
    parser.advanceAfterBlockEnd()
    return new nodes.CallExtension(this, 'run', args, [body])
  }

  /**
   * @param {object} context - Render context (unused).
   * @param {string} [lang] - Language name; omitted means auto-detection.
   * @param {Function} body - Callback returning the rendered block content.
   * @returns {object} SafeString with the highlighted markup.
   */
  run(context, lang, body) {
    // When the tag is written without a language the signature is empty and
    // the body callback arrives in the `lang` position.
    if (typeof lang === 'function' && body === undefined) {
      body = lang
      lang = undefined
    }
    const code = body()
    const highlighted = highlightCode(code, lang)
    const langClass = lang ? ` language-${lang}` : ''
    return new nunjucks.runtime.SafeString(
      `<pre><code class="hljs${langClass}">${highlighted}</code></pre>`
    )
  }
}
@@ -0,0 +1,170 @@
1
+ import fs from 'node:fs'
2
+ import path from 'node:path'
3
+ import yaml from 'yaml'
4
+
5
// Parsed results keyed by file path. An entry is reused only while the
// file's mtime and size are unchanged.
const frontMatterCache = new Map()

/**
 * Reads a file and splits it into YAML front matter and remaining content.
 *
 * @param {string} filePath - File to read.
 * @returns {{ frontMatter: object, content: string }} `frontMatter` is a
 *   shallow copy and is `{}` when the file has no front matter or the front
 *   matter is not a YAML mapping.
 * @throws {Error} When the file cannot be stat'ed or read, is empty, or its
 *   front matter is not valid YAML.
 */
export function parseFrontMatter(filePath) {
  let stat
  try {
    stat = fs.statSync(filePath)
  } catch (e) {
    throw new Error(`Error stating file at ${filePath}: ${e.message}`, { cause: e })
  }

  const cached = frontMatterCache.get(filePath)
  if (cached && cached.mtimeMs === stat.mtimeMs && cached.size === stat.size) {
    return { frontMatter: { ...cached.value.frontMatter }, content: cached.value.content }
  }

  let content = ''
  try {
    content = fs.readFileSync(filePath, 'utf8')
  } catch (e) {
    throw new Error(`Error reading file at ${filePath}: ${e.message}`, { cause: e })
  }

  if (!content) {
    throw new Error(`File at ${filePath} is empty`)
  }

  // The closing delimiter may be followed by a line break or by the end of
  // the file (a front-matter-only file without a trailing newline).
  const frontMatterRegex = /^\s*---\s*[\r\n]+([\s\S]*?)\s*---\s*(?:[\r\n]+|$)/
  const match = content.match(frontMatterRegex)

  if (!match) {
    const value = { frontMatter: {}, content }
    frontMatterCache.set(filePath, { mtimeMs: stat.mtimeMs, size: stat.size, value })
    return { frontMatter: {}, content }
  }

  let parsed
  try {
    parsed = yaml.parse(match[1])
  } catch (e) {
    throw new Error(`Error parsing front matter in file at ${filePath}: ${e.message}`, { cause: e })
  }

  // An empty block parses to null and a bare scalar or list is not a
  // mapping; treat all of those as "no fields".
  const isMapping = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)
  const frontMatter = isMapping ? parsed : {}

  const contentWithoutFrontMatter = content.slice(match[0].length)
  const value = { frontMatter, content: contentWithoutFrontMatter }
  frontMatterCache.set(filePath, { mtimeMs: stat.mtimeMs, size: stat.size, value })
  return { frontMatter: { ...frontMatter }, content: contentWithoutFrontMatter }
}
52
+
53
/**
 * Drops cached front matter.
 *
 * @param {string} [filePath] - Entry to remove; when omitted (or otherwise
 *   falsy) the whole cache is emptied.
 */
export function clearFrontMatterCache(filePath) {
  if (filePath) {
    frontMatterCache.delete(filePath)
  } else {
    frontMatterCache.clear()
  }
}
60
+
61
// Formats preferred for srcset, most preferred first.
const FORMAT_PRIORITY = ['avif', 'webp']

/**
 * Finds resized variants of an image (`<name>-<width>w.<format>`) next to
 * where the image is expected inside `outputDir`.
 *
 * @param {string} imagePath - Image path; its directory part is resolved
 *   inside `outputDir` and reused as the prefix of the returned paths.
 * @param {string} outputDir - Directory that contains the generated images.
 * @returns {{ src: string, variants: Array<{ path: string, width: number, format: string }> }}
 *   `variants` are the width-sorted files of the chosen srcset format. `src`
 *   is the middle-sized variant in the original format, else the middle one
 *   of `variants`, else `imagePath` itself.
 */
export function discoverImageVariants(imagePath, outputDir) {
  const parsed = path.parse(imagePath)
  const dir = path.join(outputDir, parsed.dir)
  const baseName = parsed.name
  const originalExt = parsed.ext.replace('.', '')
  const pattern = /^(.+)-(\d+)w\.([a-z0-9]+)$/

  let files
  try {
    files = fs.readdirSync(dir)
  } catch {
    // Unreadable or missing directory: fall back to the plain image.
    return { src: imagePath, variants: [] }
  }

  const variants = []
  for (const file of files) {
    const match = pattern.exec(file)
    if (!match || match[1] !== baseName) continue
    variants.push({
      // The src/srcset callers put these paths into HTML, so join with
      // forward slashes on every platform.
      path: path.posix.join(parsed.dir, file),
      width: Number.parseInt(match[2], 10),
      format: match[3]
    })
  }

  variants.sort((a, b) => a.width - b.width)

  // srcset format: first preferred format present, then the original
  // format, then whichever format was found first.
  const availableFormats = new Set(variants.map(v => v.format))
  const srcsetFormat = FORMAT_PRIORITY.find(fmt => availableFormats.has(fmt)) ||
    (availableFormats.has(originalExt) ? originalExt : [...availableFormats][0])

  const srcsetVariants = srcsetFormat ? variants.filter(v => v.format === srcsetFormat) : []

  // src fallback: middle-sized variant, preferring the original format.
  const originalVariants = variants.filter(v => v.format === originalExt)
  const fallbackPool = originalVariants.length > 0 ? originalVariants : srcsetVariants
  const src = fallbackPool.length > 0
    ? fallbackPool[Math.floor((fallbackPool.length - 1) / 2)].path
    : imagePath

  return { src, variants: srcsetVariants }
}
123
+
124
/**
 * Maps a template file name to its output name: a trailing `.md`, `.njk` or
 * `.liquid` extension becomes `.html`; any other path is returned unchanged.
 *
 * @param {string} outputPath
 * @returns {string}
 */
export function replaceOutExtensions(outputPath) {
  const templateExtensions = ['.md', '.njk', '.liquid']
  const ext = path.extname(outputPath)
  // `extname` ignores trailing separators, so also require that the path
  // really ends with the extension before swapping it.
  if (!templateExtensions.includes(ext) || !outputPath.endsWith(ext)) {
    return outputPath
  }
  return `${outputPath.slice(0, outputPath.length - ext.length)}.html`
}
138
+
139
/**
 * Builds the `../` prefix that climbs out of a relative directory.
 *
 * @param {string} relativeDir - Slash-separated directory; one leading and
 *   one trailing slash are ignored.
 * @returns {string} One `../` per path segment, or `''` for a blank input.
 */
export function getUpDirPrefix(relativeDir) {
  if (relativeDir.trim() === '') return ''
  const withoutLeading = relativeDir.startsWith('/') ? relativeDir.slice(1) : relativeDir
  const trimmed = withoutLeading.endsWith('/') ? withoutLeading.slice(0, -1) : withoutLeading
  return '../'.repeat(trimmed.split('/').length)
}
150
+
151
/**
 * Computes the `../` prefix leading from `outputDir` back up to `fromDir`,
 * or to the working directory when `fromDir` is omitted or does not contain
 * `outputDir`.
 *
 * @param {string} outputDir - Directory the page is written to.
 * @param {string} [fromDir] - Directory the prefix should lead back to.
 * @returns {string} Zero or more `../` segments.
 */
export function getRelativePathPrefix(outputDir, fromDir) {
  let relativeDir = path.relative(process.cwd(), outputDir)
  const fromRelativeDir = fromDir ? path.relative(process.cwd(), fromDir) : ''

  if (fromRelativeDir) {
    if (relativeDir === fromRelativeDir) {
      relativeDir = ''
    } else if (relativeDir.startsWith(fromRelativeDir + path.sep)) {
      // Match on a whole-segment boundary: "out/site" must not be treated as
      // a parent of "out/site-v2/blog".
      relativeDir = relativeDir.slice(fromRelativeDir.length)
    }
  }

  return getUpDirPrefix(relativeDir)
}
161
+
162
/**
 * Derives a page URL, relative to the working directory, from an output
 * path. Pages whose file name ends in `index.<ext>` map to their directory;
 * all other pages map to the file itself (with template extensions replaced).
 *
 * @param {string} outputPath
 * @returns {string}
 */
export function getPageUrl(outputPath) {
  const htmlPath = replaceOutExtensions(outputPath)
  const isIndexPage = /index\.[a-z]+$/.test(path.basename(htmlPath))
  const target = isIndexPage ? path.dirname(htmlPath) : htmlPath
  return path.relative(process.cwd(), target)
}
166
+
167
/**
 * Same as `getPageUrl`, but expressed relative to `outputDir` instead of the
 * working directory.
 *
 * @param {string} outputPath
 * @param {string} outputDir
 * @returns {string}
 */
export function getPageUrlRelativeToOutput(outputPath, outputDir) {
  return path.relative(outputDir, getPageUrl(outputPath))
}
@@ -0,0 +1,77 @@
1
+ import hljs from 'highlight.js/lib/core'
2
+
3
+ import javascript from 'highlight.js/lib/languages/javascript'
4
+ import typescript from 'highlight.js/lib/languages/typescript'
5
+ import css from 'highlight.js/lib/languages/css'
6
+ import scss from 'highlight.js/lib/languages/scss'
7
+ import xml from 'highlight.js/lib/languages/xml'
8
+ import json from 'highlight.js/lib/languages/json'
9
+ import bash from 'highlight.js/lib/languages/bash'
10
+ import shell from 'highlight.js/lib/languages/shell'
11
+ import python from 'highlight.js/lib/languages/python'
12
+ import ruby from 'highlight.js/lib/languages/ruby'
13
+ import php from 'highlight.js/lib/languages/php'
14
+ import java from 'highlight.js/lib/languages/java'
15
+ import c from 'highlight.js/lib/languages/c'
16
+ import cpp from 'highlight.js/lib/languages/cpp'
17
+ import csharp from 'highlight.js/lib/languages/csharp'
18
+ import go from 'highlight.js/lib/languages/go'
19
+ import rust from 'highlight.js/lib/languages/rust'
20
+ import yaml from 'highlight.js/lib/languages/yaml'
21
+ import markdown from 'highlight.js/lib/languages/markdown'
22
+ import sql from 'highlight.js/lib/languages/sql'
23
+ import diff from 'highlight.js/lib/languages/diff'
24
+
25
// Each grammar together with every name it is registered under. Order
// matches the registration order used so far.
const LANGUAGE_REGISTRATIONS = [
  [javascript, ['javascript', 'js']],
  [typescript, ['typescript', 'ts']],
  [css, ['css']],
  [scss, ['scss']],
  [xml, ['html', 'xml']],
  [json, ['json']],
  [bash, ['bash', 'sh']],
  [shell, ['shell']],
  [python, ['python', 'py']],
  [ruby, ['ruby', 'rb']],
  [php, ['php']],
  [java, ['java']],
  [c, ['c']],
  [cpp, ['cpp']],
  [csharp, ['csharp', 'cs']],
  [go, ['go']],
  [rust, ['rust', 'rs']],
  [yaml, ['yaml', 'yml']],
  [markdown, ['markdown', 'md']],
  [sql, ['sql']],
  [diff, ['diff']]
]

for (const [grammar, names] of LANGUAGE_REGISTRATIONS) {
  for (const name of names) {
    hljs.registerLanguage(name, grammar)
  }
}
56
+
57
/**
 * Escapes `&`, `<`, `>` and `"` so the string can be placed in HTML text or
 * a double-quoted attribute.
 *
 * @param {string} str
 * @returns {string}
 */
function escapeHtml(str) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;' }
  return str.replace(/[&<>"]/g, (ch) => entities[ch])
}
60
+
61
/**
 * Highlights source code and returns the resulting HTML.
 *
 * @param {string} code - Source text to highlight.
 * @param {string} [language] - Language name or alias; it is lower-cased and
 *   trimmed before lookup. When it is missing or not registered, the
 *   language is auto-detected.
 * @returns {string} Highlighted HTML.
 */
export function highlightCode(code, language) {
  if (!language) return hljs.highlightAuto(code).value

  const normalized = language.toLowerCase().trim()
  return hljs.getLanguage(normalized)
    ? hljs.highlight(code, { language: normalized }).value
    : hljs.highlightAuto(code).value
}
70
+
71
/**
 * Renderer override for fenced code blocks: highlights the code and tags
 * the `<code>` element with the language.
 */
export const highlightRenderer = {
  /**
   * @param {string|object} code - Code text, or a token object carrying
   *   `text` and `lang`.
   *   NOTE(review): which call shape is used depends on the installed
   *   marked version — confirm against the pinned dependency.
   * @param {string} [lang] - Info string of the fence (positional form only).
   * @returns {string} `<pre><code>` markup followed by a newline.
   */
  code(code, lang) {
    if (code !== null && typeof code === 'object') {
      lang = code.lang
      code = code.text
    }
    // Only the first word of the info string names the language; anything
    // after it (e.g. "js {1,3}") must not reach the lookup or the class.
    const language = typeof lang === 'string' ? lang.trim().split(/\s+/)[0] : ''
    const highlighted = highlightCode(code, language)
    const langClass = language ? ` language-${escapeHtml(language)}` : ''
    return `<pre><code class="hljs${langClass}">${highlighted}</code></pre>\n`
  }
}
@@ -0,0 +1,154 @@
1
+ import fs from 'node:fs'
2
+ import path from 'node:path'
3
+ import { fileURLToPath } from 'node:url'
4
+ import { readJsonFile, fileSize } from '../utils/helpers.js'
5
+ import log from '../utils/log.js'
6
+
7
+ const __dirname = path.dirname(fileURLToPath(import.meta.url))
8
+ const DEFAULT_STOP_WORDS_PATH = path.join(__dirname, 'stop-words-en.json')
9
+
10
/**
 * Resolves the stop-word option into a Set.
 *
 * @param {false|string[]|string} [stopWordsOption] - `false` disables stop
 *   words, an array is used directly, a string is a JSON file path resolved
 *   against the working directory; anything else loads the default list.
 * @returns {Set<string>} Empty when disabled or when the file fails to load.
 */
function loadStopWords(stopWordsOption) {
  if (stopWordsOption === false) return new Set()
  if (Array.isArray(stopWordsOption)) return new Set(stopWordsOption)

  let filePath = DEFAULT_STOP_WORDS_PATH
  if (typeof stopWordsOption === 'string') {
    filePath = path.resolve(process.cwd(), stopWordsOption)
  }

  let words
  try {
    words = new Set(readJsonFile(filePath))
  } catch (err) {
    // Indexing continues without stop words when the list cannot be loaded.
    log({ tag: 'indexer', error: true, text: 'Failed loading stop words:', link: filePath })
    words = new Set()
  }
  return words
}
25
+
26
// Fallback values for options a config does not set.
const DEFAULTS = { minWordLength: 3, maxKeywords: 20, globalFrequencyCeiling: 0.8 }

// Page-entry fields that are left out of the search index.
const INTERNAL_FIELDS = new Set(['content', 'isIndex', 'layout', 'published'])

/**
 * Turns an index config into an options object with defaults filled in.
 *
 * @param {string|object|null|undefined|false} config - A string is taken as
 *   the output file name; an object overrides the defaults.
 * @returns {object|null} `null` when `config` is falsy.
 */
function normalizeConfig(config) {
  if (!config) return null
  return typeof config === 'string'
    ? { output: config, ...DEFAULTS }
    : { ...DEFAULTS, ...config }
}
39
+
40
/**
 * Extracts the distinct words of an HTML fragment, most frequent first.
 *
 * @param {string} htmlContent - HTML whose tags are stripped before counting.
 * @param {object} [options]
 * @param {number} [options.minWordLength] - Shorter words are dropped
 *   (defaults to `DEFAULTS.minWordLength`).
 * @param {Set<string>} [options.stopWords] - Lower-cased words to ignore.
 * @returns {string[]} Lower-cased keywords ordered by descending frequency;
 *   equally frequent words keep their order of first appearance.
 */
export function extractKeywords(htmlContent, options = {}) {
  const { minWordLength = DEFAULTS.minWordLength, stopWords = new Set() } = options

  const words = htmlContent
    .replace(/<[^>]*>/g, ' ')
    .toLowerCase()
    // Keep letters, marks and digits of any script; an ASCII-only class
    // would cut words such as "café" apart.
    .replace(/[^\p{L}\p{M}\p{N}\-\s]/gu, ' ')
    .split(/\s+/)
    .filter(w =>
      w.length >= minWordLength &&
      // Drop tokens made only of hyphens (e.g. "---").
      /[\p{L}\p{N}]/u.test(w) &&
      !stopWords.has(w) &&
      !/^\d+$/.test(w)
    )

  const freq = new Map()
  for (const word of words) {
    freq.set(word, (freq.get(word) || 0) + 1)
  }

  return [...freq.entries()]
    .sort((a, b) => b[1] - a[1])
    .map(([word]) => word)
}
59
+
60
/**
 * Removes keywords that appear on too many pages to be useful for search.
 *
 * @param {Array<{ keywords?: string[] }>} entries - Index entries; their
 *   `keywords` property is replaced in place when words are removed.
 * @param {number} ceiling - Fraction of pages a keyword may appear on.
 * @returns {Array} The same `entries` array.
 */
function applyGlobalFrequencyCeiling(entries, ceiling) {
  const totalPages = entries.length
  if (totalPages === 0) return entries

  // Every keyword may appear on at least one page, however small the site.
  const maxAppearances = Math.max(1, Math.floor(totalPages * ceiling))

  // Entries may carry author-supplied `keywords` that are not arrays (for
  // example a plain string); those are left untouched.
  const indexable = entries.filter(entry => Array.isArray(entry.keywords))

  const wordPageCount = new Map()
  for (const entry of indexable) {
    // Count each word once per page.
    for (const word of new Set(entry.keywords)) {
      wordPageCount.set(word, (wordPageCount.get(word) || 0) + 1)
    }
  }

  const tooCommon = new Set()
  for (const [word, count] of wordPageCount) {
    if (count > maxAppearances) tooCommon.add(word)
  }

  if (tooCommon.size === 0) return entries

  for (const entry of indexable) {
    entry.keywords = entry.keywords.filter(w => !tooCommon.has(w))
  }

  return entries
}
87
+
88
/**
 * Escapes the five characters that are special in XML text and attributes.
 *
 * @param {string} str
 * @returns {string}
 */
function escapeXml(str) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&apos;' }
  return str.replace(/[&<>"']/g, (ch) => entities[ch])
}
96
+
97
/**
 * Writes the JSON search index for all non-index pages.
 *
 * @param {object[]} pageEntries - Page records; entries flagged `isIndex` are
 *   skipped and internal fields are not copied into the index.
 * @param {string} outputDir - Output directory, relative to the working
 *   directory.
 * @param {string|object|null} config - Output file name or options object; a
 *   falsy value disables the index.
 */
export function generateSearchIndex(pageEntries, outputDir, config) {
  const settings = normalizeConfig(config)
  if (!settings) return

  const stopWords = loadStopWords(settings.stopWords)

  const indexed = []
  for (const page of pageEntries) {
    if (page.isIndex) continue

    const entry = {}
    for (const [key, value] of Object.entries(page)) {
      if (INTERNAL_FIELDS.has(key)) continue
      entry[key] = value
    }

    // Keywords already present on the page win over extracted ones.
    if (!entry.keywords) {
      const ranked = extractKeywords(page.content || '', { ...settings, stopWords })
      entry.keywords = ranked.slice(0, settings.maxKeywords)
    }
    indexed.push(entry)
  }

  const entries = applyGlobalFrequencyCeiling(indexed, settings.globalFrequencyCeiling)

  const outputPath = path.join(process.cwd(), outputDir, settings.output)
  fs.writeFileSync(outputPath, JSON.stringify(entries, null, 2))
  log({ tag: 'indexer', text: 'Generated search index:', link: path.relative(process.cwd(), outputPath), size: fileSize(outputPath) })
}
123
+
124
/**
 * Writes an XML sitemap listing every page entry.
 *
 * @param {Array<{ url: string, date?: string|Date }>} pageEntries
 * @param {string} outputDir - Output directory, relative to the working
 *   directory.
 * @param {string} [siteUrl] - Base URL prepended to each entry URL.
 * @param {string|object|null} config - Output file name or options object; a
 *   falsy value disables the sitemap.
 */
export function generateSitemap(pageEntries, outputDir, siteUrl, config) {
  config = normalizeConfig(config)
  if (!config) return

  const baseUrl = siteUrl ? siteUrl.replace(/\/+$/, '') : ''

  let xml = '<?xml version="1.0" encoding="UTF-8"?>\n'
  xml += '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n'

  for (const entry of pageEntries) {
    // Strip leading slashes before joining so the result has no "//".
    const pageUrl = typeof entry.url === 'string' ? entry.url.replace(/^\/+/, '') : entry.url
    const loc = baseUrl ? `${baseUrl}/${pageUrl}` : entry.url
    xml += ' <url>\n'
    xml += ` <loc>${escapeXml(loc)}</loc>\n`
    if (entry.date) {
      const date = new Date(entry.date)
      // `toISOString` throws on an invalid date; leave out <lastmod>
      // instead of aborting the whole sitemap.
      if (Number.isNaN(date.getTime())) {
        log({ tag: 'indexer', error: true, text: 'Invalid date, skipping lastmod:', link: String(entry.url) })
      } else {
        xml += ` <lastmod>${date.toISOString().slice(0, 10)}</lastmod>\n`
      }
    }
    xml += ' </url>\n'
  }

  xml += '</urlset>\n'

  const outputPath = path.join(process.cwd(), outputDir, config.output)
  fs.writeFileSync(outputPath, xml)
  log({ tag: 'indexer', text: 'Generated sitemap:', link: path.relative(process.cwd(), outputPath), size: fileSize(outputPath) })
}
150
+
151
/**
 * Generates the search index and the sitemap.
 *
 * @param {object[]} pageEntries - Page records passed to both generators.
 * @param {string} outputDir - Output directory passed to both generators.
 * @param {string} [siteUrl] - Base URL used for the sitemap.
 * @param {{ searchIndex?: string|object, sitemap?: string|object }} [config]
 *   Per-file configuration; a missing config is forwarded as `undefined`
 *   settings to both generators.
 */
export function generateIndexFiles(pageEntries, outputDir, siteUrl, config) {
  // A missing config previously threw on the property access.
  const { searchIndex, sitemap } = config || {}
  generateSearchIndex(pageEntries, outputDir, searchIndex)
  generateSitemap(pageEntries, outputDir, siteUrl, sitemap)
}
@@ -0,0 +1,25 @@
1
+ [
2
+ "a", "ab", "about", "above", "after", "again", "against", "all", "am", "an",
3
+ "and", "any", "are", "as", "at", "be", "because", "been", "before", "being",
4
+ "below", "between", "both", "but", "by", "can", "could", "did", "do", "does",
5
+ "doing", "down", "during", "each", "few", "for", "from", "further", "get",
6
+ "got", "had", "has", "have", "having", "he", "her", "here", "hers", "herself",
7
+ "him", "himself", "his", "how", "i", "if", "in", "into", "is", "it", "its",
8
+ "itself", "just", "let", "like", "may", "me", "might", "more", "most", "must",
9
+ "my", "myself", "no", "nor", "not", "now", "of", "off", "on", "once", "only",
10
+ "or", "other", "our", "ours", "ourselves", "out", "over", "own", "per", "same",
11
+ "she", "should", "so", "some", "such", "than", "that", "the", "their", "theirs",
12
+ "them", "themselves", "then", "there", "these", "they", "this", "those", "through",
13
+ "to", "too", "under", "until", "up", "upon", "us", "very", "was", "we", "were",
14
+ "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with",
15
+ "would", "you", "your", "yours", "yourself", "yourselves",
16
+ "also", "come", "even", "every", "find", "first", "give", "go", "going",
17
+ "gone", "good", "great", "help", "here", "href", "http", "https", "keep", "know",
18
+ "last", "long", "look", "made", "make", "many", "much", "need", "new", "next",
19
+ "old", "one", "part", "put", "right", "said", "say", "see", "set", "since",
20
+ "still", "take", "tell", "thing", "think", "time", "two", "use", "used", "using",
21
+ "want", "way", "well", "work", "www", "year",
22
+ "class", "div", "span", "true", "false", "null", "undefined", "var", "const",
23
+ "function", "return",
24
+ "amp", "quot", "nbsp", "apos", "mdash", "ndash", "hellip", "laquo", "raquo"
25
+ ]