mdorigin 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -3
- package/dist/adapters/cloudflare.d.ts +2 -0
- package/dist/adapters/cloudflare.js +17 -0
- package/dist/adapters/node.d.ts +2 -0
- package/dist/adapters/node.js +51 -11
- package/dist/cli/build-cloudflare.js +7 -1
- package/dist/cli/build-search.d.ts +1 -0
- package/dist/cli/build-search.js +44 -0
- package/dist/cli/dev.js +10 -1
- package/dist/cli/main.d.ts +1 -0
- package/dist/cli/main.js +15 -2
- package/dist/cli/search.d.ts +1 -0
- package/dist/cli/search.js +36 -0
- package/dist/cloudflare.d.ts +2 -0
- package/dist/cloudflare.js +41 -5
- package/dist/core/api.d.ts +13 -0
- package/dist/core/api.js +160 -0
- package/dist/core/content-store.js +5 -0
- package/dist/core/content-type.d.ts +1 -0
- package/dist/core/content-type.js +3 -0
- package/dist/core/directory-index.d.ts +1 -1
- package/dist/core/directory-index.js +5 -1
- package/dist/core/markdown.d.ts +12 -0
- package/dist/core/markdown.js +88 -0
- package/dist/core/request-handler.d.ts +5 -0
- package/dist/core/request-handler.js +412 -24
- package/dist/core/router.d.ts +1 -0
- package/dist/core/router.js +1 -1
- package/dist/core/site-config.d.ts +31 -0
- package/dist/core/site-config.js +98 -2
- package/dist/html/template-kind.d.ts +1 -1
- package/dist/html/template.d.ts +17 -1
- package/dist/html/template.js +282 -21
- package/dist/html/theme.js +542 -100
- package/dist/index-builder.js +62 -29
- package/dist/search.d.ts +59 -0
- package/dist/search.js +370 -0
- package/package.json +12 -3
package/dist/index-builder.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { readdir, readFile, stat, writeFile } from 'node:fs/promises';
|
|
1
|
+
import { readdir, readFile, realpath, stat, writeFile } from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { inferDirectoryContentType } from './core/content-type.js';
|
|
4
4
|
import { getDirectoryIndexCandidates } from './core/directory-index.js';
|
|
5
|
-
import { parseMarkdownDocument } from './core/markdown.js';
|
|
5
|
+
import { getDocumentSummary, getDocumentTitle, parseMarkdownDocument, } from './core/markdown.js';
|
|
6
6
|
const INDEX_START_MARKER = '<!-- INDEX:START -->';
|
|
7
7
|
const INDEX_END_MARKER = '<!-- INDEX:END -->';
|
|
8
8
|
export async function buildDirectoryIndexes(options) {
|
|
@@ -43,6 +43,14 @@ async function updateSingleDirectoryIndex(directoryPath, options) {
|
|
|
43
43
|
if (indexFilePath === null && !options.createIfMissing) {
|
|
44
44
|
return null;
|
|
45
45
|
}
|
|
46
|
+
if (indexFilePath !== null) {
|
|
47
|
+
const source = await readFile(indexFilePath, 'utf8');
|
|
48
|
+
const parsed = await parseMarkdownDocument(path.basename(indexFilePath), source);
|
|
49
|
+
const shape = await inspectDirectoryShape(directoryPath);
|
|
50
|
+
if (inferDirectoryContentType(parsed.meta, shape) === 'post') {
|
|
51
|
+
return null;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
46
54
|
const targetFilePath = indexFilePath ?? path.join(directoryPath, 'index.md');
|
|
47
55
|
const existingContent = indexFilePath
|
|
48
56
|
? await readFile(indexFilePath, 'utf8')
|
|
@@ -65,7 +73,8 @@ export async function buildManagedIndexBlock(directoryPath) {
|
|
|
65
73
|
continue;
|
|
66
74
|
}
|
|
67
75
|
const fullPath = path.join(directoryPath, entry.name);
|
|
68
|
-
|
|
76
|
+
const entryStats = await stat(fullPath);
|
|
77
|
+
if (entryStats.isDirectory()) {
|
|
69
78
|
const resolvedEntry = await resolveDirectoryEntry(fullPath, entry.name);
|
|
70
79
|
if (resolvedEntry.draft) {
|
|
71
80
|
continue;
|
|
@@ -88,7 +97,7 @@ export async function buildManagedIndexBlock(directoryPath) {
|
|
|
88
97
|
}
|
|
89
98
|
continue;
|
|
90
99
|
}
|
|
91
|
-
if (!
|
|
100
|
+
if (!entryStats.isFile() || path.extname(entry.name).toLowerCase() !== '.md') {
|
|
92
101
|
continue;
|
|
93
102
|
}
|
|
94
103
|
if (entry.name === 'index.md' || entry.name === 'README.md') {
|
|
@@ -100,9 +109,9 @@ export async function buildManagedIndexBlock(directoryPath) {
|
|
|
100
109
|
continue;
|
|
101
110
|
}
|
|
102
111
|
articles.push({
|
|
103
|
-
title: parsed.meta.
|
|
112
|
+
title: getDocumentTitle(parsed.meta, parsed.body, entry.name.slice(0, -'.md'.length)),
|
|
104
113
|
date: parsed.meta.date,
|
|
105
|
-
summary: parsed.meta
|
|
114
|
+
summary: getDocumentSummary(parsed.meta, parsed.body),
|
|
106
115
|
link: `./${entry.name}`,
|
|
107
116
|
order: parsed.meta.order,
|
|
108
117
|
});
|
|
@@ -162,27 +171,55 @@ async function resolveDirectoryEntry(directoryPath, fallbackName) {
|
|
|
162
171
|
const parsed = await parseMarkdownDocument(path.basename(indexPath), source);
|
|
163
172
|
const shape = await inspectDirectoryShape(directoryPath);
|
|
164
173
|
return {
|
|
165
|
-
title: parsed.meta.
|
|
174
|
+
title: getDocumentTitle(parsed.meta, parsed.body, fallbackName),
|
|
166
175
|
type: inferDirectoryContentType(parsed.meta, shape),
|
|
167
176
|
date: parsed.meta.date,
|
|
168
|
-
summary: parsed.meta
|
|
177
|
+
summary: getDocumentSummary(parsed.meta, parsed.body),
|
|
169
178
|
draft: parsed.meta.draft === true,
|
|
170
179
|
order: parsed.meta.order,
|
|
171
180
|
};
|
|
172
181
|
}
|
|
173
182
|
async function listDirectoriesRecursively(rootDir) {
|
|
183
|
+
return listDirectoriesRecursivelyInternal(rootDir, new Set());
|
|
184
|
+
}
|
|
185
|
+
async function listDirectoriesRecursivelyInternal(rootDir, visitedRealDirectories) {
|
|
186
|
+
const realDirectoryPath = await realpath(rootDir);
|
|
187
|
+
if (visitedRealDirectories.has(realDirectoryPath)) {
|
|
188
|
+
return [];
|
|
189
|
+
}
|
|
190
|
+
visitedRealDirectories.add(realDirectoryPath);
|
|
174
191
|
const directories = [rootDir];
|
|
175
192
|
const entries = await readdir(rootDir, { withFileTypes: true });
|
|
193
|
+
const rootShape = await inspectDirectoryShape(rootDir);
|
|
176
194
|
for (const entry of entries) {
|
|
177
|
-
if (
|
|
195
|
+
if (entry.name.startsWith('.')) {
|
|
178
196
|
continue;
|
|
179
197
|
}
|
|
180
|
-
|
|
198
|
+
if (rootShape.hasSkillIndex && isIgnoredSkillSupportDirectory(entry.name)) {
|
|
199
|
+
continue;
|
|
200
|
+
}
|
|
201
|
+
const childPath = path.join(rootDir, entry.name);
|
|
202
|
+
const childStats = await stat(childPath);
|
|
203
|
+
if (!childStats.isDirectory()) {
|
|
204
|
+
continue;
|
|
205
|
+
}
|
|
206
|
+
directories.push(childPath);
|
|
207
|
+
const childIndexPath = await resolveDirectoryIndexFile(childPath);
|
|
208
|
+
if (childIndexPath !== null) {
|
|
209
|
+
const source = await readFile(childIndexPath, 'utf8');
|
|
210
|
+
const parsed = await parseMarkdownDocument(path.basename(childIndexPath), source);
|
|
211
|
+
const shape = await inspectDirectoryShape(childPath);
|
|
212
|
+
if (inferDirectoryContentType(parsed.meta, shape) === 'post') {
|
|
213
|
+
continue;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
directories.push(...(await listDirectoriesRecursivelyInternal(childPath, visitedRealDirectories)).slice(1));
|
|
181
217
|
}
|
|
182
218
|
return directories;
|
|
183
219
|
}
|
|
184
220
|
async function inspectDirectoryShape(directoryPath) {
|
|
185
221
|
const entries = await readdir(directoryPath, { withFileTypes: true });
|
|
222
|
+
let hasSkillIndex = false;
|
|
186
223
|
let hasChildDirectories = false;
|
|
187
224
|
let hasExtraMarkdownFiles = false;
|
|
188
225
|
let hasAssetFiles = false;
|
|
@@ -190,16 +227,21 @@ async function inspectDirectoryShape(directoryPath) {
|
|
|
190
227
|
if (entry.name.startsWith('.')) {
|
|
191
228
|
continue;
|
|
192
229
|
}
|
|
193
|
-
|
|
230
|
+
const fullPath = path.join(directoryPath, entry.name);
|
|
231
|
+
const entryStats = await stat(fullPath);
|
|
232
|
+
if (entryStats.isDirectory()) {
|
|
194
233
|
hasChildDirectories = true;
|
|
195
234
|
continue;
|
|
196
235
|
}
|
|
197
|
-
if (!
|
|
236
|
+
if (!entryStats.isFile()) {
|
|
198
237
|
continue;
|
|
199
238
|
}
|
|
200
239
|
const extension = path.extname(entry.name).toLowerCase();
|
|
201
240
|
if (extension === '.md') {
|
|
202
|
-
if (entry.name
|
|
241
|
+
if (entry.name === 'SKILL.md') {
|
|
242
|
+
hasSkillIndex = true;
|
|
243
|
+
}
|
|
244
|
+
else if (entry.name !== 'index.md' && entry.name !== 'README.md') {
|
|
203
245
|
hasExtraMarkdownFiles = true;
|
|
204
246
|
}
|
|
205
247
|
continue;
|
|
@@ -207,6 +249,7 @@ async function inspectDirectoryShape(directoryPath) {
|
|
|
207
249
|
hasAssetFiles = true;
|
|
208
250
|
}
|
|
209
251
|
return {
|
|
252
|
+
hasSkillIndex,
|
|
210
253
|
hasChildDirectories,
|
|
211
254
|
hasExtraMarkdownFiles,
|
|
212
255
|
hasAssetFiles,
|
|
@@ -254,22 +297,6 @@ function compareOptionalOrder(leftOrder, rightOrder) {
|
|
|
254
297
|
}
|
|
255
298
|
return 0;
|
|
256
299
|
}
|
|
257
|
-
function extractFirstParagraph(markdown) {
|
|
258
|
-
const paragraphs = markdown
|
|
259
|
-
.split(/\n\s*\n/g)
|
|
260
|
-
.map((paragraph) => paragraph.trim())
|
|
261
|
-
.filter((paragraph) => paragraph !== '');
|
|
262
|
-
for (const paragraph of paragraphs) {
|
|
263
|
-
if (paragraph.startsWith('#') ||
|
|
264
|
-
paragraph.startsWith('<!--') ||
|
|
265
|
-
paragraph.startsWith('- ') ||
|
|
266
|
-
paragraph.startsWith('* ')) {
|
|
267
|
-
continue;
|
|
268
|
-
}
|
|
269
|
-
return paragraph.replace(/\s+/g, ' ');
|
|
270
|
-
}
|
|
271
|
-
return undefined;
|
|
272
|
-
}
|
|
273
300
|
async function resolveDirectoryIndexFile(directoryPath) {
|
|
274
301
|
for (const candidate of getDirectoryIndexCandidates('')) {
|
|
275
302
|
const candidatePath = path.join(directoryPath, candidate);
|
|
@@ -279,6 +306,12 @@ async function resolveDirectoryIndexFile(directoryPath) {
|
|
|
279
306
|
}
|
|
280
307
|
return null;
|
|
281
308
|
}
|
|
309
|
+
/**
 * Reports whether a directory name is one of the fixed names that the
 * directory walk skips when the parent directory contains a `SKILL.md`.
 *
 * @param {string} name - Directory entry name (not a path).
 * @returns {boolean} True for `scripts`, `references`, `assets` or `templates`.
 */
function isIgnoredSkillSupportDirectory(name) {
    const ignoredNames = ['scripts', 'references', 'assets', 'templates'];
    return ignoredNames.includes(name);
}
|
|
282
315
|
async function pathExists(filePath) {
|
|
283
316
|
try {
|
|
284
317
|
await stat(filePath);
|
package/dist/search.d.ts
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import type { ResolvedSiteConfig } from './core/site-config.js';
|
|
2
|
+
type JsonValue = null | boolean | number | string | JsonValue[] | {
|
|
3
|
+
[key: string]: JsonValue;
|
|
4
|
+
};
|
|
5
|
+
export interface SearchHit {
|
|
6
|
+
docId: string;
|
|
7
|
+
relativePath: string;
|
|
8
|
+
canonicalUrl?: string;
|
|
9
|
+
title?: string;
|
|
10
|
+
summary?: string;
|
|
11
|
+
metadata: Record<string, JsonValue>;
|
|
12
|
+
score: number;
|
|
13
|
+
bestMatch: {
|
|
14
|
+
chunkId: number;
|
|
15
|
+
excerpt: string;
|
|
16
|
+
headingPath: string[];
|
|
17
|
+
charStart: number;
|
|
18
|
+
charEnd: number;
|
|
19
|
+
score: number;
|
|
20
|
+
};
|
|
21
|
+
}
|
|
22
|
+
export interface SearchApi {
|
|
23
|
+
search(query: string, options?: {
|
|
24
|
+
topK?: number;
|
|
25
|
+
relativePathPrefix?: string;
|
|
26
|
+
}): Promise<SearchHit[]>;
|
|
27
|
+
}
|
|
28
|
+
export interface SearchBundleEntry {
|
|
29
|
+
path: string;
|
|
30
|
+
kind: 'text' | 'binary';
|
|
31
|
+
mediaType: string;
|
|
32
|
+
text?: string;
|
|
33
|
+
base64?: string;
|
|
34
|
+
}
|
|
35
|
+
export interface BuildSearchBundleOptions {
|
|
36
|
+
rootDir: string;
|
|
37
|
+
outDir: string;
|
|
38
|
+
siteConfig: ResolvedSiteConfig;
|
|
39
|
+
draftMode?: 'include' | 'exclude';
|
|
40
|
+
embeddingBackend?: 'hashing' | 'model2vec';
|
|
41
|
+
model?: string;
|
|
42
|
+
}
|
|
43
|
+
export interface BuildSearchBundleResult {
|
|
44
|
+
outputDir: string;
|
|
45
|
+
documentCount: number;
|
|
46
|
+
chunkCount: number;
|
|
47
|
+
vectorDimensions: number;
|
|
48
|
+
}
|
|
49
|
+
export interface SearchBundleOptions {
|
|
50
|
+
indexDir: string;
|
|
51
|
+
query: string;
|
|
52
|
+
topK?: number;
|
|
53
|
+
relativePathPrefix?: string;
|
|
54
|
+
}
|
|
55
|
+
export declare function buildSearchBundle(options: BuildSearchBundleOptions): Promise<BuildSearchBundleResult>;
|
|
56
|
+
export declare function searchBundle(options: SearchBundleOptions): Promise<SearchHit[]>;
|
|
57
|
+
export declare function createSearchApiFromDirectory(indexDir: string): Promise<SearchApi>;
|
|
58
|
+
export declare function createSearchApiFromBundle(bundleEntries: SearchBundleEntry[]): SearchApi;
|
|
59
|
+
export {};
|
package/dist/search.js
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
import { readdir, readFile, realpath, stat } from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { inferDirectoryContentType } from './core/content-type.js';
|
|
4
|
+
import { getDirectoryIndexCandidates } from './core/directory-index.js';
|
|
5
|
+
import { getDocumentSummary, getDocumentTitle, parseMarkdownDocument, } from './core/markdown.js';
|
|
6
|
+
/**
 * Collects the searchable Markdown documents under `options.rootDir` and hands
 * them to the optional `indexbind/build` module to write a search bundle.
 *
 * @param {object} options
 * @param {string} options.rootDir - Content root; resolved to an absolute path.
 * @param {string} options.outDir - Bundle output directory; resolved to an absolute path.
 * @param {object} options.siteConfig - Site configuration forwarded to document collection.
 * @param {'include'|'exclude'} [options.draftMode] - Defaults to `'exclude'`.
 * @param {'hashing'|'model2vec'} [options.embeddingBackend] - Defaults to `'model2vec'`.
 * @param {string} [options.model] - Forwarded unchanged to the build module.
 * @returns {Promise<{outputDir: string, documentCount: number, chunkCount: number, vectorDimensions: number}>}
 * @throws {Error} When the optional `indexbind` package cannot be imported.
 */
export async function buildSearchBundle(options) {
    const indexbindBuild = await loadIndexbindBuildModule();
    const absoluteRoot = path.resolve(options.rootDir);
    const collectOptions = { draftMode: options.draftMode ?? 'exclude' };
    const documents = await collectSearchDocuments(absoluteRoot, options.siteConfig, collectOptions);
    const outputDir = path.resolve(options.outDir);
    const bundleStats = await indexbindBuild.buildCanonicalBundle(outputDir, documents, {
        embeddingBackend: options.embeddingBackend ?? 'model2vec',
        model: options.model,
        sourceRootId: path.basename(absoluteRoot),
        sourceRootPath: absoluteRoot,
    });
    const { documentCount, chunkCount, vectorDimensions } = bundleStats;
    return { outputDir, documentCount, chunkCount, vectorDimensions };
}
|
|
25
|
+
/**
 * Opens the search index stored in `options.indexDir`, runs one query against
 * it, and returns the hits after diversity re-ranking.
 *
 * @param {object} options
 * @param {string} options.indexDir - Directory holding the index; resolved to an absolute path.
 * @param {string} options.query - Query text.
 * @param {number} [options.topK] - Defaults to 10.
 * @param {string} [options.relativePathPrefix] - Forwarded unchanged to the index.
 * @returns {Promise<object[]>} Re-ranked hits.
 * @throws {Error} When the optional `indexbind` package cannot be imported.
 */
export async function searchBundle(options) {
    const indexbindWeb = await loadIndexbindWebModule();
    const indexDirectory = path.resolve(options.indexDir);
    const webIndex = await indexbindWeb.openWebIndex(indexDirectory);
    const rawHits = await webIndex.search(options.query, {
        topK: options.topK ?? 10,
        relativePathPrefix: options.relativePathPrefix,
    });
    return rerankSearchHits(rawHits);
}
|
|
33
|
+
/**
 * Opens the search index in `indexDir` once and returns an object whose
 * `search` method queries that index and re-ranks the hits.
 *
 * @param {string} indexDir - Directory holding the index; resolved to an absolute path.
 * @returns {Promise<{search: (query: string, options?: {topK?: number, relativePathPrefix?: string}) => Promise<object[]>}>}
 * @throws {Error} When the optional `indexbind` package cannot be imported.
 */
export async function createSearchApiFromDirectory(indexDir) {
    const indexbindWeb = await loadIndexbindWebModule();
    const webIndex = await indexbindWeb.openWebIndex(path.resolve(indexDir));
    const search = async (query, options) => {
        // topK is passed through as-is (possibly undefined); no default is applied here.
        const rawHits = await webIndex.search(query, {
            topK: options?.topK,
            relativePathPrefix: options?.relativePathPrefix,
        });
        return rerankSearchHits(rawHits);
    };
    return { search };
}
|
|
45
|
+
/**
 * Returns a search API backed by in-memory bundle entries. The index is opened
 * lazily on the first `search` call and the resulting promise is reused by
 * later calls.
 *
 * If opening the index fails, the cached promise is discarded so that the next
 * `search` call retries instead of replaying the same rejection forever.
 *
 * @param {Array<{path: string, kind: 'text'|'binary', mediaType: string, text?: string, base64?: string}>} bundleEntries
 * @returns {{search: (query: string, options?: {topK?: number, relativePathPrefix?: string}) => Promise<object[]>}}
 */
export function createSearchApiFromBundle(bundleEntries) {
    let indexPromise = null;
    const getIndex = () => {
        if (indexPromise === null) {
            const pending = openWebIndexFromBundle(bundleEntries);
            indexPromise = pending;
            // Drop a failed open so a later call can retry. The rejection itself
            // still reaches the caller through the awaited `pending` promise.
            pending.catch(() => {
                if (indexPromise === pending) {
                    indexPromise = null;
                }
            });
        }
        return indexPromise;
    };
    return {
        async search(query, options) {
            const index = await getIndex();
            return rerankSearchHits(await index.search(query, {
                topK: options?.topK,
                relativePathPrefix: options?.relativePathPrefix,
            }));
        },
    };
}
|
|
60
|
+
/**
 * Reads and parses every Markdown document found under `rootDir` and converts
 * each into the record shape passed to the search bundle builder.
 *
 * @param {string} rootDir - Absolute content root.
 * @param {object} siteConfig - Reads `siteUrl` here; also forwarded to `buildSearchMetadata`.
 * @param {{draftMode: 'include'|'exclude'}} options - With `'exclude'`, documents
 *   whose front matter has `draft === true` are skipped.
 * @returns {Promise<object[]>} One record per included document.
 */
async function collectSearchDocuments(rootDir, siteConfig, options) {
    const skipDrafts = options.draftMode === 'exclude';
    const collected = [];
    for (const { absolutePath, relativePath } of await listSearchDocuments(rootDir)) {
        const markdown = await readFile(absolutePath, 'utf8');
        const { meta, body } = await parseMarkdownDocument(relativePath, markdown);
        if (skipDrafts && meta.draft === true) {
            continue;
        }
        const canonicalPath = getCanonicalHtmlPathForContentPath(relativePath);
        // Without a configured site URL the canonical URL stays site-relative.
        let canonicalUrl = canonicalPath;
        if (siteConfig.siteUrl) {
            const base = ensureTrailingSlash(siteConfig.siteUrl);
            canonicalUrl = new URL(trimLeadingSlash(canonicalPath), base).toString();
        }
        collected.push({
            docId: canonicalPath,
            sourcePath: absolutePath,
            relativePath,
            canonicalUrl,
            title: getDocumentTitle(meta, body, fallbackTitleFromRelativePath(relativePath)),
            summary: getDocumentSummary(meta, body),
            content: markdown,
            metadata: buildSearchMetadata(relativePath, canonicalPath, meta, siteConfig),
        });
    }
    return collected;
}
|
|
86
|
+
/**
 * Walks `rootDir` and returns the discovered Markdown documents sorted by
 * relative path (using `String.prototype.localeCompare`).
 *
 * @param {string} rootDir - Directory to walk.
 * @returns {Promise<Array<{absolutePath: string, relativePath: string}>>}
 */
async function listSearchDocuments(rootDir) {
    const discovered = [];
    const visitedRealDirectories = new Set();
    await walkDirectory(rootDir, '', visitedRealDirectories, discovered);
    return discovered.sort((a, b) => a.relativePath.localeCompare(b.relativePath));
}
|
|
92
|
+
/**
 * Recursively collects Markdown documents below a directory into `results`.
 *
 * - A directory whose real path was already visited is skipped, which stops
 *   symlink cycles.
 * - The directory's index file (if any) is pushed first; other `.md` files
 *   follow, excluding names in `DIRECTORY_INDEX_FILENAMES_LOWER`.
 * - Entries whose name starts with `.` are ignored.
 * - When the directory has a `SKILL.md`, its support sub-directories are not descended into.
 * - When the directory's index document is classified as a `'post'`, none of
 *   its sub-directories are descended into.
 *
 * The post classification depends only on this directory, so it is computed
 * lazily once instead of re-reading and re-parsing the index file for every
 * child directory.
 *
 * @param {string} absoluteDirectoryPath - Directory to scan.
 * @param {string} relativeDirectoryPath - POSIX-style path relative to the walk root (`''` at the root).
 * @param {Set<string>} visitedRealDirectories - Real paths already walked; mutated.
 * @param {Array<{absolutePath: string, relativePath: string}>} results - Output list; mutated.
 * @returns {Promise<void>}
 */
async function walkDirectory(absoluteDirectoryPath, relativeDirectoryPath, visitedRealDirectories, results) {
    const realDirectoryPath = await realpath(absoluteDirectoryPath);
    if (visitedRealDirectories.has(realDirectoryPath)) {
        return;
    }
    visitedRealDirectories.add(realDirectoryPath);
    const entries = await readdir(absoluteDirectoryPath, { withFileTypes: true });
    const shape = await inspectDirectoryShape(absoluteDirectoryPath);
    const indexFile = await resolveVisibleDirectoryIndexFile(absoluteDirectoryPath, relativeDirectoryPath);
    if (indexFile) {
        results.push(indexFile);
    }
    // Stays undefined until the first child directory needs the answer, so a
    // directory without eligible children never reads its index file again.
    let isPostDirectory;
    for (const entry of entries) {
        if (entry.name.startsWith('.')) {
            continue;
        }
        const absoluteEntryPath = path.join(absoluteDirectoryPath, entry.name);
        // stat (not the Dirent) so that symlinks are classified by their target.
        const entryStats = await stat(absoluteEntryPath);
        const relativeEntryPath = relativeDirectoryPath === ''
            ? entry.name
            : path.posix.join(relativeDirectoryPath, entry.name);
        if (entryStats.isFile()) {
            if (path.posix.extname(entry.name).toLowerCase() !== '.md') {
                continue;
            }
            // Index files were already handled via `indexFile` above.
            if (DIRECTORY_INDEX_FILENAMES_LOWER.has(entry.name.toLowerCase())) {
                continue;
            }
            results.push({
                absolutePath: absoluteEntryPath,
                relativePath: relativeEntryPath,
            });
            continue;
        }
        if (!entryStats.isDirectory()) {
            continue;
        }
        if (shape.hasSkillIndex && isIgnoredSkillSupportDirectory(entry.name)) {
            continue;
        }
        if (indexFile) {
            if (isPostDirectory === undefined) {
                const source = await readFile(indexFile.absolutePath, 'utf8');
                const parsed = await parseMarkdownDocument(indexFile.relativePath, source);
                isPostDirectory = inferDirectoryContentType(parsed.meta, shape) === 'post';
            }
            if (isPostDirectory) {
                continue;
            }
        }
        await walkDirectory(absoluteEntryPath, relativeEntryPath, visitedRealDirectories, results);
    }
}
|
|
142
|
+
/**
 * Finds the first directory-index candidate that exists in a directory, trying
 * candidates in the order returned by `getDirectoryIndexCandidates('')`.
 *
 * @param {string} absoluteDirectoryPath - Directory to probe.
 * @param {string} relativeDirectoryPath - POSIX-style path relative to the walk root (`''` at the root).
 * @returns {Promise<{absolutePath: string, relativePath: string} | null>} The
 *   match, or `null` when no candidate exists.
 */
async function resolveVisibleDirectoryIndexFile(absoluteDirectoryPath, relativeDirectoryPath) {
    const atRoot = relativeDirectoryPath === '';
    for (const fileName of getDirectoryIndexCandidates('')) {
        const absolutePath = path.join(absoluteDirectoryPath, fileName);
        if (!(await pathExists(absolutePath))) {
            continue;
        }
        const relativePath = atRoot
            ? fileName
            : path.posix.join(relativeDirectoryPath, fileName);
        return { absolutePath, relativePath };
    }
    return null;
}
|
|
156
|
+
/**
 * Summarises the visible contents of a directory as four boolean flags.
 * Entries whose name starts with `.` are ignored, and each remaining entry is
 * classified with `stat`, so symlinks count as whatever they point to.
 *
 * @param {string} directoryPath - Directory to inspect (not recursive).
 * @returns {Promise<{hasSkillIndex: boolean, hasChildDirectories: boolean, hasExtraMarkdownFiles: boolean, hasAssetFiles: boolean}>}
 *   - `hasSkillIndex`: a file named exactly `SKILL.md` exists.
 *   - `hasChildDirectories`: at least one sub-directory exists.
 *   - `hasExtraMarkdownFiles`: a `.md` file other than `SKILL.md`, `index.md`, `README.md` exists.
 *   - `hasAssetFiles`: a regular file without a `.md` extension exists.
 */
async function inspectDirectoryShape(directoryPath) {
    const shape = {
        hasSkillIndex: false,
        hasChildDirectories: false,
        hasExtraMarkdownFiles: false,
        hasAssetFiles: false,
    };
    const listing = await readdir(directoryPath, { withFileTypes: true });
    for (const { name } of listing) {
        if (name.startsWith('.')) {
            continue;
        }
        const info = await stat(path.join(directoryPath, name));
        if (info.isDirectory()) {
            shape.hasChildDirectories = true;
        }
        else if (!info.isFile()) {
            // Sockets, FIFOs and similar do not influence the shape.
        }
        else if (path.extname(name).toLowerCase() !== '.md') {
            shape.hasAssetFiles = true;
        }
        else if (name === 'SKILL.md') {
            shape.hasSkillIndex = true;
        }
        else if (name !== 'index.md' && name !== 'README.md') {
            shape.hasExtraMarkdownFiles = true;
        }
    }
    return shape;
}
|
|
194
|
+
/**
 * Builds the metadata record stored alongside a search document.
 *
 * `markdownPath`, `canonicalPath` and `siteTitle` are always present. The
 * front-matter fields are copied only when they have the expected type:
 * `type` must be `'page'` or `'post'`, `date` a string, `order` a number and
 * `aliases` an array.
 *
 * @param {string} relativePath - Document path relative to the content root.
 * @param {string} canonicalPath - Canonical HTML path of the document.
 * @param {object} meta - Parsed front matter.
 * @param {object} siteConfig - Only `siteTitle` is read.
 * @returns {object} Metadata record.
 */
function buildSearchMetadata(relativePath, canonicalPath, meta, siteConfig) {
    const hasKnownType = meta.type === 'page' || meta.type === 'post';
    return {
        markdownPath: `/${relativePath}`,
        canonicalPath,
        siteTitle: siteConfig.siteTitle,
        ...(hasKnownType && { type: meta.type }),
        ...(typeof meta.date === 'string' && { date: meta.date }),
        ...(typeof meta.order === 'number' && { order: meta.order }),
        ...(Array.isArray(meta.aliases) && { aliases: meta.aliases }),
    };
}
|
|
214
|
+
/**
 * Derives a fallback title from a document's relative path.
 *
 * For a directory index file (matched case-insensitively against
 * `DIRECTORY_INDEX_FILENAMES_LOWER`) the title is the name of the containing
 * directory; otherwise it is the file name without a trailing `.md`.
 *
 * NOTE(review): for an index file at the content root the containing directory
 * resolves to `.`, so the fallback is the literal string `.` — confirm whether
 * callers expect that.
 *
 * @param {string} relativePath - POSIX-style path relative to the content root.
 * @returns {string} Fallback title.
 */
function fallbackTitleFromRelativePath(relativePath) {
    const fileName = path.posix.basename(relativePath);
    const isDirectoryIndex = DIRECTORY_INDEX_FILENAMES_LOWER.has(fileName.toLowerCase());
    if (!isDirectoryIndex) {
        return fileName.replace(/\.md$/i, '');
    }
    const parentDirectory = path.posix.dirname(relativePath);
    return path.posix.basename(parentDirectory);
}
|
|
221
|
+
/**
 * Maps a Markdown content path to its canonical HTML path.
 *
 * `index.md`, `readme.md` and `skill.md` (any letter case) map to their
 * directory with a trailing slash (`/` at the root). Any other path maps to
 * itself with the last three characters (the `.md` extension) removed.
 *
 * @param {string} contentPath - POSIX-style path relative to the content root.
 * @returns {string} Canonical path starting with `/`.
 */
function getCanonicalHtmlPathForContentPath(contentPath) {
    const fileName = path.posix.basename(contentPath).toLowerCase();
    const isDirectoryDocument = ['index.md', 'readme.md', 'skill.md'].includes(fileName);
    if (!isDirectoryDocument) {
        return `/${contentPath.slice(0, -'.md'.length)}`;
    }
    const parentDirectory = path.posix.dirname(contentPath);
    if (parentDirectory === '.') {
        return '/';
    }
    return `/${parentDirectory}/`;
}
|
|
231
|
+
/**
 * Removes a single leading `/` from a string, if present.
 *
 * @param {string} value
 * @returns {string}
 */
function trimLeadingSlash(value) {
    if (!value.startsWith('/')) {
        return value;
    }
    return value.slice(1);
}
|
|
234
|
+
/**
 * Appends `/` to a string unless it already ends with one.
 *
 * @param {string} value
 * @returns {string}
 */
function ensureTrailingSlash(value) {
    if (value.endsWith('/')) {
        return value;
    }
    return `${value}/`;
}
|
|
237
|
+
/**
 * Checks whether a path can be `stat`-ed.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>} `true` when `stat` succeeds, `false` when it
 *   fails with code `ENOENT`.
 * @throws Any `stat` failure other than `ENOENT` is rethrown unchanged.
 */
async function pathExists(filePath) {
    try {
        await stat(filePath);
    }
    catch (error) {
        const isMissing = typeof error === 'object' &&
            error !== null &&
            'code' in error &&
            error.code === 'ENOENT';
        if (!isMissing) {
            throw error;
        }
        return false;
    }
    return true;
}
|
|
252
|
+
/**
 * Reports whether a directory name is one of the fixed names that the walk
 * skips when the parent directory contains a `SKILL.md`.
 *
 * @param {string} name - Directory entry name (not a path).
 * @returns {boolean} True for `scripts`, `references`, `assets` or `templates`.
 */
function isIgnoredSkillSupportDirectory(name) {
    switch (name) {
        case 'scripts':
        case 'references':
        case 'assets':
        case 'templates':
            return true;
        default:
            return false;
    }
}
|
|
258
|
+
/**
 * Dynamically imports `indexbind/build`.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} With an installation hint when the import fails; the
 *   original failure is attached as `cause` when it is an `Error`.
 */
async function loadIndexbindBuildModule() {
    let buildModule;
    try {
        buildModule = await import('indexbind/build');
    }
    catch (error) {
        const cause = error instanceof Error ? error : undefined;
        throw new Error(`Search build requires the optional package "indexbind". Install it first, for example: npm install indexbind`, { cause });
    }
    return buildModule;
}
|
|
266
|
+
/**
 * Dynamically imports `indexbind/web`.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} With an installation hint when the import fails; the
 *   original failure is attached as `cause` when it is an `Error`.
 */
async function loadIndexbindWebModule() {
    let webModule;
    try {
        webModule = await import('indexbind/web');
    }
    catch (error) {
        const cause = error instanceof Error ? error : undefined;
        throw new Error(`Search query requires the optional package "indexbind". Install it first, for example: npm install indexbind`, { cause });
    }
    return webModule;
}
|
|
274
|
+
/**
 * Dynamically imports `indexbind/cloudflare`.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} With an installation hint when the import fails; the
 *   original failure is attached as `cause` when it is an `Error`.
 */
async function loadIndexbindCloudflareModule() {
    let cloudflareModule;
    try {
        cloudflareModule = await import('indexbind/cloudflare');
    }
    catch (error) {
        const cause = error instanceof Error ? error : undefined;
        throw new Error(`Search query requires the optional package "indexbind". Install it first, for example: npm install indexbind`, { cause });
    }
    return cloudflareModule;
}
|
|
282
|
+
/**
 * Opens a web index from in-memory bundle entries by serving them through a
 * temporary replacement of `globalThis.fetch`.
 *
 * Each entry is keyed by its path resolved against a placeholder base URL.
 * While `openWebIndex` runs, a fetch whose URL matches an entry is answered
 * from memory with status 200 and the entry's media type; every other fetch is
 * passed to the original `fetch`. The original `fetch` is restored in
 * `finally`, whether opening succeeds or throws.
 *
 * Base64 payloads are decoded only for non-text entries, so text entries incur
 * no decoding work.
 *
 * @param {Array<{path: string, kind: 'text'|'binary', mediaType: string, text?: string, base64?: string}>} bundleEntries
 * @returns {Promise<object>} The index returned by `openWebIndex`.
 * @throws {Error} When the optional `indexbind` package cannot be imported.
 */
async function openWebIndexFromBundle(bundleEntries) {
    const cloudflareModule = await loadIndexbindCloudflareModule();
    const baseUrl = 'https://mdorigin-search.invalid/';
    const originalFetch = globalThis.fetch;
    const bundleMap = new Map(bundleEntries.map((entry) => [new URL(entry.path, baseUrl).toString(), entry]));
    globalThis.fetch = async (input, init) => {
        // `input` may be a URL string, a URL object, or a Request-like object.
        const requestUrl = typeof input === 'string'
            ? input
            : input instanceof URL
                ? input.toString()
                : input.url;
        const entry = bundleMap.get(requestUrl);
        if (!entry) {
            return originalFetch(input, init);
        }
        const body = entry.kind === 'text'
            ? entry.text ?? ''
            : new Blob([decodeBase64(entry.base64 ?? '')], {
                type: entry.mediaType,
            });
        return new Response(body, {
            status: 200,
            headers: new Headers({ 'content-type': entry.mediaType }),
        });
    };
    try {
        return await cloudflareModule.openWebIndex(new URL(baseUrl));
    }
    finally {
        globalThis.fetch = originalFetch;
    }
}
|
|
316
|
+
/**
 * Decodes a base64 string into its bytes.
 *
 * @param {string} value - Base64 text accepted by `atob`.
 * @returns {Uint8Array} One byte per decoded character.
 */
function decodeBase64(value) {
    const binaryString = atob(value);
    const bytes = new Uint8Array(binaryString.length);
    for (let offset = 0; offset < binaryString.length; offset += 1) {
        bytes[offset] = binaryString.charCodeAt(offset);
    }
    return bytes;
}
|
|
320
|
+
/**
 * Greedily re-orders hits: at each step the remaining hit with the highest
 * diversified score (relative to the hits already chosen) is moved to the
 * output. Scores within 1e-9 of the current best are treated as ties and
 * resolved with `compareHits`. The input array is not modified.
 *
 * @param {object[]} hits - Hits as returned by the index.
 * @returns {object[]} A new array containing the same hits in ranked order.
 */
function rerankSearchHits(hits) {
    const pool = Array.from(hits);
    const ranked = [];
    while (pool.length !== 0) {
        let winner = 0;
        let winnerScore = Number.NEGATIVE_INFINITY;
        pool.forEach((hit, position) => {
            const score = getDiversifiedSearchRankScore(hit, ranked);
            if (score > winnerScore) {
                winnerScore = score;
                winner = position;
                return;
            }
            // On a tie only the winning position changes; the reference score stays.
            const isTie = Math.abs(score - winnerScore) <= 1e-9;
            if (isTie && compareHits(hit, pool[winner]) < 0) {
                winner = position;
            }
        });
        const [picked] = pool.splice(winner, 1);
        ranked.push(picked);
    }
    return ranked;
}
|
|
343
|
+
/**
 * Computes a hit's score after diversity penalties.
 *
 * - The score is multiplied by 0.9 for every already-selected hit that shares
 *   the candidate's top-level section.
 * - Overview documents (per `isOverviewSearchHit`) are additionally multiplied
 *   by 0.72 when their section is already represented, otherwise by 0.84.
 *
 * @param {{score: number, relativePath: string}} hit - Candidate hit.
 * @param {Array<{relativePath: string}>} selectedHits - Hits already ranked.
 * @returns {number} Adjusted score.
 */
function getDiversifiedSearchRankScore(hit, selectedHits) {
    const section = getTopLevelSection(hit.relativePath);
    let sameSectionCount = 0;
    for (const selected of selectedHits) {
        if (getTopLevelSection(selected.relativePath) === section) {
            sameSectionCount += 1;
        }
    }
    const sectionSeen = sameSectionCount > 0;
    const sectionFactor = sectionSeen ? Math.pow(0.9, sameSectionCount) : 1;
    let overviewFactor = 1;
    if (isOverviewSearchHit(hit)) {
        overviewFactor = sectionSeen ? 0.72 : 0.84;
    }
    return hit.score * sectionFactor * overviewFactor;
}
|
|
355
|
+
/**
 * Tie-break comparator for hits: higher `bestMatch.score` sorts first, and
 * equal scores fall back to `relativePath` order via `localeCompare`.
 *
 * @param {{bestMatch: {score: number}, relativePath: string}} left
 * @param {{bestMatch: {score: number}, relativePath: string}} right
 * @returns {number} Negative when `left` sorts first, positive when `right` does.
 */
function compareHits(left, right) {
    const leftScore = left.bestMatch.score;
    const rightScore = right.bestMatch.score;
    return rightScore !== leftScore
        ? rightScore - leftScore
        : left.relativePath.localeCompare(right.relativePath);
}
|
|
361
|
+
/**
 * Reports whether a hit points at a `readme.md` or `index.md` file (any
 * letter case), judged by the last segment of its relative path.
 *
 * @param {{relativePath: string}} hit
 * @returns {boolean}
 */
function isOverviewSearchHit(hit) {
    const fileName = path.posix.basename(hit.relativePath).toLowerCase();
    return ['readme.md', 'index.md'].includes(fileName);
}
|
|
365
|
+
/**
 * Returns the first segment of a relative path, treating both `\` and `/` as
 * separators. A path without any separator is returned whole.
 *
 * @param {string} relativePath
 * @returns {string} Text before the first separator (may be empty).
 */
function getTopLevelSection(relativePath) {
    const forwardSlashed = relativePath.split('\\').join('/');
    const firstSlash = forwardSlashed.indexOf('/');
    if (firstSlash === -1) {
        return forwardSlashed;
    }
    return forwardSlashed.slice(0, firstSlash);
}
|
|
370
|
+
// Lower-cased directory index file names, used for case-insensitive membership checks.
const DIRECTORY_INDEX_FILENAMES_LOWER = new Set(Array.from(getDirectoryIndexCandidates(''), (candidate) => candidate.toLowerCase()));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdorigin",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Markdown-first publishing engine with raw Markdown and HTML views.",
|
|
6
6
|
"repository": {
|
|
@@ -19,9 +19,13 @@
|
|
|
19
19
|
"cli"
|
|
20
20
|
],
|
|
21
21
|
"bin": {
|
|
22
|
-
"mdorigin": "
|
|
22
|
+
"mdorigin": "dist/cli/main.js"
|
|
23
23
|
},
|
|
24
24
|
"exports": {
|
|
25
|
+
"./search": {
|
|
26
|
+
"types": "./dist/search.d.ts",
|
|
27
|
+
"default": "./dist/search.js"
|
|
28
|
+
},
|
|
25
29
|
"./cloudflare": {
|
|
26
30
|
"types": "./dist/cloudflare.d.ts",
|
|
27
31
|
"default": "./dist/cloudflare.js"
|
|
@@ -37,10 +41,12 @@
|
|
|
37
41
|
],
|
|
38
42
|
"scripts": {
|
|
39
43
|
"dev": "tsx src/cli/main.ts dev",
|
|
40
|
-
"build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json",
|
|
44
|
+
"build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node -e \"const fs=require('node:fs'); const file='dist/cli/main.js'; const source=fs.readFileSync(file,'utf8'); if (!source.startsWith('#!/usr/bin/env node\\n')) fs.writeFileSync(file, '#!/usr/bin/env node\\n' + source);\"",
|
|
41
45
|
"build:index": "tsx src/cli/main.ts build index",
|
|
46
|
+
"build:search": "tsx src/cli/main.ts build search",
|
|
42
47
|
"build:cloudflare": "tsx src/cli/main.ts build cloudflare",
|
|
43
48
|
"init:cloudflare": "tsx src/cli/main.ts init cloudflare",
|
|
49
|
+
"search": "tsx src/cli/main.ts search",
|
|
44
50
|
"check": "tsc --noEmit -p tsconfig.json",
|
|
45
51
|
"test": "node --test --import tsx src/*.test.ts src/**/*.test.ts",
|
|
46
52
|
"prepack": "npm run build",
|
|
@@ -62,5 +68,8 @@
|
|
|
62
68
|
"@types/node": "^24.5.2",
|
|
63
69
|
"tsx": "^4.20.5",
|
|
64
70
|
"typescript": "^5.9.2"
|
|
71
|
+
},
|
|
72
|
+
"optionalDependencies": {
|
|
73
|
+
"indexbind": "^0.2.1"
|
|
65
74
|
}
|
|
66
75
|
}
|