mdorigin 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -4
- package/dist/adapters/cloudflare.d.ts +2 -0
- package/dist/adapters/cloudflare.js +9 -0
- package/dist/adapters/node.d.ts +2 -0
- package/dist/adapters/node.js +50 -11
- package/dist/cli/build-cloudflare.js +7 -1
- package/dist/cli/build-search.d.ts +1 -0
- package/dist/cli/build-search.js +44 -0
- package/dist/cli/dev.js +10 -1
- package/dist/cli/main.js +14 -2
- package/dist/cli/search.d.ts +1 -0
- package/dist/cli/search.js +36 -0
- package/dist/cloudflare.d.ts +2 -0
- package/dist/cloudflare.js +41 -5
- package/dist/core/api.d.ts +13 -0
- package/dist/core/api.js +160 -0
- package/dist/core/content-store.js +5 -0
- package/dist/core/content-type.d.ts +1 -0
- package/dist/core/content-type.js +3 -0
- package/dist/core/directory-index.d.ts +1 -1
- package/dist/core/directory-index.js +5 -1
- package/dist/core/markdown.d.ts +4 -0
- package/dist/core/markdown.js +53 -0
- package/dist/core/request-handler.d.ts +4 -0
- package/dist/core/request-handler.js +90 -14
- package/dist/core/site-config.d.ts +4 -0
- package/dist/core/site-config.js +14 -0
- package/dist/html/template.d.ts +5 -0
- package/dist/html/template.js +203 -11
- package/dist/html/theme.js +254 -9
- package/dist/index-builder.js +54 -29
- package/dist/search.d.ts +59 -0
- package/dist/search.js +370 -0
- package/package.json +10 -1
package/dist/index-builder.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import { readdir, readFile, stat, writeFile } from 'node:fs/promises';
|
|
1
|
+
import { readdir, readFile, realpath, stat, writeFile } from 'node:fs/promises';
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { inferDirectoryContentType } from './core/content-type.js';
|
|
4
4
|
import { getDirectoryIndexCandidates } from './core/directory-index.js';
|
|
5
|
-
import { parseMarkdownDocument } from './core/markdown.js';
|
|
5
|
+
import { getDocumentSummary, getDocumentTitle, parseMarkdownDocument, } from './core/markdown.js';
|
|
6
6
|
const INDEX_START_MARKER = '<!-- INDEX:START -->';
|
|
7
7
|
const INDEX_END_MARKER = '<!-- INDEX:END -->';
|
|
8
8
|
export async function buildDirectoryIndexes(options) {
|
|
@@ -73,7 +73,8 @@ export async function buildManagedIndexBlock(directoryPath) {
|
|
|
73
73
|
continue;
|
|
74
74
|
}
|
|
75
75
|
const fullPath = path.join(directoryPath, entry.name);
|
|
76
|
-
|
|
76
|
+
const entryStats = await stat(fullPath);
|
|
77
|
+
if (entryStats.isDirectory()) {
|
|
77
78
|
const resolvedEntry = await resolveDirectoryEntry(fullPath, entry.name);
|
|
78
79
|
if (resolvedEntry.draft) {
|
|
79
80
|
continue;
|
|
@@ -96,7 +97,7 @@ export async function buildManagedIndexBlock(directoryPath) {
|
|
|
96
97
|
}
|
|
97
98
|
continue;
|
|
98
99
|
}
|
|
99
|
-
if (!
|
|
100
|
+
if (!entryStats.isFile() || path.extname(entry.name).toLowerCase() !== '.md') {
|
|
100
101
|
continue;
|
|
101
102
|
}
|
|
102
103
|
if (entry.name === 'index.md' || entry.name === 'README.md') {
|
|
@@ -108,9 +109,9 @@ export async function buildManagedIndexBlock(directoryPath) {
|
|
|
108
109
|
continue;
|
|
109
110
|
}
|
|
110
111
|
articles.push({
|
|
111
|
-
title: parsed.meta.
|
|
112
|
+
title: getDocumentTitle(parsed.meta, parsed.body, entry.name.slice(0, -'.md'.length)),
|
|
112
113
|
date: parsed.meta.date,
|
|
113
|
-
summary: parsed.meta
|
|
114
|
+
summary: getDocumentSummary(parsed.meta, parsed.body),
|
|
114
115
|
link: `./${entry.name}`,
|
|
115
116
|
order: parsed.meta.order,
|
|
116
117
|
});
|
|
@@ -170,27 +171,55 @@ async function resolveDirectoryEntry(directoryPath, fallbackName) {
|
|
|
170
171
|
const parsed = await parseMarkdownDocument(path.basename(indexPath), source);
|
|
171
172
|
const shape = await inspectDirectoryShape(directoryPath);
|
|
172
173
|
return {
|
|
173
|
-
title: parsed.meta.
|
|
174
|
+
title: getDocumentTitle(parsed.meta, parsed.body, fallbackName),
|
|
174
175
|
type: inferDirectoryContentType(parsed.meta, shape),
|
|
175
176
|
date: parsed.meta.date,
|
|
176
|
-
summary: parsed.meta
|
|
177
|
+
summary: getDocumentSummary(parsed.meta, parsed.body),
|
|
177
178
|
draft: parsed.meta.draft === true,
|
|
178
179
|
order: parsed.meta.order,
|
|
179
180
|
};
|
|
180
181
|
}
|
|
181
182
|
async function listDirectoriesRecursively(rootDir) {
|
|
183
|
+
return listDirectoriesRecursivelyInternal(rootDir, new Set());
|
|
184
|
+
}
|
|
185
|
+
async function listDirectoriesRecursivelyInternal(rootDir, visitedRealDirectories) {
|
|
186
|
+
const realDirectoryPath = await realpath(rootDir);
|
|
187
|
+
if (visitedRealDirectories.has(realDirectoryPath)) {
|
|
188
|
+
return [];
|
|
189
|
+
}
|
|
190
|
+
visitedRealDirectories.add(realDirectoryPath);
|
|
182
191
|
const directories = [rootDir];
|
|
183
192
|
const entries = await readdir(rootDir, { withFileTypes: true });
|
|
193
|
+
const rootShape = await inspectDirectoryShape(rootDir);
|
|
184
194
|
for (const entry of entries) {
|
|
185
|
-
if (
|
|
195
|
+
if (entry.name.startsWith('.')) {
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
if (rootShape.hasSkillIndex && isIgnoredSkillSupportDirectory(entry.name)) {
|
|
186
199
|
continue;
|
|
187
200
|
}
|
|
188
|
-
|
|
201
|
+
const childPath = path.join(rootDir, entry.name);
|
|
202
|
+
const childStats = await stat(childPath);
|
|
203
|
+
if (!childStats.isDirectory()) {
|
|
204
|
+
continue;
|
|
205
|
+
}
|
|
206
|
+
directories.push(childPath);
|
|
207
|
+
const childIndexPath = await resolveDirectoryIndexFile(childPath);
|
|
208
|
+
if (childIndexPath !== null) {
|
|
209
|
+
const source = await readFile(childIndexPath, 'utf8');
|
|
210
|
+
const parsed = await parseMarkdownDocument(path.basename(childIndexPath), source);
|
|
211
|
+
const shape = await inspectDirectoryShape(childPath);
|
|
212
|
+
if (inferDirectoryContentType(parsed.meta, shape) === 'post') {
|
|
213
|
+
continue;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
directories.push(...(await listDirectoriesRecursivelyInternal(childPath, visitedRealDirectories)).slice(1));
|
|
189
217
|
}
|
|
190
218
|
return directories;
|
|
191
219
|
}
|
|
192
220
|
async function inspectDirectoryShape(directoryPath) {
|
|
193
221
|
const entries = await readdir(directoryPath, { withFileTypes: true });
|
|
222
|
+
let hasSkillIndex = false;
|
|
194
223
|
let hasChildDirectories = false;
|
|
195
224
|
let hasExtraMarkdownFiles = false;
|
|
196
225
|
let hasAssetFiles = false;
|
|
@@ -198,16 +227,21 @@ async function inspectDirectoryShape(directoryPath) {
|
|
|
198
227
|
if (entry.name.startsWith('.')) {
|
|
199
228
|
continue;
|
|
200
229
|
}
|
|
201
|
-
|
|
230
|
+
const fullPath = path.join(directoryPath, entry.name);
|
|
231
|
+
const entryStats = await stat(fullPath);
|
|
232
|
+
if (entryStats.isDirectory()) {
|
|
202
233
|
hasChildDirectories = true;
|
|
203
234
|
continue;
|
|
204
235
|
}
|
|
205
|
-
if (!
|
|
236
|
+
if (!entryStats.isFile()) {
|
|
206
237
|
continue;
|
|
207
238
|
}
|
|
208
239
|
const extension = path.extname(entry.name).toLowerCase();
|
|
209
240
|
if (extension === '.md') {
|
|
210
|
-
if (entry.name
|
|
241
|
+
if (entry.name === 'SKILL.md') {
|
|
242
|
+
hasSkillIndex = true;
|
|
243
|
+
}
|
|
244
|
+
else if (entry.name !== 'index.md' && entry.name !== 'README.md') {
|
|
211
245
|
hasExtraMarkdownFiles = true;
|
|
212
246
|
}
|
|
213
247
|
continue;
|
|
@@ -215,6 +249,7 @@ async function inspectDirectoryShape(directoryPath) {
|
|
|
215
249
|
hasAssetFiles = true;
|
|
216
250
|
}
|
|
217
251
|
return {
|
|
252
|
+
hasSkillIndex,
|
|
218
253
|
hasChildDirectories,
|
|
219
254
|
hasExtraMarkdownFiles,
|
|
220
255
|
hasAssetFiles,
|
|
@@ -262,22 +297,6 @@ function compareOptionalOrder(leftOrder, rightOrder) {
|
|
|
262
297
|
}
|
|
263
298
|
return 0;
|
|
264
299
|
}
|
|
265
|
-
function extractFirstParagraph(markdown) {
|
|
266
|
-
const paragraphs = markdown
|
|
267
|
-
.split(/\n\s*\n/g)
|
|
268
|
-
.map((paragraph) => paragraph.trim())
|
|
269
|
-
.filter((paragraph) => paragraph !== '');
|
|
270
|
-
for (const paragraph of paragraphs) {
|
|
271
|
-
if (paragraph.startsWith('#') ||
|
|
272
|
-
paragraph.startsWith('<!--') ||
|
|
273
|
-
paragraph.startsWith('- ') ||
|
|
274
|
-
paragraph.startsWith('* ')) {
|
|
275
|
-
continue;
|
|
276
|
-
}
|
|
277
|
-
return paragraph.replace(/\s+/g, ' ');
|
|
278
|
-
}
|
|
279
|
-
return undefined;
|
|
280
|
-
}
|
|
281
300
|
async function resolveDirectoryIndexFile(directoryPath) {
|
|
282
301
|
for (const candidate of getDirectoryIndexCandidates('')) {
|
|
283
302
|
const candidatePath = path.join(directoryPath, candidate);
|
|
@@ -287,6 +306,12 @@ async function resolveDirectoryIndexFile(directoryPath) {
|
|
|
287
306
|
}
|
|
288
307
|
return null;
|
|
289
308
|
}
|
|
309
|
+
/**
 * Tells whether a child directory name is one of the support folders that are
 * skipped when the parent directory contains a SKILL.md index.
 *
 * @param {string} name - Directory entry name (no path components).
 * @returns {boolean} True for `scripts`, `references`, `assets` or `templates`.
 */
function isIgnoredSkillSupportDirectory(name) {
    const supportDirectoryNames = ['scripts', 'references', 'assets', 'templates'];
    return supportDirectoryNames.includes(name);
}
|
|
290
315
|
async function pathExists(filePath) {
|
|
291
316
|
try {
|
|
292
317
|
await stat(filePath);
|
package/dist/search.d.ts
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import type { ResolvedSiteConfig } from './core/site-config.js';
|
|
2
|
+
/** Recursive description of any JSON-serialisable value. */
type JsonValue =
    | null
    | boolean
    | number
    | string
    | JsonValue[]
    | { [key: string]: JsonValue };
|
|
5
|
+
/**
 * One ranked document returned by a search.
 *
 * NOTE(review): hits are produced by the optional `indexbind` package; the
 * field meanings below are presumed from their names and from the documents
 * mdorigin feeds into the index — confirm against indexbind's documentation.
 */
export interface SearchHit {
    /** Document identifier (mdorigin indexes documents under their canonical HTML path). */
    docId: string;
    /** Path of the Markdown source relative to the content root. */
    relativePath: string;
    canonicalUrl?: string;
    title?: string;
    summary?: string;
    metadata: { [key: string]: JsonValue };
    /** Document-level score; the reranker uses it as the base rank value. */
    score: number;
    /** Presumably the highest-scoring chunk of the document. */
    bestMatch: {
        chunkId: number;
        excerpt: string;
        headingPath: Array<string>;
        charStart: number;
        charEnd: number;
        /** Chunk-level score; used as the first tie-breaker when reranking. */
        score: number;
    };
}
|
|
22
|
+
/** Minimal query interface shared by the directory-backed and bundle-backed search implementations. */
export interface SearchApi {
    /**
     * Runs a query and resolves with the reranked hits.
     *
     * @param query - Free-text query string.
     * @param options - Optional limits: `topK` caps the number of hits,
     *   `relativePathPrefix` is forwarded to the index unchanged
     *   (presumably restricting hits to paths under that prefix).
     */
    search(
        query: string,
        options?: {
            topK?: number;
            relativePathPrefix?: string;
        },
    ): Promise<SearchHit[]>;
}
|
|
28
|
+
/**
 * One file of a search bundle held in memory rather than on disk.
 *
 * `text` is served when `kind` is `'text'`; otherwise `base64` is decoded and
 * served as bytes. A missing payload is treated as empty content.
 */
export interface SearchBundleEntry {
    /** Location of the file, resolved as a URL relative to the bundle root. */
    path: string;
    kind: 'text' | 'binary';
    /** Value used for the `content-type` header when the entry is served. */
    mediaType: string;
    text?: string;
    base64?: string;
}
|
|
35
|
+
/** Inputs accepted by `buildSearchBundle`. */
export interface BuildSearchBundleOptions {
    /** Content root that is walked for Markdown documents. */
    rootDir: string;
    /** Directory handed to the bundle builder as its output location. */
    outDir: string;
    siteConfig: ResolvedSiteConfig;
    /** Whether documents flagged `draft: true` are indexed; defaults to `'exclude'`. */
    draftMode?: 'include' | 'exclude';
    /** Embedding backend forwarded to indexbind; defaults to `'model2vec'`. */
    embeddingBackend?: 'hashing' | 'model2vec';
    /** Forwarded to indexbind unchanged. */
    model?: string;
}
|
|
43
|
+
/** Summary resolved by `buildSearchBundle`; the counts are the statistics reported by the bundle builder. */
export interface BuildSearchBundleResult {
    /** Absolute form of the requested output directory. */
    outputDir: string;
    documentCount: number;
    chunkCount: number;
    vectorDimensions: number;
}
|
|
49
|
+
/** Inputs accepted by the one-shot `searchBundle` helper. */
export interface SearchBundleOptions {
    /** Directory containing a previously built search bundle. */
    indexDir: string;
    query: string;
    /** Maximum number of hits requested from the index; defaults to 10. */
    topK?: number;
    /** Forwarded to the index unchanged. */
    relativePathPrefix?: string;
}
|
|
55
|
+
/** Collects the Markdown documents under `options.rootDir` and builds a search bundle from them. */
export declare function buildSearchBundle(
    options: BuildSearchBundleOptions,
): Promise<BuildSearchBundleResult>;

/** Opens the bundle in `options.indexDir`, runs a single query and resolves with the reranked hits. */
export declare function searchBundle(
    options: SearchBundleOptions,
): Promise<SearchHit[]>;

/** Opens the bundle in `indexDir` once and resolves with a reusable search API. */
export declare function createSearchApiFromDirectory(
    indexDir: string,
): Promise<SearchApi>;

/** Creates a search API over in-memory bundle entries; the index is opened on the first search. */
export declare function createSearchApiFromBundle(
    bundleEntries: SearchBundleEntry[],
): SearchApi;
|
|
59
|
+
export {};
|
package/dist/search.js
ADDED
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
import { readdir, readFile, realpath, stat } from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { inferDirectoryContentType } from './core/content-type.js';
|
|
4
|
+
import { getDirectoryIndexCandidates } from './core/directory-index.js';
|
|
5
|
+
import { getDocumentSummary, getDocumentTitle, parseMarkdownDocument, } from './core/markdown.js';
|
|
6
|
+
/**
 * Builds a search bundle for the Markdown content below `options.rootDir`.
 *
 * The optional indexbind build module is loaded first, so a missing package
 * fails before any file is read. Drafts are excluded unless
 * `options.draftMode` says otherwise, and the embedding backend defaults to
 * `'model2vec'`.
 *
 * @param {object} options - See `BuildSearchBundleOptions`.
 * @returns {Promise<{outputDir: string, documentCount: number, chunkCount: number, vectorDimensions: number}>}
 *   The absolute output directory plus the statistics reported by the builder.
 */
export async function buildSearchBundle(options) {
    const indexbindBuild = await loadIndexbindBuildModule();
    const sourceRoot = path.resolve(options.rootDir);
    const collectOptions = { draftMode: options.draftMode ?? 'exclude' };
    const searchDocuments = await collectSearchDocuments(sourceRoot, options.siteConfig, collectOptions);
    const { documentCount, chunkCount, vectorDimensions } = await indexbindBuild.buildCanonicalBundle(
        path.resolve(options.outDir),
        searchDocuments,
        {
            embeddingBackend: options.embeddingBackend ?? 'model2vec',
            model: options.model,
            sourceRootId: path.basename(sourceRoot),
            sourceRootPath: sourceRoot,
        },
    );
    return {
        outputDir: path.resolve(options.outDir),
        documentCount,
        chunkCount,
        vectorDimensions,
    };
}
|
|
25
|
+
/**
 * One-shot search: opens the bundle stored in `options.indexDir`, runs the
 * query and returns the hits after diversity reranking.
 *
 * @param {object} options - See `SearchBundleOptions`; `topK` defaults to 10.
 * @returns {Promise<object[]>} Reranked search hits.
 */
export async function searchBundle(options) {
    const indexbindWeb = await loadIndexbindWebModule();
    const webIndex = await indexbindWeb.openWebIndex(path.resolve(options.indexDir));
    const rawHits = await webIndex.search(options.query, {
        topK: options.topK ?? 10,
        relativePathPrefix: options.relativePathPrefix,
    });
    return rerankSearchHits(rawHits);
}
|
|
33
|
+
/**
 * Opens the search bundle stored in `indexDir` once and returns an object
 * whose `search` method can be called repeatedly against that open index.
 *
 * @param {string} indexDir - Directory containing a built search bundle.
 * @returns {Promise<{search: Function}>} A `SearchApi` implementation.
 */
export async function createSearchApiFromDirectory(indexDir) {
    const indexbindWeb = await loadIndexbindWebModule();
    const webIndex = await indexbindWeb.openWebIndex(path.resolve(indexDir));
    // Unlike searchBundle, no default topK is applied here; undefined is
    // forwarded and the index decides.
    const search = async (query, options) => {
        const rawHits = await webIndex.search(query, {
            topK: options?.topK,
            relativePathPrefix: options?.relativePathPrefix,
        });
        return rerankSearchHits(rawHits);
    };
    return { search };
}
|
|
45
|
+
/**
 * Creates a search API over an in-memory bundle.
 *
 * The index is opened lazily on the first `search` call and the resulting
 * promise is shared by all later (and concurrent) calls. If opening fails,
 * the cached promise is discarded so that a later call can retry instead of
 * replaying the same rejection forever.
 *
 * @param {Array<object>} bundleEntries - Bundle files (see `SearchBundleEntry`).
 * @returns {{search: Function}} A `SearchApi` implementation.
 */
export function createSearchApiFromBundle(bundleEntries) {
    let indexPromise = null;
    const getIndex = () => {
        if (indexPromise === null) {
            const pending = openWebIndexFromBundle(bundleEntries);
            indexPromise = pending;
            // Reset the cache on failure. The identity check avoids clearing a
            // newer attempt; the rejection itself still reaches the awaiting
            // caller through `pending`.
            pending.catch(() => {
                if (indexPromise === pending) {
                    indexPromise = null;
                }
            });
        }
        return indexPromise;
    };
    return {
        async search(query, options) {
            const index = await getIndex();
            return rerankSearchHits(await index.search(query, {
                topK: options?.topK,
                relativePathPrefix: options?.relativePathPrefix,
            }));
        },
    };
}
|
|
60
|
+
/**
 * Reads and parses every searchable Markdown file under `rootDir` and turns
 * it into the document record handed to the bundle builder.
 *
 * Documents whose front matter has `draft: true` are dropped when
 * `options.draftMode` is `'exclude'`. The canonical URL is absolute when
 * `siteConfig.siteUrl` is set and falls back to the canonical path otherwise.
 *
 * @param {string} rootDir - Absolute content root.
 * @param {object} siteConfig - Resolved site configuration.
 * @param {{draftMode: string}} options - Draft handling.
 * @returns {Promise<object[]>} Document records in listing order.
 */
async function collectSearchDocuments(rootDir, siteConfig, options) {
    const collected = [];
    // Files are processed one at a time so the output keeps the listing order.
    for (const { absolutePath, relativePath } of await listSearchDocuments(rootDir)) {
        const markdown = await readFile(absolutePath, 'utf8');
        const { meta, body } = await parseMarkdownDocument(relativePath, markdown);
        const isExcludedDraft = meta.draft === true && options.draftMode === 'exclude';
        if (isExcludedDraft) {
            continue;
        }
        const canonicalPath = getCanonicalHtmlPathForContentPath(relativePath);
        let canonicalUrl = canonicalPath;
        if (siteConfig.siteUrl) {
            // Resolve relative to the site URL so a site hosted under a
            // sub-path keeps that prefix.
            const siteBase = ensureTrailingSlash(siteConfig.siteUrl);
            canonicalUrl = new URL(trimLeadingSlash(canonicalPath), siteBase).toString();
        }
        collected.push({
            docId: canonicalPath,
            sourcePath: absolutePath,
            relativePath,
            canonicalUrl,
            title: getDocumentTitle(meta, body, fallbackTitleFromRelativePath(relativePath)),
            summary: getDocumentSummary(meta, body),
            content: markdown,
            metadata: buildSearchMetadata(relativePath, canonicalPath, meta, siteConfig),
        });
    }
    return collected;
}
|
|
86
|
+
/**
 * Lists every searchable Markdown file below `rootDir`, sorted by relative
 * path so the result does not depend on directory iteration order.
 *
 * @param {string} rootDir - Absolute content root.
 * @returns {Promise<Array<{absolutePath: string, relativePath: string}>>}
 */
async function listSearchDocuments(rootDir) {
    const found = [];
    const visitedRealDirectories = new Set();
    await walkDirectory(rootDir, '', visitedRealDirectories, found);
    return found.sort((a, b) => a.relativePath.localeCompare(b.relativePath));
}
|
|
92
|
+
/**
 * Recursively collects searchable Markdown files into `results`.
 *
 * For each directory this adds its index file (if any) and every other `.md`
 * file, then descends into child directories. Entries starting with `.` are
 * skipped, support folders of a SKILL.md directory are skipped, and the
 * children of a directory whose index is inferred to be a `post` are not
 * visited. Directories are de-duplicated by real path, so symlink cycles
 * terminate.
 *
 * @param {string} absoluteDirectoryPath - Directory to scan.
 * @param {string} relativeDirectoryPath - POSIX path relative to the content root; `''` for the root.
 * @param {Set<string>} visitedRealDirectories - Real paths already walked (mutated).
 * @param {Array<{absolutePath: string, relativePath: string}>} results - Output list (mutated).
 * @returns {Promise<void>}
 */
async function walkDirectory(absoluteDirectoryPath, relativeDirectoryPath, visitedRealDirectories, results) {
    const realDirectoryPath = await realpath(absoluteDirectoryPath);
    if (visitedRealDirectories.has(realDirectoryPath)) {
        return;
    }
    visitedRealDirectories.add(realDirectoryPath);
    const entries = await readdir(absoluteDirectoryPath, { withFileTypes: true });
    const shape = await inspectDirectoryShape(absoluteDirectoryPath);
    const indexFile = await resolveVisibleDirectoryIndexFile(absoluteDirectoryPath, relativeDirectoryPath);
    if (indexFile) {
        results.push(indexFile);
    }
    // Whether this directory is a post depends only on its own index file, so
    // the verdict is computed at most once (lazily, on the first child
    // directory) instead of re-reading and re-parsing the index per child.
    let isPostDirectory = indexFile ? undefined : false;
    for (const entry of entries) {
        if (entry.name.startsWith('.')) {
            continue;
        }
        const absoluteEntryPath = path.join(absoluteDirectoryPath, entry.name);
        // stat (not the Dirent) is used so symlinks are classified by their target.
        const entryStats = await stat(absoluteEntryPath);
        const relativeEntryPath = relativeDirectoryPath === ''
            ? entry.name
            : path.posix.join(relativeDirectoryPath, entry.name);
        if (entryStats.isFile()) {
            const isMarkdown = path.posix.extname(entry.name).toLowerCase() === '.md';
            // Index candidates are represented by `indexFile` above; listing
            // them again would duplicate or shadow the directory document.
            if (isMarkdown && !DIRECTORY_INDEX_FILENAMES_LOWER.has(entry.name.toLowerCase())) {
                results.push({
                    absolutePath: absoluteEntryPath,
                    relativePath: relativeEntryPath,
                });
            }
            continue;
        }
        if (!entryStats.isDirectory()) {
            continue;
        }
        if (shape.hasSkillIndex && isIgnoredSkillSupportDirectory(entry.name)) {
            continue;
        }
        if (isPostDirectory === undefined) {
            const source = await readFile(indexFile.absolutePath, 'utf8');
            const parsed = await parseMarkdownDocument(indexFile.relativePath, source);
            isPostDirectory = inferDirectoryContentType(parsed.meta, shape) === 'post';
        }
        if (isPostDirectory) {
            continue;
        }
        await walkDirectory(absoluteEntryPath, relativeEntryPath, visitedRealDirectories, results);
    }
}
|
|
142
|
+
/**
 * Finds the index document of a directory: the first index candidate name, in
 * candidate order, that exists on disk.
 *
 * @param {string} absoluteDirectoryPath - Directory to probe.
 * @param {string} relativeDirectoryPath - POSIX path relative to the content root; `''` for the root.
 * @returns {Promise<{absolutePath: string, relativePath: string} | null>}
 *   The index file, or `null` when no candidate exists.
 */
async function resolveVisibleDirectoryIndexFile(absoluteDirectoryPath, relativeDirectoryPath) {
    const isContentRoot = relativeDirectoryPath === '';
    for (const candidateName of getDirectoryIndexCandidates('')) {
        const absolutePath = path.join(absoluteDirectoryPath, candidateName);
        if (!(await pathExists(absolutePath))) {
            continue;
        }
        const relativePath = isContentRoot
            ? candidateName
            : path.posix.join(relativeDirectoryPath, candidateName);
        return { absolutePath, relativePath };
    }
    return null;
}
|
|
156
|
+
/**
 * Summarises what a directory directly contains, ignoring dot-entries.
 *
 * @param {string} directoryPath - Directory to inspect (not recursive).
 * @returns {Promise<{hasSkillIndex: boolean, hasChildDirectories: boolean, hasExtraMarkdownFiles: boolean, hasAssetFiles: boolean}>}
 *   - `hasSkillIndex`: a file named exactly `SKILL.md` exists.
 *   - `hasChildDirectories`: at least one sub-directory exists.
 *   - `hasExtraMarkdownFiles`: a `.md` file other than `SKILL.md`, `index.md` or `README.md` exists.
 *   - `hasAssetFiles`: a regular file without the `.md` extension exists.
 */
async function inspectDirectoryShape(directoryPath) {
    const shape = {
        hasSkillIndex: false,
        hasChildDirectories: false,
        hasExtraMarkdownFiles: false,
        hasAssetFiles: false,
    };
    const listing = await readdir(directoryPath, { withFileTypes: true });
    for (const { name } of listing) {
        if (name.startsWith('.')) {
            continue;
        }
        // stat follows symlinks, so a link is classified by what it points at.
        const stats = await stat(path.join(directoryPath, name));
        if (stats.isDirectory()) {
            shape.hasChildDirectories = true;
        }
        else if (!stats.isFile()) {
            // Sockets, devices, etc. do not influence the shape.
        }
        else if (path.extname(name).toLowerCase() !== '.md') {
            shape.hasAssetFiles = true;
        }
        else if (name === 'SKILL.md') {
            shape.hasSkillIndex = true;
        }
        else if (name !== 'index.md' && name !== 'README.md') {
            shape.hasExtraMarkdownFiles = true;
        }
    }
    return shape;
}
|
|
194
|
+
/**
 * Builds the metadata record stored with an indexed document.
 *
 * `markdownPath`, `canonicalPath` and `siteTitle` are always present. The
 * front-matter fields `type`, `date`, `order` and `aliases` are copied only
 * when they have the expected shape (`'page'`/`'post'`, string, number, array).
 *
 * @param {string} relativePath - Markdown path relative to the content root.
 * @param {string} canonicalPath - Canonical HTML path of the document.
 * @param {object} meta - Parsed front matter.
 * @param {object} siteConfig - Resolved site configuration.
 * @returns {object} Metadata record.
 */
function buildSearchMetadata(relativePath, canonicalPath, meta, siteConfig) {
    const hasKnownType = meta.type === 'page' || meta.type === 'post';
    return {
        markdownPath: `/${relativePath}`,
        canonicalPath,
        siteTitle: siteConfig.siteTitle,
        ...(hasKnownType ? { type: meta.type } : {}),
        ...(typeof meta.date === 'string' ? { date: meta.date } : {}),
        ...(typeof meta.order === 'number' ? { order: meta.order } : {}),
        ...(Array.isArray(meta.aliases) ? { aliases: meta.aliases } : {}),
    };
}
|
|
214
|
+
/**
 * Derives a title for documents that do not provide one themselves.
 *
 * Directory index files are named after their containing directory; any other
 * file is named after itself with a trailing `.md` (any case) removed.
 *
 * NOTE(review): for an index file at the content root the containing
 * directory is `.`, so the fallback title is the literal string `"."`.
 *
 * @param {string} relativePath - POSIX path relative to the content root.
 * @returns {string} Fallback title.
 */
function fallbackTitleFromRelativePath(relativePath) {
    const fileName = path.posix.basename(relativePath);
    const isDirectoryIndex = DIRECTORY_INDEX_FILENAMES_LOWER.has(fileName.toLowerCase());
    if (!isDirectoryIndex) {
        return fileName.replace(/\.md$/i, '');
    }
    const containingDirectory = path.posix.dirname(relativePath);
    return path.posix.basename(containingDirectory);
}
|
|
221
|
+
/**
 * Maps a Markdown content path to the canonical HTML path it is served at.
 *
 * `index.md`, `readme.md` and `skill.md` (any case) map to their directory
 * with a trailing slash (`/` at the root); every other file maps to its path
 * with the last three characters (the `.md` extension) removed.
 *
 * @param {string} contentPath - POSIX path relative to the content root.
 * @returns {string} Canonical path starting with `/`.
 */
function getCanonicalHtmlPathForContentPath(contentPath) {
    const directoryDocumentNames = ['index.md', 'readme.md', 'skill.md'];
    const fileName = path.posix.basename(contentPath).toLowerCase();
    if (!directoryDocumentNames.includes(fileName)) {
        return `/${contentPath.slice(0, -'.md'.length)}`;
    }
    const directory = path.posix.dirname(contentPath);
    if (directory === '.') {
        return '/';
    }
    return `/${directory}/`;
}
|
|
231
|
+
/**
 * Removes a single leading `/` from `value`, if present.
 *
 * @param {string} value
 * @returns {string}
 */
function trimLeadingSlash(value) {
    if (value.startsWith('/')) {
        return value.slice(1);
    }
    return value;
}
|
|
234
|
+
/**
 * Appends `/` to `value` unless it already ends with one.
 *
 * @param {string} value
 * @returns {string}
 */
function ensureTrailingSlash(value) {
    if (value.endsWith('/')) {
        return value;
    }
    return `${value}/`;
}
|
|
237
|
+
/**
 * Checks whether something exists at `filePath`.
 *
 * Only an `ENOENT` failure counts as "does not exist"; any other stat error
 * (for example a permission error) is rethrown.
 *
 * @param {string} filePath - Path to probe.
 * @returns {Promise<boolean>}
 */
async function pathExists(filePath) {
    const isMissingPathError = (error) => typeof error === 'object' &&
        error !== null &&
        'code' in error &&
        error.code === 'ENOENT';
    try {
        await stat(filePath);
    }
    catch (error) {
        if (isMissingPathError(error)) {
            return false;
        }
        throw error;
    }
    return true;
}
|
|
252
|
+
/**
 * Tells whether `name` is one of the support folders that are not walked
 * when the parent directory contains a SKILL.md index.
 *
 * @param {string} name - Directory entry name (no path components).
 * @returns {boolean}
 */
function isIgnoredSkillSupportDirectory(name) {
    switch (name) {
        case 'scripts':
        case 'references':
        case 'assets':
        case 'templates':
            return true;
        default:
            return false;
    }
}
|
|
258
|
+
/**
 * Dynamically imports `indexbind/build`.
 *
 * The import is deferred to call time so the package stays optional; a failed
 * import is rethrown with installation instructions and the original error as
 * `cause`.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} When the module cannot be imported.
 */
async function loadIndexbindBuildModule() {
    let indexbindBuild;
    try {
        indexbindBuild = await import('indexbind/build');
    }
    catch (error) {
        const cause = error instanceof Error ? error : undefined;
        throw new Error(`Search build requires the optional package "indexbind". Install it first, for example: npm install indexbind`, { cause });
    }
    return indexbindBuild;
}
|
|
266
|
+
/**
 * Dynamically imports `indexbind/web`.
 *
 * The import is deferred to call time so the package stays optional; a failed
 * import is rethrown with installation instructions and the original error as
 * `cause`.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} When the module cannot be imported.
 */
async function loadIndexbindWebModule() {
    let indexbindWeb;
    try {
        indexbindWeb = await import('indexbind/web');
    }
    catch (error) {
        const cause = error instanceof Error ? error : undefined;
        throw new Error(`Search query requires the optional package "indexbind". Install it first, for example: npm install indexbind`, { cause });
    }
    return indexbindWeb;
}
|
|
274
|
+
/**
 * Dynamically imports `indexbind/cloudflare`.
 *
 * The import is deferred to call time so the package stays optional; a failed
 * import is rethrown with installation instructions and the original error as
 * `cause`.
 *
 * @returns {Promise<object>} The imported module namespace.
 * @throws {Error} When the module cannot be imported.
 */
async function loadIndexbindCloudflareModule() {
    let indexbindCloudflare;
    try {
        indexbindCloudflare = await import('indexbind/cloudflare');
    }
    catch (error) {
        const cause = error instanceof Error ? error : undefined;
        throw new Error(`Search query requires the optional package "indexbind". Install it first, for example: npm install indexbind`, { cause });
    }
    return indexbindCloudflare;
}
|
|
282
|
+
/**
 * Opens an indexbind web index whose files live in memory.
 *
 * While `openWebIndex` runs, `globalThis.fetch` is replaced by a shim that
 * answers requests for bundle files (addressed relative to a placeholder base
 * URL) from `bundleEntries` and forwards every other request to the original
 * fetch. The original fetch is restored in all cases.
 *
 * NOTE(review): the shim is process-global for the duration of the open, and
 * it is removed as soon as `openWebIndex` settles — this assumes the index
 * fetches everything it needs while opening; confirm against indexbind.
 *
 * @param {Array<{path: string, kind: string, mediaType: string, text?: string, base64?: string}>} bundleEntries
 *   Bundle files; `text` is served for `kind === 'text'`, otherwise the
 *   decoded `base64` payload. A missing payload is served as empty content.
 * @returns {Promise<object>} The opened index.
 */
async function openWebIndexFromBundle(bundleEntries) {
    const cloudflareModule = await loadIndexbindCloudflareModule();
    const baseUrl = 'https://mdorigin-search.invalid/';
    const originalFetch = globalThis.fetch;
    // Keyed by absolute URL so lookups match what the index requests.
    const bundleMap = new Map(bundleEntries.map((entry) => [new URL(entry.path, baseUrl).toString(), entry]));
    const toRequestUrl = (input) => {
        if (typeof input === 'string') {
            return input;
        }
        return input instanceof URL ? input.toString() : input.url;
    };
    globalThis.fetch = async (input, init) => {
        const entry = bundleMap.get(toRequestUrl(input));
        if (!entry) {
            return originalFetch(input, init);
        }
        // Decode base64 only for binary entries; text entries never need it,
        // and the decoded bytes can be used as the response body directly.
        const body = entry.kind === 'text'
            ? entry.text ?? ''
            : decodeBase64(entry.base64 ?? '');
        return new Response(body, {
            status: 200,
            headers: new Headers({ 'content-type': entry.mediaType }),
        });
    };
    try {
        return await cloudflareModule.openWebIndex(new URL(baseUrl));
    }
    finally {
        globalThis.fetch = originalFetch;
    }
}
|
|
316
|
+
/**
 * Decodes a base64 string into its raw bytes.
 *
 * @param {string} value - Base64 text; `''` yields an empty array.
 * @returns {Uint8Array} Decoded bytes.
 */
function decodeBase64(value) {
    // atob yields a "binary string": one character (code 0-255) per byte.
    const binaryString = atob(value);
    const bytes = new Uint8Array(binaryString.length);
    for (let offset = 0; offset < binaryString.length; offset += 1) {
        bytes[offset] = binaryString.charCodeAt(offset);
    }
    return bytes;
}
|
|
320
|
+
/**
 * Greedily reorders hits to diversify the result list.
 *
 * On every round each remaining hit is scored against the hits already
 * chosen and the highest-scoring one is moved to the output. Scores within
 * 1e-9 of the round's leading score are treated as ties and resolved with
 * `compareHits`. The input array is not modified.
 *
 * @param {Iterable<object>} hits - Hits as returned by the index.
 * @returns {object[]} The same hits in diversified order.
 */
function rerankSearchHits(hits) {
    const pool = [...hits];
    const ranked = [];
    const pickBestIndex = () => {
        let leaderIndex = 0;
        let leaderScore = Number.NEGATIVE_INFINITY;
        pool.forEach((candidate, position) => {
            const score = getDiversifiedSearchRankScore(candidate, ranked);
            if (score > leaderScore) {
                leaderScore = score;
                leaderIndex = position;
                return;
            }
            // A tie winner replaces the leader but not the leading score.
            const isTie = Math.abs(score - leaderScore) <= 1e-9;
            if (isTie && compareHits(candidate, pool[leaderIndex]) < 0) {
                leaderIndex = position;
            }
        });
        return leaderIndex;
    };
    while (pool.length > 0) {
        const [nextHit] = pool.splice(pickBestIndex(), 1);
        ranked.push(nextHit);
    }
    return ranked;
}
|
|
343
|
+
/**
 * Computes the rank score of `hit` given the hits already selected.
 *
 * Starting from `hit.score`:
 * - each already-selected hit from the same top-level section multiplies the
 *   score by 0.9;
 * - overview documents (README/index) are further multiplied by 0.84, or by
 *   0.72 when their section is already represented.
 *
 * @param {{score: number, relativePath: string}} hit - Candidate hit.
 * @param {Array<{relativePath: string}>} selectedHits - Hits chosen so far.
 * @returns {number} Adjusted score.
 */
function getDiversifiedSearchRankScore(hit, selectedHits) {
    const section = getTopLevelSection(hit.relativePath);
    let sameSectionCount = 0;
    for (const selectedHit of selectedHits) {
        if (getTopLevelSection(selectedHit.relativePath) === section) {
            sameSectionCount += 1;
        }
    }
    const sectionAlreadyRepresented = sameSectionCount > 0;
    let rankScore = hit.score;
    if (sectionAlreadyRepresented) {
        rankScore *= Math.pow(0.9, sameSectionCount);
    }
    if (isOverviewSearchHit(hit)) {
        rankScore *= sectionAlreadyRepresented ? 0.72 : 0.84;
    }
    return rankScore;
}
|
|
355
|
+
/**
 * Tie-break comparator for hits: higher best-match score first, then
 * relative path in `localeCompare` order.
 *
 * @param {{bestMatch: {score: number}, relativePath: string}} left
 * @param {{bestMatch: {score: number}, relativePath: string}} right
 * @returns {number} Negative when `left` should come first.
 */
function compareHits(left, right) {
    const leftScore = left.bestMatch.score;
    const rightScore = right.bestMatch.score;
    if (leftScore === rightScore) {
        return left.relativePath.localeCompare(right.relativePath);
    }
    return rightScore - leftScore;
}
|
|
361
|
+
/**
 * Tells whether a hit points at an overview document, i.e. a file named
 * `readme.md` or `index.md` (case-insensitive).
 *
 * @param {{relativePath: string}} hit
 * @returns {boolean}
 */
function isOverviewSearchHit(hit) {
    const overviewFileNames = ['readme.md', 'index.md'];
    const fileName = path.posix.basename(hit.relativePath).toLowerCase();
    return overviewFileNames.includes(fileName);
}
|
|
365
|
+
/**
 * Returns the first path segment of `relativePath`, treating both `/` and
 * `\` as separators. A path without separators is its own section.
 *
 * @param {string} relativePath
 * @returns {string} Text before the first separator (may be empty).
 */
function getTopLevelSection(relativePath) {
    const forwardSlashed = relativePath.replaceAll('\\', '/');
    const separatorIndex = forwardSlashed.indexOf('/');
    if (separatorIndex === -1) {
        return forwardSlashed;
    }
    return forwardSlashed.slice(0, separatorIndex);
}
|
|
370
|
+
// Lower-cased directory index file names, for case-insensitive membership
// checks. Declared at the end of the module; that is safe because it is only
// read inside function bodies, which run after module evaluation completes.
const DIRECTORY_INDEX_FILENAMES_LOWER = new Set(Array.from(getDirectoryIndexCandidates(''), (candidateName) => candidateName.toLowerCase()));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mdorigin",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Markdown-first publishing engine with raw Markdown and HTML views.",
|
|
6
6
|
"repository": {
|
|
@@ -22,6 +22,10 @@
|
|
|
22
22
|
"mdorigin": "dist/cli/main.js"
|
|
23
23
|
},
|
|
24
24
|
"exports": {
|
|
25
|
+
"./search": {
|
|
26
|
+
"types": "./dist/search.d.ts",
|
|
27
|
+
"default": "./dist/search.js"
|
|
28
|
+
},
|
|
25
29
|
"./cloudflare": {
|
|
26
30
|
"types": "./dist/cloudflare.d.ts",
|
|
27
31
|
"default": "./dist/cloudflare.js"
|
|
@@ -39,8 +43,10 @@
|
|
|
39
43
|
"dev": "tsx src/cli/main.ts dev",
|
|
40
44
|
"build": "node -e \"require('node:fs').rmSync('dist', { recursive: true, force: true })\" && tsc -p tsconfig.json && node -e \"const fs=require('node:fs'); const file='dist/cli/main.js'; const source=fs.readFileSync(file,'utf8'); if (!source.startsWith('#!/usr/bin/env node\\n')) fs.writeFileSync(file, '#!/usr/bin/env node\\n' + source);\"",
|
|
41
45
|
"build:index": "tsx src/cli/main.ts build index",
|
|
46
|
+
"build:search": "tsx src/cli/main.ts build search",
|
|
42
47
|
"build:cloudflare": "tsx src/cli/main.ts build cloudflare",
|
|
43
48
|
"init:cloudflare": "tsx src/cli/main.ts init cloudflare",
|
|
49
|
+
"search": "tsx src/cli/main.ts search",
|
|
44
50
|
"check": "tsc --noEmit -p tsconfig.json",
|
|
45
51
|
"test": "node --test --import tsx src/*.test.ts src/**/*.test.ts",
|
|
46
52
|
"prepack": "npm run build",
|
|
@@ -62,5 +68,8 @@
|
|
|
62
68
|
"@types/node": "^24.5.2",
|
|
63
69
|
"tsx": "^4.20.5",
|
|
64
70
|
"typescript": "^5.9.2"
|
|
71
|
+
},
|
|
72
|
+
"optionalDependencies": {
|
|
73
|
+
"indexbind": "^0.2.1"
|
|
65
74
|
}
|
|
66
75
|
}
|