npm - @huyooo/ai-search - Versions diffs - 0.2.1 - Mend

@huyooo/ai-search 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +431 -0
package/dist/bridge/electron.d.ts +51 -0
package/dist/bridge/electron.js +10 -0
package/dist/bridge/electron.js.map +1 -0
package/dist/chunk-GAT4F5NK.js +176 -0
package/dist/chunk-GAT4F5NK.js.map +1 -0
package/dist/chunk-YJIIX54F.js +4239 -0
package/dist/chunk-YJIIX54F.js.map +1 -0
package/dist/index-B6UR8lRu.d.ts +576 -0
package/dist/index.d.ts +508 -0
package/dist/index.js +99 -0
package/dist/index.js.map +1 -0
package/dist/tools/index.d.ts +2 -0
package/dist/tools/index.js +9 -0
package/dist/tools/index.js.map +1 -0
package/package.json +89 -0

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,508 @@
+import { I as IndexProgress, F as FileType, a as IndexedDocument, b as IndexStats } from './index-B6UR8lRu.js';
+export { f as BackupInfo, B as BatchOperationResult, C as ChunkOptions, h as DEFAULT_CONFIG, D as DocumentSearch, E as ExportInfo, H as HealthCheckResult, g as IndexError, j as IndexingPipeline, P as PipelineConfig, k as PipelineStats, S as SearchConfig, c as SearchOptions, q as SearchPluginInstance, p as SearchPluginOptions, d as SearchResult, l as SkipCheckCallback, T as TextChunk, e as WatchEvent, W as WatchOptions, i as createIndexingPipeline, m as getChunkStats, o as getSearchPlugin, n as searchPlugin, s as splitText } from './index-B6UR8lRu.js';
+export { SearchElectronBridge, SearchElectronBridgeOptions, createSearchElectronBridge } from './bridge/electron.js';
+export { Tool, ToolContext, ToolPlugin } from '@huyooo/ai-chat-core';
+import 'electron';
+/**
+ * Global progress listeners
+ *
+ * Every DocumentSearch.indexDirectory call triggers these listeners.
+ * Intended for scenarios such as the Electron bridge: no method wrapping is needed, which avoids timing issues.
+ *
+ * Uses globalThis to ensure there is only one instance in monorepo multi-package setups.
+ */
+/** Type of a global progress listener */
+type GlobalProgressListener = (progress: IndexProgress) => void;
+/** Add a global progress listener */
+declare function addGlobalProgressListener(listener: GlobalProgressListener): void;
+/** Remove a global progress listener */
+declare function removeGlobalProgressListener(listener: GlobalProgressListener): void;
+/**
+ * File extension rules
+ */
+interface ExtensionRules {
+    /** List of supported document extensions */
+    allowed: string[];
+    /** List of excluded extensions (excluded even if allowed elsewhere) */
+    excluded: string[];
+}
+/**
+ * Directory exclusion rules
+ */
+interface DirectoryRules {
+    /** Excluded directory names (case-insensitive) */
+    excludedNames: string[];
+    /** Excluded directory paths (full or relative paths) */
+    excludedPaths: string[];
+    /** Windows system directories */
+    windowsSystemDirs: string[];
+    /** Build and cache directories */
+    buildDirs: string[];
+    /** Resource directories (usually contain icons and the like, not documents) */
+    resourceDirs: string[];
+}
+/**
+ * File exclusion rules
+ */
+interface FileRules {
+    /** Whether to exclude hidden files (names starting with .) */
+    excludeHidden: boolean;
+    /** Excluded file name patterns (wildcards supported) */
+    excludedPatterns: string[];
+    /** Excluded path substrings (files whose path contains any of these strings are excluded) */
+    excludedPathContains: string[];
+}
+/**
+ * Path matching rules
+ */
+interface PathRules {
+    /** Windows-specific rules */
+    windows: {
+        /** Whether to scan only user directories */
+        onlyUserDirs: boolean;
+        /** Excluded directories located under the system drive root */
+        excludedRootDirs: string[];
+    };
+}
+/**
+ * Scan rules configuration
+ */
+interface ScanRules {
+    extensions: ExtensionRules;
+    directories: DirectoryRules;
+    files: FileRules;
+    paths: PathRules;
+}
+/**
+ * Default file extension rules
+ */
+declare const DEFAULT_EXTENSION_RULES: ExtensionRules;
+/**
+ * Default directory exclusion rules
+ * Note: all hidden directories starting with . are excluded automatically (handled in shouldExcludeDirectory)
+ * Only non-hidden directory names are listed here
+ */
+declare const DEFAULT_DIRECTORY_RULES: DirectoryRules;
+/**
+ * Default file exclusion rules
+ * Note: all hidden files starting with . are excluded automatically (handled in shouldExcludeFile)
+ */
+declare const DEFAULT_FILE_RULES: FileRules;
+/**
+ * Default path rules
+ */
+declare const DEFAULT_PATH_RULES: PathRules;
+/**
+ * Default scan rules
+ */
+declare const DEFAULT_SCAN_RULES: ScanRules;
+/**
+ * Rules manager class
+ * Provides a unified interface for rule checking and matching
+ */
+declare class ScanRulesManager {
+    private rules;
+    private excludedDirNamesSet;
+    private allowedExtensionsSet;
+    private excludedExtensionsSet;
+    constructor(rules?: ScanRules);
+    /**
+     * Check whether a file extension is allowed
+     */
+    isExtensionAllowed(ext: string): boolean;
+    /**
+     * Check whether a directory should be excluded (used with fdir's exclude method)
+     * This is the key performance optimization: exclude before entering a directory so the files inside it are never scanned
+     */
+    shouldExcludeDirectory(dirName: string): boolean;
+    /**
+     * Check whether a file should be excluded
+     * Note: hidden directories are already excluded in shouldExcludeDirectory and never reach this point
+     * This mainly handles hidden files and path pattern matching
+     */
+    shouldExcludeFile(filePath: string, fileName: string): boolean;
+    /**
+     * Path exclusion check for the Windows platform
+     */
+    private shouldExcludeWindowsPath;
+    /**
+     * Get all excluded directory names (for fdir exclude)
+     */
+    getExcludedDirectoryNames(): Set<string>;
+    /**
+     * Update the rules (allows modification at runtime)
+     */
+    updateRules(newRules: Partial<ScanRules>): void;
+    /**
+     * Get the current rules configuration
+     */
+    getRules(): Readonly<ScanRules>;
+}
+/**
+ * Create a rules manager instance
+ */
+declare function createRulesManager(customRules?: Partial<ScanRules>): ScanRulesManager;
+interface ScanConfig {
+    /** Directories to scan */
+    directories: string[];
+    /** Directories to exclude (user-defined; merged into the rules manager) */
+    excludeDirs?: string[];
+    /** Supported extensions (user-defined; merged into the rules manager) */
+    extensions?: string[];
+    /** Maximum file size */
+    maxFileSize: number;
+    /** Scan progress callback */
+    onProgress?: (progress: {
+        scanned: number;
+        currentDir?: string;
+    }) => void;
+    /** Custom rules (optional; used to override the default rules) */
+    customRules?: Partial<ScanRules>;
+}
+/**
+ * Scan directories and collect all supported document files
+ */
+declare function scanDirectories(directories: string[], config?: Partial<ScanConfig>): Promise<string[]>;
+/**
+ * Get the default scan directories (cross-platform; only user directories are scanned, not the system drive)
+ */
+declare function getDefaultDirectories(): string[];
+/**
+ * Format a file size
+ */
+declare function formatSize(bytes: number): string;
+/**
+ * Format a date
+ */
+declare function formatDate(date: Date): string;
+/**
+ * Get the file type from the file extension
+ */
+declare function getFileType(filePath: string): FileType;
+/**
+ * Extract a text snippet (for search result previews)
+ * Prefers content near where the query terms match
+ */
+declare function extractSnippet(content: string, query: string, maxLength?: number): string;
+interface ParsedDocument {
+    /** Extracted text content */
+    content: string;
+    /** Document title (if it can be extracted) */
+    title?: string;
+    /** Metadata */
+    metadata?: Record<string, unknown>;
+}
+/**
+ * Select a parser based on the file extension
+ */
+declare function parseDocument(filePath: string): Promise<ParsedDocument>;
+/**
+ * Check whether the file is supported for parsing
+ */
+declare function isSupportedDocument(filePath: string): boolean;
+/**
+ * Get the file type
+ */
+declare function getDocumentType(filePath: string): string;
+interface VectorRecord {
+    id: string;
+    vector: number[];
+}
+interface VectorSearchResult {
+    id: string;
+    distance: number;
+}
+declare class VectorStore {
+    private db;
+    private table;
+    private dbPath;
+    private tableName;
+    private dimension;
+    constructor(dbPath: string, tableName?: string, dimension?: number);
+    /**
+     * Initialize the connection
+     */
+    init(): Promise<void>;
+    /**
+     * Add vectors
+     */
+    add(records: VectorRecord[]): Promise<void>;
+    /**
+     * Update a vector (delete, then re-add)
+     */
+    update(record: VectorRecord): Promise<void>;
+    /**
+     * Get a vector by ID
+     */
+    getById(id: string): Promise<number[] | null>;
+    /**
+     * Delete a vector
+     */
+    delete(id: string): Promise<void>;
+    /**
+     * Search for the most similar vectors
+     */
+    search(queryVector: number[], limit?: number): Promise<VectorSearchResult[]>;
+    /**
+     * Get the number of records
+     */
+    count(): Promise<number>;
+    /**
+     * Check whether an ID exists
+     */
+    exists(id: string): Promise<boolean>;
+    /**
+     * Close the connection (clean up resources)
+     * Note: the LanceDB connection is cleaned up automatically when the object is destroyed, but the references can be reset explicitly
+     */
+    close(): void;
+}
+interface FullTextRecord {
+    id: string;
+    title: string;
+    content: string;
+}
+interface FullTextSearchResult {
+    id: string;
+    score: number;
+}
+declare class FullTextIndex {
+    private index;
+    private indexPath;
+    private docCount;
+    constructor(dataDir: string);
+    private createIndex;
+    /**
+     * Initialize
+     */
+    init(): Promise<void>;
+    /**
+     * Add a document
+     */
+    add(record: FullTextRecord): void;
+    /**
+     * Update a document
+     */
+    update(record: FullTextRecord): void;
+    /**
+     * Remove a document
+     */
+    remove(id: string): void;
+    /**
+     * Search
+     */
+    search(query: string, limit?: number): FullTextSearchResult[];
+    /**
+     * Get the number of documents
+     */
+    getDocCount(): number;
+    /**
+     * Get a document's content by ID
+     */
+    getContent(id: string): string | null;
+    /**
+     * Save the index
+     */
+    save(): Promise<void>;
+    /**
+     * Load the index
+     */
+    load(): Promise<void>;
+    /**
+     * Clear the index
+     */
+    clear(): void;
+}
+declare class MetaStore {
+    private db;
+    private dbPath;
+    constructor(dataDir: string);
+    /**
+     * Initialize the database tables
+     */
+    private init;
+    /**
+     * Insert or update a document
+     */
+    upsert(doc: IndexedDocument): void;
+    /**
+     * Get a document by ID
+     */
+    getById(id: string): IndexedDocument | null;
+    /**
+     * Get a document by path
+     */
+    getByPath(filePath: string): IndexedDocument | null;
+    /**
+     * Get a document by path and modification time (a more precise uniqueness check)
+     */
+    getByPathAndTime(filePath: string, modifiedTime: Date): IndexedDocument | null;
+    /**
+     * Get a document by content hash (used to detect duplicate content)
+     */
+    getByHash(contentHash: string): IndexedDocument | null;
+    /**
+     * Get the paths of all documents sharing the same content, by content hash
+     */
+    getPathsByHash(contentHash: string): string[];
+    /**
+     * Get documents by a list of IDs
+     */
+    getByIds(ids: string[]): IndexedDocument[];
+    /**
+     * Delete a document
+     */
+    delete(id: string): void;
+    /**
+     * Delete a document by path
+     */
+    deleteByPath(filePath: string): void;
+    /**
+     * Get all documents
+     */
+    getAll(): IndexedDocument[];
+    /**
+     * Get all document paths and hashes (used for incremental updates)
+     */
+    getAllPathsAndHashes(): Map<string, {
+        id: string;
+        hash: string;
+        modifiedAt: Date;
+    }>;
+    /**
+     * Get statistics
+     */
+    getStats(): IndexStats;
+    /**
+     * Clear all data
+     */
+    clear(): void;
+    /**
+     * Close the database connection
+     */
+    close(): void;
+    /**
+     * Convert a data row into a document object
+     */
+    private rowToDocument;
+}
+/**
+ * Text embedding module
+ * Uses the Doubao doubao-embedding-vision-250615 multimodal embedding model
+ *
+ * Supports:
+ * - Text embedding (with support for instructions)
+ * - Image embedding (URL or Base64)
+ * - Video embedding
+ * - Mixed multimodal input
+ */
+/** Embedding options */
+interface EmbedOptions {
+    /** Maximum text length (longer text is truncated) */
+    maxLength?: number;
+    /** Instructions (used to guide the model in understanding the intent of the input) */
+    instructions?: string;
+}
+/**
+ * Default instruction configuration
+ * Used to distinguish document indexing from query retrieval
+ */
+declare const DEFAULT_INSTRUCTIONS: {
+    /** Instruction for document indexing */
+    document: string;
+    /** Instruction for query retrieval */
+    query: string;
+};
+/**
+ * Initialize the embedding model
+ * @param apiKey Doubao API key (ARK_API_KEY)
+ * @param modelName Model name (optional)
+ * @param dimensions Vector dimension (optional, default 1024)
+ */
+declare function initEmbedder(apiKey?: string, modelName?: string, dimensions?: number): Promise<void>;
+declare function embed(text: string, options?: EmbedOptions): Promise<number[]>;
+/**
+ * Generate a document vector (for indexing)
+ * Automatically uses the document instruction
+ */
+declare function embedDocument(text: string, maxLength?: number): Promise<number[]>;
+/**
+ * Generate a query vector (for searching)
+ * Automatically uses the query instruction
+ */
+declare function embedQuery(text: string, maxLength?: number): Promise<number[]>;
+/**
+ * Generate document vectors in batch (processed serially to avoid concurrency limits)
+ * @param texts Array of texts
+ * @param maxLength Maximum text length
+ * @returns Array of vector arrays
+ */
+declare function embedBatch(texts: string[], maxLength?: number): Promise<number[][]>;
+/**
+ * Generate document vectors in batch concurrently (suited to large amounts of data)
+ * @param texts Array of texts
+ * @param concurrency Concurrency level (default 5)
+ * @param maxLength Maximum text length
+ * @returns Array of vector arrays
+ */
+declare function embedBatchConcurrent(texts: string[], concurrency?: number, maxLength?: number): Promise<number[][]>;
+/**
+ * Generate an image vector
+ * @param imageUrl Image URL or Base64 encoding (format: data:image/jpeg;base64,xxx)
+ * @returns Vector array
+ */
+declare function embedImage(imageUrl: string): Promise<number[]>;
+/**
+ * Generate a video vector
+ * @param videoUrl Video URL or Base64 encoding
+ * @returns Vector array
+ */
+declare function embedVideo(videoUrl: string): Promise<number[]>;
+/**
+ * Mixed multimodal embedding
+ * @param inputs Array of mixed inputs
+ * @returns Vector array
+ */
+declare function embedMultimodal(inputs: Array<{
+    type: 'text';
+    text: string;
+} | {
+    type: 'image_url';
+    image_url: {
+        url: string;
+    };
+} | {
+    type: 'video_url';
+    video_url: {
+        url: string;
+    };
+}>): Promise<number[]>;
+/**
+ * Get the vector dimension
+ */
+declare function getEmbeddingDimension(): number;
+/**
+ * Set the vector dimension
+ * @param dimension New dimension value
+ */
+declare function setEmbeddingDimension(dimension: number): void;
+/**
+ * Release resources (kept for API compatibility)
+ */
+declare function disposeEmbedder(): void;
+/**
+ * Check whether the embedder has been initialized
+ */
+declare function isEmbedderInitialized(): boolean;
+export { DEFAULT_DIRECTORY_RULES, DEFAULT_EXTENSION_RULES, DEFAULT_FILE_RULES, DEFAULT_INSTRUCTIONS, DEFAULT_PATH_RULES, DEFAULT_SCAN_RULES, type DirectoryRules, type EmbedOptions, type ExtensionRules, type FileRules, FileType, FullTextIndex, type GlobalProgressListener, IndexProgress, IndexStats, IndexedDocument, MetaStore, type PathRules, type ScanRules, ScanRulesManager, VectorStore, addGlobalProgressListener, createRulesManager, disposeEmbedder, embed, embedBatch, embedBatchConcurrent, embedDocument, embedImage, embedMultimodal, embedQuery, embedVideo, extractSnippet, formatDate, formatSize, getDefaultDirectories, getDocumentType, getEmbeddingDimension, getFileType, initEmbedder, isEmbedderInitialized, isSupportedDocument, parseDocument, removeGlobalProgressListener, scanDirectories, setEmbeddingDimension };

package/dist/index.js ADDED Viewed

@@ -0,0 +1,99 @@
+import {
+  SearchElectronBridge,
+  createSearchElectronBridge
+} from "./chunk-GAT4F5NK.js";
+import {
+  DEFAULT_CONFIG,
+  DEFAULT_DIRECTORY_RULES,
+  DEFAULT_EXTENSION_RULES,
+  DEFAULT_FILE_RULES,
+  DEFAULT_INSTRUCTIONS,
+  DEFAULT_PATH_RULES,
+  DEFAULT_SCAN_RULES,
+  DocumentSearch,
+  FileType,
+  FullTextIndex,
+  IndexingPipeline,
+  MetaStore,
+  ScanRulesManager,
+  VectorStore,
+  addGlobalProgressListener,
+  createIndexingPipeline,
+  createRulesManager,
+  disposeEmbedder,
+  embed,
+  embedBatch,
+  embedBatchConcurrent,
+  embedDocument,
+  embedImage,
+  embedMultimodal,
+  embedQuery,
+  embedVideo,
+  extractSnippet,
+  formatDate,
+  formatSize,
+  getChunkStats,
+  getDefaultDirectories,
+  getDocumentType,
+  getEmbeddingDimension,
+  getFileType,
+  getSearchPlugin,
+  initEmbedder,
+  isEmbedderInitialized,
+  isSupportedDocument,
+  parseDocument,
+  removeGlobalProgressListener,
+  scanDirectories,
+  searchPlugin,
+  setEmbeddingDimension,
+  splitText
+} from "./chunk-YJIIX54F.js";
+export {
+  DEFAULT_CONFIG,
+  DEFAULT_DIRECTORY_RULES,
+  DEFAULT_EXTENSION_RULES,
+  DEFAULT_FILE_RULES,
+  DEFAULT_INSTRUCTIONS,
+  DEFAULT_PATH_RULES,
+  DEFAULT_SCAN_RULES,
+  DocumentSearch,
+  FileType,
+  FullTextIndex,
+  IndexingPipeline,
+  MetaStore,
+  ScanRulesManager,
+  SearchElectronBridge,
+  VectorStore,
+  addGlobalProgressListener,
+  createIndexingPipeline,
+  createRulesManager,
+  createSearchElectronBridge,
+  disposeEmbedder,
+  embed,
+  embedBatch,
+  embedBatchConcurrent,
+  embedDocument,
+  embedImage,
+  embedMultimodal,
+  embedQuery,
+  embedVideo,
+  extractSnippet,
+  formatDate,
+  formatSize,
+  getChunkStats,
+  getDefaultDirectories,
+  getDocumentType,
+  getEmbeddingDimension,
+  getFileType,
+  getSearchPlugin,
+  initEmbedder,
+  isEmbedderInitialized,
+  isSupportedDocument,
+  parseDocument,
+  removeGlobalProgressListener,
+  scanDirectories,
+  searchPlugin,
+  setEmbeddingDimension,
+  splitText
+};
+//# sourceMappingURL=index.js.map

package/dist/index.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}

package/dist/tools/index.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export { q as SearchPluginInstance, p as SearchPluginOptions, o as getSearchPlugin, n as searchPlugin } from '../index-B6UR8lRu.js';
2	+ export { SideEffect, Tool, ToolContext, ToolPlugin, ToolResult } from '@huyooo/ai-chat-core';

package/dist/tools/index.js ADDED Viewed

@@ -0,0 +1,9 @@
+import {
+  getSearchPlugin,
+  searchPlugin
+} from "../chunk-YJIIX54F.js";
+export {
+  getSearchPlugin,
+  searchPlugin
+};
+//# sourceMappingURL=index.js.map

package/dist/tools/index.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}