plugin-docpixie 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/client.d.ts +1 -0
- package/client.js +1 -0
- package/dist/client/index.js +10 -0
- package/dist/externalVersion.js +17 -0
- package/dist/index.js +48 -0
- package/dist/locale/en-US.json +21 -0
- package/dist/locale/vi-VN.json +21 -0
- package/dist/server/collections/docpixie-config.js +61 -0
- package/dist/server/collections/docpixie-documents.js +71 -0
- package/dist/server/collections/docpixie-pages.js +59 -0
- package/dist/server/exceptions.js +127 -0
- package/dist/server/index.js +49 -0
- package/dist/server/plugin.js +178 -0
- package/dist/server/prompts.js +388 -0
- package/dist/server/providers/index.js +36 -0
- package/dist/server/providers/llm-adapter.js +253 -0
- package/dist/server/services/DocPixieService.js +1300 -0
- package/dist/server/types.js +24 -0
- package/package.json +40 -0
- package/server.d.ts +1 -0
- package/server.js +1 -0
|
@@ -0,0 +1,1300 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* This file is part of the NocoBase (R) project.
|
|
3
|
+
* Copyright (c) 2020-2024 NocoBase Co., Ltd.
|
|
4
|
+
* Authors: NocoBase Team.
|
|
5
|
+
*
|
|
6
|
+
* This project is dual-licensed under AGPL-3.0 and NocoBase Commercial License.
|
|
7
|
+
* For more information, please refer to: https://www.nocobase.com/agreement.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
// esbuild-generated CommonJS/ESM interop helpers (standard bundler preamble).
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Define lazy, enumerable getters on `target` for every key in `all`
// (used below to register the module's named exports).
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copy own properties from `from` onto `to` as getters, skipping `except`
// and any key already present on `to`; preserves each source property's
// enumerability via its descriptor.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wrap a require()'d CommonJS module so it can be consumed like an ES module.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
// Mark an export namespace as an ES module and copy its exports onto it,
// producing the object assigned to module.exports.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
37
|
+
// Module export wiring: expose DocPixieService as a named CommonJS export.
var DocPixieService_exports = {};
__export(DocPixieService_exports, {
  DocPixieService: () => DocPixieService
});
module.exports = __toCommonJS(DocPixieService_exports);
// Node built-ins, wrapped for ESM-style namespace access.
var fs = __toESM(require("fs"));
var path = __toESM(require("path"));
// Sibling plugin modules: prompt templates, the plugin-ai adapter, and
// the typed error hierarchy used throughout this service.
var import_prompts = require("../prompts");
var import_llm_adapter = require("../providers/llm-adapter");
var import_exceptions = require("../exceptions");
|
|
47
|
+
// File extensions accepted by processDocument(); enforced in extractPages().
const SUPPORTED_EXTENSIONS = /* @__PURE__ */ new Set([
  ".pdf",
  ".jpg",
  ".jpeg",
  ".png",
  ".gif",
  ".webp",
  ".bmp",
  ".tiff",
  ".tif"
]);
// Extension → MIME type lookup covering the same supported formats.
const MIME_TYPES = {
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".png": "image/png",
  ".gif": "image/gif",
  ".webp": "image/webp",
  ".bmp": "image/bmp",
  ".tiff": "image/tiff",
  ".tif": "image/tiff",
  ".pdf": "application/pdf"
};
|
|
69
|
+
/**
 * Generate a short pseudo-random task id (e.g. "task_k3x9a1b").
 * Not cryptographically unique — ids are only used for lookups within a
 * single plan, where collisions are vanishingly unlikely.
 */
function makeTaskId() {
  return `task_${Math.random().toString(36).substring(2, 9)}`;
}
/**
 * Build a mutable task plan for the adaptive query pipeline.
 *
 * @param {string} query - The (possibly reformulated) user query.
 * @param {Array<object>} tasks - Initial task descriptors; each is shallow-
 *   copied and stamped with a generated id and "pending" status.
 * @param {number} maxIterations - Upper bound on adaptive-plan iterations.
 * @returns {object} Plan object carrying the task list plus bookkeeping
 *   helpers (hasPendingTasks, getNextPendingTask, getCompletedTasks,
 *   addTask, removeTask).
 */
function createTaskPlan(query, tasks, maxIterations) {
  const plan = {
    initialQuery: query,
    tasks: tasks.map((t) => ({
      ...t,
      id: makeTaskId(),
      status: "pending"
    })),
    currentIteration: 0,
    maxIterations,
    /** @returns {boolean} true while any task is still pending. */
    hasPendingTasks() {
      return this.tasks.some((t) => t.status === "pending");
    },
    /** @returns {object|null} the first pending task in insertion order. */
    getNextPendingTask() {
      return this.tasks.find((t) => t.status === "pending") || null;
    },
    /** @returns {Array<object>} all tasks whose status is "completed". */
    getCompletedTasks() {
      return this.tasks.filter((t) => t.status === "completed");
    },
    /**
     * Append a new pending task (shallow copy of `task` with a fresh id).
     * @returns {object} the task as stored in the plan.
     */
    addTask(task) {
      const newTask = {
        ...task,
        id: makeTaskId(),
        status: "pending"
      };
      this.tasks.push(newTask);
      return newTask;
    },
    /**
     * Remove a task by id — only while it is still pending, so results of
     * completed tasks are never discarded by the adaptive updater.
     * @returns {boolean} whether a task was removed.
     */
    removeTask(taskId) {
      const idx = this.tasks.findIndex((t) => t.id === taskId && t.status === "pending");
      if (idx === -1) return false;
      this.tasks.splice(idx, 1);
      return true;
    }
  };
  return plan;
}
|
|
106
|
+
/**
 * Core service behind the DocPixie plugin: document ingestion (extraction,
 * storage, summarization) and the adaptive RAG query pipeline. One instance
 * is constructed by the plugin and shared across requests.
 */
class DocPixieService {
  app;
  db;
  logger;
  // Plugin configuration loaded from docpixie_config (null until initialize()).
  config = null;
  // Text/vision LLM adapter resolved from plugin-ai (null until initialize()).
  llmProvider = null;
  // Optional OCR backend built from config (null when not configured).
  ocrProvider = null;
  /**
   * @param app - NocoBase application instance (used to reach plugin-ai).
   * @param db - NocoBase database (repositories for docpixie_* collections).
   * @param logger - Plugin-scoped logger.
   */
  constructor(app, db, logger) {
    this.app = app;
    this.db = db;
    this.logger = logger;
  }
|
|
118
|
+
// ═══════════════════════════════════════════
|
|
119
|
+
// Initialization
|
|
120
|
+
// ═══════════════════════════════════════════
|
|
121
|
+
/**
|
|
122
|
+
* Initialize the service with plugin configuration.
|
|
123
|
+
*
|
|
124
|
+
* Loads config from `docpixie_config` collection, resolves LLM providers
|
|
125
|
+
* from NocoBase's plugin-ai infrastructure, and validates connectivity.
|
|
126
|
+
*
|
|
127
|
+
* Called during plugin `load()` and after config changes.
|
|
128
|
+
*
|
|
129
|
+
* @throws Error if config is missing or providers fail validation
|
|
130
|
+
*/
|
|
131
|
+
async initialize(config) {
|
|
132
|
+
if (config) {
|
|
133
|
+
this.config = config;
|
|
134
|
+
} else {
|
|
135
|
+
this.config = await this.loadConfig();
|
|
136
|
+
}
|
|
137
|
+
if (!this.config) {
|
|
138
|
+
this.logger.warn("DocPixie: No configuration found. Plugin will be inactive.");
|
|
139
|
+
return;
|
|
140
|
+
}
|
|
141
|
+
this.llmProvider = await this.resolveNocoBaseLLMProvider(this.config);
|
|
142
|
+
this.ocrProvider = this.createOCRProvider(this.config);
|
|
143
|
+
this.logger.info("DocPixie service initialized", {
|
|
144
|
+
strategy: this.config.analysisStrategy,
|
|
145
|
+
ocrProvider: this.config.ocrProvider,
|
|
146
|
+
llmService: this.config.llmServiceName,
|
|
147
|
+
visionLlmService: this.config.visionLlmServiceName
|
|
148
|
+
});
|
|
149
|
+
}
|
|
150
|
+
/**
|
|
151
|
+
* Check whether the service is ready to process documents and queries.
|
|
152
|
+
*/
|
|
153
|
+
isReady() {
|
|
154
|
+
return !!(this.config && this.llmProvider);
|
|
155
|
+
}
|
|
156
|
+
// ═══════════════════════════════════════════
|
|
157
|
+
// Document Processing
|
|
158
|
+
// ═══════════════════════════════════════════
|
|
159
|
+
  /**
   * Process a document file through the full ingestion pipeline:
   *
   * 1. Validate file exists and is a supported type (.pdf, .jpg, .png, ...)
   * 2. Copy file to storage
   * 3. Detect if page has text layer (digital PDF) or needs OCR (scanned)
   * 4. Extract structured text per page (OCR or text layer)
   * 5. Store document + pages in NocoBase collections
   * 6. Generate document summary via LLM
   * 7. Update document status to 'ready'
   *
   * Status transitions recorded on the document row:
   * pending → extracting → summarizing → ready (or → failed on any error).
   *
   * @param filePath - Absolute path to the source document
   * @param options - Optional overrides (name, userId)
   * @returns Created document record ID
   * @throws rethrows any extraction/summarization error after marking the
   *   document row as "failed" with the stringified error in metadata.
   */
  async processDocument(filePath, options) {
    this.ensureReady();
    const docRepo = this.db.getRepository("docpixie_documents");
    const pageRepo = this.db.getRepository("docpixie_pages");
    // Create the record up-front so any later failure can be stamped on it.
    const doc = await docRepo.create({
      values: {
        name: (options == null ? void 0 : options.name) || this.extractFileName(filePath),
        originalPath: filePath,
        status: "pending",
        extractionMethod: this.config.ocrProvider,
        createdById: options == null ? void 0 : options.userId,
        createdAt: /* @__PURE__ */ new Date(),
        updatedAt: /* @__PURE__ */ new Date()
      }
    });
    const documentId = doc.get("id");
    try {
      // pending → extracting
      await docRepo.update({
        filterByTk: documentId,
        values: { status: "extracting" }
      });
      const pages = await this.extractPages(filePath, documentId);
      // Persist one docpixie_pages row per extracted page (sequentially).
      for (const page of pages) {
        await pageRepo.create({
          values: {
            documentId,
            pageNumber: page.pageNumber,
            imagePath: page.imagePath,
            structuredText: page.structuredText,
            regions: page.regions,
            hasTables: page.hasTables,
            hasFigures: page.hasFigures,
            headings: page.headings,
            extractionMethod: page.extractionMethod
          }
        });
      }
      // extracting → summarizing, then ask the LLM for a document summary.
      await docRepo.update({
        filterByTk: documentId,
        values: { status: "summarizing" }
      });
      const summary = await this.generateSummary(documentId, pages);
      // summarizing → ready, with final page count and summary.
      await docRepo.update({
        filterByTk: documentId,
        values: {
          status: "ready",
          pageCount: pages.length,
          summary,
          updatedAt: /* @__PURE__ */ new Date()
        }
      });
      this.logger.info(`DocPixie: Document processed \u2014 id=${documentId}, pages=${pages.length}`);
      return documentId;
    } catch (error) {
      // Mark the row failed (keeping the error for inspection) and rethrow
      // so callers still observe the original failure.
      await docRepo.update({
        filterByTk: documentId,
        values: { status: "failed", metadata: { error: String(error) } }
      });
      this.logger.error(`DocPixie: Document processing failed \u2014 id=${documentId}`, error);
      throw error;
    }
  }
|
|
236
|
+
/**
|
|
237
|
+
* Delete a document and all its pages from the database and filesystem.
|
|
238
|
+
*/
|
|
239
|
+
async deleteDocument(documentId) {
|
|
240
|
+
const docRepo = this.db.getRepository("docpixie_documents");
|
|
241
|
+
const pageRepo = this.db.getRepository("docpixie_pages");
|
|
242
|
+
const doc = await docRepo.findOne({ filterByTk: documentId });
|
|
243
|
+
if (!doc) return false;
|
|
244
|
+
await pageRepo.destroy({ filter: { documentId } });
|
|
245
|
+
await docRepo.destroy({ filterByTk: documentId });
|
|
246
|
+
const storageDir = path.join(process.cwd(), "storage", "docpixie", String(documentId));
|
|
247
|
+
if (fs.existsSync(storageDir)) {
|
|
248
|
+
fs.rmSync(storageDir, { recursive: true, force: true });
|
|
249
|
+
}
|
|
250
|
+
this.logger.info(`DocPixie: Document deleted \u2014 id=${documentId}`);
|
|
251
|
+
return true;
|
|
252
|
+
}
|
|
253
|
+
/**
|
|
254
|
+
* List all documents with their status and page counts.
|
|
255
|
+
*/
|
|
256
|
+
async listDocuments(options) {
|
|
257
|
+
const repo = this.db.getRepository("docpixie_documents");
|
|
258
|
+
const filter = {};
|
|
259
|
+
if (options == null ? void 0 : options.status) filter.status = options.status;
|
|
260
|
+
return repo.find({
|
|
261
|
+
filter,
|
|
262
|
+
sort: ["-createdAt"],
|
|
263
|
+
limit: (options == null ? void 0 : options.limit) || 50,
|
|
264
|
+
offset: (options == null ? void 0 : options.offset) || 0
|
|
265
|
+
});
|
|
266
|
+
}
|
|
267
|
+
/**
|
|
268
|
+
* Get a single document with all its pages loaded.
|
|
269
|
+
*/
|
|
270
|
+
async getDocument(documentId) {
|
|
271
|
+
const repo = this.db.getRepository("docpixie_documents");
|
|
272
|
+
return repo.findOne({
|
|
273
|
+
filterByTk: documentId,
|
|
274
|
+
appends: ["pages"]
|
|
275
|
+
});
|
|
276
|
+
}
|
|
277
|
+
// ═══════════════════════════════════════════
|
|
278
|
+
// Query Pipeline
|
|
279
|
+
// ═══════════════════════════════════════════
|
|
280
|
+
  /**
   * Execute a document query through the adaptive RAG pipeline.
   *
   * Pipeline steps (mirrors DocPixie's PixieRAGAgent):
   *
   * 1. **Context Processing** — Summarize long conversation history (>8 turns)
   * 2. **Query Reformulation** — Resolve pronoun references ("it" → actual subject)
   * 3. **Query Classification** — Determine if documents are needed
   * 4. **Task Planning** — Create 1-4 tasks, each assigned to one document
   * 5. **Page Selection** — For each task, select relevant pages
   * 6. **Page Analysis** — Extract answer from selected pages
   * 7. **Adaptive Update** — Agent may add/remove/modify tasks
   * 8. **Response Synthesis** — Combine all task results into final answer
   *
   * @param input - { query, documentIds, strategy?, conversationHistory? }
   * @returns result object: { answer, sourcePages, confidence, totalCost,
   *   processingTime (seconds), tasksSummary }
   */
  async query(input) {
    this.ensureReady();
    const startTime = Date.now();
    // Per-query strategy override falls back to the configured default.
    const strategy = input.strategy || this.config.analysisStrategy;
    this.logger.info("DocPixie: Query started", {
      query: input.query.substring(0, 100),
      strategy,
      documentIds: input.documentIds
    });
    // Cost is tracked per query: zero the provider's accumulator first.
    this.llmProvider.resetCost();
    const documents = await this.loadQueryDocuments(input.documentIds);
    if (documents.length === 0) {
      return this.createEmptyResult(input.query, startTime, "No documents found");
    }
    // Reformulate only when there is prior conversation to resolve against.
    let processedQuery = input.query;
    if (input.conversationHistory && input.conversationHistory.length > 0) {
      processedQuery = await this.reformulateQuery(
        input.query,
        input.conversationHistory
      );
    }
    // Queries the classifier says need no documents get a direct answer
    // with a fixed middling confidence and no source pages.
    const needsDocuments = await this.classifyQuery(processedQuery);
    if (!needsDocuments) {
      const directAnswer = await this.getDirectAnswer(processedQuery);
      return {
        answer: directAnswer,
        sourcePages: [],
        confidence: 0.5,
        totalCost: this.llmProvider.getTotalCost(),
        processingTime: (Date.now() - startTime) / 1e3,
        tasksSummary: []
      };
    }
    // Plan → execute tasks adaptively → synthesize a final answer.
    const taskPlan = await this.createInitialPlan(processedQuery, documents);
    const { taskResults, allSourcePages, analysisResults } = await this.executeAdaptivePlan(
      taskPlan,
      processedQuery,
      documents,
      strategy,
      input.conversationHistory
    );
    const answer = await this.synthesizeResponse(processedQuery, analysisResults);
    const result = {
      answer,
      sourcePages: allSourcePages,
      confidence: this.calculateConfidence(taskResults),
      totalCost: this.llmProvider.getTotalCost(),
      processingTime: (Date.now() - startTime) / 1e3,
      tasksSummary: taskResults
    };
    this.logger.info("DocPixie: Query completed", {
      processingTime: result.processingTime,
      totalCost: result.totalCost,
      tasksCompleted: taskResults.filter((t) => t.status === "completed").length,
      totalIterations: taskPlan.currentIteration
    });
    return result;
  }
|
|
352
|
+
// ═══════════════════════════════════════════
|
|
353
|
+
// Page Selection Strategies
|
|
354
|
+
// ═══════════════════════════════════════════
|
|
355
|
+
/**
|
|
356
|
+
* Select relevant pages for a task using the configured strategy.
|
|
357
|
+
*/
|
|
358
|
+
async selectPages(documentId, taskDescription, strategy, maxPages = 5) {
|
|
359
|
+
const pageRepo = this.db.getRepository("docpixie_pages");
|
|
360
|
+
const pages = await pageRepo.find({
|
|
361
|
+
filter: { documentId },
|
|
362
|
+
sort: ["pageNumber"]
|
|
363
|
+
});
|
|
364
|
+
if (pages.length === 0) return [];
|
|
365
|
+
if (pages.length <= maxPages) return pages.map((p) => p.get("pageNumber"));
|
|
366
|
+
switch (strategy) {
|
|
367
|
+
case "ocr_only":
|
|
368
|
+
return this.selectPagesByText(pages, taskDescription, maxPages);
|
|
369
|
+
case "vision":
|
|
370
|
+
return this.selectPagesByVision(pages, taskDescription, maxPages);
|
|
371
|
+
case "hybrid":
|
|
372
|
+
return this.selectPagesByText(pages, taskDescription, maxPages);
|
|
373
|
+
default:
|
|
374
|
+
return this.selectPagesByText(pages, taskDescription, maxPages);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
/**
|
|
378
|
+
* Select pages by analyzing their structuredText content with text LLM.
|
|
379
|
+
*/
|
|
380
|
+
async selectPagesByText(pages, taskDescription, maxPages) {
|
|
381
|
+
const pageSummaries = pages.map((p) => {
|
|
382
|
+
const text = p.get("structuredText") || "";
|
|
383
|
+
const headings = p.get("headings") || [];
|
|
384
|
+
const preview = text.substring(0, 300);
|
|
385
|
+
return `Page ${p.get("pageNumber")}: [Headings: ${headings.join(", ")}] ${preview}`;
|
|
386
|
+
});
|
|
387
|
+
const prompt = (0, import_prompts.fillPrompt)(import_prompts.TEXT_PAGE_SELECTION_PROMPT, {
|
|
388
|
+
task_description: taskDescription,
|
|
389
|
+
page_summaries: pageSummaries.join("\n\n"),
|
|
390
|
+
max_pages: String(maxPages)
|
|
391
|
+
});
|
|
392
|
+
const response = await this.llmProvider.processTextMessages(
|
|
393
|
+
[
|
|
394
|
+
{ role: "system", content: import_prompts.SYSTEM_PAGE_SELECTOR },
|
|
395
|
+
{ role: "user", content: prompt }
|
|
396
|
+
],
|
|
397
|
+
200,
|
|
398
|
+
0.1
|
|
399
|
+
);
|
|
400
|
+
return this.parsePageSelection(response, maxPages);
|
|
401
|
+
}
|
|
402
|
+
  /**
   * Select pages by sending their images to the vision LLM.
   * Each page contributes a "--- Page N ---" text marker followed by its
   * image at low detail (selection only needs rough layout, not fine print).
   */
  async selectPagesByVision(pages, taskDescription, maxPages) {
    const prompt = (0, import_prompts.fillPrompt)(import_prompts.VISION_PAGE_SELECTION_PROMPT, {
      query: taskDescription,
      query_description: taskDescription
    });
    const messageContent = [
      { type: "text", text: prompt }
    ];
    for (const page of pages) {
      const imagePath = page.get("imagePath");
      // Pages without a stored image cannot be shown to the vision model.
      if (imagePath) {
        messageContent.push({
          type: "text",
          text: `--- Page ${page.get("pageNumber")} ---`
        });
        messageContent.push({
          type: "image_path",
          image_path: imagePath,
          detail: "low"
        });
      }
    }
    const response = await this.llmProvider.processMultimodalMessages(
      [
        { role: "system", content: import_prompts.SYSTEM_PAGE_SELECTOR },
        { role: "user", content: messageContent }
      ],
      200,
      0.1
    );
    return this.parsePageSelection(response, maxPages);
  }
|
|
437
|
+
// ═══════════════════════════════════════════
|
|
438
|
+
// Page Analysis
|
|
439
|
+
// ═══════════════════════════════════════════
|
|
440
|
+
/**
|
|
441
|
+
* Analyze selected pages to extract information for a task.
|
|
442
|
+
*/
|
|
443
|
+
async analyzePages(pages, task, strategy, conversationHistory) {
|
|
444
|
+
const memorySummary = this.buildMemorySummary(conversationHistory);
|
|
445
|
+
try {
|
|
446
|
+
switch (strategy) {
|
|
447
|
+
case "ocr_only":
|
|
448
|
+
return await this.analyzePagesText(pages, task, memorySummary);
|
|
449
|
+
case "vision":
|
|
450
|
+
return await this.analyzePagesVision(pages, task, memorySummary);
|
|
451
|
+
case "hybrid":
|
|
452
|
+
default:
|
|
453
|
+
return await this.analyzePagesHybrid(pages, task, memorySummary);
|
|
454
|
+
}
|
|
455
|
+
} catch (err) {
|
|
456
|
+
throw new import_exceptions.TaskAnalysisError(`Page analysis failed: ${err.message}`);
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
  /** Analyze pages using only their structured text (cheapest strategy). */
  async analyzePagesText(pages, task, memorySummary) {
    // Concatenate every page's OCR text under a "=== Page N ===" header.
    const pagesContent = pages.map((p) => {
      return `=== Page ${p.get("pageNumber")} ===
${p.get("structuredText") || "(no text)"}`;
    }).join("\n\n");
    const prompt = (0, import_prompts.fillPrompt)(import_prompts.TASK_PROCESSING_PROMPT, {
      task_description: task,
      search_queries: task,
      memory_summary: memorySummary
    });
    // Single text-only call: prompt followed by the combined page text.
    return this.llmProvider.processTextMessages(
      [
        { role: "system", content: import_prompts.SYSTEM_DOCPIXIE },
        { role: "user", content: `${prompt}

Document content (OCR text):
${pagesContent}` }
      ],
      1e3,
      0.3
    );
  }
|
|
482
|
+
/** Analyze pages using only their images (most expensive). */
|
|
483
|
+
async analyzePagesVision(pages, task, memorySummary) {
|
|
484
|
+
const prompt = (0, import_prompts.fillPrompt)(import_prompts.TASK_PROCESSING_PROMPT, {
|
|
485
|
+
task_description: task,
|
|
486
|
+
search_queries: task,
|
|
487
|
+
memory_summary: memorySummary
|
|
488
|
+
});
|
|
489
|
+
const content = [{ type: "text", text: prompt }];
|
|
490
|
+
for (let i = 0; i < pages.length; i++) {
|
|
491
|
+
const page = pages[i];
|
|
492
|
+
content.push({
|
|
493
|
+
type: "image_path",
|
|
494
|
+
image_path: page.get("imagePath"),
|
|
495
|
+
detail: "high"
|
|
496
|
+
});
|
|
497
|
+
content.push({
|
|
498
|
+
type: "text",
|
|
499
|
+
text: `[Page ${i + 1} from document]`
|
|
500
|
+
});
|
|
501
|
+
}
|
|
502
|
+
return this.llmProvider.processMultimodalMessages(
|
|
503
|
+
[
|
|
504
|
+
{ role: "system", content: import_prompts.SYSTEM_DOCPIXIE },
|
|
505
|
+
{ role: "user", content }
|
|
506
|
+
],
|
|
507
|
+
600,
|
|
508
|
+
0.3
|
|
509
|
+
);
|
|
510
|
+
}
|
|
511
|
+
  /** Analyze pages using structured text as ground truth + images for context. */
  async analyzePagesHybrid(pages, task, memorySummary) {
    const prompt = (0, import_prompts.fillPrompt)(import_prompts.TASK_PROCESSING_PROMPT, {
      task_description: task,
      search_queries: task,
      memory_summary: memorySummary
    });
    const content = [];
    // OCR text is quoted verbatim so the model can cite exact figures.
    const textReference = pages.map((p) => {
      return `=== Page ${p.get("pageNumber")} (OCR Text) ===
${p.get("structuredText") || "(no text)"}`;
    }).join("\n\n");
    content.push({
      type: "text",
      text: `${prompt}

OCR text reference (use for exact numbers/data):
${textReference}

Page images below (use for charts, diagrams, visual context):`
    });
    // High-detail images supply the visual context the OCR text lacks;
    // pages without a stored image are skipped.
    for (let i = 0; i < pages.length; i++) {
      const page = pages[i];
      const imagePath = page.get("imagePath");
      if (imagePath) {
        content.push({ type: "image_path", image_path: imagePath, detail: "high" });
        content.push({ type: "text", text: `[Page ${i + 1} from document]` });
      }
    }
    return this.llmProvider.processMultimodalMessages(
      [
        {
          role: "system",
          content: `${import_prompts.SYSTEM_DOCPIXIE}
When citing numbers or data, PRIORITIZE the OCR text reference. Use page images for understanding charts, diagrams, and visual layout.`
        },
        { role: "user", content }
      ],
      1e3,
      0.3
    );
  }
|
|
553
|
+
// ═══════════════════════════════════════════
|
|
554
|
+
// Configuration
|
|
555
|
+
// ═══════════════════════════════════════════
|
|
556
|
+
/** Get current plugin configuration from the database. */
|
|
557
|
+
async getConfig() {
|
|
558
|
+
return this.loadConfig();
|
|
559
|
+
}
|
|
560
|
+
/** Update plugin configuration and reinitialize providers. */
|
|
561
|
+
async updateConfig(config) {
|
|
562
|
+
const repo = this.db.getRepository("docpixie_config");
|
|
563
|
+
const existing = await repo.findOne({});
|
|
564
|
+
if (existing) {
|
|
565
|
+
await repo.update({ filterByTk: existing.get("id"), values: config });
|
|
566
|
+
} else {
|
|
567
|
+
await repo.create({ values: config });
|
|
568
|
+
}
|
|
569
|
+
await this.initialize();
|
|
570
|
+
this.logger.info("DocPixie: Configuration updated");
|
|
571
|
+
}
|
|
572
|
+
// ═══════════════════════════════════════════
|
|
573
|
+
// Private Helpers
|
|
574
|
+
// ═══════════════════════════════════════════
|
|
575
|
+
ensureReady() {
|
|
576
|
+
if (!this.isReady()) {
|
|
577
|
+
throw new import_exceptions.DocPixieError("DocPixie service is not initialized. Call initialize() first.");
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
async loadConfig() {
|
|
581
|
+
const repo = this.db.getRepository("docpixie_config");
|
|
582
|
+
const record = await repo.findOne({});
|
|
583
|
+
if (!record) return null;
|
|
584
|
+
return {
|
|
585
|
+
llmServiceName: record.get("llmServiceName"),
|
|
586
|
+
visionLlmServiceName: record.get("visionLlmServiceName"),
|
|
587
|
+
analysisStrategy: record.get("analysisStrategy"),
|
|
588
|
+
ocrProvider: record.get("ocrProvider"),
|
|
589
|
+
ocrApiEndpoint: record.get("ocrApiEndpoint"),
|
|
590
|
+
ocrApiKey: record.get("ocrApiKey"),
|
|
591
|
+
maxPagesPerTask: record.get("maxPagesPerTask"),
|
|
592
|
+
maxTasksPerPlan: record.get("maxTasksPerPlan")
|
|
593
|
+
};
|
|
594
|
+
}
|
|
595
|
+
  /**
   * Resolve LLM providers from NocoBase's plugin-ai infrastructure.
   *
   * This is the KEY integration point — replaces the old createLLMProvider()
   * that used a standalone OpenAICompatibleProvider.
   *
   * Flow:
   * 1. Read llmServiceName from docpixie_config
   * 2. Look up the service in `llmServices` collection
   * 3. Get the registered provider class from aiManager
   * 4. Create provider instance → extract chatModel
   * 5. Wrap in NocoBaseLLMAdapter
   *
   * Never rejects: any failure is logged and a no-op provider is returned,
   * so the service stays loadable while LLM calls fail with a clear error.
   */
  async resolveNocoBaseLLMProvider(config) {
    if (!config.llmServiceName) {
      this.logger.warn("DocPixie: LLM service name not configured");
      return this.createNoopProvider();
    }
    try {
      const aiPlugin = this.app.pm.get("ai");
      if (!aiPlugin) {
        throw new import_exceptions.ProviderError("plugin-ai is not installed or enabled", "nocobase-llm");
      }
      const aiManager = aiPlugin.aiManager;
      if (!aiManager) {
        throw new import_exceptions.ProviderError("AIManager not available from plugin-ai", "nocobase-llm");
      }
      const textModel = await this.resolveChatModel(
        aiManager,
        config.llmServiceName,
        "text"
      );
      // The vision model defaults to the text model; a second chatModel is
      // resolved only when a distinct vision service is configured.
      const visionServiceName = config.visionLlmServiceName || config.llmServiceName;
      let visionModel = textModel;
      if (visionServiceName !== config.llmServiceName) {
        visionModel = await this.resolveChatModel(
          aiManager,
          visionServiceName,
          "vision"
        );
      }
      this.logger.info(`DocPixie: LLM providers resolved \u2014 text: ${config.llmServiceName}, vision: ${visionServiceName}`);
      return new import_llm_adapter.NocoBaseLLMAdapter(textModel, visionModel);
    } catch (err) {
      this.logger.error(`DocPixie: Failed to resolve LLM provider: ${err.message}`);
      return this.createNoopProvider();
    }
  }
|
|
643
|
+
  /**
   * Resolve a LangChain chatModel from NocoBase's llmServices collection.
   * Follows the same pattern as AIEmployee.getLLMService().
   *
   * @param aiManager - plugin-ai's AIManager (registry of provider classes).
   * @param serviceName - Name of the configured row in `llmServices`.
   * @param purpose - "text" or "vision"; used only in logs/error messages.
   * @throws ProviderError when the service row, the provider class, or the
   *   resulting chatModel cannot be resolved.
   */
  async resolveChatModel(aiManager, serviceName, purpose) {
    var _a, _b;
    const service = await this.db.getRepository("llmServices").findOne({
      filter: { name: serviceName }
    });
    if (!service) {
      throw new import_exceptions.ProviderError(
        `LLM service '${serviceName}' not found in llmServices. Configure it in the AI Settings.`,
        "nocobase-llm"
      );
    }
    const providerMeta = aiManager.llmProviders.get(service.provider);
    if (!providerMeta) {
      throw new import_exceptions.ProviderError(
        `LLM provider '${service.provider}' is not registered. Is the plugin installed?`,
        "nocobase-llm"
      );
    }
    const Provider = providerMeta.provider;
    const provider = new Provider({
      app: this.app,
      serviceOptions: service.options,
      modelOptions: {
        llmService: serviceName,
        // Transpiled optional chaining:
        // service.options?.defaultModel || service.options?.model
        model: ((_a = service.options) == null ? void 0 : _a.defaultModel) || ((_b = service.options) == null ? void 0 : _b.model)
      }
    });
    // Some providers expose a ready chatModel; others build it on demand.
    const chatModel = provider.chatModel || provider.createModel();
    if (!chatModel) {
      throw new import_exceptions.ProviderError(
        `Failed to create chatModel for service '${serviceName}' (${purpose})`,
        "nocobase-llm"
      );
    }
    this.logger.info(`DocPixie: Resolved ${purpose} model from service '${serviceName}' (provider: ${service.provider})`);
    return chatModel;
  }
|
|
684
|
+
/** Create a no-op provider that throws clear errors */
|
|
685
|
+
createNoopProvider() {
|
|
686
|
+
return {
|
|
687
|
+
async processTextMessages() {
|
|
688
|
+
throw new import_exceptions.ProviderError("LLM provider not configured. Set llmServiceName in DocPixie settings.", "none");
|
|
689
|
+
},
|
|
690
|
+
async processMultimodalMessages() {
|
|
691
|
+
throw new import_exceptions.ProviderError("LLM provider not configured. Set llmServiceName in DocPixie settings.", "none");
|
|
692
|
+
},
|
|
693
|
+
getTotalCost() {
|
|
694
|
+
return 0;
|
|
695
|
+
},
|
|
696
|
+
resetCost() {
|
|
697
|
+
}
|
|
698
|
+
};
|
|
699
|
+
}
|
|
700
|
+
  /**
   * Extract pages from a document file.
   *
   * Validates existence and extension, copies the original into
   * storage/docpixie/<documentId>/original<ext>, then delegates PDFs to
   * extractPdfPages(). A non-PDF image becomes a single page with the
   * stored file as its image and empty structured text (no OCR is run for
   * single images in this method).
   *
   * @param filePath - Absolute path to the source file.
   * @param documentId - Used to name the per-document storage directory.
   * @returns Array of page descriptors (pageNumber, structuredText, ...).
   * @throws ProcessingError when the file is missing or its type unsupported.
   */
  async extractPages(filePath, documentId) {
    if (!fs.existsSync(filePath)) {
      throw new import_exceptions.ProcessingError(`File not found: ${filePath}`, filePath);
    }
    const ext = path.extname(filePath).toLowerCase();
    if (!SUPPORTED_EXTENSIONS.has(ext)) {
      throw new import_exceptions.ProcessingError(
        `Unsupported file type: ${ext}. Supported: ${[...SUPPORTED_EXTENSIONS].join(", ")}`,
        filePath
      );
    }
    this.logger.info(`DocPixie: Processing file ${filePath} (ext=${ext})`);
    // Keep a private copy so deleting the upload later can't break pages.
    const storageDir = path.join(process.cwd(), "storage", "docpixie", String(documentId));
    fs.mkdirSync(storageDir, { recursive: true });
    const storedFileName = `original${ext}`;
    const storedFilePath = path.join(storageDir, storedFileName);
    fs.copyFileSync(filePath, storedFilePath);
    if (ext === ".pdf") {
      return this.extractPdfPages(storedFilePath, storageDir, documentId);
    } else {
      // Single-image document: one page, image only, no text extracted here.
      return [{
        pageNumber: 1,
        structuredText: "",
        regions: [],
        imagePath: storedFilePath,
        hasTables: false,
        hasFigures: false,
        headings: [],
        extractionMethod: "text_layer"
      }];
    }
  }
|
|
735
|
+
/**
|
|
736
|
+
* Extract pages from a PDF file.
|
|
737
|
+
*/
|
|
738
|
+
async extractPdfPages(pdfPath, storageDir, documentId) {
|
|
739
|
+
const pages = [];
|
|
740
|
+
let ocrTexts = [];
|
|
741
|
+
if (this.ocrProvider) {
|
|
742
|
+
try {
|
|
743
|
+
const available = await this.ocrProvider.isAvailable();
|
|
744
|
+
if (available) {
|
|
745
|
+
const text = await this.ocrProvider.extractText(pdfPath);
|
|
746
|
+
ocrTexts = text.split(/\f|\n{4,}/).filter((t) => t.trim().length > 0);
|
|
747
|
+
}
|
|
748
|
+
} catch (err) {
|
|
749
|
+
this.logger.warn("DocPixie: OCR extraction failed, continuing without text", err);
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
const pageCount = Math.max(ocrTexts.length, 1);
|
|
753
|
+
for (let i = 0; i < pageCount; i++) {
|
|
754
|
+
pages.push({
|
|
755
|
+
pageNumber: i + 1,
|
|
756
|
+
structuredText: ocrTexts[i] || "",
|
|
757
|
+
regions: [],
|
|
758
|
+
imagePath: pdfPath,
|
|
759
|
+
hasTables: false,
|
|
760
|
+
hasFigures: false,
|
|
761
|
+
headings: [],
|
|
762
|
+
extractionMethod: ocrTexts[i] ? this.config.ocrProvider : "text_layer"
|
|
763
|
+
});
|
|
764
|
+
}
|
|
765
|
+
this.logger.info(`DocPixie: PDF processed \u2014 ${pageCount} pages from ${pdfPath}`);
|
|
766
|
+
return pages;
|
|
767
|
+
}
|
|
768
|
+
/**
|
|
769
|
+
* Generate a summary for the entire document using LLM vision.
|
|
770
|
+
*/
|
|
771
|
+
async generateSummary(documentId, pages) {
|
|
772
|
+
if (!this.llmProvider) return "Summary not available (LLM not configured)";
|
|
773
|
+
const imagePaths = pages.map((p) => p.imagePath).filter((p) => p && fs.existsSync(p));
|
|
774
|
+
if (imagePaths.length > 0) {
|
|
775
|
+
try {
|
|
776
|
+
const content = [
|
|
777
|
+
{
|
|
778
|
+
type: "text",
|
|
779
|
+
text: `Please analyze this complete document and provide a comprehensive summary. Look at all pages together to understand the document's overall structure, main themes, key information, and purpose.`
|
|
780
|
+
}
|
|
781
|
+
];
|
|
782
|
+
for (const imgPath of imagePaths) {
|
|
783
|
+
content.push({
|
|
784
|
+
type: "image_path",
|
|
785
|
+
image_path: imgPath,
|
|
786
|
+
detail: "low"
|
|
787
|
+
});
|
|
788
|
+
}
|
|
789
|
+
return await this.llmProvider.processMultimodalMessages(
|
|
790
|
+
[
|
|
791
|
+
{
|
|
792
|
+
role: "system",
|
|
793
|
+
content: "You are a document analysis expert. Analyze all pages of this document and create a comprehensive summary."
|
|
794
|
+
},
|
|
795
|
+
{ role: "user", content }
|
|
796
|
+
],
|
|
797
|
+
400,
|
|
798
|
+
0.3
|
|
799
|
+
);
|
|
800
|
+
} catch (err) {
|
|
801
|
+
this.logger.warn("DocPixie: Vision summary failed, falling back to text", err);
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
const allText = pages.map((p) => p.structuredText).join("\n\n---\n\n");
|
|
805
|
+
if (!allText.trim()) return "Document processed (no text content extracted)";
|
|
806
|
+
const truncated = allText.substring(0, 8e3);
|
|
807
|
+
return this.llmProvider.processTextMessages(
|
|
808
|
+
[
|
|
809
|
+
{ role: "system", content: import_prompts.SYSTEM_SUMMARIZER },
|
|
810
|
+
{ role: "user", content: `Summarize this document concisely in 2-3 paragraphs:
|
|
811
|
+
|
|
812
|
+
${truncated}` }
|
|
813
|
+
],
|
|
814
|
+
500,
|
|
815
|
+
0.3
|
|
816
|
+
);
|
|
817
|
+
}
|
|
818
|
+
// ═══════════════════════════════════════════
|
|
819
|
+
// Context Processing (ported from context_processor.py)
|
|
820
|
+
// ═══════════════════════════════════════════
|
|
821
|
+
async processConversationContext(history, currentQuery) {
|
|
822
|
+
const MAX_TURNS_BEFORE_SUMMARY = 8;
|
|
823
|
+
const TURNS_TO_SUMMARIZE = 4;
|
|
824
|
+
const TURNS_TO_KEEP_FULL = 4;
|
|
825
|
+
const turns = history.filter((m) => m.role === "user").length;
|
|
826
|
+
if (turns <= MAX_TURNS_BEFORE_SUMMARY) {
|
|
827
|
+
return history.map((h) => `${h.role === "user" ? "User" : "Assistant"}: ${h.content}`).join("\n\n");
|
|
828
|
+
}
|
|
829
|
+
this.logger.info(`DocPixie: Conversation has ${turns} turns, applying context summarization`);
|
|
830
|
+
let turnCount = 0;
|
|
831
|
+
let splitIndex = 0;
|
|
832
|
+
for (let i = 0; i < history.length; i += 2) {
|
|
833
|
+
if (i + 1 < history.length && history[i].role === "user") {
|
|
834
|
+
turnCount++;
|
|
835
|
+
if (turnCount === TURNS_TO_SUMMARIZE) {
|
|
836
|
+
splitIndex = i + 2;
|
|
837
|
+
break;
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
const toSummarize = history.slice(0, splitIndex);
|
|
842
|
+
let toKeep = history.slice(splitIndex);
|
|
843
|
+
const maxKeep = TURNS_TO_KEEP_FULL * 2;
|
|
844
|
+
if (toKeep.length > maxKeep) {
|
|
845
|
+
toKeep = toKeep.slice(-maxKeep);
|
|
846
|
+
}
|
|
847
|
+
const conversationText = toSummarize.map((h) => `${h.role === "user" ? "User" : "Assistant"}: ${h.content}`).join("\n\n");
|
|
848
|
+
const summaryPrompt = (0, import_prompts.fillPrompt)(import_prompts.CONVERSATION_SUMMARIZATION_PROMPT, {
|
|
849
|
+
conversation_text: conversationText
|
|
850
|
+
});
|
|
851
|
+
const summary = await this.llmProvider.processTextMessages(
|
|
852
|
+
[
|
|
853
|
+
{ role: "system", content: "You are a helpful assistant that creates concise conversation summaries." },
|
|
854
|
+
{ role: "user", content: summaryPrompt }
|
|
855
|
+
],
|
|
856
|
+
500,
|
|
857
|
+
0.3
|
|
858
|
+
);
|
|
859
|
+
const parts = [];
|
|
860
|
+
parts.push(`Previous Conversation Summary:
|
|
861
|
+
${summary.trim()}
|
|
862
|
+
`);
|
|
863
|
+
if (toKeep.length > 0) {
|
|
864
|
+
parts.push("Recent Conversation:");
|
|
865
|
+
parts.push(
|
|
866
|
+
toKeep.map((h) => `${h.role === "user" ? "User" : "Assistant"}: ${h.content}`).join("\n\n")
|
|
867
|
+
);
|
|
868
|
+
}
|
|
869
|
+
parts.push(`
|
|
870
|
+
Current Query: ${currentQuery}`);
|
|
871
|
+
return parts.join("\n");
|
|
872
|
+
}
|
|
873
|
+
// ═══════════════════════════════════════════
|
|
874
|
+
// Query Reformulation
|
|
875
|
+
// ═══════════════════════════════════════════
|
|
876
|
+
async reformulateQuery(query, history) {
|
|
877
|
+
try {
|
|
878
|
+
const context = await this.processConversationContext(history, query);
|
|
879
|
+
const prompt = (0, import_prompts.fillPrompt)(import_prompts.QUERY_REFORMULATION_PROMPT, {
|
|
880
|
+
conversation_context: context,
|
|
881
|
+
recent_topics: "",
|
|
882
|
+
current_query: query
|
|
883
|
+
});
|
|
884
|
+
const response = await this.llmProvider.processTextMessages(
|
|
885
|
+
[
|
|
886
|
+
{ role: "system", content: import_prompts.SYSTEM_QUERY_REFORMULATOR },
|
|
887
|
+
{ role: "user", content: prompt }
|
|
888
|
+
],
|
|
889
|
+
8192,
|
|
890
|
+
0.2
|
|
891
|
+
);
|
|
892
|
+
const parsed = JSON.parse(this.sanitizeLlmJson(response));
|
|
893
|
+
const reformulated = parsed.reformulated_query || query;
|
|
894
|
+
this.logger.info(`DocPixie: Query reformulation: '${query}' \u2192 '${reformulated}'`);
|
|
895
|
+
return reformulated;
|
|
896
|
+
} catch (err) {
|
|
897
|
+
this.logger.warn(`DocPixie: Reformulation failed, using original: ${err.message}`);
|
|
898
|
+
return query;
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
// ═══════════════════════════════════════════
|
|
902
|
+
// Query Classification
|
|
903
|
+
// ═══════════════════════════════════════════
|
|
904
|
+
async classifyQuery(query) {
|
|
905
|
+
try {
|
|
906
|
+
const prompt = (0, import_prompts.fillPrompt)(import_prompts.QUERY_CLASSIFICATION_PROMPT, { query });
|
|
907
|
+
const response = await this.llmProvider.processTextMessages(
|
|
908
|
+
[
|
|
909
|
+
{ role: "system", content: import_prompts.SYSTEM_QUERY_CLASSIFIER },
|
|
910
|
+
{ role: "user", content: prompt }
|
|
911
|
+
],
|
|
912
|
+
200,
|
|
913
|
+
0.1
|
|
914
|
+
);
|
|
915
|
+
const parsed = JSON.parse(this.sanitizeLlmJson(response));
|
|
916
|
+
this.logger.info(`DocPixie: Query classification: ${parsed.reasoning}`);
|
|
917
|
+
return parsed.needs_documents !== false;
|
|
918
|
+
} catch (err) {
|
|
919
|
+
this.logger.warn(`DocPixie: Classification failed, defaulting to needs_documents=true: ${err.message}`);
|
|
920
|
+
return true;
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
async getDirectAnswer(query) {
|
|
924
|
+
return this.llmProvider.processTextMessages(
|
|
925
|
+
[
|
|
926
|
+
{ role: "system", content: import_prompts.SYSTEM_DIRECT_ANSWER },
|
|
927
|
+
{ role: "user", content: query }
|
|
928
|
+
],
|
|
929
|
+
500,
|
|
930
|
+
0.3
|
|
931
|
+
);
|
|
932
|
+
}
|
|
933
|
+
// ═══════════════════════════════════════════
|
|
934
|
+
// Adaptive Task Planning
|
|
935
|
+
// ═══════════════════════════════════════════
|
|
936
|
+
async createInitialPlan(query, documents) {
|
|
937
|
+
const docTexts = documents.map((d) => {
|
|
938
|
+
const id = d.get("id");
|
|
939
|
+
const name = d.get("name");
|
|
940
|
+
const summary = d.get("summary") || `Document with ${d.get("pageCount")} pages`;
|
|
941
|
+
return `${id}: ${name}
|
|
942
|
+
Summary: ${summary}`;
|
|
943
|
+
}).join("\n\n");
|
|
944
|
+
const prompt = (0, import_prompts.fillPrompt)(import_prompts.ADAPTIVE_INITIAL_PLANNING_PROMPT, {
|
|
945
|
+
query,
|
|
946
|
+
documents: docTexts
|
|
947
|
+
});
|
|
948
|
+
const response = await this.llmProvider.processTextMessages(
|
|
949
|
+
[
|
|
950
|
+
{ role: "system", content: import_prompts.SYSTEM_TASK_PLANNER },
|
|
951
|
+
{ role: "user", content: prompt }
|
|
952
|
+
],
|
|
953
|
+
8192,
|
|
954
|
+
0.3
|
|
955
|
+
);
|
|
956
|
+
try {
|
|
957
|
+
const parsed = JSON.parse(this.sanitizeLlmJson(response));
|
|
958
|
+
const rawTasks = (parsed.tasks || []).slice(0, this.config.maxTasksPerPlan);
|
|
959
|
+
const tasks = rawTasks.map((t) => {
|
|
960
|
+
var _a, _b;
|
|
961
|
+
const docId = t.document || t.document_id;
|
|
962
|
+
const doc = documents.find((d) => String(d.get("id")) === String(docId));
|
|
963
|
+
return {
|
|
964
|
+
name: t.name || "Unnamed Task",
|
|
965
|
+
description: t.description || "",
|
|
966
|
+
documentId: doc ? doc.get("id") : (_a = documents[0]) == null ? void 0 : _a.get("id"),
|
|
967
|
+
documentName: doc ? doc.get("name") : ((_b = documents[0]) == null ? void 0 : _b.get("name")) || "Unknown"
|
|
968
|
+
};
|
|
969
|
+
});
|
|
970
|
+
this.logger.info(`DocPixie: Initial plan created with ${tasks.length} tasks`);
|
|
971
|
+
return createTaskPlan(query, tasks, this.config.maxTasksPerPlan * 2);
|
|
972
|
+
} catch {
|
|
973
|
+
this.logger.warn("DocPixie: Failed to parse initial plan, using fallback");
|
|
974
|
+
const fallbackTasks = documents.slice(0, 2).map((d) => ({
|
|
975
|
+
name: `Analyze ${d.get("name")}`,
|
|
976
|
+
description: query,
|
|
977
|
+
documentId: d.get("id"),
|
|
978
|
+
documentName: d.get("name")
|
|
979
|
+
}));
|
|
980
|
+
return createTaskPlan(query, fallbackTasks, this.config.maxTasksPerPlan * 2);
|
|
981
|
+
}
|
|
982
|
+
}
|
|
983
|
+
/**
|
|
984
|
+
* Execute tasks with adaptive replanning.
|
|
985
|
+
* After each completed task, asks the LLM if the plan should be updated.
|
|
986
|
+
*/
|
|
987
|
+
async executeAdaptivePlan(taskPlan, query, documents, strategy, conversationHistory) {
|
|
988
|
+
const taskResults = [];
|
|
989
|
+
const allSourcePages = [];
|
|
990
|
+
const analysisResults = [];
|
|
991
|
+
let iteration = 0;
|
|
992
|
+
while (taskPlan.hasPendingTasks() && iteration < taskPlan.maxIterations) {
|
|
993
|
+
iteration++;
|
|
994
|
+
this.logger.info(`DocPixie: Agent iteration ${iteration}`);
|
|
995
|
+
const currentTask = taskPlan.getNextPendingTask();
|
|
996
|
+
if (!currentTask) break;
|
|
997
|
+
currentTask.status = "in_progress";
|
|
998
|
+
this.logger.info(`DocPixie: Executing task: ${currentTask.name}`);
|
|
999
|
+
try {
|
|
1000
|
+
const { analysis, sourcePages } = await this.executeSingleTask(
|
|
1001
|
+
currentTask,
|
|
1002
|
+
query,
|
|
1003
|
+
strategy,
|
|
1004
|
+
conversationHistory
|
|
1005
|
+
);
|
|
1006
|
+
currentTask.status = "completed";
|
|
1007
|
+
analysisResults.push(analysis);
|
|
1008
|
+
allSourcePages.push(...sourcePages);
|
|
1009
|
+
taskResults.push({
|
|
1010
|
+
taskName: currentTask.name,
|
|
1011
|
+
documentName: currentTask.documentName,
|
|
1012
|
+
pagesAnalyzed: sourcePages.map((p) => p.pageNumber),
|
|
1013
|
+
status: "completed"
|
|
1014
|
+
});
|
|
1015
|
+
this.logger.info(
|
|
1016
|
+
`DocPixie: Task completed: ${currentTask.name} (analyzed ${sourcePages.length} pages)`
|
|
1017
|
+
);
|
|
1018
|
+
if (taskPlan.hasPendingTasks()) {
|
|
1019
|
+
this.logger.info("DocPixie: Checking if task plan needs updating...");
|
|
1020
|
+
const oldTaskCount = taskPlan.tasks.length;
|
|
1021
|
+
await this.updatePlanAdaptively(
|
|
1022
|
+
taskPlan,
|
|
1023
|
+
currentTask,
|
|
1024
|
+
analysis,
|
|
1025
|
+
query,
|
|
1026
|
+
documents
|
|
1027
|
+
);
|
|
1028
|
+
if (taskPlan.tasks.length !== oldTaskCount) {
|
|
1029
|
+
this.logger.info(
|
|
1030
|
+
`DocPixie: Plan updated \u2014 ${oldTaskCount} \u2192 ${taskPlan.tasks.length} tasks`
|
|
1031
|
+
);
|
|
1032
|
+
}
|
|
1033
|
+
}
|
|
1034
|
+
} catch (error) {
|
|
1035
|
+
currentTask.status = "failed";
|
|
1036
|
+
this.logger.error(`DocPixie: Task failed \u2014 ${currentTask.name}`, error);
|
|
1037
|
+
taskResults.push({
|
|
1038
|
+
taskName: currentTask.name,
|
|
1039
|
+
documentName: currentTask.documentName,
|
|
1040
|
+
pagesAnalyzed: [],
|
|
1041
|
+
status: "failed"
|
|
1042
|
+
});
|
|
1043
|
+
}
|
|
1044
|
+
}
|
|
1045
|
+
taskPlan.currentIteration = iteration;
|
|
1046
|
+
this.logger.info(`DocPixie: Adaptive execution completed after ${iteration} iterations`);
|
|
1047
|
+
return { taskResults, allSourcePages, analysisResults };
|
|
1048
|
+
}
|
|
1049
|
+
/**
|
|
1050
|
+
* Ask the LLM whether to continue/add/remove/modify tasks.
|
|
1051
|
+
*/
|
|
1052
|
+
async updatePlanAdaptively(plan, completedTask, taskFindings, originalQuery, documents) {
|
|
1053
|
+
var _a;
|
|
1054
|
+
const planStatus = plan.tasks.map((t) => `- [${t.id}] ${t.name}: ${t.status}`).join("\n");
|
|
1055
|
+
const completedTasks = plan.getCompletedTasks();
|
|
1056
|
+
const progressSummary = completedTasks.length === 1 ? `Just completed first task: ${completedTask.name}` : "Completed tasks:\n" + completedTasks.map((t) => `\u2713 ${t.name}`).join("\n");
|
|
1057
|
+
const docTexts = documents.map((d) => {
|
|
1058
|
+
const summary = d.get("summary") || `Document with ${d.get("pageCount")} pages`;
|
|
1059
|
+
return `${d.get("id")}: ${d.get("name")}
|
|
1060
|
+
Summary: ${summary}`;
|
|
1061
|
+
}).join("\n\n");
|
|
1062
|
+
const prompt = (0, import_prompts.fillPrompt)(import_prompts.ADAPTIVE_PLAN_UPDATE_PROMPT, {
|
|
1063
|
+
original_query: originalQuery,
|
|
1064
|
+
available_documents: docTexts,
|
|
1065
|
+
current_plan_status: planStatus,
|
|
1066
|
+
completed_task_name: completedTask.name,
|
|
1067
|
+
task_findings: taskFindings.substring(0, 2e3),
|
|
1068
|
+
progress_summary: progressSummary
|
|
1069
|
+
});
|
|
1070
|
+
const response = await this.llmProvider.processTextMessages(
|
|
1071
|
+
[
|
|
1072
|
+
{ role: "system", content: import_prompts.SYSTEM_ADAPTIVE_PLANNER },
|
|
1073
|
+
{ role: "user", content: prompt }
|
|
1074
|
+
],
|
|
1075
|
+
8192,
|
|
1076
|
+
0.3
|
|
1077
|
+
);
|
|
1078
|
+
try {
|
|
1079
|
+
const updateData = JSON.parse(this.sanitizeLlmJson(response));
|
|
1080
|
+
const action = updateData.action || "continue";
|
|
1081
|
+
const reason = updateData.reason || "";
|
|
1082
|
+
this.logger.info(`DocPixie: Plan update action: ${action} \u2014 ${reason}`);
|
|
1083
|
+
switch (action) {
|
|
1084
|
+
case "continue":
|
|
1085
|
+
break;
|
|
1086
|
+
case "add_tasks": {
|
|
1087
|
+
const newTasks = updateData.new_tasks || [];
|
|
1088
|
+
for (const t of newTasks) {
|
|
1089
|
+
const docId = t.document || t.document_id;
|
|
1090
|
+
const doc = documents.find((d) => String(d.get("id")) === String(docId));
|
|
1091
|
+
plan.addTask({
|
|
1092
|
+
name: t.name || "New Task",
|
|
1093
|
+
description: t.description || "",
|
|
1094
|
+
documentId: doc ? doc.get("id") : (_a = documents[0]) == null ? void 0 : _a.get("id"),
|
|
1095
|
+
documentName: doc ? doc.get("name") : "Unknown"
|
|
1096
|
+
});
|
|
1097
|
+
this.logger.info(`DocPixie: Added new task: ${t.name}`);
|
|
1098
|
+
}
|
|
1099
|
+
break;
|
|
1100
|
+
}
|
|
1101
|
+
case "remove_tasks": {
|
|
1102
|
+
const toRemove = updateData.tasks_to_remove || [];
|
|
1103
|
+
for (const taskId of toRemove) {
|
|
1104
|
+
if (plan.removeTask(taskId)) {
|
|
1105
|
+
this.logger.info(`DocPixie: Removed task: ${taskId}`);
|
|
1106
|
+
}
|
|
1107
|
+
}
|
|
1108
|
+
break;
|
|
1109
|
+
}
|
|
1110
|
+
case "modify_tasks": {
|
|
1111
|
+
const modifications = updateData.modified_tasks || [];
|
|
1112
|
+
for (const mod of modifications) {
|
|
1113
|
+
const task = plan.tasks.find((t) => t.id === mod.task_id && t.status === "pending");
|
|
1114
|
+
if (task) {
|
|
1115
|
+
const oldName = task.name;
|
|
1116
|
+
task.name = mod.new_name || task.name;
|
|
1117
|
+
task.description = mod.new_description || task.description;
|
|
1118
|
+
if (mod.new_document) {
|
|
1119
|
+
const doc = documents.find((d) => String(d.get("id")) === String(mod.new_document));
|
|
1120
|
+
if (doc) {
|
|
1121
|
+
task.documentId = doc.get("id");
|
|
1122
|
+
task.documentName = doc.get("name");
|
|
1123
|
+
}
|
|
1124
|
+
}
|
|
1125
|
+
this.logger.info(`DocPixie: Modified task '${oldName}' \u2192 '${task.name}'`);
|
|
1126
|
+
}
|
|
1127
|
+
}
|
|
1128
|
+
break;
|
|
1129
|
+
}
|
|
1130
|
+
}
|
|
1131
|
+
} catch (err) {
|
|
1132
|
+
this.logger.warn("DocPixie: Failed to parse plan update response, continuing unchanged");
|
|
1133
|
+
}
|
|
1134
|
+
plan.currentIteration++;
|
|
1135
|
+
}
|
|
1136
|
+
/** Execute a single task: select pages → analyze. */
|
|
1137
|
+
async executeSingleTask(task, query, strategy, conversationHistory) {
|
|
1138
|
+
const selectedPageNumbers = await this.selectPages(
|
|
1139
|
+
task.documentId,
|
|
1140
|
+
task.description,
|
|
1141
|
+
strategy,
|
|
1142
|
+
this.config.maxPagesPerTask
|
|
1143
|
+
);
|
|
1144
|
+
const pageRepo = this.db.getRepository("docpixie_pages");
|
|
1145
|
+
const pages = await pageRepo.find({
|
|
1146
|
+
filter: {
|
|
1147
|
+
documentId: task.documentId,
|
|
1148
|
+
pageNumber: { $in: selectedPageNumbers }
|
|
1149
|
+
},
|
|
1150
|
+
sort: ["pageNumber"]
|
|
1151
|
+
});
|
|
1152
|
+
const analysis = await this.analyzePages(pages, task.description, strategy, conversationHistory);
|
|
1153
|
+
const sourcePages = selectedPageNumbers.map((pn) => ({
|
|
1154
|
+
documentId: task.documentId,
|
|
1155
|
+
documentName: task.documentName,
|
|
1156
|
+
pageNumber: pn
|
|
1157
|
+
}));
|
|
1158
|
+
return { analysis, sourcePages };
|
|
1159
|
+
}
|
|
1160
|
+
// ═══════════════════════════════════════════
|
|
1161
|
+
// Response Synthesis
|
|
1162
|
+
// ═══════════════════════════════════════════
|
|
1163
|
+
async synthesizeResponse(query, analyses) {
|
|
1164
|
+
if (analyses.length === 0) return "No relevant information found in the documents.";
|
|
1165
|
+
if (analyses.length === 1) return analyses[0];
|
|
1166
|
+
const resultsText = analyses.map((a, i) => `--- Analysis ${i + 1} ---
|
|
1167
|
+
${a}`).join("\n\n");
|
|
1168
|
+
const prompt = (0, import_prompts.fillPrompt)(import_prompts.SYNTHESIS_PROMPT, {
|
|
1169
|
+
original_query: query,
|
|
1170
|
+
results_text: resultsText
|
|
1171
|
+
});
|
|
1172
|
+
return this.llmProvider.processTextMessages(
|
|
1173
|
+
[
|
|
1174
|
+
{ role: "system", content: import_prompts.SYSTEM_SYNTHESIS },
|
|
1175
|
+
{ role: "user", content: prompt }
|
|
1176
|
+
],
|
|
1177
|
+
2e3,
|
|
1178
|
+
0.3
|
|
1179
|
+
);
|
|
1180
|
+
}
|
|
1181
|
+
/** Build memory summary from conversation history (last 4 messages). */
|
|
1182
|
+
buildMemorySummary(history) {
|
|
1183
|
+
if (!history || history.length === 0) {
|
|
1184
|
+
return "CONVERSATION CONTEXT: This is the first query in the conversation.";
|
|
1185
|
+
}
|
|
1186
|
+
const recent = history.length > 4 ? history.slice(-4) : history;
|
|
1187
|
+
const parts = ["CONVERSATION CONTEXT:"];
|
|
1188
|
+
for (const msg of recent) {
|
|
1189
|
+
const role = msg.role === "user" ? "User" : "Assistant";
|
|
1190
|
+
const content = msg.content.length > 100 ? msg.content.substring(0, 100) + "..." : msg.content;
|
|
1191
|
+
parts.push(`- ${role}: ${content}`);
|
|
1192
|
+
}
|
|
1193
|
+
return parts.join("\n");
|
|
1194
|
+
}
|
|
1195
|
+
async loadQueryDocuments(documentIds) {
|
|
1196
|
+
const repo = this.db.getRepository("docpixie_documents");
|
|
1197
|
+
const filter = { status: "ready" };
|
|
1198
|
+
if (documentIds && documentIds.length > 0) {
|
|
1199
|
+
filter.id = { $in: documentIds };
|
|
1200
|
+
}
|
|
1201
|
+
return repo.find({ filter });
|
|
1202
|
+
}
|
|
1203
|
+
calculateConfidence(tasks) {
|
|
1204
|
+
if (tasks.length === 0) return 0;
|
|
1205
|
+
const completed = tasks.filter((t) => t.status === "completed").length;
|
|
1206
|
+
return completed / tasks.length;
|
|
1207
|
+
}
|
|
1208
|
+
parsePageSelection(response, maxPages) {
|
|
1209
|
+
try {
|
|
1210
|
+
const cleaned = this.sanitizeLlmJson(response);
|
|
1211
|
+
const parsed = JSON.parse(cleaned);
|
|
1212
|
+
const pages = parsed.selected_pages || [];
|
|
1213
|
+
return pages.slice(0, maxPages).sort((a, b) => a - b);
|
|
1214
|
+
} catch {
|
|
1215
|
+
this.logger.warn("DocPixie: Failed to parse page selection response");
|
|
1216
|
+
return [1];
|
|
1217
|
+
}
|
|
1218
|
+
}
|
|
1219
|
+
/** Sanitize LLM JSON response: strip markdown fences, trailing commas. */
|
|
1220
|
+
sanitizeLlmJson(text) {
|
|
1221
|
+
let cleaned = text.trim();
|
|
1222
|
+
cleaned = cleaned.replace(/^```json?\s*/i, "").replace(/```\s*$/i, "").trim();
|
|
1223
|
+
cleaned = cleaned.replace(/,\s*([}\]])/g, "$1");
|
|
1224
|
+
return cleaned;
|
|
1225
|
+
}
|
|
1226
|
+
extractFileName(filePath) {
|
|
1227
|
+
const parts = filePath.replace(/\\/g, "/").split("/");
|
|
1228
|
+
const filename = parts[parts.length - 1];
|
|
1229
|
+
return filename.replace(/\.[^.]+$/, "");
|
|
1230
|
+
}
|
|
1231
|
+
/**
|
|
1232
|
+
* Create OCR provider adapter.
|
|
1233
|
+
* OCR is optional — when all processing is delegated to LLM vision,
|
|
1234
|
+
* this returns null (no OCR needed).
|
|
1235
|
+
*/
|
|
1236
|
+
createOCRProvider(config) {
|
|
1237
|
+
if (config.ocrProvider === "external_api" && config.ocrApiEndpoint) {
|
|
1238
|
+
return {
|
|
1239
|
+
name: "external_api",
|
|
1240
|
+
async extractText(imagePath) {
|
|
1241
|
+
const buffer = fs.readFileSync(imagePath);
|
|
1242
|
+
const base64 = buffer.toString("base64");
|
|
1243
|
+
const ext = path.extname(imagePath).toLowerCase();
|
|
1244
|
+
const mime = MIME_TYPES[ext] || "image/jpeg";
|
|
1245
|
+
const response = await fetch(config.ocrApiEndpoint, {
|
|
1246
|
+
method: "POST",
|
|
1247
|
+
headers: {
|
|
1248
|
+
"Content-Type": "application/json",
|
|
1249
|
+
...config.ocrApiKey ? { Authorization: `Bearer ${config.ocrApiKey}` } : {}
|
|
1250
|
+
},
|
|
1251
|
+
body: JSON.stringify({
|
|
1252
|
+
image: `data:${mime};base64,${base64}`
|
|
1253
|
+
})
|
|
1254
|
+
});
|
|
1255
|
+
if (!response.ok) {
|
|
1256
|
+
throw new import_exceptions.ProviderError(`OCR API returned ${response.status}`, "external_api");
|
|
1257
|
+
}
|
|
1258
|
+
const data = await response.json();
|
|
1259
|
+
return data.text || data.result || "";
|
|
1260
|
+
},
|
|
1261
|
+
async extractStructured(imagePath) {
|
|
1262
|
+
const text = await this.extractText(imagePath);
|
|
1263
|
+
return {
|
|
1264
|
+
pageNumber: 1,
|
|
1265
|
+
structuredText: text,
|
|
1266
|
+
regions: [],
|
|
1267
|
+
imagePath,
|
|
1268
|
+
hasTables: false,
|
|
1269
|
+
hasFigures: false,
|
|
1270
|
+
headings: [],
|
|
1271
|
+
extractionMethod: "external_api"
|
|
1272
|
+
};
|
|
1273
|
+
},
|
|
1274
|
+
async isAvailable() {
|
|
1275
|
+
try {
|
|
1276
|
+
const r = await fetch(config.ocrApiEndpoint, { method: "HEAD" });
|
|
1277
|
+
return r.ok || r.status === 405;
|
|
1278
|
+
} catch {
|
|
1279
|
+
return false;
|
|
1280
|
+
}
|
|
1281
|
+
}
|
|
1282
|
+
};
|
|
1283
|
+
}
|
|
1284
|
+
return null;
|
|
1285
|
+
}
|
|
1286
|
+
createEmptyResult(query, startTime, reason) {
|
|
1287
|
+
return {
|
|
1288
|
+
answer: reason,
|
|
1289
|
+
sourcePages: [],
|
|
1290
|
+
confidence: 0,
|
|
1291
|
+
totalCost: 0,
|
|
1292
|
+
processingTime: (Date.now() - startTime) / 1e3,
|
|
1293
|
+
tasksSummary: []
|
|
1294
|
+
};
|
|
1295
|
+
}
|
|
1296
|
+
}
|
|
1297
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
1298
|
+
0 && (module.exports = {
|
|
1299
|
+
DocPixieService
|
|
1300
|
+
});
|