npm - agentic-api - Versions diffs - 2.0.684 → 2.0.885 - Mend

agentic-api 2.0.684 → 2.0.885

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56)

package/dist/src/agents/prompts.d.ts +2 -3
package/dist/src/agents/prompts.js +13 -109
package/dist/src/agents/reducer.loaders.d.ts +46 -15
package/dist/src/agents/reducer.loaders.js +76 -21
package/dist/src/agents/reducer.types.d.ts +30 -3
package/dist/src/agents/simulator.d.ts +3 -2
package/dist/src/agents/simulator.executor.d.ts +8 -2
package/dist/src/agents/simulator.executor.js +62 -26
package/dist/src/agents/simulator.js +100 -11
package/dist/src/agents/simulator.prompts.d.ts +48 -21
package/dist/src/agents/simulator.prompts.js +289 -122
package/dist/src/agents/simulator.types.d.ts +33 -1
package/dist/src/agents/subagent.d.ts +128 -0
package/dist/src/agents/subagent.js +231 -0
package/dist/src/agents/worker.executor.d.ts +48 -0
package/dist/src/agents/worker.executor.js +152 -0
package/dist/src/execute/helpers.d.ts +3 -0
package/dist/src/execute/helpers.js +221 -15
package/dist/src/execute/responses.js +78 -51
package/dist/src/execute/shared.d.ts +5 -0
package/dist/src/execute/shared.js +27 -0
package/dist/src/index.d.ts +2 -1
package/dist/src/index.js +3 -1
package/dist/src/llm/openai.js +8 -1
package/dist/src/llm/pricing.js +2 -0
package/dist/src/llm/xai.js +11 -6
package/dist/src/prompts.d.ts +14 -0
package/dist/src/prompts.js +41 -1
package/dist/src/rag/rag.manager.d.ts +18 -3
package/dist/src/rag/rag.manager.js +91 -5
package/dist/src/rules/git/git.e2e.helper.js +3 -0
package/dist/src/rules/git/git.health.js +88 -57
package/dist/src/rules/git/index.d.ts +1 -1
package/dist/src/rules/git/index.js +13 -5
package/dist/src/rules/git/repo.d.ts +25 -6
package/dist/src/rules/git/repo.js +430 -146
package/dist/src/rules/git/repo.pr.js +45 -13
package/dist/src/rules/git/repo.tools.d.ts +5 -0
package/dist/src/rules/git/repo.tools.js +6 -1
package/dist/src/rules/types.d.ts +0 -2
package/dist/src/rules/utils.matter.js +1 -5
package/dist/src/scrapper.d.ts +138 -25
package/dist/src/scrapper.js +538 -160
package/dist/src/stategraph/stategraph.d.ts +4 -0
package/dist/src/stategraph/stategraph.js +16 -0
package/dist/src/stategraph/types.d.ts +13 -1
package/dist/src/types.d.ts +21 -0
package/dist/src/utils.d.ts +24 -0
package/dist/src/utils.js +84 -86
package/package.json +3 -2
package/dist/src/agents/semantic.d.ts +0 -4
package/dist/src/agents/semantic.js +0 -19
package/dist/src/execute/legacy.d.ts +0 -46
package/dist/src/execute/legacy.js +0 -460
package/dist/src/pricing.llm.d.ts +0 -5
package/dist/src/pricing.llm.js +0 -14

package/dist/src/rules/git/repo.pr.js CHANGED Viewed

@@ -50,6 +50,7 @@ const path_1 = require("path");
 const fs = __importStar(require("fs/promises"));
 const repo_tools_1 = require("./repo.tools");
 const repo_1 = require("./repo");
+const utils_matter_1 = require("../utils.matter");
 /**
  * Synchronise une branche PR avec son mergeBase pour corriger les références orphelines
  *
@@ -154,9 +155,12 @@ async function gitSyncPR(git, branch, user) {
                 throw new errors_1.GitOperationError(`Failed to sync PR ${branch} with merge base ${mergeBase}: ${mergeError}`, 'pr_sync', { branch, mergeBase, mergeError, fallbackError });
             }
         }
-        // Préserver les fichiers originaux de la PR (ne pas recalculer après merge)
-        // Note: Après un merge réussi, gitGetDiffFiles retournera [] car il n'y a plus de différences
-        // Les fichiers de la PR sont une information historique qui doit être préservée
+        // Préserver les fichiers originaux de la PR (ne pas recalculer après merge).
+        // NOTE IMPORTANTE:
+        // - gitSyncPR fait un merge technique pour maintenir la branche de validation à jour.
+        // - Ce merge NE DOIT PAS redéfinir le scope métier de la PR.
+        // - metadata.files reste la source de vérité (pilotée par add/edit/rename/delete).
+        // - Un diff Git post-merge peut être ambigu (base mouvante, merge commit, refs).
         const updatedMetadata = {
             ...pr.metadata,
             files: pr.metadata.files, // Préserver les fichiers originaux
@@ -236,13 +240,33 @@ async function gitIsPRClosedRobust(git, branch, config) {
 async function gitGetPRMetadata(git, branch, config) {
     const gitConf = (0, repo_tools_1.gitLoad)(config);
     try {
-        // D'abord essayer de chercher sur le commit HEAD de la branche
-        let metadata = await (0, repo_tools_1.gitReadNote)(git, branch, gitConf.gitNotes.namespace, 20);
-        // DEPRECATED: we now we always use the last 20 commits to find the metadata
-        // if (!metadata) {
-        //   metadata = await gitReadNote(git, branch, gitConf.gitNotes.namespace, 20);
-        // }
-        return metadata;
+        const expectedPRId = Number.parseInt(branch.replace(gitConf.validationPrefix, ''), 10);
+        // D'abord essayer de chercher rapidement sur HEAD / historique récent.
+        const metadata = await (0, repo_tools_1.gitReadNote)(git, branch, gitConf.gitNotes.namespace, 20);
+        if (metadata) {
+            // Si ce n'est pas une branche de validation standard, on retourne tel quel.
+            if (Number.isNaN(expectedPRId)) {
+                return metadata;
+            }
+            // Branche de validation: rejeter une note dont l'ID ne correspond pas.
+            if (metadata.id === expectedPRId) {
+                return metadata;
+            }
+            if (gitConf.verbose) {
+                console.warn(`⚠️ gitGetPRMetadata(${branch}): note récente id=${metadata.id}, attendu=${expectedPRId}. Recherche approfondie...`);
+            }
+        }
+        // Fallback robuste: scanner plus large et ne retenir que la note du bon PR.
+        if (!Number.isNaN(expectedPRId)) {
+            const log = await git.log(['-n', '200', branch]);
+            for (const commit of log.all) {
+                const note = await (0, repo_tools_1.gitReadNote)(git, commit.hash, gitConf.gitNotes.namespace, 1);
+                if (note && note.id === expectedPRId) {
+                    return note;
+                }
+            }
+        }
+        return null;
     }
     catch (error) {
         // This typically happens if the branch doesn't exist, which is a valid case.
@@ -323,12 +347,13 @@ async function gitGetClosedPRs(git, gitConfig) {
 }
 async function gitLoadPR(git, branch) {
     try {
+        const gitConf = (0, repo_tools_1.gitLoad)();
         // Load metadata from
         const metadata = await gitGetPRMetadata(git, branch);
         if (!metadata) {
             throw new errors_1.GitOperationError(`PR not found for branch ${branch}`, 'pr_load', { branch });
         }
-        const files = metadata.files || (await (0, repo_tools_1.gitGetDiffFiles)(git, branch, metadata?.mergeBase, '.md'));
+        const files = metadata.files || (await (0, repo_tools_1.gitGetDiffFiles)(git, branch, metadata?.mergeBase || gitConf.draftBranch, '.md'));
         // Récupérer les infos du dernier commit de la branche
         const log = await git.log({ from: branch, to: branch, maxCount: 1 });
         const lastCommit = log.latest;
@@ -619,9 +644,16 @@ async function gitNewValidationRequest(git, files, description, author, options
                 if (!content[idx]) {
                     continue;
                 }
-                // console.log('writeFile',files[idx],content[idx]);
+                let fileContent = content[idx];
+                // Assurer l'ID documentaire lors de la création d'une PR avec contenus fournis.
+                if (gitConfig.withID !== false) {
+                    const parsed = (0, utils_matter_1.matterParse)(fileContent);
+                    parsed.matter = (0, repo_1.gitEnsureMatterID)(parsed.matter, gitConfig, newBranchName, files[idx]);
+                    fileContent = (0, utils_matter_1.matterSerialize)(parsed.content, parsed.matter);
+                }
+                // console.log('writeFile',files[idx],fileContent);
                 // This writeFile is in-memory and stages the file.
-                await _writeFileAndCommit(git, files[idx], content[idx], author, gitConfig, initialCommitMessage);
+                await _writeFileAndCommit(git, files[idx], fileContent, author, gitConfig, initialCommitMessage);
             }
             const metadata = {
                 id: nextID,

package/dist/src/rules/git/repo.tools.d.ts CHANGED Viewed

@@ -123,6 +123,11 @@ export declare function gitGetAllBranches(git: SimpleGit, options?: {
  * @param baseBranch Branche de base (par défaut: main)
  * @param filter Filtre optionnel (ex: '.md')
  * @returns Liste des fichiers modifiés
+ *
+ * NOTE IMPORTANTE:
+ * - Ce résultat est une vue TECHNIQUE du diff Git (commits/références), pas une vérité métier PR.
+ * - Ne pas utiliser seul pour reconstruire metadata.files d'une PR.
+ * - La source de vérité métier reste metadata.files maintenu par add/edit/rename/delete.
  */
 export declare function gitGetDiffFiles(git: SimpleGit, targetBranch: string, baseBranch?: string, filter?: string): Promise<string[]>;
 /**

package/dist/src/rules/git/repo.tools.js CHANGED Viewed

@@ -129,7 +129,7 @@ function gitLoad(defaultConfig) {
     }
     // console.log(`🌶️ gitLoad: First Loading git config`,defaultConfig);
     const verbose = defaultConfig?.verbose || process.env.GIT_VERBOSE === 'true';
-    const remoteUrl = defaultConfig?.remoteUrl || process.env.GIT_REMOTE_URL;
+    const remoteUrl = defaultConfig?.remoteUrl ?? process.env.GIT_REMOTE_URL;
     const repoPath = defaultConfig?.repoPath || process.env.GIT_REPO_PATH;
     const uploadPath = defaultConfig?.uploadPath || process.env.GIT_UPLOAD_PATH;
     const draftBranch = defaultConfig?.draftBranch || process.env.DEFAULT_BRANCH_DRAFT;
@@ -625,6 +625,11 @@ async function gitGetAllBranches(git, options = {}) {
  * @param baseBranch Branche de base (par défaut: main)
  * @param filter Filtre optionnel (ex: '.md')
  * @returns Liste des fichiers modifiés
+ *
+ * NOTE IMPORTANTE:
+ * - Ce résultat est une vue TECHNIQUE du diff Git (commits/références), pas une vérité métier PR.
+ * - Ne pas utiliser seul pour reconstruire metadata.files d'une PR.
+ * - La source de vérité métier reste metadata.files maintenu par add/edit/rename/delete.
  */
 async function gitGetDiffFiles(git, targetBranch, baseBranch, filter) {
     const gitConfig = gitLoad();

package/dist/src/rules/types.d.ts CHANGED Viewed

@@ -122,8 +122,6 @@ export interface FrontMatter {
     id?: number;
     /** Titre descriptif de la règle */
     title: string;
-    /** FIXME(oldfile): champ legacy transitoire pour notification UI rename */
-    oldfile?: string;
     /** Auteur original de la règle (format git: "Name <email>") */
     author?: string;
     /** Email du validateur assigné à cette règle */

package/dist/src/rules/utils.matter.js CHANGED Viewed

@@ -121,13 +121,9 @@ function matterParse(markdown) {
  * ✅ NOUVELLE FONCTION pour reconstruire le contenu complet
 */
 function matterSerializeFromRule(rule) {
-    // Créer un objet propre pour le front-matter (exclure oldfile)
-    const matter = { ...rule.matter };
-    delete matter.oldfile;
-    return matterSerialize(rule.content, matter);
+    return matterSerialize(rule.content, rule.matter);
 }
 function matterSerialize(content, matter) {
-    // Créer un objet propre pour le front-matter (exclure oldfile)
     const cleanMatter = { ...matter };
     const result = Object.keys(cleanMatter).reduce((acc, key) => {
         const value = cleanMatter[key];

package/dist/src/scrapper.d.ts CHANGED Viewed

@@ -1,48 +1,161 @@
 import { FrontMatter } from "./rules/types";
-export declare function extractCaptcha(base64Image: string, openai: any): Promise<{
-    number: any;
-    cost: number;
-}>;
+/** Raw image data extracted from a PDF page. */
+export interface PageImage {
+    /** Raw pixel buffer: RGBA, RGB, grayscale bytes, or JPEG-encoded bytes. */
+    data: Buffer;
+    /** Pixel format / encoding of the buffer content. */
+    type: 'jpeg' | 'rgb' | 'rgba' | 'grayscale';
+    width: number;
+    height: number;
+}
+/** Dimensions of a single GFM table detected on a page. */
+export interface PageTable {
+    /** Number of data rows (header and separator lines excluded). */
+    rows: number;
+    /** Number of columns inferred from the header line. */
+    cols: number;
+}
+/** Structured representation of a single PDF page. */
+export interface Page {
+    pageNumber: number;
+    /** Cleaned body text, with reconstructed GFM tables. Running headers/footers removed. */
+    text: string;
+    /**
+     * Running page header detected across ≥ 3 consecutive pages (e.g. chapter title,
+     * magazine section name). Undefined for the poppler engine or single-page PDFs.
+     */
+    header?: string;
+    /**
+     * Running page footer detected across ≥ 3 consecutive pages (e.g. folio number,
+     * document title). Undefined for the poppler engine or single-page PDFs.
+     */
+    footer?: string;
+    /**
+     * Dimensions of each GFM table found in `text`.
+     * Used by `callLLMForParsingPDF` to select an appropriate model:
+     * pages with wide (cols > 3) or long (rows > 10) tables are upgraded
+     * from `LOW-fast` to `MEDIUM-fast` automatically.
+     */
+    tables: PageTable[];
+    /** Images extracted from the page. Always empty for the poppler engine. */
+    images: PageImage[];
+}
+/** Extraction backend selection. */
+export type PdftotextEngine = 'poppler' | 'mupdf';
 /**
- * Calls GPT to parse a PDF file and convert it to markdown format.
+ * Converts extracted PDF content to clean Markdown via LLM.
  *
- * @param {string} inputfile - The name of the PDF file being processed
- * @param {any} pdfData - The extracted content from the PDF file
- * @param {any[]} links - Optional array of links extracted from the PDF to be integrated into the markdown
- * @param {string} model - The model to use for parsing (default: "MEDIUM-fast")
- * @returns {Promise<{markdown: string, cost: number}>} - The parsed markdown content and the cost of the API call
+ * Two paths depending on the `pdfData` type:
+ *
+ * **`Page[]` (mupdf path)** — `MapLLM.reduce`, one page per chunk.
+ * Each page is processed by `mupdfPagePrompt` (heading normalisation, broken-cell
+ * fusion, repeated-header removal). No frontmatter is added here; the caller
+ * (`pdf2markdown`) prepends the single YAML block.
+ *
+ * NOTE: `finalReduce` is intentionally disabled — it is reserved for a future
+ * "N-page light summary" feature where a second LLM pass synthesises the whole
+ * document into a shorter version.
+ *
+ * A raw `string` (e.g. from `html2markdown`) is automatically wrapped into a
+ * single `Page` so both callers share the exact same code path.
+ *
+ * @param inputfile - Original file path (used for logging only).
+ * @param pdfData   - Either a `Page[]` array (mupdf) or a raw string.
+ * @param links     - External links appended as `## Liens` footer (string path).
+ * @param model     - LLM model alias (default: `'MEDIUM-fast'`).
  */
-export declare function callLLMForParsingPDF(inputfile: string, pdfData: any, links?: any[], model?: string): Promise<{
+export declare function callLLMForParsingPDF(inputfile: string, pdfData: Page[] | string, links?: {
+    text: string;
+    href: string;
+}[], model?: string): Promise<{
     markdown: string;
     cost: number;
 }>;
 /**
- * Parses an HTML file and converts it to markdown using GPT.
+ * Parses an HTML file and converts it to markdown using LLM.
  *
  * @param {string} output - The directory path where the output markdown file will be saved.
  * @param {string} file - The path to the HTML file to be parsed.
  * @param {string} service - The service name used as part of the output filename output.
  * @param {string} model - The model to use for parsing (default: "MEDIUM-fast")
- * @returns {Promise<{markdown: string, cost: number}>} - The generated markdown content and the cost of the GPT API call.
+ * @returns {Promise<{markdown: string, cost: number}>} - The generated markdown content and the cost of the API call.
  */
 export declare function html2markdown(output: string, file: string, service: string, model?: string): Promise<{
     markdown: string;
     cost: number;
 }>;
 /**
- * Parse un PDF en effectuant :
- * 1. Le nettoyage du PDF avec Ghostscript.
- * 2. Sa conversion en XML via pdftohtml.
- * 3. (Optionnellement) Le passage du contenu converti au modèle LLM pour analyser la structure.
- *
- * @param {string} outputDir - Dossier de sortie pour le fichier markdown.
- * @param {string} pdf - Chemin vers le fichier PDF à analyser.
- * @param {FrontMatter|null} matter - Métadonnées du document (title, service, author, role). Si null, utilise le nom du PDF pour le titre.
- * @param {string} model - Le modèle à utiliser (défaut: "MEDIUM-fast").
- * @returns {Promise<{markdown: string, cost: number, outputPath: string}>} - Le markdown structuré, le coût et le chemin du fichier de sortie.
+ * Extracts plain text from a PDF using the system `pdftotext` binary (poppler-utils).
+ *
+ * - Pages are delimited by form-feed (\f) characters in the binary's output.
+ * - Excessive blank lines are normalised (3+ → 2).
+ * - Images are NOT extracted (always []).
+ *
+ * NOTE: Better alternative is `pdftotext_pdfjs` which uses Mozilla's PDF engine
+ *   to extract text + images + links in a single Node.js-native pass, with better
+ *   table reconstruction for complex layouts. See `pdftotext_pdfjs` for details.
+ *
+ * @param {string} pdfPath   - Absolute path to the PDF file.
+ * @param {string} outputDir - Directory used for temporary files.
+ * @returns {Promise<Page[]>} One `Page` per PDF page, text-only.
+ */
+export declare function pdftotext_poppler(pdfPath: string, outputDir: string): Promise<Page[]>;
+/**
+ * Extracts text, reconstructed tables, links, and optionally page-raster images
+ * from a PDF using the **mupdf** npm package (WASM build of the MuPDF C library).
+ *
+ * Key advantages over the poppler engine:
+ * - `table-hunt` detects tables geometrically even in **untagged** PDFs.
+ * - `segment` splits the page into logical reading-order blocks.
+ * - Significantly faster than pdfjs for large documents.
+ * - No shell binary dependency (pure WASM, runs anywhere Node.js does).
+ *
+ * Images (opt-in via `withImages: true`): each page is rasterised at 1.5× scale
+ * (≈ 113 DPI). The `imageFormat` option controls encoding:
+ *
+ * | format      | size/page (base64) | notes                          |
+ * |-------------|-------------------|--------------------------------|
+ * | `'rgb'`     | ≈ 4.4 MB          | raw RGB, lossless, large       |
+ * | `'gray'`    | ≈ 1.5 MB          | raw grayscale, 3× smaller      |
+ * | `'jpeg'`    | ≈ 100–200 KB      | JPEG quality 75, 31× smaller   |
+ *
+ * Disabled by default because image data quickly exhausts stdout buffers for
+ * large documents. Use `jpeg` for production with vision models.
+ *
+ * NOTE: `mupdf` is ESM-only. Extraction is delegated to a standalone
+ * `mupdf-extract.mjs` worker spawned via `execAsync`, which avoids any
+ * ESM/CJS interoperability issues in the main process and under ts-jest.
+ *
+ * @param {string}  pdfPath    - Absolute path to the PDF file.
+ * @param {object}  [options]
+ * @param {boolean} [options.withImages=false]     - Rasterise each page.
+ * @param {'rgb'|'gray'|'jpeg'} [options.imageFormat='rgb'] - Pixel encoding.
+ * @returns {Promise<Page[]>} One `Page` per PDF page with text, GFM tables, and optional images.
  */
-export declare function pdf2markdown(outputDir: string, pdf: string, matter: FrontMatter | null, model?: string): Promise<{
+export declare function pdftotext_mupdf(pdfPath: string, options?: {
+    withImages?: boolean;
+    imageFormat?: 'rgb' | 'gray' | 'jpeg';
+}): Promise<Page[]>;
+/**
+ * Converts a PDF to a structured Markdown file.
+ *
+ * Pipeline:
+ * 1. `pdftotext_mupdf` (or poppler) → `Page[]`
+ * 2. `callLLMForParsingPDF` — MapLLM.reduce, one page per chunk
+ * 3. Prepend a **single** YAML frontmatter block and write to `outputDir`.
+ *
+ * Model choice: `LOW-fast` is sufficient — mupdf output is already clean GFM;
+ * the LLM only normalises headings and removes repeated headers/footers.
+ * Use `MEDIUM-fast` for complex layouts that need heavier restructuring.
+ *
+ * @param outputDir - Directory for the output `.md` file.
+ * @param pdf       - Absolute path to the PDF file.
+ * @param matter    - Document metadata; defaults derived from filename.
+ * @param model     - LLM model alias (default: `'LOW-fast'`).
+ * @param engine    - Extraction backend (default: `'mupdf'`).
+ * @returns `{ markdown, outputPath }` — frontmatter-prefixed markdown and output path.
+ */
+export declare function pdf2markdown(outputDir: string, pdf: string, matter: FrontMatter | null, model?: string, engine?: PdftotextEngine): Promise<{
     markdown: string;
-    cost: number;
     outputPath: string;
 }>;