agentic-api 2.0.684 → 2.0.885

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/src/agents/prompts.d.ts +2 -3
  2. package/dist/src/agents/prompts.js +13 -109
  3. package/dist/src/agents/reducer.loaders.d.ts +46 -15
  4. package/dist/src/agents/reducer.loaders.js +76 -21
  5. package/dist/src/agents/reducer.types.d.ts +30 -3
  6. package/dist/src/agents/simulator.d.ts +3 -2
  7. package/dist/src/agents/simulator.executor.d.ts +8 -2
  8. package/dist/src/agents/simulator.executor.js +62 -26
  9. package/dist/src/agents/simulator.js +100 -11
  10. package/dist/src/agents/simulator.prompts.d.ts +48 -21
  11. package/dist/src/agents/simulator.prompts.js +289 -122
  12. package/dist/src/agents/simulator.types.d.ts +33 -1
  13. package/dist/src/agents/subagent.d.ts +128 -0
  14. package/dist/src/agents/subagent.js +231 -0
  15. package/dist/src/agents/worker.executor.d.ts +48 -0
  16. package/dist/src/agents/worker.executor.js +152 -0
  17. package/dist/src/execute/helpers.d.ts +3 -0
  18. package/dist/src/execute/helpers.js +221 -15
  19. package/dist/src/execute/responses.js +78 -51
  20. package/dist/src/execute/shared.d.ts +5 -0
  21. package/dist/src/execute/shared.js +27 -0
  22. package/dist/src/index.d.ts +2 -1
  23. package/dist/src/index.js +3 -1
  24. package/dist/src/llm/openai.js +8 -1
  25. package/dist/src/llm/pricing.js +2 -0
  26. package/dist/src/llm/xai.js +11 -6
  27. package/dist/src/prompts.d.ts +14 -0
  28. package/dist/src/prompts.js +41 -1
  29. package/dist/src/rag/rag.manager.d.ts +18 -3
  30. package/dist/src/rag/rag.manager.js +91 -5
  31. package/dist/src/rules/git/git.e2e.helper.js +3 -0
  32. package/dist/src/rules/git/git.health.js +88 -57
  33. package/dist/src/rules/git/index.d.ts +1 -1
  34. package/dist/src/rules/git/index.js +13 -5
  35. package/dist/src/rules/git/repo.d.ts +25 -6
  36. package/dist/src/rules/git/repo.js +430 -146
  37. package/dist/src/rules/git/repo.pr.js +45 -13
  38. package/dist/src/rules/git/repo.tools.d.ts +5 -0
  39. package/dist/src/rules/git/repo.tools.js +6 -1
  40. package/dist/src/rules/types.d.ts +0 -2
  41. package/dist/src/rules/utils.matter.js +1 -5
  42. package/dist/src/scrapper.d.ts +138 -25
  43. package/dist/src/scrapper.js +538 -160
  44. package/dist/src/stategraph/stategraph.d.ts +4 -0
  45. package/dist/src/stategraph/stategraph.js +16 -0
  46. package/dist/src/stategraph/types.d.ts +13 -1
  47. package/dist/src/types.d.ts +21 -0
  48. package/dist/src/utils.d.ts +24 -0
  49. package/dist/src/utils.js +84 -86
  50. package/package.json +3 -2
  51. package/dist/src/agents/semantic.d.ts +0 -4
  52. package/dist/src/agents/semantic.js +0 -19
  53. package/dist/src/execute/legacy.d.ts +0 -46
  54. package/dist/src/execute/legacy.js +0 -460
  55. package/dist/src/pricing.llm.d.ts +0 -5
  56. package/dist/src/pricing.llm.js +0 -14
@@ -50,6 +50,7 @@ const path_1 = require("path");
50
50
  const fs = __importStar(require("fs/promises"));
51
51
  const repo_tools_1 = require("./repo.tools");
52
52
  const repo_1 = require("./repo");
53
+ const utils_matter_1 = require("../utils.matter");
53
54
  /**
54
55
  * Synchronise une branche PR avec son mergeBase pour corriger les références orphelines
55
56
  *
@@ -154,9 +155,12 @@ async function gitSyncPR(git, branch, user) {
154
155
  throw new errors_1.GitOperationError(`Failed to sync PR ${branch} with merge base ${mergeBase}: ${mergeError}`, 'pr_sync', { branch, mergeBase, mergeError, fallbackError });
155
156
  }
156
157
  }
157
- // Préserver les fichiers originaux de la PR (ne pas recalculer après merge)
158
- // Note: Après un merge réussi, gitGetDiffFiles retournera [] car il n'y a plus de différences
159
- // Les fichiers de la PR sont une information historique qui doit être préservée
158
+ // Préserver les fichiers originaux de la PR (ne pas recalculer après merge).
159
+ // NOTE IMPORTANTE:
160
+ // - gitSyncPR fait un merge technique pour maintenir la branche de validation à jour.
161
+ // - Ce merge NE DOIT PAS redéfinir le scope métier de la PR.
162
+ // - metadata.files reste la source de vérité (pilotée par add/edit/rename/delete).
163
+ // - Un diff Git post-merge peut être ambigu (base mouvante, merge commit, refs).
160
164
  const updatedMetadata = {
161
165
  ...pr.metadata,
162
166
  files: pr.metadata.files, // Préserver les fichiers originaux
@@ -236,13 +240,33 @@ async function gitIsPRClosedRobust(git, branch, config) {
236
240
  async function gitGetPRMetadata(git, branch, config) {
237
241
  const gitConf = (0, repo_tools_1.gitLoad)(config);
238
242
  try {
239
- // D'abord essayer de chercher sur le commit HEAD de la branche
240
- let metadata = await (0, repo_tools_1.gitReadNote)(git, branch, gitConf.gitNotes.namespace, 20);
241
- // DEPRECATED: we now we always use the last 20 commits to find the metadata
242
- // if (!metadata) {
243
- // metadata = await gitReadNote(git, branch, gitConf.gitNotes.namespace, 20);
244
- // }
245
- return metadata;
243
+ const expectedPRId = Number.parseInt(branch.replace(gitConf.validationPrefix, ''), 10);
244
+ // D'abord essayer de chercher rapidement sur HEAD / historique récent.
245
+ const metadata = await (0, repo_tools_1.gitReadNote)(git, branch, gitConf.gitNotes.namespace, 20);
246
+ if (metadata) {
247
+ // Si ce n'est pas une branche de validation standard, on retourne tel quel.
248
+ if (Number.isNaN(expectedPRId)) {
249
+ return metadata;
250
+ }
251
+ // Branche de validation: rejeter une note dont l'ID ne correspond pas.
252
+ if (metadata.id === expectedPRId) {
253
+ return metadata;
254
+ }
255
+ if (gitConf.verbose) {
256
+ console.warn(`⚠️ gitGetPRMetadata(${branch}): note récente id=${metadata.id}, attendu=${expectedPRId}. Recherche approfondie...`);
257
+ }
258
+ }
259
+ // Fallback robuste: scanner plus large et ne retenir que la note du bon PR.
260
+ if (!Number.isNaN(expectedPRId)) {
261
+ const log = await git.log(['-n', '200', branch]);
262
+ for (const commit of log.all) {
263
+ const note = await (0, repo_tools_1.gitReadNote)(git, commit.hash, gitConf.gitNotes.namespace, 1);
264
+ if (note && note.id === expectedPRId) {
265
+ return note;
266
+ }
267
+ }
268
+ }
269
+ return null;
246
270
  }
247
271
  catch (error) {
248
272
  // This typically happens if the branch doesn't exist, which is a valid case.
@@ -323,12 +347,13 @@ async function gitGetClosedPRs(git, gitConfig) {
323
347
  }
324
348
  async function gitLoadPR(git, branch) {
325
349
  try {
350
+ const gitConf = (0, repo_tools_1.gitLoad)();
326
351
  // Load metadata from
327
352
  const metadata = await gitGetPRMetadata(git, branch);
328
353
  if (!metadata) {
329
354
  throw new errors_1.GitOperationError(`PR not found for branch ${branch}`, 'pr_load', { branch });
330
355
  }
331
- const files = metadata.files || (await (0, repo_tools_1.gitGetDiffFiles)(git, branch, metadata?.mergeBase, '.md'));
356
+ const files = metadata.files || (await (0, repo_tools_1.gitGetDiffFiles)(git, branch, metadata?.mergeBase || gitConf.draftBranch, '.md'));
332
357
  // Récupérer les infos du dernier commit de la branche
333
358
  const log = await git.log({ from: branch, to: branch, maxCount: 1 });
334
359
  const lastCommit = log.latest;
@@ -619,9 +644,16 @@ async function gitNewValidationRequest(git, files, description, author, options
619
644
  if (!content[idx]) {
620
645
  continue;
621
646
  }
622
- // console.log('writeFile',files[idx],content[idx]);
647
+ let fileContent = content[idx];
648
+ // Assurer l'ID documentaire lors de la création d'une PR avec contenus fournis.
649
+ if (gitConfig.withID !== false) {
650
+ const parsed = (0, utils_matter_1.matterParse)(fileContent);
651
+ parsed.matter = (0, repo_1.gitEnsureMatterID)(parsed.matter, gitConfig, newBranchName, files[idx]);
652
+ fileContent = (0, utils_matter_1.matterSerialize)(parsed.content, parsed.matter);
653
+ }
654
+ // console.log('writeFile',files[idx],fileContent);
623
655
  // This writeFile is in-memory and stages the file.
624
- await _writeFileAndCommit(git, files[idx], content[idx], author, gitConfig, initialCommitMessage);
656
+ await _writeFileAndCommit(git, files[idx], fileContent, author, gitConfig, initialCommitMessage);
625
657
  }
626
658
  const metadata = {
627
659
  id: nextID,
@@ -123,6 +123,11 @@ export declare function gitGetAllBranches(git: SimpleGit, options?: {
123
123
  * @param baseBranch Branche de base (par défaut: main)
124
124
  * @param filter Filtre optionnel (ex: '.md')
125
125
  * @returns Liste des fichiers modifiés
126
+ *
127
+ * NOTE IMPORTANTE:
128
+ * - Ce résultat est une vue TECHNIQUE du diff Git (commits/références), pas une vérité métier PR.
129
+ * - Ne pas utiliser seul pour reconstruire metadata.files d'une PR.
130
+ * - La source de vérité métier reste metadata.files maintenu par add/edit/rename/delete.
126
131
  */
127
132
  export declare function gitGetDiffFiles(git: SimpleGit, targetBranch: string, baseBranch?: string, filter?: string): Promise<string[]>;
128
133
  /**
@@ -129,7 +129,7 @@ function gitLoad(defaultConfig) {
129
129
  }
130
130
  // console.log(`🌶️ gitLoad: First Loading git config`,defaultConfig);
131
131
  const verbose = defaultConfig?.verbose || process.env.GIT_VERBOSE === 'true';
132
- const remoteUrl = defaultConfig?.remoteUrl || process.env.GIT_REMOTE_URL;
132
+ const remoteUrl = defaultConfig?.remoteUrl ?? process.env.GIT_REMOTE_URL;
133
133
  const repoPath = defaultConfig?.repoPath || process.env.GIT_REPO_PATH;
134
134
  const uploadPath = defaultConfig?.uploadPath || process.env.GIT_UPLOAD_PATH;
135
135
  const draftBranch = defaultConfig?.draftBranch || process.env.DEFAULT_BRANCH_DRAFT;
@@ -625,6 +625,11 @@ async function gitGetAllBranches(git, options = {}) {
625
625
  * @param baseBranch Branche de base (par défaut: main)
626
626
  * @param filter Filtre optionnel (ex: '.md')
627
627
  * @returns Liste des fichiers modifiés
628
+ *
629
+ * NOTE IMPORTANTE:
630
+ * - Ce résultat est une vue TECHNIQUE du diff Git (commits/références), pas une vérité métier PR.
631
+ * - Ne pas utiliser seul pour reconstruire metadata.files d'une PR.
632
+ * - La source de vérité métier reste metadata.files maintenu par add/edit/rename/delete.
628
633
  */
629
634
  async function gitGetDiffFiles(git, targetBranch, baseBranch, filter) {
630
635
  const gitConfig = gitLoad();
@@ -122,8 +122,6 @@ export interface FrontMatter {
122
122
  id?: number;
123
123
  /** Titre descriptif de la règle */
124
124
  title: string;
125
- /** FIXME(oldfile): champ legacy transitoire pour notification UI rename */
126
- oldfile?: string;
127
125
  /** Auteur original de la règle (format git: "Name <email>") */
128
126
  author?: string;
129
127
  /** Email du validateur assigné à cette règle */
@@ -121,13 +121,9 @@ function matterParse(markdown) {
121
121
  * ✅ NOUVELLE FONCTION pour reconstruire le contenu complet
122
122
  */
123
123
  function matterSerializeFromRule(rule) {
124
- // Créer un objet propre pour le front-matter (exclure oldfile)
125
- const matter = { ...rule.matter };
126
- delete matter.oldfile;
127
- return matterSerialize(rule.content, matter);
124
+ return matterSerialize(rule.content, rule.matter);
128
125
  }
129
126
  function matterSerialize(content, matter) {
130
- // Créer un objet propre pour le front-matter (exclure oldfile)
131
127
  const cleanMatter = { ...matter };
132
128
  const result = Object.keys(cleanMatter).reduce((acc, key) => {
133
129
  const value = cleanMatter[key];
@@ -1,48 +1,161 @@
1
1
  import { FrontMatter } from "./rules/types";
2
- export declare function extractCaptcha(base64Image: string, openai: any): Promise<{
3
- number: any;
4
- cost: number;
5
- }>;
2
+ /** Raw image data extracted from a PDF page. */
3
+ export interface PageImage {
4
+ /** Raw pixel buffer: RGBA, RGB, grayscale bytes, or JPEG-encoded bytes. */
5
+ data: Buffer;
6
+ /** Pixel format / encoding of the buffer content. */
7
+ type: 'jpeg' | 'rgb' | 'rgba' | 'grayscale';
8
+ width: number;
9
+ height: number;
10
+ }
11
+ /** Dimensions of a single GFM table detected on a page. */
12
+ export interface PageTable {
13
+ /** Number of data rows (header and separator lines excluded). */
14
+ rows: number;
15
+ /** Number of columns inferred from the header line. */
16
+ cols: number;
17
+ }
18
+ /** Structured representation of a single PDF page. */
19
+ export interface Page {
20
+ pageNumber: number;
21
+ /** Cleaned body text, with reconstructed GFM tables. Running headers/footers removed. */
22
+ text: string;
23
+ /**
24
+ * Running page header detected across ≥ 3 consecutive pages (e.g. chapter title,
25
+ * magazine section name). Undefined for the poppler engine or single-page PDFs.
26
+ */
27
+ header?: string;
28
+ /**
29
+ * Running page footer detected across ≥ 3 consecutive pages (e.g. folio number,
30
+ * document title). Undefined for the poppler engine or single-page PDFs.
31
+ */
32
+ footer?: string;
33
+ /**
34
+ * Dimensions of each GFM table found in `text`.
35
+ * Used by `callLLMForParsingPDF` to select an appropriate model:
36
+ * pages with wide (cols > 3) or long (rows > 10) tables are upgraded
37
+ * from `LOW-fast` to `MEDIUM-fast` automatically.
38
+ */
39
+ tables: PageTable[];
40
+ /** Images extracted from the page. Always empty for the poppler engine. */
41
+ images: PageImage[];
42
+ }
43
+ /** Extraction backend selection. */
44
+ export type PdftotextEngine = 'poppler' | 'mupdf';
6
45
  /**
7
- * Calls GPT to parse a PDF file and convert it to markdown format.
46
+ * Converts extracted PDF content to clean Markdown via LLM.
8
47
  *
9
- * @param {string} inputfile - The name of the PDF file being processed
10
- * @param {any} pdfData - The extracted content from the PDF file
11
- * @param {any[]} links - Optional array of links extracted from the PDF to be integrated into the markdown
12
- * @param {string} model - The model to use for parsing (default: "MEDIUM-fast")
13
- * @returns {Promise<{markdown: string, cost: number}>} - The parsed markdown content and the cost of the API call
48
+ * Two paths depending on the `pdfData` type:
49
+ *
50
+ * **`Page[]` (mupdf path)** → `MapLLM.reduce`, one page per chunk.
51
+ * Each page is processed by `mupdfPagePrompt` (heading normalisation, broken-cell
52
+ * fusion, repeated-header removal). No frontmatter is added here; the caller
53
+ * (`pdf2markdown`) prepends the single YAML block.
54
+ *
55
+ * NOTE: `finalReduce` is intentionally disabled — it is reserved for a future
56
+ * "N-page light summary" feature where a second LLM pass synthesises the whole
57
+ * document into a shorter version.
58
+ *
59
+ * A raw `string` (e.g. from `html2markdown`) is automatically wrapped into a
60
+ * single `Page` so both callers share the exact same code path.
61
+ *
62
+ * @param inputfile - Original file path (used for logging only).
63
+ * @param pdfData - Either a `Page[]` array (mupdf) or a raw string.
64
+ * @param links - External links appended as `## Liens` footer (string path).
65
+ * @param model - LLM model alias (default: `'MEDIUM-fast'`).
14
66
  */
15
- export declare function callLLMForParsingPDF(inputfile: string, pdfData: any, links?: any[], model?: string): Promise<{
67
+ export declare function callLLMForParsingPDF(inputfile: string, pdfData: Page[] | string, links?: {
68
+ text: string;
69
+ href: string;
70
+ }[], model?: string): Promise<{
16
71
  markdown: string;
17
72
  cost: number;
18
73
  }>;
19
74
  /**
20
- * Parses an HTML file and converts it to markdown using GPT.
75
+ * Parses an HTML file and converts it to markdown using LLM.
21
76
  *
22
77
  * @param {string} output - The directory path where the output markdown file will be saved.
23
78
  * @param {string} file - The path to the HTML file to be parsed.
24
79
  * @param {string} service - The service name used as part of the output filename output.
25
80
  * @param {string} model - The model to use for parsing (default: "MEDIUM-fast")
26
- * @returns {Promise<{markdown: string, cost: number}>} - The generated markdown content and the cost of the GPT API call.
81
+ * @returns {Promise<{markdown: string, cost: number}>} - The generated markdown content and the cost of the API call.
27
82
  */
28
83
  export declare function html2markdown(output: string, file: string, service: string, model?: string): Promise<{
29
84
  markdown: string;
30
85
  cost: number;
31
86
  }>;
32
87
  /**
33
- * Parse un PDF en effectuant :
34
- * 1. Le nettoyage du PDF avec Ghostscript.
35
- * 2. Sa conversion en XML via pdftohtml.
36
- * 3. (Optionnellement) Le passage du contenu converti au modèle LLM pour analyser la structure.
37
- *
38
- * @param {string} outputDir - Dossier de sortie pour le fichier markdown.
39
- * @param {string} pdf - Chemin vers le fichier PDF à analyser.
40
- * @param {FrontMatter|null} matter - Métadonnées du document (title, service, author, role). Si null, utilise le nom du PDF pour le titre.
41
- * @param {string} model - Le modèle à utiliser (défaut: "MEDIUM-fast").
42
- * @returns {Promise<{markdown: string, cost: number, outputPath: string}>} - Le markdown structuré, le coût et le chemin du fichier de sortie.
88
+ * Extracts plain text from a PDF using the system `pdftotext` binary (poppler-utils).
89
+ *
90
+ * - Pages are delimited by form-feed (\f) characters in the binary's output.
91
+ * - Excessive blank lines are normalised (3+ → 2).
92
+ * - Images are NOT extracted (always []).
93
+ *
94
+ * NOTE: Better alternative is `pdftotext_pdfjs` which uses Mozilla's PDF engine
95
+ * to extract text + images + links in a single Node.js-native pass, with better
96
+ * table reconstruction for complex layouts. See `pdftotext_pdfjs` for details.
97
+ *
98
+ * @param {string} pdfPath - Absolute path to the PDF file.
99
+ * @param {string} outputDir - Directory used for temporary files.
100
+ * @returns {Promise<Page[]>} One `Page` per PDF page, text-only.
101
+ */
102
+ export declare function pdftotext_poppler(pdfPath: string, outputDir: string): Promise<Page[]>;
103
+ /**
104
+ * Extracts text, reconstructed tables, links, and optionally page-raster images
105
+ * from a PDF using the **mupdf** npm package (WASM build of the MuPDF C library).
106
+ *
107
+ * Key advantages over the poppler engine:
108
+ * - `table-hunt` detects tables geometrically even in **untagged** PDFs.
109
+ * - `segment` splits the page into logical reading-order blocks.
110
+ * - Significantly faster than pdfjs for large documents.
111
+ * - No shell binary dependency (pure WASM, runs anywhere Node.js does).
112
+ *
113
+ * Images (opt-in via `withImages: true`): each page is rasterised at 1.5× scale
114
+ * (≈ 113 DPI). The `imageFormat` option controls encoding:
115
+ *
116
+ * | format | size/page (base64) | notes |
117
+ * |-------------|-------------------|--------------------------------|
118
+ * | `'rgb'` | ≈ 4.4 MB | raw RGB, lossless, large |
119
+ * | `'gray'` | ≈ 1.5 MB | raw grayscale, 3× smaller |
120
+ * | `'jpeg'` | ≈ 100–200 KB | JPEG quality 75, 31× smaller |
121
+ *
122
+ * Disabled by default because image data quickly exhausts stdout buffers for
123
+ * large documents. Use `jpeg` for production with vision models.
124
+ *
125
+ * NOTE: `mupdf` is ESM-only. Extraction is delegated to a standalone
126
+ * `mupdf-extract.mjs` worker spawned via `execAsync`, which avoids any
127
+ * ESM/CJS interoperability issues in the main process and under ts-jest.
128
+ *
129
+ * @param {string} pdfPath - Absolute path to the PDF file.
130
+ * @param {object} [options]
131
+ * @param {boolean} [options.withImages=false] - Rasterise each page.
132
+ * @param {'rgb'|'gray'|'jpeg'} [options.imageFormat='rgb'] - Pixel encoding.
133
+ * @returns {Promise<Page[]>} One `Page` per PDF page with text, GFM tables, and optional images.
43
134
  */
44
- export declare function pdf2markdown(outputDir: string, pdf: string, matter: FrontMatter | null, model?: string): Promise<{
135
+ export declare function pdftotext_mupdf(pdfPath: string, options?: {
136
+ withImages?: boolean;
137
+ imageFormat?: 'rgb' | 'gray' | 'jpeg';
138
+ }): Promise<Page[]>;
139
+ /**
140
+ * Converts a PDF to a structured Markdown file.
141
+ *
142
+ * Pipeline:
143
+ * 1. `pdftotext_mupdf` (or poppler) → `Page[]`
144
+ * 2. `callLLMForParsingPDF` — MapLLM.reduce, one page per chunk
145
+ * 3. Prepend a **single** YAML frontmatter block and write to `outputDir`.
146
+ *
147
+ * Model choice: `LOW-fast` is sufficient — mupdf output is already clean GFM;
148
+ * the LLM only normalises headings and removes repeated headers/footers.
149
+ * Use `MEDIUM-fast` for complex layouts that need heavier restructuring.
150
+ *
151
+ * @param outputDir - Directory for the output `.md` file.
152
+ * @param pdf - Absolute path to the PDF file.
153
+ * @param matter - Document metadata; defaults derived from filename.
154
+ * @param model - LLM model alias (default: `'LOW-fast'`).
155
+ * @param engine - Extraction backend (default: `'mupdf'`).
156
+ * @returns `{ markdown, outputPath }` — frontmatter-prefixed markdown and output path.
157
+ */
158
+ export declare function pdf2markdown(outputDir: string, pdf: string, matter: FrontMatter | null, model?: string, engine?: PdftotextEngine): Promise<{
45
159
  markdown: string;
46
- cost: number;
47
160
  outputPath: string;
48
161
  }>;