agentic-api 2.0.31 → 2.0.314
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/agents/agents.example.js +21 -22
- package/dist/src/agents/authentication.js +1 -2
- package/dist/src/agents/prompts.d.ts +5 -4
- package/dist/src/agents/prompts.js +42 -87
- package/dist/src/agents/reducer.core.d.ts +24 -2
- package/dist/src/agents/reducer.core.js +125 -35
- package/dist/src/agents/reducer.loaders.d.ts +55 -1
- package/dist/src/agents/reducer.loaders.js +114 -1
- package/dist/src/agents/reducer.types.d.ts +45 -2
- package/dist/src/agents/semantic.js +1 -2
- package/dist/src/agents/simulator.d.ts +4 -0
- package/dist/src/agents/simulator.executor.d.ts +5 -1
- package/dist/src/agents/simulator.executor.js +41 -9
- package/dist/src/agents/simulator.js +86 -28
- package/dist/src/agents/simulator.prompts.d.ts +3 -2
- package/dist/src/agents/simulator.prompts.js +52 -78
- package/dist/src/agents/simulator.types.d.ts +20 -5
- package/dist/src/agents/simulator.utils.d.ts +7 -2
- package/dist/src/agents/simulator.utils.js +33 -11
- package/dist/src/agents/system.js +1 -2
- package/dist/src/execute.d.ts +17 -3
- package/dist/src/execute.js +156 -158
- package/dist/src/index.d.ts +1 -1
- package/dist/src/index.js +1 -1
- package/dist/src/{princing.openai.d.ts → pricing.llm.d.ts} +6 -0
- package/dist/src/pricing.llm.js +234 -0
- package/dist/src/prompts.d.ts +13 -4
- package/dist/src/prompts.js +221 -114
- package/dist/src/rag/embeddings.d.ts +36 -18
- package/dist/src/rag/embeddings.js +125 -128
- package/dist/src/rag/index.d.ts +5 -5
- package/dist/src/rag/index.js +14 -17
- package/dist/src/rag/parser.d.ts +2 -1
- package/dist/src/rag/parser.js +11 -14
- package/dist/src/rag/rag.examples.d.ts +27 -0
- package/dist/src/rag/rag.examples.js +151 -0
- package/dist/src/rag/rag.manager.d.ts +383 -0
- package/dist/src/rag/rag.manager.js +1378 -0
- package/dist/src/rag/types.d.ts +128 -12
- package/dist/src/rag/types.js +100 -1
- package/dist/src/rag/usecase.d.ts +37 -0
- package/dist/src/rag/usecase.js +96 -7
- package/dist/src/rules/git/git.e2e.helper.js +1 -0
- package/dist/src/rules/git/git.health.d.ts +57 -0
- package/dist/src/rules/git/git.health.js +281 -1
- package/dist/src/rules/git/index.d.ts +2 -2
- package/dist/src/rules/git/index.js +12 -1
- package/dist/src/rules/git/repo.d.ts +117 -0
- package/dist/src/rules/git/repo.js +536 -0
- package/dist/src/rules/git/repo.tools.d.ts +22 -1
- package/dist/src/rules/git/repo.tools.js +50 -1
- package/dist/src/rules/types.d.ts +16 -14
- package/dist/src/rules/utils.matter.d.ts +0 -4
- package/dist/src/rules/utils.matter.js +26 -7
- package/dist/src/scrapper.d.ts +15 -22
- package/dist/src/scrapper.js +57 -110
- package/dist/src/stategraph/index.d.ts +1 -1
- package/dist/src/stategraph/stategraph.d.ts +31 -2
- package/dist/src/stategraph/stategraph.js +93 -6
- package/dist/src/stategraph/stategraph.storage.js +4 -0
- package/dist/src/stategraph/types.d.ts +22 -0
- package/dist/src/types.d.ts +4 -2
- package/dist/src/types.js +1 -1
- package/dist/src/usecase.d.ts +11 -2
- package/dist/src/usecase.js +27 -35
- package/dist/src/utils.d.ts +32 -18
- package/dist/src/utils.js +60 -126
- package/package.json +7 -2
- package/dist/src/agents/digestor.test.d.ts +0 -1
- package/dist/src/agents/digestor.test.js +0 -45
- package/dist/src/agents/reducer.example.d.ts +0 -28
- package/dist/src/agents/reducer.example.js +0 -118
- package/dist/src/agents/reducer.process.d.ts +0 -16
- package/dist/src/agents/reducer.process.js +0 -143
- package/dist/src/agents/reducer.tools.d.ts +0 -29
- package/dist/src/agents/reducer.tools.js +0 -157
- package/dist/src/agents/simpleExample.d.ts +0 -3
- package/dist/src/agents/simpleExample.js +0 -38
- package/dist/src/agents/system-review.d.ts +0 -5
- package/dist/src/agents/system-review.js +0 -181
- package/dist/src/agents/systemReview.d.ts +0 -4
- package/dist/src/agents/systemReview.js +0 -22
- package/dist/src/princing.openai.js +0 -54
- package/dist/src/rag/tools.d.ts +0 -76
- package/dist/src/rag/tools.js +0 -196
- package/dist/src/rules/user.mapper.d.ts +0 -61
- package/dist/src/rules/user.mapper.js +0 -160
- package/dist/src/rules/utils/slug.d.ts +0 -22
- package/dist/src/rules/utils/slug.js +0 -35
|
@@ -122,13 +122,10 @@ export interface FrontMatter {
|
|
|
122
122
|
id?: number;
|
|
123
123
|
/** Titre descriptif de la règle */
|
|
124
124
|
title: string;
|
|
125
|
+
/** Ancien nom de fichier (pour renommage) */
|
|
125
126
|
oldfile?: string;
|
|
126
|
-
/**
|
|
127
|
-
|
|
128
|
-
/** Version de la règle (optionnelle, format libre) */
|
|
129
|
-
version?: string;
|
|
130
|
-
/** Auteur original de la règle */
|
|
131
|
-
author?: RuleUser;
|
|
127
|
+
/** Auteur original de la règle (format git: "Name <email>") */
|
|
128
|
+
author?: string;
|
|
132
129
|
/** Email du validateur assigné à cette règle */
|
|
133
130
|
validator?: string;
|
|
134
131
|
/** Indique si la règle est finalisée et prête pour validation */
|
|
@@ -136,14 +133,8 @@ export interface FrontMatter {
|
|
|
136
133
|
/** Service ou département propriétaire de cette règle */
|
|
137
134
|
service?: string;
|
|
138
135
|
/** Type de contenu de ce document */
|
|
139
|
-
role?: "rule" | "template" | "rule-helper" | "web" | "document";
|
|
140
|
-
|
|
141
|
-
tags?: string[];
|
|
142
|
-
/** Date de dernière modification explicite */
|
|
143
|
-
lastModified?: Date;
|
|
144
|
-
/** Clé personnalisée pour données additionnelles */
|
|
145
|
-
custom?: string;
|
|
146
|
-
[key: string]: string | Date | number | string[] | RuleUser | boolean | undefined;
|
|
136
|
+
role?: "rule" | "template" | "rule-helper" | "web" | "document" | string;
|
|
137
|
+
[key: string]: string | Date | number | boolean | undefined;
|
|
147
138
|
}
|
|
148
139
|
/**
|
|
149
140
|
* Représente un élément de l'historique Git d'une règle
|
|
@@ -282,6 +273,17 @@ export interface RulesGitConfig {
|
|
|
282
273
|
remoteUrl?: string;
|
|
283
274
|
/** Active les logs verbeux pour debug */
|
|
284
275
|
verbose?: boolean;
|
|
276
|
+
/**
|
|
277
|
+
* Active la validation et génération automatique des IDs dans le matter
|
|
278
|
+
* @default false (pour compatibilité ascendante)
|
|
279
|
+
*/
|
|
280
|
+
withID?: boolean;
|
|
281
|
+
/**
|
|
282
|
+
* Mode strict : rejeter les documents sans ID valide
|
|
283
|
+
* Nécessite withID: true
|
|
284
|
+
* @default false (génère l'ID si manquant)
|
|
285
|
+
*/
|
|
286
|
+
strictID?: boolean;
|
|
285
287
|
}
|
|
286
288
|
/**
|
|
287
289
|
* Configuration des options de concurrence pour les opérations Git
|
|
@@ -38,7 +38,7 @@ function matterParse(markdown) {
|
|
|
38
38
|
// 1. Séparer les lignes
|
|
39
39
|
const lines = markdown.split(/\r?\n/);
|
|
40
40
|
if (lines[0].trim() !== '---') {
|
|
41
|
-
return { matter: {}, content: markdown }; // Pas de front-matter
|
|
41
|
+
return { matter: {}, content: markdown, data: {} }; // Pas de front-matter
|
|
42
42
|
}
|
|
43
43
|
// 2. Trouver la ligne de fermeture '---'
|
|
44
44
|
let end = 1;
|
|
@@ -53,6 +53,9 @@ function matterParse(markdown) {
|
|
|
53
53
|
const [rawKey, ...rawValue] = line.split(':');
|
|
54
54
|
const key = rawKey.trim();
|
|
55
55
|
const value = rawValue.join(':').trim();
|
|
56
|
+
// Ignorer les clés sans valeur
|
|
57
|
+
if (!value)
|
|
58
|
+
continue;
|
|
56
59
|
// Détection et parsing des arrays YAML
|
|
57
60
|
if (value.startsWith('[') && value.endsWith(']')) {
|
|
58
61
|
try {
|
|
@@ -86,10 +89,14 @@ function matterParse(markdown) {
|
|
|
86
89
|
}
|
|
87
90
|
// 4. parse slugs and tags as arrays
|
|
88
91
|
// Ensure slugs is always an array
|
|
89
|
-
//
|
|
92
|
+
// FIXME: this code is dead no more slugs in matter
|
|
90
93
|
if (matter.slugs && typeof matter.slugs === 'string') {
|
|
91
94
|
matter.slugs = matter.slugs.split(',').map((s) => s.trim()).filter((s) => s.length);
|
|
92
95
|
}
|
|
96
|
+
// Ensure id is always a number
|
|
97
|
+
if (matter.id && typeof matter.id === 'string') {
|
|
98
|
+
matter.id = Number(matter.id);
|
|
99
|
+
}
|
|
93
100
|
// Ensure tags is always an array
|
|
94
101
|
if (matter.tags && typeof matter.tags === 'string') {
|
|
95
102
|
const tagsStr = matter.tags;
|
|
@@ -115,21 +122,33 @@ function matterSerializeFromRule(rule) {
|
|
|
115
122
|
function matterSerialize(content, matter) {
|
|
116
123
|
// Créer un objet propre pour le front-matter (exclure oldfile)
|
|
117
124
|
const cleanMatter = { ...matter };
|
|
118
|
-
//
|
|
119
|
-
// FIX: Convertir la Date en string pour éviter l'erreur YAML
|
|
120
|
-
cleanMatter.lastModified = (cleanMatter.lastModified instanceof Date) ? `${new Date().toISOString()}` : `${cleanMatter.lastModified}`;
|
|
125
|
+
// //
|
|
126
|
+
// // FIX: Convertir la Date en string pour éviter l'erreur YAML
|
|
127
|
+
// cleanMatter.lastModified = (cleanMatter.lastModified instanceof Date) ? `${new Date().toISOString()}` : `${cleanMatter.lastModified}`;
|
|
121
128
|
const result = Object.keys(cleanMatter).reduce((acc, key) => {
|
|
122
129
|
const value = cleanMatter[key];
|
|
130
|
+
// Ignorer les valeurs undefined ou null
|
|
131
|
+
if (value === undefined || value === null) {
|
|
132
|
+
return acc;
|
|
133
|
+
}
|
|
123
134
|
if (Array.isArray(value)) {
|
|
124
135
|
return acc + `${key}: '${value.join(',')}'\n`;
|
|
125
136
|
}
|
|
137
|
+
else if (typeof value === 'number') {
|
|
138
|
+
// ✅ FIX: Ne PAS mettre de guillemets autour des nombres
|
|
139
|
+
return acc + `${key}: ${value}\n`;
|
|
140
|
+
}
|
|
141
|
+
else if (typeof value === 'boolean') {
|
|
142
|
+
// ✅ FIX: Ne PAS mettre de guillemets autour des booléens
|
|
143
|
+
return acc + `${key}: ${value}\n`;
|
|
144
|
+
}
|
|
126
145
|
else {
|
|
146
|
+
// Pour les strings, mettre des guillemets
|
|
127
147
|
return acc + `${key}: '${value}'\n`;
|
|
128
148
|
}
|
|
129
149
|
}, '');
|
|
130
150
|
return `---
|
|
131
|
-
${result}
|
|
132
|
-
---
|
|
151
|
+
${result}---
|
|
133
152
|
${content}`;
|
|
134
153
|
}
|
|
135
154
|
/**
|
package/dist/src/scrapper.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { FrontMatter } from "./rules/types";
|
|
1
2
|
export declare function extractCaptcha(base64Image: string, openai: any): Promise<{
|
|
2
3
|
number: any;
|
|
3
4
|
cost: number;
|
|
@@ -7,49 +8,41 @@ export declare function extractCaptcha(base64Image: string, openai: any): Promis
|
|
|
7
8
|
*
|
|
8
9
|
* @param {string} inputfile - The name of the PDF file being processed
|
|
9
10
|
* @param {any} pdfData - The extracted content from the PDF file
|
|
10
|
-
* @param {any} openai - The OpenAI client instance
|
|
11
11
|
* @param {any[]} links - Optional array of links extracted from the PDF to be integrated into the markdown
|
|
12
|
+
* @param {string} model - The model to use for parsing (default: "MEDIUM-fast")
|
|
12
13
|
* @returns {Promise<{markdown: string, cost: number}>} - The parsed markdown content and the cost of the API call
|
|
13
14
|
*/
|
|
14
|
-
export declare function
|
|
15
|
-
markdown:
|
|
15
|
+
export declare function callLLMForParsingPDF(inputfile: string, pdfData: any, links?: any[], model?: string): Promise<{
|
|
16
|
+
markdown: string;
|
|
16
17
|
cost: number;
|
|
17
18
|
}>;
|
|
18
|
-
/**
|
|
19
|
-
* Convertit un document HTML en markdown en appelant le modèle GPT (ex: gpt-4.1)
|
|
20
|
-
* pour analyser et reformater le document.
|
|
21
|
-
*
|
|
22
|
-
* @param {any} htmlData - Le document HTML à transformer en markdown.
|
|
23
|
-
* @param {any} openai - L'instance OpenAI configurée pour appeler l'API.
|
|
24
|
-
* @param {boolean} simple - Si true, utilise un prompt simplifié.
|
|
25
|
-
* @returns {Promise<Object>} - Le contenu markdown structuré créé par le modèle LLM.
|
|
26
|
-
*/
|
|
27
|
-
export declare function callGPTForParsingHTML(html: string, openai: any): Promise<any>;
|
|
28
19
|
/**
|
|
29
20
|
* Parses an HTML file and converts it to markdown using GPT.
|
|
30
21
|
*
|
|
31
22
|
* @param {string} output - The directory path where the output markdown file will be saved.
|
|
32
23
|
* @param {string} file - The path to the HTML file to be parsed.
|
|
33
24
|
* @param {string} service - The service name used as part of the output filename output.
|
|
34
|
-
* @param {
|
|
25
|
+
* @param {string} model - The model to use for parsing (default: "MEDIUM-fast")
|
|
35
26
|
* @returns {Promise<{markdown: string, cost: number}>} - The generated markdown content and the cost of the GPT API call.
|
|
36
27
|
*/
|
|
37
|
-
export declare function html2markdown(output: string, file: string, service: string,
|
|
38
|
-
markdown:
|
|
28
|
+
export declare function html2markdown(output: string, file: string, service: string, model?: string): Promise<{
|
|
29
|
+
markdown: string;
|
|
39
30
|
cost: number;
|
|
40
31
|
}>;
|
|
41
32
|
/**
|
|
42
33
|
* Parse un PDF en effectuant :
|
|
43
34
|
* 1. Le nettoyage du PDF avec Ghostscript.
|
|
44
35
|
* 2. Sa conversion en XML via pdftohtml.
|
|
45
|
-
* 3. (Optionnellement) Le passage du contenu converti au modèle
|
|
36
|
+
* 3. (Optionnellement) Le passage du contenu converti au modèle LLM pour analyser la structure.
|
|
46
37
|
*
|
|
47
|
-
* @param {string}
|
|
48
|
-
* @param {
|
|
49
|
-
* @
|
|
38
|
+
* @param {string} outputDir - Dossier de sortie pour le fichier markdown.
|
|
39
|
+
* @param {string} pdf - Chemin vers le fichier PDF à analyser.
|
|
40
|
+
* @param {FrontMatter|null} matter - Métadonnées du document (title, service, author, role). Si null, utilise le nom du PDF pour le titre.
|
|
41
|
+
* @param {string} model - Le modèle à utiliser (défaut: "MEDIUM-fast").
|
|
42
|
+
* @returns {Promise<{markdown: string, cost: number, outputPath: string}>} - Le markdown structuré, le coût et le chemin du fichier de sortie.
|
|
50
43
|
*/
|
|
51
|
-
export declare function pdf2markdown(
|
|
52
|
-
markdown:
|
|
44
|
+
export declare function pdf2markdown(outputDir: string, pdf: string, matter: FrontMatter | null, model?: string): Promise<{
|
|
45
|
+
markdown: string;
|
|
53
46
|
cost: number;
|
|
54
47
|
outputPath: string;
|
|
55
48
|
}>;
|
package/dist/src/scrapper.js
CHANGED
|
@@ -4,8 +4,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.extractCaptcha = extractCaptcha;
|
|
7
|
-
exports.
|
|
8
|
-
exports.callGPTForParsingHTML = callGPTForParsingHTML;
|
|
7
|
+
exports.callLLMForParsingPDF = callLLMForParsingPDF;
|
|
9
8
|
exports.html2markdown = html2markdown;
|
|
10
9
|
exports.pdf2markdown = pdf2markdown;
|
|
11
10
|
const child_process_1 = require("child_process");
|
|
@@ -14,9 +13,11 @@ const path_1 = __importDefault(require("path"));
|
|
|
14
13
|
const fs_1 = __importDefault(require("fs"));
|
|
15
14
|
const jsdom_1 = require("jsdom");
|
|
16
15
|
const readability_1 = require("@mozilla/readability");
|
|
17
|
-
const
|
|
16
|
+
const pricing_llm_1 = require("./pricing.llm");
|
|
18
17
|
const prompts_1 = require("./prompts");
|
|
19
18
|
const utils_1 = require("./utils");
|
|
19
|
+
const execute_1 = require("./execute");
|
|
20
|
+
const utils_matter_1 = require("./rules/utils.matter");
|
|
20
21
|
// Promisify exec for easier async/await usage
|
|
21
22
|
const execAsync = (0, util_1.promisify)(child_process_1.exec);
|
|
22
23
|
const execFileAsync = (0, util_1.promisify)(child_process_1.execFile);
|
|
@@ -36,7 +37,7 @@ async function extractCaptcha(base64Image, openai) {
|
|
|
36
37
|
messages: [{ role: "user", content }],
|
|
37
38
|
max_completion_tokens: 50,
|
|
38
39
|
});
|
|
39
|
-
const cost = (0,
|
|
40
|
+
const cost = (0, pricing_llm_1.calculateCost)(model, response.usage);
|
|
40
41
|
// Récupérer la réponse markdown
|
|
41
42
|
const number = response.choices[0].message.content;
|
|
42
43
|
return { number, cost };
|
|
@@ -46,11 +47,11 @@ async function extractCaptcha(base64Image, openai) {
|
|
|
46
47
|
*
|
|
47
48
|
* @param {string} inputfile - The name of the PDF file being processed
|
|
48
49
|
* @param {any} pdfData - The extracted content from the PDF file
|
|
49
|
-
* @param {any} openai - The OpenAI client instance
|
|
50
50
|
* @param {any[]} links - Optional array of links extracted from the PDF to be integrated into the markdown
|
|
51
|
+
* @param {string} model - The model to use for parsing (default: "MEDIUM-fast")
|
|
51
52
|
* @returns {Promise<{markdown: string, cost: number}>} - The parsed markdown content and the cost of the API call
|
|
52
53
|
*/
|
|
53
|
-
async function
|
|
54
|
+
async function callLLMForParsingPDF(inputfile, pdfData, links = [], model = "MEDIUM-fast") {
|
|
54
55
|
// Convertir le contenu en chaîne de caractères (attention à la taille potentielle !)
|
|
55
56
|
const pdfDataAsString = JSON.stringify(pdfData, null, 2);
|
|
56
57
|
// Format: YYYY-MM-DD
|
|
@@ -62,57 +63,25 @@ async function callGPTForParsingPDF(inputfile, pdfData, openai, links = []) {
|
|
|
62
63
|
// Créer le prompt pour décrire la tâche au LLM
|
|
63
64
|
const messages = [
|
|
64
65
|
{ role: "system",
|
|
65
|
-
content: prompts_1.htmlToMarkdownPrompt }
|
|
66
|
-
{ role: "user",
|
|
67
|
-
content: `Structure le contenu exhaustif en Markdown sans rien inventer, et avec les liens intégrés correctement.\n Nous sommes le ${today}.\n${linkPrefix}\nLe contenu du document est:\n${pdfDataAsString}`
|
|
68
|
-
}
|
|
66
|
+
content: prompts_1.htmlToMarkdownPrompt }
|
|
69
67
|
];
|
|
70
|
-
// console.log('🌶️ DEBUG:
|
|
71
|
-
// console.log('🌶️ DEBUG:
|
|
68
|
+
// console.log('🌶️ DEBUG: callLLMForParsingPDF -- SYSTEM:', messages[0].content);
|
|
69
|
+
// console.log('🌶️ DEBUG: callLLMForParsingPDF -- USER:', messages[1].content);
|
|
72
70
|
// WARNING: o3-mini is buggy with "Marche à suivre nouveau bail.pdf"
|
|
73
|
-
const
|
|
74
|
-
|
|
75
|
-
model
|
|
71
|
+
const response = await (0, execute_1.executeQuery)({
|
|
72
|
+
query: `Structure le contenu exhaustif en Markdown sans rien inventer, et avec les liens intégrés correctement.\n Nous sommes le ${today}.\n${linkPrefix}\nLe contenu du document est:\n${pdfDataAsString}`,
|
|
73
|
+
model,
|
|
76
74
|
messages,
|
|
75
|
+
stdout: execute_1.DummyWritable,
|
|
76
|
+
verbose: false
|
|
77
77
|
});
|
|
78
|
-
// response_format: { type: "json_object" }
|
|
79
|
-
let cost = (0, princing_openai_1.calculateCost)(model, response.usage);
|
|
80
|
-
// messages.push({
|
|
81
|
-
// role: "user",
|
|
82
|
-
// content: `Maintenant génère le contenu Markdown détaillé et exhaustif correspondant à chaque section avec les liens intégrés correctement.`
|
|
83
|
-
// });
|
|
84
|
-
// response = await openai.chat.completions.create({
|
|
85
|
-
// model: model,
|
|
86
|
-
// messages,
|
|
87
|
-
// max_completion_tokens: 15192,
|
|
88
|
-
// reasoning_effort:"low",
|
|
89
|
-
// stop:"|<|james|>|"
|
|
90
|
-
// });
|
|
91
78
|
// Récupérer la réponse markdown
|
|
92
|
-
|
|
93
|
-
console.log(`Markdown 💰 cost: ${cost}`);
|
|
79
|
+
const markdown = response.content;
|
|
80
|
+
console.log(`Markdown 💰 cost: ${response.usage.cost}`);
|
|
94
81
|
//
|
|
95
82
|
// add a regex to extract the markdown content between <thinking></thinking> tags
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
// messages.push({
|
|
99
|
-
// role: "user",
|
|
100
|
-
// content: hragPrompt
|
|
101
|
-
// });
|
|
102
|
-
// response = await openai.chat.completions.create({
|
|
103
|
-
// model: model,
|
|
104
|
-
// messages,
|
|
105
|
-
// max_completion_tokens: 15192,
|
|
106
|
-
// reasoning_effort:"low",
|
|
107
|
-
// stop:"|<|james|>|"
|
|
108
|
-
// });
|
|
109
|
-
// // Récupérer la réponse markdown
|
|
110
|
-
// markdown = response.choices[0].message.content;
|
|
111
|
-
// cost += calculateCost(model, response.usage);
|
|
112
|
-
// //
|
|
113
|
-
// // add a regex to extract the markdown content between <thinking></thinking> tags
|
|
114
|
-
// markdownWithoutThinking = markdown.replace(/<thinking>[\s\S]*?<\/thinking>/g, '');
|
|
115
|
-
return { markdown: markdownWithoutThinking, cost };
|
|
83
|
+
const markdownWithoutThinking = markdown.replace(/<thinking>[\s\S]*?<\/thinking>/g, '');
|
|
84
|
+
return { markdown: markdownWithoutThinking, cost: response.usage.cost };
|
|
116
85
|
}
|
|
117
86
|
/**
|
|
118
87
|
* Extracts hyperlinks from a PDF file by converting it to HTML and parsing the links.
|
|
@@ -156,47 +125,6 @@ async function extractLinksFromPDF(pdfPath, output) {
|
|
|
156
125
|
}
|
|
157
126
|
}
|
|
158
127
|
}
|
|
159
|
-
/**
|
|
160
|
-
* Convertit un document HTML en markdown en appelant le modèle GPT (ex: gpt-4.1)
|
|
161
|
-
* pour analyser et reformater le document.
|
|
162
|
-
*
|
|
163
|
-
* @param {any} htmlData - Le document HTML à transformer en markdown.
|
|
164
|
-
* @param {any} openai - L'instance OpenAI configurée pour appeler l'API.
|
|
165
|
-
* @param {boolean} simple - Si true, utilise un prompt simplifié.
|
|
166
|
-
* @returns {Promise<Object>} - Le contenu markdown structuré créé par le modèle LLM.
|
|
167
|
-
*/
|
|
168
|
-
async function callGPTForParsingHTML(html, openai) {
|
|
169
|
-
const htmlDataAsString = html;
|
|
170
|
-
// Créer le prompt pour décrire la tâche au LLM
|
|
171
|
-
const messages = [
|
|
172
|
-
{
|
|
173
|
-
role: "system",
|
|
174
|
-
content: prompts_1.htmlToMarkdownPrompt
|
|
175
|
-
},
|
|
176
|
-
{
|
|
177
|
-
role: "user",
|
|
178
|
-
content: `Voici le document HTML à transformer en markdown : \n${htmlDataAsString}`
|
|
179
|
-
}
|
|
180
|
-
];
|
|
181
|
-
// Appel à l'API ChatCompletion
|
|
182
|
-
const response = await openai.chat.completions.create({
|
|
183
|
-
model: "gpt-4.1",
|
|
184
|
-
messages,
|
|
185
|
-
max_completion_tokens: 15192,
|
|
186
|
-
temperature: 0,
|
|
187
|
-
frequency_penalty: 0.0,
|
|
188
|
-
presence_penalty: 0.0,
|
|
189
|
-
});
|
|
190
|
-
const cost = (0, princing_openai_1.calculateCost)("gpt-4.1", response.usage);
|
|
191
|
-
console.log(`Markdown 💰 cost: ${cost}`);
|
|
192
|
-
// Récupérer la réponse markdown
|
|
193
|
-
const markdown = response.choices[0].message.content;
|
|
194
|
-
if (!markdown)
|
|
195
|
-
throw new Error("No markdown found");
|
|
196
|
-
// Extraction et suppression des balises <thinking></thinking>
|
|
197
|
-
const markdownWithoutThinking = markdown.replace(/<thinking>[\s\S]*?<\/thinking>/g, '');
|
|
198
|
-
return markdownWithoutThinking;
|
|
199
|
-
}
|
|
200
128
|
function cleanHTML(html) {
|
|
201
129
|
const dom = new jsdom_1.JSDOM(html);
|
|
202
130
|
// Instancie Readability avec le document
|
|
@@ -210,15 +138,15 @@ function cleanHTML(html) {
|
|
|
210
138
|
* @param {string} output - The directory path where the output markdown file will be saved.
|
|
211
139
|
* @param {string} file - The path to the HTML file to be parsed.
|
|
212
140
|
* @param {string} service - The service name used as part of the output filename output.
|
|
213
|
-
* @param {
|
|
141
|
+
* @param {string} model - The model to use for parsing (default: "MEDIUM-fast")
|
|
214
142
|
* @returns {Promise<{markdown: string, cost: number}>} - The generated markdown content and the cost of the GPT API call.
|
|
215
143
|
*/
|
|
216
|
-
async function html2markdown(output, file, service,
|
|
144
|
+
async function html2markdown(output, file, service, model = "MEDIUM-fast") {
|
|
217
145
|
const filename = (0, utils_1.toSlug)(path_1.default.basename(file, path_1.default.extname(file)));
|
|
218
146
|
const htmlraw = fs_1.default.readFileSync(file, "utf8");
|
|
219
147
|
const html = cleanHTML(htmlraw);
|
|
220
148
|
const outputfile = html.indexOf('Please sign in') > -1 ? 'unauthorized-' : (service.toLocaleLowerCase() + '-');
|
|
221
|
-
const { markdown, cost } = await
|
|
149
|
+
const { markdown, cost } = await callLLMForParsingPDF(file, html, [], model);
|
|
222
150
|
fs_1.default.writeFileSync(path_1.default.join(output, `${outputfile + filename}.md`), markdown, { encoding: 'utf8', flag: 'w' });
|
|
223
151
|
return { markdown, cost };
|
|
224
152
|
}
|
|
@@ -226,32 +154,51 @@ async function html2markdown(output, file, service, openai) {
|
|
|
226
154
|
* Parse un PDF en effectuant :
|
|
227
155
|
* 1. Le nettoyage du PDF avec Ghostscript.
|
|
228
156
|
* 2. Sa conversion en XML via pdftohtml.
|
|
229
|
-
* 3. (Optionnellement) Le passage du contenu converti au modèle
|
|
157
|
+
* 3. (Optionnellement) Le passage du contenu converti au modèle LLM pour analyser la structure.
|
|
230
158
|
*
|
|
231
|
-
* @param {string}
|
|
232
|
-
* @param {
|
|
233
|
-
* @
|
|
159
|
+
* @param {string} outputDir - Dossier de sortie pour le fichier markdown.
|
|
160
|
+
* @param {string} pdf - Chemin vers le fichier PDF à analyser.
|
|
161
|
+
* @param {FrontMatter|null} matter - Métadonnées du document (title, service, author, role). Si null, utilise le nom du PDF pour le titre.
|
|
162
|
+
* @param {string} model - Le modèle à utiliser (défaut: "MEDIUM-fast").
|
|
163
|
+
* @returns {Promise<{markdown: string, cost: number, outputPath: string}>} - Le markdown structuré, le coût et le chemin du fichier de sortie.
|
|
234
164
|
*/
|
|
235
|
-
async function pdf2markdown(
|
|
236
|
-
|
|
165
|
+
async function pdf2markdown(outputDir, pdf, matter, model = "MEDIUM-fast") {
|
|
166
|
+
//
|
|
167
|
+
// Extract matter values with defaults
|
|
168
|
+
const service = matter?.service || 'unknown';
|
|
169
|
+
const title = matter?.title || path_1.default.basename(pdf, path_1.default.extname(pdf));
|
|
170
|
+
//
|
|
171
|
+
// Build complete FrontMatter with defaults
|
|
172
|
+
const frontMatter = {
|
|
173
|
+
title,
|
|
174
|
+
service,
|
|
175
|
+
author: matter?.author || '',
|
|
176
|
+
role: matter?.role || 'rule',
|
|
177
|
+
};
|
|
178
|
+
//
|
|
179
|
+
// Use title for filename
|
|
180
|
+
const filename = (0, utils_1.toSlug)(title);
|
|
237
181
|
// Créez des noms de fichiers temporaires pour le PDF nettoyé et le XML généré.
|
|
238
|
-
const tempPdf = path_1.default.join(
|
|
239
|
-
const tempOut = path_1.default.join(
|
|
182
|
+
const tempPdf = path_1.default.join(outputDir, `cleaned-${randomFile()}.pdf`);
|
|
183
|
+
const tempOut = path_1.default.join(outputDir, `${filename}.txt`);
|
|
240
184
|
//
|
|
241
185
|
// generated folder path
|
|
242
|
-
const outputPath = path_1.default.join(
|
|
186
|
+
const outputPath = path_1.default.join(outputDir, `${(0, utils_1.toSlug)(service.toLocaleLowerCase())}-${filename}.md`);
|
|
243
187
|
try {
|
|
244
188
|
//
|
|
245
189
|
// replace pdftotext with python script PyMuPDF
|
|
246
190
|
// Ca ne marche pas mieux que pdftotext
|
|
247
|
-
// const { stdout } = await execFileAsync("python3", ["./bin/extract_text_with_links.py",
|
|
191
|
+
// const { stdout } = await execFileAsync("python3", ["./bin/extract_text_with_links.py", pdf]);
|
|
248
192
|
// const { text, links } = JSON.parse(stdout);
|
|
249
|
-
await execAsync(`pdftotext -nodiag -nopgbrk "${
|
|
250
|
-
const links = await extractLinksFromPDF(
|
|
193
|
+
await execAsync(`pdftotext -nodiag -nopgbrk "${pdf}" "${outputPath}"`);
|
|
194
|
+
const links = await extractLinksFromPDF(pdf, outputDir);
|
|
251
195
|
const text = fs_1.default.readFileSync(outputPath, "utf8");
|
|
252
|
-
const { markdown, cost } = await
|
|
253
|
-
|
|
254
|
-
|
|
196
|
+
const { markdown, cost } = await callLLMForParsingPDF(pdf, text, links, model);
|
|
197
|
+
//
|
|
198
|
+
// Add frontmatter to the markdown before saving
|
|
199
|
+
const markdownWithMatter = (0, utils_matter_1.matterSerialize)(markdown, frontMatter);
|
|
200
|
+
fs_1.default.writeFileSync(outputPath, markdownWithMatter);
|
|
201
|
+
return { markdown: markdownWithMatter, cost, outputPath };
|
|
255
202
|
/**
|
|
256
203
|
|
|
257
204
|
// STEP 1: Clean the PDF using Ghostscript.
|
|
@@ -271,7 +218,7 @@ async function pdf2markdown(folder, file, service, openai) {
|
|
|
271
218
|
|
|
272
219
|
// (OPTIONNEL) STEP 3: Utilisez GPT pour analyser la structure du contenu XML.
|
|
273
220
|
// Vous pouvez adapter le traitement en fonction du contenu généré par pdftohtml.
|
|
274
|
-
const {markdown,cost} = await
|
|
221
|
+
const {markdown,cost} = await callLLMForParsingPDF(file, xmlContent, [], model);
|
|
275
222
|
fs.writeFileSync(outputPath, markdown);
|
|
276
223
|
return {markdown,cost}; */
|
|
277
224
|
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* @fileoverview Point d'entrée du module AgentStateGraph
|
|
3
3
|
* Exports principaux pour le système de gestion des discussions par agent
|
|
4
4
|
*/
|
|
5
|
-
export type { AgentMessage, TokenUsage, AgentDiscussion,
|
|
5
|
+
export type { AgentMessage, TokenUsage, AgentDiscussion, ClientDiscussion } from './types';
|
|
6
6
|
export { getSpecializedAgent } from './types';
|
|
7
7
|
export { AgentStateGraph } from './stategraph';
|
|
8
8
|
export { sessionStateGraphGet, sessionStateGraphSet, sessionStateGraphClear, sessionStateGraphExists, sessionStateGraphSize, migrateFromLegacySession } from './stategraph.storage';
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* @fileoverview Implémentation du système AgentStateGraph
|
|
3
3
|
* Classe principale pour gérer les discussions par agent
|
|
4
4
|
*/
|
|
5
|
-
import { AgentStateGraph as IAgentStateGraph, AgentDiscussion, AgentMessage, TokenUsage, ClientDiscussion } from './types';
|
|
5
|
+
import { AgentStateGraph as IAgentStateGraph, AgentDiscussion, AgentMessage, TokenUsage, ClientDiscussion, StepTrail } from './types';
|
|
6
6
|
/**
|
|
7
7
|
* Implémentation du StateGraph pour la gestion des discussions par agent
|
|
8
8
|
* Remplace AgenticMemoryManager avec une architecture simplifiée
|
|
@@ -24,10 +24,33 @@ export declare class AgentStateGraph implements IAgentStateGraph {
|
|
|
24
24
|
/**
|
|
25
25
|
* Écrase le message system de la discussion avec un nouvel agent
|
|
26
26
|
* Le message system est toujours messages[0] avec role: "system"
|
|
27
|
-
* @param agentName Nom de l'agent
|
|
27
|
+
* @param agentName Nom de l'agent (clé de discussion, reste fixe)
|
|
28
28
|
* @param content Nouveau contenu du message system
|
|
29
29
|
*/
|
|
30
30
|
set(agentName: string, content: string): void;
|
|
31
|
+
/**
|
|
32
|
+
* Ajoute une étape au CONTEXT TRAIL et met à jour le message system
|
|
33
|
+
* @param agentName Nom de l'agent
|
|
34
|
+
* @param step Étape à ajouter au trail
|
|
35
|
+
*/
|
|
36
|
+
addStep(agentName: string, step: StepTrail): void;
|
|
37
|
+
/**
|
|
38
|
+
* Retourne tous les steps du CONTEXT TRAIL pour une discussion
|
|
39
|
+
* @param agentName Nom de l'agent
|
|
40
|
+
* @returns Liste des steps enregistrés pour cette discussion
|
|
41
|
+
*/
|
|
42
|
+
steps(agentName: string): StepTrail[];
|
|
43
|
+
/**
|
|
44
|
+
* Formate le CONTEXT TRAIL d'une discussion pour injection dans system instructions
|
|
45
|
+
* @param discussion Discussion à formater
|
|
46
|
+
* @returns Trail formaté ou message par défaut
|
|
47
|
+
*/
|
|
48
|
+
private formatTrailPrompt;
|
|
49
|
+
/**
|
|
50
|
+
* Met à jour le message system avec le nouveau trail via regexp
|
|
51
|
+
* @param agentName Nom de l'agent
|
|
52
|
+
*/
|
|
53
|
+
private updateSystemMessage;
|
|
31
54
|
/**
|
|
32
55
|
* Additionne l'usage des tokens pour un agent
|
|
33
56
|
* @param agentName Nom de l'agent
|
|
@@ -78,6 +101,12 @@ export declare class AgentStateGraph implements IAgentStateGraph {
|
|
|
78
101
|
* @returns Nom de l'agent spécialisé
|
|
79
102
|
*/
|
|
80
103
|
getSpecializedAgent(agentName: string): string;
|
|
104
|
+
/**
|
|
105
|
+
* Charge une discussion en remplaçant l'existante ou en créant une nouvelle
|
|
106
|
+
* Recherche par id en priorité, sinon par startAgent
|
|
107
|
+
* @param discussion Discussion à charger
|
|
108
|
+
*/
|
|
109
|
+
load(discussion: AgentDiscussion): void;
|
|
81
110
|
/**
|
|
82
111
|
* Sérialise le StateGraph en JSON
|
|
83
112
|
* @returns Représentation JSON
|