@odda-ai/matching-core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +34 -0
- package/src/ai/AIProvider.ts +159 -0
- package/src/ai/adapters/AnthropicAdapter.ts +42 -0
- package/src/ai/adapters/OllamaAdapter.ts +42 -0
- package/src/ai/adapters/OpenAIAdapter.ts +53 -0
- package/src/ai/adapters/index.ts +3 -0
- package/src/ai/factory.ts +48 -0
- package/src/ai/index.ts +5 -0
- package/src/ai/registry.ts +15 -0
- package/src/ai/types.ts +59 -0
- package/src/cv-parser/PDFParserService.ts +160 -0
- package/src/cv-parser/index.ts +2 -0
- package/src/cv-parser/types.ts +58 -0
- package/src/features/ai-cv-resume.service.ts +104 -0
- package/src/features/ai-talent.service.ts +49 -0
- package/src/features/cv-chunking.service.ts +510 -0
- package/src/features/index.ts +5 -0
- package/src/features/job-matcher.service.ts +41 -0
- package/src/features/prompts.ts +621 -0
- package/src/features/system-messages.ts +28 -0
- package/src/features/types.ts +55 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { AIProvider } from "../ai/index.js";
|
|
2
|
+
import {
|
|
3
|
+
pdfParser,
|
|
4
|
+
type PDFExtractionOptions,
|
|
5
|
+
} from "../cv-parser/index.js";
|
|
6
|
+
import { prompts } from "./prompts.js";
|
|
7
|
+
import { systemMessages } from "./system-messages.js";
|
|
8
|
+
import { Seniority, type CvAnalysisResponse } from "./types.js";
|
|
9
|
+
|
|
10
|
+
/**
 * Optional settings accepted by `AiCvResumeService.analyzeResume`.
 */
export type AnalyzeResumeOptions = {
  /** Passed through as the second argument of the PDF parser's `extractText` call. */
  pdfoptions?: PDFExtractionOptions;
};
|
|
11
|
+
|
|
12
|
+
/**
 * Turns a CV supplied as a PDF buffer into a structured `CvAnalysisResponse`:
 * the PDF text is extracted, sent to the configured AI provider, and the
 * provider's textual reply is parsed and normalised.
 */
export class AiCvResumeService {
  private pdfParser = pdfParser;
  private ai: AIProvider;

  constructor(ai: AIProvider) {
    this.ai = ai;
  }

  /**
   * Extracts the text of `pdfBuffer`, prompts the AI provider with it and
   * returns the parsed analysis.
   *
   * @param pdfBuffer - Raw PDF content.
   * @param options - Optional settings; `pdfoptions` is forwarded to the PDF parser.
   * @returns The normalised analysis built from the provider's reply.
   * @throws Error when the reply cannot be parsed into the expected structure
   *   (errors from the PDF parser or the AI provider propagate unchanged).
   */
  async analyzeResume(
    pdfBuffer: Buffer,
    options?: AnalyzeResumeOptions,
  ): Promise<CvAnalysisResponse> {
    const pdftext = await this.pdfParser.extractText(
      pdfBuffer,
      options?.pdfoptions,
    );
    const result = await this.ai.prompt(
      prompts.cvAnalysis(pdftext.text),
      systemMessages.analyzeResume(),
    );
    return this.parseAiResponse(result.content);
  }

  /**
   * Parses the provider's reply into a `CvAnalysisResponse`.
   *
   * The reply may be bare JSON, JSON inside a ```json fence, or JSON surrounded
   * by prose: the outermost `{ ... }` span is what gets parsed. Missing or
   * malformed fields are replaced with defaults rather than propagated.
   *
   * @throws Error (prefixed with "Failed to parse AI response into structured
   *   format") when no JSON object can be located or parsed.
   */
  private parseAiResponse(aiResponse: string): CvAnalysisResponse {
    try {
      let jsonStr = aiResponse.trim();

      // Prefer the content of a ```json fenced block when one is present.
      const fenced = jsonStr.match(/```json\s*([\s\S]*?)```/);
      if (fenced && fenced[1]) {
        jsonStr = fenced[1];
      }

      // Drop any leading/trailing non-JSON content.
      const jsonStart = jsonStr.indexOf("{");
      const jsonEnd = jsonStr.lastIndexOf("}");

      // `jsonEnd < jsonStart` also covers "no closing brace" (-1) and rejects a
      // "}" that precedes the first "{", which substring() would otherwise
      // silently accept by swapping its arguments.
      if (jsonStart === -1 || jsonEnd < jsonStart) {
        throw new Error("No valid JSON object found in AI response");
      }

      const parsed = JSON.parse(jsonStr.substring(jsonStart, jsonEnd + 1));
      const info = parsed.personalInfo;

      const analysis: CvAnalysisResponse = {
        personalInfo: {
          firstName: info?.firstName || "N/A",
          lastName: info?.lastName || "N/A",
          email: info?.email || undefined,
          phone: info?.phone || undefined,
          address: info?.address || undefined,
          dateOfBirth: info?.dateOfBirth || undefined,
          nationality: info?.nationality || undefined,
          linkedIn: info?.linkedIn || undefined,
          github: info?.github || undefined,
          website: info?.website || undefined,
        },
        description: parsed.description || "",
        technicalSkills: this.objectEntries(parsed.technicalSkills)
          // A skill without a usable name cannot be rendered or matched later.
          .filter(
            (skill) =>
              typeof skill.name === "string" && skill.name.trim() !== "",
          )
          .map((skill) => ({
            name: skill.name,
            proficiency: this.finiteOr(skill.proficiency, 0),
            isInferred: Boolean(skill.isInferred),
            seniority: this.validateSeniority(skill.seniority),
          })),
        workExperienceSummary: parsed.workExperienceSummary || "",
        certifications: this.objectEntries(parsed.certifications).map(
          (cert) => ({
            name: cert.name,
            issuer: cert.issuer || undefined,
            year: cert.year ? this.finiteOr(cert.year, undefined) : undefined,
          }),
        ),
        overallSeniority: this.validateSeniority(parsed.overallSeniority),
        yearsOfExperience: Number(parsed.yearsOfExperience) || 0,
      };

      return analysis;
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(
        `Failed to parse AI response into structured format: ${reason}`,
      );
    }
  }

  /**
   * Returns the object entries of `value` when it is an array, otherwise an
   * empty list; `null` and primitive entries are dropped.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped provider JSON, normalised field by field by the caller
  private objectEntries(value: unknown): Array<Record<string, any>> {
    if (!Array.isArray(value)) return [];
    return value.filter((entry) => entry !== null && typeof entry === "object");
  }

  /** Converts `value` with `Number()`, returning `fallback` when the result is not finite. */
  private finiteOr<T>(value: unknown, fallback: T): number | T {
    const converted = Number(value);
    return Number.isFinite(converted) ? converted : fallback;
  }

  /**
   * Maps a provider-supplied seniority onto the `Seniority` enum
   * (case-insensitively); anything unrecognised, including non-string values,
   * falls back to `Seniority.MID`.
   */
  private validateSeniority(value: unknown): Seniority {
    if (typeof value !== "string") {
      return Seniority.MID;
    }

    const upperValue = value.trim().toUpperCase();
    if (Object.values(Seniority).includes(upperValue as Seniority)) {
      return upperValue as Seniority;
    }

    return Seniority.MID;
  }
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { AIProvider } from "../ai/index.js";
|
|
2
|
+
import {
|
|
3
|
+
AiCvResumeService,
|
|
4
|
+
type AnalyzeResumeOptions,
|
|
5
|
+
} from "./ai-cv-resume.service.js";
|
|
6
|
+
import {
|
|
7
|
+
type ChunkingOptions,
|
|
8
|
+
CvChunkingService,
|
|
9
|
+
} from "./cv-chunking.service.js";
|
|
10
|
+
import { JobMatcherService } from "./job-matcher.service.js";
|
|
11
|
+
import type { CvAnalysisResponse } from "./types.js";
|
|
12
|
+
|
|
13
|
+
/**
 * Facade over the CV and job services: builds one instance of each from a
 * single AI provider and exposes their operations grouped under `cv` and `jobs`.
 */
export class AiTalentService {
  private ai: AIProvider;
  private aiCvResumeService: AiCvResumeService;
  private cvChunkingService: CvChunkingService;
  private jobMatcherService: JobMatcherService;

  /**
   * @param ai - Provider handed to the resume-analysis and job-matching services.
   */
  constructor(ai: AIProvider) {
    this.ai = ai;
    this.aiCvResumeService = new AiCvResumeService(ai);
    this.cvChunkingService = new CvChunkingService();
    this.jobMatcherService = new JobMatcherService(ai);
  }

  /** CV operations. */
  public cv = {
    /** Delegates to `AiCvResumeService.analyzeResume`. */
    analyzeCvResume: async (pdf: Buffer, options?: AnalyzeResumeOptions) =>
      this.aiCvResumeService.analyzeResume(pdf, options),

    /** Delegates to `CvChunkingService.chunkCvAnalysis`. */
    chunkCvAnalysis: async (
      cvAnalysis: CvAnalysisResponse,
      options?: ChunkingOptions,
    ) => this.cvChunkingService.chunkCvAnalysis(cvAnalysis, options),
  };

  /** Job operations. */
  public jobs = {
    /** Delegates to `JobMatcherService.getJobSkillsFromDescription`. */
    getSkillsFromJobDescription: async (
      jobDescription: string,
      skillsList: string[],
    ) =>
      this.jobMatcherService.getJobSkillsFromDescription(
        jobDescription,
        skillsList,
      ),
  };
}
|
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
import type { CvAnalysisResponse } from './types.js';
|
|
2
|
+
|
|
3
|
+
/** Options controlling how a CV is split into chunks. */
export interface ChunkingOptions {
  /** Maximum size of each chunk, in characters. Default: 500. */
  maxChunkSize?: number;

  /** Overlap between chunks, in characters, used to preserve context. Default: 50. */
  overlap?: number;

  /** When true, metadata is included in the chunk. Default: true. */
  includeMetadata?: boolean;

  /** Format of the chunk. Default: 'text'. */
  format?:
    | 'text'
    | 'json'
    | 'markdown';
}
|
|
31
|
+
|
|
32
|
+
/** A single chunk of text together with its metadata. */
export interface CvChunk {
  /** Unique id of the chunk. */
  id: string;

  /** Textual content of the chunk. */
  content: string;

  /** CV section the chunk belongs to. */
  section:
    | 'personal'
    | 'description'
    | 'skills'
    | 'experience'
    | 'certifications'
    | 'summary';

  /** Chunk metadata intended to ease retrieval. */
  metadata: {
    /** Index of the chunk within the sequence. */
    index: number;

    /** Total number of chunks. */
    totalChunks: number;

    /** Keywords extracted from the chunk. */
    keywords: string[];

    /** Associated seniority (when applicable). */
    seniority?: string;

    /** Years of experience (when applicable). */
    yearsOfExperience?: number;
  };
}
|
|
81
|
+
|
|
82
|
+
/** Outcome of the chunking process. */
export interface ChunkingResult {
  /** The generated chunks. */
  chunks: CvChunk[];

  /** Statistics about the chunking. */
  stats: {
    totalChunks: number;
    averageChunkSize: number;
    totalCharacters: number;
  };
}
|
|
100
|
+
|
|
101
|
+
/**
 * Converts a `CvAnalysisResponse` into text chunks intended for embedding and
 * retrieval.
 *
 * Chunks are emitted in a fixed section order (personal, description, skills,
 * experience, certifications, summary). Each chunk id is `chunk-<index>` where
 * `index` is the chunk's position in the returned array.
 *
 * Note: the `includeMetadata` and `format` options are accepted and defaulted
 * but are not consulted anywhere in this class.
 */
export class CvChunkingService {
  /** Italian and English function words ignored by keyword extraction. */
  private static readonly STOP_WORDS: ReadonlySet<string> = new Set([
    'il', 'la', 'di', 'da', 'in', 'con', 'per', 'a', 'e', 'o',
    'the', 'an', 'and', 'or', 'but', 'on', 'at', 'to', 'for'
  ]);

  private defaultOptions: Required<ChunkingOptions> = {
    maxChunkSize: 500,
    overlap: 50,
    includeMetadata: true,
    format: 'text'
  };

  /**
   * Converts a CV analysis into chunks.
   *
   * @param cvAnalysis - The analysis to split. It is not modified.
   * @param options - Overrides for the defaults (500 / 50 / true / 'text');
   *   properties that are missing or `undefined` fall back to the default.
   * @returns The chunks, each with `metadata.totalChunks` filled in, plus
   *   aggregate statistics.
   */
  async chunkCvAnalysis(
    cvAnalysis: CvAnalysisResponse,
    options?: ChunkingOptions
  ): Promise<ChunkingResult> {
    const opts = this.resolveOptions(options);
    const chunks: CvChunk[] = [];

    // `chunks.length` is always the next free index, so a section that yields
    // nothing (e.g. no personal info) leaves no gap in the numbering.

    // 1. Personal information
    const personalChunk = this.createPersonalInfoChunk(cvAnalysis, chunks.length);
    if (personalChunk) chunks.push(personalChunk);

    // 2. Description
    chunks.push(
      ...this.chunkText(cvAnalysis.description, 'description', chunks.length, opts, {
        seniority: cvAnalysis.overallSeniority
      })
    );

    // 3. Technical skills (grouped by seniority)
    chunks.push(...this.chunkSkills(cvAnalysis, chunks.length, opts));

    // 4. Work experience
    chunks.push(
      ...this.chunkText(cvAnalysis.workExperienceSummary, 'experience', chunks.length, opts, {
        seniority: cvAnalysis.overallSeniority,
        yearsOfExperience: cvAnalysis.yearsOfExperience
      })
    );

    // 5. Certifications
    chunks.push(...this.chunkCertifications(cvAnalysis, chunks.length, opts));

    // 6. Overall summary
    chunks.push(this.createSummaryChunk(cvAnalysis, chunks.length));

    // The total is only known now; back-fill it into every chunk.
    const totalChunks = chunks.length;
    for (const chunk of chunks) {
      chunk.metadata.totalChunks = totalChunks;
    }

    return {
      chunks,
      stats: this.calculateStats(chunks)
    };
  }

  /** Merges caller options over the defaults, ignoring `undefined` values. */
  private resolveOptions(options?: ChunkingOptions): Required<ChunkingOptions> {
    const defaults = this.defaultOptions;
    return {
      maxChunkSize: options?.maxChunkSize ?? defaults.maxChunkSize,
      overlap: options?.overlap ?? defaults.overlap,
      includeMetadata: options?.includeMetadata ?? defaults.includeMetadata,
      format: options?.format ?? defaults.format
    };
  }

  /** Builds a chunk whose `totalChunks` is a placeholder (0) to be filled in later. */
  private makeChunk(
    index: number,
    section: CvChunk['section'],
    content: string,
    keywords: string[],
    extra: { seniority?: string; yearsOfExperience?: number } = {}
  ): CvChunk {
    return {
      id: `chunk-${index}`,
      content,
      section,
      metadata: {
        index,
        totalChunks: 0,
        keywords,
        ...extra
      }
    };
  }

  /**
   * Builds the personal-information chunk, one labelled line per available
   * field, or returns `null` when no field is set.
   */
  private createPersonalInfoChunk(
    cvAnalysis: CvAnalysisResponse,
    index: number
  ): CvChunk | null {
    const info = cvAnalysis.personalInfo;
    const parts: string[] = [];

    if (info.firstName || info.lastName) {
      parts.push(`Nome: ${[info.firstName, info.lastName].filter(Boolean).join(' ')}`);
    }
    if (info.email) parts.push(`Email: ${info.email}`);
    if (info.phone) parts.push(`Telefono: ${info.phone}`);
    if (info.address) parts.push(`Indirizzo: ${info.address}`);
    if (info.linkedIn) parts.push(`LinkedIn: ${info.linkedIn}`);
    if (info.github) parts.push(`GitHub: ${info.github}`);
    if (info.website) parts.push(`Sito web: ${info.website}`);
    if (info.nationality) parts.push(`Nazionalità: ${info.nationality}`);

    if (parts.length === 0) return null;

    const content = parts.join('\n');
    return this.makeChunk(index, 'personal', content, this.extractKeywords(content));
  }

  /**
   * Builds one chunk per distinct skill seniority, in order of first
   * appearance. The chunking options are currently not used here, so skill
   * chunks are not size-limited.
   */
  private chunkSkills(
    cvAnalysis: CvAnalysisResponse,
    startIndex: number,
    _options: Required<ChunkingOptions>
  ): CvChunk[] {
    const skillsBySeniority = new Map<string, CvAnalysisResponse['technicalSkills']>();
    for (const skill of cvAnalysis.technicalSkills) {
      const group = skillsBySeniority.get(skill.seniority);
      if (group) {
        group.push(skill);
      } else {
        skillsBySeniority.set(skill.seniority, [skill]);
      }
    }

    const chunks: CvChunk[] = [];
    for (const [seniority, skills] of skillsBySeniority) {
      const skillTexts = skills.map(s =>
        `${s.name} (proficiency: ${s.proficiency}/10${s.isInferred ? ', inferred' : ''})`
      );
      chunks.push(
        this.makeChunk(
          startIndex + chunks.length,
          'skills',
          `Skills ${seniority}:\n${skillTexts.join('\n')}`,
          skills.map(s => s.name.toLowerCase()),
          { seniority }
        )
      );
    }

    return chunks;
  }

  /**
   * Packs certification lines into chunks; a new chunk is started when adding
   * the next line would push the accumulated line lengths past `maxChunkSize`.
   */
  private chunkCertifications(
    cvAnalysis: CvAnalysisResponse,
    startIndex: number,
    options: Required<ChunkingOptions>
  ): CvChunk[] {
    const chunks: CvChunk[] = [];
    let pending: string[] = [];
    let pendingSize = 0;

    // Emits the pending lines as one chunk (no-op when there are none).
    const flush = (): void => {
      if (pending.length === 0) return;
      const content = `Certificazioni:\n${pending.join('\n')}`;
      chunks.push(
        this.makeChunk(
          startIndex + chunks.length,
          'certifications',
          content,
          this.extractKeywords(content)
        )
      );
      pending = [];
      pendingSize = 0;
    };

    for (const cert of cvAnalysis.certifications) {
      const certText = `${cert.name}${cert.issuer ? ` - ${cert.issuer}` : ''}${cert.year ? ` (${cert.year})` : ''}`;
      if (pendingSize + certText.length > options.maxChunkSize) flush();
      pending.push(certText);
      pendingSize += certText.length;
    }
    flush();

    return chunks;
  }

  /**
   * Builds the overall summary chunk: seniority, years of experience, the five
   * highest-proficiency skills and the number of certifications.
   */
  private createSummaryChunk(
    cvAnalysis: CvAnalysisResponse,
    index: number
  ): CvChunk {
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the caller's technicalSkills array.
    const topSkillNames = [...cvAnalysis.technicalSkills]
      .sort((a, b) => b.proficiency - a.proficiency)
      .slice(0, 5)
      .map(s => s.name);
    const topSkills = topSkillNames.join(', ');

    const content = [
      `PROFILO PROFESSIONALE`,
      `Seniority: ${cvAnalysis.overallSeniority}`,
      `Anni di esperienza: ${cvAnalysis.yearsOfExperience}`,
      `Top skills: ${topSkills}`,
      `Numero certificazioni: ${cvAnalysis.certifications.length}`
    ].join('\n');

    // Keywords come from the names directly, so an empty skill list adds no
    // empty-string keyword and names containing ", " stay intact.
    const keywords = [
      ...topSkillNames.map(name => name.toLowerCase()),
      cvAnalysis.overallSeniority.toLowerCase()
    ];

    return this.makeChunk(index, 'summary', content, keywords, {
      seniority: cvAnalysis.overallSeniority,
      yearsOfExperience: cvAnalysis.yearsOfExperience
    });
  }

  /**
   * Splits text into word-aligned chunks of roughly `maxChunkSize` characters.
   * Consecutive chunks share about `overlap / 10` trailing words (a rough
   * characters-to-words approximation).
   *
   * @returns An empty array for empty or whitespace-only text.
   */
  private chunkText(
    text: string,
    section: CvChunk['section'],
    startIndex: number,
    options: Required<ChunkingOptions>,
    additionalMetadata?: { seniority?: string; yearsOfExperience?: number }
  ): CvChunk[] {
    if (!text || text.trim().length === 0) return [];

    const chunks: CvChunk[] = [];
    const emit = (words: string[]): void => {
      const content = words.join(' ');
      chunks.push(
        this.makeChunk(
          startIndex + chunks.length,
          section,
          content,
          this.extractKeywords(content),
          additionalMetadata
        )
      );
    };

    const overlapWords = Math.max(0, Math.floor(options.overlap / 10));
    let currentChunk: string[] = [];
    let currentSize = 0;

    // Trim first so leading whitespace does not produce an empty first word.
    for (const word of text.trim().split(/\s+/)) {
      const wordSize = word.length + 1; // +1 for the separating space

      if (currentSize + wordSize > options.maxChunkSize && currentChunk.length > 0) {
        emit(currentChunk);

        // Carry over fewer words than the chunk just emitted, otherwise the
        // next chunk would repeat it entirely and never advance. A count of 0
        // must not reach slice(): slice(-0) is slice(0), i.e. the whole array.
        const carried = Math.min(overlapWords, currentChunk.length - 1);
        currentChunk = carried > 0 ? currentChunk.slice(-carried) : [];
        currentSize = currentChunk.join(' ').length;
      }

      currentChunk.push(word);
      currentSize += wordSize;
    }

    if (currentChunk.length > 0) emit(currentChunk);

    return chunks;
  }

  /**
   * Returns up to five keywords: the most frequent lower-cased words longer
   * than three characters that are not stop words, ties kept in order of first
   * appearance.
   */
  private extractKeywords(text: string): string[] {
    const frequency = new Map<string, number>();

    // Unicode-aware cleanup: `\w` only covers ASCII and would cut accented
    // words apart (e.g. "nazionalità" -> "nazionalit").
    const words = text
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
      .split(/\s+/);

    for (const word of words) {
      if (word.length <= 3 || CvChunkingService.STOP_WORDS.has(word)) continue;
      frequency.set(word, (frequency.get(word) ?? 0) + 1);
    }

    return Array.from(frequency.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([word]) => word);
  }

  /** Computes chunk count, total characters and rounded average chunk size. */
  private calculateStats(chunks: CvChunk[]): ChunkingResult['stats'] {
    const totalChunks = chunks.length;
    const totalCharacters = chunks.reduce((sum, chunk) => sum + chunk.content.length, 0);
    const averageChunkSize = totalChunks > 0 ? Math.round(totalCharacters / totalChunks) : 0;

    return {
      totalChunks,
      averageChunkSize,
      totalCharacters
    };
  }

  /**
   * Flattens chunks into `{ id, text, metadata }` records, with the section
   * merged into the metadata.
   */
  toEmbeddingFormat(chunks: CvChunk[]): Array<{ id: string; text: string; metadata: any }> {
    return chunks.map(chunk => ({
      id: chunk.id,
      text: chunk.content,
      metadata: {
        section: chunk.section,
        ...chunk.metadata
      }
    }));
  }

  /**
   * Ranks chunks against a query by keyword overlap and returns the best
   * `limit` chunks (chunks scoring 0 are still eligible).
   *
   * Each query keyword scores 1 when it occurs in the chunk content, otherwise
   * 0.5 when it occurs inside one of the chunk's keywords. Query keywords come
   * from the same extraction used for chunks, so words of three characters or
   * fewer do not contribute.
   *
   * @param limit - Maximum number of chunks to return; negative values are treated as 0.
   */
  searchChunks(chunks: CvChunk[], query: string, limit: number = 5): CvChunk[] {
    const queryKeywords = this.extractKeywords(query);

    const scored = chunks.map(chunk => {
      const haystack = chunk.content.toLowerCase();
      let score = 0;
      for (const keyword of queryKeywords) {
        if (haystack.includes(keyword)) {
          score += 1;
        } else if (chunk.metadata.keywords.some(k => k.includes(keyword))) {
          score += 0.5;
        }
      }
      return { chunk, score };
    });

    return scored
      .sort((a, b) => b.score - a.score)
      .slice(0, Math.max(0, limit))
      .map(({ chunk }) => chunk);
  }
}
|
|
506
|
+
|
|
507
|
+
/** Shared, ready-to-use instance of the chunking service. */
export const cvChunkingService: CvChunkingService = new CvChunkingService();
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { AiTalentService } from "./ai-talent.service.js";
|
|
2
|
+
export type { AnalyzeResumeOptions } from "./ai-cv-resume.service.js";
|
|
3
|
+
export { CvChunkingService, cvChunkingService } from "./cv-chunking.service.js";
|
|
4
|
+
export type { ChunkingOptions, CvChunk, ChunkingResult } from "./cv-chunking.service.js";
|
|
5
|
+
export * from "./types.js";
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { AIProvider } from "../ai/index.js";
|
|
2
|
+
import { prompts } from "./prompts.js";
|
|
3
|
+
import { systemMessages } from "./system-messages.js";
|
|
4
|
+
import { JobSkillType } from "./types.js";
|
|
5
|
+
|
|
6
|
+
/** A skill entry as handled by `JobMatcherService`. */
export type SkillMatch = {
  skill: string;
  /** Value between 0.00 and 1.00. */
  multiplier: number;
  jobSkillType: JobSkillType;
};
|
|
11
|
+
|
|
12
|
+
/**
 * Uses the configured AI provider to derive weighted skills from a job
 * description.
 */
export class JobMatcherService {
  private ai: AIProvider;

  constructor(ai: AIProvider) {
    this.ai = ai;
  }

  /**
   * Prompts the AI provider with the job description and the list of known
   * skills, and parses its reply into skill matches.
   *
   * @param jobDescription - Free-text job description.
   * @param skillsList - Skill names passed to the prompt builder.
   * @returns The skill matches parsed from the provider's reply.
   * @throws Error when the reply does not contain a valid JSON array of skill
   *   matches (errors from the AI provider propagate unchanged).
   */
  async getJobSkillsFromDescription(
    jobDescription: string,
    skillsList: string[],
  ): Promise<SkillMatch[]> {
    const result = await this.ai.prompt(
      prompts.jobMatching(jobDescription, skillsList),
      systemMessages.jobMatching(),
    );
    return this.convertAiResponseToSkills(result.content);
  }

  /**
   * Parses the provider's reply into `SkillMatch` entries.
   *
   * The reply may be a bare JSON array, an array inside a Markdown code fence,
   * or an array surrounded by prose: the outermost `[ ... ]` span is parsed.
   * Every entry must be an object with a string `skill` and a `multiplier`
   * that converts to a finite number; `jobSkillType` is passed through as is.
   *
   * @throws Error (prefixed with "Failed to parse AI response into skill
   *   matches") when no array is found, the JSON is invalid, or an entry is
   *   malformed.
   */
  private convertAiResponseToSkills(aiResponse: string): SkillMatch[] {
    try {
      let jsonStr = aiResponse.trim();

      // Prefer the content of a fenced block (```json or plain ```) if present.
      const fenced = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
      if (fenced && fenced[1]) {
        jsonStr = fenced[1];
      }

      const arrayStart = jsonStr.indexOf("[");
      const arrayEnd = jsonStr.lastIndexOf("]");
      if (arrayStart === -1 || arrayEnd < arrayStart) {
        throw new Error("No JSON array found in AI response");
      }

      const parsed: unknown = JSON.parse(
        jsonStr.substring(arrayStart, arrayEnd + 1),
      );
      if (!Array.isArray(parsed)) {
        throw new Error("AI response is not a JSON array");
      }

      return parsed.map((entry: unknown, position: number): SkillMatch => {
        if (entry === null || typeof entry !== "object") {
          throw new Error(`Entry ${position} is not an object`);
        }
        const candidate = entry as Record<string, unknown>;

        if (typeof candidate.skill !== "string") {
          throw new Error(`Entry ${position} has no string "skill"`);
        }

        const rawMultiplier = candidate.multiplier;
        const multiplier =
          typeof rawMultiplier === "number"
            ? rawMultiplier
            : Number(rawMultiplier);
        if (!Number.isFinite(multiplier)) {
          throw new Error(`Entry ${position} has a non-numeric "multiplier"`);
        }

        return {
          skill: candidate.skill,
          multiplier,
          // Not validated here: the allowed JobSkillType values are defined elsewhere.
          jobSkillType: candidate.jobSkillType as JobSkillType,
        };
      });
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(
        `Failed to parse AI response into skill matches: ${reason}`,
      );
    }
  }

  /**
   * Placeholder: the body is empty, so the returned promise resolves to
   * `undefined` and both arguments are ignored.
   */
  async rateCandidateToJob(
    candidateSkills: SkillMatch[],
    jobSkills: SkillMatch[],
  ) {
    // Intentionally empty — not implemented yet.
  }
}
|