@odda-ai/matching-core 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +790 -0
  2. package/dist/index.d.ts +3 -0
  3. package/dist/index.js +3 -0
  4. package/dist/src/ai/AIProvider.d.ts +36 -0
  5. package/dist/src/ai/AIProvider.js +127 -0
  6. package/dist/src/ai/adapters/AnthropicAdapter.d.ts +12 -0
  7. package/dist/src/ai/adapters/AnthropicAdapter.js +38 -0
  8. package/dist/src/ai/adapters/OllamaAdapter.d.ts +17 -0
  9. package/dist/src/ai/adapters/OllamaAdapter.js +38 -0
  10. package/dist/src/ai/adapters/OpenAIAdapter.d.ts +12 -0
  11. package/dist/src/ai/adapters/OpenAIAdapter.js +47 -0
  12. package/{src/ai/adapters/index.ts → dist/src/ai/adapters/index.d.ts} +1 -1
  13. package/dist/src/ai/adapters/index.js +3 -0
  14. package/dist/src/ai/factory.d.ts +12 -0
  15. package/dist/src/ai/factory.js +37 -0
  16. package/{src/ai/index.ts → dist/src/ai/index.d.ts} +1 -1
  17. package/dist/src/ai/index.js +5 -0
  18. package/dist/src/ai/registry.d.ts +13 -0
  19. package/{src/ai/registry.ts → dist/src/ai/registry.js} +7 -8
  20. package/dist/src/ai/types.d.ts +54 -0
  21. package/dist/src/ai/types.js +1 -0
  22. package/dist/src/cv-parser/PDFParserService.d.ts +41 -0
  23. package/dist/src/cv-parser/PDFParserService.js +136 -0
  24. package/dist/src/cv-parser/index.js +1 -0
  25. package/dist/src/cv-parser/types.d.ts +51 -0
  26. package/dist/src/cv-parser/types.js +1 -0
  27. package/dist/src/features/ai-cv-resume.service.d.ts +14 -0
  28. package/dist/src/features/ai-cv-resume.service.js +78 -0
  29. package/dist/src/features/ai-talent.service.d.ts +18 -0
  30. package/dist/src/features/ai-talent.service.js +29 -0
  31. package/dist/src/features/cv-chunking.service.d.ts +140 -0
  32. package/dist/src/features/cv-chunking.service.js +334 -0
  33. package/dist/src/features/index.js +3 -0
  34. package/dist/src/features/job-matcher.service.d.ts +14 -0
  35. package/dist/src/features/job-matcher.service.js +19 -0
  36. package/dist/src/features/prompts.d.ts +8 -0
  37. package/{src/features/prompts.ts → dist/src/features/prompts.js} +6 -21
  38. package/dist/src/features/system-messages.d.ts +6 -0
  39. package/{src/features/system-messages.ts → dist/src/features/system-messages.js} +3 -4
  40. package/dist/src/features/types.d.ts +49 -0
  41. package/dist/src/features/types.js +15 -0
  42. package/package.json +8 -9
  43. package/src/ai/AIProvider.ts +0 -159
  44. package/src/ai/adapters/AnthropicAdapter.ts +0 -42
  45. package/src/ai/adapters/OllamaAdapter.ts +0 -42
  46. package/src/ai/adapters/OpenAIAdapter.ts +0 -53
  47. package/src/ai/factory.ts +0 -48
  48. package/src/ai/types.ts +0 -59
  49. package/src/cv-parser/PDFParserService.ts +0 -160
  50. package/src/cv-parser/types.ts +0 -58
  51. package/src/features/ai-cv-resume.service.ts +0 -104
  52. package/src/features/ai-talent.service.ts +0 -49
  53. package/src/features/cv-chunking.service.ts +0 -510
  54. package/src/features/job-matcher.service.ts +0 -41
  55. package/src/features/types.ts +0 -55
  56. /package/{src/cv-parser/index.ts → dist/src/cv-parser/index.d.ts} +0 -0
  57. /package/{src/features/index.ts → dist/src/features/index.d.ts} +0 -0
@@ -0,0 +1,136 @@
1
+ import * as fs from 'fs';
2
+ import pdfParse from 'pdf-parse';
3
/**
 * Service that extracts text and document metadata from PDF files.
 */
export class PDFParserService {
    /**
     * Extracts the text of a PDF.
     *
     * Without a page range, the text produced by pdf-parse for the whole
     * document is returned unchanged. When `startPage` and/or `endPage` is set,
     * the document is parsed once, the text of every rendered page is
     * captured, and only the pages inside the range contribute to `text`
     * (joined with `pageSeparator`, `'\n'` when not provided) and `pageTexts`.
     *
     * @param pdfPath Path of the PDF file, or a Buffer holding its content
     * @param options Extraction options (`startPage`, `endPage`,
     *                `preserveFormatting`, `pageSeparator`)
     * @returns `text`, `totalPages`, `pageTexts` (only when a page range was
     *          requested and matched at least one page) and `metadata`
     * @throws Error wrapping any failure while reading or parsing the PDF
     */
    async extractText(pdfPath, options = {}) {
        try {
            // Read the file only when a path was given; a Buffer is used as is.
            const dataBuffer = typeof pdfPath === 'string'
                ? await fs.promises.readFile(pdfPath)
                : pdfPath;
            const baseRenderer = this.createPageRenderer(options);
            const hasPageRange = Boolean(options.startPage || options.endPage);
            if (!hasPageRange) {
                const pdfData = await pdfParse(dataBuffer, {
                    max: options.endPage,
                    pagerender: baseRenderer
                });
                return {
                    text: pdfData.text.trim(),
                    totalPages: pdfData.numpages,
                    pageTexts: undefined,
                    metadata: this.extractMetadata(pdfData.info)
                };
            }
            // Wrap the renderer so each page's text is captured during a single
            // parse, instead of re-parsing the document once per page.
            const renderPage = baseRenderer ?? ((pageData) => this.renderPlainPage(pageData));
            const renderedPages = [];
            const pdfData = await pdfParse(dataBuffer, {
                // Pages after endPage are never needed, so stop parsing there.
                max: options.endPage,
                pagerender: async (pageData) => {
                    let pageText = '';
                    try {
                        pageText = await renderPage(pageData);
                    }
                    finally {
                        // Record a slot even when rendering fails so that the
                        // array index keeps matching the page number.
                        renderedPages.push(pageText);
                    }
                    return pageText;
                }
            });
            const firstPage = Math.max(1, options.startPage || 1);
            const lastPage = Math.min(options.endPage || pdfData.numpages, pdfData.numpages);
            const pageTexts = renderedPages.slice(firstPage - 1, lastPage);
            return {
                text: pageTexts.join(options.pageSeparator ?? '\n').trim(),
                totalPages: pdfData.numpages,
                pageTexts: pageTexts.length > 0 ? pageTexts : undefined,
                metadata: this.extractMetadata(pdfData.info)
            };
        }
        catch (error) {
            throw new Error(`Errore nell'estrazione del testo dal PDF: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    /**
     * Extracts the text of several PDFs concurrently.
     *
     * @param pdfPaths Array of PDF paths
     * @param options Extraction options applied to every PDF
     * @returns Promise of the extraction results, in the order of `pdfPaths`;
     *          it rejects as soon as one extraction fails
     */
    async extractTextFromMultiple(pdfPaths, options = {}) {
        return Promise.all(pdfPaths.map((pdfPath) => this.extractText(pdfPath, options)));
    }
    /**
     * Checks whether the input starts with the PDF magic number (`%PDF-`).
     *
     * @param pdfPath Path of the PDF file, or a Buffer holding its content
     * @returns true when the header matches; false otherwise, including when
     *          the file cannot be read
     */
    async isValidPDF(pdfPath) {
        try {
            const dataBuffer = typeof pdfPath === 'string'
                ? await fs.promises.readFile(pdfPath)
                : pdfPath;
            // subarray() replaces the deprecated Buffer#slice().
            return dataBuffer.subarray(0, 5).toString() === '%PDF-';
        }
        catch {
            return false;
        }
    }
    /**
     * Builds the custom page renderer used when `preserveFormatting` is set.
     *
     * @param options Extraction options
     * @returns undefined when `preserveFormatting` is falsy; otherwise a
     *          function that joins the page's text items, starting a new line
     *          whenever the item's vertical position (`transform[5]`) changes
     */
    createPageRenderer(options) {
        if (!options.preserveFormatting) {
            return undefined;
        }
        return (pageData) => {
            return pageData.getTextContent().then((textContent) => {
                let lastY = null;
                let text = '';
                for (const item of textContent.items) {
                    if (lastY !== null && lastY !== item.transform[5]) {
                        text += '\n';
                    }
                    text += item.str;
                    lastY = item.transform[5];
                }
                return text;
            });
        };
    }
    /**
     * Renders one page as plain text; used for page-range extraction when no
     * custom renderer is configured.
     *
     * NOTE(review): modelled on the built-in renderer of pdf-parse 1.x so the
     * per-page text matches what the library would produce on its own —
     * confirm against the installed pdf-parse version.
     */
    renderPlainPage(pageData) {
        return pageData
            .getTextContent({ normalizeWhitespace: false, disableCombineTextItems: false })
            .then((textContent) => {
            let lastY;
            let text = '';
            for (const item of textContent.items) {
                text += (lastY === item.transform[5] || !lastY) ? item.str : '\n' + item.str;
                lastY = item.transform[5];
            }
            return text;
        });
    }
    /**
     * Maps the PDF info dictionary to the metadata object of the result.
     *
     * @param info Info dictionary returned by the parser (may be missing)
     * @returns undefined when `info` is falsy, otherwise the mapped metadata
     */
    extractMetadata(info) {
        if (!info)
            return undefined;
        return {
            title: info.Title,
            author: info.Author,
            subject: info.Subject,
            keywords: info.Keywords,
            creator: info.Creator,
            producer: info.Producer,
            creationDate: info.CreationDate ? this.parsePdfDate(info.CreationDate) : undefined,
            modificationDate: info.ModDate ? this.parsePdfDate(info.ModDate) : undefined
        };
    }
    /**
     * Converts a PDF date value into a Date.
     *
     * PDF date strings look like `D:YYYYMMDDHHmmSS+HH'mm'`, where everything
     * after the year is optional. `new Date()` cannot parse that layout, so it
     * is decoded by hand; a value without a time-zone part is read as UTC.
     * Any other value falls back to the Date constructor.
     *
     * @param value Raw date value from the info dictionary
     * @returns The parsed Date, or undefined when the value cannot be parsed
     */
    parsePdfDate(value) {
        if (typeof value === 'string') {
            const match = /^(?:D:)?(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?(?:Z(?:\d{2}'?(?:\d{2}'?)?)?|([+-])(\d{2})'?(\d{2})?'?)?$/.exec(value.trim());
            if (match) {
                const [, year, month = '01', day = '01', hour = '00', minute = '00', second = '00', sign, offsetHours = '00', offsetMinutes = '00'] = match;
                const localAsUtc = Date.UTC(Number(year), Number(month) - 1, Number(day), Number(hour), Number(minute), Number(second));
                const offsetMillis = (Number(offsetHours) * 60 + Number(offsetMinutes)) * 60000;
                // A "+hh'mm'" offset means local time is ahead of UTC.
                return new Date(sign === '-' ? localAsUtc + offsetMillis : localAsUtc - offsetMillis);
            }
        }
        const fallback = value instanceof Date ? value : new Date(value);
        return Number.isNaN(fallback.getTime()) ? undefined : fallback;
    }
    /**
     * Extracts the text of a single page.
     *
     * @param pdfPath Path of the PDF file, or a Buffer holding its content
     * @param pageNumber Page number (1-based)
     * @returns The trimmed text of that page
     */
    async extractPageText(pdfPath, pageNumber) {
        const result = await this.extractText(pdfPath, {
            startPage: pageNumber,
            endPage: pageNumber
        });
        return result.text;
    }
}
/**
 * Shared parser instance.
 */
export const pdfParser = new PDFParserService();
@@ -0,0 +1 @@
1
+ export { PDFParserService, pdfParser } from './PDFParserService.js';
@@ -0,0 +1,51 @@
1
/**
 * Options that control how text is extracted from a PDF.
 */
export interface PDFExtractionOptions {
    /** Page number at which extraction starts (1-based). */
    startPage?: number;
    /** Page number up to which text is extracted (1-based). */
    endPage?: number;
    /** When true, formatting and spacing are preserved. */
    preserveFormatting?: boolean;
    /** Separator placed between pages. */
    pageSeparator?: string;
}
22
/**
 * Outcome of a PDF text extraction.
 */
export interface PDFExtractionResult {
    /** Text extracted from the PDF. */
    text: string;
    /** Total number of pages in the document. */
    totalPages: number;
    /** Text extracted for each page. */
    pageTexts?: string[] | undefined;
    /** PDF metadata (when available). */
    metadata?: {
        title?: string;
        author?: string;
        subject?: string;
        keywords?: string;
        creator?: string;
        producer?: string;
        creationDate?: Date | undefined;
        modificationDate?: Date | undefined;
    } | undefined;
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,14 @@
1
+ import { AIProvider } from "../ai/index.js";
2
+ import { type PDFExtractionOptions } from "../cv-parser/index.js";
3
+ import { type CvAnalysisResponse } from "./types.js";
4
/**
 * Options accepted by `AiCvResumeService.analyzeResume`.
 */
export type AnalyzeResumeOptions = {
    /** Options passed on to the PDF text extraction step. */
    pdfoptions?: PDFExtractionOptions;
};
/**
 * Service that analyses a CV supplied as a PDF buffer with an AI provider.
 */
export declare class AiCvResumeService {
    private pdfParser;
    private ai;
    /**
     * @param ai AI provider used for the analysis
     */
    constructor(ai: AIProvider);
    /**
     * Analyses the given PDF and resolves with the structured CV analysis.
     *
     * @param pdfBuffer Content of the PDF
     * @param options Optional settings, see `AnalyzeResumeOptions`
     */
    analyzeResume(pdfBuffer: Buffer, options?: AnalyzeResumeOptions): Promise<CvAnalysisResponse>;
    private parseAiResponse;
    private validateSeniority;
}
@@ -0,0 +1,78 @@
1
+ import { AIProvider } from "../ai/index.js";
2
+ import { pdfParser, } from "../cv-parser/index.js";
3
+ import { prompts } from "./prompts.js";
4
+ import { systemMessages } from "./system-messages.js";
5
+ import { Seniority } from "./types.js";
6
/**
 * Analyses a CV supplied as a PDF: extracts its text, sends it to the AI
 * provider and maps the provider's JSON answer to the analysis structure.
 */
export class AiCvResumeService {
    pdfParser = pdfParser;
    ai;
    /**
     * @param ai AI provider whose `prompt` method is used for the analysis
     */
    constructor(ai) {
        this.ai = ai;
    }
    /**
     * Extracts the text of the PDF, prompts the AI provider with it and parses
     * the answer.
     *
     * @param pdfBuffer PDF content handed to the PDF parser
     * @param options Optional settings; `pdfoptions` is forwarded to the parser
     * @returns The structured CV analysis
     * @throws Error when extraction, the AI call or the response parsing fails
     */
    async analyzeResume(pdfBuffer, options) {
        const pdftext = await this.pdfParser.extractText(pdfBuffer, options?.pdfoptions);
        const result = await this.ai.prompt(prompts.cvAnalysis(pdftext.text), systemMessages.analyzeResume());
        return this.parseAiResponse(result.content);
    }
    /**
     * Parses the raw AI answer into the analysis structure.
     *
     * The JSON object is taken from a ```json fenced block when present,
     * otherwise from the outermost `{ ... }` span of the answer. Missing or
     * malformed optional parts fall back to defaults instead of failing.
     *
     * @param aiResponse Raw text returned by the AI provider
     * @returns The mapped analysis object
     * @throws Error when no JSON object can be found or parsed
     */
    parseAiResponse(aiResponse) {
        try {
            let jsonStr = aiResponse.trim();
            // Prefer the content of a markdown ```json block when there is one.
            const jsonMatch = jsonStr.match(/```json\s*([\s\S]*?)```/);
            if (jsonMatch && jsonMatch[1]) {
                jsonStr = jsonMatch[1];
            }
            // Drop any text before the first "{" and after the last "}".
            const jsonStart = jsonStr.indexOf("{");
            const jsonEnd = jsonStr.lastIndexOf("}");
            // jsonEnd < jsonStart also covers a missing "}" (-1) and a "}"
            // that only appears before the first "{".
            if (jsonStart === -1 || jsonEnd < jsonStart) {
                throw new Error("No valid JSON object found in AI response");
            }
            const parsed = JSON.parse(jsonStr.substring(jsonStart, jsonEnd + 1));
            const personal = parsed.personalInfo;
            return {
                personalInfo: {
                    firstName: personal?.firstName || "N/A",
                    lastName: personal?.lastName || "N/A",
                    email: personal?.email || undefined,
                    phone: personal?.phone || undefined,
                    address: personal?.address || undefined,
                    dateOfBirth: personal?.dateOfBirth || undefined,
                    nationality: personal?.nationality || undefined,
                    linkedIn: personal?.linkedIn || undefined,
                    github: personal?.github || undefined,
                    website: personal?.website || undefined,
                },
                description: parsed.description || "",
                technicalSkills: this.toObjectList(parsed.technicalSkills).map((skill) => ({
                    name: skill.name,
                    proficiency: Number(skill.proficiency),
                    isInferred: Boolean(skill.isInferred),
                    seniority: this.validateSeniority(skill.seniority),
                })),
                workExperienceSummary: parsed.workExperienceSummary || "",
                certifications: this.toObjectList(parsed.certifications).map((cert) => ({
                    name: cert.name,
                    issuer: cert.issuer || undefined,
                    year: cert.year ? Number(cert.year) : undefined,
                })),
                overallSeniority: this.validateSeniority(parsed.overallSeniority),
                yearsOfExperience: Number(parsed.yearsOfExperience) || 0,
            };
        }
        catch (error) {
            // Anything can be thrown, so do not assume an Error instance.
            const reason = error instanceof Error ? error.message : String(error);
            throw new Error(`Failed to parse AI response into structured format: ${reason}`);
        }
    }
    /**
     * Returns the object entries of `value` when it is an array, otherwise an
     * empty list, so a malformed list in the AI answer cannot crash the mapping.
     */
    toObjectList(value) {
        return Array.isArray(value)
            ? value.filter((entry) => entry !== null && typeof entry === "object")
            : [];
    }
    /**
     * Normalises a seniority value coming from the AI answer.
     *
     * @param value Candidate value; only strings are considered
     * @returns The trimmed, upper-cased value when it is one of the `Seniority`
     *          values, otherwise `Seniority.MID`
     */
    validateSeniority(value) {
        if (typeof value !== "string") {
            return Seniority.MID;
        }
        const upperValue = value.trim().toUpperCase();
        return Object.values(Seniority).includes(upperValue) ? upperValue : Seniority.MID;
    }
}
@@ -0,0 +1,18 @@
1
+ import { AIProvider } from "../ai/index.js";
2
+ import { type AnalyzeResumeOptions } from "./ai-cv-resume.service.js";
3
+ import { type ChunkingOptions } from "./cv-chunking.service.js";
4
+ import type { CvAnalysisResponse } from "./types.js";
5
/**
 * Entry point that groups the CV and job features behind one object built
 * from a single AI provider.
 */
export declare class AiTalentService {
    private ai;
    private aiCvResumeService;
    private cvChunkingService;
    private jobMatcherService;
    /**
     * @param ai AI provider used by the underlying services
     */
    constructor(ai: AIProvider);
    /** CV-related operations. */
    cv: {
        /** Analyses a CV given as a PDF buffer. */
        analyzeCvResume: (pdf: Buffer, options?: AnalyzeResumeOptions) => Promise<CvAnalysisResponse>;
        /** Splits a CV analysis into chunks. */
        chunkCvAnalysis: (cvAnalysis: CvAnalysisResponse, options?: ChunkingOptions) => Promise<import("./cv-chunking.service.js").ChunkingResult>;
    };
    /** Job-related operations. */
    jobs: {
        /** Resolves the skill matches for a job description against a list of skills. */
        getSkillsFromJobDescription: (jobDescription: string, skillsList: string[]) => Promise<import("./job-matcher.service.js").SkillMatch[]>;
    };
}
@@ -0,0 +1,29 @@
1
+ import { AIProvider } from "../ai/index.js";
2
+ import { AiCvResumeService, } from "./ai-cv-resume.service.js";
3
+ import { CvChunkingService, } from "./cv-chunking.service.js";
4
+ import { JobMatcherService } from "./job-matcher.service.js";
5
/**
 * Facade over the CV and job services; every operation is forwarded to the
 * service instance created in the constructor.
 */
export class AiTalentService {
    ai;
    aiCvResumeService;
    cvChunkingService;
    jobMatcherService;
    /**
     * @param ai AI provider handed to the services that need one
     */
    constructor(ai) {
        this.ai = ai;
        this.aiCvResumeService = new AiCvResumeService(this.ai);
        this.cvChunkingService = new CvChunkingService();
        this.jobMatcherService = new JobMatcherService(this.ai);
    }
    /** CV operations: resume analysis and chunking of an analysis. */
    cv = {
        analyzeCvResume: async (pdf, options) => this.aiCvResumeService.analyzeResume(pdf, options),
        chunkCvAnalysis: async (cvAnalysis, options) => this.cvChunkingService.chunkCvAnalysis(cvAnalysis, options),
    };
    /** Job operations: skill extraction from a job description. */
    jobs = {
        getSkillsFromJobDescription: async (jobDescription, skillsList) => this.jobMatcherService.getJobSkillsFromDescription(jobDescription, skillsList),
    };
}
@@ -0,0 +1,140 @@
1
+ import type { CvAnalysisResponse } from './types.js';
2
/**
 * Options for chunking a CV.
 */
export interface ChunkingOptions {
    /**
     * Maximum size of each chunk, in characters.
     * Default: 500
     */
    maxChunkSize?: number;
    /**
     * Overlap between chunks, in characters, used to keep context.
     * Default: 50
     */
    overlap?: number;
    /**
     * When true, metadata is included in the chunk.
     * Default: true
     */
    includeMetadata?: boolean;
    /**
     * Format of the chunk.
     * Default: 'text'
     */
    format?: 'text' | 'json' | 'markdown';
}
27
/**
 * A single chunk of text together with its metadata.
 */
export interface CvChunk {
    /** Unique identifier of the chunk. */
    id: string;
    /** Textual content of the chunk. */
    content: string;
    /** CV section the chunk belongs to. */
    section: 'personal' | 'description' | 'skills' | 'experience' | 'certifications' | 'summary';
    /** Chunk metadata meant to make retrieval easier. */
    metadata: {
        /** Index of the chunk within the sequence. */
        index: number;
        /** Total number of chunks. */
        totalChunks: number;
        /** Keywords extracted from the chunk. */
        keywords: string[];
        /** Associated seniority (when applicable). */
        seniority?: string;
        /** Years of experience (when applicable). */
        yearsOfExperience?: number;
    };
}
69
/**
 * Result of the chunking process.
 */
export interface ChunkingResult {
    /** The generated chunks. */
    chunks: CvChunk[];
    /** Statistics about the chunking. */
    stats: {
        totalChunks: number;
        averageChunkSize: number;
        totalCharacters: number;
    };
}
86
/**
 * Service that converts a CvAnalysisResponse into text chunks
 * optimised for embedding and retrieval.
 */
export declare class CvChunkingService {
    private defaultOptions;
    /** Converts a CvAnalysisResponse into chunks optimised for embedding. */
    chunkCvAnalysis(cvAnalysis: CvAnalysisResponse, options?: ChunkingOptions): Promise<ChunkingResult>;
    /** Creates a chunk holding the personal information. */
    private createPersonalInfoChunk;
    /** Creates the chunks for the technical skills. */
    private chunkSkills;
    /** Creates the chunks for the certifications. */
    private chunkCertifications;
    /** Creates a general summary chunk. */
    private createSummaryChunk;
    /** Splits a long text into chunks with overlap. */
    private chunkText;
    /** Extracts keywords from a text. */
    private extractKeywords;
    /** Computes statistics about the generated chunks. */
    private calculateStats;
    /** Converts the chunks into a JSON format for embedding. */
    toEmbeddingFormat(chunks: CvChunk[]): Array<{
        id: string;
        text: string;
        metadata: any;
    }>;
    /** Searches for the chunks relevant to a query. */
    searchChunks(chunks: CvChunk[], query: string, limit?: number): CvChunk[];
}
/**
 * Shared instance of the service.
 */
export declare const cvChunkingService: CvChunkingService;