only_ever_generator 0.7.8 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bootstrap/app.d.ts +97 -0
- package/dist/bootstrap/app.d.ts.map +1 -0
- package/dist/bootstrap/app.js +42 -48
- package/dist/bootstrap/app.js.map +1 -0
- package/dist/card_gen/generate_cards.d.ts +8 -0
- package/dist/card_gen/generate_cards.d.ts.map +1 -0
- package/dist/card_gen/generate_cards.js +32 -24
- package/dist/card_gen/generate_cards.js.map +1 -0
- package/dist/config.d.ts +8 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +3 -0
- package/dist/config.js.map +1 -0
- package/dist/constants/api_constants.d.ts +3 -0
- package/dist/constants/api_constants.d.ts.map +1 -0
- package/dist/constants/api_constants.js +5 -0
- package/dist/constants/api_constants.js.map +1 -0
- package/dist/constants/prompt_data.d.ts +4 -0
- package/dist/constants/prompt_data.js +1 -0
- package/dist/constants/prompt_data.js.map +1 -0
- package/dist/constants/prompts/card_gen_prompt.d.ts +2 -0
- package/dist/constants/prompts/card_gen_prompt.d.ts.map +1 -0
- package/dist/constants/prompts/card_gen_prompt.js +1 -0
- package/dist/constants/prompts/card_gen_prompt.js.map +1 -0
- package/dist/constants/prompts/typology_prompt.d.ts +2 -0
- package/dist/constants/prompts/typology_prompt.d.ts.map +1 -0
- package/dist/constants/prompts/typology_prompt.js +1 -0
- package/dist/constants/prompts/typology_prompt.js.map +1 -0
- package/dist/constants/source_data.d.ts +171 -0
- package/dist/constants/source_data.js +2 -1
- package/dist/constants/source_data.js.map +1 -0
- package/dist/embedding_generation/consolidation/global_consolidation.d.ts +33 -0
- package/dist/embedding_generation/consolidation/global_consolidation.d.ts.map +1 -0
- package/dist/embedding_generation/consolidation/global_consolidation.js +76 -0
- package/dist/embedding_generation/consolidation/global_consolidation.js.map +1 -0
- package/dist/embedding_generation/consolidation/local_consolidation.d.ts +39 -0
- package/dist/embedding_generation/consolidation/local_consolidation.d.ts.map +1 -0
- package/dist/embedding_generation/consolidation/local_consolidation.js +105 -0
- package/dist/embedding_generation/consolidation/local_consolidation.js.map +1 -0
- package/dist/embedding_generation/consolidation/write_consolidated_data.d.ts +46 -0
- package/dist/embedding_generation/consolidation/write_consolidated_data.d.ts.map +1 -0
- package/dist/embedding_generation/consolidation/write_consolidated_data.js +69 -0
- package/dist/embedding_generation/consolidation/write_consolidated_data.js.map +1 -0
- package/dist/embedding_generation/generate_embeddings.d.ts +28 -0
- package/dist/embedding_generation/generate_embeddings.d.ts.map +1 -0
- package/dist/embedding_generation/generate_embeddings.js +54 -0
- package/dist/embedding_generation/generate_embeddings.js.map +1 -0
- package/dist/embedding_generation/parse_embedding_response.d.ts +26 -0
- package/dist/embedding_generation/parse_embedding_response.d.ts.map +1 -0
- package/dist/embedding_generation/parse_embedding_response.js +29 -0
- package/dist/embedding_generation/parse_embedding_response.js.map +1 -0
- package/dist/enums/card_type_enum.d.ts +7 -0
- package/dist/enums/card_type_enum.d.ts.map +1 -0
- package/dist/enums/card_type_enum.js +11 -0
- package/dist/enums/card_type_enum.js.map +1 -0
- package/dist/gap_fill/calculate_gap_fill.d.ts +5 -0
- package/dist/gap_fill/calculate_gap_fill.d.ts.map +1 -0
- package/dist/gap_fill/calculate_gap_fill.js +3 -5
- package/dist/gap_fill/calculate_gap_fill.js.map +1 -0
- package/dist/helper/qdrant_db_methods.d.ts +40 -0
- package/dist/helper/qdrant_db_methods.d.ts.map +1 -0
- package/dist/helper/qdrant_db_methods.js +63 -0
- package/dist/helper/qdrant_db_methods.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +88 -49
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +6 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +1 -0
- package/dist/logger.js.map +1 -0
- package/dist/parse/parse_card/parse_cloze_card.d.ts +17 -0
- package/dist/parse/parse_card/parse_cloze_card.d.ts.map +1 -0
- package/dist/parse/parse_card/parse_cloze_card.js +3 -2
- package/dist/parse/parse_card/parse_cloze_card.js.map +1 -0
- package/dist/parse/parse_card/parse_flash_cards.d.ts +26 -0
- package/dist/parse/parse_card/parse_flash_cards.d.ts.map +1 -0
- package/dist/parse/parse_card/parse_flash_cards.js +4 -3
- package/dist/parse/parse_card/parse_flash_cards.js.map +1 -0
- package/dist/parse/parse_card/parse_match_card.d.ts +23 -0
- package/dist/parse/parse_card/parse_match_card.d.ts.map +1 -0
- package/dist/parse/parse_card/parse_match_card.js +3 -3
- package/dist/parse/parse_card/parse_match_card.js.map +1 -0
- package/dist/parse/parse_card/parse_mcq_card.d.ts +31 -0
- package/dist/parse/parse_card/parse_mcq_card.d.ts.map +1 -0
- package/dist/parse/parse_card/parse_mcq_card.js +3 -2
- package/dist/parse/parse_card/parse_mcq_card.js.map +1 -0
- package/dist/parse/parse_card_response.d.ts +20 -0
- package/dist/parse/parse_card_response.d.ts.map +1 -0
- package/dist/parse/parse_card_response.js +62 -28
- package/dist/parse/parse_card_response.js.map +1 -0
- package/dist/parse/parse_source_content.d.ts +26 -0
- package/dist/parse/parse_source_content.d.ts.map +1 -0
- package/dist/parse/parse_source_content.js +1 -0
- package/dist/parse/parse_source_content.js.map +1 -0
- package/dist/parse/response_format_card.d.ts +176 -0
- package/dist/parse/response_format_card.js +1 -0
- package/dist/parse/response_format_card.js.map +1 -0
- package/dist/parse/response_format_typology.d.ts +1 -0
- package/dist/parse/response_format_typology.js +1 -0
- package/dist/parse/response_format_typology.js.map +1 -0
- package/dist/services/open_ai_service.d.ts +16 -0
- package/dist/services/open_ai_service.d.ts.map +1 -0
- package/dist/services/open_ai_service.js +29 -0
- package/dist/services/open_ai_service.js.map +1 -0
- package/dist/services/qdrant_service.d.ts +4 -0
- package/dist/services/qdrant_service.d.ts.map +1 -0
- package/dist/services/qdrant_service.js +14 -0
- package/dist/services/qdrant_service.js.map +1 -0
- package/dist/types/base_param_type.d.ts +16 -0
- package/dist/types/base_param_type.d.ts.map +1 -0
- package/dist/types/base_param_type.js +3 -0
- package/dist/types/base_param_type.js.map +1 -0
- package/dist/types/mongo_concept_fact_type.d.ts +12 -0
- package/dist/types/mongo_concept_fact_type.d.ts.map +1 -0
- package/dist/types/mongo_concept_fact_type.js +3 -0
- package/dist/types/mongo_concept_fact_type.js.map +1 -0
- package/dist/types/parsed_card_type.d.ts +12 -0
- package/dist/types/parsed_card_type.d.ts.map +1 -0
- package/dist/types/parsed_card_type.js +30 -0
- package/dist/types/parsed_card_type.js.map +1 -0
- package/dist/types/raw_card_response_types/generated_card_response_type.d.ts +40 -0
- package/dist/types/raw_card_response_types/generated_card_response_type.d.ts.map +1 -0
- package/dist/types/raw_card_response_types/generated_card_response_type.js +8 -0
- package/dist/types/raw_card_response_types/generated_card_response_type.js.map +1 -0
- package/dist/types/source_taxonomy_type.d.ts +15 -0
- package/dist/types/source_taxonomy_type.d.ts.map +1 -0
- package/dist/types/source_taxonomy_type.js +3 -0
- package/dist/types/source_taxonomy_type.js.map +1 -0
- package/dist/typology_gen/generate_typology.d.ts +29 -0
- package/dist/typology_gen/generate_typology.d.ts.map +1 -0
- package/dist/typology_gen/generate_typology.js +12 -11
- package/dist/typology_gen/generate_typology.js.map +1 -0
- package/dist/utils/generate_args.d.ts +8 -0
- package/dist/utils/generate_args.d.ts.map +1 -0
- package/dist/utils/generate_args.js +1 -0
- package/dist/utils/generate_args.js.map +1 -0
- package/dist/utils/parse_openai_response.d.ts +11 -0
- package/dist/utils/parse_openai_response.d.ts.map +1 -0
- package/dist/utils/parse_openai_response.js +1 -0
- package/dist/utils/parse_openai_response.js.map +1 -0
- package/package.json +18 -14
- package/src/bootstrap/app.ts +84 -71
- package/src/card_gen/generate_cards.ts +40 -29
- package/src/config.ts +2 -0
- package/src/constants/api_constants.ts +4 -0
- package/src/constants/prompts/card_gen_prompt.ts +1 -1
- package/src/embedding_generation/consolidation/global_consolidation.ts +94 -0
- package/src/embedding_generation/consolidation/local_consolidation.ts +141 -0
- package/src/embedding_generation/consolidation/write_consolidated_data.ts +98 -0
- package/src/embedding_generation/generate_embeddings.ts +41 -0
- package/src/embedding_generation/parse_embedding_response.ts +31 -0
- package/src/enums/card_type_enum.ts +6 -0
- package/src/gap_fill/calculate_gap_fill.ts +42 -45
- package/src/helper/qdrant_db_methods.ts +77 -0
- package/src/parse/parse_card/parse_cloze_card.ts +11 -3
- package/src/parse/parse_card/parse_flash_cards.ts +12 -4
- package/src/parse/parse_card/parse_match_card.ts +10 -4
- package/src/parse/parse_card/parse_mcq_card.ts +11 -3
- package/src/parse/parse_card_response.ts +94 -31
- package/src/parse/parse_source_content.ts +9 -1
- package/src/services/open_ai_service.ts +33 -1
- package/src/services/qdrant_service.ts +10 -0
- package/src/types/base_param_type.ts +16 -0
- package/src/types/mongo_concept_fact_type.ts +12 -0
- package/src/types/parsed_card_type.ts +39 -0
- package/src/types/raw_card_response_types/generated_card_response_type.ts +61 -0
- package/src/types/source_taxonomy_type.ts +17 -0
- package/src/typology_gen/generate_typology.ts +13 -12
- package/tsconfig.json +8 -3
- package/.env.example +0 -1
- package/prompts.json +0 -23
- package/readme.md +0 -35
- package/src/constants/prompt_data.ts +0 -301
- package/src/constants/source_data.ts +0 -1036
- package/src/index.ts +0 -72
- package/src/parse/response_format_card.ts +0 -386
- package/src/parse/response_format_typology.ts +0 -44
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { getSimilarConcepts } from "../../helper/qdrant_db_methods";
|
|
2
|
+
|
|
3
|
+
/**
 * Reconciles the locally consolidated concepts/facts of one source with the
 * shared "concepts-vectors" collection.
 *
 * This class only *plans* the work: it queries for similar global concepts and
 * returns the insert/update operations. It performs no writes itself.
 */
export class GlobalConsolidation {
  /**
   * For every local concept/fact, looks up similar entries in the
   * "concepts-vectors" collection and decides whether it is new or already
   * known globally.
   *
   * - No match returned for a concept: it keeps its local id and a global
   *   insert operation (vector + payload) is queued for it.
   * - At least one match returned: the first returned point's id replaces the
   *   local id in the source taxonomy, and a global update operation is queued
   *   so that `sourceId` can be added to that point's `_sources`.
   *
   * @param locally_consolidated_concepts_facts Concepts/facts with their
   *   embeddings and locally generated ids.
   * @param sourceId Id of the source the concepts were extracted from.
   * @param threshold Similarity threshold forwarded to the lookup; falls back
   *   to 0.8 when null/undefined is passed at runtime.
   * @returns `source_taxonomy` (one entry per input concept, in input order),
   *   `global_updates` and `global_inserts`.
   */
  async consolidate(
    locally_consolidated_concepts_facts: {
      text: string;
      type: string;
      reference: string;
      id: string;
      embedding: number[];
    }[],
    sourceId: string,
    threshold: number
  ) {
    const similarConcepts = await getSimilarConcepts(
      "concepts-vectors",
      locally_consolidated_concepts_facts,
      threshold ?? 0.8
    );

    const taxonomyConcepts: {
      text: string;
      type: string;
      reference: string;
      id: string;
    }[] = [];

    const globalUpdatesOps: {
      id: string;
      sourceIdToAdd: string;
      currentPayload: {
        _sources: string[];
        text: string;
      };
    }[] = [];

    const globalInsertsOps: {
      id: string;
      vector: number[];
      payload: {
        _sources: string[];
        text: string;
      };
    }[] = [];

    // Result i corresponds to input concept i, so iterate by numeric index
    // (a for...in loop would yield string keys).
    for (let index = 0; index < similarConcepts.length; index++) {
      const points = similarConcepts[index].points;
      const originalConcept = locally_consolidated_concepts_facts[index];

      if (points.length === 0) {
        // Nothing similar enough exists globally: keep the local id and insert.
        taxonomyConcepts.push({
          id: originalConcept.id,
          text: originalConcept.text,
          type: originalConcept.type,
          reference: originalConcept.reference,
        });
        globalInsertsOps.push({
          id: originalConcept.id,
          vector: originalConcept.embedding,
          payload: {
            _sources: [sourceId],
            text: originalConcept.text,
          },
        });
        continue;
      }

      // A similar global concept exists: adopt its id and record this source.
      const consolidatedId = points[0].id.toString();
      const currentPayload = points[0].payload;
      taxonomyConcepts.push({
        id: consolidatedId,
        text: originalConcept.text,
        type: originalConcept.type,
        reference: originalConcept.reference,
      });
      globalUpdatesOps.push({
        id: consolidatedId,
        sourceIdToAdd: sourceId,
        currentPayload: {
          // The payload may be missing or partial; default to empty values.
          _sources: (currentPayload?._sources ?? []) as string[],
          text: (currentPayload?.text ?? "").toString(),
        },
      });
    }

    return {
      source_taxonomy: taxonomyConcepts,
      global_updates: globalUpdatesOps,
      global_inserts: globalInsertsOps,
    };
  }
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { randomUUID, UUID } from "crypto";
|
|
2
|
+
|
|
3
|
+
/** A concept/fact after it has been assigned a locally generated id. */
type LocalConcept = {
  id: string;
  text: string;
  reference: string;
  embedding: number[];
  type: string;
};

/**
 * Merges near-duplicate concepts/facts that come from a single source, using
 * cosine similarity between their embeddings.
 */
export class LocalConsolidation {
  /**
   * Assigns a fresh UUID to every concept/fact, collapses near-duplicates and
   * converts the survivors into database-ready operations.
   *
   * @param concepts_facts Concepts/facts with their embeddings.
   * @param sourceId Id of the source the concepts were extracted from.
   * @returns
   *   - `globalConceptOps`: one `{ id, vector, payload }` per surviving concept
   *   - `sourceTaxonomyOps`: the surviving concepts with id and embedding
   *   - `consolidatedConcepts`: the merged pairs with their similarity
   *   - `sourceIds`: a one-element array holding the JSON-encoded `sourceId`
   * @throws Re-throws any error raised during the conversion after logging it.
   */
  consolidate(
    concepts_facts: {
      text: string;
      type: string;
      embedding: number[];
      reference: string;
    }[],
    sourceId: string
  ) {
    try {
      const concepts: LocalConcept[] = concepts_facts.map((e) => ({
        id: randomUUID(),
        text: e.text,
        reference: e.reference,
        embedding: e.embedding,
        type: e.type,
      }));

      const afterConsolidation = this.consolidateSimilarEmbeddings(concepts);

      // NOTE(review): JSON.stringify wraps the id in double quotes
      // ('abc' -> '"abc"'). Kept as-is because callers may rely on it — confirm.
      const sourceIds = [JSON.stringify(sourceId)];

      const sourceTaxonomyOps: {
        text: string;
        id: string;
        embedding: number[];
        type: string;
        reference: string;
      }[] = [];
      const globalConceptOps: {
        id: string;
        vector: number[];
        payload: {
          _sources: string[];
          text: string;
        };
      }[] = [];

      for (const concept of afterConsolidation.finalConsolidatedConcepts) {
        sourceTaxonomyOps.push({
          text: concept.text,
          id: concept.id,
          embedding: concept.embedding,
          type: concept.type,
          reference: concept.reference,
        });
        globalConceptOps.push({
          id: concept.id,
          vector: concept.embedding,
          payload: {
            _sources: [sourceId],
            text: concept.text,
          },
        });
      }

      return {
        globalConceptOps: globalConceptOps,
        sourceIds: sourceIds,
        consolidatedConcepts: [...afterConsolidation.consolidatedConcepts],
        sourceTaxonomyOps: sourceTaxonomyOps,
      };
    } catch (e: unknown) {
      console.error(
        "Error occurred while converting the parsed array to db operations:",
        e
      );
      throw e;
    }
  }

  /**
   * Cosine similarity of two vectors.
   *
   * Returns 0 when either vector has zero magnitude (the ratio is undefined
   * there and would otherwise be NaN). Vectors are expected to have the same
   * length; extra components of `b` only contribute to its magnitude.
   */
  cosineSimilarity = (a: number[], b: number[]): number => {
    let dotProduct = 0;
    let squaredA = 0;
    for (let i = 0; i < a.length; i++) {
      dotProduct += a[i] * b[i];
      squaredA += a[i] * a[i];
    }
    let squaredB = 0;
    for (const value of b) {
      squaredB += value * value;
    }
    const magnitudeA = Math.sqrt(squaredA);
    const magnitudeB = Math.sqrt(squaredB);
    if (magnitudeA === 0 || magnitudeB === 0) {
      return 0;
    }
    return dotProduct / (magnitudeA * magnitudeB);
  };

  /**
   * Greedy single-pass grouping: each not-yet-absorbed item becomes the
   * representative of a group and absorbs every later item whose similarity to
   * it is strictly greater than `threshold`. Similarity is always measured
   * against the representative, not transitively between group members.
   *
   * @param data Items carrying at least `text` and `embedding`.
   * @param threshold Similarity above which two items are merged (default 0.65).
   * @returns `finalConsolidatedConcepts` (the representatives, in input order)
   *   and `consolidatedConcepts` (one `{ concept1, concept2, similarity }`
   *   record per absorbed item).
   */
  consolidateSimilarEmbeddings<
    T extends { text: string; embedding: number[] }
  >(data: T[], threshold = 0.65) {
    const finalConsolidatedConcepts: T[] = [];
    const consolidatedConcepts: {
      concept1: string;
      concept2: string;
      similarity: number;
    }[] = [];
    const visited: boolean[] = new Array(data.length).fill(false);

    for (let i = 0; i < data.length; i++) {
      if (visited[i]) continue;
      visited[i] = true;

      for (let j = i + 1; j < data.length; j++) {
        if (visited[j]) continue;

        const sim = this.cosineSimilarity(data[i].embedding, data[j].embedding);
        if (sim > threshold) {
          visited[j] = true;
          consolidatedConcepts.push({
            concept1: data[i].text,
            concept2: data[j].text,
            similarity: sim,
          });
        }
      }

      // The first member represents the whole group; absorbed items are dropped.
      finalConsolidatedConcepts.push(data[i]);
    }

    return {
      finalConsolidatedConcepts: finalConsolidatedConcepts,
      consolidatedConcepts: consolidatedConcepts,
    };
  }
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import qdrantClient from "../../services/qdrant_service";
|
|
2
|
+
|
|
3
|
+
/**
 * Persists the operations planned by the consolidation step into the
 * "concepts-vectors" Qdrant collection.
 */
export class WriteConsolidatedData {
  /**
   * Applies the global updates first, then the global inserts. Empty lists are
   * skipped.
   *
   * @param data Consolidation result; `source_taxonomy` is accepted but not
   *   written by this method.
   * @param generation_requests Currently unused.
   * @param source_id Currently unused.
   * @returns `{ status: "success" }` once all writes have completed; errors
   *   from the Qdrant client propagate to the caller.
   */
  async writeConsolidatedData(
    data: {
      source_taxonomy: {
        id: string;
        text: string;
        type: string;
        reference: string;
      }[];
      global_updates: {
        id: string;
        currentPayload: {
          _sources: string[];
          text: string;
        };
        sourceIdToAdd: string;
      }[];
      global_inserts: {
        id: string;
        vector: number[];
        payload: {
          _sources: string[];
          text: string;
        };
      }[];
    },
    generation_requests: any[],
    source_id: string
  ) {
    if (data.global_updates.length > 0) {
      await this.writeGlobalUpdates(data.global_updates);
    }
    if (data.global_inserts.length > 0) {
      await this.writeGlobalInserts(data.global_inserts);
    }

    return {
      status: "success",
    };
  }

  /**
   * Rewrites the payload of existing points so that `_sources` also contains
   * `sourceIdToAdd`. The new id is placed first and duplicates are removed, so
   * re-processing the same source does not grow the list. Does nothing when
   * `data` is empty.
   */
  async writeGlobalUpdates(
    data: {
      id: string;
      currentPayload: {
        _sources: string[];
        text: string;
      };
      sourceIdToAdd: string;
    }[]
  ) {
    if (data.length === 0) {
      return;
    }
    const operations = data.map((e) => {
      return {
        set_payload: {
          points: [e.id],
          payload: {
            text: e.currentPayload.text,
            // A Set keeps first-seen order while dropping repeated source ids.
            _sources: Array.from(
              new Set([e.sourceIdToAdd, ...e.currentPayload._sources])
            ),
          },
        },
      };
    });
    await qdrantClient.batchUpdate("concepts-vectors", {
      wait: true,
      operations: operations,
    });
  }

  /**
   * Upserts brand-new concept points (id, vector, payload) in a single
   * request. Does nothing when `data` is empty.
   */
  async writeGlobalInserts(
    data: {
      id: string;
      vector: number[];
      payload: {
        _sources: string[];
        text: string;
      };
    }[]
  ) {
    if (data.length === 0) {
      return;
    }
    await qdrantClient.upsert("concepts-vectors", {
      wait: true,
      points: data,
    });
  }

  /**
   * Placeholder: builds the list of taxonomy ids but does not persist or
   * return it yet. The method name (including its spelling) is kept because it
   * is part of the public surface.
   */
  async wiriteToMondo(
    source_taxonomy: any[],
    generation_requests: any[],
    source_id: string
  ) {
    // TODO: the mapped data is currently discarded; nothing is written.
    const mondo_data = source_taxonomy.map((e) => {
      return {
        id: e.id,
      };
    });
  }
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { OpenAiService } from "../services/open_ai_service";
|
|
2
|
+
|
|
3
|
+
/**
 * Requests embeddings for concept/fact texts and pairs every returned vector
 * with the concept/fact it belongs to.
 */
export class GenerateEmbeddings {
  constructor(private readonly openAiService: OpenAiService) {}

  /**
   * Sends all texts in one embedding request and attaches each embedding to
   * its originating concept/fact.
   *
   * @param concepts_facts Items whose `text` is embedded.
   * @returns `concepts_facts` (the input items plus `embedding`, in input
   *   order) and `metadata` describing the request (time, model, usage).
   * @throws Error with the service's message when `status_code` is not 200,
   *   or when fewer embeddings than inputs are returned.
   */
  async generateEmbeddings(
    concepts_facts: { text: string; type: string; reference: string }[]
  ) {
    const texts = concepts_facts.map((e) => e.text);
    const response = await this.openAiService.sendEmbeddingRequest(texts);
    if (response.status_code !== 200) {
      throw new Error(response.message);
    }

    // Order by `index` so embeddings[i] belongs to texts[i]. A two-argument
    // comparator is required; sorting a copy leaves the response untouched.
    const embeddings = [...response.data.data].sort(
      (a: any, b: any) => a.index - b.index
    );
    if (embeddings.length < concepts_facts.length) {
      throw new Error(
        `Expected ${concepts_facts.length} embeddings but received ${embeddings.length}`
      );
    }

    const model = response.data.model;
    const usage = response.data.usage;

    const embeddings_map = concepts_facts.map((conceptFact, i) => ({
      text: conceptFact.text,
      type: conceptFact.type,
      embedding: embeddings[i].embedding,
      reference: conceptFact.reference,
    }));

    return {
      concepts_facts: embeddings_map,
      metadata: {
        req_time: new Date().toISOString(),
        req_type: {
          type: "embedding",
        },
        req_tokens: usage.prompt_tokens,
        // NOTE(review): embedding usage may not include completion_tokens, in
        // which case this is undefined — confirm against the service response.
        res_tokens: usage.completion_tokens,
        model: model,
        usage: usage,
      },
    };
  }
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { LocalConsolidation } from "./consolidation/local_consolidation";
|
|
2
|
+
|
|
3
|
+
/**
 * Turns a raw embedding response into locally consolidated concepts/facts.
 */
export class ParseEmbeddingResponse {
  /**
   * Pairs each concept/fact with its embedding (matched by the response's
   * `index` field) and runs local consolidation on the result.
   *
   * @param response Raw response; embeddings are read from
   *   `response.data.data[*].embedding`.
   * @param concepts_facts Items in the order their texts were sent.
   * @param sourceId Source id forwarded to the local consolidation. Defaults
   *   to the literal "sourceID" that was previously hard-coded.
   * @returns Whatever `LocalConsolidation.consolidate` returns.
   */
  parse(
    response: any,
    concepts_facts: { text: string; type: string; reference: string }[],
    sourceId = "sourceID"
  ) {
    const rawEmbeddings: any[] = response.data.data;

    // Order by `index` so embeddings[i] belongs to concepts_facts[i]. A
    // two-argument comparator is required; a copy is sorted so the response
    // object is not mutated.
    const embeddings = [...rawEmbeddings]
      .sort((a, b) => a.index - b.index)
      .map((e) => e.embedding);

    const embeddings_map_array = concepts_facts.map((conceptFact, i) => ({
      text: conceptFact.text,
      type: conceptFact.type,
      embedding: embeddings[i],
      reference: conceptFact.reference,
    }));

    return new LocalConsolidation().consolidate(embeddings_map_array, sourceId);
  }
}
|
|
@@ -1,53 +1,50 @@
|
|
|
1
1
|
/** True when `obj` is null/undefined or has no own enumerable keys. */
function isEmpty(obj: object | null | undefined): boolean {
  return obj == null || Object.keys(obj).length === 0;
}

/**
 * Computes which concepts and facts are not yet covered by generated cards.
 *
 * The candidate pool is `typologyResponse.concepts` / `.facts` plus
 * `cardgenResponse.missing_concepts` / `.missing_facts`. Everything referenced
 * by a card in `cardgenResponse.cards_data` counts as covered.
 *
 * NOTE(review): when `cards_data` is absent or empty, both result lists are
 * empty rather than the full candidate pool. This matches the previous
 * behavior — confirm it is intended.
 *
 * @param typologyResponse Typology result; may be null/undefined/empty.
 * @param cardgenResponse Card generation result; may be null/undefined/empty.
 * @returns `remainingConcepts` and `remainingFacts`, in candidate order.
 */
export function gapFilling(typologyResponse: any, cardgenResponse: any) {
  const allConcepts: string[] = [];
  const allFacts: string[] = [];
  let remainingConcepts: string[] = [];
  let remainingFacts: string[] = [];

  if (!isEmpty(typologyResponse)) {
    allConcepts.push(...(typologyResponse.concepts ?? []));
    allFacts.push(...(typologyResponse.facts ?? []));
  }

  if (!isEmpty(cardgenResponse)) {
    allConcepts.push(...(cardgenResponse.missing_concepts ?? []));
    allFacts.push(...(cardgenResponse.missing_facts ?? []));
  }

  const cards = cardgenResponse?.cards_data;
  if (Array.isArray(cards) && cards.length > 0) {
    // Sets give de-duplication and constant-time membership checks.
    const generatedConcepts = new Set<string>();
    const generatedFacts = new Set<string>();
    for (const card of cards) {
      // A card may lack either list; treat a missing list as empty.
      for (const concept of card?.concepts ?? []) {
        generatedConcepts.add(concept);
      }
      for (const fact of card?.facts ?? []) {
        generatedFacts.add(fact);
      }
    }

    remainingConcepts = allConcepts.filter(
      (item) => !generatedConcepts.has(item)
    );
    remainingFacts = allFacts.filter((item) => !generatedFacts.has(item));
  }

  return {
    remainingConcepts: remainingConcepts,
    remainingFacts: remainingFacts,
  };
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import qdrantClient from "../services/qdrant_service";
|
|
2
|
+
|
|
3
|
+
/**
 * Creates a Qdrant collection that uses cosine distance.
 *
 * @param collectionName Name of the collection to create.
 * @param vectorSize Dimensionality of the stored vectors. Defaults to 1536
 *   (presumably the size produced by the embedding model in use — confirm
 *   before changing models).
 */
const createCollection = async (
  collectionName: string,
  vectorSize: number = 1536
) => {
  await qdrantClient.createCollection(collectionName, {
    vectors: {
      size: vectorSize,
      distance: "Cosine",
    },
  });
};
|
|
11
|
+
|
|
12
|
+
/** Fetches the list of collections from the Qdrant client. */
const getCollection = async () => {
  return await qdrantClient.getCollections();
};
|
|
16
|
+
|
|
17
|
+
/**
 * Runs one batched similarity query: for each given embedding, asks for at
 * most one stored point (with payload) whose score passes `threshold`.
 *
 * @param collectionName Collection to search.
 * @param embeddings Items to look up; only `embedding` is sent in the query.
 * @param threshold Value passed as `score_threshold` for every search.
 * @returns The raw batch result from the client.
 * @throws Re-throws any error after logging it.
 */
const getCorrespondingConcepts = async (
  collectionName: string,
  embeddings: {
    text: string;
    id: string;
    type: string;
    reference: string;
    embedding: number[];
  }[],
  threshold: number
) => {
  try {
    const searches = embeddings.map(({ embedding }) => ({
      query: embedding,
      limit: 1,
      score_threshold: threshold,
      with_payload: true,
    }));
    return await qdrantClient.queryBatch(collectionName, { searches });
  } catch (err) {
    console.log(err);
    throw err;
  }
};
|
|
46
|
+
|
|
47
|
+
/**
 * Upserts points into a collection in sequential batches of at most 1000,
 * waiting for each batch to finish before sending the next.
 *
 * @param collectionName Target collection.
 * @param embeddings Points (id, vector, payload) to upsert.
 */
const addEmbeddingsToCollection = async (
  collectionName: string,
  embeddings: {
    id: string;
    vector: number[];
    payload: {
      _sources: string[];
      text: string;
    };
  }[]
) => {
  const CHUNK_SIZE = 1000;
  let offset = 0;
  while (offset < embeddings.length) {
    const points = embeddings.slice(offset, offset + CHUNK_SIZE);
    await qdrantClient.upsert(collectionName, { wait: true, points });
    offset += CHUNK_SIZE;
  }
};
|
|
71
|
+
|
|
72
|
+
export {
|
|
73
|
+
createCollection,
|
|
74
|
+
getCollection,
|
|
75
|
+
addEmbeddingsToCollection,
|
|
76
|
+
getCorrespondingConcepts as getSimilarConcepts,
|
|
77
|
+
};
|
|
@@ -1,5 +1,13 @@
|
|
|
1
|
+
import { MongoConceptFactCards } from "../../types/mongo_concept_fact_type";
|
|
2
|
+
import { RawClozeCardResponseType } from "../../types/raw_card_response_types/generated_card_response_type";
|
|
3
|
+
|
|
1
4
|
export class ParseClozeCard {
|
|
2
|
-
parse(data:
|
|
5
|
+
parse(data: {
|
|
6
|
+
card_content: RawClozeCardResponseType;
|
|
7
|
+
type: string;
|
|
8
|
+
concepts_facts: MongoConceptFactCards[];
|
|
9
|
+
bloom_level: number;
|
|
10
|
+
}) {
|
|
3
11
|
try {
|
|
4
12
|
const content = data.card_content;
|
|
5
13
|
let correctOptions = content.correct_options;
|
|
@@ -36,9 +44,9 @@ export class ParseClozeCard {
|
|
|
36
44
|
question: finalQuestion,
|
|
37
45
|
options: finalParsedOptions,
|
|
38
46
|
},
|
|
39
|
-
|
|
40
|
-
facts: data.facts,
|
|
47
|
+
concepts_facts: data.concepts_facts,
|
|
41
48
|
explanation: data.card_content.explanation,
|
|
49
|
+
bloom_level: data.bloom_level,
|
|
42
50
|
};
|
|
43
51
|
|
|
44
52
|
return this._validateCloze(clozeCardData);
|
|
@@ -1,5 +1,13 @@
|
|
|
1
|
+
import { MongoConceptFactCards } from "../../types/mongo_concept_fact_type";
|
|
2
|
+
import { RawFlashCardResponseType } from "../../types/raw_card_response_types/generated_card_response_type";
|
|
3
|
+
|
|
1
4
|
export class ParseFlashCard {
|
|
2
|
-
parse(data:
|
|
5
|
+
parse(data: {
|
|
6
|
+
card_content: RawFlashCardResponseType;
|
|
7
|
+
type: string;
|
|
8
|
+
concepts_facts: MongoConceptFactCards[];
|
|
9
|
+
bloom_level: number;
|
|
10
|
+
}) {
|
|
3
11
|
try {
|
|
4
12
|
let displayTitle = this.generateFlashCardDisplayTitle(
|
|
5
13
|
data.card_content.front,
|
|
@@ -8,7 +16,7 @@ export class ParseFlashCard {
|
|
|
8
16
|
let flashCardData = {
|
|
9
17
|
type: {
|
|
10
18
|
category: "learning",
|
|
11
|
-
sub_type:
|
|
19
|
+
sub_type: "flash",
|
|
12
20
|
},
|
|
13
21
|
heading: "",
|
|
14
22
|
displayTitle: displayTitle,
|
|
@@ -16,9 +24,9 @@ export class ParseFlashCard {
|
|
|
16
24
|
front_content: data.card_content.front,
|
|
17
25
|
back_content: data.card_content.back,
|
|
18
26
|
},
|
|
19
|
-
|
|
27
|
+
concepts_facts: data.concepts_facts,
|
|
20
28
|
explanation: data.card_content.explanation,
|
|
21
|
-
|
|
29
|
+
bloom_level: data.bloom_level,
|
|
22
30
|
};
|
|
23
31
|
|
|
24
32
|
return flashCardData;
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import { match } from "assert";
|
|
2
|
+
import { RawMatchCardResponseType } from "../../types/raw_card_response_types/generated_card_response_type";
|
|
3
|
+
import { MongoConceptFactCards } from "../../types/mongo_concept_fact_type";
|
|
2
4
|
|
|
3
5
|
type InputItem = {
|
|
4
6
|
left_item: string;
|
|
@@ -11,7 +13,12 @@ type OutputItem = {
|
|
|
11
13
|
};
|
|
12
14
|
|
|
13
15
|
export class ParseMatchCard {
|
|
14
|
-
parse(cardData:
|
|
16
|
+
parse(cardData: {
|
|
17
|
+
card_content: RawMatchCardResponseType;
|
|
18
|
+
type: string;
|
|
19
|
+
concepts_facts: MongoConceptFactCards[];
|
|
20
|
+
bloom_level: number;
|
|
21
|
+
}) {
|
|
15
22
|
try {
|
|
16
23
|
let content = cardData.card_content;
|
|
17
24
|
const finalContent = this._parseMatchContent(content);
|
|
@@ -26,9 +33,8 @@ export class ParseMatchCard {
|
|
|
26
33
|
content: finalContent,
|
|
27
34
|
// content: cardData.card_content,
|
|
28
35
|
displayTitle: displayTitle,
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
explanation: cardData.card_content.explanation,
|
|
36
|
+
concepts_facts: cardData.concepts_facts,
|
|
37
|
+
bloom_level: cardData.bloom_level,
|
|
32
38
|
};
|
|
33
39
|
|
|
34
40
|
return this._validateMatch(matchCard);
|