only_ever_generator 0.9.5 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bootstrap/app.js +120 -0
- package/dist/card_gen/generate_cards.js +59 -0
- package/dist/config.js +9 -0
- package/dist/constants/api_constants.js +10 -0
- package/dist/constants/prompt_data.js +302 -0
- package/dist/constants/prompts/card_gen_prompt.js +167 -0
- package/dist/constants/prompts/typology_prompt.js +138 -0
- package/dist/constants/source_data.js +973 -0
- package/dist/embedding_generation/consolidation/global_consolidation.js +75 -0
- package/dist/embedding_generation/consolidation/local_consolidation.js +104 -0
- package/dist/embedding_generation/consolidation/write_consolidated_data.js +68 -0
- package/dist/embedding_generation/generate_embeddings.js +53 -0
- package/dist/embedding_generation/parse_embedding_response.js +28 -0
- package/dist/gap_fill/calculate_gap_fill.js +42 -0
- package/dist/helper/qdrant_db_methods.js +62 -0
- package/dist/index.js +96 -0
- package/dist/logger.js +41 -0
- package/dist/parse/parse_card/parse_cloze_card.js +125 -0
- package/dist/parse/parse_card/parse_flash_cards.js +33 -0
- package/dist/parse/parse_card/parse_match_card.js +81 -0
- package/dist/parse/parse_card/parse_mcq_card.js +103 -0
- package/dist/parse/parse_card_response.js +99 -0
- package/dist/parse/parse_source_content.js +185 -0
- package/dist/parse/response_format_card.js +371 -0
- package/dist/parse/response_format_typology.js +46 -0
- package/dist/services/open_ai_service.js +91 -0
- package/dist/services/qdrant_service.js +13 -0
- package/dist/typology-parsed-response.js +1935 -0
- package/dist/typology_gen/generate_typology.js +103 -0
- package/dist/utils/generate_args.js +27 -0
- package/dist/utils/parse_openai_response.js +23 -0
- package/package.json +2 -2
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ParseClozeCard = void 0;
|
|
4
|
+
/**
 * Converts a raw generated "cloze" (fill-in-the-blank) card into the stored
 * card shape and validates the result.
 */
class ParseClozeCard {
    /**
     * Builds and validates a cloze card.
     *
     * @param {object} data - Raw card. Reads `type`, `concepts`, `facts` and
     *   `card_content` (`prompt`, `correct_options`, `incorrect_options`,
     *   `explanation`).
     * @returns {object|null} The validated card, or `null` when any step throws
     *   (missing fields, failed validation, ...).
     */
    parse(data) {
        try {
            const content = data.card_content;
            const correctOptions = content.correct_options;
            const incorrectOptions = content.incorrect_options;
            const displayTitle = this._generateClozeCardDisplayTitle(content.prompt, [...correctOptions, ...incorrectOptions]);
            const prepared = this._prepareQuestionAndCorrectAnswers(content.prompt, correctOptions);
            // Distractors carry the literal string "null" as their cloze id.
            const distractors = incorrectOptions.map((option) => ({
                option,
                cloze: "null",
            }));
            const clozeCardData = {
                type: {
                    category: "learning",
                    sub_type: data.type,
                },
                heading: "",
                displayTitle,
                content: {
                    question: prepared.prompt,
                    options: [...prepared.options, ...distractors],
                },
                concepts: data.concepts,
                facts: data.facts,
                explanation: content.explanation,
            };
            return this._validateCloze(clozeCardData);
        }
        catch (e) {
            // Best effort: an unusable card is reported as null.
            return null;
        }
    }

    /**
     * Builds the display title: the prompt, " ---- ", then the options joined by ", ".
     *
     * @param {string} question - Prompt text (still in its raw `{{...}}` form when called from `parse`).
     * @param {Array} answers - Every option, correct ones first.
     * @returns {string} The display title.
     * @throws {Error} "Error in generating display title" (original failure attached as `cause`).
     */
    _generateClozeCardDisplayTitle(question, answers) {
        try {
            const optionsString = answers.length !== 0 ? answers.join(", ") : "";
            return `${question} ---- ${optionsString}`;
        }
        catch (e) {
            throw new Error("Error in generating display title", { cause: e });
        }
    }

    /**
     * Rewrites every `{{text}}` marker whose text is listed in `correctOptions`
     * as `{{cN:text}}`, where N is the index of that text in `correctOptions`,
     * and records one `{ option, cloze: "cN" }` entry per rewritten marker.
     * Markers whose text is not a correct option are left untouched.
     *
     * @param {string} rawPrompt - Prompt containing `{{...}}` markers.
     * @param {string[]} correctOptions - Texts that count as correct answers.
     * @returns {{prompt: string, options: Array<{option: string, cloze: string}>}}
     * @throws {Error} "Error in preparing question and correct answers" (original failure attached as `cause`).
     */
    _prepareQuestionAndCorrectAnswers(rawPrompt, correctOptions) {
        try {
            const finalCorrectOptions = [];
            const transformed = rawPrompt.replace(/{{(.*?)}}/g, (match, p1) => {
                // p1 is the text captured between the double braces.
                const idx = correctOptions.indexOf(p1);
                if (idx === -1) {
                    return match;
                }
                finalCorrectOptions.push({
                    option: p1,
                    cloze: `c${idx}`,
                });
                return `{{c${idx}:${p1}}}`;
            });
            return {
                prompt: transformed,
                options: finalCorrectOptions,
            };
        }
        catch (e) {
            throw new Error("Error in preparing question and correct answers", { cause: e });
        }
    }

    /**
     * Validates a built cloze card. The checks performed here are:
     *   1. the card has between 2 and 6 options (inclusive);
     *   2. at least one option has a cloze id that is not "null", null or blank;
     *   3. the question contains at least one `{{cN:text}}` marker;
     *   4. the number of markers equals the number of options whose cloze id
     *      starts with "c".
     *
     * NOTE(review): earlier notes in this file also listed duplicate clozes, a
     * 320-character question limit and a 90-character cloze limit; none of
     * those is checked by this code.
     *
     * @param {object} clozeCard - Card produced by `parse`.
     * @returns {object} The same card when every check passes.
     * @throws {Error} "Error in validating cloze card <reason>" (original failure attached as `cause`).
     */
    _validateCloze(clozeCard) {
        const clozeRegex = /\{\{c(\d+):([^}]+)\}\}/g;
        try {
            const options = clozeCard.content.options != null ? clozeCard.content.options : [];
            const question = clozeCard.content.question;
            if (options.length < 2 || options.length > 6) {
                throw new Error("Number of cloze options are invalid");
            }
            const hasRealCloze = options.some((e) => e.cloze !== "null" && e.cloze != null && e.cloze.trim() !== "");
            if (!hasRealCloze) {
                throw new Error(" No valid clozes exists");
            }
            const rightClozes = options.filter((e) => e.cloze.startsWith("c"));
            // Markers in the question that follow the {{cN:text}} syntax.
            const clozeMatches = [...question.matchAll(clozeRegex)];
            if (clozeMatches.length === 0) {
                throw new Error("Question Invalid");
            }
            if (clozeMatches.length !== rightClozes.length) {
                throw new Error(" Clozes in question doesnt match to clozes in options");
            }
            return clozeCard;
        }
        catch (e) {
            throw new Error(`Error in validating cloze card ${e.message}`, { cause: e });
        }
    }
}
|
|
125
|
+
exports.ParseClozeCard = ParseClozeCard;
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ParseFlashCard = void 0;
|
|
4
|
+
/**
 * Maps a raw generated flash card onto the stored flash-card shape.
 */
class ParseFlashCard {
    /**
     * @param {object} data - Raw card. Reads `type`, `concepts`, `facts` and
     *   `card_content` (`front`, `back`, `explanation`).
     * @returns {object|null} The flash card, or `null` when reading the input throws.
     */
    parse(data) {
        try {
            const { front, back, explanation } = data.card_content;
            return {
                type: {
                    category: "learning",
                    sub_type: data.type,
                },
                heading: "",
                displayTitle: this.generateFlashCardDisplayTitle(front, back),
                content: {
                    front_content: front,
                    back_content: back,
                },
                concepts: data.concepts,
                explanation,
                facts: data.facts,
            };
        }
        catch (err) {
            return null;
        }
    }

    /**
     * @param {*} front - Front side of the card.
     * @param {*} back - Back side of the card.
     * @returns {string} Both sides separated by " ---- ".
     */
    generateFlashCardDisplayTitle(front, back) {
        const separator = " ---- ";
        return `${front}${separator}${back}`;
    }
}
|
|
33
|
+
exports.ParseFlashCard = ParseFlashCard;
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ParseMatchCard = void 0;
|
|
4
|
+
/**
 * Converts a raw generated "match" card (a list of left/right pairs) into the
 * stored card shape and validates the result.
 */
class ParseMatchCard {
    constructor() {
        /**
         * Groups pairs by `left_item`: one entry per distinct left item, with
         * every matching right item collected into an array.
         *
         * A Map is used for grouping so that left items such as "constructor",
         * "toString" or "__proto__" cannot collide with inherited object
         * members, and so that entries keep first-seen order even when the
         * left items look like integers.
         *
         * @param {Array<{left_item: *, right_item: *}>} input - Raw pairs.
         * @returns {Array<{left_item: *, right_item: Array}>} Grouped entries.
         */
        this._parseMatchContent = (input) => {
            const grouped = input.reduce((acc, { left_item, right_item }) => {
                // String key: values that print the same share one group.
                const key = String(left_item);
                let entry = acc.get(key);
                if (!entry) {
                    entry = { left_item, right_item: [] };
                    acc.set(key, entry);
                }
                entry.right_item.push(right_item);
                return acc;
            }, new Map());
            return [...grouped.values()];
        };
    }

    /**
     * Builds and validates a match card.
     *
     * @param {object} cardData - Raw card. Reads `type`, `concepts`, `facts`
     *   and `card_content` (the list of left/right pairs).
     * @returns {object|null} The validated card, or `null` when the input is
     *   unusable or validation fails.
     */
    parse(cardData) {
        try {
            const content = cardData.card_content;
            const finalContent = this._parseMatchContent(content);
            const displayTitle = this._generateMatchCardDisplayTitle(content);
            const matchCard = {
                type: {
                    category: "learning",
                    sub_type: cardData.type,
                },
                heading: "",
                content: finalContent,
                displayTitle,
                concepts: cardData.concepts,
                facts: cardData.facts,
                // NOTE(review): card_content is iterated as a list above, so this
                // reads a property off that list - confirm where the explanation lives.
                explanation: cardData.card_content.explanation,
            };
            return this._validateMatch(matchCard);
        }
        catch (e) {
            // Best effort: an unusable card is reported as null.
            return null;
        }
    }

    /**
     * Builds the display title from the raw (ungrouped) pairs, labelled
     * A, B, C, ... and joined with ",".
     *
     * @param {Array<{left_item: *, right_item: *}>} answers - Raw pairs.
     * @returns {string} e.g. "A. left -- right,B. left -- right".
     */
    _generateMatchCardDisplayTitle(answers) {
        const titles = [];
        let counter = 65; // character code of "A"
        for (const pair of answers) {
            const letter = String.fromCharCode(counter);
            titles.push(`${letter}. ${pair.left_item} -- ${pair.right_item}`);
            counter++;
        }
        return titles.join(",");
    }

    /**
     * Validates the grouped entries and keeps only the acceptable ones.
     * The card needs 1 to 8 entries going in, and at least 2 entries that
     * pass the per-entry checks; `matchCard.content` is replaced by the
     * entries that passed.
     *
     * @param {object} matchCard - Card produced by `parse`.
     * @returns {object|null} The card with filtered content, or `null` when invalid.
     */
    _validateMatch(matchCard) {
        const matches = matchCard.content;
        try {
            if (matches.length < 1 || matches.length > 8) {
                throw new Error("Invalid number of matches");
            }
            const content = [];
            for (const elem of matches) {
                const leftOk = elem.left_item.length <= 30 && elem.left_item.length !== 0;
                // NOTE(review): after grouping, right_item is an array, so this
                // limit counts right-hand entries rather than characters -
                // confirm whether a 40-character limit per entry was intended.
                const rightOk = elem.right_item.length <= 40 && elem.right_item.length !== 0;
                if (leftOk && rightOk) {
                    content.push(elem);
                }
            }
            if (content.length < 2) {
                throw new Error("Invalid content");
            }
            matchCard.content = content;
            return matchCard;
        }
        catch (e) {
            return null;
        }
    }
}
|
|
81
|
+
exports.ParseMatchCard = ParseMatchCard;
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ParseMcqCard = void 0;
|
|
4
|
+
/**
 * Converts a raw generated multiple-choice card into the stored card shape
 * and validates the result.
 */
class ParseMcqCard {
    /**
     * Builds and validates an MCQ card.
     *
     * @param {object} data - Raw card. Reads `type`, `concepts`, `facts` and
     *   `card_content` (`prompt`, `choices[]` with `choice` / `is_correct`,
     *   `explanation`).
     * @returns {object|null} The card when valid, otherwise `null`.
     */
    parse(data) {
        try {
            const choices = data.card_content.choices;
            const mcqAnswers = [];
            if (choices !== undefined && choices.length != 0) {
                for (const choice of choices) {
                    mcqAnswers.push({
                        answer: choice.choice,
                        is_correct: choice.is_correct,
                    });
                }
            }
            const displayTitle = this._generateMcqCardDisplayTitle(data.card_content.prompt, mcqAnswers);
            const mcqCard = {
                type: {
                    category: "learning",
                    sub_type: data.type,
                },
                heading: "",
                displayTitle,
                content: {
                    question: data.card_content.prompt,
                    answers: mcqAnswers,
                },
                concepts: data.concepts,
                facts: data.facts,
                explanation: data.card_content.explanation,
            };
            return this._validate(mcqCard) === true ? mcqCard : null;
        }
        catch (e) {
            // Best effort: an unusable card is reported as null.
            return null;
        }
    }

    /**
     * Builds the display title: the question, " ---- ", then every option as
     * "<position> . <answer> " (1-based). With no options the question is
     * returned on its own.
     *
     * Options are numbered by their position in the list, so an option that
     * appears twice still gets two distinct numbers.
     *
     * @param {string} question - Question text.
     * @param {Array<{answer: *}>} answers - Options in display order.
     * @returns {string} The display title.
     */
    _generateMcqCardDisplayTitle(question, answers) {
        if (answers.length === 0) {
            return question;
        }
        const numbered = answers.map((option, index) => `${index + 1} . ${option.answer} `);
        return `${question} ---- ${numbered.join("")}`;
    }

    /**
     * Validates an MCQ card. The checks performed here are:
     *   1. the question is at most 90 characters long;
     *   2. at least one answer is marked correct;
     *   3. no answer is empty or longer than 40 characters.
     *
     * @param {object} mcqCard - Card produced by `parse`.
     * @returns {boolean} `true` when every check passes, `false` otherwise
     *   (including when reading the card throws).
     */
    _validate(mcqCard) {
        try {
            if (mcqCard.content.question.length > 90) {
                throw new Error("Question length exceeded");
            }
            const hasCorrectAnswer = this._checkIfAllAnswersAreWrong(mcqCard.content.answers);
            if (!hasCorrectAnswer) {
                throw new Error("Every answers are wrong");
            }
            const answers = mcqCard.content.answers != null ? mcqCard.content.answers : [];
            const hasBadLength = answers.some((e) => e.answer.length === 0 || e.answer.length > 40);
            if (hasBadLength) {
                throw new Error("Option has length more than 40 or is Empty");
            }
            return true;
        }
        catch (e) {
            return false;
        }
    }

    /**
     * Despite its name, this returns `true` when at least one answer IS marked
     * correct and `false` when none is.
     *
     * @param {Array<{is_correct: *}>} answers - Options to inspect.
     * @returns {boolean} Whether a correct answer exists.
     */
    _checkIfAllAnswersAreWrong(answers) {
        // Loose comparison kept on purpose: it also accepts the number 1.
        return answers.some((e) => e.is_correct == true);
    }
}
|
|
103
|
+
exports.ParseMcqCard = ParseMcqCard;
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ParseCardResponse = void 0;
|
|
4
|
+
const parse_cloze_card_1 = require("./parse_card/parse_cloze_card");
|
|
5
|
+
const parse_flash_cards_1 = require("./parse_card/parse_flash_cards");
|
|
6
|
+
const parse_match_card_1 = require("./parse_card/parse_match_card");
|
|
7
|
+
const parse_mcq_card_1 = require("./parse_card/parse_mcq_card");
|
|
8
|
+
/**
 * Turns the generated card payload into the final response: every raw card
 * is handed to the parser for its type, linked to the source concepts, and
 * given a heading reference.
 */
class ParseCardResponse {
    /**
     * @param {object} generatedData - Generation result. Reads `metadata`,
     *   `type` and `generated_content.test_cards`.
     * @param {boolean} isGapFill - Only consulted when no raw cards were returned.
     * @param {object} sourceTaxonomy - Source taxonomy; reads `concepts_facts`.
     * @returns {object} `{ status_code, metadata, type, cards_data }` with
     *   status 200 when at least one card was parsed and 400 otherwise, or
     *   `{ status_code: 500, metadata, type }` when parsing throws.
     */
    parse(generatedData, isGapFill, sourceTaxonomy) {
        const usage_data = generatedData.metadata;
        try {
            const cardData = [];
            const unparsedTestCards = generatedData.generated_content.test_cards;
            const type = generatedData.type;
            if (unparsedTestCards !== undefined && unparsedTestCards.length != 0) {
                for (const elem of unparsedTestCards) {
                    elem.concepts = this._mapIdToConcepts(elem.concepts_facts, sourceTaxonomy.concepts_facts);
                    const card = this._parseCardByType(elem);
                    if (card) {
                        card.heading = this._getCardReference(card, sourceTaxonomy);
                        cardData.push(card);
                    }
                }
            }
            else if (!isGapFill) {
                usage_data.status = "failed";
            }
            // NOTE(review): an empty result is marked failed here regardless of
            // isGapFill, so the flag above has no visible effect - confirm intent.
            if (cardData.length === 0) {
                usage_data.status = "failed";
            }
            return {
                status_code: cardData.length === 0 ? 400 : 200,
                metadata: usage_data,
                type: type,
                cards_data: cardData,
            };
        }
        catch (e) {
            return {
                status_code: 500,
                metadata: usage_data,
                type: generatedData.type,
            };
        }
    }

    /**
     * Hands a raw card to the parser matching its `type`.
     *
     * @param {object} elem - Raw card with `type` set to "flash", "mcq", "cloze" or "match".
     * @returns {object|null|undefined} The parsed card, `null` when the parser
     *   rejected it, or `undefined` for an unknown type.
     */
    _parseCardByType(elem) {
        switch (elem.type) {
            case "flash":
                return new parse_flash_cards_1.ParseFlashCard().parse(elem);
            case "mcq":
                return new parse_mcq_card_1.ParseMcqCard().parse(elem);
            case "cloze":
                return new parse_cloze_card_1.ParseClozeCard().parse(elem);
            case "match":
                return new parse_match_card_1.ParseMatchCard().parse(elem);
            default:
                return undefined;
        }
    }

    /**
     * Finds the reference of the first source concept (in source order) whose
     * text exactly equals the text of one of the card's concepts.
     *
     * @param {object} generatedCardData - Parsed card; reads `concepts[].text`.
     * @param {object} sourceTaxonomy - Source taxonomy; reads `concepts_facts`.
     * @returns {*} That concept's `reference`, or "" when nothing matches.
     */
    _getCardReference(generatedCardData, sourceTaxonomy) {
        const concepts = generatedCardData.concepts != null ? generatedCardData.concepts : [];
        const sourceConceptsFacts = sourceTaxonomy.concepts_facts != null ? sourceTaxonomy.concepts_facts : [];
        const cardConceptTexts = new Set(concepts.map((e) => e.text));
        const firstMatchedConcept = sourceConceptsFacts.find((elem) => cardConceptTexts.has(elem.text));
        return firstMatchedConcept ? firstMatchedConcept.reference : "";
    }

    /**
     * Resolves the card's concepts against the source concepts, comparing
     * texts case-insensitively and ignoring surrounding whitespace. Each card
     * concept that matches yields `{ id, text }` of the FIRST matching source
     * concept; unmatched card concepts are dropped.
     *
     * A missing list (null/undefined) on either side is treated as empty, and
     * entries without a string `text` are skipped, so that one malformed card
     * does not abort the whole batch.
     *
     * @param {Array<{text: string}>} [cardConcepts] - Concepts named by the card.
     * @param {Array<{id: *, text: string}>} [sourceConceptsFacts] - Concepts of the source.
     * @returns {Array<{id: *, text: string}>} Matched source concepts.
     */
    _mapIdToConcepts(cardConcepts, sourceConceptsFacts) {
        const wanted = cardConcepts != null ? cardConcepts : [];
        const available = sourceConceptsFacts != null ? sourceConceptsFacts : [];
        const normalize = (entry) => (entry != null && typeof entry.text === "string"
            ? entry.text.toLowerCase().trim()
            : null);
        // Index the source once; the first concept with a given text wins.
        const firstByText = new Map();
        for (const sourceConcept of available) {
            const key = normalize(sourceConcept);
            if (key !== null && !firstByText.has(key)) {
                firstByText.set(key, sourceConcept);
            }
        }
        const managedCardConcepts = [];
        for (const cardConcept of wanted) {
            const key = normalize(cardConcept);
            const matchedConcept = key === null ? undefined : firstByText.get(key);
            if (matchedConcept) {
                managedCardConcepts.push({
                    id: matchedConcept.id,
                    text: matchedConcept.text,
                });
            }
        }
        return managedCardConcepts;
    }
}
|
|
99
|
+
exports.ParseCardResponse = ParseCardResponse;
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ParseSourceContent = void 0;
|
|
4
|
+
/**
 * Cleans up source content before it is used further: drops boilerplate
 * sections and unwanted block types, strips link markup from text, and
 * flattens video transcripts into one string per chapter.
 */
class ParseSourceContent {
    /**
     * @param {object} sourceContent - Source descriptor. This class reads
     *   `type`, `title`, `content`, `headings` and `taxonomy` from it.
     */
    constructor(sourceContent) {
        // Heading texts (exact, case-sensitive match) whose blocks are dropped.
        this.titles_to_remove = [
            "See also",
            "References",
            "Further reading",
            "External links",
            "Notes and references",
            "Bibliography",
            "Notes",
            "Cited sources",
        ];
        // Block types that are always dropped.
        this.block_types_toremove = ["table", "empty_line"];
        this.content = sourceContent;
    }

    /**
     * Produces the cleaned source. Content of type "video" goes through
     * `parseVideoContent`; everything else has unwanted blocks removed and
     * its text sanitized.
     *
     * @returns {{type: *, title: *, content: Array, headings: *, taxonomy: *}}
     */
    parseData() {
        let afterSanitized;
        if (this.content.type == "video") {
            afterSanitized = this.parseVideoContent(this.content.content);
        }
        else {
            const withoutUnwantedBlocks = this.removeSectionsByTitle(this.content.content);
            afterSanitized = this.sanitizeBlocks(withoutUnwantedBlocks);
        }
        return {
            type: this.content.type,
            title: this.content.title,
            content: afterSanitized,
            headings: this.content.headings,
            taxonomy: this.content.taxonomy,
        };
    }

    /**
     * Drops heading blocks whose `content` is listed in `titles_to_remove`
     * and every block whose `block_type` is listed in `block_types_toremove`,
     * recursing into `children`. A dropped block is omitted entirely, so
     * whatever is nested in its `children` goes with it.
     *
     * Note: the `children` of kept blocks are replaced in place on the
     * objects passed in.
     *
     * @param {Array<object>} data - Blocks to filter.
     * @returns {Array<object>} The kept blocks, in their original order.
     */
    removeSectionsByTitle(data) {
        const kept = [];
        for (const elem of data) {
            const isUnwantedHeading = elem.block_type == "heading" &&
                this.titles_to_remove.includes(elem.content);
            if (isUnwantedHeading || this.block_types_toremove.includes(elem.block_type)) {
                continue;
            }
            if (elem.children) {
                elem.children = this.removeSectionsByTitle(elem.children);
            }
            kept.push(elem);
        }
        return kept;
    }

    /**
     * Strips link markup from a piece of text, keeping the visible text.
     *
     * @param {string} content - Text to clean.
     * @returns {string} The cleaned text.
     */
    sanitizeTextContent(content) {
        // Replace the two-character escape sequence backslash + "n" with a space.
        // NOTE(review): real line-break characters are not touched by this
        // pattern - confirm that is intended.
        content = content.replace(/\\n/g, " ");
        // Internal links: [[link|text|index|wiki]] --> text
        content = content.replace(/\[\[.*?\|(.*?)\|.*?\|wiki\]\]/g, "$1");
        // External links: [http(s)://url text] --> text
        content = content.replace(/\[http[s]?:\/\/[^\s]+ ([^\]]+)\]/g, "$1");
        // Markdown images: ![alt text](url) --> alt text
        content = content.replace(/\!\[([^\]]+)\]\([^\)]+\)/g, "$1");
        return content;
    }

    /**
     * Returns sanitized copies of the given blocks. Blocks of type "table"
     * are dropped. In every other block, string values are cleaned with
     * `sanitizeTextContent`, array values are sanitized recursively, and all
     * other values are copied as they are.
     *
     * Array entries that are not objects are handled too: strings are cleaned
     * as text and other primitives are kept unchanged, so a list of plain
     * strings stays a list of strings.
     *
     * @param {Array} blocks - Blocks (or nested array values) to sanitize.
     * @returns {Array} New array with sanitized entries.
     */
    sanitizeBlocks(blocks) {
        const sanitizedBlocks = [];
        const isObject = (item) => item !== null && typeof item === "object";
        const kept = blocks.filter((item) => !(isObject(item) && item.block_type == "table"));
        kept.forEach((block) => {
            if (typeof block === "string") {
                sanitizedBlocks.push(this.sanitizeTextContent(block));
                return;
            }
            if (Array.isArray(block)) {
                sanitizedBlocks.push(this.sanitizeBlocks(block));
                return;
            }
            if (!isObject(block)) {
                sanitizedBlocks.push(block);
                return;
            }
            const sanitizedBlock = {};
            for (const key of Object.keys(block)) {
                const value = block[key];
                if (typeof value === "string") {
                    sanitizedBlock[key] = this.sanitizeTextContent(value);
                }
                else if (Array.isArray(value)) {
                    sanitizedBlock[key] = this.sanitizeBlocks(value);
                }
                else {
                    sanitizedBlock[key] = value;
                }
            }
            sanitizedBlocks.push(sanitizedBlock);
        });
        return sanitizedBlocks;
    }

    /**
     * Flattens video chapters: each chapter keeps its `startTime` / `endTime`,
     * its own `content` becomes the `title`, and the text of its children is
     * combined into `content`.
     *
     * @param {Array<object>} data - Chapters; reads `startTime`, `endTime`,
     *   `content` and `children`.
     * @returns {Array<{startTime: *, endTime: *, content: string, title: *}>}
     */
    parseVideoContent(data) {
        return data.map((chapter) => ({
            startTime: chapter.startTime,
            endTime: chapter.endTime,
            content: this.cleanTranscript(chapter),
            title: chapter.content,
        }));
    }

    /**
     * Tells whether the trimmed text starts with "[" and ends with "]" on a
     * single line - the form used here for non-speech sounds.
     *
     * @param {string} content - Transcript text.
     * @returns {boolean}
     */
    isNonSpeech(content) {
        return /^\[.*\]$/.test(content.trim());
    }

    /**
     * Combines the text of a chapter's children into one string. Each piece
     * is trimmed and has runs of whitespace collapsed to one space; pieces
     * recognised by `isNonSpeech` and empty pieces are skipped. The remaining
     * pieces are joined with a single space so that the last word of one
     * piece does not run into the first word of the next.
     *
     * @param {object} data - Chapter; reads `children[].content`.
     * @returns {string} The combined transcript text ("" when there is none).
     */
    cleanTranscript(data) {
        const children = data.children != null ? data.children : [];
        const pieces = [];
        children.forEach((child) => {
            const raw = (child.content || "").trim();
            if (this.isNonSpeech(raw)) {
                return;
            }
            const collapsed = raw.replace(/\s+/g, " ");
            if (collapsed !== "") {
                pieces.push(collapsed);
            }
        });
        return pieces.join(" ");
    }

    /**
     * Merges consecutive entries into buckets: an entry joins the current
     * bucket while the summed entry durations stay within `maxDuration`,
     * otherwise the bucket is closed and a new one starts with that entry.
     *
     * @param {Array<{start_time: number, end_time: number, content: string}>} data - Entries in order.
     * @param {number} [maxDuration=30.0] - Largest summed duration per bucket.
     * @returns {Array<{start_time: number, end_time: number, content: string}>}
     *   One item per bucket, with the entry texts joined by a space.
     */
    collapseTimeCodes(data, maxDuration = 30.0) {
        const collapsedData = [];
        let bucketStartTime = null;
        let bucketEndTime = null;
        let bucketContent = [];
        let bucketDuration = 0.0;
        data.forEach((entry) => {
            const startTime = entry.start_time;
            const endTime = entry.end_time;
            const content = entry.content;
            const entryDuration = endTime - startTime;
            if (bucketStartTime === null) {
                // First entry: open the first bucket.
                bucketStartTime = startTime;
                bucketEndTime = endTime;
                bucketContent.push(content);
                bucketDuration = entryDuration;
            }
            else if (bucketDuration + entryDuration <= maxDuration) {
                // Still fits: extend the current bucket.
                bucketEndTime = endTime;
                bucketContent.push(content);
                bucketDuration += entryDuration;
            }
            else {
                // Does not fit: close the current bucket, open a new one.
                collapsedData.push({
                    start_time: bucketStartTime,
                    end_time: bucketEndTime,
                    content: bucketContent.join(" "),
                });
                bucketStartTime = startTime;
                bucketEndTime = endTime;
                bucketContent = [content];
                bucketDuration = entryDuration;
            }
        });
        // Emit whatever is left in the last bucket.
        if (bucketContent.length > 0) {
            collapsedData.push({
                start_time: bucketStartTime,
                end_time: bucketEndTime,
                content: bucketContent.join(" "),
            });
        }
        return collapsedData;
    }
}
|
|
185
|
+
exports.ParseSourceContent = ParseSourceContent;
|