npm - only_ever_generator - Versions diffs - 0.4.6 → 0.4.8 - Mend

only_ever_generator 0.4.6 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/bootstrap/app.js +7 -6
package/dist/constants/prompts/card_gen_prompt.js +49 -55
package/dist/constants/prompts/typology_prompt.js +4 -3
package/dist/parse/parse_source_content.js +105 -5
package/package.json +1 -1
package/src/bootstrap/app.ts +9 -7
package/src/constants/prompts/card_gen_prompt.ts +49 -55
package/src/constants/prompts/typology_prompt.ts +4 -3
package/src/parse/parse_source_content.ts +118 -4

package/dist/bootstrap/app.js CHANGED Viewed

@@ -60,10 +60,10 @@ class OnlyEverGenerator {
                         this.cardgenResponse = yield this.generateCard(this.promptForCardGen, JSON.stringify(this.typologyResponse), false);
                         responseToReturn.push(this.cardgenResponse);
                         /// check if gap fill is required ie coverage determination
-                        // if(this.cardgenResponse.status_code == 200) {
-                        //   this.gapFillResponse = await this._generationForGapFill(this.typologyResponse, this.cardgenResponse);
-                        //   responseToReturn.push(this.gapFillResponse);
-                        // }
+                        if (this.cardgenResponse.status_code == 200) {
+                            this.gapFillResponse = yield this._generationForGapFill(this.typologyResponse, this.cardgenResponse);
+                            responseToReturn.push(this.gapFillResponse);
+                        }
                     }
                 }
             return responseToReturn;
@@ -85,10 +85,11 @@ class OnlyEverGenerator {
             let response;
             if (gapFill.remainingConcepts.length !== 0 ||
                 gapFill.remainingFacts.length !== 0) {
+                this.typologyResponse.facts = gapFill.remainingFacts;
+                this.typologyResponse.concepts = gapFill.remainingConcepts;
                 response = yield this.generateCard(this.promptForCardGen +
                     "Generate cards only suitable for the given remaining concepts and facts" +
-                    JSON.stringify(gapFill) +
-                    "Exclude generating  cards with content in the following", JSON.stringify(cardGenData.cards_data), true);
+                    JSON.stringify(gapFill), "", true);
             }
             return response;
         });

package/dist/constants/prompts/card_gen_prompt.js CHANGED Viewed

@@ -1,7 +1,8 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.returnCardGenPrompt = returnCardGenPrompt;
-const promptString = `As a dedicated assistant at a learning company, your role is to analyze educational content and create test cards that help learners understand and remember key concepts and facts. You will be provided with:
+const promptString = `
+As a dedicated assistant at a learning company, your role is to analyze educational content and create test cards that help learners understand and remember key concepts and facts. You will be provided with:
 1. Title of the source
 2. Main headings
@@ -15,7 +16,7 @@ const promptString = `As a dedicated assistant at a learning company, your role
 2. Generate test cards for concepts: Take each concept and re-read the text under the reference heading for that concept. Start by trying to create a card that is at the highest bloom level possible (5 being the highest). Then work your way down to the lower bloom levels. Generate as many cards as possible for each concept. Keep going through the list of concepts till you have completed all of them.
 3. Generate test cards for facts: Take each fact and re-read the text under the reference heading for that fact. Generate as many cards as possible to test that concept. Keep going through the list of concepts till you have completed all of them.
-**Note:** Further detailed instructions on how to create the content for each test card type will be provided subsequently.
+**Note:** Further detailed instructions on how to create the content, references and bloom level for each test card type will be provided subsequently.
 **Format your response in the following JSON format:**
@@ -25,18 +26,12 @@ json
         {
             "type": "mcq" | "cloze" | "match",
             "card_content": "{content}",
-            "concepts": [
-                "concept1",
-                "concept2",
-                "..."
-            ],
-            "facts": [
-                "fact1",
-                "fact2",
-                "..."
-            ],
+            "concepts": [{concept1}, {concept2}, "..."],
+            "facts": [{fact1}, {fact2}, {...}],
             "bloom_level": 1 | 2 | 3 | 4 | 5
-        }
+        },
+        {... as many as possible}
     ]
 }
@@ -44,7 +39,7 @@ json
 **Criteria:**
 * Each test card must include at least one concept or fact.
-* Each concept and fact must have at least one test card.
+* Each concept and fact MUST HAVE at least one test card associated with it.
 * The final output should include test cards that cover the first 5 levels of Bloom's Taxonomy.
 **Further Instructions:**
@@ -86,23 +81,23 @@ json
     [
         {
             "concept_text": "concept1_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {
             "concept_text": "concept2_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {...}
     ],
     "facts":
     [
             {
-                "factt_text": "fact1_content",
-                "reference": "source_title#main_heading"
+                "fact_text": "fact1_content",
+                "reference": "main_heading"
             },
             {
                 "fact_text": "fact2_content",
-                "reference": "source_title#main_heading"
+                "reference": "main_heading"
             },
             {...}
     ],
@@ -135,92 +130,91 @@ json
     [
         {
             "concept_text": "concept1_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {
             "concept_text": "concept2_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {...}
     ],
     "facts":
     [
             {
-                "factt_text": "fact1_content",
-                "reference": "source_title#main_heading"
+                "fact_text": "fact1_content",
+                "reference": "main_heading"
             },
             {
                 "fact_text": "fact2_content",
-                "reference": "source_title#main_heading"
+                "reference": "main_heading"
             },
             {...}
     ],
     "bloom_level": <1-5>
 }
-•	Minimum choices required: 2
-•	Maximum choices allowed: 8
-•	Minimum correct choices required: 1
-•	Maximum character length for the prompt: 320
-•	Maximum character length for an individual cloze: 90
+* Minimum choices required: 2
+* Maximum choices allowed: 8
+* Minimum correct choices required: 1
+* Maximum character length for the prompt: 320
+* Maximum character length for an individual cloze: 90
 4.	Match: Pairing items.
 json
 {
     "type": "match",
-    "card_content": [
-       {
-        "left_item": "left choice",
-        "right_item": [right item]
-       },
-       {
-        "left_item":" left choice",
-        "right_item": [right item]
-       },
-       {
-        "left_item": "left choice",
-        "right_item": [right item]
-       },
-        "... up to 8 total pairs"
-    ],
+    "card_content" : [
+        {
+            "left_item" : "left_item text",
+            "right_item" : ["right_item text" ]
+        },
+        {
+            "left_item" : "left_item text",
+            "right_item" : ["right_item text"]
+        },
+        {"... up to 8 total pairs"}
+    ],
     "concepts":
     [
         {
             "concept_text": "concept1_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {
             "concept_text": "concept2_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {...}
     ],
     "facts":
     [
             {
-                "factt_text": "fact1_content",
-                "reference": "source_title#main_heading"
+                "fact_text": "fact1_content",
+                "reference": "main_heading"
             },
             {
                 "fact_text": "fact2_content",
-                "reference": "source_title#main_heading"
+                "reference": "main_heading"
             },
             {...}
     ],
     "bloom_level": <1-5>
 }
-•	Maximum character length for each item in a pair: 42
+* Maximum character length for each item in a pair: 42
+* Duplicate items are allowed on the left side but not on the right side. Or in other words the same item on the left can be paired with multiple items on the right.
-** Criteria **
-1. Ensure that you produce at least one if not more card for each concept and fact.
-2. For each concept and fact start by trying to create a card at the highest bloom level possible.
-3. Do not skip any concepts or facts, and be thorough in your coverage.
-4. Cards should span across different levels of Bloom’s Taxonomy, from level 1 (Remembering) to level 5 (Evaluating), but exclude level 6 (Creating)
+** Overall Criteria for Testing Cards **
+1. Each card should present the learner with a unique challenge that improves their learning.
+2. Ensure that you produce at least one if not more card for each concept and fact.
+3. For each concept and fact start by trying to create a card at the highest bloom level possible.
+4. Do not skip any concepts or facts, and be thorough in your coverage.
+5. Cards should span across different levels of Bloom’s Taxonomy, from level 1 (Remembering) to level 5 (Evaluating), but exclude level 6 (Creating).
 Once you are done generating the test cards. Go back and evaulate the full list of concepts and facts provided as the input.
 Are there any concept or fact that don't have a test card yet? If yes, go back and create one.

package/dist/constants/prompts/typology_prompt.js CHANGED Viewed

@@ -64,7 +64,7 @@ Extract key concepts within the content after classifying the field. This is a c
 2. **Inclusion Criteria**: Include a concept only if it is discussed in detail, meaning it is explained thoroughly, tied to specific examples, or highlighted as a critical part of the subject matter.
 3. **How to describe a concept**: The concept should be described so that a reader can comprehend the gist of it.
 4. **Character Limit**: Maintain a limit of 60 characters for the  to ensure each concept is concise yet informative.
-5. **Reference**: Every concept must include a reference. A reference can either be the entire source or a specific heading in the source. The reference indicates the part of the text that is most relevant for that particular concept. Whenever possible, pick a main heading to direct the user to the most relevant part of the source material. The reference schema is as follows: source_title#main_heading, where #main_heading is optional. If a concept needs to reference multiple sections or the entire source then simply leave the reference as empty.
+5. **Reference**: Every concept must include a reference. A reference can either be the entire source or a specific heading in the source. Whenever possible, pick a main heading to direct the user to the most relevant part of the source material. The heading must exactly match one of the headings provided to you. Sometimes concepts may need to reference the entire text or multiple headings, leave the reference empty for such cases.
 List the concepts in the following JSON format:
@@ -83,9 +83,10 @@ After classifying the content and identifying key concepts, proceed to extract a
 1. **Definition of a Fact**: Ensure each fact is a standalone piece of information that is concrete and can be independently verified.
 2. **Selection Criteria**: Choose facts based on their significance to the content's main themes or concepts, their educational value, or their foundational role in the subject.
 3. **Character Limit**: Maintain a limit of 60 characters for the  to ensure each message is concise yet informative.
-4. **Reference**: Every fact must include a reference. The reference indicates the part of the text that is most relevant for that particular concept. Whenever possible, pick a main heading to direct the user to the most relevant part of the source material. The reference schema is as follows: source_title#main_heading, where #main_heading is optional. If a fact needs to reference multiple sections or the entire source then simply leave the reference as empty.
+4. **Reference**: Every fact must include a reference. A reference can either be the entire source or a specific heading in the source. Whenever possible, pick a main heading to direct the user to the most relevant part of the source material. The heading must exactly match one of the headings provided to you. Sometimes facts may need to reference the entire text or multiple headings, leave the reference empty for such cases.
 List the facts in the following JSON format:
 json
 "facts":
     [
@@ -95,6 +96,7 @@ json
         },
         {...}
     ]
 After analyzing the content, classifying its field, and identifying key concepts, and facts, assess whether the discovered elements warrant the creation of testing (quiz) materials.
 Consider if these elements provide significant educational value to an average learner by enhancing understanding, offering practical applications, or supporting crucial educational goals. If you decide that the source does not hold educational value that is worthy of generating testing material or quizzes for then please provide a reason in less than 90 characters.
@@ -128,7 +130,6 @@ json
     "summary_cards": ["summary_card1_content", "summary_card2_content", "summary_card3_content", "..."]
 }
 `;
 function returnTypologyPrompt() {
     return typologyPromptString;

package/dist/parse/parse_source_content.js CHANGED Viewed

@@ -3,14 +3,29 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.ParseSourceContent = void 0;
 class ParseSourceContent {
     constructor(sourceContent) {
+        /// Format of Content
+        // content: {
+        //     title: source.title,
+        //     headings: source.headings,
+        //     content: source.content,
+        //     fields: fields,
+        //     taxonomy: source.source_taxonomy,
+        //     type: source.source_type
+        //   },
         this.titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
         this.block_types_toremove = ['table', 'empty_line'];
         this.content = sourceContent;
     }
     parseData() {
-        // if(this.content.type == 'source') {
-        let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
-        let afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
+        let sourceType = this.content.type;
+        let afterSanitized;
+        if (sourceType == "video") {
+            afterSanitized = this.parseVideoContent(this.content.content);
+        }
+        else {
+            let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
+            afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
+        }
         return {
             type: this.content.type,
             title: this.content.title,
@@ -36,7 +51,7 @@ class ParseSourceContent {
         }
         return dataAfterRemoving;
     }
-    sanitizeWikiContent(content) {
+    sanitizeTextContent(content) {
         // Remove newline characters
         content = content.replace(/\\n/g, ' ');
         // Remove internal link references, keeping only the link text
@@ -58,7 +73,7 @@ class ParseSourceContent {
             for (let key in block) {
                 let value = block[key];
                 if (typeof value === 'string') {
-                    sanitizedBlock[key] = this.sanitizeWikiContent(value);
+                    sanitizedBlock[key] = this.sanitizeTextContent(value);
                 }
                 else if (Array.isArray(value)) {
                     sanitizedBlock[key] = this.sanitizeBlocks(value);
@@ -71,5 +86,90 @@ class ParseSourceContent {
         });
         return sanitizedBlocks;
     }
+    parseVideoContent(data) {
+        let timeCodes = [];
+        data.map((e) => timeCodes.push(...e.children));
+        let cleanedData = this.cleanTranscript(timeCodes);
+        let collapsedData = this.collapseTimeCodes(cleanedData, 100);
+        return collapsedData;
+    }
+    // detect content that is wholly enclosed in [] (after trimming), which denotes non-speech sounds
+    isNonSpeech(content) {
+        // Check if the content is non-speech (enclosed in square brackets).
+        return /^\[.*\]$/.test(content.trim());
+    }
+    // drop non-speech entries, collapse runs of whitespace, and keep only start_time, end_time and content
+    cleanTranscript(data) {
+        // Clean the transcript by removing non-speech content, normalizing whitespace, and keeping only necessary fields.
+        const cleanedData = [];
+        data.forEach(entry => {
+            let content = (entry.content || '').trim();
+            // Skip non-speech content
+            if (this.isNonSpeech(content))
+                return;
+            // Normalize whitespace in content
+            content = content.replace(/\s+/g, ' ');
+            // Only keep start_time, end_time, content
+            const currentEntry = {
+                start_time: entry.startTime,
+                end_time: entry.endTime,
+                content: content
+            };
+            cleanedData.push(currentEntry);
+        });
+        return cleanedData;
+    }
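+    // merge consecutive entries into buckets whose summed entry durations stay within maxDuration seconds (default 30; parseVideoContent passes 100)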
+    collapseTimeCodes(data, maxDuration = 30.0) {
+        // Collapse time codes into buckets of approximately maxDuration seconds.
+        const collapsedData = [];
+        let bucketStartTime = null;
+        let bucketEndTime = null;
+        let bucketContent = [];
+        let bucketDuration = 0.0;
+        data.forEach(entry => {
+            const startTime = entry.start_time;
+            const endTime = entry.end_time;
+            const content = entry.content;
+            const entryDuration = endTime - startTime;
+            if (bucketStartTime === null) {
+                // Start a new bucket
+                bucketStartTime = startTime;
+                bucketEndTime = endTime;
+                bucketContent.push(content);
+                bucketDuration = entryDuration;
+            }
+            else if ((bucketDuration + entryDuration) <= maxDuration) {
+                // Add to current bucket
+                bucketEndTime = endTime;
+                bucketContent.push(content);
+                bucketDuration += entryDuration;
+            }
+            else {
+                // Close current bucket and start a new one
+                const collapsedEntry = {
+                    start_time: bucketStartTime,
+                    end_time: bucketEndTime,
+                    content: bucketContent.join(' ')
+                };
+                collapsedData.push(collapsedEntry);
+                // Start new bucket with current entry
+                bucketStartTime = startTime;
+                bucketEndTime = endTime;
+                bucketContent = [content];
+                bucketDuration = entryDuration;
+            }
+        });
+        // Add the last bucket if it exists
+        if (bucketContent.length > 0) {
+            const collapsedEntry = {
+                start_time: bucketStartTime,
+                end_time: bucketEndTime,
+                content: bucketContent.join(' ')
+            };
+            collapsedData.push(collapsedEntry);
+        }
+        return collapsedData;
+    }
 }
 exports.ParseSourceContent = ParseSourceContent;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "only_ever_generator",
-  "version": "0.4.6",
+  "version": "0.4.8",
   "main": "dist/index.js",
   "scripts": {
     "start": "npm run build && nodemon dist/index.js",

package/src/bootstrap/app.ts CHANGED Viewed

@@ -78,10 +78,10 @@ export class OnlyEverGenerator {
           responseToReturn.push(this.cardgenResponse);
           /// check if gap fill is required ie coverage determination
-          // if(this.cardgenResponse.status_code == 200) {
-          //   this.gapFillResponse = await this._generationForGapFill(this.typologyResponse, this.cardgenResponse);
-          //   responseToReturn.push(this.gapFillResponse);
-          // }
+          if(this.cardgenResponse.status_code == 200) {
+            this.gapFillResponse = await this._generationForGapFill(this.typologyResponse, this.cardgenResponse);
+            responseToReturn.push(this.gapFillResponse);
+          }
         }
     }
@@ -106,12 +106,14 @@ export class OnlyEverGenerator {
         gapFill.remainingConcepts.length !== 0 ||
         gapFill.remainingFacts.length !== 0
       ) {
+        this.typologyResponse.facts = gapFill.remainingFacts;
+        this.typologyResponse.concepts = gapFill.remainingConcepts;
         response = await this.generateCard(
          this.promptForCardGen +
             "Generate cards only suitable for the given remaining concepts and facts" +
-            JSON.stringify(gapFill) +
-            "Exclude generating  cards with content in the following",
-            JSON.stringify(cardGenData.cards_data),
+            JSON.stringify(gapFill) ,
+            "",
           true
         );
       }

package/src/constants/prompts/card_gen_prompt.ts CHANGED Viewed

@@ -1,4 +1,5 @@
-const promptString: string = `As a dedicated assistant at a learning company, your role is to analyze educational content and create test cards that help learners understand and remember key concepts and facts. You will be provided with:
+const promptString: string = `
+As a dedicated assistant at a learning company, your role is to analyze educational content and create test cards that help learners understand and remember key concepts and facts. You will be provided with:
 1. Title of the source
 2. Main headings
@@ -12,7 +13,7 @@ const promptString: string = `As a dedicated assistant at a learning company, yo
 2. Generate test cards for concepts: Take each concept and re-read the text under the reference heading for that concept. Start by trying to create a card that is at the highest bloom level possible (5 being the highest). Then work your way down to the lower bloom levels. Generate as many cards as possible for each concept. Keep going through the list of concepts till you have completed all of them.
 3. Generate test cards for facts: Take each fact and re-read the text under the reference heading for that fact. Generate as many cards as possible to test that concept. Keep going through the list of concepts till you have completed all of them.
-**Note:** Further detailed instructions on how to create the content for each test card type will be provided subsequently.
+**Note:** Further detailed instructions on how to create the content, references and bloom level for each test card type will be provided subsequently.
 **Format your response in the following JSON format:**
@@ -22,18 +23,12 @@ json
         {
             "type": "mcq" | "cloze" | "match",
             "card_content": "{content}",
-            "concepts": [
-                "concept1",
-                "concept2",
-                "..."
-            ],
-            "facts": [
-                "fact1",
-                "fact2",
-                "..."
-            ],
+            "concepts": [{concept1}, {concept2}, "..."],
+            "facts": [{fact1}, {fact2}, {...}],
             "bloom_level": 1 | 2 | 3 | 4 | 5
-        }
+        },
+        {... as many as possible}
     ]
 }
@@ -41,7 +36,7 @@ json
 **Criteria:**
 * Each test card must include at least one concept or fact.
-* Each concept and fact must have at least one test card.
+* Each concept and fact MUST HAVE at least one test card associated with it.
 * The final output should include test cards that cover the first 5 levels of Bloom's Taxonomy.
 **Further Instructions:**
@@ -83,23 +78,23 @@ json
     [
         {
             "concept_text": "concept1_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {
             "concept_text": "concept2_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {...}
     ],
     "facts":
     [
             {
-                "factt_text": "fact1_content",
-                "reference": "source_title#main_heading"
+                "fact_text": "fact1_content",
+                "reference": "main_heading"
             },
             {
                 "fact_text": "fact2_content",
-                "reference": "source_title#main_heading"
+                "reference": "main_heading"
             },
             {...}
     ],
@@ -132,92 +127,91 @@ json
     [
         {
             "concept_text": "concept1_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {
             "concept_text": "concept2_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {...}
     ],
     "facts":
     [
             {
-                "factt_text": "fact1_content",
-                "reference": "source_title#main_heading"
+                "fact_text": "fact1_content",
+                "reference": "main_heading"
             },
             {
                 "fact_text": "fact2_content",
-                "reference": "source_title#main_heading"
+                "reference": "main_heading"
             },
             {...}
     ],
     "bloom_level": <1-5>
 }
-•	Minimum choices required: 2
-•	Maximum choices allowed: 8
-•	Minimum correct choices required: 1
-•	Maximum character length for the prompt: 320
-•	Maximum character length for an individual cloze: 90
+* Minimum choices required: 2
+* Maximum choices allowed: 8
+* Minimum correct choices required: 1
+* Maximum character length for the prompt: 320
+* Maximum character length for an individual cloze: 90
 4.	Match: Pairing items.
 json
 {
     "type": "match",
-    "card_content": [
-       {
-        "left_item": "left choice",
-        "right_item": [right item]
-       },
-       {
-        "left_item":" left choice",
-        "right_item": [right item]
-       },
-       {
-        "left_item": "left choice",
-        "right_item": [right item]
-       },
-        "... up to 8 total pairs"
-    ],
+    "card_content" : [
+        {
+            "left_item" : "left_item text",
+            "right_item" : ["right_item text" ]
+        },
+        {
+            "left_item" : "left_item text",
+            "right_item" : ["right_item text"]
+        },
+        {"... up to 8 total pairs"}
+    ],
     "concepts":
     [
         {
             "concept_text": "concept1_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {
             "concept_text": "concept2_content",
-            "reference": "source_title#main_heading"
+            "reference": "main_heading"
         },
         {...}
     ],
     "facts":
     [
             {
-                "factt_text": "fact1_content",
-                "reference": "source_title#main_heading"
+                "fact_text": "fact1_content",
+                "reference": "main_heading"
             },
             {
                 "fact_text": "fact2_content",
-                "reference": "source_title#main_heading"
+                "reference": "main_heading"
             },
             {...}
     ],
     "bloom_level": <1-5>
 }
-•	Maximum character length for each item in a pair: 42
+* Maximum character length for each item in a pair: 42
+* Duplicate items are allowed on the left side but not on the right side. Or in other words the same item on the left can be paired with multiple items on the right.
-** Criteria **
-1. Ensure that you produce at least one if not more card for each concept and fact.
-2. For each concept and fact start by trying to create a card at the highest bloom level possible.
-3. Do not skip any concepts or facts, and be thorough in your coverage.
-4. Cards should span across different levels of Bloom’s Taxonomy, from level 1 (Remembering) to level 5 (Evaluating), but exclude level 6 (Creating)
+** Overall Criteria for Testing Cards **
+1. Each card should present the learner with a unique challenge that improves their learning.
+2. Ensure that you produce at least one if not more card for each concept and fact.
+3. For each concept and fact start by trying to create a card at the highest bloom level possible.
+4. Do not skip any concepts or facts, and be thorough in your coverage.
+5. Cards should span across different levels of Bloom’s Taxonomy, from level 1 (Remembering) to level 5 (Evaluating), but exclude level 6 (Creating).
 Once you are done generating the test cards. Go back and evaulate the full list of concepts and facts provided as the input.
 Are there any concept or fact that don't have a test card yet? If yes, go back and create one.

package/src/constants/prompts/typology_prompt.ts CHANGED Viewed

@@ -61,7 +61,7 @@ Extract key concepts within the content after classifying the field. This is a c
 2. **Inclusion Criteria**: Include a concept only if it is discussed in detail, meaning it is explained thoroughly, tied to specific examples, or highlighted as a critical part of the subject matter.
 3. **How to describe a concept**: The concept should be described so that a reader can comprehend the gist of it.
 4. **Character Limit**: Maintain a limit of 60 characters for the  to ensure each concept is concise yet informative.
-5. **Reference**: Every concept must include a reference. A reference can either be the entire source or a specific heading in the source. The reference indicates the part of the text that is most relevant for that particular concept. Whenever possible, pick a main heading to direct the user to the most relevant part of the source material. The reference schema is as follows: source_title#main_heading, where #main_heading is optional. If a concept needs to reference multiple sections or the entire source then simply leave the reference as empty.
+5. **Reference**: Every concept must include a reference. A reference can either be the entire source or a specific heading in the source. Whenever possible, pick a main heading to direct the user to the most relevant part of the source material. The heading must exactly match one of the headings provided to you. Sometimes concepts may need to reference the entire text or multiple headings, leave the reference empty for such cases.
 List the concepts in the following JSON format:
@@ -80,9 +80,10 @@ After classifying the content and identifying key concepts, proceed to extract a
 1. **Definition of a Fact**: Ensure each fact is a standalone piece of information that is concrete and can be independently verified.
 2. **Selection Criteria**: Choose facts based on their significance to the content's main themes or concepts, their educational value, or their foundational role in the subject.
 3. **Character Limit**: Maintain a limit of 60 characters for the  to ensure each message is concise yet informative.
-4. **Reference**: Every fact must include a reference. The reference indicates the part of the text that is most relevant for that particular concept. Whenever possible, pick a main heading to direct the user to the most relevant part of the source material. The reference schema is as follows: source_title#main_heading, where #main_heading is optional. If a fact needs to reference multiple sections or the entire source then simply leave the reference as empty.
+4. **Reference**: Every fact must include a reference. A reference can either be the entire source or a specific heading in the source. Whenever possible, pick a main heading to direct the user to the most relevant part of the source material. The heading must exactly match one of the headings provided to you. Sometimes facts may need to reference the entire text or multiple headings, leave the reference empty for such cases.
 List the facts in the following JSON format:
 json
 "facts":
     [
@@ -92,6 +93,7 @@ json
         },
         {...}
     ]
 After analyzing the content, classifying its field, and identifying key concepts, and facts, assess whether the discovered elements warrant the creation of testing (quiz) materials.
 Consider if these elements provide significant educational value to an average learner by enhancing understanding, offering practical applications, or supporting crucial educational goals. If you decide that the source does not hold educational value that is worthy of generating testing material or quizzes for then please provide a reason in less than 90 characters.
@@ -125,7 +127,6 @@ json
     "summary_cards": ["summary_card1_content", "summary_card2_content", "summary_card3_content", "..."]
 }
 `;

package/src/parse/parse_source_content.ts CHANGED Viewed

@@ -1,5 +1,15 @@
 export class ParseSourceContent{
     public content: any;
+    /// Format of Content
+    // content: {
+    //     title: source.title,
+    //     headings: source.headings,
+    //     content: source.content,
+    //     fields: fields,
+    //     taxonomy: source.source_taxonomy,
+    //     type: source.source_type
+    //   },
     titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
     block_types_toremove = ['table','empty_line'];
@@ -8,9 +18,14 @@ export class ParseSourceContent{
     }
     parseData() {
-        // if(this.content.type == 'source') {
+            let sourceType = this.content.type;
+            let afterSanitized;
+            if(sourceType == "video"){
+                afterSanitized = this.parseVideoContent(this.content.content);
+            }else{
             let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
-            let afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
+            afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
+        }
             return {
                 type: this.content.type,
                 title: this.content.title,
@@ -40,7 +55,7 @@ export class ParseSourceContent{
         return dataAfterRemoving;
     }
-    sanitizeWikiContent(content: String) {
+    sanitizeTextContent(content: String) {
         // Remove newline characters
         content = content.replace(/\\n/g, ' ');
@@ -68,7 +83,7 @@ export class ParseSourceContent{
             for (let key in block) {
                 let value = block[key];
                 if (typeof value === 'string') {
-                    sanitizedBlock[key] = this.sanitizeWikiContent(value);
+                    sanitizedBlock[key] = this.sanitizeTextContent(value);
                 } else if (Array.isArray(value)) {
                     sanitizedBlock[key] = this.sanitizeBlocks(value);
                 } else {
@@ -79,6 +94,105 @@ export class ParseSourceContent{
         });
         return sanitizedBlocks;
     }
+    parseVideoContent(data: Array<any>){ // Builds the sanitized transcript for a "video" source from its content blocks.
+        let timeCodes :Array<any> = [];
+        data.map((e) => timeCodes.push(...e.children)); // flatten every block's `children` into one list; map is used only for its side effect
+        let cleanedData = this.cleanTranscript(timeCodes); // skip bracketed non-speech entries, normalize whitespace, keep timing + text
+        let collapsedData = this.collapseTimeCodes(cleanedData,100); // merge into buckets capped at 100 (overrides the default of 30; presumably seconds)
+        return collapsedData;
+    }
+    // Tells whether a transcript entry is a non-speech marker, i.e. its text is wrapped in [] (the caller skips such entries; nothing is removed here).
+ isNonSpeech(content: string) {
+    // True when the trimmed content starts with '[' and ends with ']'. `.` does not match line breaks, so content containing a newline never matches.
+    return /^\[.*\]$/.test(content.trim());
+}
+// Reduces raw transcript entries to speech only: skips bracketed non-speech entries and keeps just timing and text.
+ cleanTranscript(data: Array<any>) {
+    // Returns a new array of { start_time, end_time, content } in input order; the input entries are only read, never modified.
+    const cleanedData = <any>[];
+    data.forEach(entry => {
+        let content = (entry.content || '').trim(); // missing or falsy content is treated as ''; such entries are still emitted below
+        // Skip entries that are entirely a bracketed non-speech marker
+        if (this.isNonSpeech(content)) return;
+        // Collapse every run of whitespace (including line breaks) into a single space
+        content = content.replace(/\s+/g, ' ');
+        // Map camelCase startTime/endTime to snake_case keys and drop every other field of the entry
+        const currentEntry = {
+            start_time: entry.startTime,
+            end_time: entry.endTime,
+            content: content
+        };
+        cleanedData.push(currentEntry);
+    });
+    return cleanedData;
+}
+// Merges consecutive transcript entries into buckets whose summed entry durations stay within maxDuration (default 30; presumably seconds).
+  collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
+    // Entries are consumed in input order; each bucket becomes { start_time, end_time, content } with the contents joined by a single space.
+    const collapsedData = [];
+    let  bucketStartTime: number | null = null;
+    let bucketEndTime : number | null = null;
+    let bucketContent : Array<any> = [];
+    let bucketDuration = 0.0; // sum of (end_time - start_time) over the bucket's entries; gaps between entries are not counted
+    data.forEach(entry => {
+        const startTime = entry.start_time;
+        const endTime = entry.end_time;
+        const content = entry.content;
+        const entryDuration = endTime - startTime;
+        if (bucketStartTime === null) {
+            // Very first entry: open the initial bucket (accepted even if it alone exceeds maxDuration)
+            bucketStartTime = startTime;
+            bucketEndTime = endTime;
+            bucketContent.push(content);
+            bucketDuration = entryDuration;
+        } else if ((bucketDuration + entryDuration) <= maxDuration) {
+            // Entry still fits: extend the current bucket's end time and text
+            bucketEndTime = endTime;
+            bucketContent.push(content);
+            bucketDuration += entryDuration;
+        } else {
+            // Entry would overflow the limit: emit the current bucket first
+            const collapsedEntry = {
+                start_time: bucketStartTime,
+                end_time: bucketEndTime,
+                content: bucketContent.join(' ')
+            };
+            collapsedData.push(collapsedEntry);
+            // Open a new bucket seeded with the current entry (even if this single entry exceeds maxDuration)
+            bucketStartTime = startTime;
+            bucketEndTime = endTime;
+            bucketContent = [content];
+            bucketDuration = entryDuration;
+        }
+    });
+    // Flush the final, still-open bucket (skipped only when data was empty)
+    if (bucketContent.length > 0) {
+        const collapsedEntry = {
+            start_time: bucketStartTime,
+            end_time: bucketEndTime,
+            content: bucketContent.join(' ')
+        };
+        collapsedData.push(collapsedEntry);
+    }
+    return collapsedData;
+}
 }