npm - only_ever_generator - Versions diffs - 0.4.7 → 0.4.8 - Mend

only_ever_generator 0.4.7 → 0.4.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/bootstrap/app.js +2 -0
package/dist/parse/parse_source_content.js +105 -5
package/package.json +1 -1
package/src/bootstrap/app.ts +2 -0
package/src/parse/parse_source_content.ts +118 -4

package/dist/bootstrap/app.js CHANGED Viewed

@@ -85,6 +85,8 @@ class OnlyEverGenerator {
             let response;
             if (gapFill.remainingConcepts.length !== 0 ||
                 gapFill.remainingFacts.length !== 0) {
+                this.typologyResponse.facts = gapFill.remainingFacts;
+                this.typologyResponse.concepts = gapFill.remainingConcepts;
                 response = yield this.generateCard(this.promptForCardGen +
                     "Generate cards only suitable for the given remaining concepts and facts" +
                     JSON.stringify(gapFill), "", true);

package/dist/parse/parse_source_content.js CHANGED Viewed

@@ -3,14 +3,29 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.ParseSourceContent = void 0;
 class ParseSourceContent {
     constructor(sourceContent) {
+        /// Format of Content
+        // content: {
+        //     title: source.title,
+        //     headings: source.headings,
+        //     content: source.content,
+        //     fields: fields,
+        //     taxonomy: source.source_taxonomy,
+        //     type: source.source_type
+        //   },
         this.titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
         this.block_types_toremove = ['table', 'empty_line'];
         this.content = sourceContent;
     }
     parseData() {
-        // if(this.content.type == 'source') {
-        let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
-        let afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
+        let sourceType = this.content.type;
+        let afterSanitized;
+        if (sourceType == "video") {
+            afterSanitized = this.parseVideoContent(this.content.content);
+        }
+        else {
+            let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
+            afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
+        }
         return {
             type: this.content.type,
             title: this.content.title,
@@ -36,7 +51,7 @@ class ParseSourceContent {
         }
         return dataAfterRemoving;
     }
-    sanitizeWikiContent(content) {
+    sanitizeTextContent(content) {
         // Remove newline characters
         content = content.replace(/\\n/g, ' ');
         // Remove internal link references, keeping only the link text
@@ -58,7 +73,7 @@ class ParseSourceContent {
             for (let key in block) {
                 let value = block[key];
                 if (typeof value === 'string') {
-                    sanitizedBlock[key] = this.sanitizeWikiContent(value);
+                    sanitizedBlock[key] = this.sanitizeTextContent(value);
                 }
                 else if (Array.isArray(value)) {
                     sanitizedBlock[key] = this.sanitizeBlocks(value);
@@ -71,5 +86,90 @@ class ParseSourceContent {
         });
         return sanitizedBlocks;
     }
+    parseVideoContent(data) {
+        let timeCodes = [];
+        data.map((e) => timeCodes.push(...e.children));
+        let cleanedData = this.cleanTranscript(timeCodes);
+        let collapsedData = this.collapseTimeCodes(cleanedData, 100);
+        return collapsedData;
+    }
+    // remove content inside [] which denotes non-speech sounds
+    isNonSpeech(content) {
+        // Check if the content is non-speech (enclosed in square brackets).
+        return /^\[.*\]$/.test(content.trim());
+    }
+    // remove non-essential content
+    cleanTranscript(data) {
+        // Clean the transcript by removing non-speech content, normalizing whitespace, and keeping only necessary fields.
+        const cleanedData = [];
+        data.forEach(entry => {
+            let content = (entry.content || '').trim();
+            // Skip non-speech content
+            if (this.isNonSpeech(content))
+                return;
+            // Normalize whitespace in content
+            content = content.replace(/\s+/g, ' ');
+            // Only keep start_time, end_time, content
+            const currentEntry = {
+                start_time: entry.startTime,
+                end_time: entry.endTime,
+                content: content
+            };
+            cleanedData.push(currentEntry);
+        });
+        return cleanedData;
+    }
+    // collapse the timecode to 30 seconds
+    collapseTimeCodes(data, maxDuration = 30.0) {
+        // Collapse time codes into buckets of approximately maxDuration seconds.
+        const collapsedData = [];
+        let bucketStartTime = null;
+        let bucketEndTime = null;
+        let bucketContent = [];
+        let bucketDuration = 0.0;
+        data.forEach(entry => {
+            const startTime = entry.start_time;
+            const endTime = entry.end_time;
+            const content = entry.content;
+            const entryDuration = endTime - startTime;
+            if (bucketStartTime === null) {
+                // Start a new bucket
+                bucketStartTime = startTime;
+                bucketEndTime = endTime;
+                bucketContent.push(content);
+                bucketDuration = entryDuration;
+            }
+            else if ((bucketDuration + entryDuration) <= maxDuration) {
+                // Add to current bucket
+                bucketEndTime = endTime;
+                bucketContent.push(content);
+                bucketDuration += entryDuration;
+            }
+            else {
+                // Close current bucket and start a new one
+                const collapsedEntry = {
+                    start_time: bucketStartTime,
+                    end_time: bucketEndTime,
+                    content: bucketContent.join(' ')
+                };
+                collapsedData.push(collapsedEntry);
+                // Start new bucket with current entry
+                bucketStartTime = startTime;
+                bucketEndTime = endTime;
+                bucketContent = [content];
+                bucketDuration = entryDuration;
+            }
+        });
+        // Add the last bucket if it exists
+        if (bucketContent.length > 0) {
+            const collapsedEntry = {
+                start_time: bucketStartTime,
+                end_time: bucketEndTime,
+                content: bucketContent.join(' ')
+            };
+            collapsedData.push(collapsedEntry);
+        }
+        return collapsedData;
+    }
 }
 exports.ParseSourceContent = ParseSourceContent;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "only_ever_generator",
-  "version": "0.4.7",
+  "version": "0.4.8",
   "main": "dist/index.js",
   "scripts": {
     "start": "npm run build && nodemon dist/index.js",

package/src/bootstrap/app.ts CHANGED Viewed

@@ -106,6 +106,8 @@ export class OnlyEverGenerator {
         gapFill.remainingConcepts.length !== 0 ||
         gapFill.remainingFacts.length !== 0
       ) {
+        this.typologyResponse.facts = gapFill.remainingFacts;
+        this.typologyResponse.concepts = gapFill.remainingConcepts;
         response = await this.generateCard(
          this.promptForCardGen +
             "Generate cards only suitable for the given remaining concepts and facts" +

package/src/parse/parse_source_content.ts CHANGED Viewed

@@ -1,5 +1,15 @@
 export class ParseSourceContent{
     public content: any;
+    /// Format of Content
+    // content: {
+    //     title: source.title,
+    //     headings: source.headings,
+    //     content: source.content,
+    //     fields: fields,
+    //     taxonomy: source.source_taxonomy,
+    //     type: source.source_type
+    //   },
     titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
     block_types_toremove = ['table','empty_line'];
@@ -8,9 +18,14 @@ export class ParseSourceContent{
     }
     parseData() {
-        // if(this.content.type == 'source') {
+            let sourceType = this.content.type;
+            let afterSanitized;
+            if(sourceType == "video"){
+                afterSanitized = this.parseVideoContent(this.content.content);
+            }else{
             let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
-            let afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
+            afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
+        }
             return {
                 type: this.content.type,
                 title: this.content.title,
@@ -40,7 +55,7 @@ export class ParseSourceContent{
         return dataAfterRemoving;
     }
-    sanitizeWikiContent(content: String) {
+    sanitizeTextContent(content: String) {
         // Remove newline characters
         content = content.replace(/\\n/g, ' ');
@@ -68,7 +83,7 @@ export class ParseSourceContent{
             for (let key in block) {
                 let value = block[key];
                 if (typeof value === 'string') {
-                    sanitizedBlock[key] = this.sanitizeWikiContent(value);
+                    sanitizedBlock[key] = this.sanitizeTextContent(value);
                 } else if (Array.isArray(value)) {
                     sanitizedBlock[key] = this.sanitizeBlocks(value);
                 } else {
@@ -79,6 +94,105 @@ export class ParseSourceContent{
         });
         return sanitizedBlocks;
     }
+    parseVideoContent(data: Array<any>){
+        let timeCodes :Array<any> = [];
+        data.map((e) => timeCodes.push(...e.children));
+        let cleanedData = this.cleanTranscript(timeCodes);
+        let collapsedData = this.collapseTimeCodes(cleanedData,100);
+        return collapsedData;
+    }
+    // remove content inside [] which denotes non-speech sounds
+ isNonSpeech(content: string) {
+    // Check if the content is non-speech (enclosed in square brackets).
+    return /^\[.*\]$/.test(content.trim());
+}
+// remove non-essential content
+ cleanTranscript(data: Array<any>) {
+    // Clean the transcript by removing non-speech content, normalizing whitespace, and keeping only necessary fields.
+    const cleanedData = <any>[];
+    data.forEach(entry => {
+        let content = (entry.content || '').trim();
+        // Skip non-speech content
+        if (this.isNonSpeech(content)) return;
+        // Normalize whitespace in content
+        content = content.replace(/\s+/g, ' ');
+        // Only keep start_time, end_time, content
+        const currentEntry = {
+            start_time: entry.startTime,
+            end_time: entry.endTime,
+            content: content
+        };
+        cleanedData.push(currentEntry);
+    });
+    return cleanedData;
+}
+// collapse the timecode to 30 seconds
+  collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
+    // Collapse time codes into buckets of approximately maxDuration seconds.
+    const collapsedData = [];
+    let  bucketStartTime: number | null = null;
+    let bucketEndTime : number | null = null;
+    let bucketContent : Array<any> = [];
+    let bucketDuration = 0.0;
+    data.forEach(entry => {
+        const startTime = entry.start_time;
+        const endTime = entry.end_time;
+        const content = entry.content;
+        const entryDuration = endTime - startTime;
+        if (bucketStartTime === null) {
+            // Start a new bucket
+            bucketStartTime = startTime;
+            bucketEndTime = endTime;
+            bucketContent.push(content);
+            bucketDuration = entryDuration;
+        } else if ((bucketDuration + entryDuration) <= maxDuration) {
+            // Add to current bucket
+            bucketEndTime = endTime;
+            bucketContent.push(content);
+            bucketDuration += entryDuration;
+        } else {
+            // Close current bucket and start a new one
+            const collapsedEntry = {
+                start_time: bucketStartTime,
+                end_time: bucketEndTime,
+                content: bucketContent.join(' ')
+            };
+            collapsedData.push(collapsedEntry);
+            // Start new bucket with current entry
+            bucketStartTime = startTime;
+            bucketEndTime = endTime;
+            bucketContent = [content];
+            bucketDuration = entryDuration;
+        }
+    });
+    // Add the last bucket if it exists
+    if (bucketContent.length > 0) {
+        const collapsedEntry = {
+            start_time: bucketStartTime,
+            end_time: bucketEndTime,
+            content: bucketContent.join(' ')
+        };
+        collapsedData.push(collapsedEntry);
+    }
+    return collapsedData;
+}
 }