only_ever_generator 0.4.7 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,6 +85,8 @@ class OnlyEverGenerator {
85
85
  let response;
86
86
  if (gapFill.remainingConcepts.length !== 0 ||
87
87
  gapFill.remainingFacts.length !== 0) {
88
+ this.typologyResponse.facts = gapFill.remainingFacts;
89
+ this.typologyResponse.concepts = gapFill.remainingConcepts;
88
90
  response = yield this.generateCard(this.promptForCardGen +
89
91
  "Generate cards only suitable for the given remaining concepts and facts" +
90
92
  JSON.stringify(gapFill), "", true);
@@ -3,14 +3,29 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.ParseSourceContent = void 0;
4
4
  class ParseSourceContent {
5
5
  constructor(sourceContent) {
6
+ /// Format of Content
7
+ // content: {
8
+ // title: source.title,
9
+ // headings: source.headings,
10
+ // content: source.content,
11
+ // fields: fields,
12
+ // taxonomy: source.source_taxonomy,
13
+ // type: source.source_type
14
+ // },
6
15
  this.titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
7
16
  this.block_types_toremove = ['table', 'empty_line'];
8
17
  this.content = sourceContent;
9
18
  }
10
19
  parseData() {
11
- // if(this.content.type == 'source') {
12
- let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
13
- let afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
20
+ let sourceType = this.content.type;
21
+ let afterSanitized;
22
+ if (sourceType == "video") {
23
+ afterSanitized = this.parseVideoContent(this.content.content);
24
+ }
25
+ else {
26
+ let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
27
+ afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
28
+ }
14
29
  return {
15
30
  type: this.content.type,
16
31
  title: this.content.title,
@@ -36,7 +51,7 @@ class ParseSourceContent {
36
51
  }
37
52
  return dataAfterRemoving;
38
53
  }
39
- sanitizeWikiContent(content) {
54
+ sanitizeTextContent(content) {
40
55
  // Remove newline characters
41
56
  content = content.replace(/\\n/g, ' ');
42
57
  // Remove internal link references, keeping only the link text
@@ -58,7 +73,7 @@ class ParseSourceContent {
58
73
  for (let key in block) {
59
74
  let value = block[key];
60
75
  if (typeof value === 'string') {
61
- sanitizedBlock[key] = this.sanitizeWikiContent(value);
76
+ sanitizedBlock[key] = this.sanitizeTextContent(value);
62
77
  }
63
78
  else if (Array.isArray(value)) {
64
79
  sanitizedBlock[key] = this.sanitizeBlocks(value);
@@ -71,5 +86,90 @@ class ParseSourceContent {
71
86
  });
72
87
  return sanitizedBlocks;
73
88
  }
89
+ parseVideoContent(data) {
90
+ let finalChapters = [];
91
+ // let cleanedData = this.cleanTranscript(timeCodes);
92
+ data.forEach((e) => {
93
+ let combinedContent = this.cleanTranscript(e);
94
+ finalChapters.push({
95
+ "startTime": e.startTime,
96
+ "endTime": e.endTime,
97
+ "content": combinedContent,
98
+ "title": e.content
99
+ });
100
+ });
101
+ return finalChapters;
102
+ }
103
+ // remove content inside [] which denotes non-speech sounds
104
+ isNonSpeech(content) {
105
+ // Check if the content is non-speech (enclosed in square brackets).
106
+ return /^\[.*\]$/.test(content.trim());
107
+ }
108
+ // remove non-essential content
109
+ cleanTranscript(data) {
110
+ var _a;
111
+ let finalContent = '';
112
+ let children = (_a = data.children) !== null && _a !== void 0 ? _a : [];
113
+ children.forEach((e) => {
114
+ let content = (e.content || "").trim();
115
+ if (this.isNonSpeech(content))
116
+ return;
117
+ content = content.replace(/\s+/g, ' ');
118
+ finalContent += content;
119
+ });
120
+ return finalContent;
121
+ }
122
+ // collapse the timecode to 30 seconds
123
+ collapseTimeCodes(data, maxDuration = 30.0) {
124
+ // Collapse time codes into buckets of approximately maxDuration seconds.
125
+ const collapsedData = [];
126
+ let bucketStartTime = null;
127
+ let bucketEndTime = null;
128
+ let bucketContent = [];
129
+ let bucketDuration = 0.0;
130
+ data.forEach(entry => {
131
+ const startTime = entry.start_time;
132
+ const endTime = entry.end_time;
133
+ const content = entry.content;
134
+ const entryDuration = endTime - startTime;
135
+ if (bucketStartTime === null) {
136
+ // Start a new bucket
137
+ bucketStartTime = startTime;
138
+ bucketEndTime = endTime;
139
+ bucketContent.push(content);
140
+ bucketDuration = entryDuration;
141
+ }
142
+ else if ((bucketDuration + entryDuration) <= maxDuration) {
143
+ // Add to current bucket
144
+ bucketEndTime = endTime;
145
+ bucketContent.push(content);
146
+ bucketDuration += entryDuration;
147
+ }
148
+ else {
149
+ // Close current bucket and start a new one
150
+ const collapsedEntry = {
151
+ start_time: bucketStartTime,
152
+ end_time: bucketEndTime,
153
+ content: bucketContent.join(' ')
154
+ };
155
+ collapsedData.push(collapsedEntry);
156
+ // Start new bucket with current entry
157
+ bucketStartTime = startTime;
158
+ bucketEndTime = endTime;
159
+ bucketContent = [content];
160
+ bucketDuration = entryDuration;
161
+ }
162
+ });
163
+ // Add the last bucket if it exists
164
+ if (bucketContent.length > 0) {
165
+ const collapsedEntry = {
166
+ start_time: bucketStartTime,
167
+ end_time: bucketEndTime,
168
+ content: bucketContent.join(' ')
169
+ };
170
+ collapsedData.push(collapsedEntry);
171
+ }
172
+ return collapsedData;
173
+ }
74
174
  }
75
175
  exports.ParseSourceContent = ParseSourceContent;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "only_ever_generator",
3
- "version": "0.4.7",
3
+ "version": "0.4.9",
4
4
  "main": "dist/index.js",
5
5
  "scripts": {
6
6
  "start": "npm run build && nodemon dist/index.js",
@@ -106,6 +106,8 @@ export class OnlyEverGenerator {
106
106
  gapFill.remainingConcepts.length !== 0 ||
107
107
  gapFill.remainingFacts.length !== 0
108
108
  ) {
109
+ this.typologyResponse.facts = gapFill.remainingFacts;
110
+ this.typologyResponse.concepts = gapFill.remainingConcepts;
109
111
  response = await this.generateCard(
110
112
  this.promptForCardGen +
111
113
  "Generate cards only suitable for the given remaining concepts and facts" +
@@ -1,5 +1,15 @@
1
1
  export class ParseSourceContent{
2
2
  public content: any;
3
+ /// Format of Content
4
+ // content: {
5
+ // title: source.title,
6
+ // headings: source.headings,
7
+ // content: source.content,
8
+ // fields: fields,
9
+ // taxonomy: source.source_taxonomy,
10
+ // type: source.source_type
11
+ // },
12
+
3
13
 
4
14
  titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
5
15
  block_types_toremove = ['table','empty_line'];
@@ -8,9 +18,14 @@ export class ParseSourceContent{
8
18
  }
9
19
 
10
20
  parseData() {
11
- // if(this.content.type == 'source') {
21
+ let sourceType = this.content.type;
22
+ let afterSanitized;
23
+ if(sourceType == "video"){
24
+ afterSanitized = this.parseVideoContent(this.content.content);
25
+ }else{
12
26
  let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
13
- let afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
27
+ afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
28
+ }
14
29
  return {
15
30
  type: this.content.type,
16
31
  title: this.content.title,
@@ -40,7 +55,7 @@ export class ParseSourceContent{
40
55
  return dataAfterRemoving;
41
56
  }
42
57
 
43
- sanitizeWikiContent(content: String) {
58
+ sanitizeTextContent(content: String) {
44
59
  // Remove newline characters
45
60
  content = content.replace(/\\n/g, ' ');
46
61
 
@@ -68,7 +83,7 @@ export class ParseSourceContent{
68
83
  for (let key in block) {
69
84
  let value = block[key];
70
85
  if (typeof value === 'string') {
71
- sanitizedBlock[key] = this.sanitizeWikiContent(value);
86
+ sanitizedBlock[key] = this.sanitizeTextContent(value);
72
87
  } else if (Array.isArray(value)) {
73
88
  sanitizedBlock[key] = this.sanitizeBlocks(value);
74
89
  } else {
@@ -79,6 +94,104 @@ export class ParseSourceContent{
79
94
  });
80
95
  return sanitizedBlocks;
81
96
  }
97
+
98
/**
 * Builds one chapter record per entry of a video source.
 *
 * Each record carries the entry's `startTime` and `endTime`, the entry's own
 * `content` under `title`, and the text returned by `cleanTranscript` for
 * that entry under `content`.
 *
 * @param data - Entries of the video source, in order.
 * @returns The chapter records, in the same order as `data`.
 */
parseVideoContent(data: Array<any>){
  const chapters: Array<any> = data.map((chapter) => {
    // Computed first, before the entry's own fields are read.
    const transcript = this.cleanTranscript(chapter);
    return {
      "startTime": chapter.startTime,
      "endTime": chapter.endTime,
      "content": transcript,
      "title": chapter.content
    };
  });

  return chapters;
}
115
+
116
+ // remove content inside [] which denotes non-speech sounds
117
/**
 * Reports whether a caption line consists only of bracketed cues such as
 * `[Music]` or `[Applause] [Laughter]`.
 *
 * A line that starts and ends with a bracketed cue but has text outside the
 * brackets (`[Music] hello [Applause]`) is speech and returns false.
 * A cue that spans a line break is still recognised as a cue.
 *
 * @param content - Caption text; surrounding whitespace is ignored.
 * @returns true when every non-whitespace character sits inside brackets.
 */
isNonSpeech(content: string) {
  const text = content.trim();
  if (!text.startsWith('[') || !text.endsWith(']')) {
    return false;
  }

  let depth = 0;
  for (const ch of text) {
    if (ch === '[') {
      depth += 1;
    } else if (ch === ']') {
      // Clamp at zero so a stray closing bracket is tolerated.
      depth = Math.max(0, depth - 1);
    } else if (depth === 0 && ch.trim() !== '') {
      // Visible text outside any bracket means the line contains speech.
      return false;
    }
  }
  return true;
}
121
+
122
+ // remove non-essential content
123
/**
 * Joins the spoken text of an entry's `children` into a single string.
 *
 * Each child's `content` is trimmed. Children that are empty or that
 * `isNonSpeech` reports as non-speech are skipped. Whitespace runs inside a
 * segment are collapsed to one space, and the kept segments are joined with
 * a single space so words from adjacent segments do not fuse together.
 *
 * @param data - Entry whose `children` hold the transcript segments; a
 *               missing entry or missing `children` yields an empty string.
 * @returns The cleaned transcript text.
 */
cleanTranscript(data: any) {
  const children: Array<any> = data?.children ?? [];
  const spoken: string[] = [];

  for (const child of children) {
    const text: string = (child.content || "").trim();

    if (text === "" || this.isNonSpeech(text)) continue;

    spoken.push(text.replace(/\s+/g, ' '));
  }

  return spoken.join(' ');
}
138
+
139
+ // collapse the timecode to 30 seconds
140
/**
 * Merges consecutive timed entries into larger buckets.
 *
 * Entries are appended to the open bucket while the sum of their own
 * durations (`end_time - start_time`) stays within `maxDuration`. Gaps
 * between entries are not counted. The first entry of a bucket is always
 * accepted, whatever its duration.
 *
 * @param data - Entries with `start_time`, `end_time` and `content`.
 * @param maxDuration - Upper bound for the summed durations of one bucket.
 * @returns Buckets as `{ start_time, end_time, content }`, where `content`
 *          is the member contents joined by a space.
 */
collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
  const merged = [];
  let openStart: number | null = null;
  let openEnd: number | null = null;
  let openParts: Array<any> = [];
  let openDuration = 0.0;

  // Snapshot of the bucket currently being filled.
  const currentBucket = () => ({
    start_time: openStart,
    end_time: openEnd,
    content: openParts.join(' ')
  });

  for (const entry of data) {
    const { start_time: startTime, end_time: endTime, content } = entry;
    const span = endTime - startTime;

    if (openStart === null) {
      // Nothing open yet: this entry opens the bucket.
      openStart = startTime;
      openEnd = endTime;
      openParts.push(content);
      openDuration = span;
      continue;
    }

    if (openDuration + span <= maxDuration) {
      // Still fits: extend the open bucket.
      openEnd = endTime;
      openParts.push(content);
      openDuration += span;
      continue;
    }

    // Does not fit: emit the open bucket and restart from this entry.
    merged.push(currentBucket());
    openStart = startTime;
    openEnd = endTime;
    openParts = [content];
    openDuration = span;
  }

  // Emit whatever is left in the open bucket.
  if (openParts.length > 0) {
    merged.push(currentBucket());
  }

  return merged;
}
194
+
82
195
 
83
196
 
84
197
  }