only_ever_generator 0.4.7 → 0.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,6 +85,8 @@ class OnlyEverGenerator {
85
85
  let response;
86
86
  if (gapFill.remainingConcepts.length !== 0 ||
87
87
  gapFill.remainingFacts.length !== 0) {
88
+ this.typologyResponse.facts = gapFill.remainingFacts;
89
+ this.typologyResponse.concepts = gapFill.remainingConcepts;
88
90
  response = yield this.generateCard(this.promptForCardGen +
89
91
  "Generate cards only suitable for the given remaining concepts and facts" +
90
92
  JSON.stringify(gapFill), "", true);
@@ -3,14 +3,29 @@ Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.ParseSourceContent = void 0;
4
4
  class ParseSourceContent {
5
5
  constructor(sourceContent) {
6
+ /// Format of Content
7
+ // content: {
8
+ // title: source.title,
9
+ // headings: source.headings,
10
+ // content: source.content,
11
+ // fields: fields,
12
+ // taxonomy: source.source_taxonomy,
13
+ // type: source.source_type
14
+ // },
6
15
  this.titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
7
16
  this.block_types_toremove = ['table', 'empty_line'];
8
17
  this.content = sourceContent;
9
18
  }
10
19
  parseData() {
11
- // if(this.content.type == 'source') {
12
- let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
13
- let afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
20
+ let sourceType = this.content.type;
21
+ let afterSanitized;
22
+ if (sourceType == "video") {
23
+ afterSanitized = this.parseVideoContent(this.content.content);
24
+ }
25
+ else {
26
+ let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
27
+ afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
28
+ }
14
29
  return {
15
30
  type: this.content.type,
16
31
  title: this.content.title,
@@ -36,7 +51,7 @@ class ParseSourceContent {
36
51
  }
37
52
  return dataAfterRemoving;
38
53
  }
39
- sanitizeWikiContent(content) {
54
+ sanitizeTextContent(content) {
40
55
  // Remove newline characters
41
56
  content = content.replace(/\\n/g, ' ');
42
57
  // Remove internal link references, keeping only the link text
@@ -58,7 +73,7 @@ class ParseSourceContent {
58
73
  for (let key in block) {
59
74
  let value = block[key];
60
75
  if (typeof value === 'string') {
61
- sanitizedBlock[key] = this.sanitizeWikiContent(value);
76
+ sanitizedBlock[key] = this.sanitizeTextContent(value);
62
77
  }
63
78
  else if (Array.isArray(value)) {
64
79
  sanitizedBlock[key] = this.sanitizeBlocks(value);
@@ -71,5 +86,90 @@ class ParseSourceContent {
71
86
  });
72
87
  return sanitizedBlocks;
73
88
  }
89
+ parseVideoContent(data) {
90
+ let timeCodes = [];
91
+ data.map((e) => timeCodes.push(...e.children));
92
+ let cleanedData = this.cleanTranscript(timeCodes);
93
+ let collapsedData = this.collapseTimeCodes(cleanedData, 100);
94
+ return collapsedData;
95
+ }
96
+ // remove content inside [] which denotes non-speech sounds
97
+ isNonSpeech(content) {
98
+ // Check if the content is non-speech (enclosed in square brackets).
99
+ return /^\[.*\]$/.test(content.trim());
100
+ }
101
+ // remove non-essential content
102
+ cleanTranscript(data) {
103
+ // Clean the transcript by removing non-speech content, normalizing whitespace, and keeping only necessary fields.
104
+ const cleanedData = [];
105
+ data.forEach(entry => {
106
+ let content = (entry.content || '').trim();
107
+ // Skip non-speech content
108
+ if (this.isNonSpeech(content))
109
+ return;
110
+ // Normalize whitespace in content
111
+ content = content.replace(/\s+/g, ' ');
112
+ // Only keep start_time, end_time, content
113
+ const currentEntry = {
114
+ start_time: entry.startTime,
115
+ end_time: entry.endTime,
116
+ content: content
117
+ };
118
+ cleanedData.push(currentEntry);
119
+ });
120
+ return cleanedData;
121
+ }
122
+ // collapse the timecode to 30 seconds
123
+ collapseTimeCodes(data, maxDuration = 30.0) {
124
+ // Collapse time codes into buckets of approximately maxDuration seconds.
125
+ const collapsedData = [];
126
+ let bucketStartTime = null;
127
+ let bucketEndTime = null;
128
+ let bucketContent = [];
129
+ let bucketDuration = 0.0;
130
+ data.forEach(entry => {
131
+ const startTime = entry.start_time;
132
+ const endTime = entry.end_time;
133
+ const content = entry.content;
134
+ const entryDuration = endTime - startTime;
135
+ if (bucketStartTime === null) {
136
+ // Start a new bucket
137
+ bucketStartTime = startTime;
138
+ bucketEndTime = endTime;
139
+ bucketContent.push(content);
140
+ bucketDuration = entryDuration;
141
+ }
142
+ else if ((bucketDuration + entryDuration) <= maxDuration) {
143
+ // Add to current bucket
144
+ bucketEndTime = endTime;
145
+ bucketContent.push(content);
146
+ bucketDuration += entryDuration;
147
+ }
148
+ else {
149
+ // Close current bucket and start a new one
150
+ const collapsedEntry = {
151
+ start_time: bucketStartTime,
152
+ end_time: bucketEndTime,
153
+ content: bucketContent.join(' ')
154
+ };
155
+ collapsedData.push(collapsedEntry);
156
+ // Start new bucket with current entry
157
+ bucketStartTime = startTime;
158
+ bucketEndTime = endTime;
159
+ bucketContent = [content];
160
+ bucketDuration = entryDuration;
161
+ }
162
+ });
163
+ // Add the last bucket if it exists
164
+ if (bucketContent.length > 0) {
165
+ const collapsedEntry = {
166
+ start_time: bucketStartTime,
167
+ end_time: bucketEndTime,
168
+ content: bucketContent.join(' ')
169
+ };
170
+ collapsedData.push(collapsedEntry);
171
+ }
172
+ return collapsedData;
173
+ }
74
174
  }
75
175
  exports.ParseSourceContent = ParseSourceContent;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "only_ever_generator",
3
- "version": "0.4.7",
3
+ "version": "0.4.8",
4
4
  "main": "dist/index.js",
5
5
  "scripts": {
6
6
  "start": "npm run build && nodemon dist/index.js",
@@ -106,6 +106,8 @@ export class OnlyEverGenerator {
106
106
  gapFill.remainingConcepts.length !== 0 ||
107
107
  gapFill.remainingFacts.length !== 0
108
108
  ) {
109
+ this.typologyResponse.facts = gapFill.remainingFacts;
110
+ this.typologyResponse.concepts = gapFill.remainingConcepts;
109
111
  response = await this.generateCard(
110
112
  this.promptForCardGen +
111
113
  "Generate cards only suitable for the given remaining concepts and facts" +
@@ -1,5 +1,15 @@
1
1
  export class ParseSourceContent{
2
2
  public content: any;
3
+ /// Format of Content
4
+ // content: {
5
+ // title: source.title,
6
+ // headings: source.headings,
7
+ // content: source.content,
8
+ // fields: fields,
9
+ // taxonomy: source.source_taxonomy,
10
+ // type: source.source_type
11
+ // },
12
+
3
13
 
4
14
  titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
5
15
  block_types_toremove = ['table','empty_line'];
@@ -8,9 +18,14 @@ export class ParseSourceContent{
8
18
  }
9
19
 
10
20
  parseData() {
11
- // if(this.content.type == 'source') {
21
+ let sourceType = this.content.type;
22
+ let afterSanitized;
23
+ if(sourceType == "video"){
24
+ afterSanitized = this.parseVideoContent(this.content.content);
25
+ }else{
12
26
  let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
13
- let afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
27
+ afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
28
+ }
14
29
  return {
15
30
  type: this.content.type,
16
31
  title: this.content.title,
@@ -40,7 +55,7 @@ export class ParseSourceContent{
40
55
  return dataAfterRemoving;
41
56
  }
42
57
 
43
- sanitizeWikiContent(content: String) {
58
+ sanitizeTextContent(content: String) {
44
59
  // Remove newline characters
45
60
  content = content.replace(/\\n/g, ' ');
46
61
 
@@ -68,7 +83,7 @@ export class ParseSourceContent{
68
83
  for (let key in block) {
69
84
  let value = block[key];
70
85
  if (typeof value === 'string') {
71
- sanitizedBlock[key] = this.sanitizeWikiContent(value);
86
+ sanitizedBlock[key] = this.sanitizeTextContent(value);
72
87
  } else if (Array.isArray(value)) {
73
88
  sanitizedBlock[key] = this.sanitizeBlocks(value);
74
89
  } else {
@@ -79,6 +94,105 @@ export class ParseSourceContent{
79
94
  });
80
95
  return sanitizedBlocks;
81
96
  }
97
+
98
+ parseVideoContent(data: Array<any>){
99
+ let timeCodes :Array<any> = [];
100
+ data.map((e) => timeCodes.push(...e.children));
101
+ let cleanedData = this.cleanTranscript(timeCodes);
102
+ let collapsedData = this.collapseTimeCodes(cleanedData,100);
103
+ return collapsedData;
104
+
105
+ }
106
+
107
+ // remove content inside [] which denotes non-speech sounds
108
+ isNonSpeech(content: string) {
109
+ // Check if the content is non-speech (enclosed in square brackets).
110
+ return /^\[.*\]$/.test(content.trim());
111
+ }
112
+
113
+ // remove non-essential content
114
+ cleanTranscript(data: Array<any>) {
115
+ // Clean the transcript by removing non-speech content, normalizing whitespace, and keeping only necessary fields.
116
+ const cleanedData = <any>[];
117
+
118
+ data.forEach(entry => {
119
+ let content = (entry.content || '').trim();
120
+
121
+ // Skip non-speech content
122
+ if (this.isNonSpeech(content)) return;
123
+
124
+ // Normalize whitespace in content
125
+ content = content.replace(/\s+/g, ' ');
126
+
127
+ // Only keep start_time, end_time, content
128
+ const currentEntry = {
129
+ start_time: entry.startTime,
130
+ end_time: entry.endTime,
131
+ content: content
132
+ };
133
+
134
+ cleanedData.push(currentEntry);
135
+ });
136
+
137
+ return cleanedData;
138
+ }
139
+
140
+ // collapse the timecode to 30 seconds
141
+ collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
142
+ // Collapse time codes into buckets of approximately maxDuration seconds.
143
+ const collapsedData = [];
144
+ let bucketStartTime: number | null = null;
145
+ let bucketEndTime : number | null = null;
146
+ let bucketContent : Array<any> = [];
147
+ let bucketDuration = 0.0;
148
+
149
+ data.forEach(entry => {
150
+ const startTime = entry.start_time;
151
+ const endTime = entry.end_time;
152
+ const content = entry.content;
153
+ const entryDuration = endTime - startTime;
154
+
155
+ if (bucketStartTime === null) {
156
+ // Start a new bucket
157
+ bucketStartTime = startTime;
158
+ bucketEndTime = endTime;
159
+ bucketContent.push(content);
160
+ bucketDuration = entryDuration;
161
+ } else if ((bucketDuration + entryDuration) <= maxDuration) {
162
+ // Add to current bucket
163
+ bucketEndTime = endTime;
164
+ bucketContent.push(content);
165
+ bucketDuration += entryDuration;
166
+ } else {
167
+ // Close current bucket and start a new one
168
+ const collapsedEntry = {
169
+ start_time: bucketStartTime,
170
+ end_time: bucketEndTime,
171
+ content: bucketContent.join(' ')
172
+ };
173
+ collapsedData.push(collapsedEntry);
174
+
175
+ // Start new bucket with current entry
176
+ bucketStartTime = startTime;
177
+ bucketEndTime = endTime;
178
+ bucketContent = [content];
179
+ bucketDuration = entryDuration;
180
+ }
181
+ });
182
+
183
+ // Add the last bucket if it exists
184
+ if (bucketContent.length > 0) {
185
+ const collapsedEntry = {
186
+ start_time: bucketStartTime,
187
+ end_time: bucketEndTime,
188
+ content: bucketContent.join(' ')
189
+ };
190
+ collapsedData.push(collapsedEntry);
191
+ }
192
+
193
+ return collapsedData;
194
+ }
195
+
82
196
 
83
197
 
84
198
  }