only_ever_generator 0.4.7 → 0.4.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bootstrap/app.js
CHANGED
|
@@ -85,6 +85,8 @@ class OnlyEverGenerator {
|
|
|
85
85
|
let response;
|
|
86
86
|
if (gapFill.remainingConcepts.length !== 0 ||
|
|
87
87
|
gapFill.remainingFacts.length !== 0) {
|
|
88
|
+
this.typologyResponse.facts = gapFill.remainingFacts;
|
|
89
|
+
this.typologyResponse.concepts = gapFill.remainingConcepts;
|
|
88
90
|
response = yield this.generateCard(this.promptForCardGen +
|
|
89
91
|
"Generate cards only suitable for the given remaining concepts and facts" +
|
|
90
92
|
JSON.stringify(gapFill), "", true);
|
|
@@ -3,14 +3,29 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.ParseSourceContent = void 0;
|
|
4
4
|
class ParseSourceContent {
|
|
5
5
|
constructor(sourceContent) {
|
|
6
|
+
/// Format of Content
|
|
7
|
+
// content: {
|
|
8
|
+
// title: source.title,
|
|
9
|
+
// headings: source.headings,
|
|
10
|
+
// content: source.content,
|
|
11
|
+
// fields: fields,
|
|
12
|
+
// taxonomy: source.source_taxonomy,
|
|
13
|
+
// type: source.source_type
|
|
14
|
+
// },
|
|
6
15
|
this.titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
|
|
7
16
|
this.block_types_toremove = ['table', 'empty_line'];
|
|
8
17
|
this.content = sourceContent;
|
|
9
18
|
}
|
|
10
19
|
parseData() {
|
|
11
|
-
|
|
12
|
-
let
|
|
13
|
-
|
|
20
|
+
let sourceType = this.content.type;
|
|
21
|
+
let afterSanitized;
|
|
22
|
+
if (sourceType == "video") {
|
|
23
|
+
afterSanitized = this.parseVideoContent(this.content.content);
|
|
24
|
+
}
|
|
25
|
+
else {
|
|
26
|
+
let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
|
|
27
|
+
afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
|
|
28
|
+
}
|
|
14
29
|
return {
|
|
15
30
|
type: this.content.type,
|
|
16
31
|
title: this.content.title,
|
|
@@ -36,7 +51,7 @@ class ParseSourceContent {
|
|
|
36
51
|
}
|
|
37
52
|
return dataAfterRemoving;
|
|
38
53
|
}
|
|
39
|
-
|
|
54
|
+
sanitizeTextContent(content) {
|
|
40
55
|
// Remove newline characters
|
|
41
56
|
content = content.replace(/\\n/g, ' ');
|
|
42
57
|
// Remove internal link references, keeping only the link text
|
|
@@ -58,7 +73,7 @@ class ParseSourceContent {
|
|
|
58
73
|
for (let key in block) {
|
|
59
74
|
let value = block[key];
|
|
60
75
|
if (typeof value === 'string') {
|
|
61
|
-
sanitizedBlock[key] = this.
|
|
76
|
+
sanitizedBlock[key] = this.sanitizeTextContent(value);
|
|
62
77
|
}
|
|
63
78
|
else if (Array.isArray(value)) {
|
|
64
79
|
sanitizedBlock[key] = this.sanitizeBlocks(value);
|
|
@@ -71,5 +86,90 @@ class ParseSourceContent {
|
|
|
71
86
|
});
|
|
72
87
|
return sanitizedBlocks;
|
|
73
88
|
}
|
|
89
|
+
parseVideoContent(data) {
|
|
90
|
+
let timeCodes = [];
|
|
91
|
+
data.map((e) => timeCodes.push(...e.children));
|
|
92
|
+
let cleanedData = this.cleanTranscript(timeCodes);
|
|
93
|
+
let collapsedData = this.collapseTimeCodes(cleanedData, 100);
|
|
94
|
+
return collapsedData;
|
|
95
|
+
}
|
|
96
|
+
// remove content inside [] which denotes non-speech sounds
|
|
97
|
+
isNonSpeech(content) {
|
|
98
|
+
// Check if the content is non-speech (enclosed in square brackets).
|
|
99
|
+
return /^\[.*\]$/.test(content.trim());
|
|
100
|
+
}
|
|
101
|
+
// remove non-essential content
|
|
102
|
+
cleanTranscript(data) {
|
|
103
|
+
// Clean the transcript by removing non-speech content, normalizing whitespace, and keeping only necessary fields.
|
|
104
|
+
const cleanedData = [];
|
|
105
|
+
data.forEach(entry => {
|
|
106
|
+
let content = (entry.content || '').trim();
|
|
107
|
+
// Skip non-speech content
|
|
108
|
+
if (this.isNonSpeech(content))
|
|
109
|
+
return;
|
|
110
|
+
// Normalize whitespace in content
|
|
111
|
+
content = content.replace(/\s+/g, ' ');
|
|
112
|
+
// Only keep start_time, end_time, content
|
|
113
|
+
const currentEntry = {
|
|
114
|
+
start_time: entry.startTime,
|
|
115
|
+
end_time: entry.endTime,
|
|
116
|
+
content: content
|
|
117
|
+
};
|
|
118
|
+
cleanedData.push(currentEntry);
|
|
119
|
+
});
|
|
120
|
+
return cleanedData;
|
|
121
|
+
}
|
|
122
|
+
// collapse the timecode to 30 seconds
|
|
123
|
+
collapseTimeCodes(data, maxDuration = 30.0) {
|
|
124
|
+
// Collapse time codes into buckets of approximately maxDuration seconds.
|
|
125
|
+
const collapsedData = [];
|
|
126
|
+
let bucketStartTime = null;
|
|
127
|
+
let bucketEndTime = null;
|
|
128
|
+
let bucketContent = [];
|
|
129
|
+
let bucketDuration = 0.0;
|
|
130
|
+
data.forEach(entry => {
|
|
131
|
+
const startTime = entry.start_time;
|
|
132
|
+
const endTime = entry.end_time;
|
|
133
|
+
const content = entry.content;
|
|
134
|
+
const entryDuration = endTime - startTime;
|
|
135
|
+
if (bucketStartTime === null) {
|
|
136
|
+
// Start a new bucket
|
|
137
|
+
bucketStartTime = startTime;
|
|
138
|
+
bucketEndTime = endTime;
|
|
139
|
+
bucketContent.push(content);
|
|
140
|
+
bucketDuration = entryDuration;
|
|
141
|
+
}
|
|
142
|
+
else if ((bucketDuration + entryDuration) <= maxDuration) {
|
|
143
|
+
// Add to current bucket
|
|
144
|
+
bucketEndTime = endTime;
|
|
145
|
+
bucketContent.push(content);
|
|
146
|
+
bucketDuration += entryDuration;
|
|
147
|
+
}
|
|
148
|
+
else {
|
|
149
|
+
// Close current bucket and start a new one
|
|
150
|
+
const collapsedEntry = {
|
|
151
|
+
start_time: bucketStartTime,
|
|
152
|
+
end_time: bucketEndTime,
|
|
153
|
+
content: bucketContent.join(' ')
|
|
154
|
+
};
|
|
155
|
+
collapsedData.push(collapsedEntry);
|
|
156
|
+
// Start new bucket with current entry
|
|
157
|
+
bucketStartTime = startTime;
|
|
158
|
+
bucketEndTime = endTime;
|
|
159
|
+
bucketContent = [content];
|
|
160
|
+
bucketDuration = entryDuration;
|
|
161
|
+
}
|
|
162
|
+
});
|
|
163
|
+
// Add the last bucket if it exists
|
|
164
|
+
if (bucketContent.length > 0) {
|
|
165
|
+
const collapsedEntry = {
|
|
166
|
+
start_time: bucketStartTime,
|
|
167
|
+
end_time: bucketEndTime,
|
|
168
|
+
content: bucketContent.join(' ')
|
|
169
|
+
};
|
|
170
|
+
collapsedData.push(collapsedEntry);
|
|
171
|
+
}
|
|
172
|
+
return collapsedData;
|
|
173
|
+
}
|
|
74
174
|
}
|
|
75
175
|
exports.ParseSourceContent = ParseSourceContent;
|
package/package.json
CHANGED
package/src/bootstrap/app.ts
CHANGED
|
@@ -106,6 +106,8 @@ export class OnlyEverGenerator {
|
|
|
106
106
|
gapFill.remainingConcepts.length !== 0 ||
|
|
107
107
|
gapFill.remainingFacts.length !== 0
|
|
108
108
|
) {
|
|
109
|
+
this.typologyResponse.facts = gapFill.remainingFacts;
|
|
110
|
+
this.typologyResponse.concepts = gapFill.remainingConcepts;
|
|
109
111
|
response = await this.generateCard(
|
|
110
112
|
this.promptForCardGen +
|
|
111
113
|
"Generate cards only suitable for the given remaining concepts and facts" +
|
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
export class ParseSourceContent{
|
|
2
2
|
public content: any;
|
|
3
|
+
/// Format of Content
|
|
4
|
+
// content: {
|
|
5
|
+
// title: source.title,
|
|
6
|
+
// headings: source.headings,
|
|
7
|
+
// content: source.content,
|
|
8
|
+
// fields: fields,
|
|
9
|
+
// taxonomy: source.source_taxonomy,
|
|
10
|
+
// type: source.source_type
|
|
11
|
+
// },
|
|
12
|
+
|
|
3
13
|
|
|
4
14
|
titles_to_remove = ['See also', 'References', 'Further reading', 'External links', 'Notes and references', 'Bibliography', 'Notes', 'Cited sources'];
|
|
5
15
|
block_types_toremove = ['table','empty_line'];
|
|
@@ -8,9 +18,14 @@ export class ParseSourceContent{
|
|
|
8
18
|
}
|
|
9
19
|
|
|
10
20
|
parseData() {
|
|
11
|
-
|
|
21
|
+
let sourceType = this.content.type;
|
|
22
|
+
let afterSanitized;
|
|
23
|
+
if(sourceType == "video"){
|
|
24
|
+
afterSanitized = this.parseVideoContent(this.content.content);
|
|
25
|
+
}else{
|
|
12
26
|
let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(this.content.content);
|
|
13
|
-
|
|
27
|
+
afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
|
|
28
|
+
}
|
|
14
29
|
return {
|
|
15
30
|
type: this.content.type,
|
|
16
31
|
title: this.content.title,
|
|
@@ -40,7 +55,7 @@ export class ParseSourceContent{
|
|
|
40
55
|
return dataAfterRemoving;
|
|
41
56
|
}
|
|
42
57
|
|
|
43
|
-
|
|
58
|
+
sanitizeTextContent(content: String) {
|
|
44
59
|
// Remove newline characters
|
|
45
60
|
content = content.replace(/\\n/g, ' ');
|
|
46
61
|
|
|
@@ -68,7 +83,7 @@ export class ParseSourceContent{
|
|
|
68
83
|
for (let key in block) {
|
|
69
84
|
let value = block[key];
|
|
70
85
|
if (typeof value === 'string') {
|
|
71
|
-
sanitizedBlock[key] = this.
|
|
86
|
+
sanitizedBlock[key] = this.sanitizeTextContent(value);
|
|
72
87
|
} else if (Array.isArray(value)) {
|
|
73
88
|
sanitizedBlock[key] = this.sanitizeBlocks(value);
|
|
74
89
|
} else {
|
|
@@ -79,6 +94,105 @@ export class ParseSourceContent{
|
|
|
79
94
|
});
|
|
80
95
|
return sanitizedBlocks;
|
|
81
96
|
}
|
|
97
|
+
|
|
98
|
+
parseVideoContent(data: Array<any>){
|
|
99
|
+
let timeCodes :Array<any> = [];
|
|
100
|
+
data.map((e) => timeCodes.push(...e.children));
|
|
101
|
+
let cleanedData = this.cleanTranscript(timeCodes);
|
|
102
|
+
let collapsedData = this.collapseTimeCodes(cleanedData,100);
|
|
103
|
+
return collapsedData;
|
|
104
|
+
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// remove content inside [] which denotes non-speech sounds
|
|
108
|
+
isNonSpeech(content: string) {
|
|
109
|
+
// Check if the content is non-speech (enclosed in square brackets).
|
|
110
|
+
return /^\[.*\]$/.test(content.trim());
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
// remove non-essential content
|
|
114
|
+
cleanTranscript(data: Array<any>) {
|
|
115
|
+
// Clean the transcript by removing non-speech content, normalizing whitespace, and keeping only necessary fields.
|
|
116
|
+
const cleanedData = <any>[];
|
|
117
|
+
|
|
118
|
+
data.forEach(entry => {
|
|
119
|
+
let content = (entry.content || '').trim();
|
|
120
|
+
|
|
121
|
+
// Skip non-speech content
|
|
122
|
+
if (this.isNonSpeech(content)) return;
|
|
123
|
+
|
|
124
|
+
// Normalize whitespace in content
|
|
125
|
+
content = content.replace(/\s+/g, ' ');
|
|
126
|
+
|
|
127
|
+
// Only keep start_time, end_time, content
|
|
128
|
+
const currentEntry = {
|
|
129
|
+
start_time: entry.startTime,
|
|
130
|
+
end_time: entry.endTime,
|
|
131
|
+
content: content
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
cleanedData.push(currentEntry);
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
return cleanedData;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// collapse the timecode to 30 seconds
|
|
141
|
+
collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
|
|
142
|
+
// Collapse time codes into buckets of approximately maxDuration seconds.
|
|
143
|
+
const collapsedData = [];
|
|
144
|
+
let bucketStartTime: number | null = null;
|
|
145
|
+
let bucketEndTime : number | null = null;
|
|
146
|
+
let bucketContent : Array<any> = [];
|
|
147
|
+
let bucketDuration = 0.0;
|
|
148
|
+
|
|
149
|
+
data.forEach(entry => {
|
|
150
|
+
const startTime = entry.start_time;
|
|
151
|
+
const endTime = entry.end_time;
|
|
152
|
+
const content = entry.content;
|
|
153
|
+
const entryDuration = endTime - startTime;
|
|
154
|
+
|
|
155
|
+
if (bucketStartTime === null) {
|
|
156
|
+
// Start a new bucket
|
|
157
|
+
bucketStartTime = startTime;
|
|
158
|
+
bucketEndTime = endTime;
|
|
159
|
+
bucketContent.push(content);
|
|
160
|
+
bucketDuration = entryDuration;
|
|
161
|
+
} else if ((bucketDuration + entryDuration) <= maxDuration) {
|
|
162
|
+
// Add to current bucket
|
|
163
|
+
bucketEndTime = endTime;
|
|
164
|
+
bucketContent.push(content);
|
|
165
|
+
bucketDuration += entryDuration;
|
|
166
|
+
} else {
|
|
167
|
+
// Close current bucket and start a new one
|
|
168
|
+
const collapsedEntry = {
|
|
169
|
+
start_time: bucketStartTime,
|
|
170
|
+
end_time: bucketEndTime,
|
|
171
|
+
content: bucketContent.join(' ')
|
|
172
|
+
};
|
|
173
|
+
collapsedData.push(collapsedEntry);
|
|
174
|
+
|
|
175
|
+
// Start new bucket with current entry
|
|
176
|
+
bucketStartTime = startTime;
|
|
177
|
+
bucketEndTime = endTime;
|
|
178
|
+
bucketContent = [content];
|
|
179
|
+
bucketDuration = entryDuration;
|
|
180
|
+
}
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
// Add the last bucket if it exists
|
|
184
|
+
if (bucketContent.length > 0) {
|
|
185
|
+
const collapsedEntry = {
|
|
186
|
+
start_time: bucketStartTime,
|
|
187
|
+
end_time: bucketEndTime,
|
|
188
|
+
content: bucketContent.join(' ')
|
|
189
|
+
};
|
|
190
|
+
collapsedData.push(collapsedEntry);
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
return collapsedData;
|
|
194
|
+
}
|
|
195
|
+
|
|
82
196
|
|
|
83
197
|
|
|
84
198
|
}
|