only_ever_generator 8.4.6 → 8.4.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/dist/bootstrap/app.d.ts +6 -3
  2. package/dist/bootstrap/app.d.ts.map +1 -1
  3. package/dist/bootstrap/app.js +11 -17
  4. package/dist/bootstrap/app.js.map +1 -1
  5. package/dist/card_gen/generate_cards.d.ts +2 -3
  6. package/dist/card_gen/generate_cards.d.ts.map +1 -1
  7. package/dist/card_gen/generate_cards.js +22 -15
  8. package/dist/card_gen/generate_cards.js.map +1 -1
  9. package/dist/constants/prompt_data.d.ts +4 -4
  10. package/dist/constants/prompt_data.js +302 -302
  11. package/dist/constants/prompts/card_gen_prompt.js +160 -160
  12. package/dist/constants/prompts/typology_prompt.js +131 -131
  13. package/dist/constants/source_data.d.ts +171 -171
  14. package/dist/constants/source_data.js +973 -973
  15. package/dist/embedding_generation/local_consolidation.js +104 -104
  16. package/dist/helper/build_concept_facts_schema.d.ts +42 -42
  17. package/dist/helper/build_concept_facts_schema.js +44 -44
  18. package/dist/helper/qdrant_db_methods.d.ts.map +1 -1
  19. package/dist/helper/schema_helper/build_card_schema.d.ts +1 -9
  20. package/dist/helper/schema_helper/build_card_schema.d.ts.map +1 -1
  21. package/dist/helper/schema_helper/build_card_schema.js +47 -50
  22. package/dist/helper/schema_helper/build_card_schema.js.map +1 -1
  23. package/dist/helper/schema_helper/build_concept_facts_schema.d.ts +1 -1
  24. package/dist/helper/schema_helper/build_concept_facts_schema.d.ts.map +1 -1
  25. package/dist/helper/schema_helper/build_concept_facts_schema.js +20 -5
  26. package/dist/helper/schema_helper/build_concept_facts_schema.js.map +1 -1
  27. package/dist/helper/schema_helper/build_summary_schema.d.ts +1 -1
  28. package/dist/helper/schema_helper/build_summary_schema.d.ts.map +1 -1
  29. package/dist/helper/schema_helper/build_summary_schema.js +18 -7
  30. package/dist/helper/schema_helper/build_summary_schema.js.map +1 -1
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +5 -5
  33. package/dist/index.js.map +1 -1
  34. package/dist/parse/response_format_card.d.ts +176 -176
  35. package/dist/parse/response_format_card.js +371 -371
  36. package/dist/parse/response_format_typology.d.ts +1 -1
  37. package/dist/parse/response_format_typology.js +46 -46
  38. package/dist/services/get_prompts.d.ts +8 -7
  39. package/dist/services/get_prompts.d.ts.map +1 -1
  40. package/dist/services/get_prompts.js +69 -21
  41. package/dist/services/get_prompts.js.map +1 -1
  42. package/dist/typology_gen/generate_concept_facts.d.ts +2 -3
  43. package/dist/typology_gen/generate_concept_facts.d.ts.map +1 -1
  44. package/dist/typology_gen/generate_concept_facts.js +25 -15
  45. package/dist/typology_gen/generate_concept_facts.js.map +1 -1
  46. package/dist/typology_gen/generate_typology.d.ts +2 -1
  47. package/dist/typology_gen/generate_typology.d.ts.map +1 -1
  48. package/dist/typology_gen/generate_typology.js +24 -13
  49. package/dist/typology_gen/generate_typology.js.map +1 -1
  50. package/dist/typology_gen/summarize.d.ts +2 -3
  51. package/dist/typology_gen/summarize.d.ts.map +1 -1
  52. package/dist/typology_gen/summarize.js +24 -13
  53. package/dist/typology_gen/summarize.js.map +1 -1
  54. package/package.json +39 -38
  55. package/src/bootstrap/app.ts +418 -416
  56. package/src/card_gen/generate_cards.ts +347 -345
  57. package/src/config.ts +11 -11
  58. package/src/constants/api_constants.ts +7 -7
  59. package/src/constants/prompts/card_gen_prompt.ts +164 -164
  60. package/src/constants/prompts/typology_prompt.ts +139 -139
  61. package/src/embedding_generation/consolidation/global_consolidation.ts +96 -96
  62. package/src/embedding_generation/consolidation/local_consolidation.ts +141 -141
  63. package/src/embedding_generation/consolidation/write_consolidated_data.ts +98 -98
  64. package/src/embedding_generation/generate_embeddings.ts +42 -42
  65. package/src/embedding_generation/parse_embedding_response.ts +31 -31
  66. package/src/enums/card_type_enum.ts +6 -6
  67. package/src/gap_fill/calculate_gap_fill.ts +50 -50
  68. package/src/helper/get_id_from_title.ts +33 -33
  69. package/src/helper/mongo_helper.ts +29 -29
  70. package/src/helper/openai_helper.ts +20 -20
  71. package/src/helper/qdrant_db_methods.ts +77 -77
  72. package/src/helper/schema_helper/build_card_schema.ts +74 -98
  73. package/src/helper/schema_helper/build_classify_summarize_schema.ts +43 -43
  74. package/src/helper/schema_helper/build_concept_facts_schema.ts +45 -31
  75. package/src/helper/schema_helper/build_summary_schema.ts +43 -32
  76. package/src/index.ts +71 -73
  77. package/src/logger.ts +65 -65
  78. package/src/parse/parse_card/parse_cloze_card.ts +146 -146
  79. package/src/parse/parse_card/parse_flash_cards.ts +42 -42
  80. package/src/parse/parse_card/parse_match_card.ts +104 -104
  81. package/src/parse/parse_card/parse_mcq_card.ts +114 -114
  82. package/src/parse/parse_card_response.ts +197 -197
  83. package/src/parse/parse_source_content.ts +212 -212
  84. package/src/services/get_prompts.ts +164 -112
  85. package/src/services/open_ai_service.ts +89 -89
  86. package/src/services/qdrant_service.ts +10 -10
  87. package/src/types/base_param_type.ts +13 -13
  88. package/src/types/mongo_concept_fact_type.ts +12 -12
  89. package/src/types/parsed_card_type.ts +39 -39
  90. package/src/types/raw_card_response_types/generated_card_response_type.ts +59 -59
  91. package/src/types/source_taxonomy_type.ts +24 -24
  92. package/src/typology-parsed-response.ts +1932 -1932
  93. package/src/typology_gen/generate_concept_facts.ts +180 -169
  94. package/src/typology_gen/generate_typology.ts +203 -189
  95. package/src/typology_gen/summarize.ts +176 -164
  96. package/src/utils/distributed_quote_restoration.ts +80 -80
  97. package/src/utils/generate_args.ts +29 -29
  98. package/src/utils/parse_openai_response.ts +19 -19
  99. package/src/utils/sanitize_strings.ts +65 -65
  100. package/tsconfig.json +16 -16
  101. package/dist/constants/default_generation_variables.d.ts +0 -3
  102. package/dist/constants/default_generation_variables.d.ts.map +0 -1
  103. package/dist/constants/default_generation_variables.js +0 -580
  104. package/dist/constants/default_generation_variables.js.map +0 -1
  105. package/dist/services/prompts_test.d.ts +0 -10
  106. package/dist/services/prompts_test.d.ts.map +0 -1
  107. package/dist/services/prompts_test.js +0 -227
  108. package/dist/services/prompts_test.js.map +0 -1
  109. package/dist/types/generation_variables_schema.d.ts +0 -14
  110. package/dist/types/generation_variables_schema.d.ts.map +0 -1
  111. package/dist/types/generation_variables_schema.js +0 -3
  112. package/dist/types/generation_variables_schema.js.map +0 -1
  113. package/dist/utils/test.d.ts +0 -2
  114. package/dist/utils/test.d.ts.map +0 -1
  115. package/dist/utils/test.js +0 -5
  116. package/dist/utils/test.js.map +0 -1
  117. package/src/constants/default_generation_variables.ts +0 -624
  118. package/src/types/generation_variables_schema.ts +0 -16
@@ -1,212 +1,212 @@
1
- import { SourceTaxonomy } from "../types/source_taxonomy_type";
2
-
3
- export class ParseSourceContent {
4
- public content: any;
5
- /// Format of Content
6
- // content: {
7
- // title: source.title,
8
- // headings: source.headings,
9
- // content: source.content,
10
- // fields: fields,
11
- // taxonomy: source.source_taxonomy,
12
- // type: source.source_type
13
- // },
14
-
15
- titles_to_remove = [
16
- "See also",
17
- "References",
18
- "Further reading",
19
- "External links",
20
- "Notes and references",
21
- "Bibliography",
22
- "Notes",
23
- "Cited sources",
24
- ];
25
- block_types_toremove = ["table", "empty_line"];
26
- constructor(sourceContent: any) {
27
- this.content = sourceContent;
28
- }
29
-
30
- parseData(): {
31
- source_id: string;
32
- type: string;
33
- title: string;
34
- content: any[];
35
- headings: string[];
36
- taxonomy: SourceTaxonomy;
37
- } {
38
- let sourceType = this.content.type;
39
- let afterSanitized;
40
- if (sourceType == "video") {
41
- afterSanitized = this.parseVideoContent(this.content.content);
42
- } else {
43
- let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(
44
- this.content.content
45
- );
46
- afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
47
- }
48
- return {
49
- source_id: this.content.source_id,
50
- type: this.content.type,
51
- title: this.content.title,
52
- content: afterSanitized,
53
- headings: this.content.headings,
54
- taxonomy: this.content.taxonomy,
55
- };
56
- }
57
-
58
- removeSectionsByTitle(data: Array<any>) {
59
- let dataAfterRemoving = [];
60
- for (let elem of data) {
61
- if (
62
- elem.block_type == "heading" &&
63
- this.titles_to_remove.includes(elem.content)
64
- ) {
65
- continue;
66
- }
67
- /// remove unwanted blcok types , for now `table` and `empty_line`
68
- if (this.block_types_toremove.includes(elem.block_type)) {
69
- continue;
70
- }
71
- if (elem.children) {
72
- elem.children = this.removeSectionsByTitle(elem.children);
73
- }
74
- dataAfterRemoving.push(elem);
75
- }
76
- return dataAfterRemoving;
77
- }
78
-
79
- sanitizeTextContent(content: String) {
80
- // Remove newline characters
81
- content = content.replace(/\\n/g, " ");
82
-
83
- // Remove internal link references, keeping only the link text
84
- // Pattern explanation: [[link|text|index|wiki]] --> text
85
- content = content.replace(/\[\[.*?\|(.*?)\|.*?\|wiki\]\]/g, "$1");
86
-
87
- // Remove external links, keeping only the link text
88
- // Pattern explanation: [url text] --> text
89
- content = content.replace(/\[http[s]?:\/\/[^\s]+ ([^\]]+)\]/g, "$1");
90
-
91
- // Remove Markdown link references, keeping only the link text
92
- // Pattern explanation: ![link text](url) --> link text
93
- content = content.replace(/\!\[([^\]]+)\]\([^\)]+\)/g, "$1");
94
-
95
- return content;
96
- }
97
-
98
- sanitizeBlocks(blocks: Array<any>) {
99
- let sanitizedBlocks = <any>[];
100
- blocks = blocks.filter((item) => item.block_type != "table");
101
- blocks.forEach((block) => {
102
- let sanitizedBlock: any = {};
103
- for (let key in block) {
104
- let value = block[key];
105
- if (typeof value === "string") {
106
- sanitizedBlock[key] = this.sanitizeTextContent(value);
107
- } else if (Array.isArray(value)) {
108
- sanitizedBlock[key] = this.sanitizeBlocks(value);
109
- } else {
110
- sanitizedBlock[key] = value;
111
- }
112
- }
113
- sanitizedBlocks.push(sanitizedBlock);
114
- });
115
- return sanitizedBlocks;
116
- }
117
-
118
- parseVideoContent(data: Array<any>) {
119
- let finalChapters: Array<any> = [];
120
- // let cleanedData = this.cleanTranscript(timeCodes);
121
- data.forEach((e) => {
122
- let combinedContent = this.cleanTranscript(e);
123
- finalChapters.push({
124
- startTime: e.startTime,
125
- endTime: e.endTime,
126
- content: combinedContent,
127
- title: e.content,
128
- });
129
- });
130
-
131
- return finalChapters;
132
- }
133
-
134
- // remove content inside [] which denotes non-speech sounds
135
- isNonSpeech(content: string) {
136
- // Check if the content is non-speech (enclosed in square brackets).
137
- return /^\[.*\]$/.test(content.trim());
138
- }
139
-
140
- // remove non-essential content
141
- cleanTranscript(data: any) {
142
- let finalContent = "";
143
- let children = data.children ?? [];
144
-
145
- children.forEach((e: any) => {
146
- let content = (e.content || "").trim();
147
-
148
- if (this.isNonSpeech(content)) return;
149
-
150
- content = content.replace(/\s+/g, " ");
151
- finalContent += content;
152
- });
153
-
154
- return finalContent;
155
- }
156
-
157
- // collapse the timecode to 30 seconds
158
- collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
159
- // Collapse time codes into buckets of approximately maxDuration seconds.
160
- const collapsedData = [];
161
- let bucketStartTime: number | null = null;
162
- let bucketEndTime: number | null = null;
163
- let bucketContent: Array<any> = [];
164
- let bucketDuration = 0.0;
165
-
166
- data.forEach((entry) => {
167
- const startTime = entry.start_time;
168
- const endTime = entry.end_time;
169
- const content = entry.content;
170
- const entryDuration = endTime - startTime;
171
-
172
- if (bucketStartTime === null) {
173
- // Start a new bucket
174
- bucketStartTime = startTime;
175
- bucketEndTime = endTime;
176
- bucketContent.push(content);
177
- bucketDuration = entryDuration;
178
- } else if (bucketDuration + entryDuration <= maxDuration) {
179
- // Add to current bucket
180
- bucketEndTime = endTime;
181
- bucketContent.push(content);
182
- bucketDuration += entryDuration;
183
- } else {
184
- // Close current bucket and start a new one
185
- const collapsedEntry = {
186
- start_time: bucketStartTime,
187
- end_time: bucketEndTime,
188
- content: bucketContent.join(" "),
189
- };
190
- collapsedData.push(collapsedEntry);
191
-
192
- // Start new bucket with current entry
193
- bucketStartTime = startTime;
194
- bucketEndTime = endTime;
195
- bucketContent = [content];
196
- bucketDuration = entryDuration;
197
- }
198
- });
199
-
200
- // Add the last bucket if it exists
201
- if (bucketContent.length > 0) {
202
- const collapsedEntry = {
203
- start_time: bucketStartTime,
204
- end_time: bucketEndTime,
205
- content: bucketContent.join(" "),
206
- };
207
- collapsedData.push(collapsedEntry);
208
- }
209
-
210
- return collapsedData;
211
- }
212
- }
1
+ import { SourceTaxonomy } from "../types/source_taxonomy_type";
2
+
3
/**
 * Normalises a raw source document before it is handed to later stages.
 *
 * The payload given to the constructor is expected to look like:
 *
 *   {
 *     source_id, title, headings,
 *     content,   // array of blocks (or of video chapters when type is "video")
 *     fields, taxonomy, type
 *   }
 *
 * Text sources have boilerplate sections and unwanted block types removed and
 * their strings stripped of link markup. Video sources have each chapter's
 * transcript children collapsed into a single string.
 */
export class ParseSourceContent {
  /** Raw source payload exactly as passed to the constructor. */
  public content: any;

  /** Heading titles whose block (heading and everything nested in it) is dropped. */
  titles_to_remove = [
    "See also",
    "References",
    "Further reading",
    "External links",
    "Notes and references",
    "Bibliography",
    "Notes",
    "Cited sources",
  ];

  /** Block types that are dropped wherever they occur in the block tree. */
  block_types_toremove = ["table", "empty_line"];

  constructor(sourceContent: any) {
    this.content = sourceContent;
  }

  /**
   * Produces the cleaned representation of the source.
   *
   * Video sources (`type === "video"`) go through `parseVideoContent`; every
   * other type goes through `removeSectionsByTitle` followed by
   * `sanitizeBlocks`. A missing `content` field is treated as an empty list.
   *
   * @returns the identifying fields copied from the payload plus the cleaned
   *          `content` array.
   */
  parseData(): {
    source_id: string;
    type: string;
    title: string;
    content: any[];
    headings: string[];
    taxonomy: SourceTaxonomy;
  } {
    const rawBlocks: Array<any> = this.content.content ?? [];
    const cleaned =
      this.content.type === "video"
        ? this.parseVideoContent(rawBlocks)
        : this.sanitizeBlocks(this.removeSectionsByTitle(rawBlocks));

    return {
      source_id: this.content.source_id,
      type: this.content.type,
      title: this.content.title,
      content: cleaned,
      headings: this.content.headings,
      taxonomy: this.content.taxonomy,
    };
  }

  /**
   * Recursively drops unwanted blocks from a block tree.
   *
   * A block is dropped when it is a heading whose (trimmed) title is listed in
   * `titles_to_remove`, or when its `block_type` is listed in
   * `block_types_toremove`. Dropping a block drops its `children` with it.
   *
   * The input is not modified: blocks whose children are filtered are returned
   * as shallow copies carrying the filtered `children` array.
   *
   * @param data blocks at one level of the tree
   * @returns the surviving blocks, in their original order
   */
  removeSectionsByTitle(data: Array<any>) {
    const kept: Array<any> = [];
    for (const elem of data) {
      const isUnwantedHeading =
        elem.block_type === "heading" &&
        typeof elem.content === "string" &&
        this.titles_to_remove.includes(elem.content.trim());
      if (
        isUnwantedHeading ||
        this.block_types_toremove.includes(elem.block_type)
      ) {
        continue;
      }
      // Copy instead of assigning to elem.children so the caller's tree stays intact.
      kept.push(
        Array.isArray(elem.children)
          ? { ...elem, children: this.removeSectionsByTitle(elem.children) }
          : elem
      );
    }
    return kept;
  }

  /**
   * Strips line breaks and link markup from a piece of text.
   *
   * @param content text to clean
   * @returns the text with the substitutions described inline applied
   */
  sanitizeTextContent(content: string): string {
    return (
      content
        // Line breaks, both real ones and the escaped two-character form "\n".
        .replace(/\\n|\r?\n/g, " ")
        // Internal links: [[link|text|index|wiki]] --> text.
        // Segments may not contain brackets, so a match cannot start at an
        // earlier, unrelated "[[" and swallow the text in between.
        .replace(/\[\[[^\[\]]*?\|([^\[\]]*?)\|[^\[\]]*?\|wiki\]\]/g, "$1")
        // External links: [url text] --> text.
        .replace(/\[http[s]?:\/\/[^\s]+ ([^\]]+)\]/g, "$1")
        // Markdown image/link references: ![link text](url) --> link text.
        .replace(/\!\[([^\]]+)\]\([^\)]+\)/g, "$1")
    );
  }

  /**
   * Returns sanitized copies of the given blocks.
   *
   * For every object element: string fields go through `sanitizeTextContent`,
   * array fields are sanitized recursively, and all other fields are copied
   * as-is. Elements whose `block_type` is "table" are skipped.
   *
   * Elements that are not objects are handled by type rather than being
   * treated as blocks: strings are sanitized, nested arrays are recursed into,
   * and anything else (numbers, booleans, null, undefined) is kept unchanged.
   *
   * @param blocks blocks (or plain values) to sanitize
   * @returns a new array; the input is not modified
   */
  sanitizeBlocks(blocks: Array<any>) {
    const sanitizedBlocks: Array<any> = [];
    for (const item of blocks) {
      if (typeof item === "string") {
        sanitizedBlocks.push(this.sanitizeTextContent(item));
        continue;
      }
      if (Array.isArray(item)) {
        sanitizedBlocks.push(this.sanitizeBlocks(item));
        continue;
      }
      if (item === null || typeof item !== "object") {
        sanitizedBlocks.push(item);
        continue;
      }
      if (item.block_type === "table") {
        continue;
      }

      const sanitizedBlock: any = {};
      for (const [key, value] of Object.entries(item)) {
        if (typeof value === "string") {
          sanitizedBlock[key] = this.sanitizeTextContent(value);
        } else if (Array.isArray(value)) {
          sanitizedBlock[key] = this.sanitizeBlocks(value);
        } else {
          sanitizedBlock[key] = value;
        }
      }
      sanitizedBlocks.push(sanitizedBlock);
    }
    return sanitizedBlocks;
  }

  /**
   * Converts video chapters into flat chapter records.
   *
   * @param data chapters; each is read for `startTime`, `endTime`, `content`
   *             (used as the chapter title) and `children` (transcript pieces)
   * @returns one `{ startTime, endTime, content, title }` record per chapter,
   *          where `content` is the cleaned transcript text
   */
  parseVideoContent(data: Array<any>): Array<any> {
    return data.map((chapter) => ({
      startTime: chapter.startTime,
      endTime: chapter.endTime,
      content: this.cleanTranscript(chapter),
      title: chapter.content,
    }));
  }

  /**
   * Tells whether a transcript segment consists solely of bracketed
   * annotations such as "[Music]" or "[Music] [Applause]".
   *
   * A segment that merely starts and ends with a bracketed annotation but has
   * other text in between (e.g. "[Music] hello [Laughter]") is NOT non-speech.
   *
   * @param content transcript segment
   * @returns true when nothing but bracketed annotations and whitespace is present
   */
  isNonSpeech(content: string) {
    const trimmed = content.trim();
    if (!trimmed.startsWith("[")) {
      return false;
    }
    // Peel off innermost bracket groups until nothing more can be removed,
    // which also handles nested annotations such as "[[x]]".
    let remainder = trimmed;
    let previous: string;
    do {
      previous = remainder;
      remainder = remainder.replace(/\[[^\[\]]*\]/g, "");
    } while (remainder !== previous);
    return remainder.trim() === "";
  }

  /**
   * Builds the transcript text of one chapter from its `children`.
   *
   * Each child's `content` is trimmed, skipped when empty, non-string or
   * non-speech, has its internal whitespace runs collapsed to single spaces,
   * and the surviving segments are joined with a single space so that words
   * from adjacent segments do not run together.
   *
   * @param data chapter object; a missing `children` is treated as empty
   * @returns the combined transcript text ("" when there is none)
   */
  cleanTranscript(data: any) {
    const segments: string[] = [];
    for (const child of data.children ?? []) {
      const text =
        typeof child?.content === "string" ? child.content.trim() : "";
      if (text === "" || this.isNonSpeech(text)) {
        continue;
      }
      segments.push(text.replace(/\s+/g, " "));
    }
    return segments.join(" ");
  }

  /**
   * Merges consecutive time-coded entries into buckets of at most
   * `maxDuration` seconds.
   *
   * Entries are read for `start_time`, `end_time` and `content`. The bucket
   * length is the sum of the entries' own durations (gaps between entries are
   * not counted). An entry that does not fit into the open bucket closes it
   * and starts a new one, so a single entry longer than `maxDuration` ends up
   * in a bucket of its own.
   *
   * @param data entries in playback order
   * @param maxDuration maximum summed duration per bucket, in the same unit as
   *                    the time codes (default 30)
   * @returns `{ start_time, end_time, content }` records, content joined by spaces
   */
  collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
    type Bucket = {
      start_time: number;
      end_time: number;
      parts: Array<any>;
      duration: number;
    };

    const collapsedData: Array<{
      start_time: number;
      end_time: number;
      content: string;
    }> = [];
    let open: Bucket | null = null;

    for (const entry of data) {
      const entryDuration = entry.end_time - entry.start_time;

      if (open !== null && open.duration + entryDuration <= maxDuration) {
        // Still room: extend the open bucket.
        open.end_time = entry.end_time;
        open.parts.push(entry.content);
        open.duration += entryDuration;
        continue;
      }

      if (open !== null) {
        // No room: emit the open bucket before starting the next one.
        collapsedData.push({
          start_time: open.start_time,
          end_time: open.end_time,
          content: open.parts.join(" "),
        });
      }
      open = {
        start_time: entry.start_time,
        end_time: entry.end_time,
        parts: [entry.content],
        duration: entryDuration,
      };
    }

    // Emit whatever is left in the last bucket.
    if (open !== null) {
      collapsedData.push({
        start_time: open.start_time,
        end_time: open.end_time,
        content: open.parts.join(" "),
      });
    }

    return collapsedData;
  }
}