only_ever_generator 8.4.5 → 8.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. package/dist/bootstrap/app.d.ts +6 -3
  2. package/dist/bootstrap/app.d.ts.map +1 -1
  3. package/dist/bootstrap/app.js +11 -17
  4. package/dist/bootstrap/app.js.map +1 -1
  5. package/dist/card_gen/generate_cards.d.ts +2 -3
  6. package/dist/card_gen/generate_cards.d.ts.map +1 -1
  7. package/dist/card_gen/generate_cards.js +22 -15
  8. package/dist/card_gen/generate_cards.js.map +1 -1
  9. package/dist/constants/prompt_data.d.ts +4 -4
  10. package/dist/constants/prompt_data.js +302 -302
  11. package/dist/constants/prompts/card_gen_prompt.js +160 -160
  12. package/dist/constants/prompts/typology_prompt.js +131 -131
  13. package/dist/constants/source_data.d.ts +171 -171
  14. package/dist/constants/source_data.js +973 -973
  15. package/dist/embedding_generation/local_consolidation.js +104 -104
  16. package/dist/helper/build_concept_facts_schema.d.ts +42 -42
  17. package/dist/helper/build_concept_facts_schema.js +44 -44
  18. package/dist/helper/qdrant_db_methods.d.ts.map +1 -1
  19. package/dist/helper/schema_helper/build_card_schema.d.ts +1 -9
  20. package/dist/helper/schema_helper/build_card_schema.d.ts.map +1 -1
  21. package/dist/helper/schema_helper/build_card_schema.js +47 -50
  22. package/dist/helper/schema_helper/build_card_schema.js.map +1 -1
  23. package/dist/helper/schema_helper/build_concept_facts_schema.d.ts +1 -1
  24. package/dist/helper/schema_helper/build_concept_facts_schema.d.ts.map +1 -1
  25. package/dist/helper/schema_helper/build_concept_facts_schema.js +20 -5
  26. package/dist/helper/schema_helper/build_concept_facts_schema.js.map +1 -1
  27. package/dist/helper/schema_helper/build_summary_schema.d.ts +1 -1
  28. package/dist/helper/schema_helper/build_summary_schema.d.ts.map +1 -1
  29. package/dist/helper/schema_helper/build_summary_schema.js +18 -7
  30. package/dist/helper/schema_helper/build_summary_schema.js.map +1 -1
  31. package/dist/index.js +6 -8
  32. package/dist/index.js.map +1 -1
  33. package/dist/parse/response_format_card.d.ts +176 -176
  34. package/dist/parse/response_format_card.js +371 -371
  35. package/dist/parse/response_format_typology.d.ts +1 -1
  36. package/dist/parse/response_format_typology.js +46 -46
  37. package/dist/services/get_prompts.d.ts +8 -7
  38. package/dist/services/get_prompts.d.ts.map +1 -1
  39. package/dist/services/get_prompts.js +69 -21
  40. package/dist/services/get_prompts.js.map +1 -1
  41. package/dist/typology_gen/generate_concept_facts.d.ts +2 -3
  42. package/dist/typology_gen/generate_concept_facts.d.ts.map +1 -1
  43. package/dist/typology_gen/generate_concept_facts.js +25 -15
  44. package/dist/typology_gen/generate_concept_facts.js.map +1 -1
  45. package/dist/typology_gen/generate_typology.d.ts +2 -1
  46. package/dist/typology_gen/generate_typology.d.ts.map +1 -1
  47. package/dist/typology_gen/generate_typology.js +24 -13
  48. package/dist/typology_gen/generate_typology.js.map +1 -1
  49. package/dist/typology_gen/summarize.d.ts +2 -3
  50. package/dist/typology_gen/summarize.d.ts.map +1 -1
  51. package/dist/typology_gen/summarize.js +24 -13
  52. package/dist/typology_gen/summarize.js.map +1 -1
  53. package/package.json +39 -38
  54. package/src/bootstrap/app.ts +418 -416
  55. package/src/card_gen/generate_cards.ts +347 -345
  56. package/src/config.ts +11 -11
  57. package/src/constants/api_constants.ts +7 -7
  58. package/src/constants/prompts/card_gen_prompt.ts +164 -164
  59. package/src/constants/prompts/typology_prompt.ts +139 -139
  60. package/src/embedding_generation/consolidation/global_consolidation.ts +96 -96
  61. package/src/embedding_generation/consolidation/local_consolidation.ts +141 -141
  62. package/src/embedding_generation/consolidation/write_consolidated_data.ts +98 -98
  63. package/src/embedding_generation/generate_embeddings.ts +42 -42
  64. package/src/embedding_generation/parse_embedding_response.ts +31 -31
  65. package/src/enums/card_type_enum.ts +6 -6
  66. package/src/gap_fill/calculate_gap_fill.ts +50 -50
  67. package/src/helper/get_id_from_title.ts +33 -33
  68. package/src/helper/mongo_helper.ts +29 -29
  69. package/src/helper/openai_helper.ts +20 -20
  70. package/src/helper/qdrant_db_methods.ts +77 -77
  71. package/src/helper/schema_helper/build_card_schema.ts +74 -98
  72. package/src/helper/schema_helper/build_classify_summarize_schema.ts +43 -43
  73. package/src/helper/schema_helper/build_concept_facts_schema.ts +45 -31
  74. package/src/helper/schema_helper/build_summary_schema.ts +43 -32
  75. package/src/index.ts +71 -73
  76. package/src/logger.ts +65 -65
  77. package/src/parse/parse_card/parse_cloze_card.ts +146 -146
  78. package/src/parse/parse_card/parse_flash_cards.ts +42 -42
  79. package/src/parse/parse_card/parse_match_card.ts +104 -104
  80. package/src/parse/parse_card/parse_mcq_card.ts +114 -114
  81. package/src/parse/parse_card_response.ts +197 -197
  82. package/src/parse/parse_source_content.ts +212 -212
  83. package/src/services/get_prompts.ts +164 -112
  84. package/src/services/open_ai_service.ts +89 -89
  85. package/src/services/qdrant_service.ts +10 -10
  86. package/src/types/base_param_type.ts +13 -13
  87. package/src/types/mongo_concept_fact_type.ts +12 -12
  88. package/src/types/parsed_card_type.ts +39 -39
  89. package/src/types/raw_card_response_types/generated_card_response_type.ts +59 -59
  90. package/src/types/source_taxonomy_type.ts +24 -24
  91. package/src/typology-parsed-response.ts +1932 -1932
  92. package/src/typology_gen/generate_concept_facts.ts +180 -169
  93. package/src/typology_gen/generate_typology.ts +203 -189
  94. package/src/typology_gen/summarize.ts +176 -164
  95. package/src/utils/distributed_quote_restoration.ts +80 -80
  96. package/src/utils/generate_args.ts +29 -29
  97. package/src/utils/parse_openai_response.ts +19 -19
  98. package/src/utils/sanitize_strings.ts +65 -65
  99. package/tsconfig.json +16 -16
  100. package/dist/constants/default_generation_variables.d.ts +0 -3
  101. package/dist/constants/default_generation_variables.d.ts.map +0 -1
  102. package/dist/constants/default_generation_variables.js +0 -580
  103. package/dist/constants/default_generation_variables.js.map +0 -1
  104. package/dist/services/prompts_test.d.ts +0 -10
  105. package/dist/services/prompts_test.d.ts.map +0 -1
  106. package/dist/services/prompts_test.js +0 -227
  107. package/dist/services/prompts_test.js.map +0 -1
  108. package/dist/types/generation_variables_schema.d.ts +0 -14
  109. package/dist/types/generation_variables_schema.d.ts.map +0 -1
  110. package/dist/types/generation_variables_schema.js +0 -3
  111. package/dist/types/generation_variables_schema.js.map +0 -1
  112. package/dist/utils/test.d.ts +0 -2
  113. package/dist/utils/test.d.ts.map +0 -1
  114. package/dist/utils/test.js +0 -5
  115. package/dist/utils/test.js.map +0 -1
  116. package/src/constants/default_generation_variables.ts +0 -624
  117. package/src/types/generation_variables_schema.ts +0 -16
@@ -1,212 +1,212 @@
1
- import { SourceTaxonomy } from "../types/source_taxonomy_type";
2
-
3
- export class ParseSourceContent {
4
- public content: any;
5
- /// Format of Content
6
- // content: {
7
- // title: source.title,
8
- // headings: source.headings,
9
- // content: source.content,
10
- // fields: fields,
11
- // taxonomy: source.source_taxonomy,
12
- // type: source.source_type
13
- // },
14
-
15
- titles_to_remove = [
16
- "See also",
17
- "References",
18
- "Further reading",
19
- "External links",
20
- "Notes and references",
21
- "Bibliography",
22
- "Notes",
23
- "Cited sources",
24
- ];
25
- block_types_toremove = ["table", "empty_line"];
26
- constructor(sourceContent: any) {
27
- this.content = sourceContent;
28
- }
29
-
30
- parseData(): {
31
- source_id: string;
32
- type: string;
33
- title: string;
34
- content: any[];
35
- headings: string[];
36
- taxonomy: SourceTaxonomy;
37
- } {
38
- let sourceType = this.content.type;
39
- let afterSanitized;
40
- if (sourceType == "video") {
41
- afterSanitized = this.parseVideoContent(this.content.content);
42
- } else {
43
- let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(
44
- this.content.content
45
- );
46
- afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
47
- }
48
- return {
49
- source_id: this.content.source_id,
50
- type: this.content.type,
51
- title: this.content.title,
52
- content: afterSanitized,
53
- headings: this.content.headings,
54
- taxonomy: this.content.taxonomy,
55
- };
56
- }
57
-
58
- removeSectionsByTitle(data: Array<any>) {
59
- let dataAfterRemoving = [];
60
- for (let elem of data) {
61
- if (
62
- elem.block_type == "heading" &&
63
- this.titles_to_remove.includes(elem.content)
64
- ) {
65
- continue;
66
- }
67
- /// remove unwanted blcok types , for now `table` and `empty_line`
68
- if (this.block_types_toremove.includes(elem.block_type)) {
69
- continue;
70
- }
71
- if (elem.children) {
72
- elem.children = this.removeSectionsByTitle(elem.children);
73
- }
74
- dataAfterRemoving.push(elem);
75
- }
76
- return dataAfterRemoving;
77
- }
78
-
79
- sanitizeTextContent(content: String) {
80
- // Remove newline characters
81
- content = content.replace(/\\n/g, " ");
82
-
83
- // Remove internal link references, keeping only the link text
84
- // Pattern explanation: [[link|text|index|wiki]] --> text
85
- content = content.replace(/\[\[.*?\|(.*?)\|.*?\|wiki\]\]/g, "$1");
86
-
87
- // Remove external links, keeping only the link text
88
- // Pattern explanation: [url text] --> text
89
- content = content.replace(/\[http[s]?:\/\/[^\s]+ ([^\]]+)\]/g, "$1");
90
-
91
- // Remove Markdown link references, keeping only the link text
92
- // Pattern explanation: ![link text](url) --> link text
93
- content = content.replace(/\!\[([^\]]+)\]\([^\)]+\)/g, "$1");
94
-
95
- return content;
96
- }
97
-
98
- sanitizeBlocks(blocks: Array<any>) {
99
- let sanitizedBlocks = <any>[];
100
- blocks = blocks.filter((item) => item.block_type != "table");
101
- blocks.forEach((block) => {
102
- let sanitizedBlock: any = {};
103
- for (let key in block) {
104
- let value = block[key];
105
- if (typeof value === "string") {
106
- sanitizedBlock[key] = this.sanitizeTextContent(value);
107
- } else if (Array.isArray(value)) {
108
- sanitizedBlock[key] = this.sanitizeBlocks(value);
109
- } else {
110
- sanitizedBlock[key] = value;
111
- }
112
- }
113
- sanitizedBlocks.push(sanitizedBlock);
114
- });
115
- return sanitizedBlocks;
116
- }
117
-
118
- parseVideoContent(data: Array<any>) {
119
- let finalChapters: Array<any> = [];
120
- // let cleanedData = this.cleanTranscript(timeCodes);
121
- data.forEach((e) => {
122
- let combinedContent = this.cleanTranscript(e);
123
- finalChapters.push({
124
- startTime: e.startTime,
125
- endTime: e.endTime,
126
- content: combinedContent,
127
- title: e.content,
128
- });
129
- });
130
-
131
- return finalChapters;
132
- }
133
-
134
- // remove content inside [] which denotes non-speech sounds
135
- isNonSpeech(content: string) {
136
- // Check if the content is non-speech (enclosed in square brackets).
137
- return /^\[.*\]$/.test(content.trim());
138
- }
139
-
140
- // remove non-essential content
141
- cleanTranscript(data: any) {
142
- let finalContent = "";
143
- let children = data.children ?? [];
144
-
145
- children.forEach((e: any) => {
146
- let content = (e.content || "").trim();
147
-
148
- if (this.isNonSpeech(content)) return;
149
-
150
- content = content.replace(/\s+/g, " ");
151
- finalContent += content;
152
- });
153
-
154
- return finalContent;
155
- }
156
-
157
- // collapse the timecode to 30 seconds
158
- collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
159
- // Collapse time codes into buckets of approximately maxDuration seconds.
160
- const collapsedData = [];
161
- let bucketStartTime: number | null = null;
162
- let bucketEndTime: number | null = null;
163
- let bucketContent: Array<any> = [];
164
- let bucketDuration = 0.0;
165
-
166
- data.forEach((entry) => {
167
- const startTime = entry.start_time;
168
- const endTime = entry.end_time;
169
- const content = entry.content;
170
- const entryDuration = endTime - startTime;
171
-
172
- if (bucketStartTime === null) {
173
- // Start a new bucket
174
- bucketStartTime = startTime;
175
- bucketEndTime = endTime;
176
- bucketContent.push(content);
177
- bucketDuration = entryDuration;
178
- } else if (bucketDuration + entryDuration <= maxDuration) {
179
- // Add to current bucket
180
- bucketEndTime = endTime;
181
- bucketContent.push(content);
182
- bucketDuration += entryDuration;
183
- } else {
184
- // Close current bucket and start a new one
185
- const collapsedEntry = {
186
- start_time: bucketStartTime,
187
- end_time: bucketEndTime,
188
- content: bucketContent.join(" "),
189
- };
190
- collapsedData.push(collapsedEntry);
191
-
192
- // Start new bucket with current entry
193
- bucketStartTime = startTime;
194
- bucketEndTime = endTime;
195
- bucketContent = [content];
196
- bucketDuration = entryDuration;
197
- }
198
- });
199
-
200
- // Add the last bucket if it exists
201
- if (bucketContent.length > 0) {
202
- const collapsedEntry = {
203
- start_time: bucketStartTime,
204
- end_time: bucketEndTime,
205
- content: bucketContent.join(" "),
206
- };
207
- collapsedData.push(collapsedEntry);
208
- }
209
-
210
- return collapsedData;
211
- }
212
- }
1
+ import { SourceTaxonomy } from "../types/source_taxonomy_type";
2
+
3
/**
 * Normalises a raw source document before it is handed to downstream
 * generation steps.
 *
 * Two shapes of input are handled, selected by `content.type`:
 *  - `"video"`: every top-level entry is treated as a chapter whose
 *    `children` carry transcript segments; the segments are cleaned and merged.
 *  - anything else: a tree of blocks (`block_type`, `content`, `children`)
 *    from which boilerplate sections and unwanted block types are removed and
 *    whose string fields are stripped of link markup.
 */
export class ParseSourceContent {
  public content: any;
  /// Format of Content
  // content: {
  //   title: source.title,
  //   headings: source.headings,
  //   content: source.content,
  //   fields: fields,
  //   taxonomy: source.source_taxonomy,
  //   type: source.source_type
  // },

  /** Heading texts (exact match) whose heading block is dropped from the tree. */
  titles_to_remove = [
    "See also",
    "References",
    "Further reading",
    "External links",
    "Notes and references",
    "Bibliography",
    "Notes",
    "Cited sources",
  ];

  /** `block_type` values that are dropped wherever they occur in the tree. */
  block_types_toremove = ["table", "empty_line"];

  /**
   * @param sourceContent Raw source record; see the "Format of Content" note
   *   above for the fields this class reads.
   */
  constructor(sourceContent: any) {
    this.content = sourceContent;
  }

  /**
   * Produces the cleaned representation of the source.
   *
   * @returns The identifying fields copied verbatim from the input plus the
   *   cleaned `content` array (chapters for videos, sanitised blocks otherwise).
   */
  parseData(): {
    source_id: string;
    type: string;
    title: string;
    content: any[];
    headings: string[];
    taxonomy: SourceTaxonomy;
  } {
    // A missing body is treated as an empty document instead of crashing.
    const rawBlocks: any[] = Array.isArray(this.content.content)
      ? this.content.content
      : [];

    const cleaned =
      this.content.type === "video"
        ? this.parseVideoContent(rawBlocks)
        : this.sanitizeBlocks(this.removeSectionsByTitle(rawBlocks));

    return {
      source_id: this.content.source_id,
      type: this.content.type,
      title: this.content.title,
      content: cleaned,
      headings: this.content.headings,
      taxonomy: this.content.taxonomy,
    };
  }

  /**
   * Recursively removes headings listed in `titles_to_remove` and blocks whose
   * type is listed in `block_types_toremove`.
   *
   * The input is not modified: blocks whose children are filtered are shallow
   * copied, so the caller's tree keeps its original children.
   *
   * @param data Blocks at one level of the tree.
   * @returns A new array containing only the blocks that were kept.
   */
  removeSectionsByTitle(data: Array<any>): any[] {
    const kept: any[] = [];
    for (const elem of data) {
      // Null/undefined entries carry no content and cannot be inspected.
      if (elem == null) continue;

      const isUnwantedHeading =
        elem.block_type === "heading" &&
        this.titles_to_remove.includes(elem.content);
      /// remove unwanted block types, for now `table` and `empty_line`
      const isUnwantedType = this.block_types_toremove.includes(
        elem.block_type
      );
      if (isUnwantedHeading || isUnwantedType) continue;

      // Only real arrays are descended into; any other `children` value is
      // passed through untouched.
      kept.push(
        Array.isArray(elem.children)
          ? { ...elem, children: this.removeSectionsByTitle(elem.children) }
          : elem
      );
    }
    return kept;
  }

  /**
   * Strips line breaks and link/image markup from a piece of text, keeping the
   * human-readable label of each link.
   *
   * @param content Text to clean.
   * @returns The cleaned text.
   */
  sanitizeTextContent(content: string): string {
    return (
      content
        // Replace both the escaped two-character sequence `\n` and real line
        // breaks (LF / CRLF) with a single space.
        .replace(/\\n|\r?\n/g, " ")
        // Internal wiki links: [[link|text|index|wiki]] --> text
        .replace(/\[\[.*?\|(.*?)\|.*?\|wiki\]\]/g, "$1")
        // External links: [url text] --> text
        .replace(/\[http[s]?:\/\/[^\s]+ ([^\]]+)\]/g, "$1")
        // Markdown images: ![alt text](url) --> alt text
        .replace(/\!\[([^\]]+)\]\([^\)]+\)/g, "$1")
    );
  }

  /**
   * Returns a sanitised copy of a list of blocks.
   *
   * Blocks of type `table` are dropped. For every remaining block each string
   * property is passed through `sanitizeTextContent`, each array property is
   * sanitised recursively, and every other property is copied as is. Array
   * elements that are plain strings are sanitised as text (not exploded into
   * per-character objects) and `null`/primitive elements are passed through.
   *
   * @param blocks Blocks, or an array-valued property of a block.
   * @returns A new array; the input is not modified.
   */
  sanitizeBlocks(blocks: Array<any>): any[] {
    const sanitizedBlocks: any[] = [];
    for (const item of blocks) {
      if (item != null && item.block_type === "table") continue;

      if (typeof item === "string") {
        sanitizedBlocks.push(this.sanitizeTextContent(item));
      } else if (Array.isArray(item)) {
        sanitizedBlocks.push(this.sanitizeBlocks(item));
      } else if (item !== null && typeof item === "object") {
        sanitizedBlocks.push(this.sanitizeBlock(item));
      } else {
        sanitizedBlocks.push(item);
      }
    }
    return sanitizedBlocks;
  }

  /** Sanitises the own properties of a single block object into a new object. */
  private sanitizeBlock(block: Record<string, any>): Record<string, any> {
    const sanitizedBlock: Record<string, any> = {};
    for (const key of Object.keys(block)) {
      const value = block[key];
      if (typeof value === "string") {
        sanitizedBlock[key] = this.sanitizeTextContent(value);
      } else if (Array.isArray(value)) {
        sanitizedBlock[key] = this.sanitizeBlocks(value);
      } else {
        sanitizedBlock[key] = value;
      }
    }
    return sanitizedBlock;
  }

  /**
   * Converts video chapters into flat chapter records.
   *
   * @param data Chapter entries; each one's `content` is used as the chapter
   *   title and its `children` as the transcript segments.
   * @returns One `{ startTime, endTime, content, title }` record per entry,
   *   where `content` is the cleaned, merged transcript.
   */
  parseVideoContent(data: Array<any>): any[] {
    return data.map((chapter) => ({
      startTime: chapter.startTime,
      endTime: chapter.endTime,
      content: this.cleanTranscript(chapter),
      title: chapter.content,
    }));
  }

  /**
   * Tells whether a transcript segment consists solely of bracketed
   * annotations such as `[Music]` or `[Music] [Applause]`.
   *
   * A segment that merely starts and ends with an annotation but has speech in
   * between (`[Music] hello [Applause]`) is NOT considered non-speech.
   *
   * @param content Segment text; surrounding whitespace is ignored.
   */
  isNonSpeech(content: string): boolean {
    return /^(?:\[[^\[\]]*\]\s*)+$/.test(content.trim());
  }

  /**
   * Merges the transcript segments of one chapter into a single string.
   *
   * Segments that are empty, not strings, or purely non-speech annotations are
   * skipped; whitespace runs inside a segment are collapsed; the remaining
   * segments are joined with a single space so that words from adjacent
   * segments do not run together.
   *
   * @param data Chapter entry whose `children` hold `{ content }` segments.
   * @returns The merged transcript, or `""` when there is nothing to keep.
   */
  cleanTranscript(data: any): string {
    const segments: any[] = Array.isArray(data?.children) ? data.children : [];
    const spoken: string[] = [];

    for (const segment of segments) {
      const text =
        typeof segment?.content === "string" ? segment.content.trim() : "";
      if (text === "" || this.isNonSpeech(text)) continue;
      spoken.push(text.replace(/\s+/g, " "));
    }

    return spoken.join(" ");
  }

  /**
   * Groups consecutive time-coded entries into buckets whose summed entry
   * durations do not exceed `maxDuration`.
   *
   * An entry that would push the running total past the limit closes the
   * current bucket and opens a new one, so a single entry longer than
   * `maxDuration` ends up alone in its own bucket.
   *
   * @param data Entries carrying `start_time`, `end_time` and `content`.
   * @param maxDuration Upper bound for the summed durations of one bucket, in
   *   the same unit as the time codes (the default of 30 is described as
   *   seconds by the original comment).
   * @returns One `{ start_time, end_time, content }` record per bucket, with
   *   the bucket's contents joined by a single space.
   */
  collapseTimeCodes(
    data: Array<any>,
    maxDuration = 30.0
  ): Array<{
    start_time: number | null;
    end_time: number | null;
    content: string;
  }> {
    const collapsedData: Array<{
      start_time: number | null;
      end_time: number | null;
      content: string;
    }> = [];
    let bucketStartTime: number | null = null;
    let bucketEndTime: number | null = null;
    let bucketContent: Array<any> = [];
    let bucketDuration = 0.0;

    for (const entry of data) {
      const startTime = entry.start_time;
      const endTime = entry.end_time;
      const content = entry.content;
      const entryDuration = endTime - startTime;

      if (bucketStartTime === null) {
        // Start the very first bucket.
        bucketStartTime = startTime;
        bucketEndTime = endTime;
        bucketContent.push(content);
        bucketDuration = entryDuration;
      } else if (bucketDuration + entryDuration <= maxDuration) {
        // The entry still fits: extend the current bucket.
        bucketEndTime = endTime;
        bucketContent.push(content);
        bucketDuration += entryDuration;
      } else {
        // Close the current bucket and start a new one with this entry.
        collapsedData.push({
          start_time: bucketStartTime,
          end_time: bucketEndTime,
          content: bucketContent.join(" "),
        });
        bucketStartTime = startTime;
        bucketEndTime = endTime;
        bucketContent = [content];
        bucketDuration = entryDuration;
      }
    }

    // Flush the bucket that was still open when the input ended.
    if (bucketContent.length > 0) {
      collapsedData.push({
        start_time: bucketStartTime,
        end_time: bucketEndTime,
        content: bucketContent.join(" "),
      });
    }

    return collapsedData;
  }
}