only_ever_generator 8.4.6 → 8.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/dist/bootstrap/app.d.ts +6 -3
  2. package/dist/bootstrap/app.d.ts.map +1 -1
  3. package/dist/bootstrap/app.js +11 -17
  4. package/dist/bootstrap/app.js.map +1 -1
  5. package/dist/card_gen/generate_cards.d.ts +2 -3
  6. package/dist/card_gen/generate_cards.d.ts.map +1 -1
  7. package/dist/card_gen/generate_cards.js +22 -15
  8. package/dist/card_gen/generate_cards.js.map +1 -1
  9. package/dist/constants/prompt_data.d.ts +4 -4
  10. package/dist/constants/prompt_data.js +302 -302
  11. package/dist/constants/prompts/card_gen_prompt.js +160 -160
  12. package/dist/constants/prompts/typology_prompt.js +131 -131
  13. package/dist/constants/source_data.d.ts +171 -171
  14. package/dist/constants/source_data.js +973 -973
  15. package/dist/embedding_generation/local_consolidation.js +104 -104
  16. package/dist/helper/build_concept_facts_schema.d.ts +42 -42
  17. package/dist/helper/build_concept_facts_schema.js +44 -44
  18. package/dist/helper/qdrant_db_methods.d.ts.map +1 -1
  19. package/dist/helper/schema_helper/build_card_schema.d.ts +1 -9
  20. package/dist/helper/schema_helper/build_card_schema.d.ts.map +1 -1
  21. package/dist/helper/schema_helper/build_card_schema.js +47 -50
  22. package/dist/helper/schema_helper/build_card_schema.js.map +1 -1
  23. package/dist/helper/schema_helper/build_concept_facts_schema.d.ts +1 -1
  24. package/dist/helper/schema_helper/build_concept_facts_schema.d.ts.map +1 -1
  25. package/dist/helper/schema_helper/build_concept_facts_schema.js +20 -5
  26. package/dist/helper/schema_helper/build_concept_facts_schema.js.map +1 -1
  27. package/dist/helper/schema_helper/build_summary_schema.d.ts +1 -1
  28. package/dist/helper/schema_helper/build_summary_schema.d.ts.map +1 -1
  29. package/dist/helper/schema_helper/build_summary_schema.js +18 -7
  30. package/dist/helper/schema_helper/build_summary_schema.js.map +1 -1
  31. package/dist/index.d.ts +0 -2
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/index.js +10 -9
  34. package/dist/index.js.map +1 -1
  35. package/dist/parse/response_format_card.d.ts +176 -176
  36. package/dist/parse/response_format_card.js +371 -371
  37. package/dist/parse/response_format_typology.d.ts +1 -1
  38. package/dist/parse/response_format_typology.js +46 -46
  39. package/dist/services/get_prompts.d.ts +8 -7
  40. package/dist/services/get_prompts.d.ts.map +1 -1
  41. package/dist/services/get_prompts.js +69 -21
  42. package/dist/services/get_prompts.js.map +1 -1
  43. package/dist/typology_gen/generate_concept_facts.d.ts +2 -3
  44. package/dist/typology_gen/generate_concept_facts.d.ts.map +1 -1
  45. package/dist/typology_gen/generate_concept_facts.js +25 -15
  46. package/dist/typology_gen/generate_concept_facts.js.map +1 -1
  47. package/dist/typology_gen/generate_typology.d.ts +2 -1
  48. package/dist/typology_gen/generate_typology.d.ts.map +1 -1
  49. package/dist/typology_gen/generate_typology.js +24 -13
  50. package/dist/typology_gen/generate_typology.js.map +1 -1
  51. package/dist/typology_gen/summarize.d.ts +2 -3
  52. package/dist/typology_gen/summarize.d.ts.map +1 -1
  53. package/dist/typology_gen/summarize.js +24 -13
  54. package/dist/typology_gen/summarize.js.map +1 -1
  55. package/package.json +39 -38
  56. package/src/bootstrap/app.ts +418 -416
  57. package/src/card_gen/generate_cards.ts +347 -345
  58. package/src/config.ts +11 -11
  59. package/src/constants/api_constants.ts +7 -7
  60. package/src/constants/prompts/card_gen_prompt.ts +164 -164
  61. package/src/constants/prompts/typology_prompt.ts +139 -139
  62. package/src/embedding_generation/consolidation/global_consolidation.ts +96 -96
  63. package/src/embedding_generation/consolidation/local_consolidation.ts +141 -141
  64. package/src/embedding_generation/consolidation/write_consolidated_data.ts +98 -98
  65. package/src/embedding_generation/generate_embeddings.ts +42 -42
  66. package/src/embedding_generation/parse_embedding_response.ts +31 -31
  67. package/src/enums/card_type_enum.ts +6 -6
  68. package/src/gap_fill/calculate_gap_fill.ts +50 -50
  69. package/src/helper/get_id_from_title.ts +33 -33
  70. package/src/helper/mongo_helper.ts +29 -29
  71. package/src/helper/openai_helper.ts +20 -20
  72. package/src/helper/qdrant_db_methods.ts +77 -77
  73. package/src/helper/schema_helper/build_card_schema.ts +74 -98
  74. package/src/helper/schema_helper/build_classify_summarize_schema.ts +43 -43
  75. package/src/helper/schema_helper/build_concept_facts_schema.ts +45 -31
  76. package/src/helper/schema_helper/build_summary_schema.ts +43 -32
  77. package/src/index.ts +71 -73
  78. package/src/logger.ts +65 -65
  79. package/src/parse/parse_card/parse_cloze_card.ts +146 -146
  80. package/src/parse/parse_card/parse_flash_cards.ts +42 -42
  81. package/src/parse/parse_card/parse_match_card.ts +104 -104
  82. package/src/parse/parse_card/parse_mcq_card.ts +114 -114
  83. package/src/parse/parse_card_response.ts +197 -197
  84. package/src/parse/parse_source_content.ts +212 -212
  85. package/src/services/get_prompts.ts +164 -112
  86. package/src/services/open_ai_service.ts +89 -89
  87. package/src/services/qdrant_service.ts +10 -10
  88. package/src/types/base_param_type.ts +13 -13
  89. package/src/types/mongo_concept_fact_type.ts +12 -12
  90. package/src/types/parsed_card_type.ts +39 -39
  91. package/src/types/raw_card_response_types/generated_card_response_type.ts +59 -59
  92. package/src/types/source_taxonomy_type.ts +24 -24
  93. package/src/typology-parsed-response.ts +1932 -1932
  94. package/src/typology_gen/generate_concept_facts.ts +180 -169
  95. package/src/typology_gen/generate_typology.ts +203 -189
  96. package/src/typology_gen/summarize.ts +176 -164
  97. package/src/utils/distributed_quote_restoration.ts +80 -80
  98. package/src/utils/generate_args.ts +29 -29
  99. package/src/utils/parse_openai_response.ts +19 -19
  100. package/src/utils/sanitize_strings.ts +65 -65
  101. package/tsconfig.json +16 -16
  102. package/dist/constants/default_generation_variables.d.ts +0 -3
  103. package/dist/constants/default_generation_variables.d.ts.map +0 -1
  104. package/dist/constants/default_generation_variables.js +0 -580
  105. package/dist/constants/default_generation_variables.js.map +0 -1
  106. package/dist/services/prompts_test.d.ts +0 -10
  107. package/dist/services/prompts_test.d.ts.map +0 -1
  108. package/dist/services/prompts_test.js +0 -227
  109. package/dist/services/prompts_test.js.map +0 -1
  110. package/dist/types/generation_variables_schema.d.ts +0 -14
  111. package/dist/types/generation_variables_schema.d.ts.map +0 -1
  112. package/dist/types/generation_variables_schema.js +0 -3
  113. package/dist/types/generation_variables_schema.js.map +0 -1
  114. package/dist/utils/test.d.ts +0 -2
  115. package/dist/utils/test.d.ts.map +0 -1
  116. package/dist/utils/test.js +0 -5
  117. package/dist/utils/test.js.map +0 -1
  118. package/src/constants/default_generation_variables.ts +0 -624
  119. package/src/types/generation_variables_schema.ts +0 -16
@@ -1,212 +1,212 @@
1
- import { SourceTaxonomy } from "../types/source_taxonomy_type";
2
-
3
- export class ParseSourceContent {
4
- public content: any;
5
- /// Format of Content
6
- // content: {
7
- // title: source.title,
8
- // headings: source.headings,
9
- // content: source.content,
10
- // fields: fields,
11
- // taxonomy: source.source_taxonomy,
12
- // type: source.source_type
13
- // },
14
-
15
- titles_to_remove = [
16
- "See also",
17
- "References",
18
- "Further reading",
19
- "External links",
20
- "Notes and references",
21
- "Bibliography",
22
- "Notes",
23
- "Cited sources",
24
- ];
25
- block_types_toremove = ["table", "empty_line"];
26
- constructor(sourceContent: any) {
27
- this.content = sourceContent;
28
- }
29
-
30
- parseData(): {
31
- source_id: string;
32
- type: string;
33
- title: string;
34
- content: any[];
35
- headings: string[];
36
- taxonomy: SourceTaxonomy;
37
- } {
38
- let sourceType = this.content.type;
39
- let afterSanitized;
40
- if (sourceType == "video") {
41
- afterSanitized = this.parseVideoContent(this.content.content);
42
- } else {
43
- let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(
44
- this.content.content
45
- );
46
- afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
47
- }
48
- return {
49
- source_id: this.content.source_id,
50
- type: this.content.type,
51
- title: this.content.title,
52
- content: afterSanitized,
53
- headings: this.content.headings,
54
- taxonomy: this.content.taxonomy,
55
- };
56
- }
57
-
58
- removeSectionsByTitle(data: Array<any>) {
59
- let dataAfterRemoving = [];
60
- for (let elem of data) {
61
- if (
62
- elem.block_type == "heading" &&
63
- this.titles_to_remove.includes(elem.content)
64
- ) {
65
- continue;
66
- }
67
- /// remove unwanted blcok types , for now `table` and `empty_line`
68
- if (this.block_types_toremove.includes(elem.block_type)) {
69
- continue;
70
- }
71
- if (elem.children) {
72
- elem.children = this.removeSectionsByTitle(elem.children);
73
- }
74
- dataAfterRemoving.push(elem);
75
- }
76
- return dataAfterRemoving;
77
- }
78
-
79
- sanitizeTextContent(content: String) {
80
- // Remove newline characters
81
- content = content.replace(/\\n/g, " ");
82
-
83
- // Remove internal link references, keeping only the link text
84
- // Pattern explanation: [[link|text|index|wiki]] --> text
85
- content = content.replace(/\[\[.*?\|(.*?)\|.*?\|wiki\]\]/g, "$1");
86
-
87
- // Remove external links, keeping only the link text
88
- // Pattern explanation: [url text] --> text
89
- content = content.replace(/\[http[s]?:\/\/[^\s]+ ([^\]]+)\]/g, "$1");
90
-
91
- // Remove Markdown link references, keeping only the link text
92
- // Pattern explanation: ![link text](url) --> link text
93
- content = content.replace(/\!\[([^\]]+)\]\([^\)]+\)/g, "$1");
94
-
95
- return content;
96
- }
97
-
98
- sanitizeBlocks(blocks: Array<any>) {
99
- let sanitizedBlocks = <any>[];
100
- blocks = blocks.filter((item) => item.block_type != "table");
101
- blocks.forEach((block) => {
102
- let sanitizedBlock: any = {};
103
- for (let key in block) {
104
- let value = block[key];
105
- if (typeof value === "string") {
106
- sanitizedBlock[key] = this.sanitizeTextContent(value);
107
- } else if (Array.isArray(value)) {
108
- sanitizedBlock[key] = this.sanitizeBlocks(value);
109
- } else {
110
- sanitizedBlock[key] = value;
111
- }
112
- }
113
- sanitizedBlocks.push(sanitizedBlock);
114
- });
115
- return sanitizedBlocks;
116
- }
117
-
118
- parseVideoContent(data: Array<any>) {
119
- let finalChapters: Array<any> = [];
120
- // let cleanedData = this.cleanTranscript(timeCodes);
121
- data.forEach((e) => {
122
- let combinedContent = this.cleanTranscript(e);
123
- finalChapters.push({
124
- startTime: e.startTime,
125
- endTime: e.endTime,
126
- content: combinedContent,
127
- title: e.content,
128
- });
129
- });
130
-
131
- return finalChapters;
132
- }
133
-
134
- // remove content inside [] which denotes non-speech sounds
135
- isNonSpeech(content: string) {
136
- // Check if the content is non-speech (enclosed in square brackets).
137
- return /^\[.*\]$/.test(content.trim());
138
- }
139
-
140
- // remove non-essential content
141
- cleanTranscript(data: any) {
142
- let finalContent = "";
143
- let children = data.children ?? [];
144
-
145
- children.forEach((e: any) => {
146
- let content = (e.content || "").trim();
147
-
148
- if (this.isNonSpeech(content)) return;
149
-
150
- content = content.replace(/\s+/g, " ");
151
- finalContent += content;
152
- });
153
-
154
- return finalContent;
155
- }
156
-
157
- // collapse the timecode to 30 seconds
158
- collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
159
- // Collapse time codes into buckets of approximately maxDuration seconds.
160
- const collapsedData = [];
161
- let bucketStartTime: number | null = null;
162
- let bucketEndTime: number | null = null;
163
- let bucketContent: Array<any> = [];
164
- let bucketDuration = 0.0;
165
-
166
- data.forEach((entry) => {
167
- const startTime = entry.start_time;
168
- const endTime = entry.end_time;
169
- const content = entry.content;
170
- const entryDuration = endTime - startTime;
171
-
172
- if (bucketStartTime === null) {
173
- // Start a new bucket
174
- bucketStartTime = startTime;
175
- bucketEndTime = endTime;
176
- bucketContent.push(content);
177
- bucketDuration = entryDuration;
178
- } else if (bucketDuration + entryDuration <= maxDuration) {
179
- // Add to current bucket
180
- bucketEndTime = endTime;
181
- bucketContent.push(content);
182
- bucketDuration += entryDuration;
183
- } else {
184
- // Close current bucket and start a new one
185
- const collapsedEntry = {
186
- start_time: bucketStartTime,
187
- end_time: bucketEndTime,
188
- content: bucketContent.join(" "),
189
- };
190
- collapsedData.push(collapsedEntry);
191
-
192
- // Start new bucket with current entry
193
- bucketStartTime = startTime;
194
- bucketEndTime = endTime;
195
- bucketContent = [content];
196
- bucketDuration = entryDuration;
197
- }
198
- });
199
-
200
- // Add the last bucket if it exists
201
- if (bucketContent.length > 0) {
202
- const collapsedEntry = {
203
- start_time: bucketStartTime,
204
- end_time: bucketEndTime,
205
- content: bucketContent.join(" "),
206
- };
207
- collapsedData.push(collapsedEntry);
208
- }
209
-
210
- return collapsedData;
211
- }
212
- }
1
+ import { SourceTaxonomy } from "../types/source_taxonomy_type";
2
+
3
+ export class ParseSourceContent {
4
+ public content: any;
5
+ /// Format of Content
6
+ // content: {
7
+ // title: source.title,
8
+ // headings: source.headings,
9
+ // content: source.content,
10
+ // fields: fields,
11
+ // taxonomy: source.source_taxonomy,
12
+ // type: source.source_type
13
+ // },
14
+
15
+ titles_to_remove = [
16
+ "See also",
17
+ "References",
18
+ "Further reading",
19
+ "External links",
20
+ "Notes and references",
21
+ "Bibliography",
22
+ "Notes",
23
+ "Cited sources",
24
+ ];
25
+ block_types_toremove = ["table", "empty_line"];
26
+ constructor(sourceContent: any) {
27
+ this.content = sourceContent;
28
+ }
29
+
30
+ parseData(): {
31
+ source_id: string;
32
+ type: string;
33
+ title: string;
34
+ content: any[];
35
+ headings: string[];
36
+ taxonomy: SourceTaxonomy;
37
+ } {
38
+ let sourceType = this.content.type;
39
+ let afterSanitized;
40
+ if (sourceType == "video") {
41
+ afterSanitized = this.parseVideoContent(this.content.content);
42
+ } else {
43
+ let dataAfterRemovingUnWantedBlocks = this.removeSectionsByTitle(
44
+ this.content.content
45
+ );
46
+ afterSanitized = this.sanitizeBlocks(dataAfterRemovingUnWantedBlocks);
47
+ }
48
+ return {
49
+ source_id: this.content.source_id,
50
+ type: this.content.type,
51
+ title: this.content.title,
52
+ content: afterSanitized,
53
+ headings: this.content.headings,
54
+ taxonomy: this.content.taxonomy,
55
+ };
56
+ }
57
+
58
+ removeSectionsByTitle(data: Array<any>) {
59
+ let dataAfterRemoving = [];
60
+ for (let elem of data) {
61
+ if (
62
+ elem.block_type == "heading" &&
63
+ this.titles_to_remove.includes(elem.content)
64
+ ) {
65
+ continue;
66
+ }
67
+ /// remove unwanted blcok types , for now `table` and `empty_line`
68
+ if (this.block_types_toremove.includes(elem.block_type)) {
69
+ continue;
70
+ }
71
+ if (elem.children) {
72
+ elem.children = this.removeSectionsByTitle(elem.children);
73
+ }
74
+ dataAfterRemoving.push(elem);
75
+ }
76
+ return dataAfterRemoving;
77
+ }
78
+
79
+ sanitizeTextContent(content: String) {
80
+ // Remove newline characters
81
+ content = content.replace(/\\n/g, " ");
82
+
83
+ // Remove internal link references, keeping only the link text
84
+ // Pattern explanation: [[link|text|index|wiki]] --> text
85
+ content = content.replace(/\[\[.*?\|(.*?)\|.*?\|wiki\]\]/g, "$1");
86
+
87
+ // Remove external links, keeping only the link text
88
+ // Pattern explanation: [url text] --> text
89
+ content = content.replace(/\[http[s]?:\/\/[^\s]+ ([^\]]+)\]/g, "$1");
90
+
91
+ // Remove Markdown link references, keeping only the link text
92
+ // Pattern explanation: ![link text](url) --> link text
93
+ content = content.replace(/\!\[([^\]]+)\]\([^\)]+\)/g, "$1");
94
+
95
+ return content;
96
+ }
97
+
98
+ sanitizeBlocks(blocks: Array<any>) {
99
+ let sanitizedBlocks = <any>[];
100
+ blocks = blocks.filter((item) => item.block_type != "table");
101
+ blocks.forEach((block) => {
102
+ let sanitizedBlock: any = {};
103
+ for (let key in block) {
104
+ let value = block[key];
105
+ if (typeof value === "string") {
106
+ sanitizedBlock[key] = this.sanitizeTextContent(value);
107
+ } else if (Array.isArray(value)) {
108
+ sanitizedBlock[key] = this.sanitizeBlocks(value);
109
+ } else {
110
+ sanitizedBlock[key] = value;
111
+ }
112
+ }
113
+ sanitizedBlocks.push(sanitizedBlock);
114
+ });
115
+ return sanitizedBlocks;
116
+ }
117
+
118
+ parseVideoContent(data: Array<any>) {
119
+ let finalChapters: Array<any> = [];
120
+ // let cleanedData = this.cleanTranscript(timeCodes);
121
+ data.forEach((e) => {
122
+ let combinedContent = this.cleanTranscript(e);
123
+ finalChapters.push({
124
+ startTime: e.startTime,
125
+ endTime: e.endTime,
126
+ content: combinedContent,
127
+ title: e.content,
128
+ });
129
+ });
130
+
131
+ return finalChapters;
132
+ }
133
+
134
+ // remove content inside [] which denotes non-speech sounds
135
+ isNonSpeech(content: string) {
136
+ // Check if the content is non-speech (enclosed in square brackets).
137
+ return /^\[.*\]$/.test(content.trim());
138
+ }
139
+
140
+ // remove non-essential content
141
+ cleanTranscript(data: any) {
142
+ let finalContent = "";
143
+ let children = data.children ?? [];
144
+
145
+ children.forEach((e: any) => {
146
+ let content = (e.content || "").trim();
147
+
148
+ if (this.isNonSpeech(content)) return;
149
+
150
+ content = content.replace(/\s+/g, " ");
151
+ finalContent += content;
152
+ });
153
+
154
+ return finalContent;
155
+ }
156
+
157
+ // collapse the timecode to 30 seconds
158
+ collapseTimeCodes(data: Array<any>, maxDuration = 30.0) {
159
+ // Collapse time codes into buckets of approximately maxDuration seconds.
160
+ const collapsedData = [];
161
+ let bucketStartTime: number | null = null;
162
+ let bucketEndTime: number | null = null;
163
+ let bucketContent: Array<any> = [];
164
+ let bucketDuration = 0.0;
165
+
166
+ data.forEach((entry) => {
167
+ const startTime = entry.start_time;
168
+ const endTime = entry.end_time;
169
+ const content = entry.content;
170
+ const entryDuration = endTime - startTime;
171
+
172
+ if (bucketStartTime === null) {
173
+ // Start a new bucket
174
+ bucketStartTime = startTime;
175
+ bucketEndTime = endTime;
176
+ bucketContent.push(content);
177
+ bucketDuration = entryDuration;
178
+ } else if (bucketDuration + entryDuration <= maxDuration) {
179
+ // Add to current bucket
180
+ bucketEndTime = endTime;
181
+ bucketContent.push(content);
182
+ bucketDuration += entryDuration;
183
+ } else {
184
+ // Close current bucket and start a new one
185
+ const collapsedEntry = {
186
+ start_time: bucketStartTime,
187
+ end_time: bucketEndTime,
188
+ content: bucketContent.join(" "),
189
+ };
190
+ collapsedData.push(collapsedEntry);
191
+
192
+ // Start new bucket with current entry
193
+ bucketStartTime = startTime;
194
+ bucketEndTime = endTime;
195
+ bucketContent = [content];
196
+ bucketDuration = entryDuration;
197
+ }
198
+ });
199
+
200
+ // Add the last bucket if it exists
201
+ if (bucketContent.length > 0) {
202
+ const collapsedEntry = {
203
+ start_time: bucketStartTime,
204
+ end_time: bucketEndTime,
205
+ content: bucketContent.join(" "),
206
+ };
207
+ collapsedData.push(collapsedEntry);
208
+ }
209
+
210
+ return collapsedData;
211
+ }
212
+ }