@digimakers/core 0.2.1 → 0.3.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/code-formatter.d.ts +2 -0
- package/dist/agents/code-formatter.d.ts.map +1 -0
- package/dist/agents/code-formatter.js +23 -0
- package/dist/agents/code-formatter.js.map +1 -0
- package/dist/docling-cleaner/cleaner.py +34 -0
- package/dist/docling-cleaner/pyproject.toml +10 -0
- package/dist/docling-cleaner/uv.lock +2077 -0
- package/dist/index.d.ts +0 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +0 -4
- package/dist/index.js.map +1 -1
- package/dist/parsing/docling-parser.d.ts +17 -0
- package/dist/parsing/docling-parser.d.ts.map +1 -0
- package/dist/parsing/docling-parser.js +109 -0
- package/dist/parsing/docling-parser.js.map +1 -0
- package/dist/parsing/docling-runners.d.ts +2 -0
- package/dist/parsing/docling-runners.d.ts.map +1 -0
- package/dist/parsing/docling-runners.js +85 -0
- package/dist/parsing/docling-runners.js.map +1 -0
- package/dist/parsing/docx-parser.d.ts +2 -2
- package/dist/parsing/docx-parser.d.ts.map +1 -1
- package/dist/parsing/docx-parser.js +118 -45
- package/dist/parsing/docx-parser.js.map +1 -1
- package/dist/parsing/index.d.ts +3 -0
- package/dist/parsing/index.d.ts.map +1 -1
- package/dist/parsing/index.js +2 -0
- package/dist/parsing/index.js.map +1 -1
- package/dist/parsing/normalise.d.ts +1 -0
- package/dist/parsing/normalise.d.ts.map +1 -1
- package/dist/parsing/normalise.js +41 -1
- package/dist/parsing/normalise.js.map +1 -1
- package/dist/parsing/post-processors.d.ts +6 -0
- package/dist/parsing/post-processors.d.ts.map +1 -0
- package/dist/parsing/post-processors.js +217 -0
- package/dist/parsing/post-processors.js.map +1 -0
- package/dist/parsing/prompts.d.ts +5 -0
- package/dist/parsing/prompts.d.ts.map +1 -0
- package/dist/parsing/prompts.js +24 -0
- package/dist/parsing/prompts.js.map +1 -0
- package/dist/pdf-generator.d.ts +3 -3
- package/dist/pdf-generator.d.ts.map +1 -1
- package/dist/pdf-generator.js +1 -1
- package/dist/pdf-generator.js.map +1 -1
- package/dist/sample-data.d.ts +2 -2
- package/dist/sample-data.d.ts.map +1 -1
- package/dist/sample-data.js +8 -7
- package/dist/sample-data.js.map +1 -1
- package/dist/schemas/index.d.ts +1 -1
- package/dist/schemas/index.d.ts.map +1 -1
- package/dist/schemas/index.js +1 -1
- package/dist/schemas/index.js.map +1 -1
- package/dist/schemas/lesson.d.ts +385 -39
- package/dist/schemas/lesson.d.ts.map +1 -1
- package/dist/schemas/lesson.js +96 -34
- package/dist/schemas/lesson.js.map +1 -1
- package/package.json +5 -3
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
export { StepWithImageSchema, StepsWithCodeBlockSchema, ChallengeSchema, NewProjectSchema, ParsedLessonSchema, GenerateOptionsSchema, type StepWithImage, type StepsWithCodeBlock, type MultipleStepsWithCodeBlock, type Challenge, type NewProject, type ParsedLesson, type GenerateOptions, } from './schemas/index.js';
|
|
2
1
|
export { createPdfGenerator, convertWithConcurrency, POOL_SIZE } from './pdf-generator.js';
|
|
3
2
|
export type { PdfGeneratorInstance, FileToConvert, ConversionResult } from './pdf-generator.js';
|
|
4
3
|
export { startServer, stopServer } from './server.js';
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC3F,YAAY,EAAE,oBAAoB,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAGhG,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACtD,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAGlD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC9D,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGxF,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,8 +1,4 @@
|
|
|
1
1
|
// Public API for @digimakers/core
|
|
2
|
-
// Schemas and types (single source of truth)
|
|
3
|
-
export {
|
|
4
|
-
// Schemas
|
|
5
|
-
StepWithImageSchema, StepsWithCodeBlockSchema, ChallengeSchema, NewProjectSchema, ParsedLessonSchema, GenerateOptionsSchema, } from './schemas/index.js';
|
|
6
2
|
// PDF generation
|
|
7
3
|
export { createPdfGenerator, convertWithConcurrency, POOL_SIZE } from './pdf-generator.js';
|
|
8
4
|
// Server
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAElC,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAElC,iBAAiB;AACjB,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG3F,SAAS;AACT,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGtD,SAAS;AACT,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG9D,4BAA4B;AAC5B,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { ImageSlot } from '../schemas/lesson.js';
|
|
2
|
+
/**
 * One section of a lesson as extracted from the markdown.
 */
export interface ParsedSection {
|
|
3
|
+
// Trimmed markdown text of the section; an empty string when the section was not found.
content: string;
|
|
4
|
+
// One slot per image marker counted in `content`. The parser only fills this for the
// preface and addYourCode sections; it stays empty for the others.
imageSlots: ImageSlot[];
|
|
5
|
+
}
|
|
6
|
+
/**
 * Result of parseDoclingMarkdown: the preface plus one entry per recognised
 * "##" section heading.
 */
export interface DoclingParsedSections {
|
|
7
|
+
// Everything that precedes the "Get Ready" heading.
preface: ParsedSection;
|
|
8
|
+
getReady: ParsedSection;
|
|
9
|
+
// Section headed "Add Your Code" or "My First Program".
addYourCode: ParsedSection;
|
|
10
|
+
tryItOut: ParsedSection;
|
|
11
|
+
challenge: ParsedSection;
|
|
12
|
+
testYourself: ParsedSection;
|
|
13
|
+
// From the "Fun Fact" heading to the end of the document.
funFact: ParsedSection;
|
|
14
|
+
}
|
|
15
|
+
/**
 * Splits markdown into the preface and the sections opened by the known "##"
 * headings. Sections whose heading is absent come back with empty content.
 *
 * @param markdown - Markdown text to split.
 * @returns The parsed sections, with image slots created for the preface and
 *   addYourCode sections.
 */
export declare function parseDoclingMarkdown(markdown: string): DoclingParsedSections;
|
|
16
|
+
/**
 * Writes `images` into the `base64` field of the preface slots and then the
 * addYourCode slots, in order. Slots beyond the number of images are left
 * untouched and surplus images are ignored.
 *
 * @param sections - Parsed sections; mutated in place.
 * @param images - Image strings in document order.
 * @returns The same `sections` object that was passed in.
 */
export declare function assignImagesToSlots(sections: DoclingParsedSections, images: string[]): DoclingParsedSections;
|
|
17
|
+
//# sourceMappingURL=docling-parser.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docling-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAajD,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,aAAa,CAAC;IACvB,QAAQ,EAAE,aAAa,CAAC;IACxB,WAAW,EAAE,aAAa,CAAC;IAC3B,QAAQ,EAAE,aAAa,CAAC;IACxB,SAAS,EAAE,aAAa,CAAC;IACzB,YAAY,EAAE,aAAa,CAAC;IAC5B,OAAO,EAAE,aAAa,CAAC;CACxB;AAiDD,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,GAAG,qBAAqB,CA8D5E;AAED,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,qBAAqB,EAC/B,MAAM,EAAE,MAAM,EAAE,GACf,qBAAqB,CAiBvB"}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
// HTML-comment placeholder whose occurrences are counted to size a section's image slots.
const IMAGE_MARKER = '<!-- image -->';
|
|
2
|
+
// Case-insensitive patterns for the level-2 ("##") headings that open each section.
// They are tested against single trimmed lines, hence the ^ anchor.
const SECTION_HEADERS = {
|
|
3
|
+
getReady: /^##\s*Get\s*Ready/i,
|
|
4
|
+
// "My First Program" is accepted as an alternative heading for this section.
addYourCode: /^##\s*(Add\s*Your\s*Code|My\s*First\s*Program)/i,
|
|
5
|
+
tryItOut: /^##\s*Try\s*It\s*Out/i,
|
|
6
|
+
challenge: /^##\s*Challenge/i,
|
|
7
|
+
testYourself: /^##\s*Test\s*Yourself/i,
|
|
8
|
+
funFact: /^##\s*Fun\s*Fact/i,
|
|
9
|
+
};
|
|
10
|
+
/**
 * Counts how many times the image placeholder occurs in a piece of markdown.
 *
 * @param content - Markdown text to inspect.
 * @returns Number of non-overlapping IMAGE_MARKER occurrences (0 when none).
 */
function countImageMarkers(content) {
    // Splitting on the marker yields one more piece than there are markers.
    const pieces = content.split(IMAGE_MARKER);
    return pieces.length - 1;
}
|
|
13
|
+
/**
 * Builds `count` empty image slots with ids `<prefix>_img_1`, `<prefix>_img_2`, ...
 *
 * @param count - Number of slots to create.
 * @param prefix - Section name used as the id prefix.
 * @returns Array of `{ id }` objects, numbered from 1.
 */
function createImageSlots(count, prefix) {
    const slots = [];
    for (let position = 1; position <= count; position++) {
        slots.push({ id: `${prefix}_img_${position}` });
    }
    return slots;
}
|
|
18
|
+
/**
 * Splits markdown at the first line that matches a section header.
 *
 * @param markdown - Markdown text, scanned one '\n'-separated line at a time.
 * @param headerPattern - Pattern tested against each trimmed line.
 * @returns `{ before, after }`, where `after` begins with the matching header
 *   line, or `null` when no line matches.
 */
function splitAtHeader(markdown, headerPattern) {
    const lines = markdown.split('\n');
    const headerIndex = lines.findIndex((line) => headerPattern.test(line.trim()));
    if (headerIndex === -1) {
        return null;
    }
    return {
        before: lines.slice(0, headerIndex).join('\n'),
        after: lines.slice(headerIndex).join('\n'),
    };
}
/**
 * Returns the section opened by `startPattern`, header line included, up to
 * (but excluding) the nearest following line that matches any of `endPatterns`.
 *
 * The section is cut at the earliest following header in the document, not at
 * the first pattern in list order, so sections that appear in an unexpected
 * order no longer leak into each other.
 *
 * @param markdown - Full markdown text.
 * @param startPattern - Pattern of the header that opens the section.
 * @param endPatterns - Patterns of headers that terminate the section.
 * @returns The untrimmed section text, or '' when the start header is absent.
 */
function extractSection(markdown, startPattern, endPatterns) {
    const startSplit = splitAtHeader(markdown, startPattern);
    if (!startSplit) {
        return '';
    }
    const lines = startSplit.after.split('\n');
    const endIndex = lines.findIndex((line, index) => {
        // Line 0 is the section's own header; it must never terminate the section.
        if (index === 0) {
            return false;
        }
        const trimmed = line.trim();
        return endPatterns.some((endPattern) => endPattern.test(trimmed));
    });
    return endIndex === -1 ? startSplit.after : lines.slice(0, endIndex).join('\n');
}
|
|
45
|
+
/**
 * Splits markdown into the preface and the sections opened by the headings in
 * SECTION_HEADERS.
 *
 * - preface: everything before the Get Ready heading (left empty when that
 *   heading is missing); gets one image slot per image marker.
 * - addYourCode: gets one image slot per image marker.
 * - All other sections carry text only.
 * - funFact runs from its heading to the end of the document.
 *
 * Every section is terminated by any heading that can follow it. This includes
 * Try It Out, which now also stops at Test Yourself / Fun Fact so it does not
 * absorb them when the Challenge heading is missing.
 *
 * @param markdown - Markdown text to split.
 * @returns Object with one `{ content, imageSlots }` entry per section;
 *   missing sections have empty content.
 */
export function parseDoclingMarkdown(markdown) {
    const { getReady, addYourCode, tryItOut, challenge, testYourself, funFact } = SECTION_HEADERS;
    const newSection = () => ({ content: '', imageSlots: [] });
    const sections = {
        preface: newSection(),
        getReady: newSection(),
        addYourCode: newSection(),
        tryItOut: newSection(),
        challenge: newSection(),
        testYourself: newSection(),
        funFact: newSection(),
    };
    // Preface: everything before Get Ready, with image slots.
    const getReadySplit = splitAtHeader(markdown, getReady);
    if (getReadySplit) {
        sections.preface.content = getReadySplit.before.trim();
        sections.preface.imageSlots = createImageSlots(countImageMarkers(sections.preface.content), 'preface');
    }
    // Get Ready: text only.
    sections.getReady.content = extractSection(markdown, getReady, [
        addYourCode,
        tryItOut,
        challenge,
        testYourself,
        funFact,
    ]).trim();
    // Add Your Code: text plus image slots.
    sections.addYourCode.content = extractSection(markdown, addYourCode, [
        tryItOut,
        challenge,
        testYourself,
        funFact,
    ]).trim();
    sections.addYourCode.imageSlots = createImageSlots(countImageMarkers(sections.addYourCode.content), 'addYourCode');
    // Remaining sections: text only. Each one lists every later heading as a
    // terminator so a missing intermediate section cannot make it run on.
    sections.tryItOut.content = extractSection(markdown, tryItOut, [
        challenge,
        testYourself,
        funFact,
    ]).trim();
    sections.challenge.content = extractSection(markdown, challenge, [
        testYourself,
        funFact,
    ]).trim();
    sections.testYourself.content = extractSection(markdown, testYourself, [
        funFact,
    ]).trim();
    // Fun Fact is the last section, so it extends to the end of the document.
    const funFactSplit = splitAtHeader(markdown, funFact);
    if (funFactSplit) {
        sections.funFact.content = funFactSplit.after.trim();
    }
    return sections;
}
|
|
94
|
+
export function assignImagesToSlots(sections, images) {
|
|
95
|
+
let imageIndex = 0;
|
|
96
|
+
const assignToSection = (section) => {
|
|
97
|
+
for (const slot of section.imageSlots) {
|
|
98
|
+
if (imageIndex < images.length) {
|
|
99
|
+
slot.base64 = images[imageIndex];
|
|
100
|
+
imageIndex++;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
};
|
|
104
|
+
// Only assign to sections that track image slots
|
|
105
|
+
assignToSection(sections.preface);
|
|
106
|
+
assignToSection(sections.addYourCode);
|
|
107
|
+
return sections;
|
|
108
|
+
}
|
|
109
|
+
//# sourceMappingURL=docling-parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docling-parser.js","sourceRoot":"","sources":["../../src/parsing/docling-parser.ts"],"names":[],"mappings":"AAEA,MAAM,YAAY,GAAG,gBAAgB,CAAC;AAEtC,MAAM,eAAe,GAAG;IACtB,QAAQ,EAAE,oBAAoB;IAC9B,WAAW,EAAE,iDAAiD;IAC9D,QAAQ,EAAE,uBAAuB;IACjC,SAAS,EAAE,kBAAkB;IAC7B,YAAY,EAAE,wBAAwB;IACtC,OAAO,EAAE,mBAAmB;CAC7B,CAAC;AAsBF,SAAS,iBAAiB,CAAC,OAAe;IACxC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;AACrE,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,MAAc;IACrD,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC9C,EAAE,EAAE,GAAG,MAAM,QAAQ,CAAC,GAAG,CAAC,EAAE;KAC7B,CAAC,CAAC,CAAC;AACN,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB,EAAE,aAAqB;IAC5D,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YACxC,OAAO;gBACL,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACpC,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;aACjC,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,QAAgB,EAAE,YAAoB,EAAE,WAAqB;IACnF,MAAM,UAAU,GAAG,aAAa,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACzD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,cAAc,GAAG,UAAU,CAAC,KAAK,CAAC;IAEtC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,aAAa,CAAC,cAAc,EAAE,UAAU,CAAC,CAAC;QAC3D,IAAI,QAAQ,EAAE,CAAC;YACb,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC;YACjC,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,QAAgB;IACnD,MAAM,QAAQ,GAA0B;QACtC,OAAO,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACxC,QAAQ,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACzC,WAAW,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC5C,QAAQ,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACzC,SAAS,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC1C,YAAY,EAAE,EAAE,OAAO,EAAE,EA
AE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC7C,OAAO,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;KACzC,CAAC;IAEF,kEAAkE;IAClE,MAAM,aAAa,GAAG,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,CAAC,CAAC;IACxE,IAAI,aAAa,EAAE,CAAC;QAClB,QAAQ,CAAC,OAAO,CAAC,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACvD,QAAQ,CAAC,OAAO,CAAC,UAAU,GAAG,gBAAgB,CAC5C,iBAAiB,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAC3C,SAAS,CACV,CAAC;IACJ,CAAC;IAED,oDAAoD;IACpD,QAAQ,CAAC,QAAQ,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,EAAE;QAC7E,eAAe,CAAC,WAAW;QAC3B,eAAe,CAAC,QAAQ;QACxB,eAAe,CAAC,SAAS;QACzB,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,kDAAkD;IAClD,QAAQ,CAAC,WAAW,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,WAAW,EAAE;QACnF,eAAe,CAAC,QAAQ;QACxB,eAAe,CAAC,SAAS;QACzB,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IACV,QAAQ,CAAC,WAAW,CAAC,UAAU,GAAG,gBAAgB,CAChD,iBAAiB,CAAC,QAAQ,CAAC,WAAW,CAAC,OAAO,CAAC,EAC/C,aAAa,CACd,CAAC;IAEF,sDAAsD;IACtD,QAAQ,CAAC,QAAQ,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,EAAE;QAC7E,eAAe,CAAC,SAAS;KAC1B,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,QAAQ,CAAC,SAAS,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,SAAS,EAAE;QAC/E,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,QAAQ,CAAC,YAAY,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,YAAY,EAAE;QACrF,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,MAAM,YAAY,GAAG,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,OAAO,CAAC,CAAC;IACtE,IAAI,YAAY,EAAE,CAAC;QACjB,QAAQ,CAAC,OAAO,CAAC,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IACvD,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,mBAAmB,CACjC,QAA+B,EAC/B,MAAgB;IAEhB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,MAAM,eAAe,GAAG,CAAC,OAAsB,EAAE,EAAE;QACjD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACtC,IAAI,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;gBAC/B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC;gBACjC,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;IACH,CAAC,CAAC;IAEF,iDAAiD;IACjD,eAAe,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAClC,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAEtC,OAAO,QAAQ,CAAC;AAClB,
CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AA6GA,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAejF"}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { execFileSync } from 'child_process';
|
|
3
|
+
import { existsSync, statSync } from 'fs';
|
|
4
|
+
import { fileURLToPath } from 'url';
|
|
5
|
+
import { logger } from '../logger.js';
|
|
6
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
7
|
+
const __dirname = path.dirname(__filename);
|
|
8
|
+
/**
 * Locates the docling-cleaner executable for the current platform.
 *
 * Lookup order:
 *   1. the path returned by `ensureDoclingCleaner()` from the optional
 *      `@digimakers/docling-cleaner` package;
 *   2. binaries under `../docling-cleaner/bin/<platform>-<arch>/`, first as a
 *      single file, then in the onedir layout `docling-cleaner/<binary>`;
 *   3. the same two layouts under `../../src/docling-cleaner/bin/...`.
 *
 * Every candidate must be a regular file. In the onedir layout on non-Windows
 * platforms the single-file candidate path is the onedir *directory*, so an
 * existence check alone would return a path that cannot be executed.
 *
 * @returns Absolute path to the binary, or `null` when none is found.
 */
async function resolveDoclingBinary() {
    const platformTag = `${process.platform}-${process.arch}`;
    const binaryName = process.platform === 'win32' ? 'docling-cleaner.exe' : 'docling-cleaner';
    // statSync throws for missing or unreadable paths; treat that as "not a usable binary".
    const isRegularFile = (candidate) => {
        try {
            return statSync(candidate).isFile();
        }
        catch {
            return false;
        }
    };
    try {
        const { ensureDoclingCleaner } = await import('@digimakers/docling-cleaner');
        const binaryPath = await ensureDoclingCleaner();
        if (binaryPath && isRegularFile(binaryPath)) {
            return binaryPath;
        }
    }
    catch (error) {
        logger.warn({ err: error }, 'Docling downloader failed, trying bundled binaries');
    }
    const binRoots = [
        path.resolve(__dirname, '..', 'docling-cleaner', 'bin', platformTag),
        path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner', 'bin', platformTag),
    ];
    for (const binRoot of binRoots) {
        const candidates = [
            path.resolve(binRoot, binaryName),
            path.resolve(binRoot, 'docling-cleaner', binaryName),
        ];
        for (const candidate of candidates) {
            if (isRegularFile(candidate)) {
                return candidate;
            }
        }
    }
    return null;
}
|
|
41
|
+
/**
 * Finds the directory that holds `cleaner.py`, checking the sibling
 * `../docling-cleaner` directory first and `../../src/docling-cleaner` second.
 *
 * @returns Absolute directory path, or `null` when neither contains `cleaner.py`.
 */
function resolveDoclingCleanerDir() {
    const candidateDirs = [
        path.resolve(__dirname, '..', 'docling-cleaner'),
        path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner'),
    ];
    const found = candidateDirs.find((dir) => existsSync(path.join(dir, 'cleaner.py')));
    return found ?? null;
}
|
|
50
|
+
/**
 * Fallback conversion: runs `uv run python cleaner.py <filePath>` inside the
 * cleaner directory and returns what the script writes to stdout.
 *
 * The call is synchronous and is given up to 120 seconds.
 *
 * @param filePath - Path of the document passed to cleaner.py.
 * @returns The script's stdout as a UTF-8 string, or `null` when the cleaner
 *   directory is missing or the command fails (a warning is logged either way).
 */
function getDoclingMarkdownFromUv(filePath) {
    const workingDir = resolveDoclingCleanerDir();
    if (workingDir) {
        const uvArgs = ['run', 'python', 'cleaner.py', filePath];
        try {
            return execFileSync('uv', uvArgs, {
                cwd: workingDir,
                encoding: 'utf-8',
                timeout: 120000,
                stdio: ['pipe', 'pipe', 'pipe'],
            });
        }
        catch (error) {
            logger.warn({ err: error }, 'Docling uv fallback failed. Install uv and run in packages/core/src/docling-cleaner.');
            return null;
        }
    }
    logger.warn('Docling cleaner assets not found. Ensure the package includes dist/docling-cleaner.');
    return null;
}
|
|
69
|
+
/**
 * Converts a document to markdown with the docling-cleaner binary, falling
 * back to the `uv`-based Python runner when no binary is available or the
 * binary fails.
 *
 * The binary is run with the asynchronous `execFile`, so the event loop stays
 * free while the conversion runs (up to 120 seconds) and other work started
 * alongside this call can make progress. The uv fallback is still synchronous.
 *
 * @param filePath - Path of the document to convert.
 * @returns The markdown written to stdout, or `null` when both the binary and
 *   the uv fallback are unavailable or fail.
 */
export async function getDoclingMarkdown(filePath) {
    const binaryPath = await resolveDoclingBinary();
    if (binaryPath) {
        try {
            // Loaded lazily so this function needs no extra top-level imports.
            const [{ execFile }, { promisify }] = await Promise.all([
                import('child_process'),
                import('util'),
            ]);
            const { stdout } = await promisify(execFile)(binaryPath, [filePath], {
                encoding: 'utf-8',
                timeout: 120000,
            });
            return stdout;
        }
        catch (error) {
            logger.warn({ err: error }, 'Docling binary failed, attempting uv fallback');
        }
    }
    return getDoclingMarkdownFromUv(filePath);
}
|
|
85
|
+
//# sourceMappingURL=docling-runners.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAC3C,KAAK,UAAU,oBAAoB;IACjC,MAAM,WAAW,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;IAC1D,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,iBAAiB,CAAC;IAE5F,IAAI,CAAC;QACH,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC;gBACH,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE;oBAAE,OAAO,UAAU,CAAC;YACvD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,oDAAoD,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAC7B,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,OAAO,CACnC,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,gBAAgB,CAAC;QAAE,OAAO,gBAAgB,CAAC;IAE1D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAC5B,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IAE5C,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAClC,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,eAAe,CAAC;QAAE,OAAO,eAAe,CAAC;IAExD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,iBAAiB,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,cAAc,C
AAC;IAE/E,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,aAAa,CAAC;IAE7E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAgB;IAChD,MAAM,UAAU,GAAG,wBAAwB,EAAE,CAAC;IAC9C,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CACT,qFAAqF,CACtF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,EAAE;YACnE,GAAG,EAAE,UAAU;YACf,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,MAAM;YACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CACT,EAAE,GAAG,EAAE,KAAK,EAAE,EACd,sFAAsF,CACvF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,EAAE;gBAC1C,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,+CAA+C,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAC;AAC5C,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { Lesson } from '../schemas/index.js';
|
|
2
2
|
export interface ParseResult {
|
|
3
|
-
data:
|
|
3
|
+
data: Lesson;
|
|
4
4
|
sourcePath: string;
|
|
5
5
|
}
|
|
6
6
|
export declare function parseDocx(filePath: string): Promise<ParseResult>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,
|
|
1
|
+
{"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAiB,MAAM,qBAAqB,CAAC;AAoB5D,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;CACpB;AA4BD,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CA6JtE"}
|
|
@@ -2,9 +2,14 @@ import fs from 'fs/promises';
|
|
|
2
2
|
import mammoth from 'mammoth';
|
|
3
3
|
import { generateText, Output } from 'ai';
|
|
4
4
|
import { createGoogleGenerativeAI } from '@ai-sdk/google';
|
|
5
|
-
import { ParsedLessonSchema } from '../schemas/index.js';
|
|
6
5
|
import { logger } from '../logger.js';
|
|
7
6
|
import { extractLanguageFromFooter } from './footer-parser.js';
|
|
7
|
+
import { LessonLLMSchema, LessonLLMSchemaWithoutLanguage, } from '../schemas/lesson.js';
|
|
8
|
+
import { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
|
|
9
|
+
import { getDoclingMarkdown } from './docling-runners.js';
|
|
10
|
+
import { buildDocxParserPrompt, docxParserSystemPrompt } from './prompts.js';
|
|
11
|
+
import { formatDocumentCode } from '../agents/code-formatter.js';
|
|
12
|
+
import { enrichDebugIssues, inferLessonType, normaliseLessonContent, normaliseLessonForType, } from './post-processors.js';
|
|
8
13
|
// Setup google generative ai
|
|
9
14
|
const google = createGoogleGenerativeAI({
|
|
10
15
|
apiKey: process.env.GEMINI_API_KEY,
|
|
@@ -27,63 +32,131 @@ async function extractImages(buffer) {
|
|
|
27
32
|
export async function parseDocx(filePath) {
|
|
28
33
|
logger.info(`Parsing: ${filePath}`);
|
|
29
34
|
const buffer = await fs.readFile(filePath);
|
|
30
|
-
// Extract
|
|
31
|
-
const [
|
|
32
|
-
mammoth.extractRawText({ buffer }),
|
|
35
|
+
// Extract images and footer language in parallel, try docling markdown
|
|
36
|
+
const [allImages, footerLanguage, doclingMarkdown] = await Promise.all([
|
|
33
37
|
extractImages(buffer),
|
|
34
38
|
extractLanguageFromFooter(filePath),
|
|
39
|
+
getDoclingMarkdown(filePath),
|
|
35
40
|
]);
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
41
|
+
// Parse docling markdown to get sections with image placeholders
|
|
42
|
+
let parsedSections = null;
|
|
43
|
+
let textForLLM;
|
|
44
|
+
if (doclingMarkdown) {
|
|
45
|
+
parsedSections = parseDoclingMarkdown(doclingMarkdown);
|
|
46
|
+
assignImagesToSlots(parsedSections, allImages);
|
|
47
|
+
logger.debug({
|
|
48
|
+
prefaceImageSlots: parsedSections.preface.imageSlots.length,
|
|
49
|
+
addYourCodeImageSlots: parsedSections.addYourCode.imageSlots.length,
|
|
50
|
+
totalImages: allImages.length,
|
|
51
|
+
}, 'Docling image slots parsed');
|
|
52
|
+
textForLLM = doclingMarkdown;
|
|
53
|
+
logger.info('Using docling markdown with placeholder-based image mapping');
|
|
54
|
+
logger.info(textForLLM);
|
|
55
|
+
logger.info('Formatting document code blocks with agent');
|
|
56
|
+
textForLLM = await formatDocumentCode(doclingMarkdown, footerLanguage);
|
|
57
|
+
logger.info(textForLLM);
|
|
58
|
+
}
|
|
59
|
+
else {
|
|
60
|
+
const { value: text } = await mammoth.extractRawText({ buffer });
|
|
61
|
+
textForLLM = text;
|
|
62
|
+
logger.info('Falling back to mammoth text extraction');
|
|
63
|
+
logger.info(textForLLM);
|
|
64
|
+
}
|
|
65
|
+
logger.info(`Extracted ${textForLLM.length} characters and ${allImages.length} images`);
|
|
39
66
|
if (footerLanguage) {
|
|
40
67
|
logger.info(`Programming language from footer: ${footerLanguage}`);
|
|
41
68
|
}
|
|
42
69
|
else {
|
|
43
70
|
logger.warn('Footer language not found');
|
|
44
71
|
}
|
|
45
|
-
//
|
|
46
|
-
|
|
47
|
-
const
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
72
|
+
// If we find the programming language in the footer, we don't need the LLM
|
|
73
|
+
// to tell us.
|
|
74
|
+
const llmSchema = footerLanguage ? LessonLLMSchemaWithoutLanguage : LessonLLMSchema;
|
|
75
|
+
let output;
|
|
76
|
+
try {
|
|
77
|
+
// Use LLM to extract structured data
|
|
78
|
+
const response = await generateText({
|
|
79
|
+
model: google('gemini-2.5-pro'),
|
|
80
|
+
output: Output.object({
|
|
81
|
+
schema: llmSchema,
|
|
82
|
+
}),
|
|
83
|
+
system: docxParserSystemPrompt,
|
|
84
|
+
prompt: buildDocxParserPrompt(textForLLM),
|
|
85
|
+
temperature: 0,
|
|
86
|
+
maxRetries: 5,
|
|
87
|
+
});
|
|
88
|
+
output = response.output;
|
|
89
|
+
}
|
|
90
|
+
catch (error) {
|
|
91
|
+
const err = error;
|
|
92
|
+
logger.error({ err, filePath }, 'LLM extraction failed');
|
|
93
|
+
const issues = err?.cause?.issues ?? err?.issues;
|
|
94
|
+
if (issues) {
|
|
95
|
+
logger.error({ issues, filePath }, 'LLM schema validation issues');
|
|
96
|
+
}
|
|
97
|
+
const value = err?.cause?.value ?? err?.value;
|
|
98
|
+
if (value) {
|
|
99
|
+
logger.error({ value, filePath }, 'LLM output that failed validation');
|
|
100
|
+
}
|
|
101
|
+
throw error;
|
|
102
|
+
}
|
|
103
|
+
// Infer the lesson type with heuristic
|
|
104
|
+
const dataWithoutType = normaliseLessonContent(output);
|
|
105
|
+
let data = normaliseLessonForType({
|
|
106
|
+
...dataWithoutType,
|
|
107
|
+
lessonType: inferLessonType(textForLLM, footerLanguage, dataWithoutType),
|
|
69
108
|
});
|
|
70
|
-
|
|
71
|
-
logger.info(`
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
data.projectImage = projectImage;
|
|
75
|
-
// Set programming language from footer if found, otherwise use LLM's determination
|
|
109
|
+
data = enrichDebugIssues(textForLLM, data);
|
|
110
|
+
logger.info(`Inferred lesson type as: '${data.lessonType}'`);
|
|
111
|
+
logger.info(`Successfully extracted lesson: ${data.topic} - ${data.project}`);
|
|
112
|
+
// Set programming language from footer if found
|
|
76
113
|
if (footerLanguage) {
|
|
77
114
|
data.programmingLanguage = footerLanguage;
|
|
78
115
|
}
|
|
79
|
-
// Assign
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
116
|
+
// Assign images using placeholder-based mapping if available
|
|
117
|
+
if (parsedSections && data.lessonType !== 'debugging lesson') {
|
|
118
|
+
// Assign preface image slots
|
|
119
|
+
if (parsedSections.preface.imageSlots.length > 0) {
|
|
120
|
+
data.prefaceImageSlots = parsedSections.preface.imageSlots;
|
|
121
|
+
}
|
|
122
|
+
// Assign Add Your Code step images
|
|
123
|
+
const addSection = data.addYourCodeSection;
|
|
124
|
+
if (Array.isArray(addSection) && parsedSections.addYourCode.imageSlots.length > 0) {
|
|
125
|
+
const isStepWithImageArray = addSection.every((item) => typeof item === 'object' && item !== null && 'step' in item);
|
|
126
|
+
if (isStepWithImageArray) {
|
|
127
|
+
const slots = parsedSections.addYourCode.imageSlots;
|
|
128
|
+
addSection.forEach((step, index) => {
|
|
129
|
+
if (index < slots.length) {
|
|
130
|
+
step.imageSlot = {
|
|
131
|
+
id: slots[index].id,
|
|
132
|
+
base64: slots[index].base64,
|
|
133
|
+
};
|
|
134
|
+
}
|
|
135
|
+
});
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
else if (data.lessonType !== 'debugging lesson') {
|
|
140
|
+
// Fallback using old behavior, first image is project, rest are steps
|
|
141
|
+
// Not good if there are multiple images in preface section
|
|
142
|
+
logger.warn('Falling back to old image assignment behaviour');
|
|
143
|
+
if (allImages.length > 0) {
|
|
144
|
+
data.prefaceImageSlots = [{ id: 'fallback_preface_img_1', base64: allImages[0] }];
|
|
145
|
+
}
|
|
146
|
+
const stepImages = allImages.slice(1);
|
|
147
|
+
const addSection = data.addYourCodeSection;
|
|
148
|
+
if (stepImages.length > 0 && Array.isArray(addSection)) {
|
|
149
|
+
const isStepWithImageArray = addSection.every((item) => typeof item === 'object' && item !== null && 'step' in item);
|
|
150
|
+
if (isStepWithImageArray) {
|
|
151
|
+
addSection.forEach((step, index) => {
|
|
152
|
+
if (stepImages[index]) {
|
|
153
|
+
step.imageSlot = {
|
|
154
|
+
id: `fallback_img_${index + 1}`,
|
|
155
|
+
base64: stepImages[index],
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
});
|
|
159
|
+
}
|
|
87
160
|
}
|
|
88
161
|
}
|
|
89
162
|
logger.info(data);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAE1D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAC/D,OAAO,EAEL,eAAe,EACf,8BAA8B,GAE/B,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAC7E,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EACL,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAO9B,6BAA6B;AAC7B,MAAM,MAAM,GAAG,wBAAwB,CAAC;IACtC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;CACnC,CAAC,CAAC;AAEH,+CAA+C;AAC/C,KAAK,UAAU,aAAa,CAAC,MAAc;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,MAAM,OAAO,CAAC,aAAa,CACzB,EAAE,MAAM,EAAE,EACV;QACE,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACtD,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;YACvC,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,QAAQ,KAAK,CAAC,WAAW,WAAW,MAAM,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;QAC1B,CAAC,CAAC;KACH,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6CAA6C;AAC7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,CAAC,IAAI,CAAC,YAAY,QAAQ,EAAE,CAAC,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,uEAAuE;IACvE,MAAM,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACrE,aAAa,CAAC,MAAM,CAAC;QACrB,yBAAyB,CAAC,QAAQ,CAAC;QACnC,kBAAkB,CAAC,QAAQ,CAAC;KAC7B,CAAC,CAAC;IAEH,iEAAiE;IACjE,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,IAAI,UAAkB,CAAC;IAEvB,IAAI,eAAe,EAAE,CAAC;QACpB,cAAc,GAAG,oBAAoB,CAAC,eAAe,CAAC,CAAC;QACvD,mBAAmB,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;QAC/C,MAAM,CAAC,KAAK,CACV;YACE,iBAAiB,EAAE,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM;YAC3D,qBAAqB,EAAE,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM;YACnE,WAAW,EAAE,SAAS,C
AAC,MAAM;SAC9B,EACD,4BAA4B,CAC7B,CAAC;QACF,UAAU,GAAG,eAAe,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;QAC3E,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAC1D,UAAU,GAAG,MAAM,kBAAkB,CAAC,eAAe,EAAE,cAAc,CAAC,CAAC;QACvE,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACjE,UAAU,GAAG,IAAI,CAAC;QAClB,MAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,aAAa,UAAU,CAAC,MAAM,mBAAmB,SAAS,CAAC,MAAM,SAAS,CAAC,CAAC;IACxF,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,qCAAqC,cAAc,EAAE,CAAC,CAAC;IACrE,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC3C,CAAC;IAED,2EAA2E;IAC3E,cAAc;IACd,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,8BAA8B,CAAC,CAAC,CAAC,eAAe,CAAC;IAEpF,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,qCAAqC;QACrC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC;YAClC,KAAK,EAAE,MAAM,CAAC,gBAAgB,CAAC;YAC/B,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;YACF,MAAM,EAAE,sBAAsB;YAC9B,MAAM,EAAE,qBAAqB,CAAC,UAAU,CAAC;YACzC,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;QACH,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,KAAY,CAAC;QACzB,MAAM,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,uBAAuB,CAAC,CAAC;QAEzD,MAAM,MAAM,GAAG,GAAG,EAAE,KAAK,EAAE,MAAM,IAAI,GAAG,EAAE,MAAM,CAAC;QACjD,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,8BAA8B,CAAC,CAAC;QACrE,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,EAAE,KAAK,EAAE,KAAK,IAAI,GAAG,EAAE,KAAK,CAAC;QAC9C,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,mCAAmC,CAAC,CAAC;QACzE,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;IAED,uCAAuC;IACvC,MAAM,eAAe,GAAG,sBAAsB,CAAC,MAAmB,CAAC,CAAC;IACpE,IAAI,IAAI,GAAG,sBAAsB,CAAC;QAChC,GAAG,eAAe;QAClB,UAAU,EAAE,eAAe,CAAC,UAAU,EAAE,cAAqC,EAAE,eAAe,CAAC;KACtF,CAAC,CAAC;IACb,IAAI,GAAG,iBAAiB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAC3C,MAAM,CAAC,IAAI,CAAC,6BAA6B,IAAI,CAAC,
UAAU,GAAG,CAAC,CAAC;IAC7D,MAAM,CAAC,IAAI,CAAC,kCAAkC,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IAE9E,gDAAgD;IAChD,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC,mBAAmB,GAAG,cAAqC,CAAC;IACnE,CAAC;IAED,6DAA6D;IAC7D,IAAI,cAAc,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAC7D,6BAA6B;QAC7B,IAAI,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,iBAAiB,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC;QAC7D,CAAC;QAED,mCAAmC;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClF,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YAEF,IAAI,oBAAoB,EAAE,CAAC;gBACzB,MAAM,KAAK,GAAG,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC;gBACnD,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;wBACzB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE;4BACnB,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;yBAC5B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;SAAM,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAClD,sEAAsE;QACtE,2DAA2D;QAC3D,MAAM,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;QAC9D,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,IAAI,CAAC,iBAAiB,GAAG,CAAC,EAAE,EAAE,EAAE,wBAAwB,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpF,CAAC;QACD,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YAEF,IAAI,oBAAoB,EAAE,CAAC;gBACxB,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;wBACtB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,gBAAgB,KAAK,GAAG,CAAC,EAAE;4BAC/B,MAAM,EAAE,UAAU,CAAC,KAAK,CAAC;yBAC1B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,C
AAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAElB,OAAO;QACL,IAAI;QACJ,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC"}
|
package/dist/parsing/index.d.ts
CHANGED
|
@@ -2,4 +2,7 @@ export { findDocxFiles } from './file-discovery.js';
|
|
|
2
2
|
export type { DiscoveryOptions, DiscoveredFile } from './file-discovery.js';
|
|
3
3
|
export { parseDocx } from './docx-parser.js';
|
|
4
4
|
export type { ParseResult } from './docx-parser.js';
|
|
5
|
+
export { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
|
|
6
|
+
export type { ParsedSection, DoclingParsedSections } from './docling-parser.js';
|
|
7
|
+
export { getDoclingMarkdown } from './docling-runners.js';
|
|
5
8
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAE5E,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAE5E,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEpD,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,YAAY,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAEhF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
|
package/dist/parsing/index.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
1
|
export { findDocxFiles } from './file-discovery.js';
|
|
2
2
|
export { parseDocx } from './docx-parser.js';
|
|
3
|
+
export { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
|
|
4
|
+
export { getDoclingMarkdown } from './docling-runners.js';
|
|
3
5
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAGhF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"normalise.d.ts","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"normalise.d.ts","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"AA0CA,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,GAAG,IAAI,CAcrE;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,GAAG,IAAI,CAMhE"}
|
|
@@ -1,12 +1,52 @@
|
|
|
1
1
|
// Helpers for tidying code blocks: restore lost line breaks and remove redundant newlines
|
|
2
|
+
/**
 * Heuristically restores line breaks in a long code snippet that arrived on a single line.
 *
 * Input that already contains a newline, or is shorter than 80 characters, is returned
 * unchanged. Otherwise breaks are inserted after `;`, `{`, `}` and `else`; each later
 * pass only runs while the text is still a single line.
 *
 * @param {string} code - Snippet to reflow.
 * @returns {string} The reflowed snippet, without leading or trailing blank lines.
 */
function reflowSingleLineCode(code) {
    // Already multi-line, or short enough to be a genuine one-liner: leave untouched.
    if (code.includes('\n') || code.length < 80)
        return code;
    let reflowed = code
        // Break after statement terminators, but keep `for (init; cond; step)` headers
        // (with at most one level of nested parentheses) on one line.
        .replace(/(\bfor\s*\((?:[^()]|\([^()]*\))*\))|;\s*/g, (match, forHeader) => forHeader !== undefined ? forHeader : ';\n')
        .replace(/{\s*/g, '{\n')
        .replace(/}\s*/g, '}\n')
        // Do not separate `else` from a directly following colon (`else:`).
        .replace(/\belse\b(?!\s*:)\s*/g, 'else\n');
    // Colon-terminated block headers; skipped when a URL is present so `http://` is not split.
    if (!reflowed.includes('\n') &&
        /\b(for|if|while|def|class)\b/.test(reflowed) &&
        !/https?:\/\//i.test(reflowed)) {
        reflowed = reflowed.replace(/:\s*/g, ':\n');
    }
    // Start every `GraphicsWindow.` call on its own line.
    if (!reflowed.includes('\n') && /GraphicsWindow\./.test(reflowed)) {
        reflowed = reflowed.replace(/GraphicsWindow\./g, '\nGraphicsWindow.').trim();
    }
    // Fallback: start a new line in front of each control-flow / definition keyword.
    if (!reflowed.includes('\n') &&
        /(\bfor\b|\bif\b|\bwhile\b|\bdef\b|\bclass\b|\belif\b|\belse\b)/.test(reflowed)) {
        reflowed = reflowed.replace(/(\bfor\b|\bif\b|\bwhile\b|\bdef\b|\bclass\b|\belif\b|\belse\b)/g, '\n$1');
    }
    // Fallback: start a new line in front of each print-style call.
    if (!reflowed.includes('\n') && /(System\.out|printf\(|println\()/.test(reflowed)) {
        reflowed = reflowed
            .replace(/System\.out/g, '\nSystem.out')
            .replace(/printf\(/g, '\nprintf(')
            .replace(/println\(/g, '\nprintln(');
    }
    // The passes above can leave a newline at either end; drop those so every branch
    // returns a snippet without leading/trailing blank lines.
    return reflowed.replace(/^\n+|\n+$/g, '');
}
|
|
2
30
|
/**
 * Tidies a code block: reflows collapsed one-liners, normalises CRLF line endings,
 * strips trailing whitespace from every line and collapses runs of newlines.
 *
 * @param {string | null} code - Raw code text; falsy values are returned as-is.
 * @returns {string | null} The normalised code.
 */
export function normaliseCodeBlock(code) {
    if (!code) {
        return code;
    }
    // Literal "\n" sequences are only treated as line breaks when the snippet
    // contains no real newline at all.
    let source = code;
    if (!code.includes('\n') && code.includes('\\n')) {
        source = code.replace(/\\n/g, '\n');
    }
    const lines = reflowSingleLineCode(source).replace(/\r\n/g, '\n').split('\n');
    const tidied = [];
    for (const line of lines) {
        tidied.push(line.replace(/\s+$/g, ''));
    }
    // Collapse consecutive newlines so no blank lines remain.
    return tidied.join('\n').replace(/\n{2,}/g, '\n');
}
|
|
44
|
+
/**
 * Strips code-formatting markers from prose: Markdown code-fence markers (with any
 * language tag attached to them) and literal `<code>` / `</code>` tags, then trims
 * surrounding whitespace.
 *
 * @param {string | null} text - Text to clean; falsy values are returned as-is.
 * @returns {string | null} The cleaned text.
 */
export function normaliseText(text) {
    if (!text)
        return text;
    return text
        // The language tag may contain non-word characters (`c++`, `c#`, `objective-c`),
        // so consume those too instead of leaving `++` or `#` behind.
        .replace(/```[\w+#.-]*/g, '')
        .replace(/<\/?code>/g, '')
        .trim();
}
|
|
12
52
|
//# sourceMappingURL=normalise.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"normalise.js","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"AAAA,mDAAmD;AACnD,MAAM,UAAU,kBAAkB,CAAC,IAAmB;IACpD,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,OAAO,IAAI;
|
|
1
|
+
{"version":3,"file":"normalise.js","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"AAAA,mDAAmD;AACnD,SAAS,oBAAoB,CAAC,IAAY;IACxC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,CAAC;IAEzD,IAAI,QAAQ,GAAG,IAAI;SAChB,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC;SACvB,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC;SACvB,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC;SACvB,OAAO,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;IAErC,IACE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;QACxB,8BAA8B,CAAC,IAAI,CAAC,QAAQ,CAAC;QAC7C,CAAC,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,EAC9B,CAAC;QACD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IAC9C,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAClE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,mBAAmB,EAAE,mBAAmB,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/E,CAAC;IAED,IACE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;QACxB,gEAAgE,CAAC,IAAI,CAAC,QAAQ,CAAC,EAC/E,CAAC;QACD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CACzB,iEAAiE,EACjE,MAAM,CACP,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,kCAAkC,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAClF,QAAQ,GAAG,QAAQ;aAChB,OAAO,CAAC,cAAc,EAAE,cAAc,CAAC;aACvC,OAAO,CAAC,WAAW,EAAE,WAAW,CAAC;aACjC,OAAO,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;IACzC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,IAAmB;IACpD,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,eAAe,GACnB,CAAC,eAAe,IAAI,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC7E,MAAM,QAAQ,GAAG,oBAAoB,CAAC,eAAe,CAAC,CAAC;IAEvD,OAAO,QAAQ;SACZ,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;SACxC,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;AAC9B,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAmB;IAC/C,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,OAAO,IAAI;SACR,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;SAC3B,OAAO,CAAC,YAAY,EAAE,E
AAE,CAAC;SACzB,IAAI,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { Lesson, LessonLLM, ProgrammingLanguage } from '../schemas/lesson';
|
|
2
|
+
export declare function normaliseLessonContent(data: LessonLLM): LessonLLM;
|
|
3
|
+
export declare function normaliseLessonForType(data: Lesson): Lesson;
|
|
4
|
+
export declare function enrichDebugIssues(textForLLM: string, data: Lesson): Lesson;
|
|
5
|
+
export declare function inferLessonType(textForLLM: string, footerLanguage: ProgrammingLanguage | null, data: LessonLLM): Lesson['lessonType'];
|
|
6
|
+
//# sourceMappingURL=post-processors.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"post-processors.d.ts","sourceRoot":"","sources":["../../src/parsing/post-processors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAmF3E,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,SAAS,GAAG,SAAS,CAyEjE;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CA4B3D;AAED,wBAAgB,iBAAiB,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CA8B1E;AAED,wBAAgB,eAAe,CAC7B,UAAU,EAAE,MAAM,EAClB,cAAc,EAAE,mBAAmB,GAAG,IAAI,EAC1C,IAAI,EAAE,SAAS,GACd,MAAM,CAAC,YAAY,CAAC,CA2BtB"}
|