@digimakers/core 0.2.1 → 0.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/agents/code-formatter.d.ts +2 -0
  2. package/dist/agents/code-formatter.d.ts.map +1 -0
  3. package/dist/agents/code-formatter.js +25 -0
  4. package/dist/agents/code-formatter.js.map +1 -0
  5. package/dist/docling-cleaner/cleaner.py +34 -0
  6. package/dist/docling-cleaner/pyproject.toml +10 -0
  7. package/dist/docling-cleaner/uv.lock +2077 -0
  8. package/dist/index.d.ts +0 -1
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +0 -4
  11. package/dist/index.js.map +1 -1
  12. package/dist/parsing/docling-parser.d.ts +17 -0
  13. package/dist/parsing/docling-parser.d.ts.map +1 -0
  14. package/dist/parsing/docling-parser.js +109 -0
  15. package/dist/parsing/docling-parser.js.map +1 -0
  16. package/dist/parsing/docling-runners.d.ts +2 -0
  17. package/dist/parsing/docling-runners.d.ts.map +1 -0
  18. package/dist/parsing/docling-runners.js +85 -0
  19. package/dist/parsing/docling-runners.js.map +1 -0
  20. package/dist/parsing/docx-parser.d.ts +2 -2
  21. package/dist/parsing/docx-parser.d.ts.map +1 -1
  22. package/dist/parsing/docx-parser.js +123 -49
  23. package/dist/parsing/docx-parser.js.map +1 -1
  24. package/dist/parsing/index.d.ts +3 -0
  25. package/dist/parsing/index.d.ts.map +1 -1
  26. package/dist/parsing/index.js +2 -0
  27. package/dist/parsing/index.js.map +1 -1
  28. package/dist/parsing/normalise.d.ts +1 -0
  29. package/dist/parsing/normalise.d.ts.map +1 -1
  30. package/dist/parsing/normalise.js +41 -1
  31. package/dist/parsing/normalise.js.map +1 -1
  32. package/dist/parsing/post-processors.d.ts +6 -0
  33. package/dist/parsing/post-processors.d.ts.map +1 -0
  34. package/dist/parsing/post-processors.js +217 -0
  35. package/dist/parsing/post-processors.js.map +1 -0
  36. package/dist/parsing/prompts.d.ts +5 -0
  37. package/dist/parsing/prompts.d.ts.map +1 -0
  38. package/dist/parsing/prompts.js +24 -0
  39. package/dist/parsing/prompts.js.map +1 -0
  40. package/dist/pdf-generator.d.ts +3 -3
  41. package/dist/pdf-generator.d.ts.map +1 -1
  42. package/dist/pdf-generator.js +1 -1
  43. package/dist/pdf-generator.js.map +1 -1
  44. package/dist/sample-data.d.ts +2 -2
  45. package/dist/sample-data.d.ts.map +1 -1
  46. package/dist/sample-data.js +8 -7
  47. package/dist/sample-data.js.map +1 -1
  48. package/dist/schemas/index.d.ts +1 -1
  49. package/dist/schemas/index.d.ts.map +1 -1
  50. package/dist/schemas/index.js +1 -1
  51. package/dist/schemas/index.js.map +1 -1
  52. package/dist/schemas/lesson.d.ts +385 -39
  53. package/dist/schemas/lesson.d.ts.map +1 -1
  54. package/dist/schemas/lesson.js +96 -34
  55. package/dist/schemas/lesson.js.map +1 -1
  56. package/package.json +5 -3
package/dist/index.d.ts CHANGED
@@ -1,4 +1,3 @@
1
- export { StepWithImageSchema, StepsWithCodeBlockSchema, ChallengeSchema, NewProjectSchema, ParsedLessonSchema, GenerateOptionsSchema, type StepWithImage, type StepsWithCodeBlock, type MultipleStepsWithCodeBlock, type Challenge, type NewProject, type ParsedLesson, type GenerateOptions, } from './schemas/index.js';
2
1
  export { createPdfGenerator, convertWithConcurrency, POOL_SIZE } from './pdf-generator.js';
3
2
  export type { PdfGeneratorInstance, FileToConvert, ConversionResult } from './pdf-generator.js';
4
3
  export { startServer, stopServer } from './server.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAEL,mBAAmB,EACnB,wBAAwB,EACxB,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,qBAAqB,EAErB,KAAK,aAAa,EAClB,KAAK,kBAAkB,EACvB,KAAK,0BAA0B,EAC/B,KAAK,SAAS,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,eAAe,GACrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC3F,YAAY,EAAE,oBAAoB,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAGhG,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACtD,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAGlD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC9D,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGxF,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC3F,YAAY,EAAE,oBAAoB,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAGhG,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACtD,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAGlD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC9D,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGxF,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
package/dist/index.js CHANGED
@@ -1,8 +1,4 @@
1
1
  // Public API for @digimakers/core
2
- // Schemas and types (single source of truth)
3
- export {
4
- // Schemas
5
- StepWithImageSchema, StepsWithCodeBlockSchema, ChallengeSchema, NewProjectSchema, ParsedLessonSchema, GenerateOptionsSchema, } from './schemas/index.js';
6
2
  // PDF generation
7
3
  export { createPdfGenerator, convertWithConcurrency, POOL_SIZE } from './pdf-generator.js';
8
4
  // Server
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAElC,6CAA6C;AAC7C,OAAO;AACL,UAAU;AACV,mBAAmB,EACnB,wBAAwB,EACxB,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,qBAAqB,GAStB,MAAM,oBAAoB,CAAC;AAE5B,iBAAiB;AACjB,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG3F,SAAS;AACT,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGtD,SAAS;AACT,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG9D,4BAA4B;AAC5B,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAElC,iBAAiB;AACjB,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG3F,SAAS;AACT,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGtD,SAAS;AACT,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG9D,4BAA4B;AAC5B,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
@@ -0,0 +1,17 @@
1
+ import { ImageSlot } from '../schemas/lesson.js';
2
+ export interface ParsedSection {
3
+ content: string;
4
+ imageSlots: ImageSlot[];
5
+ }
6
+ export interface DoclingParsedSections {
7
+ preface: ParsedSection;
8
+ getReady: ParsedSection;
9
+ addYourCode: ParsedSection;
10
+ tryItOut: ParsedSection;
11
+ challenge: ParsedSection;
12
+ testYourself: ParsedSection;
13
+ funFact: ParsedSection;
14
+ }
15
+ export declare function parseDoclingMarkdown(markdown: string): DoclingParsedSections;
16
+ export declare function assignImagesToSlots(sections: DoclingParsedSections, images: string[]): DoclingParsedSections;
17
+ //# sourceMappingURL=docling-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docling-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAajD,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,aAAa,CAAC;IACvB,QAAQ,EAAE,aAAa,CAAC;IACxB,WAAW,EAAE,aAAa,CAAC;IAC3B,QAAQ,EAAE,aAAa,CAAC;IACxB,SAAS,EAAE,aAAa,CAAC;IACzB,YAAY,EAAE,aAAa,CAAC;IAC5B,OAAO,EAAE,aAAa,CAAC;CACxB;AAiDD,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,GAAG,qBAAqB,CA8D5E;AAED,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,qBAAqB,EAC/B,MAAM,EAAE,MAAM,EAAE,GACf,qBAAqB,CAiBvB"}
@@ -0,0 +1,109 @@
1
+ const IMAGE_MARKER = '<!-- image -->';
2
+ const SECTION_HEADERS = {
3
+ getReady: /^##\s*Get\s*Ready/i,
4
+ addYourCode: /^##\s*(Add\s*Your\s*Code|My\s*First\s*Program)/i,
5
+ tryItOut: /^##\s*Try\s*It\s*Out/i,
6
+ challenge: /^##\s*Challenge/i,
7
+ testYourself: /^##\s*Test\s*Yourself/i,
8
+ funFact: /^##\s*Fun\s*Fact/i,
9
+ };
10
+ function countImageMarkers(content) {
11
+ return (content.match(new RegExp(IMAGE_MARKER, 'g')) || []).length;
12
+ }
13
+ function createImageSlots(count, prefix) {
14
+ return Array.from({ length: count }, (_, i) => ({
15
+ id: `${prefix}_img_${i + 1}`,
16
+ }));
17
+ }
18
+ function splitAtHeader(markdown, headerPattern) {
19
+ const lines = markdown.split('\n');
20
+ for (let i = 0; i < lines.length; i++) {
21
+ if (headerPattern.test(lines[i].trim())) {
22
+ return {
23
+ before: lines.slice(0, i).join('\n'),
24
+ after: lines.slice(i).join('\n'),
25
+ };
26
+ }
27
+ }
28
+ return null;
29
+ }
30
+ function extractSection(markdown, startPattern, endPatterns) {
31
+ const startSplit = splitAtHeader(markdown, startPattern);
32
+ if (!startSplit) {
33
+ return '';
34
+ }
35
+ let sectionContent = startSplit.after;
36
+ for (const endPattern of endPatterns) {
37
+ const endSplit = splitAtHeader(sectionContent, endPattern);
38
+ if (endSplit) {
39
+ sectionContent = endSplit.before;
40
+ break;
41
+ }
42
+ }
43
+ return sectionContent;
44
+ }
45
+ export function parseDoclingMarkdown(markdown) {
46
+ const sections = {
47
+ preface: { content: '', imageSlots: [] },
48
+ getReady: { content: '', imageSlots: [] },
49
+ addYourCode: { content: '', imageSlots: [] },
50
+ tryItOut: { content: '', imageSlots: [] },
51
+ challenge: { content: '', imageSlots: [] },
52
+ testYourself: { content: '', imageSlots: [] },
53
+ funFact: { content: '', imageSlots: [] },
54
+ };
55
+ // Extract preface (everything before Get Ready), with image slots
56
+ const getReadySplit = splitAtHeader(markdown, SECTION_HEADERS.getReady);
57
+ if (getReadySplit) {
58
+ sections.preface.content = getReadySplit.before.trim();
59
+ sections.preface.imageSlots = createImageSlots(countImageMarkers(sections.preface.content), 'preface');
60
+ }
61
+ // Extract Get Ready section, no images in this part
62
+ sections.getReady.content = extractSection(markdown, SECTION_HEADERS.getReady, [
63
+ SECTION_HEADERS.addYourCode,
64
+ SECTION_HEADERS.tryItOut,
65
+ SECTION_HEADERS.challenge,
66
+ SECTION_HEADERS.testYourself,
67
+ SECTION_HEADERS.funFact,
68
+ ]).trim();
69
+ // Extract Add Your Code section, with image slots
70
+ sections.addYourCode.content = extractSection(markdown, SECTION_HEADERS.addYourCode, [
71
+ SECTION_HEADERS.tryItOut,
72
+ SECTION_HEADERS.challenge,
73
+ SECTION_HEADERS.testYourself,
74
+ SECTION_HEADERS.funFact,
75
+ ]).trim();
76
+ sections.addYourCode.imageSlots = createImageSlots(countImageMarkers(sections.addYourCode.content), 'addYourCode');
77
+ // Extract remaining sections, no images in this part.
78
+ sections.tryItOut.content = extractSection(markdown, SECTION_HEADERS.tryItOut, [
79
+ SECTION_HEADERS.challenge,
80
+ ]).trim();
81
+ sections.challenge.content = extractSection(markdown, SECTION_HEADERS.challenge, [
82
+ SECTION_HEADERS.testYourself,
83
+ SECTION_HEADERS.funFact,
84
+ ]).trim();
85
+ sections.testYourself.content = extractSection(markdown, SECTION_HEADERS.testYourself, [
86
+ SECTION_HEADERS.funFact,
87
+ ]).trim();
88
+ const funFactSplit = splitAtHeader(markdown, SECTION_HEADERS.funFact);
89
+ if (funFactSplit) {
90
+ sections.funFact.content = funFactSplit.after.trim();
91
+ }
92
+ return sections;
93
+ }
94
+ export function assignImagesToSlots(sections, images) {
95
+ let imageIndex = 0;
96
+ const assignToSection = (section) => {
97
+ for (const slot of section.imageSlots) {
98
+ if (imageIndex < images.length) {
99
+ slot.base64 = images[imageIndex];
100
+ imageIndex++;
101
+ }
102
+ }
103
+ };
104
+ // Only assign to sections that track image slots
105
+ assignToSection(sections.preface);
106
+ assignToSection(sections.addYourCode);
107
+ return sections;
108
+ }
109
+ //# sourceMappingURL=docling-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docling-parser.js","sourceRoot":"","sources":["../../src/parsing/docling-parser.ts"],"names":[],"mappings":"AAEA,MAAM,YAAY,GAAG,gBAAgB,CAAC;AAEtC,MAAM,eAAe,GAAG;IACtB,QAAQ,EAAE,oBAAoB;IAC9B,WAAW,EAAE,iDAAiD;IAC9D,QAAQ,EAAE,uBAAuB;IACjC,SAAS,EAAE,kBAAkB;IAC7B,YAAY,EAAE,wBAAwB;IACtC,OAAO,EAAE,mBAAmB;CAC7B,CAAC;AAsBF,SAAS,iBAAiB,CAAC,OAAe;IACxC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;AACrE,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,MAAc;IACrD,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC9C,EAAE,EAAE,GAAG,MAAM,QAAQ,CAAC,GAAG,CAAC,EAAE;KAC7B,CAAC,CAAC,CAAC;AACN,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB,EAAE,aAAqB;IAC5D,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YACxC,OAAO;gBACL,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACpC,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;aACjC,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,QAAgB,EAAE,YAAoB,EAAE,WAAqB;IACnF,MAAM,UAAU,GAAG,aAAa,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACzD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,cAAc,GAAG,UAAU,CAAC,KAAK,CAAC;IAEtC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,aAAa,CAAC,cAAc,EAAE,UAAU,CAAC,CAAC;QAC3D,IAAI,QAAQ,EAAE,CAAC;YACb,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC;YACjC,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,QAAgB;IACnD,MAAM,QAAQ,GAA0B;QACtC,OAAO,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACxC,QAAQ,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACzC,WAAW,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC5C,QAAQ,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACzC,SAAS,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC1C,YAAY,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC7C,OAAO,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;KACzC,CAAC;IAEF,kEAAkE;IAClE,MAAM,aAAa,GAAG,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,CAAC,CAAC;IACxE,IAAI,aAAa,EAAE,CAAC;QAClB,QAAQ,CAAC,OAAO,CAAC,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACvD,QAAQ,CAAC,OAAO,CAAC,UAAU,GAAG,gBAAgB,CAC5C,iBAAiB,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAC3C,SAAS,CACV,CAAC;IACJ,CAAC;IAED,oDAAoD;IACpD,QAAQ,CAAC,QAAQ,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,EAAE;QAC7E,eAAe,CAAC,WAAW;QAC3B,eAAe,CAAC,QAAQ;QACxB,eAAe,CAAC,SAAS;QACzB,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,kDAAkD;IAClD,QAAQ,CAAC,WAAW,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,WAAW,EAAE;QACnF,eAAe,CAAC,QAAQ;QACxB,eAAe,CAAC,SAAS;QACzB,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IACV,QAAQ,CAAC,WAAW,CAAC,UAAU,GAAG,gBAAgB,CAChD,iBAAiB,CAAC,QAAQ,CAAC,WAAW,CAAC,OAAO,CAAC,EAC/C,aAAa,CACd,CAAC;IAEF,sDAAsD;IACtD,QAAQ,CAAC,QAAQ,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,EAAE;QAC7E,eAAe,CAAC,SAAS;KAC1B,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,QAAQ,CAAC,SAAS,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,SAAS,EAAE;QAC/E,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,QAAQ,CAAC,YAAY,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,YAAY,EAAE;QACrF,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,MAAM,YAAY,GAAG,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,OAAO,CAAC,CAAC;IACtE,IAAI,YAAY,EAAE,CAAC;QACjB,QAAQ,CAAC,OAAO,CAAC,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IACvD,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,mBAAmB,CACjC,QAA+B,EAC/B,MAAgB;IAEhB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,MAAM,eAAe,GAAG,CAAC,OAAsB,EAAE,EAAE;QACjD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACtC,IAAI,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;gBAC/B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC;gBACjC,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;IACH,CAAC,CAAC;IAEF,iDAAiD;IACjD,eAAe,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAClC,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAEtC,OAAO,QAAQ,CAAC;AAClB,CAAC"}
@@ -0,0 +1,2 @@
1
+ export declare function getDoclingMarkdown(filePath: string): Promise<string | null>;
2
+ //# sourceMappingURL=docling-runners.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AA6GA,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAejF"}
@@ -0,0 +1,85 @@
1
+ import path from 'path';
2
+ import { execFileSync } from 'child_process';
3
+ import { existsSync, statSync } from 'fs';
4
+ import { fileURLToPath } from 'url';
5
+ import { logger } from '../logger.js';
6
+ const __filename = fileURLToPath(import.meta.url);
7
+ const __dirname = path.dirname(__filename);
8
+ async function resolveDoclingBinary() {
9
+ const platformTag = `${process.platform}-${process.arch}`;
10
+ const binaryName = process.platform === 'win32' ? 'docling-cleaner.exe' : 'docling-cleaner';
11
+ try {
12
+ const { ensureDoclingCleaner } = await import('@digimakers/docling-cleaner');
13
+ const binaryPath = await ensureDoclingCleaner();
14
+ if (binaryPath && existsSync(binaryPath)) {
15
+ try {
16
+ if (statSync(binaryPath).isFile())
17
+ return binaryPath;
18
+ }
19
+ catch {
20
+ // Ignore invalid paths.
21
+ }
22
+ }
23
+ }
24
+ catch (error) {
25
+ logger.warn({ err: error }, 'Docling downloader failed, trying bundled binaries');
26
+ }
27
+ const distBinary = path.resolve(__dirname, '..', 'docling-cleaner', 'bin', platformTag, binaryName);
28
+ if (existsSync(distBinary))
29
+ return distBinary;
30
+ const distOnedirBinary = path.resolve(__dirname, '..', 'docling-cleaner', 'bin', platformTag, 'docling-cleaner', binaryName);
31
+ if (existsSync(distOnedirBinary))
32
+ return distOnedirBinary;
33
+ const srcBinary = path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner', 'bin', platformTag, binaryName);
34
+ if (existsSync(srcBinary))
35
+ return srcBinary;
36
+ const srcOnedirBinary = path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner', 'bin', platformTag, 'docling-cleaner', binaryName);
37
+ if (existsSync(srcOnedirBinary))
38
+ return srcOnedirBinary;
39
+ return null;
40
+ }
41
+ function resolveDoclingCleanerDir() {
42
+ const distCleanerDir = path.resolve(__dirname, '..', 'docling-cleaner');
43
+ if (existsSync(path.join(distCleanerDir, 'cleaner.py')))
44
+ return distCleanerDir;
45
+ const srcCleanerDir = path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner');
46
+ if (existsSync(path.join(srcCleanerDir, 'cleaner.py')))
47
+ return srcCleanerDir;
48
+ return null;
49
+ }
50
+ function getDoclingMarkdownFromUv(filePath) {
51
+ const cleanerDir = resolveDoclingCleanerDir();
52
+ if (!cleanerDir) {
53
+ logger.warn('Docling cleaner assets not found. Ensure the package includes dist/docling-cleaner.');
54
+ return null;
55
+ }
56
+ try {
57
+ return execFileSync('uv', ['run', 'python', 'cleaner.py', filePath], {
58
+ cwd: cleanerDir,
59
+ encoding: 'utf-8',
60
+ timeout: 120000,
61
+ stdio: ['pipe', 'pipe', 'pipe'],
62
+ });
63
+ }
64
+ catch (error) {
65
+ logger.warn({ err: error }, 'Docling uv fallback failed. Install uv and run in packages/core/src/docling-cleaner.');
66
+ return null;
67
+ }
68
+ }
69
+ export async function getDoclingMarkdown(filePath) {
70
+ const binaryPath = await resolveDoclingBinary();
71
+ if (binaryPath) {
72
+ try {
73
+ return execFileSync(binaryPath, [filePath], {
74
+ encoding: 'utf-8',
75
+ timeout: 120000,
76
+ stdio: ['pipe', 'pipe', 'pipe'],
77
+ });
78
+ }
79
+ catch (error) {
80
+ logger.warn({ err: error }, 'Docling binary failed, attempting uv fallback');
81
+ }
82
+ }
83
+ return getDoclingMarkdownFromUv(filePath);
84
+ }
85
+ //# sourceMappingURL=docling-runners.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAC3C,KAAK,UAAU,oBAAoB;IACjC,MAAM,WAAW,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;IAC1D,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,iBAAiB,CAAC;IAE5F,IAAI,CAAC;QACH,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC;gBACH,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE;oBAAE,OAAO,UAAU,CAAC;YACvD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,oDAAoD,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAC7B,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,OAAO,CACnC,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,gBAAgB,CAAC;QAAE,OAAO,gBAAgB,CAAC;IAE1D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAC5B,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IAE5C,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAClC,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,eAAe,CAAC;QAAE,OAAO,eAAe,CAAC;IAExD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,iBAAiB,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,cAAc,CAAC;IAE/E,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,aAAa,CAAC;IAE7E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAgB;IAChD,MAAM,UAAU,GAAG,wBAAwB,EAAE,CAAC;IAC9C,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CACT,qFAAqF,CACtF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,EAAE;YACnE,GAAG,EAAE,UAAU;YACf,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,MAAM;YACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CACT,EAAE,GAAG,EAAE,KAAK,EAAE,EACd,sFAAsF,CACvF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,EAAE;gBAC1C,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,+CAA+C,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAC;AAC5C,CAAC"}
@@ -1,6 +1,6 @@
1
- import { ParsedLesson } from '../schemas/index.js';
1
+ import { Lesson } from '../schemas/index.js';
2
2
  export interface ParseResult {
3
- data: ParsedLesson;
3
+ data: Lesson;
4
4
  sourcePath: string;
5
5
  }
6
6
  export declare function parseDocx(filePath: string): Promise<ParseResult>;
@@ -1 +1 @@
1
- {"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAsB,YAAY,EAAiB,MAAM,qBAAqB,CAAC;AAKtF,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AA4BD,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAkFtE"}
1
+ {"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAiB,MAAM,qBAAqB,CAAC;AAoB5D,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;CACpB;AA6BD,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CA6JtE"}
@@ -2,13 +2,19 @@ import fs from 'fs/promises';
2
2
  import mammoth from 'mammoth';
3
3
  import { generateText, Output } from 'ai';
4
4
  import { createGoogleGenerativeAI } from '@ai-sdk/google';
5
- import { ParsedLessonSchema } from '../schemas/index.js';
6
5
  import { logger } from '../logger.js';
7
6
  import { extractLanguageFromFooter } from './footer-parser.js';
8
- // Setup google generative ai
9
- const google = createGoogleGenerativeAI({
10
- apiKey: process.env.GEMINI_API_KEY,
11
- });
7
+ import { LessonLLMSchema, LessonLLMSchemaWithoutLanguage, } from '../schemas/lesson.js';
8
+ import { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
9
+ import { getDoclingMarkdown } from './docling-runners.js';
10
+ import { buildDocxParserPrompt, docxParserSystemPrompt } from './prompts.js';
11
+ import { formatDocumentCode } from '../agents/code-formatter.js';
12
+ import { enrichDebugIssues, inferLessonType, normaliseLessonContent, normaliseLessonForType, } from './post-processors.js';
13
+ function getGoogleClient() {
14
+ return createGoogleGenerativeAI({
15
+ apiKey: process.env.GEMINI_API_KEY,
16
+ });
17
+ }
12
18
  // Extract images from docx as base64 data URIs
13
19
  async function extractImages(buffer) {
14
20
  const images = [];
@@ -27,63 +33,131 @@ async function extractImages(buffer) {
27
33
  export async function parseDocx(filePath) {
28
34
  logger.info(`Parsing: ${filePath}`);
29
35
  const buffer = await fs.readFile(filePath);
30
- // Extract text, images, and footer language in parallel
31
- const [{ value: text }, allImages, footerLanguage] = await Promise.all([
32
- mammoth.extractRawText({ buffer }),
36
+ // Extract images and footer language in parallel, try docling markdown
37
+ const [allImages, footerLanguage, doclingMarkdown] = await Promise.all([
33
38
  extractImages(buffer),
34
39
  extractLanguageFromFooter(filePath),
40
+ getDoclingMarkdown(filePath),
35
41
  ]);
36
- logger.info(text);
37
- logger.info(allImages);
38
- logger.info(`Extracted ${text.length} characters and ${allImages.length} images`);
42
+ // Parse docling markdown to get sections with image placeholders
43
+ let parsedSections = null;
44
+ let textForLLM;
45
+ if (doclingMarkdown) {
46
+ parsedSections = parseDoclingMarkdown(doclingMarkdown);
47
+ assignImagesToSlots(parsedSections, allImages);
48
+ logger.debug({
49
+ prefaceImageSlots: parsedSections.preface.imageSlots.length,
50
+ addYourCodeImageSlots: parsedSections.addYourCode.imageSlots.length,
51
+ totalImages: allImages.length,
52
+ }, 'Docling image slots parsed');
53
+ textForLLM = doclingMarkdown;
54
+ logger.info('Using docling markdown with placeholder-based image mapping');
55
+ logger.info(textForLLM);
56
+ logger.info('Formatting document code blocks with agent');
57
+ textForLLM = await formatDocumentCode(doclingMarkdown, footerLanguage);
58
+ logger.info(textForLLM);
59
+ }
60
+ else {
61
+ const { value: text } = await mammoth.extractRawText({ buffer });
62
+ textForLLM = text;
63
+ logger.info('Falling back to mammoth text extraction');
64
+ logger.info(textForLLM);
65
+ }
66
+ logger.info(`Extracted ${textForLLM.length} characters and ${allImages.length} images`);
39
67
  if (footerLanguage) {
40
68
  logger.info(`Programming language from footer: ${footerLanguage}`);
41
69
  }
42
70
  else {
43
71
  logger.warn('Footer language not found');
44
72
  }
45
- // First image is project cover, rest are for code steps
46
- const projectImage = allImages.length > 0 ? allImages[0] : null;
47
- const stepImages = allImages.slice(1);
48
- logger.info(`Found ${stepImages.length} step images, projectImage: ${projectImage ? 'yes' : 'no'}`);
49
- // Create schema for LLM - omit programmingLanguage if found in footer
50
- const llmSchema = footerLanguage
51
- ? ParsedLessonSchema.omit({ programmingLanguage: true })
52
- : ParsedLessonSchema;
53
- // Use LLM to extract structured data
54
- const { output } = await generateText({
55
- model: google('gemini-2.0-flash'),
56
- output: Output.object({
57
- schema: llmSchema,
58
- }),
59
- prompt: `Extract structured lesson data from this educational document.
60
-
61
- This is a programming lesson sheet for students. Extract all the relevant sections and content.
62
-
63
- If a section is not present in the document, use empty arrays for array fields, empty strings for required string fields, and null for nullable fields.
64
-
65
- For the addYourCodeSection, each step should be a clear instruction. Set image to null for all steps (images will be added separately).
66
-
67
- Document content:
68
- ${text}`,
73
+ // If we find the programming language in the footer, we don't need the LLM
74
+ // to tell us.
75
+ const llmSchema = footerLanguage ? LessonLLMSchemaWithoutLanguage : LessonLLMSchema;
76
+ let output;
77
+ try {
78
+ // Use LLM to extract structured data
79
+ const response = await generateText({
80
+ model: getGoogleClient()('gemini-2.5-pro'),
81
+ output: Output.object({
82
+ schema: llmSchema,
83
+ }),
84
+ system: docxParserSystemPrompt,
85
+ prompt: buildDocxParserPrompt(textForLLM),
86
+ temperature: 0,
87
+ maxRetries: 5,
88
+ });
89
+ output = response.output;
90
+ }
91
+ catch (error) {
92
+ const err = error;
93
+ logger.error({ err, filePath }, 'LLM extraction failed');
94
+ const issues = err?.cause?.issues ?? err?.issues;
95
+ if (issues) {
96
+ logger.error({ issues, filePath }, 'LLM schema validation issues');
97
+ }
98
+ const value = err?.cause?.value ?? err?.value;
99
+ if (value) {
100
+ logger.error({ value, filePath }, 'LLM output that failed validation');
101
+ }
102
+ throw error;
103
+ }
104
+ // Infer the lesson type with heuristic
105
+ const dataWithoutType = normaliseLessonContent(output);
106
+ let data = normaliseLessonForType({
107
+ ...dataWithoutType,
108
+ lessonType: inferLessonType(textForLLM, footerLanguage, dataWithoutType),
69
109
  });
70
- logger.info(output);
71
- logger.info(`Successfully extracted lesson: ${output.topic} - ${output.project}`);
72
- // Post-process: assign images and programming language to the extracted data
73
- const data = output;
74
- data.projectImage = projectImage;
75
- // Set programming language from footer if found, otherwise use LLM's determination
110
+ data = enrichDebugIssues(textForLLM, data);
111
+ logger.info(`Inferred lesson type as: '${data.lessonType}'`);
112
+ logger.info(`Successfully extracted lesson: ${data.topic} - ${data.project}`);
113
+ // Set programming language from footer if found
76
114
  if (footerLanguage) {
77
115
  data.programmingLanguage = footerLanguage;
78
116
  }
79
- // Assign step images in order
80
- const addSection = data.addYourCodeSection;
81
- if (stepImages.length > 0 && Array.isArray(addSection)) {
82
- const isStepWithImageArray = addSection.every((item) => typeof item === 'object' && item !== null && 'step' in item);
83
- if (isStepWithImageArray) {
84
- addSection.forEach((step, index) => {
85
- step.image = stepImages[index] ?? null;
86
- });
117
+ // Assign images using placeholder-based mapping if available
118
+ if (parsedSections && data.lessonType !== 'debugging lesson') {
119
+ // Assign preface image slots
120
+ if (parsedSections.preface.imageSlots.length > 0) {
121
+ data.prefaceImageSlots = parsedSections.preface.imageSlots;
122
+ }
123
+ // Assign Add Your Code step images
124
+ const addSection = data.addYourCodeSection;
125
+ if (Array.isArray(addSection) && parsedSections.addYourCode.imageSlots.length > 0) {
126
+ const isStepWithImageArray = addSection.every((item) => typeof item === 'object' && item !== null && 'step' in item);
127
+ if (isStepWithImageArray) {
128
+ const slots = parsedSections.addYourCode.imageSlots;
129
+ addSection.forEach((step, index) => {
130
+ if (index < slots.length) {
131
+ step.imageSlot = {
132
+ id: slots[index].id,
133
+ base64: slots[index].base64,
134
+ };
135
+ }
136
+ });
137
+ }
138
+ }
139
+ }
140
+ else if (data.lessonType !== 'debugging lesson') {
141
+ // Fallback using old behavior, first image is project, rest are steps
142
+ // Not good if there are multiple images in preface section
143
+ logger.warn('Falling back to old image assignment behaviour');
144
+ if (allImages.length > 0) {
145
+ data.prefaceImageSlots = [{ id: 'fallback_preface_img_1', base64: allImages[0] }];
146
+ }
147
+ const stepImages = allImages.slice(1);
148
+ const addSection = data.addYourCodeSection;
149
+ if (stepImages.length > 0 && Array.isArray(addSection)) {
150
+ const isStepWithImageArray = addSection.every((item) => typeof item === 'object' && item !== null && 'step' in item);
151
+ if (isStepWithImageArray) {
152
+ addSection.forEach((step, index) => {
153
+ if (stepImages[index]) {
154
+ step.imageSlot = {
155
+ id: `fallback_img_${index + 1}`,
156
+ base64: stepImages[index],
157
+ };
158
+ }
159
+ });
160
+ }
87
161
  }
88
162
  }
89
163
  logger.info(data);
@@ -1 +1 @@
1
- {"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAA+B,MAAM,qBAAqB,CAAC;AACtF,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAQ/D,6BAA6B;AAC7B,MAAM,MAAM,GAAG,wBAAwB,CAAC;IACtC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;CACnC,CAAC,CAAC;AAEH,+CAA+C;AAC/C,KAAK,UAAU,aAAa,CAAC,MAAc;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,MAAM,OAAO,CAAC,aAAa,CACzB,EAAE,MAAM,EAAE,EACV;QACE,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACtD,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;YACvC,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,QAAQ,KAAK,CAAC,WAAW,WAAW,MAAM,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;QAC1B,CAAC,CAAC;KACH,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6CAA6C;AAC7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,CAAC,IAAI,CAAC,YAAY,QAAQ,EAAE,CAAC,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,wDAAwD;IACxD,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,cAAc,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACrE,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;QAClC,aAAa,CAAC,MAAM,CAAC;QACrB,yBAAyB,CAAC,QAAQ,CAAC;KACpC,CAAC,CAAC;IACH,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACvB,MAAM,CAAC,IAAI,CAAC,aAAa,IAAI,CAAC,MAAM,mBAAmB,SAAS,CAAC,MAAM,SAAS,CAAC,CAAC;IAClF,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,qCAAqC,cAAc,EAAE,CAAC,CAAC;IACrE,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC3C,CAAC;IAED,wDAAwD;IACxD,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAChE,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAEtC,MAAM,CAAC,IAAI,CACT,SAAS,UAAU,CAAC,MAAM,+BAA+B,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CACvF,CAAC;IAEF,sEAAsE;IACtE,MAAM,SAAS,GAAG,cAAc;QAC9B,CAAC,CAAC,kBAAkB,CAAC,IAAI,CAAC,EAAE,mBAAmB,EAAE,IAAI,EAAE,CAAC;QACxD,CAAC,CAAC,kBAAkB,CAAC;IAEvB,qCAAqC;IACrC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,YAAY,CAAC;QACpC,KAAK,EAAE,MAAM,CAAC,kBAAkB,CAAC;QACjC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;YACpB,MAAM,EAAE,SAAS;SAClB,CAAC;QACF,MAAM,EAAE;;;;;;;;;EASV,IAAI,EAAE;KACL,CAAC,CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACpB,MAAM,CAAC,IAAI,CAAC,kCAAkC,MAAO,CAAC,KAAK,MAAM,MAAO,CAAC,OAAO,EAAE,CAAC,CAAC;IAEpF,6EAA6E;IAC7E,MAAM,IAAI,GAAG,MAAsB,CAAC;IACpC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IAEjC,mFAAmF;IACnF,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC,mBAAmB,GAAG,cAAqC,CAAC;IACnE,CAAC;IAED,8BAA8B;IAC9B,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;IAC3C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QACvD,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;QAEF,IAAI,oBAAoB,EAAE,CAAC;YACxB,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;gBACtD,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC;YACzC,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAElB,OAAO;QACL,IAAI;QACJ,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAE1D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAC/D,OAAO,EAEL,eAAe,EACf,8BAA8B,GAE/B,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAC7E,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EACL,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAO9B,SAAS,eAAe;IACtB,OAAO,wBAAwB,CAAC;QAC9B,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;KACnC,CAAC,CAAC;AACL,CAAC;AAED,+CAA+C;AAC/C,KAAK,UAAU,aAAa,CAAC,MAAc;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,MAAM,OAAO,CAAC,aAAa,CACzB,EAAE,MAAM,EAAE,EACV;QACE,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACtD,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;YACvC,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,QAAQ,KAAK,CAAC,WAAW,WAAW,MAAM,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;QAC1B,CAAC,CAAC;KACH,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6CAA6C;AAC7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,CAAC,IAAI,CAAC,YAAY,QAAQ,EAAE,CAAC,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,uEAAuE;IACvE,MAAM,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACrE,aAAa,CAAC,MAAM,CAAC;QACrB,yBAAyB,CAAC,QAAQ,CAAC;QACnC,kBAAkB,CAAC,QAAQ,CAAC;KAC7B,CAAC,CAAC;IAEH,iEAAiE;IACjE,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,IAAI,UAAkB,CAAC;IAEvB,IAAI,eAAe,EAAE,CAAC;QACpB,cAAc,GAAG,oBAAoB,CAAC,eAAe,CAAC,CAAC;QACvD,mBAAmB,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;QAC/C,MAAM,CAAC,KAAK,CACV;YACE,iBAAiB,EAAE,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM;YAC3D,qBAAqB,EAAE,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM;YACnE,WAAW,EAAE,SAAS,CAAC,MAAM;SAC9B,EACD,4BAA4B,CAC7B,CAAC;QACF,UAAU,GAAG,eAAe,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;QAC3E,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAC1D,UAAU,GAAG,MAAM,kBAAkB,CAAC,eAAe,EAAE,cAAc,CAAC,CAAC;QACvE,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACjE,UAAU,GAAG,IAAI,CAAC;QAClB,MAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,aAAa,UAAU,CAAC,MAAM,mBAAmB,SAAS,CAAC,MAAM,SAAS,CAAC,CAAC;IACxF,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,qCAAqC,cAAc,EAAE,CAAC,CAAC;IACrE,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC3C,CAAC;IAED,2EAA2E;IAC3E,cAAc;IACd,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,8BAA8B,CAAC,CAAC,CAAC,eAAe,CAAC;IAEpF,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,qCAAqC;QACrC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC;YAClC,KAAK,EAAE,eAAe,EAAE,CAAC,gBAAgB,CAAC;YAC1C,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;YACF,MAAM,EAAE,sBAAsB;YAC9B,MAAM,EAAE,qBAAqB,CAAC,UAAU,CAAC;YACzC,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;QACH,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,KAAY,CAAC;QACzB,MAAM,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,uBAAuB,CAAC,CAAC;QAEzD,MAAM,MAAM,GAAG,GAAG,EAAE,KAAK,EAAE,MAAM,IAAI,GAAG,EAAE,MAAM,CAAC;QACjD,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,8BAA8B,CAAC,CAAC;QACrE,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,EAAE,KAAK,EAAE,KAAK,IAAI,GAAG,EAAE,KAAK,CAAC;QAC9C,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,mCAAmC,CAAC,CAAC;QACzE,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;IAED,uCAAuC;IACvC,MAAM,eAAe,GAAG,sBAAsB,CAAC,MAAmB,CAAC,CAAC;IACpE,IAAI,IAAI,GAAG,sBAAsB,CAAC;QAChC,GAAG,eAAe;QAClB,UAAU,EAAE,eAAe,CAAC,UAAU,EAAE,cAAqC,EAAE,eAAe,CAAC;KACtF,CAAC,CAAC;IACb,IAAI,GAAG,iBAAiB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAC3C,MAAM,CAAC,IAAI,CAAC,6BAA6B,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC;IAC7D,MAAM,CAAC,IAAI,CAAC,kCAAkC,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IAE9E,gDAAgD;IAChD,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC,mBAAmB,GAAG,cAAqC,CAAC;IACnE,CAAC;IAED,6DAA6D;IAC7D,IAAI,cAAc,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAC7D,6BAA6B;QAC7B,IAAI,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,iBAAiB,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC;QAC7D,CAAC;QAED,mCAAmC;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClF,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YAEF,IAAI,oBAAoB,EAAE,CAAC;gBACzB,MAAM,KAAK,GAAG,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC;gBACnD,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;wBACzB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE;4BACnB,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;yBAC5B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;SAAM,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAClD,sEAAsE;QACtE,2DAA2D;QAC3D,MAAM,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;QAC9D,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,IAAI,CAAC,iBAAiB,GAAG,CAAC,EAAE,EAAE,EAAE,wBAAwB,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpF,CAAC;QACD,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YAEF,IAAI,oBAAoB,EAAE,CAAC;gBACxB,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;wBACtB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,gBAAgB,KAAK,GAAG,CAAC,EAAE;4BAC/B,MAAM,EAAE,UAAU,CAAC,KAAK,CAAC;yBAC1B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAElB,OAAO;QACL,IAAI;QACJ,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC"}
@@ -2,4 +2,7 @@ export { findDocxFiles } from './file-discovery.js';
2
2
  export type { DiscoveryOptions, DiscoveredFile } from './file-discovery.js';
3
3
  export { parseDocx } from './docx-parser.js';
4
4
  export type { ParseResult } from './docx-parser.js';
5
+ export { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
6
+ export type { ParsedSection, DoclingParsedSections } from './docling-parser.js';
7
+ export { getDoclingMarkdown } from './docling-runners.js';
5
8
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAE5E,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAE5E,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEpD,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,YAAY,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAEhF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
@@ -1,3 +1,5 @@
1
1
  export { findDocxFiles } from './file-discovery.js';
2
2
  export { parseDocx } from './docx-parser.js';
3
+ export { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
4
+ export { getDoclingMarkdown } from './docling-runners.js';
3
5
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAGhF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
@@ -1,2 +1,3 @@
1
1
  export declare function normaliseCodeBlock(code: string | null): string | null;
2
+ export declare function normaliseText(text: string | null): string | null;
2
3
  //# sourceMappingURL=normalise.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"normalise.d.ts","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"AACA,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,GAAG,IAAI,CAQrE"}
1
+ {"version":3,"file":"normalise.d.ts","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"AA0CA,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,GAAG,IAAI,CAcrE;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,GAAG,IAAI,CAMhE"}
@@ -1,12 +1,52 @@
1
1
  // Removes extra new lines and stuff in code blocks
2
+ function reflowSingleLineCode(code) {
3
+ if (code.includes('\n') || code.length < 80)
4
+ return code;
5
+ let reflowed = code
6
+ .replace(/;\s*/g, ';\n')
7
+ .replace(/{\s*/g, '{\n')
8
+ .replace(/}\s*/g, '}\n')
9
+ .replace(/\belse\b\s*/g, 'else\n');
10
+ if (!reflowed.includes('\n') &&
11
+ /\b(for|if|while|def|class)\b/.test(reflowed) &&
12
+ !/https?:\/\//i.test(reflowed)) {
13
+ reflowed = reflowed.replace(/:\s*/g, ':\n');
14
+ }
15
+ if (!reflowed.includes('\n') && /GraphicsWindow\./.test(reflowed)) {
16
+ reflowed = reflowed.replace(/GraphicsWindow\./g, '\nGraphicsWindow.').trim();
17
+ }
18
+ if (!reflowed.includes('\n') &&
19
+ /(\bfor\b|\bif\b|\bwhile\b|\bdef\b|\bclass\b|\belif\b|\belse\b)/.test(reflowed)) {
20
+ reflowed = reflowed.replace(/(\bfor\b|\bif\b|\bwhile\b|\bdef\b|\bclass\b|\belif\b|\belse\b)/g, '\n$1');
21
+ }
22
+ if (!reflowed.includes('\n') && /(System\.out|printf\(|println\()/.test(reflowed)) {
23
+ reflowed = reflowed
24
+ .replace(/System\.out/g, '\nSystem.out')
25
+ .replace(/printf\(/g, '\nprintf(')
26
+ .replace(/println\(/g, '\nprintln(');
27
+ }
28
+ return reflowed;
29
+ }
2
30
  export function normaliseCodeBlock(code) {
3
31
  if (!code)
4
32
  return code;
5
- return code
33
+ const hasRealNewlines = code.includes('\n');
34
+ const hasEscapedNewlines = code.includes('\\n');
35
+ const normalisedInput = !hasRealNewlines && hasEscapedNewlines ? code.replace(/\\n/g, '\n') : code;
36
+ const reflowed = reflowSingleLineCode(normalisedInput);
37
+ return reflowed
6
38
  .replace(/\r\n/g, '\n')
7
39
  .split('\n')
8
40
  .map((line) => line.replace(/\s+$/g, ''))
9
41
  .join('\n')
10
42
  .replace(/\n{2,}/g, '\n');
11
43
  }
44
+ export function normaliseText(text) {
45
+ if (!text)
46
+ return text;
47
+ return text
48
+ .replace(/```(?:\w+)?/g, '')
49
+ .replace(/<\/?code>/g, '')
50
+ .trim();
51
+ }
12
52
  //# sourceMappingURL=normalise.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"normalise.js","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"AAAA,mDAAmD;AACnD,MAAM,UAAU,kBAAkB,CAAC,IAAmB;IACpD,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,OAAO,IAAI;SACR,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;SACxC,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;AAC9B,CAAC"}
1
+ {"version":3,"file":"normalise.js","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"AAAA,mDAAmD;AACnD,SAAS,oBAAoB,CAAC,IAAY;IACxC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,CAAC;IAEzD,IAAI,QAAQ,GAAG,IAAI;SAChB,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC;SACvB,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC;SACvB,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC;SACvB,OAAO,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;IAErC,IACE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;QACxB,8BAA8B,CAAC,IAAI,CAAC,QAAQ,CAAC;QAC7C,CAAC,cAAc,CAAC,IAAI,CAAC,QAAQ,CAAC,EAC9B,CAAC;QACD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IAC9C,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,kBAAkB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAClE,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,mBAAmB,EAAE,mBAAmB,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/E,CAAC;IAED,IACE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;QACxB,gEAAgE,CAAC,IAAI,CAAC,QAAQ,CAAC,EAC/E,CAAC;QACD,QAAQ,GAAG,QAAQ,CAAC,OAAO,CACzB,iEAAiE,EACjE,MAAM,CACP,CAAC;IACJ,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,kCAAkC,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAClF,QAAQ,GAAG,QAAQ;aAChB,OAAO,CAAC,cAAc,EAAE,cAAc,CAAC;aACvC,OAAO,CAAC,WAAW,EAAE,WAAW,CAAC;aACjC,OAAO,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;IACzC,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,IAAmB;IACpD,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,eAAe,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,eAAe,GACnB,CAAC,eAAe,IAAI,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC7E,MAAM,QAAQ,GAAG,oBAAoB,CAAC,eAAe,CAAC,CAAC;IAEvD,OAAO,QAAQ;SACZ,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC;SACtB,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;SACxC,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;AAC9B,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAmB;IAC/C,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,OAAO,IAAI;SACR,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;SAC3B,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;SACzB,IAAI,EAAE,CAAC;AACZ,CAAC"}