@digimakers/core 0.1.4 → 0.3.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/agents/code-formatter.d.ts +2 -0
  2. package/dist/agents/code-formatter.d.ts.map +1 -0
  3. package/dist/agents/code-formatter.js +23 -0
  4. package/dist/agents/code-formatter.js.map +1 -0
  5. package/dist/docling-cleaner/cleaner.py +34 -0
  6. package/dist/docling-cleaner/pyproject.toml +10 -0
  7. package/dist/docling-cleaner/uv.lock +2077 -0
  8. package/dist/index.d.ts +2 -3
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +1 -5
  11. package/dist/index.js.map +1 -1
  12. package/dist/parsing/docling-parser.d.ts +17 -0
  13. package/dist/parsing/docling-parser.d.ts.map +1 -0
  14. package/dist/parsing/docling-parser.js +109 -0
  15. package/dist/parsing/docling-parser.js.map +1 -0
  16. package/dist/parsing/docling-runners.d.ts +2 -0
  17. package/dist/parsing/docling-runners.d.ts.map +1 -0
  18. package/dist/parsing/docling-runners.js +85 -0
  19. package/dist/parsing/docling-runners.js.map +1 -0
  20. package/dist/parsing/docx-parser.d.ts +2 -2
  21. package/dist/parsing/docx-parser.d.ts.map +1 -1
  22. package/dist/parsing/docx-parser.js +130 -37
  23. package/dist/parsing/docx-parser.js.map +1 -1
  24. package/dist/parsing/footer-parser.d.ts +7 -0
  25. package/dist/parsing/footer-parser.d.ts.map +1 -0
  26. package/dist/parsing/footer-parser.js +49 -0
  27. package/dist/parsing/footer-parser.js.map +1 -0
  28. package/dist/parsing/index.d.ts +3 -0
  29. package/dist/parsing/index.d.ts.map +1 -1
  30. package/dist/parsing/index.js +2 -0
  31. package/dist/parsing/index.js.map +1 -1
  32. package/dist/parsing/normalise.d.ts +3 -0
  33. package/dist/parsing/normalise.d.ts.map +1 -0
  34. package/dist/parsing/normalise.js +52 -0
  35. package/dist/parsing/normalise.js.map +1 -0
  36. package/dist/parsing/post-processors.d.ts +6 -0
  37. package/dist/parsing/post-processors.d.ts.map +1 -0
  38. package/dist/parsing/post-processors.js +217 -0
  39. package/dist/parsing/post-processors.js.map +1 -0
  40. package/dist/parsing/prompts.d.ts +5 -0
  41. package/dist/parsing/prompts.d.ts.map +1 -0
  42. package/dist/parsing/prompts.js +24 -0
  43. package/dist/parsing/prompts.js.map +1 -0
  44. package/dist/pdf-generator.d.ts +18 -6
  45. package/dist/pdf-generator.d.ts.map +1 -1
  46. package/dist/pdf-generator.js +80 -70
  47. package/dist/pdf-generator.js.map +1 -1
  48. package/dist/sample-data.d.ts +2 -2
  49. package/dist/sample-data.d.ts.map +1 -1
  50. package/dist/sample-data.js +16 -10
  51. package/dist/sample-data.js.map +1 -1
  52. package/dist/schemas/index.d.ts +1 -1
  53. package/dist/schemas/index.d.ts.map +1 -1
  54. package/dist/schemas/index.js +1 -1
  55. package/dist/schemas/index.js.map +1 -1
  56. package/dist/schemas/lesson.d.ts +404 -44
  57. package/dist/schemas/lesson.d.ts.map +1 -1
  58. package/dist/schemas/lesson.js +117 -34
  59. package/dist/schemas/lesson.js.map +1 -1
  60. package/package.json +6 -3
package/dist/index.d.ts CHANGED
@@ -1,6 +1,5 @@
1
- export { StepWithImageSchema, StepsWithCodeBlockSchema, ChallengeSchema, NewProjectSchema, ParsedLessonSchema, GenerateOptionsSchema, type StepWithImage, type StepsWithCodeBlock, type Challenge, type NewProject, type ParsedLesson, type GenerateOptions, } from './schemas/index.js';
2
- export { createPdfGenerator } from './pdf-generator.js';
3
- export type { PdfGeneratorInstance } from './pdf-generator.js';
1
+ export { createPdfGenerator, convertWithConcurrency, POOL_SIZE } from './pdf-generator.js';
2
+ export type { PdfGeneratorInstance, FileToConvert, ConversionResult } from './pdf-generator.js';
4
3
  export { startServer, stopServer } from './server.js';
5
4
  export type { ServerInstance } from './server.js';
6
5
  export { logger } from './logger.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAEL,mBAAmB,EACnB,wBAAwB,EACxB,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,qBAAqB,EAErB,KAAK,aAAa,EAClB,KAAK,kBAAkB,EACvB,KAAK,SAAS,EACd,KAAK,UAAU,EACf,KAAK,YAAY,EACjB,KAAK,eAAe,GACrB,MAAM,oBAAoB,CAAC;AAG5B,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AACxD,YAAY,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAG/D,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACtD,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAGlD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC9D,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGxF,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC3F,YAAY,EAAE,oBAAoB,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AAGhG,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACtD,YAAY,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAGlD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC9D,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAGxF,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
package/dist/index.js CHANGED
@@ -1,10 +1,6 @@
1
1
  // Public API for @digimakers/core
2
- // Schemas and types (single source of truth)
3
- export {
4
- // Schemas
5
- StepWithImageSchema, StepsWithCodeBlockSchema, ChallengeSchema, NewProjectSchema, ParsedLessonSchema, GenerateOptionsSchema, } from './schemas/index.js';
6
2
  // PDF generation
7
- export { createPdfGenerator } from './pdf-generator.js';
3
+ export { createPdfGenerator, convertWithConcurrency, POOL_SIZE } from './pdf-generator.js';
8
4
  // Server
9
5
  export { startServer, stopServer } from './server.js';
10
6
  // Logger
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAElC,6CAA6C;AAC7C,OAAO;AACL,UAAU;AACV,mBAAmB,EACnB,wBAAwB,EACxB,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,qBAAqB,GAQtB,MAAM,oBAAoB,CAAC;AAE5B,iBAAiB;AACjB,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAGxD,SAAS;AACT,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGtD,SAAS;AACT,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG9D,4BAA4B;AAC5B,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAElC,iBAAiB;AACjB,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG3F,SAAS;AACT,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGtD,SAAS;AACT,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,UAAU;AACV,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAG9D,4BAA4B;AAC5B,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAC"}
@@ -0,0 +1,17 @@
1
+ import { ImageSlot } from '../schemas/lesson.js';
/**
 * One section of a parsed lesson document: its markdown text plus the image
 * slots associated with it.
 */
export interface ParsedSection {
    /** Markdown text of the section. */
    content: string;
    /** Image slots belonging to this section, in document order. */
    imageSlots: ImageSlot[];
}
/**
 * The lesson sections produced by {@link parseDoclingMarkdown}, keyed by
 * section name.
 */
export interface DoclingParsedSections {
    /** Content that precedes the "Get Ready" heading. */
    preface: ParsedSection;
    /** The "Get Ready" section. */
    getReady: ParsedSection;
    /** The "Add Your Code" (or "My First Program") section. */
    addYourCode: ParsedSection;
    /** The "Try It Out" section. */
    tryItOut: ParsedSection;
    /** The "Challenge" section. */
    challenge: ParsedSection;
    /** The "Test Yourself" section. */
    testYourself: ParsedSection;
    /** The "Fun Fact" section. */
    funFact: ParsedSection;
}
/**
 * Splits markdown into the named lesson sections.
 *
 * @param markdown - Markdown text of the whole document.
 * @returns Every section; sections whose heading is absent have empty content.
 */
export declare function parseDoclingMarkdown(markdown: string): DoclingParsedSections;
/**
 * Fills image slots with the supplied images, in order.
 *
 * @param sections - Sections whose slots should be filled.
 * @param images - Image payloads, consumed from the front.
 * @returns The sections that were passed in.
 */
export declare function assignImagesToSlots(sections: DoclingParsedSections, images: string[]): DoclingParsedSections;
17
+ //# sourceMappingURL=docling-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docling-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,sBAAsB,CAAC;AAajD,MAAM,WAAW,aAAa;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,aAAa,CAAC;IACvB,QAAQ,EAAE,aAAa,CAAC;IACxB,WAAW,EAAE,aAAa,CAAC;IAC3B,QAAQ,EAAE,aAAa,CAAC;IACxB,SAAS,EAAE,aAAa,CAAC;IACzB,YAAY,EAAE,aAAa,CAAC;IAC5B,OAAO,EAAE,aAAa,CAAC;CACxB;AAiDD,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,GAAG,qBAAqB,CA8D5E;AAED,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,qBAAqB,EAC/B,MAAM,EAAE,MAAM,EAAE,GACf,qBAAqB,CAiBvB"}
@@ -0,0 +1,109 @@
1
+ const IMAGE_MARKER = '<!-- image -->';
2
+ const SECTION_HEADERS = {
3
+ getReady: /^##\s*Get\s*Ready/i,
4
+ addYourCode: /^##\s*(Add\s*Your\s*Code|My\s*First\s*Program)/i,
5
+ tryItOut: /^##\s*Try\s*It\s*Out/i,
6
+ challenge: /^##\s*Challenge/i,
7
+ testYourself: /^##\s*Test\s*Yourself/i,
8
+ funFact: /^##\s*Fun\s*Fact/i,
9
+ };
10
+ function countImageMarkers(content) {
11
+ return (content.match(new RegExp(IMAGE_MARKER, 'g')) || []).length;
12
+ }
13
+ function createImageSlots(count, prefix) {
14
+ return Array.from({ length: count }, (_, i) => ({
15
+ id: `${prefix}_img_${i + 1}`,
16
+ }));
17
+ }
18
+ function splitAtHeader(markdown, headerPattern) {
19
+ const lines = markdown.split('\n');
20
+ for (let i = 0; i < lines.length; i++) {
21
+ if (headerPattern.test(lines[i].trim())) {
22
+ return {
23
+ before: lines.slice(0, i).join('\n'),
24
+ after: lines.slice(i).join('\n'),
25
+ };
26
+ }
27
+ }
28
+ return null;
29
+ }
30
+ function extractSection(markdown, startPattern, endPatterns) {
31
+ const startSplit = splitAtHeader(markdown, startPattern);
32
+ if (!startSplit) {
33
+ return '';
34
+ }
35
+ let sectionContent = startSplit.after;
36
+ for (const endPattern of endPatterns) {
37
+ const endSplit = splitAtHeader(sectionContent, endPattern);
38
+ if (endSplit) {
39
+ sectionContent = endSplit.before;
40
+ break;
41
+ }
42
+ }
43
+ return sectionContent;
44
+ }
45
+ export function parseDoclingMarkdown(markdown) {
46
+ const sections = {
47
+ preface: { content: '', imageSlots: [] },
48
+ getReady: { content: '', imageSlots: [] },
49
+ addYourCode: { content: '', imageSlots: [] },
50
+ tryItOut: { content: '', imageSlots: [] },
51
+ challenge: { content: '', imageSlots: [] },
52
+ testYourself: { content: '', imageSlots: [] },
53
+ funFact: { content: '', imageSlots: [] },
54
+ };
55
+ // Extract preface (everything before Get Ready), with image slots
56
+ const getReadySplit = splitAtHeader(markdown, SECTION_HEADERS.getReady);
57
+ if (getReadySplit) {
58
+ sections.preface.content = getReadySplit.before.trim();
59
+ sections.preface.imageSlots = createImageSlots(countImageMarkers(sections.preface.content), 'preface');
60
+ }
61
+ // Extract Get Ready section, no images in this part
62
+ sections.getReady.content = extractSection(markdown, SECTION_HEADERS.getReady, [
63
+ SECTION_HEADERS.addYourCode,
64
+ SECTION_HEADERS.tryItOut,
65
+ SECTION_HEADERS.challenge,
66
+ SECTION_HEADERS.testYourself,
67
+ SECTION_HEADERS.funFact,
68
+ ]).trim();
69
+ // Extract Add Your Code section, with image slots
70
+ sections.addYourCode.content = extractSection(markdown, SECTION_HEADERS.addYourCode, [
71
+ SECTION_HEADERS.tryItOut,
72
+ SECTION_HEADERS.challenge,
73
+ SECTION_HEADERS.testYourself,
74
+ SECTION_HEADERS.funFact,
75
+ ]).trim();
76
+ sections.addYourCode.imageSlots = createImageSlots(countImageMarkers(sections.addYourCode.content), 'addYourCode');
77
+ // Extract remaining sections, no images in this part.
78
+ sections.tryItOut.content = extractSection(markdown, SECTION_HEADERS.tryItOut, [
79
+ SECTION_HEADERS.challenge,
80
+ ]).trim();
81
+ sections.challenge.content = extractSection(markdown, SECTION_HEADERS.challenge, [
82
+ SECTION_HEADERS.testYourself,
83
+ SECTION_HEADERS.funFact,
84
+ ]).trim();
85
+ sections.testYourself.content = extractSection(markdown, SECTION_HEADERS.testYourself, [
86
+ SECTION_HEADERS.funFact,
87
+ ]).trim();
88
+ const funFactSplit = splitAtHeader(markdown, SECTION_HEADERS.funFact);
89
+ if (funFactSplit) {
90
+ sections.funFact.content = funFactSplit.after.trim();
91
+ }
92
+ return sections;
93
+ }
94
+ export function assignImagesToSlots(sections, images) {
95
+ let imageIndex = 0;
96
+ const assignToSection = (section) => {
97
+ for (const slot of section.imageSlots) {
98
+ if (imageIndex < images.length) {
99
+ slot.base64 = images[imageIndex];
100
+ imageIndex++;
101
+ }
102
+ }
103
+ };
104
+ // Only assign to sections that track image slots
105
+ assignToSection(sections.preface);
106
+ assignToSection(sections.addYourCode);
107
+ return sections;
108
+ }
109
+ //# sourceMappingURL=docling-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docling-parser.js","sourceRoot":"","sources":["../../src/parsing/docling-parser.ts"],"names":[],"mappings":"AAEA,MAAM,YAAY,GAAG,gBAAgB,CAAC;AAEtC,MAAM,eAAe,GAAG;IACtB,QAAQ,EAAE,oBAAoB;IAC9B,WAAW,EAAE,iDAAiD;IAC9D,QAAQ,EAAE,uBAAuB;IACjC,SAAS,EAAE,kBAAkB;IAC7B,YAAY,EAAE,wBAAwB;IACtC,OAAO,EAAE,mBAAmB;CAC7B,CAAC;AAsBF,SAAS,iBAAiB,CAAC,OAAe;IACxC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;AACrE,CAAC;AAED,SAAS,gBAAgB,CAAC,KAAa,EAAE,MAAc;IACrD,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QAC9C,EAAE,EAAE,GAAG,MAAM,QAAQ,CAAC,GAAG,CAAC,EAAE;KAC7B,CAAC,CAAC,CAAC;AACN,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB,EAAE,aAAqB;IAC5D,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YACxC,OAAO;gBACL,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;gBACpC,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;aACjC,CAAC;QACJ,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,QAAgB,EAAE,YAAoB,EAAE,WAAqB;IACnF,MAAM,UAAU,GAAG,aAAa,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IACzD,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI,cAAc,GAAG,UAAU,CAAC,KAAK,CAAC;IAEtC,KAAK,MAAM,UAAU,IAAI,WAAW,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,aAAa,CAAC,cAAc,EAAE,UAAU,CAAC,CAAC;QAC3D,IAAI,QAAQ,EAAE,CAAC;YACb,cAAc,GAAG,QAAQ,CAAC,MAAM,CAAC;YACjC,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,QAAgB;IACnD,MAAM,QAAQ,GAA0B;QACtC,OAAO,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACxC,QAAQ,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACzC,WAAW,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC5C,QAAQ,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QACzC,SAAS,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC1C,YAAY,EAAE,EAAE,OAAO,EAAE,
EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;QAC7C,OAAO,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,EAAE;KACzC,CAAC;IAEF,kEAAkE;IAClE,MAAM,aAAa,GAAG,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,CAAC,CAAC;IACxE,IAAI,aAAa,EAAE,CAAC;QAClB,QAAQ,CAAC,OAAO,CAAC,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;QACvD,QAAQ,CAAC,OAAO,CAAC,UAAU,GAAG,gBAAgB,CAC5C,iBAAiB,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAC3C,SAAS,CACV,CAAC;IACJ,CAAC;IAED,oDAAoD;IACpD,QAAQ,CAAC,QAAQ,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,EAAE;QAC7E,eAAe,CAAC,WAAW;QAC3B,eAAe,CAAC,QAAQ;QACxB,eAAe,CAAC,SAAS;QACzB,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,kDAAkD;IAClD,QAAQ,CAAC,WAAW,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,WAAW,EAAE;QACnF,eAAe,CAAC,QAAQ;QACxB,eAAe,CAAC,SAAS;QACzB,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IACV,QAAQ,CAAC,WAAW,CAAC,UAAU,GAAG,gBAAgB,CAChD,iBAAiB,CAAC,QAAQ,CAAC,WAAW,CAAC,OAAO,CAAC,EAC/C,aAAa,CACd,CAAC;IAEF,sDAAsD;IACtD,QAAQ,CAAC,QAAQ,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,QAAQ,EAAE;QAC7E,eAAe,CAAC,SAAS;KAC1B,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,QAAQ,CAAC,SAAS,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,SAAS,EAAE;QAC/E,eAAe,CAAC,YAAY;QAC5B,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,QAAQ,CAAC,YAAY,CAAC,OAAO,GAAG,cAAc,CAAC,QAAQ,EAAE,eAAe,CAAC,YAAY,EAAE;QACrF,eAAe,CAAC,OAAO;KACxB,CAAC,CAAC,IAAI,EAAE,CAAC;IAEV,MAAM,YAAY,GAAG,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,OAAO,CAAC,CAAC;IACtE,IAAI,YAAY,EAAE,CAAC;QACjB,QAAQ,CAAC,OAAO,CAAC,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;IACvD,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,mBAAmB,CACjC,QAA+B,EAC/B,MAAgB;IAEhB,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,MAAM,eAAe,GAAG,CAAC,OAAsB,EAAE,EAAE;QACjD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;YACtC,IAAI,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC;gBAC/B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC;gBACjC,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;IACH,CAAC,CAAC;IAEF,iDAAiD;IACjD,eAAe,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;IAClC,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAEtC,OAAO,QAAQ,CAAC;AACl
B,CAAC"}
@@ -0,0 +1,2 @@
/**
 * Converts the document at `filePath` to markdown using the docling cleaner.
 *
 * @param filePath - Path of the document to convert.
 * @returns The markdown text, or `null` when no conversion path succeeded.
 */
export declare function getDoclingMarkdown(filePath: string): Promise<string | null>;
2
+ //# sourceMappingURL=docling-runners.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docling-runners.d.ts","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AA6GA,wBAAsB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CAejF"}
@@ -0,0 +1,85 @@
1
+ import path from 'path';
2
+ import { execFileSync } from 'child_process';
3
+ import { existsSync, statSync } from 'fs';
4
+ import { fileURLToPath } from 'url';
5
+ import { logger } from '../logger.js';
6
+ const __filename = fileURLToPath(import.meta.url);
7
+ const __dirname = path.dirname(__filename);
/**
 * Locates a docling-cleaner executable for the current platform.
 *
 * Lookup order:
 *   1. the path returned by `ensureDoclingCleaner()` from
 *      `@digimakers/docling-cleaner`, accepted only if it exists and is a
 *      regular file;
 *   2. binaries under `../docling-cleaner/bin/<platform>-<arch>/`;
 *   3. binaries under `../../src/docling-cleaner/bin/<platform>-<arch>/`.
 * In steps 2 and 3 the executable is looked for directly in that directory
 * first, then inside a nested `docling-cleaner/` directory.
 *
 * @returns Absolute path of the executable, or `null` if none was found.
 */
async function resolveDoclingBinary() {
    const executable = process.platform === 'win32' ? 'docling-cleaner.exe' : 'docling-cleaner';
    const target = `${process.platform}-${process.arch}`;
    try {
        const { ensureDoclingCleaner: fetchBinary } = await import('@digimakers/docling-cleaner');
        const downloaded = await fetchBinary();
        if (downloaded && existsSync(downloaded)) {
            try {
                if (statSync(downloaded).isFile()) {
                    return downloaded;
                }
            }
            catch {
                // Ignore invalid paths.
            }
        }
    }
    catch (error) {
        // A failing downloader is not fatal: fall through to bundled copies.
        logger.warn({ err: error }, 'Docling downloader failed, trying bundled binaries');
    }
    const binRoots = [
        path.resolve(__dirname, '..', 'docling-cleaner', 'bin', target),
        path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner', 'bin', target),
    ];
    for (const root of binRoots) {
        const candidates = [
            path.join(root, executable),
            path.join(root, 'docling-cleaner', executable),
        ];
        for (const candidate of candidates) {
            if (existsSync(candidate)) {
                return candidate;
            }
        }
    }
    return null;
}
/**
 * Finds the directory that contains `cleaner.py`.
 *
 * Checks `../docling-cleaner` first, then `../../src/docling-cleaner`, both
 * relative to this module's directory.
 *
 * @returns The first directory containing `cleaner.py`, or `null`.
 */
function resolveDoclingCleanerDir() {
    const searchDirs = [
        path.resolve(__dirname, '..', 'docling-cleaner'),
        path.resolve(__dirname, '..', '..', 'src', 'docling-cleaner'),
    ];
    for (const dir of searchDirs) {
        if (existsSync(path.join(dir, 'cleaner.py'))) {
            return dir;
        }
    }
    return null;
}
/**
 * Converts a document to markdown by running `cleaner.py` through `uv`.
 *
 * Runs `uv run python cleaner.py <filePath>` synchronously inside the cleaner
 * directory with a 120 second timeout and returns its standard output.
 *
 * @param filePath Path of the document to convert.
 * @returns The command's stdout as a UTF-8 string, or `null` when the cleaner
 *          directory cannot be found or the command fails.
 */
function getDoclingMarkdownFromUv(filePath) {
    const workingDir = resolveDoclingCleanerDir();
    if (workingDir === null) {
        logger.warn('Docling cleaner assets not found. Ensure the package includes dist/docling-cleaner.');
        return null;
    }
    const uvArgs = ['run', 'python', 'cleaner.py', filePath];
    const spawnOptions = {
        cwd: workingDir,
        encoding: 'utf-8',
        timeout: 120000,
        stdio: ['pipe', 'pipe', 'pipe'],
    };
    try {
        return execFileSync('uv', uvArgs, spawnOptions);
    }
    catch (error) {
        logger.warn({ err: error }, 'Docling uv fallback failed. Install uv and run in packages/core/src/docling-cleaner.');
        return null;
    }
}
/**
 * Converts the document at `filePath` to markdown.
 *
 * Runs the docling-cleaner binary when one can be located; if there is no
 * binary, or running it throws, falls back to the `uv`-based runner.
 *
 * The binary is invoked synchronously with a 120 second timeout.
 *
 * @param filePath Path of the document to convert.
 * @returns The markdown text, or `null` when the fallback also fails.
 */
export async function getDoclingMarkdown(filePath) {
    const executable = await resolveDoclingBinary();
    if (!executable) {
        return getDoclingMarkdownFromUv(filePath);
    }
    try {
        return execFileSync(executable, [filePath], {
            encoding: 'utf-8',
            timeout: 120000,
            stdio: ['pipe', 'pipe', 'pipe'],
        });
    }
    catch (error) {
        logger.warn({ err: error }, 'Docling binary failed, attempting uv fallback');
        return getDoclingMarkdownFromUv(filePath);
    }
}
85
+ //# sourceMappingURL=docling-runners.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"docling-runners.js","sourceRoot":"","sources":["../../src/parsing/docling-runners.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AAC7C,OAAO,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,aAAa,EAAE,MAAM,KAAK,CAAC;AACpC,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAEtC,MAAM,UAAU,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAClD,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;AAC3C,KAAK,UAAU,oBAAoB;IACjC,MAAM,WAAW,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC;IAC1D,MAAM,UAAU,GAAG,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,iBAAiB,CAAC;IAE5F,IAAI,CAAC;QACH,MAAM,EAAE,oBAAoB,EAAE,GAAG,MAAM,MAAM,CAAC,6BAA6B,CAAC,CAAC;QAC7E,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;QAChD,IAAI,UAAU,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC;gBACH,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE;oBAAE,OAAO,UAAU,CAAC;YACvD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,oDAAoD,CAAC,CAAC;IACpF,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAC7B,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,UAAU,CAAC;QAAE,OAAO,UAAU,CAAC;IAE9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,OAAO,CACnC,SAAS,EACT,IAAI,EACJ,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,gBAAgB,CAAC;QAAE,OAAO,gBAAgB,CAAC;IAE1D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAC5B,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO,SAAS,CAAC;IAE5C,MAAM,eAAe,GAAG,IAAI,CAAC,OAAO,CAClC,SAAS,EACT,IAAI,EACJ,IAAI,EACJ,KAAK,EACL,iBAAiB,EACjB,KAAK,EACL,WAAW,EACX,iBAAiB,EACjB,UAAU,CACX,CAAC;IACF,IAAI,UAAU,CAAC,eAAe,CAAC;QAAE,OAAO,eAAe,CAAC;IAExD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB;IAC/B,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,iBAAiB,CAAC,CAAC;IACxE,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,cAAc
,CAAC;IAE/E,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,iBAAiB,CAAC,CAAC;IACpF,IAAI,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAAE,OAAO,aAAa,CAAC;IAE7E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAgB;IAChD,MAAM,UAAU,GAAG,wBAAwB,EAAE,CAAC;IAC9C,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,CAAC,IAAI,CACT,qFAAqF,CACtF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC;QACH,OAAO,YAAY,CAAC,IAAI,EAAE,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,CAAC,EAAE;YACnE,GAAG,EAAE,UAAU;YACf,QAAQ,EAAE,OAAO;YACjB,OAAO,EAAE,MAAM;YACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;SAChC,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,IAAI,CACT,EAAE,GAAG,EAAE,KAAK,EAAE,EACd,sFAAsF,CACvF,CAAC;QACF,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,UAAU,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAChD,IAAI,UAAU,EAAE,CAAC;QACf,IAAI,CAAC;YACH,OAAO,YAAY,CAAC,UAAU,EAAE,CAAC,QAAQ,CAAC,EAAE;gBAC1C,QAAQ,EAAE,OAAO;gBACjB,OAAO,EAAE,MAAM;gBACf,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC;aAChC,CAAC,CAAC;QACL,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,+CAA+C,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAC;AAC5C,CAAC"}
@@ -1,6 +1,6 @@
1
- import { ParsedLesson } from '../schemas/index.js';
1
+ import { Lesson } from '../schemas/index.js';
2
2
  export interface ParseResult {
3
- data: ParsedLesson;
3
+ data: Lesson;
4
4
  sourcePath: string;
5
5
  }
6
6
  export declare function parseDocx(filePath: string): Promise<ParseResult>;
@@ -1 +1 @@
1
- {"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAsB,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAGvE,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,YAAY,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AA4BD,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CA4DtE"}
1
+ {"version":3,"file":"docx-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,MAAM,EAAiB,MAAM,qBAAqB,CAAC;AAoB5D,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,EAAE,MAAM,CAAC;CACpB;AA4BD,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CA6JtE"}
@@ -2,8 +2,14 @@ import fs from 'fs/promises';
2
2
  import mammoth from 'mammoth';
3
3
  import { generateText, Output } from 'ai';
4
4
  import { createGoogleGenerativeAI } from '@ai-sdk/google';
5
- import { ParsedLessonSchema } from '../schemas/index.js';
6
5
  import { logger } from '../logger.js';
6
+ import { extractLanguageFromFooter } from './footer-parser.js';
7
+ import { LessonLLMSchema, LessonLLMSchemaWithoutLanguage, } from '../schemas/lesson.js';
8
+ import { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
9
+ import { getDoclingMarkdown } from './docling-runners.js';
10
+ import { buildDocxParserPrompt, docxParserSystemPrompt } from './prompts.js';
11
+ import { formatDocumentCode } from '../agents/code-formatter.js';
12
+ import { enrichDebugIssues, inferLessonType, normaliseLessonContent, normaliseLessonForType, } from './post-processors.js';
7
13
  // Setup google generative ai
8
14
  const google = createGoogleGenerativeAI({
9
15
  apiKey: process.env.GEMINI_API_KEY,
@@ -26,45 +32,132 @@ async function extractImages(buffer) {
26
32
  export async function parseDocx(filePath) {
27
33
  logger.info(`Parsing: ${filePath}`);
28
34
  const buffer = await fs.readFile(filePath);
29
- // Extract text and images in parallel
30
- const [{ value: text }, allImages] = await Promise.all([
31
- mammoth.extractRawText({ buffer }),
35
+ // Extract images and footer language in parallel, try docling markdown
36
+ const [allImages, footerLanguage, doclingMarkdown] = await Promise.all([
32
37
  extractImages(buffer),
38
+ extractLanguageFromFooter(filePath),
39
+ getDoclingMarkdown(filePath),
33
40
  ]);
34
- logger.info(text);
35
- logger.info(allImages);
36
- logger.debug(`Extracted ${text.length} characters and ${allImages.length} images`);
37
- // First image is project cover, rest are for code steps
38
- const projectImage = allImages.length > 0 ? allImages[0] : null;
39
- const stepImages = allImages.slice(1);
40
- logger.info(`Found ${stepImages.length} step images, projectImage: ${projectImage ? 'yes' : 'no'}`);
41
- // Use LLM to extract structured data
42
- const { output } = await generateText({
43
- model: google('gemini-2.0-flash'),
44
- output: Output.object({
45
- schema: ParsedLessonSchema,
46
- }),
47
- prompt: `Extract structured lesson data from this educational document.
48
-
49
- This is a programming lesson sheet for students. Extract all the relevant sections and content.
50
-
51
- If a section is not present in the document, use empty arrays for array fields, empty strings for required string fields, and null for nullable fields.
52
-
53
- For the addYourCodeSection, each step should be a clear instruction. Set image to null for all steps (images will be added separately).
54
-
55
- Document content:
56
- ${text}`,
57
- });
58
- logger.info(output);
59
- logger.info(`Successfully extracted lesson: ${output.topic} - ${output.project}`);
60
- // Post-process: assign images to the extracted data
61
- const data = output;
62
- data.projectImage = projectImage;
63
- // Assign step images in order
64
- if (stepImages.length > 0 && Array.isArray(data.addYourCodeSection)) {
65
- data.addYourCodeSection.forEach((step, index) => {
66
- step.image = stepImages[index] ?? null;
41
+ // Parse docling markdown to get sections with image placeholders
42
+ let parsedSections = null;
43
+ let textForLLM;
44
+ if (doclingMarkdown) {
45
+ parsedSections = parseDoclingMarkdown(doclingMarkdown);
46
+ assignImagesToSlots(parsedSections, allImages);
47
+ logger.debug({
48
+ prefaceImageSlots: parsedSections.preface.imageSlots.length,
49
+ addYourCodeImageSlots: parsedSections.addYourCode.imageSlots.length,
50
+ totalImages: allImages.length,
51
+ }, 'Docling image slots parsed');
52
+ textForLLM = doclingMarkdown;
53
+ logger.info('Using docling markdown with placeholder-based image mapping');
54
+ logger.info(textForLLM);
55
+ logger.info('Formatting document code blocks with agent');
56
+ textForLLM = await formatDocumentCode(doclingMarkdown, footerLanguage);
57
+ logger.info(textForLLM);
58
+ }
59
+ else {
60
+ const { value: text } = await mammoth.extractRawText({ buffer });
61
+ textForLLM = text;
62
+ logger.info('Falling back to mammoth text extraction');
63
+ logger.info(textForLLM);
64
+ }
65
+ logger.info(`Extracted ${textForLLM.length} characters and ${allImages.length} images`);
66
+ if (footerLanguage) {
67
+ logger.info(`Programming language from footer: ${footerLanguage}`);
68
+ }
69
+ else {
70
+ logger.warn('Footer language not found');
71
+ }
72
+ // If we find the programming language in the footer, we don't need the LLM
73
+ // to tell us.
74
+ const llmSchema = footerLanguage ? LessonLLMSchemaWithoutLanguage : LessonLLMSchema;
75
+ let output;
76
+ try {
77
+ // Use LLM to extract structured data
78
+ const response = await generateText({
79
+ model: google('gemini-2.5-pro'),
80
+ output: Output.object({
81
+ schema: llmSchema,
82
+ }),
83
+ system: docxParserSystemPrompt,
84
+ prompt: buildDocxParserPrompt(textForLLM),
85
+ temperature: 0,
86
+ maxRetries: 5,
67
87
  });
88
+ output = response.output;
89
+ }
90
+ catch (error) {
91
+ const err = error;
92
+ logger.error({ err, filePath }, 'LLM extraction failed');
93
+ const issues = err?.cause?.issues ?? err?.issues;
94
+ if (issues) {
95
+ logger.error({ issues, filePath }, 'LLM schema validation issues');
96
+ }
97
+ const value = err?.cause?.value ?? err?.value;
98
+ if (value) {
99
+ logger.error({ value, filePath }, 'LLM output that failed validation');
100
+ }
101
+ throw error;
102
+ }
103
+ // Infer the lesson type with heuristic
104
+ const dataWithoutType = normaliseLessonContent(output);
105
+ let data = normaliseLessonForType({
106
+ ...dataWithoutType,
107
+ lessonType: inferLessonType(textForLLM, footerLanguage, dataWithoutType),
108
+ });
109
+ data = enrichDebugIssues(textForLLM, data);
110
+ logger.info(`Inferred lesson type as: '${data.lessonType}'`);
111
+ logger.info(`Successfully extracted lesson: ${data.topic} - ${data.project}`);
112
+ // Set programming language from footer if found
113
+ if (footerLanguage) {
114
+ data.programmingLanguage = footerLanguage;
115
+ }
116
+ // Assign images using placeholder-based mapping if available
117
+ if (parsedSections && data.lessonType !== 'debugging lesson') {
118
+ // Assign preface image slots
119
+ if (parsedSections.preface.imageSlots.length > 0) {
120
+ data.prefaceImageSlots = parsedSections.preface.imageSlots;
121
+ }
122
+ // Assign Add Your Code step images
123
+ const addSection = data.addYourCodeSection;
124
+ if (Array.isArray(addSection) && parsedSections.addYourCode.imageSlots.length > 0) {
125
+ const isStepWithImageArray = addSection.every((item) => typeof item === 'object' && item !== null && 'step' in item);
126
+ if (isStepWithImageArray) {
127
+ const slots = parsedSections.addYourCode.imageSlots;
128
+ addSection.forEach((step, index) => {
129
+ if (index < slots.length) {
130
+ step.imageSlot = {
131
+ id: slots[index].id,
132
+ base64: slots[index].base64,
133
+ };
134
+ }
135
+ });
136
+ }
137
+ }
138
+ }
139
+ else if (data.lessonType !== 'debugging lesson') {
140
+ // Fallback using old behavior, first image is project, rest are steps
141
+ // Not good if there are multiple images in preface section
142
+ logger.warn('Falling back to old image assignment behaviour');
143
+ if (allImages.length > 0) {
144
+ data.prefaceImageSlots = [{ id: 'fallback_preface_img_1', base64: allImages[0] }];
145
+ }
146
+ const stepImages = allImages.slice(1);
147
+ const addSection = data.addYourCodeSection;
148
+ if (stepImages.length > 0 && Array.isArray(addSection)) {
149
+ const isStepWithImageArray = addSection.every((item) => typeof item === 'object' && item !== null && 'step' in item);
150
+ if (isStepWithImageArray) {
151
+ addSection.forEach((step, index) => {
152
+ if (stepImages[index]) {
153
+ step.imageSlot = {
154
+ id: `fallback_img_${index + 1}`,
155
+ base64: stepImages[index],
156
+ };
157
+ }
158
+ });
159
+ }
160
+ }
68
161
  }
69
162
  logger.info(data);
70
163
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,kBAAkB,EAAgB,MAAM,qBAAqB,CAAC;AACvE,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAOtC,6BAA6B;AAC7B,MAAM,MAAM,GAAG,wBAAwB,CAAC;IACtC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;CACnC,CAAC,CAAC;AAEH,+CAA+C;AAC/C,KAAK,UAAU,aAAa,CAAC,MAAc;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,MAAM,OAAO,CAAC,aAAa,CACzB,EAAE,MAAM,EAAE,EACV;QACE,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACtD,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;YACvC,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,QAAQ,KAAK,CAAC,WAAW,WAAW,MAAM,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;QAC1B,CAAC,CAAC;KACH,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6CAA6C;AAC7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,CAAC,IAAI,CAAC,YAAY,QAAQ,EAAE,CAAC,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,sCAAsC;IACtC,MAAM,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,SAAS,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACrD,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;QAClC,aAAa,CAAC,MAAM,CAAC;KACtB,CAAC,CAAC;IACH,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClB,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAEvB,MAAM,CAAC,KAAK,CAAC,aAAa,IAAI,CAAC,MAAM,mBAAmB,SAAS,CAAC,MAAM,SAAS,CAAC,CAAC;IAEnF,wDAAwD;IACxD,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAChE,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAEtC,MAAM,CAAC,IAAI,CACT,SAAS,UAAU,CAAC,MAAM,+BAA+B,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CACvF,CAAC;IAEF,qCAAqC;IACrC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,YAAY,CAAC;QACpC,KAAK,EAAE,MAAM,CAAC,kBAAkB,CAAC;QACjC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;YACpB,MAAM,EAAE,kBAAkB;SAC3B,CAAC;QACF,MAAM,EAAE;;;;;;;;;EASV,IAAI,EAAE;KACL,CAAC,
CAAC;IAEH,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACpB,MAAM,CAAC,IAAI,CAAC,kCAAkC,MAAO,CAAC,KAAK,MAAM,MAAO,CAAC,OAAO,EAAE,CAAC,CAAC;IAEpF,oDAAoD;IACpD,MAAM,IAAI,GAAG,MAAsB,CAAC;IACpC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;IAEjC,8BAA8B;IAC9B,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,kBAAkB,CAAC,EAAE,CAAC;QACpE,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;YAC9C,IAAI,CAAC,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC;QACzC,CAAC,CAAC,CAAC;IACL,CAAC;IACD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAElB,OAAO;QACL,IAAI;QACJ,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"docx-parser.js","sourceRoot":"","sources":["../../src/parsing/docx-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,aAAa,CAAC;AAC7B,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAC1C,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAE1D,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AACtC,OAAO,EAAE,yBAAyB,EAAE,MAAM,oBAAoB,CAAC;AAC/D,OAAO,EAEL,eAAe,EACf,8BAA8B,GAE/B,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC1D,OAAO,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,MAAM,cAAc,CAAC;AAC7E,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EACL,iBAAiB,EACjB,eAAe,EACf,sBAAsB,EACtB,sBAAsB,GACvB,MAAM,sBAAsB,CAAC;AAO9B,6BAA6B;AAC7B,MAAM,MAAM,GAAG,wBAAwB,CAAC;IACtC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;CACnC,CAAC,CAAC;AAEH,+CAA+C;AAC/C,KAAK,UAAU,aAAa,CAAC,MAAc;IACzC,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,MAAM,OAAO,CAAC,aAAa,CACzB,EAAE,MAAM,EAAE,EACV;QACE,YAAY,EAAE,OAAO,CAAC,MAAM,CAAC,UAAU,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACtD,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;YACvC,MAAM,MAAM,GAAG,WAAW,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;YAC9C,MAAM,OAAO,GAAG,QAAQ,KAAK,CAAC,WAAW,WAAW,MAAM,EAAE,CAAC;YAC7D,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC;QAC1B,CAAC,CAAC;KACH,CACF,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,6CAA6C;AAC7C,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM,CAAC,IAAI,CAAC,YAAY,QAAQ,EAAE,CAAC,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE3C,uEAAuE;IACvE,MAAM,CAAC,SAAS,EAAE,cAAc,EAAE,eAAe,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QACrE,aAAa,CAAC,MAAM,CAAC;QACrB,yBAAyB,CAAC,QAAQ,CAAC;QACnC,kBAAkB,CAAC,QAAQ,CAAC;KAC7B,CAAC,CAAC;IAEH,iEAAiE;IACjE,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,IAAI,UAAkB,CAAC;IAEvB,IAAI,eAAe,EAAE,CAAC;QACpB,cAAc,GAAG,oBAAoB,CAAC,eAAe,CAAC,CAAC;QACvD,mBAAmB,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;QAC/C,MAAM,CAAC,KAAK,CACV;YACE,iBAAiB,EAAE,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM;YAC3D,qBAAqB,EAAE,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM;YACnE,WAAW,EAAE,SAAS
,CAAC,MAAM;SAC9B,EACD,4BAA4B,CAC7B,CAAC;QACF,UAAU,GAAG,eAAe,CAAC;QAC7B,MAAM,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;QAC3E,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,MAAM,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAC1D,UAAU,GAAG,MAAM,kBAAkB,CAAC,eAAe,EAAE,cAAc,CAAC,CAAC;QACvE,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;SAAM,CAAC;QACN,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM,OAAO,CAAC,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;QACjE,UAAU,GAAG,IAAI,CAAC;QAClB,MAAM,CAAC,IAAI,CAAC,yCAAyC,CAAC,CAAC;QACvD,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC1B,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,aAAa,UAAU,CAAC,MAAM,mBAAmB,SAAS,CAAC,MAAM,SAAS,CAAC,CAAC;IACxF,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,CAAC,IAAI,CAAC,qCAAqC,cAAc,EAAE,CAAC,CAAC;IACrE,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IAC3C,CAAC;IAED,2EAA2E;IAC3E,cAAc;IACd,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,8BAA8B,CAAC,CAAC,CAAC,eAAe,CAAC;IAEpF,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,qCAAqC;QACrC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC;YAClC,KAAK,EAAE,MAAM,CAAC,gBAAgB,CAAC;YAC/B,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,SAAS;aAClB,CAAC;YACF,MAAM,EAAE,sBAAsB;YAC9B,MAAM,EAAE,qBAAqB,CAAC,UAAU,CAAC;YACzC,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;QACH,MAAM,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC3B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,KAAY,CAAC;QACzB,MAAM,CAAC,KAAK,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE,uBAAuB,CAAC,CAAC;QAEzD,MAAM,MAAM,GAAG,GAAG,EAAE,KAAK,EAAE,MAAM,IAAI,GAAG,EAAE,MAAM,CAAC;QACjD,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,EAAE,8BAA8B,CAAC,CAAC;QACrE,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,EAAE,KAAK,EAAE,KAAK,IAAI,GAAG,EAAE,KAAK,CAAC;QAC9C,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,mCAAmC,CAAC,CAAC;QACzE,CAAC;QAED,MAAM,KAAK,CAAC;IACd,CAAC;IAED,uCAAuC;IACvC,MAAM,eAAe,GAAG,sBAAsB,CAAC,MAAmB,CAAC,CAAC;IACpE,IAAI,IAAI,GAAG,sBAAsB,CAAC;QAChC,GAAG,eAAe;QAClB,UAAU,EAAE,eAAe,CAAC,UAAU,EAAE,cAAqC,EAAE,eAAe,CAAC;KACtF,CAAC,CAAC;IACb,IAAI,GAAG,iBAAiB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAC3C,MAAM,CAAC,IAAI,CAAC,6BAA6B,IAAI,CAA
C,UAAU,GAAG,CAAC,CAAC;IAC7D,MAAM,CAAC,IAAI,CAAC,kCAAkC,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IAE9E,gDAAgD;IAChD,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC,mBAAmB,GAAG,cAAqC,CAAC;IACnE,CAAC;IAED,6DAA6D;IAC7D,IAAI,cAAc,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAC7D,6BAA6B;QAC7B,IAAI,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjD,IAAI,CAAC,iBAAiB,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,CAAC;QAC7D,CAAC;QAED,mCAAmC;QACnC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,IAAI,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClF,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YAEF,IAAI,oBAAoB,EAAE,CAAC;gBACzB,MAAM,KAAK,GAAG,cAAc,CAAC,WAAW,CAAC,UAAU,CAAC;gBACnD,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;wBACzB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE;4BACnB,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;yBAC5B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;SAAM,IAAI,IAAI,CAAC,UAAU,KAAK,kBAAkB,EAAE,CAAC;QAClD,sEAAsE;QACtE,2DAA2D;QAC3D,MAAM,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;QAC9D,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACzB,IAAI,CAAC,iBAAiB,GAAG,CAAC,EAAE,EAAE,EAAE,wBAAwB,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACpF,CAAC;QACD,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtC,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC;QAC3C,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,MAAM,oBAAoB,GAAG,UAAU,CAAC,KAAK,CAC3C,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,MAAM,IAAI,IAAI,CACtE,CAAC;YAEF,IAAI,oBAAoB,EAAE,CAAC;gBACxB,UAA8B,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;oBACtD,IAAI,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;wBACtB,IAAI,CAAC,SAAS,GAAG;4BACf,EAAE,EAAE,gBAAgB,KAAK,GAAG,CAAC,EAAE;4BAC/B,MAAM,EAAE,UAAU,CAAC,KAAK,CAAC;yBAC1B,CAAC;oBACJ,CAAC;gBACH,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM
,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAElB,OAAO;QACL,IAAI;QACJ,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Extract programming language from the footer of a .docx file
3
+ * Expected format: "Level: Scratch-1" or "Level: Python-2"
4
+ * Output: the mapped language identifier, or null when the footer names no recognised language
5
+ */
6
+ export declare function extractLanguageFromFooter(filePath: string): Promise<string | null>;
7
+ //# sourceMappingURL=footer-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"footer-parser.d.ts","sourceRoot":"","sources":["../../src/parsing/footer-parser.ts"],"names":[],"mappings":"AAmBA;;;;GAIG;AACH,wBAAsB,yBAAyB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,CA2BxF"}
@@ -0,0 +1,49 @@
1
+ import { logger } from '../logger.js';
2
+ // @ts-expect-error
3
+ import WordExtractor from 'word-extractor';
4
+ const LANGUAGE_MAP = {
5
+ scratch: 'scratch',
6
+ 'small-basic': 'small-basic',
7
+ 'small basic': 'small-basic',
8
+ smallbasic: 'small-basic',
9
+ python: 'python',
10
+ java: 'java',
11
+ javascript: 'javascript or html or css',
12
+ html: 'javascript or html or css',
13
+ css: 'javascript or html or css',
14
+ c: 'c',
15
+ };
16
+ /**
17
+ * Extract programming language from the footer of a .docx file
18
+ * Expected format: "Level: Scratch-1" or "Level: Python-2"
19
+ * Output: the mapped language for the programming language
20
+ */
21
+ export async function extractLanguageFromFooter(filePath) {
22
+ try {
23
+ const extractor = new WordExtractor();
24
+ const doc = await extractor.extract(filePath);
25
+ const footerText = doc.getFooters().trim().toLowerCase();
26
+ if (!footerText || !footerText.includes('level')) {
27
+ return null;
28
+ }
29
+ logger.info(`FOOTER TEXT ======================= ${footerText}`);
30
+ const levelMatch = footerText.match(/level:\s*([A-Za-z\s-]+?)[-\d]/i);
31
+ if (levelMatch && levelMatch[1]) {
32
+ const rawLanguage = levelMatch[1].trim().toLowerCase();
33
+ const mappedLanguage = LANGUAGE_MAP[rawLanguage];
34
+ if (mappedLanguage) {
35
+ logger.info(`Found programming language in footer: ${mappedLanguage}`);
36
+ return mappedLanguage;
37
+ }
38
+ else {
39
+ logger.warn(`Language "${rawLanguage}" found in footer but not in language map`);
40
+ }
41
+ }
42
+ return null;
43
+ }
44
+ catch (error) {
45
+ logger.error(`Error extracting footer from ${filePath}: ${error}`);
46
+ return null;
47
+ }
48
+ }
49
+ //# sourceMappingURL=footer-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"footer-parser.js","sourceRoot":"","sources":["../../src/parsing/footer-parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAGtC,mBAAmB;AACnB,OAAO,aAAa,MAAM,gBAAgB,CAAC;AAE3C,MAAM,YAAY,GAAwC;IACxD,OAAO,EAAE,SAAS;IAClB,aAAa,EAAE,aAAa;IAC5B,aAAa,EAAE,aAAa;IAC5B,UAAU,EAAE,aAAa;IACzB,MAAM,EAAE,QAAQ;IAChB,IAAI,EAAE,MAAM;IACZ,UAAU,EAAE,2BAA2B;IACvC,IAAI,EAAE,2BAA2B;IACjC,GAAG,EAAE,2BAA2B;IAChC,CAAC,EAAE,GAAG;CACP,CAAC;AAEF;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,QAAgB;IAC9D,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,IAAI,aAAa,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC9C,MAAM,UAAU,GAAW,GAAG,CAAC,UAAU,EAAE,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACjE,IAAI,CAAC,UAAU,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YACjD,OAAO,IAAI,CAAC;QACd,CAAC;QACD,MAAM,CAAC,IAAI,CAAC,uCAAuC,UAAU,EAAE,CAAC,CAAC;QAEjE,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACtE,IAAI,UAAU,IAAI,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;YAChC,MAAM,WAAW,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACvD,MAAM,cAAc,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;YACjD,IAAI,cAAc,EAAE,CAAC;gBACnB,MAAM,CAAC,IAAI,CAAC,yCAAyC,cAAc,EAAE,CAAC,CAAC;gBACvE,OAAO,cAAc,CAAC;YACxB,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,aAAa,WAAW,2CAA2C,CAAC,CAAC;YACnF,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,CAAC,KAAK,CAAC,gCAAgC,QAAQ,KAAK,KAAK,EAAE,CAAC,CAAC;QACnE,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -2,4 +2,7 @@ export { findDocxFiles } from './file-discovery.js';
2
2
  export type { DiscoveryOptions, DiscoveredFile } from './file-discovery.js';
3
3
  export { parseDocx } from './docx-parser.js';
4
4
  export type { ParseResult } from './docx-parser.js';
5
+ export { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
6
+ export type { ParsedSection, DoclingParsedSections } from './docling-parser.js';
7
+ export { getDoclingMarkdown } from './docling-runners.js';
5
8
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAE5E,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AAE5E,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC7C,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAEpD,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAChF,YAAY,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AAEhF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
@@ -1,3 +1,5 @@
1
1
  export { findDocxFiles } from './file-discovery.js';
2
2
  export { parseDocx } from './docx-parser.js';
3
+ export { parseDoclingMarkdown, assignImagesToSlots } from './docling-parser.js';
4
+ export { getDoclingMarkdown } from './docling-runners.js';
3
5
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/parsing/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAG7C,OAAO,EAAE,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAGhF,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC"}
@@ -0,0 +1,3 @@
1
+ export declare function normaliseCodeBlock(code: string | null): string | null;
2
+ export declare function normaliseText(text: string | null): string | null;
3
+ //# sourceMappingURL=normalise.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"normalise.d.ts","sourceRoot":"","sources":["../../src/parsing/normalise.ts"],"names":[],"mappings":"AA0CA,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,GAAG,IAAI,CAcrE;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,MAAM,GAAG,IAAI,CAMhE"}